mirror of
https://github.com/owasp-modsecurity/ModSecurity.git
synced 2025-11-18 18:30:35 +03:00
- Avoids copying std::shared_ptr when lifetime of the RuleMessage
is controlled by the caller.
- The RuleMessage instance is created in RuleWithActions::evaluate and
then used to call the overloaded version of this method that is
specialized by subclasses.
- Once the call to the overloaded method returns, the std::shared_ptr
is destroyed as it's not stored by any of the callers, so it can
be replaced with a stack variable and avoid paying the cost of
copying the std::shared_ptr (and its control block that is
guaranteed to be thread-safe and thus is not a straightforward
pointer copy)
- Introduced RuleMessage::reset because this is required by
RuleWithActions::performLogging when it's not the 'last log', the rule
has multimatch and it's to be logged.
- The current version allocates another instance of
RuleMessage on the heap to copy the Rule & Transaction related state
while all the other members in the RuleMessage are set to their
default values.
- The new version leverages the existing, unused and incomplete
function 'clean' (renamed as 'reset') to do this on the current
instance.
- Notice that the current code preserves the value of m_saveMessage,
so 'reset' provides an argument for the caller to control whether
this member should be reinitialized.
200 lines
7.3 KiB
C++
200 lines
7.3 KiB
C++
/*
|
|
* ModSecurity, http://www.modsecurity.org/
|
|
* Copyright (c) 2015 - 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/)
|
|
*
|
|
* You may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* If any of the files related to licensing are missing or if you have any
|
|
* other questions related to licensing please contact Trustwave Holdings, Inc.
|
|
* directly using the email address security@modsecurity.org.
|
|
*
|
|
*/
|
|
|
|
#include "src/operators/validate_utf8_encoding.h"
|
|
|
|
#include <string>
|
|
|
|
#include "src/operators/operator.h"
|
|
|
|
|
|
/*
 * Negative status codes produced by detect_utf8_character(); any positive
 * return value from that function is the byte length of the sequence.
 */

/* The input ends before the sequence announced by the lead byte is complete. */
constexpr int UNICODE_ERROR_CHARACTERS_MISSING{-1};

/* A lead byte or a continuation byte does not have the required bit pattern. */
constexpr int UNICODE_ERROR_INVALID_ENCODING{-2};

/* The code point was encoded with more bytes than necessary. */
constexpr int UNICODE_ERROR_OVERLONG_CHARACTER{-3};

/* The code point is not allowed in UTF-8 (e.g. the 0xD800 - 0xDFFF range). */
constexpr int UNICODE_ERROR_RESTRICTED_CHARACTER{-4};

/* The decoder was given no input buffer at all. */
constexpr int UNICODE_ERROR_DECODING_ERROR{-5};
|
|
|
|
|
|
namespace modsecurity {
|
|
namespace operators {
|
|
|
|
int ValidateUtf8Encoding::detect_utf8_character(
|
|
const unsigned char *p_read, unsigned int length) {
|
|
int unicode_len = 0;
|
|
unsigned int d = 0;
|
|
unsigned char c;
|
|
|
|
if (p_read == NULL) {
|
|
return UNICODE_ERROR_DECODING_ERROR;
|
|
}
|
|
c = *p_read;
|
|
|
|
/* If first byte begins with binary 0 it is single byte encoding */
|
|
if ((c & 0x80) == 0) {
|
|
/* single byte unicode (7 bit ASCII equivilent) has no validation */
|
|
return 1;
|
|
} else if ((c & 0xE0) == 0xC0) {
|
|
/* If first byte begins with binary 110 it is two byte encoding*/
|
|
/* check we have at least two bytes */
|
|
if (length < 2) {
|
|
unicode_len = UNICODE_ERROR_CHARACTERS_MISSING;
|
|
} else if (((*(p_read + 1)) & 0xC0) != 0x80) {
|
|
/* check second byte starts with binary 10 */
|
|
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
|
} else {
|
|
unicode_len = 2;
|
|
/* compute character number */
|
|
d = ((c & 0x1F) << 6) | (*(p_read + 1) & 0x3F);
|
|
}
|
|
} else if ((c & 0xF0) == 0xE0) {
|
|
/* If first byte begins with binary 1110 it is three byte encoding */
|
|
/* check we have at least three bytes */
|
|
if (length < 3) {
|
|
unicode_len = UNICODE_ERROR_CHARACTERS_MISSING;
|
|
} else if (((*(p_read + 1)) & 0xC0) != 0x80) {
|
|
/* check second byte starts with binary 10 */
|
|
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
|
} else if (((*(p_read + 2)) & 0xC0) != 0x80) {
|
|
/* check third byte starts with binary 10 */
|
|
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
|
} else {
|
|
unicode_len = 3;
|
|
/* compute character number */
|
|
d = ((c & 0x0F) << 12) | ((*(p_read + 1) & 0x3F) << 6)
|
|
| (*(p_read + 2) & 0x3F);
|
|
}
|
|
} else if ((c & 0xF8) == 0xF0) {
|
|
/* If first byte begins with binary 11110 it is four byte encoding */
|
|
/* restrict characters to UTF-8 range (U+0000 - U+10FFFF)*/
|
|
if (c >= 0xF5) {
|
|
return UNICODE_ERROR_RESTRICTED_CHARACTER;
|
|
}
|
|
/* check we have at least four bytes */
|
|
if (length < 4) {
|
|
unicode_len = UNICODE_ERROR_CHARACTERS_MISSING;
|
|
} else if (((*(p_read + 1)) & 0xC0) != 0x80) {
|
|
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
|
} else if (((*(p_read + 2)) & 0xC0) != 0x80) {
|
|
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
|
} else if (((*(p_read + 3)) & 0xC0) != 0x80) {
|
|
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
|
} else {
|
|
unicode_len = 4;
|
|
/* compute character number */
|
|
d = ((c & 0x07) << 18) | ((*(p_read + 1) & 0x3F) << 12)
|
|
| ((*(p_read + 2) & 0x3F) << 6) | (*(p_read + 3) & 0x3F);
|
|
}
|
|
} else {
|
|
/* any other first byte is invalid (RFC 3629) */
|
|
return UNICODE_ERROR_INVALID_ENCODING;
|
|
}
|
|
|
|
/* invalid UTF-8 character number range (RFC 3629) */
|
|
if ((d >= 0xD800) && (d <= 0xDFFF)) {
|
|
return UNICODE_ERROR_RESTRICTED_CHARACTER;
|
|
}
|
|
|
|
/* check for overlong */
|
|
if ((unicode_len == 4) && (d < 0x010000)) {
|
|
/* four byte could be represented with less bytes */
|
|
return UNICODE_ERROR_OVERLONG_CHARACTER;
|
|
} else if ((unicode_len == 3) && (d < 0x0800)) {
|
|
/* three byte could be represented with less bytes */
|
|
return UNICODE_ERROR_OVERLONG_CHARACTER;
|
|
} else if ((unicode_len == 2) && (d < 0x80)) {
|
|
/* two byte could be represented with less bytes */
|
|
return UNICODE_ERROR_OVERLONG_CHARACTER;
|
|
}
|
|
|
|
return unicode_len;
|
|
}
|
|
|
|
/**
 * Walks str one UTF-8 sequence at a time using detect_utf8_character().
 *
 * @param transaction  Used for debug logging; when null, nothing is logged
 *                     and logOffset() is not called.
 * @param rule         Not used by this operator.
 * @param str          The bytes to validate.
 * @param ruleMessage  Passed to logOffset() together with the offset of the
 *                     offending sequence and the total input size.
 *
 * @return true as soon as a sequence fails validation, false when the whole
 *         input (including an empty one) was consumed without error.
 */
bool ValidateUtf8Encoding::evaluate(Transaction *transaction, RuleWithActions *rule,
    const std::string &str, RuleMessage &ruleMessage) {
    /* The decoder works on unsigned bytes; keep the buffer const. */
    const unsigned char *bytes =
        reinterpret_cast<const unsigned char *>(str.c_str());
    unsigned int bytes_left = str.size();

    for (unsigned int i = 0; i < str.size();) {
        const int rc = detect_utf8_character(&bytes[i], bytes_left);

        switch (rc) {
            case UNICODE_ERROR_CHARACTERS_MISSING :
                if (transaction) {
                    ms_dbg_a(transaction, 8, "Invalid UTF-8 encoding: "
                        "not enough bytes in character "
                        "at " + str + ". [offset \"" +
                        std::to_string(i) + "\"]");
                    /* Report the offset like every other failure does. */
                    logOffset(ruleMessage, i, str.size());
                }
                return true;
            case UNICODE_ERROR_INVALID_ENCODING :
                if (transaction) {
                    ms_dbg_a(transaction, 8, "Invalid UTF-8 encoding: "
                        "invalid byte value in character "
                        "at " + str + ". [offset \"" +
                        std::to_string(i) + "\"]");
                    logOffset(ruleMessage, i, str.size());
                }
                return true;
            case UNICODE_ERROR_OVERLONG_CHARACTER :
                if (transaction) {
                    ms_dbg_a(transaction, 8, "Invalid UTF-8 encoding: "
                        "overlong character detected "
                        "at " + str + ". [offset \"" +
                        std::to_string(i) + "\"]");
                    logOffset(ruleMessage, i, str.size());
                }
                return true;
            case UNICODE_ERROR_RESTRICTED_CHARACTER :
                if (transaction) {
                    ms_dbg_a(transaction, 8, "Invalid UTF-8 encoding: "
                        "use of restricted character "
                        "at " + str + ". [offset \"" +
                        std::to_string(i) + "\"]");
                    logOffset(ruleMessage, i, str.size());
                }
                return true;
            case UNICODE_ERROR_DECODING_ERROR :
                if (transaction) {
                    ms_dbg_a(transaction, 8, "Error validating UTF-8 decoding "
                        "at " + str + ". [offset \"" +
                        std::to_string(i) + "\"]");
                    logOffset(ruleMessage, i, str.size());
                }
                return true;
            default :
                break;
        }

        /*
         * Any other non-positive value is not a known error code; treat it
         * as a failure as well so the loop can never stall or run backwards.
         */
        if (rc <= 0) {
            if (transaction) {
                ms_dbg_a(transaction, 8, "Internal error during UTF-8 validation "
                    "at " + str + ". [offset \"" +
                    std::to_string(i) + "\"]");
                logOffset(ruleMessage, i, str.size());
            }
            return true;
        }

        /* rc is the length of the sequence just validated: skip over it. */
        i += rc;
        bytes_left -= rc;
    }

    return false;
}
|
|
|
|
|
|
} // namespace operators
|
|
} // namespace modsecurity
|