Adds support for the ValidateUtf8Encoding operator

This commit is contained in:
Felipe Zimmerle 2015-08-10 14:51:27 -03:00
parent 9096055ea7
commit f231df16ad
3 changed files with 179 additions and 15 deletions

View File

@ -22,21 +22,169 @@
namespace ModSecurity {
namespace operators {
bool ValidateUtf8Encoding::evaluate(Assay *assay) {
/**
* @todo Implement the operator ValidateUtf8Encoding.
* Reference: https://github.com/SpiderLabs/ModSecurity/wiki/Reference-Manual#validateUtf8Encoding
*/
return true;
/**
 * Decodes and validates the single UTF-8 sequence starting at p_read.
 *
 * The lead byte selects the expected sequence length; every following byte
 * must be a continuation byte (binary 10xxxxxx).  The decoded code point is
 * then checked against the RFC 3629 restrictions: no UTF-16 surrogates
 * (U+D800..U+DFFF), nothing above U+10FFFF, and no overlong forms.
 *
 * @param p_read  Pointer to the first byte of the sequence.
 * @param length  Number of readable bytes starting at p_read.
 * @return The length of the sequence in bytes (1 to 4) when it is well
 *         formed, otherwise one of the negative UNICODE_ERROR_* codes.
 */
int ValidateUtf8Encoding::detect_utf8_character(
    const unsigned char *p_read, unsigned int length) {
    if (p_read == nullptr) {
        return UNICODE_ERROR_DECODING_ERROR;
    }
    /* Nothing to read: do not dereference past the end of the buffer. */
    if (length == 0) {
        return UNICODE_ERROR_CHARACTERS_MISSING;
    }

    const unsigned char c = *p_read;
    int unicode_len = 0;
    unsigned int d = 0;

    if ((c & 0x80) == 0) {
        /* Single byte (7 bit ASCII equivalent) needs no further validation. */
        return 1;
    } else if ((c & 0xE0) == 0xC0) {
        /* Lead byte 110xxxxx: two byte encoding. */
        unicode_len = 2;
        d = c & 0x1F;
    } else if ((c & 0xF0) == 0xE0) {
        /* Lead byte 1110xxxx: three byte encoding. */
        unicode_len = 3;
        d = c & 0x0F;
    } else if ((c & 0xF8) == 0xF0) {
        /* Lead byte 11110xxx: four byte encoding. */
        /* 0xF5..0xF7 can only encode values above U+10FFFF. */
        if (c >= 0xF5) {
            return UNICODE_ERROR_RESTRICTED_CHARACTER;
        }
        unicode_len = 4;
        d = c & 0x07;
    } else {
        /* Any other first byte is invalid (RFC 3629). */
        return UNICODE_ERROR_INVALID_ENCODING;
    }

    /* Make sure the whole sequence is available before reading it. */
    if (length < static_cast<unsigned int>(unicode_len)) {
        return UNICODE_ERROR_CHARACTERS_MISSING;
    }

    /* Each continuation byte contributes its low six bits. */
    for (int k = 1; k < unicode_len; ++k) {
        const unsigned char next = p_read[k];
        if ((next & 0xC0) != 0x80) {
            return UNICODE_ERROR_INVALID_ENCODING;
        }
        d = (d << 6) | (next & 0x3F);
    }

    /* Surrogates and values beyond U+10FFFF are not valid (RFC 3629). */
    if (((d >= 0xD800) && (d <= 0xDFFF)) || (d > 0x10FFFF)) {
        return UNICODE_ERROR_RESTRICTED_CHARACTER;
    }

    /* Smallest code point that legitimately needs unicode_len bytes. */
    unsigned int min_code_point = 0x80;
    if (unicode_len == 3) {
        min_code_point = 0x0800;
    } else if (unicode_len == 4) {
        min_code_point = 0x010000;
    }
    if (d < min_code_point) {
        /* The value could have been represented with fewer bytes. */
        return UNICODE_ERROR_OVERLONG_CHARACTER;
    }

    return unicode_len;
}
/**
 * Walks the input string one UTF-8 sequence at a time using
 * detect_utf8_character().
 *
 * @param assay  Optional context used for debug logging; every debug call
 *               below is skipped when it is null.
 * @param str    The string to validate.
 * @return true as soon as an invalid sequence is found (the offending
 *         offset is logged at debug level 8), false when every sequence in
 *         the string validates.
 */
bool ValidateUtf8Encoding::evaluate(Assay *assay, const std::string &str) {
unsigned int i, bytes_left;
/* NOTE(review): the following five lines appear to be the previous
 * out-of-line constructor definition, interleaved here by the diff view
 * (the header in this commit defines the constructor inline) -- confirm
 * against the real file before relying on this span. */
ValidateUtf8Encoding::ValidateUtf8Encoding(std::string op, std::string param,
bool negation)
: Operator() {
this->op = op;
this->param = param;
const char *str_c = str.c_str();
bytes_left = str.size();
/* No increment clause: i advances by the length of each decoded sequence. */
for (i = 0; i < str.size();) {
int rc = detect_utf8_character((unsigned char *)&str_c[i], bytes_left);
/* Negative return codes are the UNICODE_ERROR_* values; each case logs a
 * specific message and reports a match.  The break statements after the
 * returns are never reached. */
switch (rc) {
case UNICODE_ERROR_CHARACTERS_MISSING :
if (assay) {
assay->debug(8, "Invalid UTF-8 encoding: "
"not enough bytes in character "
"at " + str + ". [offset \"" +
std::to_string(i) + "\"]");
}
return true;
break;
case UNICODE_ERROR_INVALID_ENCODING :
if (assay) {
assay->debug(8, "Invalid UTF-8 encoding: "
"invalid byte value in character "
"at " + str + ". [offset \"" +
std::to_string(i) + "\"]");
}
return true;
break;
case UNICODE_ERROR_OVERLONG_CHARACTER :
if (assay) {
assay->debug(8, "Invalid UTF-8 encoding: "
"overlong character detected "
"at " + str + ". [offset \"" +
std::to_string(i) + "\"]");
}
return true;
break;
case UNICODE_ERROR_RESTRICTED_CHARACTER :
if (assay) {
assay->debug(8, "Invalid UTF-8 encoding: "
"use of restricted character "
"at " + str + ". [offset \"" +
std::to_string(i) + "\"]");
}
return true;
break;
case UNICODE_ERROR_DECODING_ERROR :
if (assay) {
assay->debug(8, "Error validating UTF-8 decoding "
"at " + str + ". [offset \"" +
std::to_string(i) + "\"]");
}
return true;
break;
}
/* Catch-all for a zero result or any negative code not handled by the
 * switch above; also guarantees the loop cannot stall on rc == 0. */
if (rc <= 0) {
if (assay) {
assay->debug(8, "Internal error during UTF-8 validation "
"at " + str + ". [offset \"" +
std::to_string(i) + "\"]");
}
return true;
}
/* Skip over the sequence that was just validated. */
i += rc;
bytes_left -= rc;
}
/* Reached the end without finding any invalid sequence. */
return false;
}
} // namespace operators
} // namespace ModSecurity

View File

@ -20,7 +20,13 @@
#include "operators/operator.h"
#ifdef __cplusplus
/* Negative result codes returned by detect_utf8_character(). */
/* The buffer ends before the sequence announced by the lead byte. */
#define UNICODE_ERROR_CHARACTERS_MISSING -1
/* Invalid lead byte, or a continuation byte that is not 10xxxxxx. */
#define UNICODE_ERROR_INVALID_ENCODING -2
/* The code point could have been encoded with fewer bytes. */
#define UNICODE_ERROR_OVERLONG_CHARACTER -3
/* Disallowed value: lead byte >= 0xF5 or a code point in U+D800..U+DFFF. */
#define UNICODE_ERROR_RESTRICTED_CHARACTER -4
/* The input pointer was NULL. */
#define UNICODE_ERROR_DECODING_ERROR -5
namespace ModSecurity {
namespace operators {
@ -28,14 +34,18 @@ namespace operators {
/**
 * Operator that checks whether its input is well-formed UTF-8.
 *
 * NOTE(review): this span shows two constructor and two evaluate()
 * declarations; the first of each pair looks like the pre-commit version
 * left in by the diff view -- confirm against the real header.
 */
class ValidateUtf8Encoding : public Operator {
public:
/** @ingroup ModSecurity_Operator */
ValidateUtf8Encoding(std::string o, std::string p, bool i);
bool evaluate(Assay *assay);
/* Forwards the operator name, parameter and negation flag to Operator. */
ValidateUtf8Encoding(std::string op, std::string param, bool negation)
: Operator(op, param, negation) { }
/* Returns true when input contains an invalid UTF-8 sequence. */
bool evaluate(Assay *assay, const std::string &input) override;
/* Validates the sequence at p_read (at most length bytes are readable);
 * returns its byte length or a negative UNICODE_ERROR_* code. */
int detect_utf8_character(const unsigned char *p_read,
unsigned int length);
};
} // namespace operators
} // namespace ModSecurity
#endif
#endif // SRC_OPERATORS_VALIDATE_UTF8_ENCODING_H_

View File

@ -94,6 +94,12 @@ UnitTest *UnitTest::from_yajl_node(yajl_val &node) {
} else if (strcmp(key, "input") == 0) {
u->input = YAJL_GET_STRING(val);
replaceAll(&(u->input), "\\0", '\0');
replaceAll(&(u->input), "\\xe4", '\xe4');
replaceAll(&(u->input), "\\x03", '\x03');
replaceAll(&(u->input), "\\xbf", '\xbf');
replaceAll(&(u->input), "\\xc9", '\xc9');
replaceAll(&(u->input), "\\x3b", '\x3b');
replaceAll(&(u->input), "\\xFF", '\xff');
} else if (strcmp(key, "name") == 0) {
u->name = YAJL_GET_STRING(val);
} else if (strcmp(key, "type") == 0) {