diff --git a/src/actions/transformations/html_entity_decode.cc b/src/actions/transformations/html_entity_decode.cc index 9455b936..be5c87cb 100644 --- a/src/actions/transformations/html_entity_decode.cc +++ b/src/actions/transformations/html_entity_decode.cc @@ -15,6 +15,8 @@ #include "actions/transformations/html_entity_decode.h" +#include + #include #include #include @@ -24,26 +26,26 @@ #include "modsecurity/assay.h" #include "actions/transformations/transformation.h" +#include "src/utils.h" namespace ModSecurity { namespace actions { namespace transformations { -HtmlEntityDecode::HtmlEntityDecode(std::string action) - : Transformation(action) { - this->action_kind = 1; -} std::string HtmlEntityDecode::evaluate(std::string value, Assay *assay) { - /** - * @todo Implement the transformation HtmlEntityDecode - */ - assay->debug(4, "Transformation HtmlEntityDecode is not implemented yet."); - return value; + char *tmp = strdup(value.c_str()); + int res = html_entities_decode_inplace((unsigned char *)tmp, value.size()); + std::string ret(""); + ret.assign(tmp); + free(tmp); + + return ret; } + } // namespace transformations } // namespace actions } // namespace ModSecurity diff --git a/src/actions/transformations/html_entity_decode.h b/src/actions/transformations/html_entity_decode.h index 0490d02f..59158516 100644 --- a/src/actions/transformations/html_entity_decode.h +++ b/src/actions/transformations/html_entity_decode.h @@ -28,13 +28,17 @@ class Assay; namespace actions { namespace transformations { + class HtmlEntityDecode : public Transformation { public: - explicit HtmlEntityDecode(std::string action); + explicit HtmlEntityDecode(std::string action) + : Transformation(action) { } + std::string evaluate(std::string exp, Assay *assay) override; }; + } // namespace transformations } // namespace actions } // namespace ModSecurity diff --git a/src/actions/transformations/transformation.cc b/src/actions/transformations/transformation.cc index 60302d26..3cb61549 
/**
 * Copies `len` bytes starting at `start` into a newly malloc()ed buffer and
 * appends a terminating NUL, so the token can safely be handed to C string
 * functions (strtol, strcasecmp).
 *
 * @return the owned copy (release it with free()), or nullptr when the
 *         allocation fails.
 */
static char *html_entity_token_dup(const unsigned char *start, int len) {
    char *token = static_cast<char *>(malloc(sizeof(char) * (len + 1)));
    if (token == nullptr) {
        return nullptr;
    }
    memcpy(token, start, len);
    token[len] = '\0';
    return token;
}


/**
 * Decodes HTML entities in place.
 *
 * Recognised forms, each with an optional trailing ';':
 *   - decimal:      "&#65"
 *   - hexadecimal:  "&#x41" / "&#X41"
 *   - named:        quot, amp, lt, gt, nbsp (case-insensitive)
 * Numeric values are parsed with strtol() and narrowed to one byte.
 * Anything that is not a recognised entity is copied through unchanged.
 *
 * The output never grows: every entity decodes to a single byte, so the
 * result is written over the input as it is consumed.
 *
 * @param input      buffer to decode. Only the first `input_len` bytes are
 *                   read, but a terminating NUL is stored right after the
 *                   decoded data, so the buffer must hold at least
 *                   `input_len + 1` bytes.
 * @param input_len  number of bytes to decode.
 * @return the number of decoded bytes written (0 for a null buffer or a
 *         non-positive length).
 */
int html_entities_decode_inplace(unsigned char *input, int input_len) {
    /* Named entities understood by the decoder. 160 is the non-breaking
     * space, the same value as the file-level NBSP macro. */
    static const struct {
        const char *name;
        unsigned char value;
    } named_entities[] = {
        { "quot", '"' },
        { "amp",  '&' },
        { "lt",   '<' },
        { "gt",   '>' },
        { "nbsp", 160 },
    };

    if ((input == nullptr) || (input_len <= 0)) {
        return 0;
    }

    unsigned char *d = input;  /* Write cursor, never ahead of the reader. */
    int i = 0;                 /* Read position. */
    int count = 0;             /* Bytes written so far. */

    while ((i < input_len) && (count < input_len)) {
        /* Number of raw bytes to copy when nothing gets decoded. */
        int copy = 1;

        /* Require an ampersand and at least one more character to start
         * looking into the entity. */
        if ((input[i] == '&') && (i + 1 < input_len)) {
            int j = i + 1;
            bool decoded = false;
            unsigned char value = 0;

            if (input[j] == '#') {
                /* Numerical entity. If no digit follows, the prefix
                 * ("&#" or "&#x") is copied through as-is. */
                int base = 10;
                copy = 2;
                j++;
                if ((j < input_len)
                    && ((input[j] == 'x') || (input[j] == 'X'))) {
                    base = 16;
                    copy = 3;
                    j++;
                }

                const int k = j;  /* Position of the first digit. */
                if (base == 16) {
                    while ((j < input_len) && isxdigit(input[j])) j++;
                } else {
                    while ((j < input_len) && isdigit(input[j])) j++;
                }

                if (j > k) {  /* Do we have at least one digit? */
                    /* strtol() needs a NUL-terminated string that holds
                     * the digits only, hence the bounded copy. */
                    char *digits = html_entity_token_dup(&input[k], j - k);
                    if (digits != nullptr) {
                        value = static_cast<unsigned char>(
                            strtol(digits, nullptr, base));
                        free(digits);
                        decoded = true;
                    }
                }
            } else {
                /* Text entity. */
                const int k = j;
                while ((j < input_len) && isalnum(input[j])) j++;

                if (j > k) {  /* Do we have at least one character? */
                    char *name = html_entity_token_dup(&input[k], j - k);
                    if (name != nullptr) {
                        for (const auto &entity : named_entities) {
                            if (strcasecmp(name, entity.name) == 0) {
                                value = entity.value;
                                decoded = true;
                                break;
                            }
                        }
                        free(name);
                    }

                    if (!decoded) {
                        /* We do not want to convert this entity, copy
                         * the raw data ('&' plus the name) over. */
                        copy = j - k + 1;
                    }
                }
            }

            if (decoded) {
                *d++ = value;
                count++;

                /* Skip over the semicolon if it's there. */
                if ((j < input_len) && (input[j] == ';')) {
                    i = j + 1;
                } else {
                    i = j;
                }
                continue;
            }
        }

        for (int z = 0; (z < copy) && (count < input_len); z++) {
            *d++ = input[i++];
            count++;
        }
    }

    *d = '\0';

    return count;
}