Adds support for the HtmlEntityDecode transformation

This commit is contained in:
Felipe Zimmerle 2015-08-05 17:20:53 -03:00
parent ce298165dd
commit 1353403c93
5 changed files with 169 additions and 11 deletions

View File

@ -15,6 +15,8 @@
#include "actions/transformations/html_entity_decode.h" #include "actions/transformations/html_entity_decode.h"
#include <string.h>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <algorithm> #include <algorithm>
@ -24,26 +26,26 @@
#include "modsecurity/assay.h" #include "modsecurity/assay.h"
#include "actions/transformations/transformation.h" #include "actions/transformations/transformation.h"
#include "src/utils.h"
namespace ModSecurity { namespace ModSecurity {
namespace actions { namespace actions {
namespace transformations { namespace transformations {
/**
 * Out-of-line constructor for the HtmlEntityDecode transformation.
 *
 * The action string is forwarded unchanged to the Transformation base
 * class constructor.
 *
 * @param action  Action string used to build the base Transformation.
 */
HtmlEntityDecode::HtmlEntityDecode(std::string action)
: Transformation(action) {
/* Tag this action with kind 1; the meaning of the value is defined
 * outside this file section -- NOTE(review): confirm against the
 * action_kind declaration. */
this->action_kind = 1;
}
std::string HtmlEntityDecode::evaluate(std::string value, std::string HtmlEntityDecode::evaluate(std::string value,
Assay *assay) { Assay *assay) {
/** char *tmp = strdup(value.c_str());
* @todo Implement the transformation HtmlEntityDecode int res = html_entities_decode_inplace((unsigned char *)tmp, value.size());
*/ std::string ret("");
assay->debug(4, "Transformation HtmlEntityDecode is not implemented yet."); ret.assign(tmp);
return value; free(tmp);
return ret;
} }
} // namespace transformations } // namespace transformations
} // namespace actions } // namespace actions
} // namespace ModSecurity } // namespace ModSecurity

View File

@ -28,13 +28,17 @@ class Assay;
namespace actions { namespace actions {
namespace transformations { namespace transformations {
class HtmlEntityDecode : public Transformation { class HtmlEntityDecode : public Transformation {
public: public:
explicit HtmlEntityDecode(std::string action); explicit HtmlEntityDecode(std::string action)
: Transformation(action) { }
std::string evaluate(std::string exp, std::string evaluate(std::string exp,
Assay *assay) override; Assay *assay) override;
}; };
} // namespace transformations } // namespace transformations
} // namespace actions } // namespace actions
} // namespace ModSecurity } // namespace ModSecurity

View File

@ -81,7 +81,7 @@ Transformation* Transformation::instantiate(std::string a) {
IF_MATCH(escape_seq_decode) { return new EscapeSeqDecode(a); } IF_MATCH(escape_seq_decode) { return new EscapeSeqDecode(a); }
IF_MATCH(hex_decode) { return new HexDecode(a); } IF_MATCH(hex_decode) { return new HexDecode(a); }
IF_MATCH(hex_encode) { return new HexEncode(a); } IF_MATCH(hex_encode) { return new HexEncode(a); }
IF_MATCH(html_entity_decode) { return new HtmlEntityDecode(a); } IF_MATCH(htmlEntityDecode) { return new HtmlEntityDecode(a); }
IF_MATCH(jsDecode) { return new JsDecode(a); } IF_MATCH(jsDecode) { return new JsDecode(a); }
IF_MATCH(length) { return new Length(a); } IF_MATCH(length) { return new Length(a); }
IF_MATCH(lowercase) { return new LowerCase(a); } IF_MATCH(lowercase) { return new LowerCase(a); }

View File

@ -45,6 +45,7 @@
#define VALID_HEX(X) (((X >= '0') && (X <= '9')) || \ #define VALID_HEX(X) (((X >= '0') && (X <= '9')) || \
((X >= 'a') && (X <= 'f')) || ((X >= 'A') && (X <= 'F'))) ((X >= 'a') && (X <= 'f')) || ((X >= 'A') && (X <= 'F')))
#define ISODIGIT(X) ((X >= '0') && (X <= '7')) #define ISODIGIT(X) ((X >= '0') && (X <= '7'))
#define NBSP 160
namespace ModSecurity { namespace ModSecurity {
@ -468,6 +469,156 @@ int css_decode_inplace(unsigned char *input, int64_t input_len) {
} }
/**
 * Duplicates `len` bytes starting at `src` into a freshly allocated,
 * NUL-terminated C string so it can be handed to strtol()/strcasecmp().
 *
 * @return the new buffer (release with free()), or NULL when the
 *         allocation fails.
 */
static char *html_entity_token_dup(const unsigned char *src, int len) {
    char *token = reinterpret_cast<char *>(
        malloc(static_cast<size_t>(len) + 1));
    if (token == NULL) {
        return NULL;
    }
    memcpy(token, src, len);
    token[len] = '\0';
    return token;
}


/**
 * Decodes HTML entities in place.
 *
 * Recognised forms (the trailing semicolon is optional in all of them):
 *   - decimal numeric entities:      &#NN;
 *   - hexadecimal numeric entities:  &#xHH; / &#XHH;
 *   - named entities, matched case-insensitively:
 *     quot, amp, lt, gt, nbsp
 *
 * A numeric entity is converted with strtol() and truncated to a single
 * byte. Anything that is not a recognised entity is copied through
 * unchanged. The decoded output is never longer than the input.
 *
 * IMP1 Assumes NUL-terminated: a terminating '\0' is written right after
 * the decoded data, so the buffer must be at least input_len + 1 bytes.
 *
 * @param input      Buffer to decode; overwritten with the result.
 * @param input_len  Number of bytes of `input` to process.
 * @return the number of bytes in the decoded output, or 0 when `input`
 *         is NULL or `input_len` is not positive (the buffer is left
 *         untouched in that case).
 */
int html_entities_decode_inplace(unsigned char *input, int input_len) {
    unsigned char *d = input;
    int i = 0;
    int count = 0;

    if ((input == NULL) || (input_len <= 0)) {
        return 0;
    }

    while ((i < input_len) && (count < input_len)) {
        int z;
        /* Number of raw bytes to copy when no entity gets decoded. */
        int copy = 1;

        /* Require an ampersand and at least one character to
         * start looking into the entity.
         */
        if ((input[i] == '&') && (i + 1 < input_len)) {
            int j = i + 1;
            int k;

            if (input[j] == '#') {
                /* Numerical entity: decimal unless an 'x' follows. */
                int base = 10;
                char *digits;

                copy++;
                if (!(j + 1 < input_len)) {
                    /* Not enough bytes. */
                    goto HTML_ENT_OUT;
                }
                j++;

                if ((input[j] == 'x') || (input[j] == 'X')) {
                    /* Hexadecimal entity. */
                    base = 16;
                    copy++;
                    if (!(j + 1 < input_len)) {
                        /* Not enough bytes. */
                        goto HTML_ENT_OUT;
                    }
                    j++;
                }

                /* j is the position of the first digit now. */
                k = j;
                while ((j < input_len) && ((base == 16)
                    ? isxdigit(input[j]) : isdigit(input[j]))) {
                    j++;
                }
                if (j == k) {
                    /* No digits at all: keep the "&#" / "&#x" prefix. */
                    goto HTML_ENT_OUT;
                }

                /* strtol() needs a NUL-terminated string, so work on a
                 * terminated copy of the digits. */
                digits = html_entity_token_dup(&input[k], j - k);
                if (digits == NULL) {
                    /* Out of memory: leave the entity undecoded. */
                    goto HTML_ENT_OUT;
                }
                *d++ = static_cast<unsigned char>(strtol(digits, NULL, base));
                count++;
                free(digits);

                /* Skip over the semicolon if it's there. */
                i = j;
                if ((j < input_len) && (input[j] == ';')) {
                    i++;
                }
                continue;
            } else {
                /* Text entity. */
                k = j;
                while ((j < input_len) && (isalnum(input[j]))) {
                    j++;
                }
                if (j > k) { /* Do we have at least one character? */
                    int decoded = -1;
                    /* strcasecmp() needs a NUL-terminated string. */
                    char *name = html_entity_token_dup(&input[k], j - k);
                    if (name == NULL) {
                        /* Out of memory: leave the entity undecoded. */
                        goto HTML_ENT_OUT;
                    }

                    /* Decode the entity. */
                    /* ENH What about others? */
                    if (strcasecmp(name, "quot") == 0) {
                        decoded = '"';
                    } else if (strcasecmp(name, "amp") == 0) {
                        decoded = '&';
                    } else if (strcasecmp(name, "lt") == 0) {
                        decoded = '<';
                    } else if (strcasecmp(name, "gt") == 0) {
                        decoded = '>';
                    } else if (strcasecmp(name, "nbsp") == 0) {
                        decoded = NBSP;
                    }
                    free(name);

                    if (decoded < 0) {
                        /* We do not want to convert this entity,
                         * copy the raw data ('&' plus the name) over. */
                        copy = j - k + 1;
                        goto HTML_ENT_OUT;
                    }

                    *d++ = static_cast<unsigned char>(decoded);
                    count++;

                    /* Skip over the semicolon if it's there. */
                    i = j;
                    if ((j < input_len) && (input[j] == ';')) {
                        i++;
                    }
                    continue;
                }
            }
        }

HTML_ENT_OUT:
        /* Nothing was decoded: pass `copy` raw bytes through. */
        for (z = 0; ((z < copy) && (count < input_len)); z++) {
            *d++ = input[i++];
            count++;
        }
    }

    *d = '\0';

    return count;
}
/** /**
* Converts a single hexadecimal digit into a decimal value. * Converts a single hexadecimal digit into a decimal value.
*/ */

View File

@ -37,6 +37,7 @@ namespace ModSecurity {
static unsigned char x2c(unsigned char *what); static unsigned char x2c(unsigned char *what);
int css_decode_inplace(unsigned char *input, int64_t input_len); int css_decode_inplace(unsigned char *input, int64_t input_len);
static unsigned char xsingle2c(unsigned char *what); static unsigned char xsingle2c(unsigned char *what);
int html_entities_decode_inplace(unsigned char *input, int input_len);
} // namespace ModSecurity } // namespace ModSecurity
#define SRC_UTILS_H_ #define SRC_UTILS_H_