Adds support for the HtmlEntityDecode transformation

This commit is contained in:
Felipe Zimmerle 2015-08-05 17:20:53 -03:00
parent ce298165dd
commit 1353403c93
5 changed files with 169 additions and 11 deletions

View File

@ -15,6 +15,8 @@
#include "actions/transformations/html_entity_decode.h"
#include <string.h>
#include <iostream>
#include <string>
#include <algorithm>
@ -24,26 +26,26 @@
#include "modsecurity/assay.h"
#include "actions/transformations/transformation.h"
#include "src/utils.h"
namespace ModSecurity {
namespace actions {
namespace transformations {
/**
 * Constructs the transformation from its action string.
 *
 * The action string is forwarded to the Transformation base class and the
 * inherited action_kind field is set to 1.
 */
HtmlEntityDecode::HtmlEntityDecode(std::string action)
    : Transformation(action) {
    action_kind = 1;
}
/**
 * Decodes the HTML entities contained in `value`.
 *
 * The input is copied into a scratch buffer, decoded in place by
 * html_entities_decode_inplace(), and the decoded bytes are returned as a
 * new string.
 *
 * @param value  Text to decode; may contain embedded NUL bytes.
 * @param assay  Not used by this transformation.
 * @return The decoded text, or `value` unchanged if the scratch buffer
 *         could not be allocated.
 */
std::string HtmlEntityDecode::evaluate(std::string value,
    Assay *assay) {
    const size_t len = value.size();

    /* Size the buffer from value.size() rather than from strlen(): the
     * decoder is told to process `len` bytes, so the buffer must really
     * hold that many even when the input carries embedded NULs. The extra
     * byte is for the terminator the decoder writes after its output. */
    unsigned char *tmp = reinterpret_cast<unsigned char *>(malloc(len + 1));
    if (tmp == NULL) {
        return value;
    }
    memcpy(tmp, value.data(), len);
    tmp[len] = '\0';

    const int res = html_entities_decode_inplace(tmp, static_cast<int>(len));

    /* Build the result from the reported length so that a decoded NUL byte
     * (e.g. from "&#0;") does not truncate the output. */
    std::string ret(reinterpret_cast<char *>(tmp), res);
    free(tmp);
    return ret;
}
} // namespace transformations
} // namespace actions
} // namespace ModSecurity

View File

@ -28,13 +28,17 @@ class Assay;
namespace actions {
namespace transformations {
/**
 * Transformation subclass whose evaluate() override is expected to return
 * its input with HTML entities decoded (judging by the class name; the
 * implementation lives in the corresponding .cc file).
 */
class HtmlEntityDecode : public Transformation {
public:
// NOTE(review): the constructor appears twice below - once as a bare
// declaration and once as an inline definition. Only one of the two can
// remain in the class; confirm which one matches the .cc file.
explicit HtmlEntityDecode(std::string action);
// Inline variant: forwards the action string to the Transformation base.
explicit HtmlEntityDecode(std::string action)
: Transformation(action) { }
// Applies the transformation to `exp` and returns the resulting string.
std::string evaluate(std::string exp,
Assay *assay) override;
};
} // namespace transformations
} // namespace actions
} // namespace ModSecurity

View File

@ -81,7 +81,7 @@ Transformation* Transformation::instantiate(std::string a) {
IF_MATCH(escape_seq_decode) { return new EscapeSeqDecode(a); }
IF_MATCH(hex_decode) { return new HexDecode(a); }
IF_MATCH(hex_encode) { return new HexEncode(a); }
IF_MATCH(html_entity_decode) { return new HtmlEntityDecode(a); }
IF_MATCH(htmlEntityDecode) { return new HtmlEntityDecode(a); }
IF_MATCH(jsDecode) { return new JsDecode(a); }
IF_MATCH(length) { return new Length(a); }
IF_MATCH(lowercase) { return new LowerCase(a); }

View File

@ -45,6 +45,7 @@
/* True when X is an ASCII hexadecimal digit (0-9, a-f, A-F).
 * The argument is parenthesised so that expression arguments expand
 * correctly; note that X is still evaluated more than once, so avoid
 * arguments with side effects. */
#define VALID_HEX(X) ((((X) >= '0') && ((X) <= '9')) || \
    (((X) >= 'a') && ((X) <= 'f')) || (((X) >= 'A') && ((X) <= 'F')))
/* True when X is an ASCII octal digit (0-7). X is evaluated twice. */
#define ISODIGIT(X) (((X) >= '0') && ((X) <= '7'))
/* Byte value written by html_entities_decode_inplace() for the "nbsp"
 * entity (160 is the non-breaking space in ISO-8859-1). */
#define NBSP 160
namespace ModSecurity {
@ -468,6 +469,156 @@ int css_decode_inplace(unsigned char *input, int64_t input_len) {
}
/**
 * Converts a run of digits into the byte value of a numeric entity.
 *
 * @param digits  First digit of the run (not NUL-terminated).
 * @param len     Number of digits in the run; must be > 0.
 * @param base    Radix handed to strtol() (10 or 16).
 * @param out     Receives the converted value truncated to one byte.
 * @return false if the temporary buffer could not be allocated.
 */
static bool html_entity_numeric_value(const unsigned char *digits, int len,
    int base, unsigned char *out) {
    /* strtol() requires a NUL-terminated string, so the digits are copied
     * into a buffer that has room for the terminator. */
    char *text = reinterpret_cast<char *>(malloc(len + 1));
    if (text == NULL) {
        return false;
    }
    memcpy(text, digits, len);
    text[len] = '\0';
    *out = static_cast<unsigned char>(strtol(text, NULL, base));
    free(text);
    return true;
}

/**
 * Looks up a named entity (case-insensitively) among the supported names:
 * quot, amp, lt, gt and nbsp.
 *
 * @param name  First character of the name (not NUL-terminated).
 * @param len   Length of the name in bytes.
 * @param out   Receives the replacement byte on success.
 * @return true if the name is one of the supported entities.
 */
static bool html_entity_named_value(const unsigned char *name, int len,
    unsigned char *out) {
    /* ENH What about others? */
    static const struct {
        const char *name;
        unsigned char value;
    } known[] = {
        {"quot", '"'},
        {"amp", '&'},
        {"lt", '<'},
        {"gt", '>'},
        {"nbsp", NBSP},
    };

    for (size_t n = 0; n < sizeof(known) / sizeof(known[0]); n++) {
        /* Comparing lengths first makes the bounded comparison equivalent
         * to a whole-string match without needing a terminated copy. */
        if ((strlen(known[n].name) == static_cast<size_t>(len))
            && (strncasecmp(reinterpret_cast<const char *>(name),
                known[n].name, static_cast<size_t>(len)) == 0)) {
            *out = known[n].value;
            return true;
        }
    }
    return false;
}

/**
 * Tries to decode the entity that starts with the ampersand at input[pos].
 * The caller guarantees that pos + 1 < input_len.
 *
 * @param out   Receives the decoded byte on success.
 * @param next  On success, index of the first byte after the entity
 *              (a trailing semicolon, if present, is consumed).
 * @param raw   On failure, number of bytes starting at pos that must be
 *              copied to the output unchanged.
 * @return true if an entity was decoded.
 */
static bool html_entity_decode_at(const unsigned char *input, int input_len,
    int pos, unsigned char *out, int *next, int *raw) {
    int j = pos + 1;
    int k;

    *raw = 1;

    if (input[j] == '#') {
        /* Numerical entity. */
        int base = 10;

        *raw = 2;  /* "&#" */
        if (!(j + 1 < input_len)) {
            /* Not enough bytes. */
            return false;
        }
        j++;

        if ((input[j] == 'x') || (input[j] == 'X')) {
            /* Hexadecimal entity. */
            *raw = 3;  /* "&#x" */
            if (!(j + 1 < input_len)) {
                /* Not enough bytes. */
                return false;
            }
            j++;
            base = 16;
        }

        /* j is the position of the first digit now. */
        k = j;
        if (base == 16) {
            while ((j < input_len) && (isxdigit(input[j]))) j++;
        } else {
            while ((j < input_len) && (isdigit(input[j]))) j++;
        }
        if (j == k) {
            /* No digits at all: leave the prefix as it is. */
            return false;
        }
        if (!html_entity_numeric_value(&input[k], j - k, base, out)) {
            return false;
        }
    } else {
        /* Text entity. */
        k = j;
        while ((j < input_len) && (isalnum(input[j]))) j++;
        if (j == k) {
            /* A lone ampersand. */
            return false;
        }
        if (!html_entity_named_value(&input[k], j - k, out)) {
            /* We do not want to convert this entity,
             * copy the raw data over. */
            *raw = j - k + 1;
            return false;
        }
    }

    /* Skip over the semicolon if it's there. */
    if ((j < input_len) && (input[j] == ';')) {
        *next = j + 1;
    } else {
        *next = j;
    }
    return true;
}

/**
 * Decodes HTML entities in place.
 *
 * Recognised forms are decimal ("&#65;") and hexadecimal ("&#x41;")
 * numeric entities and the named entities quot, amp, lt, gt and nbsp
 * (matched case-insensitively). The trailing semicolon is optional.
 * Numeric values are truncated to a single byte. Anything that is not a
 * recognised entity is copied through unchanged.
 *
 * IMP1 Assumes NUL-terminated: a terminator is written right after the
 * decoded data, so the buffer must hold at least input_len + 1 bytes.
 *
 * @param input      Buffer to decode; overwritten with the result.
 * @param input_len  Number of bytes of `input` to process.
 * @return Number of bytes in the decoded output (terminator excluded);
 *         0 when `input` is NULL or `input_len` is not positive.
 */
int html_entities_decode_inplace(unsigned char *input, int input_len) {
    if ((input == NULL) || (input_len <= 0)) return 0;

    unsigned char *d = input;
    int i = 0;
    int count = 0;

    while ((i < input_len) && (count < input_len)) {
        unsigned char value = 0;
        int next = i;
        int raw = 1;

        /* Require an ampersand and at least one character to
         * start looking into the entity.
         */
        if ((input[i] == '&') && (i + 1 < input_len)
            && html_entity_decode_at(input, input_len, i, &value,
                &next, &raw)) {
            *d++ = value;
            count++;
            i = next;
            continue;
        }

        /* Not a decodable entity: copy the raw bytes over. */
        for (int z = 0; ((z < raw) && (count < input_len)); z++) {
            *d++ = input[i++];
            count++;
        }
    }

    *d = '\0';

    return count;
}
/**
* Converts a single hexadecimal digit into a decimal value.
*/

View File

@ -37,6 +37,7 @@ namespace ModSecurity {
static unsigned char x2c(unsigned char *what);
int css_decode_inplace(unsigned char *input, int64_t input_len);
static unsigned char xsingle2c(unsigned char *what);
/**
 * Decodes HTML entities in `input` in place, processing `input_len` bytes.
 * A NUL terminator is written after the decoded data, so the buffer must
 * have room for input_len + 1 bytes. Returns the number of decoded bytes,
 * or 0 when `input` is NULL or `input_len` is not positive.
 */
int html_entities_decode_inplace(unsigned char *input, int input_len);
} // namespace ModSecurity
#define SRC_UTILS_H_