diff --git a/src/actions/transformations/html_entity_decode.cc b/src/actions/transformations/html_entity_decode.cc
index 9455b936..be5c87cb 100644
--- a/src/actions/transformations/html_entity_decode.cc
+++ b/src/actions/transformations/html_entity_decode.cc
@@ -15,6 +15,8 @@
#include "actions/transformations/html_entity_decode.h"
+#include
+
#include
#include
#include
@@ -24,26 +26,26 @@
#include "modsecurity/assay.h"
#include "actions/transformations/transformation.h"
+#include "src/utils.h"
namespace ModSecurity {
namespace actions {
namespace transformations {
-HtmlEntityDecode::HtmlEntityDecode(std::string action)
- : Transformation(action) {
- this->action_kind = 1;
-}
+/**
+ * Applies the htmlEntityDecode transformation to `value`.
+ *
+ * The input is copied into a writable scratch buffer, decoded in place by
+ * html_entities_decode_inplace(), and the decoded bytes are returned as a
+ * new string.
+ *
+ * @param value  data to decode; may contain embedded NUL bytes.
+ * @param assay  current transaction; not used by this transformation.
+ * @return the decoded data. If the scratch buffer cannot be allocated the
+ *         input is returned unchanged.
+ */
std::string HtmlEntityDecode::evaluate(std::string value,
Assay *assay) {
- /**
- * @todo Implement the transformation HtmlEntityDecode
- */
- assay->debug(4, "Transformation HtmlEntityDecode is not implemented yet.");
- return value;
+    const size_t len = value.size();
+
+    /* Allocate by size(), not by strlen(): strdup() would stop at the first
+     * embedded NUL and leave the buffer shorter than the length handed to
+     * the decoder. One extra byte is reserved for the terminator the
+     * decoder writes after the decoded data. */
+    unsigned char *tmp = static_cast<unsigned char *>(malloc(len + 1));
+    if (tmp == NULL) {
+        return value;
+    }
+    memcpy(tmp, value.data(), len);
+    tmp[len] = '\0';
+
+    const int res = html_entities_decode_inplace(tmp, static_cast<int>(len));
+
+    /* Build the result from the returned length so that a decoded NUL byte
+     * (e.g. from "&#0;") does not truncate the output. */
+    std::string ret(reinterpret_cast<const char *>(tmp),
+        static_cast<size_t>(res));
+    free(tmp);
+
+    return ret;
}
+
} // namespace transformations
} // namespace actions
} // namespace ModSecurity
diff --git a/src/actions/transformations/html_entity_decode.h b/src/actions/transformations/html_entity_decode.h
index 0490d02f..59158516 100644
--- a/src/actions/transformations/html_entity_decode.h
+++ b/src/actions/transformations/html_entity_decode.h
@@ -28,13 +28,17 @@ class Assay;
namespace actions {
namespace transformations {
+
+/**
+ * Transformation action that decodes HTML entities in its input.
+ * evaluate() is implemented in html_entity_decode.cc.
+ */
class HtmlEntityDecode : public Transformation {
public:
- explicit HtmlEntityDecode(std::string action);
+ /* NOTE(review): the out-of-line constructor this replaces also set
+  * this->action_kind = 1. Confirm that the Transformation base
+  * constructor already does so, otherwise that assignment is lost. */
+ explicit HtmlEntityDecode(std::string action)
+ : Transformation(action) { }
+
+ /* Returns a copy of `exp` with HTML entities decoded. */
std::string evaluate(std::string exp,
Assay *assay) override;
};
+
} // namespace transformations
} // namespace actions
} // namespace ModSecurity
diff --git a/src/actions/transformations/transformation.cc b/src/actions/transformations/transformation.cc
index 60302d26..3cb61549 100644
--- a/src/actions/transformations/transformation.cc
+++ b/src/actions/transformations/transformation.cc
@@ -81,7 +81,7 @@ Transformation* Transformation::instantiate(std::string a) {
IF_MATCH(escape_seq_decode) { return new EscapeSeqDecode(a); }
IF_MATCH(hex_decode) { return new HexDecode(a); }
IF_MATCH(hex_encode) { return new HexEncode(a); }
- IF_MATCH(html_entity_decode) { return new HtmlEntityDecode(a); }
+ IF_MATCH(htmlEntityDecode) { return new HtmlEntityDecode(a); }
IF_MATCH(jsDecode) { return new JsDecode(a); }
IF_MATCH(length) { return new Length(a); }
IF_MATCH(lowercase) { return new LowerCase(a); }
diff --git a/src/utils.cc b/src/utils.cc
index d30dad9f..a95847f2 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -45,6 +45,7 @@
#define VALID_HEX(X) (((X >= '0') && (X <= '9')) || \
((X >= 'a') && (X <= 'f')) || ((X >= 'A') && (X <= 'F')))
#define ISODIGIT(X) ((X >= '0') && (X <= '7'))
+/* Byte value (160, i.e. 0xA0) that html_entities_decode_inplace() emits
+ * for the "nbsp" named entity. */
+#define NBSP 160
namespace ModSecurity {
@@ -468,6 +469,156 @@ int css_decode_inplace(unsigned char *input, int64_t input_len) {
}
+/**
+ * Returns a malloc()ed, NUL-terminated copy of the `len` bytes starting at
+ * `start`, or NULL if the allocation fails. The caller must free() it.
+ */
+static char *html_entity_copy_span(const unsigned char *start, int len) {
+    char *buf = static_cast<char *>(malloc(static_cast<size_t>(len) + 1));
+    if (buf != NULL) {
+        memcpy(buf, start, len);
+        buf[len] = '\0';
+    }
+    return buf;
+}
+
+
+/**
+ * Decodes HTML entities in `input`, in place.
+ *
+ * Recognised forms (the trailing ';' is optional in all of them):
+ *   - decimal numeric entities:      &#65;
+ *   - hexadecimal numeric entities:  &#x41; or &#X41;
+ *   - named entities, matched case-insensitively:
+ *     quot, amp, lt, gt, nbsp
+ * Everything else, including unknown named entities and truncated
+ * entities, is copied through unchanged. Numeric values are narrowed to a
+ * single byte.
+ *
+ * IMP1 Assumes NUL-terminated: a terminating '\0' is written right after
+ * the decoded data, so `input` must have room for input_len + 1 bytes.
+ *
+ * @param input      buffer to decode; it is rewritten in place.
+ * @param input_len  number of bytes of `input` to process.
+ * @return number of bytes of decoded output (terminator excluded), or 0
+ *         when `input` is NULL or input_len <= 0, in which case the
+ *         buffer is left untouched.
+ */
+int html_entities_decode_inplace(unsigned char *input, int input_len) {
+    unsigned char *d = input;
+    int i, count;
+
+    if ((input == NULL) || (input_len <= 0)) return 0;
+
+    i = count = 0;
+    while ((i < input_len) && (count < input_len)) {
+        /* Number of raw bytes to copy when no entity is decoded. */
+        int z, copy = 1;
+
+        /* Require an ampersand and at least one character to
+         * start looking into the entity.
+         */
+        if ((input[i] == '&') && (i + 1 < input_len)) {
+            int k, j = i + 1;
+            char *x = NULL;
+
+            if (input[j] == '#') {
+                /* Numerical entity. */
+                int base = 10;
+                copy++;
+
+                if (!(j + 1 < input_len)) {
+                    /* Not enough bytes. */
+                    goto HTML_ENT_OUT;
+                }
+                j++;
+
+                if ((input[j] == 'x') || (input[j] == 'X')) {
+                    /* Hexadecimal entity. */
+                    base = 16;
+                    copy++;
+
+                    if (!(j + 1 < input_len)) {
+                        /* Not enough bytes. */
+                        goto HTML_ENT_OUT;
+                    }
+                    j++;
+                }
+
+                /* j is the position of the first digit now. */
+                k = j;
+                while ((j < input_len) && ((base == 16)
+                    ? isxdigit(input[j]) : isdigit(input[j]))) {
+                    j++;
+                }
+                if (j == k) {
+                    /* No digits: copy the "&#" or "&#x" prefix raw. */
+                    goto HTML_ENT_OUT;
+                }
+
+                /* strtol() needs a NUL-terminated string that holds the
+                 * digits only, hence the bounded, terminated copy. */
+                x = html_entity_copy_span(&input[k], j - k);
+                if (x == NULL) {
+                    /* Out of memory: leave the entity undecoded. */
+                    goto HTML_ENT_OUT;
+                }
+                *d++ = static_cast<unsigned char>(strtol(x, NULL, base));
+                free(x);
+            } else {
+                /* Text entity. */
+                unsigned char decoded = 0;
+                bool known = true;
+
+                k = j;
+                while ((j < input_len) && (isalnum(input[j]))) j++;
+                if (j == k) {
+                    /* A lone ampersand: copy it as is. */
+                    goto HTML_ENT_OUT;
+                }
+
+                x = html_entity_copy_span(&input[k], j - k);
+                if (x == NULL) {
+                    /* Out of memory: leave the entity undecoded. */
+                    goto HTML_ENT_OUT;
+                }
+
+                /* Decode the entity. */
+                /* ENH What about others? */
+                if (strcasecmp(x, "quot") == 0) {
+                    decoded = '"';
+                } else if (strcasecmp(x, "amp") == 0) {
+                    decoded = '&';
+                } else if (strcasecmp(x, "lt") == 0) {
+                    decoded = '<';
+                } else if (strcasecmp(x, "gt") == 0) {
+                    decoded = '>';
+                } else if (strcasecmp(x, "nbsp") == 0) {
+                    decoded = NBSP;
+                } else {
+                    known = false;
+                }
+                free(x);
+
+                if (!known) {
+                    /* We do not want to convert this entity,
+                     * copy the raw data over ('&' plus the name). */
+                    copy = j - k + 1;
+                    goto HTML_ENT_OUT;
+                }
+                *d++ = decoded;
+            }
+
+            count++;
+
+            /* Skip over the semicolon if it's there. */
+            if ((j < input_len) && (input[j] == ';')) {
+                i = j + 1;
+            } else {
+                i = j;
+            }
+            continue;
+        }
+
+HTML_ENT_OUT:
+        for (z = 0; ((z < copy) && (count < input_len)); z++) {
+            *d++ = input[i++];
+            count++;
+        }
+    }
+
+    *d = '\0';
+
+    return count;
+}
+
+
/**
* Converts a single hexadecimal digit into a decimal value.
*/
diff --git a/src/utils.h b/src/utils.h
index 3499c4bf..ce41ffaf 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -37,6 +37,7 @@ namespace ModSecurity {
static unsigned char x2c(unsigned char *what);
int css_decode_inplace(unsigned char *input, int64_t input_len);
static unsigned char xsingle2c(unsigned char *what);
+ /* Decodes HTML entities in `input` in place (defined in src/utils.cc).
+  * Returns the number of decoded bytes. A terminating '\0' is written
+  * after the decoded data, so `input` needs input_len + 1 bytes. */
+ int html_entities_decode_inplace(unsigned char *input, int input_len);
} // namespace ModSecurity
#define SRC_UTILS_H_