Perform HtmlEntityDecode transformation in-place

- Removed inplace helper function from the class, as it's only
  referenced by the implementation.
This commit is contained in:
Eduardo Arias 2024-08-19 09:04:42 -07:00
parent 7d5c9faa43
commit 8bf4d96e6b
2 changed files with 38 additions and 70 deletions

View File

@ -27,47 +27,21 @@
namespace modsecurity::actions::transformations { namespace modsecurity::actions::transformations {
bool HtmlEntityDecode::transform(std::string &value, const Transaction *trans) const { static inline bool inplace(std::string &value) {
std::string ret; const auto input_len = value.length();
unsigned char *input; auto d = reinterpret_cast<unsigned char*>(value.data());
const unsigned char *input = d;
const unsigned char *end = input + input_len;
input = reinterpret_cast<unsigned char *> std::string::size_type i = 0;
(malloc(sizeof(char) * value.length()+1)); while (i < input_len) {
std::string::size_type copy = 1;
if (input == NULL) {
return "";
}
memcpy(input, value.c_str(), value.length()+1);
size_t i = inplace(input, value.length());
ret.assign(reinterpret_cast<char *>(input), i);
free(input);
const auto changed = ret != value;
value = ret;
return changed;
}
int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
unsigned char *d = input;
int i, count;
if ((input == NULL) || (input_len == 0)) {
return 0;
}
i = count = 0;
while ((i < input_len) && (count < input_len)) {
int z, copy = 1;
/* Require an ampersand and at least one character to /* Require an ampersand and at least one character to
* start looking into the entity. * start looking into the entity.
*/ */
if ((input[i] == '&') && (i + 1 < input_len)) { if ((input[i] == '&') && (i + 1 < input_len)) {
int k, j = i + 1; auto j = i + 1;
if (input[j] == '#') { if (input[j] == '#') {
/* Numerical entity. */ /* Numerical entity. */
@ -87,19 +61,18 @@ int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
} }
j++; /* j is the position of the first digit now. */ j++; /* j is the position of the first digit now. */
k = j; constexpr int MAX_HEX_DIGITS = 2; // supports only bytes (max value 0xff)
while ((j < input_len) && (isxdigit(input[j]))) { auto k = j;
while ((j - k < MAX_HEX_DIGITS) && (j < input_len) && (isxdigit(input[j]))) {
j++; j++;
} }
if (j > k) { /* Do we have at least one digit? */ if (j > k) { /* Do we have at least one digit? */
/* Decode the entity. */ /* Decode the entity. */
char *x; char x[MAX_HEX_DIGITS + 1];
x = reinterpret_cast<char *>(calloc(sizeof(char),
((j - k) + 1)));
memcpy(x, (const char *)&input[k], j - k); memcpy(x, (const char *)&input[k], j - k);
*d++ = (unsigned char)strtol(x, NULL, 16); x[j - k] = '\0';
free(x);
count++; *d++ = (unsigned char)strtol(x, nullptr, 16);
/* Skip over the semicolon if it's there. */ /* Skip over the semicolon if it's there. */
if ((j < input_len) && (input[j] == ';')) { if ((j < input_len) && (input[j] == ';')) {
@ -113,19 +86,18 @@ int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
} }
} else { } else {
/* Decimal entity. */ /* Decimal entity. */
k = j; constexpr int MAX_DEC_DIGITS = 3; // supports only bytes (max value 255)
while ((j < input_len) && (isdigit(input[j]))) { auto k = j;
while ((j - k < MAX_DEC_DIGITS) && (j < input_len) && (isdigit(input[j]))) {
j++; j++;
} }
if (j > k) { /* Do we have at least one digit? */ if (j > k) { /* Do we have at least one digit? */
/* Decode the entity. */ /* Decode the entity. */
char *x; char x[MAX_DEC_DIGITS + 1];
x = reinterpret_cast<char *>(calloc(sizeof(char),
((j - k) + 1)));
memcpy(x, (const char *)&input[k], j - k); memcpy(x, (const char *)&input[k], j - k);
*d++ = (unsigned char)strtol(x, NULL, 10); x[j - k] = '\0';
free(x);
count++; *d++ = (unsigned char)strtol(x, nullptr, 10);
/* Skip over the semicolon if it's there. */ /* Skip over the semicolon if it's there. */
if ((j < input_len) && (input[j] == ';')) { if ((j < input_len) && (input[j] == ';')) {
@ -140,38 +112,31 @@ int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
} }
} else { } else {
/* Text entity. */ /* Text entity. */
k = j; auto k = j;
while ((j < input_len) && (isalnum(input[j]))) { while ((j < input_len) && (isalnum(input[j]))) {
j++; j++;
} }
if (j > k) { /* Do we have at least one alphanumeric character? */ if (j > k) { /* Do we have at least one alphanumeric character? */
char *x; const auto x = reinterpret_cast<const char*>(&input[k]);
x = reinterpret_cast<char *>(calloc(sizeof(char),
((j - k) + 1)));
memcpy(x, (const char *)&input[k], j - k);
/* Decode the entity. */ /* Decode the entity. */
/* ENH What about others? */ /* ENH What about others? */
if (strcasecmp(x, "quot") == 0) { if (strncasecmp(x, "quot", 4) == 0) {
*d++ = '"'; *d++ = '"';
} else if (strcasecmp(x, "amp") == 0) { } else if (strncasecmp(x, "amp", 3) == 0) {
*d++ = '&'; *d++ = '&';
} else if (strcasecmp(x, "lt") == 0) { } else if (strncasecmp(x, "lt", 2) == 0) {
*d++ = '<'; *d++ = '<';
} else if (strcasecmp(x, "gt") == 0) { } else if (strncasecmp(x, "gt", 2) == 0) {
*d++ = '>'; *d++ = '>';
} else if (strcasecmp(x, "nbsp") == 0) { } else if (strncasecmp(x, "nbsp", 4) == 0) {
*d++ = NBSP; *d++ = NBSP;
} else { } else {
/* We do not want to convert this entity, /* We do not want to convert this entity,
* copy the raw data over. */ * copy the raw data over. */
copy = j - k + 1; copy = j - k + 1;
free(x);
goto HTML_ENT_OUT; goto HTML_ENT_OUT;
} }
free(x);
count++;
/* Skip over the semicolon if it's there. */ /* Skip over the semicolon if it's there. */
if ((j < input_len) && (input[j] == ';')) { if ((j < input_len) && (input[j] == ';')) {
@ -187,15 +152,20 @@ int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
HTML_ENT_OUT: HTML_ENT_OUT:
for (z = 0; ((z < copy) && (count < input_len)); z++) { for (auto z = 0; z < copy; z++) {
*d++ = input[i++]; *d++ = input[i++];
count++;
} }
} }
*d = '\0'; *d = '\0';
return count; value.resize(d - input);
return d != end;
}
bool HtmlEntityDecode::transform(std::string &value, const Transaction *trans) const {
return inplace(value);
} }

View File

@ -26,8 +26,6 @@ class HtmlEntityDecode : public Transformation {
: Transformation(action) { } : Transformation(action) { }
bool transform(std::string &value, const Transaction *trans) const override; bool transform(std::string &value, const Transaction *trans) const override;
static int inplace(unsigned char *input, uint64_t input_len);
}; };
} // namespace modsecurity::actions::transformations } // namespace modsecurity::actions::transformations