mirror of
https://github.com/owasp-modsecurity/ModSecurity.git
synced 2025-08-14 05:45:59 +03:00
174 lines
5.7 KiB
C++
174 lines
5.7 KiB
C++
/*
|
|
* ModSecurity, http://www.modsecurity.org/
|
|
* Copyright (c) 2015 - 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/)
|
|
*
|
|
* You may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* If any of the files related to licensing are missing or if you have any
|
|
* other questions related to licensing please contact Trustwave Holdings, Inc.
|
|
* directly using the email address security@modsecurity.org.
|
|
*
|
|
*/
|
|
|
|
#include "html_entity_decode.h"
|
|
|
|
#include <cstring>
|
|
|
|
#include "src/utils/string.h"
|
|
|
|
#ifdef WIN32
|
|
#include "src/compat/msvc.h"
|
|
#endif
|
|
|
|
using namespace modsecurity::utils::string;
|
|
|
|
namespace modsecurity::actions::transformations {
|
|
|
|
|
|
static inline bool inplace(std::string &value) {
|
|
const auto input_len = value.length();
|
|
auto d = reinterpret_cast<unsigned char*>(value.data());
|
|
const unsigned char *input = d;
|
|
const unsigned char *end = input + input_len;
|
|
|
|
std::string::size_type i = 0;
|
|
while (i < input_len) {
|
|
std::string::size_type copy = 1;
|
|
|
|
/* Require an ampersand and at least one character to
|
|
* start looking into the entity.
|
|
*/
|
|
if ((input[i] == '&') && (i + 1 < input_len)) {
|
|
auto j = i + 1;
|
|
|
|
if (input[j] == '#') {
|
|
/* Numerical entity. */
|
|
copy++;
|
|
|
|
if (!(j + 1 < input_len)) {
|
|
goto HTML_ENT_OUT; /* Not enough bytes. */
|
|
}
|
|
j++;
|
|
|
|
if ((input[j] == 'x') || (input[j] == 'X')) {
|
|
/* Hexadecimal entity. */
|
|
copy++;
|
|
|
|
if (!(j + 1 < input_len)) {
|
|
goto HTML_ENT_OUT; /* Not enough bytes. */
|
|
}
|
|
j++; /* j is the position of the first digit now. */
|
|
|
|
constexpr int MAX_HEX_DIGITS = 2; // supports only bytes (max value 0xff)
|
|
auto k = j;
|
|
while ((j - k < MAX_HEX_DIGITS) && (j < input_len) && (isxdigit(input[j]))) {
|
|
j++;
|
|
}
|
|
if (j > k) { /* Do we have at least one digit? */
|
|
/* Decode the entity. */
|
|
char x[MAX_HEX_DIGITS + 1];
|
|
memcpy(x, (const char *)&input[k], j - k);
|
|
x[j - k] = '\0';
|
|
|
|
*d++ = (unsigned char)strtol(x, nullptr, 16);
|
|
|
|
/* Skip over the semicolon if it's there. */
|
|
if ((j < input_len) && (input[j] == ';')) {
|
|
i = j + 1;
|
|
} else {
|
|
i = j;
|
|
}
|
|
continue;
|
|
} else {
|
|
goto HTML_ENT_OUT;
|
|
}
|
|
} else {
|
|
/* Decimal entity. */
|
|
constexpr int MAX_DEC_DIGITS = 3; // supports only bytes (max value 255)
|
|
auto k = j;
|
|
while ((j - k < MAX_DEC_DIGITS) && (j < input_len) && (isdigit(input[j]))) {
|
|
j++;
|
|
}
|
|
if (j > k) { /* Do we have at least one digit? */
|
|
/* Decode the entity. */
|
|
char x[MAX_DEC_DIGITS + 1];
|
|
memcpy(x, (const char *)&input[k], j - k);
|
|
x[j - k] = '\0';
|
|
|
|
*d++ = (unsigned char)strtol(x, nullptr, 10);
|
|
|
|
/* Skip over the semicolon if it's there. */
|
|
if ((j < input_len) && (input[j] == ';')) {
|
|
i = j + 1;
|
|
} else {
|
|
i = j;
|
|
}
|
|
continue;
|
|
} else {
|
|
goto HTML_ENT_OUT;
|
|
}
|
|
}
|
|
} else {
|
|
/* Text entity. */
|
|
auto k = j;
|
|
while ((j < input_len) && (isalnum(input[j]))) {
|
|
j++;
|
|
}
|
|
if (j > k) { /* Do we have at least one digit? */
|
|
const auto x = reinterpret_cast<const char*>(&input[k]);
|
|
|
|
/* Decode the entity. */
|
|
/* ENH What about others? */
|
|
if (strncasecmp(x, "quot", 4) == 0) {
|
|
*d++ = '"';
|
|
} else if (strncasecmp(x, "amp", 3) == 0) {
|
|
*d++ = '&';
|
|
} else if (strncasecmp(x, "lt", 2) == 0) {
|
|
*d++ = '<';
|
|
} else if (strncasecmp(x, "gt", 2) == 0) {
|
|
*d++ = '>';
|
|
} else if (strncasecmp(x, "nbsp", 4) == 0) {
|
|
*d++ = NBSP;
|
|
} else {
|
|
/* We do no want to convert this entity,
|
|
* copy the raw data over. */
|
|
copy = j - k + 1;
|
|
goto HTML_ENT_OUT;
|
|
}
|
|
|
|
/* Skip over the semicolon if it's there. */
|
|
if ((j < input_len) && (input[j] == ';')) {
|
|
i = j + 1;
|
|
} else {
|
|
i = j;
|
|
}
|
|
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
HTML_ENT_OUT:
|
|
|
|
for (auto z = 0; z < copy; z++) {
|
|
*d++ = input[i++];
|
|
}
|
|
}
|
|
|
|
*d = '\0';
|
|
|
|
value.resize(d - input);
|
|
return d != end;
|
|
}
|
|
|
|
|
|
bool HtmlEntityDecode::transform(std::string &value, const Transaction *trans) const {
|
|
return inplace(value);
|
|
}
|
|
|
|
|
|
} // namespace modsecurity::actions::transformations
|