mirror of
https://github.com/owasp-modsecurity/ModSecurity.git
synced 2025-08-13 21:36:00 +03:00
211 lines
6.5 KiB
C++
211 lines
6.5 KiB
C++
/*
|
|
* ModSecurity, http://www.modsecurity.org/
|
|
* Copyright (c) 2015 Trustwave Holdings, Inc. (http://www.trustwave.com/)
|
|
*
|
|
* You may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* If any of the files related to licensing are missing or if you have any
|
|
* other questions related to licensing please contact Trustwave Holdings, Inc.
|
|
* directly using the email address security@modsecurity.org.
|
|
*
|
|
*/
|
|
|
|
#include "actions/transformations/html_entity_decode.h"
|
|
|
|
#include <string.h>
|
|
|
|
#include <iostream>
|
|
#include <string>
|
|
#include <algorithm>
|
|
#include <functional>
|
|
#include <cctype>
|
|
#include <locale>
|
|
|
|
#include "modsecurity/transaction.h"
|
|
#include "actions/transformations/transformation.h"
|
|
#include "src/utils.h"
|
|
|
|
|
|
namespace modsecurity {
|
|
namespace actions {
|
|
namespace transformations {
|
|
|
|
|
|
std::string HtmlEntityDecode::evaluate(std::string value,
|
|
Transaction *transaction) {
|
|
std::string ret;
|
|
unsigned char *input = NULL;
|
|
|
|
input = reinterpret_cast<unsigned char *>
|
|
(malloc(sizeof(char) * value.length()+1));
|
|
|
|
if (input == NULL) {
|
|
return "";
|
|
}
|
|
|
|
memcpy(input, value.c_str(), value.length()+1);
|
|
|
|
size_t i = inplace(input, value.length());
|
|
|
|
ret.assign(reinterpret_cast<char *>(input), i);
|
|
free(input);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
int HtmlEntityDecode::inplace(unsigned char *input, u_int64_t input_len) {
|
|
unsigned char *d = input;
|
|
int i, count;
|
|
|
|
if ((input == NULL) || (input_len <= 0)) {
|
|
return 0;
|
|
}
|
|
|
|
i = count = 0;
|
|
while ((i < input_len) && (count < input_len)) {
|
|
int z, copy = 1;
|
|
|
|
/* Require an ampersand and at least one character to
|
|
* start looking into the entity.
|
|
*/
|
|
if ((input[i] == '&') && (i + 1 < input_len)) {
|
|
int k, j = i + 1;
|
|
|
|
if (input[j] == '#') {
|
|
/* Numerical entity. */
|
|
copy++;
|
|
|
|
if (!(j + 1 < input_len)) {
|
|
goto HTML_ENT_OUT; /* Not enough bytes. */
|
|
}
|
|
j++;
|
|
|
|
if ((input[j] == 'x') || (input[j] == 'X')) {
|
|
/* Hexadecimal entity. */
|
|
copy++;
|
|
|
|
if (!(j + 1 < input_len)) {
|
|
goto HTML_ENT_OUT; /* Not enough bytes. */
|
|
}
|
|
j++; /* j is the position of the first digit now. */
|
|
|
|
k = j;
|
|
while ((j < input_len) && (isxdigit(input[j]))) {
|
|
j++;
|
|
}
|
|
if (j > k) { /* Do we have at least one digit? */
|
|
/* Decode the entity. */
|
|
char *x = NULL;
|
|
x = reinterpret_cast<char *>(malloc(sizeof(char) *
|
|
((j - k) + 1)));
|
|
memset(x, '\0', (j - k) + 1);
|
|
memcpy(x, (const char *)&input[k], j - k);
|
|
*d++ = (unsigned char)strtol(x, NULL, 16);
|
|
|
|
count++;
|
|
|
|
/* Skip over the semicolon if it's there. */
|
|
if ((j < input_len) && (input[j] == ';')) {
|
|
i = j + 1;
|
|
} else {
|
|
i = j;
|
|
}
|
|
continue;
|
|
} else {
|
|
goto HTML_ENT_OUT;
|
|
}
|
|
} else {
|
|
/* Decimal entity. */
|
|
k = j;
|
|
while ((j < input_len) && (isdigit(input[j]))) {
|
|
j++;
|
|
}
|
|
if (j > k) { /* Do we have at least one digit? */
|
|
/* Decode the entity. */
|
|
char *x = NULL;
|
|
x = reinterpret_cast<char *>(malloc(sizeof(char) *
|
|
((j - k) + 1)));
|
|
memset(x, '\0', (j - k) + 1);
|
|
memcpy(x, (const char *)&input[k], j - k);
|
|
*d++ = (unsigned char)strtol(x, NULL, 10);
|
|
|
|
count++;
|
|
|
|
/* Skip over the semicolon if it's there. */
|
|
if ((j < input_len) && (input[j] == ';')) {
|
|
i = j + 1;
|
|
} else {
|
|
i = j;
|
|
}
|
|
continue;
|
|
} else {
|
|
goto HTML_ENT_OUT;
|
|
}
|
|
}
|
|
} else {
|
|
/* Text entity. */
|
|
k = j;
|
|
while ((j < input_len) && (isalnum(input[j]))) {
|
|
j++;
|
|
}
|
|
if (j > k) { /* Do we have at least one digit? */
|
|
char *x = NULL;
|
|
x = reinterpret_cast<char *>(malloc(sizeof(char) *
|
|
((j - k) + 1)));
|
|
memset(x, '\0', (j - k) + 1);
|
|
memcpy(x, (const char *)&input[k], j - k);
|
|
|
|
/* Decode the entity. */
|
|
/* ENH What about others? */
|
|
if (strcasecmp(x, "quot") == 0) {
|
|
*d++ = '"';
|
|
} else if (strcasecmp(x, "amp") == 0) {
|
|
*d++ = '&';
|
|
} else if (strcasecmp(x, "lt") == 0) {
|
|
*d++ = '<';
|
|
} else if (strcasecmp(x, "gt") == 0) {
|
|
*d++ = '>';
|
|
} else if (strcasecmp(x, "nbsp") == 0) {
|
|
*d++ = NBSP;
|
|
} else {
|
|
/* We do no want to convert this entity,
|
|
* copy the raw data over. */
|
|
copy = j - k + 1;
|
|
goto HTML_ENT_OUT;
|
|
}
|
|
|
|
count++;
|
|
|
|
/* Skip over the semicolon if it's there. */
|
|
if ((j < input_len) && (input[j] == ';')) {
|
|
i = j + 1;
|
|
} else {
|
|
i = j;
|
|
}
|
|
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
HTML_ENT_OUT:
|
|
|
|
for (z = 0; ((z < copy) && (count < input_len)); z++) {
|
|
*d++ = input[i++];
|
|
count++;
|
|
}
|
|
}
|
|
|
|
*d = '\0';
|
|
|
|
return count;
|
|
}
|
|
|
|
} // namespace transformations
|
|
} // namespace actions
|
|
} // namespace modsecurity
|