mirror of
https://github.com/owasp-modsecurity/ModSecurity.git
synced 2025-08-13 21:36:00 +03:00
Perform Utf8ToUnicode transformation in-place
- Removed inplace helper function from the class, as it's only referenced by the implementation.
This commit is contained in:
parent
17a2cbd164
commit
2c3c228725
@ -20,67 +20,33 @@
|
||||
#include "src/utils/string.h"
|
||||
|
||||
|
||||
constexpr int UNICODE_ERROR_CHARACTERS_MISSING = -1;
|
||||
constexpr int UNICODE_ERROR_INVALID_ENCODING = -2;
|
||||
|
||||
|
||||
namespace modsecurity::actions::transformations {
|
||||
|
||||
|
||||
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const {
|
||||
std::string ret;
|
||||
unsigned char *input;
|
||||
int _changed = 0;
|
||||
char *out;
|
||||
static inline bool encode(std::string &value) {
|
||||
auto input = reinterpret_cast<unsigned char*>(value.data());
|
||||
const auto input_len = value.length();
|
||||
|
||||
input = reinterpret_cast<unsigned char *>
|
||||
(malloc(sizeof(char) * value.length()+1));
|
||||
|
||||
if (input == NULL) {
|
||||
return "";
|
||||
}
|
||||
|
||||
memcpy(input, value.c_str(), value.length()+1);
|
||||
|
||||
out = inplace(input, value.size() + 1, &_changed);
|
||||
free(input);
|
||||
if (out != NULL) {
|
||||
ret.assign(reinterpret_cast<char *>(out),
|
||||
strlen(reinterpret_cast<char *>(out)));
|
||||
free(out);
|
||||
}
|
||||
|
||||
const auto changed = ret != value;
|
||||
value = ret;
|
||||
return changed;
|
||||
}
|
||||
|
||||
|
||||
char *Utf8ToUnicode::inplace(unsigned char *input,
|
||||
uint64_t input_len, int *changed) {
|
||||
unsigned int count = 0;
|
||||
char *data;
|
||||
char *data_orig;
|
||||
unsigned int i, len, j;
|
||||
unsigned int bytes_left = input_len;
|
||||
bool changed = false;
|
||||
std::string::size_type count = 0;
|
||||
auto bytes_left = input_len;
|
||||
unsigned char unicode[8];
|
||||
*changed = 0;
|
||||
|
||||
/* RFC3629 states that UTF-8 are encoded using sequences of 1 to 4 octets. */
|
||||
/* Max size per character should fit in 4 bytes */
|
||||
len = input_len * 4 + 1;
|
||||
data = reinterpret_cast<char *>(malloc(sizeof(char) * len));
|
||||
if (data == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
data_orig = data;
|
||||
const auto len = input_len * 4 + 1;
|
||||
std::string ret(len, {});
|
||||
auto data = ret.data();
|
||||
|
||||
if (input == NULL) {
|
||||
free(data);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < bytes_left;) {
|
||||
for (std::string::size_type i = 0; i < bytes_left;) {
|
||||
int unicode_len = 0;
|
||||
unsigned int d = 0;
|
||||
unsigned char c;
|
||||
unsigned char *utf = (unsigned char *)&input[i];
|
||||
auto utf = &input[i];
|
||||
|
||||
c = *utf;
|
||||
|
||||
@ -108,7 +74,7 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
||||
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
||||
} else {
|
||||
unicode_len = 2;
|
||||
count+=6;
|
||||
count += 6;
|
||||
if (count <= len) {
|
||||
int length = 0;
|
||||
/* compute character number */
|
||||
@ -138,11 +104,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
||||
break;
|
||||
}
|
||||
|
||||
for (j = 0; j < length; j++) {
|
||||
for (std::string::size_type j = 0; j < length; j++) {
|
||||
*data++ = unicode[j];
|
||||
}
|
||||
|
||||
*changed = 1;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
} else if ((c & 0xF0) == 0xE0) {
|
||||
@ -190,11 +156,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
||||
break;
|
||||
}
|
||||
|
||||
for (j = 0; j < length; j++) {
|
||||
for (std::string::size_type j = 0; j < length; j++) {
|
||||
*data++ = unicode[j];
|
||||
}
|
||||
|
||||
*changed = 1;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
} else if ((c & 0xF8) == 0xF0) {
|
||||
@ -252,11 +218,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
||||
break;
|
||||
}
|
||||
|
||||
for (j = 0; j < length; j++) {
|
||||
for (std::string::size_type j = 0; j < length; j++) {
|
||||
*data++ = unicode[j];
|
||||
}
|
||||
|
||||
*changed = 1;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -300,7 +266,14 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
||||
|
||||
*data ='\0';
|
||||
|
||||
return data_orig;
|
||||
ret.resize(data - ret.c_str());
|
||||
std::swap(value, ret);
|
||||
return changed;
|
||||
}
|
||||
|
||||
|
||||
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const {
|
||||
return encode(value);
|
||||
}
|
||||
|
||||
|
||||
|
@ -18,12 +18,6 @@
|
||||
|
||||
#include "transformation.h"
|
||||
|
||||
#define UNICODE_ERROR_CHARACTERS_MISSING -1
|
||||
#define UNICODE_ERROR_INVALID_ENCODING -2
|
||||
#define UNICODE_ERROR_OVERLONG_CHARACTER -3
|
||||
#define UNICODE_ERROR_RESTRICTED_CHARACTER -4
|
||||
#define UNICODE_ERROR_DECODING_ERROR -5
|
||||
|
||||
namespace modsecurity::actions::transformations {
|
||||
|
||||
class Utf8ToUnicode : public Transformation {
|
||||
@ -32,9 +26,6 @@ class Utf8ToUnicode : public Transformation {
|
||||
: Transformation(action) { }
|
||||
|
||||
bool transform(std::string &value, const Transaction *trans) const override;
|
||||
|
||||
static char *inplace(unsigned char *input, uint64_t input_len,
|
||||
int *changed);
|
||||
};
|
||||
|
||||
} // namespace modsecurity::actions::transformations
|
||||
|
Loading…
x
Reference in New Issue
Block a user