mirror of
https://github.com/owasp-modsecurity/ModSecurity.git
synced 2025-09-30 03:34:29 +03:00
Perform Utf8ToUnicode transformation in-place
- Removed inplace helper function from the class, as it's only referenced by the implementation.
This commit is contained in:
@@ -20,67 +20,33 @@
|
|||||||
#include "src/utils/string.h"
|
#include "src/utils/string.h"
|
||||||
|
|
||||||
|
|
||||||
|
constexpr int UNICODE_ERROR_CHARACTERS_MISSING = -1;
|
||||||
|
constexpr int UNICODE_ERROR_INVALID_ENCODING = -2;
|
||||||
|
|
||||||
|
|
||||||
namespace modsecurity::actions::transformations {
|
namespace modsecurity::actions::transformations {
|
||||||
|
|
||||||
|
|
||||||
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const {
|
static inline bool encode(std::string &value) {
|
||||||
std::string ret;
|
auto input = reinterpret_cast<unsigned char*>(value.data());
|
||||||
unsigned char *input;
|
const auto input_len = value.length();
|
||||||
int _changed = 0;
|
|
||||||
char *out;
|
|
||||||
|
|
||||||
input = reinterpret_cast<unsigned char *>
|
bool changed = false;
|
||||||
(malloc(sizeof(char) * value.length()+1));
|
std::string::size_type count = 0;
|
||||||
|
auto bytes_left = input_len;
|
||||||
if (input == NULL) {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(input, value.c_str(), value.length()+1);
|
|
||||||
|
|
||||||
out = inplace(input, value.size() + 1, &_changed);
|
|
||||||
free(input);
|
|
||||||
if (out != NULL) {
|
|
||||||
ret.assign(reinterpret_cast<char *>(out),
|
|
||||||
strlen(reinterpret_cast<char *>(out)));
|
|
||||||
free(out);
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto changed = ret != value;
|
|
||||||
value = ret;
|
|
||||||
return changed;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
char *Utf8ToUnicode::inplace(unsigned char *input,
|
|
||||||
uint64_t input_len, int *changed) {
|
|
||||||
unsigned int count = 0;
|
|
||||||
char *data;
|
|
||||||
char *data_orig;
|
|
||||||
unsigned int i, len, j;
|
|
||||||
unsigned int bytes_left = input_len;
|
|
||||||
unsigned char unicode[8];
|
unsigned char unicode[8];
|
||||||
*changed = 0;
|
|
||||||
|
|
||||||
/* RFC3629 states that UTF-8 are encoded using sequences of 1 to 4 octets. */
|
/* RFC3629 states that UTF-8 are encoded using sequences of 1 to 4 octets. */
|
||||||
/* Max size per character should fit in 4 bytes */
|
/* Max size per character should fit in 4 bytes */
|
||||||
len = input_len * 4 + 1;
|
const auto len = input_len * 4 + 1;
|
||||||
data = reinterpret_cast<char *>(malloc(sizeof(char) * len));
|
std::string ret(len, {});
|
||||||
if (data == NULL) {
|
auto data = ret.data();
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
data_orig = data;
|
|
||||||
|
|
||||||
if (input == NULL) {
|
for (std::string::size_type i = 0; i < bytes_left;) {
|
||||||
free(data);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < bytes_left;) {
|
|
||||||
int unicode_len = 0;
|
int unicode_len = 0;
|
||||||
unsigned int d = 0;
|
unsigned int d = 0;
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
unsigned char *utf = (unsigned char *)&input[i];
|
auto utf = &input[i];
|
||||||
|
|
||||||
c = *utf;
|
c = *utf;
|
||||||
|
|
||||||
@@ -108,7 +74,7 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
|||||||
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
||||||
} else {
|
} else {
|
||||||
unicode_len = 2;
|
unicode_len = 2;
|
||||||
count+=6;
|
count += 6;
|
||||||
if (count <= len) {
|
if (count <= len) {
|
||||||
int length = 0;
|
int length = 0;
|
||||||
/* compute character number */
|
/* compute character number */
|
||||||
@@ -138,11 +104,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = 0; j < length; j++) {
|
for (std::string::size_type j = 0; j < length; j++) {
|
||||||
*data++ = unicode[j];
|
*data++ = unicode[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
*changed = 1;
|
changed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if ((c & 0xF0) == 0xE0) {
|
} else if ((c & 0xF0) == 0xE0) {
|
||||||
@@ -190,11 +156,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = 0; j < length; j++) {
|
for (std::string::size_type j = 0; j < length; j++) {
|
||||||
*data++ = unicode[j];
|
*data++ = unicode[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
*changed = 1;
|
changed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if ((c & 0xF8) == 0xF0) {
|
} else if ((c & 0xF8) == 0xF0) {
|
||||||
@@ -252,11 +218,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = 0; j < length; j++) {
|
for (std::string::size_type j = 0; j < length; j++) {
|
||||||
*data++ = unicode[j];
|
*data++ = unicode[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
*changed = 1;
|
changed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -300,7 +266,14 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
|||||||
|
|
||||||
*data ='\0';
|
*data ='\0';
|
||||||
|
|
||||||
return data_orig;
|
ret.resize(data - ret.c_str());
|
||||||
|
std::swap(value, ret);
|
||||||
|
return changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const {
|
||||||
|
return encode(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -18,12 +18,6 @@
|
|||||||
|
|
||||||
#include "transformation.h"
|
#include "transformation.h"
|
||||||
|
|
||||||
#define UNICODE_ERROR_CHARACTERS_MISSING -1
|
|
||||||
#define UNICODE_ERROR_INVALID_ENCODING -2
|
|
||||||
#define UNICODE_ERROR_OVERLONG_CHARACTER -3
|
|
||||||
#define UNICODE_ERROR_RESTRICTED_CHARACTER -4
|
|
||||||
#define UNICODE_ERROR_DECODING_ERROR -5
|
|
||||||
|
|
||||||
namespace modsecurity::actions::transformations {
|
namespace modsecurity::actions::transformations {
|
||||||
|
|
||||||
class Utf8ToUnicode : public Transformation {
|
class Utf8ToUnicode : public Transformation {
|
||||||
@@ -32,9 +26,6 @@ class Utf8ToUnicode : public Transformation {
|
|||||||
: Transformation(action) { }
|
: Transformation(action) { }
|
||||||
|
|
||||||
bool transform(std::string &value, const Transaction *trans) const override;
|
bool transform(std::string &value, const Transaction *trans) const override;
|
||||||
|
|
||||||
static char *inplace(unsigned char *input, uint64_t input_len,
|
|
||||||
int *changed);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace modsecurity::actions::transformations
|
} // namespace modsecurity::actions::transformations
|
||||||
|
Reference in New Issue
Block a user