Perform Utf8ToUnicode transformation in-place

- Removed inplace helper function from the class, as it's only
  referenced by the implementation.
This commit is contained in:
Eduardo Arias 2024-08-19 10:15:12 -07:00
parent 17a2cbd164
commit 2c3c228725
2 changed files with 30 additions and 66 deletions

View File

@ -20,67 +20,33 @@
#include "src/utils/string.h"
constexpr int UNICODE_ERROR_CHARACTERS_MISSING = -1;
constexpr int UNICODE_ERROR_INVALID_ENCODING = -2;
namespace modsecurity::actions::transformations {
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const {
std::string ret;
unsigned char *input;
int _changed = 0;
char *out;
static inline bool encode(std::string &value) {
auto input = reinterpret_cast<unsigned char*>(value.data());
const auto input_len = value.length();
input = reinterpret_cast<unsigned char *>
(malloc(sizeof(char) * value.length()+1));
if (input == NULL) {
return "";
}
memcpy(input, value.c_str(), value.length()+1);
out = inplace(input, value.size() + 1, &_changed);
free(input);
if (out != NULL) {
ret.assign(reinterpret_cast<char *>(out),
strlen(reinterpret_cast<char *>(out)));
free(out);
}
const auto changed = ret != value;
value = ret;
return changed;
}
char *Utf8ToUnicode::inplace(unsigned char *input,
uint64_t input_len, int *changed) {
unsigned int count = 0;
char *data;
char *data_orig;
unsigned int i, len, j;
unsigned int bytes_left = input_len;
bool changed = false;
std::string::size_type count = 0;
auto bytes_left = input_len;
unsigned char unicode[8];
*changed = 0;
/* RFC3629 states that UTF-8 are encoded using sequences of 1 to 4 octets. */
/* Max size per character should fit in 4 bytes */
len = input_len * 4 + 1;
data = reinterpret_cast<char *>(malloc(sizeof(char) * len));
if (data == NULL) {
return NULL;
}
data_orig = data;
const auto len = input_len * 4 + 1;
std::string ret(len, {});
auto data = ret.data();
if (input == NULL) {
free(data);
return NULL;
}
for (i = 0; i < bytes_left;) {
for (std::string::size_type i = 0; i < bytes_left;) {
int unicode_len = 0;
unsigned int d = 0;
unsigned char c;
unsigned char *utf = (unsigned char *)&input[i];
auto utf = &input[i];
c = *utf;
@ -108,7 +74,7 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
} else {
unicode_len = 2;
count+=6;
count += 6;
if (count <= len) {
int length = 0;
/* compute character number */
@ -138,11 +104,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
break;
}
for (j = 0; j < length; j++) {
for (std::string::size_type j = 0; j < length; j++) {
*data++ = unicode[j];
}
*changed = 1;
changed = true;
}
}
} else if ((c & 0xF0) == 0xE0) {
@ -190,11 +156,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
break;
}
for (j = 0; j < length; j++) {
for (std::string::size_type j = 0; j < length; j++) {
*data++ = unicode[j];
}
*changed = 1;
changed = true;
}
}
} else if ((c & 0xF8) == 0xF0) {
@ -252,11 +218,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
break;
}
for (j = 0; j < length; j++) {
for (std::string::size_type j = 0; j < length; j++) {
*data++ = unicode[j];
}
*changed = 1;
changed = true;
}
}
} else {
@ -300,7 +266,14 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
*data ='\0';
return data_orig;
ret.resize(data - ret.c_str());
std::swap(value, ret);
return changed;
}
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const {
return encode(value);
}

View File

@ -18,12 +18,6 @@
#include "transformation.h"
#define UNICODE_ERROR_CHARACTERS_MISSING -1
#define UNICODE_ERROR_INVALID_ENCODING -2
#define UNICODE_ERROR_OVERLONG_CHARACTER -3
#define UNICODE_ERROR_RESTRICTED_CHARACTER -4
#define UNICODE_ERROR_DECODING_ERROR -5
namespace modsecurity::actions::transformations {
class Utf8ToUnicode : public Transformation {
@ -32,9 +26,6 @@ class Utf8ToUnicode : public Transformation {
: Transformation(action) { }
bool transform(std::string &value, const Transaction *trans) const override;
static char *inplace(unsigned char *input, uint64_t input_len,
int *changed);
};
} // namespace modsecurity::actions::transformations