Perform Utf8ToUnicode transformation in-place

- Removed the `inplace` helper function from the class, as it is only
  referenced by the implementation.
This commit is contained in:
Eduardo Arias
2024-08-19 10:15:12 -07:00
parent 17a2cbd164
commit 2c3c228725
2 changed files with 30 additions and 66 deletions

View File

@@ -20,67 +20,33 @@
#include "src/utils/string.h" #include "src/utils/string.h"
constexpr int UNICODE_ERROR_CHARACTERS_MISSING = -1;
constexpr int UNICODE_ERROR_INVALID_ENCODING = -2;
namespace modsecurity::actions::transformations { namespace modsecurity::actions::transformations {
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const { static inline bool encode(std::string &value) {
std::string ret; auto input = reinterpret_cast<unsigned char*>(value.data());
unsigned char *input; const auto input_len = value.length();
int _changed = 0;
char *out;
input = reinterpret_cast<unsigned char *> bool changed = false;
(malloc(sizeof(char) * value.length()+1)); std::string::size_type count = 0;
auto bytes_left = input_len;
if (input == NULL) {
return "";
}
memcpy(input, value.c_str(), value.length()+1);
out = inplace(input, value.size() + 1, &_changed);
free(input);
if (out != NULL) {
ret.assign(reinterpret_cast<char *>(out),
strlen(reinterpret_cast<char *>(out)));
free(out);
}
const auto changed = ret != value;
value = ret;
return changed;
}
char *Utf8ToUnicode::inplace(unsigned char *input,
uint64_t input_len, int *changed) {
unsigned int count = 0;
char *data;
char *data_orig;
unsigned int i, len, j;
unsigned int bytes_left = input_len;
unsigned char unicode[8]; unsigned char unicode[8];
*changed = 0;
/* RFC3629 states that UTF-8 are encoded using sequences of 1 to 4 octets. */ /* RFC3629 states that UTF-8 are encoded using sequences of 1 to 4 octets. */
/* Max size per character should fit in 4 bytes */ /* Max size per character should fit in 4 bytes */
len = input_len * 4 + 1; const auto len = input_len * 4 + 1;
data = reinterpret_cast<char *>(malloc(sizeof(char) * len)); std::string ret(len, {});
if (data == NULL) { auto data = ret.data();
return NULL;
}
data_orig = data;
if (input == NULL) { for (std::string::size_type i = 0; i < bytes_left;) {
free(data);
return NULL;
}
for (i = 0; i < bytes_left;) {
int unicode_len = 0; int unicode_len = 0;
unsigned int d = 0; unsigned int d = 0;
unsigned char c; unsigned char c;
unsigned char *utf = (unsigned char *)&input[i]; auto utf = &input[i];
c = *utf; c = *utf;
@@ -108,7 +74,7 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
unicode_len = UNICODE_ERROR_INVALID_ENCODING; unicode_len = UNICODE_ERROR_INVALID_ENCODING;
} else { } else {
unicode_len = 2; unicode_len = 2;
count+=6; count += 6;
if (count <= len) { if (count <= len) {
int length = 0; int length = 0;
/* compute character number */ /* compute character number */
@@ -138,11 +104,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
break; break;
} }
for (j = 0; j < length; j++) { for (std::string::size_type j = 0; j < length; j++) {
*data++ = unicode[j]; *data++ = unicode[j];
} }
*changed = 1; changed = true;
} }
} }
} else if ((c & 0xF0) == 0xE0) { } else if ((c & 0xF0) == 0xE0) {
@@ -190,11 +156,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
break; break;
} }
for (j = 0; j < length; j++) { for (std::string::size_type j = 0; j < length; j++) {
*data++ = unicode[j]; *data++ = unicode[j];
} }
*changed = 1; changed = true;
} }
} }
} else if ((c & 0xF8) == 0xF0) { } else if ((c & 0xF8) == 0xF0) {
@@ -252,11 +218,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
break; break;
} }
for (j = 0; j < length; j++) { for (std::string::size_type j = 0; j < length; j++) {
*data++ = unicode[j]; *data++ = unicode[j];
} }
*changed = 1; changed = true;
} }
} }
} else { } else {
@@ -300,7 +266,14 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
*data ='\0'; *data ='\0';
return data_orig; ret.resize(data - ret.c_str());
std::swap(value, ret);
return changed;
}
// Transformation entry point: forwards `value` to the file-local encode()
// helper and returns the bool that helper reports. `trans` is not used in
// this body.
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const {
return encode(value);
} }

View File

@@ -18,12 +18,6 @@
#include "transformation.h" #include "transformation.h"
#define UNICODE_ERROR_CHARACTERS_MISSING -1
#define UNICODE_ERROR_INVALID_ENCODING -2
#define UNICODE_ERROR_OVERLONG_CHARACTER -3
#define UNICODE_ERROR_RESTRICTED_CHARACTER -4
#define UNICODE_ERROR_DECODING_ERROR -5
namespace modsecurity::actions::transformations { namespace modsecurity::actions::transformations {
class Utf8ToUnicode : public Transformation { class Utf8ToUnicode : public Transformation {
@@ -32,9 +26,6 @@ class Utf8ToUnicode : public Transformation {
: Transformation(action) { } : Transformation(action) { }
bool transform(std::string &value, const Transaction *trans) const override; bool transform(std::string &value, const Transaction *trans) const override;
static char *inplace(unsigned char *input, uint64_t input_len,
int *changed);
}; };
} // namespace modsecurity::actions::transformations } // namespace modsecurity::actions::transformations