From 2c3c2287259b68206141efb6bc270ddd83727654 Mon Sep 17 00:00:00 2001 From: Eduardo Arias Date: Mon, 19 Aug 2024 10:15:12 -0700 Subject: [PATCH] Perform Utf8ToUnicode transformation in-place - Removed inplace helper function from the class, as it's only referenced by the implementation. --- .../transformations/utf8_to_unicode.cc | 87 +++++++------------ src/actions/transformations/utf8_to_unicode.h | 9 -- 2 files changed, 30 insertions(+), 66 deletions(-) diff --git a/src/actions/transformations/utf8_to_unicode.cc b/src/actions/transformations/utf8_to_unicode.cc index 2f40ce60..4b01583e 100644 --- a/src/actions/transformations/utf8_to_unicode.cc +++ b/src/actions/transformations/utf8_to_unicode.cc @@ -20,67 +20,33 @@ #include "src/utils/string.h" +constexpr int UNICODE_ERROR_CHARACTERS_MISSING = -1; +constexpr int UNICODE_ERROR_INVALID_ENCODING = -2; + + namespace modsecurity::actions::transformations { -bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const { - std::string ret; - unsigned char *input; - int _changed = 0; - char *out; +static inline bool encode(std::string &value) { + auto input = reinterpret_cast(value.data()); + const auto input_len = value.length(); - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - memcpy(input, value.c_str(), value.length()+1); - - out = inplace(input, value.size() + 1, &_changed); - free(input); - if (out != NULL) { - ret.assign(reinterpret_cast(out), - strlen(reinterpret_cast(out))); - free(out); - } - - const auto changed = ret != value; - value = ret; - return changed; -} - - -char *Utf8ToUnicode::inplace(unsigned char *input, - uint64_t input_len, int *changed) { - unsigned int count = 0; - char *data; - char *data_orig; - unsigned int i, len, j; - unsigned int bytes_left = input_len; + bool changed = false; + std::string::size_type count = 0; + auto bytes_left = input_len; unsigned char unicode[8]; - *changed = 0; /* RFC3629 states that UTF-8 are encoded using sequences of 1 to 4 octets. */ /* Max size per character should fit in 4 bytes */ - len = input_len * 4 + 1; - data = reinterpret_cast(malloc(sizeof(char) * len)); - if (data == NULL) { - return NULL; - } - data_orig = data; + const auto len = input_len * 4 + 1; + std::string ret(len, {}); + auto data = ret.data(); - if (input == NULL) { - free(data); - return NULL; - } - - for (i = 0; i < bytes_left;) { + for (std::string::size_type i = 0; i < bytes_left;) { int unicode_len = 0; unsigned int d = 0; unsigned char c; - unsigned char *utf = (unsigned char *)&input[i]; + auto utf = &input[i]; c = *utf; @@ -108,7 +74,7 @@ char *Utf8ToUnicode::inplace(unsigned char *input, unicode_len = UNICODE_ERROR_INVALID_ENCODING; } else { unicode_len = 2; - count+=6; + count += 6; if (count <= len) { int length = 0; /* compute character number */ @@ -138,11 +104,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input, break; } - for (j = 0; j < length; j++) { + for (std::string::size_type j = 0; j < length; j++) { *data++ = unicode[j]; } - *changed = 1; + changed = true; } } } else if ((c & 0xF0) == 0xE0) { @@ -190,11 +156,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input, break; } - for (j = 0; j < length; j++) { + for (std::string::size_type j = 0; j < length; j++) { *data++ = unicode[j]; } - *changed = 1; + changed = true; } } } else if ((c & 0xF8) == 0xF0) { @@ -252,11 +218,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input, break; } - for (j = 0; j < length; j++) { + for (std::string::size_type j = 0; j < length; j++) { *data++ = unicode[j]; } - *changed = 1; + changed = true; } } } else { @@ -300,7 +266,14 @@ char *Utf8ToUnicode::inplace(unsigned char *input, *data ='\0'; - return data_orig; + ret.resize(data - ret.c_str()); + std::swap(value, ret); + return changed; +} + + +bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const { + return encode(value); } diff --git a/src/actions/transformations/utf8_to_unicode.h b/src/actions/transformations/utf8_to_unicode.h index 83aba819..bbb6cdc4 100644 --- a/src/actions/transformations/utf8_to_unicode.h +++ b/src/actions/transformations/utf8_to_unicode.h @@ -18,12 +18,6 @@ #include "transformation.h" -#define UNICODE_ERROR_CHARACTERS_MISSING -1 -#define UNICODE_ERROR_INVALID_ENCODING -2 -#define UNICODE_ERROR_OVERLONG_CHARACTER -3 -#define UNICODE_ERROR_RESTRICTED_CHARACTER -4 -#define UNICODE_ERROR_DECODING_ERROR -5 - namespace modsecurity::actions::transformations { class Utf8ToUnicode : public Transformation { @@ -32,9 +26,6 @@ class Utf8ToUnicode : public Transformation { : Transformation(action) { } bool transform(std::string &value, const Transaction *trans) const override; - - static char *inplace(unsigned char *input, uint64_t input_len, - int *changed); }; } // namespace modsecurity::actions::transformations