Remove unnecessary heap-allocation & copy in Transaction::extractArguments

- utils::urldecode_nonstrict_inplace decodes inplace so key & value, which are values returned by utils::string::ssplit_pair can be just be modified and do not need to be copied. - Updated signature of utils::urldecode_nonstrict_inplace, as its two callers already have std::string values.
2026-01-16 08:27:10 +03:00 · 2024-06-01 21:59:16 +00:00
parent 021d0caa33
commit b647dbd905
4 changed files with 40 additions and 80 deletions
--- a/src/actions/transformations/url_decode.cc
+++ b/src/actions/transformations/url_decode.cc
@@ -27,16 +27,8 @@ UrlDecode::UrlDecode(const std::string &action)
 }

 bool UrlDecode::transform(std::string &value, const Transaction *trans) const {
-    int invalid_count = 0;
-    int changed;
-    const auto new_len = utils::urldecode_nonstrict_inplace(
-        (unsigned char*)value.data(),
-        value.length(),
-        &invalid_count,
-        &changed);
-
-    value.resize(new_len);
-    return changed != 0;
+    int invalid_count;
+    return utils::urldecode_nonstrict_inplace(value, invalid_count);
 }


--- a/src/transaction.cc
+++ b/src/transaction.cc
@@ -348,46 +348,22 @@ bool Transaction::extractArguments(const std::string &orig,
    if (m_rules->m_secArgumentSeparator.m_set) {
        sep1 = m_rules->m_secArgumentSeparator.m_value.at(0);
    }
-    std::vector<std::string> key_value_sets = utils::string::ssplit(buf, sep1);
+    const auto key_value_sets = utils::string::ssplit(buf, sep1);

-    for (std::string t : key_value_sets) {
-        char sep2 = '=';
-        size_t key_s = 0;
-        size_t value_s = 0;
-        int invalid = 0;
-        int changed = 0;
+    for (const auto &t : key_value_sets) {
+        const auto sep2 = '=';
+        auto [key, value] = utils::string::ssplit_pair(t, sep2);

-        std::string key;
-        std::string value;
-        std::pair<std::string, std::string> key_value_pair = utils::string::ssplit_pair(t, sep2);
-        key = key_value_pair.first;
-        value = key_value_pair.second;
+        int invalid_count;
+        utils::urldecode_nonstrict_inplace(key, invalid_count);
+        utils::urldecode_nonstrict_inplace(value, invalid_count);

-        key_s = (key.length() + 1);
-        value_s = (value.length() + 1);
-        unsigned char *key_c = reinterpret_cast<unsigned char *>(
-            calloc(sizeof(char), key_s));
-        unsigned char *value_c = reinterpret_cast<unsigned char *>(
-            calloc(sizeof(char), value_s));
-
-        memcpy(key_c, key.c_str(), key_s);
-        memcpy(value_c, value.c_str(), value_s);
-
-        key_s = utils::urldecode_nonstrict_inplace(key_c, key_s,
-            &invalid, &changed);
-        value_s = utils::urldecode_nonstrict_inplace(value_c, value_s,
-            &invalid, &changed);
-
-        if (invalid) {
+        if (invalid_count > 0) {
            m_variableUrlEncodedError.set("1", m_variableOffset);
        }

-        addArgument(orig, std::string(reinterpret_cast<char *>(key_c), key_s-1),
-            std::string(reinterpret_cast<char *>(value_c), value_s-1), offset);
+        addArgument(orig, key, value, offset);
        offset = offset + t.size() + 1;
-
-        free(key_c);
-        free(value_c);
    }

    return true;
--- a/src/utils/decode.cc
+++ b/src/utils/decode.cc
@@ -22,63 +22,55 @@ namespace modsecurity {
 namespace utils {


-int urldecode_nonstrict_inplace(unsigned char *input,
-    uint64_t input_len, int *invalid_count, int *changed) {
-    unsigned char *d = (unsigned char *)input;
-    uint64_t i, count;
+bool urldecode_nonstrict_inplace(std::string &val,
+    int &invalid_count) {
+    unsigned char *d = (unsigned char *)val.data();
+    unsigned char *s = d;
+    const unsigned char *e = s + val.size();

-    *changed = 0;
+    invalid_count = 0;
+    bool changed = false;

-    if (input == NULL) {
-        return -1;
-    }
-
-    i = count = 0;
-    while (i < input_len) {
-        if (input[i] == '%') {
+    while (s != e) {
+        if (*s == '%') {
            /* Character is a percent sign. */

            /* Are there enough bytes available? */
-            if (i + 2 < input_len) {
-                char c1 = input[i + 1];
-                char c2 = input[i + 2];
+            if (s + 2 < e) {
+                const auto c1 = *(s + 1);
+                const auto c2 = *(s + 2);
                if (VALID_HEX(c1) && VALID_HEX(c2)) {
-                    uint64_t uni = string::x2c(&input[i + 1]);
+                    const auto uni = string::x2c(s + 1);

-                    *d++ = (wchar_t)uni;
-                    count++;
-                    i += 3;
-                    *changed = 1;
+                    *d++ = uni;
+                    s += 3;
+                    changed = true;
                } else {
                    /* Not a valid encoding, skip this % */
-                    *d++ = input[i++];
-                    count++;
-                    (*invalid_count)++;
+                    *d++ = *s++;
+                    invalid_count++;
                }
            } else {
                /* Not enough bytes available, copy the raw bytes. */
-                *d++ = input[i++];
-                count++;
-                (*invalid_count)++;
+                *d++ = *s++;
+                invalid_count++;
            }
        } else {
            /* Character is not a percent sign. */
-            if (input[i] == '+') {
+            if (*s == '+') {
                *d++ = ' ';
-                *changed = 1;
+                changed = true;
            } else {
-                *d++ = input[i];
+                *d++ = *s;
            }
-            count++;
-            i++;
+            s++;
        }
    }

-#if 0
-    *d = '\0';
-#endif
+    if (changed)
+        val.resize((char*) d - val.c_str());

-    return count;
+    return changed;
 }


--- a/src/utils/decode.h
+++ b/src/utils/decode.h
@@ -29,8 +29,8 @@ namespace modsecurity {
 namespace utils {


-int urldecode_nonstrict_inplace(unsigned char *input,
-    uint64_t input_len, int *invalid_count, int *changed);
+bool urldecode_nonstrict_inplace(std::string &val,
+    int &invalid_count);
 std::string uri_decode(const std::string & sSrc);