Fixed minor behavior on the transformations and added sha1-mbedtls

2025-11-19 18:54:23 +03:00 · 2016-05-30 16:53:08 -03:00
parent f35d28b8d3
commit 967c8c90f2
17 changed files with 1077 additions and 775 deletions
--- a/src/actions/transformations/html_entity_decode.cc
+++ b/src/actions/transformations/html_entity_decode.cc
@@ -36,26 +36,175 @@ namespace transformations {

 std::string HtmlEntityDecode::evaluate(std::string value,
    Transaction *transaction) {
+    std::string ret;
+    unsigned char *input = NULL;

-    if (HtmlEntityDecodeInstantCache::getInstance().count(value) > 0) {
-        return HtmlEntityDecodeInstantCache::getInstance().at(value);
+    input = reinterpret_cast<unsigned char *>
+        (malloc(sizeof(char) * value.length()+1));
+
+    if (input == NULL) {
+        return "";
    }

-    char *tmp = strdup(value.c_str());
+    memcpy(input, value.c_str(), value.length()+1);

-    // FIXME: html_entities_decode_inplace is not working as expected
-    //        temporary disabled to perform the audit_log tests.
-    //  html_entities_decode_inplace((unsigned char *)tmp, value.size());
-    std::string ret("");
-    ret.assign(tmp);
-    free(tmp);
+    size_t i = inplace(input, value.length());

-    HtmlEntityDecodeInstantCache::getInstance().cache(value, ret);
+    ret.assign(reinterpret_cast<char *>(input), i);
+    free(input);

    return ret;
 }


+/**
+ * Decodes HTML entities in `input` in place: numeric (&#NN; / &#xHH;,
+ * truncated to one byte) and the named quot, amp, lt, gt and nbsp
+ * (case-insensitive). Anything else is copied through unchanged; the
+ * trailing ';' is optional. A NUL is written after the decoded data,
+ * so the buffer needs room for input_len + 1 bytes.
+ *
+ * @return number of decoded bytes, excluding the NUL (0 if NULL/empty).
+ */
+int HtmlEntityDecode::inplace(unsigned char *input, u_int64_t input_len) {
+    unsigned char *d = input;
+    int i, count;
+
+    if ((input == NULL) || (input_len <= 0)) {
+        return 0;
+    }
+
+    i = count = 0;
+    while ((i < input_len) && (count < input_len)) {
+        int z, copy = 1;
+
+        /* Require an ampersand and at least one character to
+         * start looking into the entity.
+         */
+        if ((input[i] == '&') && (i + 1 < input_len)) {
+            int k, j = i + 1;
+
+            if (input[j] == '#') {
+                /* Numerical entity. */
+                copy++;  /* on failure "&#" is copied through raw */
+
+                if (!(j + 1 < input_len)) {
+                    goto HTML_ENT_OUT; /* Not enough bytes. */
+                }
+                j++;
+
+                if ((input[j] == 'x') || (input[j] == 'X')) {
+                    /* Hexadecimal entity. */
+                    copy++;  /* on failure "&#x" is copied through raw */
+
+                    if (!(j + 1 < input_len)) {
+                        goto HTML_ENT_OUT; /* Not enough bytes. */
+                    }
+                    j++; /* j is the position of the first digit now. */
+
+                    k = j;
+                    while ((j < input_len) && (isxdigit(input[j]))) {
+                        j++;
+                    }
+                    if (j > k) { /* Do we have at least one digit? */
+                        /* Decode from a self-releasing copy of the digits. */
+                        const std::string digits(
+                            reinterpret_cast<const char *>(&input[k]), j - k);
+                        *d++ = (unsigned char)strtol(digits.c_str(), NULL, 16);
+
+                        count++;
+
+                        /* Skip over the semicolon if it's there. */
+                        if ((j < input_len) && (input[j] == ';')) {
+                            i = j + 1;
+                        } else {
+                            i = j;
+                        }
+                        continue;
+                    } else {
+                        goto HTML_ENT_OUT;
+                    }
+                } else {
+                    /* Decimal entity. */
+                    k = j;
+                    while ((j < input_len) && (isdigit(input[j]))) {
+                        j++;
+                    }
+                    if (j > k) { /* Do we have at least one digit? */
+                        /* Decode from a self-releasing copy of the digits. */
+                        const std::string digits(
+                            reinterpret_cast<const char *>(&input[k]), j - k);
+                        *d++ = (unsigned char)strtol(digits.c_str(), NULL, 10);
+
+                        count++;
+
+                        /* Skip over the semicolon if it's there. */
+                        if ((j < input_len) && (input[j] == ';')) {
+                            i = j + 1;
+                        } else {
+                            i = j;
+                        }
+                        continue;
+                    } else {
+                        goto HTML_ENT_OUT;
+                    }
+                }
+            } else {
+                /* Text entity. */
+                k = j;
+                while ((j < input_len) && (isalnum(input[j]))) {
+                    j++;
+                }
+                if (j > k) { /* Do we have at least one character? */
+                    const std::string name(
+                        reinterpret_cast<const char *>(&input[k]), j - k);
+
+                    /* Decode the entity. */
+                    /* ENH What about others? */
+                    if (strcasecmp(name.c_str(), "quot") == 0) {
+                        *d++ = '"';
+                    } else if (strcasecmp(name.c_str(), "amp") == 0) {
+                        *d++ = '&';
+                    } else if (strcasecmp(name.c_str(), "lt") == 0) {
+                        *d++ = '<';
+                    } else if (strcasecmp(name.c_str(), "gt") == 0) {
+                        *d++ = '>';
+                    } else if (strcasecmp(name.c_str(), "nbsp") == 0) {
+                        *d++ = NBSP;
+                    } else {
+                        /* We do not want to convert this entity,
+                         * copy the raw data ('&' plus the name) over. */
+                        copy = j - k + 1;
+                        goto HTML_ENT_OUT;
+                    }
+
+                    count++;
+
+                    /* Skip over the semicolon if it's there. */
+                    if ((j < input_len) && (input[j] == ';')) {
+                        i = j + 1;
+                    } else {
+                        i = j;
+                    }
+
+                    continue;
+                }
+            }
+        }
+
+HTML_ENT_OUT:
+
+        for (z = 0; ((z < copy) && (count < input_len)); z++) {
+            *d++ = input[i++];
+            count++;
+        }
+    }
+
+    *d = '\0';
+
+    return count;
+}
+
 }  // namespace transformations
 }  // namespace actions
 }  // namespace modsecurity
--- a/src/actions/transformations/html_entity_decode.h
+++ b/src/actions/transformations/html_entity_decode.h
@@ -30,25 +30,6 @@ namespace actions {
 namespace transformations {


-class HtmlEntityDecodeInstantCache :
-    public std::unordered_map<std::string, std::string> {
- public:
-    static HtmlEntityDecodeInstantCache& getInstance() {
-        static HtmlEntityDecodeInstantCache instance;
-        return instance;
-    }
-
-    void cache(const std::string& value, const std::string& out) {
-        emplace(value, out);
-        if (size() > 100) {
-            erase(begin());
-        }
-    }
- private:
-    HtmlEntityDecodeInstantCache() {}
-};
-
-
 class HtmlEntityDecode : public Transformation {
 public:
    explicit HtmlEntityDecode(std::string action)
@@ -56,6 +37,8 @@ class HtmlEntityDecode : public Transformation {

    std::string evaluate(std::string exp,
        Transaction *transaction) override;
+
+    static int inplace(unsigned char *input, u_int64_t input_len);
 };


--- a/src/actions/transformations/js_decode.cc
+++ b/src/actions/transformations/js_decode.cc
@@ -36,21 +36,133 @@ namespace transformations {

 std::string JsDecode::evaluate(std::string value,
    Transaction *transaction) {
+    std::string ret;
+    unsigned char *input = NULL;

-    char *val = reinterpret_cast<char *>(
-        malloc(sizeof(char) * value.size() + 1));
-    memcpy(val, value.c_str(), value.size() + 1);
-    val[value.size()] = '\0';
+    input = reinterpret_cast<unsigned char *>
+        (malloc(sizeof(char) * value.length()+1));

-    js_decode_nonstrict_inplace((unsigned char *)val, value.size());
-    std::string ret("");
-    ret.assign(val);
-    free(val);
+    if (input == NULL) {
+        return "";
+    }
+
+    memcpy(input, value.c_str(), value.length()+1);
+
+    size_t i = inplace(input, value.length());
+
+    ret.assign(reinterpret_cast<char *>(input), i);
+    free(input);

    return ret;
 }


+int JsDecode::inplace(unsigned char *input, u_int64_t input_len) {
+    unsigned char *d = (unsigned char *)input;  /* write cursor */
+    int64_t i, count;  /* i: read index; count: bytes written so far */
+    /* Decodes JavaScript escapes in `input` in place; returns `count`. */
+    i = count = 0;
+    while (i < input_len) {
+        if (input[i] == '\\') {
+            /* Backslash: try \uHHHH, \xHH, octal, then \C, in that order. */
+
+            if ((i + 5 < input_len) && (input[i + 1] == 'u')
+                && (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3]))
+                && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5]))) {
+                /* \uHHHH: six input bytes become one output byte. */
+
+                /* Use only the lower byte (last two hex digits). */
+                *d = x2c(&input[i + 4]);
+
+                /* Full width ASCII (ff01 - ff5e) needs 0x20 added */
+                if ((*d > 0x00) && (*d < 0x5f)
+                    && ((input[i + 2] == 'f') || (input[i + 2] == 'F'))
+                    && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) {
+                    (*d) += 0x20;
+                }
+
+                d++;
+                count++;
+                i += 6;
+            } else if ((i + 3 < input_len) && (input[i + 1] == 'x')
+                    && VALID_HEX(input[i + 2]) && VALID_HEX(input[i + 3])) {
+                /* \xHH: four input bytes become one output byte. */
+                *d++ = x2c(&input[i + 2]);
+                count++;
+                i += 4;
+            } else if ((i + 1 < input_len) && ISODIGIT(input[i + 1])) {
+                /* \OOO (only one byte, \000 - \377) */
+                char buf[4];
+                int j = 0;
+                /* Take up to 3 digits; look-ahead can read input[input_len] */
+                while ((i + 1 + j < input_len) && (j < 3)) {
+                    buf[j] = input[i + 1 + j];
+                    j++;
+                    if (!ISODIGIT(input[i + 1 + j])) break;
+                }
+                buf[j] = '\0';
+                /* j >= 1 here: the branch condition saw an octal digit. */
+                if (j > 0) {
+                    /* Do not use 3 characters if we will be > 1 byte */
+                    if ((j == 3) && (buf[0] > '3')) {
+                        j = 2;
+                        buf[j] = '\0';
+                    }
+                    *d++ = (unsigned char)strtol(buf, NULL, 8);
+                    i += 1 + j;
+                    count++;
+                }
+            } else if (i + 1 < input_len) {
+                /* \C: single-character escape; unknown ones drop the '\'. */
+                unsigned char c = input[i + 1];
+                switch (input[i + 1]) {
+                    case 'a' :
+                        c = '\a';
+                        break;
+                    case 'b' :
+                        c = '\b';
+                        break;
+                    case 'f' :
+                        c = '\f';
+                        break;
+                    case 'n' :
+                        c = '\n';
+                        break;
+                    case 'r' :
+                        c = '\r';
+                        break;
+                    case 't' :
+                        c = '\t';
+                        break;
+                    case 'v' :
+                        c = '\v';
+                        break;
+                        /* The remaining (\?,\\,\',\") are just a removal
+                         * of the escape char which is default.
+                         */
+                }
+
+                *d++ = c;
+                i += 2;
+                count++;
+            } else {
+                /* Trailing backslash: copy what is left verbatim. */
+                while (i < input_len) {
+                    *d++ = input[i++];
+                    count++;
+                }
+            }
+        } else {
+            *d++ = input[i++];
+            count++;
+        }
+    }
+    /* Terminate; lands on input[input_len] when nothing was decoded. */
+    *d = '\0';
+
+    return count;
+}
+
 }  // namespace transformations
 }  // namespace actions
 }  // namespace modsecurity
--- a/src/actions/transformations/js_decode.h
+++ b/src/actions/transformations/js_decode.h
@@ -35,6 +35,7 @@ class JsDecode : public Transformation {

    std::string evaluate(std::string exp,
        Transaction *transaction) override;
+    static int inplace(unsigned char *input, u_int64_t input_len);
 };

 }  // namespace transformations
--- a/src/actions/transformations/sha1.cc
+++ b/src/actions/transformations/sha1.cc
@@ -40,11 +40,7 @@ Sha1::Sha1(std::string action)
 std::string Sha1::evaluate(std::string value,
    Transaction *transaction) {

-    Utils::SHA1 sha1;
-    sha1.update(&value);
-    std::string sha1_bin = sha1.final_bin();
-
-    return sha1_bin;
+    return Utils::Sha1::digest(value);
 }

 }  // namespace transformations
--- a/src/actions/transformations/transformation.cc
+++ b/src/actions/transformations/transformation.cc
@@ -110,8 +110,8 @@ Transformation* Transformation::instantiate(std::string a) {
    IF_MATCH(trimLeft) { return new TrimLeft(a); }
    IF_MATCH(trimRight) { return new TrimRight(a); }
    IF_MATCH(trim) { return new Trim(a); }
-    IF_MATCH(urlDecode) { return new UrlDecode(a); }
    IF_MATCH(urlDecodeUni) { return new UrlDecodeUni(a); }
+    IF_MATCH(urlDecode) { return new UrlDecode(a); }
    IF_MATCH(urlEncode) { return new UrlEncode(a); }
    IF_MATCH(utf8ToUnicode) { return new Utf8ToUnicode(a); }

--- a/src/actions/transformations/url_decode_uni.cc
+++ b/src/actions/transformations/url_decode_uni.cc
@@ -36,18 +36,155 @@ namespace transformations {

 std::string UrlDecodeUni::evaluate(std::string value,
    Transaction *transaction) {
-    int changed = 0;
-    char *tmp = strdup(value.c_str());
-    urldecode_uni_nonstrict_inplace_ex(transaction, (unsigned char *)tmp,
-        value.size(), &changed);
-    std::string ret("");
-    ret.assign(tmp);
-    free(tmp);
+    std::string ret;
+    unsigned char *input = NULL;
+
+    input = reinterpret_cast<unsigned char *>
+        (malloc(sizeof(char) * value.length()+1));
+
+    if (input == NULL) {
+        return "";
+    }
+
+    memcpy(input, value.c_str(), value.length()+1);
+
+    size_t i = inplace(input, value.length(), transaction);
+
+    ret.assign(reinterpret_cast<char *>(input), i);
+    free(input);

    return ret;
 }


+/**
+ * In-place URL decode (%HH, %uHHHH, '+'); returns output length or -1.
+ * IMP1 Assumes NUL-terminated
+ */
+int UrlDecodeUni::inplace(unsigned char *input, u_int64_t input_len,
+    Transaction *transaction) {
+    unsigned char *d = input;  /* write cursor */
+    int64_t i, count, fact, j, xv;
+    int Code, hmap = -1;
+    /* hmap stays -1 unless the unicode map is consulted below. */
+    if (input == NULL) return -1;
+
+    i = count = 0;
+    while (i < input_len) {
+        if (input[i] == '%') {
+            if ((i + 1 < input_len) &&
+                ((input[i + 1] == 'u') || (input[i + 1] == 'U'))) {
+            /* "%u" / "%U" prefix seen. */
+                /* IIS-specific %u encoding. */
+                if (i + 5 < input_len) {
+                    /* We have at least 4 data bytes. */
+                    if ((VALID_HEX(input[i + 2])) &&
+                        (VALID_HEX(input[i + 3])) &&
+                        (VALID_HEX(input[i + 4])) &&
+                        (VALID_HEX(input[i + 5]))) {
+                        Code = 0;
+                        fact = 1;
+                        /* With a map and codepage set, look the code up. */
+                        if (transaction
+                            && transaction->m_rules->unicode_map_table != NULL
+                            && transaction->m_rules->unicode_codepage > 0)  {
+                            for (j = 5; j >= 2; j--) {
+                                if (isxdigit((input[i+j]))) {
+                                    if (input[i+j] >= 97) {  /* 'a'..'f' */
+                                        xv = (input[i+j] - 97) + 10;
+                                    } else if (input[i+j] >= 65)  {  /* 'A'..'F' */
+                                        xv = (input[i+j] - 65) + 10;
+                                    } else {
+                                        xv = (input[i+j]) - 48;  /* '0'..'9' */
+                                    }
+                                    Code += (xv * fact);
+                                    fact *= 16;
+                                }
+                            }
+                            /* Code is at most 0xFFFF (four hex digits). */
+                            if (Code >= 0 && Code <= 65535)  {
+                                Rules *r = transaction->m_rules;
+                                hmap = r->unicode_map_table[Code];
+                            }
+                        }
+                        /* Prefer the mapped byte; else use the low byte. */
+                        if (hmap != -1)  {
+                            *d = hmap;
+                        } else {
+                            /* We first make use of the lower byte here,
+                             * ignoring the higher byte. */
+                            *d = x2c(&input[i + 4]);
+
+                            /* Full width ASCII (ff01 - ff5e)
+                             * needs 0x20 added */
+                            if ((*d > 0x00) && (*d < 0x5f)
+                                    && ((input[i + 2] == 'f')
+                                    || (input[i + 2] == 'F'))
+                                    && ((input[i + 3] == 'f')
+                                    || (input[i + 3] == 'F'))) {
+                                (*d) += 0x20;
+                            }
+                        }
+                        d++;
+                        count++;
+                        i += 6;
+                    } else {
+                        /* Invalid data, copy "%u" through undecoded. */
+                        *d++ = input[i++];
+                        *d++ = input[i++];
+                        count += 2;
+                    }
+                } else {
+                    /* Not enough bytes (4 data bytes), copy "%u" through. */
+                    *d++ = input[i++];
+                    *d++ = input[i++];
+                    count += 2;
+                }
+            } else {
+                /* Standard URL encoding. */
+                /* Are there enough bytes available? */
+                if (i + 2 < input_len) {
+                    /* Yes. */
+
+                    /* Decode a %xx combo only if it is valid.
+                     */
+                    char c1 = input[i + 1];
+                    char c2 = input[i + 2];
+
+                    if (VALID_HEX(c1) && VALID_HEX(c2)) {
+                        *d++ = x2c(&input[i + 1]);
+                        count++;
+                        i += 3;
+                    } else {
+                        /* Not a valid encoding, copy this % through. */
+                        *d++ = input[i++];
+                        count++;
+                    }
+                } else {
+                    /* Not enough bytes available, copy this % through. */
+                    *d++ = input[i++];
+                    count++;
+                }
+            }
+        } else {
+            /* Character is not a percent sign. */
+            if (input[i] == '+') {
+                *d++ = ' ';
+            } else {
+                *d++ = input[i];
+            }
+
+            count++;
+            i++;
+        }
+    }
+    /* Terminate; lands on input[input_len] when nothing was decoded. */
+    *d = '\0';
+
+    return count;
+}
+
+
 }  // namespace transformations
 }  // namespace actions
 }  // namespace modsecurity
--- a/src/actions/transformations/url_decode_uni.h
+++ b/src/actions/transformations/url_decode_uni.h
@@ -30,11 +30,11 @@ namespace transformations {

 class UrlDecodeUni : public Transformation {
 public:
-    explicit UrlDecodeUni(std::string action)
-        : Transformation(action) { }
+    explicit UrlDecodeUni(std::string action) : Transformation(action) { }

-    std::string evaluate(std::string exp,
-        Transaction *transaction) override;
+    std::string evaluate(std::string exp, Transaction *transaction) override;
+    static int inplace(unsigned char *input, u_int64_t input_len,
+        Transaction *transaction);
 };

 }  // namespace transformations