Change jsDecodeUni to jsDecode, which also decodes all the other JS escapes. See #193.

2026-01-16 08:27:10 +03:00 · 2007-12-14 00:19:46 +00:00
parent b0de659133
commit 8aa31fd099
5 changed files with 113 additions and 45 deletions
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 12 Dec 2007 - 2.5.0-rc1
 -----------------------

- * Added t:jsDecodeUni to decode JavScript \uXXXX encoding.
+ * Added t:jsDecode to decode JavaScript escape sequences.

 * Added IS_NEW and IS_EXPIRED built-in collection variables.

--- a/apache2/msc_util.c
+++ b/apache2/msc_util.c
@@ -16,10 +16,12 @@
 #include <sys/types.h>
 #include <sys/stat.h>

-/* NOTE: Be careful as this can only be used on static values for X.
+/* NOTE: Be careful as these can ONLY be used on static values for X.
 * (i.e. VALID_HEX(c++) will NOT work)
 */
 #define VALID_HEX(X) (((X >= '0')&&(X <= '9')) || ((X >= 'a')&&(X <= 'f')) || ((X >= 'A')&&(X <= 'F')))
+#define ISODIGIT(X) ((X >= '0')&&(X <= '7'))
+

 /**
 *
@@ -68,6 +70,7 @@ int parse_name_eq_value(apr_pool_t *mp, const char *input, char **name, char **v

 /**
 *
+ * IMP1 Assumes NUL-terminated
 */
 char *url_encode(apr_pool_t *mp, char *input, unsigned int input_len) {
    char *rval, *d;
@@ -572,9 +575,11 @@ char *_log_escape(apr_pool_t *mp, const unsigned char *input, unsigned long int
 }

 /**
- * JavaScript \uXXXX decoding.
+ * JavaScript decoding.
+ * IMP1 Assumes NUL-terminated
 */
-int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) {
+
+int js_decode_nonstrict_inplace(unsigned char *input, long int input_len) {
    unsigned char *d = (unsigned char *)input;
    long int i, count;

@@ -585,38 +590,92 @@ int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)
        if (input[i] == '\\') {
            /* Character is an escape. */

-            if ((i + 5 < input_len) && (input[i + 1] == 'u')) {
-                /* We have at least 4 data bytes. */
-                if (   (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3]))
-                    && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) )
+            if (   (i + 5 < input_len) && (input[i + 1] == 'u')
+                && (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3]))
+                && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) )
+            {
+                /* \uHHHH */
+
+                /* Use only the lower byte. */
+                *d = x2c(&input[i + 4]);
+
+                /* Full width ASCII (ff01 - ff5e) needs 0x20 added */
+                if (   (*d > 0x00) && (*d < 0x5f)
+                    && ((input[i + 2] == 'f') || (input[i + 2] == 'F'))
+                    && ((input[i + 3] == 'f') || (input[i + 3] == 'F')))
                {
-                    /* We first make use of the lower byte here, ignoring the higher byte. */
-                    *d = x2c(&input[i + 4]);
-
-                    /* Full width ASCII (ff01 - ff5e) needs 0x20 added */
-                    if (   (*d > 0x00) && (*d < 0x5f)
-                        && ((input[i + 2] == 'f') || (input[i + 2] == 'F'))
-                        && ((input[i + 3] == 'f') || (input[i + 3] == 'F')))
-                    {
-                        (*d) += 0x20;
-                    }
-
-                    d++;
-                    count++;
-                    i += 6;
+                    (*d) += 0x20;
                }
-                else {
-                    /* Invalid data. */
-                    int j;

-                    for(j = 0; (j < 6)&&(i < input_len); j++) {
-                        *d++ = input[i++];
-                        count++;
+                d++;
+                count++;
+                i += 6;
+            }
+            else if ((i + 3 < input_len) && (input[i + 1] == 'x')) {
+                /* \xHH */
+                *d++ = x2c(&input[i + 2]);
+                count++;
+                i += 4;
+            }
+            else if ((i + 1 < input_len) && ISODIGIT(input[i + 1])) {
+                /* \OOO (only one byte, \000 - \377) */
+                char buf[4];
+                int j = 0;
+
+                while((i + 1 + j < input_len)&&(j < 3)) {
+                    buf[j] = input[i + 1 + j];
+                    j++;
+                    if (!ISODIGIT(input[i + 1 + j])) break;
+                }
+                buf[j] = '\0';
+
+                if (j > 0) {
+                    /* Do not use 3 characters if we will be > 1 byte */
+                    if ((j == 3) && (buf[0] > '3')) {
+                        j = 2;
+                        buf[j] = '\0';
                    }
+                    *d++ = strtol(buf, NULL, 8);
+                    i += 1 + j;
+                    count++;
                }
            }
+            else if (i + 1 < input_len) {
+                /* \C */
+                unsigned char c = input[i + 1];
+                switch(input[i + 1]) {
+                    case 'a' :
+                        c = '\a';
+                        break;
+                    case 'b' :
+                        c = '\b';
+                        break;
+                    case 'f' :
+                        c = '\f';
+                        break;
+                    case 'n' :
+                        c = '\n';
+                        break;
+                    case 'r' :
+                        c = '\r';
+                        break;
+                    case 't' :
+                        c = '\t';
+                        break;
+                    case 'v' :
+                        c = '\v';
+                        break;
+                    /* The remaining (\?,\\,\',\") are just a removal
+                     * of the escape char which is default.
+                     */
+                }
+
+                *d++ = c;
+                i += 2;
+                count++;
+            }
            else {
-                /* Not enough bytes available (4 data bytes were needed). */
+                /* Not enough bytes */
                while(i < input_len) {
                    *d++ = input[i++];
                    count++;
@@ -636,6 +695,7 @@ int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)

 /**
 *
+ * IMP1 Assumes NUL-terminated
 */
 int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) {
    unsigned char *d = input;
@@ -745,6 +805,7 @@ int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)

 /**
 *
+ * IMP1 Assumes NUL-terminated
 */
 int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int *invalid_count) {
    unsigned char *d = (unsigned char *)input;
@@ -809,6 +870,7 @@ int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int

 /**
 *
+ * IMP1 Assumes NUL-terminated
 */
 int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input_len) {
    unsigned char *d = input;
@@ -884,6 +946,7 @@ int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input
                    char *x = apr_pstrmemdup(mp, (const char *)&input[k], j - k);

                    /* Decode the entity. */
+                    /* ENH What about others? */
                    if (strcasecmp(x, "quot") == 0) *d++ = '"';
                    else
                    if (strcasecmp(x, "amp") == 0) *d++ = '&';
@@ -923,8 +986,10 @@ int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input
    return count;
 }

-#define ISODIGIT(X) ((X >= '0')&&(X <= '7'))
-
+/**
+ *
+ * IMP1 Assumes NUL-terminated
+ */
 int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) {
    unsigned char *d = input;
    int i, count;
@@ -1032,6 +1097,10 @@ int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) {
    return count;
 }

+/**
+ *
+ * IMP1 Assumes NUL-terminated
+ */
 int normalise_path_inplace(unsigned char *input, int input_len, int win) {
    unsigned char *d = input;
    int i, count;
--- a/apache2/msc_util.h
+++ b/apache2/msc_util.h
@@ -35,7 +35,7 @@ int DSOLOCAL is_token_char(unsigned char c);

 int DSOLOCAL remove_lf_crlf_inplace(char *text);

-unsigned DSOLOCAL char x2c(unsigned char *what);
+unsigned char DSOLOCAL x2c(unsigned char *what);

 char DSOLOCAL *guess_tmp_dir(apr_pool_t *p);

@@ -66,7 +66,7 @@ char DSOLOCAL *log_escape_raw(apr_pool_t *mp, const unsigned char *text, unsigne
 char DSOLOCAL *_log_escape(apr_pool_t *p, const unsigned char *input,
    unsigned long int input_length, int escape_quotes, int escape_colon);

-int DSOLOCAL jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len);
+int DSOLOCAL js_decode_nonstrict_inplace(unsigned char *input, long int input_len);

 int DSOLOCAL urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_length);

--- a/apache2/re_tfns.c
+++ b/apache2/re_tfns.c
@@ -251,14 +251,14 @@ static int msre_fn_replaceComments_execute(apr_pool_t *mptmp, unsigned char *inp
    return changed;
 }

-/* jsDecodeUni */
+/* jsDecode */

-static int msre_fn_jsDecodeUni_execute(apr_pool_t *mptmp, unsigned char *input,
+static int msre_fn_jsDecode_execute(apr_pool_t *mptmp, unsigned char *input,
    long int input_len, char **rval, long int *rval_len)
 {
    long int length;

-    length = jsdecode_uni_nonstrict_inplace_ex(input, input_len);
+    length = js_decode_nonstrict_inplace(input, input_len);
    *rval = (char *)input;
    *rval_len = length;
    
@@ -518,10 +518,10 @@ void msre_engine_register_default_tfns(msre_engine *engine) {
        msre_fn_htmlEntityDecode_execute
    );

-    /* jsDecodeUni */
+    /* jsDecode */
    msre_engine_tfn_register(engine,
-        "jsDecodeUni",
-        msre_fn_jsDecodeUni_execute
+        "jsDecode",
+        msre_fn_jsDecode_execute
    );

    /* length */
--- a/doc/modsecurity2-apache-reference.xml
+++ b/doc/modsecurity2-apache-reference.xml
@@ -3369,13 +3369,12 @@ SecRule <emphasis>XML:/xq:employees/employee/name/text()</emphasis> Fred \
    </section>

    <section>
-      <title><literal>jsDecodeUni</literal></title>
+      <title><literal>jsDecode</literal></title>

-      <para>Decodes <literal moreinfo="none">\uXXXX</literal> JavaScript
-      encoding. If the code is in the range of FF01-FF5E (the full width ASCII
-      codes), then the higher byte is used to detect and adjust the lower
-      byte. Otherwise, only the lower byte will be used and the higher byte
-      zeroed.</para>
+      <para>Decodes JavaScript escape sequences. If a \uHHHH code is in the
+      range of FF01-FF5E (the full width ASCII codes), then the higher byte is
+      used to detect and adjust the lower byte. Otherwise, only the lower byte
+      will be used and the higher byte zeroed.</para>
    </section>

    <section>