Updates libinjection.

This is not yet their v3.10.0, but I believe it is close to it. See issue #124 at client9/libinjection for further information.
2026-01-16 08:27:10 +03:00 · 2017-05-30 10:43:10 -03:00
parent e5dbe59336
commit 53571a860d
6 changed files with 177 additions and 457 deletions
--- a/apache2/libinjection/libinjection_xss.c
+++ b/apache2/libinjection/libinjection_xss.c
@@ -1,11 +1,15 @@
-
-#include "libinjection.h"
 #include "libinjection_xss.h"
 #include "libinjection_html5.h"

 #include <assert.h>
 #include <stdio.h>

+/*
+ * HEY THIS ISN'T DONE
+ *  AND MISSING A KEY INGREDIENT!!
+ *
+ */
+
 typedef enum attribute {
    TYPE_NONE
    , TYPE_BLACK     /* ban always */
@@ -14,128 +18,11 @@ typedef enum attribute {
    , TYPE_ATTR_INDIRECT  /* attribute *name* is given in *value* */
 } attribute_t;

-
-static attribute_t is_black_attr(const char* s, size_t len);
-static int is_black_tag(const char* s, size_t len);
-static int is_black_url(const char* s, size_t len);
-static int cstrcasecmp_with_null(const char *a, const char *b, size_t n);
-static int html_decode_char_at(const char* src, size_t len, size_t* consumed);
-static int htmlencode_startswith(const char* prefix, const char *src, size_t n);
-
-
 typedef struct stringtype {
    const char* name;
    attribute_t atype;
 } stringtype_t;

-
-static const int gsHexDecodeMap[256] = {
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    0,   1,   2,   3,   4,   5,   6,   7,   8,   9, 256, 256,
-    256, 256, 256, 256, 256,  10,  11,  12,  13,  14,  15, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256,  10,  11,  12,  13,  14,  15, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
-    256, 256, 256, 256
-};
-
-static int html_decode_char_at(const char* src, size_t len, size_t* consumed)
-{
-    int val = 0;
-    size_t i;
-    int ch;
-
-    if (len == 0 || src == NULL) {
-        *consumed = 0;
-        return -1;
-    }
-
-    *consumed = 1;
-    if (*src != '&' || len < 2) {
-        return (unsigned char)(*src);
-    }
-
-
-    if (*(src+1) != '#') {
-        /* normally this would be for named entities
-         * but for this case we don't actually care
-         */
-        return '&';
-    }
-
-    if (*(src+2) == 'x' || *(src+2) == 'X') {
-        ch = (unsigned char) (*(src+3));
-        ch = gsHexDecodeMap[ch];
-        if (ch == 256) {
-            /* degenerate case  '&#[?]' */
-            return '&';
-        }
-        val = ch;
-        i = 4;
-        while (i < len) {
-            ch = (unsigned char) src[i];
-            if (ch == ';') {
-                *consumed = i + 1;
-                return val;
-            }
-            ch = gsHexDecodeMap[ch];
-            if (ch == 256) {
-                *consumed = i;
-                return val;
-            }
-            val = (val * 16) + ch;
-            if (val > 0x1000FF) {
-                return '&';
-            }
-            ++i;
-        }
-        *consumed = i;
-        return val;
-    } else {
-        i = 2;
-        ch = (unsigned char) src[i];
-        if (ch < '0' || ch > '9') {
-            return '&';
-        }
-        val = ch - '0';
-        i += 1;
-        while (i < len) {
-            ch = (unsigned char) src[i];
-            if (ch == ';') {
-                *consumed = i + 1;
-                return val;
-            }
-            if (ch < '0' || ch > '9') {
-                *consumed = i;
-                return val;
-            }
-            val = (val * 10) + (ch - '0');
-            if (val > 0x1000FF) {
-                return '&';
-            }
-            ++i;
-        }
-        *consumed = i;
-        return val;
-    }
-}
-
-
 /*
 * view-source:
 * data:
@@ -150,7 +37,7 @@ static stringtype_t BLACKATTR[] = {
    , { "DATASRC", TYPE_BLACK }       /* IE */
    , { "DYNSRC", TYPE_ATTR_URL }     /* Obsolete img attribute */
    , { "FILTER", TYPE_STYLE }        /* Opera, SVG inline style */
-    , { "FORMACTION", TYPE_ATTR_URL } /* HTML 5 */
+    , { "FORMACTION", TYPE_ATTR_URL } /* HTML5 */
    , { "FOLDER", TYPE_ATTR_URL }     /* Only on A tags, IE-only */
    , { "FROM", TYPE_ATTR_URL }       /* SVG */
    , { "HANDLER", TYPE_ATTR_URL }    /* SVG Tiny, Opera */
@@ -166,27 +53,26 @@ static stringtype_t BLACKATTR[] = {
 };

 /* xmlns */
-/* `xml-stylesheet` > <eval>, <if expr=> */
+/* xml-stylesheet > <eval>, <if expr=> */

 /*
-  static const char* BLACKATTR[] = {
-  "ATTRIBUTENAME",
-  "BACKGROUND",
-  "DATAFORMATAS",
-  "HREF",
-  "SCROLL",
-  "SRC",
-  "STYLE",
-  "SRCDOC",
-  NULL
-  };
+static const char* BLACKATTR[] = {
+    "ATTRIBUTENAME",
+    "BACKGROUND",
+    "DATAFORMATAS",
+    "HREF",
+    "SCROLL",
+    "SRC",
+    "STYLE",
+    "SRCDOC",
+    NULL
+};
 */

 static const char* BLACKTAG[] = {
    "APPLET"
    /*    , "AUDIO" */
    , "BASE"
-    , "COMMENT"  /* IE http://html5sec.org/#38 */
    , "EMBED"
    /*   ,  "FORM" */
    , "FRAME"
@@ -206,94 +92,33 @@ static const char* BLACKTAG[] = {
    /*    , "VIDEO" */
    , "VMLFRAME"
    , "XML"
-    , "XSS"
    , NULL
 };

+static int is_black_tag(const char* s, size_t len);
+static attribute_t is_black_attr(const char* s, size_t len);
+static int is_black_url(const char* s, size_t len);
+static int cstrcasecmp_with_null(const char *a, const char *b, size_t n);

 static int cstrcasecmp_with_null(const char *a, const char *b, size_t n)
 {
-    char ca;
    char cb;
-    /* printf("Comparing to %s %.*s\n", a, (int)n, b); */
-    while (n-- > 0) {
-        cb = *b++;
+
+    for (; n > 0; a++, b++, n--) {
+        cb = *b;
        if (cb == '\0') continue;

-        ca = *a++;
-
        if (cb >= 'a' && cb <= 'z') {
            cb -= 0x20;
        }
-        /* printf("Comparing %c vs %c with %d left\n", ca, cb, (int)n); */
-        if (ca != cb) {
-            return 1;
+        if (*a != cb) {
+            return *a - cb;
+        } else if (*a == '\0') {
+            return -1;
        }
    }

-    if (*a == 0) {
-        /* printf(" MATCH \n"); */
-        return 0;
-    } else {
-        return 1;
-    }
-}
-
-/*
- * Does an HTML encoded  binary string (const char*, length) start with
- * a all uppercase c-string (null terminated), case insensitive!
- *
- * also ignore any embedded nulls in the HTML string!
- *
- * return 1 if match / starts with
- * return 0 if not
- */
-static int htmlencode_startswith(const char *a, const char *b, size_t n)
-{
-    size_t consumed;
-    int cb;
-    int first = 1;
-    /* printf("Comparing %s with %.*s\n", a,(int)n,b); */
-    while (n > 0) {
-        if (*a == 0) {
-            /* printf("Match EOL!\n"); */
-            return 1;
-        }
-        cb = html_decode_char_at(b, n, &consumed);
-        b += consumed;
-        n -= consumed;
-
-        if (first && cb <= 32) {
-            /* ignore all leading whitespace and control characters */
-            continue;
-        }
-        first = 0;
-
-        if (cb == 0) {
-            /* always ignore null characters in user input */
-            continue;
-        }
-
-        if (cb == 10) {
-            /* always ignore vertical tab characters in user input */
-            /* who allows this?? */
-            continue;
-        }
-
-        if (cb >= 'a' && cb <= 'z') {
-            /* upcase */
-            cb -= 0x20;
-        }
-
-        if (*a != (char) cb) {
-            /* printf("    %c != %c\n", *a, cb); */
-            /* mismatch */
-            return 0;
-        }
-        a++;
-    }
-
-    return (*a == 0) ? 1 : 0;
+    return (*a == 0) ? 0 : 1;
 }

 static int is_black_tag(const char* s, size_t len)
@@ -307,7 +132,6 @@ static int is_black_tag(const char* s, size_t len)
    black = BLACKTAG;
    while (*black != NULL) {
        if (cstrcasecmp_with_null(*black, s, len) == 0) {
-            /* printf("Got black tag %s\n", *black); */
            return 1;
        }
        black += 1;
@@ -317,7 +141,6 @@ static int is_black_tag(const char* s, size_t len)
    if ((s[0] == 's' || s[0] == 'S') &&
        (s[1] == 'v' || s[1] == 'V') &&
        (s[2] == 'g' || s[2] == 'G')) {
-        /*        printf("Got SVG tag \n"); */
        return 1;
    }

@@ -325,7 +148,6 @@ static int is_black_tag(const char* s, size_t len)
    if ((s[0] == 'x' || s[0] == 'X') &&
        (s[1] == 's' || s[1] == 'S') &&
        (s[2] == 'l' || s[2] == 'L')) {
-        /*      printf("Got XSL tag\n"); */
        return 1;
    }

@@ -340,9 +162,8 @@ static attribute_t is_black_attr(const char* s, size_t len)
        return TYPE_NONE;
    }

-    /* JavaScript on.* */
+    /* javascript on.* */
    if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
-        /* printf("Got JavaScript on- attribute name\n"); */
        return TYPE_BLACK;
    }

@@ -350,7 +171,6 @@ static attribute_t is_black_attr(const char* s, size_t len)
    if (len >= 5) {
        /* XMLNS can be used to create arbitrary tags */
        if (cstrcasecmp_with_null("XMLNS", s, 5) == 0 || cstrcasecmp_with_null("XLINK", s, 5) == 0) {
-            /*      printf("Got XMLNS and XLINK tags\n"); */
            return TYPE_BLACK;
        }
    }
@@ -358,7 +178,6 @@ static attribute_t is_black_attr(const char* s, size_t len)
    black = BLACKATTR;
    while (black->name != NULL) {
        if (cstrcasecmp_with_null(black->name, s, len) == 0) {
-            /*      printf("Got banned attribute name %s\n", black->name); */
            return black->atype;
        }
        black += 1;
@@ -379,43 +198,49 @@ static int is_black_url(const char* s, size_t len)
    /* covers JAVA, JAVASCRIPT, + colon */
    static const char* javascript_url = "JAVA";

+    size_t tokenlen;
+
    /* skip whitespace */
-    while (len > 0 && (*s <= 32 || *s >= 127)) {
+    while (len > 0) {
        /*
         * HEY: this is a signed character.
         *  We are intentionally skipping high-bit characters too
-         *  since they are not ASCII, and Opera sometimes uses UTF-8 whitespace.
-         *
-         * Also in EUC-JP some of the high bytes are just ignored.
+         *  since they are not ascii, and Opera sometimes uses UTF8 whitespace
         */
-        ++s;
-        --len;
+        if (*s <= 32) {
+            ++s;
+            --len;
+        }
+        break;
    }

-    if (htmlencode_startswith(data_url, s, len)) {
+    tokenlen = strlen(data_url);
+    if (len > tokenlen && cstrcasecmp_with_null(data_url, s, tokenlen) == 0) {
+        return 1;
+    }
+    tokenlen = strlen(viewsource_url);
+    if (len > tokenlen && cstrcasecmp_with_null(viewsource_url, s, tokenlen) == 0) {
        return 1;
    }

-    if (htmlencode_startswith(viewsource_url, s, len)) {
+    tokenlen = strlen(javascript_url);
+    if (len > tokenlen && cstrcasecmp_with_null(javascript_url, s, tokenlen) == 0) {
        return 1;
    }

-    if (htmlencode_startswith(javascript_url, s, len)) {
-        return 1;
-    }
-
-    if (htmlencode_startswith(vbscript_url, s, len)) {
+    tokenlen = strlen(vbscript_url);
+    if (len > tokenlen && cstrcasecmp_with_null(vbscript_url, s, tokenlen) == 0) {
        return 1;
    }
    return 0;
 }

-int libinjection_is_xss(const char* s, size_t len, int flags)
+int libinjection_is_xss(const char* s, size_t len)
 {
    h5_state_t h5;
    attribute_t attr = TYPE_NONE;

-    libinjection_h5_init(&h5, s, len, (enum html5_flags) flags);
+    libinjection_h5_init(&h5, s, len, 0);
    while (libinjection_h5_next(&h5)) {
        if (h5.token_type != ATTR_VALUE) {
            attr = TYPE_NONE;
@@ -433,16 +258,16 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
            /*
             * IE6,7,8 parsing works a bit differently so
             * a whole <script> or other black tag might be hiding
-             * inside an attribute value under HTML 5 parsing
+             * inside an attribute value under HTML5 parsing
             * See http://html5sec.org/#102
             * to avoid doing a full reparse of the value, just
             * look for "<".  This probably need adjusting to
             * handle escaped characters
             */
            /*
-              if (memchr(h5.token_start, '<', h5.token_len) != NULL) {
-              return 1;
-              }
+            if (memchr(h5.token_start, '<', h5.token_len) != NULL) {
+                return 1;
+            }
            */

            switch (attr) {
@@ -464,13 +289,13 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
                }
                break;
 /*
-  default:
-  assert(0);
+            default:
+                assert(0);
 */
            }
            attr = TYPE_NONE;
        } else if (h5.token_type == TAG_COMMENT) {
-            /* IE uses a "`" as a tag ending char */
+	    /* IE uses a "`" as a tag ending char */
            if (memchr(h5.token_start, '`', h5.token_len) != NULL) {
                return 1;
            }
@@ -482,7 +307,7 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
                    (h5.token_start[2] == 'f' || h5.token_start[2] == 'F')) {
                    return 1;
                }
-                if ((h5.token_start[0] == 'x' || h5.token_start[0] == 'X') &&
+                if ((h5.token_start[0] == 'x' || h5.token_start[1] == 'X') &&
                    (h5.token_start[1] == 'm' || h5.token_start[1] == 'M') &&
                    (h5.token_start[2] == 'l' || h5.token_start[2] == 'L')) {
                    return 1;
@@ -490,7 +315,7 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
            }

            if (h5.token_len > 5) {
-                /*  IE <?import pseudo-tag */
+	        /*  IE <?import pseudo-tag */
                if (cstrcasecmp_with_null("IMPORT", h5.token_start, 6) == 0) {
                    return 1;
                }
@@ -504,28 +329,3 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
    }
    return 0;
 }
-
-
-/*
- * wrapper
- */
-int libinjection_xss(const char* s, size_t len)
-{
-    if (libinjection_is_xss(s, len, DATA_STATE)) {
-        return 1;
-    }
-    if (libinjection_is_xss(s, len, VALUE_NO_QUOTE)) {
-        return 1;
-    }
-    if (libinjection_is_xss(s, len, VALUE_SINGLE_QUOTE)) {
-        return 1;
-    }
-    if (libinjection_is_xss(s, len, VALUE_DOUBLE_QUOTE)) {
-        return 1;
-    }
-    if (libinjection_is_xss(s, len, VALUE_BACK_QUOTE)) {
-        return 1;
-    }
-
-    return 0;
-}