Updates libinjection to v3.10.0

2026-01-06 16:31:09 +03:00 · 2017-05-31 21:04:55 -03:00
parent 53571a860d
commit 9c0229ce1f
4 changed files with 476 additions and 166 deletions
--- a/4
+++ b/4
@@ -1,8 +1,8 @@
 DD MMM YYYY - 2.9.2 - To be released
 ------------------------------------

- * Updates libinjection to: bf234eb2f385b969c4f803b35fda53cffdd93922
-   [Issue #1412 - @zimmerle, @bjdijk]
+ * Updates libinjection to v3.10.0
+   [Issue #1412 - @client9, @zimmerle and @bjdijk]
 * Avoid log flood while using SecConnEngine
   [Issue #1436 - @victorhora]
 * Make url path absolute for SecHashEngine only when it is relative
--- a/apache2/libinjection/libinjection_html5.c
+++ b/apache2/libinjection/libinjection_html5.c
@@ -12,6 +12,7 @@


 #define CHAR_EOF -1
+#define CHAR_NULL 0
 #define CHAR_BANG 33
 #define CHAR_DOUBLE 34
 #define CHAR_PERCENT 37
@@ -23,6 +24,7 @@
 #define CHAR_GT 62
 #define CHAR_QUESTION 63
 #define CHAR_RIGHTB 93
+#define CHAR_TICK 96

 /* prototypes */

@@ -41,6 +43,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs);
 static int h5_state_before_attribute_value(h5_state_t* hs);
 static int h5_state_attribute_value_double_quote(h5_state_t* hs);
 static int h5_state_attribute_value_single_quote(h5_state_t* hs);
+static int h5_state_attribute_value_back_quote(h5_state_t* hs);
 static int h5_state_attribute_value_no_quote(h5_state_t* hs);
 static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs);
 static int h5_state_comment(h5_state_t* hs);
@@ -60,16 +63,28 @@ static int h5_state_doctype(h5_state_t* hs);
 /**
 * public function
 */
-void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, int flags)
+void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags)
 {
    memset(hs, 0, sizeof(h5_state_t));
    hs->s = s;
    hs->len = len;
-    hs->state = h5_state_data;
-    if (flags == 0) {
+    switch (flags) {
+    default: /* unknown flags: start in the data state, never leave hs->state zeroed by the memset */
+    case DATA_STATE:
        hs->state = h5_state_data;
-    } else {
-        assert(0);
+        break;
+    case VALUE_NO_QUOTE:
+        hs->state = h5_state_before_attribute_name;
+        break;
+    case VALUE_SINGLE_QUOTE:
+        hs->state = h5_state_attribute_value_single_quote;
+        break;
+    case VALUE_DOUBLE_QUOTE:
+        hs->state = h5_state_attribute_value_double_quote;
+        break;
+    case VALUE_BACK_QUOTE:
+        hs->state = h5_state_attribute_value_back_quote;
+        break;
    }
 }

@@ -85,10 +100,18 @@ int libinjection_h5_next(h5_state_t* hs)
 /**
 * Everything below here is private
 *
-*/
+ */
+

 static int h5_is_white(char ch)
 {
+    /*
+     * \t = horizontal tab = 0x09
+     * \n = newline = 0x0A
+     * \v = vertical tab = 0x0B
+     * \f = form feed = 0x0C
+     * \r = cr  = 0x0D
+     */
    return strchr(" \t\n\v\f\r", ch) != NULL;
 }

@@ -97,9 +120,17 @@ static int h5_skip_white(h5_state_t* hs)
    char ch;
    while (hs->pos < hs->len) {
        ch = hs->s[hs->pos];
-        if (ch == ' ') {
+        switch (ch) {
+        case 0x00: /* IE only */
+        case 0x20:
+        case 0x09:
+        case 0x0A:
+        case 0x0B: /* IE only */
+        case 0x0C:
+        case 0x0D: /* IE only */
            hs->pos += 1;
-        } else {
+            break;
+        default:
            return ch;
        }
    }
@@ -149,6 +180,9 @@ static int h5_state_tag_open(h5_state_t* hs)
    char ch;

    TRACE();
+    if (hs->pos >= hs->len) {
+        return 0;
+    }
    ch = hs->s[hs->pos];
    if (ch == CHAR_BANG) {
        hs->pos += 1;
@@ -167,6 +201,9 @@ static int h5_state_tag_open(h5_state_t* hs)
        return h5_state_bogus_comment2(hs);
    } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
        return h5_state_tag_name(hs);
+    } else if (ch == CHAR_NULL) {
+        /* IE-ism: NULL characters are ignored */
+        return h5_state_tag_name(hs);
    } else {
        /* user input mistake in configuring state */
        if (hs->pos == 0) {
@@ -197,7 +234,9 @@ static int h5_state_end_tag_open(h5_state_t* hs)
    } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
        return h5_state_tag_name(hs);
    }
-    return h5_state_data(hs);
+
+    hs->is_close = 0;
+    return h5_state_bogus_comment(hs);
 }
 /*
 *
@@ -231,7 +270,12 @@ static int h5_state_tag_name(h5_state_t* hs)
    pos = hs->pos;
    while (pos < hs->len) {
        ch = hs->s[pos];
-        if (h5_is_white(ch)) {
+        if (ch == 0) {
+            /* special non-standard case */
+            /* allow nulls in tag name   */
+            /* some old browsers apparently allow and ignore them */
+            pos += 1;
+        } else if (h5_is_white(ch)) {
            hs->token_start = hs->s + hs->pos;
            hs->token_len = pos - hs->pos;
            hs->token_type = TAG_NAME_OPEN;
@@ -299,7 +343,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs)
    default: {
        return h5_state_attribute_name(hs);
    }
-  }
+    }
 }

 static int h5_state_attribute_name(h5_state_t* hs)
@@ -308,7 +352,7 @@ static int h5_state_attribute_name(h5_state_t* hs)
    size_t pos;

    TRACE();
-    pos = hs->pos;
+    pos = hs->pos + 1;
    while (pos < hs->len) {
        ch = hs->s[pos];
        if (h5_is_white(ch)) {
@@ -358,21 +402,19 @@ static int h5_state_attribute_name(h5_state_t* hs)
 static int h5_state_after_attribute_name(h5_state_t* hs)
 {
    int c;
-    size_t pos;

    TRACE();
-    pos = hs->pos;
    c = h5_skip_white(hs);
    switch (c) {
    case CHAR_EOF: {
        return 0;
    }
    case CHAR_SLASH: {
-        hs->pos = pos + 1;
+        hs->pos += 1;
        return h5_state_self_closing_start_tag(hs);
    }
    case CHAR_EQUALS: {
-        hs->pos = pos + 1;
+        hs->pos += 1;
        return h5_state_before_attribute_value(hs);
    }
    case CHAR_GT: {
@@ -403,6 +445,9 @@ static int h5_state_before_attribute_value(h5_state_t* hs)
        return h5_state_attribute_value_double_quote(hs);
    } else if (c == CHAR_SINGLE) {
        return h5_state_attribute_value_single_quote(hs);
+    } else if (c == CHAR_TICK) {
+        /* NON STANDARD IE */
+        return h5_state_attribute_value_back_quote(hs);
    } else {
        return h5_state_attribute_value_no_quote(hs);
    }
@@ -415,8 +460,16 @@ static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)

    TRACE();

-    /* skip quote */
-    hs->pos += 1;
+    /* Skip the initial quote in the normal case.
+     * Don't skip it when pos == 0: that means we started in a
+     * non-data state, so given an input of '><foo
+     * we want to make a 0-length attribute name.
+     */
+    if (hs->pos > 0) {
+        hs->pos += 1;
+    }
+
+
    idx = (const char*) memchr(hs->s + hs->pos, qchar, hs->len - hs->pos);
    if (idx == NULL) {
        hs->token_start = hs->s + hs->pos;
@@ -447,6 +500,13 @@ int h5_state_attribute_value_single_quote(h5_state_t* hs)
    return h5_state_attribute_value_quote(hs, CHAR_SINGLE);
 }

+static
+int h5_state_attribute_value_back_quote(h5_state_t* hs)
+{
+    TRACE();
+    return h5_state_attribute_value_quote(hs, CHAR_TICK);
+}
+
 static int h5_state_attribute_value_no_quote(h5_state_t* hs)
 {
    char ch;
@@ -656,10 +716,13 @@ static int h5_state_comment(h5_state_t* hs)
    char ch;
    const char* idx;
    size_t pos;
+    size_t offset;
+    const char* end = hs->s + hs->len;

    TRACE();
    pos = hs->pos;
    while (1) {
+
        idx = (const char*) memchr(hs->s + pos, CHAR_DASH, hs->len - pos);

        /* did not find anything or has less than 3 chars left */
@@ -670,21 +733,62 @@ static int h5_state_comment(h5_state_t* hs)
            hs->token_type = TAG_COMMENT;
            return 1;
        }
-        ch = *(idx + 1);
+        offset = 1;
+
+        /* skip all nulls */
+        while (idx + offset < end && *(idx + offset) == 0) {
+            offset += 1;
+        }
+        if (idx + offset == end) {
+            hs->state = h5_state_eof;
+            hs->token_start = hs->s + hs->pos;
+            hs->token_len = hs->len - hs->pos;
+            hs->token_type = TAG_COMMENT;
+            return 1;
+        }
+
+        ch = *(idx + offset);
        if (ch != CHAR_DASH && ch != CHAR_BANG) {
            pos = (size_t)(idx - hs->s) + 1;
            continue;
        }
-        ch = *(idx + 2);
+
+        /* need to test */
+#if 0
+        /* skip all nulls */
+        while (idx + offset < end && *(idx + offset) == 0) {
+            offset += 1;
+        }
+        if (idx + offset == end) {
+            hs->state = h5_state_eof;
+            hs->token_start = hs->s + hs->pos;
+            hs->token_len = hs->len - hs->pos;
+            hs->token_type = TAG_COMMENT;
+            return 1;
+        }
+#endif
+
+        offset += 1;
+        if (idx + offset == end) {
+            hs->state = h5_state_eof;
+            hs->token_start = hs->s + hs->pos;
+            hs->token_len = hs->len - hs->pos;
+            hs->token_type = TAG_COMMENT;
+            return 1;
+        }
+
+
+        ch = *(idx + offset);
        if (ch != CHAR_GT) {
            pos = (size_t)(idx - hs->s) + 1;
            continue;
        }
+        offset += 1;

        /* ends in --> or -!> */
        hs->token_start = hs->s + hs->pos;
        hs->token_len = (size_t)(idx - hs->s) - hs->pos;
-        hs->pos = (size_t)(idx - hs->s) + 3;
+        hs->pos = (size_t)(idx + offset - hs->s);
        hs->state = h5_state_data;
        hs->token_type = TAG_COMMENT;
        return 1;
--- a/apache2/libinjection/libinjection_sqli.c
+++ b/apache2/libinjection/libinjection_sqli.c
@@ -1,5 +1,5 @@
 /**
- * Copyright 2012,2013  Nick Galbreath
+ * Copyright 2012,2016  Nick Galbreath
 * nickg@client9.com
 * BSD License -- see COPYING.txt for details
 *
@@ -18,7 +18,7 @@
 #include "libinjection_sqli.h"
 #include "libinjection_sqli_data.h"

-#define LIBINJECTION_VERSION "3.9.1"
+#define LIBINJECTION_VERSION "3.9.2"

 #define LIBINJECTION_SQLI_TOKEN_SIZE  sizeof(((stoken_t*)(0))->val)
 #define LIBINJECTION_SQLI_MAX_TOKENS  5
@@ -112,15 +112,11 @@ memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
    }

    while (cur < last) {
-        if (cur[0] == c0) {
-            if (cur[1] == c1) {
-                return cur;
-            } else {
-                cur += 2; /* (c0 == c1) ? 1 : 2; */
-            }
-        } else {
-            cur += 1;
+        /* safe since cur < len - 1 always */
+        if (cur[0] == c0 && cur[1] == c1) {
+            return cur;
        }
+        cur += 1;
    }

    return NULL;
@@ -191,11 +187,11 @@ static int char_is_white(char ch) {
    /* ' '  space is 0x32
       '\t  0x09 \011 horizontal tab
       '\n' 0x0a \012 new line
-       '\v' 0x0b \013 verical tab
+       '\v' 0x0b \013 vertical tab
       '\f' 0x0c \014 new page
       '\r' 0x0d \015 carriage return
            0x00 \000 null (oracle)
-            0xa0 \240 is latin1
+            0xa0 \240 is Latin-1
    */
    return strchr(" \t\n\v\f\r\240\000", ch) != NULL;
 }
@@ -294,7 +290,7 @@ static void st_clear(stoken_t * st)
 static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len,
                           const char value)
 {
-    /* done to elimiate unused warning */
+    /* done to eliminate unused warning */
    (void)len;
    st->type = (char) stype;
    st->pos = pos;
@@ -402,7 +398,7 @@ static size_t parse_eol_comment(struct libinjection_sqli_state * sf)
    }
 }

-/** In Ansi mode, hash is an operator
+/** In ANSI mode, hash is an operator
 *  In MYSQL mode, it's a EOL comment like '--'
 */
 static size_t parse_hash(struct libinjection_sqli_state * sf)
@@ -842,7 +838,7 @@ static size_t parse_bstring(struct libinjection_sqli_state *sf)

 /*
 * hex literal string
- * re: [XX]'[0123456789abcdefABCDEF]*'
+ * re: [xX]'[0123456789abcdefABCDEF]*'
 * mysql has requirement of having EVEN number of chars,
 *  but pgsql does not
 */
@@ -1072,7 +1068,7 @@ static size_t parse_money(struct libinjection_sqli_state *sf)
            /* we have $foobar$ ... find it again */
            strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2);

-            if (strend == NULL) {
+            if (strend == NULL || ((size_t)(strend - cs) < (pos+xlen+2))) {
                /* fell off edge */
                st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2);
                sf->current->str_open = '$';
@@ -1104,7 +1100,6 @@ static size_t parse_number(struct libinjection_sqli_state * sf)
    const char *cs = sf->s;
    const size_t slen = sf->slen;
    size_t pos = sf->pos;
-    int have_dot = 0;
    int have_e = 0;
    int have_exp = 0;

@@ -1136,7 +1131,6 @@ static size_t parse_number(struct libinjection_sqli_state * sf)
    }

    if (pos < slen && cs[pos] == '.') {
-        have_dot = 1;
        pos += 1;
        while (pos < slen && ISDIGIT(cs[pos])) {
            pos += 1;
@@ -1185,7 +1179,7 @@ static size_t parse_number(struct libinjection_sqli_state * sf)
        }
    }

-    if (have_dot == 1 && have_e == 1 && have_exp == 0) {
+    if (have_e == 1 && have_exp == 0) {
        /* very special form of
         * "1234.e"
         * "10.10E"
@@ -1242,29 +1236,13 @@ int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf)
        const unsigned char ch = (unsigned char) (s[*pos]);

        /*
-         * if not ascii, then continue...
-         *   actually probably need to just assuming
-         *   it's a string
+         * look up the parser, and call it
+         *
+         * Porting Note: this is mapping of char to function
+         *   charparsers[ch]()
         */
-        if (ch > 127) {
+        fnptr = char_parse_map[ch];

-            /* 160 or 0xA0 or octal 240 is "latin1 non-breaking space"
-             * but is treated as a space in mysql.
-             */
-            if (ch == 160) {
-                fnptr = parse_white;
-            } else {
-                fnptr = parse_word;
-            }
-        } else {
-            /*
-             * look up the parser, and call it
-             *
-             * Porting Note: this is mapping of char to function
-             *   charparsers[ch]()
-             */
-            fnptr = char_parse_map[ch];
-        }
        *pos = (*fnptr) (sf);

        /*
@@ -1349,16 +1327,22 @@ static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a,
         a->type == TYPE_UNION ||
         a->type == TYPE_FUNCTION ||
         a->type == TYPE_EXPRESSION ||
+         a->type == TYPE_TSQL ||
         a->type == TYPE_SQLTYPE)) {
-        return CHAR_NULL;
+        return FALSE;
    }

-    if (b->type != TYPE_KEYWORD  && b->type != TYPE_BAREWORD &&
-        b->type != TYPE_OPERATOR && b->type != TYPE_SQLTYPE &&
-        b->type != TYPE_LOGIC_OPERATOR &&
-        b->type != TYPE_FUNCTION &&
-        b->type != TYPE_UNION    && b->type != TYPE_EXPRESSION) {
-        return CHAR_NULL;
+    if (!
+        (b->type == TYPE_KEYWORD ||
+         b->type == TYPE_BAREWORD ||
+         b->type == TYPE_OPERATOR ||
+         b->type == TYPE_UNION ||
+         b->type == TYPE_FUNCTION ||
+         b->type == TYPE_EXPRESSION ||
+         b->type == TYPE_TSQL ||
+         b->type == TYPE_SQLTYPE ||
+         b->type == TYPE_LOGIC_OPERATOR)) {
+        return FALSE;
    }

    sz1 = a->len;
@@ -1374,7 +1358,6 @@ static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a,
    tmp[sz1] = ' ';
    memcpy(tmp + sz1 + 1, b->val, sz2);
    tmp[sz3] = CHAR_NULL;
-
    ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);

    if (ch != CHAR_NULL) {
@@ -1450,6 +1433,13 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
                    sf->tokenvec[2].type == TYPE_COMMA &&
                    sf->tokenvec[3].type == TYPE_LEFTPARENS &&
                    sf->tokenvec[4].type == TYPE_NUMBER
+                    ) ||
+                (
+                    sf->tokenvec[0].type == TYPE_BAREWORD &&
+                    sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
+                    sf->tokenvec[2].type == TYPE_OPERATOR &&
+                    sf->tokenvec[3].type == TYPE_LEFTPARENS &&
+                    sf->tokenvec[4].type == TYPE_BAREWORD
                    )
                )
            {
@@ -1506,16 +1496,6 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
            pos -= 1;
            sf->stats_folds += 1;
            continue;
-        } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
-                   sf->tokenvec[left+1].type == TYPE_FUNCTION &&
-                   cstrcasecmp("IF", sf->tokenvec[left+1].val, sf->tokenvec[left+1].len) == 0) {
-            /* IF is normally a function, except in Transact-SQL where it can be used as a
-             * standalone control flow operator, e.g. ; IF 1=1 ...
-             * if found after a semicolon, convert from 'f' type to 'T' type
-             */
-            sf->tokenvec[left+1].type = TYPE_TSQL;
-            left += 2;
-            continue; /* reparse everything, but we probably can advance left, and pos */
        } else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
                    sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
                   (st_is_unary_op(&sf->tokenvec[left+1]) ||
@@ -1539,9 +1519,22 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
                left -= 1;
            }
            continue;
+        } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
+                   sf->tokenvec[left+1].type == TYPE_FUNCTION &&
+		   (sf->tokenvec[left+1].val[0] == 'I' ||
+		    sf->tokenvec[left+1].val[0] == 'i' ) &&
+		   (sf->tokenvec[left+1].val[1] == 'F' ||
+                    sf->tokenvec[left+1].val[1] == 'f' )) {
+            /* IF is normally a function, except in Transact-SQL where it can be used as a
+             * standalone control flow operator, e.g. ; IF 1=1 ...
+             * if found after a semicolon, convert from 'f' type to 'T' type
+             */
+            sf->tokenvec[left+1].type = TYPE_TSQL;
+            /* left += 2; */
+            continue; /* reparse everything, but we probably can advance left, and pos */
        } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) &&
                   sf->tokenvec[left+1].type == TYPE_LEFTPARENS && (
-                       /* TSQL functions but common enough to be collumn names */
+                       /* TSQL functions but common enough to be column names */
                       cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
                       cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||

@@ -1564,7 +1557,7 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)

            /* pos is the same
             * other conversions need to go here... for instance
-             * password CAN be a function, coalese CAN be a function
+             * password CAN be a function, coalesce CAN be a function
             */
            sf->tokenvec[left].type = TYPE_FUNCTION;
            continue;
@@ -1828,7 +1821,7 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
             * 1,-sin(1) --> 1 (1)
             * Here, just do
             * 1,-sin(1) --> 1,sin(1)
-             * just remove unary opartor
+             * just remove unary operator
             */
            st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
            pos -= 1;
@@ -1852,9 +1845,21 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
            pos -= 1;
            left = 0;
            continue;
+        } else if ((sf->tokenvec[left].type == TYPE_FUNCTION) &&
+                   (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) &&
+                   (sf->tokenvec[left+2].type != TYPE_RIGHTPARENS)) {
+            /*
+             * What's going on here:
+             * Some SQL functions like USER() take 0 args.
+             * If we get User(foo), then User is not a function.
+             * This should be expanded since it eliminates a lot of false
+             * positives.
+             */
+            if  (cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0) {
+                sf->tokenvec[left].type = TYPE_BAREWORD;
+            }
        }

-
        /* no folding -- assume left-most token is
           is good, now use the existing 2 tokens --
           do not get another
@@ -2019,7 +2024,7 @@ int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state)
 }

 /*
- * return TRUE if sqli, false is benign
+ * return TRUE if SQLi, false is benign
 */
 int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
 {
@@ -2033,10 +2038,10 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)

    if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) {
        /*
-         * if ending comment is contains 'sp_password' then it's sqli!
+         * if ending comment is contains 'sp_password' then it's SQLi!
         * MS Audit log apparently ignores anything with
-         * 'sp_password' in it. Unable to find primary refernece to
-         * this "feature" of SQL Server but seems to be known sqli
+         * 'sp_password' in it. Unable to find primary reference to
+         * this "feature" of SQL Server but seems to be known SQLi
         * technique
         */
        if (my_memmem(sql_state->s, sql_state->slen,
@@ -2055,7 +2060,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)

        if (sql_state->fingerprint[1] == TYPE_UNION) {
            if (sql_state->stats_tokens == 2) {
-                /* not sure why but 1U comes up in Sqli attack
+                /* not sure why but 1U comes up in SQLi attack
                 * likely part of parameter splitting/etc.
                 * lots of reasons why "1 union" might be normal
                 * input, so beep only if other SQLi things are present
@@ -2080,7 +2085,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)

        /*
         * for fingerprint like 'nc', only comments of /x are treated
-         * as SQL... ending comments of "--" and "#" are not sqli
+         * as SQL... ending comments of "--" and "#" are not SQLi
         */
        if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
            sql_state->tokenvec[1].type == TYPE_COMMENT &&
@@ -2090,7 +2095,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
        }

        /*
-         * if '1c' ends with '/x' then it's sqli
+         * if '1c' ends with '/x' then it's SQLi
         */
        if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
            sql_state->tokenvec[1].type == TYPE_COMMENT &&
@@ -2113,13 +2118,13 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
        if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
            sql_state->tokenvec[1].type == TYPE_COMMENT) {
            if (sql_state->stats_tokens > 2) {
-                /* we have some folding going on, highly likely sqli */
+                /* we have some folding going on, highly likely SQLi */
                sql_state->reason = __LINE__;
                return TRUE;
            }
            /*
             * we check that next character after the number is either whitespace,
-             * or '/' or a '-' ==> sqli.
+             * or '/' or a '-' ==> SQLi.
             */
            ch = sql_state->s[sql_state->tokenvec[0].len];
            if ( ch <= 32 ) {
@@ -2141,7 +2146,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
        }

        /*
-         * detect obvious sqli scans.. many people put '--' in plain text
+         * detect obvious SQLi scans.. many people put '--' in plain text
         * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
         */
        if ((sql_state->tokenvec[1].len > 2)
@@ -2177,7 +2182,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
                }

                /*
-                 * not sqli
+                 * not SQLi
                 */
                sql_state->reason = __LINE__;
                return FALSE;
@@ -2186,8 +2191,8 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
                   streq(sql_state->fingerprint, "1&1") ||
                   streq(sql_state->fingerprint, "1&v") ||
                   streq(sql_state->fingerprint, "1&s")) {
-            /* 'sexy and 17' not sqli
-             * 'sexy and 17<18'  sqli
+            /* 'sexy and 17' not SQLi
+             * 'sexy and 17<18'  SQLi
             */
            if (sql_state->stats_tokens == 3) {
                sql_state->reason = __LINE__;
@@ -2243,7 +2248,7 @@ int libinjection_is_sqli(struct libinjection_sqli_state * sql_state)
    size_t slen = sql_state->slen;

    /*
-     * no input? not sqli
+     * no input? not SQLi
     */
    if (slen == 0) {
        return FALSE;
--- a/apache2/libinjection/libinjection_xss.c
+++ b/apache2/libinjection/libinjection_xss.c
@@ -1,15 +1,11 @@
+
+#include "libinjection.h"
 #include "libinjection_xss.h"
 #include "libinjection_html5.h"

 #include <assert.h>
 #include <stdio.h>

-/*
- * HEY THIS ISN'T DONE
- *  AND MISSING A KEY INGREDIENT!!
- *
- */
-
 typedef enum attribute {
    TYPE_NONE
    , TYPE_BLACK     /* ban always */
@@ -18,11 +14,128 @@ typedef enum attribute {
    , TYPE_ATTR_INDIRECT  /* attribute *name* is given in *value* */
 } attribute_t;

+
+static attribute_t is_black_attr(const char* s, size_t len);
+static int is_black_tag(const char* s, size_t len);
+static int is_black_url(const char* s, size_t len);
+static int cstrcasecmp_with_null(const char *a, const char *b, size_t n);
+static int html_decode_char_at(const char* src, size_t len, size_t* consumed);
+static int htmlencode_startswith(const char* prefix, const char *src, size_t n);
+
+
 typedef struct stringtype {
    const char* name;
    attribute_t atype;
 } stringtype_t;

+
+static const int gsHexDecodeMap[256] = {
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    0,   1,   2,   3,   4,   5,   6,   7,   8,   9, 256, 256,
+    256, 256, 256, 256, 256,  10,  11,  12,  13,  14,  15, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256,  10,  11,  12,  13,  14,  15, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256
+};
+
+/* Decode one char at src: plain byte, or numeric entity "&#NNN;" / "&#xHH;".
+ * Sets *consumed to the bytes used (0 on empty/NULL input, which returns -1);
+ * malformed or out-of-range entities yield '&' with *consumed == 1. */
+static int html_decode_char_at(const char* src, size_t len, size_t* consumed)
+{
+    int val = 0;
+    size_t i;
+    int ch;
+
+    if (len == 0 || src == NULL) {
+        *consumed = 0;
+        return -1;
+    }
+
+    *consumed = 1;
+    if (*src != '&' || len < 2) {
+        return (unsigned char)(*src);
+    }
+
+    if (*(src+1) != '#' || len < 3) {
+        /* named entities are not decoded here; a bare "&#" ending the
+         * input is returned as-is so src[2] is never read out of bounds */
+        return '&';
+    }
+
+    if (*(src+2) == 'x' || *(src+2) == 'X') {
+        /* degenerate "&#x" / "&#x?": no hex digit, possibly end of input */
+        ch = (len > 3) ? gsHexDecodeMap[(unsigned char) src[3]] : 256;
+        if (ch == 256) {
+            return '&';
+        }
+        val = ch;
+        i = 4;
+        while (i < len) {
+            ch = (unsigned char) src[i];
+            if (ch == ';') {
+                *consumed = i + 1;
+                return val;
+            }
+            ch = gsHexDecodeMap[ch];
+            if (ch == 256) {
+                *consumed = i;
+                return val;
+            }
+            val = (val * 16) + ch;
+            if (val > 0x1000FF) {
+                return '&';
+            }
+            ++i;
+        }
+        *consumed = i;
+        return val;
+    } else {
+        i = 2;
+        ch = (unsigned char) src[i];
+        if (ch < '0' || ch > '9') {
+            return '&';
+        }
+        val = ch - '0';
+        i += 1;
+        while (i < len) {
+            ch = (unsigned char) src[i];
+            if (ch == ';') {
+                *consumed = i + 1;
+                return val;
+            }
+            if (ch < '0' || ch > '9') {
+                *consumed = i;
+                return val;
+            }
+            val = (val * 10) + (ch - '0');
+            if (val > 0x1000FF) {
+                return '&';
+            }
+            ++i;
+        }
+        *consumed = i;
+        return val;
+    }
+}
+
+
 /*
 * view-source:
 * data:
@@ -37,7 +150,7 @@ static stringtype_t BLACKATTR[] = {
    , { "DATASRC", TYPE_BLACK }       /* IE */
    , { "DYNSRC", TYPE_ATTR_URL }     /* Obsolete img attribute */
    , { "FILTER", TYPE_STYLE }        /* Opera, SVG inline style */
-    , { "FORMACTION", TYPE_ATTR_URL } /* HTML5 */
+    , { "FORMACTION", TYPE_ATTR_URL } /* HTML 5 */
    , { "FOLDER", TYPE_ATTR_URL }     /* Only on A tags, IE-only */
    , { "FROM", TYPE_ATTR_URL }       /* SVG */
    , { "HANDLER", TYPE_ATTR_URL }    /* SVG Tiny, Opera */
@@ -53,26 +166,27 @@ static stringtype_t BLACKATTR[] = {
 };

 /* xmlns */
-/* xml-stylesheet > <eval>, <if expr=> */
+/* `xml-stylesheet` > <eval>, <if expr=> */

 /*
-static const char* BLACKATTR[] = {
-    "ATTRIBUTENAME",
-    "BACKGROUND",
-    "DATAFORMATAS",
-    "HREF",
-    "SCROLL",
-    "SRC",
-    "STYLE",
-    "SRCDOC",
-    NULL
-};
+  static const char* BLACKATTR[] = {
+  "ATTRIBUTENAME",
+  "BACKGROUND",
+  "DATAFORMATAS",
+  "HREF",
+  "SCROLL",
+  "SRC",
+  "STYLE",
+  "SRCDOC",
+  NULL
+  };
 */

 static const char* BLACKTAG[] = {
    "APPLET"
    /*    , "AUDIO" */
    , "BASE"
+    , "COMMENT"  /* IE http://html5sec.org/#38 */
    , "EMBED"
    /*   ,  "FORM" */
    , "FRAME"
@@ -92,33 +206,94 @@ static const char* BLACKTAG[] = {
    /*    , "VIDEO" */
    , "VMLFRAME"
    , "XML"
+    , "XSS"
    , NULL
 };

-static int is_black_tag(const char* s, size_t len);
-static attribute_t is_black_attr(const char* s, size_t len);
-static int is_black_url(const char* s, size_t len);
-static int cstrcasecmp_with_null(const char *a, const char *b, size_t n);

 static int cstrcasecmp_with_null(const char *a, const char *b, size_t n)
 {
+    char ca;
     char cb;
-
-    for (; n > 0; a++, b++, n--) {
-        cb = *b;
+    /* a: NUL-terminated key, compared as-is; b: n input bytes, upcased, NULs skipped */
+    while (n-- > 0) {
+        cb = *b++;
         if (cb == '\0') continue;

+        ca = *a++;
+
         if (cb >= 'a' && cb <= 'z') {
             cb -= 0x20;
         }
-        if (*a != cb) {
-            return *a - cb;
-        } else if (*a == '\0') {
-            return -1;
+        /* any mismatch (including a ending before b does) reports "not equal" */
+        if (ca != cb) {
+            return 1;
         }
     }

-    return (*a == 0) ? 0 : 1;
+    if (*a == 0) {
+        /* every character of a was matched: equal */
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
+/*
+ * Does the HTML-encoded binary string b (length n) start with the
+ * all-uppercase, null-terminated C string a?  Lowercase a-z in b is upcased.
+ *
+ * Leading decoded chars <= 32, and later nulls / line feeds in b, are ignored.
+ *
+ * return 1 if match / starts with
+ * return 0 if not
+ */
+static int htmlencode_startswith(const char *a, const char *b, size_t n)
+{
+    size_t consumed;
+    int cb;
+    int first = 1;
+    /* printf("Comparing %s with %.*s\n", a,(int)n,b); */
+    while (n > 0) {
+        if (*a == 0) {
+            /* all of a has been matched: b starts with a */
+            return 1;
+        }
+        cb = html_decode_char_at(b, n, &consumed);
+        b += consumed;
+        n -= consumed;
+
+        if (first && cb <= 32) {
+            /* ignore all leading whitespace and control characters */
+            continue;
+        }
+        first = 0;
+
+        if (cb == 0) {
+            /* always ignore null characters in user input */
+            continue;
+        }
+
+        if (cb == 10) {
+            /* always ignore line feed (10, '\n') characters in user input */
+            /* NOTE: vertical tab is 11 and is not skipped by this check */
+            continue;
+        }
+
+        if (cb >= 'a' && cb <= 'z') {
+            /* upcase */
+            cb -= 0x20;
+        }
+
+        if (*a != (char) cb) {
+            /* printf("    %c != %c\n", *a, cb); */
+            /* mismatch */
+            return 0;
+        }
+        a++;
+    }
+
+    return (*a == 0) ? 1 : 0;
 }

 static int is_black_tag(const char* s, size_t len)
@@ -132,6 +307,7 @@ static int is_black_tag(const char* s, size_t len)
    black = BLACKTAG;
    while (*black != NULL) {
        if (cstrcasecmp_with_null(*black, s, len) == 0) {
+            /* printf("Got black tag %s\n", *black); */
            return 1;
        }
        black += 1;
@@ -141,6 +317,7 @@ static int is_black_tag(const char* s, size_t len)
    if ((s[0] == 's' || s[0] == 'S') &&
        (s[1] == 'v' || s[1] == 'V') &&
        (s[2] == 'g' || s[2] == 'G')) {
+        /*        printf("Got SVG tag \n"); */
        return 1;
    }

@@ -148,6 +325,7 @@ static int is_black_tag(const char* s, size_t len)
    if ((s[0] == 'x' || s[0] == 'X') &&
        (s[1] == 's' || s[1] == 'S') &&
        (s[2] == 'l' || s[2] == 'L')) {
+        /*      printf("Got XSL tag\n"); */
        return 1;
    }

@@ -162,15 +340,18 @@ static attribute_t is_black_attr(const char* s, size_t len)
        return TYPE_NONE;
    }

-    /* javascript on.* */
-    if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
-        return TYPE_BLACK;
-    }
-
-
    if (len >= 5) {
+        /* JavaScript on.* */
+        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
+            /* printf("Got JavaScript on- attribute name\n"); */
+            return TYPE_BLACK;
+        }
+
+
+
        /* XMLNS can be used to create arbitrary tags */
        if (cstrcasecmp_with_null("XMLNS", s, 5) == 0 || cstrcasecmp_with_null("XLINK", s, 5) == 0) {
+            /*      printf("Got XMLNS and XLINK tags\n"); */
            return TYPE_BLACK;
        }
    }
@@ -178,6 +359,7 @@ static attribute_t is_black_attr(const char* s, size_t len)
    black = BLACKATTR;
    while (black->name != NULL) {
        if (cstrcasecmp_with_null(black->name, s, len) == 0) {
+            /*      printf("Got banned attribute name %s\n", black->name); */
            return black->atype;
        }
        black += 1;
@@ -198,49 +380,43 @@ static int is_black_url(const char* s, size_t len)
    /* covers JAVA, JAVASCRIPT, + colon */
    static const char* javascript_url = "JAVA";

-    size_t tokenlen;
-
    /* skip whitespace */
-    while (len > 0) {
+    while (len > 0 && (*s <= 32 || *s >= 127)) {
        /*
         * HEY: this is a signed character.
         *  We are intentionally skipping high-bit characters too
-         *  since they are not ascii, and Opera sometimes uses UTF8 whitespace
+         *  since they are not ASCII, and Opera sometimes uses UTF-8 whitespace.
+         *
+         * Also in EUC-JP some of the high bytes are just ignored.
         */
-        if (*s <= 32) {
-            ++s;
-            --len;
-        }
-        break;
+        ++s;
+        --len;
    }

-    tokenlen = strlen(data_url);
-    if (len > tokenlen && cstrcasecmp_with_null(data_url, s, tokenlen) == 0) {
-        return 1;
-    }
-    tokenlen = strlen(viewsource_url);
-    if (len > tokenlen && cstrcasecmp_with_null(viewsource_url, s, tokenlen) == 0) {
+    if (htmlencode_startswith(data_url, s, len)) {
        return 1;
    }

-    tokenlen = strlen(javascript_url);
-    if (len > tokenlen && cstrcasecmp_with_null(javascript_url, s, tokenlen) == 0) {
+    if (htmlencode_startswith(viewsource_url, s, len)) {
        return 1;
    }

-    tokenlen = strlen(vbscript_url);
-    if (len > tokenlen && cstrcasecmp_with_null(vbscript_url, s, tokenlen) == 0) {
+    if (htmlencode_startswith(javascript_url, s, len)) {
+        return 1;
+    }
+
+    if (htmlencode_startswith(vbscript_url, s, len)) {
        return 1;
    }
    return 0;
 }

-int libinjection_is_xss(const char* s, size_t len)
+int libinjection_is_xss(const char* s, size_t len, int flags)
 {
    h5_state_t h5;
    attribute_t attr = TYPE_NONE;

-    libinjection_h5_init(&h5, s, len, 0);
+    libinjection_h5_init(&h5, s, len, (enum html5_flags) flags);
    while (libinjection_h5_next(&h5)) {
        if (h5.token_type != ATTR_VALUE) {
            attr = TYPE_NONE;
@@ -258,16 +434,16 @@ int libinjection_is_xss(const char* s, size_t len)
            /*
             * IE6,7,8 parsing works a bit differently so
             * a whole <script> or other black tag might be hiding
-             * inside an attribute value under HTML5 parsing
+             * inside an attribute value under HTML 5 parsing
             * See http://html5sec.org/#102
             * to avoid doing a full reparse of the value, just
             * look for "<".  This probably need adjusting to
             * handle escaped characters
             */
            /*
-            if (memchr(h5.token_start, '<', h5.token_len) != NULL) {
-                return 1;
-            }
+              if (memchr(h5.token_start, '<', h5.token_len) != NULL) {
+              return 1;
+              }
            */

            switch (attr) {
@@ -289,13 +465,13 @@ int libinjection_is_xss(const char* s, size_t len)
                }
                break;
 /*
-            default:
-                assert(0);
+  default:
+  assert(0);
 */
            }
            attr = TYPE_NONE;
        } else if (h5.token_type == TAG_COMMENT) {
-	    /* IE uses a "`" as a tag ending char */
+            /* IE uses a "`" as a tag ending char */
            if (memchr(h5.token_start, '`', h5.token_len) != NULL) {
                return 1;
            }
@@ -307,7 +483,7 @@ int libinjection_is_xss(const char* s, size_t len)
                    (h5.token_start[2] == 'f' || h5.token_start[2] == 'F')) {
                    return 1;
                }
-                if ((h5.token_start[0] == 'x' || h5.token_start[1] == 'X') &&
+                if ((h5.token_start[0] == 'x' || h5.token_start[0] == 'X') &&
                    (h5.token_start[1] == 'm' || h5.token_start[1] == 'M') &&
                    (h5.token_start[2] == 'l' || h5.token_start[2] == 'L')) {
                    return 1;
@@ -315,7 +491,7 @@ int libinjection_is_xss(const char* s, size_t len)
            }

            if (h5.token_len > 5) {
-	        /*  IE <?import pseudo-tag */
+                /*  IE <?import pseudo-tag */
                if (cstrcasecmp_with_null("IMPORT", h5.token_start, 6) == 0) {
                    return 1;
                }
@@ -329,3 +505,28 @@ int libinjection_is_xss(const char* s, size_t len)
    }
    return 0;
 }
+
+
+/*
+ * wrapper: 1 if libinjection_is_xss() fires for any start state below, else 0
+ */
+int libinjection_xss(const char* s, size_t len)
+{
+    if (libinjection_is_xss(s, len, DATA_STATE)) {
+        return 1;
+    }
+    if (libinjection_is_xss(s, len, VALUE_NO_QUOTE)) {
+        return 1;
+    }
+    if (libinjection_is_xss(s, len, VALUE_SINGLE_QUOTE)) {
+        return 1;
+    }
+    if (libinjection_is_xss(s, len, VALUE_DOUBLE_QUOTE)) {
+        return 1;
+    }
+    if (libinjection_is_xss(s, len, VALUE_BACK_QUOTE)) {
+        return 1;
+    }
+
+    return 0;
+}