Sync to 1.2.0, fix regression in small sqli detection

2026-01-07 16:58:31 +03:00 · 2013-05-06 22:29:23 +09:00
parent ebcfe2c187
commit 4bc068e58d
3 changed files with 259 additions and 137 deletions
--- a/apache2/libinjection/sqlparse.c
+++ b/apache2/libinjection/sqlparse.c
@@ -68,7 +68,15 @@ memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
    return NULL;
 }

-
+/** Find largest string containing certain characters.
+ *
+ * C Standard library 'strspn' only works for 'c-strings' (null terminated)
+ * This works on arbitrary length.
+ *
+ * Porting notes:
+ *   if accept is 'ABC', then this function would be similar to
+ *   a_regexp.match(a_str, '[ABC]*'),
+ */
 size_t strlenspn(const char *s, size_t len, const char *accept)
 {
    size_t i;
@@ -102,11 +110,96 @@ int cstrcasecmp(const char *a, const char *b)
    return ca - cb;
 }

+/**
+ * Case insentive string compare.
+ *  Here only to make code more readable
+ */
 int streq(const char *a, const char *b)
 {
    return cstrcasecmp(a, b) == 0;
 }

+/*
+ * Case-sensitive binary search.
+ *
+ */
+int bsearch_cstr(const char *key, const char *base[], size_t nmemb)
+{
+    int left = 0;
+    int right = (int) nmemb - 1;
+
+    while (left <= right) {
+        int pos = (left + right) / 2;
+        int cmp = strcmp(base[pos], key);
+        if (cmp == 0) {
+            return TRUE;
+        } else if (cmp < 0) {
+            left = pos + 1;
+        } else {
+            right = pos - 1;
+        }
+    }
+    return FALSE;
+}
+
+/*
+ * Case-insensitive binary search
+ */
+int bsearch_cstrcase(const char *key, const char *base[], size_t nmemb)
+{
+    int left = 0;
+    int right = (int) nmemb - 1;
+
+    while (left <= right) {
+        int pos = (left + right) / 2;
+        int cmp = cstrcasecmp(base[pos], key);
+        if (cmp == 0) {
+            return TRUE;
+        } else if (cmp < 0) {
+            left = pos + 1;
+        } else {
+            right = pos - 1;
+        }
+    }
+    return FALSE;
+}
+
+/**
+ *
+ *
+ *
+ * Porting Notes:
+ *  given a mapping/hash of string to char
+ *  this is just
+ *     mapping[key.upper()]
+ */
+char bsearch_keyword_type(const char *key, const keyword_t * keywords,
+                          size_t numb)
+{
+    int left = 0;
+    int right = (int) numb - 1;
+
+    while (left <= right) {
+        int pos = (left + right) / 2;
+        int cmp = cstrcasecmp(keywords[pos].word, key);
+        if (cmp == 0) {
+            return keywords[pos].type;
+        } else if (cmp < 0) {
+            left = pos + 1;
+        } else {
+            right = pos - 1;
+        }
+    }
+    return CHAR_NULL;
+}
+
+/* st_token methods
+ *
+ * The folow just manipulates the stoken_t type
+ *
+ *
+ */
+
 void st_clear(stoken_t * st)
 {
    st->type = CHAR_NULL;
@@ -141,74 +234,11 @@ void st_copy(stoken_t * dest, const stoken_t * src)
    memcpy(dest, src, sizeof(stoken_t));
 }

-const char *bsearch_cstrcase(const char *key, const char *base[], size_t nmemb)
-{
-    int left = 0;
-    int right = (int) nmemb - 1;
-
-    while (left <= right) {
-        int pos = (left + right) / 2;
-        int cmp = cstrcasecmp(base[pos], key);
-        if (cmp == 0) {
-            return base[pos];
-        } else if (cmp < 0) {
-            left = pos + 1;
-        } else {
-            right = pos - 1;
-        }
-    }
-    return NULL;
-}
-
-const char *bsearch_cstr(const char *key, const char *base[], size_t nmemb)
-{
-    int left = 0;
-    int right = (int) nmemb - 1;
-
-    while (left <= right) {
-        int pos = (left + right) / 2;
-        int cmp = strcmp(base[pos], key);
-        if (cmp == 0) {
-            return base[pos];
-        } else if (cmp < 0) {
-            left = pos + 1;
-        } else {
-            right = pos - 1;
-        }
-    }
-    return NULL;
-}
-
-char bsearch_keyword_type(const char *key, const keyword_t * keywords,
-                          size_t numb)
-{
-    int left = 0;
-    int right = (int) numb - 1;
-
-    while (left <= right) {
-        int pos = (left + right) / 2;
-        int cmp = cstrcasecmp(keywords[pos].word, key);
-        if (cmp == 0) {
-            return keywords[pos].type;
-        } else if (cmp < 0) {
-            left = pos + 1;
-        } else {
-            right = pos - 1;
-        }
-    }
-    return CHAR_NULL;
-}
-
-int is_operator2(const char *key)
-{
-    return bsearch_cstrcase(key, operators2, operators2_sz) != NULL;
-}
-
 int st_is_multiword_start(const stoken_t * st)
 {
    return bsearch_cstrcase(st->val,
-                        multikeywords_start,
-                        multikeywords_start_sz) != NULL;
+                            multikeywords_start,
+                            multikeywords_start_sz);
 }

 int st_is_unary_op(const stoken_t * st)
@@ -236,6 +266,12 @@ int st_is_arith_op(const stoken_t * st)
                                 cstrcasecmp(st->val, "DIV")));
 }

+/* Parsers
+ *
+ *
+ */
+
+
 size_t parse_white(sfilter * sf)
 {
    return sf->pos + 1;
@@ -406,6 +442,9 @@ size_t parse_backslash(sfilter * sf)
    const size_t slen = sf->slen;
    size_t pos = sf->pos;

+    /*
+     * Weird MySQL alias for NULL, "\N" (capital N only)
+     */
    if (pos + 1 < slen && cs[pos + 1] == 'N') {
        st_assign(current, '1', "NULL", 4);
        return pos + 2;
@@ -414,6 +453,14 @@ size_t parse_backslash(sfilter * sf)
    }
 }

+/** Is input a 2-char operator?
+ *
+ */
+int is_operator2(const char *key)
+{
+    return bsearch_cstr(key, operators2, operators2_sz);
+}
+
 size_t parse_operator2(sfilter * sf)
 {
    stoken_t *current = &sf->syntax_current;
@@ -703,19 +750,44 @@ int parse_token(sfilter * sf)

    st_clear(current);

+    /*
+     * if we are at beginning of string
+     *  and in single-quote or double quote mode
+     *  then pretend the input starts with a quote
+     */
    if (*pos == 0 && sf->delim != CHAR_NULL) {
        *pos = parse_string_core(s, slen, 0, current, sf->delim, 0);
        return TRUE;
    }

    while (*pos < slen) {
+        /*
+         * get current character
+         */
        const int ch = (int) (s[*pos]);
+
+        /*
+         * if not ascii, then continue...
+         *   actually probably need to just assuming
+         *   it's a string
+         */
        if (ch < 0 || ch > 127) {
            *pos += 1;
            continue;
        }
+
+        /*
+         * look up the parser, and call it
+         *
+         * Porting Note: this is mapping of char to function
+         *   charparsers[ch]()
+         */
        fnptr = char_parse_map[ch];
        *pos = (*fnptr) (sf);
+
+        /*
+         *
+         */
        if (current->type != CHAR_NULL) {
            return TRUE;
        }
@@ -770,6 +842,10 @@ int syntax_merge_words(stoken_t * a, stoken_t * b)
    }
 }

+/* This does some simple syntax cleanup based on the token
+ *
+ *
+ */
 int sqli_tokenize(sfilter * sf, stoken_t * sout)
 {
    stoken_t *last = &sf->syntax_last;
@@ -777,6 +853,10 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)

    while (parse_token(sf)) {
        char ttype = current->type;
+
+        /*
+         * TBD: hmm forgot logic here.
+         */
        if (ttype == 'c') {
            st_copy(&sf->syntax_comment, current);
            continue;
@@ -784,7 +864,13 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
        st_clear(&sf->syntax_comment);

        /*
-         * If we don't have a saved token
+         * If we don't have a saved token, and we have
+         * a string: save it.  if the next token is also a string
+         *   then merge them.  e.g. "A" "B" in SQL is actually "AB"
+         * a n/k/U/o type: save since next token my be merged together
+         *   for example: "LEFT" + "JOIN" = "LEFT JOIN"
+         * a o/& type: TBD need to review.
+         *
         */
        if (last->type == CHAR_NULL) {
            switch (ttype) {
@@ -909,6 +995,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
        st_clear(last);
        return TRUE;
    } else if (sf->syntax_comment.type) {
+        /*
+         * TBD
+         */
        st_copy(sout, &sf->syntax_comment);
        st_clear(&sf->syntax_comment);
        return TRUE;
@@ -917,6 +1006,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
    }
 }

+/*
+ * My apologies, this code is a mess
+ */
 int filter_fold(sfilter * sf, stoken_t * sout)
 {
    stoken_t *last = &sf->fold_last;
@@ -926,7 +1018,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
        st_copy(sout, last);
        sf->fold_state = 2;
        st_clear(last);
-        return TRUE;
+        return FALSE;
    }

    while (sqli_tokenize(sf, current)) {
@@ -952,7 +1044,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
                st_copy(last, current);
            }
            st_copy(sout, current);
-            return TRUE;
+            return FALSE;
        } else if (last->type == '(' && st_is_unary_op(current)) {
            /*
             * similar to beginning of statement
@@ -965,7 +1057,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
             * emit 1, but keep state
             */
            st_copy(sout, current);
-            return TRUE;
+            return FALSE;
        } else if ((last->type == '1' || last->type == 'n')
                   && st_is_arith_op(current)) {
            FOLD_DEBUG;
@@ -987,7 +1079,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
                    st_copy(sout, current);
                    st_clear(last);
                }
-                return TRUE;
+                return FALSE;
            } else {
                if (last->type == 'o') {
                    st_copy(sout, last);
@@ -998,7 +1090,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
                    st_copy(sout, current);
                    st_clear(last);
                }
-                return TRUE;
+                return FALSE;
            }
        }
    }
@@ -1007,52 +1099,54 @@ int filter_fold(sfilter * sf, stoken_t * sout)
        if (st_is_arith_op(last)) {
            st_copy(sout, last);
            st_clear(last);
-            return TRUE;
+            return FALSE;
        } else {
            st_clear(last);
        }
    }

-    return FALSE;
+    /*
+     * all done: nothing more to parse
+     */
+    return TRUE;
 }

+/* secondary api: detects SQLi in a string, GIVEN a context.
+ *
+ * A context can be:
+ *   *  CHAR_NULL (\0), process as is
+ *   *  CHAR_SINGLE ('), process pretending input started with a
+ *          single quote.
+ *   *  CHAR_DOUBLE ("), process pretending input started with a
+ *          double quote.
+ *
+ */
 int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
                    const char delim, ptr_fingerprints_fn fn)
 {
-    int all_done = 0;
    int tlen = 0;
+    char ch;
    int patmatch;
+    int all_done;

    sfilter_reset(sql_state, s, slen);
    sql_state->delim = delim;

    while (tlen < MAX_TOKENS) {
        all_done = filter_fold(sql_state, &(sql_state->tokenvec[tlen]));
-        if (!all_done) {
+        if (all_done) {
            break;
        }

        sql_state->pat[tlen] = sql_state->tokenvec[tlen].type;
        tlen += 1;
    }
-    sql_state->pat[tlen] = CHAR_NULL;

    /*
-     * if token 5 (last) looks like a functino word (such as ABS or ASCII)
-     * then check token 6 to see if it's a "(".
-     * if NOT then, it's not a function.
+     * make the fingerprint pattern a c-string (null delimited)
     */
+    sql_state->pat[tlen] = CHAR_NULL;

-    if (tlen == MAX_TOKENS && !all_done
-        && sql_state->pat[MAX_TOKENS - 1] == 'f') {
-
-        stoken_t tmp;
-        all_done = filter_fold(sql_state, &tmp);
-        if (!all_done && tmp.type != '(') {
-            sql_state->reason = __LINE__;
-            return FALSE;
-        }
-    }
    /*
     * check for 'X' in pattern
     * this means parsing could not be done
@@ -1066,10 +1160,22 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,

    patmatch = fn(sql_state->pat);

+    /*
+     * No match.
+     *
+     * Set sql_state->reason to current line number
+     * only for debugging purposes.
+     */
    if (!patmatch) {
        sql_state->reason = __LINE__;
        return FALSE;
    }
+
+    /*
+     * We got a SQLi match
+     * This next part just helps reduce false positives.
+     *
+     */
    switch (tlen) {
    case 2:{
        /*
@@ -1090,16 +1196,6 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
                sql_state->reason = __LINE__;
                return FALSE;
        }
-        /*
-         * detect obvious sqli scans.. many people put '--' in plain text
-         * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
-         */
-
-        if ((strlen(sql_state->tokenvec[1].val) > 2)
-            && sql_state->tokenvec[1].val[0] == '-') {
-            sql_state->reason = __LINE__;
-            return FALSE;
-        }

        /**
         * there are some odd base64-looking query string values
@@ -1107,14 +1203,46 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
         * which evaluate to "1c"... these are not SQLi
         * but 1234-- probably is.
         * Make sure the "1" in "1c" is actually a true decimal number
+         *
+         * Need to check -original- string since the folding step
+         * may have merged tokens, e.g. "1+FOO" is folded into "1"
         */
-        if (sql_state->tokenvec[0].type == '1'&& sql_state->tokenvec[1].type == 'c' &&
-            strlen(sql_state->tokenvec[0].val) != strcspn(sql_state->tokenvec[0].val, "0123456789")) {
+        if (sql_state->tokenvec[0].type == '1'&& sql_state->tokenvec[1].type == 'c') {
+            /*
+             * we check that next character after the number is either whitespace,
+             * or '/' or a '-' ==> sqli.
+             */
+            ch = sql_state->s[strlen(sql_state->tokenvec[0].val)];
+            if ( ch <= 32 ) {
+                /* next char was whitespace,e.g. "1234 --"
+                 * this isn't exactly correct.. ideally we should skip over all whitespace
+                 * but this seems to be ok for now
+                 */
+                return TRUE;
+            }
+            if (ch == '/' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '*') {
+                return TRUE;
+            }
+            if (ch == '-' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '-') {
+                return TRUE;
+            }
+
            sql_state->reason = __LINE__;
            return FALSE;
        }
-        break;
+
+        /*
+         * detect obvious sqli scans.. many people put '--' in plain text
+         * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
+         */
+        if ((strlen(sql_state->tokenvec[1].val) > 2)
+            && sql_state->tokenvec[1].val[0] == '-') {
+            sql_state->reason = __LINE__;
+            return FALSE;
        }
+
+        break;
+    } /* case 2 */
    case 3:{
        /*
         * ...foo' + 'bar...
@@ -1138,7 +1266,7 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
                }
                break;
        }
-    }                       /* case 3 */
+    }  /* case 3 */
    case 5: {
        if (streq(sql_state->pat, "sosos")) {
            if (sql_state->tokenvec[0].str_open == CHAR_NULL) {
@@ -1157,30 +1285,56 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
        }
    } /* case 5 */
    } /* end switch */
+
    return TRUE;
 }

+/**  Main API, detects SQLi in an input.
+ *
+ *
+ */
 int is_sqli(sfilter * sql_state, const char *s, size_t slen,
            ptr_fingerprints_fn fn)
 {

+    /*
+     * no input? not sqli
+     */
    if (slen == 0) {
        return FALSE;
    }

+    /*
+     * test input "as-is"
+     */
    if (is_string_sqli(sql_state, s, slen, CHAR_NULL, fn)) {
        return TRUE;
    }

+    /*
+     * if input has a single_quote, then
+     * test as if input was actually '
+     * example: if input if "1' = 1", then pretend it's
+     *   "'1' = 1"
+     * Porting Notes: example the same as doing
+     *   is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn)
+     *
+     */
    if (memchr(s, CHAR_SINGLE, slen)
        && is_string_sqli(sql_state, s, slen, CHAR_SINGLE, fn)) {
        return TRUE;
    }

+    /*
+     * same as above but with a double-quote "
+     */
    if (memchr(s, CHAR_DOUBLE, slen)
        && is_string_sqli(sql_state, s, slen, CHAR_DOUBLE, fn)) {
        return TRUE;
    }

+    /*
+     * Hurray, input is not SQLi
+     */
    return FALSE;
 }
--- a/apache2/libinjection/sqlparse.h
+++ b/apache2/libinjection/sqlparse.h
@@ -34,7 +34,7 @@ extern "C" {
 * See python's normalized version
 * http://www.python.org/dev/peps/pep-0386/#normalizedversion
 */
-#define LIBINJECTION_VERSION "1.1.0"
+#define LIBINJECTION_VERSION "1.2.0"

 #define ST_MAX_SIZE 32
 #define MAX_TOKENS 5
--- a/apache2/libinjection/sqlparse_private.h
+++ b/apache2/libinjection/sqlparse_private.h
@@ -13,36 +13,6 @@

 #include "sqlparse.h"

-/***
- * The stdlib function 'strspn' assumes input to null-delimited.
- * This allows us to specifying and input length and allows
- * embedded nulls
- */
-size_t strlenspn(const char *s, size_t len, const char *accept);
-
-int streq(const char *a, const char *b);
-
-void st_clear(stoken_t * st);
-void st_assign_char(stoken_t * st, const char stype, const char value);
-void st_set_type(stoken_t * st, const char stype);
-void st_assign(stoken_t * st, const char stype, const char *value,
-               size_t len);
-void st_assign_cstr(stoken_t * st, const char stype, const char *value);
-void st_copy(stoken_t * dest, const stoken_t * src);
-
-int st_equals_cstr(const stoken_t * src, const char stype,
-                    const char *value);
-
-int st_is_empty(const stoken_t * st);
-int st_is_arith_op(const stoken_t * st);
-int st_is_unary_op(const stoken_t * st);
-int st_is_english_op(const stoken_t * st);
-int st_is_logical_op(const stoken_t * st);
-int st_is_multiword_start(const stoken_t * st);
-
-const char *bsearch_cstr(const char *key, const char *base[],
-                         size_t nmemb);
-
 typedef struct {
    const char *word;
    char type;
@@ -96,7 +66,5 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout);

 int filter_fold(sfilter * sf, stoken_t * sout);

-int char2int(char c);
-unsigned long long pat2int(const char *pat);

 #endif /* _SQLPARSE_PRIVATE_H */