From 4bc068e58d8485e94695a5a7425a38fb3e831a0b Mon Sep 17 00:00:00 2001 From: Nick Galbreath Date: Mon, 6 May 2013 22:29:23 +0900 Subject: [PATCH] Sync to 1.2.0, fix regression in small sqli detection --- apache2/libinjection/sqlparse.c | 362 +++++++++++++++++------- apache2/libinjection/sqlparse.h | 2 +- apache2/libinjection/sqlparse_private.h | 32 --- 3 files changed, 259 insertions(+), 137 deletions(-) diff --git a/apache2/libinjection/sqlparse.c b/apache2/libinjection/sqlparse.c index a8582f66..43c51343 100644 --- a/apache2/libinjection/sqlparse.c +++ b/apache2/libinjection/sqlparse.c @@ -68,7 +68,15 @@ memchr2(const char *haystack, size_t haystack_len, char c0, char c1) return NULL; } - +/** Find largest string containing certain characters. + * + * C Standard library 'strspn' only works for 'c-strings' (null terminated) + * This works on arbitrary length. + * + * Porting notes: + * if accept is 'ABC', then this function would be similar to + * a_regexp.match(a_str, '[ABC]*'), + */ size_t strlenspn(const char *s, size_t len, const char *accept) { size_t i; @@ -102,11 +110,96 @@ int cstrcasecmp(const char *a, const char *b) return ca - cb; } +/** + * Case insentive string compare. + * Here only to make code more readable + */ int streq(const char *a, const char *b) { return cstrcasecmp(a, b) == 0; } +/* + * Case-sensitive binary search. + * + */ +int bsearch_cstr(const char *key, const char *base[], size_t nmemb) +{ + int left = 0; + int right = (int) nmemb - 1; + + while (left <= right) { + int pos = (left + right) / 2; + int cmp = strcmp(base[pos], key); + if (cmp == 0) { + return TRUE; + } else if (cmp < 0) { + left = pos + 1; + } else { + right = pos - 1; + } + } + return FALSE; +} + +/* + * Case-insensitive binary search + */ +int bsearch_cstrcase(const char *key, const char *base[], size_t nmemb) +{ + int left = 0; + int right = (int) nmemb - 1; + + while (left <= right) { + int pos = (left + right) / 2; + int cmp = cstrcasecmp(base[pos], key); + if (cmp == 0) { + return TRUE; + } else if (cmp < 0) { + left = pos + 1; + } else { + right = pos - 1; + } + } + return FALSE; +} + +/** + * + * + * + * Porting Notes: + * given a mapping/hash of string to char + * this is just + * mapping[key.upper()] + */ +char bsearch_keyword_type(const char *key, const keyword_t * keywords, + size_t numb) +{ + int left = 0; + int right = (int) numb - 1; + + while (left <= right) { + int pos = (left + right) / 2; + int cmp = cstrcasecmp(keywords[pos].word, key); + if (cmp == 0) { + return keywords[pos].type; + } else if (cmp < 0) { + left = pos + 1; + } else { + right = pos - 1; + } + } + return CHAR_NULL; +} + +/* st_token methods + * + * The folow just manipulates the stoken_t type + * + * + */ + void st_clear(stoken_t * st) { st->type = CHAR_NULL; @@ -141,74 +234,11 @@ void st_copy(stoken_t * dest, const stoken_t * src) memcpy(dest, src, sizeof(stoken_t)); } -const char *bsearch_cstrcase(const char *key, const char *base[], size_t nmemb) -{ - int left = 0; - int right = (int) nmemb - 1; - - while (left <= right) { - int pos = (left + right) / 2; - int cmp = cstrcasecmp(base[pos], key); - if (cmp == 0) { - return base[pos]; - } else if (cmp < 0) { - left = pos + 1; - } else { - right = pos - 1; - } - } - return NULL; -} - -const char *bsearch_cstr(const char *key, const char *base[], size_t nmemb) -{ - int left = 0; - int right = (int) nmemb - 1; - - while (left <= right) { - int pos = (left + right) / 2; - int cmp = strcmp(base[pos], key); - if (cmp == 0) { - return base[pos]; - } else if (cmp < 0) { - left = pos + 1; - } else { - right = pos - 1; - } - } - return NULL; -} - -char bsearch_keyword_type(const char *key, const keyword_t * keywords, - size_t numb) -{ - int left = 0; - int right = (int) numb - 1; - - while (left <= right) { - int pos = (left + right) / 2; - int cmp = cstrcasecmp(keywords[pos].word, key); - if (cmp == 0) { - return keywords[pos].type; - } else if (cmp < 0) { - left = pos + 1; - } else { - right = pos - 1; - } - } - return CHAR_NULL; -} - -int is_operator2(const char *key) -{ - return bsearch_cstrcase(key, operators2, operators2_sz) != NULL; -} - int st_is_multiword_start(const stoken_t * st) { return bsearch_cstrcase(st->val, - multikeywords_start, - multikeywords_start_sz) != NULL; + multikeywords_start, + multikeywords_start_sz); } int st_is_unary_op(const stoken_t * st) @@ -236,6 +266,12 @@ int st_is_arith_op(const stoken_t * st) cstrcasecmp(st->val, "DIV"))); } +/* Parsers + * + * + */ + + size_t parse_white(sfilter * sf) { return sf->pos + 1; @@ -406,6 +442,9 @@ size_t parse_backslash(sfilter * sf) const size_t slen = sf->slen; size_t pos = sf->pos; + /* + * Weird MySQL alias for NULL, "\N" (capital N only) + */ if (pos + 1 < slen && cs[pos + 1] == 'N') { st_assign(current, '1', "NULL", 4); return pos + 2; @@ -414,6 +453,14 @@ size_t parse_backslash(sfilter * sf) } } +/** Is input a 2-char operator? + * + */ +int is_operator2(const char *key) +{ + return bsearch_cstr(key, operators2, operators2_sz); +} + size_t parse_operator2(sfilter * sf) { stoken_t *current = &sf->syntax_current; @@ -703,19 +750,44 @@ int parse_token(sfilter * sf) st_clear(current); + /* + * if we are at beginning of string + * and in single-quote or double quote mode + * then pretend the input starts with a quote + */ if (*pos == 0 && sf->delim != CHAR_NULL) { *pos = parse_string_core(s, slen, 0, current, sf->delim, 0); return TRUE; } while (*pos < slen) { + /* + * get current character + */ const int ch = (int) (s[*pos]); + + /* + * if not ascii, then continue... + * actually probably need to just assuming + * it's a string + */ if (ch < 0 || ch > 127) { *pos += 1; continue; } + + /* + * look up the parser, and call it + * + * Porting Note: this is mapping of char to function + * charparsers[ch]() + */ fnptr = char_parse_map[ch]; *pos = (*fnptr) (sf); + + /* + * + */ if (current->type != CHAR_NULL) { return TRUE; } @@ -770,6 +842,10 @@ int syntax_merge_words(stoken_t * a, stoken_t * b) } } +/* This does some simple syntax cleanup based on the token + * + * + */ int sqli_tokenize(sfilter * sf, stoken_t * sout) { stoken_t *last = &sf->syntax_last; @@ -777,6 +853,10 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout) while (parse_token(sf)) { char ttype = current->type; + + /* + * TBD: hmm forgot logic here. + */ if (ttype == 'c') { st_copy(&sf->syntax_comment, current); continue; @@ -784,7 +864,13 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout) st_clear(&sf->syntax_comment); /* - * If we don't have a saved token + * If we don't have a saved token, and we have + * a string: save it. if the next token is also a string + * then merge them. e.g. "A" "B" in SQL is actually "AB" + * a n/k/U/o type: save since next token my be merged together + * for example: "LEFT" + "JOIN" = "LEFT JOIN" + * a o/& type: TBD need to review. + * */ if (last->type == CHAR_NULL) { switch (ttype) { @@ -909,6 +995,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout) st_clear(last); return TRUE; } else if (sf->syntax_comment.type) { + /* + * TBD + */ st_copy(sout, &sf->syntax_comment); st_clear(&sf->syntax_comment); return TRUE; @@ -917,6 +1006,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout) } } +/* + * My apologies, this code is a mess + */ int filter_fold(sfilter * sf, stoken_t * sout) { stoken_t *last = &sf->fold_last; @@ -926,7 +1018,7 @@ int filter_fold(sfilter * sf, stoken_t * sout) st_copy(sout, last); sf->fold_state = 2; st_clear(last); - return TRUE; + return FALSE; } while (sqli_tokenize(sf, current)) { @@ -952,7 +1044,7 @@ int filter_fold(sfilter * sf, stoken_t * sout) st_copy(last, current); } st_copy(sout, current); - return TRUE; + return FALSE; } else if (last->type == '(' && st_is_unary_op(current)) { /* * similar to beginning of statement @@ -965,7 +1057,7 @@ int filter_fold(sfilter * sf, stoken_t * sout) * emit 1, but keep state */ st_copy(sout, current); - return TRUE; + return FALSE; } else if ((last->type == '1' || last->type == 'n') && st_is_arith_op(current)) { FOLD_DEBUG; @@ -987,7 +1079,7 @@ int filter_fold(sfilter * sf, stoken_t * sout) st_copy(sout, current); st_clear(last); } - return TRUE; + return FALSE; } else { if (last->type == 'o') { st_copy(sout, last); @@ -998,7 +1090,7 @@ int filter_fold(sfilter * sf, stoken_t * sout) st_copy(sout, current); st_clear(last); } - return TRUE; + return FALSE; } } } @@ -1007,52 +1099,54 @@ int filter_fold(sfilter * sf, stoken_t * sout) if (st_is_arith_op(last)) { st_copy(sout, last); st_clear(last); - return TRUE; + return FALSE; } else { st_clear(last); } } - return FALSE; + /* + * all done: nothing more to parse + */ + return TRUE; } +/* secondary api: detects SQLi in a string, GIVEN a context. + * + * A context can be: + * * CHAR_NULL (\0), process as is + * * CHAR_SINGLE ('), process pretending input started with a + * single quote. + * * CHAR_DOUBLE ("), process pretending input started with a + * double quote. + * + */ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen, const char delim, ptr_fingerprints_fn fn) { - int all_done = 0; int tlen = 0; + char ch; int patmatch; + int all_done; sfilter_reset(sql_state, s, slen); sql_state->delim = delim; while (tlen < MAX_TOKENS) { all_done = filter_fold(sql_state, &(sql_state->tokenvec[tlen])); - if (!all_done) { + if (all_done) { break; } sql_state->pat[tlen] = sql_state->tokenvec[tlen].type; tlen += 1; } - sql_state->pat[tlen] = CHAR_NULL; /* - * if token 5 (last) looks like a functino word (such as ABS or ASCII) - * then check token 6 to see if it's a "(". - * if NOT then, it's not a function. + * make the fingerprint pattern a c-string (null delimited) */ + sql_state->pat[tlen] = CHAR_NULL; - if (tlen == MAX_TOKENS && !all_done - && sql_state->pat[MAX_TOKENS - 1] == 'f') { - - stoken_t tmp; - all_done = filter_fold(sql_state, &tmp); - if (!all_done && tmp.type != '(') { - sql_state->reason = __LINE__; - return FALSE; - } - } /* * check for 'X' in pattern * this means parsing could not be done @@ -1066,10 +1160,22 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen, patmatch = fn(sql_state->pat); + /* + * No match. + * + * Set sql_state->reason to current line number + * only for debugging purposes. + */ if (!patmatch) { sql_state->reason = __LINE__; return FALSE; } + + /* + * We got a SQLi match + * This next part just helps reduce false positives. + * + */ switch (tlen) { case 2:{ /* @@ -1090,16 +1196,6 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen, sql_state->reason = __LINE__; return FALSE; } - /* - * detect obvious sqli scans.. many people put '--' in plain text - * so only detect if input ends with '--', e.g. 1-- but not 1-- foo - */ - - if ((strlen(sql_state->tokenvec[1].val) > 2) - && sql_state->tokenvec[1].val[0] == '-') { - sql_state->reason = __LINE__; - return FALSE; - } /** * there are some odd base64-looking query string values @@ -1107,14 +1203,46 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen, * which evaluate to "1c"... these are not SQLi * but 1234-- probably is. * Make sure the "1" in "1c" is actually a true decimal number + * + * Need to check -original- string since the folding step + * may have merged tokens, e.g. "1+FOO" is folded into "1" */ - if (sql_state->tokenvec[0].type == '1'&& sql_state->tokenvec[1].type == 'c' && - strlen(sql_state->tokenvec[0].val) != strcspn(sql_state->tokenvec[0].val, "0123456789")) { + if (sql_state->tokenvec[0].type == '1'&& sql_state->tokenvec[1].type == 'c') { + /* + * we check that next character after the number is either whitespace, + * or '/' or a '-' ==> sqli. + */ + ch = sql_state->s[strlen(sql_state->tokenvec[0].val)]; + if ( ch <= 32 ) { + /* next char was whitespace,e.g. "1234 --" + * this isn't exactly correct.. ideally we should skip over all whitespace + * but this seems to be ok for now + */ + return TRUE; + } + if (ch == '/' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '*') { + return TRUE; + } + if (ch == '-' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '-') { + return TRUE; + } + sql_state->reason = __LINE__; return FALSE; } - break; + + /* + * detect obvious sqli scans.. many people put '--' in plain text + * so only detect if input ends with '--', e.g. 1-- but not 1-- foo + */ + if ((strlen(sql_state->tokenvec[1].val) > 2) + && sql_state->tokenvec[1].val[0] == '-') { + sql_state->reason = __LINE__; + return FALSE; } + + break; + } /* case 2 */ case 3:{ /* * ...foo' + 'bar... @@ -1138,7 +1266,7 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen, } break; } - } /* case 3 */ + } /* case 3 */ case 5: { if (streq(sql_state->pat, "sosos")) { if (sql_state->tokenvec[0].str_open == CHAR_NULL) { @@ -1157,30 +1285,56 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen, } } /* case 5 */ } /* end switch */ + return TRUE; } +/** Main API, detects SQLi in an input. + * + * + */ int is_sqli(sfilter * sql_state, const char *s, size_t slen, ptr_fingerprints_fn fn) { + /* + * no input? not sqli + */ if (slen == 0) { return FALSE; } + /* + * test input "as-is" + */ if (is_string_sqli(sql_state, s, slen, CHAR_NULL, fn)) { return TRUE; } + /* + * if input has a single_quote, then + * test as if input was actually ' + * example: if input if "1' = 1", then pretend it's + * "'1' = 1" + * Porting Notes: example the same as doing + * is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn) + * + */ if (memchr(s, CHAR_SINGLE, slen) && is_string_sqli(sql_state, s, slen, CHAR_SINGLE, fn)) { return TRUE; } + /* + * same as above but with a double-quote " + */ if (memchr(s, CHAR_DOUBLE, slen) && is_string_sqli(sql_state, s, slen, CHAR_DOUBLE, fn)) { return TRUE; } + /* + * Hurray, input is not SQLi + */ return FALSE; } diff --git a/apache2/libinjection/sqlparse.h b/apache2/libinjection/sqlparse.h index f626aaa7..838a0471 100644 --- a/apache2/libinjection/sqlparse.h +++ b/apache2/libinjection/sqlparse.h @@ -34,7 +34,7 @@ extern "C" { * See python's normalized version * http://www.python.org/dev/peps/pep-0386/#normalizedversion */ -#define LIBINJECTION_VERSION "1.1.0" +#define LIBINJECTION_VERSION "1.2.0" #define ST_MAX_SIZE 32 #define MAX_TOKENS 5 diff --git a/apache2/libinjection/sqlparse_private.h b/apache2/libinjection/sqlparse_private.h index e080a009..03f1bccc 100644 --- a/apache2/libinjection/sqlparse_private.h +++ b/apache2/libinjection/sqlparse_private.h @@ -13,36 +13,6 @@ #include "sqlparse.h" -/*** - * The stdlib function 'strspn' assumes input to null-delimited. - * This allows us to specifying and input length and allows - * embedded nulls - */ -size_t strlenspn(const char *s, size_t len, const char *accept); - -int streq(const char *a, const char *b); - -void st_clear(stoken_t * st); -void st_assign_char(stoken_t * st, const char stype, const char value); -void st_set_type(stoken_t * st, const char stype); -void st_assign(stoken_t * st, const char stype, const char *value, - size_t len); -void st_assign_cstr(stoken_t * st, const char stype, const char *value); -void st_copy(stoken_t * dest, const stoken_t * src); - -int st_equals_cstr(const stoken_t * src, const char stype, - const char *value); - -int st_is_empty(const stoken_t * st); -int st_is_arith_op(const stoken_t * st); -int st_is_unary_op(const stoken_t * st); -int st_is_english_op(const stoken_t * st); -int st_is_logical_op(const stoken_t * st); -int st_is_multiword_start(const stoken_t * st); - -const char *bsearch_cstr(const char *key, const char *base[], - size_t nmemb); - typedef struct { const char *word; char type; @@ -96,7 +66,5 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout); int filter_fold(sfilter * sf, stoken_t * sout); -int char2int(char c); -unsigned long long pat2int(const char *pat); #endif /* _SQLPARSE_PRIVATE_H */