Sync to 1.2.0, fix regression in small sqli detection

This commit is contained in:
Nick Galbreath 2013-05-06 22:29:23 +09:00
parent ebcfe2c187
commit 4bc068e58d
3 changed files with 259 additions and 137 deletions

View File

@ -68,7 +68,15 @@ memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
return NULL;
}
/** Find largest string containing certain characters.
*
* C Standard library 'strspn' only works for 'c-strings' (null terminated)
* This works on arbitrary length.
*
* Porting notes:
* if accept is 'ABC', then this function would be similar to
* a_regexp.match(a_str, '[ABC]*'),
*/
size_t strlenspn(const char *s, size_t len, const char *accept)
{
size_t i;
@ -102,11 +110,96 @@ int cstrcasecmp(const char *a, const char *b)
return ca - cb;
}
/**
* Case insentive string compare.
* Here only to make code more readable
*/
int streq(const char *a, const char *b)
{
return cstrcasecmp(a, b) == 0;
}
/*
* Case-sensitive binary search.
*
*/
int bsearch_cstr(const char *key, const char *base[], size_t nmemb)
{
int left = 0;
int right = (int) nmemb - 1;
while (left <= right) {
int pos = (left + right) / 2;
int cmp = strcmp(base[pos], key);
if (cmp == 0) {
return TRUE;
} else if (cmp < 0) {
left = pos + 1;
} else {
right = pos - 1;
}
}
return FALSE;
}
/*
* Case-insensitive binary search
*/
int bsearch_cstrcase(const char *key, const char *base[], size_t nmemb)
{
int left = 0;
int right = (int) nmemb - 1;
while (left <= right) {
int pos = (left + right) / 2;
int cmp = cstrcasecmp(base[pos], key);
if (cmp == 0) {
return TRUE;
} else if (cmp < 0) {
left = pos + 1;
} else {
right = pos - 1;
}
}
return FALSE;
}
/**
*
*
*
* Porting Notes:
* given a mapping/hash of string to char
* this is just
* mapping[key.upper()]
*/
char bsearch_keyword_type(const char *key, const keyword_t * keywords,
size_t numb)
{
int left = 0;
int right = (int) numb - 1;
while (left <= right) {
int pos = (left + right) / 2;
int cmp = cstrcasecmp(keywords[pos].word, key);
if (cmp == 0) {
return keywords[pos].type;
} else if (cmp < 0) {
left = pos + 1;
} else {
right = pos - 1;
}
}
return CHAR_NULL;
}
/* st_token methods
*
* The folow just manipulates the stoken_t type
*
*
*/
void st_clear(stoken_t * st)
{
st->type = CHAR_NULL;
@ -141,74 +234,11 @@ void st_copy(stoken_t * dest, const stoken_t * src)
memcpy(dest, src, sizeof(stoken_t));
}
const char *bsearch_cstrcase(const char *key, const char *base[], size_t nmemb)
{
int left = 0;
int right = (int) nmemb - 1;
while (left <= right) {
int pos = (left + right) / 2;
int cmp = cstrcasecmp(base[pos], key);
if (cmp == 0) {
return base[pos];
} else if (cmp < 0) {
left = pos + 1;
} else {
right = pos - 1;
}
}
return NULL;
}
const char *bsearch_cstr(const char *key, const char *base[], size_t nmemb)
{
int left = 0;
int right = (int) nmemb - 1;
while (left <= right) {
int pos = (left + right) / 2;
int cmp = strcmp(base[pos], key);
if (cmp == 0) {
return base[pos];
} else if (cmp < 0) {
left = pos + 1;
} else {
right = pos - 1;
}
}
return NULL;
}
char bsearch_keyword_type(const char *key, const keyword_t * keywords,
size_t numb)
{
int left = 0;
int right = (int) numb - 1;
while (left <= right) {
int pos = (left + right) / 2;
int cmp = cstrcasecmp(keywords[pos].word, key);
if (cmp == 0) {
return keywords[pos].type;
} else if (cmp < 0) {
left = pos + 1;
} else {
right = pos - 1;
}
}
return CHAR_NULL;
}
int is_operator2(const char *key)
{
return bsearch_cstrcase(key, operators2, operators2_sz) != NULL;
}
int st_is_multiword_start(const stoken_t * st)
{
return bsearch_cstrcase(st->val,
multikeywords_start,
multikeywords_start_sz) != NULL;
multikeywords_start,
multikeywords_start_sz);
}
int st_is_unary_op(const stoken_t * st)
@ -236,6 +266,12 @@ int st_is_arith_op(const stoken_t * st)
cstrcasecmp(st->val, "DIV")));
}
/* Parsers
*
*
*/
size_t parse_white(sfilter * sf)
{
return sf->pos + 1;
@ -406,6 +442,9 @@ size_t parse_backslash(sfilter * sf)
const size_t slen = sf->slen;
size_t pos = sf->pos;
/*
* Weird MySQL alias for NULL, "\N" (capital N only)
*/
if (pos + 1 < slen && cs[pos + 1] == 'N') {
st_assign(current, '1', "NULL", 4);
return pos + 2;
@ -414,6 +453,14 @@ size_t parse_backslash(sfilter * sf)
}
}
/** Is input a 2-char operator?
*
*/
int is_operator2(const char *key)
{
return bsearch_cstr(key, operators2, operators2_sz);
}
size_t parse_operator2(sfilter * sf)
{
stoken_t *current = &sf->syntax_current;
@ -703,19 +750,44 @@ int parse_token(sfilter * sf)
st_clear(current);
/*
* if we are at beginning of string
* and in single-quote or double quote mode
* then pretend the input starts with a quote
*/
if (*pos == 0 && sf->delim != CHAR_NULL) {
*pos = parse_string_core(s, slen, 0, current, sf->delim, 0);
return TRUE;
}
while (*pos < slen) {
/*
* get current character
*/
const int ch = (int) (s[*pos]);
/*
* if not ascii, then continue...
* actually probably need to just assuming
* it's a string
*/
if (ch < 0 || ch > 127) {
*pos += 1;
continue;
}
/*
* look up the parser, and call it
*
* Porting Note: this is mapping of char to function
* charparsers[ch]()
*/
fnptr = char_parse_map[ch];
*pos = (*fnptr) (sf);
/*
*
*/
if (current->type != CHAR_NULL) {
return TRUE;
}
@ -770,6 +842,10 @@ int syntax_merge_words(stoken_t * a, stoken_t * b)
}
}
/* This does some simple syntax cleanup based on the token
*
*
*/
int sqli_tokenize(sfilter * sf, stoken_t * sout)
{
stoken_t *last = &sf->syntax_last;
@ -777,6 +853,10 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
while (parse_token(sf)) {
char ttype = current->type;
/*
* TBD: hmm forgot logic here.
*/
if (ttype == 'c') {
st_copy(&sf->syntax_comment, current);
continue;
@ -784,7 +864,13 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
st_clear(&sf->syntax_comment);
/*
* If we don't have a saved token
* If we don't have a saved token, and we have
* a string: save it. if the next token is also a string
* then merge them. e.g. "A" "B" in SQL is actually "AB"
* a n/k/U/o type: save since next token my be merged together
* for example: "LEFT" + "JOIN" = "LEFT JOIN"
* a o/& type: TBD need to review.
*
*/
if (last->type == CHAR_NULL) {
switch (ttype) {
@ -909,6 +995,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
st_clear(last);
return TRUE;
} else if (sf->syntax_comment.type) {
/*
* TBD
*/
st_copy(sout, &sf->syntax_comment);
st_clear(&sf->syntax_comment);
return TRUE;
@ -917,6 +1006,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
}
}
/*
* My apologies, this code is a mess
*/
int filter_fold(sfilter * sf, stoken_t * sout)
{
stoken_t *last = &sf->fold_last;
@ -926,7 +1018,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
st_copy(sout, last);
sf->fold_state = 2;
st_clear(last);
return TRUE;
return FALSE;
}
while (sqli_tokenize(sf, current)) {
@ -952,7 +1044,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
st_copy(last, current);
}
st_copy(sout, current);
return TRUE;
return FALSE;
} else if (last->type == '(' && st_is_unary_op(current)) {
/*
* similar to beginning of statement
@ -965,7 +1057,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
* emit 1, but keep state
*/
st_copy(sout, current);
return TRUE;
return FALSE;
} else if ((last->type == '1' || last->type == 'n')
&& st_is_arith_op(current)) {
FOLD_DEBUG;
@ -987,7 +1079,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
st_copy(sout, current);
st_clear(last);
}
return TRUE;
return FALSE;
} else {
if (last->type == 'o') {
st_copy(sout, last);
@ -998,7 +1090,7 @@ int filter_fold(sfilter * sf, stoken_t * sout)
st_copy(sout, current);
st_clear(last);
}
return TRUE;
return FALSE;
}
}
}
@ -1007,52 +1099,54 @@ int filter_fold(sfilter * sf, stoken_t * sout)
if (st_is_arith_op(last)) {
st_copy(sout, last);
st_clear(last);
return TRUE;
return FALSE;
} else {
st_clear(last);
}
}
return FALSE;
/*
* all done: nothing more to parse
*/
return TRUE;
}
/* secondary api: detects SQLi in a string, GIVEN a context.
*
* A context can be:
* * CHAR_NULL (\0), process as is
* * CHAR_SINGLE ('), process pretending input started with a
* single quote.
* * CHAR_DOUBLE ("), process pretending input started with a
* double quote.
*
*/
int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
const char delim, ptr_fingerprints_fn fn)
{
int all_done = 0;
int tlen = 0;
char ch;
int patmatch;
int all_done;
sfilter_reset(sql_state, s, slen);
sql_state->delim = delim;
while (tlen < MAX_TOKENS) {
all_done = filter_fold(sql_state, &(sql_state->tokenvec[tlen]));
if (!all_done) {
if (all_done) {
break;
}
sql_state->pat[tlen] = sql_state->tokenvec[tlen].type;
tlen += 1;
}
sql_state->pat[tlen] = CHAR_NULL;
/*
* if token 5 (last) looks like a functino word (such as ABS or ASCII)
* then check token 6 to see if it's a "(".
* if NOT then, it's not a function.
* make the fingerprint pattern a c-string (null delimited)
*/
sql_state->pat[tlen] = CHAR_NULL;
if (tlen == MAX_TOKENS && !all_done
&& sql_state->pat[MAX_TOKENS - 1] == 'f') {
stoken_t tmp;
all_done = filter_fold(sql_state, &tmp);
if (!all_done && tmp.type != '(') {
sql_state->reason = __LINE__;
return FALSE;
}
}
/*
* check for 'X' in pattern
* this means parsing could not be done
@ -1066,10 +1160,22 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
patmatch = fn(sql_state->pat);
/*
* No match.
*
* Set sql_state->reason to current line number
* only for debugging purposes.
*/
if (!patmatch) {
sql_state->reason = __LINE__;
return FALSE;
}
/*
* We got a SQLi match
* This next part just helps reduce false positives.
*
*/
switch (tlen) {
case 2:{
/*
@ -1090,16 +1196,6 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
sql_state->reason = __LINE__;
return FALSE;
}
/*
* detect obvious sqli scans.. many people put '--' in plain text
* so only detect if input ends with '--', e.g. 1-- but not 1-- foo
*/
if ((strlen(sql_state->tokenvec[1].val) > 2)
&& sql_state->tokenvec[1].val[0] == '-') {
sql_state->reason = __LINE__;
return FALSE;
}
/**
* there are some odd base64-looking query string values
@ -1107,14 +1203,46 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
* which evaluate to "1c"... these are not SQLi
* but 1234-- probably is.
* Make sure the "1" in "1c" is actually a true decimal number
*
* Need to check -original- string since the folding step
* may have merged tokens, e.g. "1+FOO" is folded into "1"
*/
if (sql_state->tokenvec[0].type == '1'&& sql_state->tokenvec[1].type == 'c' &&
strlen(sql_state->tokenvec[0].val) != strcspn(sql_state->tokenvec[0].val, "0123456789")) {
if (sql_state->tokenvec[0].type == '1'&& sql_state->tokenvec[1].type == 'c') {
/*
* we check that next character after the number is either whitespace,
* or '/' or a '-' ==> sqli.
*/
ch = sql_state->s[strlen(sql_state->tokenvec[0].val)];
if ( ch <= 32 ) {
/* next char was whitespace,e.g. "1234 --"
* this isn't exactly correct.. ideally we should skip over all whitespace
* but this seems to be ok for now
*/
return TRUE;
}
if (ch == '/' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '*') {
return TRUE;
}
if (ch == '-' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '-') {
return TRUE;
}
sql_state->reason = __LINE__;
return FALSE;
}
break;
/*
* detect obvious sqli scans.. many people put '--' in plain text
* so only detect if input ends with '--', e.g. 1-- but not 1-- foo
*/
if ((strlen(sql_state->tokenvec[1].val) > 2)
&& sql_state->tokenvec[1].val[0] == '-') {
sql_state->reason = __LINE__;
return FALSE;
}
break;
} /* case 2 */
case 3:{
/*
* ...foo' + 'bar...
@ -1138,7 +1266,7 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
}
break;
}
} /* case 3 */
} /* case 3 */
case 5: {
if (streq(sql_state->pat, "sosos")) {
if (sql_state->tokenvec[0].str_open == CHAR_NULL) {
@ -1157,30 +1285,56 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
}
} /* case 5 */
} /* end switch */
return TRUE;
}
/** Main API, detects SQLi in an input.
*
*
*/
int is_sqli(sfilter * sql_state, const char *s, size_t slen,
ptr_fingerprints_fn fn)
{
/*
* no input? not sqli
*/
if (slen == 0) {
return FALSE;
}
/*
* test input "as-is"
*/
if (is_string_sqli(sql_state, s, slen, CHAR_NULL, fn)) {
return TRUE;
}
/*
* if input has a single_quote, then
* test as if input was actually '
* example: if input if "1' = 1", then pretend it's
* "'1' = 1"
* Porting Notes: example the same as doing
* is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn)
*
*/
if (memchr(s, CHAR_SINGLE, slen)
&& is_string_sqli(sql_state, s, slen, CHAR_SINGLE, fn)) {
return TRUE;
}
/*
* same as above but with a double-quote "
*/
if (memchr(s, CHAR_DOUBLE, slen)
&& is_string_sqli(sql_state, s, slen, CHAR_DOUBLE, fn)) {
return TRUE;
}
/*
* Hurray, input is not SQLi
*/
return FALSE;
}

View File

@ -34,7 +34,7 @@ extern "C" {
* See python's normalized version
* http://www.python.org/dev/peps/pep-0386/#normalizedversion
*/
#define LIBINJECTION_VERSION "1.1.0"
#define LIBINJECTION_VERSION "1.2.0"
#define ST_MAX_SIZE 32
#define MAX_TOKENS 5

View File

@ -13,36 +13,6 @@
#include "sqlparse.h"
/***
* The stdlib function 'strspn' assumes input to null-delimited.
* This allows us to specifying and input length and allows
* embedded nulls
*/
size_t strlenspn(const char *s, size_t len, const char *accept);
int streq(const char *a, const char *b);
void st_clear(stoken_t * st);
void st_assign_char(stoken_t * st, const char stype, const char value);
void st_set_type(stoken_t * st, const char stype);
void st_assign(stoken_t * st, const char stype, const char *value,
size_t len);
void st_assign_cstr(stoken_t * st, const char stype, const char *value);
void st_copy(stoken_t * dest, const stoken_t * src);
int st_equals_cstr(const stoken_t * src, const char stype,
const char *value);
int st_is_empty(const stoken_t * st);
int st_is_arith_op(const stoken_t * st);
int st_is_unary_op(const stoken_t * st);
int st_is_english_op(const stoken_t * st);
int st_is_logical_op(const stoken_t * st);
int st_is_multiword_start(const stoken_t * st);
const char *bsearch_cstr(const char *key, const char *base[],
size_t nmemb);
typedef struct {
const char *word;
char type;
@ -96,7 +66,5 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout);
int filter_fold(sfilter * sf, stoken_t * sout);
int char2int(char c);
unsigned long long pat2int(const char *pat);
#endif /* _SQLPARSE_PRIVATE_H */