sync with upstream: c89 support, win32 support, more detection, fewer false positives

This commit is contained in:
Nick Galbreath 2013-04-20 22:41:24 +09:00
parent b6b284ea6c
commit c10551dcf7
3 changed files with 255 additions and 143 deletions

View File

@ -394,6 +394,7 @@ static const char* patmap[] = {
"1k1U(", "1k1U(",
"1k1Uk", "1k1Uk",
"1k1c", "1k1c",
"1k1o1",
"1kU1,", "1kU1,",
"1kUs,", "1kUs,",
"1kUv,", "1kUv,",
@ -526,6 +527,7 @@ static const char* patmap[] = {
"Ukv,n", "Ukv,n",
"Ukv,s", "Ukv,s",
"Ukv,v", "Ukv,v",
"f((f(",
"f((k(", "f((k(",
"f((kf", "f((kf",
"f()&f", "f()&f",
@ -569,6 +571,7 @@ static const char* patmap[] = {
"f(vov", "f(vov",
"k()ok", "k()ok",
"k(1)U", "k(1)U",
"k(f(1",
"k(ok(", "k(ok(",
"k(s)U", "k(s)U",
"k(sv)", "k(sv)",
@ -587,9 +590,11 @@ static const char* patmap[] = {
"k1,vc", "k1,vc",
"k1,vk", "k1,vk",
"k1k(k", "k1k(k",
"k1kf(",
"k1o(s", "k1o(s",
"k1o(v", "k1o(v",
"k;non", "k;non",
"kf((f",
"kf(1)", "kf(1)",
"kf(1,", "kf(1,",
"kf(f(", "kf(f(",
@ -640,7 +645,11 @@ static const char* patmap[] = {
"knvvn", "knvvn",
"ko(k(", "ko(k(",
"ko(kf", "ko(kf",
"ko(n,",
"ko(s,",
"kok(k", "kok(k",
"ks&(k",
"ks&(o",
"ks)", "ks)",
"ks,1,", "ks,1,",
"ks,1c", "ks,1c",
@ -734,9 +743,13 @@ static const char* patmap[] = {
"n))&v", "n))&v",
"n)))&", "n)))&",
"n)));", "n)));",
"n)))B",
"n)))U",
"n)))k", "n)))k",
"n)))o", "n)))o",
"n));k", "n));k",
"n))B1",
"n))Uk",
"n))kk", "n))kk",
"n))o(", "n))o(",
"n))o1", "n))o1",
@ -750,6 +763,8 @@ static const char* patmap[] = {
"n);kk", "n);kk",
"n);kn", "n);kn",
"n);ko", "n);ko",
"n)B1c",
"n)Uk1",
"n)k1o", "n)k1o",
"n)kks", "n)kks",
"n)kkv", "n)kkv",
@ -790,8 +805,10 @@ static const char* patmap[] = {
"n;kn(", "n;kn(",
"n;ko(", "n;ko(",
"n;kok", "n;kok",
"nB1c",
"nUk(k", "nUk(k",
"nUk1,", "nUk1,",
"nUk1c",
"nUkf(", "nUkf(",
"nUkn,", "nUkn,",
"nUks,", "nUks,",
@ -829,6 +846,7 @@ static const char* patmap[] = {
"nof(1", "nof(1",
"nof(s", "nof(s",
"nof(v", "nof(v",
"nok(1",
"nok(f", "nok(f",
"nok(k", "nok(k",
"nok(s", "nok(s",
@ -2138,7 +2156,7 @@ static const char* patmap[] = {
"vovso", "vovso",
"vovvo", "vovvo",
}; };
static const size_t patmap_sz = 2135; static const size_t patmap_sz = 2153;
/* Simple binary search */ /* Simple binary search */
@ -2160,4 +2178,5 @@ int is_sqli_pattern(const char *key)
} }
return 0; /* FALSE */ return 0; /* FALSE */
} }
#endif #endif

View File

@ -7,7 +7,6 @@
* (setq c-default-style "k&r" * (setq c-default-style "k&r"
* c-basic-offset 4) * c-basic-offset 4)
* indent -kr -nut * indent -kr -nut
* test
*/ */
#include <string.h> #include <string.h>
@ -29,16 +28,16 @@
#define FOLD_DEBUG #define FOLD_DEBUG
#endif #endif
// order is important here /* order is important here */
#include "sqlparse_private.h" #include "sqlparse_private.h"
#include "sqlparse_data.h" #include "sqlparse_data.h"
// memmem is a linux function /* memmem is a linux function
// may not exist in Windows, and doesn't exist * may not exist in Windows, and doesn't exist
// in Mac OS X < 10.8 and FreeBSD < 6.0 * in Mac OS X < 10.8 and FreeBSD < 6.0
// Define our own. Modified to use 'const char*' * Define our own. Modified to use 'const char*'
// instead of (void *) * instead of (void *)
// */
/*- /*-
* Copyright (c) 2005 Pascal Gloor <pascal.gloor@spale.com> * Copyright (c) 2005 Pascal Gloor <pascal.gloor@spale.com>
@ -169,12 +168,6 @@ void st_assign_cstr(stoken_t * st, const char stype, const char *value)
st->val[ST_MAX_SIZE - 1] = CHAR_NULL; st->val[ST_MAX_SIZE - 1] = CHAR_NULL;
} }
int st_equals_cstr(const stoken_t * st, const char stype,
const char *value)
{
return st->type == stype && !cstrcasecmp(value, st->val);
}
void st_copy(stoken_t * dest, const stoken_t * src) void st_copy(stoken_t * dest, const stoken_t * src)
{ {
memcpy(dest, src, sizeof(stoken_t)); memcpy(dest, src, sizeof(stoken_t));
@ -355,8 +348,10 @@ size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
if (cs[pos + 2] != '!') { if (cs[pos + 2] != '!') {
return 0; return 0;
} }
// this is a mysql comment /*
// got "/*!" * this is a mysql comment
* got "/x!"
*/
if (pos + 3 >= len) { if (pos + 3 >= len) {
return 3; return 3;
} }
@ -364,7 +359,9 @@ size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
if (!isdigit(cs[pos + 3])) { if (!isdigit(cs[pos + 3])) {
return 3; return 3;
} }
// handle odd case of /*!0SELECT /*
* handle odd case of /x!0SELECT
*/
if (!isdigit(cs[pos + 4])) { if (!isdigit(cs[pos + 4])) {
return 4; return 4;
} }
@ -388,7 +385,7 @@ size_t parse_slash(sfilter * sf)
const size_t slen = sf->slen; const size_t slen = sf->slen;
size_t pos = sf->pos; size_t pos = sf->pos;
const char* cur = cs + pos; const char* cur = cs + pos;
size_t inc = 0; size_t inc;
size_t pos1 = pos + 1; size_t pos1 = pos + 1;
if (pos1 == slen || cs[pos1] != '*') { if (pos1 == slen || cs[pos1] != '*') {
@ -398,18 +395,24 @@ size_t parse_slash(sfilter * sf)
inc = is_mysql_comment(cs, slen, pos); inc = is_mysql_comment(cs, slen, pos);
if (inc == 0) { if (inc == 0) {
// skip over initial '/*' /*
* skip over initial '/x'
*/
const char *ptr = const char *ptr =
(const char *) my_memmem(cur + 2, slen - (pos + 2), "*/", 2); (const char *) my_memmem(cur + 2, slen - (pos + 2), "*/", 2);
if (ptr == NULL) { if (ptr == NULL) {
// unterminated comment /*
* unterminated comment
*/
st_assign_cstr(current, 'c', cs + pos); st_assign_cstr(current, 'c', cs + pos);
return slen; return slen;
} else { } else {
// postgresql allows nested comments which makes /*
// this is incompatible with parsing so * postgresql allows nested comments which makes
// if we find a '/*' inside the coment, then * this is incompatible with parsing so
// make a new token. * if we find a '/x' inside the coment, then
* make a new token.
*/
char ctype = 'c'; char ctype = 'c';
const size_t clen = (ptr + 2) - (cur); const size_t clen = (ptr + 2) - (cur);
if (my_memmem(cur + 2, ptr - (cur + 2), "/*", 2) != if (my_memmem(cur + 2, ptr - (cur + 2), "/*", 2) !=
@ -421,7 +424,9 @@ size_t parse_slash(sfilter * sf)
return pos + clen; return pos + clen;
} }
} else { } else {
// MySQL Comment /*
* MySQL Comment
*/
sf->in_comment = TRUE; sf->in_comment = TRUE;
st_clear(current); st_clear(current);
return pos + inc; return pos + inc;
@ -448,8 +453,8 @@ size_t parse_operator2(sfilter * sf)
stoken_t *current = &sf->syntax_current; stoken_t *current = &sf->syntax_current;
const char *cs = sf->s; const char *cs = sf->s;
const size_t slen = sf->slen; const size_t slen = sf->slen;
char op2[3];
size_t pos = sf->pos; size_t pos = sf->pos;
char op2[3];
if (pos + 1 >= slen) { if (pos + 1 >= slen) {
return parse_operator1(sf); return parse_operator1(sf);
@ -459,30 +464,37 @@ size_t parse_operator2(sfilter * sf)
op2[1] = cs[pos + 1]; op2[1] = cs[pos + 1];
op2[2] = CHAR_NULL; op2[2] = CHAR_NULL;
// Special Hack for MYSQL style comments /*
// instead of turning: * Special Hack for MYSQL style comments
// /*! FOO */ into FOO by rewriting the string, we * instead of turning:
// turn it into FOO */ and ignore the ending comment * /x! FOO x/ into FOO by rewriting the string, we
* turn it into FOO x/ and ignore the ending comment
*/
if (sf->in_comment && op2[0] == '*' && op2[1] == '/') { if (sf->in_comment && op2[0] == '*' && op2[1] == '/') {
sf->in_comment = FALSE; sf->in_comment = FALSE;
st_clear(current); st_clear(current);
return pos + 2; return pos + 2;
} else if (pos + 2 < slen && op2[0] == '<' && op2[1] == '=' } else if (pos + 2 < slen && op2[0] == '<' && op2[1] == '='
&& cs[pos + 2] == '>') { && cs[pos + 2] == '>') {
// special 3-char operator /*
* special 3-char operator
*/
st_assign_cstr(current, 'o', "<=>"); st_assign_cstr(current, 'o', "<=>");
return pos + 3; return pos + 3;
} else if (is_operator2(op2)) { } else if (is_operator2(op2)) {
if (streq(op2, "&&") || streq(op2, "||")) { if (streq(op2, "&&") || streq(op2, "||")) {
st_assign_cstr(current, '&', op2); st_assign_cstr(current, '&', op2);
} else { } else {
// normal 2 char operator /*
* normal 2 char operator
*/
st_assign_cstr(current, 'o', op2); st_assign_cstr(current, 'o', op2);
} }
return pos + 2; return pos + 2;
} else { } else {
// must be a single char operator /*
* must be a single char operator
*/
return parse_operator1(sf); return parse_operator1(sf);
} }
} }
@ -490,29 +502,41 @@ size_t parse_operator2(sfilter * sf)
size_t parse_string_core(const char *cs, const size_t len, size_t pos, size_t parse_string_core(const char *cs, const size_t len, size_t pos,
stoken_t * st, char delim, size_t offset) stoken_t * st, char delim, size_t offset)
{ {
// offset is to skip the perhaps first quote char /*
* offset is to skip the perhaps first quote char
*/
const char *qpos = const char *qpos =
(const char *) memchr((const void *) (cs + pos + offset), delim, (const char *) memchr((const void *) (cs + pos + offset), delim,
len - pos - offset); len - pos - offset);
// then keep string open/close info /*
* then keep string open/close info
*/
if (offset == 1) { if (offset == 1) {
// this is real quote /*
* this is real quote
*/
st->str_open = delim; st->str_open = delim;
} else { } else {
// this was a simulated quote /*
* this was a simulated quote
*/
st->str_open = CHAR_NULL; st->str_open = CHAR_NULL;
} }
while (TRUE) { while (TRUE) {
if (qpos == NULL) { if (qpos == NULL) {
// string ended with no trailing quote /*
// assign what we have * string ended with no trailing quote
* assign what we have
*/
st_assign_cstr(st, 's', cs + pos + offset); st_assign_cstr(st, 's', cs + pos + offset);
st->str_close = CHAR_NULL; st->str_close = CHAR_NULL;
return len; return len;
} else if (*(qpos - 1) != '\\') { } else if (*(qpos - 1) != '\\') {
// ending quote is not escaped.. copy and end /*
* ending quote is not escaped.. copy and end
*/
st_assign(st, 's', cs + pos + offset, st_assign(st, 's', cs + pos + offset,
qpos - (cs + pos + offset)); qpos - (cs + pos + offset));
st->str_close = delim; st->str_close = delim;
@ -535,7 +559,9 @@ size_t parse_string(sfilter * sf)
const size_t slen = sf->slen; const size_t slen = sf->slen;
size_t pos = sf->pos; size_t pos = sf->pos;
// assert cs[pos] == single or double quote /*
* assert cs[pos] == single or double quote
*/
return parse_string_core(cs, slen, pos, current, cs[pos], 1); return parse_string_core(cs, slen, pos, current, cs[pos], 1);
} }
@ -544,14 +570,39 @@ size_t parse_word(sfilter * sf)
stoken_t *current = &sf->syntax_current; stoken_t *current = &sf->syntax_current;
const char *cs = sf->s; const char *cs = sf->s;
size_t pos = sf->pos; size_t pos = sf->pos;
char *dot;
char ch;
size_t slen = size_t slen =
strlenspn(cs + pos, sf->slen - pos, strlenspn(cs + pos, sf->slen - pos,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.$"); "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$.");
st_assign(current, 'n', cs + pos, slen); st_assign(current, 'n', cs + pos, slen);
dot = strchr(current->val, '.');
if (dot != NULL) {
*dot = '\0';
ch = bsearch_keyword_type(current->val, sql_keywords,
sql_keywords_sz);
if (ch == 'k' || ch == 'o') {
/*
* we got something like "SELECT.1"
*/
current->type = ch;
return pos + strlen(current->val);
} else {
/*
* something else, put back dot
*/
*dot = '.';
}
}
/*
* do normal lookup with word including '.'
*/
if (slen < ST_MAX_SIZE) { if (slen < ST_MAX_SIZE) {
char ch = bsearch_keyword_type(current->val, sql_keywords, ch = bsearch_keyword_type(current->val, sql_keywords,
sql_keywords_sz); sql_keywords_sz);
if (ch == CHAR_NULL) { if (ch == CHAR_NULL) {
ch = 'n'; ch = 'n';
@ -567,16 +618,18 @@ size_t parse_var(sfilter * sf)
const char *cs = sf->s; const char *cs = sf->s;
const size_t slen = sf->slen; const size_t slen = sf->slen;
size_t pos = sf->pos; size_t pos = sf->pos;
size_t xlen = 0;
size_t pos1 = pos + 1; size_t pos1 = pos + 1;
size_t xlen;
// move past optional other '@' /*
* move past optional other '@'
*/
if (pos1 < slen && cs[pos1] == '@') { if (pos1 < slen && cs[pos1] == '@') {
pos1 += 1; pos1 += 1;
} }
xlen = strlenspn(cs + pos1, slen - pos1, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.$"); xlen = strlenspn(cs + pos1, slen - pos1,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.$");
if (xlen == 0) { if (xlen == 0) {
st_assign(current, 'v', cs + pos, (pos1 - pos)); st_assign(current, 'v', cs + pos, (pos1 - pos));
return pos1; return pos1;
@ -592,12 +645,15 @@ size_t parse_number(sfilter * sf)
const char *cs = sf->s; const char *cs = sf->s;
const size_t slen = sf->slen; const size_t slen = sf->slen;
size_t pos = sf->pos; size_t pos = sf->pos;
size_t xlen = 0; size_t xlen;
size_t start = 0; size_t start;
if (pos + 1 < slen && cs[pos] == '0' && (cs[pos + 1] == 'X' || cs[pos + 1] == 'x')) { if (pos + 1 < slen && cs[pos] == '0' && (cs[pos + 1] == 'X' || cs[pos + 1] == 'x')) {
// TBD compare if isxdigit /*
xlen = strlenspn(cs + pos + 2, slen - pos - 2, "0123456789ABCDEFabcdef"); * TBD compare if isxdigit
*/
xlen =
strlenspn(cs + pos + 2, slen - pos - 2, "0123456789ABCDEFabcdef");
if (xlen == 0) { if (xlen == 0) {
st_assign_cstr(current, 'n', "0X"); st_assign_cstr(current, 'n', "0X");
return pos + 2; return pos + 2;
@ -606,8 +662,8 @@ size_t parse_number(sfilter * sf)
return pos + 2 + xlen; return pos + 2 + xlen;
} }
} }
start = pos;
start = pos;
while (isdigit(cs[pos])) { while (isdigit(cs[pos])) {
pos += 1; pos += 1;
} }
@ -631,10 +687,12 @@ size_t parse_number(sfilter * sf)
pos += 1; pos += 1;
} }
} else if (isalpha(cs[pos])) { } else if (isalpha(cs[pos])) {
// oh no, we have something like '6FOO' /*
// use microsoft style parsing and take just * oh no, we have something like '6FOO'
// the number part and leave the rest to be * use microsoft style parsing and take just
// parsed later * the number part and leave the rest to be
* parsed later
*/
st_assign(current, '1', cs + start, pos - start); st_assign(current, '1', cs + start, pos - start);
return pos; return pos;
} }
@ -682,9 +740,9 @@ void sfilter_reset(sfilter * sf, const char *s, size_t len)
int syntax_merge_words(stoken_t * a, stoken_t * b) int syntax_merge_words(stoken_t * a, stoken_t * b)
{ {
size_t sz1 = 0; size_t sz1;
size_t sz2 = 0; size_t sz2;
size_t sz3 = 0; size_t sz3;
char tmp[ST_MAX_SIZE]; char tmp[ST_MAX_SIZE];
char ch; char ch;
@ -697,11 +755,12 @@ int syntax_merge_words(stoken_t * a, stoken_t * b)
sz1 = strlen(a->val); sz1 = strlen(a->val);
sz2 = strlen(b->val); sz2 = strlen(b->val);
sz3 = sz1 + sz2 + 1; sz3 = sz1 + sz2 + 1;
if (sz3 >= ST_MAX_SIZE) { if (sz3 >= ST_MAX_SIZE) {
return FALSE; return FALSE;
} }
// oddly annoying last.val + ' ' + current.val /*
* oddly annoying last.val + ' ' + current.val
*/
memcpy(tmp, a->val, sz1); memcpy(tmp, a->val, sz1);
tmp[sz1] = ' '; tmp[sz1] = ' ';
memcpy(tmp + sz1 + 1, b->val, sz2); memcpy(tmp + sz1 + 1, b->val, sz2);
@ -709,7 +768,9 @@ int syntax_merge_words(stoken_t * a, stoken_t * b)
ch = bsearch_keyword_type(tmp, multikeywords, multikeywords_sz); ch = bsearch_keyword_type(tmp, multikeywords, multikeywords_sz);
if (ch != CHAR_NULL) { if (ch != CHAR_NULL) {
// -1, don't copy the null byte /*
* -1, don't copy the null byte
*/
st_assign(a, ch, tmp, sz3); st_assign(a, ch, tmp, sz3);
return TRUE; return TRUE;
} else { } else {
@ -730,13 +791,15 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
} }
st_clear(&sf->syntax_comment); st_clear(&sf->syntax_comment);
// /*
// If we don't have a saved token * If we don't have a saved token
// */
if (last->type == CHAR_NULL) { if (last->type == CHAR_NULL) {
switch (ttype) { switch (ttype) {
// items that have special needs /*
* items that have special needs
*/
case 's': case 's':
st_copy(last, current); st_copy(last, current);
continue; continue;
@ -749,28 +812,34 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
st_copy(last, current); st_copy(last, current);
continue; continue;
} else if (current->type == 'o' || current->type == '&') { } else if (current->type == 'o' || current->type == '&') {
//} else if (st_is_unary_op(current)) { /* } else if (st_is_unary_op(current)) { */
st_copy(last, current); st_copy(last, current);
continue; continue;
} else { } else {
// copy to out /*
* copy to out
*/
st_copy(sout, current); st_copy(sout, current);
return TRUE; return TRUE;
} }
default: default:
// copy to out /*
* copy to out
*/
st_copy(sout, current); st_copy(sout, current);
return TRUE; return TRUE;
} }
} }
// /*
// We have a saved token * We have a saved token
// */
switch (ttype) { switch (ttype) {
case 's': case 's':
if (last->type == 's') { if (last->type == 's') {
// "FOO" "BAR" == "FOO" (skip second string) /*
* "FOO" "BAR" == "FOO" (skip second string)
*/
continue; continue;
} else { } else {
st_copy(sout, last); st_copy(sout, last);
@ -780,18 +849,24 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
break; break;
case 'o': case 'o':
// first case to handle "IS" + "NOT" /*
* first case to handle "IS" + "NOT"
*/
if (syntax_merge_words(last, current)) { if (syntax_merge_words(last, current)) {
continue; continue;
} else if (st_is_unary_op(current) } else if (st_is_unary_op(current)
&& (last->type == 'o' || last->type == '&' && (last->type == 'o' || last->type == '&'
|| last->type == 'U')) { || last->type == 'U')) {
// if an operator is followed by a unary operator, skip it. /*
// 1, + ==> "+" is not unary, it's arithmetic * if an operator is followed by a unary operator, skip it.
// AND, + ==> "+" is unary * 1, + ==> "+" is not unary, it's arithmetic
* AND, + ==> "+" is unary
*/
continue; continue;
} else { } else {
// no match /*
* no match
*/
st_copy(sout, last); st_copy(sout, last);
st_copy(last, current); st_copy(last, current);
return TRUE; return TRUE;
@ -803,7 +878,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
if (syntax_merge_words(last, current)) { if (syntax_merge_words(last, current)) {
continue; continue;
} else { } else {
// total no match /*
* total no match
*/
st_copy(sout, last); st_copy(sout, last);
st_copy(last, current); st_copy(last, current);
return TRUE; return TRUE;
@ -811,15 +888,19 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
break; break;
default: default:
// fix up for ambiguous "IN" /*
// handle case where IN is typically a function * fix up for ambiguous "IN"
// but used in compound "IN BOOLEAN MODE" jive * handle case where IN is typically a function
* but used in compound "IN BOOLEAN MODE" jive
*/
if (last->type == 'n' && !cstrcasecmp(last->val, "IN")) { if (last->type == 'n' && !cstrcasecmp(last->val, "IN")) {
st_copy(last, current); st_copy(last, current);
st_assign_cstr(sout, 'f', "IN"); st_assign_cstr(sout, 'f', "IN");
return TRUE; return TRUE;
} else { } else {
// no match at all /*
* no match at all
*/
st_copy(sout, last); st_copy(sout, last);
st_copy(last, current); st_copy(last, current);
return TRUE; return TRUE;
@ -828,7 +909,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
} }
} }
// final cleanup /*
* final cleanup
*/
if (last->type) { if (last->type) {
st_copy(sout, last); st_copy(sout, last);
st_clear(last); st_clear(last);
@ -855,8 +938,10 @@ int filter_fold(sfilter * sf, stoken_t * sout)
} }
while (sqli_tokenize(sf, current)) { while (sqli_tokenize(sf, current)) {
// 0 = start of statement /*
// skip ( and unary ops * 0 = start of statement
* skip ( and unary ops
*/
if (sf->fold_state == 0) { if (sf->fold_state == 0) {
if (current->type == '(') { if (current->type == '(') {
continue; continue;
@ -877,13 +962,16 @@ int filter_fold(sfilter * sf, stoken_t * sout)
st_copy(sout, current); st_copy(sout, current);
return TRUE; return TRUE;
} else if (last->type == '(' && st_is_unary_op(current)) { } else if (last->type == '(' && st_is_unary_op(current)) {
// similar to beginning of statement /*
// an opening '(' resets state, and we should skip all * similar to beginning of statement
// unary operators * an opening '(' resets state, and we should skip all
* unary operators
*/
continue; continue;
} else if (last->type == '(' && current->type == '(') { } else if (last->type == '(' && current->type == '(') {
// if we get another '(' after another /* if we get another '(' after another
// emit 1, but keep state * emit 1, but keep state
*/
st_copy(sout, current); st_copy(sout, current);
return TRUE; return TRUE;
} else if ((last->type == '1' || last->type == 'n') } else if ((last->type == '1' || last->type == 'n')
@ -941,7 +1029,8 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
{ {
int all_done = 0; int all_done = 0;
int tlen = 0; int tlen = 0;
int patmatch = 0; int patmatch;
sfilter_reset(sql_state, s, slen); sfilter_reset(sql_state, s, slen);
sql_state->delim = delim; sql_state->delim = delim;
@ -956,9 +1045,11 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
} }
sql_state->pat[tlen] = CHAR_NULL; sql_state->pat[tlen] = CHAR_NULL;
// if token 5 (last) looks like a function word (such as ABS or ASCII) /*
// then check token 6 to see if it's a "(". * if token 5 (last) looks like a function word (such as ABS or ASCII)
// if NOT then, it's not a function. * then check token 6 to see if it's a "(".
* if NOT then, it's not a function.
*/
if (tlen == MAX_TOKENS && !all_done if (tlen == MAX_TOKENS && !all_done
&& sql_state->pat[MAX_TOKENS - 1] == 'f') { && sql_state->pat[MAX_TOKENS - 1] == 'f') {
@ -970,11 +1061,13 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
return FALSE; return FALSE;
} }
} }
// check for 'X' in pattern /*
// this means parsing could not be done * check for 'X' in pattern
// accurately due to pgsql's double comments * this means parsing could not be done
// or other syntax that isn't consistent * accurately due to pgsql's double comments
// should be very rare false positive * or other syntax that isn't consistent
* should be very rare false positive
*/
if (strchr(sql_state->pat, 'X')) { if (strchr(sql_state->pat, 'X')) {
return TRUE; return TRUE;
} }
@ -987,13 +1080,17 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
} }
switch (tlen) { switch (tlen) {
case 2:{ case 2:{
// if 'comment' is '#' ignore.. too many FP /*
* if 'comment' is '#' ignore.. too many FP
*/
if (sql_state->tokenvec[1].val[0] == '#') { if (sql_state->tokenvec[1].val[0] == '#') {
sql_state->reason = __LINE__; sql_state->reason = __LINE__;
return FALSE; return FALSE;
} }
// detect obvious sqli scans.. many people put '--' in plain text /*
// so only detect if input ends with '--', e.g. 1-- but not 1-- foo * detect obvious sqli scans.. many people put '--' in plain text
* so only detect if input ends with '--', e.g. 1-- but not 1-- foo
*/
if ((strlen(sql_state->tokenvec[1].val) > 2) if ((strlen(sql_state->tokenvec[1].val) > 2)
&& sql_state->tokenvec[1].val[0] == '-') { && sql_state->tokenvec[1].val[0] == '-') {
sql_state->reason = __LINE__; sql_state->reason = __LINE__;
@ -1002,25 +1099,47 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
break; break;
} }
case 3:{ case 3:{
// ...foo' + 'bar... /*
// no opening quote, no closing quote * ...foo' + 'bar...
// and each string has data * no opening quote, no closing quote
if (streq(sql_state->pat, "sos") * and each string has data
|| streq(sql_state->pat, "s&s")) { */
if (streq(sql_state->pat, "sos")
|| streq(sql_state->pat, "s&s")) {
if ((sql_state->tokenvec[0].str_open == CHAR_NULL) if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
&& (sql_state->tokenvec[2].str_close == CHAR_NULL)) { && (sql_state->tokenvec[2].str_close == CHAR_NULL)) {
/*
// if ....foo" + "bar.... * if ....foo" + "bar....
*/
return TRUE; return TRUE;
} else { } else {
// not sqli /*
* not sqli
*/
sql_state->reason = __LINE__; sql_state->reason = __LINE__;
return FALSE; return FALSE;
} }
break; break;
}
} /* case 3 */
case 5: {
if (streq(sql_state->pat, "sosos")) {
if (sql_state->tokenvec[0].str_open == CHAR_NULL) {
/*
* if ....foo" + "bar....
*/
return TRUE;
} else {
/*
* not sqli
*/
sql_state->reason = __LINE__;
return FALSE;
} }
} /* case 3 */ break;
} /* end switch */ }
} /* case 5 */
} /* end switch */
return TRUE; return TRUE;
} }
@ -1048,29 +1167,3 @@ int is_sqli(sfilter * sql_state, const char *s, size_t slen,
return FALSE; return FALSE;
} }
/*
not used yet
// [('o', 228), ('k', 220), ('1', 217), (')', 157), ('(', 156), ('s', 154), ('n', 77), ('f', 73), (';', 59), (',', 35), ('v', 17), ('c', 15),
int char2int(char c)
{
const char *map = "ok1()snf;,";
const char *pos = strchr(map, c);
if (pos == NULL) {
return 15;
} else {
return (int) (pos - map) + 1;
}
}
unsigned long long pat2int(const char *pat)
{
unsigned long long val = 0;
while (*pat) {
val = (val << 4) + char2int(*pat);
pat += 1;
}
return val;
}
*/

View File

@ -57,7 +57,7 @@ typedef struct {
/* final sqli data */ /* final sqli data */
stoken_t tokenvec[MAX_TOKENS]; stoken_t tokenvec[MAX_TOKENS];
// +1 for possible ending null /* +1 for possible ending null */
char pat[MAX_TOKENS + 1]; char pat[MAX_TOKENS + 1];
char delim; char delim;
int reason; int reason;