sync with upstream: c89 support, win32 support, more detection, fewer false positives

This commit is contained in:
Nick Galbreath 2013-04-20 22:41:24 +09:00
parent b6b284ea6c
commit c10551dcf7
3 changed files with 255 additions and 143 deletions

View File

@ -394,6 +394,7 @@ static const char* patmap[] = {
"1k1U(",
"1k1Uk",
"1k1c",
"1k1o1",
"1kU1,",
"1kUs,",
"1kUv,",
@ -526,6 +527,7 @@ static const char* patmap[] = {
"Ukv,n",
"Ukv,s",
"Ukv,v",
"f((f(",
"f((k(",
"f((kf",
"f()&f",
@ -569,6 +571,7 @@ static const char* patmap[] = {
"f(vov",
"k()ok",
"k(1)U",
"k(f(1",
"k(ok(",
"k(s)U",
"k(sv)",
@ -587,9 +590,11 @@ static const char* patmap[] = {
"k1,vc",
"k1,vk",
"k1k(k",
"k1kf(",
"k1o(s",
"k1o(v",
"k;non",
"kf((f",
"kf(1)",
"kf(1,",
"kf(f(",
@ -640,7 +645,11 @@ static const char* patmap[] = {
"knvvn",
"ko(k(",
"ko(kf",
"ko(n,",
"ko(s,",
"kok(k",
"ks&(k",
"ks&(o",
"ks)",
"ks,1,",
"ks,1c",
@ -734,9 +743,13 @@ static const char* patmap[] = {
"n))&v",
"n)))&",
"n)));",
"n)))B",
"n)))U",
"n)))k",
"n)))o",
"n));k",
"n))B1",
"n))Uk",
"n))kk",
"n))o(",
"n))o1",
@ -750,6 +763,8 @@ static const char* patmap[] = {
"n);kk",
"n);kn",
"n);ko",
"n)B1c",
"n)Uk1",
"n)k1o",
"n)kks",
"n)kkv",
@ -790,8 +805,10 @@ static const char* patmap[] = {
"n;kn(",
"n;ko(",
"n;kok",
"nB1c",
"nUk(k",
"nUk1,",
"nUk1c",
"nUkf(",
"nUkn,",
"nUks,",
@ -829,6 +846,7 @@ static const char* patmap[] = {
"nof(1",
"nof(s",
"nof(v",
"nok(1",
"nok(f",
"nok(k",
"nok(s",
@ -2138,7 +2156,7 @@ static const char* patmap[] = {
"vovso",
"vovvo",
};
static const size_t patmap_sz = 2135;
static const size_t patmap_sz = 2153;
/* Simple binary search */
@ -2160,4 +2178,5 @@ int is_sqli_pattern(const char *key)
}
return 0; /* FALSE */
}
#endif

View File

@ -7,7 +7,6 @@
* (setq c-default-style "k&r"
* c-basic-offset 4)
* indent -kr -nut
* test
*/
#include <string.h>
@ -29,16 +28,16 @@
#define FOLD_DEBUG
#endif
// order is important here
/* order is important here */
#include "sqlparse_private.h"
#include "sqlparse_data.h"
// memmem is a linux function
// may not exist in Windows, and doesn't exist
// in Mac OS X < 10.8 and FreeBSD < 6.0
// Define our own. Modified to use 'const char*'
// instead of (void *)
//
/* memmem is a linux function
* may not exist in Windows, and doesn't exist
* in Mac OS X < 10.8 and FreeBSD < 6.0
* Define our own. Modified to use 'const char*'
* instead of (void *)
*/
/*-
* Copyright (c) 2005 Pascal Gloor <pascal.gloor@spale.com>
@ -169,12 +168,6 @@ void st_assign_cstr(stoken_t * st, const char stype, const char *value)
st->val[ST_MAX_SIZE - 1] = CHAR_NULL;
}
int st_equals_cstr(const stoken_t * st, const char stype,
const char *value)
{
return st->type == stype && !cstrcasecmp(value, st->val);
}
void st_copy(stoken_t * dest, const stoken_t * src)
{
memcpy(dest, src, sizeof(stoken_t));
@ -355,8 +348,10 @@ size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
if (cs[pos + 2] != '!') {
return 0;
}
// this is a mysql comment
// got "/*!"
/*
* this is a mysql comment
* got "/x!"
*/
if (pos + 3 >= len) {
return 3;
}
@ -364,7 +359,9 @@ size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
if (!isdigit(cs[pos + 3])) {
return 3;
}
// handle odd case of /*!0SELECT
/*
* handle odd case of /x!0SELECT
*/
if (!isdigit(cs[pos + 4])) {
return 4;
}
@ -388,7 +385,7 @@ size_t parse_slash(sfilter * sf)
const size_t slen = sf->slen;
size_t pos = sf->pos;
const char* cur = cs + pos;
size_t inc = 0;
size_t inc;
size_t pos1 = pos + 1;
if (pos1 == slen || cs[pos1] != '*') {
@ -398,18 +395,24 @@ size_t parse_slash(sfilter * sf)
inc = is_mysql_comment(cs, slen, pos);
if (inc == 0) {
// skip over initial '/*'
/*
* skip over initial '/x'
*/
const char *ptr =
(const char *) my_memmem(cur + 2, slen - (pos + 2), "*/", 2);
if (ptr == NULL) {
// unterminated comment
/*
* unterminated comment
*/
st_assign_cstr(current, 'c', cs + pos);
return slen;
} else {
// postgresql allows nested comments, which makes
// this incompatible with parsing, so
// if we find a '/*' inside the comment, then
// make a new token.
/*
* postgresql allows nested comments, which makes
* this incompatible with parsing, so
* if we find a '/x' inside the comment, then
* make a new token.
*/
char ctype = 'c';
const size_t clen = (ptr + 2) - (cur);
if (my_memmem(cur + 2, ptr - (cur + 2), "/*", 2) !=
@ -421,7 +424,9 @@ size_t parse_slash(sfilter * sf)
return pos + clen;
}
} else {
// MySQL Comment
/*
* MySQL Comment
*/
sf->in_comment = TRUE;
st_clear(current);
return pos + inc;
@ -448,8 +453,8 @@ size_t parse_operator2(sfilter * sf)
stoken_t *current = &sf->syntax_current;
const char *cs = sf->s;
const size_t slen = sf->slen;
char op2[3];
size_t pos = sf->pos;
char op2[3];
if (pos + 1 >= slen) {
return parse_operator1(sf);
@ -459,30 +464,37 @@ size_t parse_operator2(sfilter * sf)
op2[1] = cs[pos + 1];
op2[2] = CHAR_NULL;
// Special Hack for MYSQL style comments
// instead of turning:
// /*! FOO */ into FOO by rewriting the string, we
// turn it into FOO */ and ignore the ending comment
/*
* Special Hack for MYSQL style comments
* instead of turning:
* /x! FOO x/ into FOO by rewriting the string, we
* turn it into FOO x/ and ignore the ending comment
*/
if (sf->in_comment && op2[0] == '*' && op2[1] == '/') {
sf->in_comment = FALSE;
st_clear(current);
return pos + 2;
} else if (pos + 2 < slen && op2[0] == '<' && op2[1] == '='
&& cs[pos + 2] == '>') {
// special 3-char operator
/*
* special 3-char operator
*/
st_assign_cstr(current, 'o', "<=>");
return pos + 3;
} else if (is_operator2(op2)) {
if (streq(op2, "&&") || streq(op2, "||")) {
st_assign_cstr(current, '&', op2);
} else {
// normal 2 char operator
/*
* normal 2 char operator
*/
st_assign_cstr(current, 'o', op2);
}
return pos + 2;
} else {
// must be a single char operator
/*
* must be a single char operator
*/
return parse_operator1(sf);
}
}
@ -490,29 +502,41 @@ size_t parse_operator2(sfilter * sf)
size_t parse_string_core(const char *cs, const size_t len, size_t pos,
stoken_t * st, char delim, size_t offset)
{
// offset is to skip the perhaps first quote char
/*
* offset is to skip the perhaps first quote char
*/
const char *qpos =
(const char *) memchr((const void *) (cs + pos + offset), delim,
len - pos - offset);
// then keep string open/close info
/*
* then keep string open/close info
*/
if (offset == 1) {
// this is real quote
/*
* this is real quote
*/
st->str_open = delim;
} else {
// this was a simulated quote
/*
* this was a simulated quote
*/
st->str_open = CHAR_NULL;
}
while (TRUE) {
if (qpos == NULL) {
// string ended with no trailing quote
// assign what we have
/*
* string ended with no trailing quote
* assign what we have
*/
st_assign_cstr(st, 's', cs + pos + offset);
st->str_close = CHAR_NULL;
return len;
} else if (*(qpos - 1) != '\\') {
// ending quote is not escaped.. copy and end
/*
* ending quote is not escaped.. copy and end
*/
st_assign(st, 's', cs + pos + offset,
qpos - (cs + pos + offset));
st->str_close = delim;
@ -535,7 +559,9 @@ size_t parse_string(sfilter * sf)
const size_t slen = sf->slen;
size_t pos = sf->pos;
// assert cs[pos] == single or double quote
/*
* assert cs[pos] == single or double quote
*/
return parse_string_core(cs, slen, pos, current, cs[pos], 1);
}
@ -544,14 +570,39 @@ size_t parse_word(sfilter * sf)
stoken_t *current = &sf->syntax_current;
const char *cs = sf->s;
size_t pos = sf->pos;
char *dot;
char ch;
size_t slen =
strlenspn(cs + pos, sf->slen - pos,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.$");
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$.");
st_assign(current, 'n', cs + pos, slen);
dot = strchr(current->val, '.');
if (dot != NULL) {
*dot = '\0';
ch = bsearch_keyword_type(current->val, sql_keywords,
sql_keywords_sz);
if (ch == 'k' || ch == 'o') {
/*
* we got something like "SELECT.1"
*/
current->type = ch;
return pos + strlen(current->val);
} else {
/*
* something else, put back dot
*/
*dot = '.';
}
}
/*
* do normal lookup with word including '.'
*/
if (slen < ST_MAX_SIZE) {
char ch = bsearch_keyword_type(current->val, sql_keywords,
ch = bsearch_keyword_type(current->val, sql_keywords,
sql_keywords_sz);
if (ch == CHAR_NULL) {
ch = 'n';
@ -567,16 +618,18 @@ size_t parse_var(sfilter * sf)
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
size_t xlen = 0;
size_t pos1 = pos + 1;
size_t xlen;
// move past optional other '@'
/*
* move past optional other '@'
*/
if (pos1 < slen && cs[pos1] == '@') {
pos1 += 1;
}
xlen = strlenspn(cs + pos1, slen - pos1, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.$");
xlen = strlenspn(cs + pos1, slen - pos1,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.$");
if (xlen == 0) {
st_assign(current, 'v', cs + pos, (pos1 - pos));
return pos1;
@ -592,12 +645,15 @@ size_t parse_number(sfilter * sf)
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
size_t xlen = 0;
size_t start = 0;
size_t xlen;
size_t start;
if (pos + 1 < slen && cs[pos] == '0' && (cs[pos + 1] == 'X' || cs[pos + 1] == 'x')) {
// TBD compare if isxdigit
xlen = strlenspn(cs + pos + 2, slen - pos - 2, "0123456789ABCDEFabcdef");
/*
* TBD compare if isxdigit
*/
xlen =
strlenspn(cs + pos + 2, slen - pos - 2, "0123456789ABCDEFabcdef");
if (xlen == 0) {
st_assign_cstr(current, 'n', "0X");
return pos + 2;
@ -606,8 +662,8 @@ size_t parse_number(sfilter * sf)
return pos + 2 + xlen;
}
}
start = pos;
start = pos;
while (isdigit(cs[pos])) {
pos += 1;
}
@ -631,10 +687,12 @@ size_t parse_number(sfilter * sf)
pos += 1;
}
} else if (isalpha(cs[pos])) {
// oh no, we have something like '6FOO'
// use microsoft style parsing and take just
// the number part and leave the rest to be
// parsed later
/*
* oh no, we have something like '6FOO'
* use microsoft style parsing and take just
* the number part and leave the rest to be
* parsed later
*/
st_assign(current, '1', cs + start, pos - start);
return pos;
}
@ -682,9 +740,9 @@ void sfilter_reset(sfilter * sf, const char *s, size_t len)
int syntax_merge_words(stoken_t * a, stoken_t * b)
{
size_t sz1 = 0;
size_t sz2 = 0;
size_t sz3 = 0;
size_t sz1;
size_t sz2;
size_t sz3;
char tmp[ST_MAX_SIZE];
char ch;
@ -697,11 +755,12 @@ int syntax_merge_words(stoken_t * a, stoken_t * b)
sz1 = strlen(a->val);
sz2 = strlen(b->val);
sz3 = sz1 + sz2 + 1;
if (sz3 >= ST_MAX_SIZE) {
return FALSE;
}
// oddly annoying last.val + ' ' + current.val
/*
* oddly annoying last.val + ' ' + current.val
*/
memcpy(tmp, a->val, sz1);
tmp[sz1] = ' ';
memcpy(tmp + sz1 + 1, b->val, sz2);
@ -709,7 +768,9 @@ int syntax_merge_words(stoken_t * a, stoken_t * b)
ch = bsearch_keyword_type(tmp, multikeywords, multikeywords_sz);
if (ch != CHAR_NULL) {
// -1, don't copy the null byte
/*
* -1, don't copy the null byte
*/
st_assign(a, ch, tmp, sz3);
return TRUE;
} else {
@ -730,13 +791,15 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
}
st_clear(&sf->syntax_comment);
//
// If we don't have a saved token
//
/*
* If we don't have a saved token
*/
if (last->type == CHAR_NULL) {
switch (ttype) {
// items that have special needs
/*
* items that have special needs
*/
case 's':
st_copy(last, current);
continue;
@ -749,28 +812,34 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
st_copy(last, current);
continue;
} else if (current->type == 'o' || current->type == '&') {
//} else if (st_is_unary_op(current)) {
/* } else if (st_is_unary_op(current)) { */
st_copy(last, current);
continue;
} else {
// copy to out
/*
* copy to out
*/
st_copy(sout, current);
return TRUE;
}
default:
// copy to out
/*
* copy to out
*/
st_copy(sout, current);
return TRUE;
}
}
//
// We have a saved token
//
/*
* We have a saved token
*/
switch (ttype) {
case 's':
if (last->type == 's') {
// "FOO" "BAR" == "FOO" (skip second string)
/*
* "FOO" "BAR" == "FOO" (skip second string)
*/
continue;
} else {
st_copy(sout, last);
@ -780,18 +849,24 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
break;
case 'o':
// first case to handle "IS" + "NOT"
/*
* first case to handle "IS" + "NOT"
*/
if (syntax_merge_words(last, current)) {
continue;
} else if (st_is_unary_op(current)
&& (last->type == 'o' || last->type == '&'
|| last->type == 'U')) {
// if an operator is followed by a unary operator, skip it.
// 1, + ==> "+" is not unary, it's arithmetic
// AND, + ==> "+" is unary
/*
* if an operator is followed by a unary operator, skip it.
* 1, + ==> "+" is not unary, it's arithmetic
* AND, + ==> "+" is unary
*/
continue;
} else {
// no match
/*
* no match
*/
st_copy(sout, last);
st_copy(last, current);
return TRUE;
@ -803,7 +878,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
if (syntax_merge_words(last, current)) {
continue;
} else {
// total no match
/*
* total no match
*/
st_copy(sout, last);
st_copy(last, current);
return TRUE;
@ -811,15 +888,19 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
break;
default:
// fix up for ambiguous "IN"
// handle case where IN is typically a function
// but used in compound "IN BOOLEAN MODE" jive
/*
* fix up for ambiguous "IN"
* handle case where IN is typically a function
* but used in compound "IN BOOLEAN MODE" jive
*/
if (last->type == 'n' && !cstrcasecmp(last->val, "IN")) {
st_copy(last, current);
st_assign_cstr(sout, 'f', "IN");
return TRUE;
} else {
// no match at all
/*
* no match at all
*/
st_copy(sout, last);
st_copy(last, current);
return TRUE;
@ -828,7 +909,9 @@ int sqli_tokenize(sfilter * sf, stoken_t * sout)
}
}
// final cleanup
/*
* final cleanup
*/
if (last->type) {
st_copy(sout, last);
st_clear(last);
@ -855,8 +938,10 @@ int filter_fold(sfilter * sf, stoken_t * sout)
}
while (sqli_tokenize(sf, current)) {
// 0 = start of statement
// skip ( and unary ops
/*
* 0 = start of statement
* skip ( and unary ops
*/
if (sf->fold_state == 0) {
if (current->type == '(') {
continue;
@ -877,13 +962,16 @@ int filter_fold(sfilter * sf, stoken_t * sout)
st_copy(sout, current);
return TRUE;
} else if (last->type == '(' && st_is_unary_op(current)) {
// similar to beginning of statement
// an opening '(' resets state, and we should skip all
// unary operators
/*
* similar to beginning of statement
* an opening '(' resets state, and we should skip all
* unary operators
*/
continue;
} else if (last->type == '(' && current->type == '(') {
// if we get another '(' after another
// emit 1, but keep state
/* if we get another '(' after another
* emit 1, but keep state
*/
st_copy(sout, current);
return TRUE;
} else if ((last->type == '1' || last->type == 'n')
@ -941,7 +1029,8 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
{
int all_done = 0;
int tlen = 0;
int patmatch = 0;
int patmatch;
sfilter_reset(sql_state, s, slen);
sql_state->delim = delim;
@ -956,9 +1045,11 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
}
sql_state->pat[tlen] = CHAR_NULL;
// if token 5 (last) looks like a function word (such as ABS or ASCII)
// then check token 6 to see if it's a "(".
// if NOT then, it's not a function.
/*
* if token 5 (last) looks like a function word (such as ABS or ASCII)
* then check token 6 to see if it's a "(".
* if NOT then, it's not a function.
*/
if (tlen == MAX_TOKENS && !all_done
&& sql_state->pat[MAX_TOKENS - 1] == 'f') {
@ -970,11 +1061,13 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
return FALSE;
}
}
// check for 'X' in pattern
// this means parsing could not be done
// accurately due to pgsql's double comments
// or other syntax that isn't consistent
// should be very rare false positive
/*
* check for 'X' in pattern
* this means parsing could not be done
* accurately due to pgsql's double comments
* or other syntax that isn't consistent
* should be very rare false positive
*/
if (strchr(sql_state->pat, 'X')) {
return TRUE;
}
@ -987,13 +1080,17 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
}
switch (tlen) {
case 2:{
// if 'comment' is '#' ignore.. too many FP
/*
* if 'comment' is '#' ignore.. too many FP
*/
if (sql_state->tokenvec[1].val[0] == '#') {
sql_state->reason = __LINE__;
return FALSE;
}
// detect obvious sqli scans.. many people put '--' in plain text
// so only detect if input ends with '--', e.g. 1-- but not 1-- foo
/*
* detect obvious sqli scans.. many people put '--' in plain text
* so only detect if input ends with '--', e.g. 1-- but not 1-- foo
*/
if ((strlen(sql_state->tokenvec[1].val) > 2)
&& sql_state->tokenvec[1].val[0] == '-') {
sql_state->reason = __LINE__;
@ -1002,25 +1099,47 @@ int is_string_sqli(sfilter * sql_state, const char *s, size_t slen,
break;
}
case 3:{
// ...foo' + 'bar...
// no opening quote, no closing quote
// and each string has data
if (streq(sql_state->pat, "sos")
|| streq(sql_state->pat, "s&s")) {
/*
* ...foo' + 'bar...
* no opening quote, no closing quote
* and each string has data
*/
if (streq(sql_state->pat, "sos")
|| streq(sql_state->pat, "s&s")) {
if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
&& (sql_state->tokenvec[2].str_close == CHAR_NULL)) {
// if ....foo" + "bar....
/*
* if ....foo" + "bar....
*/
return TRUE;
} else {
// not sqli
/*
* not sqli
*/
sql_state->reason = __LINE__;
return FALSE;
}
break;
}
} /* case 3 */
case 5: {
if (streq(sql_state->pat, "sosos")) {
if (sql_state->tokenvec[0].str_open == CHAR_NULL) {
/*
* if ....foo" + "bar....
*/
return TRUE;
} else {
/*
* not sqli
*/
sql_state->reason = __LINE__;
return FALSE;
}
} /* case 3 */
} /* end switch */
break;
}
} /* case 5 */
} /* end switch */
return TRUE;
}
@ -1048,29 +1167,3 @@ int is_sqli(sfilter * sql_state, const char *s, size_t slen,
return FALSE;
}
/*
not used yet
// [('o', 228), ('k', 220), ('1', 217), (')', 157), ('(', 156), ('s', 154), ('n', 77), ('f', 73), (';', 59), (',', 35), ('v', 17), ('c', 15),
int char2int(char c)
{
const char *map = "ok1()snf;,";
const char *pos = strchr(map, c);
if (pos == NULL) {
return 15;
} else {
return (int) (pos - map) + 1;
}
}
unsigned long long pat2int(const char *pat)
{
unsigned long long val = 0;
while (*pat) {
val = (val << 4) + char2int(*pat);
pat += 1;
}
return val;
}
*/

View File

@ -57,7 +57,7 @@ typedef struct {
/* final sqli data */
stoken_t tokenvec[MAX_TOKENS];
// +1 for possible ending null
/* +1 for possible ending null */
char pat[MAX_TOKENS + 1];
char delim;
int reason;