mirror of
https://github.com/owasp-modsecurity/ModSecurity.git
synced 2025-08-15 23:55:03 +03:00
1446 lines
37 KiB
C
1446 lines
37 KiB
C
/**
|
|
* Copyright 2012,2013 Nick Galbreath
|
|
* nickg@client9.com
|
|
* BSD License -- see COPYING.txt for details
|
|
*
|
|
* (setq-default indent-tabs-mode nil)
|
|
* (setq c-default-style "k&r"
|
|
* c-basic-offset 4)
|
|
* indent -kr -nut
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
#include <assert.h>
|
|
|
|
#ifndef TRUE
|
|
#define TRUE 1
|
|
#endif
|
|
#ifndef FALSE
|
|
#define FALSE 0
|
|
#endif
|
|
|
|
#if 0
|
|
#define FOLD_DEBUG printf("%d: Fold state = %d, current=%c, last=%c\n", __LINE__, sf->fold_state, current->type, last->type == CHAR_NULL ? '~': last->type)
|
|
#else
|
|
#define FOLD_DEBUG
|
|
#endif
|
|
|
|
#include "libinjection_sqli_data.h"
|
|
|
|
/* memchr2 finds a two-character sequence inside a length-delimited buffer.
 *
 * This is a specialized version of "memmem" or "memchr";
 * 'memmem' doesn't exist on all platforms.
 *
 * haystack      buffer to search (does not need to be null terminated)
 * haystack_len  number of valid bytes in haystack
 * c0, c1        the two characters that must appear back to back
 *
 * Returns a pointer to the first byte of the first match, or NULL when
 * there is none or the buffer is shorter than two bytes.
 *
 * The scan advances one byte at a time.  Skipping two bytes after a
 * partial match (first byte equal, second not) would step over a match
 * that begins on the second byte, e.g. looking for star-slash inside a
 * star-star-slash run.
 *
 * Porting notes: this is just a special version of
 *    astring.find("AB")
 */
static const char *
memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
{
    size_t i;

    if (haystack_len < 2) {
        return NULL;
    }

    /* i + 1 < haystack_len keeps the second read inside the buffer */
    for (i = 0; i + 1 < haystack_len; ++i) {
        if (haystack[i] == c0 && haystack[i + 1] == c1) {
            return haystack + i;
        }
    }

    return NULL;
}
|
|
|
|
/** Length of the leading run of bytes that all belong to a given set.
 *
 * C Standard library 'strspn' only works for 'c-strings' (null terminated).
 * This works on a buffer of arbitrary length.
 *
 * s       buffer to scan (need not be null terminated)
 * len     number of valid bytes in s
 * accept  null-terminated set of permitted characters
 *
 * Returns the number of leading bytes of s that appear in accept
 * (at most len).
 *
 * A NUL byte inside s always ends the run: strchr() matches the
 * terminator of 'accept' when asked to find '\0', so without the
 * explicit test an embedded NUL would be counted as accepted.
 *
 * Performance notes:
 *   not critical
 *
 * Porting notes:
 *   if accept is 'ABC', then this function would be similar to
 *   a_regexp.match(a_str, '[ABC]*'),
 */
static size_t
strlenspn(const char *s, size_t len, const char *accept)
{
    size_t i;

    for (i = 0; i < len; ++i) {
        if (s[i] == '\0' || strchr(accept, s[i]) == NULL) {
            return i;
        }
    }
    return len;
}
|
|
|
|
/*
 * ASCII half-case-insensitive compare.
 *
 * DANGER: argument 'a' must already be upper case (this is asserted
 * byte by byte); only argument 'b' is folded to upper case, and only
 * for the ASCII range 'a'..'z'.
 *
 * Returns zero when the strings match, otherwise the difference between
 * the first pair of bytes that differ (after folding 'b').
 *
 * Required since the libc version uses the current locale
 * and is much slower.
 */
static int cstrcasecmp(const char *a, const char *b)
{
    for (;; ++a, ++b) {
        const char upper = *a;
        char other = *b;

        assert(upper < 'a' || upper > 'z');
        if (other >= 'a' && other <= 'z') {
            other -= 0x20;
        }
        /* stop on the first mismatch or once both strings are exhausted */
        if (upper != other || upper == '\0') {
            return upper - other;
        }
    }
}
|
|
|
|
/**
 * Case-sensitive string equality.
 *
 * Returns non-zero when the two null-terminated strings are identical.
 * Exists only so call sites read better than "strcmp(...) == 0".
 */
static int streq(const char *a, const char *b)
{
    const int order = strcmp(a, b);

    return order == 0;
}
|
|
|
|
/*
 * Case-sensitive binary search with "deferred detection of equality".
 * We assume in most cases the key will NOT be found.  This makes the
 * main loop only have one comparison branch, which should optimize
 * better in CPU.  See #Deferred_detection_of_equality in
 * http://en.wikipedia.org/wiki/Binary_search_algorithm
 *
 * key    null-terminated string to look for
 * base   array of null-terminated strings sorted in strcmp() order
 * nmemb  number of entries in base
 *
 * Returns 1 when key is present, 0 otherwise.  An empty table
 * (nmemb == 0) is reported as "not found" instead of being searched:
 * computing nmemb - 1 on an unsigned zero would wrap around and index
 * far outside the array.
 *
 * This is used for fingerprint lookups, and a few other places.
 * Note in normal operation this maybe takes 1% of total run time, so
 * replacing this with another datastructure probably isn't worth
 * the effort.
 */
static int bsearch_cstr(const char *key, const char *base[], size_t nmemb)
{
    size_t pos;
    size_t left = 0;
    size_t right;

    if (nmemb == 0) {
        return 0;
    }
    right = nmemb - 1;

    while (left < right) {
        /* midpoint written so that left + right cannot overflow */
        pos = left + ((right - left) >> 1);
        if (strcmp(base[pos], key) < 0) {
            left = pos + 1;
        } else {
            right = pos;
        }
    }

    /* left == right here; the single equality test was deferred to now */
    return strcmp(base[left], key) == 0;
}
|
|
|
|
/*
 * Case-insensitive binary search.
 *
 * key    null-terminated string to look for, any case
 * base   array of null-terminated strings; entries are passed as the
 *        first argument of cstrcasecmp, which requires them to be
 *        upper case, and must be sorted in that comparison order
 * nmemb  number of entries in base
 *
 * Returns 1 when key matches an entry, 0 otherwise.  An empty table
 * (nmemb == 0) is reported as "not found"; nmemb - 1 would otherwise
 * wrap around and the search would read outside the array.
 */
static int bsearch_cstrcase(const char *key, const char *base[], size_t nmemb)
{
    size_t pos;
    size_t left = 0;
    size_t right;

    if (nmemb == 0) {
        return 0;
    }
    right = nmemb - 1;

    while (left < right) {
        /* midpoint written so that left + right cannot overflow */
        pos = left + ((right - left) >> 1);
        /* arg0 = upper case only, arg1 = mixed case */
        if (cstrcasecmp(base[pos], key) < 0) {
            left = pos + 1;
        } else {
            right = pos;
        }
    }

    /* left == right here; equality is only tested once, at the end */
    return cstrcasecmp(base[left], key) == 0;
}
|
|
|
|
/**
|
|
*
|
|
*/
|
|
#define UNUSED(x) (void)(x)
|
|
|
|
static int is_sqli_pattern(const char* key, void* callbackarg)
|
|
{
|
|
UNUSED(callbackarg);
|
|
return bsearch_cstr(key, sql_fingerprints, sqli_fingerprints_sz);
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
*
|
|
* Porting Notes:
|
|
* given a mapping/hash of string to char
|
|
* this is just
|
|
* typecode = mapping[key.upper()]
|
|
*/
|
|
|
|
static char bsearch_keyword_type(const char *key, const keyword_t * keywords,
|
|
size_t numb)
|
|
{
|
|
size_t pos;
|
|
size_t left = 0;
|
|
size_t right = numb - 1;
|
|
|
|
while (left < right) {
|
|
pos = (left + right) >> 1;
|
|
|
|
/* arg0 = upper case only, arg1 = mixed case */
|
|
if (cstrcasecmp(keywords[pos].word, key) < 0) {
|
|
left = pos + 1;
|
|
} else {
|
|
right = pos;
|
|
}
|
|
}
|
|
if ((left == right) && cstrcasecmp(keywords[left].word, key) == 0) {
|
|
return keywords[left].type;
|
|
} else {
|
|
return CHAR_NULL;
|
|
}
|
|
}
|
|
|
|
static char is_keyword(const char* key)
|
|
{
|
|
return bsearch_keyword_type(key, sql_keywords, sql_keywords_sz);
|
|
}
|
|
|
|
/* st_token methods
|
|
*
|
|
* The following functions manipulates the stoken_t type
|
|
*
|
|
*
|
|
*/
|
|
|
|
/*
 * Resets a token to the "empty" state: type, both string delimiters
 * and the first byte of the value are all set to CHAR_NULL.
 */
static void st_clear(stoken_t * st)
{
    st->val[0] = CHAR_NULL;
    st->str_close = CHAR_NULL;
    st->str_open = CHAR_NULL;
    st->type = CHAR_NULL;
}
|
|
|
|
static int st_is_empty(const stoken_t * st)
|
|
{
|
|
return st->type == CHAR_NULL;
|
|
}
|
|
|
|
/*
 * Makes 'st' a token of type 'stype' whose value is the one-character
 * string "value".  The string delimiter fields are left untouched.
 */
static void st_assign_char(stoken_t * st, const char stype, const char value)
{
    st->val[1] = CHAR_NULL;
    st->val[0] = value;
    st->type = stype;
}
|
|
|
|
/*
 * Makes 'st' a token of type 'stype' whose value is a copy of the
 * first 'len' bytes of 'value'.
 *
 * The copy is truncated to ST_MAX_SIZE - 1 bytes so that the
 * terminating null always fits.  The string delimiter fields are left
 * untouched.
 */
static void st_assign(stoken_t * st, const char stype, const char *value,
                      size_t len)
{
    size_t ncopy = len;

    if (!(ncopy < ST_MAX_SIZE)) {
        ncopy = ST_MAX_SIZE - 1;
    }

    st->type = stype;
    memcpy(st->val, value, ncopy);
    st->val[ncopy] = CHAR_NULL;
}
|
|
|
|
/*
 * Copies every field of token 'src' into token 'dest'.
 */
static void st_copy(stoken_t * dest, const stoken_t * src)
{
    *dest = *src;
}
|
|
|
|
static int st_is_multiword_start(const stoken_t * st)
|
|
{
|
|
return bsearch_cstrcase(st->val,
|
|
multikeywords_start,
|
|
multikeywords_start_sz);
|
|
}
|
|
|
|
static int st_is_unary_op(const stoken_t * st)
|
|
{
|
|
return (st->type == 'o' && !(strcmp(st->val, "+") &&
|
|
strcmp(st->val, "-") &&
|
|
strcmp(st->val, "!") &&
|
|
strcmp(st->val, "!!") &&
|
|
/* arg0 = upper case only, arg1 = mixed case */
|
|
cstrcasecmp("NOT", st->val) &&
|
|
strcmp(st->val, "~")));
|
|
}
|
|
|
|
static int st_is_arith_op(const stoken_t * st)
|
|
{
|
|
return (st->type == 'o' && !(strcmp(st->val, "-") &&
|
|
strcmp(st->val, "+") &&
|
|
strcmp(st->val, "~") &&
|
|
strcmp(st->val, "!") &&
|
|
strcmp(st->val, "/") &&
|
|
strcmp(st->val, "%") &&
|
|
strcmp(st->val, "*") &&
|
|
strcmp(st->val, "|") &&
|
|
strcmp(st->val, "&") &&
|
|
/* arg1 = upper case only, arg1 = mixed case */
|
|
cstrcasecmp("MOD", st->val) &&
|
|
cstrcasecmp("DIV", st->val)));
|
|
}
|
|
|
|
/* Parsers
|
|
*
|
|
*
|
|
*/
|
|
|
|
|
|
/*
 * Skips one character without producing a token.
 * Returns the position following the current one.
 */
static size_t parse_white(sfilter * sf)
{
    const size_t next = sf->pos + 1;

    return next;
}
|
|
|
|
/*
 * Emits the current character as a one-character operator token
 * (type 'o').  Returns the position following it.
 */
static size_t parse_operator1(sfilter * sf)
{
    const size_t here = sf->pos;

    st_assign_char(&sf->syntax_current, 'o', sf->s[here]);
    return here + 1;
}
|
|
|
|
/*
 * Emits the current character as a one-character token of type '?'.
 * Returns the position following it.
 */
static size_t parse_other(sfilter * sf)
{
    const size_t here = sf->pos;

    st_assign_char(&sf->syntax_current, '?', sf->s[here]);
    return here + 1;
}
|
|
|
|
/*
 * Emits the current character as a one-character token whose type is
 * the character itself.  Returns the position following it.
 */
static size_t parse_char(sfilter * sf)
{
    const size_t here = sf->pos;
    const char self = sf->s[here];

    st_assign_char(&sf->syntax_current, self, self);
    return here + 1;
}
|
|
|
|
/*
 * Emits a comment token (type 'c') running from the current position
 * up to, but not including, the next newline; when there is no newline
 * the comment runs to the end of the input.
 *
 * Returns the position after the newline, or the input length when the
 * comment reached the end of the input.
 */
static size_t parse_eol_comment(sfilter * sf)
{
    const char *begin = sf->s + sf->pos;
    const size_t remaining = sf->slen - sf->pos;
    const char *newline =
        (const char *) memchr((const void *) begin, '\n', remaining);

    if (newline != NULL) {
        st_assign(&sf->syntax_current, 'c', begin, (size_t) (newline - begin));
        return (size_t) (newline - sf->s) + 1;
    }

    st_assign(&sf->syntax_current, 'c', begin, remaining);
    return sf->slen;
}
|
|
|
|
/*
 * Handles '-': two dashes in a row start an end-of-line comment,
 * anything else makes the dash a one-character operator (type 'o').
 * Returns the position at which tokenizing should resume.
 */
static size_t parse_dash(sfilter * sf)
{
    const size_t next = sf->pos + 1;

    if (next >= sf->slen || sf->s[next] != '-') {
        st_assign_char(&sf->syntax_current, 'o', '-');
        return next;
    }

    return parse_eol_comment(sf);
}
|
|
|
|
/*
 * Decides whether the comment opener at 'pos' is a MySQL conditional
 * comment ("/x!" where x is a star) and how many bytes its prefix
 * occupies.
 *
 * cs   input buffer (need not be null terminated)
 * len  number of valid bytes in cs
 * pos  index of the slash that opens the comment; the caller has
 *      already checked that the following byte is a star
 *
 * Returns
 *   0  not a MySQL comment (third byte is missing or is not '!')
 *   3  "/x!" with no usable version number after it
 *   4  "/x!" followed by one digit (e.g. the odd "/x!0SELECT"), or by
 *      digits that run into the end of the input
 *   8  "/x!" followed by exactly five digits
 *
 * Every read is bounds-checked against len, and bytes are converted to
 * unsigned char before isdigit() because passing a negative char value
 * to the <ctype.h> functions is undefined.
 */
static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
{
    size_t i;

    if (pos + 2 >= len) {
        return 0;
    }
    if (cs[pos + 2] != '!') {
        return 0;
    }

    /*
     * this is a mysql comment
     * got "/x!"
     */
    if (pos + 3 >= len) {
        return 3;
    }
    if (!isdigit((unsigned char) cs[pos + 3])) {
        return 3;
    }

    /*
     * handle odd case of /x!0SELECT
     * (a single digit that is also the last byte counts the same way)
     */
    if (pos + 4 >= len) {
        return 4;
    }
    if (!isdigit((unsigned char) cs[pos + 4])) {
        return 4;
    }

    /* not enough room left for a full five-digit version number */
    if (pos + 7 >= len) {
        return 4;
    }

    for (i = pos + 5; i <= pos + 7; ++i) {
        if (!isdigit((unsigned char) cs[i])) {
            return 3;
        }
    }
    return 8;
}
|
|
|
|
/*
 * Handles '/': either the division operator or the start of a
 * C-style comment.
 *
 *  - not followed by a star: one-character operator
 *  - MySQL "/x!" comment: no token is produced, sf->in_comment is set
 *    and scanning resumes after the prefix reported by is_mysql_comment
 *  - ordinary comment: a 'c' token holding the whole comment, running
 *    to the end of the input when it is unterminated
 *  - comment that contains another comment opener: token type 'X'
 *    (postgresql allows nested comments, which this parser cannot
 *    follow)
 *
 * Returns the position at which tokenizing should resume.
 */
static size_t parse_slash(sfilter * sf)
{
    stoken_t *current = &sf->syntax_current;
    const char *cs = sf->s;
    const size_t slen = sf->slen;
    const size_t pos = sf->pos;
    const char *start = cs + pos;
    const char *body;
    const char *closer;
    size_t prefix;
    size_t clen;
    char ctype;

    if (pos + 1 == slen || cs[pos + 1] != '*') {
        return parse_operator1(sf);
    }

    prefix = is_mysql_comment(cs, slen, pos);
    if (prefix != 0) {
        /*
         * MySQL Comment
         */
        sf->in_comment = TRUE;
        st_clear(current);
        return pos + prefix;
    }

    /*
     * skip over initial '/x'
     */
    body = start + 2;
    closer = memchr2(body, slen - (pos + 2), '*', '/');
    if (closer == NULL) {
        /*
         * unterminated comment
         */
        st_assign(current, 'c', start, slen - pos);
        return slen;
    }

    /*
     * the search window for a nested opener ends on the star of the
     * closing pair, so an opener whose star is that same byte counts
     */
    clen = (closer + 2) - start;
    ctype = (memchr2(body, closer - (start + 1), '/', '*') != NULL) ? 'X' : 'c';
    st_assign(current, ctype, start, clen);

    return pos + clen;
}
|
|
|
|
/*
 * Handles a backslash.
 *
 * Weird MySQL alias for NULL, "\N" (capital N only), becomes a token
 * of type '1' with the value "NULL" and consumes two characters.
 * Any other backslash is handed to parse_other.
 */
static size_t parse_backslash(sfilter * sf)
{
    const size_t pos = sf->pos;
    const int is_null_alias = (pos + 1 < sf->slen) && (sf->s[pos + 1] == 'N');

    if (!is_null_alias) {
        return parse_other(sf);
    }

    st_assign(&sf->syntax_current, '1', "NULL", 4);
    return pos + 2;
}
|
|
|
|
/** Is input a 2-char operator?
|
|
*
|
|
*/
|
|
static int is_operator2(const char *key)
|
|
{
|
|
return bsearch_cstr(key, operators2, operators2_sz);
|
|
}
|
|
|
|
/*
 * Handles characters that may begin a multi-character operator.
 *
 * In order of precedence:
 *  - last character of the input: one-character operator
 *  - star-slash while inside a MySQL comment: leaves the comment and
 *    produces no token
 *  - "<=>": the special 3-char operator
 *  - a pair listed in operators2: type '&' for "&&" and "||",
 *    type 'o' for everything else
 *  - otherwise: one-character operator
 *
 * Returns the position at which tokenizing should resume.
 */
static size_t parse_operator2(sfilter * sf)
{
    stoken_t *current = &sf->syntax_current;
    const char *cs = sf->s;
    const size_t slen = sf->slen;
    const size_t pos = sf->pos;
    char pair[3];
    char ttype;

    if (pos + 1 >= slen) {
        return parse_operator1(sf);
    }

    pair[0] = cs[pos];
    pair[1] = cs[pos + 1];
    pair[2] = CHAR_NULL;

    /*
     * Special Hack for MYSQL style comments
     *  instead of turning:
     * /x! FOO x/  into FOO by rewriting the string, we
     * turn it into FOO x/ and ignore the ending comment
     */
    if (sf->in_comment && pair[0] == '*' && pair[1] == '/') {
        sf->in_comment = FALSE;
        st_clear(current);
        return pos + 2;
    }

    /*
     * special 3-char operator
     */
    if (pos + 2 < slen && pair[0] == '<' && pair[1] == '='
        && cs[pos + 2] == '>') {
        st_assign(current, 'o', "<=>", 3);
        return pos + 3;
    }

    /*
     * must be a single char operator
     */
    if (!is_operator2(pair)) {
        return parse_operator1(sf);
    }

    /*
     * logical and/or get their own type, the rest are
     * normal 2 char operators
     */
    ttype = (streq(pair, "&&") || streq(pair, "||")) ? '&' : 'o';
    st_assign(current, ttype, pair, 2);
    return pos + 2;
}
|
|
|
|
static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
|
|
stoken_t * st, char delim, size_t offset)
|
|
{
|
|
/*
|
|
* offset is to skip the perhaps first quote char
|
|
*/
|
|
const char *qpos =
|
|
(const char *) memchr((const void *) (cs + pos + offset), delim,
|
|
len - pos - offset);
|
|
|
|
/*
|
|
* then keep string open/close info
|
|
*/
|
|
if (offset == 1) {
|
|
/*
|
|
* this is real quote
|
|
*/
|
|
st->str_open = delim;
|
|
} else {
|
|
/*
|
|
* this was a simulated quote
|
|
*/
|
|
st->str_open = CHAR_NULL;
|
|
}
|
|
|
|
while (TRUE) {
|
|
if (qpos == NULL) {
|
|
/*
|
|
* string ended with no trailing quote
|
|
* assign what we have
|
|
*/
|
|
st_assign(st, 's', cs + pos + offset, len - pos - offset);
|
|
st->str_close = CHAR_NULL;
|
|
return len;
|
|
} else if (*(qpos - 1) != '\\') {
|
|
/*
|
|
* ending quote is not escaped.. copy and end
|
|
*/
|
|
st_assign(st, 's', cs + pos + offset,
|
|
qpos - (cs + pos + offset));
|
|
st->str_close = delim;
|
|
return qpos - cs + 1;
|
|
} else {
|
|
qpos =
|
|
(const char *) memchr((const void *) (qpos + 1), delim,
|
|
(cs + len) - (qpos + 1));
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Used when first char is a ' or "
|
|
*/
|
|
static size_t parse_string(sfilter * sf)
|
|
{
|
|
stoken_t *current = &sf->syntax_current;
|
|
const char *cs = sf->s;
|
|
const size_t slen = sf->slen;
|
|
size_t pos = sf->pos;
|
|
|
|
/*
|
|
* assert cs[pos] == single or double quote
|
|
*/
|
|
return parse_string_core(cs, slen, pos, current, cs[pos], 1);
|
|
}
|
|
|
|
/*
 * Parses a bare word: the longest run of letters, digits, '_', '$'
 * and '.' starting at the current position.
 *
 * The word is first stored as type 'n'.  If it contains a dot and the
 * part before the first dot is a keyword of type 'k' or 'o' (something
 * like "SELECT.1"), only that part becomes the token and scanning
 * resumes at the dot.  Otherwise the whole word is looked up with
 * is_keyword and takes the type found, staying 'n' when it is not
 * listed.  Words too long to fit a token are not looked up.
 *
 * Returns the position at which tokenizing should resume.
 */
static size_t parse_word(sfilter * sf)
{
    stoken_t *current = &sf->syntax_current;
    const size_t pos = sf->pos;
    const char *word = sf->s + pos;
    const size_t wlen =
        strlenspn(word, sf->slen - pos,
                  "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$.");
    char *dot;
    char kind;

    st_assign(current, 'n', word, wlen);

    dot = strchr(current->val, '.');
    if (dot != NULL) {
        /* temporarily cut the value at the dot to test the prefix */
        *dot = '\0';
        kind = is_keyword(current->val);

        if (kind == 'k' || kind == 'o') {
            /*
             * we got something like "SELECT.1"
             */
            current->type = kind;
            return pos + strlen(current->val);
        }

        /*
         * something else, put back dot
         */
        *dot = '.';
    }

    /*
     * do normal lookup with word including '.'
     */
    if (wlen < ST_MAX_SIZE) {
        kind = is_keyword(current->val);
        current->type = (kind == CHAR_NULL) ? 'n' : kind;
    }
    return pos + wlen;
}
|
|
|
|
/*
 * Parses a variable reference: the character at the current position,
 * an optional '@' right after it, then the longest run of letters,
 * digits, '_', '.' and '$'.  The whole span becomes a token of type
 * 'v', even when the name part is empty.
 *
 * Returns the position after the variable.
 */
static size_t parse_var(sfilter * sf)
{
    const char *cs = sf->s;
    const size_t slen = sf->slen;
    const size_t pos = sf->pos;
    size_t name_at = pos + 1;
    size_t name_len;

    /*
     * move past optional other '@'
     */
    if (name_at < slen && cs[name_at] == '@') {
        name_at += 1;
    }

    name_len = strlenspn(cs + name_at, slen - name_at,
                         "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.$");

    /* prefix length plus name length; the name may be empty */
    st_assign(&sf->syntax_current, 'v', cs + pos, (name_at - pos) + name_len);
    return name_at + name_len;
}
|
|
|
|
/*
 * Parses a money literal: '$' followed by a run of digits, '.' and ','.
 *
 * $1,000.00 or $1.000,00 ok!
 * This also parses $....,,,111 but that's ok
 *
 * With at least one such character the span becomes a token of type
 * '1'; a lone '$' is skipped without producing a token.
 *
 * Returns the position at which tokenizing should resume.
 */
static size_t parse_money(sfilter *sf)
{
    const char *cs = sf->s;
    const size_t pos = sf->pos;
    const size_t amount_len =
        strlenspn(cs + pos + 1, sf->slen - pos - 1, "0123456789.,");

    if (amount_len != 0) {
        st_assign(&sf->syntax_current, '1', cs + pos, 1 + amount_len);
        return pos + 1 + amount_len;
    }

    /*
     * just ignore '$'
     */
    return pos + 1;
}
|
|
|
|
/*
 * Parses a numeric literal starting at the current position.
 *
 *  - "0x" / "0X" followed by hex digits: token of type '1'
 *  - "0x" / "0X" with no hex digit after it: token of type 'n' with
 *    the value "0X"
 *  - digits, optional '.' and more digits, optional exponent
 *    ('E' or 'e', optional sign, digits): token of type '1'
 *  - a lone '.': token of type 'n'
 *
 * A number directly followed by letters (something like '6FOO') uses
 * microsoft style parsing: just the number part is taken and the rest
 * is left to be parsed later.
 *
 * Bytes are converted to unsigned char before isdigit() because
 * passing a negative char value to the <ctype.h> functions is
 * undefined.
 *
 * Returns the position after the literal.
 */
static size_t parse_number(sfilter * sf)
{
    stoken_t *current = &sf->syntax_current;
    const char *cs = sf->s;
    const size_t slen = sf->slen;
    size_t pos = sf->pos;
    size_t xlen;
    size_t start;

    if (pos + 1 < slen && cs[pos] == '0' && (cs[pos + 1] == 'X' || cs[pos + 1] == 'x')) {
        /*
         * TBD compare if isxdigit
         */
        xlen =
            strlenspn(cs + pos + 2, slen - pos - 2, "0123456789ABCDEFabcdef");
        if (xlen == 0) {
            st_assign(current, 'n', "0X", 2);
            return pos + 2;
        }
        st_assign(current, '1', cs + pos, 2 + xlen);
        return pos + 2 + xlen;
    }

    start = pos;
    while (pos < slen && isdigit((unsigned char) cs[pos])) {
        pos += 1;
    }
    if (pos < slen && cs[pos] == '.') {
        pos += 1;
        while (pos < slen && isdigit((unsigned char) cs[pos])) {
            pos += 1;
        }
        if (pos - start == 1) {
            /* only the dot was consumed: no digits on either side */
            st_assign_char(current, 'n', '.');
            return pos;
        }
    }

    /* optional exponent */
    if (pos < slen && (cs[pos] == 'E' || cs[pos] == 'e')) {
        pos += 1;
        if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
            pos += 1;
        }
        while (pos < slen && isdigit((unsigned char) cs[pos])) {
            pos += 1;
        }
    }

    /*
     * whatever follows (letters included) is not part of the number
     */
    st_assign(current, '1', cs + start, pos - start);
    return pos;
}
|
|
|
|
/*
 * Produces the next raw token in sf->syntax_current and advances
 * sf->pos.
 *
 * Returns TRUE when a token was produced, FALSE when the input is
 * exhausted.
 */
int parse_token(sfilter * sf)
{
    stoken_t *current = &sf->syntax_current;
    const char *s = sf->s;
    const size_t slen = sf->slen;
    pt2Function fnptr;
    int ch;

    st_clear(current);

    /*
     * if we are at beginning of string
     *  and in single-quote or double quote mode
     *  then pretend the input starts with a quote
     */
    if (sf->pos == 0 && sf->delim != CHAR_NULL) {
        sf->pos = parse_string_core(s, slen, 0, current, sf->delim, 0);
        return TRUE;
    }

    while (sf->pos < slen) {
        /*
         * get current character
         */
        ch = (int) (s[sf->pos]);

        /*
         * if not ascii, then continue...
         *   actually probably need to just assuming
         *   it's a string
         */
        if (ch < 0 || ch > 127) {
            sf->pos += 1;
            continue;
        }

        /*
         * look up the parser, and call it
         *
         * Porting Note: this is mapping of char to function
         *   charparsers[ch]()
         */
        fnptr = char_parse_map[ch];
        sf->pos = (*fnptr) (sf);

        /*
         * parsers that only skip input leave the token empty;
         * keep going until one of them fills it in
         */
        if (current->type != CHAR_NULL) {
            return TRUE;
        }
    }
    return FALSE;
}
|
|
|
|
/**
|
|
* Initializes parsing state
|
|
* TBD: explicity add parsing content (NULL, SINGLE, DOUBLE)
|
|
*/
|
|
void sfilter_reset(sfilter * sf, const char *s, size_t len)
|
|
{
|
|
memset(sf, 0, sizeof(sfilter));
|
|
sf->s = s;
|
|
sf->slen = len;
|
|
}
|
|
|
|
/** See if two tokens can be merged since they are compound SQL phrases.
|
|
*
|
|
* This takes two tokens, and, if they are the right type,
|
|
* merges their values together. Then checks to see if the
|
|
* new value is special using the PHRASES mapping.
|
|
*
|
|
* Example: "UNION" + "ALL" ==> "UNION ALL"
|
|
*
|
|
* C Security Notes: this is safe to use C-strings (null-terminated)
|
|
* since the types involved by definition do not have embedded nulls
|
|
* (e.g. there is no keyword with embedded null)
|
|
*
|
|
* Porting Notes: since this is C, it's oddly complicated.
|
|
* This is just: multikeywords[token.value + ' ' + token2.value]
|
|
*
|
|
*/
|
|
static int syntax_merge_words(stoken_t * a, stoken_t * b)
|
|
{
|
|
size_t sz1;
|
|
size_t sz2;
|
|
size_t sz3;
|
|
char tmp[ST_MAX_SIZE];
|
|
char ch;
|
|
|
|
if (!
|
|
(a->type == 'k' || a->type == 'n' || a->type == 'o'
|
|
|| a->type == 'U')) {
|
|
return FALSE;
|
|
}
|
|
|
|
sz1 = strlen(a->val);
|
|
sz2 = strlen(b->val);
|
|
sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
|
|
if (sz3 >= ST_MAX_SIZE) { /* make sure there is room for ending null */
|
|
return FALSE;
|
|
}
|
|
/*
|
|
* oddly annoying last.val + ' ' + current.val
|
|
*/
|
|
memcpy(tmp, a->val, sz1);
|
|
tmp[sz1] = ' ';
|
|
memcpy(tmp + sz1 + 1, b->val, sz2);
|
|
tmp[sz3] = CHAR_NULL;
|
|
|
|
ch = bsearch_keyword_type(tmp, multikeywords, multikeywords_sz);
|
|
if (ch != CHAR_NULL) {
|
|
st_assign(a, ch, tmp, sz3);
|
|
return TRUE;
|
|
} else {
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
/* This does some simple syntax cleanup based on the token
|
|
*
|
|
*
|
|
*/
|
|
int sqli_tokenize(sfilter * sf, stoken_t * sout)
|
|
{
|
|
stoken_t *last = &sf->syntax_last;
|
|
stoken_t *current = &sf->syntax_current;
|
|
|
|
while (parse_token(sf)) {
|
|
char ttype = current->type;
|
|
|
|
/*
|
|
* TBD: hmm forgot logic here.
|
|
*/
|
|
if (ttype == 'c') {
|
|
st_copy(&sf->syntax_comment, current);
|
|
continue;
|
|
}
|
|
st_clear(&sf->syntax_comment);
|
|
|
|
/*
|
|
* If we don't have a saved token, and we have
|
|
* a string: save it. if the next token is also a string
|
|
* then merge them. e.g. "A" "B" in SQL is actually "AB"
|
|
* a n/k/U/o type: save since next token my be merged together
|
|
* for example: "LEFT" + "JOIN" = "LEFT JOIN"
|
|
* a o/& type: TBD need to review.
|
|
*
|
|
*/
|
|
if (last->type == CHAR_NULL) {
|
|
switch (ttype) {
|
|
|
|
/*
|
|
* items that have special needs
|
|
*/
|
|
case 's':
|
|
st_copy(last, current);
|
|
continue;
|
|
case 'n':
|
|
case 'k':
|
|
case 'U':
|
|
case '&':
|
|
case 'o':
|
|
if (st_is_multiword_start(current)) {
|
|
st_copy(last, current);
|
|
continue;
|
|
} else if (current->type == 'o' || current->type == '&') {
|
|
/* } else if (st_is_unary_op(current)) { */
|
|
st_copy(last, current);
|
|
continue;
|
|
} else {
|
|
/*
|
|
* copy to out
|
|
*/
|
|
st_copy(sout, current);
|
|
return TRUE;
|
|
}
|
|
default:
|
|
/*
|
|
* copy to out
|
|
*/
|
|
st_copy(sout, current);
|
|
return TRUE;
|
|
}
|
|
}
|
|
/*
|
|
* We have a saved token
|
|
*/
|
|
|
|
switch (ttype) {
|
|
case 's':
|
|
if (last->type == 's') {
|
|
/*
|
|
* "FOO" "BAR" == "FOO" (skip second string)
|
|
*/
|
|
continue;
|
|
} else {
|
|
st_copy(sout, last);
|
|
st_copy(last, current);
|
|
return TRUE;
|
|
}
|
|
break;
|
|
|
|
case 'o':
|
|
/*
|
|
* first case to handle "IS" + "NOT"
|
|
*/
|
|
if (syntax_merge_words(last, current)) {
|
|
continue;
|
|
} else if (st_is_unary_op(current)
|
|
&& (last->type == 'o' || last->type == '&'
|
|
|| last->type == 'U')) {
|
|
/*
|
|
* if an operator is followed by a unary operator, skip it.
|
|
* 1, + ==> "+" is not unary, it's arithmetic
|
|
* AND, + ==> "+" is unary
|
|
*/
|
|
continue;
|
|
} else {
|
|
/*
|
|
* no match
|
|
*/
|
|
st_copy(sout, last);
|
|
st_copy(last, current);
|
|
return TRUE;
|
|
}
|
|
break;
|
|
|
|
case 'n':
|
|
case 'k':
|
|
if (syntax_merge_words(last, current)) {
|
|
continue;
|
|
} else {
|
|
/*
|
|
* total no match
|
|
*/
|
|
st_copy(sout, last);
|
|
st_copy(last, current);
|
|
return TRUE;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
/*
|
|
* fix up for ambigous "IN"
|
|
* handle case where IN is typically a function
|
|
* but used in compound "IN BOOLEAN MODE" jive
|
|
*
|
|
* warning on cstrcasecmp arg0=upper case only, arg1 = mixed
|
|
*/
|
|
if (last->type == 'n' && !cstrcasecmp("IN", last->val)) {
|
|
st_copy(last, current);
|
|
st_assign(sout, 'f', "IN", 2);
|
|
return TRUE;
|
|
} else {
|
|
/*
|
|
* no match at all
|
|
*/
|
|
st_copy(sout, last);
|
|
st_copy(last, current);
|
|
return TRUE;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* final cleanup
|
|
*/
|
|
if (last->type) {
|
|
st_copy(sout, last);
|
|
st_clear(last);
|
|
return TRUE;
|
|
} else if (sf->syntax_comment.type) {
|
|
/*
|
|
* TBD
|
|
*/
|
|
st_copy(sout, &sf->syntax_comment);
|
|
st_clear(&sf->syntax_comment);
|
|
return TRUE;
|
|
} else {
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* My apologies, this code is a mess
|
|
*/
|
|
int filter_fold(sfilter * sf, stoken_t * sout)
|
|
{
|
|
stoken_t *last = &sf->fold_last;
|
|
stoken_t *current = &sf->fold_current;
|
|
|
|
if (sf->fold_state == 4 && !st_is_empty(last)) {
|
|
st_copy(sout, last);
|
|
sf->fold_state = 2;
|
|
st_clear(last);
|
|
return FALSE;
|
|
}
|
|
|
|
while (sqli_tokenize(sf, current)) {
|
|
/*
|
|
* 0 = start of statement
|
|
* skip ( and unary ops
|
|
*/
|
|
if (sf->fold_state == 0) {
|
|
if (current->type == '(') {
|
|
continue;
|
|
}
|
|
if (st_is_unary_op(current)) {
|
|
continue;
|
|
}
|
|
sf->fold_state = 1;
|
|
}
|
|
|
|
if (st_is_empty(last)) {
|
|
FOLD_DEBUG;
|
|
if (current->type == '1' || current->type == 'n'
|
|
|| current->type == '(') {
|
|
sf->fold_state = 2;
|
|
st_copy(last, current);
|
|
}
|
|
st_copy(sout, current);
|
|
return FALSE;
|
|
} else if (last->type == '(' && st_is_unary_op(current)) {
|
|
/*
|
|
* similar to beginning of statement
|
|
* an opening '(' resets state, and we should skip all
|
|
* unary operators
|
|
*/
|
|
continue;
|
|
} else if (last->type == '(' && current->type == '(') {
|
|
/* if we get another '(' after another
|
|
* emit 1, but keep state
|
|
*/
|
|
st_copy(sout, current);
|
|
return FALSE;
|
|
} else if ((last->type == '1' || last->type == 'n')
|
|
&& st_is_arith_op(current)) {
|
|
FOLD_DEBUG;
|
|
st_copy(last, current);
|
|
} else if (last->type == 'o'
|
|
&& (current->type == '1' || current->type == 'n')) {
|
|
FOLD_DEBUG;
|
|
st_copy(last, current);
|
|
} else {
|
|
if (sf->fold_state == 2) {
|
|
if (last->type != '1' && last->type != '('
|
|
&& last->type != 'n') {
|
|
FOLD_DEBUG;
|
|
st_copy(sout, last);
|
|
st_copy(last, current);
|
|
sf->fold_state = 4;
|
|
} else {
|
|
FOLD_DEBUG;
|
|
st_copy(sout, current);
|
|
st_clear(last);
|
|
}
|
|
return FALSE;
|
|
} else {
|
|
if (last->type == 'o') {
|
|
st_copy(sout, last);
|
|
st_copy(last, current);
|
|
sf->fold_state = 4;
|
|
} else {
|
|
sf->fold_state = 2;
|
|
st_copy(sout, current);
|
|
st_clear(last);
|
|
}
|
|
return FALSE;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!st_is_empty(last)) {
|
|
if (st_is_arith_op(last)) {
|
|
st_copy(sout, last);
|
|
st_clear(last);
|
|
return FALSE;
|
|
} else {
|
|
st_clear(last);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* all done: nothing more to parse
|
|
*/
|
|
return TRUE;
|
|
}
|
|
|
|
/* secondary api: detects SQLi in a string, GIVEN a context.
|
|
*
|
|
* A context can be:
|
|
* * CHAR_NULL (\0), process as is
|
|
* * CHAR_SINGLE ('), process pretending input started with a
|
|
* single quote.
|
|
* * CHAR_DOUBLE ("), process pretending input started with a
|
|
* double quote.
|
|
*
|
|
*/
|
|
int libinjection_is_string_sqli(sfilter * sql_state,
|
|
const char *s, size_t slen,
|
|
const char delim,
|
|
ptr_fingerprints_fn fn, void* callbackarg)
|
|
{
|
|
int tlen = 0;
|
|
char ch;
|
|
int patmatch;
|
|
int all_done;
|
|
|
|
sfilter_reset(sql_state, s, slen);
|
|
sql_state->delim = delim;
|
|
|
|
while (tlen < MAX_TOKENS) {
|
|
all_done = filter_fold(sql_state, &(sql_state->tokenvec[tlen]));
|
|
if (all_done) {
|
|
break;
|
|
}
|
|
|
|
sql_state->pat[tlen] = sql_state->tokenvec[tlen].type;
|
|
tlen += 1;
|
|
}
|
|
|
|
/*
|
|
* make the fingerprint pattern a c-string (null delimited)
|
|
*/
|
|
sql_state->pat[tlen] = CHAR_NULL;
|
|
|
|
/*
|
|
* check for 'X' in pattern
|
|
* this means parsing could not be done
|
|
* accurately due to pgsql's double comments
|
|
* or other syntax that isn't consistent
|
|
* should be very rare false positive
|
|
*/
|
|
if (strchr(sql_state->pat, 'X')) {
|
|
return TRUE;
|
|
}
|
|
|
|
patmatch = fn(sql_state->pat, callbackarg);
|
|
|
|
/*
|
|
* No match.
|
|
*
|
|
* Set sql_state->reason to current line number
|
|
* only for debugging purposes.
|
|
*/
|
|
if (!patmatch) {
|
|
sql_state->reason = __LINE__;
|
|
return FALSE;
|
|
}
|
|
|
|
/*
|
|
* We got a SQLi match
|
|
* This next part just helps reduce false positives.
|
|
*
|
|
*/
|
|
switch (tlen) {
|
|
case 2:{
|
|
/*
|
|
* case 2 are "very small SQLi" which make them
|
|
* hard to tell from normal input...
|
|
*/
|
|
|
|
/*
|
|
* if 'comment' is '#' ignore.. too many FP
|
|
*/
|
|
if (sql_state->tokenvec[1].val[0] == '#') {
|
|
sql_state->reason = __LINE__;
|
|
return FALSE;
|
|
}
|
|
|
|
/*
|
|
* for fingerprint like 'nc', only comments of /x are treated
|
|
* as SQL... ending comments of "--" and "#" are not sqli
|
|
*/
|
|
if (sql_state->tokenvec[0].type == 'n' &&
|
|
sql_state->tokenvec[1].type == 'c' &&
|
|
sql_state->tokenvec[1].val[0] != '/') {
|
|
sql_state->reason = __LINE__;
|
|
return FALSE;
|
|
}
|
|
|
|
/*
|
|
* if '1c' ends with '/x' then it's sqli
|
|
*/
|
|
if (sql_state->tokenvec[0].type == '1' &&
|
|
sql_state->tokenvec[1].type == 'c' &&
|
|
sql_state->tokenvec[1].val[0] == '/') {
|
|
return TRUE;
|
|
}
|
|
|
|
/*
|
|
* if 'oc' then input must be 'CASE/x'
|
|
* used in HPP attack
|
|
*/
|
|
if (sql_state->tokenvec[0].type == 'o' &&
|
|
sql_state->tokenvec[1].type == 'c' &&
|
|
sql_state->tokenvec[1].val[0] == '/' &&
|
|
cstrcasecmp("CASE", sql_state->tokenvec[0].val) != 0)
|
|
{
|
|
sql_state->reason = __LINE__;
|
|
return FALSE;
|
|
}
|
|
|
|
/**
|
|
* there are some odd base64-looking query string values
|
|
* 1234-ABCDEFEhfhihwuefi--
|
|
* which evaluate to "1c"... these are not SQLi
|
|
* but 1234-- probably is.
|
|
* Make sure the "1" in "1c" is actually a true decimal number
|
|
*
|
|
* Need to check -original- string since the folding step
|
|
* may have merged tokens, e.g. "1+FOO" is folded into "1"
|
|
*
|
|
* Note: evasion: 1*1--
|
|
*/
|
|
if (sql_state->tokenvec[0].type == '1'&& sql_state->tokenvec[1].type == 'c') {
|
|
/*
|
|
* we check that next character after the number is either whitespace,
|
|
* or '/' or a '-' ==> sqli.
|
|
*/
|
|
ch = sql_state->s[strlen(sql_state->tokenvec[0].val)];
|
|
if ( ch <= 32 ) {
|
|
/* next char was whitespace,e.g. "1234 --"
|
|
* this isn't exactly correct.. ideally we should skip over all whitespace
|
|
* but this seems to be ok for now
|
|
*/
|
|
return TRUE;
|
|
}
|
|
if (ch == '/' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '*') {
|
|
return TRUE;
|
|
}
|
|
if (ch == '-' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '-') {
|
|
return TRUE;
|
|
}
|
|
|
|
sql_state->reason = __LINE__;
|
|
return FALSE;
|
|
}
|
|
|
|
/*
|
|
* detect obvious sqli scans.. many people put '--' in plain text
|
|
* so only detect if input ends with '--', e.g. 1-- but not 1-- foo
|
|
*/
|
|
if ((strlen(sql_state->tokenvec[1].val) > 2)
|
|
&& sql_state->tokenvec[1].val[0] == '-') {
|
|
sql_state->reason = __LINE__;
|
|
return FALSE;
|
|
}
|
|
|
|
break;
|
|
} /* case 2 */
|
|
case 3:{
|
|
/*
|
|
* ...foo' + 'bar...
|
|
* no opening quote, no closing quote
|
|
* and each string has data
|
|
*/
|
|
if (streq(sql_state->pat, "sos")
|
|
|| streq(sql_state->pat, "s&s")) {
|
|
if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
|
|
&& (sql_state->tokenvec[2].str_close == CHAR_NULL)) {
|
|
/*
|
|
* if ....foo" + "bar....
|
|
*/
|
|
return TRUE;
|
|
} else {
|
|
/*
|
|
* not sqli
|
|
*/
|
|
sql_state->reason = __LINE__;
|
|
return FALSE;
|
|
}
|
|
break;
|
|
}
|
|
} /* case 3 */
|
|
case 5: {
|
|
if (streq(sql_state->pat, "sosos")) {
|
|
if (sql_state->tokenvec[0].str_open == CHAR_NULL) {
|
|
/*
|
|
* if ....foo" + "bar....
|
|
*/
|
|
return TRUE;
|
|
} else {
|
|
/*
|
|
* not sqli
|
|
*/
|
|
sql_state->reason = __LINE__;
|
|
return FALSE;
|
|
}
|
|
break;
|
|
}
|
|
} /* case 5 */
|
|
} /* end switch */
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
/** Main API, detects SQLi in an input.
|
|
*
|
|
*
|
|
*/
|
|
int libinjection_is_sqli(sfilter * sql_state, const char *s, size_t slen,
|
|
ptr_fingerprints_fn fn, void* callbackarg)
|
|
{
|
|
|
|
/*
|
|
* no input? not sqli
|
|
*/
|
|
if (slen == 0) {
|
|
return FALSE;
|
|
}
|
|
|
|
if (fn == NULL) {
|
|
fn = is_sqli_pattern;
|
|
}
|
|
|
|
/*
|
|
* test input "as-is"
|
|
*/
|
|
if (libinjection_is_string_sqli(sql_state, s, slen, CHAR_NULL,
|
|
fn, callbackarg)) {
|
|
return TRUE;
|
|
}
|
|
|
|
/*
|
|
* if input has a single_quote, then
|
|
* test as if input was actually '
|
|
* example: if input if "1' = 1", then pretend it's
|
|
* "'1' = 1"
|
|
* Porting Notes: example the same as doing
|
|
* is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
|
|
*
|
|
*/
|
|
if (memchr(s, CHAR_SINGLE, slen)
|
|
&& libinjection_is_string_sqli(sql_state, s, slen, CHAR_SINGLE,
|
|
fn, callbackarg)) {
|
|
return TRUE;
|
|
}
|
|
|
|
/*
|
|
* same as above but with a double-quote "
|
|
*/
|
|
if (memchr(s, CHAR_DOUBLE, slen)
|
|
&& libinjection_is_string_sqli(sql_state, s, slen, CHAR_DOUBLE,
|
|
fn, callbackarg)) {
|
|
return TRUE;
|
|
}
|
|
|
|
/*
|
|
* Hurray, input is not SQLi
|
|
*/
|
|
return FALSE;
|
|
}
|