diff --git a/apache2/Makefile.am b/apache2/Makefile.am index b7bde266..9ffc6404 100644 --- a/apache2/Makefile.am +++ b/apache2/Makefile.am @@ -11,7 +11,8 @@ mod_security2_la_SOURCES = mod_security2.c \ re_variables.c msc_logging.c msc_xml.c \ msc_multipart.c modsecurity.c msc_parsers.c \ msc_util.c msc_pcre.c persist_dbm.c msc_reqbody.c \ - msc_geo.c msc_gsb.c msc_crypt.c msc_tree.c msc_unicode.c acmp.c msc_lua.c msc_release.c libinjection/sqlparse.c + msc_geo.c msc_gsb.c msc_crypt.c msc_tree.c msc_unicode.c acmp.c msc_lua.c msc_release.c \ + libinjection/libinjection_sqli.c mod_security2_la_CFLAGS = @APXS_CFLAGS@ @APR_CFLAGS@ @APU_CFLAGS@ \ @PCRE_CFLAGS@ @LIBXML2_CFLAGS@ @LUA_CFLAGS@ @MODSEC_EXTRA_CFLAGS@ @CURL_CFLAGS@ diff --git a/apache2/libinjection/sqlparse.h b/apache2/libinjection/libinjection.h similarity index 52% rename from apache2/libinjection/sqlparse.h rename to apache2/libinjection/libinjection.h index 838a0471..38fd9810 100644 --- a/apache2/libinjection/sqlparse.h +++ b/apache2/libinjection/libinjection.h @@ -6,13 +6,16 @@ * * HOW TO USE: * + * #include "libinjection.h" + * * // Normalize query or postvar value * // If it comes in urlencoded, then it's up to you * // to urldecode it. If it's in correct form already * // then nothing to do! * * sfilter s; - * int sqli = is_sqli(&s, user_string, new_len); + * int sqli = libinjection_is_sqli(&s, user_string, new_len, + * NULL, NULL); * * // 0 = not sqli * // 1 = is sqli @@ -22,8 +25,8 @@ * */ -#ifndef _SQLPARSE_H -#define _SQLPARSE_H +#ifndef _LIBINJECTION_H +#define _LIBINJECTION_H #ifdef __cplusplus extern "C" { @@ -34,7 +37,7 @@ extern "C" { * See python's normalized version * http://www.python.org/dev/peps/pep-0386/#normalizedversion */ -#define LIBINJECTION_VERSION "1.2.0" +#define LIBINJECTION_VERSION "2.0.0" #define ST_MAX_SIZE 32 #define MAX_TOKENS 5 @@ -79,35 +82,56 @@ typedef struct { } sfilter; /** - * Pointer to function, takes cstr input, return true/false + * Pointer to function, takes cstr input, returns 1 for true, 0 for false */ -typedef int (*ptr_fingerprints_fn)(const char*); +typedef int (*ptr_fingerprints_fn)(const char*, void* callbackarg); /** * Main API: tests for SQLi in three possible contexts, no quotes, * single quote and double quote * + * \param sql_state + * \param s + * \param slen + * \param fn a pointer to a function that determines if a fingerprint + * is a match or not. If NULL, then a hardwired list is + * used. Useful for loading fingerprints data from custom + * sources. + * \param callbackarg. For default case, use NULL + * * \return 1 (true) if SQLi, 0 (false) if benign */ -int is_sqli(sfilter * sql_state, const char *s, size_t slen, - ptr_fingerprints_fn fn); +int libinjection_is_sqli(sfilter * sql_state, + const char *s, size_t slen, + ptr_fingerprints_fn fn, void* callbackarg); /** * This detects SQLi in a single context, mostly useful for custom * logic and debugging. * - * \param delim must be "NULL" (no context), single quote or double quote. + * \param sql_state + * \param s + * \param slen + * \param delim must be char of + * CHAR_NULL (\0), raw context + * CHAR_SINGLE ('), single quote context + * CHAR_DOUBLE ("), double quote context * Other values will likely be ignored. + * \param ptr_fingerprints_fn is a pointer to a function + * that determines if a fingerprint is a match or not. + * \param callbackarg passed to function above * - * \return 1 (true) if SQLi, 0 (false) if not SQLi **in this context** + * + * \return 1 (true) if SQLi or 0 (false) if not SQLi **in this context** * */ -int is_string_sqli(sfilter * sql_state, const char *s, size_t slen, - const char delim, - ptr_fingerprints_fn fn); +int libinjection_is_string_sqli(sfilter * sql_state, + const char *s, size_t slen, + const char delim, + ptr_fingerprints_fn fn, void* callbackarg); #ifdef __cplusplus } #endif -#endif /* _SQLPARSE_H */ +#endif /* _LIBINJECTION_H */ diff --git a/apache2/libinjection/libinjection_sqli.c b/apache2/libinjection/libinjection_sqli.c new file mode 100644 index 00000000..b2d26221 --- /dev/null +++ b/apache2/libinjection/libinjection_sqli.c @@ -0,0 +1,1445 @@ +/** + * Copyright 2012,2013 Nick Galbreath + * nickg@client9.com + * BSD License -- see COPYING.txt for details + * + * (setq-default indent-tabs-mode nil) + * (setq c-default-style "k&r" + * c-basic-offset 4) + * indent -kr -nut + */ + +#include +#include +#include +#include +#include + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +#if 0 +#define FOLD_DEBUG printf("%d: Fold state = %d, current=%c, last=%c\n", __LINE__, sf->fold_state, current->type, last->type == CHAR_NULL ? '~': last->type) +#else +#define FOLD_DEBUG +#endif + +#include "libinjection_sqli_data.h" + +/* memchr2 finds a string of 2 characters inside another string + * This a specialized version of "memmem" or "memchr". + * 'memmem' doesn't exist on all platforms + * + * Porting notes: this is just a special version of + * astring.find("AB") + * + */ +static const char * +memchr2(const char *haystack, size_t haystack_len, char c0, char c1) +{ + const char *cur = haystack; + const char *last = haystack + haystack_len - 1; + + if (haystack_len < 2) { + return NULL; + } + if (c0 == c1) { + return NULL; + } + + while (cur < last) { + if (cur[0] == c0) { + if (cur[1] == c1) { + return cur; + } else { + cur += 2; + } + } else { + cur += 1; + } + } + + return NULL; +} + +/** Find largest string containing certain characters. + * + * C Standard library 'strspn' only works for 'c-strings' (null terminated) + * This works on arbitrary length. + * + * Performance notes: + * not critical + * + * Porting notes: + * if accept is 'ABC', then this function would be similar to + * a_regexp.match(a_str, '[ABC]*'), + */ +static size_t +strlenspn(const char *s, size_t len, const char *accept) +{ + size_t i; + for (i = 0; i < len; ++i) { + /* likely we can do better by inlining this function + * but this works for now + */ + if (strchr(accept, s[i]) == NULL) { + return i; + } + } + return len; +} + +/* + * ASCII half-case-insenstive compare! + * + * DANGER: this assume arg0 is *always upper case* + * and arg1 is mixed case!! + * + * Required since libc version uses the current locale + * and is much slower. + */ +static int cstrcasecmp(const char *a, const char *b) +{ + char ca, cb; + + do { + ca = *a++; + cb = *b++; + assert(ca < 'a' || ca > 'z'); + if (cb >= 'a' && cb <= 'z') + cb -= 0x20; + } while (ca == cb && ca != '\0'); + + return ca - cb; +} + +/** + * Case sensitive string compare. + * Here only to make code more readable + */ +static int streq(const char *a, const char *b) +{ + return strcmp(a, b) == 0; +} + +/* + * Case-sensitive binary search with "deferred detection of equality" + * We assume in most cases the key will NOT be found. This makes the + * main loop only have one comparison branch, which should optimize + * better in CPU. See #Deferred_detection_of_equality in + * http://en.wikipedia.org/wiki/Binary_search_algorithm + * + * This is used for fingerprint lookups, and a few other places. + * Note in normal operation this maybe takes 1% of total run time, so + * replacing this with another datastructure probably isn't worth + * the effort. + */ +static int bsearch_cstr(const char *key, const char *base[], size_t nmemb) +{ + size_t pos; + size_t left = 0; + size_t right = nmemb - 1; + + /* assert(nmemb > 0); */ + + while (left < right) { + pos = (left + right) >> 1; + /* assert(pos < right); */ + if (strcmp(base[pos], key) < 0) { + left = pos + 1; + } else { + right = pos; + } + } + if ((left == right) && strcmp(base[left], key) == 0) { + return TRUE; + } else { + return FALSE; + } +} + +/* + * Case-insensitive binary search + * + */ +static int bsearch_cstrcase(const char *key, const char *base[], size_t nmemb) +{ + size_t pos; + size_t left = 0; + size_t right = nmemb - 1; + + while (left < right) { + pos = (left + right) >> 1; + /* arg0 = upper case only, arg1 = mixed case */ + if (cstrcasecmp(base[pos], key) < 0) { + left = pos + 1; + } else { + right = pos; + } + } + if ((left == right) && cstrcasecmp(base[left], key) == 0) { + return TRUE; + } else { + return FALSE; + } +} + +/** + * + */ +#define UNUSED(x) (void)(x) + +static int is_sqli_pattern(const char* key, void* callbackarg) +{ + UNUSED(callbackarg); + return bsearch_cstr(key, sql_fingerprints, sqli_fingerprints_sz); +} + +/** + * + * + * + * Porting Notes: + * given a mapping/hash of string to char + * this is just + * typecode = mapping[key.upper()] + */ + +static char bsearch_keyword_type(const char *key, const keyword_t * keywords, + size_t numb) +{ + size_t pos; + size_t left = 0; + size_t right = numb - 1; + + while (left < right) { + pos = (left + right) >> 1; + + /* arg0 = upper case only, arg1 = mixed case */ + if (cstrcasecmp(keywords[pos].word, key) < 0) { + left = pos + 1; + } else { + right = pos; + } + } + if ((left == right) && cstrcasecmp(keywords[left].word, key) == 0) { + return keywords[left].type; + } else { + return CHAR_NULL; + } +} + +static char is_keyword(const char* key) +{ + return bsearch_keyword_type(key, sql_keywords, sql_keywords_sz); +} + +/* st_token methods + * + * The following functions manipulates the stoken_t type + * + * + */ + +static void st_clear(stoken_t * st) +{ + st->type = CHAR_NULL; + st->str_open = CHAR_NULL; + st->str_close = CHAR_NULL; + st->val[0] = CHAR_NULL; +} + +static int st_is_empty(const stoken_t * st) +{ + return st->type == CHAR_NULL; +} + +static void st_assign_char(stoken_t * st, const char stype, const char value) +{ + st->type = stype; + st->val[0] = value; + st->val[1] = CHAR_NULL; +} + +static void st_assign(stoken_t * st, const char stype, const char *value, + size_t len) +{ + size_t last = len < ST_MAX_SIZE ? len : (ST_MAX_SIZE - 1); + st->type = stype; + memcpy(st->val, value, last); + st->val[last] = CHAR_NULL; +} + +static void st_copy(stoken_t * dest, const stoken_t * src) +{ + memcpy(dest, src, sizeof(stoken_t)); +} + +static int st_is_multiword_start(const stoken_t * st) +{ + return bsearch_cstrcase(st->val, + multikeywords_start, + multikeywords_start_sz); +} + +static int st_is_unary_op(const stoken_t * st) +{ + return (st->type == 'o' && !(strcmp(st->val, "+") && + strcmp(st->val, "-") && + strcmp(st->val, "!") && + strcmp(st->val, "!!") && + /* arg0 = upper case only, arg1 = mixed case */ + cstrcasecmp("NOT", st->val) && + strcmp(st->val, "~"))); +} + +static int st_is_arith_op(const stoken_t * st) +{ + return (st->type == 'o' && !(strcmp(st->val, "-") && + strcmp(st->val, "+") && + strcmp(st->val, "~") && + strcmp(st->val, "!") && + strcmp(st->val, "/") && + strcmp(st->val, "%") && + strcmp(st->val, "*") && + strcmp(st->val, "|") && + strcmp(st->val, "&") && + /* arg1 = upper case only, arg1 = mixed case */ + cstrcasecmp("MOD", st->val) && + cstrcasecmp("DIV", st->val))); +} + +/* Parsers + * + * + */ + + +static size_t parse_white(sfilter * sf) +{ + return sf->pos + 1; +} + +static size_t parse_operator1(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + size_t pos = sf->pos; + + st_assign_char(current, 'o', cs[pos]); + return pos + 1; +} + +static size_t parse_other(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + size_t pos = sf->pos; + + st_assign_char(current, '?', cs[pos]); + return pos + 1; +} + +static size_t parse_char(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + size_t pos = sf->pos; + + st_assign_char(current, cs[pos], cs[pos]); + return pos + 1; +} + +static size_t parse_eol_comment(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + const size_t slen = sf->slen; + size_t pos = sf->pos; + + const char *endpos = + (const char *) memchr((const void *) (cs + pos), '\n', slen - pos); + if (endpos == NULL) { + st_assign(current, 'c', cs + pos, slen - pos); + return slen; + } else { + st_assign(current, 'c', cs + pos, endpos - cs - pos); + return (endpos - cs) + 1; + } +} + +static size_t parse_dash(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + const size_t slen = sf->slen; + size_t pos = sf->pos; + + + size_t pos1 = pos + 1; + if (pos1 < slen && cs[pos1] == '-') { + return parse_eol_comment(sf); + } else { + st_assign_char(current, 'o', '-'); + return pos1; + } +} + +static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos) +{ + size_t i; + + if (pos + 2 >= len) { + return 0; + } + if (cs[pos + 2] != '!') { + return 0; + } + /* + * this is a mysql comment + * got "/x!" + */ + if (pos + 3 >= len) { + return 3; + } + + if (!isdigit(cs[pos + 3])) { + return 3; + } + /* + * handle odd case of /x!0SELECT + */ + if (!isdigit(cs[pos + 4])) { + return 4; + } + + if (pos + 7 >= len) { + return 4; + } + + for (i = pos + 5; i <= pos + 7; ++i) { + if (!isdigit(cs[i])) { + return 3; + } + } + return 8; +} + +static size_t parse_slash(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + const size_t slen = sf->slen; + size_t pos = sf->pos; + const char* cur = cs + pos; + size_t inc; + + size_t pos1 = pos + 1; + if (pos1 == slen || cs[pos1] != '*') { + return parse_operator1(sf); + } + + inc = is_mysql_comment(cs, slen, pos); + if (inc == 0) { + + /* + * skip over initial '/x' + */ + const char *ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/'); + if (ptr == NULL) { + /* + * unterminated comment + */ + st_assign(current, 'c', cs + pos, slen - pos); + return slen; + } else { + /* + * postgresql allows nested comments which makes + * this is incompatible with parsing so + * if we find a '/x' inside the coment, then + * make a new token. + */ + char ctype = 'c'; + const size_t clen = (ptr + 2) - (cur); + if (memchr2(cur + 2, ptr - (cur + 1), '/', '*') != NULL) { + ctype = 'X'; + } + st_assign(current, ctype, cs + pos, clen); + + return pos + clen; + } + } else { + /* + * MySQL Comment + */ + sf->in_comment = TRUE; + st_clear(current); + return pos + inc; + } +} + +static size_t parse_backslash(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + const size_t slen = sf->slen; + size_t pos = sf->pos; + + /* + * Weird MySQL alias for NULL, "\N" (capital N only) + */ + if (pos + 1 < slen && cs[pos + 1] == 'N') { + st_assign(current, '1', "NULL", 4); + return pos + 2; + } else { + return parse_other(sf); + } +} + +/** Is input a 2-char operator? + * + */ +static int is_operator2(const char *key) +{ + return bsearch_cstr(key, operators2, operators2_sz); +} + +static size_t parse_operator2(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + const size_t slen = sf->slen; + size_t pos = sf->pos; + char op2[3]; + + if (pos + 1 >= slen) { + return parse_operator1(sf); + } + + op2[0] = cs[pos]; + op2[1] = cs[pos + 1]; + op2[2] = CHAR_NULL; + + /* + * Special Hack for MYSQL style comments + * instead of turning: + * /x! FOO x/ into FOO by rewriting the string, we + * turn it into FOO x/ and ignore the ending comment + */ + if (sf->in_comment && op2[0] == '*' && op2[1] == '/') { + sf->in_comment = FALSE; + st_clear(current); + return pos + 2; + } else if (pos + 2 < slen && op2[0] == '<' && op2[1] == '=' + && cs[pos + 2] == '>') { + /* + * special 3-char operator + */ + st_assign(current, 'o', "<=>", 3); + return pos + 3; + } else if (is_operator2(op2)) { + if (streq(op2, "&&") || streq(op2, "||")) { + st_assign(current, '&', op2, 2); + } else { + /* + * normal 2 char operator + */ + st_assign(current, 'o', op2, 2); + } + return pos + 2; + } else { + /* + * must be a single char operator + */ + return parse_operator1(sf); + } +} + +static size_t parse_string_core(const char *cs, const size_t len, size_t pos, + stoken_t * st, char delim, size_t offset) +{ + /* + * offset is to skip the perhaps first quote char + */ + const char *qpos = + (const char *) memchr((const void *) (cs + pos + offset), delim, + len - pos - offset); + + /* + * then keep string open/close info + */ + if (offset == 1) { + /* + * this is real quote + */ + st->str_open = delim; + } else { + /* + * this was a simulated quote + */ + st->str_open = CHAR_NULL; + } + + while (TRUE) { + if (qpos == NULL) { + /* + * string ended with no trailing quote + * assign what we have + */ + st_assign(st, 's', cs + pos + offset, len - pos - offset); + st->str_close = CHAR_NULL; + return len; + } else if (*(qpos - 1) != '\\') { + /* + * ending quote is not escaped.. copy and end + */ + st_assign(st, 's', cs + pos + offset, + qpos - (cs + pos + offset)); + st->str_close = delim; + return qpos - cs + 1; + } else { + qpos = + (const char *) memchr((const void *) (qpos + 1), delim, + (cs + len) - (qpos + 1)); + } + } +} + +/** + * Used when first char is a ' or " + */ +static size_t parse_string(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + const size_t slen = sf->slen; + size_t pos = sf->pos; + + /* + * assert cs[pos] == single or double quote + */ + return parse_string_core(cs, slen, pos, current, cs[pos], 1); +} + +static size_t parse_word(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + size_t pos = sf->pos; + char *dot; + char ch; + size_t slen = + strlenspn(cs + pos, sf->slen - pos, + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$."); + + st_assign(current, 'n', cs + pos, slen); + + dot = strchr(current->val, '.'); + if (dot != NULL) { + *dot = '\0'; + + ch = is_keyword(current->val); + + if (ch == 'k' || ch == 'o') { + /* + * we got something like "SELECT.1" + */ + current->type = ch; + return pos + strlen(current->val); + } else { + /* + * something else, put back dot + */ + *dot = '.'; + } + } + + /* + * do normal lookup with word including '.' + */ + if (slen < ST_MAX_SIZE) { + + ch = is_keyword(current->val); + + if (ch == CHAR_NULL) { + ch = 'n'; + } + current->type = ch; + } + return pos + slen; +} + +static size_t parse_var(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + const size_t slen = sf->slen; + size_t pos = sf->pos; + size_t pos1 = pos + 1; + size_t xlen; + + /* + * move past optional other '@' + */ + if (pos1 < slen && cs[pos1] == '@') { + pos1 += 1; + } + + xlen = strlenspn(cs + pos1, slen - pos1, + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.$"); + if (xlen == 0) { + st_assign(current, 'v', cs + pos, (pos1 - pos)); + return pos1; + } else { + st_assign(current, 'v', cs + pos, xlen + (pos1 - pos)); + return pos1 + xlen; + } +} + +static size_t parse_money(sfilter *sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + const size_t slen = sf->slen; + size_t pos = sf->pos; + size_t xlen; + + /* + * $1,000.00 or $1.000,00 ok! + * This also parses $....,,,111 but that's ok + */ + xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,"); + if (xlen == 0) { + /* + * just ignore '$' + */ + return pos + 1; + } else { + st_assign(current, '1', cs + pos, 1 + xlen); + return pos + 1 + xlen; + } +} + +static size_t parse_number(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *cs = sf->s; + const size_t slen = sf->slen; + size_t pos = sf->pos; + size_t xlen; + size_t start; + + if (pos + 1 < slen && cs[pos] == '0' && (cs[pos + 1] == 'X' || cs[pos + 1] == 'x')) { + /* + * TBD compare if isxdigit + */ + xlen = + strlenspn(cs + pos + 2, slen - pos - 2, "0123456789ABCDEFabcdef"); + if (xlen == 0) { + st_assign(current, 'n', "0X", 2); + return pos + 2; + } else { + st_assign(current, '1', cs + pos, 2 + xlen); + return pos + 2 + xlen; + } + } + + start = pos; + while (pos < slen && isdigit(cs[pos])) { + pos += 1; + } + if (pos < slen && cs[pos] == '.') { + pos += 1; + while (pos < slen && isdigit(cs[pos])) { + pos += 1; + } + if (pos - start == 1) { + st_assign_char(current, 'n', '.'); + return pos; + } + } + + if (pos < slen) { + if (cs[pos] == 'E' || cs[pos] == 'e') { + pos += 1; + if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) { + pos += 1; + } + while (pos < slen && isdigit(cs[pos])) { + pos += 1; + } + } else if (isalpha(cs[pos])) { + /* + * oh no, we have something like '6FOO' + * use microsoft style parsing and take just + * the number part and leave the rest to be + * parsed later + */ + st_assign(current, '1', cs + start, pos - start); + return pos; + } + } + + st_assign(current, '1', cs + start, pos - start); + return pos; +} + +int parse_token(sfilter * sf) +{ + stoken_t *current = &sf->syntax_current; + const char *s = sf->s; + const size_t slen = sf->slen; + size_t *pos = &sf->pos; + pt2Function fnptr; + + st_clear(current); + + /* + * if we are at beginning of string + * and in single-quote or double quote mode + * then pretend the input starts with a quote + */ + if (*pos == 0 && sf->delim != CHAR_NULL) { + *pos = parse_string_core(s, slen, 0, current, sf->delim, 0); + return TRUE; + } + + while (*pos < slen) { + /* + * get current character + */ + const int ch = (int) (s[*pos]); + + /* + * if not ascii, then continue... + * actually probably need to just assuming + * it's a string + */ + if (ch < 0 || ch > 127) { + *pos += 1; + continue; + } + + /* + * look up the parser, and call it + * + * Porting Note: this is mapping of char to function + * charparsers[ch]() + */ + fnptr = char_parse_map[ch]; + *pos = (*fnptr) (sf); + + /* + * + */ + if (current->type != CHAR_NULL) { + return TRUE; + } + } + return FALSE; +} + +/** + * Initializes parsing state + * TBD: explicity add parsing content (NULL, SINGLE, DOUBLE) + */ +void sfilter_reset(sfilter * sf, const char *s, size_t len) +{ + memset(sf, 0, sizeof(sfilter)); + sf->s = s; + sf->slen = len; +} + +/** See if two tokens can be merged since they are compound SQL phrases. + * + * This takes two tokens, and, if they are the right type, + * merges their values together. Then checks to see if the + * new value is special using the PHRASES mapping. + * + * Example: "UNION" + "ALL" ==> "UNION ALL" + * + * C Security Notes: this is safe to use C-strings (null-terminated) + * since the types involved by definition do not have embedded nulls + * (e.g. there is no keyword with embedded null) + * + * Porting Notes: since this is C, it's oddly complicated. + * This is just: multikeywords[token.value + ' ' + token2.value] + * + */ +static int syntax_merge_words(stoken_t * a, stoken_t * b) +{ + size_t sz1; + size_t sz2; + size_t sz3; + char tmp[ST_MAX_SIZE]; + char ch; + + if (! + (a->type == 'k' || a->type == 'n' || a->type == 'o' + || a->type == 'U')) { + return FALSE; + } + + sz1 = strlen(a->val); + sz2 = strlen(b->val); + sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */ + if (sz3 >= ST_MAX_SIZE) { /* make sure there is room for ending null */ + return FALSE; + } + /* + * oddly annoying last.val + ' ' + current.val + */ + memcpy(tmp, a->val, sz1); + tmp[sz1] = ' '; + memcpy(tmp + sz1 + 1, b->val, sz2); + tmp[sz3] = CHAR_NULL; + + ch = bsearch_keyword_type(tmp, multikeywords, multikeywords_sz); + if (ch != CHAR_NULL) { + st_assign(a, ch, tmp, sz3); + return TRUE; + } else { + return FALSE; + } +} + +/* This does some simple syntax cleanup based on the token + * + * + */ +int sqli_tokenize(sfilter * sf, stoken_t * sout) +{ + stoken_t *last = &sf->syntax_last; + stoken_t *current = &sf->syntax_current; + + while (parse_token(sf)) { + char ttype = current->type; + + /* + * TBD: hmm forgot logic here. + */ + if (ttype == 'c') { + st_copy(&sf->syntax_comment, current); + continue; + } + st_clear(&sf->syntax_comment); + + /* + * If we don't have a saved token, and we have + * a string: save it. if the next token is also a string + * then merge them. e.g. "A" "B" in SQL is actually "AB" + * a n/k/U/o type: save since next token my be merged together + * for example: "LEFT" + "JOIN" = "LEFT JOIN" + * a o/& type: TBD need to review. + * + */ + if (last->type == CHAR_NULL) { + switch (ttype) { + + /* + * items that have special needs + */ + case 's': + st_copy(last, current); + continue; + case 'n': + case 'k': + case 'U': + case '&': + case 'o': + if (st_is_multiword_start(current)) { + st_copy(last, current); + continue; + } else if (current->type == 'o' || current->type == '&') { + /* } else if (st_is_unary_op(current)) { */ + st_copy(last, current); + continue; + } else { + /* + * copy to out + */ + st_copy(sout, current); + return TRUE; + } + default: + /* + * copy to out + */ + st_copy(sout, current); + return TRUE; + } + } + /* + * We have a saved token + */ + + switch (ttype) { + case 's': + if (last->type == 's') { + /* + * "FOO" "BAR" == "FOO" (skip second string) + */ + continue; + } else { + st_copy(sout, last); + st_copy(last, current); + return TRUE; + } + break; + + case 'o': + /* + * first case to handle "IS" + "NOT" + */ + if (syntax_merge_words(last, current)) { + continue; + } else if (st_is_unary_op(current) + && (last->type == 'o' || last->type == '&' + || last->type == 'U')) { + /* + * if an operator is followed by a unary operator, skip it. + * 1, + ==> "+" is not unary, it's arithmetic + * AND, + ==> "+" is unary + */ + continue; + } else { + /* + * no match + */ + st_copy(sout, last); + st_copy(last, current); + return TRUE; + } + break; + + case 'n': + case 'k': + if (syntax_merge_words(last, current)) { + continue; + } else { + /* + * total no match + */ + st_copy(sout, last); + st_copy(last, current); + return TRUE; + } + break; + + default: + /* + * fix up for ambigous "IN" + * handle case where IN is typically a function + * but used in compound "IN BOOLEAN MODE" jive + * + * warning on cstrcasecmp arg0=upper case only, arg1 = mixed + */ + if (last->type == 'n' && !cstrcasecmp("IN", last->val)) { + st_copy(last, current); + st_assign(sout, 'f', "IN", 2); + return TRUE; + } else { + /* + * no match at all + */ + st_copy(sout, last); + st_copy(last, current); + return TRUE; + } + break; + } + } + + /* + * final cleanup + */ + if (last->type) { + st_copy(sout, last); + st_clear(last); + return TRUE; + } else if (sf->syntax_comment.type) { + /* + * TBD + */ + st_copy(sout, &sf->syntax_comment); + st_clear(&sf->syntax_comment); + return TRUE; + } else { + return FALSE; + } +} + +/* + * My apologies, this code is a mess + */ +int filter_fold(sfilter * sf, stoken_t * sout) +{ + stoken_t *last = &sf->fold_last; + stoken_t *current = &sf->fold_current; + + if (sf->fold_state == 4 && !st_is_empty(last)) { + st_copy(sout, last); + sf->fold_state = 2; + st_clear(last); + return FALSE; + } + + while (sqli_tokenize(sf, current)) { + /* + * 0 = start of statement + * skip ( and unary ops + */ + if (sf->fold_state == 0) { + if (current->type == '(') { + continue; + } + if (st_is_unary_op(current)) { + continue; + } + sf->fold_state = 1; + } + + if (st_is_empty(last)) { + FOLD_DEBUG; + if (current->type == '1' || current->type == 'n' + || current->type == '(') { + sf->fold_state = 2; + st_copy(last, current); + } + st_copy(sout, current); + return FALSE; + } else if (last->type == '(' && st_is_unary_op(current)) { + /* + * similar to beginning of statement + * an opening '(' resets state, and we should skip all + * unary operators + */ + continue; + } else if (last->type == '(' && current->type == '(') { + /* if we get another '(' after another + * emit 1, but keep state + */ + st_copy(sout, current); + return FALSE; + } else if ((last->type == '1' || last->type == 'n') + && st_is_arith_op(current)) { + FOLD_DEBUG; + st_copy(last, current); + } else if (last->type == 'o' + && (current->type == '1' || current->type == 'n')) { + FOLD_DEBUG; + st_copy(last, current); + } else { + if (sf->fold_state == 2) { + if (last->type != '1' && last->type != '(' + && last->type != 'n') { + FOLD_DEBUG; + st_copy(sout, last); + st_copy(last, current); + sf->fold_state = 4; + } else { + FOLD_DEBUG; + st_copy(sout, current); + st_clear(last); + } + return FALSE; + } else { + if (last->type == 'o') { + st_copy(sout, last); + st_copy(last, current); + sf->fold_state = 4; + } else { + sf->fold_state = 2; + st_copy(sout, current); + st_clear(last); + } + return FALSE; + } + } + } + + if (!st_is_empty(last)) { + if (st_is_arith_op(last)) { + st_copy(sout, last); + st_clear(last); + return FALSE; + } else { + st_clear(last); + } + } + + /* + * all done: nothing more to parse + */ + return TRUE; +} + +/* secondary api: detects SQLi in a string, GIVEN a context. + * + * A context can be: + * * CHAR_NULL (\0), process as is + * * CHAR_SINGLE ('), process pretending input started with a + * single quote. + * * CHAR_DOUBLE ("), process pretending input started with a + * double quote. + * + */ +int libinjection_is_string_sqli(sfilter * sql_state, + const char *s, size_t slen, + const char delim, + ptr_fingerprints_fn fn, void* callbackarg) +{ + int tlen = 0; + char ch; + int patmatch; + int all_done; + + sfilter_reset(sql_state, s, slen); + sql_state->delim = delim; + + while (tlen < MAX_TOKENS) { + all_done = filter_fold(sql_state, &(sql_state->tokenvec[tlen])); + if (all_done) { + break; + } + + sql_state->pat[tlen] = sql_state->tokenvec[tlen].type; + tlen += 1; + } + + /* + * make the fingerprint pattern a c-string (null delimited) + */ + sql_state->pat[tlen] = CHAR_NULL; + + /* + * check for 'X' in pattern + * this means parsing could not be done + * accurately due to pgsql's double comments + * or other syntax that isn't consistent + * should be very rare false positive + */ + if (strchr(sql_state->pat, 'X')) { + return TRUE; + } + + patmatch = fn(sql_state->pat, callbackarg); + + /* + * No match. + * + * Set sql_state->reason to current line number + * only for debugging purposes. + */ + if (!patmatch) { + sql_state->reason = __LINE__; + return FALSE; + } + + /* + * We got a SQLi match + * This next part just helps reduce false positives. + * + */ + switch (tlen) { + case 2:{ + /* + * case 2 are "very small SQLi" which make them + * hard to tell from normal input... + */ + + /* + * if 'comment' is '#' ignore.. too many FP + */ + if (sql_state->tokenvec[1].val[0] == '#') { + sql_state->reason = __LINE__; + return FALSE; + } + + /* + * for fingerprint like 'nc', only comments of /x are treated + * as SQL... ending comments of "--" and "#" are not sqli + */ + if (sql_state->tokenvec[0].type == 'n' && + sql_state->tokenvec[1].type == 'c' && + sql_state->tokenvec[1].val[0] != '/') { + sql_state->reason = __LINE__; + return FALSE; + } + + /* + * if '1c' ends with '/x' then it's sqli + */ + if (sql_state->tokenvec[0].type == '1' && + sql_state->tokenvec[1].type == 'c' && + sql_state->tokenvec[1].val[0] == '/') { + return TRUE; + } + + /* + * if 'oc' then input must be 'CASE/x' + * used in HPP attack + */ + if (sql_state->tokenvec[0].type == 'o' && + sql_state->tokenvec[1].type == 'c' && + sql_state->tokenvec[1].val[0] == '/' && + cstrcasecmp("CASE", sql_state->tokenvec[0].val) != 0) + { + sql_state->reason = __LINE__; + return FALSE; + } + + /** + * there are some odd base64-looking query string values + * 1234-ABCDEFEhfhihwuefi-- + * which evaluate to "1c"... these are not SQLi + * but 1234-- probably is. + * Make sure the "1" in "1c" is actually a true decimal number + * + * Need to check -original- string since the folding step + * may have merged tokens, e.g. "1+FOO" is folded into "1" + * + * Note: evasion: 1*1-- + */ + if (sql_state->tokenvec[0].type == '1'&& sql_state->tokenvec[1].type == 'c') { + /* + * we check that next character after the number is either whitespace, + * or '/' or a '-' ==> sqli. + */ + ch = sql_state->s[strlen(sql_state->tokenvec[0].val)]; + if ( ch <= 32 ) { + /* next char was whitespace,e.g. "1234 --" + * this isn't exactly correct.. ideally we should skip over all whitespace + * but this seems to be ok for now + */ + return TRUE; + } + if (ch == '/' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '*') { + return TRUE; + } + if (ch == '-' && sql_state->s[strlen(sql_state->tokenvec[0].val) + 1] == '-') { + return TRUE; + } + + sql_state->reason = __LINE__; + return FALSE; + } + + /* + * detect obvious sqli scans.. many people put '--' in plain text + * so only detect if input ends with '--', e.g. 1-- but not 1-- foo + */ + if ((strlen(sql_state->tokenvec[1].val) > 2) + && sql_state->tokenvec[1].val[0] == '-') { + sql_state->reason = __LINE__; + return FALSE; + } + + break; + } /* case 2 */ + case 3:{ + /* + * ...foo' + 'bar... + * no opening quote, no closing quote + * and each string has data + */ + if (streq(sql_state->pat, "sos") + || streq(sql_state->pat, "s&s")) { + if ((sql_state->tokenvec[0].str_open == CHAR_NULL) + && (sql_state->tokenvec[2].str_close == CHAR_NULL)) { + /* + * if ....foo" + "bar.... + */ + return TRUE; + } else { + /* + * not sqli + */ + sql_state->reason = __LINE__; + return FALSE; + } + break; + } + } /* case 3 */ + case 5: { + if (streq(sql_state->pat, "sosos")) { + if (sql_state->tokenvec[0].str_open == CHAR_NULL) { + /* + * if ....foo" + "bar.... + */ + return TRUE; + } else { + /* + * not sqli + */ + sql_state->reason = __LINE__; + return FALSE; + } + break; + } + } /* case 5 */ + } /* end switch */ + + return TRUE; +} + +/** Main API, detects SQLi in an input. + * + * + */ +int libinjection_is_sqli(sfilter * sql_state, const char *s, size_t slen, + ptr_fingerprints_fn fn, void* callbackarg) +{ + + /* + * no input? not sqli + */ + if (slen == 0) { + return FALSE; + } + + if (fn == NULL) { + fn = is_sqli_pattern; + } + + /* + * test input "as-is" + */ + if (libinjection_is_string_sqli(sql_state, s, slen, CHAR_NULL, + fn, callbackarg)) { + return TRUE; + } + + /* + * if input has a single_quote, then + * test as if input was actually ' + * example: if input if "1' = 1", then pretend it's + * "'1' = 1" + * Porting Notes: example the same as doing + * is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg) + * + */ + if (memchr(s, CHAR_SINGLE, slen) + && libinjection_is_string_sqli(sql_state, s, slen, CHAR_SINGLE, + fn, callbackarg)) { + return TRUE; + } + + /* + * same as above but with a double-quote " + */ + if (memchr(s, CHAR_DOUBLE, slen) + && libinjection_is_string_sqli(sql_state, s, slen, CHAR_DOUBLE, + fn, callbackarg)) { + return TRUE; + } + + /* + * Hurray, input is not SQLi + */ + return FALSE; +} diff --git a/apache2/libinjection/sqli_fingerprints.h b/apache2/libinjection/libinjection_sqli_data.h similarity index 54% rename from apache2/libinjection/sqli_fingerprints.h rename to apache2/libinjection/libinjection_sqli_data.h index 42e0b3c6..bd0abd2b 100644 --- a/apache2/libinjection/sqli_fingerprints.h +++ b/apache2/libinjection/libinjection_sqli_data.h @@ -1,7 +1,1024 @@ -#ifndef _SQLPARSE_FINGERPRINTS_H -#define _SQLPARSE_FINGERPRINTS_H -static const char* patmap[] = { +#ifndef _LIBINJECTION_SQLI_DATA_H +#define _LIBINJECTION_SQLI_DATA_H + +#include "libinjection.h" + +typedef struct { + const char *word; + char type; +} keyword_t; + +static size_t parse_money(sfilter * sf); +static size_t parse_other(sfilter * sf); +static size_t parse_white(sfilter * sf); +static size_t parse_operator1(sfilter *sf); +static size_t parse_char(sfilter *sf); +static size_t parse_eol_comment(sfilter *sf); +static size_t parse_dash(sfilter *sf); +static size_t parse_slash(sfilter *sf); +static size_t parse_backslash(sfilter * sf); +static size_t parse_operator2(sfilter *sf); +static size_t parse_string(sfilter *sf); +static size_t parse_word(sfilter * sf); +static size_t parse_var(sfilter * sf); +static size_t parse_number(sfilter * sf); + + +static const char* operators2[] = { + "!!", + "!<", + "!=", + "!>", + "!~", + "%=", + "&&", + "&=", + "*=", + "+=", + "-=", + "/=", + ":=", + "<<", + "<=", + "<>", + "<@", + ">=", + ">>", + "@>", + "^=", + "|/", + "|=", + "||", + "~*", +}; +static const size_t operators2_sz = 25; + +static const keyword_t sql_keywords[] = { + {"ABORT", 'k'}, + {"ABS", 'f'}, + {"ACCESSIBLE", 'k'}, + {"ACOS", 'f'}, + {"ADD", 'k'}, + {"ADDDATE", 'f'}, + {"ADDTIME", 'f'}, + {"AES_DECRYPT", 'f'}, + {"AES_ENCRYPT", 'f'}, + {"AGAINST", 'k'}, + {"AGE", 'f'}, + {"ALL_USERS", 'k'}, + {"ALTER", 'k'}, + {"ANALYZE", 'k'}, + {"AND", '&'}, + {"APPLOCK_MODE", 'f'}, + {"APPLOCK_TEST", 'f'}, + {"APP_NAME", 'f'}, + {"ARRAY_AGG", 'f'}, + {"ARRAY_CAT", 'f'}, + {"ARRAY_DIM", 'f'}, + {"ARRAY_FILL", 'f'}, + {"ARRAY_LENGTH", 'f'}, + {"ARRAY_LOWER", 'f'}, + {"ARRAY_NDIMS", 'f'}, + {"ARRAY_PREPEND", 'f'}, + {"ARRAY_TO_JSON", 'f'}, + {"ARRAY_TO_STRING", 'f'}, + {"ARRAY_UPPER", 'f'}, + {"AS", 'k'}, + {"ASC", 'k'}, + {"ASCII", 'f'}, + {"ASENSITIVE", 'k'}, + {"ASIN", 'f'}, + {"ASSEMBLYPROPERTY", 'f'}, + {"ASYMKEY_ID", 'f'}, + {"ATAN", 'f'}, + {"ATAN2", 'f'}, + {"AUTOINCREMENT", 'k'}, + {"AVG", 'f'}, + {"BEFORE", 'k'}, + {"BEGIN", 'k'}, + {"BENCHMARK", 'f'}, + {"BETWEEN", 'k'}, + {"BIGINT", 'k'}, + {"BIN", 'f'}, + {"BINARY", 'k'}, + {"BINARY_DOUBLE_INFINITY", '1'}, + {"BINARY_DOUBLE_NAN", '1'}, + {"BINARY_FLOAT_INFINITY", '1'}, + {"BINARY_FLOAT_NAN", '1'}, + {"BINBINARY", 'f'}, + {"BIT_AND", 'f'}, + {"BIT_COUNT", 'f'}, + {"BIT_LENGTH", 'f'}, + {"BIT_OR", 'f'}, + {"BIT_XOR", 'f'}, + {"BLOB", 'k'}, + {"BOOLEAN", 'k'}, + {"BOOL_AND", 'f'}, + {"BOOL_OR", 'f'}, + {"BOTH", 'k'}, + {"BTRIM", 'f'}, + {"BY", 'n'}, + {"CALL", 'k'}, + {"CASCADE", 'k'}, + {"CASE", 'o'}, + {"CAST", 'f'}, + {"CBOOL", 'f'}, + {"CBRT", 'f'}, + {"CBYTE", 'f'}, + {"CCUR", 'f'}, + {"CDATE", 'f'}, + {"CDBL", 'f'}, + {"CEIL", 'f'}, + {"CEILING", 'f'}, + {"CERTENCODED", 'f'}, + {"CERTPRIVATEKEY", 'f'}, + {"CERT_ID", 'f'}, + {"CERT_PROPERTY", 'f'}, + {"CHANGE", 'k'}, + {"CHANGES", 'f'}, + {"CHAR", 'f'}, + {"CHARACTER", 'k'}, + {"CHARACTER_LENGTH", 'f'}, + {"CHARINDEX", 'f'}, + {"CHARSET", 'f'}, + {"CHAR_LENGTH", 'f'}, + {"CHDIR", 'f'}, + {"CHDRIVE", 'f'}, + {"CHECK", 'k'}, + {"CHECKSUM_AGG", 'f'}, + {"CHOOSE", 'f'}, + {"CHR", 'f'}, + {"CINT", 'f'}, + {"CLNG", 'f'}, + {"CLOCK_TIMESTAMP", 'f'}, + {"COALESCE", 'k'}, + {"COERCIBILITY", 'f'}, + {"COLLATE", 'k'}, + {"COLLATION", 'f'}, + {"COLLATIONPROPERTY", 'f'}, + {"COLUMN", 'k'}, + {"COLUMNPROPERTY", 'f'}, + {"COLUMNS_UPDATED", 'f'}, + {"COL_LENGTH", 'f'}, + {"COL_NAME", 'f'}, + {"COMPRESS", 'f'}, + {"CONCAT", 'f'}, + {"CONCAT_WS", 'f'}, + {"CONDITION", 'k'}, + {"CONNECTION_ID", 'f'}, + {"CONSTRAINT", 'k'}, + {"CONTINUE", 'k'}, + {"CONV", 'f'}, + {"CONVERT", 'f'}, + {"CONVERT_FROM", 'f'}, + {"CONVERT_TO", 'f'}, + {"CONVERT_TZ", 'f'}, + {"COS", 'f'}, + {"COT", 'f'}, + {"COUNT", 'f'}, + {"COUNT_BIG", 'k'}, + {"CRC32", 'f'}, + {"CREATE", 'k'}, + {"CROSS", 'n'}, + {"CSNG", 'f'}, + {"CTXSYS.DRITHSX.SN", 'f'}, + {"CUME_DIST", 'f'}, + {"CURDATE", 'f'}, + {"CURDIR", 'f'}, + {"CURRENTUSER", 'f'}, + {"CURRENT_DATABASE", 'f'}, + {"CURRENT_DATE", 'k'}, + {"CURRENT_QUERY", 'f'}, + {"CURRENT_SCHEMA", 'f'}, + {"CURRENT_SCHEMAS", 'f'}, + {"CURRENT_SETTING", 'p'}, + {"CURRENT_TIME", 'k'}, + {"CURRENT_TIMESTAMP", 'k'}, + {"CURRENT_USER", 'k'}, + {"CURRVAL", 'f'}, + {"CURSOR", 'k'}, + {"CURSOR_STATUS", 'f'}, + {"CURTIME", 'f'}, + {"CVAR", 'f'}, + {"DATABASE", 'k'}, + {"DATABASEPROPERTYEX", 'f'}, + {"DATABASES", 'k'}, + {"DATABASE_PRINCIPAL_ID", 'f'}, + {"DATALENGTH", 'f'}, + {"DATE", 'f'}, + {"DATEADD", 'f'}, + {"DATEDIFF", 'f'}, + {"DATEFROMPARTS", 'f'}, + {"DATENAME", 'f'}, + {"DATEPART", 'f'}, + {"DATESERIAL", 'f'}, + {"DATETIME2FROMPARTS", 'f'}, + {"DATETIMEFROMPARTS", 'f'}, + {"DATETIMEOFFSETFROMPARTS", 'f'}, + {"DATEVALUE", 'f'}, + {"DATE_ADD", 'f'}, + {"DATE_FORMAT", 'f'}, + {"DATE_PART", 'f'}, + {"DATE_SUB", 'f'}, + {"DATE_TRUNC", 'f'}, + {"DAVG", 'f'}, + {"DAY", 'f'}, + {"DAYNAME", 'f'}, + {"DAYOFMONTH", 'f'}, + {"DAYOFWEEK", 'f'}, + {"DAYOFYEAR", 'f'}, + {"DAY_HOUR", 'k'}, + {"DAY_MICROSECOND", 'k'}, + {"DAY_MINUTE", 'k'}, + {"DAY_SECOND", 'k'}, + {"DBMS_PIPE.RECEIVE_MESSAGE", 'f'}, + {"DB_ID", 'f'}, + {"DB_NAME", 'f'}, + {"DCOUNT", 'f'}, + {"DEC", 'k'}, + {"DECIMAL", 'k'}, + {"DECLARE", 'k'}, + {"DECODE", 'f'}, + {"DECRYPTBYASMKEY", 'f'}, + {"DECRYPTBYCERT", 'f'}, + {"DECRYPTBYKEY", 'f'}, + {"DECRYPTBYKEYAUTOCERT", 'f'}, + {"DECRYPTBYPASSPHRASE", 'f'}, + {"DEFAULT", 'k'}, + {"DEGREES", 'f'}, + {"DELAY", 'k'}, + {"DELAYED", 'k'}, + {"DELETE", 'k'}, + {"DENSE_RANK", 'f'}, + {"DESC", 'k'}, + {"DESCRIBE", 'k'}, + {"DES_DECRYPT", 'f'}, + {"DES_ENCRYPT", 'f'}, + {"DETERMINISTIC", 'k'}, + {"DFIRST", 'f'}, + {"DIFFERENCE", 'f'}, + {"DISTINCROW", 'k'}, + {"DISTINCT", 'k'}, + {"DIV", 'o'}, + {"DLAST", 'f'}, + {"DLOOKUP", 'f'}, + {"DMAX", 'f'}, + {"DMIN", 'f'}, + {"DROP", 'k'}, + {"DSUM", 'f'}, + {"DUAL", 'k'}, + {"EACH", 'k'}, + {"ELSE", 'k'}, + {"ELSEIF", 'k'}, + {"ELT", 'f'}, + {"ENCLOSED", 'k'}, + {"ENCODE", 'f'}, + {"ENCRYPT", 'f'}, + {"ENCRYPTBYASMKEY", 'f'}, + {"ENCRYPTBYCERT", 'f'}, + {"ENCRYPTBYKEY", 'f'}, + {"ENCRYPTBYPASSPHRASE", 'f'}, + {"ENUM_FIRST", 'f'}, + {"ENUM_LAST", 'f'}, + {"ENUM_RANGE", 'f'}, + {"EOMONTH", 'f'}, + {"ESCAPED", 'k'}, + {"EVENTDATA", 'f'}, + {"EXEC", 'k'}, + {"EXECUTE", 'k'}, + {"EXISTS", 'k'}, + {"EXIT", 'k'}, + {"EXP", 'f'}, + {"EXPLAIN", 'k'}, + {"EXPORT_SET", 'f'}, + {"EXTRACT", 'f'}, + {"EXTRACTVALUE", 'f'}, + {"EXTRACT_VALUE", 'f'}, + {"FALSE", '1'}, + {"FETCH", 'k'}, + {"FIELD", 'f'}, + {"FILEDATETIME", 'f'}, + {"FILEGROUPPROPERTY", 'f'}, + {"FILEGROUP_ID", 'f'}, + {"FILEGROUP_NAME", 'f'}, + {"FILELEN", 'f'}, + {"FILEPROPERTY", 'f'}, + {"FILE_ID", 'f'}, + {"FILE_IDEX", 'f'}, + {"FILE_NAME", 'f'}, + {"FIND_IN_SET", 'f'}, + {"FIRST_VALUE", 'f'}, + {"FLOOR", 'f'}, + {"FN_VIRTUALFILESTATS", 'f'}, + {"FOR", 'n'}, + {"FORCE", 'k'}, + {"FOREIGN", 'k'}, + {"FORMAT", 'f'}, + {"FOUND_ROWS", 'f'}, + {"FROM", 'k'}, + {"FROM_DAYS", 'f'}, + {"FROM_UNIXTIME", 'f'}, + {"FULLTEXT", 'k'}, + {"FULLTEXTCATALOGPROPERTY", 'f'}, + {"FULLTEXTSERVICEPROPERTY", 'f'}, + {"GENERATE_SERIES", 'f'}, + {"GENERATE_SUBSCRIPTS", 'f'}, + {"GETATTR", 'f'}, + {"GETDATE", 'f'}, + {"GETUTCDATE", 'f'}, + {"GET_BIT", 'f'}, + {"GET_BYTE", 'f'}, + {"GET_FORMAT", 'f'}, + {"GET_LOCK", 'f'}, + {"GOTO", 'k'}, + {"GRANT", 'k'}, + {"GREATEST", 'f'}, + {"GROUP", 'n'}, + {"GROUPING", 'f'}, + {"GROUPING_ID", 'f'}, + {"GROUP_CONCAT", 'f'}, + {"HASHBYTES", 'f'}, + {"HAS_PERMS_BY_NAME", 'f'}, + {"HAVING", 'k'}, + {"HEX", 'f'}, + {"HIGH_PRIORITY", 'k'}, + {"HOST_NAME", 'f'}, + {"HOUR", 'f'}, + {"HOUR_MICROSECOND", 'k'}, + {"HOUR_MINUTE", 'k'}, + {"HOUR_SECOND", 'k'}, + {"IDENTIFY", 'f'}, + {"IDENT_CURRENT", 'f'}, + {"IDENT_INCR", 'f'}, + {"IDENT_SEED", 'f'}, + {"IF", 'k'}, + {"IFF", 'f'}, + {"IFNULL", 'f'}, + {"IGNORE", 'k'}, + {"IIF", 'f'}, + {"IN", 'n'}, + {"INDEX", 'k'}, + {"INDEXKEY_PROPERTY", 'f'}, + {"INDEXPROPERTY", 'f'}, + {"INDEX_COL", 'f'}, + {"INET_ATON", 'f'}, + {"INET_NTOA", 'f'}, + {"INFILE", 'k'}, + {"INITCAP", 'f'}, + {"INNER", 'k'}, + {"INOUT", 'k'}, + {"INSENSITIVE", 'k'}, + {"INSERT", 'k'}, + {"INSTR", 'f'}, + {"INSTRREV", 'f'}, + {"INT", 'k'}, + {"INT1", 'k'}, + {"INT2", 'k'}, + {"INT3", 'k'}, + {"INT4", 'k'}, + {"INT8", 'k'}, + {"INTEGER", 'k'}, + {"INTERVAL", 'k'}, + {"INTO", 'k'}, + {"IS", 'o'}, + {"ISDATE", 'f'}, + {"ISEMPTY", 'f'}, + {"ISFINITE", 'f'}, + {"ISNULL", 'f'}, + {"ISNUMERIC", 'f'}, + {"IS_FREE_LOCK", 'f'}, + {"IS_MEMBER", 'f'}, + {"IS_OBJECTSIGNED", 'f'}, + {"IS_ROLEMEMBER", 'f'}, + {"IS_SRVROLEMEMBER", 'f'}, + {"IS_USED_LOCK", 'f'}, + {"ITERATE", 'k'}, + {"JOIN", 'k'}, + {"JULIANDAY", 'f'}, + {"JUSTIFY_DAYS", 'f'}, + {"JUSTIFY_HOURS", 'f'}, + {"JUSTIFY_INTERVAL", 'f'}, + {"KEYS", 'k'}, + {"KEY_GUID", 'f'}, + {"KEY_ID", 'f'}, + {"KILL", 'k'}, + {"LAG", 'f'}, + {"LASTVAL", 'f'}, + {"LAST_INSERT_ID", 'f'}, + {"LAST_INSERT_ROWID", 'f'}, + {"LAST_VALUE", 'f'}, + {"LCASE", 'f'}, + {"LEAD", 'f'}, + {"LEADING", 'k'}, + {"LEAST", 'f'}, + {"LEAVE", 'k'}, + {"LEFT", 'n'}, + {"LENGTH", 'f'}, + {"LIKE", 'o'}, + {"LIMIT", 'k'}, + {"LINEAR", 'k'}, + {"LINES", 'k'}, + {"LN", 'f'}, + {"LOAD", 'k'}, + {"LOAD_EXTENSION", 'f'}, + {"LOAD_FILE", 'f'}, + {"LOCALTIME", 'k'}, + {"LOCALTIMESTAMP", 'k'}, + {"LOCATE", 'f'}, + {"LOCK", 'n'}, + {"LOG", 'f'}, + {"LOG10", 'f'}, + {"LOG2", 'f'}, + {"LONGBLOB", 'k'}, + {"LONGTEXT", 'k'}, + {"LOOP", 'k'}, + {"LOWER", 'f'}, + {"LOWER_INC", 'f'}, + {"LOWER_INF", 'f'}, + {"LOW_PRIORITY", 'k'}, + {"LPAD", 'f'}, + {"LTRIM", 'f'}, + {"MAKEDATE", 'f'}, + {"MAKE_SET", 'f'}, + {"MASKLEN", 'f'}, + {"MASTER_BIND", 'k'}, + {"MASTER_POS_WAIT", 'f'}, + {"MASTER_SSL_VERIFY_SERVER_CERT", 'k'}, + {"MATCH", 'k'}, + {"MAX", 'f'}, + {"MAXVALUE", 'k'}, + {"MD5", 'f'}, + {"MEDIUMBLOB", 'k'}, + {"MEDIUMINT", 'k'}, + {"MEDIUMTEXT", 'k'}, + {"MERGE", 'k'}, + {"MICROSECOND", 'f'}, + {"MID", 'f'}, + {"MIDDLEINT", 'k'}, + {"MIN", 'f'}, + {"MINUTE", 'f'}, + {"MINUTE_MICROSECOND", 'k'}, + {"MINUTE_SECOND", 'k'}, + {"MKDIR", 'f'}, + {"MOD", 'o'}, + {"MODE", 'n'}, + {"MODIFIES", 'k'}, + {"MONTH", 'f'}, + {"MONTHNAME", 'f'}, + {"NAME_CONST", 'f'}, + {"NATURAL", 'n'}, + {"NETMASK", 'f'}, + {"NEXTVAL", 'f'}, + {"NOT", 'o'}, + {"NOTNULL", 'k'}, + {"NOW", 'f'}, + {"NO_WRITE_TO_BINLOG", 'k'}, + {"NTH_VALUE", 'f'}, + {"NTILE", 'f'}, + {"NULL", '1'}, + {"NULLIF", 'f'}, + {"NUMERIC", 'k'}, + {"NZ", 'f'}, + {"OBJECTPROPERTY", 'f'}, + {"OBJECTPROPERTYEX", 'f'}, + {"OBJECT_DEFINITION", 'f'}, + {"OBJECT_ID", 'f'}, + {"OBJECT_NAME", 'f'}, + {"OBJECT_SCHEMA_NAME", 'f'}, + {"OCT", 'f'}, + {"OCTET_LENGTH", 'f'}, + {"OFFSET", 'k'}, + {"OLD_PASSWORD", 'f'}, + {"ONE_SHOT", 'k'}, + {"OPEN", 'k'}, + {"OPENDATASOURCE", 'f'}, + {"OPENQUERY", 'f'}, + {"OPENROWSET", 'f'}, + {"OPENXML", 'f'}, + {"OPTIMIZE", 'k'}, + {"OPTION", 'k'}, + {"OPTIONALLY", 'k'}, + {"OR", '&'}, + {"ORD", 'f'}, + {"ORDER", 'n'}, + {"ORIGINAL_DB_NAME", 'f'}, + {"ORIGINAL_LOGIN", 'f'}, + {"OUT", 'k'}, + {"OUTER", 'n'}, + {"OUTFILE", 'k'}, + {"OVERLAPS", 'f'}, + {"OVERLAY", 'f'}, + {"OWN3D", 'k'}, + {"PARSENAME", 'f'}, + {"PARTITION", 'k'}, + {"PASSWORD", 'k'}, + {"PATHINDEX", 'f'}, + {"PATINDEX", 'f'}, + {"PERCENTILE_COUNT", 'f'}, + {"PERCENTILE_DISC", 'f'}, + {"PERCENTILE_RANK", 'f'}, + {"PERCENT_RANK", 'f'}, + {"PERIOD_ADD", 'f'}, + {"PERIOD_DIFF", 'f'}, + {"PERMISSIONS", 'f'}, + {"PG_ADVISORY_LOCK", 'f'}, + {"PG_BACKEND_PID", 'f'}, + {"PG_CANCEL_BACKEND", 'f'}, + {"PG_CLIENT_ENCODING", 'f'}, + {"PG_CONF_LOAD_TIME", 'f'}, + {"PG_CREATE_RESTORE_POINT", 'f'}, + {"PG_HAS_ROLE", 'f'}, + {"PG_IS_IN_RECOVERY", 'f'}, + {"PG_IS_OTHER_TEMP_SCHEMA", 'f'}, + {"PG_LISTENING_CHANNELS", 'f'}, + {"PG_LS_DIR", 'f'}, + {"PG_MY_TEMP_SCHEMA", 'f'}, + {"PG_POSTMASTER_START_TIME", 'f'}, + {"PG_READ_BINARY_FILE", 'f'}, + {"PG_READ_FILE", 'f'}, + {"PG_RELOAD_CONF", 'f'}, + {"PG_ROTATE_LOGFILE", 'f'}, + {"PG_SLEEP", 'f'}, + {"PG_START_BACKUP", 'f'}, + {"PG_STAT_FILE", 'f'}, + {"PG_STOP_BACKUP", 'f'}, + {"PG_SWITCH_XLOG", 'f'}, + {"PG_TERMINATE_BACKEND", 'f'}, + {"PG_TRIGGER_DEPTH", 'f'}, + {"PI", 'f'}, + {"POSITION", 'f'}, + {"POW", 'f'}, + {"POWER", 'f'}, + {"PRECISION", 'k'}, + {"PRIMARY", 'k'}, + {"PROCEDURE", 'k'}, + {"PUBLISHINGSERVERNAME", 'f'}, + {"PURGE", 'k'}, + {"PWDCOMPARE", 'f'}, + {"PWDENCRYPT", 'f'}, + {"QUARTER", 'f'}, + {"QUOTE", 'f'}, + {"QUOTENAME", 'f'}, + {"QUOTE_IDENT", 'f'}, + {"QUOTE_LITERAL", 'f'}, + {"QUOTE_NULLABLE", 'f'}, + {"RADIANS", 'f'}, + {"RAND", 'f'}, + {"RANDOM", 'f'}, + {"RANDOMBLOB", 'f'}, + {"RANGE", 'k'}, + {"RANK", 'f'}, + {"READ", 'k'}, + {"READS", 'k'}, + {"READ_WRITE", 'k'}, + {"REAL", 'n'}, + {"REFERENCES", 'k'}, + {"REGEXP", 'o'}, + {"REGEXP_MATCHES", 'f'}, + {"REGEXP_REPLACE", 'f'}, + {"REGEXP_SPLIT_TO_ARRAY", 'f'}, + {"REGEXP_SPLIT_TO_TABLE", 'f'}, + {"RELEASE", 'k'}, + {"RELEASE_LOCK", 'f'}, + {"RENAME", 'k'}, + {"REPEAT", 'k'}, + {"REPLACE", 'k'}, + {"REPLICATE", 'f'}, + {"REQUIRE", 'k'}, + {"RESIGNAL", 'k'}, + {"RESTRICT", 'k'}, + {"RETURN", 'k'}, + {"REVERSE", 'f'}, + {"REVOKE", 'k'}, + {"RIGHT", 'n'}, + {"RLIKE", 'o'}, + {"ROUND", 'f'}, + {"ROW", 'f'}, + {"ROW_COUNT", 'f'}, + {"ROW_NUMBER", 'f'}, + {"ROW_TO_JSON", 'f'}, + {"RPAD", 'f'}, + {"RTRIM", 'f'}, + {"SCHAMA_NAME", 'f'}, + {"SCHEMA", 'k'}, + {"SCHEMAS", 'k'}, + {"SCHEMA_ID", 'f'}, + {"SCOPE_IDENTITY", 'f'}, + {"SECOND_MICROSECOND", 'k'}, + {"SEC_TO_TIME", 'f'}, + {"SELECT", 'k'}, + {"SENSITIVE", 'k'}, + {"SEPARATOR", 'k'}, + {"SESSION_USER", 'f'}, + {"SET", 'k'}, + {"SETATTR", 'f'}, + {"SETSEED", 'f'}, + {"SETVAL", 'f'}, + {"SET_BIT", 'f'}, + {"SET_BYTE", 'f'}, + {"SET_CONFIG", 'f'}, + {"SET_MASKLEN", 'f'}, + {"SHA", 'f'}, + {"SHA1", 'f'}, + {"SHA2", 'f'}, + {"SHOW", 'n'}, + {"SHUTDOWN", 'k'}, + {"SIGN", 'f'}, + {"SIGNAL", 'k'}, + {"SIGNBYASMKEY", 'f'}, + {"SIGNBYCERT", 'f'}, + {"SIMILAR", 'k'}, + {"SIN", 'f'}, + {"SLEEP", 'f'}, + {"SMALLDATETIMEFROMPARTS", 'f'}, + {"SMALLINT", 'k'}, + {"SOUNDEX", 'f'}, + {"SOUNDS", 'o'}, + {"SPACE", 'f'}, + {"SPATIAL", 'k'}, + {"SPECIFIC", 'k'}, + {"SPLIT_PART", 'f'}, + {"SQL", 'k'}, + {"SQLEXCEPTION", 'k'}, + {"SQLSTATE", 'k'}, + {"SQLWARNING", 'k'}, + {"SQL_BIG_RESULT", 'k'}, + {"SQL_CALC_FOUND_ROWS", 'k'}, + {"SQL_SMALL_RESULT", 'k'}, + {"SQL_VARIANT_PROPERTY", 'f'}, + {"SQRT", 'f'}, + {"SSL", 'k'}, + {"STARTING", 'k'}, + {"STATEMENT_TIMESTAMP", 'f'}, + {"STATS_DATE", 'f'}, + {"STDDEV", 'f'}, + {"STDDEV_POP", 'f'}, + {"STDDEV_SAMP", 'f'}, + {"STRAIGHT_JOIN", 'k'}, + {"STRCMP", 'f'}, + {"STRCONV", 'f'}, + {"STRING_AGG", 'f'}, + {"STRING_TO_ARRAY", 'f'}, + {"STRPOS", 'f'}, + {"STR_TO_DATE", 'f'}, + {"STUFF", 'f'}, + {"SUBDATE", 'f'}, + {"SUBSTR", 'f'}, + {"SUBSTRING", 'f'}, + {"SUBSTRING_INDEX", 'f'}, + {"SUBTIME", 'f'}, + {"SUM", 'f'}, + {"SUSER_ID", 'f'}, + {"SUSER_NAME", 'f'}, + {"SUSER_SID", 'f'}, + {"SUSER_SNAME", 'f'}, + {"SWITCHOFFET", 'f'}, + {"SYS.FN_BUILTIN_PERMISSIONS", 'f'}, + {"SYS.FN_GET_AUDIT_FILE", 'f'}, + {"SYS.FN_MY_PERMISSIONS", 'f'}, + {"SYS.STRAGG", 'f'}, + {"SYSCOLUMNS", 'k'}, + {"SYSDATE", 'f'}, + {"SYSDATETIME", 'f'}, + {"SYSDATETIMEOFFSET", 'f'}, + {"SYSOBJECTS", 'k'}, + {"SYSTEM_USER", 'f'}, + {"SYSUSERS", 'k'}, + {"SYSUTCDATETME", 'f'}, + {"TABLE", 'k'}, + {"TAN", 'f'}, + {"TERMINATED", 'k'}, + {"TERTIARY_WEIGHTS", 'f'}, + {"TEXTPTR", 'f'}, + {"TEXTVALID", 'f'}, + {"THEN", 'k'}, + {"TIME", 'k'}, + {"TIMEDIFF", 'f'}, + {"TIMEFROMPARTS", 'f'}, + {"TIMEOFDAY", 'f'}, + {"TIMESERIAL", 'f'}, + {"TIMESTAMP", 'f'}, + {"TIMESTAMPADD", 'f'}, + {"TIMEVALUE", 'f'}, + {"TIME_FORMAT", 'f'}, + {"TIME_TO_SEC", 'f'}, + {"TINYBLOB", 'k'}, + {"TINYINT", 'k'}, + {"TINYTEXT", 'k'}, + {"TODATETIMEOFFSET", 'f'}, + {"TOP", 'k'}, + {"TOTAL", 'f'}, + {"TOTAL_CHANGES", 'f'}, + {"TO_ASCII", 'f'}, + {"TO_CHAR", 'f'}, + {"TO_DATE", 'f'}, + {"TO_DAYS", 'f'}, + {"TO_HEX", 'f'}, + {"TO_NUMBER", 'f'}, + {"TO_SECONDS", 'f'}, + {"TO_TIMESTAMP", 'f'}, + {"TRAILING", 'n'}, + {"TRANSACTION_TIMESTAMP", 'f'}, + {"TRANSLATE", 'f'}, + {"TRIGGER", 'k'}, + {"TRIGGER_NESTLEVEL", 'f'}, + {"TRIM", 'f'}, + {"TRUE", '1'}, + {"TRUNC", 'f'}, + {"TRUNCATE", 'f'}, + {"TRY_CAST", 'f'}, + {"TRY_CONVERT", 'f'}, + {"TRY_PARSE", 'f'}, + {"TYPEOF", 'f'}, + {"TYPEPROPERTY", 'f'}, + {"TYPE_ID", 'f'}, + {"TYPE_NAME", 'f'}, + {"UCASE", 'f'}, + {"UNCOMPRESS", 'f'}, + {"UNCOMPRESS_LENGTH", 'f'}, + {"UNDO", 'k'}, + {"UNHEX", 'f'}, + {"UNICODE", 'f'}, + {"UNION", 'U'}, + {"UNIQUE", 'n'}, + {"UNIX_TIMESTAMP", 'f'}, + {"UNI_ON", 'U'}, + {"UNKNOWN", 'k'}, + {"UNLOCK", 'k'}, + {"UNNEST", 'f'}, + {"UNSIGNED", 'k'}, + {"UPDATE", 'k'}, + {"UPDATEXML", 'f'}, + {"UPPER", 'f'}, + {"UPPER_INC", 'f'}, + {"UPPER_INF", 'f'}, + {"USAGE", 'k'}, + {"USE", 'k'}, + {"USER_ID", 'n'}, + {"USER_NAME", 'f'}, + {"USING", 'f'}, + {"UTC_DATE", 'k'}, + {"UTC_TIME", 'k'}, + {"UTC_TIMESTAMP", 'k'}, + {"UTL_INADDR.GET_HOST_ADDRESS", 'f'}, + {"UUID", 'f'}, + {"UUID_SHORT", 'f'}, + {"VALUES", 'k'}, + {"VAR", 'f'}, + {"VARBINARY", 'k'}, + {"VARCHAR", 'k'}, + {"VARCHARACTER", 'k'}, + {"VARIANCE", 'f'}, + {"VARP", 'f'}, + {"VARYING", 'k'}, + {"VAR_POP", 'f'}, + {"VAR_SAMP", 'f'}, + {"VERIFYSIGNEDBYASMKEY", 'f'}, + {"VERIFYSIGNEDBYCERT", 'f'}, + {"VERSION", 'f'}, + {"WAITFOR", 'k'}, + {"WEEK", 'f'}, + {"WEEKDAY", 'f'}, + {"WEEKDAYNAME", 'f'}, + {"WEEKOFYEAR", 'f'}, + {"WHEN", 'k'}, + {"WHERE", 'k'}, + {"WHILE", 'k'}, + {"WIDTH_BUCKET", 'f'}, + {"WITH", 'k'}, + {"XMLAGG", 'f'}, + {"XMLCOMMENT", 'f'}, + {"XMLCONCAT", 'f'}, + {"XMLELEMENT", 'f'}, + {"XMLEXISTS", 'f'}, + {"XMLFOREST", 'f'}, + {"XMLFORMAT", 'f'}, + {"XMLPI", 'f'}, + {"XMLROOT", 'f'}, + {"XMLTYPE", 'f'}, + {"XML_IS_WELL_FORMED", 'f'}, + {"XOR", 'o'}, + {"XPATH", 'f'}, + {"XPATH_EXISTS", 'f'}, + {"XP_EXECRESULTSET", 'k'}, + {"YEAR", 'f'}, + {"YEARWEEK", 'f'}, + {"YEAR_MONTH", 'k'}, + {"ZEROBLOB", 'f'}, + {"ZEROFILL", 'k'}, +}; +static const size_t sql_keywords_sz = 752; +static const char* multikeywords_start[] = { + "ALTER", + "AT", + "AT TIME", + "CROSS", + "FULL", + "GROUP", + "IN", + "IN BOOLEAN", + "INTERSECT", + "IS", + "IS DISTINCT", + "IS NOT", + "LEFT", + "LOCK", + "NATURAL", + "NEXT", + "NEXT VALUE", + "NOT", + "NOT SIMILAR", + "ORDER", + "OWN3D", + "READ", + "RIGHT", + "SELECT", + "SIMILAR", + "SOUNDS", + "UNION", +}; +static const size_t multikeywords_start_sz = 27; +static const keyword_t multikeywords[] = { + {"ALTER DOMAIN", 'k'}, + {"ALTER TABLE", 'k'}, + {"AT TIME", 'n'}, + {"AT TIME ZONE", 'k'}, + {"CROSS JOIN", 'k'}, + {"FULL OUTER", 'k'}, + {"GROUP BY", 'B'}, + {"IN BOOLEAN", 'n'}, + {"IN BOOLEAN MODE", 'k'}, + {"INTERSECT ALL", 'o'}, + {"IS DISTINCT", 'n'}, + {"IS DISTINCT FROM", 'k'}, + {"IS NOT", 'o'}, + {"IS NOT DISTINCT", 'n'}, + {"IS NOT DISTINCT FROM", 'k'}, + {"LEFT JOIN", 'k'}, + {"LEFT OUTER", 'k'}, + {"LOCK TABLE", 'k'}, + {"LOCK TABLES", 'k'}, + {"NATURAL FULL", 'k'}, + {"NATURAL INNER", 'k'}, + {"NATURAL JOIN", 'k'}, + {"NATURAL LEFT", 'k'}, + {"NATURAL OUTER", 'k'}, + {"NATURAL RIGHT", 'k'}, + {"NEXT VALUE", 'n'}, + {"NEXT VALUE FOR", 'k'}, + {"NOT BETWEEN", 'o'}, + {"NOT IN", 'o'}, + {"NOT LIKE", 'o'}, + {"NOT REGEXP", 'o'}, + {"NOT RLIKE", 'o'}, + {"NOT SIMILAR", 'o'}, + {"NOT SIMILAR TO", 'o'}, + {"ORDER BY", 'B'}, + {"OWN3D BY", 'B'}, + {"READ WRITE", 'k'}, + {"RIGHT JOIN", 'k'}, + {"RIGHT OUTER", 'k'}, + {"SELECT ALL", 'k'}, + {"SIMILAR TO", 'o'}, + {"SOUNDS LIKE", 'o'}, + {"UNION ALL", 'U'}, +}; +static const size_t multikeywords_sz = 43; + +typedef size_t (*pt2Function)(sfilter *sf); +static const pt2Function char_parse_map[] = { + &parse_white, /* 0 */ + &parse_white, /* 1 */ + &parse_white, /* 2 */ + &parse_white, /* 3 */ + &parse_white, /* 4 */ + &parse_white, /* 5 */ + &parse_white, /* 6 */ + &parse_white, /* 7 */ + &parse_white, /* 8 */ + &parse_white, /* 9 */ + &parse_white, /* 10 */ + &parse_white, /* 11 */ + &parse_white, /* 12 */ + &parse_white, /* 13 */ + &parse_white, /* 14 */ + &parse_white, /* 15 */ + &parse_white, /* 16 */ + &parse_white, /* 17 */ + &parse_white, /* 18 */ + &parse_white, /* 19 */ + &parse_white, /* 20 */ + &parse_white, /* 21 */ + &parse_white, /* 22 */ + &parse_white, /* 23 */ + &parse_white, /* 24 */ + &parse_white, /* 25 */ + &parse_white, /* 26 */ + &parse_white, /* 27 */ + &parse_white, /* 28 */ + &parse_white, /* 29 */ + &parse_white, /* 30 */ + &parse_white, /* 31 */ + &parse_white, /* 32 */ + &parse_operator2, /* 33 */ + &parse_string, /* 34 */ + &parse_eol_comment, /* 35 */ + &parse_money, /* 36 */ + &parse_operator1, /* 37 */ + &parse_operator2, /* 38 */ + &parse_string, /* 39 */ + &parse_char, /* 40 */ + &parse_char, /* 41 */ + &parse_operator2, /* 42 */ + &parse_operator1, /* 43 */ + &parse_char, /* 44 */ + &parse_dash, /* 45 */ + &parse_number, /* 46 */ + &parse_slash, /* 47 */ + &parse_number, /* 48 */ + &parse_number, /* 49 */ + &parse_number, /* 50 */ + &parse_number, /* 51 */ + &parse_number, /* 52 */ + &parse_number, /* 53 */ + &parse_number, /* 54 */ + &parse_number, /* 55 */ + &parse_number, /* 56 */ + &parse_number, /* 57 */ + &parse_char, /* 58 */ + &parse_char, /* 59 */ + &parse_operator2, /* 60 */ + &parse_operator2, /* 61 */ + &parse_operator2, /* 62 */ + &parse_other, /* 63 */ + &parse_var, /* 64 */ + &parse_word, /* 65 */ + &parse_word, /* 66 */ + &parse_word, /* 67 */ + &parse_word, /* 68 */ + &parse_word, /* 69 */ + &parse_word, /* 70 */ + &parse_word, /* 71 */ + &parse_word, /* 72 */ + &parse_word, /* 73 */ + &parse_word, /* 74 */ + &parse_word, /* 75 */ + &parse_word, /* 76 */ + &parse_word, /* 77 */ + &parse_word, /* 78 */ + &parse_word, /* 79 */ + &parse_word, /* 80 */ + &parse_word, /* 81 */ + &parse_word, /* 82 */ + &parse_word, /* 83 */ + &parse_word, /* 84 */ + &parse_word, /* 85 */ + &parse_word, /* 86 */ + &parse_word, /* 87 */ + &parse_word, /* 88 */ + &parse_word, /* 89 */ + &parse_word, /* 90 */ + &parse_other, /* 91 */ + &parse_backslash, /* 92 */ + &parse_other, /* 93 */ + &parse_operator1, /* 94 */ + &parse_word, /* 95 */ + &parse_word, /* 96 */ + &parse_word, /* 97 */ + &parse_word, /* 98 */ + &parse_word, /* 99 */ + &parse_word, /* 100 */ + &parse_word, /* 101 */ + &parse_word, /* 102 */ + &parse_word, /* 103 */ + &parse_word, /* 104 */ + &parse_word, /* 105 */ + &parse_word, /* 106 */ + &parse_word, /* 107 */ + &parse_word, /* 108 */ + &parse_word, /* 109 */ + &parse_word, /* 110 */ + &parse_word, /* 111 */ + &parse_word, /* 112 */ + &parse_word, /* 113 */ + &parse_word, /* 114 */ + &parse_word, /* 115 */ + &parse_word, /* 116 */ + &parse_word, /* 117 */ + &parse_word, /* 118 */ + &parse_word, /* 119 */ + &parse_word, /* 120 */ + &parse_word, /* 121 */ + &parse_word, /* 122 */ + &parse_other, /* 123 */ + &parse_operator2, /* 124 */ + &parse_other, /* 125 */ + &parse_operator1, /* 126 */ + &parse_white, /* 127 */ +}; + +static const char* sql_fingerprints[] = { "&1o1U", "&1osU", "&1ovU", @@ -2301,27 +3318,6 @@ static const char* patmap[] = { "vovso", "vovvo", }; -static const size_t patmap_sz = 2298; - - -/* Simple binary search */ -int is_sqli_pattern(const char *key) -{ - int left = 0; - int right = (int)patmap_sz - 1; - - while (left <= right) { - int pos = (left + right) / 2; - int cmp = strcmp(patmap[pos], key); - if (cmp == 0) { - return 1; /* TRUE */ - } else if (cmp < 0) { - left = pos + 1; - } else { - right = pos - 1; - } - } - return 0; /* FALSE */ -} +static const size_t sqli_fingerprints_sz = 2298; #endif diff --git a/apache2/libinjection/sqlparse_data.h b/apache2/libinjection/sqlparse_data.h deleted file mode 100644 index 811f883d..00000000 --- a/apache2/libinjection/sqlparse_data.h +++ /dev/null @@ -1,983 +0,0 @@ -#ifndef _SQLPARSE_DATA_H -#define _SQLPARSE_DATA_H -#include "sqlparse.h" - -static const char* operators2[] = { - "!!", - "!<", - "!=", - "!>", - "!~", - "%=", - "&&", - "&=", - "*=", - "+=", - "-=", - "/=", - ":=", - "<<", - "<=", - "<>", - "<@", - ">=", - ">>", - "@>", - "^=", - "|/", - "|=", - "||", - "~*", -}; -static const size_t operators2_sz = 25; - -static const keyword_t sql_keywords[] = { - {"ABS", 'f'}, - {"ACCESSIBLE", 'k'}, - {"ACOS", 'f'}, - {"ADD", 'k'}, - {"ADDDATE", 'f'}, - {"ADDTIME", 'f'}, - {"AES_DECRYPT", 'f'}, - {"AES_ENCRYPT", 'f'}, - {"AGAINST", 'k'}, - {"AGE", 'f'}, - {"ALL_USERS", 'k'}, - {"ALTER", 'k'}, - {"ANALYZE", 'k'}, - {"AND", '&'}, - {"APPLOCK_MODE", 'f'}, - {"APPLOCK_TEST", 'f'}, - {"APP_NAME", 'f'}, - {"ARRAY_AGG", 'f'}, - {"ARRAY_CAT", 'f'}, - {"ARRAY_DIM", 'f'}, - {"ARRAY_FILL", 'f'}, - {"ARRAY_LENGTH", 'f'}, - {"ARRAY_LOWER", 'f'}, - {"ARRAY_NDIMS", 'f'}, - {"ARRAY_PREPEND", 'f'}, - {"ARRAY_TO_JSON", 'f'}, - {"ARRAY_TO_STRING", 'f'}, - {"ARRAY_UPPER", 'f'}, - {"AS", 'k'}, - {"ASC", 'k'}, - {"ASCII", 'f'}, - {"ASENSITIVE", 'k'}, - {"ASIN", 'f'}, - {"ASSEMBLYPROPERTY", 'f'}, - {"ASYMKEY_ID", 'f'}, - {"ATAN", 'f'}, - {"ATAN2", 'f'}, - {"AVG", 'f'}, - {"BEFORE", 'k'}, - {"BEGIN", 'k'}, - {"BENCHMARK", 'f'}, - {"BETWEEN", 'k'}, - {"BIGINT", 'k'}, - {"BIN", 'f'}, - {"BINARY", 'k'}, - {"BINARY_DOUBLE_INFINITY", '1'}, - {"BINARY_DOUBLE_NAN", '1'}, - {"BINARY_FLOAT_INFINITY", '1'}, - {"BINARY_FLOAT_NAN", '1'}, - {"BINBINARY", 'f'}, - {"BIT_AND", 'f'}, - {"BIT_COUNT", 'f'}, - {"BIT_LENGTH", 'f'}, - {"BIT_OR", 'f'}, - {"BIT_XOR", 'f'}, - {"BLOB", 'k'}, - {"BOOLEAN", 'k'}, - {"BOOL_AND", 'f'}, - {"BOOL_OR", 'f'}, - {"BOTH", 'k'}, - {"BTRIM", 'f'}, - {"BY", 'n'}, - {"CALL", 'k'}, - {"CASCADE", 'k'}, - {"CASE", 'o'}, - {"CAST", 'f'}, - {"CBOOL", 'f'}, - {"CBRT", 'f'}, - {"CBYTE", 'f'}, - {"CCUR", 'f'}, - {"CDATE", 'f'}, - {"CDBL", 'f'}, - {"CEIL", 'f'}, - {"CEILING", 'f'}, - {"CERTENCODED", 'f'}, - {"CERTPRIVATEKEY", 'f'}, - {"CERT_ID", 'f'}, - {"CERT_PROPERTY", 'f'}, - {"CHANGE", 'k'}, - {"CHAR", 'f'}, - {"CHARACTER", 'k'}, - {"CHARACTER_LENGTH", 'f'}, - {"CHARINDEX", 'f'}, - {"CHARSET", 'f'}, - {"CHAR_LENGTH", 'f'}, - {"CHDIR", 'f'}, - {"CHDRIVE", 'f'}, - {"CHECK", 'k'}, - {"CHECKSUM_AGG", 'f'}, - {"CHOOSE", 'f'}, - {"CHR", 'f'}, - {"CINT", 'f'}, - {"CLNG", 'f'}, - {"CLOCK_TIMESTAMP", 'f'}, - {"COALESCE", 'k'}, - {"COERCIBILITY", 'f'}, - {"COLLATE", 'k'}, - {"COLLATION", 'f'}, - {"COLLATIONPROPERTY", 'f'}, - {"COLUMN", 'k'}, - {"COLUMNPROPERTY", 'f'}, - {"COLUMNS_UPDATED", 'f'}, - {"COL_LENGTH", 'f'}, - {"COL_NAME", 'f'}, - {"COMPRESS", 'f'}, - {"CONCAT", 'f'}, - {"CONCAT_WS", 'f'}, - {"CONDITION", 'k'}, - {"CONNECTION_ID", 'f'}, - {"CONSTRAINT", 'k'}, - {"CONTINUE", 'k'}, - {"CONV", 'f'}, - {"CONVERT", 'f'}, - {"CONVERT_FROM", 'f'}, - {"CONVERT_TO", 'f'}, - {"CONVERT_TZ", 'f'}, - {"COS", 'f'}, - {"COT", 'f'}, - {"COUNT", 'f'}, - {"COUNT_BIG", 'k'}, - {"CRC32", 'f'}, - {"CREATE", 'k'}, - {"CSNG", 'f'}, - {"CTXSYS.DRITHSX.SN", 'f'}, - {"CUME_DIST", 'f'}, - {"CURDATE", 'f'}, - {"CURDIR", 'f'}, - {"CURRENTUSER", 'f'}, - {"CURRENT_DATABASE", 'f'}, - {"CURRENT_DATE", 'k'}, - {"CURRENT_QUERY", 'f'}, - {"CURRENT_SCHEMA", 'f'}, - {"CURRENT_SCHEMAS", 'f'}, - {"CURRENT_SETTING", 'p'}, - {"CURRENT_TIME", 'k'}, - {"CURRENT_TIMESTAMP", 'k'}, - {"CURRENT_USER", 'k'}, - {"CURRVAL", 'f'}, - {"CURSOR", 'k'}, - {"CURSOR_STATUS", 'f'}, - {"CURTIME", 'f'}, - {"CVAR", 'f'}, - {"DATABASE", 'k'}, - {"DATABASEPROPERTYEX", 'f'}, - {"DATABASES", 'k'}, - {"DATABASE_PRINCIPAL_ID", 'f'}, - {"DATALENGTH", 'f'}, - {"DATE", 'f'}, - {"DATEADD", 'f'}, - {"DATEDIFF", 'f'}, - {"DATEFROMPARTS", 'f'}, - {"DATENAME", 'f'}, - {"DATEPART", 'f'}, - {"DATESERIAL", 'f'}, - {"DATETIME2FROMPARTS", 'f'}, - {"DATETIMEFROMPARTS", 'f'}, - {"DATETIMEOFFSETFROMPARTS", 'f'}, - {"DATEVALUE", 'f'}, - {"DATE_ADD", 'f'}, - {"DATE_FORMAT", 'f'}, - {"DATE_PART", 'f'}, - {"DATE_SUB", 'f'}, - {"DATE_TRUNC", 'f'}, - {"DAVG", 'f'}, - {"DAY", 'f'}, - {"DAYNAME", 'f'}, - {"DAYOFMONTH", 'f'}, - {"DAYOFWEEK", 'f'}, - {"DAYOFYEAR", 'f'}, - {"DAY_HOUR", 'k'}, - {"DAY_MICROSECOND", 'k'}, - {"DAY_MINUTE", 'k'}, - {"DAY_SECOND", 'k'}, - {"DBMS_PIPE.RECEIVE_MESSAGE", 'f'}, - {"DB_ID", 'f'}, - {"DB_NAME", 'f'}, - {"DCOUNT", 'f'}, - {"DEC", 'k'}, - {"DECIMAL", 'k'}, - {"DECLARE", 'k'}, - {"DECODE", 'f'}, - {"DECRYPTBYASMKEY", 'f'}, - {"DECRYPTBYCERT", 'f'}, - {"DECRYPTBYKEY", 'f'}, - {"DECRYPTBYKEYAUTOCERT", 'f'}, - {"DECRYPTBYPASSPHRASE", 'f'}, - {"DEFAULT", 'k'}, - {"DEGREES", 'f'}, - {"DELAY", 'k'}, - {"DELAYED", 'k'}, - {"DELETE", 'k'}, - {"DENSE_RANK", 'f'}, - {"DESC", 'k'}, - {"DESCRIBE", 'k'}, - {"DES_DECRYPT", 'f'}, - {"DES_ENCRYPT", 'f'}, - {"DETERMINISTIC", 'k'}, - {"DFIRST", 'f'}, - {"DIFFERENCE", 'f'}, - {"DISTINCROW", 'k'}, - {"DISTINCT", 'k'}, - {"DIV", 'o'}, - {"DLAST", 'f'}, - {"DLOOKUP", 'f'}, - {"DMAX", 'f'}, - {"DMIN", 'f'}, - {"DROP", 'k'}, - {"DSUM", 'f'}, - {"DUAL", 'k'}, - {"EACH", 'k'}, - {"ELSE", 'k'}, - {"ELSEIF", 'k'}, - {"ELT", 'f'}, - {"ENCLOSED", 'k'}, - {"ENCODE", 'f'}, - {"ENCRYPT", 'f'}, - {"ENCRYPTBYASMKEY", 'f'}, - {"ENCRYPTBYCERT", 'f'}, - {"ENCRYPTBYKEY", 'f'}, - {"ENCRYPTBYPASSPHRASE", 'f'}, - {"ENUM_FIRST", 'f'}, - {"ENUM_LAST", 'f'}, - {"ENUM_RANGE", 'f'}, - {"EOMONTH", 'f'}, - {"ESCAPED", 'k'}, - {"EVENTDATA", 'f'}, - {"EXEC", 'k'}, - {"EXECUTE", 'k'}, - {"EXISTS", 'k'}, - {"EXIT", 'k'}, - {"EXP", 'f'}, - {"EXPLAIN", 'k'}, - {"EXPORT_SET", 'f'}, - {"EXTRACT", 'f'}, - {"EXTRACTVALUE", 'f'}, - {"EXTRACT_VALUE", 'f'}, - {"FALSE", '1'}, - {"FETCH", 'k'}, - {"FIELD", 'f'}, - {"FILEDATETIME", 'f'}, - {"FILEGROUPPROPERTY", 'f'}, - {"FILEGROUP_ID", 'f'}, - {"FILEGROUP_NAME", 'f'}, - {"FILELEN", 'f'}, - {"FILEPROPERTY", 'f'}, - {"FILE_ID", 'f'}, - {"FILE_IDEX", 'f'}, - {"FILE_NAME", 'f'}, - {"FIND_IN_SET", 'f'}, - {"FIRST_VALUE", 'f'}, - {"FLOOR", 'f'}, - {"FN_VIRTUALFILESTATS", 'f'}, - {"FOR", 'n'}, - {"FORCE", 'k'}, - {"FOREIGN", 'k'}, - {"FORMAT", 'f'}, - {"FOUND_ROWS", 'f'}, - {"FROM", 'k'}, - {"FROM_DAYS", 'f'}, - {"FROM_UNIXTIME", 'f'}, - {"FULLTEXT", 'k'}, - {"FULLTEXTCATALOGPROPERTY", 'f'}, - {"FULLTEXTSERVICEPROPERTY", 'f'}, - {"GENERATE_SERIES", 'f'}, - {"GENERATE_SUBSCRIPTS", 'f'}, - {"GETATTR", 'f'}, - {"GETDATE", 'f'}, - {"GETUTCDATE", 'f'}, - {"GET_BIT", 'f'}, - {"GET_BYTE", 'f'}, - {"GET_FORMAT", 'f'}, - {"GET_LOCK", 'f'}, - {"GOTO", 'k'}, - {"GRANT", 'k'}, - {"GREATEST", 'f'}, - {"GROUP", 'n'}, - {"GROUPING", 'f'}, - {"GROUPING_ID", 'f'}, - {"GROUP_CONCAT", 'f'}, - {"HASHBYTES", 'f'}, - {"HAS_PERMS_BY_NAME", 'f'}, - {"HAVING", 'k'}, - {"HEX", 'f'}, - {"HIGH_PRIORITY", 'k'}, - {"HOST_NAME", 'f'}, - {"HOUR", 'f'}, - {"HOUR_MICROSECOND", 'k'}, - {"HOUR_MINUTE", 'k'}, - {"HOUR_SECOND", 'k'}, - {"IDENTIFY", 'f'}, - {"IDENT_CURRENT", 'f'}, - {"IDENT_INCR", 'f'}, - {"IDENT_SEED", 'f'}, - {"IF", 'k'}, - {"IFF", 'f'}, - {"IFNULL", 'f'}, - {"IGNORE", 'k'}, - {"IIF", 'f'}, - {"IN", 'n'}, - {"INDEX", 'k'}, - {"INDEXKEY_PROPERTY", 'f'}, - {"INDEXPROPERTY", 'f'}, - {"INDEX_COL", 'f'}, - {"INET_ATON", 'f'}, - {"INET_NTOA", 'f'}, - {"INFILE", 'k'}, - {"INITCAP", 'f'}, - {"INNER", 'k'}, - {"INOUT", 'k'}, - {"INSENSITIVE", 'k'}, - {"INSERT", 'k'}, - {"INSTR", 'f'}, - {"INSTRREV", 'f'}, - {"INT", 'k'}, - {"INT1", 'k'}, - {"INT2", 'k'}, - {"INT3", 'k'}, - {"INT4", 'k'}, - {"INT8", 'k'}, - {"INTEGER", 'k'}, - {"INTERVAL", 'k'}, - {"INTO", 'k'}, - {"IS", 'o'}, - {"ISDATE", 'f'}, - {"ISEMPTY", 'f'}, - {"ISFINITE", 'f'}, - {"ISNULL", 'f'}, - {"ISNUMERIC", 'f'}, - {"IS_FREE_LOCK", 'f'}, - {"IS_MEMBER", 'f'}, - {"IS_OBJECTSIGNED", 'f'}, - {"IS_ROLEMEMBER", 'f'}, - {"IS_SRVROLEMEMBER", 'f'}, - {"IS_USED_LOCK", 'f'}, - {"ITERATE", 'k'}, - {"JOIN", 'k'}, - {"JUSTIFY_DAYS", 'f'}, - {"JUSTIFY_HOURS", 'f'}, - {"JUSTIFY_INTERVAL", 'f'}, - {"KEYS", 'k'}, - {"KEY_GUID", 'f'}, - {"KEY_ID", 'f'}, - {"KILL", 'k'}, - {"LAG", 'f'}, - {"LASTVAL", 'f'}, - {"LAST_INSERT_ID", 'f'}, - {"LAST_VALUE", 'f'}, - {"LCASE", 'f'}, - {"LEAD", 'f'}, - {"LEADING", 'k'}, - {"LEAST", 'f'}, - {"LEAVE", 'k'}, - {"LEFT", 'n'}, - {"LENGTH", 'f'}, - {"LIKE", 'o'}, - {"LIMIT", 'k'}, - {"LINEAR", 'k'}, - {"LINES", 'k'}, - {"LN", 'f'}, - {"LOAD", 'k'}, - {"LOAD_FILE", 'f'}, - {"LOCALTIME", 'k'}, - {"LOCALTIMESTAMP", 'k'}, - {"LOCATE", 'f'}, - {"LOCK", 'n'}, - {"LOG", 'f'}, - {"LOG10", 'f'}, - {"LOG2", 'f'}, - {"LONGBLOB", 'k'}, - {"LONGTEXT", 'k'}, - {"LOOP", 'k'}, - {"LOWER", 'f'}, - {"LOWER_INC", 'f'}, - {"LOWER_INF", 'f'}, - {"LOW_PRIORITY", 'k'}, - {"LPAD", 'f'}, - {"LTRIM", 'f'}, - {"MAKEDATE", 'f'}, - {"MAKE_SET", 'f'}, - {"MASKLEN", 'f'}, - {"MASTER_BIND", 'k'}, - {"MASTER_POS_WAIT", 'f'}, - {"MASTER_SSL_VERIFY_SERVER_CERT", 'k'}, - {"MATCH", 'k'}, - {"MAX", 'f'}, - {"MAXVALUE", 'k'}, - {"MD5", 'f'}, - {"MEDIUMBLOB", 'k'}, - {"MEDIUMINT", 'k'}, - {"MEDIUMTEXT", 'k'}, - {"MERGE", 'k'}, - {"MICROSECOND", 'f'}, - {"MID", 'f'}, - {"MIDDLEINT", 'k'}, - {"MIN", 'f'}, - {"MINUTE", 'f'}, - {"MINUTE_MICROSECOND", 'k'}, - {"MINUTE_SECOND", 'k'}, - {"MKDIR", 'f'}, - {"MOD", 'o'}, - {"MODE", 'n'}, - {"MODIFIES", 'k'}, - {"MONTH", 'f'}, - {"MONTHNAME", 'f'}, - {"NAME_CONST", 'f'}, - {"NETMASK", 'f'}, - {"NEXTVAL", 'f'}, - {"NOT", 'o'}, - {"NOW", 'f'}, - {"NO_WRITE_TO_BINLOG", 'k'}, - {"NTH_VALUE", 'f'}, - {"NTILE", 'f'}, - {"NULL", '1'}, - {"NULLIF", 'f'}, - {"NUMERIC", 'k'}, - {"NZ", 'f'}, - {"OBJECTPROPERTY", 'f'}, - {"OBJECTPROPERTYEX", 'f'}, - {"OBJECT_DEFINITION", 'f'}, - {"OBJECT_ID", 'f'}, - {"OBJECT_NAME", 'f'}, - {"OBJECT_SCHEMA_NAME", 'f'}, - {"OCT", 'f'}, - {"OCTET_LENGTH", 'f'}, - {"OFFSET", 'k'}, - {"OLD_PASSWORD", 'f'}, - {"ONE_SHOT", 'k'}, - {"OPEN", 'k'}, - {"OPENDATASOURCE", 'f'}, - {"OPENQUERY", 'f'}, - {"OPENROWSET", 'f'}, - {"OPENXML", 'f'}, - {"OPTIMIZE", 'k'}, - {"OPTION", 'k'}, - {"OPTIONALLY", 'k'}, - {"OR", '&'}, - {"ORD", 'f'}, - {"ORDER", 'n'}, - {"ORIGINAL_DB_NAME", 'f'}, - {"ORIGINAL_LOGIN", 'f'}, - {"OUT", 'k'}, - {"OUTFILE", 'k'}, - {"OVERLAPS", 'f'}, - {"OVERLAY", 'f'}, - {"OWN3D", 'k'}, - {"PARSENAME", 'f'}, - {"PARTITION", 'k'}, - {"PASSWORD", 'k'}, - {"PATHINDEX", 'f'}, - {"PATINDEX", 'f'}, - {"PERCENTILE_COUNT", 'f'}, - {"PERCENTILE_DISC", 'f'}, - {"PERCENTILE_RANK", 'f'}, - {"PERCENT_RANK", 'f'}, - {"PERIOD_ADD", 'f'}, - {"PERIOD_DIFF", 'f'}, - {"PERMISSIONS", 'f'}, - {"PG_ADVISORY_LOCK", 'f'}, - {"PG_BACKEND_PID", 'f'}, - {"PG_CANCEL_BACKEND", 'f'}, - {"PG_CLIENT_ENCODING", 'f'}, - {"PG_CONF_LOAD_TIME", 'f'}, - {"PG_CREATE_RESTORE_POINT", 'f'}, - {"PG_HAS_ROLE", 'f'}, - {"PG_IS_IN_RECOVERY", 'f'}, - {"PG_IS_OTHER_TEMP_SCHEMA", 'f'}, - {"PG_LISTENING_CHANNELS", 'f'}, - {"PG_LS_DIR", 'f'}, - {"PG_MY_TEMP_SCHEMA", 'f'}, - {"PG_POSTMASTER_START_TIME", 'f'}, - {"PG_READ_BINARY_FILE", 'f'}, - {"PG_READ_FILE", 'f'}, - {"PG_RELOAD_CONF", 'f'}, - {"PG_ROTATE_LOGFILE", 'f'}, - {"PG_SLEEP", 'f'}, - {"PG_START_BACKUP", 'f'}, - {"PG_STAT_FILE", 'f'}, - {"PG_STOP_BACKUP", 'f'}, - {"PG_SWITCH_XLOG", 'f'}, - {"PG_TERMINATE_BACKEND", 'f'}, - {"PG_TRIGGER_DEPTH", 'f'}, - {"PI", 'f'}, - {"POSITION", 'f'}, - {"POW", 'f'}, - {"POWER", 'f'}, - {"PRECISION", 'k'}, - {"PRIMARY", 'k'}, - {"PROCEDURE", 'k'}, - {"PUBLISHINGSERVERNAME", 'f'}, - {"PURGE", 'k'}, - {"PWDCOMPARE", 'f'}, - {"PWDENCRYPT", 'f'}, - {"QUARTER", 'f'}, - {"QUOTE", 'f'}, - {"QUOTENAME", 'f'}, - {"QUOTE_IDENT", 'f'}, - {"QUOTE_LITERAL", 'f'}, - {"QUOTE_NULLABLE", 'f'}, - {"RADIANS", 'f'}, - {"RAND", 'f'}, - {"RANDOM", 'f'}, - {"RANDOMBLOB", 'f'}, - {"RANGE", 'k'}, - {"RANK", 'f'}, - {"READ", 'k'}, - {"READS", 'k'}, - {"READ_WRITE", 'k'}, - {"REAL", 'n'}, - {"REFERENCES", 'k'}, - {"REGEXP", 'o'}, - {"REGEXP_MATCHES", 'f'}, - {"REGEXP_REPLACE", 'f'}, - {"REGEXP_SPLIT_TO_ARRAY", 'f'}, - {"REGEXP_SPLIT_TO_TABLE", 'f'}, - {"RELEASE", 'k'}, - {"RELEASE_LOCK", 'f'}, - {"RENAME", 'k'}, - {"REPEAT", 'k'}, - {"REPLACE", 'k'}, - {"REPLICATE", 'f'}, - {"REQUIRE", 'k'}, - {"RESIGNAL", 'k'}, - {"RESTRICT", 'k'}, - {"RETURN", 'k'}, - {"REVERSE", 'f'}, - {"REVOKE", 'k'}, - {"RIGHT", 'n'}, - {"RLIKE", 'o'}, - {"ROUND", 'f'}, - {"ROW", 'f'}, - {"ROW_COUNT", 'f'}, - {"ROW_NUMBER", 'f'}, - {"ROW_TO_JSON", 'f'}, - {"RPAD", 'f'}, - {"RTRIM", 'f'}, - {"SCHAMA_NAME", 'f'}, - {"SCHEMA", 'k'}, - {"SCHEMAS", 'k'}, - {"SCHEMA_ID", 'f'}, - {"SCOPE_IDENTITY", 'f'}, - {"SECOND_MICROSECOND", 'k'}, - {"SEC_TO_TIME", 'f'}, - {"SELECT", 'k'}, - {"SENSITIVE", 'k'}, - {"SEPARATOR", 'k'}, - {"SESSION_USER", 'f'}, - {"SET", 'k'}, - {"SETATTR", 'f'}, - {"SETSEED", 'f'}, - {"SETVAL", 'f'}, - {"SET_BIT", 'f'}, - {"SET_BYTE", 'f'}, - {"SET_CONFIG", 'f'}, - {"SET_MASKLEN", 'f'}, - {"SHA", 'f'}, - {"SHA1", 'f'}, - {"SHA2", 'f'}, - {"SHOW", 'n'}, - {"SHUTDOWN", 'k'}, - {"SIGN", 'f'}, - {"SIGNAL", 'k'}, - {"SIGNBYASMKEY", 'f'}, - {"SIGNBYCERT", 'f'}, - {"SIMILAR", 'k'}, - {"SIN", 'f'}, - {"SLEEP", 'f'}, - {"SMALLDATETIMEFROMPARTS", 'f'}, - {"SMALLINT", 'k'}, - {"SOUNDEX", 'f'}, - {"SOUNDS", 'o'}, - {"SPACE", 'f'}, - {"SPATIAL", 'k'}, - {"SPECIFIC", 'k'}, - {"SPLIT_PART", 'f'}, - {"SQL", 'k'}, - {"SQLEXCEPTION", 'k'}, - {"SQLSTATE", 'k'}, - {"SQLWARNING", 'k'}, - {"SQL_BIG_RESULT", 'k'}, - {"SQL_CALC_FOUND_ROWS", 'k'}, - {"SQL_SMALL_RESULT", 'k'}, - {"SQL_VARIANT_PROPERTY", 'f'}, - {"SQRT", 'f'}, - {"SSL", 'k'}, - {"STARTING", 'k'}, - {"STATEMENT_TIMESTAMP", 'f'}, - {"STATS_DATE", 'f'}, - {"STDDEV", 'p'}, - {"STDDEV_POP", 'f'}, - {"STDDEV_SAMP", 'f'}, - {"STRAIGHT_JOIN", 'k'}, - {"STRCMP", 'f'}, - {"STRCONV", 'f'}, - {"STRING_AGG", 'f'}, - {"STRING_TO_ARRAY", 'f'}, - {"STRPOS", 'f'}, - {"STR_TO_DATE", 'f'}, - {"STUFF", 'f'}, - {"SUBDATE", 'f'}, - {"SUBSTR", 'f'}, - {"SUBSTRING", 'f'}, - {"SUBSTRING_INDEX", 'f'}, - {"SUBTIME", 'f'}, - {"SUM", 'f'}, - {"SUSER_ID", 'f'}, - {"SUSER_NAME", 'f'}, - {"SUSER_SID", 'f'}, - {"SUSER_SNAME", 'f'}, - {"SWITCHOFFET", 'f'}, - {"SYS.FN_BUILTIN_PERMISSIONS", 'f'}, - {"SYS.FN_GET_AUDIT_FILE", 'f'}, - {"SYS.FN_MY_PERMISSIONS", 'f'}, - {"SYS.STRAGG", 'f'}, - {"SYSCOLUMNS", 'k'}, - {"SYSDATE", 'f'}, - {"SYSDATETIME", 'f'}, - {"SYSDATETIMEOFFSET", 'f'}, - {"SYSOBJECTS", 'k'}, - {"SYSTEM_USER", 'f'}, - {"SYSUSERS", 'k'}, - {"SYSUTCDATETME", 'f'}, - {"TABLE", 'k'}, - {"TAN", 'f'}, - {"TERMINATED", 'k'}, - {"TERTIARY_WEIGHTS", 'f'}, - {"TEXTPTR", 'f'}, - {"TEXTVALID", 'f'}, - {"THEN", 'k'}, - {"TIME", 'k'}, - {"TIMEDIFF", 'f'}, - {"TIMEFROMPARTS", 'f'}, - {"TIMEOFDAY", 'f'}, - {"TIMESERIAL", 'f'}, - {"TIMESTAMP", 'f'}, - {"TIMESTAMPADD", 'f'}, - {"TIMEVALUE", 'f'}, - {"TIME_FORMAT", 'f'}, - {"TIME_TO_SEC", 'f'}, - {"TINYBLOB", 'k'}, - {"TINYINT", 'k'}, - {"TINYTEXT", 'k'}, - {"TODATETIMEOFFSET", 'f'}, - {"TOP", 'k'}, - {"TO_ASCII", 'f'}, - {"TO_CHAR", 'f'}, - {"TO_DATE", 'f'}, - {"TO_DAYS", 'f'}, - {"TO_HEX", 'f'}, - {"TO_NUMBER", 'f'}, - {"TO_SECONDS", 'f'}, - {"TO_TIMESTAMP", 'f'}, - {"TRAILING", 'n'}, - {"TRANSACTION_TIMESTAMP", 'f'}, - {"TRANSLATE", 'f'}, - {"TRIGGER", 'k'}, - {"TRIGGER_NESTLEVEL", 'f'}, - {"TRIM", 'f'}, - {"TRUE", '1'}, - {"TRUNC", 'f'}, - {"TRUNCATE", 'f'}, - {"TRY_CAST", 'f'}, - {"TRY_CONVERT", 'f'}, - {"TRY_PARSE", 'f'}, - {"TYPEPROPERTY", 'f'}, - {"TYPE_ID", 'f'}, - {"TYPE_NAME", 'f'}, - {"UCASE", 'f'}, - {"UNCOMPRESS", 'f'}, - {"UNCOMPRESS_LENGTH", 'f'}, - {"UNDO", 'k'}, - {"UNHEX", 'f'}, - {"UNION", 'U'}, - {"UNIQUE", 'n'}, - {"UNIX_TIMESTAMP", 'f'}, - {"UNI_ON", 'U'}, - {"UNKNOWN", 'k'}, - {"UNLOCK", 'k'}, - {"UNNEST", 'f'}, - {"UNSIGNED", 'k'}, - {"UPDATE", 'k'}, - {"UPDATEXML", 'f'}, - {"UPPER", 'f'}, - {"UPPER_INC", 'f'}, - {"UPPER_INF", 'f'}, - {"USAGE", 'k'}, - {"USE", 'k'}, - {"USER_ID", 'n'}, - {"USER_NAME", 'f'}, - {"USING", 'f'}, - {"UTC_DATE", 'k'}, - {"UTC_TIME", 'k'}, - {"UTC_TIMESTAMP", 'k'}, - {"UTL_INADDR.GET_HOST_ADDRESS", 'f'}, - {"UUID", 'f'}, - {"UUID_SHORT", 'f'}, - {"VALUES", 'k'}, - {"VAR", 'f'}, - {"VARBINARY", 'k'}, - {"VARCHAR", 'k'}, - {"VARCHARACTER", 'k'}, - {"VARIANCE", 'f'}, - {"VARP", 'f'}, - {"VARYING", 'k'}, - {"VAR_POP", 'f'}, - {"VAR_SAMP", 'f'}, - {"VERIFYSIGNEDBYASMKEY", 'f'}, - {"VERIFYSIGNEDBYCERT", 'f'}, - {"VERSION", 'f'}, - {"WAITFOR", 'k'}, - {"WEEK", 'f'}, - {"WEEKDAY", 'f'}, - {"WEEKDAYNAME", 'f'}, - {"WEEKOFYEAR", 'f'}, - {"WHEN", 'k'}, - {"WHERE", 'k'}, - {"WHILE", 'k'}, - {"WIDTH_BUCKET", 'f'}, - {"WITH", 'k'}, - {"XMLAGG", 'f'}, - {"XMLCOMMENT", 'f'}, - {"XMLCONCAT", 'f'}, - {"XMLELEMENT", 'f'}, - {"XMLEXISTS", 'f'}, - {"XMLFOREST", 'f'}, - {"XMLFORMAT", 'f'}, - {"XMLPI", 'f'}, - {"XMLROOT", 'f'}, - {"XMLTYPE", 'f'}, - {"XML_IS_WELL_FORMED", 'f'}, - {"XOR", 'o'}, - {"XPATH", 'f'}, - {"XPATH_EXISTS", 'f'}, - {"XP_EXECRESULTSET", 'k'}, - {"YEAR", 'f'}, - {"YEARWEEK", 'f'}, - {"YEAR_MONTH", 'k'}, - {"ZEROFILL", 'k'}, -}; -static const size_t sql_keywords_sz = 737; -static const char* multikeywords_start[] = { - "ALTER", - "AT", - "AT TIME", - "CROSS", - "FULL", - "GROUP", - "IN", - "IN BOOLEAN", - "INTERSECT", - "IS", - "IS DISTINCT", - "IS NOT", - "LEFT", - "LOCK", - "NATURAL", - "NEXT", - "NEXT VALUE", - "NOT", - "NOT SIMILAR", - "ORDER", - "OWN3D", - "READ", - "RIGHT", - "SELECT", - "SIMILAR", - "SOUNDS", - "UNION", -}; -static const size_t multikeywords_start_sz = 27; -static const keyword_t multikeywords[] = { - {"ALTER DOMAIN", 'k'}, - {"ALTER TABLE", 'k'}, - {"AT TIME", 'n'}, - {"AT TIME ZONE", 'k'}, - {"CROSS JOIN", 'k'}, - {"FULL OUTER", 'k'}, - {"GROUP BY", 'B'}, - {"IN BOOLEAN", 'n'}, - {"IN BOOLEAN MODE", 'k'}, - {"INTERSECT ALL", 'o'}, - {"IS DISTINCT", 'n'}, - {"IS DISTINCT FROM", 'k'}, - {"IS NOT", 'o'}, - {"IS NOT DISTINCT", 'n'}, - {"IS NOT DISTINCT FROM", 'k'}, - {"LEFT JOIN", 'k'}, - {"LEFT OUTER", 'k'}, - {"LOCK TABLE", 'k'}, - {"LOCK TABLES", 'k'}, - {"NATURAL FULL", 'k'}, - {"NATURAL INNER", 'k'}, - {"NATURAL JOIN", 'k'}, - {"NATURAL LEFT", 'k'}, - {"NATURAL OUTER", 'k'}, - {"NATURAL RIGHT", 'k'}, - {"NEXT VALUE", 'n'}, - {"NEXT VALUE FOR", 'k'}, - {"NOT BETWEEN", 'o'}, - {"NOT IN", 'o'}, - {"NOT LIKE", 'o'}, - {"NOT REGEXP", 'o'}, - {"NOT RLIKE", 'o'}, - {"NOT SIMILAR", 'o'}, - {"NOT SIMILAR TO", 'o'}, - {"ORDER BY", 'B'}, - {"OWN3D BY", 'B'}, - {"READ WRITE", 'k'}, - {"RIGHT JOIN", 'k'}, - {"RIGHT OUTER", 'k'}, - {"SELECT ALL", 'k'}, - {"SIMILAR TO", 'o'}, - {"SOUNDS LIKE", 'o'}, - {"UNION ALL", 'U'}, -}; -static const size_t multikeywords_sz = 43; - -typedef size_t (*pt2Function)(sfilter *sf); -static const pt2Function char_parse_map[] = { - &parse_white, /* 0 */ - &parse_white, /* 1 */ - &parse_white, /* 2 */ - &parse_white, /* 3 */ - &parse_white, /* 4 */ - &parse_white, /* 5 */ - &parse_white, /* 6 */ - &parse_white, /* 7 */ - &parse_white, /* 8 */ - &parse_white, /* 9 */ - &parse_white, /* 10 */ - &parse_white, /* 11 */ - &parse_white, /* 12 */ - &parse_white, /* 13 */ - &parse_white, /* 14 */ - &parse_white, /* 15 */ - &parse_white, /* 16 */ - &parse_white, /* 17 */ - &parse_white, /* 18 */ - &parse_white, /* 19 */ - &parse_white, /* 20 */ - &parse_white, /* 21 */ - &parse_white, /* 22 */ - &parse_white, /* 23 */ - &parse_white, /* 24 */ - &parse_white, /* 25 */ - &parse_white, /* 26 */ - &parse_white, /* 27 */ - &parse_white, /* 28 */ - &parse_white, /* 29 */ - &parse_white, /* 30 */ - &parse_white, /* 31 */ - &parse_white, /* 32 */ - &parse_operator2, /* 33 */ - &parse_string, /* 34 */ - &parse_eol_comment, /* 35 */ - &parse_money, /* 36 */ - &parse_operator1, /* 37 */ - &parse_operator2, /* 38 */ - &parse_string, /* 39 */ - &parse_char, /* 40 */ - &parse_char, /* 41 */ - &parse_operator2, /* 42 */ - &parse_operator1, /* 43 */ - &parse_char, /* 44 */ - &parse_dash, /* 45 */ - &parse_number, /* 46 */ - &parse_slash, /* 47 */ - &parse_number, /* 48 */ - &parse_number, /* 49 */ - &parse_number, /* 50 */ - &parse_number, /* 51 */ - &parse_number, /* 52 */ - &parse_number, /* 53 */ - &parse_number, /* 54 */ - &parse_number, /* 55 */ - &parse_number, /* 56 */ - &parse_number, /* 57 */ - &parse_char, /* 58 */ - &parse_char, /* 59 */ - &parse_operator2, /* 60 */ - &parse_operator2, /* 61 */ - &parse_operator2, /* 62 */ - &parse_other, /* 63 */ - &parse_var, /* 64 */ - &parse_word, /* 65 */ - &parse_word, /* 66 */ - &parse_word, /* 67 */ - &parse_word, /* 68 */ - &parse_word, /* 69 */ - &parse_word, /* 70 */ - &parse_word, /* 71 */ - &parse_word, /* 72 */ - &parse_word, /* 73 */ - &parse_word, /* 74 */ - &parse_word, /* 75 */ - &parse_word, /* 76 */ - &parse_word, /* 77 */ - &parse_word, /* 78 */ - &parse_word, /* 79 */ - &parse_word, /* 80 */ - &parse_word, /* 81 */ - &parse_word, /* 82 */ - &parse_word, /* 83 */ - &parse_word, /* 84 */ - &parse_word, /* 85 */ - &parse_word, /* 86 */ - &parse_word, /* 87 */ - &parse_word, /* 88 */ - &parse_word, /* 89 */ - &parse_word, /* 90 */ - &parse_other, /* 91 */ - &parse_backslash, /* 92 */ - &parse_other, /* 93 */ - &parse_operator1, /* 94 */ - &parse_word, /* 95 */ - &parse_word, /* 96 */ - &parse_word, /* 97 */ - &parse_word, /* 98 */ - &parse_word, /* 99 */ - &parse_word, /* 100 */ - &parse_word, /* 101 */ - &parse_word, /* 102 */ - &parse_word, /* 103 */ - &parse_word, /* 104 */ - &parse_word, /* 105 */ - &parse_word, /* 106 */ - &parse_word, /* 107 */ - &parse_word, /* 108 */ - &parse_word, /* 109 */ - &parse_word, /* 110 */ - &parse_word, /* 111 */ - &parse_word, /* 112 */ - &parse_word, /* 113 */ - &parse_word, /* 114 */ - &parse_word, /* 115 */ - &parse_word, /* 116 */ - &parse_word, /* 117 */ - &parse_word, /* 118 */ - &parse_word, /* 119 */ - &parse_word, /* 120 */ - &parse_word, /* 121 */ - &parse_word, /* 122 */ - &parse_other, /* 123 */ - &parse_operator2, /* 124 */ - &parse_other, /* 125 */ - &parse_operator1, /* 126 */ - &parse_white, /* 127 */ -}; - -#endif diff --git a/apache2/libinjection/sqlparse_private.h b/apache2/libinjection/sqlparse_private.h deleted file mode 100644 index 03f1bccc..00000000 --- a/apache2/libinjection/sqlparse_private.h +++ /dev/null @@ -1,70 +0,0 @@ -/** - * Copyright 2012, Nick Galbreath - * nickg@client9.com - * BSD License - see COPYING.txt for details - * - * (setq-default indent-tabs-mode nil) - * (setq c-default-style "k&r" - * c-basic-offset 4) - * indent -kr -nut - */ -#ifndef _SQLPARSE_PRIVATE_H -#define _SQLPARSE_PRIVATE_H - -#include "sqlparse.h" - -typedef struct { - const char *word; - char type; -} keyword_t; - -char bsearch_keyword_type(const char *key, const keyword_t keywords[], - size_t len); - -int is_operator2(const char *key); - -int is_sqli_pattern(const char *key); - -size_t parse_none(sfilter * sf); -size_t parse_money(sfilter * sf); -size_t parse_other(sfilter * sf); -size_t parse_white(sfilter * sf); -size_t parse_operator1(sfilter *sf); -size_t parse_char(sfilter *sf); -size_t parse_eol_comment(sfilter *sf); -size_t parse_dash(sfilter *sf); -size_t is_mysql_comment(const char *cs, const size_t len, size_t pos); -size_t parse_slash(sfilter *sf); -size_t parse_backslash(sfilter * sf); -size_t parse_operator2(sfilter *sf); -size_t parse_string_core(const char *cs, const size_t len, size_t pos, - stoken_t * st, char delim, size_t offset); -size_t parse_string(sfilter *sf); -size_t parse_word(sfilter * sf); -size_t parse_var(sfilter * sf); - -size_t parse_number(sfilter * sf); - -int parse_token(sfilter * sf); - -/** - * Looks at syntax_last and syntax_current to see - * if they can be merged into a multi-keyword - */ -int syntax_merge_words(stoken_t * a, stoken_t * b); - -void sfilter_reset(sfilter * sf, const char *s, size_t slen); - -/** - * Takes a raw stream of SQL tokens and does the following: - * * Merge mutliple strings into one "foo", "bar" --> "foo bar" - * * Remove comments except last one 1, +, -- foo, 1 ->> 1,+,1 - * * Merge multi-word keywords and operators into one - * e.g. "UNION", "ALL" --> "UNION ALL" - */ -int sqli_tokenize(sfilter * sf, stoken_t * sout); - -int filter_fold(sfilter * sf, stoken_t * sout); - - -#endif /* _SQLPARSE_PRIVATE_H */ diff --git a/apache2/re_operators.c b/apache2/re_operators.c index 0e46e915..772d5827 100644 --- a/apache2/re_operators.c +++ b/apache2/re_operators.c @@ -27,8 +27,7 @@ #include #endif -#include "libinjection/sqlparse.h" -#include "libinjection/sqli_fingerprints.h" +#include "libinjection/libinjection.h" /** * @@ -2133,15 +2132,13 @@ static int msre_op_contains_execute(modsec_rec *msr, msre_rule *rule, msre_var * } /** libinjection detectSQLi -* links against files in libinjection directory + * links against files in libinjection directory * See www.client9.com/libinjection for details - * `is_sqli_pattern` right now is a hardwired set of sqli fingerprints. - * In future, change to read from file. -*/ + */ static int msre_op_detectSQLi_execute(modsec_rec *msr, msre_rule *rule, msre_var *var, char **error_msg) { sfilter sf; - int issqli = is_sqli(&sf, var->value, var->value_len, is_sqli_pattern); + int issqli = libinjection_is_sqli(&sf, var->value, var->value_len, NULL, NULL); int capture = apr_table_get(rule->actionset->actions, "capture") ? 1 : 0; if (issqli) {