From 9c0229ce1f563e09d9c0de3503f9d6b8a8d9a678 Mon Sep 17 00:00:00 2001 From: Felipe Zimmerle Date: Wed, 31 May 2017 21:04:55 -0300 Subject: [PATCH] Updates libinjection to v3.10.0 --- CHANGES | 4 +- apache2/libinjection/libinjection_html5.c | 146 ++++++++-- apache2/libinjection/libinjection_sqli.c | 157 +++++----- apache2/libinjection/libinjection_xss.c | 335 +++++++++++++++++----- 4 files changed, 476 insertions(+), 166 deletions(-) diff --git a/CHANGES b/CHANGES index b297792e..74682793 100644 --- a/CHANGES +++ b/CHANGES @@ -1,8 +1,8 @@ DD MMM YYYY - 2.9.2 - To be released ------------------------------------ - * Updates libinjection to: bf234eb2f385b969c4f803b35fda53cffdd93922 - [Issue #1412 - @zimmerle, @bjdijk] + * Updates libinjection to v3.10.0 + [Issue #1412 - @client9, @zimmerle and @bjdijk] * Avoid log flood while using SecConnEngine [Issue #1436 - @victorhora] * Make url path absolute for SecHashEngine only when it is relative diff --git a/apache2/libinjection/libinjection_html5.c b/apache2/libinjection/libinjection_html5.c index 65565101..a380ca0a 100644 --- a/apache2/libinjection/libinjection_html5.c +++ b/apache2/libinjection/libinjection_html5.c @@ -12,6 +12,7 @@ #define CHAR_EOF -1 +#define CHAR_NULL 0 #define CHAR_BANG 33 #define CHAR_DOUBLE 34 #define CHAR_PERCENT 37 @@ -23,6 +24,7 @@ #define CHAR_GT 62 #define CHAR_QUESTION 63 #define CHAR_RIGHTB 93 +#define CHAR_TICK 96 /* prototypes */ @@ -41,6 +43,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs); static int h5_state_before_attribute_value(h5_state_t* hs); static int h5_state_attribute_value_double_quote(h5_state_t* hs); static int h5_state_attribute_value_single_quote(h5_state_t* hs); +static int h5_state_attribute_value_back_quote(h5_state_t* hs); static int h5_state_attribute_value_no_quote(h5_state_t* hs); static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs); static int h5_state_comment(h5_state_t* hs); @@ -60,16 +63,28 @@ static int 
h5_state_doctype(h5_state_t* hs); /** * public function */ -void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, int flags) +void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags) { memset(hs, 0, sizeof(h5_state_t)); hs->s = s; hs->len = len; - hs->state = h5_state_data; - if (flags == 0) { + + switch (flags) { + case DATA_STATE: hs->state = h5_state_data; - } else { - assert(0); + break; + case VALUE_NO_QUOTE: + hs->state = h5_state_before_attribute_name; + break; + case VALUE_SINGLE_QUOTE: + hs->state = h5_state_attribute_value_single_quote; + break; + case VALUE_DOUBLE_QUOTE: + hs->state = h5_state_attribute_value_double_quote; + break; + case VALUE_BACK_QUOTE: + hs->state = h5_state_attribute_value_back_quote; + break; } } @@ -85,10 +100,18 @@ int libinjection_h5_next(h5_state_t* hs) /** * Everything below here is private * -*/ + */ + static int h5_is_white(char ch) { + /* + * \t = horizontal tab = 0x09 + * \n = newline = 0x0A + * \v = vertical tab = 0x0B + * \f = form feed = 0x0C + * \r = cr = 0x0D + */ return strchr(" \t\n\v\f\r", ch) != NULL; } @@ -97,9 +120,17 @@ static int h5_skip_white(h5_state_t* hs) char ch; while (hs->pos < hs->len) { ch = hs->s[hs->pos]; - if (ch == ' ') { + switch (ch) { + case 0x00: /* IE only */ + case 0x20: + case 0x09: + case 0x0A: + case 0x0B: /* IE only */ + case 0x0C: + case 0x0D: /* IE only */ hs->pos += 1; - } else { + break; + default: return ch; } } @@ -149,6 +180,9 @@ static int h5_state_tag_open(h5_state_t* hs) char ch; TRACE(); + if (hs->pos >= hs->len) { + return 0; + } ch = hs->s[hs->pos]; if (ch == CHAR_BANG) { hs->pos += 1; @@ -167,6 +201,9 @@ static int h5_state_tag_open(h5_state_t* hs) return h5_state_bogus_comment2(hs); } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { return h5_state_tag_name(hs); + } else if (ch == CHAR_NULL) { + /* IE-ism NULL characters are ignored */ + return h5_state_tag_name(hs); } else { /* user input mistake in 
configuring state */ if (hs->pos == 0) { @@ -197,7 +234,9 @@ static int h5_state_end_tag_open(h5_state_t* hs) } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { return h5_state_tag_name(hs); } - return h5_state_data(hs); + + hs->is_close = 0; + return h5_state_bogus_comment(hs); } /* * @@ -231,7 +270,12 @@ static int h5_state_tag_name(h5_state_t* hs) pos = hs->pos; while (pos < hs->len) { ch = hs->s[pos]; - if (h5_is_white(ch)) { + if (ch == 0) { + /* special non-standard case */ + /* allow nulls in tag name */ + /* some old browsers apparently allow and ignore them */ + pos += 1; + } else if (h5_is_white(ch)) { hs->token_start = hs->s + hs->pos; hs->token_len = pos - hs->pos; hs->token_type = TAG_NAME_OPEN; @@ -299,7 +343,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs) default: { return h5_state_attribute_name(hs); } - } + } } static int h5_state_attribute_name(h5_state_t* hs) @@ -308,7 +352,7 @@ static int h5_state_attribute_name(h5_state_t* hs) size_t pos; TRACE(); - pos = hs->pos; + pos = hs->pos + 1; while (pos < hs->len) { ch = hs->s[pos]; if (h5_is_white(ch)) { @@ -358,21 +402,19 @@ static int h5_state_attribute_name(h5_state_t* hs) static int h5_state_after_attribute_name(h5_state_t* hs) { int c; - size_t pos; TRACE(); - pos = hs->pos; c = h5_skip_white(hs); switch (c) { case CHAR_EOF: { return 0; } case CHAR_SLASH: { - hs->pos = pos + 1; + hs->pos += 1; return h5_state_self_closing_start_tag(hs); } case CHAR_EQUALS: { - hs->pos = pos + 1; + hs->pos += 1; return h5_state_before_attribute_value(hs); } case CHAR_GT: { @@ -403,6 +445,9 @@ static int h5_state_before_attribute_value(h5_state_t* hs) return h5_state_attribute_value_double_quote(hs); } else if (c == CHAR_SINGLE) { return h5_state_attribute_value_single_quote(hs); + } else if (c == CHAR_TICK) { + /* NON STANDARD IE */ + return h5_state_attribute_value_back_quote(hs); } else { return h5_state_attribute_value_no_quote(hs); } @@ -415,8 +460,16 @@ static int 
h5_state_attribute_value_quote(h5_state_t* hs, char qchar) TRACE(); - /* skip quote */ - hs->pos += 1; + /* skip initial quote in normal case. + * don't do this "if (pos == 0)" since it means we have started + * in a non-data state. given an input of '><foo + * we want to make 0-length attribute name + */ + if (hs->pos > 0) { + hs->pos += 1; + } + + idx = (const char*) memchr(hs->s + hs->pos, qchar, hs->len - hs->pos); if (idx == NULL) { hs->token_start = hs->s + hs->pos; @@ -447,6 +500,13 @@ int h5_state_attribute_value_single_quote(h5_state_t* hs) return h5_state_attribute_value_quote(hs, CHAR_SINGLE); } +static +int h5_state_attribute_value_back_quote(h5_state_t* hs) +{ + TRACE(); + return h5_state_attribute_value_quote(hs, CHAR_TICK); +} + static int h5_state_attribute_value_no_quote(h5_state_t* hs) { char ch; @@ -656,10 +716,13 @@ static int h5_state_comment(h5_state_t* hs) char ch; const char* idx; size_t pos; + size_t offset; + const char* end = hs->s + hs->len; TRACE(); pos = hs->pos; while (1) { + idx = (const char*) memchr(hs->s + pos, CHAR_DASH, hs->len - pos); /* did not find anything or has less than 3 chars left */ @@ -670,21 +733,62 @@ static int h5_state_comment(h5_state_t* hs) hs->token_type = TAG_COMMENT; return 1; } - ch = *(idx + 1); + offset = 1; + + /* skip all nulls */ + while (idx + offset < end && *(idx + offset) == 0) { + offset += 1; + } + if (idx + offset == end) { + hs->state = h5_state_eof; + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = TAG_COMMENT; + return 1; + } + + ch = *(idx + offset); if (ch != CHAR_DASH && ch != CHAR_BANG) { pos = (size_t)(idx - hs->s) + 1; continue; } - ch = *(idx + 2); + + /* need to test */ +#if 0 + /* skip all nulls */ + while (idx + offset < end && *(idx + offset) == 0) { + offset += 1; + } + if (idx + offset == end) { + hs->state = h5_state_eof; + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = TAG_COMMENT; + return 1; + } +#endif + + offset += 1; + if (idx + offset == end) { + 
hs->state = h5_state_eof; + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = TAG_COMMENT; + return 1; + } + + + ch = *(idx + offset); if (ch != CHAR_GT) { pos = (size_t)(idx - hs->s) + 1; continue; } + offset += 1; /* ends in --> or -!> */ hs->token_start = hs->s + hs->pos; hs->token_len = (size_t)(idx - hs->s) - hs->pos; - hs->pos = (size_t)(idx - hs->s) + 3; + hs->pos = (size_t)(idx + offset - hs->s); hs->state = h5_state_data; hs->token_type = TAG_COMMENT; return 1; diff --git a/apache2/libinjection/libinjection_sqli.c b/apache2/libinjection/libinjection_sqli.c index 0b67c5cc..cecbbea3 100644 --- a/apache2/libinjection/libinjection_sqli.c +++ b/apache2/libinjection/libinjection_sqli.c @@ -1,5 +1,5 @@ /** - * Copyright 2012,2013 Nick Galbreath + * Copyright 2012,2016 Nick Galbreath * nickg@client9.com * BSD License -- see COPYING.txt for details * @@ -18,7 +18,7 @@ #include "libinjection_sqli.h" #include "libinjection_sqli_data.h" -#define LIBINJECTION_VERSION "3.9.1" +#define LIBINJECTION_VERSION "3.9.2" #define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t*)(0))->val) #define LIBINJECTION_SQLI_MAX_TOKENS 5 @@ -112,15 +112,11 @@ memchr2(const char *haystack, size_t haystack_len, char c0, char c1) } while (cur < last) { - if (cur[0] == c0) { - if (cur[1] == c1) { - return cur; - } else { - cur += 2; /* (c0 == c1) ? 
1 : 2; */ - } - } else { - cur += 1; + /* safe since cur < len - 1 always */ + if (cur[0] == c0 && cur[1] == c1) { + return cur; } + cur += 1; } return NULL; @@ -191,11 +187,11 @@ static int char_is_white(char ch) { /* ' ' space is 0x32 '\t 0x09 \011 horizontal tab '\n' 0x0a \012 new line - '\v' 0x0b \013 verical tab + '\v' 0x0b \013 vertical tab '\f' 0x0c \014 new page '\r' 0x0d \015 carriage return 0x00 \000 null (oracle) - 0xa0 \240 is latin1 + 0xa0 \240 is Latin-1 */ return strchr(" \t\n\v\f\r\240\000", ch) != NULL; } @@ -294,7 +290,7 @@ static void st_clear(stoken_t * st) static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len, const char value) { - /* done to elimiate unused warning */ + /* done to eliminate unused warning */ (void)len; st->type = (char) stype; st->pos = pos; @@ -402,7 +398,7 @@ static size_t parse_eol_comment(struct libinjection_sqli_state * sf) } } -/** In Ansi mode, hash is an operator +/** In ANSI mode, hash is an operator * In MYSQL mode, it's a EOL comment like '--' */ static size_t parse_hash(struct libinjection_sqli_state * sf) @@ -842,7 +838,7 @@ static size_t parse_bstring(struct libinjection_sqli_state *sf) /* * hex literal string - * re: [XX]'[0123456789abcdefABCDEF]*' + * re: [xX]'[0123456789abcdefABCDEF]*' * mysql has requirement of having EVEN number of chars, * but pgsql does not */ @@ -1072,7 +1068,7 @@ static size_t parse_money(struct libinjection_sqli_state *sf) /* we have $foobar$ ... 
find it again */ strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2); - if (strend == NULL) { + if (strend == NULL || ((size_t)(strend - cs) < (pos+xlen+2))) { /* fell off edge */ st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2); sf->current->str_open = '$'; @@ -1104,7 +1100,6 @@ static size_t parse_number(struct libinjection_sqli_state * sf) const char *cs = sf->s; const size_t slen = sf->slen; size_t pos = sf->pos; - int have_dot = 0; int have_e = 0; int have_exp = 0; @@ -1136,7 +1131,6 @@ static size_t parse_number(struct libinjection_sqli_state * sf) } if (pos < slen && cs[pos] == '.') { - have_dot = 1; pos += 1; while (pos < slen && ISDIGIT(cs[pos])) { pos += 1; @@ -1185,7 +1179,7 @@ static size_t parse_number(struct libinjection_sqli_state * sf) } } - if (have_dot == 1 && have_e == 1 && have_exp == 0) { + if (have_e == 1 && have_exp == 0) { /* very special form of * "1234.e" * "10.10E" @@ -1242,29 +1236,13 @@ int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf) const unsigned char ch = (unsigned char) (s[*pos]); /* - * if not ascii, then continue... - * actually probably need to just assuming - * it's a string + * look up the parser, and call it + * + * Porting Note: this is mapping of char to function + * charparsers[ch]() */ - if (ch > 127) { + fnptr = char_parse_map[ch]; - /* 160 or 0xA0 or octal 240 is "latin1 non-breaking space" - * but is treated as a space in mysql. 
- */ - if (ch == 160) { - fnptr = parse_white; - } else { - fnptr = parse_word; - } - } else { - /* - * look up the parser, and call it - * - * Porting Note: this is mapping of char to function - * charparsers[ch]() - */ - fnptr = char_parse_map[ch]; - } *pos = (*fnptr) (sf); /* @@ -1349,16 +1327,22 @@ static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, a->type == TYPE_UNION || a->type == TYPE_FUNCTION || a->type == TYPE_EXPRESSION || + a->type == TYPE_TSQL || a->type == TYPE_SQLTYPE)) { - return CHAR_NULL; + return FALSE; } - if (b->type != TYPE_KEYWORD && b->type != TYPE_BAREWORD && - b->type != TYPE_OPERATOR && b->type != TYPE_SQLTYPE && - b->type != TYPE_LOGIC_OPERATOR && - b->type != TYPE_FUNCTION && - b->type != TYPE_UNION && b->type != TYPE_EXPRESSION) { - return CHAR_NULL; + if (! + (b->type == TYPE_KEYWORD || + b->type == TYPE_BAREWORD || + b->type == TYPE_OPERATOR || + b->type == TYPE_UNION || + b->type == TYPE_FUNCTION || + b->type == TYPE_EXPRESSION || + b->type == TYPE_TSQL || + b->type == TYPE_SQLTYPE || + b->type == TYPE_LOGIC_OPERATOR)) { + return FALSE; } sz1 = a->len; @@ -1374,7 +1358,6 @@ static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, tmp[sz1] = ' '; memcpy(tmp + sz1 + 1, b->val, sz2); tmp[sz3] = CHAR_NULL; - ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3); if (ch != CHAR_NULL) { @@ -1450,6 +1433,13 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf) sf->tokenvec[2].type == TYPE_COMMA && sf->tokenvec[3].type == TYPE_LEFTPARENS && sf->tokenvec[4].type == TYPE_NUMBER + ) || + ( + sf->tokenvec[0].type == TYPE_BAREWORD && + sf->tokenvec[1].type == TYPE_RIGHTPARENS && + sf->tokenvec[2].type == TYPE_OPERATOR && + sf->tokenvec[3].type == TYPE_LEFTPARENS && + sf->tokenvec[4].type == TYPE_BAREWORD ) ) { @@ -1506,16 +1496,6 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf) pos -= 1; sf->stats_folds += 1; continue; - } else if (sf->tokenvec[left].type == TYPE_SEMICOLON 
&& - sf->tokenvec[left+1].type == TYPE_FUNCTION && - cstrcasecmp("IF", sf->tokenvec[left+1].val, sf->tokenvec[left+1].len) == 0) { - /* IF is normally a function, except in Transact-SQL where it can be used as a - * standalone control flow operator, e.g. ; IF 1=1 ... - * if found after a semicolon, convert from 'f' type to 'T' type - */ - sf->tokenvec[left+1].type = TYPE_TSQL; - left += 2; - continue; /* reparse everything, but we probably can advance left, and pos */ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR || sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) && (st_is_unary_op(&sf->tokenvec[left+1]) || @@ -1539,9 +1519,22 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf) left -= 1; } continue; + } else if (sf->tokenvec[left].type == TYPE_SEMICOLON && + sf->tokenvec[left+1].type == TYPE_FUNCTION && + (sf->tokenvec[left+1].val[0] == 'I' || + sf->tokenvec[left+1].val[0] == 'i' ) && + (sf->tokenvec[left+1].val[1] == 'F' || + sf->tokenvec[left+1].val[1] == 'f' )) { + /* IF is normally a function, except in Transact-SQL where it can be used as a + * standalone control flow operator, e.g. ; IF 1=1 ... + * if found after a semicolon, convert from 'f' type to 'T' type + */ + sf->tokenvec[left+1].type = TYPE_TSQL; + /* left += 2; */ + continue; /* reparse everything, but we probably can advance left, and pos */ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) && sf->tokenvec[left+1].type == TYPE_LEFTPARENS && ( - /* TSQL functions but common enough to be collumn names */ + /* TSQL functions but common enough to be column names */ cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || @@ -1564,7 +1557,7 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf) /* pos is the same * other conversions need to go here... 
for instance - * password CAN be a function, coalese CAN be a function + * password CAN be a function, coalesce CAN be a function */ sf->tokenvec[left].type = TYPE_FUNCTION; continue; @@ -1828,7 +1821,7 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf) * 1,-sin(1) --> 1 (1) * Here, just do * 1,-sin(1) --> 1,sin(1) - * just remove unary opartor + * just remove unary operator */ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); pos -= 1; @@ -1852,9 +1845,21 @@ int libinjection_sqli_fold(struct libinjection_sqli_state * sf) pos -= 1; left = 0; continue; + } else if ((sf->tokenvec[left].type == TYPE_FUNCTION) && + (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) && + (sf->tokenvec[left+2].type != TYPE_RIGHTPARENS)) { + /* + * whats going on here + * Some SQL functions like USER() have 0 args + * if we get User(foo), then User is not a function + * This should be expanded since it eliminated a lot of false + * positives. + */ + if (cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0) { + sf->tokenvec[left].type = TYPE_BAREWORD; + } } - /* no folding -- assume left-most token is is good, now use the existing 2 tokens -- do not get another @@ -2019,7 +2024,7 @@ int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state) } /* - * return TRUE if sqli, false is benign + * return TRUE if SQLi, false is benign */ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) { @@ -2033,10 +2038,10 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) { /* - * if ending comment is contains 'sp_password' then it's sqli! + * if ending comment is contains 'sp_password' then it's SQLi! * MS Audit log apparently ignores anything with - * 'sp_password' in it. Unable to find primary refernece to - * this "feature" of SQL Server but seems to be known sqli + * 'sp_password' in it. 
Unable to find primary reference to + * this "feature" of SQL Server but seems to be known SQLi * technique */ if (my_memmem(sql_state->s, sql_state->slen, @@ -2055,7 +2060,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) if (sql_state->fingerprint[1] == TYPE_UNION) { if (sql_state->stats_tokens == 2) { - /* not sure why but 1U comes up in Sqli attack + /* not sure why but 1U comes up in SQLi attack * likely part of parameter splitting/etc. * lots of reasons why "1 union" might be normal * input, so beep only if other SQLi things are present @@ -2080,7 +2085,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) /* * for fingerprint like 'nc', only comments of /x are treated - * as SQL... ending comments of "--" and "#" are not sqli + * as SQL... ending comments of "--" and "#" are not SQLi */ if (sql_state->tokenvec[0].type == TYPE_BAREWORD && sql_state->tokenvec[1].type == TYPE_COMMENT && @@ -2090,7 +2095,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) } /* - * if '1c' ends with '/x' then it's sqli + * if '1c' ends with '/x' then it's SQLi */ if (sql_state->tokenvec[0].type == TYPE_NUMBER && sql_state->tokenvec[1].type == TYPE_COMMENT && @@ -2113,13 +2118,13 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) if (sql_state->tokenvec[0].type == TYPE_NUMBER && sql_state->tokenvec[1].type == TYPE_COMMENT) { if (sql_state->stats_tokens > 2) { - /* we have some folding going on, highly likely sqli */ + /* we have some folding going on, highly likely SQLi */ sql_state->reason = __LINE__; return TRUE; } /* * we check that next character after the number is either whitespace, - * or '/' or a '-' ==> sqli. + * or '/' or a '-' ==> SQLi. */ ch = sql_state->s[sql_state->tokenvec[0].len]; if ( ch <= 32 ) { @@ -2141,7 +2146,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) } /* - * detect obvious sqli scans.. 
many people put '--' in plain text + * detect obvious SQLi scans.. many people put '--' in plain text * so only detect if input ends with '--', e.g. 1-- but not 1-- foo */ if ((sql_state->tokenvec[1].len > 2) @@ -2177,7 +2182,7 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) } /* - * not sqli + * not SQLi */ sql_state->reason = __LINE__; return FALSE; @@ -2186,8 +2191,8 @@ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) streq(sql_state->fingerprint, "1&1") || streq(sql_state->fingerprint, "1&v") || streq(sql_state->fingerprint, "1&s")) { - /* 'sexy and 17' not sqli - * 'sexy and 17<18' sqli + /* 'sexy and 17' not SQLi + * 'sexy and 17<18' SQLi */ if (sql_state->stats_tokens == 3) { sql_state->reason = __LINE__; @@ -2243,7 +2248,7 @@ int libinjection_is_sqli(struct libinjection_sqli_state * sql_state) size_t slen = sql_state->slen; /* - * no input? not sqli + * no input? not SQLi */ if (slen == 0) { return FALSE; diff --git a/apache2/libinjection/libinjection_xss.c b/apache2/libinjection/libinjection_xss.c index 9aac0e45..f0df4d84 100644 --- a/apache2/libinjection/libinjection_xss.c +++ b/apache2/libinjection/libinjection_xss.c @@ -1,15 +1,11 @@ + +#include "libinjection.h" #include "libinjection_xss.h" #include "libinjection_html5.h" #include <assert.h> #include <stdio.h> -/* - * HEY THIS ISN'T DONE - * AND MISSING A KEY INGREDIENT!! 
- * - */ - typedef enum attribute { TYPE_NONE , TYPE_BLACK /* ban always */ @@ -18,11 +14,128 @@ typedef enum attribute { , TYPE_ATTR_INDIRECT /* attribute *name* is given in *value* */ } attribute_t; + +static attribute_t is_black_attr(const char* s, size_t len); +static int is_black_tag(const char* s, size_t len); +static int is_black_url(const char* s, size_t len); +static int cstrcasecmp_with_null(const char *a, const char *b, size_t n); +static int html_decode_char_at(const char* src, size_t len, size_t* consumed); +static int htmlencode_startswith(const char* prefix, const char *src, size_t n); + + typedef struct stringtype { const char* name; attribute_t atype; } stringtype_t; + +static const int gsHexDecodeMap[256] = { + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 256, 256, + 256, 256, 256, 256, 256, 10, 11, 12, 13, 14, 15, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 10, 11, 12, 13, 14, 15, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, + 256, 256, 256, 256 +}; + +static int html_decode_char_at(const char* src, size_t len, size_t* consumed) +{ + int val = 0; + size_t i; + int ch; + + if (len == 0 || src == NULL) { + *consumed = 0; + return -1; + } + + *consumed = 1; + if (*src != '&' || len < 2) { + return (unsigned char)(*src); + } + + + if (*(src+1) != '#') { + /* normally this would be for named entities + * but for this case we don't actually care + */ + return '&'; + } + + if (*(src+2) == 'x' || *(src+2) == 'X') { + ch = (unsigned char) (*(src+3)); + ch = gsHexDecodeMap[ch]; + if (ch == 256) { + /* degenerate case '&#[?]' */ + return '&'; + } + val = ch; + i = 4; + while (i < len) { + ch = (unsigned char) src[i]; + if (ch == ';') { + *consumed = i + 1; + return val; + } + ch = gsHexDecodeMap[ch]; + if (ch == 256) { + *consumed = i; + return val; + } + val = (val * 16) + ch; + if (val > 0x1000FF) { + return '&'; + } + ++i; + } + *consumed = i; + return val; + } else { + i = 2; + ch = (unsigned char) src[i]; + if (ch < '0' || ch > '9') { + return '&'; + } + val = ch - '0'; + i += 1; + while (i < len) { + ch = (unsigned char) src[i]; + if (ch == ';') { + *consumed = i + 1; + return val; + } + if (ch < '0' || ch > '9') { + *consumed = i; + return val; + } + val = (val * 10) + (ch - '0'); + if (val > 0x1000FF) { + return '&'; + } + ++i; + } + *consumed = i; + return val; + } +} + + /* * view-source: * data: @@ -37,7 +150,7 @@ static stringtype_t BLACKATTR[] = { , { "DATASRC", TYPE_BLACK } /* IE */ , { "DYNSRC", TYPE_ATTR_URL } /* Obsolete img attribute */ , { "FILTER", TYPE_STYLE } /* Opera, SVG inline style */ - , { "FORMACTION", TYPE_ATTR_URL } /* HTML5 */ + , { "FORMACTION", TYPE_ATTR_URL } /* HTML 5 */ , { "FOLDER", TYPE_ATTR_URL } /* Only on A tags, IE-only */ , { "FROM", TYPE_ATTR_URL } /* SVG */ , { "HANDLER", TYPE_ATTR_URL } /* SVG Tiny, Opera */ @@ -53,26 +166,27 @@ static stringtype_t BLACKATTR[] = { }; /* xmlns */ -/* xml-stylesheet > <eval>, <if expr=> */ +/* `xml-stylesheet` > <eval>, <if expr=> */ /* 
-static const char* BLACKATTR[] = { - "ATTRIBUTENAME", - "BACKGROUND", - "DATAFORMATAS", - "HREF", - "SCROLL", - "SRC", - "STYLE", - "SRCDOC", - NULL -}; + static const char* BLACKATTR[] = { + "ATTRIBUTENAME", + "BACKGROUND", + "DATAFORMATAS", + "HREF", + "SCROLL", + "SRC", + "STYLE", + "SRCDOC", + NULL + }; */ static const char* BLACKTAG[] = { "APPLET" /* , "AUDIO" */ , "BASE" + , "COMMENT" /* IE http://html5sec.org/#38 */ , "EMBED" /* , "FORM" */ , "FRAME" @@ -92,33 +206,94 @@ static const char* BLACKTAG[] = { /* , "VIDEO" */ , "VMLFRAME" , "XML" + , "XSS" , NULL }; -static int is_black_tag(const char* s, size_t len); -static attribute_t is_black_attr(const char* s, size_t len); -static int is_black_url(const char* s, size_t len); -static int cstrcasecmp_with_null(const char *a, const char *b, size_t n); static int cstrcasecmp_with_null(const char *a, const char *b, size_t n) { + char ca; char cb; - - for (; n > 0; a++, b++, n--) { - cb = *b; + /* printf("Comparing to %s %.*s\n", a, (int)n, b); */ + while (n-- > 0) { + cb = *b++; if (cb == '\0') continue; + ca = *a++; + if (cb >= 'a' && cb <= 'z') { cb -= 0x20; } - if (*a != cb) { - return *a - cb; - } else if (*a == '\0') { - return -1; + /* printf("Comparing %c vs %c with %d left\n", ca, cb, (int)n); */ + if (ca != cb) { + return 1; } } - return (*a == 0) ? 0 : 1; + if (*a == 0) { + /* printf(" MATCH \n"); */ + return 0; + } else { + return 1; + } +} + +/* + * Does an HTML encoded binary string (const char*, length) start with + * a all uppercase c-string (null terminated), case insensitive! + * + * also ignore any embedded nulls in the HTML string! 
+ * + * return 1 if match / starts with + * return 0 if not + */ +static int htmlencode_startswith(const char *a, const char *b, size_t n) +{ + size_t consumed; + int cb; + int first = 1; + /* printf("Comparing %s with %.*s\n", a,(int)n,b); */ + while (n > 0) { + if (*a == 0) { + /* printf("Match EOL!\n"); */ + return 1; + } + cb = html_decode_char_at(b, n, &consumed); + b += consumed; + n -= consumed; + + if (first && cb <= 32) { + /* ignore all leading whitespace and control characters */ + continue; + } + first = 0; + + if (cb == 0) { + /* always ignore null characters in user input */ + continue; + } + + if (cb == 10) { + /* always ignore vertical tab characters in user input */ + /* who allows this?? */ + continue; + } + + if (cb >= 'a' && cb <= 'z') { + /* upcase */ + cb -= 0x20; + } + + if (*a != (char) cb) { + /* printf(" %c != %c\n", *a, cb); */ + /* mismatch */ + return 0; + } + a++; + } + + return (*a == 0) ? 1 : 0; } static int is_black_tag(const char* s, size_t len) @@ -132,6 +307,7 @@ static int is_black_tag(const char* s, size_t len) black = BLACKTAG; while (*black != NULL) { if (cstrcasecmp_with_null(*black, s, len) == 0) { + /* printf("Got black tag %s\n", *black); */ return 1; } black += 1; @@ -141,6 +317,7 @@ static int is_black_tag(const char* s, size_t len) if ((s[0] == 's' || s[0] == 'S') && (s[1] == 'v' || s[1] == 'V') && (s[2] == 'g' || s[2] == 'G')) { + /* printf("Got SVG tag \n"); */ return 1; } @@ -148,6 +325,7 @@ static int is_black_tag(const char* s, size_t len) if ((s[0] == 'x' || s[0] == 'X') && (s[1] == 's' || s[1] == 'S') && (s[2] == 'l' || s[2] == 'L')) { + /* printf("Got XSL tag\n"); */ return 1; } @@ -162,15 +340,18 @@ static attribute_t is_black_attr(const char* s, size_t len) return TYPE_NONE; } - /* javascript on.* */ - if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) { - return TYPE_BLACK; - } - - if (len >= 5) { + /* JavaScript on.* */ + if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) { + 
/* printf("Got JavaScript on- attribute name\n"); */ + return TYPE_BLACK; + } + + + /* XMLNS can be used to create arbitrary tags */ if (cstrcasecmp_with_null("XMLNS", s, 5) == 0 || cstrcasecmp_with_null("XLINK", s, 5) == 0) { + /* printf("Got XMLNS and XLINK tags\n"); */ return TYPE_BLACK; } } @@ -178,6 +359,7 @@ static attribute_t is_black_attr(const char* s, size_t len) black = BLACKATTR; while (black->name != NULL) { if (cstrcasecmp_with_null(black->name, s, len) == 0) { + /* printf("Got banned attribute name %s\n", black->name); */ return black->atype; } black += 1; @@ -198,49 +380,43 @@ static int is_black_url(const char* s, size_t len) /* covers JAVA, JAVASCRIPT, + colon */ static const char* javascript_url = "JAVA"; - size_t tokenlen; - /* skip whitespace */ - while (len > 0) { + while (len > 0 && (*s <= 32 || *s >= 127)) { /* * HEY: this is a signed character. * We are intentionally skipping high-bit characters too - * since they are not ascii, and Opera sometimes uses UTF8 whitespace + * since they are not ASCII, and Opera sometimes uses UTF-8 whitespace. + * + * Also in EUC-JP some of the high bytes are just ignored. 
*/ - if (*s <= 32) { - ++s; - --len; - } - break; + ++s; + --len; } - tokenlen = strlen(data_url); - if (len > tokenlen && cstrcasecmp_with_null(data_url, s, tokenlen) == 0) { - return 1; - } - tokenlen = strlen(viewsource_url); - if (len > tokenlen && cstrcasecmp_with_null(viewsource_url, s, tokenlen) == 0) { + if (htmlencode_startswith(data_url, s, len)) { return 1; } - tokenlen = strlen(javascript_url); - if (len > tokenlen && cstrcasecmp_with_null(javascript_url, s, tokenlen) == 0) { + if (htmlencode_startswith(viewsource_url, s, len)) { return 1; } - tokenlen = strlen(vbscript_url); - if (len > tokenlen && cstrcasecmp_with_null(vbscript_url, s, tokenlen) == 0) { + if (htmlencode_startswith(javascript_url, s, len)) { + return 1; + } + + if (htmlencode_startswith(vbscript_url, s, len)) { return 1; } return 0; } -int libinjection_is_xss(const char* s, size_t len) +int libinjection_is_xss(const char* s, size_t len, int flags) { h5_state_t h5; attribute_t attr = TYPE_NONE; - libinjection_h5_init(&h5, s, len, 0); + libinjection_h5_init(&h5, s, len, (enum html5_flags) flags); while (libinjection_h5_next(&h5)) { if (h5.token_type != ATTR_VALUE) { attr = TYPE_NONE; @@ -258,16 +434,16 @@ int libinjection_is_xss(const char* s, size_t len) /* * IE6,7,8 parsing works a bit differently so * a whole