From 66939d059b384449386b455f9afaeef8e3074318 Mon Sep 17 00:00:00 2001 From: Felipe Zimmerle Date: Mon, 17 Feb 2014 06:31:38 -0800 Subject: [PATCH] Adds initial support to @detectXSS Libinject was recently updated to support XSS detection. This commit adds initial support to it. --- apache2/Makefile.am | 2 + apache2/Makefile.win | 7 +- apache2/libinjection/libinjection_html5.c | 795 ++++++++++++++++++++++ apache2/libinjection/libinjection_html5.h | 54 ++ apache2/libinjection/libinjection_xss.c | 540 +++++++++++++++ apache2/libinjection/libinjection_xss.h | 21 + apache2/re_operators.c | 33 + iis/Makefile.win | 4 +- iis/ModSecurityIIS.vcxproj | 2 + iis/ModSecurityIIS.vcxproj.filters | 4 +- standalone/Makefile.am | 2 + tests/Makefile.am | 2 + tests/op/detectXSS.t | 18 + tests/regression/misc/25-libinjection.t | 110 +++ 14 files changed, 1590 insertions(+), 4 deletions(-) create mode 100644 apache2/libinjection/libinjection_html5.c create mode 100644 apache2/libinjection/libinjection_html5.h create mode 100644 apache2/libinjection/libinjection_xss.c create mode 100644 apache2/libinjection/libinjection_xss.h create mode 100644 tests/op/detectXSS.t create mode 100644 tests/regression/misc/25-libinjection.t diff --git a/apache2/Makefile.am b/apache2/Makefile.am index e58e1f54..77a0884a 100644 --- a/apache2/Makefile.am +++ b/apache2/Makefile.am @@ -5,7 +5,9 @@ mod_security2_la_SOURCES = acmp.c \ apache2_config.c \ apache2_io.c \ apache2_util.c \ + libinjection/libinjection_html5.c \ libinjection/libinjection_sqli.c \ + libinjection/libinjection_xss.c \ mod_security2.c \ modsecurity.c \ msc_status_engine.c \ diff --git a/apache2/Makefile.win b/apache2/Makefile.win index ca6cffb4..454c89d2 100644 --- a/apache2/Makefile.win +++ b/apache2/Makefile.win @@ -54,9 +54,12 @@ OBJS = mod_security2.obj apache2_config.obj apache2_io.obj apache2_util.obj \ msc_logging.obj msc_xml.obj msc_multipart.obj modsecurity.obj \ msc_parsers.obj msc_util.obj msc_pcre.obj persist_dbm.obj \ 
msc_reqbody.obj msc_geo.obj msc_gsb.obj msc_crypt.obj msc_tree.obj msc_unicode.obj acmp.obj msc_lua.obj \ - msc_release.obj libinjection\libinjection_sqli.obj \ + msc_release.obj \ msc_status_engine.obj \ - msc_json.obj + msc_json.obj \ + libinjection/libinjection_html5.obj \ + libinjection/libinjection_sqli.obj \ + libinjection/libinjection_xss.obj all: $(DLL) diff --git a/apache2/libinjection/libinjection_html5.c b/apache2/libinjection/libinjection_html5.c new file mode 100644 index 00000000..38ef9f0f --- /dev/null +++ b/apache2/libinjection/libinjection_html5.c @@ -0,0 +1,795 @@ +#include "libinjection_html5.h" + +#include +#include + +#ifdef DEBUG +#include +#define TRACE() printf("%s:%d\n", __FUNCTION__, __LINE__) +#else +#define TRACE() +#endif + + +#define CHAR_EOF -1 +#define CHAR_NULL 0 +#define CHAR_BANG 33 +#define CHAR_DOUBLE 34 +#define CHAR_PERCENT 37 +#define CHAR_SINGLE 39 +#define CHAR_DASH 45 +#define CHAR_SLASH 47 +#define CHAR_LT 60 +#define CHAR_EQUALS 61 +#define CHAR_GT 62 +#define CHAR_QUESTION 63 +#define CHAR_RIGHTB 93 +#define CHAR_TICK 96 + +/* prototypes */ + +static int h5_skip_white(h5_state_t* hs); +static int h5_is_white(char c); +static int h5_state_eof(h5_state_t* hs); +static int h5_state_data(h5_state_t* hs); +static int h5_state_tag_open(h5_state_t* hs); +static int h5_state_tag_name(h5_state_t* hs); +static int h5_state_tag_name_close(h5_state_t* hs); +static int h5_state_end_tag_open(h5_state_t* hs); +static int h5_state_self_closing_start_tag(h5_state_t* hs); +static int h5_state_attribute_name(h5_state_t* hs); +static int h5_state_after_attribute_name(h5_state_t* hs); +static int h5_state_before_attribute_name(h5_state_t* hs); +static int h5_state_before_attribute_value(h5_state_t* hs); +static int h5_state_attribute_value_double_quote(h5_state_t* hs); +static int h5_state_attribute_value_single_quote(h5_state_t* hs); +static int h5_state_attribute_value_back_quote(h5_state_t* hs); +static int 
h5_state_attribute_value_no_quote(h5_state_t* hs); +static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs); +static int h5_state_comment(h5_state_t* hs); +static int h5_state_cdata(h5_state_t* hs); + + +/* 12.2.4.44 */ +static int h5_state_bogus_comment(h5_state_t* hs); +static int h5_state_bogus_comment2(h5_state_t* hs); + +/* 12.2.4.45 */ +static int h5_state_markup_declaration_open(h5_state_t* hs); + +/* 8.2.4.52 */ +static int h5_state_doctype(h5_state_t* hs); + +/** + * public function + */ +void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags) +{ + memset(hs, 0, sizeof(h5_state_t)); + hs->s = s; + hs->len = len; + + switch (flags) { + case DATA_STATE: + hs->state = h5_state_data; + break; + case VALUE_NO_QUOTE: + hs->state = h5_state_before_attribute_name; + break; + case VALUE_SINGLE_QUOTE: + hs->state = h5_state_attribute_value_single_quote; + break; + case VALUE_DOUBLE_QUOTE: + hs->state = h5_state_attribute_value_double_quote; + break; + case VALUE_BACK_QUOTE: + hs->state = h5_state_attribute_value_back_quote; + break; + } +} + +/** + * public function + */ +int libinjection_h5_next(h5_state_t* hs) +{ + assert(hs->state != NULL); + return (*hs->state)(hs); +} + +/** + * Everything below here is private + * +*/ + +static int h5_is_white(char ch) +{ + return strchr(" \t\n\v\f\r", ch) != NULL; +} + +static int h5_skip_white(h5_state_t* hs) +{ + char ch; + while (hs->pos < hs->len) { + ch = hs->s[hs->pos]; + switch (ch) { + case 0x00: /* IE only */ + case 0x20: + case 0x09: + case 0x0A: + case 0x0B: /* IE only */ + case 0x0C: + case 0x0D: /* IE only */ + hs->pos += 1; + break; + default: + return ch; + } + } + return CHAR_EOF; +} + +static int h5_state_eof(h5_state_t* hs) +{ + /* eliminate unused function argument warning */ + (void)hs; + return 0; +} + +static int h5_state_data(h5_state_t* hs) +{ + const char* idx; + + TRACE(); + assert(hs->len >= hs->pos); + idx = (const char*) memchr(hs->s + hs->pos, 
CHAR_LT, hs->len - hs->pos); + if (idx == NULL) { + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = DATA_TEXT; + hs->state = h5_state_eof; + if (hs->token_len == 0) { + return 0; + } + } else { + hs->token_start = hs->s + hs->pos; + hs->token_type = DATA_TEXT; + hs->token_len = (size_t)(idx - hs->s) - hs->pos; + hs->pos = (size_t)(idx - hs->s) + 1; + hs->state = h5_state_tag_open; + if (hs->token_len == 0) { + return h5_state_tag_open(hs); + } + } + return 1; +} + +/** + * 12 2.4.8 + */ +static int h5_state_tag_open(h5_state_t* hs) +{ + char ch; + + TRACE(); + ch = hs->s[hs->pos]; + if (ch == CHAR_BANG) { + hs->pos += 1; + return h5_state_markup_declaration_open(hs); + } else if (ch == CHAR_SLASH) { + hs->pos += 1; + hs->is_close = 1; + return h5_state_end_tag_open(hs); + } else if (ch == CHAR_QUESTION) { + hs->pos += 1; + return h5_state_bogus_comment(hs); + } else if (ch == CHAR_PERCENT) { + /* this is not in spec.. alternative comment format used + by IE <= 9 and Safari < 4.0.3 */ + hs->pos += 1; + return h5_state_bogus_comment2(hs); + } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { + return h5_state_tag_name(hs); + } else if (ch == CHAR_NULL) { + /* IE-ism NULL characters are ignored */ + return h5_state_tag_name(hs); + } else { + /* user input mistake in configuring state */ + if (hs->pos == 0) { + return h5_state_data(hs); + } + hs->token_start = hs->s + hs->pos - 1; + hs->token_len = 1; + hs->token_type = DATA_TEXT; + hs->state = h5_state_data; + return 1; + } +} +/** + * 12.2.4.9 + */ +static int h5_state_end_tag_open(h5_state_t* hs) +{ + char ch; + + TRACE(); + + if (hs->pos >= hs->len) { + return 0; + } + ch = hs->s[hs->pos]; + if (ch == CHAR_GT) { + return h5_state_data(hs); + } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { + return h5_state_tag_name(hs); + } + + hs->is_close = 0; + return h5_state_bogus_comment(hs); +} +/* + * + */ +static int 
h5_state_tag_name_close(h5_state_t* hs) +{ + TRACE(); + hs->is_close = 0; + hs->token_start = hs->s + hs->pos; + hs->token_len = 1; + hs->token_type = TAG_NAME_CLOSE; + hs->pos += 1; + if (hs->pos < hs->len) { + hs->state = h5_state_data; + } else { + hs->state = h5_state_eof; + } + + return 1; +} + +/** + * 12.2.4.10 + */ +static int h5_state_tag_name(h5_state_t* hs) +{ + char ch; + size_t pos; + + TRACE(); + pos = hs->pos; + while (pos < hs->len) { + ch = hs->s[pos]; + if (ch == 0) { + /* special non-standard case */ + /* allow nulls in tag name */ + /* some old browsers apparently allow and ignore them */ + pos += 1; + } else if (h5_is_white(ch)) { + hs->token_start = hs->s + hs->pos; + hs->token_len = pos - hs->pos; + hs->token_type = TAG_NAME_OPEN; + hs->pos = pos + 1; + hs->state = h5_state_before_attribute_name; + return 1; + } else if (ch == CHAR_SLASH) { + hs->token_start = hs->s + hs->pos; + hs->token_len = pos - hs->pos; + hs->token_type = TAG_NAME_OPEN; + hs->pos = pos + 1; + hs->state = h5_state_self_closing_start_tag; + return 1; + } else if (ch == CHAR_GT) { + hs->token_start = hs->s + hs->pos; + hs->token_len = pos - hs->pos; + if (hs->is_close) { + hs->pos = pos + 1; + hs->is_close = 0; + hs->token_type = TAG_CLOSE; + hs->state = h5_state_data; + } else { + hs->pos = pos; + hs->token_type = TAG_NAME_OPEN; + hs->state = h5_state_tag_name_close; + } + return 1; + } else { + pos += 1; + } + } + + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = TAG_NAME_OPEN; + hs->state = h5_state_eof; + return 1; +} + +/** + * 12.2.4.34 + */ +static int h5_state_before_attribute_name(h5_state_t* hs) +{ + int ch; + + TRACE(); + ch = h5_skip_white(hs); + switch (ch) { + case CHAR_EOF: { + return 0; + } + case CHAR_SLASH: { + hs->pos += 1; + return h5_state_self_closing_start_tag(hs); + } + case CHAR_GT: { + hs->state = h5_state_data; + hs->token_start = hs->s + hs->pos; + hs->token_len = 1; + hs->token_type = TAG_NAME_CLOSE; + 
hs->pos += 1; + return 1; + } + default: { + return h5_state_attribute_name(hs); + } + } +} + +static int h5_state_attribute_name(h5_state_t* hs) +{ + char ch; + size_t pos; + + TRACE(); + pos = hs->pos + 1; + while (pos < hs->len) { + ch = hs->s[pos]; + if (h5_is_white(ch)) { + hs->token_start = hs->s + hs->pos; + hs->token_len = pos - hs->pos; + hs->token_type = ATTR_NAME; + hs->state = h5_state_after_attribute_name; + hs->pos = pos + 1; + return 1; + } else if (ch == CHAR_SLASH) { + hs->token_start = hs->s + hs->pos; + hs->token_len = pos - hs->pos; + hs->token_type = ATTR_NAME; + hs->state = h5_state_self_closing_start_tag; + hs->pos = pos + 1; + return 1; + } else if (ch == CHAR_EQUALS) { + hs->token_start = hs->s + hs->pos; + hs->token_len = pos - hs->pos; + hs->token_type = ATTR_NAME; + hs->state = h5_state_before_attribute_value; + hs->pos = pos + 1; + return 1; + } else if (ch == CHAR_GT) { + hs->token_start = hs->s + hs->pos; + hs->token_len = pos - hs->pos; + hs->token_type = ATTR_NAME; + hs->state = h5_state_tag_name_close; + hs->pos = pos; + return 1; + } else { + pos += 1; + } + } + /* EOF */ + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = ATTR_NAME; + hs->state = h5_state_eof; + hs->pos = hs->len; + return 1; +} + +/** + * 12.2.4.36 + */ +static int h5_state_after_attribute_name(h5_state_t* hs) +{ + int c; + + TRACE(); + c = h5_skip_white(hs); + switch (c) { + case CHAR_EOF: { + return 0; + } + case CHAR_SLASH: { + hs->pos += 1; + return h5_state_self_closing_start_tag(hs); + } + case CHAR_EQUALS: { + hs->pos += 1; + return h5_state_before_attribute_value(hs); + } + case CHAR_GT: { + return h5_state_tag_name_close(hs); + } + default: { + return h5_state_attribute_name(hs); + } + } +} + +/** + * 12.2.4.37 + */ +static int h5_state_before_attribute_value(h5_state_t* hs) +{ + int c; + TRACE(); + + c = h5_skip_white(hs); + + if (c == CHAR_EOF) { + hs->state = h5_state_eof; + return 0; + } + + if (c == 
CHAR_DOUBLE) { + return h5_state_attribute_value_double_quote(hs); + } else if (c == CHAR_SINGLE) { + return h5_state_attribute_value_single_quote(hs); + } else if (c == CHAR_TICK) { + /* NON STANDARD IE */ + return h5_state_attribute_value_back_quote(hs); + } else { + return h5_state_attribute_value_no_quote(hs); + } +} + + +static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar) +{ + const char* idx; + + TRACE(); + + /* skip initial quote in normal case. + * dont do this is pos == 0 since it means we have started + * in a non-data state. given an input of '>pos > 0) { + hs->pos += 1; + } + + + idx = (const char*) memchr(hs->s + hs->pos, qchar, hs->len - hs->pos); + if (idx == NULL) { + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = ATTR_VALUE; + hs->state = h5_state_eof; + } else { + hs->token_start = hs->s + hs->pos; + hs->token_len = (size_t)(idx - hs->s) - hs->pos; + hs->token_type = ATTR_VALUE; + hs->state = h5_state_after_attribute_value_quoted_state; + hs->pos += hs->token_len + 1; + } + return 1; +} + +static +int h5_state_attribute_value_double_quote(h5_state_t* hs) +{ + TRACE(); + return h5_state_attribute_value_quote(hs, CHAR_DOUBLE); +} + +static +int h5_state_attribute_value_single_quote(h5_state_t* hs) +{ + TRACE(); + return h5_state_attribute_value_quote(hs, CHAR_SINGLE); +} + +static +int h5_state_attribute_value_back_quote(h5_state_t* hs) +{ + TRACE(); + return h5_state_attribute_value_quote(hs, CHAR_TICK); +} + +static int h5_state_attribute_value_no_quote(h5_state_t* hs) +{ + char ch; + size_t pos; + + TRACE(); + pos = hs->pos; + while (pos < hs->len) { + ch = hs->s[pos]; + if (h5_is_white(ch)) { + hs->token_type = ATTR_VALUE; + hs->token_start = hs->s + hs->pos; + hs->token_len = pos - hs->pos; + hs->pos = pos + 1; + hs->state = h5_state_before_attribute_name; + return 1; + } else if (ch == CHAR_GT) { + hs->token_type = ATTR_VALUE; + hs->token_start = hs->s + hs->pos; + hs->token_len = 
pos - hs->pos; + hs->pos = pos; + hs->state = h5_state_tag_name_close; + return 1; + } + pos += 1; + } + TRACE(); + /* EOF */ + hs->state = h5_state_eof; + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = ATTR_VALUE; + return 1; +} + +/** + * 12.2.4.41 + */ +static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs) +{ + char ch; + + TRACE(); + if (hs->pos >= hs->len) { + return 0; + } + ch = hs->s[hs->pos]; + if (h5_is_white(ch)) { + hs->pos += 1; + return h5_state_before_attribute_name(hs); + } else if (ch == CHAR_SLASH) { + hs->pos += 1; + return h5_state_self_closing_start_tag(hs); + } else if (ch == CHAR_GT) { + hs->token_start = hs->s + hs->pos; + hs->token_len = 1; + hs->token_type = TAG_NAME_CLOSE; + hs->pos += 1; + hs->state = h5_state_data; + return 1; + } else { + return h5_state_before_attribute_name(hs); + } +} + +/** + * 12.2.4.43 + */ +static int h5_state_self_closing_start_tag(h5_state_t* hs) +{ + char ch; + + TRACE(); + if (hs->pos >= hs->len) { + return 0; + } + ch = hs->s[hs->pos]; + if (ch == CHAR_GT) { + assert(hs->pos > 0); + hs->token_start = hs->s + hs->pos -1; + hs->token_len = 2; + hs->token_type = TAG_NAME_SELFCLOSE; + hs->state = h5_state_data; + hs->pos += 1; + return 1; + } else { + return h5_state_before_attribute_name(hs); + } +} + +/** + * 12.2.4.44 + */ +static int h5_state_bogus_comment(h5_state_t* hs) +{ + const char* idx; + + TRACE(); + idx = (const char*) memchr(hs->s + hs->pos, CHAR_GT, hs->len - hs->pos); + if (idx == NULL) { + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->pos = hs->len; + hs->state = h5_state_eof; + } else { + hs->token_start = hs->s + hs->pos; + hs->token_len = (size_t)(idx - hs->s) - hs->pos; + hs->pos = (size_t)(idx - hs->s) + 1; + hs->state = h5_state_data; + } + + hs->token_type = TAG_COMMENT; + return 1; +} + +/** + * 12.2.4.44 ALT + */ +static int h5_state_bogus_comment2(h5_state_t* hs) +{ + const char* idx; + size_t 
pos; + + TRACE(); + pos = hs->pos; + while (1) { + idx = (const char*) memchr(hs->s + pos, CHAR_PERCENT, hs->len - pos); + if (idx == NULL || (idx + 1 >= hs->s + hs->len)) { + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->pos = hs->len; + hs->token_type = TAG_COMMENT; + hs->state = h5_state_eof; + return 1; + } + + if (*(idx +1) != CHAR_GT) { + pos = (size_t)(idx - hs->s) + 1; + continue; + } + + /* ends in %> */ + hs->token_start = hs->s + hs->pos; + hs->token_len = (size_t)(idx - hs->s) - hs->pos; + hs->pos = (size_t)(idx - hs->s) + 2; + hs->state = h5_state_data; + hs->token_type = TAG_COMMENT; + return 1; + } +} + +/** + * 8.2.4.45 + */ +static int h5_state_markup_declaration_open(h5_state_t* hs) +{ + size_t remaining; + + TRACE(); + remaining = hs->len - hs->pos; + if (remaining >= 7 && + /* case insensitive */ + (hs->s[hs->pos + 0] == 'D' || hs->s[hs->pos + 0] == 'd') && + (hs->s[hs->pos + 1] == 'O' || hs->s[hs->pos + 1] == 'o') && + (hs->s[hs->pos + 2] == 'C' || hs->s[hs->pos + 2] == 'c') && + (hs->s[hs->pos + 3] == 'T' || hs->s[hs->pos + 3] == 't') && + (hs->s[hs->pos + 4] == 'Y' || hs->s[hs->pos + 4] == 'y') && + (hs->s[hs->pos + 5] == 'P' || hs->s[hs->pos + 5] == 'p') && + (hs->s[hs->pos + 6] == 'E' || hs->s[hs->pos + 6] == 'e') + ) { + return h5_state_doctype(hs); + } else if (remaining >= 7 && + /* upper case required */ + hs->s[hs->pos + 0] == '[' && + hs->s[hs->pos + 1] == 'C' && + hs->s[hs->pos + 2] == 'D' && + hs->s[hs->pos + 3] == 'A' && + hs->s[hs->pos + 4] == 'T' && + hs->s[hs->pos + 5] == 'A' && + hs->s[hs->pos + 6] == '[' + ) { + hs->pos += 7; + return h5_state_cdata(hs); + } else if (remaining >= 2 && + hs->s[hs->pos + 0] == '-' && + hs->s[hs->pos + 1] == '-') { + hs->pos += 2; + return h5_state_comment(hs); + } + + return h5_state_bogus_comment(hs); +} + +/** + * 12.2.4.48 + * 12.2.4.49 + * 12.2.4.50 + * 12.2.4.51 + * state machine spec is confusing since it can only look + * at one character at a time but 
simply it's comments end by: + * 1) EOF + * 2) ending in --> + * 3) ending in -!> + */ +static int h5_state_comment(h5_state_t* hs) +{ + char ch; + const char* idx; + size_t pos; + + TRACE(); + pos = hs->pos; + while (1) { + idx = (const char*) memchr(hs->s + pos, CHAR_DASH, hs->len - pos); + + /* did not find anything or has less than 3 chars left */ + if (idx == NULL || idx > hs->s + hs->len - 3) { + hs->state = h5_state_eof; + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = TAG_COMMENT; + return 1; + } + ch = *(idx + 1); + if (ch != CHAR_DASH && ch != CHAR_BANG) { + pos = (size_t)(idx - hs->s) + 1; + continue; + } + ch = *(idx + 2); + if (ch != CHAR_GT) { + pos = (size_t)(idx - hs->s) + 1; + continue; + } + + /* ends in --> or -!> */ + hs->token_start = hs->s + hs->pos; + hs->token_len = (size_t)(idx - hs->s) - hs->pos; + hs->pos = (size_t)(idx - hs->s) + 3; + hs->state = h5_state_data; + hs->token_type = TAG_COMMENT; + return 1; + } +} + +static int h5_state_cdata(h5_state_t* hs) +{ + const char* idx; + size_t pos; + + TRACE(); + pos = hs->pos; + while (1) { + idx = (const char*) memchr(hs->s + pos, CHAR_RIGHTB, hs->len - pos); + + /* did not find anything or has less than 3 chars left */ + if (idx == NULL || idx > hs->s + hs->len - 3) { + hs->state = h5_state_eof; + hs->token_start = hs->s + hs->pos; + hs->token_len = hs->len - hs->pos; + hs->token_type = DATA_TEXT; + return 1; + } else if ( *(idx+1) == CHAR_RIGHTB && *(idx+2) == CHAR_GT) { + hs->state = h5_state_data; + hs->token_start = hs->s + hs->pos; + hs->token_len = (size_t)(idx - hs->s) - hs->pos; + hs->pos = (size_t)(idx - hs->s) + 3; + hs->token_type = DATA_TEXT; + return 1; + } else { + pos = (size_t)(idx - hs->s) + 1; + } + } +} + +/** + * 8.2.4.52 + * http://www.w3.org/html/wg/drafts/html/master/syntax.html#doctype-state + */ +static int h5_state_doctype(h5_state_t* hs) +{ + const char* idx; + + TRACE(); + hs->token_start = hs->s + hs->pos; + 
hs->token_type = DOCTYPE; + + idx = (const char*) memchr(hs->s + hs->pos, CHAR_GT, hs->len - hs->pos); + if (idx == NULL) { + hs->state = h5_state_eof; + hs->token_len = hs->len - hs->pos; + } else { + hs->state = h5_state_data; + hs->token_len = (size_t)(idx - hs->s) - hs->pos; + hs->pos = (size_t)(idx - hs->s) + 1; + } + return 1; +} diff --git a/apache2/libinjection/libinjection_html5.h b/apache2/libinjection/libinjection_html5.h new file mode 100644 index 00000000..bdaa94ec --- /dev/null +++ b/apache2/libinjection/libinjection_html5.h @@ -0,0 +1,54 @@ +#ifndef LIBINJECTION_HTML5 +#define LIBINJECTION_HTML5 + +#ifdef __cplusplus +extern "C" { +#endif + +/* pull in size_t */ + +#include + +enum html5_type { + DATA_TEXT + , TAG_NAME_OPEN + , TAG_NAME_CLOSE + , TAG_NAME_SELFCLOSE + , TAG_DATA + , TAG_CLOSE + , ATTR_NAME + , ATTR_VALUE + , TAG_COMMENT + , DOCTYPE +}; + +enum html5_flags { + DATA_STATE + , VALUE_NO_QUOTE + , VALUE_SINGLE_QUOTE + , VALUE_DOUBLE_QUOTE + , VALUE_BACK_QUOTE +}; + +struct h5_state; +typedef int (*ptr_html5_state)(struct h5_state*); + +typedef struct h5_state { + const char* s; + size_t len; + size_t pos; + int is_close; + ptr_html5_state state; + const char* token_start; + size_t token_len; + enum html5_type token_type; +} h5_state_t; + + +void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags); +int libinjection_h5_next(h5_state_t* hs); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/apache2/libinjection/libinjection_xss.c b/apache2/libinjection/libinjection_xss.c new file mode 100644 index 00000000..2807c22f --- /dev/null +++ b/apache2/libinjection/libinjection_xss.c @@ -0,0 +1,540 @@ + +#include "libinjection.h" +#include "libinjection_xss.h" +#include "libinjection_html5.h" + +#include +#include + +#ifndef DEBUG +#include +#define TRACE() printf("%s:%d\n", __FUNCTION__, __LINE__) +#else +#define TRACE() +#endif + +typedef enum attribute { + TYPE_NONE + , TYPE_BLACK /* ban always */ + , 
TYPE_ATTR_URL /* attribute value takes a URL-like object */ + , TYPE_STYLE + , TYPE_ATTR_INDIRECT /* attribute *name* is given in *value* */ +} attribute_t; + + +static attribute_t is_black_attr(const char* s, size_t len); +static int is_black_tag(const char* s, size_t len); +static int is_black_url(const char* s, size_t len); +static int cstrcasecmp_with_null(const char *a, const char *b, size_t n); +static int html_decode_char_at(const char* src, size_t len, size_t* consumed); +static int htmlencode_startswith(const char* prefix, const char *src, size_t n); + + +typedef struct stringtype { + const char* name; + attribute_t atype; +} stringtype_t; + + +static const int gsHexDecodeMap[256] = { + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 256, 256, + 256, 256, 256, 256, 256, 10, 11, 12, 13, 14, 15, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 10, 11, 12, 13, 14, 15, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, + 256, 256, 256, 256 +}; + 
+/*
+ * Decode the character found at the start of `src` (`len` readable bytes),
+ * expanding a numeric HTML character reference if one is present.
+ *
+ * Stores in `*consumed` the number of input bytes that were used.
+ *
+ * Returns:
+ *   -1    if `src` is NULL or `len` is 0 (`*consumed` is 0)
+ *   byte  the first byte, unchanged, when it is not '&' (`*consumed` is 1)
+ *   '&'   for named entities, malformed or truncated numeric references,
+ *         and values above 0x1000FF (`*consumed` is 1)
+ *   value the decoded code point of "&#NNN" / "&#xHHH"; a terminating ';'
+ *         is consumed, any other terminating byte is left in the input
+ */
+static int html_decode_char_at(const char* src, size_t len, size_t* consumed)
+{
+    int val = 0;
+    size_t i;
+    int ch;
+
+    if (len == 0 || src == NULL) {
+        *consumed = 0;
+        return -1;
+    }
+
+    *consumed = 1;
+    if (*src != '&' || len < 2) {
+        return (unsigned char)(*src);
+    }
+
+
+    if (*(src+1) != '#' || len < 3) {
+        /* normally this would be for named entities
+         * but for this case we don't actually care.
+         * "&#" as the last two bytes is treated the same way so that
+         * src[2] is never read past the end of the input.
+         */
+        return '&';
+    }
+
+    if (*(src+2) == 'x' || *(src+2) == 'X') {
+        if (len < 4) {
+            /* "&#x" as the last three bytes: no hex digit left to read */
+            return '&';
+        }
+        ch = (unsigned char) (*(src+3));
+        ch = gsHexDecodeMap[ch];
+        if (ch == 256) {
+            /* degenerate case '&#[?]' */
+            return '&';
+        }
+        val = ch;
+        i = 4;
+        while (i < len) {
+            ch = (unsigned char) src[i];
+            if (ch == ';') {
+                *consumed = i + 1;
+                return val;
+            }
+            ch = gsHexDecodeMap[ch];
+            if (ch == 256) {
+                *consumed = i;
+                return val;
+            }
+            val = (val * 16) + ch;
+            /* checked on every digit, so `val` cannot overflow an int */
+            if (val > 0x1000FF) {
+                return '&';
+            }
+            ++i;
+        }
+        *consumed = i;
+        return val;
+    } else {
+        i = 2;
+        ch = (unsigned char) src[i];
+        if (ch < '0' || ch > '9') {
+            return '&';
+        }
+        val = ch - '0';
+        i += 1;
+        while (i < len) {
+            ch = (unsigned char) src[i];
+            if (ch == ';') {
+                *consumed = i + 1;
+                return val;
+            }
+            if (ch < '0' || ch > '9') {
+                *consumed = i;
+                return val;
+            }
+            val = (val * 10) + (ch - '0');
+            /* checked on every digit, so `val` cannot overflow an int */
+            if (val > 0x1000FF) {
+                return '&';
+            }
+            ++i;
+        }
+        *consumed = i;
+        return val;
+    }
+}
+
+
+/*
+ * view-source:
+ * data:
+ * javascript:
+ */
+static stringtype_t BLACKATTR[] = {
+    { "ACTION", TYPE_ATTR_URL }     /* form */
+    , { "ATTRIBUTENAME", TYPE_ATTR_INDIRECT } /* SVG allow indirection of attribute names */
+    , { "BY", TYPE_ATTR_URL }         /* SVG */
+    , { "BACKGROUND", TYPE_ATTR_URL } /* IE6, O11 */
+    , { "DATAFORMATAS", TYPE_BLACK }  /* IE */
+    , { "DATASRC", TYPE_BLACK }       /* IE */
+    , { "DYNSRC", TYPE_ATTR_URL }     /* Obsolete img attribute */
+    , { "FILTER", TYPE_STYLE }        /* Opera, SVG inline style */
+    , { "FORMACTION", TYPE_ATTR_URL } /* HTML5 */
+    , { "FOLDER", TYPE_ATTR_URL }     /* Only on A tags, IE-only */
+    , { "FROM", TYPE_ATTR_URL }       /* SVG */
+    , { "HANDLER", TYPE_ATTR_URL }    /* SVG Tiny, Opera */
+    , { "HREF", TYPE_ATTR_URL }
+    , { "LOWSRC", TYPE_ATTR_URL }     /* Obsolete img attribute */
+    , { "POSTER", TYPE_ATTR_URL }     /* Opera 10,11 */
+    , { "SRC", TYPE_ATTR_URL }
+    , { "STYLE", TYPE_STYLE }
+    , { "TO", TYPE_ATTR_URL }         /* SVG */
+    , { "VALUES", TYPE_ATTR_URL }     /* SVG */
+    , { "XLINK:HREF", TYPE_ATTR_URL }
+    , { NULL, TYPE_NONE }
+};
+
+/* xmlns */
+/* xml-stylesheet > , */
+
+/*
+static const char* BLACKATTR[] = {
+    "ATTRIBUTENAME",
+    "BACKGROUND",
+    "DATAFORMATAS",
+    "HREF",
+    "SCROLL",
+    "SRC",
+    "STYLE",
+    "SRCDOC",
+    NULL
+};
+*/
+
+static const char* BLACKTAG[] = {
+    "APPLET"
+    /*    , "AUDIO" */
+    , "BASE"
+    , "COMMENT"  /* IE http://html5sec.org/#38 */
+    , "EMBED"
+    /*   ,  "FORM" */
+    , "FRAME"
+    , "FRAMESET"
+    , "HANDLER" /* Opera SVG, effectively a script tag */
+    , "IFRAME"
+    , "IMPORT"
+    , "ISINDEX"
+    , "LINK"
+    , "LISTENER"
+    /*    , "MARQUEE" */
+    , "META"
+    , "NOSCRIPT"
+    , "OBJECT"
+    , "SCRIPT"
+    , "STYLE"
+    /*    , "VIDEO" */
+    , "VMLFRAME"
+    , "XML"
+    , "XSS"
+    , NULL
+};
+
+
+/*
+ * Compare the null-terminated, upper-case string `a` against the `n` bytes
+ * at `b`. NUL bytes inside `b` are skipped and ASCII lower-case letters in
+ * `b` are upper-cased before comparing.
+ *
+ * return 0 if every remaining byte of `b` matched and all of `a` was used
+ * return 1 otherwise
+ */
+static int cstrcasecmp_with_null(const char *a, const char *b, size_t n)
+{
+    char ca;
+    char cb;
+    /* printf("Comparing to %s %.*s\n", a, (int)n, b); */
+    while (n-- > 0) {
+        cb = *b++;
+        if (cb == '\0') continue;
+
+        ca = *a++;
+
+        if (cb >= 'a' && cb <= 'z') {
+            cb -= 0x20;
+        }
+        /* printf("Comparing %c vs %c with %d left\n", ca, cb, (int)n); */
+        if (ca != cb) {
+            return 1;
+        }
+    }
+
+    if (*a == 0) {
+        /* printf(" MATCH \n"); */
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
+/*
+ * Does an HTML encoded binary string (const char*, length) start with
+ * an all-uppercase c-string (null terminated), case insensitive!
+ *
+ * also ignore any embedded nulls in the HTML string!
+ * + * return 1 if match / starts with + * return 0 if not + */ +static int htmlencode_startswith(const char *a, const char *b, size_t n) +{ + size_t consumed; + int cb; + int first = 1; + /* printf("Comparing %s with %.*s\n", a,(int)n,b); */ + while (n > 0) { + if (*a == 0) { + /* printf("Match EOL!\n"); */ + return 1; + } + cb = html_decode_char_at(b, n, &consumed); + b += consumed; + n -= consumed; + + if (first && cb <= 32) { + /* ignore all leading whitespace and control characters */ + continue; + } + first = 0; + + if (cb == 0) { + /* always ignore null characters in user input */ + continue; + } + + if (cb == 10) { + /* always ignore vtab characters in user input */ + /* who allows this?? */ + continue; + } + + if (cb >= 'a' && cb <= 'z') { + /* upcase */ + cb -= 0x20; + } + + if (*a != (char) cb) { + /* printf(" %c != %c\n", *a, cb); */ + /* mismatch */ + return 0; + } + a++; + } + + return (*a == 0) ? 1 : 0; +} + +static int is_black_tag(const char* s, size_t len) +{ + const char** black; + + if (len < 3) { + return 0; + } + + black = BLACKTAG; + while (*black != NULL) { + if (cstrcasecmp_with_null(*black, s, len) == 0) { + /* printf("Got black tag %s\n", *black); */ + return 1; + } + black += 1; + } + + /* anything SVG related */ + if ((s[0] == 's' || s[0] == 'S') && + (s[1] == 'v' || s[1] == 'V') && + (s[2] == 'g' || s[2] == 'G')) { + /* printf("Got SVG tag \n"); */ + return 1; + } + + /* Anything XSL(t) related */ + if ((s[0] == 'x' || s[0] == 'X') && + (s[1] == 's' || s[1] == 'S') && + (s[2] == 'l' || s[2] == 'L')) { + /* printf("Got XSL tag\n"); */ + return 1; + } + + return 0; +} + +static attribute_t is_black_attr(const char* s, size_t len) +{ + stringtype_t* black; + + if (len < 2) { + return TYPE_NONE; + } + + /* javascript on.* */ + if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) { + /* printf("Got javascript on- attribute name\n"); */ + return TYPE_BLACK; + } + + + if (len >= 5) { + /* XMLNS can be used to create arbitrary 
tags */ + if (cstrcasecmp_with_null("XMLNS", s, 5) == 0 || cstrcasecmp_with_null("XLINK", s, 5) == 0) { + /* printf("Got XMLNS and XLINK tags\n"); */ + return TYPE_BLACK; + } + } + + black = BLACKATTR; + while (black->name != NULL) { + if (cstrcasecmp_with_null(black->name, s, len) == 0) { + /* printf("Got banned attribute name %s\n", black->name); */ + return black->atype; + } + black += 1; + } + + return TYPE_NONE; +} + +static int is_black_url(const char* s, size_t len) +{ + + static const char* data_url = "DATA"; + static const char* viewsource_url = "VIEW-SOURCE"; + + /* obsolete but interesting signal */ + static const char* vbscript_url = "VBSCRIPT"; + + /* covers JAVA, JAVASCRIPT, + colon */ + static const char* javascript_url = "JAVA"; + + /* skip whitespace */ + while (len > 0) { + /* + * HEY: this is a signed character. + * We are intentionally skipping high-bit characters too + * since they are not ascii, and Opera sometimes uses UTF8 whitespace + */ + if (*s <= 32) { + ++s; + --len; + } + break; + } + + + if (htmlencode_startswith(data_url, s, len)) { + return 1; + } + + if (htmlencode_startswith(viewsource_url, s, len)) { + return 1; + } + + if (htmlencode_startswith(javascript_url, s, len)) { + return 1; + } + + if (htmlencode_startswith(vbscript_url, s, len)) { + return 1; + } + return 0; +} + +int libinjection_is_xss(const char* s, size_t len, int flags) +{ + h5_state_t h5; + attribute_t attr = TYPE_NONE; + + libinjection_h5_init(&h5, s, len, (enum html5_flags) flags); + while (libinjection_h5_next(&h5)) { + if (h5.token_type != ATTR_VALUE) { + attr = TYPE_NONE; + } + + if (h5.token_type == DOCTYPE) { + return 1; + } else if (h5.token_type == TAG_NAME_OPEN) { + if (is_black_tag(h5.token_start, h5.token_len)) { + return 1; + } + } else if (h5.token_type == ATTR_NAME) { + attr = is_black_attr(h5.token_start, h5.token_len); + } else if (h5.token_type == ATTR_VALUE) { + /* + * IE6,7,8 parsing works a bit differently so + * a whole