Updates libinjection.

This is not yet their v3.10.0. But I belive it is close to be.
See #124 at client9/libinjection for further information.
This commit is contained in:
Felipe Zimmerle
2017-05-30 10:43:10 -03:00
parent e5dbe59336
commit 53571a860d
6 changed files with 177 additions and 457 deletions

View File

@@ -1,11 +1,15 @@
#include "libinjection.h"
#include "libinjection_xss.h"
#include "libinjection_html5.h"
#include <assert.h>
#include <stdio.h>
/*
* HEY THIS ISN'T DONE
* AND MISSING A KEY INGREDIENT!!
*
*/
typedef enum attribute {
TYPE_NONE
, TYPE_BLACK /* ban always */
@@ -14,128 +18,11 @@ typedef enum attribute {
, TYPE_ATTR_INDIRECT /* attribute *name* is given in *value* */
} attribute_t;
static attribute_t is_black_attr(const char* s, size_t len);
static int is_black_tag(const char* s, size_t len);
static int is_black_url(const char* s, size_t len);
static int cstrcasecmp_with_null(const char *a, const char *b, size_t n);
static int html_decode_char_at(const char* src, size_t len, size_t* consumed);
static int htmlencode_startswith(const char* prefix, const char *src, size_t n);
typedef struct stringtype {
const char* name;
attribute_t atype;
} stringtype_t;
static const int gsHexDecodeMap[256] = {
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 256, 256,
256, 256, 256, 256, 256, 10, 11, 12, 13, 14, 15, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 10, 11, 12, 13, 14, 15, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256
};
static int html_decode_char_at(const char* src, size_t len, size_t* consumed)
{
int val = 0;
size_t i;
int ch;
if (len == 0 || src == NULL) {
*consumed = 0;
return -1;
}
*consumed = 1;
if (*src != '&' || len < 2) {
return (unsigned char)(*src);
}
if (*(src+1) != '#') {
/* normally this would be for named entities
* but for this case we don't actually care
*/
return '&';
}
if (*(src+2) == 'x' || *(src+2) == 'X') {
ch = (unsigned char) (*(src+3));
ch = gsHexDecodeMap[ch];
if (ch == 256) {
/* degenerate case '&#[?]' */
return '&';
}
val = ch;
i = 4;
while (i < len) {
ch = (unsigned char) src[i];
if (ch == ';') {
*consumed = i + 1;
return val;
}
ch = gsHexDecodeMap[ch];
if (ch == 256) {
*consumed = i;
return val;
}
val = (val * 16) + ch;
if (val > 0x1000FF) {
return '&';
}
++i;
}
*consumed = i;
return val;
} else {
i = 2;
ch = (unsigned char) src[i];
if (ch < '0' || ch > '9') {
return '&';
}
val = ch - '0';
i += 1;
while (i < len) {
ch = (unsigned char) src[i];
if (ch == ';') {
*consumed = i + 1;
return val;
}
if (ch < '0' || ch > '9') {
*consumed = i;
return val;
}
val = (val * 10) + (ch - '0');
if (val > 0x1000FF) {
return '&';
}
++i;
}
*consumed = i;
return val;
}
}
/*
* view-source:
* data:
@@ -150,7 +37,7 @@ static stringtype_t BLACKATTR[] = {
, { "DATASRC", TYPE_BLACK } /* IE */
, { "DYNSRC", TYPE_ATTR_URL } /* Obsolete img attribute */
, { "FILTER", TYPE_STYLE } /* Opera, SVG inline style */
, { "FORMACTION", TYPE_ATTR_URL } /* HTML 5 */
, { "FORMACTION", TYPE_ATTR_URL } /* HTML5 */
, { "FOLDER", TYPE_ATTR_URL } /* Only on A tags, IE-only */
, { "FROM", TYPE_ATTR_URL } /* SVG */
, { "HANDLER", TYPE_ATTR_URL } /* SVG Tiny, Opera */
@@ -166,27 +53,26 @@ static stringtype_t BLACKATTR[] = {
};
/* xmlns */
/* `xml-stylesheet` > <eval>, <if expr=> */
/* xml-stylesheet > <eval>, <if expr=> */
/*
static const char* BLACKATTR[] = {
"ATTRIBUTENAME",
"BACKGROUND",
"DATAFORMATAS",
"HREF",
"SCROLL",
"SRC",
"STYLE",
"SRCDOC",
NULL
};
static const char* BLACKATTR[] = {
"ATTRIBUTENAME",
"BACKGROUND",
"DATAFORMATAS",
"HREF",
"SCROLL",
"SRC",
"STYLE",
"SRCDOC",
NULL
};
*/
static const char* BLACKTAG[] = {
"APPLET"
/* , "AUDIO" */
, "BASE"
, "COMMENT" /* IE http://html5sec.org/#38 */
, "EMBED"
/* , "FORM" */
, "FRAME"
@@ -206,94 +92,33 @@ static const char* BLACKTAG[] = {
/* , "VIDEO" */
, "VMLFRAME"
, "XML"
, "XSS"
, NULL
};
static int is_black_tag(const char* s, size_t len);
static attribute_t is_black_attr(const char* s, size_t len);
static int is_black_url(const char* s, size_t len);
static int cstrcasecmp_with_null(const char *a, const char *b, size_t n);
static int cstrcasecmp_with_null(const char *a, const char *b, size_t n)
{
char ca;
char cb;
/* printf("Comparing to %s %.*s\n", a, (int)n, b); */
while (n-- > 0) {
cb = *b++;
for (; n > 0; a++, b++, n--) {
cb = *b;
if (cb == '\0') continue;
ca = *a++;
if (cb >= 'a' && cb <= 'z') {
cb -= 0x20;
}
/* printf("Comparing %c vs %c with %d left\n", ca, cb, (int)n); */
if (ca != cb) {
return 1;
if (*a != cb) {
return *a - cb;
} else if (*a == '\0') {
return -1;
}
}
if (*a == 0) {
/* printf(" MATCH \n"); */
return 0;
} else {
return 1;
}
}
/*
* Does an HTML encoded binary string (const char*, length) start with
* a all uppercase c-string (null terminated), case insensitive!
*
* also ignore any embedded nulls in the HTML string!
*
* return 1 if match / starts with
* return 0 if not
*/
static int htmlencode_startswith(const char *a, const char *b, size_t n)
{
size_t consumed;
int cb;
int first = 1;
/* printf("Comparing %s with %.*s\n", a,(int)n,b); */
while (n > 0) {
if (*a == 0) {
/* printf("Match EOL!\n"); */
return 1;
}
cb = html_decode_char_at(b, n, &consumed);
b += consumed;
n -= consumed;
if (first && cb <= 32) {
/* ignore all leading whitespace and control characters */
continue;
}
first = 0;
if (cb == 0) {
/* always ignore null characters in user input */
continue;
}
if (cb == 10) {
/* always ignore vertical tab characters in user input */
/* who allows this?? */
continue;
}
if (cb >= 'a' && cb <= 'z') {
/* upcase */
cb -= 0x20;
}
if (*a != (char) cb) {
/* printf(" %c != %c\n", *a, cb); */
/* mismatch */
return 0;
}
a++;
}
return (*a == 0) ? 1 : 0;
return (*a == 0) ? 0 : 1;
}
static int is_black_tag(const char* s, size_t len)
@@ -307,7 +132,6 @@ static int is_black_tag(const char* s, size_t len)
black = BLACKTAG;
while (*black != NULL) {
if (cstrcasecmp_with_null(*black, s, len) == 0) {
/* printf("Got black tag %s\n", *black); */
return 1;
}
black += 1;
@@ -317,7 +141,6 @@ static int is_black_tag(const char* s, size_t len)
if ((s[0] == 's' || s[0] == 'S') &&
(s[1] == 'v' || s[1] == 'V') &&
(s[2] == 'g' || s[2] == 'G')) {
/* printf("Got SVG tag \n"); */
return 1;
}
@@ -325,7 +148,6 @@ static int is_black_tag(const char* s, size_t len)
if ((s[0] == 'x' || s[0] == 'X') &&
(s[1] == 's' || s[1] == 'S') &&
(s[2] == 'l' || s[2] == 'L')) {
/* printf("Got XSL tag\n"); */
return 1;
}
@@ -340,9 +162,8 @@ static attribute_t is_black_attr(const char* s, size_t len)
return TYPE_NONE;
}
/* JavaScript on.* */
/* javascript on.* */
if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
/* printf("Got JavaScript on- attribute name\n"); */
return TYPE_BLACK;
}
@@ -350,7 +171,6 @@ static attribute_t is_black_attr(const char* s, size_t len)
if (len >= 5) {
/* XMLNS can be used to create arbitrary tags */
if (cstrcasecmp_with_null("XMLNS", s, 5) == 0 || cstrcasecmp_with_null("XLINK", s, 5) == 0) {
/* printf("Got XMLNS and XLINK tags\n"); */
return TYPE_BLACK;
}
}
@@ -358,7 +178,6 @@ static attribute_t is_black_attr(const char* s, size_t len)
black = BLACKATTR;
while (black->name != NULL) {
if (cstrcasecmp_with_null(black->name, s, len) == 0) {
/* printf("Got banned attribute name %s\n", black->name); */
return black->atype;
}
black += 1;
@@ -379,43 +198,49 @@ static int is_black_url(const char* s, size_t len)
/* covers JAVA, JAVASCRIPT, + colon */
static const char* javascript_url = "JAVA";
size_t tokenlen;
/* skip whitespace */
while (len > 0 && (*s <= 32 || *s >= 127)) {
while (len > 0) {
/*
* HEY: this is a signed character.
* We are intentionally skipping high-bit characters too
* since they are not ASCII, and Opera sometimes uses UTF-8 whitespace.
*
* Also in EUC-JP some of the high bytes are just ignored.
* since they are not ascii, and Opera sometimes uses UTF8 whitespace
*/
++s;
--len;
if (*s <= 32) {
++s;
--len;
}
break;
}
if (htmlencode_startswith(data_url, s, len)) {
tokenlen = strlen(data_url);
if (len > tokenlen && cstrcasecmp_with_null(data_url, s, tokenlen) == 0) {
return 1;
}
tokenlen = strlen(viewsource_url);
if (len > tokenlen && cstrcasecmp_with_null(viewsource_url, s, tokenlen) == 0) {
return 1;
}
if (htmlencode_startswith(viewsource_url, s, len)) {
tokenlen = strlen(javascript_url);
if (len > tokenlen && cstrcasecmp_with_null(javascript_url, s, tokenlen) == 0) {
return 1;
}
if (htmlencode_startswith(javascript_url, s, len)) {
return 1;
}
if (htmlencode_startswith(vbscript_url, s, len)) {
tokenlen = strlen(vbscript_url);
if (len > tokenlen && cstrcasecmp_with_null(vbscript_url, s, tokenlen) == 0) {
return 1;
}
return 0;
}
int libinjection_is_xss(const char* s, size_t len, int flags)
int libinjection_is_xss(const char* s, size_t len)
{
h5_state_t h5;
attribute_t attr = TYPE_NONE;
libinjection_h5_init(&h5, s, len, (enum html5_flags) flags);
libinjection_h5_init(&h5, s, len, 0);
while (libinjection_h5_next(&h5)) {
if (h5.token_type != ATTR_VALUE) {
attr = TYPE_NONE;
@@ -433,16 +258,16 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
/*
* IE6,7,8 parsing works a bit differently so
* a whole <script> or other black tag might be hiding
* inside an attribute value under HTML 5 parsing
* inside an attribute value under HTML5 parsing
* See http://html5sec.org/#102
* to avoid doing a full reparse of the value, just
* look for "<". This probably need adjusting to
* handle escaped characters
*/
/*
if (memchr(h5.token_start, '<', h5.token_len) != NULL) {
return 1;
}
if (memchr(h5.token_start, '<', h5.token_len) != NULL) {
return 1;
}
*/
switch (attr) {
@@ -464,13 +289,13 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
}
break;
/*
default:
assert(0);
default:
assert(0);
*/
}
attr = TYPE_NONE;
} else if (h5.token_type == TAG_COMMENT) {
/* IE uses a "`" as a tag ending char */
/* IE uses a "`" as a tag ending char */
if (memchr(h5.token_start, '`', h5.token_len) != NULL) {
return 1;
}
@@ -482,7 +307,7 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
(h5.token_start[2] == 'f' || h5.token_start[2] == 'F')) {
return 1;
}
if ((h5.token_start[0] == 'x' || h5.token_start[0] == 'X') &&
if ((h5.token_start[0] == 'x' || h5.token_start[1] == 'X') &&
(h5.token_start[1] == 'm' || h5.token_start[1] == 'M') &&
(h5.token_start[2] == 'l' || h5.token_start[2] == 'L')) {
return 1;
@@ -490,7 +315,7 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
}
if (h5.token_len > 5) {
/* IE <?import pseudo-tag */
/* IE <?import pseudo-tag */
if (cstrcasecmp_with_null("IMPORT", h5.token_start, 6) == 0) {
return 1;
}
@@ -504,28 +329,3 @@ int libinjection_is_xss(const char* s, size_t len, int flags)
}
return 0;
}
/*
* wrapper
*/
int libinjection_xss(const char* s, size_t len)
{
if (libinjection_is_xss(s, len, DATA_STATE)) {
return 1;
}
if (libinjection_is_xss(s, len, VALUE_NO_QUOTE)) {
return 1;
}
if (libinjection_is_xss(s, len, VALUE_SINGLE_QUOTE)) {
return 1;
}
if (libinjection_is_xss(s, len, VALUE_DOUBLE_QUOTE)) {
return 1;
}
if (libinjection_is_xss(s, len, VALUE_BACK_QUOTE)) {
return 1;
}
return 0;
}