Updates libinjection to v3.10.0

This commit is contained in:
Felipe Zimmerle
2017-05-31 21:04:55 -03:00
parent 53571a860d
commit 9c0229ce1f
4 changed files with 476 additions and 166 deletions

View File

@@ -12,6 +12,7 @@
#define CHAR_EOF -1
#define CHAR_NULL 0
#define CHAR_BANG 33
#define CHAR_DOUBLE 34
#define CHAR_PERCENT 37
@@ -23,6 +24,7 @@
#define CHAR_GT 62
#define CHAR_QUESTION 63
#define CHAR_RIGHTB 93
#define CHAR_TICK 96
/* prototypes */
@@ -41,6 +43,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs);
static int h5_state_before_attribute_value(h5_state_t* hs);
static int h5_state_attribute_value_double_quote(h5_state_t* hs);
static int h5_state_attribute_value_single_quote(h5_state_t* hs);
static int h5_state_attribute_value_back_quote(h5_state_t* hs);
static int h5_state_attribute_value_no_quote(h5_state_t* hs);
static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs);
static int h5_state_comment(h5_state_t* hs);
@@ -60,16 +63,28 @@ static int h5_state_doctype(h5_state_t* hs);
/**
* public function
*/
void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, int flags)
void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags)
{
memset(hs, 0, sizeof(h5_state_t));
hs->s = s;
hs->len = len;
hs->state = h5_state_data;
if (flags == 0) {
switch (flags) {
case DATA_STATE:
hs->state = h5_state_data;
} else {
assert(0);
break;
case VALUE_NO_QUOTE:
hs->state = h5_state_before_attribute_name;
break;
case VALUE_SINGLE_QUOTE:
hs->state = h5_state_attribute_value_single_quote;
break;
case VALUE_DOUBLE_QUOTE:
hs->state = h5_state_attribute_value_double_quote;
break;
case VALUE_BACK_QUOTE:
hs->state = h5_state_attribute_value_back_quote;
break;
}
}
@@ -85,10 +100,18 @@ int libinjection_h5_next(h5_state_t* hs)
/**
* Everything below here is private
*
*/
*/
static int h5_is_white(char ch)
{
/*
* \t = horizontal tab = 0x09
* \n = newline = 0x0A
* \v = vertical tab = 0x0B
* \f = form feed = 0x0C
* \r = cr = 0x0D
*/
return strchr(" \t\n\v\f\r", ch) != NULL;
}
@@ -97,9 +120,17 @@ static int h5_skip_white(h5_state_t* hs)
char ch;
while (hs->pos < hs->len) {
ch = hs->s[hs->pos];
if (ch == ' ') {
switch (ch) {
case 0x00: /* IE only */
case 0x20:
case 0x09:
case 0x0A:
case 0x0B: /* IE only */
case 0x0C:
case 0x0D: /* IE only */
hs->pos += 1;
} else {
break;
default:
return ch;
}
}
@@ -149,6 +180,9 @@ static int h5_state_tag_open(h5_state_t* hs)
char ch;
TRACE();
if (hs->pos >= hs->len) {
return 0;
}
ch = hs->s[hs->pos];
if (ch == CHAR_BANG) {
hs->pos += 1;
@@ -167,6 +201,9 @@ static int h5_state_tag_open(h5_state_t* hs)
return h5_state_bogus_comment2(hs);
} else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
return h5_state_tag_name(hs);
} else if (ch == CHAR_NULL) {
/* IE-ism NULL characters are ignored */
return h5_state_tag_name(hs);
} else {
/* user input mistake in configuring state */
if (hs->pos == 0) {
@@ -197,7 +234,9 @@ static int h5_state_end_tag_open(h5_state_t* hs)
} else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
return h5_state_tag_name(hs);
}
return h5_state_data(hs);
hs->is_close = 0;
return h5_state_bogus_comment(hs);
}
/*
*
@@ -231,7 +270,12 @@ static int h5_state_tag_name(h5_state_t* hs)
pos = hs->pos;
while (pos < hs->len) {
ch = hs->s[pos];
if (h5_is_white(ch)) {
if (ch == 0) {
/* special non-standard case */
/* allow nulls in tag name */
/* some old browsers apparently allow and ignore them */
pos += 1;
} else if (h5_is_white(ch)) {
hs->token_start = hs->s + hs->pos;
hs->token_len = pos - hs->pos;
hs->token_type = TAG_NAME_OPEN;
@@ -299,7 +343,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs)
default: {
return h5_state_attribute_name(hs);
}
}
}
}
static int h5_state_attribute_name(h5_state_t* hs)
@@ -308,7 +352,7 @@ static int h5_state_attribute_name(h5_state_t* hs)
size_t pos;
TRACE();
pos = hs->pos;
pos = hs->pos + 1;
while (pos < hs->len) {
ch = hs->s[pos];
if (h5_is_white(ch)) {
@@ -358,21 +402,19 @@ static int h5_state_attribute_name(h5_state_t* hs)
static int h5_state_after_attribute_name(h5_state_t* hs)
{
int c;
size_t pos;
TRACE();
pos = hs->pos;
c = h5_skip_white(hs);
switch (c) {
case CHAR_EOF: {
return 0;
}
case CHAR_SLASH: {
hs->pos = pos + 1;
hs->pos += 1;
return h5_state_self_closing_start_tag(hs);
}
case CHAR_EQUALS: {
hs->pos = pos + 1;
hs->pos += 1;
return h5_state_before_attribute_value(hs);
}
case CHAR_GT: {
@@ -403,6 +445,9 @@ static int h5_state_before_attribute_value(h5_state_t* hs)
return h5_state_attribute_value_double_quote(hs);
} else if (c == CHAR_SINGLE) {
return h5_state_attribute_value_single_quote(hs);
} else if (c == CHAR_TICK) {
/* NON STANDARD IE */
return h5_state_attribute_value_back_quote(hs);
} else {
return h5_state_attribute_value_no_quote(hs);
}
@@ -415,8 +460,16 @@ static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
TRACE();
/* skip quote */
hs->pos += 1;
/* skip initial quote in normal case.
* don't do this "if (pos == 0)" since it means we have started
* in a non-data state. given an input of '><foo
* we want to make 0-length attribute name
*/
if (hs->pos > 0) {
hs->pos += 1;
}
idx = (const char*) memchr(hs->s + hs->pos, qchar, hs->len - hs->pos);
if (idx == NULL) {
hs->token_start = hs->s + hs->pos;
@@ -447,6 +500,13 @@ int h5_state_attribute_value_single_quote(h5_state_t* hs)
return h5_state_attribute_value_quote(hs, CHAR_SINGLE);
}
static
int h5_state_attribute_value_back_quote(h5_state_t* hs)
{
TRACE();
return h5_state_attribute_value_quote(hs, CHAR_TICK);
}
static int h5_state_attribute_value_no_quote(h5_state_t* hs)
{
char ch;
@@ -656,10 +716,13 @@ static int h5_state_comment(h5_state_t* hs)
char ch;
const char* idx;
size_t pos;
size_t offset;
const char* end = hs->s + hs->len;
TRACE();
pos = hs->pos;
while (1) {
idx = (const char*) memchr(hs->s + pos, CHAR_DASH, hs->len - pos);
/* did not find anything or has less than 3 chars left */
@@ -670,21 +733,62 @@ static int h5_state_comment(h5_state_t* hs)
hs->token_type = TAG_COMMENT;
return 1;
}
ch = *(idx + 1);
offset = 1;
/* skip all nulls */
while (idx + offset < end && *(idx + offset) == 0) {
offset += 1;
}
if (idx + offset == end) {
hs->state = h5_state_eof;
hs->token_start = hs->s + hs->pos;
hs->token_len = hs->len - hs->pos;
hs->token_type = TAG_COMMENT;
return 1;
}
ch = *(idx + offset);
if (ch != CHAR_DASH && ch != CHAR_BANG) {
pos = (size_t)(idx - hs->s) + 1;
continue;
}
ch = *(idx + 2);
/* need to test */
#if 0
/* skip all nulls */
while (idx + offset < end && *(idx + offset) == 0) {
offset += 1;
}
if (idx + offset == end) {
hs->state = h5_state_eof;
hs->token_start = hs->s + hs->pos;
hs->token_len = hs->len - hs->pos;
hs->token_type = TAG_COMMENT;
return 1;
}
#endif
offset += 1;
if (idx + offset == end) {
hs->state = h5_state_eof;
hs->token_start = hs->s + hs->pos;
hs->token_len = hs->len - hs->pos;
hs->token_type = TAG_COMMENT;
return 1;
}
ch = *(idx + offset);
if (ch != CHAR_GT) {
pos = (size_t)(idx - hs->s) + 1;
continue;
}
offset += 1;
/* ends in --> or -!> */
hs->token_start = hs->s + hs->pos;
hs->token_len = (size_t)(idx - hs->s) - hs->pos;
hs->pos = (size_t)(idx - hs->s) + 3;
hs->pos = (size_t)(idx + offset - hs->s);
hs->state = h5_state_data;
hs->token_type = TAG_COMMENT;
return 1;