diff --git a/CHANGES b/CHANGES index 4a5e7edf..a6b1a14c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,7 +1,7 @@ 12 Dec 2007 - 2.5.0-rc1 ----------------------- - * Added t:jsDecodeUni to decode JavScript \uXXXX encoding. + * Added t:jsDecode to decode JavaScript escape sequences. * Added IS_NEW and IS_EXPIRED built-in collection variables. diff --git a/apache2/msc_util.c b/apache2/msc_util.c index 4b6da97b..32f5a572 100644 --- a/apache2/msc_util.c +++ b/apache2/msc_util.c @@ -16,10 +16,12 @@ #include #include -/* NOTE: Be careful as this can only be used on static values for X. +/* NOTE: Be careful as these can ONLY be used on static values for X. * (i.e. VALID_HEX(c++) will NOT work) */ #define VALID_HEX(X) (((X >= '0')&&(X <= '9')) || ((X >= 'a')&&(X <= 'f')) || ((X >= 'A')&&(X <= 'F'))) +#define ISODIGIT(X) ((X >= '0')&&(X <= '7')) + /** * @@ -68,6 +70,7 @@ int parse_name_eq_value(apr_pool_t *mp, const char *input, char **name, char **v /** * + * IMP1 Assumes NUL-terminated */ char *url_encode(apr_pool_t *mp, char *input, unsigned int input_len) { char *rval, *d; @@ -572,9 +575,11 @@ char *_log_escape(apr_pool_t *mp, const unsigned char *input, unsigned long int } /** - * JavaScript \uXXXX decoding. + * JavaScript decoding. + * IMP1 Assumes NUL-terminated */ -int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) { + +int js_decode_nonstrict_inplace(unsigned char *input, long int input_len) { unsigned char *d = (unsigned char *)input; long int i, count; @@ -585,38 +590,92 @@ int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) if (input[i] == '\\') { /* Character is an escape. */ - if ((i + 5 < input_len) && (input[i + 1] == 'u')) { - /* We have at least 4 data bytes. 
*/ - if ( (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3])) - && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) ) + if ( (i + 5 < input_len) && (input[i + 1] == 'u') + && (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3])) + && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) ) + { + /* \uHHHH */ + + /* Use only the lower byte. */ + *d = x2c(&input[i + 4]); + + /* Full width ASCII (ff01 - ff5e) needs 0x20 added */ + if ( (*d > 0x00) && (*d < 0x5f) + && ((input[i + 2] == 'f') || (input[i + 2] == 'F')) + && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) { - /* We first make use of the lower byte here, ignoring the higher byte. */ - *d = x2c(&input[i + 4]); - - /* Full width ASCII (ff01 - ff5e) needs 0x20 added */ - if ( (*d > 0x00) && (*d < 0x5f) - && ((input[i + 2] == 'f') || (input[i + 2] == 'F')) - && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) - { - (*d) += 0x20; - } - - d++; - count++; - i += 6; + (*d) += 0x20; } - else { - /* Invalid data. */ - int j; - for(j = 0; (j < 6)&&(i < input_len); j++) { - *d++ = input[i++]; - count++; + d++; + count++; + i += 6; + } + else if ((i + 3 < input_len) && (input[i + 1] == 'x')) { + /* \xHH */ + *d++ = x2c(&input[i + 2]); + count++; + i += 4; + } + else if ((i + 1 < input_len) && ISODIGIT(input[i + 1])) { + /* \OOO (only one byte, \000 - \377) */ + char buf[4]; + int j = 0; + + while((i + 1 + j < input_len)&&(j < 3)) { + buf[j] = input[i + 1 + j]; + j++; + if (!ISODIGIT(input[i + 1 + j])) break; + } + buf[j] = '\0'; + + if (j > 0) { + /* Do not use 3 characters if we will be > 1 byte */ + if ((j == 3) && (buf[0] > '3')) { + j = 2; + buf[j] = '\0'; } + *d++ = strtol(buf, NULL, 8); + i += 1 + j; + count++; } } + else if (i + 1 < input_len) { + /* \C */ + unsigned char c = input[i + 1]; + switch(input[i + 1]) { + case 'a' : + c = '\a'; + break; + case 'b' : + c = '\b'; + break; + case 'f' : + c = '\f'; + break; + case 'n' : + c = '\n'; + break; + case 'r' : + c = '\r'; + break; + case 't' : + c = 
'\t'; + break; + case 'v' : + c = '\v'; + break; + /* The remaining (\?,\\,\',\") are just a removal + * of the escape char which is default. + */ + } + + *d++ = c; + i += 2; + count++; + } else { - /* Not enough bytes available (4 data bytes were needed). */ + /* Not enough bytes */ while(i < input_len) { *d++ = input[i++]; count++; @@ -636,6 +695,7 @@ int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) /** * + * IMP1 Assumes NUL-terminated */ int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) { unsigned char *d = input; @@ -745,6 +805,7 @@ int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) /** * + * IMP1 Assumes NUL-terminated */ int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int *invalid_count) { unsigned char *d = (unsigned char *)input; @@ -809,6 +870,7 @@ int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int /** * + * IMP1 Assumes NUL-terminated */ int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input_len) { unsigned char *d = input; @@ -884,6 +946,7 @@ int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input char *x = apr_pstrmemdup(mp, (const char *)&input[k], j - k); /* Decode the entity. */ + /* ENH What about others? 
*/ if (strcasecmp(x, "quot") == 0) *d++ = '"'; else if (strcasecmp(x, "amp") == 0) *d++ = '&'; @@ -923,8 +986,10 @@ int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input return count; } -#define ISODIGIT(X) ((X >= '0')&&(X <= '7')) - +/** + * + * IMP1 Assumes NUL-terminated + */ int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) { unsigned char *d = input; int i, count; @@ -1032,6 +1097,10 @@ int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) { return count; } +/** + * + * IMP1 Assumes NUL-terminated + */ int normalise_path_inplace(unsigned char *input, int input_len, int win) { unsigned char *d = input; int i, count; diff --git a/apache2/msc_util.h b/apache2/msc_util.h index a4014cb3..8ed5398b 100644 --- a/apache2/msc_util.h +++ b/apache2/msc_util.h @@ -35,7 +35,7 @@ int DSOLOCAL is_token_char(unsigned char c); int DSOLOCAL remove_lf_crlf_inplace(char *text); -unsigned DSOLOCAL char x2c(unsigned char *what); +unsigned char DSOLOCAL x2c(unsigned char *what); char DSOLOCAL *guess_tmp_dir(apr_pool_t *p); @@ -66,7 +66,7 @@ char DSOLOCAL *log_escape_raw(apr_pool_t *mp, const unsigned char *text, unsigne char DSOLOCAL *_log_escape(apr_pool_t *p, const unsigned char *input, unsigned long int input_length, int escape_quotes, int escape_colon); -int DSOLOCAL jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len); +int DSOLOCAL js_decode_nonstrict_inplace(unsigned char *input, long int input_len); int DSOLOCAL urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_length); diff --git a/apache2/re_tfns.c b/apache2/re_tfns.c index 7c19dc5f..1c8a0cf1 100644 --- a/apache2/re_tfns.c +++ b/apache2/re_tfns.c @@ -251,14 +251,14 @@ static int msre_fn_replaceComments_execute(apr_pool_t *mptmp, unsigned char *inp return changed; } -/* jsDecodeUni */ +/* jsDecode */ -static int msre_fn_jsDecodeUni_execute(apr_pool_t *mptmp, unsigned char *input, +static int 
msre_fn_jsDecode_execute(apr_pool_t *mptmp, unsigned char *input, long int input_len, char **rval, long int *rval_len) { long int length; - length = jsdecode_uni_nonstrict_inplace_ex(input, input_len); + length = js_decode_nonstrict_inplace(input, input_len); *rval = (char *)input; *rval_len = length; @@ -518,10 +518,10 @@ void msre_engine_register_default_tfns(msre_engine *engine) { msre_fn_htmlEntityDecode_execute ); - /* jsDecodeUni */ + /* jsDecode */ msre_engine_tfn_register(engine, - "jsDecodeUni", - msre_fn_jsDecodeUni_execute + "jsDecode", + msre_fn_jsDecode_execute ); /* length */ diff --git a/doc/modsecurity2-apache-reference.xml b/doc/modsecurity2-apache-reference.xml index 6cd716b2..121ca773 100644 --- a/doc/modsecurity2-apache-reference.xml +++ b/doc/modsecurity2-apache-reference.xml @@ -3369,13 +3369,12 @@ SecRule XML:/xq:employees/employee/name/text() Fred \
- <literal>jsDecodeUni</literal> + <literal>jsDecode</literal> - Decodes \uXXXX JavaScript - encoding. If the code is in the range of FF01-FF5E (the full width ASCII - codes), then the higher byte is used to detect and adjust the lower - byte. Otherwise, only the lower byte will be used and the higher byte - zeroed. + Decodes JavaScript escape sequences. If a \uHHHH code is in the + range of FF01-FF5E (the full width ASCII codes), then the higher byte is + used to detect and adjust the lower byte. Otherwise, only the lower byte + will be used and the higher byte zeroed.