diff --git a/CHANGES b/CHANGES index 5a3774f6..4a5e7edf 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,8 @@ 12 Dec 2007 - 2.5.0-rc1 ----------------------- + * Added t:jsDecodeUni to decode JavaScript \uXXXX encoding. + * Added IS_NEW and IS_EXPIRED built-in collection variables. * Added SecMarker directive to allow a fixed target for skipAfter. diff --git a/apache2/msc_util.c b/apache2/msc_util.c index d98d061c..4b6da97b 100644 --- a/apache2/msc_util.c +++ b/apache2/msc_util.c @@ -16,6 +16,11 @@ #include #include +/* NOTE: Be careful as this can only be used on static values for X. + * (i.e. VALID_HEX(c++) will NOT work) + */ +#define VALID_HEX(X) (((X >= '0')&&(X <= '9')) || ((X >= 'a')&&(X <= 'f')) || ((X >= 'A')&&(X <= 'F'))) + /** * */ @@ -566,7 +571,68 @@ char *_log_escape(apr_pool_t *mp, const unsigned char *input, unsigned long int return ret; } -#define VALID_HEX(X) (((X >= '0')&&(X <= '9')) || ((X >= 'a')&&(X <= 'f')) || ((X >= 'A')&&(X <= 'F'))) +/** + * JavaScript \uXXXX decoding. + */ +int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) { + unsigned char *d = (unsigned char *)input; + long int i, count; + + if (input == NULL) return -1; + + i = count = 0; + while (i < input_len) { + if (input[i] == '\\') { + /* Character is an escape. */ + + if ((i + 5 < input_len) && (input[i + 1] == 'u')) { + /* We have at least 4 data bytes. */ + if ( (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3])) + && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) ) + { + /* We first make use of the lower byte here, ignoring the higher byte. */ + *d = x2c(&input[i + 4]); + + /* Full width ASCII (ff01 - ff5e) needs 0x20 added */ + if ( (*d > 0x00) && (*d < 0x5f) + && ((input[i + 2] == 'f') || (input[i + 2] == 'F')) + && ((input[i + 3] == 'f') || (input[i + 3] == 'F'))) + { + (*d) += 0x20; + } + + d++; + count++; + i += 6; + } + else { + /* Invalid data. 
*/ + int j; + + for(j = 0; (j < 6)&&(i < input_len); j++) { + *d++ = input[i++]; + count++; + } + } + } + else { + /* Not enough bytes available (4 data bytes were needed). */ + while(i < input_len) { + *d++ = input[i++]; + count++; + } + } + } + else { + *d++ = input[i++]; + count++; + } + } + + *d = '\0'; + + return count; +} /** * @@ -632,12 +698,7 @@ int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) char c1 = input[i + 1]; char c2 = input[i + 2]; - /* ENH Use VALID_HEX? */ - if ( (((c1 >= '0')&&(c1 <= '9')) || ((c1 >= 'a')&&(c1 <= 'f')) || - ((c1 >= 'A')&&(c1 <= 'F'))) - && (((c2 >= '0')&&(c2 <= '9')) || ((c2 >= 'a')&&(c2 <= 'f')) || - ((c2 >= 'A')&&(c2 <= 'F'))) ) - { + if (VALID_HEX(c1) && VALID_HEX(c2)) { *d++ = x2c(&input[i + 1]); count++; i += 3; @@ -701,10 +762,7 @@ int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int char c1 = input[i + 1]; char c2 = input[i + 2]; - /* ENH Use VALID_HEX? */ - if ( (((c1 >= '0')&&(c1 <= '9')) || ((c1 >= 'a')&&(c1 <= 'f')) || ((c1 >= 'A')&&(c1 <= 'F'))) - && (((c2 >= '0')&&(c2 <= '9')) || ((c2 >= 'a')&&(c2 <= 'f')) || ((c2 >= 'A')&&(c2 <= 'F'))) ) - { + if (VALID_HEX(c1) && VALID_HEX(c2)) { /* Valid encoding - decode it. 
*/ *d++ = x2c(&input[i + 1]); count++; diff --git a/apache2/msc_util.h b/apache2/msc_util.h index 1ae7b873..a4014cb3 100644 --- a/apache2/msc_util.h +++ b/apache2/msc_util.h @@ -66,6 +66,8 @@ char DSOLOCAL *log_escape_raw(apr_pool_t *mp, const unsigned char *text, unsigne char DSOLOCAL *_log_escape(apr_pool_t *p, const unsigned char *input, unsigned long int input_length, int escape_quotes, int escape_colon); +int DSOLOCAL jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len); + int DSOLOCAL urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_length); int DSOLOCAL urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_length, int *invalid_count); diff --git a/apache2/re_tfns.c b/apache2/re_tfns.c index 992f4835..7c19dc5f 100644 --- a/apache2/re_tfns.c +++ b/apache2/re_tfns.c @@ -251,6 +251,20 @@ static int msre_fn_replaceComments_execute(apr_pool_t *mptmp, unsigned char *inp return changed; } +/* jsDecodeUni */ + +static int msre_fn_jsDecodeUni_execute(apr_pool_t *mptmp, unsigned char *input, + long int input_len, char **rval, long int *rval_len) +{ + long int length; + + length = jsdecode_uni_nonstrict_inplace_ex(input, input_len); + *rval = (char *)input; + *rval_len = length; + + return (*rval_len == input_len ? 0 : 1); +} + /* urlDecode */ static int msre_fn_urlDecode_execute(apr_pool_t *mptmp, unsigned char *input, @@ -504,6 +518,12 @@ void msre_engine_register_default_tfns(msre_engine *engine) { msre_fn_htmlEntityDecode_execute ); + /* jsDecodeUni */ + msre_engine_tfn_register(engine, + "jsDecodeUni", + msre_fn_jsDecodeUni_execute + ); + /* length */ msre_engine_tfn_register(engine, "length", diff --git a/doc/modsecurity2-apache-reference.xml b/doc/modsecurity2-apache-reference.xml index a05136be..6cd716b2 100644 --- a/doc/modsecurity2-apache-reference.xml +++ b/doc/modsecurity2-apache-reference.xml @@ -3368,6 +3368,16 @@ SecRule XML:/xq:employees/employee/name/text() Fred \ +
+ <literal>jsDecodeUni</literal> + + Decodes \uXXXX JavaScript + encoding. If the code is in the range FF01-FF5E (the full-width ASCII + codes), then the higher byte is used to detect this and the lower byte is adjusted to the + corresponding ASCII character. Otherwise, only the lower byte is used and the higher byte + is discarded.
+
<literal>length</literal> @@ -5156,4 +5166,4 @@ SecRule REQUEST_METHOD "!@within %{tx.allowed_methods}" t:l
- + \ No newline at end of file