From ae40b8c2136985015d2d99e502e9e5b5345b9379 Mon Sep 17 00:00:00 2001 From: ivanr Date: Wed, 16 Jul 2008 13:08:12 +0000 Subject: [PATCH] Implemented cssDecode. --- CHANGES | 5 +- apache2/msc_util.c | 68 +++++++++++++++++++++++++++ apache2/msc_util.h | 4 ++ apache2/re_tfns.c | 20 ++++++++ doc/modsecurity2-apache-reference.xml | 23 +++++++-- 5 files changed, 114 insertions(+), 6 deletions(-) diff --git a/CHANGES b/CHANGES index a6ceaae4..f9ce7d87 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,9 @@ -07 Jul 2008 - trunk + +16 Jul 2008 - trunk ------------------- +* Implement cssDecode. + * Persistent counter updates are now atomic. diff --git a/apache2/msc_util.c b/apache2/msc_util.c index 401cf4a3..2006cb89 100644 --- a/apache2/msc_util.c +++ b/apache2/msc_util.c @@ -332,6 +332,17 @@ unsigned char x2c(unsigned char *what) { return digit; } +/** + * Converts a single hexadecimal digit into a decimal value. + */ +unsigned char xsingle2c(unsigned char *what) { + register unsigned char digit; + + digit = (what[0] >= 'A' ? ((what[0] & 0xdf) - 'A') + 10 : (what[0] - '0')); + + return digit; +} + /** * */ @@ -1191,3 +1202,60 @@ char *resolve_relative_path(apr_pool_t *pool, const char *parent_filename, const strlen(parent_filename) - strlen(apr_filepath_name_get(parent_filename))), filename, NULL); } + +/** + * + * References: + * http://www.w3.org/TR/REC-CSS2/syndata.html#q4 + * http://www.unicode.org/roadmaps/ + */ +int css_decode_inplace(unsigned char *input, long int input_len) { + unsigned char *d = (unsigned char *)input; + long int i, j, count; + + if (input == NULL) return -1; + + i = count = 0; + while (i < input_len) { + if (input[i] == '\\') { + if (i + 1 < input_len) { /* Is there at least one more byte? */ + /* We are not going to need the backslash. */ + i++; + + /* Find out how many hexadecimal characters there are. */ + j = 0; + while ((j < 6)&&(i + j < input_len)&&(VALID_HEX(input[i + j]))) { + j++; + } + + /* Do we have at least one hexadecimal character? 
*/ + if (j > 0) { + if (j == 1) { /* One character. */ + *d++ = xsingle2c(&input[i]); + } else { /* Two or more characters/ */ + /* For now just use the last two bytes. */ + // TODO What do we do if the other bytes are not zeros? + *d++ = x2c(&input[i + j - 2]); + } + + /* Move over. */ + count++; + i += j; + } else { + /* Invalid encoding, but we can't really do anything about it. */ + } + } else { + // TODO What do we do with the trailing backslash? + } + } else { + // TODO Not sure if we should remove the new line character here + // (see the specification for more information). + *d++ = input[i++]; + count++; + } + } + + *d = '\0'; + + return count; +} diff --git a/apache2/msc_util.h b/apache2/msc_util.h index 039b97c1..265bae31 100644 --- a/apache2/msc_util.h +++ b/apache2/msc_util.h @@ -37,6 +37,8 @@ int DSOLOCAL remove_lf_crlf_inplace(char *text); unsigned char DSOLOCAL x2c(unsigned char *what); +unsigned char DSOLOCAL xsingle2c(unsigned char *what); + char DSOLOCAL *guess_tmp_dir(apr_pool_t *p); char DSOLOCAL *current_logtime(apr_pool_t *mp); @@ -82,4 +84,6 @@ int DSOLOCAL is_empty_string(const char *string); char DSOLOCAL *resolve_relative_path(apr_pool_t *pool, const char *parent_filename, const char *filename); +int DSOLOCAL css_decode_inplace(unsigned char *input, long int input_len); + #endif diff --git a/apache2/re_tfns.c b/apache2/re_tfns.c index c4a0a0af..98faa382 100644 --- a/apache2/re_tfns.c +++ b/apache2/re_tfns.c @@ -190,6 +190,20 @@ static int msre_fn_compressWhitespace_execute(apr_pool_t *mptmp, unsigned char * return changed; } +/* cssDecode */ + +static int msre_fn_cssDecode_execute(apr_pool_t *mptmp, unsigned char *input, + long int input_len, char **rval, long int *rval_len) +{ + long int length; + + length = css_decode_inplace(input, input_len); + *rval = (char *)input; + *rval_len = length; + + return (*rval_len == input_len ? 
0 : 1); +} + /* removeWhitespace */ static int msre_fn_removeWhitespace_execute(apr_pool_t *mptmp, unsigned char *input, @@ -509,6 +523,12 @@ void msre_engine_register_default_tfns(msre_engine *engine) { msre_fn_compressWhitespace_execute ); + /* cssDecode */ + msre_engine_tfn_register(engine, + "cssDecode", + msre_fn_cssDecode_execute + ); + /* escapeSeqDecode */ msre_engine_tfn_register(engine, "escapeSeqDecode", diff --git a/doc/modsecurity2-apache-reference.xml b/doc/modsecurity2-apache-reference.xml index ce99e291..523ba44e 100644 --- a/doc/modsecurity2-apache-reference.xml +++ b/doc/modsecurity2-apache-reference.xml @@ -4,7 +4,7 @@ Manual - Version 2.6.0-trunk (June 5, 2008) + Version 2.6.0-trunk (July 16, 2008) 2004-2008 @@ -3659,9 +3659,22 @@ SecRule XML:/xq:employees/employee/name/text() Fred \
<literal>compressWhitespace</literal> - This function is enabled by default. It converts whitespace - characters (32, \f, \t, \n, \r, \v, 160) to spaces (ASCII 32) and then - compresses multiple space characters into only one. + It converts whitespace characters (32, \f, \t, \n, \r, \v, 160) to + spaces (ASCII 32) and then compresses multiple space characters into + only one. +
+ +
+ cssDecode + + Decodes CSS-encoded characters, as specified at http://www.w3.org/TR/REC-CSS2/syndata.html. + This function uses at most the last two hexadecimal digits of each escape sequence in the decoding process, meaning + it is useful for uncovering ASCII characters that would not normally be + encoded but have been written using CSS encoding, and for countering evasion that relies on a + backslash followed by a non-hexadecimal character (e.g. + ja\vascript is equivalent to + javascript).
@@ -6110,4 +6123,4 @@ Server: Apache/2.x.x
- + \ No newline at end of file