Implemented cssDecode.

This commit is contained in:
ivanr 2008-07-16 13:08:12 +00:00
parent e6e06bff72
commit ae40b8c213
5 changed files with 114 additions and 6 deletions

View File

@ -1,6 +1,9 @@
07 Jul 2008 - trunk
16 Jul 2008 - trunk
-------------------
* Implement cssDecode.
* Persistent counter updates are now atomic.

View File

@ -332,6 +332,17 @@ unsigned char x2c(unsigned char *what) {
return digit;
}
/**
 * Translates one hexadecimal digit character into its numeric value.
 *
 * Only the first byte of "what" is examined. Bytes at or above 'A' are
 * folded to upper case (by clearing bit 0x20) and mapped so that 'A'
 * becomes 10; every other byte is mapped relative to '0'. The input is
 * not validated, so the caller is expected to pass a hexadecimal digit.
 */
unsigned char xsingle2c(unsigned char *what) {
    const unsigned char ch = what[0];

    if (ch >= 'A') {
        /* Letter: clear the case bit, then offset from 'A'. */
        return (unsigned char)(((ch & 0xdf) - 'A') + 10);
    }

    /* Decimal digit. */
    return (unsigned char)(ch - '0');
}
/**
*
*/
@ -1191,3 +1202,60 @@ char *resolve_relative_path(apr_pool_t *pool, const char *parent_filename, const
strlen(parent_filename) - strlen(apr_filepath_name_get(parent_filename))),
filename, NULL);
}
/**
 * Decodes CSS escape sequences in place.
 *
 * A backslash followed by one to six hexadecimal digits is replaced with a
 * single byte: the value of the digit when there is only one, otherwise the
 * value of the last two digits (any higher-order digits are ignored). A
 * backslash followed by a non-hexadecimal byte is removed; the byte after
 * it is then handled by the next loop iteration like any other input byte.
 * A backslash that is the very last byte of the input is removed.
 *
 * The output is always NUL-terminated. When nothing is decoded the
 * terminator is written at input[input_len], so the buffer must be able
 * to hold input_len + 1 bytes.
 *
 * Returns the number of bytes in the decoded output, or -1 if input is NULL.
 *
 * References:
 *   http://www.w3.org/TR/REC-CSS2/syndata.html#q4
 *   http://www.unicode.org/roadmaps/
 */
int css_decode_inplace(unsigned char *input, long int input_len) {
    unsigned char *d = (unsigned char *)input;
    long int i, j, count;

    if (input == NULL) return -1;

    i = count = 0;
    while (i < input_len) {
        if (input[i] == '\\') {
            if (i + 1 < input_len) { /* Is there at least one more byte? */
                /* We are not going to need the backslash. */
                i++;

                /* Find out how many hexadecimal characters there are. */
                j = 0;
                while ((j < 6)&&(i + j < input_len)&&(VALID_HEX(input[i + j]))) {
                    j++;
                }

                /* Do we have at least one hexadecimal character? */
                if (j > 0) {
                    if (j == 1) { /* One character. */
                        *d++ = xsingle2c(&input[i]);
                    } else { /* Two or more characters. */
                        /* For now just use the last two bytes. */
                        /* TODO What do we do if the other bytes are not zeros? */
                        *d++ = x2c(&input[i + j - 2]);
                    }

                    /* Move over. */
                    count++;
                    i += j;
                } else {
                    /* Invalid encoding, but we can't really do anything
                     * about it. The backslash has already been skipped, so
                     * the loop continues with the byte that followed it.
                     */
                }
            } else {
                /* Trailing backslash: there is nothing left to decode, so
                 * leave it out of the output. The index MUST be advanced
                 * here, otherwise the loop would never terminate.
                 */
                i++;
            }
        } else {
            /* TODO Not sure if we should remove the new line character here
             * (see the specification for more information).
             */
            *d++ = input[i++];
            count++;
        }
    }

    *d = '\0';

    return count;
}

View File

@ -37,6 +37,8 @@ int DSOLOCAL remove_lf_crlf_inplace(char *text);
unsigned char DSOLOCAL x2c(unsigned char *what);
unsigned char DSOLOCAL xsingle2c(unsigned char *what);
char DSOLOCAL *guess_tmp_dir(apr_pool_t *p);
char DSOLOCAL *current_logtime(apr_pool_t *mp);
@ -82,4 +84,6 @@ int DSOLOCAL is_empty_string(const char *string);
char DSOLOCAL *resolve_relative_path(apr_pool_t *pool, const char *parent_filename, const char *filename);
int DSOLOCAL css_decode_inplace(unsigned char *input, long int input_len);
#endif

View File

@ -190,6 +190,20 @@ static int msre_fn_compressWhitespace_execute(apr_pool_t *mptmp, unsigned char *
return changed;
}
/* cssDecode */

/**
 * Transformation function: decodes CSS escapes in the input buffer in place
 * by calling css_decode_inplace(). The result pointer is set to the input
 * buffer itself and the result length to the value returned by the decoder.
 * Returns 1 when the resulting length differs from input_len and 0 when it
 * is the same. The mptmp pool is not used.
 */
static int msre_fn_cssDecode_execute(apr_pool_t *mptmp, unsigned char *input,
    long int input_len, char **rval, long int *rval_len)
{
    const long int decoded_len = css_decode_inplace(input, input_len);

    /* The decoding is done in place, so the output is the input buffer. */
    *rval = (char *)input;
    *rval_len = decoded_len;

    if (decoded_len != input_len) {
        return 1;
    }

    return 0;
}
/* removeWhitespace */
static int msre_fn_removeWhitespace_execute(apr_pool_t *mptmp, unsigned char *input,
@ -509,6 +523,12 @@ void msre_engine_register_default_tfns(msre_engine *engine) {
msre_fn_compressWhitespace_execute
);
/* cssDecode */
msre_engine_tfn_register(engine,
"cssDecode",
msre_fn_cssDecode_execute
);
/* escapeSeqDecode */
msre_engine_tfn_register(engine,
"escapeSeqDecode",

View File

@ -4,7 +4,7 @@
Manual</title>
<articleinfo>
<releaseinfo>Version 2.6.0-trunk (June 5, 2008)</releaseinfo>
<releaseinfo>Version 2.6.0-trunk (July 16, 2008)</releaseinfo>
<copyright>
<year>2004-2008</year>
@ -3659,9 +3659,22 @@ SecRule <emphasis>XML:/xq:employees/employee/name/text()</emphasis> Fred \
<section>
<title><literal>compressWhitespace</literal></title>
<para>This function is enabled by default. It converts whitespace
characters (32, \f, \t, \n, \r, \v, 160) to spaces (ASCII 32) and then
compresses multiple space characters into only one.</para>
<para>It converts whitespace characters (32, \f, \t, \n, \r, \v, 160) to
spaces (ASCII 32) and then compresses multiple space characters into
only one.</para>
</section>
<section>
<title><literal>cssDecode</literal></title>
<para>Decodes CSS-encoded characters, as specified at <ulink
url="http://www.w3.org/TR/REC-CSS2/syndata.html">http://www.w3.org/TR/REC-CSS2/syndata.html</ulink>.
This function uses only up to two bytes in the decoding process, which
makes it useful for uncovering ASCII characters that have been encoded
using CSS encoding (and that would not normally be encoded), and for
countering evasion that combines a backslash with non-hexadecimal
characters (e.g. <literal>ja\vascript</literal> is equivalent to
<literal>javascript</literal>).</para>
</section>
<section>
@ -6110,4 +6123,4 @@ Server: Apache/2.x.x
</section>
</section>
</section>
</article>
</article>