Change jsDecodeUni to jsDecode, which also decodes all the other JS escapes. See #193.

This commit is contained in:
brectanus 2007-12-14 00:19:46 +00:00
parent b0de659133
commit 8aa31fd099
5 changed files with 113 additions and 45 deletions

View File

@ -1,7 +1,7 @@
12 Dec 2007 - 2.5.0-rc1 12 Dec 2007 - 2.5.0-rc1
----------------------- -----------------------
* Added t:jsDecodeUni to decode JavaScript \uXXXX encoding. * Added t:jsDecode to decode JavaScript escape sequences.
* Added IS_NEW and IS_EXPIRED built-in collection variables. * Added IS_NEW and IS_EXPIRED built-in collection variables.

View File

@ -16,10 +16,12 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
/* NOTE: Be careful as this can only be used on static values for X. /* NOTE: Be careful as these can ONLY be used on static values for X.
* (i.e. VALID_HEX(c++) will NOT work) * (i.e. VALID_HEX(c++) will NOT work)
*/ */
#define VALID_HEX(X) (((X >= '0')&&(X <= '9')) || ((X >= 'a')&&(X <= 'f')) || ((X >= 'A')&&(X <= 'F'))) #define VALID_HEX(X) (((X >= '0')&&(X <= '9')) || ((X >= 'a')&&(X <= 'f')) || ((X >= 'A')&&(X <= 'F')))
#define ISODIGIT(X) ((X >= '0')&&(X <= '7'))
/** /**
* *
@ -68,6 +70,7 @@ int parse_name_eq_value(apr_pool_t *mp, const char *input, char **name, char **v
/** /**
* *
* IMP1 Assumes NUL-terminated
*/ */
char *url_encode(apr_pool_t *mp, char *input, unsigned int input_len) { char *url_encode(apr_pool_t *mp, char *input, unsigned int input_len) {
char *rval, *d; char *rval, *d;
@ -572,9 +575,11 @@ char *_log_escape(apr_pool_t *mp, const unsigned char *input, unsigned long int
} }
/** /**
* JavaScript \uXXXX decoding. * JavaScript decoding.
* IMP1 Assumes NUL-terminated
*/ */
int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) {
int js_decode_nonstrict_inplace(unsigned char *input, long int input_len) {
unsigned char *d = (unsigned char *)input; unsigned char *d = (unsigned char *)input;
long int i, count; long int i, count;
@ -585,12 +590,13 @@ int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)
if (input[i] == '\\') { if (input[i] == '\\') {
/* Character is an escape. */ /* Character is an escape. */
if ((i + 5 < input_len) && (input[i + 1] == 'u')) { if ( (i + 5 < input_len) && (input[i + 1] == 'u')
/* We have at least 4 data bytes. */ && (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3]))
if ( (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3]))
&& (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) ) && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) )
{ {
/* We first make use of the lower byte here, ignoring the higher byte. */ /* \uHHHH */
/* Use only the lower byte. */
*d = x2c(&input[i + 4]); *d = x2c(&input[i + 4]);
/* Full width ASCII (ff01 - ff5e) needs 0x20 added */ /* Full width ASCII (ff01 - ff5e) needs 0x20 added */
@ -605,18 +611,71 @@ int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)
count++; count++;
i += 6; i += 6;
} }
else { else if ((i + 3 < input_len) && (input[i + 1] == 'x')) {
/* Invalid data. */ /* \xHH */
int j; *d++ = x2c(&input[i + 2]);
count++;
i += 4;
}
else if ((i + 1 < input_len) && ISODIGIT(input[i + 1])) {
/* \OOO (only one byte, \000 - \377) */
char buf[4];
int j = 0;
for(j = 0; (j < 6)&&(i < input_len); j++) { while((i + 1 + j < input_len)&&(j < 3)) {
*d++ = input[i++]; buf[j] = input[i + 1 + j];
j++;
if (!ISODIGIT(input[i + 1 + j])) break;
}
buf[j] = '\0';
if (j > 0) {
/* Do not use 3 characters if we will be > 1 byte */
if ((j == 3) && (buf[0] > '3')) {
j = 2;
buf[j] = '\0';
}
*d++ = strtol(buf, NULL, 8);
i += 1 + j;
count++; count++;
} }
} }
else if (i + 1 < input_len) {
/* \C */
unsigned char c = input[i + 1];
switch(input[i + 1]) {
case 'a' :
c = '\a';
break;
case 'b' :
c = '\b';
break;
case 'f' :
c = '\f';
break;
case 'n' :
c = '\n';
break;
case 'r' :
c = '\r';
break;
case 't' :
c = '\t';
break;
case 'v' :
c = '\v';
break;
/* The remaining (\?,\\,\',\") are just a removal
* of the escape char which is default.
*/
}
*d++ = c;
i += 2;
count++;
} }
else { else {
/* Not enough bytes available (4 data bytes were needed). */ /* Not enough bytes */
while(i < input_len) { while(i < input_len) {
*d++ = input[i++]; *d++ = input[i++];
count++; count++;
@ -636,6 +695,7 @@ int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)
/** /**
* *
* IMP1 Assumes NUL-terminated
*/ */
int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) { int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) {
unsigned char *d = input; unsigned char *d = input;
@ -745,6 +805,7 @@ int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)
/** /**
* *
* IMP1 Assumes NUL-terminated
*/ */
int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int *invalid_count) { int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int *invalid_count) {
unsigned char *d = (unsigned char *)input; unsigned char *d = (unsigned char *)input;
@ -809,6 +870,7 @@ int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int
/** /**
* *
* IMP1 Assumes NUL-terminated
*/ */
int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input_len) { int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input_len) {
unsigned char *d = input; unsigned char *d = input;
@ -884,6 +946,7 @@ int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input
char *x = apr_pstrmemdup(mp, (const char *)&input[k], j - k); char *x = apr_pstrmemdup(mp, (const char *)&input[k], j - k);
/* Decode the entity. */ /* Decode the entity. */
/* ENH What about others? */
if (strcasecmp(x, "quot") == 0) *d++ = '"'; if (strcasecmp(x, "quot") == 0) *d++ = '"';
else else
if (strcasecmp(x, "amp") == 0) *d++ = '&'; if (strcasecmp(x, "amp") == 0) *d++ = '&';
@ -923,8 +986,10 @@ int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input
return count; return count;
} }
#define ISODIGIT(X) ((X >= '0')&&(X <= '7')) /**
*
* IMP1 Assumes NUL-terminated
*/
int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) { int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) {
unsigned char *d = input; unsigned char *d = input;
int i, count; int i, count;
@ -1032,6 +1097,10 @@ int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) {
return count; return count;
} }
/**
*
* IMP1 Assumes NUL-terminated
*/
int normalise_path_inplace(unsigned char *input, int input_len, int win) { int normalise_path_inplace(unsigned char *input, int input_len, int win) {
unsigned char *d = input; unsigned char *d = input;
int i, count; int i, count;

View File

@ -35,7 +35,7 @@ int DSOLOCAL is_token_char(unsigned char c);
int DSOLOCAL remove_lf_crlf_inplace(char *text); int DSOLOCAL remove_lf_crlf_inplace(char *text);
unsigned DSOLOCAL char x2c(unsigned char *what); unsigned char DSOLOCAL x2c(unsigned char *what);
char DSOLOCAL *guess_tmp_dir(apr_pool_t *p); char DSOLOCAL *guess_tmp_dir(apr_pool_t *p);
@ -66,7 +66,7 @@ char DSOLOCAL *log_escape_raw(apr_pool_t *mp, const unsigned char *text, unsigne
char DSOLOCAL *_log_escape(apr_pool_t *p, const unsigned char *input, char DSOLOCAL *_log_escape(apr_pool_t *p, const unsigned char *input,
unsigned long int input_length, int escape_quotes, int escape_colon); unsigned long int input_length, int escape_quotes, int escape_colon);
int DSOLOCAL jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len); int DSOLOCAL js_decode_nonstrict_inplace(unsigned char *input, long int input_len);
int DSOLOCAL urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_length); int DSOLOCAL urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_length);

View File

@ -251,14 +251,14 @@ static int msre_fn_replaceComments_execute(apr_pool_t *mptmp, unsigned char *inp
return changed; return changed;
} }
/* jsDecodeUni */ /* jsDecode */
static int msre_fn_jsDecodeUni_execute(apr_pool_t *mptmp, unsigned char *input, static int msre_fn_jsDecode_execute(apr_pool_t *mptmp, unsigned char *input,
long int input_len, char **rval, long int *rval_len) long int input_len, char **rval, long int *rval_len)
{ {
long int length; long int length;
length = jsdecode_uni_nonstrict_inplace_ex(input, input_len); length = js_decode_nonstrict_inplace(input, input_len);
*rval = (char *)input; *rval = (char *)input;
*rval_len = length; *rval_len = length;
@ -518,10 +518,10 @@ void msre_engine_register_default_tfns(msre_engine *engine) {
msre_fn_htmlEntityDecode_execute msre_fn_htmlEntityDecode_execute
); );
/* jsDecodeUni */ /* jsDecode */
msre_engine_tfn_register(engine, msre_engine_tfn_register(engine,
"jsDecodeUni", "jsDecode",
msre_fn_jsDecodeUni_execute msre_fn_jsDecode_execute
); );
/* length */ /* length */

View File

@ -3369,13 +3369,12 @@ SecRule <emphasis>XML:/xq:employees/employee/name/text()</emphasis> Fred \
</section> </section>
<section> <section>
<title><literal>jsDecodeUni</literal></title> <title><literal>jsDecode</literal></title>
<para>Decodes <literal moreinfo="none">\uXXXX</literal> JavaScript <para>Decodes JavaScript escape sequences. If a \uHHHH code is in the
encoding. If the code is in the range of FF01-FF5E (the full width ASCII range of FF01-FF5E (the full width ASCII codes), then the higher byte is
codes), then the higher byte is used to detect and adjust the lower used to detect and adjust the lower byte. Otherwise, only the lower byte
byte. Otherwise, only the lower byte will be used and the higher byte will be used and the higher byte zeroed.</para>
zeroed.</para>
</section> </section>
<section> <section>