Change jsDecodeUni to jsDecode, which also decodes all the other JS escapes. See #193.

This commit is contained in:
brectanus 2007-12-14 00:19:46 +00:00
parent b0de659133
commit 8aa31fd099
5 changed files with 113 additions and 45 deletions

View File

@ -1,7 +1,7 @@
12 Dec 2007 - 2.5.0-rc1
-----------------------
* Added t:jsDecodeUni to decode JavaScript \uXXXX encoding.
* Added t:jsDecode to decode JavaScript escape sequences.
* Added IS_NEW and IS_EXPIRED built-in collection variables.

View File

@ -16,10 +16,12 @@
#include <sys/types.h>
#include <sys/stat.h>
/* NOTE: Be careful as this can only be used on static values for X.
/* NOTE: Be careful as these evaluate X more than once, so they can ONLY be used on side-effect-free values for X.
* (e.g. VALID_HEX(c++) will NOT work)
*/
/* Nonzero when X is an ASCII hexadecimal digit (0-9, a-f, A-F). X is evaluated more than once. */
#define VALID_HEX(X) ((((X) >= '0')&&((X) <= '9')) || (((X) >= 'a')&&((X) <= 'f')) || (((X) >= 'A')&&((X) <= 'F')))
/* Nonzero when X is an ASCII octal digit (0-7). X is evaluated more than once. */
#define ISODIGIT(X) (((X) >= '0')&&((X) <= '7'))
/**
*
@ -68,6 +70,7 @@ int parse_name_eq_value(apr_pool_t *mp, const char *input, char **name, char **v
/**
*
* IMP1 Assumes NUL-terminated
*/
char *url_encode(apr_pool_t *mp, char *input, unsigned int input_len) {
char *rval, *d;
@ -572,9 +575,11 @@ char *_log_escape(apr_pool_t *mp, const unsigned char *input, unsigned long int
}
/**
* JavaScript \uXXXX decoding.
* JavaScript decoding.
* IMP1 Assumes NUL-terminated
*/
int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) {
int js_decode_nonstrict_inplace(unsigned char *input, long int input_len) {
unsigned char *d = (unsigned char *)input;
long int i, count;
@ -585,38 +590,92 @@ int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)
if (input[i] == '\\') {
/* Character is an escape. */
if ((i + 5 < input_len) && (input[i + 1] == 'u')) {
/* We have at least 4 data bytes. */
if ( (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3]))
&& (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) )
if ( (i + 5 < input_len) && (input[i + 1] == 'u')
&& (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3]))
&& (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) )
{
/* \uHHHH */
/* Use only the lower byte. */
*d = x2c(&input[i + 4]);
/* Full width ASCII (ff01 - ff5e) needs 0x20 added */
if ( (*d > 0x00) && (*d < 0x5f)
&& ((input[i + 2] == 'f') || (input[i + 2] == 'F'))
&& ((input[i + 3] == 'f') || (input[i + 3] == 'F')))
{
/* We first make use of the lower byte here, ignoring the higher byte. */
*d = x2c(&input[i + 4]);
/* Full width ASCII (ff01 - ff5e) needs 0x20 added */
if ( (*d > 0x00) && (*d < 0x5f)
&& ((input[i + 2] == 'f') || (input[i + 2] == 'F'))
&& ((input[i + 3] == 'f') || (input[i + 3] == 'F')))
{
(*d) += 0x20;
}
d++;
count++;
i += 6;
(*d) += 0x20;
}
else {
/* Invalid data. */
int j;
for(j = 0; (j < 6)&&(i < input_len); j++) {
*d++ = input[i++];
count++;
d++;
count++;
i += 6;
}
else if ((i + 3 < input_len) && (input[i + 1] == 'x')) {
/* \xHH */
*d++ = x2c(&input[i + 2]);
count++;
i += 4;
}
else if ((i + 1 < input_len) && ISODIGIT(input[i + 1])) {
/* \OOO (only one byte, \000 - \377) */
char buf[4];
int j = 0;
while((i + 1 + j < input_len)&&(j < 3)) {
buf[j] = input[i + 1 + j];
j++;
if (!ISODIGIT(input[i + 1 + j])) break;
}
buf[j] = '\0';
if (j > 0) {
/* Do not use 3 characters if we will be > 1 byte */
if ((j == 3) && (buf[0] > '3')) {
j = 2;
buf[j] = '\0';
}
*d++ = strtol(buf, NULL, 8);
i += 1 + j;
count++;
}
}
else if (i + 1 < input_len) {
/* \C */
unsigned char c = input[i + 1];
switch(input[i + 1]) {
case 'a' :
c = '\a';
break;
case 'b' :
c = '\b';
break;
case 'f' :
c = '\f';
break;
case 'n' :
c = '\n';
break;
case 'r' :
c = '\r';
break;
case 't' :
c = '\t';
break;
case 'v' :
c = '\v';
break;
/* The remaining (\?,\\,\',\") are just a removal
* of the escape char which is default.
*/
}
*d++ = c;
i += 2;
count++;
}
else {
/* Not enough bytes available (4 data bytes were needed). */
/* Not enough bytes */
while(i < input_len) {
*d++ = input[i++];
count++;
@ -636,6 +695,7 @@ int jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)
/**
*
* IMP1 Assumes NUL-terminated
*/
int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len) {
unsigned char *d = input;
@ -745,6 +805,7 @@ int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len)
/**
*
* IMP1 Assumes NUL-terminated
*/
int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int *invalid_count) {
unsigned char *d = (unsigned char *)input;
@ -809,6 +870,7 @@ int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int
/**
*
* IMP1 Assumes NUL-terminated
*/
int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input_len) {
unsigned char *d = input;
@ -884,6 +946,7 @@ int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input
char *x = apr_pstrmemdup(mp, (const char *)&input[k], j - k);
/* Decode the entity. */
/* ENH What about others? */
if (strcasecmp(x, "quot") == 0) *d++ = '"';
else
if (strcasecmp(x, "amp") == 0) *d++ = '&';
@ -923,8 +986,10 @@ int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input
return count;
}
#define ISODIGIT(X) ((X >= '0')&&(X <= '7'))
/**
*
* IMP1 Assumes NUL-terminated
*/
int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) {
unsigned char *d = input;
int i, count;
@ -1032,6 +1097,10 @@ int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) {
return count;
}
/**
*
* IMP1 Assumes NUL-terminated
*/
int normalise_path_inplace(unsigned char *input, int input_len, int win) {
unsigned char *d = input;
int i, count;

View File

@ -35,7 +35,7 @@ int DSOLOCAL is_token_char(unsigned char c);
int DSOLOCAL remove_lf_crlf_inplace(char *text);
unsigned DSOLOCAL char x2c(unsigned char *what);
unsigned char DSOLOCAL x2c(unsigned char *what);
char DSOLOCAL *guess_tmp_dir(apr_pool_t *p);
@ -66,7 +66,7 @@ char DSOLOCAL *log_escape_raw(apr_pool_t *mp, const unsigned char *text, unsigne
char DSOLOCAL *_log_escape(apr_pool_t *p, const unsigned char *input,
unsigned long int input_length, int escape_quotes, int escape_colon);
int DSOLOCAL jsdecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len);
int DSOLOCAL js_decode_nonstrict_inplace(unsigned char *input, long int input_len);
int DSOLOCAL urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_length);

View File

@ -251,14 +251,14 @@ static int msre_fn_replaceComments_execute(apr_pool_t *mptmp, unsigned char *inp
return changed;
}
/* jsDecodeUni */
/* jsDecode */
static int msre_fn_jsDecodeUni_execute(apr_pool_t *mptmp, unsigned char *input,
static int msre_fn_jsDecode_execute(apr_pool_t *mptmp, unsigned char *input,
long int input_len, char **rval, long int *rval_len)
{
long int length;
length = jsdecode_uni_nonstrict_inplace_ex(input, input_len);
length = js_decode_nonstrict_inplace(input, input_len);
*rval = (char *)input;
*rval_len = length;
@ -518,10 +518,10 @@ void msre_engine_register_default_tfns(msre_engine *engine) {
msre_fn_htmlEntityDecode_execute
);
/* jsDecodeUni */
/* jsDecode */
msre_engine_tfn_register(engine,
"jsDecodeUni",
msre_fn_jsDecodeUni_execute
"jsDecode",
msre_fn_jsDecode_execute
);
/* length */

View File

@ -3369,13 +3369,12 @@ SecRule <emphasis>XML:/xq:employees/employee/name/text()</emphasis> Fred \
</section>
<section>
<title><literal>jsDecodeUni</literal></title>
<title><literal>jsDecode</literal></title>
<para>Decodes <literal moreinfo="none">\uXXXX</literal> JavaScript
encoding. If the code is in the range of FF01-FF5E (the full width ASCII
codes), then the higher byte is used to detect and adjust the lower
byte. Otherwise, only the lower byte will be used and the higher byte
zeroed.</para>
<para>Decodes JavaScript escape sequences. If a <literal moreinfo="none">\uHHHH</literal> code is in the
range of FF01-FF5E (the full-width ASCII codes), then the higher byte is
used to detect and adjust the lower byte. Otherwise, only the lower byte
is used and the higher byte is zeroed.</para>
</section>
<section>