mirror of
https://github.com/bellard/quickjs.git
synced 2025-12-31 05:39:10 +03:00
added regexp duplicate named groups - fixed reset of captures with quantifiers
This commit is contained in:
@@ -6,6 +6,7 @@
|
|||||||
- added Atomics.pause
|
- added Atomics.pause
|
||||||
- added Map and WeakMap upsert methods
|
- added Map and WeakMap upsert methods
|
||||||
- added Math.sumPrecise()
|
- added Math.sumPrecise()
|
||||||
|
- added regexp duplicate named groups
|
||||||
- misc bug fixes
|
- misc bug fixes
|
||||||
|
|
||||||
2025-09-13:
|
2025-09-13:
|
||||||
|
|||||||
2
TODO
2
TODO
@@ -63,4 +63,4 @@ Test262o: 0/11262 errors, 463 excluded
|
|||||||
Test262o commit: 7da91bceb9ce7613f87db47ddd1292a2dda58b42 (es5-tests branch)
|
Test262o commit: 7da91bceb9ce7613f87db47ddd1292a2dda58b42 (es5-tests branch)
|
||||||
|
|
||||||
Test262:
|
Test262:
|
||||||
Result: 72/83257 errors, 2590 excluded, 5786 skipped
|
Result: 66/83295 errors, 2590 excluded, 5767 skipped
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ DEF(word_boundary, 1)
|
|||||||
DEF(word_boundary_i, 1)
|
DEF(word_boundary_i, 1)
|
||||||
DEF(not_word_boundary, 1)
|
DEF(not_word_boundary, 1)
|
||||||
DEF(not_word_boundary_i, 1)
|
DEF(not_word_boundary_i, 1)
|
||||||
DEF(back_reference, 2)
|
DEF(back_reference, 2) /* variable length */
|
||||||
DEF(back_reference_i, 2) /* must come after */
|
DEF(back_reference_i, 2) /* must come after */
|
||||||
DEF(backward_back_reference, 2) /* must come after */
|
DEF(backward_back_reference, 2) /* must come after */
|
||||||
DEF(backward_back_reference_i, 2) /* must come after */
|
DEF(backward_back_reference_i, 2) /* must come after */
|
||||||
|
|||||||
207
libregexp.c
207
libregexp.c
@@ -77,6 +77,7 @@ typedef struct {
|
|||||||
BOOL ignore_case;
|
BOOL ignore_case;
|
||||||
BOOL multi_line;
|
BOOL multi_line;
|
||||||
BOOL dotall;
|
BOOL dotall;
|
||||||
|
uint8_t group_name_scope;
|
||||||
int capture_count;
|
int capture_count;
|
||||||
int total_capture_count; /* -1 = not computed yet */
|
int total_capture_count; /* -1 = not computed yet */
|
||||||
int has_named_captures; /* -1 = don't know, 0 = no, 1 = yes */
|
int has_named_captures; /* -1 = don't know, 0 = no, 1 = yes */
|
||||||
@@ -478,7 +479,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
|||||||
if (i != 1)
|
if (i != 1)
|
||||||
printf(",");
|
printf(",");
|
||||||
printf("<%s>", p);
|
printf("<%s>", p);
|
||||||
p += strlen(p) + 1;
|
p += strlen(p) + LRE_GROUP_NAME_TRAILER_LEN;
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
assert(p == (char *)(buf + buf_len));
|
assert(p == (char *)(buf + buf_len));
|
||||||
@@ -547,11 +548,22 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
|||||||
break;
|
break;
|
||||||
case REOP_save_start:
|
case REOP_save_start:
|
||||||
case REOP_save_end:
|
case REOP_save_end:
|
||||||
|
printf(" %u", buf[pos + 1]);
|
||||||
|
break;
|
||||||
case REOP_back_reference:
|
case REOP_back_reference:
|
||||||
case REOP_back_reference_i:
|
case REOP_back_reference_i:
|
||||||
case REOP_backward_back_reference:
|
case REOP_backward_back_reference:
|
||||||
case REOP_backward_back_reference_i:
|
case REOP_backward_back_reference_i:
|
||||||
printf(" %u", buf[pos + 1]);
|
{
|
||||||
|
int n, i;
|
||||||
|
n = buf[pos + 1];
|
||||||
|
len += n;
|
||||||
|
for(i = 0; i < n; i++) {
|
||||||
|
if (i != 0)
|
||||||
|
printf(",");
|
||||||
|
printf(" %u", buf[pos + 2 + i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case REOP_save_reset:
|
case REOP_save_reset:
|
||||||
printf(" %u %u", buf[pos + 1], buf[pos + 2]);
|
printf(" %u %u", buf[pos + 1], buf[pos + 2]);
|
||||||
@@ -1531,17 +1543,18 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return:
|
/* need_check_adv: false if the opcodes always advance the char pointer
|
||||||
- true if the opcodes may not advance the char pointer
|
need_capture_init: true if the captures in the atom may need to be reset (they are not all guaranteed to be set)
|
||||||
- false if the opcodes always advance the char pointer
|
|
||||||
*/
|
*/
|
||||||
static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
static BOOL re_need_check_adv_and_capture_init(BOOL *pneed_capture_init,
|
||||||
|
const uint8_t *bc_buf, int bc_buf_len)
|
||||||
{
|
{
|
||||||
int pos, opcode, len;
|
int pos, opcode, len;
|
||||||
uint32_t val;
|
uint32_t val;
|
||||||
BOOL ret;
|
BOOL need_check_adv, need_capture_init;
|
||||||
|
|
||||||
ret = TRUE;
|
need_check_adv = TRUE;
|
||||||
|
need_capture_init = FALSE;
|
||||||
pos = 0;
|
pos = 0;
|
||||||
while (pos < bc_buf_len) {
|
while (pos < bc_buf_len) {
|
||||||
opcode = bc_buf[pos];
|
opcode = bc_buf[pos];
|
||||||
@@ -1551,20 +1564,21 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
|||||||
case REOP_range_i:
|
case REOP_range_i:
|
||||||
val = get_u16(bc_buf + pos + 1);
|
val = get_u16(bc_buf + pos + 1);
|
||||||
len += val * 4;
|
len += val * 4;
|
||||||
goto simple_char;
|
need_check_adv = FALSE;
|
||||||
|
break;
|
||||||
case REOP_range32:
|
case REOP_range32:
|
||||||
case REOP_range32_i:
|
case REOP_range32_i:
|
||||||
val = get_u16(bc_buf + pos + 1);
|
val = get_u16(bc_buf + pos + 1);
|
||||||
len += val * 8;
|
len += val * 8;
|
||||||
goto simple_char;
|
need_check_adv = FALSE;
|
||||||
|
break;
|
||||||
case REOP_char:
|
case REOP_char:
|
||||||
case REOP_char_i:
|
case REOP_char_i:
|
||||||
case REOP_char32:
|
case REOP_char32:
|
||||||
case REOP_char32_i:
|
case REOP_char32_i:
|
||||||
case REOP_dot:
|
case REOP_dot:
|
||||||
case REOP_any:
|
case REOP_any:
|
||||||
simple_char:
|
need_check_adv = FALSE;
|
||||||
ret = FALSE;
|
|
||||||
break;
|
break;
|
||||||
case REOP_line_start:
|
case REOP_line_start:
|
||||||
case REOP_line_start_m:
|
case REOP_line_start_m:
|
||||||
@@ -1582,18 +1596,25 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
|||||||
case REOP_save_start:
|
case REOP_save_start:
|
||||||
case REOP_save_end:
|
case REOP_save_end:
|
||||||
case REOP_save_reset:
|
case REOP_save_reset:
|
||||||
|
break;
|
||||||
case REOP_back_reference:
|
case REOP_back_reference:
|
||||||
case REOP_back_reference_i:
|
case REOP_back_reference_i:
|
||||||
case REOP_backward_back_reference:
|
case REOP_backward_back_reference:
|
||||||
case REOP_backward_back_reference_i:
|
case REOP_backward_back_reference_i:
|
||||||
|
val = bc_buf[pos + 1];
|
||||||
|
len += val;
|
||||||
|
need_capture_init = TRUE;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* safe behavior: we cannot predict the outcome */
|
/* safe behavior: we cannot predict the outcome */
|
||||||
return TRUE;
|
need_capture_init = TRUE;
|
||||||
|
goto done;
|
||||||
}
|
}
|
||||||
pos += len;
|
pos += len;
|
||||||
}
|
}
|
||||||
return ret;
|
done:
|
||||||
|
*pneed_capture_init = need_capture_init;
|
||||||
|
return need_check_adv;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* '*pp' is the first char after '<' */
|
/* '*pp' is the first char after '<' */
|
||||||
@@ -1652,16 +1673,16 @@ static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* if capture_name = NULL: return the number of captures + 1.
|
/* if capture_name = NULL: return the number of captures + 1.
|
||||||
Otherwise, return the capture index corresponding to capture_name
|
Otherwise, return the number of matching capture groups */
|
||||||
or -1 if none */
|
|
||||||
static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
||||||
const char *capture_name)
|
const char *capture_name, BOOL emit_group_index)
|
||||||
{
|
{
|
||||||
const uint8_t *p;
|
const uint8_t *p;
|
||||||
int capture_index;
|
int capture_index, n;
|
||||||
char name[TMP_BUF_SIZE];
|
char name[TMP_BUF_SIZE];
|
||||||
|
|
||||||
capture_index = 1;
|
capture_index = 1;
|
||||||
|
n = 0;
|
||||||
*phas_named_captures = 0;
|
*phas_named_captures = 0;
|
||||||
for (p = s->buf_start; p < s->buf_end; p++) {
|
for (p = s->buf_start; p < s->buf_end; p++) {
|
||||||
switch (*p) {
|
switch (*p) {
|
||||||
@@ -1673,8 +1694,11 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
|||||||
if (capture_name) {
|
if (capture_name) {
|
||||||
p += 3;
|
p += 3;
|
||||||
if (re_parse_group_name(name, sizeof(name), &p) == 0) {
|
if (re_parse_group_name(name, sizeof(name), &p) == 0) {
|
||||||
if (!strcmp(name, capture_name))
|
if (!strcmp(name, capture_name)) {
|
||||||
return capture_index;
|
if (emit_group_index)
|
||||||
|
dbuf_putc(&s->byte_code, capture_index);
|
||||||
|
n++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
capture_index++;
|
capture_index++;
|
||||||
@@ -1699,17 +1723,18 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
if (capture_name)
|
if (capture_name) {
|
||||||
return -1;
|
return n;
|
||||||
else
|
} else {
|
||||||
return capture_index;
|
return capture_index;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int re_count_captures(REParseState *s)
|
static int re_count_captures(REParseState *s)
|
||||||
{
|
{
|
||||||
if (s->total_capture_count < 0) {
|
if (s->total_capture_count < 0) {
|
||||||
s->total_capture_count = re_parse_captures(s, &s->has_named_captures,
|
s->total_capture_count = re_parse_captures(s, &s->has_named_captures,
|
||||||
NULL);
|
NULL, FALSE);
|
||||||
}
|
}
|
||||||
return s->total_capture_count;
|
return s->total_capture_count;
|
||||||
}
|
}
|
||||||
@@ -1721,25 +1746,53 @@ static BOOL re_has_named_captures(REParseState *s)
|
|||||||
return s->has_named_captures;
|
return s->has_named_captures;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int find_group_name(REParseState *s, const char *name)
|
static int find_group_name(REParseState *s, const char *name, BOOL emit_group_index)
|
||||||
{
|
{
|
||||||
const char *p, *buf_end;
|
const char *p, *buf_end;
|
||||||
size_t len, name_len;
|
size_t len, name_len;
|
||||||
int capture_index;
|
int capture_index, n;
|
||||||
|
|
||||||
p = (char *)s->group_names.buf;
|
p = (char *)s->group_names.buf;
|
||||||
if (!p) return -1;
|
if (!p)
|
||||||
|
return 0;
|
||||||
buf_end = (char *)s->group_names.buf + s->group_names.size;
|
buf_end = (char *)s->group_names.buf + s->group_names.size;
|
||||||
name_len = strlen(name);
|
name_len = strlen(name);
|
||||||
capture_index = 1;
|
capture_index = 1;
|
||||||
|
n = 0;
|
||||||
while (p < buf_end) {
|
while (p < buf_end) {
|
||||||
len = strlen(p);
|
len = strlen(p);
|
||||||
if (len == name_len && memcmp(name, p, name_len) == 0)
|
if (len == name_len && memcmp(name, p, name_len) == 0) {
|
||||||
return capture_index;
|
if (emit_group_index)
|
||||||
p += len + 1;
|
dbuf_putc(&s->byte_code, capture_index);
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
p += len + LRE_GROUP_NAME_TRAILER_LEN;
|
||||||
capture_index++;
|
capture_index++;
|
||||||
}
|
}
|
||||||
return -1;
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
static BOOL is_duplicate_group_name(REParseState *s, const char *name, int scope)
|
||||||
|
{
|
||||||
|
const char *p, *buf_end;
|
||||||
|
size_t len, name_len;
|
||||||
|
int scope1;
|
||||||
|
|
||||||
|
p = (char *)s->group_names.buf;
|
||||||
|
if (!p)
|
||||||
|
return 0;
|
||||||
|
buf_end = (char *)s->group_names.buf + s->group_names.size;
|
||||||
|
name_len = strlen(name);
|
||||||
|
while (p < buf_end) {
|
||||||
|
len = strlen(p);
|
||||||
|
if (len == name_len && memcmp(name, p, name_len) == 0) {
|
||||||
|
scope1 = (uint8_t)p[len + 1];
|
||||||
|
if (scope == scope1)
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
p += len + LRE_GROUP_NAME_TRAILER_LEN;
|
||||||
|
}
|
||||||
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir);
|
static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir);
|
||||||
@@ -1783,7 +1836,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||||||
{
|
{
|
||||||
const uint8_t *p;
|
const uint8_t *p;
|
||||||
int c, last_atom_start, quant_min, quant_max, last_capture_count;
|
int c, last_atom_start, quant_min, quant_max, last_capture_count;
|
||||||
BOOL greedy, add_zero_advance_check, is_neg, is_backward_lookahead;
|
BOOL greedy, is_neg, is_backward_lookahead;
|
||||||
REStringList cr_s, *cr = &cr_s;
|
REStringList cr_s, *cr = &cr_s;
|
||||||
|
|
||||||
last_atom_start = -1;
|
last_atom_start = -1;
|
||||||
@@ -1922,12 +1975,16 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||||||
&p)) {
|
&p)) {
|
||||||
return re_parse_error(s, "invalid group name");
|
return re_parse_error(s, "invalid group name");
|
||||||
}
|
}
|
||||||
if (find_group_name(s, s->u.tmp_buf) > 0) {
|
/* poor man's method to test duplicate group
|
||||||
|
names. */
|
||||||
|
/* XXX: this method does not catch all the errors */
|
||||||
|
if (is_duplicate_group_name(s, s->u.tmp_buf, s->group_name_scope)) {
|
||||||
return re_parse_error(s, "duplicate group name");
|
return re_parse_error(s, "duplicate group name");
|
||||||
}
|
}
|
||||||
/* group name with a trailing zero */
|
/* group name with a trailing zero */
|
||||||
dbuf_put(&s->group_names, (uint8_t *)s->u.tmp_buf,
|
dbuf_put(&s->group_names, (uint8_t *)s->u.tmp_buf,
|
||||||
strlen(s->u.tmp_buf) + 1);
|
strlen(s->u.tmp_buf) + 1);
|
||||||
|
dbuf_putc(&s->group_names, s->group_name_scope);
|
||||||
s->has_named_captures = 1;
|
s->has_named_captures = 1;
|
||||||
goto parse_capture;
|
goto parse_capture;
|
||||||
} else {
|
} else {
|
||||||
@@ -1938,6 +1995,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||||||
p++;
|
p++;
|
||||||
/* capture without group name */
|
/* capture without group name */
|
||||||
dbuf_putc(&s->group_names, 0);
|
dbuf_putc(&s->group_names, 0);
|
||||||
|
dbuf_putc(&s->group_names, 0);
|
||||||
parse_capture:
|
parse_capture:
|
||||||
if (s->capture_count >= CAPTURE_COUNT_MAX)
|
if (s->capture_count >= CAPTURE_COUNT_MAX)
|
||||||
return re_parse_error(s, "too many captures");
|
return re_parse_error(s, "too many captures");
|
||||||
@@ -1973,7 +2031,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||||||
case 'k':
|
case 'k':
|
||||||
{
|
{
|
||||||
const uint8_t *p1;
|
const uint8_t *p1;
|
||||||
int dummy_res;
|
int dummy_res, n;
|
||||||
|
BOOL is_forward;
|
||||||
|
|
||||||
p1 = p;
|
p1 = p;
|
||||||
if (p1[2] != '<') {
|
if (p1[2] != '<') {
|
||||||
@@ -1993,21 +2052,33 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||||||
else
|
else
|
||||||
goto parse_class_atom;
|
goto parse_class_atom;
|
||||||
}
|
}
|
||||||
c = find_group_name(s, s->u.tmp_buf);
|
is_forward = FALSE;
|
||||||
if (c < 0) {
|
n = find_group_name(s, s->u.tmp_buf, FALSE);
|
||||||
|
if (n == 0) {
|
||||||
/* no capture name parsed before, try to look
|
/* no capture name parsed before, try to look
|
||||||
after (inefficient, but hopefully not common) */
|
after (inefficient, but hopefully not common) */
|
||||||
c = re_parse_captures(s, &dummy_res, s->u.tmp_buf);
|
n = re_parse_captures(s, &dummy_res, s->u.tmp_buf, FALSE);
|
||||||
if (c < 0) {
|
if (n == 0) {
|
||||||
if (s->is_unicode || re_has_named_captures(s))
|
if (s->is_unicode || re_has_named_captures(s))
|
||||||
return re_parse_error(s, "group name not defined");
|
return re_parse_error(s, "group name not defined");
|
||||||
else
|
else
|
||||||
goto parse_class_atom;
|
goto parse_class_atom;
|
||||||
}
|
}
|
||||||
|
is_forward = TRUE;
|
||||||
|
}
|
||||||
|
last_atom_start = s->byte_code.size;
|
||||||
|
last_capture_count = s->capture_count;
|
||||||
|
|
||||||
|
/* emit back references to all the captures indexes matching the group name */
|
||||||
|
re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, n);
|
||||||
|
if (is_forward) {
|
||||||
|
re_parse_captures(s, &dummy_res, s->u.tmp_buf, TRUE);
|
||||||
|
} else {
|
||||||
|
find_group_name(s, s->u.tmp_buf, TRUE);
|
||||||
}
|
}
|
||||||
p = p1;
|
p = p1;
|
||||||
}
|
}
|
||||||
goto emit_back_reference;
|
break;
|
||||||
case '0':
|
case '0':
|
||||||
p += 2;
|
p += 2;
|
||||||
c = 0;
|
c = 0;
|
||||||
@@ -2053,11 +2124,11 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||||||
}
|
}
|
||||||
return re_parse_error(s, "back reference out of range in regular expression");
|
return re_parse_error(s, "back reference out of range in regular expression");
|
||||||
}
|
}
|
||||||
emit_back_reference:
|
|
||||||
last_atom_start = s->byte_code.size;
|
last_atom_start = s->byte_code.size;
|
||||||
last_capture_count = s->capture_count;
|
last_capture_count = s->capture_count;
|
||||||
|
|
||||||
re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, c);
|
re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, 1);
|
||||||
|
dbuf_putc(&s->byte_code, c);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -2166,20 +2237,39 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||||||
if (last_atom_start < 0) {
|
if (last_atom_start < 0) {
|
||||||
return re_parse_error(s, "nothing to repeat");
|
return re_parse_error(s, "nothing to repeat");
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
BOOL need_capture_init, add_zero_advance_check;
|
||||||
|
int len, pos;
|
||||||
|
|
||||||
/* the spec tells that if there is no advance when
|
/* the spec tells that if there is no advance when
|
||||||
running the atom after the first quant_min times,
|
running the atom after the first quant_min times,
|
||||||
then there is no match. We remove this test when we
|
then there is no match. We remove this test when we
|
||||||
are sure the atom always advances the position. */
|
are sure the atom always advances the position. */
|
||||||
add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start,
|
add_zero_advance_check =
|
||||||
|
re_need_check_adv_and_capture_init(&need_capture_init,
|
||||||
|
s->byte_code.buf + last_atom_start,
|
||||||
s->byte_code.size - last_atom_start);
|
s->byte_code.size - last_atom_start);
|
||||||
|
|
||||||
{
|
/* general case: need to reset the capture at each
|
||||||
int len, pos;
|
iteration. We don't do it if there are no captures
|
||||||
|
in the atom or if we are sure all captures are
|
||||||
|
initialized in the atom. If quant_min = 0, we still
|
||||||
|
need to reset once the captures in case the atom
|
||||||
|
does not match. */
|
||||||
|
if (need_capture_init && last_capture_count != s->capture_count) {
|
||||||
|
if (dbuf_insert(&s->byte_code, last_atom_start, 3))
|
||||||
|
goto out_of_memory;
|
||||||
|
int pos = last_atom_start;
|
||||||
|
s->byte_code.buf[pos++] = REOP_save_reset;
|
||||||
|
s->byte_code.buf[pos++] = last_capture_count;
|
||||||
|
s->byte_code.buf[pos++] = s->capture_count - 1;
|
||||||
|
}
|
||||||
|
|
||||||
len = s->byte_code.size - last_atom_start;
|
len = s->byte_code.size - last_atom_start;
|
||||||
if (quant_min == 0) {
|
if (quant_min == 0) {
|
||||||
/* need to reset the capture in case the atom is
|
/* need to reset the capture in case the atom is
|
||||||
not executed */
|
not executed */
|
||||||
if (last_capture_count != s->capture_count) {
|
if (!need_capture_init && last_capture_count != s->capture_count) {
|
||||||
if (dbuf_insert(&s->byte_code, last_atom_start, 3))
|
if (dbuf_insert(&s->byte_code, last_atom_start, 3))
|
||||||
goto out_of_memory;
|
goto out_of_memory;
|
||||||
s->byte_code.buf[last_atom_start++] = REOP_save_reset;
|
s->byte_code.buf[last_atom_start++] = REOP_save_reset;
|
||||||
@@ -2320,6 +2410,8 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
|
|||||||
|
|
||||||
pos = re_emit_op_u32(s, REOP_goto, 0);
|
pos = re_emit_op_u32(s, REOP_goto, 0);
|
||||||
|
|
||||||
|
s->group_name_scope++;
|
||||||
|
|
||||||
if (re_parse_alternative(s, is_backward_dir))
|
if (re_parse_alternative(s, is_backward_dir))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -2382,6 +2474,13 @@ static int compute_register_count(uint8_t *bc_buf, int bc_buf_len)
|
|||||||
val = get_u16(bc_buf + pos + 1);
|
val = get_u16(bc_buf + pos + 1);
|
||||||
len += val * 8;
|
len += val * 8;
|
||||||
break;
|
break;
|
||||||
|
case REOP_back_reference:
|
||||||
|
case REOP_back_reference_i:
|
||||||
|
case REOP_backward_back_reference:
|
||||||
|
case REOP_backward_back_reference_i:
|
||||||
|
val = bc_buf[pos + 1];
|
||||||
|
len += val;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
pos += len;
|
pos += len;
|
||||||
}
|
}
|
||||||
@@ -2481,7 +2580,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|||||||
s->byte_code.size - RE_HEADER_LEN);
|
s->byte_code.size - RE_HEADER_LEN);
|
||||||
|
|
||||||
/* add the named groups if needed */
|
/* add the named groups if needed */
|
||||||
if (s->group_names.size > (s->capture_count - 1)) {
|
if (s->group_names.size > (s->capture_count - 1) * LRE_GROUP_NAME_TRAILER_LEN) {
|
||||||
dbuf_put(&s->byte_code, s->group_names.buf, s->group_names.size);
|
dbuf_put(&s->byte_code, s->group_names.buf, s->group_names.size);
|
||||||
put_u16(s->byte_code.buf + RE_HEADER_FLAGS,
|
put_u16(s->byte_code.buf + RE_HEADER_FLAGS,
|
||||||
lre_get_flags(s->byte_code.buf) | LRE_FLAG_NAMED_GROUPS);
|
lre_get_flags(s->byte_code.buf) | LRE_FLAG_NAMED_GROUPS);
|
||||||
@@ -3057,15 +3156,22 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||||||
case REOP_backward_back_reference_i:
|
case REOP_backward_back_reference_i:
|
||||||
{
|
{
|
||||||
const uint8_t *cptr1, *cptr1_end, *cptr1_start;
|
const uint8_t *cptr1, *cptr1_end, *cptr1_start;
|
||||||
|
const uint8_t *pc1;
|
||||||
uint32_t c1, c2;
|
uint32_t c1, c2;
|
||||||
|
int i, n;
|
||||||
|
|
||||||
val = *pc++;
|
n = *pc++;
|
||||||
|
pc1 = pc;
|
||||||
|
pc += n;
|
||||||
|
|
||||||
|
for(i = 0; i < n; i++) {
|
||||||
|
val = pc1[i];
|
||||||
if (val >= s->capture_count)
|
if (val >= s->capture_count)
|
||||||
goto no_match;
|
goto no_match;
|
||||||
cptr1_start = capture[2 * val];
|
cptr1_start = capture[2 * val];
|
||||||
cptr1_end = capture[2 * val + 1];
|
cptr1_end = capture[2 * val + 1];
|
||||||
if (!cptr1_start || !cptr1_end)
|
/* test the first not empty capture */
|
||||||
break;
|
if (cptr1_start && cptr1_end) {
|
||||||
if (opcode == REOP_back_reference ||
|
if (opcode == REOP_back_reference ||
|
||||||
opcode == REOP_back_reference_i) {
|
opcode == REOP_back_reference_i) {
|
||||||
cptr1 = cptr1_start;
|
cptr1 = cptr1_start;
|
||||||
@@ -3096,6 +3202,9 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||||||
goto no_match;
|
goto no_match;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case REOP_range:
|
case REOP_range:
|
||||||
|
|||||||
@@ -40,6 +40,9 @@
|
|||||||
#define LRE_RET_MEMORY_ERROR (-1)
|
#define LRE_RET_MEMORY_ERROR (-1)
|
||||||
#define LRE_RET_TIMEOUT (-2)
|
#define LRE_RET_TIMEOUT (-2)
|
||||||
|
|
||||||
|
/* trailer length after the group name including the trailing '\0' */
|
||||||
|
#define LRE_GROUP_NAME_TRAILER_LEN 2
|
||||||
|
|
||||||
uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
||||||
const char *buf, size_t buf_len, int re_flags,
|
const char *buf, size_t buf_len, int re_flags,
|
||||||
void *opaque);
|
void *opaque);
|
||||||
|
|||||||
35
quickjs.c
35
quickjs.c
@@ -47405,6 +47405,7 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
|
|||||||
int64_t last_index;
|
int64_t last_index;
|
||||||
const char *group_name_ptr;
|
const char *group_name_ptr;
|
||||||
JSObject *p_obj;
|
JSObject *p_obj;
|
||||||
|
JSAtom group_name;
|
||||||
|
|
||||||
if (!re)
|
if (!re)
|
||||||
return JS_EXCEPTION;
|
return JS_EXCEPTION;
|
||||||
@@ -47419,6 +47420,7 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
|
|||||||
indices = JS_UNDEFINED;
|
indices = JS_UNDEFINED;
|
||||||
indices_groups = JS_UNDEFINED;
|
indices_groups = JS_UNDEFINED;
|
||||||
capture = NULL;
|
capture = NULL;
|
||||||
|
group_name = JS_ATOM_NULL;
|
||||||
|
|
||||||
if (js_regexp_get_lastIndex(ctx, &last_index, this_val))
|
if (js_regexp_get_lastIndex(ctx, &last_index, this_val))
|
||||||
goto fail;
|
goto fail;
|
||||||
@@ -47501,15 +47503,20 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
|
|||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
for(i = 0; i < capture_count; i++) {
|
for(i = 0; i < capture_count; i++) {
|
||||||
const char *name = NULL;
|
|
||||||
uint8_t **match = &capture[2 * i];
|
uint8_t **match = &capture[2 * i];
|
||||||
int start = -1;
|
int start = -1;
|
||||||
int end = -1;
|
int end = -1;
|
||||||
JSValue val;
|
JSValue val;
|
||||||
|
|
||||||
if (group_name_ptr && i > 0) {
|
if (group_name_ptr && i > 0) {
|
||||||
if (*group_name_ptr) name = group_name_ptr;
|
if (*group_name_ptr) {
|
||||||
group_name_ptr += strlen(group_name_ptr) + 1;
|
/* XXX: slow, should create a shape when the regexp is
|
||||||
|
compiled */
|
||||||
|
group_name = JS_NewAtom(ctx, group_name_ptr);
|
||||||
|
if (group_name == JS_ATOM_NULL)
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
group_name_ptr += strlen(group_name_ptr) + LRE_GROUP_NAME_TRAILER_LEN;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (match[0] && match[1]) {
|
if (match[0] && match[1]) {
|
||||||
@@ -47536,14 +47543,17 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (name && !JS_IsUndefined(indices_groups)) {
|
if (group_name != JS_ATOM_NULL) {
|
||||||
val = JS_DupValue(ctx, val);
|
/* JS_HasProperty() cannot fail here */
|
||||||
if (JS_DefinePropertyValueStr(ctx, indices_groups,
|
if (!JS_IsUndefined(val) ||
|
||||||
name, val, prop_flags) < 0) {
|
!JS_HasProperty(ctx, indices_groups, group_name)) {
|
||||||
|
if (JS_DefinePropertyValue(ctx, indices_groups,
|
||||||
|
group_name, JS_DupValue(ctx, val), prop_flags) < 0) {
|
||||||
JS_FreeValue(ctx, val);
|
JS_FreeValue(ctx, val);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (JS_DefinePropertyValueUint32(ctx, indices, i, val,
|
if (JS_DefinePropertyValueUint32(ctx, indices, i, val,
|
||||||
prop_flags) < 0) {
|
prop_flags) < 0) {
|
||||||
goto fail;
|
goto fail;
|
||||||
@@ -47557,14 +47567,20 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (name) {
|
if (group_name != JS_ATOM_NULL) {
|
||||||
if (JS_DefinePropertyValueStr(ctx, groups, name,
|
/* JS_HasProperty() cannot fail here */
|
||||||
|
if (!JS_IsUndefined(val) ||
|
||||||
|
!JS_HasProperty(ctx, groups, group_name)) {
|
||||||
|
if (JS_DefinePropertyValue(ctx, groups, group_name,
|
||||||
JS_DupValue(ctx, val),
|
JS_DupValue(ctx, val),
|
||||||
prop_flags) < 0) {
|
prop_flags) < 0) {
|
||||||
JS_FreeValue(ctx, val);
|
JS_FreeValue(ctx, val);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
JS_FreeAtom(ctx, group_name);
|
||||||
|
group_name = JS_ATOM_NULL;
|
||||||
|
}
|
||||||
p_obj->u.array.u.values[p_obj->u.array.count++] = val;
|
p_obj->u.array.u.values[p_obj->u.array.count++] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -47584,6 +47600,7 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
|
|||||||
ret = obj;
|
ret = obj;
|
||||||
obj = JS_UNDEFINED;
|
obj = JS_UNDEFINED;
|
||||||
fail:
|
fail:
|
||||||
|
JS_FreeAtom(ctx, group_name);
|
||||||
JS_FreeValue(ctx, indices_groups);
|
JS_FreeValue(ctx, indices_groups);
|
||||||
JS_FreeValue(ctx, indices);
|
JS_FreeValue(ctx, indices);
|
||||||
JS_FreeValue(ctx, str_val);
|
JS_FreeValue(ctx, str_val);
|
||||||
|
|||||||
@@ -176,7 +176,7 @@ Reflect.construct
|
|||||||
Reflect.set
|
Reflect.set
|
||||||
Reflect.setPrototypeOf
|
Reflect.setPrototypeOf
|
||||||
regexp-dotall
|
regexp-dotall
|
||||||
regexp-duplicate-named-groups=skip
|
regexp-duplicate-named-groups
|
||||||
regexp-lookbehind
|
regexp-lookbehind
|
||||||
regexp-match-indices
|
regexp-match-indices
|
||||||
regexp-modifiers
|
regexp-modifiers
|
||||||
|
|||||||
@@ -31,12 +31,6 @@ test262/test/staging/sm/Function/function-name-for.js:13: Test262Error: Expected
|
|||||||
test262/test/staging/sm/Function/implicit-this-in-parameter-expression.js:12: Test262Error: Expected SameValue(«[object Object]», «undefined») to be true
|
test262/test/staging/sm/Function/implicit-this-in-parameter-expression.js:12: Test262Error: Expected SameValue(«[object Object]», «undefined») to be true
|
||||||
test262/test/staging/sm/Function/invalid-parameter-list.js:13: Test262Error: Expected a SyntaxError to be thrown but no exception was thrown at all
|
test262/test/staging/sm/Function/invalid-parameter-list.js:13: Test262Error: Expected a SyntaxError to be thrown but no exception was thrown at all
|
||||||
test262/test/staging/sm/Function/invalid-parameter-list.js:13: strict mode: Test262Error: Expected a SyntaxError to be thrown but no exception was thrown at all
|
test262/test/staging/sm/Function/invalid-parameter-list.js:13: strict mode: Test262Error: Expected a SyntaxError to be thrown but no exception was thrown at all
|
||||||
test262/test/staging/sm/RegExp/regress-613820-1.js:12: Test262Error: Actual [aaa, aa, a] and expected [aa, a, a] should have the same contents.
|
|
||||||
test262/test/staging/sm/RegExp/regress-613820-1.js:12: strict mode: Test262Error: Actual [aaa, aa, a] and expected [aa, a, a] should have the same contents.
|
|
||||||
test262/test/staging/sm/RegExp/regress-613820-2.js:12: Test262Error: Actual [foobar, f, o, o, b, a, r] and expected [foobar, undefined, undefined, undefined, b, a, r] should have the same contents.
|
|
||||||
test262/test/staging/sm/RegExp/regress-613820-2.js:12: strict mode: Test262Error: Actual [foobar, f, o, o, b, a, r] and expected [foobar, undefined, undefined, undefined, b, a, r] should have the same contents.
|
|
||||||
test262/test/staging/sm/RegExp/regress-613820-3.js:12: Test262Error: Actual [aab, a, undefined, ab] and expected [aa, undefined, a, undefined] should have the same contents.
|
|
||||||
test262/test/staging/sm/RegExp/regress-613820-3.js:12: strict mode: Test262Error: Actual [aab, a, undefined, ab] and expected [aa, undefined, a, undefined] should have the same contents.
|
|
||||||
test262/test/staging/sm/String/string-upper-lower-mapping.js:16: Test262Error: Expected SameValue(«""», «""») to be true
|
test262/test/staging/sm/String/string-upper-lower-mapping.js:16: Test262Error: Expected SameValue(«""», «""») to be true
|
||||||
test262/test/staging/sm/String/string-upper-lower-mapping.js:16: strict mode: Test262Error: Expected SameValue(«""», «""») to be true
|
test262/test/staging/sm/String/string-upper-lower-mapping.js:16: strict mode: Test262Error: Expected SameValue(«""», «""») to be true
|
||||||
test262/test/staging/sm/TypedArray/constructor-buffer-sequence.js:29: Test262Error: Expected a ExpectedError but got a Error
|
test262/test/staging/sm/TypedArray/constructor-buffer-sequence.js:29: Test262Error: Expected a ExpectedError but got a Error
|
||||||
|
|||||||
Reference in New Issue
Block a user