Adjust lastIndex to the leading surrogate when it points inside a surrogate pair in a unicode RegExp (initial patch by auvred)

This commit is contained in:
Fabrice Bellard 2025-08-25 17:54:19 +02:00
parent d9ec8f102e
commit a4ac84d993
2 changed files with 28 additions and 1 deletion

View File

@ -3163,6 +3163,7 @@ int lre_exec(uint8_t **capture,
REExecContext s_s, *s = &s_s; REExecContext s_s, *s = &s_s;
int re_flags, i, alloca_size, ret; int re_flags, i, alloca_size, ret;
StackInt *stack_buf; StackInt *stack_buf;
const uint8_t *cptr;
re_flags = lre_get_flags(bc_buf); re_flags = lre_get_flags(bc_buf);
s->is_unicode = (re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0; s->is_unicode = (re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0;
@ -3187,8 +3188,17 @@ int lre_exec(uint8_t **capture,
capture[i] = NULL; capture[i] = NULL;
alloca_size = s->stack_size_max * sizeof(stack_buf[0]); alloca_size = s->stack_size_max * sizeof(stack_buf[0]);
stack_buf = alloca(alloca_size); stack_buf = alloca(alloca_size);
cptr = cbuf + (cindex << cbuf_type);
if (0 < cindex && cindex < clen && s->is_unicode) {
const uint16_t *p = (const uint16_t *)cptr;
if (is_lo_surrogate(*p) && is_hi_surrogate(p[-1])) {
cptr = (const uint8_t *)(p - 1);
}
}
ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN, ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN,
cbuf + (cindex << cbuf_type), FALSE); cptr, FALSE);
lre_realloc(s->opaque, s->state_stack, 0); lre_realloc(s->opaque, s->state_stack, 0);
return ret; return ret;
} }

View File

@ -779,6 +779,23 @@ function test_regexp()
/* Note: SpiderMonkey and v8 may not be correct */ /* Note: SpiderMonkey and v8 may not be correct */
assert("abcAbC".replace(/[\q{BC|A}]/gvi,"X"), "XXXX"); assert("abcAbC".replace(/[\q{BC|A}]/gvi,"X"), "XXXX");
assert("abcAbC".replace(/[\q{BC|A}--a]/gvi,"X"), "aXAX"); assert("abcAbC".replace(/[\q{BC|A}--a]/gvi,"X"), "aXAX");
/* case where lastIndex points to the second element of a
surrogate pair */
a = /(?:)/gu;
a.lastIndex = 1;
a.exec("🐱");
assert(a.lastIndex, 0);
a.lastIndex = 1;
a.exec("a\udc00");
assert(a.lastIndex, 1);
a = /\u{10000}/vgd;
a.lastIndex = 1;
a = a.exec("\u{10000}_\u{10000}");
assert(a.indices[0][0], 0);
assert(a.indices[0][1], 2);
} }
function test_symbol() function test_symbol()