Move lastIndex back to the leading surrogate when it points inside a surrogate pair in a Unicode RegExp

This commit is contained in:
auvred 2025-08-21 08:50:13 +03:00
parent 1fdc768fdc
commit e2bc28f7c3
No known key found for this signature in database
2 changed files with 25 additions and 1 deletions

View File

@ -3165,6 +3165,15 @@ int lre_exec(uint8_t **capture,
s->interrupt_counter = INTERRUPT_COUNTER_INIT; s->interrupt_counter = INTERRUPT_COUNTER_INIT;
s->opaque = opaque; s->opaque = opaque;
const uint8_t *cptr = cbuf + (cindex << cbuf_type);
if (0 < cindex && cindex < clen && s->is_unicode) {
const uint16_t *p = (const uint16_t *)cptr;
if (is_lo_surrogate(*p) && is_hi_surrogate(*(--p))) {
cptr = (const void *)p;
}
}
s->state_size = sizeof(REExecState) + s->state_size = sizeof(REExecState) +
s->capture_count * sizeof(capture[0]) * 2 + s->capture_count * sizeof(capture[0]) * 2 +
s->stack_size_max * sizeof(stack_buf[0]); s->stack_size_max * sizeof(stack_buf[0]);
@ -3177,7 +3186,7 @@ int lre_exec(uint8_t **capture,
alloca_size = s->stack_size_max * sizeof(stack_buf[0]); alloca_size = s->stack_size_max * sizeof(stack_buf[0]);
stack_buf = alloca(alloca_size); stack_buf = alloca(alloca_size);
ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN, ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN,
cbuf + (cindex << cbuf_type), FALSE); cptr, FALSE);
lre_realloc(s->opaque, s->state_stack, 0); lre_realloc(s->opaque, s->state_stack, 0);
return ret; return ret;
} }

View File

@ -779,6 +779,21 @@ function test_regexp()
/* Note: SpiderMonkey and v8 may not be correct */ /* Note: SpiderMonkey and v8 may not be correct */
assert("abcAbC".replace(/[\q{BC|A}]/gvi,"X"), "XXXX"); assert("abcAbC".replace(/[\q{BC|A}]/gvi,"X"), "XXXX");
assert("abcAbC".replace(/[\q{BC|A}--a]/gvi,"X"), "aXAX"); assert("abcAbC".replace(/[\q{BC|A}--a]/gvi,"X"), "aXAX");
a = /(?:)/gu;
a.lastIndex = 1;
a.exec("🐱");
assert(a.lastIndex, 0);
a.lastIndex = 1;
a.exec("a\udc00");
assert(a.lastIndex, 1);
a = /\u{10000}/vgd;
a.lastIndex = 1;
a = a.exec("\u{10000}_\u{10000}");
assert(a.indices[0][0], 0);
assert(a.indices[0][1], 2);
} }
function test_symbol() function test_symbol()