diff --git a/libregexp.c b/libregexp.c
index 1c519af..8ae4bac 100644
--- a/libregexp.c
+++ b/libregexp.c
@@ -3163,6 +3163,7 @@ int lre_exec(uint8_t **capture,
     REExecContext s_s, *s = &s_s;
     int re_flags, i, alloca_size, ret;
     StackInt *stack_buf;
+    const uint8_t *cptr;
 
     re_flags = lre_get_flags(bc_buf);
     s->is_unicode = (re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0;
@@ -3187,8 +3188,21 @@ int lre_exec(uint8_t **capture,
         capture[i] = NULL;
     alloca_size = s->stack_size_max * sizeof(stack_buf[0]);
     stack_buf = alloca(alloca_size);
+
+    cptr = cbuf + (cindex << cbuf_type);
+    /* When the start index falls on the low half of a UTF-16 surrogate
+       pair, back up to the high half so that matching starts on a code
+       point boundary. This only applies to 16-bit buffers: an 8-bit
+       buffer holds no surrogates and must not be read as uint16_t. */
+    if (0 < cindex && cindex < clen && cbuf_type != 0 && s->is_unicode) {
+        const uint16_t *p = (const uint16_t *)cptr;
+        if (is_lo_surrogate(*p) && is_hi_surrogate(p[-1])) {
+            cptr = (const uint8_t *)(p - 1);
+        }
+    }
+
     ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN,
-                             cbuf + (cindex << cbuf_type), FALSE);
+                             cptr, FALSE);
     lre_realloc(s->opaque, s->state_stack, 0);
     return ret;
 }
diff --git a/tests/test_builtin.js b/tests/test_builtin.js
index a541c19..14a883c 100644
--- a/tests/test_builtin.js
+++ b/tests/test_builtin.js
@@ -779,6 +779,23 @@ function test_regexp()
     /* Note: SpiderMonkey and v8 may not be correct */
     assert("abcAbC".replace(/[\q{BC|A}]/gvi,"X"), "XXXX");
     assert("abcAbC".replace(/[\q{BC|A}--a]/gvi,"X"), "aXAX");
+
+    /* case where lastIndex points to the second element of a
+       surrogate pair */
+    a = /(?:)/gu;
+    a.lastIndex = 1;
+    a.exec("🐱");
+    assert(a.lastIndex, 0);
+
+    a.lastIndex = 1;
+    a.exec("a\udc00");
+    assert(a.lastIndex, 1);
+
+    a = /\u{10000}/vgd;
+    a.lastIndex = 1;
+    a = a.exec("\u{10000}_\u{10000}");
+    assert(a.indices[0][0], 0);
+    assert(a.indices[0][1], 2);
 }
 
 function test_symbol()