mirror of
https://github.com/bellard/quickjs.git
synced 2025-12-31 05:39:10 +03:00
regexp: cosmetic: make it clearer that there is now a set of registers instead of an auxiliary stack
This commit is contained in:
@@ -45,11 +45,11 @@ DEF(save_start, 2) /* save start position */
|
||||
DEF(save_end, 2) /* save end position, must come after save_start */
|
||||
DEF(save_reset, 3) /* reset save positions */
|
||||
DEF(loop, 6) /* decrement the top of the stack and goto if != 0 */
|
||||
DEF(loop_split_goto_first, 10)
|
||||
DEF(loop_split_goto_first, 10) /* loop and then split */
|
||||
DEF(loop_split_next_first, 10)
|
||||
DEF(loop_check_adv_split_goto_first, 10)
|
||||
DEF(loop_check_adv_split_goto_first, 10) /* loop and then check advance and split */
|
||||
DEF(loop_check_adv_split_next_first, 10)
|
||||
DEF(push_i32, 6) /* push integer on the stack */
|
||||
DEF(set_i32, 6) /* store the immediate value to a register */
|
||||
DEF(word_boundary, 1)
|
||||
DEF(word_boundary_i, 1)
|
||||
DEF(not_word_boundary, 1)
|
||||
@@ -64,8 +64,8 @@ DEF(range32, 3) /* variable length */
|
||||
DEF(range32_i, 3) /* variable length */
|
||||
DEF(lookahead, 5)
|
||||
DEF(negative_lookahead, 5) /* must come after lookahead */
|
||||
DEF(push_char_pos, 2) /* push the character position on the stack */
|
||||
DEF(check_advance, 2) /* pop one stack element and check that it is different from the character position */
|
||||
DEF(set_char_pos, 2) /* store the character position to a register */
|
||||
DEF(check_advance, 2) /* check that the register is different from the character position */
|
||||
DEF(prev, 1) /* go to the previous char */
|
||||
|
||||
#endif /* DEF */
|
||||
|
||||
103
libregexp.c
103
libregexp.c
@@ -55,7 +55,7 @@ typedef enum {
|
||||
} REOPCodeEnum;
|
||||
|
||||
#define CAPTURE_COUNT_MAX 255
|
||||
#define STACK_SIZE_MAX 255
|
||||
#define REGISTER_COUNT_MAX 255
|
||||
/* must be large enough to have a negligible runtime cost and small
|
||||
enough to call the interrupt callback often. */
|
||||
#define INTERRUPT_COUNTER_INIT 10000
|
||||
@@ -107,7 +107,7 @@ static const REOpCode reopcode_info[REOP_COUNT] = {
|
||||
|
||||
#define RE_HEADER_FLAGS 0
|
||||
#define RE_HEADER_CAPTURE_COUNT 2
|
||||
#define RE_HEADER_STACK_SIZE 3
|
||||
#define RE_HEADER_REGISTER_COUNT 3
|
||||
#define RE_HEADER_BYTECODE_LEN 4
|
||||
|
||||
#define RE_HEADER_LEN 8
|
||||
@@ -468,8 +468,8 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
||||
re_flags = lre_get_flags(buf);
|
||||
bc_len = get_u32(buf + RE_HEADER_BYTECODE_LEN);
|
||||
assert(bc_len + RE_HEADER_LEN <= buf_len);
|
||||
printf("flags: 0x%x capture_count=%d aux_stack_size=%d\n",
|
||||
re_flags, buf[RE_HEADER_CAPTURE_COUNT], buf[RE_HEADER_STACK_SIZE]);
|
||||
printf("flags: 0x%x capture_count=%d reg_count=%d\n",
|
||||
re_flags, buf[RE_HEADER_CAPTURE_COUNT], buf[RE_HEADER_REGISTER_COUNT]);
|
||||
if (re_flags & LRE_FLAG_NAMED_GROUPS) {
|
||||
const char *p;
|
||||
p = (char *)buf + RE_HEADER_LEN + bc_len;
|
||||
@@ -530,7 +530,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
||||
val2 = buf[pos + 1];
|
||||
val = get_u32(buf + pos + 2);
|
||||
val += (pos + 6);
|
||||
printf(" %u, %u", val2, val);
|
||||
printf(" r%u, %u", val2, val);
|
||||
break;
|
||||
case REOP_loop_split_goto_first:
|
||||
case REOP_loop_split_next_first:
|
||||
@@ -542,7 +542,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
||||
limit = get_u32(buf + pos + 2);
|
||||
val = get_u32(buf + pos + 6);
|
||||
val += (pos + 10);
|
||||
printf(" %u, %u, %u", val2, limit, val);
|
||||
printf(" r%u, %u, %u", val2, limit, val);
|
||||
}
|
||||
break;
|
||||
case REOP_save_start:
|
||||
@@ -556,15 +556,15 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
||||
case REOP_save_reset:
|
||||
printf(" %u %u", buf[pos + 1], buf[pos + 2]);
|
||||
break;
|
||||
case REOP_push_i32:
|
||||
case REOP_set_i32:
|
||||
val = buf[pos + 1];
|
||||
val2 = get_u32(buf + pos + 2);
|
||||
printf(" %u, %d", val, val2);
|
||||
printf(" r%u, %d", val, val2);
|
||||
break;
|
||||
case REOP_push_char_pos:
|
||||
case REOP_set_char_pos:
|
||||
case REOP_check_advance:
|
||||
val = buf[pos + 1];
|
||||
printf(" %u", val);
|
||||
printf(" r%u", val);
|
||||
break;
|
||||
case REOP_range:
|
||||
case REOP_range_i:
|
||||
@@ -1570,8 +1570,8 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
||||
case REOP_line_start_m:
|
||||
case REOP_line_end:
|
||||
case REOP_line_end_m:
|
||||
case REOP_push_i32:
|
||||
case REOP_push_char_pos:
|
||||
case REOP_set_i32:
|
||||
case REOP_set_char_pos:
|
||||
case REOP_word_boundary:
|
||||
case REOP_word_boundary_i:
|
||||
case REOP_not_word_boundary:
|
||||
@@ -2197,7 +2197,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
||||
put_u32(s->byte_code.buf + last_atom_start + 1,
|
||||
len + 5 * has_goto + add_zero_advance_check * 2 * 2);
|
||||
if (add_zero_advance_check) {
|
||||
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
|
||||
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_set_char_pos;
|
||||
s->byte_code.buf[last_atom_start + 1 + 4 + 1] = 0;
|
||||
re_emit_op_u8(s, REOP_check_advance, 0);
|
||||
}
|
||||
@@ -2211,13 +2211,13 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
||||
put_u32(s->byte_code.buf + pos, 6 + add_zero_advance_check * 2 + len + 10);
|
||||
pos += 4;
|
||||
|
||||
s->byte_code.buf[pos++] = REOP_push_i32;
|
||||
s->byte_code.buf[pos++] = REOP_set_i32;
|
||||
s->byte_code.buf[pos++] = 0;
|
||||
put_u32(s->byte_code.buf + pos, quant_max);
|
||||
pos += 4;
|
||||
last_atom_start = pos;
|
||||
if (add_zero_advance_check) {
|
||||
s->byte_code.buf[pos++] = REOP_push_char_pos;
|
||||
s->byte_code.buf[pos++] = REOP_set_char_pos;
|
||||
s->byte_code.buf[pos++] = 0;
|
||||
}
|
||||
re_emit_goto_u8_u32(s, (add_zero_advance_check ? REOP_loop_check_adv_split_next_first : REOP_loop_split_next_first) - greedy, 0, quant_max, last_atom_start);
|
||||
@@ -2233,13 +2233,13 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
||||
goto out_of_memory;
|
||||
/* Note: we assume the string length is < INT32_MAX */
|
||||
pos = last_atom_start;
|
||||
s->byte_code.buf[pos++] = REOP_push_i32;
|
||||
s->byte_code.buf[pos++] = REOP_set_i32;
|
||||
s->byte_code.buf[pos++] = 0;
|
||||
put_u32(s->byte_code.buf + pos, quant_max);
|
||||
pos += 4;
|
||||
last_atom_start = pos;
|
||||
if (add_zero_advance_check) {
|
||||
s->byte_code.buf[pos++] = REOP_push_char_pos;
|
||||
s->byte_code.buf[pos++] = REOP_set_char_pos;
|
||||
s->byte_code.buf[pos++] = 0;
|
||||
}
|
||||
if (quant_min == quant_max) {
|
||||
@@ -2330,9 +2330,9 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* the control flow is recursive so the analysis can be linear. As a
|
||||
side effect, the auxiliary stack addresses are computed. */
|
||||
static int compute_stack_size(uint8_t *bc_buf, int bc_buf_len)
|
||||
/* Allocate the registers as a stack. The control flow is recursive so
|
||||
the analysis can be linear. */
|
||||
static int compute_register_count(uint8_t *bc_buf, int bc_buf_len)
|
||||
{
|
||||
int stack_size, stack_size_max, pos, opcode, len;
|
||||
uint32_t val;
|
||||
@@ -2348,12 +2348,12 @@ static int compute_stack_size(uint8_t *bc_buf, int bc_buf_len)
|
||||
assert(opcode < REOP_COUNT);
|
||||
assert((pos + len) <= bc_buf_len);
|
||||
switch(opcode) {
|
||||
case REOP_push_i32:
|
||||
case REOP_push_char_pos:
|
||||
case REOP_set_i32:
|
||||
case REOP_set_char_pos:
|
||||
bc_buf[pos + 1] = stack_size;
|
||||
stack_size++;
|
||||
if (stack_size > stack_size_max) {
|
||||
if (stack_size > STACK_SIZE_MAX)
|
||||
if (stack_size > REGISTER_COUNT_MAX)
|
||||
return -1;
|
||||
stack_size_max = stack_size;
|
||||
}
|
||||
@@ -2408,7 +2408,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
||||
void *opaque)
|
||||
{
|
||||
REParseState s_s, *s = &s_s;
|
||||
int stack_size;
|
||||
int register_count;
|
||||
BOOL is_sticky;
|
||||
|
||||
memset(s, 0, sizeof(*s));
|
||||
@@ -2469,14 +2469,14 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
||||
goto error;
|
||||
}
|
||||
|
||||
stack_size = compute_stack_size(s->byte_code.buf, s->byte_code.size);
|
||||
if (stack_size < 0) {
|
||||
register_count = compute_register_count(s->byte_code.buf, s->byte_code.size);
|
||||
if (register_count < 0) {
|
||||
re_parse_error(s, "too many imbricated quantifiers");
|
||||
goto error;
|
||||
}
|
||||
|
||||
s->byte_code.buf[RE_HEADER_CAPTURE_COUNT] = s->capture_count;
|
||||
s->byte_code.buf[RE_HEADER_STACK_SIZE] = stack_size;
|
||||
s->byte_code.buf[RE_HEADER_REGISTER_COUNT] = register_count;
|
||||
put_u32(s->byte_code.buf + RE_HEADER_BYTECODE_LEN,
|
||||
s->byte_code.size - RE_HEADER_LEN);
|
||||
|
||||
@@ -2620,7 +2620,6 @@ typedef struct {
|
||||
/* 0 = 8 bit chars, 1 = 16 bit chars, 2 = 16 bit chars, UTF-16 */
|
||||
int cbuf_type;
|
||||
int capture_count;
|
||||
int stack_size_max;
|
||||
BOOL is_unicode;
|
||||
int interrupt_counter;
|
||||
void *opaque; /* used for stack overflow check */
|
||||
@@ -2665,7 +2664,7 @@ static no_inline int stack_realloc(REExecContext *s, size_t n)
|
||||
|
||||
/* return 1 if match, 0 if not match or < 0 if error. */
|
||||
static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
uint8_t **aux_stack, const uint8_t *pc, const uint8_t *cptr)
|
||||
uint8_t **regs, const uint8_t *pc, const uint8_t *cptr)
|
||||
{
|
||||
int opcode;
|
||||
int cbuf_type;
|
||||
@@ -2705,7 +2704,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
}
|
||||
|
||||
/* avoid saving the previous value if already saved */
|
||||
#define SAVE_AUX_STACK(idx, value) \
|
||||
#define SAVE_REG(idx, value) \
|
||||
{ \
|
||||
StackElem *sp1; \
|
||||
sp1 = sp; \
|
||||
@@ -2717,12 +2716,12 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
} else { \
|
||||
CHECK_STACK_SPACE(2); \
|
||||
sp[0].val = -(int)(idx + 1); \
|
||||
sp[1].ptr = aux_stack[idx]; \
|
||||
sp[1].ptr = regs[idx]; \
|
||||
sp += 2; \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
aux_stack[idx] = (value); \
|
||||
regs[idx] = (value); \
|
||||
}
|
||||
|
||||
|
||||
@@ -2747,13 +2746,13 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
REExecStateEnum type;
|
||||
if (bp == s->stack_buf)
|
||||
return 0;
|
||||
/* undo the modifications to capture[] and aux_stack[] */
|
||||
/* undo the modifications to capture[] and regs[] */
|
||||
while (sp > bp) {
|
||||
intptr_t idx2 = sp[-2].val;
|
||||
if (idx2 >= 0)
|
||||
capture[idx2] = sp[-1].ptr;
|
||||
else
|
||||
aux_stack[-idx2 - 1] = sp[-1].ptr;
|
||||
regs[-idx2 - 1] = sp[-1].ptr;
|
||||
sp -= 2;
|
||||
}
|
||||
|
||||
@@ -2804,13 +2803,13 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
for(;;) {
|
||||
REExecStateEnum type;
|
||||
type = bp[-1].bp.type;
|
||||
/* undo the modifications to capture[] and aux_stack[] */
|
||||
/* undo the modifications to capture[] and regs[] */
|
||||
while (sp > bp) {
|
||||
intptr_t idx2 = sp[-2].val;
|
||||
if (idx2 >= 0)
|
||||
capture[idx2] = sp[-1].ptr;
|
||||
else
|
||||
aux_stack[-idx2 - 1] = sp[-1].ptr;
|
||||
regs[-idx2 - 1] = sp[-1].ptr;
|
||||
sp -= 2;
|
||||
}
|
||||
pc = sp[-3].ptr;
|
||||
@@ -2950,11 +2949,11 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
}
|
||||
}
|
||||
break;
|
||||
case REOP_push_i32:
|
||||
case REOP_set_i32:
|
||||
idx = pc[0];
|
||||
val = get_u32(pc + 1);
|
||||
pc += 5;
|
||||
SAVE_AUX_STACK(idx, (void *)(uintptr_t)val);
|
||||
SAVE_REG(idx, (void *)(uintptr_t)val);
|
||||
break;
|
||||
case REOP_loop:
|
||||
{
|
||||
@@ -2963,8 +2962,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
val = get_u32(pc + 1);
|
||||
pc += 5;
|
||||
|
||||
val2 = (uintptr_t)aux_stack[idx] - 1;
|
||||
SAVE_AUX_STACK(idx, (void *)(uintptr_t)val2);
|
||||
val2 = (uintptr_t)regs[idx] - 1;
|
||||
SAVE_REG(idx, (void *)(uintptr_t)val2);
|
||||
if (val2 != 0) {
|
||||
pc += (int)val;
|
||||
if (lre_poll_timeout(s))
|
||||
@@ -2985,8 +2984,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
pc += 9;
|
||||
|
||||
/* decrement the counter */
|
||||
val2 = (uintptr_t)aux_stack[idx] - 1;
|
||||
SAVE_AUX_STACK(idx, (void *)(uintptr_t)val2);
|
||||
val2 = (uintptr_t)regs[idx] - 1;
|
||||
SAVE_REG(idx, (void *)(uintptr_t)val2);
|
||||
|
||||
if (val2 > limit) {
|
||||
/* normal loop if counter > limit */
|
||||
@@ -2997,7 +2996,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
/* check advance */
|
||||
if ((opcode == REOP_loop_check_adv_split_goto_first ||
|
||||
opcode == REOP_loop_check_adv_split_next_first) &&
|
||||
aux_stack[idx + 1] == cptr &&
|
||||
regs[idx + 1] == cptr &&
|
||||
val2 != limit) {
|
||||
goto no_match;
|
||||
}
|
||||
@@ -3022,15 +3021,15 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||
}
|
||||
}
|
||||
break;
|
||||
case REOP_push_char_pos:
|
||||
case REOP_set_char_pos:
|
||||
idx = pc[0];
|
||||
pc++;
|
||||
SAVE_AUX_STACK(idx, (uint8_t *)cptr);
|
||||
SAVE_REG(idx, (uint8_t *)cptr);
|
||||
break;
|
||||
case REOP_check_advance:
|
||||
idx = pc[0];
|
||||
pc++;
|
||||
if (aux_stack[idx] == cptr)
|
||||
if (regs[idx] == cptr)
|
||||
goto no_match;
|
||||
break;
|
||||
case REOP_word_boundary:
|
||||
@@ -3212,14 +3211,13 @@ int lre_exec(uint8_t **capture,
|
||||
int cbuf_type, void *opaque)
|
||||
{
|
||||
REExecContext s_s, *s = &s_s;
|
||||
int re_flags, i, ret;
|
||||
uint8_t **aux_stack;
|
||||
int re_flags, i, ret, register_count;
|
||||
uint8_t **regs;
|
||||
const uint8_t *cptr;
|
||||
|
||||
re_flags = lre_get_flags(bc_buf);
|
||||
s->is_unicode = (re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0;
|
||||
s->capture_count = bc_buf[RE_HEADER_CAPTURE_COUNT];
|
||||
s->stack_size_max = bc_buf[RE_HEADER_STACK_SIZE];
|
||||
s->cbuf = cbuf;
|
||||
s->cbuf_end = cbuf + (clen << cbuf_type);
|
||||
s->cbuf_type = cbuf_type;
|
||||
@@ -3233,7 +3231,10 @@ int lre_exec(uint8_t **capture,
|
||||
|
||||
for(i = 0; i < s->capture_count * 2; i++)
|
||||
capture[i] = NULL;
|
||||
aux_stack = alloca(s->stack_size_max * sizeof(aux_stack[0]));
|
||||
/* XXX: modify the API so that the registers are allocated after
|
||||
the captures to suppress some tests */
|
||||
register_count = bc_buf[RE_HEADER_REGISTER_COUNT];
|
||||
regs = alloca(register_count * sizeof(regs[0]));
|
||||
|
||||
cptr = cbuf + (cindex << cbuf_type);
|
||||
if (0 < cindex && cindex < clen && s->cbuf_type == 2) {
|
||||
@@ -3243,7 +3244,7 @@ int lre_exec(uint8_t **capture,
|
||||
}
|
||||
}
|
||||
|
||||
ret = lre_exec_backtrack(s, capture, aux_stack, bc_buf + RE_HEADER_LEN,
|
||||
ret = lre_exec_backtrack(s, capture, regs, bc_buf + RE_HEADER_LEN,
|
||||
cptr);
|
||||
if (s->stack_buf != s->static_stack_buf)
|
||||
lre_realloc(s->opaque, s->stack_buf, 0);
|
||||
|
||||
Reference in New Issue
Block a user