diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bec4a48..c9ccf62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -214,6 +214,7 @@ jobs: submodules: true - name: Install MinGW and Wine run: | + sudo apt update sudo apt install -y wine mingw-w64 cp /usr/x86_64-w64-mingw32/lib/libwinpthread-1.dll . - name: Setup Wine diff --git a/Changelog b/Changelog index 7cc3399..8a32a92 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,21 @@ +2025-09-13: + +- added JSON modules and import attributes +- added JS_PrintValue() API +- qjs: pretty print objects in print() and console.log() +- qjs: better promise rejection tracker heuristics +- added RegExp v flag +- added RegExp modifiers +- added RegExp.escape +- added Float16Array +- added Promise.try +- improved JSON parser spec conformance +- qjs: improved compatibility of std.parseExtJSON() with JSON5 and + accept JSON5 modules +- added JS_FreePropertyEnum() and JS_AtomToCStringLen() API +- added Error.isError() +- misc bug fixes + 2025-04-26: - removed the bignum extensions and qjscalc diff --git a/TODO b/TODO index 6501fec..81f18ba 100644 --- a/TODO +++ b/TODO @@ -62,6 +62,5 @@ Optimization ideas: Test262o: 0/11262 errors, 463 excluded Test262o commit: 7da91bceb9ce7613f87db47ddd1292a2dda58b42 (es5-tests branch) -Result: 70/78178 errors, 1610 excluded, 7236 skipped -Test262 commit: 56e77d6325067a545ea7e8ff5be5d9284334e33c - +Result: 54/79414 errors, 1637 excluded, 6821 skipped +Test262 commit: e7e136756cd67c1ffcf7c09d03aeb8ad5a6cec0c diff --git a/VERSION b/VERSION index c76e76d..433b8f8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2025-04-26 +2025-09-13 diff --git a/cutils.h b/cutils.h index 32b9757..9fcb7a6 100644 --- a/cutils.h +++ b/cutils.h @@ -364,4 +364,60 @@ static inline double uint64_as_float64(uint64_t u64) return u.d; } +static inline double fromfp16(uint16_t v) +{ + double d; + uint32_t v1; + v1 = v & 0x7fff; + if (unlikely(v1 >= 0x7c00)) + v1 += 0x1f8000; /* NaN or 
infinity */ + d = uint64_as_float64(((uint64_t)(v >> 15) << 63) | ((uint64_t)v1 << (52 - 10))); + return d * 0x1p1008; +} + +static inline uint16_t tofp16(double d) +{ + uint64_t a, addend; + uint32_t v, sgn; + int shift; + + a = float64_as_uint64(d); + sgn = a >> 63; + a = a & 0x7fffffffffffffff; + if (unlikely(a > 0x7ff0000000000000)) { + /* nan */ + v = 0x7c01; + } else if (a < 0x3f10000000000000) { /* 0x1p-14 */ + /* subnormal f16 number or zero */ + if (a <= 0x3e60000000000000) { /* 0x1p-25 */ + v = 0x0000; /* zero */ + } else { + shift = 1051 - (a >> 52); + a = ((uint64_t)1 << 52) | (a & (((uint64_t)1 << 52) - 1)); + addend = ((a >> shift) & 1) + (((uint64_t)1 << (shift - 1)) - 1); + v = (a + addend) >> shift; + } + } else { + /* normal number or infinity */ + a -= 0x3f00000000000000; /* adjust the exponent */ + /* round */ + addend = ((a >> (52 - 10)) & 1) + (((uint64_t)1 << (52 - 11)) - 1); + v = (a + addend) >> (52 - 10); + /* overflow ? */ + if (unlikely(v > 0x7c00)) + v = 0x7c00; + } + return v | (sgn << 15); +} + +static inline int isfp16nan(uint16_t v) +{ + return (v & 0x7FFF) > 0x7C00; +} + +static inline int isfp16zero(uint16_t v) +{ + return (v & 0x7FFF) == 0; +} + #endif /* CUTILS_H */ diff --git a/doc/quickjs.texi b/doc/quickjs.texi index eef00b7..9130b47 100644 --- a/doc/quickjs.texi +++ b/doc/quickjs.texi @@ -449,17 +449,20 @@ optional properties: @item parseExtJSON(str) - Parse @code{str} using a superset of @code{JSON.parse}. The - following extensions are accepted: + Parse @code{str} using a superset of @code{JSON.parse}. The superset + is very close to the JSON5 specification. 
The following extensions + are accepted: @itemize @item Single line and multiline comments @item unquoted properties (ASCII-only Javascript identifiers) @item trailing comma in array and object definitions @item single quoted strings + @item @code{\v} escape and multi-line strings with trailing @code{\} @item @code{\f} and @code{\v} are accepted as space characters - @item leading plus in numbers - @item octal (@code{0o} prefix) and hexadecimal (@code{0x} prefix) numbers + @item leading plus or decimal point in numbers + @item hexadecimal (@code{0x} prefix), octal (@code{0o} prefix) and binary (@code{0b} prefix) integers + @item @code{NaN} and @code{Infinity} are accepted as numbers @end itemize @end table diff --git a/examples/hello_module.js b/examples/hello_module.js index 463660f..5d4c78e 100644 --- a/examples/hello_module.js +++ b/examples/hello_module.js @@ -1,6 +1,8 @@ -/* example of JS module */ +/* example of JS and JSON modules */ import { fib } from "./fib_module.js"; +import msg from "./message.json"; console.log("Hello World"); console.log("fib(10)=", fib(10)); +console.log("msg=", msg); diff --git a/examples/message.json b/examples/message.json new file mode 100644 index 0000000..3b7fe48 --- /dev/null +++ b/examples/message.json @@ -0,0 +1,2 @@ +{ "x" : 1, "tab": [ 1, 2, 3 ] } + diff --git a/libregexp-opcode.h b/libregexp-opcode.h index f255e09..ebab751 100644 --- a/libregexp-opcode.h +++ b/libregexp-opcode.h @@ -26,11 +26,15 @@ DEF(invalid, 1) /* never used */ DEF(char, 3) +DEF(char_i, 3) DEF(char32, 5) +DEF(char32_i, 5) DEF(dot, 1) DEF(any, 1) /* same as dot but match any character including line terminator */ DEF(line_start, 1) +DEF(line_start_m, 1) DEF(line_end, 1) +DEF(line_end_m, 1) DEF(goto, 5) DEF(split_goto_first, 5) DEF(split_next_first, 5) @@ -42,11 +46,17 @@ DEF(loop, 5) /* decrement the top the stack and goto if != 0 */ DEF(push_i32, 5) /* push integer on the stack */ DEF(drop, 1) DEF(word_boundary, 1) +DEF(word_boundary_i, 1) 
DEF(not_word_boundary, 1) +DEF(not_word_boundary_i, 1) DEF(back_reference, 2) -DEF(backward_back_reference, 2) /* must come after back_reference */ +DEF(back_reference_i, 2) /* must come after */ +DEF(backward_back_reference, 2) /* must come after */ +DEF(backward_back_reference_i, 2) /* must come after */ DEF(range, 3) /* variable length */ +DEF(range_i, 3) /* variable length */ DEF(range32, 3) /* variable length */ +DEF(range32_i, 3) /* variable length */ DEF(lookahead, 5) DEF(negative_lookahead, 5) DEF(push_char_pos, 1) /* push the character position on the stack */ diff --git a/libregexp.c b/libregexp.c index 8c47389..0cf9a12 100644 --- a/libregexp.c +++ b/libregexp.c @@ -71,7 +71,9 @@ typedef struct { const uint8_t *buf_start; int re_flags; BOOL is_unicode; + BOOL unicode_sets; /* if set, is_unicode is also set */ BOOL ignore_case; + BOOL multi_line; BOOL dotall; int capture_count; int total_capture_count; /* -1 = not computed yet */ @@ -102,11 +104,11 @@ static const REOpCode reopcode_info[REOP_COUNT] = { }; #define RE_HEADER_FLAGS 0 -#define RE_HEADER_CAPTURE_COUNT 1 -#define RE_HEADER_STACK_SIZE 2 -#define RE_HEADER_BYTECODE_LEN 3 +#define RE_HEADER_CAPTURE_COUNT 2 +#define RE_HEADER_STACK_SIZE 3 +#define RE_HEADER_BYTECODE_LEN 4 -#define RE_HEADER_LEN 7 +#define RE_HEADER_LEN 8 static inline int is_digit(int c) { return c >= '0' && c <= '9'; @@ -122,6 +124,264 @@ static int dbuf_insert(DynBuf *s, int pos, int len) return 0; } +typedef struct REString { + struct REString *next; + uint32_t hash; + uint32_t len; + uint32_t buf[]; +} REString; + +typedef struct { + /* the string list is the union of 'char_range' and of the strings + in hash_table[]. The strings in hash_table[] have a length != + 1. 
*/ + CharRange cr; + uint32_t n_strings; + uint32_t hash_size; + int hash_bits; + REString **hash_table; +} REStringList; + +static uint32_t re_string_hash(int len, const uint32_t *buf) +{ + int i; + uint32_t h; + h = 1; + for(i = 0; i < len; i++) + h = h * 263 + buf[i]; + return h * 0x61C88647; +} + +static void re_string_list_init(REParseState *s1, REStringList *s) +{ + cr_init(&s->cr, s1->opaque, lre_realloc); + s->n_strings = 0; + s->hash_size = 0; + s->hash_bits = 0; + s->hash_table = NULL; +} + +static void re_string_list_free(REStringList *s) +{ + REString *p, *p_next; + int i; + for(i = 0; i < s->hash_size; i++) { + for(p = s->hash_table[i]; p != NULL; p = p_next) { + p_next = p->next; + lre_realloc(s->cr.mem_opaque, p, 0); + } + } + lre_realloc(s->cr.mem_opaque, s->hash_table, 0); + + cr_free(&s->cr); +} + +static void lre_print_char(int c, BOOL is_range) +{ + if (c == '\'' || c == '\\' || + (is_range && (c == '-' || c == ']'))) { + printf("\\%c", c); + } else if (c >= ' ' && c <= 126) { + printf("%c", c); + } else { + printf("\\u{%04x}", c); + } +} + +static __maybe_unused void re_string_list_dump(const char *str, const REStringList *s) +{ + REString *p; + const CharRange *cr; + int i, j, k; + + printf("%s:\n", str); + printf(" ranges: ["); + cr = &s->cr; + for(i = 0; i < cr->len; i += 2) { + lre_print_char(cr->points[i], TRUE); + if (cr->points[i] != cr->points[i + 1] - 1) { + printf("-"); + lre_print_char(cr->points[i + 1] - 1, TRUE); + } + } + printf("]\n"); + + j = 0; + for(i = 0; i < s->hash_size; i++) { + for(p = s->hash_table[i]; p != NULL; p = p->next) { + printf(" %d/%d: '", j, s->n_strings); + for(k = 0; k < p->len; k++) { + lre_print_char(p->buf[k], FALSE); + } + printf("'\n"); + j++; + } + } +} + +static int re_string_find2(REStringList *s, int len, const uint32_t *buf, + uint32_t h0, BOOL add_flag) +{ + uint32_t h = 0; /* avoid warning */ + REString *p; + if (s->hash_size != 0) { /* test the table, not n_strings: removals can leave an allocated but empty table and 'h' must still be valid for the insertion below */ + h = h0 >> (32 - s->hash_bits); + for(p = s->hash_table[h]; p 
!= NULL; p = p->next) { + if (p->hash == h0 && p->len == len && + !memcmp(p->buf, buf, len * sizeof(buf[0]))) { + return 1; + } + } + } + /* not found */ + if (!add_flag) + return 0; + /* increase the size of the hash table if needed */ + if (unlikely((s->n_strings + 1) > s->hash_size)) { + REString **new_hash_table, *p_next; + int new_hash_bits, i; + uint32_t new_hash_size; + new_hash_bits = max_int(s->hash_bits + 1, 4); + new_hash_size = 1 << new_hash_bits; + new_hash_table = lre_realloc(s->cr.mem_opaque, NULL, + sizeof(new_hash_table[0]) * new_hash_size); + if (!new_hash_table) + return -1; + memset(new_hash_table, 0, sizeof(new_hash_table[0]) * new_hash_size); + for(i = 0; i < s->hash_size; i++) { + for(p = s->hash_table[i]; p != NULL; p = p_next) { + p_next = p->next; + h = p->hash >> (32 - new_hash_bits); + p->next = new_hash_table[h]; + new_hash_table[h] = p; + } + } + lre_realloc(s->cr.mem_opaque, s->hash_table, 0); + s->hash_bits = new_hash_bits; + s->hash_size = new_hash_size; + s->hash_table = new_hash_table; + h = h0 >> (32 - s->hash_bits); + } + + p = lre_realloc(s->cr.mem_opaque, NULL, sizeof(REString) + len * sizeof(buf[0])); + if (!p) + return -1; + p->next = s->hash_table[h]; + s->hash_table[h] = p; + s->n_strings++; + p->hash = h0; + p->len = len; + memcpy(p->buf, buf, sizeof(buf[0]) * len); + return 1; +} + +static int re_string_find(REStringList *s, int len, const uint32_t *buf, + BOOL add_flag) +{ + uint32_t h0; + h0 = re_string_hash(len, buf); + return re_string_find2(s, len, buf, h0, add_flag); +} + +/* return -1 if memory error, 0 if OK */ +static int re_string_add(REStringList *s, int len, const uint32_t *buf) +{ + if (len == 1) { + return cr_union_interval(&s->cr, buf[0], buf[0]); + } + if (re_string_find(s, len, buf, TRUE) < 0) + return -1; + return 0; +} + +/* a = a op b */ +static int re_string_list_op(REStringList *a, REStringList *b, int op) +{ + int i, ret; + REString *p, **pp; + + if (cr_op1(&a->cr, b->cr.points, b->cr.len, op)) + 
return -1; + + switch(op) { + case CR_OP_UNION: + if (b->n_strings != 0) { + for(i = 0; i < b->hash_size; i++) { + for(p = b->hash_table[i]; p != NULL; p = p->next) { + if (re_string_find2(a, p->len, p->buf, p->hash, TRUE) < 0) + return -1; + } + } + } + break; + case CR_OP_INTER: + case CR_OP_SUB: + for(i = 0; i < a->hash_size; i++) { + pp = &a->hash_table[i]; + for(;;) { + p = *pp; + if (p == NULL) + break; + ret = re_string_find2(b, p->len, p->buf, p->hash, FALSE); + if (op == CR_OP_SUB) + ret = !ret; + if (!ret) { + /* remove it */ + *pp = p->next; + a->n_strings--; + lre_realloc(a->cr.mem_opaque, p, 0); + } else { + /* keep it */ + pp = &p->next; + } + } + } + break; + default: + abort(); + } + return 0; +} + +static int re_string_list_canonicalize(REParseState *s1, + REStringList *s, BOOL is_unicode) +{ + if (cr_regexp_canonicalize(&s->cr, is_unicode)) + return -1; + if (s->n_strings != 0) { + REStringList a_s, *a = &a_s; + int i, j; + REString *p; + + /* XXX: simplify */ + re_string_list_init(s1, a); + + a->n_strings = s->n_strings; + a->hash_size = s->hash_size; + a->hash_bits = s->hash_bits; + a->hash_table = s->hash_table; + + s->n_strings = 0; + s->hash_size = 0; + s->hash_bits = 0; + s->hash_table = NULL; + + for(i = 0; i < a->hash_size; i++) { + for(p = a->hash_table[i]; p != NULL; p = p->next) { + for(j = 0; j < p->len; j++) { + p->buf[j] = lre_canonicalize(p->buf[j], is_unicode); + } + if (re_string_add(s, p->len, p->buf)) { + re_string_list_free(a); + return -1; + } + } + } + re_string_list_free(a); + } + return 0; +} + static const uint16_t char_range_d[] = { 1, 0x0030, 0x0039 + 1, @@ -170,7 +430,7 @@ static const uint16_t * const char_range_table[] = { char_range_w, }; -static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c) +static int cr_init_char_range(REParseState *s, REStringList *cr, uint32_t c) { BOOL invert; const uint16_t *c_pt; @@ -179,18 +439,18 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t 
c) invert = c & 1; c_pt = char_range_table[c >> 1]; len = *c_pt++; - cr_init(cr, s->opaque, lre_realloc); + re_string_list_init(s, cr); for(i = 0; i < len * 2; i++) { - if (cr_add_point(cr, c_pt[i])) + if (cr_add_point(&cr->cr, c_pt[i])) goto fail; } if (invert) { - if (cr_invert(cr)) + if (cr_invert(&cr->cr)) goto fail; } return 0; fail: - cr_free(cr); + re_string_list_free(cr); return -1; } @@ -240,6 +500,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, printf("%s", reopcode_info[opcode].name); switch(opcode) { case REOP_char: + case REOP_char_i: val = get_u16(buf + pos + 1); if (val >= ' ' && val <= 126) printf(" '%c'", val); @@ -247,6 +508,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, printf(" 0x%04x", val); break; case REOP_char32: + case REOP_char32_i: val = get_u32(buf + pos + 1); if (val >= ' ' && val <= 126) printf(" '%c'", val); @@ -273,7 +535,9 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, case REOP_save_start: case REOP_save_end: case REOP_back_reference: + case REOP_back_reference_i: case REOP_backward_back_reference: + case REOP_backward_back_reference_i: printf(" %u", buf[pos + 1]); break; case REOP_save_reset: @@ -284,6 +548,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, printf(" %d", val); break; case REOP_range: + case REOP_range_i: { int n, i; n = get_u16(buf + pos + 1); @@ -295,6 +560,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, } break; case REOP_range32: + case REOP_range32_i: { int n, i; n = get_u16(buf + pos + 1); @@ -533,8 +799,16 @@ static BOOL is_unicode_char(int c) (c == '_')); } -static int parse_unicode_property(REParseState *s, CharRange *cr, - const uint8_t **pp, BOOL is_inv) +/* XXX: memory error test */ +static void seq_prop_cb(void *opaque, const uint32_t *seq, int seq_len) +{ + REStringList *sl = opaque; + re_string_add(sl, seq_len, seq); +} + +static int parse_unicode_property(REParseState *s, REStringList *cr, + 
const uint8_t **pp, BOOL is_inv, + BOOL allow_sequence_prop) { const uint8_t *p; char name[64], value[64]; @@ -574,51 +848,76 @@ static int parse_unicode_property(REParseState *s, CharRange *cr, } else if (!strcmp(name, "Script_Extensions") || !strcmp(name, "scx")) { script_ext = TRUE; do_script: - cr_init(cr, s->opaque, lre_realloc); - ret = unicode_script(cr, value, script_ext); + re_string_list_init(s, cr); + ret = unicode_script(&cr->cr, value, script_ext); if (ret) { - cr_free(cr); + re_string_list_free(cr); if (ret == -2) return re_parse_error(s, "unknown unicode script"); else goto out_of_memory; } } else if (!strcmp(name, "General_Category") || !strcmp(name, "gc")) { - cr_init(cr, s->opaque, lre_realloc); - ret = unicode_general_category(cr, value); + re_string_list_init(s, cr); + ret = unicode_general_category(&cr->cr, value); if (ret) { - cr_free(cr); + re_string_list_free(cr); if (ret == -2) return re_parse_error(s, "unknown unicode general category"); else goto out_of_memory; } } else if (value[0] == '\0') { - cr_init(cr, s->opaque, lre_realloc); - ret = unicode_general_category(cr, name); + re_string_list_init(s, cr); + ret = unicode_general_category(&cr->cr, name); if (ret == -1) { - cr_free(cr); + re_string_list_free(cr); goto out_of_memory; } if (ret < 0) { - ret = unicode_prop(cr, name); - if (ret) { - cr_free(cr); - if (ret == -2) - goto unknown_property_name; - else - goto out_of_memory; + ret = unicode_prop(&cr->cr, name); + if (ret == -1) { + re_string_list_free(cr); + goto out_of_memory; } } + if (ret < 0 && !is_inv && allow_sequence_prop) { + CharRange cr_tmp; + cr_init(&cr_tmp, s->opaque, lre_realloc); + ret = unicode_sequence_prop(name, seq_prop_cb, cr, &cr_tmp); + cr_free(&cr_tmp); + if (ret == -1) { + re_string_list_free(cr); + goto out_of_memory; + } + } + if (ret < 0) + goto unknown_property_name; } else { unknown_property_name: return re_parse_error(s, "unknown unicode property name"); } + /* the ordering of case folding and inversion 
differs with + unicode_sets. 'unicode_sets' ordering is more consistent */ + /* XXX: the spec seems incorrect, we do it as the other engines + seem to do it. */ + if (s->ignore_case && s->unicode_sets) { + if (re_string_list_canonicalize(s, cr, s->is_unicode)) { + re_string_list_free(cr); + goto out_of_memory; + } + } if (is_inv) { - if (cr_invert(cr)) { - cr_free(cr); - return -1; + if (cr_invert(&cr->cr)) { + re_string_list_free(cr); + goto out_of_memory; + } + } + if (s->ignore_case && !s->unicode_sets) { + if (re_string_list_canonicalize(s, cr, s->is_unicode)) { + re_string_list_free(cr); + goto out_of_memory; } } *pp = p; @@ -628,10 +927,61 @@ static int parse_unicode_property(REParseState *s, CharRange *cr, } #endif /* CONFIG_ALL_UNICODE */ +static int get_class_atom(REParseState *s, REStringList *cr, + const uint8_t **pp, BOOL inclass); + +static int parse_class_string_disjunction(REParseState *s, REStringList *cr, + const uint8_t **pp) +{ + const uint8_t *p; + DynBuf str; + int c; + + p = *pp; + if (*p != '{') + return re_parse_error(s, "expecting '{' after \\q"); + + dbuf_init2(&str, s->opaque, lre_realloc); + re_string_list_init(s, cr); + + p++; + for(;;) { + str.size = 0; + while (*p != '}' && *p != '|') { + c = get_class_atom(s, NULL, &p, FALSE); + if (c < 0) + goto fail; + if (dbuf_put_u32(&str, c)) { + re_parse_out_of_memory(s); + goto fail; + } + } + if (re_string_add(cr, str.size / 4, (uint32_t *)str.buf)) { + re_parse_out_of_memory(s); + goto fail; + } + if (*p == '}') + break; + p++; + } + if (s->ignore_case) { + if (re_string_list_canonicalize(s, cr, TRUE)) + goto fail; + } + p++; /* skip the '}' */ + dbuf_free(&str); + *pp = p; + return 0; + fail: + dbuf_free(&str); + re_string_list_free(cr); + return -1; +} + /* return -1 if error otherwise the character or a class range - (CLASS_RANGE_BASE). In case of class range, 'cr' is + (CLASS_RANGE_BASE) if cr != NULL. In case of class range, 'cr' is initialized. Otherwise, it is ignored. 
*/ -static int get_class_atom(REParseState *s, CharRange *cr, +static int get_class_atom(REParseState *s, REStringList *cr, const uint8_t **pp, BOOL inclass) { const uint8_t *p; @@ -666,6 +1016,8 @@ static int get_class_atom(REParseState *s, CharRange *cr, case 'W': c = CHAR_RANGE_W; class_range: + if (!cr) + goto default_escape; if (cr_init_char_range(s, cr, c)) return -1; c = CLASS_RANGE_BASE; @@ -690,27 +1042,50 @@ static int get_class_atom(REParseState *s, CharRange *cr, if (!inclass && s->is_unicode) goto invalid_escape; break; + case '^': + case '$': + case '\\': + case '.': + case '*': + case '+': + case '?': + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '|': + case '/': + /* always valid to escape these characters */ + break; #ifdef CONFIG_ALL_UNICODE case 'p': case 'P': - if (s->is_unicode) { - if (parse_unicode_property(s, cr, &p, (c == 'P'))) + if (s->is_unicode && cr) { + if (parse_unicode_property(s, cr, &p, (c == 'P'), s->unicode_sets)) return -1; c = CLASS_RANGE_BASE; break; } - /* fall thru */ + goto default_escape; #endif + case 'q': + if (s->unicode_sets && cr && inclass) { + if (parse_class_string_disjunction(s, cr, &p)) + return -1; + c = CLASS_RANGE_BASE; + break; + } + goto default_escape; default: + default_escape: p--; ret = lre_parse_escape(&p, s->is_unicode * 2); if (ret >= 0) { c = ret; } else { - if (ret == -2 && *p != '\0' && strchr("^$\\.*+?()[]{}|/", *p)) { - /* always valid to escape these characters */ - goto normal_char; - } else if (s->is_unicode) { + if (s->is_unicode) { invalid_escape: return re_parse_error(s, "invalid escape sequence in regular expression"); } else { @@ -727,6 +1102,48 @@ static int get_class_atom(REParseState *s, CharRange *cr, return re_parse_error(s, "unexpected end"); } /* fall thru */ + goto normal_char; + + case '&': + case '!': + case '#': + case '$': + case '%': + case '*': + case '+': + case ',': + case '.': + case ':': + case ';': + case '<': + case '=': + case '>': + 
case '?': + case '@': + case '^': + case '`': + case '~': + if (s->unicode_sets && p[1] == c) { + /* forbidden double characters */ + return re_parse_error(s, "invalid class set operation in regular expression"); + } + goto normal_char; + + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '/': + case '-': + case '|': + if (s->unicode_sets) { + /* invalid characters in unicode sets */ + return re_parse_error(s, "invalid character in class in regular expression"); + } + goto normal_char; + default: normal_char: /* normal char */ @@ -754,8 +1171,6 @@ static int re_emit_range(REParseState *s, const CharRange *cr) if (len >= 65535) return re_parse_error(s, "too many ranges"); if (len == 0) { - /* not sure it can really happen. Emit a match that is always - false */ re_emit_op_u32(s, REOP_char32, -1); } else { high = cr->points[cr->len - 1]; @@ -764,7 +1179,7 @@ static int re_emit_range(REParseState *s, const CharRange *cr) if (high <= 0xffff) { /* can use 16 bit ranges with the conversion that 0xffff = infinity */ - re_emit_op_u16(s, REOP_range, len); + re_emit_op_u16(s, s->ignore_case ? REOP_range_i : REOP_range, len); for(i = 0; i < cr->len; i += 2) { dbuf_put_u16(&s->byte_code, cr->points[i]); high = cr->points[i + 1] - 1; @@ -773,7 +1188,7 @@ static int re_emit_range(REParseState *s, const CharRange *cr) dbuf_put_u16(&s->byte_code, high); } } else { - re_emit_op_u16(s, REOP_range32, len); + re_emit_op_u16(s, s->ignore_case ? 
REOP_range32_i : REOP_range32, len); for(i = 0; i < cr->len; i += 2) { dbuf_put_u32(&s->byte_code, cr->points[i]); dbuf_put_u32(&s->byte_code, cr->points[i + 1] - 1); @@ -783,15 +1198,139 @@ static int re_emit_range(REParseState *s, const CharRange *cr) return 0; } -static int re_parse_char_class(REParseState *s, const uint8_t **pp) +static int re_string_cmp_len(const void *a, const void *b, void *arg) +{ + REString *p1 = *(REString **)a; + REString *p2 = *(REString **)b; + return (p1->len < p2->len) - (p1->len > p2->len); +} + +static void re_emit_char(REParseState *s, int c) +{ + if (c <= 0xffff) + re_emit_op_u16(s, s->ignore_case ? REOP_char_i : REOP_char, c); + else + re_emit_op_u32(s, s->ignore_case ? REOP_char32_i : REOP_char32, c); +} + +static int re_emit_string_list(REParseState *s, const REStringList *sl) +{ + REString **tab, *p; + int i, j, split_pos, last_match_pos, n; + BOOL has_empty_string, is_last; + + // re_string_list_dump("sl", sl); + if (sl->n_strings == 0) { + /* simple case: only characters */ + if (re_emit_range(s, &sl->cr)) + return -1; + } else { + /* at least one string list is present : match the longest ones first */ + /* XXX: add a new op_switch opcode to compile as a trie */ + tab = lre_realloc(s->opaque, NULL, sizeof(tab[0]) * sl->n_strings); + if (!tab) { + re_parse_out_of_memory(s); + return -1; + } + has_empty_string = FALSE; + n = 0; + for(i = 0; i < sl->hash_size; i++) { + for(p = sl->hash_table[i]; p != NULL; p = p->next) { + if (p->len == 0) { + has_empty_string = TRUE; + } else { + tab[n++] = p; + } + } + } + assert(n <= sl->n_strings); + + rqsort(tab, n, sizeof(tab[0]), re_string_cmp_len, NULL); + + last_match_pos = -1; + for(i = 0; i < n; i++) { + p = tab[i]; + is_last = !has_empty_string && sl->cr.len == 0 && i == (n - 1); + if (!is_last) + split_pos = re_emit_op_u32(s, REOP_split_next_first, 0); + else + split_pos = 0; + for(j = 0; j < p->len; j++) { + re_emit_char(s, p->buf[j]); + } + if (!is_last) { + last_match_pos = 
re_emit_op_u32(s, REOP_goto, last_match_pos); + put_u32(s->byte_code.buf + split_pos, s->byte_code.size - (split_pos + 4)); + } + } + + if (sl->cr.len != 0) { + /* char range */ + is_last = !has_empty_string; + if (!is_last) + split_pos = re_emit_op_u32(s, REOP_split_next_first, 0); + else + split_pos = 0; /* not used */ + if (re_emit_range(s, &sl->cr)) { + lre_realloc(s->opaque, tab, 0); + return -1; + } + if (!is_last) + put_u32(s->byte_code.buf + split_pos, s->byte_code.size - (split_pos + 4)); + } + + /* patch the 'goto match' */ + while (last_match_pos != -1) { + int next_pos = get_u32(s->byte_code.buf + last_match_pos); + put_u32(s->byte_code.buf + last_match_pos, s->byte_code.size - (last_match_pos + 4)); + last_match_pos = next_pos; + } + + lre_realloc(s->opaque, tab, 0); + } + return 0; +} + +static int re_parse_nested_class(REParseState *s, REStringList *cr, const uint8_t **pp); + +static int re_parse_class_set_operand(REParseState *s, REStringList *cr, const uint8_t **pp) +{ + int c1; + const uint8_t *p = *pp; + + if (*p == '[') { + if (re_parse_nested_class(s, cr, pp)) + return -1; + } else { + c1 = get_class_atom(s, cr, pp, TRUE); + if (c1 < 0) + return -1; + if (c1 < CLASS_RANGE_BASE) { + /* create a range with a single character */ + re_string_list_init(s, cr); + if (s->ignore_case) + c1 = lre_canonicalize(c1, s->is_unicode); + if (cr_union_interval(&cr->cr, c1, c1)) { + re_string_list_free(cr); + return -1; + } + } + } + return 0; +} + +static int re_parse_nested_class(REParseState *s, REStringList *cr, const uint8_t **pp) { const uint8_t *p; uint32_t c1, c2; - CharRange cr_s, *cr = &cr_s; - CharRange cr1_s, *cr1 = &cr1_s; - BOOL invert; + int ret; + REStringList cr1_s, *cr1 = &cr1_s; + BOOL invert, is_first; - cr_init(cr, s->opaque, lre_realloc); + if (lre_check_stack_overflow(s->opaque, 0)) + return re_parse_error(s, "stack overflow"); + + re_string_list_init(s, cr); p = *pp; p++; /* skip '[' */ @@ -800,74 +1339,155 @@ static int 
re_parse_char_class(REParseState *s, const uint8_t **pp) p++; invert = TRUE; } - + + /* handle unions */ + is_first = TRUE; for(;;) { if (*p == ']') break; - c1 = get_class_atom(s, cr1, &p, TRUE); - if ((int)c1 < 0) - goto fail; - if (*p == '-' && p[1] != ']') { - const uint8_t *p0 = p + 1; - if (c1 >= CLASS_RANGE_BASE) { - if (s->is_unicode) { - cr_free(cr1); - goto invalid_class_range; - } - /* Annex B: match '-' character */ - goto class_atom; - } - c2 = get_class_atom(s, cr1, &p0, TRUE); - if ((int)c2 < 0) + if (*p == '[' && s->unicode_sets) { + if (re_parse_nested_class(s, cr1, &p)) goto fail; - if (c2 >= CLASS_RANGE_BASE) { - cr_free(cr1); - if (s->is_unicode) { - goto invalid_class_range; - } - /* Annex B: match '-' character */ - goto class_atom; - } - p = p0; - if (c2 < c1) { - invalid_class_range: - re_parse_error(s, "invalid class range"); - goto fail; - } - if (cr_union_interval(cr, c1, c2)) - goto memory_error; + goto class_union; } else { - class_atom: - if (c1 >= CLASS_RANGE_BASE) { - int ret; - ret = cr_union1(cr, cr1->points, cr1->len); - cr_free(cr1); - if (ret) - goto memory_error; + c1 = get_class_atom(s, cr1, &p, TRUE); + if ((int)c1 < 0) + goto fail; + if (*p == '-' && p[1] != ']') { + const uint8_t *p0 = p + 1; + if (p[1] == '-' && s->unicode_sets && is_first) + goto class_atom; /* first character class followed by '--' */ + if (c1 >= CLASS_RANGE_BASE) { + if (s->is_unicode) { + re_string_list_free(cr1); + goto invalid_class_range; + } + /* Annex B: match '-' character */ + goto class_atom; + } + c2 = get_class_atom(s, cr1, &p0, TRUE); + if ((int)c2 < 0) + goto fail; + if (c2 >= CLASS_RANGE_BASE) { + re_string_list_free(cr1); + if (s->is_unicode) { + goto invalid_class_range; + } + /* Annex B: match '-' character */ + goto class_atom; + } + p = p0; + if (c2 < c1) { + invalid_class_range: + re_parse_error(s, "invalid class range"); + goto fail; + } + if (s->ignore_case) { + CharRange cr2_s, *cr2 = &cr2_s; + cr_init(cr2, s->opaque, 
lre_realloc); + if (cr_add_interval(cr2, c1, c2 + 1) || + cr_regexp_canonicalize(cr2, s->is_unicode) || + cr_op1(&cr->cr, cr2->points, cr2->len, CR_OP_UNION)) { + cr_free(cr2); + goto memory_error; + } + cr_free(cr2); + } else { + if (cr_union_interval(&cr->cr, c1, c2)) + goto memory_error; + } + is_first = FALSE; /* union operation */ } else { - if (cr_union_interval(cr, c1, c1)) - goto memory_error; + class_atom: + if (c1 >= CLASS_RANGE_BASE) { + class_union: + ret = re_string_list_op(cr, cr1, CR_OP_UNION); + re_string_list_free(cr1); + if (ret) + goto memory_error; + } else { + if (s->ignore_case) + c1 = lre_canonicalize(c1, s->is_unicode); + if (cr_union_interval(&cr->cr, c1, c1)) + goto memory_error; + } } } + if (s->unicode_sets && is_first) { + if (*p == '&' && p[1] == '&' && p[2] != '&') { + /* handle '&&' */ + for(;;) { + if (*p == ']') { + break; + } else if (*p == '&' && p[1] == '&' && p[2] != '&') { + p += 2; + } else { + goto invalid_operation; + } + if (re_parse_class_set_operand(s, cr1, &p)) + goto fail; + ret = re_string_list_op(cr, cr1, CR_OP_INTER); + re_string_list_free(cr1); + if (ret) + goto memory_error; + } + } else if (*p == '-' && p[1] == '-') { + /* handle '--' */ + for(;;) { + if (*p == ']') { + break; + } else if (*p == '-' && p[1] == '-') { + p += 2; + } else { + invalid_operation: + re_parse_error(s, "invalid operation in regular expression"); + goto fail; + } + if (re_parse_class_set_operand(s, cr1, &p)) + goto fail; + ret = re_string_list_op(cr, cr1, CR_OP_SUB); + re_string_list_free(cr1); + if (ret) + goto memory_error; + } + } + } + is_first = FALSE; } - if (s->ignore_case) { - if (cr_regexp_canonicalize(cr, s->is_unicode)) - goto memory_error; - } - if (invert) { - if (cr_invert(cr)) - goto memory_error; - } - if (re_emit_range(s, cr)) - goto fail; - cr_free(cr); + p++; /* skip ']' */ *pp = p; + if (invert) { + /* XXX: add may_contain_string syntax check to be fully + compliant. The test here accepts more input than the + spec. 
*/ + if (cr->n_strings != 0) { + re_parse_error(s, "negated character class with strings in regular expression"); + goto fail; + } + if (cr_invert(&cr->cr)) + goto memory_error; + } return 0; memory_error: re_parse_out_of_memory(s); fail: - cr_free(cr); + re_string_list_free(cr); + return -1; +} + +static int re_parse_char_class(REParseState *s, const uint8_t **pp) +{ + REStringList cr_s, *cr = &cr_s; + + if (re_parse_nested_class(s, cr, pp)) + return -1; + if (re_emit_string_list(s, cr)) + goto fail; + re_string_list_free(cr); + return 0; + fail: + re_string_list_free(cr); return -1; } @@ -888,27 +1508,35 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len) len = reopcode_info[opcode].size; switch(opcode) { case REOP_range: + case REOP_range_i: val = get_u16(bc_buf + pos + 1); len += val * 4; goto simple_char; case REOP_range32: + case REOP_range32_i: val = get_u16(bc_buf + pos + 1); len += val * 8; goto simple_char; case REOP_char: + case REOP_char_i: case REOP_char32: + case REOP_char32_i: case REOP_dot: case REOP_any: simple_char: ret = FALSE; break; case REOP_line_start: + case REOP_line_start_m: case REOP_line_end: + case REOP_line_end_m: case REOP_push_i32: case REOP_push_char_pos: case REOP_drop: case REOP_word_boundary: + case REOP_word_boundary_i: case REOP_not_word_boundary: + case REOP_not_word_boundary_i: case REOP_prev: /* no effect */ break; @@ -916,7 +1544,9 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len) case REOP_save_end: case REOP_save_reset: case REOP_back_reference: + case REOP_back_reference_i: case REOP_backward_back_reference: + case REOP_backward_back_reference_i: break; default: /* safe behavior: we cannot predict the outcome */ @@ -941,24 +1571,32 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len) len = reopcode_info[opcode].size; switch(opcode) { case REOP_range: + case REOP_range_i: val = get_u16(bc_buf + pos + 1); len += val * 4; goto 
simple_char; case REOP_range32: + case REOP_range32_i: val = get_u16(bc_buf + pos + 1); len += val * 8; goto simple_char; case REOP_char: + case REOP_char_i: case REOP_char32: + case REOP_char32_i: case REOP_dot: case REOP_any: simple_char: count++; break; case REOP_line_start: + case REOP_line_start_m: case REOP_line_end: + case REOP_line_end_m: case REOP_word_boundary: + case REOP_word_boundary_i: case REOP_not_word_boundary: + case REOP_not_word_boundary_i: break; default: return -1; @@ -1116,12 +1754,47 @@ static int find_group_name(REParseState *s, const char *name) static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir); +static int re_parse_modifiers(REParseState *s, const uint8_t **pp) +{ + const uint8_t *p = *pp; + int mask = 0; + int val; + + for(;;) { + if (*p == 'i') { + val = LRE_FLAG_IGNORECASE; + } else if (*p == 'm') { + val = LRE_FLAG_MULTILINE; + } else if (*p == 's') { + val = LRE_FLAG_DOTALL; + } else { + break; + } + if (mask & val) + return re_parse_error(s, "duplicate modifier: '%c'", *p); + mask |= val; + p++; + } + *pp = p; + return mask; +} + +static BOOL update_modifier(BOOL val, int add_mask, int remove_mask, + int mask) +{ + if (add_mask & mask) + val = TRUE; + if (remove_mask & mask) + val = FALSE; + return val; +} + static int re_parse_term(REParseState *s, BOOL is_backward_dir) { const uint8_t *p; int c, last_atom_start, quant_min, quant_max, last_capture_count; BOOL greedy, add_zero_advance_check, is_neg, is_backward_lookahead; - CharRange cr_s, *cr = &cr_s; + REStringList cr_s, *cr = &cr_s; last_atom_start = -1; last_capture_count = 0; @@ -1130,11 +1803,11 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) switch(c) { case '^': p++; - re_emit_op(s, REOP_line_start); + re_emit_op(s, s->multi_line ? REOP_line_start_m : REOP_line_start); break; case '$': p++; - re_emit_op(s, REOP_line_end); + re_emit_op(s, s->multi_line ? 
REOP_line_end_m : REOP_line_end); break; case '.': p++; @@ -1184,6 +1857,44 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) p = s->buf_ptr; if (re_parse_expect(s, &p, ')')) return -1; + } else if (p[2] == 'i' || p[2] == 'm' || p[2] == 's' || p[2] == '-') { + BOOL saved_ignore_case, saved_multi_line, saved_dotall; + int add_mask, remove_mask; + p += 2; + remove_mask = 0; + add_mask = re_parse_modifiers(s, &p); + if (add_mask < 0) + return -1; + if (*p == '-') { + p++; + remove_mask = re_parse_modifiers(s, &p); + if (remove_mask < 0) + return -1; + } + if ((add_mask == 0 && remove_mask == 0) || + (add_mask & remove_mask) != 0) { + return re_parse_error(s, "invalid modifiers"); + } + if (re_parse_expect(s, &p, ':')) + return -1; + saved_ignore_case = s->ignore_case; + saved_multi_line = s->multi_line; + saved_dotall = s->dotall; + s->ignore_case = update_modifier(s->ignore_case, add_mask, remove_mask, LRE_FLAG_IGNORECASE); + s->multi_line = update_modifier(s->multi_line, add_mask, remove_mask, LRE_FLAG_MULTILINE); + s->dotall = update_modifier(s->dotall, add_mask, remove_mask, LRE_FLAG_DOTALL); + + last_atom_start = s->byte_code.size; + last_capture_count = s->capture_count; + s->buf_ptr = p; + if (re_parse_disjunction(s, is_backward_dir)) + return -1; + p = s->buf_ptr; + if (re_parse_expect(s, &p, ')')) + return -1; + s->ignore_case = saved_ignore_case; + s->multi_line = saved_multi_line; + s->dotall = saved_dotall; } else if ((p[2] == '=' || p[2] == '!')) { is_neg = (p[2] == '!'); is_backward_lookahead = FALSE; @@ -1262,7 +1973,11 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) switch(p[1]) { case 'b': case 'B': - re_emit_op(s, REOP_word_boundary + (p[1] != 'b')); + if (p[1] != 'b') { + re_emit_op(s, s->ignore_case ? REOP_not_word_boundary_i : REOP_not_word_boundary); + } else { + re_emit_op(s, s->ignore_case ? 
REOP_word_boundary_i : REOP_word_boundary); + } p += 2; break; case 'k': @@ -1351,7 +2066,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) emit_back_reference: last_atom_start = s->byte_code.size; last_capture_count = s->capture_count; - re_emit_op_u8(s, REOP_back_reference + is_backward_dir, c); + + re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, c); } break; default: @@ -1385,18 +2101,14 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) re_emit_op(s, REOP_prev); if (c >= CLASS_RANGE_BASE) { int ret; - /* Note: canonicalization is not needed */ - ret = re_emit_range(s, cr); - cr_free(cr); + ret = re_emit_string_list(s, cr); + re_string_list_free(cr); if (ret) return -1; } else { if (s->ignore_case) c = lre_canonicalize(c, s->is_unicode); - if (c <= 0xffff) - re_emit_op_u16(s, REOP_char, c); - else - re_emit_op_u32(s, REOP_char32, c); + re_emit_char(s, c); } if (is_backward_dir) re_emit_op(s, REOP_prev); @@ -1706,10 +2418,12 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len) stack_size--; break; case REOP_range: + case REOP_range_i: val = get_u16(bc_buf + pos + 1); len += val * 4; break; case REOP_range32: + case REOP_range32_i: val = get_u16(bc_buf + pos + 1); len += val * 8; break; @@ -1719,6 +2433,17 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len) return stack_size_max; } +static void *lre_bytecode_realloc(void *opaque, void *ptr, size_t size) +{ + if (size > (INT32_MAX / 2)) { + /* the bytecode cannot be larger than 2G. Leave some slack to + avoid some overflows. */ + return NULL; + } else { + return lre_realloc(opaque, ptr, size); + } +} + /* 'buf' must be a zero terminated UTF-8 string of length buf_len. Return NULL if error and allocate an error message in *perror_msg, otherwise the compiled bytecode and its length in plen. 
@@ -1737,18 +2462,20 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, s->buf_end = s->buf_ptr + buf_len; s->buf_start = s->buf_ptr; s->re_flags = re_flags; - s->is_unicode = ((re_flags & LRE_FLAG_UNICODE) != 0); + s->is_unicode = ((re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0); is_sticky = ((re_flags & LRE_FLAG_STICKY) != 0); s->ignore_case = ((re_flags & LRE_FLAG_IGNORECASE) != 0); + s->multi_line = ((re_flags & LRE_FLAG_MULTILINE) != 0); s->dotall = ((re_flags & LRE_FLAG_DOTALL) != 0); + s->unicode_sets = ((re_flags & LRE_FLAG_UNICODE_SETS) != 0); s->capture_count = 1; s->total_capture_count = -1; s->has_named_captures = -1; - dbuf_init2(&s->byte_code, opaque, lre_realloc); + dbuf_init2(&s->byte_code, opaque, lre_bytecode_realloc); dbuf_init2(&s->group_names, opaque, lre_realloc); - dbuf_putc(&s->byte_code, re_flags); /* first element is the flags */ + dbuf_put_u16(&s->byte_code, re_flags); /* first element is the flags */ dbuf_putc(&s->byte_code, 0); /* second element is the number of captures */ dbuf_putc(&s->byte_code, 0); /* stack size */ dbuf_put_u32(&s->byte_code, 0); /* bytecode length */ @@ -1801,7 +2528,8 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, /* add the named groups if needed */ if (s->group_names.size > (s->capture_count - 1)) { dbuf_put(&s->byte_code, s->group_names.buf, s->group_names.size); - s->byte_code.buf[RE_HEADER_FLAGS] |= LRE_FLAG_NAMED_GROUPS; + put_u16(s->byte_code.buf + RE_HEADER_FLAGS, + lre_get_flags(s->byte_code.buf) | LRE_FLAG_NAMED_GROUPS); } dbuf_free(&s->group_names); @@ -1935,8 +2663,6 @@ typedef struct { int cbuf_type; int capture_count; int stack_size_max; - BOOL multi_line; - BOOL ignore_case; BOOL is_unicode; int interrupt_counter; void *opaque; /* used for stack overflow check */ @@ -2085,17 +2811,19 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, } break; case REOP_char32: + case REOP_char32_i: val = get_u32(pc); pc += 4; goto 
test_char; case REOP_char: + case REOP_char_i: val = get_u16(pc); pc += 2; test_char: if (cptr >= cbuf_end) goto no_match; GET_CHAR(c, cptr, cbuf_end, cbuf_type); - if (s->ignore_case) { + if (opcode == REOP_char_i || opcode == REOP_char32_i) { c = lre_canonicalize(c, s->is_unicode); } if (val != c) @@ -2139,18 +2867,20 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, return LRE_RET_TIMEOUT; break; case REOP_line_start: + case REOP_line_start_m: if (cptr == s->cbuf) break; - if (!s->multi_line) + if (opcode == REOP_line_start) goto no_match; PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type); if (!is_line_terminator(c)) goto no_match; break; case REOP_line_end: + case REOP_line_end_m: if (cptr == cbuf_end) break; - if (!s->multi_line) + if (opcode == REOP_line_end) goto no_match; PEEK_CHAR(c, cptr, cbuf_end, cbuf_type); if (!is_line_terminator(c)) @@ -2213,14 +2943,20 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, goto no_match; break; case REOP_word_boundary: + case REOP_word_boundary_i: case REOP_not_word_boundary: + case REOP_not_word_boundary_i: { BOOL v1, v2; + int ignore_case = (opcode == REOP_word_boundary_i || opcode == REOP_not_word_boundary_i); + BOOL is_boundary = (opcode == REOP_word_boundary || opcode == REOP_word_boundary_i); /* char before */ if (cptr == s->cbuf) { v1 = FALSE; } else { PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type); + if (ignore_case) + c = lre_canonicalize(c, s->is_unicode); v1 = is_word_char(c); } /* current char */ @@ -2228,14 +2964,18 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, v2 = FALSE; } else { PEEK_CHAR(c, cptr, cbuf_end, cbuf_type); + if (ignore_case) + c = lre_canonicalize(c, s->is_unicode); v2 = is_word_char(c); } - if (v1 ^ v2 ^ (REOP_not_word_boundary - opcode)) + if (v1 ^ v2 ^ is_boundary) goto no_match; } break; case REOP_back_reference: + case REOP_back_reference_i: case REOP_backward_back_reference: + case REOP_backward_back_reference_i: { 
const uint8_t *cptr1, *cptr1_end, *cptr1_start; uint32_t c1, c2; @@ -2247,14 +2987,15 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, cptr1_end = capture[2 * val + 1]; if (!cptr1_start || !cptr1_end) break; - if (opcode == REOP_back_reference) { + if (opcode == REOP_back_reference || + opcode == REOP_back_reference_i) { cptr1 = cptr1_start; while (cptr1 < cptr1_end) { if (cptr >= cbuf_end) goto no_match; GET_CHAR(c1, cptr1, cptr1_end, cbuf_type); GET_CHAR(c2, cptr, cbuf_end, cbuf_type); - if (s->ignore_case) { + if (opcode == REOP_back_reference_i) { c1 = lre_canonicalize(c1, s->is_unicode); c2 = lre_canonicalize(c2, s->is_unicode); } @@ -2268,7 +3009,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, goto no_match; GET_PREV_CHAR(c1, cptr1, cptr1_start, cbuf_type); GET_PREV_CHAR(c2, cptr, s->cbuf, cbuf_type); - if (s->ignore_case) { + if (opcode == REOP_backward_back_reference_i) { c1 = lre_canonicalize(c1, s->is_unicode); c2 = lre_canonicalize(c2, s->is_unicode); } @@ -2279,6 +3020,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, } break; case REOP_range: + case REOP_range_i: { int n; uint32_t low, high, idx_min, idx_max, idx; @@ -2288,7 +3030,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, if (cptr >= cbuf_end) goto no_match; GET_CHAR(c, cptr, cbuf_end, cbuf_type); - if (s->ignore_case) { + if (opcode == REOP_range_i) { c = lre_canonicalize(c, s->is_unicode); } idx_min = 0; @@ -2319,6 +3061,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, } break; case REOP_range32: + case REOP_range32_i: { int n; uint32_t low, high, idx_min, idx_max, idx; @@ -2328,7 +3071,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, if (cptr >= cbuf_end) goto no_match; GET_CHAR(c, cptr, cbuf_end, cbuf_type); - if (s->ignore_case) { + if (opcode == REOP_range32_i) { c = lre_canonicalize(c, s->is_unicode); } idx_min = 0; @@ 
-2420,11 +3163,10 @@ int lre_exec(uint8_t **capture, REExecContext s_s, *s = &s_s; int re_flags, i, alloca_size, ret; StackInt *stack_buf; + const uint8_t *cptr; re_flags = lre_get_flags(bc_buf); - s->multi_line = (re_flags & LRE_FLAG_MULTILINE) != 0; - s->ignore_case = (re_flags & LRE_FLAG_IGNORECASE) != 0; - s->is_unicode = (re_flags & LRE_FLAG_UNICODE) != 0; + s->is_unicode = (re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0; s->capture_count = bc_buf[RE_HEADER_CAPTURE_COUNT]; s->stack_size_max = bc_buf[RE_HEADER_STACK_SIZE]; s->cbuf = cbuf; @@ -2446,8 +3188,17 @@ int lre_exec(uint8_t **capture, capture[i] = NULL; alloca_size = s->stack_size_max * sizeof(stack_buf[0]); stack_buf = alloca(alloca_size); + + cptr = cbuf + (cindex << cbuf_type); + if (0 < cindex && cindex < clen && s->cbuf_type == 2) { + const uint16_t *p = (const uint16_t *)cptr; + if (is_lo_surrogate(*p) && is_hi_surrogate(p[-1])) { + cptr = (const uint8_t *)(p - 1); + } + } + ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN, - cbuf + (cindex << cbuf_type), FALSE); + cptr, FALSE); lre_realloc(s->opaque, s->state_stack, 0); return ret; } @@ -2459,7 +3210,7 @@ int lre_get_capture_count(const uint8_t *bc_buf) int lre_get_flags(const uint8_t *bc_buf) { - return bc_buf[RE_HEADER_FLAGS]; + return get_u16(bc_buf + RE_HEADER_FLAGS); } /* Return NULL if no group names. Otherwise, return a pointer to diff --git a/libregexp.h b/libregexp.h index 7475bbe..da76e4c 100644 --- a/libregexp.h +++ b/libregexp.h @@ -35,6 +35,7 @@ #define LRE_FLAG_STICKY (1 << 5) #define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. 
*/ #define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */ +#define LRE_FLAG_UNICODE_SETS (1 << 8) #define LRE_RET_MEMORY_ERROR (-1) #define LRE_RET_TIMEOUT (-2) diff --git a/libunicode-table.h b/libunicode-table.h index dc46f16..67df6b3 100644 --- a/libunicode-table.h +++ b/libunicode-table.h @@ -3130,6 +3130,7 @@ typedef enum { } UnicodeScriptEnum; static const char unicode_script_name_table[] = + "Unknown,Zzzz" "\0" "Adlam,Adlm" "\0" "Ahom,Ahom" "\0" "Anatolian_Hieroglyphs,Hluw" "\0" @@ -4054,6 +4055,89 @@ static const uint8_t unicode_prop_Changes_When_NFKC_Casefolded1_table[450] = { 0x4f, 0xff, }; +static const uint8_t unicode_prop_Basic_Emoji1_table[143] = { + 0x60, 0x23, 0x19, 0x81, 0x40, 0xcc, 0x1a, 0x01, + 0x80, 0x42, 0x08, 0x81, 0x94, 0x81, 0xb1, 0x8b, + 0xaa, 0x80, 0x92, 0x80, 0x8c, 0x07, 0x81, 0x90, + 0x0c, 0x0f, 0x04, 0x80, 0x94, 0x06, 0x08, 0x03, + 0x01, 0x06, 0x03, 0x81, 0x9b, 0x80, 0xa2, 0x00, + 0x03, 0x10, 0x80, 0xbc, 0x82, 0x97, 0x80, 0x8d, + 0x80, 0x43, 0x5a, 0x81, 0xb2, 0x03, 0x80, 0x61, + 0xc4, 0xad, 0x80, 0x40, 0xc9, 0x80, 0x40, 0xbd, + 0x01, 0x89, 0xe5, 0x80, 0x97, 0x80, 0x93, 0x01, + 0x20, 0x82, 0x94, 0x81, 0x40, 0xad, 0xa0, 0x8b, + 0x88, 0x80, 0xc5, 0x80, 0x95, 0x8b, 0xaa, 0x1c, + 0x8b, 0x90, 0x10, 0x82, 0xc6, 0x00, 0x80, 0x40, + 0xba, 0x81, 0xbe, 0x8c, 0x18, 0x97, 0x91, 0x80, + 0x99, 0x81, 0x8c, 0x80, 0xd5, 0xd4, 0xaf, 0xc5, + 0x28, 0x12, 0x0a, 0x1b, 0x8a, 0x0e, 0x88, 0x40, + 0xe2, 0x8b, 0x18, 0x41, 0x1a, 0xae, 0x80, 0x89, + 0x80, 0x40, 0xb8, 0xef, 0x8c, 0x82, 0x89, 0x84, + 0xb7, 0x86, 0x8e, 0x81, 0x8a, 0x85, 0x88, +}; + +static const uint8_t unicode_prop_Basic_Emoji2_table[183] = { + 0x40, 0xa8, 0x03, 0x80, 0x5f, 0x8c, 0x80, 0x8b, + 0x80, 0x40, 0xd7, 0x80, 0x95, 0x80, 0xd9, 0x85, + 0x8e, 0x81, 0x41, 0x7c, 0x80, 0x40, 0xa5, 0x80, + 0x9c, 0x10, 0x0c, 0x82, 0x40, 0xc6, 0x80, 0x40, + 0xe6, 0x81, 0x89, 0x80, 0x88, 0x80, 0xb9, 0x0a, + 0x84, 0x88, 0x01, 0x05, 0x03, 0x01, 0x00, 0x09, + 0x02, 0x02, 0x0f, 0x14, 0x00, 
0x80, 0x9b, 0x09, + 0x00, 0x08, 0x80, 0x91, 0x01, 0x80, 0x92, 0x00, + 0x18, 0x00, 0x0a, 0x05, 0x07, 0x81, 0x95, 0x05, + 0x00, 0x00, 0x80, 0x94, 0x05, 0x09, 0x01, 0x17, + 0x04, 0x09, 0x08, 0x01, 0x00, 0x00, 0x05, 0x02, + 0x80, 0x90, 0x81, 0x8e, 0x01, 0x80, 0x9a, 0x81, + 0xbb, 0x80, 0x41, 0x91, 0x81, 0x41, 0xce, 0x82, + 0x45, 0x27, 0x80, 0x8b, 0x80, 0x42, 0x58, 0x00, + 0x80, 0x61, 0xbe, 0xd5, 0x81, 0x8b, 0x81, 0x40, + 0x81, 0x80, 0xb3, 0x80, 0x40, 0xe8, 0x01, 0x88, + 0x88, 0x80, 0xc5, 0x80, 0x97, 0x08, 0x11, 0x81, + 0xaa, 0x1c, 0x8b, 0x92, 0x00, 0x00, 0x80, 0xc6, + 0x00, 0x80, 0x40, 0xba, 0x80, 0xca, 0x81, 0xa3, + 0x09, 0x86, 0x8c, 0x01, 0x19, 0x80, 0x93, 0x01, + 0x07, 0x81, 0x88, 0x04, 0x82, 0x8b, 0x17, 0x11, + 0x00, 0x03, 0x05, 0x02, 0x05, 0x80, 0x40, 0xcf, + 0x00, 0x82, 0x8f, 0x2a, 0x05, 0x01, 0x80, +}; + +static const uint8_t unicode_prop_RGI_Emoji_Modifier_Sequence_table[73] = { + 0x60, 0x26, 0x1c, 0x80, 0x40, 0xda, 0x80, 0x8f, + 0x83, 0x61, 0xcc, 0x76, 0x80, 0xbb, 0x11, 0x01, + 0x82, 0xf4, 0x09, 0x8a, 0x94, 0x18, 0x18, 0x88, + 0x10, 0x1a, 0x02, 0x30, 0x00, 0x97, 0x80, 0x40, + 0xc8, 0x0b, 0x80, 0x94, 0x03, 0x81, 0x40, 0xad, + 0x12, 0x84, 0xd2, 0x80, 0x8f, 0x82, 0x88, 0x80, + 0x8a, 0x80, 0x42, 0x3e, 0x01, 0x07, 0x3d, 0x80, + 0x88, 0x89, 0x11, 0xb7, 0x80, 0xbc, 0x08, 0x08, + 0x80, 0x90, 0x10, 0x8c, 0x40, 0xe4, 0x82, 0xa9, + 0x88, +}; + +static const uint8_t unicode_prop_RGI_Emoji_Flag_Sequence_table[128] = { + 0x0c, 0x00, 0x09, 0x00, 0x04, 0x01, 0x02, 0x06, + 0x03, 0x03, 0x01, 0x02, 0x01, 0x03, 0x07, 0x0d, + 0x18, 0x00, 0x09, 0x00, 0x00, 0x89, 0x08, 0x00, + 0x00, 0x81, 0x88, 0x83, 0x8c, 0x10, 0x00, 0x01, + 0x07, 0x08, 0x29, 0x10, 0x28, 0x00, 0x80, 0x8a, + 0x00, 0x0a, 0x00, 0x0e, 0x15, 0x18, 0x83, 0x89, + 0x06, 0x00, 0x81, 0x8d, 0x00, 0x12, 0x08, 0x00, + 0x03, 0x00, 0x24, 0x00, 0x05, 0x21, 0x00, 0x00, + 0x29, 0x90, 0x00, 0x02, 0x00, 0x08, 0x09, 0x00, + 0x08, 0x18, 0x8b, 0x80, 0x8c, 0x02, 0x19, 0x1a, + 0x11, 0x00, 0x00, 0x80, 0x9c, 0x80, 0x88, 0x02, + 0x00, 0x00, 
0x02, 0x20, 0x88, 0x0a, 0x00, 0x03, + 0x01, 0x02, 0x05, 0x08, 0x00, 0x01, 0x09, 0x20, + 0x21, 0x18, 0x22, 0x00, 0x00, 0x00, 0x00, 0x18, + 0x28, 0x89, 0x80, 0x8b, 0x80, 0x90, 0x80, 0x92, + 0x80, 0x8d, 0x05, 0x80, 0x8a, 0x80, 0x88, 0x80, +}; + +static const uint8_t unicode_prop_Emoji_Keycap_Sequence_table[4] = { + 0xa2, 0x05, 0x04, 0x89, +}; + static const uint8_t unicode_prop_ASCII_Hex_Digit_table[5] = { 0xaf, 0x89, 0x35, 0x99, 0x85, }; @@ -4493,6 +4577,11 @@ typedef enum { UNICODE_PROP_Changes_When_Titlecased1, UNICODE_PROP_Changes_When_Casefolded1, UNICODE_PROP_Changes_When_NFKC_Casefolded1, + UNICODE_PROP_Basic_Emoji1, + UNICODE_PROP_Basic_Emoji2, + UNICODE_PROP_RGI_Emoji_Modifier_Sequence, + UNICODE_PROP_RGI_Emoji_Flag_Sequence, + UNICODE_PROP_Emoji_Keycap_Sequence, UNICODE_PROP_ASCII_Hex_Digit, UNICODE_PROP_Bidi_Control, UNICODE_PROP_Dash, @@ -4633,6 +4722,11 @@ static const uint8_t * const unicode_prop_table[] = { unicode_prop_Changes_When_Titlecased1_table, unicode_prop_Changes_When_Casefolded1_table, unicode_prop_Changes_When_NFKC_Casefolded1_table, + unicode_prop_Basic_Emoji1_table, + unicode_prop_Basic_Emoji2_table, + unicode_prop_RGI_Emoji_Modifier_Sequence_table, + unicode_prop_RGI_Emoji_Flag_Sequence_table, + unicode_prop_Emoji_Keycap_Sequence_table, unicode_prop_ASCII_Hex_Digit_table, unicode_prop_Bidi_Control_table, unicode_prop_Dash_table, @@ -4688,6 +4782,11 @@ static const uint16_t unicode_prop_len_table[] = { countof(unicode_prop_Changes_When_Titlecased1_table), countof(unicode_prop_Changes_When_Casefolded1_table), countof(unicode_prop_Changes_When_NFKC_Casefolded1_table), + countof(unicode_prop_Basic_Emoji1_table), + countof(unicode_prop_Basic_Emoji2_table), + countof(unicode_prop_RGI_Emoji_Modifier_Sequence_table), + countof(unicode_prop_RGI_Emoji_Flag_Sequence_table), + countof(unicode_prop_Emoji_Keycap_Sequence_table), countof(unicode_prop_ASCII_Hex_Digit_table), countof(unicode_prop_Bidi_Control_table), countof(unicode_prop_Dash_table), @@ 
-4726,5 +4825,325 @@ static const uint16_t unicode_prop_len_table[] = { countof(unicode_prop_Case_Ignorable_table), }; +typedef enum { + UNICODE_SEQUENCE_PROP_Basic_Emoji, + UNICODE_SEQUENCE_PROP_Emoji_Keycap_Sequence, + UNICODE_SEQUENCE_PROP_RGI_Emoji_Modifier_Sequence, + UNICODE_SEQUENCE_PROP_RGI_Emoji_Flag_Sequence, + UNICODE_SEQUENCE_PROP_RGI_Emoji_Tag_Sequence, + UNICODE_SEQUENCE_PROP_RGI_Emoji_ZWJ_Sequence, + UNICODE_SEQUENCE_PROP_RGI_Emoji, + UNICODE_SEQUENCE_PROP_COUNT, +} UnicodeSequencePropertyEnum; + +static const char unicode_sequence_prop_name_table[] = + "Basic_Emoji" "\0" + "Emoji_Keycap_Sequence" "\0" + "RGI_Emoji_Modifier_Sequence" "\0" + "RGI_Emoji_Flag_Sequence" "\0" + "RGI_Emoji_Tag_Sequence" "\0" + "RGI_Emoji_ZWJ_Sequence" "\0" + "RGI_Emoji" "\0" +; + +static const uint8_t unicode_rgi_emoji_tag_sequence[18] = { + 0x67, 0x62, 0x65, 0x6e, 0x67, 0x00, 0x67, 0x62, + 0x73, 0x63, 0x74, 0x00, 0x67, 0x62, 0x77, 0x6c, + 0x73, 0x00, +}; + +static const uint8_t unicode_rgi_emoji_zwj_sequence[2320] = { + 0x02, 0xb8, 0x19, 0x40, 0x86, 0x02, 0xd1, 0x39, + 0xb0, 0x19, 0x02, 0x26, 0x39, 0x42, 0x86, 0x02, + 0xb4, 0x36, 0x42, 0x86, 0x03, 0x68, 0x54, 0x64, + 0x87, 0x68, 0x54, 0x02, 0xdc, 0x39, 0x42, 0x86, + 0x02, 0xd1, 0x39, 0x73, 0x13, 0x02, 0x39, 0x39, + 0x40, 0x86, 0x02, 0x69, 0x34, 0xbd, 0x19, 0x03, + 0xb6, 0x36, 0x40, 0x86, 0xa1, 0x87, 0x03, 0x68, + 0x74, 0x1d, 0x19, 0x68, 0x74, 0x03, 0x68, 0x34, + 0xbd, 0x19, 0xa1, 0x87, 0x02, 0xf1, 0x7a, 0xf2, + 0x7a, 0x02, 0xca, 0x33, 0x42, 0x86, 0x02, 0x69, + 0x34, 0xb0, 0x19, 0x04, 0x68, 0x14, 0x68, 0x14, + 0x67, 0x14, 0x66, 0x14, 0x02, 0xf9, 0x26, 0x42, + 0x86, 0x03, 0x69, 0x74, 0x1d, 0x19, 0x69, 0x74, + 0x03, 0xd1, 0x19, 0xbc, 0x19, 0xa1, 0x87, 0x02, + 0x3c, 0x19, 0x40, 0x86, 0x02, 0x68, 0x34, 0xeb, + 0x13, 0x02, 0xc3, 0x33, 0xa1, 0x87, 0x02, 0x70, + 0x34, 0x40, 0x86, 0x02, 0xd4, 0x39, 0x42, 0x86, + 0x02, 0xcf, 0x39, 0x42, 0x86, 0x02, 0x47, 0x36, + 0x40, 0x86, 0x02, 0x39, 0x39, 0x42, 0x86, 0x04, + 0xd1, 0x79, 0x64, 
0x87, 0x8b, 0x14, 0xd1, 0x79, + 0x02, 0xd1, 0x39, 0x95, 0x86, 0x02, 0x68, 0x34, + 0x93, 0x13, 0x02, 0x69, 0x34, 0xed, 0x13, 0x02, + 0xda, 0x39, 0x40, 0x86, 0x03, 0x69, 0x34, 0xaf, + 0x19, 0xa1, 0x87, 0x02, 0xd1, 0x39, 0x93, 0x13, + 0x03, 0xce, 0x39, 0x42, 0x86, 0xa1, 0x87, 0x03, + 0xd1, 0x79, 0x64, 0x87, 0xd1, 0x79, 0x03, 0xc3, + 0x33, 0x42, 0x86, 0xa1, 0x87, 0x03, 0x69, 0x74, + 0x1d, 0x19, 0x68, 0x74, 0x02, 0x69, 0x34, 0x92, + 0x16, 0x02, 0xd1, 0x39, 0x96, 0x86, 0x04, 0x69, + 0x14, 0x64, 0x87, 0x8b, 0x14, 0x68, 0x14, 0x02, + 0x68, 0x34, 0x7c, 0x13, 0x02, 0x47, 0x36, 0x42, + 0x86, 0x02, 0x86, 0x34, 0x42, 0x86, 0x02, 0xd1, + 0x39, 0x7c, 0x13, 0x02, 0x69, 0x14, 0xa4, 0x13, + 0x02, 0xda, 0x39, 0x42, 0x86, 0x02, 0x37, 0x39, + 0x40, 0x86, 0x02, 0xd1, 0x39, 0x08, 0x87, 0x04, + 0x68, 0x54, 0x64, 0x87, 0x8b, 0x14, 0x68, 0x54, + 0x02, 0x4d, 0x36, 0x40, 0x86, 0x02, 0x68, 0x34, + 0x2c, 0x15, 0x02, 0x69, 0x34, 0xaf, 0x19, 0x02, + 0x6e, 0x34, 0x40, 0x86, 0x02, 0xcd, 0x39, 0x42, + 0x86, 0x02, 0xd1, 0x39, 0x2c, 0x15, 0x02, 0x6f, + 0x14, 0x40, 0x86, 0x03, 0xd1, 0x39, 0xbc, 0x19, + 0xa1, 0x87, 0x02, 0x68, 0x34, 0xa8, 0x13, 0x02, + 0x69, 0x34, 0x73, 0x13, 0x04, 0x69, 0x54, 0x64, + 0x87, 0x8b, 0x14, 0x68, 0x54, 0x02, 0x71, 0x34, + 0x42, 0x86, 0x02, 0xd1, 0x39, 0xa8, 0x13, 0x02, + 0x45, 0x36, 0x40, 0x86, 0x03, 0x69, 0x54, 0x64, + 0x87, 0x68, 0x54, 0x03, 0x69, 0x54, 0x64, 0x87, + 0x69, 0x54, 0x03, 0xce, 0x39, 0x40, 0x86, 0xa1, + 0x87, 0x02, 0xd8, 0x39, 0x40, 0x86, 0x03, 0xc3, + 0x33, 0x40, 0x86, 0xa1, 0x87, 0x02, 0x4d, 0x36, + 0x42, 0x86, 0x02, 0xd1, 0x19, 0x92, 0x16, 0x02, + 0xd1, 0x39, 0xeb, 0x13, 0x02, 0x68, 0x34, 0xbc, + 0x14, 0x02, 0xd1, 0x39, 0xbc, 0x14, 0x02, 0x3d, + 0x39, 0x40, 0x86, 0x02, 0xb8, 0x39, 0x42, 0x86, + 0x02, 0xa3, 0x36, 0x40, 0x86, 0x02, 0x75, 0x35, + 0x40, 0x86, 0x02, 0xd8, 0x39, 0x42, 0x86, 0x02, + 0x69, 0x34, 0x93, 0x13, 0x02, 0x35, 0x39, 0x40, + 0x86, 0x02, 0x4b, 0x36, 0x40, 0x86, 0x02, 0x3d, + 0x39, 0x42, 0x86, 0x02, 0x38, 0x39, 0x42, 0x86, + 0x02, 0xa3, 0x36, 
0x42, 0x86, 0x03, 0x69, 0x14, + 0x67, 0x14, 0x67, 0x14, 0x02, 0xb6, 0x36, 0x40, + 0x86, 0x02, 0x69, 0x34, 0x7c, 0x13, 0x02, 0x75, + 0x35, 0x42, 0x86, 0x02, 0xcc, 0x93, 0x40, 0x86, + 0x02, 0xcc, 0x33, 0x40, 0x86, 0x03, 0xd1, 0x39, + 0xbd, 0x19, 0xa1, 0x87, 0x02, 0x82, 0x34, 0x40, + 0x86, 0x02, 0x87, 0x34, 0x40, 0x86, 0x02, 0x69, + 0x14, 0x3e, 0x13, 0x02, 0xd6, 0x39, 0x40, 0x86, + 0x02, 0x68, 0x14, 0xbd, 0x19, 0x02, 0x46, 0x36, + 0x42, 0x86, 0x02, 0x4b, 0x36, 0x42, 0x86, 0x02, + 0x69, 0x34, 0x2c, 0x15, 0x03, 0xb6, 0x36, 0x42, + 0x86, 0xa1, 0x87, 0x02, 0xc4, 0x33, 0x40, 0x86, + 0x02, 0x26, 0x19, 0x40, 0x86, 0x02, 0x69, 0x14, + 0xb0, 0x19, 0x02, 0xde, 0x19, 0x42, 0x86, 0x02, + 0x69, 0x34, 0xa8, 0x13, 0x02, 0xcc, 0x33, 0x42, + 0x86, 0x02, 0x82, 0x34, 0x42, 0x86, 0x02, 0xd1, + 0x19, 0x93, 0x13, 0x02, 0x81, 0x14, 0x42, 0x86, + 0x02, 0x69, 0x34, 0x95, 0x86, 0x02, 0x68, 0x34, + 0xbb, 0x14, 0x02, 0xd1, 0x39, 0xbb, 0x14, 0x02, + 0x69, 0x34, 0xeb, 0x13, 0x02, 0xd1, 0x39, 0x84, + 0x13, 0x02, 0x69, 0x34, 0xbc, 0x14, 0x04, 0x69, + 0x54, 0x64, 0x87, 0x8b, 0x14, 0x69, 0x54, 0x02, + 0x26, 0x39, 0x40, 0x86, 0x02, 0xb4, 0x36, 0x40, + 0x86, 0x02, 0x47, 0x16, 0x42, 0x86, 0x02, 0xdc, + 0x39, 0x40, 0x86, 0x02, 0xca, 0x33, 0x40, 0x86, + 0x02, 0xf9, 0x26, 0x40, 0x86, 0x02, 0x69, 0x34, + 0x08, 0x87, 0x03, 0x69, 0x14, 0x69, 0x14, 0x66, + 0x14, 0x03, 0xd1, 0x59, 0x1d, 0x19, 0xd1, 0x59, + 0x02, 0xd4, 0x39, 0x40, 0x86, 0x02, 0xcf, 0x39, + 0x40, 0x86, 0x02, 0x68, 0x34, 0xa4, 0x13, 0x02, + 0xd1, 0x39, 0xa4, 0x13, 0x02, 0xd1, 0x19, 0xa8, + 0x13, 0x02, 0xd7, 0x39, 0x42, 0x86, 0x03, 0x69, + 0x34, 0xbc, 0x19, 0xa1, 0x87, 0x02, 0x68, 0x14, + 0xb0, 0x19, 0x02, 0x68, 0x14, 0x73, 0x13, 0x04, + 0x69, 0x14, 0x69, 0x14, 0x66, 0x14, 0x66, 0x14, + 0x03, 0x68, 0x34, 0xaf, 0x19, 0xa1, 0x87, 0x02, + 0x68, 0x34, 0x80, 0x16, 0x02, 0x73, 0x34, 0x42, + 0x86, 0x02, 0xd1, 0x39, 0x80, 0x16, 0x02, 0x68, + 0x34, 0xb0, 0x19, 0x02, 0x86, 0x34, 0x40, 0x86, + 0x02, 0x38, 0x19, 0x42, 0x86, 0x02, 0x69, 0x34, + 0xbb, 0x14, 0x02, 
0xb5, 0x36, 0x42, 0x86, 0x02, + 0xcd, 0x39, 0x40, 0x86, 0x02, 0x68, 0x34, 0x95, + 0x86, 0x02, 0x68, 0x34, 0x27, 0x15, 0x03, 0x68, + 0x14, 0x68, 0x14, 0x66, 0x14, 0x02, 0x71, 0x34, + 0x40, 0x86, 0x02, 0xd1, 0x39, 0x27, 0x15, 0x02, + 0x2e, 0x16, 0xa8, 0x14, 0x02, 0xc3, 0x33, 0x42, + 0x86, 0x02, 0x69, 0x14, 0x66, 0x14, 0x02, 0x68, + 0x34, 0x96, 0x86, 0x02, 0x69, 0x34, 0xa4, 0x13, + 0x03, 0x69, 0x14, 0x64, 0x87, 0x68, 0x14, 0x02, + 0xb8, 0x39, 0x40, 0x86, 0x02, 0x68, 0x34, 0x3e, + 0x13, 0x03, 0xd1, 0x19, 0xaf, 0x19, 0xa1, 0x87, + 0x02, 0xd1, 0x39, 0x3e, 0x13, 0x02, 0x68, 0x34, + 0xbd, 0x19, 0x02, 0xd1, 0x19, 0xbb, 0x14, 0x02, + 0xd1, 0x19, 0x95, 0x86, 0x02, 0xdb, 0x39, 0x42, + 0x86, 0x02, 0x38, 0x39, 0x40, 0x86, 0x02, 0x69, + 0x34, 0x80, 0x16, 0x02, 0x69, 0x14, 0xeb, 0x13, + 0x04, 0x68, 0x14, 0x69, 0x14, 0x67, 0x14, 0x67, + 0x14, 0x02, 0x77, 0x34, 0x42, 0x86, 0x02, 0x46, + 0x36, 0x40, 0x86, 0x02, 0x68, 0x34, 0x92, 0x16, + 0x02, 0x4e, 0x36, 0x42, 0x86, 0x03, 0x69, 0x14, + 0xbd, 0x19, 0xa1, 0x87, 0x02, 0xde, 0x19, 0x40, + 0x86, 0x02, 0x69, 0x34, 0x27, 0x15, 0x03, 0xc3, + 0x13, 0x40, 0x86, 0xa1, 0x87, 0x02, 0x81, 0x14, + 0x40, 0x86, 0x03, 0xd1, 0x39, 0xaf, 0x19, 0xa1, + 0x87, 0x02, 0x68, 0x34, 0xbc, 0x19, 0x02, 0xd1, + 0x19, 0x80, 0x16, 0x02, 0xd9, 0x39, 0x42, 0x86, + 0x02, 0xd1, 0x39, 0xbc, 0x19, 0x02, 0xdc, 0x19, + 0x42, 0x86, 0x02, 0x68, 0x34, 0x73, 0x13, 0x02, + 0x69, 0x34, 0x3e, 0x13, 0x02, 0x47, 0x16, 0x40, + 0x86, 0x02, 0xd1, 0x39, 0xbd, 0x19, 0x02, 0x3e, + 0x39, 0x42, 0x86, 0x02, 0x69, 0x14, 0x95, 0x86, + 0x02, 0x68, 0x14, 0x96, 0x86, 0x03, 0x69, 0x34, + 0xbd, 0x19, 0xa1, 0x87, 0x02, 0xd7, 0x39, 0x40, + 0x86, 0x02, 0x45, 0x16, 0x42, 0x86, 0x02, 0x68, + 0x34, 0xed, 0x13, 0x03, 0x68, 0x34, 0xbc, 0x19, + 0xa1, 0x87, 0x02, 0xd1, 0x39, 0xed, 0x13, 0x02, + 0xd1, 0x39, 0x92, 0x16, 0x02, 0x73, 0x34, 0x40, + 0x86, 0x02, 0x38, 0x19, 0x40, 0x86, 0x02, 0xb5, + 0x36, 0x40, 0x86, 0x02, 0x68, 0x34, 0xaf, 0x19, + 0x02, 0xd1, 0x39, 0xaf, 0x19, 0x02, 0x69, 0x34, + 0xbc, 0x19, 0x02, 
0xb6, 0x16, 0x42, 0x86, 0x02, + 0x26, 0x14, 0x25, 0x15, 0x02, 0xc3, 0x33, 0x40, + 0x86, 0x02, 0xdd, 0x39, 0x42, 0x86, 0x02, 0xcb, + 0x93, 0x42, 0x86, 0x02, 0xcb, 0x33, 0x42, 0x86, + 0x02, 0x81, 0x34, 0x42, 0x86, 0x02, 0xce, 0x39, + 0xa1, 0x87, 0x02, 0xdb, 0x39, 0x40, 0x86, 0x02, + 0x68, 0x34, 0x08, 0x87, 0x02, 0xd1, 0x19, 0xb0, + 0x19, 0x02, 0x77, 0x34, 0x40, 0x86, 0x02, 0x4e, + 0x36, 0x40, 0x86, 0x02, 0xce, 0x39, 0x42, 0x86, + 0x02, 0x4e, 0x16, 0x42, 0x86, 0x02, 0xd9, 0x39, + 0x40, 0x86, 0x02, 0xdc, 0x19, 0x40, 0x86, 0x02, + 0x3e, 0x39, 0x40, 0x86, 0x02, 0xb9, 0x39, 0x42, + 0x86, 0x02, 0xda, 0x19, 0x42, 0x86, 0x02, 0x42, + 0x16, 0x94, 0x81, 0x02, 0x45, 0x16, 0x40, 0x86, + 0x02, 0x69, 0x14, 0xbd, 0x19, 0x02, 0x70, 0x34, + 0x42, 0x86, 0x02, 0xce, 0x19, 0xa1, 0x87, 0x02, + 0xc3, 0x13, 0x42, 0x86, 0x02, 0x68, 0x14, 0x08, + 0x87, 0x02, 0xd1, 0x19, 0x7c, 0x13, 0x02, 0x68, + 0x14, 0x92, 0x16, 0x02, 0xb6, 0x16, 0x40, 0x86, + 0x02, 0x37, 0x39, 0x42, 0x86, 0x03, 0xce, 0x19, + 0x42, 0x86, 0xa1, 0x87, 0x03, 0x68, 0x14, 0x67, + 0x14, 0x67, 0x14, 0x02, 0xdd, 0x39, 0x40, 0x86, + 0x02, 0xcf, 0x19, 0x42, 0x86, 0x02, 0xd1, 0x19, + 0x2c, 0x15, 0x02, 0x4b, 0x13, 0xe9, 0x17, 0x02, + 0x68, 0x14, 0x67, 0x14, 0x02, 0xcb, 0x93, 0x40, + 0x86, 0x02, 0x6e, 0x34, 0x42, 0x86, 0x02, 0xcb, + 0x33, 0x40, 0x86, 0x02, 0x81, 0x34, 0x40, 0x86, + 0x02, 0xb6, 0x36, 0xa1, 0x87, 0x02, 0x45, 0x36, + 0x42, 0x86, 0x02, 0xb4, 0x16, 0x42, 0x86, 0x02, + 0x69, 0x14, 0x73, 0x13, 0x04, 0x69, 0x14, 0x69, + 0x14, 0x67, 0x14, 0x66, 0x14, 0x02, 0x35, 0x39, + 0x42, 0x86, 0x02, 0x68, 0x14, 0x93, 0x13, 0x02, + 0xb6, 0x36, 0x42, 0x86, 0x03, 0x68, 0x14, 0x69, + 0x14, 0x66, 0x14, 0x02, 0xce, 0x39, 0x40, 0x86, + 0x02, 0x4e, 0x16, 0x40, 0x86, 0x02, 0x87, 0x34, + 0x42, 0x86, 0x02, 0x86, 0x14, 0x42, 0x86, 0x02, + 0xd6, 0x39, 0x42, 0x86, 0x02, 0xc4, 0x33, 0x42, + 0x86, 0x02, 0x69, 0x34, 0x96, 0x86, 0x02, 0xb9, + 0x39, 0x40, 0x86, 0x02, 0x68, 0x14, 0xa8, 0x13, + 0x02, 0xd1, 0x19, 0x84, 0x13, 0x02, 0xda, 0x19, + 0x40, 0x86, 0x02, 
0xd8, 0x19, 0x42, 0x86, 0x02, + 0xc3, 0x13, 0x40, 0x86, 0x02, 0xb9, 0x19, 0x42, + 0x86, 0x02, 0x3d, 0x19, 0x42, 0x86, 0x02, 0xcf, + 0x19, 0x40, 0x86, 0x04, 0x68, 0x14, 0x68, 0x14, + 0x67, 0x14, 0x67, 0x14, 0x03, 0xd1, 0x19, 0xd1, + 0x19, 0xd2, 0x19, 0x02, 0x68, 0x14, 0xbb, 0x14, + 0x02, 0x3b, 0x14, 0x44, 0x87, 0x02, 0xd1, 0x19, + 0x27, 0x15, 0x02, 0xb4, 0x16, 0x40, 0x86, 0x02, + 0xcd, 0x19, 0x42, 0x86, 0x02, 0xd3, 0x86, 0xa5, + 0x14, 0x02, 0x70, 0x14, 0x42, 0x86, 0x03, 0xb6, + 0x16, 0x42, 0x86, 0xa1, 0x87, 0x04, 0x69, 0x14, + 0x64, 0x87, 0x8b, 0x14, 0x69, 0x14, 0x02, 0x36, + 0x16, 0x2b, 0x93, 0x02, 0x68, 0x14, 0x80, 0x16, + 0x02, 0x86, 0x14, 0x40, 0x86, 0x02, 0x08, 0x14, + 0x1b, 0x0b, 0x02, 0xd1, 0x19, 0xbc, 0x19, 0x02, + 0xca, 0x13, 0x42, 0x86, 0x02, 0x41, 0x94, 0xe8, + 0x95, 0x02, 0xd8, 0x19, 0x40, 0x86, 0x02, 0xb9, + 0x19, 0x40, 0x86, 0x02, 0xd1, 0x19, 0xed, 0x13, + 0x02, 0xf9, 0x86, 0x42, 0x86, 0x03, 0xd1, 0x19, + 0xbd, 0x19, 0xa1, 0x87, 0x02, 0x3d, 0x19, 0x40, + 0x86, 0x02, 0xd6, 0x19, 0x42, 0x86, 0x03, 0x69, + 0x14, 0x66, 0x14, 0x66, 0x14, 0x02, 0xd1, 0x19, + 0xaf, 0x19, 0x03, 0x69, 0x14, 0x69, 0x14, 0x67, + 0x14, 0x02, 0xcd, 0x19, 0x40, 0x86, 0x02, 0x70, + 0x14, 0x40, 0x86, 0x03, 0x68, 0x14, 0xbc, 0x19, + 0xa1, 0x87, 0x02, 0x6e, 0x14, 0x42, 0x86, 0x02, + 0x69, 0x14, 0x92, 0x16, 0x03, 0x68, 0x14, 0x68, + 0x14, 0x67, 0x14, 0x02, 0x69, 0x14, 0x67, 0x14, + 0x02, 0x75, 0x95, 0x42, 0x86, 0x03, 0x69, 0x14, + 0x64, 0x87, 0x69, 0x14, 0x02, 0xd1, 0x19, 0xbc, + 0x14, 0x02, 0xdf, 0x19, 0x42, 0x86, 0x02, 0xca, + 0x13, 0x40, 0x86, 0x02, 0x82, 0x14, 0x42, 0x86, + 0x02, 0x69, 0x14, 0x93, 0x13, 0x02, 0x68, 0x14, + 0x7c, 0x13, 0x02, 0xf9, 0x86, 0x40, 0x86, 0x02, + 0xd6, 0x19, 0x40, 0x86, 0x02, 0x68, 0x14, 0x2c, + 0x15, 0x02, 0x69, 0x14, 0xa8, 0x13, 0x02, 0xd4, + 0x19, 0x42, 0x86, 0x04, 0x68, 0x14, 0x69, 0x14, + 0x66, 0x14, 0x66, 0x14, 0x02, 0x77, 0x14, 0x42, + 0x86, 0x02, 0x39, 0x19, 0x42, 0x86, 0x02, 0xd1, + 0x19, 0xa4, 0x13, 0x02, 0x6e, 0x14, 0x40, 0x86, + 0x03, 0xd1, 0x19, 
0xd2, 0x19, 0xd2, 0x19, 0x02, + 0x69, 0x14, 0xbb, 0x14, 0x02, 0xd1, 0x19, 0x96, + 0x86, 0x02, 0x75, 0x95, 0x40, 0x86, 0x04, 0x68, + 0x14, 0x64, 0x87, 0x8b, 0x14, 0x68, 0x14, 0x02, + 0xd1, 0x19, 0x3e, 0x13, 0x02, 0xdf, 0x19, 0x40, + 0x86, 0x02, 0x82, 0x14, 0x40, 0x86, 0x02, 0x44, + 0x13, 0xeb, 0x17, 0x02, 0xdd, 0x19, 0x42, 0x86, + 0x02, 0x69, 0x14, 0x80, 0x16, 0x03, 0x68, 0x14, + 0xaf, 0x19, 0xa1, 0x87, 0x02, 0xa3, 0x16, 0x42, + 0x86, 0x02, 0x69, 0x14, 0x96, 0x86, 0x02, 0x46, + 0x16, 0x42, 0x86, 0x02, 0xb6, 0x16, 0xa1, 0x87, + 0x02, 0x68, 0x14, 0x27, 0x15, 0x02, 0x26, 0x14, + 0x1b, 0x0b, 0x02, 0xd4, 0x19, 0x40, 0x86, 0x02, + 0x77, 0x14, 0x40, 0x86, 0x02, 0x39, 0x19, 0x40, + 0x86, 0x02, 0x37, 0x19, 0x42, 0x86, 0x03, 0x69, + 0x14, 0x67, 0x14, 0x66, 0x14, 0x03, 0xc3, 0x13, + 0x42, 0x86, 0xa1, 0x87, 0x02, 0x68, 0x14, 0xbc, + 0x19, 0x02, 0xd1, 0x19, 0xeb, 0x13, 0x04, 0x69, + 0x14, 0x69, 0x14, 0x67, 0x14, 0x67, 0x14, 0x02, + 0xd1, 0x19, 0x08, 0x87, 0x02, 0x68, 0x14, 0xed, + 0x13, 0x03, 0x69, 0x14, 0xbc, 0x19, 0xa1, 0x87, + 0x02, 0xdd, 0x19, 0x40, 0x86, 0x02, 0xc3, 0x13, + 0xa1, 0x87, 0x03, 0x68, 0x14, 0x66, 0x14, 0x66, + 0x14, 0x03, 0x68, 0x14, 0x69, 0x14, 0x67, 0x14, + 0x02, 0xa3, 0x16, 0x40, 0x86, 0x02, 0xdb, 0x19, + 0x42, 0x86, 0x02, 0x68, 0x14, 0xaf, 0x19, 0x02, + 0x46, 0x16, 0x40, 0x86, 0x02, 0x35, 0x16, 0xab, + 0x14, 0x02, 0x68, 0x14, 0x95, 0x86, 0x02, 0x42, + 0x16, 0x95, 0x81, 0x02, 0xc4, 0x13, 0x42, 0x86, + 0x02, 0x15, 0x14, 0xba, 0x19, 0x02, 0x69, 0x14, + 0x08, 0x87, 0x03, 0xd1, 0x19, 0x1d, 0x19, 0xd1, + 0x19, 0x02, 0x69, 0x14, 0x7c, 0x13, 0x02, 0x37, + 0x19, 0x40, 0x86, 0x02, 0x73, 0x14, 0x42, 0x86, + 0x02, 0x69, 0x14, 0x2c, 0x15, 0x02, 0xb5, 0x16, + 0x42, 0x86, 0x02, 0x35, 0x19, 0x42, 0x86, 0x04, + 0x68, 0x14, 0x69, 0x14, 0x67, 0x14, 0x66, 0x14, + 0x02, 0x64, 0x87, 0x25, 0x15, 0x02, 0x64, 0x87, + 0x79, 0x1a, 0x02, 0x68, 0x14, 0xbc, 0x14, 0x03, + 0xce, 0x19, 0x40, 0x86, 0xa1, 0x87, 0x02, 0x87, + 0x14, 0x42, 0x86, 0x02, 0x4d, 0x16, 0x42, 0x86, + 0x04, 0x68, 0x14, 
0x68, 0x14, 0x66, 0x14, 0x66, + 0x14, 0x02, 0xdb, 0x19, 0x40, 0x86, 0x02, 0xd9, + 0x19, 0x42, 0x86, 0x02, 0xc4, 0x13, 0x40, 0x86, + 0x02, 0xd1, 0x19, 0xbd, 0x19, 0x02, 0x68, 0x14, + 0xa4, 0x13, 0x02, 0x3e, 0x19, 0x42, 0x86, 0x02, + 0xf3, 0x93, 0xa7, 0x86, 0x03, 0x69, 0x14, 0xaf, + 0x19, 0xa1, 0x87, 0x02, 0xf3, 0x93, 0x08, 0x13, + 0x02, 0xd1, 0x19, 0xd2, 0x19, 0x02, 0x73, 0x14, + 0x40, 0x86, 0x02, 0xb5, 0x16, 0x40, 0x86, 0x02, + 0x35, 0x19, 0x40, 0x86, 0x02, 0x69, 0x14, 0x27, + 0x15, 0x02, 0xce, 0x19, 0x42, 0x86, 0x02, 0x71, + 0x14, 0x42, 0x86, 0x02, 0xd1, 0x19, 0x73, 0x13, + 0x02, 0x68, 0x14, 0x3e, 0x13, 0x02, 0xf4, 0x13, + 0x20, 0x86, 0x02, 0x87, 0x14, 0x40, 0x86, 0x03, + 0xb6, 0x16, 0x40, 0x86, 0xa1, 0x87, 0x02, 0x4d, + 0x16, 0x40, 0x86, 0x02, 0x69, 0x14, 0xbc, 0x19, + 0x02, 0x4b, 0x16, 0x42, 0x86, 0x02, 0xd9, 0x19, + 0x40, 0x86, 0x02, 0x3e, 0x19, 0x40, 0x86, 0x02, + 0x69, 0x14, 0xed, 0x13, 0x02, 0xd7, 0x19, 0x42, + 0x86, 0x02, 0xb8, 0x19, 0x42, 0x86, 0x03, 0x68, + 0x14, 0x67, 0x14, 0x66, 0x14, 0x02, 0x3c, 0x19, + 0x42, 0x86, 0x02, 0x68, 0x14, 0x66, 0x14, 0x03, + 0x68, 0x14, 0x64, 0x87, 0x68, 0x14, 0x02, 0x69, + 0x14, 0xaf, 0x19, 0x02, 0xce, 0x19, 0x40, 0x86, + 0x02, 0x71, 0x14, 0x40, 0x86, 0x02, 0x68, 0x14, + 0xeb, 0x13, 0x03, 0x68, 0x14, 0xbd, 0x19, 0xa1, + 0x87, 0x02, 0x6f, 0x14, 0x42, 0x86, 0x04, 0xd1, + 0x19, 0xd1, 0x19, 0xd2, 0x19, 0xd2, 0x19, 0x02, + 0x69, 0x14, 0xbc, 0x14, 0x02, 0xcc, 0x93, 0x42, + 0x86, 0x02, 0x4b, 0x16, 0x40, 0x86, 0x02, 0x26, + 0x19, 0x42, 0x86, 0x02, 0xd7, 0x19, 0x40, 0x86, +}; + #endif /* CONFIG_ALL_UNICODE */ -/* 64 tables / 33442 bytes, 5 index / 351 bytes */ +/* 71 tables / 36311 bytes, 5 index / 351 bytes */ diff --git a/libunicode.c b/libunicode.c index d1bf1e9..3791523 100644 --- a/libunicode.c +++ b/libunicode.c @@ -499,6 +499,9 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len, case CR_OP_XOR: is_in = (a_idx & 1) ^ (b_idx & 1); break; + case CR_OP_SUB: + is_in = (a_idx & 1) & ((b_idx & 1) ^ 1); + break; default: 
abort(); } @@ -511,14 +514,14 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len, return 0; } -int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len) +int cr_op1(CharRange *cr, const uint32_t *b_pt, int b_len, int op) { CharRange a = *cr; int ret; cr->len = 0; cr->size = 0; cr->points = NULL; - ret = cr_op(cr, a.points, a.len, b_pt, b_len, CR_OP_UNION); + ret = cr_op(cr, a.points, a.len, b_pt, b_len, op); cr_free(&a); return ret; } @@ -1282,8 +1285,6 @@ int unicode_script(CharRange *cr, script_idx = unicode_find_name(unicode_script_name_table, script_name); if (script_idx < 0) return -2; - /* Note: we remove the "Unknown" Script */ - script_idx += UNICODE_SCRIPT_Unknown + 1; is_common = (script_idx == UNICODE_SCRIPT_Common || script_idx == UNICODE_SCRIPT_Inherited); @@ -1313,17 +1314,21 @@ int unicode_script(CharRange *cr, n |= *p++; n += 96 + (1 << 12); } - if (type == 0) - v = 0; - else - v = *p++; c1 = c + n + 1; - if (v == script_idx) { - if (cr_add_interval(cr1, c, c1)) - goto fail; + if (type != 0) { + v = *p++; + if (v == script_idx || script_idx == UNICODE_SCRIPT_Unknown) { + if (cr_add_interval(cr1, c, c1)) + goto fail; + } } c = c1; } + if (script_idx == UNICODE_SCRIPT_Unknown) { + /* Unknown is all the characters outside scripts */ + if (cr_invert(cr1)) + goto fail; + } if (is_ext) { /* add the script extensions */ @@ -1554,6 +1559,7 @@ static int unicode_prop_ops(CharRange *cr, ...) 
cr2 = &stack[stack_len - 1]; cr3 = &stack[stack_len++]; cr_init(cr3, cr->mem_opaque, cr->realloc_func); + /* CR_OP_XOR may be used here */ if (cr_op(cr3, cr1->points, cr1->len, cr2->points, cr2->len, op - POP_UNION + CR_OP_UNION)) goto fail; @@ -1908,3 +1914,210 @@ BOOL lre_is_space_non_ascii(uint32_t c) } return FALSE; } + +#define SEQ_MAX_LEN 16 + +static int unicode_sequence_prop1(int seq_prop_idx, UnicodeSequencePropCB *cb, void *opaque, + CharRange *cr) +{ + int i, c, j; + uint32_t seq[SEQ_MAX_LEN]; + + switch(seq_prop_idx) { + case UNICODE_SEQUENCE_PROP_Basic_Emoji: + if (unicode_prop1(cr, UNICODE_PROP_Basic_Emoji1) < 0) + return -1; + for(i = 0; i < cr->len; i += 2) { + for(c = cr->points[i]; c < cr->points[i + 1]; c++) { + seq[0] = c; + cb(opaque, seq, 1); + } + } + + cr->len = 0; + + if (unicode_prop1(cr, UNICODE_PROP_Basic_Emoji2) < 0) + return -1; + for(i = 0; i < cr->len; i += 2) { + for(c = cr->points[i]; c < cr->points[i + 1]; c++) { + seq[0] = c; + seq[1] = 0xfe0f; + cb(opaque, seq, 2); + } + } + + break; + case UNICODE_SEQUENCE_PROP_RGI_Emoji_Modifier_Sequence: + if (unicode_prop1(cr, UNICODE_PROP_Emoji_Modifier_Base) < 0) + return -1; + for(i = 0; i < cr->len; i += 2) { + for(c = cr->points[i]; c < cr->points[i + 1]; c++) { + for(j = 0; j < 5; j++) { + seq[0] = c; + seq[1] = 0x1f3fb + j; + cb(opaque, seq, 2); + } + } + } + break; + case UNICODE_SEQUENCE_PROP_RGI_Emoji_Flag_Sequence: + if (unicode_prop1(cr, UNICODE_PROP_RGI_Emoji_Flag_Sequence) < 0) + return -1; + for(i = 0; i < cr->len; i += 2) { + for(c = cr->points[i]; c < cr->points[i + 1]; c++) { + int c0, c1; + c0 = c / 26; + c1 = c % 26; + seq[0] = 0x1F1E6 + c0; + seq[1] = 0x1F1E6 + c1; + cb(opaque, seq, 2); + } + } + break; + case UNICODE_SEQUENCE_PROP_RGI_Emoji_ZWJ_Sequence: + { + int len, code, pres, k, mod, mod_count, mod_pos[2], hc_pos, n_mod, n_hc, mod1; + int mod_idx, hc_idx, i0, i1; + const uint8_t *tab = unicode_rgi_emoji_zwj_sequence; + + for(i = 0; i < 
countof(unicode_rgi_emoji_zwj_sequence);) { + len = tab[i++]; + k = 0; + mod = 0; + mod_count = 0; + hc_pos = -1; + for(j = 0; j < len; j++) { + code = tab[i++]; + code |= tab[i++] << 8; + pres = code >> 15; + mod1 = (code >> 13) & 3; + code &= 0x1fff; + if (code < 0x1000) { + c = code + 0x2000; + } else { + c = 0x1f000 + (code - 0x1000); + } + if (c == 0x1f9b0) + hc_pos = k; + seq[k++] = c; + if (mod1 != 0) { + assert(mod_count < 2); + mod = mod1; + mod_pos[mod_count++] = k; + seq[k++] = 0; /* will be filled later */ + } + if (pres) { + seq[k++] = 0xfe0f; + } + if (j < len - 1) { + seq[k++] = 0x200d; + } + } + + /* generate all the variants */ + switch(mod) { + case 1: + n_mod = 5; + break; + case 2: + n_mod = 25; + break; + case 3: + n_mod = 20; + break; + default: + n_mod = 1; + break; + } + if (hc_pos >= 0) + n_hc = 4; + else + n_hc = 1; + for(hc_idx = 0; hc_idx < n_hc; hc_idx++) { + for(mod_idx = 0; mod_idx < n_mod; mod_idx++) { + if (hc_pos >= 0) + seq[hc_pos] = 0x1f9b0 + hc_idx; + + switch(mod) { + case 1: + seq[mod_pos[0]] = 0x1f3fb + mod_idx; + break; + case 2: + case 3: + i0 = mod_idx / 5; + i1 = mod_idx % 5; + /* avoid identical values */ + if (mod == 3 && i0 >= i1) + i0++; + seq[mod_pos[0]] = 0x1f3fb + i0; + seq[mod_pos[1]] = 0x1f3fb + i1; + break; + default: + break; + } +#if 0 + for(j = 0; j < k; j++) + printf(" %04x", seq[j]); + printf("\n"); +#endif + cb(opaque, seq, k); + } + } + } + } + break; + case UNICODE_SEQUENCE_PROP_RGI_Emoji_Tag_Sequence: + { + for(i = 0; i < countof(unicode_rgi_emoji_tag_sequence);) { + j = 0; + seq[j++] = 0x1F3F4; + for(;;) { + c = unicode_rgi_emoji_tag_sequence[i++]; + if (c == 0x00) + break; + seq[j++] = 0xe0000 + c; + } + seq[j++] = 0xe007f; + cb(opaque, seq, j); + } + } + break; + case UNICODE_SEQUENCE_PROP_Emoji_Keycap_Sequence: + if (unicode_prop1(cr, UNICODE_PROP_Emoji_Keycap_Sequence) < 0) + return -1; + for(i = 0; i < cr->len; i += 2) { + for(c = cr->points[i]; c < cr->points[i + 1]; c++) { + seq[0] = c; + seq[1]
= 0xfe0f; + seq[2] = 0x20e3; + cb(opaque, seq, 3); + } + } + break; + case UNICODE_SEQUENCE_PROP_RGI_Emoji: + /* all previous sequences */ + for(i = UNICODE_SEQUENCE_PROP_Basic_Emoji; i <= UNICODE_SEQUENCE_PROP_RGI_Emoji_ZWJ_Sequence; i++) { + int ret; + ret = unicode_sequence_prop1(i, cb, opaque, cr); + if (ret < 0) + return ret; + cr->len = 0; + } + break; + default: + return -2; + } + return 0; +} + +/* build a unicode sequence property */ +/* return -2 if not found, -1 if other error. 'cr' is used as temporary memory. */ +int unicode_sequence_prop(const char *prop_name, UnicodeSequencePropCB *cb, void *opaque, + CharRange *cr) +{ + int seq_prop_idx; + seq_prop_idx = unicode_find_name(unicode_sequence_prop_name_table, prop_name); + if (seq_prop_idx < 0) + return -2; + return unicode_sequence_prop1(seq_prop_idx, cb, opaque, cr); +} diff --git a/libunicode.h b/libunicode.h index cc2f244..5d964e4 100644 --- a/libunicode.h +++ b/libunicode.h @@ -45,6 +45,7 @@ typedef enum { CR_OP_UNION, CR_OP_INTER, CR_OP_XOR, + CR_OP_SUB, } CharRangeOpEnum; void cr_init(CharRange *cr, void *mem_opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size)); @@ -73,19 +74,18 @@ static inline int cr_add_interval(CharRange *cr, uint32_t c1, uint32_t c2) return 0; } -int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len); +int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len, + const uint32_t *b_pt, int b_len, int op); +int cr_op1(CharRange *cr, const uint32_t *b_pt, int b_len, int op); static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2) { uint32_t b_pt[2]; b_pt[0] = c1; b_pt[1] = c2 + 1; - return cr_union1(cr, b_pt, 2); + return cr_op1(cr, b_pt, 2, CR_OP_UNION); } -int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len, - const uint32_t *b_pt, int b_len, int op); - int cr_invert(CharRange *cr); int cr_regexp_canonicalize(CharRange *cr, int is_unicode); @@ -107,6 +107,10 @@ int unicode_script(CharRange *cr, const char *script_name, int
is_ext); int unicode_general_category(CharRange *cr, const char *gc_name); int unicode_prop(CharRange *cr, const char *prop_name); +typedef void UnicodeSequencePropCB(void *opaque, const uint32_t *buf, int len); +int unicode_sequence_prop(const char *prop_name, UnicodeSequencePropCB *cb, void *opaque, + CharRange *cr); + int lre_case_conv(uint32_t *res, uint32_t c, int conv_type); int lre_canonicalize(uint32_t c, int is_unicode); diff --git a/qjs.c b/qjs.c index 2eaa9ee..a88e39a 100644 --- a/qjs.c +++ b/qjs.c @@ -465,7 +465,7 @@ int main(int argc, char **argv) } /* loader for ES6 modules */ - JS_SetModuleLoaderFunc(rt, NULL, js_module_loader, NULL); + JS_SetModuleLoaderFunc2(rt, NULL, js_module_loader, js_module_check_attributes, NULL); if (dump_unhandled_promise_rejection) { JS_SetHostPromiseRejectionTracker(rt, js_std_promise_rejection_tracker, diff --git a/qjsc.c b/qjsc.c index f9e1928..e55ca61 100644 --- a/qjsc.c +++ b/qjsc.c @@ -170,14 +170,24 @@ static void dump_hex(FILE *f, const uint8_t *buf, size_t len) fprintf(f, "\n"); } +typedef enum { + CNAME_TYPE_SCRIPT, + CNAME_TYPE_MODULE, + CNAME_TYPE_JSON_MODULE, +} CNameTypeEnum; + static void output_object_code(JSContext *ctx, FILE *fo, JSValueConst obj, const char *c_name, - BOOL load_only) + CNameTypeEnum c_name_type) { uint8_t *out_buf; size_t out_buf_len; int flags; - flags = JS_WRITE_OBJ_BYTECODE; + + if (c_name_type == CNAME_TYPE_JSON_MODULE) + flags = 0; + else + flags = JS_WRITE_OBJ_BYTECODE; if (byte_swap) flags |= JS_WRITE_OBJ_BSWAP; out_buf = JS_WriteObject(ctx, &out_buf_len, obj, flags); @@ -186,7 +196,7 @@ static void output_object_code(JSContext *ctx, exit(1); } - namelist_add(&cname_list, c_name, NULL, load_only); + namelist_add(&cname_list, c_name, NULL, c_name_type); fprintf(fo, "const uint32_t %s_size = %u;\n\n", c_name, (unsigned int)out_buf_len); @@ -227,7 +237,8 @@ static void find_unique_cname(char *cname, size_t cname_size) } JSModuleDef *jsc_module_loader(JSContext *ctx, - const char 
*module_name, void *opaque) + const char *module_name, void *opaque, + JSValueConst attributes) { JSModuleDef *m; namelist_entry_t *e; @@ -249,9 +260,9 @@ JSModuleDef *jsc_module_loader(JSContext *ctx, } else { size_t buf_len; uint8_t *buf; - JSValue func_val; char cname[1024]; - + int res; + buf = js_load_file(ctx, &buf_len, module_name); if (!buf) { JS_ThrowReferenceError(ctx, "could not load module filename '%s'", @@ -259,21 +270,59 @@ JSModuleDef *jsc_module_loader(JSContext *ctx, return NULL; } - /* compile the module */ - func_val = JS_Eval(ctx, (char *)buf, buf_len, module_name, - JS_EVAL_TYPE_MODULE | JS_EVAL_FLAG_COMPILE_ONLY); - js_free(ctx, buf); - if (JS_IsException(func_val)) - return NULL; - get_c_name(cname, sizeof(cname), module_name); - if (namelist_find(&cname_list, cname)) { - find_unique_cname(cname, sizeof(cname)); - } - output_object_code(ctx, outfile, func_val, cname, TRUE); + res = js_module_test_json(ctx, attributes); + if (has_suffix(module_name, ".json") || res > 0) { + /* compile as JSON or JSON5 depending on "type" */ + JSValue val; + int flags; - /* the module is already referenced, so we must free it */ - m = JS_VALUE_GET_PTR(func_val); - JS_FreeValue(ctx, func_val); + if (res == 2) + flags = JS_PARSE_JSON_EXT; + else + flags = 0; + val = JS_ParseJSON2(ctx, (char *)buf, buf_len, module_name, flags); + js_free(ctx, buf); + if (JS_IsException(val)) + return NULL; + /* create a dummy module */ + m = JS_NewCModule(ctx, module_name, js_module_dummy_init); + if (!m) { + JS_FreeValue(ctx, val); + return NULL; + } + + get_c_name(cname, sizeof(cname), module_name); + if (namelist_find(&cname_list, cname)) { + find_unique_cname(cname, sizeof(cname)); + } + + /* output the module name */ + fprintf(outfile, "static const uint8_t %s_module_name[] = {\n", + cname); + dump_hex(outfile, (const uint8_t *)module_name, strlen(module_name) + 1); + fprintf(outfile, "};\n\n"); + + output_object_code(ctx, outfile, val, cname, CNAME_TYPE_JSON_MODULE); + 
JS_FreeValue(ctx, val); + } else { + JSValue func_val; + + /* compile the module */ + func_val = JS_Eval(ctx, (char *)buf, buf_len, module_name, + JS_EVAL_TYPE_MODULE | JS_EVAL_FLAG_COMPILE_ONLY); + js_free(ctx, buf); + if (JS_IsException(func_val)) + return NULL; + get_c_name(cname, sizeof(cname), module_name); + if (namelist_find(&cname_list, cname)) { + find_unique_cname(cname, sizeof(cname)); + } + output_object_code(ctx, outfile, func_val, cname, CNAME_TYPE_MODULE); + + /* the module is already referenced, so we must free it */ + m = JS_VALUE_GET_PTR(func_val); + JS_FreeValue(ctx, func_val); + } } return m; } @@ -313,8 +362,11 @@ static void compile_file(JSContext *ctx, FILE *fo, pstrcpy(c_name, sizeof(c_name), c_name1); } else { get_c_name(c_name, sizeof(c_name), filename); + if (namelist_find(&cname_list, c_name)) { + find_unique_cname(c_name, sizeof(c_name)); + } } - output_object_code(ctx, fo, obj, c_name, FALSE); + output_object_code(ctx, fo, obj, c_name, CNAME_TYPE_SCRIPT); JS_FreeValue(ctx, obj); } @@ -709,7 +761,7 @@ int main(int argc, char **argv) JS_SetStripInfo(rt, strip_flags); /* loader for ES6 modules */ - JS_SetModuleLoaderFunc(rt, NULL, jsc_module_loader, NULL); + JS_SetModuleLoaderFunc2(rt, NULL, jsc_module_loader, NULL, NULL); fprintf(fo, "/* File generated automatically by the QuickJS compiler. 
*/\n" "\n" @@ -732,7 +784,7 @@ int main(int argc, char **argv) } for(i = 0; i < dynamic_module_list.count; i++) { - if (!jsc_module_loader(ctx, dynamic_module_list.array[i].name, NULL)) { + if (!jsc_module_loader(ctx, dynamic_module_list.array[i].name, NULL, JS_UNDEFINED)) { fprintf(stderr, "Could not load dynamic module '%s'\n", dynamic_module_list.array[i].name); exit(1); @@ -770,9 +822,12 @@ int main(int argc, char **argv) } for(i = 0; i < cname_list.count; i++) { namelist_entry_t *e = &cname_list.array[i]; - if (e->flags) { + if (e->flags == CNAME_TYPE_MODULE) { fprintf(fo, " js_std_eval_binary(ctx, %s, %s_size, 1);\n", e->name, e->name); + } else if (e->flags == CNAME_TYPE_JSON_MODULE) { + fprintf(fo, " js_std_eval_binary_json_module(ctx, %s, %s_size, (const char *)%s_module_name);\n", + e->name, e->name, e->name); } } fprintf(fo, @@ -788,7 +843,7 @@ int main(int argc, char **argv) /* add the module loader if necessary */ if (feature_bitmap & (1 << FE_MODULE_LOADER)) { - fprintf(fo, " JS_SetModuleLoaderFunc(rt, NULL, js_module_loader, NULL);\n"); + fprintf(fo, " JS_SetModuleLoaderFunc2(rt, NULL, js_module_loader, js_module_check_attributes, NULL);\n"); } fprintf(fo, @@ -797,7 +852,7 @@ int main(int argc, char **argv) for(i = 0; i < cname_list.count; i++) { namelist_entry_t *e = &cname_list.array[i]; - if (!e->flags) { + if (e->flags == CNAME_TYPE_SCRIPT) { fprintf(fo, " js_std_eval_binary(ctx, %s, %s_size, 0);\n", e->name, e->name); } diff --git a/quickjs-atom.h b/quickjs-atom.h index 73766f2..425c2e9 100644 --- a/quickjs-atom.h +++ b/quickjs-atom.h @@ -177,6 +177,12 @@ DEF(minus_zero, "-0") DEF(Infinity, "Infinity") DEF(minus_Infinity, "-Infinity") DEF(NaN, "NaN") +DEF(hasIndices, "hasIndices") +DEF(ignoreCase, "ignoreCase") +DEF(multiline, "multiline") +DEF(dotAll, "dotAll") +DEF(sticky, "sticky") +DEF(unicodeSets, "unicodeSets") /* the following 3 atoms are only used with CONFIG_ATOMICS */ DEF(not_equal, "not-equal") DEF(timed_out, "timed-out") @@ -211,6 
+217,7 @@ DEF(Int32Array, "Int32Array") DEF(Uint32Array, "Uint32Array") DEF(BigInt64Array, "BigInt64Array") DEF(BigUint64Array, "BigUint64Array") +DEF(Float16Array, "Float16Array") DEF(Float32Array, "Float32Array") DEF(Float64Array, "Float64Array") DEF(DataView, "DataView") diff --git a/quickjs-libc.c b/quickjs-libc.c index 6317ca4..d8b4a79 100644 --- a/quickjs-libc.c +++ b/quickjs-libc.c @@ -136,11 +136,18 @@ typedef struct { JSValue on_message_func; } JSWorkerMessageHandler; +typedef struct { + struct list_head link; + JSValue promise; + JSValue reason; +} JSRejectedPromiseEntry; + typedef struct JSThreadState { struct list_head os_rw_handlers; /* list of JSOSRWHandler.link */ struct list_head os_signal_handlers; /* list JSOSSignalHandler.link */ struct list_head os_timers; /* list of JSOSTimer.link */ struct list_head port_list; /* list of JSWorkerMessageHandler.link */ + struct list_head rejected_promise_list; /* list of JSRejectedPromiseEntry.link */ int eval_script_recurse; /* only used in the main thread */ int next_timer_id; /* for setTimeout() */ /* not used in the main thread */ @@ -160,6 +167,7 @@ static BOOL my_isdigit(int c) return (c >= '0' && c <= '9'); } +/* XXX: use 'o' and 'O' for object using JS_PrintValue() ? 
*/ static JSValue js_printf_internal(JSContext *ctx, int argc, JSValueConst *argv, FILE *fp) { @@ -583,17 +591,101 @@ int js_module_set_import_meta(JSContext *ctx, JSValueConst func_val, return 0; } -JSModuleDef *js_module_loader(JSContext *ctx, - const char *module_name, void *opaque) +static int json_module_init(JSContext *ctx, JSModuleDef *m) +{ + JSValue val; + val = JS_GetModulePrivateValue(ctx, m); + JS_SetModuleExport(ctx, m, "default", val); + return 0; +} + +static JSModuleDef *create_json_module(JSContext *ctx, const char *module_name, JSValue val) { JSModuleDef *m; + m = JS_NewCModule(ctx, module_name, json_module_init); + if (!m) { + JS_FreeValue(ctx, val); + return NULL; + } + /* only export the "default" symbol which will contain the JSON object */ + JS_AddModuleExport(ctx, m, "default"); + JS_SetModulePrivateValue(ctx, m, val); + return m; +} +/* in order to conform with the specification, only the keys should be + tested and not the associated values. */ +int js_module_check_attributes(JSContext *ctx, void *opaque, + JSValueConst attributes) +{ + JSPropertyEnum *tab; + uint32_t i, len; + int ret; + const char *cstr; + size_t cstr_len; + + if (JS_GetOwnPropertyNames(ctx, &tab, &len, attributes, JS_GPN_ENUM_ONLY | JS_GPN_STRING_MASK)) + return -1; + ret = 0; + for(i = 0; i < len; i++) { + cstr = JS_AtomToCStringLen(ctx, &cstr_len, tab[i].atom); + if (!cstr) { + ret = -1; + break; + } + if (!(cstr_len == 4 && !memcmp(cstr, "type", cstr_len))) { + JS_ThrowTypeError(ctx, "import attribute '%s' is not supported", cstr); + ret = -1; + } + JS_FreeCString(ctx, cstr); + if (ret) + break; + } + JS_FreePropertyEnum(ctx, tab, len); + return ret; +} + +/* return > 0 if the attributes indicate a JSON module */ +int js_module_test_json(JSContext *ctx, JSValueConst attributes) +{ + JSValue str; + const char *cstr; + size_t len; + BOOL res; + + if (JS_IsUndefined(attributes)) + return FALSE; + str = JS_GetPropertyStr(ctx, attributes, "type"); + if (!JS_IsString(str)) 
+ return FALSE; + cstr = JS_ToCStringLen(ctx, &len, str); + JS_FreeValue(ctx, str); + if (!cstr) + return FALSE; + /* XXX: raise an error if unknown type ? */ + if (len == 4 && !memcmp(cstr, "json", len)) { + res = 1; + } else if (len == 5 && !memcmp(cstr, "json5", len)) { + res = 2; + } else { + res = 0; + } + JS_FreeCString(ctx, cstr); + return res; +} + +JSModuleDef *js_module_loader(JSContext *ctx, + const char *module_name, void *opaque, + JSValueConst attributes) +{ + JSModuleDef *m; + int res; + if (has_suffix(module_name, ".so")) { m = js_module_loader_so(ctx, module_name); } else { size_t buf_len; uint8_t *buf; - JSValue func_val; buf = js_load_file(ctx, &buf_len, module_name); if (!buf) { @@ -601,18 +693,36 @@ JSModuleDef *js_module_loader(JSContext *ctx, module_name); return NULL; } - - /* compile the module */ - func_val = JS_Eval(ctx, (char *)buf, buf_len, module_name, - JS_EVAL_TYPE_MODULE | JS_EVAL_FLAG_COMPILE_ONLY); - js_free(ctx, buf); - if (JS_IsException(func_val)) - return NULL; - /* XXX: could propagate the exception */ - js_module_set_import_meta(ctx, func_val, TRUE, FALSE); - /* the module is already referenced, so we must free it */ - m = JS_VALUE_GET_PTR(func_val); - JS_FreeValue(ctx, func_val); + res = js_module_test_json(ctx, attributes); + if (has_suffix(module_name, ".json") || res > 0) { + /* compile as JSON or JSON5 depending on "type" */ + JSValue val; + int flags; + if (res == 2) + flags = JS_PARSE_JSON_EXT; + else + flags = 0; + val = JS_ParseJSON2(ctx, (char *)buf, buf_len, module_name, flags); + js_free(ctx, buf); + if (JS_IsException(val)) + return NULL; + m = create_json_module(ctx, module_name, val); + if (!m) + return NULL; + } else { + JSValue func_val; + /* compile the module */ + func_val = JS_Eval(ctx, (char *)buf, buf_len, module_name, + JS_EVAL_TYPE_MODULE | JS_EVAL_FLAG_COMPILE_ONLY); + js_free(ctx, buf); + if (JS_IsException(func_val)) + return NULL; + /* XXX: could propagate the exception */ + 
js_module_set_import_meta(ctx, func_val, TRUE, FALSE); + /* the module is already referenced, so we must free it */ + m = JS_VALUE_GET_PTR(func_val); + JS_FreeValue(ctx, func_val); + } } return m; } @@ -804,7 +914,7 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val, /* convert the uncatchable "interrupted" error into a normal error so that it can be caught by the REPL */ if (JS_IsException(ret)) - JS_ResetUncatchableError(ctx); + JS_SetUncatchableException(ctx, FALSE); } return ret; } @@ -1083,6 +1193,19 @@ static JSValue js_std_file_printf(JSContext *ctx, JSValueConst this_val, return js_printf_internal(ctx, argc, argv, f); } +static void js_print_value_write(void *opaque, const char *buf, size_t len) +{ + FILE *fo = opaque; + fwrite(buf, 1, len, fo); +} + +static JSValue js_std_file_printObject(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv) +{ + JS_PrintValue(ctx, js_print_value_write, stdout, argv[0], NULL); + return JS_UNDEFINED; +} + static JSValue js_std_file_flush(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { @@ -1540,6 +1663,7 @@ static const JSCFunctionListEntry js_std_funcs[] = { JS_PROP_INT32_DEF("SEEK_CUR", SEEK_CUR, JS_PROP_CONFIGURABLE ), JS_PROP_INT32_DEF("SEEK_END", SEEK_END, JS_PROP_CONFIGURABLE ), JS_OBJECT_DEF("Error", js_std_error_props, countof(js_std_error_props), JS_PROP_CONFIGURABLE), + JS_CFUNC_DEF("__printObject", 1, js_std_file_printObject ), }; static const JSCFunctionListEntry js_std_file_proto_funcs[] = { @@ -2916,9 +3040,7 @@ static char **build_envp(JSContext *ctx, JSValueConst obj) JS_FreeCString(ctx, str); } done: - for(i = 0; i < len; i++) - JS_FreeAtom(ctx, tab[i].atom); - js_free(ctx, tab); + JS_FreePropertyEnum(ctx, tab, len); return envp; fail: if (envp) { @@ -3470,7 +3592,7 @@ static void *worker_func(void *opaque) JS_SetStripInfo(rt, args->strip_flags); js_std_init_handlers(rt); - JS_SetModuleLoaderFunc(rt, NULL, js_module_loader, NULL); + 
JS_SetModuleLoaderFunc2(rt, NULL, js_module_loader, js_module_check_attributes, NULL); /* set the pipe to communicate with the parent */ ts = JS_GetRuntimeOpaque(rt); @@ -3903,17 +4025,23 @@ static JSValue js_print(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { int i; - const char *str; - size_t len; - + JSValueConst v; + for(i = 0; i < argc; i++) { if (i != 0) putchar(' '); - str = JS_ToCStringLen(ctx, &len, argv[i]); - if (!str) - return JS_EXCEPTION; - fwrite(str, 1, len, stdout); - JS_FreeCString(ctx, str); + v = argv[i]; + if (JS_IsString(v)) { + const char *str; + size_t len; + str = JS_ToCStringLen(ctx, &len, v); + if (!str) + return JS_EXCEPTION; + fwrite(str, 1, len, stdout); + JS_FreeCString(ctx, str); + } else { + JS_PrintValue(ctx, js_print_value_write, stdout, v, NULL); + } } putchar('\n'); return JS_UNDEFINED; @@ -3977,6 +4105,7 @@ void js_std_init_handlers(JSRuntime *rt) init_list_head(&ts->os_signal_handlers); init_list_head(&ts->os_timers); init_list_head(&ts->port_list); + init_list_head(&ts->rejected_promise_list); ts->next_timer_id = 1; JS_SetRuntimeOpaque(rt, ts); @@ -4014,6 +4143,13 @@ void js_std_free_handlers(JSRuntime *rt) free_timer(rt, th); } + list_for_each_safe(el, el1, &ts->rejected_promise_list) { + JSRejectedPromiseEntry *rp = list_entry(el, JSRejectedPromiseEntry, link); + JS_FreeValueRT(rt, rp->promise); + JS_FreeValueRT(rt, rp->reason); + free(rp); + } + #ifdef USE_WORKER /* XXX: free port_list ? 
*/ js_free_message_pipe(ts->recv_pipe); @@ -4024,33 +4160,10 @@ void js_std_free_handlers(JSRuntime *rt) JS_SetRuntimeOpaque(rt, NULL); /* fail safe */ } -static void js_dump_obj(JSContext *ctx, FILE *f, JSValueConst val) -{ - const char *str; - - str = JS_ToCString(ctx, val); - if (str) { - fprintf(f, "%s\n", str); - JS_FreeCString(ctx, str); - } else { - fprintf(f, "[exception]\n"); - } -} - static void js_std_dump_error1(JSContext *ctx, JSValueConst exception_val) { - JSValue val; - BOOL is_error; - - is_error = JS_IsError(ctx, exception_val); - js_dump_obj(ctx, stderr, exception_val); - if (is_error) { - val = JS_GetPropertyStr(ctx, exception_val, "stack"); - if (!JS_IsUndefined(val)) { - js_dump_obj(ctx, stderr, val); - } - JS_FreeValue(ctx, val); - } + JS_PrintValue(ctx, js_print_value_write, stderr, exception_val, NULL); + fputc('\n', stderr); } void js_std_dump_error(JSContext *ctx) @@ -4062,13 +4175,66 @@ void js_std_dump_error(JSContext *ctx) JS_FreeValue(ctx, exception_val); } +static JSRejectedPromiseEntry *find_rejected_promise(JSContext *ctx, JSThreadState *ts, + JSValueConst promise) +{ + struct list_head *el; + + list_for_each(el, &ts->rejected_promise_list) { + JSRejectedPromiseEntry *rp = list_entry(el, JSRejectedPromiseEntry, link); + if (JS_SameValue(ctx, rp->promise, promise)) + return rp; + } + return NULL; +} + void js_std_promise_rejection_tracker(JSContext *ctx, JSValueConst promise, JSValueConst reason, BOOL is_handled, void *opaque) { + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = JS_GetRuntimeOpaque(rt); + JSRejectedPromiseEntry *rp; + if (!is_handled) { - fprintf(stderr, "Possibly unhandled promise rejection: "); - js_std_dump_error1(ctx, reason); + /* add a new entry if needed */ + rp = find_rejected_promise(ctx, ts, promise); + if (!rp) { + rp = malloc(sizeof(*rp)); + if (rp) { + rp->promise = JS_DupValue(ctx, promise); + rp->reason = JS_DupValue(ctx, reason); + list_add_tail(&rp->link, &ts->rejected_promise_list); + } + 
} + } else { + /* the rejection is handled, so the entry can be removed if present */ + rp = find_rejected_promise(ctx, ts, promise); + if (rp) { + JS_FreeValue(ctx, rp->promise); + JS_FreeValue(ctx, rp->reason); + list_del(&rp->link); + free(rp); + } + } +} + +/* check if there are pending promise rejections. It must be done + asynchronously in case a rejected promise is handled later. Currently + we do it once the application is about to sleep. It could be done + more often if needed. */ +static void js_std_promise_rejection_check(JSContext *ctx) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = JS_GetRuntimeOpaque(rt); + struct list_head *el; + + if (unlikely(!list_empty(&ts->rejected_promise_list))) { + list_for_each(el, &ts->rejected_promise_list) { + JSRejectedPromiseEntry *rp = list_entry(el, JSRejectedPromiseEntry, link); + fprintf(stderr, "Possibly unhandled promise rejection: "); + js_std_dump_error1(ctx, rp->reason); + } exit(1); } } @@ -4076,21 +4242,21 @@ void js_std_promise_rejection_tracker(JSContext *ctx, JSValueConst promise, /* main loop which calls the user JS callbacks */ void js_std_loop(JSContext *ctx) { - JSContext *ctx1; int err; for(;;) { /* execute the pending jobs */ for(;;) { - err = JS_ExecutePendingJob(JS_GetRuntime(ctx), &ctx1); + err = JS_ExecutePendingJob(JS_GetRuntime(ctx), NULL); if (err <= 0) { - if (err < 0) { - js_std_dump_error(ctx1); - } + if (err < 0) + js_std_dump_error(ctx); break; } } + js_std_promise_rejection_check(ctx); + if (!os_poll_func || os_poll_func(ctx)) break; } @@ -4115,13 +4281,14 @@ JSValue js_std_await(JSContext *ctx, JSValue obj) JS_FreeValue(ctx, obj); break; } else if (state == JS_PROMISE_PENDING) { - JSContext *ctx1; int err; - err = JS_ExecutePendingJob(JS_GetRuntime(ctx), &ctx1); + err = JS_ExecutePendingJob(JS_GetRuntime(ctx), NULL); if (err < 0) { - js_std_dump_error(ctx1); + js_std_dump_error(ctx); } if (err == 0) { + js_std_promise_rejection_check(ctx); + if (os_poll_func)
os_poll_func(ctx); } @@ -4145,6 +4312,7 @@ void js_std_eval_binary(JSContext *ctx, const uint8_t *buf, size_t buf_len, if (JS_VALUE_GET_TAG(obj) == JS_TAG_MODULE) { js_module_set_import_meta(ctx, obj, FALSE, FALSE); } + JS_FreeValue(ctx, obj); } else { if (JS_VALUE_GET_TAG(obj) == JS_TAG_MODULE) { if (JS_ResolveModule(ctx, obj) < 0) { @@ -4165,3 +4333,22 @@ void js_std_eval_binary(JSContext *ctx, const uint8_t *buf, size_t buf_len, JS_FreeValue(ctx, val); } } + +void js_std_eval_binary_json_module(JSContext *ctx, + const uint8_t *buf, size_t buf_len, + const char *module_name) +{ + JSValue obj; + JSModuleDef *m; + + obj = JS_ReadObject(ctx, buf, buf_len, 0); + if (JS_IsException(obj)) + goto exception; + m = create_json_module(ctx, module_name, obj); + if (!m) { + exception: + js_std_dump_error(ctx); + exit(1); + } +} + diff --git a/quickjs-libc.h b/quickjs-libc.h index 850484f..5c8301b 100644 --- a/quickjs-libc.h +++ b/quickjs-libc.h @@ -44,10 +44,16 @@ void js_std_dump_error(JSContext *ctx); uint8_t *js_load_file(JSContext *ctx, size_t *pbuf_len, const char *filename); int js_module_set_import_meta(JSContext *ctx, JSValueConst func_val, JS_BOOL use_realpath, JS_BOOL is_main); +int js_module_test_json(JSContext *ctx, JSValueConst attributes); +int js_module_check_attributes(JSContext *ctx, void *opaque, JSValueConst attributes); JSModuleDef *js_module_loader(JSContext *ctx, - const char *module_name, void *opaque); + const char *module_name, void *opaque, + JSValueConst attributes); void js_std_eval_binary(JSContext *ctx, const uint8_t *buf, size_t buf_len, int flags); +void js_std_eval_binary_json_module(JSContext *ctx, + const uint8_t *buf, size_t buf_len, + const char *module_name); void js_std_promise_rejection_tracker(JSContext *ctx, JSValueConst promise, JSValueConst reason, JS_BOOL is_handled, void *opaque); diff --git a/quickjs-opcode.h b/quickjs-opcode.h index f20fb11..814a7cb 100644 --- a/quickjs-opcode.h +++ b/quickjs-opcode.h @@ -121,7 +121,7 @@ DEF( 
apply_eval, 3, 2, 1, u16) /* func array -> ret_eval */ DEF( regexp, 1, 2, 1, none) /* create a RegExp object from the pattern and a bytecode string */ DEF( get_super, 1, 1, 1, none) -DEF( import, 1, 1, 1, none) /* dynamic module import */ +DEF( import, 1, 2, 1, none) /* dynamic module import */ DEF( check_var, 5, 0, 1, atom) /* check if a variable exists */ DEF( get_var_undef, 5, 0, 1, atom) /* push undefined if the variable does not exist */ @@ -144,6 +144,7 @@ DEF( put_private_field, 1, 3, 0, none) /* obj value prop -> */ DEF(define_private_field, 1, 3, 1, none) /* obj prop value -> obj */ DEF( get_array_el, 1, 2, 1, none) DEF( get_array_el2, 1, 2, 2, none) /* obj prop -> obj value */ +DEF( get_array_el3, 1, 2, 3, none) /* obj prop -> obj prop1 value */ DEF( put_array_el, 1, 3, 0, none) DEF(get_super_value, 1, 3, 1, none) /* this obj prop -> value */ DEF(put_super_value, 1, 4, 0, none) /* this obj prop value -> */ @@ -189,7 +190,6 @@ DEF( nip_catch, 1, 2, 1, none) /* catch ... a -> a */ DEF( to_object, 1, 1, 1, none) //DEF( to_string, 1, 1, 1, none) DEF( to_propkey, 1, 1, 1, none) -DEF( to_propkey2, 1, 2, 2, none) DEF( with_get_var, 10, 1, 0, atom_label_u8) /* must be in the same order as scope_xxx */ DEF( with_put_var, 10, 2, 1, atom_label_u8) /* must be in the same order as scope_xxx */ diff --git a/quickjs.c b/quickjs.c index adfd93e..9b997f6 100644 --- a/quickjs.c +++ b/quickjs.c @@ -103,6 +103,7 @@ //#define DUMP_ATOMS /* dump atoms in JS_FreeContext */ //#define DUMP_SHAPES /* dump shapes in JS_FreeContext */ //#define DUMP_MODULE_RESOLVE +//#define DUMP_MODULE_EXEC //#define DUMP_PROMISE //#define DUMP_READ_OBJECT //#define DUMP_ROPE_REBALANCE @@ -147,6 +148,7 @@ enum { JS_CLASS_UINT32_ARRAY, /* u.array (typed_array) */ JS_CLASS_BIG_INT64_ARRAY, /* u.array (typed_array) */ JS_CLASS_BIG_UINT64_ARRAY, /* u.array (typed_array) */ + JS_CLASS_FLOAT16_ARRAY, /* u.array (typed_array) */ JS_CLASS_FLOAT32_ARRAY, /* u.array (typed_array) */ JS_CLASS_FLOAT64_ARRAY, 
/* u.array (typed_array) */ JS_CLASS_DATAVIEW, /* u.typed_array */ @@ -263,6 +265,8 @@ struct JSRuntime { uintptr_t stack_limit; /* lower stack limit */ JSValue current_exception; + /* true if the current exception cannot be caught */ + BOOL current_exception_is_uncatchable : 8; /* true if inside an out of memory error, to avoid recursing */ BOOL in_out_of_memory : 8; @@ -277,7 +281,12 @@ struct JSRuntime { struct list_head job_list; /* list of JSJobEntry.link */ JSModuleNormalizeFunc *module_normalize_func; - JSModuleLoaderFunc *module_loader_func; + BOOL module_loader_has_attr; + union { + JSModuleLoaderFunc *module_loader_func; + JSModuleLoaderFunc2 *module_loader_func2; + } u; + JSModuleCheckSupportedImportAttributes *module_check_attrs; void *module_loader_opaque; /* timestamp for internal use in module evaluation */ int64_t module_async_evaluation_next_timestamp; @@ -332,6 +341,7 @@ typedef enum { JS_GC_OBJ_TYPE_VAR_REF, JS_GC_OBJ_TYPE_ASYNC_FUNCTION, JS_GC_OBJ_TYPE_JS_CONTEXT, + JS_GC_OBJ_TYPE_MODULE, } JSGCObjectTypeEnum; /* header for GC objects.
GC objects are C data structures with a @@ -753,6 +763,7 @@ typedef struct { typedef struct JSReqModuleEntry { JSAtom module_name; JSModuleDef *module; /* used using resolution */ + JSValue attributes; /* JS_UNDEFINED or an object containing the attributes as key/value */ } JSReqModuleEntry; typedef enum JSExportTypeEnum { @@ -780,6 +791,7 @@ typedef struct JSStarExportEntry { typedef struct JSImportEntry { int var_idx; /* closure variable index */ + BOOL is_star; /* import_name = '*' is a valid import name, so need a flag */ JSAtom import_name; int req_module_idx; /* in req_module_entries */ } JSImportEntry; @@ -794,7 +806,7 @@ typedef enum { } JSModuleStatus; struct JSModuleDef { - JSRefCountHeader header; /* must come first, 32-bit */ + JSGCObjectHeader header; /* must come first */ JSAtom module_name; struct list_head link; @@ -829,7 +841,8 @@ struct JSModuleDef { int async_parent_modules_count; int async_parent_modules_size; int pending_async_dependencies; - BOOL async_evaluation; + BOOL async_evaluation; /* true: async_evaluation_timestamp corresponds to [[AsyncEvaluationOrder]] + false: [[AsyncEvaluationOrder]] is UNSET or DONE */ int64_t async_evaluation_timestamp; JSModuleDef *cycle_root; JSValue promise; /* corresponds to spec field: capability */ @@ -840,11 +853,12 @@ struct JSModuleDef { BOOL eval_has_exception : 8; JSValue eval_exception; JSValue meta_obj; /* for import.meta */ + JSValue private_value; /* private value for C modules */ }; typedef struct JSJobEntry { struct list_head link; - JSContext *ctx; + JSContext *realm; JSJobFunc *job_func; int argc; JSValue argv[0]; @@ -911,7 +925,7 @@ struct JSObject { uint8_t is_exotic : 1; /* TRUE if object has exotic property handlers */ uint8_t fast_array : 1; /* TRUE if u.array is used for get/put (for JS_CLASS_ARRAY, JS_CLASS_ARGUMENTS and typed arrays) */ uint8_t is_constructor : 1; /* TRUE if object is a constructor function */ - uint8_t is_uncatchable_error : 1; /* if TRUE, error is not catchable */ +
uint8_t has_immutable_prototype : 1; /* cannot modify the prototype */ uint8_t tmp_mark : 1; /* used in JS_WriteObjectRec() */ uint8_t is_HTMLDDA : 1; /* specific annex B IsHtmlDDA behavior */ uint16_t class_id; /* see JS_CLASS_x */ @@ -971,6 +985,7 @@ struct JSObject { uint32_t *uint32_ptr; /* JS_CLASS_UINT32_ARRAY */ int64_t *int64_ptr; /* JS_CLASS_INT64_ARRAY */ uint64_t *uint64_ptr; /* JS_CLASS_UINT64_ARRAY */ + uint16_t *fp16_ptr; /* JS_CLASS_FLOAT16_ARRAY */ float *float_ptr; /* JS_CLASS_FLOAT32_ARRAY */ double *double_ptr; /* JS_CLASS_FLOAT64_ARRAY */ } u; @@ -981,6 +996,27 @@ struct JSObject { } u; }; +typedef struct JSMapRecord { + int ref_count; /* used during enumeration to avoid freeing the record */ + BOOL empty : 8; /* TRUE if the record is deleted */ + struct list_head link; + struct JSMapRecord *hash_next; + JSValue key; + JSValue value; +} JSMapRecord; + +typedef struct JSMapState { + BOOL is_weak; /* TRUE if WeakSet/WeakMap */ + struct list_head records; /* list of JSMapRecord.link */ + uint32_t record_count; + JSMapRecord **hash_table; + int hash_bits; + uint32_t hash_size; /* = 2 ^ hash_bits */ + uint32_t record_count_threshold; /* count at which a hash table + resize is needed */ + JSWeakRefHeader weakref_header; /* only used if is_weak = TRUE */ +} JSMapState; + enum { __JS_ATOM_NULL = JS_ATOM_NULL, #define DEF(name, str) JS_ATOM_ ## name, @@ -1059,12 +1095,10 @@ static __maybe_unused void JS_DumpString(JSRuntime *rt, const JSString *p); static __maybe_unused void JS_DumpObjectHeader(JSRuntime *rt); static __maybe_unused void JS_DumpObject(JSRuntime *rt, JSObject *p); static __maybe_unused void JS_DumpGCObject(JSRuntime *rt, JSGCObjectHeader *p); -static __maybe_unused void JS_DumpValueShort(JSRuntime *rt, JSValueConst val); -static __maybe_unused void JS_DumpValue(JSContext *ctx, JSValueConst val); -static __maybe_unused void JS_PrintValue(JSContext *ctx, - const char *str, - JSValueConst val); +static __maybe_unused void 
JS_DumpValueRT(JSRuntime *rt, const char *str, JSValueConst val); +static __maybe_unused void JS_DumpValue(JSContext *ctx, const char *str, JSValueConst val); static __maybe_unused void JS_DumpShapes(JSRuntime *rt); +static void js_dump_value_write(void *opaque, const char *buf, size_t len); static JSValue js_function_apply(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int magic); static void js_array_finalizer(JSRuntime *rt, JSValue val); @@ -1189,11 +1223,11 @@ static void js_async_function_resolve_mark(JSRuntime *rt, JSValueConst val, static JSValue JS_EvalInternal(JSContext *ctx, JSValueConst this_obj, const char *input, size_t input_len, const char *filename, int flags, int scope_idx); -static void js_free_module_def(JSContext *ctx, JSModuleDef *m); +static void js_free_module_def(JSRuntime *rt, JSModuleDef *m); static void js_mark_module_def(JSRuntime *rt, JSModuleDef *m, JS_MarkFunc *mark_func); static JSValue js_import_meta(JSContext *ctx); -static JSValue js_dynamic_import(JSContext *ctx, JSValueConst specifier); +static JSValue js_dynamic_import(JSContext *ctx, JSValueConst specifier, JSValueConst options); static void free_var_ref(JSRuntime *rt, JSVarRef *var_ref); static JSValue js_new_promise_capability(JSContext *ctx, JSValue *resolving_funcs, @@ -1257,6 +1291,13 @@ static void finrec_delete_weakref(JSRuntime *rt, JSWeakRefHeader *wh); static void JS_RunGCInternal(JSRuntime *rt, BOOL remove_weak_objects); static JSValue js_array_from_iterator(JSContext *ctx, uint32_t *plen, JSValueConst obj, JSValueConst method); +static int js_string_find_invalid_codepoint(JSString *p); +static JSValue js_regexp_toString(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv); +static JSValue get_date_string(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv, int magic); +static JSValue js_error_toString(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv); static const JSClassExoticMethods 
js_arguments_exotic_methods; static const JSClassExoticMethods js_string_exotic_methods; @@ -1429,6 +1470,23 @@ static inline void js_dbuf_init(JSContext *ctx, DynBuf *s) dbuf_init2(s, ctx->rt, (DynBufReallocFunc *)js_realloc_rt); } +static void *js_realloc_bytecode_rt(void *opaque, void *ptr, size_t size) +{ + JSRuntime *rt = opaque; + if (size > (INT32_MAX / 2)) { + /* the bytecode cannot be larger than 2G. Leave some slack to + avoid some overflows. */ + return NULL; + } else { + return rt->mf.js_realloc(&rt->malloc_state, ptr, size); + } +} + +static inline void js_dbuf_bytecode_init(JSContext *ctx, DynBuf *s) +{ + dbuf_init2(s, ctx->rt, js_realloc_bytecode_rt); +} + static inline int is_digit(int c) { return c >= '0' && c <= '9'; } @@ -1473,6 +1531,7 @@ static JSClassShortDef const js_std_class_def[] = { { JS_ATOM_Uint32Array, js_typed_array_finalizer, js_typed_array_mark }, /* JS_CLASS_UINT32_ARRAY */ { JS_ATOM_BigInt64Array, js_typed_array_finalizer, js_typed_array_mark }, /* JS_CLASS_BIG_INT64_ARRAY */ { JS_ATOM_BigUint64Array, js_typed_array_finalizer, js_typed_array_mark }, /* JS_CLASS_BIG_UINT64_ARRAY */ + { JS_ATOM_Float16Array, js_typed_array_finalizer, js_typed_array_mark }, /* JS_CLASS_FLOAT16_ARRAY */ { JS_ATOM_Float32Array, js_typed_array_finalizer, js_typed_array_mark }, /* JS_CLASS_FLOAT32_ARRAY */ { JS_ATOM_Float64Array, js_typed_array_finalizer, js_typed_array_mark }, /* JS_CLASS_FLOAT64_ARRAY */ { JS_ATOM_DataView, js_typed_array_finalizer, js_typed_array_mark }, /* JS_CLASS_DATAVIEW */ @@ -1741,7 +1800,7 @@ int JS_EnqueueJob(JSContext *ctx, JSJobFunc *job_func, e = js_malloc(ctx, sizeof(*e) + argc * sizeof(JSValue)); if (!e) return -1; - e->ctx = ctx; + e->realm = JS_DupContext(ctx); e->job_func = job_func; e->argc = argc; for(i = 0; i < argc; i++) { @@ -1757,7 +1816,10 @@ BOOL JS_IsJobPending(JSRuntime *rt) } /* return < 0 if exception, 0 if no job pending, 1 if a job was - executed successfully. 
the context of the job is stored in '*pctx' */ + executed successfully. The context of the job is stored in '*pctx' + if pctx != NULL. It may be NULL if the context was already + destroyed or if no job was pending. The 'pctx' parameter is now + obsolete. */ int JS_ExecutePendingJob(JSRuntime *rt, JSContext **pctx) { JSContext *ctx; @@ -1766,15 +1828,16 @@ int JS_ExecutePendingJob(JSRuntime *rt, JSContext **pctx) int i, ret; if (list_empty(&rt->job_list)) { - *pctx = NULL; + if (pctx) + *pctx = NULL; return 0; } /* get the first pending job and execute it */ e = list_entry(rt->job_list.next, JSJobEntry, link); list_del(&e->link); - ctx = e->ctx; - res = e->job_func(e->ctx, e->argc, (JSValueConst *)e->argv); + ctx = e->realm; + res = e->job_func(ctx, e->argc, (JSValueConst *)e->argv); for(i = 0; i < e->argc; i++) JS_FreeValue(ctx, e->argv[i]); if (JS_IsException(res)) @@ -1783,7 +1846,13 @@ int JS_ExecutePendingJob(JSRuntime *rt, JSContext **pctx) ret = 1; JS_FreeValue(ctx, res); js_free(ctx, e); - *pctx = ctx; + if (pctx) { + if (ctx->header.ref_count > 1) + *pctx = ctx; + else + *pctx = NULL; + } + JS_FreeContext(ctx); return ret; } @@ -1864,6 +1933,7 @@ void JS_FreeRuntime(JSRuntime *rt) JSJobEntry *e = list_entry(el, JSJobEntry, link); for(i = 0; i < e->argc; i++) JS_FreeValueRT(rt, e->argv[i]); + JS_FreeContext(e->realm); js_free_rt(rt, e); } init_list_head(&rt->job_list); @@ -2139,7 +2209,13 @@ static void js_free_modules(JSContext *ctx, JSFreeModuleEnum flag) JSModuleDef *m = list_entry(el, JSModuleDef, link); if (flag == JS_FREE_MODULE_ALL || (flag == JS_FREE_MODULE_NOT_RESOLVED && !m->resolved)) { - js_free_module_def(ctx, m); + /* warning: the module may be referenced elsewhere.
It + could be simpler to use an array instead of a list for + 'ctx->loaded_modules' */ + list_del(&m->link); + m->link.prev = NULL; + m->link.next = NULL; + JS_FreeValue(ctx, JS_MKPTR(JS_TAG_MODULE, m)); } } } @@ -2157,11 +2233,9 @@ static void JS_MarkContext(JSRuntime *rt, JSContext *ctx, int i; struct list_head *el; - /* modules are not seen by the GC, so we directly mark the objects - referenced by each module */ list_for_each(el, &ctx->loaded_modules) { JSModuleDef *m = list_entry(el, JSModuleDef, link); - js_mark_module_def(rt, m, mark_func); + JS_MarkValue(rt, JS_MKPTR(JS_TAG_MODULE, m), mark_func); } JS_MarkValue(rt, ctx->global_obj, mark_func); @@ -2401,18 +2475,18 @@ static uint32_t hash_string_rope(JSValueConst val, uint32_t h) } } -static __maybe_unused void JS_DumpChar(JSRuntime *rt, int c, int sep) +static __maybe_unused void JS_DumpChar(FILE *fo, int c, int sep) { if (c == sep || c == '\\') { - putchar('\\'); - putchar(c); + fputc('\\', fo); + fputc(c, fo); } else if (c >= ' ' && c <= 126) { - putchar(c); + fputc(c, fo); } else if (c == '\n') { - putchar('\\'); - putchar('n'); + fputc('\\', fo); + fputc('n', fo); } else { - printf("\\u%04x", c); + fprintf(fo, "\\u%04x", c); } } @@ -2428,7 +2502,7 @@ static __maybe_unused void JS_DumpString(JSRuntime *rt, const JSString *p) sep = (p->header.ref_count == 1) ? 
'\"' : '\''; putchar(sep); for(i = 0; i < p->len; i++) { - JS_DumpChar(rt, string_get(p, i), sep); + JS_DumpChar(stdout, string_get(p, i), sep); } putchar(sep); } @@ -3172,54 +3246,19 @@ static BOOL JS_AtomSymbolHasDescription(JSContext *ctx, JSAtom v) !(p->len == 0 && p->is_wide_char != 0)); } -static __maybe_unused void print_atom(JSContext *ctx, JSAtom atom) -{ - char buf[ATOM_GET_STR_BUF_SIZE]; - const char *p; - int i; - - /* XXX: should handle embedded null characters */ - /* XXX: should move encoding code to JS_AtomGetStr */ - p = JS_AtomGetStr(ctx, buf, sizeof(buf), atom); - for (i = 0; p[i]; i++) { - int c = (unsigned char)p[i]; - if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || - (c == '_' || c == '$') || (c >= '0' && c <= '9' && i > 0))) - break; - } - if (i > 0 && p[i] == '\0') { - printf("%s", p); - } else { - putchar('"'); - printf("%.*s", i, p); - for (; p[i]; i++) { - int c = (unsigned char)p[i]; - if (c == '\"' || c == '\\') { - putchar('\\'); - putchar(c); - } else if (c >= ' ' && c <= 126) { - putchar(c); - } else if (c == '\n') { - putchar('\\'); - putchar('n'); - } else { - printf("\\u%04x", c); - } - } - putchar('\"'); - } -} - /* free with JS_FreeCString() */ -const char *JS_AtomToCString(JSContext *ctx, JSAtom atom) +const char *JS_AtomToCStringLen(JSContext *ctx, size_t *plen, JSAtom atom) { JSValue str; const char *cstr; str = JS_AtomToString(ctx, atom); - if (JS_IsException(str)) + if (JS_IsException(str)) { + if (plen) + *plen = 0; return NULL; - cstr = JS_ToCString(ctx, str); + } + cstr = JS_ToCStringLen(ctx, plen, str); JS_FreeValue(ctx, str); return cstr; } @@ -5050,7 +5089,7 @@ static JSValue JS_NewObjectFromShape(JSContext *ctx, JSShape *sh, JSClassID clas p->is_exotic = 0; p->fast_array = 0; p->is_constructor = 0; - p->is_uncatchable_error = 0; + p->has_immutable_prototype = 0; p->tmp_mark = 0; p->is_HTMLDDA = 0; p->weakref_count = 0; @@ -5100,6 +5139,7 @@ static JSValue JS_NewObjectFromShape(JSContext *ctx, JSShape *sh, 
JSClassID clas case JS_CLASS_UINT32_ARRAY: case JS_CLASS_BIG_INT64_ARRAY: case JS_CLASS_BIG_UINT64_ARRAY: + case JS_CLASS_FLOAT16_ARRAY: case JS_CLASS_FLOAT32_ARRAY: case JS_CLASS_FLOAT64_ARRAY: p->is_exotic = 1; @@ -5347,6 +5387,10 @@ static JSValue JS_NewCFunction3(JSContext *ctx, JSCFunction *func, if (!name) name = ""; name_atom = JS_NewAtom(ctx, name); + if (name_atom == JS_ATOM_NULL) { + JS_FreeValue(ctx, func_obj); + return JS_EXCEPTION; + } js_function_set_properties(ctx, func_obj, name_atom, length); JS_FreeAtom(ctx, name_atom); return func_obj; @@ -5772,6 +5816,9 @@ static void free_gc_object(JSRuntime *rt, JSGCObjectHeader *gp) case JS_GC_OBJ_TYPE_ASYNC_FUNCTION: __async_func_free(rt, (JSAsyncFunctionState *)gp); break; + case JS_GC_OBJ_TYPE_MODULE: + js_free_module_def(rt, (JSModuleDef *)gp); + break; default: abort(); } @@ -5836,6 +5883,7 @@ void __JS_FreeValueRT(JSRuntime *rt, JSValue v) break; case JS_TAG_OBJECT: case JS_TAG_FUNCTION_BYTECODE: + case JS_TAG_MODULE: { JSGCObjectHeader *p = JS_VALUE_GET_PTR(v); if (rt->gc_phase != JS_GC_PHASE_REMOVE_CYCLES) { @@ -5848,9 +5896,6 @@ void __JS_FreeValueRT(JSRuntime *rt, JSValue v) } } break; - case JS_TAG_MODULE: - abort(); /* never freed here */ - break; case JS_TAG_BIG_INT: { JSBigInt *p = JS_VALUE_GET_PTR(v); @@ -5924,6 +5969,7 @@ void JS_MarkValue(JSRuntime *rt, JSValueConst val, JS_MarkFunc *mark_func) switch(JS_VALUE_GET_TAG(val)) { case JS_TAG_OBJECT: case JS_TAG_FUNCTION_BYTECODE: + case JS_TAG_MODULE: mark_func(rt, JS_VALUE_GET_PTR(val)); break; default: @@ -6036,6 +6082,12 @@ static void mark_children(JSRuntime *rt, JSGCObjectHeader *gp, JS_MarkContext(rt, ctx, mark_func); } break; + case JS_GC_OBJ_TYPE_MODULE: + { + JSModuleDef *m = (JSModuleDef *)gp; + js_mark_module_def(rt, m, mark_func); + } + break; default: abort(); } @@ -6132,6 +6184,7 @@ static void gc_free_cycles(JSRuntime *rt) case JS_GC_OBJ_TYPE_JS_OBJECT: case JS_GC_OBJ_TYPE_FUNCTION_BYTECODE: case JS_GC_OBJ_TYPE_ASYNC_FUNCTION: + 
case JS_GC_OBJ_TYPE_MODULE: #ifdef DUMP_GC_FREE if (!header_done) { printf("Freeing cycles:\n"); @@ -6154,7 +6207,8 @@ static void gc_free_cycles(JSRuntime *rt) p = list_entry(el, JSGCObjectHeader, link); assert(p->gc_obj_type == JS_GC_OBJ_TYPE_JS_OBJECT || p->gc_obj_type == JS_GC_OBJ_TYPE_FUNCTION_BYTECODE || - p->gc_obj_type == JS_GC_OBJ_TYPE_ASYNC_FUNCTION); + p->gc_obj_type == JS_GC_OBJ_TYPE_ASYNC_FUNCTION || + p->gc_obj_type == JS_GC_OBJ_TYPE_MODULE); if (p->gc_obj_type == JS_GC_OBJ_TYPE_JS_OBJECT && ((JSObject *)p)->weakref_count != 0) { /* keep the object because there are weak references to it */ @@ -6491,6 +6545,7 @@ void JS_ComputeMemoryUsage(JSRuntime *rt, JSMemoryUsage *s) case JS_CLASS_UINT32_ARRAY: /* u.typed_array / u.array */ case JS_CLASS_BIG_INT64_ARRAY: /* u.typed_array / u.array */ case JS_CLASS_BIG_UINT64_ARRAY: /* u.typed_array / u.array */ + case JS_CLASS_FLOAT16_ARRAY: /* u.typed_array / u.array */ case JS_CLASS_FLOAT32_ARRAY: /* u.typed_array / u.array */ case JS_CLASS_FLOAT64_ARRAY: /* u.typed_array / u.array */ case JS_CLASS_DATAVIEW: /* u.typed_array */ @@ -6697,6 +6752,7 @@ JSValue JS_Throw(JSContext *ctx, JSValue obj) JSRuntime *rt = ctx->rt; JS_FreeValue(ctx, rt->current_exception); rt->current_exception = obj; + rt->current_exception_is_uncatchable = FALSE; return JS_EXCEPTION; } @@ -6838,20 +6894,30 @@ static int find_line_num(JSContext *ctx, JSFunctionBytecode *b, return 0; } -/* in order to avoid executing arbitrary code during the stack trace - generation, we only look at simple 'name' properties containing a - string. */ -static const char *get_func_name(JSContext *ctx, JSValueConst func) +/* return a string property without executing arbitrary JS code (used + when dumping the stack trace or in debug print). 
*/ +static const char *get_prop_string(JSContext *ctx, JSValueConst obj, JSAtom prop) { + JSObject *p; JSProperty *pr; JSShapeProperty *prs; JSValueConst val; - if (JS_VALUE_GET_TAG(func) != JS_TAG_OBJECT) - return NULL; - prs = find_own_property(&pr, JS_VALUE_GET_OBJ(func), JS_ATOM_name); - if (!prs) + if (JS_VALUE_GET_TAG(obj) != JS_TAG_OBJECT) return NULL; + p = JS_VALUE_GET_OBJ(obj); + prs = find_own_property(&pr, p, prop); + if (!prs) { + /* we look at one level in the prototype to handle the 'name' + field of the Error objects */ + p = p->shape->proto; + if (!p) + return NULL; + prs = find_own_property(&pr, p, prop); + if (!prs) + return NULL; + } + if ((prs->flags & JS_PROP_TMASK) != JS_PROP_NORMAL) return NULL; val = pr->u.value; @@ -6875,6 +6941,9 @@ static void build_backtrace(JSContext *ctx, JSValueConst error_obj, const char *str1; JSObject *p; + if (!JS_IsObject(error_obj)) + return; /* protection in the out of memory case */ + js_dbuf_init(ctx, &dbuf); if (filename) { dbuf_printf(&dbuf, " at %s", filename); @@ -6882,13 +6951,17 @@ static void build_backtrace(JSContext *ctx, JSValueConst error_obj, dbuf_printf(&dbuf, ":%d:%d", line_num, col_num); dbuf_putc(&dbuf, '\n'); str = JS_NewString(ctx, filename); + if (JS_IsException(str)) + return; /* Note: SpiderMonkey does that, could update once there is a standard */ - JS_DefinePropertyValue(ctx, error_obj, JS_ATOM_fileName, str, - JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE); - JS_DefinePropertyValue(ctx, error_obj, JS_ATOM_lineNumber, JS_NewInt32(ctx, line_num), - JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE); - JS_DefinePropertyValue(ctx, error_obj, JS_ATOM_columnNumber, JS_NewInt32(ctx, col_num), - JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE); + if (JS_DefinePropertyValue(ctx, error_obj, JS_ATOM_fileName, str, + JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE) < 0 || + JS_DefinePropertyValue(ctx, error_obj, JS_ATOM_lineNumber, JS_NewInt32(ctx, line_num), + JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE) < 0 || + 
JS_DefinePropertyValue(ctx, error_obj, JS_ATOM_columnNumber, JS_NewInt32(ctx, col_num), + JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE) < 0) { + return; + } } for(sf = ctx->rt->current_stack_frame; sf != NULL; sf = sf->prev_frame) { if (sf->js_mode & JS_MODE_BACKTRACE_BARRIER) @@ -6897,7 +6970,7 @@ static void build_backtrace(JSContext *ctx, JSValueConst error_obj, backtrace_flags &= ~JS_BACKTRACE_FLAG_SKIP_FIRST_LEVEL; continue; } - func_name_str = get_func_name(ctx, sf->cur_func); + func_name_str = get_prop_string(ctx, sf->cur_func, JS_ATOM_name); if (!func_name_str || func_name_str[0] == '\0') str1 = ""; else @@ -6973,9 +7046,9 @@ static JSValue JS_ThrowError2(JSContext *ctx, JSErrorEnum error_num, JS_DefinePropertyValue(ctx, obj, JS_ATOM_message, JS_NewString(ctx, buf), JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE); - } - if (add_backtrace) { - build_backtrace(ctx, obj, NULL, 0, 0, 0); + if (add_backtrace) { + build_backtrace(ctx, obj, NULL, 0, 0, 0); + } } ret = JS_Throw(ctx, obj); return ret; @@ -7164,7 +7237,7 @@ static JSValue JS_ThrowTypeErrorInvalidClass(JSContext *ctx, int class_id) static void JS_ThrowInterrupted(JSContext *ctx) { JS_ThrowInternalError(ctx, "interrupted"); - JS_SetUncatchableError(ctx, ctx->rt->current_exception, TRUE); + JS_SetUncatchableException(ctx, TRUE); } static no_inline __exception int __js_poll_interrupts(JSContext *ctx) @@ -7189,6 +7262,15 @@ static inline __exception int js_poll_interrupts(JSContext *ctx) } } +static void JS_SetImmutablePrototype(JSContext *ctx, JSValueConst obj) +{ + JSObject *p; + if (JS_VALUE_GET_TAG(obj) != JS_TAG_OBJECT) + return; + p = JS_VALUE_GET_OBJ(obj); + p->has_immutable_prototype = TRUE; +} + /* Return -1 (exception) or TRUE/FALSE. 'throw_flag' = FALSE indicates that it is called from Reflect.setPrototypeOf(). 
*/ static int JS_SetPrototypeInternal(JSContext *ctx, JSValueConst obj, @@ -7238,7 +7320,15 @@ static int JS_SetPrototypeInternal(JSContext *ctx, JSValueConst obj, sh = p->shape; if (sh->proto == proto) return TRUE; - if (!p->extensible) { + if (unlikely(p->has_immutable_prototype)) { + if (throw_flag) { + JS_ThrowTypeError(ctx, "prototype is immutable"); + return -1; + } else { + return FALSE; + } + } + if (unlikely(!p->extensible)) { if (throw_flag) { JS_ThrowTypeError(ctx, "object is not extensible"); return -1; @@ -7861,7 +7951,7 @@ static int num_keys_cmp(const void *p1, const void *p2, void *opaque) return 1; } -static void js_free_prop_enum(JSContext *ctx, JSPropertyEnum *tab, uint32_t len) +void JS_FreePropertyEnum(JSContext *ctx, JSPropertyEnum *tab, uint32_t len) { uint32_t i; if (tab) { @@ -7955,7 +8045,7 @@ static int __exception JS_GetOwnPropertyNamesInternal(JSContext *ctx, /* set the "is_enumerable" field if necessary */ res = JS_GetOwnPropertyInternal(ctx, &desc, p, atom); if (res < 0) { - js_free_prop_enum(ctx, tab_exotic, exotic_count); + JS_FreePropertyEnum(ctx, tab_exotic, exotic_count); return -1; } if (res) { @@ -7986,7 +8076,7 @@ static int __exception JS_GetOwnPropertyNamesInternal(JSContext *ctx, if (atom_count < exotic_keys_count || atom_count > INT32_MAX) { add_overflow: JS_ThrowOutOfMemory(ctx); - js_free_prop_enum(ctx, tab_exotic, exotic_count); + JS_FreePropertyEnum(ctx, tab_exotic, exotic_count); return -1; } /* XXX: need generic way to test for js_malloc(ctx, a * b) overflow */ @@ -7994,7 +8084,7 @@ static int __exception JS_GetOwnPropertyNamesInternal(JSContext *ctx, /* avoid allocating 0 bytes */ tab_atom = js_malloc(ctx, sizeof(tab_atom[0]) * max_int(atom_count, 1)); if (!tab_atom) { - js_free_prop_enum(ctx, tab_exotic, exotic_count); + JS_FreePropertyEnum(ctx, tab_exotic, exotic_count); return -1; } @@ -8039,7 +8129,7 @@ static int __exception JS_GetOwnPropertyNamesInternal(JSContext *ctx, for(i = 0; i < len; i++) { 
tab_atom[num_index].atom = __JS_AtomFromUInt32(i); if (tab_atom[num_index].atom == JS_ATOM_NULL) { - js_free_prop_enum(ctx, tab_atom, num_index); + JS_FreePropertyEnum(ctx, tab_atom, num_index); return -1; } tab_atom[num_index].is_enumerable = TRUE; @@ -8335,6 +8425,9 @@ static JSValue JS_GetPropertyValue(JSContext *ctx, JSValueConst this_obj, case JS_CLASS_BIG_UINT64_ARRAY: if (unlikely(idx >= p->u.array.count)) goto slow_path; return JS_NewBigUint64(ctx, p->u.array.u.uint64_ptr[idx]); + case JS_CLASS_FLOAT16_ARRAY: + if (unlikely(idx >= p->u.array.count)) goto slow_path; + return __JS_NewFloat64(ctx, fromfp16(p->u.array.u.fp16_ptr[idx])); case JS_CLASS_FLOAT32_ARRAY: if (unlikely(idx >= p->u.array.count)) goto slow_path; return __JS_NewFloat64(ctx, p->u.array.u.float_ptr[idx]); @@ -8427,6 +8520,8 @@ JSValue JS_GetPropertyStr(JSContext *ctx, JSValueConst this_obj, JSAtom atom; JSValue ret; atom = JS_NewAtom(ctx, prop); + if (atom == JS_ATOM_NULL) + return JS_EXCEPTION; ret = JS_GetProperty(ctx, this_obj, atom); JS_FreeAtom(ctx, atom); return ret; @@ -9169,6 +9264,13 @@ static int JS_SetPropertyValue(JSContext *ctx, JSValueConst this_obj, p->u.array.u.uint64_ptr[idx] = v; } break; + case JS_CLASS_FLOAT16_ARRAY: + if (JS_ToFloat64Free(ctx, &d, val)) + return -1; + if (unlikely(idx >= (uint32_t)p->u.array.count)) + goto ta_out_of_bound; + p->u.array.u.fp16_ptr[idx] = tofp16(d); + break; case JS_CLASS_FLOAT32_ARRAY: if (JS_ToFloat64Free(ctx, &d, val)) return -1; @@ -9239,6 +9341,10 @@ int JS_SetPropertyStr(JSContext *ctx, JSValueConst this_obj, JSAtom atom; int ret; atom = JS_NewAtom(ctx, prop); + if (atom == JS_ATOM_NULL) { + JS_FreeValue(ctx, val); + return -1; + } ret = JS_SetPropertyInternal(ctx, this_obj, atom, val, this_obj, JS_PROP_THROW); JS_FreeAtom(ctx, atom); return ret; @@ -9795,6 +9901,10 @@ int JS_DefinePropertyValueStr(JSContext *ctx, JSValueConst this_obj, JSAtom atom; int ret; atom = JS_NewAtom(ctx, prop); + if (atom == JS_ATOM_NULL) { + 
JS_FreeValue(ctx, val); + return -1; + } ret = JS_DefinePropertyValue(ctx, this_obj, atom, val, flags); JS_FreeAtom(ctx, atom); return ret; @@ -10098,6 +10208,29 @@ static int JS_SetGlobalVar(JSContext *ctx, JSAtom prop, JSValue val, return JS_SetPropertyInternal(ctx, ctx->global_obj, prop, val, ctx->global_obj, flags); } +/* return -1, FALSE or TRUE */ +static int JS_DeleteGlobalVar(JSContext *ctx, JSAtom prop) +{ + JSObject *p; + JSShapeProperty *prs; + JSProperty *pr; + int ret; + + /* 9.1.1.4.7 DeleteBinding ( N ) */ + p = JS_VALUE_GET_OBJ(ctx->global_var_obj); + prs = find_own_property(&pr, p, prop); + if (prs) + return FALSE; /* lexical variables cannot be deleted */ + ret = JS_HasProperty(ctx, ctx->global_obj, prop); + if (ret < 0) + return -1; + if (ret) { + return JS_DeleteProperty(ctx, ctx->global_obj, prop, 0); + } else { + return TRUE; + } +} + /* return -1, FALSE or TRUE. return FALSE if not configurable or invalid object. return -1 in case of exception. flags can be 0, JS_PROP_THROW or JS_PROP_THROW_STRICT */ @@ -10196,29 +10329,10 @@ BOOL JS_IsError(JSContext *ctx, JSValueConst val) return (p->class_id == JS_CLASS_ERROR); } -/* used to avoid catching interrupt exceptions */ -BOOL JS_IsUncatchableError(JSContext *ctx, JSValueConst val) +/* must be called after JS_Throw() */ +void JS_SetUncatchableException(JSContext *ctx, BOOL flag) { - JSObject *p; - if (JS_VALUE_GET_TAG(val) != JS_TAG_OBJECT) - return FALSE; - p = JS_VALUE_GET_OBJ(val); - return p->class_id == JS_CLASS_ERROR && p->is_uncatchable_error; -} - -void JS_SetUncatchableError(JSContext *ctx, JSValueConst val, BOOL flag) -{ - JSObject *p; - if (JS_VALUE_GET_TAG(val) != JS_TAG_OBJECT) - return; - p = JS_VALUE_GET_OBJ(val); - if (p->class_id == JS_CLASS_ERROR) - p->is_uncatchable_error = flag; -} - -void JS_ResetUncatchableError(JSContext *ctx) -{ - JS_SetUncatchableError(ctx, ctx->rt->current_exception, FALSE); + ctx->rt->current_exception_is_uncatchable = flag; } void JS_SetOpaque(JSValue 
obj, void *opaque) @@ -10504,6 +10618,15 @@ static inline js_limb_t js_limb_clz(js_limb_t a) } #endif +/* handle a = 0 too */ +static inline js_limb_t js_limb_safe_clz(js_limb_t a) +{ + if (a == 0) + return JS_LIMB_BITS; + else + return js_limb_clz(a); +} + static js_limb_t mp_add(js_limb_t *res, const js_limb_t *op1, const js_limb_t *op2, js_limb_t n, js_limb_t carry) { @@ -11649,6 +11772,7 @@ static JSBigInt *js_bigint_from_string(JSContext *ctx, const char *str, int radix) { const char *p = str; + size_t n_digits1; int is_neg, n_digits, n_limbs, len, log2_radix, n_bits, i; JSBigInt *r; js_limb_t v, c, h; @@ -11660,10 +11784,16 @@ static JSBigInt *js_bigint_from_string(JSContext *ctx, } while (*p == '0') p++; - n_digits = strlen(p); + n_digits1 = strlen(p); + /* the real check for overflow is done in js_bigint_new(). Here + we just avoid integer overflow */ + if (n_digits1 > JS_BIGINT_MAX_SIZE * JS_LIMB_BITS) { + JS_ThrowRangeError(ctx, "BigInt is too large to allocate"); + return NULL; + } + n_digits = n_digits1; log2_radix = 32 - clz32(radix - 1); /* ceil(log2(radix)) */ /* compute the maximum number of limbs */ - /* XXX: overflow */ if (radix == 10) { n_bits = (n_digits * 27 + 7) / 8; /* >= ceil(n_digits * log2(10)) */ } else { @@ -11852,7 +11982,7 @@ static JSValue js_bigint_to_string1(JSContext *ctx, JSValueConst val, int radix) r = tmp; } log2_radix = 31 - clz32(radix); /* floor(log2(radix)) */ - n_bits = r->len * JS_LIMB_BITS - js_limb_clz(r->tab[r->len - 1]); + n_bits = r->len * JS_LIMB_BITS - js_limb_safe_clz(r->tab[r->len - 1]); /* n_digits is exact only if radix is a power of two.
Otherwise it is >= the exact number of digits */ n_digits = (n_bits + log2_radix - 1) / log2_radix; @@ -11894,11 +12024,10 @@ static JSValue js_bigint_to_string1(JSContext *ctx, JSValueConst val, int radix) bit_pos = i * log2_radix; pos = bit_pos / JS_LIMB_BITS; shift = bit_pos % JS_LIMB_BITS; - if (likely((shift + log2_radix) <= JS_LIMB_BITS)) { - c = r->tab[pos] >> shift; - } else { - c = (r->tab[pos] >> shift) | - (r->tab[pos + 1] << (JS_LIMB_BITS - shift)); + c = r->tab[pos] >> shift; + if ((shift + log2_radix) > JS_LIMB_BITS && + (pos + 1) < r->len) { + c |= r->tab[pos + 1] << (JS_LIMB_BITS - shift); } c &= (radix - 1); *--q = digits[c]; @@ -12099,7 +12228,7 @@ static JSValue js_atof(JSContext *ctx, const char *str, const char **pp, case ATOD_TYPE_FLOAT64: { double d; - d = js_atod(buf,NULL, radix, is_float ? 0 : JS_ATOD_INT_ONLY, + d = js_atod(buf, NULL, radix, is_float ? 0 : JS_ATOD_INT_ONLY, &atod_mem); /* return int or float64 */ val = JS_NewFloat64(ctx, d); @@ -12111,8 +12240,10 @@ static JSValue js_atof(JSContext *ctx, const char *str, const char **pp, if (has_legacy_octal || is_float) goto fail; r = js_bigint_from_string(ctx, buf, radix); - if (!r) - goto mem_error; + if (!r) { + val = JS_EXCEPTION; + goto done; + } val = JS_CompactBigInt(ctx, r); } break; @@ -12871,27 +13002,71 @@ static JSValue JS_ToStringCheckObject(JSContext *ctx, JSValueConst val) return JS_ToString(ctx, val); } -static JSValue JS_ToQuotedString(JSContext *ctx, JSValueConst val1) +#define JS_PRINT_MAX_DEPTH 8 + +typedef struct { + JSRuntime *rt; + JSContext *ctx; /* may be NULL */ + JSPrintValueOptions options; + JSPrintValueWrite *write_func; + void *write_opaque; + int level; + JSObject *print_stack[JS_PRINT_MAX_DEPTH]; /* level values */ +} JSPrintValueState; + +static void js_print_value(JSPrintValueState *s, JSValueConst val); + +static void js_putc(JSPrintValueState *s, char c) { - JSValue val; - JSString *p; - int i; - uint32_t c; - StringBuffer b_s, *b = &b_s; - char 
buf[16]; + s->write_func(s->write_opaque, &c, 1); +} - val = JS_ToStringCheckObject(ctx, val1); - if (JS_IsException(val)) - return val; - p = JS_VALUE_GET_STRING(val); +static void js_puts(JSPrintValueState *s, const char *str) +{ + s->write_func(s->write_opaque, str, strlen(str)); +} - if (string_buffer_init(ctx, b, p->len + 2)) - goto fail; +static void __attribute__((format(printf, 2, 3))) js_printf(JSPrintValueState *s, const char *fmt, ...) +{ + va_list ap; + char buf[256]; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + s->write_func(s->write_opaque, buf, strlen(buf)); +} - if (string_buffer_putc8(b, '\"')) - goto fail; - for(i = 0; i < p->len; ) { - c = string_getc(p, &i); +static void js_print_float64(JSPrintValueState *s, double d) +{ + JSDTOATempMem dtoa_mem; + char buf[32]; + int len; + len = js_dtoa(buf, d, 10, 0, JS_DTOA_FORMAT_FREE | JS_DTOA_MINUS_ZERO, &dtoa_mem); + s->write_func(s->write_opaque, buf, len); +} + +static uint32_t js_string_get_length(JSValueConst val) +{ + if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) { + JSString *p = JS_VALUE_GET_STRING(val); + return p->len; + } else if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING_ROPE) { + JSStringRope *r = JS_VALUE_GET_PTR(val); + return r->len; + } else { + return 0; + } +} + +/* pretty print the first 'len' characters of 'p' */ +static void js_print_string1(JSPrintValueState *s, JSString *p, int len, int sep) +{ + uint8_t buf[UTF8_CHAR_LEN_MAX]; + int l, i, c, c1; + + for(i = 0; i < len; i++) { + c = string_get(p, i); switch(c) { case '\t': c = 't'; @@ -12908,52 +13083,731 @@ static JSValue JS_ToQuotedString(JSContext *ctx, JSValueConst val1) case '\f': c = 'f'; goto quote; - case '\"': case '\\': quote: - if (string_buffer_putc8(b, '\\')) - goto fail; - if (string_buffer_putc8(b, c)) - goto fail; + js_putc(s, '\\'); + js_putc(s, c); break; default: - if (c < 32 || is_surrogate(c)) { - snprintf(buf, sizeof(buf), "\\u%04x", c); - if (string_buffer_puts8(b, buf)) - goto 
fail; + if (c == sep) + goto quote; + if (c >= 32 && c <= 126) { + js_putc(s, c); + } else if (c < 32 || + (c >= 0x7f && c <= 0x9f)) { + escape: + js_printf(s, "\\u%04x", c); } else { - if (string_buffer_putc(b, c)) - goto fail; + if (is_hi_surrogate(c)) { + if ((i + 1) >= len) + goto escape; + c1 = string_get(p, i + 1); + if (!is_lo_surrogate(c1)) + goto escape; + i++; + c = from_surrogate(c, c1); + } else if (is_lo_surrogate(c)) { + goto escape; + } + l = unicode_to_utf8(buf, c); + s->write_func(s->write_opaque, (char *)buf, l); } break; } } - if (string_buffer_putc8(b, '\"')) - goto fail; - JS_FreeValue(ctx, val); - return string_buffer_end(b); - fail: - JS_FreeValue(ctx, val); - string_buffer_free(b); - return JS_EXCEPTION; +} + +static void js_print_string_rec(JSPrintValueState *s, JSValueConst val, + int sep, uint32_t pos) +{ + if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) { + JSString *p = JS_VALUE_GET_STRING(val); + uint32_t len; + if (pos < s->options.max_string_length) { + len = min_uint32(p->len, s->options.max_string_length - pos); + js_print_string1(s, p, len, sep); + } + } else if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING_ROPE) { + JSStringRope *r = JS_VALUE_GET_PTR(val); + js_print_string_rec(s, r->left, sep, pos); + js_print_string_rec(s, r->right, sep, pos + js_string_get_length(r->left)); + } else { + js_printf(s, "", (int)JS_VALUE_GET_TAG(val)); + } +} + +static void js_print_string(JSPrintValueState *s, JSValueConst val) +{ + int sep; + if (s->options.raw_dump && JS_VALUE_GET_TAG(val) == JS_TAG_STRING) { + JSString *p = JS_VALUE_GET_STRING(val); + js_printf(s, "%d", p->header.ref_count); + sep = (p->header.ref_count == 1) ? '\"' : '\''; + } else { + sep = '\"'; + } + js_putc(s, sep); + js_print_string_rec(s, val, sep, 0); + js_putc(s, sep); + if (js_string_get_length(val) > s->options.max_string_length) { + uint32_t n = js_string_get_length(val) - s->options.max_string_length; + js_printf(s, "... %u more character%s", n, n > 1 ? 
"s" : ""); + } +} + +static void js_print_raw_string(JSPrintValueState *s, JSValueConst val) +{ + const char *cstr; + size_t len; + cstr = JS_ToCStringLen(s->ctx, &len, val); + if (cstr) { + s->write_func(s->write_opaque, cstr, len); + JS_FreeCString(s->ctx, cstr); + } +} + +static BOOL is_ascii_ident(const JSString *p) +{ + int i, c; + + if (p->len == 0) + return FALSE; + for(i = 0; i < p->len; i++) { + c = string_get(p, i); + if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c == '_' || c == '$') || (c >= '0' && c <= '9' && i > 0))) + return FALSE; + } + return TRUE; +} + +static void js_print_atom(JSPrintValueState *s, JSAtom atom) +{ + int i; + if (__JS_AtomIsTaggedInt(atom)) { + js_printf(s, "%u", __JS_AtomToUInt32(atom)); + } else if (atom == JS_ATOM_NULL) { + js_puts(s, ""); + } else { + assert(atom < s->rt->atom_size); + JSString *p; + p = s->rt->atom_array[atom]; + if (is_ascii_ident(p)) { + for(i = 0; i < p->len; i++) { + js_putc(s, string_get(p, i)); + } + } else { + js_putc(s, '"'); + js_print_string1(s, p, p->len, '\"'); + js_putc(s, '"'); + } + } +} + +/* return 0 if invalid length */ +static uint32_t js_print_array_get_length(JSObject *p) +{ + JSProperty *pr; + JSShapeProperty *prs; + JSValueConst val; + + prs = find_own_property(&pr, p, JS_ATOM_length); + if (!prs) + return 0; + if ((prs->flags & JS_PROP_TMASK) != JS_PROP_NORMAL) + return 0; + val = pr->u.value; + switch(JS_VALUE_GET_NORM_TAG(val)) { + case JS_TAG_INT: + return JS_VALUE_GET_INT(val); + case JS_TAG_FLOAT64: + return (uint32_t)JS_VALUE_GET_FLOAT64(val); + default: + return 0; + } +} + +static void js_print_comma(JSPrintValueState *s, int *pcomma_state) +{ + switch(*pcomma_state) { + case 0: + break; + case 1: + js_printf(s, ", "); + break; + case 2: + js_printf(s, " { "); + break; + } + *pcomma_state = 1; +} + +static void js_print_more_items(JSPrintValueState *s, int *pcomma_state, + uint32_t n) +{ + js_print_comma(s, pcomma_state); + js_printf(s, "... 
%u more item%s", n, n > 1 ? "s" : ""); +} + +/* similar to js_regexp_toString() but without side effect */ +static void js_print_regexp(JSPrintValueState *s, JSObject *p1) +{ + JSRegExp *re = &p1->u.regexp; + JSString *p; + int i, n, c, c2, bra, flags; + static const char regexp_flags[] = { 'g', 'i', 'm', 's', 'u', 'y', 'd', 'v' }; + + p = re->pattern; + js_putc(s, '/'); + if (p->len == 0) { + js_puts(s, "(?:)"); + } else { + bra = 0; + for (i = 0, n = p->len; i < n;) { + c2 = -1; + switch (c = string_get(p, i++)) { + case '\\': + if (i < n) + c2 = string_get(p, i++); + break; + case ']': + bra = 0; + break; + case '[': + if (!bra) { + if (i < n && string_get(p, i) == ']') + c2 = string_get(p, i++); + bra = 1; + } + break; + case '\n': + c = '\\'; + c2 = 'n'; + break; + case '\r': + c = '\\'; + c2 = 'r'; + break; + case '/': + if (!bra) { + c = '\\'; + c2 = '/'; + } + break; + } + js_putc(s, c); + if (c2 >= 0) + js_putc(s, c2); + } + } + js_putc(s, '/'); + + flags = lre_get_flags(re->bytecode->u.str8); + for(i = 0; i < countof(regexp_flags); i++) { + if ((flags >> i) & 1) { + js_putc(s, regexp_flags[i]); + } + } +} + +/* similar to js_error_toString() but without side effect */ +static void js_print_error(JSPrintValueState *s, JSObject *p) +{ + const char *str; + size_t len; + + str = get_prop_string(s->ctx, JS_MKPTR(JS_TAG_OBJECT, p), JS_ATOM_name); + if (!str) { + js_puts(s, "Error"); + } else { + js_puts(s, str); + JS_FreeCString(s->ctx, str); + } + + str = get_prop_string(s->ctx, JS_MKPTR(JS_TAG_OBJECT, p), JS_ATOM_message); + if (str && str[0] != '\0') { + js_puts(s, ": "); + js_puts(s, str); + } + JS_FreeCString(s->ctx, str); + + /* dump the stack if present */ + str = get_prop_string(s->ctx, JS_MKPTR(JS_TAG_OBJECT, p), JS_ATOM_stack); + if (str) { + js_putc(s, '\n'); + + /* XXX: should remove the last '\n' in stack as + v8. 
SpiderMonkey does not do it */ + len = strlen(str); + if (len > 0 && str[len - 1] == '\n') + len--; + s->write_func(s->write_opaque, str, len); + + JS_FreeCString(s->ctx, str); + } +} + +static void js_print_object(JSPrintValueState *s, JSObject *p) +{ + JSRuntime *rt = s->rt; + JSShape *sh; + JSShapeProperty *prs; + JSProperty *pr; + int comma_state; + BOOL is_array; + uint32_t i; + + comma_state = 0; + is_array = FALSE; + if (p->class_id == JS_CLASS_ARRAY) { + is_array = TRUE; + js_printf(s, "[ "); + /* XXX: print array like properties even if not fast array */ + if (p->fast_array) { + uint32_t len, n, len1; + len = js_print_array_get_length(p); + + len1 = min_uint32(p->u.array.count, s->options.max_item_count); + for(i = 0; i < len1; i++) { + js_print_comma(s, &comma_state); + js_print_value(s, p->u.array.u.values[i]); + } + if (len1 < p->u.array.count) + js_print_more_items(s, &comma_state, p->u.array.count - len1); + if (p->u.array.count < len) { + n = len - p->u.array.count; + js_print_comma(s, &comma_state); + js_printf(s, "<%u empty item%s>", n, n > 1 ? 
"s" : ""); + } + } + } else if (p->class_id >= JS_CLASS_UINT8C_ARRAY && p->class_id <= JS_CLASS_FLOAT64_ARRAY) { + uint32_t size = 1 << typed_array_size_log2(p->class_id); + uint32_t len1; + int64_t v; + + js_print_atom(s, rt->class_array[p->class_id].class_name); + js_printf(s, "(%u) [ ", p->u.array.count); + + is_array = TRUE; + len1 = min_uint32(p->u.array.count, s->options.max_item_count); + for(i = 0; i < len1; i++) { + const uint8_t *ptr = p->u.array.u.uint8_ptr + i * size; + js_print_comma(s, &comma_state); + switch(p->class_id) { + case JS_CLASS_UINT8C_ARRAY: + case JS_CLASS_UINT8_ARRAY: + v = *ptr; + goto ta_int64; + case JS_CLASS_INT8_ARRAY: + v = *(int8_t *)ptr; + goto ta_int64; + case JS_CLASS_INT16_ARRAY: + v = *(int16_t *)ptr; + goto ta_int64; + case JS_CLASS_UINT16_ARRAY: + v = *(uint16_t *)ptr; + goto ta_int64; + case JS_CLASS_INT32_ARRAY: + v = *(int32_t *)ptr; + goto ta_int64; + case JS_CLASS_UINT32_ARRAY: + v = *(uint32_t *)ptr; + goto ta_int64; + case JS_CLASS_BIG_INT64_ARRAY: + v = *(int64_t *)ptr; + ta_int64: + js_printf(s, "%" PRId64, v); + break; + case JS_CLASS_BIG_UINT64_ARRAY: + js_printf(s, "%" PRIu64, *(uint64_t *)ptr); + break; + case JS_CLASS_FLOAT16_ARRAY: + js_print_float64(s, fromfp16(*(uint16_t *)ptr)); + break; + case JS_CLASS_FLOAT32_ARRAY: + js_print_float64(s, *(float *)ptr); + break; + case JS_CLASS_FLOAT64_ARRAY: + js_print_float64(s, *(double *)ptr); + break; + } + } + if (len1 < p->u.array.count) + js_print_more_items(s, &comma_state, p->u.array.count - len1); + } else if (p->class_id == JS_CLASS_BYTECODE_FUNCTION || + (rt->class_array[p->class_id].call != NULL && + p->class_id != JS_CLASS_PROXY)) { + js_printf(s, "[Function"); + /* XXX: allow dump without ctx */ + if (!s->options.raw_dump && s->ctx) { + const char *func_name_str; + js_putc(s, ' '); + func_name_str = get_prop_string(s->ctx, JS_MKPTR(JS_TAG_OBJECT, p), JS_ATOM_name); + if (!func_name_str || func_name_str[0] == '\0') + js_puts(s, "(anonymous)"); + else + 
js_puts(s, func_name_str); + JS_FreeCString(s->ctx, func_name_str); + } + js_printf(s, "]"); + comma_state = 2; + } else if (p->class_id == JS_CLASS_MAP || p->class_id == JS_CLASS_SET) { + JSMapState *ms = p->u.opaque; + struct list_head *el; + + if (!ms) + goto default_obj; + js_print_atom(s, rt->class_array[p->class_id].class_name); + js_printf(s, "(%u) { ", ms->record_count); + i = 0; + list_for_each(el, &ms->records) { + JSMapRecord *mr = list_entry(el, JSMapRecord, link); + js_print_comma(s, &comma_state); + if (mr->empty) + continue; + js_print_value(s, mr->key); + if (p->class_id == JS_CLASS_MAP) { + js_printf(s, " => "); + js_print_value(s, mr->value); + } + i++; + if (i >= s->options.max_item_count) + break; + } + if (i < ms->record_count) + js_print_more_items(s, &comma_state, ms->record_count - i); + } else if (p->class_id == JS_CLASS_REGEXP && s->ctx) { + js_print_regexp(s, p); + comma_state = 2; + } else if (p->class_id == JS_CLASS_DATE && s->ctx) { + /* get_date_string() has no side effect */ + JSValue str = get_date_string(s->ctx, JS_MKPTR(JS_TAG_OBJECT, p), 0, NULL, 0x23); /* toISOString() */ + if (JS_IsException(str)) + goto default_obj; + js_print_raw_string(s, str); + JS_FreeValueRT(s->rt, str); + comma_state = 2; + } else if (p->class_id == JS_CLASS_ERROR && s->ctx) { + js_print_error(s, p); + comma_state = 2; + } else { + default_obj: + if (p->class_id != JS_CLASS_OBJECT) { + js_print_atom(s, rt->class_array[p->class_id].class_name); + js_printf(s, " "); + } + js_printf(s, "{ "); + } + + sh = p->shape; /* the shape can be NULL while freeing an object */ + if (sh) { + uint32_t j; + + j = 0; + for(i = 0, prs = get_shape_prop(sh); i < sh->prop_count; i++, prs++) { + if (prs->atom != JS_ATOM_NULL) { + if (!(prs->flags & JS_PROP_ENUMERABLE) && + !s->options.show_hidden) { + continue; + } + if (j < s->options.max_item_count) { + pr = &p->prop[i]; + js_print_comma(s, &comma_state); + js_print_atom(s, prs->atom); + js_printf(s, ": "); + + /* XXX: 
autoinit property */ + if ((prs->flags & JS_PROP_TMASK) == JS_PROP_GETSET) { + if (s->options.raw_dump) { + js_printf(s, "[Getter %p Setter %p]", + pr->u.getset.getter, pr->u.getset.setter); + } else { + if (pr->u.getset.getter && pr->u.getset.setter) { + js_printf(s, "[Getter/Setter]"); + } else if (pr->u.getset.setter) { + js_printf(s, "[Setter]"); + } else { + js_printf(s, "[Getter]"); + } + } + } else if ((prs->flags & JS_PROP_TMASK) == JS_PROP_VARREF) { + if (s->options.raw_dump) { + js_printf(s, "[varref %p]", (void *)pr->u.var_ref); + } else { + js_print_value(s, *pr->u.var_ref->pvalue); + } + } else if ((prs->flags & JS_PROP_TMASK) == JS_PROP_AUTOINIT) { + if (s->options.raw_dump) { + js_printf(s, "[autoinit %p %d %p]", + (void *)js_autoinit_get_realm(pr), + js_autoinit_get_id(pr), + (void *)pr->u.init.opaque); + } else { + /* XXX: could autoinit but need to restart + the iteration */ + js_printf(s, "[autoinit]"); + } + } else { + js_print_value(s, pr->u.value); + } + } + j++; + } + } + if (j > s->options.max_item_count) + js_print_more_items(s, &comma_state, j - s->options.max_item_count); + } + if (s->options.raw_dump && js_class_has_bytecode(p->class_id)) { + JSFunctionBytecode *b = p->u.func.function_bytecode; + if (b->closure_var_count) { + JSVarRef **var_refs; + var_refs = p->u.func.var_refs; + + js_print_comma(s, &comma_state); + js_printf(s, "[[Closure]]: ["); + for(i = 0; i < b->closure_var_count; i++) { + if (i != 0) + js_printf(s, ", "); + js_print_value(s, var_refs[i]->value); + } + js_printf(s, " ]"); + } + if (p->u.func.home_object) { + js_print_comma(s, &comma_state); + js_printf(s, "[[HomeObject]]: "); + js_print_value(s, JS_MKPTR(JS_TAG_OBJECT, p->u.func.home_object)); + } + } + + if (!is_array) { + if (comma_state != 2) { + js_printf(s, " }"); + } + } else { + js_printf(s, " ]"); + } +} + +static int js_print_stack_index(JSPrintValueState *s, JSObject *p) +{ + int i; + for(i = 0; i < s->level; i++) + if (s->print_stack[i] == p) + return i; 
+ return -1; +} + +static void js_print_value(JSPrintValueState *s, JSValueConst val) +{ + uint32_t tag = JS_VALUE_GET_NORM_TAG(val); + const char *str; + + switch(tag) { + case JS_TAG_INT: + js_printf(s, "%d", JS_VALUE_GET_INT(val)); + break; + case JS_TAG_BOOL: + if (JS_VALUE_GET_BOOL(val)) + str = "true"; + else + str = "false"; + goto print_str; + case JS_TAG_NULL: + str = "null"; + goto print_str; + case JS_TAG_EXCEPTION: + str = "exception"; + goto print_str; + case JS_TAG_UNINITIALIZED: + str = "uninitialized"; + goto print_str; + case JS_TAG_UNDEFINED: + str = "undefined"; + print_str: + js_puts(s, str); + break; + case JS_TAG_FLOAT64: + js_print_float64(s, JS_VALUE_GET_FLOAT64(val)); + break; + case JS_TAG_SHORT_BIG_INT: + js_printf(s, "%" PRId64 "n", (int64_t)JS_VALUE_GET_SHORT_BIG_INT(val)); + break; + case JS_TAG_BIG_INT: + if (!s->options.raw_dump && s->ctx) { + JSValue str = js_bigint_to_string(s->ctx, val); + if (JS_IsException(str)) + goto raw_bigint; + js_print_raw_string(s, str); + js_putc(s, 'n'); + JS_FreeValueRT(s->rt, str); + } else { + JSBigInt *p; + int sgn, i; + raw_bigint: + p = JS_VALUE_GET_PTR(val); + /* In order to avoid allocations we just dump the limbs */ + sgn = js_bigint_sign(p); + if (sgn) + js_printf(s, "BigInt.asIntN(%d,", p->len * JS_LIMB_BITS); + js_printf(s, "0x"); + for(i = p->len - 1; i >= 0; i--) { + if (i != p->len - 1) + js_putc(s, '_'); +#if JS_LIMB_BITS == 32 + js_printf(s, "%08x", p->tab[i]); +#else + js_printf(s, "%016" PRIx64, p->tab[i]); +#endif + } + js_putc(s, 'n'); + if (sgn) + js_putc(s, ')'); + } + break; + case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: + if (s->options.raw_dump && tag == JS_TAG_STRING_ROPE) { + JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val); + js_printf(s, "[rope len=%d depth=%d]", r->len, r->depth); + } else { + js_print_string(s, val); + } + break; + case JS_TAG_FUNCTION_BYTECODE: + { + JSFunctionBytecode *b = JS_VALUE_GET_PTR(val); + js_puts(s, "[bytecode "); + js_print_atom(s, 
b->func_name); + js_putc(s, ']'); + } + break; + case JS_TAG_OBJECT: + { + JSObject *p = JS_VALUE_GET_OBJ(val); + int idx; + idx = js_print_stack_index(s, p); + if (idx >= 0) { + js_printf(s, "[circular %d]", idx); + } else if (s->level < s->options.max_depth) { + s->print_stack[s->level++] = p; + js_print_object(s, JS_VALUE_GET_OBJ(val)); + s->level--; + } else { + JSAtom atom = s->rt->class_array[p->class_id].class_name; + js_putc(s, '['); + js_print_atom(s, atom); + if (s->options.raw_dump) { + js_printf(s, " %p", (void *)p); + } + js_putc(s, ']'); + } + } + break; + case JS_TAG_SYMBOL: + { + JSAtomStruct *p = JS_VALUE_GET_PTR(val); + js_puts(s, "Symbol("); + js_print_atom(s, js_get_atom_index(s->rt, p)); + js_putc(s, ')'); + } + break; + case JS_TAG_MODULE: + js_puts(s, "[module]"); + break; + default: + js_printf(s, "[unknown tag %d]", tag); + break; + } +} + +void JS_PrintValueSetDefaultOptions(JSPrintValueOptions *options) +{ + memset(options, 0, sizeof(*options)); + options->max_depth = 2; + options->max_string_length = 1000; + options->max_item_count = 100; +} + +static void JS_PrintValueInternal(JSRuntime *rt, JSContext *ctx, + JSPrintValueWrite *write_func, void *write_opaque, + JSValueConst val, const JSPrintValueOptions *options) +{ + JSPrintValueState ss, *s = &ss; + if (options) + s->options = *options; + else + JS_PrintValueSetDefaultOptions(&s->options); + if (s->options.max_depth <= 0) + s->options.max_depth = JS_PRINT_MAX_DEPTH; + else + s->options.max_depth = min_int(s->options.max_depth, JS_PRINT_MAX_DEPTH); + if (s->options.max_string_length == 0) + s->options.max_string_length = UINT32_MAX; + if (s->options.max_item_count == 0) + s->options.max_item_count = UINT32_MAX; + s->rt = rt; + s->ctx = ctx; + s->write_func = write_func; + s->write_opaque = write_opaque; + s->level = 0; + js_print_value(s, val); +} + +void JS_PrintValueRT(JSRuntime *rt, JSPrintValueWrite *write_func, void *write_opaque, + JSValueConst val, const JSPrintValueOptions 
*options) +{ + JS_PrintValueInternal(rt, NULL, write_func, write_opaque, val, options); +} + +void JS_PrintValue(JSContext *ctx, JSPrintValueWrite *write_func, void *write_opaque, + JSValueConst val, const JSPrintValueOptions *options) +{ + JS_PrintValueInternal(ctx->rt, ctx, write_func, write_opaque, val, options); +} + +static void js_dump_value_write(void *opaque, const char *buf, size_t len) +{ + FILE *fo = opaque; + fwrite(buf, 1, len, fo); +} + +static __maybe_unused void print_atom(JSContext *ctx, JSAtom atom) +{ + JSPrintValueState ss, *s = &ss; + memset(s, 0, sizeof(*s)); + s->rt = ctx->rt; + s->ctx = ctx; + s->write_func = js_dump_value_write; + s->write_opaque = stdout; + js_print_atom(s, atom); +} + +static __maybe_unused void JS_DumpValue(JSContext *ctx, const char *str, JSValueConst val) +{ + printf("%s=", str); + JS_PrintValue(ctx, js_dump_value_write, stdout, val, NULL); + printf("\n"); +} + +static __maybe_unused void JS_DumpValueRT(JSRuntime *rt, const char *str, JSValueConst val) +{ + printf("%s=", str); + JS_PrintValueRT(rt, js_dump_value_write, stdout, val, NULL); + printf("\n"); } static __maybe_unused void JS_DumpObjectHeader(JSRuntime *rt) { - printf("%14s %4s %4s %14s %10s %s\n", - "ADDRESS", "REFS", "SHRF", "PROTO", "CLASS", "PROPS"); + printf("%14s %4s %4s %14s %s\n", + "ADDRESS", "REFS", "SHRF", "PROTO", "CONTENT"); } /* for debug only: dump an object without side effect */ static __maybe_unused void JS_DumpObject(JSRuntime *rt, JSObject *p) { - uint32_t i; - char atom_buf[ATOM_GET_STR_BUF_SIZE]; JSShape *sh; - JSShapeProperty *prs; - JSProperty *pr; - BOOL is_first = TRUE; - + JSPrintValueOptions options; + /* XXX: should encode atoms with special characters */ sh = p->shape; /* the shape can be NULL while freeing an object */ printf("%14p %4d ", @@ -12967,85 +13821,13 @@ static __maybe_unused void JS_DumpObject(JSRuntime *rt, JSObject *p) } else { printf("%3s %14s ", "-", "-"); } - printf("%10s ", - JS_AtomGetStrRT(rt, atom_buf, 
sizeof(atom_buf), rt->class_array[p->class_id].class_name)); - if (p->is_exotic && p->fast_array) { - printf("[ "); - for(i = 0; i < p->u.array.count; i++) { - if (i != 0) - printf(", "); - switch (p->class_id) { - case JS_CLASS_ARRAY: - case JS_CLASS_ARGUMENTS: - JS_DumpValueShort(rt, p->u.array.u.values[i]); - break; - case JS_CLASS_UINT8C_ARRAY: - case JS_CLASS_INT8_ARRAY: - case JS_CLASS_UINT8_ARRAY: - case JS_CLASS_INT16_ARRAY: - case JS_CLASS_UINT16_ARRAY: - case JS_CLASS_INT32_ARRAY: - case JS_CLASS_UINT32_ARRAY: - case JS_CLASS_BIG_INT64_ARRAY: - case JS_CLASS_BIG_UINT64_ARRAY: - case JS_CLASS_FLOAT32_ARRAY: - case JS_CLASS_FLOAT64_ARRAY: - { - int size = 1 << typed_array_size_log2(p->class_id); - const uint8_t *b = p->u.array.u.uint8_ptr + i * size; - while (size-- > 0) - printf("%02X", *b++); - } - break; - } - } - printf(" ] "); - } - if (sh) { - printf("{ "); - for(i = 0, prs = get_shape_prop(sh); i < sh->prop_count; i++, prs++) { - if (prs->atom != JS_ATOM_NULL) { - pr = &p->prop[i]; - if (!is_first) - printf(", "); - printf("%s: ", - JS_AtomGetStrRT(rt, atom_buf, sizeof(atom_buf), prs->atom)); - if ((prs->flags & JS_PROP_TMASK) == JS_PROP_GETSET) { - printf("[getset %p %p]", (void *)pr->u.getset.getter, - (void *)pr->u.getset.setter); - } else if ((prs->flags & JS_PROP_TMASK) == JS_PROP_VARREF) { - printf("[varref %p]", (void *)pr->u.var_ref); - } else if ((prs->flags & JS_PROP_TMASK) == JS_PROP_AUTOINIT) { - printf("[autoinit %p %d %p]", - (void *)js_autoinit_get_realm(pr), - js_autoinit_get_id(pr), - (void *)pr->u.init.opaque); - } else { - JS_DumpValueShort(rt, pr->u.value); - } - is_first = FALSE; - } - } - printf(" }"); - } + JS_PrintValueSetDefaultOptions(&options); + options.max_depth = 1; + options.show_hidden = TRUE; + options.raw_dump = TRUE; + JS_PrintValueRT(rt, js_dump_value_write, stdout, JS_MKPTR(JS_TAG_OBJECT, p), &options); - if (js_class_has_bytecode(p->class_id)) { - JSFunctionBytecode *b = p->u.func.function_bytecode; - JSVarRef 
**var_refs; - if (b->closure_var_count) { - var_refs = p->u.func.var_refs; - printf(" Closure:"); - for(i = 0; i < b->closure_var_count; i++) { - printf(" "); - JS_DumpValueShort(rt, var_refs[i]->value); - } - if (p->u.func.home_object) { - printf(" HomeObject: "); - JS_DumpValueShort(rt, JS_MKPTR(JS_TAG_OBJECT, p->u.func.home_object)); - } - } - } printf("\n"); } @@ -13073,6 +13855,9 @@ static __maybe_unused void JS_DumpGCObject(JSRuntime *rt, JSGCObjectHeader *p) case JS_GC_OBJ_TYPE_JS_CONTEXT: printf("[js_context]"); break; + case JS_GC_OBJ_TYPE_MODULE: + printf("[module]"); + break; default: printf("[unknown %d]", p->gc_obj_type); break; @@ -13081,126 +13866,6 @@ static __maybe_unused void JS_DumpGCObject(JSRuntime *rt, JSGCObjectHeader *p) } } -static __maybe_unused void JS_DumpValueShort(JSRuntime *rt, - JSValueConst val) -{ - uint32_t tag = JS_VALUE_GET_NORM_TAG(val); - const char *str; - - switch(tag) { - case JS_TAG_INT: - printf("%d", JS_VALUE_GET_INT(val)); - break; - case JS_TAG_BOOL: - if (JS_VALUE_GET_BOOL(val)) - str = "true"; - else - str = "false"; - goto print_str; - case JS_TAG_NULL: - str = "null"; - goto print_str; - case JS_TAG_EXCEPTION: - str = "exception"; - goto print_str; - case JS_TAG_UNINITIALIZED: - str = "uninitialized"; - goto print_str; - case JS_TAG_UNDEFINED: - str = "undefined"; - print_str: - printf("%s", str); - break; - case JS_TAG_FLOAT64: - printf("%.14g", JS_VALUE_GET_FLOAT64(val)); - break; - case JS_TAG_SHORT_BIG_INT: - printf("%" PRId64 "n", (int64_t)JS_VALUE_GET_SHORT_BIG_INT(val)); - break; - case JS_TAG_BIG_INT: - { - JSBigInt *p = JS_VALUE_GET_PTR(val); - int sgn, i; - /* In order to avoid allocations we just dump the limbs */ - sgn = js_bigint_sign(p); - if (sgn) - printf("BigInt.asIntN(%d,", p->len * JS_LIMB_BITS); - printf("0x"); - for(i = p->len - 1; i >= 0; i--) { - if (i != p->len - 1) - printf("_"); -#if JS_LIMB_BITS == 32 - printf("%08x", p->tab[i]); -#else - printf("%016" PRIx64, p->tab[i]); -#endif - } - 
printf("n"); - if (sgn) - printf(")"); - } - break; - case JS_TAG_STRING: - { - JSString *p; - p = JS_VALUE_GET_STRING(val); - JS_DumpString(rt, p); - } - break; - case JS_TAG_STRING_ROPE: - { - JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val); - printf("[rope len=%d depth=%d]", r->len, r->depth); - } - break; - case JS_TAG_FUNCTION_BYTECODE: - { - JSFunctionBytecode *b = JS_VALUE_GET_PTR(val); - char buf[ATOM_GET_STR_BUF_SIZE]; - printf("[bytecode %s]", JS_AtomGetStrRT(rt, buf, sizeof(buf), b->func_name)); - } - break; - case JS_TAG_OBJECT: - { - JSObject *p = JS_VALUE_GET_OBJ(val); - JSAtom atom = rt->class_array[p->class_id].class_name; - char atom_buf[ATOM_GET_STR_BUF_SIZE]; - printf("[%s %p]", - JS_AtomGetStrRT(rt, atom_buf, sizeof(atom_buf), atom), (void *)p); - } - break; - case JS_TAG_SYMBOL: - { - JSAtomStruct *p = JS_VALUE_GET_PTR(val); - char atom_buf[ATOM_GET_STR_BUF_SIZE]; - printf("Symbol(%s)", - JS_AtomGetStrRT(rt, atom_buf, sizeof(atom_buf), js_get_atom_index(rt, p))); - } - break; - case JS_TAG_MODULE: - printf("[module]"); - break; - default: - printf("[unknown tag %d]", tag); - break; - } -} - -static __maybe_unused void JS_DumpValue(JSContext *ctx, - JSValueConst val) -{ - JS_DumpValueShort(ctx->rt, val); -} - -static __maybe_unused void JS_PrintValue(JSContext *ctx, - const char *str, - JSValueConst val) -{ - printf("%s=", str); - JS_DumpValueShort(ctx->rt, val); - printf("\n"); -} - /* return -1 if exception (proxy case) or TRUE/FALSE */ // TODO: should take flags to make proxy resolution and exceptions optional int JS_IsArray(JSContext *ctx, JSValueConst val) @@ -15041,7 +15706,7 @@ static __exception int js_for_in_prepare_prototype_chain_enum(JSContext *ctx, JS_FreeValue(ctx, obj1); goto fail; } - js_free_prop_enum(ctx, tab_atom, tab_atom_count); + JS_FreePropertyEnum(ctx, tab_atom, tab_atom_count); if (tab_atom_count != 0) { JS_FreeValue(ctx, obj1); goto slow_path; @@ -15125,7 +15790,7 @@ static __exception int js_for_in_next(JSContext *ctx, 
JSValue *sp) JS_GPN_STRING_MASK | JS_GPN_SET_ENUM)) { return -1; } - js_free_prop_enum(ctx, it->tab_atom, it->atom_count); + JS_FreePropertyEnum(ctx, it->tab_atom, it->atom_count); it->tab_atom = tab_atom; it->atom_count = tab_atom_count; it->idx = 0; @@ -15506,6 +16171,7 @@ static __exception int js_append_enumerate(JSContext *ctx, JSValue *sp) int is_array_iterator; JSValue *arrp; uint32_t i, count32, pos; + JSCFunctionType ft; if (JS_VALUE_GET_TAG(sp[-2]) != JS_TAG_INT) { JS_ThrowInternalError(ctx, "invalid index for append"); @@ -15523,8 +16189,8 @@ static __exception int js_append_enumerate(JSContext *ctx, JSValue *sp) iterator = JS_GetProperty(ctx, sp[-1], JS_ATOM_Symbol_iterator); if (JS_IsException(iterator)) return -1; - is_array_iterator = JS_IsCFunction(ctx, iterator, - (JSCFunction *)js_create_array_iterator, + ft.generic_magic = js_create_array_iterator; + is_array_iterator = JS_IsCFunction(ctx, iterator, ft.generic, JS_ITERATOR_KIND_VALUE); JS_FreeValue(ctx, iterator); @@ -15536,8 +16202,10 @@ static __exception int js_append_enumerate(JSContext *ctx, JSValue *sp) JS_FreeValue(ctx, enumobj); return -1; } + + ft.iterator_next = js_array_iterator_next; if (is_array_iterator - && JS_IsCFunction(ctx, method, (JSCFunction *)js_array_iterator_next, 0) + && JS_IsCFunction(ctx, method, ft.generic, 0) && js_get_fast_array(ctx, sp[-1], &arrp, &count32)) { uint32_t len; if (js_get_length32(ctx, &len, sp[-1])) @@ -15648,10 +16316,10 @@ static __exception int JS_CopyDataProperties(JSContext *ctx, if (ret < 0) goto exception; } - js_free_prop_enum(ctx, tab_atom, tab_atom_count); + JS_FreePropertyEnum(ctx, tab_atom, tab_atom_count); return 0; exception: - js_free_prop_enum(ctx, tab_atom, tab_atom_count); + JS_FreePropertyEnum(ctx, tab_atom, tab_atom_count); return -1; } @@ -16868,10 +17536,12 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, { JSValue val; sf->cur_pc = pc; - val = js_dynamic_import(ctx, sp[-1]); + val = 
js_dynamic_import(ctx, sp[-2], sp[-1]); if (JS_IsException(val)) goto exception; + JS_FreeValue(ctx, sp[-2]); JS_FreeValue(ctx, sp[-1]); + sp--; sp[-1] = val; } BREAK; @@ -17798,6 +18468,38 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, } BREAK; + CASE(OP_get_array_el3): + { + JSValue val; + + switch (JS_VALUE_GET_TAG(sp[-2])) { + case JS_TAG_INT: + case JS_TAG_STRING: + case JS_TAG_SYMBOL: + /* undefined and null are tested in JS_GetPropertyValue() */ + break; + default: + /* must be tested before JS_ToPropertyKey */ + if (unlikely(JS_IsUndefined(sp[-2]) || JS_IsNull(sp[-2]))) { + JS_ThrowTypeError(ctx, "value has no property"); + goto exception; + } + sf->cur_pc = pc; + ret_val = JS_ToPropertyKey(ctx, sp[-1]); + if (JS_IsException(ret_val)) + goto exception; + JS_FreeValue(ctx, sp[-1]); + sp[-1] = ret_val; + break; + } + sf->cur_pc = pc; + val = JS_GetPropertyValue(ctx, sp[-2], JS_DupValue(ctx, sp[-1])); + *sp++ = val; + if (unlikely(JS_IsException(val))) + goto exception; + } + BREAK; + CASE(OP_get_ref_value): { JSValue val; @@ -18020,12 +18722,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, *pv = __JS_NewFloat64(ctx, JS_VALUE_GET_FLOAT64(*pv) + JS_VALUE_GET_FLOAT64(op2)); sp--; - } else if (JS_VALUE_GET_TAG(*pv) == JS_TAG_STRING) { + } else if (JS_VALUE_GET_TAG(*pv) == JS_TAG_STRING && + JS_VALUE_GET_TAG(op2) == JS_TAG_STRING) { sp--; sf->cur_pc = pc; - op2 = JS_ToPrimitiveFree(ctx, op2, HINT_NONE); - if (JS_IsException(op2)) - goto exception; if (JS_ConcatStringInPlace(ctx, JS_VALUE_GET_STRING(*pv), op2)) { JS_FreeValue(ctx, op2); } else { @@ -18490,7 +19190,7 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, pc += 4; sf->cur_pc = pc; - ret = JS_DeleteProperty(ctx, ctx->global_obj, atom, 0); + ret = JS_DeleteGlobalVar(ctx, atom); if (unlikely(ret < 0)) goto exception; *sp++ = JS_NewBool(ctx, ret); @@ -18525,27 +19225,6 @@ static JSValue JS_CallInternal(JSContext 
*caller_ctx, JSValueConst func_obj, } BREAK; - CASE(OP_to_propkey2): - /* must be tested first */ - if (unlikely(JS_IsUndefined(sp[-2]) || JS_IsNull(sp[-2]))) { - JS_ThrowTypeError(ctx, "value has no property"); - goto exception; - } - switch (JS_VALUE_GET_TAG(sp[-1])) { - case JS_TAG_INT: - case JS_TAG_STRING: - case JS_TAG_SYMBOL: - break; - default: - sf->cur_pc = pc; - ret_val = JS_ToPropertyKey(ctx, sp[-1]); - if (JS_IsException(ret_val)) - goto exception; - JS_FreeValue(ctx, sp[-1]); - sp[-1] = ret_val; - break; - } - BREAK; #if 0 CASE(OP_to_string): if (JS_VALUE_GET_TAG(sp[-1]) != JS_TAG_STRING) { @@ -18737,7 +19416,7 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, sf->cur_pc = pc; build_backtrace(ctx, rt->current_exception, NULL, 0, 0, 0); } - if (!JS_IsUncatchableError(ctx, rt->current_exception)) { + if (!rt->current_exception_is_uncatchable) { while (sp > stack_buf) { JSValue val = *--sp; JS_FreeValue(ctx, val); @@ -20436,6 +21115,7 @@ static __exception int js_parse_template_part(JSParseState *s, const uint8_t *p) { uint32_t c; StringBuffer b_s, *b = &b_s; + JSValue str; /* p points to the first byte of the template part */ if (string_buffer_init(s->ctx, b, 32)) @@ -20478,9 +21158,12 @@ static __exception int js_parse_template_part(JSParseState *s, const uint8_t *p) if (string_buffer_putc(b, c)) goto fail; } + str = string_buffer_end(b); + if (JS_IsException(str)) + return -1; s->token.val = TOK_TEMPLATE; s->token.u.str.sep = c; - s->token.u.str.str = string_buffer_end(b); + s->token.u.str.str = str; s->buf_ptr = p; return 0; @@ -20499,7 +21182,8 @@ static __exception int js_parse_string(JSParseState *s, int sep, uint32_t c; StringBuffer b_s, *b = &b_s; const uint8_t *p_escape; - + JSValue str; + /* string */ if (string_buffer_init(s->ctx, b, 32)) goto fail; @@ -20508,11 +21192,6 @@ static __exception int js_parse_string(JSParseState *s, int sep, goto invalid_char; c = *p; if (c < 0x20) { - if (!s->cur_func) { - if 
(do_throw) - js_parse_error_pos(s, p, "invalid character in a JSON string"); - goto fail; - } if (sep == '`') { if (c == '\r') { if (p[1] == '\n') @@ -20558,8 +21237,6 @@ static __exception int js_parse_string(JSParseState *s, int sep, continue; default: if (c >= '0' && c <= '9') { - if (!s->cur_func) - goto invalid_escape; /* JSON case */ if (!(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`') goto parse_escape; if (c == '0' && !(p[1] >= '0' && p[1] <= '9')) { @@ -20613,9 +21290,12 @@ static __exception int js_parse_string(JSParseState *s, int sep, if (string_buffer_putc(b, c)) goto fail; } + str = string_buffer_end(b); + if (JS_IsException(str)) + return -1; token->val = TOK_STRING; token->u.str.sep = c; - token->u.str.str = string_buffer_end(b); + token->u.str.str = str; *pp = p; return 0; @@ -20643,6 +21323,7 @@ static __exception int js_parse_regexp(JSParseState *s) StringBuffer b_s, *b = &b_s; StringBuffer b2_s, *b2 = &b2_s; uint32_t c; + JSValue body_str, flags_str; p = s->buf_ptr; p++; @@ -20724,9 +21405,17 @@ static __exception int js_parse_regexp(JSParseState *s) p = p_next; } + body_str = string_buffer_end(b); + flags_str = string_buffer_end(b2); + if (JS_IsException(body_str) || + JS_IsException(flags_str)) { + JS_FreeValue(s->ctx, body_str); + JS_FreeValue(s->ctx, flags_str); + return -1; + } s->token.val = TOK_REGEXP; - s->token.u.regexp.body = string_buffer_end(b); - s->token.u.regexp.flags = string_buffer_end(b2); + s->token.u.regexp.body = body_str; + s->token.u.regexp.flags = flags_str; s->buf_ptr = p; return 0; fail: @@ -21296,6 +21985,7 @@ static __exception int next_token(JSParseState *s) } /* 'c' is the first character. Return JS_ATOM_NULL in case of error */ +/* XXX: accept unicode identifiers as JSON5 ? 
*/ static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c) { const uint8_t *p; @@ -21328,6 +22018,178 @@ static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c) return atom; } +static int json_parse_string(JSParseState *s, const uint8_t **pp, int sep) +{ + const uint8_t *p, *p_next; + int i; + uint32_t c; + StringBuffer b_s, *b = &b_s; + + if (string_buffer_init(s->ctx, b, 32)) + goto fail; + + p = *pp; + for(;;) { + if (p >= s->buf_end) { + goto end_of_input; + } + c = *p++; + if (c == sep) + break; + if (c < 0x20) { + js_parse_error_pos(s, p - 1, "Bad control character in string literal"); + goto fail; + } + if (c == '\\') { + c = *p++; + switch(c) { + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case '\\': break; + case '/': break; + case 'u': + c = 0; + for(i = 0; i < 4; i++) { + int h = from_hex(*p++); + if (h < 0) { + js_parse_error_pos(s, p - 1, "Bad Unicode escape"); + goto fail; + } + c = (c << 4) | h; + } + break; + case '\n': + if (s->ext_json) + continue; + goto bad_escape; + case 'v': + if (s->ext_json) { + c = '\v'; + break; + } + goto bad_escape; + default: + if (c == sep) + break; + if (p > s->buf_end) + goto end_of_input; + bad_escape: + js_parse_error_pos(s, p - 1, "Bad escaped character"); + goto fail; + } + } else + if (c >= 0x80) { + c = unicode_from_utf8(p - 1, UTF8_CHAR_LEN_MAX, &p_next); + if (c > 0x10FFFF) { + js_parse_error_pos(s, p - 1, "Bad UTF-8 sequence"); + goto fail; + } + p = p_next; + } + if (string_buffer_putc(b, c)) + goto fail; + } + s->token.val = TOK_STRING; + s->token.u.str.sep = sep; + s->token.u.str.str = string_buffer_end(b); + *pp = p; + return 0; + + end_of_input: + js_parse_error(s, "Unexpected end of JSON input"); + fail: + string_buffer_free(b); + return -1; +} + +static int json_parse_number(JSParseState *s, const uint8_t **pp) +{ + const uint8_t *p = *pp; + const uint8_t *p_start 
= p; + int radix; + double d; + JSATODTempMem atod_mem; + + if (*p == '+' || *p == '-') + p++; + + if (!is_digit(*p)) { + if (s->ext_json) { + if (strstart((const char *)p, "Infinity", (const char **)&p)) { + d = 1.0 / 0.0; + if (*p_start == '-') + d = -d; + goto done; + } else if (strstart((const char *)p, "NaN", (const char **)&p)) { + d = NAN; + goto done; + } else if (*p != '.') { + goto unexpected_token; + } + } else { + goto unexpected_token; + } + } + + if (p[0] == '0') { + if (s->ext_json) { + /* also accepts base 16, 8 and 2 prefix for integers */ + radix = 10; + if (p[1] == 'x' || p[1] == 'X') { + p += 2; + radix = 16; + } else if ((p[1] == 'o' || p[1] == 'O')) { + p += 2; + radix = 8; + } else if ((p[1] == 'b' || p[1] == 'B')) { + p += 2; + radix = 2; + } + if (radix != 10) { + /* prefix is present */ + if (to_digit(*p) >= radix) { + unexpected_token: + return js_parse_error_pos(s, p, "Unexpected token '%c'", *p); + } + d = js_atod((const char *)p_start, (const char **)&p, 0, + JS_ATOD_INT_ONLY | JS_ATOD_ACCEPT_BIN_OCT, &atod_mem); + goto done; + } + } + if (is_digit(p[1])) + return js_parse_error_pos(s, p, "Unexpected number"); + } + + while (is_digit(*p)) + p++; + + if (*p == '.') { + p++; + if (!is_digit(*p)) + return js_parse_error_pos(s, p, "Unterminated fractional number"); + while (is_digit(*p)) + p++; + } + if (*p == 'e' || *p == 'E') { + p++; + if (*p == '+' || *p == '-') + p++; + if (!is_digit(*p)) + return js_parse_error_pos(s, p, "Exponent part is missing a number"); + while (is_digit(*p)) + p++; + } + d = js_atod((const char *)p_start, NULL, 10, 0, &atod_mem); + done: + s->token.val = TOK_NUMBER; + s->token.u.num.val = JS_NewFloat64(s->ctx, d); + *pp = p; + return 0; +} + static __exception int json_next_token(JSParseState *s) { const uint8_t *p; @@ -21359,7 +22221,8 @@ static __exception int json_next_token(JSParseState *s) } /* fall through */ case '\"': - if (js_parse_string(s, c, TRUE, p + 1, &s->token, &p)) + p++; + if 
(json_parse_string(s, &p, c)) goto fail; break; case '\r': /* accept DOS and MAC newline sequences */ @@ -21449,7 +22312,6 @@ static __exception int json_next_token(JSParseState *s) case 'Y': case 'Z': case '_': case '$': - /* identifier : only pure ascii characters are accepted */ p++; atom = json_parse_ident(s, &p, c); if (atom == JS_ATOM_NULL) @@ -21460,39 +22322,23 @@ static __exception int json_next_token(JSParseState *s) s->token.val = TOK_IDENT; break; case '+': - if (!s->ext_json || !is_digit(p[1])) + if (!s->ext_json) goto def_token; goto parse_number; - case '0': - if (is_digit(p[1])) + case '.': + if (s->ext_json && is_digit(p[1])) + goto parse_number; + else goto def_token; - goto parse_number; case '-': - if (!is_digit(p[1])) - goto def_token; - goto parse_number; + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* number */ parse_number: - { - JSValue ret; - int flags, radix; - if (!s->ext_json) { - flags = 0; - radix = 10; - } else { - flags = ATOD_ACCEPT_BIN_OCT; - radix = 0; - } - ret = js_atof(s->ctx, (const char *)p, (const char **)&p, radix, - flags); - if (JS_IsException(ret)) - goto fail; - s->token.val = TOK_NUMBER; - s->token.u.num.val = ret; - } + if (json_parse_number(s, &p)) + goto fail; break; default: if (c >= 128) { @@ -21677,7 +22523,7 @@ BOOL JS_DetectModule(const char *input, size_t input_len) } static inline int get_prev_opcode(JSFunctionDef *fd) { - if (fd->last_opcode_pos < 0) + if (fd->last_opcode_pos < 0 || dbuf_error(&fd->byte_code)) return OP_invalid; else return fd->byte_code.buf[fd->last_opcode_pos]; @@ -21742,7 +22588,11 @@ static void emit_op(JSParseState *s, uint8_t val) static void emit_atom(JSParseState *s, JSAtom name) { - emit_u32(s, JS_DupAtom(s->ctx, name)); + DynBuf *bc = &s->cur_func->byte_code; + if (dbuf_realloc(bc, bc->size + 4)) + return; /* not enough memory : don't duplicate the atom */ + put_u32(bc->buf + bc->size, JS_DupAtom(s->ctx, name)); + bc->size 
+= 4; } static int update_label(JSFunctionDef *s, int label, int delta) @@ -21756,29 +22606,33 @@ static int update_label(JSFunctionDef *s, int label, int delta) return ls->ref_count; } -static int new_label_fd(JSFunctionDef *fd, int label) +static int new_label_fd(JSFunctionDef *fd) { + int label; LabelSlot *ls; - if (label < 0) { - if (js_resize_array(fd->ctx, (void *)&fd->label_slots, - sizeof(fd->label_slots[0]), - &fd->label_size, fd->label_count + 1)) - return -1; - label = fd->label_count++; - ls = &fd->label_slots[label]; - ls->ref_count = 0; - ls->pos = -1; - ls->pos2 = -1; - ls->addr = -1; - ls->first_reloc = NULL; - } + if (js_resize_array(fd->ctx, (void *)&fd->label_slots, + sizeof(fd->label_slots[0]), + &fd->label_size, fd->label_count + 1)) + return -1; + label = fd->label_count++; + ls = &fd->label_slots[label]; + ls->ref_count = 0; + ls->pos = -1; + ls->pos2 = -1; + ls->addr = -1; + ls->first_reloc = NULL; return label; } static int new_label(JSParseState *s) { - return new_label_fd(s->cur_func, -1); + int label; + label = new_label_fd(s->cur_func); + if (unlikely(label < 0)) { + dbuf_set_error(&s->cur_func->byte_code); + } + return label; } /* don't update the last opcode and don't emit line number info */ @@ -21806,8 +22660,11 @@ static int emit_label(JSParseState *s, int label) static int emit_goto(JSParseState *s, int opcode, int label) { if (js_is_live_code(s)) { - if (label < 0) + if (label < 0) { label = new_label(s); + if (label < 0) + return -1; + } emit_op(s, opcode); emit_u32(s, label); s->cur_func->label_slots[label].ref_count++; @@ -23848,6 +24705,8 @@ static __exception int get_lvalue(JSParseState *s, int *popcode, int *pscope, switch(opcode) { case OP_scope_get_var: label = new_label(s); + if (label < 0) + return -1; emit_op(s, OP_scope_make_ref); emit_atom(s, name); emit_u32(s, label); @@ -23866,10 +24725,7 @@ static __exception int get_lvalue(JSParseState *s, int *popcode, int *pscope, emit_u16(s, scope); break; case 
OP_get_array_el: - /* XXX: replace by a single opcode ? */ - emit_op(s, OP_to_propkey2); - emit_op(s, OP_dup2); - emit_op(s, OP_get_array_el); + emit_op(s, OP_get_array_el3); break; case OP_get_super_value: emit_op(s, OP_to_propkey); @@ -23883,6 +24739,8 @@ static __exception int get_lvalue(JSParseState *s, int *popcode, int *pscope, switch(opcode) { case OP_scope_get_var: label = new_label(s); + if (label < 0) + return -1; emit_op(s, OP_scope_make_ref); emit_atom(s, name); emit_u32(s, label); @@ -24298,7 +25156,7 @@ static int js_parse_destructuring_element(JSParseState *s, int tok, int is_arg, continue; } if (prop_name == JS_ATOM_NULL) { - emit_op(s, OP_to_propkey2); + emit_op(s, OP_to_propkey); if (has_ellipsis) { /* define the property in excludeList */ emit_op(s, OP_perm3); @@ -24892,6 +25750,23 @@ static __exception int js_parse_postfix_expr(JSParseState *s, int parse_flags) return js_parse_error(s, "invalid use of 'import()'"); if (js_parse_assign_expr(s)) return -1; + if (s->token.val == ',') { + if (next_token(s)) + return -1; + if (s->token.val != ')') { + if (js_parse_assign_expr(s)) + return -1; + /* accept a trailing comma */ + if (s->token.val == ',') { + if (next_token(s)) + return -1; + } + } else { + emit_op(s, OP_undefined); + } + } else { + emit_op(s, OP_undefined); + } if (js_parse_expect(s, ')')) return -1; emit_op(s, OP_import); @@ -24908,6 +25783,8 @@ static __exception int js_parse_postfix_expr(JSParseState *s, int parse_flags) BOOL has_optional_chain = FALSE; if (s->token.val == TOK_QUESTION_MARK_DOT) { + if ((parse_flags & PF_POSTFIX_CALL) == 0) + return js_parse_error(s, "new keyword cannot be used with an optional chain"); op_token_ptr = s->token.ptr; /* optional chaining */ if (next_token(s)) @@ -27451,7 +28328,7 @@ fail: return -1; } -/* 'name' is freed */ +/* 'name' is freed. 
The module is referenced by 'ctx->loaded_modules' */ static JSModuleDef *js_new_module_def(JSContext *ctx, JSAtom name) { JSModuleDef *m; @@ -27461,6 +28338,7 @@ static JSModuleDef *js_new_module_def(JSContext *ctx, JSAtom name) return NULL; } m->header.ref_count = 1; + add_gc_object(ctx->rt, &m->header, JS_GC_OBJ_TYPE_MODULE); m->module_name = name; m->module_ns = JS_UNDEFINED; m->func_obj = JS_UNDEFINED; @@ -27469,6 +28347,7 @@ static JSModuleDef *js_new_module_def(JSContext *ctx, JSAtom name) m->promise = JS_UNDEFINED; m->resolving_funcs[0] = JS_UNDEFINED; m->resolving_funcs[1] = JS_UNDEFINED; + m->private_value = JS_UNDEFINED; list_add_tail(&m->link, &ctx->loaded_modules); return m; } @@ -27478,6 +28357,11 @@ static void js_mark_module_def(JSRuntime *rt, JSModuleDef *m, { int i; + for(i = 0; i < m->req_module_entries_count; i++) { + JSReqModuleEntry *rme = &m->req_module_entries[i]; + JS_MarkValue(rt, rme->attributes, mark_func); + } + for(i = 0; i < m->export_entries_count; i++) { JSExportEntry *me = &m->export_entries[i]; if (me->export_type == JS_EXPORT_TYPE_LOCAL && @@ -27493,61 +28377,65 @@ static void js_mark_module_def(JSRuntime *rt, JSModuleDef *m, JS_MarkValue(rt, m->promise, mark_func); JS_MarkValue(rt, m->resolving_funcs[0], mark_func); JS_MarkValue(rt, m->resolving_funcs[1], mark_func); + JS_MarkValue(rt, m->private_value, mark_func); } -static void js_free_module_def(JSContext *ctx, JSModuleDef *m) +static void js_free_module_def(JSRuntime *rt, JSModuleDef *m) { int i; - JS_FreeAtom(ctx, m->module_name); + JS_FreeAtomRT(rt, m->module_name); for(i = 0; i < m->req_module_entries_count; i++) { JSReqModuleEntry *rme = &m->req_module_entries[i]; - JS_FreeAtom(ctx, rme->module_name); + JS_FreeAtomRT(rt, rme->module_name); + JS_FreeValueRT(rt, rme->attributes); } - js_free(ctx, m->req_module_entries); + js_free_rt(rt, m->req_module_entries); for(i = 0; i < m->export_entries_count; i++) { JSExportEntry *me = &m->export_entries[i]; if (me->export_type == 
JS_EXPORT_TYPE_LOCAL) - free_var_ref(ctx->rt, me->u.local.var_ref); - JS_FreeAtom(ctx, me->export_name); - JS_FreeAtom(ctx, me->local_name); + free_var_ref(rt, me->u.local.var_ref); + JS_FreeAtomRT(rt, me->export_name); + JS_FreeAtomRT(rt, me->local_name); } - js_free(ctx, m->export_entries); + js_free_rt(rt, m->export_entries); - js_free(ctx, m->star_export_entries); + js_free_rt(rt, m->star_export_entries); for(i = 0; i < m->import_entries_count; i++) { JSImportEntry *mi = &m->import_entries[i]; - JS_FreeAtom(ctx, mi->import_name); + JS_FreeAtomRT(rt, mi->import_name); } - js_free(ctx, m->import_entries); - js_free(ctx, m->async_parent_modules); + js_free_rt(rt, m->import_entries); + js_free_rt(rt, m->async_parent_modules); - JS_FreeValue(ctx, m->module_ns); - JS_FreeValue(ctx, m->func_obj); - JS_FreeValue(ctx, m->eval_exception); - JS_FreeValue(ctx, m->meta_obj); - JS_FreeValue(ctx, m->promise); - JS_FreeValue(ctx, m->resolving_funcs[0]); - JS_FreeValue(ctx, m->resolving_funcs[1]); - list_del(&m->link); - js_free(ctx, m); + JS_FreeValueRT(rt, m->module_ns); + JS_FreeValueRT(rt, m->func_obj); + JS_FreeValueRT(rt, m->eval_exception); + JS_FreeValueRT(rt, m->meta_obj); + JS_FreeValueRT(rt, m->promise); + JS_FreeValueRT(rt, m->resolving_funcs[0]); + JS_FreeValueRT(rt, m->resolving_funcs[1]); + JS_FreeValueRT(rt, m->private_value); + /* during the GC the finalizers are called in an arbitrary + order so the module may no longer be referenced by the JSContext list */ + if (m->link.next) { + list_del(&m->link); + } + remove_gc_object(&m->header); + if (rt->gc_phase == JS_GC_PHASE_REMOVE_CYCLES && m->header.ref_count != 0) { + list_add_tail(&m->header.link, &rt->gc_zero_ref_count_list); + } else { + js_free_rt(rt, m); + } } static int add_req_module_entry(JSContext *ctx, JSModuleDef *m, JSAtom module_name) { JSReqModuleEntry *rme; - int i; - - /* no need to add the module request if it is already present */ - for(i = 0; i < m->req_module_entries_count; i++) { - rme = 
&m->req_module_entries[i]; - if (rme->module_name == module_name) - return i; - } if (js_resize_array(ctx, (void **)&m->req_module_entries, sizeof(JSReqModuleEntry), @@ -27557,7 +28445,8 @@ static int add_req_module_entry(JSContext *ctx, JSModuleDef *m, rme = &m->req_module_entries[m->req_module_entries_count++]; rme->module_name = JS_DupAtom(ctx, module_name); rme->module = NULL; - return i; + rme->attributes = JS_UNDEFINED; + return m->req_module_entries_count - 1; } static JSExportEntry *find_export_entry(JSContext *ctx, JSModuleDef *m, @@ -27637,6 +28526,8 @@ JSModuleDef *JS_NewCModule(JSContext *ctx, const char *name_str, if (name == JS_ATOM_NULL) return NULL; m = js_new_module_def(ctx, name); + if (!m) + return NULL; m->init_func = func; return m; } @@ -27676,12 +28567,38 @@ int JS_SetModuleExport(JSContext *ctx, JSModuleDef *m, const char *export_name, return -1; } +int JS_SetModulePrivateValue(JSContext *ctx, JSModuleDef *m, JSValue val) +{ + set_value(ctx, &m->private_value, val); + return 0; +} + +JSValue JS_GetModulePrivateValue(JSContext *ctx, JSModuleDef *m) +{ + return JS_DupValue(ctx, m->private_value); +} + void JS_SetModuleLoaderFunc(JSRuntime *rt, JSModuleNormalizeFunc *module_normalize, JSModuleLoaderFunc *module_loader, void *opaque) { rt->module_normalize_func = module_normalize; - rt->module_loader_func = module_loader; + rt->module_loader_has_attr = FALSE; + rt->u.module_loader_func = module_loader; + rt->module_check_attrs = NULL; + rt->module_loader_opaque = opaque; +} + +void JS_SetModuleLoaderFunc2(JSRuntime *rt, + JSModuleNormalizeFunc *module_normalize, + JSModuleLoaderFunc2 *module_loader, + JSModuleCheckSupportedImportAttributes *module_check_attrs, + void *opaque) +{ + rt->module_normalize_func = module_normalize; + rt->module_loader_has_attr = TRUE; + rt->u.module_loader_func2 = module_loader; + rt->module_check_attrs = module_check_attrs; rt->module_loader_opaque = opaque; } @@ -27762,7 +28679,8 @@ static JSModuleDef 
*js_find_loaded_module(JSContext *ctx, JSAtom name) /* return NULL in case of exception (e.g. module could not be loaded) */ static JSModuleDef *js_host_resolve_imported_module(JSContext *ctx, const char *base_cname, - const char *cname1) + const char *cname1, + JSValueConst attributes) { JSRuntime *rt = ctx->rt; JSModuleDef *m; @@ -27795,22 +28713,26 @@ static JSModuleDef *js_host_resolve_imported_module(JSContext *ctx, JS_FreeAtom(ctx, module_name); /* load the module */ - if (!rt->module_loader_func) { + if (!rt->u.module_loader_func) { /* XXX: use a syntax error ? */ JS_ThrowReferenceError(ctx, "could not load module '%s'", cname); js_free(ctx, cname); return NULL; } - - m = rt->module_loader_func(ctx, cname, rt->module_loader_opaque); + if (rt->module_loader_has_attr) { + m = rt->u.module_loader_func2(ctx, cname, rt->module_loader_opaque, attributes); + } else { + m = rt->u.module_loader_func(ctx, cname, rt->module_loader_opaque); + } js_free(ctx, cname); return m; } static JSModuleDef *js_host_resolve_imported_module_atom(JSContext *ctx, - JSAtom base_module_name, - JSAtom module_name1) + JSAtom base_module_name, + JSAtom module_name1, + JSValueConst attributes) { const char *base_cname, *cname; JSModuleDef *m; @@ -27823,7 +28745,7 @@ static JSModuleDef *js_host_resolve_imported_module_atom(JSContext *ctx, JS_FreeCString(ctx, base_cname); return NULL; } - m = js_host_resolve_imported_module(ctx, base_cname, cname); + m = js_host_resolve_imported_module(ctx, base_cname, cname, attributes); JS_FreeCString(ctx, base_cname); JS_FreeCString(ctx, cname); return m; @@ -28285,7 +29207,8 @@ static int js_resolve_module(JSContext *ctx, JSModuleDef *m) for(i = 0; i < m->req_module_entries_count; i++) { JSReqModuleEntry *rme = &m->req_module_entries[i]; m1 = js_host_resolve_imported_module_atom(ctx, m->module_name, - rme->module_name); + rme->module_name, + rme->attributes); if (!m1) return -1; rme->module = m1; @@ -28512,7 +29435,7 @@ static int 
js_inner_module_linking(JSContext *ctx, JSModuleDef *m, printf(": "); #endif m1 = m->req_module_entries[mi->req_module_idx].module; - if (mi->import_name == JS_ATOM__star_) { + if (mi->is_star) { JSValue val; /* name space import */ val = JS_GetModuleNamespace(ctx, m1); @@ -28762,14 +29685,15 @@ static JSValue js_load_module_fulfilled(JSContext *ctx, JSValueConst this_val, static void JS_LoadModuleInternal(JSContext *ctx, const char *basename, const char *filename, - JSValueConst *resolving_funcs) + JSValueConst *resolving_funcs, + JSValueConst attributes) { JSValue evaluate_promise; JSModuleDef *m; JSValue ret, err, func_obj, evaluate_resolving_funcs[2]; JSValueConst func_data[3]; - m = js_host_resolve_imported_module(ctx, basename, filename); + m = js_host_resolve_imported_module(ctx, basename, filename, attributes); if (!m) goto fail; @@ -28816,7 +29740,7 @@ JSValue JS_LoadModule(JSContext *ctx, const char *basename, if (JS_IsException(promise)) return JS_EXCEPTION; JS_LoadModuleInternal(ctx, basename, filename, - (JSValueConst *)resolving_funcs); + (JSValueConst *)resolving_funcs, JS_UNDEFINED); JS_FreeValue(ctx, resolving_funcs[0]); JS_FreeValue(ctx, resolving_funcs[1]); return promise; @@ -28828,6 +29752,7 @@ static JSValue js_dynamic_import_job(JSContext *ctx, JSValueConst *resolving_funcs = argv; JSValueConst basename_val = argv[2]; JSValueConst specifier = argv[3]; + JSValueConst attributes = argv[4]; const char *basename = NULL, *filename; JSValue ret, err; @@ -28844,7 +29769,7 @@ static JSValue js_dynamic_import_job(JSContext *ctx, goto exception; JS_LoadModuleInternal(ctx, basename, filename, - resolving_funcs); + resolving_funcs, attributes); JS_FreeCString(ctx, filename); JS_FreeCString(ctx, basename); return JS_UNDEFINED; @@ -28858,11 +29783,12 @@ static JSValue js_dynamic_import_job(JSContext *ctx, return JS_UNDEFINED; } -static JSValue js_dynamic_import(JSContext *ctx, JSValueConst specifier) +static JSValue js_dynamic_import(JSContext *ctx, 
JSValueConst specifier, JSValueConst options) { JSAtom basename; - JSValue promise, resolving_funcs[2], basename_val; - JSValueConst args[4]; + JSValue promise, resolving_funcs[2], basename_val, err, ret; + JSValue specifier_str = JS_UNDEFINED, attributes = JS_UNDEFINED, attributes_obj = JS_UNDEFINED; + JSValueConst args[5]; basename = JS_GetScriptOrModuleName(ctx, 0); if (basename == JS_ATOM_NULL) @@ -28879,19 +29805,82 @@ static JSValue js_dynamic_import(JSContext *ctx, JSValueConst specifier) return promise; } + /* the string conversion must occur here */ + specifier_str = JS_ToString(ctx, specifier); + if (JS_IsException(specifier_str)) + goto exception; + + if (!JS_IsUndefined(options)) { + if (!JS_IsObject(options)) { + JS_ThrowTypeError(ctx, "options must be an object"); + goto exception; + } + attributes_obj = JS_GetProperty(ctx, options, JS_ATOM_with); + if (JS_IsException(attributes_obj)) + goto exception; + if (!JS_IsUndefined(attributes_obj)) { + JSPropertyEnum *atoms; + uint32_t atoms_len, i; + JSValue val; + + if (!JS_IsObject(attributes_obj)) { + JS_ThrowTypeError(ctx, "options.with must be an object"); + goto exception; + } + attributes = JS_NewObjectProto(ctx, JS_NULL); + if (JS_GetOwnPropertyNamesInternal(ctx, &atoms, &atoms_len, JS_VALUE_GET_OBJ(attributes_obj), + JS_GPN_STRING_MASK | JS_GPN_ENUM_ONLY)) { + goto exception; + } + for(i = 0; i < atoms_len; i++) { + val = JS_GetProperty(ctx, attributes_obj, atoms[i].atom); + if (JS_IsException(val)) + goto exception1; + if (!JS_IsString(val)) { + JS_FreeValue(ctx, val); + JS_ThrowTypeError(ctx, "module attribute values must be strings"); + goto exception1; + } + if (JS_DefinePropertyValue(ctx, attributes, atoms[i].atom, val, + JS_PROP_C_W_E) < 0) { + exception1: + JS_FreePropertyEnum(ctx, atoms, atoms_len); + goto exception; + } + } + JS_FreePropertyEnum(ctx, atoms, atoms_len); + if (ctx->rt->module_check_attrs && + ctx->rt->module_check_attrs(ctx, ctx->rt->module_loader_opaque, attributes) < 0) { + 
goto exception; + } + JS_FreeValue(ctx, attributes_obj); + } + } + args[0] = resolving_funcs[0]; args[1] = resolving_funcs[1]; args[2] = basename_val; - args[3] = specifier; - + args[3] = specifier_str; + args[4] = attributes; + /* cannot run JS_LoadModuleInternal synchronously because it would cause an unexpected recursion in js_evaluate_module() */ - JS_EnqueueJob(ctx, js_dynamic_import_job, 4, args); - + JS_EnqueueJob(ctx, js_dynamic_import_job, 5, args); + done: JS_FreeValue(ctx, basename_val); JS_FreeValue(ctx, resolving_funcs[0]); JS_FreeValue(ctx, resolving_funcs[1]); + JS_FreeValue(ctx, specifier_str); + JS_FreeValue(ctx, attributes); return promise; + exception: + JS_FreeValue(ctx, attributes_obj); + err = JS_GetException(ctx); + ret = JS_Call(ctx, resolving_funcs[1], JS_UNDEFINED, + 1, (JSValueConst *)&err); + JS_FreeValue(ctx, ret); + JS_FreeValue(ctx, err); + goto done; } static void js_set_module_evaluated(JSContext *ctx, JSModuleDef *m) @@ -28968,6 +29957,14 @@ static int exec_module_list_cmp(const void *p1, const void *p2, void *opaque) static int js_execute_async_module(JSContext *ctx, JSModuleDef *m); static int js_execute_sync_module(JSContext *ctx, JSModuleDef *m, JSValue *pvalue); +#ifdef DUMP_MODULE_EXEC +static void js_dump_module(JSContext *ctx, const char *str, JSModuleDef *m) +{ + char buf1[ATOM_GET_STR_BUF_SIZE]; + static const char *module_status_str[] = { "unlinked", "linking", "linked", "evaluating", "evaluating_async", "evaluated" }; + printf("%s: %s status=%s\n", str, JS_AtomGetStr(ctx, buf1, sizeof(buf1), m->module_name), module_status_str[m->status]); +} +#endif static JSValue js_async_module_execution_rejected(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int magic, JSValue *func_data) @@ -28976,6 +29973,9 @@ static JSValue js_async_module_execution_rejected(JSContext *ctx, JSValueConst t JSValueConst error = argv[0]; int i; +#ifdef DUMP_MODULE_EXEC + js_dump_module(ctx, __func__, module); +#endif if 
(js_check_stack_overflow(ctx->rt, 0)) return JS_ThrowStackOverflow(ctx); @@ -28991,6 +29991,7 @@ static JSValue js_async_module_execution_rejected(JSContext *ctx, JSValueConst t module->eval_has_exception = TRUE; module->eval_exception = JS_DupValue(ctx, error); module->status = JS_MODULE_STATUS_EVALUATED; + module->async_evaluation = FALSE; for(i = 0; i < module->async_parent_modules_count; i++) { JSModuleDef *m = module->async_parent_modules[i]; @@ -29017,6 +30018,9 @@ static JSValue js_async_module_execution_fulfilled(JSContext *ctx, JSValueConst ExecModuleList exec_list_s, *exec_list = &exec_list_s; int i; +#ifdef DUMP_MODULE_EXEC + js_dump_module(ctx, __func__, module); +#endif if (module->status == JS_MODULE_STATUS_EVALUATED) { assert(module->eval_has_exception); return JS_UNDEFINED; @@ -29042,6 +30046,9 @@ static JSValue js_async_module_execution_fulfilled(JSContext *ctx, JSValueConst for(i = 0; i < exec_list->count; i++) { JSModuleDef *m = exec_list->tab[i]; +#ifdef DUMP_MODULE_EXEC + printf(" %d/%d", i, exec_list->count); js_dump_module(ctx, "", m); +#endif if (m->status == JS_MODULE_STATUS_EVALUATED) { assert(m->eval_has_exception); } else if (m->has_tla) { @@ -29056,6 +30063,7 @@ static JSValue js_async_module_execution_fulfilled(JSContext *ctx, JSValueConst JS_FreeValue(ctx, m_obj); JS_FreeValue(ctx, error); } else { + m->async_evaluation = FALSE; js_set_module_evaluated(ctx, m); } } @@ -29068,6 +30076,9 @@ static int js_execute_async_module(JSContext *ctx, JSModuleDef *m) { JSValue promise, m_obj; JSValue resolve_funcs[2], ret_val; +#ifdef DUMP_MODULE_EXEC + js_dump_module(ctx, __func__, m); +#endif promise = js_async_function_call(ctx, m->func_obj, JS_UNDEFINED, 0, NULL, 0); if (JS_IsException(promise)) return -1; @@ -29087,6 +30098,9 @@ static int js_execute_async_module(JSContext *ctx, JSModuleDef *m) static int js_execute_sync_module(JSContext *ctx, JSModuleDef *m, JSValue *pvalue) { +#ifdef DUMP_MODULE_EXEC + js_dump_module(ctx, __func__, m); 
+#endif if (m->init_func) { /* C module init : no asynchronous execution */ if (m->init_func(ctx, m) < 0) @@ -29126,19 +30140,16 @@ static int js_inner_module_evaluation(JSContext *ctx, JSModuleDef *m, JSModuleDef *m1; int i; +#ifdef DUMP_MODULE_EXEC + js_dump_module(ctx, __func__, m); +#endif + if (js_check_stack_overflow(ctx->rt, 0)) { JS_ThrowStackOverflow(ctx); *pvalue = JS_GetException(ctx); return -1; } -#ifdef DUMP_MODULE_RESOLVE - { - char buf1[ATOM_GET_STR_BUF_SIZE]; - printf("js_inner_module_evaluation '%s':\n", JS_AtomGetStr(ctx, buf1, sizeof(buf1), m->module_name)); - } -#endif - if (m->status == JS_MODULE_STATUS_EVALUATING_ASYNC || m->status == JS_MODULE_STATUS_EVALUATED) { if (m->eval_has_exception) { @@ -29239,6 +30250,9 @@ static JSValue js_evaluate_module(JSContext *ctx, JSModuleDef *m) JSModuleDef *m1, *stack_top; JSValue ret_val, result; +#ifdef DUMP_MODULE_EXEC + js_dump_module(ctx, __func__, m); +#endif assert(m->status == JS_MODULE_STATUS_LINKED || m->status == JS_MODULE_STATUS_EVALUATING_ASYNC || m->status == JS_MODULE_STATUS_EVALUATED); @@ -29271,6 +30285,9 @@ static JSValue js_evaluate_module(JSContext *ctx, JSModuleDef *m) 1, (JSValueConst *)&m->eval_exception); JS_FreeValue(ctx, ret_val); } else { +#ifdef DUMP_MODULE_EXEC + js_dump_module(ctx, " done", m); +#endif assert(m->status == JS_MODULE_STATUS_EVALUATING_ASYNC || m->status == JS_MODULE_STATUS_EVALUATED); assert(!m->eval_has_exception); @@ -29287,27 +30304,109 @@ static JSValue js_evaluate_module(JSContext *ctx, JSModuleDef *m) return JS_DupValue(ctx, m->promise); } -static __exception JSAtom js_parse_from_clause(JSParseState *s) +static __exception int js_parse_with_clause(JSParseState *s, JSReqModuleEntry *rme) +{ + JSContext *ctx = s->ctx; + JSAtom key; + int ret; + const uint8_t *key_token_ptr; + + if (next_token(s)) + return -1; + if (js_parse_expect(s, '{')) + return -1; + while (s->token.val != '}') { + key_token_ptr = s->token.ptr; + if (s->token.val == TOK_STRING) { + key = 
JS_ValueToAtom(ctx, s->token.u.str.str); + if (key == JS_ATOM_NULL) + return -1; + } else { + if (!token_is_ident(s->token.val)) { + js_parse_error(s, "identifier expected"); + return -1; + } + key = JS_DupAtom(ctx, s->token.u.ident.atom); + } + if (next_token(s)) + return -1; + if (js_parse_expect(s, ':')) { + JS_FreeAtom(ctx, key); + return -1; + } + if (s->token.val != TOK_STRING) { + js_parse_error_pos(s, key_token_ptr, "string expected"); + return -1; + } + if (JS_IsUndefined(rme->attributes)) { + JSValue attributes = JS_NewObjectProto(ctx, JS_NULL); + if (JS_IsException(attributes)) { + JS_FreeAtom(ctx, key); + return -1; + } + rme->attributes = attributes; + } + ret = JS_HasProperty(ctx, rme->attributes, key); + if (ret != 0) { + JS_FreeAtom(ctx, key); + if (ret < 0) + return -1; + else + return js_parse_error(s, "duplicate with key"); + } + ret = JS_DefinePropertyValue(ctx, rme->attributes, key, + JS_DupValue(ctx, s->token.u.str.str), JS_PROP_C_W_E); + JS_FreeAtom(ctx, key); + if (ret < 0) + return -1; + if (next_token(s)) + return -1; + if (s->token.val != ',') + break; + if (next_token(s)) + return -1; + } + if (!JS_IsUndefined(rme->attributes) && + ctx->rt->module_check_attrs && + ctx->rt->module_check_attrs(ctx, ctx->rt->module_loader_opaque, rme->attributes) < 0) { + return -1; + } + return js_parse_expect(s, '}'); +} + +/* return the module index in m->req_module_entries[] or < 0 if error */ +static __exception int js_parse_from_clause(JSParseState *s, JSModuleDef *m) { JSAtom module_name; + int idx; + if (!token_is_pseudo_keyword(s, JS_ATOM_from)) { js_parse_error(s, "from clause expected"); - return JS_ATOM_NULL; + return -1; } if (next_token(s)) - return JS_ATOM_NULL; + return -1; if (s->token.val != TOK_STRING) { js_parse_error(s, "string expected"); - return JS_ATOM_NULL; + return -1; } module_name = JS_ValueToAtom(s->ctx, s->token.u.str.str); if (module_name == JS_ATOM_NULL) - return JS_ATOM_NULL; + return -1; if (next_token(s)) { 
JS_FreeAtom(s->ctx, module_name); - return JS_ATOM_NULL; + return -1; } - return module_name; + + idx = add_req_module_entry(s->ctx, m, module_name); + JS_FreeAtom(s->ctx, module_name); + if (idx < 0) + return -1; + if (s->token.val == TOK_WITH) { + if (js_parse_with_clause(s, &m->req_module_entries[idx])) + return -1; + } + return idx; } static __exception int js_parse_export(JSParseState *s) @@ -29316,7 +30415,6 @@ static __exception int js_parse_export(JSParseState *s) JSModuleDef *m = s->cur_func->module; JSAtom local_name, export_name; int first_export, idx, i, tok; - JSAtom module_name; JSExportEntry *me; if (next_token(s)) @@ -29352,11 +30450,21 @@ static __exception int js_parse_export(JSParseState *s) if (token_is_pseudo_keyword(s, JS_ATOM_as)) { if (next_token(s)) goto fail; - if (!token_is_ident(s->token.val)) { - js_parse_error(s, "identifier expected"); - goto fail; + if (s->token.val == TOK_STRING) { + if (js_string_find_invalid_codepoint(JS_VALUE_GET_STRING(s->token.u.str.str)) >= 0) { + js_parse_error(s, "contains unpaired surrogate"); + goto fail; + } + export_name = JS_ValueToAtom(s->ctx, s->token.u.str.str); + if (export_name == JS_ATOM_NULL) + goto fail; + } else { + if (!token_is_ident(s->token.val)) { + js_parse_error(s, "identifier expected"); + goto fail; + } + export_name = JS_DupAtom(ctx, s->token.u.ident.atom); } - export_name = JS_DupAtom(ctx, s->token.u.ident.atom); if (next_token(s)) { fail: JS_FreeAtom(ctx, local_name); @@ -29381,11 +30489,7 @@ static __exception int js_parse_export(JSParseState *s) if (js_parse_expect(s, '}')) return -1; if (token_is_pseudo_keyword(s, JS_ATOM_from)) { - module_name = js_parse_from_clause(s); - if (module_name == JS_ATOM_NULL) - return -1; - idx = add_req_module_entry(ctx, m, module_name); - JS_FreeAtom(ctx, module_name); + idx = js_parse_from_clause(s, m); if (idx < 0) return -1; for(i = first_export; i < m->export_entries_count; i++) { @@ -29407,11 +30511,7 @@ static __exception int 
js_parse_export(JSParseState *s) export_name = JS_DupAtom(ctx, s->token.u.ident.atom); if (next_token(s)) goto fail1; - module_name = js_parse_from_clause(s); - if (module_name == JS_ATOM_NULL) - goto fail1; - idx = add_req_module_entry(ctx, m, module_name); - JS_FreeAtom(ctx, module_name); + idx = js_parse_from_clause(s, m); if (idx < 0) goto fail1; me = add_export_entry(s, m, JS_ATOM__star_, export_name, @@ -29421,11 +30521,7 @@ static __exception int js_parse_export(JSParseState *s) return -1; me->u.req_module_idx = idx; } else { - module_name = js_parse_from_clause(s); - if (module_name == JS_ATOM_NULL) - return -1; - idx = add_req_module_entry(ctx, m, module_name); - JS_FreeAtom(ctx, module_name); + idx = js_parse_from_clause(s, m); if (idx < 0) return -1; if (add_star_export_entry(ctx, m, idx) < 0) @@ -29479,12 +30575,11 @@ static int add_closure_var(JSContext *ctx, JSFunctionDef *s, JSVarKindEnum var_kind); static int add_import(JSParseState *s, JSModuleDef *m, - JSAtom local_name, JSAtom import_name) + JSAtom local_name, JSAtom import_name, BOOL is_star) { JSContext *ctx = s->ctx; int i, var_idx; JSImportEntry *mi; - BOOL is_local; if (local_name == JS_ATOM_arguments || local_name == JS_ATOM_eval) return js_parse_error(s, "invalid import binding"); @@ -29496,8 +30591,7 @@ static int add_import(JSParseState *s, JSModuleDef *m, } } - is_local = (import_name == JS_ATOM__star_); - var_idx = add_closure_var(ctx, s->cur_func, is_local, FALSE, + var_idx = add_closure_var(ctx, s->cur_func, is_star, FALSE, m->import_entries_count, local_name, TRUE, TRUE, FALSE); if (var_idx < 0) @@ -29510,6 +30604,7 @@ static int add_import(JSParseState *s, JSModuleDef *m, mi = &m->import_entries[m->import_entries_count++]; mi->import_name = JS_DupAtom(ctx, import_name); mi->var_idx = var_idx; + mi->is_star = is_star; return 0; } @@ -29532,6 +30627,14 @@ static __exception int js_parse_import(JSParseState *s) JS_FreeAtom(ctx, module_name); return -1; } + idx = 
add_req_module_entry(ctx, m, module_name); + JS_FreeAtom(ctx, module_name); + if (idx < 0) + return -1; + if (s->token.val == TOK_WITH) { + if (js_parse_with_clause(s, &m->req_module_entries[idx])) + return -1; + } } else { if (s->token.val == TOK_IDENT) { if (s->token.u.ident.is_reserved) { @@ -29542,7 +30645,7 @@ static __exception int js_parse_import(JSParseState *s) import_name = JS_ATOM_default; if (next_token(s)) goto fail; - if (add_import(s, m, local_name, import_name)) + if (add_import(s, m, local_name, import_name, FALSE)) goto fail; JS_FreeAtom(ctx, local_name); @@ -29568,7 +30671,7 @@ static __exception int js_parse_import(JSParseState *s) import_name = JS_ATOM__star_; if (next_token(s)) goto fail; - if (add_import(s, m, local_name, import_name)) + if (add_import(s, m, local_name, import_name, TRUE)) goto fail; JS_FreeAtom(ctx, local_name); } else if (s->token.val == '{') { @@ -29576,11 +30679,24 @@ static __exception int js_parse_import(JSParseState *s) return -1; while (s->token.val != '}') { - if (!token_is_ident(s->token.val)) { - js_parse_error(s, "identifier expected"); - return -1; + BOOL is_string; + if (s->token.val == TOK_STRING) { + is_string = TRUE; + if (js_string_find_invalid_codepoint(JS_VALUE_GET_STRING(s->token.u.str.str)) >= 0) { + js_parse_error(s, "contains unpaired surrogate"); + return -1; + } + import_name = JS_ValueToAtom(s->ctx, s->token.u.str.str); + if (import_name == JS_ATOM_NULL) + return -1; + } else { + is_string = FALSE; + if (!token_is_ident(s->token.val)) { + js_parse_error(s, "identifier expected"); + return -1; + } + import_name = JS_DupAtom(ctx, s->token.u.ident.atom); } - import_name = JS_DupAtom(ctx, s->token.u.ident.atom); local_name = JS_ATOM_NULL; if (next_token(s)) goto fail; @@ -29592,16 +30708,19 @@ static __exception int js_parse_import(JSParseState *s) goto fail; } local_name = JS_DupAtom(ctx, s->token.u.ident.atom); - if (next_token(s)) { + if (next_token(s)) + goto fail; + } else { + if (is_string) { + 
js_parse_error(s, "expecting 'as'"); fail: JS_FreeAtom(ctx, local_name); JS_FreeAtom(ctx, import_name); return -1; } - } else { local_name = JS_DupAtom(ctx, import_name); } - if (add_import(s, m, local_name, import_name)) + if (add_import(s, m, local_name, import_name, FALSE)) goto fail; JS_FreeAtom(ctx, local_name); JS_FreeAtom(ctx, import_name); @@ -29614,14 +30733,10 @@ static __exception int js_parse_import(JSParseState *s) return -1; } end_import_clause: - module_name = js_parse_from_clause(s); - if (module_name == JS_ATOM_NULL) + idx = js_parse_from_clause(s, m); + if (idx < 0) return -1; } - idx = add_req_module_entry(ctx, m, module_name); - JS_FreeAtom(ctx, module_name); - if (idx < 0) - return -1; for(i = first_import; i < m->import_entries_count; i++) m->import_entries[i].req_module_idx = idx; @@ -29686,7 +30801,7 @@ static JSFunctionDef *js_new_function_def(JSContext *ctx, fd->is_eval = is_eval; fd->is_func_expr = is_func_expr; - js_dbuf_init(ctx, &fd->byte_code); + js_dbuf_bytecode_init(ctx, &fd->byte_code); fd->last_opcode_pos = -1; fd->func_name = JS_ATOM_NULL; fd->var_object_idx = -1; @@ -29744,6 +30859,8 @@ static void free_bytecode_atoms(JSRuntime *rt, case OP_FMT_atom_u16: case OP_FMT_atom_label_u8: case OP_FMT_atom_label_u16: + if ((pos + 1 + 4) > bc_len) + break; /* may happen if there is not enough memory when emiting bytecode */ atom = get_u32(bc_buf + pos + 1); JS_FreeAtomRT(rt, atom); break; @@ -30034,7 +31151,7 @@ static void dump_byte_code(JSContext *ctx, int pass, has_pool_idx: printf(" %u: ", idx); if (idx < cpool_count) { - JS_DumpValue(ctx, cpool[idx]); + JS_PrintValue(ctx, js_dump_value_write, stdout, cpool[idx], NULL); } break; case OP_FMT_atom: @@ -30533,7 +31650,13 @@ static void var_object_test(JSContext *ctx, JSFunctionDef *s, { dbuf_putc(bc, get_with_scope_opcode(op)); dbuf_put_u32(bc, JS_DupAtom(ctx, var_name)); - *plabel_done = new_label_fd(s, *plabel_done); + if (*plabel_done < 0) { + *plabel_done = new_label_fd(s); + if 
(*plabel_done < 0) { + dbuf_set_error(bc); + return; + } + } dbuf_put_u32(bc, *plabel_done); dbuf_putc(bc, is_with); update_label(s, *plabel_done, 1); @@ -31578,8 +32701,11 @@ static void instantiate_hoisted_definitions(JSContext *ctx, JSFunctionDef *s, Dy evaluating the module so that the exported functions are visible if there are cyclic module references */ if (s->module) { - label_next = new_label_fd(s, -1); - + label_next = new_label_fd(s); + if (label_next < 0) { + dbuf_set_error(bc); + return; + } /* if 'this' is true, initialize the global variables and return */ dbuf_putc(bc, OP_push_this); dbuf_putc(bc, OP_if_false); @@ -31770,7 +32896,7 @@ static __exception int resolve_variables(JSContext *ctx, JSFunctionDef *s) cc.bc_buf = bc_buf = s->byte_code.buf; cc.bc_len = bc_len = s->byte_code.size; - js_dbuf_init(ctx, &bc_out); + js_dbuf_bytecode_init(ctx, &bc_out); /* first pass for runtime checks (must be done before the variables are created) */ @@ -32390,7 +33516,7 @@ static __exception int resolve_labels(JSContext *ctx, JSFunctionDef *s) cc.bc_buf = bc_buf = s->byte_code.buf; cc.bc_len = bc_len = s->byte_code.size; - js_dbuf_init(ctx, &bc_out); + js_dbuf_bytecode_init(ctx, &bc_out); #if SHORT_OPCODES if (s->jump_size) { @@ -32868,9 +33994,8 @@ static __exception int resolve_labels(JSContext *ctx, JSFunctionDef *s) goto no_change; case OP_to_propkey: - case OP_to_propkey2: if (OPTIMIZE) { - /* remove redundant to_propkey/to_propkey2 opcodes when storing simple data */ + /* remove redundant to_propkey opcodes when storing simple data */ if (code_match(&cc, pos_next, M3(OP_get_loc, OP_get_arg, OP_get_var_ref), -1, OP_put_array_el, -1) || code_match(&cc, pos_next, M3(OP_push_i32, OP_push_const, OP_push_atom_value), OP_put_array_el, -1) || code_match(&cc, pos_next, M4(OP_undefined, OP_null, OP_push_true, OP_push_false), OP_put_array_el, -1)) { @@ -33799,7 +34924,8 @@ static void free_function_bytecode(JSRuntime *rt, JSFunctionBytecode *b) JS_AtomGetStrRT(rt, 
buf, sizeof(buf), b->func_name)); } #endif - free_bytecode_atoms(rt, b->byte_code_buf, b->byte_code_len, TRUE); + if (b->byte_code_buf) + free_bytecode_atoms(rt, b->byte_code_buf, b->byte_code_len, TRUE); if (b->vardefs) { for(i = 0; i < b->arg_count + b->var_count; i++) { @@ -34391,7 +35517,7 @@ static __exception int js_parse_function_decl2(JSParseState *s, push_scope(s); /* enter body scope */ fd->body_scope = fd->scope_level; - if (s->token.val == TOK_ARROW) { + if (s->token.val == TOK_ARROW && func_type == JS_PARSE_FUNC_ARROW) { if (next_token(s)) goto fail; @@ -34801,7 +35927,7 @@ static JSValue __JS_EvalInternal(JSContext *ctx, JSValueConst this_obj, fail1: /* XXX: should free all the unresolved dependencies */ if (m) - js_free_module_def(ctx, m); + JS_FreeValue(ctx, JS_MKPTR(JS_TAG_MODULE, m)); return JS_EXCEPTION; } @@ -35008,7 +36134,7 @@ typedef enum BCTagEnum { BC_TAG_OBJECT_REFERENCE, } BCTagEnum; -#define BC_VERSION 4 +#define BC_VERSION 5 typedef struct BCWriterState { JSContext *ctx; @@ -35430,6 +36556,8 @@ static int JS_WriteModule(BCWriterState *s, JSValueConst obj) for(i = 0; i < m->req_module_entries_count; i++) { JSReqModuleEntry *rme = &m->req_module_entries[i]; bc_put_atom(s, rme->module_name); + if (JS_WriteObjectRec(s, rme->attributes)) + goto fail; } bc_put_leb128(s, m->export_entries_count); @@ -35455,6 +36583,7 @@ static int JS_WriteModule(BCWriterState *s, JSValueConst obj) for(i = 0; i < m->import_entries_count; i++) { JSImportEntry *mi = &m->import_entries[i]; bc_put_leb128(s, mi->var_idx); + bc_put_u8(s, mi->is_star); bc_put_atom(s, mi->import_name); bc_put_leb128(s, mi->req_module_idx); } @@ -36428,8 +37557,13 @@ static JSValue JS_ReadModule(BCReaderState *s) goto fail; for(i = 0; i < m->req_module_entries_count; i++) { JSReqModuleEntry *rme = &m->req_module_entries[i]; + JSValue val; if (bc_get_atom(s, &rme->module_name)) goto fail; + val = JS_ReadObjectRec(s); + if (JS_IsException(val)) + goto fail; + rme->attributes = val; } } @@ 
-36482,8 +37616,12 @@ static JSValue JS_ReadModule(BCReaderState *s) goto fail; for(i = 0; i < m->import_entries_count; i++) { JSImportEntry *mi = &m->import_entries[i]; + uint8_t v8; if (bc_get_leb128_int(s, &mi->var_idx)) goto fail; + if (bc_get_u8(s, &v8)) + goto fail; + mi->is_star = (v8 != 0); if (bc_get_atom(s, &mi->import_name)) goto fail; if (bc_get_leb128_int(s, &mi->req_module_idx)) @@ -36501,7 +37639,7 @@ static JSValue JS_ReadModule(BCReaderState *s) return obj; fail: if (m) { - js_free_module_def(ctx, m); + JS_FreeValue(ctx, JS_MKPTR(JS_TAG_MODULE, m)); } return JS_EXCEPTION; } @@ -37093,17 +38231,22 @@ static int JS_InstantiateFunctionListItem(JSContext *ctx, JSValueConst obj, return 0; } -void JS_SetPropertyFunctionList(JSContext *ctx, JSValueConst obj, - const JSCFunctionListEntry *tab, int len) +int JS_SetPropertyFunctionList(JSContext *ctx, JSValueConst obj, + const JSCFunctionListEntry *tab, int len) { - int i; + int i, ret; for (i = 0; i < len; i++) { const JSCFunctionListEntry *e = &tab[i]; JSAtom atom = find_atom(ctx, e->name); - JS_InstantiateFunctionListItem(ctx, obj, atom, e); + if (atom == JS_ATOM_NULL) + return -1; + ret = JS_InstantiateFunctionListItem(ctx, obj, atom, e); JS_FreeAtom(ctx, atom); + if (ret) + return -1; } + return 0; } int JS_AddModuleExportList(JSContext *ctx, JSModuleDef *m, @@ -37307,22 +38450,6 @@ static int js_obj_to_desc(JSContext *ctx, JSPropertyDescriptor *d, val = JS_UNDEFINED; getter = JS_UNDEFINED; setter = JS_UNDEFINED; - if (JS_HasProperty(ctx, desc, JS_ATOM_configurable)) { - JSValue prop = JS_GetProperty(ctx, desc, JS_ATOM_configurable); - if (JS_IsException(prop)) - goto fail; - flags |= JS_PROP_HAS_CONFIGURABLE; - if (JS_ToBoolFree(ctx, prop)) - flags |= JS_PROP_CONFIGURABLE; - } - if (JS_HasProperty(ctx, desc, JS_ATOM_writable)) { - JSValue prop = JS_GetProperty(ctx, desc, JS_ATOM_writable); - if (JS_IsException(prop)) - goto fail; - flags |= JS_PROP_HAS_WRITABLE; - if (JS_ToBoolFree(ctx, prop)) - flags 
|= JS_PROP_WRITABLE; - } if (JS_HasProperty(ctx, desc, JS_ATOM_enumerable)) { JSValue prop = JS_GetProperty(ctx, desc, JS_ATOM_enumerable); if (JS_IsException(prop)) @@ -37331,12 +38458,28 @@ static int js_obj_to_desc(JSContext *ctx, JSPropertyDescriptor *d, if (JS_ToBoolFree(ctx, prop)) flags |= JS_PROP_ENUMERABLE; } + if (JS_HasProperty(ctx, desc, JS_ATOM_configurable)) { + JSValue prop = JS_GetProperty(ctx, desc, JS_ATOM_configurable); + if (JS_IsException(prop)) + goto fail; + flags |= JS_PROP_HAS_CONFIGURABLE; + if (JS_ToBoolFree(ctx, prop)) + flags |= JS_PROP_CONFIGURABLE; + } if (JS_HasProperty(ctx, desc, JS_ATOM_value)) { flags |= JS_PROP_HAS_VALUE; val = JS_GetProperty(ctx, desc, JS_ATOM_value); if (JS_IsException(val)) goto fail; } + if (JS_HasProperty(ctx, desc, JS_ATOM_writable)) { + JSValue prop = JS_GetProperty(ctx, desc, JS_ATOM_writable); + if (JS_IsException(prop)) + goto fail; + flags |= JS_PROP_HAS_WRITABLE; + if (JS_ToBoolFree(ctx, prop)) + flags |= JS_PROP_WRITABLE; + } if (JS_HasProperty(ctx, desc, JS_ATOM_get)) { flags |= JS_PROP_HAS_GET; getter = JS_GetProperty(ctx, desc, JS_ATOM_get); @@ -37421,7 +38564,7 @@ static __exception int JS_ObjectDefineProperties(JSContext *ctx, ret = 0; exception: - js_free_prop_enum(ctx, atoms, len); + JS_FreePropertyEnum(ctx, atoms, len); JS_FreeValue(ctx, props); JS_FreeValue(ctx, desc); return ret; @@ -37692,12 +38835,12 @@ static JSValue js_object_getOwnPropertyDescriptors(JSContext *ctx, JSValueConst goto exception; } } - js_free_prop_enum(ctx, props, len); + JS_FreePropertyEnum(ctx, props, len); JS_FreeValue(ctx, obj); return r; exception: - js_free_prop_enum(ctx, props, len); + JS_FreePropertyEnum(ctx, props, len); JS_FreeValue(ctx, obj); JS_FreeValue(ctx, r); return JS_EXCEPTION; @@ -37777,7 +38920,7 @@ exception: JS_FreeValue(ctx, r); r = JS_EXCEPTION; done: - js_free_prop_enum(ctx, atoms, len); + JS_FreePropertyEnum(ctx, atoms, len); JS_FreeValue(ctx, obj); return r; } @@ -38038,11 +39181,11 @@ static 
JSValue js_object_seal(JSContext *ctx, JSValueConst this_val, JS_UNDEFINED, JS_UNDEFINED, desc_flags) < 0) goto exception; } - js_free_prop_enum(ctx, props, len); + JS_FreePropertyEnum(ctx, props, len); return JS_DupValue(ctx, obj); exception: - js_free_prop_enum(ctx, props, len); + JS_FreePropertyEnum(ctx, props, len); return JS_EXCEPTION; } @@ -38084,11 +39227,11 @@ static JSValue js_object_isSealed(JSContext *ctx, JSValueConst this_val, return JS_EXCEPTION; res ^= 1; done: - js_free_prop_enum(ctx, props, len); + JS_FreePropertyEnum(ctx, props, len); return JS_NewBool(ctx, res); exception: - js_free_prop_enum(ctx, props, len); + JS_FreePropertyEnum(ctx, props, len); return JS_EXCEPTION; } @@ -38986,6 +40129,16 @@ static const JSCFunctionListEntry js_error_proto_funcs[] = { JS_PROP_STRING_DEF("message", "", JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE ), }; +static JSValue js_error_isError(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv) +{ + return JS_NewBool(ctx, JS_IsError(ctx, argv[0])); +} + +static const JSCFunctionListEntry js_error_funcs[] = { + JS_CFUNC_DEF("isError", 1, js_error_isError), +}; + /* AggregateError */ /* used by C code. 
*/ @@ -39649,8 +40802,10 @@ done: goto exception; args[0] = ret; res = JS_Invoke(ctx, arr, JS_ATOM_set, 1, args); - if (check_exception_free(ctx, res)) + if (check_exception_free(ctx, res)) { + JS_FreeValue(ctx, arr); goto exception; + } JS_FreeValue(ctx, ret); ret = arr; } @@ -42679,12 +43834,6 @@ static JSValue js_string_trim(JSContext *ctx, JSValueConst this_val, return ret; } -static JSValue js_string___quote(JSContext *ctx, JSValueConst this_val, - int argc, JSValueConst *argv) -{ - return JS_ToQuotedString(ctx, this_val); -} - /* return 0 if before the first char */ static int string_prevc(JSString *p, int *pidx) { @@ -43172,7 +44321,6 @@ static const JSCFunctionListEntry js_string_proto_funcs[] = { JS_ALIAS_DEF("trimLeft", "trimStart" ), JS_CFUNC_DEF("toString", 0, js_string_toString ), JS_CFUNC_DEF("valueOf", 0, js_string_toString ), - JS_CFUNC_DEF("__quote", 1, js_string___quote ), JS_CFUNC_MAGIC_DEF("toLowerCase", 0, js_string_toLowerCase, 1 ), JS_CFUNC_MAGIC_DEF("toUpperCase", 0, js_string_toLowerCase, 0 ), JS_CFUNC_MAGIC_DEF("toLocaleLowerCase", 0, js_string_toLowerCase, 1 ), @@ -43357,6 +44505,11 @@ static JSValue js_math_hypot(JSContext *ctx, JSValueConst this_val, return JS_NewFloat64(ctx, r); } +static double js_math_f16round(double a) +{ + return fromfp16(tofp16(a)); +} + static double js_math_fround(double a) { return (float)a; @@ -43460,6 +44613,7 @@ static const JSCFunctionListEntry js_math_funcs[] = { JS_CFUNC_SPECIAL_DEF("cbrt", 1, f_f, cbrt ), JS_CFUNC_DEF("hypot", 2, js_math_hypot ), JS_CFUNC_DEF("random", 0, js_math_random ), + JS_CFUNC_SPECIAL_DEF("f16round", 1, f_f, js_math_f16round ), JS_CFUNC_SPECIAL_DEF("fround", 1, f_f, js_math_fround ), JS_CFUNC_DEF("imul", 2, js_math_imul ), JS_CFUNC_DEF("clz32", 1, js_math_clz32 ), @@ -43515,9 +44669,13 @@ static int getTimezoneOffset(int64_t time) time_t gm_ti, loc_ti; tm = gmtime(&ti); + if (!tm) + return 0; gm_ti = mktime(tm); tm = localtime(&ti); + if (!tm) + return 0; loc_ti = mktime(tm); res 
= (gm_ti - loc_ti) / 60; @@ -43624,6 +44782,9 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValueConst pattern, case 'u': mask = LRE_FLAG_UNICODE; break; + case 'v': + mask = LRE_FLAG_UNICODE_SETS; + break; case 'y': mask = LRE_FLAG_STICKY; break; @@ -43633,14 +44794,20 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValueConst pattern, if ((re_flags & mask) != 0) { bad_flags: JS_FreeCString(ctx, str); - return JS_ThrowSyntaxError(ctx, "invalid regular expression flags"); + goto bad_flags1; } re_flags |= mask; } JS_FreeCString(ctx, str); } - str = JS_ToCStringLen2(ctx, &len, pattern, !(re_flags & LRE_FLAG_UNICODE)); + /* 'u' and 'v' cannot be both set */ + if ((re_flags & LRE_FLAG_UNICODE_SETS) && (re_flags & LRE_FLAG_UNICODE)) { + bad_flags1: + return JS_ThrowSyntaxError(ctx, "invalid regular expression flags"); + } + + str = JS_ToCStringLen2(ctx, &len, pattern, !(re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS))); if (!str) return JS_EXCEPTION; re_bytecode_buf = lre_compile(&re_bytecode_len, error_msg, @@ -43944,49 +45111,34 @@ static JSValue js_regexp_get_flag(JSContext *ctx, JSValueConst this_val, int mas return JS_NewBool(ctx, flags & mask); } +#define RE_FLAG_COUNT 8 + static JSValue js_regexp_get_flags(JSContext *ctx, JSValueConst this_val) { - char str[8], *p = str; - int res; - + char str[RE_FLAG_COUNT], *p = str; + int res, i; + static const int flag_atom[RE_FLAG_COUNT] = { + JS_ATOM_hasIndices, + JS_ATOM_global, + JS_ATOM_ignoreCase, + JS_ATOM_multiline, + JS_ATOM_dotAll, + JS_ATOM_unicode, + JS_ATOM_unicodeSets, + JS_ATOM_sticky, + }; + static const char flag_char[RE_FLAG_COUNT] = { 'd', 'g', 'i', 'm', 's', 'u', 'v', 'y' }; + if (JS_VALUE_GET_TAG(this_val) != JS_TAG_OBJECT) return JS_ThrowTypeErrorNotAnObject(ctx); - res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "hasIndices")); - if (res < 0) - goto exception; - if (res) - *p++ = 'd'; - res = JS_ToBoolFree(ctx, JS_GetProperty(ctx, this_val, JS_ATOM_global)); - if (res < 0) 
- goto exception; - if (res) - *p++ = 'g'; - res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "ignoreCase")); - if (res < 0) - goto exception; - if (res) - *p++ = 'i'; - res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "multiline")); - if (res < 0) - goto exception; - if (res) - *p++ = 'm'; - res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "dotAll")); - if (res < 0) - goto exception; - if (res) - *p++ = 's'; - res = JS_ToBoolFree(ctx, JS_GetProperty(ctx, this_val, JS_ATOM_unicode)); - if (res < 0) - goto exception; - if (res) - *p++ = 'u'; - res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "sticky")); - if (res < 0) - goto exception; - if (res) - *p++ = 'y'; + for(i = 0; i < RE_FLAG_COUNT; i++) { + res = JS_ToBoolFree(ctx, JS_GetProperty(ctx, this_val, flag_atom[i])); + if (res < 0) + goto exception; + if (res) + *p++ = flag_char[i]; + } return JS_NewStringLen(ctx, str, p - str); exception: @@ -44039,6 +45191,58 @@ void *lre_realloc(void *opaque, void *ptr, size_t size) return js_realloc_rt(ctx->rt, ptr, size); } +static JSValue js_regexp_escape(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv) +{ + JSValue str; + StringBuffer b_s, *b = &b_s; + JSString *p; + uint32_t c, i; + char s[16]; + + if (!JS_IsString(argv[0])) + return JS_ThrowTypeError(ctx, "not a string"); + str = JS_ToString(ctx, argv[0]); /* must call it to linearlize ropes */ + if (JS_IsException(str)) + return JS_EXCEPTION; + p = JS_VALUE_GET_STRING(str); + string_buffer_init2(ctx, b, 0, p->is_wide_char); + for (i = 0; i < p->len; i++) { + c = string_get(p, i); + if (c < 33) { + if (c >= 9 && c <= 13) { + string_buffer_putc8(b, '\\'); + string_buffer_putc8(b, "tnvfr"[c - 9]); + } else { + goto hex2; + } + } else if (c < 128) { + if ((c >= '0' && c <= '9') + || (c >= 'A' && c <= 'Z') + || (c >= 'a' && c <= 'z')) { + if (i == 0) + goto hex2; + } else if (strchr(",-=<>#&!%:;@~'`\"", c)) { + goto hex2; + } else if (c != '_') { + 
string_buffer_putc8(b, '\\'); + } + string_buffer_putc8(b, c); + } else if (c < 256) { + hex2: + snprintf(s, sizeof(s), "\\x%02x", c); + string_buffer_puts8(b, s); + } else if (is_surrogate(c) || lre_is_space(c)) { + snprintf(s, sizeof(s), "\\u%04x", c); + string_buffer_puts8(b, s); + } else { + string_buffer_putc16(b, c); + } + } + JS_FreeValue(ctx, str); + return string_buffer_end(b); +} + static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { @@ -44419,14 +45623,12 @@ static JSValue js_regexp_Symbol_match(JSContext *ctx, JSValueConst this_val, goto exception; p = JS_VALUE_GET_STRING(flags); - // TODO(bnoordhuis) query 'u' flag the same way? global = (-1 != string_indexof_char(p, 'g', 0)); if (!global) { A = JS_RegExpExec(ctx, rx, S); } else { - fullUnicode = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_unicode)); - if (fullUnicode < 0) - goto exception; + fullUnicode = (string_indexof_char(p, 'u', 0) >= 0 || + string_indexof_char(p, 'v', 0) >= 0); if (JS_SetProperty(ctx, rx, JS_ATOM_lastIndex, JS_NewInt32(ctx, 0)) < 0) goto exception; @@ -44445,7 +45647,7 @@ static JSValue js_regexp_Symbol_match(JSContext *ctx, JSValueConst this_val, if (JS_IsException(matchStr)) goto exception; isEmpty = JS_IsEmptyString(matchStr); - if (JS_SetPropertyInt64(ctx, A, n++, matchStr) < 0) + if (JS_DefinePropertyValueInt64(ctx, A, n++, matchStr, JS_PROP_C_W_E | JS_PROP_THROW) < 0) goto exception; if (isEmpty) { int64_t thisIndex, nextIndex; @@ -44610,7 +45812,8 @@ static JSValue js_regexp_Symbol_matchAll(JSContext *ctx, JSValueConst this_val, it->iterated_string = S; strp = JS_VALUE_GET_STRING(flags); it->global = string_indexof_char(strp, 'g', 0) >= 0; - it->unicode = string_indexof_char(strp, 'u', 0) >= 0; + it->unicode = (string_indexof_char(strp, 'u', 0) >= 0 || + string_indexof_char(strp, 'v', 0) >= 0); it->done = FALSE; JS_SetOpaque(iter, it); @@ -44757,13 +45960,11 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, 
JSValueConst this_val, goto exception; p = JS_VALUE_GET_STRING(flags); - // TODO(bnoordhuis) query 'u' flag the same way? fullUnicode = 0; is_global = (-1 != string_indexof_char(p, 'g', 0)); if (is_global) { - fullUnicode = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_unicode)); - if (fullUnicode < 0) - goto exception; + fullUnicode = (string_indexof_char(p, 'u', 0) >= 0 || + string_indexof_char(p, 'v', 0) >= 0); if (JS_SetProperty(ctx, rx, JS_ATOM_lastIndex, JS_NewInt32(ctx, 0)) < 0) goto exception; } @@ -44989,7 +46190,8 @@ static JSValue js_regexp_Symbol_split(JSContext *ctx, JSValueConst this_val, if (JS_IsException(flags)) goto exception; strp = JS_VALUE_GET_STRING(flags); - unicodeMatching = string_indexof_char(strp, 'u', 0) >= 0; + unicodeMatching = (string_indexof_char(strp, 'u', 0) >= 0 || + string_indexof_char(strp, 'v', 0) >= 0); if (string_indexof_char(strp, 'y', 0) < 0) { flags = JS_ConcatString3(ctx, "", flags, "y"); if (JS_IsException(flags)) @@ -45086,6 +46288,7 @@ done: } static const JSCFunctionListEntry js_regexp_funcs[] = { + JS_CFUNC_DEF("escape", 1, js_regexp_escape ), JS_CGETSET_DEF("[Symbol.species]", js_get_this, NULL ), //JS_CFUNC_DEF("__RegExpExec", 2, js_regexp___RegExpExec ), //JS_CFUNC_DEF("__RegExpDelete", 2, js_regexp___RegExpDelete ), @@ -45099,6 +46302,7 @@ static const JSCFunctionListEntry js_regexp_proto_funcs[] = { JS_CGETSET_MAGIC_DEF("multiline", js_regexp_get_flag, NULL, LRE_FLAG_MULTILINE ), JS_CGETSET_MAGIC_DEF("dotAll", js_regexp_get_flag, NULL, LRE_FLAG_DOTALL ), JS_CGETSET_MAGIC_DEF("unicode", js_regexp_get_flag, NULL, LRE_FLAG_UNICODE ), + JS_CGETSET_MAGIC_DEF("unicodeSets", js_regexp_get_flag, NULL, LRE_FLAG_UNICODE_SETS ), JS_CGETSET_MAGIC_DEF("sticky", js_regexp_get_flag, NULL, LRE_FLAG_STICKY ), JS_CGETSET_MAGIC_DEF("hasIndices", js_regexp_get_flag, NULL, LRE_FLAG_INDICES ), JS_CFUNC_DEF("exec", 1, js_regexp_exec ), @@ -45261,6 +46465,12 @@ static JSValue json_parse_value(JSParseState *s) val = JS_NewBool(ctx, 
s->token.u.ident.atom == JS_ATOM_true); } else if (s->token.u.ident.atom == JS_ATOM_null) { val = JS_NULL; + } else if (s->token.u.ident.atom == JS_ATOM_NaN && s->ext_json) { + /* Note: json5 identifier handling is ambiguous e.g. is + '{ NaN: 1 }' a valid JSON5 production ? */ + val = JS_NewFloat64(s->ctx, NAN); + } else if (s->token.u.ident.atom == JS_ATOM_Infinity && s->ext_json) { + val = JS_NewFloat64(s->ctx, INFINITY); } else { goto def_token; } @@ -45365,7 +46575,7 @@ static JSValue internalize_json_property(JSContext *ctx, JSValueConst holder, goto fail; } } - js_free_prop_enum(ctx, atoms, len); + JS_FreePropertyEnum(ctx, atoms, len); atoms = NULL; name_val = JS_AtomToValue(ctx, name); if (JS_IsException(name_val)) @@ -45377,7 +46587,7 @@ static JSValue internalize_json_property(JSContext *ctx, JSValueConst holder, JS_FreeValue(ctx, val); return res; fail: - js_free_prop_enum(ctx, atoms, len); + JS_FreePropertyEnum(ctx, atoms, len); JS_FreeValue(ctx, val); return JS_EXCEPTION; } @@ -45425,10 +46635,72 @@ typedef struct JSONStringifyContext { StringBuffer *b; } JSONStringifyContext; -static JSValue JS_ToQuotedStringFree(JSContext *ctx, JSValue val) { - JSValue r = JS_ToQuotedString(ctx, val); +static int JS_ToQuotedString(JSContext *ctx, StringBuffer *b, JSValueConst val1) +{ + JSValue val; + JSString *p; + int i; + uint32_t c; + char buf[16]; + + val = JS_ToStringCheckObject(ctx, val1); + if (JS_IsException(val)) + return -1; + p = JS_VALUE_GET_STRING(val); + + if (string_buffer_putc8(b, '\"')) + goto fail; + for(i = 0; i < p->len; ) { + c = string_getc(p, &i); + switch(c) { + case '\t': + c = 't'; + goto quote; + case '\r': + c = 'r'; + goto quote; + case '\n': + c = 'n'; + goto quote; + case '\b': + c = 'b'; + goto quote; + case '\f': + c = 'f'; + goto quote; + case '\"': + case '\\': + quote: + if (string_buffer_putc8(b, '\\')) + goto fail; + if (string_buffer_putc8(b, c)) + goto fail; + break; + default: + if (c < 32 || is_surrogate(c)) { + snprintf(buf, 
sizeof(buf), "\\u%04x", c); + if (string_buffer_puts8(b, buf)) + goto fail; + } else { + if (string_buffer_putc(b, c)) + goto fail; + } + break; + } + } + if (string_buffer_putc8(b, '\"')) + goto fail; JS_FreeValue(ctx, val); - return r; + return 0; + fail: + JS_FreeValue(ctx, val); + return -1; +} + +static int JS_ToQuotedStringFree(JSContext *ctx, StringBuffer *b, JSValue val) { + int ret = JS_ToQuotedString(ctx, b, val); + JS_FreeValue(ctx, val); + return ret; } static JSValue js_json_check(JSContext *ctx, JSONStringifyContext *jsc, @@ -45611,13 +46883,11 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc, if (!JS_IsUndefined(v)) { if (has_content) string_buffer_putc8(jsc->b, ','); - prop = JS_ToQuotedStringFree(ctx, prop); - if (JS_IsException(prop)) { + string_buffer_concat_value(jsc->b, sep); + if (JS_ToQuotedString(ctx, jsc->b, prop)) { JS_FreeValue(ctx, v); goto exception; } - string_buffer_concat_value(jsc->b, sep); - string_buffer_concat_value(jsc->b, prop); string_buffer_putc8(jsc->b, ':'); string_buffer_concat_value(jsc->b, sep1); if (js_json_to_str(ctx, jsc, val, v, indent1)) @@ -45645,10 +46915,7 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc, switch (JS_VALUE_GET_NORM_TAG(val)) { case JS_TAG_STRING: case JS_TAG_STRING_ROPE: - val = JS_ToQuotedStringFree(ctx, val); - if (JS_IsException(val)) - goto exception; - goto concat_value; + return JS_ToQuotedStringFree(ctx, jsc->b, val); case JS_TAG_FLOAT64: if (!isfinite(JS_VALUE_GET_FLOAT64(val))) { val = JS_NULL; @@ -46087,7 +47354,7 @@ static JSValue js_proxy_get_prototype(JSContext *ctx, JSValueConst obj) JS_FreeValue(ctx, ret); return JS_EXCEPTION; } - if (JS_VALUE_GET_OBJ(proto1) != JS_VALUE_GET_OBJ(ret)) { + if (!js_same_value(ctx, proto1, ret)) { JS_FreeValue(ctx, proto1); fail: JS_FreeValue(ctx, ret); @@ -46127,7 +47394,7 @@ static int js_proxy_set_prototype(JSContext *ctx, JSValueConst obj, proto1 = JS_GetPrototype(ctx, s->target); if 
(JS_IsException(proto1)) return -1; - if (JS_VALUE_GET_OBJ(proto_val) != JS_VALUE_GET_OBJ(proto1)) { + if (!js_same_value(ctx, proto_val, proto1)) { JS_FreeValue(ctx, proto1); JS_ThrowTypeError(ctx, "proxy: inconsistent prototype"); return -1; @@ -46751,14 +48018,14 @@ static int js_proxy_get_own_property_names(JSContext *ctx, } } - js_free_prop_enum(ctx, tab2, len2); + JS_FreePropertyEnum(ctx, tab2, len2); JS_FreeValue(ctx, prop_array); *ptab = tab; *plen = len; return 0; fail: - js_free_prop_enum(ctx, tab2, len2); - js_free_prop_enum(ctx, tab, len); + JS_FreePropertyEnum(ctx, tab2, len2); + JS_FreePropertyEnum(ctx, tab, len); JS_FreeValue(ctx, prop_array); return -1; } @@ -47094,27 +48361,6 @@ static const JSCFunctionListEntry js_symbol_funcs[] = { /* Set/Map/WeakSet/WeakMap */ -typedef struct JSMapRecord { - int ref_count; /* used during enumeration to avoid freeing the record */ - BOOL empty : 8; /* TRUE if the record is deleted */ - struct list_head link; - struct JSMapRecord *hash_next; - JSValue key; - JSValue value; -} JSMapRecord; - -typedef struct JSMapState { - BOOL is_weak; /* TRUE if WeakSet/WeakMap */ - struct list_head records; /* list of JSMapRecord.link */ - uint32_t record_count; - JSMapRecord **hash_table; - int hash_bits; - uint32_t hash_size; /* = 2 ^ hash_bits */ - uint32_t record_count_threshold; /* count at which a hash table - resize is needed */ - JSWeakRefHeader weakref_header; /* only used if is_weak = TRUE */ -} JSMapState; - static BOOL js_weakref_is_target(JSValueConst val) { switch (JS_VALUE_GET_TAG(val)) { @@ -48395,8 +49641,8 @@ static JSValue js_promise_resolve_function_call(JSContext *ctx, else resolution = JS_UNDEFINED; #ifdef DUMP_PROMISE - printf("js_promise_resolving_function_call: is_reject=%d resolution=", is_reject); - JS_DumpValue(ctx, resolution); + printf("js_promise_resolving_function_call: is_reject=%d ", is_reject); + JS_DumpValue(ctx, "resolution", resolution); printf("\n"); #endif if (is_reject || 
!JS_IsObject(resolution)) { @@ -48636,6 +49882,34 @@ static JSValue js_promise_withResolvers(JSContext *ctx, return obj; } +static JSValue js_promise_try(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv) +{ + JSValue result_promise, resolving_funcs[2], ret, ret2; + BOOL is_reject = 0; + + if (!JS_IsObject(this_val)) + return JS_ThrowTypeErrorNotAnObject(ctx); + result_promise = js_new_promise_capability(ctx, resolving_funcs, this_val); + if (JS_IsException(result_promise)) + return result_promise; + ret = JS_Call(ctx, argv[0], JS_UNDEFINED, argc - 1, argv + 1); + if (JS_IsException(ret)) { + is_reject = 1; + ret = JS_GetException(ctx); + } + ret2 = JS_Call(ctx, resolving_funcs[is_reject], JS_UNDEFINED, 1, (JSValueConst *)&ret); + JS_FreeValue(ctx, resolving_funcs[0]); + JS_FreeValue(ctx, resolving_funcs[1]); + JS_FreeValue(ctx, ret); + if (JS_IsException(ret2)) { + JS_FreeValue(ctx, result_promise); + return ret2; + } + JS_FreeValue(ctx, ret2); + return result_promise; +} + static __exception int remainingElementsCount_add(JSContext *ctx, JSValueConst resolve_element_env, int addend) @@ -49123,6 +50397,7 @@ static const JSCFunctionListEntry js_promise_funcs[] = { JS_CFUNC_MAGIC_DEF("all", 1, js_promise_all, PROMISE_MAGIC_all ), JS_CFUNC_MAGIC_DEF("allSettled", 1, js_promise_all, PROMISE_MAGIC_allSettled ), JS_CFUNC_MAGIC_DEF("any", 1, js_promise_all, PROMISE_MAGIC_any ), + JS_CFUNC_DEF("try", 1, js_promise_try ), JS_CFUNC_DEF("race", 1, js_promise_race ), JS_CFUNC_DEF("withResolvers", 0, js_promise_withResolvers ), JS_CGETSET_DEF("[Symbol.species]", js_get_this, NULL), @@ -49392,6 +50667,7 @@ void JS_AddIntrinsicPromise(JSContext *ctx) { JSRuntime *rt = ctx->rt; JSValue obj1; + JSCFunctionType ft; if (!JS_IsRegisteredClass(rt, JS_CLASS_PROMISE)) { init_class_range(rt, js_async_class_def, JS_CLASS_PROMISE, @@ -49420,7 +50696,8 @@ void JS_AddIntrinsicPromise(JSContext *ctx) /* AsyncFunction */ ctx->class_proto[JS_CLASS_ASYNC_FUNCTION] = 
JS_NewObjectProto(ctx, ctx->function_proto); - obj1 = JS_NewCFunction3(ctx, (JSCFunction *)js_function_constructor, + ft.generic_magic = js_function_constructor; + obj1 = JS_NewCFunction3(ctx, ft.generic, "AsyncFunction", 1, JS_CFUNC_constructor_or_func_magic, JS_FUNC_ASYNC, ctx->function_ctor); @@ -49456,7 +50733,8 @@ void JS_AddIntrinsicPromise(JSContext *ctx) /* AsyncGeneratorFunction */ ctx->class_proto[JS_CLASS_ASYNC_GENERATOR_FUNCTION] = JS_NewObjectProto(ctx, ctx->function_proto); - obj1 = JS_NewCFunction3(ctx, (JSCFunction *)js_function_constructor, + ft.generic_magic = js_function_constructor; + obj1 = JS_NewCFunction3(ctx, ft.generic, "AsyncGeneratorFunction", 1, JS_CFUNC_constructor_or_func_magic, JS_FUNC_ASYNC_GENERATOR, @@ -51125,6 +52403,8 @@ static void JS_AddIntrinsicBasicObjects(JSContext *ctx) int i; ctx->class_proto[JS_CLASS_OBJECT] = JS_NewObjectProto(ctx, JS_NULL); + JS_SetImmutablePrototype(ctx, ctx->class_proto[JS_CLASS_OBJECT]); + ctx->function_proto = JS_NewCFunction3(ctx, js_function_proto, "", 0, JS_CFUNC_generic, 0, ctx->class_proto[JS_CLASS_OBJECT]); @@ -51178,6 +52458,7 @@ void JS_AddIntrinsicBaseObjects(JSContext *ctx) int i; JSValueConst obj, number_obj; JSValue obj1; + JSCFunctionType ft; ctx->throw_type_error = JS_NewCFunction(ctx, js_throw_type_error, NULL, 0); @@ -51215,9 +52496,10 @@ void JS_AddIntrinsicBaseObjects(JSContext *ctx) "Error", 1, JS_CFUNC_constructor_or_func_magic, -1); JS_NewGlobalCConstructor2(ctx, obj1, "Error", ctx->class_proto[JS_CLASS_ERROR]); + JS_SetPropertyFunctionList(ctx, obj1, js_error_funcs, countof(js_error_funcs)); /* Used to squelch a -Wcast-function-type warning. 
*/ - JSCFunctionType ft = { .generic_magic = js_error_constructor }; + ft.generic_magic = js_error_constructor; for(i = 0; i < JS_NATIVE_ERROR_COUNT; i++) { JSValue func_obj; int n_args; @@ -51360,7 +52642,8 @@ void JS_AddIntrinsicBaseObjects(JSContext *ctx) countof(js_generator_proto_funcs)); ctx->class_proto[JS_CLASS_GENERATOR_FUNCTION] = JS_NewObjectProto(ctx, ctx->function_proto); - obj1 = JS_NewCFunction3(ctx, (JSCFunction *)js_function_constructor, + ft.generic_magic = js_function_constructor; + obj1 = JS_NewCFunction3(ctx, ft.generic, "GeneratorFunction", 1, JS_CFUNC_constructor_or_func_magic, JS_FUNC_GENERATOR, ctx->function_ctor); @@ -51393,8 +52676,8 @@ void JS_AddIntrinsicBaseObjects(JSContext *ctx) static uint8_t const typed_array_size_log2[JS_TYPED_ARRAY_COUNT] = { 0, 0, 0, 1, 1, 2, 2, - 3, 3, /* BigInt64Array, BigUint64Array */ - 2, 3 + 3, 3, // BigInt64Array, BigUint64Array + 1, 2, 3 // Float16Array, Float32Array, Float64Array }; static JSValue js_array_buffer_constructor3(JSContext *ctx, @@ -52327,7 +53610,9 @@ static JSValue js_typed_array_fill(JSContext *ctx, JSValueConst this_val, double d; if (JS_ToFloat64(ctx, &d, argv[0])) return JS_EXCEPTION; - if (p->class_id == JS_CLASS_FLOAT32_ARRAY) { + if (p->class_id == JS_CLASS_FLOAT16_ARRAY) { + v64 = tofp16(d); + } else if (p->class_id == JS_CLASS_FLOAT32_ARRAY) { union { float f; uint32_t u32; @@ -52458,6 +53743,7 @@ static JSValue js_typed_array_indexOf(JSContext *ctx, JSValueConst this_val, int64_t v64; double d; float f; + uint16_t hf; len = js_typed_array_get_length_internal(ctx, this_val); if (len < 0) @@ -52468,22 +53754,12 @@ static JSValue js_typed_array_indexOf(JSContext *ctx, JSValueConst this_val, if (special == special_lastIndexOf) { k = len - 1; if (argc > 1) { - if (JS_ToFloat64(ctx, &d, argv[1])) + int64_t k1; + if (JS_ToInt64Clamp(ctx, &k1, argv[1], -1, len - 1, len)) goto exception; - if (isnan(d)) { - k = 0; - } else { - if (d >= 0) { - if (d < k) { - k = d; - } - } else { - d += 
len; - if (d < 0) - goto done; - k = d; - } - } + k = k1; + if (k < 0) + goto done; } stop = -1; inc = -1; @@ -52620,6 +53896,39 @@ static JSValue js_typed_array_indexOf(JSContext *ctx, JSValueConst this_val, } } break; + case JS_CLASS_FLOAT16_ARRAY: + if (is_bigint) + break; + if (isnan(d)) { + const uint16_t *pv = p->u.array.u.fp16_ptr; + /* special case: indexOf returns -1, includes finds NaN */ + if (special != special_includes) + goto done; + for (; k != stop; k += inc) { + if (isfp16nan(pv[k])) { + res = k; + break; + } + } + } else if (d == 0) { + // special case: includes also finds negative zero + const uint16_t *pv = p->u.array.u.fp16_ptr; + for (; k != stop; k += inc) { + if (isfp16zero(pv[k])) { + res = k; + break; + } + } + } else if (hf = tofp16(d), d == fromfp16(hf)) { + const uint16_t *pv = p->u.array.u.fp16_ptr; + for (; k != stop; k += inc) { + if (pv[k] == hf) { + res = k; + break; + } + } + } + break; case JS_CLASS_FLOAT32_ARRAY: if (is_bigint) break; @@ -52919,7 +54228,8 @@ static JSValue js_typed_array_subarray(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { JSValueConst args[4]; - JSValue arr, byteOffset, ta_buffer; + JSValue arr, ta_buffer; + JSTypedArray *ta; JSObject *p; int len, start, final, count, shift, offset; @@ -52930,18 +54240,17 @@ static JSValue js_typed_array_subarray(JSContext *ctx, JSValueConst this_val, if (JS_ToInt32Clamp(ctx, &start, argv[0], 0, len, len)) goto exception; + shift = typed_array_size_log2(p->class_id); + ta = p->u.typed_array; + /* Read byteOffset (ta->offset) even if detached */ + offset = ta->offset + (start << shift); + final = len; if (!JS_IsUndefined(argv[1])) { if (JS_ToInt32Clamp(ctx, &final, argv[1], 0, len, len)) goto exception; } count = max_int(final - start, 0); - byteOffset = js_typed_array_get_byteOffset(ctx, this_val, 0); - if (JS_IsException(byteOffset)) - goto exception; - shift = typed_array_size_log2(p->class_id); - offset = JS_VALUE_GET_INT(byteOffset) + (start << 
shift); - JS_FreeValue(ctx, byteOffset); ta_buffer = js_typed_array_get_buffer(ctx, this_val, 0); if (JS_IsException(ta_buffer)) goto exception; @@ -53010,6 +54319,11 @@ static int js_TA_cmp_uint64(const void *a, const void *b, void *opaque) { return (y < x) - (y > x); } +static int js_TA_cmp_float16(const void *a, const void *b, void *opaque) { + return js_cmp_doubles(fromfp16(*(const uint16_t *)a), + fromfp16(*(const uint16_t *)b)); +} + static int js_TA_cmp_float32(const void *a, const void *b, void *opaque) { return js_cmp_doubles(*(const float *)a, *(const float *)b); } @@ -53050,6 +54364,10 @@ static JSValue js_TA_get_uint64(JSContext *ctx, const void *a) { return JS_NewBigUint64(ctx, *(uint64_t *)a); } +static JSValue js_TA_get_float16(JSContext *ctx, const void *a) { + return __JS_NewFloat64(ctx, fromfp16(*(const uint16_t *)a)); +} + static JSValue js_TA_get_float32(JSContext *ctx, const void *a) { return __JS_NewFloat64(ctx, *(const float *)a); } @@ -53175,6 +54493,10 @@ static JSValue js_typed_array_sort(JSContext *ctx, JSValueConst this_val, tsc.getfun = js_TA_get_uint64; cmpfun = js_TA_cmp_uint64; break; + case JS_CLASS_FLOAT16_ARRAY: + tsc.getfun = js_TA_get_float16; + cmpfun = js_TA_cmp_float16; + break; case JS_CLASS_FLOAT32_ARRAY: tsc.getfun = js_TA_get_float32; cmpfun = js_TA_cmp_float32; @@ -53730,6 +55052,14 @@ static JSValue js_dataview_getValue(JSContext *ctx, return JS_NewBigUint64(ctx, v); } break; + case JS_CLASS_FLOAT16_ARRAY: + { + uint16_t v; + v = get_u16(ptr); + if (is_swap) + v = bswap16(v); + return __JS_NewFloat64(ctx, fromfp16(v)); + } case JS_CLASS_FLOAT32_ARRAY: { union { @@ -53791,7 +55121,9 @@ static JSValue js_dataview_setValue(JSContext *ctx, double d; if (JS_ToFloat64(ctx, &d, val)) return JS_EXCEPTION; - if (class_id == JS_CLASS_FLOAT32_ARRAY) { + if (class_id == JS_CLASS_FLOAT16_ARRAY) { + v = tofp16(d); + } else if (class_id == JS_CLASS_FLOAT32_ARRAY) { union { float f; uint32_t i; @@ -53820,6 +55152,7 @@ static JSValue 
js_dataview_setValue(JSContext *ctx, break; case JS_CLASS_INT16_ARRAY: case JS_CLASS_UINT16_ARRAY: + case JS_CLASS_FLOAT16_ARRAY: if (is_swap) v = bswap16(v); put_u16(ptr, v); @@ -53856,6 +55189,7 @@ static const JSCFunctionListEntry js_dataview_proto_funcs[] = { JS_CFUNC_MAGIC_DEF("getUint32", 1, js_dataview_getValue, JS_CLASS_UINT32_ARRAY ), JS_CFUNC_MAGIC_DEF("getBigInt64", 1, js_dataview_getValue, JS_CLASS_BIG_INT64_ARRAY ), JS_CFUNC_MAGIC_DEF("getBigUint64", 1, js_dataview_getValue, JS_CLASS_BIG_UINT64_ARRAY ), + JS_CFUNC_MAGIC_DEF("getFloat16", 1, js_dataview_getValue, JS_CLASS_FLOAT16_ARRAY ), JS_CFUNC_MAGIC_DEF("getFloat32", 1, js_dataview_getValue, JS_CLASS_FLOAT32_ARRAY ), JS_CFUNC_MAGIC_DEF("getFloat64", 1, js_dataview_getValue, JS_CLASS_FLOAT64_ARRAY ), JS_CFUNC_MAGIC_DEF("setInt8", 2, js_dataview_setValue, JS_CLASS_INT8_ARRAY ), @@ -53866,6 +55200,7 @@ static const JSCFunctionListEntry js_dataview_proto_funcs[] = { JS_CFUNC_MAGIC_DEF("setUint32", 2, js_dataview_setValue, JS_CLASS_UINT32_ARRAY ), JS_CFUNC_MAGIC_DEF("setBigInt64", 2, js_dataview_setValue, JS_CLASS_BIG_INT64_ARRAY ), JS_CFUNC_MAGIC_DEF("setBigUint64", 2, js_dataview_setValue, JS_CLASS_BIG_UINT64_ARRAY ), + JS_CFUNC_MAGIC_DEF("setFloat16", 2, js_dataview_setValue, JS_CLASS_FLOAT16_ARRAY ), JS_CFUNC_MAGIC_DEF("setFloat32", 2, js_dataview_setValue, JS_CLASS_FLOAT32_ARRAY ), JS_CFUNC_MAGIC_DEF("setFloat64", 2, js_dataview_setValue, JS_CLASS_FLOAT64_ARRAY ), JS_PROP_STRING_DEF("[Symbol.toStringTag]", "DataView", JS_PROP_CONFIGURABLE ), @@ -54499,7 +55834,7 @@ typedef struct JSFinRecEntry { typedef struct JSFinalizationRegistryData { JSWeakRefHeader weakref_header; struct list_head entries; /* list of JSFinRecEntry.link */ - JSContext *ctx; + JSContext *realm; JSValue cb; } JSFinalizationRegistryData; @@ -54516,6 +55851,7 @@ static void js_finrec_finalizer(JSRuntime *rt, JSValue val) js_free_rt(rt, fre); } JS_FreeValueRT(rt, frd->cb); + JS_FreeContext(frd->realm); 
list_del(&frd->weakref_header.link); js_free_rt(rt, frd); } @@ -54532,6 +55868,7 @@ static void js_finrec_mark(JSRuntime *rt, JSValueConst val, JS_MarkValue(rt, fre->held_val, mark_func); } JS_MarkValue(rt, frd->cb, mark_func); + mark_func(rt, &frd->realm->header); } } @@ -54557,7 +55894,7 @@ static void finrec_delete_weakref(JSRuntime *rt, JSWeakRefHeader *wh) JSValueConst args[2]; args[0] = frd->cb; args[1] = fre->held_val; - JS_EnqueueJob(frd->ctx, js_finrec_job, 2, args); + JS_EnqueueJob(frd->realm, js_finrec_job, 2, args); js_weakref_free(rt, fre->target); js_weakref_free(rt, fre->token); @@ -54592,7 +55929,7 @@ static JSValue js_finrec_constructor(JSContext *ctx, JSValueConst new_target, frd->weakref_header.weakref_type = JS_WEAKREF_TYPE_FINREC; list_add_tail(&frd->weakref_header.link, &ctx->rt->weakref_list); init_list_head(&frd->entries); - frd->ctx = ctx; /* XXX: JS_DupContext() ? */ + frd->realm = JS_DupContext(ctx); frd->cb = JS_DupValue(ctx, cb); JS_SetOpaque(obj, frd); return obj; diff --git a/quickjs.h b/quickjs.h index 8814222..c8cd149 100644 --- a/quickjs.h +++ b/quickjs.h @@ -456,7 +456,11 @@ void JS_FreeAtom(JSContext *ctx, JSAtom v); void JS_FreeAtomRT(JSRuntime *rt, JSAtom v); JSValue JS_AtomToValue(JSContext *ctx, JSAtom atom); JSValue JS_AtomToString(JSContext *ctx, JSAtom atom); -const char *JS_AtomToCString(JSContext *ctx, JSAtom atom); +const char *JS_AtomToCStringLen(JSContext *ctx, size_t *plen, JSAtom atom); +static inline const char *JS_AtomToCString(JSContext *ctx, JSAtom atom) +{ + return JS_AtomToCStringLen(ctx, NULL, atom); +} JSAtom JS_ValueToAtom(JSContext *ctx, JSValueConst val); /* object class support */ @@ -659,11 +663,10 @@ static inline JS_BOOL JS_IsObject(JSValueConst v) } JSValue JS_Throw(JSContext *ctx, JSValue obj); +void JS_SetUncatchableException(JSContext *ctx, JS_BOOL flag); JSValue JS_GetException(JSContext *ctx); JS_BOOL JS_HasException(JSContext *ctx); JS_BOOL JS_IsError(JSContext *ctx, JSValueConst val); -void 
JS_SetUncatchableError(JSContext *ctx, JSValueConst val, JS_BOOL flag); -void JS_ResetUncatchableError(JSContext *ctx); JSValue JS_NewError(JSContext *ctx); JSValue __js_printf_like(2, 3) JS_ThrowSyntaxError(JSContext *ctx, const char *fmt, ...); JSValue __js_printf_like(2, 3) JS_ThrowTypeError(JSContext *ctx, const char *fmt, ...); @@ -806,6 +809,8 @@ JSValue JS_GetPrototype(JSContext *ctx, JSValueConst val); int JS_GetOwnPropertyNames(JSContext *ctx, JSPropertyEnum **ptab, uint32_t *plen, JSValueConst obj, int flags); +void JS_FreePropertyEnum(JSContext *ctx, JSPropertyEnum *tab, + uint32_t len); int JS_GetOwnProperty(JSContext *ctx, JSPropertyDescriptor *desc, JSValueConst obj, JSAtom prop); @@ -872,6 +877,7 @@ typedef enum JSTypedArrayEnum { JS_TYPED_ARRAY_UINT32, JS_TYPED_ARRAY_BIG_INT64, JS_TYPED_ARRAY_BIG_UINT64, + JS_TYPED_ARRAY_FLOAT16, JS_TYPED_ARRAY_FLOAT32, JS_TYPED_ARRAY_FLOAT64, } JSTypedArrayEnum; @@ -930,12 +936,25 @@ typedef char *JSModuleNormalizeFunc(JSContext *ctx, const char *module_name, void *opaque); typedef JSModuleDef *JSModuleLoaderFunc(JSContext *ctx, const char *module_name, void *opaque); - +typedef JSModuleDef *JSModuleLoaderFunc2(JSContext *ctx, + const char *module_name, void *opaque, + JSValueConst attributes); +/* return -1 if exception, 0 if OK */ +typedef int JSModuleCheckSupportedImportAttributes(JSContext *ctx, void *opaque, + JSValueConst attributes); + /* module_normalize = NULL is allowed and invokes the default module filename normalizer */ void JS_SetModuleLoaderFunc(JSRuntime *rt, JSModuleNormalizeFunc *module_normalize, JSModuleLoaderFunc *module_loader, void *opaque); +/* same as JS_SetModuleLoaderFunc but with attributes. if + module_check_attrs = NULL, no attribute checking is done. 
*/ +void JS_SetModuleLoaderFunc2(JSRuntime *rt, + JSModuleNormalizeFunc *module_normalize, + JSModuleLoaderFunc2 *module_loader, + JSModuleCheckSupportedImportAttributes *module_check_attrs, + void *opaque); /* return the import.meta object of a module */ JSValue JS_GetImportMeta(JSContext *ctx, JSModuleDef *m); JSAtom JS_GetModuleName(JSContext *ctx, JSModuleDef *m); @@ -1030,7 +1049,9 @@ static inline JSValue JS_NewCFunctionMagic(JSContext *ctx, JSCFunctionMagic *fun const char *name, int length, JSCFunctionEnum cproto, int magic) { - return JS_NewCFunction2(ctx, (JSCFunction *)func, name, length, cproto, magic); + /* Used to squelch a -Wcast-function-type warning. */ + JSCFunctionType ft = { .generic_magic = func }; + return JS_NewCFunction2(ctx, ft.generic, name, length, cproto, magic); } void JS_SetConstructor(JSContext *ctx, JSValueConst func_obj, JSValueConst proto); @@ -1094,9 +1115,9 @@ typedef struct JSCFunctionListEntry { #define JS_ALIAS_DEF(name, from) { name, JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE, JS_DEF_ALIAS, 0, .u = { .alias = { from, -1 } } } #define JS_ALIAS_BASE_DEF(name, from, base) { name, JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE, JS_DEF_ALIAS, 0, .u = { .alias = { from, base } } } -void JS_SetPropertyFunctionList(JSContext *ctx, JSValueConst obj, - const JSCFunctionListEntry *tab, - int len); +int JS_SetPropertyFunctionList(JSContext *ctx, JSValueConst obj, + const JSCFunctionListEntry *tab, + int len); /* C module definition */ @@ -1113,6 +1134,29 @@ int JS_SetModuleExport(JSContext *ctx, JSModuleDef *m, const char *export_name, JSValue val); int JS_SetModuleExportList(JSContext *ctx, JSModuleDef *m, const JSCFunctionListEntry *tab, int len); +/* associate a JSValue to a C module */ +int JS_SetModulePrivateValue(JSContext *ctx, JSModuleDef *m, JSValue val); +JSValue JS_GetModulePrivateValue(JSContext *ctx, JSModuleDef *m); + +/* debug value output */ + +typedef struct { + JS_BOOL show_hidden : 8; /* only show enumerable properties */ + 
JS_BOOL raw_dump : 8; /* avoid doing autoinit and avoid any malloc() call (for internal use) */ + uint32_t max_depth; /* recurse up to this depth, 0 = no limit */ + uint32_t max_string_length; /* print no more than this length for + strings, 0 = no limit */ + uint32_t max_item_count; /* print no more than this count for + arrays or objects, 0 = no limit */ +} JSPrintValueOptions; + +typedef void JSPrintValueWrite(void *opaque, const char *buf, size_t len); + +void JS_PrintValueSetDefaultOptions(JSPrintValueOptions *options); +void JS_PrintValueRT(JSRuntime *rt, JSPrintValueWrite *write_func, void *write_opaque, + JSValueConst val, const JSPrintValueOptions *options); +void JS_PrintValue(JSContext *ctx, JSPrintValueWrite *write_func, void *write_opaque, + JSValueConst val, const JSPrintValueOptions *options); #undef js_unlikely #undef js_force_inline diff --git a/release.sh b/release.sh index f66e928..6f4bdf5 100755 --- a/release.sh +++ b/release.sh @@ -174,7 +174,7 @@ cp Makefile VERSION TODO Changelog readme.txt LICENSE \ cp tests/*.js tests/*.patch tests/bjson.c $outdir/tests -cp examples/*.js examples/*.c $outdir/examples +cp examples/*.js examples/*.c examples/*.json $outdir/examples cp doc/quickjs.texi doc/quickjs.pdf doc/quickjs.html \ $outdir/doc diff --git a/repl.js b/repl.js index c65100d..06d4f26 100644 --- a/repl.js +++ b/repl.js @@ -875,126 +875,19 @@ import * as os from "os"; } var hex_mode = false; - var eval_mode = "std"; - function number_to_string(a, radix) { + function number_to_string_hex(a) { var s; - if (!isFinite(a)) { - /* NaN, Infinite */ - return a.toString(); + if (a < 0) { + a = -a; + s = "-"; } else { - if (a == 0) { - if (1 / a < 0) - s = "-0"; - else - s = "0"; - } else { - if (radix == 16 && a === Math.floor(a)) { - var s; - if (a < 0) { - a = -a; - s = "-"; - } else { - s = ""; - } - s += "0x" + a.toString(16); - } else { - s = a.toString(); - } - } - return s; + s = ""; } - } - - function bigint_to_string(a, radix) { - var s; - if 
(radix == 16) { - var s; - if (a < 0) { - a = -a; - s = "-"; - } else { - s = ""; - } - s += "0x" + a.toString(16); - } else { - s = a.toString(); - } - if (eval_mode === "std") - s += "n"; + s += "0x" + a.toString(16); return s; } - - function print(a) { - var stack = []; - - function print_rec(a) { - var n, i, keys, key, type, s; - - type = typeof(a); - if (type === "object") { - if (a === null) { - std.puts(a); - } else if (stack.indexOf(a) >= 0) { - std.puts("[circular]"); - } else if (a instanceof Date) { - std.puts("Date " + a.toGMTString().__quote()); - } else { - stack.push(a); - if (Array.isArray(a)) { - n = a.length; - std.puts("[ "); - for(i = 0; i < n; i++) { - if (i !== 0) - std.puts(", "); - if (i in a) { - print_rec(a[i]); - } else { - std.puts(""); - } - if (i > 20) { - std.puts("..."); - break; - } - } - std.puts(" ]"); - } else if (Object.__getClass(a) === "RegExp") { - std.puts(a.toString()); - } else { - keys = Object.keys(a); - n = keys.length; - std.puts("{ "); - for(i = 0; i < n; i++) { - if (i !== 0) - std.puts(", "); - key = keys[i]; - std.puts(key, ": "); - print_rec(a[key]); - } - std.puts(" }"); - } - stack.pop(a); - } - } else if (type === "string") { - s = a.__quote(); - if (s.length > 79) - s = s.substring(0, 75) + "...\""; - std.puts(s); - } else if (type === "number") { - std.puts(number_to_string(a, hex_mode ? 16 : 10)); - } else if (type === "bigint") { - std.puts(bigint_to_string(a, hex_mode ? 
16 : 10)); - } else if (type === "symbol") { - std.puts(String(a)); - } else if (type === "function") { - std.puts("function " + a.name + "()"); - } else { - std.puts(a); - } - } - print_rec(a); - } - + function extract_directive(a) { var pos; if (a[0] !== '\\') @@ -1116,10 +1009,25 @@ import * as os from "os"; } function print_eval_result(result) { + var default_print = true; + result = result.value; eval_time = os.now() - eval_start_time; std.puts(colors[styles.result]); - print(result); + if (hex_mode) { + if (typeof result == "number" && + result === Math.floor(result)) { + std.puts(number_to_string_hex(result)); + default_print = false; + } else if (typeof result == "bigint") { + std.puts(number_to_string_hex(result)); + std.puts("n"); + default_print = false; + } + } + if (default_print) { + std.__printObject(result); + } std.puts("\n"); std.puts(colors.none); /* set the last result */ @@ -1130,15 +1038,10 @@ import * as os from "os"; function print_eval_error(error) { std.puts(colors[styles.error_msg]); - if (error instanceof Error) { - console.log(error); - if (error.stack) { - std.puts(error.stack); - } - } else { + if (!(error instanceof Error)) std.puts("Throw: "); - console.log(error); - } + std.__printObject(error); + std.puts("\n"); std.puts(colors.none); handle_cmd_end(); diff --git a/run-test262.c b/run-test262.c index 4397a1d..100ed13 100644 --- a/run-test262.c +++ b/run-test262.c @@ -78,6 +78,7 @@ char *harness_dir; char *harness_exclude; char *harness_features; char *harness_skip_features; +int *harness_skip_features_count; char *error_filename; char *error_file; FILE *error_out; @@ -372,26 +373,39 @@ static void enumerate_tests(const char *path) namelist_cmp_indirect); } +static void js_print_value_write(void *opaque, const char *buf, size_t len) +{ + FILE *fo = opaque; + fwrite(buf, 1, len, fo); +} + static JSValue js_print(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { int i; - const char *str; - + JSValueConst v; + if 
(outfile) { for (i = 0; i < argc; i++) { if (i != 0) fputc(' ', outfile); - str = JS_ToCString(ctx, argv[i]); - if (!str) - return JS_EXCEPTION; - if (!strcmp(str, "Test262:AsyncTestComplete")) { - async_done++; - } else if (strstart(str, "Test262:AsyncTestFailure", NULL)) { - async_done = 2; /* force an error */ + v = argv[i]; + if (JS_IsString(v)) { + const char *str; + size_t len; + str = JS_ToCStringLen(ctx, &len, v); + if (!str) + return JS_EXCEPTION; + if (!strcmp(str, "Test262:AsyncTestComplete")) { + async_done++; + } else if (strstart(str, "Test262:AsyncTestFailure", NULL)) { + async_done = 2; /* force an error */ + } + fwrite(str, 1, len, outfile); + JS_FreeCString(ctx, str); + } else { + JS_PrintValue(ctx, js_print_value_write, outfile, v, NULL); } - fputs(str, outfile); - JS_FreeCString(ctx, str); } fputc('\n', outfile); } @@ -483,8 +497,7 @@ static void *agent_start(void *arg) JS_FreeValue(ctx, ret_val); for(;;) { - JSContext *ctx1; - ret = JS_ExecutePendingJob(JS_GetRuntime(ctx), &ctx1); + ret = JS_ExecutePendingJob(JS_GetRuntime(ctx), NULL); if (ret < 0) { js_std_dump_error(ctx); break; @@ -823,13 +836,21 @@ static char *load_file(const char *filename, size_t *lenp) return buf; } +static int json_module_init_test(JSContext *ctx, JSModuleDef *m) +{ + JSValue val; + val = JS_GetModulePrivateValue(ctx, m); + JS_SetModuleExport(ctx, m, "default", val); + return 0; +} + static JSModuleDef *js_module_loader_test(JSContext *ctx, - const char *module_name, void *opaque) + const char *module_name, void *opaque, + JSValueConst attributes) { size_t buf_len; uint8_t *buf; JSModuleDef *m; - JSValue func_val; char *filename, *slash, path[1024]; // interpret import("bar.js") from path/to/foo.js as @@ -851,15 +872,33 @@ static JSModuleDef *js_module_loader_test(JSContext *ctx, return NULL; } - /* compile the module */ - func_val = JS_Eval(ctx, (char *)buf, buf_len, module_name, - JS_EVAL_TYPE_MODULE | JS_EVAL_FLAG_COMPILE_ONLY); - js_free(ctx, buf); - if 
(JS_IsException(func_val)) - return NULL; - /* the module is already referenced, so we must free it */ - m = JS_VALUE_GET_PTR(func_val); - JS_FreeValue(ctx, func_val); + if (js_module_test_json(ctx, attributes) == 1) { + /* compile as JSON */ + JSValue val; + val = JS_ParseJSON(ctx, (char *)buf, buf_len, module_name); + js_free(ctx, buf); + if (JS_IsException(val)) + return NULL; + m = JS_NewCModule(ctx, module_name, json_module_init_test); + if (!m) { + JS_FreeValue(ctx, val); + return NULL; + } + /* only export the "default" symbol which will contain the JSON object */ + JS_AddModuleExport(ctx, m, "default"); + JS_SetModulePrivateValue(ctx, m, val); + } else { + JSValue func_val; + /* compile the module */ + func_val = JS_Eval(ctx, (char *)buf, buf_len, module_name, + JS_EVAL_TYPE_MODULE | JS_EVAL_FLAG_COMPILE_ONLY); + js_free(ctx, buf); + if (JS_IsException(func_val)) + return NULL; + /* the module is already referenced, so we must free it */ + m = JS_VALUE_GET_PTR(func_val); + JS_FreeValue(ctx, func_val); + } return m; } @@ -1231,8 +1270,7 @@ static int eval_buf(JSContext *ctx, const char *buf, size_t buf_len, JS_FreeValue(ctx, res_val); } for(;;) { - JSContext *ctx1; - ret = JS_ExecutePendingJob(JS_GetRuntime(ctx), &ctx1); + ret = JS_ExecutePendingJob(JS_GetRuntime(ctx), NULL); if (ret < 0) { res_val = JS_EXCEPTION; break; @@ -1574,7 +1612,7 @@ int run_test_buf(const char *filename, const char *harness, namelist_t *ip, JS_SetCanBlock(rt, can_block); /* loader for ES6 modules */ - JS_SetModuleLoaderFunc(rt, NULL, js_module_loader_test, (void *)filename); + JS_SetModuleLoaderFunc2(rt, NULL, js_module_loader_test, NULL, (void *)filename); add_helpers(ctx); @@ -1699,10 +1737,13 @@ int run_test(const char *filename, int index) p = find_tag(desc, "features:", &state); if (p) { while ((option = get_option(&p, &state)) != NULL) { + char *p1; if (find_word(harness_features, option)) { /* feature is enabled */ - } else if (find_word(harness_skip_features, option)) { + } 
else if ((p1 = find_word(harness_skip_features, option)) != NULL) { /* skip disabled feature */ + if (harness_skip_features_count) + harness_skip_features_count[p1 - harness_skip_features]++; skip |= 1; } else { /* feature is not listed: skip and warn */ @@ -1875,7 +1916,7 @@ int run_test262_harness_test(const char *filename, BOOL is_module) JS_SetCanBlock(rt, can_block); /* loader for ES6 modules */ - JS_SetModuleLoaderFunc(rt, NULL, js_module_loader_test, (void *)filename); + JS_SetModuleLoaderFunc2(rt, NULL, js_module_loader_test, NULL, (void *)filename); add_helpers(ctx); @@ -1899,10 +1940,9 @@ int run_test262_harness_test(const char *filename, BOOL is_module) JS_FreeValue(ctx, res_val); } for(;;) { - JSContext *ctx1; - ret = JS_ExecutePendingJob(JS_GetRuntime(ctx), &ctx1); + ret = JS_ExecutePendingJob(JS_GetRuntime(ctx), NULL); if (ret < 0) { - js_std_dump_error(ctx1); + js_std_dump_error(ctx); ret_code = 1; } else if (ret == 0) { break; @@ -2036,6 +2076,7 @@ int main(int argc, char **argv) const char *ignore = ""; BOOL is_test262_harness = FALSE; BOOL is_module = FALSE; + BOOL count_skipped_features = FALSE; clock_t clocks; #if !defined(_WIN32) @@ -2103,6 +2144,8 @@ int main(int argc, char **argv) is_test262_harness = TRUE; } else if (str_equal(arg, "--module")) { is_module = TRUE; + } else if (str_equal(arg, "--count_skipped_features")) { + count_skipped_features = TRUE; } else { fatal(1, "unknown option: %s", arg); break; @@ -2137,6 +2180,14 @@ int main(int argc, char **argv) clocks = clock(); + if (count_skipped_features) { + /* not storage efficient but it is simple */ + size_t size; + size = sizeof(harness_skip_features_count[0]) * strlen(harness_skip_features); + harness_skip_features_count = malloc(size); + memset(harness_skip_features_count, 0, size); + } + if (is_dir_list) { if (optind < argc && !isdigit((unsigned char)argv[optind][0])) { filename = argv[optind++]; @@ -2187,6 +2238,30 @@ int main(int argc, char **argv) printf("\n"); } + if 
(count_skipped_features) { + size_t i, n, len = strlen(harness_skip_features); + BOOL disp = FALSE; + int c; + for(i = 0; i < len; i++) { + if (harness_skip_features_count[i] != 0) { + if (!disp) { + disp = TRUE; + printf("%-30s %7s\n", "SKIPPED FEATURE", "COUNT"); + } + for(n = 0; n < 30; n++) { + c = harness_skip_features[i + n]; + if (is_word_sep(c)) + break; + putchar(c); + } + for(; n < 30; n++) + putchar(' '); + printf(" %7d\n", harness_skip_features_count[i]); + } + } + printf("\n"); + } + if (is_dir_list) { fprintf(stderr, "Result: %d/%d error%s", test_failed, test_count, test_count != 1 ? "s" : ""); @@ -2216,6 +2291,8 @@ int main(int argc, char **argv) namelist_free(&exclude_list); namelist_free(&exclude_dir_list); free(harness_dir); + free(harness_skip_features); + free(harness_skip_features_count); free(harness_features); free(harness_exclude); free(error_file); diff --git a/test262.conf b/test262.conf index 1feddcf..c099f2f 100644 --- a/test262.conf +++ b/test262.conf @@ -103,12 +103,12 @@ destructuring-assignment destructuring-binding dynamic-import error-cause -Error.isError=skip +Error.isError explicit-resource-management=skip exponentiation export-star-as-namespace-from-module FinalizationRegistry -Float16Array=skip +Float16Array Float32Array Float64Array for-in-order @@ -116,9 +116,7 @@ for-of generators globalThis hashbang -host-gc-required=skip -import-assertions=skip -import-attributes=skip +import-attributes import-defer=skip import.meta Int16Array @@ -144,7 +142,7 @@ Intl.Segmenter=skip IsHTMLDDA iterator-helpers=skip iterator-sequencing=skip -json-modules=skip +json-modules json-parse-with-source=skip json-superset legacy-regexp=skip @@ -162,7 +160,7 @@ Object.is optional-catch-binding optional-chaining Promise -promise-try=skip +promise-try promise-with-resolvers Promise.allSettled Promise.any @@ -177,11 +175,11 @@ regexp-dotall regexp-duplicate-named-groups=skip regexp-lookbehind regexp-match-indices -regexp-modifiers=skip +regexp-modifiers 
regexp-named-groups regexp-unicode-property-escapes -regexp-v-flag=skip -RegExp.escape=skip +regexp-v-flag +RegExp.escape resizable-arraybuffer=skip rest-parameters Set @@ -230,6 +228,7 @@ Uint32Array Uint8Array uint8array-base64=skip Uint8ClampedArray +upsert=skip WeakMap WeakRef WeakSet @@ -250,32 +249,6 @@ test262/test/built-ins/ThrowTypeError/unique-per-realm-function-proto.js #test262/test/built-ins/RegExp/CharacterClassEscapes/ #test262/test/built-ins/RegExp/property-escapes/ -# feature regexp-v-flag is missing in the tests -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-negative-cases.js 
-test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-negative-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-positive-cases.js -test262/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-positive-cases.js - # not yet in official specification test262/test/built-ins/String/prototype/match/cstm-matcher-on-bigint-primitive.js test262/test/built-ins/String/prototype/match/cstm-matcher-on-bigint-primitive.js @@ -341,14 +314,6 @@ test262/test/staging/sm/Set/symmetric-difference.js test262/test/staging/sm/Set/union.js test262/test/staging/sm/extensions/censor-strict-caller.js test262/test/staging/sm/JSON/parse-with-source.js -test262/test/staging/sm/RegExp/flags.js -test262/test/staging/sm/RegExp/prototype.js - -# no f16 -test262/test/staging/sm/Math/f16round.js -test262/test/staging/sm/TypedArray/sort_small.js -test262/test/staging/sm/extensions/dataview.js -test262/test/staging/sm/TypedArray/toString.js # not standard test262/test/staging/sm/Function/builtin-no-construct.js @@ -357,9 +322,23 @@ 
test262/test/staging/sm/Function/function-toString-builtin-name.js test262/test/staging/sm/extensions/arguments-property-access-in-function.js test262/test/staging/sm/extensions/function-caller-skips-eval-frames.js test262/test/staging/sm/extensions/function-properties.js +test262/test/staging/sm/regress/regress-577648-1.js +test262/test/staging/sm/regress/regress-577648-2.js +test262/test/staging/sm/regress/regress-584355.js +test262/test/staging/sm/regress/regress-586482-1.js +test262/test/staging/sm/regress/regress-586482-2.js +test262/test/staging/sm/regress/regress-586482-3.js +test262/test/staging/sm/regress/regress-586482-4.js +test262/test/staging/sm/regress/regress-699682.js + # RegExp toSource not fully compliant test262/test/staging/sm/RegExp/toString.js test262/test/staging/sm/RegExp/source.js +test262/test/staging/sm/RegExp/escape.js +# source directives are not standard yet +test262/test/staging/sm/syntax/syntax-parsed-arrow-then-directive.js +# returning "bound fn" as initialName for a function is permitted by the spec +test262/test/staging/sm/Function/function-toString-builtin.js [tests] # list test files or use config.testdir diff --git a/test262_errors.txt b/test262_errors.txt index 206e03f..ffe19b9 100644 --- a/test262_errors.txt +++ b/test262_errors.txt @@ -1,72 +1,54 @@ -test262/test/language/module-code/top-level-await/module-graphs-does-not-hang.js:10: TypeError: $DONE() not called +test262/test/annexB/language/expressions/assignmenttargettype/callexpression-as-for-in-lhs.js:27: SyntaxError: invalid for in/of left hand-side +test262/test/annexB/language/expressions/assignmenttargettype/callexpression-as-for-of-lhs.js:27: SyntaxError: invalid for in/of left hand-side +test262/test/annexB/language/expressions/assignmenttargettype/callexpression-in-compound-assignment.js:33: SyntaxError: invalid assignment left-hand side +test262/test/annexB/language/expressions/assignmenttargettype/callexpression-in-postfix-update.js:27: SyntaxError: invalid 
increment/decrement operand +test262/test/annexB/language/expressions/assignmenttargettype/callexpression-in-prefix-update.js:27: SyntaxError: invalid increment/decrement operand +test262/test/annexB/language/expressions/assignmenttargettype/callexpression.js:33: SyntaxError: invalid assignment left-hand side +test262/test/annexB/language/expressions/assignmenttargettype/cover-callexpression-and-asyncarrowhead.js:20: SyntaxError: invalid assignment left-hand side +test262/test/built-ins/Atomics/notify/retrieve-length-before-index-coercion-non-shared-detached.js:34: TypeError: ArrayBuffer is detached +test262/test/built-ins/Atomics/notify/retrieve-length-before-index-coercion-non-shared-detached.js:34: strict mode: TypeError: ArrayBuffer is detached test262/test/staging/sm/Date/UTC-convert-all-arguments.js:75: Test262Error: index 1: expected 42, got Error: didn't throw Expected SameValue(«Error: didn't throw», «42») to be true test262/test/staging/sm/Date/constructor-convert-all-arguments.js:75: Test262Error: index undefined: expected 42, got Error: didn't throw Expected SameValue(«Error: didn't throw», «42») to be true test262/test/staging/sm/Date/non-iso.js:76: Test262Error: Expected SameValue(«NaN», «-40071559730000») to be true test262/test/staging/sm/Date/two-digit-years.js:76: Test262Error: Expected SameValue(«915177600000», «NaN») to be true test262/test/staging/sm/Function/arguments-parameter-shadowing.js:15: Test262Error: Expected SameValue(«true», «false») to be true test262/test/staging/sm/Function/constructor-binding.js:12: Test262Error: Expected SameValue(«"function"», «"undefined"») to be true -test262/test/staging/sm/Function/function-bind.js:14: Test262Error: Expected SameValue(«false», «true») to be true +test262/test/staging/sm/Function/function-bind.js:14: Test262Error: Conforms to NativeFunction Syntax: "function bound unbound() {\n [native code]\n}" test262/test/staging/sm/Function/function-name-for.js:12: Test262Error: Expected SameValue(«""», 
«"forInHead"») to be true -test262/test/staging/sm/Function/function-toString-builtin.js:14: Test262Error: Expected match to '/^\s*function\s*(get|set)?\s*(\w+|(?:'[^']*')|(?:"[^"]*")|\d+|(?:\[[^\]]+\]))?\s*\(\s*\)\s*\{\s*\[native code\]\s*\}\s*$/', Actual value 'function bound fn() { - [native code] -}' Expected SameValue(«null», «null») to be false test262/test/staging/sm/Function/implicit-this-in-parameter-expression.js:13: Test262Error: Expected SameValue(«[object Object]», «undefined») to be true test262/test/staging/sm/Function/invalid-parameter-list.js:35: Error: Assertion failed: expected exception SyntaxError, no exception thrown -test262/test/staging/sm/JSON/parse-number-syntax.js:39: Test262Error: parsing string <1.> threw a non-SyntaxError exception: Test262Error: string <1.> shouldn't have parsed as JSON Expected SameValue(«false», «true») to be true Expected SameValue(«true», «false») to be true -test262/test/staging/sm/JSON/parse-syntax-errors-02.js:51: Test262Error: parsing string <["Illegal backslash escape: \x15"]> threw a non-SyntaxError exception: Test262Error: string <["Illegal backslash escape: \x15"]> shouldn't have parsed as JSON Expected SameValue(«false», «true») to be true Expected SameValue(«true», «false») to be true -test262/test/staging/sm/Math/cbrt-approx.js:26: Error: got 1.39561242508609, expected a number near 1.3956124250860895 (relative error: 2) test262/test/staging/sm/RegExp/constructor-ordering-2.js:15: Test262Error: Expected SameValue(«false», «true») to be true -test262/test/staging/sm/RegExp/escape.js:13: Test262Error: Expected SameValue(«"\\\n"», «"\\n"») to be true -test262/test/staging/sm/RegExp/match-trace.js:13: Test262Error: Expected SameValue(«"get:flags,get:unicode,set:lastIndex,get:exec,call:exec,get:result[0],get:exec,call:exec,get:result[0],get:exec,call:exec,"», «"get:flags,set:lastIndex,get:exec,call:exec,get:result[0],get:exec,call:exec,get:result[0],get:exec,call:exec,"») to be true 
-test262/test/staging/sm/RegExp/regress-613820-1.js:13: Test262Error: Expected SameValue(«"aaa"», «"aa"») to be true -test262/test/staging/sm/RegExp/regress-613820-2.js:13: Test262Error: Expected SameValue(«"f"», «undefined») to be true -test262/test/staging/sm/RegExp/regress-613820-3.js:13: Test262Error: Expected SameValue(«"aab"», «"aa"») to be true -test262/test/staging/sm/RegExp/replace-trace.js:13: Test262Error: Expected SameValue(«"get:flags,get:unicode,set:lastIndex,get:exec,call:exec,get:result[0],get:exec,call:exec,get:result[length],get:result[0],get:result[index],get:result[groups],"», «"get:flags,set:lastIndex,get:exec,call:exec,get:result[0],get:exec,call:exec,get:result[length],get:result[0],get:result[index],get:result[groups],"») to be true -test262/test/staging/sm/RegExp/unicode-ignoreCase-escape.js:22: Test262Error: Actual argument shouldn't be nullish. -test262/test/staging/sm/RegExp/unicode-ignoreCase-word-boundary.js:13: Test262Error: Expected SameValue(«false», «true») to be true -test262/test/staging/sm/String/match-defines-match-elements.js:52: Test262Error: Expected SameValue(«true», «false») to be true +test262/test/staging/sm/RegExp/regress-613820-1.js:12: Test262Error: Actual [aaa, aa, a] and expected [aa, a, a] should have the same contents. +test262/test/staging/sm/RegExp/regress-613820-1.js:12: strict mode: Test262Error: Actual [aaa, aa, a] and expected [aa, a, a] should have the same contents. +test262/test/staging/sm/RegExp/regress-613820-2.js:12: Test262Error: Actual [foobar, f, o, o, b, a, r] and expected [foobar, undefined, undefined, undefined, b, a, r] should have the same contents. +test262/test/staging/sm/RegExp/regress-613820-2.js:12: strict mode: Test262Error: Actual [foobar, f, o, o, b, a, r] and expected [foobar, undefined, undefined, undefined, b, a, r] should have the same contents. 
+test262/test/staging/sm/RegExp/regress-613820-3.js:12: Test262Error: Actual [aab, a, undefined, ab] and expected [aa, undefined, a, undefined] should have the same contents. +test262/test/staging/sm/RegExp/regress-613820-3.js:12: strict mode: Test262Error: Actual [aab, a, undefined, ab] and expected [aa, undefined, a, undefined] should have the same contents. test262/test/staging/sm/TypedArray/constructor-buffer-sequence.js:73: Error: Assertion failed: expected exception ExpectedError, got Error: Poisoned Value test262/test/staging/sm/TypedArray/prototype-constructor-identity.js:17: Test262Error: Expected SameValue(«2», «6») to be true test262/test/staging/sm/TypedArray/set-detached-bigint.js:27: Error: Assertion failed: expected exception SyntaxError, got RangeError: invalid array length test262/test/staging/sm/TypedArray/set-detached.js:112: RangeError: invalid array length -test262/test/staging/sm/TypedArray/sort-negative-nan.js:102: TypeError: cannot read property 'name' of undefined test262/test/staging/sm/TypedArray/sort_modifications.js:12: Test262Error: Int8Array at index 0 for size 4 Expected SameValue(«0», «1») to be true -test262/test/staging/sm/TypedArray/subarray.js:15: Test262Error: Expected SameValue(«0», «1») to be true test262/test/staging/sm/async-functions/async-contains-unicode-escape.js:45: Error: Assertion failed: expected exception SyntaxError, no exception thrown test262/test/staging/sm/async-functions/await-error.js:12: Test262Error: Expected SameValue(«false», «true») to be true test262/test/staging/sm/async-functions/await-in-arrow-parameters.js:33: Error: Assertion failed: expected exception SyntaxError, no exception thrown - AsyncFunction:(a = (b = await/r/g) => {}) => {} test262/test/staging/sm/class/boundFunctionSubclassing.js:12: Test262Error: Expected SameValue(«false», «true») to be true test262/test/staging/sm/class/compPropNames.js:26: Error: Expected syntax error: ({[1, 2]: 3}) -test262/test/staging/sm/class/methDefn.js:26: 
Error: Expected syntax error: b = {a() => 0} test262/test/staging/sm/class/strictExecution.js:32: Error: Assertion failed: expected exception TypeError, no exception thrown test262/test/staging/sm/class/superPropOrdering.js:83: Error: Assertion failed: expected exception TypeError, no exception thrown -test262/test/staging/sm/expressions/optional-chain.js:25: Error: Assertion failed: expected exception SyntaxError, no exception thrown test262/test/staging/sm/expressions/short-circuit-compound-assignment-const.js:97: TypeError: 'a' is read-only test262/test/staging/sm/expressions/short-circuit-compound-assignment-tdz.js:23: Error: Assertion failed: expected exception ReferenceError, got TypeError: 'a' is read-only test262/test/staging/sm/extensions/TypedArray-set-object-funky-length-detaches.js:55: RangeError: invalid array length -test262/test/staging/sm/extensions/regress-469625-01.js:16: Test262Error: TM: Array prototype and expression closures Expected SameValue(«"TypeError: [].__proto__ is not a function"», «"TypeError: not a function"») to be true test262/test/staging/sm/generators/syntax.js:30: Error: Assertion failed: expected SyntaxError, but no exception thrown - function* g() { (function* yield() {}); } test262/test/staging/sm/lexical-environment/block-scoped-functions-annex-b-arguments.js:14: Test262Error: Expected SameValue(«"object"», «"function"») to be true test262/test/staging/sm/lexical-environment/block-scoped-functions-annex-b-eval.js:12: Test262Error: Expected SameValue(«"outer-gouter-geval-gtruefalseq"», «"outer-geval-gwith-gtruefalseq"») to be true test262/test/staging/sm/lexical-environment/block-scoped-functions-annex-b-if.js:20: TypeError: not a function test262/test/staging/sm/lexical-environment/block-scoped-functions-annex-b-notapplicable.js:15: Test262Error: Expected SameValue(«function x() {2}», «function x() {1}») to be true test262/test/staging/sm/lexical-environment/block-scoped-functions-deprecated-redecl.js:23: Test262Error: 
Expected SameValue(«3», «4») to be true -test262/test/staging/sm/lexical-environment/unscopables-proto.js:15: Test262Error: Expected SameValue(«true», «false») to be true test262/test/staging/sm/lexical-environment/var-in-catch-body-annex-b-eval.js:17: Test262Error: Expected SameValue(«"g"», «"global-x"») to be true -test262/test/staging/sm/module/module-export-name-star.js:15: SyntaxError: identifier expected test262/test/staging/sm/object/defineProperties-order.js:14: Test262Error: Expected SameValue(«"ownKeys,getOwnPropertyDescriptor,getOwnPropertyDescriptor,get,get"», «"ownKeys,getOwnPropertyDescriptor,get,getOwnPropertyDescriptor,get"») to be true -test262/test/staging/sm/object/defineProperty-proxy.js:32: Test262Error: Expected ["has configurable", "get configurable", "has writable", "get writable", "has enumerable", "get enumerable", "has value", "get value", "has get", "has set"] to be structurally equal to ["has enumerable", "get enumerable", "has configurable", "get configurable", "has value", "get value", "has writable", "get writable", "has get", "has set"]. 
-test262/test/staging/sm/regress/regress-577648-1.js:21: Test262Error: 1 Expected SameValue(«true», «false») to be true -test262/test/staging/sm/regress/regress-577648-2.js:14: Test262Error: Expected SameValue(«true», «false») to be true -test262/test/staging/sm/regress/regress-584355.js:12: Test262Error: Expected SameValue(«"function f () { ff (); }"», «"undefined"») to be true -test262/test/staging/sm/regress/regress-586482-1.js:19: Test262Error: ok Expected SameValue(«true», «false») to be true -test262/test/staging/sm/regress/regress-586482-2.js:19: Test262Error: ok Expected SameValue(«true», «false») to be true -test262/test/staging/sm/regress/regress-586482-3.js:18: Test262Error: ok Expected SameValue(«true», «false») to be true -test262/test/staging/sm/regress/regress-586482-4.js:14: Test262Error: ok Expected SameValue(«function() { this.f(); }», «undefined») to be true test262/test/staging/sm/regress/regress-602621.js:14: Test262Error: function sub-statement must override arguments Expected SameValue(«"function"», «"object"») to be true -test262/test/staging/sm/regress/regress-699682.js:15: Test262Error: Expected SameValue(«false», «true») to be true test262/test/staging/sm/regress/regress-1383630.js:30: Error: Assertion failed: expected exception TypeError, no exception thrown test262/test/staging/sm/statements/arrow-function-in-for-statement-head.js:15: Test262Error: expected syntax error, got Error: didn't throw Expected SameValue(«false», «true») to be true test262/test/staging/sm/statements/regress-642975.js:14: Test262Error: Expected SameValue(«undefined», «"y"») to be true test262/test/staging/sm/statements/try-completion.js:17: Test262Error: Expected SameValue(«"try"», «undefined») to be true -test262/test/staging/sm/syntax/syntax-parsed-arrow-then-directive.js:77: Test262Error: stack should contain 'http://example.com/foo.js': block, semi Expected SameValue(«false», «true») to be true diff --git a/tests/assert.js b/tests/assert.js index 
c8240c8..42369ed 100644 --- a/tests/assert.js +++ b/tests/assert.js @@ -3,14 +3,8 @@ export function assert(actual, expected, message) { expected = true; if (typeof actual === typeof expected) { - if (actual === expected) { - if (actual !== 0 || (1 / actual) === (1 / expected)) - return; - } - if (typeof actual === 'number') { - if (isNaN(actual) && isNaN(expected)) - return; - } + if (Object.is(actual, expected)) + return; if (typeof actual === 'object') { if (actual !== null && expected !== null && actual.constructor === expected.constructor diff --git a/tests/test262.patch b/tests/test262.patch index b6f4aa5..6956d15 100644 --- a/tests/test262.patch +++ b/tests/test262.patch @@ -71,10 +71,10 @@ index b397be0..c197ddc 100644 return result; } diff --git a/harness/sm/non262.js b/harness/sm/non262.js -index c1829e3..3a3ee27 100644 +index 89df923..79ded15 100644 --- a/harness/sm/non262.js +++ b/harness/sm/non262.js -@@ -41,8 +41,6 @@ globalThis.createNewGlobal = function() { +@@ -34,8 +34,6 @@ globalThis.createNewGlobal = function() { return $262.createRealm().global } @@ -83,13 +83,43 @@ index c1829e3..3a3ee27 100644 function assertEq(...args) { assert.sameValue(...args) } -@@ -71,4 +69,4 @@ if (globalThis.createExternalArrayBuffer === undefined) { - if (globalThis.enableGeckoProfilingWithSlowAssertions === undefined) { - globalThis.enableGeckoProfilingWithSlowAssertions = globalThis.enableGeckoProfiling = - globalThis.disableGeckoProfiling = () => {} --} -\ No newline at end of file -+} +diff --git a/test/staging/sm/extensions/regress-469625-01.js b/test/staging/sm/extensions/regress-469625-01.js +index 81f84fc..4652002 100644 +--- a/test/staging/sm/extensions/regress-469625-01.js ++++ b/test/staging/sm/extensions/regress-469625-01.js +@@ -14,8 +14,7 @@ esid: pending + //----------------------------------------------------------------------------- + var BUGNUMBER = 469625; + var summary = 'TM: Array prototype and expression closures'; +-var actual = ''; +-var 
expect = ''; ++var actual = null; + + + //----------------------------------------------------------------------------- +@@ -24,9 +23,6 @@ test(); + + function test() + { +- expect = 'TypeError: [].__proto__ is not a function'; +- +- + Array.prototype.__proto__ = function () { return 3; }; + + try +@@ -35,8 +31,10 @@ function test() + } + catch(ex) + { +- print(actual = ex + ''); ++ print(ex + ''); ++ actual = ex; + } + +- assert.sameValue(expect, actual, summary); ++ assert.sameValue(actual instanceof TypeError, true); ++ assert.sameValue(actual.message.includes("not a function"), true); + } diff --git a/test/staging/sm/misc/new-with-non-constructor.js b/test/staging/sm/misc/new-with-non-constructor.js index 18c2f0c..f9aa209 100644 --- a/test/staging/sm/misc/new-with-non-constructor.js diff --git a/tests/test_bjson.js b/tests/test_bjson.js index a270796..8794b22 100644 --- a/tests/test_bjson.js +++ b/tests/test_bjson.js @@ -42,6 +42,7 @@ function isArrayLike(a) (a instanceof Int8Array) || (a instanceof Int16Array) || (a instanceof Int32Array) || + (a instanceof Float16Array) || (a instanceof Float32Array) || (a instanceof Float64Array); } @@ -85,7 +86,7 @@ function toStr(a) case "undefined": return "undefined"; case "string": - return a.__quote(); + return JSON.stringify(a); case "number": if (a == 0 && 1 / a < 0) return "-0"; @@ -157,6 +158,7 @@ function bjson_test_all() bjson_test([new Date(1234), new String("abc"), new Number(-12.1), new Boolean(true)]); bjson_test(new Int32Array([123123, 222111, -32222])); + bjson_test(new Float16Array([1024, 1024.5])); bjson_test(new Float64Array([123123, 222111.5])); /* tested with a circular reference */ diff --git a/tests/test_builtin.js b/tests/test_builtin.js index 423841d..14a883c 100644 --- a/tests/test_builtin.js +++ b/tests/test_builtin.js @@ -489,6 +489,9 @@ function test_typed_array() a = new Uint16Array(buffer, 2); a[0] = -1; + a = new Float16Array(buffer, 8, 1); + a[0] = 1; + a = new Float32Array(buffer, 8, 1); 
a[0] = 1; @@ -593,7 +596,7 @@ function test_json() ] ]`); - assert_json_error('\n" @\\x"'); + assert_json_error('\n" \\@x"'); assert_json_error('\n{ "a": @x }"'); } @@ -748,6 +751,51 @@ function test_regexp() assert(a, ["123a23", "3"]); a = /()*?a/.exec(","); assert(a, null); + + /* test \b escape */ + assert(/[\q{a\b}]/.test("a\b"), true); + assert(/[\b]/.test("\b"), true); + + /* test case insensitive matching (test262 hardly tests it) */ + assert("aAbBcC#4".replace(/\p{Lower}/gu,"X"), "XAXBXC#4"); + + assert("aAbBcC#4".replace(/\p{Lower}/gui,"X"), "XXXXXX#4"); + assert("aAbBcC#4".replace(/\p{Upper}/gui,"X"), "XXXXXX#4"); + assert("aAbBcC#4".replace(/\P{Lower}/gui,"X"), "XXXXXXXX"); + assert("aAbBcC#4".replace(/\P{Upper}/gui,"X"), "XXXXXXXX"); + assert("aAbBcC".replace(/[^b]/gui, "X"), "XXbBXX"); + assert("aAbBcC".replace(/[^A-B]/gui, "X"), "aAbBXX"); + + assert("aAbBcC#4".replace(/\p{Lower}/gvi,"X"), "XXXXXX#4"); + assert("aAbBcC#4".replace(/\P{Lower}/gvi,"X"), "aAbBcCXX"); + assert("aAbBcC#4".replace(/[^\P{Lower}]/gvi,"X"), "XXXXXX#4"); + assert("aAbBcC#4".replace(/\P{Upper}/gvi,"X"), "aAbBcCXX"); + assert("aAbBcC".replace(/[^b]/gvi, "X"), "XXbBXX"); + assert("aAbBcC".replace(/[^A-B]/gvi, "X"), "aAbBXX"); + assert("aAbBcC".replace(/[[a-c]&&B]/gvi, "X"), "aAXXcC"); + assert("aAbBcC".replace(/[[a-c]--B]/gvi, "X"), "XXbBXX"); + + assert("abcAbC".replace(/[\q{AbC}]/gvi,"X"), "XX"); + /* Note: SpiderMonkey and v8 may not be correct */ + assert("abcAbC".replace(/[\q{BC|A}]/gvi,"X"), "XXXX"); + assert("abcAbC".replace(/[\q{BC|A}--a]/gvi,"X"), "aXAX"); + + /* case where lastIndex points to the second element of a + surrogate pair */ + a = /(?:)/gu; + a.lastIndex = 1; + a.exec("🐱"); + assert(a.lastIndex, 0); + + a.lastIndex = 1; + a.exec("a\udc00"); + assert(a.lastIndex, 1); + + a = /\u{10000}/vgd; + a.lastIndex = 1; + a = a.exec("\u{10000}_\u{10000}"); + assert(a.indices[0][0], 0); + assert(a.indices[0][1], 2); } function test_symbol() diff --git 
a/tests/test_language.js b/tests/test_language.js index cda782b..4fa16c8 100644 --- a/tests/test_language.js +++ b/tests/test_language.js @@ -2,7 +2,7 @@ function assert(actual, expected, message) { if (arguments.length == 1) expected = true; - if (actual === expected) + if (Object.is(actual, expected)) return; if (actual !== null && expected !== null diff --git a/tests/test_std.js b/tests/test_std.js index c844869..3debe40 100644 --- a/tests/test_std.js +++ b/tests/test_std.js @@ -6,7 +6,7 @@ function assert(actual, expected, message) { if (arguments.length == 1) expected = true; - if (actual === expected) + if (Object.is(actual, expected)) return; if (actual !== null && expected !== null @@ -129,15 +129,27 @@ function test_popen() function test_ext_json() { var expected, input, obj; - expected = '{"x":false,"y":true,"z2":null,"a":[1,8,160],"s":"str"}'; + expected = '{"x":false,"y":true,"z2":null,"a":[1,8,160],"b":"abc\\u000bd","s":"str"}'; input = `{ "x":false, /*comments are allowed */ "y":true, // also a comment z2:null, // unquoted property names "a":[+1,0o10,0xa0,], // plus prefix, octal, hexadecimal - "s":"str",} // trailing comma in objects and arrays + "b": "ab\ +c\\vd", // multi-line strings, '\v' escape + "s":'str',} // trailing comma in objects and arrays, single quoted string `; obj = std.parseExtJSON(input); assert(JSON.stringify(obj), expected); + + obj = std.parseExtJSON('[Infinity, +Infinity, -Infinity, NaN, +NaN, -NaN, .1, -.2]'); + assert(obj[0], Infinity); + assert(obj[1], Infinity); + assert(obj[2], -Infinity); + assert(obj[3], NaN); + assert(obj[4], NaN); + assert(obj[5], NaN); + assert(obj[6], 0.1); + assert(obj[7], -0.2); } function test_os() @@ -294,6 +306,22 @@ function test_async_gc() })(); } +/* check that the promise async rejection handler is not invoked when + the rejection is handled not too late after the promise + rejection. 
*/ +function test_async_promise_rejection() +{ + var counter = 0; + var p1, p2, p3; + p1 = Promise.reject(); + p2 = Promise.reject(); + p3 = Promise.resolve(); + p1.catch(() => counter++); + p2.catch(() => counter++); + p3.then(() => counter++) + os.setTimeout(() => { assert(counter, 3) }, 10); +} + test_printf(); test_file1(); test_file2(); @@ -304,4 +332,5 @@ test_os_exec(); test_timer(); test_ext_json(); test_async_gc(); +test_async_promise_rejection(); diff --git a/unicode_download.sh b/unicode_download.sh index e259891..ef8b30d 100755 --- a/unicode_download.sh +++ b/unicode_download.sh @@ -1,8 +1,9 @@ #!/bin/sh set -e -url="ftp://ftp.unicode.org/Public/16.0.0/ucd" -emoji_url="${url}/emoji/emoji-data.txt" +version="16.0.0" +emoji_version="16.0" +url="ftp://ftp.unicode.org/Public" files="CaseFolding.txt DerivedNormalizationProps.txt PropList.txt \ SpecialCasing.txt CompositionExclusions.txt ScriptExtensions.txt \ @@ -12,8 +13,11 @@ PropertyValueAliases.txt" mkdir -p unicode for f in $files; do - g="${url}/${f}" + g="${url}/${version}/ucd/${f}" wget $g -O unicode/$f done -wget $emoji_url -O unicode/emoji-data.txt +wget "${url}/${version}/ucd/emoji/emoji-data.txt" -O unicode/emoji-data.txt + +wget "${url}/emoji/${emoji_version}/emoji-sequences.txt" -O unicode/emoji-sequences.txt +wget "${url}/emoji/${emoji_version}/emoji-zwj-sequences.txt" -O unicode/emoji-zwj-sequences.txt diff --git a/unicode_gen.c b/unicode_gen.c index 0f11ef8..c793ba1 100644 --- a/unicode_gen.c +++ b/unicode_gen.c @@ -156,6 +156,153 @@ char *get_line(char *buf, int buf_size, FILE *f) return buf; } +typedef struct REString { + struct REString *next; + uint32_t hash; + uint32_t len; + uint32_t flags; + uint32_t buf[]; +} REString; + +typedef struct { + uint32_t n_strings; + uint32_t hash_size; + int hash_bits; + REString **hash_table; +} REStringList; + +static uint32_t re_string_hash(int len, const uint32_t *buf) +{ + int i; + uint32_t h; + h = 1; + for(i = 0; i < len; i++) + h = h * 263 + 
buf[i]; + return h * 0x61C88647; +} + +static void re_string_list_init(REStringList *s) +{ + s->n_strings = 0; + s->hash_size = 0; + s->hash_bits = 0; + s->hash_table = NULL; +} + +static __maybe_unused void re_string_list_free(REStringList *s) +{ + REString *p, *p_next; + int i; + for(i = 0; i < s->hash_size; i++) { + for(p = s->hash_table[i]; p != NULL; p = p_next) { + p_next = p->next; + free(p); + } + } + free(s->hash_table); +} + +static void lre_print_char(int c, BOOL is_range) +{ + if (c == '\'' || c == '\\' || + (is_range && (c == '-' || c == ']'))) { + printf("\\%c", c); + } else if (c >= ' ' && c <= 126) { + printf("%c", c); + } else { + printf("\\u{%04x}", c); + } +} + +static __maybe_unused void re_string_list_dump(const char *str, const REStringList *s) +{ + REString *p; + int i, j, k; + + printf("%s:\n", str); + + j = 0; + for(i = 0; i < s->hash_size; i++) { + for(p = s->hash_table[i]; p != NULL; p = p->next) { + printf(" %d/%d: '", j, s->n_strings); + for(k = 0; k < p->len; k++) { + lre_print_char(p->buf[k], FALSE); + } + printf("'\n"); + j++; + } + } +} + +static REString *re_string_find2(REStringList *s, int len, const uint32_t *buf, + uint32_t h0, BOOL add_flag) +{ + uint32_t h = 0; /* avoid warning */ + REString *p; + if (s->n_strings != 0) { + h = h0 >> (32 - s->hash_bits); + for(p = s->hash_table[h]; p != NULL; p = p->next) { + if (p->hash == h0 && p->len == len && + !memcmp(p->buf, buf, len * sizeof(buf[0]))) { + return p; + } + } + } + /* not found */ + if (!add_flag) + return NULL; + /* increase the size of the hash table if needed */ + if (unlikely((s->n_strings + 1) > s->hash_size)) { + REString **new_hash_table, *p_next; + int new_hash_bits, i; + uint32_t new_hash_size; + new_hash_bits = max_int(s->hash_bits + 1, 4); + new_hash_size = 1 << new_hash_bits; + new_hash_table = malloc(sizeof(new_hash_table[0]) * new_hash_size); + if (!new_hash_table) + return NULL; + memset(new_hash_table, 0, sizeof(new_hash_table[0]) * new_hash_size); + for(i 
= 0; i < s->hash_size; i++) { + for(p = s->hash_table[i]; p != NULL; p = p_next) { + p_next = p->next; + h = p->hash >> (32 - new_hash_bits); + p->next = new_hash_table[h]; + new_hash_table[h] = p; + } + } + free(s->hash_table); + s->hash_bits = new_hash_bits; + s->hash_size = new_hash_size; + s->hash_table = new_hash_table; + h = h0 >> (32 - s->hash_bits); + } + + p = malloc(sizeof(REString) + len * sizeof(buf[0])); + if (!p) + return NULL; + p->next = s->hash_table[h]; + s->hash_table[h] = p; + s->n_strings++; + p->hash = h0; + p->len = len; + p->flags = 0; + memcpy(p->buf, buf, sizeof(buf[0]) * len); + return p; +} + +static REString *re_string_find(REStringList *s, int len, const uint32_t *buf, + BOOL add_flag) +{ + uint32_t h0; + h0 = re_string_hash(len, buf); + return re_string_find2(s, len, buf, h0, add_flag); +} + +static void re_string_add(REStringList *s, int len, const uint32_t *buf) +{ + re_string_find(s, len, buf, TRUE); +} + #define UNICODE_GENERAL_CATEGORY typedef enum { @@ -225,6 +372,23 @@ static const char *unicode_prop_short_name[] = { #undef UNICODE_PROP_LIST +#define UNICODE_SEQUENCE_PROP_LIST + +typedef enum { +#define DEF(id) SEQUENCE_PROP_ ## id, +#include "unicode_gen_def.h" +#undef DEF + SEQUENCE_PROP_COUNT, +} UnicodeSequencePropEnum1; + +static const char *unicode_sequence_prop_name[] = { +#define DEF(id) #id, +#include "unicode_gen_def.h" +#undef DEF +}; + +#undef UNICODE_SEQUENCE_PROP_LIST + typedef struct { /* case conv */ uint8_t u_len; @@ -247,7 +411,15 @@ typedef struct { int *decomp_data; } CCInfo; +typedef struct { + int count; + int size; + int *tab; +} UnicodeSequenceProperties; + CCInfo *unicode_db; +REStringList rgi_emoji_zwj_sequence; +DynBuf rgi_emoji_tag_sequence; int find_name(const char **tab, int tab_len, const char *name) { @@ -751,6 +923,147 @@ void parse_prop_list(const char *filename) fclose(f); } +#define SEQ_MAX_LEN 16 + +static BOOL is_emoji_modifier(uint32_t c) +{ + return (c >= 0x1f3fb && c <= 0x1f3ff); +} + 
+static void add_sequence_prop(int idx, int seq_len, int *seq) +{ + int i; + + assert(idx < SEQUENCE_PROP_COUNT); + switch(idx) { + case SEQUENCE_PROP_Basic_Emoji: + /* convert to 2 properties lists */ + if (seq_len == 1) { + set_prop(seq[0], PROP_Basic_Emoji1, 1); + } else if (seq_len == 2 && seq[1] == 0xfe0f) { + set_prop(seq[0], PROP_Basic_Emoji2, 1); + } else { + abort(); + } + break; + case SEQUENCE_PROP_RGI_Emoji_Modifier_Sequence: + assert(seq_len == 2); + assert(is_emoji_modifier(seq[1])); + assert(get_prop(seq[0], PROP_Emoji_Modifier_Base)); + set_prop(seq[0], PROP_RGI_Emoji_Modifier_Sequence, 1); + break; + case SEQUENCE_PROP_RGI_Emoji_Flag_Sequence: + { + int code; + assert(seq_len == 2); + assert(seq[0] >= 0x1F1E6 && seq[0] <= 0x1F1FF); + assert(seq[1] >= 0x1F1E6 && seq[1] <= 0x1F1FF); + code = (seq[0] - 0x1F1E6) * 26 + (seq[1] - 0x1F1E6); + /* XXX: would be more compact with a simple bitmap -> 676 bits */ + set_prop(code, PROP_RGI_Emoji_Flag_Sequence, 1); + } + break; + case SEQUENCE_PROP_RGI_Emoji_ZWJ_Sequence: + re_string_add(&rgi_emoji_zwj_sequence, seq_len, (uint32_t *)seq); + break; + case SEQUENCE_PROP_RGI_Emoji_Tag_Sequence: + { + assert(seq_len >= 3); + assert(seq[0] == 0x1F3F4); + assert(seq[seq_len - 1] == 0xE007F); + for(i = 1; i < seq_len - 1; i++) { + assert(seq[i] >= 0xe0001 && seq[i] <= 0xe007e); + dbuf_putc(&rgi_emoji_tag_sequence, seq[i] - 0xe0000); + } + dbuf_putc(&rgi_emoji_tag_sequence, 0); + } + break; + case SEQUENCE_PROP_Emoji_Keycap_Sequence: + assert(seq_len == 3); + assert(seq[1] == 0xfe0f); + assert(seq[2] == 0x20e3); + set_prop(seq[0], PROP_Emoji_Keycap_Sequence, 1); + break; + default: + assert(0); + } +} + +void parse_sequence_prop_list(const char *filename) +{ + FILE *f; + char line[4096], *p, buf[256], *q, *p_start; + uint32_t c0, c1, c; + int idx, seq_len; + int seq[SEQ_MAX_LEN]; + + f = fopen(filename, "rb"); + if (!f) { + perror(filename); + exit(1); + } + + for(;;) { + if (!get_line(line, sizeof(line), f)) + break; + 
p = line; + while (isspace(*p)) + p++; + if (*p == '#' || *p == '@' || *p == '\0') + continue; + p_start = p; + + /* find the sequence property name */ + p = strchr(p, ';'); + if (!p) + continue; + p++; + p += strspn(p, " \t"); + q = buf; + while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t' && *p != ';') { + if ((q - buf) < sizeof(buf) - 1) + *q++ = *p; + p++; + } + *q = '\0'; + idx = find_name(unicode_sequence_prop_name, + countof(unicode_sequence_prop_name), buf); + if (idx < 0) { + fprintf(stderr, "Property not found: %s\n", buf); + exit(1); + } + + p = p_start; + c0 = strtoul(p, (char **)&p, 16); + assert(c0 <= CHARCODE_MAX); + + if (*p == '.' && p[1] == '.') { + p += 2; + c1 = strtoul(p, (char **)&p, 16); + assert(c1 <= CHARCODE_MAX); + for(c = c0; c <= c1; c++) { + seq[0] = c; + add_sequence_prop(idx, 1, seq); + } + } else { + seq_len = 0; + seq[seq_len++] = c0; + for(;;) { + while (isspace(*p)) + p++; + if (*p == ';' || *p == '\0') + break; + c0 = strtoul(p, (char **)&p, 16); + assert(c0 <= CHARCODE_MAX); + assert(seq_len < countof(seq)); + seq[seq_len++] = c0; + } + add_sequence_prop(idx, seq_len, seq); + } + } + fclose(f); +} + void parse_scripts(const char *filename) { FILE *f; @@ -1654,7 +1967,7 @@ void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len, maxw = 0; for(i = 0; i < len; i++) { w = strlen(tab_name[i]); - if (tab_short_name[i][0] != '\0') { + if (tab_short_name && tab_short_name[i][0] != '\0') { w += 1 + strlen(tab_short_name[i]); } if (maxw < w) @@ -1666,7 +1979,7 @@ void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len, for(i = 0; i < len; i++) { fprintf(f, " \""); w = fprintf(f, "%s", tab_name[i]); - if (tab_short_name[i][0] != '\0') { + if (tab_short_name && tab_short_name[i][0] != '\0') { w += fprintf(f, ",%s", tab_short_name[i]); } fprintf(f, "\"%*s\"\\0\"\n", 1 + maxw - w, ""); @@ -1774,10 +2087,9 @@ void build_script_table(FILE *f) fprintf(f, " UNICODE_SCRIPT_COUNT,\n"); 
fprintf(f, "} UnicodeScriptEnum;\n\n"); - i = 1; dump_name_table(f, "unicode_script_name_table", - unicode_script_name + i, SCRIPT_COUNT - i, - unicode_script_short_name + i); + unicode_script_name, SCRIPT_COUNT, + unicode_script_short_name); dbuf_init(dbuf); #ifdef DUMP_TABLE_SIZE @@ -1930,6 +2242,218 @@ void build_prop_list_table(FILE *f) fprintf(f, "};\n\n"); } /* Return TRUE when c is in the inclusive range 0x1F9B0..0x1F9B3, the four values that mark_zwj_string() writes at hc_pos. */ +static BOOL is_emoji_hair_color(uint32_t c) +{ + return (c >= 0x1F9B0 && c <= 0x1F9B3); +} + /* Modifier layout of a ZWJ sequence as probed by mark_zwj_string(): NONE = no modifier slot (1 variant), TYPE1 = one slot (5 variants), TYPE2 = two independent slots (25 variants), TYPE2D = two slots that never hold the same value (20 variants). */ +#define EMOJI_MOD_NONE 0 +#define EMOJI_MOD_TYPE1 1 +#define EMOJI_MOD_TYPE2 2 +#define EMOJI_MOD_TYPE2D 3 + /* Probe (and optionally mark) every variant of the ZWJ sequence buf[0..len-1] in sl. Variants are produced by overwriting buf in place: the slots listed in mod_pos receive 0x1f3fb + k (k in 0..4) according to mod_type and, when hc_pos >= 0, the slot hc_pos receives 0x1F9B0 + j (j in 0..3). Returns FALSE as soon as re_string_find() fails for one variant, TRUE when all of them are found. With mark_flag set, bit 0 of flags is set on each variant found; the caller in this file runs a dry pass with mark_flag FALSE first. */ +static BOOL mark_zwj_string(REStringList *sl, uint32_t *buf, int len, int mod_type, int *mod_pos, + int hc_pos, BOOL mark_flag) +{ + REString *p; + int i, n_mod, i0, i1, hc_count, j; + +#if 0 + if (mark_flag) + printf("mod_type=%d\n", mod_type); +#endif + + /* n_mod = number of modifier combinations to probe for this layout */ switch(mod_type) { + case EMOJI_MOD_NONE: + n_mod = 1; + break; + case EMOJI_MOD_TYPE1: + n_mod = 5; + break; + case EMOJI_MOD_TYPE2: + n_mod = 25; + break; + case EMOJI_MOD_TYPE2D: + n_mod = 20; + break; + default: + assert(0); + } + /* a hair color slot is probed with 4 values (0x1F9B0 + j below), otherwise the outer loop runs once */ if (hc_pos >= 0) + hc_count = 4; + else + hc_count = 1; + /* check that all the related strings are present */ + for(j = 0; j < hc_count; j++) { + for(i = 0; i < n_mod; i++) { + switch(mod_type) { + case EMOJI_MOD_NONE: + break; + case EMOJI_MOD_TYPE1: + buf[mod_pos[0]] = 0x1f3fb + i; + break; + case EMOJI_MOD_TYPE2: + case EMOJI_MOD_TYPE2D: + i0 = i / 5; + i1 = i % 5; + /* avoid identical values */ /* for TYPE2D n_mod is 20, so i0 starts in 0..3; bumping it when i0 >= i1 enumerates the 20 ordered pairs with i0 != i1 */ + if (mod_type == EMOJI_MOD_TYPE2D && i0 >= i1) + i0++; + buf[mod_pos[0]] = 0x1f3fb + i0; + buf[mod_pos[1]] = 0x1f3fb + i1; + break; + default: + assert(0); + } + + if (hc_pos >= 0) + buf[hc_pos] = 0x1F9B0 + j; + + p = re_string_find(sl, len, buf, FALSE); + if (!p) + return FALSE; + /* bit 0 of flags marks the string as covered; build_rgi_emoji_zwj_sequence() skips strings whose flags are non-zero */ if (mark_flag) + p->flags |= 1; + } + } + return TRUE; +} + /* Append the compact form of the ZWJ sequence buf[0..len-1] to dbuf: a count, then one 16 bit code per element written as two dbuf_putc() calls (value, then value >> 8). Code layout: bits 0-12 hold the base code point (0x2000..0x2fff maps to 0x0000..0x0fff, 0x1f000..0x1ffff maps to 0x1000..0x1fff, anything else asserts), bits 13-14 hold mod_type when an emoji modifier follows the base, bit 15 is set when 0xfe0f follows. The 0x200d separators are checked but not stored. mod_pos and hc_pos are not read by the body. */ +static void zwj_encode_string(DynBuf *dbuf, const uint32_t *buf, int len, int mod_type, int *mod_pos, + int hc_pos) +{ + int i, j; + int c, code; + uint32_t buf1[SEQ_MAX_LEN]; + + j = 0; 
+ for(i = 0; i < len;) { + /* base code point of the next element */ c = buf[i++]; + if (c >= 0x2000 && c <= 0x2fff) { + /* 0x2000..0x2fff -> 0x0000..0x0fff */ code = c - 0x2000; + } else if (c >= 0x1f000 && c <= 0x1ffff) { + /* 0x1f000..0x1ffff -> 0x1000..0x1fff */ code = c - 0x1f000 + 0x1000; + } else { + assert(0); + } + if (i < len && is_emoji_modifier(buf[i])) { + /* modifier */ /* only mod_type is recorded (bits 13-14); the modifier value itself is dropped */ + code |= (mod_type << 13); + i++; + } + if (i < len && buf[i] == 0xfe0f) { + /* presentation selector present */ /* recorded in bit 15 */ + code |= 0x8000; + i++; + } + if (i < len) { + /* zero width join */ /* anything left after an element must be 0x200d; it is consumed and not stored */ + assert(buf[i] == 0x200d); + i++; + } + buf1[j++] = code; + } + /* emit the number of codes, then each code as value followed by value >> 8 (low byte first, assuming dbuf_putc() keeps the low 8 bits - TODO confirm) */ dbuf_putc(dbuf, j); + for(i = 0; i < j; i++) { + dbuf_putc(dbuf, buf1[i]); + dbuf_putc(dbuf, buf1[i] >> 8); + } +} + /* Write the byte table unicode_rgi_emoji_zwj_sequence to f. Every string of sl is visited through its hash table; strings with non-zero flags were already covered by an earlier representative and are skipped. A string holding emoji modifiers and/or a hair color is encoded once for its whole family when mark_zwj_string() finds every variant in sl (two-modifier strings fall back from EMOJI_MOD_TYPE2 to EMOJI_MOD_TYPE2D); other strings are encoded with EMOJI_MOD_NONE. */ +static void build_rgi_emoji_zwj_sequence(FILE *f, REStringList *sl) +{ + int mod_pos[2], mod_count, hair_color_pos, j, h; + REString *p; + uint32_t buf[SEQ_MAX_LEN]; + DynBuf dbuf; + +#if 0 + { + for(h = 0; h < sl->hash_size; h++) { + for(p = sl->hash_table[h]; p != NULL; p = p->next) { + for(j = 0; j < p->len; j++) + printf(" %04x", p->buf[j]); + printf("\n"); + } + } + exit(0); + } +#endif + // printf("rgi_emoji_zwj_sequence: n=%d\n", sl->n_strings); + + dbuf_init(&dbuf); + + /* avoid duplicating strings with emoji modifiers or hair colors */ + for(h = 0; h < sl->hash_size; h++) { + for(p = sl->hash_table[h]; p != NULL; p = p->next) { + if (p->flags) /* already examined */ + continue; + /* copy the string into buf, recording up to two modifier positions and the last hair color position */ mod_count = 0; + hair_color_pos = -1; + for(j = 0; j < p->len; j++) { + if (is_emoji_modifier(p->buf[j])) { + assert(mod_count < 2); + mod_pos[mod_count++] = j; + } else if (is_emoji_hair_color(p->buf[j])) { + hair_color_pos = j; + } + buf[j] = p->buf[j]; + } + + if (mod_count != 0 || hair_color_pos >= 0) { + int mod_type; + if (mod_count == 0) + mod_type = EMOJI_MOD_NONE; + else if (mod_count == 1) + mod_type = EMOJI_MOD_TYPE1; + else + mod_type = EMOJI_MOD_TYPE2; + + /* dry run first (mark_flag FALSE); the variants are marked only when all of them exist */ if (mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, FALSE)) { + mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, TRUE); + } else if (mod_type == 
EMOJI_MOD_TYPE2) { + /* the 25-combination probe failed: retry with the 20 combinations that exclude identical modifiers */ mod_type = EMOJI_MOD_TYPE2D; + if (mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, FALSE)) { + mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, TRUE); + } else { + /* neither set is complete: report the string and encode it with EMOJI_MOD_NONE at keep. NOTE(review): buf may already have been overwritten by the failed probes when control reaches keep, and a failed first probe with EMOJI_MOD_NONE or EMOJI_MOD_TYPE1 is not checked by this chain - confirm both are intended */ dump_str("not_found", (int *)p->buf, p->len); + goto keep; + } + } + /* the representative always carries the first hair color value */ if (hair_color_pos >= 0) + buf[hair_color_pos] = 0x1f9b0; + /* encode the string */ + zwj_encode_string(&dbuf, buf, p->len, mod_type, mod_pos, hair_color_pos); + } else { + keep: + zwj_encode_string(&dbuf, buf, p->len, EMOJI_MOD_NONE, NULL, -1); + } + } + } + + /* Encode */ + dump_byte_table(f, "unicode_rgi_emoji_zwj_sequence", dbuf.buf, dbuf.size); + + dbuf_free(&dbuf); +} + /* Write the sequence property tables to f: the UnicodeSequencePropertyEnum enum (one UNICODE_SEQUENCE_PROP_ entry per name in unicode_sequence_prop_name plus UNICODE_SEQUENCE_PROP_COUNT), the name table, the bytes accumulated in rgi_emoji_tag_sequence, and the ZWJ sequence table built from rgi_emoji_zwj_sequence. */ +void build_sequence_prop_list_table(FILE *f) +{ + int i; + fprintf(f, "typedef enum {\n"); + for(i = 0; i < SEQUENCE_PROP_COUNT; i++) + fprintf(f, " UNICODE_SEQUENCE_PROP_%s,\n", unicode_sequence_prop_name[i]); + fprintf(f, " UNICODE_SEQUENCE_PROP_COUNT,\n"); + fprintf(f, "} UnicodeSequencePropertyEnum;\n\n"); + + dump_name_table(f, "unicode_sequence_prop_name_table", + unicode_sequence_prop_name, SEQUENCE_PROP_COUNT, NULL); + + dump_byte_table(f, "unicode_rgi_emoji_tag_sequence", rgi_emoji_tag_sequence.buf, rgi_emoji_tag_sequence.size); + + build_rgi_emoji_zwj_sequence(f, &rgi_emoji_zwj_sequence); +} + #ifdef USE_TEST int check_conv(uint32_t *res, uint32_t c, int conv_type) { @@ -3156,6 +3680,8 @@ int main(int argc, char *argv[]) outfilename = argv[arg++]; unicode_db = mallocz(sizeof(unicode_db[0]) * (CHARCODE_MAX + 1)); + /* set up the two emoji sequence containers that build_sequence_prop_list_table() dumps later */ re_string_list_init(&rgi_emoji_zwj_sequence); + dbuf_init(&rgi_emoji_tag_sequence); snprintf(filename, sizeof(filename), "%s/UnicodeData.txt", unicode_db_path); @@ -3190,6 +3716,14 @@ int main(int argc, char *argv[]) unicode_db_path); parse_prop_list(filename); + /* both emoji sequence data files go through the same parser */ snprintf(filename, sizeof(filename), "%s/emoji-sequences.txt", + unicode_db_path); + parse_sequence_prop_list(filename); + + snprintf(filename, sizeof(filename), "%s/emoji-zwj-sequences.txt", + unicode_db_path); + 
parse_sequence_prop_list(filename); + // dump_unicode_data(unicode_db); build_conv_table(unicode_db); @@ -3234,10 +3768,12 @@ int main(int argc, char *argv[]) build_script_table(fo); build_script_ext_table(fo); build_prop_list_table(fo); + /* emitted last, before the fprintf below closes the CONFIG_ALL_UNICODE block in the generated file */ build_sequence_prop_list_table(fo); fprintf(fo, "#endif /* CONFIG_ALL_UNICODE */\n"); fprintf(fo, "/* %u tables / %u bytes, %u index / %u bytes */\n", total_tables, total_table_bytes, total_index, total_index_bytes); fclose(fo); } + /* NOTE(review): no matching dbuf_free() for rgi_emoji_tag_sequence is visible in these hunks - confirm whether that is intentional */ re_string_list_free(&rgi_emoji_zwj_sequence); return 0; } diff --git a/unicode_gen_def.h b/unicode_gen_def.h index f2a3216..95c369f 100644 --- a/unicode_gen_def.h +++ b/unicode_gen_def.h @@ -234,6 +234,11 @@ DEF(XID_Continue1, "") DEF(Changes_When_Titlecased1, "") DEF(Changes_When_Casefolded1, "") DEF(Changes_When_NFKC_Casefolded1, "") +DEF(Basic_Emoji1, "") +DEF(Basic_Emoji2, "") +DEF(RGI_Emoji_Modifier_Sequence, "") +DEF(RGI_Emoji_Flag_Sequence, "") +DEF(Emoji_Keycap_Sequence, "") /* Prop list exported to JS */ DEF(ASCII_Hex_Digit, "AHex") @@ -301,3 +306,13 @@ DEF(XID_Start, "XIDS") DEF(Cased1, "") #endif + /* Sequence property names, one single-argument DEF() each; presumably the source of unicode_sequence_prop_name[] and SEQUENCE_PROP_COUNT used by unicode_gen.c - TODO confirm */ +#ifdef UNICODE_SEQUENCE_PROP_LIST +DEF(Basic_Emoji) +DEF(Emoji_Keycap_Sequence) +DEF(RGI_Emoji_Modifier_Sequence) +DEF(RGI_Emoji_Flag_Sequence) +DEF(RGI_Emoji_Tag_Sequence) +DEF(RGI_Emoji_ZWJ_Sequence) +DEF(RGI_Emoji) +#endif