Improve libunicode and libregexp headers (#288)

- move all `lre_xxx` functions to libunicode
- use flags table `lre_ctype_bits` instead of bitmaps
- simplify `lre_is_space`, `lre_js_is_ident_first` and `lre_js_is_ident_next`
- simplify `simple_next_token`, handle UTF-8 correctly
- simplify `is_let`, remove dead code
This commit is contained in:
Charlie Gordon
2024-05-05 17:47:40 +02:00
committed by GitHub
parent 1402478d8d
commit 7a2c6f42d4
6 changed files with 244 additions and 135 deletions

View File

@@ -25,10 +25,7 @@
#define LIBREGEXP_H
#include <stddef.h>
#include "libunicode.h"
#define LRE_BOOL int /* for documentation purposes */
#include <stdint.h>
#define LRE_FLAG_GLOBAL (1 << 0)
#define LRE_FLAG_IGNORECASE (1 << 1)
@@ -50,43 +47,9 @@ int lre_exec(uint8_t **capture,
int cbuf_type, void *opaque);
int lre_parse_escape(const uint8_t **pp, int allow_utf16);
LRE_BOOL lre_is_space(int c);
/* must be provided by the user */
LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size);
/* must be provided by the user, return non zero if overflow */
int lre_check_stack_overflow(void *opaque, size_t alloca_size);
void *lre_realloc(void *opaque, void *ptr, size_t size);
/* JS identifier test */
extern uint32_t const lre_id_start_table_ascii[4];
extern uint32_t const lre_id_continue_table_ascii[4];
static inline int lre_js_is_ident_first(int c)
{
if ((uint32_t)c < 128) {
return (lre_id_start_table_ascii[c >> 5] >> (c & 31)) & 1;
} else {
#ifdef CONFIG_ALL_UNICODE
return lre_is_id_start(c);
#else
return !lre_is_space(c);
#endif
}
}
static inline int lre_js_is_ident_next(int c)
{
if ((uint32_t)c < 128) {
return (lre_id_continue_table_ascii[c >> 5] >> (c & 31)) & 1;
} else {
/* ZWNJ and ZWJ are accepted in identifiers */
#ifdef CONFIG_ALL_UNICODE
return lre_is_id_continue(c) || c == 0x200C || c == 0x200D;
#else
return !lre_is_space(c) || c == 0x200C || c == 0x200D;
#endif
}
}
#undef LRE_BOOL
#endif /* LIBREGEXP_H */