regexp: added v flag support - fixed corner cases of case insensitive matching

This commit is contained in:
Fabrice Bellard
2025-05-16 17:43:03 +02:00
parent a8b2d7c2b2
commit d7cdfdc8d7
13 changed files with 2004 additions and 205 deletions

View File

@@ -45,6 +45,7 @@ typedef enum {
CR_OP_UNION,
CR_OP_INTER,
CR_OP_XOR,
CR_OP_SUB,
} CharRangeOpEnum;
void cr_init(CharRange *cr, void *mem_opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
@@ -73,19 +74,18 @@ static inline int cr_add_interval(CharRange *cr, uint32_t c1, uint32_t c2)
return 0;
}
int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len);
int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
const uint32_t *b_pt, int b_len, int op);
int cr_op1(CharRange *cr, const uint32_t *b_pt, int b_len, int op);
/* Merge the interval given by c1 and c2 into 'cr' with a union
   operation. The interval is handed to the range routine as the
   two-element array {c1, c2 + 1}; presumably c2 is an inclusive upper
   bound and the array uses an exclusive one -- confirm against the
   range routine. The return value is that of the underlying call. */
static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2)
{
uint32_t b_pt[2];
b_pt[0] = c1;
b_pt[1] = c2 + 1; /* NOTE(review): wraps to 0 when c2 == UINT32_MAX */
/* NOTE(review): two consecutive returns follow, so the second one is
   unreachable as shown. This looks like the removed and the added line
   of a diff whose +/- markers were lost -- confirm which call is the
   current one. */
return cr_union1(cr, b_pt, 2);
return cr_op1(cr, b_pt, 2, CR_OP_UNION);
}
int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
const uint32_t *b_pt, int b_len, int op);
int cr_invert(CharRange *cr);
int cr_regexp_canonicalize(CharRange *cr, int is_unicode);
@@ -107,6 +107,10 @@ int unicode_script(CharRange *cr, const char *script_name, int is_ext);
int unicode_general_category(CharRange *cr, const char *gc_name);
int unicode_prop(CharRange *cr, const char *prop_name);
typedef void UnicodeSequencePropCB(void *opaque, const uint32_t *buf, int len);
int unicode_sequence_prop(const char *prop_name, UnicodeSequencePropCB *cb, void *opaque,
CharRange *cr);
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
int lre_canonicalize(uint32_t c, int is_unicode);