mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
prefilter: workaround for \b in UCP and !UTF8 mode
For now, just drop the \b word-boundary assertion by replacing it with an empty sequence (this still returns a superset of matches, as per prefiltering semantics).
This commit is contained in:
parent
734eb2ce62
commit
cacf07fe9b
@@ -295,6 +295,16 @@ public:
 
     Component *visit(ComponentWordBoundary *c) override {
         assert(c);
+
+        // TODO: Right now, we do not have correct code for resolving these
+        // when prefiltering is on, UCP is on, and UTF-8 is *off*. For now, we
+        // just replace with an empty sequence (as that will return a superset
+        // of matches).
+        if (mode.ucp && !mode.utf8) {
+            return new ComponentSequence();
+        }
+
+        // All other cases can be prefiltered.
         c->setPrefilter(true);
         return c;
     }
Loading…
x
Reference in New Issue
Block a user