From cacf07fe9bf7fd396ca841f7ab8ae849500ca048 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 18 Jan 2017 11:33:57 +1100
Subject: [PATCH] prefilter: workaround for \b in UCP and !UTF8 mode

For now, just drop the assertion (which will still return a superset of
matches, as per prefiltering semantics).
---
 src/parser/prefilter.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/parser/prefilter.cpp b/src/parser/prefilter.cpp
index ea58a134..f69362e4 100644
--- a/src/parser/prefilter.cpp
+++ b/src/parser/prefilter.cpp
@@ -295,6 +295,16 @@ public:
 
     Component *visit(ComponentWordBoundary *c) override {
         assert(c);
+
+        // TODO: Right now, we do not have correct code for resolving these
+        // when prefiltering is on, UCP is on, and UTF-8 is *off*. For now, we
+        // just replace with an empty sequence (as that will return a superset
+        // of matches).
+        if (mode.ucp && !mode.utf8) {
+            return new ComponentSequence();
+        }
+
+        // All other cases can be prefiltered.
         c->setPrefilter(true);
         return c;
     }