diff --git a/src/nfa/vermicelli.h b/src/nfa/vermicelli.h index ba8afcf1..b2f2ab7c 100644 --- a/src/nfa/vermicelli.h +++ b/src/nfa/vermicelli.h @@ -74,9 +74,7 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -129,9 +127,7 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -172,9 +168,7 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end - 1; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -190,9 +184,18 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, ptr = nocase ? dvermPreconditionNocase(chars1, chars2, buf_end - VERM_BOUNDARY) : dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY); - /* buf_end - 1 to be conservative in case last byte is a partial match */ - return ptr ? ptr : buf_end - 1; + if (ptr) { + return ptr; + } + + /* check for partial match at end; c1 is cast to u8 so that bytes >= 0x80 are not sign-extended */ + u8 mask = nocase ? CASE_CLEAR : 0xff; + if ((buf_end[-1] & mask) == (u8)c1) { + return buf_end - 1; + } + + return buf_end; } static really_inline @@ -220,9 +223,7 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end - 1; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -235,9 +236,17 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, // Tidy up the mess at the end ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf_end - VERM_BOUNDARY); - /* buf_end - 1 to be conservative in case last byte is a partial match */ - return ptr ? 
ptr : buf_end - 1; + if (ptr) { + return ptr; + } + + /* check for partial match at end; c1 is cast to u8 so that bytes >= 0x80 are not sign-extended */ + if ((buf_end[-1] & m1) == (u8)c1) { + return buf_end - 1; + } + + return buf_end; } // Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if diff --git a/unit/internal/vermicelli.cpp b/unit/internal/vermicelli.cpp index 5d66a332..5e4a8253 100644 --- a/unit/internal/vermicelli.cpp +++ b/unit/internal/vermicelli.cpp @@ -126,27 +126,29 @@ TEST(DoubleVermicelli, ExecNoMatch1) { const u8 *rv = vermicelliDoubleExec('a', 'b', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j), (size_t)rv); rv = vermicelliDoubleExec('B', 'b', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j), (size_t)rv); rv = vermicelliDoubleExec('A', 'B', 1, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j), (size_t)rv); + /* partial match */ rv = vermicelliDoubleExec('b', 'B', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + /* partial match */ rv = vermicelliDoubleExec('B', 'A', 1, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); } } } @@ -353,30 +355,32 @@ TEST(DoubleVermicelliMasked, ExecNoMatch1) { t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); - rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR, + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); + + rv = vermicelliDoubleMaskedExec('B', 'B', 0xff, CASE_CLEAR, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i 
- j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR, t1_raw + i, t1_raw + t1.length() -i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); - rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff, + /* partial match */ + rv = vermicelliDoubleMaskedExec('B', 'B', CASE_CLEAR, 0xff, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); } } }