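make dverm more precise

The aligned double-vermicelli searches (dvermSearchAligned, dvermSearchAlignedNocase, dvermSearchAlignedMasked) now return NULL when they find no match. vermicelliDoubleExec and vermicelliDoubleMaskedExec then check the final block at buf_end - VERM_BOUNDARY with the matching dvermPrecondition* helper and, if that also finds nothing, return buf_end - 1 (conservative in case the last byte is a partial match). The unit tests now expect exactly buf_end - 1 on no match, without the former & BOUND mask.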

This commit is contained in:
Alex Coyte 2016-03-30 11:14:59 +11:00 committed by Matthew Barr
parent b4727cf1ea
commit 6c7ee12bb9
4 changed files with 44 additions and 22 deletions

View File

@ -357,5 +357,7 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
rv = MAX(c + accel->generic.offset, rv); rv = MAX(c + accel->generic.offset, rv);
rv -= accel->generic.offset; rv -= accel->generic.offset;
DEBUG_PRINTF("advanced %zd\n", rv - c);
return rv; return rv;
} }

View File

@ -178,11 +178,21 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
} }
// Aligned loops from here on in // Aligned loops from here on in
if (nocase) { const u8 *ptr = nocase ? dvermSearchAlignedNocase(chars1, chars2, c1, c2,
return dvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end); buf, buf_end)
} else { : dvermSearchAligned(chars1, chars2, c1, c2, buf,
return dvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end); buf_end);
if (ptr) {
return ptr;
} }
// Tidy up the mess at the end
ptr = nocase ? dvermPreconditionNocase(chars1, chars2,
buf_end - VERM_BOUNDARY)
: dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
/* buf_end - 1 to be conservative in case last byte is a partial match */
return ptr ? ptr : buf_end - 1;
} }
static really_inline static really_inline
@ -216,8 +226,18 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
} }
// Aligned loops from here on in // Aligned loops from here on in
return dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, c2, m1, m2, const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1,
buf, buf_end); c2, m1, m2, buf, buf_end);
if (ptr) {
return ptr;
}
// Tidy up the mess at the end
ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2,
buf_end - VERM_BOUNDARY);
/* buf_end - 1 to be conservative in case last byte is a partial match */
return ptr ? ptr : buf_end - 1;
} }
// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if // Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if

View File

@ -147,7 +147,8 @@ const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
return buf + pos; return buf + pos;
} }
} }
return buf;
return NULL;
} }
static really_inline static really_inline
@ -169,7 +170,8 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
return buf + pos; return buf + pos;
} }
} }
return buf;
return NULL;
} }
static really_inline static really_inline
@ -190,7 +192,8 @@ const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2,
return buf + pos; return buf + pos;
} }
} }
return buf;
return NULL;
} }
// returns NULL if not found // returns NULL if not found

View File

@ -31,8 +31,6 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "nfa/vermicelli.h" #include "nfa/vermicelli.h"
#define BOUND (~(VERM_BOUNDARY - 1))
TEST(Vermicelli, ExecNoMatch1) { TEST(Vermicelli, ExecNoMatch1) {
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
@ -128,27 +126,27 @@ TEST(DoubleVermicelli, ExecNoMatch1) {
const u8 *rv = vermicelliDoubleExec('a', 'b', 0, (u8 *)t1 + i, const u8 *rv = vermicelliDoubleExec('a', 'b', 0, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j); (u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
rv = vermicelliDoubleExec('B', 'b', 0, (u8 *)t1 + i, rv = vermicelliDoubleExec('B', 'b', 0, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j); (u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
rv = vermicelliDoubleExec('A', 'B', 1, (u8 *)t1 + i, rv = vermicelliDoubleExec('A', 'B', 1, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j); (u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
rv = vermicelliDoubleExec('b', 'B', 0, (u8 *)t1 + i, rv = vermicelliDoubleExec('b', 'B', 0, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j); (u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
rv = vermicelliDoubleExec('B', 'A', 1, (u8 *)t1 + i, rv = vermicelliDoubleExec('B', 'A', 1, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j); (u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
} }
} }
} }
@ -355,31 +353,30 @@ TEST(DoubleVermicelliMasked, ExecNoMatch1) {
t1_raw + i, t1_raw + i,
t1_raw + t1.length() - i - j); t1_raw + t1.length() - i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR, rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR,
t1_raw + i, t1_raw + i,
t1_raw + t1.length() - i - j); t1_raw + t1.length() - i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR, rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR,
t1_raw + i, t1_raw + i,
t1_raw + t1.length() -i - j); t1_raw + t1.length() -i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff, rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff,
t1_raw + i, t1_raw + i,
t1_raw + t1.length() - i - j); t1_raw + t1.length() - i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff, rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff,
t1_raw + i, t1_raw + i,
t1_raw + t1.length() - i - j); t1_raw + t1.length() - i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
} }
} }
} }