make dverm more precise

This commit is contained in:
Alex Coyte 2016-03-30 11:14:59 +11:00 committed by Matthew Barr
parent b4727cf1ea
commit 6c7ee12bb9
4 changed files with 44 additions and 22 deletions

View File

@ -357,5 +357,7 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
rv = MAX(c + accel->generic.offset, rv);
rv -= accel->generic.offset;
DEBUG_PRINTF("advanced %zd\n", rv - c);
return rv;
}

View File

@ -178,11 +178,21 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
}
// Aligned loops from here on in
if (nocase) {
return dvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end);
} else {
return dvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end);
const u8 *ptr = nocase ? dvermSearchAlignedNocase(chars1, chars2, c1, c2,
buf, buf_end)
: dvermSearchAligned(chars1, chars2, c1, c2, buf,
buf_end);
if (ptr) {
return ptr;
}
// Aligned loop found nothing: check the trailing block that starts at
// buf_end - VERM_BOUNDARY
ptr = nocase ? dvermPreconditionNocase(chars1, chars2,
buf_end - VERM_BOUNDARY)
: dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
/* No match found: return buf_end - 1 rather than buf_end, to be conservative
 * in case the last byte is the first half of a two-byte match */
return ptr ? ptr : buf_end - 1;
}
static really_inline
@ -216,8 +226,18 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
}
// Aligned loops from here on in
return dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, c2, m1, m2,
buf, buf_end);
const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1,
c2, m1, m2, buf, buf_end);
if (ptr) {
return ptr;
}
// Aligned loop found nothing: check the trailing block that starts at
// buf_end - VERM_BOUNDARY
ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2,
buf_end - VERM_BOUNDARY);
/* No match found: return buf_end - 1 rather than buf_end, to be conservative
 * in case the last byte is the first half of a two-byte match */
return ptr ? ptr : buf_end - 1;
}
// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if

View File

@ -147,7 +147,8 @@ const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
return buf + pos;
}
}
return buf;
return NULL;
}
static really_inline
@ -169,7 +170,8 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
return buf + pos;
}
}
return buf;
return NULL;
}
static really_inline
@ -190,7 +192,8 @@ const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2,
return buf + pos;
}
}
return buf;
return NULL;
}
// returns NULL if not found

View File

@ -31,8 +31,6 @@
#include "gtest/gtest.h"
#include "nfa/vermicelli.h"
#define BOUND (~(VERM_BOUNDARY - 1))
TEST(Vermicelli, ExecNoMatch1) {
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
@ -128,27 +126,27 @@ TEST(DoubleVermicelli, ExecNoMatch1) {
const u8 *rv = vermicelliDoubleExec('a', 'b', 0, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
rv = vermicelliDoubleExec('B', 'b', 0, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
rv = vermicelliDoubleExec('A', 'B', 1, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
rv = vermicelliDoubleExec('b', 'B', 0, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
rv = vermicelliDoubleExec('B', 'A', 1, (u8 *)t1 + i,
(u8 *)t1 + strlen(t1) - j);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
}
}
}
@ -355,31 +353,30 @@ TEST(DoubleVermicelliMasked, ExecNoMatch1) {
t1_raw + i,
t1_raw + t1.length() - i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR,
t1_raw + i,
t1_raw + t1.length() - i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR,
t1_raw + i,
t1_raw + t1.length() -i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff,
t1_raw + i,
t1_raw + t1.length() - i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff,
t1_raw + i,
t1_raw + t1.length() - i - j);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
}
}
}