mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
make dverm more precise
This commit is contained in:
parent
b4727cf1ea
commit
6c7ee12bb9
@ -357,5 +357,7 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
|||||||
rv = MAX(c + accel->generic.offset, rv);
|
rv = MAX(c + accel->generic.offset, rv);
|
||||||
rv -= accel->generic.offset;
|
rv -= accel->generic.offset;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("advanced %zd\n", rv - c);
|
||||||
|
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
@ -178,11 +178,21 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Aligned loops from here on in
|
// Aligned loops from here on in
|
||||||
if (nocase) {
|
const u8 *ptr = nocase ? dvermSearchAlignedNocase(chars1, chars2, c1, c2,
|
||||||
return dvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end);
|
buf, buf_end)
|
||||||
} else {
|
: dvermSearchAligned(chars1, chars2, c1, c2, buf,
|
||||||
return dvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end);
|
buf_end);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tidy up the mess at the end
|
||||||
|
ptr = nocase ? dvermPreconditionNocase(chars1, chars2,
|
||||||
|
buf_end - VERM_BOUNDARY)
|
||||||
|
: dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
|
||||||
|
/* buf_end - 1 to be conservative in case last byte is a partial match */
|
||||||
|
return ptr ? ptr : buf_end - 1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -216,8 +226,18 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Aligned loops from here on in
|
// Aligned loops from here on in
|
||||||
return dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, c2, m1, m2,
|
const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1,
|
||||||
buf, buf_end);
|
c2, m1, m2, buf, buf_end);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tidy up the mess at the end
|
||||||
|
ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2,
|
||||||
|
buf_end - VERM_BOUNDARY);
|
||||||
|
/* buf_end - 1 to be conservative in case last byte is a partial match */
|
||||||
|
return ptr ? ptr : buf_end - 1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if
|
// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if
|
||||||
|
@ -147,7 +147,8 @@ const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
|
|||||||
return buf + pos;
|
return buf + pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return buf;
|
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -169,7 +170,8 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
|
|||||||
return buf + pos;
|
return buf + pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return buf;
|
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -190,7 +192,8 @@ const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2,
|
|||||||
return buf + pos;
|
return buf + pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return buf;
|
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns NULL if not found
|
// returns NULL if not found
|
||||||
|
@ -31,8 +31,6 @@
|
|||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
#include "nfa/vermicelli.h"
|
#include "nfa/vermicelli.h"
|
||||||
|
|
||||||
#define BOUND (~(VERM_BOUNDARY - 1))
|
|
||||||
|
|
||||||
TEST(Vermicelli, ExecNoMatch1) {
|
TEST(Vermicelli, ExecNoMatch1) {
|
||||||
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
|
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
|
||||||
|
|
||||||
@ -128,27 +126,27 @@ TEST(DoubleVermicelli, ExecNoMatch1) {
|
|||||||
const u8 *rv = vermicelliDoubleExec('a', 'b', 0, (u8 *)t1 + i,
|
const u8 *rv = vermicelliDoubleExec('a', 'b', 0, (u8 *)t1 + i,
|
||||||
(u8 *)t1 + strlen(t1) - j);
|
(u8 *)t1 + strlen(t1) - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
|
||||||
|
|
||||||
rv = vermicelliDoubleExec('B', 'b', 0, (u8 *)t1 + i,
|
rv = vermicelliDoubleExec('B', 'b', 0, (u8 *)t1 + i,
|
||||||
(u8 *)t1 + strlen(t1) - j);
|
(u8 *)t1 + strlen(t1) - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
|
||||||
|
|
||||||
rv = vermicelliDoubleExec('A', 'B', 1, (u8 *)t1 + i,
|
rv = vermicelliDoubleExec('A', 'B', 1, (u8 *)t1 + i,
|
||||||
(u8 *)t1 + strlen(t1) - j);
|
(u8 *)t1 + strlen(t1) - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
|
||||||
|
|
||||||
rv = vermicelliDoubleExec('b', 'B', 0, (u8 *)t1 + i,
|
rv = vermicelliDoubleExec('b', 'B', 0, (u8 *)t1 + i,
|
||||||
(u8 *)t1 + strlen(t1) - j);
|
(u8 *)t1 + strlen(t1) - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
|
||||||
|
|
||||||
rv = vermicelliDoubleExec('B', 'A', 1, (u8 *)t1 + i,
|
rv = vermicelliDoubleExec('B', 'A', 1, (u8 *)t1 + i,
|
||||||
(u8 *)t1 + strlen(t1) - j);
|
(u8 *)t1 + strlen(t1) - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -355,31 +353,30 @@ TEST(DoubleVermicelliMasked, ExecNoMatch1) {
|
|||||||
t1_raw + i,
|
t1_raw + i,
|
||||||
t1_raw + t1.length() - i - j);
|
t1_raw + t1.length() - i - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
|
||||||
|
|
||||||
rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR,
|
rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR,
|
||||||
t1_raw + i,
|
t1_raw + i,
|
||||||
t1_raw + t1.length() - i - j);
|
t1_raw + t1.length() - i - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
|
||||||
|
|
||||||
rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR,
|
rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR,
|
||||||
t1_raw + i,
|
t1_raw + i,
|
||||||
t1_raw + t1.length() -i - j);
|
t1_raw + t1.length() -i - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
|
||||||
|
|
||||||
rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff,
|
rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff,
|
||||||
t1_raw + i,
|
t1_raw + i,
|
||||||
t1_raw + t1.length() - i - j);
|
t1_raw + t1.length() - i - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
|
||||||
|
|
||||||
rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff,
|
rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff,
|
||||||
t1_raw + i,
|
t1_raw + i,
|
||||||
t1_raw + t1.length() - i - j);
|
t1_raw + t1.length() - i - j);
|
||||||
|
|
||||||
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
|
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user