chimera: hybrid of Hyperscan and PCRE

This commit is contained in:
Wang, Xiang W
2018-03-09 03:52:12 -05:00
parent 8a1c497f44
commit bf87f8c003
47 changed files with 6985 additions and 202 deletions

View File

@@ -123,22 +123,58 @@ set_target_properties(unit-internal PROPERTIES COMPILE_FLAGS "${HS_CXX_FLAGS}")
target_link_libraries(unit-internal hs corpusomatic)
endif(NOT (RELEASE_BUILD OR FAT_RUNTIME))
#
# build target to run unit tests
#
if (NOT RELEASE_BUILD)
add_custom_target(
unit
COMMAND bin/unit-internal
COMMAND bin/unit-hyperscan
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS unit-internal unit-hyperscan
)
else ()
add_custom_target(
unit
COMMAND bin/unit-hyperscan
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS unit-hyperscan
)
# When BUILD_CHIMERA is set, build the unit-chimera test binary and define a
# `unit` target that also runs it; otherwise define `unit` with the Hyperscan
# test binaries only.
if (BUILD_CHIMERA)
# enable Chimera unit tests
set(unit_chimera_SOURCES
${gtest_SOURCES}
chimera/allocators.cpp
chimera/arg_checks.cpp
chimera/bad_patterns.cpp
chimera/compat.cpp
chimera/main.cpp
chimera/scan.cpp
)
add_executable(unit-chimera ${unit_chimera_SOURCES})
# unit-chimera is linked against chimera, hs and pcre.
target_link_libraries(unit-chimera chimera hs pcre)
#
# build target to run unit tests
#
# Non-release builds run unit-internal in addition to the other binaries.
if (NOT RELEASE_BUILD)
add_custom_target(
unit
COMMAND bin/unit-internal
COMMAND bin/unit-hyperscan
COMMAND bin/unit-chimera
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS unit-internal unit-hyperscan unit-chimera
)
else ()
# Release builds: unit-internal is not run.
add_custom_target(
unit
COMMAND bin/unit-hyperscan
COMMAND bin/unit-chimera
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS unit-hyperscan unit-chimera
)
endif()
else()
#
# build target to run unit tests
#
# Chimera disabled: same `unit` target, without unit-chimera.
if (NOT RELEASE_BUILD)
add_custom_target(
unit
COMMAND bin/unit-internal
COMMAND bin/unit-hyperscan
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS unit-internal unit-hyperscan
)
else ()
add_custom_target(
unit
COMMAND bin/unit-hyperscan
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS unit-hyperscan
)
endif()
endif()

149
unit/chimera/allocators.cpp Normal file
View File

@@ -0,0 +1,149 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "gtest/gtest.h"
#include "chimera/ch.h"
#include <cstdlib>
#include <string>
using std::string;
// Allocator stub that always fails: the requested size is ignored and
// nullptr is returned.
static
void *null_malloc(size_t) {
    return nullptr;
}
// Helper: compile the single pattern "foobar" (no per-pattern flags or ids,
// mode 0) and hand the resulting database back through hydb. If compilation
// fails, a fatal gtest failure is recorded and *hydb is left untouched.
static
void makeDatabase(ch_database_t **hydb) {
    static const char *patterns[] = { "foobar" };
    ch_database_t *compiled = nullptr;
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv = ch_compile_multi(patterns, nullptr, nullptr, 1, 0,
                                           nullptr, &compiled, &error);
    ASSERT_EQ(CH_SUCCESS, rv);
    ASSERT_TRUE(compiled != nullptr);

    *hydb = compiled;
}
// ch_database_info must report CH_NOMEM when the installed allocator cannot
// provide any memory.
TEST(HybridAllocator, DatabaseInfoBadAlloc) {
    ch_database_t *db = nullptr;
    makeDatabase(&db);
    ASSERT_TRUE(db != nullptr);

    ch_set_allocator(null_malloc, nullptr);

    char *info = nullptr;
    ch_error_t err = ch_database_info(db, &info);

    // Non-fatal on purpose: a fatal ASSERT_* would return before the
    // allocator is restored below, leaving null_malloc installed for every
    // test that runs afterwards (and leaking db).
    EXPECT_EQ(CH_NOMEM, err);

    ch_set_allocator(nullptr, nullptr);
    ch_free_database(db);
}
// Allocator that over-allocates by 2 bytes and returns a pointer 2 bytes past
// the start of the malloc()ed region. Returns nullptr if malloc fails.
static
void *two_aligned_malloc(size_t len) {
    char *base = static_cast<char *>(malloc(len + 2));
    return base ? base + 2 : nullptr;
}
// Counterpart to two_aligned_malloc: steps the pointer back by the 2-byte
// offset and releases the underlying malloc()ed region. A null pointer is
// ignored.
static
void two_aligned_free(void *mem) {
    if (mem != nullptr) {
        char *shifted = static_cast<char *>(mem);
        free(shifted - 2);
    }
}
// ch_compile must fail with CH_COMPILER_ERROR, produce no database and report
// a compile error while the database allocator is two_aligned_malloc.
TEST(HybridAllocator, TwoAlignedCompile) {
    ch_set_database_allocator(two_aligned_malloc, two_aligned_free);

    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;
    const hs_platform_info_t *platform = nullptr;
    ch_error_t err =
        ch_compile("foobar", 0, CH_MODE_GROUPS, platform, &db, &compile_err);

    // Non-fatal checks: a fatal ASSERT_* would return before the database
    // allocator is restored, poisoning every test that runs afterwards.
    EXPECT_EQ(CH_COMPILER_ERROR, err);
    EXPECT_EQ(nullptr, db);
    EXPECT_NE(nullptr, compile_err);

    ch_free_compile_error(compile_err);
    ch_set_database_allocator(nullptr, nullptr);
}
// With two_aligned_malloc installed as the misc allocator, compiling the
// pattern "\1" must fail with CH_COMPILER_ERROR and the message
// "Allocator returned misaligned memory.".
TEST(HybridAllocator, TwoAlignedCompileError) {
    ch_set_misc_allocator(two_aligned_malloc, two_aligned_free);

    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;
    const hs_platform_info_t *platform = nullptr;
    ch_error_t err =
        ch_compile("\\1", 0, CH_MODE_GROUPS, platform, &db, &compile_err);

    // Non-fatal checks: a fatal ASSERT_* would return before the allocators
    // are restored, poisoning every test that runs afterwards.
    EXPECT_EQ(CH_COMPILER_ERROR, err);
    EXPECT_EQ(nullptr, db);
    EXPECT_NE(nullptr, compile_err);
    if (compile_err != nullptr) {
        EXPECT_STREQ("Allocator returned misaligned memory.",
                     compile_err->message);
    }

    // Release the error while the two_aligned allocator pair is still
    // installed (same ordering as before), then restore the defaults.
    ch_free_compile_error(compile_err);
    ch_set_database_allocator(nullptr, nullptr);
    ch_set_misc_allocator(nullptr, nullptr);
}
// ch_database_info must report CH_BAD_ALLOC while the misc allocator is
// two_aligned_malloc.
TEST(HybridAllocator, TwoAlignedDatabaseInfo) {
    ch_database_t *db = nullptr;
    makeDatabase(&db);
    // A fatal failure inside makeDatabase() does not abort this test, so
    // check the result explicitly (as DatabaseInfoBadAlloc does).
    ASSERT_TRUE(db != nullptr);

    ch_set_misc_allocator(two_aligned_malloc, two_aligned_free);

    char *info = nullptr;
    ch_error_t err = ch_database_info(db, &info);

    // Non-fatal so that the allocator reset and database free always run.
    EXPECT_EQ(CH_BAD_ALLOC, err);

    ch_set_misc_allocator(nullptr, nullptr);
    ch_free_database(db);
}
// ch_alloc_scratch must report CH_BAD_ALLOC while the scratch allocator is
// two_aligned_malloc.
TEST(HybridAllocator, TwoAlignedAllocScratch) {
    ch_database_t *db = nullptr;
    makeDatabase(&db);
    // A fatal failure inside makeDatabase() does not abort this test, so
    // check the result explicitly (as DatabaseInfoBadAlloc does).
    ASSERT_TRUE(db != nullptr);

    ch_set_scratch_allocator(two_aligned_malloc, two_aligned_free);

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);

    // Non-fatal so that the allocator reset and database free always run.
    EXPECT_EQ(CH_BAD_ALLOC, err);

    ch_set_scratch_allocator(nullptr, nullptr);
    ch_free_database(db);
}

591
unit/chimera/arg_checks.cpp Normal file
View File

@@ -0,0 +1,591 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "gtest/gtest.h"
#include "chimera/ch.h"
// Arbitrary bytes that are neither a database nor a scratch. Tests in this
// file pass this buffer (or a heap copy of it) where a ch_database_t or
// ch_scratch_t is expected and check that the call is rejected.
static char garbage[] = "TEST(HybridArgChecks, DatabaseSizeNoDatabase) {" \
" size_t sz = ch_database_size(0);" \
" ASSERT_EQ(0, sz);";
namespace /* anonymous */ {
// No-op match callback: ignores all of its arguments and returns
// CH_CALLBACK_CONTINUE.
ch_callback_t dummyHandler(unsigned, unsigned long long, unsigned long long,
                           unsigned, unsigned, const ch_capture_t *, void *) {
    return CH_CALLBACK_CONTINUE;
}
// Helper: compile the single pattern "foo.*bar" (no per-pattern flags or ids,
// mode 0) and hand the resulting database back through hydb. If compilation
// fails, a fatal gtest failure is recorded and *hydb is left untouched.
static
void makeDatabase(ch_database_t **hydb) {
    static const char *patterns[] = { "foo.*bar" };
    ch_database_t *compiled = nullptr;
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv = ch_compile_multi(patterns, nullptr, nullptr, 1, 0,
                                           nullptr, &compiled, &error);
    ASSERT_EQ(CH_SUCCESS, rv);
    ASSERT_TRUE(compiled != nullptr);

    *hydb = compiled;
}
// Helper: given a database, build me some scratch.
static
void makeScratch(const ch_database_t *db,
ch_scratch_t **scratch) {
ch_error_t err = ch_alloc_scratch(db, scratch);
ASSERT_EQ(CH_SUCCESS, err);
ASSERT_TRUE(*scratch != nullptr);
}
// Corrupt the magic number of the given database by overwriting its first
// byte with 0xdc. Records a fatal gtest failure, and leaves the database
// untouched, if db is null or does not start with the bytes 0xde 0xde.
void breakDatabaseMagic(ch_database *db) {
    // Callers obtain db from makeDatabase(), whose fatal failures do not
    // abort the calling test, so db may still be null here.
    ASSERT_TRUE(db != nullptr);
    // The database is expected to begin with the magic bytes 0xde 0xde.
    ASSERT_TRUE(memcmp("\xde\xde", db, 2) == 0);
    *(char *)db = 0xdc;
}
// Corrupt the version number of the given database by incrementing the byte
// at offset 4. Records a fatal gtest failure, and touches nothing, if db is
// null.
void breakDatabaseVersion(ch_database *db) {
    // Callers obtain db from makeDatabase(), whose fatal failures do not
    // abort the calling test, so db may still be null here.
    ASSERT_TRUE(db != nullptr);
    // database version is the second u32
    *((char *)db + 4) += 1;
}
// ch_version: the returned string must be non-null, start with a decimal
// digit and have a dot as its second character.
TEST(HybridArgChecks, Version) {
    const char *const ver = ch_version();
    ASSERT_TRUE(ver != nullptr);
    ASSERT_TRUE(ver[0] >= '0' && ver[0] <= '9')
        << "First byte should be a digit.";
    ASSERT_EQ('.', ver[1]) << "Second byte should be a dot.";
}
// ch_compile: each unsupported flag value must be rejected with
// CH_COMPILER_ERROR, no database, and the message "Unrecognized flag used.".
TEST(HybridArgChecks, SingleBogusFlags) {
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;

    static const unsigned int badflags[] = {
        0xffffffff,
        16,
        128,
        256,
        512,
    };

    for (const unsigned int &flag : badflags) {
        const char expr[] = "foobar";
        ch_error_t err = ch_compile(expr, flag, 0, nullptr, &db, &compile_err);
        EXPECT_EQ(CH_COMPILER_ERROR, err);
        EXPECT_TRUE(db == nullptr);
        // Fatal: the message check below dereferences compile_err.
        ASSERT_TRUE(compile_err != nullptr);
        EXPECT_STREQ("Unrecognized flag used.", compile_err->message);
        ch_free_compile_error(compile_err);
        // Never carry a freed pointer into the next iteration.
        compile_err = nullptr;
    }
}
// ch_compile: each unsupported mode value must be rejected with
// CH_COMPILER_ERROR, no database, and the message
// "Invalid mode flag supplied.".
TEST(HybridArgChecks, SingleBogusMode) {
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;

    static const unsigned int badModes[] = {
        0xffffffff,
        1,
        2,
        CH_MODE_GROUPS << 1, // this was our largest mode flag
    };

    for (const unsigned int &mode : badModes) {
        const char expr[] = "foobar";
        ch_error_t err = ch_compile(expr, 0, mode, nullptr, &db, &compile_err);
        EXPECT_EQ(CH_COMPILER_ERROR, err);
        EXPECT_TRUE(db == nullptr);
        // Fatal: the message check below dereferences compile_err.
        ASSERT_TRUE(compile_err != nullptr);
        EXPECT_STREQ("Invalid mode flag supplied.", compile_err->message);
        ch_free_compile_error(compile_err);
        // Never carry a freed pointer into the next iteration.
        compile_err = nullptr;
    }
}
// ch_compile: a nullptr pattern must be rejected with CH_COMPILER_ERROR,
// leaving the database null and reporting a compile error.
TEST(HybridArgChecks, SingleCompileBlockNoPattern) {
    ch_database_t *database = nullptr;
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv = ch_compile(nullptr, 0, 0, nullptr, &database, &error);
    EXPECT_EQ(CH_COMPILER_ERROR, rv);
    EXPECT_TRUE(database == nullptr);
    EXPECT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
// ch_compile: a nullptr database output pointer must be rejected with
// CH_COMPILER_ERROR and a compile error.
TEST(HybridArgChecks, SingleCompileBlockNoDatabase) {
    const char pattern[] = "foobar";
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv = ch_compile(pattern, 0, 0, nullptr, nullptr, &error);
    EXPECT_EQ(CH_COMPILER_ERROR, rv);
    EXPECT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
// ch_compile_multi: each unsupported flag value must be rejected with
// CH_COMPILER_ERROR, no database, and the message "Unrecognized flag used.".
TEST(HybridArgChecks, MultiBogusFlags) {
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;

    static const unsigned int badflags[] = {
        0xffffffff,
        16, // HS_FLAG_ERROREOD
        128,
        256,
        512,
    };

    for (const unsigned int &flag : badflags) {
        const char *expr[] = { "foobar" };
        ch_error_t err = ch_compile_multi(expr, &flag, nullptr, 1, 0, nullptr,
                                          &db, &compile_err);
        EXPECT_EQ(CH_COMPILER_ERROR, err);
        EXPECT_TRUE(db == nullptr);
        // Fatal: the message check below dereferences compile_err.
        ASSERT_TRUE(compile_err != nullptr);
        EXPECT_STREQ("Unrecognized flag used.", compile_err->message);
        ch_free_compile_error(compile_err);
        // Never carry a freed pointer into the next iteration.
        compile_err = nullptr;
    }
}
// ch_compile_multi: each unsupported mode value must be rejected with
// CH_COMPILER_ERROR, no database, and the message
// "Invalid mode flag supplied.".
TEST(HybridArgChecks, MultiBogusMode) {
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;

    static const unsigned int badModes[] = {
        0xffffffff,
        1,
        2,
        CH_MODE_GROUPS << 1, // this was our largest mode flag
    };

    for (const unsigned int &mode : badModes) {
        const char *expr[] = { "foobar" };
        ch_error_t err = ch_compile_multi(expr, nullptr, nullptr, 1, mode,
                                          nullptr, &db, &compile_err);
        EXPECT_EQ(CH_COMPILER_ERROR, err);
        EXPECT_TRUE(db == nullptr);
        // Fatal: the message check below dereferences compile_err.
        ASSERT_TRUE(compile_err != nullptr);
        EXPECT_STREQ("Invalid mode flag supplied.", compile_err->message);
        ch_free_compile_error(compile_err);
        // Never carry a freed pointer into the next iteration.
        compile_err = nullptr;
    }
}
// ch_compile_multi: a nullptr pattern array must be rejected with
// CH_COMPILER_ERROR, leaving the database null and reporting a compile error.
TEST(HybridArgChecks, MultiCompileBlockNoPattern) {
    ch_database_t *database = nullptr;
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv = ch_compile_multi(nullptr, nullptr, nullptr, 1, 0,
                                           nullptr, &database, &error);
    EXPECT_EQ(CH_COMPILER_ERROR, rv);
    EXPECT_TRUE(database == nullptr);
    EXPECT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
// ch_compile_multi: a pattern count of zero must be rejected with
// CH_COMPILER_ERROR, leaving the database null and reporting a compile error.
TEST(HybridArgChecks, MultiCompileZeroPatterns) {
    const char *patterns[] = {"foobar"};
    ch_database_t *database = nullptr;
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv = ch_compile_multi(patterns, nullptr, nullptr, 0, 0,
                                           nullptr, &database, &error);
    EXPECT_EQ(CH_COMPILER_ERROR, rv);
    EXPECT_TRUE(database == nullptr);
    EXPECT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
// ch_compile_multi: a nullptr database output pointer must be rejected with
// CH_COMPILER_ERROR and a compile error.
TEST(HybridArgChecks, MultiCompileBlockNoDatabase) {
    const char *patterns[] = {"foobar"};
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv = ch_compile_multi(patterns, nullptr, nullptr, 1, 0,
                                           nullptr, nullptr, &error);
    EXPECT_EQ(CH_COMPILER_ERROR, rv);
    EXPECT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
// ch_compile_ext_multi: each unsupported flag value must be rejected with
// CH_COMPILER_ERROR, no database, and the message "Unrecognized flag used.".
TEST(HybridArgChecks, ExtMultiBogusFlags) {
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;

    static const unsigned int badflags[] = {
        0xffffffff,
        16, // HS_FLAG_ERROREOD
        128,
        256,
        512,
    };

    for (const unsigned int &flag : badflags) {
        const char *expr[] = { "foobar" };
        ch_error_t err = ch_compile_ext_multi(expr, &flag, nullptr, 1, 0,
                                              10000000, 8000, nullptr, &db,
                                              &compile_err);
        EXPECT_EQ(CH_COMPILER_ERROR, err);
        EXPECT_TRUE(db == nullptr);
        // Fatal: the message check below dereferences compile_err.
        ASSERT_TRUE(compile_err != nullptr);
        EXPECT_STREQ("Unrecognized flag used.", compile_err->message);
        ch_free_compile_error(compile_err);
        // Never carry a freed pointer into the next iteration.
        compile_err = nullptr;
    }
}
// ch_compile_ext_multi: each unsupported mode value must be rejected with
// CH_COMPILER_ERROR, no database, and the message
// "Invalid mode flag supplied.".
TEST(HybridArgChecks, ExtMultiBogusMode) {
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;

    static const unsigned int badModes[] = {
        0xffffffff,
        1,
        2,
        CH_MODE_GROUPS << 1, // this was our largest mode flag
    };

    for (const unsigned int &mode : badModes) {
        const char *expr[] = { "foobar" };
        ch_error_t err = ch_compile_ext_multi(expr, nullptr, nullptr, 1, mode,
                                              10000000, 8000, nullptr, &db,
                                              &compile_err);
        EXPECT_EQ(CH_COMPILER_ERROR, err);
        EXPECT_TRUE(db == nullptr);
        // Fatal: the message check below dereferences compile_err.
        ASSERT_TRUE(compile_err != nullptr);
        EXPECT_STREQ("Invalid mode flag supplied.", compile_err->message);
        ch_free_compile_error(compile_err);
        // Never carry a freed pointer into the next iteration.
        compile_err = nullptr;
    }
}
// ch_compile_ext_multi: a nullptr pattern array must be rejected with
// CH_COMPILER_ERROR, leaving the database null and reporting a compile error.
TEST(HybridArgChecks, ExtMultiCompileBlockNoPattern) {
    ch_database_t *database = nullptr;
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv =
        ch_compile_ext_multi(nullptr, nullptr, nullptr, 1, 0, 10000000, 8000,
                             nullptr, &database, &error);
    EXPECT_EQ(CH_COMPILER_ERROR, rv);
    EXPECT_TRUE(database == nullptr);
    EXPECT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
// ch_compile_ext_multi: a pattern count of zero must be rejected with
// CH_COMPILER_ERROR, leaving the database null and reporting a compile error.
TEST(HybridArgChecks, ExtMultiCompileZeroPatterns) {
    const char *patterns[] = {"foobar"};
    ch_database_t *database = nullptr;
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv =
        ch_compile_ext_multi(patterns, nullptr, nullptr, 0, 0, 10000000, 8000,
                             nullptr, &database, &error);
    EXPECT_EQ(CH_COMPILER_ERROR, rv);
    EXPECT_TRUE(database == nullptr);
    EXPECT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
// ch_compile_ext_multi: a nullptr database output pointer must be rejected
// with CH_COMPILER_ERROR and a compile error.
TEST(HybridArgChecks, ExtMultiCompileBlockNoDatabase) {
    const char *patterns[] = {"foobar"};
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv =
        ch_compile_ext_multi(patterns, nullptr, nullptr, 1, 0, 10000000, 8000,
                             nullptr, nullptr, &error);
    EXPECT_EQ(CH_COMPILER_ERROR, rv);
    EXPECT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
// ch_scan: a nullptr database must yield an error that is neither CH_SUCCESS
// nor CH_SCAN_TERMINATED.
TEST(HybridArgChecks, ScanBlockNoDatabase) {
    ch_database_t *database = nullptr;
    makeDatabase(&database);
    ch_scratch_t *scratch = nullptr;
    makeScratch(database, &scratch);

    ch_error_t status = ch_scan(nullptr, "data", 4, 0, scratch, dummyHandler,
                                nullptr, nullptr);
    ASSERT_NE(CH_SUCCESS, status);
    EXPECT_NE(CH_SCAN_TERMINATED, status);

    // teardown
    status = ch_free_scratch(scratch);
    ASSERT_EQ(CH_SUCCESS, status);
    ch_free_database(database);
}
// ch_scan: a database whose magic number has been corrupted must be rejected
// with CH_INVALID.
TEST(HybridArgChecks, ScanBlockBrokenDatabaseMagic) {
    ch_database_t *database = nullptr;
    makeDatabase(&database);
    ch_scratch_t *scratch = nullptr;
    makeScratch(database, &scratch);

    // Corrupt the database only after scratch has been allocated from it.
    breakDatabaseMagic(database);

    ch_error_t status = ch_scan(database, "data", 4, 0, scratch, dummyHandler,
                                nullptr, nullptr);
    ASSERT_EQ(CH_INVALID, status);

    // teardown
    status = ch_free_scratch(scratch);
    ASSERT_EQ(CH_SUCCESS, status);
    // NOTE(review): released with free() rather than ch_free_database(),
    // presumably because the latter rejects a corrupted magic -- confirm.
    free(database);
}
// ch_scan: a database whose version has been corrupted must be rejected with
// CH_DB_VERSION_ERROR.
TEST(HybridArgChecks, ScanBlockBrokenDatabaseVersion) {
    ch_database_t *database = nullptr;
    makeDatabase(&database);
    ch_scratch_t *scratch = nullptr;
    makeScratch(database, &scratch);

    // Corrupt the database only after scratch has been allocated from it.
    breakDatabaseVersion(database);

    ch_error_t status = ch_scan(database, "data", 4, 0, scratch, dummyHandler,
                                nullptr, nullptr);
    ASSERT_EQ(CH_DB_VERSION_ERROR, status);

    // teardown
    status = ch_free_scratch(scratch);
    ASSERT_EQ(CH_SUCCESS, status);
    ch_free_database(database);
}
// ch_scan: a nullptr data buffer (with a non-zero length) must yield an error
// that is neither CH_SUCCESS nor CH_SCAN_TERMINATED.
TEST(HybridArgChecks, ScanBlockNoData) {
    ch_database_t *database = nullptr;
    makeDatabase(&database);
    ch_scratch_t *scratch = nullptr;
    makeScratch(database, &scratch);

    ch_error_t status = ch_scan(database, nullptr, 4, 0, scratch, dummyHandler,
                                nullptr, nullptr);
    ASSERT_NE(CH_SUCCESS, status);
    EXPECT_NE(CH_SCAN_TERMINATED, status);

    // teardown
    status = ch_free_scratch(scratch);
    ASSERT_EQ(CH_SUCCESS, status);
    ch_free_database(database);
}
// ch_scan: a nullptr scratch must yield an error that is neither CH_SUCCESS
// nor CH_SCAN_TERMINATED.
TEST(HybridArgChecks, ScanBlockNoScratch) {
    ch_database_t *database = nullptr;
    makeDatabase(&database);

    const ch_error_t status = ch_scan(database, "data", 4, 0, nullptr,
                                      dummyHandler, nullptr, nullptr);
    ASSERT_NE(CH_SUCCESS, status);
    EXPECT_NE(CH_SCAN_TERMINATED, status);

    // teardown
    ch_free_database(database);
}
// ch_scan: scanning with a nullptr match handler is expected to succeed.
TEST(HybridArgChecks, ScanBlockNoHandler) {
    ch_database_t *database = nullptr;
    makeDatabase(&database);
    ch_scratch_t *scratch = nullptr;
    makeScratch(database, &scratch);

    ch_error_t status = ch_scan(database, "data", 4, 0, scratch, nullptr,
                                nullptr, nullptr);
    ASSERT_EQ(CH_SUCCESS, status);
    EXPECT_NE(CH_SCAN_TERMINATED, status);

    // teardown
    status = ch_free_scratch(scratch);
    ASSERT_EQ(CH_SUCCESS, status);
    ch_free_database(database);
}
// ch_alloc_scratch: a nullptr database must not succeed and must leave the
// scratch pointer null.
TEST(HybridArgChecks, AllocScratchNoDatabase) {
    ch_scratch_t *scratch = nullptr;
    const ch_error_t status = ch_alloc_scratch(nullptr, &scratch);
    EXPECT_NE(CH_SUCCESS, status);
    EXPECT_TRUE(scratch == nullptr);
}
// ch_alloc_scratch: a nullptr pointer-to-scratch must be rejected with
// CH_INVALID.
TEST(HybridArgChecks, AllocScratchNullScratchPtr) {
    ch_database_t *database = nullptr;
    makeDatabase(&database);

    const ch_error_t status = ch_alloc_scratch(database, nullptr);
    ASSERT_EQ(CH_INVALID, status);

    // teardown
    ch_free_database(database);
}
// ch_alloc_scratch: a scratch pointer referring to 100 bytes of 0xf0 filler
// (not a real scratch) must be rejected with CH_INVALID.
TEST(HybridArgChecks, AllocScratchBogusScratch) {
    ch_database_t *db = nullptr;
    makeDatabase(&db);

    ch_scratch_t *blah = (ch_scratch_t *)malloc(100);
    if (blah == nullptr) {
        // memset on a failed allocation would be undefined behaviour.
        ch_free_database(db);
        FAIL() << "malloc(100) failed";
    }
    memset(blah, 0xf0, 100);

    ch_error_t err = ch_alloc_scratch(db, &blah);
    // Non-fatal so that the teardown below always runs.
    EXPECT_EQ(CH_INVALID, err);

    // teardown
    free(blah);
    ch_free_database(db);
}
// ch_alloc_scratch: a database whose magic number has been corrupted must be
// rejected with CH_INVALID.
TEST(HybridArgChecks, AllocScratchBadDatabaseMagic) {
    ch_database_t *database = nullptr;
    makeDatabase(&database);
    breakDatabaseMagic(database);

    ch_scratch_t *scratch = nullptr;
    const ch_error_t status = ch_alloc_scratch(database, &scratch);
    ASSERT_EQ(CH_INVALID, status);

    // teardown
    // NOTE(review): released with free() rather than ch_free_database(),
    // presumably because the latter rejects a corrupted magic -- confirm.
    free(database);
}
// ch_alloc_scratch: a database whose version has been corrupted must be
// rejected with CH_DB_VERSION_ERROR.
TEST(HybridArgChecks, AllocScratchBadDatabaseVersion) {
    ch_database_t *database = nullptr;
    makeDatabase(&database);
    breakDatabaseVersion(database);

    ch_scratch_t *scratch = nullptr;
    const ch_error_t status = ch_alloc_scratch(database, &scratch);
    ASSERT_EQ(CH_DB_VERSION_ERROR, status);

    // teardown
    ch_free_database(database);
}
// ch_clone_scratch: a nullptr source scratch must not succeed and must leave
// the destination pointer null.
TEST(HybridArgChecks, CloneScratchNoSource) {
    ch_scratch_t *source = nullptr;
    ch_scratch_t *clone = nullptr;
    const ch_error_t status = ch_clone_scratch(source, &clone);
    EXPECT_NE(CH_SUCCESS, status);
    EXPECT_TRUE(clone == nullptr);
}
// ch_database_size: a null database must be rejected with CH_INVALID and the
// size output must remain zero.
TEST(HybridArgChecks, DatabaseSizeNoDatabase) {
    size_t reported = 0;
    const ch_error_t status = ch_database_size(nullptr, &reported);
    ASSERT_EQ(CH_INVALID, status);
    ASSERT_EQ(0U, reported);
}
// ch_clone_scratch: cloning from a heap copy of the garbage buffer (not a
// real scratch) must be rejected with CH_INVALID.
TEST(HybridArgChecks, CloneBadScratch) {
    // Try cloning the scratch
    void *local_garbage = malloc(sizeof(garbage));
    // memcpy into a failed allocation would be undefined behaviour.
    ASSERT_TRUE(local_garbage != nullptr);
    memcpy(local_garbage, garbage, sizeof(garbage));

    ch_scratch_t *cloned = nullptr;
    ch_scratch_t *scratch = (ch_scratch_t *)local_garbage;
    ch_error_t err = ch_clone_scratch(scratch, &cloned);

    // Release the buffer before asserting so a failure cannot leak it.
    free(local_garbage);
    ASSERT_EQ(CH_INVALID, err);
}
// ch_scan: scanning with a heap copy of the garbage buffer in place of a real
// scratch must be rejected with CH_INVALID.
TEST(HybridArgChecks, ScanBadScratch) {
    ch_database_t *db = nullptr;
    makeDatabase(&db);

    void *local_garbage = malloc(sizeof(garbage));
    if (local_garbage == nullptr) {
        // memcpy into a failed allocation would be undefined behaviour.
        ch_free_database(db);
        FAIL() << "malloc failed";
    }
    memcpy(local_garbage, garbage, sizeof(garbage));

    ch_scratch_t *scratch = (ch_scratch_t *)local_garbage;
    ch_error_t err = ch_scan(db, "data", 4, 0, scratch,
                             dummyHandler, nullptr, nullptr);
    free(local_garbage);
    // Non-fatal so that the database is always released below.
    EXPECT_EQ(CH_INVALID, err);

    // teardown
    ch_free_database(db);
}
// ch_free_database: a null database must be accepted with CH_SUCCESS.
TEST(HybridArgChecks, ch_free_database_null) {
    const ch_error_t status = ch_free_database(nullptr);
    ASSERT_EQ(CH_SUCCESS, status);
}
// ch_free_database: the garbage buffer is not a database and must be rejected
// with CH_INVALID.
TEST(HybridArgChecks, ch_free_database_garbage) {
    ch_database_t *bogus = (ch_database_t *)garbage;
    const ch_error_t status = ch_free_database(bogus);
    ASSERT_EQ(CH_INVALID, status);
}
// ch_free_scratch: a null scratch must be accepted with CH_SUCCESS.
TEST(HybridArgChecks, ch_free_scratch_null) {
    const ch_error_t status = ch_free_scratch(nullptr);
    ASSERT_EQ(CH_SUCCESS, status);
}
// ch_free_scratch: the garbage buffer is not a scratch and must be rejected
// with CH_INVALID.
TEST(HybridArgChecks, ch_free_scratch_garbage) {
    ch_scratch_t *bogus = (ch_scratch_t *)garbage;
    const ch_error_t status = ch_free_scratch(bogus);
    ASSERT_EQ(CH_INVALID, status);
}
// ch_free_compile_error: a null compile error must be accepted with
// CH_SUCCESS.
TEST(HybridArgChecks, ch_free_compile_error_null) {
    const ch_error_t status = ch_free_compile_error(nullptr);
    ASSERT_EQ(CH_SUCCESS, status);
}
} // namespace

View File

@@ -0,0 +1,95 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "gtest/gtest.h"
#include "chimera/ch.h"
using namespace testing;
// Value-parameterised fixture: each test instance receives one pattern
// string via GetParam(). It adds no state or behaviour of its own.
class HybridCompile : public TestWithParam<const char *> {};
// Compiling the parameter pattern must fail: the call must not return
// CH_SUCCESS, must leave the database null and must report a compile error.
TEST_P(HybridCompile, BadPattern) {
    const char *pattern = GetParam();
    ch_database_t *database = nullptr;
    ch_compile_error_t *error = nullptr;

    const ch_error_t rv = ch_compile_multi(&pattern, nullptr, nullptr, 1, 0,
                                           nullptr, &database, &error);
    ASSERT_NE(CH_SUCCESS, rv) << "Compile should have failed for expr: "
                              << pattern;
    ASSERT_TRUE(database == nullptr);
    ASSERT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
// Patterns that are expected to fail compilation. Each entry becomes one
// instance of the HybridCompile parameterised tests through the
// INSTANTIATE_TEST_CASE_P call that follows the table.
static
const char * BAD_PATTERNS[] = {
// unmatched parens
"(foo",
"foo)",
"((foo)",
"(foo))",
// nothing to repeat
"a+++",
"a+?+",
"a???",
"a??+",
"?qa",
"*abc",
"+abc",
// repeating boundaries is not allowed (UE-1007)
"^?0",
"^*0",
"^+0",
"^{1,3}0",
"0$?",
"0$*",
"0$+",
"0${1,3}",
// char classes
"[]",
"[]foobar",
"[`-\\80",
// bad named classes
"[[:foo:]]",
"[[:1234:]]",
"[[:f\\oo:]]",
"[[: :]]",
"[[:...:]]",
"[[:l\\ower:]]",
"[[:abc\\:]]",
"[abc[:x\\]pqr:]]",
"[[:a\\dz:]]",
"foobar\\", // trailing unescaped backslash
};
// Run every HybridCompile test once per entry in BAD_PATTERNS.
INSTANTIATE_TEST_CASE_P(Compile, HybridCompile, ValuesIn(BAD_PATTERNS));

56
unit/chimera/compat.cpp Normal file
View File

@@ -0,0 +1,56 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "gtest/gtest.h"
#include "chimera/ch.h"
#include "hs.h"
// The constants that Chimera and Hyperscan share in meaning are currently
// relied upon to have identical values; this test checks each CH_* define
// against its HS_* counterpart.
TEST(HybridCompat, Defines) {
// flags
EXPECT_EQ(HS_FLAG_CASELESS, CH_FLAG_CASELESS);
EXPECT_EQ(HS_FLAG_DOTALL, CH_FLAG_DOTALL);
EXPECT_EQ(HS_FLAG_MULTILINE, CH_FLAG_MULTILINE);
EXPECT_EQ(HS_FLAG_SINGLEMATCH, CH_FLAG_SINGLEMATCH);
EXPECT_EQ(HS_FLAG_UTF8, CH_FLAG_UTF8);
EXPECT_EQ(HS_FLAG_UCP, CH_FLAG_UCP);
// errors
EXPECT_EQ(HS_SUCCESS, CH_SUCCESS);
EXPECT_EQ(HS_INVALID, CH_INVALID);
EXPECT_EQ(HS_NOMEM, CH_NOMEM);
EXPECT_EQ(HS_SCAN_TERMINATED, CH_SCAN_TERMINATED);
EXPECT_EQ(HS_COMPILER_ERROR, CH_COMPILER_ERROR);
EXPECT_EQ(HS_DB_VERSION_ERROR, CH_DB_VERSION_ERROR);
EXPECT_EQ(HS_DB_PLATFORM_ERROR, CH_DB_PLATFORM_ERROR);
EXPECT_EQ(HS_DB_MODE_ERROR, CH_DB_MODE_ERROR);
EXPECT_EQ(HS_BAD_ALIGN, CH_BAD_ALIGN);
EXPECT_EQ(HS_BAD_ALLOC, CH_BAD_ALLOC);
EXPECT_EQ(HS_SCRATCH_IN_USE, CH_SCRATCH_IN_USE);
}

35
unit/chimera/main.cpp Normal file
View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "gtest/gtest.h"
// Test driver: hands the command line to GoogleTest, runs every test and
// returns the value of RUN_ALL_TESTS() as the process exit code.
int main(int argc, char **argv) {
    testing::InitGoogleTest(&argc, argv);
    const int result = RUN_ALL_TESTS();
    return result;
}

551
unit/chimera/scan.cpp Normal file
View File

@@ -0,0 +1,551 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <vector>
#include <tuple>
#include "gtest/gtest.h"
#include "chimera/ch.h"
using namespace std;
using namespace testing;
namespace {
// A set of patterns together with one flags value per pattern, stored in two
// parallel vectors and exposed as raw arrays through getPatterns() and
// getFlags(). Pattern strings are stored as pointers and are not copied.
class HybridScanParams {
public:
    // Creates an empty parameter set; entries are appended with add().
    HybridScanParams() {}

    // Creates a parameter set holding the single pattern s with flags f.
    HybridScanParams(const char *s, unsigned int f)
        : patterns(1, s), flags(1, f) {}

    // Appends one pattern and its flags.
    void add(const char *pattern, unsigned int myflags) {
        patterns.push_back(pattern);
        flags.push_back(myflags);
    }

    // Number of patterns in the set.
    size_t size() const {
        return patterns.size();
    }

    // Pointer to the first of size() pattern pointers. Uses data() rather
    // than &patterns[0], which is undefined for an empty vector (the state
    // produced by the default constructor).
    const char * const * getPatterns() const {
        return patterns.data();
    }

    // Pointer to the first of size() flag values; see getPatterns() for why
    // data() is used.
    const unsigned int * getFlags() const {
        return flags.data();
    }

private:
    std::vector<const char *> patterns;
    std::vector<unsigned int> flags;
};
// Builds the list of pattern sets that the parameterised HybridScan tests are
// instantiated with. Each entry is compiled as one database by the fixture.
static
vector<HybridScanParams> paramFactory() {
    vector<HybridScanParams> cases;

    // Simple cases with one pattern each.
    cases.emplace_back(".", CH_FLAG_DOTALL);
    cases.emplace_back("foobar", 0);
    cases.emplace_back("foo.*bar", 0);
    cases.emplace_back("fred.*bill", CH_FLAG_DOTALL);
    cases.emplace_back(".*", 0); // vacuous pattern
    cases.emplace_back("\\A(.?.{7,27}jf[tmqq]l(f|t|hgmr.+.fg|abks)){3,7}", 0);
    cases.emplace_back("^begin", CH_FLAG_MULTILINE);
    cases.emplace_back("match", CH_FLAG_SINGLEMATCH);

    // One pattern each, using constructs that hyperscan does not support but
    // that can be prefiltered.
    cases.emplace_back("foo(?!bar)", 0);
    cases.emplace_back("(sens|respons)e and \\1ibility", 0);

    // A pattern that (as of the original writing) is too large to be
    // prefiltered; it exercises the hybrid matcher running without the
    // multi-matcher (or with a "fake" one).
    cases.emplace_back("((c(p|p)h{2,}bh.|p|((((cq|j|c|(\\b)|.[^nbgn]|(\\B)[qfh]a)){10,12}|ih|a|mnde[pa].|.g)){5,8})){3}", 0);

    // Several plain literals in one set.
    {
        HybridScanParams literals;
        literals.add("hatstand", 0);
        literals.add("teakettle", 0);
        literals.add("badgerbrush", 0);
        literals.add("mnemosyne", 0);
        cases.push_back(literals);
    }

    // A more involved multi-pattern set.
    {
        HybridScanParams mixed;
        mixed.add("foo.{3,7}bar", 0);
        mixed.add("foo.{30,70}bar", 0);
        mixed.add("foobar.*foobar", 0);
        mixed.add("^blingwrapper.*foo", 0);
        mixed.add("[0-9a-f]{70,}\\n", 0);
        cases.push_back(mixed);
    }

    // Trivial patterns carrying the Unicode flags, mainly to confirm that the
    // flags are accepted.
    {
        HybridScanParams unicode;
        unicode.add("foo.*bar", CH_FLAG_UTF8);
        unicode.add("today", CH_FLAG_UTF8|CH_FLAG_UCP);
        cases.push_back(unicode);
    }

    // Assorted PCRE-specific constructs alongside one ordinary literal.
    {
        HybridScanParams exotica;
        exotica.add("benign literal", 0);
        exotica.add("(?|(abc)|(def))\\1", 0);
        exotica.add("(?|(abc)|(def))(?1)", 0);
        exotica.add("(sens|respons)e and \\1ibility", 0);
        exotica.add("\\w+(?=;)", 0);
        exotica.add("foo(?!bar)", 0);
        exotica.add("(?<=bullock|donkey)", 0);
        cases.push_back(exotica);
    }

    return cases;
}
// Match callback that ignores all of its arguments and always asks for the
// scan to continue.
static
ch_callback_t dummyHandler(unsigned int, unsigned long long,
                           unsigned long long, unsigned int, unsigned int,
                           const ch_capture_t *, void *) {
    return CH_CALLBACK_CONTINUE;
}
// Sanity-checks the capture information handed to a match callback.
//
//   num      - number of entries in the captured array
//   captured - array of capture records for the match
//
// Verifies that at least one record is present, that record 0 is flagged
// active, and that every active record has from <= to. The checks use fatal
// ASSERT_* macros, which return from this helper only; the caller carries on
// after a failure has been recorded.
static
void checkGroups(unsigned int num, const ch_capture_t *captured) {
    // We should have _some_ group info.
    ASSERT_LT(0U, num);
    ASSERT_TRUE(captured != nullptr);

    // Group 0 is always active.
    ASSERT_TRUE(captured[0].flags & CH_CAPTURE_FLAG_ACTIVE);

    // Every active group must describe a non-inverted span.
    for (unsigned int i = 0; i < num; i++) {
        if (!(captured[i].flags & CH_CAPTURE_FLAG_ACTIVE)) {
            continue; // inactive groups carry no offsets worth checking
        }
        // Note the leading space in " not sane.": without it the message
        // rendered as e.g. "Group 3not sane.".
        ASSERT_LE(captured[i].from, captured[i].to) << "Group " << i
                                                    << " not sane.";
    }
}
// Match callback for group-enabled scans: runs checkGroups() over the capture
// records it is given and then always asks for the scan to continue.
static
ch_callback_t dummyGroupHandler(unsigned int, unsigned long long,
                                unsigned long long, unsigned int,
                                unsigned int num,
                                const ch_capture_t *captured, void *) {
    checkGroups(num, captured);
    return CH_CALLBACK_CONTINUE;
}
// Parameterised fixture: for each (pattern set, groups flag) pair it compiles
// a database and allocates scratch for it in SetUp(), and releases both in
// TearDown(). Tests read db, scratch and groups directly.
class HybridScan : public TestWithParam<tuple<HybridScanParams, bool>> {
protected:
    // Compiles the parameter's pattern set, in CH_MODE_GROUPS when the bool
    // parameter is true and CH_MODE_NOGROUPS otherwise, then allocates
    // scratch for the resulting database.
    virtual void SetUp() {
        const HybridScanParams &hsp = get<0>(GetParam());
        groups = get<1>(GetParam());

        ch_compile_error_t *compile_err = nullptr;
        ch_error_t err = ch_compile_ext_multi(hsp.getPatterns(),
                                              hsp.getFlags(), nullptr,
                                              hsp.size(),
                                              groups ? CH_MODE_GROUPS
                                                     : CH_MODE_NOGROUPS,
                                              10000000, 8000, nullptr, &db,
                                              &compile_err);

        // Capture the compiler's message and release the error object before
        // asserting, so a failed compile is both diagnosable and leak-free.
        string compile_msg;
        if (compile_err != nullptr) {
            if (compile_err->message != nullptr) {
                compile_msg = compile_err->message;
            }
            ch_free_compile_error(compile_err);
        }
        ASSERT_EQ(CH_SUCCESS, err) << "pattern compilation failed: "
                                   << compile_msg;
        ASSERT_TRUE(db != nullptr);

        err = ch_alloc_scratch(db, &scratch);
        ASSERT_EQ(CH_SUCCESS, err);
        ASSERT_TRUE(scratch != nullptr);
    }

    // Releases whatever SetUp() managed to create; both pointers start out
    // null, so they may still be null here if SetUp() failed early.
    virtual void TearDown() {
        ch_free_database(db);
        ch_free_scratch(scratch);
    }

    ch_database_t *db = nullptr;    // compiled database for this parameter
    ch_scratch_t *scratch = nullptr; // scratch allocated against db
    bool groups = false;            // true when compiled with CH_MODE_GROUPS
};
// Fixed four-line block of text used as scan input by the HybridScan tests
// that do not build their own buffer.
static const string SCAN_DATA(
"Beware the Jabberwock, my son!\n"
"The jaws that bite, the claws that catch!\n"
"Beware the Jubjub bird, and shun\n"
"The frumious Bandersnatch!\n");
// Checks that the fixture's database reports a size greater than 16 and that
// scanning SCAN_DATA with it returns CH_SUCCESS.
TEST_P(HybridScan, BuildAndScan) {
    ASSERT_TRUE(db != nullptr);

    size_t sz;
    ch_error_t err = ch_database_size(db, &sz);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_LT(16U, sz);

    // Pick the capture-checking callback only when groups were compiled in.
    ch_match_event_handler onMatch = dummyHandler;
    if (groups) {
        onMatch = dummyGroupHandler;
    }

    err = ch_scan(db, SCAN_DATA.data(), SCAN_DATA.size(), 0, scratch, onMatch,
                  nullptr, nullptr);
    ASSERT_EQ(CH_SUCCESS, err);
}
// Scans a buffer of 4000 '*' characters with a few "foo"/"bar" fragments
// spliced in, and expects the scan to return CH_SUCCESS.
TEST_P(HybridScan, ScanNearly4KData) {
    ASSERT_TRUE(db != nullptr);

    string data(4000, '*'); // it's full of stars!

    // Fragments that will match a few of the patterns. They are applied in
    // order, so each offset refers to the buffer as left by the previous
    // insertion.
    static const struct {
        size_t offset;
        const char *text;
    } fragments[] = {
        { 278, "foo" },
        { 285, "bar" },
        { 1178, "foobar" },
        { 1894, "bar" },
        { 3000, "foobar" },
    };
    for (const auto &fragment : fragments) {
        data.insert(fragment.offset, fragment.text);
    }

    ch_match_event_handler onMatch = dummyHandler;
    if (groups) {
        onMatch = dummyGroupHandler;
    }

    ch_error_t err = ch_scan(db, data.data(), data.size(), 0, scratch,
                             onMatch, nullptr, nullptr);
    ASSERT_EQ(CH_SUCCESS, err);
}
// Scans a 5 MiB buffer of '*' characters with a few "foo"/"bar" fragments
// spliced in near the start, and expects the scan to return CH_SUCCESS.
TEST_P(HybridScan, ScanBigData) {
    ASSERT_TRUE(db != nullptr);

    // More than 4MB, as that pushes us into using PCRE for non-Pawn cases.
    string data(5*1024*1024, '*'); // it's full of stars!

    // Fragments that will match a few of the patterns. They are applied in
    // order, so each offset refers to the buffer as left by the previous
    // insertion.
    static const struct {
        size_t offset;
        const char *text;
    } fragments[] = {
        { 278, "foo" },
        { 285, "bar" },
        { 1178, "foobar" },
        { 1894, "bar" },
        { 3000, "foobar" },
    };
    for (const auto &fragment : fragments) {
        data.insert(fragment.offset, fragment.text);
    }

    ch_match_event_handler onMatch = dummyHandler;
    if (groups) {
        onMatch = dummyGroupHandler;
    }

    ch_error_t err = ch_scan(db, data.data(), data.size(), 0, scratch,
                             onMatch, nullptr, nullptr);
    ASSERT_EQ(CH_SUCCESS, err);
}
// Clones the fixture's scratch and checks that a scan of SCAN_DATA using the
// clone returns CH_SUCCESS.
TEST_P(HybridScan, ScanClonedScratch) {
    ASSERT_TRUE(db != nullptr);

    ch_scratch_t *clonedScratch = nullptr;
    ch_error_t err = ch_clone_scratch(scratch, &clonedScratch);
    ASSERT_EQ(CH_SUCCESS, err);
    // A successful clone must actually hand back a scratch object.
    ASSERT_TRUE(clonedScratch != nullptr);

    ch_match_event_handler cb = groups ? dummyGroupHandler : dummyHandler;
    err = ch_scan(db, SCAN_DATA.c_str(), SCAN_DATA.length(), 0,
                  clonedScratch, cb, nullptr, nullptr);

    // Release the clone before asserting on the scan result: a fatal
    // assertion returns from the test body and would otherwise leak it.
    const ch_error_t free_err = ch_free_scratch(clonedScratch);

    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_EQ(CH_SUCCESS, free_err);
}
// Checks that ch_database_info() succeeds and returns a string that begins
// with "Chimera ".
TEST_P(HybridScan, DatabaseInfo) {
    ASSERT_TRUE(db != nullptr);

    char *info = nullptr;
    ch_error_t err = ch_database_info(db, &info);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(info != nullptr);

    // Take a copy and release the returned buffer straight away, so that a
    // failing assertion below (which returns from the test) cannot leak it.
    const string strinfo(info);
    free(info);

    const string prefix("Chimera ");
    ASSERT_GE(strinfo.size(), prefix.size());
    ASSERT_EQ(prefix, strinfo.substr(0, prefix.size()));
}
// Checks that ch_scratch_size() succeeds and reports a non-zero size for the
// fixture's scratch.
TEST_P(HybridScan, NonZeroScratchSize) {
    ASSERT_TRUE(db != nullptr);

    // Start from zero so the final check cannot pass on an indeterminate
    // value if the call were to leave the output untouched.
    size_t curr_size = 0;
    ch_error_t err = ch_scratch_size(scratch, &curr_size);
    ASSERT_EQ(CH_SUCCESS, err);

    // Unsigned literal, as elsewhere in this file, to avoid comparing a
    // signed int against size_t.
    ASSERT_LT(0U, curr_size);
}
// Instantiates every HybridScan test, under the prefix "Scan", for each
// pattern set returned by paramFactory() combined with both values of the
// bool parameter (which the fixture uses to pick CH_MODE_GROUPS or
// CH_MODE_NOGROUPS).
INSTANTIATE_TEST_CASE_P(Scan, HybridScan,
Combine(ValuesIn(paramFactory()), Bool()));
// Match callback that increments the unsigned int counter that ctx points to
// and then returns CH_CALLBACK_CONTINUE.
static
ch_callback_t countHandler(unsigned int, unsigned long long,
                           unsigned long long, unsigned int, unsigned int,
                           const ch_capture_t *, void *ctx) {
    unsigned int &counter = *static_cast<unsigned int *>(ctx);
    counter += 1;
    return CH_CALLBACK_CONTINUE;
}
// Match callback that increments the unsigned int counter that ctx points to
// and then returns CH_CALLBACK_SKIP_PATTERN.
static
ch_callback_t skipHandler(unsigned int, unsigned long long,
                          unsigned long long, unsigned int, unsigned int,
                          const ch_capture_t *, void *ctx) {
    unsigned int &counter = *static_cast<unsigned int *>(ctx);
    counter += 1;
    return CH_CALLBACK_SKIP_PATTERN;
}
// Match callback that increments the unsigned int counter that ctx points to
// and then returns CH_CALLBACK_TERMINATE.
static
ch_callback_t terminateHandler(unsigned int, unsigned long long,
                               unsigned long long, unsigned int, unsigned int,
                               const ch_capture_t *, void *ctx) {
    unsigned int &counter = *static_cast<unsigned int *>(ctx);
    counter += 1;
    return CH_CALLBACK_TERMINATE;
}
// Compiles the num patterns in expr into a new database stored in *db.
//
//   db   - output; set to nullptr first, then to the compiled database
//   expr - array of num pattern strings
//   num  - number of patterns in expr
//
// No per-pattern flags or ids are supplied (both arrays are nullptr) and the
// mode argument is 0. Failures are reported with fatal ASSERT_* macros, which
// return from this helper only; callers see the failure as *db == nullptr.
static
void makeDatabase(ch_database_t **db, const char * const expr[], size_t num) {
    *db = nullptr;

    ch_compile_error_t *compile_err = nullptr;
    // NOTE(review): mode 0 is presumably CH_MODE_NOGROUPS - confirm against
    // the Chimera headers before replacing the literal.
    ch_error_t err = ch_compile_ext_multi(expr, nullptr, nullptr, num, 0,
                                          10000000, 8000, nullptr, db,
                                          &compile_err);

    // Capture the compiler's message and release the error object before
    // asserting, so a failed compile is both diagnosable and leak-free.
    string compile_msg;
    if (compile_err != nullptr) {
        if (compile_err->message != nullptr) {
            compile_msg = compile_err->message;
        }
        ch_free_compile_error(compile_err);
    }

    ASSERT_EQ(CH_SUCCESS, err) << "pattern compilation failed: "
                               << compile_msg;
    ASSERT_TRUE(*db != nullptr);
}
// Context handed to rescan_block_cb: the database and scratch that the
// callback tries to scan with again, plus a running count of callback
// invocations.
struct RescanContext {
    RescanContext(const ch_database_t *database, ch_scratch_t *scratch_space)
        : db{database}, scratch{scratch_space} {}

    const ch_database_t *db; // database to rescan with
    ch_scratch_t *scratch;   // scratch already in use by the outer scan
    size_t matches{0};       // number of times the callback has fired
};
static
int rescan_block_cb(unsigned, unsigned long long, unsigned long long, unsigned,
unsigned, const ch_capture_t *, void *ctx) {
RescanContext *rctx = (RescanContext *)ctx;
rctx->matches++;
const string data = "___foo___bar_";
hs_error_t err = ch_scan(rctx->db, data.c_str(), data.length(), 0,
rctx->scratch, nullptr, nullptr, nullptr);
EXPECT_EQ(CH_SCRATCH_IN_USE, err);
return 0;
}
// Scans with a callback that tries to reuse the same scratch from inside the
// scan. The outer scan is expected to succeed with exactly one callback
// invocation; the callback itself checks that the nested scan is refused.
TEST(Scan, ScratchInUse) {
    static const char * const patterns[] = { "foo.*bar" };
    const string input("___foo___bar_");

    ch_database_t *db = nullptr;
    makeDatabase(&db, patterns, sizeof(patterns) / sizeof(patterns[0]));

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);

    RescanContext rc(db, scratch);
    err = ch_scan(db, input.data(), input.size(), 0, scratch,
                  rescan_block_cb, nullptr, &rc);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_EQ(1U, rc.matches);

    ch_free_scratch(scratch);
    ch_free_database(db);
}
// Single pattern "." scanned with skipHandler (which returns
// CH_CALLBACK_SKIP_PATTERN): the callback is expected to fire exactly once.
TEST(Scan, CallbackSkip1) {
    static const char * const patterns[] = { "." };
    const string input("qwertyuiop");

    ch_database_t *db = nullptr;
    makeDatabase(&db, patterns, sizeof(patterns) / sizeof(patterns[0]));

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);

    unsigned int count = 0;
    err = ch_scan(db, input.data(), input.size(), 0, scratch, skipHandler,
                  nullptr, &count);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_EQ(1U, count);

    ch_free_scratch(scratch);
    ch_free_database(db);
}
// Two patterns scanned with skipHandler (which returns
// CH_CALLBACK_SKIP_PATTERN): each pattern is expected to be reported once,
// for two callbacks in total.
TEST(Scan, CallbackSkip2) {
    static const char * const patterns[] = { "[a-z]+", "[0-9]" };
    const string input("foo 0123 0 bar 39483 n34jfhlqekrcoi3q4");

    ch_database_t *db = nullptr;
    makeDatabase(&db, patterns, sizeof(patterns) / sizeof(patterns[0]));

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);

    unsigned int count = 0;
    err = ch_scan(db, input.data(), input.size(), 0, scratch, skipHandler,
                  nullptr, &count);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_EQ(2U, count); // one report per pattern

    ch_free_scratch(scratch);
    ch_free_database(db);
}
// As CallbackSkip2, but the second pattern is one that libpcre is used for.
// With skipHandler each pattern is expected to be reported once.
TEST(Scan, CallbackSkip3) {
    static const char * const patterns[] = { "[a-z]+", "foo(?!bar)" };
    const string input("foobaz foobing foobar");

    ch_database_t *db = nullptr;
    makeDatabase(&db, patterns, sizeof(patterns) / sizeof(patterns[0]));

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);

    unsigned int count = 0;
    err = ch_scan(db, input.data(), input.size(), 0, scratch, skipHandler,
                  nullptr, &count);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_EQ(2U, count); // one report per pattern

    ch_free_scratch(scratch);
    ch_free_database(db);
}
// Two patterns scanned with countHandler (which returns
// CH_CALLBACK_CONTINUE): every match is expected to be reported, seven in
// total for this input.
TEST(Scan, CallbackNoSkip1) {
    static const char * const patterns[] = { "foo|bar", "[0-9]{3}" };
    const string input("foo 012 bar 345 foobar 678");

    ch_database_t *db = nullptr;
    makeDatabase(&db, patterns, sizeof(patterns) / sizeof(patterns[0]));

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);

    unsigned int count = 0;
    err = ch_scan(db, input.data(), input.size(), 0, scratch, countHandler,
                  nullptr, &count);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_EQ(7U, count); // all seven matches reported

    ch_free_scratch(scratch);
    ch_free_database(db);
}
// As CallbackNoSkip1, but the first pattern uses a negative lookahead.
// With countHandler four matches are expected for this input.
TEST(Scan, CallbackNoSkip2) {
    static const char * const patterns[] = { "foo(?!bar)", "[0-9]{3}" };
    const string input("foo 012 bar 345 foobar 678");

    ch_database_t *db = nullptr;
    makeDatabase(&db, patterns, sizeof(patterns) / sizeof(patterns[0]));

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);

    unsigned int count = 0;
    err = ch_scan(db, input.data(), input.size(), 0, scratch, countHandler,
                  nullptr, &count);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_EQ(4U, count); // all four matches reported

    ch_free_scratch(scratch);
    ch_free_database(db);
}
// Single pattern "." scanned with terminateHandler (which returns
// CH_CALLBACK_TERMINATE): the scan is expected to return CH_SCAN_TERMINATED
// after exactly one callback.
TEST(Scan, CallbackTerm1) {
    static const char * const patterns[] = { "." };
    const string input("qwertyuiop");

    ch_database_t *db = nullptr;
    makeDatabase(&db, patterns, sizeof(patterns) / sizeof(patterns[0]));

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);

    unsigned int count = 0;
    err = ch_scan(db, input.data(), input.size(), 0, scratch,
                  terminateHandler, nullptr, &count);
    ASSERT_EQ(CH_SCAN_TERMINATED, err);
    ASSERT_EQ(1U, count);

    ch_free_scratch(scratch);
    ch_free_database(db);
}
// Two patterns scanned with terminateHandler (which returns
// CH_CALLBACK_TERMINATE): the scan is expected to return CH_SCAN_TERMINATED
// after exactly one callback.
TEST(Scan, CallbackTerm2) {
    static const char * const patterns[] = { "[a-z]+", "[0-9]" };
    const string input("foo 0123 0 bar 39483 n34jfhlqekrcoi3q4");

    ch_database_t *db = nullptr;
    makeDatabase(&db, patterns, sizeof(patterns) / sizeof(patterns[0]));

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);

    unsigned int count = 0;
    err = ch_scan(db, input.data(), input.size(), 0, scratch,
                  terminateHandler, nullptr, &count);
    ASSERT_EQ(CH_SCAN_TERMINATED, err);
    ASSERT_EQ(1U, count);

    ch_free_scratch(scratch);
    ch_free_database(db);
}
// As CallbackTerm2, but the second pattern is one that libpcre is used for.
// With terminateHandler the scan is expected to return CH_SCAN_TERMINATED
// after exactly one callback.
TEST(Scan, CallbackTerm3) {
    static const char * const patterns[] = { "[a-z]+", "foo(?!bar)" };
    const string input("foobaz foobing foobar");

    ch_database_t *db = nullptr;
    makeDatabase(&db, patterns, sizeof(patterns) / sizeof(patterns[0]));

    ch_scratch_t *scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    ASSERT_EQ(CH_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);

    unsigned int count = 0;
    err = ch_scan(db, input.data(), input.size(), 0, scratch,
                  terminateHandler, nullptr, &count);
    ASSERT_EQ(CH_SCAN_TERMINATED, err);
    ASSERT_EQ(1U, count);

    ch_free_scratch(scratch);
    ch_free_database(db);
}
} // namespace