mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
chimera: hybrid of Hyperscan and PCRE
This commit is contained in:
parent
8a1c497f44
commit
bf87f8c003
@ -70,6 +70,16 @@ include_directories(SYSTEM include)
|
||||
|
||||
include (${CMAKE_MODULE_PATH}/boost.cmake)
|
||||
|
||||
# PCRE check, we have a fixed requirement for PCRE to use Chimera
|
||||
# and hscollider
|
||||
set(PCRE_REQUIRED_MAJOR_VERSION 8)
|
||||
set(PCRE_REQUIRED_MINOR_VERSION 41)
|
||||
set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
|
||||
include (${CMAKE_MODULE_PATH}/pcre.cmake)
|
||||
if (NOT CORRECT_PCRE_VERSION)
|
||||
message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found")
|
||||
endif()
|
||||
|
||||
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
|
||||
find_package(PythonInterp)
|
||||
find_program(RAGEL ragel)
|
||||
@ -154,7 +164,7 @@ if(MSVC OR MSVC_IDE)
|
||||
# todo: change these as required
|
||||
set(ARCH_C_FLAGS "/arch:AVX2")
|
||||
set(ARCH_CXX_FLAGS "/arch:AVX2")
|
||||
set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 -D_CRT_SECURE_NO_WARNINGS")
|
||||
set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 /wd4996 -D_CRT_SECURE_NO_WARNINGS")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 ${MSVC_WARNS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 ${MSVC_WARNS} /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD")
|
||||
endif()
|
||||
@ -445,12 +455,20 @@ else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
|
||||
# we need static libs for Chimera - too much deep magic for shared libs
|
||||
if (CORRECT_PCRE_VERSION AND BUILD_STATIC_LIBS)
|
||||
set(BUILD_CHIMERA TRUE)
|
||||
endif()
|
||||
|
||||
add_subdirectory(util)
|
||||
add_subdirectory(unit)
|
||||
add_subdirectory(doc/dev-reference)
|
||||
if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt)
|
||||
add_subdirectory(tools)
|
||||
endif()
|
||||
if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA)
|
||||
add_subdirectory(chimera)
|
||||
endif()
|
||||
|
||||
# do substitutions
|
||||
configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h)
|
||||
|
32
chimera/CMakeLists.txt
Normal file
32
chimera/CMakeLists.txt
Normal file
@ -0,0 +1,32 @@
|
||||
# Chimera lib: builds the static `chimera` library from the sources in this
# directory and links it against `hs` and `pcre`.

include_directories(${PCRE_INCLUDE_DIRS})

# only set these after all tests are done
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")

# Public API headers; these are the only files installed from this directory.
set(chimera_HEADERS
    ch.h
    ch_common.h
    ch_compile.h
    ch_runtime.h
)
install(FILES ${chimera_HEADERS} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/hs")

# Library sources: the public headers plus the internal implementation files.
set(chimera_SRCS
    ${chimera_HEADERS}
    ch_alloc.c
    ch_alloc.h
    ch_compile.cpp
    ch_database.c
    ch_database.h
    ch_internal.h
    ch_runtime.c
    ch_scratch.h
    ch_scratch.c
)

add_library(chimera STATIC ${chimera_SRCS})

# add_dependencies() is only meaningful for names that are real CMake targets.
# NOTE(review): whether `pcre` is a target or just a library name depends on
# pcre.cmake, which is not visible here, so only register names that are
# targets in this build. The link line below is unchanged.
foreach(chimera_dep hs pcre)
    if(TARGET ${chimera_dep})
        add_dependencies(chimera ${chimera_dep})
    endif()
endforeach()
target_link_libraries(chimera hs pcre)
|
45
chimera/ch.h
Normal file
45
chimera/ch.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_H_
|
||||
#define CH_H_
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The complete Chimera API definition.
|
||||
*
|
||||
* Chimera is a hybrid solution of Hyperscan and PCRE.
|
||||
*
|
||||
* This header includes both the Chimera compiler and runtime components. See
|
||||
* the individual component headers for documentation.
|
||||
*/
|
||||
|
||||
#include "ch_compile.h"
|
||||
#include "ch_runtime.h"
|
||||
|
||||
#endif /* CH_H_ */
|
109
chimera/ch_alloc.c
Normal file
109
chimera/ch_alloc.c
Normal file
@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime functions for setting custom allocators.
|
||||
*/
|
||||
|
||||
#include "ch.h"
|
||||
#include "ch_common.h"
|
||||
#include "ch_internal.h"
|
||||
#include "hs.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#define default_malloc malloc
|
||||
#define default_free free
|
||||
|
||||
ch_alloc_t ch_database_alloc = default_malloc;
|
||||
ch_alloc_t ch_misc_alloc = default_malloc;
|
||||
ch_alloc_t ch_scratch_alloc = default_malloc;
|
||||
|
||||
ch_free_t ch_database_free = default_free;
|
||||
ch_free_t ch_misc_free = default_free;
|
||||
ch_free_t ch_scratch_free = default_free;
|
||||
|
||||
static
|
||||
ch_alloc_t normalise_alloc(ch_alloc_t a) {
|
||||
if (!a) {
|
||||
return default_malloc;
|
||||
} else {
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
ch_free_t normalise_free(ch_free_t f) {
|
||||
if (!f) {
|
||||
return default_free;
|
||||
} else {
|
||||
return f;
|
||||
}
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_set_allocator(ch_alloc_t allocfunc,
|
||||
ch_free_t freefunc) {
|
||||
ch_set_database_allocator(allocfunc, freefunc);
|
||||
ch_set_misc_allocator(allocfunc, freefunc);
|
||||
ch_set_scratch_allocator(allocfunc, freefunc);
|
||||
|
||||
// Set core Hyperscan alloc/free.
|
||||
hs_error_t ret = hs_set_allocator(allocfunc, freefunc);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_set_database_allocator(ch_alloc_t allocfunc,
|
||||
ch_free_t freefunc) {
|
||||
ch_database_alloc = normalise_alloc(allocfunc);
|
||||
ch_database_free = normalise_free(freefunc);
|
||||
|
||||
// Set Hyperscan database alloc/free.
|
||||
return hs_set_database_allocator(allocfunc, freefunc);
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_set_misc_allocator(ch_alloc_t allocfunc,
|
||||
ch_free_t freefunc) {
|
||||
ch_misc_alloc = normalise_alloc(allocfunc);
|
||||
ch_misc_free = normalise_free(freefunc);
|
||||
|
||||
// Set Hyperscan misc alloc/free.
|
||||
return hs_set_misc_allocator(allocfunc, freefunc);
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_set_scratch_allocator(ch_alloc_t allocfunc,
|
||||
ch_free_t freefunc) {
|
||||
ch_scratch_alloc = normalise_alloc(allocfunc);
|
||||
ch_scratch_free = normalise_free(freefunc);
|
||||
|
||||
// Set Hyperscan scratch alloc/free.
|
||||
return hs_set_scratch_allocator(allocfunc, freefunc);
|
||||
}
|
65
chimera/ch_alloc.h
Normal file
65
chimera/ch_alloc.h
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_ALLOC_H
|
||||
#define CH_ALLOC_H
|
||||
|
||||
#include "hs_common.h"
|
||||
#include "ue2common.h"
|
||||
#include "ch_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
extern hs_alloc_t ch_database_alloc;
|
||||
extern hs_alloc_t ch_misc_alloc;
|
||||
extern hs_alloc_t ch_scratch_alloc;
|
||||
|
||||
extern hs_free_t ch_database_free;
|
||||
extern hs_free_t ch_misc_free;
|
||||
extern hs_free_t ch_scratch_free;
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
#endif
|
||||
/** \brief Check the results of an alloc done with hs_alloc for alignment.
|
||||
*
|
||||
* If we have incorrect alignment, return an error. Caller should free the
|
||||
* offending block. */
|
||||
static really_inline
|
||||
ch_error_t ch_check_alloc(const void *mem) {
|
||||
ch_error_t ret = CH_SUCCESS;
|
||||
if (!mem) {
|
||||
ret = CH_NOMEM;
|
||||
} else if (!ISALIGNED_N(mem, alignof(unsigned long long))) {
|
||||
ret = CH_BAD_ALLOC;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
360
chimera/ch_common.h
Normal file
360
chimera/ch_common.h
Normal file
@ -0,0 +1,360 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_COMMON_H_
|
||||
#define CH_COMMON_H_
|
||||
|
||||
#include "hs_common.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Chimera common API definition.
|
||||
*
|
||||
* Chimera is a hybrid of Hyperscan and PCRE.
|
||||
*
|
||||
* This header contains functions available to both the Chimera compiler and
|
||||
* runtime.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
struct ch_database;
|
||||
|
||||
/**
|
||||
* A Chimera pattern database.
|
||||
*
|
||||
* Generated by one of the Chimera compiler functions:
|
||||
* - @ref ch_compile()
|
||||
* - @ref ch_compile_multi()
|
||||
* - @ref ch_compile_ext_multi()
|
||||
*/
|
||||
typedef struct ch_database ch_database_t;
|
||||
|
||||
/**
|
||||
* A type for errors returned by Chimera functions.
|
||||
*/
|
||||
typedef int ch_error_t;
|
||||
|
||||
/**
|
||||
* Free a compiled pattern database.
|
||||
*
|
||||
* The free callback set by @ref ch_set_allocator() will be used by this
|
||||
* function.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database. NULL may also be safely provided, in which
|
||||
* case the function does nothing.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_free_database(ch_database_t *db);
|
||||
|
||||
/**
|
||||
* Utility function for identifying this release version.
|
||||
*
|
||||
* @return
|
||||
* A string containing the version number of this release build and the
|
||||
* date of the build. It is allocated statically, so it does not need to
|
||||
* be freed by the caller.
|
||||
*/
|
||||
const char * HS_CDECL ch_version(void);
|
||||
|
||||
/**
|
||||
* Returns the size of the given database.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to compiled expression database.
|
||||
*
|
||||
* @param database_size
|
||||
* On success, the size of the compiled database in bytes is placed in this
|
||||
* parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_database_size(const ch_database_t *database,
|
||||
size_t *database_size);
|
||||
|
||||
/**
|
||||
* Utility function providing information about a database.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to a compiled database.
|
||||
*
|
||||
* @param info
|
||||
* On success, a string containing the version and platform information for
|
||||
* the supplied database is placed in the parameter. The string is
|
||||
* allocated using the allocator supplied in @ref hs_set_allocator()
|
||||
* (or malloc() if no allocator was set) and should be freed by the caller.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_database_info(const ch_database_t *database,
|
||||
char **info);
|
||||
|
||||
/**
|
||||
* The type of the callback function that will be used by Chimera to allocate
|
||||
* more memory at runtime as required.
|
||||
*
|
||||
* If Chimera is to be used in a multi-threaded, or similarly concurrent
|
||||
* environment, the allocation function will need to be re-entrant, or
|
||||
* similarly safe for concurrent use.
|
||||
*
|
||||
* @param size
|
||||
* The number of bytes to allocate.
|
||||
* @return
|
||||
* A pointer to the region of memory allocated, or NULL on error.
|
||||
*/
|
||||
typedef void *(HS_CDECL *ch_alloc_t)(size_t size);
|
||||
|
||||
/**
|
||||
* The type of the callback function that will be used by Chimera to free
|
||||
* memory regions previously allocated using the @ref ch_alloc_t function.
|
||||
*
|
||||
* @param ptr
|
||||
* The region of memory to be freed.
|
||||
*/
|
||||
typedef void (HS_CDECL *ch_free_t)(void *ptr);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Chimera for allocating
|
||||
* memory at runtime for stream state, scratch space, database bytecode,
|
||||
* and various other data structures returned by the Chimera API.
|
||||
*
|
||||
* The function is equivalent to calling @ref ch_set_scratch_allocator(),
|
||||
* @ref ch_set_database_allocator() and
|
||||
* @ref ch_set_misc_allocator() with the provided parameters.
|
||||
*
|
||||
* This call will override any previous allocators that have been set.
|
||||
*
|
||||
* Note: there is no way to change the allocator used for temporary objects
|
||||
* created during the various compile calls (@ref ch_compile() and @ref
|
||||
* ch_compile_multi()).
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_set_allocator(ch_alloc_t alloc_func,
|
||||
ch_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Chimera for allocating memory
|
||||
* for database bytecode produced by the compile calls (@ref ch_compile() and @ref
|
||||
* ch_compile_multi()).
|
||||
*
|
||||
* If no database allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous database allocators that have been set.
|
||||
*
|
||||
* Note: the database allocator may also be set by calling @ref
|
||||
* ch_set_allocator().
|
||||
*
|
||||
* Note: there is no way to change how temporary objects created during the
|
||||
* various compile calls (@ref ch_compile() and @ref ch_compile_multi()) are
|
||||
* allocated.
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_set_database_allocator(ch_alloc_t alloc_func,
|
||||
ch_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Chimera for allocating memory
|
||||
* for items returned by the Chimera API such as @ref ch_compile_error_t.
|
||||
*
|
||||
* If no misc allocation functions are set, or if NULL is used in place of both
|
||||
* parameters, then memory allocation will default to standard methods (such as
|
||||
* the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous misc allocators that have been set.
|
||||
*
|
||||
* Note: the misc allocator may also be set by calling @ref ch_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_set_misc_allocator(ch_alloc_t alloc_func,
|
||||
ch_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Chimera for allocating memory
|
||||
* for scratch space by @ref ch_alloc_scratch() and @ref ch_clone_scratch().
|
||||
*
|
||||
* If no scratch allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous scratch allocators that have been set.
|
||||
*
|
||||
* Note: the scratch allocator may also be set by calling @ref
|
||||
* ch_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_set_scratch_allocator(ch_alloc_t alloc_func,
|
||||
ch_free_t free_func);
|
||||
|
||||
/**
|
||||
* @defgroup CH_ERROR ch_error_t values
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* The engine completed normally.
|
||||
*/
|
||||
#define CH_SUCCESS 0
|
||||
|
||||
/**
|
||||
* A parameter passed to this function was invalid.
|
||||
*/
|
||||
#define CH_INVALID (-1)
|
||||
|
||||
/**
|
||||
* A memory allocation failed.
|
||||
*/
|
||||
#define CH_NOMEM (-2)
|
||||
|
||||
/**
|
||||
* The engine was terminated by callback.
|
||||
*
|
||||
* This return value indicates that the target buffer was partially scanned,
|
||||
* but that the callback function requested that scanning cease after a match
|
||||
* was located.
|
||||
*/
|
||||
#define CH_SCAN_TERMINATED (-3)
|
||||
|
||||
/**
|
||||
* The pattern compiler failed, and the @ref ch_compile_error_t should be
|
||||
* inspected for more detail.
|
||||
*/
|
||||
#define CH_COMPILER_ERROR (-4)
|
||||
|
||||
/**
|
||||
* The given database was built for a different version of the Chimera matcher.
|
||||
*/
|
||||
#define CH_DB_VERSION_ERROR (-5)
|
||||
|
||||
/**
|
||||
* The given database was built for a different platform (i.e., CPU type).
|
||||
*/
|
||||
#define CH_DB_PLATFORM_ERROR (-6)
|
||||
|
||||
/**
|
||||
* The given database was built for a different mode of operation. This error
|
||||
* is returned when streaming calls are used with a non-streaming database and
|
||||
* vice versa.
|
||||
*/
|
||||
#define CH_DB_MODE_ERROR (-7)
|
||||
|
||||
/**
|
||||
* A parameter passed to this function was not correctly aligned.
|
||||
*/
|
||||
#define CH_BAD_ALIGN (-8)
|
||||
|
||||
/**
|
||||
* The memory allocator did not correctly return memory suitably aligned for
|
||||
* the largest representable data type on this platform.
|
||||
*/
|
||||
#define CH_BAD_ALLOC (-9)
|
||||
|
||||
/**
|
||||
* The scratch region was already in use.
|
||||
*
|
||||
* This error is returned when Chimera is able to detect that the scratch
|
||||
* region given is already in use by another Chimera API call.
|
||||
*
|
||||
* A separate scratch region, allocated with @ref ch_alloc_scratch() or @ref
|
||||
* ch_clone_scratch(), is required for every concurrent caller of the Chimera
|
||||
* API.
|
||||
*
|
||||
* For example, this error might be returned when @ref ch_scan() has been
|
||||
* called inside a callback delivered by a currently-executing @ref ch_scan()
|
||||
* call using the same scratch region.
|
||||
*
|
||||
* Note: Not all concurrent uses of scratch regions may be detected. This error
|
||||
* is intended as a best-effort debugging tool, not a guarantee.
|
||||
*/
|
||||
#define CH_SCRATCH_IN_USE (-10)
|
||||
|
||||
/**
|
||||
* Returned when pcre_exec (called for some expressions internally from @ref
|
||||
* ch_scan) failed due to a fatal error.
|
||||
*/
|
||||
#define CH_FAIL_INTERNAL (-32)
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_COMMON_H_ */
|
878
chimera/ch_compile.cpp
Normal file
878
chimera/ch_compile.cpp
Normal file
@ -0,0 +1,878 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compiler front-end, including public API calls for compilation.
|
||||
*/
|
||||
|
||||
#include "ch_compile.h"
|
||||
#include "ch_alloc.h"
|
||||
#include "ch_internal.h"
|
||||
#include "ch_database.h"
|
||||
#include "grey.h"
|
||||
#include "hs_common.h"
|
||||
#include "hs_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/multibit_build.h"
|
||||
#include "util/target_info.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <limits.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
#define PCRE_ERROR_MSG "Internal error building PCRE pattern."
|
||||
|
||||
using namespace std;
|
||||
using namespace ue2;
|
||||
|
||||
static const char failureNoMemory[] = "Unable to allocate memory.";
|
||||
static const char failureInternal[] = "Internal error.";
|
||||
static const char failureBadAlloc[] = "Allocator returned misaligned memory.";
|
||||
|
||||
static const ch_compile_error_t ch_enomem
|
||||
= { const_cast<char *>(failureNoMemory), 0 };
|
||||
static const ch_compile_error_t ch_einternal
|
||||
= { const_cast<char *>(failureInternal), 0 };
|
||||
static const ch_compile_error_t ch_badalloc
|
||||
= { const_cast<char *>(failureBadAlloc), 0 };
|
||||
|
||||
static
|
||||
ch_compile_error_t *generateChimeraCompileError(const string &err,
|
||||
int expression) {
|
||||
ch_compile_error_t *ret =
|
||||
(struct ch_compile_error *)ch_misc_alloc(sizeof(ch_compile_error_t));
|
||||
if (ret) {
|
||||
ch_error_t e = ch_check_alloc(ret);
|
||||
if (e != CH_SUCCESS) {
|
||||
ch_misc_free(ret);
|
||||
return const_cast<ch_compile_error_t *>(&ch_badalloc);
|
||||
}
|
||||
char *msg = (char *)ch_misc_alloc(err.size() + 1);
|
||||
if (msg) {
|
||||
e = ch_check_alloc(msg);
|
||||
if (e != HS_SUCCESS) {
|
||||
ch_misc_free(msg);
|
||||
return const_cast<ch_compile_error_t *>(&ch_badalloc);
|
||||
}
|
||||
memcpy(msg, err.c_str(), err.size() + 1);
|
||||
ret->message = msg;
|
||||
} else {
|
||||
ch_misc_free(ret);
|
||||
ret = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret || !ret->message) {
|
||||
return const_cast<ch_compile_error_t *>(&ch_enomem);
|
||||
}
|
||||
|
||||
ret->expression = expression;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
void freeChimeraCompileError(ch_compile_error_t *error) {
|
||||
if (!error) {
|
||||
return;
|
||||
}
|
||||
if (error == &ch_enomem || error == &ch_einternal ||
|
||||
error == &ch_badalloc) {
|
||||
// These are not allocated.
|
||||
return;
|
||||
}
|
||||
|
||||
ch_misc_free(error->message);
|
||||
ch_misc_free(error);
|
||||
}
|
||||
|
||||
static
|
||||
bool checkMode(unsigned int mode, ch_compile_error_t **comp_error) {
|
||||
static const unsigned int supported = CH_MODE_GROUPS;
|
||||
|
||||
if (mode & ~supported) {
|
||||
*comp_error =
|
||||
generateChimeraCompileError("Invalid mode flag supplied.", -1);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \brief Throw a compile error if we're passed some unsupported flags. */
|
||||
static
|
||||
void checkFlags(const unsigned int flags) {
|
||||
static const unsigned int supported = HS_FLAG_DOTALL
|
||||
| HS_FLAG_MULTILINE
|
||||
| HS_FLAG_CASELESS
|
||||
| HS_FLAG_SINGLEMATCH
|
||||
| HS_FLAG_UCP
|
||||
| HS_FLAG_UTF8;
|
||||
|
||||
if (flags & ~supported) {
|
||||
throw CompileError("Unrecognized flag used.");
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool isHyperscanSupported(const char *expression, unsigned int flags,
|
||||
const hs_platform_info *platform) {
|
||||
hs_database_t *db = nullptr;
|
||||
hs_compile_error *comp_error = nullptr;
|
||||
|
||||
unsigned int id = 0;
|
||||
hs_error_t err = hs_compile_multi(&expression, &flags, &id,
|
||||
1, HS_MODE_BLOCK, platform, &db,
|
||||
&comp_error);
|
||||
if (err != HS_SUCCESS) {
|
||||
assert(!db);
|
||||
assert(comp_error);
|
||||
DEBUG_PRINTF("unsupported: %s\n", comp_error->message);
|
||||
hs_free_compile_error(comp_error);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(db);
|
||||
assert(!comp_error);
|
||||
hs_free_database(db);
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool writeHyperscanDatabase(char *ptr, hs_database_t *db) {
|
||||
// Note: we must use our serialization calls to re-home the database.
|
||||
char *serialized = nullptr;
|
||||
size_t slen = 0;
|
||||
hs_error_t err = hs_serialize_database(db, &serialized, &slen);
|
||||
if (err != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("hs_serialize_database returned %d\n", err);
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("writing database to ptr %p\n", ptr);
|
||||
|
||||
// deserialize_at without the platform tests.
|
||||
err = hs_deserialize_database_at(serialized, slen, (hs_database_t *)ptr);
|
||||
if (err != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("hs_deserialize_database_at returned %d\n", err);
|
||||
assert(0);
|
||||
ch_misc_free(serialized);
|
||||
return false;
|
||||
}
|
||||
|
||||
ch_misc_free(serialized);
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool writeHyperscanDatabase(ch_bytecode *db, hs_database_t *hs_db) {
|
||||
db->databaseOffset = ROUNDUP_CL(sizeof(*db));
|
||||
char *ptr = (char *)db + db->databaseOffset;
|
||||
return writeHyperscanDatabase(ptr, hs_db);
|
||||
}
|
||||
|
||||
static
|
||||
int convertFlagsToPcreOptions(unsigned int flags) {
|
||||
int options = 0;
|
||||
if (flags & HS_FLAG_CASELESS) {
|
||||
options |= PCRE_CASELESS;
|
||||
}
|
||||
if (flags & HS_FLAG_DOTALL) {
|
||||
options |= PCRE_DOTALL;
|
||||
}
|
||||
if (flags & HS_FLAG_MULTILINE) {
|
||||
options |= PCRE_MULTILINE;
|
||||
}
|
||||
if (flags & HS_FLAG_UTF8) {
|
||||
options |= PCRE_UTF8;
|
||||
}
|
||||
if (flags & HS_FLAG_UCP) {
|
||||
options |= PCRE_UCP;
|
||||
}
|
||||
|
||||
// All other flags are meaningless to PCRE.
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/** \brief Data about a single pattern. */
|
||||
struct PatternData : boost::noncopyable {
|
||||
PatternData(const char *pattern, u32 flags, u32 idx, u32 id_in,
|
||||
unsigned mode, unsigned long int match_limit,
|
||||
unsigned long int match_limit_recursion,
|
||||
const hs_platform_info *platform);
|
||||
~PatternData() {
|
||||
pcre_free(compiled);
|
||||
pcre_free(extra);
|
||||
}
|
||||
|
||||
void buildPcre(const char *pattern, u32 flags);
|
||||
|
||||
size_t patternSize() const;
|
||||
|
||||
void writePattern(ch_pattern *pattern) const;
|
||||
|
||||
pcre *compiled; //!< pcre_compile output
|
||||
pcre_extra *extra; //!< pcre_study output
|
||||
size_t compiled_size;
|
||||
int study_size;
|
||||
int capture_cnt;
|
||||
bool utf8;
|
||||
u32 id; //!< ID from the user
|
||||
u32 expr_index; //!< index in the expression array
|
||||
bool singlematch; //!< pattern is in highlander mode
|
||||
bool guard; //!< this pattern should be guarded by the multimatcher
|
||||
u32 minWidth; //!< min match width
|
||||
u32 maxWidth; //!< max match width
|
||||
u32 fixedWidth; //!< fixed pattern width
|
||||
unsigned long int matchLimit; //! pcre match limit
|
||||
unsigned long int matchLimitRecursion; //! pcre match_limit_recursion
|
||||
};
|
||||
|
||||
/**
 * \brief Build the PCRE form of a pattern and work out how Hyperscan can
 * assist with it.
 *
 * After the PCRE build, hs_expression_info() is queried with prefiltering
 * enabled and SINGLEMATCH removed. On success the min/max widths are
 * recorded, \c fixedWidth is set when the pattern needs no PCRE confirmation,
 * and \c guard records whether Hyperscan can compile the prefiltered pattern.
 * On failure the members keep their initial values (no guard, no fixed
 * width).
 *
 * \param pattern               Pattern text; must not be NULL.
 * \param flags                 Hyperscan flags for this pattern.
 * \param idx                   Index of the pattern in the expression array.
 * \param id_in                 User-supplied ID.
 * \param mode                  Database mode bits (CH_MODE_GROUPS is tested).
 * \param match_limit           Stored as the PCRE match limit.
 * \param match_limit_recursion Stored as the PCRE recursion limit.
 * \param platform              Target platform for the Hyperscan probes.
 *
 * Throws CompileError (from buildPcre) if PCRE rejects the pattern.
 */
PatternData::PatternData(const char *pattern, u32 flags, u32 idx, u32 id_in,
                         unsigned mode, unsigned long int match_limit,
                         unsigned long int match_limit_recursion,
                         const hs_platform_info *platform)
    : compiled(nullptr), extra(nullptr), id(id_in), expr_index(idx),
      singlematch(flags & HS_FLAG_SINGLEMATCH),
      guard(false), minWidth(0), maxWidth(UINT_MAX),
      fixedWidth(UINT_MAX), matchLimit(match_limit),
      matchLimitRecursion(match_limit_recursion) {
    assert(pattern);

    flags |= HS_FLAG_ALLOWEMPTY; /* don't hand things off to pcre for no
                                    reason */

    try {
        buildPcre(pattern, flags);
    } catch (...) {
        // The destructor does not run when a constructor throws, so release
        // whatever buildPcre managed to allocate before propagating.
        if (extra) {
            pcre_free_study(extra);
            extra = nullptr;
        }
        if (compiled) {
            pcre_free(compiled);
            compiled = nullptr;
        }
        throw;
    }

    // Fetch the expression info for a prefiltering, non-singlematch version of
    // this pattern, if possible.
    hs_expr_info *info = nullptr;
    hs_compile_error_t *error = nullptr;
    const u32 prefilterFlags =
        (flags | HS_FLAG_PREFILTER) & ~HS_FLAG_SINGLEMATCH;
    const u32 rawFlags = (flags | HS_FLAG_SOM_LEFTMOST) & ~HS_FLAG_SINGLEMATCH;
    hs_error_t err = hs_expression_info(pattern, prefilterFlags, &info, &error);
    if (err != HS_SUCCESS) {
        // We can't even prefilter this pattern, so we're dependent on Big Dumb
        // Pcre Scans.
        DEBUG_PRINTF("hs_expression_info failed, falling back to pcre\n");
        hs_free_compile_error(error);
        return;
    }

    assert(info);
    minWidth = info->min_width;
    maxWidth = info->max_width;
    const bool ordered = !info->unordered_matches;

    // Only enable capturing if required
    u32 captureCnt = 0;
    if (mode & CH_MODE_GROUPS) {
        captureCnt = capture_cnt;
    }

    // No need to confirm with PCRE if:
    // 1) pattern is fixed width
    // 2) pattern isn't vacuous as it can't combine with start of match
    // 3) no capturing in this pattern
    // 4) no offset adjust in this pattern as hyperscan match callback
    //    will arrive without order, i.e. [^a]\z has offset adjust
    // 5) hyperscan compile succeeds without prefiltering
    if (minWidth == maxWidth && minWidth && maxWidth != UINT_MAX &&
        !captureCnt && ordered &&
        isHyperscanSupported(pattern, rawFlags, platform)) {
        fixedWidth = maxWidth;
    }

    DEBUG_PRINTF("gathered info: widths=[%u,%u]\n", minWidth, maxWidth);

    ch_misc_free(info);

    // The guard probe uses the same prefiltering flags as the info query.
    guard = isHyperscanSupported(pattern, prefilterFlags, platform);
}
|
||||
|
||||
void PatternData::buildPcre(const char *pattern, u32 flags) {
|
||||
int options = convertFlagsToPcreOptions(flags);
|
||||
const char *errptr = nullptr;
|
||||
int erroffset = 0;
|
||||
|
||||
compiled = pcre_compile(pattern, options, &errptr, &erroffset, nullptr);
|
||||
if (!compiled) {
|
||||
DEBUG_PRINTF("PCRE failed to compile: %s\n", pattern);
|
||||
string err("PCRE compilation failed: ");
|
||||
err += string(errptr);
|
||||
err += ".";
|
||||
throw CompileError(expr_index, err);
|
||||
}
|
||||
|
||||
extra = pcre_study(compiled, PCRE_STUDY_JIT_COMPILE, &errptr);
|
||||
// Note that it's OK for pcre_study to return NULL if there's nothing
|
||||
// to be found, but a non-NULL error is always bad.
|
||||
if (errptr) {
|
||||
DEBUG_PRINTF("PCRE could not be studied: %s\n", errptr);
|
||||
string err("PCRE compilation failed: ");
|
||||
err += string(errptr);
|
||||
err += ".";
|
||||
throw CompileError(expr_index, err);
|
||||
}
|
||||
|
||||
if (pcre_fullinfo(compiled, extra, PCRE_INFO_SIZE, &compiled_size)) {
|
||||
throw CompileError(PCRE_ERROR_MSG);
|
||||
}
|
||||
|
||||
if (!extra) {
|
||||
study_size = 0;
|
||||
} else {
|
||||
if (pcre_fullinfo(compiled, extra, PCRE_INFO_STUDYSIZE, &study_size)) {
|
||||
throw CompileError(PCRE_ERROR_MSG);
|
||||
}
|
||||
}
|
||||
|
||||
if (pcre_fullinfo(compiled, extra, PCRE_INFO_CAPTURECOUNT, &capture_cnt)) {
|
||||
throw CompileError(PCRE_ERROR_MSG);
|
||||
}
|
||||
|
||||
/* We use the pcre rather than hs to get this information as we may need it
|
||||
* even in the pure unguarded pcre mode where there is no hs available. We
|
||||
* can not use the compile flags due to (*UTF8) verb */
|
||||
unsigned long int opts = 0; // PCRE_INFO_OPTIONS demands an unsigned long
|
||||
if (pcre_fullinfo(compiled, extra, PCRE_INFO_OPTIONS, &opts)) {
|
||||
throw CompileError(PCRE_ERROR_MSG);
|
||||
}
|
||||
utf8 = opts & PCRE_UTF8;
|
||||
}
|
||||
|
||||
size_t PatternData::patternSize() const {
|
||||
size_t len = 0;
|
||||
|
||||
// ch_pattern header.
|
||||
len += sizeof(ch_pattern);
|
||||
|
||||
len = ROUNDUP_N(len, 8);
|
||||
DEBUG_PRINTF("compiled pcre at %zu\n", len);
|
||||
len += compiled_size;
|
||||
|
||||
// PCRE study data, which may be zero.
|
||||
if (study_size) {
|
||||
len = ROUNDUP_N(len, 8);
|
||||
DEBUG_PRINTF("study at %zu\n", len);
|
||||
len += (size_t)study_size;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("pattern size %zu\n", len);
|
||||
return len;
|
||||
}
|
||||
|
||||
/** \brief Write out an ch_pattern structure, which should already be sized
|
||||
* correctly according to PatternData::patternSize. */
|
||||
void PatternData::writePattern(ch_pattern *pattern) const {
|
||||
assert(pattern);
|
||||
assert(ISALIGNED_CL(pattern));
|
||||
|
||||
pattern->id = id;
|
||||
|
||||
u32 flags = 0;
|
||||
if (singlematch) {
|
||||
flags |= CHIMERA_PATTERN_FLAG_SINGLEMATCH;
|
||||
}
|
||||
if (utf8) {
|
||||
flags |= CHIMERA_PATTERN_FLAG_UTF8;
|
||||
}
|
||||
|
||||
pattern->flags = flags;
|
||||
pattern->maxWidth = maxWidth;
|
||||
pattern->minWidth = minWidth == UINT_MAX ? 0 : minWidth;
|
||||
pattern->fixedWidth = fixedWidth;
|
||||
|
||||
// Compiled PCRE pattern.
|
||||
char *ptr = (char *)pattern;
|
||||
ptr += ROUNDUP_N(sizeof(*pattern), 8);
|
||||
DEBUG_PRINTF("compiled pcre at %zu\n", (size_t)(ptr - (char *)pattern));
|
||||
memcpy(ptr, compiled, compiled_size);
|
||||
ptr += compiled_size;
|
||||
|
||||
// PCRE match limits
|
||||
pattern->extra.flags = PCRE_EXTRA_MATCH_LIMIT |
|
||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
|
||||
pattern->extra.match_limit = matchLimit ? matchLimit : 10000000;
|
||||
// Set to avoid segment fault
|
||||
pattern->extra.match_limit_recursion =
|
||||
matchLimitRecursion ? matchLimitRecursion : 1500;
|
||||
|
||||
// PCRE study_data.
|
||||
u32 studyOffset = 0;
|
||||
if (extra) {
|
||||
assert(extra->study_data);
|
||||
ptr = ROUNDUP_PTR(ptr, 8);
|
||||
DEBUG_PRINTF("study at %zu\n", (size_t)(ptr - (char *)pattern));
|
||||
memcpy(ptr, extra->study_data, study_size);
|
||||
studyOffset = (size_t)(ptr - (char *)pattern);
|
||||
|
||||
pattern->extra.flags |= PCRE_EXTRA_STUDY_DATA;
|
||||
pattern->extra.study_data = ptr;
|
||||
|
||||
ptr += study_size;
|
||||
} else {
|
||||
pattern->extra.flags &= ~PCRE_EXTRA_STUDY_DATA;
|
||||
}
|
||||
pattern->studyOffset = studyOffset;
|
||||
|
||||
size_t pcreLen = (ptr - (char *)pattern);
|
||||
assert(pcreLen <= patternSize());
|
||||
pattern->length = (u32)pcreLen;
|
||||
|
||||
// We shouldn't overrun the space we've allocated for this pattern.
|
||||
assert(patternSize() >= (size_t)(ptr - (char *)pattern));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace ch {
|
||||
|
||||
static
|
||||
void ch_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
||||
const unsigned *ids, unsigned elements,
|
||||
unsigned mode, unsigned long int match_limit,
|
||||
unsigned long int match_limit_recursion,
|
||||
const hs_platform_info_t *platform,
|
||||
ch_database_t **out) {
|
||||
vector<unique_ptr<PatternData>> pcres;
|
||||
pcres.reserve(elements);
|
||||
vector<u32> unguarded; // indices of unguarded PCREs.
|
||||
vector<const char *> multiExpr;
|
||||
vector<unsigned int> multiFlags;
|
||||
vector<unsigned int> multiIds;
|
||||
bool allConfirm = true;
|
||||
bool allSingleMatch = true;
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
const char *myExpr = expressions[i];
|
||||
unsigned int myFlags = flags ? flags[i] : 0;
|
||||
unsigned int myId = ids ? ids[i] : 0;
|
||||
|
||||
checkFlags(myFlags);
|
||||
|
||||
// First, build with libpcre. A build failure from libpcre will throw
|
||||
// an exception up to the caller.
|
||||
auto patternData =
|
||||
ue2::make_unique<PatternData>(myExpr, myFlags, i, myId, mode, match_limit,
|
||||
match_limit_recursion, platform);
|
||||
pcres.push_back(move(patternData));
|
||||
PatternData &curr = *pcres.back();
|
||||
|
||||
if (!(myFlags & HS_FLAG_SINGLEMATCH)) {
|
||||
allSingleMatch = false;
|
||||
}
|
||||
|
||||
// in the multimatch, we always run in prefilter mode and accept vacuous
|
||||
// patterns.
|
||||
myFlags |=
|
||||
HS_FLAG_ALLOWEMPTY | HS_FLAG_PREFILTER;
|
||||
|
||||
if (curr.fixedWidth != UINT_MAX) {
|
||||
myFlags |= HS_FLAG_SOM_LEFTMOST;
|
||||
DEBUG_PRINTF("fixed width, turn off prefiltering\n");
|
||||
myFlags &= ~HS_FLAG_PREFILTER;
|
||||
allConfirm = false;
|
||||
|
||||
// Single match can't coexist with SOM.
|
||||
myFlags &= ~HS_FLAG_SINGLEMATCH;
|
||||
}
|
||||
|
||||
if (curr.guard) {
|
||||
// We use the index into the PCREs array as the Hyperscan idx.
|
||||
multiExpr.push_back(myExpr);
|
||||
multiFlags.push_back(myFlags);
|
||||
multiIds.push_back(i);
|
||||
} else {
|
||||
// No Hyperscan support, PCRE is unguarded.
|
||||
unguarded.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("built %zu PCREs, %zu of which are unguarded\n",
|
||||
pcres.size(), unguarded.size());
|
||||
|
||||
// Work out our sizing for the output database.
|
||||
size_t patternSize = 0;
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
size_t len = pcres[i]->patternSize();
|
||||
patternSize += ROUNDUP_CL(len);
|
||||
}
|
||||
DEBUG_PRINTF("pcre bytecode takes %zu bytes\n", patternSize);
|
||||
|
||||
bool noMulti = multiExpr.empty();
|
||||
size_t multiSize = 0;
|
||||
hs_database *multidb = nullptr;
|
||||
if (!noMulti) {
|
||||
hs_compile_error_t *hs_comp_error = nullptr;
|
||||
hs_error_t err = hs_compile_multi(&multiExpr[0], &multiFlags[0],
|
||||
&multiIds[0], multiExpr.size(),
|
||||
HS_MODE_BLOCK, platform, &multidb,
|
||||
&hs_comp_error);
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
assert(hs_comp_error);
|
||||
DEBUG_PRINTF("hs_compile_multi returned error: %s\n",
|
||||
hs_comp_error->message);
|
||||
assert(0);
|
||||
hs_free_compile_error(hs_comp_error);
|
||||
throw CompileError("Internal error.");
|
||||
}
|
||||
|
||||
assert(multidb);
|
||||
err = hs_database_size(multidb, &multiSize);
|
||||
if (err != HS_SUCCESS) {
|
||||
assert(0);
|
||||
throw CompileError("Internal error.");
|
||||
}
|
||||
DEBUG_PRINTF("built hyperscan database with len %zu bytes\n", multiSize);
|
||||
}
|
||||
|
||||
size_t bytecodeLen = sizeof(ch_bytecode) +
|
||||
multiSize + alignof(u32) +
|
||||
(sizeof(u32) * unguarded.size()) +
|
||||
(sizeof(u32) * elements) +
|
||||
patternSize +
|
||||
128; // padding for alignment
|
||||
size_t totalSize = sizeof(ch_database) + bytecodeLen;
|
||||
|
||||
DEBUG_PRINTF("allocating %zu bytes for database\n", totalSize);
|
||||
char *ptr = (char *)ch_database_alloc(totalSize);
|
||||
if (ch_check_alloc(ptr) != CH_SUCCESS) {
|
||||
ch_database_free(ptr);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
|
||||
memset(ptr, 0, totalSize);
|
||||
|
||||
// First, the header.
|
||||
ch_database *hydb = (ch_database *)ptr;
|
||||
hydb->magic = CH_DB_MAGIC;
|
||||
hydb->version = HS_VERSION_32BIT;
|
||||
hydb->length = bytecodeLen;
|
||||
|
||||
// Then, the bytecode.
|
||||
size_t shift = (size_t)hydb->bytes & 0x3f;
|
||||
hydb->bytecode = offsetof(struct ch_database, bytes) - shift;
|
||||
ch_bytecode *db = (ch_bytecode *)((char *)hydb + hydb->bytecode);
|
||||
db->patternCount = elements;
|
||||
db->activeSize = mmbit_size(elements);
|
||||
db->flags = 0;
|
||||
db->length = bytecodeLen;
|
||||
|
||||
if (noMulti) {
|
||||
db->flags |= CHIMERA_FLAG_NO_MULTIMATCH;
|
||||
}
|
||||
if (mode & CH_MODE_GROUPS) {
|
||||
db->flags |= CHIMERA_FLAG_GROUPS;
|
||||
}
|
||||
if (allConfirm) {
|
||||
db->flags |= CHIMERA_FLAG_ALL_CONFIRM;
|
||||
}
|
||||
if (allSingleMatch) {
|
||||
db->flags |= CHIMERA_FLAG_ALL_SINGLE;
|
||||
}
|
||||
|
||||
|
||||
// Find and set the max ovector size by looking at the capture count for
|
||||
// each pcre.
|
||||
u32 maxCaptureGroups = 0;
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
maxCaptureGroups = max(maxCaptureGroups, (u32)pcres[i]->capture_cnt);
|
||||
}
|
||||
db->maxCaptureGroups = maxCaptureGroups;
|
||||
DEBUG_PRINTF("max capture groups is %u\n", maxCaptureGroups);
|
||||
|
||||
if (!noMulti) {
|
||||
DEBUG_PRINTF("write hyperscan database\n");
|
||||
// Write Hyperscan database directly after the header struct, then free it.
|
||||
if (!writeHyperscanDatabase(db, multidb)) {
|
||||
ch_database_free(hydb);
|
||||
hs_free_database(multidb);
|
||||
throw CompileError("Internal error.");
|
||||
}
|
||||
hs_free_database(multidb);
|
||||
} else {
|
||||
db->databaseOffset = ROUNDUP_CL(sizeof(*db));
|
||||
}
|
||||
|
||||
// Then, write our unguarded PCRE list.
|
||||
db->unguardedCount = unguarded.size();
|
||||
db->unguardedOffset = ROUNDUP_N(db->databaseOffset + multiSize, 4);
|
||||
ptr = (char *)db + db->unguardedOffset;
|
||||
copy(unguarded.begin(), unguarded.end(), (u32 *)ptr);
|
||||
|
||||
// Then, write all our compiled PCRE patterns and the lookup table for
|
||||
// them.
|
||||
db->patternOffset = db->unguardedOffset + unguarded.size() * sizeof(u32);
|
||||
u32 *patternOffset = (u32 *)((char *)db + db->patternOffset);
|
||||
u32 offset = ROUNDUP_CL(db->patternOffset + elements * sizeof(u32));
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
*patternOffset = offset;
|
||||
size_t len = pcres[i]->patternSize();
|
||||
ptr = (char *)db + offset;
|
||||
struct ch_pattern *pattern = (struct ch_pattern *)ptr;
|
||||
pcres[i]->writePattern(pattern);
|
||||
DEBUG_PRINTF("wrote pcre %u into offset %u, len %zu\n", i, offset, len);
|
||||
offset += ROUNDUP_CL(len);
|
||||
patternOffset++;
|
||||
}
|
||||
|
||||
assert(offset <= totalSize);
|
||||
assert(hydb->magic == CH_DB_MAGIC);
|
||||
DEBUG_PRINTF("built hybrid database, size %zu bytes\n", totalSize);
|
||||
DEBUG_PRINTF("offset=%u\n", offset);
|
||||
*out = hydb;
|
||||
}
|
||||
|
||||
} // namespace ch
|
||||
|
||||
/**
 * \brief Public entry point: compile one expression into a Chimera database.
 *
 * Validates the arguments, then compiles the expression with ID 0 and both
 * PCRE match limits passed as 0. Every exception is translated into
 * CH_COMPILER_ERROR; nothing propagates to the caller.
 *
 * \param expression Pattern text; must not be NULL.
 * \param flags      Flags for the pattern.
 * \param mode       Database mode; validated by checkMode().
 * \param platform   Target platform, passed through to the compiler.
 * \param db         Receives the database; set to NULL on every failure
 *                   where it is itself non-NULL.
 * \param comp_error Receives the error description on failure; must not be
 *                   NULL.
 * \return CH_SUCCESS or CH_COMPILER_ERROR.
 */
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_compile(const char *expression, unsigned flags,
                               unsigned mode,
                               const hs_platform_info_t *platform,
                               ch_database_t **db,
                               ch_compile_error_t **comp_error) {
    if (!comp_error) {
        if (db) {
            // Clear the caller's output, not just the local parameter.
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return CH_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error =
            generateChimeraCompileError("Invalid parameter: db is NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!expression) {
        *db = nullptr;
        // Adjacent literals instead of a backslash continuation, so that the
        // source layout cannot leak into the message text.
        *comp_error =
            generateChimeraCompileError("Invalid parameter: expressions is "
                                        "NULL", -1);
        return CH_COMPILER_ERROR;
    }

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode
        return CH_COMPILER_ERROR;
    }

    try {
        unsigned id = 0; // single expressions get zero as an ID
        // Internal function to do all the work, now that we've handled all the
        // argument checking.
        ch::ch_compile_multi_int(&expression, &flags, &id, 1, mode, 0, 0,
                                 platform, db);
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateChimeraCompileError(e.reason, e.hasIndex ?
                                                  (int)e.index : -1);
        return CH_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
        return CH_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_einternal);
        return CH_COMPILER_ERROR;
    }

    DEBUG_PRINTF("success!\n");
    return CH_SUCCESS;
}
|
||||
|
||||
/**
 * \brief Public entry point: compile several expressions into one Chimera
 * database.
 *
 * Validates the arguments, then compiles the expressions with both PCRE
 * match limits passed as 0. Every exception is translated into
 * CH_COMPILER_ERROR; nothing propagates to the caller.
 *
 * \param expressions Array of pattern strings; must not be NULL.
 * \param flags       Per-pattern flags, or NULL.
 * \param ids         Per-pattern IDs, or NULL.
 * \param elements    Number of patterns; must not be zero.
 * \param mode        Database mode; validated by checkMode().
 * \param platform    Target platform, passed through to the compiler.
 * \param db          Receives the database; set to NULL on every failure
 *                    where it is itself non-NULL.
 * \param comp_error  Receives the error description on failure; must not be
 *                    NULL.
 * \return CH_SUCCESS or CH_COMPILER_ERROR.
 */
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_compile_multi(const char *const *expressions,
                                     const unsigned *flags, const unsigned *ids,
                                     unsigned elements, unsigned mode,
                                     const hs_platform_info_t *platform,
                                     ch_database_t **db,
                                     ch_compile_error_t **comp_error) {
    if (!comp_error) {
        if (db) {
            // Clear the caller's output, not just the local parameter.
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return CH_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error =
            generateChimeraCompileError("Invalid parameter: db is NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!expressions) {
        *db = nullptr;
        // Adjacent literals instead of a backslash continuation, so that the
        // source layout cannot leak into the message text.
        *comp_error =
            generateChimeraCompileError("Invalid parameter: expressions is "
                                        "NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!elements) {
        *db = nullptr;
        *comp_error = generateChimeraCompileError("Invalid parameter: "
                                                  "elements is zero", -1);
        return CH_COMPILER_ERROR;
    }

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode
        return CH_COMPILER_ERROR;
    }

    try {
        // Internal function to do all the work, now that we've handled all the
        // argument checking.
        ch::ch_compile_multi_int(expressions, flags, ids, elements, mode, 0, 0,
                                 platform, db);
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateChimeraCompileError(e.reason, e.hasIndex ?
                                                  (int)e.index : -1);
        return CH_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
        return CH_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_einternal);
        return CH_COMPILER_ERROR;
    }

    DEBUG_PRINTF("success!\n");
    return CH_SUCCESS;
}
|
||||
|
||||
/**
 * \brief Public entry point: compile several expressions into one Chimera
 * database with caller-supplied PCRE match limits.
 *
 * Identical to ch_compile_multi() except that \p match_limit and
 * \p match_limit_recursion are forwarded to the compiler instead of 0.
 * Every exception is translated into CH_COMPILER_ERROR; nothing propagates
 * to the caller.
 *
 * \param expressions           Array of pattern strings; must not be NULL.
 * \param flags                 Per-pattern flags, or NULL.
 * \param ids                   Per-pattern IDs, or NULL.
 * \param elements              Number of patterns; must not be zero.
 * \param mode                  Database mode; validated by checkMode().
 * \param match_limit           PCRE match limit forwarded to the compiler.
 * \param match_limit_recursion PCRE recursion limit forwarded to the
 *                              compiler.
 * \param platform              Target platform, passed through.
 * \param db                    Receives the database; set to NULL on every
 *                              failure where it is itself non-NULL.
 * \param comp_error            Receives the error description on failure;
 *                              must not be NULL.
 * \return CH_SUCCESS or CH_COMPILER_ERROR.
 */
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_compile_ext_multi(
                                     const char *const *expressions,
                                     const unsigned *flags,
                                     const unsigned *ids,
                                     unsigned elements, unsigned mode,
                                     unsigned long int match_limit,
                                     unsigned long int match_limit_recursion,
                                     const hs_platform_info_t *platform,
                                     ch_database_t **db,
                                     ch_compile_error_t **comp_error) {
    if (!comp_error) {
        if (db) {
            // Clear the caller's output, not just the local parameter.
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return CH_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error =
            generateChimeraCompileError("Invalid parameter: db is NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!expressions) {
        *db = nullptr;
        // Adjacent literals instead of a backslash continuation, so that the
        // source layout cannot leak into the message text.
        *comp_error =
            generateChimeraCompileError("Invalid parameter: expressions is "
                                        "NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!elements) {
        *db = nullptr;
        *comp_error = generateChimeraCompileError("Invalid parameter: "
                                                  "elements is zero", -1);
        return CH_COMPILER_ERROR;
    }

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode
        return CH_COMPILER_ERROR;
    }

    try {
        // Internal function to do all the work, now that we've handled all the
        // argument checking.
        ch::ch_compile_multi_int(expressions, flags, ids, elements, mode,
                                 match_limit, match_limit_recursion, platform,
                                 db);
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateChimeraCompileError(e.reason, e.hasIndex ?
                                                  (int)e.index : -1);
        return CH_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
        return CH_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_einternal);
        return CH_COMPILER_ERROR;
    }

    DEBUG_PRINTF("success!\n");
    return CH_SUCCESS;
}
|
||||
|
||||
/**
 * \brief Public entry point: release a compile error object.
 *
 * Thin C-linkage wrapper around freeChimeraCompileError().
 *
 * \param error Error object to release.
 * \return Always CH_SUCCESS.
 */
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_free_compile_error(ch_compile_error_t *error) {
    // All of the work, including the NULL check, is done by the helper.
    freeChimeraCompileError(error);

    return CH_SUCCESS;
}
|
394
chimera/ch_compile.h
Normal file
394
chimera/ch_compile.h
Normal file
@ -0,0 +1,394 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_COMPILE_H_
|
||||
#define CH_COMPILE_H_
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Chimera compiler API definition.
|
||||
*
|
||||
* Chimera is a hybrid solution of Hyperscan and PCRE.
|
||||
*
|
||||
* This header contains functions for compiling regular expressions into
|
||||
* Chimera databases that can be used by the Chimera runtime.
|
||||
*/
|
||||
|
||||
#include "ch_common.h"
|
||||
#include "hs_compile.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
 * A type containing error details that is returned by the compile calls
 * (@ref ch_compile() and @ref ch_compile_multi()) on failure. The caller may
 * inspect the values returned in this type to determine the cause of failure.
 */
typedef struct ch_compile_error {
    /** A human-readable error message describing the error. */
    char *message;

    /**
     * The zero-based number of the expression that caused the error (if this
     * can be determined). If the error is not specific to an expression, then
     * this value will be less than zero.
     */
    int expression;
} ch_compile_error_t;
|
||||
|
||||
/**
|
||||
* The basic regular expression compiler.
|
||||
*
|
||||
* This is the function call with which an expression is compiled into a
|
||||
* Chimera database which can be passed to the runtime function (
|
||||
* @ref ch_scan())
|
||||
*
|
||||
* @param expression
|
||||
* The NULL-terminated expression to parse. Note that this string must
|
||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||
* any global flags should be specified with the @a flags argument. For
|
||||
* example, the expression `/abc?def/i` should be compiled by providing
|
||||
* `abc?def` as the @a expression, and @ref CH_FLAG_CASELESS as the @a
|
||||
* flags.
|
||||
*
|
||||
* @param flags
|
||||
* Flags which modify the behaviour of the expression. Multiple flags may
|
||||
* be used by ORing them together. Valid values are:
|
||||
* - CH_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - CH_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - CH_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - CH_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||
* expression per stream.
|
||||
* - CH_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - CH_FLAG_UCP - Use Unicode properties for character classes.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole for capturing
|
||||
* groups. One of CH_MODE_NOGROUPS or CH_MODE_GROUPS must be supplied.
|
||||
* See @ref CH_MODE_FLAG for more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref ch_free_database() function.
|
||||
*
|
||||
* @param compile_error
|
||||
* If the compile fails, a pointer to a @ref ch_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* ch_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS is returned on successful compilation; @ref
|
||||
* CH_COMPILER_ERROR on failure, with details provided in the error
|
||||
* parameter.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_compile(const char *expression, unsigned int flags,
|
||||
unsigned int mode,
|
||||
const hs_platform_info_t *platform,
|
||||
ch_database_t **db,
|
||||
ch_compile_error_t **compile_error);
|
||||
|
||||
/**
|
||||
* The multiple regular expression compiler.
|
||||
*
|
||||
* This is the function call with which a set of expressions is compiled into a
|
||||
* database which can be passed to the runtime function (@ref ch_scan()).
|
||||
* Each expression can be labelled with a unique integer which is passed into
|
||||
* the match callback to identify the pattern that has matched.
|
||||
*
|
||||
* @param expressions
|
||||
* Array of NULL-terminated expressions to compile. Note that (as for @ref
|
||||
* ch_compile()) these strings must contain only the pattern to be
|
||||
* matched, with no delimiters or flags. For example, the expression
|
||||
* `/abc?def/i` should be compiled by providing `abc?def` as the first
|
||||
* string in the @a expressions array, and @ref CH_FLAG_CASELESS as the
|
||||
* first value in the @a flags array.
|
||||
*
|
||||
* @param flags
|
||||
* Array of flags which modify the behaviour of each expression. Multiple
|
||||
* flags may be used by ORing them together. Specifying the NULL pointer
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Valid values are:
|
||||
* - CH_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - CH_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - CH_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - CH_FLAG_SINGLEMATCH - Only one match will be generated by patterns
|
||||
* with this match id per stream.
|
||||
* - CH_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - CH_FLAG_UCP - Use Unicode properties for character classes.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
* corresponding pattern in the expressions array. Specifying the NULL
|
||||
* pointer in place of an array will set the ID value for all patterns to
|
||||
* zero.
|
||||
*
|
||||
* @param elements
|
||||
* The number of elements in the input arrays.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flag that affects the database as a whole for capturing
|
||||
* groups. One of CH_MODE_NOGROUPS or CH_MODE_GROUPS must be supplied.
|
||||
* See @ref CH_MODE_FLAG for more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref ch_free_database() function.
|
||||
*
|
||||
* @param compile_error
|
||||
* If the compile fails, a pointer to a @ref ch_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* ch_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS is returned on successful compilation; @ref
|
||||
* CH_COMPILER_ERROR on failure, with details provided in the @a compile_error
|
||||
* parameter.
|
||||
*
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_compile_multi(const char *const *expressions,
|
||||
const unsigned int *flags,
|
||||
const unsigned int *ids,
|
||||
unsigned int elements, unsigned int mode,
|
||||
const hs_platform_info_t *platform,
|
||||
ch_database_t **db,
|
||||
ch_compile_error_t **compile_error);
|
||||
|
||||
/**
|
||||
* The multiple regular expression compiler with extended match limits support.
|
||||
*
|
||||
* This is the function call with which a set of expressions is compiled into a
|
||||
* database in the same way as @ref ch_compile_multi(), but allows additional
|
||||
* parameters to be specified via match_limit and match_limit_recursion to
|
||||
* define match limits for PCRE runtime.
|
||||
*
|
||||
* @param expressions
|
||||
* Array of NULL-terminated expressions to compile. Note that (as for @ref
|
||||
* ch_compile()) these strings must contain only the pattern to be
|
||||
* matched, with no delimiters or flags. For example, the expression
|
||||
* `/abc?def/i` should be compiled by providing `abc?def` as the first
|
||||
* string in the @a expressions array, and @ref CH_FLAG_CASELESS as the
|
||||
* first value in the @a flags array.
|
||||
*
|
||||
* @param flags
|
||||
* Array of flags which modify the behaviour of each expression. Multiple
|
||||
* flags may be used by ORing them together. Specifying the NULL pointer
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Valid values are:
|
||||
* - CH_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - CH_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - CH_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - CH_FLAG_SINGLEMATCH - Only one match will be generated by patterns
|
||||
* with this match id per stream.
|
||||
* - CH_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - CH_FLAG_UCP - Use Unicode properties for character classes.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
* corresponding pattern in the expressions array. Specifying the NULL
|
||||
* pointer in place of an array will set the ID value for all patterns to
|
||||
* zero.
|
||||
*
|
||||
* @param elements
|
||||
* The number of elements in the input arrays.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flag that affects the database as a whole for capturing
|
||||
* groups. One of CH_MODE_NOGROUPS or CH_MODE_GROUPS must be supplied.
|
||||
* See @ref CH_MODE_FLAG for more details.
|
||||
*
|
||||
* @param match_limit
|
||||
* A limit from pcre_extra on the number of match function calls in PCRE,
|
||||
* to limit backtracking that can take place.
|
||||
*
|
||||
* @param match_limit_recursion
|
||||
* A limit from pcre_extra on the recursion depth of match function
|
||||
* in PCRE.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref ch_free_database() function.
|
||||
*
|
||||
* @param compile_error
|
||||
* If the compile fails, a pointer to a @ref ch_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* ch_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS is returned on successful compilation; @ref
|
||||
* CH_COMPILER_ERROR on failure, with details provided in the @a compile_error
|
||||
* parameter.
|
||||
*
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_compile_ext_multi(const char *const *expressions,
|
||||
const unsigned int *flags,
|
||||
const unsigned int *ids,
|
||||
unsigned int elements,
|
||||
unsigned int mode,
|
||||
unsigned long int match_limit,
|
||||
unsigned long int match_limit_recursion,
|
||||
const hs_platform_info_t *platform,
|
||||
ch_database_t **db,
|
||||
ch_compile_error_t **compile_error);
|
||||
|
||||
/**
|
||||
* Free an error structure generated by @ref ch_compile(), @ref
|
||||
* ch_compile_multi() or @ref ch_compile_ext_multi().
|
||||
*
|
||||
* @param error
|
||||
* The @ref ch_compile_error_t to be freed. NULL may also be safely
|
||||
* provided.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_free_compile_error(ch_compile_error_t *error);
|
||||
|
||||
/**
|
||||
* @defgroup CH_PATTERN_FLAG Pattern flags
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compile flag: Set case-insensitive matching.
|
||||
*
|
||||
* This flag sets the expression to be matched case-insensitively by default.
|
||||
* The expression may still use PCRE tokens (notably `(?i)` and
|
||||
* `(?-i)`) to switch case-insensitive matching on and off.
|
||||
*/
|
||||
#define CH_FLAG_CASELESS 1
|
||||
|
||||
/**
|
||||
* Compile flag: Matching a `.` will not exclude newlines.
|
||||
*
|
||||
* This flag sets any instances of the `.` token to match newline characters as
|
||||
* well as all other characters. The PCRE specification states that the `.`
|
||||
* token does not match newline characters by default, so without this flag the
|
||||
* `.` token will not cross line boundaries.
|
||||
*/
|
||||
#define CH_FLAG_DOTALL 2
|
||||
|
||||
/**
|
||||
* Compile flag: Set multi-line anchoring.
|
||||
*
|
||||
* This flag instructs the expression to make the `^` and `$` tokens match
|
||||
* newline characters as well as the start and end of the stream. If this flag
|
||||
* is not specified, the `^` token will only ever match at the start of a
|
||||
* stream, and the `$` token will only ever match at the end of a stream within
|
||||
* the guidelines of the PCRE specification.
|
||||
*/
|
||||
#define CH_FLAG_MULTILINE 4
|
||||
|
||||
/**
|
||||
* Compile flag: Set single-match only mode.
|
||||
*
|
||||
* This flag sets the expression's match ID to match at most once; only the
|
||||
* first match for each invocation of @ref ch_scan() will be returned.
|
||||
*
|
||||
*/
|
||||
#define CH_FLAG_SINGLEMATCH 8
|
||||
|
||||
/**
|
||||
* Compile flag: Enable UTF-8 mode for this expression.
|
||||
*
|
||||
* This flag instructs Chimera to treat the pattern as a sequence of UTF-8
|
||||
* characters. The results of scanning invalid UTF-8 sequences with a Chimera
|
||||
* library that has been compiled with one or more patterns using this flag are
|
||||
* undefined.
|
||||
*/
|
||||
#define CH_FLAG_UTF8 32
|
||||
|
||||
/**
|
||||
* Compile flag: Enable Unicode property support for this expression.
|
||||
*
|
||||
* This flag instructs Chimera to use Unicode properties, rather than the
|
||||
* default ASCII interpretations, for character mnemonics like `\w` and `\s` as
|
||||
* well as the POSIX character classes. It is only meaningful in conjunction
|
||||
* with @ref CH_FLAG_UTF8.
|
||||
*/
|
||||
#define CH_FLAG_UCP 64
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @defgroup CH_MODE_FLAG Compile mode flags
|
||||
*
|
||||
* The mode flags are used as values for the mode parameter of the various
|
||||
* compile calls (@ref ch_compile(), @ref ch_compile_multi() and @ref
* ch_compile_ext_multi()).
|
||||
*
|
||||
* By default, the matcher will only supply the start and end offsets of the
|
||||
* match when the match callback is called. Using mode flag @ref CH_MODE_GROUPS
|
||||
* will also fill the `captured` array with the start and end offsets of all
|
||||
* the capturing groups specified by the pattern that has matched.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Disable capturing groups.
|
||||
*/
|
||||
#define CH_MODE_NOGROUPS 0
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Enable capturing groups.
|
||||
*/
|
||||
#define CH_MODE_GROUPS 1048576
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_COMPILE_H_ */
|
126
chimera/ch_database.c
Normal file
126
chimera/ch_database.c
Normal file
@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Chimera: database construction, etc.
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "allocator.h"
|
||||
#include "database.h"
|
||||
#include "hs.h"
|
||||
#include "ch.h"
|
||||
#include "hs_internal.h"
|
||||
#include "ch_common.h"
|
||||
#include "ch_alloc.h"
|
||||
#include "ch_database.h"
|
||||
#include "ch_internal.h"
|
||||
|
||||
static really_inline
|
||||
int db_correctly_aligned(const void *db) {
|
||||
return ISALIGNED_N(db, alignof(unsigned long long));
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_free_database(ch_database_t *hydb) {
|
||||
if (hydb && hydb->magic != CH_DB_MAGIC) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
ch_database_free(hydb);
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_database_size(const ch_database_t *hydb, size_t *size) {
|
||||
if (!size) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
ch_error_t ret = hydbIsValid(hydb);
|
||||
if (unlikely(ret != CH_SUCCESS)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
*size = sizeof(struct ch_database) + hydb->length;
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
/** \brief Identifier prepended to database info. */
|
||||
static const char CHIMERA_IDENT[] = "Chimera ";
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_database_info(const ch_database_t *hydb, char **info) {
|
||||
if (!info) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
*info = NULL;
|
||||
|
||||
if (!hydb || !db_correctly_aligned(hydb) || hydb->magic != CH_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
const struct ch_bytecode *bytecode = ch_get_bytecode(hydb);
|
||||
char noMulti = (bytecode->flags & CHIMERA_FLAG_NO_MULTIMATCH);
|
||||
if (noMulti) {
|
||||
size_t len = strlen(CHIMERA_IDENT);
|
||||
*info = ch_misc_alloc(len + 1);
|
||||
if (!(*info)) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
memcpy((*info), CHIMERA_IDENT, len);
|
||||
(*info)[len] = '\0';
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
char *hsinfo = NULL;
|
||||
hs_error_t ret = hs_database_info(getHyperscanDatabase(bytecode), &hsinfo);
|
||||
if (ret != HS_SUCCESS) {
|
||||
assert(!hsinfo);
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t hybridlen = strlen(CHIMERA_IDENT);
|
||||
size_t hslen = strlen(hsinfo);
|
||||
*info = ch_misc_alloc(hybridlen + hslen + 1);
|
||||
if (!(*info)) {
|
||||
ch_misc_free(hsinfo);
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
memcpy((*info), CHIMERA_IDENT, hybridlen);
|
||||
memcpy((*info) + hybridlen, hsinfo, hslen);
|
||||
(*info)[hybridlen + hslen] = '\0';
|
||||
ch_misc_free(hsinfo);
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
158
chimera/ch_database.h
Normal file
158
chimera/ch_database.h
Normal file
@ -0,0 +1,158 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime code for ch_database manipulation.
|
||||
*/
|
||||
|
||||
#ifndef CH_DATABASE_H_
|
||||
#define CH_DATABASE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define PCRE_STATIC
|
||||
#include <pcre.h>
|
||||
|
||||
#include "ch_compile.h" // for CH_MODE_ flags
|
||||
#include "ue2common.h"
|
||||
#include "hs_version.h"
|
||||
#include "hs.h"
|
||||
|
||||
#define CH_DB_MAGIC 0xdedededeU //!< Magic number stored in \ref ch_database
|
||||
|
||||
/** \brief Main Chimera database header. */
|
||||
struct ch_database {
|
||||
u32 magic; //!< must be \ref CH_DB_MAGIC
|
||||
u32 version; //!< release version
|
||||
u32 length; //!< total allocated length in bytes
|
||||
u32 reserved0; //!< unused
|
||||
u32 reserved1; //!< unused
|
||||
u32 bytecode; //!< offset relative to db start
|
||||
u32 padding[16]; //!< padding for alignment of rest of bytecode
|
||||
char bytes[];
|
||||
};
|
||||
|
||||
/** \brief Chimera bytecode header, which follows the \ref ch_database and is
|
||||
* always 64-byte aligned. */
|
||||
struct ch_bytecode {
|
||||
u32 length; //!< length of bytecode including this header struct
|
||||
u32 flags; //!< whole-database flags (CHIMERA_FLAG_NO_MULTIMATCH,
|
||||
// CHIMERA_FLAG_GROUPS)
|
||||
u32 patternCount; //!< total number of patterns
|
||||
u32 activeSize; //!< size of mmbit to store active pattern ids
|
||||
u32 databaseOffset; //!< offset for database following \ref ch_bytecode
|
||||
// header
|
||||
u32 patternOffset; //!< points to an array of u32 offsets, each pointing to
|
||||
// a \ref ch_pattern
|
||||
u32 unguardedOffset; //!< pointer to a list of unguarded pattern indices
|
||||
u32 unguardedCount; //!< number of unguarded patterns
|
||||
u32 maxCaptureGroups; //!< max number of capture groups used by any pattern
|
||||
};
|
||||
|
||||
/** \brief Per-pattern header.
|
||||
*
|
||||
* struct is followed in bytecode by:
|
||||
* 1. pcre bytecode (always present)
|
||||
* 2. pcre study data (sometimes)
|
||||
*/
|
||||
struct ch_pattern {
|
||||
u32 id; //!< pattern ID to report to the user
|
||||
u32 flags; //!< per-pattern flags (e.g. \ref CHIMERA_PATTERN_FLAG_UTF8)
|
||||
u32 maxWidth; //!< maximum width of a match, or UINT_MAX for inf.
|
||||
u32 minWidth; //!< minimum width of a match.
|
||||
u32 fixedWidth;//!< pattern has fixed width.
|
||||
u32 studyOffset; //!< offset relative to struct start of study data,
|
||||
// or zero if there is none
|
||||
u32 length; //!< length of struct plus pcre bytecode and study data
|
||||
pcre_extra extra; //!< pcre_extra struct, used to store study data ptr for
|
||||
// the currently-running pcre at runtime.
|
||||
};
|
||||
|
||||
static really_inline
|
||||
const void *ch_get_bytecode(const struct ch_database *db) {
|
||||
assert(db);
|
||||
const void *bytecode = (const char *)db + db->bytecode;
|
||||
assert(ISALIGNED_16(bytecode));
|
||||
return bytecode;
|
||||
}
|
||||
|
||||
struct hs_database;
|
||||
|
||||
static really_inline
|
||||
const struct hs_database *getHyperscanDatabase(const struct ch_bytecode *db) {
|
||||
assert(db);
|
||||
const char *ptr = (const char *)db;
|
||||
const struct hs_database *hs_db;
|
||||
hs_db = (const struct hs_database *)(ptr + db->databaseOffset);
|
||||
assert(ISALIGNED_CL(hs_db));
|
||||
return hs_db;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u32 *getUnguarded(const struct ch_bytecode *db) {
|
||||
assert(db);
|
||||
const char *ptr = (const char *)db;
|
||||
const u32 *unguarded = (const u32 *)(ptr + db->unguardedOffset);
|
||||
assert(ISALIGNED_N(unguarded, sizeof(u32)));
|
||||
return unguarded;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct ch_pattern *getPattern(const struct ch_bytecode *db, u32 i) {
|
||||
assert(db);
|
||||
assert(i < db->patternCount);
|
||||
const char *ptr = (const char *)db;
|
||||
const u32 *patternOffset = (const u32 *)(ptr + db->patternOffset);
|
||||
assert(patternOffset[i] < db->length);
|
||||
return (const struct ch_pattern *)(ptr + patternOffset[i]);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
ch_error_t hydbIsValid(const struct ch_database *hydb) {
|
||||
if (!hydb || hydb->magic != CH_DB_MAGIC) {
|
||||
DEBUG_PRINTF("bad magic (%u != %u)\n", hydb->magic, CH_DB_MAGIC);
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
if (hydb->version != HS_VERSION_32BIT) {
|
||||
DEBUG_PRINTF("bad version\n");
|
||||
return CH_DB_VERSION_ERROR;
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_DATABASE_H_ */
|
||||
|
44
chimera/ch_internal.h
Normal file
44
chimera/ch_internal.h
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Chimera: data structures and internals.
|
||||
*/
|
||||
|
||||
#ifndef CH_INTERNAL_H
|
||||
#define CH_INTERNAL_H
|
||||
|
||||
#define CHIMERA_FLAG_NO_MULTIMATCH 1 //!< Don't run a multimatch scan
|
||||
#define CHIMERA_FLAG_GROUPS 2 //!< Return capturing groups
|
||||
#define CHIMERA_FLAG_ALL_CONFIRM 4 //!< All patterns need confirm
|
||||
#define CHIMERA_FLAG_ALL_SINGLE 8 //!< All patterns need only one match
|
||||
|
||||
#define CHIMERA_PATTERN_FLAG_SINGLEMATCH 1 //!< only report the first match
|
||||
#define CHIMERA_PATTERN_FLAG_UTF8 2 //!< pattern is in UTF-8 mode
|
||||
|
||||
#endif
|
629
chimera/ch_runtime.c
Normal file
629
chimera/ch_runtime.c
Normal file
@ -0,0 +1,629 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Chimera: main runtime.
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ch.h"
|
||||
#include "hs.h"
|
||||
#include "hs_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "ch_database.h"
|
||||
#include "ch_internal.h"
|
||||
#include "ch_scratch.h"
|
||||
#include "util/multibit.h"
|
||||
#include "util/unicode_def.h"
|
||||
|
||||
typedef struct queue_item PQ_T;
|
||||
|
||||
static
|
||||
char PQ_COMP(PQ_T *pqc_items, int a, int b) {
|
||||
if ((pqc_items)[a].to != (pqc_items)[b].to) {
|
||||
return (pqc_items)[a].to < (pqc_items)[b].to;
|
||||
} else if ((pqc_items)[a].from != (pqc_items)[b].from) {
|
||||
return (pqc_items)[a].from < (pqc_items)[b].from;
|
||||
} else {
|
||||
return (pqc_items)[a].id < (pqc_items)[b].id;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
char PQ_COMP_B(PQ_T *pqc_items, int a, PQ_T b_fixed) {
|
||||
if ((pqc_items)[a].to != (b_fixed).to) {
|
||||
return (pqc_items)[a].to < (b_fixed).to;
|
||||
} else if ((pqc_items)[a].from != (b_fixed).from) {
|
||||
return (pqc_items)[a].from < (b_fixed).from;
|
||||
} else {
|
||||
return (pqc_items)[a].id < b_fixed.id;
|
||||
}
|
||||
}
|
||||
|
||||
#include "util/pqueue.h"
|
||||
|
||||
static really_inline
|
||||
void pq_insert_with(struct match_pq *pq, int from, int to, u32 id) {
|
||||
DEBUG_PRINTF("inserting pattern%u in pq at %u\n", id, to);
|
||||
struct queue_item temp = {
|
||||
.from = from,
|
||||
.to = to,
|
||||
.id = id,
|
||||
};
|
||||
|
||||
pq_insert(pq->item, pq->size, temp);
|
||||
++pq->size;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void pq_pop_nice(struct match_pq *pq) {
|
||||
pq_pop(pq->item, pq->size);
|
||||
pq->size--;
|
||||
}
|
||||
|
||||
/** dummy event handler for use when user does not provide one */
|
||||
static
|
||||
int null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
|
||||
UNUSED unsigned long long to, UNUSED unsigned flags,
|
||||
UNUSED unsigned size, UNUSED const ch_capture_t *captured,
|
||||
UNUSED void *ctxt) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** \brief Chimera runtime context. */
|
||||
struct HybridContext {
|
||||
const char *data; //!< buffer being scanned
|
||||
u32 length; //!< length of data buffer
|
||||
u32 valid_utf8_highwater; //!< UTF-8 has been validated up to here.
|
||||
const struct ch_bytecode *db;
|
||||
struct ch_scratch *scratch;
|
||||
struct match_pq *pq;
|
||||
/** \brief user-supplied match callback */
|
||||
int (*match_callback)(unsigned int id, unsigned long long from,
|
||||
unsigned long long to, unsigned int flags,
|
||||
unsigned int size, const ch_capture_t *capture,
|
||||
void *ctx);
|
||||
/** \brief user-supplied error callback */
|
||||
int (*error_callback)(ch_error_event_t error_type, unsigned int id,
|
||||
void *info, void *ctx);
|
||||
/** \brief user-supplied context */
|
||||
void *context;
|
||||
};
|
||||
|
||||
// Internal PCRE func.
|
||||
extern int _pcre_valid_utf(const unsigned char *, int, int *);
|
||||
|
||||
/** UTF-8 validity check. Returns >0 if the given region of the data is valid
|
||||
* UTF-8, 0 otherwise. */
|
||||
static
|
||||
char isValidUTF8(struct HybridContext *hyctx, u32 end) {
|
||||
assert(hyctx);
|
||||
|
||||
if (hyctx->valid_utf8_highwater >= end) {
|
||||
return 1; // Already validated.
|
||||
}
|
||||
|
||||
const unsigned char *data =
|
||||
(const unsigned char *)hyctx->data + hyctx->valid_utf8_highwater;
|
||||
int validate_len = end - hyctx->valid_utf8_highwater;
|
||||
|
||||
DEBUG_PRINTF("validating %d bytes\n", validate_len);
|
||||
|
||||
int erroroffset = 0;
|
||||
if (_pcre_valid_utf(data, validate_len, &erroroffset)) {
|
||||
DEBUG_PRINTF("UTF8 invalid at offset %d\n", erroroffset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
hyctx->valid_utf8_highwater = end;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static
|
||||
const pcre *getPcre(const struct ch_pattern *pattern) {
|
||||
const char *ptr = (const char *)pattern;
|
||||
const pcre *p = (const pcre *)(ptr + ROUNDUP_N(sizeof(*pattern), 8));
|
||||
assert(ISALIGNED_N(p, 8));
|
||||
return p;
|
||||
}
|
||||
|
||||
/** \brief Fill the Chimera groups array from a pcre_exec ovector. */
|
||||
static
|
||||
void fillGroupsFromOvector(ch_capture_t *groups, int numPairs, int *ovector) {
|
||||
assert(groups);
|
||||
assert(ISALIGNED_N(groups, alignof(ch_capture_t)));
|
||||
|
||||
DEBUG_PRINTF("filling %d groups (@ %p) from pcre ovector\n",
|
||||
numPairs, groups);
|
||||
|
||||
for (int i = 0; i < numPairs * 2; i += 2) {
|
||||
if (ovector[i] == -1) {
|
||||
groups->flags = CH_CAPTURE_FLAG_INACTIVE;
|
||||
} else {
|
||||
groups->flags = CH_CAPTURE_FLAG_ACTIVE;
|
||||
assert(ovector[i] <= ovector[i + 1]);
|
||||
groups->from = ovector[i];
|
||||
groups->to = ovector[i + 1];
|
||||
}
|
||||
++groups;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
ch_error_t handlePcreNonMatch(const struct ch_pattern *pattern, int rv,
|
||||
ch_error_event_handler onError,
|
||||
void *userContext) {
|
||||
assert(rv < 0);
|
||||
|
||||
if (rv == PCRE_ERROR_NOMATCH) {
|
||||
DEBUG_PRINTF("no match found by libpcre\n");
|
||||
return CH_SUCCESS;
|
||||
} else if (rv == PCRE_ERROR_MATCHLIMIT) {
|
||||
DEBUG_PRINTF("pcre hit match limit\n");
|
||||
if (onError) {
|
||||
return onError(CH_ERROR_MATCHLIMIT, pattern->id, NULL,
|
||||
userContext);
|
||||
}
|
||||
return CH_SUCCESS;
|
||||
} else if (rv == PCRE_ERROR_RECURSIONLIMIT) {
|
||||
DEBUG_PRINTF("pcre hit recursion limit\n");
|
||||
if (onError) {
|
||||
return onError(CH_ERROR_RECURSIONLIMIT, pattern->id, NULL,
|
||||
userContext);
|
||||
}
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
// All other errors not handled above are fatal.
|
||||
return CH_FAIL_INTERNAL;
|
||||
}
|
||||
|
||||
static
|
||||
ch_error_t scanPcre(struct HybridContext *hyctx, UNUSED unsigned int length,
|
||||
unsigned int offset, u32 id) {
|
||||
const char *data = hyctx->data;
|
||||
unsigned int full_length = hyctx->length;
|
||||
ch_error_event_handler onError = hyctx->error_callback;
|
||||
void *userContext = hyctx->context;
|
||||
|
||||
const struct ch_pattern *pattern = getPattern(hyctx->db, id);
|
||||
const pcre *p = getPcre(pattern);
|
||||
|
||||
// Set up the PCRE extra block.
|
||||
const pcre_extra *extra = &pattern->extra;
|
||||
|
||||
int startoffset = offset;
|
||||
|
||||
int *ovector = hyctx->scratch->ovector;
|
||||
int ovectorSize = (hyctx->scratch->maxCaptureGroups + 1) * 3;
|
||||
assert(ovectorSize >= 2);
|
||||
|
||||
DEBUG_PRINTF("scanning %u bytes, pattern %u, startoffset %d\n",
|
||||
length, id, startoffset);
|
||||
|
||||
int options = 0;
|
||||
if (pattern->flags & CHIMERA_PATTERN_FLAG_UTF8) {
|
||||
// We do our own UTF-8 validation.
|
||||
options |= PCRE_NO_UTF8_CHECK;
|
||||
if (!isValidUTF8(hyctx, full_length)) {
|
||||
return handlePcreNonMatch(pattern, PCRE_ERROR_BADUTF8, onError,
|
||||
userContext);
|
||||
}
|
||||
}
|
||||
|
||||
int rv = pcre_exec(p, extra, data, full_length, startoffset, options,
|
||||
ovector, ovectorSize);
|
||||
|
||||
DEBUG_PRINTF("pcre return code is %d\n", rv);
|
||||
|
||||
// Handle all non-match or error cases, all of which involve us
|
||||
// terminating the loop.
|
||||
if (rv < 0) {
|
||||
return handlePcreNonMatch(pattern, rv, onError, userContext);
|
||||
}
|
||||
|
||||
// We've found a match, and we should always have room for at least the
|
||||
// start and end offsets in our ovector. Pass this info to the user.
|
||||
assert(rv >= 1);
|
||||
assert(rv < ovectorSize);
|
||||
int from = ovector[0];
|
||||
int to = ovector[1];
|
||||
DEBUG_PRINTF("match %d -> %d\n", from, to);
|
||||
|
||||
struct ch_patterndata *pd = hyctx->scratch->patternData + id;
|
||||
|
||||
if (hyctx->db->flags & CHIMERA_FLAG_GROUPS) {
|
||||
fillGroupsFromOvector(pd->match, rv, ovector);
|
||||
} else {
|
||||
rv = 0;
|
||||
}
|
||||
pd->groupCount = (u32)rv;
|
||||
|
||||
// Insert new matched item to the queue
|
||||
pq_insert_with(hyctx->pq, from, to, id);
|
||||
|
||||
// Next scan starts at the first codepoint after the match. It's
|
||||
// possible that we have a vacuous match, in which case we must step
|
||||
// past it to ensure that we always progress.
|
||||
if (from != to) {
|
||||
startoffset = to;
|
||||
} else if (pattern->flags & CHIMERA_PATTERN_FLAG_UTF8) {
|
||||
startoffset = to + 1;
|
||||
while (startoffset < (int)full_length &&
|
||||
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
|
||||
++startoffset;
|
||||
}
|
||||
} else {
|
||||
startoffset = to + 1;
|
||||
}
|
||||
|
||||
pd->scanStart = startoffset;
|
||||
DEBUG_PRINTF("new offset %u\n", pd->scanStart);
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
static
|
||||
ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id,
|
||||
unsigned long long from, unsigned long long to) {
|
||||
ch_match_event_handler onEvent = hyctx->match_callback;
|
||||
void *userContext = hyctx->context;
|
||||
DEBUG_PRINTF("priority queue size %u\n", hyctx->pq->size);
|
||||
while (hyctx->pq->size) {
|
||||
u32 num_item = hyctx->pq->size;
|
||||
struct queue_item *item = pq_top(hyctx->pq->item);
|
||||
size_t top_from = item->from;
|
||||
size_t top_to = item->to;
|
||||
u32 top_id = item->id;
|
||||
|
||||
if (top_to > to) {
|
||||
pq_insert_with(hyctx->pq, from, to, id);
|
||||
break;
|
||||
}
|
||||
pq_pop_nice(hyctx->pq);
|
||||
|
||||
const struct ch_pattern *pattern = getPattern(hyctx->db, top_id);
|
||||
struct ch_patterndata *pd = hyctx->scratch->patternData + top_id;
|
||||
|
||||
// Report match for pattern
|
||||
DEBUG_PRINTF("trigger match@%zu\n", top_to);
|
||||
ch_callback_t cbrv =
|
||||
onEvent(pattern->id, top_from, top_to, 0 /* flags */,
|
||||
pd->groupCount, pd->match, userContext);
|
||||
|
||||
if (cbrv == CH_CALLBACK_TERMINATE) {
|
||||
DEBUG_PRINTF("user callback told us to terminate scanning\n");
|
||||
return CH_SCAN_TERMINATED;
|
||||
} else if (cbrv == CH_CALLBACK_SKIP_PATTERN) {
|
||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||
pd->scanStart = hyctx->length;
|
||||
}
|
||||
|
||||
if (top_id == id) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Push a new match to replace the old one
|
||||
unsigned int start = pd->scanStart;
|
||||
unsigned int len = hyctx->length - pd->scanStart;
|
||||
if (hyctx->length >= pd->scanStart &&
|
||||
!(pattern->flags & CHIMERA_PATTERN_FLAG_SINGLEMATCH)) {
|
||||
DEBUG_PRINTF("get a new match item\n");
|
||||
int ret = scanPcre(hyctx, len, start, top_id);
|
||||
|
||||
if (ret == CH_CALLBACK_TERMINATE) {
|
||||
DEBUG_PRINTF("user callback told us to terminate scanning\n");
|
||||
return CH_SCAN_TERMINATED;
|
||||
} else if (ret == CH_CALLBACK_SKIP_PATTERN) {
|
||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||
pd->scanStart = hyctx->length;
|
||||
ret = CH_SUCCESS;
|
||||
} else if (ret == CH_FAIL_INTERNAL) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// No further match is found
|
||||
if (hyctx->pq->size == num_item - 1) {
|
||||
pd->scanStart = hyctx->length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
/** \brief Callback used for internal Hyperscan multi-matcher. */
|
||||
static
|
||||
int multiCallback(unsigned int id, unsigned long long from,
|
||||
unsigned long long to, UNUSED unsigned int flags,
|
||||
void *ctx) {
|
||||
assert(ctx);
|
||||
struct HybridContext *hyctx = ctx;
|
||||
|
||||
DEBUG_PRINTF("match for ID %u at offset %llu\n", id, to);
|
||||
assert(id < hyctx->db->patternCount);
|
||||
|
||||
const struct ch_pattern *pattern = getPattern(hyctx->db, id);
|
||||
struct ch_patterndata *pd = hyctx->scratch->patternData + id;
|
||||
char needConfirm = pattern->fixedWidth == ~0U;
|
||||
|
||||
if (needConfirm &&
|
||||
mmbit_isset(hyctx->scratch->active, hyctx->db->patternCount, id)) {
|
||||
if ((hyctx->db->flags & CHIMERA_FLAG_ALL_CONFIRM) &&
|
||||
mmbit_all(hyctx->scratch->active, hyctx->db->patternCount)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
// Store the fact that we've seen this bit.
|
||||
char already = mmbit_set(hyctx->scratch->active,
|
||||
hyctx->db->patternCount, id);
|
||||
DEBUG_PRINTF("match from %u to %llu\n", pd->scanStart, to);
|
||||
|
||||
if (!already) {
|
||||
pd->scanStart = 0;
|
||||
} else if (to < pd->scanStart + pattern->minWidth) {
|
||||
return 0;
|
||||
} else if (pattern->flags & CHIMERA_PATTERN_FLAG_SINGLEMATCH) {
|
||||
if ((hyctx->db->flags & CHIMERA_FLAG_ALL_SINGLE) &&
|
||||
mmbit_all(hyctx->scratch->active, hyctx->db->patternCount)) {
|
||||
return 1;
|
||||
}
|
||||
// Note: we may have unordered match from Hyperscan,
|
||||
// thus possibly get to < pd->scanStart.
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ret = HS_SUCCESS;
|
||||
unsigned int start = pd->scanStart;
|
||||
unsigned int len = hyctx->length - pd->scanStart;
|
||||
assert(hyctx->length >= pd->scanStart);
|
||||
const char *data = hyctx->data;
|
||||
if (needConfirm) {
|
||||
DEBUG_PRINTF("run confirm for the first time\n");
|
||||
ret = scanPcre(hyctx, len, start, id);
|
||||
hyctx->scratch->ret = ret;
|
||||
if (ret == CH_CALLBACK_TERMINATE) {
|
||||
DEBUG_PRINTF("user callback told us to terminate scanning\n");
|
||||
return HS_SCAN_TERMINATED;
|
||||
} else if (ret == CH_CALLBACK_SKIP_PATTERN) {
|
||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||
pd->scanStart = hyctx->length;
|
||||
ret = HS_SUCCESS;
|
||||
} else if (ret == CH_FAIL_INTERNAL) {
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
if (already) {
|
||||
DEBUG_PRINTF("catch up with new matches\n");
|
||||
ret = catchupPcre(hyctx, id, from, to);
|
||||
|
||||
hyctx->scratch->ret = ret;
|
||||
if (pd->scanStart >= hyctx->length) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
int startoffset = 0;
|
||||
// Next scan starts at the first codepoint after the match. It's
|
||||
// possible that we have a vacuous match, in which case we must step
|
||||
// past it to ensure that we always progress.
|
||||
if (from != to) {
|
||||
startoffset = to;
|
||||
} else if (pattern->flags & CHIMERA_PATTERN_FLAG_UTF8) {
|
||||
startoffset = to + 1;
|
||||
while (startoffset < (int)hyctx->length &&
|
||||
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
|
||||
++startoffset;
|
||||
}
|
||||
} else {
|
||||
startoffset = to + 1;
|
||||
}
|
||||
pd->scanStart = startoffset;
|
||||
int rv = 0;
|
||||
if (hyctx->db->flags & CHIMERA_FLAG_GROUPS) {
|
||||
ch_capture_t *groups = pd->match;
|
||||
groups->flags = CH_CAPTURE_FLAG_ACTIVE;
|
||||
groups->from = from;
|
||||
groups->to = to;
|
||||
rv = 1;
|
||||
}
|
||||
pd->groupCount = (u32)rv;
|
||||
pq_insert_with(hyctx->pq, from, to, id);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
hs_error_t scanHyperscan(struct HybridContext *hyctx, const char *data,
|
||||
unsigned int length) {
|
||||
DEBUG_PRINTF("scanning %u bytes with Hyperscan\n", length);
|
||||
const struct ch_bytecode *hydb = hyctx->db;
|
||||
const hs_database_t *db = getHyperscanDatabase(hydb);
|
||||
hs_scratch_t *scratch = hyctx->scratch->multi_scratch;
|
||||
|
||||
hs_error_t err = hs_scan(db, data, length, 0, scratch, multiCallback,
|
||||
hyctx);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/** \brief Init match priority queue.
|
||||
*
|
||||
* Add a first match offset for each pattern that is not supported by Hyperscan
|
||||
* with prefiltering.
|
||||
*/
|
||||
static really_inline
|
||||
ch_error_t initQueue(struct HybridContext *hyctx, struct match_pq *pq) {
|
||||
const struct ch_bytecode *db = hyctx->db;
|
||||
|
||||
u8 *active = hyctx->scratch->active;
|
||||
mmbit_clear(active, db->patternCount);
|
||||
|
||||
// Init match queue size
|
||||
pq->size = 0;
|
||||
|
||||
unsigned int length = hyctx->length;
|
||||
const u32 *unguarded = getUnguarded(db);
|
||||
for (u32 i = 0; i < db->unguardedCount; i++) {
|
||||
u32 patternId = unguarded[i];
|
||||
DEBUG_PRINTF("switch on unguarded pcre %u\n", patternId);
|
||||
mmbit_set(active, db->patternCount, patternId);
|
||||
|
||||
DEBUG_PRINTF("get a new match item\n");
|
||||
int ret = scanPcre(hyctx, length, 0, patternId);
|
||||
|
||||
struct ch_patterndata *pd = hyctx->scratch->patternData + patternId;
|
||||
if (ret == CH_CALLBACK_TERMINATE) {
|
||||
DEBUG_PRINTF("user callback told us to terminate scanning\n");
|
||||
return CH_SCAN_TERMINATED;
|
||||
} else if (ret == CH_CALLBACK_SKIP_PATTERN) {
|
||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||
pd->scanStart = length;
|
||||
ret = CH_SUCCESS;
|
||||
} else if (ret == CH_FAIL_INTERNAL) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
ch_error_t ch_scan_i(const ch_database_t *hydb,
|
||||
const char *data, unsigned int length,
|
||||
UNUSED unsigned int flags,
|
||||
ch_scratch_t *scratch,
|
||||
ch_match_event_handler onEvent,
|
||||
ch_error_event_handler onError,
|
||||
void *userContext) {
|
||||
if (unlikely(!hydb || !scratch || !data)) {
|
||||
DEBUG_PRINTF("args invalid\n");
|
||||
return CH_INVALID;
|
||||
}
|
||||
ch_error_t ret = hydbIsValid(hydb);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("database invalid\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!ISALIGNED_CL(scratch)) {
|
||||
DEBUG_PRINTF("bad alignment %p\n", scratch);
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
if (scratch->magic != CH_SCRATCH_MAGIC) {
|
||||
DEBUG_PRINTF("scratch invalid\n");
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
if (unlikely(markScratchInUse(scratch))) {
|
||||
return CH_SCRATCH_IN_USE;
|
||||
}
|
||||
|
||||
// Hyperscan underlying scratch and database validity will be checked by
|
||||
// the hs_scan() call, so no need to do it here.
|
||||
|
||||
// PCRE takes the data region length in as an int, so this limits our block
|
||||
// size to INT_MAX.
|
||||
if (length > INT_MAX) {
|
||||
DEBUG_PRINTF("length invalid\n");
|
||||
unmarkScratchInUse(scratch);
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
const struct ch_bytecode *db = ch_get_bytecode(hydb);
|
||||
|
||||
scratch->pq.size = 0;
|
||||
scratch->ret = CH_SUCCESS;
|
||||
|
||||
// Firstly, we run Hyperscan in block mode and add its matches into the
|
||||
// active list for subsequent confirmation with pcre.
|
||||
struct HybridContext hyctx = {
|
||||
.data = data,
|
||||
.length = length,
|
||||
.valid_utf8_highwater = 0,
|
||||
.db = db,
|
||||
.scratch = scratch,
|
||||
.pq = &scratch->pq,
|
||||
.match_callback = onEvent ? onEvent : null_onEvent,
|
||||
.error_callback = onError,
|
||||
.context = userContext
|
||||
};
|
||||
|
||||
// Init priority queue.
|
||||
ret = initQueue(&hyctx, &scratch->pq);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("Chimera returned error %d\n", ret);
|
||||
unmarkScratchInUse(scratch);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!(db->flags & CHIMERA_FLAG_NO_MULTIMATCH)) {
|
||||
ret = scanHyperscan(&hyctx, data, length);
|
||||
if (ret != HS_SUCCESS && scratch->ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("Hyperscan returned error %d\n", scratch->ret);
|
||||
unmarkScratchInUse(scratch);
|
||||
return scratch->ret;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("Flush priority queue\n");
|
||||
// Catch up with PCRE and make up id and offsets as we don't really care
|
||||
// about their values
|
||||
ret = catchupPcre(&hyctx, ~0U, length, length);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("PCRE catch up returned error %d\n", ret);
|
||||
unmarkScratchInUse(scratch);
|
||||
return ret;
|
||||
}
|
||||
|
||||
unmarkScratchInUse(scratch);
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_scan(const ch_database_t *hydb, const char *data,
|
||||
unsigned int length, unsigned int flags,
|
||||
ch_scratch_t *scratch,
|
||||
ch_match_event_handler onEvent,
|
||||
ch_error_event_handler onError, void *userContext) {
|
||||
ch_error_t ret = ch_scan_i(hydb, data, length, flags, scratch, onEvent,
|
||||
onError, userContext);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
const char * HS_CDECL ch_version(void) {
|
||||
return HS_VERSION_STRING;
|
||||
}
|
377
chimera/ch_runtime.h
Normal file
377
chimera/ch_runtime.h
Normal file
@ -0,0 +1,377 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_RUNTIME_H_
|
||||
#define CH_RUNTIME_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Chimera runtime API definition.
|
||||
*
|
||||
* Chimera is a hybrid of the Hyperscan and PCRE regular expression engines.
|
||||
*
|
||||
* This header contains functions for using compiled Chimera databases for
|
||||
* scanning data at runtime.
|
||||
*/
|
||||
|
||||
#include "hs_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
struct ch_scratch;
|
||||
|
||||
/**
|
||||
* A Chimera scratch space.
|
||||
*/
|
||||
typedef struct ch_scratch ch_scratch_t;
|
||||
|
||||
/**
|
||||
* Callback return value used to tell the Chimera matcher what to do after
|
||||
* processing this match.
|
||||
*/
|
||||
typedef int ch_callback_t;
|
||||
|
||||
/**
|
||||
* @defgroup CH_CALLBACK ch_callback_t values
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Continue matching.
|
||||
*/
|
||||
#define CH_CALLBACK_CONTINUE 0
|
||||
|
||||
/**
|
||||
* Terminate matching.
|
||||
*/
|
||||
#define CH_CALLBACK_TERMINATE 1
|
||||
|
||||
/**
|
||||
* Skip remaining matches for this ID and continue.
|
||||
*/
|
||||
#define CH_CALLBACK_SKIP_PATTERN 2
|
||||
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
/**
|
||||
* Type used to differentiate the errors raised with the @ref
|
||||
* ch_error_event_handler callback.
|
||||
*/
|
||||
typedef int ch_error_event_t;
|
||||
|
||||
/**
|
||||
* @defgroup CH_ERROR_EVENT ch_error_event_t values
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* PCRE hits its match limit and reports PCRE_ERROR_MATCHLIMIT.
|
||||
*/
|
||||
#define CH_ERROR_MATCHLIMIT 1
|
||||
|
||||
/**
|
||||
* PCRE hits its recursion limit and reports PCRE_ERROR_RECURSIONLIMIT.
|
||||
*/
|
||||
#define CH_ERROR_RECURSIONLIMIT 2
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Structure representing a captured subexpression within a match. An array of
|
||||
* these structures corresponding to capture groups in order is passed to the
|
||||
* callback on match, with active structures identified by the
|
||||
* CH_CAPTURE_FLAG_ACTIVE flag.
|
||||
*/
|
||||
typedef struct ch_capture {
|
||||
/**
|
||||
* Flags indicating whether this structure is active
|
||||
* (CH_CAPTURE_FLAG_ACTIVE) or inactive (CH_CAPTURE_FLAG_INACTIVE).
|
||||
*/
|
||||
unsigned int flags;
|
||||
|
||||
/**
|
||||
* Offset at which this capture group begins.
|
||||
*/
|
||||
unsigned long long from;
|
||||
|
||||
/**
|
||||
* Offset at which this capture group ends.
|
||||
*/
|
||||
unsigned long long to;
|
||||
} ch_capture_t;
|
||||
|
||||
/**
|
||||
* @defgroup CH_CAPTURE ch_capture_t flags
|
||||
*
|
||||
* These flags are used in @ref ch_capture_t::flags to indicate if this
|
||||
* structure is active.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Flag indicating that a particular capture group is inactive, used in @ref
|
||||
* ch_capture_t::flags.
|
||||
*/
|
||||
#define CH_CAPTURE_FLAG_INACTIVE 0
|
||||
|
||||
/**
|
||||
* Flag indicating that a particular capture group is active, used in @ref
|
||||
* ch_capture_t::flags.
|
||||
*/
|
||||
#define CH_CAPTURE_FLAG_ACTIVE 1
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Definition of the match event callback function type.
|
||||
*
|
||||
* A callback function matching the defined type must be provided by the
|
||||
* application calling the @ref ch_scan() function.
|
||||
*
|
||||
* This callback function will be invoked whenever a match is located in the
|
||||
* target data during the execution of a scan. The details of the match are
|
||||
* passed in as parameters to the callback function, and the callback function
|
||||
* should return a value indicating whether or not matching should continue on
|
||||
* the target data. If no callbacks are desired from a scan call, NULL may be
|
||||
* provided in order to suppress match production.
|
||||
*
|
||||
* @param id
|
||||
* The ID number of the expression that matched. If the expression was a
|
||||
* single expression compiled with @ref ch_compile(), this value will be
|
||||
* zero.
|
||||
*
|
||||
* @param from
|
||||
* The offset of the first byte that matches the expression.
|
||||
*
|
||||
* @param to
|
||||
* The offset after the last byte that matches the expression.
|
||||
*
|
||||
* @param flags
|
||||
* This is provided for future use and is unused at present.
|
||||
*
|
||||
* @param size
|
||||
* The number of valid entries pointed to by the captured parameter.
|
||||
*
|
||||
* @param captured
|
||||
* A pointer to an array of @ref ch_capture_t structures that
|
||||
* contain the start and end offsets of entire pattern match and
|
||||
* each captured subexpression.
|
||||
*
|
||||
* @param ctx
|
||||
* The pointer supplied by the user to the @ref ch_scan() function.
|
||||
*
|
||||
* @return
|
||||
* The callback can return @ref CH_CALLBACK_TERMINATE to stop matching.
|
||||
* Otherwise, a return value of @ref CH_CALLBACK_CONTINUE will continue,
|
||||
* with the current pattern if configured to produce multiple matches per
|
||||
* pattern, while a return value of @ref CH_CALLBACK_SKIP_PATTERN will
|
||||
* cease matching this pattern but continue matching the next pattern.
|
||||
*/
|
||||
typedef ch_callback_t (*ch_match_event_handler)(unsigned int id,
|
||||
unsigned long long from,
|
||||
unsigned long long to,
|
||||
unsigned int flags,
|
||||
unsigned int size,
|
||||
const ch_capture_t *captured,
|
||||
void *ctx);
|
||||
|
||||
/**
|
||||
* Definition of the Chimera error event callback function type.
|
||||
*
|
||||
* A callback function matching the defined type may be provided by the
|
||||
* application calling the @ref ch_scan function. This callback function
|
||||
* will be invoked when an error event occurs during matching; this indicates
|
||||
* that some matches for a given expression may not be reported.
|
||||
*
|
||||
* @param error_type
|
||||
* The type of error event that occurred. Currently these errors
|
||||
* correspond to resource limits on PCRE backtracking
|
||||
* @ref CH_ERROR_MATCHLIMIT and @ref CH_ERROR_RECURSIONLIMIT.
|
||||
*
|
||||
* @param id
|
||||
* The ID number of the expression for which the error event was raised.
|
||||
*
|
||||
* @param info
|
||||
* Event-specific data, for future use. Currently unused.
|
||||
*
|
||||
* @param ctx
|
||||
* The context pointer supplied by the user to the @ref ch_scan
|
||||
* function.
|
||||
*
|
||||
* @return
|
||||
* The callback can return @ref CH_CALLBACK_SKIP_PATTERN to cease matching this
|
||||
* pattern but continue matching the next pattern. Otherwise, we stop
|
||||
* matching for all patterns with @ref CH_CALLBACK_TERMINATE.
|
||||
*/
|
||||
typedef ch_callback_t (*ch_error_event_handler)(ch_error_event_t error_type,
|
||||
unsigned int id, void *info,
|
||||
void *ctx);
|
||||
|
||||
/**
|
||||
* The block regular expression scanner.
|
||||
*
|
||||
* This is the function call in which the actual pattern matching takes place
|
||||
* for block-mode pattern databases.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param data
|
||||
* Pointer to the data to be scanned.
|
||||
*
|
||||
* @param length
|
||||
* The number of bytes to scan.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of this function. This parameter is
|
||||
* provided for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref ch_alloc_scratch() for this
|
||||
* database.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param onError
|
||||
* Pointer to an error event callback function. If a NULL pointer is given,
|
||||
* @ref CH_ERROR_MATCHLIMIT and @ref CH_ERROR_RECURSIONLIMIT errors will
|
||||
* be ignored and matching will continue.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref CH_SUCCESS on success; @ref CH_SCAN_TERMINATED if the
|
||||
* match callback indicated that scanning should stop; other values on
|
||||
* error.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_scan(const ch_database_t *db, const char *data,
|
||||
unsigned int length, unsigned int flags,
|
||||
ch_scratch_t *scratch,
|
||||
ch_match_event_handler onEvent,
|
||||
ch_error_event_handler onError,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* Allocate a "scratch" space for use by Chimera.
|
||||
*
|
||||
* This is required for runtime use, and one scratch space per thread, or
|
||||
* concurrent caller, is required. Any allocator callback set by @ref
|
||||
* ch_set_scratch_allocator() or @ref ch_set_allocator() will be used by this
|
||||
* function.
|
||||
*
|
||||
* @param db
|
||||
* The database, as produced by @ref ch_compile().
|
||||
*
|
||||
* @param scratch
|
||||
* On first allocation, a pointer to NULL should be provided so a new
|
||||
* scratch can be allocated. If a scratch block has been previously
|
||||
* allocated, then a pointer to it should be passed back in to see if it
|
||||
* is valid for this database block. If a new scratch block is required,
|
||||
* the original will be freed and the new one returned, otherwise the
|
||||
* previous scratch block will be returned. On success, the scratch block
|
||||
* will be suitable for use with the provided database in addition to any
|
||||
* databases that original scratch space was suitable for.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on successful allocation; @ref CH_NOMEM if the
|
||||
* allocation fails. Other errors may be returned if invalid parameters
|
||||
* are specified.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_alloc_scratch(const ch_database_t *db,
|
||||
ch_scratch_t **scratch);
|
||||
|
||||
/**
|
||||
* Allocate a scratch space that is a clone of an existing scratch space.
|
||||
*
|
||||
* This is useful when multiple concurrent threads will be using the same set
|
||||
* of compiled databases, and another scratch space is required. Any allocator
|
||||
* callback set by @ref ch_set_scratch_allocator() or @ref ch_set_allocator()
|
||||
* will be used by this function.
|
||||
*
|
||||
* @param src
|
||||
* The existing @ref ch_scratch_t to be cloned.
|
||||
*
|
||||
* @param dest
|
||||
* A pointer to the new scratch space will be returned here.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success; @ref CH_NOMEM if the allocation fails.
|
||||
* Other errors may be returned if invalid parameters are specified.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_clone_scratch(const ch_scratch_t *src,
|
||||
ch_scratch_t **dest);
|
||||
|
||||
/**
|
||||
* Provides the size of the given scratch space.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref ch_alloc_scratch() or @ref
|
||||
* ch_clone_scratch().
|
||||
*
|
||||
* @param scratch_size
|
||||
* On success, the size of the scratch space in bytes is placed in this
|
||||
* parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_scratch_size(const ch_scratch_t *scratch,
|
||||
size_t *scratch_size);
|
||||
|
||||
/**
|
||||
* Free a scratch block previously allocated by @ref ch_alloc_scratch() or @ref
|
||||
* ch_clone_scratch().
|
||||
*
|
||||
* The free callback set by @ref ch_set_scratch_allocator() or @ref
|
||||
* ch_set_allocator() will be used by this function.
|
||||
*
|
||||
* @param scratch
|
||||
* The scratch block to be freed. NULL may also be safely provided.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_free_scratch(ch_scratch_t *scratch);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_RUNTIME_H_ */
|
317
chimera/ch_scratch.c
Normal file
317
chimera/ch_scratch.c
Normal file
@ -0,0 +1,317 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Chimera: scratch space alloc.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "allocator.h"
|
||||
#include "ch.h"
|
||||
#include "hs.h"
|
||||
#include "hs_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "ch_alloc.h"
|
||||
#include "ch_internal.h"
|
||||
#include "ch_scratch.h"
|
||||
#include "ch_database.h"
|
||||
|
||||
static
|
||||
size_t getPatternDataSize(const ch_scratch_t *s) {
|
||||
size_t numCapturingStructs =
|
||||
s->patternCount * (s->maxCaptureGroups + 1);
|
||||
return (sizeof(struct ch_patterndata) * s->patternCount) +
|
||||
alignof(struct ch_capture) + // padding
|
||||
(sizeof(struct ch_capture) * numCapturingStructs);
|
||||
}
|
||||
|
||||
static
|
||||
void initPatternData(const ch_scratch_t *s) {
|
||||
// ch_capture array is aligned, directly after the patterndata array.
|
||||
char *ptr = (char *)s->patternData +
|
||||
(sizeof(struct ch_patterndata) * s->patternCount);
|
||||
struct ch_capture *cap = (struct ch_capture *)
|
||||
(ROUNDUP_PTR(ptr, alignof(struct ch_capture)));
|
||||
|
||||
for (u32 i = 0; i < s->patternCount; i++) {
|
||||
struct ch_patterndata *pd = &s->patternData[i];
|
||||
pd->match = cap;
|
||||
DEBUG_PRINTF("pattern %u: pd=%p, match=%p\n", i, pd, pd->match);
|
||||
cap += (s->maxCaptureGroups + 1);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
ch_error_t alloc_scratch(const ch_scratch_t *proto, ch_scratch_t **scratch) {
|
||||
size_t ovectorSize = (proto->maxCaptureGroups + 1) * sizeof(int) * 3;
|
||||
size_t capturedSize =
|
||||
sizeof(struct ch_capture) * (proto->maxCaptureGroups + 1);
|
||||
size_t patternDataSize = getPatternDataSize(proto);
|
||||
size_t activeSize = proto->activeSize;
|
||||
size_t queueSize = proto->patternCount * sizeof(struct queue_item);
|
||||
|
||||
// max padding for alignment below.
|
||||
size_t padding = alignof(int) + alignof(struct ch_capture) +
|
||||
alignof(struct ch_patterndata) +
|
||||
alignof(struct queue_item);
|
||||
|
||||
size_t allocSize = sizeof(ch_scratch_t) + ovectorSize + capturedSize +
|
||||
patternDataSize + activeSize + queueSize + padding
|
||||
+ 256; /* padding for cacheline alignment */
|
||||
ch_scratch_t *s;
|
||||
ch_scratch_t *s_tmp = ch_scratch_alloc(allocSize);
|
||||
ch_error_t err = ch_check_alloc(s_tmp);
|
||||
if (err != CH_SUCCESS) {
|
||||
ch_scratch_free(s_tmp);
|
||||
*scratch = NULL;
|
||||
return err;
|
||||
}
|
||||
|
||||
memset(s_tmp, 0, allocSize);
|
||||
s = ROUNDUP_PTR(s_tmp, 64);
|
||||
// Set ordinary members.
|
||||
*s = *proto;
|
||||
|
||||
s->magic = CH_SCRATCH_MAGIC;
|
||||
s->in_use = 0;
|
||||
s->scratch_alloc = (char *)s_tmp;
|
||||
|
||||
// Set pointers internal to allocation.
|
||||
|
||||
char *ptr = (char *)s + sizeof(*s);
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(int));
|
||||
s->ovector = (int *)ptr;
|
||||
ptr += ovectorSize;
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(struct ch_capture));
|
||||
s->captured = (struct ch_capture *)ptr;
|
||||
ptr += capturedSize;
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(struct ch_patterndata));
|
||||
s->patternData = (struct ch_patterndata *)ptr;
|
||||
ptr += patternDataSize;
|
||||
|
||||
// Pre-fill pattern data, setting captureOffsets
|
||||
initPatternData(s);
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(struct queue_item));
|
||||
s->pq.item = (struct queue_item *)ptr;
|
||||
ptr += queueSize;
|
||||
|
||||
s->active = (u8 *)ptr;
|
||||
|
||||
// Store size.
|
||||
s->scratchSize = allocSize;
|
||||
|
||||
// We should never overrun our allocation.
|
||||
assert((ptr + activeSize) - (char *)s <= (ptrdiff_t)allocSize);
|
||||
|
||||
*scratch = s;
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_alloc_scratch(const ch_database_t *hydb,
|
||||
ch_scratch_t **scratch) {
|
||||
if (!hydb || !scratch) {
|
||||
DEBUG_PRINTF("invalid args\n");
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("hydb=%p, &scratch=%p\n", hydb, scratch);
|
||||
ch_error_t rv = hydbIsValid(hydb);
|
||||
if (rv != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("invalid database\n");
|
||||
return rv;
|
||||
}
|
||||
|
||||
if (*scratch != NULL) {
|
||||
/* has to be aligned before we can do anything with it */
|
||||
if (!ISALIGNED_CL(*scratch)) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
if ((*scratch)->magic != CH_SCRATCH_MAGIC) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
if (markScratchInUse(*scratch)) {
|
||||
return CH_SCRATCH_IN_USE;
|
||||
}
|
||||
}
|
||||
|
||||
// We allocate a prototype of the scratch header to do our sizing with.
|
||||
ch_scratch_t *proto;
|
||||
ch_scratch_t *proto_tmp = ch_scratch_alloc(sizeof(ch_scratch_t) + 256);
|
||||
ch_error_t proto_ret = ch_check_alloc(proto_tmp);
|
||||
if (proto_ret != CH_SUCCESS) {
|
||||
ch_scratch_free(proto_tmp);
|
||||
ch_scratch_free(*scratch);
|
||||
*scratch = NULL;
|
||||
return proto_ret;
|
||||
}
|
||||
|
||||
proto = ROUNDUP_PTR(proto_tmp, 64);
|
||||
|
||||
int resize = 0;
|
||||
if (*scratch) {
|
||||
*proto = **scratch;
|
||||
} else {
|
||||
memset(proto, 0, sizeof(*proto));
|
||||
resize = 1;
|
||||
}
|
||||
proto->scratch_alloc = (char *)proto_tmp;
|
||||
|
||||
const struct ch_bytecode *db = ch_get_bytecode(hydb);
|
||||
|
||||
if (db->maxCaptureGroups > proto->maxCaptureGroups) {
|
||||
proto->maxCaptureGroups = db->maxCaptureGroups;
|
||||
resize = 1;
|
||||
}
|
||||
|
||||
if (db->patternCount > proto->patternCount) {
|
||||
proto->patternCount = db->patternCount;
|
||||
proto->activeSize = db->activeSize;
|
||||
resize = 1;
|
||||
}
|
||||
|
||||
if (resize) {
|
||||
if (*scratch) {
|
||||
ch_scratch_free((*scratch)->scratch_alloc);
|
||||
}
|
||||
|
||||
ch_error_t alloc_ret = alloc_scratch(proto, scratch);
|
||||
ch_scratch_free(proto_tmp);
|
||||
if (alloc_ret != CH_SUCCESS) {
|
||||
*scratch = NULL;
|
||||
return alloc_ret;
|
||||
}
|
||||
} else {
|
||||
ch_scratch_free(proto_tmp);
|
||||
unmarkScratchInUse(*scratch);
|
||||
}
|
||||
|
||||
if (db->flags & CHIMERA_FLAG_NO_MULTIMATCH) {
|
||||
(*scratch)->multi_scratch = NULL;
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
// We may still have to realloc the underlying Hyperscan scratch.
|
||||
rv = hs_alloc_scratch(getHyperscanDatabase(db),
|
||||
&(*scratch)->multi_scratch);
|
||||
if (rv != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("hs_alloc_scratch for multi_scratch failed\n");
|
||||
hs_free_scratch((*scratch)->multi_scratch);
|
||||
ch_scratch_free((*scratch)->scratch_alloc);
|
||||
*scratch = NULL;
|
||||
return rv;
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_clone_scratch(const ch_scratch_t *src,
|
||||
ch_scratch_t **dest) {
|
||||
if (!dest || !src || !ISALIGNED_CL(src) ||
|
||||
src->magic != CH_SCRATCH_MAGIC) {
|
||||
DEBUG_PRINTF("scratch invalid\n");
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
ch_error_t ret = alloc_scratch(src, dest);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("alloc_scratch failed\n");
|
||||
*dest = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (src->multi_scratch) {
|
||||
(*dest)->multi_scratch = NULL;
|
||||
ret = hs_clone_scratch(src->multi_scratch, &(*dest)->multi_scratch);
|
||||
if (ret != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("hs_clone_scratch(multi_scratch,...) failed\n");
|
||||
ch_scratch_free(*dest);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_free_scratch(ch_scratch_t *scratch) {
|
||||
ch_error_t ret = CH_SUCCESS;
|
||||
if (scratch) {
|
||||
/* has to be aligned before we can do anything with it */
|
||||
if (!ISALIGNED_CL(scratch)) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
if (scratch->magic != CH_SCRATCH_MAGIC) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
if (markScratchInUse(scratch)) {
|
||||
return CH_SCRATCH_IN_USE;
|
||||
}
|
||||
|
||||
if (scratch->multi_scratch) {
|
||||
ret = hs_free_scratch(scratch->multi_scratch);
|
||||
}
|
||||
|
||||
scratch->magic = 0;
|
||||
assert(scratch->scratch_alloc);
|
||||
DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch,
|
||||
scratch->scratch_alloc);
|
||||
ch_scratch_free(scratch->scratch_alloc);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Not public, but used for info from our internal tools. Note that in the
|
||||
* hybrid matcher the scratch is definitely not a contiguous memory region. */
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_scratch_size(const ch_scratch_t *scratch, size_t *size) {
|
||||
ch_error_t ret = CH_SUCCESS;
|
||||
if (!size || !scratch || !ISALIGNED_CL(scratch) ||
|
||||
scratch->magic != CH_SCRATCH_MAGIC) {
|
||||
return CH_INVALID;
|
||||
} else {
|
||||
size_t multi_size = 0;
|
||||
|
||||
if (scratch->multi_scratch) {
|
||||
ret = hs_scratch_size(scratch->multi_scratch, &multi_size);
|
||||
}
|
||||
if (ret) {
|
||||
multi_size = 0;
|
||||
}
|
||||
|
||||
*size = scratch->scratchSize + multi_size;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
119
chimera/ch_scratch.h
Normal file
119
chimera/ch_scratch.h
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Scratch and associated data structures.
|
||||
*
|
||||
* This header gets pulled into many places (many deep, slow to compile
|
||||
* places). Try to keep the included headers under control.
|
||||
*/
|
||||
|
||||
#ifndef CH_SCRATCH_H_
|
||||
#define CH_SCRATCH_H_
|
||||
|
||||
#include "ch_common.h"
|
||||
#include "ch_runtime.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define CH_SCRATCH_MAGIC 0x554F4259 //!< Magic number stored in \ref ch_scratch
|
||||
|
||||
struct queue_item {
|
||||
int from; /** \brief used to store the start location. */
|
||||
int to; /** \brief used to store the current location. */
|
||||
u32 id; /**< pattern index. */
|
||||
};
|
||||
|
||||
struct match_pq {
|
||||
struct queue_item *item;
|
||||
u32 size; /**< current size of the priority queue */
|
||||
};
|
||||
|
||||
/** \brief Information about a pattern stored at runtime when a match is
|
||||
* encountered. */
|
||||
struct ch_patterndata {
|
||||
struct ch_capture *match; //!< buffered group info
|
||||
u32 groupCount; //!< number of capturing groups
|
||||
u32 scanStart; //!< start of match window (still to be single-scanned).
|
||||
};
|
||||
|
||||
/** \brief Scratch space header for Chimera. */
|
||||
struct ch_scratch {
|
||||
u32 magic; //!< must be \ref CH_SCRATCH_MAGIC
|
||||
u8 in_use; /**< non-zero when being used by an API call. */
|
||||
struct hs_scratch *multi_scratch; //!< for hyperscan scatch.
|
||||
int *ovector; //!< maximally-sized ovector for PCRE usage.
|
||||
struct ch_capture *captured; //!< max-sized capture group struct.
|
||||
u8 *active; //!< active multibit.
|
||||
struct ch_patterndata *patternData; //!< per-pattern match data, indexed by
|
||||
// pattern ID.
|
||||
struct match_pq pq; //!< priority queue to ensure matching ordering
|
||||
u32 patternCount; //!< number of patterns, used to size active multibit
|
||||
u32 activeSize; //!< size of active multibit
|
||||
u32 maxCaptureGroups; //!< largest num of capturing groups required
|
||||
u32 scratchSize; //!< size of allocation
|
||||
int ret; //!< return value in Hyperscan callback
|
||||
char *scratch_alloc; /* user allocated scratch object */
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Mark scratch as in use.
|
||||
*
|
||||
* Returns non-zero if it was already in use, zero otherwise.
|
||||
*/
|
||||
static really_inline
|
||||
char markScratchInUse(struct ch_scratch *scratch) {
|
||||
DEBUG_PRINTF("marking scratch as in use\n");
|
||||
assert(scratch && scratch->magic == CH_SCRATCH_MAGIC);
|
||||
if (scratch->in_use) {
|
||||
DEBUG_PRINTF("scratch already in use!\n");
|
||||
return 1;
|
||||
}
|
||||
scratch->in_use = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Mark scratch as no longer in use.
|
||||
*/
|
||||
static really_inline
|
||||
void unmarkScratchInUse(struct ch_scratch *scratch) {
|
||||
DEBUG_PRINTF("marking scratch as not in use\n");
|
||||
assert(scratch && scratch->magic == CH_SCRATCH_MAGIC);
|
||||
assert(scratch->in_use == 1);
|
||||
scratch->in_use = 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_SCRATCH_H_ */
|
@ -61,5 +61,3 @@ else ()
|
||||
return ()
|
||||
endif ()
|
||||
endif (PCRE_BUILD_SOURCE)
|
||||
|
||||
set (PCRE_CHECKED TRUE PARENT_SCOPE)
|
||||
|
@ -46,7 +46,7 @@ using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
u32 mmbit_size(u32 total_bits) {
|
||||
u32 HS_CDECL mmbit_size(u32 total_bits) {
|
||||
if (total_bits > MMB_MAX_BITS) {
|
||||
throw ResourceLimitError();
|
||||
}
|
||||
|
@ -33,6 +33,7 @@
|
||||
#ifndef MULTIBIT_BUILD_H
|
||||
#define MULTIBIT_BUILD_H
|
||||
|
||||
#include "hs_common.h"
|
||||
#include "multibit_internal.h"
|
||||
#include "hash.h"
|
||||
|
||||
@ -62,8 +63,10 @@ namespace ue2 {
|
||||
*
|
||||
* This will throw a resource limit assertion if the requested mmbit is too
|
||||
* large.
|
||||
*
|
||||
* TODO: HS_CDECL is a temporary addition for Chimera on Windows; this needs
* to be improved.
|
||||
*/
|
||||
u32 mmbit_size(u32 total_bits);
|
||||
u32 HS_CDECL mmbit_size(u32 total_bits);
|
||||
|
||||
/** \brief Construct a sparse iterator over the values in \a bits for a
|
||||
* multibit of size \a total_bits. */
|
||||
|
@ -31,6 +31,8 @@ SET(hsbench_SOURCES
|
||||
common.h
|
||||
data_corpus.cpp
|
||||
data_corpus.h
|
||||
engine.cpp
|
||||
engine.h
|
||||
engine_hyperscan.cpp
|
||||
engine_hyperscan.h
|
||||
heapstats.cpp
|
||||
@ -45,6 +47,23 @@ SET(hsbench_SOURCES
|
||||
timer.h
|
||||
)
|
||||
|
||||
if (BUILD_CHIMERA)
|
||||
add_definitions(-DHS_HYBRID)
|
||||
SET(hsbench_SOURCES
|
||||
${hsbench_SOURCES}
|
||||
engine_chimera.cpp
|
||||
engine_chimera.h
|
||||
engine_pcre.cpp
|
||||
engine_pcre.h
|
||||
)
|
||||
endif()
|
||||
|
||||
add_executable(hsbench ${hsbench_SOURCES})
|
||||
target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS}
|
||||
${CMAKE_THREAD_LIBS_INIT})
|
||||
if (BUILD_CHIMERA)
|
||||
include_directories(${PCRE_INCLUDE_DIRS})
|
||||
target_link_libraries(hsbench hs chimera ${PCRE_LDFLAGS} databaseutil
|
||||
expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
else()
|
||||
target_link_libraries(hsbench hs databaseutil expressionutil
|
||||
${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -42,6 +42,12 @@ extern bool forceEditDistance;
|
||||
extern unsigned editDistance;
|
||||
extern bool printCompressSize;
|
||||
|
||||
/** Structure for the result of a single complete scan. */
|
||||
struct ResultEntry {
|
||||
double seconds = 0; //!< Time taken for scan.
|
||||
unsigned int matches = 0; //!< Count of matches found.
|
||||
};
|
||||
|
||||
struct SqlFailure {
|
||||
explicit SqlFailure(const std::string &s) : message(s) {}
|
||||
std::string message;
|
||||
|
35
tools/hsbench/engine.cpp
Normal file
35
tools/hsbench/engine.cpp
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "engine.h"
|
||||
|
||||
// Defined out of line; nothing to clean up in the base class.
EngineContext::~EngineContext() = default;
|
||||
|
||||
// Defined out of line; nothing to clean up in the base class.
EngineStream::~EngineStream() = default;
|
||||
|
||||
// Defined out of line; nothing to clean up in the base class.
Engine::~Engine() = default;
|
94
tools/hsbench/engine.h
Normal file
94
tools/hsbench/engine.h
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINE_H
|
||||
#define ENGINE_H
|
||||
|
||||
#include "common.h"
|
||||
#include "sqldb.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
// Engines have an engine context which is allocated on a per-thread basis.
|
||||
class EngineContext : boost::noncopyable {
|
||||
public:
|
||||
virtual ~EngineContext();
|
||||
};
|
||||
|
||||
/** Streaming mode scans have persistent stream state associated with them. */
|
||||
class EngineStream : boost::noncopyable {
|
||||
public:
|
||||
virtual ~EngineStream();
|
||||
unsigned int sn;
|
||||
};
|
||||
|
||||
// Benchmarking engine
|
||||
class Engine : boost::noncopyable {
|
||||
public:
|
||||
virtual ~Engine();
|
||||
|
||||
// allocate an EngineContext
|
||||
virtual std::unique_ptr<EngineContext> makeContext() const = 0;
|
||||
|
||||
// non-streaming scan
|
||||
virtual void scan(const char *data, unsigned len, unsigned blockId,
|
||||
ResultEntry &results, EngineContext &ectx) const = 0;
|
||||
|
||||
// vectoring scan
|
||||
virtual void scan_vectored(const char *const *data,
|
||||
const unsigned int *len, unsigned int count,
|
||||
unsigned int streamId, ResultEntry &result,
|
||||
EngineContext &ectx) const = 0;
|
||||
|
||||
// stream open
|
||||
virtual std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const = 0;
|
||||
|
||||
// stream close
|
||||
virtual void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const = 0;
|
||||
|
||||
// stream compress and expand
|
||||
virtual void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const = 0;
|
||||
|
||||
// streaming scan
|
||||
virtual void streamScan(EngineStream &stream, const char *data,
|
||||
unsigned int len, unsigned int id,
|
||||
ResultEntry &result) const = 0;
|
||||
|
||||
virtual void printStats() const = 0;
|
||||
|
||||
virtual void sqlStats(SqlDB &db) const = 0;
|
||||
};
|
||||
|
||||
#endif // ENGINE_H
|
314
tools/hsbench/engine_chimera.cpp
Normal file
314
tools/hsbench/engine_chimera.cpp
Normal file
@ -0,0 +1,314 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "ExpressionParser.h"
|
||||
#include "common.h"
|
||||
#include "engine_chimera.h"
|
||||
#include "expressions.h"
|
||||
#include "heapstats.h"
|
||||
#include "sqldb.h"
|
||||
#include "timer.h"
|
||||
|
||||
#include "chimera/ch_database.h"
|
||||
|
||||
#include "util/make_unique.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Allocate the Chimera scratch for this context.
 *
 * A context without scratch cannot scan, so an allocation failure is treated
 * as fatal, in the same way as a failed ch_scan().
 */
EngineCHContext::EngineCHContext(const ch_database_t *db) {
    // ch_alloc_scratch() inspects *scratch to choose between allocating and
    // resizing, so the pointer must be null going in.
    scratch = nullptr;
    ch_error_t err = ch_alloc_scratch(db, &scratch);
    if (err != CH_SUCCESS || !scratch) {
        printf("Fatal error: ch_alloc_scratch returned error %d\n", err);
        abort();
    }
}
|
||||
|
||||
/** Release the scratch owned by this context. */
EngineCHContext::~EngineCHContext() {
    ch_free_scratch(scratch);
}
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanCHContext {
|
||||
ScanCHContext(unsigned id_in, ResultEntry &result_in)
|
||||
: id(id_in), result(result_in) {}
|
||||
unsigned id;
|
||||
ResultEntry &result;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Callback function called for every match that Chimera produces, used when
|
||||
* "echo matches" is off.
|
||||
*/
|
||||
static
|
||||
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||
unsigned int, const ch_capture_t *, void *ctx) {
|
||||
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function called for every match that Chimera produces when "echo
|
||||
* matches" is enabled.
|
||||
*/
|
||||
static
|
||||
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
unsigned int, unsigned int, const ch_capture_t *, void *ctx) {
|
||||
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
printf("Match @%u:%llu for %u\n", sc->id, to, id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** Take ownership of a compiled database together with its compile stats. */
EngineChimera::EngineChimera(ch_database_t *db_in, CompileCHStats cs)
    : db(db_in), compile_stats(std::move(cs)) {
    assert(db);
}
|
||||
|
||||
/** Free the database this engine was constructed with. */
EngineChimera::~EngineChimera() {
    ch_free_database(db);
}
|
||||
|
||||
/** Create a context holding scratch allocated for this engine's database. */
unique_ptr<EngineContext> EngineChimera::makeContext() const {
    unique_ptr<EngineContext> ctx = ue2::make_unique<EngineCHContext>(db);
    return ctx;
}
|
||||
|
||||
void EngineChimera::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const {
|
||||
assert(data);
|
||||
|
||||
auto &ctx = static_cast<EngineCHContext &>(ectx);
|
||||
ScanCHContext sc(id, result);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
ch_error_t rv = ch_scan(db, data, len, 0, ctx.scratch, callback, nullptr,
|
||||
&sc);
|
||||
|
||||
if (rv != CH_SUCCESS) {
|
||||
printf("Fatal error: ch_scan returned error %d\n", rv);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// vectoring scan
|
||||
void EngineChimera::scan_vectored(UNUSED const char *const *data,
|
||||
UNUSED const unsigned int *len,
|
||||
UNUSED unsigned int count,
|
||||
UNUSED unsigned int streamId,
|
||||
UNUSED ResultEntry &result,
|
||||
UNUSED EngineContext &ectx) const {
|
||||
printf("Hybrid matcher can't support vectored mode.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
// Streaming is not available with the hybrid matcher: report and abort.
unique_ptr<EngineStream>
EngineChimera::streamOpen(EngineContext & /* ectx */,
                          unsigned /* id */) const {
    printf("Hybrid matcher can't stream.\n");
    abort();
}
|
||||
|
||||
void EngineChimera::streamClose(UNUSED unique_ptr<EngineStream> stream,
|
||||
UNUSED ResultEntry &result) const {
|
||||
printf("Hybrid matcher can't stream.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
void EngineChimera::streamScan(UNUSED EngineStream &stream,
|
||||
UNUSED const char *data,
|
||||
UNUSED unsigned len, UNUSED unsigned id,
|
||||
UNUSED ResultEntry &result) const {
|
||||
printf("Hybrid matcher can't stream.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
void EngineChimera::streamCompressExpand(UNUSED EngineStream &stream,
|
||||
UNUSED vector<char> &temp) const {
|
||||
printf("Hybrid matcher can't stream.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
/** Print the compile-time summary for this engine to stdout. */
void EngineChimera::printStats() const {
    const auto &cs = compile_stats;

    // The signature-set line is only shown when a set name was given.
    if (!cs.sigs_name.empty()) {
        printf("Signature set:        %s\n", cs.sigs_name.c_str());
    }
    printf("Signatures:        %s\n", cs.signatures.c_str());
    printf("Chimera info:      %s\n", cs.db_info.c_str());
    printf("Expression count:  %'zu\n", cs.expressionCount);
    printf("Bytecode size:     %'zu bytes\n", cs.compiledSize);
    printf("Database CRC:      0x%x\n", cs.crc32);
    printf("Scratch size:      %'zu bytes\n", cs.scratchSize);
    printf("Compile time:      %'0.3Lf seconds\n", cs.compileSecs);
    printf("Peak heap usage:   %'u bytes\n", cs.peakMemorySize);
}
|
||||
|
||||
/** Insert this engine's compile statistics into the Compile table. */
void EngineChimera::sqlStats(SqlDB &sqldb) const {
    const auto &cs = compile_stats;

    // The CRC is stored as a hexadecimal string.
    ostringstream crcText;
    crcText << "0x" << hex << cs.crc32;

    static const string insertStmt =
        "INSERT INTO Compile ("
            "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
            "scratchSize, compileSecs, peakMemory) "
        "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    sqldb.insert_all(insertStmt, cs.sigs_name, cs.signatures, cs.db_info,
                     cs.expressionCount, cs.compiledSize, crcText.str(),
                     cs.scratchSize, cs.compileSecs, cs.peakMemorySize);
}
|
||||
|
||||
/**
 * Compile \a expressions into a Chimera database and wrap it in an engine.
 *
 * \param expressions  signature id -> expression text, parsed with
 *                     readExpression(); extended parameters are rejected.
 * \param name         signature file name, recorded in the compile stats
 *                     (reduced to its last path component when \a sigs_name
 *                     is non-empty).
 * \param sigs_name    signature set name, recorded in the compile stats.
 * \return the engine, or nullptr if parsing, compilation or any of the
 *         follow-up database/scratch queries fails. Nothing allocated along
 *         the way is leaked on failure.
 */
unique_ptr<EngineChimera>
buildEngineChimera(const ExpressionMap &expressions, const string &name,
                   const string &sigs_name) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    const unsigned int count = expressions.size();

    vector<string> exprs;
    vector<unsigned int> flags, ids;
    exprs.reserve(count);
    flags.reserve(count);
    ids.reserve(count);

    for (const auto &m : expressions) {
        string expr;
        unsigned int f = 0;
        hs_expr_ext extparam; // only inspected to reject extended flags
        extparam.flags = 0;
        if (!readExpression(m.second, expr, &f, &extparam)) {
            printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                   m.first);
            return nullptr;
        }

        if (extparam.flags) {
            printf("Error parsing PCRE with extended flags: %s (id %u)\n",
                   m.second.c_str(), m.first);
            return nullptr;
        }
        exprs.push_back(expr);
        ids.push_back(m.first);
        flags.push_back(f);
    }

    // Our compiler takes an array of plain ol' C strings.
    vector<const char *> patterns(count);
    for (unsigned int i = 0; i < count; i++) {
        patterns[i] = exprs[i].c_str();
    }

    Timer timer;
    timer.start();

    // Capture groups by default
    unsigned int mode = CH_MODE_GROUPS;
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;
    ch_error_t err = ch_compile_multi(patterns.data(), flags.data(),
                                      ids.data(), count, mode, nullptr, &db,
                                      &compile_err);

    timer.complete();
    long double compileSecs = timer.seconds();
    unsigned int peakMemorySize = getPeakHeap();

    // Any failure, not only CH_COMPILER_ERROR, means there is no database to
    // carry on with.
    if (err != CH_SUCCESS) {
        if (compile_err) {
            if (compile_err->expression >= 0) {
                printf("Compile error for signature #%d: %s\n",
                       compile_err->expression, compile_err->message);
            } else {
                printf("Compile error: %s\n", compile_err->message);
            }
            ch_free_compile_error(compile_err);
        } else {
            printf("Compile error: ch_compile_multi returned error %d\n", err);
        }
        return nullptr;
    }

    size_t compiledSize = 0;
    err = ch_database_size(db, &compiledSize);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }
    assert(compiledSize > 0);

    string db_info;
    char *info = nullptr;
    err = ch_database_info(db, &info);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }
    db_info = string(info);
    free(info);

    // Allocate scratch temporarily to find its size: this is a good test
    // anyway.
    ch_scratch_t *scratch = nullptr;
    err = ch_alloc_scratch(db, &scratch);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }

    size_t scratchSize = 0;
    err = ch_scratch_size(scratch, &scratchSize);
    ch_free_scratch(scratch); // only needed for the size query
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }

    // Collect summary information.
    CompileCHStats cs;
    cs.sigs_name = sigs_name;
    if (!sigs_name.empty()) {
        // npos + 1 wraps to 0, so a name without '/' is kept whole.
        const auto pos = name.find_last_of('/');
        cs.signatures = name.substr(pos + 1);
    } else {
        cs.signatures = name;
    }
    cs.db_info = db_info;
    cs.expressionCount = expressions.size();
    cs.compiledSize = compiledSize;
    cs.scratchSize = scratchSize;
    cs.compileSecs = compileSecs;
    cs.peakMemorySize = peakMemorySize;

    // The engine takes ownership of db from here on.
    return ue2::make_unique<EngineChimera>(db, move(cs));
}
|
103
tools/hsbench/engine_chimera.h
Normal file
103
tools/hsbench/engine_chimera.h
Normal file
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINECHIMERA_H
|
||||
#define ENGINECHIMERA_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "engine.h"
|
||||
|
||||
#include "chimera/ch.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/** Information about the database compile. */
struct CompileCHStats {
    std::string sigs_name;           //!< Name of the signature set, if any.
    std::string signatures;          //!< Name of the expression source.
    std::string db_info;             //!< Text returned by ch_database_info().
    size_t expressionCount{0};       //!< Number of expressions compiled.
    size_t compiledSize{0};          //!< Size reported by ch_database_size().
    uint32_t crc32{0};
    size_t scratchSize{0};           //!< Size reported by ch_scratch_size().
    long double compileSecs{0.0L};   //!< Wall time spent compiling.
    unsigned int peakMemorySize{0};  //!< Value of getPeakHeap() after compile.
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EngineCHContext : public EngineContext{
|
||||
public:
|
||||
explicit EngineCHContext(const ch_database_t *db);
|
||||
~EngineCHContext();
|
||||
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
};
|
||||
|
||||
/** Chimera Engine for scanning data. */
|
||||
class EngineChimera : public Engine {
|
||||
public:
|
||||
explicit EngineChimera(ch_database_t *db, CompileCHStats cs);
|
||||
~EngineChimera();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
|
||||
void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
|
||||
void printStats() const;
|
||||
|
||||
void sqlStats(SqlDB &db) const;
|
||||
|
||||
private:
|
||||
ch_database_t *db;
|
||||
CompileCHStats compile_stats;
|
||||
};
|
||||
|
||||
std::unique_ptr<EngineChimera>
|
||||
buildEngineChimera(const ExpressionMap &expressions, const std::string &name,
|
||||
const std::string &sigs_name);
|
||||
|
||||
#endif // ENGINECHIMERA_H
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -57,20 +57,22 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
EngineContext::EngineContext(const hs_database_t *db) {
|
||||
EngineHSContext::EngineHSContext(const hs_database_t *db) {
|
||||
hs_alloc_scratch(db, &scratch);
|
||||
assert(scratch);
|
||||
}
|
||||
|
||||
EngineContext::~EngineContext() {
|
||||
EngineHSContext::~EngineHSContext() {
|
||||
hs_free_scratch(scratch);
|
||||
}
|
||||
|
||||
EngineHSStream::~EngineHSStream() { }
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanContext {
|
||||
ScanContext(unsigned id_in, ResultEntry &result_in,
|
||||
struct ScanHSContext {
|
||||
ScanHSContext(unsigned id_in, ResultEntry &result_in,
|
||||
const EngineStream *stream_in)
|
||||
: id(id_in), result(result_in), stream(stream_in) {}
|
||||
unsigned id;
|
||||
@ -87,7 +89,7 @@ struct ScanContext {
|
||||
static
|
||||
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||
void *ctx) {
|
||||
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||
ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
@ -101,7 +103,7 @@ int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||
static
|
||||
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
unsigned int, void *ctx) {
|
||||
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||
ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
@ -114,7 +116,7 @@ int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
return 0;
|
||||
}
|
||||
|
||||
EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileStats cs)
|
||||
EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileHSStats cs)
|
||||
: db(db_in), compile_stats(std::move(cs)) {
|
||||
assert(db);
|
||||
}
|
||||
@ -124,14 +126,15 @@ EngineHyperscan::~EngineHyperscan() {
|
||||
}
|
||||
|
||||
unique_ptr<EngineContext> EngineHyperscan::makeContext() const {
|
||||
return ue2::make_unique<EngineContext>(db);
|
||||
return ue2::make_unique<EngineHSContext>(db);
|
||||
}
|
||||
|
||||
void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ctx) const {
|
||||
ResultEntry &result, EngineContext &ectx) const {
|
||||
assert(data);
|
||||
|
||||
ScanContext sc(id, result, nullptr);
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
ScanHSContext sc(id, result, nullptr);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc);
|
||||
|
||||
@ -144,11 +147,12 @@ void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
|
||||
void EngineHyperscan::scan_vectored(const char *const *data,
|
||||
const unsigned int *len, unsigned int count,
|
||||
unsigned streamId, ResultEntry &result,
|
||||
EngineContext &ctx) const {
|
||||
EngineContext &ectx) const {
|
||||
assert(data);
|
||||
assert(len);
|
||||
|
||||
ScanContext sc(streamId, result, nullptr);
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
ScanHSContext sc(streamId, result, nullptr);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv =
|
||||
hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc);
|
||||
@ -159,9 +163,10 @@ void EngineHyperscan::scan_vectored(const char *const *data,
|
||||
}
|
||||
}
|
||||
|
||||
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
|
||||
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ectx,
|
||||
unsigned streamId) const {
|
||||
auto stream = ue2::make_unique<EngineStream>();
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
auto stream = ue2::make_unique<EngineHSStream>();
|
||||
stream->ctx = &ctx;
|
||||
|
||||
hs_open_stream(db, 0, &stream->id);
|
||||
@ -170,17 +175,18 @@ unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
|
||||
return nullptr;
|
||||
}
|
||||
stream->sn = streamId;
|
||||
return stream;
|
||||
return move(stream);
|
||||
}
|
||||
|
||||
void EngineHyperscan::streamClose(unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const {
|
||||
assert(stream);
|
||||
|
||||
auto &s = static_cast<EngineStream &>(*stream);
|
||||
EngineContext &ctx = *s.ctx;
|
||||
auto &s = static_cast<EngineHSStream &>(*stream);
|
||||
EngineContext &ectx = *s.ctx;
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
|
||||
ScanContext sc(0, result, &s);
|
||||
ScanHSContext sc(0, result, &s);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
|
||||
assert(s.id);
|
||||
@ -193,10 +199,10 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
|
||||
ResultEntry &result) const {
|
||||
assert(data);
|
||||
|
||||
auto &s = static_cast<EngineStream &>(stream);
|
||||
EngineContext &ctx = *s.ctx;
|
||||
auto &s = static_cast<EngineHSStream &>(stream);
|
||||
EngineHSContext &ctx = *s.ctx;
|
||||
|
||||
ScanContext sc(id, result, &s);
|
||||
ScanHSContext sc(id, result, &s);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv =
|
||||
hs_scan_stream(s.id, data, len, 0, ctx.scratch, callback, &sc);
|
||||
@ -210,11 +216,12 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
|
||||
void EngineHyperscan::streamCompressExpand(EngineStream &stream,
|
||||
vector<char> &temp) const {
|
||||
size_t used = 0;
|
||||
hs_error_t err = hs_compress_stream(stream.id, temp.data(), temp.size(),
|
||||
auto &s = static_cast<EngineHSStream &>(stream);
|
||||
hs_error_t err = hs_compress_stream(s.id, temp.data(), temp.size(),
|
||||
&used);
|
||||
if (err == HS_INSUFFICIENT_SPACE) {
|
||||
temp.resize(used);
|
||||
err = hs_compress_stream(stream.id, temp.data(), temp.size(), &used);
|
||||
err = hs_compress_stream(s.id, temp.data(), temp.size(), &used);
|
||||
}
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
@ -223,10 +230,10 @@ void EngineHyperscan::streamCompressExpand(EngineStream &stream,
|
||||
}
|
||||
|
||||
if (printCompressSize) {
|
||||
printf("stream %u: compressed to %zu\n", stream.sn, used);
|
||||
printf("stream %u: compressed to %zu\n", s.sn, used);
|
||||
}
|
||||
|
||||
err = hs_reset_and_expand_stream(stream.id, temp.data(), temp.size(),
|
||||
err = hs_reset_and_expand_stream(s.id, temp.data(), temp.size(),
|
||||
nullptr, nullptr, nullptr);
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
@ -469,7 +476,7 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
|
||||
hs_free_scratch(scratch);
|
||||
|
||||
// Collect summary information.
|
||||
CompileStats cs;
|
||||
CompileHSStats cs;
|
||||
cs.sigs_name = sigs_name;
|
||||
if (!sigs_name.empty()) {
|
||||
const auto pos = name.find_last_of('/');
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -30,22 +30,15 @@
|
||||
#define ENGINEHYPERSCAN_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "common.h"
|
||||
#include "sqldb.h"
|
||||
#include "engine.h"
|
||||
#include "hs_runtime.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/** Structure for the result of a single complete scan. */
|
||||
struct ResultEntry {
|
||||
double seconds = 0; //!< Time taken for scan.
|
||||
unsigned int matches = 0; //!< Count of matches found.
|
||||
};
|
||||
|
||||
/** Information about the database compile */
|
||||
struct CompileStats {
|
||||
struct CompileHSStats {
|
||||
std::string sigs_name;
|
||||
std::string signatures;
|
||||
std::string db_info;
|
||||
@ -60,38 +53,38 @@ struct CompileStats {
|
||||
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EngineContext {
|
||||
class EngineHSContext : public EngineContext {
|
||||
public:
|
||||
explicit EngineContext(const hs_database_t *db);
|
||||
~EngineContext();
|
||||
explicit EngineHSContext(const hs_database_t *db);
|
||||
~EngineHSContext();
|
||||
|
||||
hs_scratch_t *scratch = nullptr;
|
||||
};
|
||||
|
||||
/** Streaming mode scans have persistent stream state associated with them. */
|
||||
class EngineStream {
|
||||
class EngineHSStream : public EngineStream {
|
||||
public:
|
||||
~EngineHSStream();
|
||||
hs_stream_t *id;
|
||||
unsigned int sn;
|
||||
EngineContext *ctx;
|
||||
EngineHSContext *ctx;
|
||||
};
|
||||
|
||||
/** Hyperscan Engine for scanning data. */
|
||||
class EngineHyperscan {
|
||||
class EngineHyperscan : public Engine {
|
||||
public:
|
||||
explicit EngineHyperscan(hs_database_t *db, CompileStats cs);
|
||||
explicit EngineHyperscan(hs_database_t *db, CompileHSStats cs);
|
||||
~EngineHyperscan();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ctx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ctx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
@ -109,7 +102,7 @@ public:
|
||||
|
||||
private:
|
||||
hs_database_t *db;
|
||||
CompileStats compile_stats;
|
||||
CompileHSStats compile_stats;
|
||||
};
|
||||
|
||||
namespace ue2 {
|
||||
|
388
tools/hsbench/engine_pcre.cpp
Normal file
388
tools/hsbench/engine_pcre.cpp
Normal file
@ -0,0 +1,388 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "engine_pcre.h"
|
||||
#include "heapstats.h"
|
||||
#include "huge.h"
|
||||
#include "sqldb.h"
|
||||
#include "timer.h"
|
||||
|
||||
#include "util/make_unique.h"
|
||||
#include "util/unicode_def.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Allocate the output vector that scan() passes to pcre_exec(): three ints
 * for the overall match plus three for every capture group.
 *
 * An allocation failure is fatal. It is reported here rather than left to
 * surface later as an unexplained pcre_exec() error.
 */
EnginePCREContext::EnginePCREContext(int capture_cnt) {
    const size_t ovec_ints = (static_cast<size_t>(capture_cnt) + 1) * 3;
    ovec = static_cast<int *>(malloc(ovec_ints * sizeof(int)));
    if (!ovec) {
        printf("Fatal error: unable to allocate PCRE output vector\n");
        abort();
    }
}
|
||||
|
||||
/** Give back the output vector obtained with malloc() in the constructor. */
EnginePCREContext::~EnginePCREContext() {
    free(ovec);
}
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanPCREContext {
|
||||
ScanPCREContext(unsigned id_in, ResultEntry &result_in)
|
||||
: id(id_in), result(result_in) {}
|
||||
unsigned id;
|
||||
ResultEntry &result;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Function called for every match that PCRE produces, used when
|
||||
* "echo matches" is off.
|
||||
*/
|
||||
static
|
||||
int onMatch(ScanPCREContext *sc) {
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function called for every match that PCRE produces when "echo
|
||||
* matches" is enabled.
|
||||
*/
|
||||
static
|
||||
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
ScanPCREContext *sc) {
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
printf("Match @%u:%llu for %u\n", sc->id, to, id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Take ownership of the compiled patterns and their statistics, and remember
 * the largest capture count, which sizes each context's output vector.
 */
EnginePCRE::EnginePCRE(vector<unique_ptr<PcreDB>> dbs_in, CompilePCREStats cs,
                       int capture_cnt_in)
    : dbs(std::move(dbs_in)),
      compile_stats(std::move(cs)),
      capture_cnt(capture_cnt_in) {
}
|
||||
|
||||
/**
 * Release every compiled pattern through the PCRE API.
 *
 * pcre_free_study() is used for the extra block so that any JIT code
 * attached to it is released as well; it ends by handing the block to
 * pcre_free(), which is plain free() unless the application overrides it.
 * That also covers an extra block this tool allocated itself.
 */
EnginePCRE::~EnginePCRE() {
    for (auto &pcreDB : dbs) {
        pcre_free_study(pcreDB->extra); // accepts NULL
        pcre_free(pcreDB->db);
    }
}
|
||||
|
||||
/** Create a context whose output vector fits capture_cnt capture groups. */
unique_ptr<EngineContext> EnginePCRE::makeContext() const {
    unique_ptr<EngineContext> ctx =
        ue2::make_unique<EnginePCREContext>(capture_cnt);
    return ctx;
}
|
||||
|
||||
void EnginePCRE::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const {
|
||||
assert(data);
|
||||
|
||||
ScanPCREContext sc(id, result);
|
||||
auto &ctx = static_cast<EnginePCREContext &>(ectx);
|
||||
int *ovec = ctx.ovec;
|
||||
int ovec_size = (capture_cnt + 1) * 3;
|
||||
for (const auto &pcreDB : dbs) {
|
||||
int startoffset = 0;
|
||||
bool utf8 = pcreDB->utf8;
|
||||
bool highlander = pcreDB->highlander;
|
||||
|
||||
int flags = 0;
|
||||
int ret;
|
||||
do {
|
||||
ret = pcre_exec(pcreDB->db, pcreDB->extra, data, len,
|
||||
startoffset, flags, ovec, ovec_size);
|
||||
if (ret <= PCRE_ERROR_NOMATCH) {
|
||||
break;
|
||||
}
|
||||
|
||||
int from = ovec[0];
|
||||
int to = ovec[1];
|
||||
assert(from <= to);
|
||||
|
||||
if (echo_matches) {
|
||||
onMatchEcho(pcreDB->id, from, to, &sc);
|
||||
} else {
|
||||
onMatch(&sc);
|
||||
}
|
||||
|
||||
// If we only wanted a single match, we're done.
|
||||
if (highlander) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Next scan starts at the first codepoint after the match. It's
|
||||
// possible that we have a vacuous match, in which case we must step
|
||||
// past it to ensure that we always progress.
|
||||
if (from != to) {
|
||||
startoffset = to;
|
||||
} else if (utf8) {
|
||||
startoffset = to + 1;
|
||||
while (startoffset < (int)len &&
|
||||
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
|
||||
++startoffset;
|
||||
}
|
||||
} else {
|
||||
startoffset = to + 1;
|
||||
}
|
||||
} while (startoffset <= (int)len);
|
||||
|
||||
if (ret < PCRE_ERROR_NOMATCH) {
|
||||
printf("Fatal error: pcre returned error %d\n", ret);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vectoring scan
|
||||
void EnginePCRE::scan_vectored(UNUSED const char *const *data,
|
||||
UNUSED const unsigned int *len,
|
||||
UNUSED unsigned int count,
|
||||
UNUSED unsigned int streamId,
|
||||
UNUSED ResultEntry &result,
|
||||
UNUSED EngineContext &ectx) const {
|
||||
printf("PCRE matcher can't support vectored mode.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
/**
 * Streaming is not available with PCRE: report that and abort. The function
 * never returns, so no stream object is produced.
 */
unique_ptr<EngineStream> EnginePCRE::streamOpen(UNUSED EngineContext &ectx,
                                                UNUSED unsigned id) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/** Streaming is not available with PCRE: report that and abort. */
void EnginePCRE::streamClose(UNUSED unique_ptr<EngineStream> stream,
                             UNUSED ResultEntry &result) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/** Streaming is not available with PCRE: report that and abort. */
void EnginePCRE::streamScan(UNUSED EngineStream &stream,
                            UNUSED const char *data, UNUSED unsigned len,
                            UNUSED unsigned id,
                            UNUSED ResultEntry &result) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/** Streaming is not available with PCRE: report that and abort. */
void EnginePCRE::streamCompressExpand(UNUSED EngineStream &stream,
                                      UNUSED vector<char> &temp) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
void EnginePCRE::printStats() const {
|
||||
// Output summary information.
|
||||
if (!compile_stats.sigs_name.empty()) {
|
||||
printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
|
||||
}
|
||||
printf("Signatures: %s\n", compile_stats.signatures.c_str());
|
||||
printf("PCRE info: %s\n", compile_stats.db_info.c_str());
|
||||
printf("Expression count: %'zu\n", compile_stats.expressionCount);
|
||||
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
|
||||
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
|
||||
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
|
||||
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
|
||||
}
|
||||
|
||||
/**
 * Insert the compile-time summary into the Compile table of the given
 * database. Nothing is written to the crc stream here, so the crc column
 * receives an empty string.
 */
void EnginePCRE::sqlStats(SqlDB &sqldb) const {
    static const string insertCompile =
        "INSERT INTO Compile ("
        "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
        "scratchSize, compileSecs, peakMemory) "
        "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    const CompilePCREStats &st = compile_stats;
    ostringstream crc;

    sqldb.insert_all(insertCompile, st.sigs_name, st.signatures, st.db_info,
                     st.expressionCount, st.compiledSize, crc.str(),
                     st.scratchSize, st.compileSecs, st.peakMemorySize);
}
|
||||
|
||||
static
|
||||
bool decodeExprPCRE(string &expr, unsigned *flags, struct PcreDB &db) {
|
||||
if (expr[0] != '/') {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t end = expr.find_last_of('/');
|
||||
if (end == string::npos) {
|
||||
return false;
|
||||
}
|
||||
string strFlags = expr.substr(end + 1, expr.length() - end - 1);
|
||||
|
||||
// strip starting and trailing slashes and the flags
|
||||
expr.erase(end, expr.length() - end);
|
||||
expr.erase(0, 1);
|
||||
|
||||
// decode the flags
|
||||
*flags = 0;
|
||||
for (size_t i = 0; i != strFlags.length(); ++i) {
|
||||
switch (strFlags[i]) {
|
||||
case 's':
|
||||
*flags |= PCRE_DOTALL;
|
||||
break;
|
||||
case 'm':
|
||||
*flags |= PCRE_MULTILINE;
|
||||
break;
|
||||
case 'i':
|
||||
*flags |= PCRE_CASELESS;
|
||||
break;
|
||||
case '8':
|
||||
*flags |= PCRE_UTF8;
|
||||
db.utf8 = true;
|
||||
break;
|
||||
case 'W':
|
||||
*flags |= PCRE_UCP;
|
||||
break;
|
||||
case 'H':
|
||||
db.highlander = true;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Compile every expression with PCRE and bundle the results into an
 * EnginePCRE.
 *
 * Each expression is decoded with decodeExprPCRE(), compiled with
 * pcre_compile() and studied with JIT compilation requested. The largest
 * capture count over all patterns sizes the per-context output vector, and
 * the sum of pattern and study sizes is reported as the compiled size.
 *
 * Returns nullptr on failure. When an expression cannot be decoded,
 * compiled, studied or queried, a message is printed and everything compiled
 * up to that point is released first.
 */
unique_ptr<EnginePCRE>
buildEnginePcre(const ExpressionMap &expressions, const string &name,
                const string &sigs_name) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    long double compileSecs = 0.0;
    size_t compiledSize = 0;
    unsigned int peakMemorySize = 0;
    string db_info("Version: ");
    db_info += string(pcre_version());

    vector<unique_ptr<PcreDB>> dbs;
    int capture_cnt = 0;

    // Error-path cleanup: frees the pattern currently being built (either
    // pointer may be null) and every pattern already stored in dbs.
    auto discard = [&dbs](pcre *cur_db, pcre_extra *cur_extra) {
        pcre_free_study(cur_extra); // accepts NULL
        if (cur_db) {
            pcre_free(cur_db);
        }
        for (auto &built : dbs) {
            pcre_free_study(built->extra);
            if (built->db) {
                pcre_free(built->db);
            }
        }
        dbs.clear();
    };

    Timer timer;
    timer.start();

    for (const auto &m : expressions) {
        string expr(m.second);
        unsigned int flags = 0;
        auto pcreDB = ue2::make_unique<PcreDB>();
        if (!decodeExprPCRE(expr, &flags, *pcreDB)) {
            printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                   m.first);
            discard(nullptr, nullptr);
            return nullptr;
        }

        const char *errp;
        int erro;
        pcre *db = pcre_compile(expr.c_str(), flags, &errp, &erro, NULL);

        if (!db) {
            printf("Compile error %s\n", errp);
            discard(nullptr, nullptr);
            return nullptr;
        }

        pcre_extra *extra = pcre_study(db, PCRE_STUDY_JIT_COMPILE, &errp);
        if (errp) {
            printf("PCRE could not be studied: %s\n", errp);
            discard(db, extra);
            return nullptr;
        }
        if (!extra) {
            // pcre_study() had nothing to return. Create a zeroed block so
            // the match limits can still be attached and pcre_fullinfo()
            // does not read uninitialised flags.
            extra = (pcre_extra *)calloc(1, sizeof(pcre_extra));
            if (!extra) {
                printf("PCRE extra block allocation failed\n");
                discard(db, nullptr);
                return nullptr;
            }
        }

        int cap = 0; // PCRE_INFO_CAPTURECOUNT demands an int
        if (pcre_fullinfo(db, extra, PCRE_INFO_CAPTURECOUNT, &cap)) {
            printf("PCRE fullinfo error\n");
            discard(db, extra);
            return nullptr;
        }
        assert(cap >= 0);
        capture_cnt = max(capture_cnt, cap);

        size_t db_size = 0;
        if (pcre_fullinfo(db, extra, PCRE_INFO_SIZE, &db_size)) {
            printf("PCRE fullinfo error\n");
            discard(db, extra);
            return nullptr;
        }

        size_t study_size = 0;
        if (pcre_fullinfo(db, extra, PCRE_INFO_STUDYSIZE,
                          &study_size)) {
            printf("PCRE fullinfo error\n");
            discard(db, extra);
            return nullptr;
        }
        compiledSize += db_size + study_size;

        pcreDB->id = m.first;
        pcreDB->db = db;

        // Add the match limits to the block. The bits are OR-ed in so the
        // study-data and JIT flags set by pcre_study() stay in effect.
        extra->flags |=
            PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
        extra->match_limit = 10000000;
        extra->match_limit_recursion = 1500;

        pcreDB->extra = extra;
        dbs.push_back(std::move(pcreDB));
    }

    timer.complete();
    compileSecs = timer.seconds();
    peakMemorySize = getPeakHeap();

    // Collect summary information.
    CompilePCREStats cs;
    cs.sigs_name = sigs_name;
    if (!sigs_name.empty()) {
        const auto pos = name.find_last_of('/');
        cs.signatures = name.substr(pos + 1);
    } else {
        cs.signatures = name;
    }
    cs.db_info = db_info;
    cs.expressionCount = expressions.size();
    cs.compiledSize = compiledSize;
    // Same size as the output vector each EnginePCREContext allocates.
    cs.scratchSize = (capture_cnt + 1) * sizeof(int) * 3;
    cs.compileSecs = compileSecs;
    cs.peakMemorySize = peakMemorySize;

    return ue2::make_unique<EnginePCRE>(std::move(dbs), std::move(cs),
                                        capture_cnt);
}
|
114
tools/hsbench/engine_pcre.h
Normal file
114
tools/hsbench/engine_pcre.h
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINEPCRE_H
|
||||
#define ENGINEPCRE_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "engine.h"
|
||||
|
||||
#include <pcre.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/** Information about the database compile. */
struct CompilePCREStats {
    std::string sigs_name;           //!< Name of the signature set, if any.
    std::string signatures;          //!< Name of the expression source.
    std::string db_info;             //!< PCRE version text.
    size_t expressionCount{0};       //!< Number of expressions compiled.
    size_t compiledSize{0};          //!< Sum of pattern and study sizes.
    size_t scratchSize{0};           //!< Size of the per-context output vector.
    long double compileSecs{0.0L};   //!< Wall time spent compiling.
    unsigned int peakMemorySize{0};  //!< Value of getPeakHeap() after compile.
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EnginePCREContext : public EngineContext{
|
||||
public:
|
||||
explicit EnginePCREContext(int capture_cnt);
|
||||
~EnginePCREContext();
|
||||
|
||||
int *ovec = nullptr;
|
||||
};
|
||||
|
||||
struct PcreDB {
|
||||
bool highlander = false;
|
||||
bool utf8 = false;
|
||||
u32 id;
|
||||
pcre *db = nullptr;
|
||||
pcre_extra *extra = nullptr;
|
||||
};
|
||||
|
||||
/** PCRE Engine for scanning data. */
|
||||
class EnginePCRE : public Engine {
|
||||
public:
|
||||
explicit EnginePCRE(std::vector<std::unique_ptr<PcreDB>> dbs_in,
|
||||
CompilePCREStats cs, int capture_cnt_in);
|
||||
~EnginePCRE();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
|
||||
void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
|
||||
void printStats() const;
|
||||
|
||||
void sqlStats(SqlDB &db) const;
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<PcreDB>> dbs;
|
||||
|
||||
CompilePCREStats compile_stats;
|
||||
|
||||
int capture_cnt;
|
||||
};
|
||||
|
||||
std::unique_ptr<EnginePCRE>
|
||||
buildEnginePcre(const ExpressionMap &expressions, const std::string &name,
|
||||
const std::string &sigs_name);
|
||||
|
||||
#endif // ENGINEPCRE_H
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -31,6 +31,10 @@
|
||||
#include "common.h"
|
||||
#include "data_corpus.h"
|
||||
#include "engine_hyperscan.h"
|
||||
#if defined(HS_HYBRID)
|
||||
#include "engine_chimera.h"
|
||||
#include "engine_pcre.h"
|
||||
#endif
|
||||
#include "expressions.h"
|
||||
#include "sqldb.h"
|
||||
#include "thread_barrier.h"
|
||||
@ -87,6 +91,8 @@ namespace /* anonymous */ {
|
||||
|
||||
bool display_per_scan = false;
|
||||
ScanMode scan_mode = ScanMode::STREAMING;
|
||||
bool useHybrid = false;
|
||||
bool usePcre = false;
|
||||
unsigned repeats = 20;
|
||||
string exprPath("");
|
||||
string corpusFile("");
|
||||
@ -102,7 +108,7 @@ typedef void (*thread_func_t)(void *context);
|
||||
|
||||
class ThreadContext : boost::noncopyable {
|
||||
public:
|
||||
ThreadContext(unsigned num_in, const EngineHyperscan &db_in,
|
||||
ThreadContext(unsigned num_in, const Engine &db_in,
|
||||
thread_barrier &tb_in, thread_func_t function_in,
|
||||
vector<DataBlock> corpus_data_in)
|
||||
: num(num_in), results(repeats), engine(db_in),
|
||||
@ -155,7 +161,7 @@ public:
|
||||
unsigned num;
|
||||
Timer timer;
|
||||
vector<ResultEntry> results;
|
||||
const EngineHyperscan &engine;
|
||||
const Engine &engine;
|
||||
unique_ptr<EngineContext> enginectx;
|
||||
vector<DataBlock> corpus_data;
|
||||
|
||||
@ -181,6 +187,10 @@ void usage(const char *error) {
|
||||
" (default: streaming).\n");
|
||||
printf(" -V Benchmark in vectored mode"
|
||||
" (default: streaming).\n");
|
||||
#if defined(HS_HYBRID)
|
||||
printf(" -H Benchmark using Chimera (if supported).\n");
|
||||
printf(" -P Benchmark using PCRE (if supported).\n");
|
||||
#endif
|
||||
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||
printf(" -T CPU,CPU,... Benchmark with threads on these CPUs.\n");
|
||||
#endif
|
||||
@ -214,7 +224,7 @@ struct BenchmarkSigs {
|
||||
static
|
||||
void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
UNUSED unique_ptr<Grey> &grey) {
|
||||
const char options[] = "-b:c:Cd:e:E:G:hi:n:No:p:sS:Vw:z:"
|
||||
const char options[] = "-b:c:Cd:e:E:G:hHi:n:No:p:PsS:Vw:z:"
|
||||
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||
"T:" // add the thread flag
|
||||
#endif
|
||||
@ -287,6 +297,14 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
usage(nullptr);
|
||||
exit(0);
|
||||
break;
|
||||
case 'H':
|
||||
#if defined(HS_HYBRID)
|
||||
useHybrid = true;
|
||||
#else
|
||||
usage("Hybrid matcher not enabled in this build");
|
||||
exit(1);
|
||||
#endif
|
||||
break;
|
||||
case 'n':
|
||||
if (!fromString(optarg, repeats) || repeats == 0) {
|
||||
usage("Couldn't parse argument to -n flag, should be"
|
||||
@ -294,6 +312,14 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 'P':
|
||||
#if defined(HS_HYBRID)
|
||||
usePcre = true;
|
||||
#else
|
||||
usage("PCRE matcher not enabled in this build");
|
||||
exit(1);
|
||||
#endif
|
||||
break;
|
||||
case 's':
|
||||
in_sigfile = 2;
|
||||
break;
|
||||
@ -399,6 +425,24 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Constraints on Chimera and PCRE engines
|
||||
if (useHybrid || usePcre) {
|
||||
if (useHybrid && usePcre) {
|
||||
usage("Can't run both Chimera and PCRE.");
|
||||
exit(1);
|
||||
}
|
||||
if (scan_mode != ScanMode::BLOCK) {
|
||||
usage("Must specify block mode in Chimera or PCRE with "
|
||||
"the -N option.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (forceEditDistance || loadDatabases || saveDatabases) {
|
||||
usage("No extended options are supported in Chimera or PCRE.");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Read in any -s signature sets.
|
||||
for (const auto &file : sigFiles) {
|
||||
SignatureSet sigs;
|
||||
@ -503,7 +547,7 @@ static
|
||||
void benchStreamingInternal(ThreadContext *ctx, vector<StreamInfo> &streams,
|
||||
bool do_compress) {
|
||||
assert(ctx);
|
||||
const EngineHyperscan &e = ctx->engine;
|
||||
const Engine &e = ctx->engine;
|
||||
const vector<DataBlock> &blocks = ctx->corpus_data;
|
||||
vector<char> compress_buf(do_compress ? 1000 : 0);
|
||||
|
||||
@ -812,7 +856,7 @@ void sqlResults(const vector<unique_ptr<ThreadContext>> &threads,
|
||||
* the same copy of the data.
|
||||
*/
|
||||
static
|
||||
unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
|
||||
unique_ptr<ThreadContext> makeThreadContext(const Engine &db,
|
||||
const vector<DataBlock> &blocks,
|
||||
unsigned id,
|
||||
thread_barrier &sync_barrier) {
|
||||
@ -839,7 +883,7 @@ unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
|
||||
|
||||
/** Run the given benchmark. */
|
||||
static
|
||||
void runBenchmark(const EngineHyperscan &db,
|
||||
void runBenchmark(const Engine &db,
|
||||
const vector<DataBlock> &corpus_blocks) {
|
||||
size_t numThreads;
|
||||
bool useAffinity = false;
|
||||
@ -936,8 +980,18 @@ int main(int argc, char *argv[]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name,
|
||||
unique_ptr<Engine> engine;
|
||||
if (useHybrid) {
|
||||
#if defined(HS_HYBRID)
|
||||
engine = buildEngineChimera(exprMap, s.name, sigName);
|
||||
} else if (usePcre) {
|
||||
engine = buildEnginePcre(exprMap, s.name, sigName);
|
||||
#endif
|
||||
} else {
|
||||
engine = buildEngineHyperscan(exprMap, scan_mode, s.name,
|
||||
sigName, *grey);
|
||||
}
|
||||
|
||||
if (!engine) {
|
||||
printf("Error: expressions failed to compile.\n");
|
||||
exit(1);
|
||||
|
@ -5,6 +5,14 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||
# Sources for the hscheck tool.
set(hscheck_SOURCES
    main.cpp
)

# The executable is the same with or without Chimera; only its usage
# requirements differ, so it is declared once.
add_executable(hscheck ${hscheck_SOURCES})

if (BUILD_CHIMERA)
    # Keep the PCRE include path and the HS_HYBRID define scoped to hscheck
    # instead of leaking them to every target declared in this directory.
    target_include_directories(hscheck PRIVATE ${PCRE_INCLUDE_DIRS})
    target_compile_definitions(hscheck PRIVATE HS_HYBRID)
    target_link_libraries(hscheck hs chimera ${PCRE_LDFLAGS} expressionutil pthread)
else()
    target_link_libraries(hscheck hs expressionutil pthread)
endif()
|
||||
|
||||
|
@ -59,6 +59,11 @@
|
||||
#include "hs_internal.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
#include <pcre.h>
|
||||
#include "chimera/ch.h"
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <mutex>
|
||||
@ -77,6 +82,7 @@ namespace /* anonymous */ {
|
||||
// are we in streaming mode? (default: yes)
|
||||
bool g_streaming = true;
|
||||
bool g_vectored = false;
|
||||
bool g_hybrid = false;
|
||||
string g_exprPath("");
|
||||
string g_signatureFile("");
|
||||
bool g_allSignatures = false;
|
||||
@ -282,9 +288,31 @@ void checkExpression(UNUSED void *threadarg) {
|
||||
|
||||
// Try and compile a database.
|
||||
const char *regexp = regex.c_str();
|
||||
const hs_expr_ext *extp = &ext;
|
||||
|
||||
hs_error_t err;
|
||||
|
||||
if (g_hybrid) {
|
||||
#ifdef HS_HYBRID
|
||||
ch_compile_error_t *ch_compile_err;
|
||||
ch_database_t *hybrid_db = nullptr;
|
||||
err = ch_compile_multi(&regexp, &flags, nullptr, 1, CH_MODE_GROUPS,
|
||||
nullptr, &hybrid_db, &ch_compile_err);
|
||||
if (err == HS_SUCCESS) {
|
||||
assert(hybrid_db);
|
||||
recordSuccess(g_exprMap, it->first);
|
||||
ch_free_database(hybrid_db);
|
||||
} else {
|
||||
assert(!hybrid_db);
|
||||
assert(ch_compile_err);
|
||||
recordFailure(g_exprMap, it->first, ch_compile_err->message);
|
||||
ch_free_compile_error(ch_compile_err);
|
||||
}
|
||||
#else
|
||||
cerr << "Hybrid mode not available in this build." << endl;
|
||||
exit(1);
|
||||
#endif // HS_HYBRID
|
||||
} else {
|
||||
const hs_expr_ext *extp = &ext;
|
||||
hs_compile_error_t *compile_err;
|
||||
hs_database_t *db = nullptr;
|
||||
|
||||
@ -313,6 +341,7 @@ void checkExpression(UNUSED void *threadarg) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool fetchSubIds(const char *logical, vector<unsigned> &ids) {
|
||||
@ -429,6 +458,9 @@ void usage() {
|
||||
#endif
|
||||
<< " -V Operate in vectored mode." << endl
|
||||
<< " -N Operate in block mode (default: streaming)." << endl
|
||||
#ifdef HS_HYBRID
|
||||
<< " -H Operate in hybrid mode." << endl
|
||||
#endif
|
||||
<< " -L Pass HS_FLAG_SOM_LEFTMOST for all expressions (default: off)." << endl
|
||||
<< " -8 Force UTF8 mode on all patterns." << endl
|
||||
<< " -T NUM Run with NUM threads." << endl
|
||||
@ -440,7 +472,7 @@ void usage() {
|
||||
|
||||
static
|
||||
void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
|
||||
const char options[] = "e:E:s:z:hLNV8G:T:BC";
|
||||
const char options[] = "e:E:s:z:hHLNV8G:T:BC";
|
||||
bool signatureSet = false;
|
||||
|
||||
for (;;) {
|
||||
@ -492,6 +524,9 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
|
||||
g_streaming = false;
|
||||
g_vectored = true;
|
||||
break;
|
||||
case 'H':
|
||||
g_hybrid = true;
|
||||
break;
|
||||
case 'T':
|
||||
num_of_threads = atoi(optarg);
|
||||
break;
|
||||
|
@ -1,9 +1,3 @@
|
||||
# we have a fixed requirement for PCRE
|
||||
set(PCRE_REQUIRED_MAJOR_VERSION 8)
|
||||
set(PCRE_REQUIRED_MINOR_VERSION 41)
|
||||
set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
|
||||
|
||||
include (${CMAKE_MODULE_PATH}/pcre.cmake)
|
||||
if (NOT CORRECT_PCRE_VERSION)
|
||||
message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found, not building hscollider")
|
||||
return()
|
||||
@ -29,6 +23,8 @@ set_source_files_properties(
|
||||
|
||||
ragelmaker(ColliderCorporaParser.rl)
|
||||
|
||||
add_definitions(-DHS_HYBRID)
|
||||
|
||||
# only set these after all tests are done
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||
@ -69,7 +65,7 @@ add_dependencies(hscollider ragel_ColliderCorporaParser)
|
||||
add_dependencies(hscollider pcre)
|
||||
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(hscollider hs ${PCRE_LDFLAGS} databaseutil
|
||||
target_link_libraries(hscollider hs chimera ${PCRE_LDFLAGS} databaseutil
|
||||
expressionutil corpusomatic crosscompileutil pthread
|
||||
"${BACKTRACE_LDFLAGS}")
|
||||
|
||||
@ -78,7 +74,7 @@ if(HAVE_BACKTRACE)
|
||||
"${BACKTRACE_CFLAGS}")
|
||||
endif()
|
||||
else() # WIN32
|
||||
target_link_libraries(hscollider hs ${PCRE_LDFLAGS} databaseutil
|
||||
target_link_libraries(hscollider hs chimera ${PCRE_LDFLAGS} databaseutil
|
||||
expressionutil corpusomatic crosscompileutil)
|
||||
endif()
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -54,10 +54,10 @@ public:
|
||||
explicit DatabaseProxy(const std::set<unsigned> &expr_ids)
|
||||
: ids(expr_ids) {}
|
||||
|
||||
explicit DatabaseProxy(std::shared_ptr<HyperscanDB> built_db)
|
||||
explicit DatabaseProxy(std::shared_ptr<BaseDB> built_db)
|
||||
: db(built_db) {}
|
||||
|
||||
std::shared_ptr<HyperscanDB> get(const UltimateTruth &ultimate) {
|
||||
std::shared_ptr<BaseDB> get(const UltimateTruth &ultimate) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
if (failed) {
|
||||
// We have previously failed to compile this database.
|
||||
@ -80,7 +80,7 @@ public:
|
||||
|
||||
private:
|
||||
std::mutex mutex;
|
||||
std::shared_ptr<HyperscanDB> db;
|
||||
std::shared_ptr<BaseDB> db;
|
||||
std::set<unsigned> ids;
|
||||
bool failed = false; // Database failed compilation.
|
||||
};
|
||||
|
@ -187,6 +187,14 @@ string pcreErrStr(int err) {
|
||||
}
|
||||
}
|
||||
|
||||
/* that is, a mode provided by native hyperscan */
|
||||
static
|
||||
bool isStandardMode(unsigned int mode) {
|
||||
return mode == MODE_BLOCK
|
||||
|| mode == MODE_STREAMING
|
||||
|| mode == MODE_VECTORED;
|
||||
}
|
||||
|
||||
GroundTruth::GroundTruth(ostream &os, const ExpressionMap &expr,
|
||||
unsigned long int limit,
|
||||
unsigned long int limit_recursion)
|
||||
@ -194,9 +202,11 @@ GroundTruth::GroundTruth(ostream &os, const ExpressionMap &expr,
|
||||
matchLimitRecursion(limit_recursion) {}
|
||||
|
||||
void GroundTruth::global_prep() {
|
||||
if (isStandardMode(colliderMode)) {
|
||||
// We're using pcre callouts
|
||||
pcre_callout = &pcreCallOut;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void addCallout(string &re) {
|
||||
@ -262,11 +272,17 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
throw PcreCompileFailure("Unsupported extended flags.");
|
||||
}
|
||||
|
||||
// Hybrid mode implies SOM.
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
assert(!use_NFA);
|
||||
som = true;
|
||||
}
|
||||
|
||||
// SOM flags might be set globally.
|
||||
som |= !!somFlags;
|
||||
|
||||
// For traditional Hyperscan, add global callout to pattern.
|
||||
if (!combination && !no_callouts) {
|
||||
if (!combination && !no_callouts && isStandardMode(colliderMode)) {
|
||||
addCallout(re);
|
||||
}
|
||||
|
||||
@ -403,6 +419,79 @@ int scanBasic(const CompiledPcre &compiled, const string &buffer,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
bool isUtf8(const CompiledPcre &compiled) {
|
||||
unsigned long int options = 0;
|
||||
pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
|
||||
return options & PCRE_UTF8;
|
||||
}
|
||||
|
||||
static
|
||||
CaptureVec makeCaptureVec(const vector<int> &ovector, int ret) {
|
||||
assert(ret > 0);
|
||||
|
||||
CaptureVec cap;
|
||||
|
||||
if (no_groups) {
|
||||
return cap; // No group info requested.
|
||||
}
|
||||
|
||||
cap.reserve(ret * 2);
|
||||
for (int i = 0; i < ret * 2; i += 2) {
|
||||
int from = ovector[i], to = ovector[i + 1];
|
||||
cap.push_back(make_pair(from, to));
|
||||
}
|
||||
return cap;
|
||||
}
|
||||
|
||||
static
|
||||
int scanHybrid(const CompiledPcre &compiled, const string &buffer,
|
||||
const pcre_extra &extra, vector<int> &ovector,
|
||||
ResultSet &rs, ostream &out) {
|
||||
int len = (int)buffer.length();
|
||||
int startoffset = 0;
|
||||
bool utf8 = isUtf8(compiled);
|
||||
|
||||
int flags = 0;
|
||||
int ret;
|
||||
do {
|
||||
ret = pcre_exec(compiled.bytecode, &extra, buffer.c_str(), len,
|
||||
startoffset, flags, &ovector[0], ovector.size());
|
||||
|
||||
if (ret <= PCRE_ERROR_NOMATCH) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
int from = ovector.at(0);
|
||||
int to = ovector.at(1);
|
||||
rs.addMatch(from, to, makeCaptureVec(ovector, ret));
|
||||
|
||||
if (echo_matches) {
|
||||
out << "PCRE Match @ (" << from << "," << to << ")" << endl;
|
||||
}
|
||||
|
||||
// If we only wanted a single match, we're done.
|
||||
if (compiled.highlander) break;
|
||||
|
||||
// Next scan starts at the first codepoint after the match. It's
|
||||
// possible that we have a vacuous match, in which case we must step
|
||||
// past it to ensure that we always progress.
|
||||
if (from != to) {
|
||||
startoffset = to;
|
||||
} else if (utf8) {
|
||||
startoffset = to + 1;
|
||||
while (startoffset < len
|
||||
&& ((buffer[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
|
||||
++startoffset;
|
||||
}
|
||||
} else {
|
||||
startoffset = to + 1;
|
||||
}
|
||||
} while (startoffset <= len);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
int scanOffset(const CompiledPcre &compiled, const string &buffer,
|
||||
const pcre_extra &extra, vector<int> &ovector,
|
||||
@ -532,15 +621,24 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
pcre_extra extra;
|
||||
extra.flags = 0;
|
||||
|
||||
// If running in traditional HyperScan mode, switch on callouts.
|
||||
bool usingCallouts = isStandardMode(colliderMode);
|
||||
if (usingCallouts) {
|
||||
// Switch on callouts.
|
||||
extra.flags |= PCRE_EXTRA_CALLOUT_DATA;
|
||||
extra.callout_data = &ctx;
|
||||
}
|
||||
|
||||
// Set the match_limit (in order to bound execution time on very complex
|
||||
// patterns)
|
||||
extra.flags |= (PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
extra.match_limit = 10000000;
|
||||
extra.match_limit_recursion = 1500;
|
||||
} else {
|
||||
extra.match_limit = matchLimit;
|
||||
extra.match_limit_recursion = matchLimitRecursion;
|
||||
}
|
||||
|
||||
#ifdef PCRE_NO_START_OPTIMIZE
|
||||
// Switch off optimizations that may result in callouts not occurring.
|
||||
@ -553,6 +651,7 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
ovector.resize(ovecsize);
|
||||
|
||||
int ret;
|
||||
bool hybrid = false;
|
||||
switch (colliderMode) {
|
||||
case MODE_BLOCK:
|
||||
case MODE_STREAMING:
|
||||
@ -563,6 +662,10 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
ret = scanBasic(compiled, buffer, extra, ovector, ctx);
|
||||
}
|
||||
break;
|
||||
case MODE_HYBRID:
|
||||
ret = scanHybrid(compiled, buffer, extra, ovector, rs, out);
|
||||
hybrid = true;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
ret = PCRE_ERROR_NULL;
|
||||
@ -595,7 +698,7 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (compiled.som) {
|
||||
if (compiled.som && !hybrid) {
|
||||
filterLeftmostSom(rs);
|
||||
}
|
||||
|
||||
|
@ -35,25 +35,36 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// Type for capturing groups: a vector of (from, to) offsets, with both set to
// -1 for inactive groups (like pcre's ovector). Used by hybrid modes.
typedef std::vector<std::pair<int, int> > CaptureVec;

// Class representing a single match, encapsulating to/from offsets and, for
// hybrid modes, the capture groups reported with the match.
class MatchResult {
public:
    // Match without capture information; `captured` stays empty.
    MatchResult(unsigned long long start, unsigned long long end)
        : from(start), to(end) {}

    // Match carrying a copy of the capture vector `cap`.
    MatchResult(unsigned long long start, unsigned long long end,
                const CaptureVec &cap)
        : from(start), to(end), captured(cap) {}

    // Orders by start offset, then end offset, then capture vector.
    bool operator<(const MatchResult &a) const {
        if (from != a.from) {
            return from < a.from;
        }
        if (to != a.to) {
            return to < a.to;
        }
        return captured < a.captured;
    }

    // Equality looks at the same three fields as operator<, so two matches
    // that differ only in their capture groups are distinct under both.
    bool operator==(const MatchResult &a) const {
        return from == a.from && to == a.to && captured == a.captured;
    }

    unsigned long long from;
    unsigned long long to;
    CaptureVec captured;
};
|
||||
|
||||
enum ResultSource {
|
||||
@ -114,6 +125,19 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Add a match (with capturing vector)
|
||||
void addMatch(unsigned long long from, unsigned long long to,
|
||||
const CaptureVec &cap, int block = 0) {
|
||||
MatchResult m(from, to, cap);
|
||||
matches.insert(m);
|
||||
|
||||
if (matches_by_block[block].find(m) != matches_by_block[block].end()) {
|
||||
dupe_matches.insert(m);
|
||||
} else {
|
||||
matches_by_block[block].insert(m);
|
||||
}
|
||||
}
|
||||
|
||||
// Clear all matches.
|
||||
void clear() {
|
||||
matches.clear();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -90,19 +90,14 @@ hs_error_t open_magic_stream(const hs_database_t *db, unsigned flags,
|
||||
|
||||
#endif // RELEASE_BUILD
|
||||
|
||||
class HyperscanDB : boost::noncopyable {
|
||||
class BaseDB : boost::noncopyable {
|
||||
public:
|
||||
// Constructor takes iterators over a container of pattern IDs.
|
||||
template <class Iter>
|
||||
HyperscanDB(hs_database_t *db_in, Iter ids_begin, Iter ids_end)
|
||||
: db(db_in), ids(ids_begin, ids_end) {}
|
||||
BaseDB(Iter ids_begin, Iter ids_end)
|
||||
: ids(ids_begin, ids_end) {}
|
||||
|
||||
~HyperscanDB() {
|
||||
hs_free_database(db);
|
||||
}
|
||||
|
||||
// Underlying Hyperscan database pointer.
|
||||
hs_database_t *db;
|
||||
virtual ~BaseDB();
|
||||
|
||||
// The set of expression IDs that must return their matches in order.
|
||||
unordered_set<unsigned> ordered;
|
||||
@ -111,15 +106,55 @@ public:
|
||||
unordered_set<unsigned> ids;
|
||||
};
|
||||
|
||||
BaseDB::~BaseDB() { }
|
||||
|
||||
class HyperscanDB : public BaseDB {
|
||||
public:
|
||||
// Constructor takes iterators over a container of pattern IDs.
|
||||
template <class Iter>
|
||||
HyperscanDB(hs_database_t *db_in, Iter ids_begin, Iter ids_end)
|
||||
: BaseDB(ids_begin, ids_end), db(db_in) {}
|
||||
|
||||
~HyperscanDB();
|
||||
|
||||
// Underlying Hyperscan database pointer.
|
||||
hs_database_t *db;
|
||||
};
|
||||
|
||||
HyperscanDB::~HyperscanDB() {
|
||||
hs_free_database(db);
|
||||
}
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
|
||||
class HybridDB : public BaseDB {
|
||||
public:
|
||||
// Constructor takes iterators over a container of pattern IDs.
|
||||
template <class Iter>
|
||||
HybridDB(ch_database_t *db_in, Iter ids_begin, Iter ids_end)
|
||||
: BaseDB(ids_begin, ids_end), db(db_in) {}
|
||||
|
||||
~HybridDB();
|
||||
|
||||
// Underlying Hyperscan database pointer.
|
||||
ch_database_t *db;
|
||||
};
|
||||
|
||||
HybridDB::~HybridDB() {
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
#endif // HS_HYBRID
|
||||
|
||||
// Used to track the ID and result set.
|
||||
namespace {
|
||||
struct MultiContext {
|
||||
MultiContext(unsigned int id_in, const HyperscanDB &db_in, ResultSet *rs_in,
|
||||
MultiContext(unsigned int id_in, const BaseDB &db_in, ResultSet *rs_in,
|
||||
bool single_in, ostream &os)
|
||||
: id(id_in), db(db_in), rs(rs_in), single(single_in), out(os) {}
|
||||
unsigned int id;
|
||||
int block = 0;
|
||||
const HyperscanDB &db;
|
||||
const BaseDB &db;
|
||||
ResultSet *rs;
|
||||
u64a lastRawMatch = 0; /* store last known unadjusted match location */
|
||||
u64a lastOrderMatch = 0;
|
||||
@ -230,6 +265,75 @@ int callbackMulti(unsigned int id, unsigned long long from,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
|
||||
// Hybrid matcher callback.
|
||||
static
|
||||
ch_callback_t callbackHybrid(unsigned id, unsigned long long from,
|
||||
unsigned long long to, unsigned, unsigned size,
|
||||
const ch_capture_t *captured, void *ctx) {
|
||||
MultiContext *mctx = static_cast<MultiContext *>(ctx);
|
||||
assert(mctx);
|
||||
assert(mctx->rs);
|
||||
assert(mctx->in_scan_call);
|
||||
|
||||
ostream &out = mctx->out;
|
||||
|
||||
to -= g_corpora_prefix.size();
|
||||
|
||||
if (mctx->terminated) {
|
||||
out << "UE2 Match @ (" << from << "," << to << ") for " << id
|
||||
<< " after termination" << endl;
|
||||
mctx->rs->match_after_halt = true;
|
||||
}
|
||||
|
||||
if (mctx->single || id == mctx->id) {
|
||||
CaptureVec cap;
|
||||
for (unsigned int i = 0; i < size; i++) {
|
||||
if (!(captured[i].flags & CH_CAPTURE_FLAG_ACTIVE)) {
|
||||
cap.push_back(make_pair(-1, -1));
|
||||
} else {
|
||||
cap.push_back(make_pair(captured[i].from, captured[i].to));
|
||||
}
|
||||
}
|
||||
mctx->rs->addMatch(from, to, cap);
|
||||
}
|
||||
|
||||
if (echo_matches) {
|
||||
out << "Match @ [" << from << "," << to << "] for " << id << endl;
|
||||
out << " Captured " << size << " groups: ";
|
||||
for (unsigned int i = 0; i < size; i++) {
|
||||
if (!(captured[i].flags & CH_CAPTURE_FLAG_ACTIVE)) {
|
||||
out << "{} ";
|
||||
} else {
|
||||
out << "{" << captured[i].from << "," << captured[i].to << "} ";
|
||||
}
|
||||
}
|
||||
out << endl;
|
||||
}
|
||||
|
||||
if (limit_matches && mctx->rs->matches.size() == limit_matches) {
|
||||
mctx->terminated = true;
|
||||
return CH_CALLBACK_TERMINATE;
|
||||
}
|
||||
|
||||
return CH_CALLBACK_CONTINUE;
|
||||
}
|
||||
|
||||
// Hybrid matcher error callback.
|
||||
static
|
||||
ch_callback_t errorCallback(UNUSED ch_error_event_t errorType, UNUSED unsigned int id, void *,
|
||||
void *ctx) {
|
||||
UNUSED MultiContext *mctx = static_cast<MultiContext *>(ctx);
|
||||
assert(mctx);
|
||||
assert(mctx->rs);
|
||||
assert(mctx->in_scan_call);
|
||||
|
||||
return CH_CALLBACK_SKIP_PATTERN;
|
||||
}
|
||||
|
||||
#endif // HS_HYBRID
|
||||
|
||||
static
|
||||
void filterLeftmostSom(ResultSet &rs) {
|
||||
if (rs.matches.size() <= 1) {
|
||||
@ -252,6 +356,9 @@ UltimateTruth::UltimateTruth(ostream &os, const ExpressionMap &expr,
|
||||
const Grey &grey_in, unsigned int streamBlocks)
|
||||
: grey(grey_in), out(os), m_expr(expr), m_xcompile(false),
|
||||
m_streamBlocks(streamBlocks), scratch(nullptr),
|
||||
#ifdef HS_HYBRID
|
||||
chimeraScratch(nullptr),
|
||||
#endif
|
||||
platform(plat) {
|
||||
// Build our mode flags.
|
||||
|
||||
@ -265,15 +372,27 @@ UltimateTruth::UltimateTruth(ostream &os, const ExpressionMap &expr,
|
||||
case MODE_VECTORED:
|
||||
m_mode = HS_MODE_VECTORED;
|
||||
break;
|
||||
case MODE_HYBRID:
|
||||
m_mode = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// Set desired SOM precision, if we're in streaming mode.
|
||||
if (colliderMode == MODE_STREAMING) {
|
||||
m_mode |= somPrecisionMode;
|
||||
}
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
if (colliderMode == MODE_HYBRID && !no_groups) {
|
||||
m_mode |= CH_MODE_GROUPS;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Frees the scratch held by this object: the Chimera scratch first (only in
// HS_HYBRID builds), then the Hyperscan scratch.
UltimateTruth::~UltimateTruth() {
#ifdef HS_HYBRID
    ch_free_scratch(chimeraScratch);
#endif
    hs_free_scratch(scratch);
}
|
||||
|
||||
@ -327,13 +446,13 @@ void mangle_scratch(hs_scratch_t *scratch) {
|
||||
scratch->fdr_conf_offset = 0xe4;
|
||||
}
|
||||
|
||||
bool UltimateTruth::blockScan(const HyperscanDB &hdb, const string &buffer,
|
||||
bool UltimateTruth::blockScan(const BaseDB &bdb, const string &buffer,
|
||||
size_t align, match_event_handler callback,
|
||||
void *ctx_in, ResultSet *) {
|
||||
assert(colliderMode == MODE_BLOCK);
|
||||
assert(!m_xcompile);
|
||||
|
||||
const hs_database_t *db = hdb.db;
|
||||
const hs_database_t *db = reinterpret_cast<const HyperscanDB &>(bdb).db;
|
||||
assert(db);
|
||||
MultiContext *ctx = (MultiContext *)ctx_in;
|
||||
|
||||
@ -438,13 +557,13 @@ hs_stream_t *compressAndResetExpandStream(const hs_database_t *db,
|
||||
return out;
|
||||
}
|
||||
|
||||
bool UltimateTruth::streamingScan(const HyperscanDB &hdb, const string &buffer,
|
||||
bool UltimateTruth::streamingScan(const BaseDB &bdb, const string &buffer,
|
||||
size_t align, match_event_handler callback,
|
||||
void *ctx_in, ResultSet *rs) {
|
||||
assert(colliderMode == MODE_STREAMING);
|
||||
assert(!m_xcompile);
|
||||
|
||||
const hs_database_t *db = hdb.db;
|
||||
const hs_database_t *db = reinterpret_cast<const HyperscanDB &>(bdb).db;
|
||||
assert(db);
|
||||
MultiContext *ctx = (MultiContext *)ctx_in;
|
||||
|
||||
@ -594,13 +713,13 @@ bool UltimateTruth::streamingScan(const HyperscanDB &hdb, const string &buffer,
|
||||
return ret == HS_SUCCESS;
|
||||
}
|
||||
|
||||
bool UltimateTruth::vectoredScan(const HyperscanDB &hdb, const string &buffer,
|
||||
bool UltimateTruth::vectoredScan(const BaseDB &bdb, const string &buffer,
|
||||
size_t align, match_event_handler callback,
|
||||
void *ctx_in, ResultSet *rs) {
|
||||
assert(colliderMode == MODE_VECTORED);
|
||||
assert(!m_xcompile);
|
||||
|
||||
const hs_database_t *db = hdb.db;
|
||||
const hs_database_t *db = reinterpret_cast<const HyperscanDB &>(bdb).db;
|
||||
assert(db);
|
||||
MultiContext *ctx = (MultiContext *)ctx_in;
|
||||
|
||||
@ -682,19 +801,67 @@ bool UltimateTruth::vectoredScan(const HyperscanDB &hdb, const string &buffer,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool UltimateTruth::run(unsigned int id, shared_ptr<const HyperscanDB> hdb,
|
||||
#ifdef HS_HYBRID
|
||||
bool UltimateTruth::hybridScan(const BaseDB &bdb, const string &buffer,
|
||||
size_t align, ch_match_event_handler callback,
|
||||
ch_error_event_handler error_callback,
|
||||
void *ctx_in, ResultSet *) {
|
||||
assert(colliderMode == MODE_HYBRID);
|
||||
assert(!m_xcompile);
|
||||
|
||||
const ch_database_t *db = reinterpret_cast<const HybridDB &>(bdb).db;
|
||||
assert(db);
|
||||
MultiContext *ctx = (MultiContext *)ctx_in;
|
||||
|
||||
char *realigned = setupScanBuffer(buffer.c_str(), buffer.size(), align);
|
||||
if (!realigned) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (use_copy_scratch && !cloneScratch()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ctx->in_scan_call = true;
|
||||
ch_error_t ret =
|
||||
ch_scan(db, realigned, buffer.size(), 0, chimeraScratch, callback,
|
||||
error_callback, ctx);
|
||||
ctx->in_scan_call = false;
|
||||
|
||||
if (g_verbose) {
|
||||
out << "Scan call returned " << ret << endl;
|
||||
}
|
||||
|
||||
if (ctx->terminated) {
|
||||
if (g_verbose && ret != CH_SCAN_TERMINATED) {
|
||||
out << "Scan should have returned CH_SCAN_TERMINATED, returned "
|
||||
<< ret << " instead." << endl;
|
||||
}
|
||||
return ret == CH_SCAN_TERMINATED;
|
||||
}
|
||||
|
||||
if (g_verbose && ret != CH_SUCCESS) {
|
||||
out << "Scan should have returned CH_SUCCESS, returned " << ret
|
||||
<< " instead." << endl;
|
||||
}
|
||||
|
||||
return ret == CH_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool UltimateTruth::run(unsigned int id, shared_ptr<const BaseDB> bdb,
|
||||
const string &buffer, bool single_pattern,
|
||||
unsigned int align, ResultSet &rs) {
|
||||
assert(!m_xcompile);
|
||||
assert(hdb);
|
||||
assert(bdb);
|
||||
|
||||
// Ensure that scratch is appropriate for this database.
|
||||
if (!allocScratch(hdb)) {
|
||||
if (!allocScratch(bdb)) {
|
||||
out << "Scratch alloc failed." << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
MultiContext ctx(id, *hdb, &rs, single_pattern, out);
|
||||
MultiContext ctx(id, *bdb, &rs, single_pattern, out);
|
||||
if (!g_corpora_suffix.empty()) {
|
||||
ctx.use_max_offset = true;
|
||||
ctx.max_offset = buffer.size() - g_corpora_suffix.size();
|
||||
@ -702,11 +869,20 @@ bool UltimateTruth::run(unsigned int id, shared_ptr<const HyperscanDB> hdb,
|
||||
|
||||
switch (colliderMode) {
|
||||
case MODE_BLOCK:
|
||||
return blockScan(*hdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
return blockScan(*bdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
case MODE_STREAMING:
|
||||
return streamingScan(*hdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
return streamingScan(*bdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
case MODE_VECTORED:
|
||||
return vectoredScan(*hdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
return vectoredScan(*bdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
case MODE_HYBRID:
|
||||
#ifdef HS_HYBRID
|
||||
return hybridScan(*bdb, buffer, align, callbackHybrid, errorCallback,
|
||||
&ctx, &rs);
|
||||
#else
|
||||
cerr << "Hybrid mode not available in this build." << endl;
|
||||
abort();
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
assert(0);
|
||||
@ -739,7 +915,7 @@ bool isOrdered(const string &expr, unsigned int flags) {
|
||||
return ordered;
|
||||
}
|
||||
|
||||
static unique_ptr<HyperscanDB>
|
||||
static unique_ptr<BaseDB>
|
||||
compileHyperscan(vector<const char *> &patterns, vector<unsigned> &flags,
|
||||
vector<unsigned> &idsvec, ptr_vector<hs_expr_ext> &ext,
|
||||
unsigned mode, const hs_platform_info *platform, string &error,
|
||||
@ -762,7 +938,30 @@ compileHyperscan(vector<const char *> &patterns, vector<unsigned> &flags,
|
||||
return ue2::make_unique<HyperscanDB>(db, idsvec.begin(), idsvec.end());
|
||||
}
|
||||
|
||||
shared_ptr<HyperscanDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
#ifdef HS_HYBRID
|
||||
/**
 * Compile the given patterns into a Chimera (hybrid) database.
 *
 * \param patterns  Pattern strings, one per expression.
 * \param flags     Per-pattern flags, parallel to \a patterns.
 * \param idsvec    Per-pattern ids, parallel to \a patterns.
 * \param mode      Mode value handed straight to ch_compile_multi.
 * \param platform  Target platform description handed to ch_compile_multi.
 * \param error     Receives the compiler's message when compilation fails.
 * \return The wrapped database, or nullptr on failure (with \a error set).
 */
static unique_ptr<BaseDB>
compileHybrid(vector<const char *> &patterns,
              vector<unsigned> &flags, vector<unsigned> &idsvec,
              unsigned mode, const hs_platform_info *platform, string &error) {
    const unsigned count = patterns.size();
    ch_database_t *db = nullptr;
    // Start from nullptr so a failure path that does not fill in the error
    // cannot make us dereference an indeterminate pointer.
    ch_compile_error_t *compile_err = nullptr;

    // data() is well defined for empty vectors, unlike &v[0].
    ch_error_t err = ch_compile_multi(patterns.data(), flags.data(),
                                      idsvec.data(), count, mode, platform,
                                      &db, &compile_err);

    // ch_compile_multi returns a ch_error_t, so compare against the Chimera
    // success code rather than the Hyperscan one.
    if (err != CH_SUCCESS) {
        if (compile_err) {
            error = compile_err->message;
            ch_free_compile_error(compile_err);
        } else {
            error = "Chimera compile failed without an error message.";
        }
        return nullptr;
    }

    return ue2::make_unique<HybridDB>(db, idsvec.begin(), idsvec.end());
}
|
||||
#endif
|
||||
|
||||
shared_ptr<BaseDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
string &error) const {
|
||||
// Build our vectors for compilation
|
||||
const size_t count = ids.size();
|
||||
@ -811,6 +1010,17 @@ shared_ptr<HyperscanDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
ext[n].edit_distance = edit_distance;
|
||||
}
|
||||
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
if (ext[n].flags) {
|
||||
error = "Hybrid does not support extended parameters.";
|
||||
return nullptr;
|
||||
}
|
||||
// We can also strip some other flags in the hybrid matcher.
|
||||
flags[n] &= ~HS_FLAG_PREFILTER; // prefilter always used
|
||||
flags[n] &= ~HS_FLAG_ALLOWEMPTY; // empty always allowed
|
||||
flags[n] &= ~HS_FLAG_SOM_LEFTMOST; // SOM always on
|
||||
}
|
||||
|
||||
n++;
|
||||
}
|
||||
|
||||
@ -827,8 +1037,18 @@ shared_ptr<HyperscanDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
idsvec.push_back(0);
|
||||
}
|
||||
|
||||
auto db = compileHyperscan(patterns, flags, idsvec, ext, m_mode, platform,
|
||||
error, grey);
|
||||
unique_ptr<BaseDB> db;
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
#ifdef HS_HYBRID
|
||||
db = compileHybrid(patterns, flags, idsvec, m_mode, platform, error);
|
||||
#else
|
||||
error = "Hybrid mode not available in this build.";
|
||||
#endif
|
||||
} else {
|
||||
db = compileHyperscan(patterns, flags, idsvec, ext, m_mode,
|
||||
platform, error, grey);
|
||||
}
|
||||
|
||||
if (!db) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -850,25 +1070,55 @@ shared_ptr<HyperscanDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
return move(db);
|
||||
}
|
||||
|
||||
bool UltimateTruth::allocScratch(shared_ptr<const HyperscanDB> db) {
|
||||
bool UltimateTruth::allocScratch(shared_ptr<const BaseDB> db) {
|
||||
assert(db);
|
||||
|
||||
// We explicitly avoid running scratch allocators for the same HyperscanDB
|
||||
// We explicitly avoid running scratch allocators for the same BaseDB
|
||||
// over and over again by retaining a shared_ptr to the last one we saw.
|
||||
if (db == last_db) {
|
||||
return true;
|
||||
}
|
||||
|
||||
hs_error_t err = hs_alloc_scratch(db.get()->db, &scratch);
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
#ifdef HS_HYBRID
|
||||
ch_error_t err = ch_alloc_scratch(
|
||||
reinterpret_cast<const HybridDB *>(db.get())->db, &chimeraScratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
#endif // HS_HYBRID
|
||||
} else {
|
||||
hs_error_t err = hs_alloc_scratch(
|
||||
reinterpret_cast<const HyperscanDB *>(db.get())->db, &scratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
last_db = db;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool UltimateTruth::cloneScratch(void) {
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
#ifdef HS_HYBRID
|
||||
ch_scratch_t *old_scratch = chimeraScratch;
|
||||
ch_scratch_t *new_scratch;
|
||||
ch_error_t ret = ch_clone_scratch(chimeraScratch, &new_scratch);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("failure to clone %d\n", ret);
|
||||
return false;
|
||||
}
|
||||
chimeraScratch = new_scratch;
|
||||
ret = ch_free_scratch(old_scratch);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("failure to free %d\n", ret);
|
||||
return false;
|
||||
}
|
||||
DEBUG_PRINTF("hybrid scratch cloned from %p to %p\n",
|
||||
old_scratch, chimeraScratch);
|
||||
#endif // HS_HYBRID
|
||||
} else {
|
||||
hs_scratch_t *old_scratch = scratch;
|
||||
hs_scratch_t *new_scratch;
|
||||
hs_error_t ret = hs_clone_scratch(scratch, &new_scratch);
|
||||
@ -883,6 +1133,7 @@ bool UltimateTruth::cloneScratch(void) {
|
||||
return false;
|
||||
}
|
||||
DEBUG_PRINTF("scratch cloned from %p to %p\n", old_scratch, scratch);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -947,20 +1198,35 @@ char *UltimateTruth::setupVecScanBuffer(const char *begin, size_t len,
|
||||
return ptr;
|
||||
}
|
||||
|
||||
bool UltimateTruth::saveDatabase(const HyperscanDB &hdb,
|
||||
bool UltimateTruth::saveDatabase(const BaseDB &bdb,
|
||||
const string &filename) const {
|
||||
return ::saveDatabase(hdb.db, filename.c_str(), g_verbose);
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
cerr << "Hybrid mode doesn't support serialization." << endl;
|
||||
abort();
|
||||
} else {
|
||||
return ::saveDatabase(reinterpret_cast<const HyperscanDB *>(&bdb)->db,
|
||||
filename.c_str(), g_verbose);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
shared_ptr<HyperscanDB>
|
||||
shared_ptr<BaseDB>
|
||||
UltimateTruth::loadDatabase(const string &filename,
|
||||
const std::set<unsigned> &ids) const {
|
||||
shared_ptr<BaseDB> db;
|
||||
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
cerr << "Hybrid mode doesn't support deserialization." << endl;
|
||||
abort();
|
||||
} else {
|
||||
hs_database_t *hs_db = ::loadDatabase(filename.c_str(), g_verbose);
|
||||
if (!hs_db) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto db = make_shared<HyperscanDB>(hs_db, ids.begin(), ids.end());
|
||||
db = make_shared<HyperscanDB>(hs_db, ids.begin(), ids.end());
|
||||
}
|
||||
|
||||
assert(db);
|
||||
|
||||
// Fill db::ordered with the expressions that require the ordered flag.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -33,6 +33,10 @@
|
||||
|
||||
#include "hs.h"
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
#include "chimera/ch.h"
|
||||
#endif
|
||||
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <set>
|
||||
@ -47,7 +51,7 @@ struct Grey;
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
class HyperscanDB;
|
||||
class BaseDB;
|
||||
class ResultSet;
|
||||
|
||||
// Wrapper around ue2 to generate results for an expression and corpus.
|
||||
@ -59,13 +63,13 @@ public:
|
||||
|
||||
~UltimateTruth();
|
||||
|
||||
std::shared_ptr<HyperscanDB> compile(const std::set<unsigned> &ids,
|
||||
std::shared_ptr<BaseDB> compile(const std::set<unsigned> &ids,
|
||||
std::string &error) const;
|
||||
|
||||
bool saveDatabase(const HyperscanDB &db,
|
||||
bool saveDatabase(const BaseDB &db,
|
||||
const std::string &filename) const;
|
||||
|
||||
std::shared_ptr<HyperscanDB>
|
||||
std::shared_ptr<BaseDB>
|
||||
loadDatabase(const std::string &filename,
|
||||
const std::set<unsigned> &ids) const;
|
||||
|
||||
@ -74,7 +78,7 @@ public:
|
||||
return !m_xcompile;
|
||||
}
|
||||
|
||||
bool run(unsigned id, std::shared_ptr<const HyperscanDB> db,
|
||||
bool run(unsigned id, std::shared_ptr<const BaseDB> db,
|
||||
const std::string &buffer, bool single_pattern, unsigned align,
|
||||
ResultSet &rs);
|
||||
|
||||
@ -84,22 +88,28 @@ public:
|
||||
std::string dbFilename(const std::set<unsigned int> &ids) const;
|
||||
|
||||
private:
|
||||
bool blockScan(const HyperscanDB &db, const std::string &buffer,
|
||||
bool blockScan(const BaseDB &db, const std::string &buffer,
|
||||
size_t align, match_event_handler callback, void *ctx,
|
||||
ResultSet *rs);
|
||||
bool streamingScan(const HyperscanDB &db, const std::string &buffer,
|
||||
bool streamingScan(const BaseDB &db, const std::string &buffer,
|
||||
size_t align, match_event_handler callback, void *ctx,
|
||||
ResultSet *rs);
|
||||
bool vectoredScan(const HyperscanDB &db, const std::string &buffer,
|
||||
bool vectoredScan(const BaseDB &db, const std::string &buffer,
|
||||
size_t align, match_event_handler callback, void *ctx,
|
||||
ResultSet *rs);
|
||||
#ifdef HS_HYBRID
|
||||
bool hybridScan(const BaseDB &db, const std::string &buffer,
|
||||
size_t align, ch_match_event_handler callback,
|
||||
ch_error_event_handler error_callback,
|
||||
void *ctx, ResultSet *rs);
|
||||
#endif // HS_HYBRID
|
||||
|
||||
char *setupScanBuffer(const char *buf, size_t len, size_t align);
|
||||
|
||||
char *setupVecScanBuffer(const char *buf, size_t len, size_t align,
|
||||
unsigned int block_id);
|
||||
|
||||
bool allocScratch(std::shared_ptr<const HyperscanDB> db);
|
||||
bool allocScratch(std::shared_ptr<const BaseDB> db);
|
||||
|
||||
bool cloneScratch(void);
|
||||
|
||||
@ -126,6 +136,11 @@ private:
|
||||
// Scratch space for Hyperscan.
|
||||
hs_scratch_t *scratch;
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
// Scratch space for Chimera.
|
||||
ch_scratch_t *chimeraScratch;
|
||||
#endif // HS_HYBRID
|
||||
|
||||
// Temporary scan buffer used for realigned scanning
|
||||
std::vector<char> m_scanBuf;
|
||||
|
||||
@ -134,7 +149,7 @@ private:
|
||||
|
||||
// Last database we successfully allocated scratch for, so that we can
|
||||
// avoid unnecessarily reallocating for it.
|
||||
std::shared_ptr<const HyperscanDB> last_db;
|
||||
std::shared_ptr<const BaseDB> last_db;
|
||||
|
||||
const hs_platform_info *platform;
|
||||
};
|
||||
|
@ -76,6 +76,7 @@ void usage(const char *name, const char *error) {
|
||||
"blocks.\n");
|
||||
printf(" -V NUM Use vectored mode, split data into ~NUM "
|
||||
"blocks.\n");
|
||||
printf(" -H Use hybrid mode.\n");
|
||||
printf(" -Z {R or 0-%d} Only test one alignment, either as given or "
|
||||
"'R' for random.\n", MAX_MAX_UE2_ALIGN - 1);
|
||||
printf(" -q Quiet; display only match differences, no other "
|
||||
@ -90,6 +91,7 @@ void usage(const char *name, const char *error) {
|
||||
printf(" -E DISTANCE Match all patterns within edit distance"
|
||||
" DISTANCE.\n");
|
||||
printf(" --prefilter Apply HS_FLAG_PREFILTER to all patterns.\n");
|
||||
printf(" --no-groups Disable capturing in Hybrid mode.\n");
|
||||
printf("\n");
|
||||
printf("Testing mode options:\n");
|
||||
printf("\n");
|
||||
@ -157,7 +159,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
vector<string> *corpora, UNUSED Grey *grey,
|
||||
unique_ptr<hs_platform_info> *plat_out) {
|
||||
static const char options[]
|
||||
= "-ab:cC:d:D:e:E:G:hi:k:Lm:M:n:o:O:p:P:qr:R:S:s:t:T:vV:w:x:X:Y:z:Z:8";
|
||||
= "-ab:cC:d:D:e:E:G:hHi:k:Lm:M:n:o:O:p:P:qr:R:S:s:t:T:vV:w:x:X:Y:z:Z:8";
|
||||
s32 in_multi = 0;
|
||||
s32 in_corpora = 0;
|
||||
int pcreFlag = 1;
|
||||
@ -180,6 +182,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
{"no-signal-handler", 0, &no_signal_handler, 1},
|
||||
{"compress-expand", 0, &compressFlag, 1},
|
||||
{"compress-reset-expand", 0, &compressResetFlag, 1},
|
||||
{"no-groups", 0, &no_groups, 1},
|
||||
{nullptr, 0, nullptr, 0}};
|
||||
|
||||
for (;;) {
|
||||
@ -271,6 +274,15 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
case 'h':
|
||||
usage(argv[0], nullptr);
|
||||
exit(0);
|
||||
case 'H':
|
||||
if (colliderMode != MODE_BLOCK) {
|
||||
usage(argv[0], "You can only use one mode at a time!");
|
||||
exit(1);
|
||||
}
|
||||
colliderMode = MODE_HYBRID;
|
||||
// Disable graph truth in hybrid mode
|
||||
nfaFlag = 0;
|
||||
break;
|
||||
case 'i':
|
||||
loadDatabases = true;
|
||||
serializePath = optarg;
|
||||
@ -542,6 +554,11 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (colliderMode == MODE_HYBRID && !ue2Flag) {
|
||||
usage(argv[0], "You cannot disable UE2 engine in Hybrid mode.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// need at least two pattern engines active
|
||||
if (nfaFlag + pcreFlag + ue2Flag < 2) {
|
||||
usage(argv[0], "At least two pattern engines should be active.");
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -36,7 +36,8 @@
|
||||
enum ColliderMode {
|
||||
MODE_BLOCK,
|
||||
MODE_STREAMING,
|
||||
MODE_VECTORED
|
||||
MODE_VECTORED,
|
||||
MODE_HYBRID
|
||||
};
|
||||
|
||||
extern unsigned numThreads;
|
||||
@ -68,6 +69,7 @@ extern unsigned max_ue2_align;
|
||||
extern size_t g_memoryLimit;
|
||||
extern bool force_utf8;
|
||||
extern int force_prefilter;
|
||||
extern int no_groups;
|
||||
extern unsigned somPrecisionMode;
|
||||
extern unsigned limit_matches;
|
||||
extern unsigned randomSeed;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -448,6 +448,9 @@ void printMode(void) {
|
||||
case MODE_VECTORED:
|
||||
cout << "Vectored-" << g_streamBlocks;
|
||||
break;
|
||||
case MODE_HYBRID:
|
||||
cout << "Hybrid";
|
||||
break;
|
||||
}
|
||||
|
||||
if (use_copy_scratch) {
|
||||
@ -690,7 +693,7 @@ shared_ptr<DatabaseProxy> constructDatabase(const set<unsigned int> &ids,
|
||||
|
||||
if (loadDatabases) {
|
||||
string filename = ultimate.dbFilename(ids);
|
||||
shared_ptr<HyperscanDB> db = ultimate.loadDatabase(filename, ids);
|
||||
shared_ptr<BaseDB> db = ultimate.loadDatabase(filename, ids);
|
||||
if (!db) {
|
||||
if (!g_quiet) {
|
||||
cout << "FAILED: could not load database " << filename << endl;
|
||||
@ -706,7 +709,7 @@ shared_ptr<DatabaseProxy> constructDatabase(const set<unsigned int> &ids,
|
||||
// If we're not runnable (i.e. we're cross-compiling), let's at least
|
||||
// try to build the database.
|
||||
if (!ultimate.runnable()) {
|
||||
shared_ptr<HyperscanDB> db = ue2->get(ultimate);
|
||||
shared_ptr<BaseDB> db = ue2->get(ultimate);
|
||||
assert(db); // throws otherwise
|
||||
}
|
||||
|
||||
@ -872,7 +875,7 @@ void runTestUnit(ostream &out, GroundTruth &ground, GraphTruth &graph,
|
||||
assert(use_UE2);
|
||||
Corpus &corpus = unit.corpus;
|
||||
|
||||
shared_ptr<const HyperscanDB> db;
|
||||
shared_ptr<const BaseDB> db;
|
||||
if (use_UE2) {
|
||||
// Acquire UE2 database.
|
||||
debug_stage = STAGE_UE2_COMPILE;
|
||||
@ -1648,6 +1651,7 @@ void printSettingsV(const vector<string> &corporaFiles,
|
||||
case MODE_BLOCK: cout << "block mode"; break;
|
||||
case MODE_STREAMING: cout << "streaming mode"; break;
|
||||
case MODE_VECTORED: cout << "vectored mode"; break;
|
||||
case MODE_HYBRID: cout << "hybrid mode"; break;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
@ -1746,6 +1750,7 @@ void printSettingsQ(const vector<string> &corporaFiles,
|
||||
case MODE_BLOCK: cout << "block mode"; break;
|
||||
case MODE_STREAMING: cout << "streaming mode"; break;
|
||||
case MODE_VECTORED: cout << "vectored mode"; break;
|
||||
case MODE_HYBRID: cout << "hybrid mode"; break;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
|
@ -123,6 +123,41 @@ set_target_properties(unit-internal PROPERTIES COMPILE_FLAGS "${HS_CXX_FLAGS}")
|
||||
target_link_libraries(unit-internal hs corpusomatic)
|
||||
endif(NOT (RELEASE_BUILD OR FAT_RUNTIME))
|
||||
|
||||
if (BUILD_CHIMERA)
|
||||
# enable Chimera unit tests
|
||||
set(unit_chimera_SOURCES
|
||||
${gtest_SOURCES}
|
||||
chimera/allocators.cpp
|
||||
chimera/arg_checks.cpp
|
||||
chimera/bad_patterns.cpp
|
||||
chimera/compat.cpp
|
||||
chimera/main.cpp
|
||||
chimera/scan.cpp
|
||||
)
|
||||
add_executable(unit-chimera ${unit_chimera_SOURCES})
|
||||
target_link_libraries(unit-chimera chimera hs pcre)
|
||||
#
|
||||
# build target to run unit tests
|
||||
#
|
||||
if (NOT RELEASE_BUILD)
|
||||
add_custom_target(
|
||||
unit
|
||||
COMMAND bin/unit-internal
|
||||
COMMAND bin/unit-hyperscan
|
||||
COMMAND bin/unit-chimera
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
DEPENDS unit-internal unit-hyperscan unit-chimera
|
||||
)
|
||||
else ()
|
||||
add_custom_target(
|
||||
unit
|
||||
COMMAND bin/unit-hyperscan
|
||||
COMMAND bin/unit-chimera
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
DEPENDS unit-hyperscan unit-chimera
|
||||
)
|
||||
endif()
|
||||
else()
|
||||
#
|
||||
# build target to run unit tests
|
||||
#
|
||||
@ -142,3 +177,4 @@ add_custom_target(
|
||||
DEPENDS unit-hyperscan
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
149
unit/chimera/allocators.cpp
Normal file
149
unit/chimera/allocators.cpp
Normal file
@ -0,0 +1,149 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "chimera/ch.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
using std::string;
|
||||
|
||||
// Allocator stub that always fails: every request yields nullptr.
static void *null_malloc(size_t) {
    return nullptr;
}
|
||||
|
||||
// Helper: correctly construct a simple database.
|
||||
static
|
||||
void makeDatabase(ch_database_t **hydb) {
|
||||
static const char *expr[] = { "foobar" };
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
|
||||
err = ch_compile_multi(expr, nullptr, nullptr, 1, 0, nullptr, &db,
|
||||
&compile_err);
|
||||
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ASSERT_TRUE(db != nullptr);
|
||||
|
||||
*hydb = db;
|
||||
}
|
||||
|
||||
// With an allocator that always returns nullptr installed, ch_database_info
// is expected to report CH_NOMEM.
TEST(HybridAllocator, DatabaseInfoBadAlloc) {
    ch_database_t *db = nullptr;
    makeDatabase(&db);
    ASSERT_TRUE(db != nullptr);

    ch_set_allocator(null_malloc, nullptr);

    char *info = nullptr;
    const ch_error_t err = ch_database_info(db, &info);

    // Undo the global allocator change and release the database before any
    // fatal assertion, so a failure here cannot poison later tests.
    ch_set_allocator(nullptr, nullptr);
    ch_free_database(db);

    ASSERT_EQ(CH_NOMEM, err);
}
|
||||
|
||||
// Allocator that deliberately hands out badly aligned memory: it over-allocates
// by two bytes and returns the address two bytes past the start of the real
// allocation. Returns nullptr if the underlying malloc fails or if len is so
// large that len + 2 would overflow.
static
void * two_aligned_malloc(size_t len) {
    // Guard the addition below: without this a huge len would wrap around and
    // silently produce a tiny allocation.
    if (len > static_cast<size_t>(-1) - 2) {
        return nullptr;
    }
    void *mem = malloc(len + 2);
    if (!mem) {
        return nullptr;
    }
    return (char *)mem + 2;
}
|
||||
|
||||
// Counterpart of two_aligned_malloc: steps back over the two-byte offset and
// releases the underlying allocation. A null pointer is ignored.
static
void two_aligned_free(void *mem) {
    if (mem != nullptr) {
        char *base = static_cast<char *>(mem) - 2;
        free(base);
    }
}
|
||||
|
||||
// Compiling while the database allocator returns misaligned memory is expected
// to fail with CH_COMPILER_ERROR and produce no database.
TEST(HybridAllocator, TwoAlignedCompile) {
    ch_set_database_allocator(two_aligned_malloc, two_aligned_free);

    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;
    const hs_platform_info_t *platform = nullptr;
    const ch_error_t err =
        ch_compile("foobar", 0, CH_MODE_GROUPS, platform, &db, &compile_err);

    // Put the default database allocator back before any fatal assertion so
    // that a failure here cannot leak the override into later tests.
    ch_set_database_allocator(nullptr, nullptr);

    ASSERT_EQ(CH_COMPILER_ERROR, err);
    ASSERT_EQ(nullptr, db);
    ASSERT_NE(nullptr, compile_err);
    ch_free_compile_error(compile_err);
}
|
||||
|
||||
// Compiling an invalid pattern while the misc allocator returns misaligned
// memory is expected to fail with the "misaligned memory" compile error.
TEST(HybridAllocator, TwoAlignedCompileError) {
    ch_set_misc_allocator(two_aligned_malloc, two_aligned_free);

    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;
    const hs_platform_info_t *platform = nullptr;
    const ch_error_t err =
        ch_compile("\\1", 0, CH_MODE_GROUPS, platform, &db, &compile_err);

    // Non-fatal checks: the allocator reset at the bottom must run even when
    // one of these expectations fails.
    EXPECT_EQ(CH_COMPILER_ERROR, err);
    EXPECT_EQ(nullptr, db);
    EXPECT_NE(nullptr, compile_err);
    if (compile_err != nullptr) {
        EXPECT_STREQ("Allocator returned misaligned memory.",
                     compile_err->message);
        // Freed while the misc allocator override is still installed, as in
        // the original ordering.
        ch_free_compile_error(compile_err);
    }

    ch_set_database_allocator(nullptr, nullptr);
    ch_set_misc_allocator(nullptr, nullptr);
}
|
||||
|
||||
// ch_database_info is expected to report CH_BAD_ALLOC when the misc allocator
// returns misaligned memory.
TEST(HybridAllocator, TwoAlignedDatabaseInfo) {
    ch_database_t *db = nullptr;
    makeDatabase(&db);
    ASSERT_TRUE(db != nullptr);

    ch_set_misc_allocator(two_aligned_malloc, two_aligned_free);

    char *info = nullptr;
    const ch_error_t err = ch_database_info(db, &info);

    // Restore the default allocator and release the database before the
    // fatal assertion so a failure cannot affect later tests.
    ch_set_misc_allocator(nullptr, nullptr);
    ch_free_database(db);

    ASSERT_EQ(CH_BAD_ALLOC, err);
}
|
||||
|
||||
// ch_alloc_scratch is expected to report CH_BAD_ALLOC when the scratch
// allocator returns misaligned memory.
TEST(HybridAllocator, TwoAlignedAllocScratch) {
    ch_database_t *db = nullptr;
    makeDatabase(&db);
    ASSERT_TRUE(db != nullptr);

    ch_set_scratch_allocator(two_aligned_malloc, two_aligned_free);

    ch_scratch_t *scratch = nullptr;
    const ch_error_t err = ch_alloc_scratch(db, &scratch);

    // Restore the default allocator and release the database before the
    // fatal assertion so a failure cannot affect later tests.
    ch_set_scratch_allocator(nullptr, nullptr);
    ch_free_database(db);

    ASSERT_EQ(CH_BAD_ALLOC, err);
}
|
591
unit/chimera/arg_checks.cpp
Normal file
591
unit/chimera/arg_checks.cpp
Normal file
@ -0,0 +1,591 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "chimera/ch.h"
|
||||
|
||||
static char garbage[] = "TEST(HybridArgChecks, DatabaseSizeNoDatabase) {" \
|
||||
" size_t sz = ch_database_size(0);" \
|
||||
" ASSERT_EQ(0, sz);";
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
// Dummy callback: does nothing, returns 0 (keep matching)
|
||||
ch_callback_t dummyHandler(unsigned, unsigned long long,
|
||||
unsigned long long, unsigned, unsigned,
|
||||
const ch_capture_t *, void *) {
|
||||
// empty
|
||||
return CH_CALLBACK_CONTINUE;
|
||||
}
|
||||
|
||||
// Helper: correctly construct a simple database.
|
||||
static
|
||||
void makeDatabase(ch_database_t **hydb) {
|
||||
static const char *expr[] = { "foo.*bar" };
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
|
||||
err = ch_compile_multi(expr, nullptr, nullptr, 1, 0, nullptr, &db,
|
||||
&compile_err);
|
||||
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ASSERT_TRUE(db != nullptr);
|
||||
|
||||
*hydb = db;
|
||||
}
|
||||
|
||||
// Helper: given a database, build me some scratch.
|
||||
static
|
||||
void makeScratch(const ch_database_t *db,
|
||||
ch_scratch_t **scratch) {
|
||||
ch_error_t err = ch_alloc_scratch(db, scratch);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ASSERT_TRUE(*scratch != nullptr);
|
||||
}
|
||||
|
||||
// Break the magic number of the given database.
|
||||
void breakDatabaseMagic(ch_database *db) {
|
||||
// database magic should be 0xdbdb at the start
|
||||
ASSERT_TRUE(memcmp("\xde\xde", db, 2) == 0);
|
||||
*(char *)db = 0xdc;
|
||||
}
|
||||
|
||||
// Break the version number of the given database.
|
||||
void breakDatabaseVersion(ch_database *db) {
|
||||
// database version is the second u32
|
||||
*((char *)db + 4) += 1;
|
||||
}
|
||||
|
||||
// Check that CH_version gives us a reasonable string back
|
||||
TEST(HybridArgChecks, Version) {
|
||||
const char *version = ch_version();
|
||||
ASSERT_TRUE(version != nullptr);
|
||||
ASSERT_TRUE(version[0] >= '0' && version[0] <= '9')
|
||||
<< "First byte should be a digit.";
|
||||
ASSERT_EQ('.', version[1]) << "Second byte should be a dot.";
|
||||
}
|
||||
|
||||
// ch_compile: Hand the compiler a bogus flag.
|
||||
TEST(HybridArgChecks, SingleBogusFlags) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
|
||||
static const unsigned int badflags[] = {
|
||||
0xffffffff,
|
||||
16,
|
||||
128,
|
||||
256,
|
||||
512,
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < sizeof(badflags)/sizeof(badflags[0]); i++) {
|
||||
const char expr[] = "foobar";
|
||||
err = ch_compile(expr, badflags[i], 0, nullptr, &db, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
EXPECT_STREQ("Unrecognized flag used.", compile_err->message);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
}
|
||||
|
||||
// ch_compile: Hand the compiler a bogus mode.
|
||||
TEST(HybridArgChecks, SingleBogusMode) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
|
||||
static const unsigned int badModes[] = {
|
||||
0xffffffff,
|
||||
1,
|
||||
2,
|
||||
CH_MODE_GROUPS << 1, // this was our largest mode flag
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < sizeof(badModes)/sizeof(badModes[0]); i++) {
|
||||
const char expr[] = "foobar";
|
||||
err = ch_compile(expr, 0, badModes[i], nullptr, &db, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
EXPECT_STREQ("Invalid mode flag supplied.", compile_err->message);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
}
|
||||
|
||||
// ch_compile: Compile a nullptr pattern set)
|
||||
TEST(HybridArgChecks, SingleCompileBlockNoPattern) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
err = ch_compile(nullptr, 0, 0, nullptr, &db, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// ch_compile: Compile a pattern to a nullptr database ptr
|
||||
TEST(HybridArgChecks, SingleCompileBlockNoDatabase) {
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
const char expr[] = "foobar";
|
||||
ch_error_t err;
|
||||
err = ch_compile(expr, 0, 0, nullptr, nullptr, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// ch_compile_multi: Hand the compiler a bogus flag.
|
||||
TEST(HybridArgChecks, MultiBogusFlags) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
|
||||
static const unsigned int badflags[] = {
|
||||
0xffffffff,
|
||||
16, // HS_FLAG_ERROREOD
|
||||
128,
|
||||
256,
|
||||
512,
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < sizeof(badflags)/sizeof(badflags[0]); i++) {
|
||||
const char *expr[] = { "foobar" };
|
||||
err = ch_compile_multi(expr, &badflags[i], nullptr, 1, 0, nullptr, &db,
|
||||
&compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
EXPECT_STREQ("Unrecognized flag used.", compile_err->message);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
}
|
||||
|
||||
// ch_compile_multi: Hand the ch_compile_multi a bogus mode.
|
||||
TEST(HybridArgChecks, MultiBogusMode) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
|
||||
static const unsigned int badModes[] = {
|
||||
0xffffffff,
|
||||
1,
|
||||
2,
|
||||
CH_MODE_GROUPS << 1, // this was our largest mode flag
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < sizeof(badModes)/sizeof(badModes[0]); i++) {
|
||||
const char *expr[] = { "foobar" };
|
||||
err = ch_compile_multi(expr, nullptr, nullptr, 1, badModes[i], nullptr,
|
||||
&db, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
EXPECT_STREQ("Invalid mode flag supplied.", compile_err->message);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
}
|
||||
|
||||
// ch_compile_multi: Compile a nullptr pattern set (block mode)
|
||||
TEST(HybridArgChecks, MultiCompileBlockNoPattern) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
err = ch_compile_multi(nullptr, nullptr, nullptr, 1, 0, nullptr, &db,
|
||||
&compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// ch_compile_multi: Compile a set of zero patterns
|
||||
TEST(HybridArgChecks, MultiCompileZeroPatterns) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
const char *expr[] = {"foobar"};
|
||||
ch_error_t err;
|
||||
err = ch_compile_multi(expr, nullptr, nullptr, 0, 0, nullptr, &db,
|
||||
&compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// ch_compile_multi: Compile a pattern to a nullptr database ptr
|
||||
TEST(HybridArgChecks, MultiCompileBlockNoDatabase) {
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
const char *expr[] = {"foobar"};
|
||||
ch_error_t err;
|
||||
err = ch_compile_multi(expr, nullptr, nullptr, 1, 0, nullptr, nullptr,
|
||||
&compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// ch_compile_ext_multi: Hand the compiler a bogus flag.
|
||||
TEST(HybridArgChecks, ExtMultiBogusFlags) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
|
||||
static const unsigned int badflags[] = {
|
||||
0xffffffff,
|
||||
16, // HS_FLAG_ERROREOD
|
||||
128,
|
||||
256,
|
||||
512,
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < sizeof(badflags)/sizeof(badflags[0]); i++) {
|
||||
const char *expr[] = { "foobar" };
|
||||
err = ch_compile_ext_multi(expr, &badflags[i], nullptr, 1, 0,
|
||||
10000000, 8000, nullptr, &db, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
EXPECT_STREQ("Unrecognized flag used.", compile_err->message);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
}
|
||||
|
||||
// ch_compile_ext_multi: Hand the ch_compile_multi a bogus mode.
|
||||
TEST(HybridArgChecks, ExtMultiBogusMode) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
|
||||
static const unsigned int badModes[] = {
|
||||
0xffffffff,
|
||||
1,
|
||||
2,
|
||||
CH_MODE_GROUPS << 1, // this was our largest mode flag
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < sizeof(badModes)/sizeof(badModes[0]); i++) {
|
||||
const char *expr[] = { "foobar" };
|
||||
err = ch_compile_ext_multi(expr, nullptr, nullptr, 1, badModes[i],
|
||||
10000000, 8000, nullptr, &db, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
EXPECT_STREQ("Invalid mode flag supplied.", compile_err->message);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
}
|
||||
|
||||
// ch_compile_ext_multi: Compile a nullptr pattern set (block mode)
|
||||
TEST(HybridArgChecks, ExtMultiCompileBlockNoPattern) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err;
|
||||
err = ch_compile_ext_multi(nullptr, nullptr, nullptr, 1, 0, 10000000,
|
||||
8000, nullptr, &db, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// ch_compile_ext_multi: Compile a set of zero patterns
|
||||
TEST(HybridArgChecks, ExtMultiCompileZeroPatterns) {
|
||||
ch_database_t *db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
const char *expr[] = {"foobar"};
|
||||
ch_error_t err;
|
||||
err = ch_compile_ext_multi(expr, nullptr, nullptr, 0, 0, 10000000,
|
||||
8000, nullptr, &db, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(db == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// ch_compile_ext_multi: Compile a pattern to a nullptr database ptr
|
||||
TEST(HybridArgChecks, ExtMultiCompileBlockNoDatabase) {
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
const char *expr[] = {"foobar"};
|
||||
ch_error_t err;
|
||||
err = ch_compile_ext_multi(expr, nullptr, nullptr, 1, 0, 10000000,
|
||||
8000, nullptr, nullptr, &compile_err);
|
||||
EXPECT_EQ(CH_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
ch_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// ch_scan: Call with no database
|
||||
TEST(HybridArgChecks, ScanBlockNoDatabase) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
makeScratch(db, &scratch);
|
||||
|
||||
ch_error_t err = ch_scan(nullptr, "data", 4, 0, scratch,
|
||||
dummyHandler, nullptr, nullptr);
|
||||
ASSERT_NE(CH_SUCCESS, err);
|
||||
EXPECT_NE(CH_SCAN_TERMINATED, err);
|
||||
|
||||
// teardown
|
||||
err = ch_free_scratch(scratch);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// ch_scan: Call with a database with broken magic
|
||||
TEST(HybridArgChecks, ScanBlockBrokenDatabaseMagic) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
makeScratch(db, &scratch);
|
||||
|
||||
// break the database here, after scratch alloc
|
||||
breakDatabaseMagic(db);
|
||||
|
||||
ch_error_t err = ch_scan(db, "data", 4, 0, scratch,
|
||||
dummyHandler, nullptr, nullptr);
|
||||
ASSERT_EQ(CH_INVALID, err);
|
||||
|
||||
// teardown
|
||||
err = ch_free_scratch(scratch);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
free(db);
|
||||
}
|
||||
|
||||
// ch_scan: Call with a database with broken version
|
||||
TEST(HybridArgChecks, ScanBlockBrokenDatabaseVersion) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
makeScratch(db, &scratch);
|
||||
|
||||
// break the database here, after scratch alloc
|
||||
breakDatabaseVersion(db);
|
||||
|
||||
ch_error_t err = ch_scan(db, "data", 4, 0, scratch,
|
||||
dummyHandler, nullptr, nullptr);
|
||||
ASSERT_EQ(CH_DB_VERSION_ERROR, err);
|
||||
|
||||
// teardown
|
||||
err = ch_free_scratch(scratch);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// ch_scan: Call with no data
|
||||
TEST(HybridArgChecks, ScanBlockNoData) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
makeScratch(db, &scratch);
|
||||
|
||||
ch_error_t err = ch_scan(db, nullptr, 4, 0, scratch, dummyHandler,
|
||||
nullptr, nullptr);
|
||||
ASSERT_NE(CH_SUCCESS, err);
|
||||
EXPECT_NE(CH_SCAN_TERMINATED, err);
|
||||
|
||||
// teardown
|
||||
err = ch_free_scratch(scratch);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// ch_scan: Call with no scratch
|
||||
TEST(HybridArgChecks, ScanBlockNoScratch) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
|
||||
ch_error_t err = ch_scan(db, "data", 4, 0, nullptr, dummyHandler,
|
||||
nullptr, nullptr);
|
||||
ASSERT_NE(CH_SUCCESS, err);
|
||||
EXPECT_NE(CH_SCAN_TERMINATED, err);
|
||||
|
||||
// teardown
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// ch_scan: Call with no event handler
|
||||
TEST(HybridArgChecks, ScanBlockNoHandler) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
makeScratch(db, &scratch);
|
||||
|
||||
ch_error_t err = ch_scan(db, "data", 4, 0, scratch, nullptr, nullptr,
|
||||
nullptr);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
EXPECT_NE(CH_SCAN_TERMINATED, err);
|
||||
|
||||
// teardown
|
||||
err = ch_free_scratch(scratch);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// ch_alloc_scratch: Call with no database
|
||||
TEST(HybridArgChecks, AllocScratchNoDatabase) {
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
ch_error_t err = ch_alloc_scratch(nullptr, &scratch);
|
||||
EXPECT_NE(CH_SUCCESS, err);
|
||||
EXPECT_TRUE(scratch == nullptr);
|
||||
}
|
||||
|
||||
// ch_alloc_scratch: Call with nullptr ptr-to-scratch
|
||||
TEST(HybridArgChecks, AllocScratchNullScratchPtr) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
|
||||
ch_error_t err = ch_alloc_scratch(db, nullptr);
|
||||
ASSERT_EQ(CH_INVALID, err);
|
||||
|
||||
// teardown
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// ch_alloc_scratch: Call with bogus scratch
|
||||
TEST(HybridArgChecks, AllocScratchBogusScratch) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
|
||||
ch_scratch_t *blah = (ch_scratch_t *)malloc(100);
|
||||
memset(blah, 0xf0, 100);
|
||||
ch_error_t err = ch_alloc_scratch(db, &blah);
|
||||
ASSERT_EQ(CH_INVALID, err);
|
||||
|
||||
// teardown
|
||||
free(blah);
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// ch_alloc_scratch: Call with broken database magic
|
||||
TEST(HybridArgChecks, AllocScratchBadDatabaseMagic) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
|
||||
breakDatabaseMagic(db);
|
||||
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
ch_error_t err = ch_alloc_scratch(db, &scratch);
|
||||
ASSERT_EQ(CH_INVALID, err);
|
||||
|
||||
// teardown
|
||||
free(db);
|
||||
}
|
||||
|
||||
// ch_alloc_scratch: Call with broken database version
|
||||
TEST(HybridArgChecks, AllocScratchBadDatabaseVersion) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
|
||||
breakDatabaseVersion(db);
|
||||
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
ch_error_t err = ch_alloc_scratch(db, &scratch);
|
||||
ASSERT_EQ(CH_DB_VERSION_ERROR, err);
|
||||
|
||||
// teardown
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// ch_clone_scratch: Call with no source scratch
|
||||
TEST(HybridArgChecks, CloneScratchNoSource) {
|
||||
ch_scratch_t *scratch = nullptr, *scratch2 = nullptr;
|
||||
ch_error_t err = ch_clone_scratch(scratch, &scratch2);
|
||||
EXPECT_NE(CH_SUCCESS, err);
|
||||
EXPECT_TRUE(scratch2 == nullptr);
|
||||
}
|
||||
|
||||
// ch_database_size: Call with no database
|
||||
TEST(HybridArgChecks, DatabaseSizeNoDatabase) {
|
||||
size_t sz = 0;
|
||||
ch_error_t err = ch_database_size(0, &sz);
|
||||
ASSERT_EQ(CH_INVALID, err);
|
||||
ASSERT_EQ(0U, sz);
|
||||
}
|
||||
|
||||
// ch_clone_scratch: bad scratch arg
|
||||
TEST(HybridArgChecks, CloneBadScratch) {
|
||||
// Try cloning the scratch
|
||||
void *local_garbage = malloc(sizeof(garbage));
|
||||
memcpy(local_garbage, garbage, sizeof(garbage));
|
||||
ch_scratch_t *cloned = nullptr;
|
||||
ch_scratch_t *scratch = (ch_scratch_t *)local_garbage;
|
||||
ch_error_t err = ch_clone_scratch(scratch, &cloned);
|
||||
free(local_garbage);
|
||||
ASSERT_EQ(CH_INVALID, err);
|
||||
}
|
||||
|
||||
// ch_scan: bad scratch arg
|
||||
TEST(HybridArgChecks, ScanBadScratch) {
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db);
|
||||
|
||||
void *local_garbage = malloc(sizeof(garbage));
|
||||
memcpy(local_garbage, garbage, sizeof(garbage));
|
||||
|
||||
ch_scratch_t *scratch = (ch_scratch_t *)local_garbage;
|
||||
ch_error_t err = ch_scan(db, "data", 4, 0, scratch,
|
||||
dummyHandler, nullptr, nullptr);
|
||||
free(local_garbage);
|
||||
ASSERT_EQ(CH_INVALID, err);
|
||||
|
||||
// teardown
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// ch_free_database: freeing a null database is expected to succeed.
TEST(HybridArgChecks, ch_free_database_null) {
    const ch_error_t status = ch_free_database(nullptr);
    ASSERT_EQ(CH_SUCCESS, status);
}
|
||||
|
||||
// ch_free_database: the garbage buffer, cast to a database, is expected to
// be rejected as invalid.
TEST(HybridArgChecks, ch_free_database_garbage) {
    ch_database_t *bogus = (ch_database_t *)garbage;
    const ch_error_t status = ch_free_database(bogus);
    ASSERT_EQ(CH_INVALID, status);
}
|
||||
|
||||
// ch_free_scratch: freeing a null scratch is expected to succeed.
TEST(HybridArgChecks, ch_free_scratch_null) {
    const ch_error_t status = ch_free_scratch(nullptr);
    ASSERT_EQ(CH_SUCCESS, status);
}
|
||||
|
||||
// ch_free_scratch: the garbage buffer, cast to a scratch, is expected to be
// rejected as invalid.
TEST(HybridArgChecks, ch_free_scratch_garbage) {
    ch_scratch_t *bogus = (ch_scratch_t *)garbage;
    const ch_error_t status = ch_free_scratch(bogus);
    ASSERT_EQ(CH_INVALID, status);
}
|
||||
|
||||
// ch_free_compile_error: freeing a null compile error is expected to
// succeed.
TEST(HybridArgChecks, ch_free_compile_error_null) {
    const ch_error_t status = ch_free_compile_error(nullptr);
    ASSERT_EQ(CH_SUCCESS, status);
}
|
||||
|
||||
} // namespace
|
||||
|
95
unit/chimera/bad_patterns.cpp
Normal file
95
unit/chimera/bad_patterns.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "chimera/ch.h"
|
||||
|
||||
using namespace testing;
|
||||
|
||||
// Value-parameterized fixture: each instance receives one pattern string
// through GetParam(). It adds no state or behaviour of its own.
class HybridCompile : public TestWithParam<const char *> {};
|
||||
|
||||
// Compiles the parameter pattern on its own and requires the compile to
// fail, leaving no database and producing a compile error object.
TEST_P(HybridCompile, BadPattern) {
    const char *expr = GetParam();
    ch_database_t *database = nullptr;
    ch_compile_error_t *error = nullptr;

    const ch_error_t status = ch_compile_multi(&expr, nullptr, nullptr, 1, 0,
                                               nullptr, &database, &error);
    ASSERT_NE(CH_SUCCESS, status) << "Compile should have failed for expr: "
                                  << expr;
    ASSERT_TRUE(database == nullptr);
    ASSERT_TRUE(error != nullptr);

    ch_free_compile_error(error);
}
|
||||
|
||||
// Patterns that must fail to compile. Each entry becomes one instance of
// the HybridCompile tests via the INSTANTIATE_TEST_CASE_P call below.
static
const char * BAD_PATTERNS[] = {
    // unmatched parens
    "(foo",
    "foo)",
    "((foo)",
    "(foo))",
    // nothing to repeat
    "a+++",
    "a+?+",
    "a???",
    "a??+",
    "?qa",
    "*abc",
    "+abc",
    // repeating boundaries is not allowed (UE-1007)
    "^?0",
    "^*0",
    "^+0",
    "^{1,3}0",
    "0$?",
    "0$*",
    "0$+",
    "0${1,3}",
    // char classes
    "[]",
    "[]foobar",
    "[`-\\80",
    // bad named classes
    "[[:foo:]]",
    "[[:1234:]]",
    "[[:f\\oo:]]",
    "[[: :]]",
    "[[:...:]]",
    "[[:l\\ower:]]",
    "[[:abc\\:]]",
    "[abc[:x\\]pqr:]]",
    "[[:a\\dz:]]",
    "foobar\\", // trailing unescaped backslash
};

// Run every HybridCompile test once per entry in BAD_PATTERNS.
INSTANTIATE_TEST_CASE_P(Compile, HybridCompile, ValuesIn(BAD_PATTERNS));
|
56
unit/chimera/compat.cpp
Normal file
56
unit/chimera/compat.cpp
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "chimera/ch.h"
|
||||
#include "hs.h"
|
||||
|
||||
// Chimera (CH_*) and Hyperscan (HS_*) each define constants with the same
// meaning. We currently depend on each such pair also having the same
// numeric value; this test checks that pair by pair.
TEST(HybridCompat, Defines) {
    // flags
    EXPECT_EQ(HS_FLAG_CASELESS, CH_FLAG_CASELESS);
    EXPECT_EQ(HS_FLAG_DOTALL, CH_FLAG_DOTALL);
    EXPECT_EQ(HS_FLAG_MULTILINE, CH_FLAG_MULTILINE);
    EXPECT_EQ(HS_FLAG_SINGLEMATCH, CH_FLAG_SINGLEMATCH);
    EXPECT_EQ(HS_FLAG_UTF8, CH_FLAG_UTF8);
    EXPECT_EQ(HS_FLAG_UCP, CH_FLAG_UCP);

    // errors
    EXPECT_EQ(HS_SUCCESS, CH_SUCCESS);
    EXPECT_EQ(HS_INVALID, CH_INVALID);
    EXPECT_EQ(HS_NOMEM, CH_NOMEM);
    EXPECT_EQ(HS_SCAN_TERMINATED, CH_SCAN_TERMINATED);
    EXPECT_EQ(HS_COMPILER_ERROR, CH_COMPILER_ERROR);
    EXPECT_EQ(HS_DB_VERSION_ERROR, CH_DB_VERSION_ERROR);
    EXPECT_EQ(HS_DB_PLATFORM_ERROR, CH_DB_PLATFORM_ERROR);
    EXPECT_EQ(HS_DB_MODE_ERROR, CH_DB_MODE_ERROR);
    EXPECT_EQ(HS_BAD_ALIGN, CH_BAD_ALIGN);
    EXPECT_EQ(HS_BAD_ALLOC, CH_BAD_ALLOC);
    EXPECT_EQ(HS_SCRATCH_IN_USE, CH_SCRATCH_IN_USE);
}
|
35
unit/chimera/main.cpp
Normal file
35
unit/chimera/main.cpp
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
// Driver: run all the tests (defined in other source files in this directory)
|
||||
int main(int argc, char **argv) {
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
551
unit/chimera/scan.cpp
Normal file
551
unit/chimera/scan.cpp
Normal file
@ -0,0 +1,551 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "chimera/ch.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
|
||||
// One test case for the HybridScan fixture: a set of patterns and, in
// parallel, the flags for each pattern. The two vectors always have the
// same length because entries are only ever added in pairs.
class HybridScanParams {
public:
    // An empty set; populate it with add().
    HybridScanParams() = default;

    // A set holding the single pattern s with flags f.
    HybridScanParams(const char *s, unsigned int f)
        : patterns(1, s), flags(1, f) {}

    // Appends one pattern together with its flags. The pointer is stored
    // as given, not copied, so the string must outlive this object.
    void add(const char *pattern, unsigned int myflags) {
        patterns.push_back(pattern);
        flags.push_back(myflags);
    }

    // Number of patterns in the set.
    size_t size() const {
        return patterns.size();
    }

    // Contiguous array of size() pattern pointers. data() is used rather
    // than &patterns[0] so that calling this on an empty set is well
    // defined.
    const char * const * getPatterns() const {
        return patterns.data();
    }

    // Contiguous array of size() flag words, parallel to getPatterns().
    const unsigned int * getFlags() const {
        return flags.data();
    }

private:
    std::vector<const char *> patterns;
    std::vector<unsigned int> flags;
};
|
||||
|
||||
static
|
||||
vector<HybridScanParams> paramFactory() {
|
||||
vector<HybridScanParams> hsp;
|
||||
|
||||
// Some simple single-pattern cases.
|
||||
hsp.push_back(HybridScanParams(".", CH_FLAG_DOTALL));
|
||||
hsp.push_back(HybridScanParams("foobar", 0));
|
||||
hsp.push_back(HybridScanParams("foo.*bar", 0));
|
||||
hsp.push_back(HybridScanParams("fred.*bill", CH_FLAG_DOTALL));
|
||||
hsp.push_back(HybridScanParams(".*", 0)); // vacuosity!
|
||||
hsp.push_back(HybridScanParams("\\A(.?.{7,27}jf[tmqq]l(f|t|hgmr.+.fg|abks)){3,7}", 0));
|
||||
hsp.push_back(HybridScanParams("^begin", CH_FLAG_MULTILINE));
|
||||
hsp.push_back(HybridScanParams("match", CH_FLAG_SINGLEMATCH));
|
||||
|
||||
// Single-pattern cases where the pattern isn't supported by hyperscan but
|
||||
// can be prefiltered.
|
||||
hsp.push_back(HybridScanParams("foo(?!bar)", 0));
|
||||
hsp.push_back(HybridScanParams("(sens|respons)e and \\1ibility", 0));
|
||||
|
||||
// A case that can't be prefiltered (as of this writing) because it's too
|
||||
// gosh-darned big. This tests that the hybrid matcher can run without the
|
||||
// multi-matcher (or with a "fake" one).
|
||||
hsp.push_back(HybridScanParams("((c(p|p)h{2,}bh.|p|((((cq|j|c|(\\b)|.[^nbgn]|(\\B)[qfh]a)){10,12}|ih|a|mnde[pa].|.g)){5,8})){3}", 0));
|
||||
|
||||
// Simple multi-pattern literal case.
|
||||
hsp.push_back(HybridScanParams());
|
||||
hsp.back().add("hatstand", 0);
|
||||
hsp.back().add("teakettle", 0);
|
||||
hsp.back().add("badgerbrush", 0);
|
||||
hsp.back().add("mnemosyne", 0);
|
||||
|
||||
// More complex multi-pattern case.
|
||||
hsp.push_back(HybridScanParams());
|
||||
hsp.back().add("foo.{3,7}bar", 0);
|
||||
hsp.back().add("foo.{30,70}bar", 0);
|
||||
hsp.back().add("foobar.*foobar", 0);
|
||||
hsp.back().add("^blingwrapper.*foo", 0);
|
||||
hsp.back().add("[0-9a-f]{70,}\\n", 0);
|
||||
|
||||
// A couple of trivial Unicode patterns, mostly to make sure we accept
|
||||
// the flags.
|
||||
hsp.push_back(HybridScanParams());
|
||||
hsp.back().add("foo.*bar", CH_FLAG_UTF8);
|
||||
hsp.back().add("today", CH_FLAG_UTF8|CH_FLAG_UCP);
|
||||
|
||||
// PCRE exotica.
|
||||
hsp.push_back(HybridScanParams());
|
||||
hsp.back().add("benign literal", 0);
|
||||
hsp.back().add("(?|(abc)|(def))\\1", 0);
|
||||
hsp.back().add("(?|(abc)|(def))(?1)", 0);
|
||||
hsp.back().add("(sens|respons)e and \\1ibility", 0);
|
||||
hsp.back().add("\\w+(?=;)", 0);
|
||||
hsp.back().add("foo(?!bar)", 0);
|
||||
hsp.back().add("(?<=bullock|donkey)", 0);
|
||||
|
||||
return hsp;
|
||||
}
|
||||
|
||||
// Dummy callback.
|
||||
static
|
||||
ch_callback_t dummyHandler(unsigned, unsigned long long, unsigned long long,
|
||||
unsigned, unsigned,const ch_capture_t *, void *) {
|
||||
// empty
|
||||
return CH_CALLBACK_CONTINUE;
|
||||
}
|
||||
|
||||
static
|
||||
void checkGroups(unsigned int num, const ch_capture_t *captured) {
|
||||
// We should have _some_ group info.
|
||||
ASSERT_LT(0U, num);
|
||||
ASSERT_TRUE(captured != nullptr);
|
||||
|
||||
// Group 0 is always active.
|
||||
ASSERT_TRUE(captured[0].flags & CH_CAPTURE_FLAG_ACTIVE);
|
||||
|
||||
// Sanity-checking.
|
||||
for (unsigned int i = 0; i < num; i++) {
|
||||
if (!(captured[i].flags & CH_CAPTURE_FLAG_ACTIVE)) {
|
||||
continue;
|
||||
}
|
||||
ASSERT_LE(captured[i].from, captured[i].to) << "Group " << i
|
||||
<< "not sane.";
|
||||
}
|
||||
}
|
||||
|
||||
// Dummy callback that checks that we had some groups set.
|
||||
static
|
||||
ch_callback_t dummyGroupHandler(unsigned, unsigned long long,
|
||||
unsigned long long, unsigned, unsigned num,
|
||||
const ch_capture_t *captured, void *) {
|
||||
checkGroups(num, captured);
|
||||
return CH_CALLBACK_CONTINUE;
|
||||
}
|
||||
|
||||
// Fixture parameterized over (pattern set, capture-groups on/off). SetUp
// compiles the pattern set into db and allocates scratch for it; TearDown
// releases both.
class HybridScan : public TestWithParam<tuple<HybridScanParams, bool>> {
protected:
    void SetUp() override {
        const HybridScanParams &hsp = get<0>(GetParam());
        groups = get<1>(GetParam());

        ch_compile_error_t *compile_err = nullptr;
        ch_error_t err = ch_compile_ext_multi(hsp.getPatterns(),
                                              hsp.getFlags(), nullptr,
                                              hsp.size(),
                                              groups ? CH_MODE_GROUPS
                                                     : CH_MODE_NOGROUPS,
                                              10000000, 8000, nullptr, &db,
                                              &compile_err);
        if (err != CH_SUCCESS) {
            // Surface the compiler's own message and release the error
            // object instead of dropping both on the floor.
            const string msg = (compile_err && compile_err->message)
                                   ? compile_err->message
                                   : "(no message)";
            ch_free_compile_error(compile_err);
            FAIL() << "ch_compile_ext_multi failed with " << err << ": "
                   << msg;
        }
        ASSERT_TRUE(db != nullptr);

        err = ch_alloc_scratch(db, &scratch);
        ASSERT_EQ(CH_SUCCESS, err);
        ASSERT_TRUE(scratch != nullptr);
    }

    void TearDown() override {
        // Both pointers may still be null if SetUp failed part-way; the
        // argument-check tests assert that freeing null succeeds.
        ch_free_database(db);
        ch_free_scratch(scratch);
    }

    ch_database_t *db = nullptr;
    ch_scratch_t *scratch = nullptr;
    bool groups = false; // overwritten from the test parameter in SetUp
};
|
||||
|
||||
// Four-line text corpus scanned by the HybridScan tests.
static const string SCAN_DATA =
    "Beware the Jabberwock, my son!\n"
    "The jaws that bite, the claws that catch!\n"
    "Beware the Jubjub bird, and shun\n"
    "The frumious Bandersnatch!\n";
|
||||
|
||||
// Checks the compiled database reports a size above 16 bytes, then scans
// SCAN_DATA with the handler that matches the current capture mode.
TEST_P(HybridScan, BuildAndScan) {
    ASSERT_TRUE(db != nullptr);

    size_t dbSize;
    const ch_error_t sizeStatus = ch_database_size(db, &dbSize);
    ASSERT_EQ(CH_SUCCESS, sizeStatus);
    ASSERT_LT(16U, dbSize);

    ch_match_event_handler onMatch = dummyHandler;
    if (groups) {
        onMatch = dummyGroupHandler;
    }

    const ch_error_t scanStatus = ch_scan(db, SCAN_DATA.c_str(),
                                          SCAN_DATA.length(), 0, scratch,
                                          onMatch, nullptr, nullptr);
    ASSERT_EQ(CH_SUCCESS, scanStatus);
}
|
||||
|
||||
// Scans a buffer of 4000 '*' characters with a few needles spliced in.
TEST_P(HybridScan, ScanNearly4KData) {
    ASSERT_TRUE(db != nullptr);

    // Strings that will match a few patterns. They are applied strictly in
    // this order, because each insertion shifts everything after it.
    static const struct {
        size_t offset;
        const char *text;
    } needles[] = {
        {278, "foo"},
        {285, "bar"},
        {1178, "foobar"},
        {1894, "bar"},
        {3000, "foobar"},
    };

    string data(4000, '*'); // it's full of stars!
    for (const auto &needle : needles) {
        data.insert(needle.offset, needle.text);
    }

    ch_match_event_handler onMatch = dummyHandler;
    if (groups) {
        onMatch = dummyGroupHandler;
    }

    const ch_error_t scanStatus = ch_scan(db, data.c_str(), data.length(), 0,
                                          scratch, onMatch, nullptr, nullptr);
    ASSERT_EQ(CH_SUCCESS, scanStatus);
}
|
||||
|
||||
// Same shape as the 4K scan, but over a 5 MiB buffer.
TEST_P(HybridScan, ScanBigData) {
    ASSERT_TRUE(db != nullptr);

    // Strings that will match a few patterns. They are applied strictly in
    // this order, because each insertion shifts everything after it.
    static const struct {
        size_t offset;
        const char *text;
    } needles[] = {
        {278, "foo"},
        {285, "bar"},
        {1178, "foobar"},
        {1894, "bar"},
        {3000, "foobar"},
    };

    // More than 4MB, as that pushes us into using PCRE for non-Pawn cases.
    string data(5*1024*1024, '*'); // it's full of stars!
    for (const auto &needle : needles) {
        data.insert(needle.offset, needle.text);
    }

    ch_match_event_handler onMatch = dummyHandler;
    if (groups) {
        onMatch = dummyGroupHandler;
    }

    const ch_error_t scanStatus = ch_scan(db, data.c_str(), data.length(), 0,
                                          scratch, onMatch, nullptr, nullptr);
    ASSERT_EQ(CH_SUCCESS, scanStatus);
}
|
||||
|
||||
// Clones the fixture's scratch and scans SCAN_DATA using the clone.
TEST_P(HybridScan, ScanClonedScratch) {
    ASSERT_TRUE(db != nullptr);

    ch_scratch_t *clonedScratch = nullptr;
    const ch_error_t cloneStatus = ch_clone_scratch(scratch, &clonedScratch);
    ASSERT_EQ(CH_SUCCESS, cloneStatus);

    ch_match_event_handler onMatch = dummyHandler;
    if (groups) {
        onMatch = dummyGroupHandler;
    }

    const ch_error_t scanStatus = ch_scan(db, SCAN_DATA.c_str(),
                                          SCAN_DATA.length(), 0,
                                          clonedScratch, onMatch, nullptr,
                                          nullptr);
    ASSERT_EQ(CH_SUCCESS, scanStatus);

    ch_free_scratch(clonedScratch);
}
|
||||
|
||||
// Requests the database info string and checks that it begins with
// "Chimera ".
TEST_P(HybridScan, DatabaseInfo) {
    ASSERT_TRUE(db != nullptr);

    char *rawInfo = nullptr;
    const ch_error_t status = ch_database_info(db, &rawInfo);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_TRUE(rawInfo != nullptr);

    const string expectedPrefix("Chimera ");
    const string infoText(rawInfo);
    ASSERT_GE(infoText.size(), expectedPrefix.size());
    ASSERT_EQ(expectedPrefix, infoText.substr(0, expectedPrefix.size()));

    // The info string is released with free() here.
    free(rawInfo);
}
|
||||
|
||||
// The scratch allocated in SetUp must report a size greater than zero.
TEST_P(HybridScan, NonZeroScratchSize) {
    ASSERT_TRUE(db != nullptr);

    size_t curr_size = 0;
    ch_error_t err = ch_scratch_size(scratch, &curr_size);
    ASSERT_EQ(CH_SUCCESS, err);
    // Unsigned literal: comparing a plain int against size_t draws a
    // signed/unsigned comparison warning; the rest of the file uses 0U/16U.
    ASSERT_LT(0U, curr_size);
}

// Run every HybridScan test for each pattern set from paramFactory(), once
// with the bool parameter false and once with it true.
INSTANTIATE_TEST_CASE_P(Scan, HybridScan,
                        Combine(ValuesIn(paramFactory()), Bool()));
|
||||
|
||||
// Counting callback that returns CH_CALLBACK_CONTINUE.
|
||||
static
|
||||
ch_callback_t countHandler(unsigned, unsigned long long, unsigned long long,
|
||||
unsigned, unsigned, const ch_capture_t *,
|
||||
void *ctx) {
|
||||
unsigned int *count = (unsigned int *)ctx;
|
||||
++(*count);
|
||||
return CH_CALLBACK_CONTINUE;
|
||||
}
|
||||
|
||||
// Counting callback that returns CH_CALLBACK_SKIP_PATTERN.
|
||||
static
|
||||
ch_callback_t skipHandler(unsigned, unsigned long long, unsigned long long,
|
||||
unsigned, unsigned, const ch_capture_t *,
|
||||
void *ctx) {
|
||||
unsigned int *count = (unsigned int *)ctx;
|
||||
++(*count);
|
||||
return CH_CALLBACK_SKIP_PATTERN;
|
||||
}
|
||||
|
||||
// Counting callback that returns CH_CALLBACK_TERMINATE.
|
||||
static
|
||||
ch_callback_t terminateHandler(unsigned, unsigned long long, unsigned long long,
|
||||
unsigned, unsigned, const ch_capture_t *,
|
||||
void *ctx) {
|
||||
unsigned int *count = (unsigned int *)ctx;
|
||||
++(*count);
|
||||
return CH_CALLBACK_TERMINATE;
|
||||
}
|
||||
|
||||
static
|
||||
void makeDatabase(ch_database_t **db, const char * const expr[], size_t num) {
|
||||
*db = nullptr;
|
||||
ch_compile_error_t *compile_err = nullptr;
|
||||
ch_error_t err = ch_compile_ext_multi(expr, nullptr, nullptr, num, 0,
|
||||
10000000, 8000, nullptr, db,
|
||||
&compile_err);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ASSERT_TRUE(*db != nullptr);
|
||||
}
|
||||
|
||||
struct RescanContext {
|
||||
RescanContext(const ch_database_t *db_in, ch_scratch_t *scratch_in)
|
||||
: db(db_in), scratch(scratch_in) {}
|
||||
const ch_database_t *db;
|
||||
ch_scratch_t *scratch;
|
||||
size_t matches = 0;
|
||||
};
|
||||
|
||||
static
|
||||
int rescan_block_cb(unsigned, unsigned long long, unsigned long long, unsigned,
|
||||
unsigned, const ch_capture_t *, void *ctx) {
|
||||
RescanContext *rctx = (RescanContext *)ctx;
|
||||
rctx->matches++;
|
||||
|
||||
const string data = "___foo___bar_";
|
||||
|
||||
hs_error_t err = ch_scan(rctx->db, data.c_str(), data.length(), 0,
|
||||
rctx->scratch, nullptr, nullptr, nullptr);
|
||||
EXPECT_EQ(CH_SCRATCH_IN_USE, err);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Scans with a callback that re-enters ch_scan() on the same scratch; the
// outer scan must still succeed and report exactly one match.
TEST(Scan, ScratchInUse) {
    static const char * const patterns[] = { "foo.*bar" };
    ch_database_t *database = nullptr;
    makeDatabase(&database, patterns, 1);

    ch_scratch_t *scratch_space = nullptr;
    ch_error_t status = ch_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_TRUE(scratch_space != nullptr);

    RescanContext context(database, scratch_space);

    const string input("___foo___bar_");
    status = ch_scan(database, input.c_str(), input.length(), 0,
                     scratch_space, rescan_block_cb, nullptr, &context);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_EQ(1U, context.matches);

    ch_free_scratch(scratch_space);
    ch_free_database(database);
}
|
||||
|
||||
// A single "." pattern with the skip handler: the callback must fire exactly
// once and the scan must complete successfully.
TEST(Scan, CallbackSkip1) {
    static const char * const patterns[] = { "." };
    ch_database_t *database = nullptr;
    makeDatabase(&database, patterns, 1);

    ch_scratch_t *scratch_space = nullptr;
    ch_error_t status = ch_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_TRUE(scratch_space != nullptr);

    const string input("qwertyuiop");
    unsigned int hits = 0;
    status = ch_scan(database, input.c_str(), input.length(), 0,
                     scratch_space, skipHandler, nullptr, &hits);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_EQ(1U, hits);

    ch_free_scratch(scratch_space);
    ch_free_database(database);
}
|
||||
|
||||
// Two patterns with the skip handler: each pattern is expected to be reported
// once, giving two callbacks in total.
TEST(Scan, CallbackSkip2) {
    static const char * const patterns[] = { "[a-z]+", "[0-9]" };
    ch_database_t *database = nullptr;
    makeDatabase(&database, patterns, 2);

    ch_scratch_t *scratch_space = nullptr;
    ch_error_t status = ch_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_TRUE(scratch_space != nullptr);

    const string input("foo 0123 0 bar 39483 n34jfhlqekrcoi3q4");
    unsigned int hits = 0;
    status = ch_scan(database, input.c_str(), input.length(), 0,
                     scratch_space, skipHandler, nullptr, &hits);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_EQ(2U, hits); // one report per pattern

    ch_free_scratch(scratch_space);
    ch_free_database(database);
}
|
||||
|
||||
// This case includes a pattern that we use libpcre for.
|
||||
TEST(Scan, CallbackSkip3) {
|
||||
static const char * const expr[] = { "[a-z]+", "foo(?!bar)" };
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db, expr, 2);
|
||||
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
ch_error_t err = ch_alloc_scratch(db, &scratch);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ASSERT_TRUE(scratch != nullptr);
|
||||
|
||||
unsigned int count = 0;
|
||||
const string data("foobaz foobing foobar");
|
||||
err = ch_scan(db, data.c_str(), data.length(), 0,
|
||||
scratch, skipHandler, 0, &count);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ASSERT_EQ(2U, count); // both patterns should match once
|
||||
|
||||
ch_free_scratch(scratch);
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
// Two patterns with the continue handler: every match is reported, seven in
// total for this input.
TEST(Scan, CallbackNoSkip1) {
    static const char * const patterns[] = { "foo|bar", "[0-9]{3}" };
    ch_database_t *database = nullptr;
    makeDatabase(&database, patterns, 2);

    ch_scratch_t *scratch_space = nullptr;
    ch_error_t status = ch_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_TRUE(scratch_space != nullptr);

    const string input("foo 012 bar 345 foobar 678");
    unsigned int hits = 0;
    status = ch_scan(database, input.c_str(), input.length(), 0,
                     scratch_space, countHandler, nullptr, &hits);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_EQ(7U, hits); // all seven matches delivered

    ch_free_scratch(scratch_space);
    ch_free_database(database);
}
|
||||
|
||||
// Continue-handler case using a negative-lookahead pattern alongside a digit
// pattern: four matches are expected for this input.
TEST(Scan, CallbackNoSkip2) {
    static const char * const patterns[] = { "foo(?!bar)", "[0-9]{3}" };
    ch_database_t *database = nullptr;
    makeDatabase(&database, patterns, 2);

    ch_scratch_t *scratch_space = nullptr;
    ch_error_t status = ch_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_TRUE(scratch_space != nullptr);

    const string input("foo 012 bar 345 foobar 678");
    unsigned int hits = 0;
    status = ch_scan(database, input.c_str(), input.length(), 0,
                     scratch_space, countHandler, nullptr, &hits);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_EQ(4U, hits); // all four matches delivered

    ch_free_scratch(scratch_space);
    ch_free_database(database);
}
|
||||
|
||||
// A single "." pattern with the terminate handler: the scan must stop after
// the first callback and return CH_SCAN_TERMINATED.
TEST(Scan, CallbackTerm1) {
    static const char * const patterns[] = { "." };
    ch_database_t *database = nullptr;
    makeDatabase(&database, patterns, 1);

    ch_scratch_t *scratch_space = nullptr;
    ch_error_t status = ch_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_TRUE(scratch_space != nullptr);

    const string input("qwertyuiop");
    unsigned int hits = 0;
    status = ch_scan(database, input.c_str(), input.length(), 0,
                     scratch_space, terminateHandler, nullptr, &hits);
    ASSERT_EQ(CH_SCAN_TERMINATED, status);
    ASSERT_EQ(1U, hits);

    ch_free_scratch(scratch_space);
    ch_free_database(database);
}
|
||||
|
||||
// Two patterns with the terminate handler: the scan must stop after the first
// callback and return CH_SCAN_TERMINATED.
TEST(Scan, CallbackTerm2) {
    static const char * const patterns[] = { "[a-z]+", "[0-9]" };
    ch_database_t *database = nullptr;
    makeDatabase(&database, patterns, 2);

    ch_scratch_t *scratch_space = nullptr;
    ch_error_t status = ch_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(CH_SUCCESS, status);
    ASSERT_TRUE(scratch_space != nullptr);

    const string input("foo 0123 0 bar 39483 n34jfhlqekrcoi3q4");
    unsigned int hits = 0;
    status = ch_scan(database, input.c_str(), input.length(), 0,
                     scratch_space, terminateHandler, nullptr, &hits);
    ASSERT_EQ(CH_SCAN_TERMINATED, status);
    ASSERT_EQ(1U, hits);

    ch_free_scratch(scratch_space);
    ch_free_database(database);
}
|
||||
|
||||
// This case includes a pattern that we use libpcre for.
|
||||
TEST(Scan, CallbackTerm3) {
|
||||
static const char * const expr[] = { "[a-z]+", "foo(?!bar)" };
|
||||
ch_database_t *db = nullptr;
|
||||
makeDatabase(&db, expr, 2);
|
||||
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
ch_error_t err = ch_alloc_scratch(db, &scratch);
|
||||
ASSERT_EQ(CH_SUCCESS, err);
|
||||
ASSERT_TRUE(scratch != nullptr);
|
||||
|
||||
unsigned int count = 0;
|
||||
const string data("foobaz foobing foobar");
|
||||
err = ch_scan(db, data.c_str(), data.length(), 0,
|
||||
scratch, terminateHandler, 0, &count);
|
||||
ASSERT_EQ(CH_SCAN_TERMINATED, err);
|
||||
ASSERT_EQ(1U, count);
|
||||
|
||||
ch_free_scratch(scratch);
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
} // namespace
|
Loading…
x
Reference in New Issue
Block a user