Merge branch develop to master

Wang, Xiang W 2018-07-09 12:45:44 -04:00
commit 2060dd3a9c
138 changed files with 11646 additions and 721 deletions

CHANGELOG.md

@@ -2,6 +2,18 @@
 
 This is a list of notable changes to Hyperscan, in reverse chronological order.
 
+## [5.0.0] 2018-07-09
+- Introduce chimera hybrid engine of Hyperscan and PCRE, to fully support
+  PCRE syntax as well as to take advantage of the high performance nature of
+  Hyperscan.
+- New API feature: logical combinations (AND, OR and NOT) of patterns in a
+  given pattern set.
+- Windows porting: hsbench, hscheck, hscollider and hsdump tools now available
+  on Windows 8 or newer.
+- Improve undirected graph implementation to avoid graph copy and reduce
+  compile time.
+- Bugfix for issue #86: enable hscollider for installed PCRE package.
+
 ## [4.7.0] 2018-01-24
 - Introduced hscollider pattern testing tool, for validating Hyperscan match
   behaviour against PCRE.
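The logical combinations feature listed under 5.0.0 above is exposed through the core Hyperscan compile API rather than a new entry point. Below is a minimal, illustrative C sketch (not part of this commit's diff): it assumes the 5.0 combination syntax, in which a combination expression references the IDs of other patterns in the same set with the operators &, | and !, and is marked with the HS_FLAG_COMBINATION flag. The pattern IDs and test data are invented for the example.

#include <stdio.h>
#include <hs.h>

/* Patterns 101 and 102 are ordinary expressions; pattern 1001 is a logical
 * combination that reports a match only once both 101 and 102 have matched.
 * HS_FLAG_COMBINATION and the ID-based syntax are assumed from the 5.0
 * feature description, not from this diff. */
static int on_match(unsigned int id, unsigned long long from,
                    unsigned long long to, unsigned int flags, void *ctx) {
    (void)from; (void)flags; (void)ctx;
    printf("match for pattern id %u ending at offset %llu\n", id, to);
    return 0; /* continue scanning */
}

int main(void) {
    const char *exprs[] = { "foo", "bar[0-9]+", "101&102" };
    unsigned int flags[] = { 0, 0, HS_FLAG_COMBINATION };
    unsigned int ids[] = { 101, 102, 1001 };
    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;

    if (hs_compile_multi(exprs, flags, ids, 3, HS_MODE_BLOCK, NULL, &db,
                         &err) != HS_SUCCESS) {
        fprintf(stderr, "compile failed: %s\n", err->message);
        hs_free_compile_error(err);
        return 1;
    }

    hs_scratch_t *scratch = NULL;
    if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
        hs_free_database(db);
        return 1;
    }

    const char data[] = "foo ... bar42";
    hs_scan(db, data, sizeof(data) - 1, 0, scratch, on_match, NULL);

    hs_free_scratch(scratch);
    hs_free_database(db);
    return 0;
}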

CMakeLists.txt

@@ -1,8 +1,8 @@
 cmake_minimum_required (VERSION 2.8.11)
 project (hyperscan C CXX)
 
-set (HS_MAJOR_VERSION 4)
-set (HS_MINOR_VERSION 7)
+set (HS_MAJOR_VERSION 5)
+set (HS_MINOR_VERSION 0)
 set (HS_PATCH_VERSION 0)
 set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
 
@@ -154,7 +154,7 @@ if(MSVC OR MSVC_IDE)
 # todo: change these as required
 set(ARCH_C_FLAGS "/arch:AVX2")
 set(ARCH_CXX_FLAGS "/arch:AVX2")
-set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 -D_CRT_SECURE_NO_WARNINGS")
+set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 /wd4996 -D_CRT_SECURE_NO_WARNINGS")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 ${MSVC_WARNS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 ${MSVC_WARNS} /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD")
 endif()
@@ -446,11 +446,32 @@ else()
 endif()
 
 add_subdirectory(util)
-add_subdirectory(unit)
 add_subdirectory(doc/dev-reference)
+
+if (NOT WIN32)
+# PCRE check, we have a fixed requirement for PCRE to use Chimera
+# and hscollider
+set(PCRE_REQUIRED_MAJOR_VERSION 8)
+set(PCRE_REQUIRED_MINOR_VERSION 41)
+set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
+include (${CMAKE_MODULE_PATH}/pcre.cmake)
+if (NOT CORRECT_PCRE_VERSION)
+message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found")
+endif()
+
+# we need static libs for Chimera - too much deep magic for shared libs
+if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS)
+set(BUILD_CHIMERA TRUE)
+endif()
+
+add_subdirectory(unit)
 if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt)
 add_subdirectory(tools)
 endif()
+if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA)
+add_subdirectory(chimera)
+endif()
+endif()
 
 # do substitutions
 configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h)
@@ -479,6 +500,31 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
 endif()
 
+if (WIN32)
+# PCRE check, we have a fixed requirement for PCRE to use Chimera
+# and hscollider
+set(PCRE_REQUIRED_MAJOR_VERSION 8)
+set(PCRE_REQUIRED_MINOR_VERSION 41)
+set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
+include (${CMAKE_MODULE_PATH}/pcre.cmake)
+if (NOT CORRECT_PCRE_VERSION)
+message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found")
+endif()
+
+# we need static libs for Chimera - too much deep magic for shared libs
+if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS)
+set(BUILD_CHIMERA TRUE)
+endif()
+
+add_subdirectory(unit)
+if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt)
+add_subdirectory(tools)
+endif()
+if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA)
+add_subdirectory(chimera)
+endif()
+endif()
+
 if(NOT WIN32)
 set(RAGEL_C_FLAGS "-Wno-unused")
 endif()
@@ -860,7 +906,6 @@ SET (hs_compile_SRCS
 src/nfagraph/ng_stop.h
 src/nfagraph/ng_uncalc_components.cpp
 src/nfagraph/ng_uncalc_components.h
-src/nfagraph/ng_undirected.h
 src/nfagraph/ng_utf8.cpp
 src/nfagraph/ng_utf8.h
 src/nfagraph/ng_util.cpp
@@ -915,6 +960,8 @@ SET (hs_compile_SRCS
 src/parser/check_refs.h
 src/parser/control_verbs.cpp
 src/parser/control_verbs.h
+src/parser/logical_combination.cpp
+src/parser/logical_combination.h
 src/parser/parse_error.cpp
 src/parser/parse_error.h
 src/parser/parser_util.cpp
@@ -1014,6 +1061,7 @@ SET (hs_compile_SRCS
 src/util/graph.h
 src/util/graph_range.h
 src/util/graph_small_color_map.h
+src/util/graph_undirected.h
 src/util/hash.h
 src/util/hash_dynamic_bitset.h
 src/util/insertion_ordered.h

chimera/CMakeLists.txt (new file)

@@ -0,0 +1,49 @@
# Chimera lib
include_directories(${PCRE_INCLUDE_DIRS})
# only set these after all tests are done
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
SET(chimera_HEADERS
ch.h
ch_common.h
ch_compile.h
ch_runtime.h
)
install(FILES ${chimera_HEADERS} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/hs")
SET(chimera_SRCS
${chimera_HEADERS}
ch_alloc.c
ch_alloc.h
ch_compile.cpp
ch_database.c
ch_database.h
ch_internal.h
ch_runtime.c
ch_scratch.h
ch_scratch.c
)
add_library(chimera STATIC ${chimera_SRCS})
add_dependencies(chimera hs pcre)
target_link_libraries(chimera hs pcre)
install(TARGETS chimera DESTINATION ${CMAKE_INSTALL_LIBDIR})
if (NOT WIN32)
# expand out library names for pkgconfig static link info
foreach (LIB ${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES})
# this is fragile, but protects us from toolchain specific files
if (NOT EXISTS ${LIB})
set(PRIVATE_LIBS "${PRIVATE_LIBS} -l${LIB}")
endif()
endforeach()
set(PRIVATE_LIBS "${PRIVATE_LIBS} -L${LIBDIR} -lpcre")
configure_file(libch.pc.in libch.pc @ONLY) # only replace @ quoted vars
install(FILES ${CMAKE_BINARY_DIR}/chimera/libch.pc
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
endif()

chimera/ch.h (new file)

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CH_H_
#define CH_H_
/**
* @file
* @brief The complete Chimera API definition.
*
* Chimera is a hybrid solution of Hyperscan and PCRE.
*
* This header includes both the Chimera compiler and runtime components. See
* the individual component headers for documentation.
*/
#include "ch_compile.h"
#include "ch_runtime.h"
#endif /* CH_H_ */

chimera/ch_alloc.c (new file)

@@ -0,0 +1,109 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime functions for setting custom allocators.
*/
#include "ch.h"
#include "ch_common.h"
#include "ch_internal.h"
#include "hs.h"
#include "ue2common.h"
#define default_malloc malloc
#define default_free free
ch_alloc_t ch_database_alloc = default_malloc;
ch_alloc_t ch_misc_alloc = default_malloc;
ch_alloc_t ch_scratch_alloc = default_malloc;
ch_free_t ch_database_free = default_free;
ch_free_t ch_misc_free = default_free;
ch_free_t ch_scratch_free = default_free;
static
ch_alloc_t normalise_alloc(ch_alloc_t a) {
if (!a) {
return default_malloc;
} else {
return a;
}
}
static
ch_free_t normalise_free(ch_free_t f) {
if (!f) {
return default_free;
} else {
return f;
}
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_set_allocator(ch_alloc_t allocfunc,
ch_free_t freefunc) {
ch_set_database_allocator(allocfunc, freefunc);
ch_set_misc_allocator(allocfunc, freefunc);
ch_set_scratch_allocator(allocfunc, freefunc);
// Set core Hyperscan alloc/free.
hs_error_t ret = hs_set_allocator(allocfunc, freefunc);
return ret;
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_set_database_allocator(ch_alloc_t allocfunc,
ch_free_t freefunc) {
ch_database_alloc = normalise_alloc(allocfunc);
ch_database_free = normalise_free(freefunc);
// Set Hyperscan database alloc/free.
return hs_set_database_allocator(allocfunc, freefunc);
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_set_misc_allocator(ch_alloc_t allocfunc,
ch_free_t freefunc) {
ch_misc_alloc = normalise_alloc(allocfunc);
ch_misc_free = normalise_free(freefunc);
// Set Hyperscan misc alloc/free.
return hs_set_misc_allocator(allocfunc, freefunc);
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_set_scratch_allocator(ch_alloc_t allocfunc,
ch_free_t freefunc) {
ch_scratch_alloc = normalise_alloc(allocfunc);
ch_scratch_free = normalise_free(freefunc);
// Set Hyperscan scratch alloc/free.
return hs_set_scratch_allocator(allocfunc, freefunc);
}
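As a usage note for the allocator hooks defined in ch_alloc.c above: ch_set_allocator() fans out to the database, misc and scratch allocators and also forwards to hs_set_allocator(), and passing NULL restores the default malloc()/free() behaviour (see normalise_alloc()/normalise_free()). A minimal sketch, with counting wrappers invented for the example:

#include <stdio.h>
#include <stdlib.h>
#include "ch.h"

/* Illustrative wrappers: count how many allocations and frees Chimera makes.
 * malloc()/free() already satisfy the alignment requirement documented for
 * ch_alloc_t. */
static unsigned long alloc_calls;
static unsigned long free_calls;

static void *counting_alloc(size_t size) {
    alloc_calls++;
    return malloc(size);
}

static void counting_free(void *ptr) {
    if (ptr) {
        free_calls++;
    }
    free(ptr);
}

int main(void) {
    if (ch_set_allocator(counting_alloc, counting_free) != CH_SUCCESS) {
        return 1;
    }

    /* ... compile databases, allocate scratch and scan with Chimera here ... */

    printf("alloc calls: %lu, free calls: %lu\n", alloc_calls, free_calls);

    /* Passing NULL restores the default malloc()/free() allocators. */
    ch_set_allocator(NULL, NULL);
    return 0;
}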

chimera/ch_alloc.h (new file)

@@ -0,0 +1,65 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CH_ALLOC_H
#define CH_ALLOC_H
#include "hs_common.h"
#include "ue2common.h"
#include "ch_common.h"
#ifdef __cplusplus
extern "C"
{
#endif
extern hs_alloc_t ch_database_alloc;
extern hs_alloc_t ch_misc_alloc;
extern hs_alloc_t ch_scratch_alloc;
extern hs_free_t ch_database_free;
extern hs_free_t ch_misc_free;
extern hs_free_t ch_scratch_free;
#ifdef __cplusplus
} /* extern C */
#endif
/** \brief Check the results of an alloc done with hs_alloc for alignment.
*
* If we have incorrect alignment, return an error. Caller should free the
* offending block. */
static really_inline
ch_error_t ch_check_alloc(const void *mem) {
ch_error_t ret = CH_SUCCESS;
if (!mem) {
ret = CH_NOMEM;
} else if (!ISALIGNED_N(mem, alignof(unsigned long long))) {
ret = CH_BAD_ALLOC;
}
return ret;
}
#endif

chimera/ch_common.h (new file)

@@ -0,0 +1,360 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CH_COMMON_H_
#define CH_COMMON_H_
#include "hs_common.h"
#include <stdlib.h>
/**
* @file
* @brief The Chimera common API definition.
*
* Chimera is a hybrid of Hyperscan and PCRE.
*
* This header contains functions available to both the Chimera compiler and
* runtime.
*/
#ifdef __cplusplus
extern "C"
{
#endif
struct ch_database;
/**
* A Chimera pattern database.
*
* Generated by one of the Chimera compiler functions:
* - @ref ch_compile()
* - @ref ch_compile_multi()
* - @ref ch_compile_ext_multi()
*/
typedef struct ch_database ch_database_t;
/**
* A type for errors returned by Chimera functions.
*/
typedef int ch_error_t;
/**
* Free a compiled pattern database.
*
* The free callback set by @ref ch_set_allocator() will be used by this
* function.
*
* @param db
* A compiled pattern database. NULL may also be safely provided, in which
* case the function does nothing.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_free_database(ch_database_t *db);
/**
* Utility function for identifying this release version.
*
* @return
* A string containing the version number of this release build and the
* date of the build. It is allocated statically, so it does not need to
* be freed by the caller.
*/
const char * HS_CDECL ch_version(void);
/**
* Returns the size of the given database.
*
* @param database
* Pointer to compiled expression database.
*
* @param database_size
* On success, the size of the compiled database in bytes is placed in this
* parameter.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_database_size(const ch_database_t *database,
size_t *database_size);
/**
* Utility function providing information about a database.
*
* @param database
* Pointer to a compiled database.
*
* @param info
* On success, a string containing the version and platform information for
* the supplied database is placed in the parameter. The string is
* allocated using the allocator supplied in @ref hs_set_allocator()
* (or malloc() if no allocator was set) and should be freed by the caller.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_database_info(const ch_database_t *database,
char **info);
/**
* The type of the callback function that will be used by Chimera to allocate
* more memory at runtime as required.
*
* If Chimera is to be used in a multi-threaded, or similarly concurrent
* environment, the allocation function will need to be re-entrant, or
* similarly safe for concurrent use.
*
* @param size
* The number of bytes to allocate.
* @return
* A pointer to the region of memory allocated, or NULL on error.
*/
typedef void *(HS_CDECL *ch_alloc_t)(size_t size);
/**
* The type of the callback function that will be used by Chimera to free
* memory regions previously allocated using the @ref ch_alloc_t function.
*
* @param ptr
* The region of memory to be freed.
*/
typedef void (HS_CDECL *ch_free_t)(void *ptr);
/**
* Set the allocate and free functions used by Chimera for allocating
* memory at runtime for stream state, scratch space, database bytecode,
* and various other data structures returned by the Chimera API.
*
* The function is equivalent to calling @ref ch_set_scratch_allocator(),
* @ref ch_set_database_allocator() and
* @ref ch_set_misc_allocator() with the provided parameters.
*
* This call will override any previous allocators that have been set.
*
* Note: there is no way to change the allocator used for temporary objects
* created during the various compile calls (@ref ch_compile() and @ref
* ch_compile_multi()).
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_set_allocator(ch_alloc_t alloc_func,
ch_free_t free_func);
/**
* Set the allocate and free functions used by Chimera for allocating memory
* for database bytecode produced by the compile calls (@ref ch_compile() and @ref
* ch_compile_multi()).
*
* If no database allocation functions are set, or if NULL is used in place of
* both parameters, then memory allocation will default to standard methods
* (such as the system malloc() and free() calls).
*
* This call will override any previous database allocators that have been set.
*
* Note: the database allocator may also be set by calling @ref
* ch_set_allocator().
*
* Note: there is no way to change how temporary objects created during the
* various compile calls (@ref ch_compile() and @ref ch_compile_multi()) are
* allocated.
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_set_database_allocator(ch_alloc_t alloc_func,
ch_free_t free_func);
/**
* Set the allocate and free functions used by Chimera for allocating memory
* for items returned by the Chimera API such as @ref ch_compile_error_t.
*
* If no misc allocation functions are set, or if NULL is used in place of both
* parameters, then memory allocation will default to standard methods (such as
* the system malloc() and free() calls).
*
* This call will override any previous misc allocators that have been set.
*
* Note: the misc allocator may also be set by calling @ref ch_set_allocator().
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_set_misc_allocator(ch_alloc_t alloc_func,
ch_free_t free_func);
/**
* Set the allocate and free functions used by Chimera for allocating memory
* for scratch space by @ref ch_alloc_scratch() and @ref ch_clone_scratch().
*
* If no scratch allocation functions are set, or if NULL is used in place of
* both parameters, then memory allocation will default to standard methods
* (such as the system malloc() and free() calls).
*
* This call will override any previous scratch allocators that have been set.
*
* Note: the scratch allocator may also be set by calling @ref
* ch_set_allocator().
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_set_scratch_allocator(ch_alloc_t alloc_func,
ch_free_t free_func);
/**
* @defgroup CH_ERROR ch_error_t values
*
* @{
*/
/**
* The engine completed normally.
*/
#define CH_SUCCESS 0
/**
* A parameter passed to this function was invalid.
*/
#define CH_INVALID (-1)
/**
* A memory allocation failed.
*/
#define CH_NOMEM (-2)
/**
* The engine was terminated by callback.
*
* This return value indicates that the target buffer was partially scanned,
* but that the callback function requested that scanning cease after a match
* was located.
*/
#define CH_SCAN_TERMINATED (-3)
/**
* The pattern compiler failed, and the @ref ch_compile_error_t should be
* inspected for more detail.
*/
#define CH_COMPILER_ERROR (-4)
/**
* The given database was built for a different version of the Chimera matcher.
*/
#define CH_DB_VERSION_ERROR (-5)
/**
* The given database was built for a different platform (i.e., CPU type).
*/
#define CH_DB_PLATFORM_ERROR (-6)
/**
* The given database was built for a different mode of operation. This error
* is returned when streaming calls are used with a non-streaming database and
* vice versa.
*/
#define CH_DB_MODE_ERROR (-7)
/**
* A parameter passed to this function was not correctly aligned.
*/
#define CH_BAD_ALIGN (-8)
/**
* The memory allocator did not correctly return memory suitably aligned for
* the largest representable data type on this platform.
*/
#define CH_BAD_ALLOC (-9)
/**
* The scratch region was already in use.
*
* This error is returned when Chimera is able to detect that the scratch
* region given is already in use by another Chimera API call.
*
* A separate scratch region, allocated with @ref ch_alloc_scratch() or @ref
* ch_clone_scratch(), is required for every concurrent caller of the Chimera
* API.
*
* For example, this error might be returned when @ref ch_scan() has been
* called inside a callback delivered by a currently-executing @ref ch_scan()
* call using the same scratch region.
*
* Note: Not all concurrent uses of scratch regions may be detected. This error
* is intended as a best-effort debugging tool, not a guarantee.
*/
#define CH_SCRATCH_IN_USE (-10)
/**
* Returned when pcre_exec (called for some expressions internally from @ref
* ch_scan) failed due to a fatal error.
*/
#define CH_FAIL_INTERNAL (-32)
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* CH_COMMON_H_ */
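For illustration, a short sketch of the introspection utilities declared in ch_common.h above; it assumes db was produced by one of the ch_compile*() calls and that the default (malloc-based) misc allocator is in use, so the info string can be released with free():

#include <stdio.h>
#include <stdlib.h>
#include "ch.h"

static void describe_database(const ch_database_t *db) {
    printf("Chimera release: %s\n", ch_version());

    size_t bytes = 0;
    ch_error_t err = ch_database_size(db, &bytes);
    if (err != CH_SUCCESS) {
        fprintf(stderr, "ch_database_size failed: %d\n", err);
        return;
    }
    printf("database size: %zu bytes\n", bytes);

    char *info = NULL;
    err = ch_database_info(db, &info);
    if (err == CH_SUCCESS) {
        printf("database info: %s\n", info);
        free(info); /* caller frees; plain free() assumes the default
                     * misc allocator */
    }
}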

chimera/ch_compile.cpp (new file)

@@ -0,0 +1,878 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compiler front-end, including public API calls for compilation.
*/
#include "ch_compile.h"
#include "ch_alloc.h"
#include "ch_internal.h"
#include "ch_database.h"
#include "grey.h"
#include "hs_common.h"
#include "hs_internal.h"
#include "ue2common.h"
#include "util/compile_error.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/target_info.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <memory>
#include <ostream>
#include <sstream>
#include <limits.h>
#include <string>
#include <vector>
#include <boost/core/noncopyable.hpp>
#define PCRE_ERROR_MSG "Internal error building PCRE pattern."
using namespace std;
using namespace ue2;
static const char failureNoMemory[] = "Unable to allocate memory.";
static const char failureInternal[] = "Internal error.";
static const char failureBadAlloc[] = "Allocator returned misaligned memory.";
static const ch_compile_error_t ch_enomem
= { const_cast<char *>(failureNoMemory), 0 };
static const ch_compile_error_t ch_einternal
= { const_cast<char *>(failureInternal), 0 };
static const ch_compile_error_t ch_badalloc
= { const_cast<char *>(failureBadAlloc), 0 };
static
ch_compile_error_t *generateChimeraCompileError(const string &err,
int expression) {
ch_compile_error_t *ret =
(struct ch_compile_error *)ch_misc_alloc(sizeof(ch_compile_error_t));
if (ret) {
ch_error_t e = ch_check_alloc(ret);
if (e != CH_SUCCESS) {
ch_misc_free(ret);
return const_cast<ch_compile_error_t *>(&ch_badalloc);
}
char *msg = (char *)ch_misc_alloc(err.size() + 1);
if (msg) {
e = ch_check_alloc(msg);
if (e != HS_SUCCESS) {
ch_misc_free(msg);
return const_cast<ch_compile_error_t *>(&ch_badalloc);
}
memcpy(msg, err.c_str(), err.size() + 1);
ret->message = msg;
} else {
ch_misc_free(ret);
ret = nullptr;
}
}
if (!ret || !ret->message) {
return const_cast<ch_compile_error_t *>(&ch_enomem);
}
ret->expression = expression;
return ret;
}
static
void freeChimeraCompileError(ch_compile_error_t *error) {
if (!error) {
return;
}
if (error == &ch_enomem || error == &ch_einternal ||
error == &ch_badalloc) {
// These are not allocated.
return;
}
ch_misc_free(error->message);
ch_misc_free(error);
}
static
bool checkMode(unsigned int mode, ch_compile_error_t **comp_error) {
static const unsigned int supported = CH_MODE_GROUPS;
if (mode & ~supported) {
*comp_error =
generateChimeraCompileError("Invalid mode flag supplied.", -1);
return false;
}
return true;
}
/** \brief Throw a compile error if we're passed some unsupported flags. */
static
void checkFlags(const unsigned int flags) {
static const unsigned int supported = HS_FLAG_DOTALL
| HS_FLAG_MULTILINE
| HS_FLAG_CASELESS
| HS_FLAG_SINGLEMATCH
| HS_FLAG_UCP
| HS_FLAG_UTF8;
if (flags & ~supported) {
throw CompileError("Unrecognized flag used.");
}
}
static
bool isHyperscanSupported(const char *expression, unsigned int flags,
const hs_platform_info *platform) {
hs_database_t *db = nullptr;
hs_compile_error *comp_error = nullptr;
unsigned int id = 0;
hs_error_t err = hs_compile_multi(&expression, &flags, &id,
1, HS_MODE_BLOCK, platform, &db,
&comp_error);
if (err != HS_SUCCESS) {
assert(!db);
assert(comp_error);
DEBUG_PRINTF("unsupported: %s\n", comp_error->message);
hs_free_compile_error(comp_error);
return false;
}
assert(db);
assert(!comp_error);
hs_free_database(db);
return true;
}
static
bool writeHyperscanDatabase(char *ptr, hs_database_t *db) {
// Note: we must use our serialization calls to re-home the database.
char *serialized = nullptr;
size_t slen = 0;
hs_error_t err = hs_serialize_database(db, &serialized, &slen);
if (err != HS_SUCCESS) {
DEBUG_PRINTF("hs_serialize_database returned %d\n", err);
assert(0);
return false;
}
DEBUG_PRINTF("writing database to ptr %p\n", ptr);
// deserialize_at without the platform tests.
err = hs_deserialize_database_at(serialized, slen, (hs_database_t *)ptr);
if (err != HS_SUCCESS) {
DEBUG_PRINTF("hs_deserialize_database_at returned %d\n", err);
assert(0);
ch_misc_free(serialized);
return false;
}
ch_misc_free(serialized);
return true;
}
static
bool writeHyperscanDatabase(ch_bytecode *db, hs_database_t *hs_db) {
db->databaseOffset = ROUNDUP_CL(sizeof(*db));
char *ptr = (char *)db + db->databaseOffset;
return writeHyperscanDatabase(ptr, hs_db);
}
static
int convertFlagsToPcreOptions(unsigned int flags) {
int options = 0;
if (flags & HS_FLAG_CASELESS) {
options |= PCRE_CASELESS;
}
if (flags & HS_FLAG_DOTALL) {
options |= PCRE_DOTALL;
}
if (flags & HS_FLAG_MULTILINE) {
options |= PCRE_MULTILINE;
}
if (flags & HS_FLAG_UTF8) {
options |= PCRE_UTF8;
}
if (flags & HS_FLAG_UCP) {
options |= PCRE_UCP;
}
// All other flags are meaningless to PCRE.
return options;
}
namespace {
/** \brief Data about a single pattern. */
struct PatternData : boost::noncopyable {
PatternData(const char *pattern, u32 flags, u32 idx, u32 id_in,
unsigned mode, unsigned long int match_limit,
unsigned long int match_limit_recursion,
const hs_platform_info *platform);
~PatternData() {
pcre_free(compiled);
pcre_free(extra);
}
void buildPcre(const char *pattern, u32 flags);
size_t patternSize() const;
void writePattern(ch_pattern *pattern) const;
pcre *compiled; //!< pcre_compile output
pcre_extra *extra; //!< pcre_study output
size_t compiled_size;
int study_size;
int capture_cnt;
bool utf8;
u32 id; //!< ID from the user
u32 expr_index; //!< index in the expression array
bool singlematch; //!< pattern is in highlander mode
bool guard; //!< this pattern should be guarded by the multimatcher
u32 minWidth; //!< min match width
u32 maxWidth; //!< max match width
u32 fixedWidth; //!< fixed pattern width
unsigned long int matchLimit; //! pcre match limit
unsigned long int matchLimitRecursion; //! pcre match_limit_recursion
};
PatternData::PatternData(const char *pattern, u32 flags, u32 idx, u32 id_in,
unsigned mode, unsigned long int match_limit,
unsigned long int match_limit_recursion,
const hs_platform_info *platform)
: compiled(nullptr), extra(nullptr), id(id_in), expr_index(idx),
singlematch(flags & HS_FLAG_SINGLEMATCH),
guard(false), minWidth(0), maxWidth(UINT_MAX),
fixedWidth(UINT_MAX), matchLimit(match_limit),
matchLimitRecursion(match_limit_recursion) {
assert(pattern);
flags |= HS_FLAG_ALLOWEMPTY; /* don't hand things off to pcre for no
reason */
buildPcre(pattern, flags);
// Fetch the expression info for a prefiltering, non-singlematch version of
// this pattern, if possible.
hs_expr_info *info = nullptr;
hs_compile_error_t *error = nullptr;
u32 infoflags = (flags | HS_FLAG_PREFILTER) & ~HS_FLAG_SINGLEMATCH;
u32 rawflags = (flags | HS_FLAG_SOM_LEFTMOST) & ~HS_FLAG_SINGLEMATCH;
hs_error_t err = hs_expression_info(pattern, infoflags, &info, &error);
if (err == HS_SUCCESS) {
assert(info);
hs_expr_info *i = (hs_expr_info *)info;
minWidth = i->min_width;
maxWidth = i->max_width;
bool ordered = i->unordered_matches ? false : true;
// Only enable capturing if required
u32 captureCnt = 0;
if (mode & CH_MODE_GROUPS) {
captureCnt = capture_cnt;
}
// No need to confirm with PCRE if:
// 1) pattern is fixed width
// 2) pattern isn't vacuous as it can't combine with start of match
// 3) no capturing in this pattern
// 4) no offset adjust in this pattern as hyperscan match callback
// will arrive without order, i.e. [^a]\z has offset adjust
// 5) hyperscan compile succeeds without prefiltering
if (minWidth == maxWidth && minWidth && maxWidth != UINT_MAX &&
!captureCnt && ordered &&
isHyperscanSupported(pattern, rawflags, platform)) {
fixedWidth = maxWidth;
}
DEBUG_PRINTF("gathered info: widths=[%u,%u]\n", minWidth, maxWidth);
ch_misc_free(info);
u32 guardflags;
guardflags = (flags | HS_FLAG_PREFILTER) & ~HS_FLAG_SINGLEMATCH;
guard = isHyperscanSupported(pattern, guardflags, platform);
} else {
// We can't even prefilter this pattern, so we're dependent on Big Dumb
// Pcre Scans.
DEBUG_PRINTF("hs_expression_info failed, falling back to pcre\n");
hs_free_compile_error(error);
}
}
void PatternData::buildPcre(const char *pattern, u32 flags) {
int options = convertFlagsToPcreOptions(flags);
const char *errptr = nullptr;
int erroffset = 0;
compiled = pcre_compile(pattern, options, &errptr, &erroffset, nullptr);
if (!compiled) {
DEBUG_PRINTF("PCRE failed to compile: %s\n", pattern);
string err("PCRE compilation failed: ");
err += string(errptr);
err += ".";
throw CompileError(expr_index, err);
}
extra = pcre_study(compiled, PCRE_STUDY_JIT_COMPILE, &errptr);
// Note that it's OK for pcre_study to return NULL if there's nothing
// to be found, but a non-NULL error is always bad.
if (errptr) {
DEBUG_PRINTF("PCRE could not be studied: %s\n", errptr);
string err("PCRE compilation failed: ");
err += string(errptr);
err += ".";
throw CompileError(expr_index, err);
}
if (pcre_fullinfo(compiled, extra, PCRE_INFO_SIZE, &compiled_size)) {
throw CompileError(PCRE_ERROR_MSG);
}
if (!extra) {
study_size = 0;
} else {
if (pcre_fullinfo(compiled, extra, PCRE_INFO_STUDYSIZE, &study_size)) {
throw CompileError(PCRE_ERROR_MSG);
}
}
if (pcre_fullinfo(compiled, extra, PCRE_INFO_CAPTURECOUNT, &capture_cnt)) {
throw CompileError(PCRE_ERROR_MSG);
}
/* We use the pcre rather than hs to get this information as we may need it
* even in the pure unguarded pcre mode where there is no hs available. We
* can not use the compile flags due to (*UTF8) verb */
unsigned long int opts = 0; // PCRE_INFO_OPTIONS demands an unsigned long
if (pcre_fullinfo(compiled, extra, PCRE_INFO_OPTIONS, &opts)) {
throw CompileError(PCRE_ERROR_MSG);
}
utf8 = opts & PCRE_UTF8;
}
size_t PatternData::patternSize() const {
size_t len = 0;
// ch_pattern header.
len += sizeof(ch_pattern);
len = ROUNDUP_N(len, 8);
DEBUG_PRINTF("compiled pcre at %zu\n", len);
len += compiled_size;
// PCRE study data, which may be zero.
if (study_size) {
len = ROUNDUP_N(len, 8);
DEBUG_PRINTF("study at %zu\n", len);
len += (size_t)study_size;
}
DEBUG_PRINTF("pattern size %zu\n", len);
return len;
}
/** \brief Write out an ch_pattern structure, which should already be sized
* correctly according to PatternData::patternSize. */
void PatternData::writePattern(ch_pattern *pattern) const {
assert(pattern);
assert(ISALIGNED_CL(pattern));
pattern->id = id;
u32 flags = 0;
if (singlematch) {
flags |= CHIMERA_PATTERN_FLAG_SINGLEMATCH;
}
if (utf8) {
flags |= CHIMERA_PATTERN_FLAG_UTF8;
}
pattern->flags = flags;
pattern->maxWidth = maxWidth;
pattern->minWidth = minWidth == UINT_MAX ? 0 : minWidth;
pattern->fixedWidth = fixedWidth;
// Compiled PCRE pattern.
char *ptr = (char *)pattern;
ptr += ROUNDUP_N(sizeof(*pattern), 8);
DEBUG_PRINTF("compiled pcre at %zu\n", (size_t)(ptr - (char *)pattern));
memcpy(ptr, compiled, compiled_size);
ptr += compiled_size;
// PCRE match limits
pattern->extra.flags = PCRE_EXTRA_MATCH_LIMIT |
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
pattern->extra.match_limit = matchLimit ? matchLimit : 10000000;
// Set to avoid segmentation fault
pattern->extra.match_limit_recursion =
matchLimitRecursion ? matchLimitRecursion : 1500;
// PCRE study_data.
u32 studyOffset = 0;
if (extra) {
assert(extra->study_data);
ptr = ROUNDUP_PTR(ptr, 8);
DEBUG_PRINTF("study at %zu\n", (size_t)(ptr - (char *)pattern));
memcpy(ptr, extra->study_data, study_size);
studyOffset = (size_t)(ptr - (char *)pattern);
pattern->extra.flags |= PCRE_EXTRA_STUDY_DATA;
pattern->extra.study_data = ptr;
ptr += study_size;
} else {
pattern->extra.flags &= ~PCRE_EXTRA_STUDY_DATA;
}
pattern->studyOffset = studyOffset;
size_t pcreLen = (ptr - (char *)pattern);
assert(pcreLen <= patternSize());
pattern->length = (u32)pcreLen;
// We shouldn't overrun the space we've allocated for this pattern.
assert(patternSize() >= (size_t)(ptr - (char *)pattern));
}
} // namespace
namespace ch {
static
void ch_compile_multi_int(const char *const *expressions, const unsigned *flags,
const unsigned *ids, unsigned elements,
unsigned mode, unsigned long int match_limit,
unsigned long int match_limit_recursion,
const hs_platform_info_t *platform,
ch_database_t **out) {
vector<unique_ptr<PatternData>> pcres;
pcres.reserve(elements);
vector<u32> unguarded; // indices of unguarded PCREs.
vector<const char *> multiExpr;
vector<unsigned int> multiFlags;
vector<unsigned int> multiIds;
bool allConfirm = true;
bool allSingleMatch = true;
for (unsigned int i = 0; i < elements; i++) {
const char *myExpr = expressions[i];
unsigned int myFlags = flags ? flags[i] : 0;
unsigned int myId = ids ? ids[i] : 0;
checkFlags(myFlags);
// First, build with libpcre. A build failure from libpcre will throw
// an exception up to the caller.
auto patternData =
ue2::make_unique<PatternData>(myExpr, myFlags, i, myId, mode, match_limit,
match_limit_recursion, platform);
pcres.push_back(move(patternData));
PatternData &curr = *pcres.back();
if (!(myFlags & HS_FLAG_SINGLEMATCH)) {
allSingleMatch = false;
}
// in the multimatch, we always run in prefilter mode and accept vacuous
// patterns.
myFlags |=
HS_FLAG_ALLOWEMPTY | HS_FLAG_PREFILTER;
if (curr.fixedWidth != UINT_MAX) {
myFlags |= HS_FLAG_SOM_LEFTMOST;
DEBUG_PRINTF("fixed width, turn off prefiltering\n");
myFlags &= ~HS_FLAG_PREFILTER;
allConfirm = false;
// Single match can't coexist with SOM.
myFlags &= ~HS_FLAG_SINGLEMATCH;
}
if (curr.guard) {
// We use the index into the PCREs array as the Hyperscan idx.
multiExpr.push_back(myExpr);
multiFlags.push_back(myFlags);
multiIds.push_back(i);
} else {
// No Hyperscan support, PCRE is unguarded.
unguarded.push_back(i);
}
}
DEBUG_PRINTF("built %zu PCREs, %zu of which are unguarded\n",
pcres.size(), unguarded.size());
// Work out our sizing for the output database.
size_t patternSize = 0;
for (unsigned int i = 0; i < elements; i++) {
size_t len = pcres[i]->patternSize();
patternSize += ROUNDUP_CL(len);
}
DEBUG_PRINTF("pcre bytecode takes %zu bytes\n", patternSize);
bool noMulti = multiExpr.empty();
size_t multiSize = 0;
hs_database *multidb = nullptr;
if (!noMulti) {
hs_compile_error_t *hs_comp_error = nullptr;
hs_error_t err = hs_compile_multi(&multiExpr[0], &multiFlags[0],
&multiIds[0], multiExpr.size(),
HS_MODE_BLOCK, platform, &multidb,
&hs_comp_error);
if (err != HS_SUCCESS) {
assert(hs_comp_error);
DEBUG_PRINTF("hs_compile_multi returned error: %s\n",
hs_comp_error->message);
assert(0);
hs_free_compile_error(hs_comp_error);
throw CompileError("Internal error.");
}
assert(multidb);
err = hs_database_size(multidb, &multiSize);
if (err != HS_SUCCESS) {
assert(0);
throw CompileError("Internal error.");
}
DEBUG_PRINTF("built hyperscan database with len %zu bytes\n", multiSize);
}
size_t bytecodeLen = sizeof(ch_bytecode) +
multiSize + alignof(u32) +
(sizeof(u32) * unguarded.size()) +
(sizeof(u32) * elements) +
patternSize +
128; // padding for alignment
size_t totalSize = sizeof(ch_database) + bytecodeLen;
DEBUG_PRINTF("allocating %zu bytes for database\n", totalSize);
char *ptr = (char *)ch_database_alloc(totalSize);
if (ch_check_alloc(ptr) != CH_SUCCESS) {
ch_database_free(ptr);
throw std::bad_alloc();
}
memset(ptr, 0, totalSize);
// First, the header.
ch_database *hydb = (ch_database *)ptr;
hydb->magic = CH_DB_MAGIC;
hydb->version = HS_VERSION_32BIT;
hydb->length = bytecodeLen;
// Then, the bytecode.
size_t shift = (size_t)hydb->bytes & 0x3f;
hydb->bytecode = offsetof(struct ch_database, bytes) - shift;
ch_bytecode *db = (ch_bytecode *)((char *)hydb + hydb->bytecode);
db->patternCount = elements;
db->activeSize = mmbit_size(elements);
db->flags = 0;
db->length = bytecodeLen;
if (noMulti) {
db->flags |= CHIMERA_FLAG_NO_MULTIMATCH;
}
if (mode & CH_MODE_GROUPS) {
db->flags |= CHIMERA_FLAG_GROUPS;
}
if (allConfirm) {
db->flags |= CHIMERA_FLAG_ALL_CONFIRM;
}
if (allSingleMatch) {
db->flags |= CHIMERA_FLAG_ALL_SINGLE;
}
// Find and set the max ovector size by looking at the capture count for
// each pcre.
u32 maxCaptureGroups = 0;
for (unsigned int i = 0; i < elements; i++) {
maxCaptureGroups = max(maxCaptureGroups, (u32)pcres[i]->capture_cnt);
}
db->maxCaptureGroups = maxCaptureGroups;
DEBUG_PRINTF("max capture groups is %u\n", maxCaptureGroups);
if (!noMulti) {
DEBUG_PRINTF("write hyperscan database\n");
// Write Hyperscan database directly after the header struct, then free it.
if (!writeHyperscanDatabase(db, multidb)) {
ch_database_free(hydb);
hs_free_database(multidb);
throw CompileError("Internal error.");
}
hs_free_database(multidb);
} else {
db->databaseOffset = ROUNDUP_CL(sizeof(*db));
}
// Then, write our unguarded PCRE list.
db->unguardedCount = unguarded.size();
db->unguardedOffset = ROUNDUP_N(db->databaseOffset + multiSize, 4);
ptr = (char *)db + db->unguardedOffset;
copy(unguarded.begin(), unguarded.end(), (u32 *)ptr);
// Then, write all our compiled PCRE patterns and the lookup table for
// them.
db->patternOffset = db->unguardedOffset + unguarded.size() * sizeof(u32);
u32 *patternOffset = (u32 *)((char *)db + db->patternOffset);
u32 offset = ROUNDUP_CL(db->patternOffset + elements * sizeof(u32));
for (unsigned int i = 0; i < elements; i++) {
*patternOffset = offset;
size_t len = pcres[i]->patternSize();
ptr = (char *)db + offset;
struct ch_pattern *pattern = (struct ch_pattern *)ptr;
pcres[i]->writePattern(pattern);
DEBUG_PRINTF("wrote pcre %u into offset %u, len %zu\n", i, offset, len);
offset += ROUNDUP_CL(len);
patternOffset++;
}
assert(offset <= totalSize);
assert(hydb->magic == CH_DB_MAGIC);
DEBUG_PRINTF("built hybrid database, size %zu bytes\n", totalSize);
DEBUG_PRINTF("offset=%u\n", offset);
*out = hydb;
}
} // namespace ch
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_compile(const char *expression, unsigned flags,
unsigned mode,
const hs_platform_info_t *platform,
ch_database_t **db,
ch_compile_error_t **comp_error) {
if (!comp_error) {
if (db) {
db = nullptr;
}
// nowhere to write the string, but we can still report an error code
return CH_COMPILER_ERROR;
}
if (!db) {
*comp_error =
generateChimeraCompileError("Invalid parameter: db is NULL", -1);
return CH_COMPILER_ERROR;
}
if (!expression) {
*db = nullptr;
*comp_error =
generateChimeraCompileError("Invalid parameter: expressions is\
NULL", -1);
return CH_COMPILER_ERROR;
}
if (!checkMode(mode, comp_error)) {
*db = nullptr;
assert(*comp_error); // set by checkMode
return CH_COMPILER_ERROR;
}
try {
unsigned id = 0; // single expressions get zero as an ID
// Internal function to do all the work, now that we've handled all the
// argument checking.
ch::ch_compile_multi_int(&expression, &flags, &id, 1, mode, 0, 0,
platform, db);
}
catch (const CompileError &e) {
// Compiler error occurred
*db = nullptr;
*comp_error = generateChimeraCompileError(e.reason, e.hasIndex ?
(int)e.index : -1);
return CH_COMPILER_ERROR;
}
catch (std::bad_alloc) {
*db = nullptr;
*comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
return CH_COMPILER_ERROR;
}
catch (...) {
assert(!"Internal error, unexpected exception");
*db = nullptr;
*comp_error = const_cast<ch_compile_error_t *>(&ch_einternal);
return CH_COMPILER_ERROR;
}
DEBUG_PRINTF("success!\n");
return CH_SUCCESS;
}
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_compile_multi(const char *const *expressions,
const unsigned *flags, const unsigned *ids,
unsigned elements, unsigned mode,
const hs_platform_info_t *platform,
ch_database_t **db,
ch_compile_error_t **comp_error) {
if (!comp_error) {
if (db) {
db = nullptr;
}
// nowhere to write the string, but we can still report an error code
return CH_COMPILER_ERROR;
}
if (!db) {
*comp_error =
generateChimeraCompileError("Invalid parameter: db is NULL", -1);
return CH_COMPILER_ERROR;
}
if (!expressions) {
*db = nullptr;
*comp_error =
generateChimeraCompileError("Invalid parameter: expressions is\
NULL", -1);
return CH_COMPILER_ERROR;
}
if (!elements) {
*db = nullptr;
*comp_error = generateChimeraCompileError("Invalid parameter:\
elements is zero", -1);
return CH_COMPILER_ERROR;
}
if (!checkMode(mode, comp_error)) {
*db = nullptr;
assert(*comp_error); // set by checkMode
return CH_COMPILER_ERROR;
}
try {
// Internal function to do all the work, now that we've handled all the
// argument checking.
ch::ch_compile_multi_int(expressions, flags, ids, elements, mode, 0, 0,
platform, db);
}
catch (const CompileError &e) {
// Compiler error occurred
*db = nullptr;
*comp_error = generateChimeraCompileError(e.reason, e.hasIndex ?
(int)e.index : -1);
return CH_COMPILER_ERROR;
}
catch (std::bad_alloc) {
*db = nullptr;
*comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
return CH_COMPILER_ERROR;
}
catch (...) {
assert(!"Internal error, unexpected exception");
*db = nullptr;
*comp_error = const_cast<ch_compile_error_t *>(&ch_einternal);
return CH_COMPILER_ERROR;
}
DEBUG_PRINTF("success!\n");
return CH_SUCCESS;
}
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_compile_ext_multi(
const char *const *expressions,
const unsigned *flags,
const unsigned *ids,
unsigned elements, unsigned mode,
unsigned long int match_limit,
unsigned long int match_limit_recursion,
const hs_platform_info_t *platform,
ch_database_t **db,
ch_compile_error_t **comp_error) {
if (!comp_error) {
if (db) {
db = nullptr;
}
// nowhere to write the string, but we can still report an error code
return CH_COMPILER_ERROR;
}
if (!db) {
*comp_error =
generateChimeraCompileError("Invalid parameter: db is NULL", -1);
return CH_COMPILER_ERROR;
}
if (!expressions) {
*db = nullptr;
*comp_error =
generateChimeraCompileError("Invalid parameter: expressions is\
NULL", -1);
return CH_COMPILER_ERROR;
}
if (!elements) {
*db = nullptr;
*comp_error = generateChimeraCompileError("Invalid parameter:\
elements is zero", -1);
return CH_COMPILER_ERROR;
}
if (!checkMode(mode, comp_error)) {
*db = nullptr;
assert(*comp_error); // set by checkMode
return CH_COMPILER_ERROR;
}
try {
// Internal function to do all the work, now that we've handled all the
// argument checking.
ch::ch_compile_multi_int(expressions, flags, ids, elements, mode,
match_limit, match_limit_recursion, platform,
db);
}
catch (const CompileError &e) {
// Compiler error occurred
*db = nullptr;
*comp_error = generateChimeraCompileError(e.reason, e.hasIndex ?
(int)e.index : -1);
return CH_COMPILER_ERROR;
}
catch (std::bad_alloc) {
*db = nullptr;
*comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
return CH_COMPILER_ERROR;
}
catch (...) {
assert(!"Internal error, unexpected exception");
*db = nullptr;
*comp_error = const_cast<ch_compile_error_t *>(&ch_einternal);
return CH_COMPILER_ERROR;
}
DEBUG_PRINTF("success!\n");
return CH_SUCCESS;
}
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_free_compile_error(ch_compile_error_t *error) {
freeChimeraCompileError(error);
return CH_SUCCESS;
}
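Tying the public entry points above together, a hedged usage sketch of ch_compile() with compile-error handling. The pattern is arbitrary; CH_FLAG_CASELESS and CH_MODE_NOGROUPS are taken from the ch_compile() documentation, and scanning (ch_scan(), declared in the runtime header, which is not part of this excerpt) is omitted:

#include <stdio.h>
#include "ch.h"

int build_database(void) {
    ch_database_t *db = NULL;
    ch_compile_error_t *compile_err = NULL;

    ch_error_t err = ch_compile("ab(cd|ef)+\\d{2,}", CH_FLAG_CASELESS,
                                CH_MODE_NOGROUPS, NULL, &db, &compile_err);
    if (err != CH_SUCCESS) {
        /* expression is -1 when the error is not tied to a single pattern. */
        fprintf(stderr, "pattern %d failed to compile: %s\n",
                compile_err->expression, compile_err->message);
        ch_free_compile_error(compile_err);
        return -1;
    }

    /* ... allocate scratch and call ch_scan() here ... */

    ch_free_database(db);
    return 0;
}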

chimera/ch_compile.h (new file)

@@ -0,0 +1,394 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CH_COMPILE_H_
#define CH_COMPILE_H_
/**
* @file
* @brief The Chimera compiler API definition.
*
* Chimera is a hybrid solution of Hyperscan and PCRE.
*
* This header contains functions for compiling regular expressions into
* Chimera databases that can be used by the Chimera runtime.
*/
#include "ch_common.h"
#include "hs_compile.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**
* A type containing error details that is returned by the compile calls (@ref
* ch_compile() and @ref ch_compile_multi()) on failure. The caller may inspect
* the values returned in this type to determine the cause of failure.
*/
typedef struct ch_compile_error {
/**
* A human-readable error message describing the error.
*/
char *message;
/**
* The zero-based number of the expression that caused the error (if this
* can be determined). If the error is not specific to an expression, then
* this value will be less than zero.
*/
int expression;
} ch_compile_error_t;
/**
* The basic regular expression compiler.
*
* This is the function call with which an expression is compiled into a
* Chimera database which can be passed to the runtime function (
* @ref ch_scan())
*
* @param expression
* The NULL-terminated expression to parse. Note that this string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @a flags argument. For
* example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @a expression, and @ref CH_FLAG_CASELESS as the @a
* flags.
*
* @param flags
* Flags which modify the behaviour of the expression. Multiple flags may
* be used by ORing them together. Valid values are:
* - CH_FLAG_CASELESS - Matching will be performed case-insensitively.
* - CH_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - CH_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - CH_FLAG_SINGLEMATCH - Only one match will be generated for the
* expression per stream.
* - CH_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - CH_FLAG_UCP - Use Unicode properties for character classes.
*
* @param mode
* Compiler mode flag that affects the database as a whole for capturing
* groups. One of CH_MODE_NOGROUPS or CH_MODE_GROUPS must be supplied.
* See @ref CH_MODE_FLAG for more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref ch_free_database() function.
*
* @param compile_error
* If the compile fails, a pointer to a @ref ch_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* ch_free_compile_error() function.
*
* @return
* @ref CH_SUCCESS is returned on successful compilation; @ref
* CH_COMPILER_ERROR on failure, with details provided in the error
* parameter.
*/
ch_error_t HS_CDECL ch_compile(const char *expression, unsigned int flags,
unsigned int mode,
const hs_platform_info_t *platform,
ch_database_t **db,
ch_compile_error_t **compile_error);
/**
* The multiple regular expression compiler.
*
* This is the function call with which a set of expressions is compiled into a
* database which can be passed to the runtime function (@ref ch_scan()).
* Each expression can be labelled with a unique integer which is passed into
* the match callback to identify the pattern that has matched.
*
* @param expressions
* Array of NULL-terminated expressions to compile. Note that (as for @ref
* ch_compile()) these strings must contain only the pattern to be
* matched, with no delimiters or flags. For example, the expression
* `/abc?def/i` should be compiled by providing `abc?def` as the first
* string in the @a expressions array, and @ref CH_FLAG_CASELESS as the
* first value in the @a flags array.
*
* @param flags
* Array of flags which modify the behaviour of each expression. Multiple
* flags may be used by ORing them together. Specifying the NULL pointer
* in place of an array will set the flags value for all patterns to zero.
* Valid values are:
* - CH_FLAG_CASELESS - Matching will be performed case-insensitively.
* - CH_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - CH_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - CH_FLAG_SINGLEMATCH - Only one match will be generated by patterns
* with this match id per stream.
* - CH_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - CH_FLAG_UCP - Use Unicode properties for character classes.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
* corresponding pattern in the expressions array. Specifying the NULL
* pointer in place of an array will set the ID value for all patterns to
* zero.
*
* @param elements
* The number of elements in the input arrays.
*
* @param mode
* Compiler mode flag that affect the database as a whole for capturing
* groups. One of CH_MODE_NOGROUPS or CH_MODE_GROUPS must be supplied.
* See @ref CH_MODE_FLAG for more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref ch_free_database() function.
*
* @param compile_error
* If the compile fails, a pointer to a @ref ch_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* ch_free_compile_error() function.
*
* @return
* @ref CH_SUCCESS is returned on successful compilation; @ref
 *     CH_COMPILER_ERROR on failure, with details provided in the
 *     @a compile_error parameter.
*
*/
ch_error_t HS_CDECL ch_compile_multi(const char *const *expressions,
const unsigned int *flags,
const unsigned int *ids,
unsigned int elements, unsigned int mode,
const hs_platform_info_t *platform,
ch_database_t **db,
ch_compile_error_t **compile_error);
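/*
 * Example: compiling a small pattern set with per-pattern flags and IDs (a
 * sketch only; the patterns and ID values here are illustrative, and error
 * handling is as for ch_compile()):
 *
 *     const char *exprs[] = {"abc?def", "(\\d{4})-(\\d{2})-(\\d{2})"};
 *     unsigned int flags[] = {CH_FLAG_CASELESS, 0};
 *     unsigned int ids[] = {1001, 1002};
 *     ch_database_t *db = NULL;
 *     ch_compile_error_t *compile_err = NULL;
 *     ch_error_t err = ch_compile_multi(exprs, flags, ids, 2, CH_MODE_GROUPS,
 *                                       NULL, &db, &compile_err);
 */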
/**
* The multiple regular expression compiler with extended match limits support.
*
* This is the function call with which a set of expressions is compiled into a
* database in the same way as @ref ch_compile_multi(), but allows additional
* parameters to be specified via match_limit and match_limit_recursion to
 * define match limits for the PCRE runtime.
*
* @param expressions
* Array of NULL-terminated expressions to compile. Note that (as for @ref
* ch_compile()) these strings must contain only the pattern to be
* matched, with no delimiters or flags. For example, the expression
* `/abc?def/i` should be compiled by providing `abc?def` as the first
* string in the @a expressions array, and @ref CH_FLAG_CASELESS as the
* first value in the @a flags array.
*
* @param flags
* Array of flags which modify the behaviour of each expression. Multiple
* flags may be used by ORing them together. Specifying the NULL pointer
* in place of an array will set the flags value for all patterns to zero.
* Valid values are:
* - CH_FLAG_CASELESS - Matching will be performed case-insensitively.
* - CH_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - CH_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - CH_FLAG_SINGLEMATCH - Only one match will be generated by patterns
* with this match id per stream.
* - CH_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - CH_FLAG_UCP - Use Unicode properties for character classes.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
* corresponding pattern in the expressions array. Specifying the NULL
* pointer in place of an array will set the ID value for all patterns to
* zero.
*
* @param elements
* The number of elements in the input arrays.
*
* @param mode
 *     Compiler mode flag that affects the database as a whole for capturing
 *     groups. Either CH_MODE_NOGROUPS or CH_MODE_GROUPS must be supplied.
* See @ref CH_MODE_FLAG for more details.
*
* @param match_limit
 *     A limit from pcre_extra on the number of times the match function may
 *     be called in PCRE, bounding the amount of backtracking that can take
 *     place.
*
* @param match_limit_recursion
 *     A limit from pcre_extra on the recursion depth of the match function
* in PCRE.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref ch_free_database() function.
*
* @param compile_error
* If the compile fails, a pointer to a @ref ch_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* ch_free_compile_error() function.
*
* @return
* @ref CH_SUCCESS is returned on successful compilation; @ref
 *     CH_COMPILER_ERROR on failure, with details provided in the
 *     @a compile_error parameter.
*
*/
ch_error_t HS_CDECL ch_compile_ext_multi(const char *const *expressions,
const unsigned int *flags,
const unsigned int *ids,
unsigned int elements,
unsigned int mode,
unsigned long int match_limit,
unsigned long int match_limit_recursion,
const hs_platform_info_t *platform,
ch_database_t **db,
ch_compile_error_t **compile_error);
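/*
 * Example: the same pattern set compiled with explicit PCRE backtracking
 * limits (a sketch; the two limit values shown are arbitrary, not
 * recommendations):
 *
 *     ch_error_t err = ch_compile_ext_multi(exprs, flags, ids, 2,
 *                                           CH_MODE_NOGROUPS,
 *                                           10000000, // match_limit
 *                                           1500,     // match_limit_recursion
 *                                           NULL, &db, &compile_err);
 */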
/**
 * Free an error structure generated by @ref ch_compile(), @ref
 * ch_compile_multi() or @ref ch_compile_ext_multi().
*
* @param error
* The @ref ch_compile_error_t to be freed. NULL may also be safely
* provided.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_free_compile_error(ch_compile_error_t *error);
/**
* @defgroup CH_PATTERN_FLAG Pattern flags
*
* @{
*/
/**
* Compile flag: Set case-insensitive matching.
*
* This flag sets the expression to be matched case-insensitively by default.
* The expression may still use PCRE tokens (notably `(?i)` and
* `(?-i)`) to switch case-insensitive matching on and off.
*/
#define CH_FLAG_CASELESS 1
/**
* Compile flag: Matching a `.` will not exclude newlines.
*
* This flag sets any instances of the `.` token to match newline characters as
* well as all other characters. The PCRE specification states that the `.`
* token does not match newline characters by default, so without this flag the
* `.` token will not cross line boundaries.
*/
#define CH_FLAG_DOTALL 2
/**
* Compile flag: Set multi-line anchoring.
*
* This flag instructs the expression to make the `^` and `$` tokens match
* newline characters as well as the start and end of the stream. If this flag
* is not specified, the `^` token will only ever match at the start of a
* stream, and the `$` token will only ever match at the end of a stream within
* the guidelines of the PCRE specification.
*/
#define CH_FLAG_MULTILINE 4
/**
* Compile flag: Set single-match only mode.
*
 * This flag sets the expression's match ID to match at most once: only the
 * first match for each invocation of @ref ch_scan() will be returned.
*
*/
#define CH_FLAG_SINGLEMATCH 8
/**
* Compile flag: Enable UTF-8 mode for this expression.
*
* This flag instructs Chimera to treat the pattern as a sequence of UTF-8
* characters. The results of scanning invalid UTF-8 sequences with a Chimera
* library that has been compiled with one or more patterns using this flag are
* undefined.
*/
#define CH_FLAG_UTF8 32
/**
* Compile flag: Enable Unicode property support for this expression.
*
* This flag instructs Chimera to use Unicode properties, rather than the
* default ASCII interpretations, for character mnemonics like `\w` and `\s` as
* well as the POSIX character classes. It is only meaningful in conjunction
* with @ref CH_FLAG_UTF8.
*/
#define CH_FLAG_UCP 64
/** @} */
/**
* @defgroup CH_MODE_FLAG Compile mode flags
*
 * The mode flags are used as values for the mode parameter of the various
 * compile calls (@ref ch_compile(), @ref ch_compile_multi() and
 * @ref ch_compile_ext_multi()).
*
* By default, the matcher will only supply the start and end offsets of the
* match when the match callback is called. Using mode flag @ref CH_MODE_GROUPS
 * will also fill the `captured` array with the start and end offsets of all
* the capturing groups specified by the pattern that has matched.
*
* @{
*/
/**
* Compiler mode flag: Disable capturing groups.
*/
#define CH_MODE_NOGROUPS 0
/**
* Compiler mode flag: Enable capturing groups.
*/
#define CH_MODE_GROUPS 1048576
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* CH_COMPILE_H_ */

126
chimera/ch_database.c Normal file
View File

@ -0,0 +1,126 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Chimera: database construction, etc.
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "allocator.h"
#include "database.h"
#include "hs.h"
#include "ch.h"
#include "hs_internal.h"
#include "ch_common.h"
#include "ch_alloc.h"
#include "ch_database.h"
#include "ch_internal.h"
static really_inline
int db_correctly_aligned(const void *db) {
return ISALIGNED_N(db, alignof(unsigned long long));
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_free_database(ch_database_t *hydb) {
if (hydb && hydb->magic != CH_DB_MAGIC) {
return CH_INVALID;
}
ch_database_free(hydb);
return CH_SUCCESS;
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_database_size(const ch_database_t *hydb, size_t *size) {
if (!size) {
return CH_INVALID;
}
ch_error_t ret = hydbIsValid(hydb);
if (unlikely(ret != CH_SUCCESS)) {
return ret;
}
*size = sizeof(struct ch_database) + hydb->length;
return CH_SUCCESS;
}
/** \brief Identifier prepended to database info. */
static const char CHIMERA_IDENT[] = "Chimera ";
HS_PUBLIC_API
ch_error_t HS_CDECL ch_database_info(const ch_database_t *hydb, char **info) {
if (!info) {
return CH_INVALID;
}
*info = NULL;
if (!hydb || !db_correctly_aligned(hydb) || hydb->magic != CH_DB_MAGIC) {
return HS_INVALID;
}
const struct ch_bytecode *bytecode = ch_get_bytecode(hydb);
char noMulti = (bytecode->flags & CHIMERA_FLAG_NO_MULTIMATCH);
if (noMulti) {
size_t len = strlen(CHIMERA_IDENT);
*info = ch_misc_alloc(len + 1);
if (!(*info)) {
return CH_INVALID;
}
memcpy((*info), CHIMERA_IDENT, len);
(*info)[len] = '\0';
return CH_SUCCESS;
}
char *hsinfo = NULL;
hs_error_t ret = hs_database_info(getHyperscanDatabase(bytecode), &hsinfo);
if (ret != HS_SUCCESS) {
assert(!hsinfo);
return ret;
}
size_t hybridlen = strlen(CHIMERA_IDENT);
size_t hslen = strlen(hsinfo);
*info = ch_misc_alloc(hybridlen + hslen + 1);
if (!(*info)) {
ch_misc_free(hsinfo);
return CH_INVALID;
}
memcpy((*info), CHIMERA_IDENT, hybridlen);
memcpy((*info) + hybridlen, hsinfo, hslen);
(*info)[hybridlen + hslen] = '\0';
ch_misc_free(hsinfo);
return CH_SUCCESS;
}

158
chimera/ch_database.h Normal file
View File

@ -0,0 +1,158 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime code for ch_database manipulation.
*/
#ifndef CH_DATABASE_H_
#define CH_DATABASE_H_
#ifdef __cplusplus
extern "C"
{
#endif
#define PCRE_STATIC
#include <pcre.h>
#include "ch_compile.h" // for CH_MODE_ flags
#include "ue2common.h"
#include "hs_version.h"
#include "hs.h"
#define CH_DB_MAGIC 0xdedededeU //!< Magic number stored in \ref ch_database
/** \brief Main Chimera database header. */
struct ch_database {
u32 magic; //!< must be \ref CH_DB_MAGIC
u32 version; //!< release version
u32 length; //!< total allocated length in bytes
u32 reserved0; //!< unused
u32 reserved1; //!< unused
u32 bytecode; //!< offset relative to db start
u32 padding[16]; //!< padding for alignment of rest of bytecode
char bytes[];
};
/** \brief Chimera bytecode header, which follows the \ref ch_database and is
* always 64-byte aligned. */
struct ch_bytecode {
u32 length; //!< length of bytecode including this header struct
u32 flags; //!< whole-database flags (CHIMERA_FLAG_NO_MULTIMATCH,
// CHIMERA_FLAG_GROUPS)
u32 patternCount; //!< total number of patterns
u32 activeSize; //!< size of mmbit to store active pattern ids
u32 databaseOffset; //!< offset for database following \ref ch_bytecode
// header
u32 patternOffset; //!< points to an array of u32 offsets, each pointing to
// a \ref ch_pattern
u32 unguardedOffset; //!< pointer to a list of unguarded pattern indices
u32 unguardedCount; //!< number of unguarded patterns
u32 maxCaptureGroups; //!< max number of capture groups used by any pattern
};
/** \brief Per-pattern header.
*
* struct is followed in bytecode by:
* 1. pcre bytecode (always present)
* 2. pcre study data (sometimes)
*/
struct ch_pattern {
u32 id; //!< pattern ID to report to the user
u32 flags; //!< per-pattern flags (e.g. \ref CHIMERA_PATTERN_FLAG_UTF8)
u32 maxWidth; //!< maximum width of a match, or UINT_MAX for inf.
u32 minWidth; //!< minimum width of a match.
u32 fixedWidth;//!< pattern has fixed width.
u32 studyOffset; //!< offset relative to struct start of study data,
// or zero if there is none
u32 length; //!< length of struct plus pcre bytecode and study data
pcre_extra extra; //!< pcre_extra struct, used to store study data ptr for
// the currently-running pcre at runtime.
};
static really_inline
const void *ch_get_bytecode(const struct ch_database *db) {
assert(db);
const void *bytecode = (const char *)db + db->bytecode;
assert(ISALIGNED_16(bytecode));
return bytecode;
}
struct hs_database;
static really_inline
const struct hs_database *getHyperscanDatabase(const struct ch_bytecode *db) {
assert(db);
const char *ptr = (const char *)db;
const struct hs_database *hs_db;
hs_db = (const struct hs_database *)(ptr + db->databaseOffset);
assert(ISALIGNED_CL(hs_db));
return hs_db;
}
static really_inline
const u32 *getUnguarded(const struct ch_bytecode *db) {
assert(db);
const char *ptr = (const char *)db;
const u32 *unguarded = (const u32 *)(ptr + db->unguardedOffset);
assert(ISALIGNED_N(unguarded, sizeof(u32)));
return unguarded;
}
static really_inline
const struct ch_pattern *getPattern(const struct ch_bytecode *db, u32 i) {
assert(db);
assert(i < db->patternCount);
const char *ptr = (const char *)db;
const u32 *patternOffset = (const u32 *)(ptr + db->patternOffset);
assert(patternOffset[i] < db->length);
return (const struct ch_pattern *)(ptr + patternOffset[i]);
}
static really_inline
ch_error_t hydbIsValid(const struct ch_database *hydb) {
if (!hydb || hydb->magic != CH_DB_MAGIC) {
DEBUG_PRINTF("bad magic (%u != %u)\n", hydb->magic, CH_DB_MAGIC);
return CH_INVALID;
}
if (hydb->version != HS_VERSION_32BIT) {
DEBUG_PRINTF("bad version\n");
return CH_DB_VERSION_ERROR;
}
return CH_SUCCESS;
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* CH_DATABASE_H_ */

44
chimera/ch_internal.h Normal file
View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Chimera: data structures and internals.
*/
#ifndef CH_INTERNAL_H
#define CH_INTERNAL_H
#define CHIMERA_FLAG_NO_MULTIMATCH 1 //!< Don't run a multimatch scan
#define CHIMERA_FLAG_GROUPS 2 //!< Return capturing groups
#define CHIMERA_FLAG_ALL_CONFIRM 4 //!< All patterns need confirm
#define CHIMERA_FLAG_ALL_SINGLE 8 //!< All patterns need only one match
#define CHIMERA_PATTERN_FLAG_SINGLEMATCH 1 //!< only report the first match
#define CHIMERA_PATTERN_FLAG_UTF8 2 //!< pattern is in UTF-8 mode
#endif

629
chimera/ch_runtime.c Normal file
View File

@ -0,0 +1,629 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Chimera: main runtime.
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ch.h"
#include "hs.h"
#include "hs_internal.h"
#include "ue2common.h"
#include "ch_database.h"
#include "ch_internal.h"
#include "ch_scratch.h"
#include "util/multibit.h"
#include "util/unicode_def.h"
typedef struct queue_item PQ_T;
static
char PQ_COMP(PQ_T *pqc_items, int a, int b) {
if ((pqc_items)[a].to != (pqc_items)[b].to) {
return (pqc_items)[a].to < (pqc_items)[b].to;
} else if ((pqc_items)[a].from != (pqc_items)[b].from) {
return (pqc_items)[a].from < (pqc_items)[b].from;
} else {
return (pqc_items)[a].id < (pqc_items)[b].id;
}
}
static
char PQ_COMP_B(PQ_T *pqc_items, int a, PQ_T b_fixed) {
if ((pqc_items)[a].to != (b_fixed).to) {
return (pqc_items)[a].to < (b_fixed).to;
} else if ((pqc_items)[a].from != (b_fixed).from) {
return (pqc_items)[a].from < (b_fixed).from;
} else {
return (pqc_items)[a].id < b_fixed.id;
}
}
#include "util/pqueue.h"
static really_inline
void pq_insert_with(struct match_pq *pq, int from, int to, u32 id) {
DEBUG_PRINTF("inserting pattern%u in pq at %u\n", id, to);
struct queue_item temp = {
.from = from,
.to = to,
.id = id,
};
pq_insert(pq->item, pq->size, temp);
++pq->size;
}
static really_inline
void pq_pop_nice(struct match_pq *pq) {
pq_pop(pq->item, pq->size);
pq->size--;
}
/** dummy event handler for use when user does not provide one */
static
int HS_CDECL null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
UNUSED unsigned long long to, UNUSED unsigned flags,
UNUSED unsigned size, UNUSED const ch_capture_t *captured,
UNUSED void *ctxt) {
return 0;
}
/** \brief Chimera runtime context. */
struct HybridContext {
const char *data; //!< buffer being scanned
u32 length; //!< length of data buffer
u32 valid_utf8_highwater; //!< UTF-8 has been validated up to here.
const struct ch_bytecode *db;
struct ch_scratch *scratch;
struct match_pq *pq;
/** \brief user-supplied match callback */
int (HS_CDECL *match_callback)(unsigned int id, unsigned long long from,
unsigned long long to, unsigned int flags,
unsigned int size, const ch_capture_t *capture,
void *ctx);
/** \brief user-supplied error callback */
int (HS_CDECL *error_callback)(ch_error_event_t error_type, unsigned int id,
void *info, void *ctx);
/** \brief user-supplied context */
void *context;
};
// Internal PCRE func.
extern int _pcre_valid_utf(const unsigned char *, int, int *);
/** UTF-8 validity check. Returns >0 if the given region of the data is valid
* UTF-8, 0 otherwise. */
static
char isValidUTF8(struct HybridContext *hyctx, u32 end) {
assert(hyctx);
if (hyctx->valid_utf8_highwater >= end) {
return 1; // Already validated.
}
const unsigned char *data =
(const unsigned char *)hyctx->data + hyctx->valid_utf8_highwater;
int validate_len = end - hyctx->valid_utf8_highwater;
DEBUG_PRINTF("validating %d bytes\n", validate_len);
int erroroffset = 0;
if (_pcre_valid_utf(data, validate_len, &erroroffset)) {
DEBUG_PRINTF("UTF8 invalid at offset %d\n", erroroffset);
return 0;
}
hyctx->valid_utf8_highwater = end;
return 1;
}
static
const pcre *getPcre(const struct ch_pattern *pattern) {
const char *ptr = (const char *)pattern;
const pcre *p = (const pcre *)(ptr + ROUNDUP_N(sizeof(*pattern), 8));
assert(ISALIGNED_N(p, 8));
return p;
}
/** \brief Fill the Chimera groups array from a pcre_exec ovector. */
static
void fillGroupsFromOvector(ch_capture_t *groups, int numPairs, int *ovector) {
assert(groups);
assert(ISALIGNED_N(groups, alignof(ch_capture_t)));
DEBUG_PRINTF("filling %d groups (@ %p) from pcre ovector\n",
numPairs, groups);
for (int i = 0; i < numPairs * 2; i += 2) {
if (ovector[i] == -1) {
groups->flags = CH_CAPTURE_FLAG_INACTIVE;
} else {
groups->flags = CH_CAPTURE_FLAG_ACTIVE;
assert(ovector[i] <= ovector[i + 1]);
groups->from = ovector[i];
groups->to = ovector[i + 1];
}
++groups;
}
}
static
ch_error_t handlePcreNonMatch(const struct ch_pattern *pattern, int rv,
ch_error_event_handler onError,
void *userContext) {
assert(rv < 0);
if (rv == PCRE_ERROR_NOMATCH) {
DEBUG_PRINTF("no match found by libpcre\n");
return CH_SUCCESS;
} else if (rv == PCRE_ERROR_MATCHLIMIT) {
DEBUG_PRINTF("pcre hit match limit\n");
if (onError) {
return onError(CH_ERROR_MATCHLIMIT, pattern->id, NULL,
userContext);
}
return CH_SUCCESS;
} else if (rv == PCRE_ERROR_RECURSIONLIMIT) {
DEBUG_PRINTF("pcre hit recursion limit\n");
if (onError) {
return onError(CH_ERROR_RECURSIONLIMIT, pattern->id, NULL,
userContext);
}
return CH_SUCCESS;
}
// All other errors not handled above are fatal.
return CH_FAIL_INTERNAL;
}
static
ch_error_t scanPcre(struct HybridContext *hyctx, UNUSED unsigned int length,
unsigned int offset, u32 id) {
const char *data = hyctx->data;
unsigned int full_length = hyctx->length;
ch_error_event_handler onError = hyctx->error_callback;
void *userContext = hyctx->context;
const struct ch_pattern *pattern = getPattern(hyctx->db, id);
const pcre *p = getPcre(pattern);
// Set up the PCRE extra block.
const pcre_extra *extra = &pattern->extra;
int startoffset = offset;
int *ovector = hyctx->scratch->ovector;
int ovectorSize = (hyctx->scratch->maxCaptureGroups + 1) * 3;
assert(ovectorSize >= 2);
DEBUG_PRINTF("scanning %u bytes, pattern %u, startoffset %d\n",
length, id, startoffset);
int options = 0;
if (pattern->flags & CHIMERA_PATTERN_FLAG_UTF8) {
// We do our own UTF-8 validation.
options |= PCRE_NO_UTF8_CHECK;
if (!isValidUTF8(hyctx, full_length)) {
return handlePcreNonMatch(pattern, PCRE_ERROR_BADUTF8, onError,
userContext);
}
}
int rv = pcre_exec(p, extra, data, full_length, startoffset, options,
ovector, ovectorSize);
DEBUG_PRINTF("pcre return code is %d\n", rv);
// Handle all non-match or error cases, all of which involve us
// terminating the loop.
if (rv < 0) {
return handlePcreNonMatch(pattern, rv, onError, userContext);
}
// We've found a match, and we should always have room for at least the
// start and end offsets in our ovector. Pass this info to the user.
assert(rv >= 1);
assert(rv < ovectorSize);
int from = ovector[0];
int to = ovector[1];
DEBUG_PRINTF("match %d -> %d\n", from, to);
struct ch_patterndata *pd = hyctx->scratch->patternData + id;
if (hyctx->db->flags & CHIMERA_FLAG_GROUPS) {
fillGroupsFromOvector(pd->match, rv, ovector);
} else {
rv = 0;
}
pd->groupCount = (u32)rv;
// Insert new matched item to the queue
pq_insert_with(hyctx->pq, from, to, id);
// Next scan starts at the first codepoint after the match. It's
// possible that we have a vacuous match, in which case we must step
// past it to ensure that we always progress.
if (from != to) {
startoffset = to;
} else if (pattern->flags & CHIMERA_PATTERN_FLAG_UTF8) {
startoffset = to + 1;
while (startoffset < (int)full_length &&
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
++startoffset;
}
} else {
startoffset = to + 1;
}
pd->scanStart = startoffset;
DEBUG_PRINTF("new offset %u\n", pd->scanStart);
return CH_SUCCESS;
}
static
ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id,
unsigned long long from, unsigned long long to) {
ch_match_event_handler onEvent = hyctx->match_callback;
void *userContext = hyctx->context;
DEBUG_PRINTF("priority queue size %u\n", hyctx->pq->size);
while (hyctx->pq->size) {
u32 num_item = hyctx->pq->size;
struct queue_item *item = pq_top(hyctx->pq->item);
size_t top_from = item->from;
size_t top_to = item->to;
u32 top_id = item->id;
if (top_to > to) {
pq_insert_with(hyctx->pq, from, to, id);
break;
}
pq_pop_nice(hyctx->pq);
const struct ch_pattern *pattern = getPattern(hyctx->db, top_id);
struct ch_patterndata *pd = hyctx->scratch->patternData + top_id;
// Report match for pattern
DEBUG_PRINTF("trigger match@%zu\n", top_to);
ch_callback_t cbrv =
onEvent(pattern->id, top_from, top_to, 0 /* flags */,
pd->groupCount, pd->match, userContext);
if (cbrv == CH_CALLBACK_TERMINATE) {
DEBUG_PRINTF("user callback told us to terminate scanning\n");
return CH_SCAN_TERMINATED;
} else if (cbrv == CH_CALLBACK_SKIP_PATTERN) {
DEBUG_PRINTF("user callback told us to skip this pattern\n");
pd->scanStart = hyctx->length;
}
if (top_id == id) {
break;
}
// Push a new match to replace the old one
unsigned int start = pd->scanStart;
unsigned int len = hyctx->length - pd->scanStart;
if (hyctx->length >= pd->scanStart &&
!(pattern->flags & CHIMERA_PATTERN_FLAG_SINGLEMATCH)) {
DEBUG_PRINTF("get a new match item\n");
int ret = scanPcre(hyctx, len, start, top_id);
if (ret == CH_CALLBACK_TERMINATE) {
DEBUG_PRINTF("user callback told us to terminate scanning\n");
return CH_SCAN_TERMINATED;
} else if (ret == CH_CALLBACK_SKIP_PATTERN) {
DEBUG_PRINTF("user callback told us to skip this pattern\n");
pd->scanStart = hyctx->length;
ret = CH_SUCCESS;
} else if (ret == CH_FAIL_INTERNAL) {
return ret;
}
// No further match is found
if (hyctx->pq->size == num_item - 1) {
pd->scanStart = hyctx->length;
}
}
}
return CH_SUCCESS;
}
/** \brief Callback used for internal Hyperscan multi-matcher. */
static
int HS_CDECL multiCallback(unsigned int id, unsigned long long from,
unsigned long long to, UNUSED unsigned int flags,
void *ctx) {
assert(ctx);
struct HybridContext *hyctx = ctx;
DEBUG_PRINTF("match for ID %u at offset %llu\n", id, to);
assert(id < hyctx->db->patternCount);
const struct ch_pattern *pattern = getPattern(hyctx->db, id);
struct ch_patterndata *pd = hyctx->scratch->patternData + id;
char needConfirm = pattern->fixedWidth == ~0U;
if (needConfirm &&
mmbit_isset(hyctx->scratch->active, hyctx->db->patternCount, id)) {
if ((hyctx->db->flags & CHIMERA_FLAG_ALL_CONFIRM) &&
mmbit_all(hyctx->scratch->active, hyctx->db->patternCount)) {
return 1;
}
return 0;
}
// Store the fact that we've seen this bit.
char already = mmbit_set(hyctx->scratch->active,
hyctx->db->patternCount, id);
DEBUG_PRINTF("match from %u to %llu\n", pd->scanStart, to);
if (!already) {
pd->scanStart = 0;
} else if (to < pd->scanStart + pattern->minWidth) {
return 0;
} else if (pattern->flags & CHIMERA_PATTERN_FLAG_SINGLEMATCH) {
if ((hyctx->db->flags & CHIMERA_FLAG_ALL_SINGLE) &&
mmbit_all(hyctx->scratch->active, hyctx->db->patternCount)) {
return 1;
}
        // Note: we may see unordered matches from Hyperscan, and thus
        // possibly get to < pd->scanStart.
return 0;
}
int ret = HS_SUCCESS;
unsigned int start = pd->scanStart;
unsigned int len = hyctx->length - pd->scanStart;
assert(hyctx->length >= pd->scanStart);
const char *data = hyctx->data;
if (needConfirm) {
DEBUG_PRINTF("run confirm for the first time\n");
ret = scanPcre(hyctx, len, start, id);
hyctx->scratch->ret = ret;
if (ret == CH_CALLBACK_TERMINATE) {
DEBUG_PRINTF("user callback told us to terminate scanning\n");
return HS_SCAN_TERMINATED;
} else if (ret == CH_CALLBACK_SKIP_PATTERN) {
DEBUG_PRINTF("user callback told us to skip this pattern\n");
pd->scanStart = hyctx->length;
ret = HS_SUCCESS;
} else if (ret == CH_FAIL_INTERNAL) {
return ret;
}
} else {
if (already) {
DEBUG_PRINTF("catch up with new matches\n");
ret = catchupPcre(hyctx, id, from, to);
hyctx->scratch->ret = ret;
if (pd->scanStart >= hyctx->length) {
return ret;
}
}
int startoffset = 0;
// Next scan starts at the first codepoint after the match. It's
// possible that we have a vacuous match, in which case we must step
// past it to ensure that we always progress.
if (from != to) {
startoffset = to;
} else if (pattern->flags & CHIMERA_PATTERN_FLAG_UTF8) {
startoffset = to + 1;
while (startoffset < (int)hyctx->length &&
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
++startoffset;
}
} else {
startoffset = to + 1;
}
pd->scanStart = startoffset;
int rv = 0;
if (hyctx->db->flags & CHIMERA_FLAG_GROUPS) {
ch_capture_t *groups = pd->match;
groups->flags = CH_CAPTURE_FLAG_ACTIVE;
groups->from = from;
groups->to = to;
rv = 1;
}
pd->groupCount = (u32)rv;
pq_insert_with(hyctx->pq, from, to, id);
}
return ret;
}
static
hs_error_t scanHyperscan(struct HybridContext *hyctx, const char *data,
unsigned int length) {
DEBUG_PRINTF("scanning %u bytes with Hyperscan\n", length);
const struct ch_bytecode *hydb = hyctx->db;
const hs_database_t *db = getHyperscanDatabase(hydb);
hs_scratch_t *scratch = hyctx->scratch->multi_scratch;
hs_error_t err = hs_scan(db, data, length, 0, scratch, multiCallback,
hyctx);
return err;
}
/** \brief Init match priority queue.
*
* Add a first match offset for each pattern that is not supported by Hyperscan
* with prefiltering.
*/
static really_inline
ch_error_t initQueue(struct HybridContext *hyctx, struct match_pq *pq) {
const struct ch_bytecode *db = hyctx->db;
u8 *active = hyctx->scratch->active;
mmbit_clear(active, db->patternCount);
// Init match queue size
pq->size = 0;
unsigned int length = hyctx->length;
const u32 *unguarded = getUnguarded(db);
for (u32 i = 0; i < db->unguardedCount; i++) {
u32 patternId = unguarded[i];
DEBUG_PRINTF("switch on unguarded pcre %u\n", patternId);
mmbit_set(active, db->patternCount, patternId);
DEBUG_PRINTF("get a new match item\n");
int ret = scanPcre(hyctx, length, 0, patternId);
struct ch_patterndata *pd = hyctx->scratch->patternData + patternId;
if (ret == CH_CALLBACK_TERMINATE) {
DEBUG_PRINTF("user callback told us to terminate scanning\n");
return CH_SCAN_TERMINATED;
} else if (ret == CH_CALLBACK_SKIP_PATTERN) {
DEBUG_PRINTF("user callback told us to skip this pattern\n");
pd->scanStart = length;
ret = CH_SUCCESS;
} else if (ret == CH_FAIL_INTERNAL) {
return ret;
}
}
return CH_SUCCESS;
}
static really_inline
ch_error_t ch_scan_i(const ch_database_t *hydb,
const char *data, unsigned int length,
UNUSED unsigned int flags,
ch_scratch_t *scratch,
ch_match_event_handler onEvent,
ch_error_event_handler onError,
void *userContext) {
if (unlikely(!hydb || !scratch || !data)) {
DEBUG_PRINTF("args invalid\n");
return CH_INVALID;
}
ch_error_t ret = hydbIsValid(hydb);
if (ret != CH_SUCCESS) {
DEBUG_PRINTF("database invalid\n");
return ret;
}
if (!ISALIGNED_CL(scratch)) {
DEBUG_PRINTF("bad alignment %p\n", scratch);
return CH_INVALID;
}
if (scratch->magic != CH_SCRATCH_MAGIC) {
DEBUG_PRINTF("scratch invalid\n");
return CH_INVALID;
}
if (unlikely(markScratchInUse(scratch))) {
return CH_SCRATCH_IN_USE;
}
// Hyperscan underlying scratch and database validity will be checked by
// the hs_scan() call, so no need to do it here.
// PCRE takes the data region length in as an int, so this limits our block
// size to INT_MAX.
if (length > INT_MAX) {
DEBUG_PRINTF("length invalid\n");
unmarkScratchInUse(scratch);
return CH_INVALID;
}
const struct ch_bytecode *db = ch_get_bytecode(hydb);
scratch->pq.size = 0;
scratch->ret = CH_SUCCESS;
// Firstly, we run Hyperscan in block mode and add its matches into the
// active list for subsequent confirmation with pcre.
struct HybridContext hyctx = {
.data = data,
.length = length,
.valid_utf8_highwater = 0,
.db = db,
.scratch = scratch,
.pq = &scratch->pq,
.match_callback = onEvent ? onEvent : null_onEvent,
.error_callback = onError,
.context = userContext
};
// Init priority queue.
ret = initQueue(&hyctx, &scratch->pq);
if (ret != CH_SUCCESS) {
DEBUG_PRINTF("Chimera returned error %d\n", ret);
unmarkScratchInUse(scratch);
return ret;
}
if (!(db->flags & CHIMERA_FLAG_NO_MULTIMATCH)) {
ret = scanHyperscan(&hyctx, data, length);
if (ret != HS_SUCCESS && scratch->ret != CH_SUCCESS) {
DEBUG_PRINTF("Hyperscan returned error %d\n", scratch->ret);
unmarkScratchInUse(scratch);
return scratch->ret;
}
}
DEBUG_PRINTF("Flush priority queue\n");
// Catch up with PCRE and make up id and offsets as we don't really care
// about their values
ret = catchupPcre(&hyctx, ~0U, length, length);
if (ret != CH_SUCCESS) {
DEBUG_PRINTF("PCRE catch up returned error %d\n", ret);
unmarkScratchInUse(scratch);
return ret;
}
unmarkScratchInUse(scratch);
return CH_SUCCESS;
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_scan(const ch_database_t *hydb, const char *data,
unsigned int length, unsigned int flags,
ch_scratch_t *scratch,
ch_match_event_handler onEvent,
ch_error_event_handler onError, void *userContext) {
ch_error_t ret = ch_scan_i(hydb, data, length, flags, scratch, onEvent,
onError, userContext);
return ret;
}
HS_PUBLIC_API
const char * HS_CDECL ch_version(void) {
return HS_VERSION_STRING;
}

378
chimera/ch_runtime.h Normal file
View File

@ -0,0 +1,378 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CH_RUNTIME_H_
#define CH_RUNTIME_H_
#include <stdlib.h>
/**
* @file
* @brief The Chimera runtime API definition.
*
* Chimera is a hybrid of Hyperscan and PCRE regular expression engine.
*
* This header contains functions for using compiled Chimera databases for
* scanning data at runtime.
*/
#include "hs_common.h"
#ifdef __cplusplus
extern "C"
{
#endif
struct ch_scratch;
/**
* A Chimera scratch space.
*/
typedef struct ch_scratch ch_scratch_t;
/**
* Callback return value used to tell the Chimera matcher what to do after
* processing this match.
*/
typedef int ch_callback_t;
/**
* @defgroup CH_CALLBACK ch_callback_t values
*
* @{
*/
/**
* Continue matching.
*/
#define CH_CALLBACK_CONTINUE 0
/**
* Terminate matching.
*/
#define CH_CALLBACK_TERMINATE 1
/**
* Skip remaining matches for this ID and continue.
*/
#define CH_CALLBACK_SKIP_PATTERN 2
/** @} */
/**
* Type used to differentiate the errors raised with the @ref
* ch_error_event_handler callback.
*/
typedef int ch_error_event_t;
/**
* @defgroup CH_ERROR_EVENT ch_error_event_t values
*
* @{
*/
/**
* PCRE hits its match limit and reports PCRE_ERROR_MATCHLIMIT.
*/
#define CH_ERROR_MATCHLIMIT 1
/**
* PCRE hits its recursion limit and reports PCRE_ERROR_RECURSIONLIMIT.
*/
#define CH_ERROR_RECURSIONLIMIT 2
/** @} */
/**
* Structure representing a captured subexpression within a match. An array of
* these structures corresponding to capture groups in order is passed to the
* callback on match, with active structures identified by the
* CH_CAPTURE_FLAG_ACTIVE flag.
*/
typedef struct ch_capture {
/**
* The flags indicating if this structure is active.
*/
unsigned int flags;
/**
* offset at which this capture group begins.
*/
    unsigned long long from;
/**
* offset at which this capture group ends.
*/
unsigned long long to;
} ch_capture_t;
/**
* @defgroup CH_CAPTURE ch_capture_t flags
*
* These flags are used in @ref ch_capture_t::flags to indicate if this
* structure is active.
*
* @{
*/
/**
* Flag indicating that a particular capture group is inactive, used in @ref
* ch_capture_t::flags.
*/
#define CH_CAPTURE_FLAG_INACTIVE 0
/**
* Flag indicating that a particular capture group is active, used in @ref
* ch_capture_t::flags.
*/
#define CH_CAPTURE_FLAG_ACTIVE 1
/** @} */
/**
* Definition of the match event callback function type.
*
* A callback function matching the defined type must be provided by the
 * application calling the @ref ch_scan() function.
*
* This callback function will be invoked whenever a match is located in the
* target data during the execution of a scan. The details of the match are
* passed in as parameters to the callback function, and the callback function
* should return a value indicating whether or not matching should continue on
* the target data. If no callbacks are desired from a scan call, NULL may be
* provided in order to suppress match production.
*
* @param id
* The ID number of the expression that matched. If the expression was a
* single expression compiled with @ref ch_compile(), this value will be
* zero.
*
* @param from
* The offset of the first byte that matches the expression.
*
* @param to
* The offset after the last byte that matches the expression.
*
* @param flags
* This is provided for future use and is unused at present.
*
* @param size
* The number of valid entries pointed to by the captured parameter.
*
* @param captured
* A pointer to an array of @ref ch_capture_t structures that
* contain the start and end offsets of entire pattern match and
* each captured subexpression.
*
* @param ctx
* The pointer supplied by the user to the @ref ch_scan() function.
*
* @return
* The callback can return @ref CH_CALLBACK_TERMINATE to stop matching.
 *     Otherwise, a return value of @ref CH_CALLBACK_CONTINUE will continue
 *     matching with the current pattern (if it is configured to produce
 *     multiple matches per pattern), while a return value of @ref
 *     CH_CALLBACK_SKIP_PATTERN will cease matching this pattern but continue
 *     matching the next pattern.
*/
typedef ch_callback_t (HS_CDECL *ch_match_event_handler)(unsigned int id,
unsigned long long from,
unsigned long long to,
unsigned int flags,
unsigned int size,
const ch_capture_t *captured,
void *ctx);
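/*
 * Example: a match handler that walks the capture groups (a sketch; it
 * assumes <stdio.h> and a database compiled with CH_MODE_GROUPS, otherwise
 * `size` will be zero):
 *
 *     static ch_callback_t HS_CDECL on_match(unsigned int id,
 *                                            unsigned long long from,
 *                                            unsigned long long to,
 *                                            unsigned int flags,
 *                                            unsigned int size,
 *                                            const ch_capture_t *captured,
 *                                            void *ctx) {
 *         printf("pattern %u matched at [%llu, %llu)\n", id, from, to);
 *         for (unsigned int i = 0; i < size; i++) {
 *             if (captured[i].flags == CH_CAPTURE_FLAG_ACTIVE) {
 *                 printf("  group %u: [%llu, %llu)\n", i, captured[i].from,
 *                        captured[i].to);
 *             }
 *         }
 *         return CH_CALLBACK_CONTINUE;
 *     }
 */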
/**
* Definition of the Chimera error event callback function type.
*
* A callback function matching the defined type may be provided by the
* application calling the @ref ch_scan function. This callback function
* will be invoked when an error event occurs during matching; this indicates
* that some matches for a given expression may not be reported.
*
* @param error_type
* The type of error event that occurred. Currently these errors
* correspond to resource limits on PCRE backtracking
* @ref CH_ERROR_MATCHLIMIT and @ref CH_ERROR_RECURSIONLIMIT.
*
* @param id
* The ID number of the expression that matched.
*
* @param info
* Event-specific data, for future use. Currently unused.
*
* @param ctx
* The context pointer supplied by the user to the @ref ch_scan
* function.
*
* @return
 *     The callback can return @ref CH_CALLBACK_SKIP_PATTERN to cease matching
 *     this pattern but continue matching the next pattern, or @ref
 *     CH_CALLBACK_TERMINATE to stop matching for all patterns.
*/
typedef ch_callback_t (HS_CDECL *ch_error_event_handler)(
ch_error_event_t error_type,
unsigned int id, void *info,
void *ctx);
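/*
 * Example: an error handler that logs the limit that was hit and skips the
 * offending pattern rather than terminating the whole scan (a sketch;
 * assumes <stdio.h>):
 *
 *     static ch_callback_t HS_CDECL on_error(ch_error_event_t error_type,
 *                                            unsigned int id, void *info,
 *                                            void *ctx) {
 *         fprintf(stderr, "pattern %u hit a PCRE limit (type %d), skipping\n",
 *                 id, error_type);
 *         return CH_CALLBACK_SKIP_PATTERN;
 *     }
 */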
/**
* The block regular expression scanner.
*
* This is the function call in which the actual pattern matching takes place
* for block-mode pattern databases.
*
* @param db
* A compiled pattern database.
*
* @param data
* Pointer to the data to be scanned.
*
* @param length
* The number of bytes to scan.
*
* @param flags
* Flags modifying the behaviour of this function. This parameter is
* provided for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref ch_alloc_scratch() for this
* database.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param onError
 *     Pointer to an error event callback function. If a NULL pointer is
 *     given, @ref CH_ERROR_MATCHLIMIT and @ref CH_ERROR_RECURSIONLIMIT errors
 *     will be ignored and matching will continue.
*
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref CH_SUCCESS on success; @ref CH_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
ch_error_t HS_CDECL ch_scan(const ch_database_t *db, const char *data,
unsigned int length, unsigned int flags,
ch_scratch_t *scratch,
ch_match_event_handler onEvent,
ch_error_event_handler onError,
void *context);
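/*
 * Example: an end-to-end block-mode scan sketch (illustrative only; `db` is
 * assumed to come from one of the ch_compile*() calls, `buf` and `buf_len`
 * are placeholders, and on_match/on_error are callbacks of the handler types
 * defined above):
 *
 *     ch_scratch_t *scratch = NULL;
 *     if (ch_alloc_scratch(db, &scratch) != CH_SUCCESS) {
 *         // handle allocation failure
 *     }
 *     ch_error_t err = ch_scan(db, buf, buf_len, 0, scratch, on_match,
 *                              on_error, NULL);
 *     if (err != CH_SUCCESS && err != CH_SCAN_TERMINATED) {
 *         // handle scan error
 *     }
 *     ch_free_scratch(scratch);
 *     ch_free_database(db);
 */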
/**
* Allocate a "scratch" space for use by Chimera.
*
* This is required for runtime use, and one scratch space per thread, or
* concurrent caller, is required. Any allocator callback set by @ref
* ch_set_scratch_allocator() or @ref ch_set_allocator() will be used by this
* function.
*
* @param db
* The database, as produced by @ref ch_compile().
*
* @param scratch
* On first allocation, a pointer to NULL should be provided so a new
* scratch can be allocated. If a scratch block has been previously
* allocated, then a pointer to it should be passed back in to see if it
* is valid for this database block. If a new scratch block is required,
* the original will be freed and the new one returned, otherwise the
* previous scratch block will be returned. On success, the scratch block
* will be suitable for use with the provided database in addition to any
* databases that original scratch space was suitable for.
*
* @return
* @ref CH_SUCCESS on successful allocation; @ref CH_NOMEM if the
* allocation fails. Other errors may be returned if invalid parameters
* are specified.
*/
ch_error_t HS_CDECL ch_alloc_scratch(const ch_database_t *db,
ch_scratch_t **scratch);
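/*
 * Example: growing one scratch to cover several databases by passing the
 * same pointer back in (a sketch; `db1` and `db2` are placeholders):
 *
 *     ch_scratch_t *scratch = NULL;
 *     if (ch_alloc_scratch(db1, &scratch) != CH_SUCCESS ||
 *         ch_alloc_scratch(db2, &scratch) != CH_SUCCESS) {
 *         // handle allocation failure
 *     }
 *     // scratch is now usable with both db1 and db2
 */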
/**
* Allocate a scratch space that is a clone of an existing scratch space.
*
* This is useful when multiple concurrent threads will be using the same set
* of compiled databases, and another scratch space is required. Any allocator
* callback set by @ref ch_set_scratch_allocator() or @ref ch_set_allocator()
* will be used by this function.
*
* @param src
* The existing @ref ch_scratch_t to be cloned.
*
* @param dest
* A pointer to the new scratch space will be returned here.
*
* @return
* @ref CH_SUCCESS on success; @ref CH_NOMEM if the allocation fails.
* Other errors may be returned if invalid parameters are specified.
*/
ch_error_t HS_CDECL ch_clone_scratch(const ch_scratch_t *src,
ch_scratch_t **dest);
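/*
 * Example: giving each worker thread its own scratch by cloning a prototype
 * previously allocated with ch_alloc_scratch() (a sketch):
 *
 *     ch_scratch_t *clone = NULL;
 *     if (ch_clone_scratch(prototype_scratch, &clone) != CH_SUCCESS) {
 *         // handle allocation failure
 *     }
 *     // hand `clone` to a worker thread; release it with ch_free_scratch()
 */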
/**
* Provides the size of the given scratch space.
*
* @param scratch
* A per-thread scratch space allocated by @ref ch_alloc_scratch() or @ref
* ch_clone_scratch().
*
* @param scratch_size
* On success, the size of the scratch space in bytes is placed in this
* parameter.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_scratch_size(const ch_scratch_t *scratch,
size_t *scratch_size);
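/*
 * Example: reporting the footprint of a scratch region (a sketch; assumes
 * <stdio.h>):
 *
 *     size_t bytes = 0;
 *     if (ch_scratch_size(scratch, &bytes) == CH_SUCCESS) {
 *         printf("scratch uses %zu bytes\n", bytes);
 *     }
 */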
/**
* Free a scratch block previously allocated by @ref ch_alloc_scratch() or @ref
* ch_clone_scratch().
*
* The free callback set by @ref ch_set_scratch_allocator() or @ref
* ch_set_allocator() will be used by this function.
*
* @param scratch
* The scratch block to be freed. NULL may also be safely provided.
*
* @return
* @ref CH_SUCCESS on success, other values on failure.
*/
ch_error_t HS_CDECL ch_free_scratch(ch_scratch_t *scratch);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* CH_RUNTIME_H_ */

317
chimera/ch_scratch.c Normal file
View File

@ -0,0 +1,317 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Chimera: scratch space alloc.
*/
#include <string.h>
#include "allocator.h"
#include "ch.h"
#include "hs.h"
#include "hs_internal.h"
#include "ue2common.h"
#include "ch_alloc.h"
#include "ch_internal.h"
#include "ch_scratch.h"
#include "ch_database.h"
static
size_t getPatternDataSize(const ch_scratch_t *s) {
size_t numCapturingStructs =
s->patternCount * (s->maxCaptureGroups + 1);
return (sizeof(struct ch_patterndata) * s->patternCount) +
alignof(struct ch_capture) + // padding
(sizeof(struct ch_capture) * numCapturingStructs);
}
static
void initPatternData(const ch_scratch_t *s) {
// ch_capture array is aligned, directly after the patterndata array.
char *ptr = (char *)s->patternData +
(sizeof(struct ch_patterndata) * s->patternCount);
struct ch_capture *cap = (struct ch_capture *)
(ROUNDUP_PTR(ptr, alignof(struct ch_capture)));
for (u32 i = 0; i < s->patternCount; i++) {
struct ch_patterndata *pd = &s->patternData[i];
pd->match = cap;
DEBUG_PRINTF("pattern %u: pd=%p, match=%p\n", i, pd, pd->match);
cap += (s->maxCaptureGroups + 1);
}
}
static
ch_error_t alloc_scratch(const ch_scratch_t *proto, ch_scratch_t **scratch) {
size_t ovectorSize = (proto->maxCaptureGroups + 1) * sizeof(int) * 3;
size_t capturedSize =
sizeof(struct ch_capture) * (proto->maxCaptureGroups + 1);
size_t patternDataSize = getPatternDataSize(proto);
size_t activeSize = proto->activeSize;
size_t queueSize = proto->patternCount * sizeof(struct queue_item);
// max padding for alignment below.
size_t padding = alignof(int) + alignof(struct ch_capture) +
alignof(struct ch_patterndata) +
alignof(struct queue_item);
size_t allocSize = sizeof(ch_scratch_t) + ovectorSize + capturedSize +
patternDataSize + activeSize + queueSize + padding
+ 256; /* padding for cacheline alignment */
ch_scratch_t *s;
ch_scratch_t *s_tmp = ch_scratch_alloc(allocSize);
ch_error_t err = ch_check_alloc(s_tmp);
if (err != CH_SUCCESS) {
ch_scratch_free(s_tmp);
*scratch = NULL;
return err;
}
memset(s_tmp, 0, allocSize);
s = ROUNDUP_PTR(s_tmp, 64);
// Set ordinary members.
*s = *proto;
s->magic = CH_SCRATCH_MAGIC;
s->in_use = 0;
s->scratch_alloc = (char *)s_tmp;
// Set pointers internal to allocation.
char *ptr = (char *)s + sizeof(*s);
ptr = ROUNDUP_PTR(ptr, alignof(int));
s->ovector = (int *)ptr;
ptr += ovectorSize;
ptr = ROUNDUP_PTR(ptr, alignof(struct ch_capture));
s->captured = (struct ch_capture *)ptr;
ptr += capturedSize;
ptr = ROUNDUP_PTR(ptr, alignof(struct ch_patterndata));
s->patternData = (struct ch_patterndata *)ptr;
ptr += patternDataSize;
// Pre-fill pattern data, setting captureOffsets
initPatternData(s);
ptr = ROUNDUP_PTR(ptr, alignof(struct queue_item));
s->pq.item = (struct queue_item *)ptr;
ptr += queueSize;
s->active = (u8 *)ptr;
// Store size.
s->scratchSize = allocSize;
// We should never overrun our allocation.
assert((ptr + activeSize) - (char *)s <= (ptrdiff_t)allocSize);
*scratch = s;
return CH_SUCCESS;
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_alloc_scratch(const ch_database_t *hydb,
ch_scratch_t **scratch) {
if (!hydb || !scratch) {
DEBUG_PRINTF("invalid args\n");
return CH_INVALID;
}
DEBUG_PRINTF("hydb=%p, &scratch=%p\n", hydb, scratch);
ch_error_t rv = hydbIsValid(hydb);
if (rv != CH_SUCCESS) {
DEBUG_PRINTF("invalid database\n");
return rv;
}
if (*scratch != NULL) {
/* has to be aligned before we can do anything with it */
if (!ISALIGNED_CL(*scratch)) {
return CH_INVALID;
}
if ((*scratch)->magic != CH_SCRATCH_MAGIC) {
return CH_INVALID;
}
if (markScratchInUse(*scratch)) {
return CH_SCRATCH_IN_USE;
}
}
// We allocate a prototype of the scratch header to do our sizing with.
ch_scratch_t *proto;
ch_scratch_t *proto_tmp = ch_scratch_alloc(sizeof(ch_scratch_t) + 256);
ch_error_t proto_ret = ch_check_alloc(proto_tmp);
if (proto_ret != CH_SUCCESS) {
ch_scratch_free(proto_tmp);
ch_scratch_free(*scratch);
*scratch = NULL;
return proto_ret;
}
proto = ROUNDUP_PTR(proto_tmp, 64);
int resize = 0;
if (*scratch) {
*proto = **scratch;
} else {
memset(proto, 0, sizeof(*proto));
resize = 1;
}
proto->scratch_alloc = (char *)proto_tmp;
const struct ch_bytecode *db = ch_get_bytecode(hydb);
if (db->maxCaptureGroups > proto->maxCaptureGroups) {
proto->maxCaptureGroups = db->maxCaptureGroups;
resize = 1;
}
if (db->patternCount > proto->patternCount) {
proto->patternCount = db->patternCount;
proto->activeSize = db->activeSize;
resize = 1;
}
if (resize) {
if (*scratch) {
ch_scratch_free((*scratch)->scratch_alloc);
}
ch_error_t alloc_ret = alloc_scratch(proto, scratch);
ch_scratch_free(proto_tmp);
if (alloc_ret != CH_SUCCESS) {
*scratch = NULL;
return alloc_ret;
}
} else {
ch_scratch_free(proto_tmp);
unmarkScratchInUse(*scratch);
}
if (db->flags & CHIMERA_FLAG_NO_MULTIMATCH) {
(*scratch)->multi_scratch = NULL;
return CH_SUCCESS;
}
// We may still have to realloc the underlying Hyperscan scratch.
rv = hs_alloc_scratch(getHyperscanDatabase(db),
&(*scratch)->multi_scratch);
if (rv != HS_SUCCESS) {
DEBUG_PRINTF("hs_alloc_scratch for multi_scratch failed\n");
hs_free_scratch((*scratch)->multi_scratch);
ch_scratch_free((*scratch)->scratch_alloc);
*scratch = NULL;
return rv;
}
return CH_SUCCESS;
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_clone_scratch(const ch_scratch_t *src,
ch_scratch_t **dest) {
if (!dest || !src || !ISALIGNED_CL(src) ||
src->magic != CH_SCRATCH_MAGIC) {
DEBUG_PRINTF("scratch invalid\n");
return CH_INVALID;
}
ch_error_t ret = alloc_scratch(src, dest);
if (ret != CH_SUCCESS) {
DEBUG_PRINTF("alloc_scratch failed\n");
*dest = NULL;
return ret;
}
if (src->multi_scratch) {
(*dest)->multi_scratch = NULL;
ret = hs_clone_scratch(src->multi_scratch, &(*dest)->multi_scratch);
if (ret != HS_SUCCESS) {
DEBUG_PRINTF("hs_clone_scratch(multi_scratch,...) failed\n");
ch_scratch_free(*dest);
return ret;
}
}
return CH_SUCCESS;
}
HS_PUBLIC_API
ch_error_t HS_CDECL ch_free_scratch(ch_scratch_t *scratch) {
ch_error_t ret = CH_SUCCESS;
if (scratch) {
/* has to be aligned before we can do anything with it */
if (!ISALIGNED_CL(scratch)) {
return CH_INVALID;
}
if (scratch->magic != CH_SCRATCH_MAGIC) {
return CH_INVALID;
}
if (markScratchInUse(scratch)) {
return CH_SCRATCH_IN_USE;
}
if (scratch->multi_scratch) {
ret = hs_free_scratch(scratch->multi_scratch);
}
scratch->magic = 0;
assert(scratch->scratch_alloc);
DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch,
scratch->scratch_alloc);
ch_scratch_free(scratch->scratch_alloc);
}
return ret;
}
/** Not public, but used for info from our internal tools. Note that in the
* hybrid matcher the scratch is definitely not a contiguous memory region. */
HS_PUBLIC_API
ch_error_t HS_CDECL ch_scratch_size(const ch_scratch_t *scratch, size_t *size) {
ch_error_t ret = CH_SUCCESS;
if (!size || !scratch || !ISALIGNED_CL(scratch) ||
scratch->magic != CH_SCRATCH_MAGIC) {
return CH_INVALID;
} else {
size_t multi_size = 0;
if (scratch->multi_scratch) {
ret = hs_scratch_size(scratch->multi_scratch, &multi_size);
}
if (ret) {
multi_size = 0;
}
*size = scratch->scratchSize + multi_size;
}
return ret;
}

119
chimera/ch_scratch.h Normal file
View File

@ -0,0 +1,119 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Scratch and associated data structures.
*
* This header gets pulled into many places (many deep, slow to compile
* places). Try to keep the included headers under control.
*/
#ifndef CH_SCRATCH_H_
#define CH_SCRATCH_H_
#include "ch_common.h"
#include "ch_runtime.h"
#ifdef __cplusplus
extern "C"
{
#endif
#define CH_SCRATCH_MAGIC 0x554F4259 //!< Magic number stored in \ref ch_scratch
struct queue_item {
int from; /** \brief used to store the start location. */
int to; /** \brief used to store the current location. */
u32 id; /**< pattern index. */
};
struct match_pq {
struct queue_item *item;
u32 size; /**< current size of the priority queue */
};
/** \brief Information about a pattern stored at runtime when a match is
* encountered. */
struct ch_patterndata {
struct ch_capture *match; //!< buffered group info
u32 groupCount; //!< number of capturing groups
u32 scanStart; //!< start of match window (still to be single-scanned).
};
/** \brief Scratch space header for Chimera. */
struct ch_scratch {
u32 magic; //!< must be \ref CH_SCRATCH_MAGIC
u8 in_use; /**< non-zero when being used by an API call. */
    struct hs_scratch *multi_scratch; //!< for hyperscan scratch.
int *ovector; //!< maximally-sized ovector for PCRE usage.
struct ch_capture *captured; //!< max-sized capture group struct.
u8 *active; //!< active multibit.
struct ch_patterndata *patternData; //!< per-pattern match data, indexed by
// pattern ID.
struct match_pq pq; //!< priority queue to ensure matching ordering
u32 patternCount; //!< number of patterns, used to size active multibit
u32 activeSize; //!< size of active multibit
u32 maxCaptureGroups; //!< largest num of capturing groups required
u32 scratchSize; //!< size of allocation
int ret; //!< return value in Hyperscan callback
char *scratch_alloc; /* user allocated scratch object */
};
/**
* \brief Mark scratch as in use.
*
* Returns non-zero if it was already in use, zero otherwise.
*/
static really_inline
char markScratchInUse(struct ch_scratch *scratch) {
DEBUG_PRINTF("marking scratch as in use\n");
assert(scratch && scratch->magic == CH_SCRATCH_MAGIC);
if (scratch->in_use) {
DEBUG_PRINTF("scratch already in use!\n");
return 1;
}
scratch->in_use = 1;
return 0;
}
/**
* \brief Mark scratch as no longer in use.
*/
static really_inline
void unmarkScratchInUse(struct ch_scratch *scratch) {
DEBUG_PRINTF("marking scratch as not in use\n");
assert(scratch && scratch->magic == CH_SCRATCH_MAGIC);
assert(scratch->in_use == 1);
scratch->in_use = 0;
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* CH_SCRATCH_H_ */

chimera/libch.pc.in Normal file
View File

@ -0,0 +1,12 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=@CMAKE_INSTALL_PREFIX@
libdir=@CMAKE_INSTALL_PREFIX@/lib
includedir=@CMAKE_INSTALL_PREFIX@/include
Name: libch
Description: Intel(R) Chimera Library
Version: @HS_VERSION@
Requires.private: libhs
Libs: -L${libdir} -lchimera
Libs.private: @PRIVATE_LIBS@
Cflags: -I${includedir}/hs

View File

@ -54,11 +54,10 @@ else ()
find_package(PkgConfig) find_package(PkgConfig)
pkg_check_modules(PCRE libpcre=${PCRE_REQUIRED_VERSION}) pkg_check_modules(PCRE libpcre=${PCRE_REQUIRED_VERSION})
if (PCRE_FOUND) if (PCRE_FOUND)
set(CORRECT_PCRE_VERSION TRUE)
message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION}") message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION}")
else () else ()
message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} not found") message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} not found")
return () return ()
endif () endif ()
endif (PCRE_BUILD_SOURCE) endif (PCRE_BUILD_SOURCE)
set (PCRE_CHECKED TRUE PARENT_SCOPE)

View File

@ -0,0 +1,333 @@
.. _chimera:
#######
Chimera
#######
This section describes the Chimera library.
************
Introduction
************
Chimera is a software regular expression matching engine that is a hybrid of
Hyperscan and PCRE. The design goals of Chimera are to fully support PCRE
syntax as well as to take advantage of the high performance nature of Hyperscan.
Chimera inherits the design guideline of Hyperscan with C APIs for compilation
and scanning.
The Chimera API itself is composed of two major components:
===========
Compilation
===========
These functions take a group of regular expressions, along with identifiers and
option flags, and compile them into an immutable database that can be used by
the Chimera scanning API. This compilation process performs considerable
analysis and optimization work in order to build a database that will match
the given expressions efficiently.
See :ref:`chcompile` for more details.
========
Scanning
========
Once a Chimera database has been created, it can be used to scan data in memory.
Chimera only supports block mode, in which a single contiguous block in
memory is scanned.
Matches are delivered to the application via a user-supplied callback function
that is called synchronously for each match.
For a given database, Chimera provides several guarantees:
* No memory allocations occur at runtime with the exception of scratch space
  allocation, which should be done ahead of time for performance-critical
  applications:
- **Scratch space**: temporary memory used for internal data at scan time.
Structures in scratch space do not persist beyond the end of a single scan
call.
* The size of the scratch space required for a given database is fixed and
  determined at database compile time. This means that the memory requirements
  of the application are known ahead of time, and the scratch space can be
pre-allocated if required for performance reasons.
* Any pattern that has successfully been compiled by the Chimera compiler can
  be scanned against any input. However, internal resource limits or other
  limitations imposed by PCRE at runtime may cause a scan call to return an
  error.
.. note:: Chimera is designed to have the same matching behavior as PCRE,
including greedy/ungreedy, capturing, etc. Chimera reports both
**start offset** and **end offset** for each match, like PCRE. Unlike
Hyperscan, which reports all matches, Chimera only reports non-overlapping
matches. For example, the pattern :regexp:`/foofoo/` will match
``foofoofoofoo`` at offsets (0, 6) and (6, 12).
.. note:: Since Chimera is a hybrid of Hyperscan and PCRE in order to support
full PCRE syntax, it carries extra performance overhead compared to a
Hyperscan-only solution. Always use Hyperscan for better performance unless
you need full PCRE syntax support.
See :ref:`chruntime` for more details.
************
Requirements
************
The PCRE library (http://pcre.org/) version 8.41 is required for Chimera.
.. note:: Since Chimera needs to reference PCRE internal functions, place the PCRE source
directory under the Hyperscan root directory in order to build Chimera.
Besides this, the hardware and software requirements of Chimera are the same as those of Hyperscan.
See :ref:`hardware` and :ref:`software` for more details.
.. note:: Building Hyperscan will automatically generate the Chimera library.
Currently only a static library is supported for Chimera, so please
use a static build type when configuring the CMake build options.
.. _chcompile:
******************
Compiling Patterns
******************
===================
Building a Database
===================
The Chimera compiler API accepts regular expressions and converts them into a
compiled pattern database that can then be used to scan data.
The API provides two functions that compile regular expressions into
databases:
#. :c:func:`ch_compile`: compiles a single expression into a pattern database.
#. :c:func:`ch_compile_multi`: compiles an array of expressions into a pattern
database. All of the supplied patterns will be scanned for concurrently at
scan time, with user-supplied identifiers returned when they match.
#. :c:func:`ch_compile_ext_multi`: compiles an array of expressions as above,
but allows PCRE match limits to be specified for each expression.
Compilation allows the Chimera library to analyze the given pattern(s) and
pre-determine how to scan for these patterns in an optimized fashion using
Hyperscan and PCRE.
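As a rough illustration, a multi-pattern compile might look like the sketch
below. It assumes that :c:func:`ch_compile_multi` mirrors Hyperscan's
``hs_compile_multi``, taking arrays of expressions, flags and IDs, an element
count, a mode (``CH_MODE_GROUPS`` to enable capture group reporting), an
optional platform descriptor, and out-pointers for the database and a compile
error whose ``message`` field describes any failure; the pattern strings and
IDs are purely illustrative:

.. code-block:: c

    ch_database_t *db = NULL;
    ch_compile_error_t *compile_err = NULL;

    const char *expressions[] = {"foo(bar|baz)", "(\\d{4})-(\\d{2})-(\\d{2})"};
    unsigned int flags[] = {CH_FLAG_CASELESS, 0};
    unsigned int ids[] = {1001, 1002};

    /* Compile both patterns into one database; CH_MODE_GROUPS asks Chimera to
     * report capturing group offsets at scan time. */
    ch_error_t err = ch_compile_multi(expressions, flags, ids, 2,
                                      CH_MODE_GROUPS, NULL, &db, &compile_err);
    if (err != CH_SUCCESS) {
        fprintf(stderr, "ch_compile_multi failed: %s\n", compile_err->message);
        ch_free_compile_error(compile_err);
        exit(1);
    }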
===============
Pattern Support
===============
Chimera fully supports the pattern syntax used by the PCRE library ("libpcre"),
described at <http://www.pcre.org/>. The version of PCRE used to validate
Chimera's interpretation of this syntax is 8.41.
=========
Semantics
=========
Chimera supports exactly the same semantics as the PCRE library. Moreover, it
supports simultaneous matching of multiple patterns, like Hyperscan, and the
matches will be reported in order by end offset.
.. _chruntime:
*********************
Scanning for Patterns
*********************
Chimera provides the scan function ``ch_scan``.
================
Handling Matches
================
``ch_scan`` will call a user-supplied callback function when a match
is found. This function has the following signature:
.. doxygentypedef:: ch_match_event_handler
:outline:
:no-link:
The *id* argument will be set to the identifier for the matching expression
provided at compile time, the *from* argument will be set to the start offset
of the match, and the *to* argument will be set to the end offset of the
match. The *captured* argument stores the offsets of the entire pattern match
as well as of the captured subexpressions. The *size* argument will be set to
the number of valid entries in *captured*.
The match callback function has the capability to continue or halt scanning
by returning different values.
See :c:type:`ch_match_event_handler` for more information.
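As a minimal sketch, assuming the handler takes the pattern id, the from/to
offsets, a flags word, the number of capture entries, the capture array and a
user context pointer, and returns ``CH_CALLBACK_CONTINUE`` to keep scanning, a
callback that prints each match and its first capturing group could look like:

.. code-block:: c

    static ch_callback_t on_match(unsigned int id, unsigned long long from,
                                  unsigned long long to, unsigned int flags,
                                  unsigned int size,
                                  const ch_capture_t *captured, void *ctx) {
        printf("pattern %u matched at [%llu, %llu)\n", id, from, to);

        /* captured[0] covers the whole match; subsequent entries are the
         * capturing groups and are only meaningful when marked active. */
        if (size > 1 && (captured[1].flags & CH_CAPTURE_FLAG_ACTIVE)) {
            printf("  group 1: [%llu, %llu)\n",
                   captured[1].from, captured[1].to);
        }
        return CH_CALLBACK_CONTINUE;
    }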
=======================
Handling Runtime Errors
=======================
``ch_scan`` will call a user-supplied callback function when a runtime error
occurs in libpcre. This function has the following signature:
.. doxygentypedef:: ch_error_event_handler
:outline:
:no-link:
The *id* argument will be set to the identifier for the matching expression
provided at compile time.
The error callback function has the capability to either halt scanning or
continue scanning for the next pattern.
See :c:type:`ch_error_event_handler` for more information.
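For completeness, a sketch of an error handler and the corresponding
:c:func:`ch_scan` call might look as follows. It assumes the handler receives
the error type (for example, a PCRE match limit being hit), the pattern id, an
info pointer and the user context, that ``ch_scan`` takes the database, data,
length, flags, scratch space and both handlers, and that a compiled database
``db``, an input buffer ``data`` and an allocated ``scratch`` already exist,
along with the ``on_match`` handler sketched above:

.. code-block:: c

    static ch_callback_t on_error(ch_error_event_t error_type, unsigned int id,
                                  void *info, void *ctx) {
        /* A PCRE limit was hit while matching this pattern: skip it and keep
         * scanning with the remaining patterns. */
        fprintf(stderr, "runtime error %d in pattern %u, skipping\n",
                (int)error_type, id);
        return CH_CALLBACK_SKIP_PATTERN;
    }

    /* ... */
    ch_error_t err = ch_scan(db, data, (unsigned int)strlen(data), 0, scratch,
                             on_match, on_error, NULL);
    if (err != CH_SUCCESS) {
        fprintf(stderr, "ch_scan failed: %d\n", err);
    }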
=============
Scratch Space
=============
While scanning data, Chimera needs a small amount of temporary memory to store
on-the-fly internal data. This amount is unfortunately too large to fit on the
stack, particularly for embedded applications, and allocating memory dynamically
is too expensive, so a pre-allocated "scratch" space must be provided to the
scanning functions.
The function :c:func:`ch_alloc_scratch` allocates a large enough region of
scratch space to support a given database. If the application uses multiple
databases, only a single scratch region is necessary: in this case, calling
:c:func:`ch_alloc_scratch` on each database (with the same ``scratch`` pointer)
will ensure that the scratch space is large enough to support scanning against
any of the given databases.
While the Chimera library is re-entrant, the use of scratch spaces is not.
For example, if by design it is deemed necessary to run recursive or nested
scanning (say, from the match callback function), then an additional scratch
space is required for that context.
In the absence of recursive scanning, only one such space is required per thread
and can (and indeed should) be allocated before data scanning is to commence.
In a scenario where a set of expressions are compiled by a single "master"
thread and data will be scanned by multiple "worker" threads, the convenience
function :c:func:`ch_clone_scratch` allows multiple copies of an existing
scratch space to be made for each thread (rather than forcing the caller to pass
all the compiled databases through :c:func:`ch_alloc_scratch` multiple times).
For example:
.. code-block:: c
ch_error_t err;
ch_scratch_t *scratch_prototype = NULL;
err = ch_alloc_scratch(db, &scratch_prototype);
if (err != CH_SUCCESS) {
printf("ch_alloc_scratch failed!");
exit(1);
}
ch_scratch_t *scratch_thread1 = NULL;
ch_scratch_t *scratch_thread2 = NULL;
err = ch_clone_scratch(scratch_prototype, &scratch_thread1);
if (err != CH_SUCCESS) {
printf("ch_clone_scratch failed!");
exit(1);
}
err = ch_clone_scratch(scratch_prototype, &scratch_thread2);
if (err != CH_SUCCESS) {
printf("ch_clone_scratch failed!");
exit(1);
}
ch_free_scratch(scratch_prototype);
/* Now two threads can both scan against database db,
each with its own scratch space. */
=================
Custom Allocators
=================
By default, structures used by Chimera at runtime (scratch space, etc) are
allocated with the default system allocators, usually
``malloc()`` and ``free()``.
The Chimera API provides a facility for changing this behaviour to support
applications that use custom memory allocators.
These functions are:
- :c:func:`ch_set_database_allocator`, which sets the allocate and free functions
used for compiled pattern databases.
- :c:func:`ch_set_scratch_allocator`, which sets the allocate and free
functions used for scratch space.
- :c:func:`ch_set_misc_allocator`, which sets the allocate and free functions
used for miscellaneous data, such as compile error structures and
informational strings.
The :c:func:`ch_set_allocator` function can be used to set all of the custom
allocators to the same allocate/free pair.
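A minimal sketch of installing such a pair, assuming
:c:func:`ch_set_allocator` accepts a ``malloc``-like and a ``free``-like
function pointer in the same way as Hyperscan's ``hs_set_allocator``:

.. code-block:: c

    static void *my_alloc(size_t size) {
        /* Route Chimera's allocations through custom bookkeeping. */
        return malloc(size);
    }

    static void my_free(void *ptr) {
        free(ptr);
    }

    /* Applies to databases, scratch space and miscellaneous data alike. */
    ch_error_t err = ch_set_allocator(my_alloc, my_free);
    if (err != CH_SUCCESS) {
        fprintf(stderr, "ch_set_allocator failed: %d\n", err);
    }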
************************
API Reference: Constants
************************
===========
Error Codes
===========
.. doxygengroup:: CH_ERROR
:content-only:
:no-link:
=============
Pattern flags
=============
.. doxygengroup:: CH_PATTERN_FLAG
:content-only:
:no-link:
==================
Compile mode flags
==================
.. doxygengroup:: CH_MODE_FLAG
:content-only:
:no-link:
********************
API Reference: Files
********************
==========
File: ch.h
==========
.. doxygenfile:: ch.h
=================
File: ch_common.h
=================
.. doxygenfile:: ch_common.h
==================
File: ch_compile.h
==================
.. doxygenfile:: ch_compile.h
==================
File: ch_runtime.h
==================
.. doxygenfile:: ch_runtime.h

View File

@ -471,3 +471,93 @@ matching support. Here they are, in a nutshell:
Approximate matching is always disabled by default, and can be enabled on a Approximate matching is always disabled by default, and can be enabled on a
per-pattern basis by using an extended parameter described in :ref:`extparam`. per-pattern basis by using an extended parameter described in :ref:`extparam`.
.. _logical_combinations:
********************
Logical Combinations
********************
For situations when a user requires behaviour that depends on the presence or
absence of matches from groups of patterns, Hyperscan provides support for the
logical combination of patterns in a given pattern set, with three operators:
``NOT``, ``AND`` and ``OR``.
The logical value of such a combination is based on each expression's matching
status at a given offset. The matching status of any expression has a boolean
value: *false* if the expression has not yet matched or *true* if the expression
has already matched. In particular, the value of a ``NOT`` operation at a given
offset is *true* if the expression it refers to is *false* at this offset.
For example, ``NOT 101`` means that expression 101 has not yet matched at this
offset.
A logical combination is passed to Hyperscan at compile time as an expression.
This combination expression will raise matches at every offset where one of its
sub-expressions matches and the logical value of the whole expression is *true*.
To illustrate, here is an example combination expression: ::
((301 OR 302) AND 303) AND (304 OR NOT 305)
If expression 301 matches at offset 10, the logical value of 301 is *true*
while the other patterns' values are *false*. Hence, the whole combination's value is
*false*.
Then expression 303 matches at offset 20. Now the values of 301 and 303 are
*true* while the other patterns' values are still *false*. In this case, the
combination's value is *true*, so the combination expression raises a match at
offset 20.
Finally, expression 305 matches at offset 30. Now the values of 301, 303 and 305
are *true* while the other patterns' values are still *false*. In this case, the
combination's value is *false* and no match is raised.
**Using Logical Combinations**
In logical combination syntax, an expression is written in infix notation; it
consists of operands, operators and parentheses. The operands are expression
IDs, and the operators are ``!`` (NOT), ``&`` (AND) or ``|`` (OR). For example, the
combination described in the previous section would be written as: ::
((301 | 302) & 303) & (304 | !305)
In a logical combination expression:
* The precedence of the operators is ``!`` > ``&`` > ``|``. For example:
- ``A&B|C`` is treated as ``(A&B)|C``,
- ``A|B&C`` is treated as ``A|(B&C)``,
- ``A&!B`` is treated as ``A&(!B)``.
* Extra parentheses are allowed. For example:
- ``(A)&!(B)`` is the same as ``A&!B``,
- ``(A&B)|C`` is the same as ``A&B|C``.
* Whitespace is ignored.
To use a logical combination expression, it must be passed to one of the
Hyperscan compile functions (:c:func:`hs_compile_multi`,
:c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag,
which identifies the pattern as a logical combination expression. The patterns
referred to in the logical combination expression must be compiled together in
the same pattern set as the combination expression.
When an expression has the :c:member:`HS_FLAG_COMBINATION` flag set, it ignores
all other flags except the :c:member:`HS_FLAG_SINGLEMATCH` flag and the
:c:member:`HS_FLAG_QUIET` flag.
Hyperscan will reject logical combination expressions at compile time that
evaluate to *true* when no patterns have matched; for example: ::
!101
!101|102
!101&!102
!(101&102)
Patterns that are referred to as operands within a logical combination (for
example, 301 through 305 in the examples above) may also use the
:c:member:`HS_FLAG_QUIET` flag to silence the reporting of individual matches
for those patterns. In the absence of this flag, all matches (for
both individual patterns and their logical combinations) will be reported.
When an expression has both the :c:member:`HS_FLAG_COMBINATION` flag and the
:c:member:`HS_FLAG_QUIET` flag set, no matches for this logical combination
will be reported.
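Putting these pieces together, a sketch of compiling two quiet sub-patterns
alongside a combination over them (the pattern strings and IDs are purely
illustrative) might look like:

.. code-block:: c

    const char *expressions[] = {"foo.*bar", "baz(\\d+)", "1 & !2"};
    unsigned int ids[] = {1, 2, 1001};
    unsigned int flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_COMBINATION};

    hs_database_t *db = NULL;
    hs_compile_error_t *compile_err = NULL;

    /* Patterns 1 and 2 are QUIET, so only the combination (id 1001) raises
     * matches: at offsets where pattern 1 has matched and pattern 2 has not. */
    hs_error_t err = hs_compile_multi(expressions, flags, ids, 3, HS_MODE_BLOCK,
                                      NULL, &db, &compile_err);
    if (err != HS_SUCCESS) {
        fprintf(stderr, "compile failed: %s\n", compile_err->message);
        hs_free_compile_error(compile_err);
    }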

View File

@ -27,10 +27,10 @@ Very Quick Start
Known working generators: Known working generators:
* ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X) * ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X)
* ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files. * ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files.
* ``Visual Studio 15 2017`` --- Visual Studio projects
Generators that might work include: Generators that might work include:
* ``Xcode`` --- OS X Xcode projects. * ``Xcode`` --- OS X Xcode projects.
* ``Visual Studio`` --- Visual Studio projects - very experimental
#. Build Hyperscan #. Build Hyperscan
@ -38,6 +38,7 @@ Very Quick Start
* ``cmake --build .`` --- will build everything * ``cmake --build .`` --- will build everything
* ``make -j<jobs>`` --- use makefiles in parallel * ``make -j<jobs>`` --- use makefiles in parallel
* ``ninja`` --- use Ninja build * ``ninja`` --- use Ninja build
* ``MsBuild.exe`` --- use Visual Studio MsBuild
* etc. * etc.
#. Check Hyperscan #. Check Hyperscan
@ -49,6 +50,8 @@ Very Quick Start
Requirements Requirements
************ ************
.. _hardware:
Hardware Hardware
======== ========
@ -84,6 +87,7 @@ compiler support. The supported compilers are:
* GCC, v4.8.1 or higher * GCC, v4.8.1 or higher
* Clang, v3.4 or higher (with libstdc++ or libc++) * Clang, v3.4 or higher (with libstdc++ or libc++)
* Intel C++ Compiler v15 or higher * Intel C++ Compiler v15 or higher
* Visual C++ 2017 Build Tools
Examples of operating systems that Hyperscan is known to work on include: Examples of operating systems that Hyperscan is known to work on include:
@ -96,13 +100,17 @@ FreeBSD:
* 10.0 or newer * 10.0 or newer
Windows:
* 8 or newer
Mac OS X: Mac OS X:
* 10.8 or newer, using XCode/Clang * 10.8 or newer, using XCode/Clang
Hyperscan *may* compile and run on other platforms, but there is no guarantee. Hyperscan *may* compile and run on other platforms, but there is no guarantee.
We currently have experimental support for Windows using Intel C++ Compiler We currently have experimental support for Windows using Intel C++ Compiler
or Visual Studio 2015. or Visual Studio 2017.
In addition, the following software is required for compiling the Hyperscan library: In addition, the following software is required for compiling the Hyperscan library:
@ -118,7 +126,8 @@ Dependency Version Notes
Most of these dependencies can be provided by the package manager on the build Most of these dependencies can be provided by the package manager on the build
system (e.g. Debian/Ubuntu/RedHat packages, FreeBSD ports, etc). However, system (e.g. Debian/Ubuntu/RedHat packages, FreeBSD ports, etc). However,
ensure that the correct version is present. ensure that the correct version is present. As for Windows, in order to have
Ragel, you may use Cygwin to build it from source.
Boost Headers Boost Headers
------------- -------------

View File

@ -758,7 +758,7 @@ WARN_LOGFILE =
# spaces. # spaces.
# Note: If this tag is empty the current directory is searched. # Note: If this tag is empty the current directory is searched.
INPUT = @CMAKE_SOURCE_DIR@/src/hs.h @CMAKE_SOURCE_DIR@/src/hs_common.h @CMAKE_SOURCE_DIR@/src/hs_compile.h @CMAKE_SOURCE_DIR@/src/hs_runtime.h INPUT = @CMAKE_SOURCE_DIR@/src/hs.h @CMAKE_SOURCE_DIR@/src/hs_common.h @CMAKE_SOURCE_DIR@/src/hs_compile.h @CMAKE_SOURCE_DIR@/src/hs_runtime.h @CMAKE_SOURCE_DIR@/chimera/ch.h @CMAKE_SOURCE_DIR@/chimera/ch_common.h @CMAKE_SOURCE_DIR@/chimera/ch_compile.h @CMAKE_SOURCE_DIR@/chimera/ch_runtime.h
# This tag can be used to specify the character encoding of the source files # This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses

View File

@ -20,3 +20,4 @@ Hyperscan |version| Developer's Reference Guide
tools tools
api_constants api_constants
api_files api_files
chimera

View File

@ -246,6 +246,8 @@ Character API Flag Description
``W`` :c:member:`HS_FLAG_UCP` Unicode property support ``W`` :c:member:`HS_FLAG_UCP` Unicode property support
``P`` :c:member:`HS_FLAG_PREFILTER` Prefiltering mode ``P`` :c:member:`HS_FLAG_PREFILTER` Prefiltering mode
``L`` :c:member:`HS_FLAG_SOM_LEFTMOST` Leftmost start of match reporting ``L`` :c:member:`HS_FLAG_SOM_LEFTMOST` Leftmost start of match reporting
``C`` :c:member:`HS_FLAG_COMBINATION` Logical combination of patterns
``Q`` :c:member:`HS_FLAG_QUIET` Quiet at matching
========= ================================= =========== ========= ================================= ===========
In addition to the set of flags above, :ref:`extparam` can be supplied In addition to the set of flags above, :ref:`extparam` can be supplied

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -45,6 +45,7 @@
#include "parser/buildstate.h" #include "parser/buildstate.h"
#include "parser/dump.h" #include "parser/dump.h"
#include "parser/Component.h" #include "parser/Component.h"
#include "parser/logical_combination.h"
#include "parser/parse_error.h" #include "parser/parse_error.h"
#include "parser/Parser.h" // for flags #include "parser/Parser.h" // for flags
#include "parser/position.h" #include "parser/position.h"
@ -111,14 +112,21 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
const hs_expr_ext *ext) const hs_expr_ext *ext)
: expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH, : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET, false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
0, 0, 0) { 0, 0, 0, flags & HS_FLAG_QUIET) {
// We disallow SOM + Quiet.
if ((flags & HS_FLAG_QUIET) && (flags & HS_FLAG_SOM_LEFTMOST)) {
throw CompileError("HS_FLAG_QUIET is not supported in "
"combination with HS_FLAG_SOM_LEFTMOST.");
}
flags &= ~HS_FLAG_QUIET;
ParseMode mode(flags); ParseMode mode(flags);
component = parse(expression, mode); component = parse(expression, mode);
expr.utf8 = mode.utf8; /* utf8 may be set by parse() */ expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
if (expr.utf8 && !isValidUtf8(expression)) { const size_t len = strlen(expression);
if (expr.utf8 && !isValidUtf8(expression, len)) {
throw ParseError("Expression is not valid UTF-8."); throw ParseError("Expression is not valid UTF-8.");
} }
@ -233,6 +241,45 @@ void addExpression(NG &ng, unsigned index, const char *expression,
DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags, DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags,
expression); expression);
if (flags & HS_FLAG_COMBINATION) {
if (flags & ~(HS_FLAG_COMBINATION | HS_FLAG_QUIET |
HS_FLAG_SINGLEMATCH)) {
throw CompileError("only HS_FLAG_QUIET and HS_FLAG_SINGLEMATCH "
"are supported in combination "
"with HS_FLAG_COMBINATION.");
}
if (flags & HS_FLAG_QUIET) {
DEBUG_PRINTF("skip QUIET logical combination expression %u\n", id);
} else {
u32 ekey = INVALID_EKEY;
u64a min_offset = 0;
u64a max_offset = MAX_OFFSET;
if (flags & HS_FLAG_SINGLEMATCH) {
ekey = ng.rm.getExhaustibleKey(id);
}
if (ext) {
validateExt(*ext);
if (ext->flags & ~(HS_EXT_FLAG_MIN_OFFSET |
HS_EXT_FLAG_MAX_OFFSET)) {
throw CompileError("only HS_EXT_FLAG_MIN_OFFSET and "
"HS_EXT_FLAG_MAX_OFFSET extra flags "
"are supported in combination "
"with HS_FLAG_COMBINATION.");
}
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
min_offset = ext->min_offset;
}
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
max_offset = ext->max_offset;
}
}
ng.rm.pl.parseLogicalCombination(id, expression, ekey, min_offset,
max_offset);
DEBUG_PRINTF("parsed logical combination expression %u\n", id);
}
return;
}
// Ensure that our pattern isn't too long (in characters). // Ensure that our pattern isn't too long (in characters).
if (strlen(expression) > cc.grey.limitPatternLength) { if (strlen(expression) > cc.grey.limitPatternLength) {
throw CompileError("Pattern length exceeds limit."); throw CompileError("Pattern length exceeds limit.");

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -46,12 +46,12 @@ public:
bool highlander_in, bool utf8_in, bool prefilter_in, bool highlander_in, bool utf8_in, bool prefilter_in,
som_type som_in, ReportID report_in, u64a min_offset_in, som_type som_in, ReportID report_in, u64a min_offset_in,
u64a max_offset_in, u64a min_length_in, u32 edit_distance_in, u64a max_offset_in, u64a min_length_in, u32 edit_distance_in,
u32 hamm_distance_in) u32 hamm_distance_in, bool quiet_in)
: index(index_in), report(report_in), allow_vacuous(allow_vacuous_in), : index(index_in), report(report_in), allow_vacuous(allow_vacuous_in),
highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in), highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in),
som(som_in), min_offset(min_offset_in), max_offset(max_offset_in), som(som_in), min_offset(min_offset_in), max_offset(max_offset_in),
min_length(min_length_in), edit_distance(edit_distance_in), min_length(min_length_in), edit_distance(edit_distance_in),
hamm_distance(hamm_distance_in) {} hamm_distance(hamm_distance_in), quiet(quiet_in) {}
/** /**
* \brief Index of the expression represented by this graph. * \brief Index of the expression represented by this graph.
@ -98,6 +98,9 @@ public:
*/ */
u32 edit_distance; u32 edit_distance;
u32 hamm_distance; u32 hamm_distance;
/** \brief Quiet on match. */
bool quiet;
}; };
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -245,6 +245,11 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
} }
} }
// Check sub-expression ids
ng.rm.pl.validateSubIDs(ids, expressions, flags, elements);
// Renumber and assign lkey to reports
ng.rm.logicalKeyRenumber();
unsigned length = 0; unsigned length = 0;
struct hs_database *out = build(ng, &length); struct hs_database *out = build(ng, &length);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -811,6 +811,28 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
*/ */
#define HS_FLAG_SOM_LEFTMOST 256 #define HS_FLAG_SOM_LEFTMOST 256
/**
* Compile flag: Logical combination.
*
* This flag instructs Hyperscan to parse this expression as logical
* combination syntax.
* Logical constraints consist of operands, operators and parentheses.
* The operands are expression indices, and operators can be
* '!'(NOT), '&'(AND) or '|'(OR).
* For example:
* (101&102&103)|(104&!105)
* ((301|302)&303)&(304|305)
*/
#define HS_FLAG_COMBINATION 512
/**
* Compile flag: Don't do any match reporting.
*
* This flag instructs Hyperscan to ignore match reporting for this expression.
* It is designed to be used on the sub-expressions in logical combinations.
*/
#define HS_FLAG_QUIET 1024
/** @} */ /** @} */
/** /**

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -122,7 +122,7 @@ typedef struct hs_scratch hs_scratch_t;
* subsequent calls to @ref hs_scan_stream() for that stream will * subsequent calls to @ref hs_scan_stream() for that stream will
* immediately return with @ref HS_SCAN_TERMINATED. * immediately return with @ref HS_SCAN_TERMINATED.
*/ */
typedef int (*match_event_handler)(unsigned int id, typedef int (HS_CDECL *match_event_handler)(unsigned int id,
unsigned long long from, unsigned long long from,
unsigned long long to, unsigned long long to,
unsigned int flags, unsigned int flags,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -374,7 +374,7 @@ unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) {
} }
u16 top_sym = raw.alpha_remap[TOP]; u16 top_sym = raw.alpha_remap[TOP];
DEBUG_PRINTF("top: %hu, kind %d\n", top_sym, raw.kind); DEBUG_PRINTF("top: %hu, kind %s\n", top_sym, to_string(raw.kind).c_str());
/* create edges, JOIN variables (on edge targets) */ /* create edges, JOIN variables (on edge targets) */
map<dstate_id_t, GoughEdge> seen; map<dstate_id_t, GoughEdge> seen;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -84,7 +84,7 @@ struct mcclellan {
u8 has_accel; /**< 1 iff there are any accel plans */ u8 has_accel; /**< 1 iff there are any accel plans */
u8 remap[256]; /**< remaps characters to a smaller alphabet */ u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */ ReportID arb_report; /**< one of the accepts that this dfa may raise */
u32 accel_offset; /**< offset of the accel structures from start of NFA */ u32 accel_offset; /**< offset of accel structures from start of McClellan */
u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */ u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */
}; };

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -760,7 +760,7 @@ bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc,
return nfa; return nfa;
} }
#define MAX_SHERMAN_LIST_LEN 8 #define MAX_SHERMAN_LIST_LEN 9
static static
void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -173,7 +173,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
u32 sheng_limit_x4 = sheng_limit * 0x01010101; u32 sheng_limit_x4 = sheng_limit * 0x01010101;
m128 simd_stop_limit = set4x32(sheng_stop_limit_x4); m128 simd_stop_limit = set4x32(sheng_stop_limit_x4);
m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit); m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit);
DEBUG_PRINTF("end %hu, accel %hhu --> limit %hhu\n", sheng_limit, DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit,
m->sheng_accel_limit, sheng_stop_limit); m->sheng_accel_limit, sheng_stop_limit);
#endif #endif
@ -181,7 +181,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
m128 shuffle_mask = masks[*(c++)]; \ m128 shuffle_mask = masks[*(c++)]; \
s = pshufb_m128(shuffle_mask, s); \ s = pshufb_m128(shuffle_mask, s); \
u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ u32 s_gpr_x4 = movd(s); /* convert to u8 */ \
DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr_x4); \ DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \
if (s_gpr_x4 >= sheng_stop_limit_x4) { \ if (s_gpr_x4 >= sheng_stop_limit_x4) { \
s_gpr = s_gpr_x4; \ s_gpr = s_gpr_x4; \
goto exit; \ goto exit; \
@ -191,7 +191,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
u8 s_gpr; u8 s_gpr;
while (c < c_end) { while (c < c_end) {
#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) #if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
/* This version uses pext for efficently bitbashing out scaled /* This version uses pext for efficiently bitbashing out scaled
* versions of the bytes to process from a u64a */ * versions of the bytes to process from a u64a */
u64a data_bytes = unaligned_load_u64a(c); u64a data_bytes = unaligned_load_u64a(c);
@ -201,7 +201,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
s = pshufb_m128(shuffle_mask0, s); s = pshufb_m128(shuffle_mask0, s);
m128 s_max = s; m128 s_max = s;
m128 s_max0 = s_max; m128 s_max0 = s_max;
DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s)); DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 4, movd(s));
#define SHENG_SINGLE_UNROLL_ITER(iter) \ #define SHENG_SINGLE_UNROLL_ITER(iter) \
assert(iter); \ assert(iter); \
@ -217,7 +217,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
s_max = max_u8_m128(s_max, s); \ s_max = max_u8_m128(s_max, s); \
} \ } \
m128 s_max##iter = s_max; \ m128 s_max##iter = s_max; \
DEBUG_PRINTF("c %02llx --> s %hhu max %hhu\n", cc##iter >> 4, \ DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 4, \
movd(s), movd(s_max)); movd(s), movd(s_max));
SHENG_SINGLE_UNROLL_ITER(1); SHENG_SINGLE_UNROLL_ITER(1);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -84,7 +84,7 @@ struct mcsheng {
u8 has_accel; /**< 1 iff there are any accel plans */ u8 has_accel; /**< 1 iff there are any accel plans */
u8 remap[256]; /**< remaps characters to a smaller alphabet */ u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */ ReportID arb_report; /**< one of the accepts that this dfa may raise */
u32 accel_offset; /**< offset of the accel structures from start of NFA */ u32 accel_offset; /**< offset of accel structures from start of McClellan */
m128 sheng_masks[N_CHARS]; m128 sheng_masks[N_CHARS];
}; };

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -628,7 +628,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
fireSingleReport(cb, ctxt, sh->report, offset); fireSingleReport(cb, ctxt, sh->report, offset);
} else { } else {
fireReports(sh, cb, ctxt, s, offset, &cached_state_id, fireReports(sh, cb, ctxt, s, offset, &cached_state_id,
&cached_report_id, 1); &cached_report_id, 0);
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -577,7 +577,8 @@ bool NG::addHolder(NGHolder &g) {
} }
bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
u32 external_report, bool highlander, som_type som) { u32 external_report, bool highlander, som_type som,
bool quiet) {
assert(!literal.empty()); assert(!literal.empty());
if (!cc.grey.shortcutLiterals) { if (!cc.grey.shortcutLiterals) {
@ -605,7 +606,7 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
} else { } else {
u32 ekey = highlander ? rm.getExhaustibleKey(external_report) u32 ekey = highlander ? rm.getExhaustibleKey(external_report)
: INVALID_EKEY; : INVALID_EKEY;
Report r = makeECallback(external_report, 0, ekey); Report r = makeECallback(external_report, 0, ekey, quiet);
id = rm.getInternalId(r); id = rm.getInternalId(r);
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -77,7 +77,7 @@ public:
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead /** \brief Adds a literal to Rose, used by literal shortcut passes (instead
* of using \ref addGraph) */ * of using \ref addGraph) */
bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report, bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
bool highlander, som_type som); bool highlander, som_type som, bool quiet);
/** \brief Maximum history in bytes available for use by SOM reverse NFAs, /** \brief Maximum history in bytes available for use by SOM reverse NFAs,
* a hack for pattern support (see UE-1903). This is always set to the max * a hack for pattern support (see UE-1903). This is always set to the max

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -53,11 +53,11 @@
#include "ng_depth.h" #include "ng_depth.h"
#include "ng_holder.h" #include "ng_holder.h"
#include "ng_prune.h" #include "ng_prune.h"
#include "ng_undirected.h"
#include "ng_util.h" #include "ng_util.h"
#include "grey.h" #include "grey.h"
#include "ue2common.h" #include "ue2common.h"
#include "util/graph_range.h" #include "util/graph_range.h"
#include "util/graph_undirected.h"
#include "util/make_unique.h" #include "util/make_unique.h"
#include <map> #include <map>
@ -310,28 +310,19 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
return; return;
} }
unordered_map<NFAVertex, NFAUndirectedVertex> old2new; auto ug = make_undirected_graph(*g);
auto ug = createUnGraph(*g, true, true, old2new);
// Construct reverse mapping. // Filter specials and shell vertices from undirected graph.
unordered_map<NFAUndirectedVertex, NFAVertex> new2old; unordered_set<NFAVertex> bad_vertices(
for (const auto &m : old2new) { {g->start, g->startDs, g->accept, g->acceptEod});
new2old.emplace(m.second, m.first); bad_vertices.insert(head_shell.begin(), head_shell.end());
} bad_vertices.insert(tail_shell.begin(), tail_shell.end());
// Filter shell vertices from undirected graph.
unordered_set<NFAUndirectedVertex> shell_undir_vertices;
for (auto v : head_shell) {
shell_undir_vertices.insert(old2new.at(v));
}
for (auto v : tail_shell) {
shell_undir_vertices.insert(old2new.at(v));
}
auto filtered_ug = boost::make_filtered_graph( auto filtered_ug = boost::make_filtered_graph(
ug, boost::keep_all(), make_bad_vertex_filter(&shell_undir_vertices)); ug, boost::keep_all(), make_bad_vertex_filter(&bad_vertices));
// Actually run the connected components algorithm. // Actually run the connected components algorithm.
map<NFAUndirectedVertex, u32> split_components; map<NFAVertex, u32> split_components;
const u32 num = connected_components( const u32 num = connected_components(
filtered_ug, boost::make_assoc_property_map(split_components)); filtered_ug, boost::make_assoc_property_map(split_components));
@ -348,10 +339,8 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
// Collect vertex lists per component. // Collect vertex lists per component.
for (const auto &m : split_components) { for (const auto &m : split_components) {
NFAUndirectedVertex uv = m.first; NFAVertex v = m.first;
u32 c = m.second; u32 c = m.second;
assert(contains(new2old, uv));
NFAVertex v = new2old.at(uv);
verts[c].push_back(v); verts[c].push_back(v);
DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c); DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c);
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -73,7 +73,7 @@ static
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
stateset *init, stateset *initDS, stateset *init, stateset *initDS,
vector<NFAVertex> *v_by_index) { vector<NFAVertex> *v_by_index) {
DEBUG_PRINTF("graph kind: %u\n", (int)g.kind); DEBUG_PRINTF("graph kind: %s\n", to_string(g.kind).c_str());
for (auto v : vertices_range(g)) { for (auto v : vertices_range(g)) {
if (contains(unused, v)) { if (contains(unused, v)) {
continue; continue;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -542,7 +542,8 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
return nullptr; return nullptr;
} }
DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind); DEBUG_PRINTF("attempting to build %s mcclellan\n",
to_string(graph.kind).c_str());
assert(allMatchStatesHaveReports(graph)); assert(allMatchStatesHaveReports(graph));
bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph); bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -38,7 +38,6 @@
#include "ng_prune.h" #include "ng_prune.h"
#include "ng_reports.h" #include "ng_reports.h"
#include "ng_som_util.h" #include "ng_som_util.h"
#include "ng_undirected.h"
#include "ng_util.h" #include "ng_util.h"
#include "nfa/accel.h" #include "nfa/accel.h"
#include "nfa/limex_limits.h" #include "nfa/limex_limits.h"
@ -48,6 +47,7 @@
#include "util/dump_charclass.h" #include "util/dump_charclass.h"
#include "util/graph_range.h" #include "util/graph_range.h"
#include "util/graph_small_color_map.h" #include "util/graph_small_color_map.h"
#include "util/graph_undirected.h"
#include "util/report_manager.h" #include "util/report_manager.h"
#include "util/unordered.h" #include "util/unordered.h"
@ -73,40 +73,41 @@ namespace ue2 {
namespace { namespace {
/** \brief Filter that retains only edges between vertices with the same /**
* reachability. */ * \brief Filter that retains only edges between vertices with the same
* reachability. Special vertices are dropped.
*/
template<class Graph> template<class Graph>
struct ReachFilter { struct ReachFilter {
ReachFilter() {} ReachFilter() = default;
explicit ReachFilter(const Graph *g_in) : g(g_in) {} explicit ReachFilter(const Graph *g_in) : g(g_in) {}
// Convenience typedefs. // Convenience typedefs.
typedef typename boost::graph_traits<Graph> Traits; using Traits = typename boost::graph_traits<Graph>;
typedef typename Traits::vertex_descriptor VertexDescriptor; using VertexDescriptor = typename Traits::vertex_descriptor;
typedef typename Traits::edge_descriptor EdgeDescriptor; using EdgeDescriptor = typename Traits::edge_descriptor;
bool operator()(const VertexDescriptor &v) const {
assert(g);
// Disallow special vertices, as otherwise we will try to remove them
// later.
return !is_special(v, *g);
}
bool operator()(const EdgeDescriptor &e) const { bool operator()(const EdgeDescriptor &e) const {
assert(g); assert(g);
VertexDescriptor u = source(e, *g), v = target(e, *g);
// Disallow special vertices, as otherwise we will try to remove them
// later.
if (is_special(u, *g) || is_special(v, *g)) {
return false;
}
// Vertices must have the same reach. // Vertices must have the same reach.
auto u = source(e, *g), v = target(e, *g);
const CharReach &cr_u = (*g)[u].char_reach; const CharReach &cr_u = (*g)[u].char_reach;
const CharReach &cr_v = (*g)[v].char_reach; const CharReach &cr_v = (*g)[v].char_reach;
return cr_u == cr_v; return cr_u == cr_v;
} }
const Graph *g = nullptr; const Graph *g = nullptr;
}; };
typedef boost::filtered_graph<NGHolder, ReachFilter<NGHolder>> RepeatGraph; using RepeatGraph = boost::filtered_graph<NGHolder, ReachFilter<NGHolder>,
ReachFilter<NGHolder>>;
struct ReachSubgraph { struct ReachSubgraph {
vector<NFAVertex> vertices; vector<NFAVertex> vertices;
@ -300,10 +301,9 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
unordered_map<NFAVertex, NFAVertex> verts_map; // in g -> in verts_g unordered_map<NFAVertex, NFAVertex> verts_map; // in g -> in verts_g
fillHolder(&verts_g, g, verts, &verts_map); fillHolder(&verts_g, g, verts, &verts_map);
unordered_map<NFAVertex, NFAUndirectedVertex> old2new; const auto ug = make_undirected_graph(verts_g);
auto ug = createUnGraph(verts_g, true, true, old2new);
unordered_map<NFAUndirectedVertex, u32> repeatMap; unordered_map<NFAVertex, u32> repeatMap;
size_t num = connected_components(ug, make_assoc_property_map(repeatMap)); size_t num = connected_components(ug, make_assoc_property_map(repeatMap));
DEBUG_PRINTF("found %zu connected repeat components\n", num); DEBUG_PRINTF("found %zu connected repeat components\n", num);
@ -312,7 +312,8 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
vector<ReachSubgraph> rs(num); vector<ReachSubgraph> rs(num);
for (auto v : verts) { for (auto v : verts) {
NFAUndirectedVertex vu = old2new.at(verts_map.at(v)); assert(!is_special(v, g));
auto vu = verts_map.at(v);
auto rit = repeatMap.find(vu); auto rit = repeatMap.find(vu);
if (rit == repeatMap.end()) { if (rit == repeatMap.end()) {
continue; /* not part of a repeat */ continue; /* not part of a repeat */
@ -323,8 +324,14 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
} }
for (const auto &rsi : rs) { for (const auto &rsi : rs) {
if (rsi.vertices.empty()) {
// Empty elements can happen when connected_components finds a
// subgraph consisting entirely of specials (which aren't added to
// ReachSubgraph in the loop above). There's nothing we can do with
// these, so we skip them.
continue;
}
DEBUG_PRINTF("repeat with %zu vertices\n", rsi.vertices.size()); DEBUG_PRINTF("repeat with %zu vertices\n", rsi.vertices.size());
assert(!rsi.vertices.empty());
if (rsi.vertices.size() >= minNumVertices) { if (rsi.vertices.size() >= minNumVertices) {
DEBUG_PRINTF("enqueuing\n"); DEBUG_PRINTF("enqueuing\n");
q.push(rsi); q.push(rsi);
@ -1023,17 +1030,16 @@ static
void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs, void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
const u32 minNumVertices) { const u32 minNumVertices) {
const ReachFilter<NGHolder> fil(&g); const ReachFilter<NGHolder> fil(&g);
const RepeatGraph rg(g, fil); const RepeatGraph rg(g, fil, fil);
if (!isCompBigEnough(rg, minNumVertices)) { if (!isCompBigEnough(rg, minNumVertices)) {
DEBUG_PRINTF("component not big enough, bailing\n"); DEBUG_PRINTF("component not big enough, bailing\n");
return; return;
} }
unordered_map<RepeatGraph::vertex_descriptor, NFAUndirectedVertex> old2new; const auto ug = make_undirected_graph(rg);
auto ug = createUnGraph(rg, true, true, old2new);
unordered_map<NFAUndirectedVertex, u32> repeatMap; unordered_map<NFAVertex, u32> repeatMap;
unsigned int num; unsigned int num;
num = connected_components(ug, make_assoc_property_map(repeatMap)); num = connected_components(ug, make_assoc_property_map(repeatMap));
@ -1045,8 +1051,7 @@ void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
rs.resize(num); rs.resize(num);
for (auto v : topoOrder) { for (auto v : topoOrder) {
NFAUndirectedVertex vu = old2new[v]; auto rit = repeatMap.find(v);
auto rit = repeatMap.find(vu);
if (rit == repeatMap.end()) { if (rit == repeatMap.end()) {
continue; /* not part of a repeat */ continue; /* not part of a repeat */
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -87,7 +87,11 @@ private:
/** Find the set of characters that are not present in the reachability of /** Find the set of characters that are not present in the reachability of
* graph \p g after a certain depth (currently 8). If a character in this set * graph \p g after a certain depth (currently 8). If a character in this set
* is encountered, it means that the NFA is either dead or has not progressed * is encountered, it means that the NFA is either dead or has not progressed
* more than 8 characters from its start states. */ * more than 8 characters from its start states.
*
* This is only used to guide merging heuristics, use
* findLeftOffsetStopAlphabet for real uses.
*/
CharReach findStopAlphabet(const NGHolder &g, som_type som) { CharReach findStopAlphabet(const NGHolder &g, som_type som) {
const depth max_depth(MAX_STOP_DEPTH); const depth max_depth(MAX_STOP_DEPTH);
const InitDepths depths(g); const InitDepths depths(g);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -47,7 +47,11 @@ class NGHolder;
/** Find the set of characters that are not present in the reachability of /** Find the set of characters that are not present in the reachability of
* graph \p g after a certain depth (currently 8). If a character in this set * graph \p g after a certain depth (currently 8). If a character in this set
* is encountered, it means that the NFA is either dead or has not progressed * is encountered, it means that the NFA is either dead or has not progressed
* more than 8 characters from its start states. */ * more than 8 characters from its start states.
*
* This is only used to guide merging heuristics, use
* findLeftOffsetStopAlphabet for real uses.
*/
CharReach findStopAlphabet(const NGHolder &g, som_type som); CharReach findStopAlphabet(const NGHolder &g, som_type som);
/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then /** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then

View File

@ -1,136 +0,0 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Create an undirected graph from an NFAGraph.
*/
#ifndef NG_UNDIRECTED_H
#define NG_UNDIRECTED_H
#include "ng_holder.h"
#include "ng_util.h"
#include "ue2common.h"
#include "util/graph_range.h"
#include "util/unordered.h"
#include <vector>
#include <boost/graph/adjacency_list.hpp>
namespace ue2 {
/**
* \brief BGL graph type for the undirected NFA graph.
*
* Note that we use a set for the out-edge lists: this avoids the construction
* of parallel edges. The only vertex property constructed is \a
* vertex_index_t.
*/
using NFAUndirectedGraph = boost::adjacency_list<
boost::listS, // out edges
boost::listS, // vertices
boost::undirectedS, // graph is undirected
boost::property<boost::vertex_index_t, size_t>>; // vertex properties
using NFAUndirectedVertex = NFAUndirectedGraph::vertex_descriptor;
/**
* Make a copy of an NFAGraph with undirected edges, optionally without start
* vertices. Mappings from the original graph to the new one are provided.
*
* Note that new vertex indices are assigned contiguously in \a vertices(g)
* order.
*/
template <typename Graph>
NFAUndirectedGraph createUnGraph(const Graph &g,
bool excludeStarts,
bool excludeAccepts,
std::unordered_map<typename Graph::vertex_descriptor,
NFAUndirectedVertex> &old2new) {
NFAUndirectedGraph ug;
size_t idx = 0;
assert(old2new.empty());
old2new.reserve(num_vertices(g));
for (auto v : ue2::vertices_range(g)) {
// skip all accept nodes
if (excludeAccepts && is_any_accept(v, g)) {
continue;
}
// skip starts if required
if (excludeStarts && is_any_start(v, g)) {
continue;
}
auto nuv = boost::add_vertex(ug);
old2new.emplace(v, nuv);
boost::put(boost::vertex_index, ug, nuv, idx++);
}
// Track seen edges so that we don't insert parallel edges.
using Vertex = typename Graph::vertex_descriptor;
ue2_unordered_set<std::pair<Vertex, Vertex>> seen;
seen.reserve(num_edges(g));
auto make_ordered_edge = [](Vertex a, Vertex b) {
return std::make_pair(std::min(a, b), std::max(a, b));
};
for (const auto &e : ue2::edges_range(g)) {
auto u = source(e, g);
auto v = target(e, g);
if ((excludeAccepts && is_any_accept(u, g))
|| (excludeStarts && is_any_start(u, g))) {
continue;
}
if ((excludeAccepts && is_any_accept(v, g))
|| (excludeStarts && is_any_start(v, g))) {
continue;
}
if (!seen.emplace(make_ordered_edge(u, v)).second) {
continue; // skip parallel edge.
}
NFAUndirectedVertex new_u = old2new.at(u);
NFAUndirectedVertex new_v = old2new.at(v);
boost::add_edge(new_u, new_v, ug);
}
assert(!has_parallel_edge(ug));
return ug;
}
} // namespace ue2
#endif /* NG_UNDIRECTED_H */

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -60,6 +60,7 @@
#include "util/flat_containers.h" #include "util/flat_containers.h"
#include "util/graph.h" #include "util/graph.h"
#include "util/graph_range.h" #include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include "util/insertion_ordered.h" #include "util/insertion_ordered.h"
#include "util/make_unique.h" #include "util/make_unique.h"
#include "util/order_check.h" #include "util/order_check.h"
@ -133,14 +134,21 @@ bool createsTransientLHS(const NGHolder &g, const vector<NFAVertex> &vv,
return true; return true;
} }
/**
* Counts the number of vertices that are reachable from the set of sources
* given.
*/
static static
double calcSplitRatio(const NGHolder &g, const vector<NFAVertex> &vv) { size_t count_reachable(const NGHolder &g, const vector<NFAVertex> &sources,
flat_set<NFAVertex> not_reachable; small_color_map<decltype(get(vertex_index, g))> &color_map) {
find_unreachable(g, vv, &not_reachable); auto null_visitor = boost::make_dfs_visitor(boost::null_visitor());
double rv = (double)not_reachable.size() / num_vertices(g); color_map.fill(small_color::white);
rv = rv > 0.5 ? 1 - rv : rv;
return rv; for (auto v : sources) {
boost::depth_first_visit(g, v, null_visitor, color_map);
}
return color_map.count(small_color::black);
} }
static static
@ -687,8 +695,12 @@ unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g,
} }
if (last_chance) { if (last_chance) {
const size_t num_verts = num_vertices(g);
auto color_map = make_small_color_map(g);
for (auto &a : lits) { for (auto &a : lits) {
a->split_ratio = calcSplitRatio(g, a->vv); size_t num_reachable = count_reachable(g, a->vv, color_map);
double ratio = (double)num_reachable / (double)num_verts;
a->split_ratio = ratio > 0.5 ? 1 - ratio : ratio;
} }
} }
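A standalone sketch of the count_reachable() idea introduced above (plain BGL adjacency_list and default_color_type instead of ue2's NGHolder and small_color_map): run a DFS from each source over one shared colour map, then count how many vertices finished (black).

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/property_map/property_map.hpp>
#include <algorithm>
#include <iostream>
#include <vector>

using Graph = boost::adjacency_list<boost::vecS, boost::vecS, boost::directedS>;
using Vertex = Graph::vertex_descriptor;

static std::size_t count_reachable(const Graph &g,
                                   const std::vector<Vertex> &sources) {
    std::vector<boost::default_color_type> color(boost::num_vertices(g),
                                                 boost::white_color);
    auto cmap = boost::make_iterator_property_map(
        color.begin(), boost::get(boost::vertex_index, g));
    for (Vertex s : sources) {
        boost::depth_first_visit(g, s, boost::default_dfs_visitor(), cmap);
    }
    return std::count(color.begin(), color.end(), boost::black_color);
}

int main() {
    Graph g(5);
    boost::add_edge(0, 1, g);
    boost::add_edge(1, 2, g);
    boost::add_edge(3, 4, g);
    std::cout << count_reachable(g, {0}) << "\n"; // 3: vertices 0, 1, 2
    return 0;
}

The split ratio in findBestSplit() above is then just num_reachable / num_vertices, folded into the range [0, 0.5].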

View File

@ -176,11 +176,7 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
} }
if (d.is_unreachable()) { if (d.is_unreachable()) {
// If we're actually reachable, we'll have a min width, so we can assert(findMinWidth(h, filter, src).is_unreachable());
// return infinity in this case.
if (findMinWidth(h, filter, src).is_reachable()) {
return depth::infinity();
}
return d; return d;
} }

View File

@ -0,0 +1,376 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Parse and build ParsedLogical::logicalTree and combInfoMap.
*/
#include "logical_combination.h"
#include "parser/parse_error.h"
#include "util/container.h"
#include "hs_compile.h"
#include <vector>
using namespace std;
namespace ue2 {
u32 ParsedLogical::getLogicalKey(u32 a) {
auto it = toLogicalKeyMap.find(a);
if (it == toLogicalKeyMap.end()) {
// get size before assigning to avoid wacky LHS shenanigans
u32 size = toLogicalKeyMap.size();
bool inserted;
tie(it, inserted) = toLogicalKeyMap.emplace(a, size);
assert(inserted);
}
DEBUG_PRINTF("%u -> lkey %u\n", it->first, it->second);
return it->second;
}
u32 ParsedLogical::getCombKey(u32 a) {
auto it = toCombKeyMap.find(a);
if (it == toCombKeyMap.end()) {
u32 size = toCombKeyMap.size();
bool inserted;
tie(it, inserted) = toCombKeyMap.emplace(a, size);
assert(inserted);
}
DEBUG_PRINTF("%u -> ckey %u\n", it->first, it->second);
return it->second;
}
void ParsedLogical::addRelateCKey(u32 lkey, u32 ckey) {
auto it = lkey2ckeys.find(lkey);
if (it == lkey2ckeys.end()) {
bool inserted;
tie(it, inserted) = lkey2ckeys.emplace(lkey, set<u32>());
assert(inserted);
}
it->second.insert(ckey);
DEBUG_PRINTF("lkey %u belongs to combination key %u\n",
it->first, ckey);
}
#define TRY_RENUM_OP(ckey) \
do { \
if (ckey & LOGICAL_OP_BIT) { \
ckey = (ckey & ~LOGICAL_OP_BIT) + toLogicalKeyMap.size(); \
} \
} while(0)
u32 ParsedLogical::logicalTreeAdd(u32 op, u32 left, u32 right) {
LogicalOp lop;
assert((LOGICAL_OP_BIT & (u32)logicalTree.size()) == 0);
lop.id = LOGICAL_OP_BIT | (u32)logicalTree.size();
lop.op = op;
lop.lo = left;
lop.ro = right;
logicalTree.push_back(lop);
return lop.id;
}
void ParsedLogical::combinationInfoAdd(UNUSED u32 ckey, u32 id, u32 ekey,
u32 lkey_start, u32 lkey_result,
u64a min_offset, u64a max_offset) {
assert(ckey == combInfoMap.size());
CombInfo ci;
ci.id = id;
ci.ekey = ekey;
ci.start = lkey_start;
ci.result = lkey_result;
ci.min_offset = min_offset;
ci.max_offset = max_offset;
combInfoMap.push_back(ci);
DEBUG_PRINTF("ckey %u (id %u) -> lkey %u..%u, ekey=0x%x\n", ckey, ci.id,
ci.start, ci.result, ci.ekey);
}
void ParsedLogical::validateSubIDs(const unsigned *ids,
const char *const *expressions,
const unsigned *flags,
unsigned elements) {
for (const auto &it : toLogicalKeyMap) {
bool unknown = true;
u32 i = 0;
for (i = 0; i < elements; i++) {
if ((ids ? ids[i] : 0) == it.first) {
unknown = false;
break;
}
}
if (unknown) {
throw CompileError("Unknown sub-expression id.");
}
if (contains(toCombKeyMap, it.first)) {
throw CompileError("Have combination of combination.");
}
if (flags && (flags[i] & HS_FLAG_SOM_LEFTMOST)) {
throw CompileError("Have SOM flag in sub-expression.");
}
if (flags && (flags[i] & HS_FLAG_PREFILTER)) {
throw CompileError("Have PREFILTER flag in sub-expression.");
}
hs_compile_error_t *compile_err = NULL;
hs_expr_info_t *info = NULL;
hs_error_t err = hs_expression_info(expressions[i], flags[i], &info,
&compile_err);
if (err != HS_SUCCESS) {
hs_free_compile_error(compile_err);
throw CompileError("Run hs_expression_info() failed.");
}
if (!info) {
throw CompileError("Get hs_expr_info_t failed.");
} else {
if (info->unordered_matches) {
throw CompileError("Have unordered match in sub-expressions.");
}
free(info);
}
}
}
void ParsedLogical::logicalKeyRenumber() {
// renumber operation lkey in op vector
for (auto &op : logicalTree) {
TRY_RENUM_OP(op.id);
TRY_RENUM_OP(op.lo);
TRY_RENUM_OP(op.ro);
}
// renumber operation lkey in info map
for (auto &ci : combInfoMap) {
TRY_RENUM_OP(ci.start);
TRY_RENUM_OP(ci.result);
}
}
struct LogicalOperator {
LogicalOperator(u32 op_in, u32 paren_in)
: op(op_in), paren(paren_in) {}
u32 op;
u32 paren;
};
static
u32 toOperator(char c) {
u32 op = UNKNOWN_OP;
switch (c) {
case '!' :
op = LOGICAL_OP_NOT;
break;
case '&' :
op = LOGICAL_OP_AND;
break;
case '|' :
op = LOGICAL_OP_OR;
break;
default:
break;
};
return op;
}
static
bool cmpOperator(const LogicalOperator &op1, const LogicalOperator &op2) {
if (op1.paren < op2.paren) {
return false;
}
if (op1.paren > op2.paren) {
return true;
}
assert(op1.paren == op2.paren);
if (op1.op > op2.op) {
return false;
}
if (op1.op < op2.op) {
return true;
}
return true;
}
static
u32 fetchSubID(const char *logical, u32 &digit, u32 end) {
if (digit == (u32)-1) { // no digit parsing in progress
return (u32)-1;
}
assert(end > digit);
if (end - digit > 9) {
throw LocatedParseError("Expression id too large");
}
u32 mult = 1;
u32 sum = 0;
for (u32 j = end - 1; (j >= digit) && (j != (u32)-1) ; j--) {
assert(isdigit(logical[j]));
sum += (logical[j] - '0') * mult;
mult *= 10;
}
digit = (u32)-1;
return sum;
}
static
void popOperator(vector<LogicalOperator> &op_stack, vector<u32> &subid_stack,
ParsedLogical &pl) {
if (subid_stack.empty()) {
throw LocatedParseError("Not enough operand");
}
u32 right = subid_stack.back();
subid_stack.pop_back();
u32 left = 0;
if (op_stack.back().op != LOGICAL_OP_NOT) {
if (subid_stack.empty()) {
throw LocatedParseError("Not enough operand");
}
left = subid_stack.back();
subid_stack.pop_back();
}
subid_stack.push_back(pl.logicalTreeAdd(op_stack.back().op, left, right));
op_stack.pop_back();
}
static
char getValue(const vector<char> &lv, u32 ckey) {
if (ckey & LOGICAL_OP_BIT) {
return lv[ckey & ~LOGICAL_OP_BIT];
} else {
return 0;
}
}
static
bool hasMatchFromPurelyNegative(const vector<LogicalOp> &tree,
u32 start, u32 result) {
vector<char> lv(tree.size());
assert(start <= result);
for (u32 i = start; i <= result; i++) {
assert(i & LOGICAL_OP_BIT);
const LogicalOp &op = tree[i & ~LOGICAL_OP_BIT];
assert(i == op.id);
switch (op.op) {
case LOGICAL_OP_NOT:
lv[op.id & ~LOGICAL_OP_BIT] = !getValue(lv, op.ro);
break;
case LOGICAL_OP_AND:
lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) &
getValue(lv, op.ro);
break;
case LOGICAL_OP_OR:
lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) |
getValue(lv, op.ro);
break;
default:
assert(0);
break;
}
}
return lv[result & ~LOGICAL_OP_BIT];
}
void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
u32 ekey, u64a min_offset,
u64a max_offset) {
u32 ckey = getCombKey(id);
vector<LogicalOperator> op_stack;
vector<u32> subid_stack;
u32 lkey_start = INVALID_LKEY; // logical operation's lkey
u32 paren = 0; // parentheses
u32 digit = (u32)-1; // digit start offset, invalid offset is -1
u32 subid = (u32)-1;
u32 i;
try {
for (i = 0; logical[i]; i++) {
if (isdigit(logical[i])) {
if (digit == (u32)-1) { // new digit start
digit = i;
}
} else {
if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) {
subid_stack.push_back(getLogicalKey(subid));
addRelateCKey(subid_stack.back(), ckey);
}
if (logical[i] == ' ') { // skip whitespace
continue;
}
if (logical[i] == '(') {
paren += 1;
} else if (logical[i] == ')') {
if (paren <= 0) {
throw LocatedParseError("Not enough left parentheses");
}
paren -= 1;
} else {
u32 prio = toOperator(logical[i]);
if (prio != UNKNOWN_OP) {
LogicalOperator op(prio, paren);
while (!op_stack.empty()
&& cmpOperator(op_stack.back(), op)) {
popOperator(op_stack, subid_stack, *this);
if (lkey_start == INVALID_LKEY) {
lkey_start = subid_stack.back();
}
}
op_stack.push_back(op);
} else {
throw LocatedParseError("Unknown character");
}
}
}
}
if (paren != 0) {
throw LocatedParseError("Not enough right parentheses");
}
if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) {
subid_stack.push_back(getLogicalKey(subid));
addRelateCKey(subid_stack.back(), ckey);
}
while (!op_stack.empty()) {
popOperator(op_stack, subid_stack, *this);
if (lkey_start == INVALID_LKEY) {
lkey_start = subid_stack.back();
}
}
if (subid_stack.size() != 1) {
throw LocatedParseError("Not enough operator");
}
} catch (LocatedParseError &error) {
error.locate(i);
throw;
}
u32 lkey_result = subid_stack.back(); // logical operation's lkey
if (lkey_start == INVALID_LKEY) {
throw CompileError("No logical operation.");
}
if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) {
throw CompileError("Has match from purely negative sub-expressions.");
}
combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result,
min_offset, max_offset);
}
} // namespace ue2
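For context, the expression strings this parser consumes are logical-combination patterns passed through the public compile API: sub-expression ids combined with !, & and | plus parentheses. A minimal usage sketch (illustrative patterns and ids; the combination expression is marked with HS_FLAG_COMBINATION and compiled in block mode):

#include <hs.h>
#include <stdio.h>

int main(void) {
    // ids 101..103 are ordinary sub-patterns; id 1001 is the combination
    // "(101&102)|103".
    const char *exprs[] = {"abc", "def[0-9]+", "ghi", "(101&102)|103"};
    unsigned flags[] = {0, 0, 0, HS_FLAG_COMBINATION};
    unsigned ids[] = {101, 102, 103, 1001};
    hs_database_t *db = NULL;
    hs_compile_error_t *compile_err = NULL;

    if (hs_compile_multi(exprs, flags, ids, 4, HS_MODE_BLOCK, NULL, &db,
                         &compile_err) != HS_SUCCESS) {
        fprintf(stderr, "compile failed: %s\n", compile_err->message);
        hs_free_compile_error(compile_err);
        return 1;
    }
    hs_free_database(db);
    return 0;
}

A purely negative combination such as "!101" would be rejected by hasMatchFromPurelyNegative() above, since it would produce a match on input that matches none of the sub-expressions.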

View File

@ -0,0 +1,112 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Parse and build ParsedLogical::logicalTree and combInfoMap.
*/
#ifndef LOGICAL_COMBINATION_H
#define LOGICAL_COMBINATION_H
#include "util/logical.h"
#include <map>
#include <set>
#include <vector>
namespace ue2 {
class ParsedLogical {
friend class ReportManager;
public:
/** \brief Parse 1 logical expression \a logical, assign temporary ckey. */
void parseLogicalCombination(unsigned id, const char *logical, u32 ekey,
u64a min_offset, u64a max_offset);
/** \brief Check if all sub-expression id in combinations are valid. */
void validateSubIDs(const unsigned *ids, const char *const *expressions,
const unsigned *flags, unsigned elements);
/** \brief Renumber and assign final lkey for each logical operation
* after parsed all logical expressions. */
void logicalKeyRenumber();
/** \brief Fetch the lkey associated with the given expression id,
* assigning one if necessary. */
u32 getLogicalKey(u32 expressionId);
/** \brief Fetch the ckey associated with the given expression id,
* assigning one if necessary. */
u32 getCombKey(u32 expressionId);
/** \brief Add lkey's corresponding combination id. */
void addRelateCKey(u32 lkey, u32 ckey);
/** \brief Add one Logical Operation. */
u32 logicalTreeAdd(u32 op, u32 left, u32 right);
/** \brief Assign the combination info associated with the given ckey. */
void combinationInfoAdd(u32 ckey, u32 id, u32 ekey, u32 lkey_start,
u32 lkey_result, u64a min_offset, u64a max_offset);
const std::map<u32, u32> &getLkeyMap() const {
return toLogicalKeyMap;
}
const std::vector<LogicalOp> &getLogicalTree() const {
return logicalTree;
}
CombInfo getCombInfoById(u32 id) const {
u32 ckey = toCombKeyMap.at(id);
assert(ckey < combInfoMap.size());
return combInfoMap.at(ckey);
}
private:
/** \brief Mapping from ckey to combination info. */
std::vector<CombInfo> combInfoMap;
/** \brief Mapping from combination expression id to combination key,
 * which is used as an index into the combination bit-vector cache. */
std::map<u32, u32> toCombKeyMap;
/** \brief Mapping from expression id to logical key; the logical key is
 * used as an index into the LogicalOp array. */
std::map<u32, u32> toLogicalKeyMap;
/** \brief Mapping from logical key to related combination keys. */
std::map<u32, std::set<u32>> lkey2ckeys;
/** \brief Logical constraints, stored as a list of operations in postfix order. */
std::vector<LogicalOp> logicalTree;
};
} // namespace ue2
#endif
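getLogicalKey() and getCombKey() above share one idiom: map an arbitrary expression id to a dense key assigned on first use, taking the map's current size as the next key. A standalone sketch of that idiom (hypothetical KeyAssigner type, not part of this header):

#include <iostream>
#include <map>

struct KeyAssigner {
    std::map<unsigned, unsigned> to_key; // expression id -> dense key

    unsigned get(unsigned id) {
        auto it = to_key.find(id);
        if (it == to_key.end()) {
            // Take the size before inserting so the new entry gets the
            // next free key (0, 1, 2, ...).
            unsigned next = (unsigned)to_key.size();
            it = to_key.emplace(id, next).first;
        }
        return it->second;
    }
};

int main() {
    KeyAssigner a;
    std::cout << a.get(101) << "\n"; // 0
    std::cout << a.get(205) << "\n"; // 1
    std::cout << a.get(101) << "\n"; // 0 again: already assigned
    return 0;
}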

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -199,7 +199,7 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str()); DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str());
return ng.addLiteral(lit, expr.index, expr.report, expr.highlander, return ng.addLiteral(lit, expr.index, expr.report, expr.highlander,
expr.som); expr.som, expr.quiet);
} }
} // namespace ue2 } // namespace ue2

View File

@ -60,12 +60,11 @@ bool isAllowedCodepoint(u32 val) {
return true; return true;
} }
bool isValidUtf8(const char *expression) { bool isValidUtf8(const char *expression, const size_t len) {
if (!expression) { if (!expression) {
return true; return true;
} }
const size_t len = strlen(expression);
const u8 *s = (const u8 *)expression; const u8 *s = (const u8 *)expression;
u32 val; u32 val;

View File

@ -29,10 +29,12 @@
#ifndef PARSER_UTF8_VALIDATE_H #ifndef PARSER_UTF8_VALIDATE_H
#define PARSER_UTF8_VALIDATE_H #define PARSER_UTF8_VALIDATE_H
#include <cstddef> // size_t
namespace ue2 { namespace ue2 {
/** \brief Validate that the given expression is well-formed UTF-8. */ /** \brief Validate that the given expression is well-formed UTF-8. */
bool isValidUtf8(const char *expression); bool isValidUtf8(const char *expression, const size_t len);
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -42,6 +42,7 @@
#include "rose/runtime.h" #include "rose/runtime.h"
#include "som/som_runtime.h" #include "som/som_runtime.h"
#include "util/exhaust.h" #include "util/exhaust.h"
#include "util/logical.h"
#include "util/fatbit.h" #include "util/fatbit.h"
enum DedupeResult { enum DedupeResult {
@ -151,6 +152,93 @@ void clearEvec(const struct RoseEngine *rose, char *evec) {
mmbit_clear((u8 *)evec, rose->ekeyCount); mmbit_clear((u8 *)evec, rose->ekeyCount);
} }
/** \brief Test whether the given key (\a lkey) is set in the logical vector
* \a lvec. */
static really_inline
char getLogicalVal(const struct RoseEngine *rose, const char *lvec, u32 lkey) {
DEBUG_PRINTF("checking lkey matching %p %u\n", lvec, lkey);
assert(lkey != INVALID_LKEY);
assert(lkey < rose->lkeyCount + rose->lopCount);
return mmbit_isset((const u8 *)lvec, rose->lkeyCount + rose->lopCount,
lkey);
}
/** \brief Mark key \a lkey on in the logical vector. */
static really_inline
void setLogicalVal(const struct RoseEngine *rose, char *lvec, u32 lkey,
char val) {
DEBUG_PRINTF("marking as matched logical key %u\n", lkey);
assert(lkey != INVALID_LKEY);
assert(lkey < rose->lkeyCount + rose->lopCount);
switch (val) {
case 0:
mmbit_unset((u8 *)lvec, rose->lkeyCount + rose->lopCount, lkey);
break;
default:
mmbit_set((u8 *)lvec, rose->lkeyCount + rose->lopCount, lkey);
break;
}
}
/** \brief Mark key \a ckey on in the combination vector. */
static really_inline
void setCombinationActive(const struct RoseEngine *rose, char *cvec, u32 ckey) {
DEBUG_PRINTF("marking as active combination key %u\n", ckey);
assert(ckey != INVALID_CKEY);
assert(ckey < rose->ckeyCount);
mmbit_set((u8 *)cvec, rose->ckeyCount, ckey);
}
/** \brief Evaluate one combination's logical operations; returns 1 if the
 * combination is satisfied. */
static really_inline
char isLogicalCombination(const struct RoseEngine *rose, char *lvec,
u32 start, u32 result) {
const struct LogicalOp *logicalTree = (const struct LogicalOp *)
((const char *)rose + rose->logicalTreeOffset);
assert(start >= rose->lkeyCount);
assert(start <= result);
assert(result < rose->lkeyCount + rose->lopCount);
for (u32 i = start; i <= result; i++) {
const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount);
assert(i == op->id);
assert(op->op <= LAST_LOGICAL_OP);
switch ((enum LogicalOpType)op->op) {
case LOGICAL_OP_NOT:
setLogicalVal(rose, lvec, op->id,
!getLogicalVal(rose, lvec, op->ro));
break;
case LOGICAL_OP_AND:
setLogicalVal(rose, lvec, op->id,
getLogicalVal(rose, lvec, op->lo) &
getLogicalVal(rose, lvec, op->ro)); // &&
break;
case LOGICAL_OP_OR:
setLogicalVal(rose, lvec, op->id,
getLogicalVal(rose, lvec, op->lo) |
getLogicalVal(rose, lvec, op->ro)); // ||
break;
}
}
return getLogicalVal(rose, lvec, result);
}
/** \brief Clear all keys in the logical vector and the combination vector. */
static really_inline
void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) {
DEBUG_PRINTF("clearing lvec %p %u\n", lvec,
rose->lkeyCount + rose->lopCount);
DEBUG_PRINTF("clearing cvec %p %u\n", cvec, rose->ckeyCount);
mmbit_clear((u8 *)lvec, rose->lkeyCount + rose->lopCount);
mmbit_clear((u8 *)cvec, rose->ckeyCount);
}
/** \brief Clear all keys in the combination vector. */
static really_inline
void clearCvec(const struct RoseEngine *rose, char *cvec) {
DEBUG_PRINTF("clearing cvec %p %u\n", cvec, rose->ckeyCount);
mmbit_clear((u8 *)cvec, rose->ckeyCount);
}
/** /**
* \brief Deliver the given report to the user callback. * \brief Deliver the given report to the user callback.
* *
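isLogicalCombination() above relies on the build-time layout guaranteeing that every operand (lo/ro) of an operation has a smaller key than the operation itself, so a single left-to-right pass over [start, result] suffices. A simplified standalone sketch of that pass (a plain byte vector instead of the multibit-backed lvec, toy Op struct):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

enum OpType : uint32_t { OP_NOT, OP_AND, OP_OR };
struct Op { uint32_t id, type, lo, ro; }; // lo/ro index into vals[]

// vals[0..nkeys-1] hold per-pattern match flags; vals[nkeys..] hold the
// results of the logical operations, evaluated in order.
static bool evalCombination(std::vector<char> &vals, const std::vector<Op> &ops,
                            uint32_t nkeys) {
    for (const Op &op : ops) {
        assert(op.id >= nkeys && op.id < vals.size());
        switch (op.type) {
        case OP_NOT: vals[op.id] = !vals[op.ro]; break;
        case OP_AND: vals[op.id] = vals[op.lo] & vals[op.ro]; break;
        case OP_OR:  vals[op.id] = vals[op.lo] | vals[op.ro]; break;
        }
    }
    return vals[ops.back().id];
}

int main() {
    // Combination "(k0 & k1) | k2" with k0 and k2 matched, k1 not matched.
    std::vector<char> vals = {1, 0, 1, 0, 0}; // 3 keys + 2 op slots
    std::vector<Op> ops = {{3, OP_AND, 0, 1}, {4, OP_OR, 3, 2}};
    std::cout << (evalCombination(vals, ops, 3) ? "match" : "no match") << "\n";
    return 0;
}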

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -145,6 +145,7 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
tctxt->lastEndOffset = 0; tctxt->lastEndOffset = 0;
tctxt->filledDelayedSlots = 0; tctxt->filledDelayedSlots = 0;
tctxt->lastMatchOffset = 0; tctxt->lastMatchOffset = 0;
tctxt->lastCombMatchOffset = 0;
tctxt->minMatchOffset = 0; tctxt->minMatchOffset = 0;
tctxt->minNonMpvMatchOffset = 0; tctxt->minNonMpvMatchOffset = 0;
tctxt->next_mpv_offset = 0; tctxt->next_mpv_offset = 0;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -424,6 +424,12 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc,
} }
done: done:
if (t->flushCombProgramOffset) {
if (roseRunFlushCombProgram(t, scratch, mpv_exec_end)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
updateMinMatchOffsetFromMpv(&scratch->tctxt, mpv_exec_end); updateMinMatchOffsetFromMpv(&scratch->tctxt, mpv_exec_end);
scratch->tctxt.next_mpv_offset scratch->tctxt.next_mpv_offset
= MAX(next_pos_match_loc + scratch->core_info.buf_offset, = MAX(next_pos_match_loc + scratch->core_info.buf_offset,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -51,6 +51,7 @@
#include "hwlm/hwlm.h" #include "hwlm/hwlm.h"
#include "runtime.h" #include "runtime.h"
#include "scratch.h" #include "scratch.h"
#include "rose.h"
#include "rose_common.h" #include "rose_common.h"
#include "rose_internal.h" #include "rose_internal.h"
#include "ue2common.h" #include "ue2common.h"
@ -105,6 +106,12 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc,
assert(!can_stop_matching(scratch)); assert(!can_stop_matching(scratch));
if (canSkipCatchUpMPV(t, scratch, cur_offset)) { if (canSkipCatchUpMPV(t, scratch, cur_offset)) {
if (t->flushCombProgramOffset) {
if (roseRunFlushCombProgram(t, scratch, cur_offset)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset); updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset);
return HWLM_CONTINUE_MATCHING; return HWLM_CONTINUE_MATCHING;
} }
@ -139,6 +146,12 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
hwlmcb_rv_t rv; hwlmcb_rv_t rv;
if (!t->activeArrayCount if (!t->activeArrayCount
|| !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { || !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
if (t->flushCombProgramOffset) {
if (roseRunFlushCombProgram(t, scratch, end)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
updateMinMatchOffset(&scratch->tctxt, end); updateMinMatchOffset(&scratch->tctxt, end);
rv = HWLM_CONTINUE_MATCHING; rv = HWLM_CONTINUE_MATCHING;
} else { } else {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -571,6 +571,22 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
/**
* \brief Execute a flush combination program.
*
* Returns MO_HALT_MATCHING if the stream is exhausted or the user has
* instructed us to halt, or MO_CONTINUE_MATCHING otherwise.
*/
int roseRunFlushCombProgram(const struct RoseEngine *rose,
struct hs_scratch *scratch, u64a end) {
hwlmcb_rv_t rv = roseRunProgram(rose, scratch, rose->flushCombProgramOffset,
0, end, 0);
if (rv == HWLM_TERMINATE_MATCHING) {
return MO_HALT_MATCHING;
}
return MO_CONTINUE_MATCHING;
}
int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
struct hs_scratch *scratch = context; struct hs_scratch *scratch = context;
assert(scratch && scratch->magic == SCRATCH_MAGIC); assert(scratch && scratch->magic == SCRATCH_MAGIC);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -66,6 +66,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
u64a top_squash_distance, u64a end, u64a top_squash_distance, u64a end,
char in_catchup); char in_catchup);
/** \brief Initialize the queue for a suffix/outfix engine. */
static really_inline static really_inline
void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
struct hs_scratch *scratch) { struct hs_scratch *scratch) {
@ -90,6 +91,7 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
info->stateOffset, *(u32 *)q->state); info->stateOffset, *(u32 *)q->state);
} }
/** \brief Initialize the queue for a leftfix (prefix/infix) engine. */
static really_inline static really_inline
void initRoseQueue(const struct RoseEngine *t, u32 qi, void initRoseQueue(const struct RoseEngine *t, u32 qi,
const struct LeftNfaInfo *left, const struct LeftNfaInfo *left,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -501,8 +501,7 @@ hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch,
} }
/* catches up engines enough to ensure any earlier mpv triggers are enqueued /* catches up engines enough to ensure any earlier mpv triggers are enqueued
* and then adds the trigger to the mpv queue. Must not be called during catch * and then adds the trigger to the mpv queue. */
* up */
static rose_inline static rose_inline
hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t,
struct hs_scratch *scratch, struct hs_scratch *scratch,
@ -558,6 +557,22 @@ void roseHandleSomSom(struct hs_scratch *scratch,
setSomFromSomAware(scratch, sr, start, end); setSomFromSomAware(scratch, sr, start, end);
} }
static rose_inline
hwlmcb_rv_t roseSetExhaust(const struct RoseEngine *t,
struct hs_scratch *scratch, u32 ekey) {
assert(scratch);
assert(scratch->magic == SCRATCH_MAGIC);
struct core_info *ci = &scratch->core_info;
assert(!can_stop_matching(scratch));
assert(!isExhausted(ci->rose, ci->exhaustionVector, ekey));
markAsMatched(ci->rose, ci->exhaustionVector, ekey);
return roseHaltIfExhausted(t, scratch);
}
static really_inline static really_inline
int reachHasBit(const u8 *reach, u8 c) { int reachHasBit(const u8 *reach, u8 c) {
return !!(reach[c / 8U] & (u8)1U << (c % 8U)); return !!(reach[c / 8U] & (u8)1U << (c % 8U));
@ -1823,6 +1838,56 @@ void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
} }
} }
static rose_inline
hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t,
struct hs_scratch *scratch) {
u8 *cvec = (u8 *)scratch->core_info.combVector;
if (!mmbit_any(cvec, t->ckeyCount)) {
return HWLM_CONTINUE_MATCHING;
}
u64a end = scratch->tctxt.lastCombMatchOffset;
for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) {
const struct CombInfo *combInfoMap = (const struct CombInfo *)
((const char *)t + t->combInfoMapOffset);
const struct CombInfo *ci = combInfoMap + i;
if ((ci->min_offset != 0) && (end < ci->min_offset)) {
DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset);
continue;
}
if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) {
DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset);
continue;
}
DEBUG_PRINTF("check ekey %u\n", ci->ekey);
if (ci->ekey != INVALID_EKEY) {
assert(ci->ekey < t->ekeyCount);
const char *evec = scratch->core_info.exhaustionVector;
if (isExhausted(t, evec, ci->ekey)) {
DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
ci->ekey);
continue;
}
}
DEBUG_PRINTF("check ckey %u\n", i);
char *lvec = scratch->core_info.logicalVector;
if (!isLogicalCombination(t, lvec, ci->start, ci->result)) {
DEBUG_PRINTF("Logical Combination Failed!\n");
continue;
}
DEBUG_PRINTF("Logical Combination Passed!\n");
if (roseReport(t, scratch, end, ci->id, 0,
ci->ekey) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
clearCvec(t, (char *)cvec);
return HWLM_CONTINUE_MATCHING;
}
#define PROGRAM_CASE(name) \ #define PROGRAM_CASE(name) \
case ROSE_INSTR_##name: { \ case ROSE_INSTR_##name: { \
DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \
@ -2588,6 +2653,47 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
} }
} }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(SET_LOGICAL) {
DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
ri->lkey, ri->offset_adjust);
assert(ri->lkey != INVALID_LKEY);
assert(ri->lkey < t->lkeyCount);
char *lvec = scratch->core_info.logicalVector;
setLogicalVal(t, lvec, ri->lkey, 1);
updateLastCombMatchOffset(tctxt, end + ri->offset_adjust);
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(SET_COMBINATION) {
DEBUG_PRINTF("set ckey %u as active\n", ri->ckey);
assert(ri->ckey != INVALID_CKEY);
assert(ri->ckey < t->ckeyCount);
char *cvec = scratch->core_info.combVector;
setCombinationActive(t, cvec, ri->ckey);
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(FLUSH_COMBINATION) {
assert(end >= tctxt->lastCombMatchOffset);
if (end > tctxt->lastCombMatchOffset) {
if (flushActiveCombinations(t, scratch)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(SET_EXHAUST) {
updateSeqPoint(tctxt, end, from_mpv);
if (roseSetExhaust(t, scratch, ri->ekey)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
work_done = 1;
}
PROGRAM_NEXT_INSTRUCTION
} }
} }
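flushActiveCombinations() above follows a simple shape: walk the set bits of the combination vector, skip entries that fail their offset-window, exhaustion or logical-tree checks, report the survivors, then clear the vector. A much-simplified standalone sketch of that control flow (a single 64-bit word instead of an mmbit, hypothetical check/report callbacks, GCC/Clang __builtin_ctzll):

#include <cstdint>
#include <iostream>

template <typename CheckFn, typename ReportFn>
static bool flushActive(uint64_t &active, CheckFn check, ReportFn report) {
    for (uint64_t bits = active; bits; bits &= bits - 1) {
        uint32_t key = (uint32_t)__builtin_ctzll(bits); // lowest set bit
        if (!check(key)) {
            continue; // outside offset window, exhausted, or tree evaluated false
        }
        if (!report(key)) {
            return false; // analogous to HWLM_TERMINATE_MATCHING
        }
    }
    active = 0; // analogous to clearCvec()
    return true;
}

int main() {
    uint64_t cvec = (1u << 2) | (1u << 5);
    flushActive(cvec,
                [](uint32_t k) { return k != 5; }, // pretend ckey 5 fails its checks
                [](uint32_t k) {
                    std::cout << "report ckey " << k << "\n";
                    return true;
                });
    return 0;
}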

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -53,4 +53,7 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context);
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
u64a stream_offset, struct hs_scratch *scratch); u64a stream_offset, struct hs_scratch *scratch);
int roseRunFlushCombProgram(const struct RoseEngine *rose,
struct hs_scratch *scratch, u64a end);
#endif // ROSE_H #endif // ROSE_H

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -453,7 +453,7 @@ RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
<= tbi->cc.grey.maxAnchoredRegion) { <= tbi->cc.grey.maxAnchoredRegion) {
if (ep.maxBound || ep.minBound) { if (ep.maxBound || ep.minBound) {
/* TODO: handle, however these cases are not generated currently by /* TODO: handle, however these cases are not generated currently by
ng_rose */ ng_violet */
return RoseGraph::null_vertex(); return RoseGraph::null_vertex();
} }
max_width = depth(ep.maxBound + iv_info.s.length()); max_width = depth(ep.maxBound + iv_info.s.length());
@ -567,7 +567,7 @@ void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
assert(iv_info.type == RIV_LITERAL); assert(iv_info.type == RIV_LITERAL);
assert(!parents.empty()); /* start vertices should not be here */ assert(!parents.empty()); /* start vertices should not be here */
// ng_rose should have ensured that mixed-sensitivity literals are no // ng_violet should have ensured that mixed-sensitivity literals are no
// longer than the benefits max width. // longer than the benefits max width.
assert(iv_info.s.length() <= MAX_MASK2_WIDTH || assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
!mixed_sensitivity(iv_info.s)); !mixed_sensitivity(iv_info.s));
@ -1849,10 +1849,9 @@ bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
return true; /* failure is not yet an option */ return true; /* failure is not yet an option */
} }
static static
bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w, bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
u32 max_adj, NFAVertex u, NFAVertex u,
const vector<DepthMinMax> &vertexDepths, const vector<DepthMinMax> &vertexDepths,
map<u32, DepthMinMax> &depthMap, map<u32, DepthMinMax> &depthMap,
map<NFAVertex, set<u32>> &reportMap, map<NFAVertex, set<u32>> &reportMap,
@ -1883,9 +1882,9 @@ bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d); depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
} }
if (depthMap[lit_id].max + depth(max_adj) > max_anchored_depth) { if (depthMap[lit_id].max > max_anchored_depth) {
DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n", DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
(depthMap[lit_id].max + depth(max_adj)).str().c_str(), depthMap[lit_id].max.str().c_str(),
tbi.cc.grey.maxAnchoredRegion); tbi.cc.grey.maxAnchoredRegion);
return false; return false;
} }
@ -1932,7 +1931,7 @@ bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
flat_set<u32> added_lit_ids; /* literal ids added for this NFA */ flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
for (auto v : inv_adjacent_vertices_range(h.accept, h)) { for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
if (!prepAcceptForAddAnchoredNFA(*this, h, 0, v, vertexDepths, depthMap, if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
reportMap, allocated_reports, reportMap, allocated_reports,
added_lit_ids)) { added_lit_ids)) {
removeAddedLiterals(*this, added_lit_ids); removeAddedLiterals(*this, added_lit_ids);
@ -1946,7 +1945,7 @@ bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
if (v == h.accept) { if (v == h.accept) {
continue; continue;
} }
if (!prepAcceptForAddAnchoredNFA(*this, h, 0, v, vertexDepths, depthMap, if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
reportMap, allocated_reports_eod, reportMap, allocated_reports_eod,
added_lit_ids)) { added_lit_ids)) {
removeAddedLiterals(*this, added_lit_ids); removeAddedLiterals(*this, added_lit_ids);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -426,6 +426,17 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
curr_offset += mmbit_size(build.rm.numEkeys()); curr_offset += mmbit_size(build.rm.numEkeys());
so->exhausted_size = mmbit_size(build.rm.numEkeys()); so->exhausted_size = mmbit_size(build.rm.numEkeys());
// Logical multibit.
so->logicalVec = curr_offset;
so->logicalVec_size = mmbit_size(build.rm.numLogicalKeys() +
build.rm.numLogicalOps());
curr_offset += so->logicalVec_size;
// Combination multibit.
so->combVec = curr_offset;
so->combVec_size = mmbit_size(build.rm.numCkeys());
curr_offset += so->combVec_size;
// SOM locations and valid/writeable multibit structures. // SOM locations and valid/writeable multibit structures.
if (build.ssm.numSomSlots()) { if (build.ssm.numSomSlots()) {
const u32 somWidth = build.ssm.somPrecision(); const u32 somWidth = build.ssm.somPrecision();
@ -2469,6 +2480,18 @@ void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto,
proto.rosePrefixCount = countRosePrefixes(leftInfoTable); proto.rosePrefixCount = countRosePrefixes(leftInfoTable);
} }
static
void writeLogicalInfo(const ReportManager &rm, RoseEngineBlob &engine_blob,
RoseEngine &proto) {
const auto &tree = rm.getLogicalTree();
proto.logicalTreeOffset = engine_blob.add_range(tree);
const auto &combMap = rm.getCombInfoMap();
proto.combInfoMapOffset = engine_blob.add_range(combMap);
proto.lkeyCount = rm.numLogicalKeys();
proto.lopCount = rm.numLogicalOps();
proto.ckeyCount = rm.numCkeys();
}
static static
void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, void writeNfaInfo(const RoseBuildImpl &build, build_context &bc,
RoseEngine &proto, const set<u32> &no_retrigger_queues) { RoseEngine &proto, const set<u32> &no_retrigger_queues) {
@ -3313,6 +3336,15 @@ RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc,
return program; return program;
} }
static
RoseProgram makeFlushCombProgram(const RoseEngine &t) {
RoseProgram program;
if (t.ckeyCount) {
addFlushCombinationProgram(program);
}
return program;
}
static static
u32 history_required(const rose_literal_id &key) { u32 history_required(const rose_literal_id &key) {
if (key.msk.size() < key.s.length()) { if (key.msk.size() < key.s.length()) {
@ -3678,6 +3710,10 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
writeDkeyInfo(rm, bc.engine_blob, proto); writeDkeyInfo(rm, bc.engine_blob, proto);
writeLeftInfo(bc.engine_blob, proto, leftInfoTable); writeLeftInfo(bc.engine_blob, proto, leftInfoTable);
writeLogicalInfo(rm, bc.engine_blob, proto);
auto flushComb_prog = makeFlushCombProgram(proto);
proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog));
// Build anchored matcher. // Build anchored matcher.
auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas); auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas);
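fillStateOffsets() above appends the new logical and combination multibits to the per-stream state using a running offset. A simplified standalone sketch of that layout scheme (a byte-rounded bitmap size as a stand-in for mmbit_size(); the real multibit layout differs, using 8-byte blocks plus tree levels for large key counts):

#include <cstdint>
#include <iostream>

// Simplified stand-in for mmbit_size().
static uint32_t bitmap_bytes(uint32_t keys) { return (keys + 7) / 8; }

int main() {
    uint32_t curr = 0;
    uint32_t ekeyCount = 100, lkeyCount = 10, lopCount = 4, ckeyCount = 3;

    uint32_t exhausted = curr;
    curr += bitmap_bytes(ekeyCount);

    uint32_t logicalVec = curr; // logical keys and op results share one vector
    curr += bitmap_bytes(lkeyCount + lopCount);

    uint32_t combVec = curr;
    curr += bitmap_bytes(ckeyCount);

    std::cout << "exhausted@" << exhausted << " logicalVec@" << logicalVec
              << " combVec@" << combVec << " state bytes=" << curr << "\n";
    return 0;
}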

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -1469,6 +1469,25 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
} }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(SET_LOGICAL) {
os << " lkey " << ri->lkey << endl;
os << " offset_adjust " << ri->offset_adjust << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(SET_COMBINATION) {
os << " ckey " << ri->ckey << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(FLUSH_COMBINATION) {}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(SET_EXHAUST) {
os << " ekey " << ri->ekey << endl;
}
PROGRAM_NEXT_INSTRUCTION
default: default:
os << " UNKNOWN (code " << int{code} << ")" << endl; os << " UNKNOWN (code " << int{code} << ")" << endl;
os << " <stopping>" << endl; os << " <stopping>" << endl;
@ -1523,6 +1542,23 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
os.close(); os.close();
} }
static
void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename);
const char *base = (const char *)t;
if (t->flushCombProgramOffset) {
os << "Flush Combination Program @ " << t->flushCombProgramOffset
<< ":" << endl;
dumpProgram(os, t, base + t->flushCombProgramOffset);
os << endl;
} else {
os << "<No Flush Combination Program>" << endl;
}
os.close();
}
static static
void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename); ofstream os(filename);
@ -2028,6 +2064,10 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
fprintf(f, " - exhaustion vector : %u bytes\n", fprintf(f, " - exhaustion vector : %u bytes\n",
t->stateOffsets.exhausted_size); t->stateOffsets.exhausted_size);
fprintf(f, " - logical vector : %u bytes\n",
t->stateOffsets.logicalVec_size);
fprintf(f, " - combination vector: %u bytes\n",
t->stateOffsets.combVec_size);
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
fprintf(f, " - active array : %u bytes\n", fprintf(f, " - active array : %u bytes\n",
@ -2092,6 +2132,11 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, mode); DUMP_U32(t, mode);
DUMP_U32(t, historyRequired); DUMP_U32(t, historyRequired);
DUMP_U32(t, ekeyCount); DUMP_U32(t, ekeyCount);
DUMP_U32(t, lkeyCount);
DUMP_U32(t, lopCount);
DUMP_U32(t, ckeyCount);
DUMP_U32(t, logicalTreeOffset);
DUMP_U32(t, combInfoMapOffset);
DUMP_U32(t, dkeyCount); DUMP_U32(t, dkeyCount);
DUMP_U32(t, dkeyLogSize); DUMP_U32(t, dkeyLogSize);
DUMP_U32(t, invDkeyOffset); DUMP_U32(t, invDkeyOffset);
@ -2127,6 +2172,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, leftOffset); DUMP_U32(t, leftOffset);
DUMP_U32(t, roseCount); DUMP_U32(t, roseCount);
DUMP_U32(t, eodProgramOffset); DUMP_U32(t, eodProgramOffset);
DUMP_U32(t, flushCombProgramOffset);
DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, lastByteHistoryIterOffset);
DUMP_U32(t, minWidth); DUMP_U32(t, minWidth);
DUMP_U32(t, minWidthExcludingBoundaries); DUMP_U32(t, minWidthExcludingBoundaries);
@ -2150,6 +2196,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, stateOffsets.history); DUMP_U32(t, stateOffsets.history);
DUMP_U32(t, stateOffsets.exhausted); DUMP_U32(t, stateOffsets.exhausted);
DUMP_U32(t, stateOffsets.exhausted_size); DUMP_U32(t, stateOffsets.exhausted_size);
DUMP_U32(t, stateOffsets.logicalVec);
DUMP_U32(t, stateOffsets.logicalVec_size);
DUMP_U32(t, stateOffsets.combVec);
DUMP_U32(t, stateOffsets.combVec_size);
DUMP_U32(t, stateOffsets.activeLeafArray); DUMP_U32(t, stateOffsets.activeLeafArray);
DUMP_U32(t, stateOffsets.activeLeafArray_size); DUMP_U32(t, stateOffsets.activeLeafArray_size);
DUMP_U32(t, stateOffsets.activeLeftArray); DUMP_U32(t, stateOffsets.activeLeftArray);
@ -2200,6 +2250,7 @@ void roseDumpPrograms(const vector<LitFragment> &fragments, const RoseEngine *t,
const string &base) { const string &base) {
dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt"); dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt");
dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt");
dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt");
dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -47,6 +47,7 @@ RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default;
RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; RoseInstrMatcherEod::~RoseInstrMatcherEod() = default;
RoseInstrEnd::~RoseInstrEnd() = default; RoseInstrEnd::~RoseInstrEnd() = default;
RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default; RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default;
RoseInstrFlushCombination::~RoseInstrFlushCombination() = default;
using OffsetMap = RoseInstruction::OffsetMap; using OffsetMap = RoseInstruction::OffsetMap;
@ -644,4 +645,26 @@ void RoseInstrIncludedJump::write(void *dest, RoseEngineBlob &blob,
inst->squash = squash; inst->squash = squash;
} }
void RoseInstrSetLogical::write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const {
RoseInstrBase::write(dest, blob, offset_map);
auto *inst = static_cast<impl_type *>(dest);
inst->lkey = lkey;
inst->offset_adjust = offset_adjust;
}
void RoseInstrSetCombination::write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const {
RoseInstrBase::write(dest, blob, offset_map);
auto *inst = static_cast<impl_type *>(dest);
inst->ckey = ckey;
}
void RoseInstrSetExhaust::write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const {
RoseInstrBase::write(dest, blob, offset_map);
auto *inst = static_cast<impl_type *>(dest);
inst->ekey = ekey;
}
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -2144,6 +2144,94 @@ public:
} }
}; };
class RoseInstrSetLogical
: public RoseInstrBaseNoTargets<ROSE_INSTR_SET_LOGICAL,
ROSE_STRUCT_SET_LOGICAL,
RoseInstrSetLogical> {
public:
u32 lkey;
s32 offset_adjust;
RoseInstrSetLogical(u32 lkey_in, s32 offset_adjust_in)
: lkey(lkey_in), offset_adjust(offset_adjust_in) {}
bool operator==(const RoseInstrSetLogical &ri) const {
return lkey == ri.lkey && offset_adjust == ri.offset_adjust;
}
size_t hash() const override {
return hash_all(opcode, lkey, offset_adjust);
}
void write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const override;
bool equiv_to(const RoseInstrSetLogical &ri, const OffsetMap &,
const OffsetMap &) const {
return lkey == ri.lkey && offset_adjust == ri.offset_adjust;
}
};
class RoseInstrSetCombination
: public RoseInstrBaseNoTargets<ROSE_INSTR_SET_COMBINATION,
ROSE_STRUCT_SET_COMBINATION,
RoseInstrSetCombination> {
public:
u32 ckey;
RoseInstrSetCombination(u32 ckey_in) : ckey(ckey_in) {}
bool operator==(const RoseInstrSetCombination &ri) const {
return ckey == ri.ckey;
}
size_t hash() const override {
return hash_all(opcode, ckey);
}
void write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const override;
bool equiv_to(const RoseInstrSetCombination &ri, const OffsetMap &,
const OffsetMap &) const {
return ckey == ri.ckey;
}
};
class RoseInstrFlushCombination
: public RoseInstrBaseTrivial<ROSE_INSTR_FLUSH_COMBINATION,
ROSE_STRUCT_FLUSH_COMBINATION,
RoseInstrFlushCombination> {
public:
~RoseInstrFlushCombination() override;
};
class RoseInstrSetExhaust
: public RoseInstrBaseNoTargets<ROSE_INSTR_SET_EXHAUST,
ROSE_STRUCT_SET_EXHAUST,
RoseInstrSetExhaust> {
public:
u32 ekey;
RoseInstrSetExhaust(u32 ekey_in) : ekey(ekey_in) {}
bool operator==(const RoseInstrSetExhaust &ri) const {
return ekey == ri.ekey;
}
size_t hash() const override {
return hash_all(opcode, ekey);
}
void write(void *dest, RoseEngineBlob &blob,
const OffsetMap &offset_map) const override;
bool equiv_to(const RoseInstrSetExhaust &ri, const OffsetMap &,
const OffsetMap &) const {
return ekey == ri.ekey;
}
};
class RoseInstrEnd class RoseInstrEnd
: public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END, : public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
RoseInstrEnd> { RoseInstrEnd> {
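Each new instruction class above supplies operator==, a hash() built from hash_all(opcode, fields...) and equiv_to(), which the bytecode builder can use to deduplicate identical Rose programs. A standalone sketch of that variadic hashing idiom (boost-style hash_combine rather than ue2's hash_all, using a C++17 fold expression):

#include <cstddef>
#include <functional>
#include <iostream>

static void hash_combine(std::size_t &seed, std::size_t v) {
    seed ^= v + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
}

template <typename... Args>
static std::size_t hash_all(const Args &... args) {
    std::size_t seed = 0;
    (hash_combine(seed, std::hash<Args>()(args)), ...); // fold over each field
    return seed;
}

int main() {
    unsigned opcode = 42, lkey = 7;
    int offset_adjust = -3;
    std::cout << hash_all(opcode, lkey, offset_adjust) << "\n";
    return 0;
}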

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -459,7 +459,7 @@ public:
const_iterator end() const { return ordering.end(); } const_iterator end() const { return ordering.end(); }
}; };
typedef Bouquet<left_id> RoseBouquet; typedef Bouquet<left_id> LeftfixBouquet;
typedef Bouquet<suffix_id> SuffixBouquet; typedef Bouquet<suffix_id> SuffixBouquet;
} // namespace } // namespace
@ -565,7 +565,7 @@ bool hasSameEngineType(const RoseVertexProps &u_prop,
* *
* Parameters are vectors of literals + lag pairs. * Parameters are vectors of literals + lag pairs.
* *
* Note: if more constaints of when the leftfixes were going to be checked * Note: if more constraints of when the leftfixes were going to be checked
* (mandatory lookarounds passing, offset checks), more merges may be allowed. * (mandatory lookarounds passing, offset checks), more merges may be allowed.
*/ */
static static
@ -599,7 +599,7 @@ bool compatibleLiteralsForMerge(
/* An engine requires that all accesses to it are ordered by offsets. (ie, /* An engine requires that all accesses to it are ordered by offsets. (ie,
we can not check an engine's state at offset Y, if we have already we can not check an engine's state at offset Y, if we have already
checked its status at offset X and X > Y). If we can not establish that checked its status at offset X and X > Y). If we can not establish that
the literals used for triggering will statisfy this property, then it is the literals used for triggering will satisfy this property, then it is
not safe to merge the engine. */ not safe to merge the engine. */
for (const auto &ue : ulits) { for (const auto &ue : ulits) {
const rose_literal_id &ul = *ue.first; const rose_literal_id &ul = *ue.first;
@ -1437,7 +1437,19 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
assert(!parents.empty()); assert(!parents.empty());
#ifndef _WIN32
engine_groups[MergeKey(left, parents)].push_back(left); engine_groups[MergeKey(left, parents)].push_back(left);
#else
// On Windows, the MergeKey object passed into the map 'engine_groups'
// is not copied; it is freed along with engine_groups.clear().
// If the MergeKey object is constructed on the stack, it is destroyed
// once when it goes out of scope and again on engine_groups.clear(),
// which triggers an is_block_type_valid() assertion in the MergeKey
// destructor.
MergeKey *mk = new MergeKey(left, parents);
engine_groups[*mk].push_back(left);
#endif
} }
vector<vector<left_id>> chunks; vector<vector<left_id>> chunks;
@ -1778,7 +1790,7 @@ u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
} }
static static
void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) { void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) {
RoseGraph &g = tbi.g; RoseGraph &g = tbi.g;
DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size()); DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
@ -1894,7 +1906,7 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
RoseGraph &g = tbi.g; RoseGraph &g = tbi.g;
RoseBouquet nfa_roses; LeftfixBouquet nfa_leftfixes;
for (auto v : vertices_range(g)) { for (auto v : vertices_range(g)) {
if (!g[v].left) { if (!g[v].left) {
@ -1939,20 +1951,20 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
continue; continue;
} }
nfa_roses.insert(left, v); nfa_leftfixes.insert(left, v);
} }
deque<RoseBouquet> rose_groups; deque<LeftfixBouquet> leftfix_groups;
chunkBouquets(nfa_roses, rose_groups, MERGE_GROUP_SIZE_MAX); chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX);
nfa_roses.clear(); nfa_leftfixes.clear();
DEBUG_PRINTF("chunked nfa roses into %zu groups\n", rose_groups.size()); DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n",
leftfix_groups.size());
for (auto &group : rose_groups) { for (auto &group : leftfix_groups) {
mergeNfaLeftfixes(tbi, group); mergeNfaLeftfixes(tbi, group);
} }
} }
static static
void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands,
insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) {
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -993,15 +993,19 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) {
return true; return true;
} }
/**
* \brief True if there is an engine with a top that is not triggered by a
* vertex in the Rose graph. This is a consistency check used in assertions.
*/
bool hasOrphanedTops(const RoseBuildImpl &build) { bool hasOrphanedTops(const RoseBuildImpl &build) {
const RoseGraph &g = build.g; const RoseGraph &g = build.g;
unordered_map<left_id, set<u32>> roses; unordered_map<left_id, set<u32>> leftfixes;
unordered_map<suffix_id, set<u32>> suffixes; unordered_map<suffix_id, set<u32>> suffixes;
for (auto v : vertices_range(g)) { for (auto v : vertices_range(g)) {
if (g[v].left) { if (g[v].left) {
set<u32> &tops = roses[g[v].left]; set<u32> &tops = leftfixes[g[v].left];
if (!build.isRootSuccessor(v)) { if (!build.isRootSuccessor(v)) {
// Tops for infixes come from the in-edges. // Tops for infixes come from the in-edges.
for (const auto &e : in_edges_range(v, g)) { for (const auto &e : in_edges_range(v, g)) {
@ -1014,7 +1018,7 @@ bool hasOrphanedTops(const RoseBuildImpl &build) {
} }
} }
for (const auto &e : roses) { for (const auto &e : leftfixes) {
if (all_tops(e.first) != e.second) { if (all_tops(e.first) != e.second) {
DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n", DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n",
as_string_list(all_tops(e.first)).c_str(), as_string_list(all_tops(e.first)).c_str(),
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -280,7 +280,7 @@ void stripCheckHandledInstruction(RoseProgram &prog) {
} }
/** Returns true if the program may read the the interpreter's work_done flag */ /** Returns true if the program may read the interpreter's work_done flag */
static static
bool reads_work_done_flag(const RoseProgram &prog) { bool reads_work_done_flag(const RoseProgram &prog) {
for (const auto &ri : prog) { for (const auto &ri : prog) {
@ -313,6 +313,10 @@ void addMatcherEodProgram(RoseProgram &program) {
program.add_block(move(block)); program.add_block(move(block));
} }
void addFlushCombinationProgram(RoseProgram &program) {
program.add_before_end(make_unique<RoseInstrFlushCombination>());
}
static static
void makeRoleCheckLeftfix(const RoseBuildImpl &build, void makeRoleCheckLeftfix(const RoseBuildImpl &build,
const map<RoseVertex, left_build_info> &leftfix_info, const map<RoseVertex, left_build_info> &leftfix_info,
@ -496,6 +500,23 @@ void writeSomOperation(const Report &report, som_operation *op) {
} }
} }
static
void addLogicalSetRequired(const Report &report, ReportManager &rm,
RoseProgram &program) {
if (report.lkey == INVALID_LKEY) {
return;
}
// set matching status of current lkey
auto risl = make_unique<RoseInstrSetLogical>(report.lkey,
report.offsetAdjust);
program.add_before_end(move(risl));
// mark the current lkey's corresponding ckeys active, pending check
for (auto ckey : rm.getRelateCKeys(report.lkey)) {
auto risc = make_unique<RoseInstrSetCombination>(ckey);
program.add_before_end(move(risc));
}
}
static static
void makeReport(const RoseBuildImpl &build, const ReportID id, void makeReport(const RoseBuildImpl &build, const ReportID id,
const bool has_som, RoseProgram &program) { const bool has_som, RoseProgram &program) {
@ -542,38 +563,62 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
switch (report.type) { switch (report.type) {
case EXTERNAL_CALLBACK: case EXTERNAL_CALLBACK:
if (build.rm.numCkeys()) {
addFlushCombinationProgram(report_block);
}
if (!has_som) { if (!has_som) {
// Dedupe is only necessary if this report has a dkey, or if there // Dedupe is only necessary if this report has a dkey, or if there
// are SOM reports to catch up. // are SOM reports to catch up.
bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom;
if (report.ekey == INVALID_EKEY) { if (report.ekey == INVALID_EKEY) {
if (needs_dedupe) { if (needs_dedupe) {
if (!report.quiet) {
report_block.add_before_end( report_block.add_before_end(
make_unique<RoseInstrDedupeAndReport>( make_unique<RoseInstrDedupeAndReport>(
report.quashSom, build.rm.getDkey(report), report.quashSom, build.rm.getDkey(report),
report.onmatch, report.offsetAdjust, end_inst)); report.onmatch, report.offsetAdjust, end_inst));
} else { } else {
report_block.add_before_end(make_unique<RoseInstrReport>( makeDedupe(build.rm, report, report_block);
}
} else {
if (!report.quiet) {
report_block.add_before_end(
make_unique<RoseInstrReport>(
report.onmatch, report.offsetAdjust)); report.onmatch, report.offsetAdjust));
} }
}
} else { } else {
if (needs_dedupe) { if (needs_dedupe) {
makeDedupe(build.rm, report, report_block); makeDedupe(build.rm, report, report_block);
} }
report_block.add_before_end(make_unique<RoseInstrReportExhaust>( if (!report.quiet) {
report_block.add_before_end(
make_unique<RoseInstrReportExhaust>(
report.onmatch, report.offsetAdjust, report.ekey)); report.onmatch, report.offsetAdjust, report.ekey));
} else {
report_block.add_before_end(
make_unique<RoseInstrSetExhaust>(report.ekey));
}
} }
} else { // has_som } else { // has_som
makeDedupeSom(build.rm, report, report_block); makeDedupeSom(build.rm, report, report_block);
if (report.ekey == INVALID_EKEY) { if (report.ekey == INVALID_EKEY) {
if (!report.quiet) {
report_block.add_before_end(make_unique<RoseInstrReportSom>( report_block.add_before_end(make_unique<RoseInstrReportSom>(
report.onmatch, report.offsetAdjust)); report.onmatch, report.offsetAdjust));
}
} else { } else {
if (!report.quiet) {
report_block.add_before_end( report_block.add_before_end(
make_unique<RoseInstrReportSomExhaust>( make_unique<RoseInstrReportSomExhaust>(
report.onmatch, report.offsetAdjust, report.ekey)); report.onmatch, report.offsetAdjust, report.ekey));
} else {
report_block.add_before_end(
make_unique<RoseInstrSetExhaust>(report.ekey));
} }
} }
}
addLogicalSetRequired(report, build.rm, report_block);
break; break;
case INTERNAL_SOM_LOC_SET: case INTERNAL_SOM_LOC_SET:
case INTERNAL_SOM_LOC_SET_IF_UNSET: case INTERNAL_SOM_LOC_SET_IF_UNSET:
@ -586,6 +631,9 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
case INTERNAL_SOM_LOC_MAKE_WRITABLE: case INTERNAL_SOM_LOC_MAKE_WRITABLE:
case INTERNAL_SOM_LOC_SET_FROM: case INTERNAL_SOM_LOC_SET_FROM:
case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
if (build.rm.numCkeys()) {
addFlushCombinationProgram(report_block);
}
if (has_som) { if (has_som) {
auto ri = make_unique<RoseInstrReportSomAware>(); auto ri = make_unique<RoseInstrReportSomAware>();
writeSomOperation(report, &ri->som); writeSomOperation(report, &ri->som);
@ -605,24 +653,48 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
case EXTERNAL_CALLBACK_SOM_STORED: case EXTERNAL_CALLBACK_SOM_STORED:
case EXTERNAL_CALLBACK_SOM_ABS: case EXTERNAL_CALLBACK_SOM_ABS:
case EXTERNAL_CALLBACK_SOM_REV_NFA: case EXTERNAL_CALLBACK_SOM_REV_NFA:
if (build.rm.numCkeys()) {
addFlushCombinationProgram(report_block);
}
makeDedupeSom(build.rm, report, report_block); makeDedupeSom(build.rm, report, report_block);
if (report.ekey == INVALID_EKEY) { if (report.ekey == INVALID_EKEY) {
if (!report.quiet) {
report_block.add_before_end(make_unique<RoseInstrReportSom>( report_block.add_before_end(make_unique<RoseInstrReportSom>(
report.onmatch, report.offsetAdjust)); report.onmatch, report.offsetAdjust));
} else {
report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
report.onmatch, report.offsetAdjust, report.ekey));
} }
} else {
if (!report.quiet) {
report_block.add_before_end(
make_unique<RoseInstrReportSomExhaust>(
report.onmatch, report.offsetAdjust, report.ekey));
} else {
report_block.add_before_end(
make_unique<RoseInstrSetExhaust>(report.ekey));
}
}
addLogicalSetRequired(report, build.rm, report_block);
break; break;
case EXTERNAL_CALLBACK_SOM_PASS: case EXTERNAL_CALLBACK_SOM_PASS:
if (build.rm.numCkeys()) {
addFlushCombinationProgram(report_block);
}
makeDedupeSom(build.rm, report, report_block); makeDedupeSom(build.rm, report, report_block);
if (report.ekey == INVALID_EKEY) { if (report.ekey == INVALID_EKEY) {
if (!report.quiet) {
report_block.add_before_end(make_unique<RoseInstrReportSom>( report_block.add_before_end(make_unique<RoseInstrReportSom>(
report.onmatch, report.offsetAdjust)); report.onmatch, report.offsetAdjust));
} else {
report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
report.onmatch, report.offsetAdjust, report.ekey));
} }
} else {
if (!report.quiet) {
report_block.add_before_end(
make_unique<RoseInstrReportSomExhaust>(
report.onmatch, report.offsetAdjust, report.ekey));
} else {
report_block.add_before_end(
make_unique<RoseInstrSetExhaust>(report.ekey));
}
}
addLogicalSetRequired(report, build.rm, report_block);
break; break;
default: default:
@ -630,7 +702,6 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
throw CompileError("Unable to generate bytecode."); throw CompileError("Unable to generate bytecode.");
} }
assert(!report_block.empty());
program.add_block(move(report_block)); program.add_block(move(report_block));
} }
@ -1837,7 +1908,7 @@ void makeRoleEagerEodReports(const RoseBuildImpl &build,
program.add_before_end(move(eod_program)); program.add_before_end(move(eod_program));
} }
/* Makes a program for a role/vertex given a specfic pred/in_edge. */ /** Makes a program for a role/vertex given a specific pred/in_edge. */
static static
RoseProgram makeRoleProgram(const RoseBuildImpl &build, RoseProgram makeRoleProgram(const RoseBuildImpl &build,
const map<RoseVertex, left_build_info> &leftfix_info, const map<RoseVertex, left_build_info> &leftfix_info,
@ -2045,7 +2116,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
} }
if (lit_id == build.eod_event_literal_id) { if (lit_id == build.eod_event_literal_id) {
/* Note: does not require the lit intial program */ /* Note: does not require the lit initial program */
assert(build.eod_event_literal_id != MO_INVALID_IDX); assert(build.eod_event_literal_id != MO_INVALID_IDX);
return role_programs; return role_programs;
} }
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -187,6 +187,7 @@ struct ProgramBuild : noncopyable {
void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
void addSuffixesEodProgram(RoseProgram &program); void addSuffixesEodProgram(RoseProgram &program);
void addMatcherEodProgram(RoseProgram &program); void addMatcherEodProgram(RoseProgram &program);
void addFlushCombinationProgram(RoseProgram &program);
static constexpr u32 INVALID_QUEUE = ~0U; static constexpr u32 INVALID_QUEUE = ~0U;
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -41,7 +41,6 @@
#include "rose_build.h" #include "rose_build.h"
#include "rose_internal.h" #include "rose_internal.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "util/charreach.h"
#include "util/depth.h" #include "util/depth.h"
#include "util/flat_containers.h" #include "util/flat_containers.h"
#include "util/ue2_graph.h" #include "util/ue2_graph.h"
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -27,7 +27,7 @@
*/ */
/** \file /** \file
* \brief Rose Input Graph: Used for ng_rose -> rose_build_add communication. * \brief Rose Input Graph: Used for ng_violet -> rose_build_add communication.
* *
* The input graph MUST be a DAG. * The input graph MUST be a DAG.
* There MUST be exactly 1 START or ANCHORED_START vertex. * There MUST be exactly 1 START or ANCHORED_START vertex.
@ -127,7 +127,7 @@ public:
flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */ flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */
u32 min_offset; /**< Minimum offset at which this vertex can match. */ u32 min_offset; /**< Minimum offset at which this vertex can match. */
u32 max_offset; /**< Maximum offset at which this vertex can match. */ u32 max_offset; /**< Maximum offset at which this vertex can match. */
size_t index = 0; size_t index = 0; /**< \brief Unique vertex index. */
}; };
struct RoseInEdgeProps { struct RoseInEdgeProps {
@ -176,7 +176,13 @@ struct RoseInEdgeProps {
/** \brief Haig version of graph, if required. */ /** \brief Haig version of graph, if required. */
std::shared_ptr<raw_som_dfa> haig; std::shared_ptr<raw_som_dfa> haig;
/**
* \brief Distance behind the match offset for the literal in the target
* vertex that the leftfix needs to be checked at.
*/
u32 graph_lag; u32 graph_lag;
/** \brief Unique edge index. */
size_t index = 0; size_t index = 0;
}; };
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -199,9 +199,25 @@ struct RoseStateOffsets {
* reports with that ekey should not be delivered to the user. */ * reports with that ekey should not be delivered to the user. */
u32 exhausted; u32 exhausted;
/** size of exhausted multibit */ /** size in bytes of exhausted multibit */
u32 exhausted_size; u32 exhausted_size;
/** Logical multibit.
*
* entry per logical key (operand/operator) (used by Logical Combination). */
u32 logicalVec;
/** size in bytes of logical multibit */
u32 logicalVec_size;
/** Combination multibit.
*
* entry per combination key (used by Logical Combination). */
u32 combVec;
/** size in bytes of combination multibit */
u32 combVec_size;
/** Multibit for active suffix/outfix engines. */ /** Multibit for active suffix/outfix engines. */
u32 activeLeafArray; u32 activeLeafArray;
@ -327,6 +343,11 @@ struct RoseEngine {
u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */ u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
u32 historyRequired; /**< max amount of history required for streaming */ u32 historyRequired; /**< max amount of history required for streaming */
u32 ekeyCount; /**< number of exhaustion keys */ u32 ekeyCount; /**< number of exhaustion keys */
u32 lkeyCount; /**< number of logical keys */
u32 lopCount; /**< number of logical ops */
u32 ckeyCount; /**< number of combination keys */
u32 logicalTreeOffset; /**< offset to mapping from lkey to LogicalOp */
u32 combInfoMapOffset; /**< offset to mapping from ckey to combInfo */
u32 dkeyCount; /**< number of dedupe keys */ u32 dkeyCount; /**< number of dedupe keys */
u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */ u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */
u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
@ -404,6 +425,7 @@ struct RoseEngine {
u32 roseCount; u32 roseCount;
u32 eodProgramOffset; //!< EOD program, otherwise 0. u32 eodProgramOffset; //!< EOD program, otherwise 0.
u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */
u32 lastByteHistoryIterOffset; // if non-zero u32 lastByteHistoryIterOffset; // if non-zero
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -183,7 +183,25 @@ enum RoseInstructionCode {
*/ */
ROSE_INSTR_INCLUDED_JUMP, ROSE_INSTR_INCLUDED_JUMP,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_INCLUDED_JUMP //!< Sentinel. /**
* \brief Set matching status of a sub-expression.
*/
ROSE_INSTR_SET_LOGICAL,
/**
* \brief Set combination status pending checking.
*/
ROSE_INSTR_SET_COMBINATION,
/**
* \brief Check if compliant with any logical constraints.
*/
ROSE_INSTR_FLUSH_COMBINATION,
/** \brief Mark as exhausted instead of reporting while quiet. */
ROSE_INSTR_SET_EXHAUST,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_SET_EXHAUST //!< Sentinel.
}; };
struct ROSE_STRUCT_END { struct ROSE_STRUCT_END {
@ -636,4 +654,24 @@ struct ROSE_STRUCT_INCLUDED_JUMP {
u8 squash; //!< FDR confirm squash mask for included literal. u8 squash; //!< FDR confirm squash mask for included literal.
u32 child_offset; //!< Program offset of included literal. u32 child_offset; //!< Program offset of included literal.
}; };
struct ROSE_STRUCT_SET_LOGICAL {
u8 code; //!< From enum RoseInstructionCode.
u32 lkey; //!< Logical key to set.
s32 offset_adjust; //!< offsetAdjust from the struct Report that triggers the flush.
};
struct ROSE_STRUCT_SET_COMBINATION {
u8 code; //!< From enum RoseInstructionCode.
u32 ckey; //!< Combination key to set.
};
struct ROSE_STRUCT_FLUSH_COMBINATION {
u8 code; //!< From enum RoseInstructionCode.
};
struct ROSE_STRUCT_SET_EXHAUST {
u8 code; //!< From enum RoseInstructionCode.
u32 ekey; //!< Exhaustion key.
};
#endif // ROSE_ROSE_PROGRAM_H #endif // ROSE_ROSE_PROGRAM_H
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -127,6 +127,15 @@ void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) {
tctxt->lastMatchOffset = offset; tctxt->lastMatchOffset = offset;
} }
static really_inline
void updateLastCombMatchOffset(struct RoseContext *tctxt, u64a offset) {
DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
tctxt->lastCombMatchOffset);
assert(offset >= tctxt->lastCombMatchOffset);
tctxt->lastCombMatchOffset = offset;
}
static really_inline static really_inline
void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) { void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) {
DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset, DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -578,6 +578,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
tctxt->lastEndOffset = offset; tctxt->lastEndOffset = offset;
tctxt->filledDelayedSlots = 0; tctxt->filledDelayedSlots = 0;
tctxt->lastMatchOffset = 0; tctxt->lastMatchOffset = 0;
tctxt->lastCombMatchOffset = offset;
tctxt->minMatchOffset = offset; tctxt->minMatchOffset = offset;
tctxt->minNonMpvMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset;
tctxt->next_mpv_offset = 0; tctxt->next_mpv_offset = 0;
@ -700,6 +701,7 @@ void roseStreamInitEod(const struct RoseEngine *t, u64a offset,
tctxt->lastEndOffset = offset; tctxt->lastEndOffset = offset;
tctxt->filledDelayedSlots = 0; tctxt->filledDelayedSlots = 0;
tctxt->lastMatchOffset = 0; tctxt->lastMatchOffset = 0;
tctxt->lastCombMatchOffset = offset; /* DO NOT set 0 here! */
tctxt->minMatchOffset = offset; tctxt->minMatchOffset = offset;
tctxt->minNonMpvMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset;
tctxt->next_mpv_offset = offset; tctxt->next_mpv_offset = offset;
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -67,7 +67,7 @@ void prefetch_data(const char *data, unsigned length) {
/** dummy event handler for use when user does not provide one */ /** dummy event handler for use when user does not provide one */
static static
int null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from, int HS_CDECL null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
UNUSED unsigned long long to, UNUSED unsigned flags, UNUSED unsigned long long to, UNUSED unsigned flags,
UNUSED void *ctxt) { UNUSED void *ctxt) {
return 0; return 0;
@ -356,6 +356,15 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
length, NULL, 0, 0, 0, flags); length, NULL, 0, 0, 0, flags);
clearEvec(rose, scratch->core_info.exhaustionVector); clearEvec(rose, scratch->core_info.exhaustionVector);
if (rose->ckeyCount) {
scratch->core_info.logicalVector = scratch->bstate +
rose->stateOffsets.logicalVec;
scratch->core_info.combVector = scratch->bstate +
rose->stateOffsets.combVec;
scratch->tctxt.lastCombMatchOffset = 0;
clearLvec(rose, scratch->core_info.logicalVector,
scratch->core_info.combVector);
}
if (!length) { if (!length) {
if (rose->boundary.reportZeroEodOffset) { if (rose->boundary.reportZeroEodOffset) {
@ -436,6 +445,13 @@ done_scan:
scratch); scratch);
} }
if (rose->flushCombProgramOffset) {
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
unmarkScratchInUse(scratch);
return HS_SCAN_TERMINATED;
}
}
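For context, this end-of-scan flush is what ties the runtime plumbing to the new logical combination feature at the API level. Below is a minimal block-mode usage sketch, assuming the 5.0 public API (HS_FLAG_COMBINATION and HS_FLAG_QUIET as introduced in this release); the ids, patterns and input data are invented for illustration and error handling is elided.

/* Usage sketch only: not part of this change. */
#include <hs.h>
#include <stdio.h>

static int on_match(unsigned id, unsigned long long from,
                    unsigned long long to, unsigned flags, void *ctx) {
    printf("match for id %u ending at %llu\n", id, to);
    return 0; /* continue matching */
}

int main(void) {
    /* ids 101 and 102 are quiet sub-expressions; 1001 is their AND. */
    const char *exprs[] = {"foo", "bar", "101&102"};
    unsigned pat_flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_COMBINATION};
    unsigned ids[] = {101, 102, 1001};

    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;
    hs_compile_multi(exprs, pat_flags, ids, 3, HS_MODE_BLOCK, NULL, &db, &err);

    hs_scratch_t *scratch = NULL;
    hs_alloc_scratch(db, &scratch);

    /* Only id 1001 reports, and only once both "foo" and "bar" have been
     * seen; the quiet sub-expressions set state without calling back. */
    const char *data = "xxfooyybarzz";
    hs_scan(db, data, 12, 0, scratch, on_match, NULL);

    hs_free_scratch(scratch);
    hs_free_database(db);
    return 0;
}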
set_retval: set_retval:
DEBUG_PRINTF("done. told_to_stop_matching=%d\n", DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
told_to_stop_matching(scratch)); told_to_stop_matching(scratch));
@ -500,6 +516,10 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose,
roseInitState(rose, state); roseInitState(rose, state);
clearEvec(rose, state + rose->stateOffsets.exhausted); clearEvec(rose, state + rose->stateOffsets.exhausted);
if (rose->ckeyCount) {
clearLvec(rose, state + rose->stateOffsets.logicalVec,
state + rose->stateOffsets.combVec);
}
// SOM state multibit structures. // SOM state multibit structures.
initSomState(rose, state); initSomState(rose, state);
@ -614,6 +634,13 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
getHistory(state, rose, id->offset), getHistory(state, rose, id->offset),
getHistoryAmount(rose, id->offset), id->offset, status, 0); getHistoryAmount(rose, id->offset), id->offset, status, 0);
if (rose->ckeyCount) {
scratch->core_info.logicalVector = state +
rose->stateOffsets.logicalVec;
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
scratch->tctxt.lastCombMatchOffset = id->offset;
}
if (rose->somLocationCount) { if (rose->somLocationCount) {
loadSomFromStream(scratch, id->offset); loadSomFromStream(scratch, id->offset);
} }
@ -657,6 +684,13 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
scratch->core_info.status |= STATUS_TERMINATED; scratch->core_info.status |= STATUS_TERMINATED;
} }
} }
if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
DEBUG_PRINTF("told to stop matching\n");
scratch->core_info.status |= STATUS_TERMINATED;
}
}
} }
HS_PUBLIC_API HS_PUBLIC_API
@ -849,6 +883,12 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
populateCoreInfo(scratch, rose, state, onEvent, context, data, length, populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
getHistory(state, rose, id->offset), historyAmount, getHistory(state, rose, id->offset), historyAmount,
id->offset, status, flags); id->offset, status, flags);
if (rose->ckeyCount) {
scratch->core_info.logicalVector = state +
rose->stateOffsets.logicalVec;
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
scratch->tctxt.lastCombMatchOffset = id->offset;
}
assert(scratch->core_info.hlen <= id->offset assert(scratch->core_info.hlen <= id->offset
&& scratch->core_info.hlen <= rose->historyRequired); && scratch->core_info.hlen <= rose->historyRequired);
@ -894,6 +934,12 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
} }
} }
if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
scratch->core_info.status |= STATUS_TERMINATED;
}
}
setStreamStatus(state, scratch->core_info.status); setStreamStatus(state, scratch->core_info.status);
if (likely(!can_stop_matching(scratch))) { if (likely(!can_stop_matching(scratch))) {
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -36,6 +36,7 @@
#ifndef SCRATCH_H_DA6D4FC06FF410 #ifndef SCRATCH_H_DA6D4FC06FF410
#define SCRATCH_H_DA6D4FC06FF410 #define SCRATCH_H_DA6D4FC06FF410
#include "hs_common.h"
#include "ue2common.h" #include "ue2common.h"
#include "rose/rose_types.h" #include "rose/rose_types.h"
@ -88,12 +89,15 @@ struct core_info {
void *userContext; /**< user-supplied context */ void *userContext; /**< user-supplied context */
/** \brief user-supplied match callback */ /** \brief user-supplied match callback */
int (*userCallback)(unsigned int id, unsigned long long from, int (HS_CDECL *userCallback)(unsigned int id, unsigned long long from,
unsigned long long to, unsigned int flags, void *ctx); unsigned long long to, unsigned int flags,
void *ctx);
const struct RoseEngine *rose; const struct RoseEngine *rose;
char *state; /**< full stream state */ char *state; /**< full stream state */
char *exhaustionVector; /**< pointer to evec for this stream */ char *exhaustionVector; /**< pointer to evec for this stream */
char *logicalVector; /**< pointer to lvec for this stream */
char *combVector; /**< pointer to cvec for this stream */
const u8 *buf; /**< main scan buffer */ const u8 *buf; /**< main scan buffer */
size_t len; /**< length of main scan buffer in bytes */ size_t len; /**< length of main scan buffer in bytes */
const u8 *hbuf; /**< history buffer */ const u8 *hbuf; /**< history buffer */
@ -115,6 +119,7 @@ struct RoseContext {
* stream */ * stream */
u64a lastMatchOffset; /**< last match offset report up out of rose; u64a lastMatchOffset; /**< last match offset report up out of rose;
* used _only_ for debugging, asserts */ * used _only_ for debugging, asserts */
u64a lastCombMatchOffset; /**< last match offset of active combinations */
u64a minMatchOffset; /**< the earliest offset that we are still allowed to u64a minMatchOffset; /**< the earliest offset that we are still allowed to
* report */ * report */
u64a minNonMpvMatchOffset; /**< the earliest offset that non-mpv engines are u64a minNonMpvMatchOffset; /**< the earliest offset that non-mpv engines are
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -148,6 +148,13 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose,
/* copy the exhaustion multibit */ /* copy the exhaustion multibit */
COPY_MULTIBIT(stream_body + so->exhausted, rose->ekeyCount); COPY_MULTIBIT(stream_body + so->exhausted, rose->ekeyCount);
/* copy the logical multibit */
COPY_MULTIBIT(stream_body + so->logicalVec,
rose->lkeyCount + rose->lopCount);
/* copy the combination multibit */
COPY_MULTIBIT(stream_body + so->combVec, rose->ckeyCount);
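The sizes copied here (lkeyCount + lopCount logical slots, ckeyCount combination slots) are the same ones the runtime clears at scan and stream start via clearLvec. The real helper is not part of this hunk; a plausible sketch, assuming ue2common.h's really_inline and util/multibit.h's mmbit_clear:

/* Sketch only: a clearLvec consistent with the multibit sizes above. */
static really_inline
void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) {
    mmbit_clear((u8 *)lvec, rose->lkeyCount + rose->lopCount);
    mmbit_clear((u8 *)cvec, rose->ckeyCount);
}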
/* copy nfa stream state for endfixes */ /* copy nfa stream state for endfixes */
/* Note: in the expand case the active array has already been copied into /* Note: in the expand case the active array has already been copied into
* the stream. */ * the stream. */
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -66,8 +66,13 @@ typedef signed int s32;
/* We append the 'a' for aligned, since these aren't common, garden variety /* We append the 'a' for aligned, since these aren't common, garden variety
* 64 bit values. The alignment is necessary for structs on some platforms, * 64 bit values. The alignment is necessary for structs on some platforms,
* so we don't end up performing accidental unaligned accesses. */ * so we don't end up performing accidental unaligned accesses. */
#if defined(_WIN32) && ! defined(_WIN64)
typedef unsigned long long ALIGN_ATTR(4) u64a;
typedef signed long long ALIGN_ATTR(4) s64a;
#else
typedef unsigned long long ALIGN_ATTR(8) u64a; typedef unsigned long long ALIGN_ATTR(8) u64a;
typedef signed long long ALIGN_ATTR(8) s64a; typedef signed long long ALIGN_ATTR(8) s64a;
#endif
/* get the SIMD types */ /* get the SIMD types */
#include "util/simd_types.h" #include "util/simd_types.h"
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -305,9 +305,10 @@ public:
} }
/// Bitwise OR. /// Bitwise OR.
bitfield operator|(bitfield a) const { bitfield operator|(const bitfield &a) const {
a |= *this; bitfield b = a;
return a; b |= *this;
return b;
} }
/// Bitwise OR-equals. /// Bitwise OR-equals.
@ -325,9 +326,10 @@ public:
} }
/// Bitwise AND. /// Bitwise AND.
bitfield operator&(bitfield a) const { bitfield operator&(const bitfield &a) const {
a &= *this; bitfield b = a;
return a; b &= *this;
return b;
} }
/// Bitwise AND-equals. /// Bitwise AND-equals.
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -56,7 +56,11 @@ void describeChar(ostream &os, char c, enum cc_output_t out_type) {
const string backslash((out_type == CC_OUT_DOT ? 2 : 1), '\\'); const string backslash((out_type == CC_OUT_DOT ? 2 : 1), '\\');
#ifdef _WIN32
if (c >= 0x21 && c < 0x7F && c != '\\') {
#else
if (isgraph(c) && c != '\\') { if (isgraph(c) && c != '\\') {
#endif
if (escaped.find(c) != string::npos) { if (escaped.find(c) != string::npos) {
os << backslash << c; os << backslash << c;
} else if (out_type == CC_OUT_DOT } else if (out_type == CC_OUT_DOT
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -114,6 +114,21 @@ public:
std::memset(data->data(), val, data->size()); std::memset(data->data(), val, data->size());
} }
size_t count(small_color color) const {
assert(static_cast<u8>(color) < sizeof(fill_lut));
size_t num = 0;
for (size_t i = 0; i < n; i++) {
size_t byte = i / entries_per_byte;
assert(byte < data->size());
size_t bit = (i % entries_per_byte) * bit_size;
u8 val = ((*data)[byte] >> bit) & bit_mask;
if (static_cast<small_color>(val) == color) {
num++;
}
}
return num;
}
small_color get_impl(key_type key) const { small_color get_impl(key_type key) const {
auto i = get(index_map, key); auto i = get(index_map, key);
assert(i < n); assert(i < n);
src/util/graph_undirected.h (new file)
@ -0,0 +1,501 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* \brief Adaptor that presents an undirected view of a bidirectional BGL graph.
*
* Analogous to the reverse_graph adapter. You can construct one of these for
* bidirectional graph g with:
*
* auto ug = make_undirected_graph(g);
*
* The vertex descriptor type is the same as that of the underlying graph, but
* the edge descriptor is different.
*/
#ifndef GRAPH_UNDIRECTED_H
#define GRAPH_UNDIRECTED_H
#include "util/operators.h"
#include <boost/graph/adjacency_iterator.hpp>
#include <boost/graph/graph_traits.hpp>
#include <boost/graph/properties.hpp>
#include <boost/iterator/iterator_facade.hpp>
#include <type_traits>
#include <utility>
namespace ue2 {
struct undirected_graph_tag {};
template <class BidirectionalGraph, class GraphRef>
class undirected_graph;
namespace undirected_detail {
template <typename BidirectionalGraph>
class undirected_graph_edge_descriptor
: totally_ordered<undirected_graph_edge_descriptor<BidirectionalGraph>> {
using base_graph_type = BidirectionalGraph;
using base_graph_traits = typename boost::graph_traits<base_graph_type>;
using base_edge_type = typename base_graph_traits::edge_descriptor;
using base_vertex_type = typename base_graph_traits::vertex_descriptor;
base_edge_type underlying_edge;
const base_graph_type *g;
bool reverse; // if true, reverse vertices in source() and target()
inline std::pair<base_vertex_type, base_vertex_type>
canonical_edge() const {
auto u = std::min(source(underlying_edge, *g),
target(underlying_edge, *g));
auto v = std::max(source(underlying_edge, *g),
target(underlying_edge, *g));
return std::make_pair(u, v);
}
template <class BidiGraph, class GraphRef>
friend class ::ue2::undirected_graph;
public:
undirected_graph_edge_descriptor() = default;
undirected_graph_edge_descriptor(base_edge_type edge,
const base_graph_type &g_in,
bool reverse_in)
: underlying_edge(std::move(edge)), g(&g_in), reverse(reverse_in) {}
bool operator==(const undirected_graph_edge_descriptor &other) const {
return canonical_edge() == other.canonical_edge();
}
bool operator<(const undirected_graph_edge_descriptor &other) const {
return canonical_edge() < other.canonical_edge();
}
base_vertex_type get_source() const {
return reverse ? target(underlying_edge, *g)
: source(underlying_edge, *g);
}
base_vertex_type get_target() const {
return reverse ? source(underlying_edge, *g)
: target(underlying_edge, *g);
}
};
} // namespace undirected_detail
template <class BidirectionalGraph, class GraphRef = const BidirectionalGraph &>
class undirected_graph {
private:
using Self = undirected_graph<BidirectionalGraph, GraphRef>;
using Traits = boost::graph_traits<BidirectionalGraph>;
public:
using base_type = BidirectionalGraph;
using base_ref_type = GraphRef;
explicit undirected_graph(GraphRef g_in) : g(g_in) {}
// Graph requirements
using vertex_descriptor = typename Traits::vertex_descriptor;
using edge_descriptor =
undirected_detail::undirected_graph_edge_descriptor<base_type>;
using directed_category = boost::undirected_tag;
using edge_parallel_category = boost::disallow_parallel_edge_tag;
using traversal_category = typename Traits::traversal_category;
// IncidenceGraph requirements
/**
* \brief Templated iterator used for out_edge_iterator and
* in_edge_iterator, depending on the value of Reverse.
*/
template <bool Reverse>
class adj_edge_iterator
: public boost::iterator_facade<
adj_edge_iterator<Reverse>, edge_descriptor,
boost::forward_traversal_tag, edge_descriptor> {
vertex_descriptor u;
const base_type *g;
typename Traits::in_edge_iterator in_it;
typename Traits::out_edge_iterator out_it;
bool done_in = false;
public:
adj_edge_iterator() = default;
adj_edge_iterator(vertex_descriptor u_in, const base_type &g_in,
bool end_iter)
: u(std::move(u_in)), g(&g_in) {
auto pi = in_edges(u, *g);
auto po = out_edges(u, *g);
if (end_iter) {
in_it = pi.second;
out_it = po.second;
done_in = true;
} else {
in_it = pi.first;
out_it = po.first;
if (in_it == pi.second) {
done_in = true;
find_first_valid_out();
}
}
}
private:
friend class boost::iterator_core_access;
void find_first_valid_out() {
auto out_end = out_edges(u, *g).second;
for (; out_it != out_end; ++out_it) {
auto v = target(*out_it, *g);
if (!edge(v, u, *g).second) {
break;
}
}
}
void increment() {
if (!done_in) {
auto in_end = in_edges(u, *g).second;
assert(in_it != in_end);
++in_it;
if (in_it == in_end) {
done_in = true;
find_first_valid_out();
}
} else {
++out_it;
find_first_valid_out();
}
}
bool equal(const adj_edge_iterator &other) const {
return in_it == other.in_it && out_it == other.out_it;
}
edge_descriptor dereference() const {
if (done_in) {
return edge_descriptor(*out_it, *g, Reverse);
} else {
return edge_descriptor(*in_it, *g, !Reverse);
}
}
};
using out_edge_iterator = adj_edge_iterator<false>;
using in_edge_iterator = adj_edge_iterator<true>;
using degree_size_type = typename Traits::degree_size_type;
// AdjacencyGraph requirements
using adjacency_iterator =
typename boost::adjacency_iterator_generator<Self, vertex_descriptor,
out_edge_iterator>::type;
using inv_adjacency_iterator =
typename boost::inv_adjacency_iterator_generator<
Self, vertex_descriptor, in_edge_iterator>::type;
// VertexListGraph requirements
using vertex_iterator = typename Traits::vertex_iterator;
// EdgeListGraph requirements
enum {
is_edge_list = std::is_convertible<traversal_category,
boost::edge_list_graph_tag>::value
};
/** \brief Iterator used for edges(). */
class edge_iterator
: public boost::iterator_facade<edge_iterator, edge_descriptor,
boost::forward_traversal_tag,
edge_descriptor> {
const base_type *g;
typename Traits::edge_iterator it;
public:
edge_iterator() = default;
edge_iterator(typename Traits::edge_iterator it_in,
const base_type &g_in)
: g(&g_in), it(std::move(it_in)) {
find_first_valid_edge();
}
private:
friend class boost::iterator_core_access;
void find_first_valid_edge() {
const auto end = edges(*g).second;
for (; it != end; ++it) {
const auto &u = source(*it, *g);
const auto &v = target(*it, *g);
if (!edge(v, u, *g).second) {
break; // No reverse edge, we must visit this one
}
if (u <= v) {
// We have a reverse edge, but we'll return this one (and
// skip the other). Note that (u, u) shouldn't be skipped.
break;
}
}
}
void increment() {
assert(it != edges(*g).second);
++it;
find_first_valid_edge();
}
bool equal(const edge_iterator &other) const {
return it == other.it;
}
edge_descriptor dereference() const {
return edge_descriptor(*it, *g, false);
}
};
using vertices_size_type = typename Traits::vertices_size_type;
using edges_size_type = typename Traits::edges_size_type;
using graph_tag = undirected_graph_tag;
using vertex_bundle_type =
typename boost::vertex_bundle_type<base_type>::type;
using edge_bundle_type = typename boost::edge_bundle_type<base_type>::type;
vertex_bundle_type &operator[](const vertex_descriptor &d) {
return const_cast<base_type &>(g)[d];
}
const vertex_bundle_type &operator[](const vertex_descriptor &d) const {
return g[d];
}
edge_bundle_type &operator[](const edge_descriptor &d) {
return const_cast<base_type &>(g)[d.underlying_edge];
}
const edge_bundle_type &operator[](const edge_descriptor &d) const {
return g[d.underlying_edge];
}
static vertex_descriptor null_vertex() { return Traits::null_vertex(); }
// Accessor free functions follow
friend std::pair<vertex_iterator, vertex_iterator>
vertices(const undirected_graph &ug) {
return vertices(ug.g);
}
friend std::pair<edge_iterator, edge_iterator>
edges(const undirected_graph &ug) {
auto e = edges(ug.g);
return std::make_pair(edge_iterator(e.first, ug.g),
edge_iterator(e.second, ug.g));
}
friend std::pair<out_edge_iterator, out_edge_iterator>
out_edges(const vertex_descriptor &u, const undirected_graph &ug) {
return std::make_pair(out_edge_iterator(u, ug.g, false),
out_edge_iterator(u, ug.g, true));
}
friend vertices_size_type num_vertices(const undirected_graph &ug) {
return num_vertices(ug.g);
}
friend edges_size_type num_edges(const undirected_graph &ug) {
auto p = edges(ug);
return std::distance(p.first, p.second);
}
friend degree_size_type out_degree(const vertex_descriptor &u,
const undirected_graph &ug) {
return degree(u, ug);
}
friend vertex_descriptor vertex(vertices_size_type n,
const undirected_graph &ug) {
return vertex(n, ug.g);
}
friend std::pair<edge_descriptor, bool> edge(const vertex_descriptor &u,
const vertex_descriptor &v,
const undirected_graph &ug) {
auto e = edge(u, v, ug.g);
if (e.second) {
return std::make_pair(edge_descriptor(e.first, ug.g, false), true);
}
auto e_rev = edge(v, u, ug.g);
if (e_rev.second) {
return std::make_pair(edge_descriptor(e_rev.first, ug.g, true),
true);
}
return std::make_pair(edge_descriptor(), false);
}
friend std::pair<in_edge_iterator, in_edge_iterator>
in_edges(const vertex_descriptor &v, const undirected_graph &ug) {
return std::make_pair(in_edge_iterator(v, ug.g, false),
in_edge_iterator(v, ug.g, true));
}
friend std::pair<adjacency_iterator, adjacency_iterator>
adjacent_vertices(const vertex_descriptor &u, const undirected_graph &ug) {
out_edge_iterator oi, oe;
std::tie(oi, oe) = out_edges(u, ug);
return std::make_pair(adjacency_iterator(oi, &ug),
adjacency_iterator(oe, &ug));
}
friend std::pair<inv_adjacency_iterator, inv_adjacency_iterator>
inv_adjacent_vertices(const vertex_descriptor &v,
const undirected_graph &ug) {
in_edge_iterator ei, ee;
std::tie(ei, ee) = in_edges(v, ug);
return std::make_pair(inv_adjacency_iterator(ei, &ug),
inv_adjacency_iterator(ee, &ug));
}
friend degree_size_type in_degree(const vertex_descriptor &v,
const undirected_graph &ug) {
return degree(v, ug);
}
friend vertex_descriptor source(const edge_descriptor &e,
const undirected_graph &) {
return e.get_source();
}
friend vertex_descriptor target(const edge_descriptor &e,
const undirected_graph &) {
return e.get_target();
}
friend degree_size_type degree(const vertex_descriptor &u,
const undirected_graph &ug) {
auto p = out_edges(u, ug);
return std::distance(p.first, p.second);
}
// Property accessors.
template <typename Property>
using prop_map = typename boost::property_map<undirected_graph, Property>;
template <typename Property>
friend typename prop_map<Property>::type
get(Property p, undirected_graph &ug) {
return get(p, ug.g);
}
template <typename Property>
friend typename prop_map<Property>::const_type
get(Property p, const undirected_graph &ug) {
return get(p, ug.g);
}
template <typename Property, typename Key>
friend typename boost::property_traits<
typename prop_map<Property>::const_type>::value_type
get(Property p, const undirected_graph &ug, const Key &k) {
return get(p, ug.g, get_underlying_descriptor(k));
}
template <typename Property, typename Value, typename Key>
friend void put(Property p, const undirected_graph &ug,
const Key &k, const Value &val) {
put(p, const_cast<BidirectionalGraph &>(ug.g),
get_underlying_descriptor(k), val);
}
private:
// Accessors are here because our free friend functions (above) cannot see
// edge_descriptor's private members.
static typename base_type::vertex_descriptor
get_underlying_descriptor(const vertex_descriptor &v) {
return v;
}
static typename base_type::edge_descriptor
get_underlying_descriptor(const edge_descriptor &e) {
return e.underlying_edge;
}
// Reference to underlying bidirectional graph
GraphRef g;
};
template <class BidirectionalGraph>
undirected_graph<BidirectionalGraph>
make_undirected_graph(const BidirectionalGraph &g) {
return undirected_graph<BidirectionalGraph>(g);
}
} // namespace ue2
namespace boost {
/* Derive all the property map specializations from the underlying
* bidirectional graph. */
template <typename BidirectionalGraph, typename GraphRef, typename Property>
struct property_map<ue2::undirected_graph<BidirectionalGraph, GraphRef>,
Property> {
using base_map_type = property_map<BidirectionalGraph, Property>;
using type = typename base_map_type::type;
using const_type = typename base_map_type::const_type;
};
template <class BidirectionalGraph, class GraphRef>
struct vertex_property_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
: vertex_property_type<BidirectionalGraph> {};
template <class BidirectionalGraph, class GraphRef>
struct edge_property_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
: edge_property_type<BidirectionalGraph> {};
template <class BidirectionalGraph, class GraphRef>
struct graph_property_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
: graph_property_type<BidirectionalGraph> {};
template <typename BidirectionalGraph, typename GraphRef>
struct vertex_bundle_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
: vertex_bundle_type<BidirectionalGraph> {};
template <typename BidirectionalGraph, typename GraphRef>
struct edge_bundle_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
: edge_bundle_type<BidirectionalGraph> {};
template <typename BidirectionalGraph, typename GraphRef>
struct graph_bundle_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
: graph_bundle_type<BidirectionalGraph> {};
} // namespace boost
#endif // GRAPH_UNDIRECTED_H
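A minimal usage sketch for the adaptor above, assuming a Boost build with the header reachable on the include path; the graph and the expected values are invented for illustration.

// Usage sketch only: not part of the new header.
#include "util/graph_undirected.h"

#include <boost/graph/adjacency_list.hpp>
#include <iostream>

int main() {
    using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                        boost::bidirectionalS>;
    Graph g(4);
    add_edge(0, 1, g);
    add_edge(1, 0, g); // reverse of an existing edge: collapsed in the view
    add_edge(1, 2, g);
    add_edge(2, 3, g);

    auto ug = ue2::make_undirected_graph(g);

    std::cout << "undirected edges: " << num_edges(ug) << "\n"; // 3, not 4
    auto ep = edges(ug);
    for (auto it = ep.first; it != ep.second; ++it) {
        std::cout << source(*it, ug) << " -- " << target(*it, ug) << "\n";
    }
    std::cout << "degree(1): " << degree(1, ug) << "\n"; // 2
    return 0;
}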
src/util/logical.h (new file)
@ -0,0 +1,77 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Inline functions for manipulating logical combinations.
*/
#ifndef LOGICAL_H
#define LOGICAL_H
#include "ue2common.h"
/** Index meaning a given logical key is invalid. */
#define INVALID_LKEY (~(u32)0)
#define INVALID_CKEY INVALID_LKEY
/** Logical operation type; priority runs from high to low. */
enum LogicalOpType {
LOGICAL_OP_NOT,
LOGICAL_OP_AND,
LOGICAL_OP_OR,
LAST_LOGICAL_OP = LOGICAL_OP_OR //!< Sentinel.
};
#define UNKNOWN_OP (~(u32)0)
/** A logical operation consists of 4 parts. */
struct LogicalOp {
u32 id; //!< logical operator/operation id
u32 op; //!< LogicalOpType
u32 lo; //!< left operand
u32 ro; //!< right operand
};
/** Each logical combination has its info:
* It occupies a region in the LogicalOp vector.
* It has an exhaustion key for single-match mode. */
struct CombInfo {
u32 id;
u32 ekey; //!< exhaustion key
u32 start; //!< ckey of logical operation to start calculating
u32 result; //!< ckey of logical operation to give final result
u64a min_offset;
u64a max_offset;
};
/** Temporarily used to separate operation ids from report lkeys
 * when building the logicalTree with the shunting yard algorithm;
 * operation ids are finally renumbered to follow the report lkeys. */
#define LOGICAL_OP_BIT 0x80000000UL
#endif
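A toy sketch of how these records compose, assuming the header above (and its ue2common.h dependency) is on the include path. This is not the Hyperscan evaluator; it only illustrates a lowered combination such as "201 & !202" being evaluated over a per-key match vector, with the key numbering and the operand slot used by NOT both assumed for the example.

/* Toy evaluation sketch only: lkeys 0 and 1 stand for two sub-expressions;
 * ids 2 and 3 are operator nodes, renumbered to follow the report lkeys as
 * described above. A plain char array stands in for the logical multibit. */
#include <stdio.h>
#include <string.h>

#include "util/logical.h"

int main(void) {
    struct LogicalOp ops[2] = {
        {2, LOGICAL_OP_NOT, UNKNOWN_OP, 1}, /* id 2 = !lkey1 (lo unused) */
        {3, LOGICAL_OP_AND, 0, 2},          /* id 3 = lkey0 & id2        */
    };

    char lvec[4];
    memset(lvec, 0, sizeof(lvec));
    lvec[0] = 1; /* first sub-expression has matched */
    lvec[1] = 0; /* second sub-expression has not    */

    for (unsigned i = 0; i < 2; i++) {
        const struct LogicalOp *op = &ops[i];
        switch (op->op) {
        case LOGICAL_OP_NOT:
            lvec[op->id] = !lvec[op->ro];
            break;
        case LOGICAL_OP_AND:
            lvec[op->id] = lvec[op->lo] && lvec[op->ro];
            break;
        case LOGICAL_OP_OR:
            lvec[op->id] = lvec[op->lo] || lvec[op->ro];
            break;
        }
    }

    printf("combination result: %d\n", lvec[3]); /* prints 1 */
    return 0;
}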
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -1197,7 +1197,11 @@ u32 mmbit_sparse_iter_begin(const u8 *bits, u32 total_bits, u32 *idx,
assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter))); assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
// Our state _may_ be on the stack // Our state _may_ be on the stack
#ifndef _WIN32
assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state))); assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
assert(ISALIGNED_N(s, 4));
#endif
MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits); MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
// iterator should have _something_ at the root level // iterator should have _something_ at the root level
@ -1305,7 +1309,11 @@ u32 mmbit_sparse_iter_next(const u8 *bits, u32 total_bits, u32 last_key,
assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter))); assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
// Our state _may_ be on the stack // Our state _may_ be on the stack
#ifndef _WIN32
assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state))); assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
assert(ISALIGNED_N(s, 4));
#endif
MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits); MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
MDEBUG_PRINTF("NEXT (total_bits=%u, last_key=%u)\n", total_bits, last_key); MDEBUG_PRINTF("NEXT (total_bits=%u, last_key=%u)\n", total_bits, last_key);
@ -1458,7 +1466,11 @@ void mmbit_sparse_iter_unset(u8 *bits, u32 total_bits,
assert(ISALIGNED_N(it, alignof(struct mmbit_sparse_iter))); assert(ISALIGNED_N(it, alignof(struct mmbit_sparse_iter)));
// Our state _may_ be on the stack // Our state _may_ be on the stack
#ifndef _WIN32
assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state))); assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
assert(ISALIGNED_N(s, 4));
#endif
MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits); MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);

View File

@ -33,6 +33,7 @@
#ifndef MULTIBIT_BUILD_H #ifndef MULTIBIT_BUILD_H
#define MULTIBIT_BUILD_H #define MULTIBIT_BUILD_H
#include "hs_common.h"
#include "multibit_internal.h" #include "multibit_internal.h"
#include "hash.h" #include "hash.h"

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -36,6 +36,7 @@
#include "ue2common.h" #include "ue2common.h"
#include "util/exhaust.h" // for INVALID_EKEY #include "util/exhaust.h" // for INVALID_EKEY
#include "util/logical.h" // for INVALID_LKEY
#include "util/hash.h" #include "util/hash.h"
#include "util/order_check.h" #include "util/order_check.h"
@ -107,6 +108,16 @@ struct Report {
* exhaustible, this will be INVALID_EKEY. */ * exhaustible, this will be INVALID_EKEY. */
u32 ekey = INVALID_EKEY; u32 ekey = INVALID_EKEY;
/** \brief Logical Combination key in each combination.
*
* If the report takes part in a logical combination, this is the lkey to
* check before reporting a match; the lkey is also set before the check is
* made. If the report is not in a logical combination, this will be
* INVALID_LKEY. */
u32 lkey = INVALID_LKEY;
/** \brief Quiet flag for expressions in any logical combination. */
bool quiet = false;
/** \brief Adjustment to add to the match offset when we report a match. /** \brief Adjustment to add to the match offset when we report a match.
* *
* This is usually used for reports attached to states that form part of a * This is usually used for reports attached to states that form part of a
@ -207,16 +218,17 @@ bool operator==(const Report &a, const Report &b) {
} }
static inline static inline
Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey) { Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey, bool quiet) {
Report ir(EXTERNAL_CALLBACK, report); Report ir(EXTERNAL_CALLBACK, report);
ir.offsetAdjust = offsetAdjust; ir.offsetAdjust = offsetAdjust;
ir.ekey = ekey; ir.ekey = ekey;
ir.quiet = (u8)quiet;
return ir; return ir;
} }
static inline static inline
Report makeCallback(u32 report, s32 offsetAdjust) { Report makeCallback(u32 report, s32 offsetAdjust) {
return makeECallback(report, offsetAdjust, INVALID_EKEY); return makeECallback(report, offsetAdjust, INVALID_EKEY, false);
} }
static inline static inline
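
A hedged sketch of how the new makeECallback() signature might be used when
building a report for a pattern that participates in a logical combination.
The wrapper function and its argument values are made up for illustration;
the lkey itself is filled in later by ReportManager::logicalKeyRenumber().

#include "util/exhaust.h"   // INVALID_EKEY
#include "util/logical.h"   // INVALID_LKEY
#include "util/report.h"    // Report, makeECallback

ue2::Report makeCombinationReport(unsigned onmatch, bool quiet) {
    // Sub-expressions of a combination may be marked quiet so that they feed
    // the combination without firing the user callback themselves.
    ue2::Report r = ue2::makeECallback(onmatch, /*offsetAdjust=*/0,
                                       INVALID_EKEY, quiet);
    // r.lkey is still INVALID_LKEY at this point; it is assigned once all
    // combination expressions have been parsed and renumbered.
    return r;
}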

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -95,6 +95,31 @@ u32 ReportManager::getExhaustibleKey(u32 a) {
return it->second; return it->second;
} }
const set<u32> &ReportManager::getRelateCKeys(u32 lkey) {
auto it = pl.lkey2ckeys.find(lkey);
assert(it != pl.lkey2ckeys.end());
return it->second;
}
void ReportManager::logicalKeyRenumber() {
pl.logicalKeyRenumber();
// assign to corresponding report
for (u32 i = 0; i < reportIds.size(); i++) {
Report &ir = reportIds[i];
if (contains(pl.toLogicalKeyMap, ir.onmatch)) {
ir.lkey = pl.toLogicalKeyMap.at(ir.onmatch);
}
}
}
const vector<LogicalOp> &ReportManager::getLogicalTree() const {
return pl.logicalTree;
}
const vector<CombInfo> &ReportManager::getCombInfoMap() const {
return pl.combInfoMap;
}
u32 ReportManager::getUnassociatedExhaustibleKey(void) { u32 ReportManager::getUnassociatedExhaustibleKey(void) {
u32 rv = toExhaustibleKeyMap.size(); u32 rv = toExhaustibleKeyMap.size();
bool inserted; bool inserted;
@ -115,6 +140,18 @@ u32 ReportManager::numEkeys() const {
return (u32) toExhaustibleKeyMap.size(); return (u32) toExhaustibleKeyMap.size();
} }
u32 ReportManager::numLogicalKeys() const {
return (u32) pl.toLogicalKeyMap.size();
}
u32 ReportManager::numLogicalOps() const {
return (u32) pl.logicalTree.size();
}
u32 ReportManager::numCkeys() const {
return (u32) pl.toCombKeyMap.size();
}
bool ReportManager::patternSetCanExhaust() const { bool ReportManager::patternSetCanExhaust() const {
return global_exhaust && !toExhaustibleKeyMap.empty(); return global_exhaust && !toExhaustibleKeyMap.empty();
} }
@ -219,7 +256,7 @@ Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr,
ekey = getExhaustibleKey(expr.report); ekey = getExhaustibleKey(expr.report);
} }
return makeECallback(expr.report, adj, ekey); return makeECallback(expr.report, adj, ekey, expr.quiet);
} }
void ReportManager::setProgramOffset(ReportID id, u32 programOffset) { void ReportManager::setProgramOffset(ReportID id, u32 programOffset) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -38,6 +38,7 @@
#include "util/compile_error.h" #include "util/compile_error.h"
#include "util/noncopyable.h" #include "util/noncopyable.h"
#include "util/report.h" #include "util/report.h"
#include "parser/logical_combination.h"
#include <map> #include <map>
#include <set> #include <set>
@ -80,6 +81,15 @@ public:
/** \brief Total number of exhaustion keys. */ /** \brief Total number of exhaustion keys. */
u32 numEkeys() const; u32 numEkeys() const;
/** \brief Total number of logical keys. */
u32 numLogicalKeys() const;
/** \brief Total number of logical operators. */
u32 numLogicalOps() const;
/** \brief Total number of combination keys. */
u32 numCkeys() const;
/** \brief True if the pattern set can exhaust (i.e. all patterns are /** \brief True if the pattern set can exhaust (i.e. all patterns are
* highlander). */ * highlander). */
bool patternSetCanExhaust() const; bool patternSetCanExhaust() const;
@ -110,6 +120,19 @@ public:
* assigning one if necessary. */ * assigning one if necessary. */
u32 getExhaustibleKey(u32 expressionIndex); u32 getExhaustibleKey(u32 expressionIndex);
/** \brief Get lkey's corresponding ckeys. */
const std::set<u32> &getRelateCKeys(u32 lkey);
/** \brief Renumber lkey for logical operations, after parsed
* all logical expressions. */
void logicalKeyRenumber();
/** \brief Used in Rose for writing bytecode. */
const std::vector<LogicalOp> &getLogicalTree() const;
/** \brief Used in Rose for writing bytecode. */
const std::vector<CombInfo> &getCombInfoMap() const;
/** \brief Fetch the dedupe key associated with the given report. Returns /** \brief Fetch the dedupe key associated with the given report. Returns
* ~0U if no dkey is needed. */ * ~0U if no dkey is needed. */
u32 getDkey(const Report &r) const; u32 getDkey(const Report &r) const;
@ -122,6 +145,9 @@ public:
* set. */ * set. */
u32 getProgramOffset(ReportID id) const; u32 getProgramOffset(ReportID id) const;
/** \brief Parsed logical combination structure. */
ParsedLogical pl;
private: private:
/** \brief Grey box ref, for checking resource limits. */ /** \brief Grey box ref, for checking resource limits. */
const Grey &grey; const Grey &grey;
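
A hypothetical sketch of the compile-time flow this API suggests; the
function name and comments are illustrative, not taken from the source.

#include "util/report_manager.h"

void writeCombinationBytecode(ue2::ReportManager &rm) {
    // Combination expressions have already been parsed into rm.pl by the
    // front end. Assign the final lkeys/ckeys once every expression is known.
    rm.logicalKeyRenumber();

    // Rose can now pull the flattened structures when writing bytecode.
    const auto &ops = rm.getLogicalTree();   // vector of LogicalOp
    const auto &combs = rm.getCombInfoMap(); // vector of CombInfo
    (void)ops;
    (void)combs;
}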

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -288,7 +288,7 @@ private:
vertex_edge_list<in_edge_hook> in_edge_list; vertex_edge_list<in_edge_hook> in_edge_list;
/* The out going edges are considered owned by the vertex and /* The out going edges are considered owned by the vertex and
* need to be freed when the graph is begin destroyed */ * need to be freed when the graph is being destroyed */
vertex_edge_list<out_edge_hook> out_edge_list; vertex_edge_list<out_edge_hook> out_edge_list;
/* The destructor only frees memory owned by the vertex and will leave /* The destructor only frees memory owned by the vertex and will leave
@ -1025,70 +1025,67 @@ public:
} }
}; };
/** \brief Type trait that detects whether Graph is a ue2_graph, used to enable the overloads below. */
template<typename Graph> template<typename Graph>
typename std::enable_if< struct is_ue2_graph
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value, : public ::std::integral_constant<
bool, std::is_base_of<graph_detail::graph_base, Graph>::value> {};
template<typename Graph>
typename std::enable_if<is_ue2_graph<Graph>::value,
typename Graph::vertex_descriptor>::type typename Graph::vertex_descriptor>::type
add_vertex(Graph &g) { add_vertex(Graph &g) {
return g.add_vertex_impl(); return g.add_vertex_impl();
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
remove_vertex(typename Graph::vertex_descriptor v, Graph &g) { remove_vertex(typename Graph::vertex_descriptor v, Graph &g) {
g.remove_vertex_impl(v); g.remove_vertex_impl(v);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
clear_in_edges(typename Graph::vertex_descriptor v, Graph &g) { clear_in_edges(typename Graph::vertex_descriptor v, Graph &g) {
g.clear_in_edges_impl(v); g.clear_in_edges_impl(v);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
clear_out_edges(typename Graph::vertex_descriptor v, Graph &g) { clear_out_edges(typename Graph::vertex_descriptor v, Graph &g) {
g.clear_out_edges_impl(v); g.clear_out_edges_impl(v);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
clear_vertex(typename Graph::vertex_descriptor v, Graph &g) { clear_vertex(typename Graph::vertex_descriptor v, Graph &g) {
g.clear_in_edges_impl(v); g.clear_in_edges_impl(v);
g.clear_out_edges_impl(v); g.clear_out_edges_impl(v);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::vertex_descriptor>::type typename Graph::vertex_descriptor>::type
source(typename Graph::edge_descriptor e, const Graph &) { source(typename Graph::edge_descriptor e, const Graph &) {
return Graph::source_impl(e); return Graph::source_impl(e);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::vertex_descriptor>::type typename Graph::vertex_descriptor>::type
target(typename Graph::edge_descriptor e, const Graph &) { target(typename Graph::edge_descriptor e, const Graph &) {
return Graph::target_impl(e); return Graph::target_impl(e);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::degree_size_type>::type typename Graph::degree_size_type>::type
out_degree(typename Graph::vertex_descriptor v, const Graph &) { out_degree(typename Graph::vertex_descriptor v, const Graph &) {
return Graph::out_degree_impl(v); return Graph::out_degree_impl(v);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
std::pair<typename Graph::out_edge_iterator, std::pair<typename Graph::out_edge_iterator,
typename Graph::out_edge_iterator>>::type typename Graph::out_edge_iterator>>::type
out_edges(typename Graph::vertex_descriptor v, const Graph &) { out_edges(typename Graph::vertex_descriptor v, const Graph &) {
@ -1096,16 +1093,14 @@ out_edges(typename Graph::vertex_descriptor v, const Graph &) {
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::degree_size_type>::type typename Graph::degree_size_type>::type
in_degree(typename Graph::vertex_descriptor v, const Graph &) { in_degree(typename Graph::vertex_descriptor v, const Graph &) {
return Graph::in_degree_impl(v); return Graph::in_degree_impl(v);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
std::pair<typename Graph::in_edge_iterator, std::pair<typename Graph::in_edge_iterator,
typename Graph::in_edge_iterator>>::type typename Graph::in_edge_iterator>>::type
in_edges(typename Graph::vertex_descriptor v, const Graph &) { in_edges(typename Graph::vertex_descriptor v, const Graph &) {
@ -1113,16 +1108,14 @@ in_edges(typename Graph::vertex_descriptor v, const Graph &) {
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::degree_size_type>::type typename Graph::degree_size_type>::type
degree(typename Graph::vertex_descriptor v, const Graph &) { degree(typename Graph::vertex_descriptor v, const Graph &) {
return Graph::degree_impl(v); return Graph::degree_impl(v);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
std::pair<typename Graph::adjacency_iterator, std::pair<typename Graph::adjacency_iterator,
typename Graph::adjacency_iterator>>::type typename Graph::adjacency_iterator>>::type
adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) { adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) {
@ -1130,8 +1123,7 @@ adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) {
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
std::pair<typename Graph::edge_descriptor, bool>>::type std::pair<typename Graph::edge_descriptor, bool>>::type
edge(typename Graph::vertex_descriptor u, typename Graph::vertex_descriptor v, edge(typename Graph::vertex_descriptor u, typename Graph::vertex_descriptor v,
const Graph &g) { const Graph &g) {
@ -1139,8 +1131,7 @@ edge(typename Graph::vertex_descriptor u, typename Graph::vertex_descriptor v,
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
std::pair<typename Graph::inv_adjacency_iterator, std::pair<typename Graph::inv_adjacency_iterator,
typename Graph::inv_adjacency_iterator>>::type typename Graph::inv_adjacency_iterator>>::type
inv_adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) { inv_adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) {
@ -1148,8 +1139,7 @@ inv_adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) {
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
std::pair<typename Graph::edge_descriptor, bool>>::type std::pair<typename Graph::edge_descriptor, bool>>::type
add_edge(typename Graph::vertex_descriptor u, add_edge(typename Graph::vertex_descriptor u,
typename Graph::vertex_descriptor v, Graph &g) { typename Graph::vertex_descriptor v, Graph &g) {
@ -1157,62 +1147,55 @@ add_edge(typename Graph::vertex_descriptor u,
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
remove_edge(typename Graph::edge_descriptor e, Graph &g) { remove_edge(typename Graph::edge_descriptor e, Graph &g) {
g.remove_edge_impl(e); g.remove_edge_impl(e);
} }
template<typename Graph, typename Iter> template<typename Graph, typename Iter>
typename std::enable_if< typename std::enable_if<
!std::is_convertible<Iter, typename Graph::edge_descriptor>::value !std::is_convertible<Iter, typename Graph::edge_descriptor>::value &&
&& std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type is_ue2_graph<Graph>::value>::type
remove_edge(Iter it, Graph &g) { remove_edge(Iter it, Graph &g) {
g.remove_edge_impl(*it); g.remove_edge_impl(*it);
} }
template<typename Graph, typename Predicate> template<typename Graph, typename Predicate>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
remove_out_edge_if(typename Graph::vertex_descriptor v, Predicate pred, remove_out_edge_if(typename Graph::vertex_descriptor v, Predicate pred,
Graph &g) { Graph &g) {
g.remove_out_edge_if_impl(v, pred); g.remove_out_edge_if_impl(v, pred);
} }
template<typename Graph, typename Predicate> template<typename Graph, typename Predicate>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
remove_in_edge_if(typename Graph::vertex_descriptor v, Predicate pred, remove_in_edge_if(typename Graph::vertex_descriptor v, Predicate pred,
Graph &g) { Graph &g) {
g.remove_in_edge_if_impl(v, pred); g.remove_in_edge_if_impl(v, pred);
} }
template<typename Graph, typename Predicate> template<typename Graph, typename Predicate>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
remove_edge_if(Predicate pred, Graph &g) { remove_edge_if(Predicate pred, Graph &g) {
g.remove_edge_if_impl(pred); g.remove_edge_if_impl(pred);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
remove_edge(const typename Graph::vertex_descriptor &u, remove_edge(const typename Graph::vertex_descriptor &u,
const typename Graph::vertex_descriptor &v, Graph &g) { const typename Graph::vertex_descriptor &v, Graph &g) {
g.remove_edge_impl(u, v); g.remove_edge_impl(u, v);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::vertices_size_type>::type typename Graph::vertices_size_type>::type
num_vertices(const Graph &g) { num_vertices(const Graph &g) {
return g.num_vertices_impl(); return g.num_vertices_impl();
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
std::pair<typename Graph::vertex_iterator, std::pair<typename Graph::vertex_iterator,
typename Graph::vertex_iterator>>::type typename Graph::vertex_iterator>>::type
vertices(const Graph &g) { vertices(const Graph &g) {
@ -1220,16 +1203,14 @@ vertices(const Graph &g) {
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::edges_size_type>::type typename Graph::edges_size_type>::type
num_edges(const Graph &g) { num_edges(const Graph &g) {
return g.num_edges_impl(); return g.num_edges_impl();
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
std::pair<typename Graph::edge_iterator, std::pair<typename Graph::edge_iterator,
typename Graph::edge_iterator>>::type typename Graph::edge_iterator>>::type
edges(const Graph &g) { edges(const Graph &g) {
@ -1237,16 +1218,14 @@ edges(const Graph &g) {
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::vertex_descriptor>::type typename Graph::vertex_descriptor>::type
add_vertex(const typename Graph::vertex_property_type &vp, Graph &g) { add_vertex(const typename Graph::vertex_property_type &vp, Graph &g) {
return g.add_vertex_impl(vp); return g.add_vertex_impl(vp);
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
std::pair<typename Graph::edge_descriptor, bool>>::type std::pair<typename Graph::edge_descriptor, bool>>::type
add_edge(typename Graph::vertex_descriptor u, add_edge(typename Graph::vertex_descriptor u,
typename Graph::vertex_descriptor v, typename Graph::vertex_descriptor v,
@ -1255,35 +1234,59 @@ add_edge(typename Graph::vertex_descriptor u,
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
renumber_edges(Graph &g) { renumber_edges(Graph &g) {
g.renumber_edges_impl(); g.renumber_edges_impl();
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value>::type
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
renumber_vertices(Graph &g) { renumber_vertices(Graph &g) {
g.renumber_vertices_impl(); g.renumber_vertices_impl();
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::vertices_size_type>::type typename Graph::vertices_size_type>::type
vertex_index_upper_bound(const Graph &g) { vertex_index_upper_bound(const Graph &g) {
return g.vertex_index_upper_bound_impl(); return g.vertex_index_upper_bound_impl();
} }
template<typename Graph> template<typename Graph>
typename std::enable_if< typename std::enable_if<is_ue2_graph<Graph>::value,
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
typename Graph::edges_size_type>::type typename Graph::edges_size_type>::type
edge_index_upper_bound(const Graph &g) { edge_index_upper_bound(const Graph &g) {
return g.edge_index_upper_bound_impl(); return g.edge_index_upper_bound_impl();
} }
template<typename T> struct pointer_to_member_traits {};
template<typename Return, typename Class>
struct pointer_to_member_traits<Return(Class::*)> {
using member_type = Return;
using class_type = Class;
};
template<typename Graph, typename Property, typename Enable = void>
struct is_ue2_vertex_or_edge_property {
static constexpr bool value = false;
};
template<typename Graph, typename Property>
struct is_ue2_vertex_or_edge_property<
Graph, Property, typename std::enable_if<is_ue2_graph<Graph>::value &&
std::is_member_object_pointer<
Property>::value>::type> {
private:
using class_type = typename pointer_to_member_traits<Property>::class_type;
using vertex_type = typename Graph::vertex_property_type;
using edge_type = typename Graph::edge_property_type;
public:
static constexpr bool value =
std::is_same<class_type, vertex_type>::value ||
std::is_same<class_type, edge_type>::value;
};
using boost::vertex_index; using boost::vertex_index;
using boost::edge_index; using boost::edge_index;
@ -1295,13 +1298,53 @@ namespace boost {
* adaptors (like filtered_graph) to know the type of the property maps */ * adaptors (like filtered_graph) to know the type of the property maps */
template<typename Graph, typename Prop> template<typename Graph, typename Prop>
struct property_map<Graph, Prop, struct property_map<Graph, Prop,
typename std::enable_if< typename std::enable_if<ue2::is_ue2_graph<Graph>::value &&
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value ue2::is_ue2_vertex_or_edge_property<
>::type > { Graph, Prop>::value>::type> {
typedef decltype(get(std::declval<Prop>(), private:
std::declval<Graph &>())) type; using prop_traits = ue2::pointer_to_member_traits<Prop>;
typedef decltype(get(std::declval<Prop>(), using member_type = typename prop_traits::member_type;
std::declval<const Graph &>())) const_type; using class_type = typename prop_traits::class_type;
public:
using type = typename Graph::template prop_map<member_type &, class_type>;
using const_type = typename Graph::template prop_map<const member_type &,
class_type>;
};
template<typename Graph>
struct property_map<Graph, vertex_index_t,
typename std::enable_if<ue2::is_ue2_graph<Graph>::value>::type> {
using v_prop_type = typename Graph::vertex_property_type;
using type = typename Graph::template prop_map<size_t &, v_prop_type>;
using const_type =
typename Graph::template prop_map<const size_t &, v_prop_type>;
};
template<typename Graph>
struct property_map<Graph, edge_index_t,
typename std::enable_if<ue2::is_ue2_graph<Graph>::value>::type> {
using e_prop_type = typename Graph::edge_property_type;
using type = typename Graph::template prop_map<size_t &, e_prop_type>;
using const_type =
typename Graph::template prop_map<const size_t &, e_prop_type>;
};
template<typename Graph>
struct property_map<Graph, vertex_all_t,
typename std::enable_if<ue2::is_ue2_graph<Graph>::value>::type> {
using v_prop_type = typename Graph::vertex_property_type;
using type = typename Graph::template prop_map_all<v_prop_type &>;
using const_type =
typename Graph::template prop_map_all<const v_prop_type &>;
};
template<typename Graph>
struct property_map<Graph, edge_all_t,
typename std::enable_if<ue2::is_ue2_graph<Graph>::value>::type> {
using e_prop_type = typename Graph::edge_property_type;
using type = typename Graph::template prop_map_all<e_prop_type &>;
using const_type =
typename Graph::template prop_map_all<const e_prop_type &>;
}; };
} // namespace boost } // namespace boost
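
The refactor above replaces each repeated std::is_base_of<...> enable_if
clause with a single reusable trait. A standalone sketch of the same pattern,
with illustrative names rather than ue2's:

#include <type_traits>

namespace demo {

struct graph_base {};  // stand-in for graph_detail::graph_base

template<typename Graph>
struct is_my_graph
    : std::integral_constant<bool,
                             std::is_base_of<graph_base, Graph>::value> {};

// Only graphs deriving from graph_base see this overload; for anything else
// the compiler falls back to the usual Boost.Graph free functions.
template<typename Graph>
typename std::enable_if<is_my_graph<Graph>::value,
                        typename Graph::vertex_descriptor>::type
add_vertex(Graph &g) {
    return g.add_vertex_impl();
}

} // namespace demo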

View File

@ -1,6 +1,3 @@
if (WIN32)
return()
endif()
find_package(Threads) find_package(Threads)
# remove some warnings # remove some warnings
@ -12,6 +9,12 @@ include_directories(${PROJECT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
include_directories(${PROJECT_SOURCE_DIR}/util) include_directories(${PROJECT_SOURCE_DIR}/util)
if (WIN32)
add_subdirectory(hscheck)
add_subdirectory(hsbench)
add_subdirectory(hsdump)
add_subdirectory(hscollider)
else()
# add any subdir with a cmake file # add any subdir with a cmake file
file(GLOB dirents RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *) file(GLOB dirents RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *)
foreach(e ${dirents}) foreach(e ${dirents})
@ -20,3 +23,4 @@ foreach(e ${dirents})
add_subdirectory(${e}) add_subdirectory(${e})
endif () endif ()
endforeach () endforeach ()
endif()

View File

@ -31,6 +31,8 @@ SET(hsbench_SOURCES
common.h common.h
data_corpus.cpp data_corpus.cpp
data_corpus.h data_corpus.h
engine.cpp
engine.h
engine_hyperscan.cpp engine_hyperscan.cpp
engine_hyperscan.h engine_hyperscan.h
heapstats.cpp heapstats.cpp
@ -45,6 +47,28 @@ SET(hsbench_SOURCES
timer.h timer.h
) )
if (BUILD_CHIMERA)
add_definitions(-DHS_HYBRID)
SET(hsbench_SOURCES
${hsbench_SOURCES}
engine_chimera.cpp
engine_chimera.h
engine_pcre.cpp
engine_pcre.h
)
endif()
add_executable(hsbench ${hsbench_SOURCES}) add_executable(hsbench ${hsbench_SOURCES})
target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS} if (BUILD_CHIMERA)
${CMAKE_THREAD_LIBS_INIT}) include_directories(${PCRE_INCLUDE_DIRS})
if(NOT WIN32)
target_link_libraries(hsbench hs chimera ${PCRE_LDFLAGS} databaseutil
expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
else()
target_link_libraries(hsbench hs chimera pcre databaseutil
expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
endif()
else()
target_link_libraries(hsbench hs databaseutil expressionutil
${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
endif()

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -42,6 +42,12 @@ extern bool forceEditDistance;
extern unsigned editDistance; extern unsigned editDistance;
extern bool printCompressSize; extern bool printCompressSize;
/** Structure for the result of a single complete scan. */
struct ResultEntry {
double seconds = 0; //!< Time taken for scan.
unsigned int matches = 0; //!< Count of matches found.
};
struct SqlFailure { struct SqlFailure {
explicit SqlFailure(const std::string &s) : message(s) {} explicit SqlFailure(const std::string &s) : message(s) {}
std::string message; std::string message;

35
tools/hsbench/engine.cpp Normal file
View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "engine.h"
EngineContext::~EngineContext() { }
EngineStream::~EngineStream() { }
Engine::~Engine() { }

94
tools/hsbench/engine.h Normal file
View File

@ -0,0 +1,94 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ENGINE_H
#define ENGINE_H
#include "common.h"
#include "sqldb.h"
#include <memory>
#include <string>
#include <vector>
#include <boost/core/noncopyable.hpp>
// Engines have an engine context which is allocated on a per-thread basis.
class EngineContext : boost::noncopyable {
public:
virtual ~EngineContext();
};
/** Streaming mode scans have persistent stream state associated with them. */
class EngineStream : boost::noncopyable {
public:
virtual ~EngineStream();
unsigned int sn;
};
// Benchmarking engine
class Engine : boost::noncopyable {
public:
virtual ~Engine();
// allocate an EngineContext
virtual std::unique_ptr<EngineContext> makeContext() const = 0;
// non-streaming scan
virtual void scan(const char *data, unsigned len, unsigned blockId,
ResultEntry &results, EngineContext &ectx) const = 0;
// vectoring scan
virtual void scan_vectored(const char *const *data,
const unsigned int *len, unsigned int count,
unsigned int streamId, ResultEntry &result,
EngineContext &ectx) const = 0;
// stream open
virtual std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
unsigned id) const = 0;
// stream close
virtual void streamClose(std::unique_ptr<EngineStream> stream,
ResultEntry &result) const = 0;
// stream compress and expand
virtual void streamCompressExpand(EngineStream &stream,
std::vector<char> &temp) const = 0;
// streaming scan
virtual void streamScan(EngineStream &stream, const char *data,
unsigned int len, unsigned int id,
ResultEntry &result) const = 0;
virtual void printStats() const = 0;
virtual void sqlStats(SqlDB &db) const = 0;
};
#endif // ENGINE_H
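
A hypothetical driver loop over the Engine interface above, showing how a
block-mode benchmark can be written against the abstract base class without
knowing whether a Hyperscan, Chimera or PCRE engine is underneath. The corpus
container and the return value are simplified stand-ins for what hsbench does.

#include "engine.h"

#include <string>
#include <vector>

unsigned runBlockBench(const Engine &eng,
                       const std::vector<std::string> &blocks) {
    auto ectx = eng.makeContext();  // per-thread scratch lives in here
    ResultEntry result;
    for (unsigned i = 0; i < blocks.size(); i++) {
        eng.scan(blocks[i].data(), static_cast<unsigned>(blocks[i].size()),
                 i, result, *ectx);
    }
    return result.matches;
}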

View File

@ -0,0 +1,327 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "ExpressionParser.h"
#include "common.h"
#include "engine_chimera.h"
#include "expressions.h"
#include "heapstats.h"
#include "sqldb.h"
#include "timer.h"
#include "chimera/ch_database.h"
#include "util/make_unique.h"
using namespace std;
EngineCHContext::EngineCHContext(const ch_database_t *db) {
ch_alloc_scratch(db, &scratch);
assert(scratch);
}
EngineCHContext::~EngineCHContext() {
ch_free_scratch(scratch);
}
namespace /* anonymous */ {
/** Scan context structure passed to the onMatch callback function. */
struct ScanCHContext {
ScanCHContext(unsigned id_in, ResultEntry &result_in)
: id(id_in), result(result_in) {}
unsigned id;
ResultEntry &result;
};
} // namespace
/**
* Callback function called for every match that Chimera produces, used when
* "echo matches" is off.
*/
static
int HS_CDECL onMatch(unsigned int, unsigned long long, unsigned long long,
unsigned int, unsigned int, const ch_capture_t *,
void *ctx) {
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
assert(sc);
sc->result.matches++;
return 0;
}
/**
* Callback function called for every match that Chimera produces when "echo
* matches" is enabled.
*/
static
int HS_CDECL onMatchEcho(unsigned int id, unsigned long long,
unsigned long long to, unsigned int, unsigned int,
const ch_capture_t *, void *ctx) {
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
assert(sc);
sc->result.matches++;
printf("Match @%u:%llu for %u\n", sc->id, to, id);
return 0;
}
EngineChimera::EngineChimera(ch_database_t *db_in, CompileCHStats cs)
: db(db_in), compile_stats(move(cs)) {
assert(db);
}
EngineChimera::~EngineChimera() {
ch_free_database(db);
}
unique_ptr<EngineContext> EngineChimera::makeContext() const {
return ue2::make_unique<EngineCHContext>(db);
}
void EngineChimera::scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ectx) const {
assert(data);
auto &ctx = static_cast<EngineCHContext &>(ectx);
ScanCHContext sc(id, result);
auto callback = echo_matches ? onMatchEcho : onMatch;
ch_error_t rv = ch_scan(db, data, len, 0, ctx.scratch, callback, nullptr,
&sc);
if (rv != CH_SUCCESS) {
printf("Fatal error: ch_scan returned error %d\n", rv);
abort();
}
}
// vectoring scan
void EngineChimera::scan_vectored(UNUSED const char *const *data,
UNUSED const unsigned int *len,
UNUSED unsigned int count,
UNUSED unsigned int streamId,
UNUSED ResultEntry &result,
UNUSED EngineContext &ectx) const {
printf("Hybrid matcher can't support vectored mode.\n");
abort();
}
unique_ptr<EngineStream> EngineChimera::streamOpen(UNUSED EngineContext &ectx,
UNUSED unsigned id) const {
printf("Hybrid matcher can't stream.\n");
abort();
}
void EngineChimera::streamClose(UNUSED unique_ptr<EngineStream> stream,
UNUSED ResultEntry &result) const {
printf("Hybrid matcher can't stream.\n");
abort();
}
void EngineChimera::streamScan(UNUSED EngineStream &stream,
UNUSED const char *data,
UNUSED unsigned len, UNUSED unsigned id,
UNUSED ResultEntry &result) const {
printf("Hybrid matcher can't stream.\n");
abort();
}
void EngineChimera::streamCompressExpand(UNUSED EngineStream &stream,
UNUSED vector<char> &temp) const {
printf("Hybrid matcher can't stream.\n");
abort();
}
void EngineChimera::printStats() const {
// Output summary information.
if (!compile_stats.sigs_name.empty()) {
printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
}
printf("Signatures: %s\n", compile_stats.signatures.c_str());
printf("Chimera info: %s\n", compile_stats.db_info.c_str());
#ifndef _WIN32
printf("Expression count: %'zu\n", compile_stats.expressionCount);
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
#else
printf("Expression count: %zu\n", compile_stats.expressionCount);
printf("Bytecode size: %zu bytes\n", compile_stats.compiledSize);
#endif
printf("Database CRC: 0x%x\n", compile_stats.crc32);
#ifndef _WIN32
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
#else
printf("Scratch size: %zu bytes\n", compile_stats.scratchSize);
printf("Compile time: %0.3Lf seconds\n", compile_stats.compileSecs);
printf("Peak heap usage: %u bytes\n", compile_stats.peakMemorySize);
#endif
}
void EngineChimera::sqlStats(SqlDB &sqldb) const {
ostringstream crc;
crc << "0x" << hex << compile_stats.crc32;
static const string Q =
"INSERT INTO Compile ("
"sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
"scratchSize, compileSecs, peakMemory) "
"VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";
sqldb.insert_all(Q, compile_stats.sigs_name, compile_stats.signatures,
compile_stats.db_info, compile_stats.expressionCount,
compile_stats.compiledSize, crc.str(),
compile_stats.scratchSize, compile_stats.compileSecs,
compile_stats.peakMemorySize);
}
unique_ptr<EngineChimera>
buildEngineChimera(const ExpressionMap &expressions, const string &name,
const string &sigs_name) {
if (expressions.empty()) {
assert(0);
return nullptr;
}
long double compileSecs = 0.0;
size_t compiledSize = 0;
size_t scratchSize = 0;
unsigned int peakMemorySize = 0;
string db_info;
ch_database_t *db;
ch_error_t err;
const unsigned int count = expressions.size();
vector<string> exprs;
vector<unsigned int> flags, ids;
vector<hs_expr_ext> ext;
for (const auto &m : expressions) {
string expr;
unsigned int f = 0;
hs_expr_ext extparam; // unused
extparam.flags = 0;
if (!readExpression(m.second, expr, &f, &extparam)) {
printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
m.first);
return nullptr;
}
if (extparam.flags) {
printf("Error parsing PCRE with extended flags: %s (id %u)\n",
m.second.c_str(), m.first);
return nullptr;
}
exprs.push_back(expr);
ids.push_back(m.first);
flags.push_back(f);
}
// Our compiler takes an array of plain ol' C strings.
vector<const char *> patterns(count);
for (unsigned int i = 0; i < count; i++) {
patterns[i] = exprs[i].c_str();
}
Timer timer;
timer.start();
// Capture groups by default
unsigned int mode = CH_MODE_GROUPS;
ch_compile_error_t *compile_err;
err = ch_compile_multi(patterns.data(), flags.data(), ids.data(),
count, mode, nullptr, &db, &compile_err);
timer.complete();
compileSecs = timer.seconds();
peakMemorySize = getPeakHeap();
if (err == CH_COMPILER_ERROR) {
if (compile_err->expression >= 0) {
printf("Compile error for signature #%u: %s\n",
compile_err->expression, compile_err->message);
} else {
printf("Compile error: %s\n", compile_err->message);
}
ch_free_compile_error(compile_err);
return nullptr;
}
err = ch_database_size(db, &compiledSize);
if (err != CH_SUCCESS) {
return nullptr;
}
assert(compiledSize > 0);
char *info;
err = ch_database_info(db, &info);
if (err != CH_SUCCESS) {
return nullptr;
} else {
db_info = string(info);
free(info);
}
// Allocate scratch temporarily to find its size: this is a good test
// anyway.
ch_scratch_t *scratch = nullptr;
err = ch_alloc_scratch(db, &scratch);
if (err != CH_SUCCESS) {
return nullptr;
}
err = ch_scratch_size(scratch, &scratchSize);
if (err != CH_SUCCESS) {
return nullptr;
}
ch_free_scratch(scratch);
// Collect summary information.
CompileCHStats cs;
cs.sigs_name = sigs_name;
if (!sigs_name.empty()) {
const auto pos = name.find_last_of('/');
cs.signatures = name.substr(pos + 1);
} else {
cs.signatures = name;
}
cs.db_info = db_info;
cs.expressionCount = expressions.size();
cs.compiledSize = compiledSize;
cs.scratchSize = scratchSize;
cs.compileSecs = compileSecs;
cs.peakMemorySize = peakMemorySize;
return ue2::make_unique<EngineChimera>(db, move(cs));
}
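
A hypothetical end-to-end use of the builder above, with error handling
trimmed; the pattern, ids and corpus are made up, and the expression is
written in the /pattern/flags form assumed by the tool's signature files.

#include "engine_chimera.h"

void benchOneChimeraBlock() {
    ExpressionMap exprs;            // id -> expression in /pattern/flags form
    exprs[1] = "/foo.*bar/s";
    auto engine = buildEngineChimera(exprs, "demo-sigs", "");
    if (!engine) {
        return;                     // compile failed; error already printed
    }
    auto ctx = engine->makeContext();
    ResultEntry result;
    const char data[] = "xxfooyyybarzz";
    engine->scan(data, sizeof(data) - 1, /*blockId=*/0, result, *ctx);
    // result.matches now holds the number of Chimera matches in this block.
}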

View File

@ -0,0 +1,103 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ENGINECHIMERA_H
#define ENGINECHIMERA_H
#include "expressions.h"
#include "engine.h"
#include "chimera/ch.h"
#include <memory>
#include <string>
#include <vector>
/** Information about the database compile */
struct CompileCHStats {
std::string sigs_name;
std::string signatures;
std::string db_info;
size_t expressionCount = 0;
size_t compiledSize = 0;
uint32_t crc32 = 0;
size_t scratchSize = 0;
long double compileSecs = 0;
unsigned int peakMemorySize = 0;
};
/** Engine context which is allocated on a per-thread basis. */
class EngineCHContext : public EngineContext {
public:
explicit EngineCHContext(const ch_database_t *db);
~EngineCHContext();
ch_scratch_t *scratch = nullptr;
};
/** Chimera Engine for scanning data. */
class EngineChimera : public Engine {
public:
explicit EngineChimera(ch_database_t *db, CompileCHStats cs);
~EngineChimera();
std::unique_ptr<EngineContext> makeContext() const;
void scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ectx) const;
void scan_vectored(const char *const *data, const unsigned int *len,
unsigned int count, unsigned int streamId,
ResultEntry &result, EngineContext &ectx) const;
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
unsigned id) const;
void streamClose(std::unique_ptr<EngineStream> stream,
ResultEntry &result) const;
void streamCompressExpand(EngineStream &stream,
std::vector<char> &temp) const;
void streamScan(EngineStream &stream, const char *data, unsigned int len,
unsigned int id, ResultEntry &result) const;
void printStats() const;
void sqlStats(SqlDB &db) const;
private:
ch_database_t *db;
CompileCHStats compile_stats;
};
std::unique_ptr<EngineChimera>
buildEngineChimera(const ExpressionMap &expressions, const std::string &name,
const std::string &sigs_name);
#endif // ENGINECHIMERA_H

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2018, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -57,20 +57,22 @@
using namespace std; using namespace std;
EngineContext::EngineContext(const hs_database_t *db) { EngineHSContext::EngineHSContext(const hs_database_t *db) {
hs_alloc_scratch(db, &scratch); hs_alloc_scratch(db, &scratch);
assert(scratch); assert(scratch);
} }
EngineContext::~EngineContext() { EngineHSContext::~EngineHSContext() {
hs_free_scratch(scratch); hs_free_scratch(scratch);
} }
EngineHSStream::~EngineHSStream() { }
namespace /* anonymous */ { namespace /* anonymous */ {
/** Scan context structure passed to the onMatch callback function. */ /** Scan context structure passed to the onMatch callback function. */
struct ScanContext { struct ScanHSContext {
ScanContext(unsigned id_in, ResultEntry &result_in, ScanHSContext(unsigned id_in, ResultEntry &result_in,
const EngineStream *stream_in) const EngineStream *stream_in)
: id(id_in), result(result_in), stream(stream_in) {} : id(id_in), result(result_in), stream(stream_in) {}
unsigned id; unsigned id;
@ -85,9 +87,9 @@ struct ScanContext {
* "echo matches" is off. * "echo matches" is off.
*/ */
static static
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int, int HS_CDECL onMatch(unsigned int, unsigned long long,
void *ctx) { unsigned long long, unsigned int, void *ctx) {
ScanContext *sc = static_cast<ScanContext *>(ctx); ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
assert(sc); assert(sc);
sc->result.matches++; sc->result.matches++;
@ -99,9 +101,9 @@ int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
* matches" is enabled. * matches" is enabled.
*/ */
static static
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to, int HS_CDECL onMatchEcho(unsigned int id, unsigned long long,
unsigned int, void *ctx) { unsigned long long to, unsigned int, void *ctx) {
ScanContext *sc = static_cast<ScanContext *>(ctx); ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
assert(sc); assert(sc);
sc->result.matches++; sc->result.matches++;
@ -114,7 +116,7 @@ int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
return 0; return 0;
} }
EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileStats cs) EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileHSStats cs)
: db(db_in), compile_stats(std::move(cs)) { : db(db_in), compile_stats(std::move(cs)) {
assert(db); assert(db);
} }
@ -124,14 +126,15 @@ EngineHyperscan::~EngineHyperscan() {
} }
unique_ptr<EngineContext> EngineHyperscan::makeContext() const { unique_ptr<EngineContext> EngineHyperscan::makeContext() const {
return ue2::make_unique<EngineContext>(db); return ue2::make_unique<EngineHSContext>(db);
} }
void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id, void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ctx) const { ResultEntry &result, EngineContext &ectx) const {
assert(data); assert(data);
ScanContext sc(id, result, nullptr); EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
ScanHSContext sc(id, result, nullptr);
auto callback = echo_matches ? onMatchEcho : onMatch; auto callback = echo_matches ? onMatchEcho : onMatch;
hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc); hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc);
@ -144,11 +147,12 @@ void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
void EngineHyperscan::scan_vectored(const char *const *data, void EngineHyperscan::scan_vectored(const char *const *data,
const unsigned int *len, unsigned int count, const unsigned int *len, unsigned int count,
unsigned streamId, ResultEntry &result, unsigned streamId, ResultEntry &result,
EngineContext &ctx) const { EngineContext &ectx) const {
assert(data); assert(data);
assert(len); assert(len);
ScanContext sc(streamId, result, nullptr); EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
ScanHSContext sc(streamId, result, nullptr);
auto callback = echo_matches ? onMatchEcho : onMatch; auto callback = echo_matches ? onMatchEcho : onMatch;
hs_error_t rv = hs_error_t rv =
hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc); hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc);
@ -159,9 +163,10 @@ void EngineHyperscan::scan_vectored(const char *const *data,
} }
} }
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx, unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ectx,
unsigned streamId) const { unsigned streamId) const {
auto stream = ue2::make_unique<EngineStream>(); EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
auto stream = ue2::make_unique<EngineHSStream>();
stream->ctx = &ctx; stream->ctx = &ctx;
hs_open_stream(db, 0, &stream->id); hs_open_stream(db, 0, &stream->id);
@ -170,17 +175,18 @@ unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
return nullptr; return nullptr;
} }
stream->sn = streamId; stream->sn = streamId;
-return stream;
+return move(stream);
}
void EngineHyperscan::streamClose(unique_ptr<EngineStream> stream,
ResultEntry &result) const {
assert(stream);
-auto &s = static_cast<EngineStream &>(*stream);
+auto &s = static_cast<EngineHSStream &>(*stream);
-EngineContext &ctx = *s.ctx;
+EngineContext &ectx = *s.ctx;
+EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
-ScanContext sc(0, result, &s);
+ScanHSContext sc(0, result, &s);
auto callback = echo_matches ? onMatchEcho : onMatch;
assert(s.id);
@@ -193,10 +199,10 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
ResultEntry &result) const {
assert(data);
-auto &s = static_cast<EngineStream &>(stream);
+auto &s = static_cast<EngineHSStream &>(stream);
-EngineContext &ctx = *s.ctx;
+EngineHSContext &ctx = *s.ctx;
-ScanContext sc(id, result, &s);
+ScanHSContext sc(id, result, &s);
auto callback = echo_matches ? onMatchEcho : onMatch;
hs_error_t rv =
hs_scan_stream(s.id, data, len, 0, ctx.scratch, callback, &sc);
@@ -210,11 +216,12 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
void EngineHyperscan::streamCompressExpand(EngineStream &stream,
vector<char> &temp) const {
size_t used = 0;
-hs_error_t err = hs_compress_stream(stream.id, temp.data(), temp.size(),
+auto &s = static_cast<EngineHSStream &>(stream);
+hs_error_t err = hs_compress_stream(s.id, temp.data(), temp.size(),
&used);
if (err == HS_INSUFFICIENT_SPACE) {
temp.resize(used);
-err = hs_compress_stream(stream.id, temp.data(), temp.size(), &used);
+err = hs_compress_stream(s.id, temp.data(), temp.size(), &used);
}
if (err != HS_SUCCESS) {
@@ -223,10 +230,10 @@ void EngineHyperscan::streamCompressExpand(EngineStream &stream,
}
if (printCompressSize) {
-printf("stream %u: compressed to %zu\n", stream.sn, used);
+printf("stream %u: compressed to %zu\n", s.sn, used);
}
-err = hs_reset_and_expand_stream(stream.id, temp.data(), temp.size(),
+err = hs_reset_and_expand_stream(s.id, temp.data(), temp.size(),
nullptr, nullptr, nullptr);
if (err != HS_SUCCESS) {
@@ -243,15 +250,30 @@ void EngineHyperscan::printStats() const {
}
printf("Signatures: %s\n", compile_stats.signatures.c_str());
printf("Hyperscan info: %s\n", compile_stats.db_info.c_str());
+#ifndef _WIN32
printf("Expression count: %'zu\n", compile_stats.expressionCount);
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
+#else
+printf("Expression count: %zu\n", compile_stats.expressionCount);
+printf("Bytecode size: %zu bytes\n", compile_stats.compiledSize);
+#endif
printf("Database CRC: 0x%x\n", compile_stats.crc32);
if (compile_stats.streaming) {
+#ifndef _WIN32
printf("Stream state size: %'zu bytes\n", compile_stats.streamSize);
+#else
+printf("Stream state size: %zu bytes\n", compile_stats.streamSize);
+#endif
}
+#ifndef _WIN32
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
+#else
+printf("Scratch size: %zu bytes\n", compile_stats.scratchSize);
+printf("Compile time: %0.3Lf seconds\n", compile_stats.compileSecs);
+printf("Peak heap usage: %u bytes\n", compile_stats.peakMemorySize);
+#endif
}
void EngineHyperscan::sqlStats(SqlDB &sqldb) const {
@@ -469,7 +491,7 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
hs_free_scratch(scratch);
// Collect summary information.
-CompileStats cs;
+CompileHSStats cs;
cs.sigs_name = sigs_name;
if (!sigs_name.empty()) {
const auto pos = name.find_last_of('/');

tools/hsbench/engine_hyperscan.h

@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2016-2017, Intel Corporation
+* Copyright (c) 2016-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,22 +30,15 @@
#define ENGINEHYPERSCAN_H
#include "expressions.h"
-#include "common.h"
+#include "engine.h"
-#include "sqldb.h"
#include "hs_runtime.h"
#include <memory>
#include <string>
#include <vector>
-/** Structure for the result of a single complete scan. */
-struct ResultEntry {
-double seconds = 0; //!< Time taken for scan.
-unsigned int matches = 0; //!< Count of matches found.
-};
/** Information about the database compile */
-struct CompileStats {
+struct CompileHSStats {
std::string sigs_name;
std::string signatures;
std::string db_info;
@@ -60,38 +53,38 @@ struct CompileStats {
};
/** Engine context which is allocated on a per-thread basis. */
-class EngineContext {
+class EngineHSContext : public EngineContext {
public:
-explicit EngineContext(const hs_database_t *db);
+explicit EngineHSContext(const hs_database_t *db);
-~EngineContext();
+~EngineHSContext();
hs_scratch_t *scratch = nullptr;
};
/** Streaming mode scans have persistent stream state associated with them. */
-class EngineStream {
+class EngineHSStream : public EngineStream {
public:
+~EngineHSStream();
hs_stream_t *id;
-unsigned int sn;
-EngineContext *ctx;
+EngineHSContext *ctx;
};
/** Hyperscan Engine for scanning data. */
-class EngineHyperscan {
+class EngineHyperscan : public Engine {
public:
-explicit EngineHyperscan(hs_database_t *db, CompileStats cs);
+explicit EngineHyperscan(hs_database_t *db, CompileHSStats cs);
~EngineHyperscan();
std::unique_ptr<EngineContext> makeContext() const;
void scan(const char *data, unsigned int len, unsigned int id,
-ResultEntry &result, EngineContext &ctx) const;
+ResultEntry &result, EngineContext &ectx) const;
void scan_vectored(const char *const *data, const unsigned int *len,
unsigned int count, unsigned int streamId,
-ResultEntry &result, EngineContext &ctx) const;
+ResultEntry &result, EngineContext &ectx) const;
-std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
+std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
unsigned id) const;
void streamClose(std::unique_ptr<EngineStream> stream,
@@ -109,7 +102,7 @@ public:
private:
hs_database_t *db;
-CompileStats compile_stats;
+CompileHSStats compile_stats;
};
namespace ue2 {

tools/hsbench/engine_pcre.cpp

@@ -0,0 +1,401 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
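/* Defining PCRE_STATIC before pcre.h is included makes the header declare
 * plain (non-dllimport) symbols, as required when linking the static PCRE
 * library on Windows. */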
#ifdef _WIN32
#define PCRE_STATIC
#endif
#include "config.h"
#include "common.h"
#include "engine_pcre.h"
#include "heapstats.h"
#include "huge.h"
#include "sqldb.h"
#include "timer.h"
#include "util/make_unique.h"
#include "util/unicode_def.h"
#include <algorithm>
using namespace std;
EnginePCREContext::EnginePCREContext(int capture_cnt) {
// pcre_exec needs an output vector of three ints per capturing group,
// plus one set for the whole match.
ovec = (int *)malloc((capture_cnt + 1) * sizeof(int) * 3);
}
EnginePCREContext::~EnginePCREContext() {
free(ovec);
}
namespace /* anonymous */ {
/** Scan context structure passed to the onMatch callback function. */
struct ScanPCREContext {
ScanPCREContext(unsigned id_in, ResultEntry &result_in)
: id(id_in), result(result_in) {}
unsigned id;
ResultEntry &result;
};
} // namespace
/**
* Function called for every match that PCRE produces, used when
* "echo matches" is off.
*/
static
int onMatch(ScanPCREContext *sc) {
assert(sc);
sc->result.matches++;
return 0;
}
/**
* Function called for every match that PCRE produces when "echo
* matches" is enabled.
*/
static
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
ScanPCREContext *sc) {
assert(sc);
sc->result.matches++;
printf("Match @%u:%llu for %u\n", sc->id, to, id);
return 0;
}
EnginePCRE::EnginePCRE(vector<unique_ptr<PcreDB>> dbs_in, CompilePCREStats cs,
int capture_cnt_in)
: dbs(move(dbs_in)), compile_stats(move(cs)),
capture_cnt(capture_cnt_in) {}
EnginePCRE::~EnginePCRE() {
for (auto &pcreDB : dbs) {
free(pcreDB->extra);
free(pcreDB->db);
}
}
unique_ptr<EngineContext> EnginePCRE::makeContext() const {
return ue2::make_unique<EnginePCREContext>(capture_cnt);
}
void EnginePCRE::scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ectx) const {
assert(data);
ScanPCREContext sc(id, result);
auto &ctx = static_cast<EnginePCREContext &>(ectx);
int *ovec = ctx.ovec;
int ovec_size = (capture_cnt + 1) * 3;
for (const auto &pcreDB : dbs) {
int startoffset = 0;
bool utf8 = pcreDB->utf8;
bool highlander = pcreDB->highlander;
int flags = 0;
int ret;
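// Call pcre_exec repeatedly, restarting after each match, so that every
// match in the block is counted; PCRE reports only one match per call.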
do {
ret = pcre_exec(pcreDB->db, pcreDB->extra, data, len,
startoffset, flags, ovec, ovec_size);
if (ret <= PCRE_ERROR_NOMATCH) {
break;
}
int from = ovec[0];
int to = ovec[1];
assert(from <= to);
if (echo_matches) {
onMatchEcho(pcreDB->id, from, to, &sc);
} else {
onMatch(&sc);
}
// If we only wanted a single match, we're done.
if (highlander) {
break;
}
// Next scan starts at the first codepoint after the match. It's
// possible that we have a vacuous match, in which case we must step
// past it to ensure that we always progress.
if (from != to) {
startoffset = to;
} else if (utf8) {
startoffset = to + 1;
while (startoffset < (int)len &&
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
++startoffset;
}
} else {
startoffset = to + 1;
}
} while (startoffset <= (int)len);
if (ret < PCRE_ERROR_NOMATCH) {
printf("Fatal error: pcre returned error %d\n", ret);
abort();
}
}
}
// Vectored scanning is not supported by the PCRE engine.
void EnginePCRE::scan_vectored(UNUSED const char *const *data,
UNUSED const unsigned int *len,
UNUSED unsigned int count,
UNUSED unsigned int streamId,
UNUSED ResultEntry &result,
UNUSED EngineContext &ectx) const {
printf("PCRE matcher can't support vectored mode.\n");
abort();
}
unique_ptr<EngineStream> EnginePCRE::streamOpen(UNUSED EngineContext &ectx,
UNUSED unsigned id) const {
printf("PCRE matcher can't stream.\n");
abort();
}
void EnginePCRE::streamClose(UNUSED unique_ptr<EngineStream> stream,
UNUSED ResultEntry &result) const {
printf("PCRE matcher can't stream.\n");
abort();
}
void EnginePCRE::streamScan(UNUSED EngineStream &stream,
UNUSED const char *data,
UNUSED unsigned len, UNUSED unsigned id,
UNUSED ResultEntry &result) const {
printf("PCRE matcher can't stream.\n");
abort();
}
void EnginePCRE::streamCompressExpand(UNUSED EngineStream &stream,
UNUSED vector<char> &temp) const {
printf("PCRE matcher can't stream.\n");
abort();
}
void EnginePCRE::printStats() const {
// Output summary information.
if (!compile_stats.sigs_name.empty()) {
printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
}
printf("Signatures: %s\n", compile_stats.signatures.c_str());
printf("PCRE info: %s\n", compile_stats.db_info.c_str());
#ifndef _WIN32
printf("Expression count: %'zu\n", compile_stats.expressionCount);
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
#else
printf("Expression count: %zu\n", compile_stats.expressionCount);
printf("Bytecode size: %zu bytes\n", compile_stats.compiledSize);
printf("Scratch size: %zu bytes\n", compile_stats.scratchSize);
printf("Compile time: %0.3Lf seconds\n", compile_stats.compileSecs);
printf("Peak heap usage: %u bytes\n", compile_stats.peakMemorySize);
#endif
}
void EnginePCRE::sqlStats(SqlDB &sqldb) const {
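// No CRC is computed for a PCRE "database", so the crc column stays empty.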
ostringstream crc;
static const string Q =
"INSERT INTO Compile ("
"sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
"scratchSize, compileSecs, peakMemory) "
"VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";
sqldb.insert_all(Q, compile_stats.sigs_name, compile_stats.signatures,
compile_stats.db_info, compile_stats.expressionCount,
compile_stats.compiledSize, crc.str(),
compile_stats.scratchSize, compile_stats.compileSecs,
compile_stats.peakMemorySize);
}
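/**
 * Parse an expression in /pattern/flags form, stripping the delimiters and
 * translating the flag characters into PCRE option bits.
 */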
static
bool decodeExprPCRE(string &expr, unsigned *flags, struct PcreDB &db) {
if (expr[0] != '/') {
return false;
}
size_t end = expr.find_last_of('/');
if (end == string::npos) {
return false;
}
string strFlags = expr.substr(end + 1, expr.length() - end - 1);
// strip starting and trailing slashes and the flags
expr.erase(end, expr.length() - end);
expr.erase(0, 1);
// decode the flags
*flags = 0;
for (size_t i = 0; i != strFlags.length(); ++i) {
switch (strFlags[i]) {
case 's':
*flags |= PCRE_DOTALL;
break;
case 'm':
*flags |= PCRE_MULTILINE;
break;
case 'i':
*flags |= PCRE_CASELESS;
break;
case '8':
*flags |= PCRE_UTF8;
db.utf8 = true;
break;
case 'W':
*flags |= PCRE_UCP;
break;
case 'H':
db.highlander = true;
break;
default:
return false;
}
}
return true;
}
unique_ptr<EnginePCRE>
buildEnginePcre(const ExpressionMap &expressions, const string &name,
const string &sigs_name) {
if (expressions.empty()) {
assert(0);
return nullptr;
}
long double compileSecs = 0.0;
size_t compiledSize = 0;
unsigned int peakMemorySize = 0;
string db_info("Version: ");
db_info += string(pcre_version());
vector<unique_ptr<PcreDB>> dbs;
int capture_cnt = 0;
Timer timer;
timer.start();
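// PCRE has no multi-pattern database: compile and study each expression
// individually and keep the results in a list.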
for (const auto &m : expressions) {
string expr(m.second);
unsigned int flags = 0;
auto pcreDB = ue2::make_unique<PcreDB>();
if (!decodeExprPCRE(expr, &flags, *pcreDB)) {
printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
m.first);
return nullptr;
}
const char *errp;
int erro;
pcre *db = pcre_compile(expr.c_str(), flags, &errp, &erro, NULL);
if (!db) {
printf("Compile error %s\n", errp);
return nullptr;
}
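// Study the pattern and request JIT compilation where the PCRE build
// supports it.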
pcre_extra *extra = pcre_study(db, PCRE_STUDY_JIT_COMPILE, &errp);
if (errp) {
printf("PCRE could not be studied: %s\n", errp);
return nullptr;
}
if (!extra) {
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
}
int cap = 0; // PCRE_INFO_CAPTURECOUNT demands an int
if (pcre_fullinfo(db, extra, PCRE_INFO_CAPTURECOUNT, &cap)) {
printf("PCRE fullinfo error\n");
free(extra);
free(db);
return nullptr;
}
assert(cap >= 0);
capture_cnt = max(capture_cnt, cap);
size_t db_size = 0;
if (pcre_fullinfo(db, extra, PCRE_INFO_SIZE, &db_size)) {
printf("PCRE fullinfo error\n");
free(extra);
free(db);
return nullptr;
}
size_t study_size = 0;
if (pcre_fullinfo(db, extra, PCRE_INFO_STUDYSIZE,
&study_size)) {
printf("PCRE fullinfo error\n");
free(extra);
free(db);
return nullptr;
}
compiledSize += db_size + study_size;
pcreDB->id = m.first;
pcreDB->db = db;
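// Bound backtracking so a pathological pattern cannot run indefinitely.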
extra->flags =
PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
extra->match_limit = 10000000;
extra->match_limit_recursion = 1500;
pcreDB->extra = extra;
dbs.push_back(move(pcreDB));
}
timer.complete();
compileSecs = timer.seconds();
peakMemorySize = getPeakHeap();
// Collect summary information.
CompilePCREStats cs;
cs.sigs_name = sigs_name;
if (!sigs_name.empty()) {
const auto pos = name.find_last_of('/');
cs.signatures = name.substr(pos + 1);
} else {
cs.signatures = name;
}
cs.db_info = db_info;
cs.expressionCount = expressions.size();
cs.compiledSize = compiledSize;
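// The per-thread ovector allocation is the closest PCRE analogue to
// Hyperscan's scratch space.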
cs.scratchSize = (capture_cnt + 1) * sizeof(int) * 3;
cs.compileSecs = compileSecs;
cs.peakMemorySize = peakMemorySize;
return ue2::make_unique<EnginePCRE>(move(dbs), move(cs), capture_cnt);
}

tools/hsbench/engine_pcre.h

@@ -0,0 +1,114 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ENGINEPCRE_H
#define ENGINEPCRE_H
#include "expressions.h"
#include "engine.h"
#include <pcre.h>
#include <memory>
#include <string>
#include <vector>
/** Information about the database compile */
struct CompilePCREStats {
std::string sigs_name;
std::string signatures;
std::string db_info;
size_t expressionCount = 0;
size_t compiledSize = 0;
size_t scratchSize = 0;
long double compileSecs = 0;
unsigned int peakMemorySize = 0;
};
/** Engine context which is allocated on a per-thread basis. */
class EnginePCREContext : public EngineContext {
public:
explicit EnginePCREContext(int capture_cnt);
~EnginePCREContext();
int *ovec = nullptr;
};
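/** Per-pattern PCRE database: compiled pattern, study data and decoded flags. */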
struct PcreDB {
bool highlander = false;
bool utf8 = false;
u32 id;
pcre *db = nullptr;
pcre_extra *extra = nullptr;
};
/** PCRE Engine for scanning data. */
class EnginePCRE : public Engine {
public:
explicit EnginePCRE(std::vector<std::unique_ptr<PcreDB>> dbs_in,
CompilePCREStats cs, int capture_cnt_in);
~EnginePCRE();
std::unique_ptr<EngineContext> makeContext() const;
void scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ectx) const;
void scan_vectored(const char *const *data, const unsigned int *len,
unsigned int count, unsigned int streamId,
ResultEntry &result, EngineContext &ectx) const;
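// The streaming entry points below are stubs: the PCRE engine only supports
// block-mode scanning and aborts if they are called.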
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
unsigned id) const;
void streamClose(std::unique_ptr<EngineStream> stream,
ResultEntry &result) const;
void streamCompressExpand(EngineStream &stream,
std::vector<char> &temp) const;
void streamScan(EngineStream &stream, const char *data, unsigned int len,
unsigned int id, ResultEntry &result) const;
void printStats() const;
void sqlStats(SqlDB &db) const;
private:
std::vector<std::unique_ptr<PcreDB>> dbs;
CompilePCREStats compile_stats;
int capture_cnt;
};
std::unique_ptr<EnginePCRE>
buildEnginePcre(const ExpressionMap &expressions, const std::string &name,
const std::string &sigs_name);
#endif // ENGINEPCRE_H

Some files were not shown because too many files have changed in this diff.