mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch develop to master
This commit is contained in:
commit
2060dd3a9c
12
CHANGELOG.md
12
CHANGELOG.md
@ -2,6 +2,18 @@
|
||||
|
||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||
|
||||
## [5.0.0] 2018-07-09
|
||||
- Introduce chimera hybrid engine of Hyperscan and PCRE, to fully support
|
||||
PCRE syntax as well as to take advantage of the high performance nature of
|
||||
Hyperscan.
|
||||
- New API feature: logical combinations (AND, OR and NOT) of patterns in a
|
||||
given pattern set.
|
||||
- Windows porting: hsbench, hscheck, hscollider and hsdump tools now available
|
||||
on Windows 8 or newer.
|
||||
- Improve undirected graph implementation to avoid graph copy and reduce
|
||||
compile time.
|
||||
- Bugfix for issue #86: enable hscollider for installed PCRE package.
|
||||
|
||||
## [4.7.0] 2018-01-24
|
||||
- Introduced hscollider pattern testing tool, for validating Hyperscan match
|
||||
behaviour against PCRE.
|
||||
|
@ -1,8 +1,8 @@
|
||||
cmake_minimum_required (VERSION 2.8.11)
|
||||
project (hyperscan C CXX)
|
||||
|
||||
set (HS_MAJOR_VERSION 4)
|
||||
set (HS_MINOR_VERSION 7)
|
||||
set (HS_MAJOR_VERSION 5)
|
||||
set (HS_MINOR_VERSION 0)
|
||||
set (HS_PATCH_VERSION 0)
|
||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||
|
||||
@ -154,7 +154,7 @@ if(MSVC OR MSVC_IDE)
|
||||
# todo: change these as required
|
||||
set(ARCH_C_FLAGS "/arch:AVX2")
|
||||
set(ARCH_CXX_FLAGS "/arch:AVX2")
|
||||
set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 -D_CRT_SECURE_NO_WARNINGS")
|
||||
set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 /wd4996 -D_CRT_SECURE_NO_WARNINGS")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 ${MSVC_WARNS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 ${MSVC_WARNS} /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD")
|
||||
endif()
|
||||
@ -446,11 +446,32 @@ else()
|
||||
endif()
|
||||
|
||||
add_subdirectory(util)
|
||||
add_subdirectory(unit)
|
||||
add_subdirectory(doc/dev-reference)
|
||||
|
||||
if (NOT WIN32)
    # PCRE check: Chimera and hscollider both depend on one fixed PCRE
    # release, so the required version is pinned here before the detection
    # script is included.
    set(PCRE_REQUIRED_MAJOR_VERSION 8)
    set(PCRE_REQUIRED_MINOR_VERSION 41)
    set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
    # NOTE(review): pcre.cmake is presumably what sets CORRECT_PCRE_VERSION
    # and PCRE_BUILD_SOURCE, which are read below - confirm against that file.
    include (${CMAKE_MODULE_PATH}/pcre.cmake)
    if (NOT CORRECT_PCRE_VERSION)
        message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found")
    endif()

    # we need static libs for Chimera - too much deep magic for shared libs
    if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS)
        set(BUILD_CHIMERA TRUE)
    endif()

    add_subdirectory(unit)

    # add_subdirectory() resolves relative paths against the directory of this
    # CMakeLists.txt, so the existence checks are anchored at the same place
    # (CMAKE_CURRENT_SOURCE_DIR). Testing against CMAKE_SOURCE_DIR would look
    # in the wrong tree when this project is pulled in by a parent build.
    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tools/CMakeLists.txt")
        add_subdirectory(tools)
    endif()
    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/chimera/CMakeLists.txt" AND BUILD_CHIMERA)
        add_subdirectory(chimera)
    endif()
endif()
|
||||
|
||||
# do substitutions
|
||||
configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h)
|
||||
@ -479,6 +500,31 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||
endif()
|
||||
|
||||
if (WIN32)
    # PCRE check: Chimera and hscollider both depend on one fixed PCRE
    # release, so the required version is pinned here before the detection
    # script is included.
    set(PCRE_REQUIRED_MAJOR_VERSION 8)
    set(PCRE_REQUIRED_MINOR_VERSION 41)
    set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
    # NOTE(review): pcre.cmake is presumably what sets CORRECT_PCRE_VERSION
    # and PCRE_BUILD_SOURCE, which are read below - confirm against that file.
    include (${CMAKE_MODULE_PATH}/pcre.cmake)
    if (NOT CORRECT_PCRE_VERSION)
        message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found")
    endif()

    # we need static libs for Chimera - too much deep magic for shared libs
    if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS)
        set(BUILD_CHIMERA TRUE)
    endif()

    add_subdirectory(unit)

    # add_subdirectory() resolves relative paths against the directory of this
    # CMakeLists.txt, so the existence checks are anchored at the same place
    # (CMAKE_CURRENT_SOURCE_DIR). Testing against CMAKE_SOURCE_DIR would look
    # in the wrong tree when this project is pulled in by a parent build.
    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tools/CMakeLists.txt")
        add_subdirectory(tools)
    endif()
    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/chimera/CMakeLists.txt" AND BUILD_CHIMERA)
        add_subdirectory(chimera)
    endif()
endif()
|
||||
|
||||
if(NOT WIN32)
|
||||
set(RAGEL_C_FLAGS "-Wno-unused")
|
||||
endif()
|
||||
@ -860,7 +906,6 @@ SET (hs_compile_SRCS
|
||||
src/nfagraph/ng_stop.h
|
||||
src/nfagraph/ng_uncalc_components.cpp
|
||||
src/nfagraph/ng_uncalc_components.h
|
||||
src/nfagraph/ng_undirected.h
|
||||
src/nfagraph/ng_utf8.cpp
|
||||
src/nfagraph/ng_utf8.h
|
||||
src/nfagraph/ng_util.cpp
|
||||
@ -915,6 +960,8 @@ SET (hs_compile_SRCS
|
||||
src/parser/check_refs.h
|
||||
src/parser/control_verbs.cpp
|
||||
src/parser/control_verbs.h
|
||||
src/parser/logical_combination.cpp
|
||||
src/parser/logical_combination.h
|
||||
src/parser/parse_error.cpp
|
||||
src/parser/parse_error.h
|
||||
src/parser/parser_util.cpp
|
||||
@ -1014,6 +1061,7 @@ SET (hs_compile_SRCS
|
||||
src/util/graph.h
|
||||
src/util/graph_range.h
|
||||
src/util/graph_small_color_map.h
|
||||
src/util/graph_undirected.h
|
||||
src/util/hash.h
|
||||
src/util/hash_dynamic_bitset.h
|
||||
src/util/insertion_ordered.h
|
||||
|
49
chimera/CMakeLists.txt
Normal file
49
chimera/CMakeLists.txt
Normal file
@ -0,0 +1,49 @@
|
||||
# Chimera lib
#
# Builds the static `chimera` library from the sources in this directory,
# installs its public headers and archive, and (on non-Windows hosts)
# generates and installs a pkg-config file for it.

include_directories(${PCRE_INCLUDE_DIRS})

# only set these after all tests are done
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")

# Public API headers; installed alongside the Hyperscan headers under hs/.
set(chimera_HEADERS
    ch.h
    ch_common.h
    ch_compile.h
    ch_runtime.h
)
install(FILES ${chimera_HEADERS} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/hs")

# Everything compiled into the library (public headers listed so that IDE
# generators show them with the target).
set(chimera_SRCS
    ${chimera_HEADERS}
    ch_alloc.c
    ch_alloc.h
    ch_compile.cpp
    ch_database.c
    ch_database.h
    ch_internal.h
    ch_runtime.c
    ch_scratch.h
    ch_scratch.c
)

add_library(chimera STATIC ${chimera_SRCS})
add_dependencies(chimera hs pcre)
target_link_libraries(chimera hs pcre)

install(TARGETS chimera DESTINATION ${CMAKE_INSTALL_LIBDIR})

if (NOT WIN32)
    # expand out library names for pkgconfig static link info
    foreach (LIB ${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES})
        # this is fragile, but protects us from toolchain specific files
        if (NOT EXISTS "${LIB}")
            set(PRIVATE_LIBS "${PRIVATE_LIBS} -l${LIB}")
        endif()
    endforeach()
    set(PRIVATE_LIBS "${PRIVATE_LIBS} -L${LIBDIR} -lpcre")

    # configure_file() resolves a relative input against the current source
    # dir and a relative output against the current binary dir. Both paths
    # are spelled out so that the install rule below names exactly the file
    # that was generated, wherever this directory sits in the build tree.
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libch.pc.in"
                   "${CMAKE_CURRENT_BINARY_DIR}/libch.pc"
                   @ONLY) # only replace @ quoted vars
    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libch.pc"
            DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
endif()
|
45
chimera/ch.h
Normal file
45
chimera/ch.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_H_
|
||||
#define CH_H_
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The complete Chimera API definition.
|
||||
*
|
||||
* Chimera is a hybrid solution of Hyperscan and PCRE.
|
||||
*
|
||||
* This header includes both the Chimera compiler and runtime components. See
|
||||
* the individual component headers for documentation.
|
||||
*/
|
||||
|
||||
#include "ch_compile.h"
|
||||
#include "ch_runtime.h"
|
||||
|
||||
#endif /* CH_H_ */
|
109
chimera/ch_alloc.c
Normal file
109
chimera/ch_alloc.c
Normal file
@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime functions for setting custom allocators.
|
||||
*/
|
||||
|
||||
#include "ch.h"
|
||||
#include "ch_common.h"
|
||||
#include "ch_internal.h"
|
||||
#include "hs.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
/* Fallback allocation and release routines: the C library's malloc and free. */
#define default_malloc malloc
|
||||
#define default_free free
|
||||
|
||||
/* Allocation callbacks, one per category (database, misc, scratch). Each
 * starts out as default_malloc and is reassigned by the matching
 * ch_set_*_allocator() function defined later in this file. */
ch_alloc_t ch_database_alloc = default_malloc;
|
||||
ch_alloc_t ch_misc_alloc = default_malloc;
|
||||
ch_alloc_t ch_scratch_alloc = default_malloc;
|
||||
|
||||
/* Release callbacks paired with the allocation callbacks above. Each starts
 * out as default_free and is reassigned by the same setter functions. */
ch_free_t ch_database_free = default_free;
|
||||
ch_free_t ch_misc_free = default_free;
|
||||
ch_free_t ch_scratch_free = default_free;
|
||||
|
||||
static
|
||||
ch_alloc_t normalise_alloc(ch_alloc_t a) {
|
||||
if (!a) {
|
||||
return default_malloc;
|
||||
} else {
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
ch_free_t normalise_free(ch_free_t f) {
|
||||
if (!f) {
|
||||
return default_free;
|
||||
} else {
|
||||
return f;
|
||||
}
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_set_allocator(ch_alloc_t allocfunc,
|
||||
ch_free_t freefunc) {
|
||||
ch_set_database_allocator(allocfunc, freefunc);
|
||||
ch_set_misc_allocator(allocfunc, freefunc);
|
||||
ch_set_scratch_allocator(allocfunc, freefunc);
|
||||
|
||||
// Set core Hyperscan alloc/free.
|
||||
hs_error_t ret = hs_set_allocator(allocfunc, freefunc);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_set_database_allocator(ch_alloc_t allocfunc,
|
||||
ch_free_t freefunc) {
|
||||
ch_database_alloc = normalise_alloc(allocfunc);
|
||||
ch_database_free = normalise_free(freefunc);
|
||||
|
||||
// Set Hyperscan database alloc/free.
|
||||
return hs_set_database_allocator(allocfunc, freefunc);
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_set_misc_allocator(ch_alloc_t allocfunc,
|
||||
ch_free_t freefunc) {
|
||||
ch_misc_alloc = normalise_alloc(allocfunc);
|
||||
ch_misc_free = normalise_free(freefunc);
|
||||
|
||||
// Set Hyperscan misc alloc/free.
|
||||
return hs_set_misc_allocator(allocfunc, freefunc);
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_set_scratch_allocator(ch_alloc_t allocfunc,
|
||||
ch_free_t freefunc) {
|
||||
ch_scratch_alloc = normalise_alloc(allocfunc);
|
||||
ch_scratch_free = normalise_free(freefunc);
|
||||
|
||||
// Set Hyperscan scratch alloc/free.
|
||||
return hs_set_scratch_allocator(allocfunc, freefunc);
|
||||
}
|
65
chimera/ch_alloc.h
Normal file
65
chimera/ch_alloc.h
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_ALLOC_H
|
||||
#define CH_ALLOC_H
|
||||
|
||||
#include "hs_common.h"
|
||||
#include "ue2common.h"
|
||||
#include "ch_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
extern hs_alloc_t ch_database_alloc;
|
||||
extern hs_alloc_t ch_misc_alloc;
|
||||
extern hs_alloc_t ch_scratch_alloc;
|
||||
|
||||
extern hs_free_t ch_database_free;
|
||||
extern hs_free_t ch_misc_free;
|
||||
extern hs_free_t ch_scratch_free;
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
#endif
|
||||
/** \brief Check the results of an alloc done with hs_alloc for alignment.
|
||||
*
|
||||
* If we have incorrect alignment, return an error. Caller should free the
|
||||
* offending block. */
|
||||
static really_inline
|
||||
ch_error_t ch_check_alloc(const void *mem) {
|
||||
ch_error_t ret = CH_SUCCESS;
|
||||
if (!mem) {
|
||||
ret = CH_NOMEM;
|
||||
} else if (!ISALIGNED_N(mem, alignof(unsigned long long))) {
|
||||
ret = CH_BAD_ALLOC;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
360
chimera/ch_common.h
Normal file
360
chimera/ch_common.h
Normal file
@ -0,0 +1,360 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_COMMON_H_
|
||||
#define CH_COMMON_H_
|
||||
|
||||
#include "hs_common.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Chimera common API definition.
|
||||
*
|
||||
* Chimera is a hybrid of Hyperscan and PCRE.
|
||||
*
|
||||
* This header contains functions available to both the Chimera compiler and
|
||||
* runtime.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
struct ch_database;
|
||||
|
||||
/**
|
||||
* A Chimera pattern database.
|
||||
*
|
||||
* Generated by one of the Chimera compiler functions:
|
||||
* - @ref ch_compile()
|
||||
* - @ref ch_compile_multi()
|
||||
* - @ref ch_compile_ext_multi()
|
||||
*/
|
||||
typedef struct ch_database ch_database_t;
|
||||
|
||||
/**
|
||||
* A type for errors returned by Chimera functions.
|
||||
*/
|
||||
typedef int ch_error_t;
|
||||
|
||||
/**
|
||||
* Free a compiled pattern database.
|
||||
*
|
||||
* The free callback set by @ref ch_set_allocator() will be used by this
|
||||
* function.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database. NULL may also be safely provided, in which
|
||||
* case the function does nothing.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_free_database(ch_database_t *db);
|
||||
|
||||
/**
|
||||
* Utility function for identifying this release version.
|
||||
*
|
||||
* @return
|
||||
* A string containing the version number of this release build and the
|
||||
* date of the build. It is allocated statically, so it does not need to
|
||||
* be freed by the caller.
|
||||
*/
|
||||
const char * HS_CDECL ch_version(void);
|
||||
|
||||
/**
|
||||
* Returns the size of the given database.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to compiled expression database.
|
||||
*
|
||||
* @param database_size
|
||||
* On success, the size of the compiled database in bytes is placed in this
|
||||
* parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_database_size(const ch_database_t *database,
|
||||
size_t *database_size);
|
||||
|
||||
/**
|
||||
* Utility function providing information about a database.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to a compiled database.
|
||||
*
|
||||
* @param info
|
||||
* On success, a string containing the version and platform information for
|
||||
* the supplied database is placed in the parameter. The string is
|
||||
* allocated using the allocator supplied in @ref hs_set_allocator()
|
||||
* (or malloc() if no allocator was set) and should be freed by the caller.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_database_info(const ch_database_t *database,
|
||||
char **info);
|
||||
|
||||
/**
|
||||
* The type of the callback function that will be used by Chimera to allocate
|
||||
* more memory at runtime as required.
|
||||
*
|
||||
* If Chimera is to be used in a multi-threaded, or similarly concurrent
|
||||
* environment, the allocation function will need to be re-entrant, or
|
||||
* similarly safe for concurrent use.
|
||||
*
|
||||
* @param size
|
||||
* The number of bytes to allocate.
|
||||
* @return
|
||||
* A pointer to the region of memory allocated, or NULL on error.
|
||||
*/
|
||||
typedef void *(HS_CDECL *ch_alloc_t)(size_t size);
|
||||
|
||||
/**
|
||||
* The type of the callback function that will be used by Chimera to free
|
||||
* memory regions previously allocated using the @ref ch_alloc_t function.
|
||||
*
|
||||
* @param ptr
|
||||
* The region of memory to be freed.
|
||||
*/
|
||||
typedef void (HS_CDECL *ch_free_t)(void *ptr);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Chimera for allocating
|
||||
* memory at runtime for stream state, scratch space, database bytecode,
|
||||
* and various other data structures returned by the Chimera API.
|
||||
*
|
||||
* The function is equivalent to calling @ref ch_set_scratch_allocator(),
|
||||
* @ref ch_set_database_allocator() and
|
||||
* @ref ch_set_misc_allocator() with the provided parameters.
|
||||
*
|
||||
* This call will override any previous allocators that have been set.
|
||||
*
|
||||
* Note: there is no way to change the allocator used for temporary objects
|
||||
* created during the various compile calls (@ref ch_compile() and @ref
|
||||
* ch_compile_multi()).
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_set_allocator(ch_alloc_t alloc_func,
|
||||
ch_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Chimera for allocating memory
|
||||
* for database bytecode produced by the compile calls (@ref ch_compile() and @ref
|
||||
* ch_compile_multi()).
|
||||
*
|
||||
* If no database allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous database allocators that have been set.
|
||||
*
|
||||
* Note: the database allocator may also be set by calling @ref
|
||||
* ch_set_allocator().
|
||||
*
|
||||
* Note: there is no way to change how temporary objects created during the
|
||||
* various compile calls (@ref ch_compile() and @ref ch_compile_multi()) are
|
||||
* allocated.
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_set_database_allocator(ch_alloc_t alloc_func,
|
||||
ch_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Chimera for allocating memory
|
||||
* for items returned by the Chimera API such as @ref ch_compile_error_t.
|
||||
*
|
||||
* If no misc allocation functions are set, or if NULL is used in place of both
|
||||
* parameters, then memory allocation will default to standard methods (such as
|
||||
* the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous misc allocators that have been set.
|
||||
*
|
||||
* Note: the misc allocator may also be set by calling @ref ch_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_set_misc_allocator(ch_alloc_t alloc_func,
|
||||
ch_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Chimera for allocating memory
|
||||
* for scratch space by @ref ch_alloc_scratch() and @ref ch_clone_scratch().
|
||||
*
|
||||
* If no scratch allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous scratch allocators that have been set.
|
||||
*
|
||||
* Note: the scratch allocator may also be set by calling @ref
|
||||
* ch_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_set_scratch_allocator(ch_alloc_t alloc_func,
|
||||
ch_free_t free_func);
|
||||
|
||||
/**
|
||||
* @defgroup CH_ERROR ch_error_t values
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* The engine completed normally.
|
||||
*/
|
||||
#define CH_SUCCESS 0
|
||||
|
||||
/**
|
||||
* A parameter passed to this function was invalid.
|
||||
*/
|
||||
#define CH_INVALID (-1)
|
||||
|
||||
/**
|
||||
* A memory allocation failed.
|
||||
*/
|
||||
#define CH_NOMEM (-2)
|
||||
|
||||
/**
|
||||
* The engine was terminated by callback.
|
||||
*
|
||||
* This return value indicates that the target buffer was partially scanned,
|
||||
* but that the callback function requested that scanning cease after a match
|
||||
* was located.
|
||||
*/
|
||||
#define CH_SCAN_TERMINATED (-3)
|
||||
|
||||
/**
|
||||
* The pattern compiler failed, and the @ref ch_compile_error_t should be
|
||||
* inspected for more detail.
|
||||
*/
|
||||
#define CH_COMPILER_ERROR (-4)
|
||||
|
||||
/**
|
||||
* The given database was built for a different version of the Chimera matcher.
|
||||
*/
|
||||
#define CH_DB_VERSION_ERROR (-5)
|
||||
|
||||
/**
|
||||
* The given database was built for a different platform (i.e., CPU type).
|
||||
*/
|
||||
#define CH_DB_PLATFORM_ERROR (-6)
|
||||
|
||||
/**
|
||||
* The given database was built for a different mode of operation. This error
|
||||
* is returned when streaming calls are used with a non-streaming database and
|
||||
* vice versa.
|
||||
*/
|
||||
#define CH_DB_MODE_ERROR (-7)
|
||||
|
||||
/**
|
||||
* A parameter passed to this function was not correctly aligned.
|
||||
*/
|
||||
#define CH_BAD_ALIGN (-8)
|
||||
|
||||
/**
|
||||
* The memory allocator did not correctly return memory suitably aligned for
|
||||
* the largest representable data type on this platform.
|
||||
*/
|
||||
#define CH_BAD_ALLOC (-9)
|
||||
|
||||
/**
|
||||
* The scratch region was already in use.
|
||||
*
|
||||
* This error is returned when Chimera is able to detect that the scratch
|
||||
* region given is already in use by another Chimera API call.
|
||||
*
|
||||
* A separate scratch region, allocated with @ref ch_alloc_scratch() or @ref
|
||||
* ch_clone_scratch(), is required for every concurrent caller of the Chimera
|
||||
* API.
|
||||
*
|
||||
* For example, this error might be returned when @ref ch_scan() has been
|
||||
* called inside a callback delivered by a currently-executing @ref ch_scan()
|
||||
* call using the same scratch region.
|
||||
*
|
||||
* Note: Not all concurrent uses of scratch regions may be detected. This error
|
||||
* is intended as a best-effort debugging tool, not a guarantee.
|
||||
*/
|
||||
#define CH_SCRATCH_IN_USE (-10)
|
||||
|
||||
/**
|
||||
* Returned when pcre_exec (called for some expressions internally from @ref
|
||||
* ch_scan) failed due to a fatal error.
|
||||
*/
|
||||
#define CH_FAIL_INTERNAL (-32)
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_COMMON_H_ */
|
878
chimera/ch_compile.cpp
Normal file
878
chimera/ch_compile.cpp
Normal file
@ -0,0 +1,878 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compiler front-end, including public API calls for compilation.
|
||||
*/
|
||||
|
||||
#include "ch_compile.h"
|
||||
#include "ch_alloc.h"
|
||||
#include "ch_internal.h"
|
||||
#include "ch_database.h"
|
||||
#include "grey.h"
|
||||
#include "hs_common.h"
|
||||
#include "hs_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/multibit_build.h"
|
||||
#include "util/target_info.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <limits.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
#define PCRE_ERROR_MSG "Internal error building PCRE pattern."
|
||||
|
||||
using namespace std;
|
||||
using namespace ue2;
|
||||
|
||||
static const char failureNoMemory[] = "Unable to allocate memory.";
|
||||
static const char failureInternal[] = "Internal error.";
|
||||
static const char failureBadAlloc[] = "Allocator returned misaligned memory.";
|
||||
|
||||
static const ch_compile_error_t ch_enomem
|
||||
= { const_cast<char *>(failureNoMemory), 0 };
|
||||
static const ch_compile_error_t ch_einternal
|
||||
= { const_cast<char *>(failureInternal), 0 };
|
||||
static const ch_compile_error_t ch_badalloc
|
||||
= { const_cast<char *>(failureBadAlloc), 0 };
|
||||
|
||||
static
|
||||
ch_compile_error_t *generateChimeraCompileError(const string &err,
|
||||
int expression) {
|
||||
ch_compile_error_t *ret =
|
||||
(struct ch_compile_error *)ch_misc_alloc(sizeof(ch_compile_error_t));
|
||||
if (ret) {
|
||||
ch_error_t e = ch_check_alloc(ret);
|
||||
if (e != CH_SUCCESS) {
|
||||
ch_misc_free(ret);
|
||||
return const_cast<ch_compile_error_t *>(&ch_badalloc);
|
||||
}
|
||||
char *msg = (char *)ch_misc_alloc(err.size() + 1);
|
||||
if (msg) {
|
||||
e = ch_check_alloc(msg);
|
||||
if (e != HS_SUCCESS) {
|
||||
ch_misc_free(msg);
|
||||
return const_cast<ch_compile_error_t *>(&ch_badalloc);
|
||||
}
|
||||
memcpy(msg, err.c_str(), err.size() + 1);
|
||||
ret->message = msg;
|
||||
} else {
|
||||
ch_misc_free(ret);
|
||||
ret = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret || !ret->message) {
|
||||
return const_cast<ch_compile_error_t *>(&ch_enomem);
|
||||
}
|
||||
|
||||
ret->expression = expression;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
void freeChimeraCompileError(ch_compile_error_t *error) {
|
||||
if (!error) {
|
||||
return;
|
||||
}
|
||||
if (error == &ch_enomem || error == &ch_einternal ||
|
||||
error == &ch_badalloc) {
|
||||
// These are not allocated.
|
||||
return;
|
||||
}
|
||||
|
||||
ch_misc_free(error->message);
|
||||
ch_misc_free(error);
|
||||
}
|
||||
|
||||
static
|
||||
bool checkMode(unsigned int mode, ch_compile_error_t **comp_error) {
|
||||
static const unsigned int supported = CH_MODE_GROUPS;
|
||||
|
||||
if (mode & ~supported) {
|
||||
*comp_error =
|
||||
generateChimeraCompileError("Invalid mode flag supplied.", -1);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \brief Throw a compile error if we're passed some unsupported flags. */
|
||||
static
|
||||
void checkFlags(const unsigned int flags) {
|
||||
static const unsigned int supported = HS_FLAG_DOTALL
|
||||
| HS_FLAG_MULTILINE
|
||||
| HS_FLAG_CASELESS
|
||||
| HS_FLAG_SINGLEMATCH
|
||||
| HS_FLAG_UCP
|
||||
| HS_FLAG_UTF8;
|
||||
|
||||
if (flags & ~supported) {
|
||||
throw CompileError("Unrecognized flag used.");
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool isHyperscanSupported(const char *expression, unsigned int flags,
|
||||
const hs_platform_info *platform) {
|
||||
hs_database_t *db = nullptr;
|
||||
hs_compile_error *comp_error = nullptr;
|
||||
|
||||
unsigned int id = 0;
|
||||
hs_error_t err = hs_compile_multi(&expression, &flags, &id,
|
||||
1, HS_MODE_BLOCK, platform, &db,
|
||||
&comp_error);
|
||||
if (err != HS_SUCCESS) {
|
||||
assert(!db);
|
||||
assert(comp_error);
|
||||
DEBUG_PRINTF("unsupported: %s\n", comp_error->message);
|
||||
hs_free_compile_error(comp_error);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(db);
|
||||
assert(!comp_error);
|
||||
hs_free_database(db);
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool writeHyperscanDatabase(char *ptr, hs_database_t *db) {
|
||||
// Note: we must use our serialization calls to re-home the database.
|
||||
char *serialized = nullptr;
|
||||
size_t slen = 0;
|
||||
hs_error_t err = hs_serialize_database(db, &serialized, &slen);
|
||||
if (err != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("hs_serialize_database returned %d\n", err);
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("writing database to ptr %p\n", ptr);
|
||||
|
||||
// deserialize_at without the platform tests.
|
||||
err = hs_deserialize_database_at(serialized, slen, (hs_database_t *)ptr);
|
||||
if (err != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("hs_deserialize_database_at returned %d\n", err);
|
||||
assert(0);
|
||||
ch_misc_free(serialized);
|
||||
return false;
|
||||
}
|
||||
|
||||
ch_misc_free(serialized);
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool writeHyperscanDatabase(ch_bytecode *db, hs_database_t *hs_db) {
|
||||
db->databaseOffset = ROUNDUP_CL(sizeof(*db));
|
||||
char *ptr = (char *)db + db->databaseOffset;
|
||||
return writeHyperscanDatabase(ptr, hs_db);
|
||||
}
|
||||
|
||||
static
|
||||
int convertFlagsToPcreOptions(unsigned int flags) {
|
||||
int options = 0;
|
||||
if (flags & HS_FLAG_CASELESS) {
|
||||
options |= PCRE_CASELESS;
|
||||
}
|
||||
if (flags & HS_FLAG_DOTALL) {
|
||||
options |= PCRE_DOTALL;
|
||||
}
|
||||
if (flags & HS_FLAG_MULTILINE) {
|
||||
options |= PCRE_MULTILINE;
|
||||
}
|
||||
if (flags & HS_FLAG_UTF8) {
|
||||
options |= PCRE_UTF8;
|
||||
}
|
||||
if (flags & HS_FLAG_UCP) {
|
||||
options |= PCRE_UCP;
|
||||
}
|
||||
|
||||
// All other flags are meaningless to PCRE.
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/** \brief Data about a single pattern. */
|
||||
struct PatternData : boost::noncopyable {
|
||||
PatternData(const char *pattern, u32 flags, u32 idx, u32 id_in,
|
||||
unsigned mode, unsigned long int match_limit,
|
||||
unsigned long int match_limit_recursion,
|
||||
const hs_platform_info *platform);
|
||||
~PatternData() {
|
||||
pcre_free(compiled);
|
||||
pcre_free(extra);
|
||||
}
|
||||
|
||||
void buildPcre(const char *pattern, u32 flags);
|
||||
|
||||
size_t patternSize() const;
|
||||
|
||||
void writePattern(ch_pattern *pattern) const;
|
||||
|
||||
pcre *compiled; //!< pcre_compile output
|
||||
pcre_extra *extra; //!< pcre_study output
|
||||
size_t compiled_size;
|
||||
int study_size;
|
||||
int capture_cnt;
|
||||
bool utf8;
|
||||
u32 id; //!< ID from the user
|
||||
u32 expr_index; //!< index in the expression array
|
||||
bool singlematch; //!< pattern is in highlander mode
|
||||
bool guard; //!< this pattern should be guarded by the multimatcher
|
||||
u32 minWidth; //!< min match width
|
||||
u32 maxWidth; //!< max match width
|
||||
u32 fixedWidth; //!< fixed pattern width
|
||||
unsigned long int matchLimit; //! pcre match limit
|
||||
unsigned long int matchLimitRecursion; //! pcre match_limit_recursion
|
||||
};
|
||||
|
||||
/**
 * \brief Compile one pattern with libpcre and gather Hyperscan width info.
 *
 * \param pattern               Expression text; must not be NULL.
 * \param flags                 HS_FLAG_* flags for the expression.
 * \param idx                   Index of the expression in the caller's array.
 * \param id_in                 User-supplied ID for the expression.
 * \param mode                  Database mode flags; CH_MODE_GROUPS enables
 *                              capture handling.
 * \param match_limit           Stored as the pcre match limit.
 * \param match_limit_recursion Stored as the pcre recursion limit.
 * \param platform              Target platform for trial Hyperscan compiles.
 *
 * Throws CompileError (from buildPcre()) when libpcre rejects the pattern.
 */
PatternData::PatternData(const char *pattern, u32 flags, u32 idx, u32 id_in,
                         unsigned mode, unsigned long int match_limit,
                         unsigned long int match_limit_recursion,
                         const hs_platform_info *platform)
    : compiled(nullptr), extra(nullptr), compiled_size(0), study_size(0),
      capture_cnt(0), utf8(false), id(id_in), expr_index(idx),
      singlematch(flags & HS_FLAG_SINGLEMATCH),
      guard(false), minWidth(0), maxWidth(UINT_MAX),
      fixedWidth(UINT_MAX), matchLimit(match_limit),
      matchLimitRecursion(match_limit_recursion) {
    assert(pattern);

    flags |= HS_FLAG_ALLOWEMPTY; /* don't hand things off to pcre for no
                                    reason */

    try {
        buildPcre(pattern, flags);
    } catch (...) {
        // The destructor does not run for an object whose constructor
        // throws, so release whatever buildPcre() created before the
        // exception continues to the caller.
        if (extra) {
            pcre_free_study(extra);
            extra = nullptr;
        }
        if (compiled) {
            pcre_free(compiled);
            compiled = nullptr;
        }
        throw;
    }

    // Fetch the expression info for a prefiltering, non-singlematch version of
    // this pattern, if possible.
    hs_expr_info *info = nullptr;
    hs_compile_error_t *error = nullptr;
    u32 infoflags = (flags | HS_FLAG_PREFILTER) & ~HS_FLAG_SINGLEMATCH;
    u32 rawflags = (flags | HS_FLAG_SOM_LEFTMOST) & ~HS_FLAG_SINGLEMATCH;
    hs_error_t err = hs_expression_info(pattern, infoflags, &info, &error);
    if (err == HS_SUCCESS) {
        assert(info);
        minWidth = info->min_width;
        maxWidth = info->max_width;
        bool ordered = info->unordered_matches ? false : true;

        // Only enable capturing if required
        u32 captureCnt = 0;
        if (mode & CH_MODE_GROUPS) {
            captureCnt = capture_cnt;
        }

        // No need to confirm with PCRE if:
        // 1) pattern is fixed width
        // 2) pattern isn't vacuous as it can't combine with start of match
        // 3) no capturing in this pattern
        // 4) no offset adjust in this pattern as hyperscan match callback
        //    will arrive without order, i.e. [^a]\z has offset adjust
        // 5) hyperscan compile succeeds without prefiltering
        if (minWidth == maxWidth && minWidth && maxWidth != UINT_MAX &&
            !captureCnt && ordered &&
            isHyperscanSupported(pattern, rawflags, platform)) {
            fixedWidth = maxWidth;
        }

        DEBUG_PRINTF("gathered info: widths=[%u,%u]\n", minWidth, maxWidth);

        ch_misc_free(info);

        // The pattern is guarded by the multimatcher only if Hyperscan can
        // actually compile its prefiltering form.
        u32 guardflags;
        guardflags = (flags | HS_FLAG_PREFILTER) & ~HS_FLAG_SINGLEMATCH;
        guard = isHyperscanSupported(pattern, guardflags, platform);
    } else {
        // We can't even prefilter this pattern, so we're dependent on Big Dumb
        // Pcre Scans.
        DEBUG_PRINTF("hs_expression_info failed, falling back to pcre\n");
        hs_free_compile_error(error);
    }
}
|
||||
|
||||
void PatternData::buildPcre(const char *pattern, u32 flags) {
|
||||
int options = convertFlagsToPcreOptions(flags);
|
||||
const char *errptr = nullptr;
|
||||
int erroffset = 0;
|
||||
|
||||
compiled = pcre_compile(pattern, options, &errptr, &erroffset, nullptr);
|
||||
if (!compiled) {
|
||||
DEBUG_PRINTF("PCRE failed to compile: %s\n", pattern);
|
||||
string err("PCRE compilation failed: ");
|
||||
err += string(errptr);
|
||||
err += ".";
|
||||
throw CompileError(expr_index, err);
|
||||
}
|
||||
|
||||
extra = pcre_study(compiled, PCRE_STUDY_JIT_COMPILE, &errptr);
|
||||
// Note that it's OK for pcre_study to return NULL if there's nothing
|
||||
// to be found, but a non-NULL error is always bad.
|
||||
if (errptr) {
|
||||
DEBUG_PRINTF("PCRE could not be studied: %s\n", errptr);
|
||||
string err("PCRE compilation failed: ");
|
||||
err += string(errptr);
|
||||
err += ".";
|
||||
throw CompileError(expr_index, err);
|
||||
}
|
||||
|
||||
if (pcre_fullinfo(compiled, extra, PCRE_INFO_SIZE, &compiled_size)) {
|
||||
throw CompileError(PCRE_ERROR_MSG);
|
||||
}
|
||||
|
||||
if (!extra) {
|
||||
study_size = 0;
|
||||
} else {
|
||||
if (pcre_fullinfo(compiled, extra, PCRE_INFO_STUDYSIZE, &study_size)) {
|
||||
throw CompileError(PCRE_ERROR_MSG);
|
||||
}
|
||||
}
|
||||
|
||||
if (pcre_fullinfo(compiled, extra, PCRE_INFO_CAPTURECOUNT, &capture_cnt)) {
|
||||
throw CompileError(PCRE_ERROR_MSG);
|
||||
}
|
||||
|
||||
/* We use the pcre rather than hs to get this information as we may need it
|
||||
* even in the pure unguarded pcre mode where there is no hs available. We
|
||||
* can not use the compile flags due to (*UTF8) verb */
|
||||
unsigned long int opts = 0; // PCRE_INFO_OPTIONS demands an unsigned long
|
||||
if (pcre_fullinfo(compiled, extra, PCRE_INFO_OPTIONS, &opts)) {
|
||||
throw CompileError(PCRE_ERROR_MSG);
|
||||
}
|
||||
utf8 = opts & PCRE_UTF8;
|
||||
}
|
||||
|
||||
size_t PatternData::patternSize() const {
|
||||
size_t len = 0;
|
||||
|
||||
// ch_pattern header.
|
||||
len += sizeof(ch_pattern);
|
||||
|
||||
len = ROUNDUP_N(len, 8);
|
||||
DEBUG_PRINTF("compiled pcre at %zu\n", len);
|
||||
len += compiled_size;
|
||||
|
||||
// PCRE study data, which may be zero.
|
||||
if (study_size) {
|
||||
len = ROUNDUP_N(len, 8);
|
||||
DEBUG_PRINTF("study at %zu\n", len);
|
||||
len += (size_t)study_size;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("pattern size %zu\n", len);
|
||||
return len;
|
||||
}
|
||||
|
||||
/** \brief Write out an ch_pattern structure, which should already be sized
|
||||
* correctly according to PatternData::patternSize. */
|
||||
void PatternData::writePattern(ch_pattern *pattern) const {
|
||||
assert(pattern);
|
||||
assert(ISALIGNED_CL(pattern));
|
||||
|
||||
pattern->id = id;
|
||||
|
||||
u32 flags = 0;
|
||||
if (singlematch) {
|
||||
flags |= CHIMERA_PATTERN_FLAG_SINGLEMATCH;
|
||||
}
|
||||
if (utf8) {
|
||||
flags |= CHIMERA_PATTERN_FLAG_UTF8;
|
||||
}
|
||||
|
||||
pattern->flags = flags;
|
||||
pattern->maxWidth = maxWidth;
|
||||
pattern->minWidth = minWidth == UINT_MAX ? 0 : minWidth;
|
||||
pattern->fixedWidth = fixedWidth;
|
||||
|
||||
// Compiled PCRE pattern.
|
||||
char *ptr = (char *)pattern;
|
||||
ptr += ROUNDUP_N(sizeof(*pattern), 8);
|
||||
DEBUG_PRINTF("compiled pcre at %zu\n", (size_t)(ptr - (char *)pattern));
|
||||
memcpy(ptr, compiled, compiled_size);
|
||||
ptr += compiled_size;
|
||||
|
||||
// PCRE match limits
|
||||
pattern->extra.flags = PCRE_EXTRA_MATCH_LIMIT |
|
||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
|
||||
pattern->extra.match_limit = matchLimit ? matchLimit : 10000000;
|
||||
// Set to avoid segment fault
|
||||
pattern->extra.match_limit_recursion =
|
||||
matchLimitRecursion ? matchLimitRecursion : 1500;
|
||||
|
||||
// PCRE study_data.
|
||||
u32 studyOffset = 0;
|
||||
if (extra) {
|
||||
assert(extra->study_data);
|
||||
ptr = ROUNDUP_PTR(ptr, 8);
|
||||
DEBUG_PRINTF("study at %zu\n", (size_t)(ptr - (char *)pattern));
|
||||
memcpy(ptr, extra->study_data, study_size);
|
||||
studyOffset = (size_t)(ptr - (char *)pattern);
|
||||
|
||||
pattern->extra.flags |= PCRE_EXTRA_STUDY_DATA;
|
||||
pattern->extra.study_data = ptr;
|
||||
|
||||
ptr += study_size;
|
||||
} else {
|
||||
pattern->extra.flags &= ~PCRE_EXTRA_STUDY_DATA;
|
||||
}
|
||||
pattern->studyOffset = studyOffset;
|
||||
|
||||
size_t pcreLen = (ptr - (char *)pattern);
|
||||
assert(pcreLen <= patternSize());
|
||||
pattern->length = (u32)pcreLen;
|
||||
|
||||
// We shouldn't overrun the space we've allocated for this pattern.
|
||||
assert(patternSize() >= (size_t)(ptr - (char *)pattern));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace ch {
|
||||
|
||||
static
|
||||
void ch_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
||||
const unsigned *ids, unsigned elements,
|
||||
unsigned mode, unsigned long int match_limit,
|
||||
unsigned long int match_limit_recursion,
|
||||
const hs_platform_info_t *platform,
|
||||
ch_database_t **out) {
|
||||
vector<unique_ptr<PatternData>> pcres;
|
||||
pcres.reserve(elements);
|
||||
vector<u32> unguarded; // indices of unguarded PCREs.
|
||||
vector<const char *> multiExpr;
|
||||
vector<unsigned int> multiFlags;
|
||||
vector<unsigned int> multiIds;
|
||||
bool allConfirm = true;
|
||||
bool allSingleMatch = true;
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
const char *myExpr = expressions[i];
|
||||
unsigned int myFlags = flags ? flags[i] : 0;
|
||||
unsigned int myId = ids ? ids[i] : 0;
|
||||
|
||||
checkFlags(myFlags);
|
||||
|
||||
// First, build with libpcre. A build failure from libpcre will throw
|
||||
// an exception up to the caller.
|
||||
auto patternData =
|
||||
ue2::make_unique<PatternData>(myExpr, myFlags, i, myId, mode, match_limit,
|
||||
match_limit_recursion, platform);
|
||||
pcres.push_back(move(patternData));
|
||||
PatternData &curr = *pcres.back();
|
||||
|
||||
if (!(myFlags & HS_FLAG_SINGLEMATCH)) {
|
||||
allSingleMatch = false;
|
||||
}
|
||||
|
||||
// in the multimatch, we always run in prefilter mode and accept vacuous
|
||||
// patterns.
|
||||
myFlags |=
|
||||
HS_FLAG_ALLOWEMPTY | HS_FLAG_PREFILTER;
|
||||
|
||||
if (curr.fixedWidth != UINT_MAX) {
|
||||
myFlags |= HS_FLAG_SOM_LEFTMOST;
|
||||
DEBUG_PRINTF("fixed width, turn off prefiltering\n");
|
||||
myFlags &= ~HS_FLAG_PREFILTER;
|
||||
allConfirm = false;
|
||||
|
||||
// Single match can't coexist with SOM.
|
||||
myFlags &= ~HS_FLAG_SINGLEMATCH;
|
||||
}
|
||||
|
||||
if (curr.guard) {
|
||||
// We use the index into the PCREs array as the Hyperscan idx.
|
||||
multiExpr.push_back(myExpr);
|
||||
multiFlags.push_back(myFlags);
|
||||
multiIds.push_back(i);
|
||||
} else {
|
||||
// No Hyperscan support, PCRE is unguarded.
|
||||
unguarded.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("built %zu PCREs, %zu of which are unguarded\n",
|
||||
pcres.size(), unguarded.size());
|
||||
|
||||
// Work out our sizing for the output database.
|
||||
size_t patternSize = 0;
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
size_t len = pcres[i]->patternSize();
|
||||
patternSize += ROUNDUP_CL(len);
|
||||
}
|
||||
DEBUG_PRINTF("pcre bytecode takes %zu bytes\n", patternSize);
|
||||
|
||||
bool noMulti = multiExpr.empty();
|
||||
size_t multiSize = 0;
|
||||
hs_database *multidb = nullptr;
|
||||
if (!noMulti) {
|
||||
hs_compile_error_t *hs_comp_error = nullptr;
|
||||
hs_error_t err = hs_compile_multi(&multiExpr[0], &multiFlags[0],
|
||||
&multiIds[0], multiExpr.size(),
|
||||
HS_MODE_BLOCK, platform, &multidb,
|
||||
&hs_comp_error);
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
assert(hs_comp_error);
|
||||
DEBUG_PRINTF("hs_compile_multi returned error: %s\n",
|
||||
hs_comp_error->message);
|
||||
assert(0);
|
||||
hs_free_compile_error(hs_comp_error);
|
||||
throw CompileError("Internal error.");
|
||||
}
|
||||
|
||||
assert(multidb);
|
||||
err = hs_database_size(multidb, &multiSize);
|
||||
if (err != HS_SUCCESS) {
|
||||
assert(0);
|
||||
throw CompileError("Internal error.");
|
||||
}
|
||||
DEBUG_PRINTF("built hyperscan database with len %zu bytes\n", multiSize);
|
||||
}
|
||||
|
||||
size_t bytecodeLen = sizeof(ch_bytecode) +
|
||||
multiSize + alignof(u32) +
|
||||
(sizeof(u32) * unguarded.size()) +
|
||||
(sizeof(u32) * elements) +
|
||||
patternSize +
|
||||
128; // padding for alignment
|
||||
size_t totalSize = sizeof(ch_database) + bytecodeLen;
|
||||
|
||||
DEBUG_PRINTF("allocating %zu bytes for database\n", totalSize);
|
||||
char *ptr = (char *)ch_database_alloc(totalSize);
|
||||
if (ch_check_alloc(ptr) != CH_SUCCESS) {
|
||||
ch_database_free(ptr);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
|
||||
memset(ptr, 0, totalSize);
|
||||
|
||||
// First, the header.
|
||||
ch_database *hydb = (ch_database *)ptr;
|
||||
hydb->magic = CH_DB_MAGIC;
|
||||
hydb->version = HS_VERSION_32BIT;
|
||||
hydb->length = bytecodeLen;
|
||||
|
||||
// Then, the bytecode.
|
||||
size_t shift = (size_t)hydb->bytes & 0x3f;
|
||||
hydb->bytecode = offsetof(struct ch_database, bytes) - shift;
|
||||
ch_bytecode *db = (ch_bytecode *)((char *)hydb + hydb->bytecode);
|
||||
db->patternCount = elements;
|
||||
db->activeSize = mmbit_size(elements);
|
||||
db->flags = 0;
|
||||
db->length = bytecodeLen;
|
||||
|
||||
if (noMulti) {
|
||||
db->flags |= CHIMERA_FLAG_NO_MULTIMATCH;
|
||||
}
|
||||
if (mode & CH_MODE_GROUPS) {
|
||||
db->flags |= CHIMERA_FLAG_GROUPS;
|
||||
}
|
||||
if (allConfirm) {
|
||||
db->flags |= CHIMERA_FLAG_ALL_CONFIRM;
|
||||
}
|
||||
if (allSingleMatch) {
|
||||
db->flags |= CHIMERA_FLAG_ALL_SINGLE;
|
||||
}
|
||||
|
||||
|
||||
// Find and set the max ovector size by looking at the capture count for
|
||||
// each pcre.
|
||||
u32 maxCaptureGroups = 0;
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
maxCaptureGroups = max(maxCaptureGroups, (u32)pcres[i]->capture_cnt);
|
||||
}
|
||||
db->maxCaptureGroups = maxCaptureGroups;
|
||||
DEBUG_PRINTF("max capture groups is %u\n", maxCaptureGroups);
|
||||
|
||||
if (!noMulti) {
|
||||
DEBUG_PRINTF("write hyperscan database\n");
|
||||
// Write Hyperscan database directly after the header struct, then free it.
|
||||
if (!writeHyperscanDatabase(db, multidb)) {
|
||||
ch_database_free(hydb);
|
||||
hs_free_database(multidb);
|
||||
throw CompileError("Internal error.");
|
||||
}
|
||||
hs_free_database(multidb);
|
||||
} else {
|
||||
db->databaseOffset = ROUNDUP_CL(sizeof(*db));
|
||||
}
|
||||
|
||||
// Then, write our unguarded PCRE list.
|
||||
db->unguardedCount = unguarded.size();
|
||||
db->unguardedOffset = ROUNDUP_N(db->databaseOffset + multiSize, 4);
|
||||
ptr = (char *)db + db->unguardedOffset;
|
||||
copy(unguarded.begin(), unguarded.end(), (u32 *)ptr);
|
||||
|
||||
// Then, write all our compiled PCRE patterns and the lookup table for
|
||||
// them.
|
||||
db->patternOffset = db->unguardedOffset + unguarded.size() * sizeof(u32);
|
||||
u32 *patternOffset = (u32 *)((char *)db + db->patternOffset);
|
||||
u32 offset = ROUNDUP_CL(db->patternOffset + elements * sizeof(u32));
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
*patternOffset = offset;
|
||||
size_t len = pcres[i]->patternSize();
|
||||
ptr = (char *)db + offset;
|
||||
struct ch_pattern *pattern = (struct ch_pattern *)ptr;
|
||||
pcres[i]->writePattern(pattern);
|
||||
DEBUG_PRINTF("wrote pcre %u into offset %u, len %zu\n", i, offset, len);
|
||||
offset += ROUNDUP_CL(len);
|
||||
patternOffset++;
|
||||
}
|
||||
|
||||
assert(offset <= totalSize);
|
||||
assert(hydb->magic == CH_DB_MAGIC);
|
||||
DEBUG_PRINTF("built hybrid database, size %zu bytes\n", totalSize);
|
||||
DEBUG_PRINTF("offset=%u\n", offset);
|
||||
*out = hydb;
|
||||
}
|
||||
|
||||
} // namespace ch
|
||||
|
||||
/*
 * Public API: compile a single expression into a Chimera database.
 *
 * Validates the arguments, then delegates to ch::ch_compile_multi_int() with
 * one element, ID zero and match limits of zero. Returns CH_SUCCESS with *db
 * set on success. On failure returns CH_COMPILER_ERROR, sets *db to NULL
 * (when db is not NULL) and, when comp_error is not NULL, stores an error
 * that the caller releases with ch_free_compile_error().
 */
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_compile(const char *expression, unsigned flags,
                               unsigned mode,
                               const hs_platform_info_t *platform,
                               ch_database_t **db,
                               ch_compile_error_t **comp_error) {
    if (!comp_error) {
        if (db) {
            // Clear the caller's database pointer, not just our local copy of
            // the parameter.
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return CH_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error =
            generateChimeraCompileError("Invalid parameter: db is NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!expression) {
        *db = nullptr;
        *comp_error = generateChimeraCompileError(
            "Invalid parameter: expression is NULL", -1);
        return CH_COMPILER_ERROR;
    }

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode
        return CH_COMPILER_ERROR;
    }

    try {
        unsigned id = 0; // single expressions get zero as an ID
        // Internal function to do all the work, now that we've handled all the
        // argument checking.
        ch::ch_compile_multi_int(&expression, &flags, &id, 1, mode, 0, 0,
                                 platform, db);
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateChimeraCompileError(e.reason, e.hasIndex ?
                                                  (int)e.index : -1);
        return CH_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
        return CH_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_einternal);
        return CH_COMPILER_ERROR;
    }

    DEBUG_PRINTF("success!\n");
    return CH_SUCCESS;
}
|
||||
|
||||
/*
 * Public API: compile several expressions into one Chimera database.
 *
 * Validates the arguments, then delegates to ch::ch_compile_multi_int() with
 * match limits of zero. Returns CH_SUCCESS with *db set on success. On
 * failure returns CH_COMPILER_ERROR, sets *db to NULL (when db is not NULL)
 * and, when comp_error is not NULL, stores an error that the caller releases
 * with ch_free_compile_error().
 */
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_compile_multi(const char *const *expressions,
                                     const unsigned *flags, const unsigned *ids,
                                     unsigned elements, unsigned mode,
                                     const hs_platform_info_t *platform,
                                     ch_database_t **db,
                                     ch_compile_error_t **comp_error) {
    if (!comp_error) {
        if (db) {
            // Clear the caller's database pointer, not just our local copy of
            // the parameter.
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return CH_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error =
            generateChimeraCompileError("Invalid parameter: db is NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!expressions) {
        *db = nullptr;
        *comp_error = generateChimeraCompileError(
            "Invalid parameter: expressions is NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!elements) {
        *db = nullptr;
        *comp_error = generateChimeraCompileError(
            "Invalid parameter: elements is zero", -1);
        return CH_COMPILER_ERROR;
    }

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode
        return CH_COMPILER_ERROR;
    }

    try {
        // Internal function to do all the work, now that we've handled all the
        // argument checking.
        ch::ch_compile_multi_int(expressions, flags, ids, elements, mode, 0, 0,
                                 platform, db);
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateChimeraCompileError(e.reason, e.hasIndex ?
                                                  (int)e.index : -1);
        return CH_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
        return CH_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_einternal);
        return CH_COMPILER_ERROR;
    }

    DEBUG_PRINTF("success!\n");
    return CH_SUCCESS;
}
|
||||
|
||||
/*
 * Public API: compile several expressions into one Chimera database with
 * caller-supplied PCRE match limits.
 *
 * Validates the arguments, then delegates to ch::ch_compile_multi_int(),
 * forwarding match_limit and match_limit_recursion. Returns CH_SUCCESS with
 * *db set on success. On failure returns CH_COMPILER_ERROR, sets *db to NULL
 * (when db is not NULL) and, when comp_error is not NULL, stores an error
 * that the caller releases with ch_free_compile_error().
 */
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_compile_ext_multi(
                                     const char *const *expressions,
                                     const unsigned *flags,
                                     const unsigned *ids,
                                     unsigned elements, unsigned mode,
                                     unsigned long int match_limit,
                                     unsigned long int match_limit_recursion,
                                     const hs_platform_info_t *platform,
                                     ch_database_t **db,
                                     ch_compile_error_t **comp_error) {
    if (!comp_error) {
        if (db) {
            // Clear the caller's database pointer, not just our local copy of
            // the parameter.
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return CH_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error =
            generateChimeraCompileError("Invalid parameter: db is NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!expressions) {
        *db = nullptr;
        *comp_error = generateChimeraCompileError(
            "Invalid parameter: expressions is NULL", -1);
        return CH_COMPILER_ERROR;
    }
    if (!elements) {
        *db = nullptr;
        *comp_error = generateChimeraCompileError(
            "Invalid parameter: elements is zero", -1);
        return CH_COMPILER_ERROR;
    }

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode
        return CH_COMPILER_ERROR;
    }

    try {
        // Internal function to do all the work, now that we've handled all the
        // argument checking.
        ch::ch_compile_multi_int(expressions, flags, ids, elements, mode,
                                 match_limit, match_limit_recursion, platform,
                                 db);
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateChimeraCompileError(e.reason, e.hasIndex ?
                                                  (int)e.index : -1);
        return CH_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_enomem);
        return CH_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<ch_compile_error_t *>(&ch_einternal);
        return CH_COMPILER_ERROR;
    }

    DEBUG_PRINTF("success!\n");
    return CH_SUCCESS;
}
|
||||
|
||||
/*
 * Public API: release a compile error returned by one of the ch_compile*
 * calls. The work is done by freeChimeraCompileError(); this wrapper always
 * reports CH_SUCCESS.
 */
extern "C" HS_PUBLIC_API
ch_error_t HS_CDECL ch_free_compile_error(ch_compile_error_t *error) {
    freeChimeraCompileError(error);

    return CH_SUCCESS;
}
|
394
chimera/ch_compile.h
Normal file
394
chimera/ch_compile.h
Normal file
@ -0,0 +1,394 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_COMPILE_H_
|
||||
#define CH_COMPILE_H_
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Chimera compiler API definition.
|
||||
*
|
||||
* Chimera is a hybrid solution of Hyperscan and PCRE.
|
||||
*
|
||||
* This header contains functions for compiling regular expressions into
|
||||
* Chimera databases that can be used by the Chimera runtime.
|
||||
*/
|
||||
|
||||
#include "ch_common.h"
|
||||
#include "hs_compile.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
 * Error details returned by the compile calls (@ref ch_compile() and
 * @ref ch_compile_multi()) on failure. The caller may inspect the fields of
 * this type to determine the cause of the failure.
 */
typedef struct ch_compile_error {
    /**
     * Human-readable message describing the error.
     */
    char *message;

    /**
     * Zero-based number of the expression that caused the error, if this can
     * be determined. When the error is not specific to an expression, this
     * value is less than zero.
     */
    int expression;
} ch_compile_error_t;
|
||||
|
||||
/**
|
||||
* The basic regular expression compiler.
|
||||
*
|
||||
* This is the function call with which an expression is compiled into a
|
||||
* Chimera database which can be passed to the runtime function (
|
||||
* @ref ch_scan())
|
||||
*
|
||||
* @param expression
|
||||
* The NULL-terminated expression to parse. Note that this string must
|
||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||
* any global flags should be specified with the @a flags argument. For
|
||||
* example, the expression `/abc?def/i` should be compiled by providing
|
||||
* `abc?def` as the @a expression, and @ref CH_FLAG_CASELESS as the @a
|
||||
* flags.
|
||||
*
|
||||
* @param flags
|
||||
* Flags which modify the behaviour of the expression. Multiple flags may
|
||||
* be used by ORing them together. Valid values are:
|
||||
* - CH_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - CH_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - CH_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - CH_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||
* expression per stream.
|
||||
* - CH_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - CH_FLAG_UCP - Use Unicode properties for character classes.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole for capturing
|
||||
* groups. One of CH_MODE_NOGROUPS or CH_MODE_GROUPS must be supplied.
|
||||
* See @ref CH_MODE_FLAG for more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref ch_free_database() function.
|
||||
*
|
||||
* @param compile_error
|
||||
* If the compile fails, a pointer to a @ref ch_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* ch_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS is returned on successful compilation; @ref
|
||||
* CH_COMPILER_ERROR on failure, with details provided in the @a compile_error
|
||||
* parameter.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_compile(const char *expression, unsigned int flags,
|
||||
unsigned int mode,
|
||||
const hs_platform_info_t *platform,
|
||||
ch_database_t **db,
|
||||
ch_compile_error_t **compile_error);
|
||||
|
||||
/**
|
||||
* The multiple regular expression compiler.
|
||||
*
|
||||
* This is the function call with which a set of expressions is compiled into a
|
||||
* database which can be passed to the runtime function (@ref ch_scan()).
|
||||
* Each expression can be labelled with a unique integer which is passed into
|
||||
* the match callback to identify the pattern that has matched.
|
||||
*
|
||||
* @param expressions
|
||||
* Array of NULL-terminated expressions to compile. Note that (as for @ref
|
||||
* ch_compile()) these strings must contain only the pattern to be
|
||||
* matched, with no delimiters or flags. For example, the expression
|
||||
* `/abc?def/i` should be compiled by providing `abc?def` as the first
|
||||
* string in the @a expressions array, and @ref CH_FLAG_CASELESS as the
|
||||
* first value in the @a flags array.
|
||||
*
|
||||
* @param flags
|
||||
* Array of flags which modify the behaviour of each expression. Multiple
|
||||
* flags may be used by ORing them together. Specifying the NULL pointer
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Valid values are:
|
||||
* - CH_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - CH_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - CH_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - CH_FLAG_SINGLEMATCH - Only one match will be generated by patterns
|
||||
* with this match id per stream.
|
||||
* - CH_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - CH_FLAG_UCP - Use Unicode properties for character classes.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
* corresponding pattern in the expressions array. Specifying the NULL
|
||||
* pointer in place of an array will set the ID value for all patterns to
|
||||
* zero.
|
||||
*
|
||||
* @param elements
|
||||
* The number of elements in the input arrays.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole for capturing
|
||||
* groups. One of CH_MODE_NOGROUPS or CH_MODE_GROUPS must be supplied.
|
||||
* See @ref CH_MODE_FLAG for more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref ch_free_database() function.
|
||||
*
|
||||
* @param compile_error
|
||||
* If the compile fails, a pointer to a @ref ch_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* ch_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS is returned on successful compilation; @ref
|
||||
* CH_COMPILER_ERROR on failure, with details provided in the @a compile_error
|
||||
* parameter.
|
||||
*
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_compile_multi(const char *const *expressions,
|
||||
const unsigned int *flags,
|
||||
const unsigned int *ids,
|
||||
unsigned int elements, unsigned int mode,
|
||||
const hs_platform_info_t *platform,
|
||||
ch_database_t **db,
|
||||
ch_compile_error_t **compile_error);
|
||||
|
||||
/**
|
||||
* The multiple regular expression compiler with extended match limits support.
|
||||
*
|
||||
* This is the function call with which a set of expressions is compiled into a
|
||||
* database in the same way as @ref ch_compile_multi(), but allows additional
|
||||
* parameters to be specified via match_limit and match_limit_recursion to
|
||||
* define match limits for PCRE runtime.
|
||||
*
|
||||
* @param expressions
|
||||
* Array of NULL-terminated expressions to compile. Note that (as for @ref
|
||||
* ch_compile()) these strings must contain only the pattern to be
|
||||
* matched, with no delimiters or flags. For example, the expression
|
||||
* `/abc?def/i` should be compiled by providing `abc?def` as the first
|
||||
* string in the @a expressions array, and @ref CH_FLAG_CASELESS as the
|
||||
* first value in the @a flags array.
|
||||
*
|
||||
* @param flags
|
||||
* Array of flags which modify the behaviour of each expression. Multiple
|
||||
* flags may be used by ORing them together. Specifying the NULL pointer
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Valid values are:
|
||||
* - CH_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - CH_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - CH_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - CH_FLAG_SINGLEMATCH - Only one match will be generated by patterns
|
||||
* with this match id per stream.
|
||||
* - CH_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - CH_FLAG_UCP - Use Unicode properties for character classes.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
* corresponding pattern in the expressions array. Specifying the NULL
|
||||
* pointer in place of an array will set the ID value for all patterns to
|
||||
* zero.
|
||||
*
|
||||
* @param elements
|
||||
* The number of elements in the input arrays.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole for capturing
|
||||
* groups. One of CH_MODE_NOGROUPS or CH_MODE_GROUPS must be supplied.
|
||||
* See @ref CH_MODE_FLAG for more details.
|
||||
*
|
||||
* @param match_limit
|
||||
* A limit from pcre_extra on the number of match function calls made in PCRE,
|
||||
* to limit backtracking that can take place.
|
||||
*
|
||||
* @param match_limit_recursion
|
||||
* A limit from pcre_extra on the recursion depth of match function
|
||||
* in PCRE.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref ch_free_database() function.
|
||||
*
|
||||
* @param compile_error
|
||||
* If the compile fails, a pointer to a @ref ch_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* ch_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS is returned on successful compilation; @ref
|
||||
* CH_COMPILER_ERROR on failure, with details provided in the @a compile_error
|
||||
* parameter.
|
||||
*
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_compile_ext_multi(const char *const *expressions,
|
||||
const unsigned int *flags,
|
||||
const unsigned int *ids,
|
||||
unsigned int elements,
|
||||
unsigned int mode,
|
||||
unsigned long int match_limit,
|
||||
unsigned long int match_limit_recursion,
|
||||
const hs_platform_info_t *platform,
|
||||
ch_database_t **db,
|
||||
ch_compile_error_t **compile_error);
|
||||
|
||||
/**
|
||||
* Free an error structure generated by @ref ch_compile(), @ref
|
||||
* ch_compile_multi() or @ref ch_compile_ext_multi().
|
||||
*
|
||||
* @param error
|
||||
* The @ref ch_compile_error_t to be freed. NULL may also be safely
|
||||
* provided.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_free_compile_error(ch_compile_error_t *error);
|
||||
|
||||
/**
|
||||
* @defgroup CH_PATTERN_FLAG Pattern flags
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compile flag: Set case-insensitive matching.
|
||||
*
|
||||
* This flag sets the expression to be matched case-insensitively by default.
|
||||
* The expression may still use PCRE tokens (notably `(?i)` and
|
||||
* `(?-i)`) to switch case-insensitive matching on and off.
|
||||
*/
|
||||
#define CH_FLAG_CASELESS 1
|
||||
|
||||
/**
|
||||
* Compile flag: Matching a `.` will not exclude newlines.
|
||||
*
|
||||
* This flag sets any instances of the `.` token to match newline characters as
|
||||
* well as all other characters. The PCRE specification states that the `.`
|
||||
* token does not match newline characters by default, so without this flag the
|
||||
* `.` token will not cross line boundaries.
|
||||
*/
|
||||
#define CH_FLAG_DOTALL 2
|
||||
|
||||
/**
|
||||
* Compile flag: Set multi-line anchoring.
|
||||
*
|
||||
* This flag instructs the expression to make the `^` and `$` tokens match
|
||||
* newline characters as well as the start and end of the stream. If this flag
|
||||
* is not specified, the `^` token will only ever match at the start of a
|
||||
* stream, and the `$` token will only ever match at the end of a stream within
|
||||
* the guidelines of the PCRE specification.
|
||||
*/
|
||||
#define CH_FLAG_MULTILINE 4
|
||||
|
||||
/**
|
||||
* Compile flag: Set single-match only mode.
|
||||
*
|
||||
* This flag sets the expression's match ID to match at most once; only the
|
||||
* first match for each invocation of @ref ch_scan() will be returned.
|
||||
*
|
||||
*/
|
||||
#define CH_FLAG_SINGLEMATCH 8
|
||||
|
||||
/**
|
||||
* Compile flag: Enable UTF-8 mode for this expression.
|
||||
*
|
||||
* This flag instructs Chimera to treat the pattern as a sequence of UTF-8
|
||||
* characters. The results of scanning invalid UTF-8 sequences with a Chimera
|
||||
* library that has been compiled with one or more patterns using this flag are
|
||||
* undefined.
|
||||
*/
|
||||
#define CH_FLAG_UTF8 32
|
||||
|
||||
/**
|
||||
* Compile flag: Enable Unicode property support for this expression.
|
||||
*
|
||||
* This flag instructs Chimera to use Unicode properties, rather than the
|
||||
* default ASCII interpretations, for character mnemonics like `\w` and `\s` as
|
||||
* well as the POSIX character classes. It is only meaningful in conjunction
|
||||
* with @ref CH_FLAG_UTF8.
|
||||
*/
|
||||
#define CH_FLAG_UCP 64
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @defgroup CH_MODE_FLAG Compile mode flags
|
||||
*
|
||||
* The mode flags are used as values for the mode parameter of the various
|
||||
* compile calls (@ref ch_compile(), @ref ch_compile_multi()).
|
||||
*
|
||||
* By default, the matcher will only supply the start and end offsets of the
|
||||
* match when the match callback is called. Using mode flag @ref CH_MODE_GROUPS
|
||||
* will also fill the `captured` array with the start and end offsets of all
|
||||
* the capturing groups specified by the pattern that has matched.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Disable capturing groups.
|
||||
*/
|
||||
#define CH_MODE_NOGROUPS 0
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Enable capturing groups.
|
||||
*/
|
||||
#define CH_MODE_GROUPS 1048576
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_COMPILE_H_ */
|
126
chimera/ch_database.c
Normal file
126
chimera/ch_database.c
Normal file
@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Chimera: database construction, etc.
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "allocator.h"
|
||||
#include "database.h"
|
||||
#include "hs.h"
|
||||
#include "ch.h"
|
||||
#include "hs_internal.h"
|
||||
#include "ch_common.h"
|
||||
#include "ch_alloc.h"
|
||||
#include "ch_database.h"
|
||||
#include "ch_internal.h"
|
||||
|
||||
static really_inline
|
||||
int db_correctly_aligned(const void *db) {
|
||||
return ISALIGNED_N(db, alignof(unsigned long long));
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_free_database(ch_database_t *hydb) {
|
||||
if (hydb && hydb->magic != CH_DB_MAGIC) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
ch_database_free(hydb);
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_database_size(const ch_database_t *hydb, size_t *size) {
|
||||
if (!size) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
ch_error_t ret = hydbIsValid(hydb);
|
||||
if (unlikely(ret != CH_SUCCESS)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
*size = sizeof(struct ch_database) + hydb->length;
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
/** \brief Identifier prepended to database info. */
|
||||
static const char CHIMERA_IDENT[] = "Chimera ";
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_database_info(const ch_database_t *hydb, char **info) {
|
||||
if (!info) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
*info = NULL;
|
||||
|
||||
if (!hydb || !db_correctly_aligned(hydb) || hydb->magic != CH_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
const struct ch_bytecode *bytecode = ch_get_bytecode(hydb);
|
||||
char noMulti = (bytecode->flags & CHIMERA_FLAG_NO_MULTIMATCH);
|
||||
if (noMulti) {
|
||||
size_t len = strlen(CHIMERA_IDENT);
|
||||
*info = ch_misc_alloc(len + 1);
|
||||
if (!(*info)) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
memcpy((*info), CHIMERA_IDENT, len);
|
||||
(*info)[len] = '\0';
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
char *hsinfo = NULL;
|
||||
hs_error_t ret = hs_database_info(getHyperscanDatabase(bytecode), &hsinfo);
|
||||
if (ret != HS_SUCCESS) {
|
||||
assert(!hsinfo);
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t hybridlen = strlen(CHIMERA_IDENT);
|
||||
size_t hslen = strlen(hsinfo);
|
||||
*info = ch_misc_alloc(hybridlen + hslen + 1);
|
||||
if (!(*info)) {
|
||||
ch_misc_free(hsinfo);
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
memcpy((*info), CHIMERA_IDENT, hybridlen);
|
||||
memcpy((*info) + hybridlen, hsinfo, hslen);
|
||||
(*info)[hybridlen + hslen] = '\0';
|
||||
ch_misc_free(hsinfo);
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
158
chimera/ch_database.h
Normal file
158
chimera/ch_database.h
Normal file
@ -0,0 +1,158 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime code for ch_database manipulation.
|
||||
*/
|
||||
|
||||
#ifndef CH_DATABASE_H_
|
||||
#define CH_DATABASE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define PCRE_STATIC
|
||||
#include <pcre.h>
|
||||
|
||||
#include "ch_compile.h" // for CH_MODE_ flags
|
||||
#include "ue2common.h"
|
||||
#include "hs_version.h"
|
||||
#include "hs.h"
|
||||
|
||||
#define CH_DB_MAGIC 0xdedededeU //!< Magic number stored in \ref ch_database
|
||||
|
||||
/** \brief Main Chimera database header. */
|
||||
struct ch_database {
|
||||
u32 magic; //!< must be \ref CH_DB_MAGIC
|
||||
u32 version; //!< release version
|
||||
u32 length; //!< total allocated length in bytes
|
||||
u32 reserved0; //!< unused
|
||||
u32 reserved1; //!< unused
|
||||
u32 bytecode; //!< offset relative to db start
|
||||
u32 padding[16]; //!< padding for alignment of rest of bytecode
|
||||
char bytes[];
|
||||
};
|
||||
|
||||
/** \brief Chimera bytecode header, which follows the \ref ch_database and is
|
||||
* always 64-byte aligned. */
|
||||
struct ch_bytecode {
|
||||
u32 length; //!< length of bytecode including this header struct
|
||||
u32 flags; //!< whole-database flags (CHIMERA_FLAG_NO_MULTIMATCH,
|
||||
// CHIMERA_FLAG_GROUPS)
|
||||
u32 patternCount; //!< total number of patterns
|
||||
u32 activeSize; //!< size of mmbit to store active pattern ids
|
||||
u32 databaseOffset; //!< offset for database following \ref ch_bytecode
|
||||
// header
|
||||
u32 patternOffset; //!< points to an array of u32 offsets, each pointing to
|
||||
// a \ref ch_pattern
|
||||
u32 unguardedOffset; //!< pointer to a list of unguarded pattern indices
|
||||
u32 unguardedCount; //!< number of unguarded patterns
|
||||
u32 maxCaptureGroups; //!< max number of capture groups used by any pattern
|
||||
};
|
||||
|
||||
/** \brief Per-pattern header.
|
||||
*
|
||||
* struct is followed in bytecode by:
|
||||
* 1. pcre bytecode (always present)
|
||||
* 2. pcre study data (sometimes)
|
||||
*/
|
||||
struct ch_pattern {
|
||||
u32 id; //!< pattern ID to report to the user
|
||||
u32 flags; //!< per-pattern flags (e.g. \ref CHIMERA_PATTERN_FLAG_UTF8)
|
||||
u32 maxWidth; //!< maximum width of a match, or UINT_MAX for inf.
|
||||
u32 minWidth; //!< minimum width of a match.
|
||||
u32 fixedWidth;//!< pattern has fixed width.
|
||||
u32 studyOffset; //!< offset relative to struct start of study data,
|
||||
// or zero if there is none
|
||||
u32 length; //!< length of struct plus pcre bytecode and study data
|
||||
pcre_extra extra; //!< pcre_extra struct, used to store study data ptr for
|
||||
// the currently-running pcre at runtime.
|
||||
};
|
||||
|
||||
static really_inline
|
||||
const void *ch_get_bytecode(const struct ch_database *db) {
|
||||
assert(db);
|
||||
const void *bytecode = (const char *)db + db->bytecode;
|
||||
assert(ISALIGNED_16(bytecode));
|
||||
return bytecode;
|
||||
}
|
||||
|
||||
struct hs_database;
|
||||
|
||||
static really_inline
|
||||
const struct hs_database *getHyperscanDatabase(const struct ch_bytecode *db) {
|
||||
assert(db);
|
||||
const char *ptr = (const char *)db;
|
||||
const struct hs_database *hs_db;
|
||||
hs_db = (const struct hs_database *)(ptr + db->databaseOffset);
|
||||
assert(ISALIGNED_CL(hs_db));
|
||||
return hs_db;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u32 *getUnguarded(const struct ch_bytecode *db) {
|
||||
assert(db);
|
||||
const char *ptr = (const char *)db;
|
||||
const u32 *unguarded = (const u32 *)(ptr + db->unguardedOffset);
|
||||
assert(ISALIGNED_N(unguarded, sizeof(u32)));
|
||||
return unguarded;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct ch_pattern *getPattern(const struct ch_bytecode *db, u32 i) {
|
||||
assert(db);
|
||||
assert(i < db->patternCount);
|
||||
const char *ptr = (const char *)db;
|
||||
const u32 *patternOffset = (const u32 *)(ptr + db->patternOffset);
|
||||
assert(patternOffset[i] < db->length);
|
||||
return (const struct ch_pattern *)(ptr + patternOffset[i]);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
ch_error_t hydbIsValid(const struct ch_database *hydb) {
|
||||
if (!hydb || hydb->magic != CH_DB_MAGIC) {
|
||||
DEBUG_PRINTF("bad magic (%u != %u)\n", hydb->magic, CH_DB_MAGIC);
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
if (hydb->version != HS_VERSION_32BIT) {
|
||||
DEBUG_PRINTF("bad version\n");
|
||||
return CH_DB_VERSION_ERROR;
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_DATABASE_H_ */
|
||||
|
44
chimera/ch_internal.h
Normal file
44
chimera/ch_internal.h
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Chimera: data structures and internals.
|
||||
*/
|
||||
|
||||
#ifndef CH_INTERNAL_H
|
||||
#define CH_INTERNAL_H
|
||||
|
||||
#define CHIMERA_FLAG_NO_MULTIMATCH 1 //!< Don't run a multimatch scan
|
||||
#define CHIMERA_FLAG_GROUPS 2 //!< Return capturing groups
|
||||
#define CHIMERA_FLAG_ALL_CONFIRM 4 //!< All patterns need confirm
|
||||
#define CHIMERA_FLAG_ALL_SINGLE 8 //!< All patterns need only one match
|
||||
|
||||
#define CHIMERA_PATTERN_FLAG_SINGLEMATCH 1 //!< only report the first match
|
||||
#define CHIMERA_PATTERN_FLAG_UTF8 2 //!< pattern is in UTF-8 mode
|
||||
|
||||
#endif
|
629
chimera/ch_runtime.c
Normal file
629
chimera/ch_runtime.c
Normal file
@ -0,0 +1,629 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Chimera: main runtime.
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ch.h"
|
||||
#include "hs.h"
|
||||
#include "hs_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "ch_database.h"
|
||||
#include "ch_internal.h"
|
||||
#include "ch_scratch.h"
|
||||
#include "util/multibit.h"
|
||||
#include "util/unicode_def.h"
|
||||
|
||||
typedef struct queue_item PQ_T;
|
||||
|
||||
static
|
||||
char PQ_COMP(PQ_T *pqc_items, int a, int b) {
|
||||
if ((pqc_items)[a].to != (pqc_items)[b].to) {
|
||||
return (pqc_items)[a].to < (pqc_items)[b].to;
|
||||
} else if ((pqc_items)[a].from != (pqc_items)[b].from) {
|
||||
return (pqc_items)[a].from < (pqc_items)[b].from;
|
||||
} else {
|
||||
return (pqc_items)[a].id < (pqc_items)[b].id;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
char PQ_COMP_B(PQ_T *pqc_items, int a, PQ_T b_fixed) {
|
||||
if ((pqc_items)[a].to != (b_fixed).to) {
|
||||
return (pqc_items)[a].to < (b_fixed).to;
|
||||
} else if ((pqc_items)[a].from != (b_fixed).from) {
|
||||
return (pqc_items)[a].from < (b_fixed).from;
|
||||
} else {
|
||||
return (pqc_items)[a].id < b_fixed.id;
|
||||
}
|
||||
}
|
||||
|
||||
#include "util/pqueue.h"
|
||||
|
||||
static really_inline
|
||||
void pq_insert_with(struct match_pq *pq, int from, int to, u32 id) {
|
||||
DEBUG_PRINTF("inserting pattern%u in pq at %u\n", id, to);
|
||||
struct queue_item temp = {
|
||||
.from = from,
|
||||
.to = to,
|
||||
.id = id,
|
||||
};
|
||||
|
||||
pq_insert(pq->item, pq->size, temp);
|
||||
++pq->size;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void pq_pop_nice(struct match_pq *pq) {
|
||||
pq_pop(pq->item, pq->size);
|
||||
pq->size--;
|
||||
}
|
||||
|
||||
/** dummy event handler for use when user does not provide one */
|
||||
static
|
||||
int HS_CDECL null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
|
||||
UNUSED unsigned long long to, UNUSED unsigned flags,
|
||||
UNUSED unsigned size, UNUSED const ch_capture_t *captured,
|
||||
UNUSED void *ctxt) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** \brief Chimera runtime context. */
|
||||
struct HybridContext {
|
||||
const char *data; //!< buffer being scanned
|
||||
u32 length; //!< length of data buffer
|
||||
u32 valid_utf8_highwater; //!< UTF-8 has been validated up to here.
|
||||
const struct ch_bytecode *db;
|
||||
struct ch_scratch *scratch;
|
||||
struct match_pq *pq;
|
||||
/** \brief user-supplied match callback */
|
||||
int (HS_CDECL *match_callback)(unsigned int id, unsigned long long from,
|
||||
unsigned long long to, unsigned int flags,
|
||||
unsigned int size, const ch_capture_t *capture,
|
||||
void *ctx);
|
||||
/** \brief user-supplied error callback */
|
||||
int (HS_CDECL *error_callback)(ch_error_event_t error_type, unsigned int id,
|
||||
void *info, void *ctx);
|
||||
/** \brief user-supplied context */
|
||||
void *context;
|
||||
};
|
||||
|
||||
// Internal PCRE func.
|
||||
extern int _pcre_valid_utf(const unsigned char *, int, int *);
|
||||
|
||||
/** UTF-8 validity check. Returns >0 if the given region of the data is valid
|
||||
* UTF-8, 0 otherwise. */
|
||||
static
|
||||
char isValidUTF8(struct HybridContext *hyctx, u32 end) {
|
||||
assert(hyctx);
|
||||
|
||||
if (hyctx->valid_utf8_highwater >= end) {
|
||||
return 1; // Already validated.
|
||||
}
|
||||
|
||||
const unsigned char *data =
|
||||
(const unsigned char *)hyctx->data + hyctx->valid_utf8_highwater;
|
||||
int validate_len = end - hyctx->valid_utf8_highwater;
|
||||
|
||||
DEBUG_PRINTF("validating %d bytes\n", validate_len);
|
||||
|
||||
int erroroffset = 0;
|
||||
if (_pcre_valid_utf(data, validate_len, &erroroffset)) {
|
||||
DEBUG_PRINTF("UTF8 invalid at offset %d\n", erroroffset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
hyctx->valid_utf8_highwater = end;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static
|
||||
const pcre *getPcre(const struct ch_pattern *pattern) {
|
||||
const char *ptr = (const char *)pattern;
|
||||
const pcre *p = (const pcre *)(ptr + ROUNDUP_N(sizeof(*pattern), 8));
|
||||
assert(ISALIGNED_N(p, 8));
|
||||
return p;
|
||||
}
|
||||
|
||||
/** \brief Fill the Chimera groups array from a pcre_exec ovector. */
|
||||
static
|
||||
void fillGroupsFromOvector(ch_capture_t *groups, int numPairs, int *ovector) {
|
||||
assert(groups);
|
||||
assert(ISALIGNED_N(groups, alignof(ch_capture_t)));
|
||||
|
||||
DEBUG_PRINTF("filling %d groups (@ %p) from pcre ovector\n",
|
||||
numPairs, groups);
|
||||
|
||||
for (int i = 0; i < numPairs * 2; i += 2) {
|
||||
if (ovector[i] == -1) {
|
||||
groups->flags = CH_CAPTURE_FLAG_INACTIVE;
|
||||
} else {
|
||||
groups->flags = CH_CAPTURE_FLAG_ACTIVE;
|
||||
assert(ovector[i] <= ovector[i + 1]);
|
||||
groups->from = ovector[i];
|
||||
groups->to = ovector[i + 1];
|
||||
}
|
||||
++groups;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
ch_error_t handlePcreNonMatch(const struct ch_pattern *pattern, int rv,
|
||||
ch_error_event_handler onError,
|
||||
void *userContext) {
|
||||
assert(rv < 0);
|
||||
|
||||
if (rv == PCRE_ERROR_NOMATCH) {
|
||||
DEBUG_PRINTF("no match found by libpcre\n");
|
||||
return CH_SUCCESS;
|
||||
} else if (rv == PCRE_ERROR_MATCHLIMIT) {
|
||||
DEBUG_PRINTF("pcre hit match limit\n");
|
||||
if (onError) {
|
||||
return onError(CH_ERROR_MATCHLIMIT, pattern->id, NULL,
|
||||
userContext);
|
||||
}
|
||||
return CH_SUCCESS;
|
||||
} else if (rv == PCRE_ERROR_RECURSIONLIMIT) {
|
||||
DEBUG_PRINTF("pcre hit recursion limit\n");
|
||||
if (onError) {
|
||||
return onError(CH_ERROR_RECURSIONLIMIT, pattern->id, NULL,
|
||||
userContext);
|
||||
}
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
// All other errors not handled above are fatal.
|
||||
return CH_FAIL_INTERNAL;
|
||||
}
|
||||
|
||||
static
|
||||
ch_error_t scanPcre(struct HybridContext *hyctx, UNUSED unsigned int length,
|
||||
unsigned int offset, u32 id) {
|
||||
const char *data = hyctx->data;
|
||||
unsigned int full_length = hyctx->length;
|
||||
ch_error_event_handler onError = hyctx->error_callback;
|
||||
void *userContext = hyctx->context;
|
||||
|
||||
const struct ch_pattern *pattern = getPattern(hyctx->db, id);
|
||||
const pcre *p = getPcre(pattern);
|
||||
|
||||
// Set up the PCRE extra block.
|
||||
const pcre_extra *extra = &pattern->extra;
|
||||
|
||||
int startoffset = offset;
|
||||
|
||||
int *ovector = hyctx->scratch->ovector;
|
||||
int ovectorSize = (hyctx->scratch->maxCaptureGroups + 1) * 3;
|
||||
assert(ovectorSize >= 2);
|
||||
|
||||
DEBUG_PRINTF("scanning %u bytes, pattern %u, startoffset %d\n",
|
||||
length, id, startoffset);
|
||||
|
||||
int options = 0;
|
||||
if (pattern->flags & CHIMERA_PATTERN_FLAG_UTF8) {
|
||||
// We do our own UTF-8 validation.
|
||||
options |= PCRE_NO_UTF8_CHECK;
|
||||
if (!isValidUTF8(hyctx, full_length)) {
|
||||
return handlePcreNonMatch(pattern, PCRE_ERROR_BADUTF8, onError,
|
||||
userContext);
|
||||
}
|
||||
}
|
||||
|
||||
int rv = pcre_exec(p, extra, data, full_length, startoffset, options,
|
||||
ovector, ovectorSize);
|
||||
|
||||
DEBUG_PRINTF("pcre return code is %d\n", rv);
|
||||
|
||||
// Handle all non-match or error cases, all of which involve us
|
||||
// terminating the loop.
|
||||
if (rv < 0) {
|
||||
return handlePcreNonMatch(pattern, rv, onError, userContext);
|
||||
}
|
||||
|
||||
// We've found a match, and we should always have room for at least the
|
||||
// start and end offsets in our ovector. Pass this info to the user.
|
||||
assert(rv >= 1);
|
||||
assert(rv < ovectorSize);
|
||||
int from = ovector[0];
|
||||
int to = ovector[1];
|
||||
DEBUG_PRINTF("match %d -> %d\n", from, to);
|
||||
|
||||
struct ch_patterndata *pd = hyctx->scratch->patternData + id;
|
||||
|
||||
if (hyctx->db->flags & CHIMERA_FLAG_GROUPS) {
|
||||
fillGroupsFromOvector(pd->match, rv, ovector);
|
||||
} else {
|
||||
rv = 0;
|
||||
}
|
||||
pd->groupCount = (u32)rv;
|
||||
|
||||
// Insert new matched item to the queue
|
||||
pq_insert_with(hyctx->pq, from, to, id);
|
||||
|
||||
// Next scan starts at the first codepoint after the match. It's
|
||||
// possible that we have a vacuous match, in which case we must step
|
||||
// past it to ensure that we always progress.
|
||||
if (from != to) {
|
||||
startoffset = to;
|
||||
} else if (pattern->flags & CHIMERA_PATTERN_FLAG_UTF8) {
|
||||
startoffset = to + 1;
|
||||
while (startoffset < (int)full_length &&
|
||||
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
|
||||
++startoffset;
|
||||
}
|
||||
} else {
|
||||
startoffset = to + 1;
|
||||
}
|
||||
|
||||
pd->scanStart = startoffset;
|
||||
DEBUG_PRINTF("new offset %u\n", pd->scanStart);
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
static
|
||||
ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id,
|
||||
unsigned long long from, unsigned long long to) {
|
||||
ch_match_event_handler onEvent = hyctx->match_callback;
|
||||
void *userContext = hyctx->context;
|
||||
DEBUG_PRINTF("priority queue size %u\n", hyctx->pq->size);
|
||||
while (hyctx->pq->size) {
|
||||
u32 num_item = hyctx->pq->size;
|
||||
struct queue_item *item = pq_top(hyctx->pq->item);
|
||||
size_t top_from = item->from;
|
||||
size_t top_to = item->to;
|
||||
u32 top_id = item->id;
|
||||
|
||||
if (top_to > to) {
|
||||
pq_insert_with(hyctx->pq, from, to, id);
|
||||
break;
|
||||
}
|
||||
pq_pop_nice(hyctx->pq);
|
||||
|
||||
const struct ch_pattern *pattern = getPattern(hyctx->db, top_id);
|
||||
struct ch_patterndata *pd = hyctx->scratch->patternData + top_id;
|
||||
|
||||
// Report match for pattern
|
||||
DEBUG_PRINTF("trigger match@%zu\n", top_to);
|
||||
ch_callback_t cbrv =
|
||||
onEvent(pattern->id, top_from, top_to, 0 /* flags */,
|
||||
pd->groupCount, pd->match, userContext);
|
||||
|
||||
if (cbrv == CH_CALLBACK_TERMINATE) {
|
||||
DEBUG_PRINTF("user callback told us to terminate scanning\n");
|
||||
return CH_SCAN_TERMINATED;
|
||||
} else if (cbrv == CH_CALLBACK_SKIP_PATTERN) {
|
||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||
pd->scanStart = hyctx->length;
|
||||
}
|
||||
|
||||
if (top_id == id) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Push a new match to replace the old one
|
||||
unsigned int start = pd->scanStart;
|
||||
unsigned int len = hyctx->length - pd->scanStart;
|
||||
if (hyctx->length >= pd->scanStart &&
|
||||
!(pattern->flags & CHIMERA_PATTERN_FLAG_SINGLEMATCH)) {
|
||||
DEBUG_PRINTF("get a new match item\n");
|
||||
int ret = scanPcre(hyctx, len, start, top_id);
|
||||
|
||||
if (ret == CH_CALLBACK_TERMINATE) {
|
||||
DEBUG_PRINTF("user callback told us to terminate scanning\n");
|
||||
return CH_SCAN_TERMINATED;
|
||||
} else if (ret == CH_CALLBACK_SKIP_PATTERN) {
|
||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||
pd->scanStart = hyctx->length;
|
||||
ret = CH_SUCCESS;
|
||||
} else if (ret == CH_FAIL_INTERNAL) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// No further match is found
|
||||
if (hyctx->pq->size == num_item - 1) {
|
||||
pd->scanStart = hyctx->length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
/** \brief Callback used for internal Hyperscan multi-matcher. */
|
||||
static
|
||||
int HS_CDECL multiCallback(unsigned int id, unsigned long long from,
|
||||
unsigned long long to, UNUSED unsigned int flags,
|
||||
void *ctx) {
|
||||
assert(ctx);
|
||||
struct HybridContext *hyctx = ctx;
|
||||
|
||||
DEBUG_PRINTF("match for ID %u at offset %llu\n", id, to);
|
||||
assert(id < hyctx->db->patternCount);
|
||||
|
||||
const struct ch_pattern *pattern = getPattern(hyctx->db, id);
|
||||
struct ch_patterndata *pd = hyctx->scratch->patternData + id;
|
||||
char needConfirm = pattern->fixedWidth == ~0U;
|
||||
|
||||
if (needConfirm &&
|
||||
mmbit_isset(hyctx->scratch->active, hyctx->db->patternCount, id)) {
|
||||
if ((hyctx->db->flags & CHIMERA_FLAG_ALL_CONFIRM) &&
|
||||
mmbit_all(hyctx->scratch->active, hyctx->db->patternCount)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
// Store the fact that we've seen this bit.
|
||||
char already = mmbit_set(hyctx->scratch->active,
|
||||
hyctx->db->patternCount, id);
|
||||
DEBUG_PRINTF("match from %u to %llu\n", pd->scanStart, to);
|
||||
|
||||
if (!already) {
|
||||
pd->scanStart = 0;
|
||||
} else if (to < pd->scanStart + pattern->minWidth) {
|
||||
return 0;
|
||||
} else if (pattern->flags & CHIMERA_PATTERN_FLAG_SINGLEMATCH) {
|
||||
if ((hyctx->db->flags & CHIMERA_FLAG_ALL_SINGLE) &&
|
||||
mmbit_all(hyctx->scratch->active, hyctx->db->patternCount)) {
|
||||
return 1;
|
||||
}
|
||||
// Note: we may have unordered match from Hyperscan,
|
||||
// thus possibly get to < pd->scanStart.
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ret = HS_SUCCESS;
|
||||
unsigned int start = pd->scanStart;
|
||||
unsigned int len = hyctx->length - pd->scanStart;
|
||||
assert(hyctx->length >= pd->scanStart);
|
||||
const char *data = hyctx->data;
|
||||
if (needConfirm) {
|
||||
DEBUG_PRINTF("run confirm for the first time\n");
|
||||
ret = scanPcre(hyctx, len, start, id);
|
||||
hyctx->scratch->ret = ret;
|
||||
if (ret == CH_CALLBACK_TERMINATE) {
|
||||
DEBUG_PRINTF("user callback told us to terminate scanning\n");
|
||||
return HS_SCAN_TERMINATED;
|
||||
} else if (ret == CH_CALLBACK_SKIP_PATTERN) {
|
||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||
pd->scanStart = hyctx->length;
|
||||
ret = HS_SUCCESS;
|
||||
} else if (ret == CH_FAIL_INTERNAL) {
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
if (already) {
|
||||
DEBUG_PRINTF("catch up with new matches\n");
|
||||
ret = catchupPcre(hyctx, id, from, to);
|
||||
|
||||
hyctx->scratch->ret = ret;
|
||||
if (pd->scanStart >= hyctx->length) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
int startoffset = 0;
|
||||
// Next scan starts at the first codepoint after the match. It's
|
||||
// possible that we have a vacuous match, in which case we must step
|
||||
// past it to ensure that we always progress.
|
||||
if (from != to) {
|
||||
startoffset = to;
|
||||
} else if (pattern->flags & CHIMERA_PATTERN_FLAG_UTF8) {
|
||||
startoffset = to + 1;
|
||||
while (startoffset < (int)hyctx->length &&
|
||||
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
|
||||
++startoffset;
|
||||
}
|
||||
} else {
|
||||
startoffset = to + 1;
|
||||
}
|
||||
pd->scanStart = startoffset;
|
||||
int rv = 0;
|
||||
if (hyctx->db->flags & CHIMERA_FLAG_GROUPS) {
|
||||
ch_capture_t *groups = pd->match;
|
||||
groups->flags = CH_CAPTURE_FLAG_ACTIVE;
|
||||
groups->from = from;
|
||||
groups->to = to;
|
||||
rv = 1;
|
||||
}
|
||||
pd->groupCount = (u32)rv;
|
||||
pq_insert_with(hyctx->pq, from, to, id);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
hs_error_t scanHyperscan(struct HybridContext *hyctx, const char *data,
|
||||
unsigned int length) {
|
||||
DEBUG_PRINTF("scanning %u bytes with Hyperscan\n", length);
|
||||
const struct ch_bytecode *hydb = hyctx->db;
|
||||
const hs_database_t *db = getHyperscanDatabase(hydb);
|
||||
hs_scratch_t *scratch = hyctx->scratch->multi_scratch;
|
||||
|
||||
hs_error_t err = hs_scan(db, data, length, 0, scratch, multiCallback,
|
||||
hyctx);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/** \brief Init match priority queue.
|
||||
*
|
||||
* Add a first match offset for each pattern that is not supported by Hyperscan
|
||||
* with prefiltering.
|
||||
*/
|
||||
static really_inline
|
||||
ch_error_t initQueue(struct HybridContext *hyctx, struct match_pq *pq) {
|
||||
const struct ch_bytecode *db = hyctx->db;
|
||||
|
||||
u8 *active = hyctx->scratch->active;
|
||||
mmbit_clear(active, db->patternCount);
|
||||
|
||||
// Init match queue size
|
||||
pq->size = 0;
|
||||
|
||||
unsigned int length = hyctx->length;
|
||||
const u32 *unguarded = getUnguarded(db);
|
||||
for (u32 i = 0; i < db->unguardedCount; i++) {
|
||||
u32 patternId = unguarded[i];
|
||||
DEBUG_PRINTF("switch on unguarded pcre %u\n", patternId);
|
||||
mmbit_set(active, db->patternCount, patternId);
|
||||
|
||||
DEBUG_PRINTF("get a new match item\n");
|
||||
int ret = scanPcre(hyctx, length, 0, patternId);
|
||||
|
||||
struct ch_patterndata *pd = hyctx->scratch->patternData + patternId;
|
||||
if (ret == CH_CALLBACK_TERMINATE) {
|
||||
DEBUG_PRINTF("user callback told us to terminate scanning\n");
|
||||
return CH_SCAN_TERMINATED;
|
||||
} else if (ret == CH_CALLBACK_SKIP_PATTERN) {
|
||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||
pd->scanStart = length;
|
||||
ret = CH_SUCCESS;
|
||||
} else if (ret == CH_FAIL_INTERNAL) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
ch_error_t ch_scan_i(const ch_database_t *hydb,
|
||||
const char *data, unsigned int length,
|
||||
UNUSED unsigned int flags,
|
||||
ch_scratch_t *scratch,
|
||||
ch_match_event_handler onEvent,
|
||||
ch_error_event_handler onError,
|
||||
void *userContext) {
|
||||
if (unlikely(!hydb || !scratch || !data)) {
|
||||
DEBUG_PRINTF("args invalid\n");
|
||||
return CH_INVALID;
|
||||
}
|
||||
ch_error_t ret = hydbIsValid(hydb);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("database invalid\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!ISALIGNED_CL(scratch)) {
|
||||
DEBUG_PRINTF("bad alignment %p\n", scratch);
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
if (scratch->magic != CH_SCRATCH_MAGIC) {
|
||||
DEBUG_PRINTF("scratch invalid\n");
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
if (unlikely(markScratchInUse(scratch))) {
|
||||
return CH_SCRATCH_IN_USE;
|
||||
}
|
||||
|
||||
// Hyperscan underlying scratch and database validity will be checked by
|
||||
// the hs_scan() call, so no need to do it here.
|
||||
|
||||
// PCRE takes the data region length in as an int, so this limits our block
|
||||
// size to INT_MAX.
|
||||
if (length > INT_MAX) {
|
||||
DEBUG_PRINTF("length invalid\n");
|
||||
unmarkScratchInUse(scratch);
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
const struct ch_bytecode *db = ch_get_bytecode(hydb);
|
||||
|
||||
scratch->pq.size = 0;
|
||||
scratch->ret = CH_SUCCESS;
|
||||
|
||||
// Firstly, we run Hyperscan in block mode and add its matches into the
|
||||
// active list for subsequent confirmation with pcre.
|
||||
struct HybridContext hyctx = {
|
||||
.data = data,
|
||||
.length = length,
|
||||
.valid_utf8_highwater = 0,
|
||||
.db = db,
|
||||
.scratch = scratch,
|
||||
.pq = &scratch->pq,
|
||||
.match_callback = onEvent ? onEvent : null_onEvent,
|
||||
.error_callback = onError,
|
||||
.context = userContext
|
||||
};
|
||||
|
||||
// Init priority queue.
|
||||
ret = initQueue(&hyctx, &scratch->pq);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("Chimera returned error %d\n", ret);
|
||||
unmarkScratchInUse(scratch);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!(db->flags & CHIMERA_FLAG_NO_MULTIMATCH)) {
|
||||
ret = scanHyperscan(&hyctx, data, length);
|
||||
if (ret != HS_SUCCESS && scratch->ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("Hyperscan returned error %d\n", scratch->ret);
|
||||
unmarkScratchInUse(scratch);
|
||||
return scratch->ret;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("Flush priority queue\n");
|
||||
// Catch up with PCRE and make up id and offsets as we don't really care
|
||||
// about their values
|
||||
ret = catchupPcre(&hyctx, ~0U, length, length);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("PCRE catch up returned error %d\n", ret);
|
||||
unmarkScratchInUse(scratch);
|
||||
return ret;
|
||||
}
|
||||
|
||||
unmarkScratchInUse(scratch);
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_scan(const ch_database_t *hydb, const char *data,
|
||||
unsigned int length, unsigned int flags,
|
||||
ch_scratch_t *scratch,
|
||||
ch_match_event_handler onEvent,
|
||||
ch_error_event_handler onError, void *userContext) {
|
||||
ch_error_t ret = ch_scan_i(hydb, data, length, flags, scratch, onEvent,
|
||||
onError, userContext);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
const char * HS_CDECL ch_version(void) {
|
||||
return HS_VERSION_STRING;
|
||||
}
|
378
chimera/ch_runtime.h
Normal file
378
chimera/ch_runtime.h
Normal file
@ -0,0 +1,378 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CH_RUNTIME_H_
|
||||
#define CH_RUNTIME_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Chimera runtime API definition.
|
||||
*
|
||||
* Chimera is a hybrid of the Hyperscan and PCRE regular expression engines.
|
||||
*
|
||||
* This header contains functions for using compiled Chimera databases for
|
||||
* scanning data at runtime.
|
||||
*/
|
||||
|
||||
#include "hs_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
struct ch_scratch;
|
||||
|
||||
/**
|
||||
* A Chimera scratch space.
|
||||
*/
|
||||
typedef struct ch_scratch ch_scratch_t;
|
||||
|
||||
/**
|
||||
* Callback return value used to tell the Chimera matcher what to do after
|
||||
* processing this match.
|
||||
*/
|
||||
typedef int ch_callback_t;
|
||||
|
||||
/**
|
||||
* @defgroup CH_CALLBACK ch_callback_t values
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Continue matching.
|
||||
*/
|
||||
#define CH_CALLBACK_CONTINUE 0
|
||||
|
||||
/**
|
||||
* Terminate matching.
|
||||
*/
|
||||
#define CH_CALLBACK_TERMINATE 1
|
||||
|
||||
/**
|
||||
* Skip remaining matches for this ID and continue.
|
||||
*/
|
||||
#define CH_CALLBACK_SKIP_PATTERN 2
|
||||
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
/**
|
||||
* Type used to differentiate the errors raised with the @ref
|
||||
* ch_error_event_handler callback.
|
||||
*/
|
||||
typedef int ch_error_event_t;
|
||||
|
||||
/**
|
||||
* @defgroup CH_ERROR_EVENT ch_error_event_t values
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* PCRE hits its match limit and reports PCRE_ERROR_MATCHLIMIT.
|
||||
*/
|
||||
#define CH_ERROR_MATCHLIMIT 1
|
||||
|
||||
/**
|
||||
* PCRE hits its recursion limit and reports PCRE_ERROR_RECURSIONLIMIT.
|
||||
*/
|
||||
#define CH_ERROR_RECURSIONLIMIT 2
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Structure representing a captured subexpression within a match. An array of
|
||||
* these structures corresponding to capture groups in order is passed to the
|
||||
* callback on match, with active structures identified by the
|
||||
* CH_CAPTURE_FLAG_ACTIVE flag.
|
||||
*/
|
||||
typedef struct ch_capture {
    /**
     * The flags indicating if this structure is active.
     */
    unsigned int flags;

    /**
     * Offset at which this capture group begins.
     */
    unsigned long long from;

    /**
     * Offset at which this capture group ends.
     */
    unsigned long long to;
} ch_capture_t;
|
||||
|
||||
/**
|
||||
* @defgroup CH_CAPTURE ch_capture_t flags
|
||||
*
|
||||
* These flags are used in @ref ch_capture_t::flags to indicate if this
|
||||
* structure is active.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Flag indicating that a particular capture group is inactive, used in @ref
|
||||
* ch_capture_t::flags.
|
||||
*/
|
||||
#define CH_CAPTURE_FLAG_INACTIVE 0
|
||||
|
||||
/**
|
||||
* Flag indicating that a particular capture group is active, used in @ref
|
||||
* ch_capture_t::flags.
|
||||
*/
|
||||
#define CH_CAPTURE_FLAG_ACTIVE 1
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Definition of the match event callback function type.
|
||||
*
|
||||
* A callback function matching the defined type must be provided by the
|
||||
* application calling the @ref ch_scan() function.
|
||||
*
|
||||
* This callback function will be invoked whenever a match is located in the
|
||||
* target data during the execution of a scan. The details of the match are
|
||||
* passed in as parameters to the callback function, and the callback function
|
||||
* should return a value indicating whether or not matching should continue on
|
||||
* the target data. If no callbacks are desired from a scan call, NULL may be
|
||||
* provided in order to suppress match production.
|
||||
*
|
||||
* @param id
|
||||
* The ID number of the expression that matched. If the expression was a
|
||||
* single expression compiled with @ref ch_compile(), this value will be
|
||||
* zero.
|
||||
*
|
||||
* @param from
|
||||
* The offset of the first byte that matches the expression.
|
||||
*
|
||||
* @param to
|
||||
* The offset after the last byte that matches the expression.
|
||||
*
|
||||
* @param flags
|
||||
* This is provided for future use and is unused at present.
|
||||
*
|
||||
* @param size
|
||||
* The number of valid entries pointed to by the captured parameter.
|
||||
*
|
||||
* @param captured
|
||||
* A pointer to an array of @ref ch_capture_t structures that
|
||||
* contain the start and end offsets of entire pattern match and
|
||||
* each captured subexpression.
|
||||
*
|
||||
* @param ctx
|
||||
* The pointer supplied by the user to the @ref ch_scan() function.
|
||||
*
|
||||
* @return
|
||||
* The callback can return @ref CH_CALLBACK_TERMINATE to stop matching.
|
||||
* Otherwise, a return value of @ref CH_CALLBACK_CONTINUE will continue,
|
||||
* with the current pattern if configured to produce multiple matches per
|
||||
* pattern, while a return value of @ref CH_CALLBACK_SKIP_PATTERN will
|
||||
* cease matching this pattern but continue matching the next pattern.
|
||||
*/
|
||||
typedef ch_callback_t (HS_CDECL *ch_match_event_handler)(unsigned int id,
|
||||
unsigned long long from,
|
||||
unsigned long long to,
|
||||
unsigned int flags,
|
||||
unsigned int size,
|
||||
const ch_capture_t *captured,
|
||||
void *ctx);
|
||||
|
||||
/**
|
||||
* Definition of the Chimera error event callback function type.
|
||||
*
|
||||
* A callback function matching the defined type may be provided by the
|
||||
* application calling the @ref ch_scan function. This callback function
|
||||
* will be invoked when an error event occurs during matching; this indicates
|
||||
* that some matches for a given expression may not be reported.
|
||||
*
|
||||
* @param error_type
|
||||
* The type of error event that occurred. Currently these errors
|
||||
* correspond to resource limits on PCRE backtracking
|
||||
* @ref CH_ERROR_MATCHLIMIT and @ref CH_ERROR_RECURSIONLIMIT.
|
||||
*
|
||||
* @param id
|
||||
* The ID number of the expression that raised the error event.
|
||||
*
|
||||
* @param info
|
||||
* Event-specific data, for future use. Currently unused.
|
||||
*
|
||||
* @param ctx
|
||||
* The context pointer supplied by the user to the @ref ch_scan
|
||||
* function.
|
||||
*
|
||||
* @return
|
||||
* The callback can return @ref CH_CALLBACK_SKIP_PATTERN to cease matching
|
||||
* this pattern but continue matching the next pattern. Otherwise, we stop
|
||||
* matching for all patterns with @ref CH_CALLBACK_TERMINATE.
|
||||
*/
|
||||
typedef ch_callback_t (HS_CDECL *ch_error_event_handler)(
|
||||
ch_error_event_t error_type,
|
||||
unsigned int id, void *info,
|
||||
void *ctx);
|
||||
|
||||
/**
|
||||
* The block regular expression scanner.
|
||||
*
|
||||
* This is the function call in which the actual pattern matching takes place
|
||||
* for block-mode pattern databases.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param data
|
||||
* Pointer to the data to be scanned.
|
||||
*
|
||||
* @param length
|
||||
* The number of bytes to scan.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of this function. This parameter is
|
||||
* provided for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref ch_alloc_scratch() for this
|
||||
* database.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param onError
|
||||
* Pointer to an error event callback function. If a NULL pointer is given,
|
||||
* @ref CH_ERROR_MATCHLIMIT and @ref CH_ERROR_RECURSIONLIMIT errors will
|
||||
* be ignored and matching will continue.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref CH_SUCCESS on success; @ref CH_SCAN_TERMINATED if the
|
||||
* match callback indicated that scanning should stop; other values on
|
||||
* error.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_scan(const ch_database_t *db, const char *data,
|
||||
unsigned int length, unsigned int flags,
|
||||
ch_scratch_t *scratch,
|
||||
ch_match_event_handler onEvent,
|
||||
ch_error_event_handler onError,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* Allocate a "scratch" space for use by Chimera.
|
||||
*
|
||||
* This is required for runtime use, and one scratch space per thread, or
|
||||
* concurrent caller, is required. Any allocator callback set by @ref
|
||||
* ch_set_scratch_allocator() or @ref ch_set_allocator() will be used by this
|
||||
* function.
|
||||
*
|
||||
* @param db
|
||||
* The database, as produced by @ref ch_compile().
|
||||
*
|
||||
* @param scratch
|
||||
* On first allocation, a pointer to NULL should be provided so a new
|
||||
* scratch can be allocated. If a scratch block has been previously
|
||||
* allocated, then a pointer to it should be passed back in to see if it
|
||||
* is valid for this database block. If a new scratch block is required,
|
||||
* the original will be freed and the new one returned, otherwise the
|
||||
* previous scratch block will be returned. On success, the scratch block
|
||||
* will be suitable for use with the provided database in addition to any
|
||||
* databases that the original scratch space was suitable for.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on successful allocation; @ref CH_NOMEM if the
|
||||
* allocation fails. Other errors may be returned if invalid parameters
|
||||
* are specified.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_alloc_scratch(const ch_database_t *db,
|
||||
ch_scratch_t **scratch);
|
||||
|
||||
/**
|
||||
* Allocate a scratch space that is a clone of an existing scratch space.
|
||||
*
|
||||
* This is useful when multiple concurrent threads will be using the same set
|
||||
* of compiled databases, and another scratch space is required. Any allocator
|
||||
* callback set by @ref ch_set_scratch_allocator() or @ref ch_set_allocator()
|
||||
* will be used by this function.
|
||||
*
|
||||
* @param src
|
||||
* The existing @ref ch_scratch_t to be cloned.
|
||||
*
|
||||
* @param dest
|
||||
* A pointer to the new scratch space will be returned here.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success; @ref CH_NOMEM if the allocation fails.
|
||||
* Other errors may be returned if invalid parameters are specified.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_clone_scratch(const ch_scratch_t *src,
|
||||
ch_scratch_t **dest);
|
||||
|
||||
/**
|
||||
* Provides the size of the given scratch space.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref ch_alloc_scratch() or @ref
|
||||
* ch_clone_scratch().
|
||||
*
|
||||
* @param scratch_size
|
||||
* On success, the size of the scratch space in bytes is placed in this
|
||||
* parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_scratch_size(const ch_scratch_t *scratch,
|
||||
size_t *scratch_size);
|
||||
|
||||
/**
|
||||
* Free a scratch block previously allocated by @ref ch_alloc_scratch() or @ref
|
||||
* ch_clone_scratch().
|
||||
*
|
||||
* The free callback set by @ref ch_set_scratch_allocator() or @ref
|
||||
* ch_set_allocator() will be used by this function.
|
||||
*
|
||||
* @param scratch
|
||||
* The scratch block to be freed. NULL may also be safely provided.
|
||||
*
|
||||
* @return
|
||||
* @ref CH_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
ch_error_t HS_CDECL ch_free_scratch(ch_scratch_t *scratch);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_RUNTIME_H_ */
|
317
chimera/ch_scratch.c
Normal file
317
chimera/ch_scratch.c
Normal file
@ -0,0 +1,317 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Chimera: scratch space alloc.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "allocator.h"
|
||||
#include "ch.h"
|
||||
#include "hs.h"
|
||||
#include "hs_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "ch_alloc.h"
|
||||
#include "ch_internal.h"
|
||||
#include "ch_scratch.h"
|
||||
#include "ch_database.h"
|
||||
|
||||
static
|
||||
size_t getPatternDataSize(const ch_scratch_t *s) {
|
||||
size_t numCapturingStructs =
|
||||
s->patternCount * (s->maxCaptureGroups + 1);
|
||||
return (sizeof(struct ch_patterndata) * s->patternCount) +
|
||||
alignof(struct ch_capture) + // padding
|
||||
(sizeof(struct ch_capture) * numCapturingStructs);
|
||||
}
|
||||
|
||||
static
|
||||
void initPatternData(const ch_scratch_t *s) {
|
||||
// ch_capture array is aligned, directly after the patterndata array.
|
||||
char *ptr = (char *)s->patternData +
|
||||
(sizeof(struct ch_patterndata) * s->patternCount);
|
||||
struct ch_capture *cap = (struct ch_capture *)
|
||||
(ROUNDUP_PTR(ptr, alignof(struct ch_capture)));
|
||||
|
||||
for (u32 i = 0; i < s->patternCount; i++) {
|
||||
struct ch_patterndata *pd = &s->patternData[i];
|
||||
pd->match = cap;
|
||||
DEBUG_PRINTF("pattern %u: pd=%p, match=%p\n", i, pd, pd->match);
|
||||
cap += (s->maxCaptureGroups + 1);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
ch_error_t alloc_scratch(const ch_scratch_t *proto, ch_scratch_t **scratch) {
|
||||
size_t ovectorSize = (proto->maxCaptureGroups + 1) * sizeof(int) * 3;
|
||||
size_t capturedSize =
|
||||
sizeof(struct ch_capture) * (proto->maxCaptureGroups + 1);
|
||||
size_t patternDataSize = getPatternDataSize(proto);
|
||||
size_t activeSize = proto->activeSize;
|
||||
size_t queueSize = proto->patternCount * sizeof(struct queue_item);
|
||||
|
||||
// max padding for alignment below.
|
||||
size_t padding = alignof(int) + alignof(struct ch_capture) +
|
||||
alignof(struct ch_patterndata) +
|
||||
alignof(struct queue_item);
|
||||
|
||||
size_t allocSize = sizeof(ch_scratch_t) + ovectorSize + capturedSize +
|
||||
patternDataSize + activeSize + queueSize + padding
|
||||
+ 256; /* padding for cacheline alignment */
|
||||
ch_scratch_t *s;
|
||||
ch_scratch_t *s_tmp = ch_scratch_alloc(allocSize);
|
||||
ch_error_t err = ch_check_alloc(s_tmp);
|
||||
if (err != CH_SUCCESS) {
|
||||
ch_scratch_free(s_tmp);
|
||||
*scratch = NULL;
|
||||
return err;
|
||||
}
|
||||
|
||||
memset(s_tmp, 0, allocSize);
|
||||
s = ROUNDUP_PTR(s_tmp, 64);
|
||||
// Set ordinary members.
|
||||
*s = *proto;
|
||||
|
||||
s->magic = CH_SCRATCH_MAGIC;
|
||||
s->in_use = 0;
|
||||
s->scratch_alloc = (char *)s_tmp;
|
||||
|
||||
// Set pointers internal to allocation.
|
||||
|
||||
char *ptr = (char *)s + sizeof(*s);
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(int));
|
||||
s->ovector = (int *)ptr;
|
||||
ptr += ovectorSize;
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(struct ch_capture));
|
||||
s->captured = (struct ch_capture *)ptr;
|
||||
ptr += capturedSize;
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(struct ch_patterndata));
|
||||
s->patternData = (struct ch_patterndata *)ptr;
|
||||
ptr += patternDataSize;
|
||||
|
||||
// Pre-fill pattern data, setting captureOffsets
|
||||
initPatternData(s);
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(struct queue_item));
|
||||
s->pq.item = (struct queue_item *)ptr;
|
||||
ptr += queueSize;
|
||||
|
||||
s->active = (u8 *)ptr;
|
||||
|
||||
// Store size.
|
||||
s->scratchSize = allocSize;
|
||||
|
||||
// We should never overrun our allocation.
|
||||
assert((ptr + activeSize) - (char *)s <= (ptrdiff_t)allocSize);
|
||||
|
||||
*scratch = s;
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_alloc_scratch(const ch_database_t *hydb,
|
||||
ch_scratch_t **scratch) {
|
||||
if (!hydb || !scratch) {
|
||||
DEBUG_PRINTF("invalid args\n");
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("hydb=%p, &scratch=%p\n", hydb, scratch);
|
||||
ch_error_t rv = hydbIsValid(hydb);
|
||||
if (rv != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("invalid database\n");
|
||||
return rv;
|
||||
}
|
||||
|
||||
if (*scratch != NULL) {
|
||||
/* has to be aligned before we can do anything with it */
|
||||
if (!ISALIGNED_CL(*scratch)) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
if ((*scratch)->magic != CH_SCRATCH_MAGIC) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
if (markScratchInUse(*scratch)) {
|
||||
return CH_SCRATCH_IN_USE;
|
||||
}
|
||||
}
|
||||
|
||||
// We allocate a prototype of the scratch header to do our sizing with.
|
||||
ch_scratch_t *proto;
|
||||
ch_scratch_t *proto_tmp = ch_scratch_alloc(sizeof(ch_scratch_t) + 256);
|
||||
ch_error_t proto_ret = ch_check_alloc(proto_tmp);
|
||||
if (proto_ret != CH_SUCCESS) {
|
||||
ch_scratch_free(proto_tmp);
|
||||
ch_scratch_free(*scratch);
|
||||
*scratch = NULL;
|
||||
return proto_ret;
|
||||
}
|
||||
|
||||
proto = ROUNDUP_PTR(proto_tmp, 64);
|
||||
|
||||
int resize = 0;
|
||||
if (*scratch) {
|
||||
*proto = **scratch;
|
||||
} else {
|
||||
memset(proto, 0, sizeof(*proto));
|
||||
resize = 1;
|
||||
}
|
||||
proto->scratch_alloc = (char *)proto_tmp;
|
||||
|
||||
const struct ch_bytecode *db = ch_get_bytecode(hydb);
|
||||
|
||||
if (db->maxCaptureGroups > proto->maxCaptureGroups) {
|
||||
proto->maxCaptureGroups = db->maxCaptureGroups;
|
||||
resize = 1;
|
||||
}
|
||||
|
||||
if (db->patternCount > proto->patternCount) {
|
||||
proto->patternCount = db->patternCount;
|
||||
proto->activeSize = db->activeSize;
|
||||
resize = 1;
|
||||
}
|
||||
|
||||
if (resize) {
|
||||
if (*scratch) {
|
||||
ch_scratch_free((*scratch)->scratch_alloc);
|
||||
}
|
||||
|
||||
ch_error_t alloc_ret = alloc_scratch(proto, scratch);
|
||||
ch_scratch_free(proto_tmp);
|
||||
if (alloc_ret != CH_SUCCESS) {
|
||||
*scratch = NULL;
|
||||
return alloc_ret;
|
||||
}
|
||||
} else {
|
||||
ch_scratch_free(proto_tmp);
|
||||
unmarkScratchInUse(*scratch);
|
||||
}
|
||||
|
||||
if (db->flags & CHIMERA_FLAG_NO_MULTIMATCH) {
|
||||
(*scratch)->multi_scratch = NULL;
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
// We may still have to realloc the underlying Hyperscan scratch.
|
||||
rv = hs_alloc_scratch(getHyperscanDatabase(db),
|
||||
&(*scratch)->multi_scratch);
|
||||
if (rv != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("hs_alloc_scratch for multi_scratch failed\n");
|
||||
hs_free_scratch((*scratch)->multi_scratch);
|
||||
ch_scratch_free((*scratch)->scratch_alloc);
|
||||
*scratch = NULL;
|
||||
return rv;
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_clone_scratch(const ch_scratch_t *src,
|
||||
ch_scratch_t **dest) {
|
||||
if (!dest || !src || !ISALIGNED_CL(src) ||
|
||||
src->magic != CH_SCRATCH_MAGIC) {
|
||||
DEBUG_PRINTF("scratch invalid\n");
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
ch_error_t ret = alloc_scratch(src, dest);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("alloc_scratch failed\n");
|
||||
*dest = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (src->multi_scratch) {
|
||||
(*dest)->multi_scratch = NULL;
|
||||
ret = hs_clone_scratch(src->multi_scratch, &(*dest)->multi_scratch);
|
||||
if (ret != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("hs_clone_scratch(multi_scratch,...) failed\n");
|
||||
ch_scratch_free(*dest);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_free_scratch(ch_scratch_t *scratch) {
|
||||
ch_error_t ret = CH_SUCCESS;
|
||||
if (scratch) {
|
||||
/* has to be aligned before we can do anything with it */
|
||||
if (!ISALIGNED_CL(scratch)) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
if (scratch->magic != CH_SCRATCH_MAGIC) {
|
||||
return CH_INVALID;
|
||||
}
|
||||
if (markScratchInUse(scratch)) {
|
||||
return CH_SCRATCH_IN_USE;
|
||||
}
|
||||
|
||||
if (scratch->multi_scratch) {
|
||||
ret = hs_free_scratch(scratch->multi_scratch);
|
||||
}
|
||||
|
||||
scratch->magic = 0;
|
||||
assert(scratch->scratch_alloc);
|
||||
DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch,
|
||||
scratch->scratch_alloc);
|
||||
ch_scratch_free(scratch->scratch_alloc);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Not public, but used for info from our internal tools. Note that in the
|
||||
* hybrid matcher the scratch is definitely not a contiguous memory region. */
|
||||
HS_PUBLIC_API
|
||||
ch_error_t HS_CDECL ch_scratch_size(const ch_scratch_t *scratch, size_t *size) {
|
||||
ch_error_t ret = CH_SUCCESS;
|
||||
if (!size || !scratch || !ISALIGNED_CL(scratch) ||
|
||||
scratch->magic != CH_SCRATCH_MAGIC) {
|
||||
return CH_INVALID;
|
||||
} else {
|
||||
size_t multi_size = 0;
|
||||
|
||||
if (scratch->multi_scratch) {
|
||||
ret = hs_scratch_size(scratch->multi_scratch, &multi_size);
|
||||
}
|
||||
if (ret) {
|
||||
multi_size = 0;
|
||||
}
|
||||
|
||||
*size = scratch->scratchSize + multi_size;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
119
chimera/ch_scratch.h
Normal file
119
chimera/ch_scratch.h
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Scratch and associated data structures.
|
||||
*
|
||||
* This header gets pulled into many places (many deep, slow to compile
|
||||
* places). Try to keep the included headers under control.
|
||||
*/
|
||||
|
||||
#ifndef CH_SCRATCH_H_
|
||||
#define CH_SCRATCH_H_
|
||||
|
||||
#include "ch_common.h"
|
||||
#include "ch_runtime.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define CH_SCRATCH_MAGIC 0x554F4259 //!< Magic number stored in \ref ch_scratch
|
||||
|
||||
/** \brief One entry of the match priority queue (see struct match_pq). */
struct queue_item {
|
||||
int from; /**< \brief used to store the start location. */
|
||||
int to; /**< \brief used to store the current location. */
|
||||
u32 id; /**< pattern index. */
|
||||
};
|
||||
|
||||
/** \brief Priority queue of queue_item entries, stored in the scratch. */
struct match_pq {
|
||||
struct queue_item *item; /**< entry array; points into the scratch. */
|
||||
u32 size; /**< current size of the priority queue */
|
||||
};
|
||||
|
||||
/** \brief Information about a pattern stored at runtime when a match is
|
||||
* encountered. */
|
||||
struct ch_patterndata {
|
||||
struct ch_capture *match; //!< buffered group info (this pattern's capture slots)
|
||||
u32 groupCount; //!< number of capturing groups
|
||||
u32 scanStart; //!< start of match window (still to be single-scanned).
|
||||
};
|
||||
|
||||
/** \brief Scratch space header for Chimera. */
|
||||
struct ch_scratch {
|
||||
u32 magic; //!< must be \ref CH_SCRATCH_MAGIC
|
||||
u8 in_use; /**< non-zero when being used by an API call. */
|
||||
struct hs_scratch *multi_scratch; //!< underlying Hyperscan scratch, or NULL.
|
||||
int *ovector; //!< maximally-sized ovector for PCRE usage.
|
||||
struct ch_capture *captured; //!< max-sized capture group struct.
|
||||
u8 *active; //!< active multibit.
|
||||
struct ch_patterndata *patternData; //!< per-pattern match data, indexed by
|
||||
//!< pattern ID.
|
||||
struct match_pq pq; //!< priority queue to ensure matching ordering
|
||||
u32 patternCount; //!< number of patterns, used to size active multibit
|
||||
u32 activeSize; //!< size of active multibit
|
||||
u32 maxCaptureGroups; //!< largest num of capturing groups required
|
||||
u32 scratchSize; //!< size in bytes of this allocation (excludes multi_scratch)
|
||||
int ret; //!< return value in Hyperscan callback
|
||||
char *scratch_alloc; /**< address returned by the scratch allocator; this
                      * header sits at an aligned offset inside it, so this
                      * is the pointer that must be passed to the free
                      * callback. */
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Mark scratch as in use.
|
||||
*
|
||||
* Returns non-zero if it was already in use, zero otherwise.
|
||||
*/
|
||||
static really_inline
|
||||
char markScratchInUse(struct ch_scratch *scratch) {
|
||||
DEBUG_PRINTF("marking scratch as in use\n");
|
||||
assert(scratch && scratch->magic == CH_SCRATCH_MAGIC);
|
||||
if (scratch->in_use) {
|
||||
DEBUG_PRINTF("scratch already in use!\n");
|
||||
return 1;
|
||||
}
|
||||
scratch->in_use = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Mark scratch as no longer in use.
 *
 * The scratch must currently be marked as in use (in_use == 1); this is
 * checked with an assertion before the flag is cleared.
|
||||
*/
|
||||
static really_inline
|
||||
void unmarkScratchInUse(struct ch_scratch *scratch) {
|
||||
DEBUG_PRINTF("marking scratch as not in use\n");
|
||||
assert(scratch && scratch->magic == CH_SCRATCH_MAGIC);
|
||||
assert(scratch->in_use == 1);
|
||||
scratch->in_use = 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_SCRATCH_H_ */
|
12
chimera/libch.pc.in
Normal file
12
chimera/libch.pc.in
Normal file
@ -0,0 +1,12 @@
|
||||
prefix=@CMAKE_INSTALL_PREFIX@
|
||||
exec_prefix=@CMAKE_INSTALL_PREFIX@
|
||||
libdir=@CMAKE_INSTALL_PREFIX@/lib
|
||||
includedir=@CMAKE_INSTALL_PREFIX@/include
|
||||
|
||||
Name: libch
|
||||
Description: Intel(R) Chimera Library
|
||||
Version: @HS_VERSION@
|
||||
Requires.private: libhs
|
||||
Libs: -L${libdir} -lchimera
|
||||
Libs.private: @PRIVATE_LIBS@
|
||||
Cflags: -I${includedir}/hs
|
@ -54,11 +54,10 @@ else ()
|
||||
find_package(PkgConfig)
|
||||
pkg_check_modules(PCRE libpcre=${PCRE_REQUIRED_VERSION})
|
||||
if (PCRE_FOUND)
|
||||
set(CORRECT_PCRE_VERSION TRUE)
|
||||
message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION}")
|
||||
else ()
|
||||
message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} not found")
|
||||
return ()
|
||||
endif ()
|
||||
endif (PCRE_BUILD_SOURCE)
|
||||
|
||||
set (PCRE_CHECKED TRUE PARENT_SCOPE)
|
||||
|
333
doc/dev-reference/chimera.rst
Normal file
333
doc/dev-reference/chimera.rst
Normal file
@ -0,0 +1,333 @@
|
||||
.. _chimera:
|
||||
|
||||
#######
|
||||
Chimera
|
||||
#######
|
||||
|
||||
This section describes the Chimera library.
|
||||
|
||||
************
|
||||
Introduction
|
||||
************
|
||||
|
||||
Chimera is a software regular expression matching engine that is a hybrid of
|
||||
Hyperscan and PCRE. The design goals of Chimera are to fully support PCRE
|
||||
syntax as well as to take advantage of the high performance nature of Hyperscan.
|
||||
|
||||
Chimera inherits the design guideline of Hyperscan with C APIs for compilation
|
||||
and scanning.
|
||||
|
||||
The Chimera API itself is composed of two major components:
|
||||
|
||||
===========
|
||||
Compilation
|
||||
===========
|
||||
|
||||
These functions take a group of regular expressions, along with identifiers and
|
||||
option flags, and compile them into an immutable database that can be used by
|
||||
the Chimera scanning API. This compilation process performs considerable
|
||||
analysis and optimization work in order to build a database that will match
|
||||
the given expressions efficiently.
|
||||
|
||||
See :ref:`chcompile` for more details
|
||||
|
||||
========
|
||||
Scanning
|
||||
========
|
||||
|
||||
Once a Chimera database has been created, it can be used to scan data in memory.
|
||||
Chimera only supports block mode in which we scan a single contiguous block in
|
||||
memory.
|
||||
|
||||
Matches are delivered to the application via a user-supplied callback function
|
||||
that is called synchronously for each match.
|
||||
|
||||
For a given database, Chimera provides several guarantees:
|
||||
|
||||
* No memory allocations occur at runtime with the exception of scratch space
|
||||
allocation, which should be done ahead of time for performance-critical
|
||||
applications:
|
||||
|
||||
- **Scratch space**: temporary memory used for internal data at scan time.
|
||||
Structures in scratch space do not persist beyond the end of a single scan
|
||||
call.
|
||||
|
||||
* The size of the scratch space required for a given database is fixed and
|
||||
determined at database compile time. This means that the memory requirements
|
||||
of the application are known ahead of time, and the scratch space can be
|
||||
pre-allocated if required for performance reasons.
|
||||
|
||||
* Any pattern that has successfully been compiled by the Chimera compiler can
|
||||
be scanned against any input. There could be internal resource limits or
|
||||
other limitations caused by PCRE at runtime that could cause a scan call to
|
||||
return an error.
|
||||
|
||||
.. note:: Chimera is designed to have the same matching behavior as PCRE,
|
||||
including greedy/ungreedy, capturing, etc. Chimera reports both
|
||||
**start offset** and **end offset** for each match like PCRE. Different
|
||||
from the fashion of reporting all matches in Hyperscan, Chimera only reports
|
||||
non-overlapping matches. For example, the pattern :regexp:`/foofoo/` will
|
||||
match ``foofoofoofoo`` at offsets (0, 6) and (6, 12).
|
||||
|
||||
.. note:: Since Chimera is a hybrid of Hyperscan and PCRE in order to support
|
||||
full PCRE syntax, there will be extra performance overhead compared to
|
||||
Hyperscan-only solution. Please always use Hyperscan for better performance
|
||||
unless you need full PCRE syntax support.
|
||||
|
||||
See :ref:`chruntime` for more details
|
||||
|
||||
************
|
||||
Requirements
|
||||
************
|
||||
|
||||
The PCRE library (http://pcre.org/) version 8.41 is required for Chimera.
|
||||
|
||||
.. note:: Since Chimera needs to reference PCRE internal functions, please place the PCRE source
|
||||
directory under Hyperscan root directory in order to build Chimera.
|
||||
|
||||
Besides this, the hardware and software requirements of Chimera are the same as those of Hyperscan.
|
||||
See :ref:`hardware` and :ref:`software` for more details.
|
||||
|
||||
.. note:: Building Hyperscan will automatically generate the Chimera library.
|
||||
Currently only static library is supported for Chimera, so please
|
||||
use the static build type when configuring CMake build options.
|
||||
|
||||
.. _chcompile:
|
||||
|
||||
******************
|
||||
Compiling Patterns
|
||||
******************
|
||||
|
||||
===================
|
||||
Building a Database
|
||||
===================
|
||||
|
||||
The Chimera compiler API accepts regular expressions and converts them into a
|
||||
compiled pattern database that can then be used to scan data.
|
||||
|
||||
The API provides three functions that compile regular expressions into
|
||||
databases:
|
||||
|
||||
#. :c:func:`ch_compile`: compiles a single expression into a pattern database.
|
||||
|
||||
#. :c:func:`ch_compile_multi`: compiles an array of expressions into a pattern
|
||||
database. All of the supplied patterns will be scanned for concurrently at
|
||||
scan time, with user-supplied identifiers returned when they match.
|
||||
|
||||
#. :c:func:`ch_compile_ext_multi`: compiles an array of expressions as above,
|
||||
but allows PCRE match limits to be specified for each expression.
|
||||
|
||||
Compilation allows the Chimera library to analyze the given pattern(s) and
|
||||
pre-determine how to scan for these patterns in an optimized fashion using
|
||||
Hyperscan and PCRE.
|
||||
|
||||
===============
|
||||
Pattern Support
|
||||
===============
|
||||
|
||||
Chimera fully supports the pattern syntax used by the PCRE library ("libpcre"),
|
||||
described at <http://www.pcre.org/>. The version of PCRE used to validate
|
||||
Chimera's interpretation of this syntax is 8.41.
|
||||
|
||||
=========
|
||||
Semantics
|
||||
=========
|
||||
|
||||
Chimera supports the exact same semantics as the PCRE library. Moreover, it supports
|
||||
multiple simultaneous pattern matching like Hyperscan and the multiple matches
|
||||
will be reported in order by end offset.
|
||||
|
||||
.. _chruntime:
|
||||
|
||||
*********************
|
||||
Scanning for Patterns
|
||||
*********************
|
||||
|
||||
Chimera provides a scan function, ``ch_scan``.
|
||||
|
||||
================
|
||||
Handling Matches
|
||||
================
|
||||
|
||||
``ch_scan`` will call a user-supplied callback function when a match
|
||||
is found. This function has the following signature:
|
||||
|
||||
.. doxygentypedef:: ch_match_event_handler
|
||||
:outline:
|
||||
:no-link:
|
||||
|
||||
The *id* argument will be set to the identifier for the matching expression
|
||||
provided at compile time, and the *from* argument will be set to the
|
||||
start-offset of the match; the *to* argument will be set to the end-offset
|
||||
of the match. The *captured* stores offsets of entire pattern match as well as
|
||||
captured subexpressions. The *size* will be set to the number of valid entries in
|
||||
the *captured*.
|
||||
|
||||
The match callback function has the capability to continue or halt scanning
|
||||
by returning different values.
|
||||
|
||||
See :c:type:`ch_match_event_handler` for more information.
|
||||
|
||||
=======================
|
||||
Handling Runtime Errors
|
||||
=======================
|
||||
|
||||
``ch_scan`` will call a user-supplied callback function when a runtime error
|
||||
occurs in libpcre. This function has the following signature:
|
||||
|
||||
.. doxygentypedef:: ch_error_event_handler
|
||||
:outline:
|
||||
:no-link:
|
||||
|
||||
The *id* argument will be set to the identifier for the matching expression
|
||||
provided at compile time.
|
||||
|
||||
The error callback function has the capability to either halt scanning or
|
||||
continue scanning for the next pattern.
|
||||
|
||||
See :c:type:`ch_error_event_handler` for more information.
|
||||
|
||||
=============
|
||||
Scratch Space
|
||||
=============
|
||||
|
||||
While scanning data, Chimera needs a small amount of temporary memory to store
|
||||
on-the-fly internal data. This amount is unfortunately too large to fit on the
|
||||
stack, particularly for embedded applications, and allocating memory dynamically
|
||||
is too expensive, so a pre-allocated "scratch" space must be provided to the
|
||||
scanning functions.
|
||||
|
||||
The function :c:func:`ch_alloc_scratch` allocates a large enough region of
|
||||
scratch space to support a given database. If the application uses multiple
|
||||
databases, only a single scratch region is necessary: in this case, calling
|
||||
:c:func:`ch_alloc_scratch` on each database (with the same ``scratch`` pointer)
|
||||
will ensure that the scratch space is large enough to support scanning against
|
||||
any of the given databases.
|
||||
|
||||
While the Chimera library is re-entrant, the use of scratch spaces is not.
|
||||
For example, if by design it is deemed necessary to run recursive or nested
|
||||
scanning (say, from the match callback function), then an additional scratch
|
||||
space is required for that context.
|
||||
|
||||
In the absence of recursive scanning, only one such space is required per thread
|
||||
and can (and indeed should) be allocated before data scanning is to commence.
|
||||
|
||||
In a scenario where a set of expressions are compiled by a single "master"
|
||||
thread and data will be scanned by multiple "worker" threads, the convenience
|
||||
function :c:func:`ch_clone_scratch` allows multiple copies of an existing
|
||||
scratch space to be made for each thread (rather than forcing the caller to pass
|
||||
all the compiled databases through :c:func:`ch_alloc_scratch` multiple times).
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
ch_error_t err;
|
||||
ch_scratch_t *scratch_prototype = NULL;
|
||||
err = ch_alloc_scratch(db, &scratch_prototype);
|
||||
if (err != CH_SUCCESS) {
|
||||
printf("ch_alloc_scratch failed!");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ch_scratch_t *scratch_thread1 = NULL;
|
||||
ch_scratch_t *scratch_thread2 = NULL;
|
||||
|
||||
err = ch_clone_scratch(scratch_prototype, &scratch_thread1);
|
||||
if (err != CH_SUCCESS) {
|
||||
printf("ch_clone_scratch failed!");
|
||||
exit(1);
|
||||
}
|
||||
err = ch_clone_scratch(scratch_prototype, &scratch_thread2);
|
||||
if (err != CH_SUCCESS) {
|
||||
printf("ch_clone_scratch failed!");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ch_free_scratch(scratch_prototype);
|
||||
|
||||
/* Now two threads can both scan against database db,
|
||||
each with its own scratch space. */
|
||||
|
||||
|
||||
=================
|
||||
Custom Allocators
|
||||
=================
|
||||
|
||||
By default, structures used by Chimera at runtime (scratch space, etc) are
|
||||
allocated with the default system allocators, usually
|
||||
``malloc()`` and ``free()``.
|
||||
|
||||
The Chimera API provides a facility for changing this behaviour to support
|
||||
applications that use custom memory allocators.
|
||||
|
||||
These functions are:
|
||||
|
||||
- :c:func:`ch_set_database_allocator`, which sets the allocate and free functions
|
||||
used for compiled pattern databases.
|
||||
- :c:func:`ch_set_scratch_allocator`, which sets the allocate and free
|
||||
functions used for scratch space.
|
||||
- :c:func:`ch_set_misc_allocator`, which sets the allocate and free functions
|
||||
used for miscellaneous data, such as compile error structures and
|
||||
informational strings.
|
||||
|
||||
The :c:func:`ch_set_allocator` function can be used to set all of the custom
|
||||
allocators to the same allocate/free pair.
|
||||
|
||||
|
||||
************************
|
||||
API Reference: Constants
|
||||
************************
|
||||
|
||||
===========
|
||||
Error Codes
|
||||
===========
|
||||
|
||||
.. doxygengroup:: CH_ERROR
|
||||
:content-only:
|
||||
:no-link:
|
||||
|
||||
=============
|
||||
Pattern flags
|
||||
=============
|
||||
|
||||
.. doxygengroup:: CH_PATTERN_FLAG
|
||||
:content-only:
|
||||
:no-link:
|
||||
|
||||
==================
|
||||
Compile mode flags
|
||||
==================
|
||||
|
||||
.. doxygengroup:: CH_MODE_FLAG
|
||||
:content-only:
|
||||
:no-link:
|
||||
|
||||
|
||||
********************
|
||||
API Reference: Files
|
||||
********************
|
||||
|
||||
==========
|
||||
File: ch.h
|
||||
==========
|
||||
|
||||
.. doxygenfile:: ch.h
|
||||
|
||||
=================
|
||||
File: ch_common.h
|
||||
=================
|
||||
|
||||
.. doxygenfile:: ch_common.h
|
||||
|
||||
==================
|
||||
File: ch_compile.h
|
||||
==================
|
||||
|
||||
.. doxygenfile:: ch_compile.h
|
||||
|
||||
==================
|
||||
File: ch_runtime.h
|
||||
==================
|
||||
|
||||
.. doxygenfile:: ch_runtime.h
|
@ -471,3 +471,93 @@ matching support. Here they are, in a nutshell:
|
||||
|
||||
Approximate matching is always disabled by default, and can be enabled on a
|
||||
per-pattern basis by using an extended parameter described in :ref:`extparam`.
|
||||
|
||||
.. _logical_combinations:
|
||||
|
||||
********************
|
||||
Logical Combinations
|
||||
********************
|
||||
|
||||
For situations when a user requires behaviour that depends on the presence or
|
||||
absence of matches from groups of patterns, Hyperscan provides support for the
|
||||
logical combination of patterns in a given pattern set, with three operators:
|
||||
``NOT``, ``AND`` and ``OR``.
|
||||
|
||||
The logical value of such a combination is based on each expression's matching
|
||||
status at a given offset. The matching status of any expression has a boolean
|
||||
value: *false* if the expression has not yet matched or *true* if the expression
|
||||
has already matched. In particular, the value of a ``NOT`` operation at a given
|
||||
offset is *true* if the expression it refers to is *false* at this offset.
|
||||
|
||||
For example, ``NOT 101`` means that expression 101 has not yet matched at this
|
||||
offset.
|
||||
|
||||
A logical combination is passed to Hyperscan at compile time as an expression.
|
||||
This combination expression will raise matches at every offset where one of its
|
||||
sub-expressions matches and the logical value of the whole expression is *true*.
|
||||
|
||||
To illustrate, here is an example combination expression: ::
|
||||
|
||||
((301 OR 302) AND 303) AND (304 OR NOT 305)
|
||||
|
||||
If expression 301 matches at offset 10, the logical value of 301 is *true*
|
||||
while the other patterns' values are *false*. Hence, the whole combination's value is
|
||||
*false*.
|
||||
|
||||
Then expression 303 matches at offset 20. Now the values of 301 and 303 are
|
||||
*true* while the other patterns' values are still *false*. In this case, the
|
||||
combination's value is *true*, so the combination expression raises a match at
|
||||
offset 20.
|
||||
|
||||
Finally, expression 305 matches at offset 30. Now the values of 301, 303 and 305
|
||||
are *true* while the other patterns' values are still *false*. In this case, the
|
||||
combination's value is *false* and no match is raised.
|
||||
|
||||
**Using Logical Combinations**
|
||||
|
||||
In logical combination syntax, an expression is written in infix notation; it
|
||||
consists of operands, operators and parentheses. The operands are expression
|
||||
IDs, and operators are ``!`` (NOT), ``&`` (AND) or ``|`` (OR). For example, the
|
||||
combination described in the previous section would be written as: ::
|
||||
|
||||
((301 | 302) & 303) & (304 | !305)
|
||||
|
||||
In a logical combination expression:
|
||||
|
||||
* The priority of operators is ``!`` > ``&`` > ``|``. For example:
|
||||
- ``A&B|C`` is treated as ``(A&B)|C``,
|
||||
- ``A|B&C`` is treated as ``A|(B&C)``,
|
||||
- ``A&!B`` is treated as ``A&(!B)``.
|
||||
* Extra parentheses are allowed. For example:
|
||||
- ``(A)&!(B)`` is the same as ``A&!B``,
|
||||
- ``(A&B)|C`` is the same as ``A&B|C``.
|
||||
* Whitespace is ignored.
|
||||
|
||||
To use a logical combination expression, it must be passed to one of the
|
||||
Hyperscan compile functions (:c:func:`hs_compile_multi`,
|
||||
:c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag,
|
||||
which identifies the pattern as a logical combination expression. The patterns
|
||||
referred to in the logical combination expression must be compiled together in
|
||||
the same pattern set as the combination expression.
|
||||
|
||||
When an expression has the :c:member:`HS_FLAG_COMBINATION` flag set, it ignores
|
||||
all other flags except the :c:member:`HS_FLAG_SINGLEMATCH` flag and the
|
||||
:c:member:`HS_FLAG_QUIET` flag.
|
||||
|
||||
Hyperscan will reject logical combination expressions at compile time that
|
||||
evaluate to *true* when no patterns have matched; for example: ::
|
||||
|
||||
!101
|
||||
!101|102
|
||||
!101&!102
|
||||
!(101&102)
|
||||
|
||||
Patterns that are referred to as operands within a logical combination (for
|
||||
example, 301 through 305 in the examples above) may also use the
|
||||
:c:member:`HS_FLAG_QUIET` flag to silence the reporting of individual matches
|
||||
for those patterns. In the absence of this flag, all matches (for
|
||||
both individual patterns and their logical combinations) will be reported.
|
||||
|
||||
When an expression has both the :c:member:`HS_FLAG_COMBINATION` flag and the
|
||||
:c:member:`HS_FLAG_QUIET` flag set, no matches for this logical combination
|
||||
will be reported.
|
||||
|
@ -27,10 +27,10 @@ Very Quick Start
|
||||
Known working generators:
|
||||
* ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X)
|
||||
* ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files.
|
||||
* ``Visual Studio 15 2017`` --- Visual Studio projects
|
||||
|
||||
Generators that might work include:
|
||||
* ``Xcode`` --- OS X Xcode projects.
|
||||
* ``Visual Studio`` --- Visual Studio projects - very experimental
|
||||
|
||||
#. Build Hyperscan
|
||||
|
||||
@ -38,6 +38,7 @@ Very Quick Start
|
||||
* ``cmake --build .`` --- will build everything
|
||||
* ``make -j<jobs>`` --- use makefiles in parallel
|
||||
* ``ninja`` --- use Ninja build
|
||||
* ``MsBuild.exe`` --- use Visual Studio MsBuild
|
||||
* etc.
|
||||
|
||||
#. Check Hyperscan
|
||||
@ -49,6 +50,8 @@ Very Quick Start
|
||||
Requirements
|
||||
************
|
||||
|
||||
.. _hardware:
|
||||
|
||||
Hardware
|
||||
========
|
||||
|
||||
@ -84,6 +87,7 @@ compiler support. The supported compilers are:
|
||||
* GCC, v4.8.1 or higher
|
||||
* Clang, v3.4 or higher (with libstdc++ or libc++)
|
||||
* Intel C++ Compiler v15 or higher
|
||||
* Visual C++ 2017 Build Tools
|
||||
|
||||
Examples of operating systems that Hyperscan is known to work on include:
|
||||
|
||||
@ -96,13 +100,17 @@ FreeBSD:
|
||||
|
||||
* 10.0 or newer
|
||||
|
||||
Windows:
|
||||
|
||||
* 8 or newer
|
||||
|
||||
Mac OS X:
|
||||
|
||||
* 10.8 or newer, using XCode/Clang
|
||||
|
||||
Hyperscan *may* compile and run on other platforms, but there is no guarantee.
|
||||
We currently have experimental support for Windows using Intel C++ Compiler
|
||||
or Visual Studio 2015.
|
||||
or Visual Studio 2017.
|
||||
|
||||
In addition, the following software is required for compiling the Hyperscan library:
|
||||
|
||||
@ -118,7 +126,8 @@ Dependency Version Notes
|
||||
|
||||
Most of these dependencies can be provided by the package manager on the build
|
||||
system (e.g. Debian/Ubuntu/RedHat packages, FreeBSD ports, etc). However,
|
||||
ensure that the correct version is present.
|
||||
ensure that the correct version is present. On Windows, Ragel can be built
|
||||
from source using Cygwin.
|
||||
|
||||
Boost Headers
|
||||
-------------
|
||||
|
@ -758,7 +758,7 @@ WARN_LOGFILE =
|
||||
# spaces.
|
||||
# Note: If this tag is empty the current directory is searched.
|
||||
|
||||
INPUT = @CMAKE_SOURCE_DIR@/src/hs.h @CMAKE_SOURCE_DIR@/src/hs_common.h @CMAKE_SOURCE_DIR@/src/hs_compile.h @CMAKE_SOURCE_DIR@/src/hs_runtime.h
|
||||
INPUT = @CMAKE_SOURCE_DIR@/src/hs.h @CMAKE_SOURCE_DIR@/src/hs_common.h @CMAKE_SOURCE_DIR@/src/hs_compile.h @CMAKE_SOURCE_DIR@/src/hs_runtime.h @CMAKE_SOURCE_DIR@/chimera/ch.h @CMAKE_SOURCE_DIR@/chimera/ch_common.h @CMAKE_SOURCE_DIR@/chimera/ch_compile.h @CMAKE_SOURCE_DIR@/chimera/ch_runtime.h
|
||||
|
||||
# This tag can be used to specify the character encoding of the source files
|
||||
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
|
||||
|
@ -20,3 +20,4 @@ Hyperscan |version| Developer's Reference Guide
|
||||
tools
|
||||
api_constants
|
||||
api_files
|
||||
chimera
|
||||
|
@ -246,6 +246,8 @@ Character API Flag Description
|
||||
``W`` :c:member:`HS_FLAG_UCP` Unicode property support
|
||||
``P`` :c:member:`HS_FLAG_PREFILTER` Prefiltering mode
|
||||
``L`` :c:member:`HS_FLAG_SOM_LEFTMOST` Leftmost start of match reporting
|
||||
``C`` :c:member:`HS_FLAG_COMBINATION` Logical combination of patterns
|
||||
``Q`` :c:member:`HS_FLAG_QUIET` Quiet at matching
|
||||
========= ================================= ===========
|
||||
|
||||
In addition to the set of flags above, :ref:`extparam` can be supplied
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -45,6 +45,7 @@
|
||||
#include "parser/buildstate.h"
|
||||
#include "parser/dump.h"
|
||||
#include "parser/Component.h"
|
||||
#include "parser/logical_combination.h"
|
||||
#include "parser/parse_error.h"
|
||||
#include "parser/Parser.h" // for flags
|
||||
#include "parser/position.h"
|
||||
@ -111,14 +112,21 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
||||
const hs_expr_ext *ext)
|
||||
: expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
|
||||
false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
|
||||
0, 0, 0) {
|
||||
0, 0, 0, flags & HS_FLAG_QUIET) {
|
||||
// We disallow SOM + Quiet.
|
||||
if ((flags & HS_FLAG_QUIET) && (flags & HS_FLAG_SOM_LEFTMOST)) {
|
||||
throw CompileError("HS_FLAG_QUIET is not supported in "
|
||||
"combination with HS_FLAG_SOM_LEFTMOST.");
|
||||
}
|
||||
flags &= ~HS_FLAG_QUIET;
|
||||
ParseMode mode(flags);
|
||||
|
||||
component = parse(expression, mode);
|
||||
|
||||
expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
|
||||
|
||||
if (expr.utf8 && !isValidUtf8(expression)) {
|
||||
const size_t len = strlen(expression);
|
||||
if (expr.utf8 && !isValidUtf8(expression, len)) {
|
||||
throw ParseError("Expression is not valid UTF-8.");
|
||||
}
|
||||
|
||||
@ -233,6 +241,45 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags,
|
||||
expression);
|
||||
|
||||
if (flags & HS_FLAG_COMBINATION) {
|
||||
if (flags & ~(HS_FLAG_COMBINATION | HS_FLAG_QUIET |
|
||||
HS_FLAG_SINGLEMATCH)) {
|
||||
throw CompileError("only HS_FLAG_QUIET and HS_FLAG_SINGLEMATCH "
|
||||
"are supported in combination "
|
||||
"with HS_FLAG_COMBINATION.");
|
||||
}
|
||||
if (flags & HS_FLAG_QUIET) {
|
||||
DEBUG_PRINTF("skip QUIET logical combination expression %u\n", id);
|
||||
} else {
|
||||
u32 ekey = INVALID_EKEY;
|
||||
u64a min_offset = 0;
|
||||
u64a max_offset = MAX_OFFSET;
|
||||
if (flags & HS_FLAG_SINGLEMATCH) {
|
||||
ekey = ng.rm.getExhaustibleKey(id);
|
||||
}
|
||||
if (ext) {
|
||||
validateExt(*ext);
|
||||
if (ext->flags & ~(HS_EXT_FLAG_MIN_OFFSET |
|
||||
HS_EXT_FLAG_MAX_OFFSET)) {
|
||||
throw CompileError("only HS_EXT_FLAG_MIN_OFFSET and "
|
||||
"HS_EXT_FLAG_MAX_OFFSET extra flags "
|
||||
"are supported in combination "
|
||||
"with HS_FLAG_COMBINATION.");
|
||||
}
|
||||
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
|
||||
min_offset = ext->min_offset;
|
||||
}
|
||||
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
|
||||
max_offset = ext->max_offset;
|
||||
}
|
||||
}
|
||||
ng.rm.pl.parseLogicalCombination(id, expression, ekey, min_offset,
|
||||
max_offset);
|
||||
DEBUG_PRINTF("parsed logical combination expression %u\n", id);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure that our pattern isn't too long (in characters).
|
||||
if (strlen(expression) > cc.grey.limitPatternLength) {
|
||||
throw CompileError("Pattern length exceeds limit.");
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -46,12 +46,12 @@ public:
|
||||
bool highlander_in, bool utf8_in, bool prefilter_in,
|
||||
som_type som_in, ReportID report_in, u64a min_offset_in,
|
||||
u64a max_offset_in, u64a min_length_in, u32 edit_distance_in,
|
||||
u32 hamm_distance_in)
|
||||
u32 hamm_distance_in, bool quiet_in)
|
||||
: index(index_in), report(report_in), allow_vacuous(allow_vacuous_in),
|
||||
highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in),
|
||||
som(som_in), min_offset(min_offset_in), max_offset(max_offset_in),
|
||||
min_length(min_length_in), edit_distance(edit_distance_in),
|
||||
hamm_distance(hamm_distance_in) {}
|
||||
hamm_distance(hamm_distance_in), quiet(quiet_in) {}
|
||||
|
||||
/**
|
||||
* \brief Index of the expression represented by this graph.
|
||||
@ -98,6 +98,9 @@ public:
|
||||
*/
|
||||
u32 edit_distance;
|
||||
u32 hamm_distance;
|
||||
|
||||
/** \brief Quiet on match. */
|
||||
bool quiet;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -245,6 +245,11 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
||||
}
|
||||
}
|
||||
|
||||
// Check sub-expression ids
|
||||
ng.rm.pl.validateSubIDs(ids, expressions, flags, elements);
|
||||
// Renumber and assign lkey to reports
|
||||
ng.rm.logicalKeyRenumber();
|
||||
|
||||
unsigned length = 0;
|
||||
struct hs_database *out = build(ng, &length);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -811,6 +811,28 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
|
||||
*/
|
||||
#define HS_FLAG_SOM_LEFTMOST 256
|
||||
|
||||
/**
|
||||
* Compile flag: Logical combination.
|
||||
*
|
||||
* This flag instructs Hyperscan to parse this expression as logical
|
||||
* combination syntax.
|
||||
* Logical constraints consist of operands, operators and parentheses.
|
||||
* The operands are expression indices, and operators can be
|
||||
* '!'(NOT), '&'(AND) or '|'(OR).
|
||||
* For example:
|
||||
* (101&102&103)|(104&!105)
|
||||
* ((301|302)&303)&(304|305)
|
||||
*/
|
||||
#define HS_FLAG_COMBINATION 512
|
||||
|
||||
/**
|
||||
* Compile flag: Don't do any match reporting.
|
||||
*
|
||||
* This flag instructs Hyperscan to ignore match reporting for this expression.
|
||||
* It is designed to be used on the sub-expressions in logical combinations.
|
||||
*/
|
||||
#define HS_FLAG_QUIET 1024
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -122,11 +122,11 @@ typedef struct hs_scratch hs_scratch_t;
|
||||
* subsequent calls to @ref hs_scan_stream() for that stream will
|
||||
* immediately return with @ref HS_SCAN_TERMINATED.
|
||||
*/
|
||||
typedef int (*match_event_handler)(unsigned int id,
|
||||
unsigned long long from,
|
||||
unsigned long long to,
|
||||
unsigned int flags,
|
||||
void *context);
|
||||
typedef int (HS_CDECL *match_event_handler)(unsigned int id,
|
||||
unsigned long long from,
|
||||
unsigned long long to,
|
||||
unsigned int flags,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* Open and initialise a stream.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -374,7 +374,7 @@ unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) {
|
||||
}
|
||||
|
||||
u16 top_sym = raw.alpha_remap[TOP];
|
||||
DEBUG_PRINTF("top: %hu, kind %d\n", top_sym, raw.kind);
|
||||
DEBUG_PRINTF("top: %hu, kind %s\n", top_sym, to_string(raw.kind).c_str());
|
||||
|
||||
/* create edges, JOIN variables (on edge targets) */
|
||||
map<dstate_id_t, GoughEdge> seen;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -84,7 +84,7 @@ struct mcclellan {
|
||||
u8 has_accel; /**< 1 iff there are any accel plans */
|
||||
u8 remap[256]; /**< remaps characters to a smaller alphabet */
|
||||
ReportID arb_report; /**< one of the accepts that this dfa may raise */
|
||||
u32 accel_offset; /**< offset of the accel structures from start of NFA */
|
||||
u32 accel_offset; /**< offset of accel structures from start of McClellan */
|
||||
u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -760,7 +760,7 @@ bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc,
|
||||
return nfa;
|
||||
}
|
||||
|
||||
#define MAX_SHERMAN_LIST_LEN 8
|
||||
#define MAX_SHERMAN_LIST_LEN 9
|
||||
|
||||
static
|
||||
void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -173,7 +173,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
||||
u32 sheng_limit_x4 = sheng_limit * 0x01010101;
|
||||
m128 simd_stop_limit = set4x32(sheng_stop_limit_x4);
|
||||
m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit);
|
||||
DEBUG_PRINTF("end %hu, accel %hhu --> limit %hhu\n", sheng_limit,
|
||||
DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit,
|
||||
m->sheng_accel_limit, sheng_stop_limit);
|
||||
#endif
|
||||
|
||||
@ -181,7 +181,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
||||
m128 shuffle_mask = masks[*(c++)]; \
|
||||
s = pshufb_m128(shuffle_mask, s); \
|
||||
u32 s_gpr_x4 = movd(s); /* convert to u8 */ \
|
||||
DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr_x4); \
|
||||
DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \
|
||||
if (s_gpr_x4 >= sheng_stop_limit_x4) { \
|
||||
s_gpr = s_gpr_x4; \
|
||||
goto exit; \
|
||||
@ -191,7 +191,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
||||
u8 s_gpr;
|
||||
while (c < c_end) {
|
||||
#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
|
||||
/* This version uses pext for efficently bitbashing out scaled
|
||||
/* This version uses pext for efficiently bitbashing out scaled
|
||||
* versions of the bytes to process from a u64a */
|
||||
|
||||
u64a data_bytes = unaligned_load_u64a(c);
|
||||
@ -201,7 +201,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
||||
s = pshufb_m128(shuffle_mask0, s);
|
||||
m128 s_max = s;
|
||||
m128 s_max0 = s_max;
|
||||
DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s));
|
||||
DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 4, movd(s));
|
||||
|
||||
#define SHENG_SINGLE_UNROLL_ITER(iter) \
|
||||
assert(iter); \
|
||||
@ -217,7 +217,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
||||
s_max = max_u8_m128(s_max, s); \
|
||||
} \
|
||||
m128 s_max##iter = s_max; \
|
||||
DEBUG_PRINTF("c %02llx --> s %hhu max %hhu\n", cc##iter >> 4, \
|
||||
DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 4, \
|
||||
movd(s), movd(s_max));
|
||||
|
||||
SHENG_SINGLE_UNROLL_ITER(1);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -84,7 +84,7 @@ struct mcsheng {
|
||||
u8 has_accel; /**< 1 iff there are any accel plans */
|
||||
u8 remap[256]; /**< remaps characters to a smaller alphabet */
|
||||
ReportID arb_report; /**< one of the accepts that this dfa may raise */
|
||||
u32 accel_offset; /**< offset of the accel structures from start of NFA */
|
||||
u32 accel_offset; /**< offset of accel structures from start of McClellan */
|
||||
m128 sheng_masks[N_CHARS];
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -628,7 +628,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
fireSingleReport(cb, ctxt, sh->report, offset);
|
||||
} else {
|
||||
fireReports(sh, cb, ctxt, s, offset, &cached_state_id,
|
||||
&cached_report_id, 1);
|
||||
&cached_report_id, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -577,7 +577,8 @@ bool NG::addHolder(NGHolder &g) {
|
||||
}
|
||||
|
||||
bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
|
||||
u32 external_report, bool highlander, som_type som) {
|
||||
u32 external_report, bool highlander, som_type som,
|
||||
bool quiet) {
|
||||
assert(!literal.empty());
|
||||
|
||||
if (!cc.grey.shortcutLiterals) {
|
||||
@ -605,7 +606,7 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
|
||||
} else {
|
||||
u32 ekey = highlander ? rm.getExhaustibleKey(external_report)
|
||||
: INVALID_EKEY;
|
||||
Report r = makeECallback(external_report, 0, ekey);
|
||||
Report r = makeECallback(external_report, 0, ekey, quiet);
|
||||
id = rm.getInternalId(r);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -77,7 +77,7 @@ public:
|
||||
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead
|
||||
* of using \ref addGraph) */
|
||||
bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
|
||||
bool highlander, som_type som);
|
||||
bool highlander, som_type som, bool quiet);
|
||||
|
||||
/** \brief Maximum history in bytes available for use by SOM reverse NFAs,
|
||||
* a hack for pattern support (see UE-1903). This is always set to the max
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -53,11 +53,11 @@
|
||||
#include "ng_depth.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_undirected.h"
|
||||
#include "ng_util.h"
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/graph_undirected.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <map>
|
||||
@ -310,28 +310,19 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
|
||||
return;
|
||||
}
|
||||
|
||||
unordered_map<NFAVertex, NFAUndirectedVertex> old2new;
|
||||
auto ug = createUnGraph(*g, true, true, old2new);
|
||||
auto ug = make_undirected_graph(*g);
|
||||
|
||||
// Construct reverse mapping.
|
||||
unordered_map<NFAUndirectedVertex, NFAVertex> new2old;
|
||||
for (const auto &m : old2new) {
|
||||
new2old.emplace(m.second, m.first);
|
||||
}
|
||||
// Filter specials and shell vertices from undirected graph.
|
||||
unordered_set<NFAVertex> bad_vertices(
|
||||
{g->start, g->startDs, g->accept, g->acceptEod});
|
||||
bad_vertices.insert(head_shell.begin(), head_shell.end());
|
||||
bad_vertices.insert(tail_shell.begin(), tail_shell.end());
|
||||
|
||||
// Filter shell vertices from undirected graph.
|
||||
unordered_set<NFAUndirectedVertex> shell_undir_vertices;
|
||||
for (auto v : head_shell) {
|
||||
shell_undir_vertices.insert(old2new.at(v));
|
||||
}
|
||||
for (auto v : tail_shell) {
|
||||
shell_undir_vertices.insert(old2new.at(v));
|
||||
}
|
||||
auto filtered_ug = boost::make_filtered_graph(
|
||||
ug, boost::keep_all(), make_bad_vertex_filter(&shell_undir_vertices));
|
||||
ug, boost::keep_all(), make_bad_vertex_filter(&bad_vertices));
|
||||
|
||||
// Actually run the connected components algorithm.
|
||||
map<NFAUndirectedVertex, u32> split_components;
|
||||
map<NFAVertex, u32> split_components;
|
||||
const u32 num = connected_components(
|
||||
filtered_ug, boost::make_assoc_property_map(split_components));
|
||||
|
||||
@ -348,10 +339,8 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
|
||||
|
||||
// Collect vertex lists per component.
|
||||
for (const auto &m : split_components) {
|
||||
NFAUndirectedVertex uv = m.first;
|
||||
NFAVertex v = m.first;
|
||||
u32 c = m.second;
|
||||
assert(contains(new2old, uv));
|
||||
NFAVertex v = new2old.at(uv);
|
||||
verts[c].push_back(v);
|
||||
DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -73,7 +73,7 @@ static
|
||||
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
|
||||
stateset *init, stateset *initDS,
|
||||
vector<NFAVertex> *v_by_index) {
|
||||
DEBUG_PRINTF("graph kind: %u\n", (int)g.kind);
|
||||
DEBUG_PRINTF("graph kind: %s\n", to_string(g.kind).c_str());
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (contains(unused, v)) {
|
||||
continue;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -542,7 +542,8 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind);
|
||||
DEBUG_PRINTF("attempting to build %s mcclellan\n",
|
||||
to_string(graph.kind).c_str());
|
||||
assert(allMatchStatesHaveReports(graph));
|
||||
|
||||
bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -38,7 +38,6 @@
|
||||
#include "ng_prune.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_som_util.h"
|
||||
#include "ng_undirected.h"
|
||||
#include "ng_util.h"
|
||||
#include "nfa/accel.h"
|
||||
#include "nfa/limex_limits.h"
|
||||
@ -48,6 +47,7 @@
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/graph_small_color_map.h"
|
||||
#include "util/graph_undirected.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/unordered.h"
|
||||
|
||||
@ -73,40 +73,41 @@ namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
/** \brief Filter that retains only edges between vertices with the same
|
||||
* reachability. */
|
||||
/**
|
||||
* \brief Filter that retains only edges between vertices with the same
|
||||
* reachability. Special vertices are dropped.
|
||||
*/
|
||||
template<class Graph>
|
||||
struct ReachFilter {
|
||||
ReachFilter() {}
|
||||
ReachFilter() = default;
|
||||
explicit ReachFilter(const Graph *g_in) : g(g_in) {}
|
||||
|
||||
// Convenience typedefs.
|
||||
typedef typename boost::graph_traits<Graph> Traits;
|
||||
typedef typename Traits::vertex_descriptor VertexDescriptor;
|
||||
typedef typename Traits::edge_descriptor EdgeDescriptor;
|
||||
using Traits = typename boost::graph_traits<Graph>;
|
||||
using VertexDescriptor = typename Traits::vertex_descriptor;
|
||||
using EdgeDescriptor = typename Traits::edge_descriptor;
|
||||
|
||||
bool operator()(const VertexDescriptor &v) const {
|
||||
assert(g);
|
||||
// Disallow special vertices, as otherwise we will try to remove them
|
||||
// later.
|
||||
return !is_special(v, *g);
|
||||
}
|
||||
|
||||
bool operator()(const EdgeDescriptor &e) const {
|
||||
assert(g);
|
||||
|
||||
VertexDescriptor u = source(e, *g), v = target(e, *g);
|
||||
|
||||
// Disallow special vertices, as otherwise we will try to remove them
|
||||
// later.
|
||||
if (is_special(u, *g) || is_special(v, *g)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Vertices must have the same reach.
|
||||
auto u = source(e, *g), v = target(e, *g);
|
||||
const CharReach &cr_u = (*g)[u].char_reach;
|
||||
const CharReach &cr_v = (*g)[v].char_reach;
|
||||
|
||||
return cr_u == cr_v;
|
||||
}
|
||||
|
||||
const Graph *g = nullptr;
|
||||
};
|
||||
|
||||
typedef boost::filtered_graph<NGHolder, ReachFilter<NGHolder>> RepeatGraph;
|
||||
using RepeatGraph = boost::filtered_graph<NGHolder, ReachFilter<NGHolder>,
|
||||
ReachFilter<NGHolder>>;
|
||||
|
||||
struct ReachSubgraph {
|
||||
vector<NFAVertex> vertices;
|
||||
@ -300,10 +301,9 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
|
||||
unordered_map<NFAVertex, NFAVertex> verts_map; // in g -> in verts_g
|
||||
fillHolder(&verts_g, g, verts, &verts_map);
|
||||
|
||||
unordered_map<NFAVertex, NFAUndirectedVertex> old2new;
|
||||
auto ug = createUnGraph(verts_g, true, true, old2new);
|
||||
const auto ug = make_undirected_graph(verts_g);
|
||||
|
||||
unordered_map<NFAUndirectedVertex, u32> repeatMap;
|
||||
unordered_map<NFAVertex, u32> repeatMap;
|
||||
|
||||
size_t num = connected_components(ug, make_assoc_property_map(repeatMap));
|
||||
DEBUG_PRINTF("found %zu connected repeat components\n", num);
|
||||
@ -312,7 +312,8 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
|
||||
vector<ReachSubgraph> rs(num);
|
||||
|
||||
for (auto v : verts) {
|
||||
NFAUndirectedVertex vu = old2new.at(verts_map.at(v));
|
||||
assert(!is_special(v, g));
|
||||
auto vu = verts_map.at(v);
|
||||
auto rit = repeatMap.find(vu);
|
||||
if (rit == repeatMap.end()) {
|
||||
continue; /* not part of a repeat */
|
||||
@ -323,8 +324,14 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
|
||||
}
|
||||
|
||||
for (const auto &rsi : rs) {
|
||||
if (rsi.vertices.empty()) {
|
||||
// Empty elements can happen when connected_components finds a
|
||||
// subgraph consisting entirely of specials (which aren't added to
|
||||
// ReachSubgraph in the loop above). There's nothing we can do with
|
||||
// these, so we skip them.
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("repeat with %zu vertices\n", rsi.vertices.size());
|
||||
assert(!rsi.vertices.empty());
|
||||
if (rsi.vertices.size() >= minNumVertices) {
|
||||
DEBUG_PRINTF("enqueuing\n");
|
||||
q.push(rsi);
|
||||
@ -1023,17 +1030,16 @@ static
|
||||
void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
|
||||
const u32 minNumVertices) {
|
||||
const ReachFilter<NGHolder> fil(&g);
|
||||
const RepeatGraph rg(g, fil);
|
||||
const RepeatGraph rg(g, fil, fil);
|
||||
|
||||
if (!isCompBigEnough(rg, minNumVertices)) {
|
||||
DEBUG_PRINTF("component not big enough, bailing\n");
|
||||
return;
|
||||
}
|
||||
|
||||
unordered_map<RepeatGraph::vertex_descriptor, NFAUndirectedVertex> old2new;
|
||||
auto ug = createUnGraph(rg, true, true, old2new);
|
||||
const auto ug = make_undirected_graph(rg);
|
||||
|
||||
unordered_map<NFAUndirectedVertex, u32> repeatMap;
|
||||
unordered_map<NFAVertex, u32> repeatMap;
|
||||
|
||||
unsigned int num;
|
||||
num = connected_components(ug, make_assoc_property_map(repeatMap));
|
||||
@ -1045,8 +1051,7 @@ void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
|
||||
rs.resize(num);
|
||||
|
||||
for (auto v : topoOrder) {
|
||||
NFAUndirectedVertex vu = old2new[v];
|
||||
auto rit = repeatMap.find(vu);
|
||||
auto rit = repeatMap.find(v);
|
||||
if (rit == repeatMap.end()) {
|
||||
continue; /* not part of a repeat */
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -87,7 +87,11 @@ private:
|
||||
/** Find the set of characters that are not present in the reachability of
|
||||
* graph \p g after a certain depth (currently 8). If a character in this set
|
||||
* is encountered, it means that the NFA is either dead or has not progressed
|
||||
* more than 8 characters from its start states. */
|
||||
* more than 8 characters from its start states.
|
||||
*
|
||||
* This is only used to guide merging heuristics; use
|
||||
* findLeftOffsetStopAlphabet for real uses.
|
||||
*/
|
||||
CharReach findStopAlphabet(const NGHolder &g, som_type som) {
|
||||
const depth max_depth(MAX_STOP_DEPTH);
|
||||
const InitDepths depths(g);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -47,7 +47,11 @@ class NGHolder;
|
||||
/** Find the set of characters that are not present in the reachability of
|
||||
* graph \p g after a certain depth (currently 8). If a character in this set
|
||||
* is encountered, it means that the NFA is either dead or has not progressed
|
||||
* more than 8 characters from its start states. */
|
||||
* more than 8 characters from its start states.
|
||||
*
|
||||
* This is only used to guide merging heuristics; use
|
||||
* findLeftOffsetStopAlphabet for real uses.
|
||||
*/
|
||||
CharReach findStopAlphabet(const NGHolder &g, som_type som);
|
||||
|
||||
/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
|
||||
|
@ -1,136 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Create an undirected graph from an NFAGraph.
|
||||
*/
|
||||
|
||||
#ifndef NG_UNDIRECTED_H
|
||||
#define NG_UNDIRECTED_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/unordered.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <boost/graph/adjacency_list.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
|
||||
* \brief BGL graph type for the undirected NFA graph.
|
||||
*
|
||||
* Note that both the out-edge lists and the vertex list use listS, so the
* graph type itself does not reject parallel edges; createUnGraph() filters
* them out while copying. The only vertex property constructed is \a
* vertex_index_t.
|
||||
*/
|
||||
using NFAUndirectedGraph = boost::adjacency_list<
|
||||
boost::listS, // out edges
|
||||
boost::listS, // vertices
|
||||
boost::undirectedS, // graph is undirected
|
||||
boost::property<boost::vertex_index_t, size_t>>; // vertex properties
|
||||
|
||||
using NFAUndirectedVertex = NFAUndirectedGraph::vertex_descriptor;
|
||||
|
||||
/**
|
||||
* Make a copy of an NFAGraph with undirected edges, optionally without start
|
||||
* vertices. Mappings from the original graph to the new one are provided.
|
||||
*
|
||||
* Note that new vertex indices are assigned contiguously in \a vertices(g)
|
||||
* order.
|
||||
*/
|
||||
template <typename Graph>
|
||||
NFAUndirectedGraph createUnGraph(const Graph &g,
|
||||
bool excludeStarts,
|
||||
bool excludeAccepts,
|
||||
std::unordered_map<typename Graph::vertex_descriptor,
|
||||
NFAUndirectedVertex> &old2new) {
|
||||
NFAUndirectedGraph ug;
|
||||
size_t idx = 0;
|
||||
|
||||
assert(old2new.empty());
|
||||
old2new.reserve(num_vertices(g));
|
||||
|
||||
for (auto v : ue2::vertices_range(g)) {
|
||||
// skip all accept nodes
|
||||
if (excludeAccepts && is_any_accept(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip starts if required
|
||||
if (excludeStarts && is_any_start(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto nuv = boost::add_vertex(ug);
|
||||
old2new.emplace(v, nuv);
|
||||
boost::put(boost::vertex_index, ug, nuv, idx++);
|
||||
}
|
||||
|
||||
// Track seen edges so that we don't insert parallel edges.
|
||||
using Vertex = typename Graph::vertex_descriptor;
|
||||
ue2_unordered_set<std::pair<Vertex, Vertex>> seen;
|
||||
seen.reserve(num_edges(g));
|
||||
auto make_ordered_edge = [](Vertex a, Vertex b) {
|
||||
return std::make_pair(std::min(a, b), std::max(a, b));
|
||||
};
|
||||
|
||||
for (const auto &e : ue2::edges_range(g)) {
|
||||
auto u = source(e, g);
|
||||
auto v = target(e, g);
|
||||
|
||||
if ((excludeAccepts && is_any_accept(u, g))
|
||||
|| (excludeStarts && is_any_start(u, g))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((excludeAccepts && is_any_accept(v, g))
|
||||
|| (excludeStarts && is_any_start(v, g))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!seen.emplace(make_ordered_edge(u, v)).second) {
|
||||
continue; // skip parallel edge.
|
||||
}
|
||||
|
||||
NFAUndirectedVertex new_u = old2new.at(u);
|
||||
NFAUndirectedVertex new_v = old2new.at(v);
|
||||
|
||||
boost::add_edge(new_u, new_v, ug);
|
||||
}
|
||||
|
||||
assert(!has_parallel_edge(ug));
|
||||
return ug;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif /* NG_UNDIRECTED_H */
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -60,6 +60,7 @@
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/graph_small_color_map.h"
|
||||
#include "util/insertion_ordered.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/order_check.h"
|
||||
@ -133,14 +134,21 @@ bool createsTransientLHS(const NGHolder &g, const vector<NFAVertex> &vv,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts the number of vertices that are reachable from the set of sources
|
||||
* given.
|
||||
*/
|
||||
static
|
||||
double calcSplitRatio(const NGHolder &g, const vector<NFAVertex> &vv) {
|
||||
flat_set<NFAVertex> not_reachable;
|
||||
find_unreachable(g, vv, &not_reachable);
|
||||
double rv = (double)not_reachable.size() / num_vertices(g);
|
||||
rv = rv > 0.5 ? 1 - rv : rv;
|
||||
size_t count_reachable(const NGHolder &g, const vector<NFAVertex> &sources,
|
||||
small_color_map<decltype(get(vertex_index, g))> &color_map) {
|
||||
auto null_visitor = boost::make_dfs_visitor(boost::null_visitor());
|
||||
color_map.fill(small_color::white);
|
||||
|
||||
return rv;
|
||||
for (auto v : sources) {
|
||||
boost::depth_first_visit(g, v, null_visitor, color_map);
|
||||
}
|
||||
|
||||
return color_map.count(small_color::black);
|
||||
}
|
||||
|
||||
static
|
||||
@ -687,8 +695,12 @@ unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g,
|
||||
}
|
||||
|
||||
if (last_chance) {
|
||||
const size_t num_verts = num_vertices(g);
|
||||
auto color_map = make_small_color_map(g);
|
||||
for (auto &a : lits) {
|
||||
a->split_ratio = calcSplitRatio(g, a->vv);
|
||||
size_t num_reachable = count_reachable(g, a->vv, color_map);
|
||||
double ratio = (double)num_reachable / (double)num_verts;
|
||||
a->split_ratio = ratio > 0.5 ? 1 - ratio : ratio;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -176,11 +176,7 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
|
||||
}
|
||||
|
||||
if (d.is_unreachable()) {
|
||||
// If we're actually reachable, we'll have a min width, so we can
|
||||
// return infinity in this case.
|
||||
if (findMinWidth(h, filter, src).is_reachable()) {
|
||||
return depth::infinity();
|
||||
}
|
||||
assert(findMinWidth(h, filter, src).is_unreachable());
|
||||
return d;
|
||||
}
|
||||
|
||||
|
376
src/parser/logical_combination.cpp
Normal file
376
src/parser/logical_combination.cpp
Normal file
@ -0,0 +1,376 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Parse and build ParsedLogical::logicalTree and combInfoMap.
|
||||
*/
|
||||
#include "logical_combination.h"
|
||||
#include "parser/parse_error.h"
|
||||
#include "util/container.h"
|
||||
#include "hs_compile.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
u32 ParsedLogical::getLogicalKey(u32 a) {
    auto found = toLogicalKeyMap.find(a);
    if (found == toLogicalKeyMap.end()) {
        // Keys are handed out densely: the next key is the current map size.
        // Read the size into a local first so the value passed to emplace()
        // cannot depend on evaluation order.
        const u32 next_key = toLogicalKeyMap.size();
        auto res = toLogicalKeyMap.emplace(a, next_key);
        assert(res.second);
        found = res.first;
    }
    DEBUG_PRINTF("%u -> lkey %u\n", found->first, found->second);
    return found->second;
}
|
||||
|
||||
u32 ParsedLogical::getCombKey(u32 a) {
    auto found = toCombKeyMap.find(a);
    if (found == toCombKeyMap.end()) {
        // First time this combination id is seen: give it the next dense key.
        const u32 next_key = toCombKeyMap.size();
        auto res = toCombKeyMap.emplace(a, next_key);
        assert(res.second);
        found = res.first;
    }
    DEBUG_PRINTF("%u -> ckey %u\n", found->first, found->second);
    return found->second;
}
|
||||
|
||||
void ParsedLogical::addRelateCKey(u32 lkey, u32 ckey) {
    // operator[] creates an empty set the first time an lkey is seen, which
    // is the same as looking it up and inserting an empty set on a miss.
    lkey2ckeys[lkey].insert(ckey);
    DEBUG_PRINTF("lkey %u belongs to combination key %u\n",
                 lkey, ckey);
}
|
||||
|
||||
/*
 * If \a ckey carries LOGICAL_OP_BIT, rewrite it in place: strip the bit and
 * add the current size of toLogicalKeyMap. Keys without the bit are left
 * untouched.
 *
 * The argument must be a modifiable u32 lvalue and is evaluated more than
 * once, so it must not have side effects. The macro reads toLogicalKeyMap
 * from the expansion site, so it is only usable inside ParsedLogical members.
 */
#define TRY_RENUM_OP(ckey)                                              \
    do {                                                                \
        if ((ckey) & LOGICAL_OP_BIT) {                                  \
            (ckey) = (u32)(((ckey) & ~LOGICAL_OP_BIT) +                 \
                           toLogicalKeyMap.size());                     \
        }                                                               \
    } while (0)
|
||||
|
||||
u32 ParsedLogical::logicalTreeAdd(u32 op, u32 left, u32 right) {
    // The new operation lands at the end of logicalTree; its id is that
    // position tagged with LOGICAL_OP_BIT, so the position itself must not
    // already use that bit.
    const u32 slot = (u32)logicalTree.size();
    assert((LOGICAL_OP_BIT & slot) == 0);

    LogicalOp node;
    node.id = LOGICAL_OP_BIT | slot;
    node.op = op;
    node.lo = left;
    node.ro = right;
    logicalTree.push_back(node);

    return node.id;
}
|
||||
|
||||
void ParsedLogical::combinationInfoAdd(UNUSED u32 ckey, u32 id, u32 ekey,
                                       u32 lkey_start, u32 lkey_result,
                                       u64a min_offset, u64a max_offset) {
    // Records are appended, so the record for ckey must land at index ckey.
    assert(ckey == combInfoMap.size());

    CombInfo record;
    record.id = id;
    record.ekey = ekey;
    record.start = lkey_start;
    record.result = lkey_result;
    record.min_offset = min_offset;
    record.max_offset = max_offset;
    combInfoMap.push_back(record);

    DEBUG_PRINTF("ckey %u (id %u) -> lkey %u..%u, ekey=0x%x\n", ckey,
                 record.id, record.start, record.result, record.ekey);
}
|
||||
|
||||
void ParsedLogical::validateSubIDs(const unsigned *ids,
|
||||
const char *const *expressions,
|
||||
const unsigned *flags,
|
||||
unsigned elements) {
|
||||
for (const auto &it : toLogicalKeyMap) {
|
||||
bool unknown = true;
|
||||
u32 i = 0;
|
||||
for (i = 0; i < elements; i++) {
|
||||
if ((ids ? ids[i] : 0) == it.first) {
|
||||
unknown = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (unknown) {
|
||||
throw CompileError("Unknown sub-expression id.");
|
||||
}
|
||||
if (contains(toCombKeyMap, it.first)) {
|
||||
throw CompileError("Have combination of combination.");
|
||||
}
|
||||
if (flags && (flags[i] & HS_FLAG_SOM_LEFTMOST)) {
|
||||
throw CompileError("Have SOM flag in sub-expression.");
|
||||
}
|
||||
if (flags && (flags[i] & HS_FLAG_PREFILTER)) {
|
||||
throw CompileError("Have PREFILTER flag in sub-expression.");
|
||||
}
|
||||
hs_compile_error_t *compile_err = NULL;
|
||||
hs_expr_info_t *info = NULL;
|
||||
hs_error_t err = hs_expression_info(expressions[i], flags[i], &info,
|
||||
&compile_err);
|
||||
if (err != HS_SUCCESS) {
|
||||
hs_free_compile_error(compile_err);
|
||||
throw CompileError("Run hs_expression_info() failed.");
|
||||
}
|
||||
if (!info) {
|
||||
throw CompileError("Get hs_expr_info_t failed.");
|
||||
} else {
|
||||
if (info->unordered_matches) {
|
||||
throw CompileError("Have unordered match in sub-expressions.");
|
||||
}
|
||||
free(info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ParsedLogical::logicalKeyRenumber() {
|
||||
// renumber operation lkey in op vector
|
||||
for (auto &op : logicalTree) {
|
||||
TRY_RENUM_OP(op.id);
|
||||
TRY_RENUM_OP(op.lo);
|
||||
TRY_RENUM_OP(op.ro);
|
||||
}
|
||||
// renumber operation lkey in info map
|
||||
for (auto &ci : combInfoMap) {
|
||||
TRY_RENUM_OP(ci.start);
|
||||
TRY_RENUM_OP(ci.result);
|
||||
}
|
||||
}
|
||||
|
||||
struct LogicalOperator {
|
||||
LogicalOperator(u32 op_in, u32 paren_in)
|
||||
: op(op_in), paren(paren_in) {}
|
||||
u32 op;
|
||||
u32 paren;
|
||||
};
|
||||
|
||||
static
|
||||
u32 toOperator(char c) {
|
||||
u32 op = UNKNOWN_OP;
|
||||
switch (c) {
|
||||
case '!' :
|
||||
op = LOGICAL_OP_NOT;
|
||||
break;
|
||||
case '&' :
|
||||
op = LOGICAL_OP_AND;
|
||||
break;
|
||||
case '|' :
|
||||
op = LOGICAL_OP_OR;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
};
|
||||
return op;
|
||||
}
|
||||
|
||||
static
|
||||
bool cmpOperator(const LogicalOperator &op1, const LogicalOperator &op2) {
|
||||
if (op1.paren < op2.paren) {
|
||||
return false;
|
||||
}
|
||||
if (op1.paren > op2.paren) {
|
||||
return true;
|
||||
}
|
||||
assert(op1.paren == op2.paren);
|
||||
if (op1.op > op2.op) {
|
||||
return false;
|
||||
}
|
||||
if (op1.op < op2.op) {
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
u32 fetchSubID(const char *logical, u32 &digit, u32 end) {
|
||||
if (digit == (u32)-1) { // no digit parsing in progress
|
||||
return (u32)-1;
|
||||
}
|
||||
assert(end > digit);
|
||||
if (end - digit > 9) {
|
||||
throw LocatedParseError("Expression id too large");
|
||||
}
|
||||
u32 mult = 1;
|
||||
u32 sum = 0;
|
||||
for (u32 j = end - 1; (j >= digit) && (j != (u32)-1) ; j--) {
|
||||
assert(isdigit(logical[j]));
|
||||
sum += (logical[j] - '0') * mult;
|
||||
mult *= 10;
|
||||
}
|
||||
digit = (u32)-1;
|
||||
return sum;
|
||||
}
|
||||
|
||||
static
|
||||
void popOperator(vector<LogicalOperator> &op_stack, vector<u32> &subid_stack,
|
||||
ParsedLogical &pl) {
|
||||
if (subid_stack.empty()) {
|
||||
throw LocatedParseError("Not enough operand");
|
||||
}
|
||||
u32 right = subid_stack.back();
|
||||
subid_stack.pop_back();
|
||||
u32 left = 0;
|
||||
if (op_stack.back().op != LOGICAL_OP_NOT) {
|
||||
if (subid_stack.empty()) {
|
||||
throw LocatedParseError("Not enough operand");
|
||||
}
|
||||
left = subid_stack.back();
|
||||
subid_stack.pop_back();
|
||||
}
|
||||
subid_stack.push_back(pl.logicalTreeAdd(op_stack.back().op, left, right));
|
||||
op_stack.pop_back();
|
||||
}
|
||||
|
||||
static
|
||||
char getValue(const vector<char> &lv, u32 ckey) {
|
||||
if (ckey & LOGICAL_OP_BIT) {
|
||||
return lv[ckey & ~LOGICAL_OP_BIT];
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool hasMatchFromPurelyNegative(const vector<LogicalOp> &tree,
|
||||
u32 start, u32 result) {
|
||||
vector<char> lv(tree.size());
|
||||
assert(start <= result);
|
||||
for (u32 i = start; i <= result; i++) {
|
||||
assert(i & LOGICAL_OP_BIT);
|
||||
const LogicalOp &op = tree[i & ~LOGICAL_OP_BIT];
|
||||
assert(i == op.id);
|
||||
switch (op.op) {
|
||||
case LOGICAL_OP_NOT:
|
||||
lv[op.id & ~LOGICAL_OP_BIT] = !getValue(lv, op.ro);
|
||||
break;
|
||||
case LOGICAL_OP_AND:
|
||||
lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) &
|
||||
getValue(lv, op.ro);
|
||||
break;
|
||||
case LOGICAL_OP_OR:
|
||||
lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) |
|
||||
getValue(lv, op.ro);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return lv[result & ~LOGICAL_OP_BIT];
|
||||
}
|
||||
|
||||
void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
|
||||
u32 ekey, u64a min_offset,
|
||||
u64a max_offset) {
|
||||
u32 ckey = getCombKey(id);
|
||||
vector<LogicalOperator> op_stack;
|
||||
vector<u32> subid_stack;
|
||||
u32 lkey_start = INVALID_LKEY; // logical operation's lkey
|
||||
u32 paren = 0; // parentheses
|
||||
u32 digit = (u32)-1; // digit start offset, invalid offset is -1
|
||||
u32 subid = (u32)-1;
|
||||
u32 i;
|
||||
try {
|
||||
for (i = 0; logical[i]; i++) {
|
||||
if (isdigit(logical[i])) {
|
||||
if (digit == (u32)-1) { // new digit start
|
||||
digit = i;
|
||||
}
|
||||
} else {
|
||||
if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) {
|
||||
subid_stack.push_back(getLogicalKey(subid));
|
||||
addRelateCKey(subid_stack.back(), ckey);
|
||||
}
|
||||
if (logical[i] == ' ') { // skip whitespace
|
||||
continue;
|
||||
}
|
||||
if (logical[i] == '(') {
|
||||
paren += 1;
|
||||
} else if (logical[i] == ')') {
|
||||
if (paren <= 0) {
|
||||
throw LocatedParseError("Not enough left parentheses");
|
||||
}
|
||||
paren -= 1;
|
||||
} else {
|
||||
u32 prio = toOperator(logical[i]);
|
||||
if (prio != UNKNOWN_OP) {
|
||||
LogicalOperator op(prio, paren);
|
||||
while (!op_stack.empty()
|
||||
&& cmpOperator(op_stack.back(), op)) {
|
||||
popOperator(op_stack, subid_stack, *this);
|
||||
if (lkey_start == INVALID_LKEY) {
|
||||
lkey_start = subid_stack.back();
|
||||
}
|
||||
}
|
||||
op_stack.push_back(op);
|
||||
} else {
|
||||
throw LocatedParseError("Unknown character");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (paren != 0) {
|
||||
throw LocatedParseError("Not enough right parentheses");
|
||||
}
|
||||
if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) {
|
||||
subid_stack.push_back(getLogicalKey(subid));
|
||||
addRelateCKey(subid_stack.back(), ckey);
|
||||
}
|
||||
while (!op_stack.empty()) {
|
||||
popOperator(op_stack, subid_stack, *this);
|
||||
if (lkey_start == INVALID_LKEY) {
|
||||
lkey_start = subid_stack.back();
|
||||
}
|
||||
}
|
||||
if (subid_stack.size() != 1) {
|
||||
throw LocatedParseError("Not enough operator");
|
||||
}
|
||||
} catch (LocatedParseError &error) {
|
||||
error.locate(i);
|
||||
throw;
|
||||
}
|
||||
u32 lkey_result = subid_stack.back(); // logical operation's lkey
|
||||
if (lkey_start == INVALID_LKEY) {
|
||||
throw CompileError("No logical operation.");
|
||||
}
|
||||
if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) {
|
||||
throw CompileError("Has match from purely negative sub-expressions.");
|
||||
}
|
||||
combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result,
|
||||
min_offset, max_offset);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
112
src/parser/logical_combination.h
Normal file
112
src/parser/logical_combination.h
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Parse and build ParsedLogical::logicalTree and combInfoMap.
|
||||
*/
|
||||
|
||||
#ifndef LOGICAL_COMBINATION_H
|
||||
#define LOGICAL_COMBINATION_H
|
||||
|
||||
#include "util/logical.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class ParsedLogical {
|
||||
friend class ReportManager;
|
||||
public:
|
||||
/** \brief Parse 1 logical expression \a logical, assign temporary ckey. */
|
||||
void parseLogicalCombination(unsigned id, const char *logical, u32 ekey,
|
||||
u64a min_offset, u64a max_offset);
|
||||
|
||||
/** \brief Check if all sub-expression id in combinations are valid. */
|
||||
void validateSubIDs(const unsigned *ids, const char *const *expressions,
|
||||
const unsigned *flags, unsigned elements);
|
||||
|
||||
/** \brief Renumber and assign final lkey for each logical operation
|
||||
* after parsed all logical expressions. */
|
||||
void logicalKeyRenumber();
|
||||
|
||||
/** \brief Fetch the lkey associated with the given expression id,
|
||||
* assigning one if necessary. */
|
||||
u32 getLogicalKey(u32 expressionId);
|
||||
|
||||
/** \brief Fetch the ckey associated with the given expression id,
|
||||
* assigning one if necessary. */
|
||||
u32 getCombKey(u32 expressionId);
|
||||
|
||||
/** \brief Add lkey's corresponding combination id. */
|
||||
void addRelateCKey(u32 lkey, u32 ckey);
|
||||
|
||||
/** \brief Add one Logical Operation. */
|
||||
u32 logicalTreeAdd(u32 op, u32 left, u32 right);
|
||||
|
||||
/** \brief Assign the combination info associated with the given ckey. */
|
||||
void combinationInfoAdd(u32 ckey, u32 id, u32 ekey, u32 lkey_start,
|
||||
u32 lkey_result, u64a min_offset, u64a max_offset);
|
||||
|
||||
const std::map<u32, u32> &getLkeyMap() const {
|
||||
return toLogicalKeyMap;
|
||||
}
|
||||
|
||||
const std::vector<LogicalOp> &getLogicalTree() const {
|
||||
return logicalTree;
|
||||
}
|
||||
|
||||
CombInfo getCombInfoById(u32 id) const {
|
||||
u32 ckey = toCombKeyMap.at(id);
|
||||
assert(ckey < combInfoMap.size());
|
||||
return combInfoMap.at(ckey);
|
||||
}
|
||||
|
||||
private:
|
||||
/** \brief Mapping from ckey to combination info. */
|
||||
std::vector<CombInfo> combInfoMap;
|
||||
|
||||
/** \brief Mapping from combination expression id to combination key,
|
||||
* combination key is used in combination bit-vector cache. */
|
||||
std::map<u32, u32> toCombKeyMap;
|
||||
|
||||
/** \brief Mapping from expression id to logical key, logical key is used
|
||||
* as index in LogicalOp array. */
|
||||
std::map<u32, u32> toLogicalKeyMap;
|
||||
|
||||
/** \brief Mapping from logical key to related combination keys. */
|
||||
std::map<u32, std::set<u32>> lkey2ckeys;
|
||||
|
||||
/** \brief Logical constraints, each operation from postfix notation. */
|
||||
std::vector<LogicalOp> logicalTree;
|
||||
};
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -199,7 +199,7 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
|
||||
|
||||
DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str());
|
||||
return ng.addLiteral(lit, expr.index, expr.report, expr.highlander,
|
||||
expr.som);
|
||||
expr.som, expr.quiet);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -60,12 +60,11 @@ bool isAllowedCodepoint(u32 val) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isValidUtf8(const char *expression) {
|
||||
bool isValidUtf8(const char *expression, const size_t len) {
|
||||
if (!expression) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const size_t len = strlen(expression);
|
||||
const u8 *s = (const u8 *)expression;
|
||||
u32 val;
|
||||
|
||||
|
@ -29,10 +29,12 @@
|
||||
#ifndef PARSER_UTF8_VALIDATE_H
|
||||
#define PARSER_UTF8_VALIDATE_H
|
||||
|
||||
#include <cstddef> // size_t
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Validate that the given expression is well-formed UTF-8. */
|
||||
bool isValidUtf8(const char *expression);
|
||||
bool isValidUtf8(const char *expression, const size_t len);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
90
src/report.h
90
src/report.h
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -42,6 +42,7 @@
|
||||
#include "rose/runtime.h"
|
||||
#include "som/som_runtime.h"
|
||||
#include "util/exhaust.h"
|
||||
#include "util/logical.h"
|
||||
#include "util/fatbit.h"
|
||||
|
||||
enum DedupeResult {
|
||||
@ -151,6 +152,93 @@ void clearEvec(const struct RoseEngine *rose, char *evec) {
|
||||
mmbit_clear((u8 *)evec, rose->ekeyCount);
|
||||
}
|
||||
|
||||
/** \brief Test whether the given key (\a lkey) is set in the logical vector
|
||||
* \a lvec. */
|
||||
static really_inline
|
||||
char getLogicalVal(const struct RoseEngine *rose, const char *lvec, u32 lkey) {
|
||||
DEBUG_PRINTF("checking lkey matching %p %u\n", lvec, lkey);
|
||||
assert(lkey != INVALID_LKEY);
|
||||
assert(lkey < rose->lkeyCount + rose->lopCount);
|
||||
return mmbit_isset((const u8 *)lvec, rose->lkeyCount + rose->lopCount,
|
||||
lkey);
|
||||
}
|
||||
|
||||
/** \brief Mark key \a lkey on in the logical vector. */
|
||||
static really_inline
|
||||
void setLogicalVal(const struct RoseEngine *rose, char *lvec, u32 lkey,
|
||||
char val) {
|
||||
DEBUG_PRINTF("marking as matched logical key %u\n", lkey);
|
||||
assert(lkey != INVALID_LKEY);
|
||||
assert(lkey < rose->lkeyCount + rose->lopCount);
|
||||
switch (val) {
|
||||
case 0:
|
||||
mmbit_unset((u8 *)lvec, rose->lkeyCount + rose->lopCount, lkey);
|
||||
break;
|
||||
default:
|
||||
mmbit_set((u8 *)lvec, rose->lkeyCount + rose->lopCount, lkey);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Mark key \a ckey on in the combination vector. */
|
||||
static really_inline
|
||||
void setCombinationActive(const struct RoseEngine *rose, char *cvec, u32 ckey) {
|
||||
DEBUG_PRINTF("marking as active combination key %u\n", ckey);
|
||||
assert(ckey != INVALID_CKEY);
|
||||
assert(ckey < rose->ckeyCount);
|
||||
mmbit_set((u8 *)cvec, rose->ckeyCount, ckey);
|
||||
}
|
||||
|
||||
/** \brief Returns 1 if compliant to all logical combinations. */
|
||||
static really_inline
|
||||
char isLogicalCombination(const struct RoseEngine *rose, char *lvec,
|
||||
u32 start, u32 result) {
|
||||
const struct LogicalOp *logicalTree = (const struct LogicalOp *)
|
||||
((const char *)rose + rose->logicalTreeOffset);
|
||||
assert(start >= rose->lkeyCount);
|
||||
assert(start <= result);
|
||||
assert(result < rose->lkeyCount + rose->lopCount);
|
||||
for (u32 i = start; i <= result; i++) {
|
||||
const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount);
|
||||
assert(i == op->id);
|
||||
assert(op->op <= LAST_LOGICAL_OP);
|
||||
switch ((enum LogicalOpType)op->op) {
|
||||
case LOGICAL_OP_NOT:
|
||||
setLogicalVal(rose, lvec, op->id,
|
||||
!getLogicalVal(rose, lvec, op->ro));
|
||||
break;
|
||||
case LOGICAL_OP_AND:
|
||||
setLogicalVal(rose, lvec, op->id,
|
||||
getLogicalVal(rose, lvec, op->lo) &
|
||||
getLogicalVal(rose, lvec, op->ro)); // &&
|
||||
break;
|
||||
case LOGICAL_OP_OR:
|
||||
setLogicalVal(rose, lvec, op->id,
|
||||
getLogicalVal(rose, lvec, op->lo) |
|
||||
getLogicalVal(rose, lvec, op->ro)); // ||
|
||||
break;
|
||||
}
|
||||
}
|
||||
return getLogicalVal(rose, lvec, result);
|
||||
}
|
||||
|
||||
/** \brief Clear all keys in the logical vector. */
|
||||
static really_inline
|
||||
void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) {
|
||||
DEBUG_PRINTF("clearing lvec %p %u\n", lvec,
|
||||
rose->lkeyCount + rose->lopCount);
|
||||
DEBUG_PRINTF("clearing cvec %p %u\n", cvec, rose->ckeyCount);
|
||||
mmbit_clear((u8 *)lvec, rose->lkeyCount + rose->lopCount);
|
||||
mmbit_clear((u8 *)cvec, rose->ckeyCount);
|
||||
}
|
||||
|
||||
/** \brief Clear all keys in the combination vector. */
|
||||
static really_inline
|
||||
void clearCvec(const struct RoseEngine *rose, char *cvec) {
|
||||
DEBUG_PRINTF("clearing cvec %p %u\n", cvec, rose->ckeyCount);
|
||||
mmbit_clear((u8 *)cvec, rose->ckeyCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Deliver the given report to the user callback.
|
||||
*
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -145,6 +145,7 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
tctxt->lastEndOffset = 0;
|
||||
tctxt->filledDelayedSlots = 0;
|
||||
tctxt->lastMatchOffset = 0;
|
||||
tctxt->lastCombMatchOffset = 0;
|
||||
tctxt->minMatchOffset = 0;
|
||||
tctxt->minNonMpvMatchOffset = 0;
|
||||
tctxt->next_mpv_offset = 0;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -424,6 +424,12 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc,
|
||||
}
|
||||
|
||||
done:
|
||||
if (t->flushCombProgramOffset) {
|
||||
if (roseRunFlushCombProgram(t, scratch, mpv_exec_end)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
updateMinMatchOffsetFromMpv(&scratch->tctxt, mpv_exec_end);
|
||||
scratch->tctxt.next_mpv_offset
|
||||
= MAX(next_pos_match_loc + scratch->core_info.buf_offset,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -51,6 +51,7 @@
|
||||
#include "hwlm/hwlm.h"
|
||||
#include "runtime.h"
|
||||
#include "scratch.h"
|
||||
#include "rose.h"
|
||||
#include "rose_common.h"
|
||||
#include "rose_internal.h"
|
||||
#include "ue2common.h"
|
||||
@ -105,6 +106,12 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc,
|
||||
assert(!can_stop_matching(scratch));
|
||||
|
||||
if (canSkipCatchUpMPV(t, scratch, cur_offset)) {
|
||||
if (t->flushCombProgramOffset) {
|
||||
if (roseRunFlushCombProgram(t, scratch, cur_offset)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset);
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
@ -139,6 +146,12 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
|
||||
hwlmcb_rv_t rv;
|
||||
if (!t->activeArrayCount
|
||||
|| !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
|
||||
if (t->flushCombProgramOffset) {
|
||||
if (roseRunFlushCombProgram(t, scratch, end)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
updateMinMatchOffset(&scratch->tctxt, end);
|
||||
rv = HWLM_CONTINUE_MATCHING;
|
||||
} else {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -571,6 +571,22 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Execute a flush combination program.
|
||||
*
|
||||
* Returns MO_HALT_MATCHING if the stream is exhausted or the user has
|
||||
* instructed us to halt, or MO_CONTINUE_MATCHING otherwise.
|
||||
*/
|
||||
int roseRunFlushCombProgram(const struct RoseEngine *rose,
|
||||
struct hs_scratch *scratch, u64a end) {
|
||||
hwlmcb_rv_t rv = roseRunProgram(rose, scratch, rose->flushCombProgramOffset,
|
||||
0, end, 0);
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
|
||||
struct hs_scratch *scratch = context;
|
||||
assert(scratch && scratch->magic == SCRATCH_MAGIC);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -66,6 +66,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
|
||||
u64a top_squash_distance, u64a end,
|
||||
char in_catchup);
|
||||
|
||||
/** \brief Initialize the queue for a suffix/outfix engine. */
|
||||
static really_inline
|
||||
void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch) {
|
||||
@ -90,6 +91,7 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
|
||||
info->stateOffset, *(u32 *)q->state);
|
||||
}
|
||||
|
||||
/** \brief Initialize the queue for a leftfix (prefix/infix) engine. */
|
||||
static really_inline
|
||||
void initRoseQueue(const struct RoseEngine *t, u32 qi,
|
||||
const struct LeftNfaInfo *left,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -501,8 +501,7 @@ hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
}
|
||||
|
||||
/* catches up engines enough to ensure any earlier mpv triggers are enqueued
|
||||
* and then adds the trigger to the mpv queue. Must not be called during catch
|
||||
* up */
|
||||
* and then adds the trigger to the mpv queue. */
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch,
|
||||
@ -558,6 +557,22 @@ void roseHandleSomSom(struct hs_scratch *scratch,
|
||||
setSomFromSomAware(scratch, sr, start, end);
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseSetExhaust(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch, u32 ekey) {
|
||||
assert(scratch);
|
||||
assert(scratch->magic == SCRATCH_MAGIC);
|
||||
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
|
||||
assert(!can_stop_matching(scratch));
|
||||
assert(!isExhausted(ci->rose, ci->exhaustionVector, ekey));
|
||||
|
||||
markAsMatched(ci->rose, ci->exhaustionVector, ekey);
|
||||
|
||||
return roseHaltIfExhausted(t, scratch);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int reachHasBit(const u8 *reach, u8 c) {
|
||||
return !!(reach[c / 8U] & (u8)1U << (c % 8U));
|
||||
@ -1823,6 +1838,56 @@ void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
|
||||
}
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch) {
|
||||
u8 *cvec = (u8 *)scratch->core_info.combVector;
|
||||
if (!mmbit_any(cvec, t->ckeyCount)) {
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
u64a end = scratch->tctxt.lastCombMatchOffset;
|
||||
for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) {
|
||||
const struct CombInfo *combInfoMap = (const struct CombInfo *)
|
||||
((const char *)t + t->combInfoMapOffset);
|
||||
const struct CombInfo *ci = combInfoMap + i;
|
||||
if ((ci->min_offset != 0) && (end < ci->min_offset)) {
|
||||
DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset);
|
||||
continue;
|
||||
}
|
||||
if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) {
|
||||
DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset);
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("check ekey %u\n", ci->ekey);
|
||||
if (ci->ekey != INVALID_EKEY) {
|
||||
assert(ci->ekey < t->ekeyCount);
|
||||
const char *evec = scratch->core_info.exhaustionVector;
|
||||
if (isExhausted(t, evec, ci->ekey)) {
|
||||
DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
|
||||
ci->ekey);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("check ckey %u\n", i);
|
||||
char *lvec = scratch->core_info.logicalVector;
|
||||
if (!isLogicalCombination(t, lvec, ci->start, ci->result)) {
|
||||
DEBUG_PRINTF("Logical Combination Failed!\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("Logical Combination Passed!\n");
|
||||
if (roseReport(t, scratch, end, ci->id, 0,
|
||||
ci->ekey) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
clearCvec(t, (char *)cvec);
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#define PROGRAM_CASE(name) \
|
||||
case ROSE_INSTR_##name: { \
|
||||
DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \
|
||||
@ -2588,6 +2653,47 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SET_LOGICAL) {
|
||||
DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
|
||||
ri->lkey, ri->offset_adjust);
|
||||
assert(ri->lkey != INVALID_LKEY);
|
||||
assert(ri->lkey < t->lkeyCount);
|
||||
char *lvec = scratch->core_info.logicalVector;
|
||||
setLogicalVal(t, lvec, ri->lkey, 1);
|
||||
updateLastCombMatchOffset(tctxt, end + ri->offset_adjust);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SET_COMBINATION) {
|
||||
DEBUG_PRINTF("set ckey %u as active\n", ri->ckey);
|
||||
assert(ri->ckey != INVALID_CKEY);
|
||||
assert(ri->ckey < t->ckeyCount);
|
||||
char *cvec = scratch->core_info.combVector;
|
||||
setCombinationActive(t, cvec, ri->ckey);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(FLUSH_COMBINATION) {
|
||||
assert(end >= tctxt->lastCombMatchOffset);
|
||||
if (end > tctxt->lastCombMatchOffset) {
|
||||
if (flushActiveCombinations(t, scratch)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SET_EXHAUST) {
|
||||
updateSeqPoint(tctxt, end, from_mpv);
|
||||
if (roseSetExhaust(t, scratch, ri->ekey)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
work_done = 1;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -53,4 +53,7 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context);
|
||||
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
|
||||
u64a stream_offset, struct hs_scratch *scratch);
|
||||
|
||||
int roseRunFlushCombProgram(const struct RoseEngine *rose,
|
||||
struct hs_scratch *scratch, u64a end);
|
||||
|
||||
#endif // ROSE_H
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -453,7 +453,7 @@ RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
|
||||
<= tbi->cc.grey.maxAnchoredRegion) {
|
||||
if (ep.maxBound || ep.minBound) {
|
||||
/* TODO: handle, however these cases are not generated currently by
|
||||
ng_rose */
|
||||
ng_violet */
|
||||
return RoseGraph::null_vertex();
|
||||
}
|
||||
max_width = depth(ep.maxBound + iv_info.s.length());
|
||||
@ -567,7 +567,7 @@ void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
|
||||
assert(iv_info.type == RIV_LITERAL);
|
||||
assert(!parents.empty()); /* start vertices should not be here */
|
||||
|
||||
// ng_rose should have ensured that mixed-sensitivity literals are no
|
||||
// ng_violet should have ensured that mixed-sensitivity literals are no
|
||||
// longer than the benefits max width.
|
||||
assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
|
||||
!mixed_sensitivity(iv_info.s));
|
||||
@ -1849,13 +1849,12 @@ bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
|
||||
return true; /* failure is not yet an option */
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
|
||||
u32 max_adj, NFAVertex u,
|
||||
NFAVertex u,
|
||||
const vector<DepthMinMax> &vertexDepths,
|
||||
map<u32, DepthMinMax> &depthMap,
|
||||
map<NFAVertex, set<u32> > &reportMap,
|
||||
map<NFAVertex, set<u32>> &reportMap,
|
||||
map<ReportID, u32> &allocated_reports,
|
||||
flat_set<u32> &added_lit_ids) {
|
||||
const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion);
|
||||
@ -1883,9 +1882,9 @@ bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
|
||||
depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
|
||||
}
|
||||
|
||||
if (depthMap[lit_id].max + depth(max_adj) > max_anchored_depth) {
|
||||
if (depthMap[lit_id].max > max_anchored_depth) {
|
||||
DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
|
||||
(depthMap[lit_id].max + depth(max_adj)).str().c_str(),
|
||||
depthMap[lit_id].max.str().c_str(),
|
||||
tbi.cc.grey.maxAnchoredRegion);
|
||||
return false;
|
||||
}
|
||||
@ -1932,7 +1931,7 @@ bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
|
||||
flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
|
||||
|
||||
for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
|
||||
if (!prepAcceptForAddAnchoredNFA(*this, h, 0, v, vertexDepths, depthMap,
|
||||
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
|
||||
reportMap, allocated_reports,
|
||||
added_lit_ids)) {
|
||||
removeAddedLiterals(*this, added_lit_ids);
|
||||
@ -1946,7 +1945,7 @@ bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
|
||||
if (v == h.accept) {
|
||||
continue;
|
||||
}
|
||||
if (!prepAcceptForAddAnchoredNFA(*this, h, 0, v, vertexDepths, depthMap,
|
||||
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
|
||||
reportMap, allocated_reports_eod,
|
||||
added_lit_ids)) {
|
||||
removeAddedLiterals(*this, added_lit_ids);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -426,6 +426,17 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
|
||||
curr_offset += mmbit_size(build.rm.numEkeys());
|
||||
so->exhausted_size = mmbit_size(build.rm.numEkeys());
|
||||
|
||||
// Logical multibit.
|
||||
so->logicalVec = curr_offset;
|
||||
so->logicalVec_size = mmbit_size(build.rm.numLogicalKeys() +
|
||||
build.rm.numLogicalOps());
|
||||
curr_offset += so->logicalVec_size;
|
||||
|
||||
// Combination multibit.
|
||||
so->combVec = curr_offset;
|
||||
so->combVec_size = mmbit_size(build.rm.numCkeys());
|
||||
curr_offset += so->combVec_size;
|
||||
|
||||
// SOM locations and valid/writeable multibit structures.
|
||||
if (build.ssm.numSomSlots()) {
|
||||
const u32 somWidth = build.ssm.somPrecision();
|
||||
@ -2469,6 +2480,18 @@ void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto,
|
||||
proto.rosePrefixCount = countRosePrefixes(leftInfoTable);
|
||||
}
|
||||
|
||||
static
|
||||
void writeLogicalInfo(const ReportManager &rm, RoseEngineBlob &engine_blob,
|
||||
RoseEngine &proto) {
|
||||
const auto &tree = rm.getLogicalTree();
|
||||
proto.logicalTreeOffset = engine_blob.add_range(tree);
|
||||
const auto &combMap = rm.getCombInfoMap();
|
||||
proto.combInfoMapOffset = engine_blob.add_range(combMap);
|
||||
proto.lkeyCount = rm.numLogicalKeys();
|
||||
proto.lopCount = rm.numLogicalOps();
|
||||
proto.ckeyCount = rm.numCkeys();
|
||||
}
|
||||
|
||||
static
|
||||
void writeNfaInfo(const RoseBuildImpl &build, build_context &bc,
|
||||
RoseEngine &proto, const set<u32> &no_retrigger_queues) {
|
||||
@ -3313,6 +3336,15 @@ RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc,
|
||||
return program;
|
||||
}
|
||||
|
||||
static
|
||||
RoseProgram makeFlushCombProgram(const RoseEngine &t) {
|
||||
RoseProgram program;
|
||||
if (t.ckeyCount) {
|
||||
addFlushCombinationProgram(program);
|
||||
}
|
||||
return program;
|
||||
}
|
||||
|
||||
static
|
||||
u32 history_required(const rose_literal_id &key) {
|
||||
if (key.msk.size() < key.s.length()) {
|
||||
@ -3678,6 +3710,10 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
|
||||
writeDkeyInfo(rm, bc.engine_blob, proto);
|
||||
writeLeftInfo(bc.engine_blob, proto, leftInfoTable);
|
||||
writeLogicalInfo(rm, bc.engine_blob, proto);
|
||||
|
||||
auto flushComb_prog = makeFlushCombProgram(proto);
|
||||
proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog));
|
||||
|
||||
// Build anchored matcher.
|
||||
auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -1469,6 +1469,25 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SET_LOGICAL) {
|
||||
os << " lkey " << ri->lkey << endl;
|
||||
os << " offset_adjust " << ri->offset_adjust << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SET_COMBINATION) {
|
||||
os << " ckey " << ri->ckey << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(FLUSH_COMBINATION) {}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SET_EXHAUST) {
|
||||
os << " ekey " << ri->ekey << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
default:
|
||||
os << " UNKNOWN (code " << int{code} << ")" << endl;
|
||||
os << " <stopping>" << endl;
|
||||
@ -1523,6 +1542,23 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
|
||||
os.close();
|
||||
}
|
||||
|
||||
static
|
||||
void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) {
|
||||
ofstream os(filename);
|
||||
const char *base = (const char *)t;
|
||||
|
||||
if (t->flushCombProgramOffset) {
|
||||
os << "Flush Combination Program @ " << t->flushCombProgramOffset
|
||||
<< ":" << endl;
|
||||
dumpProgram(os, t, base + t->flushCombProgramOffset);
|
||||
os << endl;
|
||||
} else {
|
||||
os << "<No Flush Combination Program>" << endl;
|
||||
}
|
||||
|
||||
os.close();
|
||||
}
|
||||
|
||||
static
|
||||
void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
|
||||
ofstream os(filename);
|
||||
@ -2028,6 +2064,10 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
|
||||
fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
|
||||
fprintf(f, " - exhaustion vector : %u bytes\n",
|
||||
t->stateOffsets.exhausted_size);
|
||||
fprintf(f, " - logical vector : %u bytes\n",
|
||||
t->stateOffsets.logicalVec_size);
|
||||
fprintf(f, " - combination vector: %u bytes\n",
|
||||
t->stateOffsets.combVec_size);
|
||||
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
|
||||
fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
|
||||
fprintf(f, " - active array : %u bytes\n",
|
||||
@ -2092,6 +2132,11 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, mode);
|
||||
DUMP_U32(t, historyRequired);
|
||||
DUMP_U32(t, ekeyCount);
|
||||
DUMP_U32(t, lkeyCount);
|
||||
DUMP_U32(t, lopCount);
|
||||
DUMP_U32(t, ckeyCount);
|
||||
DUMP_U32(t, logicalTreeOffset);
|
||||
DUMP_U32(t, combInfoMapOffset);
|
||||
DUMP_U32(t, dkeyCount);
|
||||
DUMP_U32(t, dkeyLogSize);
|
||||
DUMP_U32(t, invDkeyOffset);
|
||||
@ -2127,6 +2172,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, leftOffset);
|
||||
DUMP_U32(t, roseCount);
|
||||
DUMP_U32(t, eodProgramOffset);
|
||||
DUMP_U32(t, flushCombProgramOffset);
|
||||
DUMP_U32(t, lastByteHistoryIterOffset);
|
||||
DUMP_U32(t, minWidth);
|
||||
DUMP_U32(t, minWidthExcludingBoundaries);
|
||||
@ -2150,6 +2196,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, stateOffsets.history);
|
||||
DUMP_U32(t, stateOffsets.exhausted);
|
||||
DUMP_U32(t, stateOffsets.exhausted_size);
|
||||
DUMP_U32(t, stateOffsets.logicalVec);
|
||||
DUMP_U32(t, stateOffsets.logicalVec_size);
|
||||
DUMP_U32(t, stateOffsets.combVec);
|
||||
DUMP_U32(t, stateOffsets.combVec_size);
|
||||
DUMP_U32(t, stateOffsets.activeLeafArray);
|
||||
DUMP_U32(t, stateOffsets.activeLeafArray_size);
|
||||
DUMP_U32(t, stateOffsets.activeLeftArray);
|
||||
@ -2200,6 +2250,7 @@ void roseDumpPrograms(const vector<LitFragment> &fragments, const RoseEngine *t,
|
||||
const string &base) {
|
||||
dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt");
|
||||
dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
|
||||
dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt");
|
||||
dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
|
||||
dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt");
|
||||
dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -47,6 +47,7 @@ RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default;
|
||||
RoseInstrMatcherEod::~RoseInstrMatcherEod() = default;
|
||||
RoseInstrEnd::~RoseInstrEnd() = default;
|
||||
RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default;
|
||||
RoseInstrFlushCombination::~RoseInstrFlushCombination() = default;
|
||||
|
||||
using OffsetMap = RoseInstruction::OffsetMap;
|
||||
|
||||
@ -644,4 +645,26 @@ void RoseInstrIncludedJump::write(void *dest, RoseEngineBlob &blob,
|
||||
inst->squash = squash;
|
||||
}
|
||||
|
||||
void RoseInstrSetLogical::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
inst->lkey = lkey;
|
||||
inst->offset_adjust = offset_adjust;
|
||||
}
|
||||
|
||||
void RoseInstrSetCombination::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
inst->ckey = ckey;
|
||||
}
|
||||
|
||||
void RoseInstrSetExhaust::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
inst->ekey = ekey;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -2144,6 +2144,94 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrSetLogical
|
||||
: public RoseInstrBaseNoTargets<ROSE_INSTR_SET_LOGICAL,
|
||||
ROSE_STRUCT_SET_LOGICAL,
|
||||
RoseInstrSetLogical> {
|
||||
public:
|
||||
u32 lkey;
|
||||
s32 offset_adjust;
|
||||
|
||||
RoseInstrSetLogical(u32 lkey_in, s32 offset_adjust_in)
|
||||
: lkey(lkey_in), offset_adjust(offset_adjust_in) {}
|
||||
|
||||
bool operator==(const RoseInstrSetLogical &ri) const {
|
||||
return lkey == ri.lkey && offset_adjust == ri.offset_adjust;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(opcode, lkey, offset_adjust);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const override;
|
||||
|
||||
bool equiv_to(const RoseInstrSetLogical &ri, const OffsetMap &,
|
||||
const OffsetMap &) const {
|
||||
return lkey == ri.lkey && offset_adjust == ri.offset_adjust;
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrSetCombination
|
||||
: public RoseInstrBaseNoTargets<ROSE_INSTR_SET_COMBINATION,
|
||||
ROSE_STRUCT_SET_COMBINATION,
|
||||
RoseInstrSetCombination> {
|
||||
public:
|
||||
u32 ckey;
|
||||
|
||||
RoseInstrSetCombination(u32 ckey_in) : ckey(ckey_in) {}
|
||||
|
||||
bool operator==(const RoseInstrSetCombination &ri) const {
|
||||
return ckey == ri.ckey;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(opcode, ckey);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const override;
|
||||
|
||||
bool equiv_to(const RoseInstrSetCombination &ri, const OffsetMap &,
|
||||
const OffsetMap &) const {
|
||||
return ckey == ri.ckey;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * \brief Builder-side form of the FLUSH_COMBINATION instruction.
 *
 * Declares no data members of its own; only the destructor is declared here.
 */
class RoseInstrFlushCombination
|
||||
: public RoseInstrBaseTrivial<ROSE_INSTR_FLUSH_COMBINATION,
|
||||
ROSE_STRUCT_FLUSH_COMBINATION,
|
||||
RoseInstrFlushCombination> {
|
||||
public:
|
||||
~RoseInstrFlushCombination() override;
|
||||
};
|
||||
|
||||
class RoseInstrSetExhaust
|
||||
: public RoseInstrBaseNoTargets<ROSE_INSTR_SET_EXHAUST,
|
||||
ROSE_STRUCT_SET_EXHAUST,
|
||||
RoseInstrSetExhaust> {
|
||||
public:
|
||||
u32 ekey;
|
||||
|
||||
RoseInstrSetExhaust(u32 ekey_in) : ekey(ekey_in) {}
|
||||
|
||||
bool operator==(const RoseInstrSetExhaust &ri) const {
|
||||
return ekey == ri.ekey;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(opcode, ekey);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const override;
|
||||
|
||||
bool equiv_to(const RoseInstrSetExhaust &ri, const OffsetMap &,
|
||||
const OffsetMap &) const {
|
||||
return ekey == ri.ekey;
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrEnd
|
||||
: public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
|
||||
RoseInstrEnd> {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -459,7 +459,7 @@ public:
|
||||
const_iterator end() const { return ordering.end(); }
|
||||
};
|
||||
|
||||
typedef Bouquet<left_id> RoseBouquet;
|
||||
typedef Bouquet<left_id> LeftfixBouquet;
|
||||
typedef Bouquet<suffix_id> SuffixBouquet;
|
||||
|
||||
} // namespace
|
||||
@ -565,7 +565,7 @@ bool hasSameEngineType(const RoseVertexProps &u_prop,
|
||||
*
|
||||
* Parameters are vectors of literals + lag pairs.
|
||||
*
|
||||
* Note: if more constaints of when the leftfixes were going to be checked
|
||||
* Note: if more constraints of when the leftfixes were going to be checked
|
||||
* (mandatory lookarounds passing, offset checks), more merges may be allowed.
|
||||
*/
|
||||
static
|
||||
@ -599,7 +599,7 @@ bool compatibleLiteralsForMerge(
|
||||
/* An engine requires that all accesses to it are ordered by offsets. (ie,
|
||||
we can not check an engine's state at offset Y, if we have already
|
||||
checked its status at offset X and X > Y). If we can not establish that
|
||||
the literals used for triggering will statisfy this property, then it is
|
||||
the literals used for triggering will satisfy this property, then it is
|
||||
not safe to merge the engine. */
|
||||
for (const auto &ue : ulits) {
|
||||
const rose_literal_id &ul = *ue.first;
|
||||
@ -1437,7 +1437,19 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
|
||||
|
||||
assert(!parents.empty());
|
||||
|
||||
#ifndef _WIN32
|
||||
engine_groups[MergeKey(left, parents)].push_back(left);
|
||||
#else
|
||||
// On windows, when passing MergeKey object into map 'engine_groups',
|
||||
// it will not be copied, but will be freed along with
|
||||
// engine_groups.clear().
|
||||
// If we construct MergeKey object on the stack, it will be destructed
|
||||
// on its life cycle ending, then on engine_groups.clear(), which
|
||||
// will cause is_block_type_valid() assertion error in MergeKey
|
||||
// destructor.
|
||||
MergeKey *mk = new MergeKey(left, parents);
|
||||
engine_groups[*mk].push_back(left);
|
||||
#endif
|
||||
}
|
||||
|
||||
vector<vector<left_id>> chunks;
|
||||
@ -1778,7 +1790,7 @@ u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
|
||||
}
|
||||
|
||||
static
|
||||
void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) {
|
||||
void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) {
|
||||
RoseGraph &g = tbi.g;
|
||||
DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
|
||||
|
||||
@ -1894,7 +1906,7 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
|
||||
|
||||
RoseGraph &g = tbi.g;
|
||||
|
||||
RoseBouquet nfa_roses;
|
||||
LeftfixBouquet nfa_leftfixes;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (!g[v].left) {
|
||||
@ -1939,20 +1951,20 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
|
||||
continue;
|
||||
}
|
||||
|
||||
nfa_roses.insert(left, v);
|
||||
nfa_leftfixes.insert(left, v);
|
||||
}
|
||||
|
||||
deque<RoseBouquet> rose_groups;
|
||||
chunkBouquets(nfa_roses, rose_groups, MERGE_GROUP_SIZE_MAX);
|
||||
nfa_roses.clear();
|
||||
DEBUG_PRINTF("chunked nfa roses into %zu groups\n", rose_groups.size());
|
||||
deque<LeftfixBouquet> leftfix_groups;
|
||||
chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX);
|
||||
nfa_leftfixes.clear();
|
||||
DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n",
|
||||
leftfix_groups.size());
|
||||
|
||||
for (auto &group : rose_groups) {
|
||||
for (auto &group : leftfix_groups) {
|
||||
mergeNfaLeftfixes(tbi, group);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands,
|
||||
insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -993,15 +993,19 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief True if there is an engine with a top that is not triggered by a
|
||||
* vertex in the Rose graph. This is a consistency check used in assertions.
|
||||
*/
|
||||
bool hasOrphanedTops(const RoseBuildImpl &build) {
|
||||
const RoseGraph &g = build.g;
|
||||
|
||||
unordered_map<left_id, set<u32>> roses;
|
||||
unordered_map<left_id, set<u32>> leftfixes;
|
||||
unordered_map<suffix_id, set<u32>> suffixes;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (g[v].left) {
|
||||
set<u32> &tops = roses[g[v].left];
|
||||
set<u32> &tops = leftfixes[g[v].left];
|
||||
if (!build.isRootSuccessor(v)) {
|
||||
// Tops for infixes come from the in-edges.
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
@ -1014,7 +1018,7 @@ bool hasOrphanedTops(const RoseBuildImpl &build) {
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &e : roses) {
|
||||
for (const auto &e : leftfixes) {
|
||||
if (all_tops(e.first) != e.second) {
|
||||
DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n",
|
||||
as_string_list(all_tops(e.first)).c_str(),
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -280,7 +280,7 @@ void stripCheckHandledInstruction(RoseProgram &prog) {
|
||||
}
|
||||
|
||||
|
||||
/** Returns true if the program may read the the interpreter's work_done flag */
|
||||
/** Returns true if the program may read the interpreter's work_done flag */
|
||||
static
|
||||
bool reads_work_done_flag(const RoseProgram &prog) {
|
||||
for (const auto &ri : prog) {
|
||||
@ -313,6 +313,10 @@ void addMatcherEodProgram(RoseProgram &program) {
|
||||
program.add_block(move(block));
|
||||
}
|
||||
|
||||
/** \brief Insert a flush-combination instruction before the end of \p program. */
void addFlushCombinationProgram(RoseProgram &program) {
    auto flush_instr = make_unique<RoseInstrFlushCombination>();
    program.add_before_end(move(flush_instr));
}
|
||||
|
||||
static
|
||||
void makeRoleCheckLeftfix(const RoseBuildImpl &build,
|
||||
const map<RoseVertex, left_build_info> &leftfix_info,
|
||||
@ -496,6 +500,23 @@ void writeSomOperation(const Report &report, som_operation *op) {
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void addLogicalSetRequired(const Report &report, ReportManager &rm,
|
||||
RoseProgram &program) {
|
||||
if (report.lkey == INVALID_LKEY) {
|
||||
return;
|
||||
}
|
||||
// set matching status of current lkey
|
||||
auto risl = make_unique<RoseInstrSetLogical>(report.lkey,
|
||||
report.offsetAdjust);
|
||||
program.add_before_end(move(risl));
|
||||
// set current lkey's corresponding ckeys active, pending to check
|
||||
for (auto ckey : rm.getRelateCKeys(report.lkey)) {
|
||||
auto risc = make_unique<RoseInstrSetCombination>(ckey);
|
||||
program.add_before_end(move(risc));
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void makeReport(const RoseBuildImpl &build, const ReportID id,
|
||||
const bool has_som, RoseProgram &program) {
|
||||
@ -542,38 +563,62 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
|
||||
|
||||
switch (report.type) {
|
||||
case EXTERNAL_CALLBACK:
|
||||
if (build.rm.numCkeys()) {
|
||||
addFlushCombinationProgram(report_block);
|
||||
}
|
||||
if (!has_som) {
|
||||
// Dedupe is only necessary if this report has a dkey, or if there
|
||||
// are SOM reports to catch up.
|
||||
bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom;
|
||||
if (report.ekey == INVALID_EKEY) {
|
||||
if (needs_dedupe) {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrDedupeAndReport>(
|
||||
report.quashSom, build.rm.getDkey(report),
|
||||
report.onmatch, report.offsetAdjust, end_inst));
|
||||
if (!report.quiet) {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrDedupeAndReport>(
|
||||
report.quashSom, build.rm.getDkey(report),
|
||||
report.onmatch, report.offsetAdjust, end_inst));
|
||||
} else {
|
||||
makeDedupe(build.rm, report, report_block);
|
||||
}
|
||||
} else {
|
||||
report_block.add_before_end(make_unique<RoseInstrReport>(
|
||||
report.onmatch, report.offsetAdjust));
|
||||
if (!report.quiet) {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrReport>(
|
||||
report.onmatch, report.offsetAdjust));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (needs_dedupe) {
|
||||
makeDedupe(build.rm, report, report_block);
|
||||
}
|
||||
report_block.add_before_end(make_unique<RoseInstrReportExhaust>(
|
||||
report.onmatch, report.offsetAdjust, report.ekey));
|
||||
if (!report.quiet) {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrReportExhaust>(
|
||||
report.onmatch, report.offsetAdjust, report.ekey));
|
||||
} else {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrSetExhaust>(report.ekey));
|
||||
}
|
||||
}
|
||||
} else { // has_som
|
||||
makeDedupeSom(build.rm, report, report_block);
|
||||
if (report.ekey == INVALID_EKEY) {
|
||||
report_block.add_before_end(make_unique<RoseInstrReportSom>(
|
||||
report.onmatch, report.offsetAdjust));
|
||||
if (!report.quiet) {
|
||||
report_block.add_before_end(make_unique<RoseInstrReportSom>(
|
||||
report.onmatch, report.offsetAdjust));
|
||||
}
|
||||
} else {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrReportSomExhaust>(
|
||||
report.onmatch, report.offsetAdjust, report.ekey));
|
||||
if (!report.quiet) {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrReportSomExhaust>(
|
||||
report.onmatch, report.offsetAdjust, report.ekey));
|
||||
} else {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrSetExhaust>(report.ekey));
|
||||
}
|
||||
}
|
||||
}
|
||||
addLogicalSetRequired(report, build.rm, report_block);
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_SET:
|
||||
case INTERNAL_SOM_LOC_SET_IF_UNSET:
|
||||
@ -586,6 +631,9 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
|
||||
case INTERNAL_SOM_LOC_MAKE_WRITABLE:
|
||||
case INTERNAL_SOM_LOC_SET_FROM:
|
||||
case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
|
||||
if (build.rm.numCkeys()) {
|
||||
addFlushCombinationProgram(report_block);
|
||||
}
|
||||
if (has_som) {
|
||||
auto ri = make_unique<RoseInstrReportSomAware>();
|
||||
writeSomOperation(report, &ri->som);
|
||||
@ -605,24 +653,48 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
|
||||
case EXTERNAL_CALLBACK_SOM_STORED:
|
||||
case EXTERNAL_CALLBACK_SOM_ABS:
|
||||
case EXTERNAL_CALLBACK_SOM_REV_NFA:
|
||||
if (build.rm.numCkeys()) {
|
||||
addFlushCombinationProgram(report_block);
|
||||
}
|
||||
makeDedupeSom(build.rm, report, report_block);
|
||||
if (report.ekey == INVALID_EKEY) {
|
||||
report_block.add_before_end(make_unique<RoseInstrReportSom>(
|
||||
report.onmatch, report.offsetAdjust));
|
||||
if (!report.quiet) {
|
||||
report_block.add_before_end(make_unique<RoseInstrReportSom>(
|
||||
report.onmatch, report.offsetAdjust));
|
||||
}
|
||||
} else {
|
||||
report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
|
||||
report.onmatch, report.offsetAdjust, report.ekey));
|
||||
if (!report.quiet) {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrReportSomExhaust>(
|
||||
report.onmatch, report.offsetAdjust, report.ekey));
|
||||
} else {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrSetExhaust>(report.ekey));
|
||||
}
|
||||
}
|
||||
addLogicalSetRequired(report, build.rm, report_block);
|
||||
break;
|
||||
case EXTERNAL_CALLBACK_SOM_PASS:
|
||||
if (build.rm.numCkeys()) {
|
||||
addFlushCombinationProgram(report_block);
|
||||
}
|
||||
makeDedupeSom(build.rm, report, report_block);
|
||||
if (report.ekey == INVALID_EKEY) {
|
||||
report_block.add_before_end(make_unique<RoseInstrReportSom>(
|
||||
report.onmatch, report.offsetAdjust));
|
||||
if (!report.quiet) {
|
||||
report_block.add_before_end(make_unique<RoseInstrReportSom>(
|
||||
report.onmatch, report.offsetAdjust));
|
||||
}
|
||||
} else {
|
||||
report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
|
||||
report.onmatch, report.offsetAdjust, report.ekey));
|
||||
if (!report.quiet) {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrReportSomExhaust>(
|
||||
report.onmatch, report.offsetAdjust, report.ekey));
|
||||
} else {
|
||||
report_block.add_before_end(
|
||||
make_unique<RoseInstrSetExhaust>(report.ekey));
|
||||
}
|
||||
}
|
||||
addLogicalSetRequired(report, build.rm, report_block);
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -630,7 +702,6 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
|
||||
throw CompileError("Unable to generate bytecode.");
|
||||
}
|
||||
|
||||
assert(!report_block.empty());
|
||||
program.add_block(move(report_block));
|
||||
}
|
||||
|
||||
@ -1837,7 +1908,7 @@ void makeRoleEagerEodReports(const RoseBuildImpl &build,
|
||||
program.add_before_end(move(eod_program));
|
||||
}
|
||||
|
||||
/* Makes a program for a role/vertex given a specfic pred/in_edge. */
|
||||
/** Makes a program for a role/vertex given a specific pred/in_edge. */
|
||||
static
|
||||
RoseProgram makeRoleProgram(const RoseBuildImpl &build,
|
||||
const map<RoseVertex, left_build_info> &leftfix_info,
|
||||
@ -2045,7 +2116,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
|
||||
}
|
||||
|
||||
if (lit_id == build.eod_event_literal_id) {
|
||||
/* Note: does not require the lit intial program */
|
||||
/* Note: does not require the lit initial program */
|
||||
assert(build.eod_event_literal_id != MO_INVALID_IDX);
|
||||
return role_programs;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -187,6 +187,7 @@ struct ProgramBuild : noncopyable {
|
||||
void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
|
||||
void addSuffixesEodProgram(RoseProgram &program);
|
||||
void addMatcherEodProgram(RoseProgram &program);
|
||||
void addFlushCombinationProgram(RoseProgram &program);
|
||||
|
||||
static constexpr u32 INVALID_QUEUE = ~0U;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -41,7 +41,6 @@
|
||||
#include "rose_build.h"
|
||||
#include "rose_internal.h"
|
||||
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
|
||||
#include "util/charreach.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/ue2_graph.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -27,7 +27,7 @@
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose Input Graph: Used for ng_rose -> rose_build_add communication.
|
||||
* \brief Rose Input Graph: Used for ng_violet -> rose_build_add communication.
|
||||
*
|
||||
* The input graph MUST be a DAG.
|
||||
* There MUST be exactly 1 START or ANCHORED_START vertex.
|
||||
@ -127,7 +127,7 @@ public:
|
||||
flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */
|
||||
u32 min_offset; /**< Minimum offset at which this vertex can match. */
|
||||
u32 max_offset; /**< Maximum offset at which this vertex can match. */
|
||||
size_t index = 0;
|
||||
size_t index = 0; /**< \brief Unique vertex index. */
|
||||
};
|
||||
|
||||
struct RoseInEdgeProps {
|
||||
@ -176,7 +176,13 @@ struct RoseInEdgeProps {
|
||||
/** \brief Haig version of graph, if required. */
|
||||
std::shared_ptr<raw_som_dfa> haig;
|
||||
|
||||
/**
|
||||
* \brief Distance behind the match offset for the literal in the target
|
||||
* vertex that the leftfix needs to be checked at.
|
||||
*/
|
||||
u32 graph_lag;
|
||||
|
||||
/** \brief Unique edge index. */
|
||||
size_t index = 0;
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -199,9 +199,25 @@ struct RoseStateOffsets {
|
||||
* reports with that ekey should not be delivered to the user. */
|
||||
u32 exhausted;
|
||||
|
||||
/** size of exhausted multibit */
|
||||
/** size in bytes of exhausted multibit */
|
||||
u32 exhausted_size;
|
||||
|
||||
/** Logical multibit.
|
||||
*
|
||||
* entry per logical key(operand/operator) (used by Logical Combination). */
|
||||
u32 logicalVec;
|
||||
|
||||
/** size in bytes of logical multibit */
|
||||
u32 logicalVec_size;
|
||||
|
||||
/** Combination multibit.
|
||||
*
|
||||
* entry per combination key (used by Logical Combination). */
|
||||
u32 combVec;
|
||||
|
||||
/** size in bytes of combination multibit */
|
||||
u32 combVec_size;
|
||||
|
||||
/** Multibit for active suffix/outfix engines. */
|
||||
u32 activeLeafArray;
|
||||
|
||||
@ -327,6 +343,11 @@ struct RoseEngine {
|
||||
u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
|
||||
u32 historyRequired; /**< max amount of history required for streaming */
|
||||
u32 ekeyCount; /**< number of exhaustion keys */
|
||||
u32 lkeyCount; /**< number of logical keys */
|
||||
u32 lopCount; /**< number of logical ops */
|
||||
u32 ckeyCount; /**< number of combination keys */
|
||||
u32 logicalTreeOffset; /**< offset to mapping from lkey to LogicalOp */
|
||||
u32 combInfoMapOffset; /**< offset to mapping from ckey to combInfo */
|
||||
u32 dkeyCount; /**< number of dedupe keys */
|
||||
u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */
|
||||
u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
|
||||
@ -404,6 +425,7 @@ struct RoseEngine {
|
||||
u32 roseCount;
|
||||
|
||||
u32 eodProgramOffset; //!< EOD program, otherwise 0.
|
||||
u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */
|
||||
|
||||
u32 lastByteHistoryIterOffset; // if non-zero
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -183,7 +183,25 @@ enum RoseInstructionCode {
|
||||
*/
|
||||
ROSE_INSTR_INCLUDED_JUMP,
|
||||
|
||||
LAST_ROSE_INSTRUCTION = ROSE_INSTR_INCLUDED_JUMP //!< Sentinel.
|
||||
/**
|
||||
* \brief Set matching status of a sub-expression.
|
||||
*/
|
||||
ROSE_INSTR_SET_LOGICAL,
|
||||
|
||||
/**
|
||||
* \brief Set combination status pending checking.
|
||||
*/
|
||||
ROSE_INSTR_SET_COMBINATION,
|
||||
|
||||
/**
|
||||
* \brief Check if compliant with any logical constraints.
|
||||
*/
|
||||
ROSE_INSTR_FLUSH_COMBINATION,
|
||||
|
||||
/** \brief Mark as exhausted instead of report while quiet. */
|
||||
ROSE_INSTR_SET_EXHAUST,
|
||||
|
||||
LAST_ROSE_INSTRUCTION = ROSE_INSTR_SET_EXHAUST //!< Sentinel.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_END {
|
||||
@ -636,4 +654,24 @@ struct ROSE_STRUCT_INCLUDED_JUMP {
|
||||
u8 squash; //!< FDR confirm squash mask for included literal.
|
||||
u32 child_offset; //!< Program offset of included literal.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SET_LOGICAL {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 lkey; //!< Logical key to set.
|
||||
s32 offset_adjust; //!< offsetAdjust from struct Report triggers the flush.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SET_COMBINATION {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 ckey; //!< Combination key to set.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_FLUSH_COMBINATION {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SET_EXHAUST {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 ekey; //!< Exhaustion key.
|
||||
};
|
||||
#endif // ROSE_ROSE_PROGRAM_H
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -127,6 +127,15 @@ void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) {
|
||||
tctxt->lastMatchOffset = offset;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void updateLastCombMatchOffset(struct RoseContext *tctxt, u64a offset) {
|
||||
DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
|
||||
tctxt->lastCombMatchOffset);
|
||||
|
||||
assert(offset >= tctxt->lastCombMatchOffset);
|
||||
tctxt->lastCombMatchOffset = offset;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) {
|
||||
DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -578,6 +578,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
|
||||
tctxt->lastEndOffset = offset;
|
||||
tctxt->filledDelayedSlots = 0;
|
||||
tctxt->lastMatchOffset = 0;
|
||||
tctxt->lastCombMatchOffset = offset;
|
||||
tctxt->minMatchOffset = offset;
|
||||
tctxt->minNonMpvMatchOffset = offset;
|
||||
tctxt->next_mpv_offset = 0;
|
||||
@ -700,6 +701,7 @@ void roseStreamInitEod(const struct RoseEngine *t, u64a offset,
|
||||
tctxt->lastEndOffset = offset;
|
||||
tctxt->filledDelayedSlots = 0;
|
||||
tctxt->lastMatchOffset = 0;
|
||||
tctxt->lastCombMatchOffset = offset; /* DO NOT set 0 here! */
|
||||
tctxt->minMatchOffset = offset;
|
||||
tctxt->minNonMpvMatchOffset = offset;
|
||||
tctxt->next_mpv_offset = offset;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -67,9 +67,9 @@ void prefetch_data(const char *data, unsigned length) {
|
||||
|
||||
/** dummy event handler for use when user does not provide one */
|
||||
static
|
||||
int null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
|
||||
UNUSED unsigned long long to, UNUSED unsigned flags,
|
||||
UNUSED void *ctxt) {
|
||||
int HS_CDECL null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
|
||||
UNUSED unsigned long long to, UNUSED unsigned flags,
|
||||
UNUSED void *ctxt) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -356,6 +356,15 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
|
||||
length, NULL, 0, 0, 0, flags);
|
||||
|
||||
clearEvec(rose, scratch->core_info.exhaustionVector);
|
||||
if (rose->ckeyCount) {
|
||||
scratch->core_info.logicalVector = scratch->bstate +
|
||||
rose->stateOffsets.logicalVec;
|
||||
scratch->core_info.combVector = scratch->bstate +
|
||||
rose->stateOffsets.combVec;
|
||||
scratch->tctxt.lastCombMatchOffset = 0;
|
||||
clearLvec(rose, scratch->core_info.logicalVector,
|
||||
scratch->core_info.combVector);
|
||||
}
|
||||
|
||||
if (!length) {
|
||||
if (rose->boundary.reportZeroEodOffset) {
|
||||
@ -436,6 +445,13 @@ done_scan:
|
||||
scratch);
|
||||
}
|
||||
|
||||
if (rose->flushCombProgramOffset) {
|
||||
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
|
||||
unmarkScratchInUse(scratch);
|
||||
return HS_SCAN_TERMINATED;
|
||||
}
|
||||
}
|
||||
|
||||
set_retval:
|
||||
DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
|
||||
told_to_stop_matching(scratch));
|
||||
@ -500,6 +516,10 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose,
|
||||
roseInitState(rose, state);
|
||||
|
||||
clearEvec(rose, state + rose->stateOffsets.exhausted);
|
||||
if (rose->ckeyCount) {
|
||||
clearLvec(rose, state + rose->stateOffsets.logicalVec,
|
||||
state + rose->stateOffsets.combVec);
|
||||
}
|
||||
|
||||
// SOM state multibit structures.
|
||||
initSomState(rose, state);
|
||||
@ -614,6 +634,13 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
|
||||
getHistory(state, rose, id->offset),
|
||||
getHistoryAmount(rose, id->offset), id->offset, status, 0);
|
||||
|
||||
if (rose->ckeyCount) {
|
||||
scratch->core_info.logicalVector = state +
|
||||
rose->stateOffsets.logicalVec;
|
||||
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
|
||||
scratch->tctxt.lastCombMatchOffset = id->offset;
|
||||
}
|
||||
|
||||
if (rose->somLocationCount) {
|
||||
loadSomFromStream(scratch, id->offset);
|
||||
}
|
||||
@ -657,6 +684,13 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
|
||||
scratch->core_info.status |= STATUS_TERMINATED;
|
||||
}
|
||||
}
|
||||
|
||||
if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
|
||||
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("told to stop matching\n");
|
||||
scratch->core_info.status |= STATUS_TERMINATED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
@ -849,6 +883,12 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
|
||||
populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
|
||||
getHistory(state, rose, id->offset), historyAmount,
|
||||
id->offset, status, flags);
|
||||
if (rose->ckeyCount) {
|
||||
scratch->core_info.logicalVector = state +
|
||||
rose->stateOffsets.logicalVec;
|
||||
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
|
||||
scratch->tctxt.lastCombMatchOffset = id->offset;
|
||||
}
|
||||
assert(scratch->core_info.hlen <= id->offset
|
||||
&& scratch->core_info.hlen <= rose->historyRequired);
|
||||
|
||||
@ -894,6 +934,12 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
|
||||
}
|
||||
}
|
||||
|
||||
if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
|
||||
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
|
||||
scratch->core_info.status |= STATUS_TERMINATED;
|
||||
}
|
||||
}
|
||||
|
||||
setStreamStatus(state, scratch->core_info.status);
|
||||
|
||||
if (likely(!can_stop_matching(scratch))) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -36,6 +36,7 @@
|
||||
#ifndef SCRATCH_H_DA6D4FC06FF410
|
||||
#define SCRATCH_H_DA6D4FC06FF410
|
||||
|
||||
#include "hs_common.h"
|
||||
#include "ue2common.h"
|
||||
#include "rose/rose_types.h"
|
||||
|
||||
@ -88,12 +89,15 @@ struct core_info {
|
||||
void *userContext; /**< user-supplied context */
|
||||
|
||||
/** \brief user-supplied match callback */
|
||||
int (*userCallback)(unsigned int id, unsigned long long from,
|
||||
unsigned long long to, unsigned int flags, void *ctx);
|
||||
int (HS_CDECL *userCallback)(unsigned int id, unsigned long long from,
|
||||
unsigned long long to, unsigned int flags,
|
||||
void *ctx);
|
||||
|
||||
const struct RoseEngine *rose;
|
||||
char *state; /**< full stream state */
|
||||
char *exhaustionVector; /**< pointer to evec for this stream */
|
||||
char *logicalVector; /**< pointer to lvec for this stream */
|
||||
char *combVector; /**< pointer to cvec for this stream */
|
||||
const u8 *buf; /**< main scan buffer */
|
||||
size_t len; /**< length of main scan buffer in bytes */
|
||||
const u8 *hbuf; /**< history buffer */
|
||||
@ -115,6 +119,7 @@ struct RoseContext {
|
||||
* stream */
|
||||
u64a lastMatchOffset; /**< last match offset report up out of rose;
|
||||
* used _only_ for debugging, asserts */
|
||||
u64a lastCombMatchOffset; /**< last match offset of active combinations */
|
||||
u64a minMatchOffset; /**< the earliest offset that we are still allowed to
|
||||
* report */
|
||||
u64a minNonMpvMatchOffset; /**< the earliest offset that non-mpv engines are
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -148,6 +148,13 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose,
|
||||
/* copy the exhaustion multibit */
|
||||
COPY_MULTIBIT(stream_body + so->exhausted, rose->ekeyCount);
|
||||
|
||||
/* copy the logical multibit */
|
||||
COPY_MULTIBIT(stream_body + so->logicalVec,
|
||||
rose->lkeyCount + rose->lopCount);
|
||||
|
||||
/* copy the combination multibit */
|
||||
COPY_MULTIBIT(stream_body + so->combVec, rose->ckeyCount);
|
||||
|
||||
/* copy nfa stream state for endfixes */
|
||||
/* Note: in the expand case the active array has already been copied into
|
||||
* the stream. */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -66,8 +66,13 @@ typedef signed int s32;
|
||||
/* We append the 'a' for aligned, since these aren't common, garden variety
|
||||
* 64 bit values. The alignment is necessary for structs on some platforms,
|
||||
* so we don't end up performing accidental unaligned accesses. */
|
||||
#if defined(_WIN32) && ! defined(_WIN64)
|
||||
typedef unsigned long long ALIGN_ATTR(4) u64a;
|
||||
typedef signed long long ALIGN_ATTR(4) s64a;
|
||||
#else
|
||||
typedef unsigned long long ALIGN_ATTR(8) u64a;
|
||||
typedef signed long long ALIGN_ATTR(8) s64a;
|
||||
#endif
|
||||
|
||||
/* get the SIMD types */
|
||||
#include "util/simd_types.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -305,9 +305,10 @@ public:
|
||||
}
|
||||
|
||||
/// Bitwise OR.
|
||||
bitfield operator|(bitfield a) const {
|
||||
a |= *this;
|
||||
return a;
|
||||
bitfield operator|(const bitfield &a) const {
|
||||
bitfield b = a;
|
||||
b |= *this;
|
||||
return b;
|
||||
}
|
||||
|
||||
/// Bitwise OR-equals.
|
||||
@ -325,9 +326,10 @@ public:
|
||||
}
|
||||
|
||||
/// Bitwise AND.
|
||||
bitfield operator&(bitfield a) const {
|
||||
a &= *this;
|
||||
return a;
|
||||
bitfield operator&(const bitfield &a) const {
|
||||
bitfield b = a;
|
||||
b &= *this;
|
||||
return b;
|
||||
}
|
||||
|
||||
/// Bitwise AND-equals.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -56,7 +56,11 @@ void describeChar(ostream &os, char c, enum cc_output_t out_type) {
|
||||
|
||||
const string backslash((out_type == CC_OUT_DOT ? 2 : 1), '\\');
|
||||
|
||||
#ifdef _WIN32
|
||||
if (c >= 0x21 && c < 0x7F && c != '\\') {
|
||||
#else
|
||||
if (isgraph(c) && c != '\\') {
|
||||
#endif
|
||||
if (escaped.find(c) != string::npos) {
|
||||
os << backslash << c;
|
||||
} else if (out_type == CC_OUT_DOT
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -114,6 +114,21 @@ public:
|
||||
std::memset(data->data(), val, data->size());
|
||||
}
|
||||
|
||||
size_t count(small_color color) const {
|
||||
assert(static_cast<u8>(color) < sizeof(fill_lut));
|
||||
size_t num = 0;
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
size_t byte = i / entries_per_byte;
|
||||
assert(byte < data->size());
|
||||
size_t bit = (i % entries_per_byte) * bit_size;
|
||||
u8 val = ((*data)[byte] >> bit) & bit_mask;
|
||||
if (static_cast<small_color>(val) == color) {
|
||||
num++;
|
||||
}
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
small_color get_impl(key_type key) const {
|
||||
auto i = get(index_map, key);
|
||||
assert(i < n);
|
||||
|
501
src/util/graph_undirected.h
Normal file
501
src/util/graph_undirected.h
Normal file
@ -0,0 +1,501 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Adaptor that presents an undirected view of a bidirectional BGL graph.
|
||||
*
|
||||
* Analogous to the reverse_graph adapter. You can construct one of these for
|
||||
* bidirectional graph g with:
|
||||
*
|
||||
* auto ug = make_undirected_graph(g);
|
||||
*
|
||||
* The vertex descriptor type is the same as that of the underlying graph, but
|
||||
* the edge descriptor is different.
|
||||
*/
|
||||
|
||||
#ifndef GRAPH_UNDIRECTED_H
|
||||
#define GRAPH_UNDIRECTED_H
|
||||
|
||||
#include "util/operators.h"
|
||||
|
||||
#include <boost/graph/adjacency_iterator.hpp>
|
||||
#include <boost/graph/graph_traits.hpp>
|
||||
#include <boost/graph/properties.hpp>
|
||||
#include <boost/iterator/iterator_facade.hpp>
|
||||
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct undirected_graph_tag {};
|
||||
|
||||
template <class BidirectionalGraph, class GraphRef>
|
||||
class undirected_graph;
|
||||
|
||||
namespace undirected_detail {
|
||||
|
||||
template <typename BidirectionalGraph>
|
||||
class undirected_graph_edge_descriptor
|
||||
: totally_ordered<undirected_graph_edge_descriptor<BidirectionalGraph>> {
|
||||
using base_graph_type = BidirectionalGraph;
|
||||
using base_graph_traits = typename boost::graph_traits<base_graph_type>;
|
||||
using base_edge_type = typename base_graph_traits::edge_descriptor;
|
||||
using base_vertex_type = typename base_graph_traits::vertex_descriptor;
|
||||
|
||||
base_edge_type underlying_edge;
|
||||
const base_graph_type *g;
|
||||
bool reverse; // if true, reverse vertices in source() and target()
|
||||
|
||||
inline std::pair<base_vertex_type, base_vertex_type>
|
||||
canonical_edge() const {
|
||||
auto u = std::min(source(underlying_edge, *g),
|
||||
target(underlying_edge, *g));
|
||||
auto v = std::max(source(underlying_edge, *g),
|
||||
target(underlying_edge, *g));
|
||||
return std::make_pair(u, v);
|
||||
}
|
||||
|
||||
template <class BidiGraph, class GraphRef>
|
||||
friend class ::ue2::undirected_graph;
|
||||
|
||||
public:
|
||||
undirected_graph_edge_descriptor() = default;
|
||||
|
||||
undirected_graph_edge_descriptor(base_edge_type edge,
|
||||
const base_graph_type &g_in,
|
||||
bool reverse_in)
|
||||
: underlying_edge(std::move(edge)), g(&g_in), reverse(reverse_in) {}
|
||||
|
||||
bool operator==(const undirected_graph_edge_descriptor &other) const {
|
||||
return canonical_edge() == other.canonical_edge();
|
||||
}
|
||||
|
||||
bool operator<(const undirected_graph_edge_descriptor &other) const {
|
||||
return canonical_edge() < other.canonical_edge();
|
||||
}
|
||||
|
||||
base_vertex_type get_source() const {
|
||||
return reverse ? target(underlying_edge, *g)
|
||||
: source(underlying_edge, *g);
|
||||
}
|
||||
|
||||
base_vertex_type get_target() const {
|
||||
return reverse ? source(underlying_edge, *g)
|
||||
: target(underlying_edge, *g);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace undirected_detail
|
||||
|
||||
template <class BidirectionalGraph, class GraphRef = const BidirectionalGraph &>
|
||||
class undirected_graph {
|
||||
private:
|
||||
using Self = undirected_graph<BidirectionalGraph, GraphRef>;
|
||||
using Traits = boost::graph_traits<BidirectionalGraph>;
|
||||
|
||||
public:
|
||||
using base_type = BidirectionalGraph;
|
||||
using base_ref_type = GraphRef;
|
||||
|
||||
explicit undirected_graph(GraphRef g_in) : g(g_in) {}
|
||||
|
||||
// Graph requirements
|
||||
using vertex_descriptor = typename Traits::vertex_descriptor;
|
||||
using edge_descriptor =
|
||||
undirected_detail::undirected_graph_edge_descriptor<base_type>;
|
||||
using directed_category = boost::undirected_tag;
|
||||
using edge_parallel_category = boost::disallow_parallel_edge_tag;
|
||||
using traversal_category = typename Traits::traversal_category;
|
||||
|
||||
// IncidenceGraph requirements
|
||||
|
||||
/**
|
||||
* \brief Templated iterator used for out_edge_iterator and
|
||||
* in_edge_iterator, depending on the value of Reverse.
|
||||
*/
|
||||
template <bool Reverse>
|
||||
class adj_edge_iterator
|
||||
: public boost::iterator_facade<
|
||||
adj_edge_iterator<Reverse>, edge_descriptor,
|
||||
boost::forward_traversal_tag, edge_descriptor> {
|
||||
vertex_descriptor u;
|
||||
const base_type *g;
|
||||
typename Traits::in_edge_iterator in_it;
|
||||
typename Traits::out_edge_iterator out_it;
|
||||
bool done_in = false;
|
||||
public:
|
||||
adj_edge_iterator() = default;
|
||||
|
||||
adj_edge_iterator(vertex_descriptor u_in, const base_type &g_in,
|
||||
bool end_iter)
|
||||
: u(std::move(u_in)), g(&g_in) {
|
||||
auto pi = in_edges(u, *g);
|
||||
auto po = out_edges(u, *g);
|
||||
if (end_iter) {
|
||||
in_it = pi.second;
|
||||
out_it = po.second;
|
||||
done_in = true;
|
||||
} else {
|
||||
in_it = pi.first;
|
||||
out_it = po.first;
|
||||
if (in_it == pi.second) {
|
||||
done_in = true;
|
||||
find_first_valid_out();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
friend class boost::iterator_core_access;
|
||||
|
||||
void find_first_valid_out() {
|
||||
auto out_end = out_edges(u, *g).second;
|
||||
for (; out_it != out_end; ++out_it) {
|
||||
auto v = target(*out_it, *g);
|
||||
if (!edge(v, u, *g).second) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void increment() {
|
||||
if (!done_in) {
|
||||
auto in_end = in_edges(u, *g).second;
|
||||
assert(in_it != in_end);
|
||||
++in_it;
|
||||
if (in_it == in_end) {
|
||||
done_in = true;
|
||||
find_first_valid_out();
|
||||
}
|
||||
} else {
|
||||
++out_it;
|
||||
find_first_valid_out();
|
||||
}
|
||||
}
|
||||
bool equal(const adj_edge_iterator &other) const {
|
||||
return in_it == other.in_it && out_it == other.out_it;
|
||||
}
|
||||
edge_descriptor dereference() const {
|
||||
if (done_in) {
|
||||
return edge_descriptor(*out_it, *g, Reverse);
|
||||
} else {
|
||||
return edge_descriptor(*in_it, *g, !Reverse);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using out_edge_iterator = adj_edge_iterator<false>;
|
||||
using in_edge_iterator = adj_edge_iterator<true>;
|
||||
|
||||
using degree_size_type = typename Traits::degree_size_type;
|
||||
|
||||
// AdjacencyGraph requirements
|
||||
using adjacency_iterator =
|
||||
typename boost::adjacency_iterator_generator<Self, vertex_descriptor,
|
||||
out_edge_iterator>::type;
|
||||
using inv_adjacency_iterator =
|
||||
typename boost::inv_adjacency_iterator_generator<
|
||||
Self, vertex_descriptor, in_edge_iterator>::type;
|
||||
|
||||
// VertexListGraph requirements
|
||||
using vertex_iterator = typename Traits::vertex_iterator;
|
||||
|
||||
// EdgeListGraph requirements
|
||||
enum {
|
||||
is_edge_list = std::is_convertible<traversal_category,
|
||||
boost::edge_list_graph_tag>::value
|
||||
};
|
||||
|
||||
/** \brief Iterator used for edges(). */
|
||||
class edge_iterator
|
||||
: public boost::iterator_facade<edge_iterator, edge_descriptor,
|
||||
boost::forward_traversal_tag,
|
||||
edge_descriptor> {
|
||||
const base_type *g;
|
||||
typename Traits::edge_iterator it;
|
||||
public:
|
||||
edge_iterator() = default;
|
||||
|
||||
edge_iterator(typename Traits::edge_iterator it_in,
|
||||
const base_type &g_in)
|
||||
: g(&g_in), it(std::move(it_in)) {
|
||||
find_first_valid_edge();
|
||||
}
|
||||
|
||||
private:
|
||||
friend class boost::iterator_core_access;
|
||||
|
||||
void find_first_valid_edge() {
|
||||
const auto end = edges(*g).second;
|
||||
for (; it != end; ++it) {
|
||||
const auto &u = source(*it, *g);
|
||||
const auto &v = target(*it, *g);
|
||||
if (!edge(v, u, *g).second) {
|
||||
break; // No reverse edge, we must visit this one
|
||||
}
|
||||
if (u <= v) {
|
||||
// We have a reverse edge, but we'll return this one (and
|
||||
// skip the other). Note that (u, u) shouldn't be skipped.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void increment() {
|
||||
assert(it != edges(*g).second);
|
||||
++it;
|
||||
find_first_valid_edge();
|
||||
}
|
||||
bool equal(const edge_iterator &other) const {
|
||||
return it == other.it;
|
||||
}
|
||||
edge_descriptor dereference() const {
|
||||
return edge_descriptor(*it, *g, false);
|
||||
}
|
||||
};
|
||||
|
||||
using vertices_size_type = typename Traits::vertices_size_type;
|
||||
using edges_size_type = typename Traits::edges_size_type;
|
||||
|
||||
using graph_tag = undirected_graph_tag;
|
||||
|
||||
using vertex_bundle_type =
|
||||
typename boost::vertex_bundle_type<base_type>::type;
|
||||
using edge_bundle_type = typename boost::edge_bundle_type<base_type>::type;
|
||||
|
||||
vertex_bundle_type &operator[](const vertex_descriptor &d) {
|
||||
return const_cast<base_type &>(g)[d];
|
||||
}
|
||||
const vertex_bundle_type &operator[](const vertex_descriptor &d) const {
|
||||
return g[d];
|
||||
}
|
||||
|
||||
edge_bundle_type &operator[](const edge_descriptor &d) {
|
||||
return const_cast<base_type &>(g)[d.underlying_edge];
|
||||
}
|
||||
const edge_bundle_type &operator[](const edge_descriptor &d) const {
|
||||
return g[d.underlying_edge];
|
||||
}
|
||||
|
||||
static vertex_descriptor null_vertex() { return Traits::null_vertex(); }
|
||||
|
||||
// Accessor free functions follow
|
||||
|
||||
friend std::pair<vertex_iterator, vertex_iterator>
|
||||
vertices(const undirected_graph &ug) {
|
||||
return vertices(ug.g);
|
||||
}
|
||||
|
||||
friend std::pair<edge_iterator, edge_iterator>
|
||||
edges(const undirected_graph &ug) {
|
||||
auto e = edges(ug.g);
|
||||
return std::make_pair(edge_iterator(e.first, ug.g),
|
||||
edge_iterator(e.second, ug.g));
|
||||
}
|
||||
|
||||
friend std::pair<out_edge_iterator, out_edge_iterator>
|
||||
out_edges(const vertex_descriptor &u, const undirected_graph &ug) {
|
||||
return std::make_pair(out_edge_iterator(u, ug.g, false),
|
||||
out_edge_iterator(u, ug.g, true));
|
||||
}
|
||||
|
||||
friend vertices_size_type num_vertices(const undirected_graph &ug) {
|
||||
return num_vertices(ug.g);
|
||||
}
|
||||
|
||||
friend edges_size_type num_edges(const undirected_graph &ug) {
|
||||
auto p = edges(ug);
|
||||
return std::distance(p.first, p.second);
|
||||
}
|
||||
|
||||
friend degree_size_type out_degree(const vertex_descriptor &u,
|
||||
const undirected_graph &ug) {
|
||||
return degree(u, ug);
|
||||
}
|
||||
|
||||
friend vertex_descriptor vertex(vertices_size_type n,
|
||||
const undirected_graph &ug) {
|
||||
return vertex(n, ug.g);
|
||||
}
|
||||
|
||||
friend std::pair<edge_descriptor, bool> edge(const vertex_descriptor &u,
|
||||
const vertex_descriptor &v,
|
||||
const undirected_graph &ug) {
|
||||
auto e = edge(u, v, ug.g);
|
||||
if (e.second) {
|
||||
return std::make_pair(edge_descriptor(e.first, ug.g, false), true);
|
||||
}
|
||||
auto e_rev = edge(v, u, ug.g);
|
||||
if (e_rev.second) {
|
||||
return std::make_pair(edge_descriptor(e_rev.first, ug.g, true),
|
||||
true);
|
||||
}
|
||||
return std::make_pair(edge_descriptor(), false);
|
||||
}
|
||||
|
||||
friend std::pair<in_edge_iterator, in_edge_iterator>
|
||||
in_edges(const vertex_descriptor &v, const undirected_graph &ug) {
|
||||
return std::make_pair(in_edge_iterator(v, ug.g, false),
|
||||
in_edge_iterator(v, ug.g, true));
|
||||
}
|
||||
|
||||
friend std::pair<adjacency_iterator, adjacency_iterator>
|
||||
adjacent_vertices(const vertex_descriptor &u, const undirected_graph &ug) {
|
||||
out_edge_iterator oi, oe;
|
||||
std::tie(oi, oe) = out_edges(u, ug);
|
||||
return std::make_pair(adjacency_iterator(oi, &ug),
|
||||
adjacency_iterator(oe, &ug));
|
||||
}
|
||||
|
||||
friend std::pair<inv_adjacency_iterator, inv_adjacency_iterator>
|
||||
inv_adjacent_vertices(const vertex_descriptor &v,
|
||||
const undirected_graph &ug) {
|
||||
in_edge_iterator ei, ee;
|
||||
std::tie(ei, ee) = in_edges(v, ug);
|
||||
return std::make_pair(inv_adjacency_iterator(ei, &ug),
|
||||
inv_adjacency_iterator(ee, &ug));
|
||||
}
|
||||
|
||||
friend degree_size_type in_degree(const vertex_descriptor &v,
|
||||
const undirected_graph &ug) {
|
||||
return degree(v, ug);
|
||||
}
|
||||
|
||||
friend vertex_descriptor source(const edge_descriptor &e,
|
||||
const undirected_graph &) {
|
||||
return e.get_source();
|
||||
}
|
||||
|
||||
friend vertex_descriptor target(const edge_descriptor &e,
|
||||
const undirected_graph &) {
|
||||
return e.get_target();
|
||||
}
|
||||
|
||||
friend degree_size_type degree(const vertex_descriptor &u,
|
||||
const undirected_graph &ug) {
|
||||
auto p = out_edges(u, ug);
|
||||
return std::distance(p.first, p.second);
|
||||
}
|
||||
|
||||
// Property accessors.
|
||||
|
||||
template <typename Property>
|
||||
using prop_map = typename boost::property_map<undirected_graph, Property>;
|
||||
|
||||
template <typename Property>
|
||||
friend typename prop_map<Property>::type
|
||||
get(Property p, undirected_graph &ug) {
|
||||
return get(p, ug.g);
|
||||
}
|
||||
|
||||
template <typename Property>
|
||||
friend typename prop_map<Property>::const_type
|
||||
get(Property p, const undirected_graph &ug) {
|
||||
return get(p, ug.g);
|
||||
}
|
||||
|
||||
template <typename Property, typename Key>
|
||||
friend typename boost::property_traits<
|
||||
typename prop_map<Property>::const_type>::value_type
|
||||
get(Property p, const undirected_graph &ug, const Key &k) {
|
||||
return get(p, ug.g, get_underlying_descriptor(k));
|
||||
}
|
||||
|
||||
template <typename Property, typename Value, typename Key>
|
||||
friend void put(Property p, const undirected_graph &ug,
|
||||
const Key &k, const Value &val) {
|
||||
put(p, const_cast<BidirectionalGraph &>(ug.g),
|
||||
get_underlying_descriptor(k), val);
|
||||
}
|
||||
|
||||
private:
|
||||
// Accessors are here because our free friend functions (above) cannot see
|
||||
// edge_descriptor's private members.
|
||||
static typename base_type::vertex_descriptor
|
||||
get_underlying_descriptor(const vertex_descriptor &v) {
|
||||
return v;
|
||||
}
|
||||
static typename base_type::edge_descriptor
|
||||
get_underlying_descriptor(const edge_descriptor &e) {
|
||||
return e.underlying_edge;
|
||||
}
|
||||
|
||||
// Reference to underlying bidirectional graph
|
||||
GraphRef g;
|
||||
};
|
||||
|
||||
template <class BidirectionalGraph>
|
||||
undirected_graph<BidirectionalGraph>
|
||||
make_undirected_graph(const BidirectionalGraph &g) {
|
||||
return undirected_graph<BidirectionalGraph>(g);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
namespace boost {
|
||||
|
||||
/* Derive all the property map specializations from the underlying
|
||||
* bidirectional graph. */
|
||||
|
||||
template <typename BidirectionalGraph, typename GraphRef, typename Property>
|
||||
struct property_map<ue2::undirected_graph<BidirectionalGraph, GraphRef>,
|
||||
Property> {
|
||||
using base_map_type = property_map<BidirectionalGraph, Property>;
|
||||
using type = typename base_map_type::type;
|
||||
using const_type = typename base_map_type::const_type;
|
||||
};
|
||||
|
||||
template <class BidirectionalGraph, class GraphRef>
|
||||
struct vertex_property_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
|
||||
: vertex_property_type<BidirectionalGraph> {};
|
||||
|
||||
template <class BidirectionalGraph, class GraphRef>
|
||||
struct edge_property_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
|
||||
: edge_property_type<BidirectionalGraph> {};
|
||||
|
||||
template <class BidirectionalGraph, class GraphRef>
|
||||
struct graph_property_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
|
||||
: graph_property_type<BidirectionalGraph> {};
|
||||
|
||||
template <typename BidirectionalGraph, typename GraphRef>
|
||||
struct vertex_bundle_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
|
||||
: vertex_bundle_type<BidirectionalGraph> {};
|
||||
|
||||
template <typename BidirectionalGraph, typename GraphRef>
|
||||
struct edge_bundle_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
|
||||
: edge_bundle_type<BidirectionalGraph> {};
|
||||
|
||||
template <typename BidirectionalGraph, typename GraphRef>
|
||||
struct graph_bundle_type<ue2::undirected_graph<BidirectionalGraph, GraphRef>>
|
||||
: graph_bundle_type<BidirectionalGraph> {};
|
||||
|
||||
} // namespace boost
|
||||
|
||||
#endif // GRAPH_UNDIRECTED_H
|
77
src/util/logical.h
Normal file
77
src/util/logical.h
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Inline functions for manipulating logical combinations.
|
||||
*/
|
||||
|
||||
#ifndef LOGICAL_H
|
||||
#define LOGICAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
/** Index meaning a given logical key is invalid. */
|
||||
#define INVALID_LKEY (~(u32)0)
|
||||
#define INVALID_CKEY INVALID_LKEY
|
||||
|
||||
/** Logical operation type, the priority is from high to low. */
|
||||
enum LogicalOpType {
|
||||
LOGICAL_OP_NOT,
|
||||
LOGICAL_OP_AND,
|
||||
LOGICAL_OP_OR,
|
||||
LAST_LOGICAL_OP = LOGICAL_OP_OR //!< Sentinel.
|
||||
};
|
||||
|
||||
#define UNKNOWN_OP (~(u32)0)
|
||||
|
||||
/** A logical operation consists of 4 parts. */
|
||||
struct LogicalOp {
|
||||
u32 id; //!< logical operator/operation id
|
||||
u32 op; //!< LogicalOpType
|
||||
u32 lo; //!< left operand
|
||||
u32 ro; //!< right operand
|
||||
};
|
||||
|
||||
/** Each logical combination has its info:
|
||||
* It occupies a region in LogicalOp vector.
|
||||
* It has an exhaustion key for single-match mode. */
|
||||
struct CombInfo {
|
||||
u32 id;
|
||||
u32 ekey; //!< exhaustion key
|
||||
u32 start; //!< ckey of logical operation to start calculating
|
||||
u32 result; //!< ckey of logical operation to give final result
|
||||
u64a min_offset;
|
||||
u64a max_offset;
|
||||
};
|
||||
|
||||
/** Temporarily used to separate operations' id from reports' lkey
|
||||
* when building logicalTree in shunting yard algorithm,
|
||||
* operations' id will be finally renumbered following reports' lkey. */
|
||||
#define LOGICAL_OP_BIT 0x80000000UL
|
||||
|
||||
#endif
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -1197,7 +1197,11 @@ u32 mmbit_sparse_iter_begin(const u8 *bits, u32 total_bits, u32 *idx,
|
||||
assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
|
||||
|
||||
// Our state _may_ be on the stack
|
||||
#ifndef _WIN32
|
||||
assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
|
||||
#else
|
||||
assert(ISALIGNED_N(s, 4));
|
||||
#endif
|
||||
|
||||
MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
|
||||
// iterator should have _something_ at the root level
|
||||
@ -1305,7 +1309,11 @@ u32 mmbit_sparse_iter_next(const u8 *bits, u32 total_bits, u32 last_key,
|
||||
assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
|
||||
|
||||
// Our state _may_ be on the stack
|
||||
#ifndef _WIN32
|
||||
assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
|
||||
#else
|
||||
assert(ISALIGNED_N(s, 4));
|
||||
#endif
|
||||
|
||||
MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
|
||||
MDEBUG_PRINTF("NEXT (total_bits=%u, last_key=%u)\n", total_bits, last_key);
|
||||
@ -1458,7 +1466,11 @@ void mmbit_sparse_iter_unset(u8 *bits, u32 total_bits,
|
||||
assert(ISALIGNED_N(it, alignof(struct mmbit_sparse_iter)));
|
||||
|
||||
// Our state _may_ be on the stack
|
||||
#ifndef _WIN32
|
||||
assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
|
||||
#else
|
||||
assert(ISALIGNED_N(s, 4));
|
||||
#endif
|
||||
|
||||
MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
|
||||
|
||||
|
@ -33,6 +33,7 @@
|
||||
#ifndef MULTIBIT_BUILD_H
|
||||
#define MULTIBIT_BUILD_H
|
||||
|
||||
#include "hs_common.h"
|
||||
#include "multibit_internal.h"
|
||||
#include "hash.h"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -36,6 +36,7 @@
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/exhaust.h" // for INVALID_EKEY
|
||||
#include "util/logical.h" // for INVALID_LKEY
|
||||
#include "util/hash.h"
|
||||
#include "util/order_check.h"
|
||||
|
||||
@ -107,6 +108,16 @@ struct Report {
|
||||
* exhaustible, this will be INVALID_EKEY. */
|
||||
u32 ekey = INVALID_EKEY;
|
||||
|
||||
/** \brief Logical Combination key in each combination.
|
||||
*
|
||||
* If in Logical Combination, the lkey to check before reporting a match.
|
||||
* Additionally before checking the lkey will be set. If not
|
||||
* in Logical Combination, this will be INVALID_LKEY. */
|
||||
u32 lkey = INVALID_LKEY;
|
||||
|
||||
/** \brief Quiet flag for expressions in any logical combination. */
|
||||
bool quiet = false;
|
||||
|
||||
/** \brief Adjustment to add to the match offset when we report a match.
|
||||
*
|
||||
* This is usually used for reports attached to states that form part of a
|
||||
@ -207,16 +218,17 @@ bool operator==(const Report &a, const Report &b) {
|
||||
}
|
||||
|
||||
static inline
|
||||
Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey) {
|
||||
Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey, bool quiet) {
|
||||
Report ir(EXTERNAL_CALLBACK, report);
|
||||
ir.offsetAdjust = offsetAdjust;
|
||||
ir.ekey = ekey;
|
||||
ir.quiet = (u8)quiet;
|
||||
return ir;
|
||||
}
|
||||
|
||||
static inline
|
||||
Report makeCallback(u32 report, s32 offsetAdjust) {
|
||||
return makeECallback(report, offsetAdjust, INVALID_EKEY);
|
||||
return makeECallback(report, offsetAdjust, INVALID_EKEY, false);
|
||||
}
|
||||
|
||||
static inline
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -95,6 +95,31 @@ u32 ReportManager::getExhaustibleKey(u32 a) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
const set<u32> &ReportManager::getRelateCKeys(u32 lkey) {
|
||||
auto it = pl.lkey2ckeys.find(lkey);
|
||||
assert(it != pl.lkey2ckeys.end());
|
||||
return it->second;
|
||||
}
|
||||
|
||||
/**
 * Renumber the logical keys held by the parsed logical combination
 * structure, then copy the resulting lkey into every report whose onmatch
 * value has an entry in the logical key map. Reports without an entry are
 * left unchanged.
 */
void ReportManager::logicalKeyRenumber() {
    pl.logicalKeyRenumber();
    // assign to corresponding report
    for (auto &ir : reportIds) {
        // Single lookup instead of contains() followed by at().
        auto it = pl.toLogicalKeyMap.find(ir.onmatch);
        if (it != pl.toLogicalKeyMap.end()) {
            ir.lkey = it->second;
        }
    }
}
|
||||
|
||||
const vector<LogicalOp> &ReportManager::getLogicalTree() const {
|
||||
return pl.logicalTree;
|
||||
}
|
||||
|
||||
const vector<CombInfo> &ReportManager::getCombInfoMap() const {
|
||||
return pl.combInfoMap;
|
||||
}
|
||||
|
||||
u32 ReportManager::getUnassociatedExhaustibleKey(void) {
|
||||
u32 rv = toExhaustibleKeyMap.size();
|
||||
bool inserted;
|
||||
@ -115,6 +140,18 @@ u32 ReportManager::numEkeys() const {
|
||||
return (u32) toExhaustibleKeyMap.size();
|
||||
}
|
||||
|
||||
u32 ReportManager::numLogicalKeys() const {
|
||||
return (u32) pl.toLogicalKeyMap.size();
|
||||
}
|
||||
|
||||
u32 ReportManager::numLogicalOps() const {
|
||||
return (u32) pl.logicalTree.size();
|
||||
}
|
||||
|
||||
u32 ReportManager::numCkeys() const {
|
||||
return (u32) pl.toCombKeyMap.size();
|
||||
}
|
||||
|
||||
bool ReportManager::patternSetCanExhaust() const {
|
||||
return global_exhaust && !toExhaustibleKeyMap.empty();
|
||||
}
|
||||
@ -219,7 +256,7 @@ Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr,
|
||||
ekey = getExhaustibleKey(expr.report);
|
||||
}
|
||||
|
||||
return makeECallback(expr.report, adj, ekey);
|
||||
return makeECallback(expr.report, adj, ekey, expr.quiet);
|
||||
}
|
||||
|
||||
void ReportManager::setProgramOffset(ReportID id, u32 programOffset) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -38,6 +38,7 @@
|
||||
#include "util/compile_error.h"
|
||||
#include "util/noncopyable.h"
|
||||
#include "util/report.h"
|
||||
#include "parser/logical_combination.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
@ -80,6 +81,15 @@ public:
|
||||
/** \brief Total number of exhaustion keys. */
|
||||
u32 numEkeys() const;
|
||||
|
||||
/** \brief Total number of logical keys. */
|
||||
u32 numLogicalKeys() const;
|
||||
|
||||
/** \brief Total number of logical operators. */
|
||||
u32 numLogicalOps() const;
|
||||
|
||||
/** \brief Total number of combination keys. */
|
||||
u32 numCkeys() const;
|
||||
|
||||
/** \brief True if the pattern set can exhaust (i.e. all patterns are
|
||||
* highlander). */
|
||||
bool patternSetCanExhaust() const;
|
||||
@ -110,6 +120,19 @@ public:
|
||||
* assigning one if necessary. */
|
||||
u32 getExhaustibleKey(u32 expressionIndex);
|
||||
|
||||
/** \brief Get lkey's corresponding ckeys. */
|
||||
const std::set<u32> &getRelateCKeys(u32 lkey);
|
||||
|
||||
/** \brief Renumber lkey for logical operations, after parsed
|
||||
* all logical expressions. */
|
||||
void logicalKeyRenumber();
|
||||
|
||||
/** \brief Used in Rose for writing bytecode. */
|
||||
const std::vector<LogicalOp> &getLogicalTree() const;
|
||||
|
||||
/** \brief Used in Rose for writing bytecode. */
|
||||
const std::vector<CombInfo> &getCombInfoMap() const;
|
||||
|
||||
/** \brief Fetch the dedupe key associated with the given report. Returns
|
||||
* ~0U if no dkey is needed. */
|
||||
u32 getDkey(const Report &r) const;
|
||||
@ -122,6 +145,9 @@ public:
|
||||
* set. */
|
||||
u32 getProgramOffset(ReportID id) const;
|
||||
|
||||
/** \brief Parsed logical combination structure. */
|
||||
ParsedLogical pl;
|
||||
|
||||
private:
|
||||
/** \brief Grey box ref, for checking resource limits. */
|
||||
const Grey &grey;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -89,7 +89,7 @@
|
||||
* (1) Deterministic ordering for vertices and edges
|
||||
* boost::adjacency_list<> uses pointer ordering for vertex_descriptors. As
|
||||
* a result, ordering of vertices and edges between runs is
|
||||
* non-deterministic unless containers, etc use custom comparators.
|
||||
* non-deterministic unless containers, etc use custom comparators.
|
||||
*
|
||||
* (2) Proper types for descriptors, etc.
|
||||
* No more void * for vertex_descriptors and trying to use it for the wrong
|
||||
@ -288,7 +288,7 @@ private:
|
||||
vertex_edge_list<in_edge_hook> in_edge_list;
|
||||
|
||||
/* The out going edges are considered owned by the vertex and
|
||||
* need to be freed when the graph is begin destroyed */
|
||||
* need to be freed when the graph is being destroyed */
|
||||
vertex_edge_list<out_edge_hook> out_edge_list;
|
||||
|
||||
/* The destructor only frees memory owned by the vertex and will leave
|
||||
@ -1025,229 +1025,208 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/** \brief Type trait to enable on whether the Graph is an ue2_graph. */
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::vertex_descriptor>::type
|
||||
struct is_ue2_graph
|
||||
: public ::std::integral_constant<
|
||||
bool, std::is_base_of<graph_detail::graph_base, Graph>::value> {};
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::vertex_descriptor>::type
|
||||
add_vertex(Graph &g) {
|
||||
return g.add_vertex_impl();
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
remove_vertex(typename Graph::vertex_descriptor v, Graph &g) {
|
||||
g.remove_vertex_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
clear_in_edges(typename Graph::vertex_descriptor v, Graph &g) {
|
||||
g.clear_in_edges_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
clear_out_edges(typename Graph::vertex_descriptor v, Graph &g) {
|
||||
g.clear_out_edges_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
clear_vertex(typename Graph::vertex_descriptor v, Graph &g) {
|
||||
g.clear_in_edges_impl(v);
|
||||
g.clear_out_edges_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::vertex_descriptor>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::vertex_descriptor>::type
|
||||
source(typename Graph::edge_descriptor e, const Graph &) {
|
||||
return Graph::source_impl(e);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::vertex_descriptor>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::vertex_descriptor>::type
|
||||
target(typename Graph::edge_descriptor e, const Graph &) {
|
||||
return Graph::target_impl(e);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::degree_size_type>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::degree_size_type>::type
|
||||
out_degree(typename Graph::vertex_descriptor v, const Graph &) {
|
||||
return Graph::out_degree_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
std::pair<typename Graph::out_edge_iterator,
|
||||
typename Graph::out_edge_iterator>>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
std::pair<typename Graph::out_edge_iterator,
|
||||
typename Graph::out_edge_iterator>>::type
|
||||
out_edges(typename Graph::vertex_descriptor v, const Graph &) {
|
||||
return Graph::out_edges_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::degree_size_type>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::degree_size_type>::type
|
||||
in_degree(typename Graph::vertex_descriptor v, const Graph &) {
|
||||
return Graph::in_degree_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
std::pair<typename Graph::in_edge_iterator,
|
||||
typename Graph::in_edge_iterator>>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
std::pair<typename Graph::in_edge_iterator,
|
||||
typename Graph::in_edge_iterator>>::type
|
||||
in_edges(typename Graph::vertex_descriptor v, const Graph &) {
|
||||
return Graph::in_edges_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::degree_size_type>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::degree_size_type>::type
|
||||
degree(typename Graph::vertex_descriptor v, const Graph &) {
|
||||
return Graph::degree_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
std::pair<typename Graph::adjacency_iterator,
|
||||
typename Graph::adjacency_iterator>>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
std::pair<typename Graph::adjacency_iterator,
|
||||
typename Graph::adjacency_iterator>>::type
|
||||
adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) {
|
||||
return Graph::adjacent_vertices_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
std::pair<typename Graph::edge_descriptor, bool>>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
std::pair<typename Graph::edge_descriptor, bool>>::type
|
||||
edge(typename Graph::vertex_descriptor u, typename Graph::vertex_descriptor v,
|
||||
const Graph &g) {
|
||||
return g.edge_impl(u, v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
std::pair<typename Graph::inv_adjacency_iterator,
|
||||
typename Graph::inv_adjacency_iterator>>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
std::pair<typename Graph::inv_adjacency_iterator,
|
||||
typename Graph::inv_adjacency_iterator>>::type
|
||||
inv_adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) {
|
||||
return Graph::inv_adjacent_vertices_impl(v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
std::pair<typename Graph::edge_descriptor, bool>>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
std::pair<typename Graph::edge_descriptor, bool>>::type
|
||||
add_edge(typename Graph::vertex_descriptor u,
|
||||
typename Graph::vertex_descriptor v, Graph &g) {
|
||||
return g.add_edge_impl(u, v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
remove_edge(typename Graph::edge_descriptor e, Graph &g) {
|
||||
g.remove_edge_impl(e);
|
||||
}
|
||||
|
||||
template<typename Graph, typename Iter>
|
||||
typename std::enable_if<
|
||||
!std::is_convertible<Iter, typename Graph::edge_descriptor>::value
|
||||
&& std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
!std::is_convertible<Iter, typename Graph::edge_descriptor>::value &&
|
||||
is_ue2_graph<Graph>::value>::type
|
||||
remove_edge(Iter it, Graph &g) {
|
||||
g.remove_edge_impl(*it);
|
||||
}
|
||||
|
||||
template<typename Graph, typename Predicate>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
remove_out_edge_if(typename Graph::vertex_descriptor v, Predicate pred,
|
||||
Graph &g) {
|
||||
g.remove_out_edge_if_impl(v, pred);
|
||||
}
|
||||
|
||||
template<typename Graph, typename Predicate>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
remove_in_edge_if(typename Graph::vertex_descriptor v, Predicate pred,
|
||||
Graph &g) {
|
||||
g.remove_in_edge_if_impl(v, pred);
|
||||
}
|
||||
|
||||
template<typename Graph, typename Predicate>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
remove_edge_if(Predicate pred, Graph &g) {
|
||||
g.remove_edge_if_impl(pred);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
remove_edge(const typename Graph::vertex_descriptor &u,
|
||||
const typename Graph::vertex_descriptor &v, Graph &g) {
|
||||
g.remove_edge_impl(u, v);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::vertices_size_type>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::vertices_size_type>::type
|
||||
num_vertices(const Graph &g) {
|
||||
return g.num_vertices_impl();
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
std::pair<typename Graph::vertex_iterator,
|
||||
typename Graph::vertex_iterator>>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
std::pair<typename Graph::vertex_iterator,
|
||||
typename Graph::vertex_iterator>>::type
|
||||
vertices(const Graph &g) {
|
||||
return g.vertices_impl();
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::edges_size_type>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::edges_size_type>::type
|
||||
num_edges(const Graph &g) {
|
||||
return g.num_edges_impl();
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
std::pair<typename Graph::edge_iterator,
|
||||
typename Graph::edge_iterator>>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
std::pair<typename Graph::edge_iterator,
|
||||
typename Graph::edge_iterator>>::type
|
||||
edges(const Graph &g) {
|
||||
return g.edges_impl();
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::vertex_descriptor>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::vertex_descriptor>::type
|
||||
add_vertex(const typename Graph::vertex_property_type &vp, Graph &g) {
|
||||
return g.add_vertex_impl(vp);
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
std::pair<typename Graph::edge_descriptor, bool>>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
std::pair<typename Graph::edge_descriptor, bool>>::type
|
||||
add_edge(typename Graph::vertex_descriptor u,
|
||||
typename Graph::vertex_descriptor v,
|
||||
const typename Graph::edge_property_type &ep, Graph &g) {
|
||||
@ -1255,35 +1234,59 @@ add_edge(typename Graph::vertex_descriptor u,
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
renumber_edges(Graph &g) {
|
||||
g.renumber_edges_impl();
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value>::type
|
||||
renumber_vertices(Graph &g) {
|
||||
g.renumber_vertices_impl();
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::vertices_size_type>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::vertices_size_type>::type
|
||||
vertex_index_upper_bound(const Graph &g) {
|
||||
return g.vertex_index_upper_bound_impl();
|
||||
}
|
||||
|
||||
template<typename Graph>
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value,
|
||||
typename Graph::edges_size_type>::type
|
||||
typename std::enable_if<is_ue2_graph<Graph>::value,
|
||||
typename Graph::edges_size_type>::type
|
||||
edge_index_upper_bound(const Graph &g) {
|
||||
return g.edge_index_upper_bound_impl();
|
||||
}
|
||||
|
||||
/**
 * Decomposes a pointer-to-member type. The primary template is deliberately
 * empty, so the nested types exist only for pointer-to-member arguments.
 */
template<typename T>
struct pointer_to_member_traits {
};

/**
 * Specialisation for "Member Owner::*": exposes the pointed-to member type as
 * member_type and the class it belongs to as class_type.
 */
template<typename Member, typename Owner>
struct pointer_to_member_traits<Member Owner::*> {
    typedef Member member_type;
    typedef Owner class_type;
};
|
||||
|
||||
template<typename Graph, typename Property, typename Enable = void>
|
||||
struct is_ue2_vertex_or_edge_property {
|
||||
static constexpr bool value = false;
|
||||
};
|
||||
|
||||
template<typename Graph, typename Property>
|
||||
struct is_ue2_vertex_or_edge_property<
|
||||
Graph, Property, typename std::enable_if<is_ue2_graph<Graph>::value &&
|
||||
std::is_member_object_pointer<
|
||||
Property>::value>::type> {
|
||||
private:
|
||||
using class_type = typename pointer_to_member_traits<Property>::class_type;
|
||||
using vertex_type = typename Graph::vertex_property_type;
|
||||
using edge_type = typename Graph::edge_property_type;
|
||||
public:
|
||||
static constexpr bool value =
|
||||
std::is_same<class_type, vertex_type>::value ||
|
||||
std::is_same<class_type, edge_type>::value;
|
||||
};
|
||||
|
||||
using boost::vertex_index;
|
||||
using boost::edge_index;
|
||||
|
||||
@ -1295,13 +1298,53 @@ namespace boost {
|
||||
* adaptors (like filtered_graph) to know the type of the property maps */
|
||||
template<typename Graph, typename Prop>
|
||||
struct property_map<Graph, Prop,
|
||||
typename std::enable_if<
|
||||
std::is_base_of<ue2::graph_detail::graph_base, Graph>::value
|
||||
>::type > {
|
||||
typedef decltype(get(std::declval<Prop>(),
|
||||
std::declval<Graph &>())) type;
|
||||
typedef decltype(get(std::declval<Prop>(),
|
||||
std::declval<const Graph &>())) const_type;
|
||||
typename std::enable_if<ue2::is_ue2_graph<Graph>::value &&
|
||||
ue2::is_ue2_vertex_or_edge_property<
|
||||
Graph, Prop>::value>::type> {
|
||||
private:
|
||||
using prop_traits = ue2::pointer_to_member_traits<Prop>;
|
||||
using member_type = typename prop_traits::member_type;
|
||||
using class_type = typename prop_traits::class_type;
|
||||
public:
|
||||
using type = typename Graph::template prop_map<member_type &, class_type>;
|
||||
using const_type = typename Graph::template prop_map<const member_type &,
|
||||
class_type>;
|
||||
};
|
||||
|
||||
template<typename Graph>
|
||||
struct property_map<Graph, vertex_index_t,
|
||||
typename std::enable_if<ue2::is_ue2_graph<Graph>::value>::type> {
|
||||
using v_prop_type = typename Graph::vertex_property_type;
|
||||
using type = typename Graph::template prop_map<size_t &, v_prop_type>;
|
||||
using const_type =
|
||||
typename Graph::template prop_map<const size_t &, v_prop_type>;
|
||||
};
|
||||
|
||||
template<typename Graph>
|
||||
struct property_map<Graph, edge_index_t,
|
||||
typename std::enable_if<ue2::is_ue2_graph<Graph>::value>::type> {
|
||||
using e_prop_type = typename Graph::edge_property_type;
|
||||
using type = typename Graph::template prop_map<size_t &, e_prop_type>;
|
||||
using const_type =
|
||||
typename Graph::template prop_map<const size_t &, e_prop_type>;
|
||||
};
|
||||
|
||||
template<typename Graph>
|
||||
struct property_map<Graph, vertex_all_t,
|
||||
typename std::enable_if<ue2::is_ue2_graph<Graph>::value>::type> {
|
||||
using v_prop_type = typename Graph::vertex_property_type;
|
||||
using type = typename Graph::template prop_map_all<v_prop_type &>;
|
||||
using const_type =
|
||||
typename Graph::template prop_map_all<const v_prop_type &>;
|
||||
};
|
||||
|
||||
template<typename Graph>
|
||||
struct property_map<Graph, edge_all_t,
|
||||
typename std::enable_if<ue2::is_ue2_graph<Graph>::value>::type> {
|
||||
using e_prop_type = typename Graph::edge_property_type;
|
||||
using type = typename Graph::template prop_map_all<e_prop_type &>;
|
||||
using const_type =
|
||||
typename Graph::template prop_map_all<const e_prop_type &>;
|
||||
};
|
||||
|
||||
} // namespace boost
|
||||
|
@ -1,6 +1,3 @@
|
||||
if (WIN32)
|
||||
return()
|
||||
endif()
|
||||
find_package(Threads)
|
||||
|
||||
# remove some warnings
|
||||
@ -12,11 +9,18 @@ include_directories(${PROJECT_SOURCE_DIR})
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
|
||||
include_directories(${PROJECT_SOURCE_DIR}/util)
|
||||
|
||||
# Select which tool subdirectories are built.
#
# On Windows only the tools listed explicitly below are added. Everywhere else
# every immediate subdirectory that contains a CMakeLists.txt is added.
# Each subdirectory must be added exactly once: add_subdirectory() fails if the
# same binary directory is used twice, so there must be no second,
# unconditional copy of the glob loop alongside this conditional.
#
# NOTE(review): an early `if (WIN32) return() endif()` near the top of this
# file would make the WIN32 branch below unreachable - confirm it is gone.
if (WIN32)
    add_subdirectory(hscheck)
    add_subdirectory(hsbench)
    add_subdirectory(hsdump)
    add_subdirectory(hscollider)
else()
    # add any subdir with a cmake file
    file(GLOB dirents RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" *)
    foreach(e ${dirents})
        if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/${e}" AND
            EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${e}/CMakeLists.txt")
            add_subdirectory(${e})
        endif ()
    endforeach ()
endif()
|
||||
|
@ -31,6 +31,8 @@ SET(hsbench_SOURCES
|
||||
common.h
|
||||
data_corpus.cpp
|
||||
data_corpus.h
|
||||
engine.cpp
|
||||
engine.h
|
||||
engine_hyperscan.cpp
|
||||
engine_hyperscan.h
|
||||
heapstats.cpp
|
||||
@ -45,6 +47,28 @@ SET(hsbench_SOURCES
|
||||
timer.h
|
||||
)
|
||||
|
||||
# Chimera support is optional: when enabled, define HS_HYBRID and compile the
# Chimera and PCRE engine sources into hsbench as well.
if (BUILD_CHIMERA)
    add_definitions(-DHS_HYBRID)
    list(APPEND hsbench_SOURCES
        engine_chimera.cpp
        engine_chimera.h
        engine_pcre.cpp
        engine_pcre.h
    )
endif()

add_executable(hsbench ${hsbench_SOURCES})

# Link hsbench exactly once. The keyword-less target_link_libraries signature
# is kept on purpose so it is not mixed with a keyword form on this target.
if (BUILD_CHIMERA)
    include_directories(${PCRE_INCLUDE_DIRS})
    if(NOT WIN32)
        target_link_libraries(hsbench hs chimera ${PCRE_LDFLAGS} databaseutil
            expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
    else()
        # On Windows the library is named directly instead of using
        # PCRE_LDFLAGS.
        target_link_libraries(hsbench hs chimera pcre databaseutil
            expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
    endif()
else()
    target_link_libraries(hsbench hs databaseutil expressionutil
        ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
endif()
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -42,6 +42,12 @@ extern bool forceEditDistance;
|
||||
extern unsigned editDistance;
|
||||
extern bool printCompressSize;
|
||||
|
||||
/** Outcome of one complete scan: elapsed time plus the number of matches. */
struct ResultEntry {
    /** Time taken for scan, in seconds. */
    double seconds = 0;

    /** Count of matches found. */
    unsigned int matches = 0;
};
|
||||
|
||||
struct SqlFailure {
|
||||
explicit SqlFailure(const std::string &s) : message(s) {}
|
||||
std::string message;
|
||||
|
35
tools/hsbench/engine.cpp
Normal file
35
tools/hsbench/engine.cpp
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "engine.h"
|
||||
|
||||
// Out-of-line definition of the virtual destructor declared in engine.h;
// there is nothing to release at this level.
EngineContext::~EngineContext() = default;
|
||||
|
||||
// Out-of-line definition of the virtual destructor declared in engine.h;
// there is nothing to release at this level.
EngineStream::~EngineStream() = default;
|
||||
|
||||
// Out-of-line definition of the virtual destructor declared in engine.h;
// there is nothing to release at this level.
Engine::~Engine() = default;
|
94
tools/hsbench/engine.h
Normal file
94
tools/hsbench/engine.h
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINE_H
|
||||
#define ENGINE_H
|
||||
|
||||
#include "common.h"
|
||||
#include "sqldb.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
// Engines have an engine context which is allocated on a per-thread basis.
|
||||
class EngineContext : boost::noncopyable {
|
||||
public:
|
||||
virtual ~EngineContext();
|
||||
};
|
||||
|
||||
/** Streaming mode scans have persistent stream state associated with them. */
|
||||
class EngineStream : boost::noncopyable {
|
||||
public:
|
||||
virtual ~EngineStream();
|
||||
unsigned int sn;
|
||||
};
|
||||
|
||||
// Benchmarking engine
|
||||
class Engine : boost::noncopyable {
|
||||
public:
|
||||
virtual ~Engine();
|
||||
|
||||
// allocate an EngineContext
|
||||
virtual std::unique_ptr<EngineContext> makeContext() const = 0;
|
||||
|
||||
// non-streaming scan
|
||||
virtual void scan(const char *data, unsigned len, unsigned blockId,
|
||||
ResultEntry &results, EngineContext &ectx) const = 0;
|
||||
|
||||
// vectoring scan
|
||||
virtual void scan_vectored(const char *const *data,
|
||||
const unsigned int *len, unsigned int count,
|
||||
unsigned int streamId, ResultEntry &result,
|
||||
EngineContext &ectx) const = 0;
|
||||
|
||||
// stream open
|
||||
virtual std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const = 0;
|
||||
|
||||
// stream close
|
||||
virtual void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const = 0;
|
||||
|
||||
// stream compress and expand
|
||||
virtual void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const = 0;
|
||||
|
||||
// streaming scan
|
||||
virtual void streamScan(EngineStream &stream, const char *data,
|
||||
unsigned int len, unsigned int id,
|
||||
ResultEntry &result) const = 0;
|
||||
|
||||
virtual void printStats() const = 0;
|
||||
|
||||
virtual void sqlStats(SqlDB &db) const = 0;
|
||||
};
|
||||
|
||||
#endif // ENGINE_H
|
327
tools/hsbench/engine_chimera.cpp
Normal file
327
tools/hsbench/engine_chimera.cpp
Normal file
@ -0,0 +1,327 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "ExpressionParser.h"
|
||||
#include "common.h"
|
||||
#include "engine_chimera.h"
|
||||
#include "expressions.h"
|
||||
#include "heapstats.h"
|
||||
#include "sqldb.h"
|
||||
#include "timer.h"
|
||||
|
||||
#include "chimera/ch_database.h"
|
||||
|
||||
#include "util/make_unique.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Allocates the Chimera scratch space for this context from \p db.
 *
 * A failed allocation is fatal: the error is reported and the process aborts,
 * matching how scan() treats a failed ch_scan. The return code is checked
 * explicitly because the assert alone vanishes in release builds.
 */
EngineCHContext::EngineCHContext(const ch_database_t *db) {
    ch_error_t rv = ch_alloc_scratch(db, &scratch);
    if (rv != CH_SUCCESS) {
        printf("Fatal error: ch_alloc_scratch returned error %d\n", rv);
        abort();
    }
    assert(scratch);
}
|
||||
|
||||
/** Releases the scratch space owned by this context. */
EngineCHContext::~EngineCHContext()
{
    ch_free_scratch(scratch);
}
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanCHContext {
|
||||
ScanCHContext(unsigned id_in, ResultEntry &result_in)
|
||||
: id(id_in), result(result_in) {}
|
||||
unsigned id;
|
||||
ResultEntry &result;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Callback function called for every match that Chimera produces, used when
|
||||
* "echo matches" is off.
|
||||
*/
|
||||
static
|
||||
int HS_CDECL onMatch(unsigned int, unsigned long long, unsigned long long,
|
||||
unsigned int, unsigned int, const ch_capture_t *,
|
||||
void *ctx) {
|
||||
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function called for every match that Chimera produces when "echo
|
||||
* matches" is enabled.
|
||||
*/
|
||||
static
|
||||
int HS_CDECL onMatchEcho(unsigned int id, unsigned long long,
|
||||
unsigned long long to, unsigned int, unsigned int,
|
||||
const ch_capture_t *, void *ctx) {
|
||||
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
printf("Match @%u:%llu for %u\n", sc->id, to, id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Wraps an already-compiled Chimera database together with the statistics
 * gathered while compiling it. The destructor frees \p db_in, so the engine
 * effectively owns it.
 */
EngineChimera::EngineChimera(ch_database_t *db_in, CompileCHStats cs)
    : db(db_in),
      compile_stats(std::move(cs))
{
    assert(db);
}
|
||||
|
||||
/** Frees the Chimera database held by this engine. */
EngineChimera::~EngineChimera()
{
    ch_free_database(db);
}
|
||||
|
||||
/** Creates a new EngineCHContext (and with it a scratch space) for this
 * engine's database. */
unique_ptr<EngineContext> EngineChimera::makeContext() const {
    unique_ptr<EngineContext> context = ue2::make_unique<EngineCHContext>(db);
    return context;
}
|
||||
|
||||
void EngineChimera::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const {
|
||||
assert(data);
|
||||
|
||||
auto &ctx = static_cast<EngineCHContext &>(ectx);
|
||||
ScanCHContext sc(id, result);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
ch_error_t rv = ch_scan(db, data, len, 0, ctx.scratch, callback, nullptr,
|
||||
&sc);
|
||||
|
||||
if (rv != CH_SUCCESS) {
|
||||
printf("Fatal error: ch_scan returned error %d\n", rv);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// vectoring scan
|
||||
void EngineChimera::scan_vectored(UNUSED const char *const *data,
|
||||
UNUSED const unsigned int *len,
|
||||
UNUSED unsigned int count,
|
||||
UNUSED unsigned int streamId,
|
||||
UNUSED ResultEntry &result,
|
||||
UNUSED EngineContext &ectx) const {
|
||||
printf("Hybrid matcher can't support vectored mode.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
/**
 * Stream open: not supported by this engine. Prints a message and aborts, so
 * it never actually returns a stream; both arguments are ignored.
 */
unique_ptr<EngineStream> EngineChimera::streamOpen(UNUSED EngineContext &ectx,
                                                   UNUSED unsigned id) const
{
    printf("Hybrid matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/**
 * Stream close: not supported by this engine. Prints a message and aborts;
 * both arguments are ignored.
 */
void EngineChimera::streamClose(UNUSED unique_ptr<EngineStream> stream,
                                UNUSED ResultEntry &result) const
{
    printf("Hybrid matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/**
 * Streaming scan: not supported by this engine. Prints a message and aborts;
 * every argument is ignored.
 */
void EngineChimera::streamScan(UNUSED EngineStream &stream,
                               UNUSED const char *data,
                               UNUSED unsigned len, UNUSED unsigned id,
                               UNUSED ResultEntry &result) const
{
    printf("Hybrid matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/**
 * Stream compress/expand: not supported by this engine. Prints a message and
 * aborts; both arguments are ignored.
 */
void EngineChimera::streamCompressExpand(UNUSED EngineStream &stream,
                                         UNUSED vector<char> &temp) const
{
    printf("Hybrid matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/**
 * Prints the compile-time summary held in compile_stats to stdout.
 *
 * The non-Windows branches use the "'" printf flag on the numeric fields; the
 * _WIN32 branches print the same fields without that flag.
 */
void EngineChimera::printStats() const {
    const auto &stats = compile_stats;

    // The signature-set line is only shown when a set name was recorded.
    if (!stats.sigs_name.empty()) {
        printf("Signature set:        %s\n", stats.sigs_name.c_str());
    }
    printf("Signatures:        %s\n", stats.signatures.c_str());
    printf("Chimera info:      %s\n", stats.db_info.c_str());

#ifndef _WIN32
    printf("Expression count:  %'zu\n", stats.expressionCount);
    printf("Bytecode size:     %'zu bytes\n", stats.compiledSize);
#else
    printf("Expression count:  %zu\n", stats.expressionCount);
    printf("Bytecode size:     %zu bytes\n", stats.compiledSize);
#endif

    printf("Database CRC:      0x%x\n", stats.crc32);

#ifndef _WIN32
    printf("Scratch size:      %'zu bytes\n", stats.scratchSize);
    printf("Compile time:      %'0.3Lf seconds\n", stats.compileSecs);
    printf("Peak heap usage:   %'u bytes\n", stats.peakMemorySize);
#else
    printf("Scratch size:      %zu bytes\n", stats.scratchSize);
    printf("Compile time:      %0.3Lf seconds\n", stats.compileSecs);
    printf("Peak heap usage:   %u bytes\n", stats.peakMemorySize);
#endif
}
|
||||
|
||||
/**
 * Inserts one row of compile statistics into the "Compile" table of
 * \p sqldb. The CRC is stored as text: "0x" followed by the value in hex.
 */
void EngineChimera::sqlStats(SqlDB &sqldb) const {
    static const string insertSql =
        "INSERT INTO Compile ("
            "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
            "scratchSize, compileSecs, peakMemory) "
        "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    ostringstream crcText;
    crcText << "0x" << hex << compile_stats.crc32;

    const auto &stats = compile_stats;
    sqldb.insert_all(insertSql, stats.sigs_name, stats.signatures,
                     stats.db_info, stats.expressionCount,
                     stats.compiledSize, crcText.str(), stats.scratchSize,
                     stats.compileSecs, stats.peakMemorySize);
}
|
||||
|
||||
/**
 * Compiles \p expressions into a Chimera database and wraps it, together with
 * the statistics gathered on the way, in an EngineChimera.
 *
 * \param expressions  Map from pattern id to the raw expression text; each
 *                     entry is split by readExpression() into a pattern and
 *                     flags. Expressions that set any extended-parameter
 *                     flags are rejected.
 * \param name         Recorded as the "signatures" statistic. When
 *                     \p sigs_name is non-empty only the part after the last
 *                     '/' is kept.
 * \param sigs_name    Recorded as the signature-set name.
 * \return The engine, or nullptr if the map is empty, an expression cannot be
 *         parsed, compilation fails, or a database/scratch query fails. The
 *         database and scratch are released on every failure path.
 */
unique_ptr<EngineChimera>
buildEngineChimera(const ExpressionMap &expressions, const string &name,
                   const string &sigs_name) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    const unsigned int count = expressions.size();

    vector<string> exprs;
    vector<unsigned int> flags, ids;
    exprs.reserve(count);
    flags.reserve(count);
    ids.reserve(count);

    for (const auto &m : expressions) {
        string expr;
        unsigned int f = 0;
        // Only used to detect extended parameters, which are rejected below.
        hs_expr_ext extparam;
        extparam.flags = 0;
        if (!readExpression(m.second, expr, &f, &extparam)) {
            printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                   m.first);
            return nullptr;
        }

        if (extparam.flags) {
            printf("Error parsing PCRE with extended flags: %s (id %u)\n",
                   m.second.c_str(), m.first);
            return nullptr;
        }
        exprs.push_back(expr);
        ids.push_back(m.first);
        flags.push_back(f);
    }

    // Our compiler takes an array of plain ol' C strings.
    vector<const char *> patterns(count);
    for (unsigned int i = 0; i < count; i++) {
        patterns[i] = exprs[i].c_str();
    }

    Timer timer;
    timer.start();

    // Capture groups by default
    unsigned int mode = CH_MODE_GROUPS;
    // Both out-parameters start as null so the error paths below never read
    // an indeterminate pointer.
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;
    ch_error_t err = ch_compile_multi(patterns.data(), flags.data(),
                                      ids.data(), count, mode, nullptr, &db,
                                      &compile_err);

    timer.complete();
    const long double compileSecs = timer.seconds();
    const unsigned int peakMemorySize = getPeakHeap();

    if (err != CH_SUCCESS) {
        if (compile_err) {
            // A negative expression index means the error is not tied to one
            // particular signature.
            if (compile_err->expression >= 0) {
                printf("Compile error for signature #%d: %s\n",
                       compile_err->expression, compile_err->message);
            } else {
                printf("Compile error: %s\n", compile_err->message);
            }
            ch_free_compile_error(compile_err);
        } else {
            printf("Compile error: ch_compile_multi returned error %d\n", err);
        }
        return nullptr;
    }

    // From here on the database is freed automatically on every early return;
    // ownership is handed to the engine only at the very end.
    unique_ptr<ch_database_t, decltype(&ch_free_database)>
        db_guard(db, &ch_free_database);

    size_t compiledSize = 0;
    err = ch_database_size(db, &compiledSize);
    if (err != CH_SUCCESS) {
        return nullptr;
    }
    assert(compiledSize > 0);

    string db_info;
    char *info = nullptr;
    err = ch_database_info(db, &info);
    if (err != CH_SUCCESS) {
        return nullptr;
    }
    db_info = string(info);
    free(info);

    // Allocate scratch temporarily to find its size: this is a good test
    // anyway.
    ch_scratch_t *scratch = nullptr;
    err = ch_alloc_scratch(db, &scratch);
    if (err != CH_SUCCESS) {
        return nullptr;
    }

    size_t scratchSize = 0;
    err = ch_scratch_size(scratch, &scratchSize);
    // The temporary scratch is released whether or not the size query worked.
    ch_free_scratch(scratch);
    if (err != CH_SUCCESS) {
        return nullptr;
    }

    // Collect summary information.
    CompileCHStats cs;
    cs.sigs_name = sigs_name;
    if (!sigs_name.empty()) {
        // If there is no '/', find_last_of returns npos and npos + 1 wraps to
        // 0, so the whole name is kept.
        const auto pos = name.find_last_of('/');
        cs.signatures = name.substr(pos + 1);
    } else {
        cs.signatures = name;
    }
    cs.db_info = db_info;
    cs.expressionCount = expressions.size();
    cs.compiledSize = compiledSize;
    cs.scratchSize = scratchSize;
    cs.compileSecs = compileSecs;
    cs.peakMemorySize = peakMemorySize;

    auto engine = ue2::make_unique<EngineChimera>(db, move(cs));
    // The engine now owns the database (its destructor frees it).
    db_guard.release();
    return engine;
}
|
103
tools/hsbench/engine_chimera.h
Normal file
103
tools/hsbench/engine_chimera.h
Normal file
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINECHIMERA_H
|
||||
#define ENGINECHIMERA_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "engine.h"
|
||||
|
||||
#include "chimera/ch.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/** Information about the database compile. */
|
||||
struct CompileCHStats {
    // Identification of the pattern set that was compiled.
    std::string sigs_name;  //!< Signature set name; may be empty.
    std::string signatures; //!< Expression source name (basename when a
                            //!< signature set name was supplied).
    std::string db_info;    //!< Text reported by ch_database_info().

    // Sizes and counts gathered after a successful compile.
    size_t expressionCount = 0; //!< Number of expressions handed to the build.
    size_t compiledSize = 0;    //!< Value reported by ch_database_size().
    uint32_t crc32 = 0;         //!< NOTE(review): not populated by the builder
                                //!< code visible here; stays 0 -- confirm.
    size_t scratchSize = 0;     //!< Value reported by ch_scratch_size().

    // Cost of the compile itself.
    long double compileSecs = 0.0L;  //!< Wall time measured around the compile.
    unsigned int peakMemorySize = 0; //!< getPeakHeap() sampled after compile.
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EngineCHContext : public EngineContext{
|
||||
public:
|
||||
explicit EngineCHContext(const ch_database_t *db);
|
||||
~EngineCHContext();
|
||||
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
};
|
||||
|
||||
/** Chimera Engine for scanning data. */
|
||||
class EngineChimera : public Engine {
|
||||
public:
|
||||
explicit EngineChimera(ch_database_t *db, CompileCHStats cs);
|
||||
~EngineChimera();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
|
||||
void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
|
||||
void printStats() const;
|
||||
|
||||
void sqlStats(SqlDB &db) const;
|
||||
|
||||
private:
|
||||
ch_database_t *db;
|
||||
CompileCHStats compile_stats;
|
||||
};
|
||||
|
||||
std::unique_ptr<EngineChimera>
|
||||
buildEngineChimera(const ExpressionMap &expressions, const std::string &name,
|
||||
const std::string &sigs_name);
|
||||
|
||||
#endif // ENGINECHIMERA_H
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -57,20 +57,22 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
EngineContext::EngineContext(const hs_database_t *db) {
|
||||
EngineHSContext::EngineHSContext(const hs_database_t *db) {
|
||||
hs_alloc_scratch(db, &scratch);
|
||||
assert(scratch);
|
||||
}
|
||||
|
||||
EngineContext::~EngineContext() {
|
||||
EngineHSContext::~EngineHSContext() {
|
||||
hs_free_scratch(scratch);
|
||||
}
|
||||
|
||||
EngineHSStream::~EngineHSStream() { }
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanContext {
|
||||
ScanContext(unsigned id_in, ResultEntry &result_in,
|
||||
struct ScanHSContext {
|
||||
ScanHSContext(unsigned id_in, ResultEntry &result_in,
|
||||
const EngineStream *stream_in)
|
||||
: id(id_in), result(result_in), stream(stream_in) {}
|
||||
unsigned id;
|
||||
@ -85,9 +87,9 @@ struct ScanContext {
|
||||
* "echo matches" is off.
|
||||
*/
|
||||
static
|
||||
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||
void *ctx) {
|
||||
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||
int HS_CDECL onMatch(unsigned int, unsigned long long,
|
||||
unsigned long long, unsigned int, void *ctx) {
|
||||
ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
@ -99,9 +101,9 @@ int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||
* matches" is enabled.
|
||||
*/
|
||||
static
|
||||
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
unsigned int, void *ctx) {
|
||||
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||
int HS_CDECL onMatchEcho(unsigned int id, unsigned long long,
|
||||
unsigned long long to, unsigned int, void *ctx) {
|
||||
ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
@ -114,7 +116,7 @@ int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
return 0;
|
||||
}
|
||||
|
||||
EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileStats cs)
|
||||
EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileHSStats cs)
|
||||
: db(db_in), compile_stats(std::move(cs)) {
|
||||
assert(db);
|
||||
}
|
||||
@ -124,14 +126,15 @@ EngineHyperscan::~EngineHyperscan() {
|
||||
}
|
||||
|
||||
unique_ptr<EngineContext> EngineHyperscan::makeContext() const {
|
||||
return ue2::make_unique<EngineContext>(db);
|
||||
return ue2::make_unique<EngineHSContext>(db);
|
||||
}
|
||||
|
||||
void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ctx) const {
|
||||
ResultEntry &result, EngineContext &ectx) const {
|
||||
assert(data);
|
||||
|
||||
ScanContext sc(id, result, nullptr);
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
ScanHSContext sc(id, result, nullptr);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc);
|
||||
|
||||
@ -144,11 +147,12 @@ void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
|
||||
void EngineHyperscan::scan_vectored(const char *const *data,
|
||||
const unsigned int *len, unsigned int count,
|
||||
unsigned streamId, ResultEntry &result,
|
||||
EngineContext &ctx) const {
|
||||
EngineContext &ectx) const {
|
||||
assert(data);
|
||||
assert(len);
|
||||
|
||||
ScanContext sc(streamId, result, nullptr);
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
ScanHSContext sc(streamId, result, nullptr);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv =
|
||||
hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc);
|
||||
@ -159,9 +163,10 @@ void EngineHyperscan::scan_vectored(const char *const *data,
|
||||
}
|
||||
}
|
||||
|
||||
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
|
||||
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ectx,
|
||||
unsigned streamId) const {
|
||||
auto stream = ue2::make_unique<EngineStream>();
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
auto stream = ue2::make_unique<EngineHSStream>();
|
||||
stream->ctx = &ctx;
|
||||
|
||||
hs_open_stream(db, 0, &stream->id);
|
||||
@ -170,17 +175,18 @@ unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
|
||||
return nullptr;
|
||||
}
|
||||
stream->sn = streamId;
|
||||
return stream;
|
||||
return move(stream);
|
||||
}
|
||||
|
||||
void EngineHyperscan::streamClose(unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const {
|
||||
assert(stream);
|
||||
|
||||
auto &s = static_cast<EngineStream &>(*stream);
|
||||
EngineContext &ctx = *s.ctx;
|
||||
auto &s = static_cast<EngineHSStream &>(*stream);
|
||||
EngineContext &ectx = *s.ctx;
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
|
||||
ScanContext sc(0, result, &s);
|
||||
ScanHSContext sc(0, result, &s);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
|
||||
assert(s.id);
|
||||
@ -193,10 +199,10 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
|
||||
ResultEntry &result) const {
|
||||
assert(data);
|
||||
|
||||
auto &s = static_cast<EngineStream &>(stream);
|
||||
EngineContext &ctx = *s.ctx;
|
||||
auto &s = static_cast<EngineHSStream &>(stream);
|
||||
EngineHSContext &ctx = *s.ctx;
|
||||
|
||||
ScanContext sc(id, result, &s);
|
||||
ScanHSContext sc(id, result, &s);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv =
|
||||
hs_scan_stream(s.id, data, len, 0, ctx.scratch, callback, &sc);
|
||||
@ -210,11 +216,12 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
|
||||
void EngineHyperscan::streamCompressExpand(EngineStream &stream,
|
||||
vector<char> &temp) const {
|
||||
size_t used = 0;
|
||||
hs_error_t err = hs_compress_stream(stream.id, temp.data(), temp.size(),
|
||||
auto &s = static_cast<EngineHSStream &>(stream);
|
||||
hs_error_t err = hs_compress_stream(s.id, temp.data(), temp.size(),
|
||||
&used);
|
||||
if (err == HS_INSUFFICIENT_SPACE) {
|
||||
temp.resize(used);
|
||||
err = hs_compress_stream(stream.id, temp.data(), temp.size(), &used);
|
||||
err = hs_compress_stream(s.id, temp.data(), temp.size(), &used);
|
||||
}
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
@ -223,10 +230,10 @@ void EngineHyperscan::streamCompressExpand(EngineStream &stream,
|
||||
}
|
||||
|
||||
if (printCompressSize) {
|
||||
printf("stream %u: compressed to %zu\n", stream.sn, used);
|
||||
printf("stream %u: compressed to %zu\n", s.sn, used);
|
||||
}
|
||||
|
||||
err = hs_reset_and_expand_stream(stream.id, temp.data(), temp.size(),
|
||||
err = hs_reset_and_expand_stream(s.id, temp.data(), temp.size(),
|
||||
nullptr, nullptr, nullptr);
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
@ -243,15 +250,30 @@ void EngineHyperscan::printStats() const {
|
||||
}
|
||||
printf("Signatures: %s\n", compile_stats.signatures.c_str());
|
||||
printf("Hyperscan info: %s\n", compile_stats.db_info.c_str());
|
||||
#ifndef _WIN32
|
||||
printf("Expression count: %'zu\n", compile_stats.expressionCount);
|
||||
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
|
||||
#else
|
||||
printf("Expression count: %zu\n", compile_stats.expressionCount);
|
||||
printf("Bytecode size: %zu bytes\n", compile_stats.compiledSize);
|
||||
#endif
|
||||
printf("Database CRC: 0x%x\n", compile_stats.crc32);
|
||||
if (compile_stats.streaming) {
|
||||
#ifndef _WIN32
|
||||
printf("Stream state size: %'zu bytes\n", compile_stats.streamSize);
|
||||
#else
|
||||
printf("Stream state size: %zu bytes\n", compile_stats.streamSize);
|
||||
#endif
|
||||
}
|
||||
#ifndef _WIN32
|
||||
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
|
||||
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
|
||||
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
|
||||
#else
|
||||
printf("Scratch size: %zu bytes\n", compile_stats.scratchSize);
|
||||
printf("Compile time: %0.3Lf seconds\n", compile_stats.compileSecs);
|
||||
printf("Peak heap usage: %u bytes\n", compile_stats.peakMemorySize);
|
||||
#endif
|
||||
}
|
||||
|
||||
void EngineHyperscan::sqlStats(SqlDB &sqldb) const {
|
||||
@ -469,7 +491,7 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
|
||||
hs_free_scratch(scratch);
|
||||
|
||||
// Collect summary information.
|
||||
CompileStats cs;
|
||||
CompileHSStats cs;
|
||||
cs.sigs_name = sigs_name;
|
||||
if (!sigs_name.empty()) {
|
||||
const auto pos = name.find_last_of('/');
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -30,22 +30,15 @@
|
||||
#define ENGINEHYPERSCAN_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "common.h"
|
||||
#include "sqldb.h"
|
||||
#include "engine.h"
|
||||
#include "hs_runtime.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/** Structure for the result of a single complete scan. */
|
||||
struct ResultEntry {
|
||||
double seconds = 0; //!< Time taken for scan.
|
||||
unsigned int matches = 0; //!< Count of matches found.
|
||||
};
|
||||
|
||||
/** Information about the database compile. */
|
||||
struct CompileStats {
|
||||
struct CompileHSStats {
|
||||
std::string sigs_name;
|
||||
std::string signatures;
|
||||
std::string db_info;
|
||||
@ -60,38 +53,38 @@ struct CompileStats {
|
||||
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EngineContext {
|
||||
class EngineHSContext : public EngineContext {
|
||||
public:
|
||||
explicit EngineContext(const hs_database_t *db);
|
||||
~EngineContext();
|
||||
explicit EngineHSContext(const hs_database_t *db);
|
||||
~EngineHSContext();
|
||||
|
||||
hs_scratch_t *scratch = nullptr;
|
||||
};
|
||||
|
||||
/** Streaming mode scans have persistent stream state associated with them. */
|
||||
class EngineStream {
|
||||
class EngineHSStream : public EngineStream {
|
||||
public:
|
||||
~EngineHSStream();
|
||||
hs_stream_t *id;
|
||||
unsigned int sn;
|
||||
EngineContext *ctx;
|
||||
EngineHSContext *ctx;
|
||||
};
|
||||
|
||||
/** Hyperscan Engine for scanning data. */
|
||||
class EngineHyperscan {
|
||||
class EngineHyperscan : public Engine {
|
||||
public:
|
||||
explicit EngineHyperscan(hs_database_t *db, CompileStats cs);
|
||||
explicit EngineHyperscan(hs_database_t *db, CompileHSStats cs);
|
||||
~EngineHyperscan();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ctx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ctx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
@ -109,7 +102,7 @@ public:
|
||||
|
||||
private:
|
||||
hs_database_t *db;
|
||||
CompileStats compile_stats;
|
||||
CompileHSStats compile_stats;
|
||||
};
|
||||
|
||||
namespace ue2 {
|
||||
|
401
tools/hsbench/engine_pcre.cpp
Normal file
401
tools/hsbench/engine_pcre.cpp
Normal file
@ -0,0 +1,401 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
#define PCRE_STATIC
|
||||
#endif
|
||||
#include "config.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "engine_pcre.h"
|
||||
#include "heapstats.h"
|
||||
#include "huge.h"
|
||||
#include "sqldb.h"
|
||||
#include "timer.h"
|
||||
|
||||
#include "util/make_unique.h"
|
||||
#include "util/unicode_def.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Allocates the output vector handed to pcre_exec(): three ints per capture
 * group, plus three more for the overall match.
 */
EnginePCREContext::EnginePCREContext(int capture_cnt) {
    const size_t ovec_bytes = (capture_cnt + 1) * sizeof(int) * 3;
    ovec = static_cast<int *>(malloc(ovec_bytes));
}
|
||||
|
||||
/** Releases the output vector allocated by the constructor. */
EnginePCREContext::~EnginePCREContext() {
    // free() accepts a null pointer, so a failed allocation needs no guard.
    free(ovec);
}
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanPCREContext {
|
||||
ScanPCREContext(unsigned id_in, ResultEntry &result_in)
|
||||
: id(id_in), result(result_in) {}
|
||||
unsigned id;
|
||||
ResultEntry &result;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Function called for every match that PCRE produces, used when
|
||||
* "echo matches" is off.
|
||||
*/
|
||||
static
|
||||
int onMatch(ScanPCREContext *sc) {
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function called for every match that PCRE produces when "echo
|
||||
* matches" is enabled.
|
||||
*/
|
||||
static
|
||||
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
ScanPCREContext *sc) {
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
printf("Match @%u:%llu for %u\n", sc->id, to, id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Takes ownership of the compiled pattern records and stores the compile
 * statistics and the largest capture count seen during the build.
 */
EnginePCRE::EnginePCRE(vector<unique_ptr<PcreDB>> dbs_in, CompilePCREStats cs,
                       int capture_cnt_in)
    : dbs(std::move(dbs_in)),
      compile_stats(std::move(cs)),
      capture_cnt(capture_cnt_in) {
}
|
||||
|
||||
/** Frees the PCRE objects referenced by every stored pattern record. */
EnginePCRE::~EnginePCRE() {
    // NOTE(review): the PCRE documentation asks for pcre_free_study() /
    // pcre_free() to release these; plain free() is kept here because
    // `extra` may also come from the builder's own heap fallback -- confirm
    // before changing.
    for (size_t i = 0; i < dbs.size(); ++i) {
        PcreDB &entry = *dbs[i];
        free(entry.extra);
        free(entry.db);
    }
}
|
||||
|
||||
/** Creates a context whose output vector fits the largest capture count. */
unique_ptr<EngineContext> EnginePCRE::makeContext() const {
    unique_ptr<EngineContext> ctx =
        ue2::make_unique<EnginePCREContext>(capture_cnt);
    return ctx;
}
|
||||
|
||||
void EnginePCRE::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const {
|
||||
assert(data);
|
||||
|
||||
ScanPCREContext sc(id, result);
|
||||
auto &ctx = static_cast<EnginePCREContext &>(ectx);
|
||||
int *ovec = ctx.ovec;
|
||||
int ovec_size = (capture_cnt + 1) * 3;
|
||||
for (const auto &pcreDB : dbs) {
|
||||
int startoffset = 0;
|
||||
bool utf8 = pcreDB->utf8;
|
||||
bool highlander = pcreDB->highlander;
|
||||
|
||||
int flags = 0;
|
||||
int ret;
|
||||
do {
|
||||
ret = pcre_exec(pcreDB->db, pcreDB->extra, data, len,
|
||||
startoffset, flags, ovec, ovec_size);
|
||||
if (ret <= PCRE_ERROR_NOMATCH) {
|
||||
break;
|
||||
}
|
||||
|
||||
int from = ovec[0];
|
||||
int to = ovec[1];
|
||||
assert(from <= to);
|
||||
|
||||
if (echo_matches) {
|
||||
onMatchEcho(pcreDB->id, from, to, &sc);
|
||||
} else {
|
||||
onMatch(&sc);
|
||||
}
|
||||
|
||||
// If we only wanted a single match, we're done.
|
||||
if (highlander) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Next scan starts at the first codepoint after the match. It's
|
||||
// possible that we have a vacuous match, in which case we must step
|
||||
// past it to ensure that we always progress.
|
||||
if (from != to) {
|
||||
startoffset = to;
|
||||
} else if (utf8) {
|
||||
startoffset = to + 1;
|
||||
while (startoffset < (int)len &&
|
||||
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
|
||||
++startoffset;
|
||||
}
|
||||
} else {
|
||||
startoffset = to + 1;
|
||||
}
|
||||
} while (startoffset <= (int)len);
|
||||
|
||||
if (ret < PCRE_ERROR_NOMATCH) {
|
||||
printf("Fatal error: pcre returned error %d\n", ret);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vectoring scan
|
||||
/** Unsupported for PCRE: prints a message and aborts the process. */
void EnginePCRE::scan_vectored(UNUSED const char *const *data,
                               UNUSED const unsigned int *len,
                               UNUSED unsigned int count,
                               UNUSED unsigned int streamId,
                               UNUSED ResultEntry &result,
                               UNUSED EngineContext &ectx) const {
    printf("PCRE matcher can't support vectored mode.\n");
    abort();
}
|
||||
|
||||
/** Unsupported for PCRE: prints a message and aborts; never returns. */
unique_ptr<EngineStream>
EnginePCRE::streamOpen(UNUSED EngineContext &ectx, UNUSED unsigned id) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/** Unsupported for PCRE: prints a message and aborts the process. */
void EnginePCRE::streamClose(UNUSED unique_ptr<EngineStream> stream,
                             UNUSED ResultEntry &result) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/** Unsupported for PCRE: prints a message and aborts the process. */
void EnginePCRE::streamScan(UNUSED EngineStream &stream,
                            UNUSED const char *data, UNUSED unsigned len,
                            UNUSED unsigned id,
                            UNUSED ResultEntry &result) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/** Unsupported for PCRE: prints a message and aborts the process. */
void EnginePCRE::streamCompressExpand(UNUSED EngineStream &stream,
                                      UNUSED vector<char> &temp) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
void EnginePCRE::printStats() const {
|
||||
// Output summary information.
|
||||
if (!compile_stats.sigs_name.empty()) {
|
||||
printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
|
||||
}
|
||||
printf("Signatures: %s\n", compile_stats.signatures.c_str());
|
||||
printf("PCRE info: %s\n", compile_stats.db_info.c_str());
|
||||
#ifndef _WIN32
|
||||
printf("Expression count: %'zu\n", compile_stats.expressionCount);
|
||||
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
|
||||
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
|
||||
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
|
||||
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
|
||||
#else
|
||||
printf("Expression count: %zu\n", compile_stats.expressionCount);
|
||||
printf("Bytecode size: %zu bytes\n", compile_stats.compiledSize);
|
||||
printf("Scratch size: %zu bytes\n", compile_stats.scratchSize);
|
||||
printf("Compile time: %0.3Lf seconds\n", compile_stats.compileSecs);
|
||||
printf("Peak heap usage: %u bytes\n", compile_stats.peakMemorySize);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** Inserts the stored compile statistics as one row of the Compile table. */
void EnginePCRE::sqlStats(SqlDB &sqldb) const {
    static const string Q =
        "INSERT INTO Compile ("
            "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
            "scratchSize, compileSecs, peakMemory) "
        "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    // Nothing is ever written to this stream, so the crc column receives an
    // empty string.
    ostringstream crc;

    const CompilePCREStats &cs = compile_stats;
    sqldb.insert_all(Q, cs.sigs_name, cs.signatures, cs.db_info,
                     cs.expressionCount, cs.compiledSize, crc.str(),
                     cs.scratchSize, cs.compileSecs, cs.peakMemorySize);
}
|
||||
|
||||
static
|
||||
bool decodeExprPCRE(string &expr, unsigned *flags, struct PcreDB &db) {
|
||||
if (expr[0] != '/') {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t end = expr.find_last_of('/');
|
||||
if (end == string::npos) {
|
||||
return false;
|
||||
}
|
||||
string strFlags = expr.substr(end + 1, expr.length() - end - 1);
|
||||
|
||||
// strip starting and trailing slashes and the flags
|
||||
expr.erase(end, expr.length() - end);
|
||||
expr.erase(0, 1);
|
||||
|
||||
// decode the flags
|
||||
*flags = 0;
|
||||
for (size_t i = 0; i != strFlags.length(); ++i) {
|
||||
switch (strFlags[i]) {
|
||||
case 's':
|
||||
*flags |= PCRE_DOTALL;
|
||||
break;
|
||||
case 'm':
|
||||
*flags |= PCRE_MULTILINE;
|
||||
break;
|
||||
case 'i':
|
||||
*flags |= PCRE_CASELESS;
|
||||
break;
|
||||
case '8':
|
||||
*flags |= PCRE_UTF8;
|
||||
db.utf8 = true;
|
||||
break;
|
||||
case 'W':
|
||||
*flags |= PCRE_UCP;
|
||||
break;
|
||||
case 'H':
|
||||
db.highlander = true;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Compiles and studies every expression with PCRE and wraps the results in
 * an EnginePCRE.
 *
 * \param expressions Expressions to compile, keyed by id.
 * \param name        Source name recorded in the compile statistics.
 * \param sigs_name   Signature set name; when non-empty, only the part of
 *                    \p name after the last '/' is recorded.
 * \return The engine, or nullptr if any expression fails to parse, compile,
 *         study or report its sizes. On failure every PCRE object created so
 *         far is released.
 */
unique_ptr<EnginePCRE>
buildEnginePcre(const ExpressionMap &expressions, const string &name,
                const string &sigs_name) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    long double compileSecs = 0.0;
    size_t compiledSize = 0;
    unsigned int peakMemorySize = 0;
    string db_info("Version: ");
    db_info += string(pcre_version());

    vector<unique_ptr<PcreDB>> dbs;
    int capture_cnt = 0;

    // Error-path cleanup: frees the objects of the expression currently being
    // built (either may be null) and of every record already stored. PcreDB
    // has no destructor, so dropping the vector alone would leak them.
    // free() is used to match EnginePCRE's destructor.
    const auto releaseAll = [&dbs](pcre *cur_db, pcre_extra *cur_extra) {
        free(cur_extra);
        free(cur_db);
        for (auto &built : dbs) {
            free(built->extra);
            free(built->db);
        }
        dbs.clear();
    };

    Timer timer;
    timer.start();

    for (const auto &m : expressions) {
        string expr(m.second);
        unsigned int flags = 0;
        auto pcreDB = ue2::make_unique<PcreDB>();
        if (!decodeExprPCRE(expr, &flags, *pcreDB)) {
            printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                   m.first);
            releaseAll(nullptr, nullptr);
            return nullptr;
        }

        const char *errp = nullptr;
        int erro = 0;
        pcre *db = pcre_compile(expr.c_str(), flags, &errp, &erro, NULL);

        if (!db) {
            printf("Compile error %s\n", errp);
            releaseAll(nullptr, nullptr);
            return nullptr;
        }

        pcre_extra *extra = pcre_study(db, PCRE_STUDY_JIT_COMPILE, &errp);
        if (errp) {
            printf("PCRE could not be studied: %s\n", errp);
            releaseAll(db, extra);
            return nullptr;
        }
        if (!extra) {
            // pcre_study() had nothing to report; supply our own block so the
            // match limits below have somewhere to live. It must be zeroed:
            // pcre_fullinfo() reads extra->flags before we assign them.
            extra = (pcre_extra *)calloc(1, sizeof(pcre_extra));
            if (!extra) {
                printf("Unable to allocate PCRE extra data\n");
                releaseAll(db, nullptr);
                return nullptr;
            }
        }

        int cap = 0; // PCRE_INFO_CAPTURECOUNT demands an int
        if (pcre_fullinfo(db, extra, PCRE_INFO_CAPTURECOUNT, &cap)) {
            printf("PCRE fullinfo error\n");
            releaseAll(db, extra);
            return nullptr;
        }
        assert(cap >= 0);
        capture_cnt = max(capture_cnt, cap);

        size_t db_size = 0;
        if (pcre_fullinfo(db, extra, PCRE_INFO_SIZE, &db_size)) {
            printf("PCRE fullinfo error\n");
            releaseAll(db, extra);
            return nullptr;
        }

        size_t study_size = 0;
        if (pcre_fullinfo(db, extra, PCRE_INFO_STUDYSIZE,
                          &study_size)) {
            printf("PCRE fullinfo error\n");
            releaseAll(db, extra);
            return nullptr;
        }
        compiledSize += db_size + study_size;

        pcreDB->id = m.first;
        pcreDB->db = db;

        // Bound execution time on very complex patterns.
        // NOTE(review): this assignment replaces whatever flags pcre_study()
        // set (study data / JIT), so pcre_exec() will not use them. Kept
        // as-is to preserve measured behaviour -- confirm whether `|=` was
        // intended.
        extra->flags =
            PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
        extra->match_limit = 10000000;
        extra->match_limit_recursion = 1500;

        pcreDB->extra = extra;
        dbs.push_back(move(pcreDB));
    }

    timer.complete();
    compileSecs = timer.seconds();
    peakMemorySize = getPeakHeap();

    // Collect summary information.
    CompilePCREStats cs;
    cs.sigs_name = sigs_name;
    if (!sigs_name.empty()) {
        // find_last_of() yields npos when there is no '/', and npos + 1 wraps
        // to 0, so the whole name is kept in that case.
        const auto pos = name.find_last_of('/');
        cs.signatures = name.substr(pos + 1);
    } else {
        cs.signatures = name;
    }
    cs.db_info = db_info;
    cs.expressionCount = expressions.size();
    cs.compiledSize = compiledSize;
    // Size of the output vector each context allocates.
    cs.scratchSize = (capture_cnt + 1) * sizeof(int) * 3;
    cs.compileSecs = compileSecs;
    cs.peakMemorySize = peakMemorySize;

    return ue2::make_unique<EnginePCRE>(move(dbs), move(cs), capture_cnt);
}
|
114
tools/hsbench/engine_pcre.h
Normal file
114
tools/hsbench/engine_pcre.h
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINEPCRE_H
|
||||
#define ENGINEPCRE_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "engine.h"
|
||||
|
||||
#include <pcre.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/** Information about the database compile. */
|
||||
struct CompilePCREStats {
|
||||
std::string sigs_name;
|
||||
std::string signatures;
|
||||
std::string db_info;
|
||||
size_t expressionCount = 0;
|
||||
size_t compiledSize = 0;
|
||||
size_t scratchSize = 0;
|
||||
long double compileSecs = 0;
|
||||
unsigned int peakMemorySize = 0;
|
||||
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EnginePCREContext : public EngineContext{
|
||||
public:
|
||||
explicit EnginePCREContext(int capture_cnt);
|
||||
~EnginePCREContext();
|
||||
|
||||
int *ovec = nullptr;
|
||||
};
|
||||
|
||||
struct PcreDB {
|
||||
bool highlander = false;
|
||||
bool utf8 = false;
|
||||
u32 id;
|
||||
pcre *db = nullptr;
|
||||
pcre_extra *extra = nullptr;
|
||||
};
|
||||
|
||||
/** PCRE Engine for scanning data. */
|
||||
class EnginePCRE : public Engine {
|
||||
public:
|
||||
explicit EnginePCRE(std::vector<std::unique_ptr<PcreDB>> dbs_in,
|
||||
CompilePCREStats cs, int capture_cnt_in);
|
||||
~EnginePCRE();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
|
||||
void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
|
||||
void printStats() const;
|
||||
|
||||
void sqlStats(SqlDB &db) const;
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<PcreDB>> dbs;
|
||||
|
||||
CompilePCREStats compile_stats;
|
||||
|
||||
int capture_cnt;
|
||||
};
|
||||
|
||||
std::unique_ptr<EnginePCRE>
|
||||
buildEnginePcre(const ExpressionMap &expressions, const std::string &name,
|
||||
const std::string &sigs_name);
|
||||
|
||||
#endif // ENGINEPCRE_H
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user