mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge pull request #169 from VectorCamp/feature/backport-hyperscan-2023Q3
Feature/backport hyperscan 2023 q3
This commit is contained in:
commit
e843ac80c9
25
CHANGELOG.md
25
CHANGELOG.md
@ -2,6 +2,31 @@
|
||||
|
||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||
|
||||
## [5.4.2] 2023-04-19
|
||||
- Roll back bugfix for github issue #350: Besides using scratch for
|
||||
the corresponding database, Hyperscan also allows users to use a larger scratch
|
||||
allocated for another database. Users can leverage this property to achieve
|
||||
safe scratch usage in multi-database scenarios. Behaviors beyond these are
|
||||
discouraged and results are undefined.
|
||||
- Fix hsdump issue due to invalid nfa type.
|
||||
|
||||
## [5.4.1] 2023-02-20
|
||||
- The Intel Hyperscan team is pleased to provide a bug fix release to our open source library.
|
||||
Intel also maintains an upgraded version available through your Intel sales representative.
|
||||
- Bugfix for issue #184: fix random char value of UTF-8.
|
||||
- Bugfix for issue #291: bypass logical combination flag in hs_expression_info().
|
||||
- Bugfix for issue #292: fix build error due to libc symbol parsing.
|
||||
- Bugfix for issue #302/304: add empty string check for pure literal API.
|
||||
- Bugfix for issue #303: fix unknown instruction error in pure literal API.
|
||||
- Bugfix for issue #303: avoid memory leak in stream close stage.
|
||||
- Bugfix for issue #305: fix assertion failure in DFA construction.
|
||||
- Bugfix for issue #317: fix aligned allocator segmentation faults.
|
||||
- Bugfix for issue #350: add quick validity check for scratch.
|
||||
- Bugfix for issue #359: fix glibc-2.34 stack size issue.
|
||||
- Bugfix for issue #360: fix SKIP flag issue in chimera.
|
||||
- Bugfix for issue #362: fix one octet check corner case in UTF-8 validation.
|
||||
- Fix other compile issues.
|
||||
|
||||
## [5.4.0] 2020-12-31
|
||||
- Improvement on literal matcher "Fat Teddy" performance, including
|
||||
support for Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R)
|
||||
|
@ -474,6 +474,18 @@ if (CXX_UNUSED_CONST_VAR)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
|
||||
endif()
|
||||
|
||||
# clang-14 complains about unused-but-set variable.
|
||||
CHECK_CXX_COMPILER_FLAG("-Wunused-but-set-variable" CXX_UNUSED_BUT_SET_VAR)
|
||||
if (CXX_UNUSED_BUT_SET_VAR)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-but-set-variable")
|
||||
endif()
|
||||
|
||||
# clang-14 complains about using bitwise operator instead of logical ones.
|
||||
CHECK_CXX_COMPILER_FLAG("-Wbitwise-instead-of-logical" CXX_BITWISE_INSTEAD_OF_LOGICAL)
|
||||
if (CXX_BITWISE_INSTEAD_OF_LOGICAL)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-bitwise-instead-of-logical")
|
||||
endif()
|
||||
|
||||
# gcc 6 complains about type attributes that get ignored, like alignment
|
||||
CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR)
|
||||
if (CXX_IGNORED_ATTR)
|
||||
@ -505,8 +517,10 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
|
||||
|
||||
# gcc 10 complains about this
|
||||
CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW)
|
||||
if(CC_STRINGOP_OVERFLOW)
|
||||
CHECK_CXX_COMPILER_FLAG("-Wstringop-overflow" CXX_STRINGOP_OVERFLOW)
|
||||
if(CC_STRINGOP_OVERFLOW OR CXX_STRINGOP_OVERFLOW)
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-stringop-overflow")
|
||||
endif()
|
||||
|
||||
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
|
||||
@ -626,7 +640,7 @@ endif ()
|
||||
|
||||
set (hs_exec_SRCS
|
||||
${hs_HEADERS}
|
||||
src/hs_version.h
|
||||
src/hs_version.h.in
|
||||
src/ue2common.h
|
||||
src/allocator.h
|
||||
src/crc32.c
|
||||
@ -802,7 +816,7 @@ SET (hs_compile_SRCS
|
||||
src/grey.h
|
||||
src/hs.cpp
|
||||
src/hs_internal.h
|
||||
src/hs_version.h
|
||||
src/hs_version.h.in
|
||||
src/scratch.h
|
||||
src/state.h
|
||||
src/ue2common.h
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2020, Intel Corporation
|
||||
* Copyright (c) 2018-2022, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
|
@ -206,6 +206,10 @@ void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices,
|
||||
assert(contains(src_slots, slot_id));
|
||||
|
||||
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
||||
if (!vmin) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
cfg[e].vars.emplace_back(vmin);
|
||||
final_var = vmin.get();
|
||||
|
||||
@ -317,6 +321,10 @@ void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators,
|
||||
DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
|
||||
} else {
|
||||
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
||||
if (!vmin) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
cfg[e].vars.emplace_back(vmin);
|
||||
final_var = vmin.get();
|
||||
|
||||
|
@ -124,6 +124,10 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod)
|
||||
: stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0),
|
||||
patchSize(0), encodingSize(0), patchesOffset(0) {
|
||||
if (type == REPEAT_SPARSE_OPTIMAL_P && minPeriod == 0) {
|
||||
assert(0);
|
||||
throw std::domain_error("SPARSE_OPTIMAL_P must have non-zero minPeriod.");
|
||||
}
|
||||
assert(repeatMin <= repeatMax);
|
||||
assert(repeatMax.is_reachable());
|
||||
assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P);
|
||||
|
@ -2445,6 +2445,10 @@ static
|
||||
bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) {
|
||||
ue2_literal lit;
|
||||
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
||||
if (!rhs) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
if (!ng.cc.grey.allowLitHaig) {
|
||||
return false;
|
||||
}
|
||||
@ -2509,6 +2513,11 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g,
|
||||
ue2_literal lit;
|
||||
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
||||
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
||||
if (!rhs || !lhs) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
|
||||
if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
|
||||
return false;
|
||||
}
|
||||
|
@ -1036,6 +1036,11 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg,
|
||||
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
||||
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
||||
|
||||
if (!lhs || !rhs) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
|
||||
unordered_map<NFAVertex, NFAVertex> lhs_map;
|
||||
unordered_map<NFAVertex, NFAVertex> rhs_map;
|
||||
|
||||
@ -1229,6 +1234,10 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
|
||||
DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index);
|
||||
unordered_map<NFAVertex, NFAVertex> temp_map;
|
||||
shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>();
|
||||
if (!new_lhs) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
splitLHS(h, pivot, new_lhs.get(), &temp_map);
|
||||
|
||||
/* want to cut off paths to pivot from things other than the pivot -
|
||||
@ -1310,6 +1319,10 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
|
||||
if (!contains(done_rhs, adj)) {
|
||||
unordered_map<NFAVertex, NFAVertex> temp_map;
|
||||
shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>();
|
||||
if (!new_rhs) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
splitRHS(h, adj, new_rhs.get(), &temp_map);
|
||||
remove_edge(new_rhs->start, new_rhs->accept, *new_rhs);
|
||||
remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs);
|
||||
@ -2281,6 +2294,10 @@ void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg,
|
||||
assert(!splitters.empty());
|
||||
|
||||
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
||||
if (!lhs) {
|
||||
assert(0);
|
||||
throw bad_alloc();
|
||||
}
|
||||
unordered_map<NFAVertex, NFAVertex> v_map;
|
||||
cloneHolder(*lhs, base_graph, &v_map);
|
||||
lhs->kind = NFA_INFIX;
|
||||
|
@ -140,7 +140,8 @@ void ParsedLogical::validateSubIDs(const unsigned *ids,
|
||||
}
|
||||
hs_compile_error_t *compile_err = NULL;
|
||||
hs_expr_info_t *info = NULL;
|
||||
hs_error_t err = hs_expression_info(expressions[i], flags[i], &info,
|
||||
hs_error_t err = hs_expression_info(expressions[i],
|
||||
flags ? flags[i] : 0, &info,
|
||||
&compile_err);
|
||||
if (err != HS_SUCCESS) {
|
||||
hs_free_compile_error(compile_err);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2022, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -72,7 +72,7 @@ bool isValidUtf8(const char *expression, const size_t len) {
|
||||
while (i < len) {
|
||||
DEBUG_PRINTF("byte %zu: 0x%02x\n", i, s[i]);
|
||||
// One octet.
|
||||
if (s[i] < 0x7f) {
|
||||
if (s[i] <= 0x7f) {
|
||||
DEBUG_PRINTF("one octet\n");
|
||||
i++;
|
||||
continue;
|
||||
|
@ -561,6 +561,10 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
|
||||
DEBUG_PRINTF("woot?\n");
|
||||
|
||||
shared_ptr<NGHolder> h_new = make_shared<NGHolder>();
|
||||
if (!h_new) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
unordered_map<NFAVertex, NFAVertex> rhs_map;
|
||||
vector<NFAVertex> exits_vec;
|
||||
insert(&exits_vec, exits_vec.end(), exits);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
* Copyright (c) 2015-2022, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -1013,6 +1013,7 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
|
||||
report_eod_matches(id, scratch, onEvent, context);
|
||||
if (unlikely(internal_matching_error(scratch))) {
|
||||
unmarkScratchInUse(scratch);
|
||||
hs_stream_free(id);
|
||||
return HS_UNKNOWN_ERROR;
|
||||
}
|
||||
unmarkScratchInUse(scratch);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
* Copyright (c) 2015-2023, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
* Copyright (c) 2015-2023, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
|
@ -77,7 +77,7 @@ namespace ue2 {
|
||||
struct LitTrieVertexProps {
|
||||
LitTrieVertexProps() = default;
|
||||
explicit LitTrieVertexProps(u8 c_in) : c(c_in) {}
|
||||
size_t index; // managed by ue2_graph
|
||||
size_t index = 0; // managed by ue2_graph
|
||||
u8 c = 0; //!< character reached on this vertex
|
||||
flat_set<ReportID> reports; //!< managed reports fired on this vertex
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2023, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Intel Corporation
|
||||
* Copyright (c) 2017-2023, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
|
@ -70,8 +70,8 @@ class undirected_graph_edge_descriptor
|
||||
using base_vertex_type = typename base_graph_traits::vertex_descriptor;
|
||||
|
||||
base_edge_type underlying_edge;
|
||||
const base_graph_type *g;
|
||||
bool reverse; // if true, reverse vertices in source() and target()
|
||||
const base_graph_type *g = nullptr;
|
||||
bool reverse = false; // if true, reverse vertices in source() and target()
|
||||
|
||||
inline std::pair<base_vertex_type, base_vertex_type>
|
||||
canonical_edge() const {
|
||||
|
@ -133,7 +133,7 @@ public:
|
||||
: lit(&lit_in), idx(idx_in) {}
|
||||
|
||||
const ue2_literal *lit = nullptr;
|
||||
size_t idx;
|
||||
size_t idx = 0;
|
||||
};
|
||||
|
||||
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||
|
@ -58,7 +58,10 @@ void readRow(sqlite3_stmt *statement, vector<DataBlock> &blocks,
|
||||
}
|
||||
auto internal_stream_index = stream_indices[stream_id];
|
||||
|
||||
assert(blob || bytes > 0);
|
||||
if (!(blob && bytes > 0)) {
|
||||
assert(0);
|
||||
throw std::domain_error("Invalid blob or bytes from sqlite3.");
|
||||
}
|
||||
blocks.emplace_back(id, stream_id, internal_stream_index,
|
||||
string(blob, blob + bytes));
|
||||
}
|
||||
|
@ -740,6 +740,11 @@ u64a byte_size(const vector<DataBlock> &corpus_blocks) {
|
||||
total += block.payload.size();
|
||||
}
|
||||
|
||||
if (total == 0) {
|
||||
assert(0);
|
||||
throw std::invalid_argument("Empty corpus.");
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
|
@ -61,7 +61,7 @@ public:
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
if (failed) {
|
||||
// We have previously failed to compile this database.
|
||||
return nullptr;
|
||||
throw CompileFailed("Unable to compile db previously.");
|
||||
}
|
||||
if (db) {
|
||||
return db;
|
||||
|
@ -101,7 +101,7 @@ void NfaGeneratedCorpora::generate(unsigned id, vector<Corpus> &data) {
|
||||
pl.logicalKeyRenumber();
|
||||
const auto &m_lkey = pl.getLkeyMap();
|
||||
assert(!m_lkey.empty());
|
||||
u32 a_subid; // arbitrary sub id
|
||||
u32 a_subid = 0; // arbitrary sub id
|
||||
unordered_map<u32, vector<Corpus>> m_data;
|
||||
for (const auto &it : m_lkey) {
|
||||
a_subid = it.first;
|
||||
|
@ -98,6 +98,6 @@ void *Thread::runThread(void *thr) {
|
||||
}
|
||||
|
||||
|
||||
Thread::Thread(size_t num) : thread_id(num) {}
|
||||
Thread::Thread(size_t num) : thread_id(num), thread() {}
|
||||
|
||||
Thread::~Thread() {}
|
||||
|
@ -499,8 +499,8 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
} else if (in_corpora) {
|
||||
corpora->push_back(optarg);
|
||||
in_corpora = 2;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2021, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -363,8 +363,9 @@ static const unsigned validModes[] = {
|
||||
// Mode bits for switching off various architecture features
|
||||
static const unsigned long long featureMask[] = {
|
||||
~0ULL, /* native */
|
||||
~(HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512), /* no avx2 */
|
||||
~HS_CPU_FEATURES_AVX512, /* no avx512 */
|
||||
~(HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX512VBMI), /* no avx2 */
|
||||
~(HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX512VBMI), /* no avx512 */
|
||||
~HS_CPU_FEATURES_AVX512VBMI, /* no avx512vbmi */
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Single,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2021, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -56,6 +56,10 @@ TEST(DB, flagsToPlatform) {
|
||||
p.cpu_features |= HS_CPU_FEATURES_AVX512;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
p.cpu_features |= HS_CPU_FEATURES_AVX512VBMI;
|
||||
#endif
|
||||
|
||||
platform_t pp = target_to_platform(target_t(p));
|
||||
ASSERT_EQ(pp, hs_current_platform);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2022, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -64,8 +64,8 @@ static ValidUtf8TestInfo valid_utf8_tests[] = {
|
||||
{"공동경비구역", true},
|
||||
{"জলসাঘর", true},
|
||||
|
||||
// Invalid one-byte caseS.
|
||||
{"\x7f", false},
|
||||
// Valid one-byte cases.
|
||||
{"\x7f", true}, // \x7f is valid
|
||||
|
||||
// These bytes should never appear in a UTF-8 stream.
|
||||
{"\xc0", false},
|
||||
|
@ -42,7 +42,7 @@ CorpusProperties::CorpusProperties()
|
||||
: matchness(100), unmatchness(0), randomness(0), prefixRange(0, 0),
|
||||
suffixRange(0, 0), cycleMin(1), cycleMax(1),
|
||||
corpusLimit(DEFAULT_CORPUS_GENERATOR_LIMIT), editDistance(0),
|
||||
alphabetSize(~0) {
|
||||
alphabetSize(~0), rngSeed(0) {
|
||||
// empty
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user