/*
 * Copyright (c) 2015-2019, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
|
|
|
|
#include "ResultSet.h"
|
|
#include "UltimateTruth.h"
|
|
#include "util/database_util.h"
|
|
#include "util/ExpressionParser.h"
|
|
#include "util/string_util.h"
|
|
|
|
#include "ue2common.h"
|
|
#include "common.h"
|
|
#include "crc32.h"
|
|
#include "hs.h"
|
|
#include "hs_internal.h"
|
|
|
|
#include "scratch.h"
|
|
#include "nfa/nfa_api_queue.h"
|
|
#include "rose/rose_internal.h"
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <iomanip>
|
|
#include <iostream>
|
|
#include <map>
|
|
#include <set>
|
|
#include <sstream>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
#include <boost/ptr_container/ptr_vector.hpp>
|
|
|
|
using namespace std;
|
|
using namespace ue2;
|
|
using boost::ptr_vector;
|
|
|
|
#ifndef RELEASE_BUILD

#include "database.h"
#include "state.h"

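// Open a stream and make it look as though start_offset bytes have already
// been scanned: scan a run of dummy bytes (enough to satisfy the database's
// history requirement) and then patch the stream's offset field directly.
// Debug builds only, since this reaches into stream internals.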
static
hs_error_t open_magic_stream(const hs_database_t *db, unsigned flags,
                             hs_stream_t **stream, hs_scratch_t *scratch,
                             unsigned long long start_offset) {
    hs_error_t ret = hs_open_stream(db, flags, stream);
    if (ret != HS_SUCCESS) {
        return ret;
    }

    const char dummy_data[100] = { 0 };
    UNUSED const struct RoseEngine *rose =
        (const struct RoseEngine *)hs_get_bytecode(db);
    assert(sizeof(dummy_data) >= rose->historyRequired);
    hs_scan_stream(*stream, dummy_data, MIN(start_offset, sizeof(dummy_data)),
                   0, scratch, nullptr, nullptr);
    (*stream)->offset = start_offset;
    return ret;
}

#endif // RELEASE_BUILD

class BaseDB : boost::noncopyable {
public:
    // Constructor takes iterators over a container of pattern IDs.
    template <class Iter>
    BaseDB(Iter ids_begin, Iter ids_end) : ids(ids_begin, ids_end) {}

    virtual ~BaseDB();

    // The set of expression IDs that must return their matches in order.
    unordered_set<unsigned> ordered;

    // The complete set of expression IDs associated with this database.
    unordered_set<unsigned> ids;
};

BaseDB::~BaseDB() { }

class HyperscanDB : public BaseDB {
public:
    // Constructor takes iterators over a container of pattern IDs.
    template <class Iter>
    HyperscanDB(hs_database_t *db_in, Iter ids_begin, Iter ids_end)
        : BaseDB(ids_begin, ids_end), db(db_in) {}

    ~HyperscanDB();

    // Underlying Hyperscan database pointer.
    hs_database_t *db;
};

HyperscanDB::~HyperscanDB() {
    hs_free_database(db);
}

#ifdef HS_HYBRID

class HybridDB : public BaseDB {
public:
    // Constructor takes iterators over a container of pattern IDs.
    template <class Iter>
    HybridDB(ch_database_t *db_in, Iter ids_begin, Iter ids_end)
        : BaseDB(ids_begin, ids_end), db(db_in) {}

    ~HybridDB();

    // Underlying Chimera database pointer.
    ch_database_t *db;
};

HybridDB::~HybridDB() {
    ch_free_database(db);
}

#endif // HS_HYBRID

// Used to track the ID and result set.
namespace {
struct MultiContext {
    MultiContext(unsigned int id_in, const BaseDB &db_in, ResultSet *rs_in,
                 bool single_in, ostream &os)
        : id(id_in), db(db_in), rs(rs_in), single(single_in), out(os) {}
    unsigned int id;
    int block = 0;
    const BaseDB &db;
    ResultSet *rs;
    u64a lastRawMatch = 0; /* store last known unadjusted match location */
    u64a lastOrderMatch = 0;
    bool single;
    bool use_max_offset = false;
    unsigned long long max_offset = 0; /* don't record matches beyond this */
    bool terminated = false; //!< user has instructed us to stop
    bool in_scan_call = false;
    ostream &out;
};
} // namespace

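// Note on the match callback contract: returning non-zero from the handler
// tells the engine to cease matching. Any match delivered after we have
// requested termination is recorded as an error (match_after_halt) below.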
// Callback used for all (both single and multi-mode) scans.
static
int HS_CDECL callbackMulti(unsigned int id, unsigned long long from,
                           unsigned long long to,
                           UNUSED unsigned int flags, void *ctx) {
    MultiContext *mctx = static_cast<MultiContext *>(ctx);
    assert(mctx);
    assert(mctx->rs);
    assert(mctx->in_scan_call);

    ostream &out = mctx->out;

    // Sanity check: in single mode, we'd better not be getting matches for
    // the wrong ID!
    if (mctx->single && id != mctx->id) {
        out << "UE2 Match @ (" << from << "," << to << ") for " << id
            << " which is not the id we're looking for" << endl;
        mctx->rs->invalid_id = true;
        return 1;
    }

    // In any mode, we should NEVER get a match from an ID outside our known
    // set.
    if (mctx->db.ids.find(id) == mctx->db.ids.end()) {
        out << "UE2 Match @ (" << from << "," << to << ") for " << id
            << " which is not in the pattern set" << endl;
        mctx->rs->invalid_id = true;
        return 1;
    }

    if (mctx->terminated) {
        out << "UE2 Match @ (" << from << "," << to << ") for " << id
            << " after termination" << endl;
        mctx->rs->match_after_halt = true;
    }

#ifndef RELEASE_BUILD
    unsigned int adjustment = flags & HS_MATCH_FLAG_ADJUSTED ? 1 : 0;
    if (mctx->lastRawMatch > to + adjustment) {
        out << "UE2 Match @ (" << from << "," << to << ") for " << id
            << " unordered" << endl;
        mctx->rs->uoom = true;
    }
    mctx->lastRawMatch = to + adjustment;
#endif

    if (mctx->db.ordered.find(id) != mctx->db.ordered.end()) {
        if (mctx->lastOrderMatch > to) {
            out << "UE2 Match @ (" << from << "," << to << ") for " << id
                << " unordered" << endl;
            mctx->rs->uoom = true;
        }
        mctx->lastOrderMatch = to;
    }

    if (mctx->use_max_offset && to > mctx->max_offset) {
        if (echo_matches) {
            out << "UE2 Match @ (" << from << "," << to << ") for " << id
                << " ignored" << endl;
        }
        return 0;
    }

    if (to - g_streamOffset < g_corpora_prefix.size()) {
        if (echo_matches) {
            out << "UE2 Match @ (" << from << "," << to << ") for " << id
                << " too early" << endl;
        }
        return 0;
    }

    u64a offsetDelta = g_corpora_prefix.size() + g_streamOffset;

    if (from) {
        // from is only set in SOM mode, otherwise zero. If we wanted to be
        // really principled about this, we'd probably want to stash the
        // flags somewhere at compile time.
        from -= (from > offsetDelta ? offsetDelta : from);
    }

    to -= offsetDelta;

    if (echo_matches) {
        out << "UE2 Match @ (" << from << "," << to << ") for " << id << endl;
    }

    if (mctx->single || id == mctx->id) {
        mctx->rs->addMatch(from, to, mctx->block);
        if (limit_matches && mctx->rs->matches.size() == limit_matches) {
            if (echo_matches) {
                out << "Terminating matching (hit match limit)" << endl;
            }
            mctx->terminated = true;
            return 1; // terminate matching.
        }
    }

    return 0;
}

#ifdef HS_HYBRID

// Hybrid matcher callback.
static
ch_callback_t HS_CDECL callbackHybrid(unsigned id, unsigned long long from,
                                      unsigned long long to, unsigned,
                                      unsigned size,
                                      const ch_capture_t *captured,
                                      void *ctx) {
    MultiContext *mctx = static_cast<MultiContext *>(ctx);
    assert(mctx);
    assert(mctx->rs);
    assert(mctx->in_scan_call);

    ostream &out = mctx->out;

    to -= g_corpora_prefix.size();

    if (mctx->terminated) {
        out << "UE2 Match @ (" << from << "," << to << ") for " << id
            << " after termination" << endl;
        mctx->rs->match_after_halt = true;
    }

    if (mctx->single || id == mctx->id) {
        CaptureVec cap;
        for (unsigned int i = 0; i < size; i++) {
            if (!(captured[i].flags & CH_CAPTURE_FLAG_ACTIVE)) {
                cap.push_back(make_pair(-1, -1));
            } else {
                cap.push_back(make_pair(captured[i].from, captured[i].to));
            }
        }
        mctx->rs->addMatch(from, to, cap);
    }

    if (echo_matches) {
        out << "Match @ [" << from << "," << to << "] for " << id << endl;
        out << "  Captured " << size << " groups: ";
        for (unsigned int i = 0; i < size; i++) {
            if (!(captured[i].flags & CH_CAPTURE_FLAG_ACTIVE)) {
                out << "{} ";
            } else {
                out << "{" << captured[i].from << "," << captured[i].to
                    << "} ";
            }
        }
        out << endl;
    }

    if (limit_matches && mctx->rs->matches.size() == limit_matches) {
        mctx->terminated = true;
        return CH_CALLBACK_TERMINATE;
    }

    return CH_CALLBACK_CONTINUE;
}

// Hybrid matcher error callback.
static
ch_callback_t HS_CDECL errorCallback(UNUSED ch_error_event_t errorType,
                                     UNUSED unsigned int id, void *,
                                     void *ctx) {
    UNUSED MultiContext *mctx = static_cast<MultiContext *>(ctx);
    assert(mctx);
    assert(mctx->rs);
    assert(mctx->in_scan_call);

    return CH_CALLBACK_SKIP_PATTERN;
}

#endif // HS_HYBRID

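// Keep only the first match seen for each end offset. When a corpus is
// scanned in pieces (stream writes or vectored blocks), the engine cannot
// dedupe SOM matches across piece boundaries, so duplicates that share an
// end offset are stripped here before results are compared.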
static
void filterLeftmostSom(ResultSet &rs) {
    if (rs.matches.size() <= 1) {
        return;
    }

    set<u64a> seen; // End offsets.
    auto it = rs.matches.begin();
    while (it != rs.matches.end()) {
        if (seen.insert(it->to).second) {
            ++it; // First time we've seen this end offset.
        } else {
            rs.matches.erase(it++);
        }
    }
}

UltimateTruth::UltimateTruth(ostream &os, const ExpressionMap &expr,
                             const hs_platform_info_t *plat,
                             const Grey &grey_in, unsigned int streamBlocks)
    : grey(grey_in), out(os), m_expr(expr), m_xcompile(false),
      m_streamBlocks(streamBlocks), scratch(nullptr),
#ifdef HS_HYBRID
      chimeraScratch(nullptr),
#endif
      platform(plat) {
    // Build our mode flags.
    switch (colliderMode) {
    case MODE_STREAMING:
        m_mode = HS_MODE_STREAM;
        break;
    case MODE_BLOCK:
        m_mode = HS_MODE_BLOCK;
        break;
    case MODE_VECTORED:
        m_mode = HS_MODE_VECTORED;
        break;
    case MODE_HYBRID:
        m_mode = 0;
        break;
    }

    // Set desired SOM precision, if we're in streaming mode.
    if (colliderMode == MODE_STREAMING) {
        m_mode |= somPrecisionMode;
    }

#ifdef HS_HYBRID
    if (colliderMode == MODE_HYBRID && !no_groups) {
        m_mode |= CH_MODE_GROUPS;
    }
#endif
}

UltimateTruth::~UltimateTruth() {
#ifdef HS_HYBRID
    ch_free_scratch(chimeraScratch);
#endif
    hs_free_scratch(scratch);
}

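// Deliberately scribble distinctive byte patterns over every region of
// scratch. This is used between scan calls to check that the library does
// not rely on scratch contents persisting from one call to the next.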
static
void mangle_scratch(hs_scratch_t *scratch) {
    /* Use our knowledge of the internals of scratch to make a mess. */

    memset(&scratch->tctxt, 0xc0, sizeof(scratch->tctxt));
    memset(scratch->bstate, 0xd0, scratch->bStateSize);
    memset(scratch->tstate, 0xe0, scratch->tStateSize);
    memset(scratch->fullState, 0xf0, scratch->fullStateSize);

    for (u32 i = 0; i < scratch->queueCount; i++) {
        struct mq *q = &scratch->queues[i];
        memset(q, 0x01, sizeof(*q));
        q->scratch = scratch;
    }

    memset(scratch->aqa, 0xb0, scratch->activeQueueArraySize);
    for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) {
        memset(scratch->delay_slots[i], 0x05, scratch->delay_fatbit_size);
    }

    memset(scratch->catchup_pq.qm, 0x06,
           scratch->queueCount * sizeof(struct queue_match));
    scratch->catchup_pq.qm_size = 45;
    memset(&scratch->core_info, 0x07, sizeof(scratch->core_info));
    memset(scratch->deduper.som_start_log[0], 0x90,
           sizeof(u64a) * scratch->deduper.dkey_count);
    memset(scratch->deduper.som_start_log[1], 0x09,
           sizeof(u64a) * scratch->deduper.dkey_count);
    memset(scratch->deduper.log[0], 0xa0, scratch->deduper.log_size);
    memset(scratch->deduper.log[1], 0x0a, scratch->deduper.log_size);
    memset(scratch->deduper.som_log[0], 0xd0, scratch->deduper.log_size);
    memset(scratch->deduper.som_log[1], 0x0d, scratch->deduper.log_size);

    for (u32 i = 0; i < scratch->anchored_literal_region_len; i++) {
        memset(scratch->al_log[i], 0xa0,
               scratch->anchored_literal_fatbit_size);
    }
    scratch->al_log_sum = 0xf0f;

    memset(scratch->handled_roles, 0x05, scratch->handledKeyFatbitSize);
    memset(scratch->som_store, 0x06, scratch->som_store_count * sizeof(u64a));
    memset(scratch->som_attempted_store, 0x06,
           scratch->som_store_count * sizeof(u64a));
    memset(scratch->som_set_now, 0x03, scratch->som_fatbit_size);
    memset(scratch->som_attempted_set, 0x04, scratch->som_fatbit_size);
    scratch->som_set_now_offset = 45;
    memset(&scratch->fdr_conf, 0x0d, sizeof(scratch->fdr_conf));
    scratch->fdr_conf_offset = 0xe4;
}

bool UltimateTruth::blockScan(const BaseDB &bdb, const string &buffer,
                              size_t align, match_event_handler callback,
                              void *ctx_in, ResultSet *) {
    assert(colliderMode == MODE_BLOCK);
    assert(!m_xcompile);

    const hs_database_t *db = reinterpret_cast<const HyperscanDB &>(bdb).db;
    assert(db);
    MultiContext *ctx = (MultiContext *)ctx_in;

    char *realigned = setupScanBuffer(buffer.c_str(), buffer.size(), align);
    if (!realigned) {
        return false;
    }

    if (use_copy_scratch && !cloneScratch()) {
        return false;
    }

    ctx->in_scan_call = true;
    hs_error_t ret =
        hs_scan(db, realigned, buffer.size(), 0, scratch, callback, ctx);
    ctx->in_scan_call = false;

    if (g_verbose) {
        out << "Scan call returned " << ret << endl;
    }

    if (ctx->terminated) {
        if (g_verbose && ret != HS_SCAN_TERMINATED) {
            out << "Scan should have returned HS_SCAN_TERMINATED, returned "
                << ret << " instead." << endl;
        }
        return ret == HS_SCAN_TERMINATED;
    }

    if (g_verbose && ret != HS_SUCCESS) {
        out << "Scan should have returned HS_SUCCESS, returned " << ret
            << " instead." << endl;
    }

    if (use_mangle_scratch) {
        mangle_scratch(scratch);
    }

    return ret == HS_SUCCESS;
}

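// Compress the given stream to a heap buffer and close it, returning the
// compressed bytes (or an empty vector on failure). hs_compress_stream is
// called twice: once with a null buffer to discover the required size (the
// API reports HS_INSUFFICIENT_SPACE), then again with a buffer of that size.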
static
vector<char> compressAndCloseStream(hs_stream_t *stream) {
    size_t needed;
    hs_error_t err = hs_compress_stream(stream, nullptr, 0, &needed);
    if (err != HS_INSUFFICIENT_SPACE) {
        return {};
    }

    vector<char> buf(needed);
    err = hs_compress_stream(stream, buf.data(), needed, &needed);
    if (err != HS_SUCCESS) {
        return {};
    }
    assert(needed == buf.size());

    err = hs_close_stream(stream, nullptr, nullptr, nullptr);
    if (err != HS_SUCCESS) {
        return {};
    }

    return buf;
}

static
hs_stream_t *compressAndExpandStream(const hs_database_t *db,
                                     hs_stream_t *stream) {
    vector<char> buf = compressAndCloseStream(stream);
    if (buf.empty()) {
        return nullptr;
    }

    hs_stream_t *out;
    hs_error_t err = hs_expand_stream(db, &out, buf.data(), buf.size());
    if (err != HS_SUCCESS) {
        return nullptr;
    }

    return out;
}

static
hs_stream_t *compressAndResetExpandStream(const hs_database_t *db,
                                          hs_stream_t *stream) {
    vector<char> buf = compressAndCloseStream(stream);
    if (buf.empty()) {
        return nullptr;
    }

    hs_stream_t *out;

    hs_error_t err = hs_open_stream(db, 0, &out);
    if (err != HS_SUCCESS) {
        return nullptr;
    }

    err = hs_reset_and_expand_stream(out, buf.data(), buf.size(), nullptr,
                                     nullptr, nullptr);
    if (err != HS_SUCCESS) {
        return nullptr;
    }

    return out;
}

bool UltimateTruth::streamingScan(const BaseDB &bdb, const string &buffer,
                                  size_t align, match_event_handler callback,
                                  void *ctx_in, ResultSet *rs) {
    assert(colliderMode == MODE_STREAMING);
    assert(!m_xcompile);

    const hs_database_t *db = reinterpret_cast<const HyperscanDB &>(bdb).db;
    assert(db);
    MultiContext *ctx = (MultiContext *)ctx_in;

    // Open a stream.
    hs_stream_t *stream;
    size_t stream_size;
    int ret;

    ret = hs_stream_size(db, &stream_size);
    if (ret != HS_SUCCESS) {
        out << "Unable to size stream." << endl;
        return false;
    }

    if (!g_streamOffset) {
        ret = hs_open_stream(db, 0, &stream);
    } else {
#ifndef RELEASE_BUILD
        ret = open_magic_stream(db, 0, &stream, scratch, g_streamOffset);
#else
        ret = HS_INVALID;
#endif
    }

    if (ret != HS_SUCCESS) {
        out << "Unable to open stream." << endl;
        return false;
    }

    // Scan our data, split into blocks and copied into a temporary buffer
    // aligned as requested (out of paranoia).
    unsigned blockSize = buffer.size() / m_streamBlocks;
    if (blockSize == 0) {
        blockSize = 1;
    }
    const char *ptr = buffer.c_str();
    const char *end = ptr + buffer.size();
    ctx->block = 0;

    // We use a do-while loop here so that zero-byte cases still generate at
    // least one hs_scan_stream call, since it's something users might try.
    do {
        if (ptr + blockSize > end) {
            // Last write is a runt.
            blockSize = end - ptr;
        }
        char *realigned = setupScanBuffer(ptr, blockSize, align);
        if (!realigned) {
            return false;
        }
        ctx->in_scan_call = true;
        DEBUG_PRINTF("scan stream write %u\n", ctx->block);
        ret = hs_scan_stream(stream, realigned, blockSize, 0, scratch,
                             callback, ctx);
        DEBUG_PRINTF("scan %u done\n", ctx->block);
        ctx->in_scan_call = false;

        if (limit_matches && rs->matches.size() == limit_matches) {
            if (ret != HS_SCAN_TERMINATED) {
                DEBUG_PRINTF("failure to scan %d\n", ret);
                return false;
            }
        } else if (ret != HS_SUCCESS) {
            DEBUG_PRINTF("failure to scan %d\n", ret);
            return false;
        }

        if (use_copy_scratch && !cloneScratch()) {
            return false;
        }

        if (use_copy_stream) {
            hs_stream_t *s2;
            ret = hs_copy_stream(&s2, stream);
            if (ret != HS_SUCCESS) {
                DEBUG_PRINTF("failure to copy %d\n", ret);
                return false;
            }
            /* Do a short write to the old stream so that it is in the wrong
             * state. */
            char temp[2] = {0, 0};
            ret = hs_scan_stream(stream, temp, sizeof(temp), 0, scratch,
                                 nullptr, nullptr);

            hs_error_t expected = HS_SUCCESS;
            if (limit_matches && rs->matches.size() == limit_matches) {
                expected = HS_SCAN_TERMINATED;
            }
            if (ret != expected) {
                DEBUG_PRINTF("failure to scan %d\n", ret);
                return false;
            }
            ret = hs_close_stream(stream, nullptr, nullptr, nullptr);
            if (ret != HS_SUCCESS) {
                DEBUG_PRINTF("failure to close %d\n", ret);
                return false;
            }
            stream = s2;
        }
        if (use_mangle_scratch) {
            mangle_scratch(scratch);
        }

        if (use_compress_expand) {
            auto rv = compressAndExpandStream(db, stream);
            if (!rv) {
                if (g_verbose) {
                    out << "Compress/Expand failed." << endl;
                }
                return false;
            } else {
                stream = rv;
            }
        }

        if (use_compress_reset_expand) {
            auto rv = compressAndResetExpandStream(db, stream);
            if (!rv) {
                if (g_verbose) {
                    out << "Compress/Expand failed." << endl;
                }
                return false;
            } else {
                stream = rv;
            }
        }

        ptr += blockSize;
        ctx->block++;
    } while (ptr < end);

    // Close the stream.
    ctx->in_scan_call = true;
    DEBUG_PRINTF("close stream %u\n", ctx->block);
    ret = hs_close_stream(stream, scratch, callback, ctx);
    DEBUG_PRINTF("close stream done\n");
    ctx->in_scan_call = false;

    if (ret != HS_SUCCESS) {
        return false;
    }

    // UE2 cannot dedupe SOM matches across stream boundaries, so we must
    // filter them out.
    filterLeftmostSom(*rs);

    return ret == HS_SUCCESS;
}

bool UltimateTruth::vectoredScan(const BaseDB &bdb, const string &buffer,
                                 size_t align, match_event_handler callback,
                                 void *ctx_in, ResultSet *rs) {
    assert(colliderMode == MODE_VECTORED);
    assert(!m_xcompile);

    const hs_database_t *db = reinterpret_cast<const HyperscanDB &>(bdb).db;
    assert(db);
    MultiContext *ctx = (MultiContext *)ctx_in;

    int ret;

    assert(!g_streamOffset);

    // Scan our data, split into blocks and copied into a temporary buffer
    // aligned as requested (out of paranoia).
    unsigned blockSize = buffer.size() / m_streamBlocks;
    if (blockSize == 0) {
        blockSize = 1;
    }
    const char *ptr = buffer.c_str();
    const char *end = ptr + buffer.size();
    ctx->block = 0;

    // We use a do-while loop here so that zero-byte cases still produce at
    // least one vectored block, since it's something users might try.

    vector<const char *> data;
    vector<unsigned int> length;

    u32 block_count = (buffer.size() + blockSize - 1) / blockSize;
    block_count = MAX(block_count, 1);

    if (block_count > raw_blocks.size()) {
        raw_blocks.resize(block_count);
    }

    do {
        if (ptr + blockSize > end) {
            // Last write is a runt.
            blockSize = end - ptr;
        }
        char *realigned = setupVecScanBuffer(ptr, blockSize, align,
                                             ctx->block);
        if (!realigned) {
            return false;
        }

        data.push_back(realigned);
        length.push_back(blockSize);

        ptr += blockSize;
        ctx->block++;
    } while (ptr < end);

    if (use_copy_scratch && !cloneScratch()) {
        return false;
    }

    DEBUG_PRINTF("scan vectored write %u\n", ctx->block);
    ctx->in_scan_call = true;
    ret = hs_scan_vector(db, &data[0], &length[0], ctx->block, 0, scratch,
                         callback, ctx);
    ctx->in_scan_call = false;
    DEBUG_PRINTF("scan %u done\n", ctx->block);
    if (use_mangle_scratch) {
        mangle_scratch(scratch);
    }

    rs->dupe_matches.clear(); /* TODO: dedupe across vectored blocks */

    if (limit_matches && rs->matches.size() == limit_matches) {
        if (ret != HS_SCAN_TERMINATED) {
            DEBUG_PRINTF("failure to scan %d\n", ret);
            return false;
        }
    } else if (ret != HS_SUCCESS) {
        DEBUG_PRINTF("failure to scan %d\n", ret);
        return false;
    }

    // UE2 cannot dedupe SOM matches across vector block boundaries, so we
    // must filter them out.
    filterLeftmostSom(*rs);

    return true;
}

#ifdef HS_HYBRID
bool UltimateTruth::hybridScan(const BaseDB &bdb, const string &buffer,
                               size_t align, ch_match_event_handler callback,
                               ch_error_event_handler error_callback,
                               void *ctx_in, ResultSet *) {
    assert(colliderMode == MODE_HYBRID);
    assert(!m_xcompile);

    const ch_database_t *db = reinterpret_cast<const HybridDB &>(bdb).db;
    assert(db);
    MultiContext *ctx = (MultiContext *)ctx_in;

    char *realigned = setupScanBuffer(buffer.c_str(), buffer.size(), align);
    if (!realigned) {
        return false;
    }

    if (use_copy_scratch && !cloneScratch()) {
        return false;
    }

    ctx->in_scan_call = true;
    ch_error_t ret =
        ch_scan(db, realigned, buffer.size(), 0, chimeraScratch, callback,
                error_callback, ctx);
    ctx->in_scan_call = false;

    if (g_verbose) {
        out << "Scan call returned " << ret << endl;
    }

    if (ctx->terminated) {
        if (g_verbose && ret != CH_SCAN_TERMINATED) {
            out << "Scan should have returned CH_SCAN_TERMINATED, returned "
                << ret << " instead." << endl;
        }
        return ret == CH_SCAN_TERMINATED;
    }

    if (g_verbose && ret != CH_SUCCESS) {
        out << "Scan should have returned CH_SUCCESS, returned " << ret
            << " instead." << endl;
    }

    return ret == CH_SUCCESS;
}
#endif

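// Top-level scan entry point: dispatch to the scanning routine for the
// current collider mode, collecting matches for the given ID into rs.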
bool UltimateTruth::run(unsigned int id, shared_ptr<const BaseDB> bdb,
                        const string &buffer, bool single_pattern,
                        unsigned int align, ResultSet &rs) {
    assert(!m_xcompile);
    assert(bdb);

    // Ensure that scratch is appropriate for this database.
    if (!allocScratch(bdb)) {
        out << "Scratch alloc failed." << endl;
        return false;
    }

    MultiContext ctx(id, *bdb, &rs, single_pattern, out);
    if (!g_corpora_suffix.empty()) {
        ctx.use_max_offset = true;
        ctx.max_offset = buffer.size() - g_corpora_suffix.size();
    }

    switch (colliderMode) {
    case MODE_BLOCK:
        return blockScan(*bdb, buffer, align, callbackMulti, &ctx, &rs);
    case MODE_STREAMING:
        return streamingScan(*bdb, buffer, align, callbackMulti, &ctx, &rs);
    case MODE_VECTORED:
        return vectoredScan(*bdb, buffer, align, callbackMulti, &ctx, &rs);
    case MODE_HYBRID:
#ifdef HS_HYBRID
        return hybridScan(*bdb, buffer, align, callbackHybrid, errorCallback,
                          &ctx, &rs);
#else
        cerr << "Hybrid mode not available in this build." << endl;
        abort();
#endif
        break;
    }

    assert(0);
    return false;
}

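// Ask hs_expression_info whether matches for this expression are guaranteed
// to be reported in increasing order of end offset.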
static
bool isOrdered(const string &expr, unsigned int flags) {
    // SOM doesn't produce ordered matches?
    if (flags & HS_FLAG_SOM_LEFTMOST) {
        return false;
    }

    hs_expr_info_t *info = nullptr;
    hs_compile_error_t *error = nullptr;
    hs_error_t err = hs_expression_info(expr.c_str(), flags, &info, &error);
    if (err != HS_SUCCESS) {
        // Expression will fail compilation and report its error elsewhere.
        free(info);
        hs_free_compile_error(error);
        return false;
    }

    assert(info);

    // Any pattern that does not require offset adjustment should produce
    // matches in order.
    bool ordered = !info->unordered_matches;
    free(info);
    return ordered;
}

static unique_ptr<BaseDB>
compileHyperscan(vector<const char *> &patterns, vector<unsigned> &flags,
                 vector<unsigned> &idsvec, ptr_vector<hs_expr_ext> &ext,
                 unsigned mode, const hs_platform_info *platform,
                 string &error, const Grey &grey) {
    const unsigned count = patterns.size();
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err;
    hs_error_t err;

    if (use_literal_api) {
        // Compute the length of each pattern.
        vector<size_t> lens(count);
        for (unsigned int i = 0; i < count; i++) {
            lens[i] = strlen(patterns[i]);
        }
        err = hs_compile_lit_multi_int(&patterns[0], &flags[0], &idsvec[0],
                                       ext.c_array(), &lens[0], count, mode,
                                       platform, &db, &compile_err, grey);
    } else {
        err = hs_compile_multi_int(&patterns[0], &flags[0], &idsvec[0],
                                   ext.c_array(), count, mode, platform, &db,
                                   &compile_err, grey);
    }

    if (err != HS_SUCCESS) {
        error = compile_err->message;
        hs_free_compile_error(compile_err);
        return nullptr;
    }

    return std::make_unique<HyperscanDB>(db, idsvec.begin(), idsvec.end());
}

#ifdef HS_HYBRID
static unique_ptr<BaseDB>
compileHybrid(vector<const char *> &patterns,
              vector<unsigned> &flags, vector<unsigned> &idsvec,
              unsigned mode, const hs_platform_info *platform,
              string &error) {
    const unsigned count = patterns.size();
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err;

    ch_error_t err = ch_compile_multi(&patterns[0], &flags[0],
                                      &idsvec[0], count, mode, platform, &db,
                                      &compile_err);

    if (err != HS_SUCCESS) {
        error = compile_err->message;
        ch_free_compile_error(compile_err);
        return nullptr;
    }

    return std::make_unique<HybridDB>(db, idsvec.begin(), idsvec.end());
}
#endif

shared_ptr<BaseDB> UltimateTruth::compile(const set<unsigned> &ids,
                                          string &error) const {
    // Build our vectors for compilation.
    const size_t count = ids.size();
    vector<string> expressions(count);
    vector<unsigned> idsvec(ids.begin(), ids.end());
    vector<unsigned> flags(count);
    vector<bool> check_ordered(count, false);
    ptr_vector<hs_expr_ext> ext;
    ext.reserve(count);

    size_t n = 0;
    for (const auto &id : ids) {
        auto j = m_expr.find(id);
        if (j == m_expr.end()) {
            error = "Unable to find ID.";
            return nullptr;
        }

        ext.push_back(new hs_expr_ext);
        bool must_be_ordered;
        if (!readExpression(j->second, expressions[n], &flags[n], &ext[n],
                            &must_be_ordered)) {
            ostringstream oss;
            oss << "Unable to decode flags: '" << j->first << ":"
                << j->second << "'.";
            error = oss.str();
            return nullptr;
        }

        check_ordered[n] = must_be_ordered;

        if (force_utf8) {
            flags[n] |= HS_FLAG_UTF8;
        }

        if (force_prefilter) {
            flags[n] |= HS_FLAG_PREFILTER;
        }

        if (somFlags) {
            flags[n] |= somFlags;
        }

        if (force_edit_distance) {
            ext[n].flags |= HS_EXT_FLAG_EDIT_DISTANCE;
            ext[n].edit_distance = edit_distance;
        }

        if (colliderMode == MODE_HYBRID) {
            if (ext[n].flags) {
                error = "Hybrid does not support extended parameters.";
                return nullptr;
            }
            // We can also strip some other flags in the hybrid matcher.
            flags[n] &= ~HS_FLAG_PREFILTER;    // prefilter always used
            flags[n] &= ~HS_FLAG_ALLOWEMPTY;   // empty always allowed
            flags[n] &= ~HS_FLAG_SOM_LEFTMOST; // SOM always on
        }

        n++;
    }

    // Our compiler takes an array of plain ol' C strings.
    vector<const char *> patterns(count);
    for (unsigned int i = 0; i < count; i++) {
        patterns[i] = expressions[i].c_str();
    }

    // Compile.
    if (!count) { /* slight hack to allow us to compile empty sets cleanly */
        patterns.push_back(nullptr);
        flags.push_back(0);
        idsvec.push_back(0);
    }

    unique_ptr<BaseDB> db;
    if (colliderMode == MODE_HYBRID) {
#ifdef HS_HYBRID
        db = compileHybrid(patterns, flags, idsvec, m_mode, platform, error);
#else
        error = "Hybrid mode not available in this build.";
#endif
    } else {
        db = compileHyperscan(patterns, flags, idsvec, ext, m_mode,
                              platform, error, grey);
    }

    if (!db) {
        return nullptr;
    }

    // Track IDs of patterns that require ordering for validation at match
    // time.
    for (unsigned int i = 0; i < count; i++) {
        bool is_ordered = isOrdered(expressions[i], flags[i]);
        if (check_ordered[i] && !is_ordered) {
            error = "Ordering required, but hs_expression_info suggests "
                    "that ordering is not guaranteed.";
            return nullptr;
        }
        if (is_ordered) {
            db->ordered.insert(idsvec[i]);
        }
    }

    return std::move(db);
}

bool UltimateTruth::allocScratch(shared_ptr<const BaseDB> db) {
    assert(db);

    // We explicitly avoid running scratch allocators for the same BaseDB
    // over and over again by retaining a shared_ptr to the last one we saw.
    if (db == last_db) {
        return true;
    }

    if (colliderMode == MODE_HYBRID) {
#ifdef HS_HYBRID
        ch_error_t err = ch_alloc_scratch(
            reinterpret_cast<const HybridDB *>(db.get())->db,
            &chimeraScratch);
        if (err != HS_SUCCESS) {
            return false;
        }
#endif // HS_HYBRID
    } else {
        hs_error_t err = hs_alloc_scratch(
            reinterpret_cast<const HyperscanDB *>(db.get())->db, &scratch);
        if (err != HS_SUCCESS) {
            return false;
        }
    }

    last_db = db;
    return true;
}

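// Replace the current scratch with a clone of itself and free the original,
// exercising hs_clone_scratch/ch_clone_scratch and catching any code that
// holds on to pointers into the old allocation.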
bool UltimateTruth::cloneScratch(void) {
    if (colliderMode == MODE_HYBRID) {
#ifdef HS_HYBRID
        ch_scratch_t *old_scratch = chimeraScratch;
        ch_scratch_t *new_scratch;
        ch_error_t ret = ch_clone_scratch(chimeraScratch, &new_scratch);
        if (ret != CH_SUCCESS) {
            DEBUG_PRINTF("failure to clone %d\n", ret);
            return false;
        }
        chimeraScratch = new_scratch;
        ret = ch_free_scratch(old_scratch);
        if (ret != CH_SUCCESS) {
            DEBUG_PRINTF("failure to free %d\n", ret);
            return false;
        }
        DEBUG_PRINTF("hybrid scratch cloned from %p to %p\n",
                     old_scratch, chimeraScratch);
#endif // HS_HYBRID
    } else {
        hs_scratch_t *old_scratch = scratch;
        hs_scratch_t *new_scratch;
        hs_error_t ret = hs_clone_scratch(scratch, &new_scratch);
        if (ret != HS_SUCCESS) {
            DEBUG_PRINTF("failure to clone %d\n", ret);
            return false;
        }
        scratch = new_scratch;
        ret = hs_free_scratch(old_scratch);
        if (ret != HS_SUCCESS) {
            DEBUG_PRINTF("failure to free %d\n", ret);
            return false;
        }
        DEBUG_PRINTF("scratch cloned from %p to %p\n", old_scratch, scratch);
    }
    return true;
}

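// Worked example of the alignment arithmetic in the two helpers below
// (values illustrative): with MAX_MAX_UE2_ALIGN == 64, a backing buffer
// whose address gives currentAlign == 8 and a requested align of 3 yields
// diff == -5, so the copy is placed at offset 64 + (-5) == 59, and
// (8 + 59) % 64 == 3, as the assertion requires.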
// Return an appropriately aligned (modulo MAX_MAX_UE2_ALIGN) copy of the
// given buffer.
char *UltimateTruth::setupScanBuffer(const char *begin, size_t len,
                                     size_t align) {
    if (align >= MAX_MAX_UE2_ALIGN) {
        return nullptr;
    }

    // Reallocate if necessary.
    size_t maxBufSize = len + MAX_MAX_UE2_ALIGN;
    if (maxBufSize > m_scanBuf.size()) {
        m_scanBuf.resize(maxBufSize);
    }

    uintptr_t currentAlign = (uintptr_t)(m_scanBuf.data()) % MAX_MAX_UE2_ALIGN;
    char *ptr;

    ptrdiff_t diff = align - currentAlign;
    if (diff >= 0) {
        ptr = m_scanBuf.data() + diff;
    } else {
        ptr = m_scanBuf.data() + (MAX_MAX_UE2_ALIGN + diff);
    }
    assert((uintptr_t)(ptr) % MAX_MAX_UE2_ALIGN == align);

    // Copy the buffer.
    memcpy(ptr, begin, len);
    return ptr;
}

char *UltimateTruth::setupVecScanBuffer(const char *begin, size_t len,
                                        size_t align, u32 block_id) {
    if (align >= MAX_MAX_UE2_ALIGN) {
        return nullptr;
    }

    assert(block_id < raw_blocks.size());
    vector<char> &raw = raw_blocks[block_id];

    // Reallocate if necessary.
    size_t maxBufSize = len + MAX_MAX_UE2_ALIGN;
    if (maxBufSize > raw.size()) {
        raw.resize(maxBufSize);
    }
    assert(maxBufSize <= raw.size());

    uintptr_t currentAlign = (uintptr_t)(&raw[0]) % MAX_MAX_UE2_ALIGN;
    char *ptr;

    ptrdiff_t diff = align - currentAlign;
    if (diff >= 0) {
        ptr = &raw[0] + diff;
    } else {
        ptr = &raw[0] + (MAX_MAX_UE2_ALIGN + diff);
    }
    assert((uintptr_t)(ptr) % MAX_MAX_UE2_ALIGN == align);

    // Copy the buffer.
    memcpy(ptr, begin, len);
    return ptr;
}

bool UltimateTruth::saveDatabase(const BaseDB &bdb,
                                 const string &filename) const {
    if (colliderMode == MODE_HYBRID) {
        cerr << "Hybrid mode doesn't support serialization." << endl;
        abort();
    } else {
        return ::saveDatabase(reinterpret_cast<const HyperscanDB *>(&bdb)->db,
                              filename.c_str(), g_verbose);
    }
    return false;
}

shared_ptr<BaseDB>
UltimateTruth::loadDatabase(const string &filename,
                            const std::set<unsigned> &ids) const {
    shared_ptr<BaseDB> db;

    if (colliderMode == MODE_HYBRID) {
        cerr << "Hybrid mode doesn't support deserialization." << endl;
        abort();
    } else {
        hs_database_t *hs_db = ::loadDatabase(filename.c_str(), g_verbose);
        if (!hs_db) {
            return nullptr;
        }

        db = make_shared<HyperscanDB>(hs_db, ids.begin(), ids.end());
    }

    assert(db);

    // Fill db->ordered with the expressions that require the ordered flag.
    for (const auto &id : ids) {
        auto j = m_expr.find(id);
        if (j == m_expr.end()) {
            cerr << "Can't find expression with ID " << id << endl;
            assert(0);
            db.reset();
            return db;
        }
        string expr;
        hs_expr_ext ext;
        unsigned int flags;
        if (!readExpression(j->second, expr, &flags, &ext)) {
            cerr << "Can't parse expression with ID " << id << ": "
                 << j->second << endl;
            assert(0);
            db.reset();
            return db;
        }
        if (isOrdered(expr, flags)) {
            db->ordered.insert(id);
        }
    }

    return db;
}

unsigned int UltimateTruth::describe() const {
    return m_mode;
}

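// Example (illustrative): a block-mode database (HS_MODE_BLOCK == 1) over
// IDs {1, 2} hashes the string " 1 1 2 " and produces an eight-hex-digit
// CRC32-C string such as "0f53b974".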
// Hash the settings used to compile a database, returning a string that can
// be used as a filename.
string UltimateTruth::dbSettingsHash(const set<unsigned int> &ids) const {
    // Create a single string to contain a description of the db.
    ostringstream info_oss;

    // Settings from UltimateTruth::describe().
    info_oss << ' ' << describe() << ' ';

    // Our set of expression IDs.
    for (unsigned int id : ids) {
        info_oss << id << ' ';
    }

    string info = info_oss.str();

    u32 crc = Crc32c_ComputeBuf(0, info.data(), info.size());

    // Return an STL string with a printable version of the digest.
    ostringstream oss;
    oss << hex << setw(8) << setfill('0') << crc << dec;

    return oss.str();
}

string UltimateTruth::dbFilename(const set<unsigned int> &ids) const {
    ostringstream oss;
    oss << serializePath << '/' << dbSettingsHash(ids) << ".db";
    return oss.str();
}