mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
We were using intermediate values int he enum and casting back and forth with a u32; it is cleaner to just use a u32 and define some special values. Silences ICC warning #188: enumerated type mixed with another type.
390 lines
13 KiB
C++
390 lines
13 KiB
C++
/*
|
|
* Copyright (c) 2015, Intel Corporation
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "mpvcompile.h"
|
|
|
|
#include "mpv_internal.h"
|
|
#include "nfa_api_queue.h"
|
|
#include "nfa_internal.h"
|
|
#include "shufticompile.h"
|
|
#include "trufflecompile.h"
|
|
#include "util/alloc.h"
|
|
#include "util/multibit_internal.h"
|
|
#include "util/order_check.h"
|
|
#include "util/verify_types.h"
|
|
|
|
#include <algorithm>
|
|
#include <iterator>
|
|
#include <map>
|
|
|
|
#include <boost/range/adaptor/map.hpp>
|
|
|
|
using namespace std;
|
|
using boost::adaptors::map_values;
|
|
using boost::adaptors::map_keys;
|
|
|
|
namespace ue2 {
|
|
|
|
namespace {
|
|
struct pcomp {
|
|
bool operator()(const raw_puff &a, const raw_puff &b) const {
|
|
ORDER_CHECK(repeats);
|
|
ORDER_CHECK(unbounded);
|
|
ORDER_CHECK(report);
|
|
return false;
|
|
}
|
|
};
|
|
|
|
struct ClusterKey {
|
|
explicit ClusterKey(const raw_puff &src)
|
|
: trigger_event(MQE_INVALID), reach(src.reach),
|
|
auto_restart(src.auto_restart) {}
|
|
ClusterKey(u32 event, const raw_puff &src)
|
|
: trigger_event(event), reach(src.reach),
|
|
auto_restart(src.auto_restart) {}
|
|
|
|
u32 trigger_event;
|
|
CharReach reach;
|
|
bool auto_restart;
|
|
|
|
bool operator<(const ClusterKey &b) const {
|
|
const ClusterKey &a = *this;
|
|
ORDER_CHECK(trigger_event); /* want triggered puffs first */
|
|
ORDER_CHECK(auto_restart);
|
|
ORDER_CHECK(reach);
|
|
return false;
|
|
}
|
|
};
|
|
|
|
} // namespace
|
|
|
|
static
|
|
void writePuffette(mpv_puffette *out, const raw_puff &rp) {
|
|
DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded,
|
|
rp.report, out);
|
|
out->repeats = rp.repeats;
|
|
out->unbounded = rp.unbounded;
|
|
out->report = rp.report;
|
|
}
|
|
|
|
static
|
|
void writeDeadPoint(mpv_kilopuff *out, const vector<raw_puff> &puffs) {
|
|
for (const auto &puff : puffs) {
|
|
if (puff.unbounded) { /* mpv can never die */
|
|
out->dead_point = MPV_DEAD_VALUE;
|
|
return;
|
|
}
|
|
}
|
|
|
|
out->dead_point = puffs.back().repeats + 1;
|
|
}
|
|
|
|
static
|
|
size_t calcSize(const map<ClusterKey, vector<raw_puff>> &raw,
|
|
const vector<mpv_counter_info> &counters) {
|
|
size_t len = sizeof(NFA) + sizeof(mpv);
|
|
|
|
len += sizeof(mpv_kilopuff) * raw.size(); /* need a kilopuff for each
|
|
distinct reach */
|
|
|
|
len += sizeof(mpv_counter_info) * counters.size();
|
|
|
|
len += sizeof(mpv_puffette); /* initial sent */
|
|
|
|
for (const vector<raw_puff> &puffs : raw | map_values) {
|
|
len += sizeof(mpv_puffette) * puffs.size();
|
|
len += sizeof(mpv_puffette); /* terminal sent */
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static
|
|
void populateClusters(const vector<raw_puff> &puffs_in,
|
|
const vector<raw_puff> &triggered_puffs,
|
|
map<ClusterKey, vector<raw_puff>> *raw) {
|
|
map<ClusterKey, vector<raw_puff>> &puff_clusters = *raw;
|
|
|
|
u32 e = MQE_TOP_FIRST;
|
|
for (const auto &puff : triggered_puffs) {
|
|
puff_clusters[ClusterKey(e, puff)].push_back(puff);
|
|
e++;
|
|
}
|
|
|
|
for (const auto &puff : puffs_in) {
|
|
puff_clusters[ClusterKey(puff)].push_back(puff);
|
|
}
|
|
|
|
|
|
for (vector<raw_puff> &puffs : puff_clusters | map_values) {
|
|
sort(puffs.begin(), puffs.end(), pcomp());
|
|
}
|
|
}
|
|
|
|
static
|
|
void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it,
|
|
u32 counter_offset, mpv *m, mpv_kilopuff *kp,
|
|
mpv_puffette **pa) {
|
|
const CharReach &reach = it->first.reach;
|
|
const vector<raw_puff> &puffs = it->second;
|
|
|
|
kp->auto_restart = it->first.auto_restart;
|
|
|
|
if (reach.all()) {
|
|
kp->type = MPV_DOT;
|
|
} else if (reach.count() == 255) {
|
|
kp->type = MPV_VERM;
|
|
size_t unset = (~reach).find_first();
|
|
assert(unset != CharReach::npos);
|
|
kp->u.verm.c = (char)unset;
|
|
} else if (reach.count() == 1) {
|
|
kp->type = MPV_NVERM;
|
|
size_t set = reach.find_first();
|
|
assert(set != CharReach::npos);
|
|
kp->u.verm.c = (char)set;
|
|
} else if (shuftiBuildMasks(~reach, &kp->u.shuf.mask_lo,
|
|
&kp->u.shuf.mask_hi) != -1) {
|
|
kp->type = MPV_SHUFTI;
|
|
} else {
|
|
kp->type = MPV_TRUFFLE;
|
|
truffleBuildMasks(~reach, &kp->u.truffle.mask1, &kp->u.truffle.mask2);
|
|
}
|
|
|
|
kp->count = verify_u32(puffs.size());
|
|
kp->counter_offset = counter_offset;
|
|
|
|
/* start of real puffette array */
|
|
kp->puffette_offset = verify_u32((char *)*pa - (char *)m);
|
|
for (size_t i = 0; i < puffs.size(); i++) {
|
|
assert(!it->first.auto_restart || puffs[i].unbounded);
|
|
writePuffette(*pa + i, puffs[i]);
|
|
}
|
|
|
|
*pa += puffs.size();
|
|
writePuffette(*pa, raw_puff(0U, false, INVALID_REPORT, CharReach()));
|
|
++*pa;
|
|
|
|
writeDeadPoint(kp, puffs);
|
|
}
|
|
|
|
static
|
|
void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter,
|
|
u32 streamStateSize, u32 scratchStateSize) {
|
|
assert(nfa);
|
|
|
|
nfa->length = len;
|
|
nfa->nPositions = max_counter - 1;
|
|
nfa->type = MPV_NFA_0;
|
|
nfa->streamStateSize = streamStateSize;
|
|
assert(16 >= sizeof(mpv_decomp_kilo));
|
|
nfa->scratchStateSize = scratchStateSize;
|
|
nfa->minWidth = min_width;
|
|
}
|
|
|
|
static
|
|
void findCounterSize(map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
|
|
map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite,
|
|
u64a *max_counter_out, u32 *counter_size) {
|
|
u32 max_counter = 0; /* max counter that we may need to know about is one
|
|
more than largest repeat */
|
|
for (; kp_it != kp_ite; ++kp_it) {
|
|
max_counter = MAX(max_counter, kp_it->second.back().repeats + 1);
|
|
}
|
|
|
|
if (max_counter < (1U << 8)) {
|
|
*counter_size = 1;
|
|
} else if (max_counter < (1U << 16)) {
|
|
*counter_size = 2;
|
|
} else if (max_counter < (1U << 24)) {
|
|
*counter_size = 3;
|
|
} else {
|
|
*counter_size = 4;
|
|
}
|
|
|
|
*max_counter_out = max_counter;
|
|
}
|
|
|
|
static
|
|
void fillCounterInfo(mpv_counter_info *out, u32 *curr_decomp_offset,
|
|
u32 *curr_comp_offset,
|
|
const map<ClusterKey, vector<raw_puff>> &kilopuffs,
|
|
map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
|
|
map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite) {
|
|
|
|
out->kilo_begin = distance(kilopuffs.begin(), kp_it);
|
|
out->kilo_end = distance(kilopuffs.begin(), kp_ite);
|
|
findCounterSize(kp_it, kp_ite, &out->max_counter, &out->counter_size);
|
|
out->counter_offset = *curr_decomp_offset;
|
|
*curr_decomp_offset += sizeof(u64a);
|
|
*curr_comp_offset += out->counter_size;
|
|
}
|
|
|
|
static
|
|
void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
|
|
u32 *curr_comp_offset,
|
|
const map<ClusterKey, vector<raw_puff>> &kilopuffs) {
|
|
/* first the triggered puffs */
|
|
map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin();
|
|
while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) {
|
|
assert(!it->first.auto_restart);
|
|
assert(it->first.trigger_event
|
|
== MQE_TOP_FIRST + distance(kilopuffs.begin(), it));
|
|
|
|
out->push_back(mpv_counter_info());
|
|
map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
|
|
++it;
|
|
fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
|
|
kilopuffs, it_o, it);
|
|
}
|
|
|
|
/* we may have 2 sets of non triggered puffs:
|
|
* 1) always started with no auto_restart
|
|
* 2) always started with auto_restart
|
|
*/
|
|
map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it;
|
|
while (it != kilopuffs.end() && !it->first.auto_restart) {
|
|
assert(it->first.trigger_event == MQE_INVALID);
|
|
|
|
++it;
|
|
}
|
|
if (it != trig_ite) {
|
|
out->push_back(mpv_counter_info());
|
|
fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
|
|
kilopuffs, kilopuffs.begin(), it);
|
|
}
|
|
while (it != kilopuffs.end() && it->first.auto_restart) {
|
|
assert(it->first.trigger_event == MQE_INVALID);
|
|
|
|
out->push_back(mpv_counter_info());
|
|
map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
|
|
++it;
|
|
fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
|
|
kilopuffs, it_o, it);
|
|
}
|
|
}
|
|
|
|
static
|
|
const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters,
|
|
u32 i) {
|
|
for (const auto &counter : counters) {
|
|
if (i >= counter.kilo_begin && i < counter.kilo_end) {
|
|
return counter;
|
|
}
|
|
}
|
|
assert(0);
|
|
return counters.front();
|
|
}
|
|
|
|
aligned_unique_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
|
|
const vector<raw_puff> &triggered_puffs) {
|
|
assert(!puffs_in.empty() || !triggered_puffs.empty());
|
|
u32 puffette_count = puffs_in.size() + triggered_puffs.size();
|
|
|
|
map<ClusterKey, vector<raw_puff>> puff_clusters;
|
|
populateClusters(puffs_in, triggered_puffs, &puff_clusters);
|
|
|
|
u32 curr_comp_offset = 0;
|
|
|
|
u32 curr_decomp_offset = sizeof(mpv_decomp_state);
|
|
curr_decomp_offset += 16 * puff_clusters.size();
|
|
|
|
vector<mpv_counter_info> counters;
|
|
fillCounterInfos(&counters, &curr_decomp_offset, &curr_comp_offset,
|
|
puff_clusters);
|
|
|
|
u32 pq_offset = curr_decomp_offset;
|
|
curr_decomp_offset += sizeof(mpv_pq_item) * puff_clusters.size();
|
|
|
|
u32 rl_offset = curr_decomp_offset;
|
|
curr_decomp_offset += sizeof(ReportID) * puffette_count;
|
|
|
|
u32 reporter_offset = curr_decomp_offset;
|
|
curr_decomp_offset += mmbit_size(puff_clusters.size());
|
|
|
|
u32 active_offset = curr_comp_offset;
|
|
curr_comp_offset += mmbit_size(puff_clusters.size());
|
|
|
|
u32 len = calcSize(puff_clusters, counters);
|
|
|
|
DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len);
|
|
|
|
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(len);
|
|
|
|
mpv_puffette *pa_base = (mpv_puffette *)
|
|
((char *)nfa.get() + sizeof(NFA) + sizeof(mpv)
|
|
+ sizeof(mpv_kilopuff) * puff_clusters.size()
|
|
+ sizeof(mpv_counter_info) * counters.size());
|
|
mpv_puffette *pa = pa_base;
|
|
|
|
writePuffette(pa, raw_puff(0U, false, INVALID_REPORT, CharReach()));
|
|
|
|
++pa; /* skip init sentinel */
|
|
|
|
u32 min_repeat = ~0U;
|
|
u32 max_counter = 0; /* max counter that we may need to know about is one
|
|
more than largest repeat */
|
|
for (const vector<raw_puff> &puffs : puff_clusters | map_values) {
|
|
max_counter = max(max_counter, puffs.back().repeats + 1);
|
|
min_repeat = min(min_repeat, puffs.front().repeats);
|
|
}
|
|
|
|
mpv *m = (mpv *)getMutableImplNfa(nfa.get());
|
|
m->kilo_count = verify_u32(puff_clusters.size());
|
|
m->counter_count = verify_u32(counters.size());
|
|
m->puffette_count = puffette_count;
|
|
m->pq_offset = pq_offset;
|
|
m->reporter_offset = reporter_offset;
|
|
m->report_list_offset = rl_offset;
|
|
m->active_offset = active_offset;
|
|
m->top_kilo_begin = verify_u32(triggered_puffs.size());
|
|
m->top_kilo_end = verify_u32(puff_clusters.size());
|
|
|
|
mpv_kilopuff *kp_begin = (mpv_kilopuff *)(m + 1);
|
|
mpv_kilopuff *kp = kp_begin;
|
|
for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) {
|
|
writeKiloPuff(it, findCounter(counters, kp - kp_begin).counter_offset,
|
|
m, kp, &pa);
|
|
++kp;
|
|
}
|
|
assert((char *)pa == (char *)nfa.get() + len);
|
|
|
|
mpv_counter_info *out_ci = (mpv_counter_info *)kp;
|
|
for (const auto &counter : counters) {
|
|
*out_ci = counter;
|
|
++out_ci;
|
|
}
|
|
assert((char *)out_ci == (char *)pa_base);
|
|
|
|
writeCoreNfa(nfa.get(), len, min_repeat, max_counter, curr_comp_offset,
|
|
curr_decomp_offset);
|
|
|
|
return nfa;
|
|
}
|
|
|
|
} // namespace ue2
|