vectorscan/src/nfa/mpvcompile.cpp
Justin Viiret 55b357f7d1 Remove enum mqe_event and use u32 for queue events
We were using intermediate values int he enum and casting back and forth
with a u32; it is cleaner to just use a u32 and define some special
values.

Silences ICC warning #188: enumerated type mixed with another type.
2015-10-30 11:28:37 +11:00

390 lines
13 KiB
C++

/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "mpvcompile.h"
#include "mpv_internal.h"
#include "nfa_api_queue.h"
#include "nfa_internal.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/alloc.h"
#include "util/multibit_internal.h"
#include "util/order_check.h"
#include "util/verify_types.h"
#include <algorithm>
#include <iterator>
#include <map>
#include <boost/range/adaptor/map.hpp>
using namespace std;
using boost::adaptors::map_values;
using boost::adaptors::map_keys;
namespace ue2 {
namespace {
struct pcomp {
bool operator()(const raw_puff &a, const raw_puff &b) const {
ORDER_CHECK(repeats);
ORDER_CHECK(unbounded);
ORDER_CHECK(report);
return false;
}
};
struct ClusterKey {
explicit ClusterKey(const raw_puff &src)
: trigger_event(MQE_INVALID), reach(src.reach),
auto_restart(src.auto_restart) {}
ClusterKey(u32 event, const raw_puff &src)
: trigger_event(event), reach(src.reach),
auto_restart(src.auto_restart) {}
u32 trigger_event;
CharReach reach;
bool auto_restart;
bool operator<(const ClusterKey &b) const {
const ClusterKey &a = *this;
ORDER_CHECK(trigger_event); /* want triggered puffs first */
ORDER_CHECK(auto_restart);
ORDER_CHECK(reach);
return false;
}
};
} // namespace
static
void writePuffette(mpv_puffette *out, const raw_puff &rp) {
DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded,
rp.report, out);
out->repeats = rp.repeats;
out->unbounded = rp.unbounded;
out->report = rp.report;
}
static
void writeDeadPoint(mpv_kilopuff *out, const vector<raw_puff> &puffs) {
for (const auto &puff : puffs) {
if (puff.unbounded) { /* mpv can never die */
out->dead_point = MPV_DEAD_VALUE;
return;
}
}
out->dead_point = puffs.back().repeats + 1;
}
static
size_t calcSize(const map<ClusterKey, vector<raw_puff>> &raw,
const vector<mpv_counter_info> &counters) {
size_t len = sizeof(NFA) + sizeof(mpv);
len += sizeof(mpv_kilopuff) * raw.size(); /* need a kilopuff for each
distinct reach */
len += sizeof(mpv_counter_info) * counters.size();
len += sizeof(mpv_puffette); /* initial sent */
for (const vector<raw_puff> &puffs : raw | map_values) {
len += sizeof(mpv_puffette) * puffs.size();
len += sizeof(mpv_puffette); /* terminal sent */
}
return len;
}
static
void populateClusters(const vector<raw_puff> &puffs_in,
const vector<raw_puff> &triggered_puffs,
map<ClusterKey, vector<raw_puff>> *raw) {
map<ClusterKey, vector<raw_puff>> &puff_clusters = *raw;
u32 e = MQE_TOP_FIRST;
for (const auto &puff : triggered_puffs) {
puff_clusters[ClusterKey(e, puff)].push_back(puff);
e++;
}
for (const auto &puff : puffs_in) {
puff_clusters[ClusterKey(puff)].push_back(puff);
}
for (vector<raw_puff> &puffs : puff_clusters | map_values) {
sort(puffs.begin(), puffs.end(), pcomp());
}
}
static
void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it,
u32 counter_offset, mpv *m, mpv_kilopuff *kp,
mpv_puffette **pa) {
const CharReach &reach = it->first.reach;
const vector<raw_puff> &puffs = it->second;
kp->auto_restart = it->first.auto_restart;
if (reach.all()) {
kp->type = MPV_DOT;
} else if (reach.count() == 255) {
kp->type = MPV_VERM;
size_t unset = (~reach).find_first();
assert(unset != CharReach::npos);
kp->u.verm.c = (char)unset;
} else if (reach.count() == 1) {
kp->type = MPV_NVERM;
size_t set = reach.find_first();
assert(set != CharReach::npos);
kp->u.verm.c = (char)set;
} else if (shuftiBuildMasks(~reach, &kp->u.shuf.mask_lo,
&kp->u.shuf.mask_hi) != -1) {
kp->type = MPV_SHUFTI;
} else {
kp->type = MPV_TRUFFLE;
truffleBuildMasks(~reach, &kp->u.truffle.mask1, &kp->u.truffle.mask2);
}
kp->count = verify_u32(puffs.size());
kp->counter_offset = counter_offset;
/* start of real puffette array */
kp->puffette_offset = verify_u32((char *)*pa - (char *)m);
for (size_t i = 0; i < puffs.size(); i++) {
assert(!it->first.auto_restart || puffs[i].unbounded);
writePuffette(*pa + i, puffs[i]);
}
*pa += puffs.size();
writePuffette(*pa, raw_puff(0U, false, INVALID_REPORT, CharReach()));
++*pa;
writeDeadPoint(kp, puffs);
}
static
void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter,
u32 streamStateSize, u32 scratchStateSize) {
assert(nfa);
nfa->length = len;
nfa->nPositions = max_counter - 1;
nfa->type = MPV_NFA_0;
nfa->streamStateSize = streamStateSize;
assert(16 >= sizeof(mpv_decomp_kilo));
nfa->scratchStateSize = scratchStateSize;
nfa->minWidth = min_width;
}
static
void findCounterSize(map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite,
u64a *max_counter_out, u32 *counter_size) {
u32 max_counter = 0; /* max counter that we may need to know about is one
more than largest repeat */
for (; kp_it != kp_ite; ++kp_it) {
max_counter = MAX(max_counter, kp_it->second.back().repeats + 1);
}
if (max_counter < (1U << 8)) {
*counter_size = 1;
} else if (max_counter < (1U << 16)) {
*counter_size = 2;
} else if (max_counter < (1U << 24)) {
*counter_size = 3;
} else {
*counter_size = 4;
}
*max_counter_out = max_counter;
}
static
void fillCounterInfo(mpv_counter_info *out, u32 *curr_decomp_offset,
u32 *curr_comp_offset,
const map<ClusterKey, vector<raw_puff>> &kilopuffs,
map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite) {
out->kilo_begin = distance(kilopuffs.begin(), kp_it);
out->kilo_end = distance(kilopuffs.begin(), kp_ite);
findCounterSize(kp_it, kp_ite, &out->max_counter, &out->counter_size);
out->counter_offset = *curr_decomp_offset;
*curr_decomp_offset += sizeof(u64a);
*curr_comp_offset += out->counter_size;
}
static
void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
u32 *curr_comp_offset,
const map<ClusterKey, vector<raw_puff>> &kilopuffs) {
/* first the triggered puffs */
map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin();
while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) {
assert(!it->first.auto_restart);
assert(it->first.trigger_event
== MQE_TOP_FIRST + distance(kilopuffs.begin(), it));
out->push_back(mpv_counter_info());
map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
++it;
fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
kilopuffs, it_o, it);
}
/* we may have 2 sets of non triggered puffs:
* 1) always started with no auto_restart
* 2) always started with auto_restart
*/
map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it;
while (it != kilopuffs.end() && !it->first.auto_restart) {
assert(it->first.trigger_event == MQE_INVALID);
++it;
}
if (it != trig_ite) {
out->push_back(mpv_counter_info());
fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
kilopuffs, kilopuffs.begin(), it);
}
while (it != kilopuffs.end() && it->first.auto_restart) {
assert(it->first.trigger_event == MQE_INVALID);
out->push_back(mpv_counter_info());
map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
++it;
fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
kilopuffs, it_o, it);
}
}
static
const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters,
u32 i) {
for (const auto &counter : counters) {
if (i >= counter.kilo_begin && i < counter.kilo_end) {
return counter;
}
}
assert(0);
return counters.front();
}
aligned_unique_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
const vector<raw_puff> &triggered_puffs) {
assert(!puffs_in.empty() || !triggered_puffs.empty());
u32 puffette_count = puffs_in.size() + triggered_puffs.size();
map<ClusterKey, vector<raw_puff>> puff_clusters;
populateClusters(puffs_in, triggered_puffs, &puff_clusters);
u32 curr_comp_offset = 0;
u32 curr_decomp_offset = sizeof(mpv_decomp_state);
curr_decomp_offset += 16 * puff_clusters.size();
vector<mpv_counter_info> counters;
fillCounterInfos(&counters, &curr_decomp_offset, &curr_comp_offset,
puff_clusters);
u32 pq_offset = curr_decomp_offset;
curr_decomp_offset += sizeof(mpv_pq_item) * puff_clusters.size();
u32 rl_offset = curr_decomp_offset;
curr_decomp_offset += sizeof(ReportID) * puffette_count;
u32 reporter_offset = curr_decomp_offset;
curr_decomp_offset += mmbit_size(puff_clusters.size());
u32 active_offset = curr_comp_offset;
curr_comp_offset += mmbit_size(puff_clusters.size());
u32 len = calcSize(puff_clusters, counters);
DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len);
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(len);
mpv_puffette *pa_base = (mpv_puffette *)
((char *)nfa.get() + sizeof(NFA) + sizeof(mpv)
+ sizeof(mpv_kilopuff) * puff_clusters.size()
+ sizeof(mpv_counter_info) * counters.size());
mpv_puffette *pa = pa_base;
writePuffette(pa, raw_puff(0U, false, INVALID_REPORT, CharReach()));
++pa; /* skip init sentinel */
u32 min_repeat = ~0U;
u32 max_counter = 0; /* max counter that we may need to know about is one
more than largest repeat */
for (const vector<raw_puff> &puffs : puff_clusters | map_values) {
max_counter = max(max_counter, puffs.back().repeats + 1);
min_repeat = min(min_repeat, puffs.front().repeats);
}
mpv *m = (mpv *)getMutableImplNfa(nfa.get());
m->kilo_count = verify_u32(puff_clusters.size());
m->counter_count = verify_u32(counters.size());
m->puffette_count = puffette_count;
m->pq_offset = pq_offset;
m->reporter_offset = reporter_offset;
m->report_list_offset = rl_offset;
m->active_offset = active_offset;
m->top_kilo_begin = verify_u32(triggered_puffs.size());
m->top_kilo_end = verify_u32(puff_clusters.size());
mpv_kilopuff *kp_begin = (mpv_kilopuff *)(m + 1);
mpv_kilopuff *kp = kp_begin;
for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) {
writeKiloPuff(it, findCounter(counters, kp - kp_begin).counter_offset,
m, kp, &pa);
++kp;
}
assert((char *)pa == (char *)nfa.get() + len);
mpv_counter_info *out_ci = (mpv_counter_info *)kp;
for (const auto &counter : counters) {
*out_ci = counter;
++out_ci;
}
assert((char *)out_ci == (char *)pa_base);
writeCoreNfa(nfa.get(), len, min_repeat, max_counter, curr_comp_offset,
curr_decomp_offset);
return nfa;
}
} // namespace ue2