FDR: Squash buckets of included literals in FDR confirm

- Change the compile of literal matchers to two passes.
 - Reverse the bucket assignment in FDR, bucket with longer literals has
   smaller bucket id.
 - Squash the buckets of included literals and jump to the program of
   included literals directly from parent literal program without going
   through FDR confirm for included literals.
This commit is contained in:
Wang, Xiang W
2017-06-22 04:50:45 -04:00
committed by Matthew Barr
parent d2b5523dd8
commit 86c5f7feb1
26 changed files with 1017 additions and 262 deletions

View File

@@ -41,8 +41,12 @@
#include "scratch.h"
#include "ue2common.h"
#include "fdr/fdr_compile.h"
#include "fdr/fdr_compile_internal.h"
#include "fdr/fdr_engine_description.h"
#include "fdr/teddy_engine_description.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/make_unique.h"
#include "util/ue2string.h"
#include <cassert>
@@ -53,6 +57,28 @@ using namespace std;
namespace ue2 {
/**
 * Construct a prototype that carries only an engine type and its literals.
 *
 * hwlmBuildProto() uses this overload for the noodle engine, which has no
 * engine description or bucket assignment. make_small is set explicitly so
 * that the member never holds an indeterminate value.
 */
HWLMProto::HWLMProto(u8 engType_in, vector<hwlmLiteral> lits_in)
    : engType(engType_in), lits(std::move(lits_in)), make_small(false) {}
/**
 * Construct a prototype backed by an FDR engine description.
 *
 * Takes ownership of the engine description, the literals and the
 * bucket-to-literal assignment by moving them into the prototype.
 */
HWLMProto::HWLMProto(u8 engType_in, std::unique_ptr<FDREngineDescription> eng_in,
                     std::vector<hwlmLiteral> lits_in,
                     std::map<u32, std::vector<u32>> bucketToLits_in,
                     bool make_small_in)
    : engType(engType_in),
      fdrEng(std::move(eng_in)),
      lits(std::move(lits_in)),
      bucketToLits(std::move(bucketToLits_in)),
      make_small(make_small_in) {
}
/**
 * Construct a prototype backed by a Teddy engine description.
 *
 * Takes ownership of the engine description, the literals and the
 * bucket-to-literal assignment by moving them into the prototype.
 */
HWLMProto::HWLMProto(u8 engType_in,
                     std::unique_ptr<TeddyEngineDescription> eng_in,
                     std::vector<hwlmLiteral> lits_in,
                     std::map<u32, std::vector<u32>> bucketToLits_in,
                     bool make_small_in)
    : engType(engType_in),
      teddyEng(std::move(eng_in)),
      lits(std::move(lits_in)),
      bucketToLits(std::move(bucketToLits_in)),
      make_small(make_small_in) {
}
// Defined out of line: the header only forward-declares the engine
// description classes held in unique_ptr members, while this file includes
// their full definitions.
HWLMProto::~HWLMProto() = default;
static
void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
#ifdef DEBUG
@@ -92,9 +118,52 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
return true;
}
bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
const CompileContext &cc,
/**
 * Build the runtime HWLM structure described by a literal matcher prototype.
 *
 * The engine table comes from noodBuildTable() when the prototype's engine
 * type is HWLM_ENGINE_NOOD, and from fdrBuildTable() otherwise. It is then
 * copied into a zeroed HWLM allocation via HWLM_DATA().
 *
 * Returns nullptr if no engine table could be built. Throws
 * ResourceLimitError if the table is larger than
 * cc.grey.limitLiteralMatcherSize.
 */
bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
                             UNUSED hwlm_group_t expected_groups) {
    const auto &lits = proto.lits;
    DEBUG_PRINTF("building table with %zu strings\n", lits.size());

    shared_ptr<void> eng;
    size_t engSize = 0;

    if (proto.engType != HWLM_ENGINE_NOOD) {
        DEBUG_PRINTF("building a new deal\n");
        auto fdrTable = fdrBuildTable(proto, cc.grey);
        if (fdrTable) {
            engSize = fdrTable.size();
        }
        eng = std::move(fdrTable);
    } else {
        DEBUG_PRINTF("build noodle table\n");
        // The noodle table is built from the first literal only.
        const hwlmLiteral &onlyLit = lits.front();
        auto noodTable = noodBuildTable(onlyLit);
        if (noodTable) {
            engSize = noodTable.size();
        }
        eng = std::move(noodTable);
    }

    // Engine construction failed: report that to the caller with a null result.
    if (!eng) {
        return nullptr;
    }

    assert(engSize);
    if (engSize > cc.grey.limitLiteralMatcherSize) {
        throw ResourceLimitError();
    }

    // HWLM header (rounded up with ROUNDUP_CL) followed by the engine bytes.
    const size_t totalLen = ROUNDUP_CL(sizeof(HWLM)) + engSize;
    auto hwlm = make_zeroed_bytecode_ptr<HWLM>(totalLen, 64);

    hwlm->type = proto.engType;
    memcpy(HWLM_DATA(hwlm.get()), eng.get(), engSize);

    return hwlm;
}
unique_ptr<HWLMProto>
hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small,
const CompileContext &cc) {
assert(!lits.empty());
dumpLits(lits);
@@ -124,9 +193,7 @@ bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
}
}
u8 engType = 0;
size_t engSize = 0;
shared_ptr<void> eng;
unique_ptr<HWLMProto> proto;
DEBUG_PRINTF("building table with %zu strings\n", lits.size());
@@ -134,39 +201,17 @@ bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
if (isNoodleable(lits, cc)) {
DEBUG_PRINTF("build noodle table\n");
engType = HWLM_ENGINE_NOOD;
const hwlmLiteral &lit = lits.front();
auto noodle = noodBuildTable(lit);
if (noodle) {
engSize = noodle.size();
}
eng = move(noodle);
proto = ue2::make_unique<HWLMProto>(HWLM_ENGINE_NOOD, lits);
} else {
DEBUG_PRINTF("building a new deal\n");
engType = HWLM_ENGINE_FDR;
auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey);
if (fdr) {
engSize = fdr.size();
proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small,
cc.target_info, cc.grey);
if (!proto) {
return nullptr;
}
eng = move(fdr);
}
if (!eng) {
return nullptr;
}
assert(engSize);
if (engSize > cc.grey.limitLiteralMatcherSize) {
throw ResourceLimitError();
}
const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
h->type = engType;
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
return h;
return proto;
}
size_t hwlmSize(const HWLM *h) {

View File

@@ -34,9 +34,11 @@
#define HWLM_BUILD_H
#include "hwlm.h"
#include "hwlm_literal.h"
#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include <map>
#include <memory>
#include <vector>
@@ -44,15 +46,62 @@ struct HWLM;
namespace ue2 {
class FDREngineDescription;
class TeddyEngineDescription;
struct CompileContext;
struct Grey;
struct hwlmLiteral;
/** \brief Class representing a literal matcher prototype.
 *
 * Holds what \ref hwlmBuildProto decided (engine type, engine description and
 * bucket assignment) so that \ref hwlmBuild can later build the runtime
 * structure from it.
 */
struct HWLMProto {
    /**
     * \brief Engine type to distinguish noodle from FDR and Teddy.
     */
    u8 engType;

    /**
     * \brief FDR engine description.
     */
    std::unique_ptr<FDREngineDescription> fdrEng;

    /**
     * \brief Teddy engine description.
     */
    std::unique_ptr<TeddyEngineDescription> teddyEng;

    /**
     * \brief HWLM literals passed from Rose.
     */
    std::vector<hwlmLiteral> lits;

    /**
     * \brief Bucket assignment info in FDR and Teddy.
     */
    std::map<u32, std::vector<u32>> bucketToLits;

    /**
     * \brief Flag to optimise matcher for small size from Rose.
     *
     * Defaults to false so that the constructor taking only an engine type
     * and literals does not leave it uninitialised.
     */
    bool make_small = false;

    /** \brief Prototype with an engine type and literals only. */
    HWLMProto(u8 engType_in, std::vector<hwlmLiteral> lits_in);

    /** \brief Prototype carrying an FDR engine description. */
    HWLMProto(u8 engType_in, std::unique_ptr<FDREngineDescription> eng_in,
              std::vector<hwlmLiteral> lits_in,
              std::map<u32, std::vector<u32>> bucketToLits_in,
              bool make_small_in);

    /** \brief Prototype carrying a Teddy engine description. */
    HWLMProto(u8 engType_in, std::unique_ptr<TeddyEngineDescription> eng_in,
              std::vector<hwlmLiteral> lits_in,
              std::map<u32, std::vector<u32>> bucketToLits_in,
              bool make_small_in);

    /** \brief Declared here and defined out of line, because the engine
     * description types are only forward-declared in this header. */
    ~HWLMProto();
};
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
* literals.
*
* \param lits The group of literals.
* \param make_small Optimise matcher for small size.
* \param proto Literal matcher prototype.
* \param cc Compile context.
* \param expected_groups FIXME: document me!
*
@@ -60,10 +109,13 @@ struct hwlmLiteral;
* may result in a nullptr return value, or a std::bad_alloc exception being
* thrown.
*/
bytecode_ptr<HWLM> hwlmBuild(const std::vector<hwlmLiteral> &lits,
bool make_small, const CompileContext &cc,
bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
/** \brief Build a literal matcher prototype for a group of literals.
 *
 * \param lits The group of literals; must not be empty (asserted).
 * \param make_small Optimise matcher for small size.
 * \param cc Compile context.
 *
 * \return The prototype to hand to \ref hwlmBuild, or nullptr if an FDR
 * prototype could not be built.
 */
std::unique_ptr<HWLMProto>
hwlmBuildProto(std::vector<hwlmLiteral> &lits, bool make_small,
const CompileContext &cc);
/**
* Returns an estimate of the number of repeated characters on the end of a
* literal that will make a literal set of size \a numLiterals suffer

View File

@@ -45,6 +45,8 @@ namespace ue2 {
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
#define HWLM_MASKLEN 8
/** \brief Sentinel literal id: all bits set. Parenthesised so the expansion
 * stays a single operand wherever it is used. */
#define INVALID_LIT_ID (~0U)
/** \brief Class representing a literal, fed to \ref hwlmBuild. */
struct hwlmLiteral {
std::string s; //!< \brief The literal itself.
@@ -64,6 +66,21 @@ struct hwlmLiteral {
* can be quashed by the literal matcher. */
bool noruns;
/** \brief included literal id. */
u32 included_id = INVALID_LIT_ID;
/** \brief Squash mask for FDR's confirm mask for included literals.
*
* In FDR confirm, if we have included literal in another bucket,
* we can use this mask to squash the bit for the bucket in FDR confirm
* mask and then run programs of included literal directly and avoid
* confirm work.
*
* This value is calculated in FDR compile code once bucket assignment is
* completed
*/
u8 squash = 0;
/** \brief Set of groups that literal belongs to.
*
* Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of