ue-3145: make parents of included literals exclusive

This commit is contained in:
Wang, Xiang W 2017-07-20 16:40:54 -04:00 committed by Matthew Barr
parent 86c5f7feb1
commit 252eb820c4
4 changed files with 36 additions and 31 deletions

View File

@ -609,16 +609,18 @@ bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
} }
/* /*
* if lit2 is an included literal of both lit1 and lit0, and lit1 is an * if lit2 is an included literal of both lit0 and lit1, then lit0 and lit1
* exceptional literal of lit0 - lit1 sometimes matches when lit0 matches, * shouldn't match at the same offset, otherwise we give up squashing for lit1.
* then we give up squashing for lit1. e.g. lit0:AAA(no case), lit1:aa, * e.g. lit0:AAA(no case), lit1:aa, lit2:A(no case). We can have duplicate
* lit2:A(no case). We can have duplicate matches for input "aaa" if lit0 * matches for input "aaa" if lit0 and lit1 both squash lit2.
* and lit1 both squash lit2.
*/ */
static static
bool checkParentLit( bool checkParentLit(
u32 pos1, const unordered_set<u32> &parent_map, const vector<hwlmLiteral> &lits, u32 pos1,
const unordered_set<u32> &parent_map,
const unordered_map<u32, unordered_set<u32>> &exception_map) { const unordered_map<u32, unordered_set<u32>> &exception_map) {
assert(pos1 < lits.size());
const auto &lit1 = lits[pos1];
for (const auto pos2 : parent_map) { for (const auto pos2 : parent_map) {
if (contains(exception_map, pos2)) { if (contains(exception_map, pos2)) {
const auto &exception_pos = exception_map.at(pos2); const auto &exception_pos = exception_map.at(pos2);
@ -626,6 +628,16 @@ bool checkParentLit(
return false; return false;
} }
} }
/* if lit1 isn't an exception of lit2, then we have to do further
* exclusive check.
* TODO: More mask checks. Note if two literals are group exclusive,
* it is possible that they match at the same offset. */
assert(pos2 < lits.size());
const auto &lit2 = lits[pos2];
if (isSuffix(lit2, lit1)) {
return false;
}
} }
return true; return true;
@ -652,30 +664,26 @@ void buildSquashMask(vector<hwlmLiteral> &lits, u32 id1, u32 bucket1,
// check if lit2 is a suffix of lit1 // check if lit2 is a suffix of lit1
if (isSuffix(lit1, lit2)) { if (isSuffix(lit1, lit2)) {
/* if we have an included literal in the same bucket, /* if we have an included literal in the same bucket,
* quit and let the included literal do possible squashing * quit and let the included literal do possible squashing */
*/
if (bucket1 == bucket2) { if (bucket1 == bucket2) {
DEBUG_PRINTF("same bucket\n"); DEBUG_PRINTF("same bucket\n");
return; return;
} }
/* /* if lit2 is a suffix but doesn't pass included checks for
* if lit2 is a suffix but doesn't pass included checks for * extra info, we give up squashing */
* extra info, we give up squashing
*/
if (includedCheck(lit1, lit2)) { if (includedCheck(lit1, lit2)) {
DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id); DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id);
exception_map[id1].insert(id2); exception_map[id1].insert(id2);
exception = true; exception = true;
} else if (checkParentLit(id1, parent_map[id2], exception_map)) { } else if (checkParentLit(lits, id1, parent_map[id2],
exception_map)) {
if (lit1.included_id == INVALID_LIT_ID) { if (lit1.included_id == INVALID_LIT_ID) {
DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n", DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n",
lit1.id, lit2.id); lit1.id, lit2.id);
lit1.included_id = lit2.id; lit1.included_id = lit2.id;
} else { } else {
/* /* if we have multiple included literals in one bucket,
* if we have multiple included literals in one bucket, * give up squashing. */
* give up squashing.
*/
DEBUG_PRINTF("multiple included literals\n"); DEBUG_PRINTF("multiple included literals\n");
lit1.included_id = INVALID_LIT_ID; lit1.included_id = INVALID_LIT_ID;
return; return;
@ -690,10 +698,8 @@ void buildSquashMask(vector<hwlmLiteral> &lits, u32 id1, u32 bucket1,
if (bucket2 != nextBucket) { if (bucket2 != nextBucket) {
if (included) { if (included) {
if (exception) { if (exception) {
/* /* give up if we have exception literals
* give up if we have exception literals * in the same bucket as the included literal. */
* in the same bucket as the included literal
*/
lit1.included_id = INVALID_LIT_ID; lit1.included_id = INVALID_LIT_ID;
} else { } else {
parent_map[child_id].insert(id1); parent_map[child_id].insert(id1);
@ -714,14 +720,12 @@ static constexpr u32 INCLUDED_LIMIT = 1000;
static static
void findIncludedLits(vector<hwlmLiteral> &lits, void findIncludedLits(vector<hwlmLiteral> &lits,
const vector<vector<pair<u32, u32>>> &lastCharMap) { const vector<vector<pair<u32, u32>>> &lastCharMap) {
/** Map for finding the positions of literal which includes a literal /* Map for finding the positions of literal which includes a literal
* in FDR hwlm literal vector. * in FDR hwlm literal vector. */
*/
unordered_map<u32, unordered_set<u32>> parent_map; unordered_map<u32, unordered_set<u32>> parent_map;
/** Map for finding the positions of exception literals which could /* Map for finding the positions of exception literals which could
* sometimes match if a literal matches in FDR hwlm literal vector. * sometimes match if a literal matches in FDR hwlm literal vector. */
*/
unordered_map<u32, unordered_set<u32>> exception_map; unordered_map<u32, unordered_set<u32>> exception_map;
for (const auto &group : lastCharMap) { for (const auto &group : lastCharMap) {
size_t cnt = group.size(); size_t cnt = group.size();

View File

@ -44,10 +44,10 @@ struct FDR;
namespace ue2 { namespace ue2 {
class TeddyEngineDescription;
struct Grey; struct Grey;
struct hwlmLiteral; struct hwlmLiteral;
struct target_t; struct target_t;
struct TeddyEngineDescription;
bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey); bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey);

View File

@ -2581,8 +2581,9 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
pc = getByOffset(t, ri->child_offset); pc = getByOffset(t, ri->child_offset);
pc_base = pc; pc_base = pc;
programOffset = (const u8 *)pc_base -(const u8 *)t; programOffset = (const u8 *)pc_base -(const u8 *)t;
DEBUG_PRINTF("pc_base %p pc %p child_offset %u\n", DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n",
pc_base, pc, ri->child_offset); pc_base, pc, ri->child_offset, ri->squash);
work_done = 0;
continue; continue;
} }
} }

View File

@ -1465,7 +1465,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
PROGRAM_CASE(INCLUDED_JUMP) { PROGRAM_CASE(INCLUDED_JUMP) {
os << " child_offset " << ri->child_offset << endl; os << " child_offset " << ri->child_offset << endl;
os << " squash " << ri->squash << endl; os << " squash " << (u32)ri->squash << endl;
} }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION