diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 181f9512..dc91010e 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -609,16 +609,18 @@ bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) { } /* - * if lit2 is an included literal of both lit1 and lit0, and lit1 is an - * exceptional literal of lit0 - lit1 sometimes matches when lit0 matches, - * then we give up squashing for lit1. e.g. lit0:AAA(no case), lit1:aa, - * lit2:A(no case). We can have duplicate matches for input "aaa" if lit0 - * and lit1 both squash lit2. + * if lit2 is an included literal of both lit0 and lit1, then lit0 and lit1 + * shouldn't match at the same offset, otherwise we give up squashing for lit1. + * e.g. lit0:AAA(no case), lit1:aa, lit2:A(no case). We can have duplicate + * matches for input "aaa" if lit0 and lit1 both squash lit2. */ static bool checkParentLit( - u32 pos1, const unordered_set &parent_map, + const vector &lits, u32 pos1, + const unordered_set &parent_map, const unordered_map> &exception_map) { + assert(pos1 < lits.size()); + const auto &lit1 = lits[pos1]; for (const auto pos2 : parent_map) { if (contains(exception_map, pos2)) { const auto &exception_pos = exception_map.at(pos2); @@ -626,6 +628,16 @@ bool checkParentLit( return false; } } + + /* if lit1 isn't an exception of lit2, then we have to do further + * exclusive check. + * TODO: More mask checks. Note if two literals are group exclusive, + * it is possible that they match at the same offset. */ + assert(pos2 < lits.size()); + const auto &lit2 = lits[pos2]; + if (isSuffix(lit2, lit1)) { + return false; + } } return true; @@ -652,30 +664,26 @@ void buildSquashMask(vector &lits, u32 id1, u32 bucket1, // check if lit2 is a suffix of lit1 if (isSuffix(lit1, lit2)) { /* if we have a included literal in the same bucket, - * quit and let the included literal to do possible squashing - */ + * quit and let the included literal to do possible squashing */ if (bucket1 == bucket2) { DEBUG_PRINTF("same bucket\n"); return; } - /* - * if lit2 is a suffix but doesn't pass included checks for - * extra info, we give up sqaushing - */ + /* if lit2 is a suffix but doesn't pass included checks for + * extra info, we give up sqaushing */ if (includedCheck(lit1, lit2)) { DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id); exception_map[id1].insert(id2); exception = true; - } else if (checkParentLit(id1, parent_map[id2], exception_map)) { + } else if (checkParentLit(lits, id1, parent_map[id2], + exception_map)) { if (lit1.included_id == INVALID_LIT_ID) { DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n", lit1.id, lit2.id); lit1.included_id = lit2.id; } else { - /* - * if we have multiple included literals in one bucket, - * give up squashing. - */ + /* if we have multiple included literals in one bucket, + * give up squashing. */ DEBUG_PRINTF("multiple included literals\n"); lit1.included_id = INVALID_LIT_ID; return; @@ -690,10 +698,8 @@ void buildSquashMask(vector &lits, u32 id1, u32 bucket1, if (bucket2 != nextBucket) { if (included) { if (exception) { - /* - * give up if we have exception literals - * in the same bucket as the included literal - */ + /* give up if we have exception literals + * in the same bucket as the included literal. */ lit1.included_id = INVALID_LIT_ID; } else { parent_map[child_id].insert(id1); @@ -714,14 +720,12 @@ static constexpr u32 INCLUDED_LIMIT = 1000; static void findIncludedLits(vector &lits, const vector>> &lastCharMap) { - /** Map for finding the positions of literal which includes a literal - * in FDR hwlm literal vector. - */ + /* Map for finding the positions of literal which includes a literal + * in FDR hwlm literal vector. */ unordered_map> parent_map; - /** Map for finding the positions of exception literals which could - * sometimes match if a literal matches in FDR hwlm literal vector. - */ + /* Map for finding the positions of exception literals which could + * sometimes match if a literal matches in FDR hwlm literal vector. */ unordered_map> exception_map; for (const auto &group : lastCharMap) { size_t cnt = group.size(); diff --git a/src/fdr/teddy_compile.h b/src/fdr/teddy_compile.h index ec251310..a2b4a13c 100644 --- a/src/fdr/teddy_compile.h +++ b/src/fdr/teddy_compile.h @@ -44,10 +44,10 @@ struct FDR; namespace ue2 { +class TeddyEngineDescription; struct Grey; struct hwlmLiteral; struct target_t; -struct TeddyEngineDescription; bytecode_ptr teddyBuildTable(const HWLMProto &proto, const Grey &grey); diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index ab0934de..e6ce9bdb 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -2581,8 +2581,9 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, pc = getByOffset(t, ri->child_offset); pc_base = pc; programOffset = (const u8 *)pc_base -(const u8 *)t; - DEBUG_PRINTF("pc_base %p pc %p child_offset %u\n", - pc_base, pc, ri->child_offset); + DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n", + pc_base, pc, ri->child_offset, ri->squash); + work_done = 0; continue; } } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index e98308ac..5ab9fc99 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1465,7 +1465,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(INCLUDED_JUMP) { os << " child_offset " << ri->child_offset << endl; - os << " squash " << ri->squash << endl; + os << " squash " << (u32)ri->squash << endl; } PROGRAM_NEXT_INSTRUCTION