Rose: use program for boundary reports

Use the program to handle report lists at boundaries, rather than the
previous list-of-reports approach.
This commit is contained in:
Justin Viiret 2016-03-03 15:36:10 +11:00 committed by Matthew Barr
parent 24ccefa3d3
commit 93a262e54c
5 changed files with 112 additions and 142 deletions

View File

@ -573,3 +573,39 @@ hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id,
assert(id < rose->literalCount);
return roseRunProgram(rose, scratch, programs[id], real_end, match_len, 0);
}
/**
* \brief Execute a boundary report program.
*
* Returns MO_HALT_MATCHING if the stream is exhausted or the user has
* instructed us to halt, or MO_CONTINUE_MATCHING otherwise.
*/
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
u64a stream_offset, struct hs_scratch *scratch) {
DEBUG_PRINTF("running boundary program at offset %u\n", program);
if (can_stop_matching(scratch)) {
DEBUG_PRINTF("can stop matching\n");
return MO_HALT_MATCHING;
}
if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) {
/* we cannot delay the initialization of the som deduper logs any longer
* as we are reporting matches. This is done explicitly as we are
* shortcutting the som handling in the vacuous repeats as we know they
* all come from non-som patterns. */
fatbit_clear(scratch->deduper.som_log[0]);
fatbit_clear(scratch->deduper.som_log[1]);
scratch->deduper.som_log_dirty = 0;
}
const size_t match_len = 0;
const char in_anchored = 0;
hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, stream_offset,
match_len, in_anchored);
if (rv == HWLM_TERMINATE_MATCHING) {
return MO_HALT_MATCHING;
}
return MO_CONTINUE_MATCHING;
}

View File

@ -131,4 +131,7 @@ void roseEodExec(const struct RoseEngine *t, u64a offset,
hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id,
void *context);
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
u64a stream_offset, struct hs_scratch *scratch);
#endif // ROSE_H

View File

@ -1941,70 +1941,6 @@ struct DerivedBoundaryReports {
set<ReportID> report_at_0_eod_full;
};
static
void reserveBoundaryReports(const BoundaryReports &boundary,
const DerivedBoundaryReports &dboundary,
RoseBoundaryReports *out, u32 *currOffset) {
u32 curr = *currOffset;
curr = ROUNDUP_N(curr, alignof(ReportID));
memset(out, 0, sizeof(*out));
/* report lists are + 1 in size due to terminator */
if (!boundary.report_at_eod.empty()) {
out->reportEodOffset = curr;
curr += sizeof(ReportID) * (boundary.report_at_eod.size() + 1);
}
if (!boundary.report_at_0.empty()) {
out->reportZeroOffset = curr;
curr += sizeof(ReportID) * (boundary.report_at_0.size() + 1);
}
if (!dboundary.report_at_0_eod_full.empty()) {
out->reportZeroEodOffset = curr;
curr += sizeof(ReportID) * (dboundary.report_at_0_eod_full.size() + 1);
}
DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size());
DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size());
DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size());
*currOffset = curr;
}
static
void fillInBoundaryReports(RoseEngine *engine, u32 offset,
const set<ReportID> &rl) {
if (rl.empty()) {
return;
}
u32 *out = (u32 *)((char *)engine + offset);
assert(ISALIGNED(out));
for (ReportID r : rl) {
*out = r;
++out;
}
*out = MO_INVALID_IDX;
}
static
void populateBoundaryReports(RoseEngine *engine,
const BoundaryReports &boundary,
const DerivedBoundaryReports &dboundary,
const RoseBoundaryReports &offsets) {
engine->boundary.reportEodOffset = offsets.reportEodOffset;
engine->boundary.reportZeroOffset = offsets.reportZeroOffset;
engine->boundary.reportZeroEodOffset = offsets.reportZeroEodOffset;
fillInBoundaryReports(engine, offsets.reportEodOffset,
boundary.report_at_eod);
fillInBoundaryReports(engine, offsets.reportZeroOffset,
boundary.report_at_0);
fillInBoundaryReports(engine, offsets.reportZeroEodOffset,
dboundary.report_at_0_eod_full);
}
static
void fillInReportInfo(RoseEngine *engine, u32 reportOffset,
const ReportManager &rm, const vector<Report> &reports) {
@ -2917,6 +2853,43 @@ vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc,
return program;
}
static
u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc,
const set<ReportID> &reports) {
if (reports.empty()) {
return 0;
}
const bool has_som = false;
vector<RoseInstruction> program;
for (const auto &id : reports) {
makeReport(build, bc, id, has_som, program);
}
return writeProgram(bc, flattenProgram({program}));
}
static
RoseBoundaryReports
makeBoundaryPrograms(RoseBuildImpl &build, build_context &bc,
const BoundaryReports &boundary,
const DerivedBoundaryReports &dboundary) {
RoseBoundaryReports out;
memset(&out, 0, sizeof(out));
DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size());
DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size());
DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size());
out.reportEodOffset =
writeBoundaryProgram(build, bc, boundary.report_at_eod);
out.reportZeroOffset =
writeBoundaryProgram(build, bc, boundary.report_at_0);
out.reportZeroEodOffset =
writeBoundaryProgram(build, bc, dboundary.report_at_0_eod_full);
return out;
}
static
void assignStateIndices(const RoseBuildImpl &build, build_context &bc) {
const auto &g = build.g;
@ -3788,6 +3761,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
bc.resources.has_anchored = true;
}
auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary);
// Build NFAs
set<u32> no_retrigger_queues;
bool mpv_as_outfix;
@ -3928,9 +3903,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
u32 activeLeftCount = leftInfoTable.size();
u32 rosePrefixCount = countRosePrefixes(leftInfoTable);
RoseBoundaryReports boundary_out;
reserveBoundaryReports(boundary, dboundary, &boundary_out, &currOffset);
u32 rev_nfa_table_offset;
vector<u32> rev_nfa_offsets;
prepSomRevNfas(ssm, &rev_nfa_table_offset, &rev_nfa_offsets, &currOffset);
@ -4098,7 +4070,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
engine->asize = verify_u32(asize);
engine->ematcherRegionSize = ematcher_region_size;
engine->floatingStreamState = verify_u32(floatingStreamStateRequired);
populateBoundaryReports(engine.get(), boundary, dboundary, boundary_out);
engine->boundary.reportEodOffset = boundary_out.reportEodOffset;
engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset;
engine->boundary.reportZeroEodOffset = boundary_out.reportZeroEodOffset;
write_out(&engine->state_init, (char *)engine.get(), state_scatter,
state_scatter_aux_offset);

View File

@ -235,14 +235,17 @@ struct RoseStateOffsets {
};
struct RoseBoundaryReports {
u32 reportEodOffset; /**< 0 if no reports list, otherwise offset of
* MO_INVALID_IDX terminated list to report at EOD */
u32 reportZeroOffset; /**< 0 if no reports list, otherwise offset of
* MO_INVALID_IDX terminated list to report at offset
* 0 */
u32 reportZeroEodOffset; /**< 0 if no reports list, otherwise offset of
* MO_INVALID_IDX terminated list to report if eod
* is at offset 0. Superset of other lists. */
/** \brief 0 if no reports list, otherwise offset of program to run to
* deliver reports at EOD. */
u32 reportEodOffset;
/** \brief 0 if no reports list, otherwise offset of program to run to
* deliver reports at offset 0. */
u32 reportZeroOffset;
/** \brief 0 if no reports list, otherwise offset of program to run to
* deliver reports if EOD is at offset 0. Superset of other programs. */
u32 reportZeroEodOffset;
};
/* NFA Queue Assignment

View File

@ -260,57 +260,6 @@ SomNfaCallback selectOutfixSomAdaptor(const struct RoseEngine *rose) {
return is_simple ? outfixSimpleSomSomAdaptor : outfixSomSomAdaptor;
}
/**
* \brief Fire callbacks for a boundary report list.
*
* Returns MO_HALT_MATCHING if the user has instructed us to halt, and
* MO_CONTINUE_MATCHING otherwise.
*/
static never_inline
int processReportList(const struct RoseEngine *rose, u32 base_offset,
u64a stream_offset, hs_scratch_t *scratch) {
DEBUG_PRINTF("running report list at offset %u\n", base_offset);
if (told_to_stop_matching(scratch)) {
DEBUG_PRINTF("matching has been terminated\n");
return MO_HALT_MATCHING;
}
if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) {
/* we cannot delay the initialization of the som deduper logs any longer
* as we are reporting matches. This is done explicitly as we are
* shortcutting the som handling in the vacuous repeats as we know they
* all come from non-som patterns. */
fatbit_clear(scratch->deduper.som_log[0]);
fatbit_clear(scratch->deduper.som_log[1]);
scratch->deduper.som_log_dirty = 0;
}
const ReportID *report = getByOffset(rose, base_offset);
/* never required to do som as vacuous reports are always external */
if (rose->simpleCallback) {
for (; *report != MO_INVALID_IDX; report++) {
int rv = roseSimpleAdaptor(stream_offset, *report, scratch);
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
} else {
for (; *report != MO_INVALID_IDX; report++) {
int rv = roseAdaptor(stream_offset, *report, scratch);
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
}
return MO_CONTINUE_MATCHING;
}
/** \brief Initialise SOM state. Used in both block and streaming mode. */
static really_inline
void initSomState(const struct RoseEngine *rose, char *state) {
@ -488,15 +437,15 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length,
if (!length) {
if (rose->boundary.reportZeroEodOffset) {
processReportList(rose, rose->boundary.reportZeroEodOffset, 0,
scratch);
roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0,
scratch);
}
goto set_retval;
}
if (rose->boundary.reportZeroOffset) {
int rv = processReportList(rose, rose->boundary.reportZeroOffset, 0,
scratch);
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
goto set_retval;
}
@ -559,8 +508,8 @@ done_scan:
}
if (rose->boundary.reportEodOffset) {
processReportList(rose, rose->boundary.reportEodOffset, length,
scratch);
roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length,
scratch);
}
set_retval:
@ -727,25 +676,28 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
getHistory(state, rose, id->offset),
getHistoryAmount(rose, id->offset), id->offset, status, 0);
// Rose program execution (used for some report paths) depends on these
// values being initialised.
scratch->tctxt.lastMatchOffset = 0;
scratch->tctxt.minMatchOffset = id->offset;
if (rose->somLocationCount) {
loadSomFromStream(scratch, id->offset);
}
if (!id->offset) {
if (rose->boundary.reportZeroEodOffset) {
int rv = processReportList(rose, rose->boundary.reportZeroEodOffset,
0, scratch);
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportZeroEodOffset, 0, scratch);
if (rv == MO_HALT_MATCHING) {
scratch->core_info.status |= STATUS_TERMINATED;
return;
}
}
} else {
if (rose->boundary.reportEodOffset) {
int rv = processReportList(rose, rose->boundary.reportEodOffset,
id->offset, scratch);
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportEodOffset, id->offset, scratch);
if (rv == MO_HALT_MATCHING) {
scratch->core_info.status |= STATUS_TERMINATED;
return;
}
}
@ -978,9 +930,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
if (!id->offset && rose->boundary.reportZeroOffset) {
DEBUG_PRINTF("zero reports\n");
processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch);
if (unlikely(can_stop_matching(scratch))) {
DEBUG_PRINTF("stream is broken, halting scan\n");
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
DEBUG_PRINTF("halting scan\n");
setStreamStatus(state, scratch->core_info.status);
if (told_to_stop_matching(scratch)) {
return HS_SCAN_TERMINATED;