diff --git a/src/rose/catchup.c b/src/rose/catchup.c
index d1ef41ff..6893df0e 100644
--- a/src/rose/catchup.c
+++ b/src/rose/catchup.c
@@ -105,13 +105,13 @@ void nextAnchoredMatch(const struct RoseEngine *t, struct RoseContext *tctxt,
     assert(tctxt->curr_anchored_loc != MMB_INVALID);
 
     struct hs_scratch *scratch = tctxtToScratch(tctxt);
-    u8 **anchoredRows = getAnchoredLog(scratch);
+    struct fatbit **anchoredRows = getAnchoredLog(scratch);
 
     u32 region_width = t->anchoredMatches;
-    u8 *curr_row = anchoredRows[tctxt->curr_anchored_loc];
+    struct fatbit *curr_row = anchoredRows[tctxt->curr_anchored_loc];
 
-    tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width,
-                                           tctxt->curr_row_offset);
+    tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width,
+                                            tctxt->curr_row_offset);
     DEBUG_PRINTF("next %u [idx = %u] @%llu\n", *reportId,
                  tctxt->curr_row_offset, *end);
     if (tctxt->curr_row_offset != MMB_INVALID) {
@@ -132,8 +132,8 @@ void nextAnchoredMatch(const struct RoseEngine *t, struct RoseContext *tctxt,
         assert(tctxt->curr_anchored_loc < scratch->anchored_region_len);
         curr_row = anchoredRows[tctxt->curr_anchored_loc];
 
-        tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width,
-                                               MMB_INVALID);
+        tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width,
+                                                MMB_INVALID);
        assert(tctxt->curr_row_offset != MMB_INVALID);
 
         *end = tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1;
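
Both this file and match.c below drive the anchored-match log: a 64-bit summary word (`am_log_sum`) records which locations have a live row, and each row is now a fatbit instead of a raw mmbit buffer. The following is a self-contained sketch of that clear-on-first-use idiom, with plain `uint64_t` rows standing in for fatbits; all names here are illustrative, not Hyperscan API:

```c
/* Illustrative sketch only: mimics the am_log idiom (bf64 summary word +
 * per-location bit rows) with plain uint64_t rows instead of real fatbits. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ROWS 64 /* the summary word covers at most 64 rows */

static uint64_t log_sum;    /* bit i set => row i holds entries */
static uint64_t rows[ROWS]; /* stand-in for the fatbit rows */

static void record(uint32_t row, uint32_t idx) {
    assert(row < ROWS && idx < 64);
    if (!(log_sum & (1ULL << row))) {
        log_sum |= 1ULL << row;
        rows[row] = 0; /* first touch: clear the row, like fatbit_clear() */
    }
    rows[row] |= 1ULL << idx; /* like fatbit_set() */
}

int main(void) {
    record(3, 7);
    record(3, 9);
    /* walk set rows via the summary, then set bits within each row */
    for (uint64_t s = log_sum; s; s &= s - 1) {
        uint32_t row = (uint32_t)__builtin_ctzll(s);
        for (uint64_t r = rows[row]; r; r &= r - 1) {
            printf("row %u idx %d\n", row, __builtin_ctzll(r));
        }
    }
    return 0;
}
```

The summary word is what lets row clearing be deferred until a row is first written, so untouched rows never need initialisation.
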
diff --git a/src/rose/match.c b/src/rose/match.c
index f614423b..6397b90e 100644
--- a/src/rose/match.c
+++ b/src/rose/match.c
@@ -125,7 +125,7 @@ void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId,
                          u64a end) {
     struct hs_scratch *scratch = tctxtToScratch(tctxt);
     const struct RoseEngine *t = scratch->core_info.rose;
-    u8 **anchoredRows = getAnchoredLog(scratch);
+    struct fatbit **anchoredRows = getAnchoredLog(scratch);
 
     DEBUG_PRINTF("record %u @ %llu\n", reportId, end);
     assert(end - t->maxSafeAnchoredDROffset >= 1);
@@ -135,13 +135,13 @@ void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId,
 
     if (!bf64_set(&scratch->am_log_sum, adj_end)) {
         // first time, clear row
-        mmbit_clear(anchoredRows[adj_end], t->anchoredMatches);
+        fatbit_clear(anchoredRows[adj_end]);
     }
 
     u32 idx = getAnchoredInverseMap(t)[reportId];
     DEBUG_PRINTF("record %u @ %llu index %u\n", reportId, end, idx);
     assert(idx < t->anchoredMatches);
-    mmbit_set(anchoredRows[adj_end], t->anchoredMatches, idx);
+    fatbit_set(anchoredRows[adj_end], t->anchoredMatches, idx);
 }
 
 static rose_inline
@@ -150,21 +150,21 @@ void recordAnchoredLiteralMatch(struct RoseContext *tctxt, u32 literal_id,
     assert(end);
     struct hs_scratch *scratch = tctxtToScratch(tctxt);
     const struct RoseEngine *t = scratch->core_info.rose;
-    u8 **anchoredLiteralRows = getAnchoredLiteralLog(scratch);
+    struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch);
 
     DEBUG_PRINTF("record %u @ %llu\n", literal_id, end);
 
     if (!bf64_set(&scratch->al_log_sum, end - 1)) {
         // first time, clear row
         DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count);
-        mmbit_clear(anchoredLiteralRows[end - 1], t->anchored_count);
+        fatbit_clear(anchoredLiteralRows[end - 1]);
     }
 
     u32 rel_idx = literal_id - t->anchored_base_id;
     DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx,
                  t->anchored_count);
     assert(rel_idx < t->anchored_count);
-    mmbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx);
+    fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx);
 }
 
 hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r,
@@ -447,11 +447,11 @@ hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end,
 
 static rose_inline
 hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt,
-                          const u8 *delaySlotBase, size_t delaySlotSize,
-                          u32 vicIndex, u64a offset) {
+                          struct fatbit **delaySlots, u32 vicIndex,
+                          u64a offset) {
     /* assert(!tctxt->in_anchored); */
     assert(vicIndex < DELAY_SLOT_COUNT);
-    const u8 *vicSlot = delaySlotBase + delaySlotSize * vicIndex;
+    const struct fatbit *vicSlot = delaySlots[vicIndex];
     u32 delay_count = t->delay_count;
 
     if (offset < t->floatingMinLiteralMatchOffset) {
@@ -463,8 +463,8 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt,
     roseFlushLastByteHistory(t, scratch->core_info.state, offset, tctxt);
     tctxt->lastEndOffset = offset;
 
-    for (u32 it = mmbit_iterate(vicSlot, delay_count, MMB_INVALID);
-         it != MMB_INVALID; it = mmbit_iterate(vicSlot, delay_count, it)) {
+    for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID);
+         it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) {
         u32 literal_id = t->delay_base_id + it;
 
         UNUSED rose_group old_groups = tctxt->groups;
@@ -490,12 +490,13 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt,
 static really_inline
 hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t,
                                       struct RoseContext *tctxt, u32 curr_loc) {
-    u8 *curr_row = getAnchoredLiteralLog(tctxtToScratch(tctxt))[curr_loc - 1];
+    struct hs_scratch *scratch = tctxtToScratch(tctxt);
+    struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1];
     u32 region_width = t->anchored_count;
     DEBUG_PRINTF("report matches at curr loc\n");
 
-    for (u32 it = mmbit_iterate(curr_row, region_width, MMB_INVALID);
-         it != MMB_INVALID; it = mmbit_iterate(curr_row, region_width, it)) {
+    for (u32 it = fatbit_iterate(curr_row, region_width, MMB_INVALID);
+         it != MMB_INVALID; it = fatbit_iterate(curr_row, region_width, it)) {
         DEBUG_PRINTF("it = %u/%u\n", it, region_width);
         u32 literal_id = t->anchored_base_id + it;
 
@@ -519,7 +520,6 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t,
     }
 
     /* clear row; does not invalidate iteration */
-    struct hs_scratch *scratch = tctxtToScratch(tctxt);
     bf64_unset(&scratch->al_log_sum, curr_loc - 1);
 
     return HWLM_CONTINUE_MATCHING;
@@ -566,7 +566,7 @@ hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t,
 static really_inline
 hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct RoseContext *tctxt,
                         u32 *anchored_it, u64a lastEnd, u64a victimDelaySlots,
-                        u8 *delaySlotBase, size_t delaySlotSize) {
+                        struct fatbit **delaySlots) {
     /* assert (!tctxt->in_anchored); */
 
     while (victimDelaySlots) {
@@ -579,9 +579,8 @@ hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct RoseContext *tctxt,
             return HWLM_TERMINATE_MATCHING;
         }
 
-        if (playDelaySlot(t, tctxt, delaySlotBase, delaySlotSize,
-                          vic % DELAY_SLOT_COUNT, vicOffset)
-            == HWLM_TERMINATE_MATCHING) {
+        if (playDelaySlot(t, tctxt, delaySlots, vic % DELAY_SLOT_COUNT,
+                          vicOffset) == HWLM_TERMINATE_MATCHING) {
             return HWLM_TERMINATE_MATCHING;
         }
     }
@@ -609,8 +608,7 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) {
     }
 
     {
-        u8 *delaySlotBase = getDelaySlots(scratch);
-        size_t delaySlotSize = t->delay_slot_size;
+        struct fatbit **delaySlots = getDelaySlots(tctxtToScratch(tctxt));
 
         u32 lastIndex = lastEnd & DELAY_MASK;
         u32 currIndex = currEnd & DELAY_MASK;
@@ -664,8 +662,7 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) {
         }
 
         if (playVictims(t, tctxt, &anchored_it, lastEnd, victimDelaySlots,
-                        delaySlotBase, delaySlotSize)
-            == HWLM_TERMINATE_MATCHING) {
+                        delaySlots) == HWLM_TERMINATE_MATCHING) {
             return HWLM_TERMINATE_MATCHING;
         }
     }
diff --git a/src/rose/match.h b/src/rose/match.h
index f3b8fe73..2b6dfb5d 100644
--- a/src/rose/match.h
+++ b/src/rose/match.h
@@ -40,6 +40,7 @@
 #include "nfa/nfa_api_util.h"
 #include "som/som_runtime.h"
 #include "util/bitutils.h"
+#include "util/fatbit.h"
 #include "util/internal_report.h"
 #include "util/multibit.h"
 
@@ -60,16 +61,16 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx);
 
 static rose_inline
 void resetAnchoredLog(const struct RoseEngine *t, struct hs_scratch *scratch) {
-    u8 **anchoredRows = getAnchoredLog(scratch);
+    struct fatbit **anchoredRows = getAnchoredLog(scratch);
     u32 region_width = t->anchoredMatches;
     struct RoseContext *tctxt = &scratch->tctxt;
 
     tctxt->curr_anchored_loc = bf64_iterate(scratch->am_log_sum, MMB_INVALID);
     if (tctxt->curr_anchored_loc != MMB_INVALID) {
         assert(tctxt->curr_anchored_loc < scratch->anchored_region_len);
-        u8 *curr_row = anchoredRows[tctxt->curr_anchored_loc];
-        tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width,
-                                               MMB_INVALID);
+        struct fatbit *curr_row = anchoredRows[tctxt->curr_anchored_loc];
+        tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width,
+                                                MMB_INVALID);
         assert(tctxt->curr_row_offset != MMB_INVALID);
     }
     DEBUG_PRINTF("AL reset --> %u, %u\n", tctxt->curr_anchored_loc,
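
The `playDelaySlot` change in match.c above is the heart of the refactor: instead of computing `delaySlotBase + delaySlotSize * vicIndex`, callers now index a table of `struct fatbit *` built once when scratch is allocated, which is what lets the `delay_slot_size` engine field disappear later in this patch. A minimal sketch of the two addressing styles follows; the types and sizes are stand-ins, not the real scratch layout:

```c
/* Sketch: old base+stride addressing vs. new pointer-table addressing.
 * fake_fatbit and the sizes are illustrative, not Hyperscan definitions. */
#include <stdint.h>
#include <stdio.h>

#define SLOT_COUNT 32

struct fake_fatbit { uint64_t bits[4]; }; /* stand-in for struct fatbit */

/* old style: every call site needs the stride (t->delay_slot_size) */
static uint8_t *slot_old(uint8_t *base, size_t slot_size, uint32_t i) {
    return base + slot_size * i;
}

/* new style: the stride is baked into the table when scratch is built */
static struct fake_fatbit *slot_new(struct fake_fatbit **slots, uint32_t i) {
    return slots[i];
}

int main(void) {
    static struct fake_fatbit storage[SLOT_COUNT];
    static struct fake_fatbit *table[SLOT_COUNT];
    for (uint32_t i = 0; i < SLOT_COUNT; i++) {
        table[i] = &storage[i];
    }

    /* with equal-sized slots, both styles reach the same address */
    uint8_t *a = slot_old((uint8_t *)storage, sizeof(struct fake_fatbit), 7);
    struct fake_fatbit *b = slot_new(table, 7);
    printf("same slot: %d\n", (void *)a == (void *)b);
    return 0;
}
```
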
diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h
index e8e60c7f..309fee5b 100644
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -127,16 +127,16 @@ void rosePushDelayedMatch(const struct RoseEngine *t, u32 delay,
     }
 
     const u32 delay_count = t->delay_count;
-    u8 *slot = getDelaySlots(tctxtToScratch(tctxt)) +
-               (t->delay_slot_size * slot_index);
+    struct fatbit **delaySlots = getDelaySlots(tctxtToScratch(tctxt));
+    struct fatbit *slot = delaySlots[slot_index];
 
     DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index);
     if (!(tctxt->filledDelayedSlots & (1U << slot_index))) {
         tctxt->filledDelayedSlots |= 1U << slot_index;
-        mmbit_clear(slot, delay_count);
+        fatbit_clear(slot);
     }
 
-    mmbit_set(slot, delay_count, delay_index);
+    fatbit_set(slot, delay_count, delay_index);
 }
 
 static rose_inline
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 45af3bb7..c640f091 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -4311,7 +4311,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id);
     engine->delay_count = delay_count;
-    engine->delay_slot_size = mmbit_size(delay_count);
     engine->delay_base_id = delay_base_id;
     engine->anchored_base_id = anchored_base_id;
     engine->anchored_count = delay_base_id - anchored_base_id;
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
index cd70c734..25ec7bae 100644
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ -884,7 +884,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, size);
     DUMP_U32(t, anchoredMatches);
     DUMP_U32(t, delay_count);
-    DUMP_U32(t, delay_slot_size);
     DUMP_U32(t, delay_base_id);
     DUMP_U32(t, anchored_count);
     DUMP_U32(t, anchored_base_id);
diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h
index c9025600..a1f91cd3 100644
--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@@ -447,7 +447,6 @@ struct RoseEngine {
     u32 size; // (bytes)
     u32 anchoredMatches; /* number of anchored roles generating matches */
     u32 delay_count; /* number of delayed literal ids. */
-    u32 delay_slot_size; /* size of delay slot mmbit. */
     u32 delay_base_id; /* literal id of the first delayed literal.
                         * delayed literal ids are contiguous */
     u32 anchored_count; /* number of anchored literal ids */
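
Removing `delay_slot_size` touches the bytecode writer, the `RoseEngine` struct, and the dumper together, since `roseDumpStructRaw` mirrors the struct field by field. A hypothetical reimplementation of that `DUMP_U32` pattern (not the real macro from rose_dump.cpp) shows why the three sites must move in lockstep:

```c
/* Sketch: a dump macro keyed on the member name keeps the dumper in sync
 * with the struct; deleting a field breaks the build at the dump site too.
 * The struct and macro here are illustrative, not the Hyperscan originals. */
#include <stdint.h>
#include <stdio.h>

struct engine {
    uint32_t size;
    uint32_t delay_count;
    uint32_t delay_base_id;
};

#define DUMP_U32(t, member) \
    fprintf(f, "%-20s %u\n", #member ":", (unsigned)(t)->member)

static void dump_engine(const struct engine *t, FILE *f) {
    DUMP_U32(t, size);
    DUMP_U32(t, delay_count);
    DUMP_U32(t, delay_base_id); /* a removed field would fail to compile here */
}

int main(void) {
    struct engine e = { 128, 4, 10 };
    dump_engine(&e, stdout);
    return 0;
}
```
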
diff --git a/src/scratch.c b/src/scratch.c
index 30241ab4..eff2289a 100644
--- a/src/scratch.c
+++ b/src/scratch.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -74,14 +74,16 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
     assert(anchored_literal_region_len < 8 * sizeof(s->am_log_sum));
 
     size_t anchored_region_size = anchored_region_len
-        * (mmbit_size(anchored_region_width) + sizeof(u8 *));
+        * (fatbit_size(anchored_region_width) + sizeof(struct fatbit *));
     anchored_region_size = ROUNDUP_N(anchored_region_size, 8);
 
     size_t anchored_literal_region_size = anchored_literal_region_len
-        * (mmbit_size(anchored_literal_region_width) + sizeof(u8 *));
+        * (fatbit_size(anchored_literal_region_width) + sizeof(struct fatbit *));
     anchored_literal_region_size = ROUNDUP_N(anchored_literal_region_size, 8);
 
-    size_t delay_size = mmbit_size(proto->delay_count) * DELAY_SLOT_COUNT;
+    size_t delay_region_size = DELAY_SLOT_COUNT *
+        (fatbit_size(proto->delay_count) + sizeof(struct fatbit *));
+    delay_region_size = ROUNDUP_N(delay_region_size, 8);
 
     size_t nfa_context_size = 2 * sizeof(struct NFAContext512) + 127;
 
@@ -96,7 +98,8 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
         + 2 * fatbit_size(deduperCount) /* ditto som logs */
         + 2 * sizeof(u64a) * deduperCount /* start offsets for som */
         + anchored_region_size
-        + anchored_literal_region_size + qmpq_size + delay_size
+        + anchored_literal_region_size + qmpq_size
+        + delay_region_size
         + som_store_size
         + som_now_size
         + som_attempted_size
@@ -140,23 +143,28 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
     s->som_attempted_store = (u64a *)current;
     current += som_attempted_store_size;
 
-    s->delay_slots = (u8 *)current;
-    current += delay_size;
-
     current = ROUNDUP_PTR(current, 8);
-    s->am_log = (u8 **)current;
-    current += sizeof(u8 *) * anchored_region_len;
-    for (u32 i = 0; i < anchored_region_len; i++) {
-        s->am_log[i] = (u8 *)current;
-        current += mmbit_size(anchored_region_width);
+    s->delay_slots = (struct fatbit **)current;
+    current += sizeof(struct fatbit *) * DELAY_SLOT_COUNT;
+    for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) {
+        s->delay_slots[i] = (struct fatbit *)current;
+        current += fatbit_size(proto->delay_count);
     }
 
     current = ROUNDUP_PTR(current, 8);
-    s->al_log = (u8 **)current;
-    current += sizeof(u8 *) * anchored_literal_region_len;
+    s->am_log = (struct fatbit **)current;
+    current += sizeof(struct fatbit *) * anchored_region_len;
+    for (u32 i = 0; i < anchored_region_len; i++) {
+        s->am_log[i] = (struct fatbit *)current;
+        current += fatbit_size(anchored_region_width);
+    }
+
+    current = ROUNDUP_PTR(current, 8);
+    s->al_log = (struct fatbit **)current;
+    current += sizeof(struct fatbit *) * anchored_literal_region_len;
     for (u32 i = 0; i < anchored_literal_region_len; i++) {
-        s->al_log[i] = (u8 *)current;
-        current += mmbit_size(anchored_literal_region_width);
+        s->al_log[i] = (struct fatbit *)current;
+        current += fatbit_size(anchored_literal_region_width);
     }
 
     current = ROUNDUP_PTR(current, 8);
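
The allocation logic above carves every fatbit region out of one scratch block: a pointer table first, then the row bodies, with the cursor rounded up to 8 bytes between regions. Below is a standalone sketch of the same carving pattern; the sizes and types are illustrative, and real scratch sizing also accounts for queues, som stores, and so on:

```c
/* Sketch of the scratch carving pattern: one allocation holds a table of
 * row pointers followed by the rows themselves. Types are stand-ins. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ROUNDUP_N(x, n) (((x) + (n) - 1) & ~(size_t)((n) - 1))

struct row { uint64_t bits[2]; }; /* stand-in for a fixed-size fatbit */

int main(void) {
    const uint32_t n_rows = 8;
    size_t region = n_rows * (sizeof(struct row) + sizeof(struct row *));
    region = ROUNDUP_N(region, 8);

    char *block = malloc(region + 7); /* slack so the cursor can be aligned */
    if (!block) {
        return 1;
    }
    char *current = (char *)(((uintptr_t)block + 7) & ~(uintptr_t)7);

    /* pointer table first... */
    struct row **log = (struct row **)current;
    current += sizeof(struct row *) * n_rows;

    /* ...then one row body per entry, exactly as alloc_scratch does for
     * delay_slots, am_log and al_log */
    for (uint32_t i = 0; i < n_rows; i++) {
        log[i] = (struct row *)current;
        current += sizeof(struct row);
    }

    memset(log[3], 0, sizeof(struct row)); /* rows start uninitialised */
    printf("row 3 lives at offset %td\n", (char *)log[3] - block);
    free(block);
    return 0;
}
```
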
diff --git a/src/scratch.h b/src/scratch.h
index f23ff5dc..fa112a56 100644
--- a/src/scratch.h
+++ b/src/scratch.h
@@ -37,7 +37,6 @@
 #define SCRATCH_H_DA6D4FC06FF410
 
 #include "ue2common.h"
-#include "util/multibit_internal.h"
 #include "rose/rose_types.h"
 
 #ifdef __cplusplus
@@ -133,7 +132,7 @@ struct RoseContext {
 
 struct match_deduper {
     struct fatbit *log[2]; /**< even, odd logs */
-    struct fatbit *som_log[2]; /**< even, odd mmbit logs for som */
+    struct fatbit *som_log[2]; /**< even, odd fatbit logs for som */
     u64a *som_start_log[2]; /**< even, odd start offset logs for som */
     u32 log_size;
     u64a current_report_offset;
@@ -162,9 +161,9 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
     struct mq *queues;
     struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid
                          * & active */
-    u8 *delay_slots;
-    u8 **am_log;
-    u8 **al_log;
+    struct fatbit **delay_slots;
+    struct fatbit **am_log;
+    struct fatbit **al_log;
     u64a am_log_sum;
     u64a al_log_sum;
     struct catchup_pq catchup_pq;
@@ -178,7 +177,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
     u32 scratchSize;
     u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE];
     u32 handledKeyCount;
-    struct fatbit *handled_roles; /**< mmbit of ROLES (not states) already
+    struct fatbit *handled_roles; /**< fatbit of ROLES (not states) already
                                    * handled by this literal */
     u64a *som_store; /**< array of som locations */
     u64a *som_attempted_store; /**< array of som locations for fail stores */
@@ -198,18 +197,18 @@ struct hs_scratch *tctxtToScratch(struct RoseContext *tctxt) {
 }
 
 static really_inline
-u8 **getAnchoredLog(struct hs_scratch *scratch) { /* array of mmbit ptr */
+struct fatbit **getAnchoredLog(struct hs_scratch *scratch) {
     return scratch->am_log;
 }
 
-/* array of mmbit ptr; TODO: why not an array of mmbits? */
+/* array of fatbit ptr; TODO: why not an array of fatbits? */
 static really_inline
-u8 **getAnchoredLiteralLog(struct hs_scratch *scratch) {
+struct fatbit **getAnchoredLiteralLog(struct hs_scratch *scratch) {
     return scratch->al_log;
 }
 
 static really_inline
-u8 *getDelaySlots(struct hs_scratch *scratch) {
+struct fatbit **getDelaySlots(struct hs_scratch *scratch) {
     return scratch->delay_slots;
 }
diff --git a/src/util/fatbit.h b/src/util/fatbit.h
index cf906269..ad607638 100644
--- a/src/util/fatbit.h
+++ b/src/util/fatbit.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -58,21 +58,25 @@ void fatbit_clear(struct fatbit *bits) {
 
 static really_inline
 char fatbit_set(struct fatbit *bits, u32 total_bits, u32 key) {
+    assert(ISALIGNED(bits));
     return mmbit_set(bits->fb_int.raw, total_bits, key);
 }
 
 static really_inline
 void fatbit_unset(struct fatbit *bits, u32 total_bits, u32 key) {
+    assert(ISALIGNED(bits));
     mmbit_unset(bits->fb_int.raw, total_bits, key);
 }
 
 static really_inline
 char fatbit_isset(const struct fatbit *bits, u32 total_bits, u32 key) {
+    assert(ISALIGNED(bits));
     return mmbit_isset(bits->fb_int.raw, total_bits, key);
 }
 
 static really_inline
 u32 fatbit_iterate(const struct fatbit *bits, u32 total_bits, u32 it_in) {
+    assert(ISALIGNED(bits));
     /* TODO: iterate_flat could be specialised as we don't have to worry about
      * partial blocks. */
     return mmbit_iterate(bits->fb_int.raw, total_bits, it_in);
 }
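
To close, a hedged usage sketch of the interface those new asserts guard. It assumes Hyperscan's internal headers (`fatbit_size()` as used by scratch.c, `MMB_INVALID` from multibit) and that the allocator returns memory satisfying the alignment assert, as `malloc` does on mainstream 64-bit platforms; fatbit is internal, not public API:

```c
/* Usage sketch only; compiled against Hyperscan's internal headers. */
#include <stdlib.h>

#include "ue2common.h"   /* u32, DEBUG_PRINTF */
#include "util/fatbit.h" /* fatbit API; pulls in MMB_INVALID via multibit */

static void fatbit_demo(void) {
    const u32 total_bits = 100;
    struct fatbit *fb = malloc(fatbit_size(total_bits));
    if (!fb) {
        return;
    }

    fatbit_clear(fb);               /* note: no total_bits argument needed */
    fatbit_set(fb, total_bits, 42); /* returns non-zero if already set */

    /* visit set keys in ascending order, MMB_INVALID as the sentinel */
    for (u32 it = fatbit_iterate(fb, total_bits, MMB_INVALID);
         it != MMB_INVALID; it = fatbit_iterate(fb, total_bits, it)) {
        DEBUG_PRINTF("key %u is set\n", it);
    }

    free(fb);
}
```

That `fatbit_clear()` takes no `total_bits` argument, unlike `mmbit_clear()`, is exactly the simplification this patch exploits in `recordAnchoredMatch` and `rosePushDelayedMatch`.
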