mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
support dynamic stream compression
This commit is contained in:
@@ -393,13 +393,15 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
|
||||
|
||||
so->activeLeafArray = curr_offset; /* TODO: limit size of array */
|
||||
curr_offset += mmbit_size(activeArrayCount);
|
||||
so->activeLeafArray_size = mmbit_size(activeArrayCount);
|
||||
|
||||
so->activeLeftArray = curr_offset; /* TODO: limit size of array */
|
||||
curr_offset += mmbit_size(activeLeftCount);
|
||||
so->activeLeftArray_size = mmbit_size(activeLeftCount);
|
||||
curr_offset += so->activeLeftArray_size;
|
||||
|
||||
so->longLitState = curr_offset;
|
||||
curr_offset += longLitStreamStateRequired;
|
||||
so->longLitState_size = longLitStreamStateRequired;
|
||||
|
||||
// ONE WHOLE BYTE for each active leftfix with lag.
|
||||
so->leftfixLagTable = curr_offset;
|
||||
@@ -420,6 +422,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
|
||||
// Exhaustion multibit.
|
||||
so->exhausted = curr_offset;
|
||||
curr_offset += mmbit_size(build.rm.numEkeys());
|
||||
so->exhausted_size = mmbit_size(build.rm.numEkeys());
|
||||
|
||||
// SOM locations and valid/writeable multibit structures.
|
||||
if (build.ssm.numSomSlots()) {
|
||||
@@ -435,6 +438,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
|
||||
curr_offset += mmbit_size(build.ssm.numSomSlots());
|
||||
so->somWritable = curr_offset;
|
||||
curr_offset += mmbit_size(build.ssm.numSomSlots());
|
||||
so->somMultibit_size = mmbit_size(build.ssm.numSomSlots());
|
||||
} else {
|
||||
// No SOM handling, avoid growing the stream state any further.
|
||||
so->somLocation = 0;
|
||||
@@ -443,6 +447,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
|
||||
}
|
||||
|
||||
// note: state space for mask nfas is allocated later
|
||||
so->nfaStateBegin = curr_offset;
|
||||
so->end = curr_offset;
|
||||
}
|
||||
|
||||
@@ -2039,7 +2044,7 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
|
||||
static
|
||||
void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
|
||||
RoseStateOffsets *so, u32 *scratchStateSize,
|
||||
u32 *streamStateSize, u32 *transientStateSize) {
|
||||
u32 *transientStateSize) {
|
||||
u32 state_offset;
|
||||
if (eng_info.transient) {
|
||||
// Transient engines do not use stream state, but must have room in
|
||||
@@ -2050,7 +2055,6 @@ void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
|
||||
// Pack NFA stream state on to the end of the Rose stream state.
|
||||
state_offset = so->end;
|
||||
so->end += eng_info.stream_size;
|
||||
*streamStateSize += eng_info.stream_size;
|
||||
}
|
||||
|
||||
nfa_info.stateOffset = state_offset;
|
||||
@@ -2064,12 +2068,11 @@ void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
|
||||
static
|
||||
void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos,
|
||||
RoseStateOffsets *so, u32 *scratchStateSize,
|
||||
u32 *streamStateSize, u32 *transientStateSize) {
|
||||
u32 *transientStateSize) {
|
||||
if (nfa_infos.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
*streamStateSize = 0;
|
||||
*transientStateSize = 0;
|
||||
*scratchStateSize = 0;
|
||||
|
||||
@@ -2077,7 +2080,7 @@ void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos,
|
||||
NfaInfo &nfa_info = nfa_infos[qi];
|
||||
const auto &eng_info = bc.engine_info_by_queue.at(qi);
|
||||
allocateStateSpace(eng_info, nfa_info, so, scratchStateSize,
|
||||
streamStateSize, transientStateSize);
|
||||
transientStateSize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2491,7 +2494,7 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc,
|
||||
// Update state offsets to do with NFAs in proto and in the NfaInfo
|
||||
// structures.
|
||||
updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize,
|
||||
&proto.nfaStateSize, &proto.tStateSize);
|
||||
&proto.tStateSize);
|
||||
|
||||
proto.nfaInfoOffset = bc.engine_blob.add_range(infos);
|
||||
}
|
||||
@@ -3782,7 +3785,6 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
proto.totalNumLiterals = verify_u32(literal_info.size());
|
||||
proto.asize = verify_u32(atable.size());
|
||||
proto.ematcherRegionSize = ematcher_region_size;
|
||||
proto.longLitStreamState = verify_u32(longLitStreamStateRequired);
|
||||
|
||||
proto.size = currOffset;
|
||||
|
||||
|
@@ -2026,15 +2026,17 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
|
||||
|
||||
fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end);
|
||||
fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
|
||||
fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8);
|
||||
fprintf(f, " - exhaustion vector : %u bytes\n",
|
||||
t->stateOffsets.exhausted_size);
|
||||
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
|
||||
fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
|
||||
fprintf(f, " - active array : %u bytes\n",
|
||||
mmbit_size(t->activeArrayCount));
|
||||
t->stateOffsets.activeLeafArray_size);
|
||||
fprintf(f, " - active rose : %u bytes\n",
|
||||
mmbit_size(t->activeLeftCount));
|
||||
t->stateOffsets.activeLeftArray_size);
|
||||
fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize);
|
||||
fprintf(f, " - nfa state : %u bytes\n", t->nfaStateSize);
|
||||
fprintf(f, " - nfa state : %u bytes\n",
|
||||
t->stateOffsets.end - t->stateOffsets.nfaStateBegin);
|
||||
fprintf(f, " - (trans. nfa state): %u bytes\n", t->tStateSize);
|
||||
fprintf(f, " - one whole bytes : %u bytes\n",
|
||||
t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable);
|
||||
@@ -2098,7 +2100,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, rolesWithStateCount);
|
||||
DUMP_U32(t, stateSize);
|
||||
DUMP_U32(t, anchorStateSize);
|
||||
DUMP_U32(t, nfaStateSize);
|
||||
DUMP_U32(t, tStateSize);
|
||||
DUMP_U32(t, smallWriteOffset);
|
||||
DUMP_U32(t, amatcherOffset);
|
||||
@@ -2148,7 +2149,9 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, delayRebuildLength);
|
||||
DUMP_U32(t, stateOffsets.history);
|
||||
DUMP_U32(t, stateOffsets.exhausted);
|
||||
DUMP_U32(t, stateOffsets.exhausted_size);
|
||||
DUMP_U32(t, stateOffsets.activeLeafArray);
|
||||
DUMP_U32(t, stateOffsets.activeLeafArray_size);
|
||||
DUMP_U32(t, stateOffsets.activeLeftArray);
|
||||
DUMP_U32(t, stateOffsets.activeLeftArray_size);
|
||||
DUMP_U32(t, stateOffsets.leftfixLagTable);
|
||||
@@ -2156,9 +2159,12 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, stateOffsets.groups);
|
||||
DUMP_U32(t, stateOffsets.groups_size);
|
||||
DUMP_U32(t, stateOffsets.longLitState);
|
||||
DUMP_U32(t, stateOffsets.longLitState_size);
|
||||
DUMP_U32(t, stateOffsets.somLocation);
|
||||
DUMP_U32(t, stateOffsets.somValid);
|
||||
DUMP_U32(t, stateOffsets.somWritable);
|
||||
DUMP_U32(t, stateOffsets.somMultibit_size);
|
||||
DUMP_U32(t, stateOffsets.nfaStateBegin);
|
||||
DUMP_U32(t, stateOffsets.end);
|
||||
DUMP_U32(t, boundary.reportEodOffset);
|
||||
DUMP_U32(t, boundary.reportZeroOffset);
|
||||
@@ -2174,7 +2180,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, ematcherRegionSize);
|
||||
DUMP_U32(t, somRevCount);
|
||||
DUMP_U32(t, somRevOffsetOffset);
|
||||
DUMP_U32(t, longLitStreamState);
|
||||
fprintf(f, "}\n");
|
||||
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
|
||||
}
|
||||
|
@@ -170,6 +170,12 @@ struct NfaInfo {
|
||||
#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
|
||||
* prefix checks */
|
||||
|
||||
/* offset of the status flags in the stream state. */
|
||||
#define ROSE_STATE_OFFSET_STATUS_FLAGS 0
|
||||
|
||||
/* offset of role mmbit in stream state (just after the status flag byte). */
|
||||
#define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8)
|
||||
|
||||
/**
|
||||
* \brief Rose state offsets.
|
||||
*
|
||||
@@ -184,24 +190,28 @@ struct NfaInfo {
|
||||
struct RoseStateOffsets {
|
||||
/** History buffer.
|
||||
*
|
||||
* First byte is an 8-bit count of the number of valid history bytes
|
||||
* available, followed by the history itself. Max size of history is
|
||||
* RoseEngine::historyRequired. */
|
||||
* Max size of history is RoseEngine::historyRequired. */
|
||||
u32 history;
|
||||
|
||||
/** Exhausted bitvector.
|
||||
/** Exhausted multibit.
|
||||
*
|
||||
* 1 bit per exhaustible key (used by Highlander mode). If a bit is set,
|
||||
* entry per exhaustible key (used by Highlander mode). If a bit is set,
|
||||
* reports with that ekey should not be delivered to the user. */
|
||||
u32 exhausted;
|
||||
|
||||
/** size of exhausted multibit */
|
||||
u32 exhausted_size;
|
||||
|
||||
/** Multibit for active suffix/outfix engines. */
|
||||
u32 activeLeafArray;
|
||||
|
||||
/** Multibit for active Rose (prefix/infix) engines. */
|
||||
/** Size of multibit for active suffix/outfix engines in bytes. */
|
||||
u32 activeLeafArray_size;
|
||||
|
||||
/** Multibit for active leftfix (prefix/infix) engines. */
|
||||
u32 activeLeftArray;
|
||||
|
||||
/** Size of the active Rose array multibit, in bytes. */
|
||||
/** Size of multibit for active leftfix (prefix/infix) engines in bytes. */
|
||||
u32 activeLeftArray_size;
|
||||
|
||||
/** Table of lag information (stored as one byte per engine) for active
|
||||
@@ -220,6 +230,9 @@ struct RoseStateOffsets {
|
||||
/** State for long literal support. */
|
||||
u32 longLitState;
|
||||
|
||||
/** Size of the long literal state. */
|
||||
u32 longLitState_size;
|
||||
|
||||
/** Packed SOM location slots. */
|
||||
u32 somLocation;
|
||||
|
||||
@@ -229,6 +242,13 @@ struct RoseStateOffsets {
|
||||
/** Multibit guarding SOM location slots. */
|
||||
u32 somWritable;
|
||||
|
||||
/** Size of each of the somValid and somWritable multibits, in bytes. */
|
||||
u32 somMultibit_size;
|
||||
|
||||
/** Begin of the region where NFA engine state is stored.
|
||||
* The NFA state region extends to end. */
|
||||
u32 nfaStateBegin;
|
||||
|
||||
/** Total size of Rose state, in bytes. */
|
||||
u32 end;
|
||||
};
|
||||
@@ -317,7 +337,6 @@ struct RoseEngine {
|
||||
u32 stateSize; /* size of the state bitset
|
||||
* WARNING: not the size of the rose state */
|
||||
u32 anchorStateSize; /* size of the state for the anchor dfas */
|
||||
u32 nfaStateSize; /* total size of the state for the mask/rose nfas */
|
||||
u32 tStateSize; /* total size of the state for transient rose nfas */
|
||||
u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch;
|
||||
* used for sizing scratch only. */
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -68,7 +68,7 @@ const void *getByOffset(const struct RoseEngine *t, u32 offset) {
|
||||
|
||||
static really_inline
|
||||
void *getRoleState(char *state) {
|
||||
return state + sizeof(u8); // status flags
|
||||
return state + ROSE_STATE_OFFSET_ROLE_MMBIT;
|
||||
}
|
||||
|
||||
/** \brief Fetch the active array for suffix nfas. */
|
||||
|
Reference in New Issue
Block a user