support dynamic stream compression

This commit is contained in:
Alex Coyte 2017-02-14 14:18:13 +11:00 committed by Matthew Barr
parent d9e2c3daca
commit 952f0aad21
12 changed files with 894 additions and 27 deletions

View File

@ -662,6 +662,8 @@ SET (hs_compile_SRCS
src/hs_version.h
src/scratch.h
src/state.h
src/stream_compress.c
src/stream_compress.h
src/ue2common.h
src/compiler/asserts.cpp
src/compiler/asserts.h

View File

@ -561,6 +561,18 @@ hs_error_t HS_CDECL hs_valid_platform(void);
*/
#define HS_ARCH_ERROR (-11)
/**
* Provided buffer was too small.
*
* This error indicates that there was insufficient space in the buffer. The
* call should be repeated with a larger provided buffer.
*
* Note: in this situation, it is normal for the amount of space required to be
* returned in the same manner as the used space would have been returned if the
* call was successful.
*/
#define HS_INSUFFICIENT_SPACE (-12)
/** @} */
#ifdef __cplusplus

View File

@ -321,6 +321,119 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
match_event_handler onEvent,
void *context);
/**
* Creates a compressed representation of the provided stream in the buffer
* provided. This compressed representation can be converted back into a stream
* state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream().
* The size of the compressed representation will be placed into @a used_space.
*
* If there is not sufficient space in the buffer to hold the compressed
* representation, @ref HS_INSUFFICIENT_SPACE will be returned and @a used_space
* will be populated with the amount of space required.
*
* Note: this function does not close the provided stream, you may continue to
* use the stream or to free it with @ref hs_close_stream().
*
* @param stream
* The stream (as created by @ref hs_open_stream()) to be compressed.
*
* @param buf
* Buffer to write the compressed representation into. Note: if the call is
* just being used to determine the amount of space required, it is allowed
* to pass NULL here and @a buf_space as 0.
*
* @param buf_space
* The number of bytes in @a buf. If buf_space is too small, the call will
* fail with @ref HS_INSUFFICIENT_SPACE.
*
* @param used_space
* Pointer to where the amount of used space will be written to. On success
* the used buffer space is always less than or equal to @a buf_space. If
* the call fails with @ref HS_INSUFFICIENT_SPACE, this pointer will be
* used to write out the amount of buffer space required. Must not be NULL.
*
* @return
* @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided
* buffer is too small, @ref HS_INVALID if @a stream or @a used_space is
* NULL or if @a buf is NULL while @a buf_space is non-zero.
*/
hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf,
size_t buf_space, size_t *used_space);
/**
* Decompresses a compressed representation created by @ref hs_compress_stream()
* into a new stream.
*
* Note: @a buf must correspond to a complete compressed representation created
* by @ref hs_compress_stream() of a stream that was opened against @a db. It is
* not always possible to detect misuse of this API and behaviour is undefined
* if these properties are not satisfied.
*
* @param db
* The compiled pattern database that the compressed stream was opened
* against.
*
* @param stream
* On success, a pointer to the expanded @ref hs_stream_t will be
* returned; NULL on failure. Must not be NULL itself.
*
* @param buf
* A compressed representation of a stream. These compressed forms are
* created by @ref hs_compress_stream(). Must not be NULL.
*
* @param buf_size
* The size in bytes of the compressed representation.
*
* @return
* @ref HS_SUCCESS on success, other values on failure. In particular,
* @ref HS_INVALID is returned if @a stream or @a buf is NULL or if the
* representation cannot be expanded from @a buf_size bytes,
* @ref HS_DB_MODE_ERROR if @a db was not compiled in streaming mode, and
* @ref HS_NOMEM if the new stream could not be allocated.
*/
hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream,
const char *buf, size_t buf_size);
/**
* Decompresses a compressed representation created by @ref hs_compress_stream()
* on top of the 'to' stream. The 'to' stream will first be reset (reporting
* any EOD matches if a non-NULL @a onEvent callback handler is provided).
*
* Note: the 'to' stream must be opened against the same database as the
* compressed stream.
*
* Note: @a buf must correspond to a complete compressed representation created
* by @ref hs_compress_stream() of a stream that was opened against the same
* database as @a to_stream. It is not always possible to detect misuse of this
* API and behaviour is undefined if these properties are not satisfied.
*
* @param to_stream
* A pointer to an existing, valid stream (as created by
* @ref hs_open_stream()) whose state will be replaced by the expanded
* state. Must not be NULL.
*
* @param buf
* A compressed representation of a stream. These compressed forms are
* created by @ref hs_compress_stream(). Must not be NULL.
*
* @param buf_size
* The size in bytes of the compressed representation.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
* allowed to be NULL only if the @a onEvent callback is also NULL.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* @ref HS_SUCCESS on success, other values on failure. In particular,
* @ref HS_INVALID is returned if @a to_stream or @a buf is NULL, if
* @a onEvent is given without a valid @a scratch, or if the representation
* cannot be expanded from @a buf_size bytes; @ref HS_SCRATCH_IN_USE is
* returned if @a scratch is already in use.
*/
hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream,
const char *buf, size_t buf_size,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context);
/**
* The block (non-streaming) regular expression scanner.
*

View File

@ -393,13 +393,15 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
so->activeLeafArray = curr_offset; /* TODO: limit size of array */
curr_offset += mmbit_size(activeArrayCount);
so->activeLeafArray_size = mmbit_size(activeArrayCount);
so->activeLeftArray = curr_offset; /* TODO: limit size of array */
curr_offset += mmbit_size(activeLeftCount);
so->activeLeftArray_size = mmbit_size(activeLeftCount);
curr_offset += so->activeLeftArray_size;
so->longLitState = curr_offset;
curr_offset += longLitStreamStateRequired;
so->longLitState_size = longLitStreamStateRequired;
// ONE WHOLE BYTE for each active leftfix with lag.
so->leftfixLagTable = curr_offset;
@ -420,6 +422,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
// Exhaustion multibit.
so->exhausted = curr_offset;
curr_offset += mmbit_size(build.rm.numEkeys());
so->exhausted_size = mmbit_size(build.rm.numEkeys());
// SOM locations and valid/writeable multibit structures.
if (build.ssm.numSomSlots()) {
@ -435,6 +438,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
curr_offset += mmbit_size(build.ssm.numSomSlots());
so->somWritable = curr_offset;
curr_offset += mmbit_size(build.ssm.numSomSlots());
so->somMultibit_size = mmbit_size(build.ssm.numSomSlots());
} else {
// No SOM handling, avoid growing the stream state any further.
so->somLocation = 0;
@ -443,6 +447,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
}
// note: state space for mask nfas is allocated later
so->nfaStateBegin = curr_offset;
so->end = curr_offset;
}
@ -2039,7 +2044,7 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
static
void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
RoseStateOffsets *so, u32 *scratchStateSize,
u32 *streamStateSize, u32 *transientStateSize) {
u32 *transientStateSize) {
u32 state_offset;
if (eng_info.transient) {
// Transient engines do not use stream state, but must have room in
@ -2050,7 +2055,6 @@ void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
// Pack NFA stream state on to the end of the Rose stream state.
state_offset = so->end;
so->end += eng_info.stream_size;
*streamStateSize += eng_info.stream_size;
}
nfa_info.stateOffset = state_offset;
@ -2064,12 +2068,11 @@ void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
static
void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos,
RoseStateOffsets *so, u32 *scratchStateSize,
u32 *streamStateSize, u32 *transientStateSize) {
u32 *transientStateSize) {
if (nfa_infos.empty()) {
return;
}
*streamStateSize = 0;
*transientStateSize = 0;
*scratchStateSize = 0;
@ -2077,7 +2080,7 @@ void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos,
NfaInfo &nfa_info = nfa_infos[qi];
const auto &eng_info = bc.engine_info_by_queue.at(qi);
allocateStateSpace(eng_info, nfa_info, so, scratchStateSize,
streamStateSize, transientStateSize);
transientStateSize);
}
}
@ -2491,7 +2494,7 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc,
// Update state offsets to do with NFAs in proto and in the NfaInfo
// structures.
updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize,
&proto.nfaStateSize, &proto.tStateSize);
&proto.tStateSize);
proto.nfaInfoOffset = bc.engine_blob.add_range(infos);
}
@ -3782,7 +3785,6 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
proto.totalNumLiterals = verify_u32(literal_info.size());
proto.asize = verify_u32(atable.size());
proto.ematcherRegionSize = ematcher_region_size;
proto.longLitStreamState = verify_u32(longLitStreamStateRequired);
proto.size = currOffset;

View File

@ -2026,15 +2026,17 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end);
fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8);
fprintf(f, " - exhaustion vector : %u bytes\n",
t->stateOffsets.exhausted_size);
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
fprintf(f, " - active array : %u bytes\n",
mmbit_size(t->activeArrayCount));
t->stateOffsets.activeLeafArray_size);
fprintf(f, " - active rose : %u bytes\n",
mmbit_size(t->activeLeftCount));
t->stateOffsets.activeLeftArray_size);
fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize);
fprintf(f, " - nfa state : %u bytes\n", t->nfaStateSize);
fprintf(f, " - nfa state : %u bytes\n",
t->stateOffsets.end - t->stateOffsets.nfaStateBegin);
fprintf(f, " - (trans. nfa state): %u bytes\n", t->tStateSize);
fprintf(f, " - one whole bytes : %u bytes\n",
t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable);
@ -2098,7 +2100,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, rolesWithStateCount);
DUMP_U32(t, stateSize);
DUMP_U32(t, anchorStateSize);
DUMP_U32(t, nfaStateSize);
DUMP_U32(t, tStateSize);
DUMP_U32(t, smallWriteOffset);
DUMP_U32(t, amatcherOffset);
@ -2148,7 +2149,9 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, delayRebuildLength);
DUMP_U32(t, stateOffsets.history);
DUMP_U32(t, stateOffsets.exhausted);
DUMP_U32(t, stateOffsets.exhausted_size);
DUMP_U32(t, stateOffsets.activeLeafArray);
DUMP_U32(t, stateOffsets.activeLeafArray_size);
DUMP_U32(t, stateOffsets.activeLeftArray);
DUMP_U32(t, stateOffsets.activeLeftArray_size);
DUMP_U32(t, stateOffsets.leftfixLagTable);
@ -2156,9 +2159,12 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, stateOffsets.groups);
DUMP_U32(t, stateOffsets.groups_size);
DUMP_U32(t, stateOffsets.longLitState);
DUMP_U32(t, stateOffsets.longLitState_size);
DUMP_U32(t, stateOffsets.somLocation);
DUMP_U32(t, stateOffsets.somValid);
DUMP_U32(t, stateOffsets.somWritable);
DUMP_U32(t, stateOffsets.somMultibit_size);
DUMP_U32(t, stateOffsets.nfaStateBegin);
DUMP_U32(t, stateOffsets.end);
DUMP_U32(t, boundary.reportEodOffset);
DUMP_U32(t, boundary.reportZeroOffset);
@ -2174,7 +2180,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, ematcherRegionSize);
DUMP_U32(t, somRevCount);
DUMP_U32(t, somRevOffsetOffset);
DUMP_U32(t, longLitStreamState);
fprintf(f, "}\n");
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
}

View File

@ -170,6 +170,12 @@ struct NfaInfo {
#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
* prefix checks */
/* offset of the status flags in the stream state. */
#define ROSE_STATE_OFFSET_STATUS_FLAGS 0
/* offset of role mmbit in stream state (just after the status flag byte). */
#define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8)
/**
* \brief Rose state offsets.
*
@ -184,24 +190,28 @@ struct NfaInfo {
struct RoseStateOffsets {
/** History buffer.
*
* First byte is an 8-bit count of the number of valid history bytes
* available, followed by the history itself. Max size of history is
* RoseEngine::historyRequired. */
* Max size of history is RoseEngine::historyRequired. */
u32 history;
/** Exhausted bitvector.
/** Exhausted multibit.
*
* 1 bit per exhaustible key (used by Highlander mode). If a bit is set,
* entry per exhaustible key (used by Highlander mode). If a bit is set,
* reports with that ekey should not be delivered to the user. */
u32 exhausted;
/** size of exhausted multibit */
u32 exhausted_size;
/** Multibit for active suffix/outfix engines. */
u32 activeLeafArray;
/** Multibit for active Rose (prefix/infix) engines. */
/** Size of multibit for active suffix/outfix engines in bytes. */
u32 activeLeafArray_size;
/** Multibit for active leftfix (prefix/infix) engines. */
u32 activeLeftArray;
/** Size of the active Rose array multibit, in bytes. */
/** Size of multibit for active leftfix (prefix/infix) engines in bytes. */
u32 activeLeftArray_size;
/** Table of lag information (stored as one byte per engine) for active
@ -220,6 +230,9 @@ struct RoseStateOffsets {
/** State for long literal support. */
u32 longLitState;
/** Size of the long literal state. */
u32 longLitState_size;
/** Packed SOM location slots. */
u32 somLocation;
@ -229,6 +242,13 @@ struct RoseStateOffsets {
/** Multibit guarding SOM location slots. */
u32 somWritable;
/** Size of each of the somValid and somWritable multibits, in bytes. */
u32 somMultibit_size;
/** Begin of the region where NFA engine state is stored.
* The NFA state region extends to end. */
u32 nfaStateBegin;
/** Total size of Rose state, in bytes. */
u32 end;
};
@ -317,7 +337,6 @@ struct RoseEngine {
u32 stateSize; /* size of the state bitset
* WARNING: not the size of the rose state */
u32 anchorStateSize; /* size of the state for the anchor dfas */
u32 nfaStateSize; /* total size of the state for the mask/rose nfas */
u32 tStateSize; /* total size of the state for transient rose nfas */
u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch;
* used for sizing scratch only. */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -68,7 +68,7 @@ const void *getByOffset(const struct RoseEngine *t, u32 offset) {
static really_inline
void *getRoleState(char *state) {
return state + sizeof(u8); // status flags
return state + ROSE_STATE_OFFSET_ROLE_MMBIT;
}
/** \brief Fetch the active array for suffix nfas. */

View File

@ -53,6 +53,7 @@
#include "som/som_runtime.h"
#include "som/som_stream.h"
#include "state.h"
#include "stream_compress.h"
#include "ue2common.h"
#include "util/exhaust.h"
#include "util/multibit.h"
@ -153,7 +154,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
/** \brief Retrieve status bitmask from stream state. */
static really_inline
u8 getStreamStatus(const char *state) {
u8 status = *(const u8 *)state;
u8 status = *(const u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS);
assert((status & ~STATUS_VALID_BITS) == 0);
return status;
}
@ -162,7 +163,7 @@ u8 getStreamStatus(const char *state) {
static really_inline
void setStreamStatus(char *state, u8 status) {
assert((status & ~STATUS_VALID_BITS) == 0);
*(u8 *)state = status;
*(u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS) = status;
}
/** \brief Initialise SOM state. Used in both block and streaming mode. */
@ -1092,3 +1093,97 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
return HS_SUCCESS;
}
/*
 * Public entry point: serialise the live parts of a stream into a caller
 * supplied buffer.
 *
 * The required size is always reported through used_space, so a caller may
 * probe with (buf == NULL, buf_space == 0) and retry with a large enough
 * buffer after receiving HS_INSUFFICIENT_SPACE.
 */
HS_PUBLIC_API
hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf,
                              size_t buf_space, size_t *used_space) {
    /* A stream and a place to report the size are mandatory; a NULL buffer
     * is only acceptable when the caller claims it has no space at all. */
    if (unlikely(!stream || !used_space || (!buf && buf_space))) {
        return HS_INVALID;
    }

    const struct RoseEngine *rose = stream->rose;
    const size_t required = size_compress_stream(rose, stream);

    DEBUG_PRINTF("require %zu [orig %zu]\n", required,
                 rose->stateOffsets.end + sizeof(struct hs_stream));

    /* Report the size before the capacity check so that the failure path
     * also tells the caller how much room is needed. */
    *used_space = required;
    if (required > buf_space) {
        return HS_INSUFFICIENT_SPACE;
    }

    compress_stream(buf, required, rose, stream);
    return HS_SUCCESS;
}
hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream,
const char *buf, size_t buf_size) {
if (unlikely(!stream || !buf)) {
return HS_INVALID;
}
*stream = NULL;
hs_error_t err = validDatabase(db);
if (unlikely(err != HS_SUCCESS)) {
return err;
}
const struct RoseEngine *rose = hs_get_bytecode(db);
if (unlikely(!ISALIGNED_16(rose))) {
return HS_INVALID;
}
if (unlikely(rose->mode != HS_MODE_STREAM)) {
return HS_DB_MODE_ERROR;
}
size_t stream_size = rose->stateOffsets.end + sizeof(struct hs_stream);
struct hs_stream *s = hs_stream_alloc(stream_size);
if (unlikely(!s)) {
return HS_NOMEM;
}
if (!expand_stream(s, rose, buf, buf_size)) {
hs_stream_free(s);
return HS_INVALID;
}
*stream = s;
return HS_SUCCESS;
}
hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream,
const char *buf, size_t buf_size,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context) {
if (unlikely(!to_stream || !buf)) {
return HS_INVALID;
}
const struct RoseEngine *rose = to_stream->rose;
if (onEvent) {
if (!scratch || !validScratch(to_stream->rose, scratch)) {
return HS_INVALID;
}
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
report_eod_matches(to_stream, scratch, onEvent, context);
unmarkScratchInUse(scratch);
}
if (expand_stream(to_stream, rose, buf, buf_size)) {
return HS_SUCCESS;
} else {
return HS_INVALID;
}
}

95
src/stream_compress.c Normal file
View File

@ -0,0 +1,95 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "stream_compress.h"
#include "state.h"
#include "nfa/nfa_internal.h"
#include "rose/rose_internal.h"
#include "util/multibit.h"
#include "util/uniform_ops.h"
#include <string.h>
/*
 * Compress direction: append sz bytes starting at p to buf at currOffset and
 * advance currOffset. Relies on buf, buf_size and currOffset being in scope
 * at the point of use. The caller is expected to have sized the buffer
 * beforehand, so capacity is only asserted.
 *
 * No trailing semicolon after while (0): the invocation supplies it, which
 * keeps the macro usable as a single statement (e.g. in an if/else arm).
 */
#define COPY_IN(p, sz) do {                             \
        assert(currOffset + (sz) <= buf_size);          \
        memcpy(buf + currOffset, (p), (sz));            \
        currOffset += (sz);                             \
        DEBUG_PRINTF("co = %zu\n", currOffset);         \
    } while (0)
/*
 * Expand direction: read sz bytes from buf at currOffset into p and advance
 * currOffset. Relies on buf, buf_size and currOffset being in scope at the
 * point of use. If the buffer does not hold sz more bytes, the ENCLOSING
 * FUNCTION returns 0, so this may only be used in functions where 0 means
 * failure.
 *
 * No trailing semicolon after while (0): the invocation supplies it, which
 * keeps the macro usable as a single statement (e.g. in an if/else arm).
 */
#define COPY_OUT(p, sz) do {                            \
        if (currOffset + (sz) > buf_size) {             \
            return 0;                                   \
        }                                               \
        memcpy((p), buf + currOffset, (sz));            \
        currOffset += (sz);                             \
        DEBUG_PRINTF("co = %zu\n", currOffset);         \
    } while (0)
/*
 * Sizing direction: account for sz bytes without touching any buffer. The
 * pointer argument p is accepted for signature parity with COPY_IN/COPY_OUT
 * and is never evaluated. Relies on currOffset being in scope.
 *
 * No trailing semicolon after while (0): the invocation supplies it, which
 * keeps the macro usable as a single statement (e.g. in an if/else arm).
 */
#define SIZE_COPY_IN(p, sz) do {                        \
        currOffset += (sz);                             \
        DEBUG_PRINTF("co = %zu\n", currOffset);         \
    } while (0)
/*
 * Expand instantiation of the shared walker: COPY reads from the (const)
 * buffer into the (mutable) stream, and ASSIGN really performs assignments
 * so that the stream's engine pointer is filled in.
 */
#define COPY COPY_OUT
#define ASSIGN(lhs, rhs) do { lhs = rhs; } while (0)
#define FN_SUFFIX expand
#define STREAM_QUAL
#define BUF_QUAL const
#include "stream_compress_impl.h"

/*
 * Rebuild stream from the compressed representation in buf.
 *
 * Returns 1 on success and 0 if buf_size bytes were not enough to expand the
 * stream. The walker reports the number of bytes consumed as a size_t; it is
 * reduced to a boolean here rather than being implicitly narrowed to int.
 */
int expand_stream(struct hs_stream *stream, const struct RoseEngine *rose,
                  const char *buf, size_t buf_size) {
    return sc_expand(rose, stream, buf, buf_size) != 0;
}
/*
 * Compress instantiation of the shared walker: COPY writes from the (const)
 * stream into the (mutable) buffer, and ASSIGN is a no-op because the stream
 * must not be modified.
 */
#define COPY COPY_IN
#define ASSIGN(lhs, rhs) do { } while (0)
#define FN_SUFFIX compress
#define STREAM_QUAL const
#define BUF_QUAL
#include "stream_compress_impl.h"
/*
 * Write the compressed representation of stream into buf and return the
 * number of bytes produced. Capacity is only checked by assertion (see
 * COPY_IN), so buf_size must already be large enough for the stream.
 */
size_t compress_stream(char *buf, size_t buf_size,
const struct RoseEngine *rose,
const struct hs_stream *stream) {
return sc_compress(rose, stream, buf, buf_size);
}
/*
 * Sizing instantiation of the shared walker: COPY only advances the running
 * offset, ASSIGN is a no-op, and the buffer parameter is marked UNUSED since
 * no bytes are ever transferred.
 */
#define COPY SIZE_COPY_IN
#define ASSIGN(lhs, rhs) do { } while (0)
#define FN_SUFFIX size
#define STREAM_QUAL const
#define BUF_QUAL UNUSED
#include "stream_compress_impl.h"
/*
 * Return the number of bytes the compressed representation of stream
 * occupies. The walker is invoked with a NULL buffer of size 0 because the
 * sizing COPY never dereferences it.
 */
size_t size_compress_stream(const struct RoseEngine *rose,
const struct hs_stream *stream) {
return sc_size(rose, stream, NULL, 0);
}

51
src/stream_compress.h Normal file
View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Functions for dynamic compress/expand for streams.
*/
#ifndef STREAM_COMPRESS_H
#define STREAM_COMPRESS_H
#include <stdlib.h>
struct hs_stream;
struct RoseEngine;
/**
 * \brief Populate \a out from the compressed representation in \a buf.
 *
 * \return non-zero on success, 0 if \a buf_size bytes are not enough to
 * expand the stream.
 */
int expand_stream(struct hs_stream *out, const struct RoseEngine *rose,
const char *buf, size_t buf_size);
/**
 * \brief Write the compressed representation of \a src into \a buf.
 *
 * \a buf_size must be at least the value reported by size_compress_stream()
 * for the same stream; capacity is only checked by assertion.
 *
 * \return the number of bytes written.
 */
size_t compress_stream(char *buf, size_t buf_size,
const struct RoseEngine *rose,
const struct hs_stream *src);
/**
 * \brief Number of bytes required to hold the compressed representation of
 * \a stream.
 */
size_t size_compress_stream(const struct RoseEngine *rose,
const struct hs_stream *stream);
#endif

190
src/stream_compress_impl.h Normal file
View File

@ -0,0 +1,190 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * This header is a template: the including file must define COPY, ASSIGN,
 * FN_SUFFIX, STREAM_QUAL and BUF_QUAL before each inclusion. The helpers
 * below are built on top of those.
 */
#include "util/join.h"
/* Transfer one whole object (by address and sizeof) through COPY. */
#define COPY_FIELD(x) COPY(&x, sizeof(x))
/* TODO: replace with a multibit compress/expand call */
#define COPY_MULTIBIT(mm_p, mm_size_bytes) COPY(mm_p, mm_size_bytes)
/* Per-instantiation names of the two static helpers defined below. */
#define COPY_LEFTFIXES JOIN(sc_left_, FN_SUFFIX)
#define COPY_SOM_INFO JOIN(sc_som_, FN_SUFFIX)
/*
 * Transfer the stream state of every leftfix that is both set in the active
 * left array and covered by the engine's active-left sparse iterator,
 * together with that leftfix's single lag-table byte.
 *
 * Returns the updated running offset. When COPY is the expand variant, a
 * short buffer makes this function return 0 from inside COPY.
 */
static
size_t COPY_LEFTFIXES(const struct RoseEngine *rose, size_t currOffset,
                      STREAM_QUAL struct hs_stream *stream,
                      BUF_QUAL char *buf, UNUSED size_t buf_size) {
    if (rose->activeLeftIterOffset) {
        const struct RoseStateOffsets *offsets = &rose->stateOffsets;
        STREAM_QUAL char *body
            = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream);

        /* Note: in the expand case the active left array has already been
         * copied into the stream. */
        const u8 *active_left = (const u8 *)(body + offsets->activeLeftArray);
        const u32 left_count = rose->activeLeftCount;
        const struct LeftNfaInfo *lefts = getLeftTable(rose);

        /* We only want to look at non-transient leftfixes */
        const struct mmbit_sparse_iter *sparse_it = getActiveLeftIter(rose);
        struct mmbit_sparse_state sparse_state[MAX_SPARSE_ITER_STATES];
        u32 unused_idx;

        u32 left_idx = mmbit_sparse_iter_begin(active_left, left_count,
                                               &unused_idx, sparse_it,
                                               sparse_state);
        while (left_idx != MMB_INVALID) {
            const u32 queue = left_idx + rose->leftfixBeginQueue;
            UNUSED const struct LeftNfaInfo *left_info = &lefts[left_idx];
            const struct NfaInfo *info = getNfaInfoByQueue(rose, queue);
            const struct NFA *engine = getNfaByInfo(rose, info);

            COPY(body + info->stateOffset, engine->streamStateSize);

            /* copy the one whole byte for active leftfixes as well */
            assert(left_info->lagIndex != ROSE_OFFSET_INVALID);
            COPY(body + offsets->leftfixLagTable + left_info->lagIndex, 1);

            left_idx = mmbit_sparse_iter_next(active_left, left_count,
                                              left_idx, &unused_idx,
                                              sparse_it, sparse_state);
        }
    }

    return currOffset;
}
/*
 * Transfer the start-of-match bookkeeping: first the writable multibit, then
 * the valid multibit, then only those SOM location slots whose bit is set in
 * the valid multibit.
 *
 * Returns the updated running offset. When COPY is the expand variant, a
 * short buffer makes this function return 0 from inside COPY.
 */
static
size_t COPY_SOM_INFO(const struct RoseEngine *rose, size_t currOffset,
                     STREAM_QUAL struct hs_stream *stream,
                     BUF_QUAL char *buf, UNUSED size_t buf_size) {
    const struct RoseStateOffsets *offsets = &rose->stateOffsets;

    if (offsets->somLocation) {
        STREAM_QUAL char *body
            = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream);

        assert(offsets->somValid);
        assert(offsets->somWritable);

        COPY_MULTIBIT(body + offsets->somWritable, offsets->somMultibit_size);
        COPY_MULTIBIT(body + offsets->somValid, offsets->somMultibit_size);

        /* Copy only the som slots which contain valid values. */
        /* Note: in the expand case the som valid array has been copied in. */
        const u8 *valid_bits = (const u8 *)(body + offsets->somValid);
        const u32 slot_count = rose->somLocationCount;
        const u32 slot_width = rose->somHorizon;

        u32 idx = mmbit_iterate(valid_bits, slot_count, MMB_INVALID);
        while (idx != MMB_INVALID) {
            COPY(body + offsets->somLocation + idx * slot_width, slot_width);
            idx = mmbit_iterate(valid_bits, slot_count, idx);
        }
    } else {
        /* No SOM slots: the guarding multibits must be absent as well. */
        assert(!offsets->somValid);
        assert(!offsets->somWritable);
    }

    return currOffset;
}
/*
 * Shared walker behind compress, expand and size (selected by FN_SUFFIX).
 *
 * Visits every live piece of stream state in a fixed order and hands each
 * one to COPY, which writes it to buf, reads it from buf, or merely counts
 * it, depending on the instantiation. The order of COPY calls defines the
 * compressed layout, so it must not be changed.
 *
 * Returns the number of bytes produced/consumed/required. In the expand
 * instantiation a short buffer makes COPY return 0 from this function.
 */
static
size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose,
                            STREAM_QUAL struct hs_stream *stream,
                            BUF_QUAL char *buf, UNUSED size_t buf_size) {
    size_t currOffset = 0;
    const struct RoseStateOffsets *so = &rose->stateOffsets;

    STREAM_QUAL char *stream_body
        = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream);

    COPY_FIELD(stream->offset);
    ASSIGN(stream->rose, rose);

    COPY(stream_body + ROSE_STATE_OFFSET_STATUS_FLAGS, 1);
    COPY_MULTIBIT(stream_body + ROSE_STATE_OFFSET_ROLE_MMBIT, rose->stateSize);

    /* stream is valid in compress/size, and stream->offset has been set already
     * on the expand side */
    u64a offset = stream->offset;

    /* Number of history bytes actually in use. The comparison is done in
     * 64 bits: narrowing the offset first would wrap for streams past 4GiB
     * and silently drop history that is still needed. */
    u32 history = (u32)MIN(offset, (u64a)rose->historyRequired);

    /* copy the active mmbits */
    COPY_MULTIBIT(stream_body + so->activeLeafArray, so->activeLeafArray_size);
    COPY_MULTIBIT(stream_body + so->activeLeftArray, so->activeLeftArray_size);

    COPY(stream_body + so->longLitState, so->longLitState_size);

    /* Leftlag table will be handled later, for active leftfixes */

    /* anchored table state is not required once we are deep in the stream */
    if (offset <= rose->anchoredDistance) {
        COPY(stream_body + so->anchorState, rose->anchorStateSize);
    }

    COPY(stream_body + so->groups, so->groups_size);

    /* copy the real bits of history: the valid bytes sit at the tail of the
     * history region */
    UNUSED u32 hend = so->history + rose->historyRequired;
    COPY(stream_body + hend - history, history);

    /* copy the exhaustion multibit */
    COPY_MULTIBIT(stream_body + so->exhausted, so->exhausted_size);

    /* copy nfa stream state for endfixes */
    /* Note: in the expand case the active array has already been copied into
     * the stream. */
    const u8 *aa = (const u8 *)(stream_body + so->activeLeafArray);
    u32 aaCount = rose->activeArrayCount;

    for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
         qi = mmbit_iterate(aa, aaCount, qi)) {
        DEBUG_PRINTF("saving stream state for qi=%u\n", qi);
        const struct NfaInfo *nfa_info = getNfaInfoByQueue(rose, qi);
        const struct NFA *nfa = getNfaByInfo(rose, nfa_info);
        COPY(stream_body + nfa_info->stateOffset, nfa->streamStateSize);
    }

    /* copy nfa stream state for leftfixes; a zero offset signals that the
     * helper ran out of buffer */
    currOffset = COPY_LEFTFIXES(rose, currOffset, stream, buf, buf_size);
    if (!currOffset) {
        return 0;
    }

    currOffset = COPY_SOM_INFO(rose, currOffset, stream, buf, buf_size);
    if (!currOffset) {
        return 0;
    }

    return currOffset;
}
/* Tear down every template macro so the header can be included again with a
 * different configuration. */
#undef ASSIGN
#undef COPY
#undef COPY_FIELD
#undef COPY_LEFTFIXES
#undef COPY_MULTIBIT
#undef COPY_SOM_INFO
#undef FN_SUFFIX
#undef BUF_QUAL
#undef STREAM_QUAL

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -2318,6 +2318,289 @@ TEST(HyperscanArgChecks, hs_populate_platform_null) {
ASSERT_EQ(HS_INVALID, err);
}
// Compressing a null stream handle is rejected with HS_INVALID.
TEST(HyperscanArgChecks, CompressStreamNoStream) {
    char out[100];
    size_t out_len;

    const hs_error_t err
        = hs_compress_stream(nullptr, out, sizeof(out), &out_len);
    ASSERT_EQ(HS_INVALID, err);
}
// Compressing without a used_space out-parameter is rejected with HS_INVALID;
// the stream itself stays usable and is closed normally afterwards.
TEST(HyperscanArgChecks, CompressStreamNoUsed) {
    hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, db);

    hs_stream_t *open_stream;
    hs_error_t err = hs_open_stream(db, 0, &open_stream);
    ASSERT_EQ(HS_SUCCESS, err);

    char out[100];
    err = hs_compress_stream(open_stream, out, sizeof(out), nullptr);
    ASSERT_EQ(HS_INVALID, err);

    // Tidy up the stream and then the database.
    err = hs_close_stream(open_stream, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_free_database(db);
    ASSERT_EQ(HS_SUCCESS, err);
}
// Claiming a non-zero buffer size while passing a null buffer is rejected
// with HS_INVALID.
TEST(HyperscanArgChecks, CompressStreamNoBuf) {
    hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, db);

    hs_stream_t *open_stream;
    hs_error_t err = hs_open_stream(db, 0, &open_stream);
    ASSERT_EQ(HS_SUCCESS, err);

    // Only the size of this array is used; the pointer passed is null.
    char out[100];
    size_t out_len;
    err = hs_compress_stream(open_stream, nullptr, sizeof(out), &out_len);
    ASSERT_EQ(HS_INVALID, err);

    // Tidy up the stream and then the database.
    err = hs_close_stream(open_stream, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_free_database(db);
    ASSERT_EQ(HS_SUCCESS, err);
}
// A buffer that is too small yields HS_INSUFFICIENT_SPACE and the required
// size is still reported through used_space.
TEST(HyperscanArgChecks, CompressStreamSmallBuff) {
    hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, db);

    hs_stream_t *stream;
    hs_error_t err = hs_open_stream(db, 0, &stream);
    ASSERT_EQ(HS_SUCCESS, err);

    char buf[100];
    size_t used = 0;
    err = hs_compress_stream(stream, buf, 1, &used);
    ASSERT_EQ(HS_INSUFFICIENT_SPACE, err);
    // Unsigned literal: used is a size_t, so avoid a signed/unsigned compare.
    ASSERT_LT(0U, used);

    err = hs_close_stream(stream, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_free_database(db);
    ASSERT_EQ(HS_SUCCESS, err);
}
// hs_expand_stream: Call with a null database; HS_INVALID is expected.
TEST(HyperscanArgChecks, ExpandNoDb) {
    hs_database_t *database =
        buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, database);

    hs_stream_t *source;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &source));

    // Produce a valid compressed image first so only the db argument is bad.
    char image[2000];
    size_t image_len = 0;
    ASSERT_EQ(HS_SUCCESS,
              hs_compress_stream(source, image, sizeof(image), &image_len));

    hs_stream_t *expanded;
    ASSERT_EQ(HS_INVALID,
              hs_expand_stream(nullptr, &expanded, image, image_len));

    ASSERT_EQ(HS_SUCCESS, hs_close_stream(source, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_free_database(database));
}
// hs_expand_stream: Call with a null output stream pointer; HS_INVALID is
// expected.
TEST(HyperscanArgChecks, ExpandNoTo) {
    hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, db);

    hs_stream_t *stream1;
    hs_error_t err = hs_open_stream(db, 0, &stream1);
    ASSERT_EQ(HS_SUCCESS, err);

    // Produce a valid compressed image first so only the output pointer is
    // bad.
    char buf[2000];
    size_t used = 0;
    err = hs_compress_stream(stream1, buf, sizeof(buf), &used);
    ASSERT_EQ(HS_SUCCESS, err);

    // No destination stream variable is needed: the call is given nullptr.
    err = hs_expand_stream(db, nullptr, buf, used);
    ASSERT_EQ(HS_INVALID, err);

    err = hs_close_stream(stream1, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_free_database(db);
    ASSERT_EQ(HS_SUCCESS, err);
}
// hs_expand_stream: Call with a null compressed buffer; HS_INVALID is
// expected.
TEST(HyperscanArgChecks, ExpandNoBuf) {
    hs_database_t *database =
        buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, database);

    hs_stream_t *source;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &source));

    // Compress for real so that the length passed below is a genuine one.
    char image[2000];
    size_t image_len = 0;
    ASSERT_EQ(HS_SUCCESS,
              hs_compress_stream(source, image, sizeof(image), &image_len));

    hs_stream_t *expanded;
    ASSERT_EQ(HS_INVALID,
              hs_expand_stream(database, &expanded, nullptr, image_len));

    ASSERT_EQ(HS_SUCCESS, hs_close_stream(source, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_free_database(database));
}
// hs_expand_stream: Call with only half of the compressed length;
// HS_INVALID is expected.
TEST(HyperscanArgChecks, ExpandSmallBuf) {
    hs_database_t *database =
        buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, database);

    hs_stream_t *source;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &source));

    char image[2000];
    size_t image_len = 0;
    ASSERT_EQ(HS_SUCCESS,
              hs_compress_stream(source, image, sizeof(image), &image_len));

    // Claim the image is half as long as what was actually written.
    hs_stream_t *expanded;
    ASSERT_EQ(HS_INVALID,
              hs_expand_stream(database, &expanded, image, image_len / 2));

    ASSERT_EQ(HS_SUCCESS, hs_close_stream(source, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_free_database(database));
}
// hs_reset_and_expand_stream: Call with a null target stream; HS_INVALID is
// expected.
TEST(HyperscanArgChecks, ResetAndExpandNoStream) {
    hs_database_t *database =
        buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, database);

    hs_stream_t *source;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &source));

    char image[2000];
    size_t image_len = 0;
    ASSERT_EQ(HS_SUCCESS,
              hs_compress_stream(source, image, sizeof(image), &image_len));

    // Scratch, callback and context are all null as well.
    ASSERT_EQ(HS_INVALID,
              hs_reset_and_expand_stream(nullptr, image, image_len, nullptr,
                                         nullptr, nullptr));

    ASSERT_EQ(HS_SUCCESS, hs_close_stream(source, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_free_database(database));
}
// hs_reset_and_expand_stream: Call with a null compressed buffer;
// HS_INVALID is expected.
TEST(HyperscanArgChecks, ResetAndExpandNoBuf) {
    hs_database_t *database =
        buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, database);

    hs_stream_t *source;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &source));

    // Compress for real so that the length passed below is a genuine one.
    char image[2000];
    size_t image_len = 0;
    ASSERT_EQ(HS_SUCCESS,
              hs_compress_stream(source, image, sizeof(image), &image_len));

    // A second, valid, open stream to act as the expansion target.
    hs_stream_t *target;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &target));

    ASSERT_EQ(HS_INVALID,
              hs_reset_and_expand_stream(target, nullptr, image_len, nullptr,
                                         nullptr, nullptr));

    ASSERT_EQ(HS_SUCCESS, hs_close_stream(source, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_close_stream(target, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_free_database(database));
}
// hs_reset_and_expand_stream: Call with only half of the compressed length;
// HS_INVALID is expected.
TEST(HyperscanArgChecks, ResetAndExpandSmallBuf) {
    hs_database_t *database =
        buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, database);

    hs_stream_t *source;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &source));

    char image[2000];
    size_t image_len = 0;
    ASSERT_EQ(HS_SUCCESS,
              hs_compress_stream(source, image, sizeof(image), &image_len));

    // A second, valid, open stream to act as the expansion target.
    hs_stream_t *target;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &target));

    // Claim the image is half as long as what was actually written.
    ASSERT_EQ(HS_INVALID,
              hs_reset_and_expand_stream(target, image, image_len / 2,
                                         nullptr, nullptr, nullptr));

    ASSERT_EQ(HS_SUCCESS, hs_close_stream(source, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_close_stream(target, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_free_database(database));
}
// hs_reset_and_expand_stream: Call with a match callback and context but a
// null scratch; HS_INVALID is expected.
TEST(HyperscanArgChecks, ResetAndExpandNoScratch) {
    hs_database_t *database =
        buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
    ASSERT_NE(nullptr, database);

    hs_stream_t *source;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &source));

    char image[2000];
    size_t image_len = 0;
    ASSERT_EQ(HS_SUCCESS,
              hs_compress_stream(source, image, sizeof(image), &image_len));

    // A second, valid, open stream to act as the expansion target.
    hs_stream_t *target;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &target));

    // Only the address of this object is handed over, as the callback context.
    int ctx;
    ASSERT_EQ(HS_INVALID,
              hs_reset_and_expand_stream(target, image, image_len, nullptr,
                                         singleHandler, &ctx));

    ASSERT_EQ(HS_SUCCESS, hs_close_stream(source, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_close_stream(target, nullptr, nullptr, nullptr));
    ASSERT_EQ(HS_SUCCESS, hs_free_database(database));
}
// Value-parameterised fixture with no state of its own; each instantiation
// supplies a single unsigned value.
// NOTE(review): presumably that value is the mode-flag word under test -
// confirm against the tests that use this fixture.
class BadModeTest : public testing::TestWithParam<unsigned int> {
};
// hs_compile: Compile a pattern with bogus mode flags set.