diff --git a/CMakeLists.txt b/CMakeLists.txt index 57cf1043..eb9a62e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -662,6 +662,8 @@ SET (hs_compile_SRCS src/hs_version.h src/scratch.h src/state.h + src/stream_compress.c + src/stream_compress.h src/ue2common.h src/compiler/asserts.cpp src/compiler/asserts.h diff --git a/src/hs_common.h b/src/hs_common.h index ffea397e..e1f079f2 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -561,6 +561,18 @@ hs_error_t HS_CDECL hs_valid_platform(void); */ #define HS_ARCH_ERROR (-11) +/** + * Provided buffer was too small. + * + * This error indicates that there was insufficient space in the buffer. The + * call should be repeated with a larger provided buffer. + * + * Note: in this situation, it is normal for the amount of space required to be + * returned in the same manner as the used space would have been returned if the + * call was successful. + */ +#define HS_INSUFFICIENT_SPACE (-12) + /** @} */ #ifdef __cplusplus diff --git a/src/hs_runtime.h b/src/hs_runtime.h index ecd97ca5..a93437b8 100644 --- a/src/hs_runtime.h +++ b/src/hs_runtime.h @@ -321,6 +321,119 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, match_event_handler onEvent, void *context); +/** + * Creates a compressed representation of the provided stream in the buffer + * provided. This compressed representation can be converted back into a stream + * state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream(). + * The size of the compressed representation will be placed into @a used_space. + * + * If there is not sufficient space in the buffer to hold the compressed + * representation, @ref HS_INSUFFICIENT_SPACE will be returned and @a used_space + * will be populated with the amount of space required. + * + * Note: this function does not close the provided stream, you may continue to + * use the stream or to free it with @ref hs_close_stream(). 
+ * + * @param stream + * The stream (as created by @ref hs_open_stream()) to be compressed. + * + * @param buf + * Buffer to write the compressed representation into. Note: if the call is + * just being used to determine the amount of space required, it is allowed + * to pass NULL here and @a buf_space as 0. + * + * @param buf_space + * The number of bytes in @a buf. If buf_space is too small, the call will + * fail with @ref HS_INSUFFICIENT_SPACE. + * + * @param used_space + * Pointer to where the amount of used space will be written to. The used + * buffer space is always less than or equal to @a buf_space. If the call + * fails with @ref HS_INSUFFICIENT_SPACE, this pointer will be used to + * write out the amount of buffer space required. + * + * @return + * @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided + * buffer is too small. + */ +hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf, + size_t buf_space, size_t *used_space); + +/** + * Decompresses a compressed representation created by @ref hs_compress_stream() + * into a new stream. + * + * Note: @a buf must correspond to a complete compressed representation created + * by @ref hs_compress_stream() of a stream that was opened against @a db. It is + * not always possible to detect misuse of this API and behaviour is undefined + * if these properties are not satisfied. + * + * @param db + * The compiled pattern database that the compressed stream was opened + * against. + * + * @param stream + * On success, a pointer to the expanded @ref hs_stream_t will be + * returned; NULL on failure. + * + * @param buf + * A compressed representation of a stream. These compressed forms are + * created by @ref hs_compress_stream(). + * + * @param buf_size + * The size in bytes of the compressed representation. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. 
+ */ +hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, + const char *buf, size_t buf_size); + +/** + * Decompresses a compressed representation created by @ref hs_compress_stream() + * on top of the 'to' stream. The 'to' stream will first be reset (reporting + * any EOD matches if a non-NULL @a onEvent callback handler is provided). + * + * Note: the 'to' stream must be opened against the same database as the + * compressed stream. + * + * Note: @a buf must correspond to a complete compressed representation created + * by @ref hs_compress_stream() of a stream that was opened against the same + * database as @a to_stream. It is not always possible to detect misuse of this + * API and behaviour is undefined if these properties are not satisfied. + * + * @param to_stream + * The stream (as created by @ref hs_open_stream()) to be overwritten with + * the expanded stream state. + * + * @param buf + * A compressed representation of a stream. These compressed forms are + * created by @ref hs_compress_stream(). + * + * @param buf_size + * The size in bytes of the compressed representation. + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is + * allowed to be NULL only if the @a onEvent callback is also NULL. + * + * @param onEvent + * Pointer to a match event callback function. If a NULL pointer is given, + * no matches will be returned. + * + * @param context + * The user defined pointer which will be passed to the callback function + * when a match occurs. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream, + const char *buf, size_t buf_size, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context); + /** * The block (non-streaming) regular expression scanner. 
* diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index e7fd6271..cf3de55c 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -393,13 +393,15 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, so->activeLeafArray = curr_offset; /* TODO: limit size of array */ curr_offset += mmbit_size(activeArrayCount); + so->activeLeafArray_size = mmbit_size(activeArrayCount); so->activeLeftArray = curr_offset; /* TODO: limit size of array */ + curr_offset += mmbit_size(activeLeftCount); so->activeLeftArray_size = mmbit_size(activeLeftCount); - curr_offset += so->activeLeftArray_size; so->longLitState = curr_offset; curr_offset += longLitStreamStateRequired; + so->longLitState_size = longLitStreamStateRequired; // ONE WHOLE BYTE for each active leftfix with lag. so->leftfixLagTable = curr_offset; @@ -420,6 +422,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, // Exhaustion multibit. so->exhausted = curr_offset; curr_offset += mmbit_size(build.rm.numEkeys()); + so->exhausted_size = mmbit_size(build.rm.numEkeys()); // SOM locations and valid/writeable multibit structures. if (build.ssm.numSomSlots()) { @@ -435,6 +438,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, curr_offset += mmbit_size(build.ssm.numSomSlots()); so->somWritable = curr_offset; curr_offset += mmbit_size(build.ssm.numSomSlots()); + so->somMultibit_size = mmbit_size(build.ssm.numSomSlots()); } else { // No SOM handling, avoid growing the stream state any further. 
so->somLocation = 0; @@ -443,6 +447,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, } // note: state space for mask nfas is allocated later + so->nfaStateBegin = curr_offset; so->end = curr_offset; } @@ -2039,7 +2044,7 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, static void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, RoseStateOffsets *so, u32 *scratchStateSize, - u32 *streamStateSize, u32 *transientStateSize) { + u32 *transientStateSize) { u32 state_offset; if (eng_info.transient) { // Transient engines do not use stream state, but must have room in @@ -2050,7 +2055,6 @@ void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, // Pack NFA stream state on to the end of the Rose stream state. state_offset = so->end; so->end += eng_info.stream_size; - *streamStateSize += eng_info.stream_size; } nfa_info.stateOffset = state_offset; @@ -2064,12 +2068,11 @@ void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, static void updateNfaState(const build_context &bc, vector &nfa_infos, RoseStateOffsets *so, u32 *scratchStateSize, - u32 *streamStateSize, u32 *transientStateSize) { + u32 *transientStateSize) { if (nfa_infos.empty()) { return; } - *streamStateSize = 0; *transientStateSize = 0; *scratchStateSize = 0; @@ -2077,7 +2080,7 @@ void updateNfaState(const build_context &bc, vector &nfa_infos, NfaInfo &nfa_info = nfa_infos[qi]; const auto &eng_info = bc.engine_info_by_queue.at(qi); allocateStateSpace(eng_info, nfa_info, so, scratchStateSize, - streamStateSize, transientStateSize); + transientStateSize); } } @@ -2491,7 +2494,7 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, // Update state offsets to do with NFAs in proto and in the NfaInfo // structures. 
updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize, - &proto.nfaStateSize, &proto.tStateSize); + &proto.tStateSize); proto.nfaInfoOffset = bc.engine_blob.add_range(infos); } @@ -3782,7 +3785,6 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.totalNumLiterals = verify_u32(literal_info.size()); proto.asize = verify_u32(atable.size()); proto.ematcherRegionSize = ematcher_region_size; - proto.longLitStreamState = verify_u32(longLitStreamStateRequired); proto.size = currOffset; diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 5ab9fc99..b70112f2 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -2026,15 +2026,17 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end); fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); - fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); + fprintf(f, " - exhaustion vector : %u bytes\n", + t->stateOffsets.exhausted_size); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); fprintf(f, " - active array : %u bytes\n", - mmbit_size(t->activeArrayCount)); + t->stateOffsets.activeLeafArray_size); fprintf(f, " - active rose : %u bytes\n", - mmbit_size(t->activeLeftCount)); + t->stateOffsets.activeLeftArray_size); fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize); - fprintf(f, " - nfa state : %u bytes\n", t->nfaStateSize); + fprintf(f, " - nfa state : %u bytes\n", + t->stateOffsets.end - t->stateOffsets.nfaStateBegin); fprintf(f, " - (trans. 
nfa state): %u bytes\n", t->tStateSize); fprintf(f, " - one whole bytes : %u bytes\n", t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable); @@ -2098,7 +2100,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, rolesWithStateCount); DUMP_U32(t, stateSize); DUMP_U32(t, anchorStateSize); - DUMP_U32(t, nfaStateSize); DUMP_U32(t, tStateSize); DUMP_U32(t, smallWriteOffset); DUMP_U32(t, amatcherOffset); @@ -2148,7 +2149,9 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, delayRebuildLength); DUMP_U32(t, stateOffsets.history); DUMP_U32(t, stateOffsets.exhausted); + DUMP_U32(t, stateOffsets.exhausted_size); DUMP_U32(t, stateOffsets.activeLeafArray); + DUMP_U32(t, stateOffsets.activeLeafArray_size); DUMP_U32(t, stateOffsets.activeLeftArray); DUMP_U32(t, stateOffsets.activeLeftArray_size); DUMP_U32(t, stateOffsets.leftfixLagTable); @@ -2156,9 +2159,12 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, stateOffsets.groups); DUMP_U32(t, stateOffsets.groups_size); DUMP_U32(t, stateOffsets.longLitState); + DUMP_U32(t, stateOffsets.longLitState_size); DUMP_U32(t, stateOffsets.somLocation); DUMP_U32(t, stateOffsets.somValid); DUMP_U32(t, stateOffsets.somWritable); + DUMP_U32(t, stateOffsets.somMultibit_size); + DUMP_U32(t, stateOffsets.nfaStateBegin); DUMP_U32(t, stateOffsets.end); DUMP_U32(t, boundary.reportEodOffset); DUMP_U32(t, boundary.reportZeroOffset); @@ -2174,7 +2180,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, ematcherRegionSize); DUMP_U32(t, somRevCount); DUMP_U32(t, somRevOffsetOffset); - DUMP_U32(t, longLitStreamState); fprintf(f, "}\n"); fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); } diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 57395c9d..d38ee8c0 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -170,6 +170,12 @@ struct NfaInfo { #define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose * 
prefix checks */ +/* offset of the status flags in the stream state. */ +#define ROSE_STATE_OFFSET_STATUS_FLAGS 0 + +/* offset of role mmbit in stream state (just after the status flag byte). */ +#define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8) + /** * \brief Rose state offsets. * @@ -184,24 +190,28 @@ struct NfaInfo { struct RoseStateOffsets { /** History buffer. * - * First byte is an 8-bit count of the number of valid history bytes - * available, followed by the history itself. Max size of history is - * RoseEngine::historyRequired. */ + * Max size of history is RoseEngine::historyRequired. */ u32 history; - /** Exhausted bitvector. + /** Exhausted multibit. * - * 1 bit per exhaustible key (used by Highlander mode). If a bit is set, + * entry per exhaustible key (used by Highlander mode). If a bit is set, * reports with that ekey should not be delivered to the user. */ u32 exhausted; + /** size of exhausted multibit */ + u32 exhausted_size; + /** Multibit for active suffix/outfix engines. */ u32 activeLeafArray; - /** Multibit for active Rose (prefix/infix) engines. */ + /** Size of multibit for active suffix/outfix engines in bytes. */ + u32 activeLeafArray_size; + + /** Multibit for active leftfix (prefix/infix) engines. */ u32 activeLeftArray; - /** Size of the active Rose array multibit, in bytes. */ + /** Size of multibit for active leftfix (prefix/infix) engines in bytes. */ u32 activeLeftArray_size; /** Table of lag information (stored as one byte per engine) for active @@ -220,6 +230,9 @@ struct RoseStateOffsets { /** State for long literal support. */ u32 longLitState; + /** Size of the long literal state. */ + u32 longLitState_size; + /** Packed SOM location slots. */ u32 somLocation; @@ -229,6 +242,13 @@ struct RoseStateOffsets { /** Multibit guarding SOM location slots. */ u32 somWritable; + /** Size of each of the somValid and somWritable multibits, in bytes. */ + u32 somMultibit_size; + + /** Begin of the region where NFA engine state is stored. 
+ * The NFA state region extends to end. */ + u32 nfaStateBegin; + /** Total size of Rose state, in bytes. */ u32 end; }; @@ -317,7 +337,6 @@ struct RoseEngine { u32 stateSize; /* size of the state bitset * WARNING: not the size of the rose state */ u32 anchorStateSize; /* size of the state for the anchor dfas */ - u32 nfaStateSize; /* total size of the state for the mask/rose nfas */ u32 tStateSize; /* total size of the state for transient rose nfas */ u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch; * used for sizing scratch only. */ diff --git a/src/rose/runtime.h b/src/rose/runtime.h index d2a4b5d7..88342b53 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -68,7 +68,7 @@ const void *getByOffset(const struct RoseEngine *t, u32 offset) { static really_inline void *getRoleState(char *state) { - return state + sizeof(u8); // status flags + return state + ROSE_STATE_OFFSET_ROLE_MMBIT; } /** \brief Fetch the active array for suffix nfas. */ diff --git a/src/runtime.c b/src/runtime.c index 17f13382..5a8168d3 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -53,6 +53,7 @@ #include "som/som_runtime.h" #include "som/som_stream.h" #include "state.h" +#include "stream_compress.h" #include "ue2common.h" #include "util/exhaust.h" #include "util/multibit.h" @@ -153,7 +154,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, /** \brief Retrieve status bitmask from stream state. 
*/ static really_inline u8 getStreamStatus(const char *state) { - u8 status = *(const u8 *)state; + u8 status = *(const u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS); assert((status & ~STATUS_VALID_BITS) == 0); return status; } @@ -162,7 +163,7 @@ u8 getStreamStatus(const char *state) { static really_inline void setStreamStatus(char *state, u8 status) { assert((status & ~STATUS_VALID_BITS) == 0); - *(u8 *)state = status; + *(u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS) = status; } /** \brief Initialise SOM state. Used in both block and streaming mode. */ @@ -1092,3 +1093,99 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, return HS_SUCCESS; } + +HS_PUBLIC_API +hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf, + size_t buf_space, size_t *used_space) { + if (unlikely(!stream || !used_space)) { + return HS_INVALID; + } + + if (unlikely(buf_space && !buf)) { + return HS_INVALID; + } + + const struct RoseEngine *rose = stream->rose; + + size_t stream_size = size_compress_stream(rose, stream); + + DEBUG_PRINTF("require %zu [orig %zu]\n", stream_size, + rose->stateOffsets.end + sizeof(struct hs_stream)); + *used_space = stream_size; + + if (buf_space < stream_size) { + return HS_INSUFFICIENT_SPACE; + } + compress_stream(buf, stream_size, rose, stream); + + return HS_SUCCESS; +} + +HS_PUBLIC_API +hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, + const char *buf, size_t buf_size) { + if (unlikely(!stream || !buf)) { + return HS_INVALID; + } + + *stream = NULL; + + hs_error_t err = validDatabase(db); + if (unlikely(err != HS_SUCCESS)) { + return err; + } + + const struct RoseEngine *rose = hs_get_bytecode(db); + if (unlikely(!ISALIGNED_16(rose))) { + return HS_INVALID; + } + + if (unlikely(rose->mode != HS_MODE_STREAM)) { + return HS_DB_MODE_ERROR; + } + + size_t stream_size = rose->stateOffsets.end + sizeof(struct hs_stream); + + struct hs_stream *s = hs_stream_alloc(stream_size); + if (unlikely(!s)) { + return HS_NOMEM; + } + + 
+ if (!expand_stream(s, rose, buf, buf_size)) { + hs_stream_free(s); + return HS_INVALID; + } + + *stream = s; + return HS_SUCCESS; +} + +HS_PUBLIC_API +hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream, + const char *buf, size_t buf_size, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context) { + if (unlikely(!to_stream || !buf)) { + return HS_INVALID; + } + + const struct RoseEngine *rose = to_stream->rose; + + if (onEvent) { + if (!scratch || !validScratch(to_stream->rose, scratch)) { + return HS_INVALID; + } + if (unlikely(markScratchInUse(scratch))) { + return HS_SCRATCH_IN_USE; + } + report_eod_matches(to_stream, scratch, onEvent, context); + unmarkScratchInUse(scratch); + } + + if (expand_stream(to_stream, rose, buf, buf_size)) { + return HS_SUCCESS; + } else { + return HS_INVALID; + } +} diff --git a/src/stream_compress.c b/src/stream_compress.c new file mode 100644 index 00000000..3051af36 --- /dev/null +++ b/src/stream_compress.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "stream_compress.h" + +#include "state.h" +#include "nfa/nfa_internal.h" +#include "rose/rose_internal.h" +#include "util/multibit.h" +#include "util/uniform_ops.h" + +#include <string.h> + +#define COPY_IN(p, sz) do { \ + assert(currOffset + sz <= buf_size); \ + memcpy(buf + currOffset, p, sz); \ + currOffset += sz; \ + DEBUG_PRINTF("co = %zu\n", currOffset); \ + } while (0) + +#define COPY_OUT(p, sz) do { \ + if (currOffset + sz > buf_size) { \ + return 0; \ + } \ + memcpy(p, buf + currOffset, sz); \ + currOffset += sz; \ + DEBUG_PRINTF("co = %zu\n", currOffset); \ + } while (0) + +#define SIZE_COPY_IN(p, sz) do { \ + currOffset += sz; \ + DEBUG_PRINTF("co = %zu\n", currOffset); \ + } while (0) + +#define COPY COPY_OUT +#define ASSIGN(lhs, rhs) do { lhs = rhs; } while (0) +#define FN_SUFFIX expand +#define STREAM_QUAL +#define BUF_QUAL const +#include "stream_compress_impl.h" + +int expand_stream(struct hs_stream *stream, const struct RoseEngine *rose, + const char *buf, size_t buf_size) { + return sc_expand(rose, stream, buf, buf_size); +} + +#define COPY COPY_IN +#define ASSIGN(lhs, rhs) do { } while (0) +#define FN_SUFFIX compress +#define STREAM_QUAL const +#define BUF_QUAL +#include "stream_compress_impl.h" + +size_t compress_stream(char *buf, size_t buf_size, + const struct RoseEngine *rose, + const struct hs_stream *stream) { + return sc_compress(rose, stream, buf, buf_size); +} + +#define COPY SIZE_COPY_IN +#define 
ASSIGN(lhs, rhs) do { } while (0) +#define FN_SUFFIX size +#define STREAM_QUAL const +#define BUF_QUAL UNUSED +#include "stream_compress_impl.h" + +size_t size_compress_stream(const struct RoseEngine *rose, + const struct hs_stream *stream) { + return sc_size(rose, stream, NULL, 0); +} diff --git a/src/stream_compress.h b/src/stream_compress.h new file mode 100644 index 00000000..0d06d1e0 --- /dev/null +++ b/src/stream_compress.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Functions for dynamic compress/expand for streams. + */ + +#ifndef STREAM_COMPRESS_H +#define STREAM_COMPRESS_H + +#include <stddef.h> + +struct hs_stream; +struct RoseEngine; + +int expand_stream(struct hs_stream *out, const struct RoseEngine *rose, + const char *buf, size_t buf_size); + +size_t compress_stream(char *buf, size_t buf_size, + const struct RoseEngine *rose, + const struct hs_stream *src); + +size_t size_compress_stream(const struct RoseEngine *rose, + const struct hs_stream *stream); + +#endif diff --git a/src/stream_compress_impl.h b/src/stream_compress_impl.h new file mode 100644 index 00000000..ec054f07 --- /dev/null +++ b/src/stream_compress_impl.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "util/join.h" + +#define COPY_FIELD(x) COPY(&x, sizeof(x)) + +/* TODO: replace with a multibit compress/expand call */ +#define COPY_MULTIBIT(mm_p, mm_size_bytes) COPY(mm_p, mm_size_bytes) + +#define COPY_LEFTFIXES JOIN(sc_left_, FN_SUFFIX) +#define COPY_SOM_INFO JOIN(sc_som_, FN_SUFFIX) + +static +size_t COPY_LEFTFIXES(const struct RoseEngine *rose, size_t currOffset, + STREAM_QUAL struct hs_stream *stream, + BUF_QUAL char *buf, UNUSED size_t buf_size) { + if (!rose->activeLeftIterOffset) { + return currOffset; + } + + const struct RoseStateOffsets *so = &rose->stateOffsets; + STREAM_QUAL char *stream_body + = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream); + + /* Note: in the expand case the active left array has already been copied + * into the stream. 
*/ + const u8 *ara = (const u8 *)(stream_body + so->activeLeftArray); + const u32 arCount = rose->activeLeftCount; + const struct LeftNfaInfo *left_table = getLeftTable(rose); + + /* We only want to look at non-transient leftfixes */ + const struct mmbit_sparse_iter *it = getActiveLeftIter(rose); + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + u32 dummy; + u32 ri = mmbit_sparse_iter_begin(ara, arCount, &dummy, it, si_state); + for (; ri != MMB_INVALID; + ri = mmbit_sparse_iter_next(ara, arCount, ri, &dummy, it, si_state)) { + u32 qi = ri + rose->leftfixBeginQueue; + UNUSED const struct LeftNfaInfo *left = left_table + ri; + const struct NfaInfo *nfa_info = getNfaInfoByQueue(rose, qi); + const struct NFA *nfa = getNfaByInfo(rose, nfa_info); + + COPY(stream_body + nfa_info->stateOffset, nfa->streamStateSize); + /* copy the one whole byte for active leftfixes as well */ + assert(left->lagIndex != ROSE_OFFSET_INVALID); + COPY(stream_body + so->leftfixLagTable + left->lagIndex, 1); + } + + return currOffset; +} + +static +size_t COPY_SOM_INFO(const struct RoseEngine *rose, size_t currOffset, + STREAM_QUAL struct hs_stream *stream, + BUF_QUAL char *buf, UNUSED size_t buf_size) { + const struct RoseStateOffsets *so = &rose->stateOffsets; + + if (!so->somLocation) { + assert(!so->somValid); + assert(!so->somWritable); + return currOffset; + } + + STREAM_QUAL char *stream_body + = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream); + + assert(so->somValid); + assert(so->somWritable); + + COPY_MULTIBIT(stream_body + so->somWritable, so->somMultibit_size); + COPY_MULTIBIT(stream_body + so->somValid, so->somMultibit_size); + + /* Copy only the som slots which contain valid values. */ + /* Note: in the expand case the som valid array has been copied in. 
+ */ + const u8 *svalid = (const u8 *)(stream_body + so->somValid); + u32 s_count = rose->somLocationCount; + u32 s_width = rose->somHorizon; + for (u32 slot = mmbit_iterate(svalid, s_count, MMB_INVALID); + slot != MMB_INVALID; slot = mmbit_iterate(svalid, s_count, slot)) { + COPY(stream_body + so->somLocation + slot * s_width, s_width); + } + + return currOffset; +} + +static +size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose, + STREAM_QUAL struct hs_stream *stream, + BUF_QUAL char *buf, UNUSED size_t buf_size) { + size_t currOffset = 0; + const struct RoseStateOffsets *so = &rose->stateOffsets; + + STREAM_QUAL char *stream_body + = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream); + + COPY_FIELD(stream->offset); + ASSIGN(stream->rose, rose); + + COPY(stream_body + ROSE_STATE_OFFSET_STATUS_FLAGS, 1); + COPY_MULTIBIT(stream_body + ROSE_STATE_OFFSET_ROLE_MMBIT, rose->stateSize); + + /* stream is valid in compress/size, and stream->offset has been set already + * on the expand side; compare in 64 bits so offsets >= 4GiB keep history */ + u64a offset = stream->offset; + u32 history = (u32)MIN(offset, (u64a)rose->historyRequired); + + /* copy the active mmbits */ + COPY_MULTIBIT(stream_body + so->activeLeafArray, so->activeLeafArray_size); + COPY_MULTIBIT(stream_body + so->activeLeftArray, so->activeLeftArray_size); + + COPY(stream_body + so->longLitState, so->longLitState_size); + + /* Leftlag table will be handled later, for active leftfixes */ + + /* anchored table state is not required once we are deep in the stream */ + if (offset <= rose->anchoredDistance) { + COPY(stream_body + so->anchorState, rose->anchorStateSize); + } + + COPY(stream_body + so->groups, so->groups_size); + + /* copy the real bits of history */ + UNUSED u32 hend = so->history + rose->historyRequired; + COPY(stream_body + hend - history, history); + + /* copy the exhaustion multibit */ + COPY_MULTIBIT(stream_body + so->exhausted, so->exhausted_size); + + /* copy nfa stream state for endfixes */ + /* Note: in the expand case the active 
array has already been copied into + * the stream. */ + const u8 *aa = (const u8 *)(stream_body + so->activeLeafArray); + u32 aaCount = rose->activeArrayCount; + for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; + qi = mmbit_iterate(aa, aaCount, qi)) { + DEBUG_PRINTF("saving stream state for qi=%u\n", qi); + const struct NfaInfo *nfa_info = getNfaInfoByQueue(rose, qi); + const struct NFA *nfa = getNfaByInfo(rose, nfa_info); + COPY(stream_body + nfa_info->stateOffset, nfa->streamStateSize); + } + + /* copy nfa stream state for leftfixes */ + currOffset = COPY_LEFTFIXES(rose, currOffset, stream, buf, buf_size); + if (!currOffset) { + return 0; + } + + currOffset = COPY_SOM_INFO(rose, currOffset, stream, buf, buf_size); + if (!currOffset) { + return 0; + } + + return currOffset; +} + +#undef ASSIGN +#undef COPY +#undef COPY_FIELD +#undef COPY_LEFTFIXES +#undef COPY_MULTIBIT +#undef COPY_SOM_INFO +#undef FN_SUFFIX +#undef BUF_QUAL +#undef STREAM_QUAL diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp index 8e86cc64..0ff4ce5f 100644 --- a/unit/hyperscan/arg_checks.cpp +++ b/unit/hyperscan/arg_checks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -2318,6 +2318,289 @@ TEST(HyperscanArgChecks, hs_populate_platform_null) { ASSERT_EQ(HS_INVALID, err); } +TEST(HyperscanArgChecks, CompressStreamNoStream) { + char buf[100]; + size_t used; + hs_error_t err = hs_compress_stream(nullptr, buf, sizeof(buf), &used); + ASSERT_EQ(HS_INVALID, err); +} + +TEST(HyperscanArgChecks, CompressStreamNoUsed) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream; + hs_error_t err = hs_open_stream(db, 0, &stream); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[100]; + 
err = hs_compress_stream(stream, buf, sizeof(buf), nullptr); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, CompressStreamNoBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream; + hs_error_t err = hs_open_stream(db, 0, &stream); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[100]; + size_t used; + err = hs_compress_stream(stream, nullptr, sizeof(buf), &used); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, CompressStreamSmallBuff) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream; + hs_error_t err = hs_open_stream(db, 0, &stream); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[100]; + size_t used = 0; + err = hs_compress_stream(stream, buf, 1, &used); + ASSERT_EQ(HS_INSUFFICIENT_SPACE, err); + ASSERT_LT(0, used); + + err = hs_close_stream(stream, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ExpandNoDb) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_expand_stream(nullptr, &stream2, buf, used); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); 
+} + +TEST(HyperscanArgChecks, ExpandNoTo) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_expand_stream(db, nullptr, buf, used); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ExpandNoBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_expand_stream(db, &stream2, nullptr, used); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ExpandSmallBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_expand_stream(db, &stream2, buf, used / 2); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ResetAndExpandNoStream) { + 
hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_reset_and_expand_stream(nullptr, buf, used, nullptr, nullptr, + nullptr); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ResetAndExpandNoBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_open_stream(db, 0, &stream2); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_reset_and_expand_stream(stream2, nullptr, used, nullptr, nullptr, + nullptr); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_close_stream(stream2, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + + +TEST(HyperscanArgChecks, ResetAndExpandSmallBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_open_stream(db, 0, &stream2); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_reset_and_expand_stream(stream2, 
buf, used / 2, nullptr, nullptr, + nullptr); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_close_stream(stream2, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ResetAndExpandNoScratch) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_open_stream(db, 0, &stream2); + ASSERT_EQ(HS_SUCCESS, err); + + int temp; + + err = hs_reset_and_expand_stream(stream2, buf, used, nullptr, singleHandler, + &temp); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_close_stream(stream2, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + class BadModeTest : public testing::TestWithParam {}; // hs_compile: Compile a pattern with bogus mode flags set.