vectorscan/src/nfa/repeat.h
Alex Coyte 05beadf52f Introduce REPEAT_ALWAYS model for {0,} castle repeats
As Castle guards the repeats, no more state is needed for these repeats
2016-03-01 11:10:20 +11:00

371 lines
13 KiB
C

/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief API for handling bounded repeats.
*
* This file provides an internal API for handling bounded repeats of character
* classes. It is used by the Large Bounded Repeat (LBR) engine and by the
* bounded repeat handling in the LimEx NFA engine as well.
*
* The state required by these functions is split into two regions:
*
* 1. Control block. This is a small structure (size varies with repeat mode)
* that may be copied around or compressed into stream state.
* 2. Repeat state. This is a larger structure that can be quite big for large
* repeats, often containing a multibit ring or large vector of indices.
* This generally lives in stream state and is not copied.
*/
#ifndef REPEAT_H
#define REPEAT_H
#include "ue2common.h"
#include "repeat_internal.h"
#include "util/bitutils.h"
#ifdef __cplusplus
extern "C"
{
#endif
/** Returns the offset of the most recent 'top' offset set in the repeat. */
static really_inline
u64a repeatLastTop(const struct RepeatInfo *info,
const union RepeatControl *ctrl, const void *state);
/** Returns the offset of the next match after 'offset', or zero if no further
* matches are possible. */
static really_inline
u64a repeatNextMatch(const struct RepeatInfo *info,
const union RepeatControl *ctrl, const void *state,
u64a offset);
/** Stores a new top in the repeat. If is_alive is false, the repeat will be
* initialised first and this top will become the first (and only) one. */
static really_inline
void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
void *state, u64a offset, char is_alive);
/** Return type for repeatHasMatch. */
enum RepeatMatch {
REPEAT_NOMATCH, /**< This offset is not a valid match. */
REPEAT_MATCH, /**< This offset is a valid match. */
REPEAT_STALE /**< This offset is not a valid match and no greater
offset will be (unless another top is stored). */
};
/** Query whether the repeat has a match at the given offset. Returns
* ::REPEAT_STALE if it does not have a match at that offset _and_
* no further matches are possible. */
static really_inline
enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
const void *state, u64a offset);
/** \brief Serialize a packed version of the repeat control block into stream
* state. */
void repeatPack(char *dest, const struct RepeatInfo *info,
const union RepeatControl *ctrl, u64a offset);
/** \brief Deserialize a packed version of the repeat control block. */
void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
union RepeatControl *ctrl);
////
//// IMPLEMENTATION.
////
u64a repeatLastTopRing(const struct RepeatInfo *info,
const union RepeatControl *ctrl);
u64a repeatLastTopRange(const union RepeatControl *ctrl,
const void *state);
u64a repeatLastTopBitmap(const union RepeatControl *ctrl);
u64a repeatLastTopTrailer(const struct RepeatInfo *info,
const union RepeatControl *ctrl);
u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
const void *state);
static really_inline
u64a repeatLastTop(const struct RepeatInfo *info,
const union RepeatControl *ctrl, const void *state) {
assert(info && ctrl && state);
switch ((enum RepeatType)info->type) {
case REPEAT_RING:
return repeatLastTopRing(info, ctrl);
case REPEAT_FIRST:
case REPEAT_LAST:
return ctrl->offset.offset;
case REPEAT_RANGE:
return repeatLastTopRange(ctrl, state);
case REPEAT_BITMAP:
return repeatLastTopBitmap(ctrl);
case REPEAT_SPARSE_OPTIMAL_P:
return repeatLastTopSparseOptimalP(info, ctrl, state);
case REPEAT_TRAILER:
return repeatLastTopTrailer(info, ctrl);
case REPEAT_ALWAYS:
return 0;
}
DEBUG_PRINTF("bad repeat type %u\n", info->type);
assert(0);
return 0;
}
// Used for both FIRST and LAST models.
static really_inline
u64a repeatNextMatchOffset(const struct RepeatInfo *info,
const union RepeatControl *ctrl, u64a offset) {
u64a first = ctrl->offset.offset + info->repeatMin;
if (offset < first) {
return first;
}
if (info->repeatMax == REPEAT_INF ||
offset < ctrl->offset.offset + info->repeatMax) {
return offset + 1;
}
return 0; // No more matches.
}
u64a repeatNextMatchRing(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
const void *state, u64a offset);
u64a repeatNextMatchRange(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
const void *state, u64a offset);
u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
const union RepeatControl *ctrl, u64a offset);
u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
const void *state, u64a offset);
u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
const union RepeatControl *ctrl, u64a offset);
static really_inline
u64a repeatNextMatch(const struct RepeatInfo *info,
const union RepeatControl *ctrl, const void *state,
u64a offset) {
assert(info && ctrl && state);
assert(ISALIGNED(info));
assert(ISALIGNED(ctrl));
switch ((enum RepeatType)info->type) {
case REPEAT_RING:
return repeatNextMatchRing(info, ctrl, state, offset);
case REPEAT_FIRST:
// fall through
case REPEAT_LAST:
return repeatNextMatchOffset(info, ctrl, offset);
case REPEAT_RANGE:
return repeatNextMatchRange(info, ctrl, state, offset);
case REPEAT_BITMAP:
return repeatNextMatchBitmap(info, ctrl, offset);
case REPEAT_SPARSE_OPTIMAL_P:
return repeatNextMatchSparseOptimalP(info, ctrl, state, offset);
case REPEAT_TRAILER:
return repeatNextMatchTrailer(info, ctrl, offset);
case REPEAT_ALWAYS:
return offset + 1;
}
DEBUG_PRINTF("bad repeat type %u\n", info->type);
assert(0);
return 0;
}
static really_inline
void repeatStoreFirst(union RepeatControl *ctrl, u64a offset,
char is_alive) {
if (is_alive) {
return;
}
ctrl->offset.offset = offset;
}
static really_inline
void repeatStoreLast(union RepeatControl *ctrl, u64a offset,
UNUSED char is_alive) {
assert(!is_alive || offset >= ctrl->offset.offset);
ctrl->offset.offset = offset;
}
void repeatStoreRing(const struct RepeatInfo *info,
union RepeatControl *ctrl, void *state, u64a offset,
char is_alive);
void repeatStoreRange(const struct RepeatInfo *info,
union RepeatControl *ctrl, void *state, u64a offset,
char is_alive);
void repeatStoreBitmap(const struct RepeatInfo *info,
union RepeatControl *ctrl, u64a offset,
char is_alive);
void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
union RepeatControl *ctrl, void *state,
u64a offset, char is_alive);
void repeatStoreTrailer(const struct RepeatInfo *info,
union RepeatControl *ctrl, u64a offset,
char is_alive);
static really_inline
void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
void *state, u64a offset, char is_alive) {
assert(info && ctrl && state);
assert(ISALIGNED(info));
assert(ISALIGNED(ctrl));
assert(info->repeatMin <= info->repeatMax);
assert(info->repeatMax <= REPEAT_INF);
switch ((enum RepeatType)info->type) {
case REPEAT_RING:
repeatStoreRing(info, ctrl, state, offset, is_alive);
break;
case REPEAT_FIRST:
repeatStoreFirst(ctrl, offset, is_alive);
break;
case REPEAT_LAST:
repeatStoreLast(ctrl, offset, is_alive);
break;
case REPEAT_RANGE:
repeatStoreRange(info, ctrl, state, offset, is_alive);
break;
case REPEAT_BITMAP:
repeatStoreBitmap(info, ctrl, offset, is_alive);
break;
case REPEAT_SPARSE_OPTIMAL_P:
repeatStoreSparseOptimalP(info, ctrl, state, offset, is_alive);
break;
case REPEAT_TRAILER:
repeatStoreTrailer(info, ctrl, offset, is_alive);
break;
case REPEAT_ALWAYS:
/* nothing to do - no state */
break;
}
}
static really_inline
enum RepeatMatch repeatHasMatchFirst(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
u64a offset) {
if (offset < ctrl->offset.offset + info->repeatMin) {
return REPEAT_NOMATCH;
}
// FIRST models are {N,} repeats, i.e. they always have inf max depth.
assert(info->repeatMax == REPEAT_INF);
return REPEAT_MATCH;
}
static really_inline
enum RepeatMatch repeatHasMatchLast(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
u64a offset) {
if (offset < ctrl->offset.offset + info->repeatMin) {
return REPEAT_NOMATCH;
}
assert(info->repeatMax < REPEAT_INF);
if (offset <= ctrl->offset.offset + info->repeatMax) {
return REPEAT_MATCH;
}
return REPEAT_STALE;
}
enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
const void *state, u64a offset);
enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
const void *state, u64a offset);
enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
const void *state, u64a offset);
enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
u64a offset);
enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
u64a offset);
static really_inline
enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
const union RepeatControl *ctrl,
const void *state, u64a offset) {
assert(info && ctrl && state);
assert(ISALIGNED(info));
assert(ISALIGNED(ctrl));
switch ((enum RepeatType)info->type) {
case REPEAT_RING:
return repeatHasMatchRing(info, ctrl, state, offset);
case REPEAT_FIRST:
return repeatHasMatchFirst(info, ctrl, offset);
case REPEAT_LAST:
return repeatHasMatchLast(info, ctrl, offset);
case REPEAT_RANGE:
return repeatHasMatchRange(info, ctrl, state, offset);
case REPEAT_BITMAP:
return repeatHasMatchBitmap(info, ctrl, offset);
case REPEAT_SPARSE_OPTIMAL_P:
return repeatHasMatchSparseOptimalP(info, ctrl, state, offset);
case REPEAT_TRAILER:
return repeatHasMatchTrailer(info, ctrl, offset);
case REPEAT_ALWAYS:
return REPEAT_MATCH;
}
assert(0);
return REPEAT_NOMATCH;
}
#ifdef __cplusplus
}
#endif
#endif // REPEAT_H