mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-30 11:44:28 +03:00
chimera: hybrid of Hyperscan and PCRE
This commit is contained in:
158
chimera/ch_database.h
Normal file
158
chimera/ch_database.h
Normal file
@@ -0,0 +1,158 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime code for ch_database manipulation.
|
||||
*/
|
||||
|
||||
#ifndef CH_DATABASE_H_
|
||||
#define CH_DATABASE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define PCRE_STATIC
|
||||
#include <pcre.h>
|
||||
|
||||
#include "ch_compile.h" // for CH_MODE_ flags
|
||||
#include "ue2common.h"
|
||||
#include "hs_version.h"
|
||||
#include "hs.h"
|
||||
|
||||
#define CH_DB_MAGIC 0xdedededeU //!< Magic number stored in \ref ch_database
|
||||
|
||||
/** \brief Main Chimera database header. */
|
||||
struct ch_database {
|
||||
u32 magic; //!< must be \ref CH_DB_MAGIC
|
||||
u32 version; //!< release version
|
||||
u32 length; //!< total allocated length in bytes
|
||||
u32 reserved0; //!< unused
|
||||
u32 reserved1; //!< unused
|
||||
u32 bytecode; //!< offset relative to db start
|
||||
u32 padding[16]; //!< padding for alignment of rest of bytecode
|
||||
char bytes[];
|
||||
};
|
||||
|
||||
/** \brief Chimera bytecode header, which follows the \ref ch_database and is
|
||||
* always 64-byte aligned. */
|
||||
struct ch_bytecode {
|
||||
u32 length; //!< length of bytecode including this header struct
|
||||
u32 flags; //!< whole-database flags (CHIMERA_FLAG_NO_MULTIMATCH,
|
||||
// CHIMERA_FLAG_GROUPS)
|
||||
u32 patternCount; //!< total number of patterns
|
||||
u32 activeSize; //!< size of mmbit to store active pattern ids
|
||||
u32 databaseOffset; //!< offset for database following \ref ch_bytecode
|
||||
// header
|
||||
u32 patternOffset; //!< points to an array of u32 offsets, each pointing to
|
||||
// a \ref ch_pattern
|
||||
u32 unguardedOffset; //!< pointer to a list of unguarded pattern indices
|
||||
u32 unguardedCount; //!< number of unguarded patterns
|
||||
u32 maxCaptureGroups; //!< max number of capture groups used by any pattern
|
||||
};
|
||||
|
||||
/** \brief Per-pattern header.
|
||||
*
|
||||
* struct is followed in bytecode by:
|
||||
* 1. pcre bytecode (always present)
|
||||
* 2. pcre study data (sometimes)
|
||||
*/
|
||||
struct ch_pattern {
|
||||
u32 id; //!< pattern ID to report to the user
|
||||
u32 flags; //!< per-pattern flags (e.g. \ref CHIMERA_PATTERN_FLAG_UTF8)
|
||||
u32 maxWidth; //!< maximum width of a match, or UINT_MAX for inf.
|
||||
u32 minWidth; //!< minimum width of a match.
|
||||
u32 fixedWidth;//!< pattern has fixed width.
|
||||
u32 studyOffset; //!< offset relative to struct start of study data,
|
||||
// or zero if there is none
|
||||
u32 length; //!< length of struct plus pcre bytecode and study data
|
||||
pcre_extra extra; //!< pcre_extra struct, used to store study data ptr for
|
||||
// the currently-running pcre at runtime.
|
||||
};
|
||||
|
||||
static really_inline
|
||||
const void *ch_get_bytecode(const struct ch_database *db) {
|
||||
assert(db);
|
||||
const void *bytecode = (const char *)db + db->bytecode;
|
||||
assert(ISALIGNED_16(bytecode));
|
||||
return bytecode;
|
||||
}
|
||||
|
||||
struct hs_database;
|
||||
|
||||
static really_inline
|
||||
const struct hs_database *getHyperscanDatabase(const struct ch_bytecode *db) {
|
||||
assert(db);
|
||||
const char *ptr = (const char *)db;
|
||||
const struct hs_database *hs_db;
|
||||
hs_db = (const struct hs_database *)(ptr + db->databaseOffset);
|
||||
assert(ISALIGNED_CL(hs_db));
|
||||
return hs_db;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u32 *getUnguarded(const struct ch_bytecode *db) {
|
||||
assert(db);
|
||||
const char *ptr = (const char *)db;
|
||||
const u32 *unguarded = (const u32 *)(ptr + db->unguardedOffset);
|
||||
assert(ISALIGNED_N(unguarded, sizeof(u32)));
|
||||
return unguarded;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct ch_pattern *getPattern(const struct ch_bytecode *db, u32 i) {
|
||||
assert(db);
|
||||
assert(i < db->patternCount);
|
||||
const char *ptr = (const char *)db;
|
||||
const u32 *patternOffset = (const u32 *)(ptr + db->patternOffset);
|
||||
assert(patternOffset[i] < db->length);
|
||||
return (const struct ch_pattern *)(ptr + patternOffset[i]);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
ch_error_t hydbIsValid(const struct ch_database *hydb) {
|
||||
if (!hydb || hydb->magic != CH_DB_MAGIC) {
|
||||
DEBUG_PRINTF("bad magic (%u != %u)\n", hydb->magic, CH_DB_MAGIC);
|
||||
return CH_INVALID;
|
||||
}
|
||||
|
||||
if (hydb->version != HS_VERSION_32BIT) {
|
||||
DEBUG_PRINTF("bad version\n");
|
||||
return CH_DB_VERSION_ERROR;
|
||||
}
|
||||
|
||||
return CH_SUCCESS;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* CH_DATABASE_H_ */
|
||||
|
Reference in New Issue
Block a user