vectorscan/chimera/ch_database.h
2018-07-09 11:30:35 -04:00

159 lines
5.6 KiB
C

/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime code for ch_database manipulation.
*/
#ifndef CH_DATABASE_H_
#define CH_DATABASE_H_
#ifdef __cplusplus
extern "C"
{
#endif
#define PCRE_STATIC
#include <pcre.h>
#include "ch_compile.h" // for CH_MODE_ flags
#include "ue2common.h"
#include "hs_version.h"
#include "hs.h"
#define CH_DB_MAGIC 0xdedededeU //!< Magic number stored in \ref ch_database
/** \brief Main Chimera database header.
*
* Sits at the very start of a database allocation. The \ref bytecode field is
* a byte offset from the address of this struct to the Chimera bytecode (see
* ch_get_bytecode()). hydbIsValid() checks \ref magic and \ref version.
*/
struct ch_database {
u32 magic; //!< must be \ref CH_DB_MAGIC
u32 version; //!< release version; compared against HS_VERSION_32BIT
u32 length; //!< total allocated length in bytes
u32 reserved0; //!< unused
u32 reserved1; //!< unused
u32 bytecode; //!< offset relative to db start
u32 padding[16]; //!< padding for alignment of rest of bytecode
// Flexible array member marking the start of the variable-length area that
// follows the fixed header.
// NOTE(review): presumably the bytecode located via `bytecode` lives in this
// area; confirm against the database builder.
char bytes[];
};
/** \brief Chimera bytecode header, which follows the \ref ch_database and is
* always 64-byte aligned.
*
* Every offset field below is a byte offset measured from the address of this
* struct; the accessor functions in this header add them to the struct's own
* address.
*/
struct ch_bytecode {
u32 length; //!< length of bytecode including this header struct
u32 flags; //!< whole-database flags (CHIMERA_FLAG_NO_MULTIMATCH,
//!< CHIMERA_FLAG_GROUPS)
u32 patternCount; //!< total number of patterns
u32 activeSize; //!< size of mmbit to store active pattern ids
u32 databaseOffset; //!< offset for database following \ref ch_bytecode
//!< header
u32 patternOffset; //!< points to an array of u32 offsets, each pointing to
//!< a \ref ch_pattern
u32 unguardedOffset; //!< pointer to a list of unguarded pattern indices
u32 unguardedCount; //!< number of unguarded patterns
u32 maxCaptureGroups; //!< max number of capture groups used by any pattern
};
/** \brief Per-pattern header.
*
* struct is followed in bytecode by:
* 1. pcre bytecode (always present)
* 2. pcre study data (sometimes)
*
* \ref length covers this struct together with both trailing regions, and
* \ref studyOffset is measured from the start of this struct.
*/
struct ch_pattern {
u32 id; //!< pattern ID to report to the user
u32 flags; //!< per-pattern flags (e.g. \ref CHIMERA_PATTERN_FLAG_UTF8)
u32 maxWidth; //!< maximum width of a match, or UINT_MAX for inf.
u32 minWidth; //!< minimum width of a match.
u32 fixedWidth;//!< pattern has fixed width.
u32 studyOffset; //!< offset relative to struct start of study data,
//!< or zero if there is none
u32 length; //!< length of struct plus pcre bytecode and study data
pcre_extra extra; //!< pcre_extra struct, used to store study data ptr for
//!< the currently-running pcre at runtime.
};
/** \brief Locate the Chimera bytecode stored inside a database.
 *
 * \param db Database header; must not be NULL (asserted).
 * \return Address of the bytecode, i.e. \p db advanced by \c db->bytecode
 *         bytes. The result is asserted to be 16-byte aligned.
 */
static really_inline
const void *ch_get_bytecode(const struct ch_database *db) {
    assert(db);

    /* The stored offset is in bytes, so step through a char pointer. */
    const char *base = (const char *)db;
    const void *code = base + db->bytecode;

    assert(ISALIGNED_16(code));
    return code;
}
struct hs_database;

/** \brief Locate the Hyperscan database embedded in Chimera bytecode.
 *
 * \param db Chimera bytecode header; must not be NULL (asserted).
 * \return Pointer \c db->databaseOffset bytes past the start of \p db,
 *         asserted to satisfy ISALIGNED_CL.
 */
static really_inline
const struct hs_database *getHyperscanDatabase(const struct ch_bytecode *db) {
    assert(db);

    /* databaseOffset is a byte offset from the bytecode header itself. */
    const struct hs_database *hs_db =
        (const struct hs_database *)((const char *)db + db->databaseOffset);

    assert(ISALIGNED_CL(hs_db));
    return hs_db;
}
/** \brief Locate the list of unguarded pattern indices.
 *
 * \param db Chimera bytecode header; must not be NULL (asserted).
 * \return Pointer to the u32 array found \c db->unguardedOffset bytes past
 *         the start of \p db, asserted to be aligned to sizeof(u32).
 */
static really_inline
const u32 *getUnguarded(const struct ch_bytecode *db) {
    assert(db);

    const char *base = (const char *)db;
    const u32 *list = (const u32 *)(base + db->unguardedOffset);

    assert(ISALIGNED_N(list, sizeof(u32)));
    return list;
}
/** \brief Look up the header of the i-th pattern.
 *
 * The bytecode holds a table of u32 offsets at \c db->patternOffset; entry
 * \p i is the byte offset, from the start of \p db, of that pattern's
 * \ref ch_pattern header.
 *
 * \param db Chimera bytecode header; must not be NULL (asserted).
 * \param i  Pattern index; asserted to be below \c db->patternCount.
 * \return Pointer to the pattern header. The looked-up offset is asserted
 *         to be smaller than \c db->length.
 */
static really_inline
const struct ch_pattern *getPattern(const struct ch_bytecode *db, u32 i) {
    assert(db);
    assert(i < db->patternCount);

    const char *base = (const char *)db;
    const u32 *offsets = (const u32 *)(base + db->patternOffset);
    const u32 where = offsets[i];

    assert(where < db->length);
    return (const struct ch_pattern *)(base + where);
}
/** \brief Sanity-check a Chimera database header.
 *
 * \param hydb Database to check; may be NULL.
 * \return CH_INVALID if \p hydb is NULL or its magic is not CH_DB_MAGIC,
 *         CH_DB_VERSION_ERROR if its version differs from HS_VERSION_32BIT,
 *         CH_SUCCESS otherwise.
 */
static really_inline
ch_error_t hydbIsValid(const struct ch_database *hydb) {
    /* Handle NULL separately: the bad-magic debug message below reads
     * hydb->magic, which must never be evaluated for a NULL database. */
    if (!hydb) {
        DEBUG_PRINTF("null database\n");
        return CH_INVALID;
    }
    if (hydb->magic != CH_DB_MAGIC) {
        DEBUG_PRINTF("bad magic (%u != %u)\n", hydb->magic, CH_DB_MAGIC);
        return CH_INVALID;
    }
    if (hydb->version != HS_VERSION_32BIT) {
        DEBUG_PRINTF("bad version\n");
        return CH_DB_VERSION_ERROR;
    }
    return CH_SUCCESS;
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* CH_DATABASE_H_ */