vectorscan/chimera/ch_database.h

/*
 * Copyright (c) 2018, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Runtime code for ch_database manipulation.
 */

#ifndef CH_DATABASE_H_
#define CH_DATABASE_H_

#ifdef __cplusplus
extern "C"
{
#endif

#define PCRE_STATIC
#include <pcre.h>

#include "ch_compile.h" // for CH_MODE_ flags
#include "ue2common.h"
#include "hs_version.h"
#include "hs.h"

#define CH_DB_MAGIC 0xdedededeU //!< Magic number stored in \ref ch_database

/** \brief Main Chimera database header. */
struct ch_database {
    u32 magic;     //!< must be \ref CH_DB_MAGIC
    u32 version;   //!< release version
    u32 length;    //!< total allocated length in bytes
    u32 reserved0; //!< unused
    u32 reserved1; //!< unused
    u32 bytecode;  //!< offset relative to db start
    u32 padding[16];  //!< padding for alignment of rest of bytecode
    char bytes[];
};

/** \brief Chimera bytecode header, which follows the \ref ch_database and is
 * always 64-byte aligned. */
struct ch_bytecode {
    u32 length; //!< length of bytecode including this header struct
    u32 flags;          //!< whole-database flags (CHIMERA_FLAG_NO_MULTIMATCH,
                        // CHIMERA_FLAG_GROUPS)
    u32 patternCount;   //!< total number of patterns
    u32 activeSize;     //!< size of mmbit to store active pattern ids
    u32 databaseOffset; //!< offset for database following \ref ch_bytecode
                        // header
    u32 patternOffset;  //!< points to an array of u32 offsets, each pointing to
                        // a \ref ch_pattern
    u32 unguardedOffset;  //!< pointer to a list of unguarded pattern indices
    u32 unguardedCount;   //!< number of unguarded patterns
    u32 maxCaptureGroups; //!< max number of capture groups used by any pattern
};

/** \brief Per-pattern header.
 *
 * struct is followed in bytecode by:
 * 1. pcre bytecode (always present)
 * 2. pcre study data (sometimes)
 */
struct ch_pattern {
    u32 id;        //!< pattern ID to report to the user
    u32 flags;     //!< per-pattern flags (e.g. \ref CHIMERA_PATTERN_FLAG_UTF8)
    u32 maxWidth;  //!< maximum width of a match, or UINT_MAX for inf.
    u32 minWidth;  //!< minimum width of a match.
    u32 fixedWidth;//!< pattern has fixed width.
    u32 studyOffset;     //!< offset relative to struct start of study data,
                         // or zero if there is none
    u32 length; //!< length of struct plus pcre bytecode and study data
    pcre_extra extra; //!< pcre_extra struct, used to store study data ptr for
                      // the currently-running pcre at runtime.
};

static really_inline
const void *ch_get_bytecode(const struct ch_database *db) {
    assert(db);
    const void *bytecode = (const char *)db + db->bytecode;
    assert(ISALIGNED_16(bytecode));
    return bytecode;
}

struct hs_database;

static really_inline
const struct hs_database *getHyperscanDatabase(const struct ch_bytecode *db) {
    assert(db);
    const char *ptr = (const char *)db;
    const struct hs_database *hs_db;
    hs_db = (const struct hs_database *)(ptr + db->databaseOffset);
    assert(ISALIGNED_CL(hs_db));
    return hs_db;
}

static really_inline
const u32 *getUnguarded(const struct ch_bytecode *db) {
    assert(db);
    const char *ptr = (const char *)db;
    const u32 *unguarded = (const u32 *)(ptr + db->unguardedOffset);
    assert(ISALIGNED_N(unguarded, sizeof(u32)));
    return unguarded;
}

static really_inline
const struct ch_pattern *getPattern(const struct ch_bytecode *db, u32 i) {
    assert(db);
    assert(i < db->patternCount);
    const char *ptr = (const char *)db;
    const u32 *patternOffset = (const u32 *)(ptr + db->patternOffset);
    assert(patternOffset[i] < db->length);
    return (const struct ch_pattern *)(ptr + patternOffset[i]);
}

static really_inline
ch_error_t hydbIsValid(const struct ch_database *hydb) {
    if (!hydb || hydb->magic != CH_DB_MAGIC) {
        DEBUG_PRINTF("bad magic (%u != %u)\n", hydb->magic, CH_DB_MAGIC);
        return CH_INVALID;
    }

    if (hydb->version != HS_VERSION_32BIT) {
        DEBUG_PRINTF("bad version\n");
        return CH_DB_VERSION_ERROR;
    }

    return CH_SUCCESS;
}

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* CH_DATABASE_H_ */