openappsec/components/utils/pm/pm_adaptor.h
2022-10-26 19:33:19 +03:00

230 lines
12 KiB
C++

// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef _pm_adaptor_h_
#define _pm_adaptor_h_
#include <string.h>
#include <vector>
#include <iostream>
#include "general_adaptor.h"
// Number of entries in the u_char translation tables declared further down in this header
// (kiss_pmglob_char_xlation_build / kiss_pmglob_char_xlation_build_reverse).
#define KISS_PM_ALPHABET_SIZE 256
// NOTE(review): defined with no value and not used in this header; presumably a marker consumed
// by other headers - confirm.
#define KISS_APPS_CPAPI
// Used to copy any struct, array, string, or variable into a buffer.
#if 0
#define DATA_BUFF_COPY(_buf, _buf_size, _data, _data_size) bcopy((_data), (_buf), (_data_size)); \
(_buf) += (_data_size); \
(*(_buf_size)) -= (_data_size)
#endif
// Not using the original DATA_BUFF_COPY which uses bcopy. On 64bit libc2.5, it seems that bcopy reads
// past the source buffer, as long as it is aligned. That's OK, but valgrind complains.
//
// DATA_BUFF_COPY(_buf, _buf_size, _data, _data_size)
//   _buf       - lvalue write cursor; _data_size bytes are copied to it and it is then advanced.
//   _buf_size  - pointer to the remaining-size counter; decremented by _data_size.
//   _data      - address of the bytes to copy.
//   _data_size - number of bytes to copy.
// No bounds check is performed: the caller must make sure the buffer is large enough.
// Every argument is evaluated more than once, so arguments must not have side effects.
// The body is wrapped in do { } while (0) so that the whole macro behaves as a single statement;
// without it, `if (cond) DATA_BUFF_COPY(...);` would make only the memcpy conditional and would
// always advance the cursor and shrink the size.
#define DATA_BUFF_COPY(_buf, _buf_size, _data, _data_size) \
    do { \
        memcpy((_buf), (_data), (_data_size)); \
        (_buf) += (_data_size); \
        (*(_buf_size)) -= (_data_size); \
    } while (0)
// INT_BUFF_COPY(_buf, _buf_size, _val)
// Converts _val to `int` and appends its raw bytes to the buffer through DATA_BUFF_COPY,
// which advances _buf and reduces *_buf_size by sizeof(int).
#define INT_BUFF_COPY(_buf, _buf_size, _val) \
    do { \
        int temp_val = _val; \
        DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(temp_val)); \
    } while (0)
// U_INT_BUFF_COPY(_buf, _buf_size, _val)
// Converts _val to `u_int` and appends its raw bytes to the buffer through DATA_BUFF_COPY,
// which advances _buf and reduces *_buf_size by sizeof(u_int).
#define U_INT_BUFF_COPY(_buf, _buf_size, _val) \
    do { \
        u_int temp_val = _val; \
        DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(temp_val)); \
    } while (0)
// U_SHORT_BUFF_COPY(_buf, _buf_size, _val)
// Converts _val to `u_short` and appends its raw bytes to the buffer through DATA_BUFF_COPY,
// which advances _buf and reduces *_buf_size by sizeof(u_short).
#define U_SHORT_BUFF_COPY(_buf, _buf_size, _val) \
    do { \
        u_short temp_val = _val; \
        DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(temp_val)); \
    } while (0)
// U_CHAR_BUFF_COPY(_buf, _buf_size, _val)
// Converts _val to `u_char` and appends that single byte to the buffer through DATA_BUFF_COPY,
// which advances _buf and reduces *_buf_size by sizeof(u_char).
#define U_CHAR_BUFF_COPY(_buf, _buf_size, _val) \
    do { \
        u_char temp_val = _val; \
        DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(temp_val)); \
    } while (0)
// DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, _to, _data_size)
//   _buf        - lvalue read cursor; advanced by _data_size on success.
//   _buf_size   - pointer to the remaining-size counter.
//   _vbuf, _vbuf_iter - not used by this implementation; kept so existing callers still compile.
//   _to         - destination address that receives the bytes.
//   _data_size  - number of bytes to read.
// When at least _data_size bytes remain, they are copied to _to, the cursor is advanced and
// *_buf_size is reduced. Otherwise nothing is copied and *_buf_size is set to 0, so the caller
// can detect a short buffer by checking the counter.
// memmove replaces the legacy bcopy (removed from POSIX.1-2008): it has the same overlap-safe
// semantics, only the source/destination argument order is swapped. This also matches
// DATA_BUFF_COPY, which already avoids bcopy.
// Arguments are evaluated more than once, so they must not have side effects.
#define DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, _to, _data_size) \
    do { \
        if ((*(_buf_size)) >= (_data_size)) { \
            memmove((_to), (_buf), (_data_size)); \
            (_buf) += (_data_size); \
            (*(_buf_size)) -= (_data_size); \
        } else { \
            (*(_buf_size)) = 0; \
        } \
    } while (0)
// INT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter)
// Reads sizeof(int) bytes from the buffer into _var by delegating to DATA_BUFF_READ.
#define INT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
    DATA_BUFF_READ( \
        _buf, _buf_size, _vbuf, _vbuf_iter, \
        &_var, sizeof(int) \
    )
// U_INT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter)
// Reads sizeof(u_int) bytes from the buffer into _var by delegating to DATA_BUFF_READ.
#define U_INT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
    DATA_BUFF_READ( \
        _buf, _buf_size, _vbuf, _vbuf_iter, \
        &_var, sizeof(u_int) \
    )
// U_SHORT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter)
// Reads sizeof(u_short) bytes from the buffer into _var by delegating to DATA_BUFF_READ.
#define U_SHORT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
    DATA_BUFF_READ( \
        _buf, _buf_size, _vbuf, _vbuf_iter, \
        &_var, sizeof(u_short) \
    )
// U_CHAR_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter)
// Reads sizeof(u_char) bytes (one byte) from the buffer into _var by delegating to DATA_BUFF_READ.
#define U_CHAR_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
    DATA_BUFF_READ( \
        _buf, _buf_size, _vbuf, _vbuf_iter, \
        &_var, sizeof(u_char) \
    )
// Serialization magics, used to verify buffer structure.
// Each value is four ASCII bytes, most significant byte first: 'S' 'R' 'Z' followed by one
// letter that identifies the serialized object; the resulting tag is shown after each value.
#define KISS_PM_SERIALIZED 0x53525A50 // SRZP
#define KISS_DFA_SERIALIZED 0x53525A44 // SRZD
#define KISS_WM_SERIALIZED 0x53525A48 // SRZH
#define KISS_THIN_NFA_SERIALIZED 0x53525A4E // SRZN
#define KISS_EX_REM_SERIALIZED 0x53525A58 // SRZX
#define KISS_STATS_SERIALIZED 0x53525A53 // SRZS
#define KISS_STATE_SERIALIZED 0x53525A54 // SRZT
#define KISS_PM_SERIALIZE_IGNORE_INT 0x53525A49 // SRZI
#define KISS_KW_SERIALIZED 0x53525A4B // SRZK
#define KISS_KW_MGR_SERIALIZED 0x53525A47 // SRZG
// Kind of error stored in KissPMError::error_type.
typedef enum kiss_pm_error_type_e {
    // Syntax error: an error in the way the pattern is written.
    KISS_PM_ERROR_SYNTAX = 0,
    // Internal error: caused by lack of resources or by design.
    KISS_PM_ERROR_INTERNAL = 1,
    // Pattern is too complex to compile - too many states or too much memory.
    KISS_PM_ERROR_COMPLEX_PATTERN = 2,
    // No error.
    KISS_PM_ERROR_NO_ERROR = 3
} kiss_pm_error_type;
// Error record for a single pattern. All members are public and carry default values,
// so a default-constructed object describes an internal error with no details attached.
class KissPMError {
public:
    // The user's pattern id (-1 by default).
    int pattern_id{-1};
    // The error type (syntax, internal, ...); defaults to KISS_PM_ERROR_INTERNAL.
    kiss_pm_error_type error_type{KISS_PM_ERROR_INTERNAL};
    // String describing the problem. Non-owning pointer; null by default.
    const char *error_string{nullptr};
    // The place that caused the problem. Best effort.
    u_int index{0};
    // The user's pattern buffer. Non-owning pointer; null by default.
    const u_char *pattern_buf{nullptr};
};
// Stream-insertion operator for KissPMError; returns a std::ostream reference.
// The output format is defined by the implementation, which is not part of this header.
std::ostream& operator<<(std::ostream& os, const KissPMError &k);
// Takes an error object together with an error type and a describing string.
// NOTE(review): presumably stores error_type and error_string in *error; whether a null `error`
// is tolerated and whether the string is copied or only referenced cannot be told from this
// header - confirm against the implementation.
void kiss_pm_error_set_details(KissPMError *error, kiss_pm_error_type error_type, const char error_string[]);
// PATTERNS FLAGS
// When adding a new pattern flag,
// add a metadata string below and register it in kiss_pm_pattern_flags_data in kiss_pm.c
// range from 0x00010000 to 0x80000000
// NOTE(review): some flags in this header (0x00008000, 0x00004000, 0x00001000) already lie below
// that range - confirm whether the stated range is still current.
// EXTERNAL PATTERN FLAGS
// These flags can be added per pattern when adding it to pm_patterns using kiss_pm_pattern_add_[simple_]pattern_...
#define KISS_PM_COMP_WM_CONT_WORD 0x80000000 // a WM continuous word -
// when used on a word we search for it without delimiters.
// Large impact on performance so think twice before using
#define KISS_PM_COMP_ALLOW_SHORT_LSS 0x40000000 // Accept short lss (shorter than kiss_pm_min_lss_sise)
#define KISS_PM_COMP_LITERAL_LSS 0x20000000 // The LSS should not be normalized -
// i.e. all chars read as literals
#define KISS_PM_COMP_CASELESS 0x10000000 // Indicates a caseless pattern
#define KISS_PM_COMP_UTF8 0x08000000 // the pattern is UTF8 encoded.
#define KISS_PM_COMP_BOUNDED_PATT 0x04000000 // find the pattern only between non-word characters
// (including buffer start and end).
// Do not use this flag with `^` or `$`.
#define KISS_PM_COMP_DONT_USE_PCRE 0x02000000 // don't use pcre for second tier.
#define KISS_PM_COMP_VERIFY_PCRE_SYNTAX 0x01000000 // Verify that a pattern that compiles with PCRE fits PM syntax
// INTERNAL PATTERN FLAGS
#define KISS_PM_COMP_FIRST_TIER_OF_PATT 0x00800000 // pattern is in its first tier execution.
#define KISS_PM_COMP_BOUNDED_CIRCUMFLEX_ADDED 0x00400000 // This flag indicates that we have created a pattern
// for bounded word infra which is different
// from the orig patterns. In such cases we need to take
// it into consideration when we look for the match start.
#define KISS_PM_COMP_MORE_THAN_ONE_LSS 0x00200000 // The pattern is made up of one or more simple strings
#define KISS_PM_COMP_DONT_STRIP 0x00100000 // Parse the pattern without stripping ^/$ from the
// RE beginning/end respectively.
#define KISS_PM_LSS_AT_BUF_START 0x00080000 // LSS should be at the beginning of the buffer.
#define KISS_PM_LSS_AT_BUF_END 0x00040000 // LSS should be at the end of the buffer.
#define KISS_PM_RE_AT_BUF_START 0x00020000 // RE should be at the beginning of the buffer.
#define KISS_PM_COMP_HAVE_SECOND_TIER 0x00010000 // the pattern needs second tier.
#define KISS_PM_COMP_NO_HISTORY 0x00008000 // Execute this pattern only with the buffer
// (not with the history vbuf)
#define KISS_PM_COMP_REDUCE_SIZE 0x00004000 // Favor small memory consumption over good performance
// END OF PATTERNS FLAGS
// Internal flags set in the match data in kiss_dfa_insert_match_data.
// Each flag occupies its own bit (0x1, 0x2, 0x4).
#define KISS_PMGLOB_MATCH_DATA_FORCE_ADD 0x00000001 // Force add pmglob match data,
// even if the pattern has already been matched
#define KISS_PMGLOB_MATCH_OFFSET_IN_PRESENT_BUF 0x00000002 // The match offset refers to the present buffer
#define KISS_PMGLOB_REDUCE_BUFFER_LENGTH 0x00000004 // Reduce the length of tier2 buffer using
// LSS offsets found in tier1
// How many different first tiers can a PM have? (can be smaller than the number of first tier types)
#define KISS_TIER1_MAX_NUM 2
// First tier types.
typedef enum kiss_tier1_type_t {
    // Word Matcher
    KISS_TIER1_WM = 0,
    // DFA String matcher
    KISS_TIER1_SM,
    // Thin NFA - instead of DFA (shares the value of KISS_TIER1_SM)
    KISS_TIER1_THIN_NFA = KISS_TIER1_SM,
    // Number of distinct type values; follows the last real type
    KISS_TIER1_NUM_TYPES,
    // Marker for "no valid type"; equal to KISS_TIER1_NUM_TYPES
    KISS_TIER1_INVALID = KISS_TIER1_NUM_TYPES
} kiss_tier1_type;
// Which statistics the user wants to see.
enum kiss_pm_stats_type {
    // Number of patterns, number of states, ...
    KISS_PM_STATIC_STATS = 0,
    // Number of executions, number of matches, avg buffer length, ...
    KISS_PM_DYNAMIC_STATS = 1,
    // Both statistics
    KISS_PM_BOTH_STATS = 2
};
// NOTE(review): K_ERROR, K_PM and K_THINNFA look like debug topic masks meant for the `topics`
// argument of the kiss_debug* macros in this header (which ignore that argument) - confirm.
#define K_ERROR 0x00000010
#define K_PM 0x00000400
#define K_THINNFA 0x00400000
// Pattern flag defined apart from the other KISS_PM_COMP_* flags in this header.
#define KISS_PM_COMP_DIGITLESS 0x00001000 // Indicates a digitless first tier match
// Debug switch: every kiss_debug* macro below produces output only while this is non-zero.
extern int kiss_debug_err_flag;
// Debug print macros. `topics` is accepted for source compatibility and ignored.
// `_string` is a parenthesized printf argument list, e.g.
//     kiss_debug_err(K_ERROR, ("bad state %d\n", state));
//
// The macros are written as `if (!flag) {} else ...` rather than `if (flag) ...` so that the
// hidden `if` already owns an `else`. Otherwise a caller's
//     if (cond) kiss_debug_err(...); else handle();
// would silently attach `else handle()` to the macro's `if` (dangling else).
#define kiss_debug_err(topics, _string) if (!kiss_debug_err_flag) {} else printf _string
#define kiss_debug_wrn(topics, _string) if (!kiss_debug_err_flag) {} else printf _string
#define kiss_debug_notice(topics, _string) if (!kiss_debug_err_flag) {} else printf _string
#define kiss_debug_info(topics, _string) if (!kiss_debug_err_flag) {} else printf _string
// Used as kiss_debug(topics)("fmt", args...); the caller supplies the printf argument list.
#define kiss_debug(topics) if (!kiss_debug_err_flag) {} else printf
// Performance-path debug print: expands to nothing.
#define kiss_debug_info_perf(topics, _string)
// Guards the statement or block that follows: it runs only when debugging is enabled.
#define kiss_dbg(topics) if (!kiss_debug_err_flag) {} else
// Placeholder types for the vbuf arguments: both expand textually to `void *`.
// Because these are macros rather than typedefs, `kiss_vbuf a, b;` declares only `a` as a
// pointer, and `const kiss_vbuf p` means pointer-to-const rather than a const pointer.
#define kiss_vbuf void *
#define kiss_vbuf_iter void *
// Which character translations are needed?
// Passed as the `flags` argument of kiss_pmglob_char_xlation_build.
enum kiss_pmglob_char_xlation_flags_e {
    // No translation
    KISS_PMGLOB_CHAR_XLATION_NONE = 0,
    // Case translation (bit 0)
    KISS_PMGLOB_CHAR_XLATION_CASE = 1 << 0,
    // Digits translation (bit 1)
    KISS_PMGLOB_CHAR_XLATION_DIGITS = 1 << 1
};
// Output formats for dumps.
enum kiss_pm_dump_format_e {
    // XML, for opening with JFlap
    KISS_PM_DUMP_XML = 0,
    // CSV, for opening with Excel
    KISS_PM_DUMP_CSV = 1,
    // WIKI, for copy&paste into Wiki (Confluence)
    KISS_PM_DUMP_WIKI = 2
};
// Takes the requested translation flags and a table of KISS_PM_ALPHABET_SIZE u_char entries.
// NOTE(review): presumably fills `tab` so that tab[c] is the translated value of byte c - confirm
// against the implementation.
void kiss_pmglob_char_xlation_build(enum kiss_pmglob_char_xlation_flags_e flags, u_char tab[KISS_PM_ALPHABET_SIZE]);
// Takes a read-only translation table `tab` and a writable table `rev`, both with
// KISS_PM_ALPHABET_SIZE u_char entries.
// NOTE(review): presumably writes the reverse mapping of `tab` into `rev`; how bytes with several
// or no preimages are handled is not visible here - confirm against the implementation.
void kiss_pmglob_char_xlation_build_reverse(
const u_char tab[KISS_PM_ALPHABET_SIZE],
u_char rev[KISS_PM_ALPHABET_SIZE]
);
// Start/stop debugging.
// NOTE(review): presumably these set and clear kiss_debug_err_flag, which gates the kiss_debug*
// macros in this header - confirm against the implementation.
void kiss_debug_start();
void kiss_debug_stop();
#endif // _pm_adaptor_h_