mirror of
https://github.com/openappsec/openappsec.git
synced 2025-09-29 19:24:26 +03:00
First release of open-appsec source code
This commit is contained in:
3
components/utils/pm/CMakeLists.txt
Normal file
3
components/utils/pm/CMakeLists.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
add_library(pm general_adaptor.cc kiss_hash.cc kiss_patterns.cc kiss_pm_stats.cc kiss_thin_nfa.cc kiss_thin_nfa_analyze.cc kiss_thin_nfa_build.cc kiss_thin_nfa_compile.cc pm_adaptor.cc pm_hook.cc debugpm.cc)
|
||||
|
||||
add_subdirectory(pm_ut)
|
63
components/utils/pm/debugpm.cc
Executable file
63
components/utils/pm/debugpm.cc
Executable file
@@ -0,0 +1,63 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "debug.h"
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "sasal.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
USE_DEBUG_FLAG(D_PM);
|
||||
|
||||
SASAL_START // Multiple Pattern Matcher
|
||||
// Helper class for printing C format string.
// Renders a printf-style format plus its va_list into a fixed-size buffer so the
// result can be streamed into a C++ ostream. Output that does not fit in the buffer
// is truncated; the buffer is always NUL-terminated.
class CFmtPrinter
{
public:
    char buf[500]; // Length limit.

    // fmt - printf-style format string
    // va  - arguments for fmt; the caller remains responsible for va_end()
    explicit CFmtPrinter(const char *fmt, va_list va)
    {
        // Start from an empty string so that a formatting failure can never expose
        // uninitialized bytes to whoever streams the buffer.
        buf[0] = '\0';
        if (vsnprintf(buf, sizeof(buf), fmt, va) < 0) {
            // vsnprintf reports errors with a negative value and leaves the
            // buffer contents unspecified - fall back to an empty message.
            buf[0] = '\0';
        }
        buf[sizeof(buf)-1] = '\0';
    }
};
|
||||
|
||||
// Stream the text already formatted into a CFmtPrinter's buffer.
static ostream &
operator<<(ostream &os, const CFmtPrinter &p)
{
    os << p.buf;
    return os;
}
|
||||
|
||||
void
|
||||
panicCFmt(const string &func, uint line, const char *fmt, ...)
|
||||
{
|
||||
va_list va;
|
||||
va_start(va, fmt);
|
||||
Debug("PM", func, line).getStreamAggr() << CFmtPrinter(fmt, va);
|
||||
va_end(va);
|
||||
}
|
||||
|
||||
void
|
||||
debugPrtCFmt(const char *func, uint line, const char *fmt, ...)
|
||||
{
|
||||
va_list va;
|
||||
va_start(va, fmt);
|
||||
Debug("PM", func, line, Debug::DebugLevel::TRACE, D_PM).getStreamAggr() << CFmtPrinter(fmt, va);
|
||||
va_end(va);
|
||||
}
|
||||
SASAL_END
|
39
components/utils/pm/debugpm.h
Executable file
39
components/utils/pm/debugpm.h
Executable file
@@ -0,0 +1,39 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __DEBUGPM_H__
|
||||
#define __DEBUGPM_H__
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
// Assertions
|
||||
|
||||
// C-style BC functions (e.g. for PM).
|
||||
void debugPrtCFmt(const std::string &func, uint line, const char *fmt, ...) __attribute__((format (printf, 3, 4)));
|
||||
// Emit a printf-style debug message through debugPrtCFmt when `flag` is enabled.
// The empty-then / else shape makes the expansion a single if/else statement, so the
// macro can be followed by ';' and used inside an enclosing if/else without capturing
// the caller's else. The arguments are not evaluated when the flag is off.
#define debugCFmt(flag, fmt, ...) \
    if (!Debug::isDebugSet(flag)) {} else debugPrtCFmt(__FUNCTION__, __LINE__, fmt, ##__VA_ARGS__)
|
||||
|
||||
void panicCFmt(const std::string &func, uint line, const char *fmt, ...) __attribute__((format (printf, 3, 4)));
|
||||
// Assertion with a printf-style message: when `cond` is false, panicCFmt is called with
// the current function name, line number and the formatted message.
// The empty-then / else shape makes the expansion a single if/else statement, so the
// macro can be followed by ';' and nested inside an enclosing if/else safely.
#define assertCondCFmt(cond, fmt, ...) \
    if (CP_LIKELY(cond)) {} else panicCFmt(__FUNCTION__, __LINE__, fmt, ##__VA_ARGS__)
|
||||
|
||||
#endif // __DEBUGPM_H__
|
65
components/utils/pm/general_adaptor.cc
Normal file
65
components/utils/pm/general_adaptor.cc
Normal file
@@ -0,0 +1,65 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "general_adaptor.h"
|
||||
#include <stdlib.h>
|
||||
#include "sasal.h"
|
||||
|
||||
SASAL_START // Multiple Pattern Matcher
|
||||
// Release `addr` with free(). The size and caller arguments are part of the
// interface only and are ignored.
void fw_kfree(void *addr, size_t /* size */, const char * /* caller */)
{
    free(addr);
}
|
||||
|
||||
// Allocate `size` bytes with malloc(). The caller tag is ignored.
// Returns whatever malloc() returns (NULL on failure).
void *fw_kmalloc(size_t size, const char * /* caller */)
{
    void *block = malloc(size);
    return block;
}
|
||||
|
||||
// Allocate `size` bytes with malloc(). The caller tag and flags are ignored.
// Returns whatever malloc() returns (NULL on failure).
void *fw_kmalloc_ex(size_t size, const char * /* caller */, int /* flags */)
{
    void *block = malloc(size);
    return block;
}
|
||||
|
||||
// Allocate `size` bytes with malloc(). The caller tag is ignored; in this
// implementation the call is identical to fw_kmalloc.
// Returns whatever malloc() returns (NULL on failure).
void *fw_kmalloc_sleep(size_t size, const char * /* caller */)
{
    void *block = malloc(size);
    return block;
}
|
||||
|
||||
// Allocate `size` bytes with malloc(). The caller tag, flags, file and line
// arguments are part of the interface only and are ignored.
// Returns whatever malloc() returns (NULL on failure).
void *kiss_pmglob_memory_kmalloc_ex_(
    u_int size,
    const char * /* caller */,
    int /* flags */,
    const char * /* file */,
    int /* line */)
{
    void *block = malloc(size);
    return block;
}
|
||||
|
||||
// Allocate `size` bytes with malloc(). The caller tag and flags are ignored.
// Returns whatever malloc() returns (NULL on failure).
void *kiss_pmglob_memory_kmalloc_ex(u_int size, const char * /* caller */, int /* flags */)
{
    void *block = malloc(size);
    return block;
}
|
||||
|
||||
// Allocate `size` bytes with malloc(). The caller tag is ignored.
// Returns whatever malloc() returns (NULL on failure).
void *kiss_pmglob_memory_kmalloc(u_int size, const char * /* caller */)
{
    void *block = malloc(size);
    return block;
}
|
||||
|
||||
// Release `addr` with free(). The size and caller arguments are part of the
// interface only and are ignored.
void kiss_pmglob_memory_kfree(void *addr, size_t /* size */, const char * /* caller */)
{
    free(addr);
}
|
||||
SASAL_END
|
80
components/utils/pm/general_adaptor.h
Normal file
80
components/utils/pm/general_adaptor.h
Normal file
@@ -0,0 +1,80 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __general_adaptor_h__
|
||||
#define __general_adaptor_h__
|
||||
|
||||
#include "stdint.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
#include "debug.h"
|
||||
#include "debugpm.h"
|
||||
|
||||
typedef unsigned int u_int;
|
||||
typedef unsigned char u_char;
|
||||
typedef unsigned short u_short;
|
||||
typedef bool boolean_cpt;
|
||||
typedef bool BOOL;
|
||||
typedef uint64_t u_int64;
|
||||
|
||||
#define TRUE true
|
||||
#define FALSE false
|
||||
|
||||
#define CP_INLINE inline
|
||||
#define CP_CACHELINE_SIZE 64
|
||||
#define CP_CACHELINE_ALIGNED __attribute__((__aligned__(CP_CACHELINE_SIZE)))
|
||||
#define CP_MAYBE_UNUSED CP_UNUSED
|
||||
|
||||
#define KISS_OFFSETOF(str_name, field_name) offsetof(str_name, field_name)
|
||||
|
||||
// Compile-time assertion: the build fails when `cond` is false.
// Implemented with static_assert rather than an extern array declaration, which avoids
// the reserved double-underscore identifier, gives a readable diagnostic, and also
// works at class scope.
#define KISS_ASSERT_COMPILE_TIME(cond) static_assert((cond), "KISS_ASSERT_COMPILE_TIME failed: " #cond)
|
||||
|
||||
#define KISS_ASSERT_PERF(...)
|
||||
#define ASSERT_LOCKED
|
||||
#define kiss_multik_this_instance_num (0)
|
||||
|
||||
typedef enum {
|
||||
KISS_ERROR = -1,
|
||||
KISS_OK = 0
|
||||
} kiss_ret_val;
|
||||
|
||||
#define KISS_ASSERT assertCondCFmt
|
||||
#define KISS_ASSERT_CRASH assertCondCFmt
|
||||
|
||||
#define FW_KMEM_SLEEP 0
|
||||
|
||||
#define herror(a, b, ...)
|
||||
|
||||
#define kdprintf printf
|
||||
#define kdprintf_no_prefix printf
|
||||
|
||||
|
||||
void fw_kfree(void *addr, size_t size, const char *caller);
|
||||
void *fw_kmalloc(size_t size, const char *caller);
|
||||
void *fw_kmalloc_ex(size_t size, const char *caller, int flags);
|
||||
void *fw_kmalloc_sleep(size_t size, const char *caller);
|
||||
void *kiss_pmglob_memory_kmalloc_ex_(u_int size, const char *caller, int flags, const char *file, int line);
|
||||
void *kiss_pmglob_memory_kmalloc_ex(u_int size, const char *caller, int flags);
|
||||
void *kiss_pmglob_memory_kmalloc(u_int size, const char *caller);
|
||||
void kiss_pmglob_memory_kfree(void *addr, size_t size, const char *caller);
|
||||
|
||||
// Set / clear bit flags on an enum-typed lvalue `e`, keeping its declared type.
// Both arguments are parenthesised inside the expansion so that compound expressions
// (for example `A | B`) are cast and complemented as a whole.
// `e` must be a plain variable or member name: decltype(e) is used for the cast back.
#define ENUM_SET_FLAG(e, flag) e = static_cast<decltype(e)>(((u_int)(e) | (u_int)(flag)))
#define ENUM_UNSET_FLAG(e, flag) e = static_cast<decltype(e)>(((u_int)(e) & (~(u_int)(flag))))
|
||||
|
||||
// Function-like max / min helpers. Each argument appears twice in the expansion, so
// arguments with side effects (e.g. i++) are evaluated more than once.
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
|
||||
|
||||
#endif // __general_adaptor_h__
|
1783
components/utils/pm/kiss_hash.cc
Normal file
1783
components/utils/pm/kiss_hash.cc
Normal file
File diff suppressed because it is too large
Load Diff
586
components/utils/pm/kiss_hash.h
Normal file
586
components/utils/pm/kiss_hash.h
Normal file
@@ -0,0 +1,586 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __KISS_HASH_H__
|
||||
#define __KISS_HASH_H__
|
||||
|
||||
#include "general_adaptor.h"
|
||||
|
||||
typedef struct kiss_hash *kiss_hash_t;
|
||||
|
||||
// A single key/value entry of a kiss_hash table.
// Key and value are stored as opaque pointers; `next` links to another entry.
// NOTE(review): presumably `next` chains the entries that share a hash slot -
// confirm in kiss_hash.cc.
struct kiss_hashent {
    void *key;                  // opaque key pointer
    void *val;                  // opaque value pointer
    struct kiss_hashent *next;  // following entry, or NULL
};
|
||||
|
||||
typedef uintptr_t (*hkeyfunc_t)(const void *key, void *info);
|
||||
typedef int (*hcmpfunc_t)(const void *key1, const void *key2, void *info);
|
||||
typedef void (*freefunc_t)(void *info);
|
||||
|
||||
// {group: API for KISS_HASH}
|
||||
// Invoke the destructor `destr` on `addr` when a destructor is set and `addr`, viewed as
// an integer, is greater than 0x10.
// NOTE(review): values up to 0x10 are presumably small integers stored in place of
// pointers - confirm with the callers in kiss_hash.cc.
// `destr` is parenthesised so that non-trivial expressions (e.g. a conditional) expand
// correctly. The expansion is a bare `if` that already ends in ';' (kept for
// compatibility with existing call sites), so do not use it as the unbraced body of an
// `if` that has an `else`.
#define H_DESTR(destr, addr) \
    if ((destr) && (((uintptr_t)(addr)) > 0x10)) (*(destr))(addr);
|
||||
|
||||
// {group: API for KISS_HASH}
|
||||
// Description: Create Hash Table. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hsize - hash size
|
||||
// keyfunc - key hashing function
|
||||
// keycmp - key comparison function
|
||||
// info - opaque for use of keyfunc and keycmp functions.
|
||||
// Return values:
|
||||
// o hash pointer
|
||||
// o NULL upon failure
|
||||
// See also: kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr, kiss_hash_undo_destr,
|
||||
// kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
|
||||
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
|
||||
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
|
||||
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
kiss_hash_t kiss_hash_create (size_t hsize, hkeyfunc_t keyfunc, hcmpfunc_t keycmp, void *info);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Create Hash Table with Destructor. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hsize - hash size
|
||||
// keyfunc - key hashing function
|
||||
// keycmp - key comparison function
|
||||
// val_destr - destructor for the values of the hash
|
||||
// key_destr - destructor for the keys of the hash
|
||||
// info - opaque for use of keyfunc and keycmp functions.
|
||||
// Return values:
|
||||
// o hash pointer
|
||||
// o NULL upon failure
|
||||
// See also: kiss_hash_create, kiss_hash_set_destr, kiss_hash_dodestr, kiss_hash_undo_destr, kiss_hash_nelements,
|
||||
// kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert, kiss_hash_delete, kiss_hash_destroy,
|
||||
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
|
||||
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
kiss_hash_t
|
||||
kiss_hash_create_with_destr (
|
||||
size_t hsize,
|
||||
hkeyfunc_t keyfunc,
|
||||
hcmpfunc_t keycmp,
|
||||
freefunc_t val_destr,
|
||||
freefunc_t key_destr,
|
||||
void *info
|
||||
);
|
||||
|
||||
#define kiss_hash_create(hsize, hkeyfunc, hcmpfunc, info) \
|
||||
_kiss_hash_create (hsize, hkeyfunc, hcmpfunc, info, __FILE__, __LINE__)
|
||||
|
||||
#define kiss_hash_create_with_destr(hsize, hkeyfunc, hcmpfunc, freefunc1, freefunc2, info) \
|
||||
_kiss_hash_create_with_destr (hsize, hkeyfunc, hcmpfunc, freefunc1, freefunc2, info, __FILE__, __LINE__)
|
||||
|
||||
kiss_hash_t
|
||||
_kiss_hash_create_with_ksleep(size_t hsize, hkeyfunc_t, hcmpfunc_t, void *info, const char *file, int line);
|
||||
|
||||
#define kiss_hash_create_with_ksleep(hsize, hkeyfunc, hcmpfunc, info) \
|
||||
_kiss_hash_create_with_ksleep (hsize, hkeyfunc, hcmpfunc, info, __FILE__, __LINE__)
|
||||
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Debug single hash. MT-Level: Reentrant
|
||||
//This function calculates and prints the following statistics:
|
||||
//o hash pointer
|
||||
//o file name and line number where kiss_hash_create or kiss_hash_create_with_destr was called
|
||||
//o number of elements in kiss_hash
|
||||
//o number of slots in hash - hash size
|
||||
//o size in bytes of memory occupied by hash maintenance structures
|
||||
//o slot utilization - percentage of hash slots used to store elements
|
||||
//o average number of lookups - average length of lists of elements
|
||||
// Parameters:
|
||||
// hash - pointer to hash
|
||||
// Return values:
|
||||
// size in bytes of memory occupied by hash maintenance structures.
|
||||
// See also: hash_create, hash_create_with_destr, hash_set_destr, hash_dodestr, hash_undo_destr,
|
||||
// hash_nelements, hash_findaddr, hash_lookup, hash_lookkey, hash_insert, hash_delete, hash_destroy,
|
||||
// hash_find_hashent, hash_insert_at, hash_strvalue, hash_strcmp, hash_intvalue, hash_bytevalue,
|
||||
// hash_bytecmp, hash_debug_all
|
||||
int kiss_hash_debug(kiss_hash_t hp);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Debug all hashes. MT-Level: Safe
|
||||
//Iterates a list of all hash tables created in the current process and
|
||||
//for each hash calls function kiss_hash_debug. In addition the total
|
||||
//memory usage of hash maintenance structures is printed.
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
|
||||
// kiss_hash_insert, kiss_hash_delete, kiss_hash_destroy,
|
||||
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
|
||||
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug
|
||||
void kiss_hash_debug_all();
|
||||
|
||||
// {group: API for kiss_hash}
|
||||
kiss_hash_t _kiss_hash_create (size_t hsize, hkeyfunc_t, hcmpfunc_t, void *info, const char *file, int line);
|
||||
|
||||
// {group: API for HASH}
|
||||
kiss_hash_t _kiss_hash_create_with_destr (size_t hsize, hkeyfunc_t, hcmpfunc_t, freefunc_t, freefunc_t,
|
||||
void *info, const char *file, int line);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Set destructor for hash elements. MT-Level: Reentrant
|
||||
//Key and value destructors are called for every hash key-value pair when the hash is destroyed.
|
||||
// Parameters:
|
||||
// hp - hash
|
||||
// val_destr - destructor for the values of the hash
|
||||
// key_destr - destructor for the keys of the hash
|
||||
// Return values:
|
||||
// hash pointer
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_dodestr, kiss_hash_undo_destr,
|
||||
// kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
|
||||
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
|
||||
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
|
||||
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
kiss_hash_t kiss_hash_set_destr (kiss_hash_t hp, freefunc_t val_destr, freefunc_t key_destr);
|
||||
|
||||
// {group: API for kiss_hash}
|
||||
// Description: Enable hash element destruction. MT-Level: Reentrant
|
||||
//Hash is created with destruction of elements disabled by default.
|
||||
//This function enables destruction upon a call to kiss_hash_destroy.
|
||||
//Meaning, the hash will automatically call destructors when an entry gets
|
||||
//deleted from the hash. Usually this is not the case!
|
||||
// Parameters:
|
||||
// hp - hash
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_undo_destr,
|
||||
// kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
|
||||
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
|
||||
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
|
||||
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
void kiss_hash_dodestr (kiss_hash_t hp);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Disable hash element destruction. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hp - hash
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
|
||||
// kiss_hash_delete, kiss_hash_destroy,
|
||||
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
|
||||
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
void kiss_hash_undo_destr (kiss_hash_t hp);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Number of hash elements. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hash - hash table
|
||||
// Return values:
|
||||
// number of elements
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
|
||||
// kiss_hash_delete, kiss_hash_destroy,
|
||||
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
|
||||
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
int kiss_hash_nelements (kiss_hash_t hash);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Hash size. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hash - hash table
|
||||
// Return values:
|
||||
// Size of hash
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
|
||||
// kiss_hash_insert, kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at,
|
||||
// kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
|
||||
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
int kiss_hash_get_size (kiss_hash_t hash);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Return address of the pointer to the value in the hash table.
|
||||
// Parameters:
|
||||
// hp - hash pointer
|
||||
// key - hash key
|
||||
// Return values:
|
||||
// hash entry
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
|
||||
// kiss_hash_delete, kiss_hash_destroy,
|
||||
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
|
||||
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
void **kiss_hash_findaddr (kiss_hash_t hp, const void *key);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Lookup hash value. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hp - hash pointer
|
||||
// key - hash key
|
||||
// Return values:
|
||||
// o hash value
|
||||
// o NULL upon failure
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookkey, kiss_hash_insert,
|
||||
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
|
||||
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
|
||||
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
void *kiss_hash_lookup (kiss_hash_t hp, const void *key);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Lookup hash key. MT-Level: Reentrant
|
||||
//Returns the key pointer as stored in the hash table.
|
||||
// Parameters:
|
||||
// hp - hash pointer
|
||||
// key - hash key that has a value equal to that of the key stored in the hash.
|
||||
// Return values:
|
||||
// o hash key
|
||||
// o NULL upon failure
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_insert,
|
||||
// kiss_hash_delete, kiss_hash_destroy,kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
|
||||
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
|
||||
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
void *kiss_hash_lookkey (kiss_hash_t hp, const void *key);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Insert hash element. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hp - hash pointer
|
||||
// key - hash key
|
||||
// val - hash val
|
||||
// Return values:
|
||||
// >0 - success
|
||||
// 0 - upon failure
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
|
||||
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
|
||||
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
|
||||
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
int kiss_hash_insert (kiss_hash_t hp, void *key, void *val);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Delete hash element. MT-Level: Reentrant
|
||||
//Delete hash element and return a value for the key.
|
||||
// Parameters:
|
||||
// hp - hash pointer
|
||||
// key - hash key
|
||||
// Return values:
|
||||
// o hash val
|
||||
// o NULL upon failure
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
|
||||
// kiss_hash_insert, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
|
||||
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
|
||||
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
void *kiss_hash_delete (kiss_hash_t hash, const void *key);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Destroy hash. MT-Level: Reentrant
|
||||
//If destructor functions were defined in the call to kiss_hash_create_with_destr or kiss_hash_set_destr
|
||||
//function kiss_hash_dodestr must be called to enable element destruction.
|
||||
// Parameters:
|
||||
// hp - hash pointer
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr,kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
|
||||
// kiss_hash_insert, kiss_hash_delete, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
|
||||
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug,
|
||||
// kiss_hash_debug_all
|
||||
void kiss_hash_destroy (kiss_hash_t hp);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Find hash entry. MT-Level: Reentrant
|
||||
//Used as an efficient but somewhat ugly interface for find/insert operation.
|
||||
//What it does is to return an address of a pointer to a hashent structure containing the key/val pair if found.
|
||||
//If not it returns the address of the pointer in which we can append the new val/pair
|
||||
//thus avoiding an unnecessary repeated search.
|
||||
//We can check if key was found by checking whether the pointer is zero or not.
|
||||
//This function is usually used with kiss_hash_insert_at.
|
||||
// Parameters:
|
||||
// hp - hash pointer
|
||||
// key - hash key
|
||||
// Return values:
|
||||
// hash entry
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
|
||||
// kiss_hash_insert, kiss_hash_delete, kiss_hash_destroy, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp,
|
||||
// kiss_hash_intvalue, kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
struct kiss_hashent ** kiss_hash_find_hashent(kiss_hash_t hp, const void *key);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Insert hash element at specified position. MT-Level: Reentrant
|
||||
//This function should be used together with kiss_hash_find_hashent to insert
|
||||
//the value in case it was not found at the hash.
|
||||
// Parameters:
|
||||
// hp - hash pointer
|
||||
// key - hash key
|
||||
// val - hash val
|
||||
// hloc - insertion location, as returned by kiss_hash_find_hashent
|
||||
// Return values:
|
||||
// o 0 upon failure
|
||||
// o number of hash elements after insertion in case of success.
|
||||
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
|
||||
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
|
||||
// kiss_hash_insert, kiss_hash_delete,
|
||||
// kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
|
||||
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
|
||||
int kiss_hash_insert_at (kiss_hash_t hp, void *key, void *val, struct kiss_hashent**hloc);
|
||||
|
||||
|
||||
#define kiss_hash_strcreate(sz) \
|
||||
kiss_hash_create(sz, (hkeyfunc_t)kiss_hash_strvalue, (hcmpfunc_t)kiss_hash_strcmp, NULL)
|
||||
|
||||
#define kiss_hash_intcreate(sz) \
|
||||
kiss_hash_create(sz, (hkeyfunc_t)kiss_hash_intvalue, (hcmpfunc_t)kiss_hash_intcmp, NULL)
|
||||
|
||||
// Create a hash that uses kiss_hash_bytevalue / kiss_hash_bytecmp as its key functions.
// `n` is passed as the hash size and `esz` is passed as the opaque `info` argument of
// those key functions.
// NOTE(review): presumably `esz` is the key size in bytes - confirm in kiss_hash.cc.
// `esz` is parenthesised and converted through uintptr_t so that compound integer
// expressions are cast as a whole and the integer-to-pointer conversion is explicit.
#define kiss_hash_bytecreate(n, esz) \
    kiss_hash_create(n, (hkeyfunc_t)kiss_hash_bytevalue, (hcmpfunc_t)kiss_hash_bytecmp, (void *)(uintptr_t)(esz))
|
||||
|
||||
// The following provide hash table data type interface,
|
||||
// These functions can be provided by the user,
|
||||
// The default provided functions provide string hash
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Hashing function for string hash.
|
||||
//This function is used by kiss_hash_strcreate().
|
||||
// Parameters:
|
||||
// vs - key
|
||||
// info - opaque
|
||||
// Return values:
|
||||
// value of the hash function.
|
||||
uintptr_t kiss_hash_strvalue (const void *vs, void *info);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Comparison function for string hash.
|
||||
//This function is used by kiss_hash_strcreate().
|
||||
// Parameters:
|
||||
// vk1 - key
|
||||
// vk2 - key
|
||||
// info - opaque
|
||||
// Return values:
|
||||
// 0 - keys are equal
|
||||
// !0 - keys are different
|
||||
int kiss_hash_strcmp (const void *vk1, const void *vk2, void *info);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Hashing function for integer hash.
|
||||
//This function is used by kiss_hash_intcreate().
|
||||
// Parameters:
|
||||
// v - key
|
||||
// info - opaque
|
||||
// Return values:
|
||||
// value of the hash function.
|
||||
uintptr_t kiss_hash_intvalue (const void* v, void *info);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Comparison function for integer hash.
|
||||
//This function is used by kiss_hash_intcreate().
|
||||
// Parameters:
|
||||
// vv1 - key
|
||||
// vv2 - key
|
||||
// info - opaque
|
||||
// Return values:
|
||||
// 0 - keys are equal
|
||||
// !0 - keys are different
|
||||
int kiss_hash_intcmp (const void* vv1, const void* vv2, void *info);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Hashing function for byte hash.
|
||||
//This function is used by kiss_hash_bytecreate().
|
||||
// Parameters:
|
||||
// data - key
|
||||
// info - opaque
|
||||
// Return values:
|
||||
// value of the hash function.
|
||||
uintptr_t kiss_hash_bytevalue (const void *data, void *info);
|
||||
|
||||
// {group: API for HASH}
|
||||
// Description: Comparison function for byte hash.
|
||||
//This function is used by kiss_hash_bytecreate().
|
||||
// Parameters:
|
||||
// d1 - key
|
||||
// d2 - key
|
||||
// info - opaque
|
||||
// Return values:
|
||||
// 0 - keys are equal
|
||||
// !0 - keys are different
|
||||
int kiss_hash_bytecmp (const void *d1, const void *d2, void *info);
|
||||
|
||||
// {group: API for HASH ITERATOR}
|
||||
typedef struct kiss_hash_iter *kiss_hash_iterator;
|
||||
|
||||
// {group: API for HASH ITERATOR}
|
||||
// Description: Create hash iterator. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hp - hash
|
||||
// Return values:
|
||||
// o iterator object
|
||||
// o NULL upon failure
|
||||
// See also:
|
||||
// kiss_hash_iterator_next, kiss_hash_iterator_next_key, kiss_hash_iterator_destroy
|
||||
kiss_hash_iterator kiss_hash_iterator_create (kiss_hash_t hp);
|
||||
|
||||
// {group: API for HASH ITERATOR}
|
||||
// Description: Return next hash value. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hit - hash iterator
|
||||
// Return values:
|
||||
// o next hash value
|
||||
// o NULL upon failure
|
||||
// See also:
|
||||
// kiss_hash_iterator_create, kiss_hash_iterator_next_key, kiss_hash_iterator_destroy
|
||||
void *kiss_hash_iterator_next (kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for HASH ITERATOR}
|
||||
// Description: Return next hash key. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hit - hash iterator
|
||||
// Return values:
|
||||
// o next hash key
|
||||
// o NULL upon failure
|
||||
// See also:
|
||||
// kiss_hash_iterator_create, kiss_hash_iterator_next, kiss_hash_iterator_destroy
|
||||
void *kiss_hash_iterator_next_key (kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for HASH ITERATOR}
|
||||
// Description: Destroy hash iterator. MT-Level: Reentrant
|
||||
// Parameters:
|
||||
// hit - hash iterator
|
||||
// See also:
|
||||
// kiss_hash_iterator_create, kiss_hash_iterator_next, kiss_hash_iterator_next_key
|
||||
void kiss_hash_iterator_destroy (kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for ITERATOR}
|
||||
int kiss_hash_iterator_next_ent(kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for ITERATOR}
|
||||
void * kiss_hash_iterator_get_key(kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for ITERATOR}
|
||||
void * kiss_hash_iterator_get_val(kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for ITERATOR}
|
||||
struct kiss_hashent * kiss_hash_iterator_get_hashent(kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for ITERATOR}
|
||||
int kiss_hash_iterator_equal(kiss_hash_iterator hit1, kiss_hash_iterator hit2);
|
||||
|
||||
// {group: API for ITERATOR}
|
||||
kiss_hash_iterator kiss_hash_iterator_copy(kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for ITERATOR}
|
||||
void kiss_hash_iterator_free(kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for ITERATOR}
|
||||
void kiss_hash_iterator_set_begin(kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for ITERATOR}
|
||||
void kiss_hash_iterator_set_end(kiss_hash_iterator hit);
|
||||
|
||||
// {group: API for HASH}
|
||||
kiss_hash_iterator kiss_hash_find_hashent_new(kiss_hash_t hp, const void *key);
|
||||
|
||||
// {group: API for HASH ITERATOR}
|
||||
void kiss_hash_delete_by_iter(kiss_hash_iterator hit);
|
||||
|
||||
// - - - - - - - - - - - - - - -
|
||||
// Hash resize mechanism
|
||||
// - - - - - - - - - - - - - - -
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
// Determine if hash size can increase, decrease or both.
|
||||
typedef enum {
    // Hash size is kept fixed.
    KISS_HASH_SIZE_STATIC   = 0,
    // Hash size can increase.
    KISS_HASH_SIZE_INCREASE = 1,
    // Hash size can decrease.
    KISS_HASH_SIZE_DECREASE = 2,
    // Hash size can both increase and decrease (the two flags above combined).
    KISS_HASH_SIZE_INC_DEC  = KISS_HASH_SIZE_INCREASE | KISS_HASH_SIZE_DECREASE
} KissHashResizeDirection;
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
// Selects how a new hash size is computed when a resize takes place.
typedef enum {
    KISS_HASH_RESIZE_METHOD_UNKNOWN = 0,
    // Resize by a factor (see DEFAULT_KISS_HASH_RESIZE_FACTOR_VALUE).
    KISS_HASH_RESIZE_BY_FACTOR      = 1
} KissHashResizeMethod;
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
// Default maximal hash size:
|
||||
// Hash size will not increase beyond this value unless stated o/w by the application
|
||||
#define DEFAULT_KISS_HASH_SIZE (1<<17)
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
// Default factor value for hash resizing by factor (KISS_HASH_RESIZE_BY_FACTOR)
|
||||
#define DEFAULT_KISS_HASH_RESIZE_FACTOR_VALUE 4
|
||||
// {group: API for HASH RESIZE}
|
||||
// Default trigger ratio for hash resizing by factor (KISS_HASH_RESIZE_BY_FACTOR)
|
||||
#define DEFAULT_KISS_HASH_RESIZE_FACTOR_TRIG_RATIO 2
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
// Resize application callback: This callback will be invoked at every successful resize operation.
|
||||
typedef int (* HashResizeCb_t) (kiss_hash_t hp, void *app_info);
|
||||
|
||||
|
||||
// Hash resize mode object & access API.
|
||||
// Used for setting the resize parameters of a hash.
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
typedef struct _KissHashResizeMode KissHashResizeMode;
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
int KissHashResizeMode_create(KissHashResizeMode **resize_mode);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
void KissHashResizeMode_destroy(KissHashResizeMode *resize_mode);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
int KissHashResizeMode_set_method(
|
||||
KissHashResizeMode *resize_mode,
|
||||
KissHashResizeMethod method,
|
||||
u_int value,
|
||||
u_int trigger_ratio);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
int KissHashResizeMode_get_method(
|
||||
const KissHashResizeMode *resize_mode,
|
||||
KissHashResizeMethod *method,
|
||||
u_int *value,
|
||||
u_int *trigger_ratio);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
int KissHashResizeMode_set_direction(KissHashResizeMode *resize_mode, KissHashResizeDirection direction);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
int KissHashResizeMode_get_direction(const KissHashResizeMode *resize_mode, KissHashResizeDirection *direction);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
int KissHashResizeMode_set_max_size(KissHashResizeMode *resize_mode, u_int max_size);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
int KissHashResizeMode_get_max_size(const KissHashResizeMode *resize_mode, u_int *max_size);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
int kiss_hash_set_resize_cb(kiss_hash_t hp, HashResizeCb_t resize_callback);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
// Description: Set hash dynamic size parameters.
|
||||
// Parameters:
|
||||
// hp - [in] pointer to hash table
|
||||
// resize_mode - [in] should be created and set using the access API to the KissHashResizeMode object.
|
||||
// After using the set API, this object can be destroyed.
|
||||
//
|
||||
int kiss_hash_set_dynamic_size(kiss_hash_t hp, const KissHashResizeMode *resize_mode);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
// Description: Get hash dynamic size parameters.
|
||||
// Parameters:
|
||||
// hp - [in] pointer to hash table
|
||||
// resize_mode - [out] a read-only parameter that should not be changed by the application.
|
||||
int kiss_hash_get_dynamic_size(kiss_hash_t hp, const KissHashResizeMode **resize_mode);
|
||||
|
||||
// {group: API for HASH RESIZE}
|
||||
// Description: This API will cause an immediate resizing of hash
|
||||
// table, according to the parameters, given in the input
|
||||
// KissHashResizeMode object (if NULL, the resize will be done
|
||||
// according to the parameters as last set by the application).
|
||||
//
|
||||
// Note that the KissHashResizeMode object parameters are
|
||||
// not kept on the hash handle for future resize operations.
|
||||
int kiss_hash_trigger_resize(kiss_hash_t hp, const KissHashResizeMode *resize_mode);
|
||||
|
||||
#endif // __KISS_HASH_H__
|
134
components/utils/pm/kiss_patterns.cc
Normal file
134
components/utils/pm/kiss_patterns.cc
Normal file
@@ -0,0 +1,134 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "kiss_patterns.h"
|
||||
#include <vector>
|
||||
#include <ctype.h>
|
||||
#include "general_adaptor.h"
|
||||
#include "pm_adaptor.h"
|
||||
#include "sasal.h"
|
||||
|
||||
SASAL_START // Multiple Pattern Matcher
|
||||
// Add a character's printable representation to a buffer.
|
||||
// Returns the number of bytes written.
|
||||
static u_int
|
||||
pm_str_one_char_to_debug_buf(u_char *buf, int len, u_char ch, BOOL for_csv)
|
||||
{
|
||||
char single_char_buf[10];
|
||||
int single_char_len;
|
||||
|
||||
// Get a printable representation of the character
|
||||
if (isprint(ch) && !(ch == '"' && for_csv)) {
|
||||
single_char_buf[0] = ch;
|
||||
single_char_len = 1;
|
||||
} else {
|
||||
snprintf(single_char_buf, sizeof(single_char_buf), "\\x%02x", ch);
|
||||
single_char_buf[sizeof(single_char_buf)-1] = '\0';
|
||||
single_char_len = strlen(single_char_buf);
|
||||
}
|
||||
|
||||
if (single_char_len > len) {
|
||||
// See that we don't exceed the buffer, and leave room for \0.
|
||||
single_char_len = len;
|
||||
}
|
||||
|
||||
bcopy(single_char_buf, buf, single_char_len);
|
||||
return single_char_len;
|
||||
}
|
||||
|
||||
// Debug only - Returns a printable character pointer for the non null-terminated string
|
||||
static const u_char *
|
||||
pm_str_to_debug_charp_ex(const u_char *str, u_int size, BOOL for_csv)
|
||||
{
|
||||
static u_char buf[200];
|
||||
u_int i;
|
||||
u_char *buf_p;
|
||||
|
||||
// Copy the string. But replace unprintable characters (most importantly \0) with underscores.
|
||||
buf_p = &buf[0];
|
||||
for (i=0; i<size; i++) {
|
||||
int remaining_len = buf+sizeof(buf)-buf_p;
|
||||
if (remaining_len <= 1) break;
|
||||
buf_p += pm_str_one_char_to_debug_buf(buf_p, remaining_len-1, str[i], for_csv);
|
||||
}
|
||||
*buf_p = '\0';
|
||||
return buf;
|
||||
}
|
||||
|
||||
static const u_char *
|
||||
pm_str_to_debug_charp(const u_char *str, u_int size)
|
||||
{
|
||||
return pm_str_to_debug_charp_ex(str, size, FALSE);
|
||||
}
|
||||
|
||||
|
||||
// *********************** STRING *******************************
|
||||
|
||||
// Convenience constructor for char buffers: reinterprets the bytes as u_char
// and delegates to the u_char constructor.
kiss_pmglob_string_s::kiss_pmglob_string_s(
    const char *buffer,
    size_t size,
    int _pattern_id,
    u_int _flags
)
        :
    kiss_pmglob_string_s(reinterpret_cast<const u_char *>(buffer), size, _pattern_id, _flags)
{}
|
||||
|
||||
// Builds a pattern that owns a private copy of the size bytes starting at buffer.
// A null buffer or a zero size is rejected by the debug assertion.
kiss_pmglob_string_s::kiss_pmglob_string_s(const u_char *buffer, size_t size, int _pattern_id, u_int _flags)
        :
    pattern_id(_pattern_id),
    flags(_flags)
{
    // Validate before touching the input bytes.
    dbgAssert(buffer && size > 0) << "Illegal arguments";
    buf.assign(buffer, buffer + size);
}
|
||||
|
||||
|
||||
// Returns the pattern of the pattern as u_char*
|
||||
int
|
||||
kiss_pmglob_string_get_id(const kiss_pmglob_string_s *pm_string)
|
||||
{
|
||||
KISS_ASSERT(pm_string != nullptr, "Illegal arguments");
|
||||
return pm_string->pattern_id;
|
||||
}
|
||||
|
||||
|
||||
// Returns the size of the pattern
|
||||
u_int
|
||||
kiss_pmglob_string_get_size(const kiss_pmglob_string_s * pm_string)
|
||||
{
|
||||
KISS_ASSERT(pm_string != nullptr, "Illegal arguments");
|
||||
return pm_string->buf.size();
|
||||
}
|
||||
|
||||
// Returns the pattern of the pattern as u_char*
|
||||
const u_char *
|
||||
kiss_pmglob_string_get_pattern(const kiss_pmglob_string_s *pm_string)
|
||||
{
|
||||
KISS_ASSERT(pm_string != nullptr, "Illegal arguments");
|
||||
return pm_string->buf.data();
|
||||
}
|
||||
|
||||
|
||||
// Debug only - Returns a printable character pointer for the string
|
||||
const u_char *
|
||||
kiss_pmglob_string_to_debug_charp(const kiss_pmglob_string_s *pm_string)
|
||||
{
|
||||
return pm_str_to_debug_charp(kiss_pmglob_string_get_pattern(pm_string), kiss_pmglob_string_get_size(pm_string));
|
||||
}
|
||||
|
||||
|
||||
u_int
|
||||
kiss_pmglob_string_get_flags(const kiss_pmglob_string_s *pm_string)
|
||||
{
|
||||
KISS_ASSERT(pm_string != nullptr, "Illegal arguments");
|
||||
return pm_string->flags;
|
||||
}
|
||||
SASAL_END
|
74
components/utils/pm/kiss_patterns.h
Normal file
74
components/utils/pm/kiss_patterns.h
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __kiss_patterns_h__
|
||||
#define __kiss_patterns_h__
|
||||
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include "pm_adaptor.h"
|
||||
|
||||
// kiss_pmglob_string functions
|
||||
|
||||
// A single pattern: its raw bytes plus the id and flags supplied at construction.
class kiss_pmglob_string_s {
public:
    // Both constructors take the pattern bytes (buffer, size), the pattern id and the flags.
    // The two overloads differ only in the character type of the buffer.
    explicit kiss_pmglob_string_s(const char *buffer, size_t size, int _pattern_id, u_int _flags);
    explicit kiss_pmglob_string_s(const u_char *buffer, size_t size, int _pattern_id, u_int _flags);

    // Pattern bytes (binary data, not null-terminated).
    std::vector<u_char> buf;
    // Id supplied by the creator of the pattern.
    int pattern_id;
    // Flags supplied by the creator of the pattern.
    u_int flags;
};
|
||||
|
||||
|
||||
// Returns the size of pattern
|
||||
//
|
||||
// Parameters:
|
||||
// pattern - the pattern.
|
||||
// Return value:
|
||||
// u_int - the size that this pattern represents.
|
||||
KISS_APPS_CPAPI
|
||||
u_int kiss_pmglob_string_get_size(const kiss_pmglob_string_s *pattern);
|
||||
|
||||
// Returns the pattern of the pattern as u_char*
|
||||
//
|
||||
// Parameters:
|
||||
// patterns - the pattern.
|
||||
// Return value:
|
||||
// u_char * - the pattern that this pattern represents.
|
||||
KISS_APPS_CPAPI
|
||||
const u_char *kiss_pmglob_string_get_pattern(const kiss_pmglob_string_s *pattern);
|
||||
|
||||
// For debugging only - returns a printable pointer for the string.
|
||||
// Replaces unprintable characters with \xNN hex escapes.
|
||||
//
|
||||
// Note: In multithreaded situations, the buffer returned may be overrun by another thread.
|
||||
// At worst, this would lead to an incorrect string being printed.
|
||||
KISS_APPS_CPAPI
|
||||
const u_char *kiss_pmglob_string_to_debug_charp(const kiss_pmglob_string_s *pm_string);
|
||||
|
||||
// Returns the id of pattern
|
||||
//
|
||||
// Parameters:
|
||||
// patterns - the pattern.
|
||||
// Return value:
|
||||
// id - the pattern_id that this pattern represents.
|
||||
KISS_APPS_CPAPI
|
||||
int kiss_pmglob_string_get_id(const kiss_pmglob_string_s *pattern);
|
||||
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
u_int kiss_pmglob_string_get_flags(const kiss_pmglob_string_s *pattern);
|
||||
|
||||
|
||||
#endif // __kiss_patterns_h__
|
429
components/utils/pm/kiss_pm_stats.cc
Normal file
429
components/utils/pm/kiss_pm_stats.cc
Normal file
@@ -0,0 +1,429 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "general_adaptor.h"
|
||||
#include "sasal.h"
|
||||
// ********************* INCLUDES **************************
|
||||
#include "kiss_pm_stats.h"
|
||||
// ********************* INCLUDES **************************
|
||||
|
||||
SASAL_START // Multiple Pattern Matcher
|
||||
// ********************* FUNCTIONS **************************
|
||||
|
||||
|
||||
// Initialize the common statistics
|
||||
kiss_ret_val
|
||||
kiss_pm_stats_common_init(kiss_pm_stats_common new_stats)
|
||||
{
|
||||
static const char rname[] = "kiss_pm_stats_common_init";
|
||||
|
||||
if (new_stats == NULL) {
|
||||
kiss_debug_err(K_PM, ("%s: stats is zero\n", rname));
|
||||
return KISS_ERROR;
|
||||
}
|
||||
|
||||
bzero(new_stats, sizeof(struct kiss_pm_stats_common_s));
|
||||
|
||||
#if 0
|
||||
if (kiss_pm_stats_take_exec_time) {
|
||||
new_stats->exec_num_cpus = kiss_multik_instance_num;
|
||||
new_stats->exec = kiss_pmglob_memory_kmalloc_ex(
|
||||
new_stats->exec_num_cpus * sizeof(struct kiss_pm_stats_dynamic_aligned_s),
|
||||
rname,
|
||||
(FW_KMEM_NOSLEEP| FW_KMEM_RETURN_ALIGN_PTR)
|
||||
);
|
||||
|
||||
if (!new_stats->exec) {
|
||||
kiss_debug_err(K_PM, ("%s: Error in allocating the execution stats\n", rname));
|
||||
return KISS_ERROR;
|
||||
}
|
||||
|
||||
bzero(new_stats->exec, new_stats->exec_num_cpus*sizeof(struct kiss_pm_stats_dynamic_aligned_s));
|
||||
}
|
||||
#endif
|
||||
|
||||
return KISS_OK;
|
||||
|
||||
}
|
||||
|
||||
#define KISS_MULTIK_MAX_INSTANCE_NUM 40
|
||||
|
||||
// Free the common statistics
|
||||
void
|
||||
kiss_pm_stats_common_free(kiss_pm_stats_common stats)
|
||||
{
|
||||
static const char rname[] = "kiss_pm_stats_common_free";
|
||||
BOOL should_free_stats_exec =
|
||||
stats &&
|
||||
stats->exec &&
|
||||
stats->exec_num_cpus > 0 &&
|
||||
stats->exec_num_cpus < KISS_MULTIK_MAX_INSTANCE_NUM;
|
||||
if (should_free_stats_exec) {
|
||||
kiss_pmglob_memory_kfree(
|
||||
stats->exec,
|
||||
stats->exec_num_cpus * sizeof(struct kiss_pm_stats_dynamic_aligned_s),
|
||||
rname
|
||||
);
|
||||
stats->exec = NULL;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Update build-time statistics
|
||||
void
|
||||
kiss_pm_stats_common_update_compile(kiss_pm_stats_common stats, u_int bytes, u_int compilation_time,
|
||||
enum kiss_pm_stats_update_compile_type type)
|
||||
{
|
||||
KISS_ASSERT_PERF(stats, ("Illegal arguments"));
|
||||
|
||||
switch (type) {
|
||||
case UPDATE_COMPILE_STATS_MEM:
|
||||
stats->compile.memory_bytes = bytes;
|
||||
return;
|
||||
case UPDATE_COMPILE_STATS_TIME:
|
||||
stats->compile.compilation_time = compilation_time;
|
||||
return;
|
||||
case UPDATE_COMPILE_STATS_BOTH:
|
||||
stats->compile.memory_bytes = bytes;
|
||||
stats->compile.compilation_time = compilation_time;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Will adding to an unsigned variable cause it to wrap around?
|
||||
#define ADDITION_WOULD_WRAP_AROUND(old_val, delta) \
|
||||
((old_val) + (delta) < (old_val))
|
||||
|
||||
// Reset buffer length statistics, so we can add a buffer without wraparound
|
||||
static void
|
||||
handle_buflen_stats_wraparound(struct kiss_pm_stats_dynamic_s *cur_kern_inst_stats)
|
||||
{
|
||||
cur_kern_inst_stats->buflen.total = 0;
|
||||
cur_kern_inst_stats->buflen.sample_num = 0;
|
||||
}
|
||||
|
||||
// Reset execution time statistics, so we can add a sample without wraparound
|
||||
static void
|
||||
handle_runtime_stats_wraparound(struct kiss_pm_stats_dynamic_s *cur_kern_inst_stats)
|
||||
{
|
||||
cur_kern_inst_stats->runtime.total_exec_time = 0;
|
||||
cur_kern_inst_stats->runtime.user_cb_exec_time = 0;
|
||||
cur_kern_inst_stats->runtime.sample_num = 0;
|
||||
}
|
||||
|
||||
|
||||
// Update run-time statistics
|
||||
void
|
||||
kiss_pm_stats_common_update_exec(kiss_pm_stats_common stats, u_int buf_size, u_int num_of_matches)
|
||||
{
|
||||
struct kiss_pm_stats_dynamic_s *cur_kern_inst_stats;
|
||||
KISS_ASSERT_PERF(stats, ("Illegal arguments"));
|
||||
if(stats->exec) {
|
||||
ASSERT_LOCKED;
|
||||
cur_kern_inst_stats = &(stats->exec[kiss_multik_this_instance_num].stats);
|
||||
|
||||
// Buffer length statistics
|
||||
if (ADDITION_WOULD_WRAP_AROUND(cur_kern_inst_stats->buflen.total, buf_size)) {
|
||||
handle_buflen_stats_wraparound(cur_kern_inst_stats);
|
||||
}
|
||||
cur_kern_inst_stats->buflen.total += buf_size;
|
||||
cur_kern_inst_stats->buflen.sample_num++;
|
||||
if (buf_size > cur_kern_inst_stats->buflen.max) {
|
||||
cur_kern_inst_stats->buflen.max = buf_size;
|
||||
}
|
||||
|
||||
// General statistics
|
||||
cur_kern_inst_stats->num_of_buffs++;
|
||||
cur_kern_inst_stats->num_of_matches += num_of_matches;
|
||||
if (num_of_matches > cur_kern_inst_stats->max_matches_on_buf) {
|
||||
cur_kern_inst_stats->max_matches_on_buf = num_of_matches;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Update run-time (execution) statistics
|
||||
void
|
||||
kiss_pm_stats_common_update_exec_time(kiss_pm_stats_common stats, u_int exec_time, u_int user_cb_time)
|
||||
{
|
||||
struct kiss_pm_stats_dynamic_s *cur_kern_inst_stats;
|
||||
if(stats && stats->exec) {
|
||||
ASSERT_LOCKED;
|
||||
cur_kern_inst_stats = &(stats->exec[kiss_multik_this_instance_num].stats);
|
||||
|
||||
// The execution time includes the callback, but we want the net time.
|
||||
exec_time -= user_cb_time;
|
||||
|
||||
// take care of wrap around
|
||||
if (ADDITION_WOULD_WRAP_AROUND(cur_kern_inst_stats->runtime.total_exec_time, exec_time) ||
|
||||
ADDITION_WOULD_WRAP_AROUND(cur_kern_inst_stats->runtime.user_cb_exec_time, user_cb_time)) {
|
||||
handle_runtime_stats_wraparound(cur_kern_inst_stats);
|
||||
}
|
||||
cur_kern_inst_stats->runtime.total_exec_time += exec_time;
|
||||
cur_kern_inst_stats->runtime.user_cb_exec_time += user_cb_time;
|
||||
cur_kern_inst_stats->runtime.sample_num++;
|
||||
|
||||
// Updating the max values
|
||||
if (exec_time > cur_kern_inst_stats->runtime.max_exec_time){
|
||||
cur_kern_inst_stats->runtime.max_exec_time = exec_time;
|
||||
}
|
||||
if (user_cb_time > cur_kern_inst_stats->runtime.user_cb_max_time){
|
||||
cur_kern_inst_stats->runtime.user_cb_max_time = user_cb_time;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Clear all runtime statistics
|
||||
void
|
||||
kiss_pm_stats_common_reset_exec(kiss_pm_stats_common stats)
|
||||
{
|
||||
u_int i;
|
||||
if(stats && stats->exec) {
|
||||
for (i = 0; i < stats->exec_num_cpus; i++) {
|
||||
struct kiss_pm_stats_dynamic_s *cur_cpu_stats;
|
||||
cur_cpu_stats = &(stats->exec[i].stats);
|
||||
bzero(cur_cpu_stats, sizeof(*cur_cpu_stats));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Aggregate the run-time statistics from all cpus in src to dst
|
||||
static void
|
||||
kiss_pm_stats_common_aggregate_cpus(struct kiss_pm_stats_dynamic_s *dst, const struct kiss_pm_stats_common_s *src)
|
||||
{
|
||||
u_int i;
|
||||
KISS_ASSERT_PERF(src, ("Illegal arguments"));
|
||||
if(src && src->exec)
|
||||
{
|
||||
for (i = 0; i < src->exec_num_cpus; i++) {
|
||||
struct kiss_pm_stats_dynamic_s *cur_cpu_src = &(src->exec[i].stats);
|
||||
|
||||
// Buffer length statistics - add and avoid wrap-around
|
||||
if (ADDITION_WOULD_WRAP_AROUND(dst->buflen.total, cur_cpu_src->buflen.total)) {
|
||||
handle_buflen_stats_wraparound(dst);
|
||||
}
|
||||
dst->buflen.total += cur_cpu_src->buflen.total;
|
||||
dst->buflen.sample_num += cur_cpu_src->buflen.sample_num;
|
||||
dst->buflen.max = MAX(dst->buflen.max, cur_cpu_src->buflen.max);
|
||||
|
||||
// General statistics
|
||||
dst->num_of_matches += cur_cpu_src->num_of_matches;
|
||||
dst->num_of_stage1_matches += cur_cpu_src->num_of_stage1_matches;
|
||||
dst->num_of_stage22_matches += cur_cpu_src->num_of_stage22_matches;
|
||||
dst->num_of_stage23_matches += cur_cpu_src->num_of_stage23_matches;
|
||||
|
||||
dst->num_of_buffs += cur_cpu_src->num_of_buffs;
|
||||
if (dst->max_matches_on_buf < cur_cpu_src->max_matches_on_buf) {
|
||||
dst->max_matches_on_buf = cur_cpu_src->max_matches_on_buf;
|
||||
}
|
||||
|
||||
// Execution time statistics - add and avoid wrap-around
|
||||
if (ADDITION_WOULD_WRAP_AROUND(dst->runtime.total_exec_time, cur_cpu_src->runtime.total_exec_time) ||
|
||||
ADDITION_WOULD_WRAP_AROUND(dst->runtime.user_cb_exec_time, cur_cpu_src->runtime.user_cb_exec_time)) {
|
||||
handle_runtime_stats_wraparound(dst);
|
||||
}
|
||||
dst->runtime.total_exec_time += cur_cpu_src->runtime.total_exec_time;
|
||||
dst->runtime.user_cb_exec_time += cur_cpu_src->runtime.user_cb_exec_time;
|
||||
dst->runtime.sample_num += cur_cpu_src->runtime.sample_num;
|
||||
dst->runtime.max_exec_time = MAX(dst->runtime.max_exec_time, cur_cpu_src->runtime.max_exec_time);
|
||||
dst->runtime.user_cb_max_time = MAX(dst->runtime.user_cb_max_time, cur_cpu_src->runtime.user_cb_max_time);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
#define TOTAL_MICORSEC_TO_AVG_NSEC(total, samples) \
|
||||
((samples)==0 ? 0 : (u_int)((u_int64)(total) * 1000 / (u_int64)(samples)))
|
||||
|
||||
// Print the common statistics
|
||||
void
|
||||
kiss_pm_stats_common_print(
|
||||
kiss_pm_stats_common stats,
|
||||
enum kiss_pm_stats_type type,
|
||||
enum kiss_pm_stats_format format,
|
||||
BOOL print_headline
|
||||
)
|
||||
{
|
||||
struct kiss_pm_stats_dynamic_s dynamic_stats;
|
||||
KISS_ASSERT_PERF((stats && !print_headline) || print_headline, ("Illegal arguments"));
|
||||
|
||||
if (type != KISS_PM_DYNAMIC_STATS) {
|
||||
if (format == KISS_PM_TEXT_FORMAT_STATS) {
|
||||
kdprintf("Memory comsumption for this handle is %u bytes\n", stats->compile.memory_bytes);
|
||||
kdprintf("Compilation time for this handle is %u microseconds\n", stats->compile.compilation_time);
|
||||
} else if (format == KISS_PM_CSV_FORMAT_STATS) {
|
||||
if (print_headline) {
|
||||
kdprintf("Memory consumption;Compilation time (microsec);");
|
||||
} else {
|
||||
kdprintf("%u;%u;", stats->compile.memory_bytes, stats->compile.compilation_time);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!print_headline) {
|
||||
bzero(&dynamic_stats, sizeof(struct kiss_pm_stats_dynamic_s ));
|
||||
kiss_pm_stats_common_aggregate_cpus(&dynamic_stats, stats);
|
||||
}
|
||||
|
||||
if (type != KISS_PM_STATIC_STATS) {
|
||||
if (format == KISS_PM_TEXT_FORMAT_STATS) {
|
||||
kdprintf("Number of executed buffers is %u\n", dynamic_stats.num_of_buffs);
|
||||
kdprintf("Max buffer length is %u\n", dynamic_stats.buflen.max);
|
||||
kdprintf("Avg buffer length is %u\n",
|
||||
dynamic_stats.buflen.sample_num ? (dynamic_stats.buflen.total/dynamic_stats.buflen.sample_num) : 0);
|
||||
kdprintf("Number of matches is %u\n", dynamic_stats.num_of_matches);
|
||||
kdprintf("Number of matches after stage1 is %u\n", dynamic_stats.num_of_stage1_matches);
|
||||
kdprintf("Number of matches after start-anchor is %u\n", dynamic_stats.num_of_stage22_matches);
|
||||
kdprintf("Number of matches after end-anchor is %u\n", dynamic_stats.num_of_stage23_matches);
|
||||
kdprintf("Max number of matches on buffer is %u\n", dynamic_stats.max_matches_on_buf);
|
||||
// Average execution time - display in nanosecond so rounding down won't lose too much
|
||||
kdprintf("Avg execution time is %u ns for PM, %u ns for callbacks\n",
|
||||
TOTAL_MICORSEC_TO_AVG_NSEC(dynamic_stats.runtime.total_exec_time, dynamic_stats.runtime.sample_num),
|
||||
TOTAL_MICORSEC_TO_AVG_NSEC(dynamic_stats.runtime.user_cb_exec_time, dynamic_stats.runtime.sample_num));
|
||||
// Maximum execution time - display in nanosecond for consistency with average.
|
||||
// concatenate 000 instead of multiplying,
|
||||
// to avoid overflow (in very extreme, yet very interesting, cases).
|
||||
kdprintf("Max execution time is %u000 ns for PM, %u000 ns for callbacks\n",
|
||||
dynamic_stats.runtime.max_exec_time, dynamic_stats.runtime.user_cb_max_time);
|
||||
} else if (format == KISS_PM_CSV_FORMAT_STATS) {
|
||||
if (print_headline) {
|
||||
kdprintf(
|
||||
"Executed buffers #;"
|
||||
"Max buffer length;"
|
||||
"Avg buffer length;"
|
||||
"Matches #;"
|
||||
"Max matches on buffer;"
|
||||
"stage1 matches #;"
|
||||
"2nd filter matches #;"
|
||||
"3rd filter matches #;"
|
||||
"Avg PM exec time (ns);"
|
||||
"Max PM exec time (ns);"
|
||||
"Avg callback exec time (ns);"
|
||||
"Max callback exec time (ns)"
|
||||
);
|
||||
} else {
|
||||
kdprintf("%u;%u;%u;%u;%u;%u;%u;%u;%u;%u000;%u;%u000",
|
||||
dynamic_stats.num_of_buffs,
|
||||
dynamic_stats.buflen.max,
|
||||
dynamic_stats.buflen.sample_num ? (dynamic_stats.buflen.total/dynamic_stats.buflen.sample_num) : 0,
|
||||
dynamic_stats.num_of_matches,
|
||||
dynamic_stats.max_matches_on_buf,
|
||||
dynamic_stats.num_of_stage1_matches,
|
||||
dynamic_stats.num_of_stage22_matches,
|
||||
dynamic_stats.num_of_stage23_matches,
|
||||
TOTAL_MICORSEC_TO_AVG_NSEC(
|
||||
dynamic_stats.runtime.total_exec_time,
|
||||
dynamic_stats.runtime.sample_num
|
||||
),
|
||||
dynamic_stats.runtime.max_exec_time,
|
||||
TOTAL_MICORSEC_TO_AVG_NSEC(
|
||||
dynamic_stats.runtime.user_cb_exec_time,
|
||||
dynamic_stats.runtime.sample_num
|
||||
),
|
||||
dynamic_stats.runtime.user_cb_max_time
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
#define kiss_pm_serialize_during_sanity_check 0
|
||||
|
||||
|
||||
// Return the statistics from src in dst (aggregate statistics from all cpus)
|
||||
kiss_ret_val
|
||||
kiss_pm_stats_common_get(struct kiss_pm_stats_static_s *dst_compile,
|
||||
struct kiss_pm_stats_dynamic_s *dst_exec,
|
||||
const struct kiss_pm_stats_common_s *src)
|
||||
{
|
||||
KISS_ASSERT_PERF((dst_compile && dst_exec && src), ("Illegal arguments"));
|
||||
|
||||
if (!(dst_compile && dst_exec && src)) {
|
||||
return KISS_ERROR;
|
||||
}
|
||||
bzero(dst_compile, sizeof(struct kiss_pm_stats_static_s));
|
||||
bzero(dst_exec, sizeof(struct kiss_pm_stats_dynamic_s));
|
||||
bcopy(&(src->compile), dst_compile, sizeof(struct kiss_pm_stats_static_s));
|
||||
|
||||
kiss_pm_stats_common_aggregate_cpus(dst_exec, src);
|
||||
|
||||
// for debug purposes only!
|
||||
// ignore specific statistics fields when performing a sanity check on serialization
|
||||
if (kiss_pm_serialize_during_sanity_check) {
|
||||
dst_compile->memory_bytes = KISS_PM_SERIALIZE_IGNORE_INT;
|
||||
dst_compile->compilation_time = KISS_PM_SERIALIZE_IGNORE_INT;
|
||||
}
|
||||
|
||||
return KISS_OK;
|
||||
}
|
||||
|
||||
// Copy the statistics from src to dst
|
||||
kiss_ret_val
|
||||
kiss_pm_stats_common_copy(kiss_pm_stats_common dst, const struct kiss_pm_stats_common_s *src)
|
||||
{
|
||||
if(src && src->exec) {
|
||||
u_int num_cpus = MIN(src->exec_num_cpus, dst->exec_num_cpus);
|
||||
KISS_ASSERT_PERF((dst && src), ("Illegal arguments"));
|
||||
|
||||
if (!(dst && src)) {
|
||||
return KISS_ERROR;
|
||||
}
|
||||
bcopy(&(src->compile), &(dst->compile), sizeof(struct kiss_pm_stats_static_s));
|
||||
bcopy(src->exec, dst->exec, num_cpus*sizeof(struct kiss_pm_stats_dynamic_aligned_s));
|
||||
}
|
||||
return KISS_OK;
|
||||
}
|
||||
|
||||
// Get size of serialized common statistics. Only build-time statistics are counted
|
||||
u_int
|
||||
kiss_pm_stats_common_get_serialize_size()
|
||||
{
|
||||
return sizeof(struct kiss_pm_stats_static_s);
|
||||
}
|
||||
|
||||
// Serialize common statistics. Only build-time statistics are serialized
|
||||
kiss_ret_val
|
||||
kiss_pm_stats_common_serialize(const struct kiss_pm_stats_common_s *stats, u_char **buf, u_int *size)
|
||||
{
|
||||
KISS_ASSERT_PERF((stats), ("Illegal arguments"));
|
||||
|
||||
DATA_BUFF_COPY(*buf, size, &(stats->compile), sizeof(struct kiss_pm_stats_static_s));
|
||||
|
||||
return KISS_OK;
|
||||
}
|
||||
|
||||
// Deserialize common statistics. Only build-time statistics are deserialized
|
||||
kiss_ret_val
|
||||
kiss_pm_stats_common_deserialize(
|
||||
kiss_pm_stats_common stats,
|
||||
u_char **buf, u_int *size,
|
||||
CP_MAYBE_UNUSED kiss_vbuf vbuf,
|
||||
CP_MAYBE_UNUSED kiss_vbuf_iter *vbuf_iter
|
||||
)
|
||||
{
|
||||
KISS_ASSERT_PERF((stats), ("Illegal arguments"));
|
||||
|
||||
DATA_BUFF_READ(*buf, size, vbuf, *vbuf_iter, &(stats->compile), sizeof(struct kiss_pm_stats_static_s));
|
||||
|
||||
return KISS_OK;
|
||||
}
|
||||
|
||||
// ******************** FUNCTIONS *************************
|
||||
SASAL_END
|
146
components/utils/pm/kiss_pm_stats.h
Normal file
146
components/utils/pm/kiss_pm_stats.h
Normal file
@@ -0,0 +1,146 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __kiss_pm_stats_h__
|
||||
#define __kiss_pm_stats_h__
|
||||
|
||||
#include "pm_adaptor.h"
|
||||
|
||||
// Common statistics
|
||||
|
||||
// Common run time statistics (one instance per CPU, see kiss_pm_stats_common_s)
struct kiss_pm_stats_dynamic_s {
    // Number of buffers we ran this dfa on
    u_int num_of_buffs;
    // How many matches there were in this dfa
    u_int num_of_matches;
    // Maximal number of matches per one buf
    u_int max_matches_on_buf;

    // Buffer length statistics
    struct {
        // Maximum buffer length
        u_int max;
        // Total length (for average calculation)
        u_int total;
        // Number of buffers, whose lengths make up total.
        u_int sample_num;
    } buflen;

    // Execution time statistics - not collected by default
    struct {
        // PM Execution time (not including user callbacks)
        u_int total_exec_time;
        // Maximal PM execution time
        u_int max_exec_time;
        // User callback execution time
        u_int user_cb_exec_time;
        // Maximal user callback execution time
        u_int user_cb_max_time;
        // Number of execution time samples
        u_int sample_num;
    } runtime;

    // Tier1 LSS matches, before filtering by mask
    u_int num_of_stage1_matches;
    // Tier1 matches after ^ (start anchor)
    u_int num_of_stage22_matches;
    // Tier1 matches after $ (end anchor)
    u_int num_of_stage23_matches;
};
|
||||
|
||||
// Common build time statistics.
// kiss_pm_stats_common_serialize / kiss_pm_stats_common_deserialize hand this struct as a whole
// (its address and sizeof(struct kiss_pm_stats_static_s)) to DATA_BUFF_COPY / DATA_BUFF_READ,
// so adding, removing or reordering fields changes what is serialized.
|
||||
struct kiss_pm_stats_static_s {
|
||||
u_int memory_bytes; // How many bytes does this tier consume
|
||||
u_int compilation_time; // Compilation time of this tier in micro-seconds
|
||||
};
|
||||
|
||||
struct CP_CACHELINE_ALIGNED kiss_pm_stats_dynamic_aligned_s {
|
||||
struct kiss_pm_stats_dynamic_s stats;
|
||||
};
|
||||
|
||||
// Container for both kinds of statistics: run time (exec) and build time (compile).
// Only `compile` is passed to the serialize / deserialize functions; `exec` and
// `exec_num_cpus` are not serialized.
struct kiss_pm_stats_common_s {
|
||||
// Run time statistics, per-CPU, dynamically allocated.
// Each element is a cache-line aligned wrapper around kiss_pm_stats_dynamic_s.
|
||||
struct kiss_pm_stats_dynamic_aligned_s* exec;
|
||||
// Size of the exec array (number of elements)
|
||||
u_int exec_num_cpus;
|
||||
// Build time statistics
|
||||
struct kiss_pm_stats_static_s compile;
|
||||
};
|
||||
|
||||
typedef struct kiss_pm_stats_common_s *kiss_pm_stats_common;
|
||||
|
||||
enum kiss_pm_stats_update_compile_type {
|
||||
UPDATE_COMPILE_STATS_MEM,
|
||||
UPDATE_COMPILE_STATS_TIME,
|
||||
UPDATE_COMPILE_STATS_BOTH
|
||||
};
|
||||
|
||||
// In which format the statistics should be printed
|
||||
enum kiss_pm_stats_format {
|
||||
KISS_PM_TEXT_FORMAT_STATS = 0, // Textual, for viewing with text editor
|
||||
KISS_PM_CSV_FORMAT_STATS // CSV, for opening with Excel
|
||||
};
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
kiss_ret_val kiss_pm_stats_common_init(kiss_pm_stats_common new_stats);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
void kiss_pm_stats_common_free(kiss_pm_stats_common stats);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
void kiss_pm_stats_common_update_compile(
|
||||
kiss_pm_stats_common stats,
|
||||
u_int bytes,
|
||||
u_int compilation_time,
|
||||
enum kiss_pm_stats_update_compile_type type);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
void kiss_pm_stats_common_update_exec(kiss_pm_stats_common stats, u_int buf_size, u_int num_of_matches);
|
||||
|
||||
|
||||
// @brief
|
||||
// Updating the execution time of an execution of a buffer in tier2.
|
||||
//
|
||||
// @param stats - [in] The tier2 common stats.
|
||||
// @param exec_time - [in] The execution time.
|
||||
// @param user_cb_time - [in] The execution time spent in user callbacks.
|
||||
//
|
||||
// @return Void
|
||||
//
|
||||
// @note
|
||||
// If one of the stats variables wraps around, the aggregated variables will hold only the last exec stats.
|
||||
KISS_APPS_CPAPI
|
||||
void kiss_pm_stats_common_update_exec_time(kiss_pm_stats_common stats, u_int exec_time, u_int user_cb_time);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
void kiss_pm_stats_common_reset_exec(kiss_pm_stats_common stats);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
void kiss_pm_stats_common_print(
|
||||
kiss_pm_stats_common stats,
|
||||
enum kiss_pm_stats_type type,
|
||||
enum kiss_pm_stats_format format,
|
||||
BOOL print_headline
|
||||
);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
kiss_ret_val kiss_pm_stats_common_get(
|
||||
struct kiss_pm_stats_static_s *dst_compile,
|
||||
struct kiss_pm_stats_dynamic_s *dst_exec,
|
||||
const struct kiss_pm_stats_common_s *src
|
||||
);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
kiss_ret_val kiss_pm_stats_common_copy(kiss_pm_stats_common dst, const struct kiss_pm_stats_common_s *src);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
u_int kiss_pm_stats_common_get_serialize_size(void);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
kiss_ret_val kiss_pm_stats_common_serialize(const struct kiss_pm_stats_common_s *stats, u_char **buf, u_int *size);
|
||||
|
||||
KISS_APPS_CPAPI
|
||||
kiss_ret_val kiss_pm_stats_common_deserialize(
|
||||
kiss_pm_stats_common stats,
|
||||
u_char **buf,
|
||||
u_int *size,
|
||||
kiss_vbuf vbuf,
|
||||
kiss_vbuf_iter *vbuf_iter
|
||||
);
|
||||
|
||||
#endif // __kiss_pm_stats_h__
|
462
components/utils/pm/kiss_thin_nfa.cc
Normal file
462
components/utils/pm/kiss_thin_nfa.cc
Normal file
@@ -0,0 +1,462 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Thin NFA I/S
|
||||
// ------------
|
||||
// The thin NFA allows building and executing an automaton for string search, using the
|
||||
// Aho-Corasick algorithm.
|
||||
// The resulting automaton is built in a compact representation. Some states are "full" - they
|
||||
// have an explicit transition per character. Others are "partial" - they have some explicit transitions,
|
||||
// plus a "default transition". This is an epsilon-transition. For characters which don't have an
|
||||
// explicit transition, we follow the default transition, and look up the same character there.
|
||||
//
|
||||
// Source files
|
||||
// ------------
|
||||
// kiss_thin_nfa.cc (this file) - execution code.
|
||||
// kiss_thin_nfa_build.cc - allocation and destruction code. Contains code which is common to compilation
|
||||
// and serialization/deserialization. All objects which are part of the compiled automaton are created here.
|
||||
// kiss_thin_nfa_compile.cc - compilation code. Contains the logic that converts a set of strings into an automaton.
|
||||
// kiss_thin_nfa_analyze.cc - Validation and dump. Code that reads the BNFA and tries to make sense of it.
|
||||
// kiss_thin_nfa_impl.h - internal header file. APIs and definitions between the different source files.
|
||||
|
||||
|
||||
// ********************* INCLUDES **************************
|
||||
#include "kiss_thin_nfa_impl.h"
|
||||
#include "sasal.h"
|
||||
|
||||
SASAL_START // Multiple Pattern Matcher
|
||||
// Internal execution flags passed to kiss_thin_nfa_exec_one_buf_parallel_ex:
|
||||
#define KISS_PM_EXEC_LAST_BUFF 0x00000001 // This is the last buffer (preset buffer or the last buffer in vbuf)
|
||||
|
||||
|
||||
// The runtime status of the Thin NFA.
// One instance is set up by kiss_thin_nfa_exec and carried across all segments of the scanned buffer.
|
||||
struct kiss_bnfa_runtime_s {
|
||||
KissThinNFA *nfa_h; // The NFA we're executing
|
||||
// Compressed offset of the last state reached by the previous per-buffer scan.
// Heads that start at the beginning of the next buffer resume from it.
kiss_bnfa_comp_offset_t last_bnfa_offset; // Last state reached by exec_one_buf
|
||||
// Output list. Each entry is appended as (pattern ID, match position), where the position is
// the offset of the last matched byte counted from the start of the first buffer.
std::vector<std::pair<uint, uint>> *matches; // The matches we've found so far
|
||||
u_int scanned_so_far; // The length of all buffers before the current buffer
|
||||
};
|
||||
|
||||
|
||||
// Critical code path debugging - enabled only in debug mode.
|
||||
#define THIN_NFA_TRACE_TRANS(runtime, next_off, ch, op) \
|
||||
thinnfa_debug_perf( \
|
||||
"%s: Transition by 0x%02x to %d - %s\n", \
|
||||
FILE_LINE, \
|
||||
ch, \
|
||||
kiss_bnfa_offset_decompress(next_off), \
|
||||
op \
|
||||
)
|
||||
|
||||
#define TRANSLATE_CHAR_IF_NEEED(do_char_trans, char_trans_table, ch) \
|
||||
((u_char)((do_char_trans) ? ((char_trans_table)[ch]) : (ch)))
|
||||
|
||||
// Given a match for a pattern at a given position, insert an entry to the match list.
|
||||
// We may add more than one entry, depending on the number of matching patterns.
|
||||
//
|
||||
// Parameters:
|
||||
// runtime - the current status of Thin NFA execution.
|
||||
// one_buf_offset - the offset of the match, within the buffer currently scanned.
|
||||
// Together with runtime->scanned_so_far we can get the real match offset.
|
||||
// one_buf_len - the length of the buffer currently scanned. Used for $ processing.
|
||||
// exec_flags - the flags used.
|
||||
static CP_INLINE void
|
||||
kiss_thin_nfa_handle_match(struct kiss_bnfa_runtime_s *runtime, u_int pat_arr_offset,
|
||||
u_int one_buf_offset, u_int one_buf_len, u_int exec_flags)
|
||||
{
|
||||
static const char rname[] = "kiss_thin_nfa_handle_match";
|
||||
u_int match_pos;
|
||||
const kiss_thin_nfa_pattern_array_t *pat_arr;
|
||||
const kiss_thin_nfa_pattern_t *curr_id;
|
||||
const kiss_thin_nfa_pattern_t *pat_end;
|
||||
|
||||
// Where was the match? one_buf_offset is already moved beyond the characeter that caused the match,
|
||||
// so we subtract one to get this character's offset.
|
||||
match_pos = runtime->scanned_so_far + (one_buf_offset - 1);
|
||||
pat_arr = kiss_thin_nfa_offset_to_pat_array_ptr(runtime->nfa_h, pat_arr_offset);
|
||||
// Go over the patterns and add them to the match queue.
|
||||
pat_end = &(pat_arr->pattern[pat_arr->n_patterns]);
|
||||
thinnfa_debug_perf((
|
||||
"%s: Going over %u patterns, starting from offset %u\n",
|
||||
rname,
|
||||
pat_arr->n_patterns,
|
||||
pat_arr_offset
|
||||
));
|
||||
for (curr_id = &(pat_arr->pattern[0]); curr_id != pat_end; curr_id++) {
|
||||
thinnfa_debug(("%s: Match for pattern ID %d at %d len %d\n", rname, curr_id->id, match_pos, curr_id->len));
|
||||
|
||||
// Handle ^ - An N byte pattern at the start of the buffer would match at byte N-1.
|
||||
// NOTE: If the anchored state optimization is implemented in compilation, this test isn't needed.
|
||||
if ((curr_id->pattern_id_flags & KISS_PM_LSS_AT_BUF_START) && (match_pos != curr_id->len - 1)) {
|
||||
thinnfa_debug_perf(("%s: Not match because of ^ %d\n", rname, curr_id->id));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle $ - We must match at the buffer end, and it must be the last buffer
|
||||
if ((curr_id->pattern_id_flags & KISS_PM_LSS_AT_BUF_END) &&
|
||||
!((one_buf_offset == one_buf_len) && (exec_flags & KISS_PM_EXEC_LAST_BUFF))) {
|
||||
thinnfa_debug_perf(("%s: Not match because of $ %d\n", rname, curr_id->id));
|
||||
continue;
|
||||
}
|
||||
runtime->matches->emplace_back(curr_id->id, match_pos);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Wrapper to kiss_thin_nfa_handle_match, gets the state offset, not the ID.
|
||||
static CP_INLINE void
|
||||
kiss_thin_nfa_handle_match_state(struct kiss_bnfa_runtime_s *runtime, kiss_bnfa_comp_offset_t cur_offset,
|
||||
u_int one_buf_offset, u_int one_buf_len, u_int exec_flags)
|
||||
{
|
||||
const kiss_bnfa_state_t *state = kiss_bnfa_comp_offset_to_state(
|
||||
runtime->nfa_h->bnfa,
|
||||
cur_offset,
|
||||
KISS_BNFA_STATE_MATCH
|
||||
);
|
||||
kiss_thin_nfa_handle_match(runtime, state->match.match_id, one_buf_offset, one_buf_len, exec_flags);
|
||||
}
|
||||
|
||||
// Calculate the next state's offset, given a state and character. Good for full states only.
|
||||
// Faster than kiss_thin_nfa_get_next_offset. An offset peremeter is compressed 16-bit offset
|
||||
// The returned offset is also compressed
|
||||
static CP_INLINE kiss_bnfa_comp_offset_t
|
||||
kiss_thin_nfa_get_next_offset_full(const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t offset,
|
||||
unsigned char char_to_find)
|
||||
{
|
||||
const kiss_bnfa_state_t *state = kiss_bnfa_comp_offset_to_state(bnfa, offset, KISS_BNFA_STATE_FULL);
|
||||
return (kiss_bnfa_comp_offset_t)state->full.transitions[char_to_find];
|
||||
}
|
||||
|
||||
|
||||
// Calculate the next state's offset, given a state and character. Good for partial states only.
|
||||
// Also indicates whether the buffer position should be incremented (i.e. if an explicit transition was found)
|
||||
static CP_INLINE kiss_bnfa_comp_offset_t
|
||||
kiss_thin_nfa_get_next_offset_partial(const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t offset,
|
||||
unsigned char char_to_find, BOOL *inc_pos)
|
||||
{
|
||||
const kiss_bnfa_state_t *state = kiss_bnfa_comp_offset_to_state(bnfa, offset, KISS_BNFA_STATE_PARTIAL);
|
||||
u_int trans_num = state->partial.trans_num;
|
||||
u_int i;
|
||||
|
||||
// Simple linear search is fast for a few transitions. If we have many, we use a full state anyway.
|
||||
for (i = 0; i < trans_num; i++) {
|
||||
const struct kiss_bnfa_partial_transition_s *tran = &state->partial.transitions[i];
|
||||
// Smaller? Keep looking. Larger? Give up (transitions are sorted).
|
||||
if (tran->tran_char < char_to_find) continue;
|
||||
if (tran->tran_char > char_to_find) break;
|
||||
|
||||
// Found the character (explicit transition) - consume a characeter and move the automaton
|
||||
*inc_pos = TRUE;
|
||||
return tran->next_state_offset;
|
||||
}
|
||||
|
||||
// No explicit transition found - move to the fail state, without consuming a character.
|
||||
*inc_pos = FALSE;
|
||||
return state->partial.fail_state_offset;
|
||||
}
|
||||
|
||||
|
||||
// Calculate the next state's offset, when the current is a match state.
|
||||
// Doesn't consume a character (epsilon transition)
|
||||
static CP_INLINE kiss_bnfa_comp_offset_t
|
||||
kiss_thin_nfa_get_next_offset_match(CP_MAYBE_UNUSED const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t offset)
|
||||
{
|
||||
// After a match state we just move to the next consecutive state.
|
||||
return offset + (sizeof(kiss_bnfa_match_state_t) / KISS_BNFA_STATE_ALIGNMENT);
|
||||
}
|
||||
|
||||
#define PARALLEL_SCANS_NUM 4 // 4 heads scanning the buffer
|
||||
#define UNROLL_FACTOR 4 // Advance each head 4 bytes per loop
|
||||
|
||||
|
||||
// Move one head of the state machine. bnfa_offset must not be a match state.
|
||||
static CP_INLINE kiss_bnfa_comp_offset_t
|
||||
parallel_scan_advance_one(const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t bnfa_offset, const unsigned char ch)
|
||||
{
|
||||
while (bnfa_offset >= 0) {
|
||||
BOOL inc_pos;
|
||||
// Partial state - Look for an explicit transition, or use the fail state
|
||||
bnfa_offset = kiss_thin_nfa_get_next_offset_partial(bnfa, bnfa_offset, ch, &inc_pos);
|
||||
if (inc_pos) {
|
||||
// Found an explicit transition - can move to the next state.
|
||||
return bnfa_offset;
|
||||
}
|
||||
}
|
||||
|
||||
// Full state (either we started with full, or the fail state chain reached one)
|
||||
return kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offset, ch);
|
||||
}
|
||||
|
||||
|
||||
// Check if all heads are on a full state.
|
||||
// If they are - advance all heads and return TRUE.
|
||||
// If they aren't - do nothing and return FALSE.
|
||||
static CP_INLINE BOOL
|
||||
parallel_scan_advance_if_full(
|
||||
const kiss_bnfa_state_t *bnfa,
|
||||
kiss_bnfa_comp_offset_t *bnfa_offsets,
|
||||
const unsigned char **buf_pos
|
||||
)
|
||||
{
|
||||
kiss_bnfa_comp_offset_t offsets_and;
|
||||
|
||||
// If the bitwise AND of 4 offsets (PARALLEL_SCANS_NUM) is negative, they're all negaitve, so all states are full.
|
||||
offsets_and = bnfa_offsets[0] & bnfa_offsets[1] & bnfa_offsets[2] & bnfa_offsets[3];
|
||||
if (CP_UNLIKELY(offsets_and >= 0)) return FALSE;
|
||||
|
||||
// All states are full - make 4 transitions (PARALLEL_SCANS_NUM).
|
||||
bnfa_offsets[0] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offsets[0], *(buf_pos[0]));
|
||||
buf_pos[0]++;
|
||||
bnfa_offsets[1] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offsets[1], *(buf_pos[1]));
|
||||
buf_pos[1]++;
|
||||
bnfa_offsets[2] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offsets[2], *(buf_pos[2]));
|
||||
buf_pos[2]++;
|
||||
bnfa_offsets[3] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offsets[3], *(buf_pos[3]));
|
||||
buf_pos[3]++;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
// Repeat parallel_scan_advance_if_full up to 4 times (UNROLL_FACTOR).
|
||||
// Retrurn TRUE if all 4 were done, FALSE if stopped earlier.
|
||||
static CP_INLINE BOOL
|
||||
parallel_scan_advance_if_full_unroll(
|
||||
const kiss_bnfa_state_t *bnfa,
|
||||
kiss_bnfa_comp_offset_t *bnfa_offsets,
|
||||
const unsigned char **buf_pos
|
||||
)
|
||||
{
|
||||
if (!parallel_scan_advance_if_full(bnfa, bnfa_offsets, buf_pos)) return FALSE;
|
||||
if (!parallel_scan_advance_if_full(bnfa, bnfa_offsets, buf_pos)) return FALSE;
|
||||
if (!parallel_scan_advance_if_full(bnfa, bnfa_offsets, buf_pos)) return FALSE;
|
||||
if (!parallel_scan_advance_if_full(bnfa, bnfa_offsets, buf_pos)) return FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
// Find the offset where each head should start and stop
|
||||
static void
|
||||
calc_head_buf_range(const u_char *buffer, u_int len, const u_char **head_start_pos, const u_char **head_end_pos)
|
||||
{
|
||||
static const char rname[] = "calc_head_buf_range";
|
||||
const u_char *orig_buf = buffer;
|
||||
u_int len_per_head = len / PARALLEL_SCANS_NUM;
|
||||
u_int rem = len % PARALLEL_SCANS_NUM;
|
||||
u_int i;
|
||||
|
||||
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
|
||||
u_int head_len = len_per_head;
|
||||
// Give each head its share, late heads get a part of the remainder.
|
||||
// The "Handle remainders" loop below assumes the last head has the largest part.
|
||||
if (i >= PARALLEL_SCANS_NUM-rem) head_len++;
|
||||
head_start_pos[i] = buffer;
|
||||
buffer += head_len;
|
||||
head_end_pos[i] = buffer;
|
||||
thinnfa_debug(("%s: Head %u gets range %ld:%ld\n", rname,
|
||||
i, head_start_pos[i]-orig_buf, head_end_pos[i]-orig_buf));
|
||||
}
|
||||
}
|
||||
|
||||
// Set the initial BNFA offset for each head
|
||||
static void
|
||||
set_head_bnfa_offset(
|
||||
struct kiss_bnfa_runtime_s *runtime,
|
||||
kiss_bnfa_comp_offset_t *bnfa_pos,
|
||||
const u_char **buf_pos,
|
||||
const u_char *buffer
|
||||
)
|
||||
{
|
||||
const KissThinNFA *nfa_h = runtime->nfa_h;
|
||||
kiss_bnfa_comp_offset_t init_off = kiss_bnfa_offset_compress(nfa_h->min_bnfa_offset);
|
||||
u_int i;
|
||||
|
||||
if (nfa_h->flags & KISS_THIN_NFA_HAS_ANCHOR) {
|
||||
// Start from the root (next full state after the anchored root)
|
||||
init_off++;
|
||||
}
|
||||
|
||||
// Heads that scan from the beginning of the buffer, will start at previous buffer's ending state.
|
||||
// The rest start anew.
|
||||
// Several scanning heads will start at buffer's beginning when buffer's size is less than PARALLEL_SCANS_NUM
|
||||
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
|
||||
if (buf_pos[i] - buffer == 0) {
|
||||
bnfa_pos[i] = runtime->last_bnfa_offset;
|
||||
} else {
|
||||
bnfa_pos[i] = init_off;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Run Thin NFA parallely on a single buffer.
|
||||
static CP_INLINE void
|
||||
kiss_thin_nfa_exec_one_buf_parallel_ex(
|
||||
struct kiss_bnfa_runtime_s *runtime,
|
||||
const u_char *buffer,
|
||||
u_int len, u_int flags,
|
||||
BOOL do_char_trans,
|
||||
u_char *char_trans_table
|
||||
)
|
||||
{
|
||||
const kiss_bnfa_state_t *bnfa = runtime->nfa_h->bnfa;
|
||||
const unsigned char *end, *buf_pos[PARALLEL_SCANS_NUM], *head_end_pos[PARALLEL_SCANS_NUM];
|
||||
kiss_bnfa_comp_offset_t bnfa_offset[PARALLEL_SCANS_NUM];
|
||||
u_int i;
|
||||
u_int overlap_bytes;
|
||||
int overlap_head_mask;
|
||||
|
||||
// set starting position, ending position and state for each scanning head
|
||||
calc_head_buf_range(buffer, len, buf_pos, head_end_pos);
|
||||
set_head_bnfa_offset(runtime, bnfa_offset, buf_pos, buffer);
|
||||
|
||||
end = buffer + len;
|
||||
|
||||
// unroll 16 (PARALLEL_SCANS_NUM * UNROLL_FACTOR) times, while we have at least 4 input bytes to process.
|
||||
while (buf_pos[PARALLEL_SCANS_NUM-1] + UNROLL_FACTOR <= end) {
|
||||
// Fastpath - Advance all heads up to 4 chars, as long as they're all on a full state.
|
||||
if (CP_LIKELY(parallel_scan_advance_if_full_unroll(bnfa, bnfa_offset, buf_pos))) continue;
|
||||
|
||||
// At least one head is on partial or match - advance all 4 by their type.
|
||||
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
|
||||
if (bnfa_offset[i] < 0) {
|
||||
// Semi-fastpath. When we reach this loop, normally 3 of 4 heads are on a full state.
|
||||
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offset[i], *(buf_pos[i]));
|
||||
(buf_pos[i])++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (kiss_bnfa_state_type(bnfa, bnfa_offset[i]) == KISS_BNFA_STATE_MATCH) {
|
||||
// Handle a match
|
||||
kiss_thin_nfa_handle_match_state(runtime, bnfa_offset[i], (u_int)(buf_pos[i] - buffer), len, flags);
|
||||
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_match(bnfa, bnfa_offset[i]);
|
||||
}
|
||||
// Advance to the next state
|
||||
bnfa_offset[i] = parallel_scan_advance_one(bnfa, bnfa_offset[i],
|
||||
TRANSLATE_CHAR_IF_NEEED(do_char_trans, char_trans_table, *(buf_pos[i])));
|
||||
(buf_pos[i])++;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle remainders (the above loop jumps 4 chars at a time, so it may leave up to 3 unscanned)
|
||||
while (buf_pos[PARALLEL_SCANS_NUM-1] < end) {
|
||||
// Advance only heads that haven't reached their end position
|
||||
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
|
||||
if (buf_pos[i] >= head_end_pos[i]) continue;
|
||||
if (kiss_bnfa_state_type(bnfa, bnfa_offset[i]) == KISS_BNFA_STATE_MATCH) {
|
||||
// Handle a match
|
||||
kiss_thin_nfa_handle_match_state(runtime, bnfa_offset[i], (u_int)(buf_pos[i] - buffer), len, flags);
|
||||
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_match(bnfa, bnfa_offset[i]);
|
||||
}
|
||||
// Advance to the next state
|
||||
bnfa_offset[i] = parallel_scan_advance_one(bnfa, bnfa_offset[i],
|
||||
TRANSLATE_CHAR_IF_NEEED(do_char_trans, char_trans_table, *(buf_pos[i])));
|
||||
(buf_pos[i])++;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle overlap - advance all heads into the next head's range, as long as there's a chance
|
||||
// for a match which started in this head's range.
|
||||
overlap_head_mask = (1<<(PARALLEL_SCANS_NUM-1))-1; // All heads except the last
|
||||
for (overlap_bytes = 0; overlap_head_mask!=0; overlap_bytes++) {
|
||||
// Advance each head (except the last) as long as overlap is needed for it
|
||||
for (i=0; i<PARALLEL_SCANS_NUM-1; i++) {
|
||||
int my_mask = (1<<i);
|
||||
u_int state_depth;
|
||||
|
||||
// Did we stop this head's overlap already?
|
||||
if (!(overlap_head_mask & my_mask)) continue;
|
||||
|
||||
// Stop the overlap if the state is not as deep as the overlap, or the buffer ended.
|
||||
state_depth = kiss_bnfa_offset_to_depth(runtime->nfa_h, bnfa_offset[i]);
|
||||
if ((state_depth <= overlap_bytes) || (buf_pos[i] >= end)) {
|
||||
overlap_head_mask &= ~my_mask;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Advance the state machine, including match handling
|
||||
if (kiss_bnfa_state_type(bnfa, bnfa_offset[i]) == KISS_BNFA_STATE_MATCH) {
|
||||
// Handle a match
|
||||
kiss_thin_nfa_handle_match_state(runtime, bnfa_offset[i], (u_int)(buf_pos[i] - buffer), len, flags);
|
||||
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_match(bnfa, bnfa_offset[i]);
|
||||
}
|
||||
// Advance to the next state
|
||||
bnfa_offset[i] = parallel_scan_advance_one(bnfa, bnfa_offset[i],
|
||||
TRANSLATE_CHAR_IF_NEEED(do_char_trans, char_trans_table, *(buf_pos[i])));
|
||||
(buf_pos[i])++;
|
||||
}
|
||||
}
|
||||
|
||||
// We may have stopped on a match state. If so - handle and advance
|
||||
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
|
||||
if (kiss_bnfa_state_type(bnfa, bnfa_offset[i]) == KISS_BNFA_STATE_MATCH) {
|
||||
// Handle a match
|
||||
kiss_thin_nfa_handle_match_state(runtime, bnfa_offset[i], (u_int)(buf_pos[i] - buffer), len, flags);
|
||||
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_match(bnfa, bnfa_offset[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// The next scan should start at the state where the current scan ended.
|
||||
// If multiple heads reached the buffer end, use the one with the lowest index,
|
||||
// because it has covered more data than other heads that reached the buffer end.
|
||||
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
|
||||
if (buf_pos[i] == buf_pos[PARALLEL_SCANS_NUM-1]) {
|
||||
runtime->last_bnfa_offset = bnfa_offset[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Execute a thin NFA on a buffer.
|
||||
// Parameters:
|
||||
// nfa_h - the NFA handle
|
||||
// buf - a buffer to scan.
|
||||
// matches - output - will be filled with a kiss_pmglob_match_data element for each match.
|
||||
void
|
||||
kiss_thin_nfa_exec(KissThinNFA *nfa_h, const Buffer& buf, std::vector<std::pair<uint, uint>> &matches)
|
||||
{
|
||||
struct kiss_bnfa_runtime_s bnfa_runtime;
|
||||
|
||||
dbgAssert(nfa_h != nullptr) << "kiss_thin_nfa_exec() was called with null handle";
|
||||
|
||||
if (buf.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Set the runtime status structure
|
||||
bnfa_runtime.nfa_h = nfa_h;
|
||||
bnfa_runtime.last_bnfa_offset = kiss_bnfa_offset_compress(nfa_h->min_bnfa_offset); // The initial state
|
||||
bnfa_runtime.matches = &matches;
|
||||
bnfa_runtime.scanned_so_far = 0;
|
||||
|
||||
auto segments = buf.segRange();
|
||||
for( auto iter = segments.begin(); iter != segments.end(); iter++ ) {
|
||||
const u_char * data = iter->data();
|
||||
u_int len = iter->size();
|
||||
u_int flags = ((iter+1)==segments.end()) ? KISS_PM_EXEC_LAST_BUFF : 0;
|
||||
if (nfa_h->flags & KISS_THIN_NFA_USE_CHAR_XLATION) {
|
||||
kiss_thin_nfa_exec_one_buf_parallel_ex(&bnfa_runtime, data, len, flags, TRUE, nfa_h->xlation_tab);
|
||||
} else {
|
||||
kiss_thin_nfa_exec_one_buf_parallel_ex(&bnfa_runtime, data, len, flags, FALSE, nullptr);
|
||||
}
|
||||
bnfa_runtime.scanned_so_far += len;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
SASAL_END
|
1499
components/utils/pm/kiss_thin_nfa_analyze.cc
Normal file
1499
components/utils/pm/kiss_thin_nfa_analyze.cc
Normal file
File diff suppressed because it is too large
Load Diff
261
components/utils/pm/kiss_thin_nfa_base.h
Normal file
261
components/utils/pm/kiss_thin_nfa_base.h
Normal file
@@ -0,0 +1,261 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __kiss_thin_nfa_base_h__
|
||||
#define __kiss_thin_nfa_base_h__
|
||||
|
||||
#include "general_adaptor.h"
|
||||
|
||||
// ****************************** OVERVIEW *******************************
|
||||
// Contains basic Thin NFA structure, used by kiss_pm and bolt (prescan)
|
||||
// ***********************************************************************
|
||||
|
||||
#define KISS_THIN_NFA_ALPHABET_SIZE 256
|
||||
|
||||
// Binary representation of the Thin NFA.
|
||||
// This is what's actually used during runtime.
|
||||
//
|
||||
// Offsets in the BNFA
|
||||
// -------------------
|
||||
// Offsets are signed 32-bit integers, specifying the distance in bytes from the "offset 0" point.
|
||||
//
|
||||
// Offset 0 isn't the BNFA start - there are negative offsets:
|
||||
// All full states are in negative offsets. This is the only way to know that a state is full.
|
||||
// All other states are in positive offsets.
|
||||
//
|
||||
// In full states, offsets are encoded in 16 bits.
|
||||
// In partial states, offsets are encoded in 24 bits.
|
||||
// Offsets are compressed:
|
||||
// Positive offsets are divided by 4. This is possible because all state sizes are a multiple of 4 bytes.
|
||||
// Negative offsets are divided by 512 (the size of a full state). This is possible because negative offsets
|
||||
// are only used for full states, so their offsets are a (negative) multiple of the state size.
|
||||
//
|
||||
// Structure of a BNFA state
|
||||
// -------------------------
|
||||
// 1. Full state:
|
||||
// a. No header. Identified by the fact that its BNFA offset is negative.
|
||||
// b. 256 transitions, 16bits each (compressed offsets).
|
||||
// 2. Common header, to partial and match states:
|
||||
// a. State type - 2 bits.
|
||||
// 3. Partial state:
|
||||
// a. State type - 2 bits.
|
||||
// b. Transition number - 6 bits.
|
||||
// c. Fail state offset (compressed) - 24 bits.
|
||||
// d. Per transition:
|
||||
// 1) Character - 8 bits
|
||||
// 2) Next state offset (compressed) - 24 bits
|
||||
// 4. Match state:
|
||||
// a. State type - 2 bits.
|
||||
// b. Unused - 6 bits.
|
||||
// c. Match ID - 24 bits.
|
||||
//
|
||||
// Examples:
|
||||
//
|
||||
// Partial state, 2 transitions - 'a'->100, 'b'->104, fail-> -3072
|
||||
// +----+---+-----+---+-----+---+-----+
|
||||
// Bits: | 2 | 6 | 24 | 8 | 24 | 8 | 24 |
|
||||
// +----+---+-----+---+-----+---+-----+
|
||||
// Data: | P | 2 | -3 | a | 25 | b | 26 |
|
||||
// +----+---+-----+---+-----+---+-----+
|
||||
//
|
||||
// Full state, 0x00->200, 0x01->204, 0xff->280
|
||||
// +-----+-----+ +-----+
|
||||
// Bits: | 16 | 16 | | 16 |
|
||||
// +-----+-----+ .... +-----+
|
||||
// Data: | 50 | 51 | | 70 |
|
||||
// +-----+-----+ +-----+
|
||||
|
||||
|
||||
// Types for normal and compressed (see comment above) BNFA offsets
|
||||
|
||||
typedef int kiss_bnfa_offset_t; // Offset in bytes
|
||||
typedef int kiss_bnfa_comp_offset_t; // Compressed offset
|
||||
typedef short kiss_bnfa_short_offset_t; // Compressed offset in 16bits (for full states)
|
||||
|
||||
#define KISS_BNFA_OFFSET_INVALID ((int)0x80000000)
|
||||
|
||||
// State types
|
||||
typedef enum {
|
||||
KISS_BNFA_STATE_PARTIAL,
|
||||
KISS_BNFA_STATE_MATCH,
|
||||
KISS_BNFA_STATE_FULL,
|
||||
|
||||
KISS_BNFA_STATE_TYPE_NUM
|
||||
} kiss_bnfa_state_type_t;
|
||||
|
||||
|
||||
// State structure
|
||||
|
||||
// Use some header bits for the state type
|
||||
#define KISS_BNFA_STATE_TYPE_BITS 2
|
||||
|
||||
// The type must fit in KISS_BNFA_STATE_TYPE_BITS bits
|
||||
KISS_ASSERT_COMPILE_TIME(KISS_BNFA_STATE_TYPE_NUM <= (1<<KISS_BNFA_STATE_TYPE_BITS));
|
||||
|
||||
// Transition - partial state implementation
|
||||
struct kiss_bnfa_partial_transition_s {
|
||||
u_int tran_char:8;
|
||||
kiss_bnfa_comp_offset_t next_state_offset:24;
|
||||
};
|
||||
|
||||
#define KISS_BNFA_NUM_TRANS_BITS (8-KISS_BNFA_STATE_TYPE_BITS)
|
||||
#define KISS_BNFA_MAX_TRANS_NUM ((1<<KISS_BNFA_NUM_TRANS_BITS)-1)
|
||||
|
||||
// Header common to all state types (except full)
|
||||
typedef struct {
|
||||
kiss_bnfa_state_type_t type:KISS_BNFA_STATE_TYPE_BITS;
|
||||
u_int pad:(32-KISS_BNFA_STATE_TYPE_BITS);
|
||||
} kiss_bnfa_minimal_state_t;
|
||||
|
||||
// Partial state
|
||||
typedef struct {
|
||||
kiss_bnfa_state_type_t type:KISS_BNFA_STATE_TYPE_BITS;
|
||||
u_int trans_num:KISS_BNFA_NUM_TRANS_BITS;
|
||||
kiss_bnfa_comp_offset_t fail_state_offset:24;
|
||||
struct kiss_bnfa_partial_transition_s transitions[1]; // Actual size is trans_num
|
||||
} kiss_bnfa_partial_state_t;
|
||||
|
||||
// Match state
|
||||
typedef struct {
|
||||
kiss_bnfa_state_type_t type:KISS_BNFA_STATE_TYPE_BITS;
|
||||
u_int unused:KISS_BNFA_NUM_TRANS_BITS;
|
||||
u_int match_id:24;
|
||||
} kiss_bnfa_match_state_t;
|
||||
|
||||
// Full state
|
||||
// Full state: no header at all, just one 16-bit compressed offset for every character of the alphabet.
typedef struct {
|
||||
kiss_bnfa_short_offset_t transitions[KISS_THIN_NFA_ALPHABET_SIZE]; // BNFA offset per character
|
||||
} kiss_bnfa_full_state_t;
|
||||
|
||||
// Any state
|
||||
// Overlay of all state layouts. For non-full states, read common.type first to learn which member is valid.
typedef union {
|
||||
kiss_bnfa_minimal_state_t common; // Type tag only - usable for partial and match states
|
||||
kiss_bnfa_partial_state_t partial; // Valid when the type is KISS_BNFA_STATE_PARTIAL
|
||||
kiss_bnfa_match_state_t match; // Valid when the type is KISS_BNFA_STATE_MATCH
|
||||
kiss_bnfa_full_state_t full; // Full states have no tag; see kiss_bnfa_state_type() for how they are recognized
|
||||
} kiss_bnfa_state_t;
|
||||
|
||||
// All states are aligned on this boundary
|
||||
#define KISS_BNFA_STATE_ALIGNMENT sizeof(int)
|
||||
|
||||
// Compress a given offset when the state type is known. If the type is a compile-time constant, it's faster than
|
||||
// kiss_bnfa_offset_compress since it should be optimized
|
||||
// Convert a byte offset to its compressed form, given the type of the state it refers to.
// Full-state offsets are counted in units of sizeof(kiss_bnfa_full_state_t); offsets of all
// other state types are counted in units of KISS_BNFA_STATE_ALIGNMENT.
static CP_INLINE kiss_bnfa_comp_offset_t
kiss_bnfa_offset_quick_compress(kiss_bnfa_offset_t off, kiss_bnfa_state_type_t type)
{
    const int unit = (type == KISS_BNFA_STATE_FULL)
        ? (int)sizeof(kiss_bnfa_full_state_t)
        : (int)KISS_BNFA_STATE_ALIGNMENT;

    return off / unit;
}
|
||||
|
||||
// Decompress a given offset when the state type is known. If the type is a compile-time constant, it's faster than
|
||||
// kiss_bnfa_offset_decompress since it should be optimized
|
||||
// Convert a compressed offset back to a byte offset, given the type of the state it refers to.
// This is the inverse of kiss_bnfa_offset_quick_compress: the compressed value is multiplied by
// sizeof(kiss_bnfa_full_state_t) for full states and by KISS_BNFA_STATE_ALIGNMENT otherwise.
static CP_INLINE kiss_bnfa_offset_t
kiss_bnfa_offset_quick_decompress(kiss_bnfa_comp_offset_t comp_off, kiss_bnfa_state_type_t type)
{
    const int unit = (type == KISS_BNFA_STATE_FULL)
        ? (int)sizeof(kiss_bnfa_full_state_t)
        : (int)KISS_BNFA_STATE_ALIGNMENT;

    return comp_off * unit;
}
|
||||
|
||||
// Compress a BNFA offset, for use in partial states (24-bit encoding) and full states (16-bit encoding)
|
||||
// Compress a byte offset when the state type is not known in advance.
// A negative offset is compressed as a full-state offset; any other offset is compressed
// with the alignment unit used for partial states.
static CP_INLINE kiss_bnfa_comp_offset_t
kiss_bnfa_offset_compress(kiss_bnfa_offset_t off)
{
    if (off < 0) {
        return kiss_bnfa_offset_quick_compress(off, KISS_BNFA_STATE_FULL);
    }
    return kiss_bnfa_offset_quick_compress(off, KISS_BNFA_STATE_PARTIAL);
}
|
||||
|
||||
// Decompress a BNFA offset, which was stored in a partial state (24-bit encoding) and full states (16-bit encoding)
|
||||
// Decompress an offset when the state type is not known in advance.
// A negative compressed offset is expanded as a full-state offset; any other value is
// expanded with the alignment unit used for partial states.
static CP_INLINE kiss_bnfa_offset_t
kiss_bnfa_offset_decompress(kiss_bnfa_comp_offset_t off)
{
    if (off < 0) {
        return kiss_bnfa_offset_quick_decompress(off, KISS_BNFA_STATE_FULL);
    }
    return kiss_bnfa_offset_quick_decompress(off, KISS_BNFA_STATE_PARTIAL);
}
|
||||
|
||||
// Get a state in the BNFA given its offset
|
||||
// Resolve a (byte) BNFA offset to a read-only state pointer.
// bnfa is the address of offset 0; bnfa_offset is added to it in bytes and may be negative.
static CP_INLINE const kiss_bnfa_state_t *
kiss_bnfa_offset_to_state(const kiss_bnfa_state_t *bnfa, kiss_bnfa_offset_t bnfa_offset)
{
    // Go through char* so the addition is done in bytes, not in units of the union size
    return (const kiss_bnfa_state_t *)((const char *)bnfa + bnfa_offset);
}
|
||||
|
||||
// Get a state in the BNFA given its offset - without const, usable for writing the state
|
||||
// Resolve a (byte) BNFA offset to a writable state pointer.
// Same address computation as the const variant, but the result may be used to fill in the state.
static CP_INLINE kiss_bnfa_state_t *
kiss_bnfa_offset_to_state_write(kiss_bnfa_state_t *bnfa, kiss_bnfa_offset_t bnfa_offset)
{
    // Go through char* so the addition is done in bytes, not in units of the union size
    return (kiss_bnfa_state_t *)((char *)bnfa + bnfa_offset);
}
|
||||
|
||||
// Get a state in the BNFA given its compressed offset
|
||||
// Resolve a compressed BNFA offset to a read-only state pointer.
// The caller supplies the state type, which selects the unit used to expand the offset.
static CP_INLINE const kiss_bnfa_state_t *
kiss_bnfa_comp_offset_to_state(
    const kiss_bnfa_state_t *bnfa,
    kiss_bnfa_comp_offset_t bnfa_comp_offset,
    kiss_bnfa_state_type_t type
)
{
    const kiss_bnfa_offset_t byte_offset = kiss_bnfa_offset_quick_decompress(bnfa_comp_offset, type);

    return kiss_bnfa_offset_to_state(bnfa, byte_offset);
}
|
||||
|
||||
// Get the state type by its BNFA offset
|
||||
// Determine the type of the state at a compressed BNFA offset.
// A negative compressed offset is reported as a full state without touching memory;
// otherwise the type tag is read from the state's header.
static CP_INLINE kiss_bnfa_state_type_t
kiss_bnfa_state_type(const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t bnfa_comp_offset)
{
    if (bnfa_comp_offset >= 0) {
        // Partial and match states are expanded with the same unit, so PARTIAL works for both
        const kiss_bnfa_state_t *state =
            kiss_bnfa_comp_offset_to_state(bnfa, bnfa_comp_offset, KISS_BNFA_STATE_PARTIAL);
        return state->common.type;
    }
    return KISS_BNFA_STATE_FULL;
}
|
||||
|
||||
|
||||
// State size
|
||||
|
||||
// Get the size of a partial state with N transitions
|
||||
// Size in bytes of a partial state that has trans_num transitions:
// everything before the transitions[] member, plus one table entry per transition.
static CP_INLINE u_int
kiss_bnfa_partial_state_size(u_int trans_num)
{
    const u_int table_bytes = (u_int)(sizeof(struct kiss_bnfa_partial_transition_s) * (trans_num));

    return (u_int)KISS_OFFSETOF(kiss_bnfa_partial_state_t, transitions) + table_bytes;
}
|
||||
|
||||
// Get the size of an existing state
|
||||
// Size in bytes of the state stored at the given (byte) BNFA offset.
// Partial states are sized from their trans_num field; match and full states have a fixed size.
// Returns 0 if the type is none of the three real state types.
static CP_INLINE u_int
kiss_bnfa_state_size(const kiss_bnfa_state_t *bnfa, kiss_bnfa_offset_t offset)
{
    const kiss_bnfa_state_type_t type = kiss_bnfa_state_type(bnfa, kiss_bnfa_offset_compress(offset));

    if (type == KISS_BNFA_STATE_PARTIAL) {
        const kiss_bnfa_state_t *state = kiss_bnfa_offset_to_state(bnfa, offset);
        return kiss_bnfa_partial_state_size(state->partial.trans_num);
    }
    if (type == KISS_BNFA_STATE_MATCH) {
        return sizeof(kiss_bnfa_match_state_t);
    }
    if (type == KISS_BNFA_STATE_FULL) {
        return sizeof(kiss_bnfa_full_state_t);
    }

    // KISS_BNFA_STATE_TYPE_NUM - can't happen
    return 0;
}
|
||||
|
||||
// Flags for kiss_thin_nfa_s.flags and kiss_thin_nfa_prescan_hdr_s.flags
|
||||
// Thin NFA option bits. Each enumerator is a distinct bit.
enum kiss_thin_nfa_flags_e {
|
||||
KISS_THIN_NFA_USE_CHAR_XLATION = 0x01, // Used for caseless and/or digitless
|
||||
KISS_THIN_NFA_HAS_ANCHOR = 0x02, // State at offset 0 is anchored root, not root
|
||||
};
|
||||
|
||||
|
||||
#endif // __kiss_thin_nfa_base_h__
|
242
components/utils/pm/kiss_thin_nfa_build.cc
Normal file
242
components/utils/pm/kiss_thin_nfa_build.cc
Normal file
@@ -0,0 +1,242 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Thin NFA Construction and Destruction
|
||||
// -------------------------------------
|
||||
// This file contains code that builds a Thin NFA.
|
||||
// The functions here may be called from compilation, serialization and de-serialization contexts.
|
||||
// The code allows allocating and releasing the Thin NFA structure, as well as serializing and deserializing it.
|
||||
|
||||
#include "kiss_thin_nfa_impl.h"
|
||||
#include "sasal.h"
|
||||
|
||||
SASAL_START // Multiple Pattern Matcher
|
||||
// Allocate and fill in a pattern ID structure
|
||||
// Append a copy of new_pat to the list headed by *pat_list_p, unless a pattern with the same
// ID is already on the list.
// Returns KISS_OK when the pattern was added or was already present, KISS_ERROR when the
// list node could not be allocated. *pat_list_p is updated when the list was empty.
kiss_ret_val
kiss_thin_nfa_add_pattern_id(kiss_thin_nfa_pattern_list_t **pat_list_p, const kiss_thin_nfa_pattern_t *new_pat)
{
    static const char rname[] = "kiss_thin_nfa_add_pattern_id";
    kiss_thin_nfa_pattern_list_t **link = pat_list_p;
    kiss_thin_nfa_pattern_list_t *node;

    // Walk to the terminating link, stopping early if the ID is already listed
    while (*link != NULL) {
        kiss_thin_nfa_pattern_t *existing = &(*link)->pattern;

        if (existing->id == new_pat->id) {
            // Already there - nothing to do
            thinnfa_debug((
                "%s: Pattern already exists - ID=%d flags=%x(%x) len=%d(%d)\n",
                rname,
                new_pat->id,
                new_pat->pattern_id_flags,
                existing->pattern_id_flags,
                new_pat->len,
                existing->len
            ));
            return KISS_OK;
        }
        link = &((*link)->next);
    }

    // Not found - allocate a new list node
    node = (kiss_thin_nfa_pattern_list_t *)kiss_pmglob_memory_kmalloc(sizeof(kiss_thin_nfa_pattern_list_t), rname);
    if (node == NULL) {
        thinnfa_debug_err(("%s: Failed to allocate pattern id\n", rname));
        return KISS_ERROR;
    }

    // Copy the pattern information into the node
    bcopy(new_pat, &node->pattern, sizeof(node->pattern));
    node->next = NULL;

    thinnfa_debug((
        "%s: Added pattern ID=%d flags=%x len=%d\n",
        rname,
        new_pat->id,
        new_pat->pattern_id_flags,
        new_pat->len
    ));

    // Hook the node onto the end of the list
    *link = node;

    return KISS_OK;
}
|
||||
|
||||
|
||||
// Free an entire list of pattern IDs.
|
||||
void
|
||||
kiss_thin_nfa_free_pattern_ids(kiss_thin_nfa_pattern_list_t *pat_list)
|
||||
{
|
||||
static const char rname[] = "kiss_thin_nfa_free_pattern_ids";
|
||||
kiss_thin_nfa_pattern_list_t *pat, *next;
|
||||
|
||||
for (pat = pat_list; pat != NULL; pat = next) {
|
||||
next = pat->next;
|
||||
thinnfa_debug((
|
||||
"%s: Releasing pattern ID=%d flags=%x len=%u\n",
|
||||
rname,
|
||||
pat->pattern.id,
|
||||
pat->pattern.pattern_id_flags,
|
||||
pat->pattern.len
|
||||
));
|
||||
kiss_pmglob_memory_kfree(pat, sizeof(kiss_thin_nfa_pattern_list_t), rname);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Allocate and initialize statistics
|
||||
// Prepare a Thin NFA statistics structure: initialize the common part, then zero the
// Thin-NFA-specific counters. Returns KISS_ERROR if the common part fails to initialize.
static kiss_ret_val
kiss_thin_nfa_stats_init(kiss_thin_nfa_stats stats)
{
    if (kiss_pm_stats_common_init(&(stats->common)) == KISS_OK) {
        bzero(&(stats->specific), sizeof(stats->specific));
        return KISS_OK;
    }

    return KISS_ERROR;
}
|
||||
|
||||
|
||||
// Free statistics
|
||||
static void
|
||||
kiss_thin_nfa_stats_free(kiss_thin_nfa_stats stats)
|
||||
{
|
||||
kiss_pm_stats_common_free(&(stats->common));
|
||||
}
|
||||
|
||||
|
||||
// Allocate the state depth map of a Thin NFA whose min/max BNFA offsets are already set.
// The map holds one byte per compressed offset between the compressed minimum and maximum.
// The allocated bytes are not initialized here. Returns KISS_ERROR if the allocation fails.
static kiss_ret_val
kiss_thin_nfa_alloc_depth_map(KissThinNFA *nfa)
{
    static const char rname[] = "kiss_thin_nfa_alloc_depth_map";
    struct kiss_thin_nfa_depth_map_s *map = &nfa->depth_map;

    // The depth map is addressed by the compressed offset
    const kiss_bnfa_comp_offset_t lowest = kiss_bnfa_offset_compress(nfa->min_bnfa_offset);
    const kiss_bnfa_comp_offset_t highest = kiss_bnfa_offset_compress(nfa->max_bnfa_offset);

    map->size = highest - lowest;
    map->mem_start = (u_char *)kiss_pmglob_memory_kmalloc_ex(map->size, rname, FW_KMEM_SLEEP);
    if (map->mem_start == NULL) {
        thinnfa_debug_err((
            "%s: Error allocating the depth map, size %d (BNFA offsets %d:%d)\n",
            rname,
            map->size,
            nfa->min_bnfa_offset,
            nfa->max_bnfa_offset
        ));
        return KISS_ERROR;
    }

    // Locate the entry of compressed offset 0. The lowest compressed offset is negative,
    // so that entry lies after mem_start.
    map->offset0 = map->mem_start - lowest;

    return KISS_OK;
}
|
||||
|
||||
|
||||
static void
|
||||
kiss_thin_nfa_destroy_depth_map(KissThinNFA *nfa)
|
||||
{
|
||||
static const char rname[] = "kiss_thin_nfa_destroy_depth_map";
|
||||
if (nfa->depth_map.mem_start != NULL) {
|
||||
kiss_pmglob_memory_kfree(nfa->depth_map.mem_start, nfa->depth_map.size, rname);
|
||||
nfa->depth_map.mem_start = NULL;
|
||||
nfa->depth_map.offset0 = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Release everything the Thin NFA holds: the BNFA buffer, the statistics,
// the pattern arrays buffer and the depth map. Members that are NULL are skipped.
KissThinNFA::~KissThinNFA()
{
    static const char rname[] = "~KissThinNFA";
    // the code here was once in kiss_thin_nfa_destroy
    const u_int bnfa_bytes = max_bnfa_offset - min_bnfa_offset;

    thinnfa_debug_major(("%s: Destroying Thin NFA %p, bnfa size=%d\n", rname, this, bnfa_bytes));

    // The buffer is freed through bnfa_start; bnfa only points into the same buffer
    if (bnfa_start != NULL) {
        kiss_pmglob_memory_kfree(bnfa_start, bnfa_bytes, rname);
        bnfa = NULL;
        bnfa_start = NULL;
    }

    kiss_thin_nfa_stats_free(&stats);

    if (pattern_arrays != NULL) {
        kiss_pmglob_memory_kfree(pattern_arrays, pattern_arrays_size, rname);
        pattern_arrays = NULL;
    }

    kiss_thin_nfa_destroy_depth_map(this);
}
|
||||
|
||||
|
||||
// Allocate a Thin NFA. The match info array and BNFA are left empty.
|
||||
std::unique_ptr<KissThinNFA>
|
||||
kiss_thin_nfa_create(u_int match_state_num, kiss_bnfa_offset_t min_offset, kiss_bnfa_offset_t max_offset)
|
||||
{
|
||||
static const char rname[] = "kiss_thin_nfa_create";
|
||||
|
||||
// Allocate the structure
|
||||
auto nfa = std::make_unique<KissThinNFA>();
|
||||
void *nfa_ptr = nfa.get();
|
||||
bzero(nfa_ptr, sizeof(*nfa));
|
||||
nfa->min_bnfa_offset = min_offset;
|
||||
nfa->max_bnfa_offset = max_offset;
|
||||
nfa->match_state_num = match_state_num;
|
||||
|
||||
// Allocate the bnfa array. Not initialized.
|
||||
u_int bnfa_size = max_offset - min_offset;
|
||||
nfa->bnfa_start = (kiss_bnfa_state_t *)kiss_pmglob_memory_kmalloc_ex(bnfa_size, rname, FW_KMEM_SLEEP);
|
||||
if (!nfa->bnfa_start) {
|
||||
thinnfa_debug_err((
|
||||
"%s: Error allocating the bnfa - size %d (offset %d:%d)\n",
|
||||
rname,
|
||||
bnfa_size,
|
||||
min_offset,
|
||||
max_offset
|
||||
));
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Calculate bnfa so bnfa_start would be at offset min_offset (min_offset<0, so bnfa>bnfa_start)
|
||||
nfa->bnfa = (kiss_bnfa_state_t *)((char *)nfa->bnfa_start - min_offset);
|
||||
|
||||
// Init the statistics
|
||||
if (kiss_thin_nfa_stats_init(&(nfa->stats)) != KISS_OK) {
|
||||
thinnfa_debug_err(("%s: Error initializing statistics structure\n", rname));
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Allocate the state depth map
|
||||
if (kiss_thin_nfa_alloc_depth_map(nfa.get()) != KISS_OK) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
thinnfa_debug_major((
|
||||
"%s: Allocated Thin NFA %p, bnfa size=%d (offsets %d:%d)\n",
|
||||
rname,
|
||||
nfa.get(),
|
||||
bnfa_size,
|
||||
min_offset,
|
||||
max_offset
|
||||
));
|
||||
|
||||
return nfa;
|
||||
}
|
||||
SASAL_END
|
2232
components/utils/pm/kiss_thin_nfa_compile.cc
Normal file
2232
components/utils/pm/kiss_thin_nfa_compile.cc
Normal file
File diff suppressed because it is too large
Load Diff
189
components/utils/pm/kiss_thin_nfa_impl.h
Normal file
189
components/utils/pm/kiss_thin_nfa_impl.h
Normal file
@@ -0,0 +1,189 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __h_kiss_thin_nfa_impl_h__
|
||||
#define __h_kiss_thin_nfa_impl_h__
|
||||
|
||||
// *********************** OVERVIEW ******************************
|
||||
// Thin NFA definitions, which are only used by Thin NFA files.
|
||||
// 1. A list of patterns which is associated with a finite state.
|
||||
// 2. APIs for building and destroying the Thin NFA structures.
|
||||
// ****************************************************************
|
||||
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "i_pm_scan.h"
|
||||
#include "kiss_patterns.h"
|
||||
#include "kiss_pm_stats.h"
|
||||
#include "kiss_thin_nfa_base.h"
|
||||
|
||||
KISS_ASSERT_COMPILE_TIME(KISS_PM_ALPHABET_SIZE == KISS_THIN_NFA_ALPHABET_SIZE);
|
||||
|
||||
// Information we keep about a pattern
|
||||
typedef struct {
|
||||
int id; // PM Internal pattern ID
|
||||
u_int pattern_id_flags; // KISS_PM_COMP_ prefix
|
||||
u_int len;
|
||||
} kiss_thin_nfa_pattern_t;
|
||||
|
||||
// Linked list of pattern information - held per finite state, to indicate what it's accepting.
|
||||
typedef struct kiss_thin_nfa_pattern_list_s {
|
||||
struct kiss_thin_nfa_pattern_list_s *next;
|
||||
kiss_thin_nfa_pattern_t pattern;
|
||||
} kiss_thin_nfa_pattern_list_t;
|
||||
|
||||
// Array of pattern information - offset to it held per finite state, to indicate what it's accepting.
|
||||
// Variable-length record: a count followed by that many pattern entries.
// Use kiss_thin_nfa_pattern_array_size() for the real size of a record with n patterns.
typedef struct kiss_thin_nfa_pattern_array_s {
|
||||
u_int n_patterns; // Number of valid entries in pattern[]
|
||||
// NOTE! Always keep this last!
|
||||
kiss_thin_nfa_pattern_t pattern[1]; // Dynamic array, not really 1
|
||||
// Do NOT add anything here!
|
||||
} kiss_thin_nfa_pattern_array_t;
|
||||
|
||||
// Size in bytes of a pattern array record that holds n_patterns entries.
static CP_INLINE u_int
kiss_thin_nfa_pattern_array_size(const u_int n_patterns)
{
    // sizeof the whole struct already contains the one placeholder element of pattern[],
    // so take that element out before adding the real number of entries.
    const u_int without_placeholder = sizeof(kiss_thin_nfa_pattern_array_t) - sizeof(kiss_thin_nfa_pattern_t);

    return without_placeholder + n_patterns * sizeof(kiss_thin_nfa_pattern_t);
}
|
||||
|
||||
// ThinNFA statistics
|
||||
|
||||
// Specific ThinNFA Statistics
|
||||
struct kiss_thin_nfa_specific_stats_s {
|
||||
u_int num_of_states; // number of states in this thin_nfa
|
||||
u_int num_of_final_states; // number of final states in this thin_nfa
|
||||
};
|
||||
|
||||
// Statistics for ThinNFA
|
||||
struct kiss_thin_nfa_stats_s {
|
||||
struct kiss_pm_stats_common_s common; // Run-time (per-CPU, dynamic) and build-time common statistics
|
||||
struct kiss_thin_nfa_specific_stats_s specific; // Build-time specific ThinNFA statistics
|
||||
};
|
||||
typedef struct kiss_thin_nfa_stats_s *kiss_thin_nfa_stats;
|
||||
|
||||
// Compressed BNFA offset -> state depth map
|
||||
// One depth byte per compressed BNFA offset. Index it through offset0, which accepts negative indices.
struct kiss_thin_nfa_depth_map_s {
|
||||
u_char *mem_start; // Array of depth per BNFA compressed offset
|
||||
u_int size; // Number of bytes allocated at mem_start
|
||||
u_char *offset0; // Positive/negative offsets are relative to this
|
||||
};
|
||||
|
||||
#define KISS_THIN_NFA_MAX_ENCODABLE_DEPTH 255 // Fit in u_char
|
||||
|
||||
// A Compiled Thin NFA, used at runtime
|
||||
// A compiled Thin NFA. The destructor frees bnfa_start, pattern_arrays and the depth map
// and releases the statistics.
// NOTE(review): the class holds raw owning pointers but its copy operations are left implicit;
// copying an instance would lead to a double free. It is handed around as std::unique_ptr in
// the code visible here - confirm no caller copies it.
class KissThinNFA {
|
||||
public:
|
||||
~KissThinNFA();
|
||||
|
||||
kiss_bnfa_state_t *bnfa_start; // The first (in memory) and initial state
|
||||
kiss_bnfa_state_t *bnfa; // The state at offset 0 (somewhere in the middle)
|
||||
kiss_bnfa_offset_t min_bnfa_offset; // The offset of the first (and initial) state.
|
||||
kiss_bnfa_offset_t max_bnfa_offset; // The offset after the last state.
|
||||
enum kiss_thin_nfa_flags_e flags;
|
||||
u_int match_state_num; // Number of match states in the machine
|
||||
u_int pattern_arrays_size; // Total size in bytes of concatenated pattern arrays
|
||||
kiss_thin_nfa_pattern_array_t *pattern_arrays; // A pointer to a buffer holding ALL pattern arrays, for ALL states
|
||||
struct kiss_thin_nfa_stats_s stats;
|
||||
u_int max_pat_len; // Length of the longest string
|
||||
u_char xlation_tab[KISS_PM_ALPHABET_SIZE]; // For caseless/digitless
|
||||
struct kiss_thin_nfa_depth_map_s depth_map; // State -> Depth mapping
|
||||
};
|
||||
|
||||
// Translate a pointer into the NFA's pattern arrays buffer into a byte offset from its start.
static CP_INLINE u_int
kiss_thin_nfa_pat_array_ptr_to_offset(const KissThinNFA *nfa, const kiss_thin_nfa_pattern_array_t *pat_arr)
{
    const char *buffer_start = (const char *)(nfa->pattern_arrays);
    const char *record = (const char *)pat_arr;

    return record - buffer_start;
}
|
||||
|
||||
// Translate a byte offset within the NFA's pattern arrays buffer into a pointer to the record there.
static CP_INLINE kiss_thin_nfa_pattern_array_t *
kiss_thin_nfa_offset_to_pat_array_ptr(const KissThinNFA *nfa, const u_int offset)
{
    char *buffer_start = (char *)(nfa->pattern_arrays);

    return (kiss_thin_nfa_pattern_array_t *)(buffer_start + offset);
}
|
||||
|
||||
// Get a state's depth
|
||||
// For very deep states (stored depth == 255, the largest encodable value), returns the maximum pattern length,
|
||||
// which would be greater/equal the real state depth.
|
||||
// Look up the depth of the state at a compressed BNFA offset.
// The map stores one byte per state; the value KISS_THIN_NFA_MAX_ENCODABLE_DEPTH is a
// marker for which the NFA's maximum pattern length is returned instead.
static CP_INLINE u_int
kiss_bnfa_offset_to_depth(const KissThinNFA *nfa, kiss_bnfa_comp_offset_t comp_offset)
{
    const u_int stored = nfa->depth_map.offset0[comp_offset];

    if (stored != KISS_THIN_NFA_MAX_ENCODABLE_DEPTH) {
        return stored;
    }
    return nfa->max_pat_len;
}
|
||||
|
||||
|
||||
// Create a new empty Thin NFA.
|
||||
// Allocates the BNFA and the match_data array, but doesn't fill them.
|
||||
std::unique_ptr<KissThinNFA>
|
||||
kiss_thin_nfa_create(
|
||||
u_int match_state_num,
|
||||
kiss_bnfa_offset_t min_offset,
|
||||
kiss_bnfa_offset_t max_offset
|
||||
);
|
||||
|
||||
|
||||
// Add a pattern (with given id, flags and length) to a list.
|
||||
// pat_list should point to the head of the list, *pat_list may be modified.
|
||||
kiss_ret_val
|
||||
kiss_thin_nfa_add_pattern_id(
|
||||
kiss_thin_nfa_pattern_list_t **pat_list,
|
||||
const kiss_thin_nfa_pattern_t *pat_info
|
||||
);
|
||||
|
||||
// Free all patterns on a list.
|
||||
void kiss_thin_nfa_free_pattern_ids(kiss_thin_nfa_pattern_list_t *pat_list);
|
||||
|
||||
// Compile a Thin NFA
|
||||
std::unique_ptr<KissThinNFA>
|
||||
kiss_thin_nfa_compile(
|
||||
const std::list<kiss_pmglob_string_s> &patterns,
|
||||
u_int compile_flags,
|
||||
KissPMError *error
|
||||
);
|
||||
|
||||
|
||||
// Validate Thin NFA
|
||||
BOOL kiss_thin_nfa_is_valid(const KissThinNFA *nfa_h);
|
||||
|
||||
void
|
||||
kiss_thin_nfa_exec(KissThinNFA *nfa_h, const Buffer &buffer, std::vector<std::pair<uint, uint>> &matches);
|
||||
|
||||
// Dump a PM
|
||||
kiss_ret_val kiss_thin_nfa_dump(const KissThinNFA *nfa_h, enum kiss_pm_dump_format_e format);
|
||||
|
||||
// Debugging macro wrappers.
|
||||
// All get a format string plus parameters in double parenthesis:
|
||||
// thinnfa_debug(("%s: hello, world\n", rname));
|
||||
// Meaning of each macro:
|
||||
// thinnfa_debug_critical - Critical error, printed by default.
|
||||
// thinnfa_debug_err - Error we should live with (e.g. usage error, memory allocation), not printed by default.
|
||||
// thinnfa_debug - Normal debug messages.
|
||||
// thinnfa_debug_major - Debug messages about several major events in Thin NFA construction. Use sparingly.
|
||||
// thinnfa_debug_extended - Low level debug messages, which may be printed in large numbers.
|
||||
// thinnfa_dbg - An "if" statement checking the debug flag (equivalent to thinnfa_debug).
|
||||
#define thinnfa_debug_critical(_str) kiss_debug_err(K_ERROR, _str)
|
||||
#define thinnfa_debug_err(_str) kiss_debug_err(K_THINNFA|K_PM, _str)
|
||||
#define thinnfa_debug(_str) kiss_debug_info(K_THINNFA, _str)
|
||||
#define thinnfa_debug_major(_str) kiss_debug_info(K_THINNFA|K_PM, _str)
|
||||
#define thinnfa_debug_extended(_str) kiss_debug_info(K_THINNFA, _str)
|
||||
#define thinnfa_debug_perf(_str) kiss_debug_info_perf(K_THINNFA, _str)
|
||||
#define thinnfa_dbg() kiss_dbg(K_THINNFA)
|
||||
|
||||
#endif // __h_kiss_thin_nfa_impl_h__
|
13760
components/utils/pm/lss_example.txt
Normal file
13760
components/utils/pm/lss_example.txt
Normal file
File diff suppressed because it is too large
Load Diff
103
components/utils/pm/pm_adaptor.cc
Normal file
103
components/utils/pm/pm_adaptor.cc
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "pm_adaptor.h"
|
||||
#include "sasal.h"
|
||||
|
||||
SASAL_START // Multiple Pattern Matcher
|
||||
int kiss_debug_err_flag = 0;
|
||||
|
||||
void
|
||||
kiss_debug_start()
|
||||
{
|
||||
kiss_debug_err_flag = 1;
|
||||
}
|
||||
|
||||
void
|
||||
kiss_debug_stop()
|
||||
{
|
||||
kiss_debug_err_flag = 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
kiss_pmglob_char_xlation_build(enum kiss_pmglob_char_xlation_flags_e flags, u_char tab[KISS_PM_ALPHABET_SIZE])
|
||||
{
|
||||
u_int i;
|
||||
|
||||
// Find the canonic character for each character.
|
||||
for (i=0; i<KISS_PM_ALPHABET_SIZE; i++) {
|
||||
u_char ch = (u_char)i;
|
||||
if ((flags & KISS_PMGLOB_CHAR_XLATION_DIGITS) && isdigit(ch)) {
|
||||
tab[ch] = '0';
|
||||
} else if (flags & KISS_PMGLOB_CHAR_XLATION_CASE) {
|
||||
tab[ch] = tolower(ch);
|
||||
} else {
|
||||
tab[ch] = ch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Reverse a character translation table, so we can find all characters that map to a canonic character.
|
||||
//
|
||||
// Since the reverse map maps one character to many, it's implemented this way:
|
||||
// 1. Characters are arranged in groups - all characters in a group map to the same canonic character.
|
||||
// 2. A group is represented as a cyclic linked list, where each character points to the next in the same group.
|
||||
// 3. Instead of pointers, we use characters - for each character, rev[ch] is the next character in the group.
|
||||
void
|
||||
kiss_pmglob_char_xlation_build_reverse(const u_char tab[KISS_PM_ALPHABET_SIZE], u_char rev[KISS_PM_ALPHABET_SIZE])
|
||||
{
|
||||
u_int i;
|
||||
|
||||
// Put each character in its own group
|
||||
for (i=0; i<KISS_PM_ALPHABET_SIZE; i++) {
|
||||
u_char ch = (u_char)i;
|
||||
rev[ch] = ch;
|
||||
}
|
||||
|
||||
// Take each character which is not canonic, and add it to its canonic char's group.
|
||||
for (i=0; i<KISS_PM_ALPHABET_SIZE; i++) {
|
||||
u_char ch = (u_char)i;
|
||||
u_char canonic = tab[ch];
|
||||
|
||||
if (canonic == ch) {
|
||||
// Already in the correct group (its own group)
|
||||
continue;
|
||||
}
|
||||
// Add to the linked list
|
||||
rev[ch] = rev[canonic];
|
||||
rev[canonic] = ch;
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, const KissPMError &e)
|
||||
{
|
||||
return os << "Reason: " << e.error_string;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
kiss_pm_error_set_details(KissPMError *error,
|
||||
kiss_pm_error_type error_type,
|
||||
const char error_string[])
|
||||
{
|
||||
if ((error == NULL) || (error->error_string != NULL)) // No error struct or error already set. Not a problem
|
||||
return;
|
||||
|
||||
error->error_type = error_type;
|
||||
error->error_string = error_string;
|
||||
return;
|
||||
}
|
||||
SASAL_END
|
229
components/utils/pm/pm_adaptor.h
Normal file
229
components/utils/pm/pm_adaptor.h
Normal file
@@ -0,0 +1,229 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _pm_adaptor_h_
|
||||
#define _pm_adaptor_h_
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
#include "general_adaptor.h"
|
||||
|
||||
#define KISS_PM_ALPHABET_SIZE 256
|
||||
#define KISS_APPS_CPAPI
|
||||
|
||||
// used to copy any struct, array, string, or variable
|
||||
#if 0
|
||||
#define DATA_BUFF_COPY(_buf, _buf_size, _data, _data_size) bcopy((_data), (_buf), (_data_size)); \
|
||||
(_buf) += (_data_size); \
|
||||
(*(_buf_size)) -= (_data_size)
|
||||
#endif
|
||||
|
||||
// Not using the original DATA_BUFF_COPY which uses bcopy. On 64bit libc2.5, it seems that bcopy reads
|
||||
// past the source buffer, as long as it is aligned. That's OK, but valgrind complains.
|
||||
// Copy _data_size bytes from _data to _buf, then advance _buf and shrink *_buf_size by the same amount.
// Wrapped in do { } while (0) so the three steps act as ONE statement: previously, under an
// unbraced if/else or loop, only the memcpy was controlled and the two updates always ran.
// Use it as a statement followed by a semicolon. _buf and _data_size are evaluated more than once.
#define DATA_BUFF_COPY(_buf, _buf_size, _data, _data_size) do { \
        memcpy((_buf), (_data), (_data_size));                   \
        (_buf) += (_data_size);                                  \
        (*(_buf_size)) -= (_data_size);                          \
    } while (0)
|
||||
|
||||
|
||||
#define INT_BUFF_COPY(_buf, _buf_size, _val) do { \
|
||||
int temp_val = _val; \
|
||||
DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(int)); \
|
||||
} while (0)
|
||||
|
||||
#define U_INT_BUFF_COPY(_buf, _buf_size, _val) do { \
|
||||
u_int temp_val = _val; \
|
||||
DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(u_int)); \
|
||||
} while (0)
|
||||
|
||||
#define U_SHORT_BUFF_COPY(_buf, _buf_size, _val) do { \
|
||||
u_short temp_val = _val; \
|
||||
DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(u_short)); \
|
||||
} while (0)
|
||||
|
||||
#define U_CHAR_BUFF_COPY(_buf, _buf_size, _val) do { \
|
||||
u_char temp_val = _val; \
|
||||
DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(u_char)); \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, _to, _data_size) \
|
||||
do { \
|
||||
if ((*(_buf_size)) >= (_data_size)) { \
|
||||
bcopy(_buf, _to, _data_size); \
|
||||
_buf += _data_size; \
|
||||
(*(_buf_size)) -= (_data_size); \
|
||||
} \
|
||||
else { \
|
||||
(*(_buf_size)) = 0; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define INT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
|
||||
DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, &_var, sizeof(int))
|
||||
|
||||
#define U_INT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
|
||||
DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, &_var, sizeof(u_int))
|
||||
|
||||
#define U_SHORT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
|
||||
DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, &_var, sizeof(u_short))
|
||||
|
||||
#define U_CHAR_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
|
||||
DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, &_var, sizeof(u_char))
|
||||
|
||||
|
||||
// Serialization magics, used to verify buffer structure
|
||||
#define KISS_PM_SERIALIZED 0x53525A50 // SRZP
|
||||
#define KISS_DFA_SERIALIZED 0x53525A44 // SRZD
|
||||
#define KISS_WM_SERIALIZED 0x53525A48 // SRZH
|
||||
#define KISS_THIN_NFA_SERIALIZED 0x53525A4E // SRZN
|
||||
#define KISS_EX_REM_SERIALIZED 0x53525A58 // SRZX
|
||||
#define KISS_STATS_SERIALIZED 0x53525A53 // SRZS
|
||||
#define KISS_STATE_SERIALIZED 0x53525A54 // SRZT
|
||||
#define KISS_PM_SERIALIZE_IGNORE_INT 0x53525A49 // SRZI
|
||||
#define KISS_KW_SERIALIZED 0x53525A4B // SRZK
|
||||
#define KISS_KW_MGR_SERIALIZED 0x53525A47 // SRZG
|
||||
|
||||
|
||||
// Classification of a pattern matcher error, stored in KissPMError::error_type.
typedef enum kiss_pm_error_type_e {
|
||||
KISS_PM_ERROR_SYNTAX = 0, // < Syntax error is an error in the way the pattern is written.
|
||||
KISS_PM_ERROR_INTERNAL, // < Internal error is an error caused by lack of resources or by design.
|
||||
KISS_PM_ERROR_COMPLEX_PATTERN, // < Pattern is too complex to compile - too many states or too much memory
|
||||
KISS_PM_ERROR_NO_ERROR
|
||||
} kiss_pm_error_type;
|
||||
|
||||
// Details of a pattern matcher error. A default-constructed object has no error string;
// kiss_pm_error_set_details() fills in the type and string of the first error reported.
class KissPMError {
|
||||
public:
|
||||
int pattern_id = -1; //< The user's pattern id
|
||||
kiss_pm_error_type error_type = KISS_PM_ERROR_INTERNAL; //< The error type syntax or internal
|
||||
const char *error_string = nullptr; //< string describing the problem
|
||||
u_int index = 0; //< The place that caused the problem. Best effort.
|
||||
const u_char *pattern_buf = nullptr; //< The user's pattern buffer
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const KissPMError &k);
|
||||
|
||||
void kiss_pm_error_set_details(KissPMError *error, kiss_pm_error_type error_type, const char error_string[]);
|
||||
|
||||
|
||||
// PATTERNS FLAGS
|
||||
// When adding a new pattern flag,
|
||||
// add a metadata string below and register it in kiss_pm_pattern_flags_data in kiss_pm.c
|
||||
// range from 0x00010000 to 0x80000000
// NOTE(review): KISS_PM_COMP_NO_HISTORY (0x00008000) and KISS_PM_COMP_REDUCE_SIZE (0x00004000) below
// lie outside the range stated above - confirm which one is authoritative.
|
||||
|
||||
// EXTERNAL PATTERN FLAGS
|
||||
// These flags can be added per pattern when adding it to pm_patterns using kiss_pm_pattern_add_[simple_]pattern_...
|
||||
#define KISS_PM_COMP_WM_CONT_WORD 0x80000000 // a WM continuous word -
|
||||
// when used on a word we search for it without delimiters.
|
||||
// Large impact on performance, so think twice before using
|
||||
#define KISS_PM_COMP_ALLOW_SHORT_LSS 0x40000000 // Accept short lss (shorter than kiss_pm_min_lss_size)
|
||||
#define KISS_PM_COMP_LITERAL_LSS 0x20000000 // The LSS should not be normalized -
|
||||
// i.e. all chars read as literals
|
||||
#define KISS_PM_COMP_CASELESS 0x10000000 // Indicates a caseless pattern
|
||||
#define KISS_PM_COMP_UTF8 0x08000000 // the pattern is UTF8 encoded.
|
||||
#define KISS_PM_COMP_BOUNDED_PATT 0x04000000 // find the pattern only between non-word characters
|
||||
// (including buffer start/end).
|
||||
// Do not use this flag with `^` or `$`.
|
||||
#define KISS_PM_COMP_DONT_USE_PCRE 0x02000000 // don't use pcre for second tier.
|
||||
#define KISS_PM_COMP_VERIFY_PCRE_SYNTAX 0x01000000 // Verify that a pattern that compiles with PCRE fits PM syntax
|
||||
|
||||
// INTERNAL PATTERN FLAGS
|
||||
#define KISS_PM_COMP_FIRST_TIER_OF_PATT 0x00800000 // pattern is in its first tier execution.
|
||||
#define KISS_PM_COMP_BOUNDED_CIRCUMFLEX_ADDED 0x00400000 // This flag indicates that we have created a pattern
|
||||
// for bounded word infra which is different
|
||||
// from the orig patterns. In such cases we need to take
|
||||
// it into consideration when we look for the match start.
|
||||
#define KISS_PM_COMP_MORE_THAN_ONE_LSS 0x00200000 // The pattern is made up of one or more simple strings
|
||||
#define KISS_PM_COMP_DONT_STRIP 0x00100000 // Parse the pattern without stripping ^/$ from the
|
||||
// RE beginning/end respectively.
|
||||
#define KISS_PM_LSS_AT_BUF_START 0x00080000 // LSS should be at the beginning of the buffer.
|
||||
#define KISS_PM_LSS_AT_BUF_END 0x00040000 // LSS should be at the end of the buffer.
|
||||
#define KISS_PM_RE_AT_BUF_START 0x00020000 // RE should be at the beginning of the buffer.
|
||||
#define KISS_PM_COMP_HAVE_SECOND_TIER 0x00010000 // the pattern needs second tier.
|
||||
#define KISS_PM_COMP_NO_HISTORY 0x00008000 // Execute this pattern only with the buffer
|
||||
// (not with the history vbuf)
|
||||
#define KISS_PM_COMP_REDUCE_SIZE 0x00004000 // Favor small memory consumption over good performance
|
||||
// END OF PATTERNS FLAGS
|
||||
|
||||
// Internal flags set in the match data in kiss_dfa_insert_match_data:
|
||||
#define KISS_PMGLOB_MATCH_DATA_FORCE_ADD 0x00000001 // Force add pmglob match data,
|
||||
// even if the pattern has already been matched
|
||||
#define KISS_PMGLOB_MATCH_OFFSET_IN_PRESENT_BUF 0x00000002 // The match offset refers to the present buffer
|
||||
#define KISS_PMGLOB_REDUCE_BUFFER_LENGTH 0x00000004 // Reduce the length of tier2 buffer using
|
||||
// LSS offsets found in tier1
|
||||
|
||||
|
||||
// How many different first tiers can a PM have? (Can be smaller than the number of first tier types.)
|
||||
#define KISS_TIER1_MAX_NUM 2
|
||||
|
||||
// First tier type.
// The thin NFA reuses the string-matcher slot, so there are two distinct values;
// KISS_TIER1_INVALID equals that count and therefore never names a real tier.
typedef enum kiss_tier1_type_t {
    KISS_TIER1_WM        = 0,                    // Word Matcher
    KISS_TIER1_SM        = 1,                    // DFA String matcher
    KISS_TIER1_THIN_NFA  = KISS_TIER1_SM,        // Thin NFA - instead of DFA
    KISS_TIER1_NUM_TYPES = 2,
    KISS_TIER1_INVALID   = KISS_TIER1_NUM_TYPES
} kiss_tier1_type;
|
||||
|
||||
// Which statistics the user wants to see.
enum kiss_pm_stats_type {
    KISS_PM_STATIC_STATS  = 0, // number of patterns, number of states, ...
    KISS_PM_DYNAMIC_STATS = 1, // number of executions, number of matches, avg buffer length, ...
    KISS_PM_BOTH_STATS    = 2  // both kinds of statistics
};
|
||||
|
||||
// NOTE(review): presumably the topic bits passed as `topics` to the kiss_debug_* macros in this header -
// those macros ignore that argument, so the values have no effect through them. Confirm other users.
#define K_ERROR 0x00000010
|
||||
#define K_PM 0x00000400
|
||||
#define K_THINNFA 0x00400000
|
||||
|
||||
|
||||
#define KISS_PM_COMP_DIGITLESS 0x00001000 // Indicates a digitless first tier match
|
||||
|
||||
// Global switch for the kiss_debug_* macros below; declared here, defined elsewhere.
extern int kiss_debug_err_flag;

// Legacy debug macros. The `topics` argument is accepted but ignored; output goes to printf
// whenever kiss_debug_err_flag is non-zero. `_string` is a parenthesized printf argument list,
// e.g. kiss_debug_err(K_PM, ("failed: %d\n", rc));
//
// Every macro expands to `if (!flag) {} else ...` rather than a bare `if (flag) ...`, so that an
// `else` written after the macro invocation binds to the caller's own `if` instead of being
// silently captured by the macro's hidden `if` (the dangling-else hazard).
#define kiss_debug_err(topics, _string)       if (!kiss_debug_err_flag) {} else printf _string
#define kiss_debug_wrn(topics, _string)       if (!kiss_debug_err_flag) {} else printf _string
#define kiss_debug_notice(topics, _string)    if (!kiss_debug_err_flag) {} else printf _string
#define kiss_debug_info(topics, _string)      if (!kiss_debug_err_flag) {} else printf _string
// Used as: kiss_debug(topic)("fmt", args...);
#define kiss_debug(topics)                    if (!kiss_debug_err_flag) {} else printf
// Expands to nothing: this variant is compiled out.
#define kiss_debug_info_perf(topics, _string)

// Guards an arbitrary statement: kiss_dbg(topic) statement;
#define kiss_dbg(topics)                      if (!kiss_debug_err_flag) {} else
|
||||
|
||||
// NOTE(review): these are textual macros, not typedefs - `kiss_vbuf a, b;` expands to `void *a, b;`,
// which makes only `a` a pointer. Declare one variable per statement when using them.
#define kiss_vbuf void *
|
||||
#define kiss_vbuf_iter void *
|
||||
|
||||
|
||||
// Which character translations are needed?
// The non-zero values occupy separate bits.
enum kiss_pmglob_char_xlation_flags_e {
    KISS_PMGLOB_CHAR_XLATION_NONE   = 0,
    KISS_PMGLOB_CHAR_XLATION_CASE   = 1 << 0,
    KISS_PMGLOB_CHAR_XLATION_DIGITS = 1 << 1,
};
|
||||
|
||||
// Output formats for dumps.
enum kiss_pm_dump_format_e {
    KISS_PM_DUMP_XML  = 0, // XML, for opening with JFlap
    KISS_PM_DUMP_CSV  = 1, // CSV, for opening with Excel
    KISS_PM_DUMP_WIKI = 2  // WIKI, for copy&paste into Wiki (Confluence)
};
|
||||
|
||||
|
||||
// Declaration only. Presumably fills `tab` with a per-character translation table selected by `flags` -
// confirm against the definition.
void kiss_pmglob_char_xlation_build(enum kiss_pmglob_char_xlation_flags_e flags, u_char tab[KISS_PM_ALPHABET_SIZE]);
|
||||
void kiss_pmglob_char_xlation_build_reverse(
|
||||
const u_char tab[KISS_PM_ALPHABET_SIZE],
|
||||
u_char rev[KISS_PM_ALPHABET_SIZE]
|
||||
);
|
||||
|
||||
// Declaration only. PMHook::prepare() calls this right before compiling patterns when D_PM_COMP is at
// DEBUG level; presumably it enables the kiss_debug_* output - confirm against the definition.
void kiss_debug_start();
|
||||
// Declaration only. PMHook::prepare() calls this right after compiling patterns when D_PM_COMP is at
// DEBUG level; presumably it disables the kiss_debug_* output again - confirm against the definition.
void kiss_debug_stop();
|
||||
|
||||
#endif // _pm_adaptor_h_
|
165
components/utils/pm/pm_hook.cc
Normal file
165
components/utils/pm/pm_hook.cc
Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "pm_hook.h"
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include "kiss_patterns.h"
|
||||
#include "kiss_thin_nfa_impl.h"
|
||||
#include "sasal.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
SASAL_START // Multiple Pattern Matcher
|
||||
USE_DEBUG_FLAG(D_PM_COMP);
|
||||
USE_DEBUG_FLAG(D_PM_EXEC);
|
||||
USE_DEBUG_FLAG(D_PM);
|
||||
|
||||
static int
|
||||
pm_pattern_to_kiss_pat_flags(const PMPattern &pat)
|
||||
{
|
||||
int kiss_pat_flags = 0;
|
||||
if (pat.isStartMatch()) {
|
||||
kiss_pat_flags |= KISS_PM_LSS_AT_BUF_START;
|
||||
}
|
||||
if (pat.isEndMatch()) {
|
||||
kiss_pat_flags |= KISS_PM_LSS_AT_BUF_END;
|
||||
}
|
||||
return kiss_pat_flags;
|
||||
}
|
||||
|
||||
|
||||
// Builds the list of KISS strings handed to the compiler: one entry per map element,
// carrying the pattern bytes, their length, the map key as pattern id, and the anchoring flags.
static list<kiss_pmglob_string_s>
convert_patt_map_to_kiss_list(const map<int, PMPattern> &patt_map)
{
    list<kiss_pmglob_string_s> result;
    for (auto entry = patt_map.begin(); entry != patt_map.end(); ++entry) {
        const PMPattern &pm_pat = entry->second;
        result.emplace_back(pm_pat.data(), pm_pat.size(), entry->first, pm_pattern_to_kiss_pat_flags(pm_pat));
    }
    return result;
}
|
||||
|
||||
// Explicit empty ctor and dtor needed due to incomplete definition of class used in unique_ptr. Bummer...
|
||||
PMHook::PMHook()
|
||||
{
|
||||
}
|
||||
|
||||
PMHook::~PMHook()
|
||||
{
|
||||
}
|
||||
|
||||
// Parses one textual pattern line.
// A leading '^' marks a match anchored at buffer start and a trailing '$' one anchored at buffer end;
// both markers are removed from the pattern text. Fails on an empty line, or when nothing is left
// once the markers are removed.
Maybe<PMPattern>
PMHook::lineToPattern(const string &line)
{
    if (line.empty()) return genError("Empty string");

    const bool anchored_start = line.front() == '^';
    const bool anchored_end = line.back() == '$';

    // Skip the '^' (if any) and leave out the '$' (if any).
    const int first = anchored_start ? 1 : 0;
    const int body_len = line.size() - (anchored_start ? 1 : 0) - (anchored_end ? 1 : 0);
    const string body = line.substr(first, body_len);

    if (body.empty()) return genError("Pattern must contain actual content");

    return PMPattern(body, anchored_start, anchored_end);
}
|
||||
|
||||
// Compiles `inputs` into a thin-NFA matcher and stores it in `handle`.
// Patterns are numbered 1..N in set iteration order; that number is the id the matcher reports back,
// and `patterns` keeps the id -> pattern mapping used by the scan functions.
// Returns an error (leaving `patterns` untouched) when compilation fails.
Maybe<void>
PMHook::prepare(const set<PMPattern> &inputs)
{
    map<int, PMPattern> tmp;
    int index = 0;
    for (auto &pat : inputs) {
        tmp.emplace(++index, pat);
    }

    // Sample the debug level once so kiss_debug_start()/kiss_debug_stop() are always paired.
    const bool trace_compile = Debug::isFlagAtleastLevel(D_PM_COMP, Debug::DebugLevel::DEBUG);
    if (trace_compile) kiss_debug_start();
    KissPMError pm_err;
    handle = kiss_thin_nfa_compile(convert_patt_map_to_kiss_list(tmp), KISS_PM_COMP_CASELESS, &pm_err);
    if (trace_compile) kiss_debug_stop();

    if (handle == nullptr) {
        dbgError(D_PM_COMP) << "PMHook::prepare() failed" << pm_err;
        // error_string defaults to nullptr; never build the error text from a null pointer.
        const char *reason =
            pm_err.error_string != nullptr ? pm_err.error_string : "Pattern matcher compilation failed";
        return genError(reason);
    }

    // tmp is not needed any more, so hand its contents over instead of copying them.
    patterns.swap(tmp);
    return Maybe<void>();
}
|
||||
|
||||
|
||||
// Scans `buf` and returns the distinct patterns that matched at least once.
// Asserts that prepare() has produced a usable matcher.
set<PMPattern>
PMHook::scanBuf(const Buffer &buf) const
{
    dbgAssert(handle != nullptr) << "Unusable Pattern Matcher";

    vector<pair<uint, uint>> raw_hits;
    kiss_thin_nfa_exec(handle.get(), buf, raw_hits);
    dbgTrace(D_PM) << raw_hits.size() << " raw matches found";

    // The first element of each hit is the pattern id; the set collapses repeated matches.
    set<PMPattern> matched;
    for (auto hit = raw_hits.begin(); hit != raw_hits.end(); ++hit) {
        matched.insert(patterns.at(hit->first));
    }
    dbgTrace(D_PM) << matched.size() << " matches found after removing the duplicates";
    return matched;
}
|
||||
|
||||
// Scans `buf` and returns every (offset, pattern) pair reported by the matcher.
// Asserts that prepare() has produced a usable matcher.
set<pair<uint, PMPattern>>
PMHook::scanBufWithOffset(const Buffer &buf) const
{
    dbgAssert(handle != nullptr) << "Unusable Pattern Matcher";

    vector<pair<uint, uint>> raw_hits;
    kiss_thin_nfa_exec(handle.get(), buf, raw_hits);
    dbgTrace(D_PM) << raw_hits.size() << " raw matches found";

    // Each hit is (pattern id, offset); the result is keyed by offset first.
    set<pair<uint, PMPattern>> located;
    for (auto hit = raw_hits.begin(); hit != raw_hits.end(); ++hit) {
        located.emplace(hit->second, patterns.at(hit->first));
    }
    dbgTrace(D_PM) << located.size() << " matches found";
    return located;
}
|
||||
|
||||
void
|
||||
PMHook::scanBufWithOffsetLambda(const Buffer &buf, function<void(uint, const PMPattern&)> cb) const
|
||||
{
|
||||
|
||||
for (auto &res : scanBufWithOffset(buf)) {
|
||||
cb(res.first, res.second);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
PMPattern::operator<(const PMPattern &other) const
|
||||
{
|
||||
if (pattern != other.pattern) return pattern < other.pattern;
|
||||
if (index != other.index) return index < other.index;
|
||||
return tie(match_start, match_end) < tie(other.match_start, other.match_end);
|
||||
}
|
||||
bool
|
||||
PMPattern::operator==(const PMPattern &other) const
|
||||
{
|
||||
return
|
||||
index == other.index &&
|
||||
pattern == other.pattern &&
|
||||
match_start == other.match_start &&
|
||||
match_end == other.match_end;
|
||||
}
|
||||
SASAL_END
|
5
components/utils/pm/pm_ut/CMakeLists.txt
Normal file
5
components/utils/pm/pm_ut/CMakeLists.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
# Registers the pm_ut unit test. Presumably the arguments are: target name, ';'-separated source
# files, ';'-separated libraries to link - confirm against the add_unit_test definition.
add_unit_test(
|
||||
pm_ut
|
||||
"pm_scan_ut.cc;pm_pat_ut.cc"
|
||||
"pm;buffers"
|
||||
)
|
78
components/utils/pm/pm_ut/pm_pat_ut.cc
Normal file
78
components/utils/pm/pm_ut/pm_pat_ut.cc
Normal file
@@ -0,0 +1,78 @@
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
#include "cptest.h"
|
||||
#include "pm_hook.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
static void
|
||||
pm_pat_simple_pat(
|
||||
const std::string &hex_line,
|
||||
const std::string &line,
|
||||
bool expected_match_at_start,
|
||||
bool expected_match_at_end)
|
||||
{
|
||||
PMPattern pat;
|
||||
auto res = PMHook::lineToPattern(hex_line.c_str());
|
||||
EXPECT_TRUE(res.ok()) << res.getErr();
|
||||
pat = *res;
|
||||
EXPECT_EQ(pat.isStartMatch(), expected_match_at_start);
|
||||
EXPECT_EQ(pat.isEndMatch(), expected_match_at_end);
|
||||
ASSERT_EQ(pat.size(), line.size());
|
||||
EXPECT_EQ(memcmp((const char *)pat.data(), line.c_str(), line.size()), 0);
|
||||
}
|
||||
|
||||
static void
|
||||
pm_pat_bad_pat(const std::string &bad_hex_line)
|
||||
{
|
||||
EXPECT_FALSE(PMHook::lineToPattern(bad_hex_line).ok());
|
||||
}
|
||||
|
||||
// A line without anchors is taken verbatim.
TEST(pm_pat, basic)
{
    const string text = "ABCDxyz";
    pm_pat_simple_pat(text, text, false, false);
}
|
||||
|
||||
// A leading '^' is stripped and reported as a start anchor.
TEST(pm_pat, pat_with_begin)
{
    const bool at_start = true;
    const bool at_end = false;
    pm_pat_simple_pat("^ABCD", "ABCD", at_start, at_end);
}
|
||||
|
||||
// A trailing '$' is stripped and reported as an end anchor.
TEST(pm_pat, pat_with_end)
{
    const bool at_start = false;
    const bool at_end = true;
    pm_pat_simple_pat("ABCD$", "ABCD", at_start, at_end);
}
|
||||
|
||||
// Both anchors can be present on the same line.
TEST(pm_pat, pat_with_begin_end)
{
    const bool at_start = true;
    const bool at_end = true;
    pm_pat_simple_pat("^ABCD$", "ABCD", at_start, at_end);
}
|
||||
|
||||
// A longer unanchored line is taken verbatim.
TEST(pm_pat, pat_with_all_chars)
{
    const string text = "ABCDEFGHIJKLMNOPJKLMNO";
    pm_pat_simple_pat(text, text, false, false);
}
|
||||
|
||||
// Anchors with nothing between them leave no pattern content and must be rejected.
TEST(pm_pat, empty_pat_with_begin_end)
{
    const string only_anchors = "^$";
    pm_pat_bad_pat(only_anchors);
}
|
||||
|
||||
// An empty line must be rejected.
TEST(pm_pat, empty_pat)
{
    const string nothing;
    pm_pat_bad_pat(nothing);
}
|
||||
|
||||
// Bytes with the high bit set must pass through lineToPattern() unchanged.
TEST(pm_pat, chars_above_127)
{
    static const vector<u_char> buf = { 0x80, 0x96, 0xaa, 0xff };
    auto rc = PMHook::lineToPattern(string(buf.begin(), buf.end()));
    // Fatal check: the result is dereferenced right below, which is only valid on success.
    ASSERT_TRUE(rc.ok()) << rc.getErr();
    const PMPattern pat = *rc;
    EXPECT_FALSE(pat.isStartMatch());
    EXPECT_FALSE(pat.isEndMatch());
    // Fatal check: memcmp below reads buf.size() bytes from the pattern.
    ASSERT_EQ(pat.size(), buf.size());
    EXPECT_EQ(memcmp(pat.data(), buf.data(), buf.size()), 0);
}
|
469
components/utils/pm/pm_ut/pm_scan_ut.cc
Normal file
469
components/utils/pm/pm_ut/pm_scan_ut.cc
Normal file
@@ -0,0 +1,469 @@
|
||||
#include <string>
|
||||
|
||||
#include "cptest.h"
|
||||
#include "pm_hook.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
// Stream operator for PMPattern that writes nothing and returns the stream unchanged.
// Presumably present so gtest assertions on PMPattern containers have an operator<< to call - confirm.
ostream & operator<<(ostream &os, const PMPattern &) { return os; }
|
||||
|
||||
static void
|
||||
push_pat(set<PMPattern> &pats, const string &hex_pat)
|
||||
{
|
||||
auto pat = PMHook::lineToPattern(hex_pat.c_str());
|
||||
EXPECT_TRUE(pat.ok()) << pat.getErr();
|
||||
pats.insert(*pat);
|
||||
}
|
||||
|
||||
// Base case: a set holding the single pattern parsed from `pattern`.
static set<PMPattern>
getPatternSet(const string &pattern)
{
    set<PMPattern> single;
    push_pat(single, pattern);
    return single;
}
|
||||
|
||||
template <typename ... Patterns>
|
||||
static set<PMPattern>
|
||||
getPatternSet(const string &pattern, Patterns ...more_patterns)
|
||||
{
|
||||
auto res = getPatternSet(more_patterns...);
|
||||
push_pat(res, pattern);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Compiles `pats` into a fresh PMHook and returns the patterns found in `buf`.
// If compilation fails, a test failure is recorded and an empty set is returned.
static set<PMPattern>
prepare_scan_and_compare(const set<PMPattern> &pats, const string &buf)
{
    PMHook pm;
    auto prepared = pm.prepare(pats);
    EXPECT_TRUE(prepared.ok());
    // Scanning with an unprepared PMHook trips its "Unusable Pattern Matcher" assertion, which would
    // take the whole test binary down instead of failing just this test.
    if (!prepared.ok()) return set<PMPattern>();

    return pm.scanBuf(Buffer(buf));
}
|
||||
|
||||
// This is a helper function for the trivial tests. buf is NULL terminated, and the NULL is NOT passed to the PM.
|
||||
static set<PMPattern>
|
||||
common_scan_test_single_pat(const string &hex_pat, const string &buf)
|
||||
{
|
||||
set<PMPattern> pats;
|
||||
push_pat(pats, hex_pat);
|
||||
|
||||
return prepare_scan_and_compare(pats, buf);
|
||||
}
|
||||
|
||||
// Scanning an empty buffer yields no matches.
TEST(pm_scan, zero_buf_len)
{
    const auto found = common_scan_test_single_pat("ABCD", "");
    const set<PMPattern> nothing;
    EXPECT_EQ(found, nothing);
}
|
||||
|
||||
// A pattern occurring several times is reported once.
TEST(pm_scan, basic)
{
    const auto found = common_scan_test_single_pat("ABCD", "ABCD ABCD AB AB ABC ABCD");
    const auto wanted = getPatternSet("ABCD");
    EXPECT_EQ(found, wanted);
}
|
||||
|
||||
// A start-anchored pattern matches at the beginning of the buffer.
TEST(pm_scan, with_start_flag)
{
    const auto found = common_scan_test_single_pat("^ABCD", "ABCD ABCD AB AB ABC AAAAAAA");
    const auto wanted = getPatternSet("^ABCD");
    EXPECT_EQ(found, wanted);
}
|
||||
|
||||
// A one-character start-anchored pattern matches a short buffer.
TEST(pm_scan, with_start_flag_short_buf)
{
    const auto found = common_scan_test_single_pat("^A", "ABC");
    const auto wanted = getPatternSet("^A");
    EXPECT_EQ(found, wanted);
}
|
||||
|
||||
// An end-anchored pattern matches at the end of the buffer.
TEST(pm_scan, with_end_flag)
{
    const auto found = common_scan_test_single_pat("ABCD$", "KKKK ABCD ABCD ABCD");
    const auto wanted = getPatternSet("ABCD$");
    EXPECT_EQ(found, wanted);
}
|
||||
|
||||
// A buffer shorter than the pattern cannot match.
TEST(pm_scan, nomatch)
{
    const auto found = common_scan_test_single_pat("AAA", "AA");
    const set<PMPattern> nothing;
    EXPECT_EQ(found, nothing);
}
|
||||
|
||||
// The buffer is exactly the pattern.
TEST(pm_scan, exact_match)
{
    const auto found = common_scan_test_single_pat("AAA", "AAA");
    const auto wanted = getPatternSet("AAA");
    EXPECT_EQ(found, wanted);
}
|
||||
|
||||
// Overlapping occurrences of the same pattern are reported once.
TEST(pm_scan, overlap_in_buf)
{
    const auto found = common_scan_test_single_pat("AAA", "AAAA");
    const auto wanted = getPatternSet("AAA");
    EXPECT_EQ(found, wanted);
}
|
||||
|
||||
// A pattern anchored at both ends must not match a longer buffer.
TEST(pm_scan, with_begin_and_end_flag_no_match)
{
    const auto found = common_scan_test_single_pat("^AAA$", "AAAA");
    const set<PMPattern> nothing;
    EXPECT_EQ(found, nothing);
}
|
||||
|
||||
// A pattern anchored at both ends matches a buffer that is exactly the pattern.
TEST(pm_scan, with_begin_and_end_flag_match)
{
    const auto found = common_scan_test_single_pat("^ABC$", "ABC");
    const auto wanted = getPatternSet("^ABC$");
    EXPECT_EQ(found, wanted);
}
|
||||
|
||||
// A buffer of 100 'A's contains many overlapping hits, all collapsing into one result.
TEST(pm_scan, many_matches)
{
    const string buf =
        "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
    const auto found = common_scan_test_single_pat("AAA", buf);
    const auto wanted = getPatternSet("AAA");
    EXPECT_EQ(found, wanted);
}
|
||||
|
||||
// A 208-character pattern is found when surrounded by other text.
TEST(pm_scan, long_pattern)
{
    const string alphabet_twice = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ";
    string long_str;
    for (int row = 0; row < 4; ++row) {
        long_str += alphabet_twice;
    }

    const string buf = ".-= " + long_str + " =-.";
    const auto found = common_scan_test_single_pat(long_str, buf);
    const auto wanted = getPatternSet(long_str);
    EXPECT_EQ(found, wanted);
}
|
||||
|
||||
|
||||
TEST(pm_scan, very_long_pattern)
{
    // 3000 repetitions of the alphabet give 78K characters. If some unsigned short is used
    // internally, we hope to overflow it.
    const string abc = "abcdefghijklmnopqrstuvwxyz";
    const int repetitions = 3000;
    string very_long_str;
    int appended = 0;
    while (appended < repetitions) {
        very_long_str += abc;
        ++appended;
    }
    const string pattern = very_long_str;

    // What if the PM internally truncated our very long pattern?
    // Because it is cyclic, a scan of the full pattern alone might not reveal that.
    // So the buffer also contains almost the whole pattern, but not all of it.
    const string truncated_begin = pattern.substr(1);
    const string truncated_end = pattern.substr(0, pattern.size() - 1);

    // A separator that shares no character with the pattern is placed between the pieces,
    // so there are no additional matches in buf_to_scan.
    const string seperator_str = "1234";
    string buf_to_scan = seperator_str;
    buf_to_scan += very_long_str;
    buf_to_scan += seperator_str;
    buf_to_scan += truncated_end;
    buf_to_scan += seperator_str;
    buf_to_scan += truncated_begin;

    EXPECT_EQ(common_scan_test_single_pat(pattern, buf_to_scan), getPatternSet(pattern));
}
|
||||
|
||||
// Two different patterns, both present in the buffer.
TEST(pm_scan, multiple_pats)
{
    set<PMPattern> pats;
    for (const char *text : { "ABCD", "DCBA" }) {
        push_pat(pats, text);
    }
    const string buf = "KKKK ABCD AB AB ABC ABCD DCBA";
    EXPECT_EQ(prepare_scan_and_compare(pats, buf), getPatternSet("ABCD", "DCBA"));
}
|
||||
|
||||
// Two patterns whose occurrences share a character in the buffer.
TEST(pm_scan, multiple_pats_with_overlap)
{
    set<PMPattern> pats;
    for (const char *text : { "ABCD", "DCBA" }) {
        push_pat(pats, text);
    }
    const string buf = "KKKK ABCDCBA";
    EXPECT_EQ(prepare_scan_and_compare(pats, buf), getPatternSet("ABCD", "DCBA"));
}
|
||||
|
||||
|
||||
// One pattern is a proper prefix of the other; both must be reported.
TEST(pm_scan, multiple_long_pats_with_overlap)
{
    const string alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const string alphabet_bang = "ABCDEFGHIJKLMNOPQRSTUVWXYZ!";

    set<PMPattern> pats;
    push_pat(pats, alphabet);
    push_pat(pats, alphabet_bang);

    const string buf = "KKKK ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ!";
    EXPECT_EQ(prepare_scan_and_compare(pats, buf), getPatternSet(alphabet, alphabet_bang));
}
|
||||
|
||||
// Thirteen patterns are compiled; only the two present in the buffer are reported.
TEST(pm_scan, many_pats)
{
    static const char *const all_pats[] = {
        "ABC1", "ABC2", "ABC3", "ABC4", "ABC5", "ABC6", "ABC7", "ABC8",
        "asdasdf", "zzxxdda", "d1tt6335!!", "zxcqwwrqwer!!", "!sdazsd!"
    };

    set<PMPattern> pats;
    for (const char *text : all_pats) {
        push_pat(pats, text);
    }

    const string buf = "KKKK ABC1 asdasdf";
    EXPECT_EQ(prepare_scan_and_compare(pats, buf), getPatternSet("ABC1", "asdasdf"));
}
|
||||
|
||||
// 3000 generated patterns are compiled; only the two present in the buffer are reported.
TEST(pm_scan, a_lot_of_pats)
{
    set<PMPattern> pats;
    uint pat_num = 0;
    while (pat_num < 3000) {
        char pat_text[100];
        snprintf(pat_text, sizeof(pat_text), "some_%u_pat", pat_num);
        push_pat(pats, pat_text);
        pat_num++;
    }

    const string buf = "KKKK some_100_pat some_1000_pat";
    EXPECT_EQ(prepare_scan_and_compare(pats, buf), getPatternSet("some_100_pat", "some_1000_pat"));
}
|
||||
|
||||
// 26 patterns share a long common prefix and differ in one letter after it.
TEST(pm_scan, long_pat_prefix_followed_by_many_branches)
{
    set<PMPattern> pats;
    u_char letter = 'a';
    while (letter <= 'z') {
        char pat_text[100];
        snprintf(pat_text, sizeof(pat_text), "some_long_prefix_%c_pat", letter);
        push_pat(pats, pat_text);
        letter++;
    }

    const string buf = "some_long_prefix_a_pat some_long_prefix_z_pat some_long_prefix_a_pat";
    EXPECT_EQ(prepare_scan_and_compare(pats, buf), getPatternSet("some_long_prefix_a_pat", "some_long_prefix_z_pat"));
}
|
||||
|
||||
// Adding the same pattern twice behaves like adding it once.
TEST(pm_scan, identical_pats)
{
    set<PMPattern> pats;
    for (int times = 0; times < 2; ++times) {
        push_pat(pats, "123");
    }
    const string buf = "KKKK 123 ---";
    EXPECT_EQ(prepare_scan_and_compare(pats, buf), getPatternSet("123"));
}
|
||||
|
||||
// One prepared PMHook scans several buffers; earlier scans must not affect later ones.
TEST(pm_scan, multiple_scans_using_same_pm)
{
    set<PMPattern> pats;
    push_pat(pats, "ABC");
    push_pat(pats, "%%%");

    PMHook pm;
    ASSERT_TRUE(pm.prepare(pats).ok());

    const Buffer buf1("ABC 123 ABC");
    const Buffer buf2("^^^%%%!! 123 ABC");
    const auto expected_matches1 = getPatternSet("ABC");
    const auto expected_matches2 = getPatternSet("ABC", "%%%");

    // buf1 is scanned again at the end to check that the scan of buf2 left nothing behind.
    EXPECT_EQ(pm.scanBuf(buf1), expected_matches1);
    EXPECT_EQ(pm.scanBuf(buf2), expected_matches2);
    EXPECT_EQ(pm.scanBuf(buf1), expected_matches1);
}
|
||||
|
||||
// Offsets are reported for a buffer assembled from several pieces ("ABC" + "EFG" + "ABC").
TEST(pm_scan, scan_with_offsets)
{
    const Buffer buf1("ABC");
    const Buffer buf2("EFG");
    const Buffer buf3 = buf1 + buf2 + buf1;

    set<PMPattern> pats;
    push_pat(pats, "ABC");
    PMHook pm;
    ASSERT_TRUE(pm.prepare(pats).ok());

    // The expected offsets are those of the last character of each occurrence.
    set<pair<uint, PMPattern>> res;
    for (uint offset : { 2u, 8u }) {
        res.emplace(offset, PMHook::lineToPattern("ABC").unpackMove());
    }
    EXPECT_THAT(pm.scanBufWithOffset(buf3), ContainerEq(res));
}
|
||||
|
||||
// Scanning an empty Buffer with a prepared PMHook yields no matches.
TEST(pm_scan, null_buf)
{
    set<PMPattern> pats;
    push_pat(pats, "ABCD");

    PMHook pm;
    ASSERT_TRUE(pm.prepare(pats).ok());

    const set<PMPattern> nothing;
    EXPECT_EQ(pm.scanBuf(Buffer("")), nothing);
}
|
||||
|
||||
// Scanning before prepare() must die with the "Unusable Pattern Matcher" assertion.
TEST(pm_scan, exit_on_no_prepare)
{
    Buffer some_text("blah");
    cptestPrepareToDie();
    PMHook unprepared;
    EXPECT_DEATH(unprepared.scanBuf(some_text), "Unusable Pattern Matcher");
}
|
||||
|
||||
// prepare() must report an error when given no patterns at all.
TEST(pm_scan, prepare_fail_on_no_pats)
{
    const set<PMPattern> no_pats;
    PMHook pm;
    auto prepared = pm.prepare(no_pats);
    EXPECT_FALSE(prepared.ok());
}
|
||||
|
||||
// Several patterns, some nested in each other, each reported with the offset of its last character.
TEST(pm_scan, pm_offsets_test_multiple_matches)
{
    const set<PMPattern> initPatts{
        PMPattern("he", false, false),
        PMPattern("ex", false, false),
        PMPattern("hex", false, false, 2),
        PMPattern("(", false, false, 5),
        PMPattern(")", false, false, 7)
    };

    PMHook pm;
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    const Buffer buf("hex()");
    const set<pair<uint, PMPattern>> results = pm.scanBufWithOffset(buf);

    const set<pair<uint, PMPattern>> expected{
        {1, {"he", false, false, 0}},
        {2, {"ex", false, false, 0}},
        {2, {"hex", false, false, 2}},
        {3, {"(", false, false, 5}},
        {4, {")", false, false, 7}}
    };

    EXPECT_EQ(results, expected);
}
|
||||
|
||||
// A one-character pattern that is the whole buffer is reported at offset 0.
TEST(pm_scan, pm_offsets_test_one_char_match)
{
    const set<PMPattern> initPatts{ PMPattern("/", false, false) };

    PMHook pm;
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    const Buffer buf("/");
    const set<pair<uint, PMPattern>> results = pm.scanBufWithOffset(buf);
    const set<pair<uint, PMPattern>> expected{
        {0, {"/", false, false, 0}}
    };

    EXPECT_EQ(results, expected);
}
|
||||
|
||||
// A one-character pattern at the end of the buffer is reported at the last offset.
TEST(pm_scan, pm_offsets_test_one_char_at_end_match)
{
    const set<PMPattern> initPatts{ PMPattern("/", false, false) };

    PMHook pm;
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    const Buffer buf("abc/");
    const set<pair<uint, PMPattern>> results = pm.scanBufWithOffset(buf);
    const set<pair<uint, PMPattern>> expected{
        {3, {"/", false, false, 0}}
    };

    EXPECT_EQ(results, expected);
}
|
||||
|
||||
// A one-character pattern at the start of the buffer is reported at offset 0.
TEST(pm_scan, pm_offsets_test_one_char_at_start_match)
{
    const set<PMPattern> initPatts{ PMPattern("/", false, false) };

    PMHook pm;
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    const Buffer buf("/abc");
    const set<pair<uint, PMPattern>> results = pm.scanBufWithOffset(buf);
    const set<pair<uint, PMPattern>> expected{
        {0, {"/", false, false, 0}}
    };

    EXPECT_EQ(results, expected);
}
|
||||
|
||||
// A word that is the whole buffer is reported at the offset of its last character.
TEST(pm_scan, pm_offsets_test_word_full_match)
{
    const set<PMPattern> initPatts{ PMPattern("abc", false, false) };

    PMHook pm;
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    const Buffer buf("abc");
    const set<pair<uint, PMPattern>> results = pm.scanBufWithOffset(buf);
    const set<pair<uint, PMPattern>> expected{
        {2, {"abc", false, false, 0}}
    };

    EXPECT_EQ(results, expected);
}
|
||||
|
||||
// A word at the start of a longer buffer is reported at the offset of its last character.
TEST(pm_scan, pm_offsets_test_word_at_start_match)
{
    const set<PMPattern> initPatts{ PMPattern("application", false, false) };

    PMHook pm;
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    const Buffer buf("application/x-www-form-urlencoded");
    const set<pair<uint, PMPattern>> results = pm.scanBufWithOffset(buf);
    const set<pair<uint, PMPattern>> expected{
        {10, {"application", false, false, 0}}
    };

    EXPECT_EQ(results, expected);
}
|
||||
|
||||
// A word at the end of a longer buffer is reported at the buffer's last offset.
TEST(pm_scan, pm_offsets_test_word_at_end_match)
{
    const set<PMPattern> initPatts{ PMPattern("x-www-form-urlencoded", false, false) };

    PMHook pm;
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    const Buffer buf("application/x-www-form-urlencoded");
    const set<pair<uint, PMPattern>> results = pm.scanBufWithOffset(buf);
    const set<pair<uint, PMPattern>> expected{
        {32, {"x-www-form-urlencoded", false, false, 0}}
    };

    EXPECT_EQ(results, expected);
}
|
||||
|
||||
// Patterns built with explicit indexes must come back from the scan carrying those same indexes.
TEST(pm_scan, pm_offsets_test_pat_getIndex_method)
{
    set<PMPattern> initPatts;
    initPatts.insert(PMPattern("ABC", false, false)); // initialized with the default index 0
    initPatts.insert(PMPattern("ABCD", false, false, 4));
    initPatts.insert(PMPattern("CDE", false, false, 7));
    PMHook pm;
    // Fatal check, as in the sibling offset tests: scanning an unprepared PMHook trips its
    // "Unusable Pattern Matcher" assertion instead of failing just this test.
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    Buffer buf("12345ABCDEF5678");
    std::set<std::pair<uint, PMPattern>> results = pm.scanBufWithOffset(buf);

    std::set<std::pair<uint, PMPattern>> expected{
        {7, {"ABC", false, false, 0}},
        {8, {"ABCD", false, false, 4}},
        {9, {"CDE", false, false, 7}}
    };
    EXPECT_EQ(results, expected);
}
|
||||
|
||||
// Same scenario as the set-returning getIndex test, but results are collected through the callback API.
TEST(pm_scan, pm_offsets_lambda_test_pat_getIndex_method)
{
    set<PMPattern> initPatts;
    initPatts.insert(PMPattern("ABC", false, false)); // initialized with the default index 0
    initPatts.insert(PMPattern("ABCD", false, false, 4));
    initPatts.insert(PMPattern("CDE", false, false, 7));
    PMHook pm;
    // Fatal check, as in the sibling offset tests: scanning an unprepared PMHook trips its
    // "Unusable Pattern Matcher" assertion instead of failing just this test.
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    Buffer buf("12345ABCDEF5678");
    std::vector<std::pair<u_int, PMPattern>> results;
    pm.scanBufWithOffsetLambda(buf, [&] (uint offset, const PMPattern &pat) { results.emplace_back(offset, pat); });

    // The callback is invoked in the iteration order of the underlying result set.
    std::vector<std::pair<uint, PMPattern>> expected{
        {7, {"ABC", false, false, 0}},
        {8, {"ABCD", false, false, 4}},
        {9, {"CDE", false, false, 7}}
    };

    EXPECT_EQ(results, expected);
}
|
Reference in New Issue
Block a user