First release of open-appsec source code

This commit is contained in:
roybarda
2022-10-26 19:33:19 +03:00
parent 3883109caf
commit a883352f79
1353 changed files with 276290 additions and 1 deletions

View File

@@ -0,0 +1,2 @@
# Pull the two utility component directories into the build.
add_subdirectory(ip_utilities)
add_subdirectory(pm)

View File

@@ -0,0 +1 @@
# Defines the ip_utilities library target, built from the single source ip_utilities.cc.
add_library(ip_utilities ip_utilities.cc)

View File

@@ -0,0 +1,347 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ip_utilities.h"
#include "connkey.h"
using namespace std;
// LCOV_EXCL_START Reason: temporary until we add relevant UT until 07/10
// Ordering used when IpAddress is a key in ordered containers.
// Addresses of different families are ordered by family only; addresses of the same
// family are ordered by their raw address value (IPv4) or raw bytes (everything else).
// Deciding on the family first in BOTH directions is what makes this a strict weak
// ordering: without it, `a < b` and `b < a` could both be true for mixed families.
bool
operator<(const IpAddress &this_ip_addr, const IpAddress &other_ip_addr)
{
    if (this_ip_addr.ip_type != other_ip_addr.ip_type) return this_ip_addr.ip_type < other_ip_addr.ip_type;
    if (this_ip_addr.ip_type == IP_VERSION_4) return this_ip_addr.addr4_t.s_addr < other_ip_addr.addr4_t.s_addr;
    return memcmp(&this_ip_addr.addr6_t, &other_ip_addr.addr6_t, sizeof(struct in6_addr)) < 0;
}
// Two addresses are equal when they belong to the same family and carry the same
// address value: the 32-bit value for IPv4, the 16 raw bytes otherwise.
bool
operator==(const IpAddress &this_ip_addr, const IpAddress &other_ip_addr)
{
    const bool same_family = (this_ip_addr.ip_type == other_ip_addr.ip_type);
    if (!same_family) return false;

    if (this_ip_addr.ip_type != IP_VERSION_4) {
        return memcmp(&this_ip_addr.addr6_t, &other_ip_addr.addr6_t, sizeof(struct in6_addr)) == 0;
    }
    return this_ip_addr.addr4_t.s_addr == other_ip_addr.addr4_t.s_addr;
}
// LCOV_EXCL_STOP
// Splits a CIDR string ("<address>/<mask size>") at the first '/'.
// Returns the address part and the mask size parsed as an integer, or an error when
// there is no '/' or the part after it cannot be parsed by stoi.
// The mask size is NOT range-checked here; that depends on the address family.
Maybe<pair<string, int>>
extractAddressAndMaskSize(const string &cidr)
{
    size_t delimiter_pos = cidr.find("/");
    if (delimiter_pos == string::npos) return genError("provided value is not in CIDR notation: " + cidr);

    string address = cidr.substr(0, delimiter_pos);
    string mask_size = cidr.substr(delimiter_pos + 1);
    try {
        return make_pair(address, stoi(mask_size));
    } catch (...) {
        // stoi throws on non-numeric or out-of-range input.
        return genError("failed to cast provided value to integer: " + mask_size);
    }
}
// Combines the first four (already shifted) address parts in `oct` into a single value
// and returns the {lowest, highest} addresses covered by `mask`:
//   first  = address with all non-mask bits cleared
//   second = address with all non-mask bits set
// `oct` must hold at least four elements.
template<typename Integer>
pair<Integer, Integer>
applyMaskOnAddress(const vector<Integer> &oct, Integer mask)
{
    Integer combined = oct[0];
    for (size_t part = 1; part < 4; part++) {
        combined |= oct[part];
    }

    Integer range_start = combined & mask;
    Integer range_end = combined | (~mask);
    return make_pair(range_start, range_end);
}
// Converts an IPv4 CIDR (dotted address + mask size) into the first and last address
// of the range, both rendered as dotted-decimal strings.
// Returns an error when the mask size is outside [0, 32], when an octet is not a
// number in [0, 255], or when the address does not consist of exactly four octets.
Maybe<pair<string, string>>
createRangeFromCidrV4(const pair<string, int> &cidr_values)
{
    string address = cidr_values.first;
    int mask_size = cidr_values.second;
    // Shifting a 32-bit value by a negative amount or by more than its width is undefined.
    if (mask_size < 0 || mask_size > 32) {
        return genError("provided mask size is not valid for ipv4: " + to_string(mask_size));
    }

    // Each octet is stored already shifted to its position inside the 32-bit address.
    vector<uint32_t> oct;
    for (int i = 3; i >= 0; i--) {
        size_t delimiter_pos = address.find(".");
        string oct_str = address.substr(0, delimiter_pos);
        unsigned long oct_value;
        try {
            oct_value = stoul(oct_str);
        } catch (...) {
            return genError("failed to cast provided value to integer: " + oct_str);
        }
        if (oct_value > 0xff) {
            return genError("provided value is not in a correct ipv4 structure: " + cidr_values.first);
        }
        oct.push_back(static_cast<uint32_t>(oct_value) << (i * 8));

        // A '.' must follow every octet except the last one, and must not follow the last.
        if ((i == 0) != (delimiter_pos == string::npos)) {
            return genError("provided value is not in a correct ipv4 structure: " + cidr_values.first);
        }
        address.erase(0, delimiter_pos + 1);
    }

    // A /0 mask would need a shift by the full width (undefined), so it is spelled out.
    unsigned int mask = (mask_size == 0) ? 0 : (0xffffffffu << (32 - mask_size));
    unsigned int start, end;
    tie(start, end) = applyMaskOnAddress<unsigned int>(oct, mask);

    auto construct_address = [](unsigned int value)
    {
        stringstream address_stream;
        for (int i = 3; i >= 0; i--) {
            address_stream << ((value >> (i * 8)) & 0xff) << (i > 0 ? "." : "");
        }
        return address_stream.str();
    };
    return make_pair<string, string>(construct_address(start), construct_address(end));
}
// LCOV_EXCL_START Reason: it is tested, but for some reason coverage doesn't catch it
// Converts an IPv6 CIDR (textual address + mask size) into the first and last address
// of the range, rendered as eight ':'-separated hexadecimal groups (no "::" compression).
// Only one 64-bit half of the address is masked:
//   - mask size > 64: the upper four groups are copied as-is and the lower half is masked;
//   - otherwise: the upper half is masked and the lower half is all zeros (start) or
//     all ones (end).
// Returns an error when the mask size is outside [0, 128] or the address cannot be parsed.
Maybe<pair<string, string>>
createRangeFromCidrV6(const pair<string, int> &cidr_values)
{
    string address = cidr_values.first;
    int mask_size = cidr_values.second;
    // Out-of-range sizes would turn into negative or over-wide shifts below.
    if (mask_size < 0 || mask_size > 128) {
        return genError("provided mask size is not valid for ipv6: " + to_string(mask_size));
    }

    // inet_pton expands compressed zeros. It returns 1 on success, 0 when the text is
    // not a valid address and -1 on other errors, so anything but 1 is a failure.
    struct in6_addr v6;
    if (inet_pton(AF_INET6, address.c_str(), &v6) != 1) {
        return genError("faild to convert provided value to ipv6: " + address);
    }

    // Rebuild the eight 16-bit groups from the 16 raw bytes (most significant byte first).
    vector<unsigned int> oct_from_str;
    for (int i = 0; i < 15; i += 2) {
        oct_from_str.push_back((static_cast<unsigned int>(v6.s6_addr[i]) << 8) | v6.s6_addr[i + 1]);
    }

    uint64_t mask = 0xffffffffffffffff;
    function<string(uint64_t, bool)> construct_address;
    int oct_offset;
    if (mask_size > 64) {
        oct_offset = 7;
        mask <<= (128 - mask_size);
        construct_address = [oct_from_str](uint64_t value, bool is_start)
        {
            (void)is_start;
            stringstream address_stream;
            for (int i = 0; i < 4; i++) {
                address_stream << hex << oct_from_str[i] << ":";
            }
            for (int i = 3; i >= 0; i--) {
                address_stream
                    << hex
                    << static_cast<unsigned int>((value >> (i * 16)) & 0xffff)
                    << (i > 0 ? ":" : "");
            }
            return address_stream.str();
        };
    } else {
        oct_offset = 3;
        // A /0 mask would need a shift by the full 64-bit width, which is undefined.
        mask = (mask_size == 0) ? 0 : (mask << (64 - mask_size));
        construct_address = [](uint64_t value, bool is_start)
        {
            stringstream address_stream;
            for (int i = 3; i >= 0; i--) {
                address_stream << hex << static_cast<unsigned int>((value >> (i * 16)) & 0xffff) << ":";
            }
            address_stream << (is_start ? "0:0:0:0" : "ffff:ffff:ffff:ffff");
            return address_stream.str();
        };
    }

    // Place the four groups of the masked half at their position inside a 64-bit value.
    uint64_t start, end;
    vector<uint64_t> oct;
    for (int i = 3; i >= 0; i--) {
        oct.push_back(static_cast<uint64_t>(oct_from_str[oct_offset - i]) << (i * 16));
    }
    tie(start, end) = applyMaskOnAddress<uint64_t>(oct, mask);
    return make_pair<string, string>(
        construct_address(start, true),
        construct_address(end, false)
    );
}
// LCOV_EXCL_STOP
namespace IPUtilities {
// Collects the IPv4 and IPv6 addresses of the local network interfaces.
// Returns a map from the binary address to its textual form, or an error when
// getifaddrs fails. Loopback addresses are left out, as are entries without an
// address, entries of other address families and addresses that cannot be rendered.
Maybe<map<IpAddress, string>>
getInterfaceIPs()
{
    struct ifaddrs *if_addr_list = nullptr;
    if (getifaddrs(&if_addr_list) == -1) {
        return genError(string("Failed to get interface IP's. Error: ") + strerror(errno));
    }

    map<IpAddress, string> interface_ips;
    for (struct ifaddrs *if_addr = if_addr_list; if_addr != nullptr; if_addr = if_addr->ifa_next) {
        if (if_addr->ifa_addr == nullptr) continue;
        if (if_addr->ifa_addr->sa_family != AF_INET && if_addr->ifa_addr->sa_family != AF_INET6) continue;

        char address_buffer[INET6_ADDRSTRLEN] = { '\0' };
        if (if_addr->ifa_addr->sa_family == AF_INET) {
            struct in_addr addr = reinterpret_cast<struct sockaddr_in *>(if_addr->ifa_addr)->sin_addr;
            if (inet_ntop(AF_INET, &addr, address_buffer, INET_ADDRSTRLEN) == nullptr) continue;
            string address_string(address_buffer);
            if (address_string.find("127.0.0.1") != string::npos) continue;

            IpAddress ip_addr;
            ip_addr.ip_type = IP_VERSION_4;
            memcpy(&ip_addr.ip.ipv4, &addr, sizeof(ip_addr.ip.ipv4));
            interface_ips.emplace(ip_addr, address_string);
        } else {
            struct in6_addr addr = reinterpret_cast<struct sockaddr_in6 *>(if_addr->ifa_addr)->sin6_addr;
            if (inet_ntop(AF_INET6, &addr, address_buffer, INET6_ADDRSTRLEN) == nullptr) continue;
            string address_string(address_buffer);
            // Exact match only: a substring search for "::1" would also discard
            // non-loopback addresses such as "fe80::1" or "2001:db8::1ff".
            if (address_string == "::1") continue;

            IpAddress ip_addr;
            ip_addr.ip_type = IP_VERSION_6;
            memcpy(&ip_addr.ip.ipv6, &addr, sizeof(ip_addr.ip.ipv6));
            interface_ips.emplace(ip_addr, address_string);
        }
    }

    if (if_addr_list != nullptr) freeifaddrs(if_addr_list);
    return interface_ips;
}
// Converts a CIDR string into the {first, last} addresses of the range it describes.
// The input is treated as IPv4 when it contains a '.', and as IPv6 otherwise.
// Returns an error when the string cannot be split into address and mask size, or
// when the family-specific conversion fails.
Maybe<pair<string, string>>
createRangeFromCidr(const string &cidr)
{
    auto cidr_values = extractAddressAndMaskSize(cidr);
    if (!cidr_values.ok()) return genError("Failed to create range from Cidr: " + cidr_values.getErr());

    const bool looks_like_v4 = (cidr.find(".") != string::npos);
    if (looks_like_v4) return createRangeFromCidrV4(cidr_values.unpack());
    return createRangeFromCidrV6(cidr_values.unpack());
}
bool
isIpAddrInRange(const IPRange &rule_ip_range, const IpAddress &ip_addr)
{
IpAddress min_ip = rule_ip_range.start;
IpAddress max_ip = rule_ip_range.end;
if (ip_addr.ip_type == IP_VERSION_4) {
if (max_ip.ip_type != IP_VERSION_4) return 0;
return
memcmp(&ip_addr.ip.ipv4, &min_ip.ip.ipv4, sizeof(struct in_addr)) >= 0 &&
memcmp(&ip_addr.ip.ipv4, &max_ip.ip.ipv4, sizeof(struct in_addr)) <= 0;
}
if (ip_addr.ip_type == IP_VERSION_6) {
if (max_ip.ip_type != IP_VERSION_6) return 0;
return
memcmp(&ip_addr.ip.ipv6, &min_ip.ip.ipv6, sizeof(struct in6_addr)) >= 0 &&
memcmp(&ip_addr.ip.ipv6, &max_ip.ip.ipv6, sizeof(struct in6_addr)) <= 0;
}
return 0;
}
// Renders an address as text: IPv6 notation when the address is IPv6, dotted IPv4
// notation for every other address type.
// Returns an empty string if the address cannot be rendered.
string
IpAddrToString(const IpAddress &address)
{
    if (address.ip_type == IP_VERSION_6) {
        char ip_str[INET6_ADDRSTRLEN] = { '\0' };
        if (inet_ntop(AF_INET6, &address.ip.ipv6, ip_str, INET6_ADDRSTRLEN) == nullptr) return string();
        // Returned by value on purpose: wrapping a temporary in move() only blocks copy elision.
        return string(ip_str);
    }

    char ip_str[INET_ADDRSTRLEN] = { '\0' };
    if (inet_ntop(AF_INET, &address.ip.ipv4, ip_str, INET_ADDRSTRLEN) == nullptr) return string();
    return string(ip_str);
}
// Builds an IpAddress from its textual form.
// The literal "any" and any string that IPAddr::createIPAddr rejects both yield a
// zeroed address of type IP_VERSION_ANY.
IpAddress
createIpFromString(const string &ip_string)
{
    IpAddress res_address = {0, IP_VERSION_ANY};

    if (ip_string != "any") {
        auto maybe_ip_addr = IPAddr::createIPAddr(ip_string);
        if (maybe_ip_addr.ok()) {
            IPAddr ip_addr = maybe_ip_addr.unpack();
            res_address.ip_type = static_cast<IpVersion>(ip_addr.getType());
            if (ip_addr.getType() != IPType::V4) {
                res_address.addr6_t = ip_addr.getIPv6();
            } else {
                res_address.addr4_t = ip_addr.getIPv4();
            }
        }
    }

    return res_address;
}
// Translates an IPAddr object into the plain IpAddress structure.
// UNINITIALIZED becomes a zeroed IPv4 field with type IP_VERSION_ANY; V4 and V6 copy
// the matching address and set the matching version. Any other type triggers
// dbgAssert, and the returned structure is then left unset.
IpAddress
ConvertToIpAddress(const IPAddr &addr) {
    IpAddress address;
    const auto addr_type = addr.getType();

    if (addr_type == IPType::UNINITIALIZED) {
        address.addr4_t = {0};
        address.ip_type = IP_VERSION_ANY;
    } else if (addr_type == IPType::V4) {
        address.addr4_t = addr.getIPv4();
        address.ip_type = IP_VERSION_4;
    } else if (addr_type == IPType::V6) {
        address.addr6_t = addr.getIPv6();
        address.ip_type = IP_VERSION_6;
    } else {
        dbgAssert(false) << "Unsupported IP type";
    }

    return address;
}
// Parses the stored text as an IP address.
// Returns the converted IpAddress, or an error carrying the parser's message.
IpAttrFromString::operator Maybe<IpAddress>()
{
    auto parsed_addr = IPAddr::createIPAddr(data);
    if (parsed_addr.ok()) return ConvertToIpAddress(parsed_addr.unpackMove());

    return genError("Could not create IP address. Error: " + parsed_addr.getErr());
}
// Parses the stored text as an IP protocol number.
// Returns an error when the text is not a number or the number is outside [0, UINT8_MAX].
IpAttrFromString::operator Maybe<IpProto>()
{
    int value;
    try {
        value = stoi(data);
    } catch (...) {
        return genError("provided value is not a legal number. Value: " + data);
    }

    const bool fits_in_proto = (value >= 0) && (value <= static_cast<int>(UINT8_MAX));
    if (!fits_in_proto) {
        return genError("provided value is not a legal ip protocol number. Value: " + data);
    }
    return static_cast<IpProto>(value);
}
// Parses the stored text as a port number.
// Returns an error when the text is not a number or the number is outside [0, UINT16_MAX].
IpAttrFromString::operator Maybe<Port>()
{
    int value;
    try {
        value = stoi(data);
    } catch (...) {
        return genError("provided value is not a legal number. Value: " + data);
    }

    const bool fits_in_port = (value >= 0) && (value <= static_cast<int>(UINT16_MAX));
    if (!fits_in_port) {
        return genError("provided value is not a legal port number. Value: " + data);
    }
    return static_cast<Port>(value);
}
}

View File

@@ -0,0 +1,3 @@
# Defines the pm library target from the sources listed below, then adds the pm_ut sub-directory.
add_library(pm general_adaptor.cc kiss_hash.cc kiss_patterns.cc kiss_pm_stats.cc kiss_thin_nfa.cc kiss_thin_nfa_analyze.cc kiss_thin_nfa_build.cc kiss_thin_nfa_compile.cc pm_adaptor.cc pm_hook.cc debugpm.cc)
add_subdirectory(pm_ut)

63
components/utils/pm/debugpm.cc Executable file
View File

@@ -0,0 +1,63 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "debug.h"
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <iostream>
#include "sasal.h"
using namespace std;
USE_DEBUG_FLAG(D_PM);
SASAL_START // Multiple Pattern Matcher
// Helper class for printing a C (printf-style) format string.
// The formatted text is rendered once, at construction, into a fixed-size buffer;
// output longer than the buffer is truncated. `buf` is always NUL-terminated.
class CFmtPrinter
{
public:
    char buf[500]; // Length limit.

    // fmt - printf-style format string.
    // va  - arguments matching `fmt`; the caller keeps ownership and must call va_end.
    explicit CFmtPrinter(const char *fmt, va_list va)
    {
        // A negative return means formatting failed and the buffer content cannot be
        // trusted, so fall back to an empty string.
        if (vsnprintf(buf, sizeof(buf), fmt, va) < 0) buf[0] = '\0';
        buf[sizeof(buf)-1] = '\0';
    }
};
// Streams the text already formatted inside a CFmtPrinter.
static ostream &
operator<<(ostream &os, const CFmtPrinter &p)
{
    os << p.buf;
    return os;
}
// Reports a printf-style formatted message through a Debug object built from "PM", `func` and `line`.
// Parameters:
// func - name of the reporting function
// line - source line of the report
// fmt - printf-style format string, followed by its arguments
// NOTE(review): nothing in this function itself stops the process; despite the name, any
// "panic" behaviour presumably comes from this three-argument Debug object - confirm in debug.h.
void
panicCFmt(const string &func, uint line, const char *fmt, ...)
{
va_list va;
va_start(va, fmt);
// The Debug temporary lives only for this statement, so it is destroyed before va_end runs.
Debug("PM", func, line).getStreamAggr() << CFmtPrinter(fmt, va);
va_end(va);
}
// Shared implementation of the debugPrtCFmt overloads: formats `fmt` with `va` and
// streams the result into a TRACE-level Debug object for the D_PM flag.
static void
debugPrtCFmtV(const char *func, uint line, const char *fmt, va_list va)
{
    Debug("PM", func, line, Debug::DebugLevel::TRACE, D_PM).getStreamAggr() << CFmtPrinter(fmt, va);
}

// Emits a printf-style formatted trace message for the D_PM debug flag.
// Parameters:
//   func - name of the reporting function
//   line - source line of the report
//   fmt  - printf-style format string, followed by its arguments
void
debugPrtCFmt(const char *func, uint line, const char *fmt, ...)
{
    va_list va;
    va_start(va, fmt);
    debugPrtCFmtV(func, line, fmt, va);
    va_end(va);
}

// Same as above, with the `const std::string &` signature that debugpm.h declares.
// Without this overload the declared function (the one the debugCFmt macro calls)
// would have no definition; the `const char *` variant is kept for existing users.
void
debugPrtCFmt(const string &func, uint line, const char *fmt, ...)
{
    va_list va;
    va_start(va, fmt);
    debugPrtCFmtV(func.c_str(), line, fmt, va);
    va_end(va);
}
SASAL_END

39
components/utils/pm/debugpm.h Executable file
View File

@@ -0,0 +1,39 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __DEBUGPM_H__
#define __DEBUGPM_H__
#include <iostream>
#include <string>
#include "debug.h"
// C-style BC functions (e.g. for PM).
// debugPrtCFmt: printf-style debug print on behalf of function `func` at line `line`.
// The format attribute lets the compiler check `fmt` (argument 3) against the variadic arguments.
// NOTE(review): make sure debugpm.cc provides a definition with exactly this `const std::string &`
// signature; a `const char *` overload alone would leave this declaration without a definition.
void debugPrtCFmt(const std::string &func, uint line, const char *fmt, ...) __attribute__((format (printf, 3, 4)));
// debugCFmt: calls debugPrtCFmt with the current function name and line, but only when
// Debug::isDebugSet(flag) is true. The arguments are not evaluated otherwise.
// The empty-if/else shape lets the macro be used as a single statement followed by ';'.
#define debugCFmt(flag, fmt, ...) \
if (!Debug::isDebugSet(flag)) \
{ \
} else \
debugPrtCFmt(__FUNCTION__, __LINE__, fmt, ##__VA_ARGS__)
// Assertions
// panicCFmt: reports a printf-style formatted message on behalf of function `func` at line `line`.
// The format attribute lets the compiler check `fmt` (argument 3) against the variadic arguments.
void panicCFmt(const std::string &func, uint line, const char *fmt, ...) __attribute__((format (printf, 3, 4)));
// assertCondCFmt: does nothing when `cond` holds; otherwise calls panicCFmt with the current
// function name, line and the formatted message. CP_LIKELY is defined outside this header
// (presumably a branch-prediction hint - confirm).
#define assertCondCFmt(cond, fmt, ...) \
if (CP_LIKELY(cond)) \
{ \
} else \
panicCFmt(__FUNCTION__, __LINE__, fmt, ##__VA_ARGS__)
#endif // __DEBUGPM_H__

View File

@@ -0,0 +1,65 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "general_adaptor.h"
#include <stdlib.h>
#include "sasal.h"
SASAL_START // Multiple Pattern Matcher
// Releases a buffer with free(). `size` and `caller` are accepted only to keep the
// signature and are not used.
void fw_kfree(void *addr, CP_MAYBE_UNUSED size_t size, CP_MAYBE_UNUSED const char *caller)
{
    free(addr);
}
// Allocates `size` bytes with malloc(). `caller` is not used.
// Returns whatever malloc() returns, including NULL on failure.
void *fw_kmalloc(size_t size, CP_MAYBE_UNUSED const char *caller)
{
    void *block = malloc(size);
    return block;
}
// Allocates `size` bytes with malloc(). `caller` and `flags` are not used.
// Returns whatever malloc() returns, including NULL on failure.
void *fw_kmalloc_ex(size_t size, CP_MAYBE_UNUSED const char *caller, CP_MAYBE_UNUSED int flags)
{
    void *block = malloc(size);
    return block;
}
// Allocates `size` bytes with malloc(). `caller` is not used.
// Returns whatever malloc() returns, including NULL on failure.
void *fw_kmalloc_sleep(size_t size, CP_MAYBE_UNUSED const char *caller)
{
    void *block = malloc(size);
    return block;
}
// Allocates `size` bytes with malloc(). `caller`, `flags`, `file` and `line` are not used.
// Returns whatever malloc() returns, including NULL on failure.
void *kiss_pmglob_memory_kmalloc_ex_(
    u_int size,
    CP_MAYBE_UNUSED const char *caller,
    CP_MAYBE_UNUSED int flags,
    CP_MAYBE_UNUSED const char *file,
    CP_MAYBE_UNUSED int line)
{
    void *block = malloc(size);
    return block;
}
// Allocates `size` bytes with malloc(). `caller` and `flags` are not used.
// Returns whatever malloc() returns, including NULL on failure.
void *kiss_pmglob_memory_kmalloc_ex(u_int size, CP_MAYBE_UNUSED const char *caller, CP_MAYBE_UNUSED int flags)
{
    void *block = malloc(size);
    return block;
}
// Allocates `size` bytes with malloc(). `caller` is not used.
// Returns whatever malloc() returns, including NULL on failure.
void *kiss_pmglob_memory_kmalloc(u_int size, CP_MAYBE_UNUSED const char *caller)
{
    void *block = malloc(size);
    return block;
}
// Releases a buffer with free(). `size` and `caller` are accepted only to keep the
// signature and are not used.
void kiss_pmglob_memory_kfree(void *addr, CP_MAYBE_UNUSED size_t size, CP_MAYBE_UNUSED const char *caller)
{
    free(addr);
}
SASAL_END

View File

@@ -0,0 +1,80 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __general_adaptor_h__
#define __general_adaptor_h__
#include "stdint.h"
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include "common.h"
#include "debug.h"
#include "debugpm.h"
// Short aliases for the basic unsigned and boolean types used by the pattern-matcher code.
using u_int = unsigned int;
using u_char = unsigned char;
using u_short = unsigned short;
using boolean_cpt = bool;
using BOOL = bool;
using u_int64 = uint64_t;
// C-style boolean literals mapped onto the C++ ones.
#define TRUE true
#define FALSE false
// Inline marker; expands to the plain `inline` keyword.
#define CP_INLINE inline
// Cache-line size in bytes used by CP_CACHELINE_ALIGNED.
#define CP_CACHELINE_SIZE 64
// Aligns the annotated entity to CP_CACHELINE_SIZE bytes.
#define CP_CACHELINE_ALIGNED __attribute__((__aligned__(CP_CACHELINE_SIZE)))
// Marks a parameter or variable as possibly unused. CP_UNUSED is not defined in this header
// (presumably it comes from common.h - confirm).
#define CP_MAYBE_UNUSED CP_UNUSED
// Byte offset of a field inside a structure; forwards to offsetof.
#define KISS_OFFSETOF(str_name, field_name) offsetof(str_name, field_name)
// Compile-time assertion: declares an array whose size is -1 when `cond` is false,
// which makes the compilation fail.
#define KISS_ASSERT_COMPILE_TIME(cond) extern int __kiss_assert_dummy[(cond)?1:-1]
// Expands to nothing in this build.
#define KISS_ASSERT_PERF(...)
// Expands to nothing in this build.
#define ASSERT_LOCKED
// Always evaluates to instance number 0 in this build.
#define kiss_multik_this_instance_num (0)
// Status code: KISS_OK (0) or KISS_ERROR (-1).
enum kiss_ret_val {
    KISS_ERROR = -1,
    KISS_OK = 0
};
// Both assertion flavours map to assertCondCFmt (condition, printf-style format, arguments).
#define KISS_ASSERT assertCondCFmt
#define KISS_ASSERT_CRASH assertCondCFmt
// Allocation flag value; defined as 0 in this build.
#define FW_KMEM_SLEEP 0
// Error reporting is compiled out: the macro expands to nothing and its arguments are discarded.
#define herror(a, b, ...)
// Debug prints go straight to printf.
#define kdprintf printf
#define kdprintf_no_prefix printf
// Memory allocation shims. The definitions in general_adaptor.cc forward to free()/malloc()
// and ignore every argument other than the pointer (for the free functions) or the size
// (for the allocation functions).
// Free functions: release `addr`.
void fw_kfree(void *addr, size_t size, const char *caller);
// Allocation functions: return a buffer of `size` bytes, or NULL when malloc() fails.
void *fw_kmalloc(size_t size, const char *caller);
void *fw_kmalloc_ex(size_t size, const char *caller, int flags);
void *fw_kmalloc_sleep(size_t size, const char *caller);
void *kiss_pmglob_memory_kmalloc_ex_(u_int size, const char *caller, int flags, const char *file, int line);
void *kiss_pmglob_memory_kmalloc_ex(u_int size, const char *caller, int flags);
void *kiss_pmglob_memory_kmalloc(u_int size, const char *caller);
void kiss_pmglob_memory_kfree(void *addr, size_t size, const char *caller);
// Set / clear the bits of `flag` in the enum-typed lvalue `e`, keeping the type of `e`.
// Every macro argument is wrapped before use, so a compound expression such as `A | B`
// may be passed as `flag`. Without that, ENUM_UNSET_FLAG(e, A | B) would negate only A.
// `e` is evaluated more than once; pass a side-effect-free lvalue.
#define ENUM_SET_FLAG(e, flag) \
    (e) = static_cast<decltype(e)>(static_cast<unsigned int>(e) | static_cast<unsigned int>(flag))
#define ENUM_UNSET_FLAG(e, flag) \
    (e) = static_cast<decltype(e)>(static_cast<unsigned int>(e) & ~static_cast<unsigned int>(flag))
// Larger / smaller of two values.
// Each argument appears twice in the expansion, so it may be evaluated twice:
// do not pass expressions with side effects (e.g. `i++`).
#define MAX(x, y) (((x)>(y))?(x):(y))
#define MIN(x, y) (((x)<(y))?(x):(y))
#endif // __general_adaptor_h__

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,586 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __KISS_HASH_H__
#define __KISS_HASH_H__
#include "general_adaptor.h"
// Handle to a hash table. struct kiss_hash is only named here, not defined in this header.
typedef struct kiss_hash *kiss_hash_t;
// A single key/value entry of the hash. Entries are linked to one another through `next`
// (presumably the chain of entries sharing one hash slot - confirm in kiss_hash.cc).
struct kiss_hashent {
// Key pointer as stored in the hash.
void *key;
// Value pointer associated with the key.
void *val;
// Next entry in the list.
struct kiss_hashent *next;
};
// Key hashing callback: maps `key` to a hash value. `info` is the opaque pointer given at hash creation.
typedef uintptr_t (*hkeyfunc_t)(const void *key, void *info);
// Key comparison callback for two keys. `info` is the opaque pointer given at hash creation.
// NOTE(review): the return convention is not stated in this header (presumably 0 means
// "equal", as with strcmp - confirm in kiss_hash.cc).
typedef int (*hcmpfunc_t)(const void *key1, const void *key2, void *info);
// Destructor callback, used for the keys and the values stored in the hash.
typedef void (*freefunc_t)(void *info);
// {group: API for KISS_HASH}
// Calls the destructor `destr` on `addr` when a destructor is set and `addr`, taken as an
// integer, is greater than 0x10 (so NULL and very small integer values are skipped).
// NOTE(review): the expansion is a bare `if` that already ends with ';'. Using it as the
// body of an unbraced if/else, or right before an `else`, will not behave as a single statement.
#define H_DESTR(destr, addr) \
if (destr && (((uintptr_t)(addr)) > 0x10)) (*destr)(addr);
// {group: API for KISS_HASH}
// Description: Create Hash Table. MT-Level: Reentrant
// Parameters:
// hsize - hash size
// keyfunc - key hashing function
// keycmp - key comparison function
// info - opaque for use of keyfunc and keycmp functions.
// Return values:
// o hash pointer
// o NULL upon failure
// See also: kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr, kiss_hash_undo_destr,
// kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
kiss_hash_t kiss_hash_create (size_t hsize, hkeyfunc_t keyfunc, hcmpfunc_t keycmp, void *info);
// {group: API for HASH}
// Description: Create Hash Table with Destructor. MT-Level: Reentrant
// Parameters:
// hsize - hash size
// keyfunc - key hashing function
// keycmp - key comparison function
// val_destr - destructor for the values of the hash
// key_destr - destructor for the keys of the hash
// info - opaque for use of keyfunc and keycmp functions.
// Return values:
// o hash pointer
// o NULL upon failure
// See also: kiss_hash_create, kiss_hash_set_destr, kiss_hash_dodestr, kiss_hash_undo_destr, kiss_hash_nelements,
// kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert, kiss_hash_delete, kiss_hash_destroy,
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
kiss_hash_t
kiss_hash_create_with_destr (
size_t hsize,
hkeyfunc_t keyfunc,
hcmpfunc_t keycmp,
freefunc_t val_destr,
freefunc_t key_destr,
void *info
);
// The public creation names are macros: each one forwards to the matching underscore-prefixed
// function and appends the file name and line number of the call site.
#define kiss_hash_create(hsize, hkeyfunc, hcmpfunc, info) \
_kiss_hash_create (hsize, hkeyfunc, hcmpfunc, info, __FILE__, __LINE__)
#define kiss_hash_create_with_destr(hsize, hkeyfunc, hcmpfunc, freefunc1, freefunc2, info) \
_kiss_hash_create_with_destr (hsize, hkeyfunc, hcmpfunc, freefunc1, freefunc2, info, __FILE__, __LINE__)
// Creation variant behind kiss_hash_create_with_ksleep. It takes the same arguments as
// _kiss_hash_create; `file` and `line` identify the call site.
// NOTE(review): how it differs from _kiss_hash_create is not visible in this header
// (presumably the allocation mode - confirm in kiss_hash.cc).
kiss_hash_t
_kiss_hash_create_with_ksleep(size_t hsize, hkeyfunc_t, hcmpfunc_t, void *info, const char *file, int line);
#define kiss_hash_create_with_ksleep(hsize, hkeyfunc, hcmpfunc, info) \
_kiss_hash_create_with_ksleep (hsize, hkeyfunc, hcmpfunc, info, __FILE__, __LINE__)
// {group: API for HASH}
// Description: Debug single hash. MT-Level: Reentrant
//This function calculates and prints the following statistics:
//o hash pointer
//o file name and line number where kiss_hash_create or kiss_hash_create_with_destr was called
//o number of elements in kiss_hash
//o number of slots in hash - hash size
//o size in bytes of memory occupied by hash maintenance structures
//o slot utilization - percentage of hash slots used to store elements
//o average number of lookups - average length of lists of elements
// Parameters:
// hash - pointer to hash
// Return values:
// size in bytes of memory occupied by hash maintenance structures.
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
// kiss_hash_insert, kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at,
// kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
// kiss_hash_bytecmp, kiss_hash_debug_all
int kiss_hash_debug(kiss_hash_t hp);
// {group: API for HASH}
// Description: Debug all hashes. MT-Level: Safe
//Iterates a list of all hash tables created in the current process and
//for each hash calls function kiss_hash_debug. In addition the total
//memory usage of hash maintenance structures is printed.
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
// kiss_hash_insert, kiss_hash_delete, kiss_hash_destroy,
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug
void kiss_hash_debug_all();
// {group: API for kiss_hash}
// Function behind the kiss_hash_create macro; `file` and `line` identify the call site.
kiss_hash_t _kiss_hash_create (size_t hsize, hkeyfunc_t, hcmpfunc_t, void *info, const char *file, int line);
// {group: API for HASH}
// Function behind the kiss_hash_create_with_destr macro. The two freefunc_t arguments are the
// value destructor followed by the key destructor; `file` and `line` identify the call site.
kiss_hash_t _kiss_hash_create_with_destr (size_t hsize, hkeyfunc_t, hcmpfunc_t, freefunc_t, freefunc_t,
void *info, const char *file, int line);
// {group: API for HASH}
// Description: Set destructor for hash elements. MT-Level: Reentrant
//Keys and values destructors are called for every hash key-value pair when the hash is destroyed.
// Parameters:
// hp - hash
// val_destr - destructor for the values of the hash
// key_destr - destructor for the keys of the hash
// Return values:
// hash pointer
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_dodestr, kiss_hash_undo_destr,
// kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
kiss_hash_t kiss_hash_set_destr (kiss_hash_t hp, freefunc_t val_destr, freefunc_t key_destr);
// {group: API for kiss_hash}
// Description: Enable hash element destruction. MT-Level: Reentrant
//Hash is created with destruction of elements disabled by default.
//This function enables destruction upon a call to kiss_hash_destroy.
//Meaning, the hash will automatically call destructors when an entry gets
//deleted from the hash. Usually this is not the case!
// Parameters:
// hp - hash
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_undo_destr,
// kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
void kiss_hash_dodestr (kiss_hash_t hp);
// {group: API for HASH}
// Description: Disable hash element destruction. MT-Level: Reentrant
// Parameters:
// hp - hash
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
// kiss_hash_delete, kiss_hash_destroy,
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
void kiss_hash_undo_destr (kiss_hash_t hp);
// {group: API for HASH}
// Description: Number of hash elements. MT-Level: Reentrant
// Parameters:
// hash - hash table
// Return values:
// number of elements
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
// kiss_hash_delete, kiss_hash_destroy,
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
int kiss_hash_nelements (kiss_hash_t hash);
// {group: API for HASH}
// Description: Hash size. MT-Level: Reentrant
// Parameters:
// hash - hash table
// Return values:
// Size of hash
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
// kiss_hash_insert, kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at,
// kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
int kiss_hash_get_size (kiss_hash_t hash);
// {group: API for HASH}
// Description: Return address of the pointer to the value in the hash table.
// Parameters:
// hp - hash pointer
// key - hash key
// Return values:
// hash entry
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_lookup, kiss_hash_lookkey, kiss_hash_insert,
// kiss_hash_delete, kiss_hash_destroy,
// kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
void **kiss_hash_findaddr (kiss_hash_t hp, const void *key);
// {group: API for HASH}
// Description: Lookup hash value. MT-Level: Reentrant
// Parameters:
// hp - hash pointer
// key - hash key
// Return values:
// o hash value
// o NULL upon failure
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookkey, kiss_hash_insert,
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
void *kiss_hash_lookup (kiss_hash_t hp, const void *key);
// {group: API for HASH}
// Description: Lookup hash key. MT-Level: Reentrant
//Returns the key pointer as stored in the hash table.
// Parameters:
// hp - hash pointer
// key - hash key that has a value equal to that of the key stored in the hash.
// Return values:
// o hash key
// o NULL upon failure
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_insert,
// kiss_hash_delete, kiss_hash_destroy,kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
void *kiss_hash_lookkey (kiss_hash_t hp, const void *key);
// {group: API for HASH}
// Description: Insert hash element. MT-Level: Reentrant
// Parameters:
// hp - hash pointer
// key - hash key
// val - hash val
// Return values:
// >0 - success
// 0 - upon failure
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
// kiss_hash_delete, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
int kiss_hash_insert (kiss_hash_t hp, void *key, void *val);
// {group: API for HASH}
// Description: Delete hash element. MT-Level: Reentrant
//Delete hash element and return a value for the key.
// Parameters:
// hp - hash pointer
// key - hash key
// Return values:
// o hash val
// o NULL upon failure
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
// kiss_hash_insert, kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue,
// kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
void *kiss_hash_delete (kiss_hash_t hash, const void *key);
// {group: API for HASH}
// Description: Destroy hash. MT-Level: Reentrant
//If destructor functions were defined in the call to kiss_hash_create_with_destr or kiss_hash_set_destr,
//function kiss_hash_dodestr must be called to enable element destruction.
// Parameters:
// hp - hash pointer
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr,kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
// kiss_hash_insert, kiss_hash_delete, kiss_hash_find_hashent, kiss_hash_insert_at, kiss_hash_strvalue,
// kiss_hash_strcmp, kiss_hash_intvalue, kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug,
// kiss_hash_debug_all
void kiss_hash_destroy (kiss_hash_t hp);
// {group: API for HASH}
// Description: Find hash entry. MT-Level: Reentrant
//Used as an efficient but somewhat ugly interface for find/insert operation.
//What it does is to return an address of a pointer to a hashent structure containing the key/val pair if found.
//If not it returns the address of the pointer in which we can append the new val/pair
//thus avoiding an unnecessary repeated search.
//We can check if key was found by checking whether the pointer is zero or not.
//This function is usually used with kiss_hash_insert_at.
// Parameters:
// hp - hash pointer
// key - hash key
// Return values:
// hash entry
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
// kiss_hash_insert, kiss_hash_delete, kiss_hash_destroy, kiss_hash_insert_at, kiss_hash_strvalue, kiss_hash_strcmp,
// kiss_hash_intvalue, kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
struct kiss_hashent ** kiss_hash_find_hashent(kiss_hash_t hp, const void *key);
// {group: API for HASH}
// Description: Insert hash element at specified position. MT-Level: Reentrant
// This function should be used together with kiss_hash_find_hashent to insert
// the value in case it was not found in the hash.
// Parameters:
// hp - hash pointer
// key - hash key
// val - hash val
// hloc - insertion position; presumably the address returned by kiss_hash_find_hashent
// for the same key (TODO confirm against the implementation)
// Return values:
// o 0 upon failure
// o number of hash elements after insertion in case of success.
// See also: kiss_hash_create, kiss_hash_create_with_destr, kiss_hash_set_destr, kiss_hash_dodestr,
// kiss_hash_undo_destr, kiss_hash_nelements, kiss_hash_findaddr, kiss_hash_lookup, kiss_hash_lookkey,
// kiss_hash_insert, kiss_hash_delete,
// kiss_hash_destroy, kiss_hash_find_hashent, kiss_hash_strvalue, kiss_hash_strcmp, kiss_hash_intvalue,
// kiss_hash_bytevalue, kiss_hash_bytecmp, kiss_hash_debug, kiss_hash_debug_all
int kiss_hash_insert_at (kiss_hash_t hp, void *key, void *val, struct kiss_hashent**hloc);
// Convenience wrappers around kiss_hash_create() for the built-in key types.
// Each one passes the matching hashing and comparison functions declared below.

// Create a hash keyed by strings (kiss_hash_strvalue / kiss_hash_strcmp); no info is passed.
#define kiss_hash_strcreate(sz) \
    kiss_hash_create(sz, (hkeyfunc_t)kiss_hash_strvalue, (hcmpfunc_t)kiss_hash_strcmp, NULL)
// Create a hash keyed by integers (kiss_hash_intvalue / kiss_hash_intcmp); no info is passed.
#define kiss_hash_intcreate(sz) \
    kiss_hash_create(sz, (hkeyfunc_t)kiss_hash_intvalue, (hcmpfunc_t)kiss_hash_intcmp, NULL)
// Create a hash keyed by byte buffers (kiss_hash_bytevalue / kiss_hash_bytecmp).
// esz is passed to the hash as the opaque info pointer. It is parenthesized so that
// an expression argument (e.g. "a + b" or "c ? x : y") is cast as a whole rather than
// having the cast bind to its first operand only.
#define kiss_hash_bytecreate(n, esz) \
    kiss_hash_create(n, (hkeyfunc_t)kiss_hash_bytevalue, (hcmpfunc_t)kiss_hash_bytecmp, (void *)(esz))
// The following provide the hash table data type interface.
// These functions can be provided by the user.
// The default provided functions implement a string hash.
// {group: API for HASH}
// Description: Hashing function for string hash.
//This function is used by kiss_hash_strcreate().
// Parameters:
// vs - key
// info - opaque
// Return values:
// value of the hash function.
uintptr_t kiss_hash_strvalue (const void *vs, void *info);
// {group: API for HASH}
// Description: Comparison function for string hash.
//This function is used by kiss_hash_strcreate().
// Parameters:
// vk1 - key
// vk2 - key
// info - opaque
// Return values:
// 0 - keys are equal
// !0 - keys are different
int kiss_hash_strcmp (const void *vk1, const void *vk2, void *info);
// {group: API for HASH}
// Description: Hashing function for integer hash.
//This function is used by kiss_hash_intcreate().
// Parameters:
// v - key
// info - opaque
// Return values:
// value of the hash function.
uintptr_t kiss_hash_intvalue (const void* v, void *info);
// {group: API for HASH}
// Description: Comparison function for integer hash.
//This function is used by kiss_hash_intcreate().
// Parameters:
// vv1 - key
// vv2 - key
// info - opaque
// Return values:
// 0 - keys are equal
// !0 - keys are different
int kiss_hash_intcmp (const void* vv1, const void* vv2, void *info);
// {group: API for HASH}
// Description: Hashing function for byte hash.
//This function is used by kiss_hash_bytecreate().
// Parameters:
// data - key
// info - opaque (kiss_hash_bytecreate() passes its esz argument here)
// Return values:
// value of the hash function.
uintptr_t kiss_hash_bytevalue (const void *data, void *info);
// {group: API for HASH}
// Description: Comparison function for byte hash.
//This function is used by kiss_hash_bytecreate().
// Parameters:
// d1 - key
// d2 - key
// info - opaque (kiss_hash_bytecreate() passes its esz argument here)
// Return values:
// 0 - keys are equal
// !0 - keys are different
int kiss_hash_bytecmp (const void *d1, const void *d2, void *info);
// {group: API for HASH ITERATOR}
// Iterator handle: a pointer to struct kiss_hash_iter.
typedef struct kiss_hash_iter *kiss_hash_iterator;
// {group: API for HASH ITERATOR}
// Description: Create hash iterator. MT-Level: Reentrant
// Parameters:
// hp - hash
// Return values:
// o iterator object
// o NULL upon failure
// See also:
// kiss_hash_iterator_next, kiss_hash_iterator_next_key, kiss_hash_iterator_destroy
kiss_hash_iterator kiss_hash_iterator_create (kiss_hash_t hp);
// {group: API for HASH ITERATOR}
// Description: Return next hash value. MT-Level: Reentrant
// Parameters:
// hit - hash iterator
// Return values:
// o next hash value
// o NULL upon failure
// See also:
// kiss_hash_iterator_create, kiss_hash_iterator_next_key, kiss_hash_iterator_destroy
void *kiss_hash_iterator_next (kiss_hash_iterator hit);
// {group: API for HASH ITERATOR}
// Description: Return next hash key. MT-Level: Reentrant
// Parameters:
// hit - hash iterator
// Return values:
// o next hash key
// o NULL upon failure
// See also:
// kiss_hash_iterator_create, kiss_hash_iterator_next, kiss_hash_iterator_destroy
void *kiss_hash_iterator_next_key (kiss_hash_iterator hit);
// {group: API for HASH ITERATOR}
// Description: Destroy hash iterator. MT-Level: Reentrant
// Parameters:
// hit - hash iterator (presumably one obtained from kiss_hash_iterator_create - TODO confirm)
// See also:
// kiss_hash_iterator_create, kiss_hash_iterator_next, kiss_hash_iterator_next_key
void kiss_hash_iterator_destroy (kiss_hash_iterator hit);
// NOTE(review): the declarations below carry no description in this header; their exact
// semantics (return conventions, ownership of copied iterators, which "free"/"destroy"
// call pairs with which constructor) must be confirmed against the implementation.
// {group: API for ITERATOR}
int kiss_hash_iterator_next_ent(kiss_hash_iterator hit);
// {group: API for ITERATOR}
void * kiss_hash_iterator_get_key(kiss_hash_iterator hit);
// {group: API for ITERATOR}
void * kiss_hash_iterator_get_val(kiss_hash_iterator hit);
// {group: API for ITERATOR}
struct kiss_hashent * kiss_hash_iterator_get_hashent(kiss_hash_iterator hit);
// {group: API for ITERATOR}
int kiss_hash_iterator_equal(kiss_hash_iterator hit1, kiss_hash_iterator hit2);
// {group: API for ITERATOR}
kiss_hash_iterator kiss_hash_iterator_copy(kiss_hash_iterator hit);
// {group: API for ITERATOR}
void kiss_hash_iterator_free(kiss_hash_iterator hit);
// {group: API for ITERATOR}
void kiss_hash_iterator_set_begin(kiss_hash_iterator hit);
// {group: API for ITERATOR}
void kiss_hash_iterator_set_end(kiss_hash_iterator hit);
// {group: API for HASH}
// Iterator-returning counterpart of kiss_hash_find_hashent (same hp/key parameters).
kiss_hash_iterator kiss_hash_find_hashent_new(kiss_hash_t hp, const void *key);
// {group: API for HASH ITERATOR}
void kiss_hash_delete_by_iter(kiss_hash_iterator hit);
// - - - - - - - - - - - - - - -
// Hash resize mechanism
// - - - - - - - - - - - - - - -
// {group: API for HASH RESIZE}
// Determine if hash size can increase, decrease or both.
// KISS_HASH_SIZE_INC_DEC has the numeric value of KISS_HASH_SIZE_INCREASE | KISS_HASH_SIZE_DECREASE.
typedef enum {
KISS_HASH_SIZE_STATIC = 0, // hash size is kept fixed
KISS_HASH_SIZE_INCREASE = 1,
KISS_HASH_SIZE_DECREASE = 2,
KISS_HASH_SIZE_INC_DEC = 3
} KissHashResizeDirection;
// {group: API for HASH RESIZE}
// How the new hash size is computed on resize; resizing by a factor is the only method defined.
typedef enum {
KISS_HASH_RESIZE_METHOD_UNKNOWN = 0,
KISS_HASH_RESIZE_BY_FACTOR = 1
} KissHashResizeMethod;
// {group: API for HASH RESIZE}
// Default maximal hash size:
// Hash size will not increase beyond this value unless stated otherwise by the application
#define DEFAULT_KISS_HASH_SIZE (1<<17)
// {group: API for HASH RESIZE}
// Default value for resizing the hash by a factor (see KISS_HASH_RESIZE_BY_FACTOR)
#define DEFAULT_KISS_HASH_RESIZE_FACTOR_VALUE 4
// {group: API for HASH RESIZE}
// Default trigger ratio for resizing the hash by a factor
#define DEFAULT_KISS_HASH_RESIZE_FACTOR_TRIG_RATIO 2
// {group: API for HASH RESIZE}
// Resize application callback: This callback will be invoked at every successful resize operation.
// Parameters:
// hp - the hash table
// app_info - opaque application data
typedef int (* HashResizeCb_t) (kiss_hash_t hp, void *app_info);
// Hash resize mode object & access API.
// Used for setting the resize parameters of a hash.
// {group: API for HASH RESIZE}
typedef struct _KissHashResizeMode KissHashResizeMode;
// Access API for the KissHashResizeMode object: create/destroy plus set/get pairs for
// the resize method, the resize direction and the maximal size.
// NOTE(review): the meaning of the int return values is not documented in this header - confirm
// against the implementation.
// {group: API for HASH RESIZE}
int KissHashResizeMode_create(KissHashResizeMode **resize_mode);
// {group: API for HASH RESIZE}
void KissHashResizeMode_destroy(KissHashResizeMode *resize_mode);
// {group: API for HASH RESIZE}
// value and trigger_ratio presumably correspond to DEFAULT_KISS_HASH_RESIZE_FACTOR_VALUE and
// DEFAULT_KISS_HASH_RESIZE_FACTOR_TRIG_RATIO - TODO confirm.
int KissHashResizeMode_set_method(
KissHashResizeMode *resize_mode,
KissHashResizeMethod method,
u_int value,
u_int trigger_ratio);
// {group: API for HASH RESIZE}
int KissHashResizeMode_get_method(
const KissHashResizeMode *resize_mode,
KissHashResizeMethod *method,
u_int *value,
u_int *trigger_ratio);
// {group: API for HASH RESIZE}
int KissHashResizeMode_set_direction(KissHashResizeMode *resize_mode, KissHashResizeDirection direction);
// {group: API for HASH RESIZE}
int KissHashResizeMode_get_direction(const KissHashResizeMode *resize_mode, KissHashResizeDirection *direction);
// {group: API for HASH RESIZE}
int KissHashResizeMode_set_max_size(KissHashResizeMode *resize_mode, u_int max_size);
// {group: API for HASH RESIZE}
int KissHashResizeMode_get_max_size(const KissHashResizeMode *resize_mode, u_int *max_size);
// {group: API for HASH RESIZE}
// Description: Set the application callback that is invoked on resize (see HashResizeCb_t).
// Parameters:
// hp - [in] pointer to hash table
// resize_callback - [in] the callback
int kiss_hash_set_resize_cb(kiss_hash_t hp, HashResizeCb_t resize_callback);
// {group: API for HASH RESIZE}
// Description: Set hash dynamic size parameters.
// Parameters:
// hp - [in] pointer to hash table
// resize_mode - [in] should be created and set using the access API to the KissHashResizeMode object.
// After using the set API, this object can be destroyed.
//
int kiss_hash_set_dynamic_size(kiss_hash_t hp, const KissHashResizeMode *resize_mode);
// {group: API for HASH RESIZE}
// Description: Get hash dynamic size parameters.
// Parameters:
// hp - [in] pointer to hash table
// resize_mode - [out] a read-only parameter that should not be changed by the application.
int kiss_hash_get_dynamic_size(kiss_hash_t hp, const KissHashResizeMode **resize_mode);
// {group: API for HASH RESIZE}
// Description: This API will cause an immediate resizing of the hash
// table, according to the parameters given in the input
// KissHashResizeMode object (if NULL, the resize will be done
// according to the parameters as last set by the application).
//
// Note that the KissHashResizeMode object parameters are
// not kept on the hash handle for future resize operations.
int kiss_hash_trigger_resize(kiss_hash_t hp, const KissHashResizeMode *resize_mode);
#endif // __KISS_HASH_H__

View File

@@ -0,0 +1,134 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "kiss_patterns.h"
#include <vector>
#include <ctype.h>
#include "general_adaptor.h"
#include "pm_adaptor.h"
#include "sasal.h"
SASAL_START // Multiple Pattern Matcher
// Write the printable form of a single character into buf.
// Printable characters are written as-is; anything else - and a double quote when
// for_csv is set - is written as a "\xNN" hex escape.
// At most len bytes are written, so a representation that does not fit is truncated.
// No terminating '\0' is written. Returns the number of bytes written.
static u_int
pm_str_one_char_to_debug_buf(u_char *buf, int len, u_char ch, BOOL for_csv)
{
    char repr[10];
    int repr_len;
    const bool needs_escape = !isprint(ch) || (for_csv && ch == '"');

    if (needs_escape) {
        snprintf(repr, sizeof(repr), "\\x%02x", ch);
        repr[sizeof(repr)-1] = '\0';
        repr_len = strlen(repr);
    } else {
        repr[0] = ch;
        repr_len = 1;
    }

    // Never write more than the caller allows (the caller reserves room for its own '\0').
    const int copy_len = (repr_len > len) ? len : repr_len;
    bcopy(repr, buf, copy_len);
    return copy_len;
}
// Debug only - builds a printable, null-terminated copy of a string that is not null-terminated.
// Each input byte is rendered by pm_str_one_char_to_debug_buf. Output that does not fit in the
// 200-byte buffer is dropped.
// The result lives in a function-local static buffer, so every call overwrites the previous result.
static const u_char *
pm_str_to_debug_charp_ex(const u_char *str, u_int size, BOOL for_csv)
{
    static u_char debug_buf[200];
    u_int used = 0;

    for (u_int idx = 0; idx < size; idx++) {
        int room = sizeof(debug_buf) - used;
        // Keep the last byte for the terminator.
        if (room <= 1) break;
        used += pm_str_one_char_to_debug_buf(debug_buf + used, room - 1, str[idx], for_csv);
    }
    debug_buf[used] = '\0';

    return debug_buf;
}
static const u_char *
pm_str_to_debug_charp(const u_char *str, u_int size)
{
return pm_str_to_debug_charp_ex(str, size, FALSE);
}
// *********************** STRING *******************************
// Construct a pattern from a char buffer.
// Delegates to the u_char constructor, reinterpreting the buffer pointer; all other arguments pass through.
kiss_pmglob_string_s::kiss_pmglob_string_s(const char *buffer, size_t size, int _pattern_id, u_int _flags)
:
kiss_pmglob_string_s(reinterpret_cast<const u_char *>(buffer), size, _pattern_id, _flags)
{
}
// Construct a pattern from a byte buffer.
// The size bytes at buffer are copied into buf, so the caller's buffer need not outlive the object.
// buffer must be non-null and size must be positive (checked with dbgAssert).
kiss_pmglob_string_s::kiss_pmglob_string_s(const u_char *buffer, size_t size, int _pattern_id, u_int _flags)
        :
    pattern_id(_pattern_id),
    flags(_flags)
{
    dbgAssert(buffer && size > 0) << "Illegal arguments";
    buf.assign(buffer, buffer + size);
}
// Returns the id of the pattern (the pattern_id given at construction)
int
kiss_pmglob_string_get_id(const kiss_pmglob_string_s *pm_string)
{
KISS_ASSERT(pm_string != nullptr, "Illegal arguments");
return pm_string->pattern_id;
}
// Returns the size of the pattern, i.e. the number of bytes held in buf.
// The container's size is implicitly converted to the u_int return type.
u_int
kiss_pmglob_string_get_size(const kiss_pmglob_string_s * pm_string)
{
KISS_ASSERT(pm_string != nullptr, "Illegal arguments");
return pm_string->buf.size();
}
// Returns the bytes of the pattern as u_char*.
// The pointer refers to the object's own buffer and is not null-terminated;
// use kiss_pmglob_string_get_size() for the length.
const u_char *
kiss_pmglob_string_get_pattern(const kiss_pmglob_string_s *pm_string)
{
KISS_ASSERT(pm_string != nullptr, "Illegal arguments");
return pm_string->buf.data();
}
// Debug only - printable, null-terminated rendering of the pattern's bytes.
// The result is produced by pm_str_to_debug_charp from the pattern's data and size.
const u_char *
kiss_pmglob_string_to_debug_charp(const kiss_pmglob_string_s *pm_string)
{
    const u_char *pattern = kiss_pmglob_string_get_pattern(pm_string);
    u_int pattern_size = kiss_pmglob_string_get_size(pm_string);

    return pm_str_to_debug_charp(pattern, pattern_size);
}
// Returns the flags of the pattern (the flags given at construction)
u_int
kiss_pmglob_string_get_flags(const kiss_pmglob_string_s *pm_string)
{
KISS_ASSERT(pm_string != nullptr, "Illegal arguments");
return pm_string->flags;
}
SASAL_END

View File

@@ -0,0 +1,74 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __kiss_patterns_h__
#define __kiss_patterns_h__
#include <vector>
#include <list>
#include "pm_adaptor.h"
// kiss_pmglob_string functions
// A single pattern: its bytes, an id and flags, all supplied at construction.
class kiss_pmglob_string_s {
public:
// Both constructors take the pattern bytes and their length; they differ only in the
// pointer type of buffer.
explicit kiss_pmglob_string_s(const char *buffer, size_t size, int _pattern_id, u_int _flags);
explicit kiss_pmglob_string_s(const u_char *buffer, size_t size, int _pattern_id, u_int _flags);
// The pattern bytes
std::vector<u_char> buf;
// The pattern id
int pattern_id;
// The pattern flags
u_int flags;
};
// Returns the size of pattern
//
// Parameters:
// pattern - the pattern.
// Return value:
// u_int - the size of this pattern.
KISS_APPS_CPAPI
u_int kiss_pmglob_string_get_size(const kiss_pmglob_string_s *pattern);
// Returns the bytes of the pattern as u_char*
//
// Parameters:
// pattern - the pattern.
// Return value:
// u_char * - the bytes of this pattern.
KISS_APPS_CPAPI
const u_char *kiss_pmglob_string_get_pattern(const kiss_pmglob_string_s *pattern);
// For debugging only - returns a printable pointer for the string.
// Unprintable characters are replaced with "\xNN" hex escapes.
//
// Note: the returned buffer is shared between calls, so in multithreaded situations it may be
// overwritten by another thread.
// At worst, this would lead to an incorrect string being printed.
KISS_APPS_CPAPI
const u_char *kiss_pmglob_string_to_debug_charp(const kiss_pmglob_string_s *pm_string);
// Returns the id of pattern
//
// Parameters:
// pattern - the pattern.
// Return value:
// int - the pattern_id of this pattern.
KISS_APPS_CPAPI
int kiss_pmglob_string_get_id(const kiss_pmglob_string_s *pattern);
// Returns the flags of pattern
//
// Parameters:
// pattern - the pattern.
// Return value:
// u_int - the flags of this pattern.
KISS_APPS_CPAPI
u_int kiss_pmglob_string_get_flags(const kiss_pmglob_string_s *pattern);
#endif // __kiss_patterns_h__

View File

@@ -0,0 +1,429 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "general_adaptor.h"
#include "sasal.h"
// ********************* INCLUDES **************************
#include "kiss_pm_stats.h"
// ********************* INCLUDES **************************
SASAL_START // Multiple Pattern Matcher
// ********************* FUNCTIONS **************************
// Initialize the common statistics
// Zeroes the whole structure. The per-CPU execution statistics allocation is compiled out
// (#if 0), so after this call exec is NULL and exec_num_cpus is 0.
// Returns KISS_ERROR if new_stats is NULL, KISS_OK otherwise.
kiss_ret_val
kiss_pm_stats_common_init(kiss_pm_stats_common new_stats)
{
static const char rname[] = "kiss_pm_stats_common_init";
if (new_stats == NULL) {
kiss_debug_err(K_PM, ("%s: stats is zero\n", rname));
return KISS_ERROR;
}
bzero(new_stats, sizeof(struct kiss_pm_stats_common_s));
// Disabled: allocation of the per-CPU execution statistics array.
#if 0
if (kiss_pm_stats_take_exec_time) {
new_stats->exec_num_cpus = kiss_multik_instance_num;
new_stats->exec = kiss_pmglob_memory_kmalloc_ex(
new_stats->exec_num_cpus * sizeof(struct kiss_pm_stats_dynamic_aligned_s),
rname,
(FW_KMEM_NOSLEEP| FW_KMEM_RETURN_ALIGN_PTR)
);
if (!new_stats->exec) {
kiss_debug_err(K_PM, ("%s: Error in allocating the execution stats\n", rname));
return KISS_ERROR;
}
bzero(new_stats->exec, new_stats->exec_num_cpus*sizeof(struct kiss_pm_stats_dynamic_aligned_s));
}
#endif
return KISS_OK;
}
// Exclusive upper bound on exec_num_cpus for which the exec array is released.
#define KISS_MULTIK_MAX_INSTANCE_NUM 40
// Free the common statistics
// Releases the per-CPU exec array and clears the pointer. Nothing is done when stats or
// stats->exec is NULL, or when exec_num_cpus is 0 or not below KISS_MULTIK_MAX_INSTANCE_NUM.
// NOTE(review): an exec array with exactly KISS_MULTIK_MAX_INSTANCE_NUM entries is not freed -
// confirm whether the bound is meant to be exclusive.
void
kiss_pm_stats_common_free(kiss_pm_stats_common stats)
{
    static const char rname[] = "kiss_pm_stats_common_free";

    if (!stats || !stats->exec) {
        return;
    }
    if (stats->exec_num_cpus == 0 || stats->exec_num_cpus >= KISS_MULTIK_MAX_INSTANCE_NUM) {
        return;
    }

    kiss_pmglob_memory_kfree(
        stats->exec,
        stats->exec_num_cpus * sizeof(struct kiss_pm_stats_dynamic_aligned_s),
        rname
    );
    stats->exec = NULL;
}
// Update build-time statistics
// type selects which of the two values is stored: the memory size (bytes), the compilation
// time, or both. Any other type value leaves the statistics untouched.
void
kiss_pm_stats_common_update_compile(kiss_pm_stats_common stats, u_int bytes, u_int compilation_time,
    enum kiss_pm_stats_update_compile_type type)
{
    KISS_ASSERT_PERF(stats, ("Illegal arguments"));

    const bool set_memory = (type == UPDATE_COMPILE_STATS_MEM || type == UPDATE_COMPILE_STATS_BOTH);
    const bool set_time = (type == UPDATE_COMPILE_STATS_TIME || type == UPDATE_COMPILE_STATS_BOTH);

    if (set_memory) {
        stats->compile.memory_bytes = bytes;
    }
    if (set_time) {
        stats->compile.compilation_time = compilation_time;
    }
}
// Will adding to an unsigned variable cause it to wrap around?
// Relies on unsigned arithmetic being modular: the sum is smaller than old_val only if it wrapped.
// Both arguments are evaluated more than once, so they must not have side effects.
// Operands must be unsigned int or wider; narrower unsigned types are promoted to int before the
// addition and the check would not detect their wrap-around.
#define ADDITION_WOULD_WRAP_AROUND(old_val, delta) \
((old_val) + (delta) < (old_val))
// Restart the buffer length average: clears the running total and its sample count so that
// a new buffer can be added without wrap-around. The recorded maximum is kept.
static void
handle_buflen_stats_wraparound(struct kiss_pm_stats_dynamic_s *cur_kern_inst_stats)
{
    auto &buflen = cur_kern_inst_stats->buflen;

    buflen.total = 0;
    buflen.sample_num = 0;
}
// Restart the execution time averages: clears both running totals (PM and user callback) and
// their shared sample count so that a new sample can be added without wrap-around.
// The recorded maximums are kept.
static void
handle_runtime_stats_wraparound(struct kiss_pm_stats_dynamic_s *cur_kern_inst_stats)
{
    auto &runtime = cur_kern_inst_stats->runtime;

    runtime.total_exec_time = 0;
    runtime.user_cb_exec_time = 0;
    runtime.sample_num = 0;
}
// Update run-time statistics
// Records one executed buffer in the statistics entry of the current instance
// (kiss_multik_this_instance_num): buffer length total/count/maximum, number of buffers,
// number of matches and the maximal number of matches on a single buffer.
// Does nothing when stats is NULL or the per-CPU exec array was not allocated.
// Parameters:
//  stats - the common statistics
//  buf_size - length of the executed buffer
//  num_of_matches - number of matches found on the buffer
// NOTE(review): the instance index is not checked against exec_num_cpus - confirm that it is
// always in range.
void
kiss_pm_stats_common_update_exec(kiss_pm_stats_common stats, u_int buf_size, u_int num_of_matches)
{
    struct kiss_pm_stats_dynamic_s *cur_kern_inst_stats;

    KISS_ASSERT_PERF(stats, ("Illegal arguments"));
    // Check stats itself as well, like the other functions in this file do, so that a NULL
    // handle is harmless even if the assertion above does not stop execution.
    if (!stats || !stats->exec) {
        return;
    }

    ASSERT_LOCKED;
    cur_kern_inst_stats = &(stats->exec[kiss_multik_this_instance_num].stats);

    // Buffer length statistics
    if (ADDITION_WOULD_WRAP_AROUND(cur_kern_inst_stats->buflen.total, buf_size)) {
        handle_buflen_stats_wraparound(cur_kern_inst_stats);
    }
    cur_kern_inst_stats->buflen.total += buf_size;
    cur_kern_inst_stats->buflen.sample_num++;
    if (buf_size > cur_kern_inst_stats->buflen.max) {
        cur_kern_inst_stats->buflen.max = buf_size;
    }

    // General statistics
    cur_kern_inst_stats->num_of_buffs++;
    cur_kern_inst_stats->num_of_matches += num_of_matches;
    if (num_of_matches > cur_kern_inst_stats->max_matches_on_buf) {
        cur_kern_inst_stats->max_matches_on_buf = num_of_matches;
    }
}
// Update run-time (execution) statistics
// Records one execution time sample in the statistics entry of the current instance
// (kiss_multik_this_instance_num).
// Does nothing when stats is NULL or the per-CPU exec array was not allocated.
// Parameters:
//  stats - the common statistics
//  exec_time - total execution time, including the time spent in user callbacks
//  user_cb_time - time spent in user callbacks
void
kiss_pm_stats_common_update_exec_time(kiss_pm_stats_common stats, u_int exec_time, u_int user_cb_time)
{
    struct kiss_pm_stats_dynamic_s *cur_kern_inst_stats;

    if (!stats || !stats->exec) {
        return;
    }

    ASSERT_LOCKED;
    cur_kern_inst_stats = &(stats->exec[kiss_multik_this_instance_num].stats);

    // The execution time includes the callback, but we want the net time.
    // Both values are unsigned: if the callback time is reported as larger than the total,
    // a plain subtraction would wrap to a huge value and corrupt the maximum and the totals.
    // Treat that case as a net time of zero.
    exec_time = (exec_time > user_cb_time) ? (exec_time - user_cb_time) : 0;

    // take care of wrap around
    if (ADDITION_WOULD_WRAP_AROUND(cur_kern_inst_stats->runtime.total_exec_time, exec_time) ||
        ADDITION_WOULD_WRAP_AROUND(cur_kern_inst_stats->runtime.user_cb_exec_time, user_cb_time)) {
        handle_runtime_stats_wraparound(cur_kern_inst_stats);
    }

    cur_kern_inst_stats->runtime.total_exec_time += exec_time;
    cur_kern_inst_stats->runtime.user_cb_exec_time += user_cb_time;
    cur_kern_inst_stats->runtime.sample_num++;

    // Updating the max values
    if (exec_time > cur_kern_inst_stats->runtime.max_exec_time) {
        cur_kern_inst_stats->runtime.max_exec_time = exec_time;
    }
    if (user_cb_time > cur_kern_inst_stats->runtime.user_cb_max_time) {
        cur_kern_inst_stats->runtime.user_cb_max_time = user_cb_time;
    }
}
// Clear all runtime statistics
// Zeroes the statistics of each of the exec_num_cpus entries in the exec array.
// Does nothing when stats is NULL or the exec array was not allocated.
void
kiss_pm_stats_common_reset_exec(kiss_pm_stats_common stats)
{
    if (!stats || !stats->exec) {
        return;
    }

    for (u_int cpu = 0; cpu < stats->exec_num_cpus; cpu++) {
        bzero(&(stats->exec[cpu].stats), sizeof(stats->exec[cpu].stats));
    }
}
// Aggregate the run-time statistics from all cpus in src to dst
static void
kiss_pm_stats_common_aggregate_cpus(struct kiss_pm_stats_dynamic_s *dst, const struct kiss_pm_stats_common_s *src)
{
u_int i;
KISS_ASSERT_PERF(src, ("Illegal arguments"));
if(src && src->exec)
{
for (i = 0; i < src->exec_num_cpus; i++) {
struct kiss_pm_stats_dynamic_s *cur_cpu_src = &(src->exec[i].stats);
// Buffer length statistics - add and avoid wrap-around
if (ADDITION_WOULD_WRAP_AROUND(dst->buflen.total, cur_cpu_src->buflen.total)) {
handle_buflen_stats_wraparound(dst);
}
dst->buflen.total += cur_cpu_src->buflen.total;
dst->buflen.sample_num += cur_cpu_src->buflen.sample_num;
dst->buflen.max = MAX(dst->buflen.max, cur_cpu_src->buflen.max);
// General statistics
dst->num_of_matches += cur_cpu_src->num_of_matches;
dst->num_of_stage1_matches += cur_cpu_src->num_of_stage1_matches;
dst->num_of_stage22_matches += cur_cpu_src->num_of_stage22_matches;
dst->num_of_stage23_matches += cur_cpu_src->num_of_stage23_matches;
dst->num_of_buffs += cur_cpu_src->num_of_buffs;
if (dst->max_matches_on_buf < cur_cpu_src->max_matches_on_buf) {
dst->max_matches_on_buf = cur_cpu_src->max_matches_on_buf;
}
// Execution time statistics - add and avoid wrap-around
if (ADDITION_WOULD_WRAP_AROUND(dst->runtime.total_exec_time, cur_cpu_src->runtime.total_exec_time) ||
ADDITION_WOULD_WRAP_AROUND(dst->runtime.user_cb_exec_time, cur_cpu_src->runtime.user_cb_exec_time)) {
handle_runtime_stats_wraparound(dst);
}
dst->runtime.total_exec_time += cur_cpu_src->runtime.total_exec_time;
dst->runtime.user_cb_exec_time += cur_cpu_src->runtime.user_cb_exec_time;
dst->runtime.sample_num += cur_cpu_src->runtime.sample_num;
dst->runtime.max_exec_time = MAX(dst->runtime.max_exec_time, cur_cpu_src->runtime.max_exec_time);
dst->runtime.user_cb_max_time = MAX(dst->runtime.user_cb_max_time, cur_cpu_src->runtime.user_cb_max_time);
}
}
return;
}
// Convert a total time in microseconds and a number of samples to an average in nanoseconds.
// Yields 0 when there are no samples. The multiplication is done in 64 bits; the result is
// then truncated to u_int.
#define TOTAL_MICORSEC_TO_AVG_NSEC(total, samples) \
((samples)==0 ? 0 : (u_int)((u_int64)(total) * 1000 / (u_int64)(samples)))
// Print the common statistics
// Parameters:
//  stats - the statistics to print; may be NULL only when print_headline is set
//  type - which statistics to print: the build-time part is skipped for KISS_PM_DYNAMIC_STATS,
//         the run-time part is skipped for KISS_PM_STATIC_STATS
//  format - textual or CSV output
//  print_headline - in CSV format, print the column titles instead of the values
// The run-time values are aggregated over all cpus. Both local snapshots are zeroed up front, so
// every value that is printed is well defined even when it was not collected (print_headline set,
// or stats is NULL); such values are printed as 0.
void
kiss_pm_stats_common_print(
    kiss_pm_stats_common stats,
    enum kiss_pm_stats_type type,
    enum kiss_pm_stats_format format,
    BOOL print_headline
)
{
    struct kiss_pm_stats_static_s compile_stats;
    struct kiss_pm_stats_dynamic_s dynamic_stats;

    KISS_ASSERT_PERF((stats && !print_headline) || print_headline, ("Illegal arguments"));

    bzero(&compile_stats, sizeof(compile_stats));
    bzero(&dynamic_stats, sizeof(dynamic_stats));

    // stats is allowed to be NULL when only a headline is requested - never dereference it blindly.
    if (stats) {
        bcopy(&(stats->compile), &compile_stats, sizeof(compile_stats));
    }
    if (!print_headline) {
        kiss_pm_stats_common_aggregate_cpus(&dynamic_stats, stats);
    }

    if (type != KISS_PM_DYNAMIC_STATS) {
        if (format == KISS_PM_TEXT_FORMAT_STATS) {
            kdprintf("Memory comsumption for this handle is %u bytes\n", compile_stats.memory_bytes);
            kdprintf("Compilation time for this handle is %u microseconds\n", compile_stats.compilation_time);
        } else if (format == KISS_PM_CSV_FORMAT_STATS) {
            if (print_headline) {
                kdprintf("Memory consumption;Compilation time (microsec);");
            } else {
                kdprintf("%u;%u;", compile_stats.memory_bytes, compile_stats.compilation_time);
            }
        }
    }

    if (type != KISS_PM_STATIC_STATS) {
        if (format == KISS_PM_TEXT_FORMAT_STATS) {
            kdprintf("Number of executed buffers is %u\n", dynamic_stats.num_of_buffs);
            kdprintf("Max buffer length is %u\n", dynamic_stats.buflen.max);
            kdprintf("Avg buffer length is %u\n",
                dynamic_stats.buflen.sample_num ? (dynamic_stats.buflen.total/dynamic_stats.buflen.sample_num) : 0);
            kdprintf("Number of matches is %u\n", dynamic_stats.num_of_matches);
            kdprintf("Number of matches after stage1 is %u\n", dynamic_stats.num_of_stage1_matches);
            kdprintf("Number of matches after start-anchor is %u\n", dynamic_stats.num_of_stage22_matches);
            kdprintf("Number of matches after end-anchor is %u\n", dynamic_stats.num_of_stage23_matches);
            kdprintf("Max number of matches on buffer is %u\n", dynamic_stats.max_matches_on_buf);
            // Average execution time - display in nanosecond so rounding down won't lose too much
            kdprintf("Avg execution time is %u ns for PM, %u ns for callbacks\n",
                TOTAL_MICORSEC_TO_AVG_NSEC(dynamic_stats.runtime.total_exec_time, dynamic_stats.runtime.sample_num),
                TOTAL_MICORSEC_TO_AVG_NSEC(dynamic_stats.runtime.user_cb_exec_time, dynamic_stats.runtime.sample_num));
            // Maximum execution time - display in nanosecond for consistency with average.
            // concatenate 000 instead of multiplying,
            // to avoid overflow (in very extreme, yet very interesting, cases).
            kdprintf("Max execution time is %u000 ns for PM, %u000 ns for callbacks\n",
                dynamic_stats.runtime.max_exec_time, dynamic_stats.runtime.user_cb_max_time);
        } else if (format == KISS_PM_CSV_FORMAT_STATS) {
            if (print_headline) {
                kdprintf(
                    "Executed buffers #;"
                    "Max buffer length;"
                    "Avg buffer length;"
                    "Matches #;"
                    "Max matches on buffer;"
                    "stage1 matches #;"
                    "2nd filter matches #;"
                    "3rd filter matches #;"
                    "Avg PM exec time (ns);"
                    "Max PM exec time (ns);"
                    "Avg callback exec time (ns);"
                    "Max callback exec time (ns)"
                );
            } else {
                kdprintf("%u;%u;%u;%u;%u;%u;%u;%u;%u;%u000;%u;%u000",
                    dynamic_stats.num_of_buffs,
                    dynamic_stats.buflen.max,
                    dynamic_stats.buflen.sample_num ? (dynamic_stats.buflen.total/dynamic_stats.buflen.sample_num) : 0,
                    dynamic_stats.num_of_matches,
                    dynamic_stats.max_matches_on_buf,
                    dynamic_stats.num_of_stage1_matches,
                    dynamic_stats.num_of_stage22_matches,
                    dynamic_stats.num_of_stage23_matches,
                    TOTAL_MICORSEC_TO_AVG_NSEC(
                        dynamic_stats.runtime.total_exec_time,
                        dynamic_stats.runtime.sample_num
                    ),
                    dynamic_stats.runtime.max_exec_time,
                    TOTAL_MICORSEC_TO_AVG_NSEC(
                        dynamic_stats.runtime.user_cb_exec_time,
                        dynamic_stats.runtime.sample_num
                    ),
                    dynamic_stats.runtime.user_cb_max_time
                );
            }
        }
    }
}
#define kiss_pm_serialize_during_sanity_check 0
// Return the statistics from src in dst (aggregate statistics from all cpus)
// dst_compile receives a copy of the build-time statistics; dst_exec receives the run-time
// statistics summed over all cpus. Returns KISS_ERROR if any argument is NULL, KISS_OK otherwise.
kiss_ret_val
kiss_pm_stats_common_get(struct kiss_pm_stats_static_s *dst_compile,
    struct kiss_pm_stats_dynamic_s *dst_exec,
    const struct kiss_pm_stats_common_s *src)
{
    KISS_ASSERT_PERF((dst_compile && dst_exec && src), ("Illegal arguments"));
    if (!dst_compile || !dst_exec || !src) {
        return KISS_ERROR;
    }

    // Start from clean outputs, then fill them in.
    bzero(dst_compile, sizeof(*dst_compile));
    bzero(dst_exec, sizeof(*dst_exec));
    bcopy(&(src->compile), dst_compile, sizeof(*dst_compile));
    kiss_pm_stats_common_aggregate_cpus(dst_exec, src);

    // for debug purposes only!
    // ignore specific statistics fields when performing a sanity check on serialization
    if (kiss_pm_serialize_during_sanity_check) {
        dst_compile->memory_bytes = KISS_PM_SERIALIZE_IGNORE_INT;
        dst_compile->compilation_time = KISS_PM_SERIALIZE_IGNORE_INT;
    }

    return KISS_OK;
}
// Copy the statistics from src to dst
// Copies the build-time statistics and as many per-CPU run-time entries as fit in both exec arrays.
// Parameters:
//  dst - destination statistics
//  src - source statistics
// Return values:
//  KISS_OK - the statistics were copied, or there was nothing to copy (src is NULL or has no
//            exec array; dst is not touched in that case)
//  KISS_ERROR - src has statistics to copy but dst is NULL
// NOTE(review): the build-time statistics are copied only when src has an exec array - confirm
// this is intended, since kiss_pm_stats_common_init never allocates one.
kiss_ret_val
kiss_pm_stats_common_copy(kiss_pm_stats_common dst, const struct kiss_pm_stats_common_s *src)
{
    if (!src || !src->exec) {
        return KISS_OK;
    }

    // Validate dst before it is dereferenced for the first time.
    KISS_ASSERT_PERF((dst && src), ("Illegal arguments"));
    if (!dst) {
        return KISS_ERROR;
    }

    bcopy(&(src->compile), &(dst->compile), sizeof(struct kiss_pm_stats_static_s));

    // Only copy the per-CPU entries if the destination has an array to receive them.
    if (dst->exec) {
        u_int num_cpus = MIN(src->exec_num_cpus, dst->exec_num_cpus);
        bcopy(src->exec, dst->exec, num_cpus*sizeof(struct kiss_pm_stats_dynamic_aligned_s));
    }

    return KISS_OK;
}
// Get size of serialized common statistics. Only build-time statistics are counted
// The result is the size of struct kiss_pm_stats_static_s, the part written by
// kiss_pm_stats_common_serialize.
u_int
kiss_pm_stats_common_get_serialize_size()
{
return sizeof(struct kiss_pm_stats_static_s);
}
// Serialize common statistics. Only build-time statistics are serialized
// Parameters:
// stats - the statistics to serialize
// buf, size - output buffer position and size, handed to DATA_BUFF_COPY
// NOTE(review): DATA_BUFF_COPY is defined elsewhere; presumably it copies the data, advances *buf
// and updates *size, and may return on error by itself - confirm.
kiss_ret_val
kiss_pm_stats_common_serialize(const struct kiss_pm_stats_common_s *stats, u_char **buf, u_int *size)
{
KISS_ASSERT_PERF((stats), ("Illegal arguments"));
DATA_BUFF_COPY(*buf, size, &(stats->compile), sizeof(struct kiss_pm_stats_static_s));
return KISS_OK;
}
// Deserialize common statistics. Only build-time statistics are deserialized
// Parameters:
// stats - the statistics to fill; only stats->compile is written here
// buf, size - input buffer position and size, handed to DATA_BUFF_READ
// vbuf, vbuf_iter - alternative input source handed to DATA_BUFF_READ; marked CP_MAYBE_UNUSED,
// so some expansions of the macro apparently ignore them
// NOTE(review): DATA_BUFF_READ is defined elsewhere; confirm how it reports a short buffer and
// whether vbuf_iter may be NULL (it is dereferenced in the macro arguments).
kiss_ret_val
kiss_pm_stats_common_deserialize(
kiss_pm_stats_common stats,
u_char **buf, u_int *size,
CP_MAYBE_UNUSED kiss_vbuf vbuf,
CP_MAYBE_UNUSED kiss_vbuf_iter *vbuf_iter
)
{
KISS_ASSERT_PERF((stats), ("Illegal arguments"));
DATA_BUFF_READ(*buf, size, vbuf, *vbuf_iter, &(stats->compile), sizeof(struct kiss_pm_stats_static_s));
return KISS_OK;
}
// ******************** FUNCTIONS *************************
SASAL_END

View File

@@ -0,0 +1,146 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __kiss_pm_stats_h__
#define __kiss_pm_stats_h__
#include "pm_adaptor.h"
// Common statistics
// Common run time statistics
// All fields are u_int counters. The totals in buflen and runtime are kept together with a
// sample count so that an average can be derived from them.
struct kiss_pm_stats_dynamic_s {
u_int num_of_buffs; // Number of buffers we ran this dfa on
u_int num_of_matches; // how many matches there were in this dfa
u_int max_matches_on_buf; // Maximal number of matches per one buf
struct { // Buffer length statistics
u_int max; // Maximum buffer length
u_int total; // Total length (for average calculation)
u_int sample_num; // Number of buffers, whose lengths make up total.
} buflen;
struct { // Execution time statistics - not collected by default
u_int total_exec_time; // PM Execution time (not including user callbacks)
u_int max_exec_time; // Maximal PM execution time
u_int user_cb_exec_time; // User callback execution time
u_int user_cb_max_time; // Maximal user callback execution time
u_int sample_num; // Number of execution time samples
} runtime;
u_int num_of_stage1_matches; // Tier1 LSS matches, before filtering by mask
u_int num_of_stage22_matches; // Tier1 matches after ^
u_int num_of_stage23_matches; // Tier1 matches after $
};
// Common build time statistics
// This is the only part of the common statistics that is serialized.
struct kiss_pm_stats_static_s {
u_int memory_bytes; // How many bytes does this tier consume
u_int compilation_time; // Compilation time of this tier in micro-seconds
};
// Wrapper that gives the run time statistics the CP_CACHELINE_ALIGNED attribute.
// kiss_pm_stats_common_s::exec points to an array of these, one per CPU.
// NOTE(review): presumably the alignment keeps each CPU's counters on separate cache lines - confirm.
struct CP_CACHELINE_ALIGNED kiss_pm_stats_dynamic_aligned_s {
struct kiss_pm_stats_dynamic_s stats;
};
// Common statistics container: per-CPU run time counters plus the build time figures.
struct kiss_pm_stats_common_s {
// Run time statistics, per-CPU, dynamically allocated
struct kiss_pm_stats_dynamic_aligned_s* exec;
// Size of the exec array
u_int exec_num_cpus;
// Build time statistics
struct kiss_pm_stats_static_s compile;
};
// Handle type used by the statistics API: a pointer to the common statistics structure.
typedef struct kiss_pm_stats_common_s *kiss_pm_stats_common;
// Selector passed as the 'type' argument of kiss_pm_stats_common_update_compile.
// NOTE(review): judging by the names it chooses whether the memory figure, the compilation time,
// or both are updated - confirm against the implementation.
enum kiss_pm_stats_update_compile_type {
UPDATE_COMPILE_STATS_MEM,
UPDATE_COMPILE_STATS_TIME,
UPDATE_COMPILE_STATS_BOTH
};
// In which format the statistics should be printed.
// Passed as the 'format' argument of kiss_pm_stats_common_print.
enum kiss_pm_stats_format {
KISS_PM_TEXT_FORMAT_STATS = 0, // Textual, for viewing with text editor
KISS_PM_CSV_FORMAT_STATS // CSV, for opening with Excel
};
// Initialize a common statistics structure. Returns a kiss_ret_val status.
// NOTE(review): presumably allocates the per-CPU exec array of kiss_pm_stats_common_s - confirm.
KISS_APPS_CPAPI
kiss_ret_val kiss_pm_stats_common_init(kiss_pm_stats_common new_stats);
// Counterpart of kiss_pm_stats_common_init.
// NOTE(review): presumably releases the exec array, not the structure itself - confirm.
KISS_APPS_CPAPI
void kiss_pm_stats_common_free(kiss_pm_stats_common stats);
// Update the build time statistics (memory consumption and/or compilation time).
// 'type' selects which of 'bytes' and 'compilation_time' is taken into account.
KISS_APPS_CPAPI
void kiss_pm_stats_common_update_compile(
kiss_pm_stats_common stats,
u_int bytes,
u_int compilation_time,
enum kiss_pm_stats_update_compile_type type);
// Update the run time statistics after scanning one buffer of 'buf_size' bytes,
// in which 'num_of_matches' matches were found.
KISS_APPS_CPAPI
void kiss_pm_stats_common_update_exec(kiss_pm_stats_common stats, u_int buf_size, u_int num_of_matches);
// @brief
// Updating the execution time of an execution of a buffer in tier2.
//
// @param stats - [in] The tier2 common stats.
// @param exec_time - [in] The execution time.
// @param user_cb_time - [in] presumably the time spent in user callbacks during this execution
// (compare runtime.user_cb_exec_time in kiss_pm_stats_dynamic_s) - TODO confirm.
//
// @return Void
//
// @note
// in case one of the stats vars will wrap-around, the aggregated vars will hold only the last exec stats.
KISS_APPS_CPAPI
void kiss_pm_stats_common_update_exec_time(kiss_pm_stats_common stats, u_int exec_time, u_int user_cb_time);
// Reset the run time (execution) statistics.
// NOTE(review): presumably the build time statistics are left untouched - confirm.
KISS_APPS_CPAPI
void kiss_pm_stats_common_reset_exec(kiss_pm_stats_common stats);
// Print the statistics.
// type - which statistics to print (enum kiss_pm_stats_type, declared elsewhere).
// format - textual or CSV output, see enum kiss_pm_stats_format.
// print_headline - presumably whether a headline row is printed before the values - confirm.
KISS_APPS_CPAPI
void kiss_pm_stats_common_print(
kiss_pm_stats_common stats,
enum kiss_pm_stats_type type,
enum kiss_pm_stats_format format,
BOOL print_headline
);
// Read statistics out of 'src' into caller provided structures: build time figures into
// 'dst_compile', run time counters into 'dst_exec'. Returns a kiss_ret_val status.
// NOTE(review): presumably the per-CPU entries of src->exec are aggregated into the single
// dst_exec structure - confirm against the implementation.
KISS_APPS_CPAPI
kiss_ret_val kiss_pm_stats_common_get(
struct kiss_pm_stats_static_s *dst_compile,
struct kiss_pm_stats_dynamic_s *dst_exec,
const struct kiss_pm_stats_common_s *src
);
// Copy the statistics of 'src' into 'dst'. Returns a kiss_ret_val status.
KISS_APPS_CPAPI
kiss_ret_val kiss_pm_stats_common_copy(kiss_pm_stats_common dst, const struct kiss_pm_stats_common_s *src);
// Serialization support for the common statistics.
// Get the number of bytes the serialized form occupies.
KISS_APPS_CPAPI
u_int kiss_pm_stats_common_get_serialize_size(void);
// Serialize 'stats' into a buffer.
// NOTE(review): 'buf' and 'size' are passed by pointer, presumably so the write position and the
// remaining size can be advanced - confirm against the implementation.
KISS_APPS_CPAPI
kiss_ret_val kiss_pm_stats_common_serialize(const struct kiss_pm_stats_common_s *stats, u_char **buf, u_int *size);
// Deserialize into 'stats', reading either from 'buf'/'size' or from 'vbuf'/'vbuf_iter'.
// NOTE(review): which of the two sources is used, and when, is decided by the implementation - confirm.
KISS_APPS_CPAPI
kiss_ret_val kiss_pm_stats_common_deserialize(
kiss_pm_stats_common stats,
u_char **buf,
u_int *size,
kiss_vbuf vbuf,
kiss_vbuf_iter *vbuf_iter
);
#endif // __kiss_pm_stats_h__

View File

@@ -0,0 +1,462 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Thin NFA I/S
// ------------
// The thin NFA allows building and executing an automaton for string search, using the
// Aho-Corasick algorithm.
// The resulting automaton is built in a compact representation. Some states are "full" - they
// have an explicit transition per character. Others are "partial" - they have some explicit transitions,
// plus a "default transition". This is an epsilon-transition. For characters which don't have an
// explicit transition, we follow the default transition, and look up the same character there.
//
// Source files
// ------------
// kiss_thin_nfa.c (this file) - execution code.
// kiss_thin_nfa_build.c - allocation and destruction code. Contains code which is common to compilation
// and serialization/deserialization. All objects which are part of the compiled automaton are created here.
// kiss_thin_nfa_compile.c - compilation code. Contains the logic that converts a set of strings into an automaton.
// kiss_thin_nfa_analyze.c - Validation and dump. Code that reads the BNFA and tries to make sense of it.
// kiss_thin_nfa_impl.h - internal header file. APIs and definitions between the different source files.
// ********************* INCLUDES **************************
#include "kiss_thin_nfa_impl.h"
#include "sasal.h"
SASAL_START // Multiple Pattern Matcher
// Internal execution flags passed to kiss_thin_nfa_exec_one_buf_parallel_ex:
#define KISS_PM_EXEC_LAST_BUFF 0x00000001 // This is the last buffer (the last segment of the scanned Buffer)
// The runtime status of the Thin NFA
// Created by kiss_thin_nfa_exec for one scan and carried across all segments of the scanned buffer.
struct kiss_bnfa_runtime_s {
KissThinNFA *nfa_h; // The NFA we're executing
kiss_bnfa_comp_offset_t last_bnfa_offset; // Last state reached by exec_one_buf (compressed BNFA offset)
std::vector<std::pair<uint, uint>> *matches; // The matches we've found so far, as (pattern ID, match position)
u_int scanned_so_far; // The length of all buffers before the current buffer
};
// Critical code path debugging - enabled only in debug mode.
// Traces one transition: the character, the (decompressed) offset of the next state and a short
// description of the operation. The 'runtime' argument is accepted but not used by the expansion.
// NOTE(review): other call sites wrap the thinnfa_debug_perf arguments in double parentheses,
// this macro does not - confirm it still expands correctly before using it.
#define THIN_NFA_TRACE_TRANS(runtime, next_off, ch, op) \
thinnfa_debug_perf( \
"%s: Transition by 0x%02x to %d - %s\n", \
FILE_LINE, \
ch, \
kiss_bnfa_offset_decompress(next_off), \
op \
)
// Map a character through the translation table when do_char_trans is true, otherwise use it as is.
// The result is always converted to u_char.
#define TRANSLATE_CHAR_IF_NEEED(do_char_trans, char_trans_table, ch) \
((u_char)((do_char_trans) ? ((char_trans_table)[ch]) : (ch)))
// Report the patterns of one pattern array as matches at the current scan position.
// A single automaton match may add several entries (one per pattern in the array), or none
// if every pattern is rejected by its ^ / $ flags.
//
// Parameters:
//  runtime        - the current status of Thin NFA execution. Matches are appended to runtime->matches.
//  pat_arr_offset - locates the pattern array (resolved by kiss_thin_nfa_offset_to_pat_array_ptr).
//  one_buf_offset - the offset within the buffer currently scanned, already moved beyond the character
//                   that caused the match. Together with runtime->scanned_so_far it gives the real match offset.
//  one_buf_len    - the length of the buffer currently scanned. Used for $ processing.
//  exec_flags     - the execution flags; only KISS_PM_EXEC_LAST_BUFF is examined here.
static CP_INLINE void
kiss_thin_nfa_handle_match(struct kiss_bnfa_runtime_s *runtime, u_int pat_arr_offset,
    u_int one_buf_offset, u_int one_buf_len, u_int exec_flags)
{
    static const char rname[] = "kiss_thin_nfa_handle_match";

    // one_buf_offset points past the matching character, so step back one byte to get its position.
    const u_int match_pos = runtime->scanned_so_far + (one_buf_offset - 1);
    const kiss_thin_nfa_pattern_array_t *pat_arr =
        kiss_thin_nfa_offset_to_pat_array_ptr(runtime->nfa_h, pat_arr_offset);
    // A $ pattern is accepted only when the match ends the buffer, and the buffer is the last one.
    const bool at_end_of_data = (one_buf_offset == one_buf_len) && (exec_flags & KISS_PM_EXEC_LAST_BUFF);
    u_int idx;

    thinnfa_debug_perf((
        "%s: Going over %u patterns, starting from offset %u\n",
        rname,
        pat_arr->n_patterns,
        pat_arr_offset
    ));

    for (idx = 0; idx < pat_arr->n_patterns; idx++) {
        const kiss_thin_nfa_pattern_t *pat = &(pat_arr->pattern[idx]);

        thinnfa_debug(("%s: Match for pattern ID %d at %d len %d\n", rname, pat->id, match_pos, pat->len));

        // ^ handling - an N byte pattern at the start of the data ends at byte N-1.
        // NOTE: If the anchored state optimization is implemented in compilation, this test isn't needed.
        if ((pat->pattern_id_flags & KISS_PM_LSS_AT_BUF_START) && (match_pos != pat->len - 1)) {
            thinnfa_debug_perf(("%s: Not match because of ^ %d\n", rname, pat->id));
            continue;
        }

        // $ handling - see at_end_of_data above.
        if ((pat->pattern_id_flags & KISS_PM_LSS_AT_BUF_END) && !at_end_of_data) {
            thinnfa_debug_perf(("%s: Not match because of $ %d\n", rname, pat->id));
            continue;
        }

        runtime->matches->emplace_back(pat->id, match_pos);
    }
}
// Wrapper to kiss_thin_nfa_handle_match, for callers that hold the (compressed) offset of a match state
// rather than the match ID stored in it. The remaining parameters are passed through unchanged.
static CP_INLINE void
kiss_thin_nfa_handle_match_state(struct kiss_bnfa_runtime_s *runtime, kiss_bnfa_comp_offset_t cur_offset,
    u_int one_buf_offset, u_int one_buf_len, u_int exec_flags)
{
    const kiss_bnfa_match_state_t *match_state =
        &kiss_bnfa_comp_offset_to_state(runtime->nfa_h->bnfa, cur_offset, KISS_BNFA_STATE_MATCH)->match;

    kiss_thin_nfa_handle_match(runtime, match_state->match_id, one_buf_offset, one_buf_len, exec_flags);
}
// Calculate the next state's offset, given a state and character. Good for full states only.
// Faster than the partial state lookup - a full state holds an explicit transition per character,
// so this is a single table access.
// The offset parameter is a compressed offset; the returned offset is also compressed
// (it is stored in the table as a 16-bit value and widened here).
static CP_INLINE kiss_bnfa_comp_offset_t
kiss_thin_nfa_get_next_offset_full(const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t offset,
    unsigned char char_to_find)
{
    const kiss_bnfa_full_state_t *full_state =
        &kiss_bnfa_comp_offset_to_state(bnfa, offset, KISS_BNFA_STATE_FULL)->full;
    const kiss_bnfa_short_offset_t next = full_state->transitions[char_to_find];

    return (kiss_bnfa_comp_offset_t)next;
}
// Calculate the next state's offset, given a state and character. Good for partial states only.
// *inc_pos is always set: TRUE when an explicit transition was found (the character is consumed),
// FALSE when the fail state was taken (the same character must be looked up again there).
static CP_INLINE kiss_bnfa_comp_offset_t
kiss_thin_nfa_get_next_offset_partial(const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t offset,
    unsigned char char_to_find, BOOL *inc_pos)
{
    const kiss_bnfa_partial_state_t *partial =
        &kiss_bnfa_comp_offset_to_state(bnfa, offset, KISS_BNFA_STATE_PARTIAL)->partial;
    const struct kiss_bnfa_partial_transition_s *tran = partial->transitions;
    const struct kiss_bnfa_partial_transition_s *const tran_end = tran + partial->trans_num;

    // Simple linear search is fast for a few transitions. If we have many, we use a full state anyway.
    for (; tran != tran_end; tran++) {
        if (tran->tran_char == char_to_find) {
            // Explicit transition - consume a character and move the automaton
            *inc_pos = TRUE;
            return tran->next_state_offset;
        }
        // Transitions are sorted, so once we passed the character it can't be found.
        if (tran->tran_char > char_to_find) break;
    }

    // No explicit transition found - move to the fail state, without consuming a character.
    *inc_pos = FALSE;
    return partial->fail_state_offset;
}
// Calculate the next state's offset, when the current is a match state.
// Doesn't consume a character (epsilon transition). The bnfa argument isn't needed for the calculation.
static CP_INLINE kiss_bnfa_comp_offset_t
kiss_thin_nfa_get_next_offset_match(CP_MAYBE_UNUSED const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t offset)
{
    // The state to continue with is the one stored right after the match state,
    // so skip the match state's size, expressed in compressed offset units.
    const auto match_state_units = sizeof(kiss_bnfa_match_state_t) / KISS_BNFA_STATE_ALIGNMENT;

    return offset + match_state_units;
}
// Parallel scan parameters. parallel_scan_advance_if_full and parallel_scan_advance_if_full_unroll
// are written out by hand for the value 4, so they must be updated if either constant changes.
#define PARALLEL_SCANS_NUM 4 // 4 heads scanning the buffer
#define UNROLL_FACTOR 4 // Advance each head 4 bytes per loop
// Move one head of the state machine by one character. bnfa_offset must not be a match state.
// Partial states are followed through their fail states until the character is consumed, either by an
// explicit transition or by a full state. Returns the (compressed) offset of the state reached.
static CP_INLINE kiss_bnfa_comp_offset_t
parallel_scan_advance_one(const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t bnfa_offset, const unsigned char ch)
{
    for (;;) {
        BOOL consumed;

        if (bnfa_offset < 0) {
            // Full state (either we started with full, or the fail state chain reached one)
            return kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offset, ch);
        }

        // Partial state - take the explicit transition if there is one, else move to the fail state.
        bnfa_offset = kiss_thin_nfa_get_next_offset_partial(bnfa, bnfa_offset, ch, &consumed);
        if (consumed) {
            return bnfa_offset;
        }
    }
}
// Check if all heads are on a full state.
// If they are - advance every head by one character (updating bnfa_offsets and buf_pos) and return TRUE.
// If they aren't - change nothing and return FALSE.
// Written out explicitly for 4 heads (PARALLEL_SCANS_NUM). The buffer bytes are used as they are,
// without character translation.
static CP_INLINE BOOL
parallel_scan_advance_if_full(
    const kiss_bnfa_state_t *bnfa,
    kiss_bnfa_comp_offset_t *bnfa_offsets,
    const unsigned char **buf_pos
)
{
    // Full states are the ones with a negative offset. The sign bit survives a bitwise AND only if it is
    // set in every operand, so a negative result means all 4 heads are on full states.
    const kiss_bnfa_comp_offset_t all_signs =
        (bnfa_offsets[0] & bnfa_offsets[1]) & (bnfa_offsets[2] & bnfa_offsets[3]);

    if (CP_UNLIKELY(all_signs >= 0)) return FALSE;

    // All states are full - make one transition per head, consuming one byte from each.
    bnfa_offsets[0] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offsets[0], *(buf_pos[0]++));
    bnfa_offsets[1] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offsets[1], *(buf_pos[1]++));
    bnfa_offsets[2] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offsets[2], *(buf_pos[2]++));
    bnfa_offsets[3] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offsets[3], *(buf_pos[3]++));
    return TRUE;
}
// Repeat parallel_scan_advance_if_full up to 4 times (UNROLL_FACTOR).
// Return TRUE if all 4 steps were done, FALSE if stopped earlier. When FALSE is returned, the steps
// which succeeded before the stop have already advanced the heads.
static CP_INLINE BOOL
parallel_scan_advance_if_full_unroll(
    const kiss_bnfa_state_t *bnfa,
    kiss_bnfa_comp_offset_t *bnfa_offsets,
    const unsigned char **buf_pos
)
{
    // The && chain stops at the first step that finds a head which isn't on a full state.
    return (parallel_scan_advance_if_full(bnfa, bnfa_offsets, buf_pos) &&
            parallel_scan_advance_if_full(bnfa, bnfa_offsets, buf_pos) &&
            parallel_scan_advance_if_full(bnfa, bnfa_offsets, buf_pos) &&
            parallel_scan_advance_if_full(bnfa, bnfa_offsets, buf_pos)) ? TRUE : FALSE;
}
// Find the offset where each head should start and stop
static void
calc_head_buf_range(const u_char *buffer, u_int len, const u_char **head_start_pos, const u_char **head_end_pos)
{
static const char rname[] = "calc_head_buf_range";
const u_char *orig_buf = buffer;
u_int len_per_head = len / PARALLEL_SCANS_NUM;
u_int rem = len % PARALLEL_SCANS_NUM;
u_int i;
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
u_int head_len = len_per_head;
// Give each head its share, late heads get a part of the remainder.
// The "Handle remainders" loop below assumes the last head has the largest part.
if (i >= PARALLEL_SCANS_NUM-rem) head_len++;
head_start_pos[i] = buffer;
buffer += head_len;
head_end_pos[i] = buffer;
thinnfa_debug(("%s: Head %u gets range %ld:%ld\n", rname,
i, head_start_pos[i]-orig_buf, head_end_pos[i]-orig_buf));
}
}
// Set the initial BNFA offset for each head
static void
set_head_bnfa_offset(
struct kiss_bnfa_runtime_s *runtime,
kiss_bnfa_comp_offset_t *bnfa_pos,
const u_char **buf_pos,
const u_char *buffer
)
{
const KissThinNFA *nfa_h = runtime->nfa_h;
kiss_bnfa_comp_offset_t init_off = kiss_bnfa_offset_compress(nfa_h->min_bnfa_offset);
u_int i;
if (nfa_h->flags & KISS_THIN_NFA_HAS_ANCHOR) {
// Start from the root (next full state after the anchored root)
init_off++;
}
// Heads that scan from the beginning of the buffer, will start at previous buffer's ending state.
// The rest start anew.
// Several scanning heads will start at buffer's beginning when buffer's size is less than PARALLEL_SCANS_NUM
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
if (buf_pos[i] - buffer == 0) {
bnfa_pos[i] = runtime->last_bnfa_offset;
} else {
bnfa_pos[i] = init_off;
}
}
}
// Run the Thin NFA on a single buffer, using PARALLEL_SCANS_NUM scanning heads in parallel.
// The buffer is split into consecutive ranges, one per head (see calc_head_buf_range). Every head except
// the last then continues into the following range (the "overlap"), to find matches which started in its
// own range. Matches are reported through kiss_thin_nfa_handle_match_state.
//
// Parameters:
//  runtime          - scan status. last_bnfa_offset is the starting state for heads that begin at the buffer
//                     start, and is updated to the ending state before returning.
//  buffer, len      - the data to scan.
//  flags            - execution flags (KISS_PM_EXEC_LAST_BUFF), passed on to match handling.
//  do_char_trans    - whether characters are translated through char_trans_table before the lookup.
//  char_trans_table - the translation table; not accessed when do_char_trans is false.
//
// NOTE(review): translation is applied only on the calls to parallel_scan_advance_one. The full-state paths
// (parallel_scan_advance_if_full_unroll and the "semi-fastpath" below) use the raw byte. Presumably the
// transition tables of full states already account for the translation - confirm against the compilation code.
static CP_INLINE void
kiss_thin_nfa_exec_one_buf_parallel_ex(
struct kiss_bnfa_runtime_s *runtime,
const u_char *buffer,
u_int len, u_int flags,
BOOL do_char_trans,
u_char *char_trans_table
)
{
const kiss_bnfa_state_t *bnfa = runtime->nfa_h->bnfa;
const unsigned char *end, *buf_pos[PARALLEL_SCANS_NUM], *head_end_pos[PARALLEL_SCANS_NUM];
kiss_bnfa_comp_offset_t bnfa_offset[PARALLEL_SCANS_NUM];
u_int i;
u_int overlap_bytes;
int overlap_head_mask;
// set starting position, ending position and state for each scanning head
calc_head_buf_range(buffer, len, buf_pos, head_end_pos);
set_head_bnfa_offset(runtime, bnfa_offset, buf_pos, buffer);
end = buffer + len;
// unroll 16 (PARALLEL_SCANS_NUM * UNROLL_FACTOR) times, while we have at least 4 input bytes to process.
// Only the last head's position is tested. All heads advance together, so the other heads may end this
// loop a little beyond their own range - that data belongs to the next head's range, still inside the buffer.
while (buf_pos[PARALLEL_SCANS_NUM-1] + UNROLL_FACTOR <= end) {
// Fastpath - Advance all heads up to 4 chars, as long as they're all on a full state.
if (CP_LIKELY(parallel_scan_advance_if_full_unroll(bnfa, bnfa_offset, buf_pos))) continue;
// At least one head is on partial or match - advance all 4 by their type.
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
if (bnfa_offset[i] < 0) {
// Semi-fastpath. When we reach this loop, normally 3 of 4 heads are on a full state.
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_full(bnfa, bnfa_offset[i], *(buf_pos[i]));
(buf_pos[i])++;
continue;
}
if (kiss_bnfa_state_type(bnfa, bnfa_offset[i]) == KISS_BNFA_STATE_MATCH) {
// Handle a match
kiss_thin_nfa_handle_match_state(runtime, bnfa_offset[i], (u_int)(buf_pos[i] - buffer), len, flags);
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_match(bnfa, bnfa_offset[i]);
}
// Advance to the next state
bnfa_offset[i] = parallel_scan_advance_one(bnfa, bnfa_offset[i],
TRANSLATE_CHAR_IF_NEEED(do_char_trans, char_trans_table, *(buf_pos[i])));
(buf_pos[i])++;
}
}
// Handle remainders (the above loop jumps 4 chars at a time, so it may leave up to 3 unscanned)
while (buf_pos[PARALLEL_SCANS_NUM-1] < end) {
// Advance only heads that haven't reached their end position
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
if (buf_pos[i] >= head_end_pos[i]) continue;
if (kiss_bnfa_state_type(bnfa, bnfa_offset[i]) == KISS_BNFA_STATE_MATCH) {
// Handle a match
kiss_thin_nfa_handle_match_state(runtime, bnfa_offset[i], (u_int)(buf_pos[i] - buffer), len, flags);
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_match(bnfa, bnfa_offset[i]);
}
// Advance to the next state
bnfa_offset[i] = parallel_scan_advance_one(bnfa, bnfa_offset[i],
TRANSLATE_CHAR_IF_NEEED(do_char_trans, char_trans_table, *(buf_pos[i])));
(buf_pos[i])++;
}
}
// Handle overlap - advance all heads into the next head's range, as long as there's a chance
// for a match which started in this head's range.
// overlap_head_mask has one bit per head that is still overlapping; the loop ends when it becomes 0.
// overlap_bytes counts the iterations of this loop.
overlap_head_mask = (1<<(PARALLEL_SCANS_NUM-1))-1; // All heads except the last
for (overlap_bytes = 0; overlap_head_mask!=0; overlap_bytes++) {
// Advance each head (except the last) as long as overlap is needed for it
for (i=0; i<PARALLEL_SCANS_NUM-1; i++) {
int my_mask = (1<<i);
u_int state_depth;
// Did we stop this head's overlap already?
if (!(overlap_head_mask & my_mask)) continue;
// Stop the overlap if the state is not as deep as the overlap, or the buffer ended.
state_depth = kiss_bnfa_offset_to_depth(runtime->nfa_h, bnfa_offset[i]);
if ((state_depth <= overlap_bytes) || (buf_pos[i] >= end)) {
overlap_head_mask &= ~my_mask;
continue;
}
// Advance the state machine, including match handling
if (kiss_bnfa_state_type(bnfa, bnfa_offset[i]) == KISS_BNFA_STATE_MATCH) {
// Handle a match
kiss_thin_nfa_handle_match_state(runtime, bnfa_offset[i], (u_int)(buf_pos[i] - buffer), len, flags);
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_match(bnfa, bnfa_offset[i]);
}
// Advance to the next state
bnfa_offset[i] = parallel_scan_advance_one(bnfa, bnfa_offset[i],
TRANSLATE_CHAR_IF_NEEED(do_char_trans, char_trans_table, *(buf_pos[i])));
(buf_pos[i])++;
}
}
// We may have stopped on a match state. If so - handle and advance
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
if (kiss_bnfa_state_type(bnfa, bnfa_offset[i]) == KISS_BNFA_STATE_MATCH) {
// Handle a match
kiss_thin_nfa_handle_match_state(runtime, bnfa_offset[i], (u_int)(buf_pos[i] - buffer), len, flags);
bnfa_offset[i] = kiss_thin_nfa_get_next_offset_match(bnfa, bnfa_offset[i]);
}
}
// The next scan should start at the state where the current scan ended.
// If multiple heads reached the buffer end, use the one with the lowest index,
// because it has covered more data than other heads that reached the buffer end.
// The last head always satisfies the test below, so last_bnfa_offset is always updated.
for (i=0; i<PARALLEL_SCANS_NUM; i++) {
if (buf_pos[i] == buf_pos[PARALLEL_SCANS_NUM-1]) {
runtime->last_bnfa_offset = bnfa_offset[i];
break;
}
}
return;
}
// Execute a thin NFA on a buffer.
// Parameters:
//  nfa_h   - the NFA handle. Must not be null (asserted).
//  buf     - a buffer to scan. Its segments are scanned in order; the automaton state and the match
//            positions carry over from one segment to the next. An empty buffer is not scanned at all.
//  matches - output - a (pattern ID, match position) pair is appended for each match. The position is the
//            offset of the last byte of the match. Existing elements are left in place.
void
kiss_thin_nfa_exec(KissThinNFA *nfa_h, const Buffer& buf, std::vector<std::pair<uint, uint>> &matches)
{
    dbgAssert(nfa_h != nullptr) << "kiss_thin_nfa_exec() was called with null handle";

    if (buf.size() == 0) return;

    // Set the runtime status structure
    struct kiss_bnfa_runtime_s scan_state;
    scan_state.nfa_h = nfa_h;
    scan_state.last_bnfa_offset = kiss_bnfa_offset_compress(nfa_h->min_bnfa_offset); // The initial state
    scan_state.matches = &matches;
    scan_state.scanned_so_far = 0;

    const bool translate_chars = (nfa_h->flags & KISS_THIN_NFA_USE_CHAR_XLATION) != 0;

    auto segments = buf.segRange();
    for (auto seg = segments.begin(); seg != segments.end(); ++seg) {
        const u_char *seg_data = seg->data();
        const u_int seg_len = seg->size();
        // Only the final segment may satisfy patterns which must end at the end of the data.
        const u_int exec_flags = ((seg + 1) == segments.end()) ? KISS_PM_EXEC_LAST_BUFF : 0;

        // Two separate calls, so each one passes a constant do_char_trans to the inlined scanner.
        if (translate_chars) {
            kiss_thin_nfa_exec_one_buf_parallel_ex(&scan_state, seg_data, seg_len, exec_flags, TRUE, nfa_h->xlation_tab);
        } else {
            kiss_thin_nfa_exec_one_buf_parallel_ex(&scan_state, seg_data, seg_len, exec_flags, FALSE, nullptr);
        }
        scan_state.scanned_so_far += seg_len;
    }
}
SASAL_END

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,261 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __kiss_thin_nfa_base_h__
#define __kiss_thin_nfa_base_h__
#include "general_adaptor.h"
// ****************************** OVERVIEW *******************************
// Contains basic Thin NFA structure, used by kiss_pm and bolt (prescan)
// ***********************************************************************
#define KISS_THIN_NFA_ALPHABET_SIZE 256
// Binary representation of the Thin NFA.
// This is what's actually used during runtime.
//
// Offsets in the BNFA
// -------------------
// Offsets are signed 32-bit integers, specifying the distance in bytes from the "offset 0" point.
//
// Offset 0 isn't the BNFA start - there are negative offsets:
// All full states are in negative offsets. This is the only way to know that a state is full.
// All other states are in positive offsets.
//
// In full states, offsets are encoded in 16 bits.
// In partial states, offsets are encoded in 24 bits.
// Offsets are compressed:
// Positive offsets are divided by 4. This is possible because all state sizes are a multiple of 4 bytes.
// Negative offsets are divided by 512 (the size of a full state). This is possible because negative offsets
// are only used for full states, so their offsets are a (negative) multiple of the state size.
//
// Structure of a BNFA state
// -------------------------
// 1. Full state:
// a. No header. Identified by the fact that its BNFA offset is negative.
// b. 256 transitions, 16bits each (compressed offsets).
// 2. Common header, to partial and match states:
// a. State type - 2 bits.
// 3. Partial state:
// a. State type - 2 bits.
// b. Transition number - 6 bits.
// c. Fail state offset (compressed) - 24 bits.
// d. Per transition:
// 1) Character - 8 bits
// 2) Next state offset (compressed) - 24 bits
// 4. Match state:
// a. State type - 2 bits.
// b. Unused - 6 bits.
// c. Match ID - 24 bits.
//
// Examples:
//
// Partial state, 2 transitions - 'a'->100, 'b'->104, fail-> -3072
// +----+---+-----+---+-----+---+-----+
// Bits: | 2 | 6 | 24 | 8 | 24 | 8 | 24 |
// +----+---+-----+---+-----+---+-----+
// Data: | P | 2 | -3 | a | 25 | b | 26 |
// +----+---+-----+---+-----+---+-----+
//
// Full state, 0x00->200, 0x01->204, 0xff->280
// +-----+-----+ +-----+
// Bits: | 16 | 16 | | 16 |
// +-----+-----+ .... +-----+
// Data: | 50 | 51 | | 70 |
// +-----+-----+ +-----+
// Types for normal and compressed (see comment above) BNFA offsets
// The byte offset and the compressed offset are both plain ints, so the compiler can't tell them apart -
// use kiss_bnfa_offset_compress / kiss_bnfa_offset_decompress to convert between them.
typedef int kiss_bnfa_offset_t; // Offset in bytes
typedef int kiss_bnfa_comp_offset_t; // Compressed offset
typedef short kiss_bnfa_short_offset_t; // Compressed offset in 16bits (for full states)
// The most negative 32-bit value. NOTE(review): presumably used as a "no offset" marker - confirm with users.
#define KISS_BNFA_OFFSET_INVALID ((int)0x80000000)
// State types
// Partial and match states store their type in the state header. A full state has no header - it's
// recognized by its negative offset (see kiss_bnfa_state_type).
// KISS_BNFA_STATE_TYPE_NUM is the number of types, not a type by itself.
typedef enum {
KISS_BNFA_STATE_PARTIAL,
KISS_BNFA_STATE_MATCH,
KISS_BNFA_STATE_FULL,
KISS_BNFA_STATE_TYPE_NUM
} kiss_bnfa_state_type_t;
// State structure
// Use some header bits for the state type
#define KISS_BNFA_STATE_TYPE_BITS 2
// The type must fit in KISS_BNFA_STATE_TYPE_BITS bits
// Note: the check uses <=, so it also passes when the number of types equals the number of
// values that fit in the type bits.
KISS_ASSERT_COMPILE_TIME(KISS_BNFA_STATE_TYPE_NUM <= (1<<KISS_BNFA_STATE_TYPE_BITS));
// Transition - partial state implementation
// One explicit transition: a character and the state to move to, packed into 8 + 24 bits.
struct kiss_bnfa_partial_transition_s {
u_int tran_char:8; // The character this transition is for
kiss_bnfa_comp_offset_t next_state_offset:24; // Compressed offset of the next state (signed)
};
// The transition count of a partial state shares the first header byte with the state type,
// which leaves it 6 bits - so a partial state can have up to 63 explicit transitions.
#define KISS_BNFA_NUM_TRANS_BITS (8-KISS_BNFA_STATE_TYPE_BITS)
#define KISS_BNFA_MAX_TRANS_NUM ((1<<KISS_BNFA_NUM_TRANS_BITS)-1)
// Header common to all state types (except full)
// Used to read the type of a state before it's known whether the state is partial or match.
typedef struct {
kiss_bnfa_state_type_t type:KISS_BNFA_STATE_TYPE_BITS;
u_int pad:(32-KISS_BNFA_STATE_TYPE_BITS); // The rest of the 32-bit header - its meaning depends on the type
} kiss_bnfa_minimal_state_t;
// Partial state
// A 32-bit header followed by trans_num explicit transitions. The runtime lookup
// (kiss_thin_nfa_get_next_offset_partial) relies on the transitions being sorted by character.
// Use kiss_bnfa_partial_state_size, not sizeof, to get the size of an actual state.
typedef struct {
kiss_bnfa_state_type_t type:KISS_BNFA_STATE_TYPE_BITS;
u_int trans_num:KISS_BNFA_NUM_TRANS_BITS; // Number of explicit transitions
kiss_bnfa_comp_offset_t fail_state_offset:24; // Compressed offset of the state to try when no transition fits
struct kiss_bnfa_partial_transition_s transitions[1]; // Actual size is trans_num
} kiss_bnfa_partial_state_t;
// Match state
// Holds only the match ID. The automaton continues with the state stored right after it
// (see kiss_thin_nfa_get_next_offset_match).
typedef struct {
kiss_bnfa_state_type_t type:KISS_BNFA_STATE_TYPE_BITS;
u_int unused:KISS_BNFA_NUM_TRANS_BITS;
u_int match_id:24; // Handed to kiss_thin_nfa_handle_match as the pattern array offset
} kiss_bnfa_match_state_t;
// Full state
// No header - just one 16-bit compressed offset per possible character value.
typedef struct {
kiss_bnfa_short_offset_t transitions[KISS_THIN_NFA_ALPHABET_SIZE]; // BNFA offset per character
} kiss_bnfa_full_state_t;
// Any state
// A view of the memory at some BNFA offset. Which member is valid depends on the state type:
// 'full' for negative offsets, otherwise 'common' tells whether it's 'partial' or 'match'.
typedef union {
kiss_bnfa_minimal_state_t common;
kiss_bnfa_partial_state_t partial;
kiss_bnfa_match_state_t match;
kiss_bnfa_full_state_t full;
} kiss_bnfa_state_t;
// All states are aligned on this boundary
// This is also the unit of compressed offsets for non-full states.
// Note that the value is of an unsigned type (sizeof), so it's cast to int where signed arithmetic is needed.
#define KISS_BNFA_STATE_ALIGNMENT sizeof(int)
// Compress a given offset when the state type is known. If the type is a compile-time constant, it's faster
// than kiss_bnfa_offset_compress since it should be optimized.
// Offsets of full states are divided by the size of a full state; all other offsets are divided
// by the state alignment.
static CP_INLINE kiss_bnfa_comp_offset_t
kiss_bnfa_offset_quick_compress(kiss_bnfa_offset_t off, kiss_bnfa_state_type_t type)
{
    const int unit = (type == KISS_BNFA_STATE_FULL)
        ? (int)sizeof(kiss_bnfa_full_state_t)
        : (int)KISS_BNFA_STATE_ALIGNMENT;

    return off / unit;
}
// Decompress a given offset when the state type is known. If the type is a compile-time constant, it's faster
// than kiss_bnfa_offset_decompress since it should be optimized.
// The reverse of kiss_bnfa_offset_quick_compress: multiplies by the size of a full state for full states,
// and by the state alignment for all others.
static CP_INLINE kiss_bnfa_offset_t
kiss_bnfa_offset_quick_decompress(kiss_bnfa_comp_offset_t comp_off, kiss_bnfa_state_type_t type)
{
    const int unit = (type == KISS_BNFA_STATE_FULL)
        ? (int)sizeof(kiss_bnfa_full_state_t)
        : (int)KISS_BNFA_STATE_ALIGNMENT;

    return comp_off * unit;
}
// Compress a BNFA offset, for use in partial states (24-bit encoding) and full states (16-bit encoding).
// A negative offset is compressed as a full state's offset, any other offset as a non-full state's offset.
static CP_INLINE kiss_bnfa_comp_offset_t
kiss_bnfa_offset_compress(kiss_bnfa_offset_t off)
{
    const kiss_bnfa_state_type_t kind = (off < 0) ? KISS_BNFA_STATE_FULL : KISS_BNFA_STATE_PARTIAL;

    return kiss_bnfa_offset_quick_compress(off, kind);
}
// Decompress a BNFA offset, which was stored in a partial state (24-bit encoding) or a full state
// (16-bit encoding). A negative value is decompressed as a full state's offset, any other value as a
// non-full state's offset.
static CP_INLINE kiss_bnfa_offset_t
kiss_bnfa_offset_decompress(kiss_bnfa_comp_offset_t off)
{
    const kiss_bnfa_state_type_t kind = (off < 0) ? KISS_BNFA_STATE_FULL : KISS_BNFA_STATE_PARTIAL;

    return kiss_bnfa_offset_quick_decompress(off, kind);
}
// Get a state in the BNFA given its offset.
// 'bnfa' points to the "offset 0" point and bnfa_offset is a distance in bytes from it (may be negative).
// No validation is done on the offset.
static CP_INLINE const kiss_bnfa_state_t *
kiss_bnfa_offset_to_state(const kiss_bnfa_state_t *bnfa, kiss_bnfa_offset_t bnfa_offset)
{
    return (const kiss_bnfa_state_t *)((const char *)bnfa + bnfa_offset);
}
// Get a state in the BNFA given its offset - without const, usable for writing the state.
// Same calculation as kiss_bnfa_offset_to_state: bnfa_offset is a byte distance from 'bnfa'.
static CP_INLINE kiss_bnfa_state_t *
kiss_bnfa_offset_to_state_write(kiss_bnfa_state_t *bnfa, kiss_bnfa_offset_t bnfa_offset)
{
    return (kiss_bnfa_state_t *)((char *)bnfa + bnfa_offset);
}
// Get a state in the BNFA given its compressed offset.
// The caller must supply the state type, because it determines how the offset is decompressed.
static CP_INLINE const kiss_bnfa_state_t *
kiss_bnfa_comp_offset_to_state(
    const kiss_bnfa_state_t *bnfa,
    kiss_bnfa_comp_offset_t bnfa_comp_offset,
    kiss_bnfa_state_type_t type
)
{
    const kiss_bnfa_offset_t byte_offset = kiss_bnfa_offset_quick_decompress(bnfa_comp_offset, type);

    return kiss_bnfa_offset_to_state(bnfa, byte_offset);
}
// Get the state type by its compressed BNFA offset.
// A negative offset means a full state, and the BNFA memory isn't accessed at all.
// For any other offset the type is read from the state's header.
static CP_INLINE kiss_bnfa_state_type_t
kiss_bnfa_state_type(const kiss_bnfa_state_t *bnfa, kiss_bnfa_comp_offset_t bnfa_comp_offset)
{
    const kiss_bnfa_state_t *state;

    if (bnfa_comp_offset < 0) {
        return KISS_BNFA_STATE_FULL;
    }

    // Partial and match states are compressed the same way, so PARTIAL is good for both.
    state = kiss_bnfa_comp_offset_to_state(bnfa, bnfa_comp_offset, KISS_BNFA_STATE_PARTIAL);
    return state->common.type;
}
// State size
// Get the size in bytes of a partial state with N transitions:
// the header (everything before the transition table) plus N transition entries.
static CP_INLINE u_int
kiss_bnfa_partial_state_size(u_int trans_num)
{
    const u_int header_bytes = (u_int)KISS_OFFSETOF(kiss_bnfa_partial_state_t, transitions);
    const u_int table_bytes = (u_int)(sizeof(struct kiss_bnfa_partial_transition_s) * (trans_num));

    return header_bytes + table_bytes;
}
// Get the size in bytes of an existing state, given its (uncompressed) offset.
// Returns 0 if the state's header holds a type value which isn't a real state type.
static CP_INLINE u_int
kiss_bnfa_state_size(const kiss_bnfa_state_t *bnfa, kiss_bnfa_offset_t offset)
{
    const kiss_bnfa_state_type_t type = kiss_bnfa_state_type(bnfa, kiss_bnfa_offset_compress(offset));

    if (type == KISS_BNFA_STATE_FULL) {
        return sizeof(kiss_bnfa_full_state_t);
    }
    if (type == KISS_BNFA_STATE_MATCH) {
        return sizeof(kiss_bnfa_match_state_t);
    }
    if (type == KISS_BNFA_STATE_PARTIAL) {
        // The size of a partial state depends on its number of transitions
        return kiss_bnfa_partial_state_size(kiss_bnfa_offset_to_state(bnfa, offset)->partial.trans_num);
    }
    // KISS_BNFA_STATE_TYPE_NUM - can't happen
    return 0;
}
// Flags for kiss_thin_nfa_s.flags and kiss_thin_nfa_prescan_hdr_s.flags
// The values are bits, which may be combined.
enum kiss_thin_nfa_flags_e {
KISS_THIN_NFA_USE_CHAR_XLATION = 0x01, // Used for caseless and/or digitless
KISS_THIN_NFA_HAS_ANCHOR = 0x02, // State at offset 0 is anchored root, not root
};
#endif // __kiss_thin_nfa_base_h__

View File

@@ -0,0 +1,242 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Thin NFA Construction and Destruction
// -------------------------------------
// This file contains code that builds a Thin NFA.
// The functions here may be called from compilation, serialization and de-serialization contexts.
// The code allows allocating and releasing the Thin NFA structure, as well as serializing and deserializing it.
#include "kiss_thin_nfa_impl.h"
#include "sasal.h"
SASAL_START // Multiple Pattern Matcher
// Allocate and fill in a pattern ID structure, and append it to a pattern list.
// Nothing is added if a pattern with the same ID is already on the list.
//
// Parameters:
//  pat_list_p - in/out - the address of the list head pointer. The head is updated if the list was empty.
//  new_pat    - the pattern to add. Its contents are copied into the newly allocated list node.
// Returns:
//  KISS_OK    - the pattern was added, or a pattern with this ID already exists. In the latter case the
//               existing entry is kept as is, even if its flags or length differ from new_pat.
//  KISS_ERROR - the list node couldn't be allocated. The list is unchanged.
kiss_ret_val
kiss_thin_nfa_add_pattern_id(kiss_thin_nfa_pattern_list_t **pat_list_p, const kiss_thin_nfa_pattern_t *new_pat)
{
    static const char rname[] = "kiss_thin_nfa_add_pattern_id";
    kiss_thin_nfa_pattern_list_t **pat_ptr;
    kiss_thin_nfa_pattern_list_t *pat;

    // Go over the pattern list - look for our pattern, and find the end.
    // When the loop ends, pat_ptr points to the NULL link where a new node should be attached.
    for (pat_ptr = pat_list_p; *pat_ptr != NULL; pat_ptr = &((*pat_ptr)->next)) {
        kiss_thin_nfa_pattern_t *list_pat = &(*pat_ptr)->pattern;
        if (list_pat->id == new_pat->id) {
            // Already there - nothing to do
            thinnfa_debug((
                "%s: Pattern already exists - ID=%d flags=%x(%x) len=%d(%d)\n",
                rname,
                new_pat->id,
                new_pat->pattern_id_flags,
                list_pat->pattern_id_flags,
                new_pat->len,
                list_pat->len
            ));
            return KISS_OK;
        }
    }

    // Allocate the pattern structure
    pat = (kiss_thin_nfa_pattern_list_t *)kiss_pmglob_memory_kmalloc(sizeof(kiss_thin_nfa_pattern_list_t), rname);
    if (!pat) {
        thinnfa_debug_err(("%s: Failed to allocate pattern id\n", rname));
        return KISS_ERROR;
    }

    // Fill in the fields. Structure assignment replaces the legacy bcopy() call.
    // The node is completed (including its next pointer) before it becomes reachable from the list.
    pat->pattern = *new_pat;
    pat->next = NULL;
    thinnfa_debug((
        "%s: Added pattern ID=%d flags=%x len=%d\n",
        rname,
        new_pat->id,
        new_pat->pattern_id_flags,
        new_pat->len
    ));

    // Add to the linked list of patterns.
    *pat_ptr = pat;
    return KISS_OK;
}
// Release every node of a pattern ID list.
//   pat_list - head of the list; may be NULL, in which case nothing is done.
void
kiss_thin_nfa_free_pattern_ids(kiss_thin_nfa_pattern_list_t *pat_list)
{
    static const char rname[] = "kiss_thin_nfa_free_pattern_ids";

    kiss_thin_nfa_pattern_list_t *cur = pat_list;
    while (cur != NULL) {
        // Remember the successor first - 'cur' is gone once it has been freed.
        kiss_thin_nfa_pattern_list_t *following = cur->next;
        thinnfa_debug((
            "%s: Releasing pattern ID=%d flags=%x len=%u\n",
            rname,
            cur->pattern.id,
            cur->pattern.pattern_id_flags,
            cur->pattern.len
        ));
        kiss_pmglob_memory_kfree(cur, sizeof(kiss_thin_nfa_pattern_list_t), rname);
        cur = following;
    }
}
// Initialize a Thin NFA statistics structure.
// The common part is set up by kiss_pm_stats_common_init(); the Thin NFA specific counters are zeroed.
// Returns KISS_ERROR if the common initialization fails (the specific part is then left untouched).
static kiss_ret_val
kiss_thin_nfa_stats_init(kiss_thin_nfa_stats stats)
{
    if (kiss_pm_stats_common_init(&(stats->common)) == KISS_OK) {
        bzero(&(stats->specific), sizeof(struct kiss_thin_nfa_specific_stats_s));
        return KISS_OK;
    }
    return KISS_ERROR;
}
// Free statistics
// Only the common part is released (through kiss_pm_stats_common_free()); the Thin NFA specific part
// consists of plain counters, so this function does nothing for it.
static void
kiss_thin_nfa_stats_free(kiss_thin_nfa_stats stats)
{
kiss_pm_stats_common_free(&(stats->common));
}
// Allocate the state depth map of a Thin NFA.
// The map holds one u_char per compressed BNFA offset in [min_bnfa_offset, max_bnfa_offset), so
// nfa->min_bnfa_offset and nfa->max_bnfa_offset must already be set. The buffer is not initialized here.
// Returns KISS_ERROR if the allocation fails.
static kiss_ret_val
kiss_thin_nfa_alloc_depth_map(KissThinNFA *nfa)
{
    static const char rname[] = "kiss_thin_nfa_alloc_depth_map";

    // The depth map is indexed by compressed offset, so size it by the compressed range.
    const kiss_bnfa_comp_offset_t first_comp_off = kiss_bnfa_offset_compress(nfa->min_bnfa_offset);
    const kiss_bnfa_comp_offset_t end_comp_off = kiss_bnfa_offset_compress(nfa->max_bnfa_offset);

    struct kiss_thin_nfa_depth_map_s *map = &nfa->depth_map;
    map->size = end_comp_off - first_comp_off;
    map->mem_start = (u_char *)kiss_pmglob_memory_kmalloc_ex(map->size, rname, FW_KMEM_SLEEP);
    if (map->mem_start == NULL) {
        thinnfa_debug_err((
            "%s: Error allocating the depth map, size %d (BNFA offsets %d:%d)\n",
            rname,
            nfa->depth_map.size,
            nfa->min_bnfa_offset,
            nfa->max_bnfa_offset
        ));
        return KISS_ERROR;
    }

    // Position of compressed offset 0 inside the buffer. The first compressed offset is negative,
    // so subtracting it lands after mem_start.
    map->offset0 = map->mem_start - first_comp_off;
    return KISS_OK;
}
// Release the state depth map, if one was allocated, and reset its pointers.
// Safe to call when no map exists (mem_start == NULL).
static void
kiss_thin_nfa_destroy_depth_map(KissThinNFA *nfa)
{
    static const char rname[] = "kiss_thin_nfa_destroy_depth_map";

    if (nfa->depth_map.mem_start == NULL) return;

    kiss_pmglob_memory_kfree(nfa->depth_map.mem_start, nfa->depth_map.size, rname);
    nfa->depth_map.mem_start = NULL;
    nfa->depth_map.offset0 = NULL;
}
// Release everything the Thin NFA owns: the BNFA buffer, the statistics, the pattern arrays and the depth map.
// Buffers that were never allocated (NULL pointers) are skipped.
KissThinNFA::~KissThinNFA()
{
    static const char rname[] = "~KissThinNFA";
    // the code here was once in kiss_thin_nfa_destroy

    // The BNFA buffer spans [min_bnfa_offset, max_bnfa_offset), the same size used when it was allocated.
    const u_int bnfa_bytes = max_bnfa_offset - min_bnfa_offset;
    thinnfa_debug_major(("%s: Destroying Thin NFA %p, bnfa size=%d\n", rname, this, bnfa_bytes));

    if (bnfa_start) {
        kiss_pmglob_memory_kfree(bnfa_start, bnfa_bytes, rname);
        bnfa_start = NULL;
        bnfa = NULL;
    }

    kiss_thin_nfa_stats_free(&stats);

    if (pattern_arrays) {
        kiss_pmglob_memory_kfree(pattern_arrays, pattern_arrays_size, rname);
        pattern_arrays = NULL;
    }

    kiss_thin_nfa_destroy_depth_map(this);
}
// Allocate a Thin NFA. The pattern arrays are not allocated and the BNFA is left uninitialized.
//   match_state_num - number of match states, stored in the new object.
//   min_offset      - BNFA offset of the first state.
//   max_offset      - BNFA offset just after the last state.
// Returns the new object, or nullptr on failure. On failure the partially built object is destroyed
// by the unique_ptr, so ~KissThinNFA releases whatever had already been allocated.
std::unique_ptr<KissThinNFA>
kiss_thin_nfa_create(u_int match_state_num, kiss_bnfa_offset_t min_offset, kiss_bnfa_offset_t max_offset)
{
    static const char rname[] = "kiss_thin_nfa_create";

    // Allocate the structure and start from an all-zero state.
    std::unique_ptr<KissThinNFA> result = std::make_unique<KissThinNFA>();
    KissThinNFA *raw = result.get();
    bzero((void *)raw, sizeof(KissThinNFA));

    raw->min_bnfa_offset = min_offset;
    raw->max_bnfa_offset = max_offset;
    raw->match_state_num = match_state_num;

    // Allocate the bnfa array. Not initialized.
    const u_int bnfa_bytes = max_offset - min_offset;
    raw->bnfa_start = (kiss_bnfa_state_t *)kiss_pmglob_memory_kmalloc_ex(bnfa_bytes, rname, FW_KMEM_SLEEP);
    if (raw->bnfa_start == NULL) {
        thinnfa_debug_err((
            "%s: Error allocating the bnfa - size %d (offset %d:%d)\n",
            rname,
            bnfa_bytes,
            min_offset,
            max_offset
        ));
        return nullptr;
    }

    // 'bnfa' addresses offset 0, so that bnfa_start sits at min_offset (min_offset<0, hence bnfa>bnfa_start).
    raw->bnfa = (kiss_bnfa_state_t *)((char *)raw->bnfa_start - min_offset);

    // Init the statistics
    if (kiss_thin_nfa_stats_init(&(raw->stats)) != KISS_OK) {
        thinnfa_debug_err(("%s: Error initializing statistics structure\n", rname));
        return nullptr;
    }

    // Allocate the state depth map
    if (kiss_thin_nfa_alloc_depth_map(raw) != KISS_OK) {
        return nullptr;
    }

    thinnfa_debug_major((
        "%s: Allocated Thin NFA %p, bnfa size=%d (offsets %d:%d)\n",
        rname,
        raw,
        bnfa_bytes,
        min_offset,
        max_offset
    ));
    return result;
}
SASAL_END

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,189 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __h_kiss_thin_nfa_impl_h__
#define __h_kiss_thin_nfa_impl_h__
// *********************** OVERVIEW ******************************
// Thin NFA definitions, which are only used by Thin NFA files.
// 1. A list of patterns which is associated with a finite state.
// 2. APIs for building and destroying the Thin NFA structures.
// ****************************************************************
#include <list>
#include <vector>
#include <memory>
#include "i_pm_scan.h"
#include "kiss_patterns.h"
#include "kiss_pm_stats.h"
#include "kiss_thin_nfa_base.h"
KISS_ASSERT_COMPILE_TIME(KISS_PM_ALPHABET_SIZE == KISS_THIN_NFA_ALPHABET_SIZE);
// Information we keep about a pattern.
// Copied by value into list nodes (see kiss_thin_nfa_add_pattern_id) and stored in pattern arrays.
typedef struct {
int id; // PM Internal pattern ID
u_int pattern_id_flags; // KISS_PM_COMP_ prefix
u_int len; // Pattern length
} kiss_thin_nfa_pattern_t;
// Linked list of pattern information - held per finite state, to indicate what it's accepting.
// Nodes are allocated by kiss_thin_nfa_add_pattern_id and released by kiss_thin_nfa_free_pattern_ids.
typedef struct kiss_thin_nfa_pattern_list_s {
struct kiss_thin_nfa_pattern_list_s *next; // Next node, NULL at the end of the list
kiss_thin_nfa_pattern_t pattern; // The pattern information held by this node
} kiss_thin_nfa_pattern_list_t;
// Array of pattern information - offset to it held per finite state, to indicate what it's accepting.
// The structure is variable-sized: use kiss_thin_nfa_pattern_array_size() to get the size in bytes needed
// for a given number of patterns.
typedef struct kiss_thin_nfa_pattern_array_s {
u_int n_patterns; // Number of entries in pattern[]
// NOTE! Always keep this last!
kiss_thin_nfa_pattern_t pattern[1]; // Dynamic array, not really 1
// Do NOT add anything here!
} kiss_thin_nfa_pattern_array_t;
// Size in bytes of a kiss_thin_nfa_pattern_array_t that holds n_patterns entries.
static CP_INLINE u_int
kiss_thin_nfa_pattern_array_size(const u_int n_patterns)
{
    // sizeof the whole struct already accounts for the declared pattern[] member, so that part is
    // subtracted once and replaced by the space needed for the real number of entries.
    return (
        sizeof(kiss_thin_nfa_pattern_array_t)
        + n_patterns * sizeof(kiss_thin_nfa_pattern_t)
        - sizeof(kiss_thin_nfa_pattern_array_s::pattern)
    );
}
// ThinNFA statistics
// Specific ThinNFA Statistics (zeroed by kiss_thin_nfa_stats_init)
struct kiss_thin_nfa_specific_stats_s {
u_int num_of_states; // number of states in this thin_nfa
u_int num_of_final_states; // number of final states in this thin_nfa
};
// Statistics for ThinNFA
struct kiss_thin_nfa_stats_s {
struct kiss_pm_stats_common_s common; // Run-time (per-CPU, dynamic) and build-time common statistics
struct kiss_thin_nfa_specific_stats_s specific; // Build-time specific ThinNFA statistics
};
// Handle type: a pointer to the statistics structure
typedef struct kiss_thin_nfa_stats_s *kiss_thin_nfa_stats;
// Compressed BNFA offset -> state depth map
struct kiss_thin_nfa_depth_map_s {
u_char *mem_start; // Array of depth per BNFA compressed offset
u_int size; // Size of the mem_start buffer, in bytes
u_char *offset0; // Positive/negative offsets are relative to this
};
// Largest value a map entry can hold. kiss_bnfa_offset_to_depth() treats this value specially
// and reports the maximum pattern length instead.
#define KISS_THIN_NFA_MAX_ENCODABLE_DEPTH 255 // Fit in u_char
// A Compiled Thin NFA, used at runtime.
// The object owns the raw buffers behind bnfa_start, pattern_arrays and depth_map.mem_start; the destructor
// releases them. Copying is therefore disabled - a copy would share the same raw pointers and both objects
// would free them. Instances are created through kiss_thin_nfa_create() and held in a std::unique_ptr.
class KissThinNFA {
public:
    // Declaring the deleted copy operations suppresses the implicit default constructor, so it is
    // defaulted explicitly. It is not user-provided, so value-initialization still zeroes all members.
    KissThinNFA() = default;
    ~KissThinNFA();

    KissThinNFA(const KissThinNFA &) = delete;
    KissThinNFA &operator=(const KissThinNFA &) = delete;

    kiss_bnfa_state_t *bnfa_start;                  // The first (in memory) and initial state
    kiss_bnfa_state_t *bnfa;                        // The state at offset 0 (somewhere in the middle)
    kiss_bnfa_offset_t min_bnfa_offset;             // The offset of the first (and initial) state.
    kiss_bnfa_offset_t max_bnfa_offset;             // The offset after the last state.
    enum kiss_thin_nfa_flags_e flags;
    u_int match_state_num;                          // Number of match states in the machine
    u_int pattern_arrays_size;                      // Total size in bytes of concatenated pattern arrays
    kiss_thin_nfa_pattern_array_t *pattern_arrays;  // A pointer to a buffer holding ALL pattern arrays, for ALL states
    struct kiss_thin_nfa_stats_s stats;
    u_int max_pat_len;                              // Length of the longest string
    u_char xlation_tab[KISS_PM_ALPHABET_SIZE];      // For caseless/digitless
    struct kiss_thin_nfa_depth_map_s depth_map;     // State -> Depth mapping
};
// Convert a pointer into the NFA's pattern arrays buffer to a byte offset from the start of that buffer.
static CP_INLINE u_int
kiss_thin_nfa_pat_array_ptr_to_offset(const KissThinNFA *nfa, const kiss_thin_nfa_pattern_array_t *pat_arr)
{
    const char *arrays_base = (const char *)(nfa->pattern_arrays);
    const char *target = (const char *)pat_arr;
    return target - arrays_base;
}
// Convert a byte offset inside the NFA's pattern arrays buffer back to a pattern array pointer.
// Inverse of kiss_thin_nfa_pat_array_ptr_to_offset().
static CP_INLINE kiss_thin_nfa_pattern_array_t *
kiss_thin_nfa_offset_to_pat_array_ptr(const KissThinNFA *nfa, const u_int offset)
{
    char *arrays_base = (char *)(nfa->pattern_arrays);
    return (kiss_thin_nfa_pattern_array_t *)(arrays_base + offset);
}
// Get a state's depth, looked up in the depth map by compressed BNFA offset.
// A map entry can't hold more than KISS_THIN_NFA_MAX_ENCODABLE_DEPTH (255). For an entry holding that
// value (very deep states, depth >= 255), the maximum pattern length is returned instead,
// which would be greater/equal the real state depth.
static CP_INLINE u_int
kiss_bnfa_offset_to_depth(const KissThinNFA *nfa, kiss_bnfa_comp_offset_t comp_offset)
{
    const u_int encoded = nfa->depth_map.offset0[comp_offset];
    if (encoded == KISS_THIN_NFA_MAX_ENCODABLE_DEPTH) {
        return nfa->max_pat_len;
    }
    return encoded;
}
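// Create a new empty Thin NFA.
// Allocates the BNFA buffer and the state depth map and initializes the statistics, but doesn't fill the BNFA.
// Returns nullptr if an allocation or the statistics initialization fails.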
std::unique_ptr<KissThinNFA>
kiss_thin_nfa_create(
u_int match_state_num,
kiss_bnfa_offset_t min_offset,
kiss_bnfa_offset_t max_offset
);
// Add a pattern (with given id, flags and length) to a list.
// pat_list should point to the head of the list, *pat_list may be modified.
kiss_ret_val
kiss_thin_nfa_add_pattern_id(
kiss_thin_nfa_pattern_list_t **pat_list,
const kiss_thin_nfa_pattern_t *pat_info
);
// Free all patterns on a list.
void kiss_thin_nfa_free_pattern_ids(kiss_thin_nfa_pattern_list_t *pat_list);
// Compile a Thin NFA
std::unique_ptr<KissThinNFA>
kiss_thin_nfa_compile(
const std::list<kiss_pmglob_string_s> &patterns,
u_int compile_flags,
KissPMError *error
);
// Validate Thin NFA
BOOL kiss_thin_nfa_is_valid(const KissThinNFA *nfa_h);
void
kiss_thin_nfa_exec(KissThinNFA *nfa_h, const Buffer &buffer, std::vector<std::pair<uint, uint>> &matches);
// Dump a PM
kiss_ret_val kiss_thin_nfa_dump(const KissThinNFA *nfa_h, enum kiss_pm_dump_format_e format);
// Debugging macro wrappers.
// All get a format string plus parameters in double parentheses:
// thinnfa_debug(("%s: hello, world\n", rname));
// Meaning of each macro:
// thinnfa_debug_critical - Critical error, printed by default.
// thinnfa_debug_err - Error we should live with (e.g. usage error, memory allocation), not printed by default.
// thinnfa_debug - Normal debug messages.
// thinnfa_debug_major - Debug messages about several major events in Thin NFA construction. Use sparingly.
// thinnfa_debug_extended - Low level debug messages, which may be printed in large numbers.
// thinnfa_debug_perf - Forwards to kiss_debug_info_perf.
// thinnfa_dbg - An "if" statement checking the debug flag (equivalent to thinnfa_debug).
#define thinnfa_debug_critical(_str) kiss_debug_err(K_ERROR, _str)
#define thinnfa_debug_err(_str) kiss_debug_err(K_THINNFA|K_PM, _str)
#define thinnfa_debug(_str) kiss_debug_info(K_THINNFA, _str)
#define thinnfa_debug_major(_str) kiss_debug_info(K_THINNFA|K_PM, _str)
#define thinnfa_debug_extended(_str) kiss_debug_info(K_THINNFA, _str)
#define thinnfa_debug_perf(_str) kiss_debug_info_perf(K_THINNFA, _str)
#define thinnfa_dbg() kiss_dbg(K_THINNFA)
#endif // __h_kiss_thin_nfa_impl_h__

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,103 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "pm_adaptor.h"
#include "sasal.h"
SASAL_START // Multiple Pattern Matcher
// Global switch tested by the kiss_debug_* macros: output is produced only while it is non-zero.
int kiss_debug_err_flag = 0;

// Shared setter behind kiss_debug_start() / kiss_debug_stop().
static void
kiss_debug_set_enabled(int enabled)
{
    kiss_debug_err_flag = enabled;
}

// Turn the PM debug output on.
void
kiss_debug_start()
{
    kiss_debug_set_enabled(1);
}

// Turn the PM debug output off.
void
kiss_debug_stop()
{
    kiss_debug_set_enabled(0);
}
// Build a character translation table: tab[ch] is the canonic character that ch is treated as.
//   flags - which translations to apply:
//           KISS_PMGLOB_CHAR_XLATION_DIGITS maps every digit to '0',
//           KISS_PMGLOB_CHAR_XLATION_CASE maps every character to its lowercase form.
//   tab   - output table with KISS_PM_ALPHABET_SIZE entries.
// Characters not affected by the requested translations map to themselves.
void
kiss_pmglob_char_xlation_build(enum kiss_pmglob_char_xlation_flags_e flags, u_char tab[KISS_PM_ALPHABET_SIZE])
{
    const bool fold_digits = (flags & KISS_PMGLOB_CHAR_XLATION_DIGITS) != 0;
    const bool fold_case = (flags & KISS_PMGLOB_CHAR_XLATION_CASE) != 0;

    // Find the canonic character for each character.
    for (u_int code = 0; code < KISS_PM_ALPHABET_SIZE; code++) {
        const u_char ch = (u_char)code;
        u_char canonic = ch;
        if (fold_digits && isdigit(ch)) {
            canonic = '0';
        } else if (fold_case) {
            canonic = tolower(ch);
        }
        tab[ch] = canonic;
    }
}
// Reverse a character translation table, so we can find all characters that map to a canonic character.
//
// Since the reverse map maps one character to many, it's implemented this way:
// 1. Characters are arranged in groups - all characters in a group map to the same canonic character.
// 2. A group is represented as a cyclic linked list, where each character points to the next in the same group.
// 3. Instead of pointers, we use characters - for each character, rev[ch] is the next character in the group.
//
//   tab - the forward table (character -> canonic character).
//   rev - output table with KISS_PM_ALPHABET_SIZE entries, filled with the "next in group" links.
void
kiss_pmglob_char_xlation_build_reverse(const u_char tab[KISS_PM_ALPHABET_SIZE], u_char rev[KISS_PM_ALPHABET_SIZE])
{
    // Start with every character alone in its group - a cycle of length one.
    for (u_int code = 0; code < KISS_PM_ALPHABET_SIZE; code++) {
        rev[(u_char)code] = (u_char)code;
    }

    // Splice each non-canonic character into its canonic character's cycle, right after the canonic one.
    for (u_int code = 0; code < KISS_PM_ALPHABET_SIZE; code++) {
        const u_char ch = (u_char)code;
        const u_char canonic = tab[ch];
        if (canonic != ch) {
            rev[ch] = rev[canonic];
            rev[canonic] = ch;
        }
        // Otherwise the character is canonic and already heads its own group.
    }
}
std::ostream&
operator<<(std::ostream& os, const KissPMError &e)
{
return os << "Reason: " << e.error_string;
}
// Record the details of an error, unless an error was already recorded.
//   error        - error object to fill; NULL is allowed and makes the call a no-op.
//   error_type   - the kind of error.
//   error_string - text describing the problem. Only the pointer is stored, not a copy.
void
kiss_pm_error_set_details(KissPMError *error,
    kiss_pm_error_type error_type,
    const char error_string[])
{
    // No error struct. Not a problem
    if (error == NULL) return;

    // Error already set - the first reported error is kept.
    if (error->error_string != NULL) return;

    error->error_type = error_type;
    error->error_string = error_string;
}
SASAL_END

View File

@@ -0,0 +1,229 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef _pm_adaptor_h_
#define _pm_adaptor_h_
#include <vector>
#include <iostream>
#include "general_adaptor.h"
#define KISS_PM_ALPHABET_SIZE 256
#define KISS_APPS_CPAPI
// used to copy any struct, array, string, or variable
#if 0
#define DATA_BUFF_COPY(_buf, _buf_size, _data, _data_size) bcopy((_data), (_buf), (_data_size)); \
(_buf) += (_data_size); \
(*(_buf_size)) -= (_data_size)
#endif
// Copy _data_size bytes from _data to the output cursor _buf, then advance _buf and decrease the
// remaining-size counter that _buf_size points to.
// Not using the original DATA_BUFF_COPY which uses bcopy. On 64bit libc2.5, it seems that bcopy reads
// past the source buffer, as long as it is aligned. That's OK, but valgrind complains.
// The three statements are wrapped in do { } while (0), so the macro is a single statement and stays
// correct when used as the body of an unbraced if/else/loop.
#define DATA_BUFF_COPY(_buf, _buf_size, _data, _data_size) do { \
        memcpy((_buf), (_data), (_data_size)); \
        (_buf) += (_data_size); \
        (*(_buf_size)) -= (_data_size); \
    } while (0)
// Typed wrappers around DATA_BUFF_COPY: each stores _val in a temporary of the named type and appends
// the temporary's bytes to the buffer, advancing _buf and decreasing *_buf_size by the type's size.
// Write an int
#define INT_BUFF_COPY(_buf, _buf_size, _val) do { \
int temp_val = _val; \
DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(int)); \
} while (0)
// Write a u_int
#define U_INT_BUFF_COPY(_buf, _buf_size, _val) do { \
u_int temp_val = _val; \
DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(u_int)); \
} while (0)
// Write a u_short
#define U_SHORT_BUFF_COPY(_buf, _buf_size, _val) do { \
u_short temp_val = _val; \
DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(u_short)); \
} while (0)
// Write a u_char
#define U_CHAR_BUFF_COPY(_buf, _buf_size, _val) do { \
u_char temp_val = _val; \
DATA_BUFF_COPY(_buf, _buf_size, &temp_val, sizeof(u_char)); \
} while (0)
// Read _data_size bytes from the input cursor _buf into _to.
// If at least _data_size bytes remain (*_buf_size), the bytes are copied, _buf is advanced and *_buf_size
// is decreased. Otherwise nothing is copied (_to is left untouched) and *_buf_size is set to 0.
// The _vbuf and _vbuf_iter parameters are accepted but not used by this implementation.
#define DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, _to, _data_size) \
do { \
if ((*(_buf_size)) >= (_data_size)) { \
bcopy(_buf, _to, _data_size); \
_buf += _data_size; \
(*(_buf_size)) -= (_data_size); \
} \
else { \
(*(_buf_size)) = 0; \
} \
} while(0)
// Typed wrappers around DATA_BUFF_READ: read sizeof(type) bytes into the variable _var.
#define INT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, &_var, sizeof(int))
#define U_INT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, &_var, sizeof(u_int))
#define U_SHORT_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, &_var, sizeof(u_short))
#define U_CHAR_BUFF_READ(_var, _buf, _buf_size, _vbuf, _vbuf_iter) \
DATA_BUFF_READ(_buf, _buf_size, _vbuf, _vbuf_iter, &_var, sizeof(u_char))
// Serialization magics, used to verify buffer structure
#define KISS_PM_SERIALIZED 0x53525A50 // SRZP
#define KISS_DFA_SERIALIZED 0x53525A44 // SRZD
#define KISS_WM_SERIALIZED 0x53525A48 // SRZH
#define KISS_THIN_NFA_SERIALIZED 0x53525A4E // SRZN
#define KISS_EX_REM_SERIALIZED 0x53525A58 // SRZX
#define KISS_STATS_SERIALIZED 0x53525A53 // SRZS
#define KISS_STATE_SERIALIZED 0x53525A54 // SRZT
#define KISS_PM_SERIALIZE_IGNORE_INT 0x53525A49 // SRZI
#define KISS_KW_SERIALIZED 0x53525A4B // SRZK
#define KISS_KW_MGR_SERIALIZED 0x53525A47 // SRZG
// Classification of pattern matcher errors, stored in KissPMError::error_type.
typedef enum kiss_pm_error_type_e {
KISS_PM_ERROR_SYNTAX = 0, // < Syntax error is an error in the way the pattern is written.
KISS_PM_ERROR_INTERNAL, // < Internal error is an error caused by lack of resources or by design.
KISS_PM_ERROR_COMPLEX_PATTERN, // < Pattern is too complex to compile - too many states or too much memory
KISS_PM_ERROR_NO_ERROR // < No error
} kiss_pm_error_type;
// Error report filled in by the pattern matcher. An error counts as "set" once error_string is
// non-null (see kiss_pm_error_set_details).
class KissPMError {
public:
int pattern_id = -1; //< The user's pattern id
kiss_pm_error_type error_type = KISS_PM_ERROR_INTERNAL; //< The error type syntax or internal
const char *error_string = nullptr; //< string describing the problem; nullptr while no error is set
u_int index = 0; //< The place that caused the problem. Best effort.
const u_char *pattern_buf = nullptr; //< The user's pattern buffer
};
std::ostream& operator<<(std::ostream& os, const KissPMError &k);
void kiss_pm_error_set_details(KissPMError *error, kiss_pm_error_type error_type, const char error_string[]);
// PATTERNS FLAGS
// When adding a new pattern flag,
// add a metadata string below and register it in kiss_pm_pattern_flags_data in kiss_pm.c
// range from 0x00010000 to 0x80000000
// EXTERNAL PATTERN FLAGS
// These flags can be added per pattern when adding it to pm_patterns using kiss_pm_pattern_add_[simple_]pattern_...
#define KISS_PM_COMP_WM_CONT_WORD 0x80000000 // a WM continuous word -
// when used on a word we search for it without delimiters.
// Large impact on performance so think twice before using
#define KISS_PM_COMP_ALLOW_SHORT_LSS 0x40000000 // Accept short lss (shorter than kiss_pm_min_lss_sise)
#define KISS_PM_COMP_LITERAL_LSS 0x20000000 // The LSS should not be normalized -
// i.e. all chars read as literals
#define KISS_PM_COMP_CASELESS 0x10000000 // Indicates a caseless pattern
#define KISS_PM_COMP_UTF8 0x08000000 // the pattern is UTF8 encoded.
#define KISS_PM_COMP_BOUNDED_PATT 0x04000000 // find the pattern only between non word characters
// (including buffer start and end).
// Do not use this flag with `^` or `$`.
#define KISS_PM_COMP_DONT_USE_PCRE 0x02000000 // don't use pcre for second tier.
#define KISS_PM_COMP_VERIFY_PCRE_SYNTAX 0x01000000 // Verify that pattern that compiles with PCRE fits PM syntax
// INTERNAL PATTERN FLAGS
#define KISS_PM_COMP_FIRST_TIER_OF_PATT 0x00800000 // pattern is in its first tier execution.
#define KISS_PM_COMP_BOUNDED_CIRCUMFLEX_ADDED 0x00400000 // This flag indicates that we have created a pattern
// for bounded word infra which is different
// from the orig patterns. In such cases we need to take
// it into consideration when we look for the match start.
#define KISS_PM_COMP_MORE_THAN_ONE_LSS 0x00200000 // The pattern is made up of one or more simple strings
#define KISS_PM_COMP_DONT_STRIP 0x00100000 // Parse the pattern without stripping ^/$ from the
// RE beginning/end respectively.
#define KISS_PM_LSS_AT_BUF_START 0x00080000 // LSS should be at the beginning of the buffer.
#define KISS_PM_LSS_AT_BUF_END 0x00040000 // LSS should be at the end of the buffer.
#define KISS_PM_RE_AT_BUF_START 0x00020000 // RE should be at the beginning of the buffer.
#define KISS_PM_COMP_HAVE_SECOND_TIER 0x00010000 // the pattern needs second tier.
#define KISS_PM_COMP_NO_HISTORY 0x00008000 // Execute this pattern only with the buffer
// (not with the history vbuf)
#define KISS_PM_COMP_REDUCE_SIZE 0x00004000 // Favor small memory consumption over good performance
// END OF PATTERNS FLAGS
// Internal flags set in the match data in kiss_dfa_insert_match_data:
#define KISS_PMGLOB_MATCH_DATA_FORCE_ADD 0x00000001 // Force add pmglob match data,
// even if the pattern has already been matched
#define KISS_PMGLOB_MATCH_OFFSET_IN_PRESENT_BUF 0x00000002 // The match offset refers to the present buffer
#define KISS_PMGLOB_REDUCE_BUFFER_LENGTH 0x00000004 // Reduce the length of tier2 buffer using
// LSS offsets found in tier1
// How many different first tiers can a PM have? (can be smaller than the number of first tier types)
#define KISS_TIER1_MAX_NUM 2
// First tier type
typedef enum kiss_tier1_type_t {
KISS_TIER1_WM, // Word Matcher
KISS_TIER1_SM, // DFA String matcher
KISS_TIER1_THIN_NFA = KISS_TIER1_SM, // Thin NFA - instead of DFA (shares the value of KISS_TIER1_SM)
KISS_TIER1_NUM_TYPES, // Number of distinct first tier types
KISS_TIER1_INVALID = KISS_TIER1_NUM_TYPES // Marker for "no valid type"
} kiss_tier1_type;
// Which statistics the user wants to see
enum kiss_pm_stats_type {
KISS_PM_STATIC_STATS = 0, // number of patterns, number of states, ....
KISS_PM_DYNAMIC_STATS, // number of executions, number of matches, avg buffer length,...
KISS_PM_BOTH_STATS // both statistics
};
#define K_ERROR 0x00000010
#define K_PM 0x00000400
#define K_THINNFA 0x00400000
#define KISS_PM_COMP_DIGITLESS 0x00001000 // Indicates a digitless first tier match
// Debug output adaptor: every macro prints with printf, and only while kiss_debug_err_flag is non-zero.
// The 'topics' argument is accepted for source compatibility and ignored.
extern int kiss_debug_err_flag;

// Statement-like macros. _string is a parenthesized printf argument list, e.g. ("%s: text\n", rname).
// They are wrapped in do { } while (0) so that each expands to exactly one statement: with a bare
// "if (...) printf ..." expansion, an 'else' following the macro call would silently attach to the
// macro's internal 'if'.
#define kiss_debug_err(topics, _string) do { if (kiss_debug_err_flag) printf _string; } while (0)
#define kiss_debug_wrn(topics, _string) do { if (kiss_debug_err_flag) printf _string; } while (0)
#define kiss_debug_notice(topics, _string) do { if (kiss_debug_err_flag) printf _string; } while (0)
#define kiss_debug_info(topics, _string) do { if (kiss_debug_err_flag) printf _string; } while (0)

// Prefix-style macros: the caller supplies what follows (an argument list for kiss_debug, a statement
// for kiss_dbg), so they cannot be wrapped. Use them inside braces when an 'else' may follow.
#define kiss_debug(topics) if (kiss_debug_err_flag) printf
// Performance debug messages are compiled out.
#define kiss_debug_info_perf(topics, _string)
#define kiss_dbg(topics) if (kiss_debug_err_flag)
#define kiss_vbuf void *
#define kiss_vbuf_iter void *
// Which character translations are needed?
// Bit flags, tested with '&' by kiss_pmglob_char_xlation_build().
enum kiss_pmglob_char_xlation_flags_e {
KISS_PMGLOB_CHAR_XLATION_NONE = 0x00, // Every character maps to itself
KISS_PMGLOB_CHAR_XLATION_CASE = 0x01, // Map characters to lowercase
KISS_PMGLOB_CHAR_XLATION_DIGITS = 0x02, // Map all digits to '0'
};
// Output formats for dumping a PM (see kiss_thin_nfa_dump)
enum kiss_pm_dump_format_e {
KISS_PM_DUMP_XML, // XML, for opening with JFlap
KISS_PM_DUMP_CSV, // CSV, for opening with Excel
KISS_PM_DUMP_WIKI // WIKI, for copy&paste into Wiki (Confluence)
};
void kiss_pmglob_char_xlation_build(enum kiss_pmglob_char_xlation_flags_e flags, u_char tab[KISS_PM_ALPHABET_SIZE]);
void kiss_pmglob_char_xlation_build_reverse(
const u_char tab[KISS_PM_ALPHABET_SIZE],
u_char rev[KISS_PM_ALPHABET_SIZE]
);
void kiss_debug_start();
void kiss_debug_stop();
#endif // _pm_adaptor_h_

View File

@@ -0,0 +1,165 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "pm_hook.h"
#include <ctype.h>
#include <stdlib.h>
#include <fstream>
#include <algorithm>
#include "kiss_patterns.h"
#include "kiss_thin_nfa_impl.h"
#include "sasal.h"
using namespace std;
SASAL_START // Multiple Pattern Matcher
USE_DEBUG_FLAG(D_PM_COMP);
USE_DEBUG_FLAG(D_PM_EXEC);
USE_DEBUG_FLAG(D_PM);
// Translate a pattern's anchoring into KISS pattern flags:
// a start-anchored pattern gets KISS_PM_LSS_AT_BUF_START, an end-anchored one gets KISS_PM_LSS_AT_BUF_END.
static int
pm_pattern_to_kiss_pat_flags(const PMPattern &pat)
{
    const int start_flag = pat.isStartMatch() ? KISS_PM_LSS_AT_BUF_START : 0;
    const int end_flag = pat.isEndMatch() ? KISS_PM_LSS_AT_BUF_END : 0;
    return start_flag | end_flag;
}
// Build the pattern list handed to the Thin NFA compiler from an (id -> pattern) map.
// Each entry carries the pattern bytes, their size, the map key as pattern id and the anchoring flags.
static list<kiss_pmglob_string_s>
convert_patt_map_to_kiss_list(const map<int, PMPattern> &patt_map)
{
    list<kiss_pmglob_string_s> result;
    for (auto entry = patt_map.begin(); entry != patt_map.end(); ++entry) {
        const PMPattern &pat = entry->second;
        result.emplace_back(pat.data(), pat.size(), entry->first, pm_pattern_to_kiss_pat_flags(pat));
    }
    return result;
}
// The constructor and destructor must be defined in this file rather than implicitly in the header:
// the class holds a unique_ptr to a type that is incomplete in the header. Bummer...
PMHook::PMHook() = default;

PMHook::~PMHook() = default;
// Parse a textual pattern line into a PMPattern.
// A leading '^' marks the pattern as anchored to the buffer start and a trailing '$' as anchored to the
// buffer end; both markers are removed from the pattern content.
// Returns an error for an empty line, or when nothing is left after removing the markers.
Maybe<PMPattern>
PMHook::lineToPattern(const string &line)
{
    if (line.empty()) return genError("Empty string");

    const bool anchored_at_start = line.front() == '^';
    const bool anchored_at_end = line.back() == '$';

    // Drop one character for each anchor marker that is present.
    size_t remaining = line.size();
    if (anchored_at_start) remaining -= 1;
    if (anchored_at_end) remaining -= 1;
    const int content_offset = anchored_at_start ? 1 : 0;
    const int content_len = remaining;

    auto content = line.substr(content_offset, content_len);
    if (content.empty()) return genError("Pattern must contain actual content");

    return PMPattern(content, anchored_at_start, anchored_at_end);
}
// Compile a set of patterns into a Thin NFA and make this object ready for scanning.
// Patterns get the ids 1..N in the set's iteration order; matching is compiled as caseless.
// On success the id -> pattern map is stored and an OK value is returned.
// On failure an error holding the compiler's reason is returned and 'handle' is left null.
Maybe<void>
PMHook::prepare(const set<PMPattern> &inputs)
{
    map<int, PMPattern> tmp;
    int index = 0;
    for (auto &pat : inputs) {
        tmp.emplace(++index, pat);
    }

    // Evaluate the debug level once, so that kiss_debug_start() and kiss_debug_stop() are always paired.
    const bool debug_compile = Debug::isFlagAtleastLevel(D_PM_COMP, Debug::DebugLevel::DEBUG);
    if (debug_compile) kiss_debug_start();
    KissPMError pm_err;
    handle = kiss_thin_nfa_compile(convert_patt_map_to_kiss_list(tmp), KISS_PM_COMP_CASELESS, &pm_err);
    if (debug_compile) kiss_debug_stop();

    if (handle == nullptr) {
        // error_string stays nullptr when the compiler failed without reporting details. A null
        // 'const char *' must not be streamed or turned into a std::string, so use a fallback text.
        const string reason =
            (pm_err.error_string != nullptr) ? pm_err.error_string : "Unknown pattern matcher compilation error";
        dbgError(D_PM_COMP) << "PMHook::prepare() failed. Reason: " << reason;
        return genError(string(reason));
    }

    patterns = tmp;
    return Maybe<void>();
}
// Scan a buffer and return the set of distinct patterns found in it.
// prepare() must have succeeded before calling this (asserted).
set<PMPattern>
PMHook::scanBuf(const Buffer &buf) const
{
    dbgAssert(handle != nullptr) << "Unusable Pattern Matcher";

    vector<pair<uint, uint>> raw_matches;
    kiss_thin_nfa_exec(handle.get(), buf, raw_matches);
    dbgTrace(D_PM) << raw_matches.size() << " raw matches found";

    // The first member of each raw match is the pattern id; the set collapses repeated patterns.
    set<PMPattern> found;
    for (auto hit = raw_matches.begin(); hit != raw_matches.end(); ++hit) {
        found.insert(patterns.at(hit->first));
    }
    dbgTrace(D_PM) << found.size() << " matches found after removing the duplicates";
    return found;
}
// Scan a buffer and return every distinct (value, pattern) pair found, where the value is the second
// member of the raw match (presumably the match offset in the buffer, going by the function name).
// prepare() must have succeeded before calling this (asserted).
set<pair<uint, PMPattern>>
PMHook::scanBufWithOffset(const Buffer &buf) const
{
    dbgAssert(handle != nullptr) << "Unusable Pattern Matcher";

    vector<pair<uint, uint>> raw_matches;
    kiss_thin_nfa_exec(handle.get(), buf, raw_matches);
    dbgTrace(D_PM) << raw_matches.size() << " raw matches found";

    // Raw matches are (pattern id, value) - swap the order and replace the id with the pattern itself.
    set<pair<uint, PMPattern>> found;
    for (auto hit = raw_matches.begin(); hit != raw_matches.end(); ++hit) {
        found.emplace(hit->second, patterns.at(hit->first));
    }
    dbgTrace(D_PM) << found.size() << " matches found";
    return found;
}
// Scan a buffer and invoke 'cb' once for every result of scanBufWithOffset(), in the set's order.
void
PMHook::scanBufWithOffsetLambda(const Buffer &buf, function<void(uint, const PMPattern&)> cb) const
{
    const auto found = scanBufWithOffset(buf);
    for (auto hit = found.begin(); hit != found.end(); ++hit) {
        cb(hit->first, hit->second);
    }
}
// Strict ordering of patterns: by pattern content, then index, then the start/end anchoring flags.
bool
PMPattern::operator<(const PMPattern &other) const
{
    return
        tie(pattern, index, match_start, match_end) <
        tie(other.pattern, other.index, other.match_start, other.match_end);
}
// Two patterns are equal when index, content and both anchoring flags are all equal.
bool
PMPattern::operator==(const PMPattern &other) const
{
    return
        tie(index, pattern, match_start, match_end) ==
        tie(other.index, other.pattern, other.match_start, other.match_end);
}
SASAL_END

View File

@@ -0,0 +1,5 @@
add_unit_test(
pm_ut
"pm_scan_ut.cc;pm_pat_ut.cc"
"pm;buffers"
)

View File

@@ -0,0 +1,78 @@
#include <string>
#include <fstream>
#include "cptest.h"
#include "pm_hook.h"
using namespace std;
static void
pm_pat_simple_pat(
const std::string &hex_line,
const std::string &line,
bool expected_match_at_start,
bool expected_match_at_end)
{
PMPattern pat;
auto res = PMHook::lineToPattern(hex_line.c_str());
EXPECT_TRUE(res.ok()) << res.getErr();
pat = *res;
EXPECT_EQ(pat.isStartMatch(), expected_match_at_start);
EXPECT_EQ(pat.isEndMatch(), expected_match_at_end);
ASSERT_EQ(pat.size(), line.size());
EXPECT_EQ(memcmp((const char *)pat.data(), line.c_str(), line.size()), 0);
}
// Check that PMHook::lineToPattern rejects the given line.
static void
pm_pat_bad_pat(const std::string &bad_hex_line)
{
    auto parsed = PMHook::lineToPattern(bad_hex_line);
    EXPECT_FALSE(parsed.ok());
}
// A plain pattern: no anchors, content is kept as is.
TEST(pm_pat, basic)
{
pm_pat_simple_pat("ABCDxyz", "ABCDxyz", false, false);
}
// A leading '^' is removed and reported as a start anchor.
TEST(pm_pat, pat_with_begin)
{
pm_pat_simple_pat("^ABCD", "ABCD", true, false);
}
// A trailing '$' is removed and reported as an end anchor.
TEST(pm_pat, pat_with_end)
{
pm_pat_simple_pat("ABCD$", "ABCD", false, true);
}
// Both anchors on the same pattern.
TEST(pm_pat, pat_with_begin_end)
{
pm_pat_simple_pat("^ABCD$", "ABCD", true, true);
}
// A longer pattern containing a repeated letter sequence.
TEST(pm_pat, pat_with_all_chars)
{
pm_pat_simple_pat("ABCDEFGHIJKLMNOPJKLMNO", "ABCDEFGHIJKLMNOPJKLMNO", false, false);
}
// Only anchors and no content - must be rejected.
TEST(pm_pat, empty_pat_with_begin_end)
{
pm_pat_bad_pat("^$");
}
// An empty line - must be rejected.
TEST(pm_pat, empty_pat)
{
pm_pat_bad_pat("");
}
// Bytes with the high bit set must pass through lineToPattern unchanged.
TEST(pm_pat, chars_above_127)
{
    static const vector<u_char> buf = { 0x80, 0x96, 0xaa, 0xff };

    auto rc = PMHook::lineToPattern(string(buf.begin(), buf.end()));
    // A failed parse must end the test here: the result is dereferenced right below.
    ASSERT_TRUE(rc.ok()) << rc.getErr();

    PMPattern pat;
    pat = *rc;
    EXPECT_FALSE(pat.isStartMatch());
    EXPECT_FALSE(pat.isEndMatch());
    // The sizes must agree before the contents can be compared.
    ASSERT_EQ(pat.size(), buf.size());
    EXPECT_EQ(memcmp(pat.data(), buf.data(), buf.size()), 0);
}

View File

@@ -0,0 +1,469 @@
#include <string>
#include "cptest.h"
#include "pm_hook.h"
using namespace std;
using namespace testing;
// No-op stream operator for PMPattern: writes nothing and returns the stream unchanged.
// NOTE(review): presumably here so that gtest can stream PMPattern values in failure messages - confirm.
ostream & operator<<(ostream &os, const PMPattern &) { return os; }
// Parse 'hex_pat' with PMHook::lineToPattern and add the resulting pattern to 'pats'.
// If parsing fails, a test failure is recorded and nothing is added.
static void
push_pat(set<PMPattern> &pats, const string &hex_pat)
{
    auto pat = PMHook::lineToPattern(hex_pat.c_str());
    // Must stop here on failure: the result is dereferenced right below.
    ASSERT_TRUE(pat.ok()) << pat.getErr();
    pats.insert(*pat);
}
// Build a pattern set holding the single pattern parsed from 'pattern'.
static set<PMPattern>
getPatternSet(const string &pattern)
{
    set<PMPattern> single;
    push_pat(single, pattern);
    return single;
}
// Build a pattern set from any number of pattern lines.
// The set for the remaining arguments is built first, then the first pattern is added to it.
template <typename ... Patterns>
static set<PMPattern>
getPatternSet(const string &pattern, Patterns ...more_patterns)
{
    set<PMPattern> combined = getPatternSet(more_patterns...);
    push_pat(combined, pattern);
    return combined;
}
// Compile 'pats' into a fresh PMHook (expecting success), scan 'buf' with it and return the matched patterns.
static set<PMPattern>
prepare_scan_and_compare(const set<PMPattern> &pats, const string &buf)
{
    PMHook matcher;
    EXPECT_TRUE(matcher.prepare(pats).ok());
    const Buffer scanned(buf);
    return matcher.scanBuf(scanned);
}
// Helper for the trivial tests: compile the single pattern 'hex_pat', scan 'buf' and return the matches.
// buf is NULL terminated, and the NULL is NOT passed to the PM.
static set<PMPattern>
common_scan_test_single_pat(const string &hex_pat, const string &buf)
{
    return prepare_scan_and_compare(getPatternSet(hex_pat), buf);
}
// Scanning an empty buffer finds nothing.
TEST(pm_scan, zero_buf_len)
{
EXPECT_EQ(common_scan_test_single_pat("ABCD", ""), set<PMPattern>());
}
// The pattern appears several times - it is reported once, as the result is a set.
TEST(pm_scan, basic)
{
EXPECT_EQ(common_scan_test_single_pat("ABCD", "ABCD ABCD AB AB ABC ABCD"), getPatternSet("ABCD"));
}
// Start-anchored pattern that is present at the buffer start.
TEST(pm_scan, with_start_flag)
{
EXPECT_EQ(common_scan_test_single_pat("^ABCD", "ABCD ABCD AB AB ABC AAAAAAA"), getPatternSet("^ABCD"));
}
// Start-anchored single-character pattern on a short buffer.
TEST(pm_scan, with_start_flag_short_buf)
{
EXPECT_EQ(common_scan_test_single_pat("^A", "ABC"), getPatternSet("^A"));
}
// End-anchored pattern that is present at the buffer end.
TEST(pm_scan, with_end_flag)
{
EXPECT_EQ(common_scan_test_single_pat("ABCD$", "KKKK ABCD ABCD ABCD"), getPatternSet("ABCD$"));
}
// The buffer is shorter than the pattern - no match.
TEST(pm_scan, nomatch)
{
EXPECT_EQ(common_scan_test_single_pat("AAA", "AA"), set<PMPattern>());
}
// The buffer is exactly the pattern.
TEST(pm_scan, exact_match)
{
EXPECT_EQ(common_scan_test_single_pat("AAA", "AAA"), getPatternSet("AAA"));
}
// Overlapping occurrences of the pattern in the buffer.
TEST(pm_scan, overlap_in_buf)
{
EXPECT_EQ(common_scan_test_single_pat("AAA", "AAAA"), getPatternSet("AAA"));
}
// Pattern anchored on both sides, buffer is longer than the pattern - no match.
TEST(pm_scan, with_begin_and_end_flag_no_match)
{
EXPECT_EQ(common_scan_test_single_pat("^AAA$", "AAAA"), set<PMPattern>());
}
// Pattern anchored on both sides, buffer is exactly the pattern.
TEST(pm_scan, with_begin_and_end_flag_match)
{
EXPECT_EQ(common_scan_test_single_pat("^ABC$", "ABC"), getPatternSet("^ABC$"));
}
// A long run of the same character produces many overlapping raw matches of one pattern.
TEST(pm_scan, many_matches)
{
EXPECT_EQ(
common_scan_test_single_pat(
"AAA",
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
),
getPatternSet("AAA")
);
}
// A pattern of 208 characters (the uppercase alphabet repeated 8 times) is found inside a slightly longer buffer.
TEST(pm_scan, long_pattern)
{
    const string alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    string long_str;
    for (int repeat = 0; repeat < 8; repeat++) {
        long_str += alphabet;
    }

    const string buf = ".-= " + long_str + " =-.";
    EXPECT_EQ(common_scan_test_single_pat(long_str, buf), getPatternSet(long_str));
}
// A 78000-character pattern must be matched in full, without being truncated internally.
TEST(pm_scan, very_long_pattern)
{
string abc = "abcdefghijklmnopqrstuvwxyz";
string very_long_str;
// We choose 3000 repetitions, because this gives a total of 78K chars. If there's
// some unsigned short used internally, we hope to overflow it.
for (int i = 0; i<3000; i++) {
very_long_str += abc;
}
string pattern = very_long_str;
// What if the PM internally truncated our very long pattern?
// Because it is cyclic, we might not catch it in the line above.
// So we ask it to find the pattern in a buffer containing almost the whole pattern, but not all of it.
string truncated_begin(pattern, 1, pattern.size() - 1);
string truncated_end(pattern, 0, pattern.size() - 1);
// We put a separator between them (which doesn't contain any char from the pattern), so there are no
// additional matches on buf_to_scan
const string seperator_str = "1234";
auto buf_to_scan = seperator_str+very_long_str+seperator_str+truncated_end+seperator_str+truncated_begin;
EXPECT_EQ(common_scan_test_single_pat(pattern, buf_to_scan), getPatternSet(pattern));
}
TEST(pm_scan, multiple_pats)
{
    // Two distinct patterns, each present in the buffer; both are expected in the result.
    set<PMPattern> loaded_pats;
    for (const char *pat_line : { "ABCD", "DCBA" }) {
        push_pat(loaded_pats, pat_line);
    }

    const string scanned_text = "KKKK ABCD AB AB ABC ABCD DCBA";
    EXPECT_EQ(prepare_scan_and_compare(loaded_pats, scanned_text), getPatternSet("ABCD", "DCBA"));
}
TEST(pm_scan, multiple_pats_with_overlap)
{
    // In "ABCDCBA" the end of "ABCD" and the start of "DCBA" share the 'D'; both patterns are
    // still expected in the result.
    set<PMPattern> loaded_pats;
    for (const char *pat_line : { "ABCD", "DCBA" }) {
        push_pat(loaded_pats, pat_line);
    }

    const string scanned_text = "KKKK ABCDCBA";
    EXPECT_EQ(prepare_scan_and_compare(loaded_pats, scanned_text), getPatternSet("ABCD", "DCBA"));
}
TEST(pm_scan, multiple_long_pats_with_overlap)
{
    // One pattern is a proper prefix of the other (they differ only by the trailing '!'), and
    // the buffer contains both; both are expected in the result.
    set<PMPattern> loaded_pats;
    push_pat(loaded_pats, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
    push_pat(loaded_pats, "ABCDEFGHIJKLMNOPQRSTUVWXYZ!");

    const string scanned_text = "KKKK ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ!";
    auto expected_matches = getPatternSet("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ!");
    EXPECT_EQ(prepare_scan_and_compare(loaded_pats, scanned_text), expected_matches);
}
TEST(pm_scan, many_pats)
{
    // Thirteen patterns are loaded, but only "ABC1" and "asdasdf" appear in the buffer.
    const char *pat_lines[] = {
        "ABC1",
        "ABC2",
        "ABC3",
        "ABC4",
        "ABC5",
        "ABC6",
        "ABC7",
        "ABC8",
        "asdasdf",
        "zzxxdda",
        "d1tt6335!!",
        "zxcqwwrqwer!!",
        "!sdazsd!"
    };

    set<PMPattern> loaded_pats;
    for (const char *pat_line : pat_lines) {
        push_pat(loaded_pats, pat_line);
    }

    const string scanned_text = "KKKK ABC1 asdasdf";
    EXPECT_EQ(prepare_scan_and_compare(loaded_pats, scanned_text), getPatternSet("ABC1", "asdasdf"));
}
TEST(pm_scan, a_lot_of_pats)
{
    // Loads 3000 generated patterns ("some_0_pat" through "some_2999_pat"); only the two that
    // appear in the buffer are expected to match.
    set<PMPattern> loaded_pats;
    char pat_line[100];
    for (uint pat_num = 0; pat_num < 3000; ++pat_num) {
        snprintf(pat_line, sizeof(pat_line), "some_%u_pat", pat_num);
        push_pat(loaded_pats, pat_line);
    }

    const string scanned_text = "KKKK some_100_pat some_1000_pat";
    EXPECT_EQ(
        prepare_scan_and_compare(loaded_pats, scanned_text),
        getPatternSet("some_100_pat", "some_1000_pat")
    );
}
TEST(pm_scan, long_pat_prefix_followed_by_many_branches)
{
    // 26 patterns share the prefix "some_long_prefix_" and differ in one letter ('a'..'z').
    // The buffer contains only the 'a' and 'z' variants.
    set<PMPattern> loaded_pats;
    char pat_line[100];
    for (u_char branch = 'a'; branch <= 'z'; ++branch) {
        snprintf(pat_line, sizeof(pat_line), "some_long_prefix_%c_pat", branch);
        push_pat(loaded_pats, pat_line);
    }

    const string scanned_text = "some_long_prefix_a_pat some_long_prefix_z_pat some_long_prefix_a_pat";
    EXPECT_EQ(
        prepare_scan_and_compare(loaded_pats, scanned_text),
        getPatternSet("some_long_prefix_a_pat", "some_long_prefix_z_pat")
    );
}
TEST(pm_scan, identical_pats)
{
    // The same pattern line is pushed twice; the expected result holds it once.
    const string pat_line = "123";
    set<PMPattern> loaded_pats;
    push_pat(loaded_pats, pat_line);
    push_pat(loaded_pats, pat_line);

    const string scanned_text = "KKKK 123 ---";
    EXPECT_EQ(prepare_scan_and_compare(loaded_pats, scanned_text), getPatternSet("123"));
}
TEST(pm_scan, multiple_scans_using_same_pm)
{
    // One prepared matcher serves three scans. The first buffer is scanned again after the
    // second one, and must give the same result as it did the first time.
    set<PMPattern> loaded_pats;
    push_pat(loaded_pats, "ABC");
    push_pat(loaded_pats, "%%%");

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    Buffer first_buf("ABC 123 ABC");
    auto first_expected = getPatternSet("ABC");
    Buffer second_buf("^^^%%%!! 123 ABC");
    auto second_expected = getPatternSet("ABC", "%%%");

    EXPECT_EQ(pm.scanBuf(first_buf), first_expected);
    EXPECT_EQ(pm.scanBuf(second_buf), second_expected);
    EXPECT_EQ(pm.scanBuf(first_buf), first_expected);
}
TEST(pm_scan, scan_with_offsets)
{
    // The scanned buffer is built with Buffer's operator+ from "ABC", "EFG" and "ABC" again.
    // The expected offsets (2 and 8) are the indexes of the last character of each "ABC",
    // assuming operator+ concatenates.
    Buffer abc_part("ABC");
    Buffer efg_part("EFG");
    Buffer scanned = abc_part + efg_part + abc_part;

    set<PMPattern> loaded_pats;
    push_pat(loaded_pats, "ABC");

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    set<pair<uint, PMPattern>> expected_matches;
    expected_matches.emplace(2, PMHook::lineToPattern("ABC").unpackMove());
    expected_matches.emplace(8, PMHook::lineToPattern("ABC").unpackMove());

    EXPECT_THAT(pm.scanBufWithOffset(scanned), ContainerEq(expected_matches));
}
TEST(pm_scan, null_buf)
{
    // Calls PMHook::scanBuf directly (not through the helpers) with a Buffer built from an
    // empty string; an empty match set is expected.
    set<PMPattern> loaded_pats;
    push_pat(loaded_pats, "ABCD");

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    const set<PMPattern> no_matches;
    EXPECT_EQ(pm.scanBuf(Buffer("")), no_matches);
}
TEST(pm_scan, exit_on_no_prepare)
{
    // Scanning with a matcher that was never prepared must terminate the process with the
    // "Unusable Pattern Matcher" message.
    Buffer scanned("blah");
    cptestPrepareToDie();
    PMHook unprepared_pm;
    EXPECT_DEATH(unprepared_pm.scanBuf(scanned), "Unusable Pattern Matcher");
}
TEST(pm_scan, prepare_fail_on_no_pats)
{
    // Preparing the matcher with an empty pattern set is expected to report failure.
    PMHook pm;
    set<PMPattern> no_pats;
    auto prepare_result = pm.prepare(no_pats);
    EXPECT_FALSE(prepare_result.ok());
}
TEST(pm_scan, pm_offsets_test_multiple_matches)
{
    // Five patterns, some overlapping, all found in "hex()". Each expected offset is the index
    // of the last character of the match, and each expected pattern carries the index it was
    // created with (0 when none was given).
    using OffsetMatches = set<pair<uint, PMPattern>>;

    set<PMPattern> loaded_pats;
    loaded_pats.emplace("he", false, false);
    loaded_pats.emplace("ex", false, false);
    loaded_pats.emplace("hex", false, false, 2);
    loaded_pats.emplace("(", false, false, 5);
    loaded_pats.emplace(")", false, false, 7);

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    Buffer scanned("hex()");
    OffsetMatches results = pm.scanBufWithOffset(scanned);

    OffsetMatches expected;
    expected.emplace(1, PMPattern("he", false, false, 0));
    expected.emplace(2, PMPattern("ex", false, false, 0));
    expected.emplace(2, PMPattern("hex", false, false, 2));
    expected.emplace(3, PMPattern("(", false, false, 5));
    expected.emplace(4, PMPattern(")", false, false, 7));

    EXPECT_EQ(results, expected);
}
TEST(pm_scan, pm_offsets_test_one_char_match)
{
    // A one-character pattern against a buffer holding exactly that character: offset 0 is expected.
    using OffsetMatches = set<pair<uint, PMPattern>>;

    set<PMPattern> loaded_pats;
    loaded_pats.emplace("/", false, false);

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    Buffer scanned("/");
    OffsetMatches results = pm.scanBufWithOffset(scanned);

    OffsetMatches expected;
    expected.emplace(0, PMPattern("/", false, false, 0));

    EXPECT_EQ(results, expected);
}
TEST(pm_scan, pm_offsets_test_one_char_at_end_match)
{
    // A one-character pattern that is the last character of the buffer: offset 3 is expected.
    using OffsetMatches = set<pair<uint, PMPattern>>;

    set<PMPattern> loaded_pats;
    loaded_pats.emplace("/", false, false);

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    Buffer scanned("abc/");
    OffsetMatches results = pm.scanBufWithOffset(scanned);

    OffsetMatches expected;
    expected.emplace(3, PMPattern("/", false, false, 0));

    EXPECT_EQ(results, expected);
}
TEST(pm_scan, pm_offsets_test_one_char_at_start_match)
{
    // A one-character pattern that is the first character of the buffer: offset 0 is expected.
    using OffsetMatches = set<pair<uint, PMPattern>>;

    set<PMPattern> loaded_pats;
    loaded_pats.emplace("/", false, false);

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    Buffer scanned("/abc");
    OffsetMatches results = pm.scanBufWithOffset(scanned);

    OffsetMatches expected;
    expected.emplace(0, PMPattern("/", false, false, 0));

    EXPECT_EQ(results, expected);
}
TEST(pm_scan, pm_offsets_test_word_full_match)
{
    // The buffer is exactly the three-character pattern; the expected offset is 2, the index of
    // its last character.
    using OffsetMatches = set<pair<uint, PMPattern>>;

    set<PMPattern> loaded_pats;
    loaded_pats.emplace("abc", false, false);

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    Buffer scanned("abc");
    OffsetMatches results = pm.scanBufWithOffset(scanned);

    OffsetMatches expected;
    expected.emplace(2, PMPattern("abc", false, false, 0));

    EXPECT_EQ(results, expected);
}
TEST(pm_scan, pm_offsets_test_word_at_start_match)
{
    // The 11-character pattern opens the buffer; the expected offset is 10, the index of its
    // last character.
    using OffsetMatches = set<pair<uint, PMPattern>>;

    set<PMPattern> loaded_pats;
    loaded_pats.emplace("application", false, false);

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    Buffer scanned("application/x-www-form-urlencoded");
    OffsetMatches results = pm.scanBufWithOffset(scanned);

    OffsetMatches expected;
    expected.emplace(10, PMPattern("application", false, false, 0));

    EXPECT_EQ(results, expected);
}
TEST(pm_scan, pm_offsets_test_word_at_end_match)
{
    // The pattern closes the 33-character buffer; the expected offset is 32, the index of the
    // buffer's last character.
    using OffsetMatches = set<pair<uint, PMPattern>>;

    set<PMPattern> loaded_pats;
    loaded_pats.emplace("x-www-form-urlencoded", false, false);

    PMHook pm;
    ASSERT_TRUE(pm.prepare(loaded_pats).ok());

    Buffer scanned("application/x-www-form-urlencoded");
    OffsetMatches results = pm.scanBufWithOffset(scanned);

    OffsetMatches expected;
    expected.emplace(32, PMPattern("x-www-form-urlencoded", false, false, 0));

    EXPECT_EQ(results, expected);
}
TEST(pm_scan, pm_offsets_test_pat_getIndex_method)
{
    // Three overlapping patterns with different indexes are scanned against one buffer. Each
    // expected entry pairs the offset of the match's last character with a pattern carrying the
    // index it was created with.
    set<PMPattern> initPatts;
    initPatts.insert(PMPattern("ABC", false, false)); // initialized with the default index 0
    initPatts.insert(PMPattern("ABCD", false, false, 4));
    initPatts.insert(PMPattern("CDE", false, false, 7));

    PMHook pm;
    // A failed prepare must stop the test here: scanning an unprepared matcher kills the process
    // (see the exit_on_no_prepare test), which would hide the real failure.
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    Buffer buf("12345ABCDEF5678");
    std::set<std::pair<uint, PMPattern>> results = pm.scanBufWithOffset(buf);
    std::set<std::pair<uint, PMPattern>> expected{
        {7, {"ABC", false, false, 0}},
        {8, {"ABCD", false, false, 4}},
        {9, {"CDE", false, false, 7}}
    };
    EXPECT_EQ(results, expected);
}
TEST(pm_scan, pm_offsets_lambda_test_pat_getIndex_method)
{
    // Same scenario as the set-based getIndex test, but the matches are delivered through the
    // callback of scanBufWithOffsetLambda and collected in call order. The expected vector
    // therefore also fixes the order in which the callback is invoked.
    set<PMPattern> initPatts;
    initPatts.insert(PMPattern("ABC", false, false)); // initialized with the default index 0
    initPatts.insert(PMPattern("ABCD", false, false, 4));
    initPatts.insert(PMPattern("CDE", false, false, 7));

    PMHook pm;
    // A failed prepare must stop the test here: scanning an unprepared matcher kills the process
    // (see the exit_on_no_prepare test), which would hide the real failure.
    ASSERT_TRUE(pm.prepare(initPatts).ok());

    Buffer buf("12345ABCDEF5678");
    // Uses the same `uint` spelling as `expected` below, so both vectors visibly share one type.
    std::vector<std::pair<uint, PMPattern>> results;
    pm.scanBufWithOffsetLambda(
        buf,
        [&] (uint offset, const PMPattern &pat) { results.emplace_back(offset, pat); }
    );

    std::vector<std::pair<uint, PMPattern>> expected{
        {7, {"ABC", false, false, 0}},
        {8, {"ABCD", false, false, 4}},
        {9, {"CDE", false, false, 7}}
    };
    EXPECT_EQ(results, expected);
}