// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. // Licensed under the Apache License, Version 2.0 (the "License"); // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include "pm_adaptor.h" #include "kiss_hash.h" #include "kiss_thin_nfa_impl.h" #include "kiss_patterns.h" // Flag for a Thin NFA state typedef enum { THIN_NFA_STATE_FULL = 0x01, // We want a full state table for this state THIN_NFA_STATE_MATCH = 0x02, // A matching state THIN_NFA_STATE_ROOT = 0x04, // The root or anchored root state THIN_NFA_STATE_MAX_IDENTICAL_CHAR = 0x08, // Maximal sequence of identical characters THIN_NFA_STATE_ANCHORED = 0x10, // A part of the anchored tree THIN_NFA_STATE_BUILT_TABLE = 0x20, // Already built the BNFA transition table THIN_NFA_STATE_REACH_FROM_FULL = 0x40, // The state is reachable from full state } nfa_thin_state_flags_t; // A Thin NFA state, or a node in the trie, during compilation time struct kiss_thin_nfa_state_s { u_int state_id; // Sequencial number, starting from 0 nfa_thin_state_flags_t flags; u_int depth; // Level in the trie kiss_thin_nfa_pattern_list_t *ids; // For finite state, patterns associated with it struct kiss_thin_nfa_state_s *bfs_q; // Use for a BFS iteration on the trie struct thin_nfa_comp_s *comp; // Saves passing this pointer around // Outgoing transitions struct kiss_thin_nfa_state_s *child; // First child of this state u_int num_trans; // Number of transitions struct kiss_thin_nfa_state_s *fail_state; // Incoming transition struct kiss_thin_nfa_state_s *sibling; // Next child of this state's father u_char tran_char; // The character that 
takes us to this state // BNFA offset kiss_bnfa_offset_t bnfa_offset; // Where the real state is kiss_bnfa_offset_t bnfa_incoming_off; // Where incoming transitions should jump (possibly a match state) // DEBUG ONLY const u_char *pattern_text; // Points into the user's pattern list. Not null terminated }; typedef struct kiss_thin_nfa_state_s kiss_thin_nfa_state_t; // Blocks to hold states . A pretty simple pool mechanism. // Not very much needed. We currently use it to iterate states by ID order, and for state pointer validation. #define MAX_THIN_NFA_STATES_BLOCKS 1000 #define KISS_NFA_MAX_STATES_PER_BLOCK 1000 #define KISS_NFA_MAX_STATES_BLOCK_SIZE (KISS_NFA_MAX_STATES_PER_BLOCK * sizeof(kiss_thin_nfa_state_t)) // When do we want a full state? In the first X tiers (root included) and if more than Y transitions. u_int kiss_thin_nfa_full_tiers_num_small = 2; // Old values, for PMs which must remain small u_int kiss_thin_nfa_full_tiers_num_medium = 3; // Used for VSX / 32bit kernel, where memory is expensive u_int kiss_thin_nfa_full_tiers_num = 7; // New value u_int kiss_thin_nfa_max_partial_trans = 15; // Can't exceed KISS_BNFA_MAX_TRANS_NUM anyway u_int kiss_thin_nfa_optimize_contig_chars = 1; // Character translation table for caseless/digitless comparisons. // // The idea: // Each character has a canonic character. This can be itself, or another. // In a caseless Thin NFA, 'a' and '7' are canonic themselves, 'B' has canonic character 'b'. // In a digitless Thin NFA, '7' is not canonic - its canonic character is '0'. // Each character is also a member of a group, containing all characters with the same canonic character. // In a caseless Thin NFA, 'a' and 'A' are in one group. // In a digitless Thin NFA, all digits are in one group. // Notice that a single Thin NFA can be caseless, digitless, neither or both. // // The data structure: // tab - Translates each character into its canonic characer (possibly itself). 
//   rev - A linked list of characters belonging to the same group. The character itself
//     is used instead of a pointer. The last character in the group points to the first.
//     Example: For a caseless Thin NFA, rev['a']='A' and rev['A']='a'.
struct thin_nfa_char_trans_tab_s {
    u_char tab[KISS_PM_ALPHABET_SIZE];
    u_char rev[KISS_PM_ALPHABET_SIZE];
};

// Flags for an entire Thin NFA during compilation
typedef enum {
    THIN_NFA_FAIL_STATES_CALCULATED = 0x01,     // Once we set this, we expect all states to have fail states.
    THIN_NFA_ENABLE_ANCHOR_OPT      = 0x02,     // Enable optimization for anchored states
    THIN_NFA_USE_RECURSIVE_COMPILE  = 0x04,     // Build full states recursively. Faster, unsuitable for kernel
} thin_nfa_comp_flags_e;

// A Thin NFA which is under construction. The compiled BNFA is constructed from this later.
struct thin_nfa_comp_s {
    kiss_thin_nfa_state_t *root_state;              // The root state (somewhere inside state_blocks)
    kiss_thin_nfa_state_t *anchored_root_state;     // The root for anchored patterns
    u_int full_state_tier_num;                      // How many tiers would be full states?
    u_int state_num;                                // How many states do we have so far
    u_int match_state_num;                          // How many matching states do we have?
    u_int full_state_num;                           // How many full states do we have?
    KissPMError *error;                             // Error to be returned to the user.
    thin_nfa_comp_flags_e flags;
    struct thin_nfa_char_trans_tab_s *xlation_tab;  // Caseless/digitless translation table
    kiss_thin_nfa_state_t *state_blocks[MAX_THIN_NFA_STATES_BLOCKS]; // Dynamically allocated memory for states
    // NOTE(review): the template argument of std::unique_ptr appears to have been lost from this
    // copy of the file (text between '<' and '>' is missing) - restore it from the upstream source.
    std::unique_ptr runtime_nfa;                    // The final NFA we're building
    kiss_hash_t patterns_hash;                      // Pattern array to offset mapping
    kiss_bnfa_offset_t min_bnfa_off, max_bnfa_off;
};

// The state verification code below is compiled only in DEBUG builds.
#if defined(DEBUG)
#define KISS_THIN_NFA_DO_VERIFICATIONS
#endif

// Size and number of the static buffers which state_name() rotates through.
#define MAX_STATE_NAME_LEN 100
#define MAX_STATE_NAME_BUFS 4

// Find a state by its sequential ID.
// The ID is split into a block index and an index within the block.
// Returns NULL (after a critical debug message) if the block index is beyond
// MAX_THIN_NFA_STATES_BLOCKS, or if that block was not allocated yet.
// Does not compare state_id against nfa_comp->state_num - callers do that when they need it.
// "caller" is only used as a prefix for the debug messages.
static kiss_thin_nfa_state_t *
kiss_thin_nfa_get_state_by_id(struct thin_nfa_comp_s *nfa_comp, u_int state_id, const char *caller)
{
    u_int block_index;
    u_int index_in_block;
    kiss_thin_nfa_state_t *block;

    // Find the block and the place in the block
    block_index = state_id / KISS_NFA_MAX_STATES_PER_BLOCK;
    index_in_block = state_id % KISS_NFA_MAX_STATES_PER_BLOCK;
    if (block_index >= MAX_THIN_NFA_STATES_BLOCKS) {
        thinnfa_debug_critical(("%s: State %d - invalid block index %d (max %d)\n", caller, state_id, block_index, MAX_THIN_NFA_STATES_BLOCKS));
        return NULL;
    }
    block = nfa_comp->state_blocks[block_index];
    if (block == NULL) {
        thinnfa_debug_critical((
            "%s: State %d - block index %d is not allocated yet\n",
            caller,
            state_id,
            block_index
        ));
        return NULL;
    }

    return &block[index_in_block];
}

// DEBUG FUNCTION - return a printable name for the state, in a static buffer.
// Accepts a NULL state.
// The result lives in one of MAX_STATE_NAME_BUFS static buffers, used in rotation, so a returned
// name is overwritten once the function has been called MAX_STATE_NAME_BUFS more times.
static const char *
state_name(const kiss_thin_nfa_state_t *state)
{
    static char buffers[MAX_STATE_NAME_BUFS][MAX_STATE_NAME_LEN];
    static u_int next_buf = 0;
    u_int cur_buf;
    char *name, *p;

    if (!state) {
        // Happens when printing the root's fail state
        return "NULL/-1";
    }

    // What's a state's name?
    // Each state represents a prefix of one or more patterns. This prefix is the natural name for the state.
    // We have the pattern text on the state. Its depth tells us how much of it do we need.
    // We add the state ID as a suffix, to prevent ambiguities (particularly for unprintable characters).

    // Choose a buffer to use. Allows calling several times under a single debug message.
    cur_buf = next_buf;
    if (cur_buf >= MAX_STATE_NAME_BUFS) cur_buf = 0;
    next_buf = cur_buf + 1;
    name = buffers[cur_buf];
    p = name;

    if (state->flags & THIN_NFA_STATE_ANCHORED) {
        // Prefix for anchored states
        *p = '^';
        p++;
    }

    // Fill in the state name. Not null-terminated meanwhile.
    if (state->pattern_text == NULL) {
        const char *state_name;
        // Only the root makes sense. But deal with a missing pattern text anyway
        state_name = (state->flags & THIN_NFA_STATE_ROOT) ? "ROOT" : "INVALID";
        strcpy(p, state_name);
        p += strlen(state_name);
    } else {
        u_int i;
        // Normal state - use the relevant prefix of the pattern text
        // NOTE(review): the loop header below is damaged in this copy of the file - the text between
        // '<' and '>' characters is missing (the loop bounds and the start of the assignment to *p).
        // Presumably it copies up to state->depth characters, replacing some of them by '.'.
        // Restore it from the upstream source - do not guess the buffer bound.
        for (i=0; (idepth) && (ppattern_text[i]) ? state->pattern_text[i] : '.';
            p++;
        }
    }

    // Append the state ID. Removes ambiguities (e.g. for unprintable characters)
    snprintf(p, MAX_STATE_NAME_LEN-(p-name), "/%u", state->state_id);
    // Make sure the name is terminated, whatever was written above
    name[MAX_STATE_NAME_LEN-1] = '\0';
    return name;
}

#if defined(KISS_THIN_NFA_DO_VERIFICATIONS)

// DEBUG FUNCTION - Verify that a state pointer points to a valid state
// The pointer is valid if looking up its own state_id yields the very same address.
// Returns FALSE for a NULL pointer, a failed lookup or an address mismatch.
static BOOL
is_valid_state_ptr(struct thin_nfa_comp_s *nfa_comp, kiss_thin_nfa_state_t *state, const char *caller)
{
    kiss_thin_nfa_state_t *state_by_id;

    if (!state) {
        thinnfa_debug_critical(("%s: Null state pointer\n", caller));
        return FALSE;
    }

    state_by_id = kiss_thin_nfa_get_state_by_id(nfa_comp, state->state_id, caller);
    if (!state_by_id) {
        return FALSE;
    }

    // Is the state where we expect it to be in the block?
    if (state != state_by_id) {
        thinnfa_debug_critical(("%s: State %p ID %d is invalid - should be at %p\n", caller, state, state->state_id, state_by_id));
        return FALSE;
    }

    return TRUE;
}

// DEBUG FUNCTION - Verify a state's transition table
// Asserts that:
//   - The state pointer and all its children pointers are valid.
//   - The children are sorted by strictly ascending transition character.
//   - The number of children equals num_trans.
//   - Once THIN_NFA_FAIL_STATES_CALCULATED is set - the fail state is valid, or NULL for the root only.
static void
verify_state_ex(struct thin_nfa_comp_s *nfa_comp, kiss_thin_nfa_state_t *state, const char *caller)
{
    kiss_thin_nfa_state_t *child, *prev_child;
    u_int actual_tran_num;

    // Is the pointer itself OK?
    KISS_ASSERT(is_valid_state_ptr(nfa_comp, state, caller), ("%s: Invalid state pointer %p\n", caller, state));

    // Go over the transition table
    actual_tran_num = 0;
    prev_child = NULL;
    for (child = state->child; child != NULL; child = child->sibling) {
        // Valid pointer?
        KISS_ASSERT(is_valid_state_ptr(nfa_comp, child, caller),
            ("%s: State %s(%p) contains an invalid child %p after %02x\n",
            caller, state_name(state), state, child, prev_child ? prev_child->tran_char : 0));

        // Sorted in ascending order?
        KISS_ASSERT(!prev_child || prev_child->tran_char < child->tran_char,
            ("%s: State %s(%p) transition %02x -> %s after %02x -> %s\n",
            caller, state_name(state), state, child->tran_char, state_name(child),
            prev_child->tran_char, state_name(prev_child)));

        actual_tran_num++;
        if (actual_tran_num > state->num_trans) {
            // We may be looping
            break;
        }
        prev_child = child;
    }

    // Counter matches list?
    KISS_ASSERT(actual_tran_num == state->num_trans,
        ("%s: State %s(%p) has %d transitions, but it should have %d\n",
        caller, state_name(state), state, actual_tran_num, state->num_trans));

    // Fail state?
    if (nfa_comp->flags & THIN_NFA_FAIL_STATES_CALCULATED) {
        if (state->fail_state == NULL) {
            KISS_ASSERT(state == nfa_comp->root_state,
                ("%s: State %s has no fail state, but it is not root", caller, state_name(state)));
        } else {
            // NOTE(review): unlike the other KISS_ASSERT calls in this function, the message and
            // its arguments are not wrapped in parentheses here - confirm that KISS_ASSERT accepts both forms.
            KISS_ASSERT(
                is_valid_state_ptr(nfa_comp, state->fail_state, caller),
                "%s: State %s has an invalid fail state %p\n",
                caller,
                state_name(state),
                state->fail_state
            );
        }
    }
}

// Use this for sanity test on a state
#define verify_state(nfa_comp, state) verify_state_ex(nfa_comp, state, FILE_LINE)

#else // KISS_THIN_NFA_DO_VERIFICATIONS

// Verifications disabled
#define verify_state(nfa_comp, state)

#endif // KISS_THIN_NFA_DO_VERIFICATIONS

// Mark that a state needs to be full
// Does nothing if the state is already full, so full_state_num is counted once per state.
static void
make_state_full(kiss_thin_nfa_state_t *state)
{
    if (state->flags & THIN_NFA_STATE_FULL) return;
    ENUM_SET_FLAG(state->flags, THIN_NFA_STATE_FULL);
    state->comp->full_state_num++;
}

// Mark that a state is matching
// Does nothing if the state is already matching, so match_state_num is counted once per state.
static void
make_state_matching(kiss_thin_nfa_state_t *state)
{
    if (state->flags & THIN_NFA_STATE_MATCH) return;
    ENUM_SET_FLAG(state->flags, THIN_NFA_STATE_MATCH);
    state->comp->match_state_num++;
}

// Allocate an empty state on an NFA.
// Initializes all fields to defaults.
// The new state gets the next sequential ID (nfa_comp->state_num), and is placed in the state block
// matching this ID. The block is allocated when its first state is created.
// Returns NULL if the block limit was reached or if a block allocation failed.
static kiss_thin_nfa_state_t *
kiss_thin_nfa_state_create(
    struct thin_nfa_comp_s *nfa_comp,
    u_int depth,
    const u_char *pattern_text,
    nfa_thin_state_flags_t flags
)
{
    static const char rname[] = "kiss_thin_nfa_state_create";
    u_int state_id;
    u_int block_index;
    u_int index_in_block;
    kiss_thin_nfa_state_t *block;
    kiss_thin_nfa_state_t *state;

    // Find the next ID and the block it should be in
    state_id = nfa_comp->state_num;
    block_index = state_id / KISS_NFA_MAX_STATES_PER_BLOCK;
    index_in_block = state_id % KISS_NFA_MAX_STATES_PER_BLOCK;
    thinnfa_debug_extended(("%s: Adding state %d depth %d\n", rname, state_id, depth));

    // No more possible blocks?
    if (block_index >= MAX_THIN_NFA_STATES_BLOCKS) {
        thinnfa_debug_err(("%s: State %d in block %d exceeds the limit %d\n", rname, state_id, block_index, MAX_THIN_NFA_STATES_BLOCKS));
        return NULL;
    }

    // Allocate the block if needed (first state in the block)
    block = nfa_comp->state_blocks[block_index];
    if (block == NULL) {
        block = (kiss_thin_nfa_state_t *)fw_kmalloc_ex(KISS_NFA_MAX_STATES_BLOCK_SIZE, rname, FW_KMEM_SLEEP);
        if (block == NULL) {
            thinnfa_debug_err(("%s: Failed to allocate a state block size %lu for the state %u\n", rname, KISS_NFA_MAX_STATES_BLOCK_SIZE, state_id));
            return NULL;
        }
        nfa_comp->state_blocks[block_index] = block;
    }

    // Initialize the state
    // Every field is set explicitly - nothing here clears the block memory in advance.
    state = &block[index_in_block];
    state->state_id = state_id;
    state->flags = flags;
    state->ids = NULL;
    state->bfs_q = NULL;
    state->child = NULL;
    state->num_trans = 0;
    state->fail_state = NULL;
    state->sibling = NULL;
    state->tran_char = '\0';            // Will be modified, except for the root
    state->pattern_text = pattern_text;
    state->depth = depth;
    state->comp = nfa_comp;
    state->bnfa_offset = KISS_BNFA_OFFSET_INVALID;
    state->bnfa_incoming_off = KISS_BNFA_OFFSET_INVALID;

    // Do we want a full state? kiss_thin_nfa_full_tiers_num=2 means tiers 0 and 1, i.e. the root plus one, are full.
    if (state->flags & THIN_NFA_STATE_ROOT) {
        // The root must be full, because it has no fail state.
        // The anchored root (if exists) is the first state, and must be full, for the bnfa_full_state_size
        // condition to work.
        make_state_full(state);
    } else if (depth < nfa_comp->full_state_tier_num && !(state->flags & THIN_NFA_STATE_ANCHORED)) {
        // Shallow states are full too, but only in the non-anchored tree
        make_state_full(state);
    }

    // Advance the counter
    nfa_comp->state_num++;

    return state;
}

// Release all resources on a state structure.
// Doesn't release the states, because it's part of a state block.
static void kiss_thin_nfa_state_free(kiss_thin_nfa_state_t *state) { // Clean up the pattern list if (state->ids) { kiss_thin_nfa_free_pattern_ids(state->ids); state->ids = NULL; } return; } // Returns the following state, by ID order. // With prev==NULL, returns the first state. // With prev!=NULL, returns the next. // If prev is the last state, returns NULL. static kiss_thin_nfa_state_t * kiss_thin_nfa_get_subsequent_state(struct thin_nfa_comp_s *nfa_comp, kiss_thin_nfa_state_t *prev) { static const char rname[] = "kiss_thin_nfa_get_subsequent_state"; u_int state_id; // Find the next state's ID state_id = prev ? prev->state_id + 1 : 0; if (state_id >= nfa_comp->state_num) { // prev was the last state. return NULL; } // Get the state pointer return kiss_thin_nfa_get_state_by_id(nfa_comp, state_id, rname); } // Find the transition for a given character from a given state. // If no transition found, returns NULL and does not check the fail state. static kiss_thin_nfa_state_t * kiss_thin_nfa_comp_get_next_state(kiss_thin_nfa_state_t *state, u_char ch) { static const char rname[] = "kiss_thin_nfa_comp_get_next_state"; kiss_thin_nfa_state_t *child; verify_state(state->comp, state); // Find the child in the list for (child = state->child; child != NULL; child = child->sibling) { u_char tran_ch = child->tran_char; if (tran_ch == ch) { thinnfa_debug_extended(( "%s: Found transition from the state %s by 0x%02x to %s\n", rname, state_name(state), ch, state_name(child) )); return child; } // The list is sorted, so we don't need to look beyond the character. 
if (tran_ch > ch) break; } thinnfa_debug_extended(("%s: No transition from the state %s by 0x%02x\n", rname, state_name(state), ch)); return NULL; } // Mark a state as finite and accepting a given kiss_thin_nfa_pattern_t pattern static kiss_ret_val kiss_thin_nfa_state_set_match(kiss_thin_nfa_state_t *state, const kiss_thin_nfa_pattern_t *pat_info) { static const char rname[] = "kiss_thin_nfa_state_set_match"; verify_state(state->comp, state); // Add the pattern to this state's pattern list if (kiss_thin_nfa_add_pattern_id(&(state->ids), pat_info) != KISS_OK) { thinnfa_debug_err(( "%s: Could not add the 'pattern_id' %d to the final state %s\n", rname, pat_info->id, state_name(state) )); return KISS_ERROR; } thinnfa_debug(( "Setting state %s as the matching state for the 'pattern_id' %d\n", state_name(state), pat_info->id )); make_state_matching(state); return KISS_OK; } // Mark a state as finite, and accepting a given kiss_pmglob_string pattern static kiss_ret_val kiss_thin_nfa_state_set_match_pattern(kiss_thin_nfa_state_t *state, const kiss_pmglob_string_s *pattern) { kiss_thin_nfa_pattern_t pat_info; pat_info.id = kiss_pmglob_string_get_id(pattern); pat_info.pattern_id_flags = kiss_pmglob_string_get_flags(pattern); pat_info.len = kiss_pmglob_string_get_size(pattern); return kiss_thin_nfa_state_set_match(state, &pat_info); } // Copy the list of accepted patterns from one state to another. // The destination state can already have patterns, and the lists would be concatenated. 
static kiss_ret_val kiss_thin_nfa_state_copy_match_ids(kiss_thin_nfa_state_t *dst, kiss_thin_nfa_state_t *src) { static const char rname[] = "kiss_thin_nfa_state_copy_match_ids"; kiss_thin_nfa_pattern_list_t *curr_id; verify_state(src->comp, src); verify_state(dst->comp, dst); thinnfa_debug(("%s: Copying the match IDs from %s to %s\n", rname, state_name(src), state_name(dst))); // traversing on the state_src 'ids' adding each one to 'state_dst' list for(curr_id = src->ids; curr_id; curr_id = curr_id->next) { if (kiss_thin_nfa_state_set_match(dst, &curr_id->pattern) != KISS_OK) { thinnfa_debug_err(( "%s: Failed to set the ID %d on the state %s\n", rname, curr_id->pattern.id, state_name(dst) )); // NOTE: We don't release the IDs we have added. Compilation will fail and clean up anyway. return KISS_ERROR; } } return KISS_OK; } // Destroy the NFA we're compiling static void kiss_thin_nfa_comp_destroy(struct thin_nfa_comp_s *nfa_comp) { static const char rname[] = "kiss_thin_nfa_comp_destroy"; u_int i; kiss_thin_nfa_state_t *state; thinnfa_debug_major(("%s: Destroying the compilation information structure\n", rname)); // Cleanup whatever data we have on the states. for (state = nfa_comp->root_state; state != NULL; state = kiss_thin_nfa_get_subsequent_state(nfa_comp, state)) { kiss_thin_nfa_state_free(state); } // Free the state blocks and transition blocks for (i = 0; i < MAX_THIN_NFA_STATES_BLOCKS; i++) { if (nfa_comp->state_blocks[i] != NULL) { fw_kfree(nfa_comp->state_blocks[i], KISS_NFA_MAX_STATES_BLOCK_SIZE, rname); nfa_comp->state_blocks[i] = NULL; } } if (nfa_comp->xlation_tab!= NULL) { fw_kfree(nfa_comp->xlation_tab, sizeof(*(nfa_comp->xlation_tab)), rname); nfa_comp->xlation_tab= NULL; } nfa_comp->runtime_nfa.reset(nullptr); if (nfa_comp->patterns_hash) { kiss_hash_destroy(nfa_comp->patterns_hash); nfa_comp->patterns_hash = NULL; } fw_kfree(nfa_comp, sizeof(*nfa_comp), rname); } // Allocate an empty thin NFA compilation data structure. 
static struct thin_nfa_comp_s * kiss_thin_nfa_comp_create(KissPMError *error) { static const char rname[] = "kiss_thin_nfa_comp_create"; struct thin_nfa_comp_s *nfa_comp = NULL; thinnfa_debug_major(("%s: Allocating the compilation information structure\n", rname)); // Allocate and initialize the compilation temporary structure nfa_comp = (struct thin_nfa_comp_s *)fw_kmalloc(sizeof(*nfa_comp), rname); if (!nfa_comp) { thinnfa_debug_err(("%s: Failed to allocate 'nfa_comp'\n", rname)); goto failure; } bzero((void *)nfa_comp, sizeof(*nfa_comp)); nfa_comp->error = error; // Build the root state nfa_comp->root_state = kiss_thin_nfa_state_create(nfa_comp, 0, NULL, THIN_NFA_STATE_ROOT); if (nfa_comp->root_state == NULL) { thinnfa_debug_err(("%s: Failed to create the root state\n", rname)); goto failure; } return nfa_comp; failure: if (nfa_comp != NULL) { kiss_thin_nfa_comp_destroy(nfa_comp); } return NULL; } // Specify the error for failed Thin NFA compilation static void kiss_thin_nfa_set_comp_error(struct thin_nfa_comp_s *nfa_comp, const char *err_text) { // We always use "internal", which is appropriate for both logical errors and resource shortage. // We don't specify a pattern, because nothing is really pattern specific. kiss_pm_error_set_details(nfa_comp->error, KISS_PM_ERROR_INTERNAL, err_text); } // Initialize a translation table for caseless/digitless comparison. // According to compilation flags, builds a table to translate each character. 
static kiss_ret_val kiss_thin_nfa_create_xlation_tab(struct thin_nfa_comp_s *nfa_comp, int pm_comp_flags) { static const char rname[] = "kiss_thin_nfa_create_xlation_tab"; enum kiss_pmglob_char_xlation_flags_e xlation_flags; // Figure out which translations we need xlation_flags = KISS_PMGLOB_CHAR_XLATION_NONE; if (pm_comp_flags & KISS_PM_COMP_CASELESS) { ENUM_SET_FLAG(xlation_flags, KISS_PMGLOB_CHAR_XLATION_CASE); } if (pm_comp_flags & KISS_PM_COMP_DIGITLESS) { ENUM_SET_FLAG(xlation_flags, KISS_PMGLOB_CHAR_XLATION_DIGITS); } if (xlation_flags == KISS_PMGLOB_CHAR_XLATION_NONE) { // No translation needed nfa_comp->xlation_tab = NULL; return KISS_OK; } thinnfa_debug_major(("%s: Using%s%s translation table\n", rname, (xlation_flags&KISS_PMGLOB_CHAR_XLATION_CASE) ? " caseless" : "", (xlation_flags&KISS_PMGLOB_CHAR_XLATION_DIGITS) ? " digitless" : "")); // Allocate a translation table nfa_comp->xlation_tab = (struct thin_nfa_char_trans_tab_s *)fw_kmalloc(sizeof(*(nfa_comp->xlation_tab)), rname); if (!nfa_comp->xlation_tab) { thinnfa_debug_err(("%s: Failed to allocate the translation table\n", rname)); return KISS_ERROR; } // Build the mapping - normal and reverse kiss_pmglob_char_xlation_build(xlation_flags, nfa_comp->xlation_tab->tab); kiss_pmglob_char_xlation_build_reverse(nfa_comp->xlation_tab->tab, nfa_comp->xlation_tab->rev); return KISS_OK; } // Translate a character to canonic form, if a translation table is defined. static CP_INLINE u_char kiss_thin_nfa_xlate_char(struct thin_nfa_comp_s *nfa_comp, u_char ch) { if (!nfa_comp->xlation_tab) return ch; return nfa_comp->xlation_tab->tab[ch]; } #if defined(KISS_THIN_NFA_DO_VERIFICATIONS) && !defined(KERNEL) // DEBUG FUNCTION - uses a simple&slow algorithm to verify the result of kiss_thin_nfa_are_trans_contained. // Can't run in the kernel because of the large stack consumption. 
// Builds a per-character transition table for each of the two states, then compares them.
// "should_contain" is the answer given by kiss_thin_nfa_are_trans_contained, which is being verified.
static void
verify_trans_contains_(
    kiss_thin_nfa_state_t *state_contains,
    kiss_thin_nfa_state_t *state_included,
    BOOL should_contain
)
{
    kiss_thin_nfa_state_t *trans_contains[KISS_PM_ALPHABET_SIZE];
    kiss_thin_nfa_state_t *trans_included[KISS_PM_ALPHABET_SIZE];
    kiss_thin_nfa_state_t *child;
    u_int i;
    int mismatch_pos;

    // Fill in both transition tables
    bzero(trans_contains, sizeof(trans_contains));
    for (child = state_contains->child; child != NULL; child = child->sibling) {
        trans_contains[child->tran_char] = child;
    }
    bzero(trans_included, sizeof(trans_included));
    for (child = state_included->child; child != NULL; child = child->sibling) {
        trans_included[child->tran_char] = child;
    }

    // Go over the table, looking for a character that's in "included" but not in "contains".
    // NOTE(review): the rest of this function is damaged in this copy of the file - everything between
    // the '<' of the loop condition and a '>' inside the assertion message is missing (the loop body,
    // and presumably the check of mismatch_pos against should_contain). Restore it from the upstream source.
    mismatch_pos = -1;
    for (i=0; i %s), but the kiss_thin_nfa_are_trans_contained says it does",
                state_name(state_contains), state_name(state_included),
                (u_char)mismatch_pos, state_name(trans_included[i]))
        );
    }
}

#define verify_trans_contains(state_contains, state_included, expected) \
    verify_trans_contains_(state_contains, state_included, expected)

#else // KISS_THIN_NFA_DO_VERIFICATIONS

// Verification disabled (or kernel build) - expands to nothing
#define verify_trans_contains(state_contains, state_included, expected)

#endif // KISS_THIN_NFA_DO_VERIFICATIONS

// Do all transitions of "included" also exist in "contains"?
// Only the transition characters are compared, not the states they lead to.
// Both transition lists are sorted, so a single pass over each of them is enough.
static BOOL
kiss_thin_nfa_are_trans_contained(kiss_thin_nfa_state_t *state_contains, kiss_thin_nfa_state_t *state_included)
{
    kiss_thin_nfa_state_t *included_child, *contains_child;

    verify_state(state_contains->comp, state_contains);
    verify_state(state_included->comp, state_included);

    if (state_contains->num_trans < state_included->num_trans) {
        // "contains" has fewer transitions - it can't include all "included"
        verify_trans_contains(state_contains, state_included, FALSE);
        return FALSE;
    }

    // Advance both included_child and contains_child, to iterate both transition tables.
    // Keep them in sync - included_child passes children one by one, and contains_child is advanced
    // to the same transition character at each step.
    contains_child = state_contains->child;

    // Go over the transitions in "included", see if they're in "contains"
    for (included_child = state_included->child; included_child != NULL; included_child = included_child->sibling) {
        // Advance contains_child until we reach the character we want
        for (; contains_child != NULL; contains_child = contains_child->sibling) {
            if (contains_child->tran_char >= included_child->tran_char) break;
        }

        // Do we have this character in "contains"?
        if (contains_child == NULL || contains_child->tran_char != included_child->tran_char) {
            // This character doesn't exist in state_contains
            verify_trans_contains(state_contains, state_included, FALSE);
            return FALSE;
        }
    }

    verify_trans_contains(state_contains, state_included, TRUE);
    return TRUE;
}

// Get the root state, or the anchored root state, as appropriate for the pattern.
// The anchored root is used only for anchored patterns, and only if THIN_NFA_ENABLE_ANCHOR_OPT is set.
// It is created upon the first request. Returns NULL if this creation fails.
static kiss_thin_nfa_state_t *
kiss_thin_nfa_get_root_state(struct thin_nfa_comp_s *nfa_comp, int anchored)
{
    static const char rname[] = "kiss_thin_nfa_get_root_state";

    if (!anchored || !(nfa_comp->flags & THIN_NFA_ENABLE_ANCHOR_OPT)) {
        thinnfa_debug(("%s: Using normal root: %s, feature %s\n", rname,
            anchored?"anchored":"not anchored",
            (nfa_comp->flags & THIN_NFA_ENABLE_ANCHOR_OPT)?"enabled":"disabled"));
        return nfa_comp->root_state;
    }

    if (!nfa_comp->anchored_root_state) {
        // Lazy creation of the anchored root state
        nfa_thin_state_flags_t flags = THIN_NFA_STATE_ROOT;
        ENUM_SET_FLAG(flags, THIN_NFA_STATE_ANCHORED);
        thinnfa_debug(("%s: Creating a new anchored root\n", rname));
        nfa_comp->anchored_root_state = kiss_thin_nfa_state_create(nfa_comp, 0, NULL, flags);
        if (nfa_comp->anchored_root_state == NULL) {
            thinnfa_debug_err(("%s: Failed to create the anchored root state\n", rname));
            return NULL;
        }
    }

    thinnfa_debug(("%s: Returning the anchored root (%d)\n", rname, nfa_comp->anchored_root_state->state_id));
    return nfa_comp->anchored_root_state;
}

// Find the state in the trie, which represents the longest prefix of a given string.
// Each character is translated to its canonic form before looking for a transition.
// Returns the appropriate root if not even the first character has a state.
// Returns NULL only if the root state can't be obtained.
static kiss_thin_nfa_state_t *
kiss_thin_nfa_find_longest_prefix(struct thin_nfa_comp_s *nfa_comp, const u_char *text, u_int len, int anchored)
{
    u_int offset;
    kiss_thin_nfa_state_t *state;

    // Following the path labeled by chars in 'pattern' (skip the states which already exist)
    state = kiss_thin_nfa_get_root_state(nfa_comp, anchored);
    if (!state) return NULL;

    for (offset = 0; offset < len; offset++) {
        kiss_thin_nfa_state_t *next_state;
        u_char ch = kiss_thin_nfa_xlate_char(nfa_comp, text[offset]);

        verify_state(nfa_comp, state);

        // Do we have a node for the next character?
        next_state = kiss_thin_nfa_comp_get_next_state(state, ch);
        if (next_state == NULL) {
            // No next state - this is as far as we go
            break;
        } else {
            state = next_state;
        }
    }

    return state;
}

// Add a newly allocated state to the trie. Keep the transition list sorted.
static void kiss_thin_nfa_add_transition(kiss_thin_nfa_state_t *parent, u_char tran_char, kiss_thin_nfa_state_t *new_child) { static const char rname[] = "kiss_thin_nfa_add_transition"; kiss_thin_nfa_state_t **child_p; // Go over existing children and find the place to add the transition for (child_p = &parent->child; *child_p != NULL; child_p = &(*child_p)->sibling) { kiss_thin_nfa_state_t *child = *child_p; if (child->tran_char > tran_char) { // Add before this one break; } } // Add the transition new_child->sibling = *child_p; *child_p = new_child; new_child->tran_char = tran_char; parent->num_trans++; thinnfa_debug_extended(("%s: Added transition from %s by 0x%2x to %s\n", rname, state_name(parent), tran_char, state_name(new_child))); if (parent->num_trans > MIN(kiss_thin_nfa_max_partial_trans, KISS_BNFA_MAX_TRANS_NUM)) { thinnfa_debug(( "%s: State %s has %d transitions - making it full\n", rname, state_name(parent), parent->num_trans )); make_state_full(parent); } // Track states which represent a maximal sequence of identical characters if ((parent->flags & THIN_NFA_STATE_ROOT) && !(parent->flags & THIN_NFA_STATE_ANCHORED)) { // Single character - all characters are identical ENUM_SET_FLAG(new_child->flags, THIN_NFA_STATE_MAX_IDENTICAL_CHAR); } else if ((parent->flags & THIN_NFA_STATE_MAX_IDENTICAL_CHAR) && (parent->tran_char == tran_char)) { // The child, not the parent, is now the longest ENUM_UNSET_FLAG(parent->flags, THIN_NFA_STATE_MAX_IDENTICAL_CHAR); ENUM_SET_FLAG(new_child->flags, THIN_NFA_STATE_MAX_IDENTICAL_CHAR); } } // Add a pattern to the trie, which would generate the Thin NFA. // Upon failure, doesn't clean up states it may have created. Will be cleaned up when destroying nfa_comp. 
static kiss_ret_val kiss_thin_nfa_add_pattern_to_trie(struct thin_nfa_comp_s *nfa_comp, const kiss_pmglob_string_s *sm_cur_pattern) { static const char rname[] = "kiss_thin_nfa_add_pattern_to_trie"; const u_char *pattern_text; u_int pattern_len; u_int i; kiss_thin_nfa_state_t *current_state; int anchored_pattern; pattern_text = kiss_pmglob_string_get_pattern(sm_cur_pattern); pattern_len = kiss_pmglob_string_get_size(sm_cur_pattern); anchored_pattern = kiss_pmglob_string_get_flags(sm_cur_pattern) & KISS_PM_LSS_AT_BUF_START; thinnfa_debug(("%s: Adding the pattern: %s flags=%x\n", rname, kiss_pmglob_string_to_debug_charp(sm_cur_pattern), kiss_pmglob_string_get_flags(sm_cur_pattern))); // How much of this pattern do we already have in the tree? current_state = kiss_thin_nfa_find_longest_prefix(nfa_comp, pattern_text, pattern_len, anchored_pattern); if (!current_state) return KISS_ERROR; // Messages printed inside thinnfa_debug(("%s: State %s (flags %x) represents the longest prefix at the offset %d/%d\n", rname, state_name(current_state), current_state->flags, current_state->depth, pattern_len)); // Go over the remaining bytes (if any) and add more states for (i = current_state->depth; i < pattern_len; i++) { kiss_thin_nfa_state_t *new_state; u_char ch; // Create a new state. Depth i+1, because the first character (i=0) is at depth 1. 
new_state = kiss_thin_nfa_state_create(nfa_comp, i+1, pattern_text, (nfa_thin_state_flags_t)(current_state->flags & THIN_NFA_STATE_ANCHORED)); if (!new_state) { thinnfa_debug_err(("%s: Failed to allocate a new state\n", rname)); kiss_thin_nfa_set_comp_error(nfa_comp, "Failed to allocate a new state"); return KISS_ERROR; } // Add a transition into the new state ch = kiss_thin_nfa_xlate_char(nfa_comp, pattern_text[i]); kiss_thin_nfa_add_transition(current_state, ch, new_state); thinnfa_debug(("%s: Added new state+transition %s -> %s by 0x%02x offset %d\n", rname, state_name(current_state), state_name(new_state), ch, i)); verify_state(nfa_comp, current_state); // Add the following states after this one current_state = new_state; } // Set state as finite and add the pattern ID to the list of patterns which this state accepts. // Note: It's OK if the state isn't one we just added. E.g. the new pattern is a prefix of an existing one. if (kiss_thin_nfa_state_set_match_pattern(current_state, sm_cur_pattern) != KISS_OK) { thinnfa_debug_err(( "%s: Failed to save the pattern information for the state %s\n", rname, state_name(current_state) )); kiss_thin_nfa_set_comp_error(nfa_comp, "Failed to save the pattern information for the state"); return KISS_ERROR; } return KISS_OK; } // Find the transition from a state by a character, considering fail states. // The state should alrady have its fail state calculated. // // Note: kiss_bnfa_build_full_trans_table may pass from_state=NULL. The result is returning the root, which is OK. static kiss_thin_nfa_state_t * kiss_thin_nfa_calc_transition(struct thin_nfa_comp_s *nfa_comp, kiss_thin_nfa_state_t *from_state, u_char tran_char) { static const char rname[] = "kiss_thin_nfa_calc_transition"; kiss_thin_nfa_state_t *state; // Go down the fail state chain, until we find a transition. 
for (state = from_state; state != NULL; state = state->fail_state) { kiss_thin_nfa_state_t *next_state; // Look up in this state's transition table next_state = kiss_thin_nfa_comp_get_next_state(state, tran_char); if (next_state != NULL) { if (state == from_state) { thinnfa_debug_extended(("%s: Found transition from %s by 0x%02x to %s\n", rname, state_name(from_state), tran_char, state_name(next_state))); } else { thinnfa_debug_extended(( "%s: Found transition from %s by 0x%02x to %s using the fail state %s\n", rname, state_name(from_state), tran_char, state_name(next_state), state_name(state) )); } return next_state; } } // We've gone down to the root, and found nothing - so the next state is the root. thinnfa_debug_extended(("%s: No transition from %s by 0x%02x - going to root\n", rname, state_name(from_state), tran_char)); return nfa_comp->root_state; } // A callback function prototype for kiss_thin_nfa_iterate_trans typedef kiss_ret_val (*kiss_thin_nfa_iterate_trans_cb)(kiss_thin_nfa_state_t *from_state, u_char tran_char, kiss_thin_nfa_state_t *to_state); // Iterate all the transitions in the trie, in BFS order. // Note: The callback will be called once per transition, i.e. once per state, except for the initial state. static kiss_ret_val kiss_thin_nfa_iterate_trans_bfs(struct thin_nfa_comp_s *nfa_comp, kiss_thin_nfa_iterate_trans_cb iter_cb) { static const char rname[] = "kiss_thin_nfa_iterate_trans_bfs"; kiss_thin_nfa_state_t *bfs_q_head, *bfs_q_tail; thinnfa_debug(("%s: Starting BFS iteration, %d states\n", rname, nfa_comp->state_num)); // This queue contains states, whose children we want to iterate. // We start with the root state followed by the anchored root state. 
bfs_q_head = nfa_comp->root_state; bfs_q_head->bfs_q = NULL; bfs_q_tail = bfs_q_head; if (nfa_comp->anchored_root_state) { bfs_q_tail->bfs_q = nfa_comp->anchored_root_state; nfa_comp->anchored_root_state->bfs_q = NULL; bfs_q_tail = nfa_comp->anchored_root_state; } // Dequeue each of the states, call the iterator for each transition and enqueue the children while (bfs_q_head != NULL) { kiss_thin_nfa_state_t *from_state; kiss_thin_nfa_state_t *to_state; // Dequeue a state from the head from_state = bfs_q_head; bfs_q_head = from_state->bfs_q; if (bfs_q_head == NULL) bfs_q_tail = NULL; thinnfa_debug_extended(( "%s: Got the state %s with %d children\n", rname, state_name(from_state), from_state->num_trans )); // Go over the state's transitions for (to_state = from_state->child; to_state != NULL; to_state = to_state->sibling) { thinnfa_debug_extended(( "%s: Got the child state %s at the depth %d\n", rname, state_name(to_state), to_state->depth )); // Call the iterator function if (iter_cb(from_state, to_state->tran_char, to_state) != KISS_OK) { return KISS_ERROR; } // No need to enqueue states with no children if (to_state->num_trans == 0) continue; // Enqueue the next state, so we'd iterate its transitions too to_state->bfs_q = NULL; if (bfs_q_tail != NULL) { bfs_q_tail->bfs_q = to_state; } else { bfs_q_head = to_state; } bfs_q_tail = to_state; } } return KISS_OK; } // Set a state's fail state. // To calculate this, we need the state's parent, and the character that takes us from the parent to the current. // The parent's fail state must be calculated already. static kiss_ret_val kiss_thin_nfa_set_fail_state(kiss_thin_nfa_state_t *parent, u_char tran_char, kiss_thin_nfa_state_t *state) { static const char rname[] = "kiss_thin_nfa_set_fail_state"; kiss_thin_nfa_state_t *fail_state; // Calculate the fail state. 
// The same character that takes us from parent to state would take us from parent->fail_state to state->fail_state fail_state = kiss_thin_nfa_calc_transition(state->comp, parent->fail_state, tran_char); state->fail_state = fail_state; thinnfa_debug(("%s: The fail state of %s is %s (parent %s, parent->fail_state %s, char %02x)\n", rname, state_name(state), state_name(fail_state), state_name(parent), state_name(parent->fail_state), tran_char)); // If a state's fail state is finite, so is the state itself. // This is because the fail state represents a suffix of the state, which is included in // the suffix the state represents. If the shorter suffix is a match, so is the longer one. // Example - The fail state of "abc" is "bc" (if it exists). If "bc" is a match, then so is "abc". if (fail_state->flags & THIN_NFA_STATE_MATCH) { thinnfa_debug(("%s: Fail state %s is finite - so is %s\n", rname, state_name(fail_state), state_name(state))); if (kiss_thin_nfa_state_copy_match_ids(state, fail_state)) { thinnfa_debug_err(( "%s: Failed to copy the pattern IDs from %s to %s\n", rname, state_name(fail_state), state_name(state) )); kiss_thin_nfa_set_comp_error(state->comp, "Failed to copy the pattern IDs"); return KISS_ERROR; } } // This isn't related to calculating fail states. It should be done after the trie was built, but before // starting BNFA construction. if (kiss_thin_nfa_optimize_contig_chars && (state->flags & THIN_NFA_STATE_MAX_IDENTICAL_CHAR)) { // Optimization for identical character sequences. States which represent a maximal sequence of the same // characters will be full. So for a long sequence of a single character, we'll always be in a full state. // Great for the performance lab. thinnfa_debug(( "%s: State %s is a maximal identical character sequence - making it full\n", rname, state_name(state) )); make_state_full(state); } return KISS_OK; } // See if we can find a better fail state for a state. 
// If the fail state contains only transitions the original state has anyway, we can use its fail state instead. static kiss_thin_nfa_state_t * kiss_thin_nfa_find_better_fail_state(kiss_thin_nfa_state_t *state) { kiss_thin_nfa_state_t *fail_state; if (!state->fail_state) return NULL; // Go down the fail state chain. // Keep going as long as the states contain only transitions the current state has anyway. for (fail_state = state->fail_state; fail_state->fail_state != NULL; fail_state = fail_state->fail_state) { verify_state(state->comp, fail_state); if (fail_state->flags & THIN_NFA_STATE_FULL) { // Full state - failing to it will always give us the answer. break; } if (!kiss_thin_nfa_are_trans_contained(state, fail_state)) { // This state has transitions that the current state doesn't - we must fail to it, // not lower. break; } } return fail_state; } // Change fail states to go faster up the tree, if possible. // Normally, a fail state points one level upward. But sometimes it can be more upward. // // Note: This must be done after kiss_thin_nfa_set_fail_state was called for all states. This is because // kiss_thin_nfa_set_fail_state uses the parent's fail state to calculate the child's. If the parent's fail stae // was "reduced", we'll get the wrong fail state for the child. 
static void kiss_thin_nfa_reduce_fail_states(struct thin_nfa_comp_s *nfa_comp) { static const char rname[] = "kiss_thin_nfa_reduce_fail_states"; kiss_thin_nfa_state_t *state; for (state = nfa_comp->root_state; state != NULL; state = kiss_thin_nfa_get_subsequent_state(nfa_comp, state)) { kiss_thin_nfa_state_t *fail_state; if (state->flags & THIN_NFA_STATE_FULL) { // A full state's fail state isn't interesting continue; } fail_state = kiss_thin_nfa_find_better_fail_state(state); if (fail_state != state->fail_state) { // We have a better fail state thinnfa_debug(("%s: Changing the fail state of %s from %s to %s\n", rname, state_name(state), state_name(state->fail_state), state_name(fail_state))); state->fail_state = fail_state; } } } // Calculate fail states for all states. static kiss_ret_val kiss_thin_nfa_calc_fail_states(struct thin_nfa_comp_s *nfa_comp) { static const char rname[] = "kiss_thin_nfa_calc_fail_states"; // The root state has no fail state nfa_comp->root_state->fail_state = NULL; if (nfa_comp->anchored_root_state) { // The anchored root fails to the root nfa_comp->anchored_root_state->fail_state = nfa_comp->root_state; } thinnfa_debug(("%s: Calculating the fail states for all states\n", rname)); // Iterate all transitions, and calculate fail states for the target states. // This would cover all states, except the initial (whose fail state was already set). // BFS order assures that a parent's fail state is already calculated when we reach the child. if (kiss_thin_nfa_iterate_trans_bfs(nfa_comp, kiss_thin_nfa_set_fail_state) != KISS_OK) { thinnfa_debug_err(("%s: Failed to calculate the fail states\n", rname)); return KISS_ERROR; } // All states now have their fail states calculated ENUM_SET_FLAG(nfa_comp->flags, THIN_NFA_FAIL_STATES_CALCULATED); // Optimization - reduce fail states kiss_thin_nfa_reduce_fail_states(nfa_comp); return KISS_OK; } // Set a state's BNFA offset to the size so far, and increment by the state size. 
//
// state      - the state to place. Its bnfa_offset / bnfa_incoming_off fields are only assigned
//              while they still hold KISS_BNFA_OFFSET_INVALID, so calling this more than once
//              for the same state doesn't move it.
// cur_offset - in/out running offset; advanced by whatever was placed by this call.
//
// Which parts get placed depends on the sign of *cur_offset:
//   negative     - only the body of a full state.
//   non-negative - the body of a partial state, and the match state (plus a jump state for a full
//                  matching state) of a matching state.
static void
set_state_offset(kiss_thin_nfa_state_t *state, kiss_bnfa_offset_t *cur_offset)
{
    static const char rname[] = "set_state_offset";
    u_int state_size=0, match_size=0;

    verify_state(state->comp, state);

    if (state->bnfa_offset == KISS_BNFA_OFFSET_INVALID) {
        // Room for the actual state - negative offset for full states, positive for partial.
        if ((state->flags & THIN_NFA_STATE_FULL) && (*cur_offset<0)) {
            state_size = sizeof(kiss_bnfa_full_state_t);
        } else if (!(state->flags & THIN_NFA_STATE_FULL) && (*cur_offset>=0)) {
            state_size = kiss_bnfa_partial_state_size(state->num_trans);
        }
    }

    if (state->bnfa_incoming_off == KISS_BNFA_OFFSET_INVALID) {
        // Room for a match state - if needed, must be a positive offset.
        if ((state->flags & THIN_NFA_STATE_MATCH) && (*cur_offset >= 0)) {
            match_size = sizeof(kiss_bnfa_match_state_t);
            if (state->flags & THIN_NFA_STATE_FULL) {
                // Need a jump state too
                match_size += kiss_bnfa_partial_state_size(0);
            }
        }
    }

    // Update the state offsets.
    // The match part is placed first, so for a partial matching state the body directly follows it.
    if (match_size > 0) {
        thinnfa_debug_extended(("%s: State %s was given a match offset %d size %d", rname,
            state_name(state), *cur_offset, match_size));
        state->bnfa_incoming_off = *cur_offset;
        *cur_offset += match_size;
    }
    if (state_size > 0) {
        thinnfa_debug_extended(("%s: State %s was given a real offset %d size %d", rname,
            state_name(state), *cur_offset, state_size));
        state->bnfa_offset = *cur_offset;
        *cur_offset += state_size;
        if (!(state->flags & THIN_NFA_STATE_MATCH)) {
            // Incoming transitions go directly to the state
            state->bnfa_incoming_off = state->bnfa_offset;
        }
    }
}

// Check if a compressed offset fits the (short) offset type used by full states.
// Returns TRUE if converting the value to kiss_bnfa_short_offset_t keeps it unchanged, FALSE otherwise.
static BOOL
comp_offset_fits_short(kiss_bnfa_comp_offset_t comp_offset)
{
    if ((comp_offset) != (kiss_bnfa_short_offset_t)(comp_offset)) {
        return FALSE;
    }
    return TRUE;
}

// Mark all children of a given state as reachable from a full state
// (sets THIN_NFA_STATE_REACH_FROM_FULL on each direct child).
static void
kiss_bnfa_mark_childs_reach_from_full(kiss_thin_nfa_state_t *state)
{
    kiss_thin_nfa_state_t *child;

    for (child = state->child; child != NULL; child = child->sibling) {
        ENUM_SET_FLAG(child->flags, THIN_NFA_STATE_REACH_FROM_FULL);
    }
}

// Mark all states that are reachable from a given full state,
// in order to place them at lower offsets to avoid possible overflow due to offset compression.
// If a state's fail state is of partial type, mark its children too.
static void
kiss_bnfa_mark_reachable_from_full(kiss_thin_nfa_state_t *state)
{
    kiss_bnfa_mark_childs_reach_from_full(state);

    // Follow the fail chain, stopping at its end or at the first full state
    // (whose children are not marked by this call).
    for (state = state->fail_state; state && !(state->flags & THIN_NFA_STATE_FULL); state = state->fail_state) {
        kiss_bnfa_mark_childs_reach_from_full(state);
    }
}

// Calculate the offset of each BNFA state, and the BNFA's offset range.
// Stores the lowest offset in nfa_comp->min_bnfa_off and the end offset in nfa_comp->max_bnfa_off.
// Each state's own offsets are stored in the state by set_state_offset.
// NOTE(review): an earlier version of this comment mentioned nfa_comp->offset_list and *bnfa_size_p;
// neither is referenced by this function - confirm they are obsolete.
//
// Returns KISS_ERROR if the states reachable from full states end beyond what a short compressed
// offset can express, KISS_OK otherwise.
static kiss_ret_val
kiss_bnfa_calc_offsets(struct thin_nfa_comp_s *nfa_comp)
{
    static const char rname[] = "kiss_bnfa_calc_offsets";
    kiss_thin_nfa_state_t *state;
    kiss_bnfa_offset_t cur_offset;

    // Full states have negative offsets. So the first state's offset depends on the number of full states.
    cur_offset = -(kiss_bnfa_offset_t)(nfa_comp->full_state_num * sizeof(kiss_bnfa_full_state_t));
    nfa_comp->min_bnfa_off = cur_offset;

    // Put the anchored root state first, because it's the initial state
    if (nfa_comp->anchored_root_state) {
        KISS_ASSERT(nfa_comp->anchored_root_state->flags & THIN_NFA_STATE_FULL,
            "%s: The anchored root %s must be a full state\n", rname, state_name(nfa_comp->anchored_root_state));
        set_state_offset(nfa_comp->anchored_root_state, &cur_offset);
    }

    // If there's no anchored root, then root must be initial. If there is, validation expects it second.
    set_state_offset(nfa_comp->root_state, &cur_offset);

    // In this loop we add only the full states, which have negative offsets.
    // States that were already placed above are not moved (set_state_offset assigns only once).
    for (state = nfa_comp->root_state; state != NULL; state = kiss_thin_nfa_get_subsequent_state(nfa_comp, state)) {
        if (state->flags & THIN_NFA_STATE_FULL) {
            kiss_bnfa_mark_reachable_from_full(state); // Mark child states so they'll get low offsets
            set_state_offset(state, &cur_offset);
        }
    }

    // We added all full states and moving to partials - we must be at offset 0.
    KISS_ASSERT(cur_offset==0, "%s: Offset %d != 0 after adding %d full states\n",
        rname, cur_offset, nfa_comp->full_state_num);

    // In this loop we add states that are reachable from full states. We want them at low offsets to avoid
    // possible overflow due to offset compression
    for (state = nfa_comp->root_state; state != NULL; state = kiss_thin_nfa_get_subsequent_state(nfa_comp, state)) {
        if (state->flags & THIN_NFA_STATE_REACH_FROM_FULL){
            set_state_offset(state, &cur_offset);
        }
    }

    // Make sure we have not exceeded the limit of offsets that can be compressed to 16bit
    // Note: the test is a little too strict - we check the first state that is not reachable from a full state
    // instead of the last state that is reachable
    if (!comp_offset_fits_short(kiss_bnfa_offset_compress(cur_offset))) {
        thinnfa_debug_err(("%s: Current offset is %d, not reachable from the full state\n", rname, cur_offset));
        kiss_thin_nfa_set_comp_error(nfa_comp, "Exceeded the limit of reachable states");
        return KISS_ERROR;
    }

    // In this loop we add the partial and matching states, which weren't handled in the loops above.
    for (state = nfa_comp->root_state; state != NULL; state = kiss_thin_nfa_get_subsequent_state(nfa_comp, state)) {
        set_state_offset(state, &cur_offset);
    }

    // The current offset is the size of partial states. Add the full state size to get the total size.
    nfa_comp->max_bnfa_off = cur_offset;
    thinnfa_debug_major(("%s: BNFA size - %u full states, %u partial states, total %u bytes\n", rname,
        nfa_comp->full_state_num,
        nfa_comp->state_num-nfa_comp->full_state_num,
        nfa_comp->max_bnfa_off - nfa_comp->min_bnfa_off));

    return KISS_OK;
}

// Get a state's BNFA offset.
// skip_match makes a difference for matching states:
//  TRUE  - Get the actual state, where the transition table is.
//  FALSE - Get the match state, where incoming transitions should go.
static kiss_bnfa_offset_t
state_bnfa_offset(kiss_thin_nfa_state_t *state, BOOL skip_match)
{
    return skip_match ? state->bnfa_offset : state->bnfa_incoming_off;
}

// Convert a BNFA offset to a BNFA state pointer
// (a writable pointer into nfa_comp->runtime_nfa->bnfa, via kiss_bnfa_offset_to_state_write).
static kiss_bnfa_state_t *
comp_bnfa_offset_to_state(struct thin_nfa_comp_s *nfa_comp, kiss_bnfa_offset_t bnfa_offset)
{
    return kiss_bnfa_offset_to_state_write(nfa_comp->runtime_nfa->bnfa, bnfa_offset);
}

// Get a pointer to a state in the BNFA.
// skip_match makes a difference for matching states:
//  TRUE  - Get the actual state, where the transition table is.
//  FALSE - Get the match state, where incoming transitions should go.
static kiss_bnfa_state_t *
comp_to_bnfa_state(kiss_thin_nfa_state_t *state, BOOL skip_match)
{
    return comp_bnfa_offset_to_state(state->comp, state_bnfa_offset(state, skip_match));
}

// Move next to state_bnfa_offset. assert inside.
static kiss_bnfa_short_offset_t state_bnfa_short_offset(kiss_thin_nfa_state_t *state) { static const char rname[] = "state_bnfa_short_offset"; kiss_bnfa_comp_offset_t comp_offset = kiss_bnfa_offset_compress(state_bnfa_offset(state, FALSE)); KISS_ASSERT(comp_offset_fits_short(comp_offset), "%s: Compressed offset %d exceeds the allowed size\n", rname, comp_offset); return (kiss_bnfa_short_offset_t) comp_offset; } // If character translation is enabled, duplicate ch's transition to all equivalents static void add_equivalent_transitions(struct thin_nfa_comp_s *nfa_comp, kiss_bnfa_full_state_t *bnfa_state, u_char ch) { static const char rname[] = "add_equivalent_transitions"; u_char other_ch; u_int group_size; if (!nfa_comp->xlation_tab) return; // Go over all characters within the same group group_size = 0; for (other_ch = nfa_comp->xlation_tab->rev[ch]; other_ch != ch; other_ch = nfa_comp->xlation_tab->rev[other_ch]) { thinnfa_debug_extended(("%s: Setting translated transition by %02x - same as %02x\n", rname, other_ch, ch)); bnfa_state->transitions[other_ch] = bnfa_state->transitions[ch]; // Prevent looping in case the table is corrupt group_size++; KISS_ASSERT_CRASH(group_size <= KISS_PM_ALPHABET_SIZE, "%s: Too many characters to translate into %02x\n", rname, ch); } } // Add a transition to a full transition table. // If there's a translation table, add trnasitions for all equivalent characters. 
static void add_full_transition( struct thin_nfa_comp_s *nfa_comp, kiss_bnfa_full_state_t *bnfa_state, kiss_thin_nfa_state_t *next_state ) { static const char rname[] = "add_full_transition"; u_char ch = next_state->tran_char; thinnfa_debug_extended(("%s: Setting the transition by %02x to %s\n", rname, next_state->tran_char, state_name(next_state))); // Set the transition, for ch and equivalent characters bnfa_state->transitions[ch] = state_bnfa_short_offset(next_state); add_equivalent_transitions(nfa_comp, bnfa_state, ch); } #if !defined(KERNEL) // A recursive algorithm to build full state tables. // Much faster than the previous algorithm, but shouldn't be used in the kernel. // Allow mutual recursion between these two functions: static void build_full_trans_table(kiss_thin_nfa_state_t *comp_state); static void get_full_trans_table(kiss_thin_nfa_state_t *target_state, kiss_thin_nfa_state_t *source_state); // Get the transition table of source_state and write it in target_state's. // source_state is somewhere in the fail state chain of target_state. 
static void get_full_trans_table(kiss_thin_nfa_state_t *target_state, kiss_thin_nfa_state_t *source_state) { kiss_thin_nfa_state_t *child; kiss_bnfa_state_t *target_bnfa = comp_to_bnfa_state(target_state, TRUE); if (source_state != target_state && (source_state->flags & THIN_NFA_STATE_FULL)) { // We've reached a full state - just copy its transition table (build it first, if needed) build_full_trans_table(source_state); bcopy(comp_to_bnfa_state(source_state, TRUE)->full.transitions, target_bnfa->full.transitions, sizeof(target_bnfa->full.transitions)); return; } // Start with our fail state's state table if (source_state->fail_state) { get_full_trans_table(target_state, source_state->fail_state); } else { int i; kiss_bnfa_short_offset_t root_bnfa_comp_offset = state_bnfa_short_offset(source_state); // Reached the root - fill with transitions to root for (i=0; ifull.transitions[i] = root_bnfa_comp_offset; } } // Override transitions which exist in this state for (child = source_state->child; child != NULL; child = child->sibling) { add_full_transition(target_state->comp, &target_bnfa->full, child); } } // Recursive function for building a full state's state table. // target_bnfa_state is the state who's table we're building. // source_state changes when recursing over the tail state chain static void build_full_trans_table(kiss_thin_nfa_state_t *comp_state) { if (comp_state->flags & THIN_NFA_STATE_BUILT_TABLE) return; get_full_trans_table(comp_state, comp_state); ENUM_SET_FLAG(comp_state->flags, THIN_NFA_STATE_BUILT_TABLE); } #endif // KERNEL static CP_INLINE kiss_ret_val verify_add_state(kiss_thin_nfa_state_t *comp_state, kiss_bnfa_state_t *bnfa_state, u_int state_size, const char *caller, const char *type) { const KissThinNFA *nfa_h = comp_state->comp->runtime_nfa.get(); kiss_bnfa_offset_t bnfa_offset = (char *)bnfa_state - (char *)(nfa_h->bnfa); u_int state_alignment = (bnfa_offset < 0) ? 
sizeof(kiss_bnfa_full_state_t) : KISS_BNFA_STATE_ALIGNMENT; if ((bnfa_offset < nfa_h->min_bnfa_offset) || (bnfa_offset+(int)state_size > nfa_h->max_bnfa_offset)) { thinnfa_debug_err(("%s: Cannot add the %s state %s at the offset %d:%d - out of range %d:%d\n", caller, type, state_name(comp_state), bnfa_offset, bnfa_offset+state_size, nfa_h->min_bnfa_offset, nfa_h->max_bnfa_offset)); return KISS_ERROR; } if ((bnfa_offset % state_alignment) != 0) { thinnfa_debug_err(( "%s: Cannot add the %s state %s at the offset %d:%d - not aligned on %d bytes\n", caller, type, state_name(comp_state), bnfa_offset, bnfa_offset + state_size, state_alignment )); return KISS_ERROR; } thinnfa_debug(("%s: Adding the %s state %s, offsets %d:%d\n", caller, type, state_name(comp_state), bnfa_offset, bnfa_offset+state_size)); return KISS_OK; } // Old, non-recursive and slow version on build_full_trans_table. static void build_full_trans_table_no_recursion(kiss_thin_nfa_state_t *comp_state) { static const char rname[] = "build_full_trans_table_no_recursion"; struct thin_nfa_comp_s *nfa_comp = comp_state->comp; kiss_bnfa_state_t *bnfa_state = comp_to_bnfa_state(comp_state, TRUE); kiss_thin_nfa_state_t *child; u_int i; // Go over all characters. Maintain a pointer to the next transition in the list. // We rely on the list being sorted. // We could simply call kiss_thin_nfa_calc_transition for each character. But it would look up again and // again in the current state. child = comp_state->child; for (i = 0; i < KISS_PM_ALPHABET_SIZE; i++) { u_char ch = (u_char)i; kiss_thin_nfa_state_t *next_state; // Check if it's a canonic character (e.g. lowercase when we're case insensitive) if (kiss_thin_nfa_xlate_char(nfa_comp, ch) != ch) { // We'll fill this in when we reach the canonic character. 
continue; } if (child != NULL && child->tran_char == ch) { // Use the explicit transition next_state = child; // Go forward in the transition table child = child->sibling; thinnfa_debug_extended(("%s: Setting the explicit transition by %02x to %s\n", rname, ch, state_name(next_state))); } else { // Note: if comp_state is the initial, we pass from_state=NULL. // This works as desired (returning the initial state). next_state = kiss_thin_nfa_calc_transition(nfa_comp, comp_state->fail_state, ch); thinnfa_debug_extended(("%s: Setting the fail-state transition by %02x to %s\n", rname, ch, state_name(next_state))); } // Set the transition for this character and equivalents bnfa_state->full.transitions[ch] = state_bnfa_short_offset(next_state); add_equivalent_transitions(nfa_comp, &bnfa_state->full, ch); } ENUM_SET_FLAG(comp_state->flags, THIN_NFA_STATE_BUILT_TABLE); } // Build a full state's transition table in the BNFA. // Either uses the explicit transition, or calculates using fail states. static kiss_ret_val kiss_bnfa_build_full_state(kiss_thin_nfa_state_t *comp_state) { static const char rname[] = "kiss_bnfa_build_full_state"; kiss_bnfa_state_t *bnfa_state = comp_to_bnfa_state(comp_state, TRUE); if (verify_add_state(comp_state, bnfa_state, sizeof(kiss_bnfa_full_state_t), rname, "full") != KISS_OK) { return KISS_ERROR; } #if !defined(KERNEL) if (comp_state->comp->flags & THIN_NFA_USE_RECURSIVE_COMPILE) { build_full_trans_table(comp_state); return KISS_OK; } #endif // KERNEL build_full_trans_table_no_recursion(comp_state); return KISS_OK; } static void kiss_bnfa_build_partial_state_header( kiss_bnfa_partial_state_t *bnfa_state, u_int trans_num, kiss_bnfa_offset_t fail_offset ) { bnfa_state->type = KISS_BNFA_STATE_PARTIAL; bnfa_state->trans_num = trans_num; bnfa_state->fail_state_offset = kiss_bnfa_offset_compress(fail_offset); } // Build a partial state's transition table in the BNFA. 
// Temporary encoding - sets the state ID instead of the BNFA offset (which is yet unknown). static kiss_ret_val kiss_bnfa_build_partial_state(kiss_thin_nfa_state_t *comp_state) { static const char rname[] = "kiss_bnfa_build_partial_state"; kiss_bnfa_state_t *bnfa_state = comp_to_bnfa_state(comp_state, TRUE); kiss_thin_nfa_state_t *child; u_int trans_num; if (verify_add_state( comp_state, bnfa_state, kiss_bnfa_partial_state_size(comp_state->num_trans), rname, "partial" ) != KISS_OK) { return KISS_ERROR; } // Fill in the transition number and fail state kiss_bnfa_build_partial_state_header(&bnfa_state->partial, comp_state->num_trans, state_bnfa_offset(comp_state->fail_state, TRUE)); thinnfa_debug_extended(("%s: The fail state is %s\n", rname, state_name(comp_state->fail_state))); // Build a transition for each existing character trans_num = 0; for (child = comp_state->child; child != NULL; child = child->sibling) { thinnfa_debug_extended(("%s: Setting the transition by %02x to %s\n", rname, child->tran_char, state_name(child))); bnfa_state->partial.transitions[trans_num].tran_char = child->tran_char; bnfa_state->partial.transitions[trans_num].next_state_offset = kiss_bnfa_offset_compress(state_bnfa_offset(child, FALSE)); trans_num++; } KISS_ASSERT(trans_num == comp_state->num_trans, "%s: State %s should have %d transitions, but it has %d", rname, state_name(comp_state), comp_state->num_trans, trans_num); ENUM_SET_FLAG(comp_state->flags, THIN_NFA_STATE_BUILT_TABLE); return KISS_OK; } // Build a match state. 
//
// comp_state - the matching state; the match state is written at its incoming offset.
// match_id   - the value stored in the match state's match_id field.
//
// When the state's body doesn't directly follow the match state, a jump state (a partial state
// with no transitions, which fails to the body) is written right after the match state.
// Returns KISS_ERROR if either location fails verification, KISS_OK otherwise.
static kiss_ret_val
kiss_bnfa_build_match_state(kiss_thin_nfa_state_t *comp_state, u_int match_id)
{
    static const char rname[] = "kiss_bnfa_build_match_state";
    kiss_bnfa_offset_t match_off = state_bnfa_offset(comp_state, FALSE);
    kiss_bnfa_offset_t body_off = state_bnfa_offset(comp_state, TRUE);
    kiss_bnfa_offset_t after_match_off = match_off + sizeof(kiss_bnfa_match_state_t);
    kiss_bnfa_state_t *match_state = comp_bnfa_offset_to_state(comp_state->comp, match_off);
    kiss_bnfa_state_t *jump_state;

    if (verify_add_state(comp_state, match_state, sizeof(kiss_bnfa_match_state_t), rname, "match") != KISS_OK) {
        return KISS_ERROR;
    }

    // The match state itself
    match_state->match.type = KISS_BNFA_STATE_MATCH;
    match_state->match.unused = 0;
    match_state->match.match_id = match_id;

    // When the body comes right after the match state, we're done
    if (after_match_off == body_off) {
        return KISS_OK;
    }

    // Otherwise (i.e. for full-matching states) bridge the gap with a jump state -
    // a 0-transition partial state, whose fail state is the real state.
    jump_state = comp_bnfa_offset_to_state(comp_state->comp, after_match_off);
    if (verify_add_state(comp_state, jump_state, kiss_bnfa_partial_state_size(0), rname, "jump") != KISS_OK) {
        return KISS_ERROR;
    }
    kiss_bnfa_build_partial_state_header(&jump_state->partial, 0, body_off);

    return KISS_OK;
}

// Encode a state in binary NFA form.
static kiss_ret_val kiss_bnfa_add_state(kiss_thin_nfa_state_t *comp_state, u_int offset_in_pat_match_array) { if (comp_state->flags & THIN_NFA_STATE_MATCH) { // Build a match state (a jump state too if needed) if (kiss_bnfa_build_match_state(comp_state, offset_in_pat_match_array) != KISS_OK) return KISS_ERROR; } // Add the state if (comp_state->flags & THIN_NFA_STATE_FULL) { if (kiss_bnfa_build_full_state(comp_state) != KISS_OK) return KISS_ERROR; } else { if (kiss_bnfa_build_partial_state(comp_state) != KISS_OK) return KISS_ERROR; } return KISS_OK; } static uintptr_t pat_key_hash_func(const void *key, CP_MAYBE_UNUSED void *info) { const kiss_thin_nfa_pattern_array_t *pat_arr = (const kiss_thin_nfa_pattern_array_t *)key; const char* buf = (const char *)key; const char *buf_end; uintptr_t val = 0; buf_end = buf + kiss_thin_nfa_pattern_array_size(pat_arr->n_patterns); for ( ; buf != buf_end; buf++) { val = ((val >> 3) ^ (val<<5)) + *buf; } return val; } static int pat_key_cmp_func(const void *key1, const void *key2, CP_MAYBE_UNUSED void *info) { const kiss_thin_nfa_pattern_array_t *pat1 = (const kiss_thin_nfa_pattern_array_t *)key1; const kiss_thin_nfa_pattern_array_t *pat2 = (const kiss_thin_nfa_pattern_array_t *)key2; if (pat1->n_patterns != pat2->n_patterns) { return 1; // No match } return memcmp(pat1, pat2, kiss_thin_nfa_pattern_array_size(pat1->n_patterns)); } static u_int pattern_list_len(const kiss_thin_nfa_pattern_list_t *pat_list) { const kiss_thin_nfa_pattern_list_t *pat; u_int n = 0; for (pat = pat_list; pat != NULL; pat = pat->next) { n++; } return n; } static kiss_ret_val kiss_bnfa_match_patterns_prepare(struct thin_nfa_comp_s *nfa_comp, KissThinNFA *nfa) { static const char rname[] = "kiss_bnfa_match_patterns_prepare"; kiss_thin_nfa_pattern_array_t *pat_arr; kiss_thin_nfa_state_t *comp_state; u_int total_size_for_patterns; total_size_for_patterns = 0; for (comp_state = nfa_comp->root_state; comp_state != NULL; comp_state = 
kiss_thin_nfa_get_subsequent_state(nfa_comp, comp_state)) { if (comp_state->flags & THIN_NFA_STATE_MATCH) { if (!comp_state->ids) { thinnfa_debug_critical(( "%s: State %s is finite, but its IDs are null\n", rname, state_name(comp_state) )); kiss_thin_nfa_set_comp_error(nfa_comp, "The state is finite, but its IDs are null"); return KISS_ERROR; } total_size_for_patterns += kiss_thin_nfa_pattern_array_size(pattern_list_len(comp_state->ids)); } } if (total_size_for_patterns == 0) { thinnfa_debug_critical(("%s: no finite states?!\n", rname)); kiss_thin_nfa_set_comp_error(nfa_comp, "no finite states?!"); return KISS_ERROR; } // We allocate according to maximum possible size. // We might reduce it at the end, if duplicates exist. thinnfa_debug(("%s: alocating %u bytes for a pattern array\n", rname, total_size_for_patterns)); pat_arr = (kiss_thin_nfa_pattern_array_t *)kiss_pmglob_memory_kmalloc_ex( total_size_for_patterns, rname, FW_KMEM_SLEEP ); if (!pat_arr) { thinnfa_debug_critical(( "%s: failed to allocate %d bytes for a complete pattern array\n", rname, total_size_for_patterns )); kiss_thin_nfa_set_comp_error(nfa_comp, "Failed to allocate memory for a complete pattern array"); return KISS_ERROR; } nfa->pattern_arrays = pat_arr; nfa->pattern_arrays_size = total_size_for_patterns; nfa_comp->patterns_hash = kiss_hash_create_with_ksleep( nfa->match_state_num, pat_key_hash_func, pat_key_cmp_func, NULL ); if (!nfa_comp->patterns_hash) { thinnfa_debug(( "%s: failed to create patterns hash table for %u finite states\n", rname, nfa->match_state_num )); kiss_thin_nfa_set_comp_error(nfa_comp, "Failed to create patterns hash table for finite states"); return KISS_ERROR; } return KISS_OK; } static kiss_ret_val kiss_bnfa_match_patterns_finalize(struct thin_nfa_comp_s *nfa_comp, KissThinNFA *nfa, u_int new_size) { static const char rname[] = "kiss_bnfa_match_patterns_finalize"; kiss_thin_nfa_pattern_array_t *new_pat_arr; // Compact the match patter array, if needed if (new_size == 
nfa->pattern_arrays_size) { thinnfa_debug(("%s: no size change - the pattern array size is %u bytes\n", rname, new_size)); return KISS_OK; } if (new_size > nfa->pattern_arrays_size) { thinnfa_debug_critical(( "%s: new pattern array size (%u) is greater than the current size (%u). This should not happen.\n", rname, new_size, nfa->pattern_arrays_size )); kiss_thin_nfa_set_comp_error(nfa_comp, "Failed to allocate a complete pattern array"); return KISS_ERROR; } new_pat_arr = (kiss_thin_nfa_pattern_array_t *)kiss_pmglob_memory_kmalloc_ex(new_size, rname, FW_KMEM_SLEEP); if (!new_pat_arr) { thinnfa_debug_critical(("%s: failed to allocate %d bytes for a complete pattern array\n", rname, new_size)); kiss_thin_nfa_set_comp_error(nfa_comp, "Failed to allocate a complete pattern array"); return KISS_ERROR; } thinnfa_debug(("%s: reducing the size from %u to %u\n", rname, nfa->pattern_arrays_size, new_size)); bcopy(nfa->pattern_arrays, new_pat_arr, new_size); kiss_pmglob_memory_kfree(nfa->pattern_arrays, nfa->pattern_arrays_size, rname); nfa->pattern_arrays = new_pat_arr; nfa->pattern_arrays_size = new_size; return KISS_OK; } static kiss_ret_val kiss_bnfa_copy_pat_list( const KissThinNFA *nfa, kiss_hash_t patterns_hash, kiss_thin_nfa_state_t *comp_state, u_int *last_used_offset_in_pat_match_array, u_int *offset_for_cur_state) { static const char rname[] = "kiss_bnfa_copy_pat_list"; if (comp_state->flags & THIN_NFA_STATE_MATCH) { kiss_thin_nfa_pattern_list_t *pat_list_ent; kiss_thin_nfa_pattern_array_t *pat_arr; kiss_thin_nfa_pattern_array_t *cached_pat_arr; u_int pat_arr_size; u_int n_patterns; u_int i; n_patterns = pattern_list_len(comp_state->ids); pat_arr_size = kiss_thin_nfa_pattern_array_size(n_patterns); if ((*last_used_offset_in_pat_match_array + pat_arr_size) > nfa->pattern_arrays_size) { thinnfa_debug_critical(("%s: offset (%u) + required size (%u) exceeds the total array size (%u)\n", rname, *last_used_offset_in_pat_match_array, pat_arr_size, 
nfa->pattern_arrays_size)); return KISS_ERROR; } pat_arr = kiss_thin_nfa_offset_to_pat_array_ptr(nfa, *last_used_offset_in_pat_match_array); pat_arr->n_patterns = n_patterns; for (i = 0, pat_list_ent = comp_state->ids; i < pat_arr->n_patterns; pat_list_ent = pat_list_ent->next, i++) { bcopy(&(pat_list_ent->pattern), &(pat_arr->pattern[i]), sizeof(pat_list_ent->pattern)); } kiss_thin_nfa_free_pattern_ids(comp_state->ids); comp_state->ids = NULL; // Prevent release when the state is cleaned up cached_pat_arr = (kiss_thin_nfa_pattern_array_t *)kiss_hash_lookkey(patterns_hash, pat_arr); if (cached_pat_arr) { u_int cached_offset; cached_offset = kiss_thin_nfa_pat_array_ptr_to_offset(nfa, cached_pat_arr); // No need to move the last_used_offset *offset_for_cur_state = cached_offset; thinnfa_debug(( "%s: returning cached offset of %u for the state ID %u. " "%u patterns %u bytes. The offset stays at %u.\n", rname, *offset_for_cur_state, comp_state->state_id, n_patterns, pat_arr_size, *last_used_offset_in_pat_match_array )); } else { *offset_for_cur_state = *last_used_offset_in_pat_match_array; if (!kiss_hash_insert(patterns_hash, pat_arr, NULL)) { thinnfa_debug(("%s: failed to insert a pattern into a hash (non-critical error)\n", rname)); } *last_used_offset_in_pat_match_array += pat_arr_size; thinnfa_debug(( "%s: returning the offset of %u for the state ID %u. %u patterns, %u bytes. The offset moved to %u.\n", rname, *offset_for_cur_state, comp_state->state_id, n_patterns, pat_arr_size, *last_used_offset_in_pat_match_array )); } } return KISS_OK; } static void kiss_bnfa_update_state_depth(kiss_thin_nfa_state_t *comp_state) { struct kiss_thin_nfa_depth_map_s *map = &comp_state->comp->runtime_nfa->depth_map; u_char depth = MIN(comp_state->depth, KISS_THIN_NFA_MAX_ENCODABLE_DEPTH); kiss_bnfa_offset_t off; // Update depth at the state's offset off = comp_state->bnfa_offset; map->offset0[kiss_bnfa_offset_compress(off)] = depth; // Matching state? 
Update at the match state offset too. off = comp_state->bnfa_incoming_off; if (off == comp_state->bnfa_offset) return; map->offset0[kiss_bnfa_offset_compress(off)] = depth; // Full-matching state? Update at the jump state offset too. off += sizeof(kiss_bnfa_match_state_t); if (off == comp_state->bnfa_offset) return; map->offset0[kiss_bnfa_offset_compress(off)] = depth; } // Based on thin_nfa_comp_s structure we have built, create a binary Thin NFA. // Parameter: // nfa_comp - the NFA's compilation data structure. // // Performance notes: // This function takes most of the CPU time in the compilation process (in my tests, at least). // Within it, time is divided about equally between full and partial states. // Full states take about 40 times more time, but there are about 40 times more partial states. // Overall, compilation time isn't bad, but there are surely optimization options. // Idea - when constructing a full state, start by bcopy() of its fail state transitions. This would require // filling the states in BFS order, which isn't done today. 
static kiss_ret_val kiss_bnfa_fill_states(struct thin_nfa_comp_s *nfa_comp) { static const char rname[] = "kiss_bnfa_fill_states"; KissThinNFA *nfa = nfa_comp->runtime_nfa.get(); kiss_thin_nfa_state_t *comp_state; u_int last_used_offset_in_pat_match_array = 0; thinnfa_debug(("%s: Filling BNFA %p size %d with %d states\n", rname, nfa->bnfa_start, nfa->max_bnfa_offset-nfa->min_bnfa_offset, nfa_comp->state_num)); if (kiss_bnfa_match_patterns_prepare(nfa_comp, nfa) != KISS_OK) { return KISS_ERROR; } // Go over the states and build the BNFA representation for (comp_state = nfa_comp->root_state; comp_state != NULL; comp_state = kiss_thin_nfa_get_subsequent_state(nfa_comp, comp_state)) { u_int state_id = comp_state->state_id; u_int offset_for_cur_state = (u_int)-1; if (kiss_bnfa_copy_pat_list( nfa, nfa_comp->patterns_hash, comp_state, &last_used_offset_in_pat_match_array, &offset_for_cur_state ) != KISS_OK) { thinnfa_debug_critical(( "%s: kiss_bnfa_copy_pat_list() failed for the state %s\n", rname, state_name(comp_state) )); kiss_thin_nfa_set_comp_error(nfa_comp, "kiss_bnfa_copy_pat_list() failed"); return KISS_ERROR; } // Update the maximum pattern length (length = state depth) if (comp_state->depth > nfa->max_pat_len) { nfa->max_pat_len = comp_state->depth; } // Build the state if (kiss_bnfa_add_state(comp_state, offset_for_cur_state) != KISS_OK) { thinnfa_debug_critical(("%s: Failed to add the state %d\n", rname, state_id)); return KISS_ERROR; } // Update the depth map kiss_bnfa_update_state_depth(comp_state); } if (kiss_bnfa_match_patterns_finalize(nfa_comp, nfa, last_used_offset_in_pat_match_array) != KISS_OK) { return KISS_ERROR; } return KISS_OK; } static void kiss_thin_nfa_fill_stats(struct thin_nfa_comp_s *nfa_comp) { struct kiss_thin_nfa_specific_stats_s *stats = &nfa_comp->runtime_nfa->stats.specific; stats->num_of_states = nfa_comp->state_num; stats->num_of_final_states = nfa_comp->match_state_num; } // Get the nfa_comp structure and build, according to it, 
the runtime Thin NFA structure. static kiss_ret_val kiss_thin_nfa_build_bnfa(struct thin_nfa_comp_s *nfa_comp, CP_MAYBE_UNUSED u_int compile_flags) { static const char rname[] = "kiss_thin_nfa_build_bnfa"; thinnfa_debug_major(("%s: Converting the compiled Thin NFA to the binary form\n", rname)); // Get the list of all BNFA offsets if (kiss_bnfa_calc_offsets(nfa_comp) != KISS_OK) { thinnfa_debug_err(("%s: Error allocating the offset list\n", rname)); kiss_thin_nfa_set_comp_error(nfa_comp, "Failed to allocate the offset list"); return KISS_ERROR; } // Allocate the runtime Thin NFA structure nfa_comp->runtime_nfa = kiss_thin_nfa_create( nfa_comp->match_state_num, nfa_comp->min_bnfa_off, nfa_comp->max_bnfa_off ); if (!nfa_comp->runtime_nfa) { thinnfa_debug_err(("%s: Error creating the NFA\n", rname)); kiss_thin_nfa_set_comp_error(nfa_comp, "Failed to allocate BNFA"); return KISS_ERROR; } if (nfa_comp->anchored_root_state) { ENUM_SET_FLAG(nfa_comp->runtime_nfa->flags, KISS_THIN_NFA_HAS_ANCHOR); } // Build the BNFA we'll use on runtime if (kiss_bnfa_fill_states(nfa_comp) != KISS_OK) { thinnfa_debug_err(("%s: kiss_bnfa_fill_states() failed\n", rname)); return KISS_ERROR; } // Copy the character translation table if (nfa_comp->xlation_tab) { bcopy( nfa_comp->xlation_tab->tab, nfa_comp->runtime_nfa->xlation_tab, sizeof(nfa_comp->runtime_nfa->xlation_tab) ); ENUM_SET_FLAG(nfa_comp->runtime_nfa->flags, KISS_THIN_NFA_USE_CHAR_XLATION); } kiss_thin_nfa_fill_stats(nfa_comp); thinnfa_debug_major(("%s: Created the binary Thin NFA %p\n", rname, nfa_comp->runtime_nfa.get())); return KISS_OK; } static void kiss_thin_nfa_select_options(struct thin_nfa_comp_s *nfa_comp, CP_MAYBE_UNUSED u_int compile_flags) { ENUM_SET_FLAG(nfa_comp->flags, THIN_NFA_ENABLE_ANCHOR_OPT); nfa_comp->full_state_tier_num = kiss_thin_nfa_full_tiers_num; ENUM_SET_FLAG(nfa_comp->flags, THIN_NFA_USE_RECURSIVE_COMPILE); return; } // Compiling the SM according to Aho-Corasick algorithm. 
// // The DFA has two types of states: // 1. Full states - have a transition for each possible character. // 2. Partial states - only have transitions for characters that take us forward in some string. // For all other characters, a "fail state" is defined, and the transition is what that state would have done. // // Paraemters: // patterns - a set of string patterns which the resulting automaton would search for. // compile_flags - flags with the KISS_PM_COMP_ prefix. // error - output - on failure, would be set to indicate the reason. // Retuns NULL on error, pointer to a newly allocated handle on success. std::unique_ptr kiss_thin_nfa_compile(const std::list &patterns, u_int compile_flags, KissPMError *error) { static const char rname[] = "kiss_thin_nfa_compile"; struct thin_nfa_comp_s *nfa_comp = NULL; std::unique_ptr nfa; thinnfa_debug_major(("%s: Compiling a Thin NFA, flags=%x\n", rname, compile_flags)); // Creates a new kiss_thin_dfa_handle with initial state allocated nfa_comp = kiss_thin_nfa_comp_create(error); if (nfa_comp == NULL) { thinnfa_debug_err(("%s: Failed to create a compile time structure\n", rname)); kiss_pm_error_set_details(error, KISS_PM_ERROR_INTERNAL, "Failed to allocate the compilation information"); goto finish; } // Enable some optimization flags as needed kiss_thin_nfa_select_options(nfa_comp, compile_flags); // Handle character translation - instead of converting to lowercase, build a translation // tabel and use it when adding patterns to the trie and building transition tables. if (kiss_thin_nfa_create_xlation_tab(nfa_comp, compile_flags) != KISS_OK) { thinnfa_debug_err(("%s: Function kiss_thin_nfa_create_xlation_tab() failed\n", rname)); goto finish; } // Build a trie which contains all the pattern texts. 
for (auto &pattern : patterns) { // Adding each pattern to the the Thin NFA - Aho-Corasick first phase if (kiss_thin_nfa_add_pattern_to_trie(nfa_comp, &pattern) != KISS_OK) { thinnfa_debug_err(("%s: Function kiss_thin_nfa_add_pattern_to_trie() failed\n", rname)); goto finish; } } // Calculate fail states for all NFA states if (kiss_thin_nfa_calc_fail_states(nfa_comp) != KISS_OK) { thinnfa_debug_err(("%s: Function kiss_thin_nfa_calc_fail_states() failed\n", rname)); goto finish; } // Convert the compilation data structure to the runtime structure if (kiss_thin_nfa_build_bnfa(nfa_comp, compile_flags) != KISS_OK) { thinnfa_debug_err(("%s: Function kiss_thin_nfa_build_bnfa() failed\n", rname)); goto finish; } if (!kiss_thin_nfa_is_valid(nfa_comp->runtime_nfa.get())) { thinnfa_debug_err(("%s: Function kiss_thin_nfa_is_valid() failed\n", rname)); goto finish; } // Get the resulting NFA (set NULL to protect from free) nfa = std::move(nfa_comp->runtime_nfa); thinnfa_debug_major(("%s: Successfully compiled the Thin NFA %p\n", rname, nfa.get())); finish: if (nfa_comp != NULL) { // We destroy the compilation data structure, whether we succeed or fail. kiss_thin_nfa_comp_destroy(nfa_comp); } return nfa; }