// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. // Licensed under the Apache License, Version 2.0 (the "License"); // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef __kiss_thin_nfa_base_h__ #define __kiss_thin_nfa_base_h__ #include "general_adaptor.h" // ****************************** OVERVIEW ******************************* // Contians basic Thin NFA structure, used by kiss_pm and bolt (prescan) // *********************************************************************** #define KISS_THIN_NFA_ALPHABET_SIZE 256 // Binary representation of the Thin NFA. // This is what's actually used during runtime. // // Offsets in the BNFA // ------------------- // Offsets are signed 32-bit integers, specifying the distance in bytes from the "offset 0" point. // // Offset 0 isn't the BNFA start - there are negative offsets: // All full states are in negative offsets. This is the only way to know that a state is full. // All other states are in positive offsets. // // In full states, offsets are encoded in 16 bits. // In partial states, offsets are encoded in 24 bits. // Offsets are compressed: // Positive offsets are divided by 4. This is possible because all state sizes are a multiple of 4 bytes. // Negative offsets are divided by 512 (the size of a full state). This is possible because negative offsets // are only used for full states, so their offsets are a (negative) multiple of the state size. // // Structure of a BNFA state // ------------------------- // 1. Full state: // a. No header. Identified by the fact that its BNFA offset is negative. // b. 
//      256 transitions, 16bits each (uncompressed offsets).
//      NOTE(review): the kiss_bnfa_short_offset_t typedef below and the full state example
//      (200 stored as 50) both suggest these 16-bit values are *compressed* offsets -
//      confirm and fix the wording.
// 2. Common header, to partial and match states:
//   a. State type - 2 bits.
// 3. Partial state:
//   a. State type - 2 bits.
//   b. Transition number - 6 bits.
//   c. Fail state offset (compressed) - 24 bits.
//   d. Per transition:
//      1) Character - 8 bits
//      2) Next state offset (compressed) - 24 bits
// 4. Match state:
//   a. State type - 2 bits.
//   b. Unused - 6 bits.
//   c. Match ID - 24 bits.
//
// Examples:
//
// Partial state, 2 transitions - 'a'->100, 'b'->104, fail-> -3072
//         +----+---+-----+---+-----+---+-----+
//   Bits: | 2  | 6 | 24  | 8 | 24  | 8 | 24  |
//         +----+---+-----+---+-----+---+-----+
//   Data: | P  | 2 | -3  | a | 25  | b | 26  |
//         +----+---+-----+---+-----+---+-----+
//   NOTE(review): with negative offsets divided by 512, -3072 would be stored as -6, not -3.
//   Either the fail offset or the stored value in this example looks stale - confirm.
//
// Full state, 0x00->200, 0x01->204, 0xff->280
//         +-----+-----+      +-----+
//   Bits: | 16  | 16  |      | 16  |
//         +-----+-----+ .... +-----+
//   Data: | 50  | 51  |      | 70  |
//         +-----+-----+      +-----+

// Types for normal and compressed (see comment above) BNFA offsets
typedef int kiss_bnfa_offset_t;             // Offset in bytes
typedef int kiss_bnfa_comp_offset_t;        // Compressed offset
typedef short kiss_bnfa_short_offset_t;     // Compressed offset in 16bits (for full states)

// Value marking an invalid offset (0x80000000 cast to int).
#define KISS_BNFA_OFFSET_INVALID ((int)0x80000000)

// State types
typedef enum {
    KISS_BNFA_STATE_PARTIAL,    // Partial state (header + explicit transition list)
    KISS_BNFA_STATE_MATCH,      // Match state (header + match ID)
    KISS_BNFA_STATE_FULL,       // Full state (256 transitions, no header)

    KISS_BNFA_STATE_TYPE_NUM    // Number of state types above - not a state type itself
} kiss_bnfa_state_type_t;

// State structure

// Use some header bits for the state type
#define KISS_BNFA_STATE_TYPE_BITS 2

// The type must fit in KISS_BNFA_STATE_TYPE_BITS bits
// NOTE(review): the statement below is truncated in this copy of the file. Everything between
// "(1<" and "common.type; }" is missing, which presumably included the state structures
// (kiss_bnfa_partial_state_t, kiss_bnfa_match_state_t, kiss_bnfa_full_state_t, kiss_bnfa_state_t)
// and the helpers used further down (kiss_bnfa_offset_compress, kiss_bnfa_offset_to_state,
// kiss_bnfa_state_type). The fragment is kept exactly as found - restore the lost text from
// the upstream file before building.
KISS_ASSERT_COMPILE_TIME(KISS_BNFA_STATE_TYPE_NUM <= (1<common.type; }

// State size

// Get the size of a partial state with N transitions
//
// trans_num - number of entries in the state's transition table.
// Returns the offset of the `transitions` member inside kiss_bnfa_partial_state_t plus
// trans_num times the size of struct kiss_bnfa_partial_transition_s.
static CP_INLINE u_int
kiss_bnfa_partial_state_size(u_int trans_num)
{
    // Header + transition table
    return KISS_OFFSETOF(kiss_bnfa_partial_state_t, transitions) + sizeof(struct kiss_bnfa_partial_transition_s) * (trans_num);
}

// Get the size of an existing state
//
// bnfa   - BNFA pointer, passed on to kiss_bnfa_state_type() and kiss_bnfa_offset_to_state().
// offset - offset of the state in bytes; it is compressed before the state type lookup.
// Returns the state's size according to its type:
//   partial - computed from the transition number stored in the state,
//   match   - sizeof(kiss_bnfa_match_state_t),
//   full    - sizeof(kiss_bnfa_full_state_t),
//   and 0 if the type is none of the above.
static CP_INLINE u_int
kiss_bnfa_state_size(const kiss_bnfa_state_t *bnfa, kiss_bnfa_offset_t offset)
{
    switch (kiss_bnfa_state_type(bnfa, kiss_bnfa_offset_compress(offset))) {
        case KISS_BNFA_STATE_PARTIAL: {
            // A partial state's size depends on how many transitions it holds
            const kiss_bnfa_state_t *state = kiss_bnfa_offset_to_state(bnfa, offset);
            return kiss_bnfa_partial_state_size(state->partial.trans_num);
        }
        case KISS_BNFA_STATE_MATCH: return sizeof(kiss_bnfa_match_state_t);
        case KISS_BNFA_STATE_FULL: return sizeof(kiss_bnfa_full_state_t);

        case KISS_BNFA_STATE_TYPE_NUM: break; // Can't happen
    }

    // Reached only when the type matched no real state type
    return 0;
}

// Flags for kiss_thin_nfa_s.flags and kiss_thin_nfa_prescan_hdr_s.flags
enum kiss_thin_nfa_flags_e {
    KISS_THIN_NFA_USE_CHAR_XLATION = 0x01,      // Used for caseless and/or digitless
    KISS_THIN_NFA_HAS_ANCHOR = 0x02,            // State at offset 0 is anchored root, not root
};

#endif // __kiss_thin_nfa_base_h__