// Implementation of the 'data' keyword. DataKeyword searches a context buffer for a literal byte
// pattern using the `skip` and `shift` tables prepared by calcTables(); the search loop in isMatch()
// refers to this as a Boyer-Moore search.
//
// Construction (throws KeywordError on malformed input):
//   - attrs[0] must carry exactly one parameter: the pattern, enclosed in double quotes and optionally
//     prefixed by '!' (which marks the keyword as negative). An empty pattern is rejected.
//   - The remaining attributes are dispatched through `setops`:
//     "relative", "offset", "depth", "caret", "nocase" and "part".
//   - parseString() converts the quoted text into pattern bytes. The visible part rejects a backslash
//     inside a hex section, a hex section that stops or contains a space between the two nibbles of a
//     byte, a non-hex character in a hex section, an unescaped '"', non-printable characters, and a
//     pattern that ends inside a hex section or right after a backslash.
//   - calcTables() lower-cases the pattern when "nocase" is set and then fills the tables.
//
// Matching (isMatch):
//   - If the buffer for `ctx` cannot be obtained, a negative keyword continues with runNext(prev);
//     otherwise the result is NoMatchFinal.
//   - The search window comes from getStartAndEndOffsets(): it starts at the evaluated "offset"
//     (added to the previous keyword's offset when "relative" is set) and ends at the buffer size,
//     capped by start+depth when "depth" is set and by start+pattern.size() when "caret" is set.
//   - Inside the loop `offset` points just past the candidate match, hence the initial
//     `offset += pattern.size()` and the `buf[offset-1]` look-ups.
//   - Every full match of a positive keyword is passed to runNext() through an OffsetRuntimeState;
//     any result other than NoMatch is returned immediately, otherwise the search continues.
//   - A full match of a negative keyword fails it; a negative keyword with no match succeeds through
//     runNext(prev).
//   - NoMatchFinal is returned instead of NoMatch when isConstant() holds (not relative, and both
//     "offset" and "depth" are constant).
//
// NOTE(review): this copy of the file looks damaged - text between '<' and '>' seems to be missing
// (bare `#include`s; `vector`/`map`/`pair`/`static_cast` without template arguments; truncated loops
// such as `for (uint index = 0; index0)`). Parts of the constructor, calcTables(), parseString(),
// addOffset(), bytesMatched() and isMatch() are therefore not visible here - confirm against the
// pristine source before relying on the details above.
#include "single_keyword.h" #include "output.h" #include "debug.h" #include #include using namespace std; USE_DEBUG_FLAG(D_KEYWORD); class DataKeyword : public SingleKeyword { public: explicit DataKeyword(const vector &attr, const VariablesMapping &vars); MatchStatus isMatch(const I_KeywordRuntimeState* prev) const override; private: void setOffset(const KeywordAttr &attr, const VariablesMapping &vars) { offset.setAttr(attr, vars, "data"); } void setDepth(const KeywordAttr &attr, const VariablesMapping &vars) { depth.setAttr(attr, vars, "data"); } void setCaret(const KeywordAttr &attr, const VariablesMapping &) { is_caret.setAttr(attr, "data"); } void setRelative(const KeywordAttr &attr, const VariablesMapping &) { is_relative.setAttr(attr, "data"); } void setCaseInsensitive(const KeywordAttr &attr, const VariablesMapping &) { is_case_insensitive.setAttr(attr, "data"); } void setContext(const KeywordAttr &attr, const VariablesMapping &) { ctx.setAttr(attr, "data"); } void parseString(const string &str); void addChar(char ch) { pattern.push_back(static_cast(ch)); } void calcTables(); pair getStartAndEndOffsets(uint buf_size, const I_KeywordRuntimeState *prev) const; uint bytesMatched(const Buffer&, uint) const; uint moveOnMatch() const { return pattern.size(); } uint moveOnNoMatch(uint offset_from_end, unsigned char first_unmatched_byte) const { dbgAssert(shift.size() > offset_from_end) << "Shift table of the 'data' keyword is shorter than the offset"; uint skip_size; if (skip[first_unmatched_byte]>offset_from_end) { skip_size = skip[first_unmatched_byte]-offset_from_end; } else { skip_size = 1; } return max(shift[offset_from_end], skip_size); } bool isConstant() const { return !is_relative && offset.isConstant() && depth.isConstant(); } vector pattern; uint skip[256]; vector shift; NumericAttr offset; NumericAttr depth; BoolAttr is_negative; BoolAttr is_caret; BoolAttr is_relative; BoolAttr is_case_insensitive; CtxAttr ctx; static const map setops; }; const map 
DataKeyword::setops = { { "relative", &DataKeyword::setRelative }, { "offset", &DataKeyword::setOffset }, { "depth", &DataKeyword::setDepth }, { "caret", &DataKeyword::setCaret }, { "nocase", &DataKeyword::setCaseInsensitive }, { "part", &DataKeyword::setContext } }; DataKeyword::DataKeyword(const vector &attrs, const VariablesMapping &vars) : offset(), depth() { auto &pattern_param = attrs[0].getParams(); if (pattern_param.size() != 1) throw KeywordError("More than one element in the 'data' keyword pattern"); const string &string_pattern = pattern_param[0]; if (string_pattern.length() == 0) throw KeywordError("No input for the 'data' keyword"); uint start = 0; if (string_pattern[0] == '!') { is_negative.setAttr("data", "negative"); start++; } if (string_pattern[start] != '"') throw KeywordError("The data pattern does not begin with '\"'"); uint end = string_pattern.length()-1; if (string_pattern[end] != '"') throw KeywordError("The data pattern does not end with '\"'"); if (start+1 >= end) throw KeywordError("No input for the 'data' keyword"); parseString(string_pattern.substr(start+1, end-(start+1))); for (uint i = 1; isecond; (this->*set_func)(attrs[i], vars); } calcTables(); } void DataKeyword::calcTables() { if (is_case_insensitive) { for (auto &ch : pattern) { if (isupper(ch)) { ch = tolower(ch); } } } // Initialize skip table - when we meet a character that isn't in the pattern, we skip the whole pattern for (auto &ch_skip : skip) { ch_skip = pattern.size(); } // Go over the characters in the pattern. // We can skip from a character to the end of the pattern. // If a character appears more than once, the latest occurrence takes precedence. 
for (uint index = 0; index0) throw KeywordError("Stoping in the middle of hex string in the 'data' keyword"); hex_mode = false; } break; } case '\\': { if (hex_mode) throw KeywordError("Backslash in hex string in the 'data' keyword"); after_bslash = true; break; } case '"': { throw KeywordError("Unescaped double quotation mark in the 'data' keyword"); break; } default: if (hex_mode) { if (!isxdigit(ch)) { if (ch != ' ') { throw KeywordError( "Illegal character '" + dumpHexChar(ch) + "' in the hex string in the 'data' keyword" ); } if (hex.size()>0) { throw KeywordError("Space separating nibbles in the hex string in the 'data' keyword"); } break; } hex += ch; if (hex.size()>=2) { addChar(stol(hex, nullptr, 16)); hex = ""; } } else { if (!isprint(ch)) { throw KeywordError( "Illegal character '" + dumpHexChar(ch) + "' in the pattern in the 'data' keyword" ); } addChar(ch); } } } if ( hex_mode || after_bslash ) { throw KeywordError("The 'data' keyword's pattern has ended in the middle of the parsing"); } } static uint addOffset(uint offset, int add) { if (add<0 && offset(-add)) return 0; return offset + add; } pair DataKeyword::getStartAndEndOffsets(uint buf_size, const I_KeywordRuntimeState *prev) const { uint relative_offset = is_relative?prev->getOffset(ctx):0; int offset_attr = offset.evalAttr(prev); uint start_offset = addOffset(relative_offset, offset_attr); if (depth.isSet()) { uint depth_size = addOffset(start_offset, depth.evalAttr(prev)); buf_size = std::min(buf_size, depth_size); } if (is_caret) { buf_size = std::min(buf_size, start_offset+static_cast(pattern.size())); } return make_pair(start_offset, buf_size); } uint DataKeyword::bytesMatched(const Buffer &buf, uint offset) const { if (is_case_insensitive) { for (uint i = 0; i0) << "Trying to run on an uninitialized keyword data"; dbgDebug(D_KEYWORD) << "Searching for " << dumpHex(pattern); auto part = Singleton::Consume::by()->get(static_cast(ctx)); if (!part.ok()) { if (is_negative) return runNext(prev); 
return MatchStatus::NoMatchFinal; } const auto &buf = part.unpack(); dbgTrace(D_KEYWORD) << "Full buffer: " << dumpHex(buf); uint offset, max_offset; tie(offset, max_offset) = getStartAndEndOffsets(buf.size(), prev); offset += pattern.size(); bool match_found = false; while (offset<=max_offset) { // Short circuit for the common, simple case where the last byte doesn't match if (skip[buf[offset-1]]) { offset += skip[buf[offset - 1]]; continue; } // Full search Boyer-Moore uint match_size = bytesMatched(buf, offset); if (match_size == pattern.size()) { if (is_negative) { return isConstant()?MatchStatus::NoMatchFinal:MatchStatus::NoMatch; } match_found = true; OffsetRuntimeState new_offset(prev, ctx, offset); auto next_keyword_result = runNext(&new_offset); if (next_keyword_result!=MatchStatus::NoMatch) return next_keyword_result; offset += moveOnMatch(); } else { offset += moveOnNoMatch(match_size, buf[offset-(match_size+1)]); } } // Having no matches is a success for negative keywords if (is_negative && !match_found) return runNext(prev); // If there were no matches and the keyword is not affected by other keywords, then we know that the rule won't match if (isConstant() && !match_found) return MatchStatus::NoMatchFinal; return MatchStatus::NoMatch; } unique_ptr genDataKeyword(const vector &attr, VariablesMapping &known_vars) { return make_unique(attr, known_vars); }