From fae8d2112769f6b785b9cdfb98ebd74da1b404ea Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 20 Nov 2017 13:25:38 +1100 Subject: [PATCH] Introduce hsdump development tool for producing information during compilation. This tool is intended to assist Hyperscan developers with development and debugging by providing insights into the generated bytecode. --- tools/hsdump/CMakeLists.txt | 19 ++ tools/hsdump/main.cpp | 575 ++++++++++++++++++++++++++++++++++++ 2 files changed, 594 insertions(+) create mode 100644 tools/hsdump/CMakeLists.txt create mode 100644 tools/hsdump/main.cpp diff --git a/tools/hsdump/CMakeLists.txt b/tools/hsdump/CMakeLists.txt new file mode 100644 index 00000000..c3db5235 --- /dev/null +++ b/tools/hsdump/CMakeLists.txt @@ -0,0 +1,19 @@ +# dump support is required +if (NOT DUMP_SUPPORT) + return() +endif () + +if (WIN32) + return() +endif () + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${PROJECT_SOURCE_DIR}/util) + +# only set these after all tests are done +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") + +add_executable(hsdump main.cpp) +target_link_libraries(hsdump hs expressionutil crosscompileutil) + diff --git a/tools/hsdump/main.cpp b/tools/hsdump/main.cpp new file mode 100644 index 00000000..53a72d20 --- /dev/null +++ b/tools/hsdump/main.cpp @@ -0,0 +1,575 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Hyperscan compile dump tool + * + * Given a set of patterns, dump all available data from the compilation + * process into a directory. This tool is intended to assist Hyperscan + * developers with development and debugging by providing insights into the + * built bytecode. + * + * Note: requires that hyperscan is built with DUMP_SUPPORT enabled.
+ */ + +#include "config.h" + +#include "cross_compile.h" +#include "ExpressionParser.h" +#include "expressions.h" +#include "expression_path.h" +#include "string_util.h" + +#include "grey.h" +#include "hs_compile.h" +#include "hs_internal.h" +#include "scratch_dump.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +using namespace std; +using namespace ue2; +using boost::ptr_vector; + +namespace /* anonymous */ { + +// Input pattern file +string patternfile; +// Output path +string dumpbase("."); +// Compile with streaming +bool streaming = true; +bool vectored = false; + +bool echoSigs = false; +bool dump_db = false; +bool force_utf8 = false; +int force_prefilter = 0; + +unsigned int onlyId; +u32 somFlags = 0; +unsigned somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE; + +bool singleId = false; +string signatureFile; + +unique_ptr plat_info; + +bool dump_intermediate = true; +bool force_edit_distance = false; +u32 edit_distance = 0; + +} // namespace + +// Usage statement. 
+static +void usage(const char *name, const char *error) { + printf("Usage: %s [OPTIONS...]\n\n", name); + printf("Options:\n\n"); + printf(" -h Display help and exit.\n"); + printf(" -G OVERRIDES Overrides for the grey box.\n"); + printf(" -e PATH Path to expression directory or file.\n"); + printf(" -s FILE Signature file to use.\n"); + printf(" -z NUM Signature ID to use.\n"); + printf(" -N, --block Compile in block mode" + " (default: streaming).\n"); + printf(" -V, --vectored Compile in vectored mode" + " (default: streaming).\n"); + printf(" -o, --output PATH\n"); + printf(" Use data dump directory PATH (default: dump).\n"); + printf(" WARNING: existing files in output directory are" + " deleted.\n"); + printf(" -x NAME Cross-compile for arch NAME\n"); + printf(" -D, --dump_db Dump the final database.\n"); + printf(" -P, --print Echo signature set to stdout.\n"); + printf(" -X, --no_intermediate\n"); + printf(" Do not dump intermediate data.\n"); + printf("\n"); + printf("Pattern flags:\n"); + printf(" -d NUMBER Set SOM precision mode (default: 8 (large)).\n"); + printf(" -E DISTANCE Match all patterns within edit distance" + " DISTANCE.\n"); + printf(" -8 Force UTF8 mode on all patterns.\n"); + printf(" -L Apply HS_FLAG_SOM_LEFTMOST to all patterns.\n"); + printf(" --prefilter Apply HS_FLAG_PREFILTER to all patterns.\n"); + printf("\n"); + printf("Example:\n"); + printf("$ %s -e pattern.file -s sigfile\n", name); + printf("\n"); + + if (error) { + printf("Error: %s\n", error); + } +} + +static +void processArgs(int argc, char *argv[], Grey &grey) { + static const char *options = "d:De:E:G:hLNo:Ps:VXx:z:8"; + static struct option longOptions[] = { + {"dump_db", no_argument, nullptr, 'D'}, + {"help", no_argument, nullptr, 'h'}, + {"output", required_argument, nullptr, 'o'}, + {"block", no_argument, nullptr, 'N'}, + {"no_intermediate", no_argument, nullptr, 'X'}, + {"vectored", no_argument, nullptr, 'V'}, + {"print", no_argument, nullptr, 'P'}, + {"utf8", 
no_argument, nullptr, '8'}, + {"prefilter", no_argument, &force_prefilter, 1}, + {"som-width", required_argument, nullptr, 'd'}, + {nullptr, 0, nullptr, 0} + }; + + for (;;) { + int c = getopt_long(argc, argv, options, longOptions, nullptr); + + if (c < 0) { + break; + } + switch (c) { + case 'D': + dump_db = true; + break; + + case 'd': { + unsigned dist; + if (!fromString(optarg, dist)) { + usage(argv[0], "Must provide an integer argument to '-d' flag"); + exit(1); + } + switch (dist) { + case 2: + somPrecisionMode = HS_MODE_SOM_HORIZON_SMALL; + break; + case 4: + somPrecisionMode = HS_MODE_SOM_HORIZON_MEDIUM; + break; + case 8: + somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE; + break; + default: + usage(argv[0], "SOM precision must be 2, 4 or 8"); + exit(1); + } + break; + } + + case 'h': + usage(argv[0], nullptr); + exit(0); + + case 'e': + patternfile = optarg; + break; + +#ifndef RELEASE_BUILD + case 'G': + applyGreyOverrides(&grey, string(optarg)); + break; +#endif + + case 'L': + somFlags |= HS_FLAG_SOM_LEFTMOST; + break; + + case 'o': + dumpbase = optarg; + break; + + case 'P': + echoSigs = true; + break; + + case 's': + signatureFile.assign(optarg); + break; + + case 'N': + streaming = false; + break; + + case 'V': + streaming = false; + vectored = true; + break; + + case 'X': + dump_intermediate = false; + break; + + case 'x': + plat_info = xcompileReadMode(optarg); + if (!plat_info) { + usage(argv[0], xcompileUsage().c_str()); + exit(1); + } + break; + + case 'z': + if (!fromString(optarg, onlyId)) { + usage(argv[0], "Argument to '-z' flag must be an integer"); + exit(1); + } + singleId = true; + break; + case 'E': { + u32 dist; + if (!fromString(optarg, dist)) { + usage(argv[0], "Argument to '-E' flag must be an integer"); + exit(1); + } + force_edit_distance = true; + edit_distance = dist; + break; + } + case '8': + force_utf8 = true; + break; + case 0: + break; + default: + usage(argv[0], ""); + exit(1); + } + } + + if (patternfile.empty() && 
!signatureFile.empty()) { + /* attempt to infer an expression directory */ + patternfile = inferExpressionPath(signatureFile); + } + + if (patternfile.size() == 0) { + usage(argv[0], "No pattern file provided"); + exit(1); + } + if (dumpbase.size() == 0) { + usage(argv[0], "No output directory provided"); + exit(1); + } +} + +static +void dumpDb(const struct hs_database *out, const Grey &grey) { + char *bytes = nullptr; + size_t len = 0; + hs_error_t err = hs_serialize_database(out, &bytes, &len); + if (err != HS_SUCCESS) { + printf("ERROR: hs_serialize_database() failed with error %u\n", err); + return; + } + + FILE *f = fopen((grey.dumpPath + "db.raw").c_str(), "w"); + if (!f) { + printf("ERROR: unable to write database out: %s", strerror(errno)); + } else { + fwrite(bytes, 1, len, f); + fclose(f); + } + free(bytes); +} + +static +u32 buildDumpFlags(void) { + u32 flags = 0; + flags |= Grey::DUMP_BASICS; + flags |= Grey::DUMP_IMPL; + + if (dump_intermediate) { + flags |= Grey::DUMP_PARSE; + flags |= Grey::DUMP_INT_GRAPH; + } + + return flags; +} + +static +void clearDir(const string &path) { + DIR *dir = opendir(path.c_str()); + if (!dir) { + printf("ERROR: couldn't open location %s: %s\n", path.c_str(), + strerror(errno)); + exit(1); + } + + struct dirent *d_ent; + while (nullptr != (d_ent = readdir(dir))) { + string name(d_ent->d_name); + if (name == "." 
|| name == "..") { + continue; + } + string f = path + '/' + name; + if (unlink(f.c_str()) < 0) { + printf("ERROR: couldn't remove file %s: %s\n", f.c_str(), + strerror(errno)); + } + } + closedir(dir); +} + +static +void prepareDumpLoc(string parent, string path, u32 flags, Grey &grey) { + struct stat st; + if (stat(parent.c_str(), &st)) { + // Create dump location if not found + mode_t mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | + S_IROTH | S_IXOTH; + if (mkdir(parent.c_str(), mode) < 0) { + printf("ERROR: could not create dump location %s: %s\n", + parent.c_str(), strerror(errno)); + exit(1); + } + } + + // If not separator terminated, add separator + if (parent.back() != '/') { + parent.push_back('/'); + } + + // Append path to parent + path = parent.append(path); + if (stat(path.c_str(), &st)) { + // Create dump location if not found + mode_t mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | + S_IROTH | S_IXOTH; + if (mkdir(path.c_str(), mode) < 0) { + printf("ERROR: could not create dump location %s: %s\n", + path.c_str(), strerror(errno)); + exit(1); + } + } + + // remove anything in the dump dir - most likely stale + clearDir(path); + + // If not separator terminated, add separator + if (path.back() != '/') { + path.push_back('/'); + } + + grey.dumpPath = path; + grey.dumpFlags = flags; +} + +static +unsigned buildMode() { + unsigned mode = 0; + if (streaming) { + mode |= HS_MODE_STREAM; + mode |= somPrecisionMode; + assert(!vectored); + } else if (vectored) { + mode |= HS_MODE_VECTORED; + } else { + mode |= HS_MODE_BLOCK; + } + + return mode; +} + +static +void dumpScratch(const hs_database_t *db, const Grey &grey) { + hs_scratch_t *scratch = nullptr; + hs_error_t err = hs_alloc_scratch(db, &scratch); + if (err == HS_SUCCESS) { + FILE *f = fopen((grey.dumpPath + "scratch.txt").c_str(), "w"); + if (f) { + dumpScratch(scratch, f); + fclose(f); + } else { + printf("ERROR: could not open %s: %s\n", + (grey.dumpPath + "scratch.txt").c_str(), 
strerror(errno)); + } + } else { + printf("ERROR: hs_alloc_scratch() failed with error %u\n", err); + } + hs_free_scratch(scratch); +} + +static +void dumpInfo(const hs_database_t *db, const Grey &grey) { + char *info = nullptr; + hs_error_t err = hs_database_info(db, &info); + if (err == HS_SUCCESS) { + FILE *f = fopen((grey.dumpPath + "db_info.txt").c_str(), "w"); + if (f) { + fprintf(f, "%s\n", info); + fclose(f); + } else { + printf("ERROR: could not open %s: %s\n", + (grey.dumpPath + "db_info.txt").c_str(), strerror(errno)); + } + } else { + printf("ERROR: hs_database_info() failed with error %u\n", err); + } + free(info); +} + +static +unsigned int dumpDataMulti(const vector &patterns, + const vector &flags, + const vector &ids, + ptr_vector &ext, + const Grey &grey) { + unsigned mode = buildMode(); + + printf("Compiling %zu patterns.\n", patterns.size()); + + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err; + + hs_error_t err = hs_compile_multi_int( + patterns.data(), flags.data(), ids.data(), ext.c_array(), + patterns.size(), mode, plat_info.get(), &db, &compile_err, grey); + + if (err != HS_SUCCESS) { + if (compile_err && compile_err->message) { + printf("ERROR: Compile failed: %s\n", compile_err->message); + } else { + printf("ERROR: hs_compile_multi_int() returned error %u", err); + } + hs_free_compile_error(compile_err); + return 1; + } + + assert(db); + dumpScratch(db, grey); + dumpInfo(db, grey); + + if (dump_db) { + dumpDb(db, grey); + } + + hs_free_database(db); + return 0; +} + +static +unsigned int dumpData(const ExpressionMap &exprMap, Grey &grey) { + u32 dump_flags = buildDumpFlags(); + string path = "dump"; + prepareDumpLoc(dumpbase, path, dump_flags, grey); + printf("Dumping data for all patterns in '%s' to '%s/%s'\n", + patternfile.c_str(), dumpbase.c_str(), path.c_str()); + + string pat_name = grey.dumpPath + "patterns.txt"; + FILE *pat_out = fopen(pat_name.c_str(), "w"); + if (!pat_out) { + printf("ERROR: unable to open 
%s\n", pat_name.c_str()); + return 1; + } + + const size_t numPatterns = exprMap.size(); + vector expressions(numPatterns); + vector ids(numPatterns); + vector flags(numPatterns); + ptr_vector ext; + ext.reserve(numPatterns); + + size_t n = 0; + for (const auto &elem : exprMap) { + const auto &id = elem.first; + const auto ®ex = elem.second; + if (echoSigs) { + printf("%u:%s\n", id, regex.c_str()); + } + fprintf(pat_out, "%u:%s\n", id, regex.c_str()); + + ext.push_back(new hs_expr_ext); + ids[n] = id; + if (!readExpression(regex, expressions[n], &flags[n], &ext[n])) { + printf("ERROR: failed to parse expr: %s (id %u)\n", + regex.c_str(), id); + fclose(pat_out); + return 1; + } + + if (force_edit_distance) { + ext[n].flags |= HS_EXT_FLAG_EDIT_DISTANCE; + ext[n].edit_distance = edit_distance; + } + + flags[n] |= somFlags; + if (force_utf8) { + flags[n] |= HS_FLAG_UTF8; + } + if (force_prefilter) { + flags[n] |= HS_FLAG_PREFILTER; + } + + n++; + } + assert(n); + + // Our compiler takes an array of plain ol' C strings. + vector patterns(n); + for (size_t i = 0; i < n; i++) { + patterns[i] = expressions[i].c_str(); + } + + fclose(pat_out); + return dumpDataMulti(patterns, flags, ids, ext, grey); +} + +int main(int argc, char *argv[]) { + Grey grey; + grey.dumpFlags = Grey::DUMP_BASICS; + + processArgs(argc, argv, grey); + + // Load patterns + ExpressionMap exprMap; + loadExpressions(patternfile, exprMap); + + if (!signatureFile.empty()) { + SignatureSet sigs; + loadSignatureList(signatureFile, sigs); + exprMap = limitToSignatures(exprMap, sigs); + } + + if (singleId) { + exprMap = limitToSignatures(exprMap, {onlyId}); + } + + if (exprMap.empty()) { + printf("No signatures.\n"); + return 1; + } + + return dumpData(exprMap, grey); +}