vectorscan/examples/patbench.cc

/*
 * Copyright (c) 2015-2017, Intel Corporation
 * Copyright (c) 2024, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Hyperscan pattern benchmarker.
 *
 * This program allows users to detect which signatures may be the most
 * expensive in a set of patterns. It is designed for use with small to medium
 * pattern set sizes (e.g. 5-500). If used with very large pattern sets it may
 * take a very long time - the number of recompiles done is g * O(lg2(n)) where
 * g is the number of generations and n is the number of patterns (assuming
 * that n >> g).
 *
 * This utility will return a cumulative series of removed patterns. The first
 * generation will find and remove a single pattern. The second generation will
 * begin with the first pattern removed and find another pattern to remove,
 * etc. So if we have 100 patterns and 15 generations, the final generation's
 * score will be a run over 85 patterns.
 *
 * This utility is probabilistic. It is possible that the pattern removed in a
 * generation is not a particularly expensive pattern. To reduce noise in the
 * results use 'taskset' and set the number of repeats to a level that still
 * completes in reasonable time (this will reduce the effect of random
 * measurement noise).
 *
 * The criterion for performance can be altered by use of the -C<x> flag where
 * <x> can be t,r,s,c,b, selecting pattern matching throughput, scratch size,
 * stream state size (only available in streaming mode), compile time and
 * bytecode size respectively.
 *
 * This utility will also not produce good results if all the patterns are
 * roughly equally expensive.
 *
 * Factor Group Size:
 *
 * If there are multiple expensive patterns that are very similar on the
 * left-hand-side or identical, this utility will typically not find these
 * groups unless the -F flag is used to search for a group size that is equal
 * to or larger than the size of the group of similar patterns.
 *
 * Otherwise, removing a portion of the similar patterns will have no or almost
 * no effect, and the search procedure used relies on the ability to remove all
 * of the similar patterns in at least one search case, something which will
 * only happen if the factor_group_size is large enough.
 *
 * This alters the operation of our tool so that instead of trying to find the
 * single pattern whose removal has the most effect by binary search (the
 * default with factor_group_size == 1), we attempt to find the N patterns
 * whose removal has the most effect by searching over N+1 evenly sized groups,
 * removing only 1/(N+1) of the search signatures per iteration.
 *
 * Note that the number of recompiles done greatly increases with increased
 * factor group size.  For example, with factor_group_size = 1, we do g * 2 *
 * lg2(n) recompiles, while with factor_group_size = 4, we do g * 4 *
 * log(5/4)(n). Informally the number of generations we require goes up as we
 * eliminate a smaller number of signatures and the we have to do more work per
 * generation.
 *
 *
 * Build instructions:
 *
 *     g++ -o patbench patbench.cc $(pkg-config --cflags --libs libhs) -lpcap
 *
 * Usage:
 *
 *     ./patbench [ -n repeats] [ -G generations] [ -C criterion ]
 *             [ -F factor_group_size ] [ -N | -S ] <pattern file> <pcap file>
 *
 *     -n repeats sets the number of times the PCAP is repeatedly scanned
 *        with the pattern
 *     -G generations sets the number of generations that the algorithm is
 *        run for
 *     -N sets non-streaming mode, -S sets streaming mode (default)
 *     -F sets the factor group size (must be >0); this allows the detection
 *        of multiple interacting factors
 *
 *     -C sets the "criterion", which can be either:
 *          t  throughput (the default) - this requires a pcap file
 *          r  scratch size
 *          s  stream state size
 *          c  compile time
 *          b  bytecode size
 *
 * We recommend the use of a utility like 'taskset' on multiprocessor hosts to
 * lock execution to a single processor: this will remove processor migration
 * by the scheduler as a source of noise in the results.
 *
 */

#include <random>
#include <algorithm>
#include <cstring>
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <set>
#include <string>
#include <vector>
#include <unordered_map>

#include <unistd.h>

// We use the BSD primitives throughout as they exist on both BSD and Linux.
#define __FAVOR_BSD
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#ifdef __NetBSD__
#include <net/ethertypes.h>
#include <net/if_ether.h>
#else
#include <net/ethernet.h>
#endif /* __NetBSD__ */
#include <arpa/inet.h>

#include <pcap.h>

#include <hs.h>

using std::cerr;
using std::cout;
using std::endl;
using std::ifstream;
using std::string;
using std::unordered_map;
using std::vector;
using std::set;
using std::min;
using std::max;
using std::copy;
using std::random_device;
using std::mt19937;

enum Criterion {
    CRITERION_THROUGHPUT,
    CRITERION_BYTECODE_SIZE,
    CRITERION_COMPILE_TIME,
    CRITERION_STREAM_STATE,
    CRITERION_SCRATCH_SIZE
};

static bool higher_is_better(Criterion c) {
    return c == CRITERION_THROUGHPUT;
}

static void print_criterion(Criterion c, double val) {
    std::ios::fmtflags f(cout.flags());
    switch (c) {
    case CRITERION_THROUGHPUT:
        cout << std::fixed << std::setprecision(3) << val << " Megabits/s";
        break;
    case CRITERION_COMPILE_TIME:
        cout << std::fixed << std::setprecision(3) << val << " seconds";
        break;
    case CRITERION_BYTECODE_SIZE:
    case CRITERION_STREAM_STATE:
    case CRITERION_SCRATCH_SIZE:
    default:
        cout << static_cast<size_t>(val) << " bytes";
        break;
    }
    cout.flags(f);
}

// Key for identifying a stream in our pcap input data, using data from its IP
// headers.
struct FiveTuple {
    unsigned int protocol;
    unsigned int srcAddr;
    unsigned int srcPort;
    unsigned int dstAddr;
    unsigned int dstPort;

    // Construct a FiveTuple from a TCP or UDP packet.
    FiveTuple(const struct ip *iphdr) {
        // IP fields
        protocol = iphdr->ip_p;
        srcAddr = iphdr->ip_src.s_addr;
        dstAddr = iphdr->ip_dst.s_addr;

        // UDP/TCP ports
        const struct udphdr *uh = (const struct udphdr *)
                (((const char *)iphdr) + (iphdr->ip_hl * 4));
        srcPort = uh->uh_sport;
        dstPort = uh->uh_dport;
    }

    bool operator==(const FiveTuple &a) const {
        return protocol == a.protocol && srcAddr == a.srcAddr &&
               srcPort == a.srcPort && dstAddr == a.dstAddr &&
               dstPort == a.dstPort;
    }
};

// A *very* simple hash function, used when we create an unordered_map of
// FiveTuple objects.
struct FiveTupleHash {
    size_t operator()(const FiveTuple &x) const {
        return x.srcAddr ^ x.dstAddr ^ x.protocol ^ x.srcPort ^ x.dstPort;
    }
};

// Helper function. See end of file.
static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
                          unsigned int *length);

// Match event handler: called every time Hyperscan finds a match.
static
int onMatch(unsigned int id, unsigned long long from, unsigned long long to,
            unsigned int flags, void *ctx) {
    // Our context points to a size_t storing the match count
    size_t *matches = (size_t *)ctx;
    (*matches)++;
    return 0; // continue matching
}

// Simple timing class
class Clock {
public:
    void start() {
        time_start = std::chrono::system_clock::now();
    }

    void stop() {
        time_end = std::chrono::system_clock::now();
    }

    double seconds() const {
        std::chrono::duration<double> delta = time_end - time_start;
        return delta.count();
    }
private:
    std::chrono::time_point<std::chrono::system_clock> time_start, time_end;
};

// Class wrapping all state associated with the benchmark
class Benchmark {
private:
    // Packet data to be scanned
    vector<string> packets;

    // Stream ID for each packet
    vector<size_t> stream_ids;

    // Map used to construct stream_ids
    unordered_map<FiveTuple, size_t, FiveTupleHash> stream_map;

    // Hyperscan compiled database
    hs_database_t *db = nullptr;

    // Hyperscan temporary scratch space
    hs_scratch_t *scratch = nullptr;

    // Vector of Hyperscan stream state
    vector<hs_stream_t *> streams;

    // Count of matches found while scanning
    size_t matchCount = 0;
public:
    ~Benchmark() {
        hs_free_scratch(scratch);
        hs_free_database(db);
    }

    // Initialisation; after this call, Benchmark owns the database and will
    // ensure it is freed.
    void setDatabase(hs_database_t *hs_db) {
        hs_free_database(db); // Free previous database.
        db = hs_db;
        // (Re)allocate scratch to ensure that it is large enough to handle the
        // database.
        hs_error_t err = hs_alloc_scratch(db, &scratch);
        if (err != HS_SUCCESS) {
            cerr << "ERROR: could not allocate scratch space. Exiting." << endl;
            exit(-1);
        }
    }
    const hs_database_t *getDatabase() const {
        return db;
    }

    size_t getScratchSize() const {
        size_t scratch_size;
        hs_error_t err = hs_scratch_size(scratch, &scratch_size);
        if (err != HS_SUCCESS) {
            cerr << "ERROR: could not query scratch space size. Exiting."
                 << endl;
            exit(-1);
        }
        return scratch_size;
    }

    // Read a set of streams from a pcap file
    bool readStreams(const char *pcapFile) {
        // Open PCAP file for input
        char errbuf[PCAP_ERRBUF_SIZE];
        pcap_t *pcapHandle = pcap_open_offline(pcapFile, errbuf);
        if (pcapHandle == nullptr) {
            cerr << "ERROR: Unable to open pcap file \"" << pcapFile
                 << "\": " << errbuf << endl;
            return false;
        }

        struct pcap_pkthdr pktHeader;
        const unsigned char *pktData;
        while ((pktData = pcap_next(pcapHandle, &pktHeader)) != nullptr) {
            unsigned int offset = 0, length = 0;
            if (!payloadOffset(pktData, &offset, &length)) {
                continue;
            }

            // Valid TCP or UDP packet
            const struct ip *iphdr = (const struct ip *)(pktData
                    + sizeof(struct ether_header));
            const char *payload = (const char *)pktData + offset;

            size_t id = stream_map.insert(std::make_pair(FiveTuple(iphdr),
                                          stream_map.size())).first->second;

            packets.push_back(string(payload, length));
            stream_ids.push_back(id);
        }
        pcap_close(pcapHandle);

        return !packets.empty();
    }

    // Return the number of bytes scanned
    size_t bytes() const {
        size_t sum = 0;
        auto packs = [](size_t z, const string &packet) { return z + packet.size(); };
        sum += std::accumulate(packets.begin(), packets.end(), 0, packs);
        // for (const auto &packet : packets) {
        //     sum += packet.size();
        // }
        return sum;
    }

    // Return the number of matches found.
    size_t matches() const {
        return matchCount;
    }

    // Clear the number of matches found.
    void clearMatches() {
        matchCount = 0;
    }

    // Open a Hyperscan stream for each stream in stream_ids
    void openStreams() {
        streams.resize(stream_map.size());
        for (auto &stream : streams) {
            hs_error_t err = hs_open_stream(db, 0, &stream);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to open stream. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Close all open Hyperscan streams (potentially generating any
    // end-anchored matches)
    void closeStreams() {
        for (auto &stream : streams) {
            hs_error_t err =
                hs_close_stream(stream, scratch, onMatch, &matchCount);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to close stream. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Scan each packet (in the ordering given in the PCAP file) through
    // Hyperscan using the streaming interface.
    void scanStreams() {
        for (size_t i = 0; i != packets.size(); ++i) {
            const std::string &pkt = packets[i];
            hs_error_t err = hs_scan_stream(streams[stream_ids[i]],
                                            pkt.c_str(), pkt.length(), 0,
                                            scratch, onMatch, &matchCount);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to scan packet. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Scan each packet (in the ordering given in the PCAP file) through
    // Hyperscan using the block-mode interface.
    void scanBlock() {
        for (size_t i = 0; i != packets.size(); ++i) {
            const std::string &pkt = packets[i];
            hs_error_t err = hs_scan(db, pkt.c_str(), pkt.length(), 0,
                                     scratch, onMatch, &matchCount);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to scan packet. Exiting." << endl;
                exit(-1);
            }
        }
    }
};

// helper function - see end of file
static void parseFile(const char *filename, vector<string> &patterns,
                      vector<unsigned> &flags, vector<unsigned> &ids,
                      vector<string> &originals);

class Sigdata {
    vector<unsigned> flags;
    vector<unsigned> ids;
    vector<string> patterns;
    vector<string> originals;

public:
    Sigdata() {}
    Sigdata(const char *filename) {
        parseFile(filename, patterns, flags, ids, originals);

    }

    const string &get_original(unsigned index) const {
        return originals[index];
    }

    hs_database_t *compileDatabase(unsigned mode, double *compileTime) const {
        hs_database_t *db = nullptr;
        hs_compile_error_t *compileErr;

        // Turn our vector of strings into a vector of char*'s to pass in to
        // hs_compile_multi. (This is just using the vector of strings as
        // dynamic storage.)
        vector<const char *> cstrPatterns;
        cstrPatterns.reserve(patterns.size());
        for (const auto &pattern : patterns) {
            // cppcheck-suppress useStlAlgorithm
            cstrPatterns.push_back(pattern.c_str());
        }

        Clock clock;
        clock.start();
        hs_error_t err = hs_compile_multi(cstrPatterns.data(), flags.data(),
                                          ids.data(), cstrPatterns.size(), mode,
                                          nullptr, &db, &compileErr);
        clock.stop();
        if (err != HS_SUCCESS) {
            if (compileErr->expression < 0) {
                // The error does not refer to a particular expression.
                cerr << "ERROR: " << compileErr->message << endl;
            } else {
                cerr << "ERROR: Pattern '"
                     << patterns[compileErr->expression]
                     << "' failed with error '" << compileErr->message << "'"
                     << endl;
            }
            // As the compileErr pointer points to dynamically allocated memory,
            // if we get an error, we must be sure to release it. This is not
            // necessary when no error is detected.
            hs_free_compile_error(compileErr);
            exit(-1);
        }

        *compileTime = clock.seconds();
        return db;
    }

    unsigned size() const {
        return patterns.size();
    }

    Sigdata cloneExclude(const set<unsigned> &excludeIndexSet) const {
        Sigdata c;
        for (unsigned i = 0, e = size(); i != e; ++i) {
            if (excludeIndexSet.find(i) == excludeIndexSet.end()) {
                c.flags.push_back(flags[i]);
                c.ids.push_back(ids[i]);
                c.patterns.push_back(patterns[i]);
                c.originals.push_back(originals[i]);
            }
        }
        return c;
    }
};

static
void usage(const char *) {
    cerr << "Usage:" << endl << endl;
    cerr << "  patbench [-n repeats] [ -G generations] [ -C criterion ]" << endl
         << "           [ -F factor_group_size ] [ -N | -S ] "
         << "<pattern file> <pcap file>" << endl << endl
         << "    -n repeats sets the number of times the PCAP is repeatedly "
            "scanned" << endl << "       with the pattern." << endl
         << "    -G generations sets the number of generations that the "
            "algorithm is" << endl << "       run for." << endl
         << "    -N sets non-streaming mode, -S sets streaming mode (default)."
         << endl << "    -F sets the factor group size (must be >0); this "
                    "allows the detection" << endl
         << "       of multiple interacting factors." << endl << "" << endl
         << "    -C sets the 'criterion', which can be either:" << endl
         << "         t  throughput (the default) - this requires a pcap file"
         << endl << "         r  scratch size" << endl
         << "         s  stream state size" << endl
         << "         c  compile time" << endl << "         b  bytecode size"
         << endl << endl
         << "We recommend the use of a utility like 'taskset' on "
            "multiprocessor hosts to" << endl
         << "lock execution to a single processor: this will remove processor "
            "migration" << endl
         << "by the scheduler as a source of noise in the results." << endl;
}

static
double measure_stream_time(Benchmark &bench, unsigned int repeatCount) {
    Clock clock;
    bench.clearMatches();
    clock.start();
    for (unsigned int i = 0; i < repeatCount; i++) {
        bench.openStreams();
        bench.scanStreams();
        bench.closeStreams();
    }
    clock.stop();
    double secsScan = clock.seconds();
    return secsScan;
}

static
double measure_block_time(Benchmark &bench, unsigned int repeatCount) {
    Clock clock;
    bench.clearMatches();
    clock.start();
    for (unsigned int i = 0; i < repeatCount; i++) {
        bench.scanBlock();
    }
    clock.stop();
    double secsScan = clock.seconds();
    return secsScan;
}

static
double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode,
                unsigned repeatCount, Criterion criterion,
                bool diagnose = true) {
    double compileTime = 0;
    bench.setDatabase(sigs.compileDatabase(mode, &compileTime));

    switch (criterion) {
    case CRITERION_BYTECODE_SIZE: {
        size_t dbSize;
        hs_error_t err = hs_database_size(bench.getDatabase(), &dbSize);
        if (err != HS_SUCCESS) {
            cerr << "ERROR: could not retrieve bytecode size" << endl;
            exit(1);
        }
        return dbSize;
    }
    case CRITERION_COMPILE_TIME:
        return compileTime;
    case CRITERION_STREAM_STATE: {
        size_t streamStateSize;
        hs_error_t err = hs_stream_size(bench.getDatabase(), &streamStateSize);
        if (err != HS_SUCCESS) {
            cerr << "ERROR: could not retrieve stream state size" << endl;
            exit(1);
        }
        return streamStateSize;
    }
    case CRITERION_SCRATCH_SIZE:
        return bench.getScratchSize();
    case CRITERION_THROUGHPUT:
    default:
        break; // do nothing - we are THROUGHPUT
    }
    double scan_time;
    if (mode == HS_MODE_NOSTREAM) {
        scan_time = measure_block_time(bench, repeatCount);
    } else {
        scan_time = measure_stream_time(bench, repeatCount);
    }
    size_t bytes = bench.bytes();
    size_t matches = bench.matches();
    if (diagnose) {
        std::ios::fmtflags f(cout.flags());
        cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time
             << " sec, Scanned " << bytes * repeatCount << " bytes, Throughput "
             << std::fixed << std::setprecision(3)
             << (bytes * 8 * repeatCount) / (scan_time * 1000000)
             << " Mbps, Matches " << matches << endl;
        cout.flags(f);
    }
    return (bytes * 8 * repeatCount) / (scan_time * 1000000);
}

// Main entry point.
int main(int argc, char **argv) {
    unsigned int repeatCount = 1;
    unsigned int mode = HS_MODE_STREAM;
    Criterion criterion = CRITERION_THROUGHPUT;
    unsigned int gen_max = 10;
    unsigned int factor_max = 1;
    // Process command line arguments.
    int opt;
    while ((opt = getopt(argc, argv, "SNn:G:F:C:")) != -1) {
        switch (opt) {
        case 'F':
            factor_max = atoi(optarg);
            break;
        case 'G':
            gen_max = atoi(optarg);
            break;
        case 'S':
            mode = HS_MODE_STREAM;
            break;
        case 'N':
            mode = HS_MODE_NOSTREAM;
            break;
        case 'C':
            switch (optarg[0]) {
            case 't':
                criterion = CRITERION_THROUGHPUT;
                break;
            case 'b':
                criterion = CRITERION_BYTECODE_SIZE;
                break;
            case 'c':
                criterion = CRITERION_COMPILE_TIME;
                break;
            case 's':
                criterion = CRITERION_STREAM_STATE;
                break;
            case 'r':
                criterion = CRITERION_SCRATCH_SIZE;
                break;
            default:
                cerr << "Unrecognised criterion: " << optarg[0] << endl;
                usage(argv[0]);
                exit(-1);
            }
            break;
        case 'n':
            repeatCount = atoi(optarg);
            break;
        default:
            usage(argv[0]);
            exit(-1);
        }
    }

    if (argc - optind != ((criterion == CRITERION_THROUGHPUT) ? 2 : 1)) {
        usage(argv[0]);
        exit(-1);
    }

    const char *patternFile = argv[optind];
    const char *pcapFile = argv[optind + 1];

    // Read our input PCAP file in
    Benchmark bench;
    if (criterion == CRITERION_THROUGHPUT) {
        if (!bench.readStreams(pcapFile)) {
            cerr << "Unable to read packets from PCAP file. Exiting." << endl;
            exit(-1);
        }
    }

    if ((criterion == CRITERION_STREAM_STATE) && (mode != HS_MODE_STREAM)) {
        cerr << "Cannot evaluate stream state for block mode compile. Exiting."
             << endl;
        exit(-1);
    }

    cout << "Base signatures: " << patternFile;
    if (pcapFile) {
        cout << "\tPCAP input file: " << pcapFile
             << "\tRepeat count: " << repeatCount;
    }
    if (mode == HS_MODE_STREAM) {
        cout << "\tMode: streaming";
    } else {
        cout << "\tMode: block";
    }
    cout << endl;

    Sigdata sigs(patternFile);

    // calculate and show a baseline
    eval_set(bench, sigs, mode, repeatCount, criterion);

    set<unsigned> work_sigs, exclude;

    for (unsigned i = 0; i < sigs.size(); ++i) {
        work_sigs.insert(i);
    }

    double score_base =
        eval_set(bench, sigs, mode, repeatCount, criterion, false);
    bool maximize = higher_is_better(criterion);
    cout << "Number of signatures: " << sigs.size() << endl;
    cout << "Base performance: ";
    print_criterion(criterion, score_base);
    cout << endl;

    unsigned generations = min(gen_max, (sigs.size() - 1) / factor_max);

    cout << "Cutting signatures cumulatively for " << generations
         << " generations" << endl;
    for (unsigned gen = 0; gen < generations; ++gen) {
        cout << "Generation " << gen << " ";
        set<unsigned> s(work_sigs.begin(), work_sigs.end());
        double best = maximize ? 0 : 1000000000000.0;
        unsigned count = 0;
        while (s.size() > factor_max) {
            count++;
            cout << "." << std::flush;
            vector<unsigned> sv(s.begin(), s.end());
            random_device rng;
            mt19937 urng(rng());
            shuffle(sv.begin(), sv.end(), urng);
            unsigned groups = factor_max + 1;
            for (unsigned current_group = 0; current_group < groups;
                 current_group++) {
                unsigned sz = sv.size();
                unsigned lo = (current_group * sz) / groups;
                unsigned hi = ((current_group + 1) * sz) / groups;

                set<unsigned> s_part1(sv.begin(), sv.begin() + lo);
                set<unsigned> s_part2(sv.begin() + hi, sv.end());
                set<unsigned> s_tmp = s_part1;
                s_tmp.insert(s_part2.begin(), s_part2.end());
                set<unsigned> tmp = s_tmp;
                tmp.insert(exclude.begin(), exclude.end());
                Sigdata sigs_tmp = sigs.cloneExclude(tmp);
                double score = eval_set(bench, sigs_tmp, mode, repeatCount,
                                        criterion, false);

                if ((current_group == 0) ||
                    (!maximize ? (score < best) : (score > best))) {
                    s = s_tmp;
                    best = score;
                }
            }
        }
        for (unsigned i = count; i < 16; i++) {
            cout << " ";
        }
        std::ios::fmtflags out_f(cout.flags());
        cout << "Performance: ";
        print_criterion(criterion, best);
        cout << " (" << std::fixed << std::setprecision(3) << (best / score_base)
             << "x) after cutting:" << endl;
        cout.flags(out_f);

        // s now has factor_max signatures
        for (const auto &found : s) {
            exclude.insert(found);
            work_sigs.erase(found);
            cout << sigs.get_original(found) << endl;
        }

        cout << endl;
    }
    return 0;
}

/**
 * Helper function to locate the offset of the first byte of the payload in the
 * given ethernet frame. Offset into the packet, and the length of the payload
 * are returned in the arguments @a offset and @a length.
 */
static
bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
                   unsigned int *length) {
    const ip *iph = (const ip *)(pkt_data + sizeof(ether_header));
    const tcphdr *th = nullptr;

    // Ignore packets that aren't IPv4
    if (iph->ip_v != 4) {
        return false;
    }

    // Ignore fragmented packets.
    if (iph->ip_off & htons(IP_MF | IP_OFFMASK)) {
        return false;
    }

    // IP header length, and transport header length.
    unsigned int ihlen = iph->ip_hl * 4;
    unsigned int thlen = 0;

    switch (iph->ip_p) {
    case IPPROTO_TCP:
        th = (const tcphdr *)((const char *)iph + ihlen);
        thlen = th->th_off * 4;
        break;
    case IPPROTO_UDP:
        thlen = sizeof(udphdr);
        break;
    default:
        return false;
    }

    *offset = sizeof(ether_header) + ihlen + thlen;
    *length = sizeof(ether_header) + ntohs(iph->ip_len) - *offset;

    return *length != 0;
}

static unsigned parseFlags(const string &flagsStr) {
    unsigned flags = 0;
    for (const auto &c : flagsStr) {
        switch (c) {
        case 'i':
            flags |= HS_FLAG_CASELESS; break;
        case 'm':
            flags |= HS_FLAG_MULTILINE; break;
        case 's':
            flags |= HS_FLAG_DOTALL; break;
        case 'H':
            flags |= HS_FLAG_SINGLEMATCH; break;
        case 'V':
            flags |= HS_FLAG_ALLOWEMPTY; break;
        case '8':
            flags |= HS_FLAG_UTF8; break;
        case 'W':
            flags |= HS_FLAG_UCP; break;
        case '\r': // stray carriage-return
            break;
        default:
            cerr << "Unsupported flag \'" << c << "\'" << endl;
            exit(-1);
        }
    }
    return flags;
}

static void parseFile(const char *filename, vector<string> &patterns,
                      vector<unsigned> &flags, vector<unsigned> &ids,
                      vector<string> &originals) {
    ifstream inFile(filename);
    if (!inFile.good()) {
        cerr << "ERROR: Can't open pattern file \"" << filename << "\"" << endl;
        exit(-1);
    }

    for (unsigned i = 1; !inFile.eof(); ++i) {
        string line;
        getline(inFile, line);

        // if line is empty, or a comment, we can skip it
        if (line.empty() || line[0] == '#') {
            continue;
        }

        // otherwise, it should be ID:PCRE, e.g.
        //  10001:/foobar/is

        size_t colonIdx = line.find_first_of(':');
        if (colonIdx == string::npos) {
            cerr << "ERROR: Could not parse line " << i << endl;
            exit(-1);
        }

        // we should have an unsigned int as an ID, before the colon
        unsigned id = std::stoi(line.substr(0, colonIdx).c_str());

        // rest of the expression is the PCRE
        const string expr(line.substr(colonIdx + 1));

        size_t flagsStart = expr.find_last_of('/');
        if (flagsStart == string::npos) {
            cerr << "ERROR: no trailing '/' char" << endl;
            exit(-1);
        }

        string pcre(expr.substr(1, flagsStart - 1));
        string flagsStr(expr.substr(flagsStart + 1, expr.size() - flagsStart));
        unsigned flag = parseFlags(flagsStr);

        originals.push_back(line);
        patterns.push_back(pcre);
        flags.push_back(flag);
        ids.push_back(id);
    }
}