Initial commit of Hyperscan

This commit is contained in:
Matthew Barr
2015-10-20 09:13:35 +11:00
commit 904e436f11
610 changed files with 213627 additions and 0 deletions

259
src/som/slot_manager.cpp Normal file
View File

@@ -0,0 +1,259 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief SOM Slot Manager.
*/
#include "slot_manager.h"
#include "slot_manager_internal.h"
#include "ue2common.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_is_equal.h"
#include "nfagraph/ng_som_util.h"
#include "nfagraph/ng_region.h"
#include "util/charreach.h"
#include "util/make_unique.h"
#include "util/dump_charclass.h"
#include "util/verify_types.h"
#include <cassert>
#include <deque>
#include <utility>
#include <boost/functional/hash/hash.hpp>
using namespace std;
namespace ue2 {
/** \brief Define this to disable the cache and have everyone get their own
* SOM slot. */
//#define NO_SLOT_CACHING
/**
 * \brief Build a cache entry describing one SOM slot assignment.
 *
 * The entry stores its own clone of \p prefix_in (made with cloneHolder)
 * rather than referring to the caller's graph; every other argument is copied
 * straight into the member of the corresponding name.
 */
SlotCacheEntry::SlotCacheEntry(const NGHolder &prefix_in,
                               const CharReach &escapes_in, u32 parent_in,
                               bool is_reset_in, u32 slot_in)
    : prefix(cloneHolder(prefix_in)),
      escapes(escapes_in),
      parent_slot(parent_in),
      is_reset(is_reset_in),
      slot(slot_in) {
}
/**
 * \brief Hash a SlotCacheEntry so it can key the slot cache.
 *
 * The hash folds together, in this order: the hash of the prefix graph, the
 * parent slot, the reset flag and the hash of the escape class. The entry's
 * \c slot member does not participate.
 */
size_t SlotEntryHasher::operator()(const SlotCacheEntry &e) const {
    assert(e.prefix);

    const auto &graph = *e.prefix;

    size_t seed = 0;
    boost::hash_combine(seed, hash_holder(graph));
    boost::hash_combine(seed, e.parent_slot);
    boost::hash_combine(seed, e.is_reset);
    boost::hash_combine(seed, e.escapes.hash());

    DEBUG_PRINTF("%zu vertices, parent_slot=%u, escapes=%s, is_reset=%d "
                 "hashes to %zx\n", num_vertices(graph), e.parent_slot,
                 describeClass(e.escapes, 10, CC_OUT_TEXT).c_str(),
                 (int)e.is_reset, seed);

    return seed;
}
/**
 * \brief Equality predicate for slot cache entries.
 *
 * Two entries match when their parent slot, reset flag, escape class and
 * prefix graph (as judged by is_equal) all match. The fields are checked in
 * that order and the first mismatch ends the comparison.
 */
bool SlotEntryEqual::operator()(const SlotCacheEntry &a,
                                const SlotCacheEntry &b) const {
    assert(a.prefix);
    assert(b.prefix);

    if (!(a.parent_slot == b.parent_slot)) {
        return false;
    }
    if (!(a.is_reset == b.is_reset)) {
        return false;
    }
    if (!(a.escapes == b.escapes)) {
        return false;
    }

    // NOTE: slot not compared.
    return is_equal(*a.prefix, *b.prefix);
}
/**
 * \brief Record that \p slot has been assigned to the given combination of
 * prefix graph, escape class, parent slot and reset flag.
 *
 * The entry is constructed in place inside \c store from the arguments.
 */
void SlotCache::insert(const NGHolder &prefix, const CharReach &escapes,
                       u32 parent_slot, bool is_reset, u32 slot) {
    store.emplace(prefix, escapes, parent_slot, is_reset, slot);
}
/**
 * \brief Look up a previously inserted entry.
 *
 * \return Pointer to the stored entry matching the arguments, or nullptr when
 * the store holds no such entry.
 */
const SlotCacheEntry *SlotCache::find(const NGHolder &prefix,
                                      const CharReach &escapes, u32 parent_slot,
                                      bool is_reset) {
    // Temporary probe entry used purely as the search key.
    SlotCacheEntry probe(prefix, escapes, parent_slot, is_reset,
                         0 /* unused for searching with SlotEntryEqual */);

    const auto hit = store.find(probe);
    return hit == store.end() ? nullptr : &(*hit);
}
/**
 * \brief Create a manager with no slots handed out, an empty slot cache and
 * no history requirement; \p p is stored as the SOM precision.
 */
SomSlotManager::SomSlotManager(u8 p)
    : nextSomSlot(0),
      cache(ue2::make_unique<SlotCache>()),
      historyRequired(0),
      precision(p) {
}
// Defined out of line, in the translation unit that includes the SlotCache
// definition held through the unique_ptr member.
SomSlotManager::~SomSlotManager() = default;
/**
 * \brief Return the SOM slot for the given prefix / escapes / reset flag /
 * parent slot combination, allocating a new slot on first request.
 *
 * Repeated requests with an equal combination return the slot handed out the
 * first time. When NO_SLOT_CACHING is defined every call gets a fresh slot.
 */
u32 SomSlotManager::getSomSlot(const NGHolder &prefix,
                               const CharReach &escapes, bool is_reset,
                               u32 parent_slot) {
    assert(parent_slot == NO_PARENT || parent_slot < nextSomSlot);

    DEBUG_PRINTF("prefix with %zu vertices, parent_slot=%u\n",
                 num_vertices(prefix), parent_slot);
    DEBUG_PRINTF("nextSomSlot=%u\n", nextSomSlot);

#ifdef NO_SLOT_CACHING
    return nextSomSlot++;
#endif

    if (const SlotCacheEntry *cached =
            cache->find(prefix, escapes, parent_slot, is_reset)) {
        DEBUG_PRINTF("cache hit: slot %u\n", cached->slot);
        return cached->slot;
    }

    DEBUG_PRINTF("cache miss: handing out new slot %u\n", nextSomSlot);
    const u32 fresh = nextSomSlot;
    cache->insert(prefix, escapes, parent_slot, is_reset, fresh);
    nextSomSlot++;
    return fresh;
}
/**
 * \brief Pick a SOM slot for an initial resetting sentinel, sharing a slot
 * with earlier sentinels where the checks below allow it.
 *
 * Outline of what the body does:
 *  - clones \p prefix and de-duplicates it against cache->initial_prefixes, so
 *    equal prefixes end up represented by the same shared_ptr;
 *  - clones \p g and rebuilds \p region_map in terms of the cloned vertices;
 *  - walks the existing cache->initial_resets groups and, for each one, tests
 *    the new (prefix, g) pair against every entry already in the group using
 *    sentClearsTail(); the first group where no entry fails is reused;
 *  - otherwise a new group is created with a freshly allocated slot.
 * In both cases an entry for this request is appended to the chosen group.
 *
 * \param prefix the sentinel prefix graph; asserted to have no big cycles.
 * \param g the full graph the sentinel belongs to.
 * \param region_map region index for vertices of \p g.
 * \param last_sent_region last region of the sentinel; asserted non-zero.
 * \param prefix_already_implemented set to true only when a group is reused
 *        and that group already contains an entry with this same prefix;
 *        false otherwise.
 * \return the slot of the chosen (or newly created) group.
 */
u32 SomSlotManager::getInitialResetSomSlot(const NGHolder &prefix,
const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &region_map,
u32 last_sent_region, bool *prefix_already_implemented) {
DEBUG_PRINTF("getting initial reset; last sent region %u\n",
last_sent_region);
assert(last_sent_region);
assert(!hasBigCycles(prefix));
*prefix_already_implemented = false;
#ifdef NO_SLOT_CACHING
return nextSomSlot++;
#endif
// Canonicalise the prefix: if an equal prefix is already cached, use the
// cached pointer so that the pointer comparisons below (e.sent == pp) work.
shared_ptr<const NGHolder> pp = cloneHolder(prefix);
assert(hash_holder(*pp) == hash_holder(prefix));
auto hs_it = cache->initial_prefixes.find(pp);
if (hs_it != cache->initial_prefixes.end()) {
DEBUG_PRINTF("pulling from cache\n");
pp = *hs_it;
} else {
DEBUG_PRINTF("storing in cache entry %zu, hash=%llu\n",
cache->initial_prefixes.size(), hash_holder(*pp));
cache->initial_prefixes.insert(pp);
}
// Clone a copy of g (and its region map) that we will be able to store
// later on.
shared_ptr<NGHolder> gg = make_shared<NGHolder>();
ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
cloneHolder(*gg, g, &orig_to_copy);
ue2::unordered_map<NFAVertex, u32> gg_region_map;
for (const auto &m : region_map) {
// NOTE(review): m is drawn from region_map, so this assert cannot fail;
// presumably the intent was to check orig_to_copy -- confirm.
assert(contains(region_map, m.first));
gg_region_map.emplace(orig_to_copy.at(m.first), m.second);
}
// first_bad_region stays ~0U unless sentClearsTail() writes a region to it;
// the assert below ties a true result to the ~0U value.
u32 first_bad_region = ~0U;
UNUSED bool rv = sentClearsTail(g, region_map, *pp, last_sent_region,
&first_bad_region);
assert(!rv || first_bad_region == ~0U);
InitialResetInfo *ir = nullptr;
for (auto &reset : cache->initial_resets) {
/* is this prefix already in our list? */
auto has_prefix_func =
[&pp](const InitialResetEntry &e) { return e.sent == pp; };
bool already_seen_prefix =
find_if(reset.entries.begin(), reset.entries.end(),
has_prefix_func) != reset.entries.end();
for (auto &e : reset.entries) {
u32 temp = 0;
/* we don't need to test against sentinels which are identical to
* our current one as races don't matter and we know it clears
* sufficiently. */
// Check 1: the existing entry's sentinel against the new graph g.
if (e.sent != pp &&
!sentClearsTail(g, region_map, *e.sent, last_sent_region - 1,
&temp) &&
(temp < first_bad_region || first_bad_region == ~0U)) {
goto try_next;
}
/* if we have already seen the prefix it must be fine */
// Check 2: the new prefix against the existing entry's stored graph.
if (!already_seen_prefix &&
!sentClearsTail(*e.body, e.body_regions, prefix,
e.sent_region - 1, &temp) &&
(temp < e.first_bad_region || e.first_bad_region == ~0U)) {
goto try_next;
}
}
DEBUG_PRINTF("sharing\n");
if (already_seen_prefix) {
/* if we have already created this prefix using this som slot, we
* can avoid creating another copy of the prefix. */
*prefix_already_implemented = true;
}
ir = &reset;
goto found;
try_next:;
}
// No existing group was compatible: start a new one with a fresh slot.
// ir is taken after the emplace_back, so it refers to the new element.
cache->initial_resets.emplace_back(nextSomSlot++);
ir = &cache->initial_resets.back();
found:
// Remember this user (canonical prefix, cloned graph and its regions) so
// later requests can be checked against it.
ir->entries.emplace_back(pp, gg, gg_region_map, last_sent_region,
first_bad_region);
return ir->slot;
}
/**
 * \brief Allocate a new SOM slot without recording it in the slot cache.
 * \return index of the newly allocated slot.
 */
u32 SomSlotManager::getPrivateSomSlot(void) {
    const u32 allocated = nextSomSlot;
    ++nextSomSlot;
    return allocated;
}
/**
 * \brief Reset the next-slot counter to \p num, which must not exceed the
 * current counter value. Only the counter is modified here.
 */
void SomSlotManager::rollbackSomTo(u32 num) {
    assert(num <= nextSomSlot);
    nextSomSlot = num;
}
/** \brief Current value of the next-slot counter, i.e. the slot count. */
u32 SomSlotManager::numSomSlots() const {
    const u32 count = nextSomSlot;
    return count;
}
/**
 * \brief Take ownership of a reverse NFA and raise the recorded history
 * requirement to at least \p maxWidth.
 *
 * \return index of the NFA within the stored sequence.
 */
u32 SomSlotManager::addRevNfa(aligned_unique_ptr<NFA> nfa, u32 maxWidth) {
    const u32 index = verify_u32(rev_nfas.size());
    rev_nfas.push_back(std::move(nfa));

    // A rev nfa commits us to having enough history around to handle its
    // max width.
    if (historyRequired < maxWidth) {
        historyRequired = maxWidth;
    }

    return index;
}
} // namespace ue2

117
src/som/slot_manager.h Normal file
View File

@@ -0,0 +1,117 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief SOM Slot Manager.
*/
#ifndef SLOT_MANAGER_H
#define SLOT_MANAGER_H
#include "ue2common.h"
#include "nfagraph/ng_graph.h"
#include "util/alloc.h"
#include "util/ue2_containers.h"
#include <deque>
#include <memory>
#include <boost/core/noncopyable.hpp>
struct NFA;
namespace ue2 {
class CharReach;
class NGHolder;
struct Grey;
struct SlotCache;
/** \brief SOM slot manager. Used to hand out SOM slots and track their
* relationships during SOM construction. Also stores reverse NFAs used for
* SOM. */
class SomSlotManager : boost::noncopyable {
public:
/** \brief Construct a manager with no slots allocated; \p precision is
* stored as the SOM precision. */
explicit SomSlotManager(u8 precision);
~SomSlotManager();
/** \brief Sentinel value used to specify that a slot has no parent. */
static constexpr u32 NO_PARENT = ~0;
/** \brief Get the slot for this prefix / escapes / reset flag / parent
* combination. An equal combination requested again yields the same slot;
* otherwise a new slot is allocated. */
u32 getSomSlot(const NGHolder &prefix, const CharReach &escapes,
bool is_reset, u32 parent_slot);
/** \brief Get a slot for an initial resetting sentinel, possibly shared
* with earlier sentinels.
*
* The prefix must be acting as a resetting sentinel and should be a DAG
* (otherwise, how would we establish SOM?). \p prefix_already_implemented
* is written on every call. */
u32 getInitialResetSomSlot(const NGHolder &prefix, const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &region_map,
u32 last_sent_region,
bool *prefix_already_implemented);
/** \brief Allocate a new slot that is not entered into the slot cache. */
u32 getPrivateSomSlot(void);
/** \brief Reset the next-slot counter to \p num. */
void rollbackSomTo(u32 num);
/** \brief Number of slots handed out so far (the next-slot counter). */
u32 numSomSlots() const;
/** \brief Read-only access to the stored reverse NFAs. */
const std::deque<aligned_unique_ptr<NFA>> &getRevNfas() const {
return rev_nfas;
}
/** \brief Store a reverse NFA and return its index; also raises the
* history requirement to at least \p maxWidth. */
u32 addRevNfa(aligned_unique_ptr<NFA> nfa, u32 maxWidth);
/** \brief History committed to so far for SOM reverse NFAs. */
u32 somHistoryRequired() const { return historyRequired; }
/** \brief Currently stored SOM precision. */
u32 somPrecision() const { return precision; }
/** \brief Overwrite the stored SOM precision.
* NOTE(review): the constructor takes a u8 while this setter and the member
* are u32 -- confirm which width is intended. */
void somPrecision(u32 p) {
precision = p;
}
private:
/** \brief Index that the next newly allocated slot will receive. */
u32 nextSomSlot;
/** \brief Slot cache; only forward-declared in this header. */
std::unique_ptr<SlotCache> cache;
/** \brief Reverse NFAs used for SOM support. */
std::deque<aligned_unique_ptr<NFA>> rev_nfas;
/** \brief In streaming mode, the amount of history we've committed to
* using for SOM rev NFAs. */
u32 historyRequired;
/** \brief Number of bytes of SOM precision requested by the user, zero if
* not in SOM mode. */
u32 precision;
#ifdef DUMP_SUPPORT
// The dump routine reads the private cache and precision members directly.
friend void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey);
#endif
};
} // namespace ue2
#endif

View File

@@ -0,0 +1,104 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "slot_manager_dump.h"
#include "slot_manager_internal.h"
#include "slot_manager.h"
#include "grey.h"
#include "nfagraph/ng_dump.h"
#include "nfagraph/ng_is_equal.h"
#include "util/container.h"
#include "ue2common.h"
#include <map>
#include <cstdio>
#include <string>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey) {
if (!grey.dumpFlags) {
return;
}
string filename = grey.dumpPath + "/ssm.txt";
map<u32, const SlotCacheEntry *> by_slot;
map<u32, const InitialResetInfo *> by_slot_ir;
for (const auto &e : ssm.cache->store) {
by_slot[e.slot] = &e;
}
for (const auto &e : ssm.cache->initial_resets) {
by_slot_ir[e.slot] = &e;
}
FILE *f = fopen(filename.c_str(), "w");
fprintf(f, "slot width %u bytes\n\n", ssm.precision);
if (by_slot.empty()) {
fprintf(f, "<no som slots>\n");
}
for (u32 i = 0; i < ssm.numSomSlots(); i++) {
fprintf(f, "%u", i);
if (contains(by_slot_ir, i)) {
const InitialResetInfo &ir = *by_slot_ir[i];
fprintf(f, "\t shared reset (users = %zu)\n", ir.entries.size());
} else if (contains(by_slot, i)) {
const SlotCacheEntry &ce = *by_slot.at(i);
if (ce.parent_slot != SomSlotManager::NO_PARENT) {
fprintf(f, "\tparent:%u", ce.parent_slot);
}
if (ce.is_reset) {
fprintf(f, "\treset");
}
fprintf(f, "\n");
} else {
fprintf(f, "\t<private>\n");
}
}
fclose(f);
for (const auto &h : ssm.cache->initial_prefixes) {
dumpHolder(*h, hash_holder(*h), "ssm_prefix", grey);
}
}
} // namespace ue2

View File

@@ -0,0 +1,51 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SLOT_MANAGER_DUMP_H
#define SLOT_MANAGER_DUMP_H
namespace ue2 {
class SomSlotManager;
struct Grey;
#ifdef DUMP_SUPPORT
// Dump-enabled builds: declaration only; the definition is provided
// elsewhere.
void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey);
#else
// Builds without dump support: empty inline stand-in so call sites need no
// #ifdef of their own.
// NOTE(review): UNUSED is not defined by anything this header includes;
// presumably includers pull in ue2common.h first -- confirm.
static inline UNUSED
void dumpSomSlotManager(const SomSlotManager &, const Grey &) {
}
#endif
} // namespace ue2
#endif

View File

@@ -0,0 +1,106 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SLOT_MANAGER_INTERNAL_H
#define SLOT_MANAGER_INTERNAL_H
#include "nfagraph/ng.h"
#include "nfagraph/ng_is_equal.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "ue2common.h"
#include <memory>
#include <vector>
namespace ue2 {
/** \brief One user of a shared initial-reset slot: a sentinel prefix together
* with the graph it was requested for and that graph's region information. */
struct InitialResetEntry {
// Stores all five arguments in the members of the corresponding name.
InitialResetEntry(std::shared_ptr<const NGHolder> sent_in,
std::shared_ptr<const NGHolder> body_in,
const ue2::unordered_map<NFAVertex, u32> &body_regions_in,
u32 sent_region_in, u32 first_bad_region_in)
: sent(sent_in), body(body_in), body_regions(body_regions_in),
sent_region(sent_region_in), first_bad_region(first_bad_region_in) {}
std::shared_ptr<const NGHolder> sent; // sentinel prefix graph
std::shared_ptr<const NGHolder> body; // graph the sentinel was requested for
ue2::unordered_map<NFAVertex, u32> body_regions; // region per vertex of body
u32 sent_region; // region value supplied alongside the sentinel
u32 first_bad_region; /* ~0U if it must cover the whole g */
};
/** \brief A SOM slot together with the list of initial-reset entries that
* share it. */
struct InitialResetInfo {
// Starts with an empty entry list for the given slot.
explicit InitialResetInfo(u32 slot_in) : slot(slot_in) {}
std::vector<InitialResetEntry> entries; // users of this slot
u32 slot; // slot index shared by all entries
};
/** \brief Element type of the slot cache: the lookup fields (prefix, escapes,
* parent_slot, is_reset) plus the slot assigned to that combination. */
struct SlotCacheEntry {
// We store our own copy of the prefix so we control its lifetime. A
// pointer is used so that this entry can be placed in STL containers, as
// NGHolder is not copy-constructible.
SlotCacheEntry(const NGHolder &prefix_in, const CharReach &escapes_in,
u32 parent_in, bool is_reset_in, u32 slot_in);
std::unique_ptr<const NGHolder> prefix; // owned copy of the prefix graph
CharReach escapes; // escape character class
u32 parent_slot; // parent slot index
bool is_reset; // reset flag supplied at construction
u32 slot; // slot assigned to this entry
};
/** \brief Hash functor supplied as the hasher of SlotCache::CacheStore. */
struct SlotEntryHasher {
// Declared here; the definition lives outside this header.
size_t operator()(const SlotCacheEntry &e) const;
};
/** \brief Equality functor supplied as the key-equality predicate of
* SlotCache::CacheStore. */
struct SlotEntryEqual {
// Declared here; the definition lives outside this header.
bool operator()(const SlotCacheEntry &a, const SlotCacheEntry &b) const;
};
/** \brief Storage for slot assignments: a set of regular cache entries, a set
 * of initial prefix graphs, and a list of initial-reset slot groups. */
struct SlotCache {
    /** \brief Set of entries hashed and compared with the two functors
     * declared for SlotCacheEntry. */
    using CacheStore = ue2::unordered_set<SlotCacheEntry, SlotEntryHasher,
                                          SlotEntryEqual>;

    /** \brief Add an entry built from the arguments to \c store. */
    void insert(const NGHolder &prefix, const CharReach &escapes,
                u32 parent_slot, bool is_reset, u32 slot);

    /** \brief Look up an entry in \c store by the four lookup fields. */
    const SlotCacheEntry *find(const NGHolder &prefix, const CharReach &escapes,
                               u32 parent_slot, bool is_reset);

    /** \brief Regular (non initial-reset) slot assignments. */
    CacheStore store;

    /** \brief Prefix graphs, hashed and compared through the pointed-to
     * holders' functors. */
    ue2::unordered_set<std::shared_ptr<const NGHolder>, NGHolderHasher,
                       NGHolderEqual> initial_prefixes;

    /** \brief One element per initial-reset slot. */
    std::vector<InitialResetInfo> initial_resets;
};
} // namespace ue2
#endif

42
src/som/som.h Normal file
View File

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Common SOM definitions.
*/
#ifndef UE2_SOM_H
#define UE2_SOM_H
/** \brief Enumeration specifying a start of match behaviour. */
/** \brief Enumeration specifying a start of match behaviour. */
enum som_type {
    SOM_NONE = 0, //!< No SOM required
    SOM_LEFT = 1  //!< Exact leftmost SOM
};
#endif // UE2_SOM_H

534
src/som/som_runtime.c Normal file
View File

@@ -0,0 +1,534 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief SOM runtime code.
*
*
* Runtime code for SOM handling called by the Rose callback adaptors.
*
* Note:
* Races between escapes making a som loc writeable and attempts to write to it
* at the same to_offset are always resolved as if the escape arrived first
* and then the request to write to that location.
*/
#include "hs_internal.h"
#include "som_runtime.h"
#include "scratch.h"
#include "ue2common.h"
#include "rose/rose_internal.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_internal.h"
#include "util/fatbit.h"
#include "util/multibit.h"
#include "util/internal_report.h"
/**
 * Write the SOM implied by a relative report (to_offset minus the report's
 * somDistance) into the report's target slot.
 *
 * If the slot was already marked in som_set_now, the stored value is only
 * lowered, never raised; otherwise it is overwritten.
 */
static really_inline
void setSomLoc(struct fatbit *som_set_now, u64a *som_store, u32 som_store_count,
               const struct internal_report *ri, u64a to_offset) {
    /* validity handled by callers */
    assert(to_offset >= ri->aux.somDistance);

    u32 slot = ri->onmatch;
    u64a som = to_offset - ri->aux.somDistance;

    /* resolve any races for matches at this point in favour of the earliest
     * som */
    if (fatbit_set(som_set_now, som_store_count, slot)) {
        LIMIT_TO_AT_MOST(&som_store[slot], som);
    } else {
        som_store[slot] = som;
    }

    DEBUG_PRINTF("som_store[%u] set to %llu\n", slot, som_store[slot]);
}
/**
 * Decide whether a "set if writable" operation may write slot loc, marking
 * the slot valid as a side effect.
 *
 * Returns 1 when the slot was not previously valid, when it has been marked
 * in som_set_now, or when it is flagged writable; 0 otherwise. The checks run
 * in that order and stop at the first that succeeds.
 */
static really_inline
char ok_and_mark_if_write(u8 *som_store_valid, struct fatbit *som_set_now,
                          u8 *som_store_writable, u32 som_store_count,
                          u32 loc) {
    if (!mmbit_set(som_store_valid, som_store_count, loc)) {
        return 1; /* unwritten */
    }

    if (fatbit_isset(som_set_now, som_store_count, loc)) {
        return 1; /* write here, need to resolve race */
    }

    /* writable */
    return mmbit_isset(som_store_writable, som_store_count, loc) ? 1 : 0;
}
/**
 * Decide whether a "set if unset" operation may write slot loc, marking the
 * slot valid as a side effect.
 *
 * Returns 1 when the slot was not previously valid or when it has been marked
 * in som_set_now; 0 otherwise.
 */
static really_inline
char ok_and_mark_if_unset(u8 *som_store_valid, struct fatbit *som_set_now,
                          u32 som_store_count, u32 loc) {
    if (!mmbit_set(som_store_valid, som_store_count, loc)) {
        return 1; /* unwritten */
    }

    /* write here, need to resolve race */
    return fatbit_isset(som_set_now, som_store_count, loc) ? 1 : 0;
}
/**
 * Match callback handed to the reverse NFA run: lowers the u64a that ctx
 * points at to (offset + id) when that is smaller, then asks for matching to
 * continue.
 */
static
int somRevCallback(u64a offset, ReportID id, void *ctx) {
    DEBUG_PRINTF("offset=%llu, id=%u\n", offset, id);

    // We use the id to store the offset adjustment (for assertions like a
    // leading \b or multiline mode).
    assert(id <= 1);

    u64a *earliest = ctx;
    LIMIT_TO_AT_MOST(earliest, offset + id);

    return 1; // continue matching.
}
/**
 * Locate the i-th SOM reverse NFA inside the engine t.
 *
 * t->somRevOffsetOffset is the byte offset (from t) of an array of u32; the
 * i-th element of that array is in turn the byte offset (from t) of the NFA.
 */
static really_inline
const struct NFA *getSomRevNFA(const struct RoseEngine *t, u32 i) {
    assert(t->somRevOffsetOffset);

    const u8 *base = (const u8 *)t;
    const u32 *offset_table = (const u32 *)(base + t->somRevOffsetOffset);

    u32 nfa_offset = offset_table[i];
    assert(nfa_offset && nfa_offset < t->size);

    const struct NFA *n = (const struct NFA *)(base + nfa_offset);
    assert(ISALIGNED(n));
    return n;
}
/**
 * Run the reverse NFA named by ri->aux.revNfaIndex backwards from to_offset
 * and store the resulting offset in *from_offset.
 *
 * *from_offset starts at to_offset and is only ever lowered by
 * somRevCallback. When the current buffer holds fewer bytes than the NFA's
 * maximum width and history is present, the history passed to the NFA is
 * trimmed to its last (maxWidth - buffer bytes) bytes.
 */
static
void runRevNfa(struct hs_scratch *scratch, const struct internal_report *ri,
               const u64a to_offset, u64a *from_offset) {
    struct core_info *ci = &scratch->core_info;

    DEBUG_PRINTF("buf has %zu bytes total, history has %zu\n",
                 ci->len, ci->hlen);

    u32 nfa_idx = ri->aux.revNfaIndex;
    DEBUG_PRINTF("run rev nfa %u from to_offset=%llu\n", nfa_idx, to_offset);
    const struct NFA *nfa = getSomRevNFA(ci->rose, nfa_idx);

    assert(nfa->maxWidth); // No inf width rev NFAs.

    size_t buf_bytes = to_offset - ci->buf_offset;
    size_t history_bytes = ci->hlen;

    DEBUG_PRINTF("nfa min/max widths [%u,%u], %zu in buffer, %zu in history\n",
                 nfa->minWidth, nfa->maxWidth, buf_bytes, history_bytes);
    assert(nfa->minWidth <= buf_bytes + history_bytes);

    const u8 *hbuf = ci->hbuf;

    // Work out if we need to scan any history as well.
    if (history_bytes && buf_bytes < nfa->maxWidth) {
        assert(hbuf);
        size_t wanted = nfa->maxWidth - buf_bytes;
        if (wanted < history_bytes) {
            // Skip the oldest history bytes that the NFA cannot reach.
            hbuf += history_bytes - wanted;
            history_bytes = wanted;
        }
    }

    DEBUG_PRINTF("scanning %zu from buffer and %zu from history\n", buf_bytes,
                 history_bytes);

    *from_offset = to_offset;

    nfaBlockExecReverse(nfa, to_offset, ci->buf, buf_bytes, hbuf,
                        history_bytes, scratch, somRevCallback, from_offset);

    assert(*from_offset <= to_offset);
}
/**
 * Same slot update as for a relative report, except that the SOM value comes
 * from running the report's reverse NFA back from to_offset.
 *
 * If the slot was already marked in som_set_now, the stored value is only
 * lowered, never raised; otherwise it is overwritten.
 */
static really_inline
void setSomLocRevNfa(struct hs_scratch *scratch, struct fatbit *som_set_now,
                     u64a *som_store, u32 som_store_count,
                     const struct internal_report *ri, u64a to_offset) {
    /* validity handled by callers */
    u64a som = 0;
    runRevNfa(scratch, ri, to_offset, &som);

    u32 slot = ri->onmatch;

    /* resolve any races for matches at this point in favour of the earliest
     * som */
    if (fatbit_set(som_set_now, som_store_count, slot)) {
        LIMIT_TO_AT_MOST(&som_store[slot], som);
    } else {
        som_store[slot] = som;
    }

    DEBUG_PRINTF("som_store[%u] set to %llu\n", slot, som_store[slot]);
}
/**
 * Apply an internal SOM report (one that updates SOM slot state rather than
 * producing a user-visible match) raised at stream offset to_offset.
 *
 * State touched:
 *  - som_store / som_set_now: slot values and the set of slots written at the
 *    current offset;
 *  - som_attempted_store / som_attempted_set: writes that were refused because
 *    the slot was not writable, kept in case a MAKE_WRITABLE report for the
 *    same slot arrives at the same offset;
 *  - the somValid / somWritable multibits in stream state.
 *
 * Both per-offset sets are cleared whenever to_offset differs from the offset
 * recorded in scratch->som_set_now_offset.
 *
 * \param scratch scratch space holding the SOM working state.
 * \param ri the internal report to apply; its type selects the action.
 * \param to_offset offset at which the report was raised.
 */
void handleSomInternal(struct hs_scratch *scratch,
                       const struct internal_report *ri, const u64a to_offset) {
    assert(scratch);
    assert(ri);
    DEBUG_PRINTF("-->som action required at %llu\n", to_offset);

    // SOM handling at scan time operates on data held in scratch. In
    // streaming mode, this data is read from / written out to stream state at
    // stream write boundaries.

    struct core_info *ci = &scratch->core_info;
    const struct RoseEngine *rose = ci->rose;
    assert(rose->hasSom);

    const u32 som_store_count = rose->somLocationCount;
    u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
    u8 *som_store_writable = (u8 *)ci->state + rose->stateOffsets.somWritable;
    struct fatbit *som_set_now = scratch->som_set_now;
    struct fatbit *som_attempted_set = scratch->som_attempted_set;
    u64a *som_store = scratch->som_store;
    u64a *som_failed_store = scratch->som_attempted_store;

    // New offset: forget which slots were written / attempted at the old one.
    if (to_offset != scratch->som_set_now_offset) {
        assert(scratch->som_set_now_offset == ~0ULL
               || to_offset > scratch->som_set_now_offset);
        DEBUG_PRINTF("setting som_set_now_offset=%llu\n", to_offset);
        fatbit_clear(som_set_now);
        fatbit_clear(som_attempted_set);
        scratch->som_set_now_offset = to_offset;
    }

    switch (ri->type) {
    case INTERNAL_SOM_LOC_SET:
        DEBUG_PRINTF("INTERNAL_SOM_LOC_SET\n");
        mmbit_set(som_store_valid, som_store_count, ri->onmatch);
        setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
        return;
    case INTERNAL_SOM_LOC_SET_IF_UNSET:
        DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_IF_UNSET\n");
        if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count,
                                 ri->onmatch)) {
            setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
        }
        return;
    case INTERNAL_SOM_LOC_SET_IF_WRITABLE: {
        u32 slot = ri->onmatch;
        DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_IF_WRITABLE\n");
        if (ok_and_mark_if_write(som_store_valid, som_set_now,
                                 som_store_writable, som_store_count, slot)) {
            setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
            mmbit_unset(som_store_writable, som_store_count, slot);
        } else {
            /* not writable, stash as an attempted write in case we are
             * racing our escape. */
            DEBUG_PRINTF("not writable, stashing attempt\n");
            assert(to_offset >= ri->aux.somDistance);
            u64a start_offset = to_offset - ri->aux.somDistance;

            if (!fatbit_set(som_attempted_set, som_store_count, slot)) {
                som_failed_store[slot] = start_offset;
            } else {
                LIMIT_TO_AT_MOST(&som_failed_store[slot], start_offset);
            }
            DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot,
                         som_failed_store[slot]);
        }
        return;
    }
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
        DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_SOM_REV_NFA\n");
        mmbit_set(som_store_valid, som_store_count, ri->onmatch);
        setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count, ri,
                        to_offset);
        return;
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
        DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET\n");
        if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count,
                                 ri->onmatch)) {
            setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count,
                            ri, to_offset);
        }
        return;
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: {
        u32 slot = ri->onmatch;
        /* trace string names this case, so it can be told apart from the
         * non-rev-NFA IF_WRITABLE case in debug output */
        DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE\n");
        if (ok_and_mark_if_write(som_store_valid, som_set_now,
                                 som_store_writable, som_store_count, slot)) {
            setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count,
                            ri, to_offset);
            mmbit_unset(som_store_writable, som_store_count, slot);
        } else {
            /* not writable, stash as an attempted write in case we are
             * racing our escape. */
            DEBUG_PRINTF("not writable, stashing attempt\n");

            u64a from_offset = 0;
            runRevNfa(scratch, ri, to_offset, &from_offset);

            if (!fatbit_set(som_attempted_set, som_store_count, slot)) {
                som_failed_store[slot] = from_offset;
            } else {
                LIMIT_TO_AT_MOST(&som_failed_store[slot], from_offset);
            }
            DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot,
                         som_failed_store[slot]);
        }
        return;
    }
    case INTERNAL_SOM_LOC_COPY: {
        /* for the copy reports, aux.somDistance carries the source slot */
        u32 slot_in = ri->aux.somDistance;
        u32 slot_out = ri->onmatch;
        DEBUG_PRINTF("INTERNAL_SOM_LOC_COPY S[%u] = S[%u]\n", slot_out,
                     slot_in);
        assert(mmbit_isset(som_store_valid, som_store_count, slot_in));
        mmbit_set(som_store_valid, som_store_count, slot_out);
        fatbit_set(som_set_now, som_store_count, slot_out);
        som_store[slot_out] = som_store[slot_in];

        return;
    }
    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: {
        u32 slot_in = ri->aux.somDistance;
        u32 slot_out = ri->onmatch;
        DEBUG_PRINTF("INTERNAL_SOM_LOC_COPY_IF_WRITABLE S[%u] = S[%u]\n",
                     slot_out, slot_in);
        assert(mmbit_isset(som_store_valid, som_store_count, slot_in));
        if (ok_and_mark_if_write(som_store_valid, som_set_now,
                                 som_store_writable, som_store_count,
                                 slot_out)) {
            DEBUG_PRINTF("copy, set som_store[%u]=%llu\n", slot_out,
                         som_store[slot_in]);
            som_store[slot_out] = som_store[slot_in];
            fatbit_set(som_set_now, som_store_count, slot_out);
            mmbit_unset(som_store_writable, som_store_count, slot_out);
        } else {
            /* not writable, stash as an attempted write in case we are
             * racing our escape */
            DEBUG_PRINTF("not writable, stashing attempt\n");
            fatbit_set(som_attempted_set, som_store_count, slot_out);
            som_failed_store[slot_out] = som_store[slot_in];
            DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot_out,
                         som_failed_store[slot_out]);
        }
        return;
    }
    case INTERNAL_SOM_LOC_MAKE_WRITABLE: {
        u32 slot = ri->onmatch;
        DEBUG_PRINTF("INTERNAL_SOM_LOC_MAKE_WRITABLE\n");
        /* if just written to the loc, ignore the racing escape */
        if (fatbit_isset(som_set_now, som_store_count, slot)) {
            DEBUG_PRINTF("just written\n");
            return;
        }
        if (fatbit_isset(som_attempted_set, som_store_count, slot)) {
            /* writes were waiting for an escape to arrive */
            DEBUG_PRINTF("setting som_store[%u] = %llu from "
                         "som_failed_store[%u]\n", slot, som_failed_store[slot],
                         slot);
            som_store[slot] = som_failed_store[slot];
            fatbit_set(som_set_now, som_store_count, slot);
            return;
        }
        mmbit_set(som_store_writable, som_store_count, slot);
        return;
    }
    default:
        DEBUG_PRINTF("unknown report type!\n");
        break;
    }

    // All valid internal_report types should be handled and returned above.
    assert(0);
    return;
}
// Returns the SOM offset.
u64a handleSomExternal(struct hs_scratch *scratch,
const struct internal_report *ri,
const u64a to_offset) {
assert(scratch);
assert(ri);
// SOM handling at scan time operates on data held in scratch. In
// streaming mode, this data is read from / written out to stream state at
// stream write boundaries.
struct core_info *ci = &scratch->core_info;
const struct RoseEngine *rose = ci->rose;
assert(rose->hasSom);
switch (ri->type) {
case EXTERNAL_CALLBACK_SOM_REL:
DEBUG_PRINTF("EXTERNAL_CALLBACK_SOM_REL: som is %llu chars back\n",
ri->aux.somDistance);
assert(to_offset >= ri->aux.somDistance);
return to_offset - ri->aux.somDistance;
case EXTERNAL_CALLBACK_SOM_ABS:
DEBUG_PRINTF("EXTERNAL_CALLBACK_SOM_ABS: som is at %llu\n",
ri->aux.somDistance);
assert(to_offset >= ri->aux.somDistance);
return ri->aux.somDistance;
case EXTERNAL_CALLBACK_SOM_STORED: {
const u64a *som_store = scratch->som_store;
u32 slot = ri->aux.somDistance;
DEBUG_PRINTF("EXTERNAL_CALLBACK_SOM_STORED: <- som_store[%u]=%llu\n",
slot, som_store[slot]);
UNUSED const u32 som_store_count = rose->somLocationCount;
UNUSED const u8 *som_store_valid = (u8 *)ci->state
+ rose->stateOffsets.somValid;
assert(mmbit_isset(som_store_valid, som_store_count, slot));
return som_store[slot];
}
case EXTERNAL_CALLBACK_SOM_REV_NFA: {
DEBUG_PRINTF("EXTERNAL_CALLBACK_REV_NFA\n");
u64a from_offset = 0;
runRevNfa(scratch, ri, to_offset, &from_offset);
return from_offset;
}
default:
DEBUG_PRINTF("unknown report type!\n");
break;
}
// All valid internal_report types should be handled and returned above.
assert(0);
return 0;
}
/**
 * \brief Record a SOM value supplied by a SOM-aware source into a SOM slot.
 *
 * Only INTERNAL_SOM_LOC_SET_FROM and INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE
 * reports are accepted. The slot written is ri->onmatch.
 *
 * \param scratch     scratch space holding the SOM slot store and the
 *                    per-offset bookkeeping fatbits.
 * \param ri          the internal report being processed.
 * \param from_offset SOM value to record.
 * \param to_offset   offset at which the report fired; must be non-zero.
 */
void setSomFromSomAware(struct hs_scratch *scratch,
                        const struct internal_report *ri, u64a from_offset,
                        u64a to_offset) {
    assert(scratch);
    assert(ri);
    assert(to_offset);
    assert(ri->type == INTERNAL_SOM_LOC_SET_FROM
           || ri->type == INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE);

    struct core_info *ci = &scratch->core_info;
    const struct RoseEngine *rose = ci->rose;
    assert(rose->hasSom);

    const u32 slot_count = rose->somLocationCount;
    u8 *valid = (u8 *)ci->state + rose->stateOffsets.somValid;
    u8 *writable = (u8 *)ci->state + rose->stateOffsets.somWritable;
    struct fatbit *set_now = scratch->som_set_now;
    struct fatbit *attempted = scratch->som_attempted_set;
    u64a *slots = scratch->som_store;
    u64a *stashed = scratch->som_attempted_store;

    // The "set now" / "attempted" sets only describe a single offset: start
    // afresh whenever we move to a different one.
    if (scratch->som_set_now_offset != to_offset) {
        DEBUG_PRINTF("setting som_set_now_offset=%llu\n", to_offset);
        fatbit_clear(set_now);
        fatbit_clear(attempted);
        scratch->som_set_now_offset = to_offset;
    }

    if (ri->type == INTERNAL_SOM_LOC_SET_FROM) {
        // Unconditional write: mark the slot valid and store.
        DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_FROM\n");
        mmbit_set(valid, slot_count, ri->onmatch);
        setSomLoc(set_now, slots, slot_count, ri, from_offset);
        return;
    }

    DEBUG_PRINTF("INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE\n");
    if (ok_and_mark_if_write(valid, set_now, writable, slot_count,
                             ri->onmatch)) {
        // Write permitted; the slot stops being writable once written.
        setSomLoc(set_now, slots, slot_count, ri, from_offset);
        mmbit_unset(writable, slot_count, ri->onmatch);
        return;
    }

    /* not writable, stash as an attempted write in case we are
     * racing our escape. */
    DEBUG_PRINTF("not writable, stashing attempt\n");
    assert(to_offset >= ri->aux.somDistance);
    u32 slot = ri->onmatch;

    // NOTE(review): fatbit_set() presumably reports whether the bit was
    // already set, and LIMIT_TO_AT_MOST presumably lowers the stored value to
    // from_offset if it is larger -- confirm against their definitions.
    if (fatbit_set(attempted, slot_count, ri->onmatch)) {
        LIMIT_TO_AT_MOST(&stashed[slot], from_offset);
    } else {
        stashed[slot] = from_offset;
    }
    DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot, stashed[slot]);
}
/**
 * \brief Deliver every match recorded in a SOM log to the user callback and
 * then clear the log.
 *
 * Each set bit in \a log is a dedupe key; its report id is looked up in the
 * table at rose->invDkeyOffset and its start offset is read from \a starts.
 *
 * \param scratch scratch space (supplies the callback, its context and the
 *                engine).
 * \param offset  end offset to report for every match in the log.
 * \param log     set of dedupe keys with a pending match.
 * \param starts  start offsets, indexed by dedupe key.
 * \return 1 if the user callback asked to halt (the log is left uncleared in
 *         that case), 0 otherwise.
 */
static really_inline
int clearSomLog(struct hs_scratch *scratch, u64a offset, struct fatbit *log,
                const u64a *starts) {
    DEBUG_PRINTF("at %llu\n", offset);

    struct core_info *ci = &scratch->core_info;
    const struct RoseEngine *rose = ci->rose;
    const u32 num_dkeys = rose->dkeyCount;
    const u32 *report_for_dkey =
        (const u32 *)((const char *)rose + rose->invDkeyOffset);

    u32 flags = 0;
#ifndef RELEASE_BUILD
    // Flag matches whose reported offset is not the current report offset.
    if (offset != scratch->deduper.current_report_offset) {
        flags |= HS_MATCH_FLAG_ADJUSTED;
    }
#endif

    u32 dkey = fatbit_iterate(log, num_dkeys, MMB_INVALID);
    while (dkey != MMB_INVALID) {
        u64a som = starts[dkey];
        u32 report = report_for_dkey[dkey];
        if (ci->userCallback(report, som, offset, flags, ci->userContext)) {
            // NOTE(review): the halt request is only signalled through the
            // return value; nothing here records it in scratch, and the log
            // keeps its remaining entries. Confirm that callers (and whatever
            // backs told_to_stop_matching()) do not rely on scratch state
            // being updated at this point.
            return 1;
        }
        dkey = fatbit_iterate(log, num_dkeys, dkey);
    }

    fatbit_clear(log);
    return 0;
}
/**
 * \brief Fire and clear the SOM match logs held in the deduper.
 *
 * Two logs are kept, selected by offset parity. If no report offset has been
 * recorded yet, both logs are simply cleared. If \a offset immediately
 * follows the current report offset, only the log with the parity of
 * \a offset is flushed; otherwise both logs are flushed, older first.
 *
 * Must not be called once matching has been told to stop.
 *
 * \param scratch scratch space holding the deduper state.
 * \param offset  offset we are flushing up to.
 * \return non-zero if the user callback asked to halt, 0 otherwise.
 */
int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset) {
    DEBUG_PRINTF("flush som matches\n");

    assert(!told_to_stop_matching(scratch));

    if (scratch->deduper.current_report_offset == ~0ULL) {
        /* no matches recorded yet; just need to clear the logs */
        fatbit_clear(scratch->deduper.som_log[0]);
        fatbit_clear(scratch->deduper.som_log[1]);
        scratch->deduper.som_log_dirty = 0;
        return 0;
    }

    /* fire any reports from the logs and clear them */
    if (offset == scratch->deduper.current_report_offset + 1) {
        // Only the log sharing this offset's parity needs to go out.
        struct fatbit *log = scratch->deduper.som_log[offset % 2];
        u64a *log_starts = scratch->deduper.som_start_log[offset % 2];

        int rv = clearSomLog(scratch,
                             scratch->deduper.current_report_offset - 1,
                             log, log_starts);
        scratch->deduper.som_log_dirty >>= 1;
        return rv;
    }

    /* need to report both logs */
    u64a older = scratch->deduper.current_report_offset - 1;
    u64a newer = scratch->deduper.current_report_offset;
    struct fatbit *older_log = scratch->deduper.som_log[older % 2];
    u64a *older_starts = scratch->deduper.som_start_log[older % 2];
    struct fatbit *newer_log = scratch->deduper.som_log[newer % 2];
    u64a *newer_starts = scratch->deduper.som_start_log[newer % 2];

    // The newer log is only attempted if the older one did not halt.
    int halt = 0;
    if (clearSomLog(scratch, older, older_log, older_starts)) {
        halt = 1;
    } else if (clearSomLog(scratch, newer, newer_log, newer_starts)) {
        halt = 1;
    }
    scratch->deduper.som_log_dirty = 0;

    return halt;
}

67
src/som/som_runtime.h Normal file
View File

@@ -0,0 +1,67 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief SOM runtime code.
*
*
* Runtime code for SOM handling called by the Rose callback adaptors.
*/
#ifndef SOM_RUNTIME_H
#define SOM_RUNTIME_H
#include "scratch.h"
#include "ue2common.h"
struct internal_report;
// Processes an internal SOM report (the INTERNAL_SOM_LOC_* report types) that
// fired at to_offset, updating the SOM slot state reachable through scratch.
void handleSomInternal(struct hs_scratch *scratch,
const struct internal_report *ri, const u64a to_offset);
// Returns the from_offset.
// Handles the EXTERNAL_CALLBACK_SOM_* report types; to_offset is the end
// offset of the match being reported.
u64a handleSomExternal(struct hs_scratch *scratch,
const struct internal_report *ri, const u64a to_offset);
// Records from_offset into the SOM slot named by ri->onmatch. ri must be an
// INTERNAL_SOM_LOC_SET_FROM or INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE report,
// and to_offset must be non-zero.
void setSomFromSomAware(struct hs_scratch *scratch,
const struct internal_report *ri, u64a from_offset,
u64a to_offset);
// Out-of-line worker for flushStoredSomMatches(): fires and clears the stored
// SOM match logs. Returns non-zero if the user callback asked to halt.
int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset);
// Inline fast path: does nothing and returns 0 when no SOM log is marked
// dirty; otherwise defers to flushStoredSomMatches_i() and returns its result.
static really_inline
int flushStoredSomMatches(struct hs_scratch *scratch, u64a offset) {
    if (!scratch->deduper.som_log_dirty) {
        return 0;
    }
    return flushStoredSomMatches_i(scratch, offset);
}
#endif // SOM_RUNTIME_H

174
src/som/som_stream.c Normal file
View File

@@ -0,0 +1,174 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief SOM streaming runtime code.
*
* Code in this file handles storing and loading SOM slot information from
* stream state.
*/
#include "scratch.h"
#include "som_stream.h"
#include "rose/rose_internal.h"
#include "util/multibit.h"
// Sentinel values stored in stream state and used to represent an SOM distance
// that is too far in the past to be stored in the available space in stream
// state.
//
// Each sentinel is the all-ones value of the corresponding stored width.
// SOM_SENTINEL_LARGE is used for 8-byte slots and is also the value
// loadSomValue() hands back when it reads a narrower sentinel.
#define SOM_SENTINEL_LARGE (~0ull)
// Sentinel for 4-byte slots.
#define SOM_SENTINEL_MEDIUM (~0u)
// Sentinel for 2-byte slots.
#define SOM_SENTINEL_SMALL ((u16)~0u)
/**
 * \brief Encode one SOM slot value into stream state.
 *
 * The value is written as a distance back from \a stream_offset, using
 * \a som_size bytes. For the 2- and 4-byte encodings, distances that do not
 * fit are clamped to the all-ones sentinel of that width. A slot already
 * holding SOM_SENTINEL_LARGE is written as the sentinel of the target width.
 *
 * Both the sentinel and the regular case go through the same switch, so the
 * alignment checks and the unsupported-width assertion apply to both.
 *
 * \param stream_som_store destination in stream state; must be suitably
 *                         aligned for the chosen width.
 * \param som_value        absolute SOM offset, or SOM_SENTINEL_LARGE.
 * \param stream_offset    current stream offset; must be >= \a som_value
 *                         unless \a som_value is the sentinel.
 * \param som_size         width in bytes of the stored value: 2, 4 or 8.
 */
static really_inline
void storeSomValue(void *stream_som_store, u64a som_value,
                   u64a stream_offset, u8 som_size) {
    u64a rel_offset;

    if (som_value == SOM_SENTINEL_LARGE) {
        // Special case for sentinel value: start from all-ones and let the
        // MIN() clamps below narrow it to the sentinel of the stored width.
        rel_offset = SOM_SENTINEL_LARGE;
    } else {
        assert(som_value <= stream_offset);
        rel_offset = stream_offset - som_value;
        DEBUG_PRINTF("rel_offset=%llu\n", rel_offset);
    }

    switch (som_size) {
    case 2:
        rel_offset = MIN(rel_offset, SOM_SENTINEL_SMALL);
        assert(ISALIGNED_N(stream_som_store, alignof(u16)));
        *(u16 *)stream_som_store = rel_offset;
        break;
    case 4:
        rel_offset = MIN(rel_offset, SOM_SENTINEL_MEDIUM);
        assert(ISALIGNED_N(stream_som_store, alignof(u32)));
        *(u32 *)stream_som_store = rel_offset;
        break;
    case 8:
        assert(ISALIGNED_N(stream_som_store, alignof(u64a)));
        *(u64a *)stream_som_store = rel_offset;
        break;
    default:
        // Unsupported width: nothing is written.
        assert(0);
        break;
    }
}
void storeSomToStream(struct hs_scratch *scratch, const u64a offset) {
assert(scratch);
DEBUG_PRINTF("stream offset %llu\n", offset);
struct core_info *ci = &scratch->core_info;
const struct RoseEngine *rose = ci->rose;
const u32 som_store_count = rose->somLocationCount;
assert(som_store_count); // Caller should ensure that we have work to do.
u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
char *stream_som_store = ci->state + rose->stateOffsets.somLocation;
const u64a *som_store = scratch->som_store;
const u8 som_size = rose->somHorizon;
for (u32 i = mmbit_iterate(som_store_valid, som_store_count, MMB_INVALID);
i != MMB_INVALID;
i = mmbit_iterate(som_store_valid, som_store_count, i)) {
DEBUG_PRINTF("storing %llu in %u\n", som_store[i], i);
storeSomValue(stream_som_store + (i * som_size), som_store[i],
offset, som_size);
}
}
/**
 * \brief Decode one SOM slot value from stream state.
 *
 * Reads a \a som_size byte distance and converts it back to an absolute
 * offset by subtracting it from \a stream_offset. A 2- or 4-byte value equal
 * to the sentinel of its width decodes to SOM_SENTINEL_LARGE; 8-byte values
 * get no sentinel check. An unsupported width asserts and is treated as a
 * distance of zero.
 *
 * \param stream_som_store source in stream state; must be suitably aligned
 *                         for the chosen width.
 * \param stream_offset    current stream offset.
 * \param som_size         width in bytes of the stored value: 2, 4 or 8.
 * \return the decoded SOM offset, or SOM_SENTINEL_LARGE.
 */
static really_inline
u64a loadSomValue(const void *stream_som_store, u64a stream_offset,
                  u8 som_size) {
    u64a distance = 0;

    if (som_size == 2) {
        assert(ISALIGNED_N(stream_som_store, alignof(u16)));
        distance = *(const u16 *)stream_som_store;
        if (distance == SOM_SENTINEL_SMALL) {
            return SOM_SENTINEL_LARGE;
        }
    } else if (som_size == 4) {
        assert(ISALIGNED_N(stream_som_store, alignof(u32)));
        distance = *(const u32 *)stream_som_store;
        if (distance == SOM_SENTINEL_MEDIUM) {
            return SOM_SENTINEL_LARGE;
        }
    } else if (som_size == 8) {
        assert(ISALIGNED_N(stream_som_store, alignof(u64a)));
        distance = *(const u64a *)stream_som_store;
    } else {
        // Unsupported width; fall through with a zero distance.
        assert(0);
    }

    DEBUG_PRINTF("rel_offset=%llu\n", distance);
    return stream_offset - distance;
}
void loadSomFromStream(struct hs_scratch *scratch, const u64a offset) {
assert(scratch);
DEBUG_PRINTF("stream offset %llu\n", offset);
struct core_info *ci = &scratch->core_info;
const struct RoseEngine *rose = ci->rose;
const u32 som_store_count = rose->somLocationCount;
assert(som_store_count); // Caller should ensure that we have work to do.
const u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
const char *stream_som_store = ci->state + rose->stateOffsets.somLocation;
u64a *som_store = scratch->som_store;
const u8 som_size = rose->somHorizon;
for (u32 i = mmbit_iterate(som_store_valid, som_store_count, MMB_INVALID);
i != MMB_INVALID;
i = mmbit_iterate(som_store_valid, som_store_count, i)) {
som_store[i] = loadSomValue(stream_som_store + (i*som_size), offset,
som_size);
DEBUG_PRINTF("loaded %llu from %u\n", som_store[i], i);
}
}

48
src/som/som_stream.h Normal file
View File

@@ -0,0 +1,48 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief SOM streaming runtime code.
*/
#ifndef SOM_STREAM_H
#define SOM_STREAM_H
#include "ue2common.h"
struct hs_scratch;
/** \brief Write all SOM slot information from scratch out to stream state
 * (given the current stream offset).
 *
 * Only slots marked valid are written. Each is stored as a distance back from
 * \a offset in somHorizon bytes; distances too large for a 2- or 4-byte slot
 * are stored as a sentinel. The engine must have at least one SOM slot. */
void storeSomToStream(struct hs_scratch *scratch, const u64a offset);
/** \brief Read all SOM slot information from stream state into scratch (given
 * the current stream offset).
 *
 * Only slots marked valid are read. Stored distances are converted back to
 * offsets relative to \a offset; a stored sentinel is loaded as the 64-bit
 * all-ones value. The engine must have at least one SOM slot. */
void loadSomFromStream(struct hs_scratch *scratch, const u64a offset);
#endif