mirror of https://github.com/VectorCamp/vectorscan.git (synced 2025-06-28 16:41:01 +03:00)
fdr_streaming_compile: modernise loops, etc

commit 466fc940e5
parent 57cd2331f5
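The hunks below replace explicit-iterator loops with C++11 range-based for loops and `auto`, and swap in `pop_back()`, `move()`, `distance()`, and `emplace_back()` where they simplify the code. As a self-contained sketch of the main pattern (the `Lit` struct and function names here are hypothetical stand-ins, not part of the source), the before/after shape is:

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

struct Lit { std::string s; }; // hypothetical stand-in for hwlmLiteral

// Before: explicit iterator type, erase(size()-1, 1), extra copy on push_back.
void collect_before(const std::vector<Lit> &lits, std::vector<Lit> &out,
                    std::size_t max_len) {
    for (std::vector<Lit>::const_iterator it = lits.begin();
         it != lits.end(); ++it) {
        if (it->s.length() > max_len) {
            Lit tmp = *it;                    // copy
            tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
            out.push_back(tmp);               // copies tmp again
        }
    }
}

// After: range-for, pop_back(), and a move into the container.
void collect_after(const std::vector<Lit> &lits, std::vector<Lit> &out,
                   std::size_t max_len) {
    for (const auto &lit : lits) {
        if (lit.s.length() > max_len) {
            Lit tmp = lit;    // still one copy
            tmp.s.pop_back(); // drop the last character
            out.push_back(std::move(tmp)); // move, no second copy
        }
    }
}

Moving the temporary avoids a second copy of the literal's string storage; the behaviour is otherwise unchanged.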
@@ -94,14 +94,13 @@ static
 bool setupLongLits(const vector<hwlmLiteral> &lits,
                    vector<hwlmLiteral> &long_lits, size_t max_len) {
     long_lits.reserve(lits.size());
-    for (vector<hwlmLiteral>::const_iterator it = lits.begin();
-         it != lits.end(); ++it) {
-        if (it->s.length() > max_len) {
-            hwlmLiteral tmp = *it; // copy
-            tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
+    for (const auto &lit : lits) {
+        if (lit.s.length() > max_len) {
+            hwlmLiteral tmp = lit; // copy
+            tmp.s.pop_back();
             tmp.id = 0; // recalc later
             tmp.groups = 0; // filled in later by hash bucket(s)
-            long_lits.push_back(tmp);
+            long_lits.push_back(move(tmp));
         }
     }
 
@@ -112,15 +111,12 @@ bool setupLongLits(const vector<hwlmLiteral> &lits,
     // sort long_literals by caseful/caseless and in lexicographical order,
     // remove duplicates
     stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
-    vector<hwlmLiteral>::iterator new_end =
-        unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
+    auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
     long_lits.erase(new_end, long_lits.end());
 
     // fill in ids; not currently used
-    for (vector<hwlmLiteral>::iterator i = long_lits.begin(),
-                                       e = long_lits.end();
-         i != e; ++i) {
-        i->id = i - long_lits.begin();
+    for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
+        i->id = distance(long_lits.begin(), i);
     }
     return true;
 }
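A note on the `distance()` change above: for random-access iterators it compiles to the same subtraction as `i - begin()`, but it names the intent and also works for weaker iterator categories. A minimal hypothetical sketch:

#include <iterator>
#include <vector>

// Number each element with its own index, as the id-filling loop above does.
void fill_ids(std::vector<unsigned> &ids) {
    for (auto i = ids.begin(), e = ids.end(); i != e; ++i) {
        *i = static_cast<unsigned>(std::distance(ids.begin(), i));
    }
}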
@@ -143,23 +139,19 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
         hashedPositions[m] = 0;
     }
 
-    for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
-                                             e = long_lits.end();
-         i != e; ++i) {
+    for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
         if (i->nocase) {
-            boundaries[CASEFUL] = verify_u32(i - long_lits.begin());
+            boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i));
             break;
         }
     }
 
-    for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
-                                             e = long_lits.end();
-         i != e; ++i) {
-        MODES m = i->nocase ? CASELESS : CASEFUL;
-        for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
+    for (const auto &lit : long_lits) {
+        MODES m = lit.nocase ? CASELESS : CASEFUL;
+        for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
             hashedPositions[m]++;
         }
-        positions[m] += i->s.size();
+        positions[m] += lit.s.size();
     }
 
     for (u32 m = CASEFUL; m < MAX_MODES; m++) {
@@ -209,18 +201,15 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
     map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
     map<u32, u64a> bucketToBitfield;
 
-    for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
-                                             e = long_lits.end();
-         i != e; ++i) {
-        const hwlmLiteral &l = *i;
-        if ((m == CASELESS) != i->nocase) {
+    for (const auto &lit : long_lits) {
+        if ((m == CASELESS) != lit.nocase) {
             continue;
         }
-        for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
-            u32 h = hashLit(l, j, max_len, m);
+        for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
+            u32 h = hashLit(lit, j, max_len, m);
             u32 h_ent = h & ((1U << nbits) - 1);
             u32 h_low = (h >> nbits) & 63;
-            bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j));
+            bucketToLitOffPairs[h_ent].emplace_back(lit.id, j);
             bucketToBitfield[h_ent] |= (1ULL << h_low);
         }
     }
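The `emplace_back` change above skips building a temporary pair first. A tiny illustrative sketch (names assumed, not from the source):

#include <deque>
#include <utility>

using u32 = unsigned int; // stand-in for the source's u32 typedef

// push_back(make_pair(...)) constructs a temporary pair and then moves/copies
// it into the deque; emplace_back forwards the two arguments and constructs
// the pair in place. Both lines record the same element.
void record(std::deque<std::pair<u32, u32>> &d, u32 id, u32 off) {
    d.push_back(std::make_pair(id, off)); // pre-C++11 idiom
    d.emplace_back(id, off);              // modernised equivalent
}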
@@ -231,11 +220,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
 
     // sweep out bitfield entries and save the results swapped accordingly
     // also, anything with bitfield entries is put in filledBuckets
-    for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(),
-                                        e = bucketToBitfield.end();
-         i != e; ++i) {
-        u32 bucket = i->first;
-        u64a contents = i->second;
+    for (const auto &m : bucketToBitfield) {
+        const u32 &bucket = m.first;
+        const u64a &contents = m.second;
         tab[bucket].bitfield = contents;
         filledBuckets.set(bucket);
     }
@@ -243,12 +230,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
     // store out all our chains based on free values in our hash table.
     // find nearest free locations that are empty (there will always be more
     // entries than strings, at present)
-    for (map<u32, deque<pair<u32, u32> > >::iterator
-             i = bucketToLitOffPairs.begin(),
-             e = bucketToLitOffPairs.end();
-         i != e; ++i) {
-        u32 bucket = i->first;
-        deque<pair<u32, u32> > &d = i->second;
+    for (auto &m : bucketToLitOffPairs) {
+        u32 bucket = m.first;
+        deque<pair<u32, u32>> &d = m.second;
 
         // sort d by distance of the residual string (len minus our depth into
         // the string). We need to put the 'furthest back' string first...
@@ -299,9 +283,8 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
 static
 size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
     size_t rv = 0;
-    vector<hwlmLiteral>::const_iterator it, ite;
-    for (it = lits.begin(), ite = lits.end(); it != ite; ++it) {
-        rv = max(rv, it->msk.size());
+    for (const auto &lit : lits) {
+        rv = max(rv, lit.msk.size());
     }
     return rv;
 }
@@ -407,9 +390,7 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
     ptr += litTabSize;
 
     map<u32, u32> litToOffsetVal;
-    for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
-                                             e = long_lits.end();
-         i != e; ++i) {
+    for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
         u32 entry = verify_u32(i - long_lits.begin());
         u32 offset = verify_u32(ptr - secondaryTable.get());
 