mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
ue2_literal: make nocase member a dynamic_bitset
We were previously using `std::vector<bool>`, but `boost::dynamic_bitset` provides a faster `any()` implementation.
This commit is contained in:
parent
36136f1003
commit
1aad3b0ed1
@ -237,12 +237,12 @@ ue2_literal::elem::operator CharReach () const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ue2_literal::ue2_literal(const std::string &s_in, bool nc_in)
|
ue2_literal::ue2_literal(const std::string &s_in, bool nc_in)
|
||||||
: s(nc_in ? toUpperString(s_in) : s_in), nocase(s_in.size(), nc_in) {
|
: s(nc_in ? toUpperString(s_in) : s_in), nocase(s_in.size()) {
|
||||||
if (nc_in) {
|
if (nc_in) {
|
||||||
// Quash nocase bit for non-alpha chars
|
// Switch on nocase bit for all alpha characters.
|
||||||
for (size_t i = 0; i < s.length(); i++) {
|
for (size_t i = 0; i < s.length(); i++) {
|
||||||
if (!ourisalpha(s[i])) {
|
if (ourisalpha(s[i])) {
|
||||||
nocase[i] = false;
|
nocase.set(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -255,21 +255,27 @@ ue2_literal ue2_literal::substr(size_type pos, size_type n) const {
|
|||||||
ue2_literal rv;
|
ue2_literal rv;
|
||||||
rv.s = s.substr(pos, n);
|
rv.s = s.substr(pos, n);
|
||||||
size_type upper = nocase.size();
|
size_type upper = nocase.size();
|
||||||
if (n != string::npos && n + pos < nocase.size()) {
|
if (n != npos && n + pos < nocase.size()) {
|
||||||
upper = n + pos;
|
upper = n + pos;
|
||||||
}
|
}
|
||||||
rv.nocase.insert(rv.nocase.end(), nocase.begin() + pos,
|
|
||||||
nocase.begin() + upper);
|
rv.nocase.resize(upper - pos, false);
|
||||||
|
for (size_t i = pos; i < upper; i++) {
|
||||||
|
rv.nocase.set(i - pos, nocase.test(i));
|
||||||
|
}
|
||||||
|
assert(s.size() == nocase.size());
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
ue2_literal &ue2_literal::erase(size_type pos, size_type n) {
|
ue2_literal &ue2_literal::erase(size_type pos, size_type n) {
|
||||||
s.erase(pos, n);
|
s.erase(pos, n);
|
||||||
size_type upper = nocase.size();
|
|
||||||
if (n != string::npos && n + pos < nocase.size()) {
|
if (n != npos) {
|
||||||
upper = n + pos;
|
for (size_type i = pos + n; i < nocase.size(); i++) {
|
||||||
|
nocase.set(i - n, nocase.test(i));
|
||||||
}
|
}
|
||||||
nocase.erase(nocase.begin() + pos, nocase.begin() + upper);
|
}
|
||||||
|
nocase.resize(s.size());
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -306,29 +312,24 @@ bool ue2_literal::operator<(const ue2_literal &b) const {
|
|||||||
return nocase < b.nocase;
|
return nocase < b.nocase;
|
||||||
}
|
}
|
||||||
|
|
||||||
ue2_literal operator+(const ue2_literal &a, const ue2_literal &b) {
|
|
||||||
ue2_literal rv;
|
|
||||||
rv.s = a.s + b.s;
|
|
||||||
rv.nocase = a.nocase;
|
|
||||||
rv.nocase.insert(rv.nocase.end(), b.nocase.begin(), b.nocase.end());
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ue2_literal::operator+=(const ue2_literal &b) {
|
void ue2_literal::operator+=(const ue2_literal &b) {
|
||||||
s += b.s;
|
s += b.s;
|
||||||
nocase.insert(nocase.end(), b.nocase.begin(), b.nocase.end());
|
size_t prefix = nocase.size();
|
||||||
|
nocase.resize(prefix + b.nocase.size());
|
||||||
|
for (size_t i = 0; i < b.nocase.size(); i++) {
|
||||||
|
nocase.set(prefix + i, b.nocase[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ue2_literal::any_nocase() const {
|
bool ue2_literal::any_nocase() const {
|
||||||
return find(nocase.begin(), nocase.end(), true) != nocase.end();
|
return nocase.any();
|
||||||
}
|
}
|
||||||
|
|
||||||
void make_nocase(ue2_literal *lit) {
|
void make_nocase(ue2_literal *lit) {
|
||||||
ue2_literal rv;
|
ue2_literal rv;
|
||||||
|
|
||||||
for (ue2_literal::const_iterator it = lit->begin(); it != lit->end();
|
for (const auto &elem: *lit) {
|
||||||
++it) {
|
rv.push_back(elem.c, ourisalpha(elem.c));
|
||||||
rv.push_back(it->c, ourisalpha(it->c));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
lit->swap(rv);
|
lit->swap(rv);
|
||||||
|
@ -37,11 +37,13 @@
|
|||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
#include "util/compare.h"
|
#include "util/compare.h"
|
||||||
#include "util/hash.h"
|
#include "util/hash.h"
|
||||||
|
#include "util/operators.h"
|
||||||
|
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <boost/dynamic_bitset.hpp>
|
||||||
#include <boost/iterator/iterator_facade.hpp>
|
#include <boost/iterator/iterator_facade.hpp>
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
@ -80,7 +82,7 @@ struct ue2_case_string {
|
|||||||
bool nocase;
|
bool nocase;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ue2_literal {
|
struct ue2_literal : totally_ordered<ue2_literal> {
|
||||||
public:
|
public:
|
||||||
/// Single element proxy, pointed to by our const_iterator.
|
/// Single element proxy, pointed to by our const_iterator.
|
||||||
struct elem {
|
struct elem {
|
||||||
@ -108,38 +110,37 @@ public:
|
|||||||
private:
|
private:
|
||||||
friend class boost::iterator_core_access;
|
friend class boost::iterator_core_access;
|
||||||
void increment() {
|
void increment() {
|
||||||
++it; ++it_nc;
|
++idx;
|
||||||
}
|
}
|
||||||
void decrement() {
|
void decrement() {
|
||||||
--it; --it_nc;
|
--idx;
|
||||||
}
|
}
|
||||||
void advance(size_t n) {
|
void advance(size_t n) {
|
||||||
it += n; it_nc += n;
|
idx += n;
|
||||||
}
|
}
|
||||||
difference_type distance_to(const const_iterator &other) const {
|
difference_type distance_to(const const_iterator &other) const {
|
||||||
return other.it - it;
|
return other.idx - idx;
|
||||||
}
|
}
|
||||||
bool equal(const const_iterator &other) const {
|
bool equal(const const_iterator &other) const {
|
||||||
return it == other.it;
|
return idx == other.idx && lit == other.lit;
|
||||||
}
|
}
|
||||||
const elem dereference() const {
|
const elem dereference() const {
|
||||||
return elem(*it, *it_nc);
|
return elem(lit->s[idx], lit->nocase[idx]);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend struct ue2_literal;
|
friend struct ue2_literal;
|
||||||
const_iterator(const std::string::const_iterator &it_in,
|
const_iterator(const ue2_literal &lit_in, size_t idx_in)
|
||||||
const std::vector<bool>::const_iterator &it_nc_in)
|
: lit(&lit_in), idx(idx_in) {}
|
||||||
: it(it_in), it_nc(it_nc_in) {}
|
|
||||||
|
|
||||||
std::string::const_iterator it;
|
const ue2_literal *lit = nullptr;
|
||||||
std::vector<bool>::const_iterator it_nc;
|
size_t idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||||
|
using size_type = std::string::size_type;
|
||||||
|
static const size_type npos = std::string::npos;
|
||||||
|
|
||||||
typedef std::string::size_type size_type;
|
ue2_literal() = default;
|
||||||
|
|
||||||
ue2_literal() {}
|
|
||||||
ue2_literal(const std::string &s_in, bool nc_in);
|
ue2_literal(const std::string &s_in, bool nc_in);
|
||||||
ue2_literal(char c, bool nc_in);
|
ue2_literal(char c, bool nc_in);
|
||||||
ue2_literal(const ue2_literal &) = default;
|
ue2_literal(const ue2_literal &) = default;
|
||||||
@ -156,16 +157,16 @@ public:
|
|||||||
|
|
||||||
size_type length() const { return s.length(); }
|
size_type length() const { return s.length(); }
|
||||||
bool empty() const { return s.empty(); }
|
bool empty() const { return s.empty(); }
|
||||||
ue2_literal substr(size_type pos, size_type n = std::string::npos) const;
|
ue2_literal substr(size_type pos, size_type n = npos) const;
|
||||||
const char *c_str() const { return s.c_str(); }
|
const char *c_str() const { return s.c_str(); }
|
||||||
bool any_nocase() const;
|
bool any_nocase() const;
|
||||||
|
|
||||||
const_iterator begin() const {
|
const_iterator begin() const {
|
||||||
return const_iterator(s.begin(), nocase.begin());
|
return const_iterator(*this, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
const_iterator end() const {
|
const_iterator end() const {
|
||||||
return const_iterator(s.end(), nocase.end());
|
return const_iterator(*this, s.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
const_reverse_iterator rbegin() const {
|
const_reverse_iterator rbegin() const {
|
||||||
@ -176,22 +177,23 @@ public:
|
|||||||
return const_reverse_iterator(begin());
|
return const_reverse_iterator(begin());
|
||||||
}
|
}
|
||||||
|
|
||||||
ue2_literal &erase(size_type pos = 0, size_type n = std::string::npos);
|
ue2_literal &erase(size_type pos = 0, size_type n = npos);
|
||||||
void push_back(const elem &e) {
|
void push_back(const elem &e) {
|
||||||
push_back(e.c, e.nocase);
|
push_back(e.c, e.nocase);
|
||||||
}
|
}
|
||||||
|
|
||||||
void push_back(char c, bool nc);
|
void push_back(char c, bool nc);
|
||||||
const elem back() const { return elem(*s.rbegin(), nocase.back()); }
|
const elem back() const { return *rbegin(); }
|
||||||
friend ue2_literal operator+(const ue2_literal &a, const ue2_literal &b);
|
|
||||||
|
friend ue2_literal operator+(ue2_literal a, const ue2_literal &b) {
|
||||||
|
a += b;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
void operator+=(const ue2_literal &b);
|
void operator+=(const ue2_literal &b);
|
||||||
bool operator==(const ue2_literal &b) const {
|
bool operator==(const ue2_literal &b) const {
|
||||||
return s == b.s && nocase == b.nocase;
|
return s == b.s && nocase == b.nocase;
|
||||||
}
|
}
|
||||||
bool operator!=(const ue2_literal &b) const {
|
|
||||||
return !(*this == b);
|
|
||||||
}
|
|
||||||
bool operator<(const ue2_literal &b) const;
|
bool operator<(const ue2_literal &b) const;
|
||||||
|
|
||||||
void clear(void) { s.clear(); nocase.clear(); }
|
void clear(void) { s.clear(); nocase.clear(); }
|
||||||
@ -204,8 +206,9 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
friend const_iterator;
|
||||||
std::string s;
|
std::string s;
|
||||||
std::vector<bool> nocase; /* for trolling value */
|
boost::dynamic_bitset<> nocase;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Return a reversed copy of this literal.
|
/// Return a reversed copy of this literal.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user