mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Add support for approximate matching in ue2collider
This commit is contained in:
parent
2de6706df2
commit
4c2b7cc04f
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -218,7 +218,7 @@ namespace {
|
||||
/** \brief Concrete implementation */
|
||||
class CorpusGeneratorImpl : public CorpusGenerator {
|
||||
public:
|
||||
CorpusGeneratorImpl(const NGHolder &graph_in, CorpusProperties &props);
|
||||
CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props);
|
||||
~CorpusGeneratorImpl() {}
|
||||
|
||||
void generateCorpus(vector<string> &data);
|
||||
@ -244,10 +244,13 @@ private:
|
||||
CorpusProperties &cProps;
|
||||
};
|
||||
|
||||
/**
 * \brief Construct the byte-oriented corpus generator over \a graph_in.
 *
 * Reading note for this diff view: the hunk header (-244,10 +244,13) implies
 * that the NGHolder signature line and the "// empty" body line are the two
 * lines removed by the commit, and that the NGWrapper signature and the
 * edit-distance block are the five lines added.
 *
 * Stores references to the graph and to \a props. If the pattern carries a
 * non-zero edit_distance and \a props has no editDistance set yet
 * (props.editDistance is zero), an edit distance is drawn with
 * props.rand(0, graph_in.edit_distance + 1) and written back into \a props.
 * Because \a props is taken by non-const reference, that write is made to the
 * caller's object.
 *
 * NOTE(review): whether props.rand() treats its upper bound as inclusive is
 * not visible here, so it is unclear if the drawn value can exceed
 * graph_in.edit_distance -- confirm this is intended.
 */
CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in,
|
||||
CorpusGeneratorImpl::CorpusGeneratorImpl(const NGWrapper &graph_in,
|
||||
CorpusProperties &props)
|
||||
: graph(graph_in), cProps(props) {
|
||||
// empty
|
||||
// if this pattern is to be matched approximately
|
||||
if (graph_in.edit_distance && !props.editDistance) {
|
||||
props.editDistance = props.rand(0, graph_in.edit_distance + 1);
|
||||
}
|
||||
}
|
||||
|
||||
void CorpusGeneratorImpl::generateCorpus(vector<string> &data) {
|
||||
@ -388,7 +391,7 @@ hit_limit:
|
||||
/** \brief Concrete implementation for UTF-8 */
|
||||
class CorpusGeneratorUtf8 : public CorpusGenerator {
|
||||
public:
|
||||
CorpusGeneratorUtf8(const NGHolder &graph_in, CorpusProperties &props);
|
||||
CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusProperties &props);
|
||||
~CorpusGeneratorUtf8() {}
|
||||
|
||||
void generateCorpus(vector<string> &data);
|
||||
@ -407,17 +410,21 @@ private:
|
||||
void addRandom(const min_max &mm, vector<unichar> *out);
|
||||
|
||||
/** \brief The NFA graph we operate over. */
|
||||
const NGHolder &graph;
|
||||
const NGWrapper &graph;
|
||||
|
||||
/** \brief Reference to our corpus generator properties object (stores some
|
||||
* state) */
|
||||
CorpusProperties &cProps;
|
||||
};
|
||||
|
||||
/**
 * \brief Construct the UTF-8 corpus generator over \a graph_in.
 *
 * Reading note for this diff view: the NGHolder signature line and the
 * "// empty" body line appear to be the lines removed by the commit; the
 * NGWrapper signature and the edit-distance check are their replacements.
 *
 * Stores references to the graph and to \a props, then rejects patterns with
 * a non-zero edit_distance by throwing CorpusGenerationFailure (by value).
 * The throw happens inside the constructor body, so no CorpusGeneratorUtf8
 * object exists afterwards.
 */
CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in,
|
||||
CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGWrapper &graph_in,
|
||||
CorpusProperties &props)
|
||||
: graph(graph_in), cProps(props) {
|
||||
// empty
|
||||
// we do not support Utf8 for approximate matching
|
||||
if (graph.edit_distance) {
|
||||
throw CorpusGenerationFailure("UTF-8 for edited patterns is not "
|
||||
"supported.");
|
||||
}
|
||||
}
|
||||
|
||||
void CorpusGeneratorUtf8::generateCorpus(vector<string> &data) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -45,6 +45,12 @@ class NGWrapper;
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
/**
 * \brief Failure object thrown (by value) when a corpus cannot be generated,
 * e.g. by the CorpusGeneratorUtf8 constructor for edited patterns.
 *
 * Carries only a human-readable description in \ref message. Note that this
 * type does not derive from std::exception, so handlers must catch
 * CorpusGenerationFailure explicitly.
 */
struct CorpusGenerationFailure {
    /**
     * \brief Build a failure carrying the description \a s.
     *
     * \a s is a by-value sink parameter and is deliberately NOT const:
     * std::move on a const std::string selects the copy constructor, which
     * would silently turn the move below into a second copy.
     */
    explicit CorpusGenerationFailure(std::string s) :
        message(std::move(s)) {}

    /** \brief Description of why corpus generation failed. */
    std::string message;
};
|
||||
|
||||
/** \brief Abstract interface to corpus generator tool. */
|
||||
class CorpusGenerator {
|
||||
public:
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -51,6 +51,7 @@ struct BoundaryReports;
|
||||
/*
 * Declaration of findMatches() over graph g and input string `input`.
 *
 * Reading note for this diff view: the hunk header (-51,6 +51,7) implies that
 * the "notEod, som, utf8" line is the one removed by the commit and the two
 * lines after it (introducing max_edit_distance and no longer listing `som`)
 * are its replacement.
 *
 * `matches` is the only non-const reference parameter, so it is presumably
 * the output set -- TODO confirm against the definition. Each entry is a
 * pair of size_t values; their meaning (e.g. start/end offsets) is not
 * visible from this declaration.
 */
void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
|
||||
const std::string &input,
|
||||
std::set<std::pair<size_t, size_t>> &matches,
|
||||
const bool notEod, const bool som, const bool utf8);
|
||||
const unsigned int max_edit_distance, const bool notEod,
|
||||
const bool utf8);
|
||||
|
||||
#endif // NG_FIND_MATCHES_H
|
||||
|
Loading…
x
Reference in New Issue
Block a user