mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Add support for approximate matching in ue2collider
This commit is contained in:
parent
2de6706df2
commit
4c2b7cc04f
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -218,7 +218,7 @@ namespace {
|
|||||||
/** \brief Concrete implementation */
|
/** \brief Concrete implementation */
|
||||||
class CorpusGeneratorImpl : public CorpusGenerator {
|
class CorpusGeneratorImpl : public CorpusGenerator {
|
||||||
public:
|
public:
|
||||||
CorpusGeneratorImpl(const NGHolder &graph_in, CorpusProperties &props);
|
CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props);
|
||||||
~CorpusGeneratorImpl() {}
|
~CorpusGeneratorImpl() {}
|
||||||
|
|
||||||
void generateCorpus(vector<string> &data);
|
void generateCorpus(vector<string> &data);
|
||||||
@ -244,10 +244,13 @@ private:
|
|||||||
CorpusProperties &cProps;
|
CorpusProperties &cProps;
|
||||||
};
|
};
|
||||||
|
|
||||||
CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in,
|
CorpusGeneratorImpl::CorpusGeneratorImpl(const NGWrapper &graph_in,
|
||||||
CorpusProperties &props)
|
CorpusProperties &props)
|
||||||
: graph(graph_in), cProps(props) {
|
: graph(graph_in), cProps(props) {
|
||||||
// empty
|
// if this pattern is to be matched approximately
|
||||||
|
if (graph_in.edit_distance && !props.editDistance) {
|
||||||
|
props.editDistance = props.rand(0, graph_in.edit_distance + 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorpusGeneratorImpl::generateCorpus(vector<string> &data) {
|
void CorpusGeneratorImpl::generateCorpus(vector<string> &data) {
|
||||||
@ -388,7 +391,7 @@ hit_limit:
|
|||||||
/** \brief Concrete implementation for UTF-8 */
|
/** \brief Concrete implementation for UTF-8 */
|
||||||
class CorpusGeneratorUtf8 : public CorpusGenerator {
|
class CorpusGeneratorUtf8 : public CorpusGenerator {
|
||||||
public:
|
public:
|
||||||
CorpusGeneratorUtf8(const NGHolder &graph_in, CorpusProperties &props);
|
CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusProperties &props);
|
||||||
~CorpusGeneratorUtf8() {}
|
~CorpusGeneratorUtf8() {}
|
||||||
|
|
||||||
void generateCorpus(vector<string> &data);
|
void generateCorpus(vector<string> &data);
|
||||||
@ -407,17 +410,21 @@ private:
|
|||||||
void addRandom(const min_max &mm, vector<unichar> *out);
|
void addRandom(const min_max &mm, vector<unichar> *out);
|
||||||
|
|
||||||
/** \brief The NFA graph we operate over. */
|
/** \brief The NFA graph we operate over. */
|
||||||
const NGHolder &graph;
|
const NGWrapper &graph;
|
||||||
|
|
||||||
/** \brief Reference to our corpus generator properties object (stores some
|
/** \brief Reference to our corpus generator properties object (stores some
|
||||||
* state) */
|
* state) */
|
||||||
CorpusProperties &cProps;
|
CorpusProperties &cProps;
|
||||||
};
|
};
|
||||||
|
|
||||||
CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in,
|
CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGWrapper &graph_in,
|
||||||
CorpusProperties &props)
|
CorpusProperties &props)
|
||||||
: graph(graph_in), cProps(props) {
|
: graph(graph_in), cProps(props) {
|
||||||
// empty
|
// we do not support Utf8 for approximate matching
|
||||||
|
if (graph.edit_distance) {
|
||||||
|
throw CorpusGenerationFailure("UTF-8 for edited patterns is not "
|
||||||
|
"supported.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CorpusGeneratorUtf8::generateCorpus(vector<string> &data) {
|
void CorpusGeneratorUtf8::generateCorpus(vector<string> &data) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -45,6 +45,12 @@ class NGWrapper;
|
|||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
/**
 * \brief Error object describing why corpus generation failed.
 *
 * Holds a human-readable description of the failure in \c message.
 */
struct CorpusGenerationFailure {
    /**
     * \param s Description of the failure; ownership is transferred into
     *          \c message.
     *
     * The parameter is deliberately a non-const value: std::move on a const
     * object selects the copy constructor, so a const parameter would turn
     * the intended move into a second copy of the string.
     */
    explicit CorpusGenerationFailure(std::string s) : message(std::move(s)) {}

    /** \brief Description of the failure. */
    std::string message;
};
|
||||||
|
|
||||||
/** \brief Abstract interface to corpus generator tool. */
|
/** \brief Abstract interface to corpus generator tool. */
|
||||||
class CorpusGenerator {
|
class CorpusGenerator {
|
||||||
public:
|
public:
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -51,6 +51,7 @@ struct BoundaryReports;
|
|||||||
void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
|
void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
|
||||||
const std::string &input,
|
const std::string &input,
|
||||||
std::set<std::pair<size_t, size_t>> &matches,
|
std::set<std::pair<size_t, size_t>> &matches,
|
||||||
const bool notEod, const bool som, const bool utf8);
|
const unsigned int max_edit_distance, const bool notEod,
|
||||||
|
const bool utf8);
|
||||||
|
|
||||||
#endif // NG_FIND_MATCHES_H
|
#endif // NG_FIND_MATCHES_H
|
||||||
|
Loading…
x
Reference in New Issue
Block a user