Mirror of https://github.com/VectorCamp/vectorscan.git (synced 2025-06-28 16:41:01 +03:00)
hscollider: fix input length for UTF8 check
This commit is contained in:
parent f877f14641
commit 08b00f6149
@ -118,7 +118,8 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
|||||||
|
|
||||||
expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
|
expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
|
||||||
|
|
||||||
if (expr.utf8 && !isValidUtf8(expression)) {
|
const size_t len = strlen(expression);
|
||||||
|
if (expr.utf8 && !isValidUtf8(expression, len)) {
|
||||||
throw ParseError("Expression is not valid UTF-8.");
|
throw ParseError("Expression is not valid UTF-8.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,12 +60,11 @@ bool isAllowedCodepoint(u32 val) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isValidUtf8(const char *expression) {
|
bool isValidUtf8(const char *expression, const size_t len) {
|
||||||
if (!expression) {
|
if (!expression) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t len = strlen(expression);
|
|
||||||
const u8 *s = (const u8 *)expression;
|
const u8 *s = (const u8 *)expression;
|
||||||
u32 val;
|
u32 val;
|
||||||
|
|
||||||
|
@ -29,10 +29,12 @@
|
|||||||
#ifndef PARSER_UTF8_VALIDATE_H
|
#ifndef PARSER_UTF8_VALIDATE_H
|
||||||
#define PARSER_UTF8_VALIDATE_H
|
#define PARSER_UTF8_VALIDATE_H
|
||||||
|
|
||||||
|
#include <cstddef> // size_t
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
/** \brief Validate that the given expression is well-formed UTF-8. */
|
/** \brief Validate that the given expression is well-formed UTF-8. */
|
||||||
bool isValidUtf8(const char *expression);
|
bool isValidUtf8(const char *expression, const size_t len);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -1060,7 +1060,7 @@ void addCorporaToQueue(ostream &out, BoundedQueue<TestUnit> &testq, unsigned id,
|
|||||||
// is undefined.
|
// is undefined.
|
||||||
if (utf8) {
|
if (utf8) {
|
||||||
auto is_invalid_utf8 = [](const Corpus &corpus) {
|
auto is_invalid_utf8 = [](const Corpus &corpus) {
|
||||||
return !isValidUtf8(corpus.data.c_str());
|
return !isValidUtf8(corpus.data.c_str(), corpus.data.size());
|
||||||
};
|
};
|
||||||
c.erase(remove_if(begin(c), end(c), is_invalid_utf8), end(c));
|
c.erase(remove_if(begin(c), end(c), is_invalid_utf8), end(c));
|
||||||
}
|
}
|
||||||
|
@ -118,5 +118,5 @@ INSTANTIATE_TEST_CASE_P(ValidUtf8, ValidUtf8Test, ValuesIn(valid_utf8_tests));
|
|||||||
TEST_P(ValidUtf8Test, check) {
|
TEST_P(ValidUtf8Test, check) {
|
||||||
const auto &info = GetParam();
|
const auto &info = GetParam();
|
||||||
SCOPED_TRACE(testing::Message() << "String is: " << printable(info.str));
|
SCOPED_TRACE(testing::Message() << "String is: " << printable(info.str));
|
||||||
ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str()));
|
ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str(), info.str.size()));
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user