mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
hscollider: fix input length for UTF8 check
This commit is contained in:
parent
f877f14641
commit
08b00f6149
@ -118,7 +118,8 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
||||
|
||||
expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
|
||||
|
||||
if (expr.utf8 && !isValidUtf8(expression)) {
|
||||
const size_t len = strlen(expression);
|
||||
if (expr.utf8 && !isValidUtf8(expression, len)) {
|
||||
throw ParseError("Expression is not valid UTF-8.");
|
||||
}
|
||||
|
||||
|
@ -60,12 +60,11 @@ bool isAllowedCodepoint(u32 val) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isValidUtf8(const char *expression) {
|
||||
bool isValidUtf8(const char *expression, const size_t len) {
|
||||
if (!expression) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const size_t len = strlen(expression);
|
||||
const u8 *s = (const u8 *)expression;
|
||||
u32 val;
|
||||
|
||||
|
@ -29,10 +29,12 @@
|
||||
#ifndef PARSER_UTF8_VALIDATE_H
|
||||
#define PARSER_UTF8_VALIDATE_H
|
||||
|
||||
#include <cstddef> // size_t
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Validate that the given expression is well-formed UTF-8. */
|
||||
bool isValidUtf8(const char *expression);
|
||||
bool isValidUtf8(const char *expression, const size_t len);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@ -1060,7 +1060,7 @@ void addCorporaToQueue(ostream &out, BoundedQueue<TestUnit> &testq, unsigned id,
|
||||
// is undefined.
|
||||
if (utf8) {
|
||||
auto is_invalid_utf8 = [](const Corpus &corpus) {
|
||||
return !isValidUtf8(corpus.data.c_str());
|
||||
return !isValidUtf8(corpus.data.c_str(), corpus.data.size());
|
||||
};
|
||||
c.erase(remove_if(begin(c), end(c), is_invalid_utf8), end(c));
|
||||
}
|
||||
|
@ -118,5 +118,5 @@ INSTANTIATE_TEST_CASE_P(ValidUtf8, ValidUtf8Test, ValuesIn(valid_utf8_tests));
|
||||
TEST_P(ValidUtf8Test, check) {
|
||||
const auto &info = GetParam();
|
||||
SCOPED_TRACE(testing::Message() << "String is: " << printable(info.str));
|
||||
ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str()));
|
||||
ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str(), info.str.size()));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user