From 08b00f6149b8506d66430a6be98d7873da9ed6fc Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Tue, 17 Apr 2018 23:26:04 -0400 Subject: [PATCH] hscollider: fix input length for UTF8 check --- src/compiler/compiler.cpp | 3 ++- src/parser/utf8_validate.cpp | 3 +-- src/parser/utf8_validate.h | 4 +++- tools/hscollider/main.cpp | 2 +- unit/internal/utf8_validate.cpp | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 7affb08d..c71ee4b9 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -118,7 +118,8 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, expr.utf8 = mode.utf8; /* utf8 may be set by parse() */ - if (expr.utf8 && !isValidUtf8(expression)) { + const size_t len = strlen(expression); + if (expr.utf8 && !isValidUtf8(expression, len)) { throw ParseError("Expression is not valid UTF-8."); } diff --git a/src/parser/utf8_validate.cpp b/src/parser/utf8_validate.cpp index cedaf944..50aa06d8 100644 --- a/src/parser/utf8_validate.cpp +++ b/src/parser/utf8_validate.cpp @@ -60,12 +60,11 @@ bool isAllowedCodepoint(u32 val) { return true; } -bool isValidUtf8(const char *expression) { +bool isValidUtf8(const char *expression, const size_t len) { if (!expression) { return true; } - const size_t len = strlen(expression); const u8 *s = (const u8 *)expression; u32 val; diff --git a/src/parser/utf8_validate.h b/src/parser/utf8_validate.h index 26a2f22e..6389a085 100644 --- a/src/parser/utf8_validate.h +++ b/src/parser/utf8_validate.h @@ -29,10 +29,12 @@ #ifndef PARSER_UTF8_VALIDATE_H #define PARSER_UTF8_VALIDATE_H +#include <cstddef> // size_t + namespace ue2 { /** \brief Validate that the given expression is well-formed UTF-8.
*/ -bool isValidUtf8(const char *expression); +bool isValidUtf8(const char *expression, const size_t len); } // namespace ue2 diff --git a/tools/hscollider/main.cpp b/tools/hscollider/main.cpp index e1e543cc..9877b6ae 100644 --- a/tools/hscollider/main.cpp +++ b/tools/hscollider/main.cpp @@ -1060,7 +1060,7 @@ void addCorporaToQueue(ostream &out, BoundedQueue &testq, unsigned id, // is undefined. if (utf8) { auto is_invalid_utf8 = [](const Corpus &corpus) { - return !isValidUtf8(corpus.data.c_str()); + return !isValidUtf8(corpus.data.c_str(), corpus.data.size()); }; c.erase(remove_if(begin(c), end(c), is_invalid_utf8), end(c)); } diff --git a/unit/internal/utf8_validate.cpp b/unit/internal/utf8_validate.cpp index f570e6b0..03357942 100644 --- a/unit/internal/utf8_validate.cpp +++ b/unit/internal/utf8_validate.cpp @@ -118,5 +118,5 @@ INSTANTIATE_TEST_CASE_P(ValidUtf8, ValidUtf8Test, ValuesIn(valid_utf8_tests)); TEST_P(ValidUtf8Test, check) { const auto &info = GetParam(); SCOPED_TRACE(testing::Message() << "String is: " << printable(info.str)); - ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str())); + ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str(), info.str.size())); }