diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 913eaa0e..ea8e88a9 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -560,13 +560,13 @@ unichar readUtf8CodePoint4c(const char *s) { throw LocatedParseError("(*UCP) must be at start of " "expression, encountered"); }; - 'UTF16)' => { - throw LocatedParseError("(*UTF16) not supported"); - }; - 'UTF32)' => { - throw LocatedParseError("(*UTF32) not supported"); - }; - any => { + # Use the control verb mini-parser to report an error for this + # unsupported/unknown verb. + [^)]+ ')' => { + ParseMode temp_mode; + assert(ts - 2 >= ptr); // parser needs the '(*' at the start too. + read_control_verbs(ts - 2, te, (ts - 2 - ptr), temp_mode); + assert(0); // Should have thrown a parse error. throw LocatedParseError("Unknown control verb"); }; *|; @@ -1838,7 +1838,7 @@ unique_ptr parse(const char *ptr, ParseMode &globalMode) { // First, read the control verbs, set any global mode flags and move the // ptr forward. - p = read_control_verbs(p, pe, globalMode); + p = read_control_verbs(p, pe, 0, globalMode); const char *eof = pe; int cs; diff --git a/src/parser/control_verbs.h b/src/parser/control_verbs.h index 9cf5b116..58934ec2 100644 --- a/src/parser/control_verbs.h +++ b/src/parser/control_verbs.h @@ -34,11 +34,13 @@ #ifndef CONTROL_VERBS_H #define CONTROL_VERBS_H +#include "ue2common.h" + namespace ue2 { struct ParseMode; -const char *read_control_verbs(const char *ptr, const char *end, +const char *read_control_verbs(const char *ptr, const char *end, size_t start, ParseMode &mode); } // namespace ue2 diff --git a/src/parser/control_verbs.rl b/src/parser/control_verbs.rl index 7eb9b86c..1d3e33a9 100644 --- a/src/parser/control_verbs.rl +++ b/src/parser/control_verbs.rl @@ -43,7 +43,7 @@ using namespace std; namespace ue2 { -const char *read_control_verbs(const char *ptr, const char *end, +const char *read_control_verbs(const char *ptr, const char *end, size_t start, ParseMode &mode) { const char *p = ptr; const char *pe = end; @@ -108,7 +108,7 @@ const char *read_control_verbs(const char *ptr, const char *end, %% write exec; } catch (LocatedParseError &error) { if (ts >= ptr && ts <= pe) { - error.locate(ts - ptr); + error.locate(ts - ptr + start); } else { error.locate(0); } diff --git a/src/parser/parse_error.cpp b/src/parser/parse_error.cpp index 6245adb9..e7f60b26 100644 --- a/src/parser/parse_error.cpp +++ b/src/parser/parse_error.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,9 +44,13 @@ ParseError::~ParseError() {} LocatedParseError::~LocatedParseError() {} void LocatedParseError::locate(size_t offset) { + if (finalized) { + return; + } std::ostringstream str; str << reason << " at index " << offset << "."; reason = str.str(); + finalized = true; } } diff --git a/src/parser/parse_error.h b/src/parser/parse_error.h index e727991d..4556ed5e 100644 --- a/src/parser/parse_error.h +++ b/src/parser/parse_error.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,8 +30,8 @@ * \brief Parse/Compile exceptions. */ -#ifndef PARSE_ERROR_H_A02047D1AA16C9 -#define PARSE_ERROR_H_A02047D1AA16C9 +#ifndef PARSE_ERROR_H +#define PARSE_ERROR_H #include "util/compile_error.h" @@ -44,22 +44,24 @@ class ParseError : public CompileError { public: // Note: 'why' should describe why the error occurred and end with a // full stop, but no line break. - explicit ParseError(const std::string &why) : CompileError(why) {} + explicit ParseError(std::string why) : CompileError(std::move(why)) {} ~ParseError() override; }; class LocatedParseError : public ParseError { public: - explicit LocatedParseError(const std::string &why) : ParseError(".") { - reason = why; // don't use ParseError ctor + explicit LocatedParseError(std::string why) : ParseError(".") { + reason = std::move(why); // don't use ParseError ctor } ~LocatedParseError() override; void locate(size_t offset); +private: + bool finalized = false; //!< true when locate() has been called. }; } // namespace ue2 -#endif /* PARSE_ERROR_H_A02047D1AA16C9 */ +#endif /* PARSE_ERROR_H */ diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 52287ec0..d4de452a 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -92,7 +92,7 @@ 93:/a\o{777}/ #Value in \o{...} sequence is too large at index 1. 94:/(*UTF16)foo/ #Unsupported control verb (*UTF16) at index 0. 95:/(*BSR_UNICODE)abc/ #Unsupported control verb (*BSR_UNICODE) at index 0. -96:/a+(*SKIP)b/ #Unknown control verb at index 4. +96:/a+(*SKIP)b/ #Unknown control verb (*SKIP) at index 2. 97:/foo(*/ #Invalid repeat at index 4. 98:/[:\]:]/ #POSIX named classes are only supported inside a class at index 0. 99:/[[:[:]/ #Invalid POSIX named class at index 1.