diff --git a/CHANGES b/CHANGES index c67bc6fc..18813c15 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,8 @@ v3.0.????? - ? --------------------------- + - Refactoring on the JSON parser. + [Issue #1576, #1577 - Tobias Gutknecht, @zimmerle, @victorhora, @marcstern] - Adds support to WEBAPPID variable. [Issue #1027 - @zimmerle, @victorhora] - Adds support for SecWebAppId. @@ -47,7 +49,7 @@ v3.0.????? - ? - Makes auditlog more verbose on debug logs [Issue: #1559 - @zimmerle] - Enable support for AuditLogFormat - [Issue: #1583, #1493 and #1453 - @victorhora] + [Issue: #1583, #1493 and #1453 - @victorhora] - Adds macro expansion for @rx operator [Issue: #1528, #1536 - @asterite3, @zimmerle] - Consideres under quoted variable while loading the rules. diff --git a/src/request_body_processor/json.cc b/src/request_body_processor/json.cc index 93b739d3..951d17c1 100644 --- a/src/request_body_processor/json.cc +++ b/src/request_body_processor/json.cc @@ -26,11 +26,118 @@ namespace modsecurity { namespace RequestBodyProcessor { -/** - * yajl callback functions - * For more information on the function signatures and order, check - * http://lloyd.github.com/yajl/yajl-1.0.12/structyajl__callbacks.html - */ +JSON::JSON(Transaction *transaction) : m_transaction(transaction), + m_handle(NULL), + m_current_key("") { + /** + * yajl callback functions + * For more information on the function signatures and order, check + * http://lloyd.github.com/yajl/yajl-1.0.12/structyajl__callbacks.html + */ + + /** + * yajl configuration and callbacks + */ + static yajl_callbacks callbacks = { + yajl_null, + yajl_boolean, + NULL /* yajl_integer */, + NULL /* yajl_double */, + yajl_number, + yajl_string, + yajl_start_map, + yajl_map_key, + yajl_end_map, + yajl_start_array, + yajl_end_array + }; + + + /** + * yajl initialization + * + * yajl_parser_config definition: + * http://lloyd.github.io/yajl/yajl-2.0.1/yajl__parse_8h.html#aec816c5518264d2ac41c05469a0f986c + * + * TODO: make UTF8 validation 
optional, as it depends on Content-Encoding + */ + m_handle = yajl_alloc(&callbacks, NULL, this); + + yajl_config(m_handle, yajl_allow_partial_values, 0); +} + + +JSON::~JSON() { + while (m_containers.size() > 0) { + JSONContainer *a = m_containers.back(); + m_containers.pop_back(); + delete a; + } + yajl_free(m_handle); +} + + +bool JSON::init() { + return true; +} + + +bool JSON::processChunk(const char *buf, unsigned int size, std::string *err) { + /* Feed our parser and catch any errors */ + m_status = yajl_parse(m_handle, + (const unsigned char *)buf, size); + if (m_status != yajl_status_ok) { + unsigned char *e = yajl_get_error(m_handle, 0, + (const unsigned char *)buf, size); + err->assign((const char *)e); /* copy the text before releasing it */ + yajl_free_error(m_handle, e); + return false; + } + + return true; +} + + +bool JSON::complete(std::string *err) { + /* Wrap up the parsing process */ + m_status = yajl_complete_parse(m_handle); + if (m_status != yajl_status_ok) { + unsigned char *e = yajl_get_error(m_handle, 0, NULL, 0); 
+ err->assign((const char *)e); /* copy the text before releasing it */ + yajl_free_error(m_handle, e); + return false; + } + + return true; +} + + +int JSON::addArgument(const std::string& value) { + std::string data(""); + std::string path; + + for (size_t i = 0; i < m_containers.size(); i++) { + JSONContainerArray *a = dynamic_cast<JSONContainerArray *>(m_containers[i]); + path = path + m_containers[i]->m_name; + if (a != NULL) { + path = path + ".array_" + std::to_string(a->m_elementCounter); + } else { + path = path + "."; + } + } + + JSONContainerArray *a = m_containers.empty() ? NULL : dynamic_cast<JSONContainerArray *>(m_containers.back()); + if (a) { + a->m_elementCounter++; + } else { + data = getCurrentKey(); /* map member, or a bare top-level scalar */ + } + + m_transaction->addArgument("JSON", path + data, value, 0); + + return 1; +} + /** * Callback for hash key values; we use those to define the variable names @@ -48,51 +155,44 @@ int JSON::yajl_map_key(void *ctx, const unsigned char *key, size_t length) { */ safe_key.assign((const char *)key, length); -#ifndef NO_LOGS - tthis->debug(9, "New JSON hash key '" + safe_key + "'"); -#endif - - /** - * TODO: How do we free the previously string value stored here? 
- */ + tthis->m_current_key = safe_key; return 1; } + /** * Callback for null values * */ int JSON::yajl_null(void *ctx) { JSON *tthis = reinterpret_cast<JSON *>(ctx); - return tthis->addArgument(""); } + /** * Callback for boolean values */ int JSON::yajl_boolean(void *ctx, int value) { JSON *tthis = reinterpret_cast<JSON *>(ctx); - if (value) { return tthis->addArgument("true"); } - return tthis->addArgument("false"); } + /** * Callback for string values */ int JSON::yajl_string(void *ctx, const unsigned char *value, size_t length) { JSON *tthis = reinterpret_cast<JSON *>(ctx); std::string v = std::string((const char*)value, length); - return tthis->addArgument(v); } + /** * Callback for numbers; YAJL can use separate callbacks for integers/longs and * float/double values, but since we are not interested in using the numeric @@ -101,191 +201,67 @@ int JSON::yajl_string(void *ctx, const unsigned char *value, size_t length) { int JSON::yajl_number(void *ctx, const char *value, size_t length) { JSON *tthis = reinterpret_cast<JSON *>(ctx); std::string v = std::string((const char*)value, length); - return tthis->addArgument(v); } + /** - * Callback for a new hash, which indicates a new subtree, labeled as the + * Callback for a new array, which indicates a new subtree, labeled as the * current argument name, is being created */ -int JSON::yajl_start_map(void *ctx) { +int JSON::yajl_start_array(void *ctx) { JSON *tthis = reinterpret_cast<JSON *>(ctx); + std::string name = tthis->getCurrentKey(); + tthis->m_containers.push_back(new JSONContainerArray(name)); + return 1; +} - /** - * If we do not have a current_key, this is a top-level hash, so we do not - * need to do anything - */ - if (tthis->m_data.current_key.empty() == true) { - return true; + +int JSON::yajl_end_array(void *ctx) { + JSON *tthis = reinterpret_cast<JSON *>(ctx); + JSONContainer *a = tthis->m_containers.back(); + tthis->m_containers.pop_back(); + delete a; + if (tthis->m_containers.size() > 0) { + JSONContainerArray *parent = dynamic_cast<JSONContainerArray *>(tthis->m_containers.back()); + if 
(parent) { + parent->m_elementCounter++; /* the closed array was one element of its parent */ + } } - /** - * Check if we are already inside a hash context, and append or create the - * current key name accordingly - */ - if (tthis->m_data.prefix.empty() == false) { - tthis->m_data.prefix.append("." + tthis->m_data.current_key); - } else { - tthis->m_data.prefix.assign(tthis->m_data.current_key); - } - -#ifndef NO_LOGS - tthis->debug(9, "New JSON hash context (prefix '" + \ - tthis->m_data.prefix + "')"); -#endif - return 1; } + +int JSON::yajl_start_map(void *ctx) { + JSON *tthis = reinterpret_cast<JSON *>(ctx); + std::string name(tthis->getCurrentKey()); + tthis->m_containers.push_back(new JSONContainerMap(name)); + return 1; +} + + /** * Callback for end hash, meaning the current subtree is being closed, and that * we should go back to the parent variable label */ int JSON::yajl_end_map(void *ctx) { JSON *tthis = reinterpret_cast<JSON *>(ctx); - size_t sep_pos = std::string::npos; + JSONContainer *a = tthis->m_containers.back(); + tthis->m_containers.pop_back(); + delete a; - /** - * If we have no prefix, then this is the end of a top-level hash and - * we don't do anything - */ - if (tthis->m_data.prefix.empty() == true) { - return true; - } - - /** - * Current prefix might or not include a separator character; top-level - * hash keys do not have separators in the variable name - */ - sep_pos = tthis->m_data.prefix.find("."); - - if (sep_pos != std::string::npos) { - std::string tmp = tthis->m_data.prefix; - tthis->m_data.prefix.assign(tmp, 0, sep_pos); - tthis->m_data.current_key.assign(tmp, sep_pos + 1, - tmp.length() - sep_pos - 1); - } else { - tthis->m_data.current_key.assign(tthis->m_data.prefix); - tthis->m_data.prefix = ""; + if (tthis->m_containers.size() > 0) { + JSONContainerArray *parent = dynamic_cast<JSONContainerArray *>(tthis->m_containers.back()); + if (parent) { + parent->m_elementCounter++; /* the closed map was one element of its parent */ + } } return 1; } -int JSON::addArgument(const std::string& value) { - /** - * If we do not have a prefix, we cannot create a variable name - * to 
reference this argument; for now we simply ignore these - */ - if (m_data.current_key.empty()) { -#ifndef NO_LOGS - debug(3, "Cannot add scalar value without an associated key"); -#endif - return 1; - } - - if (m_data.prefix.empty()) { - m_transaction->addArgument("JSON", m_data.current_key, value, 0); - } else { - m_transaction->addArgument("JSON", m_data.prefix + "." + \ - m_data.current_key, value, 0); - } - - return 1; -} - - -bool JSON::init() { - return true; -} - - -bool JSON::processChunk(const char *buf, unsigned int size, std::string *err) { - /* Feed our parser and catch any errors */ - m_data.status = yajl_parse(m_data.handle, - (const unsigned char *)buf, size); - if (m_data.status != yajl_status_ok) { - const unsigned char *e = yajl_get_error(m_data.handle, 0, - (const unsigned char *)buf, size); - /* We need to free the yajl error message later, how to do this? */ - err->assign((const char *)e); - return false; - } - - return true; -} - - -bool JSON::complete(std::string *err) { - /* Wrap up the parsing process */ - m_data.status = yajl_complete_parse(m_data.handle); - if (m_data.status != yajl_status_ok) { - const unsigned char *e = yajl_get_error(m_data.handle, 0, NULL, 0); - /* We need to free the yajl error message later, how to do this? 
*/ - err->assign((const char *)e); - return false; - } - - return true; -} - - -JSON::JSON(Transaction *transaction) : m_transaction(transaction) { - /** - * yajl configuration and callbacks - */ - static yajl_callbacks callbacks = { - yajl_null, - yajl_boolean, - NULL /* yajl_integer */, - NULL /* yajl_double */, - yajl_number, - yajl_string, - yajl_start_map, - yajl_map_key, - yajl_end_map, - NULL /* yajl_start_array */, - NULL /* yajl_end_array */ - }; - - -#ifndef NO_LOGS - debug(9, "JSON parser initialization"); -#endif - - /** - * Prefix and current key are initially empty - */ - m_data.prefix = ""; - m_data.current_key = ""; - - /** - * yajl initialization - * - * yajl_parser_config definition: - * http://lloyd.github.io/yajl/yajl-2.0.1/yajl__parse_8h.html#aec816c5518264d2ac41c05469a0f986c - * - * TODO: make UTF8 validation optional, as it depends on Content-Encoding - */ -#ifndef NO_LOGS - debug(9, "yajl JSON parsing callback initialization"); -#endif - m_data.handle = yajl_alloc(&callbacks, NULL, this); - - yajl_config(m_data.handle, yajl_allow_partial_values, 0); -} - - -JSON::~JSON() { -#ifndef NO_LOGS - debug(9, "JSON: Cleaning up JSON results"); -#endif - yajl_free(m_data.handle); -} - - } // namespace RequestBodyProcessor } // namespace modsecurity diff --git a/src/request_body_processor/json.h b/src/request_body_processor/json.h index 98ac6b91..2bfef90a 100644 --- a/src/request_body_processor/json.h +++ b/src/request_body_processor/json.h @@ -33,17 +33,26 @@ namespace modsecurity { namespace RequestBodyProcessor { -struct json_data { - /* yajl configuration and parser state */ - yajl_handle handle; - yajl_status status; - - /* prefix is used to create data hierarchy (i.e., 'parent.child.value') */ - std::string prefix; - std::string current_key; +class JSONContainer { + public: + explicit JSONContainer(const std::string &name) : m_name(name) { } + virtual ~JSONContainer() { } + std::string m_name; }; -typedef struct json_data json_data; + +class JSONContainerArray 
: public JSONContainer { + public: + explicit JSONContainerArray(const std::string &name) : JSONContainer(name), + m_elementCounter(0) { } + size_t m_elementCounter; /* index used for the next ".array_N" path segment */ +}; + + +class JSONContainerMap : public JSONContainer { + public: + explicit JSONContainerMap(const std::string &name) : JSONContainer(name) { } +}; class JSON { @@ -57,8 +66,6 @@ class JSON { int addArgument(const std::string& value); - static int yajl_end_map(void *ctx); - static int yajl_start_map(void *ctx); static int yajl_number(void *ctx, const char *value, size_t length); static int yajl_string(void *ctx, const unsigned char *value, size_t length); @@ -66,17 +73,47 @@ class JSON { static int yajl_null(void *ctx); static int yajl_map_key(void *ctx, const unsigned char *key, size_t length); + static int yajl_end_map(void *ctx); + static int yajl_start_map(void *ctx); + static int yajl_start_array(void *ctx); + static int yajl_end_array(void *ctx); #ifndef NO_LOGS void debug(int a, std::string str) { m_transaction->debug(a, str); } #endif - json_data m_data; + + bool isPreviousArray() { + JSONContainerArray *prev = NULL; + if (m_containers.size() < 1) { + return false; + } + prev = dynamic_cast<JSONContainerArray *>(m_containers.back()); + return prev != NULL; + } + + std::string getCurrentKey(bool emptyIsNull = false) { + std::string ret(m_current_key); + if (m_containers.size() == 0) { + return "json"; /* no container open yet: name of the root */ + } + if (m_current_key.empty() == true) { + if (isPreviousArray() || emptyIsNull == true) { + return ""; + } + return "empty-key"; + } + m_current_key = ""; /* a key is handed out once, then cleared */ + return ret; + } private: + std::deque<JSONContainer *> m_containers; Transaction *m_transaction; - std::string m_header; + yajl_handle m_handle; + yajl_status m_status; + std::string m_current_key; }; @@ -86,3 +123,4 @@ class JSON { #endif // WITH_YAJL #endif // SRC_REQUEST_BODY_PROCESSOR_JSON_H_ + diff --git a/test/test-cases/regression/request-body-parser-json.json b/test/test-cases/regression/request-body-parser-json.json index e7b8f0c0..fc5749d7 100644 --- 
a/test/test-cases/regression/request-body-parser-json.json +++ b/test/test-cases/regression/request-body-parser-json.json @@ -4,7 +4,7 @@ "version_min":300000, "title":"Testing JSON request body parser 1/1", "expected":{ - "debug_log": "Target value: \"bar\" \\(Variable: ARGS:foo\\)" + "debug_log": "Target value: \"bar\" \\(Variable: ARGS:json.foo\\)" }, "client":{ "ip":"200.249.12.31", @@ -35,7 +35,7 @@ "SecRuleEngine On", "SecRequestBodyAccess On", "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'200001',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", - "SecRule ARGS:foo \"bar\" \"id:'200441',phase:3,log\"" + "SecRule ARGS:json.foo \"bar\" \"id:'200441',phase:3,log\"" ] }, { @@ -43,7 +43,7 @@ "version_min":300000, "title":"Testing JSON request body parser 1/1", "expected":{ - "debug_log": "Target value: \"bar\" \\(Variable: ARGS:first_level.first_key\\)" + "debug_log": "Target value: \"bar\" \\(Variable: ARGS:json.first_level.first_key\\)" }, "client":{ "ip":"200.249.12.31",