Refactoring on the JSON parser

It also addresses issues #1576 and #1577
This commit is contained in:
Felipe Zimmerle 2017-11-10 16:07:24 -03:00
parent 23cf656f93
commit 023e7acbad
No known key found for this signature in database
GPG Key ID: E6DFB08CE8B11277
4 changed files with 208 additions and 192 deletions

View File

@ -2,6 +2,8 @@
v3.0.????? - ? v3.0.????? - ?
--------------------------- ---------------------------
- Refactoring on the JSON parser.
[Issue #1576, #1577 - Tobias Gutknecht, @zimmerle, @victorhora, @marcstern]
- Adds support to WEBAPPID variable. - Adds support to WEBAPPID variable.
[Issue #1027 - @zimmerle, @victorhora] [Issue #1027 - @zimmerle, @victorhora]
- Adds support for SecWebAppId. - Adds support for SecWebAppId.
@ -47,7 +49,7 @@ v3.0.????? - ?
- Makes auditlog more verbose on debug logs - Makes auditlog more verbose on debug logs
[Issue: #1559 - @zimmerle] [Issue: #1559 - @zimmerle]
- Enable support for AuditLogFormat - Enable support for AuditLogFormat
[Issue: #1583, #1493 and #1453 - @victorhora] [Issue: #1583, #1493 and #1453 - @victorhora]
- Adds macro expansion for @rx operator - Adds macro expansion for @rx operator
[Issue: #1528, #1536 - @asterite3, @zimmerle] [Issue: #1528, #1536 - @asterite3, @zimmerle]
- Considers under quoted variable while loading the rules. - Considers under quoted variable while loading the rules.

View File

@ -26,12 +26,119 @@
namespace modsecurity { namespace modsecurity {
namespace RequestBodyProcessor { namespace RequestBodyProcessor {
/**
 * Builds a JSON body processor bound to the given transaction and allocates
 * the yajl parser that will drive the static callbacks of this class.
 *
 * @param transaction transaction that receives the extracted arguments.
 */
JSON::JSON(Transaction *transaction)
    : m_transaction(transaction),
      m_handle(NULL),
      m_current_key("") {
    /**
     * Table of yajl event handlers. Function signatures and slot order are
     * described at
     * http://lloyd.github.com/yajl/yajl-1.0.12/structyajl__callbacks.html
     *
     * The integer and double slots are left empty; yajl_number is
     * registered instead.
     */
    static yajl_callbacks handlers = {
        yajl_null,
        yajl_boolean,
        NULL,  /* yajl_integer */
        NULL,  /* yajl_double */
        yajl_number,
        yajl_string,
        yajl_start_map,
        yajl_map_key,
        yajl_end_map,
        yajl_start_array,
        yajl_end_array
    };

    /**
     * Parser allocation: no custom allocator is supplied, and this object
     * is passed as the context pointer the callbacks cast back to JSON *.
     *
     * yajl_parser_config definition:
     * http://lloyd.github.io/yajl/yajl-2.0.1/yajl__parse_8h.html#aec816c5518264d2ac41c05469a0f986c
     *
     * TODO: make UTF8 validation optional, as it depends on Content-Encoding
     */
    m_handle = yajl_alloc(&handlers, NULL, this);

    /* Explicitly switch the yajl_allow_partial_values option off. */
    yajl_config(m_handle, yajl_allow_partial_values, 0);
}
/**
 * Destroys every container still on the stack (innermost first) and then
 * releases the yajl parser handle.
 */
JSON::~JSON() {
    while (!m_containers.empty()) {
        JSONContainer *innermost = m_containers.back();
        m_containers.pop_back();
        delete innermost;
    }

    yajl_free(m_handle);
}
/**
 * Initialization hook. No work is performed here; it unconditionally
 * reports success.
 *
 * @return always true.
 */
bool JSON::init() {
return true;
}
bool JSON::processChunk(const char *buf, unsigned int size, std::string *err) {
/* Feed our parser and catch any errors */
m_status = yajl_parse(m_handle,
(const unsigned char *)buf, size);
if (m_status != yajl_status_ok) {
const unsigned char *e = yajl_get_error(m_handle, 0,
(const unsigned char *)buf, size);
/* We need to free the yajl error message later, how to do this? */
err->assign((const char *)e);
return false;
}
return true;
}
bool JSON::complete(std::string *err) {
/* Wrap up the parsing process */
m_status = yajl_complete_parse(m_handle);
if (m_status != yajl_status_ok) {
const unsigned char *e = yajl_get_error(m_handle, 0, NULL, 0);
/* We need to free the yajl error message later, how to do this? */
err->assign((const char *)e);
return false;
}
return true;
}
/**
 * Registers one scalar JSON value as a transaction argument.
 *
 * The argument name is assembled from every open container: each one
 * contributes its name followed by ".array_<counter>" when it is an array,
 * or by "." otherwise. When the innermost container is an array its element
 * counter is advanced; otherwise the pending key from getCurrentKey() is
 * appended to the name.
 *
 * @param value textual form of the scalar being added.
 * @return always 1; the yajl callbacks in this class return it directly.
 */
int JSON::addArgument(const std::string& value) {
    std::string data;
    std::string path;

    for (size_t i = 0; i < m_containers.size(); i++) {
        JSONContainerArray *a =
            dynamic_cast<JSONContainerArray *>(m_containers[i]);
        path += m_containers[i]->m_name;
        if (a != NULL) {
            path += ".array_" + std::to_string(a->m_elementCounter);
        } else {
            path += ".";
        }
    }

    if (m_containers.empty()) {
        /*
         * Top-level scalar: no container is open, and calling back() on an
         * empty deque is undefined behaviour. getCurrentKey() supplies the
         * name to use in this situation.
         */
        data = getCurrentKey();
    } else {
        JSONContainerArray *a =
            dynamic_cast<JSONContainerArray *>(m_containers.back());
        if (a != NULL) {
            /* Array elements are addressed by position, not by key. */
            a->m_elementCounter++;
        } else {
            data = getCurrentKey();
        }
    }

    m_transaction->addArgument("JSON", path + data, value, 0);

    return 1;
}
/** /**
* Callback for hash key values; we use those to define the variable names * Callback for hash key values; we use those to define the variable names
* under ARGS. Whenever we reach a new key, we update the current key value. * under ARGS. Whenever we reach a new key, we update the current key value.
@ -48,51 +155,44 @@ int JSON::yajl_map_key(void *ctx, const unsigned char *key, size_t length) {
*/ */
safe_key.assign((const char *)key, length); safe_key.assign((const char *)key, length);
#ifndef NO_LOGS tthis->m_current_key = safe_key;
tthis->debug(9, "New JSON hash key '" + safe_key + "'");
#endif
/**
* TODO: How do we free the previously string value stored here?
*/
tthis->m_data.current_key = safe_key;
return 1; return 1;
} }
/** /**
* Callback for null values * Callback for null values
* *
*/ */
int JSON::yajl_null(void *ctx) { int JSON::yajl_null(void *ctx) {
JSON *tthis = reinterpret_cast<JSON *>(ctx); JSON *tthis = reinterpret_cast<JSON *>(ctx);
return tthis->addArgument(""); return tthis->addArgument("");
} }
/** /**
* Callback for boolean values * Callback for boolean values
*/ */
int JSON::yajl_boolean(void *ctx, int value) { int JSON::yajl_boolean(void *ctx, int value) {
JSON *tthis = reinterpret_cast<JSON *>(ctx); JSON *tthis = reinterpret_cast<JSON *>(ctx);
if (value) { if (value) {
return tthis->addArgument("true"); return tthis->addArgument("true");
} }
return tthis->addArgument("false"); return tthis->addArgument("false");
} }
/** /**
* Callback for string values * Callback for string values
*/ */
int JSON::yajl_string(void *ctx, const unsigned char *value, size_t length) { int JSON::yajl_string(void *ctx, const unsigned char *value, size_t length) {
JSON *tthis = reinterpret_cast<JSON *>(ctx); JSON *tthis = reinterpret_cast<JSON *>(ctx);
std::string v = std::string((const char*)value, length); std::string v = std::string((const char*)value, length);
return tthis->addArgument(v); return tthis->addArgument(v);
} }
/** /**
* Callback for numbers; YAJL can use separate callbacks for integers/longs and * Callback for numbers; YAJL can use separate callbacks for integers/longs and
* float/double values, but since we are not interested in using the numeric * float/double values, but since we are not interested in using the numeric
@ -101,191 +201,67 @@ int JSON::yajl_string(void *ctx, const unsigned char *value, size_t length) {
int JSON::yajl_number(void *ctx, const char *value, size_t length) { int JSON::yajl_number(void *ctx, const char *value, size_t length) {
JSON *tthis = reinterpret_cast<JSON *>(ctx); JSON *tthis = reinterpret_cast<JSON *>(ctx);
std::string v = std::string((const char*)value, length); std::string v = std::string((const char*)value, length);
return tthis->addArgument(v); return tthis->addArgument(v);
} }
/** /**
* Callback for a new hash, which indicates a new subtree, labeled as the * Callback for a new hash, which indicates a new subtree, labeled as the
* current argument name, is being created * current argument name, is being created
*/ */
int JSON::yajl_start_map(void *ctx) { int JSON::yajl_start_array(void *ctx) {
JSON *tthis = reinterpret_cast<JSON *>(ctx); JSON *tthis = reinterpret_cast<JSON *>(ctx);
std::string name = tthis->getCurrentKey();
/** tthis->m_containers.push_back((JSONContainer *)new JSONContainerArray(name));
* If we do not have a current_key, this is a top-level hash, so we do not return 1;
* need to do anything
*/
if (tthis->m_data.current_key.empty() == true) {
return true;
} }
/**
* Check if we are already inside a hash context, and append or create the
* current key name accordingly
*/
if (tthis->m_data.prefix.empty() == false) {
tthis->m_data.prefix.append("." + tthis->m_data.current_key);
} else {
tthis->m_data.prefix.assign(tthis->m_data.current_key);
}
#ifndef NO_LOGS int JSON::yajl_end_array(void *ctx) {
tthis->debug(9, "New JSON hash context (prefix '" + \ JSON *tthis = reinterpret_cast<JSON *>(ctx);
tthis->m_data.prefix + "')"); JSONContainer *a = tthis->m_containers.back();
#endif tthis->m_containers.pop_back();
delete a;
if (tthis->m_containers.size() > 0) {
JSONContainerArray *a = dynamic_cast<JSONContainerArray *>(tthis->m_containers.back());
if (a) {
a->m_elementCounter++;
}
}
return 1; return 1;
} }
int JSON::yajl_start_map(void *ctx) {
JSON *tthis = reinterpret_cast<JSON *>(ctx);
std::string name(tthis->getCurrentKey());
tthis->m_containers.push_back((JSONContainer *)new JSONContainerMap(name));
return 1;
}
/** /**
* Callback for end hash, meaning the current subtree is being closed, and that * Callback for end hash, meaning the current subtree is being closed, and that
* we should go back to the parent variable label * we should go back to the parent variable label
*/ */
int JSON::yajl_end_map(void *ctx) { int JSON::yajl_end_map(void *ctx) {
JSON *tthis = reinterpret_cast<JSON *>(ctx); JSON *tthis = reinterpret_cast<JSON *>(ctx);
size_t sep_pos = std::string::npos; JSONContainer *a = tthis->m_containers.back();
tthis->m_containers.pop_back();
delete a;
/** if (tthis->m_containers.size() > 0) {
* If we have no prefix, then this is the end of a top-level hash and JSONContainerArray *a = dynamic_cast<JSONContainerArray *>(tthis->m_containers.back());
* we don't do anything if (a) {
*/ a->m_elementCounter++;
if (tthis->m_data.prefix.empty() == true) {
return true;
} }
/**
* Current prefix might or not include a separator character; top-level
* hash keys do not have separators in the variable name
*/
sep_pos = tthis->m_data.prefix.find(".");
if (sep_pos != std::string::npos) {
std::string tmp = tthis->m_data.prefix;
tthis->m_data.prefix.assign(tmp, 0, sep_pos);
tthis->m_data.current_key.assign(tmp, sep_pos + 1,
tmp.length() - sep_pos - 1);
} else {
tthis->m_data.current_key.assign(tthis->m_data.prefix);
tthis->m_data.prefix = "";
} }
return 1; return 1;
} }
/**
 * Registers one scalar JSON value as a transaction argument named after the
 * current key, qualified by the prefix when one is set.
 *
 * @param value textual form of the scalar being added.
 * @return always 1.
 */
int JSON::addArgument(const std::string& value) {
    const bool haveKey = !m_data.current_key.empty();

    if (haveKey) {
        /* "prefix.key" inside a nested hash, plain "key" at the top level. */
        if (!m_data.prefix.empty()) {
            m_transaction->addArgument("JSON",
                m_data.prefix + "." + m_data.current_key, value, 0);
        } else {
            m_transaction->addArgument("JSON", m_data.current_key, value, 0);
        }
        return 1;
    }

    /*
     * Without a key there is no variable name to attach the value to, so
     * the value is dropped (and only logged).
     */
#ifndef NO_LOGS
    debug(3, "Cannot add scalar value without an associated key");
#endif
    return 1;
}
/**
 * Initialization hook. No work is performed here; it unconditionally
 * reports success.
 *
 * @return always true.
 */
bool JSON::init() {
return true;
}
bool JSON::processChunk(const char *buf, unsigned int size, std::string *err) {
/* Feed our parser and catch any errors */
m_data.status = yajl_parse(m_data.handle,
(const unsigned char *)buf, size);
if (m_data.status != yajl_status_ok) {
const unsigned char *e = yajl_get_error(m_data.handle, 0,
(const unsigned char *)buf, size);
/* We need to free the yajl error message later, how to do this? */
err->assign((const char *)e);
return false;
}
return true;
}
bool JSON::complete(std::string *err) {
/* Wrap up the parsing process */
m_data.status = yajl_complete_parse(m_data.handle);
if (m_data.status != yajl_status_ok) {
const unsigned char *e = yajl_get_error(m_data.handle, 0, NULL, 0);
/* We need to free the yajl error message later, how to do this? */
err->assign((const char *)e);
return false;
}
return true;
}
/**
 * Builds a JSON body processor bound to the given transaction, resets the
 * key-tracking state and allocates the yajl parser.
 *
 * @param transaction transaction that receives the extracted arguments.
 */
JSON::JSON(Transaction *transaction)
    : m_transaction(transaction) {
    /**
     * Table of yajl event handlers. The integer and double slots are left
     * empty (yajl_number is registered instead), and no array handlers are
     * installed.
     */
    static yajl_callbacks handlers = {
        yajl_null,
        yajl_boolean,
        NULL,  /* yajl_integer */
        NULL,  /* yajl_double */
        yajl_number,
        yajl_string,
        yajl_start_map,
        yajl_map_key,
        yajl_end_map,
        NULL,  /* yajl_start_array */
        NULL   /* yajl_end_array */
    };

#ifndef NO_LOGS
    debug(9, "JSON parser initialization");
#endif

    /* Start with neither a prefix nor a current key. */
    m_data.prefix.clear();
    m_data.current_key.clear();

    /**
     * Parser allocation: no custom allocator is supplied, and this object
     * is passed as the context pointer handed to the callbacks.
     *
     * yajl_parser_config definition:
     * http://lloyd.github.io/yajl/yajl-2.0.1/yajl__parse_8h.html#aec816c5518264d2ac41c05469a0f986c
     *
     * TODO: make UTF8 validation optional, as it depends on Content-Encoding
     */
#ifndef NO_LOGS
    debug(9, "yajl JSON parsing callback initialization");
#endif
    m_data.handle = yajl_alloc(&handlers, NULL, this);

    /* Explicitly switch the yajl_allow_partial_values option off. */
    yajl_config(m_data.handle, yajl_allow_partial_values, 0);
}
/**
 * Destructor: emits a level-9 debug message (unless NO_LOGS is defined) and
 * releases the yajl parser handle stored in m_data.
 */
JSON::~JSON() {
#ifndef NO_LOGS
debug(9, "JSON: Cleaning up JSON results");
#endif
/* Hand the parser handle back to yajl. */
yajl_free(m_data.handle);
}
} // namespace RequestBodyProcessor } // namespace RequestBodyProcessor
} // namespace modsecurity } // namespace modsecurity

View File

@ -33,17 +33,26 @@ namespace modsecurity {
namespace RequestBodyProcessor { namespace RequestBodyProcessor {
struct json_data { class JSONContainer {
/* yajl configuration and parser state */ public:
yajl_handle handle; JSONContainer(std::string name) : m_name(name) { };
yajl_status status; virtual ~JSONContainer() { };
std::string m_name;
/* prefix is used to create data hierarchy (i.e., 'parent.child.value') */
std::string prefix;
std::string current_key;
}; };
typedef struct json_data json_data;
/**
 * Container entry describing a JSON array that is currently open. On top of
 * the name kept by JSONContainer it carries a running element counter.
 */
class JSONContainerArray : public JSONContainer {
 public:
    JSONContainerArray(std::string name)
        : JSONContainer(name) { }

    /** Running element counter; a freshly created array starts at zero. */
    size_t m_elementCounter = 0;
};
/**
 * Container entry describing a JSON object (map). It adds no state of its
 * own; the constructor only forwards the name to JSONContainer.
 */
class JSONContainerMap : public JSONContainer {
public:
JSONContainerMap(std::string name) : JSONContainer(name) { }
};
class JSON { class JSON {
@ -57,8 +66,6 @@ class JSON {
int addArgument(const std::string& value); int addArgument(const std::string& value);
static int yajl_end_map(void *ctx);
static int yajl_start_map(void *ctx);
static int yajl_number(void *ctx, const char *value, size_t length); static int yajl_number(void *ctx, const char *value, size_t length);
static int yajl_string(void *ctx, const unsigned char *value, static int yajl_string(void *ctx, const unsigned char *value,
size_t length); size_t length);
@ -66,17 +73,47 @@ class JSON {
static int yajl_null(void *ctx); static int yajl_null(void *ctx);
static int yajl_map_key(void *ctx, const unsigned char *key, static int yajl_map_key(void *ctx, const unsigned char *key,
size_t length); size_t length);
static int yajl_end_map(void *ctx);
static int yajl_start_map(void *ctx);
static int yajl_start_array(void *ctx);
static int yajl_end_array(void *ctx);
#ifndef NO_LOGS #ifndef NO_LOGS
void debug(int a, std::string str) { void debug(int a, std::string str) {
m_transaction->debug(a, str); m_transaction->debug(a, str);
} }
#endif #endif
json_data m_data;
bool isPreviousArray() {
JSONContainerArray *prev = NULL;
if (m_containers.size() < 1) {
return false;
}
prev = dynamic_cast<JSONContainerArray *>(m_containers[m_containers.size() - 1]);
return prev != NULL;
}
/**
 * Produces the name to use for the value or container being processed and
 * consumes the pending map key, if any.
 *
 * @param emptyIsNull when true, a missing key yields "" instead of
 *                    "empty-key".
 * @return "json" when no container is open; otherwise the pending key
 *         (which is cleared as a side effect), or, when no key is pending,
 *         "" inside an array or with emptyIsNull set, and "empty-key" in
 *         every other case.
 */
std::string getCurrentKey(bool emptyIsNull = false) {
    if (m_containers.empty()) {
        return "json";
    }

    if (!m_current_key.empty()) {
        /* Hand the key out and leave the member empty for the next value. */
        std::string pending;
        pending.swap(m_current_key);
        return pending;
    }

    const bool blankName = isPreviousArray() || emptyIsNull;
    return blankName ? "" : "empty-key";
}
private: private:
std::deque<JSONContainer *> m_containers;
Transaction *m_transaction; Transaction *m_transaction;
std::string m_header; yajl_handle m_handle;
yajl_status m_status;
std::string m_current_key;
}; };
@ -86,3 +123,4 @@ class JSON {
#endif // WITH_YAJL #endif // WITH_YAJL
#endif // SRC_REQUEST_BODY_PROCESSOR_JSON_H_ #endif // SRC_REQUEST_BODY_PROCESSOR_JSON_H_

View File

@ -4,7 +4,7 @@
"version_min":300000, "version_min":300000,
"title":"Testing JSON request body parser 1/1", "title":"Testing JSON request body parser 1/1",
"expected":{ "expected":{
"debug_log": "Target value: \"bar\" \\(Variable: ARGS:foo\\)" "debug_log": "Target value: \"bar\" \\(Variable: ARGS:json.foo\\)"
}, },
"client":{ "client":{
"ip":"200.249.12.31", "ip":"200.249.12.31",
@ -35,7 +35,7 @@
"SecRuleEngine On", "SecRuleEngine On",
"SecRequestBodyAccess On", "SecRequestBodyAccess On",
"SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'200001',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"", "SecRule REQUEST_HEADERS:Content-Type \"application/json\" \"id:'200001',phase:1,t:none,t:lowercase,pass,nolog,ctl:requestBodyProcessor=JSON\"",
"SecRule ARGS:foo \"bar\" \"id:'200441',phase:3,log\"" "SecRule ARGS:json.foo \"bar\" \"id:'200441',phase:3,log\""
] ]
}, },
{ {
@ -43,7 +43,7 @@
"version_min":300000, "version_min":300000,
"title":"Testing JSON request body parser 1/1", "title":"Testing JSON request body parser 1/1",
"expected":{ "expected":{
"debug_log": "Target value: \"bar\" \\(Variable: ARGS:first_level.first_key\\)" "debug_log": "Target value: \"bar\" \\(Variable: ARGS:json.first_level.first_key\\)"
}, },
"client":{ "client":{
"ip":"200.249.12.31", "ip":"200.249.12.31",