Finish XMLArgs processing in v3

This commit is contained in:
Ervin Hegedus
2025-04-20 18:21:28 +02:00
parent 01a0615887
commit 9e41a53760
12 changed files with 8399 additions and 7362 deletions

View File

@@ -25,11 +25,122 @@ namespace RequestBodyProcessor {
#ifdef WITH_LIBXML2
/*
* NodeData for parsing XML into args
*/
// A freshly created node has no children recorded yet.
NodeData::NodeData()
    : has_child(false) {
}
// Nothing to release explicitly.
NodeData::~NodeData() = default;
/*
* XMLNodes for parsing XML into args
*/
/**
 * Creates an empty parser state bound to the given transaction.
 *
 * @param transaction  transaction that receives the arguments extracted
 *                     from the XML body (used via addArgument()).
 */
XMLNodes::XMLNodes(Transaction *transaction) {
    nodes = {};
    node_depth = 0;
    currpath = "";
    currval = "";
    // The parser context is attached later, once it has been created.
    // Start from a well-defined null so that an early xmlStopParser() call
    // from the SAX callbacks never sees an indeterminate pointer.
    parsing_ctx_arg = nullptr;
    m_transaction = transaction;
}
// Nothing to release explicitly.
XMLNodes::~XMLNodes() = default;
/*
* SAX handler for parsing XML into args
*/
/**
 * Stateless collection of SAX callbacks that flatten an XML document into
 * "XML" arguments of the transaction.
 *
 * All state lives in the XMLNodes object passed as `ctx`:
 *  - `nodes` is a stack with one NodeData per currently open element,
 *  - `currpath` is the dot separated path of the open elements,
 *  - `currval` is the character data collected for the current element.
 *
 * Only leaf elements (elements without child elements) are reported, each
 * as one argument whose key is the element path and whose value is its text.
 */
class MSCSAXHandler {
 public:
    /**
     * Called when an element is opened.
     *
     * @param ctx        the XMLNodes parser state
     * @param localname  element name without namespace prefix
     */
    void onStartElement(void * ctx, const xmlChar *localname) {
        const std::string name = reinterpret_cast<const char*>(localname);
        XMLNodes* xml_data = static_cast<XMLNodes*>(ctx);

        xml_data->nodes.push_back(std::make_shared<NodeData>());
        xml_data->node_depth++;
        // FIXME - later, if a maximum depth of the XML tree is to be
        // enforced, this is the place to compare it with node_depth and
        // stop the parser.

        const auto open_elements = xml_data->nodes.size();
        if (open_elements > 1) {
            // Not the outermost element: separate it from its parent in the
            // path and remember on the PARENT that it is not a leaf.
            xml_data->currpath.append(".");
            xml_data->nodes[open_elements - 2]->has_child = true;
        }
        xml_data->currpath.append(name);

        // Text seen before this tag (e.g. indentation between tags) does not
        // belong to the new element.
        xml_data->currval.clear();
    }

    /**
     * Called when an element is closed. Leaf elements are stored as an
     * argument; if the transaction refuses it (addArgument() returns false)
     * the parser is asked to stop.
     *
     * @param ctx        the XMLNodes parser state
     * @param localname  element name without namespace prefix
     */
    void onEndElement(void * ctx, const xmlChar *localname) {
        const std::string name = reinterpret_cast<const char*>(localname);
        XMLNodes* xml_data = static_cast<XMLNodes*>(ctx);

        if (xml_data->nodes.empty()) {
            // No matching start element was recorded; nothing to close.
            return;
        }

        if (xml_data->nodes.back()->has_child == false) {
            // check the return value
            // if it is false, then stop parsing
            // this means the number of arguments reached the limit
            if (xml_data->m_transaction->addArgument("XML", xml_data->currpath, xml_data->currval, 0) == false) {
                xmlStopParser(xml_data->parsing_ctx_arg);
            }
        }

        // Drop this element's name from the path, together with the '.'
        // that precedes it when it is not the outermost element.
        const std::string::size_type tail =
            name.length() + ((xml_data->nodes.size() > 1) ? 1 : 0);
        if (tail <= xml_data->currpath.length()) {
            xml_data->currpath.erase(xml_data->currpath.length() - tail);
        }

        xml_data->nodes.pop_back();
        xml_data->node_depth--;

        // Text following the closing tag must not leak into the next leaf.
        xml_data->currval.clear();
    }

    /**
     * Called with character data of the current element. The parser may
     * hand over the text of one element in several pieces, so the data is
     * appended rather than replaced.
     *
     * @param ctx  the XMLNodes parser state
     * @param ch   character data (not NUL terminated)
     * @param len  number of bytes in `ch`
     */
    void onCharacters(void *ctx, const xmlChar *ch, int len) {
        if (ch == nullptr || len <= 0) {
            return;
        }
        XMLNodes* xml_data = static_cast<XMLNodes*>(ctx);
        xml_data->currval.append(reinterpret_cast<const char *>(ch),
            static_cast<std::string::size_type>(len));
    }
};
/*
 * C linkage trampolines registered in the xmlSAXHandler.
 *
 * `userData` is the XMLNodes parser state that was handed to the push
 * parser as user data; it is NOT an MSCSAXHandler object. The handler class
 * is stateless, so a local instance is used to dispatch the call and the
 * state pointer is forwarded unchanged.
 */
extern "C" {

/* SAX2 start-of-element callback; only the local element name is used. */
void MSC_startElement(void *userData,
    const xmlChar *name,
    const xmlChar *prefix,
    const xmlChar *URI,
    int nb_namespaces,
    const xmlChar **namespaces,
    int nb_attributes,
    int nb_defaulted,
    const xmlChar **attributes) {
    MSCSAXHandler handler;
    handler.onStartElement(userData, name);
}

/* SAX2 end-of-element callback; only the local element name is used. */
void MSC_endElement(
    void *userData,
    const xmlChar *name,
    const xmlChar* prefix,
    const xmlChar* URI) {
    MSCSAXHandler handler;
    handler.onEndElement(userData, name);
}

/* SAX character data callback; `ch` holds `len` bytes of text. */
void MSC_xmlcharacters(void *userData, const xmlChar *ch, int len) {
    MSCSAXHandler handler;
    handler.onCharacters(userData, ch, len);
}

}
/**
 * Binds the processor to a transaction and puts every parser related field
 * into its "nothing created yet" state.
 */
XML::XML(Transaction *transaction)
    : m_transaction(transaction) {
    // Document tree and the parser context that builds it.
    m_data.doc = nullptr;
    m_data.parsing_ctx = nullptr;

    // SAX based parsing of the body into arguments.
    m_data.parsing_ctx_arg = nullptr;
    m_data.sax_handler = nullptr;
    m_data.xml_parser_state = nullptr;

    m_data.xml_error.clear();
}
@@ -44,7 +155,6 @@ XML::~XML() {
}
}
// Prepares the XML body processor before any data is parsed.
// The visible code (a) installs unloadExternalEntity as libxml2's default
// input-buffer factory under a condition on m_secXMLExternalEntity that is
// only partly visible here, and (b) when SecParseXMLIntoArgs is enabled,
// creates the SAX handler and the parser state used to turn the body into
// arguments. Every visible path returns true.
bool XML::init() {
//xmlParserInputBufferCreateFilenameFunc entity;
if (m_transaction->m_rules->m_secXMLExternalEntity
@@ -55,6 +165,27 @@ bool XML::init() {
/*entity = */xmlParserInputBufferCreateFilenameDefault(
this->unloadExternalEntity);
}
// The SAX machinery is only needed for the "True" and "OnlyArgs" settings.
if (m_transaction->m_secXMLParseXmlIntoArgs
== RulesSetProperties::TrueConfigXMLParseXmlIntoArgs ||
m_transaction->m_secXMLParseXmlIntoArgs
== RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs) {
ms_dbg_a(m_transaction, 9,
"XML: SecParseXMLIntoArgs is set to " \
+ RulesSetProperties::configXMLParseXmlIntoArgsString(static_cast<RulesSetProperties::ConfigXMLParseXmlIntoArgs>(m_transaction->m_secXMLParseXmlIntoArgs)));
// Zero the whole handler so that every callback that is not set below
// stays null, then register the three callbacks used here.
m_data.sax_handler = std::make_unique<xmlSAXHandler>();
memset(m_data.sax_handler.get(), 0, sizeof(xmlSAXHandler));
// XML_SAX2_MAGIC marks the handler as SAX2, which is what makes libxml2
// use the startElementNs/endElementNs callbacks.
m_data.sax_handler->initialized = XML_SAX2_MAGIC;
m_data.sax_handler->startElementNs = &MSC_startElement;
m_data.sax_handler->endElementNs = &MSC_endElement;
m_data.sax_handler->characters = &MSC_xmlcharacters;
// set the parser state struct
m_data.xml_parser_state = std::make_unique<XMLNodes>(m_transaction);
// The XMLNodes constructor already zeroes these two; they are set again
// here explicitly.
m_data.xml_parser_state->node_depth = 0;
m_data.xml_parser_state->currval = "";
// Every argument path starts with the pseudo root "xml."; the document's
// root element name is appended directly after it.
m_data.xml_parser_state->currpath = "xml.";
}
return true;
}
@@ -72,7 +203,7 @@ bool XML::processChunk(const char *buf, unsigned int size,
* enable us to pass it the first chunk of data so that
* it can attempt to auto-detect the encoding.
*/
if (m_data.parsing_ctx == NULL) {
if (m_data.parsing_ctx == NULL && m_data.parsing_ctx_arg == NULL) {
/* First invocation. */
ms_dbg_a(m_transaction, 4, "XML: Initialising parser.");
@@ -90,27 +221,62 @@ bool XML::processChunk(const char *buf, unsigned int size,
*/
m_data.parsing_ctx = xmlCreatePushParserCtxt(NULL, NULL,
buf, size, "body.xml");
if (m_transaction->m_secXMLParseXmlIntoArgs
!= RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs) {
m_data.parsing_ctx = xmlCreatePushParserCtxt(NULL, NULL,
buf, size, "body.xml");
if (m_data.parsing_ctx == NULL) {
ms_dbg_a(m_transaction, 4,
"XML: Failed to create parsing context.");
error->assign("XML: Failed to create parsing context.");
return false;
if (m_data.parsing_ctx == NULL) {
ms_dbg_a(m_transaction, 4,
"XML: Failed to create parsing context.");
error->assign("XML: Failed to create parsing context.");
return false;
}
}
xmlSetGenericErrorFunc(m_data.parsing_ctx, null_error);
if (m_transaction->m_secXMLParseXmlIntoArgs
== RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs ||
m_transaction->m_secXMLParseXmlIntoArgs
== RulesSetProperties::TrueConfigXMLParseXmlIntoArgs) {
m_data.parsing_ctx_arg = xmlCreatePushParserCtxt(
m_data.sax_handler.get(),
m_data.xml_parser_state.get(),
buf,
size,
NULL);
if (m_data.parsing_ctx_arg == NULL) {
error->assign("XML: Failed to create parsing context for ARGS.");
return false;
}
}
return true;
}
/* Not a first invocation. */
xmlParseChunk(m_data.parsing_ctx, buf, size, 0);
if (m_data.parsing_ctx->wellFormed != 1) {
error->assign("XML: Failed to create parsing context.");
ms_dbg_a(m_transaction, 4, "XML: Failed parsing document.");
return false;
/* Feed the chunk to the document (DOM) parser unless only ARGS are wanted. */
if (m_transaction->m_secXMLParseXmlIntoArgs
    != RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs) {
    xmlSetGenericErrorFunc(m_data.parsing_ctx, null_error);
    xmlParseChunk(m_data.parsing_ctx, buf, size, 0);
    if (m_data.parsing_ctx->wellFormed != 1) {
        error->assign("XML: Failed parsing document.");
        ms_dbg_a(m_transaction, 4, "XML: Failed parsing document.");
        return false;
    }
}

/* Feed the chunk to the SAX parser that turns the body into ARGS. */
if (m_transaction->m_secXMLParseXmlIntoArgs
    == RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs ||
    m_transaction->m_secXMLParseXmlIntoArgs
    == RulesSetProperties::TrueConfigXMLParseXmlIntoArgs) {
    /*
     * Let the SAX callbacks reach their own parser context so that they
     * can stop it. The parser state only exists when parsing into ARGS is
     * enabled, therefore this must happen in this branch (and before the
     * callbacks can run), never in the document-only path above.
     */
    if (m_data.xml_parser_state != nullptr) {
        m_data.xml_parser_state->parsing_ctx_arg = m_data.parsing_ctx_arg;
    }
    xmlSetGenericErrorFunc(m_data.parsing_ctx_arg, null_error);
    xmlParseChunk(m_data.parsing_ctx_arg, buf, size, 0);
    if (m_data.parsing_ctx_arg->wellFormed != 1) {
        error->assign("XML: Failed parsing document for ARGS.");
        ms_dbg_a(m_transaction, 4, "XML: Failed parsing document for ARGS.");
        return false;
    }
}
return true;
@@ -119,24 +285,46 @@ bool XML::processChunk(const char *buf, unsigned int size,
bool XML::complete(std::string *error) {
/* Only if we have a context, meaning we've done some work. */
if (m_data.parsing_ctx != NULL) {
/* This is how we signalise the end of parsing to libxml. */
xmlParseChunk(m_data.parsing_ctx, NULL, 0, 1);
if (m_data.parsing_ctx != NULL || m_data.parsing_ctx_arg != NULL) {
if (m_transaction->m_secXMLParseXmlIntoArgs
!= RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs) {
/* This is how we signalise the end of parsing to libxml. */
xmlParseChunk(m_data.parsing_ctx, NULL, 0, 1);
/* Preserve the results for our reference. */
m_data.well_formed = m_data.parsing_ctx->wellFormed;
m_data.doc = m_data.parsing_ctx->myDoc;
/* Preserve the results for our reference. */
m_data.well_formed = m_data.parsing_ctx->wellFormed;
m_data.doc = m_data.parsing_ctx->myDoc;
/* Clean up everything else. */
xmlFreeParserCtxt(m_data.parsing_ctx);
m_data.parsing_ctx = NULL;
ms_dbg_a(m_transaction, 4, "XML: Parsing complete (well_formed " \
+ std::to_string(m_data.well_formed) + ").");
/* Clean up everything else. */
xmlFreeParserCtxt(m_data.parsing_ctx);
m_data.parsing_ctx = NULL;
ms_dbg_a(m_transaction, 4, "XML: Parsing complete (well_formed " \
+ std::to_string(m_data.well_formed) + ").");
if (m_data.well_formed != 1) {
error->assign("XML: Failed parsing document.");
ms_dbg_a(m_transaction, 4, "XML: Failed parsing document.");
return false;
if (m_data.well_formed != 1) {
error->assign("XML: Failed parsing document.");
ms_dbg_a(m_transaction, 4, "XML: Failed parsing document.");
return false;
}
}
/* Finish the SAX (ARGS) parser, if that mode is active. */
if (m_transaction->m_secXMLParseXmlIntoArgs
    == RulesSetProperties::OnlyArgsConfigXMLParseXmlIntoArgs ||
    m_transaction->m_secXMLParseXmlIntoArgs
    == RulesSetProperties::TrueConfigXMLParseXmlIntoArgs) {
    /* A terminating (empty, final) chunk tells libxml the input is over. */
    const int parse_rc = xmlParseChunk(m_data.parsing_ctx_arg, NULL, 0, 1);

    /* The context is released whether or not the final parse succeeded. */
    xmlFreeParserCtxt(m_data.parsing_ctx_arg);
    m_data.parsing_ctx_arg = NULL;

    if (parse_rc != 0) {
        /* Prefer a recorded error text, fall back to a generic message. */
        if (m_data.xml_error.empty()) {
            error->assign("XML: Failed parsing document for ARGS.");
        } else {
            error->assign(m_data.xml_error);
        }
        return false;
    }
}
}