From 0c8cc6e2cf88ba2da48dffe29807521c36ed8ce1 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Mon, 30 Sep 2024 18:53:19 +0200 Subject: [PATCH 01/19] Finish XMLArgs processing --- apache2/apache2_config.c | 40 +++++++ apache2/modsecurity.h | 5 + apache2/msc_xml.c | 231 +++++++++++++++++++++++++++++++++++---- apache2/msc_xml.h | 21 ++++ apache2/re_actions.c | 30 +++++ 5 files changed, 306 insertions(+), 21 deletions(-) diff --git a/apache2/apache2_config.c b/apache2/apache2_config.c index da10b4bf..1739af28 100644 --- a/apache2/apache2_config.c +++ b/apache2/apache2_config.c @@ -166,6 +166,7 @@ void *create_directory_config(apr_pool_t *mp, char *path) /* xml external entity */ dcfg->xml_external_entity = NOT_SET; + dcfg->parse_xml_into_args = NOT_SET; return dcfg; } @@ -640,6 +641,8 @@ void *merge_directory_configs(apr_pool_t *mp, void *_parent, void *_child) /* xml external entity */ merged->xml_external_entity = (child->xml_external_entity == NOT_SET ? parent->xml_external_entity : child->xml_external_entity); + merged->parse_xml_into_args = (child->parse_xml_into_args == NOT_SET + ? parent->parse_xml_into_args : child->parse_xml_into_args); return merged; } @@ -773,6 +776,7 @@ void init_directory_config(directory_config *dcfg) /* xml external entity */ if (dcfg->xml_external_entity == NOT_SET) dcfg->xml_external_entity = 0; + if (dcfg->parse_xml_into_args == NOT_SET) dcfg->parse_xml_into_args = 0; } @@ -3698,6 +3702,34 @@ static const char *cmd_cache_transformations(cmd_parms *cmd, void *_dcfg, return NULL; } +/** +* \brief Add SecParseXMLIntoArgs configuration option +* +* \param cmd Pointer to configuration data +* \param _dcfg Pointer to directory configuration +* \param p1 Pointer to configuration option +* +* \retval NULL On failure +* \retval apr_psprintf On Success +*/ +static const char *cmd_parse_xml_into_args(cmd_parms *cmd, void *_dcfg, const char *p1) +{ + assert(cmd != NULL); + assert(_dcfg != NULL); + assert(p1 != NULL); + // Normally useless code, left to be safe for the moment + if (_dcfg == NULL) { + ap_log_perror(APLOG_MARK, APLOG_EMERG, 0, cmd->pool, "cmd_hash_engine: _dcfg is NULL"); + return NULL; + } + directory_config *dcfg = (directory_config *)_dcfg; + if (strcasecmp(p1, "on") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_ON; } + else if (strcasecmp(p1, "off") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_OFF; } + else if (strcasecmp(p1, "onlyargs") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_ONLYARGS; } + else return apr_psprintf(cmd->pool, "ModSecurity: Invalid value for SecParseXMLIntoArgs: %s", p1); + + return NULL; +} /* -- Configuration directives definitions -- */ @@ -4466,5 +4498,13 @@ const command_rec module_directives[] = { "Set Hash parameter" ), + AP_INIT_TAKE1 ( + "SecParseXMLintoArgs", + cmd_parse_xml_into_args, + NULL, + CMD_SCOPE_ANY, + "On, Off or OnlyArgs" + ), + { NULL } }; diff --git a/apache2/modsecurity.h b/apache2/modsecurity.h index b3976f93..13334674 100644 --- a/apache2/modsecurity.h +++ b/apache2/modsecurity.h @@ -243,6 +243,10 @@ extern DSOLOCAL int *unicode_map_table; #define RULE_EXCEPTION_REMOVE_MSG 4 #define RULE_EXCEPTION_REMOVE_TAG 5 +#define MSC_XML_ARGS_OFF 0 +#define MSC_XML_ARGS_ON 1 +#define MSC_XML_ARGS_ONLYARGS 2 + #define NBSP 160 struct rule_exception { @@ -643,6 +647,7 @@ struct directory_config { /* xml */ int xml_external_entity; + int parse_xml_into_args; /* This will be used whenever ModSecurity will be ready * to ask the server for newer rules. diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index 2b668163..68297e07 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -14,6 +14,108 @@ #include "msc_xml.h" +static void msc_xml_on_start_elementns( + void *ctx, + const xmlChar *localname, + const xmlChar *prefix, + const xmlChar *URI, + int nb_namespaces, + const xmlChar **namespaces, + int nb_attributes, + int nb_defaulted, + const xmlChar **attributes +) { + + // get the length of XML tag (localname) + size_t taglen = strlen((const char *)localname); + modsec_rec * msr = (modsec_rec *)ctx; + msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state; + + // pathlen contains the concatenated strings of tags with '.' + // eg xml.root.level1.leaf + xml_parser_state->pathlen += (taglen + 1); + char *newpath = apr_pstrcat(msr->mp, xml_parser_state->currpath, ".", (char *)localname, NULL); + xml_parser_state->currpath = newpath; + + int *new_stack_item = (int *)apr_array_push(xml_parser_state->has_child_stack); + *new_stack_item = 0; + xml_parser_state->depth++; + + // if there is an item before the current one we set that has a child + if (xml_parser_state->depth > 1) { + int *parent_stack_item = &((int *)xml_parser_state->has_child_stack->elts)[xml_parser_state->has_child_stack->nelts - 2]; + *parent_stack_item = 1; + } + +} + +static void msc_xml_on_end_elementns( + void* ctx, + const xmlChar* localname, + const xmlChar* prefix, + const xmlChar* URI +) { + + size_t taglen = strlen((const char *)localname); + modsec_rec * msr = (modsec_rec *)ctx; + msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state; + + // if the node is a leaf we add it as argument + // get the top item from the stack which tells this info + int * top_stack_item = apr_array_pop(xml_parser_state->has_child_stack); + if (*top_stack_item == 0) { + + if (apr_table_elts(msr->arguments)->nelts >= msr->txcfg->arguments_limit) { + if (msr->txcfg->debuglog_level >= 4) { + msr_log(msr, 4, "Skipping request argument, over limit (XML): name \"%s\", value \"%s\"", + log_escape_ex(msr->mp, xml_parser_state->currpath, strlen(xml_parser_state->currpath)), + log_escape_ex(msr->mp, xml_parser_state->currval, strlen(xml_parser_state->currval))); + } + msr->msc_reqbody_error = 1; + msr->xml->xml_error = apr_psprintf(msr->mp, "More than %ld XML keys", msr->txcfg->arguments_limit); + xmlStopParser((xmlParserCtxtPtr)msr->xml->parsing_ctx_arg); + } + else { + + msc_arg * arg = (msc_arg *) apr_pcalloc(msr->mp, sizeof(msc_arg)); + + arg->name = xml_parser_state->currpath; + arg->name_len = strlen(arg->name); + arg->value = xml_parser_state->currval; + arg->value_len = strlen(xml_parser_state->currval); + arg->value_origin_len = arg->value_len; + //arg->value_origin_offset = value-base_offset; + arg->origin = "XML"; + + if (msr->txcfg->debuglog_level >= 9) { + msr_log(msr, 9, "Adding XML argument '%s' with value '%s'", + xml_parser_state->currpath, xml_parser_state->currval); + } + + apr_table_addn(msr->arguments, + log_escape_nq_ex(msr->mp, arg->name, arg->name_len), (void *) arg); + } // end else + } // end top_stack_item == 0 + + // decrease the length of current path length - +1 because of the '\0' + xml_parser_state->pathlen -= (taglen + 1); + + // -1 need because we don't need the '.' + char * newpath = apr_pstrndup(msr->mp, xml_parser_state->currpath, xml_parser_state->pathlen - 1); + xml_parser_state->currpath = newpath; + + xml_parser_state->depth--; +} + +static void msc_xml_on_characters(void *ctx, const xmlChar *ch, int len) { + + modsec_rec * msr = (modsec_rec *)ctx; + msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state; + + xml_parser_state->currval = apr_pstrndup(msr->mp, (const char *)ch, len); +} + + static xmlParserInputBufferPtr xml_unload_external_entity(const char *URI, xmlCharEncoding enc) { return NULL; @@ -37,6 +139,33 @@ int xml_init(modsec_rec *msr, char **error_msg) { entity = xmlParserInputBufferCreateFilenameDefault(xml_unload_external_entity); } + if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) { + + msr->xml->sax_handler = (xmlSAXHandler *)apr_pcalloc(msr->mp, sizeof(xmlSAXHandler)); + memset(msr->xml->sax_handler, 0, sizeof(xmlSAXHandler)); + if (msr->xml->sax_handler == NULL) { + *error_msg = apr_psprintf(msr->mp, "XML: Failed to create SAX handler."); + return -1; + } + + msr->xml->sax_handler->initialized = XML_SAX2_MAGIC; + msr->xml->sax_handler->startElementNs = msc_xml_on_start_elementns; + msr->xml->sax_handler->endElementNs = msc_xml_on_end_elementns; + msr->xml->sax_handler->characters = msc_xml_on_characters; + + // set the parser state struct + msr->xml->xml_parser_state = apr_pcalloc(msr->mp, sizeof(msc_xml_parser_state)); + msr->xml->xml_parser_state->depth = 0; + msr->xml->xml_parser_state->pathlen = 4; // "xml\0" + msr->xml->xml_parser_state->currpath = apr_pstrdup(msr->mp, "xml"); + msr->xml->xml_parser_state->currval = NULL; + msr->xml->xml_parser_state->currpathbufflen = 4; + // initialize the stack with item of 10 + // this will store the information about nodes + // 10 is just an initial value, it can be automatically incremented + msr->xml->xml_parser_state->has_child_stack = apr_array_make(msr->mp, 10, sizeof(int)); + } + return 1; } @@ -68,7 +197,7 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char * enable us to pass it the first chunk of data so that * it can attempt to auto-detect the encoding. */ - if (msr->xml->parsing_ctx == NULL) { + if (msr->xml->parsing_ctx == NULL && msr->xml->parsing_ctx_arg == NULL) { /* First invocation. */ @@ -86,18 +215,50 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char */ - msr->xml->parsing_ctx = xmlCreatePushParserCtxt(NULL, NULL, buf, size, "body.xml"); - if (msr->xml->parsing_ctx == NULL) { - *error_msg = apr_psprintf(msr->mp, "XML: Failed to create parsing context."); - return -1; + if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) { + msr->xml->parsing_ctx = xmlCreatePushParserCtxt(NULL, NULL, buf, size, "body.xml"); + if (msr->xml->parsing_ctx == NULL) { + *error_msg = apr_psprintf(msr->mp, "XML: Failed to create parsing context."); + return -1; + } + } + if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) { + msr->xml->parsing_ctx_arg = xmlCreatePushParserCtxt( + msr->xml->sax_handler, + msr, + buf, + size, + NULL); + if (msr->xml->parsing_ctx_arg == NULL) { + *error_msg = apr_psprintf(msr->mp, "XML: Failed to create parsing context for ARGS."); + return -1; + } } } else { /* Not a first invocation. */ + msr_log(msr, 4, "XML: Continue parsing."); + if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) { + xmlParseChunk(msr->xml->parsing_ctx, buf, size, 0); + if (msr->xml->parsing_ctx->wellFormed != 1) { + *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document."); + return -1; + } + } - xmlParseChunk(msr->xml->parsing_ctx, buf, size, 0); - if (msr->xml->parsing_ctx->wellFormed != 1) { - *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document."); + if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) { + if (xmlParseChunk(msr->xml->parsing_ctx_arg, buf, size, 0) != 0) { + if (msr->xml->xml_error) { + *error_msg = msr->xml->xml_error; + } + else { + *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document for ARGS."); + } + return -1; + } + } + if (msr->xml->xml_error) { + *error_msg = msr->xml->xml_error; return -1; } } @@ -114,23 +275,42 @@ int xml_complete(modsec_rec *msr, char **error_msg) { *error_msg = NULL; /* Only if we have a context, meaning we've done some work. */ - if (msr->xml->parsing_ctx != NULL) { - /* This is how we signalise the end of parsing to libxml. */ - xmlParseChunk(msr->xml->parsing_ctx, NULL, 0, 1); + if (msr->xml->parsing_ctx != NULL || msr->xml->parsing_ctx_arg != NULL) { + if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) { + /* This is how we signalise the end of parsing to libxml. */ + xmlParseChunk(msr->xml->parsing_ctx, NULL, 0, 1); - /* Preserve the results for our reference. */ - msr->xml->well_formed = msr->xml->parsing_ctx->wellFormed; - msr->xml->doc = msr->xml->parsing_ctx->myDoc; + /* Preserve the results for our reference. */ + msr->xml->well_formed = msr->xml->parsing_ctx->wellFormed; + msr->xml->doc = msr->xml->parsing_ctx->myDoc; - /* Clean up everything else. */ - xmlFreeParserCtxt(msr->xml->parsing_ctx); - msr->xml->parsing_ctx = NULL; - msr_log(msr, 4, "XML: Parsing complete (well_formed %u).", msr->xml->well_formed); + /* Clean up everything else. */ + xmlFreeParserCtxt(msr->xml->parsing_ctx); + msr->xml->parsing_ctx = NULL; + msr_log(msr, 4, "XML: Parsing complete (well_formed %u).", msr->xml->well_formed); - if (msr->xml->well_formed != 1) { - *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document."); - return -1; + if (msr->xml->well_formed != 1) { + *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document."); + return -1; + } } + + if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) { + if (xmlParseChunk(msr->xml->parsing_ctx_arg, NULL, 0, 1) != 0) { + if (msr->xml->xml_error) { + *error_msg = msr->xml->xml_error; + } + else { + *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document for ARGS."); + } + xmlFreeParserCtxt(msr->xml->parsing_ctx_arg); + msr->xml->parsing_ctx_arg = NULL; + return -1; + } + xmlFreeParserCtxt(msr->xml->parsing_ctx_arg); + msr->xml->parsing_ctx_arg = NULL; + } + } return 1; @@ -152,6 +332,15 @@ apr_status_t xml_cleanup(modsec_rec *msr) { xmlFreeParserCtxt(msr->xml->parsing_ctx); msr->xml->parsing_ctx = NULL; } + if (msr->xml->parsing_ctx_arg != NULL) { + + if (msr->xml->parsing_ctx_arg->myDoc) { + xmlFreeDoc(msr->xml->parsing_ctx_arg->myDoc); + } + + xmlFreeParserCtxt(msr->xml->parsing_ctx_arg); + msr->xml->parsing_ctx_arg = NULL; + } if (msr->xml->doc != NULL) { xmlFreeDoc(msr->xml->doc); msr->xml->doc = NULL; diff --git a/apache2/msc_xml.h b/apache2/msc_xml.h index f2390686..86910c43 100644 --- a/apache2/msc_xml.h +++ b/apache2/msc_xml.h @@ -20,15 +20,36 @@ typedef struct xml_data xml_data; #include "modsecurity.h" #include #include +#include /* Structures */ +struct msc_xml_parser_state { + apr_array_header_t * has_child_stack; + unsigned int depth; + unsigned int pathlen; + char * currpath; + char * currval; + size_t currpathbufflen; + apr_pool_t * mp; +}; + +typedef struct msc_xml_parser_state msc_xml_parser_state; + struct xml_data { xmlSAXHandler *sax_handler; xmlParserCtxtPtr parsing_ctx; xmlDocPtr doc; unsigned int well_formed; + + /* error reporting and XML array flag */ + char *xml_error; + + /* another parser context for arguments */ + xmlParserCtxtPtr parsing_ctx_arg; + /* parser state for SAX parser */ + msc_xml_parser_state *xml_parser_state; }; /* Functions */ diff --git a/apache2/re_actions.c b/apache2/re_actions.c index 510d6988..ef5d8310 100644 --- a/apache2/re_actions.c +++ b/apache2/re_actions.c @@ -1022,6 +1022,13 @@ static char *msre_action_ctl_validate(msre_engine *engine, apr_pool_t *mp, msre_ if (strcasecmp(value, "on") == 0) return NULL; if (strcasecmp(value, "off") == 0) return NULL; return apr_psprintf(mp, "Invalid setting for ctl name HashEngine: %s", value); + } + else + if (strcasecmp(name, "parseXMLintoArgs") == 0) { + if (strcasecmp(value, "on") == 0) return NULL; + if (strcasecmp(value, "off") == 0) return NULL; + if (strcasecmp(value, "onlyargs") == 0) return NULL; + return apr_psprintf(mp, "Invalid setting for ctl name parseXMLintoArgs: %s", value); } else { return apr_psprintf(mp, "Invalid ctl name setting: %s", name); } @@ -1377,6 +1384,29 @@ static apr_status_t msre_action_ctl_execute(modsec_rec *msr, apr_pool_t *mptmp, return -1; } apr_table_addn(msr->removed_targets, apr_pstrdup(msr->mp, p2), (void *)re); + return 1; + } else + if (strcasecmp(name, "parseXMLintoArgs") == 0) { + + if (strcasecmp(value, "on") == 0) { + msr->txcfg->parse_xml_into_args = MSC_XML_ARGS_ON; + msr->usercfg->parse_xml_into_args = MSC_XML_ARGS_ON; + } + else + if (strcasecmp(value, "off") == 0) { + msr->txcfg->parse_xml_into_args = MSC_XML_ARGS_OFF; + msr->usercfg->parse_xml_into_args = MSC_XML_ARGS_OFF; + } + else + if (strcasecmp(value, "onlyargs") == 0) { + msr->txcfg->parse_xml_into_args = MSC_XML_ARGS_ONLYARGS; + msr->usercfg->parse_xml_into_args = MSC_XML_ARGS_ONLYARGS; + } + + if (msr->txcfg->debuglog_level >= 4) { + msr_log(msr, 4, "Ctl: Set parseXmlIntoArgs to %s.", value); + } + return 1; } From 1953e372170394d042d0291b545c8b46b0bf05f2 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sun, 20 Apr 2025 21:44:47 +0200 Subject: [PATCH 02/19] Add nullptr check conditions --- apache2/msc_xml.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index 68297e07..a51737f3 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -238,7 +238,8 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char /* Not a first invocation. */ msr_log(msr, 4, "XML: Continue parsing."); - if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) { + if (msr->xml->parsing_ctx != NULL && + msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) { xmlParseChunk(msr->xml->parsing_ctx, buf, size, 0); if (msr->xml->parsing_ctx->wellFormed != 1) { *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document."); @@ -246,7 +247,8 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char } } - if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) { + if (msr->xml->parsing_ctx_arg != NULL && + msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) { if (xmlParseChunk(msr->xml->parsing_ctx_arg, buf, size, 0) != 0) { if (msr->xml->xml_error) { *error_msg = msr->xml->xml_error; @@ -276,7 +278,8 @@ int xml_complete(modsec_rec *msr, char **error_msg) { /* Only if we have a context, meaning we've done some work. */ if (msr->xml->parsing_ctx != NULL || msr->xml->parsing_ctx_arg != NULL) { - if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) { + if (msr->xml->parsing_ctx != NULL && + msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) { /* This is how we signalise the end of parsing to libxml. */ xmlParseChunk(msr->xml->parsing_ctx, NULL, 0, 1); @@ -295,7 +298,8 @@ int xml_complete(modsec_rec *msr, char **error_msg) { } } - if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) { + if (msr->xml->parsing_ctx_arg != NULL && + msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) { if (xmlParseChunk(msr->xml->parsing_ctx_arg, NULL, 0, 1) != 0) { if (msr->xml->xml_error) { *error_msg = msr->xml->xml_error; From c24ad689beb433b8bfdf3a1f05d3fc77d272c76d Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:01:07 +0200 Subject: [PATCH 03/19] Remove unnecessary comment. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Felipe Zipitría <3012076+fzipi@users.noreply.github.com> --- apache2/msc_xml.c | 1 - 1 file changed, 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index a51737f3..324e835b 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -84,7 +84,6 @@ static void msc_xml_on_end_elementns( arg->value = xml_parser_state->currval; arg->value_len = strlen(xml_parser_state->currval); arg->value_origin_len = arg->value_len; - //arg->value_origin_offset = value-base_offset; arg->origin = "XML"; if (msr->txcfg->debuglog_level >= 9) { From 78ca32f280d1d290428eb10a0246553b15e22993 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:02:07 +0200 Subject: [PATCH 04/19] Format comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Felipe Zipitría <3012076+fzipi@users.noreply.github.com> --- apache2/msc_xml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index 324e835b..d3d420ab 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -99,7 +99,7 @@ static void msc_xml_on_end_elementns( // decrease the length of current path length - +1 because of the '\0' xml_parser_state->pathlen -= (taglen + 1); - // -1 need because we don't need the '.' + // -1 is needed because we don't need the last '.' char * newpath = apr_pstrndup(msr->mp, xml_parser_state->currpath, xml_parser_state->pathlen - 1); xml_parser_state->currpath = newpath; From 055aec7bf634ec13a22944d8531136ea63730709 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:08:02 +0200 Subject: [PATCH 05/19] Comment clarification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Felipe Zipitría <3012076+fzipi@users.noreply.github.com> --- apache2/msc_xml.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/msc_xml.h b/apache2/msc_xml.h index 86910c43..3d23c60d 100644 --- a/apache2/msc_xml.h +++ b/apache2/msc_xml.h @@ -46,7 +46,7 @@ struct xml_data { /* error reporting and XML array flag */ char *xml_error; - /* another parser context for arguments */ + /* additional parser context for arguments */ xmlParserCtxtPtr parsing_ctx_arg; /* parser state for SAX parser */ msc_xml_parser_state *xml_parser_state; From 3829d65792e412fdce913f8550d242375ad83614 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:08:40 +0200 Subject: [PATCH 06/19] Debug message clarification Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/apache2_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/apache2_config.c b/apache2/apache2_config.c index 1739af28..19d04f98 100644 --- a/apache2/apache2_config.c +++ b/apache2/apache2_config.c @@ -3719,7 +3719,7 @@ static const char *cmd_parse_xml_into_args(cmd_parms *cmd, void *_dcfg, const ch assert(p1 != NULL); // Normally useless code, left to be safe for the moment if (_dcfg == NULL) { - ap_log_perror(APLOG_MARK, APLOG_EMERG, 0, cmd->pool, "cmd_hash_engine: _dcfg is NULL"); + ap_log_perror(APLOG_MARK, APLOG_EMERG, 0, cmd->pool, "cmd_parse_xml_into_args: _dcfg is NULL"); return NULL; } directory_config *dcfg = (directory_config *)_dcfg; From 321c554965270a2cae536f7fe8fe8c2887124d14 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:25:05 +0200 Subject: [PATCH 07/19] Typo fix. Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/msc_xml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index d3d420ab..4c64b5d9 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -304,7 +304,7 @@ int xml_complete(modsec_rec *msr, char **error_msg) { *error_msg = msr->xml->xml_error; } else { - *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document for ARGS."); + *error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document for ARGS."); } xmlFreeParserCtxt(msr->xml->parsing_ctx_arg); msr->xml->parsing_ctx_arg = NULL; From c3ab480979ca44b083474ccac3f4c4bd5c004ee9 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:31:43 +0200 Subject: [PATCH 08/19] Typo fix. Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/msc_xml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index 4c64b5d9..a24f5b77 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -241,7 +241,7 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) { xmlParseChunk(msr->xml->parsing_ctx, buf, size, 0); if (msr->xml->parsing_ctx->wellFormed != 1) { - *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document."); + *error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document."); return -1; } } From c19f90195cd84eca3d4b5dc6299e42c3ffbd5a90 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:32:25 +0200 Subject: [PATCH 09/19] Typo fix. Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/re_actions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/re_actions.c b/apache2/re_actions.c index ef5d8310..cf28b3f6 100644 --- a/apache2/re_actions.c +++ b/apache2/re_actions.c @@ -1024,7 +1024,7 @@ static char *msre_action_ctl_validate(msre_engine *engine, apr_pool_t *mp, msre_ return apr_psprintf(mp, "Invalid setting for ctl name HashEngine: %s", value); } else - if (strcasecmp(name, "parseXMLintoArgs") == 0) { + if (strcasecmp(name, "parseXmlIntoArgs") == 0) { if (strcasecmp(value, "on") == 0) return NULL; if (strcasecmp(value, "off") == 0) return NULL; if (strcasecmp(value, "onlyargs") == 0) return NULL; From bfe8047c04079382d9f65c2fabe5ebb4989c2824 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:32:55 +0200 Subject: [PATCH 10/19] Typo fix. Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/re_actions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/re_actions.c b/apache2/re_actions.c index cf28b3f6..59150d57 100644 --- a/apache2/re_actions.c +++ b/apache2/re_actions.c @@ -1028,7 +1028,7 @@ static char *msre_action_ctl_validate(msre_engine *engine, apr_pool_t *mp, msre_ if (strcasecmp(value, "on") == 0) return NULL; if (strcasecmp(value, "off") == 0) return NULL; if (strcasecmp(value, "onlyargs") == 0) return NULL; - return apr_psprintf(mp, "Invalid setting for ctl name parseXMLintoArgs: %s", value); + return apr_psprintf(mp, "Invalid setting for ctl name parseXmlIntoArgs: %s", value); } else { return apr_psprintf(mp, "Invalid ctl name setting: %s", name); } From 21d71bb603c60562d48e86c604524527244bd768 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:33:12 +0200 Subject: [PATCH 11/19] Typo fix. Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/re_actions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/re_actions.c b/apache2/re_actions.c index 59150d57..a028e42a 100644 --- a/apache2/re_actions.c +++ b/apache2/re_actions.c @@ -1386,7 +1386,7 @@ static apr_status_t msre_action_ctl_execute(modsec_rec *msr, apr_pool_t *mptmp, apr_table_addn(msr->removed_targets, apr_pstrdup(msr->mp, p2), (void *)re); return 1; } else - if (strcasecmp(name, "parseXMLintoArgs") == 0) { + if (strcasecmp(name, "parseXmlIntoArgs") == 0) { if (strcasecmp(value, "on") == 0) { msr->txcfg->parse_xml_into_args = MSC_XML_ARGS_ON; From f1ecdb1cf722227888b8ebc74926951c7f9acb8f Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:35:44 +0200 Subject: [PATCH 12/19] Typo fix. Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/msc_xml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index a24f5b77..7e0077bc 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -253,7 +253,7 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char *error_msg = msr->xml->xml_error; } else { - *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document for ARGS."); + *error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document for ARGS."); } return -1; } From b5188237f4d24560320077c00dcfab2e99a72ac0 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:36:32 +0200 Subject: [PATCH 13/19] Typo fix. Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/msc_xml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index 7e0077bc..26c811ec 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -279,7 +279,7 @@ int xml_complete(modsec_rec *msr, char **error_msg) { if (msr->xml->parsing_ctx != NULL || msr->xml->parsing_ctx_arg != NULL) { if (msr->xml->parsing_ctx != NULL && msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) { - /* This is how we signalise the end of parsing to libxml. */ + /* This is how we signal the end of parsing to libxml. */ xmlParseChunk(msr->xml->parsing_ctx, NULL, 0, 1); /* Preserve the results for our reference. */ From ed24e70c5829f344b853a25e4643a076bf2e9f41 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sat, 26 Apr 2025 20:37:00 +0200 Subject: [PATCH 14/19] Typo fix. Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/msc_xml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index 26c811ec..1e415bd1 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -292,7 +292,7 @@ int xml_complete(modsec_rec *msr, char **error_msg) { msr_log(msr, 4, "XML: Parsing complete (well_formed %u).", msr->xml->well_formed); if (msr->xml->well_formed != 1) { - *error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document."); + *error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document."); return -1; } } From c11bd6c6f29768990b38e265929056af181b8e7d Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sun, 27 Apr 2025 11:25:59 +0200 Subject: [PATCH 15/19] Fix retval logic explanation --- apache2/apache2_config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apache2/apache2_config.c b/apache2/apache2_config.c index 19d04f98..8e9d50d5 100644 --- a/apache2/apache2_config.c +++ b/apache2/apache2_config.c @@ -3709,8 +3709,8 @@ static const char *cmd_cache_transformations(cmd_parms *cmd, void *_dcfg, * \param _dcfg Pointer to directory configuration * \param p1 Pointer to configuration option * -* \retval NULL On failure -* \retval apr_psprintf On Success +* \retval NULL On Success +* \retval apr_psprintf On error */ static const char *cmd_parse_xml_into_args(cmd_parms *cmd, void *_dcfg, const char *p1) { From bd451080245f1bfd600feb978256b76e36d151f7 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Sun, 27 Apr 2025 11:28:18 +0200 Subject: [PATCH 16/19] Fix error message explanation. Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/msc_xml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index 1e415bd1..23cc364e 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -72,7 +72,7 @@ static void msc_xml_on_end_elementns( log_escape_ex(msr->mp, xml_parser_state->currval, strlen(xml_parser_state->currval))); } msr->msc_reqbody_error = 1; - msr->xml->xml_error = apr_psprintf(msr->mp, "More than %ld XML keys", msr->txcfg->arguments_limit); + msr->xml->xml_error = apr_psprintf(msr->mp, "More than %ld ARGS (GET + XML)", msr->txcfg->arguments_limit); xmlStopParser((xmlParserCtxtPtr)msr->xml->parsing_ctx_arg); } else { From 4c043a0889188dcc8b79b54973a50aa42c8f8e6e Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Mon, 28 Apr 2025 21:05:18 +0200 Subject: [PATCH 17/19] Change directive format to strict camel case --- apache2/apache2_config.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apache2/apache2_config.c b/apache2/apache2_config.c index 8e9d50d5..a831e375 100644 --- a/apache2/apache2_config.c +++ b/apache2/apache2_config.c @@ -3703,7 +3703,7 @@ static const char *cmd_cache_transformations(cmd_parms *cmd, void *_dcfg, } /** -* \brief Add SecParseXMLIntoArgs configuration option +* \brief Add SecParseXmlIntoArgs configuration option * * \param cmd Pointer to configuration data * \param _dcfg Pointer to directory configuration @@ -3726,7 +3726,7 @@ static const char *cmd_parse_xml_into_args(cmd_parms *cmd, void *_dcfg, const ch if (strcasecmp(p1, "on") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_ON; } else if (strcasecmp(p1, "off") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_OFF; } else if (strcasecmp(p1, "onlyargs") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_ONLYARGS; } - else return apr_psprintf(cmd->pool, "ModSecurity: Invalid value for SecParseXMLIntoArgs: %s", p1); + else return apr_psprintf(cmd->pool, "ModSecurity: Invalid value for SecParseXmlIntoArgs: %s", p1); return NULL; } @@ -4499,7 +4499,7 @@ const command_rec module_directives[] = { ), AP_INIT_TAKE1 ( - "SecParseXMLintoArgs", + "SecParseXmlIntoArgs", cmd_parse_xml_into_args, NULL, CMD_SCOPE_ANY, From 19b7e98fb6f27d98baf7a4061d4cdabe708fb1b2 Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Mon, 28 Apr 2025 21:12:32 +0200 Subject: [PATCH 18/19] Change node value's parsing to concatenate instead of duplicate it every time --- apache2/msc_xml.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index 23cc364e..4413b17a 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -40,6 +40,10 @@ static void msc_xml_on_start_elementns( int *new_stack_item = (int *)apr_array_push(xml_parser_state->has_child_stack); *new_stack_item = 0; xml_parser_state->depth++; + // set null to the current value + // this is necessary because if there is any text between the tags (new line, etc) + // it will be added to the current value + xml_parser_state->currval = NULL; // if there is an item before the current one we set that has a child if (xml_parser_state->depth > 1) { @@ -104,6 +108,7 @@ static void msc_xml_on_end_elementns( xml_parser_state->currpath = newpath; xml_parser_state->depth--; + xml_parser_state->currval = NULL; } static void msc_xml_on_characters(void *ctx, const xmlChar *ch, int len) { @@ -111,7 +116,19 @@ static void msc_xml_on_characters(void *ctx, const xmlChar *ch, int len) { modsec_rec * msr = (modsec_rec *)ctx; msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state; - xml_parser_state->currval = apr_pstrndup(msr->mp, (const char *)ch, len); + // libxml2 SAX parser will call this function multiple times + // during the parsing of a single node, if the value has multibyte + // characters, so we need to concatenate the values + xml_parser_state->currval = apr_pstrcat(msr->mp, + ((xml_parser_state->currval != NULL) ? xml_parser_state->currval : ""), + apr_pstrndup(msr->mp, (const char *)ch, len), + NULL); + // check if the memory allocation was successful + if (xml_parser_state->currval == NULL) { + msr->xml->xml_error = apr_psprintf(msr->mp, "Failed to allocate memory for XML value."); + xmlStopParser((xmlParserCtxtPtr)msr->xml->parsing_ctx_arg); + } + } From 87cbf9ea2e66c0e7a5336fc9118c9b83d8c9504e Mon Sep 17 00:00:00 2001 From: Ervin Hegedus Date: Wed, 30 Apr 2025 08:50:55 +0200 Subject: [PATCH 19/19] Update explanation Co-authored-by: Max Leske <250711+theseion@users.noreply.github.com> --- apache2/msc_xml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache2/msc_xml.c b/apache2/msc_xml.c index 4413b17a..8b0fa766 100644 --- a/apache2/msc_xml.c +++ b/apache2/msc_xml.c @@ -40,7 +40,7 @@ static void msc_xml_on_start_elementns( int *new_stack_item = (int *)apr_array_push(xml_parser_state->has_child_stack); *new_stack_item = 0; xml_parser_state->depth++; - // set null to the current value + // set the current value to null // this is necessary because if there is any text between the tags (new line, etc) // it will be added to the current value xml_parser_state->currval = NULL;