Merge pull request #3377 from airween/v2/standalonepcre2

fix: add PCRE2 capability to standalone module
This commit is contained in:
Ervin Hegedus 2025-05-10 20:56:57 +02:00 committed by GitHub
commit 5f70b3a8a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,162 +1,242 @@
/* /*
* ModSecurity for Apache 2.x, http://www.modsecurity.org/ * ModSecurity for Apache 2.x, http://www.modsecurity.org/
* Copyright (c) 2004-2013 Trustwave Holdings, Inc. (http://www.trustwave.com/) * Copyright (c) 2004-2013 Trustwave Holdings, Inc. (http://www.trustwave.com/)
* *
* You may not use this file except in compliance with * You may not use this file except in compliance with
* the License.  You may obtain a copy of the License at * the License.  You may obtain a copy of the License at
* *
*     http://www.apache.org/licenses/LICENSE-2.0 *     http://www.apache.org/licenses/LICENSE-2.0
* *
* If any of the files related to licensing are missing or if you have any * If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc. * other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org. * directly using the email address security@modsecurity.org.
*/ */
#include <limits.h> #include <limits.h>
#include "http_core.h" #include "http_core.h"
#include "http_request.h" #include "http_request.h"
#include "modsecurity.h" #include "modsecurity.h"
#include "apache2.h" #include "apache2.h"
#include "http_main.h" #include "http_main.h"
#include "http_connection.h" #include "http_connection.h"
#include "apr_optional.h" #include "apr_optional.h"
#include "mod_log_config.h" #include "mod_log_config.h"
#include "msc_logging.h" #include "msc_logging.h"
#include "msc_util.h" #include "msc_util.h"
#include "ap_mpm.h" #include "ap_mpm.h"
#include "scoreboard.h" #include "scoreboard.h"
#include "apr_version.h" #include "apr_version.h"
#include "apr_lib.h" #include "apr_lib.h"
#include "ap_config.h" #include "ap_config.h"
#include "http_config.h" #include "http_config.h"
static apr_status_t regex_cleanup(void *preg) static apr_status_t regex_cleanup(void *preg)
{ {
ap_regfree((ap_regex_t *) preg); ap_regfree((ap_regex_t *) preg);
return APR_SUCCESS; return APR_SUCCESS;
} }
/*
 * Compile a pattern into an ap_regex_t allocated from pool p.
 *
 * p       - pool that owns the returned structure
 * pattern - regular expression source
 * cflags  - AP_REG_* compile flags, forwarded to ap_regcomp()
 *
 * Returns the compiled regex, or NULL when ap_regcomp() reports a failure.
 * On success a cleanup is registered on p so the compiled pattern is
 * released via regex_cleanup().
 */
AP_DECLARE(ap_regex_t *) ap_pregcomp(apr_pool_t *p, const char *pattern,
                                     int cflags)
{
    ap_regex_t *compiled = apr_palloc(p, sizeof(ap_regex_t));
    int status = ap_regcomp(compiled, pattern, cflags);

    if (status != 0) {
        return NULL;
    }

    apr_pool_cleanup_register(p, (void *)compiled, regex_cleanup,
                              apr_pool_cleanup_null);
    return compiled;
}
AP_DECLARE(void) ap_regfree(ap_regex_t *preg) AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
{ {
(pcre_free)(preg->re_pcre); #ifdef WITH_PCRE2
} (pcre2_code_free)(preg->re_pcre);
#else
AP_DECLARE(int) ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags) (pcre_free)(preg->re_pcre);
{ #endif
const char *errorptr; }
int erroffset;
int options = 0; AP_DECLARE(int) ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags)
int nsub = 0; {
const char *errorptr;
if ((cflags & AP_REG_ICASE) != 0) options |= PCRE_CASELESS; int erroffset;
if ((cflags & AP_REG_NEWLINE) != 0) options |= PCRE_MULTILINE; int options = 0;
int nsub = 0;
preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
preg->re_erroffset = erroffset; #ifdef WITH_PCRE2
if ((cflags & AP_REG_ICASE) != 0) options |= PCRE2_CASELESS;
if (preg->re_pcre == NULL) return AP_REG_INVARG; if ((cflags & AP_REG_NEWLINE) != 0) options |= PCRE2_MULTILINE;
int error_number = 0;
pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT, &nsub); PCRE2_SIZE error_offset = 0;
preg->re_nsub = nsub; PCRE2_SPTR pcre2_pattern = (PCRE2_SPTR)pattern;
return 0;
} preg->re_pcre = pcre2_compile(pcre2_pattern, PCRE2_ZERO_TERMINATED,
options, &error_number, &error_offset, NULL);
#ifndef POSIX_MALLOC_THRESHOLD preg->re_erroffset = error_offset;
#define POSIX_MALLOC_THRESHOLD (10)
#endif if (preg->re_pcre == NULL) return AP_REG_INVARG;
AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string, pcre2_pattern_info((const pcre2_code *)preg->re_pcre, PCRE2_INFO_CAPTURECOUNT, &nsub);
apr_size_t nmatch, ap_regmatch_t pmatch[], preg->re_nsub = nsub;
int eflags)
{ #else // otherwise use PCRE
int rc; if ((cflags & AP_REG_ICASE) != 0) options |= PCRE_CASELESS;
int options = 0; if ((cflags & AP_REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
int *ovector = NULL;
int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
int allocated_ovector = 0; preg->re_erroffset = erroffset;
if ((eflags & AP_REG_NOTBOL) != 0) options |= PCRE_NOTBOL; if (preg->re_pcre == NULL) return AP_REG_INVARG;
if ((eflags & AP_REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT, &nsub);
((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */ preg->re_nsub = nsub;
#endif // end of WITH_PCRE
if (nmatch > 0) return 0;
{ }
if (nmatch <= POSIX_MALLOC_THRESHOLD)
{ #ifndef POSIX_MALLOC_THRESHOLD
ovector = &(small_ovector[0]); #define POSIX_MALLOC_THRESHOLD (10)
} #endif
else
{ AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
ovector = (int *)malloc(sizeof(int) * nmatch * 3); apr_size_t nmatch, ap_regmatch_t pmatch[],
if (ovector == NULL) return AP_REG_ESPACE; int eflags)
allocated_ovector = 1; {
} int rc;
} int options = 0;
int *ovector = NULL;
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string), int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
0, options, ovector, nmatch * 3); int allocated_ovector = 0;
if (rc == 0) rc = nmatch; /* All captured slots were filled in */ #ifdef WITH_PCRE2
if ((eflags & AP_REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
if (rc >= 0) if ((eflags & AP_REG_NOTEOL) != 0) options |= PCRE2_NOTEOL;
{ #else
apr_size_t i; if ((eflags & AP_REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
for (i = 0; i < (apr_size_t)rc; i++) if ((eflags & AP_REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
{ #endif
pmatch[i].rm_so = ovector[i*2];
pmatch[i].rm_eo = ovector[i*2+1]; ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */
}
if (allocated_ovector) free(ovector); if (nmatch > 0)
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; {
return 0; if (nmatch <= POSIX_MALLOC_THRESHOLD)
} {
ovector = &(small_ovector[0]);
else }
{ else
if (allocated_ovector) free(ovector); {
switch(rc) ovector = (int *)malloc(sizeof(int) * nmatch * 3);
{ if (ovector == NULL) return AP_REG_ESPACE;
case PCRE_ERROR_NOMATCH: return AP_REG_NOMATCH; allocated_ovector = 1;
case PCRE_ERROR_NULL: return AP_REG_INVARG; }
case PCRE_ERROR_BADOPTION: return AP_REG_INVARG; }
case PCRE_ERROR_BADMAGIC: return AP_REG_INVARG;
case PCRE_ERROR_UNKNOWN_NODE: return AP_REG_ASSERT; #ifdef WITH_PCRE2
case PCRE_ERROR_NOMEMORY: return AP_REG_ESPACE; {
#ifdef PCRE_ERROR_MATCHLIMIT PCRE2_SPTR pcre2_s;
case PCRE_ERROR_MATCHLIMIT: return AP_REG_ESPACE; int pcre2_ret;
#endif pcre2_match_data *match_data;
#ifdef PCRE_ERROR_BADUTF8 PCRE2_SIZE *pcre2_ovector = NULL;
case PCRE_ERROR_BADUTF8: return AP_REG_INVARG;
#endif pcre2_s = (PCRE2_SPTR)string;
#ifdef PCRE_ERROR_BADUTF8_OFFSET match_data = pcre2_match_data_create_from_pattern(preg->re_pcre, NULL);
case PCRE_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG; pcre2_match_context *match_context = pcre2_match_context_create(NULL);
#endif
default: return AP_REG_ASSERT; pcre2_ret = pcre2_match((const pcre2_code *)preg->re_pcre, pcre2_s, (int)strlen(string),
} 0, (uint32_t)options, match_data, match_context);
}
} if (match_data != NULL) {
pcre2_ovector = pcre2_get_ovector_pointer(match_data);
if (pcre2_ovector != NULL) {
for (int i = 0; ((i < pcre2_ret) && ((i*2) <= nmatch * 3)); i++) {
if ((i*2) < nmatch * 3) {
ovector[2*i] = pcre2_ovector[2*i];
ovector[2*i+1] = pcre2_ovector[2*i+1];
}
}
}
pcre2_match_data_free(match_data);
pcre2_match_context_free(match_context);
}
/*
pcre2_match() returns one more than the highest numbered capturing pair
that has been set (for example, 1 if there are no captures) - see pcre2_match's manual
*/
rc = pcre2_ret - 1;
}
#else
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string),
0, options, ovector, nmatch * 3);
#endif
if (rc == 0) rc = nmatch; /* All captured slots were filled in */
if (rc >= 0)
{
apr_size_t i;
for (i = 0; i < (apr_size_t)rc; i++)
{
pmatch[i].rm_so = ovector[i*2];
pmatch[i].rm_eo = ovector[i*2+1];
}
if (allocated_ovector) free(ovector);
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
return 0;
}
else
{
if (allocated_ovector) free(ovector);
switch(rc)
{
#ifdef WITH_PCRE2
case PCRE2_ERROR_NOMATCH: return AP_REG_NOMATCH;
case PCRE2_ERROR_NULL: return AP_REG_INVARG;
case PCRE2_ERROR_BADOPTION: return AP_REG_INVARG;
case PCRE2_ERROR_BADMAGIC: return AP_REG_INVARG;
// case PCRE2_ERROR_UNKNOWN_NODE: return AP_REG_ASSERT; not defined in PCRE2
case PCRE2_ERROR_NOMEMORY: return AP_REG_ESPACE;
#ifdef PCRE2_ERROR_MATCHLIMIT
case PCRE2_ERROR_MATCHLIMIT: return AP_REG_ESPACE;
#endif
#ifdef PCRE2_ERROR_BADUTF8
case PCRE2_ERROR_BADUTF8: return AP_REG_INVARG;
#endif
#ifdef PCRE2_ERROR_BADUTF8_OFFSET
case PCRE2_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG;
#endif
#else // with old PCRE
case PCRE_ERROR_NOMATCH: return AP_REG_NOMATCH;
case PCRE_ERROR_NULL: return AP_REG_INVARG;
case PCRE_ERROR_BADOPTION: return AP_REG_INVARG;
case PCRE_ERROR_BADMAGIC: return AP_REG_INVARG;
case PCRE_ERROR_UNKNOWN_NODE: return AP_REG_ASSERT;
case PCRE_ERROR_NOMEMORY: return AP_REG_ESPACE;
#ifdef PCRE_ERROR_MATCHLIMIT
case PCRE_ERROR_MATCHLIMIT: return AP_REG_ESPACE;
#endif
#ifdef PCRE_ERROR_BADUTF8
case PCRE_ERROR_BADUTF8: return AP_REG_INVARG;
#endif
#ifdef PCRE_ERROR_BADUTF8_OFFSET
case PCRE_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG;
#endif
#endif // end of WITH_PCRE
default: return AP_REG_ASSERT;
}
}
}