From 899ee0c365cb54efc33d65edd887da401ee421b4 Mon Sep 17 00:00:00 2001 From: Felipe Zimmerle Date: Thu, 28 Aug 2014 17:15:26 -0700 Subject: [PATCH] Adds support to load remote resources to pmFromFile and ipMatchFromFile Initially those directives were only able to load content from a local file. This commit extends this functionality, allowing the user to provide an HTTP URI that can be downloaded and loaded by ModSecurity. Initially the download is associated with a server restart. In future versions we expect to reload such resources as they become outdated (without needing to restart the server). --- apache2/msc_util.c | 158 ++++++++++++++++++- apache2/msc_util.h | 9 ++ apache2/re_operators.c | 212 +++++++++++++++++++------- tests/Makefile.am | 1 + tests/regression/misc/30-pmfromfile.t | 29 ++++ 5 files changed, 355 insertions(+), 54 deletions(-) create mode 100644 tests/regression/misc/30-pmfromfile.t diff --git a/apache2/msc_util.c b/apache2/msc_util.c index 33d0ddba..1647bc44 100644 --- a/apache2/msc_util.c +++ b/apache2/msc_util.c @@ -23,8 +23,11 @@ #include "msc_util.h" #include +#include #include "modsecurity_config.h" +#include "curl/curl.h" + /** * NOTE: Be careful as these can ONLY be used on static values for X. * (i.e. VALID_HEX(c++) will NOT work) @@ -2593,7 +2596,7 @@ int ip_tree_from_file(TreeRoot **rtree, char *uri, tnode = TreeAddIP(start, (*rtree)->ipv6_tree, IPV6_TREE); } #endif - + if (tnode == NULL) { *error_msg = apr_psprintf(mp, "Could not add entry " \ @@ -2610,6 +2613,140 @@ int ip_tree_from_file(TreeRoot **rtree, char *uri, return 0; } +int ip_tree_from_uri(TreeRoot **rtree, char *uri, + apr_pool_t *mp, char **error_msg) +{ + TreeNode *tnode = NULL; + apr_status_t rc; + int line = 0; + apr_file_t *fd; + char *start; + char *end; + char buf[HUGE_STRING_LEN + 1]; // FIXME: 2013-10-29 zimmerle: dynamic? 
+ char errstr[1024]; // + + CURL *curl; + CURLcode res; + + char id[(APR_SHA1_DIGESTSIZE*2) + 1]; + char *apr_id = NULL; + char *beacon_str = NULL; + int beacon_str_len = 0; + char *beacon_apr = NULL; + struct msc_curl_memory_buffer_t chunk; + chunk.memory = malloc(1); /* will be grown as needed by the realloc above */ + chunk.size = 0; /* no data at this point */ + char *word = NULL; + char *brkt = NULL; + char *sep = "\n"; + + + + if (create_radix_tree(mp, rtree, error_msg)) + { + return -1; + } + + /* Retrieve the beacon string */ + beacon_str_len = msc_beacon_string(NULL, 0); + + beacon_str = malloc(sizeof(char) * beacon_str_len + 1); + if (beacon_str == NULL) { + beacon_str = "Failed to retrieve beacon string"; + beacon_apr = apr_psprintf(mp, "ModSec-status: %s", beacon_str); + } + else + { + msc_beacon_string(beacon_str, beacon_str_len); + beacon_apr = apr_psprintf(mp, "ModSec-status: %s", beacon_str); + free(beacon_str); + } + + memset(id, '\0', sizeof(id)); + if (msc_status_engine_unique_id(id)) { + sprintf(id, "no unique id"); + } + + apr_id = apr_psprintf(mp, "ModSec-unique-id: %s", id); + curl_global_init(CURL_GLOBAL_ALL); + curl = curl_easy_init(); + + if (curl) { + struct curl_slist *headers_chunk = NULL; + curl_easy_setopt(curl, CURLOPT_URL, uri); + + headers_chunk = curl_slist_append(headers_chunk, apr_id); + headers_chunk = curl_slist_append(headers_chunk, beacon_apr); + + /* send all data to this function */ + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, msc_curl_write_memory_cb); + + /* we pass our 'chunk' struct to the callback function */ + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&chunk); + + /* some servers don't like requests that are made without a user-agent + field, so we provide one */ + curl_easy_setopt(curl, CURLOPT_USERAGENT, "ModSecurity"); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers_chunk); + + res = curl_easy_perform(curl); + + if (res != CURLE_OK) + { + *error_msg = apr_psprintf(mp, "Failed to fetch \"%s\" error: 
%s ", uri, curl_easy_strerror(res)); + return -1; + } + + curl_easy_cleanup(curl); + curl_slist_free_all(headers_chunk); + } + curl_global_cleanup(); + + for (word = strtok_r(chunk.memory, sep, &brkt); + word; + word = strtok_r(NULL, sep, &brkt)) + { + int i = 0; + line++; + + /* Ignore empty lines and comments */ + if (*word == '#') continue; + + for (i = 0; i < strlen(word); i++) + { + if (apr_isxdigit(word[i]) || word[i] == '.' || word[i] == '/' || word[i] == ':' || word[i] == '\n') + { + continue; + } + + *error_msg = apr_psprintf(mp, "Invalid char \"%c\" in line %d " \ + "of uri %s", *end, line, uri); + return -1; + } + + if (strchr(word, ':') == NULL) + { + tnode = TreeAddIP(word, (*rtree)->ipv4_tree, IPV4_TREE); + } +#if APR_HAVE_IPV6 + else + { + tnode = TreeAddIP(word, (*rtree)->ipv6_tree, IPV6_TREE); + } +#endif + + if (tnode == NULL) + { + *error_msg = apr_psprintf(mp, "Could not add entry " \ + "\"%s\" in line %d of file %s to IP list", word, line, uri); + return -1; + } + + } + return 0; +} + + int tree_contains_ip(apr_pool_t *mp, TreeRoot *rtree, const char *value, modsec_rec *msr, char **error_msg) { @@ -2691,3 +2828,22 @@ int ip_tree_from_param(apr_pool_t *mp, return 0; } +size_t msc_curl_write_memory_cb(void *contents, size_t size, + size_t nmemb, void *userp) +{ + size_t realsize = size * nmemb; + struct msc_curl_memory_buffer_t *mem = (struct msc_curl_memory_buffer_t *)userp; + + mem->memory = realloc(mem->memory, mem->size + realsize + 1); + if(mem->memory == NULL) { + /* out of memory! 
*/ + return 0; + } + + memcpy(&(mem->memory[mem->size]), contents, realsize); + mem->size += realsize; + mem->memory[mem->size] = 0; + + return realsize; +} + diff --git a/apache2/msc_util.h b/apache2/msc_util.h index 2ef30e1d..de2c3a87 100644 --- a/apache2/msc_util.h +++ b/apache2/msc_util.h @@ -161,4 +161,13 @@ int DSOLOCAL ip_tree_from_param(apr_pool_t *pool, int read_line(char *buff, int size, FILE *fp); +size_t msc_curl_write_memory_cb(void *contents, size_t size, + size_t nmemb, void *userp); + +struct msc_curl_memory_buffer_t +{ + char *memory; + size_t size; +}; + #endif diff --git a/apache2/re_operators.c b/apache2/re_operators.c index 196c9a03..c309e9b5 100644 --- a/apache2/re_operators.c +++ b/apache2/re_operators.c @@ -22,6 +22,8 @@ #include "msc_util.h" #include "msc_tree.h" #include "msc_crypt.h" +#include "curl/curl.h" +#include #if APR_HAVE_ARPA_INET_H #include @@ -33,6 +35,7 @@ #include "libinjection/libinjection.h" + /** * */ @@ -192,17 +195,29 @@ static int msre_op_ipmatchFromFile_param_init(msre_rule *rule, char **error_msg) } filepath = fn; - ipfile_path = apr_pstrndup(rule->ruleset->mp, rule->filename, - strlen(rule->filename) - strlen(apr_filepath_name_get(rule->filename))); - if (apr_filepath_root(&rootpath, &filepath, APR_FILEPATH_TRUENAME, - rule->ruleset->mp) != APR_SUCCESS) { - apr_filepath_merge(&fn, ipfile_path, fn, APR_FILEPATH_TRUENAME, rule->ruleset->mp); - } - - res = ip_tree_from_file(&rtree, fn, rule->ruleset->mp, error_msg); - if (res) + if ((strlen(fn) > strlen("http://") && strncmp(fn, "http://", strlen("http://")) == 0) || + (strlen(fn) > strlen("https://") && strncmp(fn, "https://", strlen("https://")) == 0)) { - return 0; + res = ip_tree_from_uri(&rtree, fn, rule->ruleset->mp, error_msg); + if (res) + { + return 0; + } + } + else + { + ipfile_path = apr_pstrndup(rule->ruleset->mp, rule->filename, + strlen(rule->filename) - strlen(apr_filepath_name_get(rule->filename))); + if (apr_filepath_root(&rootpath, &filepath, 
APR_FILEPATH_TRUENAME, + rule->ruleset->mp) != APR_SUCCESS) { + apr_filepath_merge(&fn, ipfile_path, fn, APR_FILEPATH_TRUENAME, rule->ruleset->mp); + } + + res = ip_tree_from_file(&rtree, fn, rule->ruleset->mp, error_msg); + if (res) + { + return 0; + } } rule->op_param_data = rtree; @@ -1235,61 +1250,152 @@ static int msre_op_pmFromFile_param_init(msre_rule *rule, char **error_msg) { /* Add path of the rule filename for a relative phrase filename */ filepath = fn; - if (apr_filepath_root(&rootpath, &filepath, APR_FILEPATH_TRUENAME, rule->ruleset->mp) != APR_SUCCESS) { - /* We are not an absolute path. It could mean an error, but - * let that pass through to the open call for a better error */ - apr_filepath_merge(&fn, rulefile_path, fn, APR_FILEPATH_TRUENAME, rule->ruleset->mp); + + if ((strlen(fn) > strlen("http://") && strncmp(fn, "http://", strlen("http://")) == 0) || + (strlen(fn) > strlen("https://") && strncmp(fn, "https://", strlen("https://")) == 0)) + { + + CURL *curl; + CURLcode res; + + char id[(APR_SHA1_DIGESTSIZE*2) + 1]; + char *apr_id = NULL; + char *beacon_str = NULL; + int beacon_str_len = 0; + char *beacon_apr = NULL; + + struct msc_curl_memory_buffer_t chunk; + char *word = NULL; + char *brkt = NULL; + char *sep = "\n"; + + /* Retrieve the beacon string */ + beacon_str_len = msc_beacon_string(NULL, 0); + + beacon_str = malloc(sizeof(char) * beacon_str_len + 1); + if (beacon_str == NULL) { + beacon_str = "Failed to retrieve beacon string"; + beacon_apr = apr_psprintf(rule->ruleset->mp, "ModSec-status: %s", beacon_str); + } + else + { + msc_beacon_string(beacon_str, beacon_str_len); + beacon_apr = apr_psprintf(rule->ruleset->mp, "ModSec-status: %s", beacon_str); + free(beacon_str); + } + + memset(id, '\0', sizeof(id)); + if (msc_status_engine_unique_id(id)) { + sprintf(id, "no unique id"); + } + + apr_id = apr_psprintf(rule->ruleset->mp, "ModSec-unique-id: %s", id); + + chunk.memory = malloc(1); /* will be grown as needed by the realloc above */ + 
chunk.size = 0; /* no data at this point */ + curl_global_init(CURL_GLOBAL_ALL); + curl = curl_easy_init(); + + if (curl) { + struct curl_slist *headers_chunk = NULL; + curl_easy_setopt(curl, CURLOPT_URL, fn); + + headers_chunk = curl_slist_append(headers_chunk, apr_id); + headers_chunk = curl_slist_append(headers_chunk, beacon_apr); + + /* send all data to this function */ + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, msc_curl_write_memory_cb); + + /* we pass our 'chunk' struct to the callback function */ + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&chunk); + + /* some servers don't like requests that are made without a user-agent + field, so we provide one */ + curl_easy_setopt(curl, CURLOPT_USERAGENT, "ModSecurity"); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers_chunk); + + res = curl_easy_perform(curl); + + if (res != CURLE_OK) + fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res)); + + curl_easy_cleanup(curl); + curl_slist_free_all(headers_chunk); + } + curl_global_cleanup(); + + for (word = strtok_r(chunk.memory, sep, &brkt); + word; + word = strtok_r(NULL, sep, &brkt)) + { + /* Ignore empty lines and comments */ + if (*word == '#') continue; + + acmp_add_pattern(p, word, NULL, NULL, strlen(word)); + + } } + else + { + if (apr_filepath_root(&rootpath, &filepath, APR_FILEPATH_TRUENAME, rule->ruleset->mp) != APR_SUCCESS) { + /* We are not an absolute path. 
It could mean an error, but + * let that pass through to the open call for a better error */ + apr_filepath_merge(&fn, rulefile_path, fn, APR_FILEPATH_TRUENAME, rule->ruleset->mp); + } - /* Open file and read */ - rc = apr_file_open(&fd, fn, APR_READ | APR_BUFFERED | APR_FILE_NOCLEANUP, 0, rule->ruleset->mp); - if (rc != APR_SUCCESS) { - *error_msg = apr_psprintf(rule->ruleset->mp, "Could not open phrase file \"%s\": %s", fn, apr_strerror(rc, errstr, 1024)); - return 0; - } - - #ifdef DEBUG_CONF - fprintf(stderr, "Loading phrase file: \"%s\"\n", fn); - #endif - - /* Read one pattern per line skipping empty/commented */ - for(;;) { - line++; - rc = apr_file_gets(buf, HUGE_STRING_LEN, fd); - if (rc == APR_EOF) break; + /* Open file and read */ + rc = apr_file_open(&fd, fn, APR_READ | APR_BUFFERED | APR_FILE_NOCLEANUP, 0, rule->ruleset->mp); if (rc != APR_SUCCESS) { - *error_msg = apr_psprintf(rule->ruleset->mp, "Could not read \"%s\" line %d: %s", fn, line, apr_strerror(rc, errstr, 1024)); + *error_msg = apr_psprintf(rule->ruleset->mp, "Could not open phrase file \"%s\": %s", fn, apr_strerror(rc, errstr, 1024)); return 0; } - op_len = strlen(buf); - processed = apr_pstrdup(rule->ruleset->mp, parse_pm_content(buf, op_len, rule, error_msg)); + #ifdef DEBUG_CONF + fprintf(stderr, "Loading phrase file: \"%s\"\n", fn); + #endif - /* Trim Whitespace */ - if(processed != NULL) - start = processed; - else - start = buf; + /* Read one pattern per line skipping empty/commented */ + for(;;) { + line++; + rc = apr_file_gets(buf, HUGE_STRING_LEN, fd); + if (rc == APR_EOF) break; + if (rc != APR_SUCCESS) { + *error_msg = apr_psprintf(rule->ruleset->mp, "Could not read \"%s\" line %d: %s", fn, line, apr_strerror(rc, errstr, 1024)); + return 0; + } - while ((apr_isspace(*start) != 0) && (*start != '\0')) start++; - if(processed != NULL) - end = processed + strlen(processed); - else - end = buf + strlen(buf); - if (end > start) end--; - while ((end > start) && (apr_isspace(*end) != 
0)) end--; - if (end > start) { - *(++end) = '\0'; + op_len = strlen(buf); + processed = apr_pstrdup(rule->ruleset->mp, parse_pm_content(buf, op_len, rule, error_msg)); + + /* Trim Whitespace */ + if(processed != NULL) + start = processed; + else + start = buf; + + while ((apr_isspace(*start) != 0) && (*start != '\0')) start++; + if(processed != NULL) + end = processed + strlen(processed); + else + end = buf + strlen(buf); + if (end > start) end--; + while ((end > start) && (apr_isspace(*end) != 0)) end--; + if (end > start) { + *(++end) = '\0'; + } + + /* Ignore empty lines and comments */ + if ((start == end) || (*start == '#')) continue; + + acmp_add_pattern(p, start, NULL, NULL, (end - start)); } - - /* Ignore empty lines and comments */ - if ((start == end) || (*start == '#')) continue; - - acmp_add_pattern(p, start, NULL, NULL, (end - start)); } + fn = next; + + if (fd != NULL) apr_file_close(fd); } - if (fd != NULL) apr_file_close(fd); + acmp_prepare(p); rule->op_param_data = p; return 1; diff --git a/tests/Makefile.am b/tests/Makefile.am index 08741124..2c97caec 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -5,6 +5,7 @@ msc_test_SOURCES = msc_test.c \ ../apache2/libinjection/libinjection_sqli.c \ ../apache2/libinjection/libinjection_xss.c \ ../apache2/modsecurity.c \ + ../apache2/msc_status_engine.c \ ../apache2/msc_crypt.c \ ../apache2/msc_geo.c \ ../apache2/msc_gsb.c \ diff --git a/tests/regression/misc/30-pmfromfile.t b/tests/regression/misc/30-pmfromfile.t new file mode 100644 index 00000000..a39ed107 --- /dev/null +++ b/tests/regression/misc/30-pmfromfile.t @@ -0,0 +1,29 @@ +### pmfromfile external resource + +{ + type => "misc", + comment => "pmfromfile", + conf => qq( + SecRuleEngine On + SecDebugLog $ENV{DEBUG_LOG} + SecDebugLogLevel 9 + SecRequestBodyAccess On + SecRule REQUEST_FILENAME "\@pmFromFile http://modsec.zimmerle.org/ip_reputation.txt?code=123" "id:'123',phase:2,log,pass,t:none" + ), + match_log => { + error => [ 
qr/ModSecurity: Warning. Matched phrase \"127.0.0.1\" at REQUEST_FILENAME./, 1], + debug => [ qr/Matched phrase \"127.0.0.1\" at REQUEST_FILENAME/, 1 ], + }, + match_response => { + status => qr/^404$/, + }, + request => new HTTP::Request( + POST => "http://$ENV{SERVER_NAME}:$ENV{SERVER_PORT}/127.0.0.1.html", + [ + "Content-Type" => "application/x-www-form-urlencoded", + ], + # Args + "some_variable=-1' and 1=1 union/* foo */select load_file('/etc/passwd')--" + ), +}, +