openappsec/components/security_apps/waap/waap_clib/Waf2Util.cc

// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Waf2Util.h"

#include "debug.h"
#include <map>
#include <string>
#include <sstream>
#include <vector>
#include <openssl/aes.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <sys/stat.h>
#include <stdio.h>
#include <locale.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#include "CidrMatch.h"
#include "debug.h"
#include "config.h"
#include "generic_rulebase/rulebase_config.h"
#include "user_identifiers_config.h"
#include "Waf2Regex.h"

using boost::algorithm::to_lower_copy;
using namespace std;

USE_DEBUG_FLAG(D_WAAP);
USE_DEBUG_FLAG(D_WAAP_EVASIONS);
USE_DEBUG_FLAG(D_WAAP_BASE64);
USE_DEBUG_FLAG(D_WAAP_JSON);

// Minimum length of an all-hex-digit token for normalize_param() to replace it with "_num".
#define MIN_HEX_LENGTH 6
// Converts an ASCII decimal digit character to its numeric value.
// The argument is parenthesized so that expression arguments (e.g. a ternary) expand correctly.
#define charToDigit(c) ((c) - '0')

// Table of HTML named character references and the code point each one stands for.
// Names are stored without the leading '&' and with the trailing ';'.
// See https://dev.w3.org/html5/html-author/charref
const struct HtmlEntity g_htmlEntities[] =
{
    {"Tab;", 0x0009},
    {"NewLine;", 0x000A},
    {"nbsp;", 0x00A0},
    {"NonBreakingSpace;", 0x00A0},
    {"excl;", 0x0021},
    {"num;", 0x0023},
    {"dollar;", 0x0024},
    {"percnt;", 0x0025},
    {"lpar;", 0x0028},
    {"rpar;", 0x0029},
    {"ast;", 0x002A},
    {"midast;", 0x002A},
    {"plus;", 0x002B},
    {"comma;", 0x002C},
    {"period;", 0x002E},
    {"sol;", 0x002F},
    {"colon;", 0x003A},
    {"semi;", 0x003B},
    {"iexcl;", 0x00A1},
    {"cent;", 0x00A2},
    {"pound;", 0x00A3},
    {"curren;", 0x00A4},
    {"yen;", 0x00A5},
    {"brvbar;", 0x00A6},
    {"sect;", 0x00A7},
    {"uml;", 0x00A8},
    {"copy;", 0x00A9},
    {"ordf;", 0x00AA},
    {"laquo;", 0x00AB},
    {"not;", 0x00AC},
    {"shy;", 0x00AD},
    {"reg;", 0x00AE},
    {"macr;", 0x00AF},
    {"deg;", 0x00B0},
    {"plusmn;", 0x00B1},
    {"sup2;", 0x00B2},
    {"sup3;", 0x00B3},
    {"acute;", 0x00B4},
    {"micro;", 0x00B5},
    {"para;", 0x00B6},
    {"middot;", 0x00B7},
    {"cedil;", 0x00B8},
    {"sup1;", 0x00B9},
    {"ordm;", 0x00BA},
    {"raquo;", 0x00BB},
    {"frac14;", 0x00BC},
    {"frac12;", 0x00BD},
    {"frac34;", 0x00BE},
    {"iquest;", 0x00BF},
    {"Agrave;", 0x00C0},
    {"Aacute;", 0x00C1},
    {"Acirc;", 0x00C2},
    {"Atilde;", 0x00C3},
    {"Auml;", 0x00C4},
    {"Aring;", 0x00C5},
    {"AElig;", 0x00C6},
    {"Ccedil;", 0x00C7},
    {"Egrave;", 0x00C8},
    {"Eacute;", 0x00C9},
    {"Ecirc;", 0x00CA},
    {"Euml;", 0x00CB},
    {"Igrave;", 0x00CC},
    {"Iacute;", 0x00CD},
    {"Icirc;", 0x00CE},
    {"Iuml;", 0x00CF},
    {"ETH;", 0x00D0},
    {"Ntilde;", 0x00D1},
    {"Ograve;", 0x00D2},
    {"Oacute;", 0x00D3},
    {"Ocirc;", 0x00D4},
    {"Otilde;", 0x00D5},
    {"Ouml;", 0x00D6},
    {"times;", 0x00D7},
    {"Oslash;", 0x00D8},
    {"Ugrave;", 0x00D9},
    {"Uacute;", 0x00DA},
    {"Ucirc;", 0x00DB},
    {"Uuml;", 0x00DC},
    {"Yacute;", 0x00DD},
    {"THORN;", 0x00DE},
    {"szlig;", 0x00DF},
    {"agrave;", 0x00E0},
    {"aacute;", 0x00E1},
    {"acirc;", 0x00E2},
    {"atilde;", 0x00E3},
    {"auml;", 0x00E4},
    {"aring;", 0x00E5},
    {"aelig;", 0x00E6},
    {"ccedil;", 0x00E7},
    {"egrave;", 0x00E8},
    {"eacute;", 0x00E9},
    {"ecirc;", 0x00EA},
    {"euml;", 0x00EB},
    {"igrave;", 0x00EC},
    {"iacute;", 0x00ED},
    {"icirc;", 0x00EE},
    {"iuml;", 0x00EF},
    {"eth;", 0x00F0},
    {"ntilde;", 0x00F1},
    {"ograve;", 0x00F2},
    {"oacute;", 0x00F3},
    {"ocirc;", 0x00F4},
    {"otilde;", 0x00F5},
    {"ouml;", 0x00F6},
    {"divide;", 0x00F7},
    {"oslash;", 0x00F8},
    {"ugrave;", 0x00F9},
    {"uacute;", 0x00FA},
    {"ucirc;", 0x00FB},
    {"uuml;", 0x00FC},
    {"yacute;", 0x00FD},
    {"thorn;", 0x00FE},
    {"yuml;", 0x00FF},
    {"quot;", 0x0022},
    {"amp;", 0x0026},
    {"lt;", 0x003C},
    {"LT;", 0x003C},
    {"equals;", 0x003D},
    {"gt;", 0x003E},
    {"GT;", 0x003E},
    {"quest;", 0x003F},
    {"commat;", 0x0040},
    {"lsqb;", 0x005B},
    // The entity name is "lbrack" (counterpart of "rbrack" below); "lback" does not exist.
    {"lbrack;", 0x005B},
    {"bsol;", 0x005C},
    {"rsqb;", 0x005D},
    {"rbrack;", 0x005D},
    {"Hat;", 0x005E},
    {"lowbar;", 0x005F},
    {"grave;", 0x0060},
    {"DiacriticalGrave;", 0x0060},
    {"lcub;", 0x007B},
    {"lbrace;", 0x007B},
    {"verbar;", 0x007C},
    {"vert;", 0x007C},
    {"VerticalLine;", 0x007C},
    {"rcub;", 0x007D},
    {"rbrace;", 0x007D},
    {"apos;", 0x0027},
    {"OElig;", 0x0152},
    {"oelig;", 0x0153},
    {"Scaron;", 0x0160},
    {"scaron;", 0x0161},
    {"Yuml;", 0x0178},
    {"circ;", 0x02C6},
    {"tilde;", 0x02DC},
    {"ensp;", 0x2002},
    {"emsp;", 0x2003},
    {"emsp13;", 0x2004},
    {"emsp14;", 0x2005},
    {"numsp;", 0x2007},
    {"puncsp;", 0x2008},
    {"thinsp;", 0x2009},
    {"ThinSpace;", 0x2009},
    {"hairsp;", 0x200A},
    {"VeryThinSpace;", 0x200A},
    {"ZeroWidthSpace;", 0x200B},
    {"NegativeVeryThinSpace;", 0x200B},
    {"NegativeThinSpace;", 0x200B},
    {"NegativeMediumSpace;", 0x200B},
    {"NegativeThickSpace;", 0x200B},
    {"zwnj;", 0x200C},
    {"zwj;", 0x200D},
    {"lrm;", 0x200E},
    {"rlm;", 0x200F},
    {"hyphen;", 0x2010},
    {"dash;", 0x2010},
    {"ndash;", 0x2013},
    {"mdash;", 0x2014},
    {"horbar;", 0x2015},
    {"Verbar;", 0x2016},
    {"Vert;", 0x2016},
    {"lsquo;", 0x2018},
    {"OpenCurlyQuote;", 0x2018},
    {"rsquo;", 0x2019},
    {"rsquor;", 0x2019},
    {"CloseCurlyQuote;", 0x2019},
    {"lsquor;", 0x201A},
    {"sbquo;", 0x201A},
    {"ldquo;", 0x201C},
    {"OpenCurlyDoubleQuote;", 0x201C},
    {"rdquo;", 0x201D},
    {"rdquor;", 0x201D},
    {"CloseCurlyDoubleQuote;", 0x201D},
    {"ldquor;", 0x201E},
    {"bdquo;", 0x201E},
    {"dagger;", 0x2020},
    {"Dagger;", 0x2021},
    {"permil;", 0x2030},
    {"lsaquo;", 0x2039},
    {"rsaquo;", 0x203A},
    {"euro;", 0x20AC},
    {"fnof;", 0x0192},
    {"Alpha;", 0x0391},
    {"Beta;", 0x0392},
    {"Gamma;", 0x0393},
    {"Delta;", 0x0394},
    {"Epsilon;", 0x0395},
    {"Zeta;", 0x0396},
    {"Eta;", 0x0397},
    {"Theta;", 0x0398},
    {"Iota;", 0x0399},
    {"Kappa;", 0x039A},
    {"Lambda;", 0x039B},
    {"Mu;", 0x039C},
    {"Nu;", 0x039D},
    {"Xi;", 0x039E},
    {"Omicron;", 0x039F},
    {"Pi;", 0x03A0},
    {"Rho;", 0x03A1},
    {"Sigma;", 0x03A3},
    {"Tau;", 0x03A4},
    {"Upsilon;", 0x03A5},
    {"Phi;", 0x03A6},
    {"Chi;", 0x03A7},
    {"Psi;", 0x03A8},
    {"Omega;", 0x03A9},
    {"alpha;", 0x03B1},
    {"beta;", 0x03B2},
    {"gamma;", 0x03B3},
    {"delta;", 0x03B4},
    {"epsilon;", 0x03B5},
    {"zeta;", 0x03B6},
    {"eta;", 0x03B7},
    {"theta;", 0x03B8},
    {"iota;", 0x03B9},
    {"kappa;", 0x03BA},
    {"lambda;", 0x03BB},
    {"mu;", 0x03BC},
    {"nu;", 0x03BD},
    {"xi;", 0x03BE},
    {"omicron;", 0x03BF},
    {"pi;", 0x03C0},
    {"rho;", 0x03C1},
    {"sigmaf;", 0x03C2},
    {"sigma;", 0x03C3},
    {"tau;", 0x03C4},
    {"upsilon;", 0x03C5},
    {"phi;", 0x03C6},
    {"chi;", 0x03C7},
    {"psi;", 0x03C8},
    {"omega;", 0x03C9},
    {"thetasym;", 0x03D1},
    {"upsih;", 0x03D2},
    {"piv;", 0x03D6},
    {"bull;", 0x2022},
    {"hellip;", 0x2026},
    {"prime;", 0x2032},
    {"Prime;", 0x2033},
    {"oline;", 0x203E},
    {"frasl;", 0x2044},
    {"MediumSpace;", 0x205F},
    {"NoBreak;", 0x2060},
    // U+2061 FUNCTION APPLICATION: the value must be hexadecimal like every other entry
    // (decimal 2061 would be U+080D).
    {"ApplyFunction;", 0x2061},
    {"af;", 0x2061},
    {"it;", 0x2062},
    {"InvisibleTimes;", 0x2062},
    {"ic;", 0x2063},
    {"InvisibleComma;", 0x2063},
    {"weierp;", 0x2118},
    {"image;", 0x2111},
    {"real;", 0x211C},
    {"trade;", 0x2122},
    {"alefsym;", 0x2135},
    {"larr;", 0x2190},
    {"uarr;", 0x2191},
    {"rarr;", 0x2192},
    {"darr;", 0x2193},
    {"harr;", 0x2194},
    {"crarr;", 0x21B5},
    {"lArr;", 0x21D0},
    {"uArr;", 0x21D1},
    {"rArr;", 0x21D2},
    {"dArr;", 0x21D3},
    {"hArr;", 0x21D4},
    {"forall;", 0x2200},
    {"part;", 0x2202},
    {"exist;", 0x2203},
    {"empty;", 0x2205},
    {"nabla;", 0x2207},
    {"isin;", 0x2208},
    {"notin;", 0x2209},
    {"ni;", 0x220B},
    {"prod;", 0x220F},
    {"sum;", 0x2211},
    {"minus;", 0x2212},
    {"lowast;", 0x2217},
    {"radic;", 0x221A},
    {"prop;", 0x221D},
    {"infin;", 0x221E},
    {"ang;", 0x2220},
    {"and;", 0x2227},
    {"or;", 0x2228},
    {"cap;", 0x2229},
    {"cup;", 0x222A},
    {"int;", 0x222B},
    {"there4;", 0x2234},
    {"sim;", 0x223C},
    {"cong;", 0x2245},
    {"asymp;", 0x2248},
    {"ne;", 0x2260},
    {"equiv;", 0x2261},
    {"le;", 0x2264},
    {"ge;", 0x2265},
    {"sub;", 0x2282},
    {"sup;", 0x2283},
    {"nsub;", 0x2284},
    {"sube;", 0x2286},
    {"supe;", 0x2287},
    {"oplus;", 0x2295},
    {"otimes;", 0x2297},
    {"perp;", 0x22A5},
    {"sdot;", 0x22C5},
    {"lceil;", 0x2308},
    {"rceil;", 0x2309},
    {"lfloor;", 0x230A},
    {"rfloor;", 0x230B},
    {"lang;", 0x2329},
    {"rang;", 0x232A},
    {"loz;", 0x25CA},
    {"spades;", 0x2660},
    {"clubs;", 0x2663},
    {"hearts;", 0x2665},
    {"diams;", 0x2666}
};

// Number of entries in g_htmlEntities, derived from the array size at compile time.
const size_t g_htmlEntitiesCount = sizeof(g_htmlEntities) / sizeof(g_htmlEntities[0]);

// Lower-case HTML tag names that startsWithHtmlTagName() scans linearly.
// Every name is listed exactly once: a duplicate cannot change the outcome of the scan,
// it only adds redundant comparisons.
const char* g_htmlTags[] = {
    "a",
    "abbr",
    "acronym",
    "address",
    "applet",
    "embed",
    "object",
    "area",
    "article",
    "aside",
    "audio",
    "b",
    "base",
    "basefont",
    "bdi",
    "bdo",
    "big",
    "blockquote",
    "body",
    "br",
    "button",
    "canvas",
    "caption",
    "center",
    "cite",
    "code",
    "col",
    "colgroup",
    "datalist",
    "dd",
    "del",
    "details",
    "dfn",
    "dialog",
    "dir",
    "ul",
    "div",
    "dl",
    "dt",
    "em",
    "fieldset",
    "figcaption",
    "figure",
    "font",
    "footer",
    "form",
    "frame",
    "frameset",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "head",
    "header",
    "hr",
    "html",
    "i",
    "iframe",
    "img",
    "input",
    "ins",
    "kbd",
    "keygen",
    "label",
    "legend",
    "li",
    "link",
    "main",
    "map",
    "mark",
    "menu",
    "menuitem",
    "meta",
    "meter",
    "nav",
    "noframes",
    "noscript",
    "ol",
    "optgroup",
    "option",
    "output",
    "p",
    "param",
    "pre",
    "progress",
    "q",
    "rp",
    "rt",
    "ruby",
    "s",
    "samp",
    "script",
    "section",
    "select",
    "small",
    "source",
    "video",
    "span",
    "strike",
    "strong",
    "style",
    "sub",
    "summary",
    "sup",
    "table",
    "tbody",
    "td",
    "textarea",
    "tfoot",
    "th",
    "thead",
    "time",
    "title",
    "tr",
    "track",
    "tt",
    "u",
    "var",
    "wbr",
    "event-source",
    "math",
    "svg"
};

// Literal prefix that b64DecodeChunk() skips at the start of a candidate before decoding it.
static const string b64_prefix("base64,");

// Number of entries in g_htmlTags, derived from the array size at compile time.
const size_t g_htmlTagsCount = sizeof(g_htmlTags) / sizeof(g_htmlTags[0]);

// Reports whether `text` begins with one of the tag names in g_htmlTags, immediately
// followed by a character that terminates the name.
//
// text: NUL-terminated C string to inspect.
// Returns true when my_stristarts_with() accepts one of the tag names as a prefix of `text`
// and the character right after the name is whitespace (per isspace()), '>' or '/';
// false otherwise.
bool startsWithHtmlTagName(const char* text) {
    for (size_t index = 0; index < g_htmlTagsCount; ++index) {
        const char* tagName = g_htmlTags[index];

        if (!my_stristarts_with(text, tagName)) {
            continue;
        }

        // NOTE(review): assumes my_stristarts_with() only succeeds when text holds at least
        // strlen(tagName) characters, so this reads at worst the terminating NUL - confirm.
        // The value is converted to unsigned char because passing a negative char
        // (any byte >= 0x80 on signed-char platforms) to isspace() is undefined behaviour.
        const unsigned char termChar = static_cast<unsigned char>(text[strlen(tagName)]);

        if (isspace(termChar) || termChar == '>' || termChar == '/') {
            return true;
        }
    }

    return false;
}

// Builds a normalized copy of the path portion of `uri`.
//
//  - Scanning stops at the first '?'; nothing from that point on is copied to the result.
//  - Every path element that follows a '/' and consists only of decimal digits is
//    replaced with "_num".
//  - All other path elements and every '/' are copied verbatim.
//  - Text that precedes the first '/' is always copied verbatim, even when it is all digits
//    (the numeric flag only becomes true after a '/').
string normalize_uri(const string& uri) {
    string result;
    string::const_iterator mark = uri.begin(); // start of the path element being scanned
    bool isNumeric = false;

    string::const_iterator it = uri.begin();
    for (; it != uri.end() && *it != '?'; ++it) {
        if (*it == '/') {
            // Close the element [mark, it), unless it is empty
            if (mark != it) {
                if (isNumeric) {
                    result += "_num";
                }
                else {
                    result.append(mark, it);
                }
            }

            result += "/";
            mark = it + 1;
            isNumeric = true;
            continue;
        }

        // Reset isNumeric flag on first non-digit character in the path element string.
        // The cast avoids undefined behaviour in isdigit() for bytes >= 0x80 when char is signed.
        if (!isdigit(static_cast<unsigned char>(*it))) {
            isNumeric = false;
        }
    }

    // At this point, "it" points to where scanning stopped (can be end of uri string or the '?' character)
    // Append the rest of the string (or "_num" if last uri part was all numeric) - to the output.
    if (mark != it) {
        if (isNumeric) {
            result += "_num";
        }
        else {
            result.append(mark, it);
        }
    }

    return result;
}

// Builds a normalized copy of a parameter name.
//
// The input is split into tokens at every non-alphanumeric character. Each separator is
// copied verbatim. A non-empty token is replaced with "_num" when it consists only of
// decimal digits, or when it consists only of hex digits (per isHexDigit()) and is at least
// MIN_HEX_LENGTH characters long; any other token is copied verbatim.
string
normalize_param(const string& param)
{
    string result;
    string::const_iterator mark = param.begin(); // start of the token being scanned
    bool isNumeric = true;
    bool isHex = true;

    string::const_iterator it = param.begin();
    for (; it != param.end(); ++it) {
        // <cctype> classifiers have undefined behaviour for negative values, which a plain
        // (signed) char yields for bytes >= 0x80 - classify the unsigned value instead.
        const unsigned char ch = static_cast<unsigned char>(*it);

        if (!isalnum(ch)) {
            // Separator: close the token [mark, it), unless it is empty
            if (mark != it) {
                if (isNumeric || (isHex && it - mark >= MIN_HEX_LENGTH)) {
                    result += "_num";
                }
                else {
                    result.append(mark, it);
                }
            }

            result += *it;
            mark = it + 1;
            isNumeric = true;
            isHex = true;
            continue;
        }

        // First non-digit character in the token: it is no longer numeric, and it stops
        // being a hex candidate as soon as a non-hex character shows up.
        // (Once isHex is false, isNumeric has already been cleared by this same branch.)
        if (isHex && !isdigit(ch)) {
            if (!isHexDigit(*it)) {
                isHex = false;
            }
            isNumeric = false;
        }
    }

    // At this point, "it" is at the end of the parameter string.
    // Append the last token (or "_num" if it was numeric / long enough hex) to the output.
    if (mark != it) {
        if (isNumeric || (isHex && it - mark >= MIN_HEX_LENGTH)) {
            result += "_num";
        }
        else {
            result.append(mark, it);
        }
    }

    return result;
}

// Decodes backslash escape sequences inside `text`, rewriting the string in place.
//
// Recognized sequences:
//   \uXXXX      - exactly 4 hex digits
//   \UXXXXXXXX  - exactly 8 hex digits
//   \xH...      - one or more hex digits; the value is clamped to its low 8 bits
// For \u and \U, a decoded value <= 127 is emitted as a single char; a larger value is emitted
// only when isSpecialUnicode() accepts it (through convertSpecialUnicode()), otherwise the
// whole sequence is dropped. A backslash that does not start a valid sequence is copied
// through verbatim together with the characters consumed while trying to parse it.
//
// The output is never longer than the input: characters are written through `result`, which
// trails the read iterator, and the string is truncated to the written length at the end.
void unescapeUnicode(string& text) {
    string::iterator it = text.begin();
    string::iterator result = it;
    char acc[16];   // accumulates characters we are parsing and do not want to copy directly.
                    // max len really possible is "\u00000000" + 1 char = 11 chars
    char* pAcc = NULL; // when non-NULL, points where to put next character inside acc buffer
    int digitsAnticipated = 0; // in state STATE_ESCAPE, how many hex digits we anticipate to be parsed
    uint32_t code = 0; // The Unicode codepoint value can't be larger than 32 bits
    char* p;
    // in state STATE_ESCAPE_X, how many non-zerohex digits discovered - to eliminate leading zeroes like \x000012
    int nonZeroHexCounter = 0;
    enum {
        STATE_COPY,
        STATE_FLUSH,
        STATE_ESCAPE,
        STATE_ESCAPE_U,
        STATE_ESCAPE_X
    } state = STATE_COPY;

    for (; it != text.end(); ++it) {
        const char ch = *it;

        switch (state) {
        case STATE_FLUSH: {
            // flush any accumulated left-overs into output buffer
            if (pAcc) {
                for (p = acc; p < pAcc; p++) {
                    *result++ = *p;
                }
                pAcc = NULL; // clear the acc buffer after we flushed it
            }
            state = STATE_COPY;
            // No break on purpose: the current character has not been handled yet and must
            // go through the STATE_COPY logic below.
        }
        // fallthrough
        case STATE_COPY: {

            if (ch == '\\') {
                // start accumulating characters instead of copying them
                pAcc = acc;
                state = STATE_ESCAPE;
                break;
            }
            break;
        }
        case STATE_ESCAPE: {
            // decide which kind of escape
            if (ch == 'u') {
                digitsAnticipated = 4; // parse/skip 4 hex digits
                code = 0;
                state = STATE_ESCAPE_U;
            }
            else if (ch == 'U') {
                digitsAnticipated = 8; // parse/skip 8 hex digits
                code = 0;
                state = STATE_ESCAPE_U;
            }
            else if (ch == 'x') {
#if 1
                digitsAnticipated = 1; // anticipate at least one HEX digit after \x
                code = 0;
                nonZeroHexCounter = 0;
                state = STATE_ESCAPE_X;
#else
                digitsAnticipated = 2; // parse/skip 2 hex digits
                code = 0;
                state = STATE_ESCAPE_U;
#endif
            }
            else {
                // this is invalid escape sequence: rollback and copy this character too
                state = STATE_FLUSH;
            }
            break;
        }
        case STATE_ESCAPE_U: {
            if (isHexDigit(ch)) {
                // accumulate code value
                code = (code << 4) + (isdigit(ch) ? ch - '0' : tolower(ch) - 'a' + 10);
                digitsAnticipated--;

                if (digitsAnticipated == 0) {
                    // only output ASCII codes <= 127. "swallow" all unicode.
                    if (code <= 127) {
                        *result++ = (char)code;
                    }
                    else if (isSpecialUnicode(code)) {
                        *result++ = convertSpecialUnicode(code);
                    }

                    if (pAcc) {
                        pAcc = NULL; // throw away the accumulated source (escaped) sequence.
                    }

                    // not STATE_COPY to avoid outputting current ch verbatim.
                    // FLUSH will output nothing because there's no ACC
                    state = STATE_FLUSH;
                    break;
                }

            }
            else {
                // invalid (non-hex) digit encountered
                state = STATE_FLUSH;
            }

            break;
        }
        case STATE_ESCAPE_X: {
            if (isHexDigit(ch)) {
                if ((nonZeroHexCounter) > 1) {
                    // Two significant hex digits were already consumed: the sequence is
                    // complete, so emit it and treat this digit as ordinary text.
                    *result++ = (char)code;
                    if (pAcc) {
                        pAcc = NULL; // throw away the accumulated source (escaped) sequence.
                    }
                    state = STATE_COPY;
                } else {
                    code = (code << 4) + (isdigit(ch) ? ch - '0' : tolower(ch) - 'a' + 10);
                    code &= 0xFF; // clamp the code value to two last digits
                    // Once at least one valid hex digit is here the sequence is considered
                    // valid and there's no need to accumulate it anymore.
                    if (pAcc) {
                        pAcc = NULL;
                    }

                    if (digitsAnticipated > 0) {
                        digitsAnticipated--;
                    }
                    if (code) {
                        nonZeroHexCounter++;
                    }
                }
            } else {
                // According to C standard, '\x' sequence must be followed by at least 1 valid hex digit
                if (digitsAnticipated > 0) {
                    // This is first character right after the '\x' sequence,
                    // and it is not a valid hex. This is bad sequence.
                    state = STATE_FLUSH;
                } else {
                    // We found non-hex character that terminates our \xhhhhh... sequence
                    *result++ = (char)code;
                    if (pAcc) {
                        pAcc = NULL; // throw away the accumulated source (escaped) sequence.
                    }

                    if (ch == '\\') {
                        // start accumulating characters instead of copying them
                        pAcc = acc;
                        state = STATE_ESCAPE;
                        break;
                    }

                    // STATE_COPY will cause current character (sequence terminator)
                    // to be output verbatim.
                    state = STATE_COPY;
                }
            }

            if (digitsAnticipated > 0) {
                digitsAnticipated--;
            }
            break;
        }
        }

        // Copy to output
        if (state == STATE_COPY) {
            *result++ = ch;
        }

        // Accumulate
        if (pAcc) {
            // Ensure we don't have buffer overflow
            assert(size_t(pAcc - acc) < sizeof(acc));
            *pAcc++ = ch;
        }
    }

    dbgTrace(D_WAAP) << " - LOOP FINISHED with state=" << state << "; digitsAnticipated=" <<
        digitsAnticipated << ", acc='" << string(acc, pAcc ? (int)(pAcc - acc) : 0) << "'";

    // Output code if we just finished decoding an escape sequence successfully and reached end of string
    if (state == STATE_ESCAPE_U && digitsAnticipated == 0) {
        // only output ASCII codes <= 127. "swallow" all unicode.
        if (code <= 127) {
            *result++ = (char)code;
        }
        else if (isSpecialUnicode(code)) {
            *result++ = convertSpecialUnicode(code);
        }

        if (pAcc) {
            pAcc = NULL; // throw away the accumulated source (escaped) sequence.
        }
    }
    else if (state == STATE_ESCAPE_X && digitsAnticipated == 0) {
        // Only emit a value when at least one hex digit followed "\x" (digitsAnticipated
        // drops to 0 on the first digit). When the string ends right after a bare "\x",
        // the two accumulated characters are flushed verbatim below; emitting a code byte
        // as well would write three characters for two consumed ones, running past
        // text.end() and handing erase() an invalid range.
        if (isSpecialUnicode(code)) {
            *result++ = convertSpecialUnicode(code);
        }
        else
        {
            *result++ = (char)code;
        }
    }

    // flush any accumulated left-overs into output buffer
    if (pAcc) {
        for (p = acc; p < pAcc; p++) {
            *result++ = *p;
        }
    }

    text.erase(result, text.end());
}

// Attempts to validate and decode one UTF-7 encoded chunk (the part between '+' and '-').
//
// it/end:  range to parse; `it` points at the first character after the introducing '+'.
// decoded: receives the decoded characters on success and is left empty on failure.
//
// The chunk is read as base64 digits (A-Z, a-z, 0-9, '+', '/') that are packed into 16-bit
// code units. Each unit is passed through convertSpecialUnicode() when isSpecialUnicode()
// accepts it, and must then be either 0 or a printable single-byte character.
//
// Returns an iterator pointing at the terminating '-' on success (the '-' itself is not
// output). On failure - an unexpected character, a non-printable code unit, a '-' before
// anything was decoded, or end of input without a '-' - returns the original `it`.
inline const string::const_iterator
decodeUTF7Chunk(string::const_iterator it, string::const_iterator end, string& decoded) {
    decoded.clear();
    unsigned char val = 0;
    uint32_t acc = 0;
    int acc_bits = 0; // how many bits are filled in acc
    const string::const_iterator start = it; // returned unchanged whenever decoding fails

    while (it != end) {
        unsigned char c = *it;

        if (c >= 'A' && c <= 'Z') {
            val = c - 'A';
        }
        else if (c >= 'a' && c <= 'z') {
            val = c - 'a' + 26;
        }
        else if (c >= '0' && c <= '9') {
            val = c - '0' + 52;
        }
        else if (c == '+') {
            val = 62;
        }
        else if (c == '/') {
            val = 63;
        }
        else if (c == '-') {
            // end of encoded sequence (the '-' itself must not be output)
            if (!decoded.empty()) {
                return it; // successfully decoded. Returns decoded data in "decoded" parameter
            }

            decoded.clear(); // discard partial data
            return start;
        }
        else {
            decoded.clear(); // discard partial data
            return start;
        }

        acc = acc << 6 | val;
        acc_bits += 6;

        if (acc_bits >= 16) { // we got 16 bits or more in the accumulator
            int code = (acc >> (acc_bits - 16)) & 0xFFFF;

            // Take into account still-printable Unicode characters, convert them back to ASCII
            if (isSpecialUnicode(code)) {
                code = convertSpecialUnicode(code);
            }

            // Stop and return empty if we hit at least one non-printable character.
            // Values above 0xFF are rejected before isprint() is consulted: isprint() is only
            // defined for values representable as unsigned char (or EOF).
            if (code != 0 && (code > 0xFF || !isprint(code))) {
                decoded.clear(); // discard partial data
                return start;
            }

            decoded += (char)code;
            acc_bits -= 16;
            // Leave only the acc_bits low bits in the acc (the ones not consumed yet), clear the rest.
            acc &= ((1u << acc_bits) - 1u);
        }

        it++;
    }

    decoded.clear(); // discard partial data
    return start;
}

// Returns a copy of `text` in which UTF-7 encoded chunks are replaced with their decoded form.
//
//  - "+-" becomes a single '+'.
//  - "+<chunk>-" is replaced with the output of decodeUTF7Chunk() when that call succeeds.
//  - When decoding fails, the '+' and the character following it are copied as-is and
//    scanning resumes right after them.
//  - A '+' that is the last character, and every other character, is copied as-is.
string filterUTF7(const string& text) {
    string output;
    string chunk;
    chunk.reserve(8);
    output.reserve(text.length());

    const size_t length = text.length();
    size_t pos = 0;

    while (pos < length) {
        const char current = text[pos];

        // Ordinary character, or a '+' with nothing after it: copy through.
        if (current != '+' || pos + 1 == length) {
            output += current;
            ++pos;
            continue;
        }

        // '+-' combination is converted to single '+'; both characters are consumed.
        if (text[pos + 1] == '-') {
            output += '+';
            pos += 2;
            continue;
        }

        // Attempt to decode the chunk that starts right after the '+'.
        const string::const_iterator resume = decodeUTF7Chunk(text.begin() + pos + 1, text.end(), chunk);

        if (chunk.empty()) {
            // Decoding failed: resume still points at the character after the '+'.
            output += '+';
            output += *resume;
        }
        else {
            // Decoding succeeded: resume points at the closing '-', which is skipped below.
            output += chunk;
        }

        pos = (resume - text.begin()) + 1;
    }

    return output;
}

// Attempts to validate and decode a base64-encoded chunk.
//
// value:   the full value inside which the candidate chunk was found (used for tracing only).
// it/end:  start and end of the candidate chunk.
// decoded: receives the decoded bytes; see the return value description.
//
// Returns false (with `decoded` empty) when the candidate should be left as-is:
//  1. the chunk length is not exactly divisible by 4;
//  2. the chunk contains a character outside the base64 alphabet, more than two '='
//     terminators, or a non-'=' character after a terminator;
//  3. fewer than 5 bytes were decoded.
// Returns true otherwise, in one of two flavours:
//  - less than 10% of the decoded bytes are non-printable (!isprint()): `decoded` holds
//    the decoded data;
//  - 10% or more are non-printable (statistical garbage detection): `decoded` is empty.
bool decodeBase64Chunk(
        const string& value,
        string::const_iterator it,
        string::const_iterator end,
        string& decoded)
{
    decoded.clear();
    uint32_t acc = 0;
    int acc_bits = 0; // how many bits are filled in acc
    int terminatorCharsSeen = 0; // whether '=' character was seen, and how many of them.
    uint32_t nonPrintableCharsCount = 0;

    dbgTrace(D_WAAP) << "decodeBase64Chunk: value='" << value << "' match='" << string(it, end) << "'";

    // The encoded data length (without the "base64," prefix) should be exactly divisible by 4
    // len % 4 is not 0 i.e. this is not base64
    if ((end - it) % 4 != 0) {
        dbgTrace(D_WAAP_BASE64) <<
            "b64DecodeChunk: (leave as-is) because encoded data length should be exactly divisible by 4.";
        return false;
    }

    while (it != end) {
        unsigned char c = *it;

        if (terminatorCharsSeen) {
            // terminator characters must all be '=', until end of match.
            if (c != '=') {
                dbgTrace(D_WAAP_BASE64) <<
                    "decodeBase64Chunk: (leave as-is) because terminator characters must all be '='," <<
                    "until end of match.";
                return false;
            }

            // We should see 0, 1 or 2 (no more) terminator characters
            terminatorCharsSeen++;

            if (terminatorCharsSeen > 2) {
                dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because terminatorCharsSeen > 2";
                return false;
            }

            // allow for more terminator characters
            it++;
            continue;
        }

        unsigned char val = 0;

        if (c >= 'A' && c <= 'Z') {
            val = c - 'A';
        }
        else if (c >= 'a' && c <= 'z') {
            val = c - 'a' + 26;
        }
        else if (isdigit(c)) {
            val = c - '0' + 52;
        }
        else if (c == '+') {
            val = 62;
        }
        else if (c == '/') {
            val = 63;
        }
        else if (c == '=') {
            // Start tracking terminator characters
            terminatorCharsSeen++;
            it++;
            continue;
        }
        else {
            dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because of non-base64 character ('" <<
                    c << "', ASCII " << (unsigned int)c << ")";
            return false; // non-base64 character
        }

        acc = (acc << 6) | val;
        acc_bits += 6;

        if (acc_bits >= 8) {
            int code = (acc >> (acc_bits - 8)) & 0xFF;
            // only leave low "acc_bits-8" bits (not consumed yet), clear all higher bits
            const uint32_t mask = (1u << (acc_bits - 8)) - 1u;
            acc &= mask;
            acc_bits -= 8;

            // Count non-printable characters seen
            if (!isprint(code)) {
                nonPrintableCharsCount++;
            }

            decoded += (char)code;
        }

        it++;
    }

    // end of encoded sequence decoded.

    dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: decoded.size=" << decoded.size() <<
            ", nonPrintableCharsCount=" << nonPrintableCharsCount << "; decoded='" << decoded << "'";

    // Return success only if decoded.size>=5 and there are less than 10% of non-printable
    // characters in output.
    if (decoded.size() >= 5) {
        if (nonPrintableCharsCount * 10 < decoded.size()) {
            dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (decode/replace) decoded.size=" << decoded.size() <<
                    ", nonPrintableCharsCount=" << nonPrintableCharsCount << ": replacing with decoded data";
        }
        else {
            dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (delete) because decoded.size=" << decoded.size() <<
                    ", nonPrintableCharsCount=" << nonPrintableCharsCount;
            decoded.clear();
        }
        return true; // successfully decoded. Returns decoded data in "decoded" parameter
    }

    // If decoded size is too small - leave the encoded value (return false).
    // Trace first, then discard: the trace reports the real size instead of always printing 0.
    dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because decoded too small. decoded.size=" <<
            decoded.size() <<
            ", nonPrintableCharsCount=" << nonPrintableCharsCount;
    decoded.clear(); // discard partial data
    return false;
}

// Entry point for decoding a base64 candidate found inside `value`.
//
// value:   the full value inside which the candidate was found.
// it/end:  start and end of the candidate match within value.
// decoded: filled in by decodeBase64Chunk().
//
// A candidate is handed over to decodeBase64Chunk() in two cases only:
//  - it starts with the "base64," prefix (the prefix is skipped, the remainder is decoded),
//    wherever it is located inside value;
//  - it has no such prefix but spans the whole value, with no characters around it.
// Any other candidate is rejected (returns false), so the caller keeps the raw text.
// Otherwise the result is whatever decodeBase64Chunk() returns.
bool
b64DecodeChunk(
    const string& value,
    string::const_iterator it,
    string::const_iterator end,
    string& decoded)
{

    dbgTrace(D_WAAP_BASE64) << "b64DecodeChunk: value='" << value << "' match='" << string(it, end) << "'";

    const unsigned int matchLength = end - it;

    // The length test comes first, so "it" is only dereferenced when the match is long enough
    // to hold the whole prefix.
    const bool startsWithPrefix =
        matchLength >= b64_prefix.size() &&
        b64_prefix.compare(0, b64_prefix.size(), &*it, b64_prefix.size()) == 0;

    if (startsWithPrefix) {
        // Prefixed matches are always decoded: step over the prefix itself.
        it += b64_prefix.size();
    }
    else if (it != value.begin() || end != value.end()) {
        // An unprefixed match surrounded by other data is ignored, which results in
        // the match being scanned raw.
        dbgTrace(D_WAAP_BASE64) << "b64DecodeChunk: (leave as-is) because match is surrounded by other data.";
        return false;
    }

    return decodeBase64Chunk(value, it, end, decoded);
}

// Breaks "s" into pieces separated by "delim"; every piece is passed through Waap::Util::trim()
// before being stored. Pieces are returned in their original order.
vector<string> split(const string& s, char delim) {
    vector<string> parts;
    stringstream input(s);
    for (string token; getline(input, token, delim); ) {
        parts.push_back(Waap::Util::trim(token));
    }
    return parts;
}

namespace Waap {
namespace Util {

// Padding character allowed at the end of a base64 chunk
#define B64_TRAILERCHAR '='

// Flag handed (by name) to every SingleRegex constructed below.
// NOTE(review): presumably SingleRegex raises it when a pattern fails to compile - confirm in Waf2Regex.h.
static bool err = false;
// Shortest JSON text considered worth parsing; based on malicious JSON "{1:\x00}"
static const int minimal_legal_json_size = 8;

// '%' followed by two alphanumeric characters of which at least one is a letter in g-z/G-Z
// (i.e. not a valid hex digit)
static const SingleRegex invalid_hex_evasion_re(
    "%([g-zG-Z][0-9a-zA-Z]|[0-9a-zA-Z][g-zG-Z])",
    err,
    "invalid_hex_evasion"
);
// Two consecutive "%<single lowercase hex digit>" groups not preceded by another '%'
static const SingleRegex broken_utf_evasion_re(
    "(?:^|[^%])(%[0-9a-f]%[0-9a-f])",
    err,
    "broken_utf_evasion"
);
// "default-src" followed later by "report-uri", each followed by whitespace and non-word characters
static const SingleRegex csp_report_policy_re(
    "default-src\\s+[^\\w]+.*report-uri\\s+[^\\w]+",
    err,
    "csp_report_policy"
);
// "<key>=<value>" where the key holds none of <>{};,&?|= or whitespace; whitespace may follow the '='
static const SingleRegex base64_key_value_detector_re(
        "^[^<>{};,&\\?|=\\s]+={1}\\s*.+",
        err,
        "base64_key_value");
// "<key>={<text>:<text>}" running to the end of the input
static const SingleRegex json_key_value_detector_re(
        "^[^<>{};,&\\?|=\\s]+={.+:.+}\\z",
        err,
        "json_key_value");
// "<key>=" at the start of the input (same key alphabet as above)
static const SingleRegex base64_key_detector_re(
        "^[^<>{};,&\\?|=\\s]+={1}",
        err,
        "base64_key");
// "data:<...>;base64,<payload>" or a plain "base64,<payload>"
static const SingleRegex base64_prefix_detector_re(
        "data:\\S*;base64,\\S+|base64,\\S+",
        err,
        "base64_prefix");

// looks for combination <param>={<some text>*:<some text>*}
//used to allow parsing param=JSON to reduce false positives
bool detectJSONasParameter(const string &string_buffer,
        string &key,
        string &value)
{
    key.clear();
    value.clear();
    bool is_json_candidate_detected = json_key_value_detector_re.hasMatch(string_buffer);

    if (is_json_candidate_detected) {
        dbgTrace(D_WAAP_JSON) << "===JSONdetect===:  json_key_value_detector_re test passed - looking for key";
        string::const_iterator it = string_buffer.begin();
        for (; it != string_buffer.end(); ++it) {
            if (*it != '{') {
                continue;
            }
            // candidate should have size 8 or more - minimum for JSON with attack
            if ((string_buffer.end() - it) < minimal_legal_json_size) {
                dbgTrace(D_WAAP_JSON)
                    << "===JSONdetect===: candidate is shorter then the length"
                    "of the shortest known json attack which is: " << minimal_legal_json_size;
                return false;
            }

            key = std::string(string_buffer.begin(), it-1);
            value = std::string(it, string_buffer.end());
            break;
        }
    }
    dbgTrace(D_WAAP_JSON)
    << "===JSONdetect===:  key = '"
    << key
    << "', value = '"
    << value <<"'";
    return is_json_candidate_detected;
}

// Runs callback "cb" on one base64 candidate [chunkStart, chunkEnd) of "s" and appends the outcome
// to "outStr".
//  * The callback is only invoked when the candidate length (not counting a leading "base64,"
//    prefix) is divisible by 4.
//  * Callback succeeded with a non-empty replacement: replacement appended, decodedCount incremented.
//  * Callback succeeded with an empty replacement: nothing appended, deletedCount incremented.
//  * Otherwise the original candidate text is appended unchanged.
static void b64TestChunk(const string &s,
        string::const_iterator chunkStart,
        string::const_iterator chunkEnd,
        RegexSubCallback_f cb,
        int &decodedCount,
        int &deletedCount,
        string &outStr)
{
    size_t payloadLen = (chunkEnd - chunkStart);

    const bool hasPrefix =
        (chunkEnd - chunkStart) > static_cast<int>(b64_prefix.size()) &&
        chunkStart[0] == 'b' && chunkStart[1] == 'a' && chunkStart[2] == 's' && chunkStart[3] == 'e' &&
        chunkStart[4] == '6' && chunkStart[5] == '4' && chunkStart[6] == ',';
    if (hasPrefix) {
        payloadLen -= b64_prefix.size();
    }

    string repl;
    const bool processed = (payloadLen % 4 == 0) && cb(s, chunkStart, chunkEnd, repl);

    if (!processed) {
        // Chunk was not processed - put original text
        outStr.append(chunkStart, chunkEnd);
        return;
    }

    if (repl.empty()) {
        deletedCount++;
        return;
    }

    outStr += repl;
    decodedCount++;
}

bool detectBase64Chunk(
        const string &s,
        string::const_iterator &start,
        string::const_iterator &end)
{
    dbgTrace(D_WAAP_BASE64) << " ===detectBase64Chunk===:  starting with = '" << s << "'";
    string::const_iterator it = s.begin();

    //detect "base64," prefix to start search after this
    for (; it != s.end()-7; it++) {
        if (it[0] == 'b' && it[1] == 'a' && it[2] == 's' && it[3] ==
                'e' && it[4] == '6' && it[5] == '4' && it[6] == ',') {
            it = it + 7;
            dbgTrace(D_WAAP_BASE64) << " ===detectBase64Chunk===:  prefix skipped = '" << *it << "'";
            break;
        }
    }

    //look for start of encoded string
    dbgTrace(D_WAAP_BASE64) << " ===detectBase64Chunk===:  B64 itself = '" << *it << "'";
    bool isB64AlphaChar = Waap::Util::isAlphaAsciiFast(*it) || isdigit(*it) || *it=='/' || *it=='+';

    if (isB64AlphaChar) {
        // start tracking potential b64 chunk - just check its size
        dbgTrace(D_WAAP_BASE64) << " ===detectBase64Chunk===:  isB64AlphaChar = true, '" << *it << "'";
        start = it;
        end = s.end();
        if ((end - start) % 4 == 0) {
            return true;
        }
    }
    // non base64 before supposed chunk - will not process
    return false;
}

bool isBase64PrefixProcessingOK (
        const string &s,
        string &value)
{
    string::const_iterator start, end;
    bool retVal = false;
    dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: before regex for prefix for string '" << s << "'";
    if (base64_prefix_detector_re.hasMatch(s)) {
        dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: prefix detected on string '" << s << "'";
        if (detectBase64Chunk(s, start, end)) {
            dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: chunk detected";
            if ((start != s.end()) && (end == s.end())) {
                retVal = decodeBase64Chunk(s, start, end, value);
            }
        }
    }
    return retVal;
}

// Checks whether "s" is a single base64 chunk or a "key=<base64 chunk>" pair and decodes it.
//  s     - text to inspect
//  key   - output; cleared on entry. On KEY_VALUE_B64_PAIR holds the key without the trailing '='
//  value - output; on success holds the decoded data. It is not touched when "s" is shorter than
//          8 characters, and is cleared on most other CONTINUE_AS_IS paths
// Returns:
//  CONTINUE_AS_IS           - nothing was decoded, caller should keep processing "s" unchanged
//  SINGLE_B64_CHUNK_CONVERT - the value (without a key) was decoded into "value"
//  KEY_VALUE_B64_PAIR       - a key was split off and the remainder was decoded into "value"
base64_variants b64Test (
        const string &s,
        string &key,
        string &value)
{

    key.clear();

    dbgTrace(D_WAAP_BASE64) << " ===b64Test===: string =  " << s
            << " key = " << key << " value = " << value;
    // Minimal length
    if (s.size() < 8) {
        return CONTINUE_AS_IS;
    }
    dbgTrace(D_WAAP_BASE64) << " ===b64Test===: minimal lenght test passed";

    std::string prefix_decoded_val;
    string::const_iterator it = s.begin();

    // 1st check if we have key candidate
    if (base64_key_value_detector_re.hasMatch(s)) {
        base64_stage state = BEFORE_EQUAL;
        dbgTrace(D_WAAP_BASE64) << " ===b64Test===: testB64Key test passed - looking for key";
        // Collect everything up to and including the first '=' into "key", then skip the spaces
        // that follow it. A second '=' right after the first one means this is not key=value.
        for (; (it != s.end()) && (state != DONE) && (state != MISDETECT); ++it) {
            switch(state) {
            case BEFORE_EQUAL:
                if (*it != '=') {
                    key += string(1, *it);
                } else {
                    key += string(1, *it);
                    state = EQUAL;
                }
                break;
            case EQUAL:
                if (*it == '=') {
                    it = s.begin();
                    state=MISDETECT;
                    continue;
                }
                if (*it == ' ') {
                    //skip whitespaces - we don't need them in key
                    continue;
                } else {
                    state = DONE;
                }
                break;
            case DONE:
                continue;
            default:
                break;
            }

        }
        dbgTrace(D_WAAP_BASE64) << " ===b64Test===: detected key = " << key;
        if (it == s.end() || state == MISDETECT) {
            // "it" may be equal to s.end() here, so it must not be dereferenced for the trace
            dbgTrace(D_WAAP_BASE64) << " ===b64Test===: detected  *it = s.end()";
            if (key.size() > 0) {
                // no usable value after the key - treat the whole string as the candidate
                it = s.begin();
                key.clear();
            }
        } else {
            // the loop stepped one past the first value character - go back to it
            it--;

            dbgTrace(D_WAAP_BASE64) << " ===b64Test===: Key is OK  *it = " << *it;
        }
    }

    dbgTrace(D_WAAP_BASE64) << " ===b64Test===: after processing key = '" << key << "'";
    bool found = isBase64PrefixProcessingOK(s, prefix_decoded_val);
    dbgTrace(D_WAAP_BASE64) << " ===b64Test===: after prefix test found = "
            << found << " new value is '" << prefix_decoded_val << "' - done";
    if (found) {
        value = prefix_decoded_val;
        if (key.empty()) {
            return SINGLE_B64_CHUNK_CONVERT;
        } else {
            // remove '=' as last char in key - we don't need it
            key.pop_back();
            return KEY_VALUE_B64_PAIR;
        }
    }

    string::const_iterator start = s.end();
    dbgTrace(D_WAAP_BASE64) << " ===b64Test===:  B64 itself = " << *it << " =======";
    bool isB64AlphaChar = Waap::Util::isAlphaAsciiFast(*it) || isdigit(*it) || *it=='/' || *it=='+';
    if (isB64AlphaChar) {
        // 1st char is potential b64, let's try to convert this
        dbgTrace(D_WAAP_BASE64) <<
            " ===b64Test===:  Start tracking potential b64 chunk = " << *it << " =======";
        start = it;
        if ((s.end() - start) % 4 != 0) {
            key.clear();
            value.clear();
            return CONTINUE_AS_IS;
        }
    }
    else {
        dbgTrace(D_WAAP_BASE64) <<
            " ===b64Test===: Non base64 before supposed chunk - will not process = " << *it << " =======";
        return CONTINUE_AS_IS;
    }

    if (start != s.end()) {
        // key is not empty, it should be tested for correct format (i.e. key=b64val and not splittable)
        // else leave it as is
        dbgTrace(D_WAAP_BASE64) << " ===b64Test===:BEFORE TESTING KEY key = '" << key << "'";
        if (!key.empty()) {
            if (!base64_key_detector_re.hasMatch(key)) {
                dbgTrace(D_WAAP_BASE64) << " ===b64Test===: Key is NOT GOOD regex key = '" << key << "'";
                return CONTINUE_AS_IS;
            }
            // remove '=' as last char in key - we don't need it
            key.pop_back();
            dbgTrace(D_WAAP_BASE64) << " ===b64Test===: FINAL key = '" << key << "'";
        }
        const bool retVal = decodeBase64Chunk(s, start, s.end(), value);
        dbgTrace(D_WAAP_BASE64) << " ===b64Test===: After testing and conversion value = "
                << value << "retVal = '" << retVal <<"'";
        if (!retVal) {
            key.clear();
            value.clear();
            return CONTINUE_AS_IS;
        }
        dbgTrace(D_WAAP_BASE64) << " ===b64Test===: After tpassed retVal check = "
            << value << "retVal = '" << retVal <<"'" << "key = '" << key << "'";
        if (key.empty()) {
            return SINGLE_B64_CHUNK_CONVERT;
        } else {
            return KEY_VALUE_B64_PAIR;
        }

    } else {
        // Defensive fallback: "start" is assigned above on every path that gets here
        key.clear();
        value.clear();
        return CONTINUE_AS_IS;
    }
}

// Scans "s" for base64-looking chunks and lets callback "cb" decide what to do with each of them.
//  s            - text to scan
//  cb           - callback invoked (through b64TestChunk) for every candidate chunk
//  decodedCount - output; number of chunks replaced with decoded data
//  deletedCount - output; number of chunks the callback accepted with an empty replacement
//  outStr       - output; the rebuilt text (reset to empty on entry)
// Input shorter than 8 characters is not scanned at all: the counters stay 0 and outStr stays empty.
void b64Decode(
        const string &s,
        RegexSubCallback_f cb,
        int &decodedCount,
        int &deletedCount,
        string &outStr)
{
    decodedCount = 0;
    deletedCount = 0;
    outStr = "";
    // Number of characters of the current chunk taken by a "base64," prefix (0 when there is none)
    int offsetFix = 0;

    string::const_iterator it = s.begin();

    // Minimal length
    if (s.end() - it < 8) {
        return;
    }

    // Search for substrings that match these criteria:
    //  1. substring length is divisible by 4
    //  2. substring contains only letters a-z, 0-9, '/' or '+' except last 1 or two characters that can be '='

    // chunkStart == s.end() means "not inside a chunk at the moment"
    string::const_iterator chunkStart = s.end();
    for (; it != s.end(); ++it) {
        bool isB64AlphaChar = Waap::Util::isAlphaAsciiFast(*it) || isdigit(*it) || *it=='/' || *it=='+';
        if (chunkStart == s.end()) {
            if (isB64AlphaChar) {
                // start tracking potential b64 chunk
                chunkStart = it;
            }
            else {
                // Add anything before the potential match
                outStr += string(1, *it);
            }
        }
        else {
            // tracking b64 chunk
            if (!isB64AlphaChar) {
                if (*it == ',') {
                    // Check back and skip the "base64," prefix
                    if (chunkStart + b64_prefix.size() - 1 == it) {
                        string cand(chunkStart, it + 1);
                        if (cand == b64_prefix) {
                            // keep tracking: the prefix stays part of the chunk handed to the callback
                            offsetFix = b64_prefix.size();
                            continue;
                        }
                    }
                }

                // Length of the chunk so far, without the prefix
                size_t chunkLen = (it - chunkStart) - offsetFix;
                size_t chunkRem = chunkLen % 4;

                // Allow only one or two '=' characters at the end of the match
                if ((*it == B64_TRAILERCHAR) && (chunkRem == 2 || chunkRem == 3)) {
                    continue;
                }

                // Decode and add chunk
                b64TestChunk(s, chunkStart, it, cb, decodedCount, deletedCount, outStr);

                // stop tracking b64 chunk
                outStr += string(1, *it); // put the character that terminated the chunk
                chunkStart = s.end();
                offsetFix = 0;
            }
        }
    }

    // Flush a chunk that runs up to the end of the input
    if (chunkStart != s.end()) {
        b64TestChunk(s, chunkStart, it, cb, decodedCount, deletedCount, outStr);
    }
}

// Base64 functions stolen from orchestration_tools.cc
// The standard base64 alphabet; the index of a character is its 6-bit value.
static const std::string base64_base_str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encodes "input" as base64 using base64_base_str, padding the result with '=' up to a multiple
// of 4 characters. An empty input yields an empty string.
std::string
base64Encode(const std::string &input)
{
    std::string out;
    out.reserve(((input.size() + 2) / 3) * 4);

    // Bit accumulator. It is unsigned on purpose: bytes are shifted in without ever being masked
    // out, so a signed accumulator would overflow (undefined behaviour) on longer inputs.
    unsigned int val = 0;
    // Position of the next 6-bit group inside "val"; negative while not enough bits are buffered
    int val_base = -6;
    for (unsigned char c : input) {
        val = (val << 8) | c;
        val_base += 8;
        while (val_base >= 0) {
            out.push_back(base64_base_str[(val >> val_base) & 0x3F]);
            val_base -= 6;
        }
    }
    // -6 indicates the number of bits to take from each character
    // (6 bits is enough to present a range of 0 to 63)
    // Anything above -6 means some bits are still buffered: emit them, zero-padded on the right
    if (val_base > -6) out.push_back(base64_base_str[((val << 8) >> (val_base + 8)) & 0x3F]);
    while (out.size() % 4) out.push_back('=');
    return out;
}

bool find_in_map_of_stringlists_keys(const string &what, const map_of_stringlists_t &where)
{
    for (map_of_stringlists_t::const_iterator it = where.begin(); it != where.end(); ++it) {
        if (it->first.find(what) != string::npos) {
            return true;
        }
    }

    return false;
}

void remove_in_map_of_stringlists_keys(const string &what, map_of_stringlists_t &where)
{
    map_of_stringlists_t::iterator it = where.begin();

    while (it != where.end()) {
        if (it->first.find(what) != string::npos) {
            it = where.erase(it);
        }
        else {
            it++;
        }
    }
}

// Drops from "vec" every string that starts with "prefix"; the remaining strings keep their order.
void remove_startswith(vector<string> &vec, const string &prefix)
{
    const auto has_prefix = [&prefix](const string &kw)
    {
        return boost::starts_with(kw, prefix);
    };

    const auto first_dropped = remove_if(vec.begin(), vec.end(), has_prefix);
    vec.erase(first_dropped, vec.end());
}

// Decrypts "message" with AES-128 in CBC mode and strips the padding announced by the last
// decrypted byte.
//  key     - at least 16 bytes; the first 16 are used as the AES-128 key
//  iv      - at least AES_BLOCK_SIZE bytes; OpenSSL updates this buffer in place while decrypting
//  message - ciphertext; must be a non-empty whole number of AES blocks
// Returns the decrypted text without padding, or an empty string when the arguments are unusable
// or the padding byte is larger than the decrypted data.
string AES128Decrypt(
    string& key,
    string& iv,
    string& message)
{
    static const size_t aes128_key_bytes = 16;
    static const size_t aes_block_bytes = AES_BLOCK_SIZE;

    // Reject input that would make OpenSSL read or write outside of the supplied buffers
    if (key.size() < aes128_key_bytes ||
        iv.size() < aes_block_bytes ||
        message.empty() ||
        message.size() % aes_block_bytes != 0) {
        return string();
    }

    // data structure that contains the key itself
    AES_KEY aes_key;

    // set the decryption key
    if (AES_set_decrypt_key(reinterpret_cast<const unsigned char*>(key.data()), 128, &aes_key) != 0) {
        return string();
    }

    // vector releases the buffer on every exit path
    vector<unsigned char> outdata(message.size());

    AES_cbc_encrypt(
        reinterpret_cast<const unsigned char*>(message.data()),
        outdata.data(), message.size(),
        &aes_key, reinterpret_cast<unsigned char*>(&iv[0]), // writable: the IV is updated by OpenSSL
        AES_DECRYPT
        );

    // get value without padding
    const size_t padding = outdata.back(); // last byte contain padding size
    if (padding > outdata.size()) {
        // corrupted data or wrong key - the length below would underflow
        return string();
    }

    return string(reinterpret_cast<const char*>(outdata.data()), outdata.size() - padding);
}

// Decodes base64 text using the base64_base_str alphabet.
// Decoding stops at the first character that is not part of the alphabet (this includes the '='
// padding), so whatever was decoded up to that point is returned.
string
base64Decode(const string &input)
{
    // Reverse lookup table (character -> 6-bit value, -1 for characters outside the alphabet).
    // Built once on first use instead of on every call.
    static const vector<int> T = [] {
        vector<int> table(256, -1);
        for (int i = 0; i < 64; i++) {
            table[static_cast<unsigned char>(base64_base_str[i])] = i;
        }
        return table;
    }();

    string out;
    // Bit accumulator. It is unsigned on purpose: groups are shifted in without ever being masked
    // out, so a signed accumulator would overflow (undefined behaviour) on longer inputs.
    unsigned int val = 0;
    // Position of the next complete byte inside "val"; negative while fewer than 8 bits are buffered
    int valb = -8;
    for (unsigned char c : input) {
        if (T[c] == -1) break;
        val = (val << 6) + static_cast<unsigned int>(T[c]);
        valb += 6;
        if (valb >= 0) {
            out.push_back(char((val >> valb) & 0xFF));
            valb -= 8;
        }
    }
    return out;
}

// Reports whether invalid_hex_evasion_re matches anywhere in "payload".
bool
containsInvalidUtf8(const string &payload)
{
    const bool has_invalid_hex_escape = invalid_hex_evasion_re.hasMatch(payload);
    return has_invalid_hex_escape;
}

// Rewrites every range matched by invalid_hex_evasion_re inside a copy of "payload".
// The characters after the '%' are folded into a number, 4 bits at a time: digits contribute their
// numeric value, any other character contributes (lowercased character - 'a' + 10). The low byte
// of that number is written back over the two characters following the '%' as two lowercase hex
// digits. Returns the rewritten copy.
string
unescapeInvalidUtf8(const string &payload)
{
    dbgFlow(D_WAAP_EVASIONS);

    // '%' plus two characters
    static const int evasion_pattern_length = 3;

    vector<RegexMatchRange> regex_matches;
    invalid_hex_evasion_re.findMatchRanges(payload, regex_matches);

    string unescaped_text = payload;
    for (const auto &match : regex_matches) {
        int num = 0;
        for (size_t pos = match.start + 1; pos < match.end; pos++) {
            const char &byte = unescaped_text[pos];
            const int nibble = isdigit(byte) ? charToDigit(byte) : ((tolower(byte) - 'a') + 10);
            num = (num << 4) + nibble;
        }

        // two hex digits plus the terminating NUL
        char buf[evasion_pattern_length];
        snprintf(buf, evasion_pattern_length, "%02x", (num & 0xff));
        unescaped_text.replace(match.start + 1, evasion_pattern_length - 1, buf);

        dbgTrace(D_WAAP_EVASIONS) << "Value after conversion: decimal = " << num << ", hex = " << buf;
    }

    dbgTrace(D_WAAP_EVASIONS) << "unescaped_text: " << unescaped_text;

    return unescaped_text;
}

// Picks the payload variant that matches broken_utf_evasion_re.
// "unquoted_payload" is tried first, then "payload"; the first one that matches is returned.
// An error is returned when neither of them matches.
Maybe<std::string>
containsBrokenUtf8(const string &payload, const string &unquoted_payload)
{
    if (broken_utf_evasion_re.hasMatch(unquoted_payload)) {
        return unquoted_payload;
    }

    if (broken_utf_evasion_re.hasMatch(payload)) {
        return payload;
    }

    return genError("does not contain broken-down UTF8");
}

// Glues "broken" percent escapes back together: in a sequence "%X%Y" (X and Y being single hex
// digits) the second '%' is dropped, which leaves "%XY". Every other character is copied as-is.
// Returns the rewritten copy of "payload".
string
unescapeBrokenUtf8(const string &payload)
{
    const size_t payload_len = payload.length();

    string unescaped_text;
    unescaped_text.reserve(payload_len);

    // position of the last remembered '%', or -1 when none is remembered
    int last_percent_pos = -1;
    for (size_t pos = 0; pos < payload_len; ++pos) {
        const char c = payload[pos];
        if (c != '%') {
            unescaped_text += c;
            continue;
        }

        // the 2nd '%' of "%X%Y": remembered '%' is two characters back, a hex digit sits between
        // the two '%' and another hex digit follows the current one
        const bool is_second_percent_of_pair =
            last_percent_pos >= 0 &&
            pos-last_percent_pos == 2 &&
            isxdigit(payload[pos-1]) &&
            pos+1 < payload_len &&
            isxdigit(payload[pos+1]);
        if (is_second_percent_of_pair) {
            // do not copy this '%'
            last_percent_pos = -1;
            continue;
        }

        // remember current '%' only when not directly following the remembered one
        if (last_percent_pos < 0 || pos-last_percent_pos > 1) {
            last_percent_pos = pos;
        }
        unescaped_text += c;
    }

    dbgTrace(D_WAAP_EVASIONS) << "unescaped_text: " << unescaped_text;
    return unescaped_text;
}

// Reports whether csp_report_policy_re matches anywhere in "payload".
bool
containsCspReportPolicy(const string &payload)
{
    const bool looks_like_csp_report = csp_report_policy_re.hasMatch(payload);
    return looks_like_csp_report;
}

// Builds a string from the first "slen" bytes of buffer "s" (embedded NUL bytes are kept).
// Returns an empty string when "s" is null or "slen" is not positive; a negative length would
// otherwise be converted to a huge unsigned size.
std::string
charToString(const char* s, int slen)
{
    if (!s || slen <= 0) return "";

    return std::string(s, static_cast<size_t>(slen));
}

// Renders "vec" as "[e1<sep>e2<sep>...]".
// The separator is "delim" followed by a space, except that a newline delimiter is used without
// the extra space. An empty vector is rendered as an empty string (no brackets).
std::string
vecToString(const std::vector<std::string>& vec, char delim) {
    if (vec.empty())
    {
        return std::string();
    }

    std::string separator;
    // The separator is consumed as a C string, hence a NUL delimiter means "no separator at all"
    if (delim != '\0')
    {
        separator.push_back(delim);
        if (delim != '\n')
        {
            separator.push_back(' ');
        }
    }

    std::string rendered = "[";
    for (size_t idx = 0; idx < vec.size(); ++idx)
    {
        if (idx != 0)
        {
            rendered += separator;
        }
        rendered += vec[idx];
    }
    rendered += "]";

    return rendered;
}


// XORs every byte of "toEncrypt" with the repeating key "CHECKPOINT" and returns the result.
// Applying the function twice gives back the original text.
std::string
obfuscateXor(const std::string& toEncrypt) {
    static const char xor_key[] = "CHECKPOINT"; //Any chars will work
    const size_t key_len = sizeof(xor_key) - 1; // terminating NUL is not part of the key

    std::string output;
    output.reserve(toEncrypt.size());

    size_t key_idx = 0;
    for (const char plain : toEncrypt) {
        output.push_back(static_cast<char>(plain ^ xor_key[key_idx]));
        key_idx = (key_idx + 1) % key_len;
    }

    return output;
}

// Obfuscates "toEncrypt" with obfuscateXor() and returns the base64 encoding of the result.
string
obfuscateXorBase64(const string& toEncrypt) {
    const string obfuscated = obfuscateXor(toEncrypt);
    return base64Encode(obfuscated);
}

// Returns "str" with a single space inserted between every two neighbouring characters
// ("abc" becomes "a b c"). An empty input gives an empty result.
std::string injectSpacesToString(const std::string& str) {
    std::string spaced;
    if (str.empty())
    {
        return spaced;
    }

    spaced.reserve(str.length() * 2 - 1);
    for (const char ch : str)
    {
        if (!spaced.empty())
        {
            spaced.push_back(' ');
        }
        spaced.push_back(ch);
    }
    return spaced;
}

// Maps a threat level to a report severity:
//  NO_THREAT -> INFO, THREAT_INFO -> LOW, LOW_THREAT -> MEDIUM, MEDIUM_THREAT -> HIGH,
//  anything else -> CRITICAL.
ReportIS::Severity computeSeverityFromThreatLevel(ThreatLevel threatLevel) {
    switch (threatLevel) {
        case NO_THREAT: return ReportIS::Severity::INFO;
        case THREAT_INFO: return ReportIS::Severity::LOW;
        case LOW_THREAT: return ReportIS::Severity::MEDIUM;
        case MEDIUM_THREAT: return ReportIS::Severity::HIGH;
        default: return ReportIS::Severity::CRITICAL;
    }
}

// Maps a threat level to a report priority:
//  NO_THREAT -> LOW, THREAT_INFO and LOW_THREAT -> MEDIUM, anything else -> HIGH.
ReportIS::Priority computePriorityFromThreatLevel(ThreatLevel threatLevel) {
    switch (threatLevel) {
        case NO_THREAT:
            return ReportIS::Priority::LOW;
        case THREAT_INFO:
        case LOW_THREAT:
            return ReportIS::Priority::MEDIUM;
        default:
            return ReportIS::Priority::HIGH;
    }
}

// Maps a threat level to a confidence label:
//  NO_THREAT and THREAT_INFO -> "Low", LOW_THREAT -> "Medium", MEDIUM_THREAT -> "High",
//  HIGH_THREAT -> "Very High".
// Any other value is reported with a warning and treated as "Low".
string computeConfidenceFromThreatLevel(ThreatLevel threatLevel)
{
    if (threatLevel == NO_THREAT || threatLevel == THREAT_INFO) {
        return "Low";
    }
    if (threatLevel == LOW_THREAT) {
        return "Medium";
    }
    if (threatLevel == MEDIUM_THREAT) {
        return "High";
    }
    if (threatLevel == HIGH_THREAT) {
        return "Very High";
    }

    dbgWarning(D_WAAP) << "Reached impossible threat level value of: " << static_cast<int>(threatLevel);
    return "Low";
}

// Replaces %xx sequences in "text" by their single-character equivalents, in place.
// "decodePlus" is forwarded to unquote_plus(); callers pass false to keep '+' untouched, because
// turning it into a space would corrupt base64 source strings (base64 alphabet includes '+').
void decodePercentEncoding(string &text, bool decodePlus)
{
    const auto url_encoded = checkUrlEncoded(text.data(), text.size());

    // unquote_plus() rewrites the buffer in place and hands back the new logical end
    const auto decoded_end = unquote_plus(text.begin(), text.end(), url_encoded, decodePlus);
    text.erase(decoded_end, text.end());

    dbgTrace(D_WAAP) << "decodePercentEncoding: (after unquote_plus) '" << text << "'";
}

// Try to detect/decode UTF16 (detecting either BE and LE variant).
// The function uses statistics to try to guess whether UTF-16 is present and its exact variant (Big/Little endianess)
// If UTF-16 value is detected, value in cur_val is converted to utf8 in-place for use in later processing.
void decodeUtf16Value(const ValueStatsAnalyzer &valueStats, string &cur_val)
{
    // Do not change cur_val if UTF16 is not detected
    if (!valueStats.isUTF16) {
        return;
    }

    dbgTrace(D_WAAP) << "decoding UTF-16 into UTF-8 in-place";

    bool isBigEndian = false;
    size_t pos = 0;

    // First, detect BOM as a hint of UTF16-BE vs. LE variant. See https://unicode.org/faq/utf_bom.html#utf8-4
    if (cur_val[0] == (char)0xFE && cur_val[1] == (char)0xFF) {
        // UTF16-BE hint
        isBigEndian = true;
        // Skip the BOM
        pos++;
    }
    else if (cur_val[0] == (char)0xFF && cur_val[1] == (char)0xFE) {
        // UTF16-LE hint
        isBigEndian = false;
        // Skip the BOM
        pos++;
    }
    else {
        isBigEndian = (valueStats.longestZerosSeq[0] > valueStats.longestZerosSeq[1]);
    }

    // Decode utf16 into utf8
    string utf8Out;
    for (; pos<cur_val.length()/2; ++pos) {
        unsigned int code;

        if (isBigEndian) {
            code = (cur_val[pos*2] << 8) + cur_val[pos*2+1];
        }
        else {
            code = (cur_val[pos*2+1] << 8) + cur_val[pos*2];
        }

        // Encode UTF code point as UTF-8 bytes
        if (code < 0x80) {
            utf8Out += code;
        }
        else if (code < 0x800 ) {
            utf8Out += (code >> 6) | 0xC0;
            utf8Out += (code & 0x3F) | 0x80;
        }
        else { // (code <= 0xFFFF : always true because code type is unsigned short which is 16-bit
            utf8Out += (code >> 12) | 0xE0;
            utf8Out += ((code >> 6) & 0x3F) | 0x80;
            utf8Out += (code & 0x3F) | 0x80;
        }
    }

    // Return the value converted from UTF-16 to UTF-8
    cur_val = utf8Out;
}

// Shared scanner for the two checks below: returns true when "line" contains a '%' that is
// followed by 'c' or 'C' and then by "lastChar", with at least these three characters present.
static bool containsPercentCEscape(const std::string &line, char lastChar) {
    for (size_t percentPos = line.find('%');
            percentPos != std::string::npos;
            percentPos = line.find('%', percentPos + 1)) {
        if (percentPos + 2 < line.size() &&
                // tolower() requires a value representable as unsigned char
                tolower(static_cast<unsigned char>(line[percentPos + 1])) == 'c' &&
                line[percentPos + 2] == lastChar) {
            return true;
        }
    }

    return false;
}

// Returns true when "line" contains "%c0" (the 'c' is matched case-insensitively).
bool testUrlBareUtf8Evasion(const std::string &line) {
    return containsPercentCEscape(line, '0');
}

// Returns true when "line" contains "%c1" (the 'c' is matched case-insensitively).
bool testUrlBadUtf8Evasion(const std::string &line) {
    return containsPercentCEscape(line, '1');
}

// Runs unquote_plus() over a private copy of "src" (last two arguments fixed to true and false)
// and returns the copy cut at the end position reported by unquote_plus().
string urlDecode(string src) {
    const auto decoded_end = unquote_plus(src.begin(), src.end(), true, false);
    src.erase(decoded_end, src.end());
    return src;
}

// LCOV_EXCL_START Reason: The function will be deleted on another task
// Removes an optional ":<port>" suffix from the address held in [first, last).
//  * "[<addr>]..." - returns the text between the brackets; without a closing bracket the
//                    input is returned unchanged
//  * no colon, or more than one colon (most likely a bare IPv6 address) - returned unchanged
//  * exactly one colon - returns the text in front of the colon (IPv4 with port)
//  * empty range - returns an empty string
std::string
stripOptionalPort(const std::string::const_iterator &first, const std::string::const_iterator &last)
{
    // Nothing to inspect; "first" must not be dereferenced when it equals "last"
    if (first == last) return std::string();

    // Microsoft XFF+IPv6+Port yikes - see also here https://github.com/eclipse/jetty.project/issues/3630
    if (*first == '[') {
        // Possible bracketed IPv6 address such as "[2001:db8::1]" + optional numeric ":<port>"
        auto close_bracket = std::find(first + 1, last, ']');
        if (close_bracket == last) return std::string(first, last);
        return std::string(first+1, close_bracket);
    }

    auto first_colon = std::find(first, last, ':');
    if (first_colon == last) return std::string(first, last);

    // If there is more than one colon it means its probably IPv6 address without brackets
    auto second_colon = std::find(first_colon + 1, last, ':');
    if (second_colon != last) return std::string(first, last);

    // If there's only one colon it can't be IPv6 and can only be IPv4 with port
    return std::string(first, first_colon);
}

bool
isIpTrusted(const string &ip, const vector<string> &trusted_ips)
{
    Waap::Util::CIDRData cidr_data;
    for (const auto &trusted_ip : trusted_ips) {
        if (
            ip == trusted_ip ||
            (Waap::Util::isCIDR(trusted_ip, cidr_data) && Waap::Util::cidrMatch(ip, cidr_data))
        ) {
            return true;
        }
    }
    return false;
}

// Extracts the client address from an X-Forwarded-For header value.
// The header is split on ','; every element is trimmed, stripped of an optional port and, when it
// is an IP address, checked against the trusted list configured for "x-forwarded-for"
// (rulebase / usersIdentifiers).
//  * no trusted list configured - the first IP address of the header is returned
//  * trusted list configured    - an empty string is returned as soon as one IP address of the
//                                 header is not trusted; otherwise the first IP address is returned
// An empty string is also returned when the header holds no IP address at all.
string extractForwardedIp(const string &x_forwarded_hdr_val)
{
    vector<string> xff_entries = split(x_forwarded_hdr_val, ',');
    vector<string> trusted_ips;

    auto identify_config = getConfiguration<UsersAllIdentifiersConfig>(
        "rulebase",
        "usersIdentifiers"
    );

    if (identify_config.ok()) {
        trusted_ips = (*identify_config).getHeaderValuesFromConfig("x-forwarded-for");
    } else {
        dbgDebug(D_WAAP) << "did not find xff definition in policy";
    }

    string forward_ip;
    for (string &xff_entry : xff_entries)
    {
        string candidate = trim(xff_entry);
        candidate = stripOptionalPort(candidate.cbegin(), candidate.cend());
        if (!isIpAddress(candidate))
        {
            continue;
        }

        if (!isIpTrusted(candidate, trusted_ips) && !trusted_ips.empty()) {
            return "";
        }
        if (forward_ip.empty()) {
            forward_ip = candidate;
        }
    }
    return forward_ip;
}

// Returns true when "ip_address" is the textual form of an IPv4 or an IPv6 address, as judged by
// inet_pton().
bool isIpAddress(const std::string &ip_address)
{
    const char *text = ip_address.c_str();

    struct in_addr parsed_v4;
    if (inet_pton(AF_INET, text, &parsed_v4) == 1) {
        return true;
    }

    struct in6_addr parsed_v6;
    return inet_pton(AF_INET6, text, &parsed_v6) == 1;
}

// LCOV_EXCL_STOP

// Looks up "key" in a Cookie header value and returns the value stored for it.
// The header is split on ';' and every element on '='; the first element whose name equals "key"
// wins. Because of the split on '=', the returned value ends at the next '=' (if any).
// For the "_oauth2_proxy" key the value is additionally base64-decoded and cut at the first '|'.
// Returns an empty string when the key is missing or has no value.
string extractKeyValueFromCookie(const string &cookie, const string &key)
{
    string source = "";
    vector<string> cookie_splitted = split(cookie, ';');
    for (size_t l = 0; l < cookie_splitted.size(); ++l)
    {
        vector<string> cookie_key_splitted = split(cookie_splitted[l], '=');
        if (cookie_key_splitted.empty())
        {
            dbgWarning(D_WAAP) << "Failed to split the key-value from: " << cookie_splitted[l];
            continue;
        }
        if (cookie_key_splitted[0] != key)
        {
            continue;
        }

        // A cookie such as "name" or "name=" yields no value element - there is nothing to read
        if (cookie_key_splitted.size() < 2)
        {
            break;
        }

        source = cookie_key_splitted[1];

        if (key == "_oauth2_proxy")
        {
            source = Waap::Util::base64Decode(source);

            vector<string> currentUserIdentifier_splitted = split(source, '|');

            if (currentUserIdentifier_splitted.size() > 0)
            {
                source = currentUserIdentifier_splitted[0];
            }
        }
        break;
    }
    dbgTrace(D_WAAP) << "extracted source from Cookie:" << key << " : " << source;
    return source;
}

// Returns true when `vec` holds at least one element equal to `str`
// (exact, case-sensitive comparison); false otherwise, including for an empty vector.
bool vectorStringContain(const vector<string>& vec, const string& str)
{
    for (vector<string>::const_iterator candidate = vec.begin(); candidate != vec.end(); ++candidate)
    {
        if (*candidate == str) {
            return true;
        }
    }
    return false;
}

// Maps a Content-Type header value to one of the ContentType constants.
//
// Checks are applied in this order:
//   1. the text after the last '+' is compared with "xml"  -> CONTENT_TYPE_XML
//      (e.g. "application/xhtml+xml" or "image/svg+xml"; see
//      https://stackoverflow.com/questions/2965587/valid-content-type-for-xml-html-and-xhtml-documents)
//   2. the text after the last '/' is compared with "xml"  -> CONTENT_TYPE_XML
//   3. the text after the last '/' is compared with "json" -> CONTENT_TYPE_JSON
//   4. the whole value is tested against a list of known media type prefixes.
// Comparisons are done by my_stricmp() / my_stristarts_with().
// Returns CONTENT_TYPE_UNKNOWN when nothing matches or when the value has no '/'.
ContentType detectContentType(const char* hdr_value) {
    const char* last_plus = ::strrchr(hdr_value, '+');
    if (last_plus && my_stricmp(last_plus + 1, "xml")) {
        return CONTENT_TYPE_XML;
    }

    // Every remaining rule is evaluated only for values that contain a '/'.
    const char* last_slash = ::strrchr(hdr_value, '/');
    if (!last_slash) {
        return CONTENT_TYPE_UNKNOWN;
    }

    const char* subtype = last_slash + 1;

    // ".../xml"
    if (my_stricmp(subtype, "xml")) {
        return CONTENT_TYPE_XML;
    }

    // ".../json"
    if (my_stricmp(subtype, "json")) {
        return CONTENT_TYPE_JSON;
    }

    // Media types recognized by the beginning of the header value, tested in order.
    struct PrefixRule {
        const char* prefix;
        ContentType type;
    };
    static const PrefixRule prefix_rules[] = {
        {"application/json", CONTENT_TYPE_JSON},                        // JSON
        {"application/graphql", CONTENT_TYPE_GQL},                      // GraphQL
        {"text/html", CONTENT_TYPE_HTML},                               // HTML
        {"multipart/form-data", CONTENT_TYPE_MULTIPART_FORM},           // Multipart form data
        {"application/x-www-form-urlencoded", CONTENT_TYPE_URLENCODED}, // URL-encoded form
        {"application/vnd.ms-sync.wbxml", CONTENT_TYPE_WBXML}           // Binary XML
    };

    for (const PrefixRule& rule : prefix_rules) {
        if (my_stristarts_with(hdr_value, rule.prefix)) {
            return rule.type;
        }
    }

    return CONTENT_TYPE_UNKNOWN;
}

// Returns the textual name of a ParamType value.
// A value that is not listed in the table below is reported with a warning
// and yields the string "unrecognized type".
string convertParamTypeToStr(ParamType type)
{
    struct TypeName {
        ParamType id;
        const char *name;
    };
    static const TypeName known_types[] = {
        {UNKNOWN_PARAM_TYPE, "unknown"},
        {HTML_PARAM_TYPE, "html_input"},
        {URL_PARAM_TYPE, "urls"},
        {FREE_TEXT_PARAM_TYPE, "free_text"},
        {FREE_TEXT_FRENCH_PARAM_TYPE, "free_text_french"},
        {PIPE_PARAM_TYPE, "pipes"},
        {LONG_RANDOM_TEXT_PARAM_TYPE, "long_random_text"},
        {BASE64_PARAM_TYPE, "base64"},
        {ADMINISTRATOR_CONFIG_PARAM_TYPE, "administration_config"},
        {FILE_PATH_PARAM_TYPE, "local_file_path"},
        {SEMICOLON_DELIMITED_PARAM_TYPE, "semicolon_delimiter"},
        {ASTERISK_DELIMITED_PARAM_TYPE, "asterisk_delimiter"},
        {COMMA_DELIMITED_PARAM_TYPE, "comma_delimiter"},
        {AMPERSAND_DELIMITED_PARAM_TYPE, "ampersand_delimiter"},
        {BINARY_PARAM_TYPE, "binary_input"}
    };

    for (const TypeName &entry : known_types)
    {
        if (entry.id == type)
        {
            return entry.name;
        }
    }

    dbgWarning(D_WAAP) << "unrecognized type " << to_string(type);
    return "unrecognized type";
}

// Converts a parameter type name to its ParamType value.
// A name that is not present in the table is reported with a warning and
// mapped to UNKNOWN_PARAM_TYPE.
ParamType convertTypeStrToEnum(const string& typeStr)
{
    // Name -> type table, built once on the first call.
    static const unordered_map<string, ParamType> type_by_name = {
        {"unknown", UNKNOWN_PARAM_TYPE},
        {"html_input", HTML_PARAM_TYPE},
        {"urls", URL_PARAM_TYPE},
        {"free_text", FREE_TEXT_PARAM_TYPE},
        {"free_text_french", FREE_TEXT_FRENCH_PARAM_TYPE},
        {"pipes", PIPE_PARAM_TYPE},
        {"long_random_text", LONG_RANDOM_TEXT_PARAM_TYPE},
        {"base64", BASE64_PARAM_TYPE},
        {"administration_config", ADMINISTRATOR_CONFIG_PARAM_TYPE},
        {"local_file_path", FILE_PATH_PARAM_TYPE},
        {"semicolon_delimiter", SEMICOLON_DELIMITED_PARAM_TYPE},
        {"asterisk_delimiter", ASTERISK_DELIMITED_PARAM_TYPE},
        {"comma_delimiter", COMMA_DELIMITED_PARAM_TYPE},
        {"ampersand_delimiter", AMPERSAND_DELIMITED_PARAM_TYPE},
        {"binary_input", BINARY_PARAM_TYPE}
    };

    const auto found = type_by_name.find(typeStr);
    if (found == type_by_name.end())
    {
        dbgWarning(D_WAAP) << "unrecognized parameter type name: " << typeStr;
        return UNKNOWN_PARAM_TYPE;
    }
    return found->second;
}


}
}