First release of open-appsec source code

This commit is contained in:
roybarda
2022-10-26 19:33:19 +03:00
parent 3883109caf
commit a883352f79
1353 changed files with 276290 additions and 1 deletions

View File

@@ -0,0 +1,94 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "AutonomousSecurityDecision.h"
// Creates a decision of the given type. Every numeric score starts at zero,
// the threat level starts at NO_THREAT and log overriding is off.
AutonomousSecurityDecision::AutonomousSecurityDecision(DecisionType type)
    : SingleDecision(type)
    , m_relativeReputation(0.0)
    , m_fpMitigationScore(0.0)
    , m_finalScore(0.0)
    , m_threatLevel(NO_THREAT)
    , m_overridesLog(false)
    , m_relativeReputationMean(0.0)
    , m_variance(0.0)
{
}
// Nothing to release explicitly: all members are plain values.
AutonomousSecurityDecision::~AutonomousSecurityDecision() = default;
// Returns the fixed, human-readable name of this decision kind.
std::string AutonomousSecurityDecision::getTypeStr() const
{
    const std::string typeName("Autonomous Security");
    return typeName;
}
void AutonomousSecurityDecision::setRelativeReputation(double relativeReputation)
{
m_relativeReputation = relativeReputation;
}
void AutonomousSecurityDecision::setFpMitigationScore(double fpMitigationScore)
{
m_fpMitigationScore = fpMitigationScore;
}
void AutonomousSecurityDecision::setFinalScore(double finalScore)
{
m_finalScore = finalScore;
}
// Stores the threat level on this decision.
void AutonomousSecurityDecision::setThreatLevel(ThreatLevel threatLevel)
{
    this->m_threatLevel = threatLevel;
}
void AutonomousSecurityDecision::setOverridesLog(bool overridesLog)
{
m_overridesLog = overridesLog;
}
void AutonomousSecurityDecision::setRelativeReputationMean(double relativeReputationMean)
{
m_relativeReputationMean = relativeReputationMean;
}
void AutonomousSecurityDecision::setVariance(double variance)
{
m_variance = variance;
}
double AutonomousSecurityDecision::getRelativeReputation() const
{
return m_relativeReputation;
}
double AutonomousSecurityDecision::getFpMitigationScore() const
{
return m_fpMitigationScore;
}
double AutonomousSecurityDecision::getFinalScore() const
{
return m_finalScore;
}
// Returns the stored threat level.
ThreatLevel AutonomousSecurityDecision::getThreatLevel() const
{
    return this->m_threatLevel;
}
bool AutonomousSecurityDecision::getOverridesLog() const
{
return m_overridesLog;
}
double AutonomousSecurityDecision::getRelativeReputationMean() const
{
return m_relativeReputationMean;
}
double AutonomousSecurityDecision::getVariance() const
{
return m_variance;
}

View File

@@ -0,0 +1,53 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __AUTONOMOUS_SECURITY_DECISION_H__
#define __AUTONOMOUS_SECURITY_DECISION_H__
#include "SingleDecision.h"
#include "DecisionType.h"
#include "WaapEnums.h"
#include <string>
// A SingleDecision specialisation that carries the numeric results attached to
// an "Autonomous Security" decision: reputation figures, scores, a threat level
// and a flag telling whether the log is overridden.
// The class is a plain value holder: every field has a setter and a getter and
// no method performs any computation.
class AutonomousSecurityDecision: public SingleDecision
{
public:
// Builds the decision with all scores at zero, NO_THREAT and overridesLog off.
explicit AutonomousSecurityDecision(DecisionType type);
virtual ~AutonomousSecurityDecision();
// Returns the constant string "Autonomous Security".
std::string getTypeStr() const override;
// Setters: each one overwrites the matching member, no validation is done.
void setRelativeReputation(double relativeReputation);
void setFpMitigationScore(double fpMitigationScore);
void setFinalScore(double finalScore);
void setThreatLevel(ThreatLevel threatLevel);
void setOverridesLog(bool overridesLog);
void setRelativeReputationMean(double relativeReputationMean);
void setVariance(double variance);
// Getters: each one returns the matching member unchanged.
double getRelativeReputation() const;
double getFpMitigationScore() const;
double getFinalScore() const;
ThreatLevel getThreatLevel() const;
bool getOverridesLog() const;
double getRelativeReputationMean() const;
double getVariance() const;
private:
// NOTE: the constructor's initializer list follows this declaration order.
double m_relativeReputation;
double m_fpMitigationScore;
double m_finalScore;
ThreatLevel m_threatLevel;
bool m_overridesLog;
double m_relativeReputationMean;
double m_variance;
};
#endif

View File

@@ -0,0 +1,442 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// #define WAF2_LOGGING_ENABLE
#include "BehaviorAnalysis.h"
#include <string>
#include <vector>
#include <map>
#include <set>
#include <algorithm>
#include <set>
#include <functional>
#include <math.h>
#include <stdio.h>
#include <iostream>
#include <assert.h>
// NOTE(review): BUCKET_SIZE is not referenced in the part of the file shown
// here - confirm it is still needed.
static const int BUCKET_SIZE = 300;
// Seed values used by the BehaviorAnalyzer constructor for its running
// sample count, variance and reputation mean.
#define INITIAL_COUNT 1000
#define INITIAL_VARIANCE 100.0
#define INITIAL_MEAN 40.0
// Records one legitimate hit for the IP, the user agent and the combined
// user-agent/IP key. The uri argument is accepted but not used.
void TopBucket::addKeys(std::string& uri, std::string& ip, std::string& ua, std::string& ua_ip)
{
    (void)uri;

    m_ips.addKeys(ip);
    m_userAgents.addKeys(ua);
    m_ipUserAgents.addKeys(ua_ip);
}
// Registers a suspected attack against the three source keys.
// Reputation is punished only if the score is above 1.0 and the attack was
// located in the URL, a URL parameter or the body. Attacks found elsewhere
// (for example referer parameters, headers or cookies) are not punished.
// getInfo() is always called afterwards and its result is discarded; note that
// getInfo() also feeds the analyzer's running average and variance.
void TopBucket::putAttack(std::string& url,
                          double score,
                          std::string& ip,
                          std::string& ua,
                          std::string& ua_ip,
                          const std::string &location)
{
    (void)url;

    const bool punishableLocation =
        (location == "url") || (location == "url_param") || (location == "body");

    if (score > 1.0 && punishableLocation) {
        m_ips.putAttack(true, score, ip);
        m_userAgents.putAttack(true, score, ua);
        m_ipUserAgents.putAttack(true, score, ua_ip);
    }

    getInfo(ip, ua, ua_ip);
}
void TopBucket::cleanSources()
{
m_ips.cleanSources();
m_userAgents.cleanSources();
m_ipUserAgents.cleanSources();
}
// Refreshes the cached average from the combined user-agent/IP table.
void TopBucket::evaluateAvg()
{
    const double freshAverage = m_ipUserAgents.getSourcesAvg();
    m_avgCount = freshAverage;
}
// Tells whether the table selected by sourceType holds no entries.
// An unrecognised sourceType yields false.
bool TopBucket::isSourceEmpty(SourceType sourceType)
{
    if (sourceType == IP_SOURCE_TYPE) {
        return m_ips.empty();
    }
    if (sourceType == UA_SOURCE_TYPE) {
        return m_userAgents.empty();
    }
    if (sourceType == UA_IP_SOURCE_TYPE) {
        return m_ipUserAgents.empty();
    }
    return false;
}
double TopBucket::getAvgCount()
{
return m_avgCount;
}
// Collects the reputation of the IP, the user agent and the combined key,
// averages the three into the absolute reputation, feeds that value into the
// analyzer's running statistics and then asks the analyzer for the relative
// reputation.
ReputationData TopBucket::getInfo(std::string& ip, std::string& ua, std::string& uaIp)
{
    ReputationData result;

    result.ipReputation = m_ips.getInfo(ip, m_avgCount);
    result.uaReputation = m_userAgents.getInfo(ua, m_avgCount);
    result.uaIpReputation = m_ipUserAgents.getInfo(uaIp, m_avgCount);

    const double reputationSum = result.ipReputation.reputation +
        result.uaReputation.reputation +
        result.uaIpReputation.reputation;
    result.absoluteReputation = reputationSum / 3;

    // The statistics are updated first, so the relative value already
    // reflects this sample.
    m_behaviorAnalyzer->updateAvrageAndVariance(result.absoluteReputation);
    result.relativeReputation = m_behaviorAnalyzer->getRelativeReputation(result.absoluteReputation);

    return result;
}
// Starts with an empty table of sources.
Source::Source()
    : sources()
{}
// Frees every Counters object owned by the table, then empties the table.
Source::~Source()
{
    for (auto entry = sources.begin(); entry != sources.end(); ++entry) {
        delete entry->second;
    }
    sources.clear();
}
void Source::cleanSources()
{
for (auto source = sources.begin(); source != sources.end();) {
if (!source->second->to_remove) {
source->second->to_remove = true;
source++;
}
else {
delete source->second;
sources.erase(source++);
}
}
}
// Returns the mean countLegit over all tracked sources, or 0 when the table
// is empty.
double Source::getSourcesAvg()
{
    if (sources.empty()) {
        return 0;
    }

    // Accumulate in a 64-bit integer: countLegit is a 32-bit counter, so a
    // 32-bit total could wrap around when many busy sources are tracked.
    unsigned long long legitTotal = 0;
    // Iterate by reference to avoid copying each key string.
    for (const auto& entry : sources) {
        legitTotal += entry.second->countLegit;
    }

    return static_cast<double>(legitTotal) / sources.size();
}
void Source::putAttack(bool missedUrl, double score, std::string& source)
{
if (sources.find(source) == sources.end()) {
sources[source] = new Counters(); // init counters to 0
}
assert(missedUrl != false || score > 0);
if (missedUrl) {
sources[source]->missed_urls++;
}
// Larger value slows down the absolute score reduction during attacks.
const double velocity = 8;
sources[source]->attacksScoreSum += round(score, 5) * velocity;
}
// TODO: rename
// Counts one legitimate request for the given source, creating its counters
// (all zero) the first time the source is seen.
void Source::addKeys(std::string& source)
{
    auto entry = sources.find(source);
    if (entry == sources.end()) {
        // init counters to 0
        entry = sources.emplace(source, new Counters()).first;
    }
    entry->second->countLegit++;
}
// Computes 100 * (param + 1) / (param + count + 1), truncated to an integer.
// Assuming count > 0 and param > 0 the result lies in (0, 100]:
//   count << param  ->  close to 100
//   count >> param  ->  close to 0
double Source::calcDiff(double count, double param)
{
    const double numerator = (param + 1) * 100;
    const double denominator = param + count + 1;
    const int truncated = (int)(numerator / denominator);
    return (double)truncated;
}
// Computes the reputation figures for one source.
//
// source   - the key to look up; an entry with zeroed counters is created if
//            the key is not tracked yet.
// avgCount - average legitimate-request count, used to bias the
//            legit-vs-attacks term.
//
// Returns an Info holding the reputation, the coverage, the legit-vs-attacks
// ratio and the raw counters the figures were derived from.
Source::Info Source::getInfo(std::string& source, double avgCount)
{
    // The entry must exist BEFORE anything dereferences it. Previously the
    // to_remove reset below ran first and went through operator[], which
    // inserts a null pointer for an unknown key and then dereferenced it.
    auto entry = sources.find(source);
    if (entry == sources.end()) {
        // init counters to 0
        entry = sources.emplace(source, new Counters()).first;
    }
    Counters* counters = entry->second;

    // NOTE(review): this tests whether the key TEXT contains "to_remove". It
    // looks like the intent was to un-flag any source that was just accessed
    // so cleanSources() keeps it - confirm before changing the condition.
    if (source.find("to_remove") != std::string::npos) {
        counters->to_remove = false;
    }

    // range (0, 5/6*100]
    const double missed_urls = 100 - calcDiff(5, counters->missed_urls);

    // Constant term: calcDiff(4, 40) is 91, so this evaluates to 67.
    double coverage = (int)((100 - calcDiff(4, 40)) * 4 / 5 + 60);

    // range - [20, 100)
    // assuming avg count > 0 -> max(40 - avg, 1) => [1,40)
    // count -> 0 & attack -> 0 : legit/attack -> 20
    // count -> 0 & attack -> inf : legit/attack -> 20
    // count -> inf & attack -> 0 : legit/attack -> 100+
    const double spcDiff = calcDiff(counters->countLegit + std::max(40 - (int)avgCount, 1) + 20,
        counters->attacksScoreSum * 4);
    const double legit_vs_attacks = (double)(100 - (spcDiff)) * 4 / 5 + 20;

    coverage = (int)((coverage + missed_urls) / 2);
    const double reputation = (double)(coverage * legit_vs_attacks * missed_urls) / 100 / 100;

    Source::Info info = { reputation, coverage, legit_vs_attacks,
        {counters->countLegit, counters->attacksScoreSum}};
    return info;
}
// True when no source is tracked.
bool Source::empty()
{
    const bool nothingTracked = sources.empty();
    return nothingTracked;
}
size_t Source::size() {
return sources.size();
}
// Seeds the running statistics with the INITIAL_* constants; no buckets exist
// until a site is first seen.
BehaviorAnalyzer::BehaviorAnalyzer()
    : m_count(INITIAL_COUNT)
    , m_variance(INITIAL_VARIANCE)
    , m_reputation_mean(INITIAL_MEAN)
{}
// Deletes every per-site bucket owned by the analyzer, then empties the map.
BehaviorAnalyzer::~BehaviorAnalyzer()
{
    for (auto slot = m_buckets.begin(); slot != m_buckets.end(); ++slot) {
        delete slot->second;
    }
    m_buckets.clear();
}
// Processes one request for the site named in data.site_id and returns the
// reputation of its source.
//
// Steps:
//  1. Whenever the sample counter is a multiple of COUNTER_BACKUP_THRESHOLD,
//     every bucket refreshes its average and runs a clean-up pass.
//  2. The site's bucket is created on first use.
//  3. With no keyword matches the request is learned as legitimate.
//     With keyword matches the source is (conditionally) punished.
//  4. The bucket's reputation data for the source is returned.
ReputationData BehaviorAnalyzer::analyze_behavior(BehaviorAnalysisInputData& data)
{
    std::string& siteId = data.site_id;

    if (m_count % COUNTER_BACKUP_THRESHOLD == 0) {
        // TODO: backup
        // calculate average per bucket
        for (const auto& slot : m_buckets) {
            slot.second->evaluateAvg();
        }
        // reset
        for (const auto& slot : m_buckets) {
            slot.second->cleanSources();
        }
    }

    auto siteSlot = m_buckets.find(siteId);
    if (siteSlot == m_buckets.end()) {
        siteSlot = m_buckets.emplace(siteId, new TopBucket(this)).first;
    }
    TopBucket* siteBucket = siteSlot->second;

    std::string& source = data.source_identifier;
    std::string& userAgent = data.user_agent;
    std::string userAgentSource = userAgent + " " + source;

    const std::vector<std::string>& keywords = data.keyword_matches;
    if (keywords.empty()) {
        quickLearn(siteId, source, userAgent, data.uri);
    }
    else {
        // Two cases here:
        // 1. No probing - always punish reputation
        // 2. If there's probing - only punish if too many keyword matches (strong suspicion)
        const bool hasProbing =
            std::find(keywords.begin(), keywords.end(), "probing") != keywords.end();
        if (keywords.size() > 2 || !hasProbing) {
            // Punish reputation conditionally, see TopBucket::putAttack() for the details
            siteBucket->putAttack(data.short_uri,
                data.score * data.fp_mitigation_score / 10,
                source,
                userAgent,
                userAgentSource,
                data.location);
        }
    }

    return siteBucket->getInfo(source, userAgent, userAgentSource);
}
// Unit-test helper: tells whether the selected source table of a site is
// empty.
//
// A site that has no bucket yet has no sources at all, so it is reported as
// empty. The lookup uses find() rather than operator[]: operator[] would
// insert a null bucket for an unknown site and dereference it, and the null
// entry would also break later loops over the bucket map.
bool BehaviorAnalyzer::isSourceEmpty(std::string siteId, SourceType sourceType)
{
    const auto siteSlot = m_buckets.find(siteId);
    if (siteSlot == m_buckets.end()) {
        return true;
    }
    return siteSlot->second->isSourceEmpty(sourceType);
}
void BehaviorAnalyzer::clearSources()
{
for (auto bucket : m_buckets) {
bucket.second->cleanSources();
}
}
// Returns the number of samples folded into the running statistics
// (including the initial seed count).
size_t BehaviorAnalyzer::getCount()
{
    return this->m_count;
}
// Returns the cached average of the given site's bucket, or -1.0 when the
// site has no bucket.
double BehaviorAnalyzer::getAvgCount(std::string& siteId)
{
    const auto siteSlot = m_buckets.find(siteId);
    if (siteSlot != m_buckets.end()) {
        return siteSlot->second->getAvgCount();
    }
    return -1.0;
}
double BehaviorAnalyzer::getReputationMean() const
{
return m_reputation_mean;
}
double BehaviorAnalyzer::getVariance() const
{
return m_variance;
}
// Learns a legitimate request: creates the site's bucket if needed and counts
// one hit for the source, the user agent and the "<userAgent> <source>" key.
void BehaviorAnalyzer::quickLearn(std::string& siteId, std::string& source, std::string& userAgent, std::string& uri)
{
    auto siteSlot = m_buckets.find(siteId);
    if (siteSlot == m_buckets.end()) {
        siteSlot = m_buckets.emplace(siteId, new TopBucket(this)).first;
    }

    std::string userAgentSource = userAgent + " " + source;
    siteSlot->second->addKeys(uri, source, userAgent, userAgentSource);
}
// Maps an absolute reputation to a 0..10 score relative to the running
// statistics: the value is normalised against a shifted mean and a widened
// standard deviation, then passed through errorProbabilityScore().
double BehaviorAnalyzer::getRelativeReputation(double reputation)
{
    // Larger value slows down the relative score reduction during attacks.
    const double viscosity = 0.15;

    // Reference point: halfway between the running mean and 100.
    const double mean = (m_reputation_mean + 100) / 2;

    // variance is pow2 of standardDeviation
    const double rawDeviation = sqrt(m_variance);
    const double standardDeviation = (rawDeviation / viscosity + 5) / 2;

    const double score = errorProbabilityScore((reputation - mean) / standardDeviation);
    return 10 * score;
}
double BehaviorAnalyzer::errorProbabilityScore(double score)
{
double probScore = 0.5 + 0.5 * erf(score / 2);
//round to 3 decimal points
probScore = round(probScore, 3);
return probScore;
}
// Folds one reputation sample into the running mean and variance.
// Samples that are not above 1.0 are ignored and leave the state untouched.
void BehaviorAnalyzer::updateAvrageAndVariance(double reputation)
{
    if (!(reputation > 1.0)) {
        return;
    }

    const double prev_mean = m_reputation_mean;

    m_reputation_mean = (double)(m_reputation_mean * m_count + reputation) / (m_count + 1L);

    // variance induction step
    const double weightedSpread = (m_variance * m_count + pow((reputation - prev_mean), 2)) / (m_count + 1);
    const double meanShiftSquared = pow((m_reputation_mean - prev_mean), 2);
    m_variance = weightedSpread - meanShiftSquared;

    m_count++;
}
// All counters start at zero and the entry is not flagged for removal.
Counters::Counters()
    : countLegit(0)
    , attacksScoreSum(0.0)
    , missed_urls(0)
    , to_remove(false)
{}
// Rounds val to `precision` decimal places (half away from zero, as the
// single-argument round() does).
//
// The scale factor is kept as a double. Storing 10^precision in an unsigned
// int is out of range for precision >= 10, and that conversion is undefined
// behaviour.
double round(double val, unsigned char precision) {
    const double factor = pow(10.0, static_cast<double>(precision));
    return round(val * factor) / factor;
}
// True when the two values differ by no more than delta (inclusive).
bool compareWithDelta(double rhs, double lhs, double delta)
{
    const double distance = fabs(rhs - lhs);
    return distance <= delta;
}
// Compares two ReputationData values: the three Info members must be equal
// and both reputation figures must agree within 0.0001.
// On mismatch both sides are printed to stdout.
// The checks are chained with && in this exact order so that later
// comparisons, which may print, are skipped after the first failure.
bool operator==(const ReputationData& lhs, const ReputationData& rhs)
{
    const double tolerance = 0.0001;

    bool same = (lhs.ipReputation == rhs.ipReputation);
    same = same && (lhs.uaReputation == rhs.uaReputation);
    same = same && (lhs.uaIpReputation == rhs.uaIpReputation);
    same = same && compareWithDelta(lhs.absoluteReputation, rhs.absoluteReputation, tolerance);
    same = same && compareWithDelta(lhs.relativeReputation, rhs.relativeReputation, tolerance);

    if (same) {
        return true;
    }

    std::printf("lhs: {absolute rep: %f, reputation: %f} , rhs: {absolute rep: %f, reputation: %f}\n",
        lhs.absoluteReputation, lhs.relativeReputation, rhs.absoluteReputation, rhs.relativeReputation);
    return false;
}
// Compares two Source::Info values: coverage, legitVsAttacks and reputation
// must agree within 0.0001 and the stats must be equal.
// On mismatch both sides are printed to stdout.
// The stats comparison, which may print, runs last and only if the numeric
// fields matched.
bool operator==(const Source::Info& lhs, const Source::Info& rhs)
{
    const double tolerance = 0.0001;

    bool same = compareWithDelta(lhs.coverage, rhs.coverage, tolerance);
    same = same && compareWithDelta(lhs.legitVsAttacks, rhs.legitVsAttacks, tolerance);
    same = same && compareWithDelta(lhs.reputation, rhs.reputation, tolerance);
    same = same && (lhs.stats == rhs.stats);

    if (same) {
        return true;
    }

    std::printf("\tlhs: {coverage: %f, legitVsAttack: %f, reputation: %f}\n",
        lhs.coverage, lhs.legitVsAttacks, lhs.reputation);
    std::printf("\trhs: {coverage: %f, legitVsAttack: %f, reputation: %f}\n",
        rhs.coverage, rhs.legitVsAttacks, rhs.reputation);
    return false;
}
// Compares two Source::Stats values: attacks must agree within 0.0001 and
// countLegit must be identical. On mismatch both sides are printed to stdout.
bool operator==(const Source::Stats& lhs, const Source::Stats& rhs)
{
    const bool attacksMatch = compareWithDelta(lhs.attacks, rhs.attacks, 0.0001);
    const bool countsMatch = (lhs.countLegit == rhs.countLegit);

    if (attacksMatch && countsMatch) {
        return true;
    }

    std::printf("\t\tlhs: {attacks: %f, count: %u}\n",
        lhs.attacks, lhs.countLegit);
    std::printf("\t\trhs: {attacks: %f, count: %u}\n",
        rhs.attacks, rhs.countLegit);
    return false;
}

View File

@@ -0,0 +1,179 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __WAF2_BEHAVIOR_H__f1edd27e
#define __WAF2_BEHAVIOR_H__f1edd27e
#include <string>
#include <vector>
#include <map>
#include <set>
#include <string.h>
#include <math.h>
#include <boost/noncopyable.hpp>
#define MAX_NUM_OF_KEYS_IN_COUNTER 100
#define COUNTER_BACKUP_THRESHOLD 200000
#define MAX_RELATIVE_REPUTATION 10.0
// Per-source bookkeeping record stored in Source's table.
class Counters {
public:
// Zeroes every counter and clears to_remove.
Counters();
// Number of requests learned as legitimate (incremented by Source::addKeys).
unsigned int countLegit;
// Accumulated, scaled attack scores (increased by Source::putAttack).
double attacksScoreSum;
// Incremented by Source::putAttack when it is called with missedUrl == true.
long int missed_urls;
// Sweep flag used by Source::cleanSources: set on one pass, entry deleted on
// the next pass if the flag is still set.
bool to_remove;
};
// Selects one of the three tables kept by TopBucket (see TopBucket::isSourceEmpty).
typedef enum _SourceType {
IP_SOURCE_TYPE,
UA_SOURCE_TYPE,
UA_IP_SOURCE_TYPE
}SourceType;
class BehaviorAnalyzer;
// A table of per-source counters keyed by a source string, with helpers that
// turn the counters into reputation figures.
// The table owns its Counters objects (raw pointers deleted in the destructor
// and in cleanSources); the class is non-copyable.
class Source : public boost::noncopyable
{
public:
// Raw counters a reputation was derived from.
typedef struct _Stats {
unsigned int countLegit;
double attacks;
}Stats;
// Reputation figures computed by getInfo().
typedef struct _Info {
double reputation;
double coverage;
double legitVsAttacks;
Source::Stats stats;
}Info;
Source();
~Source();
// Sweep pass: flags unflagged entries, deletes entries already flagged.
void cleanSources();
// Mean countLegit over all entries, 0 when the table is empty.
double getSourcesAvg();
size_t size();
// Records an attack observation for `source`, creating the entry if needed.
void putAttack(bool missedUrl, double score, std::string& source);
// Counts one legitimate request for `source`, creating the entry if needed.
void addKeys(std::string& source);
// Returns 100 * (param + 1) / (param + count + 1), truncated to an integer.
static double calcDiff(double count, double param);
// Computes the reputation figures for `source`.
Info getInfo(std::string& source, double avgCount);
// function for unit tests
bool empty();
private:
std::map<std::string, Counters*> sources; // key is either (source_ip) or (useragent+source_ip)
//std::set<std::string> urls; // set of URLs visited by this source
};
// Disabled code: everything between #if 0 and #endif is excluded from
// compilation.
#if 0
class Bucket {
public:
size_t add(const std::string& key);
void clean();
bool exist(const std::string& key) const;
size_t get(const std::string& key) const;
size_t size() const { return _data.size(); }
private:
std::map<std::string, size_t> _data;
};
#endif
// Result of a reputation query: the per-key figures plus the combined
// absolute and relative reputation (filled in by TopBucket::getInfo).
typedef struct _ReputationData {
Source::Info ipReputation;
Source::Info uaReputation;
Source::Info uaIpReputation;
double relativeReputation; // the absolute reputation relative to the average
double absoluteReputation; // mean of the three reputation values above
}ReputationData;
// Rounds val to `precision` decimal places.
double round(double val, unsigned char precision);
// Tolerant equality operators: floating-point fields are compared within
// 0.0001, and a mismatch is printed to stdout.
bool operator==(const Source::Stats& lhs, const Source::Stats& rhs);
bool operator==(const Source::Info& lhs, const Source::Info& rhs);
bool operator==(const ReputationData& lhs, const ReputationData& rhs);
// Per-site container holding three Source tables (by IP, by user agent and by
// the combined user-agent/IP key) plus a cached average request count.
class TopBucket {
private:
Source m_ips;
Source m_userAgents;
Source m_ipUserAgents;
// Not owned; used to update and query the global running statistics.
BehaviorAnalyzer* m_behaviorAnalyzer;
// Cached by evaluateAvg(); starts at 20.
double m_avgCount;
public:
TopBucket(BehaviorAnalyzer* behaviorAnalyzer) : m_behaviorAnalyzer(behaviorAnalyzer), m_avgCount(20) {}
// Counts one legitimate request for each of the three keys.
void addKeys(std::string& uri, std::string& ip, std::string& ua, std::string& ua_ip);
// Conditionally punishes the three keys, then refreshes the statistics.
void putAttack(std::string& uri,
double score, std::string& ip,
std::string& ua,
std::string& ua_ip,
const std::string& location);
// Runs a sweep pass on all three tables.
void cleanSources();
// Refreshes m_avgCount from the combined user-agent/IP table.
void evaluateAvg();
bool isSourceEmpty(SourceType sourceType);
double getAvgCount();
// Computes reputation data for the given keys; also updates the analyzer's
// running average and variance.
ReputationData getInfo(std::string& ip, std::string& ua, std::string& uaIp);
};
// Input record for BehaviorAnalyzer::analyze_behavior().
struct BehaviorAnalysisInputData {
std::string site_id; // selects the per-site bucket
std::string source_identifier; // used as the "IP" key
std::string user_agent;
std::string short_uri; // data['short_uri'] (see fix_data_keys...)
std::string uri; // data['uri'] (see fix_data_keys...)
std::vector<std::string> keyword_matches; // empty means the request is learned as legitimate
double score;
double fp_mitigation_score; // calculated outside before analyze_behavior() !!!
std::string location; // where the attack was found, e.g. "url", "url_param", "body"
};
// Tracks source reputation per site and keeps a running mean and variance of
// the absolute reputation across all sites.
// NOTE(review): the class owns the TopBucket pointers in m_buckets and deletes
// them in its destructor, but copying is not disabled - copying an instance
// would lead to a double delete. Confirm no caller copies it.
class BehaviorAnalyzer {
public:
BehaviorAnalyzer();
~BehaviorAnalyzer();
// Processes one request and returns the reputation of its source.
ReputationData analyze_behavior(BehaviorAnalysisInputData& data);
// Runs a sweep pass on every bucket.
void clearSources();
// Learns a legitimate request for the given site and source.
void quickLearn(std::string& siteId, std::string& sourceIp, std::string& userAgent, std::string& uri);
// Maps an absolute reputation to a score relative to the running statistics.
double getRelativeReputation(double reputation);
// Folds one sample (if above 1.0) into the running mean and variance.
void updateAvrageAndVariance(double reputation);
// unit test related functions
bool isSourceEmpty(std::string siteId, SourceType sourceType);
size_t getCount();
double getAvgCount(std::string& siteId);
double getReputationMean() const;
double getVariance() const;
private:
double errorProbabilityScore(double score);
// TODO: move to SMEM
size_t m_count;
double m_variance;
double m_reputation_mean;
std::map<std::string, TopBucket*> m_buckets;
};
#endif // __WAF2_BEHAVIOR_H__f1edd27e

View File

@@ -0,0 +1,108 @@
# Headers shared with the rest of the component live one level up.
include_directories(../include)
# Library built from all the sources listed below.
add_library(waap_clib
Csrf.cc
CsrfPolicy.cc
ContentTypeParser.cc
CidrMatch.cc
DeepParser.cc
KeyStack.cc
ParserBase.cc
ParserBinary.cc
ParserHdrValue.cc
ParserJson.cc
ParserMultipartForm.cc
ParserRaw.cc
ParserUrlEncode.cc
ParserXML.cc
ParserDelimiter.cc
ParserConfluence.cc
ParserHTML.cc
PatternMatcher.cc
PHPSerializedDataParser.cc
WaapScores.cc
WaapKeywords.cc
Waf2Engine.cc
Waf2EngineGetters.cc
WaapScanner.cc
WaapRegexPreconditions.cc
Waf2Regex.cc
WaapAssetState.cc
Signatures.cc
Waf2Util.cc
WaapConfigBase.cc
WaapConfigApi.cc
WaapConfigApplication.cc
BehaviorAnalysis.cc
FpMitigation.cc
D2Main.cc
DeepAnalyzer.cc
Telemetry.cc
WaapOverride.cc
ScoreBuilder.cc
WaapTrigger.cc
WaapDecision.cc
WaapResponseInspectReasons.cc
WaapResponseInjectReasons.cc
WaapResultJson.cc
WaapAssetStatesManager.cc
Serializator.cc
IndicatorsFilterBase.cc
TypeIndicatorsFilter.cc
KeywordIndicatorFilter.cc
WaapOverrideFunctor.cc
WaapValueStatsAnalyzer.cc
TrustedSources.cc
WaapParameters.cc
IndicatorsFiltersManager.cc
ConfidenceFile.cc
ConfidenceCalculator.cc
TrustedSourcesConfidence.cc
RateLimiter.cc
RateLimiting.cc
ErrorLimiting.cc
WaapErrorDisclosurePolicy.cc
WaapOpenRedirect.cc
WaapOpenRedirectPolicy.cc
KeywordTypeValidator.cc
SecurityHeadersPolicy.cc
UserLimitsPolicy.cc
ScannerDetector.cc
TuningDecision.cc
ScanResult.cc
SingleDecision.cc
DecisionFactory.cc
AutonomousSecurityDecision.cc
CsrfDecision.cc
OpenRedirectDecision.cc
ErrorDisclosureDecision.cc
RateLimitingDecision.cc
UserLimitsDecision.cc
ErrorLimitingDecision.cc
WaapConversions.cc
SyncLearningNotification.cc
LogGenWrapper.cc
WaapSampleValue.cc
)
# Silence three warning classes for this directory.
# NOTE(review): these are warning flags passed through add_definitions().
# Do not swap in add_compile_options() at this position without checking:
# it only affects targets created after the call, and waap_clib is created
# above.
add_definitions("-Wno-unused-function")
add_definitions("-Wno-unused-parameter")
add_definitions("-Wno-deprecated-declarations")
#target_link_libraries(waap_clib pm buffers generic_rulebase ip_utilities connkey report_messaging compression_utils -lz)
# Include static headers
#target_link_libraries(waap_clib)
#target_link_libraries(waap_clib logging event_is metric)
#link_directories(${BOOST_ROOT}/lib)
#link_directories(${ng_module_osrc_zlib_path}/include)
# Link with all the libraries
#target_link_libraries(waap_clib -lresolv)
#target_link_libraries(waap_clib ${PCRE2_LIBRARIES})
#target_link_libraries(waap_clib ${YAJL_LIBRARIES})
#target_link_libraries(waap_clib ${LIBXML2_LIBRARIES})
#target_link_libraries(waap_clib "${ng_module_osrc_openssl_path}/lib/libcrypto.so")
#target_link_libraries(waap_clib "${ng_module_osrc_boost_path}/lib/libboost_context.so")
#target_link_libraries(waap_clib "${ng_module_osrc_boost_path}/lib/libboost_regex.so")

View File

@@ -0,0 +1,159 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "CidrMatch.h"
#include <string.h>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <iostream>
#include <errno.h>
#include "log_generator.h"
#include <stdexcept>
USE_DEBUG_FLAG(D_WAAP);
namespace Waap {
namespace Util {
// Two CIDRData values are equal when their text, address family, prefix
// length and address all match. Only the address of this object's own family
// (IPv6 if isIPV6, otherwise IPv4) is compared.
bool CIDRData::operator==(const CIDRData &other) const
{
    bool sameAddress;
    if (isIPV6) {
        sameAddress = (memcmp(ipCIDRV6.s6_addr, other.ipCIDRV6.s6_addr, sizeof(ipCIDRV6.s6_addr)) == 0);
    } else {
        sameAddress = (ipCIDRV4.s_addr == other.ipCIDRV4.s_addr);
    }

    return cidrString == other.cidrString
        && sameAddress
        && networkBits == other.networkBits
        && isIPV6 == other.isIPV6;
}
// Tells whether the IPv4 address `addr` lies inside the network `net`/`bits`.
//
// addr, net - addresses in network byte order (as produced by inet_pton).
// bits      - prefix length; 0 matches every address. Values above 32 are
//             treated as 32 (exact match) instead of producing a negative
//             shift count, which is undefined behaviour.
bool cidr4_match(const in_addr &addr, const in_addr &net, uint8_t bits) {
    if (bits == 0) {
        // C99 6.5.7 (3): u32 << 32 is undefined behaviour
        return true;
    }

    const unsigned int prefixLen = (bits > 32) ? 32u : bits;
    const uint32_t hostOrderMask = 0xFFFFFFFFu << (32u - prefixLen);
    const uint32_t differingBits = addr.s_addr ^ net.s_addr;

    // The mask is converted to network order to line up with s_addr.
    return (differingBits & htonl(hostOrderMask)) == 0;
}
// Tells whether the IPv6 address `address` lies inside `network`/`bits`.
//
// bits - prefix length; 0 matches every address. Values above 128 are treated
//        as 128 so the comparison never reads past the 16-byte address.
//
// The comparison works on the portable s6_addr byte array: whole bytes of the
// prefix are compared with memcmp, then the leading bits of the next byte are
// compared under a mask. This avoids platform-specific 32-bit views of the
// address.
bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_t bits) {
    const unsigned int prefixLen = (bits > 128) ? 128u : bits;
    const unsigned int wholeBytes = prefixLen / 8;    // bytes fully covered by the prefix
    const unsigned int trailingBits = prefixLen % 8;  // prefix bits in the following byte

    if (wholeBytes != 0 && memcmp(address.s6_addr, network.s6_addr, wholeBytes) != 0) {
        return false;
    }

    if (trailingBits != 0) {
        // trailingBits != 0 implies wholeBytes <= 15, so the index is in range.
        const unsigned int mask = (0xFFu << (8 - trailingBits)) & 0xFFu;
        const unsigned int differingBits = address.s6_addr[wholeBytes] ^ network.s6_addr[wholeBytes];
        if ((differingBits & mask) != 0) {
            return false;
        }
    }

    return true;
}
// Parses "address" or "address/bits" into `cidr`.
//
// Without a "/bits" suffix the prefix length defaults to 32, or to 128 when
// the text contains ':'. Returns true and fills `cidr` when the text is a
// valid IPv4 (bits <= 32) or IPv6 (bits <= 128) CIDR; returns false otherwise.
// Note that `cidr` may already have been partly written when false is
// returned.
bool isCIDR(const std::string& strCIDR, CIDRData& cidr)
{
    const size_t slashPos = strCIDR.find_last_of('/');

    // get ip from targetCidr
    std::string strPrefix;
    // get subnet mask from targetCidr or calculate it based on ipv4 / ipv6
    std::string strSuffix;
    if (slashPos != std::string::npos) {
        strPrefix = strCIDR.substr(0, slashPos);
        strSuffix = strCIDR.substr(slashPos + 1);
    } else {
        strPrefix = strCIDR;
        strSuffix = (strCIDR.find(':') == std::string::npos) ? "32" : "128";
    }

    size_t processedBits = 0;
    int bits = -1;
    try {
        bits = std::stoi(strSuffix, &processedBits);
        // convert int to uint8_t
        cidr.networkBits = (uint8_t)bits;
    }
    catch (const std::invalid_argument &) {
        dbgDebug(D_WAAP) << "Failed to convert CIDR number of bits from string to int (Invalid arguments)."
            << strCIDR;
        return false;
    }
    catch (const std::out_of_range &) {
        dbgDebug(D_WAAP) << "Failed to convert CIDR number of bits from string to int (out of range)."
            << strCIDR;
        return false;
    }

    // check if CIDR is valid
    const bool suffixFullyParsed = (processedBits == strSuffix.length());
    if (!suffixFullyParsed || bits > 128 || bits < 0) {
        dbgDebug(D_WAAP) << "Failed to convert CIDR number of bits from string to int (out of range)."
            << strCIDR;
        return false;
    }

    memset(&cidr.ipCIDRV4, 0, sizeof(struct in_addr));
    memset(&cidr.ipCIDRV6, 0, sizeof(struct in6_addr));

    // IPv4 is tried first; the address is parsed before the length is checked.
    if (inet_pton(AF_INET, strPrefix.c_str(), &cidr.ipCIDRV4) == 1 && bits <= 32) {
        cidr.isIPV6 = false;
    }
    else if (inet_pton(AF_INET6, strPrefix.c_str(), &cidr.ipCIDRV6) == 1 && bits <= 128) {
        cidr.isIPV6 = true;
    }
    else {
        return false;
    }

    cidr.cidrString = strCIDR;
    return true;
}
bool cidrMatch(const std::string& sourceip, const std::string& targetCidr) {
CIDRData cidrData;
// check if target is valid input.
if (!isCIDR(targetCidr, cidrData))
{
return false;
}
return cidrMatch(sourceip, cidrData);
}
// Checks whether sourceip belongs to the parsed CIDR. The source is parsed
// with the same address family as the CIDR; if that parse fails a debug
// message is logged and false is returned.
bool cidrMatch(const std::string & sourceip, const CIDRData & cidr)
{
    if (cidr.isIPV6) {
        struct in6_addr parsedV6;
        if (inet_pton(AF_INET6, sourceip.c_str(), &parsedV6) == 1) {
            return cidr6_match(parsedV6, cidr.ipCIDRV6, cidr.networkBits);
        }
    } else {
        struct in_addr parsedV4;
        if (inet_pton(AF_INET, sourceip.c_str(), &parsedV4) == 1) {
            return cidr4_match(parsedV4, cidr.ipCIDRV4, cidr.networkBits);
        }
    }

    dbgDebug(D_WAAP) << "Source IP address does not match any of the CIDR definitions.";
    return false;
}
}
}

View File

@@ -0,0 +1,39 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <errno.h>
namespace Waap {
namespace Util {
// Parsed form of a CIDR string, filled in by isCIDR().
// NOTE(review): the members have no default initialisers, so a freshly
// declared CIDRData holds indeterminate values until isCIDR() succeeds.
struct CIDRData {
std::string cidrString; // the original text the data was parsed from
struct in_addr ipCIDRV4; // network address when isIPV6 is false
struct in6_addr ipCIDRV6; // network address when isIPV6 is true
uint8_t networkBits; // prefix length
bool isIPV6; // selects which of the two address members is meaningful
bool operator==(const CIDRData &other) const;
};
// Parses "address" or "address/bits" into cidr; returns false if invalid.
bool isCIDR(const std::string& strCIDR, CIDRData& cidr);
// Checks whether sourceip belongs to an already parsed CIDR.
bool cidrMatch(const std::string& sourceip, const CIDRData& cidr);
// Parses target as a CIDR, then checks whether sourceip belongs to it.
bool cidrMatch(const std::string &sourceip, const std::string &target);
}
}

View File

@@ -0,0 +1,738 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ConfidenceCalculator.h"
#include <cereal/types/unordered_set.hpp>
#include "waap.h"
#include "ConfidenceFile.h"
#include "i_agent_details.h"
#include "i_messaging.h"
#include "i_mainloop.h"
#include <math.h>
USE_DEBUG_FLAG(D_WAAP);
#define SYNC_WAIT_TIME std::chrono::seconds(300) // 5 minutes in seconds
#define SCORE_THRESHOLD 100.0
#define BUSY_WAIT_TIME std::chrono::microseconds(100000) // 0.1 seconds
#define WAIT_LIMIT 10
#define BENIGN_PARAM_FACTOR 2
// Logarithm of x in base n, computed as ln(x) / ln(n).
double logn(double x, double n)
{
    const double lnValue = std::log(x);
    const double lnBase = std::log(n);
    return lnValue / lnBase;
}
// Builds the calculator and immediately restores its state via restore().
// The remote path gets a "/Confidence" suffix unless it is empty, in which
// case it is passed to the base class unchanged.
// The fifth value of the parameter pack is hard-wired to true.
ConfidenceCalculator::ConfidenceCalculator(
    size_t minSources,
    size_t minIntervals,
    std::chrono::minutes intervalDuration,
    double ratioThreshold,
    const Val &nullObj,
    const std::string &backupPath,
    const std::string &remotePath,
    const std::string &assetId,
    TuningDecision* tuning,
    I_IgnoreSources* ignoreSrc)
    : SerializeToLocalAndRemoteSyncBase(
        intervalDuration,
        SYNC_WAIT_TIME,
        backupPath,
        remotePath.empty() ? remotePath : remotePath + "/Confidence",
        assetId,
        "ConfidenceCalculator"),
    m_params({ minSources, minIntervals, intervalDuration, ratioThreshold, true }),
    m_null_obj(nullObj),
    m_time_window_logger(),
    m_confident_sets(),
    m_confidence_level(),
    m_last_indicators_update(0),
    m_ignoreSources(ignoreSrc),
    m_tuning(tuning)
{
    restore();
}
// Empties the confident sets and the current window log before the members
// are destroyed.
ConfidenceCalculator::~ConfidenceCalculator()
{
    this->m_time_window_logger.clear();
    this->m_confident_sets.clear();
}
void ConfidenceCalculator::hardReset()
{
m_time_window_logger.clear();
m_confidence_level.clear();
m_confident_sets.clear();
std::remove(m_filePath.c_str());
}
// Clears the current window log. When learning is not permanent, everything
// else is wiped as well via hardReset().
void ConfidenceCalculator::reset()
{
    m_time_window_logger.clear();

    if (m_params.learnPermanently) {
        return;
    }
    hardReset();
}
// Applies a new parameter set. Returns false, and changes nothing, when the
// parameters equal the current ones. Otherwise stores them, resets the state,
// updates the interval and returns true.
bool ConfidenceCalculator::reset(ConfidenceCalculatorParams& params)
{
    const bool unchanged = (params == m_params);
    if (unchanged) {
        return false;
    }

    dbgInfo(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -"
        " resetting the ConfidenceCalculatorParams: " << params;

    m_params = params;
    reset();
    setInterval(m_params.intervalDuration);

    return true;
}
// REST payload wrapping a window log that is sent out (used by
// ConfidenceCalculator::postData with a PUT request).
class WindowLogPost : public RestGetFile
{
public:
// Wraps the given window log in the payload field.
WindowLogPost(ConfidenceCalculator::KeyValSourcesLogger& _window_logger)
: window_logger(_window_logger)
{
}
private:
// NOTE(review): presumably a client-to-server field declared by the project's
// REST macros - confirm against the macro definition.
C2S_PARAM(ConfidenceCalculator::KeyValSourcesLogger, window_logger)
};
// REST payload receiving a window log (used by ConfidenceCalculator::pullData
// with a GET request).
class WindowLogGet : public RestGetFile
{
public:
WindowLogGet() : window_logger()
{
}
// Returns the received window log wrapped in a Maybe; callers should check
// it before unpacking.
Maybe<ConfidenceCalculator::KeyValSourcesLogger> getWindowLogger()
{
return window_logger.get();
}
private:
// NOTE(review): presumably a server-to-client field declared by the project's
// REST macros - confirm against the macro definition.
S2C_PARAM(ConfidenceCalculator::KeyValSourcesLogger, window_logger)
};
bool ConfidenceCalculator::postData()
{
m_time_window_logger_backup = m_time_window_logger;
m_time_window_logger.clear();
std::string url = getPostDataUrl();
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Sending the data to: " << url;
WindowLogPost currentWindow(m_time_window_logger_backup);
return sendNoReplyObjectWithRetry(currentWindow,
I_Messaging::Method::PUT,
url);
}
void ConfidenceCalculator::pullData(const std::vector<std::string>& files)
{
if (getIntervalsCount() == m_params.minIntervals)
{
mergeProcessedFromRemote();
}
std::string url = getPostDataUrl();
std::string sentFile = url.erase(0, url.find_first_of('/') + 1);
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "pulling files, skipping: " << sentFile;
for (auto file : files)
{
if (file == sentFile)
{
continue;
}
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Pulling the file: " << file;
WindowLogGet getWindow;
sendObjectWithRetry(getWindow,
I_Messaging::Method::GET,
getUri() + "/" + file);
KeyValSourcesLogger remoteLogger = getWindow.getWindowLogger().unpack();
for (auto& log : remoteLogger)
{
std::string key = log.first;
for (auto& entry : log.second)
{
std::string value = entry.first;
for (auto& source : entry.second)
{
m_time_window_logger_backup[key][value].insert(source);
}
}
}
}
}
// Runs the interval calculation. If the backup window logger is empty, the
// current window log is first copied into it and the current log is cleared.
void ConfidenceCalculator::processData()
{
    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " - processing the confidence data";

    const bool hasBackup = !m_time_window_logger_backup.empty();
    if (!hasBackup)
    {
        m_time_window_logger_backup = m_time_window_logger;
        m_time_window_logger.clear();
    }

    calculateInterval();
}
// Refreshes the state from the given files; simply forwards to pullProcessedData().
void ConfidenceCalculator::updateState(const std::vector<std::string>& files)
{
    pullProcessedData(files);
}
void ConfidenceCalculator::pullProcessedData(const std::vector<std::string>& files)
{
dbgTrace(D_WAAP) << "Fetching the confidence set object";
bool is_first_pull = true;
for (auto file : files)
{
ConfidenceFileDecryptor getConfFile;
bool res = sendObjectWithRetry(getConfFile,
I_Messaging::Method::GET,
getUri() + "/" + file);
if (res && getConfFile.getConfidenceSet().ok())
{
mergeFromRemote(getConfFile.getConfidenceSet().unpack(), is_first_pull);
is_first_pull = false;
}
if (res && getConfFile.getConfidenceLevels().ok())
{
m_confidence_level = getConfFile.getConfidenceLevels().unpackMove();
}
}
}
void ConfidenceCalculator::postProcessedData()
{
if (getIntervalsCount() < m_params.minIntervals)
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Not sending the processed data - not enough windows";
return;
}
std::string postUrl = getUri() + "/" + m_remotePath + "/processed/confidence.data";
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Posting the confidence set object to: " << postUrl;
ConfidenceFileEncryptor postConfFile(m_confident_sets, m_confidence_level);
sendNoReplyObjectWithRetry(postConfFile,
I_Messaging::Method::PUT,
postUrl);
}
void ConfidenceCalculator::serialize(std::ostream& stream)
{
cereal::JSONOutputArchive archive(stream);
archive(
cereal::make_nvp("version", 3),
cereal::make_nvp("params", m_params),
cereal::make_nvp("last_indicators_update", m_last_indicators_update),
cereal::make_nvp("confidence_levels", m_confidence_level),
cereal::make_nvp("confident_sets", m_confident_sets)
);
}
// Reads the calculator state from a JSON stream.
// The "version" field selects the loader; when it cannot be read the file is
// treated as version 0. Unknown versions are reported and nothing is loaded.
void ConfidenceCalculator::deserialize(std::istream& stream)
{
    cereal::JSONInputArchive archive(stream);
    size_t version = 0;

    try
    {
        archive(cereal::make_nvp("version", version));
    }
    catch (const std::runtime_error& e)
    {
        // clear the pending field name so the next read is not bound to "version"
        archive.setNextName(nullptr);
        version = 0;
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
            " failed to load the file version: " << e.what();
    }

    switch (version)
    {
        case 0:
            loadVer0(archive);
            break;
        case 1:
            loadVer1(archive);
            break;
        case 2:
            loadVer2(archive);
            break;
        case 3:
            loadVer3(archive);
            break;
        default:
            dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
                " failed to deserialize, unknown version: " << version;
            break;
    }
}
// Loader for files without a version field. Tries the plain field names first
// and, if any of them fails to parse, retries with the "m_"-prefixed names.
void ConfidenceCalculator::loadVer0(cereal::JSONInputArchive& archive)
{
    const bool parsedWithPlainNames = tryParseVersionBasedOnNames(
        archive,
        std::string("params"),
        std::string("last_indicators_update"),
        std::string("windows_summary_list"),
        std::string("confident_sets")
    );
    if (parsedWithPlainNames)
    {
        return;
    }

    tryParseVersionBasedOnNames(
        archive,
        std::string("m_params"),
        std::string("m_lastIndicatorsUpdate"),
        std::string("m_windows_summary_list"),
        std::string("m_confident_sets")
    );
}
// Converts the legacy per-window summaries into confidence levels: every
// occurrence of a value in a window of a key adds
// ceil(SCORE_THRESHOLD / minIntervals) to that key/value confidence level.
void ConfidenceCalculator::convertWindowSummaryToConfidenceLevel(const WindowsConfidentValuesList& windows)
{
    for (const auto& keyWindows : windows)
    {
        const auto& key = keyWindows.first;
        for (const auto& valuesInWindow : keyWindows.second)
        {
            for (const auto& seenValue : valuesInWindow)
            {
                m_confidence_level[key][seenValue] += std::ceil(SCORE_THRESHOLD / m_params.minIntervals);
            }
        }
    }
}
// Loader for format version 2. Keys stored in the file are passed through
// normalize_param() before being stored:
// - confident sets are stored under the normalized key (a later entry with
//   the same normalized key overwrites an earlier one);
// - confidence levels that collide on the normalized key are merged value by
//   value, keeping the higher score.
void ConfidenceCalculator::loadVer2(cereal::JSONInputArchive& archive)
{
    ConfidenceCalculatorParams params;
    ConfidenceSet confidenceSets;
    ConfidenceLevels confidenceLevels;

    archive(
        cereal::make_nvp("params", params),
        cereal::make_nvp("last_indicators_update", m_last_indicators_update),
        cereal::make_nvp("confidence_levels", confidenceLevels),
        cereal::make_nvp("confident_sets", confidenceSets)
    );
    reset(params);

    for (auto& loadedSet : confidenceSets)
    {
        m_confident_sets[normalize_param(loadedSet.first)] = loadedSet.second;
    }

    for (auto& loadedLevels : confidenceLevels)
    {
        std::string normParam = normalize_param(loadedLevels.first);

        auto knownKey = m_confidence_level.find(normParam);
        if (knownKey == m_confidence_level.end())
        {
            // first time this normalized key is seen - take the levels as they are
            m_confidence_level[normParam] = loadedLevels.second;
            continue;
        }

        auto& knownLevels = knownKey->second;
        for (auto& loadedValueLevel : loadedLevels.second)
        {
            auto knownValue = knownLevels.find(loadedValueLevel.first);
            if (knownValue == knownLevels.end())
            {
                knownLevels[loadedValueLevel.first] = loadedValueLevel.second;
            }
            else
            {
                double maxScore = std::max(knownValue->second, loadedValueLevel.second);
                knownValue->second = maxScore;
            }
        }
    }
}
// Loader for format version 3.
//
// The levels and sets are read into temporaries and only installed after
// reset(params) has run. reset(params) may end up in hardReset(), which clears
// m_confidence_level and m_confident_sets; installing afterwards keeps the
// freshly loaded data from being wiped. This is the same order loadVer2 and
// tryParseVersionBasedOnNames use (reset first, then populate).
void ConfidenceCalculator::loadVer3(cereal::JSONInputArchive& archive)
{
    ConfidenceCalculatorParams params;
    ConfidenceLevels loadedLevels;
    ConfidenceSet loadedSets;

    archive(
        cereal::make_nvp("params", params),
        cereal::make_nvp("last_indicators_update", m_last_indicators_update),
        cereal::make_nvp("confidence_levels", loadedLevels),
        cereal::make_nvp("confident_sets", loadedSets)
    );

    reset(params);

    // swap in the loaded state; the previous content goes away with the locals
    m_confidence_level.swap(loadedLevels);
    m_confident_sets.swap(loadedSets);
}
// Loader for format version 1 (window summaries instead of confidence levels).
//
// The confident sets are read into a temporary and installed only after
// reset(params): reset(params) may end up in hardReset(), which clears
// m_confident_sets, and would otherwise discard the sets that were just
// loaded. The window summaries are then converted into confidence levels.
void ConfidenceCalculator::loadVer1(cereal::JSONInputArchive& archive)
{
    WindowsConfidentValuesList windows_summary_list;
    ConfidenceCalculatorParams params;
    ConfidenceSet loadedSets;

    archive(
        cereal::make_nvp("params", params),
        cereal::make_nvp("last_indicators_update", m_last_indicators_update),
        cereal::make_nvp("windows_summary_list", windows_summary_list),
        cereal::make_nvp("confident_sets", loadedSets)
    );

    reset(params);

    m_confident_sets.swap(loadedSets);
    convertWindowSummaryToConfidenceLevel(windows_summary_list);
}
// Best-effort loader for unversioned files. Each of the four fields is read
// independently under the supplied name; a field that fails to load is
// reported and skipped, and the remaining fields are still attempted.
// Returns true only when all four fields were loaded.
bool ConfidenceCalculator::tryParseVersionBasedOnNames(
    cereal::JSONInputArchive& archive,
    const std::string &params_field_name,
    const std::string &indicators_update_field_name,
    const std::string &windows_summary_field_name,
    const std::string &confident_sets_field_name)
{
    bool allFieldsLoaded = true;

    // 1. parameters
    try
    {
        ConfidenceCalculatorParams loadedParams;
        archive(cereal::make_nvp(params_field_name, loadedParams));
        reset(loadedParams);
        m_params = loadedParams;
    }
    catch (const std::runtime_error& e)
    {
        archive.setNextName(nullptr);
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
            " failed to load configuration of WAAP parameters from the learned data file: "
            << e.what();
        allFieldsLoaded = false;
    }

    // 2. time of the last indicators update
    try
    {
        size_t loadedLastUpdate = 0;
        archive(cereal::make_nvp(indicators_update_field_name, loadedLastUpdate));
        m_last_indicators_update = loadedLastUpdate;
    }
    catch (const std::runtime_error& e)
    {
        archive.setNextName(nullptr);
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
            " failed to load the update date for indicators from the learned data file: "
            << e.what();
        allFieldsLoaded = false;
    }

    // 3. legacy window summaries, converted to confidence levels
    try
    {
        WindowsConfidentValuesList loadedWindows;
        archive(cereal::make_nvp(windows_summary_field_name, loadedWindows));
        convertWindowSummaryToConfidenceLevel(loadedWindows);
    }
    catch (const std::runtime_error& e)
    {
        archive.setNextName(nullptr);
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
            " failed to load windows summary list from the learned data file: " << e.what();
        allFieldsLoaded = false;
    }

    // 4. confident sets (stored without a timestamp - stamped with the current wall time)
    try
    {
        std::unordered_map<Key, ValuesSet> loadedSets;
        archive(cereal::make_nvp(confident_sets_field_name, loadedSets));
        size_t nowSeconds = std::chrono::duration_cast<std::chrono::seconds>(
            Singleton::Consume<I_TimeGet>::by<WaapComponent>()->getWalltime()).count();
        for (const auto& loadedSet : loadedSets)
        {
            m_confident_sets[loadedSet.first] = ValueSetWithTime(loadedSet.second, nowSeconds);
        }
    }
    catch (const std::runtime_error& e)
    {
        archive.setNextName(nullptr);
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
            " failed to load confident sets from the learned data file: " << e.what();
        allFieldsLoaded = false;
    }

    return allFieldsLoaded;
}
// Merges `confidence_set_to_merge` into `confidence_set`.
//
// For every key of the merged set:
// - its values are added to the entry stored under normalize_param(key);
// - that same entry keeps the newer of the two timestamps;
// - `last_indicators_update` is raised to that timestamp when it is larger.
//
// The normalized key is used for the value set, the timestamp and the traced
// size alike, so a key whose normalized form differs from its raw form no
// longer leaves an empty, timestamp-only entry under the raw key.
void ConfidenceCalculator::mergeConfidenceSets(
    ConfidenceSet& confidence_set,
    const ConfidenceSet& confidence_set_to_merge,
    size_t& last_indicators_update
)
{
    for (const auto& set : confidence_set_to_merge)
    {
        ValueSetWithTime& target = confidence_set[normalize_param(set.first)];

        dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Merging the set for the key: " << set.first <<
            ". Number of present values: " << target.first.size();

        target.first.insert(set.second.first.begin(), set.second.first.end());
        target.second = std::max<size_t>(target.second, set.second.second);
        last_indicators_update = std::max<size_t>(last_indicators_update, target.second);
    }
}
// Merges a remote confident set into the local one. On the first pull the
// local sets are discarded first, so the remote state replaces them.
void ConfidenceCalculator::mergeFromRemote(const ConfidenceSet& remote_confidence_set, bool is_first_pull)
{
    if (is_first_pull)
    {
        m_confident_sets.clear();
    }

    mergeConfidenceSets(
        m_confident_sets,
        remote_confidence_set,
        m_last_indicators_update);
}
bool ConfidenceCalculator::is_confident(const Key &key, const Val &value) const
{
auto confidentSetItr = m_confident_sets.find(key);
if (confidentSetItr == m_confident_sets.end())
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" failed to find the key(" << key << ")";
return false;
}
const ValuesSet& confidentValues = confidentSetItr->second.first;
if (confidentValues.find(value) != confidentValues.end())
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" confident that " << value << " should be filtered for " << key;
return true;
}
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" failed to find the value(" << value << ")";
return false;
}
void ConfidenceCalculator::calcConfidentValues()
{
std::unordered_map<Key, ValueSetWithTime> confidenceSetCopy = m_confident_sets;
if (!m_params.learnPermanently)
{
m_confident_sets.clear();
}
for (auto& confidenceLevels : m_confidence_level)
{
Key key = confidenceLevels.first;
for (auto& valConfidenceLevel : confidenceLevels.second)
{
Val value = valConfidenceLevel.first;
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "key: " << key << ", value: " << value
<< ", confidence level: " << valConfidenceLevel.second;
if (valConfidenceLevel.second >= SCORE_THRESHOLD)
{
size_t confidenceValuesSize = m_confident_sets[key].first.size();
m_confident_sets[key].first.insert(value);
if (m_confident_sets[key].first.size() > confidenceValuesSize)
{
std::chrono::seconds current_time = std::chrono::duration_cast<std::chrono::seconds>(
Singleton::Consume<I_TimeGet>::by<WaapComponent>()->getWalltime());
m_confident_sets[key].second = current_time.count();
m_last_indicators_update = std::chrono::duration_cast<std::chrono::minutes>(current_time).count();
}
}
}
}
}
// Returns a copy of the confident values stored for `key`, or an empty set
// (with a trace message) when the key is unknown.
ConfidenceCalculator::ValuesSet ConfidenceCalculator::getConfidenceValues(const Key &key) const
{
    const auto keyEntry = m_confident_sets.find(key);
    if (keyEntry != m_confident_sets.end())
    {
        return keyEntry->second.first;
    }

    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << ";" <<
        " failed to find the key(" << key << ")";
    return ValuesSet();
}
// Returns the stored time of the last indicators update (m_last_indicators_update).
size_t ConfidenceCalculator::getLastConfidenceUpdate()
{
    const size_t lastUpdate = m_last_indicators_update;
    return lastUpdate;
}
// Records that `source` reported `value` for `key` in the current window.
// For any value other than the null object, the source is additionally
// recorded under the null object of that key (via logSourceHit()).
void ConfidenceCalculator::log(const Key &key, const Val &value, const std::string &source)
{
    m_time_window_logger[key][value].insert(source);

    const bool isNullObject = (value == m_null_obj);
    if (isNullObject)
    {
        return;
    }

    logSourceHit(key, source);
}
void ConfidenceCalculator::logSourceHit(const Key &key, const std::string &source)
{
log(key, m_null_obj, source);
}
void ConfidenceCalculator::mergeSourcesCounter(const Key& key, const SourcesCounters& counters)
{
if (key.rfind("url#", 0) == 0 && m_owner == "TypeIndicatorFilter")
{
return;
}
SourcesCounters& currentCounters = m_time_window_logger[key];
for (auto& counter : counters)
{
SourcesSet& srcSet = currentCounters[counter.first];
srcSet.insert(counter.second.begin(), counter.second.end());
}
}
// Erases every source listed in `badSources` from `sources`.
// A null `badSources` leaves `sources` untouched.
void ConfidenceCalculator::removeBadSources(SourcesSet& sources, const std::vector<std::string>* badSources)
{
    if (badSources != nullptr)
    {
        for (const auto& ignoredSource : *badSources)
        {
            sources.erase(ignoredSource);
        }
    }
}
// Computes the total weight of a set of sources. Every source weighs 1;
// when tuning decisions are available, a source decided as BENIGN gets an
// extra (minSources - 1), i.e. it weighs minSources in total.
size_t ConfidenceCalculator::sumSourcesWeight(const SourcesSet& sources)
{
    size_t totalWeight = sources.size();

    if (m_tuning != nullptr)
    {
        for (const auto& src : sources)
        {
            const bool isBenign = (m_tuning->getDecision(src, SOURCE) == BENIGN);
            if (!isBenign)
            {
                continue;
            }
            dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "increasing source weight";
            totalWeight += m_params.minSources - 1;
        }
    }

    return totalWeight;
}
// Processes the backed-up window log (m_time_window_logger_backup) and
// updates the confidence levels and confident sets:
//   1. obtains the list of sources to ignore, if an ignore-sources provider is set;
//   2. for every key, raises or lowers the confidence level of each logged
//      value according to the share of (weighted) sources that reported it;
//   3. decays the level of known values that did not appear for a key that
//      was present in this window;
//   4. clears the backup log and recomputes the confident sets.
void ConfidenceCalculator::calculateInterval()
{
std::vector<std::string>* sourcesToIgnore = nullptr;
if (m_ignoreSources != nullptr)
{
// yield up to WAIT_LIMIT times until the ignore-sources provider reports ready
int waitItr = 0;
while (!m_ignoreSources->ready() && waitItr < WAIT_LIMIT)
{
Singleton::Consume<I_MainLoop>::by<WaapComponent>()->yield(BUSY_WAIT_TIME);
waitItr++;
}
if (waitItr == WAIT_LIMIT && !m_ignoreSources->ready())
{
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner <<
" - wait for ignore sources ready timeout";
}
// the list is fetched even after a timeout
sourcesToIgnore = m_ignoreSources->getSourcesToIgnore();
}
// NOTE: the loop variable is a copy of each map entry, so the edits made to
// srcCtrs below (operator[] insertion, removal of bad sources) do not modify
// m_time_window_logger_backup itself.
for (auto sourcesCtrItr : m_time_window_logger_backup)
{
SourcesCounters& srcCtrs = sourcesCtrItr.second;
Key key = sourcesCtrItr.first;
ValuesSet summary;
// multiplier applied to the confidence gain; changed only for parameters tuned as benign
double factor = 1.0;
if (m_tuning != nullptr)
{
std::string param_name = key.substr(key.find("#") + 1); // not always accurate but good enough
if (m_tuning->getDecision(param_name, PARAM_NAME) == BENIGN)
{
factor = BENIGN_PARAM_FACTOR;
}
}
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" calculate window summary for the parameter: " << key;
// get all unique sources from the null object counter
SourcesSet& sourcesUnion = srcCtrs[m_null_obj];
removeBadSources(sourcesUnion, sourcesToIgnore);
size_t numOfSources = sumSourcesWeight(sourcesUnion);
// counts consecutive windows seen for this key; reset to 0 below once a window has enough sources
m_windows_counter[key]++;
if (numOfSources < m_params.minSources)
{
// not enough sources to learn from
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" not enough sources to learn for " << key << " - needed: " <<
m_params.minSources << ", have: " << sourcesUnion.size();
// carry the (filtered) counters over into the current window log
mergeSourcesCounter(key, srcCtrs);
continue;
}
// srcSets is a copy as well, so filtering it leaves srcCtrs unchanged
for (auto srcSets : srcCtrs)
{
// log the ratio of unique sources from all sources for each value
SourcesSet& currentSourcesSet = srcSets.second;
Val value = srcSets.first;
if (value == m_null_obj)
{
continue;
}
removeBadSources(currentSourcesSet, sourcesToIgnore);
size_t currentSourcesCount = sumSourcesWeight(currentSourcesSet);
auto& confidenceLevel = m_confidence_level[key][value];
if (currentSourcesCount == 0)
{
// every source of this value was filtered out - lower the confidence by one step
confidenceLevel -= std::ceil(SCORE_THRESHOLD / m_params.minIntervals);
continue;
}
// gain = step * (share of sources / ratioThreshold) * logn(count, minSources) * factor
// NOTE(review): logn is defined elsewhere; presumably log of the first argument in the base of the second - confirm
double ratio = ((double)currentSourcesCount / numOfSources);
double diff = std::ceil(SCORE_THRESHOLD / m_params.minIntervals) * (ratio / m_params.ratioThreshold) *
logn(currentSourcesCount, m_params.minSources) * factor;
confidenceLevel += diff;
// NOTE(review): the message below has no separator between the value and "confidence level: "
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " - key: " << key <<
" value: " << value << "confidence level: " << confidenceLevel;
}
m_windows_counter[key] = 0;
}
for (auto& keyMap : m_confidence_level)
{
for (auto& valMap : keyMap.second)
{
// applies only to keys that were logged in this window, for values that were not
if (m_time_window_logger_backup.find(keyMap.first) != m_time_window_logger_backup.end() &&
m_time_window_logger_backup[keyMap.first].find(valMap.first) ==
m_time_window_logger_backup[keyMap.first].end())
{
// reduce confidence when value do not appear
valMap.second *= m_params.ratioThreshold;
}
}
}
m_time_window_logger_backup.clear();
calcConfidentValues();
}
// Sets the owner tag used in log messages to "<owner>/ConfidenceCalculator".
void ConfidenceCalculator::setOwner(const std::string& owner)
{
    std::string ownerTag = owner + "/ConfidenceCalculator";
    m_owner = ownerTag;
}
// Field-by-field equality of two parameter sets (ratioThreshold is compared exactly).
bool ConfidenceCalculatorParams::operator==(const ConfidenceCalculatorParams& other)
{
    if (minSources != other.minSources)
    {
        return false;
    }
    if (minIntervals != other.minIntervals)
    {
        return false;
    }
    if (!(intervalDuration == other.intervalDuration))
    {
        return false;
    }
    if (!(ratioThreshold == other.ratioThreshold))
    {
        return false;
    }
    return learnPermanently == other.learnPermanently;
}
// Streams a human readable, single-line description of the parameters.
std::ostream& operator<<(std::ostream& os, const ConfidenceCalculatorParams& ccp)
{
    os << "min sources: " << ccp.minSources;
    os << " min intervals: " << ccp.minIntervals;
    os << " interval duration(minutes): " << ccp.intervalDuration.count();
    os << " ratio threshold: " << ccp.ratioThreshold;
    os << " should keep indicators permanently: " << ccp.learnPermanently;
    return os;
}

View File

@@ -0,0 +1,155 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#include <unordered_map>
#include <unordered_set>
#include <list>
#include <algorithm>
#include "i_serialize.h"
#include <cereal/archives/json.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/types/list.hpp>
#include <cereal/types/utility.hpp>
#include "debug.h"
#include "Waf2Util.h"
#include <ostream>
#include "i_ignoreSources.h"
#include "TuningDecisions.h"
USE_DEBUG_FLAG(D_WAAP_CONFIDENCE_CALCULATOR);
class WaapComponent;
// Tunable parameters of a ConfidenceCalculator.
// NOTE: the member order is relied upon by brace-initialization of this
// struct, so members must not be reordered.
struct ConfidenceCalculatorParams
{
// minimal (weighted) number of sources required before a key is learned from
size_t minSources;
// number of intervals; also the divisor of the per-interval confidence step
size_t minIntervals;
// length of a learning interval
std::chrono::minutes intervalDuration;
// divides the source ratio when confidence is gained; multiplies the level of values that did not appear
double ratioThreshold;
// when false, a reset also discards the learned state
bool learnPermanently;
// cereal (de)serialization; intervalDuration is stored as a plain number of minutes
template <class Archive>
void serialize(Archive& ar)
{
size_t duration = intervalDuration.count();
ar(cereal::make_nvp("minSources", minSources),
cereal::make_nvp("minIntervals", minIntervals),
cereal::make_nvp("intervalDuration", duration),
cereal::make_nvp("ratioThreshold", ratioThreshold),
cereal::make_nvp("learnPermanently", learnPermanently));
// when loading, `duration` now holds the stored value; when saving this is a no-op
intervalDuration = std::chrono::minutes(duration);
}
// member-wise equality
bool operator==(const ConfidenceCalculatorParams& other);
friend std::ostream& operator<<(std::ostream& os, const ConfidenceCalculatorParams& ccp);
};
// Learns, per key, which values are reported by enough distinct sources over
// enough time windows to be considered "confident". State is persisted and
// synchronized through SerializeToLocalAndRemoteSyncBase.
class ConfidenceCalculator : public SerializeToLocalAndRemoteSyncBase
{
public:
typedef std::string Key;
typedef std::string Val;
template<typename K, typename V>
using UMap = std::unordered_map<K, V>;
// key -> val -> sources set
typedef std::unordered_set<std::string> SourcesSet;
typedef UMap<Val, SourcesSet> SourcesCounters;
typedef UMap<Key, SourcesCounters> KeyValSourcesLogger;
// key -> list of values sets
typedef std::set<Val> ValuesSet;
// confident values together with the time they were last extended
typedef std::pair<ValuesSet, size_t> ValueSetWithTime;
typedef std::list<ValuesSet> ValuesList;
// legacy on-disk format: key -> one values set per window
typedef UMap<Key, ValuesList> WindowsConfidentValuesList;
// key -> value -> confidence score
typedef UMap<Key, UMap<Val, double>> ConfidenceLevels;
typedef UMap<Key, int> WindowsCounter;
typedef UMap<Key, ValueSetWithTime> ConfidenceSet;
ConfidenceCalculator(size_t minSources,
size_t minIntervals,
std::chrono::minutes intervalDuration,
double ratioThreshold,
const Val &nullObj,
const std::string& backupPath,
const std::string& remotePath,
const std::string& assetId,
TuningDecision* tuning = nullptr,
I_IgnoreSources* ignoreSrc = nullptr);
~ConfidenceCalculator();
// sets the owner tag used in log messages
void setOwner(const std::string& owner);
// discards all learned state and removes the backing file
void hardReset();
// clears the current window; discards everything unless learning permanently
void reset();
// applies new parameters; returns false when they equal the current ones
bool reset(ConfidenceCalculatorParams& params);
virtual bool postData();
virtual void pullData(const std::vector<std::string>& files);
virtual void processData();
virtual void postProcessedData();
virtual void pullProcessedData(const std::vector<std::string>& files);
virtual void updateState(const std::vector<std::string>& files);
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
void mergeFromRemote(const ConfidenceSet& remote_confidence_set, bool is_first_pull);
// true when `value` is in the confident set of `key`
bool is_confident(const Key &key, const Val &value) const;
void calcConfidentValues();
ValuesSet getConfidenceValues(const Key &key) const;
size_t getLastConfidenceUpdate();
// records that `source` reported `value` for `key` in the current window
void log(const Key &key, const Val &value, const std::string &source);
// records that `source` touched `key` (logged under the null object)
void logSourceHit(const Key &key, const std::string &source);
void calculateInterval();
static void mergeConfidenceSets(ConfidenceSet& confidence_set,
const ConfidenceSet& confidence_set_to_merge,
size_t& last_indicators_update);
private:
// loaders for the individual on-disk format versions
void loadVer0(cereal::JSONInputArchive& archive);
void loadVer1(cereal::JSONInputArchive& archive);
void loadVer2(cereal::JSONInputArchive& archive);
void loadVer3(cereal::JSONInputArchive& archive);
bool tryParseVersionBasedOnNames(
cereal::JSONInputArchive& archive,
const std::string &params_field_name,
const std::string &indicators_update_field_name,
const std::string &windows_summary_field_name,
const std::string &confident_sets_field_name);
void convertWindowSummaryToConfidenceLevel(const WindowsConfidentValuesList& windows);
// NOTE(review): no definition of getParamName is visible next to the other members - confirm it is defined/used
std::string getParamName(const Key& key);
size_t sumSourcesWeight(const SourcesSet& sources);
void mergeSourcesCounter(const Key& key, const SourcesCounters& counters);
void removeBadSources(SourcesSet& sources, const std::vector<std::string>* badSources);
ConfidenceCalculatorParams m_params;
// value under which all sources that touched a key are logged
Val m_null_obj;
// sources logged during the current window
KeyValSourcesLogger m_time_window_logger;
// snapshot of a finished window, consumed by calculateInterval()
KeyValSourcesLogger m_time_window_logger_backup;
ConfidenceSet m_confident_sets;
ConfidenceLevels m_confidence_level;
WindowsCounter m_windows_counter;
size_t m_last_indicators_update;
// optional collaborators; may be nullptr
I_IgnoreSources* m_ignoreSources;
TuningDecision* m_tuning;
};

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ConfidenceFile.h"
// Default constructor; the parameters start default-initialized.
ConfidenceFileDecryptor::ConfidenceFileDecryptor()
{}
// Returns the received confidence set, or an error when it is empty.
Maybe<ConfidenceCalculator::ConfidenceSet> ConfidenceFileDecryptor::getConfidenceSet() const
{
    if (confidence_set.get().empty())
    {
        return genError("failed to get file");
    }
    return confidence_set.get();
}
// Returns the received confidence levels, or an error when they are empty.
Maybe<ConfidenceCalculator::ConfidenceLevels> ConfidenceFileDecryptor::getConfidenceLevels() const
{
    if (confidence_levels.get().empty())
    {
        return genError("failed to get confidence levels");
    }
    return confidence_levels.get();
}
// Initializes both outgoing parameters from the given set and levels.
ConfidenceFileEncryptor::ConfidenceFileEncryptor(
    const ConfidenceCalculator::ConfidenceSet& _confidence_set,
    const ConfidenceCalculator::ConfidenceLevels& _confidence_levels)
        :
    confidence_set(_confidence_set),
    confidence_levels(_confidence_levels)
{}

View File

@@ -0,0 +1,41 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "ConfidenceCalculator.h"
// REST object that receives a processed confidence file: a confidence set
// (S2C_PARAM) and confidence levels (S2C_OPTIONAL_PARAM).
class ConfidenceFileDecryptor : public RestGetFile
{
public:
ConfidenceFileDecryptor();
// the received confidence set, or an error when it is empty
Maybe<ConfidenceCalculator::ConfidenceSet>
getConfidenceSet() const;
// the received confidence levels, or an error when they are empty
Maybe<ConfidenceCalculator::ConfidenceLevels>
getConfidenceLevels() const;
private:
S2C_PARAM(ConfidenceCalculator::ConfidenceSet, confidence_set);
S2C_OPTIONAL_PARAM(ConfidenceCalculator::ConfidenceLevels, confidence_levels);
};
// REST object that sends a processed confidence file: a confidence set and
// confidence levels, both declared as C2S_PARAM.
class ConfidenceFileEncryptor : public RestGetFile
{
public:
// initializes both parameters from the given set and levels
ConfidenceFileEncryptor(const ConfidenceCalculator::ConfidenceSet& _confidence_set,
const ConfidenceCalculator::ConfidenceLevels& _confidence_levels);
private:
C2S_PARAM(ConfidenceCalculator::ConfidenceSet, confidence_set);
C2S_PARAM(ConfidenceCalculator::ConfidenceLevels, confidence_levels);
};

View File

@@ -0,0 +1,75 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ContentTypeParser.h"
#include "Waf2Util.h"
USE_DEBUG_FLAG(D_WAAP_PARSER_CONTENT_TYPE);
const std::string ContentTypeParser::m_parserName = "contentTypeParser";
// Receiver callback for the key/value pairs of a Content-Type header value.
// The first pair's key is taken as the content type; in later pairs, a key
// matching "boundary" stores its value as the boundary. Always returns 0.
// NOTE(review): the match uses k_len as the comparison length, so how keys
// shorter or longer than "boundary" are treated depends on my_strincmp - confirm.
int ContentTypeParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags)
{
    dbgTrace(D_WAAP_PARSER_CONTENT_TYPE) << "ContentTypeParser::onKv(): " << std::string(v, v_len);

    assert((flags & BUFFERED_RECEIVER_F_BOTH) == BUFFERED_RECEIVER_F_BOTH);

    switch (ctParserState) {
        case CTP_STATE_CONTENT_TYPE:
            contentTypeDetected = std::string(k, k_len);
            dbgTrace(D_WAAP_PARSER_CONTENT_TYPE) << "ContentTypeParser::onKv(): contentTypeDetected: '" <<
                contentTypeDetected << "'";
            ctParserState = CTP_STATE_CONTENT_TYPE_PARAMS;
            break;

        case CTP_STATE_CONTENT_TYPE_PARAMS:
            if (my_strincmp(k, "boundary", k_len)) {
                boundaryFound = std::string(v, v_len);
            }
            break;

        default:
            // This should never occur
            m_error = true;
            dbgWarning(D_WAAP_PARSER_CONTENT_TYPE) << "ContentTypeParser::onKv(): '" << std::string(v, v_len) <<
                "': BUG: Unknown content type found: " << ctParserState;
            break;
    }

    return 0; // ok
}
// Starts in the "content type" state and wires the header-value parser to
// this object through the buffered receiver.
ContentTypeParser::ContentTypeParser()
        :
    ctParserState(CTP_STATE_CONTENT_TYPE),
    m_rcvr(*this),
    m_hvp(m_rcvr),
    m_error(false)
{}
// Parses a Content-Type header value. Previous results (detected type and
// boundary) and the parser state are reset before the data is forwarded to
// the header-value parser; its return value is passed through.
size_t ContentTypeParser::push(const char *data, size_t data_len)
{
    dbgTrace(D_WAAP_PARSER_CONTENT_TYPE) << "ContentTypeParser::push(): processing content type";

    // Start from a clean slate
    boundaryFound = "";
    contentTypeDetected = "";
    ctParserState = CTP_STATE_CONTENT_TYPE;

    // Run the actual parsing
    const size_t consumed = m_hvp.push(data, data_len);
    return consumed;
}
// Signals end of input to the underlying header-value parser.
void ContentTypeParser::finish()
{
    m_hvp.finish();
}
const std::string &
ContentTypeParser::name() const
{
return m_parserName;
}
// Reports whether onKv() was ever called in an unexpected parser state.
bool ContentTypeParser::error() const
{
    const bool hasError = m_error;
    return hasError;
}

View File

@@ -0,0 +1,49 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __CONTENT_TYPE_PARSER_H__aa67ad9a
#define __CONTENT_TYPE_PARSER_H__aa67ad9a
#include "ParserBase.h"
#include "ParserHdrValue.h"
#include "debug.h"
#include <string>
// Parser for the value of a Content-Type header. It feeds the data to a
// ParserHdrValue and receives the resulting key/value pairs through onKv().
class ContentTypeParser : public ParserBase, private IParserReceiver {
// which part of the header value the next key/value pair belongs to
enum CtParserState {
CTP_STATE_CONTENT_TYPE,
CTP_STATE_CONTENT_TYPE_PARAMS
} ctParserState;
private:
// receiver callback invoked for every parsed key/value pair
virtual int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags);
public:
ContentTypeParser();
// resets the results and parses the given header value
virtual size_t push(const char *data, size_t data_len);
virtual void finish();
virtual const std::string &name() const;
bool error() const;
virtual size_t depth() { return 1; }
// After call to execute(), parsing results can be picked up from these variables
std::string contentTypeDetected;
std::string boundaryFound;
private:
// m_hvp is constructed from m_rcvr, which is constructed from *this (see constructor)
BufferedReceiver m_rcvr;
ParserHdrValue m_hvp;
// set when onKv() is called in an unexpected state
bool m_error;
static const std::string m_parserName;
};
#endif // __CONTENT_TYPE_PARSER_H__aa67ad9a

View File

@@ -0,0 +1,111 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "Csrf.h"
#include <algorithm>
#include <boost/uuid/uuid.hpp> // uuid class
#include <boost/uuid/uuid_generators.hpp> // uuid generators
#include <boost/uuid/uuid_io.hpp>
namespace Waap {
namespace CSRF {
// Creates a state with all three tokens (cookie, header, form) empty.
State::State()
        :
    csrf_token(),
    csrf_header_token(),
    csrf_form_token()
{}
bool
State::decide
(const std::string &method, WaapDecision &decision, const std::shared_ptr<Waap::Csrf::Policy>& csrfPolicy) const
{
dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): Start.";
std::string low_method = method;
std::transform(low_method.begin(), low_method.end(), low_method.begin(), ::tolower);
if (low_method.compare("get") == 0)
{
dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): Should not block. Method : " << low_method;
return false;
}
auto csrfDecision = decision.getDecision(CSRF_DECISION);
if (csrf_token.empty())
{
dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): missing token.";
csrfDecision->setLog(true);
if(!csrfPolicy->enforce) {
return false;
}
csrfDecision->setBlock(true);
return true;
}
dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): CSRF compare: csrf_token: " << csrf_token
<< " csrf_header_token: " << csrf_header_token << " csrf_form_token: " << csrf_form_token;
bool result = (csrf_token == csrf_header_token ||
csrf_token == csrf_form_token);
dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): CSRF result : " << result;
if(!result)
{
dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): invalid token.";
csrfDecision->setLog(true);
if(!csrfPolicy->enforce) {
return false;
}
csrfDecision->setBlock(true);
return true;
}
return false;
}
void State::injectCookieHeader(std::string& injectStr) const
{
// creating CSRF token
boost::uuids::random_generator csrfTokenRand;
boost::uuids::uuid csrfToken = csrfTokenRand();
injectStr = "x-chkp-csrf-token=" + boost::uuids::to_string(csrfToken) + "; Path=/; SameSite=Lax";
dbgTrace(D_WAAP) << "Waap::CSRF::State::injectCookieHeader(): CSRF Token was created:" <<
boost::uuids::to_string(csrfToken);
}
// Stores the CSRF token (v_len bytes starting at v) that decide() compares
// the header and form tokens against.
void
State::set_CsrfToken(const char* v, size_t v_len)
{
    csrf_token.assign(v, v_len);
    dbgTrace(D_WAAP) << "Waap::CSRF::State::set_CsrfToken(): set csrf_token : " << csrf_token;
}
// Stores the CSRF token received in a request header (v_len bytes starting at v).
// The trace message names csrf_header_token, the field that is actually set
// (it previously said "csrf_token").
void
State::set_CsrfHeaderToken(const char* v, size_t v_len)
{
    csrf_header_token = std::string(v, v_len);
    dbgTrace(D_WAAP) << "Waap::CSRF::State::set_CsrfHeaderToken(): set csrf_header_token : " << csrf_header_token;
}
// Stores the CSRF token received in a form field (v_len bytes starting at v).
void
State::set_CsrfFormToken(const char* v, size_t v_len)
{
    csrf_form_token.assign(v, v_len);
    dbgTrace(D_WAAP) << "Waap::CSRF::State::set_CsrfFormToken(): set csrf_form_token : " << csrf_form_token;
}
}
}

View File

@@ -0,0 +1,47 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "debug.h"
#include "WaapDecision.h"
#include "i_waapConfig.h"
#include <iostream>
#include <string>
USE_DEBUG_FLAG(D_WAAP);
namespace Waap {
namespace CSRF {
// Holds the three CSRF token strings recorded through the set_Csrf*Token() setters and
// exposes the CSRF decision and cookie-injection entry points.
class State
{
public:
State();
// Evaluates the CSRF state for a request. The result is reported through `decision`
// and the boolean return value; `csrfPolicy` supplies the enforcement setting.
// NOTE(review): the exact meaning of the return value (presumably "block the request")
// should be confirmed against the definition.
bool decide(
const std::string &method,
WaapDecision &decision,
const std::shared_ptr<Waap::Csrf::Policy>& csrfPolicy) const;
// Overwrites injectStr with a cookie string containing a newly generated CSRF token.
void injectCookieHeader(std::string& injectStr) const;
// Each setter copies v_len bytes starting at v into the corresponding member below.
void set_CsrfToken(const char* v, size_t v_len);
void set_CsrfHeaderToken(const char* v, size_t v_len);
void set_CsrfFormToken(const char* v, size_t v_len);
private:
std::string csrf_token;         // written by set_CsrfToken()
std::string csrf_header_token;  // written by set_CsrfHeaderToken()
std::string csrf_form_token;    // written by set_CsrfFormToken()
};
}
}

View File

@@ -0,0 +1,22 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "CsrfDecision.h"
// Forwards the decision type to the SingleDecision base; no additional state is set up.
CsrfDecision::CsrfDecision(DecisionType type)
        :
    SingleDecision(type)
{
}
// Returns the display name of this decision type.
std::string CsrfDecision::getTypeStr() const
{
    return std::string("CSRF Protection");
}

View File

@@ -0,0 +1,27 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __CSRF_DECISION_H__
#define __CSRF_DECISION_H__
#include "SingleDecision.h"
#include "DecisionType.h"
#include <string>
// SingleDecision specialization whose getTypeStr() identifies the CSRF decision.
class CsrfDecision: public SingleDecision
{
public:
// Passes `type` through to the SingleDecision base.
explicit CsrfDecision(DecisionType type);
// Returns the display name for this decision type.
std::string getTypeStr() const override;
};
#endif

View File

@@ -0,0 +1,35 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "CsrfPolicy.h"
#include "Waf2Util.h"
namespace Waap {
namespace Csrf {
// Default policy: CSRF protection neither enabled nor enforced.
Policy::Policy() : enable(false), enforce(false)
{
}
// Two policies are equal when both the enable and enforce flags match.
bool
Policy::operator==(const Policy &other) const
{
    if (enable != other.enable) {
        return false;
    }
    return enforce == other.enforce;
}
}
}

View File

@@ -0,0 +1,52 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cereal/types/string.hpp>
#include <string>
#include <memory>
#include <boost/algorithm/string/case_conv.hpp>
#include "debug.h"
namespace Waap {
namespace Csrf {
// CSRF protection flags, optionally loaded from the "csrfProtection" archive value.
struct Policy {
    // Defined out of line.
    Policy();

    // Reads the "csrfProtection" string from the archive and maps it, case-insensitively:
    //   "detect"   -> enable
    //   "prevent"  -> enable and enforce
    //   otherwise  -> both flags stay false
    template <typename Archive>
    Policy(Archive &ar)
            :
        enable(false),
        enforce(false)
    {
        std::string mode;
        ar(cereal::make_nvp("csrfProtection", mode));
        mode = boost::algorithm::to_lower_copy(mode);

        const bool preventMode = (mode == "prevent");
        const bool detectMode = (mode == "detect");
        enable = detectMode || preventMode;
        enforce = preventMode;
    }

    bool operator==(const Policy &other) const;

    bool enable;
    bool enforce;
};
}
}

View File

@@ -0,0 +1,73 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "D2Main.h"
#include "FpMitigation.h"
#include "BehaviorAnalysis.h"
#include "WaapDefines.h"
// Remembers the asset id and creates the false-positive mitigation scorer, handing it
// the path BACKUP_DIRECTORY_PATH + assetId + "/3.data".
D2Main::D2Main(const std::string& assetId)
        :
    m_assetId(assetId),
    m_fpMitigation(
        std::make_unique<FpMitigationScore>(BACKUP_DIRECTORY_PATH + assetId + std::string("/3.data"))
    )
{
}
// Releases the false-positive mitigation scorer explicitly, ahead of the implicit
// destruction of the remaining members.
D2Main::~D2Main()
{
    m_fpMitigation = nullptr;
}
// Runs the stage-2 analysis for one request.
//
// The behavior analyzer is always consulted. The false-positive mitigation score, the
// false/true-positive classification, the learning step and the final score are only
// computed when the request carries keyword matches; otherwise they keep the
// D2OutputData defaults and fpClassification stays UNKNOWN_TYPE.
D2OutputData D2Main::analyzeData(const D2InputData& inputData)
{
    D2OutputData result;
    BehaviorAnalysisInputData behavior;
    PolicyCounterType classification = UNKNOWN_TYPE;
    std::string agentAndSource = inputData.userAgent + inputData.sourceIdentifier;
    const bool hasKeywordMatches = !inputData.keywordMatches.empty();

    if (hasKeywordMatches) {
        result.fpMitigationScore = m_fpMitigation->calculateFpMitigationScore(inputData.uri, inputData.param);
    }

    // Populate the behavior-analysis request; short_uri and uri both receive the request URI.
    behavior.site_id = inputData.siteId;
    behavior.source_identifier = inputData.sourceIdentifier;
    behavior.user_agent = inputData.userAgent;
    behavior.location = inputData.location;
    behavior.uri = inputData.uri;
    behavior.short_uri = inputData.uri;
    behavior.keyword_matches = inputData.keywordMatches;
    behavior.score = inputData.score;
    behavior.fp_mitigation_score = result.fpMitigationScore;

    ReputationData reputation = m_BehaviorAnalyzer.analyze_behavior(behavior);
    result.relativeReputation = reputation.relativeReputation;
    result.absoluteReputation = reputation.absoluteReputation;
    result.reputationMean = m_BehaviorAnalyzer.getReputationMean();
    result.variance = m_BehaviorAnalyzer.getVariance();

    if (hasKeywordMatches) {
        classification = m_fpMitigation->IdentifyFalseTruePositive(
            reputation.relativeReputation,
            inputData.uri,
            inputData.param,
            agentAndSource
        );
        m_fpMitigation->learnFalsePositive(inputData.keywordMatches, classification, inputData.uri, inputData.param);

        // Scale the raw score by reputation and by the mitigation score, then double it
        // and cap the result at 10.
        result.finalScore = inputData.score * (10 - reputation.relativeReputation * 0.8) /
            10 * result.fpMitigationScore / 10;
        result.finalScore = std::min(result.finalScore * 2, 10.0);
    }

    result.fpClassification = classification;
    return result;
}

View File

@@ -0,0 +1,64 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include <functional>
#include "FpMitigation.h"
#include "BehaviorAnalysis.h"
// Input bundle handed to D2Main::analyzeData().
//
// All string/vector members default to empty. `score` is explicitly zero-initialized so
// that a default-constructed instance never carries an indeterminate value.
struct D2InputData {
    std::string siteId;
    std::string sourceIdentifier;
    std::string userAgent;
    std::string uri;
    std::string param;
    std::vector<std::string> keywordMatches;
    double score = 0.0;
    std::string location;
};
// Result bundle produced by D2Main::analyzeData(). A default-constructed instance has
// every numeric field at 0.0 and fpClassification set to UNKNOWN_TYPE.
struct D2OutputData {
    double finalScore = 0.0;
    double absoluteReputation = 0.0;
    double relativeReputation = 0.0;
    double fpMitigationScore = 0.0;
    PolicyCounterType fpClassification = UNKNOWN_TYPE;
    double reputationMean = 0.0;
    double variance = 0.0;

    // Kept user-provided so the type's construction semantics stay as before.
    D2OutputData()
    {
    }
};
// Per-asset stage-2 analyzer: owns a false-positive mitigation scorer and a behavior
// analyzer, and combines them in analyzeData().
class D2Main {
public:
// Stores assetId and creates the FpMitigationScore instance.
D2Main(const std::string& assetId);
virtual ~D2Main();
// Virtual so it can be overridden. Returns the analysis results for inputData.
virtual D2OutputData analyzeData(const D2InputData& inputData);
private:
std::string m_assetId;
std::unique_ptr<FpMitigationScore> m_fpMitigation;
BehaviorAnalyzer m_BehaviorAnalyzer;
};
// Equality comparison for D2OutputData; only declared here, the definition lives elsewhere.
bool operator==(const D2OutputData& lhs, const D2OutputData& rhs);

View File

@@ -0,0 +1,28 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __DATA_TYPES_H__
#define __DATA_TYPES_H__
// Data type categories. No explicit values are assigned, so the enumerators are numbered
// consecutively starting with EMPTY = 0.
enum DataType {
EMPTY,
NUMBER,
BOOLEAN,
STRING,
MAP,
ARRAY,
COMPLEX,
ALWAYSVALID
};
#endif // __DATA_TYPES_H__

View File

@@ -0,0 +1,147 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "DecisionFactory.h"
#include "debug.h"
#include "AutonomousSecurityDecision.h"
#include "CsrfDecision.h"
#include "OpenRedirectDecision.h"
#include "ErrorDisclosureDecision.h"
#include "ErrorLimitingDecision.h"
#include "RateLimitingDecision.h"
#include "UserLimitsDecision.h"
USE_DEBUG_FLAG(D_WAAP);
// Creates one decision object for every slot of the decisions array, using the slot
// index as the DecisionType value.
DecisionFactory::DecisionFactory()
{
    const size_t slotCount = getDecisions().size();
    size_t slot = 0;
    while (slot < slotCount) {
        initDecision(static_cast<DecisionType>(slot));
        ++slot;
    }
}
// Dispatches to the init*Decision() helper matching `type`.
//
// Values that do not correspond to a known decision type are reported through the debug
// log and otherwise ignored. The former `static_assert(true, ...)` in the default branch
// was removed: a static_assert on a constant `true` can never fire, so it only suggested
// a compile-time check that did not exist.
void DecisionFactory::initDecision(DecisionType type)
{
    switch (type)
    {
        case AUTONOMOUS_SECURITY_DECISION:
        {
            initAutonomousSecurityDecision();
            break;
        }
        case CSRF_DECISION:
        {
            initCsrfDecision();
            break;
        }
        case OPEN_REDIRECT_DECISION:
        {
            initOpenRedirectDecision();
            break;
        }
        case ERROR_DISCLOSURE_DECISION:
        {
            initErrorDisclosureDecision();
            break;
        }
        case ERROR_LIMITING_DECISION:
        {
            initErrorLimitingDecision();
            break;
        }
        case RATE_LIMITING_DECISION:
        {
            initRateLimitingDecision();
            break;
        }
        case USER_LIMITS_DECISION:
        {
            initUserLimitsDecision();
            break;
        }
        default:
            // Runtime-only diagnostic for out-of-range values.
            dbgError(D_WAAP) << "Illegal DecisionType ENUM value " << type;
            break;
    }
}
void DecisionFactory::initAutonomousSecurityDecision()
{
DecisionType type = DecisionType::AUTONOMOUS_SECURITY_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<AutonomousSecurityDecision>(type);
}
}
void DecisionFactory::initCsrfDecision()
{
DecisionType type = DecisionType::CSRF_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<CsrfDecision>(type);
}
}
void DecisionFactory::initOpenRedirectDecision()
{
DecisionType type = DecisionType::OPEN_REDIRECT_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<OpenRedirectDecision>(type);
}
}
void DecisionFactory::initErrorDisclosureDecision()
{
DecisionType type = DecisionType::ERROR_DISCLOSURE_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<ErrorDisclosureDecision>(type);
}
}
void DecisionFactory::initErrorLimitingDecision()
{
DecisionType type = DecisionType::ERROR_LIMITING_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<ErrorLimitingDecision>(type);
}
}
void DecisionFactory::initRateLimitingDecision()
{
DecisionType type = DecisionType::RATE_LIMITING_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<RateLimitingDecision>(type);
}
}
void DecisionFactory::initUserLimitsDecision()
{
DecisionType type = DecisionType::USER_LIMITS_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<UserLimitsDecision>(type);
}
}
// Returns the decision stored for `type`, or a null pointer when `type` is not below
// NO_WAAP_DECISION.
std::shared_ptr<SingleDecision>
DecisionFactory::getDecision(DecisionType type) const
{
    if (!(type < NO_WAAP_DECISION)) {
        return nullptr;
    }
    return m_decisions[type];
}

View File

@@ -0,0 +1,45 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __DECISION_FACTORY_H__
#define __DECISION_FACTORY_H__
#include "DecisionType.h"
#include "SingleDecision.h"
#include <array>
#include <memory>
// Fixed-size table with one decision slot per DecisionType value below NO_WAAP_DECISION.
using DecisionsArr = std::array<std::shared_ptr<SingleDecision>, NO_WAAP_DECISION>;
// Owns one SingleDecision-derived object per DecisionType and hands them out by type.
class DecisionFactory
{
public:
// Populates every slot of the decisions array.
DecisionFactory();
// Returns the decision for `type`, or a null pointer for values not below NO_WAAP_DECISION.
std::shared_ptr<SingleDecision> getDecision(DecisionType type) const;
// Read-only access to the whole decisions array.
const DecisionsArr& getDecisions() const
{
return m_decisions;
}
private:
// Dispatches to the matching init*Decision() helper below.
void initDecision(DecisionType type);
// Each helper creates its decision object only if the slot is still empty.
void initAutonomousSecurityDecision();
void initCsrfDecision();
void initOpenRedirectDecision();
void initErrorDisclosureDecision();
void initErrorLimitingDecision();
void initRateLimitingDecision();
void initUserLimitsDecision();
DecisionsArr m_decisions;
};
#endif

View File

@@ -0,0 +1,31 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __DECISION_TYPE_H__
#define __DECISION_TYPE_H__
// Identifies each kind of WAAP decision. Enumerators carry implicit consecutive values
// starting at 0, so NO_WAAP_DECISION equals the number of real decision types.
enum DecisionType
{
// This order determines the priority of the decisions sent to management
// Priority goes from top to bottom
AUTONOMOUS_SECURITY_DECISION,
CSRF_DECISION,
OPEN_REDIRECT_DECISION,
ERROR_DISCLOSURE_DECISION,
ERROR_LIMITING_DECISION,
USER_LIMITS_DECISION,
RATE_LIMITING_DECISION,
// Must be kept last
NO_WAAP_DECISION
};
#endif

View File

@@ -0,0 +1,160 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// #define WAF2_LOGGING_ENABLE
#include "DeepAnalyzer.h"
#include "Waf2Engine.h"
#include "WaapConversions.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP);
// Creates the implementation object that all public calls are forwarded to.
DeepAnalyzer::DeepAnalyzer()
        :
    pimpl(std::make_unique<DeepAnalyzer::Impl>())
{
}
// Nothing to do explicitly; the owned implementation is released with the pimpl member.
DeepAnalyzer::~DeepAnalyzer() = default;
void DeepAnalyzer::reset()
{
pimpl->reset();
}
// Forwards the transaction and site policy to the implementation and returns its result.
AnalysisResult DeepAnalyzer::analyzeData(IWaf2Transaction* pWaf2Trans, const IWaapConfig* pSitePolicy)
{
    Impl& impl = *pimpl;
    return impl.analyzeData(pWaf2Trans, pSitePolicy);
}
// Reports whether the request is covered by an exception.
// As written, no exception rule is evaluated: sourceIp is never read and the result is
// false whether or not a configuration is supplied.
bool DeepAnalyzer::Impl::isException(const IWaapConfig* pWaapConfig, const std::string& sourceIp)
{
    if (pWaapConfig == NULL) {
        return false;
    }
    // A configuration is present, but it contributes nothing to the outcome.
    return false;
}
// Installs d2main as the analyzer for assetId, taking ownership of the raw pointer.
// An existing entry for the same key has its previous analyzer replaced.
void DeepAnalyzer::Impl::setD2Main(std::string assetId, D2Main* d2main)
{
    auto existing = m_d2MainMap.find(assetId);
    if (existing != m_d2MainMap.end()) {
        existing->second.reset(d2main);
        return;
    }
    m_d2MainMap.insert(std::make_pair(assetId, std::unique_ptr<D2Main>(d2main)));
}
// Starts with an empty asset-to-analyzer map.
DeepAnalyzer::Impl::Impl()
        :
    m_d2MainMap()
{
}
// No explicit cleanup; the map and the analyzers it owns are destroyed implicitly.
DeepAnalyzer::Impl::~Impl()
{
}
// Destroys every per-asset analyzer and removes its entry, leaving the map empty.
void DeepAnalyzer::Impl::reset()
{
    for (auto entry = m_d2MainMap.begin(); entry != m_d2MainMap.end(); entry = m_d2MainMap.erase(entry)) {
        // Release the analyzer first, then drop the (now empty) map entry.
        entry->second.reset();
    }
}
// True when no per-asset analyzer is currently stored.
bool DeepAnalyzer::Impl::isMapEmpty()
{
    const bool noEntries = m_d2MainMap.empty();
    return noEntries;
}
// Runs the per-asset stage-2 analysis on `data`, converts the final score to a threat
// level, and decides whether to block: the policy must call for blocking and the request
// must not be an exception.
AnalysisResult DeepAnalyzer::Impl::analyzeData(const D2InputData& data, const IWaapConfig* pSitePolicy)
{
    AnalysisResult result;
    const std::unique_ptr<D2Main>& analyzer = getD2Main(data.siteId);
    result.d2Analysis = analyzer->analyzeData(data);
    const D2OutputData& d2 = result.d2Analysis;

    ThreatLevel level = Waap::Conversions::convertFinalScoreToThreatLevel(d2.finalScore);
    bool policyBlocks = Waap::Conversions::shouldDoWafBlocking(pSitePolicy, level);
    bool exempted = isException(pSitePolicy, data.sourceIdentifier);

    dbgDebug(D_WAAP) << "stage2 analysis: final score: " << d2.finalScore << ", reputation: " <<
        d2.relativeReputation << ", false positive mitigation score: " <<
        d2.fpMitigationScore << ", threat level: " << level << "\nWAF2 decision to block: " <<
        (policyBlocks ? "block" : "pass") << ", is the request in exception list: " <<
        (exempted ? "true" : "false");

    result.threatLevel = level;
    result.shouldBlock = policyBlocks && !exempted;
    return result;
}
// Collects the analysis inputs from the transaction and the site policy, then delegates
// to the D2InputData overload. A null transaction or policy is logged and answered with
// a value-initialized AnalysisResult.
AnalysisResult DeepAnalyzer::Impl::analyzeData(IWaf2Transaction* pWaf2Trans, const IWaapConfig* pSitePolicy)
{
    D2InputData request;
    const bool argumentsValid = (pWaf2Trans != NULL) && (pSitePolicy != NULL);
    if (!argumentsValid)
    {
        dbgWarning(D_WAAP) << "invalid argument pWaf2Trans(0x" << std::hex << pWaf2Trans << "), pSitePolicy(0x" <<
            std::hex << pSitePolicy << ")";
        return AnalysisResult();
    }

    // Getters are invoked in the same sequence as before.
    request.sourceIdentifier = pWaf2Trans->getSourceIdentifier();
    request.userAgent = pWaf2Trans->getUserAgent();
    request.param = pWaf2Trans->getParam();
    request.location = pWaf2Trans->getLocation();
    request.siteId = pSitePolicy->get_AssetId();
    request.keywordMatches = pWaf2Trans->getKeywordMatches();
    request.uri = pWaf2Trans->getUriStr();
    request.score = pWaf2Trans->getScore();

    return analyzeData(request, pSitePolicy);
}
const std::unique_ptr<D2Main>& DeepAnalyzer::Impl::getD2Main(const std::string& assetId)
{
std::unordered_map<std::string, std::unique_ptr<D2Main>>::iterator it;
std::string mapKey = assetId;
if (Singleton::exists<I_InstanceAwareness>())
{
I_InstanceAwareness* instanceAwareness = Singleton::Consume<I_InstanceAwareness>::by<WaapComponent>();
Maybe<std::string> uniqueId = instanceAwareness->getUniqueID();
if (uniqueId.ok())
{
mapKey += "/" + uniqueId.unpack();
}
}
it = m_d2MainMap.find(mapKey);
if (it == m_d2MainMap.end())
{
m_d2MainMap.insert(std::make_pair(mapKey, std::unique_ptr<D2Main>(new D2Main(mapKey))));
}
const std::unique_ptr<D2Main>& result = m_d2MainMap[mapKey];
return result;
};

View File

@@ -0,0 +1,71 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "D2Main.h"
#include "i_waapConfig.h"
#include "WaapEnums.h"
#include "i_deepAnalyzer.h"
#include <memory>
#include <unordered_map>
// Plain input record for a stage-1 analysis request.
//
// String and vector members default to empty. `score` is explicitly zero-initialized so
// that a default-constructed instance never exposes an indeterminate value.
struct D1AnalysisInput {
    std::string siteId;
    std::string sourceIp;
    std::string userAgent;
    std::string uri;
    std::string shortUri;
    std::string param;
    std::vector<std::string> keywordMatches;
    double score = 0.0;
};
// Outcome of a deep analysis: the raw stage-2 output plus the derived threat level and
// block decision.
//
// threatLevel and shouldBlock carry default member initializers so that a
// default-initialized instance (e.g. `AnalysisResult r;`) has no indeterminate members.
struct AnalysisResult {
    D2OutputData d2Analysis;
    ThreatLevel threatLevel{};   // value-initialized: the enumerator with value 0
    bool shouldBlock = false;
};
// Public facade of the deep analyzer. Every call is forwarded to the Impl object held
// through `pimpl`.
class DeepAnalyzer : Singleton::Provide<I_DeepAnalyzer> {
public:
DeepAnalyzer();
virtual ~DeepAnalyzer();
// Forwards to Impl::analyzeData() and returns its result.
virtual AnalysisResult analyzeData(IWaf2Transaction* waf2Trans, const IWaapConfig* pSitePolicy);
// Forwards to Impl::reset().
void reset();
class Impl;
protected:
std::unique_ptr<Impl> pimpl;
};
// Implementation behind DeepAnalyzer: keeps one D2Main analyzer per map key and runs the
// stage-2 analysis through it.
class DeepAnalyzer::Impl : Singleton::Provide<I_DeepAnalyzer>::From<DeepAnalyzer>
{
public:
Impl();
virtual ~Impl();
// Destroys all stored analyzers and empties the map.
void reset();
// True when no analyzer is stored.
bool isMapEmpty();
// Analyzes an already assembled input record.
AnalysisResult analyzeData(const D2InputData& data, const IWaapConfig* pSitePolicy);
// Assembles the input record from the transaction and policy, then analyzes it.
virtual AnalysisResult analyzeData(IWaf2Transaction* waf2Trans, const IWaapConfig* pSitePolicy);
static bool isException(const IWaapConfig* pSitePolicy, const std::string& sourceIp);
// API for testing
// Takes ownership of d2main and stores it under assetId.
void setD2Main(std::string assetId, D2Main* d2main);
protected:
// Returns the analyzer for assetId, creating it if it does not exist yet.
const std::unique_ptr<D2Main>& getD2Main(const std::string& assetId);
std::unordered_map<std::string, std::unique_ptr<D2Main> > m_d2MainMap;
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,140 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_PARAMETER_DEEP_H__549cc3ee
#define __PARSER_PARAMETER_DEEP_H__549cc3ee
#include "ParserBase.h"
#include "KeyStack.h"
#include "WaapAssetState.h"
#include "Waf2Regex.h"
#include "maybe_res.h"
#include <deque>
// Deep (recursively) parses/dissects parameters based on input stream.
// Implements IParserReceiver (see onKv()) and holds a reference to another
// IParserReceiver supplied at construction.
class DeepParser : public IParserReceiver
{
public:
DeepParser(std::shared_ptr<WaapAssetState> pWaapAssetState, IParserReceiver &receiver,
IWaf2Transaction* pTransaction);
virtual ~DeepParser();
void setWaapAssetState(std::shared_ptr<WaapAssetState> pWaapAssetState);
// This callback receives input key/value pairs, dissects, decodes and deep-scans these, recursively
// finally, it calls onDetected() on each detected parameter.
virtual int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags);
void clear();
void apiProcessKey(const char *v, size_t v_len);
size_t depth() const;
// Accessors for the object-depth limits stored in m_globalMaxObjectDepth / m_localMaxObjectDepth.
void setGlobalMaxObjectDepth(size_t depth) { m_globalMaxObjectDepth = depth; }
size_t getGlobalMaxObjectDepth() const { return m_globalMaxObjectDepth; }
bool isGlobalMaxObjectDepthReached() const;
size_t getLocalMaxObjectDepth() const { return m_localMaxObjectDepth; }
void setMultipartBoundary(const std::string &boundary);
const std::string &getMultipartBoundary() const;
bool isBinaryData() const;
bool isWBXmlData() const;
Maybe<std::string> getSplitType() const;
// Publicly accessible list of string pairs.
// NOTE(review): presumably the collected key/value pairs - confirm against the implementation.
std::vector<std::pair<std::string, std::string> > kv_pairs;
// Represents information stored per-keyword
struct KeywordInfo
{
std::string type;
std::string name;
std::string val;
KeywordInfo() {}
// Copies v_len bytes starting at v into val; type and name are copied as given.
KeywordInfo(
const std::string &type,
const std::string &name,
const char *v,
size_t v_len) :
type(type),
name(name),
val(std::string(v, v_len))
{
}
// Length of the stored value, in bytes.
size_t getLength() const
{
return val.size();
}
const std::string &getName() const
{
return name;
}
const std::string &getType() const
{
return type;
}
// Return the value itself
const std::string &getValue() const
{
return val;
}
};
// KeywordInfo maintained for each keyword name
std::vector<KeywordInfo> m_keywordInfo;
KeyStack m_key;
private:
// Scoped counter helper: increments the referenced int on construction and decrements
// it again on destruction.
class Ref
{
public:
Ref(int &ref):m_ref(ref) { m_ref++; }
~Ref() { m_ref--; }
private:
int &m_ref;
};
std::shared_ptr<WaapAssetState> m_pWaapAssetState;
IWaf2Transaction* m_pTransaction;
IParserReceiver &m_receiver;
size_t m_depth;
int m_splitRefs; // incremented when entering recursion due to "split" action,
// decremented afterwards. If >0, apiProcessKey should not be called.
// Split a value by given regexp. Return true if split, false otherwise.
// note: This function calls onKv(), and the call can be recursive!
// TODO:: maybe convert this splitter to Parser-derived class?!
bool splitByRegex(const std::string &val, const Regex &r, const char *keyPrefix);
void createInternalParser(std::string& cur_val,
const ValueStatsAnalyzer &valueStats,
bool isBodyPayload,
bool isRefererPayload,
bool isRefererParamPayload,
bool isUrlPayload,
bool isUrlParamPayload);
int pushValueToTopParser(std::string& cur_val, int flags, bool base64ParamFound);
int parseBuffer(ValueStatsAnalyzer& valueStats, const std::string &cur_val, bool base64ParamFound,
bool shouldUpdateKeyStack);
bool shouldEnforceDepthLimit(const std::shared_ptr<ParserBase>& parser) const;
void setLocalMaxObjectDepth(size_t depth) { m_localMaxObjectDepth = depth; }
void setGlobalMaxObjectDepthReached() { m_globalMaxObjectDepthReached = true; }
bool m_deepParserFlag;
std::stack<std::tuple<size_t, size_t, std::string>> m_splitTypesStack; // depth, splitIndex, splitType
std::deque<std::shared_ptr<ParserBase>> m_parsersDeque;
std::string m_multipart_boundary;
size_t m_globalMaxObjectDepth;
size_t m_localMaxObjectDepth;
bool m_globalMaxObjectDepthReached;
bool m_is_wbxml;
};
#endif // __PARSER_PARAMETER_DEEP_H__549cc3ee

View File

@@ -0,0 +1,22 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ErrorDisclosureDecision.h"
// Forwards the decision type to the SingleDecision base; no additional state is set up.
ErrorDisclosureDecision::ErrorDisclosureDecision(DecisionType type)
        :
    SingleDecision(type)
{
}
// Returns the display name of this decision type.
std::string ErrorDisclosureDecision::getTypeStr() const
{
    return std::string("Error Disclosure");
}

View File

@@ -0,0 +1,28 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __ERROR_DISCLOSURE_DECISION_H__
#define __ERROR_DISCLOSURE_DECISION_H__
#include "SingleDecision.h"
#include "DecisionType.h"
#include <string>
// SingleDecision specialization whose getTypeStr() identifies the error-disclosure decision.
class ErrorDisclosureDecision: public SingleDecision
{
public:
// Passes `type` through to the SingleDecision base.
explicit ErrorDisclosureDecision(DecisionType type);
// Returns the display name for this decision type.
std::string getTypeStr() const override;
};
#endif

View File

@@ -0,0 +1,59 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "WaapAssetState.h"
#include "waap.h"
#include <string>
#include <chrono>
#include <memory>
#include <cereal/types/vector.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/archives/json.hpp>
#include "ErrorLimiting.h"
namespace Waap
{
namespace ErrorLimiting
{
// Reports whether error limiting enforcement is switched on (the `enable` flag of
// m_errorLimiting).
bool
ErrorLimiter::getErrorLimitingEnforcementStatus()
{
    const bool enforcementEnabled = m_errorLimiting.enable;
    return enforcementEnabled;
}
bool enforce(
const std::string& sourceIdentifier,
const std::string& uriStr,
const std::shared_ptr<WaapAssetState>& pWaapAssetState,
bool& log)
{
dbgTrace(D_WAAP) << "ErrorLimiting::enforce:: response code: 404 :: error Limiting.";
// Get current clock time
I_TimeGet* timer = Singleton::Consume<I_TimeGet>::by<WaapComponent>();
// The error limiting state tracks error limiting information for all sources
std::shared_ptr<Waap::RateLimiting::State> errorLimitingState = pWaapAssetState->getErrorLimitingState();
std::chrono::seconds now = std::chrono::duration_cast<std::chrono::seconds>(timer->getMonotonicTime());
if (errorLimitingState && (errorLimitingState->execute(sourceIdentifier, uriStr, now, log) == false)) {
// block request due to error limiting
return true;
}
return false;
}
}
}

View File

@@ -0,0 +1,95 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "WaapAssetState.h"
#include "waap.h"
#include <string>
#include <chrono>
#include <memory>
#include <cereal/types/vector.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/archives/json.hpp>
namespace Waap
{
namespace ErrorLimiting
{
// Error limiting configuration, loaded from an archive: the enforcement switch plus the
// limiter parameters.
struct ErrorLimiter
{
    // Limiter parameters read from the archive.
    struct Policy
    {
        // Reads "interval", "events" and "type"; "blockingTime" is read only when the
        // type is "quarantine".
        template <typename Archive>
        void serialize(Archive &ar)
        {
            ar(
                cereal::make_nvp("interval", interval),
                cereal::make_nvp("events", events),
                cereal::make_nvp("type", type)
            );
            if (type != "quarantine") {
                return;
            }
            ar(cereal::make_nvp("blockingTime", blockingTime));
        }

        unsigned interval = 0;
        unsigned events = 0;
        std::string type;
        int blockingTime = 0;
    };

    // Enforcement switch: enabled only when "errorLimitingEnforcement" equals "prevent"
    // (compared case-insensitively).
    class ErrorLimitingEnforcement
    {
    public:
        template <typename Archive>
        ErrorLimitingEnforcement(Archive &ar)
                :
            enable(false)
        {
            std::string mode;
            ar(cereal::make_nvp("errorLimitingEnforcement", mode));
            mode = boost::algorithm::to_lower_copy(mode);
            enable = (mode == "prevent");
        }

        bool operator==(const ErrorLimitingEnforcement &other) const;

        bool enable;
    };

    Policy m_errorLimiterPolicy;
    ErrorLimitingEnforcement m_errorLimiting;

    bool getErrorLimitingEnforcementStatus();

    // Loads the enforcement switch first (member initializer), then the "errorLimiter"
    // policy node.
    template <typename Archive>
    ErrorLimiter(Archive& ar)
            :
        m_errorLimiting(ar)
    {
        ar(cereal::make_nvp("errorLimiter", m_errorLimiterPolicy));
    }
};
// Applies error limiting for one request identified by source and URI, using the state
// held by pWaapAssetState. `log` is an in/out flag forwarded to the limiter.
// NOTE(review): a true return presumably means "block the request" - confirm against
// the definition.
bool enforce(
const std::string& sourceIdentifier,
const std::string& uriStr,
const std::shared_ptr<WaapAssetState>& pWaapAssetState,
bool& log);
}
}

View File

@@ -0,0 +1,22 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ErrorLimitingDecision.h"
// Forwards the decision type to the SingleDecision base; no additional initialization is done here.
ErrorLimitingDecision::ErrorLimitingDecision(DecisionType type): SingleDecision(type)
{}
// Returns the fixed human-readable name of this decision type.
std::string ErrorLimitingDecision::getTypeStr() const
{
return "Error Limiting";
}

View File

@@ -0,0 +1,27 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __ERROR_LIMITING_DECISION_H__
#define __ERROR_LIMITING_DECISION_H__
#include "DecisionType.h"
#include "SingleDecision.h"
#include <string>
// Decision object for the error-limiting check. It only supplies a type name on top of SingleDecision.
class ErrorLimitingDecision: public SingleDecision
{
public:
// `type` is forwarded unchanged to SingleDecision.
explicit ErrorLimitingDecision(DecisionType type);
// Returns the display name "Error Limiting".
std::string getTypeStr() const override;
};
#endif

View File

@@ -0,0 +1,226 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "FpMitigation.h"
#include <memory>
#include <algorithm>
#include <string.h>
// Score used for a URL or parameter that has no learned counter yet.
#define DEFAULT_SCORE 10.0
// Relative reputation below this value is a candidate true positive (see IdentifyFalseTruePositive).
#define TRUE_POSITIVE_REPUTATION_THRESHOLD 1.5
// Relative reputation above this value is a candidate false positive (see IdentifyFalseTruePositive).
#define FALSE_POSITIVE_REPUTATION_THRESHOLD 5
USE_DEBUG_FLAG(D_WAAP);
// Brings duration_cast, seconds and minutes into scope for the constructor below.
using namespace std::chrono;
// Starts with empty maps and a zero counter, configures the periodic-serialization base with a
// 10 minute period (expressed in seconds) and `backupFilePath`, then calls restore().
// NOTE(review): restore() is inherited; presumably it reloads state through deserialize() - confirm.
FpMitigationScore::FpMitigationScore(const std::string& backupFilePath) :
SerializeToFilePeriodically(duration_cast<seconds>(minutes(10)), backupFilePath),
m_policyDataUrl(),
m_policyDataParam(),
m_history(),
m_counter(0)
{
dbgTrace(D_WAAP) << "False positive mitigation constructor";
restore();
}
// Clears the learned maps and the history set before the members are destroyed.
FpMitigationScore::~FpMitigationScore()
{
reset();
}
// Drops all learned URL/parameter counters and the history of already-counted keys.
// NOTE(review): m_counter is not reset here - confirm that is intended.
void FpMitigationScore::reset() {
m_policyDataParam.clear();
m_policyDataUrl.clear();
m_history.clear();
}
// Writes the state as JSON in the version 1 layout: "version", "policyDataUrl", "policyDataParam".
// m_history and m_counter are not persisted.
void FpMitigationScore::serialize(std::ostream& stream) {
cereal::JSONOutputArchive archive(stream);
archive(cereal::make_nvp("version", 1),
cereal::make_nvp("policyDataUrl", m_policyDataUrl),
cereal::make_nvp("policyDataParam", m_policyDataParam));
}
// Loads the URL and parameter counter maps from a JSON stream.
// A stream without a readable "version" field is treated as the legacy layout (version 0),
// whose keys carry an "m_" prefix. Unknown versions are reported and nothing is loaded.
void FpMitigationScore::deserialize(std::istream& stream) {
    cereal::JSONInputArchive archive(stream);

    size_t version = 0;
    try {
        archive(cereal::make_nvp("version", version));
    }
    catch (const std::runtime_error& e) {
        // Clear the pending name lookup so the next read starts clean.
        archive.setNextName(nullptr);
        version = 0;
        dbgDebug(D_WAAP) << "Can't load file version: " << e.what();
    }

    if (version == 0) {
        archive(cereal::make_nvp("m_policyDataUrl", m_policyDataUrl),
            cereal::make_nvp("m_policyDataParam", m_policyDataParam));
    }
    else if (version == 1) {
        archive(cereal::make_nvp("policyDataUrl", m_policyDataUrl),
            cereal::make_nvp("policyDataParam", m_policyDataParam));
    }
    else {
        dbgWarning(D_WAAP) << "unknown file format version: " << version;
    }
}
// Combines the learned score of the URL and of the parameter into one mitigation score.
// A key that has no counter (or whose stored counter pointer is null, which a restored
// file could contain) contributes DEFAULT_SCORE.
// Returns ((int)(2*param)/3 + 3.3) * ((int)(2*url)/3 + 3.3) / 10.
double FpMitigationScore::calculateFpMitigationScore(const std::string& shortUri,
    const std::string& canonisedParam)
{
    double urlScore = DEFAULT_SCORE;
    double paramScore = DEFAULT_SCORE;

    // Single lookup per map; operator[] is avoided so a miss can never insert an entry.
    const auto urlEntry = m_policyDataUrl.find(shortUri);
    if (urlEntry != m_policyDataUrl.end() && urlEntry->second != nullptr)
    {
        urlScore = urlEntry->second->getScore();
    }

    const auto paramEntry = m_policyDataParam.find(canonisedParam);
    if (paramEntry != m_policyDataParam.end() && paramEntry->second != nullptr)
    {
        paramScore = paramEntry->second->getScore();
    }

    // The cast and the division by 3 are integer operations, so each factor is truncated
    // before 3.3 is added. This matches the original formula exactly.
    const double paramFactor = static_cast<int>(paramScore * 2) / 3 + 3.3;
    const double urlFactor = static_cast<int>(urlScore * 2) / 3 + 3.3;
    return paramFactor * urlFactor / 10;
}
// Returns true when `elem` compares equal to at least one element of `vec`.
// Both arguments are taken by const reference: the vector is no longer copied on every
// call, and const or temporary arguments are accepted.
template<typename T>
bool hasElement(const std::vector<T>& vec, const T& elem) {
    return std::find(vec.begin(), vec.end(), elem) != vec.end();
}
// Records one classified sample for the given URL and parameter.
// Nothing is learned when the sample has more than three keyword matches including
// "probing", or when `rep` is UNKNOWN_TYPE. Every FP_SCORE_CALCULATION_INTERVALS learned
// samples, all scores are re-evaluated.
void FpMitigationScore::learnFalsePositive(
    const std::vector<std::string>& keywordMatches,
    PolicyCounterType rep,
    const std::string& shortUri,
    const std::string& canonisedParam)
{
    static std::string probing = "probing";

    const bool isProbingBurst = keywordMatches.size() > 3 && hasElement(keywordMatches, probing);
    if (isProbingBurst || rep == UNKNOWN_TYPE)
    {
        return;
    }

    // Make sure both keys have a counter before incrementCounter() looks them up.
    if (m_policyDataUrl.count(shortUri) == 0)
    {
        m_policyDataUrl[shortUri] = std::make_shared<PolicyDataCounter>();
    }
    if (m_policyDataParam.count(canonisedParam) == 0)
    {
        m_policyDataParam[canonisedParam] = std::make_shared<PolicyDataCounter>();
    }

    incrementCounter(shortUri, canonisedParam, rep);

    ++m_counter;
    if (m_counter % FP_SCORE_CALCULATION_INTERVALS == 0)
    {
        dbgTrace(D_WAAP) << "evaluating fp mitigation scores";
        evaluatePolicyDataCounterScore();
    }
}
// Classifies a sample from its relative reputation, counting each key only once.
// Returns TRUE_POSITIVE when the reputation is below TRUE_POSITIVE_REPUTATION_THRESHOLD and
// the URI+parameter key was not seen before; FALSE_POSITIVE when it is above
// FALSE_POSITIVE_REPUTATION_THRESHOLD and `userAgentIp` was not seen before; otherwise
// UNKNOWN_TYPE. Both kinds of keys are remembered in the same m_history set.
PolicyCounterType FpMitigationScore::IdentifyFalseTruePositive(double relativeReputation,
    const std::string& shortUri, const std::string& canonisedParam, const std::string& userAgentIp)
{
    const std::string uriParamKey = shortUri + canonisedParam;

    // insert().second is true only for a key that was not present yet; the reputation test
    // comes first so nothing is inserted when it fails.
    if (relativeReputation < TRUE_POSITIVE_REPUTATION_THRESHOLD && m_history.insert(uriParamKey).second)
    {
        return TRUE_POSITIVE;
    }
    if (relativeReputation > FALSE_POSITIVE_REPUTATION_THRESHOLD && m_history.insert(userAgentIp).second)
    {
        return FALSE_POSITIVE;
    }
    return UNKNOWN_TYPE;
}
// Bumps the counter of `counterType` for both the URL and the parameter.
// The caller is expected to have created both map entries already (see learnFalsePositive),
// so operator[] acts as a plain lookup here.
void FpMitigationScore::incrementCounter(const std::string& shortUri,
    const std::string& canonisedParam,
    PolicyCounterType counterType)
{
    const std::shared_ptr<PolicyDataCounter>& urlEntry = m_policyDataUrl[shortUri];
    const std::shared_ptr<PolicyDataCounter>& paramEntry = m_policyDataParam[canonisedParam];

    urlEntry->incrementCounter(counterType);
    paramEntry->incrementCounter(counterType);
}
// Recomputes the score of every URL counter and every parameter counter.
// Entries are visited by const reference so that no key string or shared_ptr is copied
// per iteration; the counters themselves are still modified through the pointer.
void FpMitigationScore::evaluatePolicyDataCounterScore()
{
    for (const auto& urlPolicy : m_policyDataUrl) {
        urlPolicy.second->evaluateScore();
    }
    for (const auto& paramPolicy : m_policyDataParam) {
        paramPolicy.second->evaluateScore();
    }
}
// Starts with no recorded samples and a score of 10.0 (the same value as DEFAULT_SCORE).
PolicyDataCounter::PolicyDataCounter() : falsePositive(0), truePositive(0), score(10.0)
{
}
// Returns the score computed by the last evaluateScore() call, or the initial value if none ran yet.
double PolicyDataCounter::getScore()
{
return score;
}
// Counts one sample: FALSE_POSITIVE and HTML_CONTENT add to the false-positive counter,
// TRUE_POSITIVE and SPAM add to the true-positive counter. UNKNOWN_TYPE and any other
// value leave both counters untouched.
void PolicyDataCounter::incrementCounter(PolicyCounterType counterType)
{
    if (counterType == FALSE_POSITIVE || counterType == HTML_CONTENT)
    {
        falsePositive++;
    }
    else if (counterType == TRUE_POSITIVE || counterType == SPAM)
    {
        truePositive++;
    }
}
void PolicyDataCounter::evaluateScore()
{
size_t tp = truePositive + 50 + 1, fp = falsePositive;
score = (double)(10.0 * tp) / (10.0 * fp + tp);
}

View File

@@ -0,0 +1,90 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include <map>
#include <string>
#include <unordered_set>
#include <boost/noncopyable.hpp>
#include <cereal/types/map.hpp>
#include <cereal/archives/json.hpp>
#include <cereal/types/memory.hpp>
#include "i_serialize.h"
// Number of learned samples between two full score re-evaluations (see learnFalsePositive).
#define FP_SCORE_CALCULATION_INTERVALS 20
// Classification of a learned sample. PolicyDataCounter::incrementCounter counts
// FALSE_POSITIVE and HTML_CONTENT as false positives, TRUE_POSITIVE and SPAM as true
// positives, and ignores UNKNOWN_TYPE.
enum PolicyCounterType {
UNKNOWN_TYPE = 0,
FALSE_POSITIVE,
HTML_CONTENT,
TRUE_POSITIVE,
SPAM
};
// False-positive / true-positive counters for one key, plus the score derived from them.
class PolicyDataCounter {
public:
PolicyDataCounter();
// Returns the most recently evaluated score.
double getScore();
// Adds one sample of the given classification.
void incrementCounter(PolicyCounterType counterType);
// Recomputes the score from the current counters.
void evaluateScore();
// NOTE(review): the comparison operators take a non-const reference and are not const
// themselves, so they cannot be used on const objects - confirm whether that is intended.
bool operator==(PolicyDataCounter& other);
bool operator!=(PolicyDataCounter& other) { return !(*this == other); }
// cereal hook: (de)serializes both counters and the score under their member names.
template <class Archive>
void serialize(Archive& ar) {
ar(cereal::make_nvp("falsePositive", falsePositive),
cereal::make_nvp("truePositive", truePositive),
cereal::make_nvp("score", score));
}
private:
size_t falsePositive;
size_t truePositive;
double score;
};
// Learns per-URL and per-parameter false-positive statistics and turns them into a
// mitigation score. State is persisted through the SerializeToFilePeriodically base.
class FpMitigationScore : public boost::noncopyable, public SerializeToFilePeriodically {
public:
// `backupFilePath` is handed to the serialization base.
FpMitigationScore(const std::string& backupFilePath);
~FpMitigationScore();
// Returns the combined score for a URL/parameter pair; unknown keys use a default score.
double calculateFpMitigationScore(const std::string& shortUri, const std::string& canonisedParam);
// Records one classified sample; `rep` of UNKNOWN_TYPE is ignored.
void learnFalsePositive(const std::vector<std::string>& keywordMatches, PolicyCounterType rep,
const std::string& shortUri, const std::string& canonisedParam);
// Classifies a sample from its relative reputation, counting each key only once.
PolicyCounterType IdentifyFalseTruePositive(double relativeReputation, const std::string& shortUri,
const std::string& canonisedParam, const std::string& userAgentIp);
// Clears the learned maps and the history set.
void reset();
// JSON persistence of the two counter maps.
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
// Key (URL or parameter name) -> shared counter.
typedef std::map<std::string, std::shared_ptr<PolicyDataCounter>> PolicyDataMap;
protected:
void incrementCounter(const std::string& shortUri, const std::string& canonisedParam,
PolicyCounterType counterType);
void evaluatePolicyDataCounterScore();
// TODO: move to SMEM
PolicyDataMap m_policyDataUrl;
PolicyDataMap m_policyDataParam;
// Keys (URI+parameter, or user-agent/IP) that were already classified once.
std::unordered_set<std::string> m_history;
// Number of samples learned so far; drives the periodic score re-evaluation.
size_t m_counter;
};

View File

@@ -0,0 +1,193 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "i_indicatorsFilter.h"
#include "IndicatorsFilterBase.h"
#include "Waf2Engine.h"
// Builds the two confidence calculators and stores the tuning pointer.
// Note the argument order differs between this constructor and the ConfidenceCalculator
// call below: the thresholds come first there, followed by null_obj and the paths.
// The trusted-sources policy starts unset (nullptr) until setTrustedSrcParameter() is called.
IndicatorFilterBase::IndicatorFilterBase(const std::string& confidence_path,
const std::string& trusted_path,
const std::string& remotePath,
const std::string& assetId,
size_t min_sources,
size_t min_intervals,
std::chrono::minutes interval_duration,
double ratio_threshold,
const std::string& null_obj,
TuningDecision* tuning,
I_IgnoreSources* ignoreSources) :
m_confidence_calc(min_sources,
min_intervals,
interval_duration,
ratio_threshold,
null_obj,
confidence_path,
remotePath,
assetId,
tuning,
ignoreSources),
m_trusted_confidence_calc(trusted_path, remotePath, assetId),
m_policy(nullptr),
m_tuning(tuning)
{
}
void IndicatorFilterBase::filterKeywords(
const std::string &key,
Waap::Keywords::KeywordsSet& keywords,
std::vector<std::string>& filteredKeywords)
{
for (auto keyword = keywords.begin(); keyword != keywords.end(); )
{
if (shouldFilterKeyword(key, *keyword))
{
filteredKeywords.push_back(*keyword);
keyword = keywords.erase(keyword);
}
else
{
keyword++;
}
}
}
// Installs a new trusted-sources policy.
// Returns true when a previous policy existed and differs from the new one, in which case
// the trusted confidence calculator is reset. The first policy ever set returns false.
bool IndicatorFilterBase::setTrustedSrcParameter(
    std::shared_ptr<Waap::TrustedSources::TrustedSourcesParameter> policy)
{
    const bool isChanged = (m_policy != nullptr) && (*policy != *m_policy);
    if (isChanged)
    {
        m_trusted_confidence_calc.reset();
    }
    m_policy = policy;
    return isChanged;
}
// Resets both confidence calculators (hardReset() on the main one, reset() on the trusted one).
void IndicatorFilterBase::reset()
{
m_confidence_calc.hardReset();
m_trusted_confidence_calc.reset();
}
// Asks the policy whether `source` is trusted for the given identifier type.
// The value actually checked depends on the type:
//  - X_FORWARDED_FOR: the proxy IP from the environment context (empty if unavailable);
//    `source` itself is not used,
//  - COOKIE_OAUTH2_PROXY: the "_oauth2_proxy" value extracted from `source`,
//  - any other type: `source` unchanged.
// Returns false when no policy is set.
bool IndicatorFilterBase::isTrustedSourceOfType(const std::string& source,
    Waap::TrustedSources::TrustedSourceType srcType)
{
    if (m_policy == nullptr)
    {
        dbgTrace(D_WAAP) << "missing policy";
        return false;
    }

    std::string candidate(source);
    switch (srcType)
    {
        case Waap::TrustedSources::TrustedSourceType::X_FORWARDED_FOR:
        {
            auto env = Singleton::Consume<I_Environment>::by<WaapComponent>();
            auto proxy_ip = env->get<std::string>(HttpTransactionData::proxy_ip_ctx);
            if (proxy_ip.ok())
            {
                candidate = proxy_ip.unpack();
            }
            else
            {
                candidate = "";
            }
            break;
        }
        case Waap::TrustedSources::TrustedSourceType::COOKIE_OAUTH2_PROXY:
            candidate = Waap::Util::extractKeyValueFromCookie(source, "_oauth2_proxy");
            break;
        case Waap::TrustedSources::TrustedSourceType::SM_USER:
            candidate = source;
            break;
        default:
            break;
    }
    return m_policy->isSourceTrusted(candidate, srcType);
}
// Returns the first identifier of the transaction that the policy trusts, trying the
// policy's trusted types in the order getTrustedTypes() yields them, or "" when none
// matches or no policy is set.
// For header-based types the raw header value that passed the check is returned
// (the whole "X-Forwarded-For", "sm_user" or "Cookie" header content).
std::string IndicatorFilterBase::getTrustedSource(IWaf2Transaction* pTransaction)
{
    if (m_policy == nullptr)
    {
        dbgTrace(D_WAAP) << "Policy for trusted sources is not set";
        return "";
    }

    auto trustedTypes = m_policy->getTrustedTypes();
    std::string xFwdVal;
    std::string cookieVal;
    std::string smuserVal;

    // Fetches a header only while its cache is still empty, then hands the cache back.
    const auto cachedHeader =
        [pTransaction](std::string& cache, const char* headerName) -> const std::string&
        {
            if (cache.empty())
            {
                cache = pTransaction->getHdrContent(headerName);
            }
            return cache;
        };

    for (auto& trustedType : trustedTypes)
    {
        switch (trustedType)
        {
            case Waap::TrustedSources::TrustedSourceType::SOURCE_IP:
                if (isTrustedSourceOfType(pTransaction->getRemoteAddr(), trustedType))
                {
                    return pTransaction->getRemoteAddr();
                }
                break;
            case Waap::TrustedSources::TrustedSourceType::X_FORWARDED_FOR:
                if (isTrustedSourceOfType(cachedHeader(xFwdVal, "X-Forwarded-For"), trustedType))
                {
                    return xFwdVal;
                }
                break;
            case Waap::TrustedSources::TrustedSourceType::SM_USER:
                if (isTrustedSourceOfType(cachedHeader(smuserVal, "sm_user"), trustedType))
                {
                    return smuserVal;
                }
                break;
            case Waap::TrustedSources::TrustedSourceType::COOKIE_OAUTH2_PROXY:
                if (isTrustedSourceOfType(cachedHeader(cookieVal, "Cookie"), trustedType))
                {
                    return cookieVal;
                }
                break;
            default:
                dbgWarning(D_WAAP) << "unrecognized trusted source identifier type: " << trustedType;
                break;
        }
    }
    return "";
}
// Logs one keyword occurrence for `key` from `source` in the confidence calculator and,
// when `trusted_src` is non-empty, in the trusted confidence calculator as well.
// The keywords "probing" and "repetition" are never registered.
void IndicatorFilterBase::registerKeyword(const std::string& key,
    const std::string& keyword,
    const std::string& source,
    const std::string& trusted_src)
{
    dbgTrace(D_WAAP) << "registering keyword: " << keyword << " for parameter: " << key << " from source: " << source;

    const bool isIgnoredKeyword = (keyword == "probing") || (keyword == "repetition");
    if (isIgnoredKeyword)
    {
        dbgTrace(D_WAAP) << "ignoring keyword " << keyword;
        return;
    }

    m_confidence_calc.log(key, keyword, source);
    if (!trusted_src.empty())
    {
        m_trusted_confidence_calc.log(key, keyword, trusted_src);
    }
}

View File

@@ -0,0 +1,56 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "i_indicatorsFilter.h"
#include "i_messaging.h"
#include "waap.h"
#include "TrustedSources.h"
#include "TrustedSourcesConfidence.h"
#include "ConfidenceCalculator.h"
#include "TuningDecisions.h"
// Common base of the indicator filters: owns a confidence calculator, a trusted-sources
// confidence calculator and the current trusted-sources policy.
class IndicatorFilterBase : public I_IndicatorsFilter
{
public:
// The thresholds, paths and pointers are forwarded to the confidence calculators.
IndicatorFilterBase(const std::string& confidence_path,
const std::string& trusted_path,
const std::string& remotePath,
const std::string& assetId,
size_t min_sources,
size_t min_intervals,
std::chrono::minutes interval_duration,
double ratio_threshold,
const std::string& null_obj,
TuningDecision* tuning,
I_IgnoreSources* ignoreSources = nullptr);
// Moves keywords that should be filtered for `key` from `keywords` into `filteredKeywords`.
virtual void filterKeywords(const std::string &key, Waap::Keywords::KeywordsSet& keywords,
std::vector<std::string>& filteredKeywords);
// Installs a policy; returns true when it replaced a different, previously set policy.
bool setTrustedSrcParameter(std::shared_ptr<Waap::TrustedSources::TrustedSourcesParameter> policy);
// Resets both confidence calculators.
void reset();
protected:
// Returns the first trusted identifier found on the transaction, or "" if none.
std::string getTrustedSource(IWaf2Transaction* pTransaction);
// Logs a keyword for `key`; an empty `trusted_src` skips the trusted calculator.
void registerKeyword(const std::string& key,
const std::string& keyword,
const std::string& source,
const std::string& trusted_src);
ConfidenceCalculator m_confidence_calc;
TrustedSourcesConfidenceCalculator m_trusted_confidence_calc;
// Null until setTrustedSrcParameter() is called.
std::shared_ptr<Waap::TrustedSources::TrustedSourcesParameter> m_policy;
// Non-owning as far as this class shows: it is only stored, never deleted here.
TuningDecision* m_tuning;
private:
bool isTrustedSourceOfType(const std::string& source, Waap::TrustedSources::TrustedSourceType srcType);
};

View File

@@ -0,0 +1,317 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "IndicatorsFiltersManager.h"
#include "WaapConfigApi.h"
#include "WaapConfigApplication.h"
#include <vector>
#include "Waf2Util.h"
#include "FpMitigation.h"
#include "Waf2Engine.h"
#include "WaapKeywords.h"
// Sets up persistence in "<signatures filter dir>/6.data", the scanners detector and the
// tuning decisions, then calls restore() and finally creates the two filters.
// Note that restore() runs while m_keywordsFreqFilter and m_typeFilter are still null.
// `pWaapAssetState` is dereferenced unconditionally and must not be null.
IndicatorsFiltersManager::IndicatorsFiltersManager(const std::string& remotePath, const std::string &assetId,
I_WaapAssetState* pWaapAssetState)
:
SerializeToFileBase(pWaapAssetState->getSignaturesFilterDir() + "/6.data"),
m_ignoreSources(pWaapAssetState->getSignaturesFilterDir(), remotePath, assetId),
m_tuning(remotePath)
{
restore();
m_keywordsFreqFilter = std::make_unique<KeywordIndicatorFilter>(
pWaapAssetState->getSignaturesFilterDir(),
remotePath,
assetId,
&m_ignoreSources,
&m_tuning);
m_typeFilter = std::make_unique<TypeIndicatorFilter>(pWaapAssetState, remotePath, assetId, &m_tuning);
}
// Nothing to do explicitly: the filters are held in unique_ptr members and release themselves.
IndicatorsFiltersManager::~IndicatorsFiltersManager()
{
}
// Feeds the keywords found for `key` into the learning filters.
// Nothing is registered when the tuning decisions mark the last scanned parameter name,
// the last scanned sample, the URI or the source identifier as MALICIOUS.
// Keys starting with "url#" are registered with the keywords-frequency filter only.
// For other keys the type filter is updated too, and when the parameter has the type
// "html_input" the keywords are additionally registered under that type name.
void IndicatorsFiltersManager::registerKeywords(const std::string& key,
    Waap::Keywords::KeywordsSet& keywords,
    IWaf2Transaction* pWaapTransaction)
{
    if (m_tuning.getDecision(pWaapTransaction->getLastScanParamName(), PARAM_NAME) == MALICIOUS ||
        m_tuning.getDecision(pWaapTransaction->getLastScanSample(), PARAM_VALUE) == MALICIOUS ||
        m_tuning.getDecision(pWaapTransaction->getUri(), URL) == MALICIOUS ||
        m_tuning.getDecision(pWaapTransaction->getSourceIdentifier(), SOURCE) == MALICIOUS)
    {
        return;
    }

    if (!keywords.empty())
    {
        m_ignoreSources.log(pWaapTransaction->getSourceIdentifier(), key, keywords);
    }
    m_keywordsFreqFilter->registerKeywords(key, keywords, pWaapTransaction);

    if (key.rfind("url#", 0) == 0)
    {
        return;
    }
    m_typeFilter->registerKeywords(key, keywords, pWaapTransaction);

    // The types form a set, so "html_input" occurs at most once: a direct lookup replaces
    // the former linear scan that copied every type string.
    static const std::string htmlInputType = "html_input";
    const std::set<std::string> types = getParameterTypes(key);
    if (types.find(htmlInputType) != types.end())
    {
        m_keywordsFreqFilter->registerKeywords(htmlInputType, keywords, pWaapTransaction);
    }
}
// Returns true when any filter considers `keyword` benign for `key`:
// the keywords-frequency filter on the key itself, the type filter on the key, or the
// keywords-frequency filter on any of the parameter's types.
// Each filter pointer is checked before use; previously the per-type loop dereferenced
// m_keywordsFreqFilter without the null check applied to it a few lines earlier.
bool IndicatorsFiltersManager::shouldFilterKeyword(const std::string &key, const std::string &keyword) const
{
    bool shouldFilter = false;
    if (m_keywordsFreqFilter != nullptr)
    {
        shouldFilter |= m_keywordsFreqFilter->shouldFilterKeyword(key, keyword);
    }
    if (m_typeFilter != nullptr)
    {
        shouldFilter |= m_typeFilter->shouldFilterKeyword(key, keyword);
        if (m_keywordsFreqFilter != nullptr)
        {
            const std::set<std::string> types = getParameterTypes(key);
            for (const auto& type : types)
            {
                shouldFilter |= m_keywordsFreqFilter->shouldFilterKeyword(type, keyword);
            }
        }
    }
    return shouldFilter;
}
// Writes the state as JSON in the version 1 layout: "version" and "trustedSrcParams".
void IndicatorsFiltersManager::serialize(std::ostream& stream)
{
cereal::JSONOutputArchive archive(stream);
archive(cereal::make_nvp("version", 1), cereal::make_nvp("trustedSrcParams", m_trustedSrcParams));
}
// Loads the trusted-sources parameters from a JSON stream.
// A stream without a readable "version" field is treated as the legacy layout (version 0),
// where the key is "m_trustedSrcParams". Unknown versions are reported and nothing is loaded.
void IndicatorsFiltersManager::deserialize(std::istream& stream)
{
    cereal::JSONInputArchive archive(stream);

    size_t version = 0;
    try
    {
        archive(cereal::make_nvp("version", version));
    }
    catch (const std::runtime_error& e)
    {
        // Clear the pending name lookup so the next read starts clean.
        archive.setNextName(nullptr);
        version = 0;
        dbgDebug(D_WAAP) << "Can't load file version: " << e.what();
    }

    if (version == 0)
    {
        archive(cereal::make_nvp("m_trustedSrcParams", m_trustedSrcParams));
    }
    else if (version == 1)
    {
        archive(cereal::make_nvp("trustedSrcParams", m_trustedSrcParams));
    }
    else
    {
        dbgWarning(D_WAAP) << "unknown file format version: " << version;
    }
}
// Returns the types the type filter has for `canonicParam`.
// m_typeFilter is dereferenced without a null check.
std::set<std::string> IndicatorsFiltersManager::getParameterTypes(const std::string& canonicParam) const
{
return m_typeFilter->getParamTypes(canonicParam);
}
// Applies a new configuration to the filters.
// The trusted-sources policy (if present) is handed to both filters, the WAAP parameters
// (if present) to both filters and the scanners detector. The state is saved when either
// filter reports that its trusted-sources policy changed.
// Returns false only when `pConfig` is null.
bool IndicatorsFiltersManager::loadPolicy(IWaapConfig* pConfig)
{
    if (pConfig == NULL)
    {
        dbgWarning(D_WAAP) << "Failed to get configuration";
        return false;
    }

    bool shouldSave = false;
    m_trustedSrcParams = pConfig->get_TrustedSourcesPolicy();
    if (m_trustedSrcParams != nullptr)
    {
        shouldSave = m_keywordsFreqFilter->setTrustedSrcParameter(m_trustedSrcParams);
        shouldSave |= m_typeFilter->setTrustedSrcParameter(m_trustedSrcParams);
    }

    auto waapParams = pConfig->get_WaapParametersPolicy();
    if (waapParams != nullptr)
    {
        m_keywordsFreqFilter->loadParams(waapParams);
        m_typeFilter->loadParams(waapParams);
        m_ignoreSources.loadParams(waapParams);
    }

    if (shouldSave)
    {
        saveData();
    }
    return true;
}
// Explains, per filter, why each already-filtered keyword of `param` was filtered.
// Two entries are always created in `filteredKeywordsVerbose`:
//  - "type indicators filter": "<param>#<keyword>" for keywords the type filter accepts,
//  - "keywords frequency indicators filter": "<param>#<keyword>" for keywords the frequency
//    filter accepts on the parameter and, for those keywords only,
//    "<param>#<type>#<keyword>" for each parameter type that accepts them too.
// Keywords and types are now visited by const reference and the two result vectors are
// looked up once instead of on every push.
void IndicatorsFiltersManager::filterVerbose(const std::string &param,
    std::vector<std::string>& filteredKeywords,
    std::map<std::string, std::vector<std::string>>& filteredKeywordsVerbose)
{
    static const std::string typeFilterName = "type indicators filter";
    static const std::string keywordsFilterName = "keywords frequency indicators filter";

    // operator[] creates the entry when missing; std::map references stay valid afterwards.
    std::vector<std::string>& typeFilterHits = filteredKeywordsVerbose[typeFilterName];
    std::vector<std::string>& keywordsFilterHits = filteredKeywordsVerbose[keywordsFilterName];

    const std::set<std::string> types = getParameterTypes(param);
    for (const auto& keyword : filteredKeywords)
    {
        if (m_typeFilter->shouldFilterKeyword(param, keyword))
        {
            typeFilterHits.push_back(param + "#" + keyword);
        }
        if (m_keywordsFreqFilter->shouldFilterKeyword(param, keyword))
        {
            keywordsFilterHits.push_back(param + "#" + keyword);
            for (const auto& type : types)
            {
                if (m_keywordsFreqFilter->shouldFilterKeyword(type, keyword))
                {
                    keywordsFilterHits.push_back(param + "#" + type + "#" + keyword);
                }
            }
        }
    }
}
// Resets both filters. Neither pointer is checked for null.
void IndicatorsFiltersManager::reset()
{
m_typeFilter->reset();
m_keywordsFreqFilter->reset();
}
// Reduces a Referer value to a URI.
// The scheme ("...://") is stripped if present. When the remaining text has a path and its
// host part equals the transaction's "host" header, only the path (from the first '/')
// is returned; otherwise the scheme-less text is returned unchanged.
std::string IndicatorsFiltersManager::extractUri(const std::string& referer, const IWaf2Transaction* pTransaction)
{
    const size_t schemePos = referer.find("://");
    const std::string url = (schemePos == std::string::npos) ? referer : referer.substr(schemePos + 3);

    const size_t pathPos = url.find('/');
    if (pathPos == std::string::npos)
    {
        return url;
    }

    const std::string host = url.substr(0, pathPos);
    if (host == pTransaction->getHdrContent("host"))
    {
        return url.substr(pathPos);
    }
    return url;
}
// Builds the canonical "<location>#<name>" key used by the filters.
//  - header / cookie / url_param: "<location>#<normalized param>"
//  - referer_param:               "url_param#<normalized param>"
//  - body:                        "body#<normalized param>", or "body#<normalized URI>"
//                                 when the parameter name is empty
//  - url:                         "url#<normalized URI>"
//  - referer:                     "url#<normalized URI extracted from the Referer header>"
//  - anything else:               "<normalized URI>#<normalized param>"
std::string IndicatorsFiltersManager::generateKey(const std::string& location,
    const std::string& param_name,
    const IWaf2Transaction* pTransaction)
{
    static const std::string delim = "#";
    std::string param = normalize_param(param_name);

    if (location == "header" || location == "cookie" || location == "url_param")
    {
        return location + delim + param;
    }
    if (location == "referer_param")
    {
        return "url_param" + delim + param;
    }
    if (location == "body")
    {
        if (param == "")
        {
            return location + delim + normalize_uri(pTransaction->getUriStr());
        }
        return location + delim + param;
    }
    if (location == "url")
    {
        return location + delim + normalize_uri(pTransaction->getUriStr());
    }
    if (location == "referer")
    {
        std::string referer = pTransaction->getHdrContent("referer");
        std::string uri = extractUri(referer, pTransaction);
        return "url" + delim + normalize_uri(uri);
    }
    return normalize_uri(pTransaction->getUriStr()) + delim + param;
}
// Returns the location a canonical key starts with ("header", "cookie", "url", "body",
// "referer" or "url_param"), i.e. the known location immediately followed by '#',
// or "" when the key starts with none of them.
// `pTransaction` is not used.
// The location table is built once, and the test is a true prefix check: no per-call
// vector or string concatenation, and no scan of the whole key on a mismatch.
std::string IndicatorsFiltersManager::getLocationFromKey(const std::string& canonicKey, IWaf2Transaction* pTransaction)
{
    static const std::vector<std::string> known_locations =
        { "header", "cookie", "url", "body", "referer", "url_param" };
    static const char delim = '#';

    for (const auto& location : known_locations)
    {
        const size_t len = location.size();
        if (canonicKey.size() > len &&
            canonicKey[len] == delim &&
            canonicKey.compare(0, len, location) == 0)
        {
            return location;
        }
    }
    return "";
}
void IndicatorsFiltersManager::filterKeywords(
const std::string &key,
Waap::Keywords::KeywordsSet& keywords,
std::vector<std::string>& filteredKeywords)
{
for (auto keyword = keywords.begin(); keyword != keywords.end(); )
{
if (shouldFilterKeyword(key, *keyword))
{
filteredKeywords.push_back(*keyword);
keyword = keywords.erase(keyword);
}
else
{
keyword++;
}
}
}
// Passes a raw sample for `key` to the type filter. Keys starting with "url#" are ignored.
void IndicatorsFiltersManager::pushSample(
const std::string& key,
const std::string& sample,
IWaf2Transaction* pTransaction)
{
// rfind(prefix, 0) == 0 is a "starts with" test.
if (key.rfind("url#", 0) == 0)
{
return;
}
m_typeFilter->registerKeywords(key, sample, pTransaction);
}

View File

@@ -0,0 +1,70 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "IndicatorsFilterBase.h"
#include "TrustedSources.h"
#include "KeywordIndicatorFilter.h"
#include "TypeIndicatorsFilter.h"
#include "WaapParameters.h"
#include "i_waapConfig.h"
#include "i_messaging.h"
#include "ScannersDetector.h"
#include "TuningDecisions.h"
#include <cereal/cereal.hpp>
#include <cereal/types/memory.hpp>
#include <cereal/archives/json.hpp>
using namespace Waap::Parameters;
class IWaf2Transaction;
struct Waf2ScanResult;
// Front end for the indicator filters: owns a keywords-frequency filter and a type filter,
// plus the scanners detector and tuning decisions they share, and persists the
// trusted-sources parameters through SerializeToFileBase.
class IndicatorsFiltersManager : public I_IndicatorsFilter, public SerializeToFileBase
{
public:
IndicatorsFiltersManager(const std::string &remotePath, const std::string &assetId,
I_WaapAssetState* pWaapAssetState);
~IndicatorsFiltersManager();
// Feeds the keywords found for `key` into the learning filters.
virtual void registerKeywords(const std::string& key, Waap::Keywords::KeywordsSet& keywords,
IWaf2Transaction* pWaapTransaction);
// True when any filter considers `keyword` benign for `key`.
virtual bool shouldFilterKeyword(const std::string &key, const std::string &keyword) const;
// Moves filtered keywords from `keywords` into `filteredKeywords`.
virtual void filterKeywords(const std::string &key, Waap::Keywords::KeywordsSet& keywords,
std::vector<std::string>& filteredKeywords);
// Passes a raw sample to the type filter (keys starting with "url#" are ignored).
void pushSample(const std::string& key, const std::string& sample, IWaf2Transaction* pTransaction);
// Applies a configuration; returns false only when `pConfig` is null.
bool loadPolicy(IWaapConfig* pConfig);
void reset();
// Fills `filteredKeywordsVerbose` with, per filter name, the entries that filter accepted.
void filterVerbose(const std::string &param,
std::vector<std::string>& filteredKeywords,
std::map<std::string, std::vector<std::string>>& filteredKeywordsVerbose);
// Key helpers: keys have the form "<location>#<name>".
static std::string getLocationFromKey(const std::string& canonicKey, IWaf2Transaction* pTransaction);
static std::string generateKey(const std::string& location,
const std::string& param,
const IWaf2Transaction* pTransaction);
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
virtual std::set<std::string> getParameterTypes(const std::string& canonicParam) const;
private:
static std::string extractUri(const std::string& referer, const IWaf2Transaction* pTransaction);
// Both filters are created in the constructor body, after restore() has run.
std::unique_ptr<KeywordIndicatorFilter> m_keywordsFreqFilter;
std::unique_ptr<TypeIndicatorFilter> m_typeFilter;
std::shared_ptr<Waap::TrustedSources::TrustedSourcesParameter> m_trustedSrcParams;
// The addresses of these two members are handed to the filters, so they are declared
// after them and therefore destroyed before the filters.
// NOTE(review): confirm the filters do not touch these pointers in their destructors.
ScannerDetector m_ignoreSources;
TuningDecision m_tuning;
};

View File

@@ -0,0 +1,60 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// #define WAF2_LOGGING_ENABLE
#include "debug.h"
#include "KeyStack.h"
#include <string.h>
#include "assert.h"
USE_DEBUG_FLAG(D_WAAP);
// Stores `name` (used only in trace messages) and starts with depth 0.
// The pointer is kept as-is, not copied, so the string must outlive this object.
KeyStack::KeyStack(const char* name)
:m_name(name), m_nameDepth(0) {
}
// Appends `subkeySize` bytes of `subkey` to the key, preceded by '.' unless the key is
// still empty. The key length before the append is recorded so pop() can truncate back
// to it. `countDepth` controls whether the depth counter is incremented.
void KeyStack::push(const char* subkey, size_t subkeySize, bool countDepth) {
    m_stack.push_back(m_key.size());

    // Prefix all subkeys (except the first) with '.'
    if (!m_key.empty()) {
        m_key.push_back('.');
    }
    m_key.append(subkey, subkeySize);

    if (countDepth) {
        m_nameDepth++;
    }

    dbgTrace(D_WAAP) << "KeyStack(" << m_name << ")::push(): '" << std::string(subkey, subkeySize) <<
        "' => full_key='" << std::string(m_key.data(), m_key.size()) << "'";
}
// Removes the most recently pushed subkey (and its '.' separator) from the key.
// `log` is only used in debug output; `countDepth` controls whether the depth counter is
// decremented and should match the value passed to the corresponding push().
// Popping with nothing pushed is reported and ignored.
void KeyStack::pop(const char* log, bool countDepth) {
    // Only the stack decides whether there is something to pop. The key text can be
    // empty although entries were pushed (empty subkeys pushed first); those pushes still
    // recorded a stack entry and possibly a depth increment, so they must be undone here.
    // The previous check on m_key.empty() skipped them and left both unbalanced.
    if (m_stack.empty()) {
        dbgDebug(D_WAAP) << "KeyStack(" << m_name << ")::pop(): [ERROR] ATTEMPT TO POP FROM EMPTY KEY STACK! " << log;
        return;
    }

    if (countDepth) {
        m_nameDepth--;
    }

    // Remove last subkey: truncate the key to the length recorded by the matching push().
    // The recorded position never exceeds the current key length, so erase() cannot throw.
    m_key.erase(m_stack.back());
    m_stack.pop_back();

    dbgTrace(D_WAAP) << "KeyStack(" << m_name << ")::pop(): full_key='" <<
        std::string(m_key.data(), (int)m_key.size()) << "': pop_key=" << log << "'";
}

View File

@@ -0,0 +1,81 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __KEYSTACK_H__0a8039e6
#define __KEYSTACK_H__0a8039e6
#include <stddef.h>
#include <string>
#include <vector>
// Represent string (key) that is concatenation of substrings (subkeys) separated by '.' character.
// Mostly emulates API of C++ std::string class, with addition of push() and pop() methods
// that append individual subkey and delete last subkey from the string efficiently.
// Represent string (key) that is concatenation of substrings (subkeys) separated by '.' character.
// Mostly emulates API of C++ std::string class, with addition of push() and pop() methods
// that append individual subkey and delete last subkey from the string efficiently.
//
// The first pushed subkey is treated separately: it is returned by first(), while
// c_str(), str() and size() describe everything that follows it (the "tail").
class KeyStack {
public:
    // `name` is used in trace messages only.
    KeyStack(const char *name);

    // Appends a subkey / removes the last one. `countDepth` selects whether depth() changes.
    void push(const char *subkey, size_t subkeySize, bool countDepth=true);
    void pop(const char* log, bool countDepth=true);

    // True when the whole key text (including the first subkey) is empty.
    bool empty() const { return m_key.empty(); }

    // Forgets all subkeys.
    // NOTE(review): the depth counter is not reset here - confirm that is intended.
    void clear() { m_key.clear(); m_stack.clear(); }

    size_t depth() const { return m_nameDepth; }

    // Length of the tail, i.e. of what str() returns, computed without building the string.
    size_t size() const {
        return hasTail() ? m_key.size() - tailStart() : 0;
    }

    // Tail of the key as a C string.
    // If pushed none or once - returns an empty string (the once-pushed subkey is only
    // returned by first()). If pushed twice or more - returns all subkeys starting from the
    // second one, or an empty string when nothing follows the first subkey's separator.
    // A non-empty result points into the internal buffer and is invalidated by push/pop/clear.
    const char *c_str() const {
        if (!hasTail()) {
            return "";
        }
        return m_key.c_str() + tailStart();
    }

    // Same content as c_str(), returned as a std::string copy.
    const std::string str() const {
        if (!hasTail()) {
            return "";
        }
        return m_key.substr(tailStart());
    }

    // The first pushed subkey, or an empty string when nothing was pushed.
    const std::string first() const {
        if (m_stack.empty()) {
            return "";
        }
        if (m_stack.size() == 1) {
            return m_key;
        }
        // m_stack.size() > 1, so m_stack[1] is valid: it is the key length after the first push.
        return m_key.substr(0, m_stack[1]);
    }

private:
    // True when at least two subkeys were pushed and some text follows the separator
    // that was (or would have been) written after the first subkey.
    bool hasTail() const {
        return m_stack.size() > 1 && m_stack[1] + 1 < m_key.size();
    }

    // Offset of the tail inside m_key; only meaningful when hasTail() is true.
    size_t tailStart() const {
        return m_stack[1] + 1;
    }

    const char *m_name;
    std::string m_key;
    std::vector<size_t> m_stack; // position of individual key name starts in m_key,
                                 // used to backtrack 1 key at a time.
    int m_nameDepth;
};
#endif // __KEYSTACK_H__0a8039e6

View File

@@ -0,0 +1,125 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "KeywordIndicatorFilter.h"
#include "waap.h"
#include "WaapConfigApi.h"
#include "WaapConfigApplication.h"
#include "FpMitigation.h"
#include "i_transaction.h"
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/trim.hpp>
#define KEYWORDS_FILTER_PATH(dirPath) dirPath + "/5.data"
#define KEYWORDS_FILTER_TRUSTED_PATH(dirPath) dirPath + "/7.data"
#define KEYWORD_FILTER_PARAM(var) "KeywordsFilter." var
// Constructs the filter by forwarding everything to IndicatorFilterBase:
//  - the two data file paths are derived from dirPath ("/5.data" and "/7.data"),
//  - a non-empty remotePath gets "/Indicators" appended, an empty one is passed as is,
//  - the remaining arguments are forwarded unchanged (the literal "" argument's meaning
//    is defined by the base constructor, which is not visible here).
// Afterwards the confidence calculator is tagged with this class' name as its owner.
KeywordIndicatorFilter::KeywordIndicatorFilter(
    std::string dirPath,
    const std::string& remotePath,
    const std::string& assetId,
    I_IgnoreSources* ignoreSources,
    TuningDecision* tuning,
    size_t minSources,
    size_t minIntervals,
    std::chrono::minutes intervalDuration,
    double ratioThreshold)
        :
    IndicatorFilterBase(
        KEYWORDS_FILTER_PATH(dirPath),
        KEYWORDS_FILTER_TRUSTED_PATH(dirPath),
        remotePath.empty() ? remotePath : remotePath + "/Indicators",
        assetId,
        minSources,
        minIntervals,
        intervalDuration,
        ratioThreshold,
        "",
        tuning,
        ignoreSources
    )
{
    m_confidence_calc.setOwner("KeywordIndicatorFilter");
}
// Nothing to release explicitly; members and the base class clean up after themselves.
KeywordIndicatorFilter::~KeywordIndicatorFilter() = default;
// Records that `source` was seen for parameter `key`, without attaching any keyword.
void KeywordIndicatorFilter::registerSource(const std::string &key, const std::string &source)
{
    dbgTrace(D_WAAP)
        << "registering source: "
        << source
        << " for parameter: "
        << key;

    m_confidence_calc.logSourceHit(key, source);
}
// Tells whether `keyword` should be filtered for parameter `key`.
// Returns true when any of the following reports confidence:
//  1. the regular calculator, for the keyword exactly as given;
//  2. the trusted calculator (consulted only when a policy is set), using the policy's
//     number of sources;
//  3. the regular calculator, for the whitespace-trimmed keyword.
// All applicable lookups are always performed; none is skipped once another has matched.
bool KeywordIndicatorFilter::shouldFilterKeyword(const std::string &key, const std::string &keyword) const
{
    const bool confidentAsGiven = m_confidence_calc.is_confident(key, keyword);

    bool confidentTrusted = false;
    if (m_policy != nullptr)
    {
        confidentTrusted = m_trusted_confidence_calc.is_confident(key, keyword, m_policy->getNumOfSources());
    }

    const std::string trimmedKeyword = boost::algorithm::trim_copy(keyword);
    const bool confidentTrimmed = m_confidence_calc.is_confident(key, trimmedKeyword);

    return confidentAsGiven || confidentTrusted || confidentTrimmed;
}
// Reads the "learnIndicators.*" and "remoteSync" settings from pParams (falling back to
// the CONFIDENCE_* defaults), applies the remote-sync switch to both calculators and
// resets the regular confidence calculator with the new parameters.
// Returns whatever m_confidence_calc.reset() returns.
// Note: std::stoul / std::stod throw if a configured value is not numeric.
bool KeywordIndicatorFilter::loadParams(std::shared_ptr<Waap::Parameters::WaapParameters> pParams)
{
    ConfidenceCalculatorParams params;

    const std::string minSourcesStr =
        pParams->getParamVal("learnIndicators.minSources", std::to_string(CONFIDENCE_MIN_SOURCES));
    params.minSources = std::stoul(minSourcesStr);

    const std::string minIntervalsStr =
        pParams->getParamVal("learnIndicators.minIntervals", std::to_string(CONFIDENCE_MIN_INTERVALS));
    params.minIntervals = std::stoul(minIntervalsStr);

    const std::string intervalDurationStr =
        pParams->getParamVal(
            "learnIndicators.intervalDuration",
            std::to_string(CONFIDENCE_WINDOW_INTERVAL.count())
        );
    params.intervalDuration = std::chrono::minutes(std::stoul(intervalDurationStr));

    const std::string ratioStr =
        pParams->getParamVal("learnIndicators.ratio", std::to_string(CONFIDENCE_THRESHOLD));
    params.ratioThreshold = std::stod(ratioStr);

    // Both switches are on unless the configured text is "false" (case-insensitive).
    const std::string learnPermanentlyStr = pParams->getParamVal("learnIndicators.learnPermanently", "true");
    const bool learnPermanentlyDisabled = boost::iequals(learnPermanentlyStr.c_str(), "false");
    params.learnPermanently = !learnPermanentlyDisabled;

    const std::string remoteSyncStr = pParams->getParamVal("remoteSync", "true");
    const bool syncEnabled = !boost::iequals(remoteSyncStr, "false");

    dbgTrace(D_WAAP) << params << " remote sync: " << remoteSyncStr;

    m_confidence_calc.setRemoteSyncEnabled(syncEnabled);
    m_trusted_confidence_calc.setRemoteSyncEnabled(syncEnabled);

    return m_confidence_calc.reset(params);
}
// Registers the keywords found for parameter `key` in the given transaction.
//  - The source is the transaction's source identifier; the trusted source comes from
//    getTrustedSource().
//  - With no keywords at all, only the source hit itself is recorded.
//  - Each keyword is whitespace-trimmed (on a copy) before being registered.
//  - When a tuning decision marks either the transaction URI or its last scanned sample
//    as BENIGN, the keywords are registered a second time under the same source prefixed
//    with "TuningDecisionSource_".
void KeywordIndicatorFilter::registerKeywords(const std::string& key, Waap::Keywords::KeywordsSet& keywords,
    IWaf2Transaction* pTransaction)
{
    std::string source(pTransaction->getSourceIdentifier());
    std::string trusted_source = getTrustedSource(pTransaction);

    // Registers all keywords under the value `source` holds at the time of the call.
    auto registerAllKeywords = [&]() {
        for (auto keyword : keywords)
        {
            boost::algorithm::trim(keyword);
            registerKeyword(key, keyword, source, trusted_source);
        }
    };

    if (keywords.empty())
    {
        registerSource(key, source);
    }
    registerAllKeywords();

    if (m_tuning == nullptr)
    {
        return;
    }

    if (m_tuning->getDecision(pTransaction->getUri(), URL) != BENIGN &&
        m_tuning->getDecision(pTransaction->getLastScanSample(), PARAM_VALUE) != BENIGN)
    {
        return;
    }

    source = "TuningDecisionSource_" + source;
    registerAllKeywords();
}

View File

@@ -0,0 +1,48 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "IndicatorsFilterBase.h"
#include "ConfidenceCalculator.h"
#include "WaapParameters.h"
#define CONFIDENCE_MIN_SOURCES 3
#define CONFIDENCE_MIN_INTERVALS 5
#define CONFIDENCE_THRESHOLD 0.8
#define CONFIDENCE_WINDOW_INTERVAL std::chrono::minutes(120)
// Indicator filter specialised for keywords, built on IndicatorFilterBase.
// It registers the keywords seen per parameter key and answers whether a given keyword
// should be filtered for that key.
class KeywordIndicatorFilter : public IndicatorFilterBase
{
public:
// dirPath      - directory from which the filter's data file paths are derived.
// remotePath   - remote location; may be empty.
// assetId      - identifier of the asset this filter belongs to.
// ignoreSources, tuning - optional collaborators (tuning may be nullptr).
// minSources, minIntervals, intervalDuration, ratioThreshold - confidence parameters,
// defaulting to the CONFIDENCE_* macros defined above.
KeywordIndicatorFilter(std::string dirPath,
const std::string& remotePath,
const std::string& assetId,
I_IgnoreSources* ignoreSources,
TuningDecision* tuning = nullptr,
size_t minSources = CONFIDENCE_MIN_SOURCES,
size_t minIntervals = CONFIDENCE_MIN_INTERVALS,
std::chrono::minutes intervalDuration = CONFIDENCE_WINDOW_INTERVAL,
double ratioThreshold = CONFIDENCE_THRESHOLD);
~KeywordIndicatorFilter();
// Registers all keywords found for `key` in the given transaction.
virtual void registerKeywords(const std::string& key, Waap::Keywords::KeywordsSet& keywords,
IWaf2Transaction* pTransaction);
// Returns true when `keyword` should be filtered for parameter `key`.
virtual bool shouldFilterKeyword(const std::string &key, const std::string &keyword) const;
// Loads the confidence parameters from pParams; returns the result of resetting the calculator.
bool loadParams(std::shared_ptr<Waap::Parameters::WaapParameters> pParams);
private:
// Records a source hit for `key` without any keyword.
void registerSource(const std::string &key, const std::string &source);
};

View File

@@ -0,0 +1,81 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "KeywordTypeValidator.h"
#include <cereal/archives/json.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/types/unordered_set.hpp>
#include "debug.h"
#include "Waf2Util.h"
USE_DEBUG_FLAG(D_WAAP);
// Binds the validator to the file at mapFilePath, starts with an empty keyword/type map
// and immediately calls restore().
// NOTE(review): restore() is not defined in this file; presumably inherited from
// SerializeToFileBase and ends up calling deserialize() - confirm.
KeywordTypeValidator::KeywordTypeValidator(const std::string& mapFilePath)
        :
    SerializeToFileBase(mapFilePath),
    m_keywordTypeMap()
{
    restore();
}
// No explicit cleanup needed; the map member releases its own storage.
KeywordTypeValidator::~KeywordTypeValidator() = default;
// Intentionally writes nothing: the output stream is left untouched.
void KeywordTypeValidator::serialize(std::ostream& stream)
{
    // Reference the parameter only to silence "unused parameter" warnings.
    static_cast<void>(stream);
}
// Deliberately a no-op so that the existing map file is never overwritten.
void KeywordTypeValidator::saveData()
{
// Intentionally empty: do not override the existing file.
}
void KeywordTypeValidator::deserialize(std::istream& stream)
{
cereal::JSONInputArchive archive(stream);
std::unordered_map<std::string, std::unordered_set<std::string>> typesStrToKeysMap;
archive(cereal::make_nvp("keywordsTypeMap", typesStrToKeysMap));
for (auto typeStrItr : typesStrToKeysMap)
{
ParamType type = Waap::Util::convertTypeStrToEnum(typeStrItr.first);
for (auto keyword : typeStrItr.second)
{
if (m_keywordTypeMap.find(keyword) == m_keywordTypeMap.end())
{
// initialize type set
m_keywordTypeMap[keyword];
}
m_keywordTypeMap[keyword].insert(type);
}
}
}
// Returns true when `keyword` is known and `type` is one of the types recorded for it.
// An unknown keyword is traced and reported as false.
bool KeywordTypeValidator::isKeywordOfType(const std::string& keyword, ParamType type) const
{
    const auto entry = m_keywordTypeMap.find(keyword);
    if (entry == m_keywordTypeMap.end())
    {
        dbgTrace(D_WAAP) << "keyword: " << keyword << " not found";
        return false;
    }

    const auto &knownTypes = entry->second;
    return knownTypes.count(type) > 0;
}

View File

@@ -0,0 +1,35 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <unordered_map>
#include <unordered_set>
#include "WaapEnums.h"
#include "i_serialize.h"
// Answers whether a keyword belongs to a given parameter type, based on a
// keyword -> set-of-types map loaded from a file through SerializeToFileBase.
class KeywordTypeValidator : public SerializeToFileBase
{
public:
// mapFilePath - path of the file holding the serialized keyword/type mapping.
KeywordTypeValidator(const std::string& mapFilePath);
~KeywordTypeValidator();
// Returns true when `keyword` is known and `type` is among its recorded types.
bool isKeywordOfType(const std::string& keyword, ParamType type) const;
// Serialization hooks; this class only reads its data (see the implementations).
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
virtual void saveData();
private:
// For each keyword, the set of parameter types it is associated with.
std::unordered_map<std::string, std::unordered_set<ParamType>> m_keywordTypeMap;
};

View File

@@ -0,0 +1,63 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "LogGenWrapper.h"
#include "log_generator.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP);
// Creates the wrapped LogGen.
//  - When maybe_trigger holds a log trigger, the LogGen is built from that trigger
//    together with title, security_type, severity, priority and the drop/prevent flag.
//  - Otherwise a warning (including the error carried by maybe_trigger) is emitted and
//    the LogGen is built from title, audience, severity and priority only.
// In both cases the log is tagged WAF and THREAT_PREVENTION.
LogGenWrapper::LogGenWrapper(
    const Maybe<LogTriggerConf, Config::Errors>& maybe_trigger,
    const std::string& title,
    const ReportIS::Audience& audience,
    const LogTriggerConf::SecurityType& security_type,
    const ReportIS::Severity& severity,
    const ReportIS::Priority& priority,
    bool is_action_drop_or_prevent)
        :
    m_log_gen(nullptr)
{
    if (maybe_trigger.ok()) {
        m_log_gen = std::make_unique<LogGen>(
            maybe_trigger.unpack(),
            title,
            security_type,
            severity,
            priority,
            is_action_drop_or_prevent,
            ReportIS::Tags::WAF,
            ReportIS::Tags::THREAT_PREVENTION
        );
        return;
    }

    // No trigger available: report why and fall back to a trigger-less log generator.
    dbgWarning(D_WAAP) << "Couldn't get log trigger from the I/S. " <<
        "Continuing with waap log trigger policy..." <<
        "Reason: " << maybe_trigger.getErr();

    m_log_gen = std::make_unique<LogGen>(
        title,
        audience,
        severity,
        priority,
        ReportIS::Tags::WAF,
        ReportIS::Tags::THREAT_PREVENTION
    );
}
// Defined out of line, in the translation unit that includes log_generator.h, so that
// the owned LogGen is destroyed where its type is complete.
LogGenWrapper::~LogGenWrapper() = default;
// Gives access to the wrapped log generator created by the constructor.
LogGen& LogGenWrapper::getLogGen()
{
    LogGen &wrapped = *m_log_gen;
    return wrapped;
}

View File

@@ -0,0 +1,46 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __LOG_GEN_WRAPPER_H__
#define __LOG_GEN_WRAPPER_H__
#include "report/report_enums.h"
#include "maybe_res.h"
#include "config.h"
#include "generic_rulebase/triggers_config.h"
#include <string>
#include <memory>
class LogTriggerConf;
class LogGen;
// Owns a LogGen instance and hides how it is constructed: with a log trigger when one
// is available, or without one otherwise (see the constructor's implementation).
class LogGenWrapper
{
public:
// maybe_trigger - log trigger configuration, or the error explaining why it is missing.
// title, audience, security_type, severity, priority, is_action_drop_or_prevent -
// values forwarded to the LogGen constructor selected by maybe_trigger's state.
LogGenWrapper(
const Maybe<LogTriggerConf, Config::Errors>& maybe_trigger,
const std::string& title,
const ReportIS::Audience& audience,
const LogTriggerConf::SecurityType& security_type,
const ReportIS::Severity& severity,
const ReportIS::Priority& priority,
bool is_action_drop_or_prevent);
~LogGenWrapper();
// Returns a reference to the owned log generator.
LogGen& getLogGen();
private:
// The wrapped log generator; LogGen is only forward-declared in this header.
std::unique_ptr<LogGen> m_log_gen;
};
#endif

View File

@@ -0,0 +1,34 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "OpenRedirectDecision.h"
// Creates a decision of the given type with an empty redirect link.
OpenRedirectDecision::OpenRedirectDecision(DecisionType type)
        :
    SingleDecision(type),
    m_link()
{
}
// Human-readable name of this decision type.
std::string OpenRedirectDecision::getTypeStr() const
{
    return std::string("Open Redirect");
}
// Stores a copy of the link associated with this decision.
void OpenRedirectDecision::setLink(const std::string& link)
{
    m_link.assign(link);
}
// Returns a copy of the stored link (empty until setLink() is called).
std::string OpenRedirectDecision::getLink() const
{
    std::string linkCopy(m_link);
    return linkCopy;
}

View File

@@ -0,0 +1,34 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __OPEN_REDIRECT_DEICSION_H__
#define __OPEN_REDIRECT_DEICSION_H__
#include "SingleDecision.h"
#include "DecisionType.h"
#include <string>
// Decision object for the "Open Redirect" check; extends SingleDecision with the link
// the decision refers to.
class OpenRedirectDecision: public SingleDecision
{
public:
// type - decision type forwarded to SingleDecision.
explicit OpenRedirectDecision(DecisionType type);
// Returns the display name of this decision type.
std::string getTypeStr() const override;
// Stores / returns the link associated with this decision.
void setLink(const std::string& link);
std::string getLink() const;
private:
// Link associated with this decision; empty until setLink() is called.
std::string m_link;
};
#endif

View File

@@ -0,0 +1,804 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "PHPSerializedDataParser.h"
#include "log_generator.h"
#include <errno.h>
USE_DEBUG_FLAG(D_WAAP_PARSER_PHPSERIALIZE);
const std::string PHPSerializedDataParser::m_parserName = "PHPSerializedDataParser";
// Creates a parser that reports the keys and values it extracts to outReceiver.
// The parser starts from a default-constructed state with a key stack named
// "php_serialized".
PHPSerializedDataParser::PHPSerializedDataParser(IParserStreamReceiver &outReceiver)
    : m_state(), m_outReceiver(outReceiver), m_keyStack("php_serialized")
{
    // The error flag is not part of the initializer list, yet it is read right after
    // push() on freshly constructed parsers (see the nested parser in handleValue()).
    // Set it explicitly so that it never starts out indeterminate.
    // NOTE(review): redundant but harmless if the header already gives m_error an
    // initializer - confirm against PHPSerializedDataParser.h.
    m_error = false;
}
// Feeds a chunk of PHP-serialized data into the parser.
//
// buf/len - the bytes to parse. A call with len == 0 marks the end of input: a pending
//           key or value is flushed to the receiver, and it is an error if the parser is
//           not at the start of a new element (phase_state != s_start).
//
// Returns:
//   (size_t)-1 - parse error (m_error is set);
//   1          - end-of-input call (len == 0) completed;
//   0          - the whole buffer was consumed.
//
// The parser is a state machine driven one character at a time by m_state.phase_state:
//   s_start          - the next character selects the element type;
//   s_colon          - expects ':' before a length (string, array, object, class);
//   s_prim_colon     - expects ':' before a primitive value (int, double, bool, ref);
//   s_length         - accumulates the decimal length up to the next ':';
//   s_value          - the character is handed to handleValue();
//   s_data_end /
//   s_class_data_end - the character is handed to onDataEnd().
size_t
PHPSerializedDataParser::push(const char* buf, size_t len)
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push()";
    size_t i = 0;
    char c;

    if (len == 0)
    {
        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ";

        // Input ended in the middle of an element.
        if(m_state.phase_state != s_start) {
            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ;"
                "phase_state != s_start ; m_state.phase_state: " << m_state.phase_state;
            m_error = true;
            return -1;
        }

        // Flush whatever key/value is still pending.
        switch (m_state.kv_state)
        {
            case (s_onKey):
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ; s_onKey";
                m_outReceiver.onKey(m_value.c_str(), m_value.length());
                break;
            }
            case (s_onValue):
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ; s_onValue";
                m_outReceiver.onValue(m_value.c_str(), m_value.length());
                m_outReceiver.onKvDone();
                break;
            }
            case (s_clear_kv):
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ; s_clear_kv;"
                    "State Finished has expected";
                // Parsing finished as expected; nothing is pending.
                break;
            }
        }
        return 1;
    }

    while (i < len)
    {
        c = buf[i];
        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push():while(i<len)" "check: " << c
            << " state: " << m_state.phase_state;

        switch (m_state.phase_state)
        {
            case s_data_end:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_data_end";
                if (!onDataEnd(c, true))
                {
                    // Error
                    return -1;
                }
                break;
            }
            case s_class_data_end:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_class_data_end";
                if (!onDataEnd(c, false))
                {
                    // Error
                    return -1;
                }
                break;
            }
            case s_value:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_value";
                size_t result = handleValue(c);
                if ( result == (size_t)-1 )
                {
                    return -1;
                }
                break;
            }
            // Reading the length of a complex type: array, string, object or custom class.
            case s_length:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_length";
                if (c == ':')
                {
                    // The length text is complete: convert it to an integer.
                    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): m_length" <<
                        m_length;
                    char *pEnd = NULL;
                    m_state.length = ::strtoll(m_length.c_str(), &pEnd, 10);
                    // Reject the length unless the whole text was consumed by the conversion.
                    if (pEnd != m_length.c_str() + m_length.length())
                    {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) <<
                            "Failed to convert length from string to integer (Invalid arguments).";
                        m_error = true;
                        return -1;
                    }
                    m_state.phase_state = s_value;
                    m_length.clear();
                    break;
                }
                m_length.push_back(c);
                break;
            }
            // Colon of a primitive (int, double, bool, ref): no length follows.
            case s_prim_colon:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_prim_colon";
                if (c != ':')
                {
                    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_prim_colon" <<
                        "Error: ':' should appear, instead " << c << " appeared";
                    m_error = true;
                    return -1;
                }
                m_state.phase_state = s_value;
                break;
            }
            // Colon of an object, string, array or class: a length follows.
            case s_colon:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_colon";
                if (c != ':')
                {
                    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_colon" <<
                        "Error: ':' should appear, instead " << c << " appeared";
                    m_error = true;
                    return -1;
                }
                m_state.phase_state = s_length;
                break;
            }
            // s_start is entered every time a new element type has to be discovered
            // (it is also the initial state).
            case s_start:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start";
                // tolower() requires a value representable as unsigned char (or EOF).
                // The input is arbitrary request data, so a byte >= 0x80 held in a plain
                // (possibly signed) char must be converted first to avoid undefined behaviour.
                switch (tolower(static_cast<unsigned char>(c))) {
                    case 'n':
                    {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: NULL";
                        m_state.type_state = s_null;
                        m_state.phase_state = s_value;
                        break;
                    }
                    case 'a':
                    {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Array";
                        // An array cannot be used as a key.
                        if (m_state.kv_state == s_onKey)
                        {
                            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: " <<
                                "Array cannot be a key";
                            m_error = true;
                            return -1;
                        }

                        // Inside an enclosing container the array itself is reported as an
                        // empty value; the next key will be the first key inside the array.
                        if (!m_stack.empty())
                        {
                            // Send empty value for the case of array/object as a subitem
                            m_value = "";
                            onStateValue();
                            m_state.kv_state = s_clear_kv;
                        }
                        m_state.type_state = s_start_array;
                        m_state.phase_state = s_colon;
                        break;
                    }
                    case 's': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: String";
                        m_state.type_state = s_start_string;
                        m_state.phase_state = s_colon;
                        break;
                    }
                    case 'b': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Boolean";
                        if (m_state.kv_state == s_onKey)
                        {
                            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: " <<
                                "Boolean cannot be a key";
                            m_error = true;
                            return -1;
                        }
                        m_state.type_state = s_boolean_OnValue;
                        m_state.phase_state = s_prim_colon;
                        break;
                    }
                    case 'i': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Integer";
                        m_state.type_state = s_integer_onValue;
                        m_state.phase_state = s_prim_colon;
                        break;
                    }
                    // Parsing a double like an integer is fine here: integers are not
                    // validated as numbers, they are reported as strings.
                    case 'd': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Double";
                        m_state.type_state = s_integer_onValue;
                        m_state.phase_state = s_prim_colon;
                        break;
                    }
                    case 'o': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Object";
                        if (m_state.kv_state == s_onKey)
                        {
                            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: " <<
                                "Object cannot be a key";
                            m_error = true;
                            return -1;
                        }
                        if (!m_stack.empty())
                        {
                            m_value = "";
                            onStateValue();
                            m_state.kv_state = s_clear_kv;
                        }
                        // The object name is read with the string states first.
                        m_state.isObject = true;
                        m_state.type_state = s_start_string;
                        m_state.phase_state = s_colon;
                        break;
                    }
                    case 'c': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Class";
                        if (m_state.kv_state == s_onKey)
                        {
                            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: " <<
                                "Class cannot be a key";
                            m_error = true;
                            return -1;
                        }
                        if (!m_stack.empty())
                        {
                            m_value = "";
                            onStateValue();
                            m_state.kv_state = s_clear_kv;
                        }
                        // The class name is read with the string states first.
                        m_state.isClass = true;
                        m_state.type_state = s_start_string;
                        m_state.phase_state = s_colon;
                        break;
                    }
                    case 'r': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Reference";
                        m_state.type_state = s_ref_onValue;
                        m_state.phase_state = s_prim_colon;
                        break;
                    }
                    case '}': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: }";
                        if (!onDataEnd(c, false))
                        {
                            // Error
                            return -1;
                        }
                        break;
                    }
                    default: {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) <<
                            "PHPSerializedDataParser::push(): s_start: Unexpected Error. "
                            "Invalid char in s_start: " << c;
                        m_error = true;
                        return -1;
                    }
                }
            }
        }
        ++i;
    }
    return 0;
}
// Consumes one character `c` while the parser is in the s_value phase, dispatching on
// m_state.type_state (the kind of value currently being read).
// Returns 0 when the character was handled, or (size_t)-1 - after setting m_error - when
// the input does not match the expected serialized form.
size_t PHPSerializedDataParser::handleValue (const char &c)
{
switch (m_state.type_state)
{
// Opening of a class body: `c` must be '{'.
case s_start_class:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_class";
if (c != '{')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_class " <<
"Class start with: " << c << " instead of {";
m_error = true;
return -1;
}
std::string keyStack("Class");
std::string val("");
m_keyStack.push(keyStack.c_str(), keyStack.length());
// Remember the class name (collected in m_value) and report it as a key with an empty value.
m_key = m_value;
m_outReceiver.onKey(m_value.c_str(), m_value.length());
m_outReceiver.onValue(val.c_str(), val.length());
m_outReceiver.onKvDone();
m_value.clear();
// Reset isClass: from here on this state handles the class body, not its definition.
m_state.isClass = false;
// current_length is used as a countdown of the body characters still to be read.
m_state.current_length = m_state.length;
m_state.kv_state = s_onKey;
m_state.type_state = s_class_onValue;
State state = m_state;
m_stack.push(state);
break;
}
case s_class_onValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_class_onValue";
// Count down the characters received from the buffer.
// Once all of them have been received, '}' should occur.
if (m_state.current_length != 0)
{
m_state.current_length--;
m_value.push_back(c);
break;
}
// The class body may hold a plain string or further serialized data.
// If it is a plain string, the nested parser returns with an error;
// otherwise it parses the body normally.
// Note: the character that arrives once the countdown has reached 0 is not added to m_value.
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): End of Class object" <<
" sending class object data to PHPSerializedDataParser";
PHPSerializedDataParser psdp(m_outReceiver);
psdp.push(m_value.c_str(), m_value.length());
if(psdp.error())
{
// The body is not valid serialized data: report it verbatim under the class name.
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): " <<
"class object data return with an error !";
m_outReceiver.onKey(m_key.c_str(), m_key.length());
m_outReceiver.onValue(m_value.c_str(), m_value.length());
m_outReceiver.onKvDone();
m_value.clear();
m_key.clear();
}
m_state.phase_state = s_class_data_end;
break;
}
// Opening of an object body: `c` must be '{'.
case s_start_object:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_object";
if (c != '{')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_object" <<
"Object start with: " << c << " instead of {";
m_error = true;
return -1;
}
std::string keyStack("Object");
std::string val("");
m_keyStack.push(keyStack.c_str(), keyStack.length());
// Report the object name (collected in m_value) as a key with an empty value.
m_outReceiver.onKey(m_value.c_str(), m_value.length());
m_outReceiver.onValue(val.c_str(), val.length());
m_outReceiver.onKvDone();
m_value.clear();
// Reset isObject: from here on this state handles the object body, not its definition.
m_state.isObject = false;
m_state.kv_state = s_onKey;
m_state.phase_state = s_start;
State state = m_state;
m_stack.push(state);
break;
}
// Opening of an array body: `c` must be '{'.
case s_start_array:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_array";
if (c != '{')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_array" <<
"Array start with: " << c << " instead of {";
m_error = true;
return -1;
}
std::string keyVal("array");
m_keyStack.push(keyVal.c_str(), keyVal.length());
// The first element inside the array is a key.
m_state.kv_state = s_onKey;
m_state.phase_state = s_start;
State state = m_state;
m_stack.push(state);
break;
}
// Opening quote of a string (also used for object and class names).
case s_start_string:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_string";
if (c != '"')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_string" <<
"string start with: " << c << " instead of \"";
m_error = true;
return -1;
}
m_state.current_length = 0;
m_state.type_state = s_string_onValue;
break;
}
// Body of a string: collect characters into m_value until the next '"',
// counting them in current_length; a backslash switches to escape handling.
case s_string_onValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue";
if (c != '"')
{
if (c == '\\')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue " <<
"escape ?: " << c;
m_state.current_length++;
m_state.type_state = s_string_escape;
break;
}
m_value.push_back(c);
m_state.current_length++;
break;
}
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue" <<
" End of String";
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue" <<
"m_state.isClass: " << m_state.isClass;
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue" <<
" m_state.isObject: " << m_state.isObject;
// A string that names an object or class is followed by ':'; a plain string by ';'.
if (m_state.isObject || m_state.isClass)
{
m_state.type_state = s_object_string_calc;
}
else
{
m_state.type_state = s_string_calc;
}
break;
}
// Character right after a backslash inside a string.
case s_string_escape:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_escape";
if (c == 'x')
{
// Possibly the start of "\x00"; decided by the following characters.
m_state.type_state = s_string_escape_x_1;
}
else if (c == '0')
{
// "\0" is stored as a single '@'.
m_value.push_back('@');
m_state.type_state = s_string_onValue;
}
else
{
// Any other escape is kept verbatim: the backslash followed by `c`.
m_value.push_back('\\');
m_value.push_back(c);
m_state.current_length++;
m_state.type_state = s_string_onValue;
}
break;
}
// Character after "\x".
case s_string_escape_x_1:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_escape_x_1";
if (c == '0')
{
m_state.type_state = s_string_escape_x_2;
break;
}
// Not "\x0...": keep "\x" and `c` verbatim.
m_value = m_value + "\\x";
m_value.push_back(c);
m_state.current_length = m_state.current_length + 2;
m_state.type_state = s_string_onValue;
break;
}
// Character after "\x0".
case s_string_escape_x_2:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_escape_x_2";
if (c == '0')
{
// "\x00" is stored as a single '@'.
m_value.push_back('@');
m_state.type_state = s_string_onValue;
break;
}
// Not "\x00": keep "\x0" and `c` verbatim.
m_value = m_value + "\\x0";
m_value.push_back(c);
m_state.current_length = m_state.current_length + 3;
m_state.type_state = s_string_onValue;
break;
}
// Character after the closing quote of an object/class name: must be ':'.
case s_object_string_calc:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_object_string_calc";
if (c != ':') {
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_object_string_calc" <<
" Error: After object name ':' should appear instead " << c << " appeared";
m_error = true;
return -1;
}
// The counted name length must match the declared length.
if (m_state.current_length != m_state.length)
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_object_string_calc" <<
" m_state.current_length: " << m_state.current_length << "!=" << " m_state.length: "
<< m_state.length;
m_error = true;
return -1;
}
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_object_string_calc" <<
" Start object";
// Next comes the body's length, then its opening brace.
m_state.current_length = 0;
m_state.phase_state = s_length;
if (m_state.isObject)
{
m_state.type_state = s_start_object;
}
else
{
m_state.type_state = s_start_class;
}
break;
}
// Character after the closing quote of a plain string: must be ';'.
case s_string_calc:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_calc";
if (c != ';') {
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_calc" <<
" Error: string should end with ';' not with " << c;
m_error = true;
return -1;
}
// The counted string length must match the declared length.
if (m_state.current_length != m_state.length)
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_calc" <<
" m_state.current_length: " << m_state.current_length << "!=" << " m_state.length: "
<< m_state.length;
m_error = true;
return -1;
}
// When handleStateAfterFinish() returns true it has already set up the next state.
if (handleStateAfterFinish("String"))
{
break;
}
m_state.current_length = 0;
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Integer (and double) value: collect everything up to ';' without validation.
case s_integer_onValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_integer_onValue";
if ( c != ';')
{
m_value.push_back(c);
break;
}
if (handleStateAfterFinish("Integer"))
{
break;
}
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Reference value: collect everything up to ';'.
case s_ref_onValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_ref_onValue";
if ( c != ';')
{
m_value.push_back(c);
break;
}
if (handleStateAfterFinish("Reference"))
{
break;
}
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Boolean value: a single '0' or '1' followed by ';'.
case s_boolean_OnValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_boolean_OnValue";
if (m_value.length() > 1)
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_boolean_OnValue" <<
" Error length is bigger than 1 : Boolean should be with 0 or 1";
m_error = true;
return -1;
}
if ( c != ';' )
{
m_value.push_back(c);
break;
}
// A boolean can only be 0 or 1.
if (m_value.compare("1") != 0 && m_value.compare("0") != 0)
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_boolean_OnValue" <<
" Error Boolean value is not 0 or 1 : " << m_value;
m_error = true;
return -1;
}
if (handleStateAfterFinish("Boolean"))
{
break;
}
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Null value: nothing but the terminating ';' is accepted.
case s_null:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_null";
if (c != ';')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_null" <<
" Null should end with ';' not with : " << c;
m_error = true;
return -1;
}
if (handleStateAfterFinish("Null"))
{
break;
}
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Any other type_state is unexpected in the s_value phase.
default:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): default" <<
" Unexpected Error.";
m_error = true;
return -1;
}
}
return 0;
}
// Handles the end of a container's data and verifies it received the declared number
// of items.
//   termChar      - the character that terminated the data (e.g. '}').
//   checkEndBlock - when true, termChar is required to be '}'.
// Returns false (with m_error set) on a count mismatch or an unexpected terminator.
// On success the enclosing state, if any, is restored from the stack and parsing
// continues at s_start.
bool
PHPSerializedDataParser::onDataEnd(char termChar, bool checkEndBlock)
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::onDataEnd (phase_state=" <<
        m_state.phase_state << ", termChar='" << termChar << "')";

    const bool countMismatch = (m_state.current_length != m_state.length);
    if (countMismatch)
    {
        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push():" <<
            "current_length " << m_state.current_length << "!=" << " m_state.length " << m_state.length;
        m_error = true;
        return false;
    }

    const bool badTerminator = checkEndBlock && termChar != '}';
    if (badTerminator)
    {
        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push():" <<
            "termChar is not }";
        m_error = true;
        return false;
    }

    if (!m_stack.empty())
    {
        m_state.isObject = false;
        // Drop the key pushed for the container being closed, then go back to the
        // state that was saved when the container was opened.
        m_keyStack.pop(m_keyStack.first().c_str());
        m_state = m_stack.top();
        m_stack.pop();
        m_state.phase_state = s_start;
    }

    return true;
}
// Reports a top-level value (one not nested in any container): the type name is used
// as the key and the collected text as the value. The parser is then reset to read the
// next element.
void
PHPSerializedDataParser::onEmptyStack(std::string type)
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::onEmptyStack(): stack is empty.";

    m_outReceiver.onKey(type.data(), type.size());
    m_outReceiver.onValue(m_value.data(), m_value.size());
    m_outReceiver.onKvDone();

    // Reset for the next element.
    m_value.clear();
    m_state.current_length = 0;
    m_state.phase_state = s_start;
}
// Reports the collected text as a key. When the key stack is not empty, the key is
// prefixed with the key stack's str() and a '.'. Afterwards the parser expects the
// matching value.
void
PHPSerializedDataParser::onStateKey()
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::onStateKey()";

    if (m_keyStack.size() > 0)
    {
        m_value.insert(0, m_keyStack.str() + ".");
    }
    m_outReceiver.onKey(m_value.c_str(), m_value.length());

    // Start collecting the value from scratch.
    m_state.current_length = 0;
    m_value.clear();

    // A key was just emitted, so a value comes next.
    m_state.kv_state = s_onValue;
    m_state.phase_state = s_start;
}
void
PHPSerializedDataParser::onStateValue()
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::onStateValue()";
// change state from value to key.
m_state.kv_state = s_onKey;
// Look at our last state and raise its member counter.
State &stack_state = m_stack.top();
stack_state.current_length++;
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "stack_state.current_length: " << stack_state.current_length;
// set Value and KvDone.
m_outReceiver.onValue(m_value.c_str(), m_value.length());
m_outReceiver.onKvDone();
}
// checking if current length is equal to the length the object got
// and move it to s_data_end
bool
PHPSerializedDataParser::onCheckLength()
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::onCheckLength()";
State &stack_state = m_stack.top();
if (stack_state.current_length == stack_state.length)
{
m_state.current_length = 0;
m_value.clear();
m_state = m_stack.top();
m_state.phase_state = s_data_end;
m_state.kv_state = s_clear_kv;
return true;
}
return false;
}
// Dispatches what to do once a complete item of the given type has been read:
//   - no enclosing container (empty m_stack): emit it as a stand-alone pair, return true;
//   - the item was a key: emit the key, return true;
//   - otherwise the item was a value: emit it and return whether the enclosing container is now full.
bool
PHPSerializedDataParser::handleStateAfterFinish(const std::string &type)
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::handleStateAfterFinish()";

    // Nothing on the stack means there is no enclosing Object / Custom / Array.
    if (m_stack.empty())
    {
        onEmptyStack(type);
        return true;
    }

    if (m_state.kv_state == s_onKey)
    {
        onStateKey();
        return true;
    }

    // Inside a container and positioned on a value.
    onStateValue();
    return onCheckLength();
}
// Signals end of stream by pushing an empty buffer into the parser.
void
PHPSerializedDataParser::finish()
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::finish()";
    push(nullptr, 0);
}
// Returns the parser's name as stored in m_parserName.
const std::string &
PHPSerializedDataParser::name() const
{
return m_parserName;
}
// Returns true when the parser has flagged an error (m_error), tracing the fact in that case.
bool
PHPSerializedDataParser::error() const
{
    if (!m_error)
    {
        return false;
    }
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::error(): parser returned with an error";
    return true;
}

View File

@@ -0,0 +1,89 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <string.h>
#include "ParserBase.h"
#include "KeyStack.h"
#include <stack>
// Streaming parser for PHP-serialized data. Input is fed through push() in one or more pieces and
// terminated with finish(); parsed keys and values are reported to the IParserStreamReceiver
// passed to the constructor.
class PHPSerializedDataParser : public ParserBase {
public:
    PHPSerializedDataParser(IParserStreamReceiver &outReceiver);
    size_t push(const char* buf, size_t len);
    void finish();
    virtual const std::string &name() const;
    bool error() const;
    // Nesting depth as tracked by the key stack.
    virtual size_t depth() { return m_keyStack.depth(); }
private:
    // Returns true when the enclosing container has received all of its declared members.
    bool onCheckLength ();
    size_t handleValue (const char &c);
    // Emits a finished item as a stand-alone pair, a key or a value, depending on the current state.
    bool handleStateAfterFinish (const std::string &type);
    void onStateValue (); // this function must never be called when the m_stack is empty
    void onStateKey ();
    void onEmptyStack (std::string type);
    // Validates and closes the current container; see the definition for the failure conditions.
    bool onDataEnd (char termChar, bool checkEndBlock);

    // Set once a parsing error was detected; reported through error().
    bool m_error = false;

    // Which kind of item is currently being read.
    enum type_state
    {
        s_start_class,
        s_class_onValue,
        s_object_string_calc,
        s_start_object,
        s_start_array,
        s_null,
        s_start_string,
        s_string_calc,
        s_string_onValue,
        s_string_escape,
        s_string_escape_x_1,
        s_string_escape_x_2,
        s_integer_onValue,
        s_boolean_OnValue,
        s_ref_onValue
    };
    // Position within the item currently being read.
    enum phase_state {
        s_start,
        s_data_end,
        s_class_data_end,
        s_colon,
        s_length,
        s_value,
        s_prim_colon
    };
    // Whether the next item inside a container is a key or a value.
    enum key_value_state {
        s_clear_kv,
        s_onKey,
        s_onValue
    };
    // Complete parser position; one State is pushed on m_stack per enclosing container.
    struct State {
        enum phase_state phase_state = s_start;
        // Explicit default so that a freshly constructed State never carries an indeterminate value
        // (every other member already has one).
        enum type_state type_state = s_start_class;
        enum key_value_state kv_state = s_clear_kv;
        size_t length = 0;          // declared number of members
        size_t current_length = 0;  // number of members seen so far
        bool isObject = false;
        bool isClass = false;
    };
    State m_state;
    std::string m_value;    // text accumulated for the item currently being read
    std::string m_key;
    std::string m_length;
    IParserStreamReceiver &m_outReceiver;
    KeyStack m_keyStack;
    std::stack <State> m_stack;
    static const std::string m_parserName;
};

View File

@@ -0,0 +1,88 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserBase.h"
#include <string.h>
// Max size for key and value that can be stored in memory (per thread).
// The bodies are parenthesized so the macros expand correctly inside any expression
// (e.g. next to '%', '/' or a unary operator).
#define MAX_KEY_SIZE (64*1024)
#define MAX_VALUE_SIZE (64*1024)
// Creates a buffering adapter in front of 'receiver'. The flags start as BUFFERED_RECEIVER_F_FIRST
// so that the first buffer forwarded to the receiver is marked as the first one.
BufferedReceiver::BufferedReceiver(IParserReceiver &receiver)
:m_receiver(receiver),
m_flags(BUFFERED_RECEIVER_F_FIRST)
{
}
// Appends a piece of key data to the accumulated key.
// A piece that would make the key reach MAX_KEY_SIZE is dropped as a whole. Always returns 0.
int BufferedReceiver::onKey(const char *k, size_t k_len)
{
    const bool fits = (m_key.size() + k_len < MAX_KEY_SIZE);
    if (fits) {
        m_key.append(k, k_len);
    }
    return 0;
}
// Appends value data to the accumulated value. Whenever the accumulated value reaches
// MAX_VALUE_SIZE it is forwarded to the receiver through onKv() and the accumulation restarts;
// a partially filled buffer stays accumulated.
// Returns 0, or the result of the last onKv() call that reported a non-zero status.
int BufferedReceiver::onValue(const char *v, size_t v_len)
{
    int status = 0;
    while (v_len > 0) {
        // Take as much input as still fits into the accumulated value.
        const size_t chunk = std::min(v_len, MAX_VALUE_SIZE - m_value.size());
        m_value.append(v, chunk);
        v += chunk;
        v_len -= chunk;

        // Only full buffers are forwarded from here.
        if (m_value.size() != MAX_VALUE_SIZE) {
            continue;
        }

        // The first full buffer still carries BUFFERED_RECEIVER_F_FIRST.
        const int pushStatus =
            m_receiver.onKv(m_key.data(), m_key.size(), m_value.data(), m_value.size(), m_flags);
        if (pushStatus != 0) {
            status = pushStatus;
        }
        // The forwarded data is done with; later buffers are no longer "first".
        m_value.clear();
        m_flags &= ~BUFFERED_RECEIVER_F_FIRST;
    }
    return status;
}
// Completes the current key/value pair: forwards whatever is still accumulated with
// BUFFERED_RECEIVER_F_LAST set, then resets this object for the next pair.
// Returns the status of the forwarding onKv() call.
int BufferedReceiver::onKvDone()
{
    // The forwarding call is made even when m_value is empty: the receiver has to see the
    // BUFFERED_RECEIVER_F_LAST flag.
    m_flags |= BUFFERED_RECEIVER_F_LAST;
    const int status = onKv(m_key.data(), m_key.size(), m_value.data(), m_value.size(), m_flags);

    // Make the object reusable for other parsers.
    clear();
    return status;
}
// Forwards a complete key/value pair to the underlying receiver unchanged (no buffering, no copying)
// and returns the receiver's status.
int BufferedReceiver::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags)
{
return m_receiver.onKv(k, k_len, v, v_len, flags);
}
// Drops the accumulated key and value and marks the next forwarded buffer as the first one again.
void BufferedReceiver::clear()
{
    m_key.clear();
    m_value.clear();
    m_flags = BUFFERED_RECEIVER_F_FIRST;
}

View File

@@ -0,0 +1,141 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_BASE_H__1106fa38
#define __PARSER_BASE_H__1106fa38
#include "DataTypes.h"
#include <string>
#include <stddef.h>
#define BUFFERED_RECEIVER_F_FIRST 0x01
#define BUFFERED_RECEIVER_F_LAST 0x02
#define BUFFERED_RECEIVER_F_BOTH (BUFFERED_RECEIVER_F_FIRST | BUFFERED_RECEIVER_F_LAST)
#define BUFFERED_RECEIVER_F_UNNAMED 0x04
#if (DISTRO_centos6)
// pre c++11 compiler doesn't support the "final" keyword
#define final
#else
// c++11 and beyond
#define final final
#endif
// Interface for receiver classes that accept full key/value pairs
struct IParserReceiver {
    // Virtual destructor: makes destroying an implementation through an interface pointer well defined.
    virtual ~IParserReceiver() {}
    // Receives one key/value pair (or one piece of it, as described by the BUFFERED_RECEIVER_F_* flags).
    // The implementations in this file treat a non-zero result as a failure status.
    virtual int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags) = 0;
};
// Interface for receivers of typed key/value pairs together with map/array structure events.
struct IParserReceiver2 {
    // Virtual destructor: makes destroying an implementation through an interface pointer well defined.
    virtual ~IParserReceiver2() {}
    // Receives one key/value pair along with the value's DataType.
    virtual void onKvt(const char *k, size_t k_len, const char *v, size_t v_len, const DataType &type) = 0;
    virtual void onStartMap() = 0;
    virtual void onMapKey(const char *k, size_t k_len) = 0;
    virtual void onEndMap() = 0;
    virtual void onStartArray() = 0;
    virtual void onEndArray() = 0;
};
// Interface for receiver classes that can accept not only full key/value pairs, but also partial content.
// Senders may make multiple calls to onKey() and onValue(), followed by a call to onKvDone() that signals
// that both key and value data are ready.
// Alternatively, when they can, senders make a single onKv() call, bringing the whole data in a single
// buffer, which is normally faster because this way senders can avoid unnecessary memory copying.
struct IParserStreamReceiver : public IParserReceiver {
    // Virtual destructor: makes destroying an implementation through an interface pointer well defined.
    virtual ~IParserStreamReceiver() {}
    virtual int onKey(const char *k, size_t k_len) = 0;     // next piece of the key
    virtual int onValue(const char *v, size_t v_len) = 0;   // next piece of the value
    virtual int onKvDone() = 0;                             // the current key/value pair is complete
    virtual void clear() = 0;                               // drop any partially received pair
};
// This class acts as an adapter between senders that require IParserStreamReceiver and receivers
// that can only accept IParserReceiver (and do not want to cope with buffering).
// When onKv is received by an instance of BufferedReceiver - it will be transparently forwarded to destination
// (without memory copying).
// However, if BufferedReceiver instance accepts onKey, onValue calls, it buffers the data until onKvDone
// is called, at which point it passes buffered data to onKv callback of the final (non stream capable) receiver.
// TODO:: 1) when constructing this class, pass limits on key and value lengths as constructor parameters?
// TODO:: 2) add extra callback like "onFlush()" to both IParserStreamReceiver and its implementation
// BufferedReceiver, which tells BufferedReceiver that it has last chance to copy data aside
// before the underlying buffer is dead. Without receiving this call, BufferedStreamReceiver
// can simply collect ptr+len pairs on buffer instead of copying stuff to m_key and m_value.
// Once onFlush() is received, the data must be collected from those spans, because the underlying buffer
// is going to be destroyed.
// Note that calls to onFlush() must be added to end of all parser functions before they lose control of their
// input buffer!
// However, this seems to be easy to implement: just call m_receiver.onFlush() before exiting parser's push()
// method, and we finally got zero-copy!
// Note that for optimization, the getAccumulatedKey() and getAccumulatedValue()
// should return pointers to the input buffer.
// This will in many cases cause sub-parsers to also work in zero-copy style too!
// Adapter that accumulates streamed key/value pieces and forwards them to an IParserReceiver.
class BufferedReceiver : public IParserStreamReceiver {
public:
// 'receiver' is stored by reference.
BufferedReceiver(IParserReceiver &receiver);
// Accumulates a piece of the key.
virtual int onKey(const char *k, size_t k_len);
// Accumulates a piece of the value; full buffers are forwarded to the receiver as they fill up.
virtual int onValue(const char *v, size_t v_len);
// Forwards the remaining accumulated data with BUFFERED_RECEIVER_F_LAST set and resets the object.
virtual int onKvDone();
// Forwards a complete pair to the receiver as is.
virtual int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags);
// Drops accumulated data and resets the flags.
virtual void clear();
// Helper methods to access accumulated key and value (read-only)
const std::string &getAccumulatedKey() const { return m_key; }
const std::string &getAccumulatedValue() const { return m_value; }
private:
// Destination of the forwarded pairs.
IParserReceiver &m_receiver;
// BUFFERED_RECEIVER_F_* flags passed along with the next forwarded buffer.
int m_flags;
// Accumulated key/value pair
std::string m_key;
std::string m_value;
};
// Base class for various streaming parsers that accept a data stream in multiple pieces through
// the push() calls, followed by the finish() call that signals end of the stream.
// Normally, the parsers will accept data, dissect/decode it and pass the resulting data as a
// stream of key/value pairs to a target that is either IParserReceiver or IParserStreamReceiver.
class ParserBase {
public:
virtual ~ParserBase() {}
// Feeds the next piece of the stream to the parser.
virtual size_t push(const char *data, size_t data_len) = 0;
virtual void finish() = 0; // TODO: I think this should return status of some sort, just like push()
// Name identifying the parser.
virtual const std::string &name() const = 0;
// True when the parser is in an error state.
virtual bool error() const = 0;
// Nesting depth reported by the parser.
virtual size_t depth() = 0;
// Accessors for a per-parser boolean "recursion" flag (initially false).
virtual void setRecursionFlag() { m_recursionFlag = true; }
virtual void clearRecursionFlag() { m_recursionFlag = false; }
virtual bool getRecursionFlag() const { return m_recursionFlag; }
private:
bool m_recursionFlag = false;
};
// Bundles a stream parser of type ParserType with the BufferedReceiver it reports to, so the pair
// can be used with a receiver that only implements IParserReceiver.
// All ParserBase calls are delegated to the wrapped parser.
template<typename ParserType>
class BufferedParser : public ParserBase
{
public:
    // 'receiver' gets the buffered key/value pairs; any further arguments are handed to the
    // wrapped parser's constructor after the buffered receiver.
    template<typename ...Args>
    explicit BufferedParser(IParserReceiver &receiver, Args... args)
            :
        m_bufferedReceiver(receiver),
        m_parser(m_bufferedReceiver, args...)
    {}

    virtual ~BufferedParser() {}

    virtual size_t push(const char *data, size_t data_len)
    {
        return m_parser.push(data, data_len);
    }

    virtual void finish()
    {
        m_parser.finish();
    }

    virtual const std::string &name() const
    {
        return m_parser.name();
    }

    virtual bool error() const
    {
        return m_parser.error();
    }

    virtual size_t depth()
    {
        return m_parser.depth();
    }

private:
    // Declared before m_parser: the parser is constructed with a reference to it.
    BufferedReceiver m_bufferedReceiver;
    ParserType m_parser;
};
#endif // __PARSER_BASE_H__1106fa38

View File

@@ -0,0 +1,157 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserBinary.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP_PARSER_BINARY);
#define MIN_TEXT_SIZE 10
// Creates a parser named "binary" that reports to 'receiver'. Parsing starts in s_start with no
// text carried over from a previous buffer and a zero text character count.
ParserBinary::ParserBinary(IParserStreamReceiver& receiver) :
m_parserName("binary"),
m_receiver(receiver),
m_state(s_start),
m_textFromLastBuffer(),
m_textCharCount(0)
{
}
// Nothing to release explicitly.
ParserBinary::~ParserBinary()
{
}
// Scans a piece of the stream and reports runs of printable characters to the receiver.
// A single key "text" is emitted on the first byte; every run of at least MIN_TEXT_SIZE consecutive
// printable characters is emitted as value data, shorter runs are ignored. A run may span several
// push() calls: text that is still too short at the end of a buffer is kept in m_textFromLastBuffer
// until it can be decided whether the run is long enough.
// Calling with data_len == 0 signals end of stream: pending text is flushed if long enough and the
// key/value pair is completed.
// Returns the number of bytes consumed (0 on end of stream or when in the error state).
size_t ParserBinary::push(const char* data, size_t data_len)
{
    if (data_len == 0)
    {
        dbgTrace(D_WAAP_PARSER_BINARY) << "ParserBinary::push(): end of stream. m_state=" << m_state <<
            ", m_textCharCount=" << m_textCharCount;
        if (m_state == s_text && m_textCharCount >= MIN_TEXT_SIZE) {
            // Flush text data collected from previous buffer
            flush();
        }
        m_receiver.onKvDone();
        return 0;
    }

    size_t i = 0, textStartIdx = 0;
    while (i < data_len)
    {
        char c = data[i];
        bool is_last = (i + 1 == data_len);
        // isprint() requires a value representable as unsigned char (or EOF). Passing a plain char is
        // undefined for bytes >= 0x80 where char is signed, and such bytes are expected in binary input.
        const bool printable = ::isprint(static_cast<unsigned char>(c)) != 0;

        switch (m_state) {
        case s_start:
            m_receiver.onKey("text", 4);
            m_state = s_binary;
            // fallthrough //
            CP_FALL_THROUGH;
        case s_binary:
            if (!printable) {
                // Skip binary stuff
                break;
            }
            textStartIdx = i; // remember index of potential text block start
            m_textCharCount = 0; // count consecutive text characters in the input stream
            if (!m_textFromLastBuffer.empty()) {
                m_textFromLastBuffer.clear();
            }
            m_state = s_text;
            // fallthrough //
            CP_FALL_THROUGH;
        case s_text: {
            if (printable) {
                m_textCharCount++;
            }
            else {
                dbgTrace(D_WAAP_PARSER_BINARY) << "ParserBinary::push(): switch to binary at i=" << i <<
                    ", textStartIdx=" << textStartIdx << ", m_textCharCount=" << m_textCharCount;
                // Transition from text to binary
                // Only output text chunk when it is large enough, ignore small text chunks
                if (m_textCharCount >= MIN_TEXT_SIZE) {
                    // Flush text data collected from previous buffer
                    flush();
                    // Output text data from current buffer
                    m_receiver.onValue(data+textStartIdx, i-textStartIdx); // do not include current character
                }
                m_textCharCount = 0;
                m_state = s_binary;
                break;
            }
            // Handle hitting buffer edge while collecting text.
            // Note that current buffer is going to be invalidated so we need to save everything needed to be
            // able to continue on next invocation.
            if (is_last) {
                dbgTrace(D_WAAP_PARSER_BINARY) << "ParserBinary::push(): last char in buffer. m_textCharCount=" <<
                    m_textCharCount;
                // If enough data collected so far no need to remember it - flush it to output right away
                if (m_textCharCount >= MIN_TEXT_SIZE) {
                    // Flush text data collected from previous buffer
                    flush();
                    // Output text data from current buffer
                    m_receiver.onValue(data+textStartIdx, i-textStartIdx + 1); // +1 to include current character
                }
                else {
                    // If there's not enough text to decide - store the text data from current buffer for the
                    // next invocation
                    m_textFromLastBuffer.append(data+textStartIdx, i-textStartIdx + 1);
                }
            }
            break;
        }
        case s_error:
            return 0;
        default:
            break;
        }
        i++;
    }
    return i;
}
// Signals end of stream by pushing an empty buffer into the parser.
void ParserBinary::finish()
{
    push(nullptr, 0);
}
// Returns the parser's name ("binary", as set by the constructor).
const std::string& ParserBinary::name() const
{
return m_parserName;
}
// Returns true when the parser is in the s_error state.
bool ParserBinary::error() const
{
return m_state == s_error;
}
// Sends the text kept from a previous buffer (if any) to the receiver as value data and forgets it.
void ParserBinary::flush() {
    if (m_textFromLastBuffer.size() == 0) {
        return;
    }
    dbgTrace(D_WAAP_PARSER_BINARY) << "ParserBinary::flush() flushing " << m_textFromLastBuffer.size() <<
        " chars from last buf";
    m_receiver.onValue(m_textFromLastBuffer.data(), m_textFromLastBuffer.size());
    m_textFromLastBuffer.clear();
}

View File

@@ -0,0 +1,46 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_BINARY_H__
#define __PARSER_BINARY_H__
#include "ParserBase.h"
// Streaming parser that looks for runs of printable characters inside arbitrary data and reports
// them to an IParserStreamReceiver.
class ParserBinary : public ParserBase
{
public:
// 'receiver' is stored by reference.
ParserBinary(IParserStreamReceiver& receiver);
virtual ~ParserBinary();
// Feeds the next piece of the stream; data_len == 0 signals end of stream.
virtual size_t push(const char* data, size_t data_len);
virtual void finish();
virtual const std::string& name() const;
virtual bool error() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
enum state {
s_start,
s_binary,
s_text,
s_error
};
const std::string m_parserName;
IParserStreamReceiver& m_receiver;
state m_state;
// Text from the end of a previous buffer whose run was still too short to be reported.
std::string m_textFromLastBuffer;
// Number of consecutive printable characters in the current run.
size_t m_textCharCount;
// Sends m_textFromLastBuffer to the receiver and clears it.
void flush();
};
#endif // __PARSER_BINARY_H__

View File

@@ -0,0 +1,180 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserConfluence.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP_PARSER_CONFLUENCE);
// Creates a parser named "confluence" that reports to 'receiver'; parsing starts in s_start with an
// empty name.
ParserConfluence::ParserConfluence(IParserStreamReceiver& receiver) :
m_parserName("confluence"),
m_state(s_start),
m_receiver(receiver),
m_name()
{
}
// Nothing to release explicitly.
ParserConfluence::~ParserConfluence()
{
}
// Parses input of the form "{name:attr=value|attr=value|...}".
// For every attribute the receiver gets the key "<name>.<attr>" (sent in pieces), the value (if any)
// and a completion call. Input may be split across several push() calls; data_len == 0 signals end of
// stream, which is an error unless the closing '}' was reached.
// Returns the current index when the error state is encountered inside the loop, 0 otherwise.
size_t ParserConfluence::push(const char* data, size_t data_len)
{
    if (data_len == 0)
    {
        if (m_state != s_end)
        {
            m_state = s_error;
            return 0;
        }
    }

    // Start offsets are relative to the current buffer; data belonging to earlier buffers has
    // already been handed over, so 0 is the right start when a token continues from a previous push().
    size_t i = 0, name_index = 0, attribute_index = 0;
    while (i < data_len)
    {
        char c = data[i];
        bool is_last = (i + 1 == data_len);
        dbgTrace(D_WAAP_PARSER_CONFLUENCE) << "parsing confluence: index: " << i << " char: " << c << " state: " <<
            m_state;
        switch (m_state)
        {
        case s_start:
            if (c != '{')
            {
                m_state = s_error;
                break;
            }
            i++;
            m_state = s_start_name;
            break;
        case s_start_name:
            m_name = "";
            name_index = i;
            m_state = s_name;
            break;
        case s_name:
            if (c == ':')
            {
                m_name += std::string(data + name_index, i - name_index);
                m_name += ".";
                m_state = s_start_attributes;
            }
            else if (c == '"')
            {
                m_state = s_error;
                break;
            }
            else if (is_last)
            {
                // Buffer ends inside the name: keep what was seen so far.
                m_name += std::string(data + name_index, i - name_index + 1);
                name_index = 0;
            }
            i++;
            break;
        case s_start_attributes:
            // Every attribute key starts with "<name>."
            attribute_index = i;
            m_receiver.onKey(m_name.c_str(), m_name.length());
            m_state = s_attribute_name;
            break;
        case s_attribute_name:
            if (c == '=')
            {
                if (i > attribute_index)
                {
                    m_receiver.onKey(data + attribute_index, i - attribute_index);
                }
                attribute_index = is_last ? 0 : i + 1;
                m_state = s_attribute_value;
            }
            else if (c == '|')
            {
                // Attribute without a value.
                if (i > attribute_index)
                {
                    m_receiver.onKey(data + attribute_index, i - attribute_index);
                }
                m_receiver.onKvDone();
                m_state = s_start_attributes;
            }
            else if (c == '}')
            {
                if (i > attribute_index)
                {
                    m_receiver.onKey(data + attribute_index, i - attribute_index);
                }
                m_receiver.onKvDone();
                m_state = s_end;
                // Leave '}' for s_end, exactly as the s_attribute_value branch does. Consuming it here
                // made s_end inspect the following character instead, so one trailing character after
                // the closing brace was accepted on this path only.
                break;
            }
            else if (is_last)
            {
                m_receiver.onKey(data + attribute_index, i - attribute_index + 1);
                attribute_index = 0;
            }
            i++;
            break;
        case s_attribute_value:
            if (c == '|')
            {
                if (i > attribute_index)
                {
                    m_receiver.onValue(data + attribute_index, i - attribute_index);
                }
                m_receiver.onKvDone();
                m_state = s_start_attributes;
            }
            else if (c == '}')
            {
                if (i > attribute_index)
                {
                    m_receiver.onValue(data + attribute_index, i - attribute_index);
                }
                m_receiver.onKvDone();
                m_state = s_end;
                break;
            }
            else if (is_last)
            {
                m_receiver.onValue(data + attribute_index, i - attribute_index + 1);
                attribute_index = 0;
            }
            i++;
            break;
        case s_end:
            // Reached with i still on the closing '}': the brace must be the last character of the buffer.
            // NOTE(review): "last" is per buffer, so data pushed in a later call is judged by the same
            // rule - confirm whether that is intended.
            if (!is_last)
            {
                m_state = s_error;
            }
            i++;
            break;
        case s_error:
            return i;
            break;
        default:
            break;
        }
    }
    return 0;
}
// Signals end of stream by pushing an empty buffer into the parser.
void ParserConfluence::finish()
{
    push(nullptr, 0);
}
// Returns the parser's name ("confluence", as set by the constructor).
const std::string& ParserConfluence::name() const
{
return m_parserName;
}
// Returns true when the parser is in the s_error state.
bool ParserConfluence::error() const
{
return m_state == s_error;
}

View File

@@ -0,0 +1,48 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_CONFLUENCE_H__
#define __PARSER_CONFLUENCE_H__
#include "ParserBase.h"
// Streaming parser for input of the form "{name:attr=value|attr=value}", reporting each attribute
// to an IParserStreamReceiver.
class ParserConfluence : public ParserBase
{
public:
// 'receiver' is stored by reference.
ParserConfluence(IParserStreamReceiver& receiver);
virtual ~ParserConfluence();
// Feeds the next piece of the stream; data_len == 0 signals end of stream.
virtual size_t push(const char* data, size_t data_len);
virtual void finish();
virtual const std::string& name() const;
virtual bool error() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
enum state {
s_start,
s_start_name,
s_name,
s_start_attributes,
s_attribute_name,
s_attribute_value,
s_end,
s_error
};
const std::string m_parserName;
state m_state;
IParserStreamReceiver& m_receiver;
// The part before ':' followed by a '.'; sent as the first piece of every attribute key.
std::string m_name;
};
#endif

View File

@@ -0,0 +1,140 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserDelimiter.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP_PARSER_DELIMITER);
// Creates a parser that splits its input on 'delim' and reports every piece to 'receiver' under the
// key 'delimName'. 'delimName' is also what name() returns.
ParserDelimiter::ParserDelimiter(IParserStreamReceiver& receiver, char delim, const std::string& delimName)
: ParserBase(),
m_state(s_start),
m_receiver(receiver),
m_delim(delim),
m_delim_name(delimName),
m_found_delim(false)
{
}
// Nothing to release explicitly.
ParserDelimiter::~ParserDelimiter()
{
}
// Sends the delimiter name to the receiver as the key of the next value.
void ParserDelimiter::pushKey()
{
    dbgTrace(D_WAAP_PARSER_DELIMITER) << "parsing delimiter: send key='" << m_delim_name << "'";
    m_receiver.onKey(m_delim_name.c_str(), m_delim_name.length());
}
// Splits the input on m_delim. Every piece is reported to the receiver as a value under the key
// m_delim_name; a pair is completed each time a delimiter is passed and once more at end of stream.
// Input may be split across several push() calls; data_len == 0 signals end of stream, which is an
// error when no delimiter was seen at all.
// Always returns 0; failures are reported through error().
size_t ParserDelimiter::push(const char* data, size_t data_len)
{
    if (data_len == 0)
    {
        if (!m_found_delim)
        {
            m_state = s_error;
            return 0;
        }
        if (m_receiver.onKvDone() != 0) {
            m_state = s_error;
        }
        return 0;
    }

    // The value start is relative to the current buffer; a value continuing from a previous push()
    // starts at index 0 (its earlier part was already sent).
    size_t i = 0, value_start_index = 0;
    while (i < data_len)
    {
        char c = data[i];
        switch (m_state)
        {
        case s_start:
            m_found_delim = false;
            pushKey();
            if (c == m_delim)
            {
                m_state = s_start_with_delimiter;
            }
            else
            {
                m_state = s_value_start;
            }
            break;
        case s_start_with_delimiter:
            // Input starts with a delimiter: skip it.
            m_found_delim = true;
            m_state = s_value_start;
            i++;
            break;
        case s_value_start:
            value_start_index = i;
            m_state = s_value;
            // fall through
        case s_value:
            if (c == m_delim)
            {
                dbgTrace(D_WAAP_PARSER_DELIMITER) << "parsing delimiter: send val='" <<
                    std::string(data + value_start_index, i - value_start_index) << "'";
                m_receiver.onValue(data + value_start_index, i - value_start_index);
                // The delimiter itself is handled (and consumed) in s_delimiter.
                m_state = s_delimiter;
                break;
            }
            else if (i + 1 == data_len)
            {
                // End of buffer inside a value: send what is there, including the current character.
                // The trace uses the same length that is sent (it used to be one character short).
                const size_t value_len = i - value_start_index + 1;
                dbgTrace(D_WAAP_PARSER_DELIMITER) << "parsing delimiter: send val='" <<
                    std::string(data + value_start_index, value_len) << "'";
                m_receiver.onValue(data + value_start_index, value_len);
            }
            i++;
            break;
        case s_delimiter:
            m_found_delim = true;
            dbgTrace(D_WAAP_PARSER_DELIMITER) << "parsing delimiter: send onKvDone";
            if (m_receiver.onKvDone() != 0) {
                m_state = s_error;
                break;
            }
            i++;
            // Open the next pair.
            pushKey();
            m_state = s_value_start;
            break;
        case s_error:
            break;
        default:
            break;
        }
        if (m_state == s_error)
        {
            break;
        }
    }
    return 0;
}
// Signals end of stream by pushing an empty buffer into the parser.
void ParserDelimiter::finish()
{
    push(nullptr, 0);
}
// Returns true when the parser is in the s_error state.
bool ParserDelimiter::error() const
{
return m_state == s_error;
}
// Returns the delimiter name given to the constructor; it doubles as the parser's name.
const std::string& ParserDelimiter::name() const
{
return m_delim_name;
}

View File

@@ -0,0 +1,51 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_DELIMIETER_BASE_H__
#define __PARSER_DELIMIETER_BASE_H__
#include "ParserBase.h"
// Streaming parser that splits its input on a single delimiter character and reports every piece
// to an IParserStreamReceiver under a fixed key.
class ParserDelimiter : public ParserBase
{
public:
// 'receiver' is stored by reference; 'delimName' is used both as the key and as the parser name.
ParserDelimiter(IParserStreamReceiver& receiver, char delim, const std::string& delimName);
virtual ~ParserDelimiter();
// Feeds the next piece of the stream; data_len == 0 signals end of stream.
virtual size_t push(const char* data, size_t data_len);
virtual void finish();
virtual bool error() const;
virtual const std::string& name() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
enum state {
s_start,
s_start_with_delimiter,
s_value_start,
s_delimiter,
s_value,
s_error
};
// Sends m_delim_name to the receiver as a key.
void pushKey();
state m_state;
IParserStreamReceiver& m_receiver;
// NOTE(review): not referenced by the constructor or by push()/pushKey() - possibly unused; confirm.
std::string m_key;
char m_delim;
std::string m_delim_name;
// True once at least one delimiter was seen; end of stream without one is an error.
bool m_found_delim;
};
#endif

View File

@@ -0,0 +1,304 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserHTML.h"
#include "Waf2Util.h"
#include "debug.h"
#include <assert.h>
USE_DEBUG_FLAG(D_WAAP_PARSER_HTML);
// Name shared by all ParserHTML instances.
const std::string ParserHTML::m_parserName = "ParserHTML";
void ParserHTML::onStartElement(
void* ctx,
const xmlChar* localname,
const xmlChar** attributes)
{
ParserHTML* p = (ParserHTML*)ctx;
dbgTrace(D_WAAP_PARSER_HTML) << "HTML OPEN: '" << localname << "'";
p->m_key.push((const char*)localname, xmlStrlen(localname));
if (attributes != NULL) {
int i;
for (i = 0; attributes[i*2]; i++) {
const xmlChar* attr_localname = attributes[i * 2 + 0];
const xmlChar* attr_value = attributes[i * 2 + 1];
if (attr_value == NULL) {
attr_value = (const xmlChar*)"";
}
dbgTrace(D_WAAP_PARSER_HTML) << "\tHTML ATTR: elem='" << (char*)localname << "', " << attr_localname <<
"='" << std::string((char*)attr_value) << "'";
p->m_key.push((const char*)attr_localname, xmlStrlen(attr_localname));
if (p->m_receiver.onKv(
p->m_key.first().c_str(),
p->m_key.first().size(),
(const char*)attr_value, strlen((const char*)attr_value),
BUFFERED_RECEIVER_F_BOTH
) != 0) {
p->m_state = s_error;
}
p->m_key.pop("HTML end attribute");
}
}
// before we add new tracking element to the stack for this new element,
// set "children exists" flag to true for the parent element.
if (!p->m_elemTrackStack.empty()) {
p->m_elemTrackStack.back().hasChildren = true;
}
// when opening new element - start tracking its properties (internal text and existence of subelements)
p->m_elemTrackStack.push_back(ElemTrackInfo());
}
void
ParserHTML::onEndElement(
void* ctx,
const xmlChar* localname)
{
ParserHTML* p = (ParserHTML*)ctx;
dbgTrace(D_WAAP_PARSER_HTML) << "HTML CLOSE: '" << localname << "'";
if (p->m_elemTrackStack.empty()) {
dbgWarning(D_WAAP_PARSER_HTML) <<
"HTML closing tag and elem track stack is empty. This is probably sign of a bug!";
return;
}
ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back();
// Usability optimization: only output kv pair for HTML elements that had either sub children
// and/or value within.
// Those "wrapper elements" such as <wrapper><name>john</name><age>21</age></wrapper> only
// contain sub elements. For these we don't emit kv pair.
// However, for truly empty element such as <wrapper></wrapper>, or similar element with
// text: <wrapper>some text</wrapper>, we do output a kv pair.
bool isWrapperElement = elemTrackInfo.hasChildren && (elemTrackInfo.value.size() == 0);
if (!isWrapperElement) {
// Emit tag name as key
if (p->m_receiver.onKey(p->m_key.first().c_str(), p->m_key.first().size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onValue(elemTrackInfo.value.c_str(), elemTrackInfo.value.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onKvDone() != 0) {
p->m_state = s_error; // error
}
}
// when closing an element - pop its tracking info from the tracking stack
p->m_elemTrackStack.pop_back();
// Also, pop the element's name from m_key stack, so the key name always reflects
// current depth within the elements tree
p->m_key.pop("HTML end element");
}
void ParserHTML::onCharacters(void* ctx, const xmlChar* ch, int len) {
ParserHTML* p = (ParserHTML*)ctx;
if (p->m_elemTrackStack.empty()) {
dbgWarning(D_WAAP_PARSER_HTML) << "HTML text and elem track stack is empty. This is probably sign of a bug!";
return;
}
if ((ch == NULL) || (len == 0)) {
dbgTrace(D_WAAP_PARSER_HTML) << "Got empty HTML text element. Ignoring.";
return;
}
ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back();
dbgTrace(D_WAAP_PARSER_HTML) << "HTML TEXT: '[" << std::string((char*)ch, (size_t)len) << "]'";
std::string val = std::string((char*)ch, (size_t)len);
// trim isspace() characters around html text chunks.
// The chunks can occur multiple times within one value, when text value is intermixed with html sub-tags.
// for example, for HTML source "<a>sta<b>zzz</b>rt</a>", the "a" tag will include two text
// chunks "sta" and "rt"
// which are concatenated here to form the word "start".
// The trimming is done here to prevent false alarms on detection algorithm that sees
// "\n" characters in the HTML value.
// Example of input that causes false alarm without this trim is (multiline HTML):
// <html><script>\nclean_html_value '\n<\/script><\/html>
Waap::Util::trim(val);
elemTrackInfo.value += val;
}
// libxml error callback (printf-style). Formats the message into a bounded stack buffer
// (vsnprintf truncates anything longer) and writes it to the parser's debug trace.
// The ctx argument is not used.
static void onError(void* ctx, const char* msg, ...) {
    static const size_t kMessageBufSize = 4096;
    char formatted[kMessageBufSize];

    va_list args;
    va_start(args, msg);
    vsnprintf(formatted, kMessageBufSize, msg, args);
    va_end(args);

    dbgTrace(D_WAAP_PARSER_HTML) << "LIBXML (html) onError: " << std::string(formatted);
}
// Builds an HTML parser that reports key/value pairs to `receiver`.
// The libxml push-parser context itself is created lazily by push(), once the first bytes
// of the stream were buffered.
ParserHTML::ParserHTML(IParserStreamReceiver& receiver)
    : m_receiver(receiver),
      m_state(s_start),
      m_bufLen(0),
      m_key("html_parser"),
      m_pushParserCtxPtr(NULL)
{
    dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::ParserHTML()";

    // TODO:: is zeroing this really needed?
    memset(m_buf, 0, sizeof(m_buf));

    // Start from an all-NULL SAX handler and hook only the callbacks this parser implements
    memset(&m_saxHandler, 0, sizeof(m_saxHandler));
    m_saxHandler.error = onError;
    m_saxHandler.characters = onCharacters;
    m_saxHandler.endElement = onEndElement;
    m_saxHandler.startElement = onStartElement;

    // A "dummy" bottom entry on the tracking stack receives any text that appears
    // outside of a tracked element
    m_elemTrackStack.push_back(ElemTrackInfo());

    // Ugly: seed the key stack with a first element (it is ignored, because the HTML parser
    // never calls the "first()" method on this key).
    m_key.push("html", 4);
}
// Releases the libxml push-parser context, if push() ever got to create one.
ParserHTML::~ParserHTML() {
    dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::~ParserHTML()";

    if (m_pushParserCtxPtr == NULL) {
        return;
    }

    htmlFreeParserCtxt(m_pushParserCtxPtr);
}
// Decides whether a libxml error should be treated as a real parsing failure.
// Returns true when the error must be handled, false when it should be ignored.
bool ParserHTML::filterErrors(xmlErrorPtr xmlError) {
    dbgDebug(D_WAAP_PARSER_HTML) << "ParserHTML::filterErrors(): xmlError " << xmlError->code << ": '" <<
        xmlError->message << "'";

    // Ignore specific error: "HTML declaration allowed only at the start of the document".
    // This includes the case of "multiple HTML declarations" we've seen sent by some SOAP clients.
    // The parser runs in permissive mode (HTML_PARSE_RECOVER), so it recovers and parses the HTML
    // correctly, yet the error code is still reported here. Ignoring it keeps the WAAP code from
    // deciding the HTML is "broken" and scanning the source as-is, which would cause a false alarm.
    const bool ignorable =
        (xmlError->code == XML_ERR_RESERVED_XML_NAME) ||
        (xmlError->code == XML_ERR_UNDECLARED_ENTITY);

    if (!ignorable) {
        return true;
    }

    dbgDebug(D_WAAP_PARSER_HTML) << "ParserHTML::filterErrors(): ignoring the '" << xmlError->code << ": " <<
        xmlError->message << "' html parser error.";
    return false;
}
size_t ParserHTML::push(const char* data, size_t data_len) {
size_t i = 0;
char c;
if (data_len == 0) {
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): end of data signal! m_state=" << m_state;
// Send zero-length chunk with "terminate" flag enabled to signify end-of-stream
if (htmlParseChunk(m_pushParserCtxPtr, m_buf, 0, 1)) {
xmlErrorPtr xmlError = xmlCtxtGetLastError(m_pushParserCtxPtr);
if (xmlError && filterErrors(xmlError)) {
dbgDebug(D_WAAP_PARSER_HTML) << "ParserHTML::push(): xmlError: code=" << xmlError->code << ": '" <<
xmlError->message << "'";
m_state = s_error; // error
return -1;
}
}
return m_bufLen;
}
int expected_buffer_len = FIRST_BUFFER_SIZE - 1;
while (i < data_len) {
c = data[i];
switch (m_state) {
case s_start:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_start";
m_state = s_accumulate_first_bytes;
// fall through //
CP_FALL_THROUGH;
case s_accumulate_first_bytes:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_accumulate_first_bytes. c='" << data[i] <<
"'; m_bufLen=" << m_bufLen << "; i=" << i;
m_buf[m_bufLen] = c;
m_bufLen++;
if (c == '?') {
expected_buffer_len = FIRST_BUFFER_SIZE;
}
if (m_bufLen == expected_buffer_len) {
m_state = s_start_parsing;
}
break;
case s_start_parsing:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_start_parsing. sending len=" << m_bufLen << ": '" <<
std::string(m_buf, m_bufLen) << "'; i=" << i;
// Create HTML SAX (push parser) context
// It is important to buffer at least first 4 bytes of input stream so libxml can determine text encoding!
m_pushParserCtxPtr = htmlCreatePushParserCtxt(&m_saxHandler, this, m_buf, m_bufLen, NULL,
XML_CHAR_ENCODING_UTF8);
// Enable "permissive mode" for HTML SAX parser.
// In this mode, the libxml parser doesn't stop on errors, but still reports them!
htmlCtxtUseOptions(m_pushParserCtxPtr, HTML_PARSE_RECOVER);
m_state = s_parsing;
// fall through //
CP_FALL_THROUGH;
case s_parsing:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_parsing. sending len=" << (int)(data_len - i) <<
": '" << std::string(data + i, data_len - i) << "'; i=" << i;
if (m_pushParserCtxPtr) {
if (htmlParseChunk(m_pushParserCtxPtr, data + i, data_len - i, 0)) {
xmlErrorPtr xmlError = xmlCtxtGetLastError(m_pushParserCtxPtr);
if (xmlError && filterErrors(xmlError)) {
dbgDebug(D_WAAP_PARSER_HTML) << "ParserHTML::push(): xmlError: code=" << xmlError->code <<
": '" << xmlError->message << "'";
m_state = s_error; // error
return 0;
}
}
// success (whole buffer consumed)
i = data_len - 1; // take into account ++i at the end of the state machine loop
}
break;
case s_error:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_error";
return 0;
}
++i;
}
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): exiting with param(len)=" << data_len << ": i=" << i;
return i;
}
void ParserHTML::finish() {
push(NULL, 0);
}
// Returns the parser's name (the class-wide m_parserName constant).
const std::string &
ParserHTML::name() const {
    return ParserHTML::m_parserName;
}
// Returns true once the parser state machine has entered the s_error state.
bool ParserHTML::error() const {
    const bool failed = (m_state == s_error);
    return failed;
}

View File

@@ -0,0 +1,84 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "ParserBase.h"
#include "KeyStack.h"
#include <libxml/xmlstring.h>
#include <libxml/xmlerror.h>
#include <libxml/parser.h>
#include <libxml/HTMLparser.h>
#define FIRST_BUFFER_SIZE 5 // must buffer at least 4 first bytes to allow unicode autodetection (BOM).
// Streaming HTML parser built on the libxml HTML SAX (push) parser.
// Element names are tracked on a key stack and element text is collected per element;
// results are reported to an IParserStreamReceiver.
class ParserHTML : public ParserBase {
public:
// receiver - sink for the parsed keys/values
ParserHTML(IParserStreamReceiver &receiver);
virtual ~ParserHTML();
// Feeds the next chunk of the stream; data_len == 0 signals end of stream.
size_t push(const char *data, size_t data_len);
// Signals end of stream (implemented as a zero-length push).
void finish();
virtual const std::string &name() const;
// True once the parser entered the s_error state.
bool error() const;
// Depth of the key stack, not counting its first entry when the stack is non-empty
// (the constructor pushes an artificial "html" entry onto m_key).
virtual size_t depth() { return (m_key.depth() > 0) ? m_key.depth()-1 : m_key.depth(); }
private:
// States of the push() state machine
enum state {
s_start,
s_accumulate_first_bytes, // buffering the first bytes of the stream into m_buf
s_start_parsing, // enough bytes buffered - create the libxml push parser context
s_parsing, // streaming data into libxml
s_error
};
// Information tracked per each element in current stack of tracked HTML elements
struct ElemTrackInfo {
std::string value;
bool hasChildren;
ElemTrackInfo():hasChildren(false) {
// when element is just opened - we still didn't see any children,
// hence start with the "hasChildren" flag as false.
// This flag will be enabled once we meet opening of the a subelement.
// Also, we start from empty value string and gradually append to it each
// time we receive next piece of text from HTML parser.
// The collected value is then emitted when element finishes.
}
};
// SAX callbacks registered with libxml; ctx is the ParserHTML instance.
static void onStartElement(
void *ctx,
const xmlChar *localname,
const xmlChar **attributes);
static void onEndElement(
void* ctx,
const xmlChar* localname);
static void onCharacters(
void *ctx,
const xmlChar *ch,
int len);
// Filter out errors that should be ignored. Returns true if error should be treated,
// false if an error should be ignored
bool filterErrors(xmlErrorPtr xmlError);
IParserStreamReceiver &m_receiver;
enum state m_state;
// buffer first few bytes of stream (required before calling SAX parser for the first time)
char m_buf[FIRST_BUFFER_SIZE];
// number of bytes currently stored in m_buf
int m_bufLen;
// stack of element names reflecting the current depth within the elements tree
KeyStack m_key;
// per-element tracking info, parallel to the currently open elements
std::vector<ElemTrackInfo> m_elemTrackStack;
htmlSAXHandler m_saxHandler;
// libxml push parser context; NULL until push() creates it
htmlParserCtxtPtr m_pushParserCtxPtr;
static const std::string m_parserName;
};

View File

@@ -0,0 +1,448 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserHdrValue.h"
#include "Waf2Util.h"
#include "debug.h"
#include <stdio.h>
#include <string.h>
// Debug flag used by the dbgTrace() calls in this file
USE_DEBUG_FLAG(D_WAAP_PARSER_HDRVALUE);
// Name returned by ParserHdrValue::name()
const std::string ParserHdrValue::m_parserName = "hdrValue";
// File-scope list of parser states.
// NOTE(review): ParserHdrValue.h declares a nested ParserHdrValue::state enum with the same
// enumerators; inside the member functions the nested one is presumably the one found by
// name lookup, which would make this copy redundant - confirm before removing.
enum state {
s_start,
s_key_start,
s_key_restart,
s_key,
s_key_escaped1,
s_key_escaped2,
s_value_start,
s_value_restart,
s_value,
s_value_escaped1,
s_value_escaped2,
s_value_finishing_after_dblquotes,
s_end
};
// Builds a header-value parser that reports keys/values to `receiver`.
// The state machine starts at s_start with no pending percent-decoded bytes.
ParserHdrValue::ParserHdrValue(IParserStreamReceiver& receiver)
    : m_receiver(receiver),
      state(s_start),
      in_key(0),
      in_dbl_quotes(0),
      escaped_len(0),
      escapedCharCandidate(0)
{
    // TODO:: maybe remove?
    memset(escaped, 0, sizeof(escaped));
}
// Nothing to release explicitly: the destructor body is intentionally empty.
ParserHdrValue::~ParserHdrValue() {
}
size_t ParserHdrValue::push(const char* buf, size_t len) {
size_t i = 0;
size_t mark = 0;
char c;
int is_last = 0;
if (len == 0) {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): end of data signal! state=" << state;
// flush unescaped data collected (if any)
if (escaped_len > 0) {
if (state == s_key_restart) {
if (m_receiver.onKey(escaped, escaped_len) != 0) {
return i;
}
}
else if (state == s_value_restart) {
if (m_receiver.onValue(escaped, escaped_len) != 0) {
return i;
}
}
if (m_receiver.onKvDone() != 0) {
return i;
}
escaped_len = 0;
}
if (m_receiver.onKvDone() != 0) {
return i;
}
return 0;
}
while (i < len) {
c = buf[i];
is_last = (i == (len - 1));
switch (state) {
case s_start: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_start";
//state = s_key_start;
// fallthrough //
CP_FALL_THROUGH;
}
case s_key_start: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key_start";
in_key = 0; // we are not parsing the key
//state = s_key_restart;
// fallthrough //
CP_FALL_THROUGH;
}
case s_key_restart: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key_restart";
mark = i;
state = s_key;
// fallthrough //
CP_FALL_THROUGH;
}
case s_key: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key; c='" << c << "'; in_key=" << in_key;
// skip leading spaces in the key
if (isspace(c) && !in_key) {
state = s_key_restart; // skip the space character without including it in the output
break;
}
// Note that first non-space character is read
in_key = 1;
if (c == '%') {
if (i - mark > 0) {
if (m_receiver.onKey(buf + mark, i - mark) != 0) {
return i;
}
}
state = s_key_escaped1;
break;
}
#if 0 // '+' encoding is not done in header values (AFAIK)
else if (c == '+') {
// convert plus character to space
if (i - mark > 0) {
EMIT_DATA_CB(key, i, buf + mark, i - mark);
mark = i;
}
escaped[escaped_len] = ' ';
escaped_len++;
if (escaped_len >= MAX_ESCAPED_SIZE) {
EMIT_DATA_CB(value, i, escaped, escaped_len);
escaped_len = 0;
}
state = s_key_restart;
break;
}
#endif
else {
// flush unescaped data collected (if any)
if (escaped_len > 0) {
if (m_receiver.onKey(escaped, escaped_len) != 0) {
return i;
}
escaped_len = 0;
mark = i;
}
}
if (c == ';') {
// name finished without value
if (m_receiver.onKey(buf + mark, i - mark) != 0) {
return i;
}
if (m_receiver.onKvDone() != 0) {
return i;
}
state = s_key_start;
break;
}
else if (c == '=') {
if (m_receiver.onKey(buf + mark, i - mark) != 0) {
return i;
}
state = s_value_start;
break;
}
if (is_last) {
if (m_receiver.onKey(buf + mark, (i - mark) + 1) != 0) {
return i;
}
}
break;
}
case s_key_escaped1: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key_escaped1";
bool valid;
unsigned char v = from_hex(c, valid);
if (!valid) { // character right after the '%' is not a valid hex char.
// return the '%' character back to the output.
if (escaped_len > 0) {
if (m_receiver.onKey(escaped, escaped_len) != 0) {
return i;
}
escaped_len = 0;
}
if (m_receiver.onKey("%", 1) != 0) {
return i;
}
// If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences
if (c != '%') {
// pass the non-hex character back to the output too.
if (m_receiver.onKey(&c, 1) != 0) {
return i;
}
// otherwise (the character is not '%s'), switch back to the s_key state
state = s_key;
}
break;
}
escapedCharCandidate = c;
escaped[escaped_len] = v << 4;
state = s_key_escaped2;
break;
}
case s_key_escaped2: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key_escaped2";
bool valid;
unsigned char v = from_hex(c, valid);
if (!valid) {
// add converted escaped chars
if (escaped_len > 0 && m_receiver.onKey(escaped, escaped_len)) {
return i;
}
// return % to output
if (m_receiver.onKey("%", 1) != 0) {
return i;
}
// add the character that was thought to be escaped value
if (m_receiver.onKey(&escapedCharCandidate, 1)) {
return i;
}
// re parse the character as a key (i is incremented back to current value)
i--;
escaped_len = 0;
state = s_key_restart;
break;
}
escapedCharCandidate = 0;
escaped[escaped_len] |= v;
escaped_len++;
if (escaped_len >= MAX_ESCAPED_SIZE) {
if (m_receiver.onKey(escaped, escaped_len) != 0) {
return i;
}
escaped_len = 0;
}
state = s_key_restart;
break;
}
case s_value_start: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value_start";
mark = i;
state = s_value;
in_dbl_quotes = 0; // we are not parsing the
// detect first double-quotes
if (c == '"' && !in_dbl_quotes) {
in_dbl_quotes = 1;
state = s_value_restart;
break; // skip the leading " character
}
// fallthrough //
CP_FALL_THROUGH;
}
case s_value_restart: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value_restart";
mark = i;
state = s_value;
// fallthrough //
CP_FALL_THROUGH;
}
case s_value: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value; c='" << c << "', in_dbl_quotes=" <<
in_dbl_quotes;
if (c == '%') {
if (i - mark > 0) {
if (m_receiver.onValue(buf + mark, i - mark) != 0) {
return i;
}
}
state = s_value_escaped1;
break;
}
else if (c == '+') {
// convert plus character to space
if (i - mark > 0) {
if (m_receiver.onValue(buf + mark, i - mark) != 0) {
return i;
}
}
escaped[escaped_len] = ' ';
escaped_len++;
if (escaped_len >= MAX_ESCAPED_SIZE) {
if (m_receiver.onValue(escaped, escaped_len) != 0) {
return i;
}
escaped_len = 0;
}
state = s_value_restart;
break;
}
else {
// flush unescaped data collected (if any)
if (escaped_len > 0) {
if (m_receiver.onValue(escaped, escaped_len) != 0) {
return i;
}
escaped_len = 0;
mark = i;
}
}
// detect end of dbl-quotes
if (c == '"' && in_dbl_quotes) {
if (i - mark > 0) {
if (m_receiver.onValue(buf + mark, i - mark) != 0) {
return i;
}
}
if (m_receiver.onKvDone() != 0) {
return i;
}
state = s_value_finishing_after_dblquotes;
break;
}
if (c == ';') {
if (m_receiver.onValue(buf + mark, i - mark) != 0) {
return i;
}
if (m_receiver.onKvDone() != 0) {
return i;
}
state = s_key_start;
break;
}
if (is_last) {
if (m_receiver.onValue(buf + mark, (i - mark) + 1) != 0) {
return i;
}
}
break;
}
case s_value_escaped1: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value_escaped1";
bool valid;
unsigned char v = from_hex(c, valid);
if (!valid) { // character right after the '%' is not a valid hex char.
// return the '%' character back to the output.
if (m_receiver.onValue("%", 1) != 0) {
return i;
}
// If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences
if (c != '%') {
// pass the non-hex character back to the output too.
if (m_receiver.onValue(&c, 1) != 0) {
return i;
}
// otherwise (the character is not '%'), switch back to the s_value state
state = s_value_restart;
}
break;
}
escapedCharCandidate = c;
escaped[escaped_len] = v << 4;
state = s_value_escaped2;
break;
}
case s_value_escaped2: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key_escaped2";
bool valid;
unsigned char v = from_hex(c, valid);
if (!valid) {
// add converted escaped chars
if (escaped_len > 0 && m_receiver.onValue(escaped, escaped_len) != 0) {
return i;
}
// return % to output
if (m_receiver.onValue("%", 1) != 0) {
return i;
}
// add the character that was thought to be escaped value
if (m_receiver.onValue(&escapedCharCandidate, 1)) {
return i;
}
// re parse the character as a key (i is incremented back to current value)
i--;
escaped_len = 0;
state = s_value_restart;
break;
}
escapedCharCandidate = 0;
escaped[escaped_len] |= v;
escaped_len++;
if (escaped_len >= MAX_ESCAPED_SIZE) {
if (m_receiver.onValue(escaped, escaped_len) != 0) {
return i;
}
escaped_len = 0;
}
state = s_value_restart;
break;
}
case s_value_finishing_after_dblquotes: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value_finishing_after_dblquotes; c='" <<
c << "'";
if (c == ';') {
state = s_key_start;
break;
}
break;
}
default: {
dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): hdrvalue parser unrecoverable error";
return 0;
}
}// end switch()
++i;
}
return len;
}
void ParserHdrValue::finish() {
push(NULL, 0);
}
// Returns the parser's name (the class-wide m_parserName constant).
const std::string &
ParserHdrValue::name() const {
    return ParserHdrValue::m_parserName;
}
// This parser does not track an error state yet, so it always reports "no error".
bool ParserHdrValue::error() const {
    // TODO:: add error handling
    return false;
}

View File

@@ -0,0 +1,58 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_HDRVALUE_H__7d37fe50
#define __PARSER_HDRVALUE_H__7d37fe50
#include "ParserBase.h"
#include <string.h>
// Streaming parser for header values made of "key=value" pairs separated by ';'.
// Keys and values are percent-decoded and reported to an IParserStreamReceiver.
class ParserHdrValue : public ParserBase{
public:
// receiver - sink for the parsed keys/values
ParserHdrValue(IParserStreamReceiver &receiver);
virtual ~ParserHdrValue();
// Feeds the next chunk of the stream; len == 0 signals end of stream.
size_t push(const char *data, size_t len);
// Signals end of stream (implemented as a zero-length push).
void finish();
virtual const std::string &name() const;
virtual bool error() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
// Capacity of the escaped[] buffer; it is flushed to the receiver once it fills up
static const int MAX_ESCAPED_SIZE = 16;
IParserStreamReceiver &m_receiver;
// States of the push() state machine
enum state {
s_start,
s_key_start,
s_key_restart,
s_key,
s_key_escaped1, // read '%', expecting the first hex digit (key)
s_key_escaped2, // expecting the second hex digit (key)
s_value_start,
s_value_restart,
s_value,
s_value_escaped1, // read '%', expecting the first hex digit (value)
s_value_escaped2, // expecting the second hex digit (value)
s_value_finishing_after_dblquotes, // closing '"' seen; skipping until the next ';'
s_end
};
enum state state;
char in_key; // turns true when first non-space key character is read
char in_dbl_quotes; // turns true (1) during double-quoted value parsing
unsigned char escaped_len; // count of characters loaded in escaped[] buff
char escaped[MAX_ESCAPED_SIZE]; // decoded characters not yet passed to the receiver
char escapedCharCandidate; // first character after '%', kept so it can be emitted literally if decoding fails
static const std::string m_parserName;
};
#endif // __PARSER_HDRVALUE_H__7d37fe50

View File

@@ -0,0 +1,342 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserJson.h"
#include "debug.h"
#include "yajl/yajl_parse.h"
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <assert.h>
// Debug flag used by the dbgTrace() calls in this file
USE_DEBUG_FLAG(D_WAAP_PARSER_JSON);
// Name returned by ParserJson::name()
const std::string ParserJson::m_parserName = "jsonParser";
int ParserJson::cb_null() {
dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_null():";
if (m_receiver2) {
m_receiver2->onKvt(m_key.c_str(), m_key.size(), "null", 4, DataType::EMPTY);
}
if (m_receiver.onKv(m_key.c_str(), m_key.size(), "null", 4, BUFFERED_RECEIVER_F_BOTH)) {
return 0;
}
if (!m_depthStack.empty() && m_depthStack.back() == js_map) {
m_key.pop("json null");
}
return 1;
}
// yajl callback for a JSON boolean. Reports "true" or "false" under the current key.
// Returns 1 to continue parsing, 0 to cancel (the receiver rejected the pair).
int ParserJson::cb_boolean(int boolean) {
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_boolean(): " << boolean;

    if (m_receiver2 != NULL) {
        m_receiver2->onKvt(m_key.c_str(), m_key.size(), NULL, boolean, DataType::BOOLEAN);
    }

    const char* text = boolean ? "true" : "false";
    const size_t textLen = boolean ? 4 : 5;
    if (m_receiver.onKv(m_key.c_str(), m_key.size(), text, textLen, BUFFERED_RECEIVER_F_BOTH) != 0) {
        return 0;
    }

    // A value that belongs to a map consumes the key pushed by cb_map_key()
    const bool insideMap = !m_depthStack.empty() && m_depthStack.back() == js_map;
    if (insideMap) {
        m_key.pop("json boolean");
    }

    return 1;
}
// yajl callback for a JSON number, delivered as its textual form (s, slen).
// Returns 1 to continue parsing, 0 to cancel (the receiver rejected the pair).
int ParserJson::cb_number(const char* s, yajl_size_t slen) {
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_number(): '" << std::string(s, slen) << "'";

    if (m_receiver2 != NULL) {
        m_receiver2->onKvt(m_key.c_str(), m_key.size(), s, slen, DataType::NUMBER);
    }

    if (m_receiver.onKv(m_key.c_str(), m_key.size(), s, slen, BUFFERED_RECEIVER_F_BOTH) != 0) {
        return 0;
    }

    // A value that belongs to a map consumes the key pushed by cb_map_key()
    const bool insideMap = !m_depthStack.empty() && m_depthStack.back() == js_map;
    if (insideMap) {
        m_key.pop("json number");
    }

    return 1;
}
// yajl callback for a JSON string value (s, slen).
// Returns 1 to continue parsing, 0 to cancel (the receiver rejected the pair).
int ParserJson::cb_string(const unsigned char* s, yajl_size_t slen) {
    const char* text = (const char*)s;
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_string(): '" << std::string(text, slen) << "'";

    if (m_receiver2 != NULL) {
        m_receiver2->onKvt(m_key.c_str(), m_key.size(), text, slen, DataType::STRING);
    }

    if (m_receiver.onKv(m_key.c_str(), m_key.size(), text, slen, BUFFERED_RECEIVER_F_BOTH) != 0) {
        return 0;
    }

    // A value that belongs to a map consumes the key pushed by cb_map_key()
    const bool insideMap = !m_depthStack.empty() && m_depthStack.back() == js_map;
    if (insideMap) {
        m_key.pop("json string");
    }

    return 1;
}
// yajl callback for a map key: pushes the key name onto the key stack.
// Always returns 1 (continue parsing).
int ParserJson::cb_map_key(const unsigned char* s, yajl_size_t slen) {
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_map_key(): '" << std::string((const char*)s, slen) << "'";

    if (m_receiver2 != NULL) {
        m_receiver2->onMapKey((const char*)s, slen);
    }

    m_key.push((char*)s, slen);
    return 1;
}
int ParserJson::cb_start_map() {
dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_start_map():";
if (m_receiver2) {
m_receiver2->onStartMap();
}
m_depthStack.push_back(ParserJson::js_map);
return 1;
}
int ParserJson::cb_end_map() {
dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_end_map():";
if (m_receiver2) {
m_receiver2->onEndMap();
}
if (!m_depthStack.empty()) {
m_depthStack.pop_back();
}
if (!m_depthStack.empty() && m_depthStack.back() == js_map) {
m_key.pop("json end map");
}
return 1;
}
int ParserJson::cb_start_array() {
dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_start_array():";
if (m_receiver2) {
m_receiver2->onStartArray();
}
m_depthStack.push_back(ParserJson::js_array);
return 1;
}
int ParserJson::cb_end_array() {
dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_end_array():";
if (m_receiver2) {
m_receiver2->onEndArray();
}
if (!m_depthStack.empty()) {
m_depthStack.pop_back();
}
if (!m_depthStack.empty() && m_depthStack.back() == js_map) {
m_key.pop("json end array");
}
return 1;
}
// Static functions to be called from C and forward the calls to respective class cb_* methods
int ParserJson::p_null(void* ctx)
{
return ((ParserJson*)ctx)->cb_null();
}
int ParserJson::p_boolean(void* ctx, int boolean)
{
return ((ParserJson*)ctx)->cb_boolean(boolean);
}
int ParserJson::p_number(void* ctx, const char* s, yajl_size_t slen)
{
return ((ParserJson*)ctx)->cb_number(s, slen);
}
int ParserJson::p_string(void* ctx, const unsigned char* s, yajl_size_t slen)
{
return ((ParserJson*)ctx)->cb_string(s, slen);
}
int ParserJson::p_map_key(void* ctx, const unsigned char* s, yajl_size_t slen)
{
return ((ParserJson*)ctx)->cb_map_key(s, slen);
}
int ParserJson::p_start_map(void* ctx)
{
return ((ParserJson*)ctx)->cb_start_map();
}
int ParserJson::p_end_map(void* ctx)
{
return ((ParserJson*)ctx)->cb_end_map();
}
int ParserJson::p_start_array(void* ctx)
{
return ((ParserJson*)ctx)->cb_start_array();
}
int ParserJson::p_end_array(void* ctx)
{
return ((ParserJson*)ctx)->cb_end_array();
}
// Builds a JSON parser on top of yajl.
//   receiver  - receives every scalar value as a key/value pair
//   receiver2 - optional (may be NULL); additionally receives typed values and structure events
// If the yajl handle cannot be allocated the parser starts in the s_error state.
ParserJson::ParserJson(IParserReceiver& receiver, IParserReceiver2* receiver2)
    : m_receiver(receiver),
      m_receiver2(receiver2),
      m_state(s_start),
      m_bufLen(0),
      m_key("json_parser"),
      m_jsonHandler(NULL)
{
    // TODO:: do we really want to clear this?
    memset(m_buf, 0, sizeof(m_buf));

    // Callback table handed to yajl. The two NULL slots follow p_boolean; all numbers are
    // received through p_number in their textual form.
    static const yajl_callbacks callbacks = {
        p_null,
        p_boolean,
        NULL,
        NULL,
        p_number,
        p_string,
        p_start_map,
        p_map_key,
        p_end_map,
        p_start_array,
        p_end_array
    };

    m_jsonHandler = yajl_alloc(&callbacks, NULL, this);
    if (!m_jsonHandler) {
        dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::ParserJson(): yajl_alloc() failed. Switching to s_error state.";
        m_state = s_error;
        return;
    }

    // Configure yajl parser
    yajl_config(m_jsonHandler, yajl_allow_comments, 1);
    yajl_config(m_jsonHandler, yajl_dont_validate_strings, 1); // disable utf8 checking
    yajl_config(m_jsonHandler, yajl_allow_multiple_values, 1);

    // Ugly: seed the key stack with a first element (it is ignored, because the Json parser
    // never calls the "first()" method on this key).
    m_key.push("json", 4);
}
// Releases the yajl handle (it is NULL when allocation failed in the constructor).
ParserJson::~ParserJson() {
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::~ParserJson():";

    if (m_jsonHandler == NULL) {
        return;
    }

    yajl_free(m_jsonHandler);
}
// Feeds the next chunk of the JSON stream into yajl.
//
// The first FIRST_JSON_BUFFER_SIZE bytes of the stream are buffered in m_buf before anything
// is handed to yajl; afterwards each chunk is forwarded as-is.
//
//   buf, len - the chunk to parse. len == 0 signals end of stream.
//
// Returns len when the chunk was consumed, 0 for the end-of-stream call or when the parser
// is already in the s_error state. Parsing failures are reported through error().
size_t ParserJson::push(const char* buf, size_t len) {
    size_t i = 0;
    char c;

    if (len == 0) {
        dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): end of data signal! m_state=" << m_state;

        // yajl_alloc() may have failed in the constructor: never hand a NULL handle to yajl
        if (m_jsonHandler == NULL) {
            m_state = s_error;
            return 0;
        }

        // A stream that ended while its first bytes were still being buffered (a JSON document
        // of up to FIRST_JSON_BUFFER_SIZE bytes, such as "{}" or "[1]") has not reached yajl yet.
        // Send the buffered bytes now; otherwise yajl sees an empty stream and every such
        // document ends up as a "premature EOF" error.
        if (m_state != s_error && m_bufLen > 0) {
            if (yajl_parse(m_jsonHandler, (unsigned char*)m_buf, m_bufLen) != yajl_status_ok) {
                m_state = s_error;
            }
            m_bufLen = 0;
        }

        // Tell yajl that there's end of stream here
        if (yajl_complete_parse(m_jsonHandler) != yajl_status_ok) {
            m_state = s_error;
        }

        return 0;
    }

    while (i < len) {
        c = buf[i];

        switch (m_state) {
        case s_start:
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_start";
            m_state = s_accumulate_first_bytes;

            // fallthrough //
            CP_FALL_THROUGH;
        case s_accumulate_first_bytes:
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_accumulate_first_bytes. i=" << i <<
                " c='" << buf[i] << "'";
            m_buf[m_bufLen] = c;
            m_bufLen++;
            if (m_bufLen == FIRST_JSON_BUFFER_SIZE) {
                m_state = s_start_parsing;
            }
            break;

        case s_start_parsing:
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_start_parsing. sending len=" <<
                (int)m_bufLen << ": '" << std::string(m_buf, m_bufLen) << "'";
            m_state = s_parsing;

            // fallthrough //
            CP_FALL_THROUGH;
        case s_parsing:
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_parsing. sending len=" << (int)(len - i) << ": '" <<
                std::string(buf + i, len - i) << "'";
            if (m_bufLen > 0) {
                // Send accumulated bytes (if any)
                if (yajl_parse(m_jsonHandler, (unsigned char*)m_buf, m_bufLen) != yajl_status_ok) {
                    m_state = s_error;
                }
                // And reset buffer (so it's only get sent once)
                m_bufLen = 0;
            }
            if (yajl_parse(m_jsonHandler, (unsigned char*)(buf + i), len - i) != yajl_status_ok) {
                m_state = s_error;
            }
            // success (whole buffer consumed)
            i = len - 1; // take into account ++i at the end of the m_state machine loop
            break;

        case s_error: {
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_error";
            return 0;
        }
        }

        ++i;
    }

    return len;
}
void ParserJson::finish() {
push(NULL, 0);
}
// Returns the parser's name (the class-wide m_parserName constant).
const std::string &
ParserJson::name() const {
    return ParserJson::m_parserName;
}
// Returns true once the parser has entered the s_error state.
bool ParserJson::error() const {
    const bool failed = (m_state == s_error);
    return failed;
}

View File

@@ -0,0 +1,87 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_JSON_H__a94f1be2
#define __PARSER_JSON_H__a94f1be2
#include <string.h>
#include <vector>
#include "ParserBase.h"
#include "KeyStack.h"
#include "yajl/yajl_parse.h"
#define FIRST_JSON_BUFFER_SIZE 4 // must buffer at least 4 first bytes to allow unicode autodetection (BOM).
typedef size_t yajl_size_t;
// Streaming JSON parser built on top of yajl.
// Scalar values are reported to an IParserReceiver as key/value pairs, where the key is
// built from the stack of enclosing map keys. An optional IParserReceiver2 additionally
// receives typed values and structure (map/array) events.
class ParserJson : public ParserBase {
public:
// receiver2 is optional and may be NULL
ParserJson(IParserReceiver &receiver, IParserReceiver2 *receiver2=NULL);
virtual ~ParserJson();
// Feeds the next chunk of the stream; data_len == 0 signals end of stream.
size_t push(const char *data, size_t data_len);
// Signals end of stream (implemented as a zero-length push).
void finish();
virtual const std::string &name() const;
// True once the parser entered the s_error state.
bool error() const;
// Depth of the key stack, not counting its first entry when the stack is non-empty
// (the constructor pushes an artificial "json" entry onto m_key).
virtual size_t depth() { return (m_key.depth() > 0) ? m_key.depth()-1 : m_key.depth(); }
private:
// Per-event handlers. Each returns 1 to continue parsing or 0 to cancel.
int cb_null();
int cb_boolean(int boolean);
int cb_number(const char *s, yajl_size_t slen);
int cb_string(const unsigned char *s, yajl_size_t slen);
int cb_map_key(const unsigned char *s, yajl_size_t slen);
int cb_start_map();
int cb_end_map();
int cb_start_array();
int cb_end_array();
// Static callbacks to be called from C
// (ctx is the ParserJson instance; each forwards to the matching cb_* method)
static int p_null(void *ctx);
static int p_boolean(void *ctx, int boolean);
static int p_number(void *ctx, const char *s, yajl_size_t slen);
static int p_string(void *ctx, const unsigned char *s, yajl_size_t slen);
static int p_map_key(void *ctx, const unsigned char *s, yajl_size_t slen);
static int p_start_map(void *ctx);
static int p_end_map(void *ctx);
static int p_start_array(void *ctx);
static int p_end_array(void *ctx);
// States of the push() state machine
enum state {
s_start,
s_accumulate_first_bytes, // buffering the first bytes of the stream into m_buf
s_start_parsing, // enough bytes buffered - start feeding yajl
s_parsing, // streaming data into yajl
s_error
};
// Kind of container the parser is currently inside of (entries of m_depthStack)
enum js_state {
js_array,
js_map
};
IParserReceiver &m_receiver;
IParserReceiver2 *m_receiver2;
enum state m_state;
// buffer first few bytes of stream
// (required before calling JSON parser for the first time so it can recognize stuff like UTF-8 BOM)
char m_buf[FIRST_JSON_BUFFER_SIZE];
// number of bytes currently stored in m_buf
size_t m_bufLen;
// Key and structure depth stacks
KeyStack m_key;
std::vector<enum js_state> m_depthStack;
// yajl parser handle; NULL when allocation failed
yajl_handle m_jsonHandler;
public:
static const std::string m_parserName;
};
#endif // __PARSER_JSON_H__a94f1be2

View File

@@ -0,0 +1,485 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserMultipartForm.h"
#include "ParserHdrValue.h"
#include "Waf2Util.h"
#include "debug.h"
#include <stdlib.h>
#include <ctype.h>
// Debug flag used by the dbgTrace()/dbgWarning() calls in this file
USE_DEBUG_FLAG(D_WAAP_PARSER_MULTIPART_FORM);
// ASCII codes of the line feed and carriage return characters
#define LF 10
#define CR 13
// Name of this parser
const std::string ParserMultipartForm::m_parserName = "ParserMultipartForm";
// Receives one key/value pair parsed out of a part's header value and remembers the value
// of the "name" key as the part name.
//   k, k_len - key (length-delimited, not necessarily NUL-terminated)
//   v, v_len - value (length-delimited, not necessarily NUL-terminated)
//   flags    - must have both BUFFERED_RECEIVER_F_BOTH bits set (the pair arrives in one piece)
// Always returns 0.
int ParserMultipartForm::HdrValueAnalyzer::onKv(const char* k, size_t k_len, const char* v, size_t v_len, int flags)
{
    // Log through length-bounded copies: k and v are (pointer, length) pairs, so streaming
    // the raw pointers would read until the next NUL byte, possibly past the buffer.
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "HdrValueAnalyzer::onKv(): k='" << std::string(k, k_len) <<
        "' v='" << std::string(v, v_len) << "'";
    assert((flags & BUFFERED_RECEIVER_F_BOTH) == BUFFERED_RECEIVER_F_BOTH);

    if (my_strincmp(k, "name", k_len)) {
        m_partName = std::string(v, v_len);
    }

    return 0;
}
// Forgets the part name collected by onKv().
void ParserMultipartForm::HdrValueAnalyzer::clear() {
m_partName.clear();
}
// Builds a multipart/form-data parser.
//   receiver     - sink for the parsed data
//   boundary     - boundary string (without the two leading hyphens)
//   boundary_len - length of the boundary string in bytes
// A single buffer is allocated that holds the full boundary ("--" + boundary + NUL) followed
// by the lookbehind area. If the allocation fails the parser starts in the s_error state.
ParserMultipartForm::ParserMultipartForm(
    IParserStreamReceiver& receiver,
    const char* boundary,
    size_t boundary_len)
        :
    m_receiver(receiver),
    m_partIdx(0),
    state(s_start),
    index(0),
    boundary_length(boundary_len + 2),
    lookbehind(NULL),
    multipart_boundary(NULL),
    m_headerValueParser(NULL),
    m_hdrValueAnalyzerBufferedReceiver(m_hdrValueAnalyzer)
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::ParserMultipartForm()";

    // two hyphens will be prepended to boundary string provided
    const size_t fullBoundaryLen = boundary_len + 2;

    multipart_boundary = (char*)malloc(fullBoundaryLen + fullBoundaryLen + 9);
    if (multipart_boundary == NULL) {
        dbgWarning(D_WAAP_PARSER_MULTIPART_FORM) <<
            "ParserMultipartForm::ParserMultipartForm(): failed allocation of multipart_boundary buffer.";
        state = s_error;
        return;
    }

    // "--" + boundary + terminating NUL
    multipart_boundary[0] = '-';
    multipart_boundary[1] = '-';
    memcpy(multipart_boundary + 2, boundary, boundary_len);
    multipart_boundary[fullBoundaryLen] = 0;

    // the lookbehind area starts right after the NUL-terminated boundary
    lookbehind = (multipart_boundary + boundary_length + 1);
}
// Releases everything the parser owns.
ParserMultipartForm::~ParserMultipartForm() {
    // The header value parser is created in on_form_part_hdr_value() and is otherwise released only
    // by on_form_part_hdr_kv_done(). If the stream stops in the middle of a header value it is still
    // alive here and must be destroyed to avoid a leak. Deleting NULL is a no-op.
    delete m_headerValueParser;
    m_headerValueParser = NULL;

    // lookbehind points into the same allocation as multipart_boundary, so one free() covers both.
    // free(NULL) is a no-op, which covers the failed-allocation case.
    free(multipart_boundary);
    multipart_boundary = NULL;
    lookbehind = NULL;
}
size_t ParserMultipartForm::push(const char* buf, size_t len) {
size_t i = 0;
size_t mark = 0;
char c, cl;
int is_last = 0;
if (multipart_boundary == NULL) {
dbgWarning(D_WAAP_PARSER_MULTIPART_FORM) <<
"ParserMultipartForm::push(): can't parse. multipart_boundary=NULL.";
state = s_error;
return 0;
}
// Detect end of stream
if (len == 0) {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): len = 0";
// end of stream
if (state != s_end) {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
"ParserMultipartForm::push(): MIME stream finished while inside part";
state = s_error;
return 0;
}
}
while (i < len) {
c = buf[i];
is_last = (i == (len - 1));
switch (state) {
case s_start:
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_start";
index = 0;
state = s_start_boundary;
// fallthrough //
CP_FALL_THROUGH;
case s_start_boundary: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_start_boundary";
if (index == boundary_length) {
if (c != CR) {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
"ParserMultipartForm::push(): didn't get CR character";
state = s_error;
return i;
}
index++;
break;
}
else if (index == (boundary_length + 1)) {
if (c != LF) {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
"ParserMultipartForm::push(): didn't get LF character";
state = s_error;
return i;
}
index = 0;
if (on_form_part_begin() != 0) {
state = s_error;
return i;
}
state = s_key_start;
break;
}
if (c != multipart_boundary[index]) {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
"ParserMultipartForm::push(): boundary check failed at index=" << index <<
" char '" << c << "', must be '" << multipart_boundary[index] << "'";
state = s_error;
return i;
}
index++;
break;
}
case s_key_start: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_key_start";
mark = i;
state = s_key;
// fallthrough //
CP_FALL_THROUGH;
}
case s_key: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_key";
if (c == CR) {
state = s_headers_almost_done;
break;
}
if (c == ':') {
if (on_form_part_hdr_key(buf + mark, i - mark) != 0) {
state = s_error;
return i;
}
state = s_value_start;
break;
}
cl = tolower(c);
if ((c != '-') && (cl < 'a' || cl > 'z')) {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
"ParserMultipartForm::push(): invalid character in header name: " << int(c);
state = s_error;
return i;
}
if (is_last) {
if (on_form_part_hdr_key(buf + mark, (i - mark) + 1) != 0) {
state = s_error;
return i;
}
}
break;
}
case s_headers_almost_done: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_headers_almost_done";
if (c != LF) {
state = s_error;
return i;
}
state = s_part_start;
break;
}
case s_value_start: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_value_start";
if (c == ' ') {
break;
}
mark = i;
state = s_value;
// fallthrough //
CP_FALL_THROUGH;
}
case s_value: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_value";
if (c == CR) {
if (on_form_part_hdr_value(buf + mark, i - mark) != 0) {
state = s_error;
return i;
}
state = s_value_almost_done;
break;
}
if (is_last) {
if (on_form_part_hdr_value(buf + mark, (i - mark) + 1) != 0) {
state = s_error;
return i;
}
}
break;
}
case s_value_almost_done: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_value_almost_done";
if (c != LF) {
state = s_error;
return i;
}
state = s_key_start;
if (this->on_form_part_hdr_kv_done() != 0) {
state = s_error;
return i;
}
break;
}
case s_part_start: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_start";
if (on_form_headers_complete() != 0) {
state = s_error;
return i;
}
mark = i;
state = s_part;
// fallthrough //
CP_FALL_THROUGH;
}
case s_part: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part";
if (c == CR) {
if (on_form_part_data(buf + mark, i - mark) != 0) {
state = s_error;
return i;
}
mark = i;
state = s_part_almost_boundary;
lookbehind[0] = CR;
break;
}
if (is_last) {
if (on_form_part_data(buf + mark, (i - mark) + 1) != 0) {
state = s_error;
return i;
}
}
break;
}
case s_part_almost_boundary: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_almost_boundary";
if (c == LF) {
state = s_part_boundary;
lookbehind[1] = LF;
index = 0;
break;
}
if (on_form_part_data(lookbehind, 1) != 0) {
state = s_error;
return i;
}
state = s_part;
mark = i--;
break;
}
case s_part_boundary: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_boundary";
if (multipart_boundary[index] != c) {
if (on_form_part_data(lookbehind, 2 + index) != 0) {
state = s_error;
return i;
}
state = s_part;
mark = i--;
break;
}
lookbehind[2 + index] = c;
if ((++index) == boundary_length) {
if (on_form_part_end() != 0) {
state = s_error;
return i;
}
state = s_part_almost_end;
}
break;
}
case s_part_almost_end: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_almost_end";
if (c == '-') {
state = s_part_final_hyphen;
break;
}
if (c == CR) {
state = s_part_end;
break;
}
state = s_error;
return i;
}
case s_part_final_hyphen: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_final_hyphen";
if (c == '-') {
if (on_form_body_end() != 0) {
state = s_error;
return i;
}
state = s_end;
break;
}
state = s_error;
return i;
}
case s_part_end: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_end";
if (c == LF) {
state = s_key_start;
if (on_form_part_begin() != 0) {
state = s_error;
return i;
}
break;
}
state = s_error;
return i;
}
case s_end: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_end";
break;
}
case s_error: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_error";
return 0;
}
default: {
dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): unknown state: " << state;
state = s_error;
return 0;
}
}
++i;
}
return len;
}
// Signals end of stream by pushing a zero-length chunk.
void ParserMultipartForm::finish()
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::finish():";
    push(NULL, 0);
}
// Returns the parser name ("ParserMultipartForm").
const std::string &ParserMultipartForm::name() const
{
    return m_parserName;
}
bool ParserMultipartForm::error() const {
return state == s_error;
}
// MIME form parsing
// Receives a piece (k_len bytes at k) of the current part header name and appends it to m_hdrName.
// May be called several times for one header when the name is split across pushed chunks.
// Always returns 0.
int ParserMultipartForm::on_form_part_hdr_key(const char* k, size_t k_len)
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_hdr_key(): '" <<
        std::string(k, k_len) << "'";
    m_hdrName.append(k, k_len);
    return 0; // ok
}
// Receives a piece (v_len bytes at v) of the current part header value and forwards it to the
// header value parser, creating that parser on the first piece. Always returns 0.
int ParserMultipartForm::on_form_part_hdr_value(const char* v, size_t v_len)
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_hdr_value(): '" <<
        std::string(v, v_len) << "'";

    // Several pieces may arrive for one header value; the parser is allocated only for the first.
    // It reports what it dissects to m_hdrValueAnalyzer through the buffered receiver.
    if (m_headerValueParser == NULL) {
        m_headerValueParser = new ParserHdrValue(m_hdrValueAnalyzerBufferedReceiver);
    }

    // hand this piece of the header value to the parser
    if (m_headerValueParser != NULL) {
        m_headerValueParser->push(v, v_len);
    }

    return 0; // ok
}
// Called when one complete "name: value" part header line has been read.
// Finishes and destroys the header value parser, takes the part name from a Content-Disposition
// header (falling back to "part-NNN"), then resets the per-header state. Always returns 0.
int ParserMultipartForm::on_form_part_hdr_kv_done()
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_hdr_kv_done():";

    // Complete any header value parsing in progress; its findings end up in m_hdrValueAnalyzer.
    if (m_headerValueParser != NULL) {
        m_headerValueParser->finish();
        delete m_headerValueParser;
        m_headerValueParser = NULL;
    }

    // The header name is only fully known at this point, which is why the check is made here.
    // Only the Content-Disposition header contributes the part name.
    if (my_stricmp(m_hdrName.c_str(), "content-disposition")) {
        m_partName = m_hdrValueAnalyzer.getPartName();

        // No name in the header: call the part "part-NNN", NNN being the current part counter.
        if (m_partName.empty()) {
            char fallbackName[128];
            snprintf(fallbackName, sizeof(fallbackName), "part-%lu", (unsigned long int)m_partIdx);
            m_partName = fallbackName;
        }
    }

    // Everything of interest was collected above; reset for the next header line.
    m_hdrValueAnalyzer.clear();
    m_hdrName.clear();
    return 0; // ok
}
// Called when the header section of a part is over: reports the part name to the receiver as the
// key and resets the header value analyzer. Returns the receiver's onKey() result.
int ParserMultipartForm::on_form_headers_complete()
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_headers_complete():";
    const int keyResult = m_receiver.onKey(m_partName.data(), m_partName.size());
    m_hdrValueAnalyzer.clear();
    return keyResult;
}
// Called at the start of every part: advances the part counter and forgets the previous
// part's name. Always returns 0.
int ParserMultipartForm::on_form_part_begin()
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_begin():";
    ++m_partIdx;        // count parts
    m_partName.clear(); // the new part has no known name yet
    return 0; // ok
}
// Called when the delimiter that closes a part has been matched: tells the receiver the current
// key/value pair is complete. Returns the receiver's onKvDone() result.
int ParserMultipartForm::on_form_part_end()
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_end():";
    const int doneResult = m_receiver.onKvDone();
    return doneResult;
}
// Forwards 'length' bytes of part body starting at 'at' to the receiver as (a piece of) the value.
// Returns the receiver's onValue() result.
int ParserMultipartForm::on_form_part_data(const char* at, size_t length)
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_data(): '" <<
        std::string(at, length) << "'";
    const int valueResult = m_receiver.onValue(at, length);
    return valueResult;
}
// Called when the closing delimiter (delimiter followed by "--") has been read.
// Only traces the event. Always returns 0.
int ParserMultipartForm::on_form_body_end()
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_body_end():";
    return 0; // ok
}

View File

@@ -0,0 +1,93 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_MULTIPART_FORM_H__1c7eb4fa
#define __PARSER_MULTIPART_FORM_H__1c7eb4fa
#include "ParserBase.h"
#include "ParserHdrValue.h"
#include <boost/core/noncopyable.hpp>
// Streaming parser for multipart MIME form bodies.
// For every part it reports the part name to the receiver as a key (onKey), the part body as
// value data (onValue) and the end of the part (onKvDone).
class ParserMultipartForm : public ParserBase, boost::noncopyable {
public:
// Receives the key/value pairs dissected from a part header value and remembers the value of the
// "name" key as the part name.
class HdrValueAnalyzer : public IParserReceiver {
public:
// Stores v as the part name when k matches "name". Always returns 0.
int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags);
// Forgets the stored part name.
void clear();
// Part name collected so far (empty when none was seen).
const std::string &getPartName() const { return m_partName; }
private:
std::string m_partName;
};
// boundary/boundary_len: boundary token; the parser prepends "--" to it to form the delimiter.
ParserMultipartForm(IParserStreamReceiver &receiver, const char *boundary, size_t boundary_len);
virtual ~ParserMultipartForm();
// Parses the next chunk; len == 0 signals end of stream. Returns len when the whole chunk was consumed.
size_t push(const char *buf, size_t len);
// Signals end of stream (equivalent to push(NULL, 0)).
void finish();
virtual const std::string &name() const;
// True once the parser has entered the s_error state.
virtual bool error() const;
virtual size_t depth() { return 1; }
private:
// States of the push() state machine.
enum state {
s_start,
s_start_boundary,
s_key_start,
s_key,
s_headers_almost_done,
s_value_start,
s_value,
s_value_almost_done,
s_part_start,
s_part,
s_part_almost_boundary,
s_part_boundary,
s_part_almost_end,
s_part_end,
s_part_final_hyphen,
s_end,
s_error
};
// MIME form parsing callbacks, invoked by push(). A non-zero return value puts the parser into s_error.
int on_form_part_hdr_key(const char *k, size_t k_len);
int on_form_part_hdr_value(const char *v, size_t v_len);
int on_form_part_hdr_kv_done();
int on_form_headers_complete();
int on_form_part_begin();
int on_form_part_end();
int on_form_part_data(const char *at, size_t length);
int on_form_body_end();
IParserStreamReceiver &m_receiver;
// Number of parts started so far. Starts at 0 and is incremented by on_form_part_begin(), so while
// a part is being processed it holds that part's 1-based number (the first unnamed part is "part-1").
size_t m_partIdx;
enum state state;
// Position inside the delimiter while it is being matched.
size_t index;
// Length of the delimiter: boundary_len + 2 for the prepended "--".
size_t boundary_length;
// Scratch area holding CR, LF and the delimiter bytes matched so far, so they can be reported as
// part data when the match fails. Points into the multipart_boundary allocation.
char* lookbehind;
// Heap buffer holding the NUL-terminated delimiter ("--" + boundary), followed by the lookbehind area.
char *multipart_boundary;
ParserHdrValue *m_headerValueParser; // Part Header's value parser/dissector.
// Reports dissected parts to m_hdrValueAnalyzer.
HdrValueAnalyzer m_hdrValueAnalyzer; // Receives and analyzes dissected parts of part header value,
// and extracts information like part name from it.
BufferedReceiver m_hdrValueAnalyzerBufferedReceiver; // Buffers partial header value data before
// it is available to m_hdrValueAnalyzer.
std::string m_hdrName; // Current part header name (accumulated until on_form_part_hdr_kv_done() is called).
std::string m_partName; // Part name
static const std::string m_parserName;
};
#endif // __PARSER_MULTIPART_FORM_H__1c7eb4fa

View File

@@ -0,0 +1,80 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserRaw.h"
#include "debug.h"
// Debug flag passed to every dbgTrace call in this file.
USE_DEBUG_FLAG(D_WAAP_PARSER_RAW);
// Name reported by ParserRaw::name().
const std::string ParserRaw::m_parserName = "ParserRaw";
// Creates a pass-through parser.
//   receiver - sink for the key and the raw data
//   key      - key under which all pushed data is reported
ParserRaw::ParserRaw(IParserStreamReceiver &receiver, const std::string &key)
        :
    m_receiver(receiver),
    m_key(key),
    m_state(s_start)
{
}
// Nothing to release explicitly.
ParserRaw::~ParserRaw()
{
}
// Forwards a chunk of raw data to the receiver unchanged.
//   buf, len - chunk to forward; len == 0 signals end of data.
// The configured key is emitted once, before the first chunk. On end of data onKvDone() is sent
// if anything was pushed before. Returns len for a forwarded chunk and 0 on end of data, when the
// key could not be emitted, or when the parser is already in the error state.
size_t ParserRaw::push(const char *buf, size_t len)
{
    dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): (len=" << (unsigned long int)len << ")";

    if (len == 0) {
        dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): end of data signal! m_state=" << m_state;
        // close the pair only if at least something was pushed
        if (m_state != s_start && m_receiver.onKvDone() != 0) {
            m_state = s_error;
        }
        return 0;
    }

    if (m_state == s_error) {
        dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): s_error";
        return 0;
    }

    if (m_state == s_start) {
        // first chunk: announce the key, then continue straight into forwarding
        dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): s_start";
        if (m_receiver.onKey(m_key.data(), m_key.size()) != 0) {
            m_state = s_error;
            return 0;
        }
        m_state = s_forward;
    }

    if (m_state == s_forward) {
        dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): s_forward";
        if (m_receiver.onValue(buf, len) != 0) {
            m_state = s_error;
        }
    }

    return len;
}
// Signals end of data by pushing a zero-length chunk.
void ParserRaw::finish()
{
    push(NULL, 0);
}
// Returns the parser name ("ParserRaw").
const std::string &ParserRaw::name() const
{
    return m_parserName;
}
bool ParserRaw::error() const {
return m_state == s_error;
}

View File

@@ -0,0 +1,43 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_RAW_H__7989ff78
#define __PARSER_RAW_H__7989ff78
#include "ParserBase.h"
#include <string.h>
// Pass-through parser: reports a fixed key once and forwards every pushed chunk to the receiver
// unchanged as value data.
class ParserRaw : public ParserBase {
public:
// key: the key reported to the receiver before the first chunk of data.
ParserRaw(IParserStreamReceiver &receiver, const std::string &key);
virtual ~ParserRaw();
// Forwards one chunk; data_len == 0 signals end of data. Returns data_len for a forwarded chunk.
size_t push(const char *data, size_t data_len);
// Signals end of data (equivalent to push(NULL, 0)).
void finish();
virtual const std::string &name() const;
// True once the parser has entered the s_error state.
bool error() const;
virtual size_t depth() { return 1; }
private:
enum state {
s_start,   // nothing pushed yet; the key has not been emitted
s_forward, // key emitted; chunks are forwarded as value data
s_error    // the receiver reported a failure
};
IParserStreamReceiver &m_receiver;
std::string m_key; // key emitted before the first chunk
state m_state;
static const std::string m_parserName;
};
#endif // __PARSER_RAW_H__7989ff78

View File

@@ -0,0 +1,439 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserUrlEncode.h"
#include "Waf2Util.h"
#include "debug.h"
// Debug flag passed to every dbgTrace/dbgDebug/dbgWarning call in this file.
USE_DEBUG_FLAG(D_WAAP_PARSER_URLENCODE);
// Name reported by ParserUrlEncode::name().
const std::string ParserUrlEncode::m_parserName = "ParserUrlEncode";
// Creates a parser for "key=value" pairs joined by a separator character.
//   receiver          - sink for the decoded keys and values
//   separatorChar     - character that separates one pair from the next
//   should_decode_per - when true, "%hh" sequences are decoded; when false '%' is an ordinary character
ParserUrlEncode::ParserUrlEncode(IParserStreamReceiver &receiver, char separatorChar, bool should_decode_per)
        :
    m_receiver(receiver),
    m_state(s_start),
    m_escapedLen(0),
    m_separatorChar(separatorChar),
    m_escapedCharCandidate(0),
    should_decode_percent(should_decode_per)
{
    dbgTrace(D_WAAP_PARSER_URLENCODE) << "should_decode_per=" << should_decode_per;

    // TODO:: is there a need for this?
    // start with a zeroed buffer of decoded characters
    memset(m_escaped, 0, sizeof(m_escaped));
}
// Nothing to release explicitly.
ParserUrlEncode::~ParserUrlEncode()
{
}
// Feeds the next chunk of URL-encoded "key=value<sep>key=value..." data into the state machine.
//   buf, len - chunk to parse; len == 0 signals end of data.
// Keys and values are handed to the receiver piecewise (onKey/onValue), followed by onKvDone()
// at each separator and at end of data. '+' is converted to a space and, when
// should_decode_percent is set, "%hh" is decoded to the byte it encodes. Decoded characters are
// collected in m_escaped and flushed when that buffer fills up or when an ordinary character
// follows. A '%' that is not followed by two hex digits is passed through literally.
//
// Returns len when the whole chunk was consumed and 0 on end of data. When a byte is rejected or
// the receiver returns non-zero, the state becomes s_error and the offset of the byte being
// processed is returned; once in s_error every further call returns 0.
//
// 'mark' is the start of the not yet reported literal span inside the current chunk; the span is
// flushed when the last byte of the chunk is reached, so tokens may be split across calls.
size_t ParserUrlEncode::push(const char *buf, size_t len) {
    size_t i = 0;
    size_t mark = 0;
    char c;
    int is_last = 0;

    dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): starting (len=" << len << ")";

    if (len == 0) {
        dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): end of data signal! m_state=" << m_state;
        // flush unescaped data collected (if any)
        if (m_escapedLen > 0) {
            if (m_state == s_key_start) {
                if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
            }
            else if (m_state == s_value_start) {
                if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
            }
            m_escapedLen = 0;
        }
        if (m_receiver.onKvDone() != 0) {
            m_state = s_error;
            return i;
        }
        return 0;
    }

    while (i < len) {
        c = buf[i];
        is_last = (i == (len - 1));

        // Checking valid char urlencode
        if (c < 32)
        {
            dbgDebug(D_WAAP_PARSER_URLENCODE) << "invalid URL encoding character: " << c;
            m_state = s_error;
            return i;
        }

        dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): state=" << m_state << "; ch='" << c << "'";

        switch (m_state) {
        case s_start: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_start";
            //m_state = s_key_start;
            // fallthrough //
            CP_FALL_THROUGH;
        }
        case s_key_start: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_key_start";
            mark = i;
            m_state = s_key;
            // fallthrough //
            CP_FALL_THROUGH;
        }
        case s_key: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_key";
            // skip leading spaces in the key
            if (isspace(c)) {
                m_state = s_key_start; // skip the space character without including it in the output
                break;
            }
            if (c == '%' && should_decode_percent) {
                // report the literal span before the '%', then start decoding the escape
                if (i - mark > 0) {
                    if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                        m_state = s_error;
                        return i;
                    }
                }
                m_state = s_key_escaped1;
                break;
            }
            else if (c == '+') {
                // convert plus character to space
                if (i - mark > 0) {
                    if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    mark = i;
                }
                m_escaped[m_escapedLen] = ' ';
                m_escapedLen++;
                // flush the decoded-characters buffer when it is full
                if (m_escapedLen >= MAX_URLENCODE_ESCAPED_SIZE) {
                    if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;
                }
                m_state = s_key_start;
                break;
            }
            else {
                // flush unescaped data collected (if any)
                if (m_escapedLen > 0) {
                    if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;
                    mark = i;
                }
            }
            if (c == m_separatorChar) {
                // this happens when there is a key without value. Example: ?p&a=b&k&%61&blah
                // in this case we emit the key, but not the value, and send onKvDone to cause
                // the receiver to process the pair: key will be provided with no value.
                if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                    m_state = s_error;
                    return i;
                }
                if (m_receiver.onKvDone() != 0) {
                    m_state = s_error;
                    return i;
                }
                m_state = s_key_start;
                break;
            }
            if (c == '=') {
                if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                    m_state = s_error;
                    return i;
                }
                m_state = s_value_start;
                break;
            }
            // chunk ends inside the key: report the span collected so far
            if (is_last) {
                if (m_receiver.onKey(buf + mark, (i - mark) + 1) != 0) {
                    m_state = s_error;
                    return i;
                }
            }
            break;
        }
        case s_key_escaped1: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_key_escaped1";
            bool valid;
            unsigned char v = from_hex(c, valid);
            if (!valid) { // character right after the '%' is not a valid hex char.
                // dump escaped chars
                if (m_escapedLen > 0 && m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
                m_escapedLen = 0;
                // return the '%' character back to the output.
                if (m_receiver.onKey("%", 1) != 0) {
                    m_state = s_error; // receiver failure always puts the parser into the error state
                    return i;
                }
                // If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences)
                if (c != '%') {
                    // pass the non-hex character back to the output too.
                    if (m_receiver.onKey(&c, 1) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    // otherwise (the character is not '%'), switch back to the s_key state
                    m_state = s_key_start;
                }
                break;
            }
            // remember the raw digit so it can be emitted literally if the second digit is invalid
            m_escapedCharCandidate = c;
            m_escaped[m_escapedLen] = v << 4;
            m_state = s_key_escaped2;
            break;
        }
        case s_key_escaped2: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_key_escaped2";
            bool valid;
            unsigned char v = from_hex(c, valid);
            if (!valid) {
                // The 2nd character after '%' is not a valid hex digit: emit '%' and the first
                // digit literally, then re-parse the current character as ordinary key data.
                //dump escaped chars
                if (m_escapedLen >0 && m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
                m_escapedLen = 0;
                // return the '%' character back to the output.
                if (m_receiver.onKey("%", 1) != 0) {
                    m_state = s_error;
                    return i;
                }
                // add the character that was thought to be escaped value
                if (m_receiver.onKey(&m_escapedCharCandidate, 1)) {
                    m_state = s_error;
                    return i;
                }
                // re parse the character as a key (i is incremented back to current value)
                i--;
                m_state = s_key_start;
                break;
            }
            m_escapedCharCandidate = 0;
            m_escaped[m_escapedLen] |= v;
            m_escapedLen++;
            if (m_escapedLen >= MAX_URLENCODE_ESCAPED_SIZE) {
                if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
                m_escapedLen = 0;
            }
            m_state = s_key_start;
            break;
        }
        case s_value_start: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_value_start";
            mark = i;
            m_state = s_value;
            // fallthrough //
            CP_FALL_THROUGH;
        }
        case s_value: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_value";
            if (c == '%' && should_decode_percent) {
                // report the literal span before the '%', then start decoding the escape
                if (i - mark > 0) {
                    if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                        m_state = s_error;
                        return i;
                    }
                }
                m_state = s_value_escaped1;
                break;
            }
            else if (c == '+') {
                // convert plus character to space
                if (i - mark > 0) {
                    if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                        m_state = s_error;
                        return i;
                    }
                }
                m_escaped[m_escapedLen] = ' ';
                m_escapedLen++;
                // flush the decoded-characters buffer when it is full
                if (m_escapedLen >= MAX_URLENCODE_ESCAPED_SIZE) {
                    if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;
                }
                m_state = s_value_start;
                break;
            }
            else {
                // flush unescaped data collected (if any)
                if (m_escapedLen > 0) {
                    if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;
                    mark = i;
                }
            }
            if (c == m_separatorChar) {
                if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                    dbgWarning(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push() s_value : failed on value";
                    m_state = s_error;
                    return i;
                }
                if (m_receiver.onKvDone() != 0) {
                    dbgWarning(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push() : s_value : failed on KV";
                    m_state = s_error;
                    return i;
                }
                m_state = s_key_start;
                break;
            }
            // chunk ends inside the value: report the span collected so far
            if (is_last) {
                if (m_receiver.onValue(buf + mark, (i - mark) + 1) != 0) {
                    m_state = s_error;
                    return i;
                }
            }
            break;
        }
        case s_value_escaped1: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_value_escaped1";
            bool valid;
            unsigned char v = from_hex(c, valid);
            if (!valid) { // character right after the '%' is not a valid hex char.
                // dump escaped chars
                if (m_escapedLen > 0 && m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
                m_escapedLen = 0;
                // return the '%' character back to the output.
                if (m_receiver.onValue("%", 1) != 0) {
                    m_state = s_error; // receiver failure always puts the parser into the error state
                    return i;
                }
                // If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences)
                if (c != '%') {
                    // pass the non-hex character back to the output too.
                    if (m_receiver.onValue(&c, 1) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    // otherwise (the character is not '%'), switch back to the s_value state
                    m_state = s_value_start;
                }
                break;
            }
            // remember the raw digit so it can be emitted literally if the second digit is invalid
            m_escapedCharCandidate = c;
            m_escaped[m_escapedLen] = v << 4;
            m_state = s_value_escaped2;
            break;
        }
        case s_value_escaped2: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_value_escaped2";
            bool valid;
            unsigned char v = from_hex(c, valid);
            if (!valid) {
                // The 2nd character after '%' is not a valid hex digit: emit '%' and the first
                // digit literally, then re-parse the current character as ordinary value data.
                //dump escaped chars
                if (m_escapedLen > 0 && m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
                m_escapedLen = 0;
                // return the '%' character back to the output.
                if (m_receiver.onValue("%", 1) != 0) {
                    m_state = s_error;
                    return i;
                }
                // add the character that was thought to be escaped value
                if (m_receiver.onValue(&m_escapedCharCandidate, 1)) {
                    m_state = s_error;
                    return i;
                }
                // re parse the character as a value (i is incremented back to current value)
                i--;
                m_state = s_value_start;
                break;
            }
            m_escapedCharCandidate = 0;
            m_escaped[m_escapedLen] |= v;
            m_escapedLen++;
            if (m_escapedLen >= MAX_URLENCODE_ESCAPED_SIZE) {
                if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
                m_escapedLen = 0;
            }
            m_state = s_value_start;
            break;
        }
        case s_error: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_error";
            return 0;
        }
        default: {
            dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): URL parser unrecoverable error";
            m_state = s_error;
            return 0;
        }
        }// end of switch()
        ++i;
    }

    dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): finished: len=" << len;
    return len;
}
// Signals end of data by pushing a zero-length chunk.
void ParserUrlEncode::finish()
{
    push(NULL, 0);
}
// Returns the parser name ("ParserUrlEncode").
const std::string &ParserUrlEncode::name() const
{
    return m_parserName;
}
bool ParserUrlEncode::error() const {
return m_state == s_error;
}

View File

@@ -0,0 +1,57 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_URL_ENCODE_H__29ebe806
#define __PARSER_URL_ENCODE_H__29ebe806
#include "ParserBase.h"
#include <string.h>
// Capacity of the m_escaped buffer; push() flushes the buffer to the receiver once it holds this many characters.
#define MAX_URLENCODE_ESCAPED_SIZE 16
// Streaming parser for URL-encoded "key=value" pairs joined by a separator character.
// Reports decoded keys (onKey), decoded values (onValue) and the end of each pair (onKvDone)
// to the receiver.
class ParserUrlEncode : public ParserBase {
public:
// separatorChar: character separating one pair from the next.
// should_decode_per: when false, '%' is treated as an ordinary character instead of starting a "%hh" escape.
ParserUrlEncode(IParserStreamReceiver &receiver, char separatorChar = '&', bool should_decode_per = true);
virtual ~ParserUrlEncode();
// Parses the next chunk; data_len == 0 signals end of data. Returns data_len when the whole chunk was consumed.
size_t push(const char *data, size_t data_len);
// Signals end of data (equivalent to push(NULL, 0)).
void finish();
virtual const std::string &name() const;
// True once the parser has entered the s_error state.
bool error() const;
virtual size_t depth() { return 1; }
private:
// States of the push() state machine.
enum state {
s_start,
s_key_start,
s_key,
s_key_escaped1,   // '%' seen in a key, expecting the first hex digit
s_key_escaped2,   // expecting the second hex digit
s_value_start,
s_value,
s_value_escaped1, // '%' seen in a value, expecting the first hex digit
s_value_escaped2, // expecting the second hex digit
s_end,            // NOTE(review): never assigned by push(); possibly unused - confirm
s_error
};
IParserStreamReceiver &m_receiver;
enum state m_state;
unsigned char m_escapedLen; // count of characters loaded in m_escaped[] buffer
char m_escaped[MAX_URLENCODE_ESCAPED_SIZE]; // decoded characters ('+' and "%hh") not yet sent to the receiver
char m_separatorChar;
char m_escapedCharCandidate; // first character after '%', kept so it can be emitted literally if the escape is invalid
bool should_decode_percent;
static const std::string m_parserName;
};
#endif // __PARSER_URL_ENCODE_H__29ebe806

View File

@@ -0,0 +1,334 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserXML.h"
#include "Waf2Util.h"
#include "debug.h"
#include <assert.h>
// Debug flag passed to every dbgTrace/dbgWarning call in this file.
USE_DEBUG_FLAG(D_WAAP_PARSER_XML);
// Parser name string for ParserXML.
const std::string ParserXML::m_parserName = "ParserXML";
void ParserXML::onStartElementNs(
void* ctx,
const xmlChar* localname,
const xmlChar* prefix,
const xmlChar* URI,
int nb_namespaces,
const xmlChar** namespaces,
int nb_attributes,
int nb_defaulted,
const xmlChar** attributes)
{
ParserXML* p = (ParserXML*)ctx;
dbgTrace(D_WAAP_PARSER_XML) << "XML OPEN: '" << localname << "'";
p->m_key.push((const char*)localname, xmlStrlen(localname));
int i;
for (i = 0; i < nb_attributes; i++) {
const xmlChar* attr_localname = attributes[i * 5 + 0];
//const xmlChar *attr_prefix = attributes[i*5+1];
//const xmlChar *attr_URI = attributes[i*5+2];
const xmlChar* attr_value_begin = attributes[i * 5 + 3];
const xmlChar* attr_value_end = attributes[i * 5 + 4];
dbgTrace(D_WAAP_PARSER_XML) << "\tXML ATTR: elem='" << (char*)localname << "', " << attr_localname <<
"='" << std::string((char*)attr_value_begin, (size_t)(attr_value_end - attr_value_begin)) << "'";
p->m_key.push((const char*)attr_localname, xmlStrlen(attr_localname));
if (p->m_receiver.onKv(
p->m_key.c_str(),
p->m_key.size(),
(const char*)attr_value_begin, attr_value_end - attr_value_begin,
BUFFERED_RECEIVER_F_BOTH
) != 0) {
p->m_state = s_error;
}
p->m_key.pop("XML end attribute");
}
// before we add new tracking element to the stack for this new element,
// set "children exists" flag to true for the parent element.
if (!p->m_elemTrackStack.empty()) {
p->m_elemTrackStack.back().hasChildren = true;
}
// when opening new element - start tracking its properties (internal text and existence of subelements)
p->m_elemTrackStack.push_back(ElemTrackInfo());
}
void
ParserXML::onEndElementNs(
void* ctx,
const xmlChar* localname,
const xmlChar* prefix,
const xmlChar* URI)
{
ParserXML* p = (ParserXML*)ctx;
dbgTrace(D_WAAP_PARSER_XML) << "XML CLOSE: '" << localname << "'";
if (p->m_elemTrackStack.empty()) {
dbgWarning(D_WAAP_PARSER_XML) <<
"XML closing tag and elem track stack is empty. This is probably sign of a bug!";
return;
}
ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back();
// Usability optimization: only output kv pair for XML elements that had either sub children
// and/or value within.
// Those "wrapper elements" such as <wrapper><name>john</name><age>21</age></wrapper> only
// contain sub elements. For these we don't emit kv pair.
// However, for truly empty element such as <wrapper></wrapper>, or similar element with
// text: <wrapper>some text</wrapper>, we do output a kv pair.
bool isWrapperElement = elemTrackInfo.hasChildren && (elemTrackInfo.value.size() == 0);
if (!isWrapperElement) {
// Emit tag name as key
if (p->m_receiver.onKey(p->m_key.c_str(), p->m_key.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onValue(elemTrackInfo.value.c_str(), elemTrackInfo.value.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onKvDone() != 0) {
p->m_state = s_error; // error
}
}
// when closing an element - pop its tracking info from the tracking stack
p->m_elemTrackStack.pop_back();
// Also, pop the element's name from m_key stack, so the key name always reflects
// current depth within the elements tree
p->m_key.pop("XML end element");
}
void ParserXML::onCharacters(void* ctx, const xmlChar* ch, int len) {
ParserXML* p = (ParserXML*)ctx;
if (p->m_elemTrackStack.empty()) {
dbgWarning(D_WAAP_PARSER_XML) << "XML text and elem track stack is empty. This is probably sign of a bug!";
return;
}
if ((ch == NULL) || (len == 0)) {
dbgTrace(D_WAAP_PARSER_XML) << "Got empty XML text element. Ignoring.";
return;
}
ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back();
dbgTrace(D_WAAP_PARSER_XML) << "XML TEXT: '[" << std::string((char*)ch, (size_t)len) << "]'";
std::string val = std::string((char*)ch, (size_t)len);
// trim isspace() characters around xml text chunks.
// The chunks can occur multiple times within one value, when text value is intermixed with xml sub-tags.
// for example, for XML source "<a>sta<b>zzz</b>rt</a>", the "a" tag will include two text
// chunks "sta" and "rt"
// which are concatenated here to form the word "start".
// The trimming is done here to prevent false alarms on detection algorithm that sees
// "\n" characters in the XML value.
// Example of input that causes false alarm without this trim is (multiline XML):
// <xml><script>\nclean_xml_value '\n<\/script><\/xml>
Waap::Util::trim(val);
elemTrackInfo.value += val;
}
void
ParserXML::onEntityDeclaration(
void* ctx,
const xmlChar* name,
int type,
const xmlChar* publicId,
const xmlChar* systmeid,
xmlChar* content)
{
dbgTrace(D_WAAP_PARSER_XML) << "ENTITY FOUND WITH VALUE: '" << (content ? (const char*)content : "null") << "'";
ParserXML* p = (ParserXML*)ctx;
std::string kw = "08a80340-06d3-11ea-9f87-0242ac11000f";
if (p->m_receiver.onKey(p->m_key.c_str(), p->m_key.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onValue(kw.data(), kw.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onKvDone() != 0) {
p->m_state = s_error; // error
}
}
// libxml2 error callback (printf-style): formats the message into a bounded stack buffer
// and writes it to the trace log. The parser state is not touched here.
static void onError(void* ctx, const char* msg, ...) {
    static const size_t kFormattedMsgSize = 4096;
    char formatted[kFormattedMsgSize];

    va_list args;
    va_start(args, msg);
    // vsnprintf truncates (and NUL-terminates) messages longer than the buffer
    vsnprintf(formatted, kFormattedMsgSize, msg, args);
    va_end(args);

    dbgTrace(D_WAAP_PARSER_XML) << "LIBXML (xml) onError: " << std::string(formatted);
}
// Constructs an idle XML parser that reports key/value pairs to the given receiver.
// The libxml2 push-parser context is not created here; it is created by push() once the
// first bytes of the stream were buffered.
ParserXML::ParserXML(IParserStreamReceiver& receiver)
    : m_receiver(receiver),
      m_state(s_start),
      m_bufLen(0),
      m_key("xml_parser"),
      m_pushParserCtxPtr(NULL)
{
    dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::ParserXML()";

    // TODO:: is zeroing this really needed?
    memset(m_buf, 0, sizeof(m_buf));

    // Custom SAX handler: start from an all-zero table and hook only the callbacks we need
    memset(&m_saxHandler, 0, sizeof(m_saxHandler));
    xmlSAXHandler& sax = m_saxHandler;
    sax.initialized = XML_SAX2_MAGIC;
    sax.startElementNs = onStartElementNs;
    sax.endElementNs = onEndElementNs;
    sax.characters = onCharacters;
    sax.entityDecl = onEntityDeclaration;
    sax.error = onError;

    // Ugly: push first element into key (it will be ignored since we will never call
    // the "first()" method of this key within XML parser object.
    m_key.push("xml", 3);
}
// Releases the libxml2 push-parser context, if push() ever created one.
ParserXML::~ParserXML() {
    dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::~ParserXML()";

    if (m_pushParserCtxPtr == NULL) {
        // Parsing never started - nothing to clean up
        return;
    }

    xmlFreeParserCtxt(m_pushParserCtxPtr);
}
// Decides whether a libxml2 error should be acted upon.
// Returns true when the error must be treated as a real parsing failure,
// false when it is one of the known-benign errors and should be ignored.
bool ParserXML::filterErrors(xmlErrorPtr xmlError) {
    dbgDebug(D_WAAP_PARSER_XML) << "ParserXML::filterErrors(): xmlError " << xmlError->code << ": '" <<
        xmlError->message << "'";

    switch (xmlError->code) {
        // "XML declaration allowed only at the start of the document" - this includes the case of
        // "multiple XML declarations" we've seen sent by some SOAP clients.
        // The parser runs in permissive mode (XML_PARSE_RECOVER), so it recovers and parses the XML
        // correctly, yet the error code is still reported. Ignoring it prevents the WAAP code from
        // considering the XML "broken" and scanning the XML source as-is, which would cause false alarms.
        case XML_ERR_RESERVED_XML_NAME:
        // Undeclared entities are ignored as well.
        case XML_ERR_UNDECLARED_ENTITY:
            dbgDebug(D_WAAP_PARSER_XML) << "ParserXML::filterErrors(): ignoring the '" << xmlError->code << ": " <<
                xmlError->message << "' xml parser error.";
            return false;
        default:
            return true;
    }
}
// Feeds the next chunk of the XML stream into the parser.
//
// The first few bytes are accumulated in m_buf so that libxml2 can autodetect the text encoding
// when the push-parser context is created; everything after that is handed to xmlParseChunk().
//
// data, data_len - the chunk to parse. data_len == 0 signals end of stream (see finish()).
//
// Returns:
//   - for a regular chunk: the number of bytes consumed (data_len on success), or 0 when the parser
//     is, or has just entered, the error state;
//   - for the end-of-stream signal: m_bufLen on success, or -1 on error (which, because the return
//     type is size_t, reaches the caller as the maximum size_t value).
size_t ParserXML::push(const char* data, size_t data_len) {
    size_t i = 0;
    char c;

    if (data_len == 0) {
        dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): end of data signal! m_state=" << m_state;
        // Send zero-length chunk with "terminate" flag enabled to signify end-of-stream
        // NOTE(review): m_pushParserCtxPtr is still NULL here when fewer bytes than needed to start
        // parsing were ever pushed; this relies on libxml2 tolerating a NULL context - confirm.
        if (xmlParseChunk(m_pushParserCtxPtr, m_buf, 0, 1)) {
            xmlErrorPtr xmlError = xmlCtxtGetLastError(m_pushParserCtxPtr);
            if (xmlError && filterErrors(xmlError)) {
                dbgDebug(D_WAAP_PARSER_XML) << "ParserXML::push(): xmlError: code=" << xmlError->code << ": '" <<
                    xmlError->message << "'";
                m_state = s_error; // error
                return -1;
            }
        }
        return m_bufLen;
    }

    // Number of bytes to accumulate before parsing starts. Raised to FIRST_BUFFER_SIZE when a '?'
    // is seen among the first bytes. Note this threshold is local, i.e. re-initialized on every call.
    int expected_buffer_len = FIRST_BUFFER_SIZE - 1;

    while (i < data_len) {
        c = data[i];

        switch (m_state) {
            case s_start:
                dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_start";
                m_state = s_accumulate_first_bytes;

                // fall through //
                CP_FALL_THROUGH;
            case s_accumulate_first_bytes:
                dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_accumulate_first_bytes. c='" << data[i] <<
                    "'; m_bufLen=" << m_bufLen << "; i=" << i;
                m_buf[m_bufLen] = c;
                m_bufLen++;
                if (c == '?') {
                    expected_buffer_len = FIRST_BUFFER_SIZE;
                }
                // Use '>=' rather than '==': the threshold is reset on every call, so when a '?' raised
                // it in a previous call and that call's data ended before the buffer filled up, m_bufLen
                // can already be beyond this call's threshold. With '==' the state would then never
                // advance and the following bytes would be written past the end of m_buf.
                if (m_bufLen >= expected_buffer_len) {
                    m_state = s_start_parsing;
                }
                break;

            case s_start_parsing:
                dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_start_parsing. sending len=" << m_bufLen << ": '" <<
                    std::string(m_buf, m_bufLen) << "'; i=" << i;
                // Create XML SAX (push parser) context
                // It is important to buffer at least first 4 bytes of input stream so libxml can determine text encoding!
                m_pushParserCtxPtr = xmlCreatePushParserCtxt(&m_saxHandler, this, m_buf, m_bufLen, NULL);

                // Enable "permissive mode" for XML SAX parser.
                // In this mode, the libxml parser doesn't stop on errors, but still reports them!
                xmlCtxtUseOptions(m_pushParserCtxPtr, XML_PARSE_RECOVER);

                m_state = s_parsing;

                // fall through //
                CP_FALL_THROUGH;
            case s_parsing:
                dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_parsing. sending len=" << (int)(data_len - i) <<
                    ": '" << std::string(data + i, data_len - i) << "'; i=" << i;
                if (m_pushParserCtxPtr) {
                    // Hand the whole remainder of this chunk to libxml2 in one go
                    if (xmlParseChunk(m_pushParserCtxPtr, data + i, data_len - i, 0)) {
                        xmlErrorPtr xmlError = xmlCtxtGetLastError(m_pushParserCtxPtr);
                        if (xmlError && filterErrors(xmlError)) {
                            dbgDebug(D_WAAP_PARSER_XML) << "ParserXML::push(): xmlError: code=" << xmlError->code <<
                                ": '" << xmlError->message << "'";
                            m_state = s_error; // error
                            return 0;
                        }
                    }

                    // success (whole buffer consumed)
                    i = data_len - 1; // take into account ++i at the end of the state machine loop
                }
                break;

            case s_error:
                dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_error";
                return 0;
        }

        ++i;
    }

    dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): exiting with param(len)=" << data_len << ": i=" << i;
    return i;
}
// Signals end of stream to the parser by pushing a zero-length chunk.
// The value returned by push() is intentionally not inspected here.
void ParserXML::finish() {
    push(nullptr, 0);
}
// Returns the parser's name (the class-wide m_parserName constant).
const std::string &
ParserXML::name() const {
    return ParserXML::m_parserName;
}
// Tells whether the parser has entered the error state.
bool ParserXML::error() const {
    const bool inErrorState = (m_state == s_error);
    return inErrorState;
}

View File

@@ -0,0 +1,101 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_XML_H__5bf3b834
#define __PARSER_XML_H__5bf3b834
#include "ParserBase.h"
#include "KeyStack.h"
#include <libxml/xmlstring.h>
#include <libxml/xmlerror.h>
#include <libxml/parser.h>
#define FIRST_BUFFER_SIZE 5 // must buffer at least 4 first bytes to allow unicode autodetection (BOM).
// Streaming XML parser built on the libxml2 SAX2 push-parser interface.
// Input is fed with push() in arbitrary chunks; parsed elements and attributes are reported to
// the IParserStreamReceiver as key/value pairs, where the key is the current m_key stack.
// NOTE(review): the class owns a raw libxml2 context freed in the destructor, but copying is not
// disabled - copying an instance would presumably lead to a double free; confirm it is never copied.
class ParserXML : public ParserBase {
public:
ParserXML(IParserStreamReceiver &receiver);
virtual ~ParserXML();
// Feed next chunk of the stream; data_len == 0 means end of stream
size_t push(const char *data, size_t data_len);
// Signal end of stream (pushes a zero-length chunk)
void finish();
virtual const std::string &name() const;
// True once the parser entered the s_error state
bool error() const;
// Depth of the key stack, not counting the first element when the stack is not empty
// (the constructor pushes an artificial "xml" element there).
virtual size_t depth() { return (m_key.depth() > 0) ? m_key.depth()-1 : m_key.depth(); }
private:
// States of the push() state machine
enum state {
s_start,
s_accumulate_first_bytes,
s_start_parsing,
s_parsing,
s_error
};
// Information tracked per each element in current stack of tracked XML elements
struct ElemTrackInfo {
std::string value;
bool hasChildren;
ElemTrackInfo():hasChildren(false) {
// when element is just opened - we still didn't see any children,
// hence start with the "hasChildren" flag as false.
// This flag will be enabled once we meet the opening of a subelement.
// Also, we start from empty value string and gradually append to it each
// time we receive next piece of text from XML parser.
// The collected value is then emitted when element finishes.
}
};
// libxml2 SAX callbacks. The ctx argument is the ParserXML instance that was registered
// when the push-parser context was created.
static void onStartElementNs(
void *ctx,
const xmlChar *localname,
const xmlChar *prefix,
const xmlChar *URI,
int nb_namespaces,
const xmlChar **namespaces,
int nb_attributes,
int nb_defaulted,
const xmlChar **attributes);
static void onEndElementNs(
void* ctx,
const xmlChar* localname,
const xmlChar* prefix,
const xmlChar* URI);
static void onCharacters(
void *ctx,
const xmlChar *ch,
int len);
static void onEntityDeclaration(
void* ctx,
const xmlChar* name,
int type,
const xmlChar* publicId,
const xmlChar* systmeid,
xmlChar* content);
// Filter out errors that should be ignored. Returns true if error should be treated,
// false if an error should be ignored
bool filterErrors(xmlErrorPtr xmlError);
// Receiver of the parsed key/value pairs (not owned)
IParserStreamReceiver &m_receiver;
enum state m_state;
// buffer first few bytes of stream (required before calling SAX parser for the first time)
char m_buf[FIRST_BUFFER_SIZE];
// number of bytes currently stored in m_buf
int m_bufLen;
// stack of element/attribute names forming the current key
KeyStack m_key;
// per-element tracking info, one entry per currently open element
std::vector<ElemTrackInfo> m_elemTrackStack;
xmlSAXHandler m_saxHandler;
// libxml2 push-parser context; NULL until parsing actually starts
xmlParserCtxtPtr m_pushParserCtxPtr;
public:
static const std::string m_parserName;
};
#endif // __PARSER_XML_H__5bf3b834

View File

@@ -0,0 +1,53 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// #define WAF2_LOGGING_ENABLE
#include "PatternMatcher.h"
#include "Waf2Regex.h"
#include <string>
#include <boost/algorithm/string.hpp>
#include <fnmatch.h>
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP);
// PatternMatcherWildcard implementation
// Stores the glob pattern as-is; no compilation or validation happens at construction time.
PatternMatcherWildcard::PatternMatcherWildcard(const std::string &pattern)
    : m_pattern(pattern)
{
    dbgTrace(D_WAAP) << "Compiled pattern: '" << m_pattern.c_str() << "'\n";
}
// Returns true when the value matches the stored glob pattern.
bool PatternMatcherWildcard::match(const std::string &value) const {
    // Matching is delegated to the Unix filename (glob) matcher.
    // fnmatch() returns 0 only on a successful match, FNM_NOMATCH when there is no match,
    // and another non-zero value on error. An error is deliberately treated as "no match" here.
    const int fnmatchResult = ::fnmatch(m_pattern.c_str(), value.c_str(), 0);
    return fnmatchResult == 0;
}
// PatternMatcherRegex implementation
// Compiles the regex pattern. A compilation failure is recorded in m_regexError
// (and logged) rather than raised from here.
PatternMatcherRegex::PatternMatcherRegex(const std::string &pattern)
    : m_pattern(pattern),
      m_regexError(false),
      m_regex(pattern, m_regexError, "patternMatcherRegex_" + pattern)
{
    if (!m_regexError) {
        return;
    }

    dbgWarning(D_WAAP) << "Failed compiling regex pattern: '" << m_pattern.c_str() << "'\n";
}
// Returns true when the value matches the regex. A pattern that failed to compile never matches.
bool PatternMatcherRegex::match(const std::string &value) const {
    return !m_regexError && m_regex.hasMatch(value);
}

View File

@@ -0,0 +1,47 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PATTERN_MATCHER_H__9baf179a
#define __PATTERN_MATCHER_H__9baf179a
#include "Waf2Regex.h"
#include <vector>
#include <string>
#include <memory>
// Common interface of all pattern matchers.
// Instances are handled polymorphically (see PatternMatcherBasePtr), hence the virtual destructor:
// it guarantees the derived matcher is destroyed correctly when deleted through a base pointer.
class PatternMatcherBase {
public:
    virtual ~PatternMatcherBase() = default;

    // Returns true when the given value matches the pattern held by the matcher.
    virtual bool match(const std::string &value) const =0;
};
// Matcher for shell-style wildcard (glob) patterns.
class PatternMatcherWildcard : public PatternMatcherBase {
public:
PatternMatcherWildcard(const std::string &pattern);
// True when the value matches the wildcard pattern
virtual bool match(const std::string &value) const;
private:
// the wildcard pattern, stored as given
std::string m_pattern;
};
// Matcher for regular expression patterns.
class PatternMatcherRegex : public PatternMatcherBase {
public:
PatternMatcherRegex(const std::string &pattern);
// True when the value matches the regex
virtual bool match(const std::string &value) const;
private:
// the regex source text
std::string m_pattern;
// error flag handed to the Regex constructor; declared before m_regex so it is initialized first
bool m_regexError;
Regex m_regex;
};
typedef std::shared_ptr<PatternMatcherBase> PatternMatcherBasePtr;
#endif // __PATTERN_MATCHER_H__9baf179a

View File

@@ -0,0 +1,80 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "RateLimiter.h"
namespace Waap {
namespace Util {
// Creates a limiter allowing up to `events` events per `interval`.
// One hit counter is allocated per second of the interval; a zero interval yields an empty
// counters array, which event() treats as "always pass".
RateLimiter::RateLimiter(unsigned events, std::chrono::seconds interval)
    : m_max_events(events),
      m_interval(interval),
      m_hitsPerSecond(),
      m_recentIdx(0),
      m_recentHitTime(0),
      m_hitsCount(0)
{
    const auto secondsInWindow = interval.count();
    m_hitsPerSecond.resize(secondsInWindow, 0);
}
// Forgets all counted hits and restarts the measuring window at `now`.
void
RateLimiter::clear(const std::chrono::seconds& now)
{
    // Zero every per-second slot while keeping the number of slots unchanged
    m_hitsPerSecond.assign(m_hitsPerSecond.size(), 0);
    m_recentIdx = 0;
    m_recentHitTime = now;
    m_hitsCount = 0;
}
// Counts one event happening at time `now`.
// Returns true when the event is within the allowed rate (should pass), false when the limit
// for the recent interval is exceeded (should be blocked).
bool
RateLimiter::event(const std::chrono::seconds& now)
{
    // A limiter configured with a 0-seconds interval has no slots - it always passes
    if (m_hitsPerSecond.empty()) {
        return true;
    }

    // Start from a clean window on the very first event, or after a whole interval passed without events
    const bool neverHit = (m_recentHitTime == std::chrono::seconds(0));
    if (neverHit || now - m_recentHitTime >= m_interval) {
        clear(now);
    }

    // Advance the circular buffer one second at a time until it catches up with `now`
    for (; m_recentHitTime < now; m_recentHitTime += std::chrono::seconds(1)) {
        // The slot following the head is the tail (oldest second): wrap --->[HEAD][TAIL]---> wrap
        const unsigned nextIdx = m_recentIdx + 1;
        m_recentIdx = (nextIdx >= m_hitsPerSecond.size()) ? 0 : nextIdx;

        // Hits of the oldest second leave the window: deduct them from the total and recycle the slot
        unsigned &slot = m_hitsPerSecond[m_recentIdx];
        m_hitsCount -= slot;
        slot = 0;
    }

    // Account for this event both in the current second's slot and in the rolling total
    ++m_hitsPerSecond[m_recentIdx];
    ++m_hitsCount;

    return !(m_hitsCount > m_max_events);
}
}
}

View File

@@ -0,0 +1,42 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#include <vector>
namespace Waap {
namespace Util {
// Simple rate limiter primitive that collects events and only allows up to X events per Y seconds.
// For each event, call RateLimiter::event() passing a real or simulated timestamp (in seconds).
// The returned boolean value tells the caller whether this event must pass (true) or be blocked (false).
// Hits are kept in a circular buffer with one counter per second of the interval.
class RateLimiter {
public:
// events - max events allowed per interval; interval - length of the sliding window in seconds
RateLimiter(unsigned int events, std::chrono::seconds interval);
// Reset all counters and restart the window at the given time
void clear(const std::chrono::seconds& now);
// Count an event at the given time; returns true if it is within the allowed rate
bool event(const std::chrono::seconds& now);
private:
unsigned m_max_events; // max events allowed during the recent interval window
std::chrono::seconds m_interval; // configured interval window
std::vector<unsigned> m_hitsPerSecond; // array of hitcounts per second (remembers up to interval recent seconds)
unsigned m_recentIdx; // index of the most recent second's slot in m_hitsPerSecond
std::chrono::seconds m_recentHitTime; // timestamp of the most recent second; 0 means "no event seen yet"
unsigned m_hitsCount; // total events during last interval seconds (rolling update)
};
}
}

View File

@@ -0,0 +1,255 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <chrono>
#include <boost/regex.hpp>
#include "RateLimiting.h"
#include "Waf2Engine.h"
#include "agent_core_utilities.h"
#define RATE_LIMITING_LRU_SIZE 10000
namespace Waap {
namespace RateLimiting {
// Tells whether rate limiting enforcement is enabled by the policy.
bool Policy::getRateLimitingEnforcementStatus()
{
    const bool enforcementEnabled = m_rateLimiting.enable;
    return enforcementEnabled;
}
// Two keys are equal when both the url and the source parts are equal.
bool
EntryKey::operator==(EntryKey const& other) const
{
    if (!(url == other.url)) {
        return false;
    }
    return source == other.source;
}
// Enforcement settings are equal when their "enable" flags agree.
bool
Policy::RateLimitingEnforcement::operator==(const Policy::RateLimitingEnforcement &other) const
{
    const bool sameFlag = (enable == other.enable);
    return sameFlag;
}
// Policies are equal when they hold equal rule lists and equal enforcement settings.
bool
Policy::operator==(const Policy &other) const {
    if (!(rules == other.rules)) {
        return false;
    }
    return m_rateLimiting == other.m_rateLimiting;
}
// Rules are equal when their action, rate and both filters are equal.
bool
Policy::Rule::operator==(const Policy::Rule &other) const {
    if (!(action == other.action)) return false;
    if (!(rate == other.rate)) return false;
    if (!(sourceFilter == other.sourceFilter)) return false;
    return uriFilter == other.uriFilter;
}
// Actions are equal when both the quarantine time and the action type are equal.
bool
Policy::Rule::Action::operator==(const Policy::Rule::Action &other) const {
    if (quarantineTimeSeconds != other.quarantineTimeSeconds) {
        return false;
    }
    return type == other.type;
}
// Rates are equal when they allow the same number of events over the same interval.
bool
Policy::Rule::Rate::operator==(const Policy::Rule::Rate &other) const {
    if (events != other.events) {
        return false;
    }
    return interval == other.interval;
}
// Source filters are equal when grouping, scope and the list of regex patterns (as text) are equal.
// The compiled regexes are not compared - they are derived from the pattern strings.
bool
Policy::Rule::SourceFilter::operator==(const Policy::Rule::SourceFilter &other) const {
    const bool sameMode = (groupBy == other.groupBy) && (scope == other.scope);
    if (!sameMode) {
        return false;
    }

    // Vector equality checks the sizes first and then compares the patterns element by element
    return specific_source_regexes_pattern == other.specific_source_regexes_pattern;
}
// URI filters are equal when grouping, scope and the list of regex patterns (as text) are equal.
// The compiled regexes are not compared - they are derived from the pattern strings.
bool
Policy::Rule::UriFilter::operator==(const Policy::Rule::UriFilter &other) const {
    const bool sameMode = (groupBy == other.groupBy) && (scope == other.scope);
    if (!sameMode) {
        return false;
    }

    // Vector equality checks the sizes first and then compares the patterns element by element
    return specific_uri_regexes_pattern == other.specific_uri_regexes_pattern;
}
// Creates a tracking entry in the MEASURING state with its own rate limiter
// (up to `events` events per `interval`) and no quarantine deadline.
TrackEntry::TrackEntry(unsigned int events, std::chrono::seconds interval)
    : eventRateLimiter(events, interval),
      state(TrackEntry::State::MEASURING),
      quarantinedUntil()
{}
// Counts one event at time `now`, lifting an expired quarantine first.
// Returns true if the entry's rate limiter is not saturated (should allow),
// or false if it is (should block).
bool
TrackEntry::event(std::chrono::seconds now)
{
    // Release the temporary block once its deadline has arrived
    const bool quarantineExpired = (state == TrackEntry::State::QUARANTINED) && (now >= quarantinedUntil);
    if (quarantineExpired) {
        state = TrackEntry::State::MEASURING;
    }

    // The event is counted regardless of the quarantine state
    const bool withinRate = eventRateLimiter.event(now);
    return withinRate;
}
// Puts the entry into quarantine that lasts until the given time.
void
TrackEntry::quarantineUntil(std::chrono::seconds until)
{
    quarantinedUntil = until;
    state = TrackEntry::State::QUARANTINED;
}
// An entry is blocked whenever it is in any state other than MEASURING.
bool
TrackEntry::isBlocked() const
{
    const bool measuring = (state == TrackEntry::State::MEASURING);
    return !measuring;
}
// Builds the rate limiting state for a policy: one (initially empty) LRU tracking table per rule.
State::State(const std::shared_ptr<Policy> &policy)
    : policy(policy),
      perRuleTrackingTable()
{
    // Keep adding tables until every rule has its own rate limiter states tracking table
    const size_t ruleCount = policy->rules.size();
    while (perRuleTrackingTable.size() < ruleCount) {
        perRuleTrackingTable.push_back(std::make_shared<EntriesLru>(RATE_LIMITING_LRU_SIZE));
    }
}
// Returns true when the value matches at least one regex of the list.
// Null entries are skipped; an empty list never matches.
static bool
matchOneOfRegexes(const std::string& value, const std::vector<std::shared_ptr<boost::regex>> &regexesList)
{
    for (const auto &candidate : regexesList) {
        if (candidate == nullptr) {
            continue;
        }
        if (NGEN::Regex::regexMatch(__FILE__, __LINE__, value, *candidate)) {
            return true;
        }
    }

    return false;
}
// Runs all rate limiting rules of the policy against one request.
// sourceIdentifier - identifier of the request's source, matched against source filters
// uriStr           - request URI, matched against URI filters
// now              - current time in seconds
// log              - [out] set to true when at least one rule wants the request to be logged
// Returns true if the request should be allowed, false if at least one rule blocks it.
bool
State::execute(const std::string& sourceIdentifier, const std::string& uriStr, std::chrono::seconds now, bool& log)
{
bool allow = true;
log = false;
// Run rules one by one.
for (unsigned ruleId=0; ruleId < policy->rules.size(); ++ruleId) {
const Policy::Rule &rule = policy->rules[ruleId];
const Policy::Rule::UriFilter &uriFilter = rule.uriFilter;
const Policy::Rule::SourceFilter &sourceFilter = rule.sourceFilter;
const Policy::Rule::Rate &rate = rule.rate;
const Policy::Rule::Action &action = rule.action;
// Get rate limiter states tracking table specific to current rule
std::shared_ptr<EntriesLru> table = perRuleTrackingTable[ruleId];
// Build a key to look up an entry
EntryKey entryKey;
// Filter out unmatched Urls
if (uriFilter.scope == Waap::RateLimiting::Policy::Rule::UriFilter::Scope::SPECIFIC
&& !matchOneOfRegexes(uriStr, uriFilter.specific_uri_regexes))
{
continue;
}
// Filter out unmatched Sources
if (sourceFilter.scope == Waap::RateLimiting::Policy::Rule::SourceFilter::Scope::SPECIFIC
&& !matchOneOfRegexes(sourceIdentifier, sourceFilter.specific_source_regexes))
{
continue;
}
if (uriFilter.groupBy == Policy::Rule::UriFilter::GroupBy::URL) {
// Include the request URI in the key (events are counted per URI)
entryKey.url = uriStr;
}
if (sourceFilter.groupBy == Policy::Rule::SourceFilter::GroupBy::SOURCE) {
// Include the HTTP source ID in the key (events are counted per source)
entryKey.source = sourceIdentifier;
}
// Find entry in LRU, or create new
std::shared_ptr<TrackEntry> trackEntry;
if (!table->get(entryKey, trackEntry)) {
trackEntry = std::make_shared<TrackEntry>(rate.events, std::chrono::seconds(rate.interval));
}
// Insert or update an entry in LRU (this moves entry up if exist, or inserts new, possibly expiring old ones
// to keep the LRU size under control).
table->insert(std::make_pair(entryKey, trackEntry));
// Count this event in the entry's rate limiter. Release temporary block if time arrived.
if (trackEntry->event(now) == false) {
// TrackEntry's rate limiter is saturated (too many requests) - act according to rule's Action
switch (action.type) {
case Policy::Rule::Action::Type::DETECT:
// log block action.
log = true;
// Detect only: the request itself is not blocked by this rule
break;
case Policy::Rule::Action::Type::QUARANTINE:
// Mark this entry blocked temporarily, for at least X seconds
// (the actual blocking and logging is done by the isBlocked() check below)
trackEntry->quarantineUntil(now + std::chrono::seconds(action.quarantineTimeSeconds));
break;
case Policy::Rule::Action::Type::RATE_LIMIT:
// log block action.
log = true;
// Block this event only
allow = false;
break;
}
}
// If at least one of the rules says "block" - block the request
if (trackEntry->isBlocked()) {
// log block action.
log = true;
allow = false;
}
}
return allow;
}
}
}

View File

@@ -0,0 +1,337 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lru_cache_map.h"
#include "RateLimiter.h"
#include <string>
#include <chrono>
#include <cereal/types/vector.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/archives/json.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/regex.hpp>
#include <memory>
class Waf2Transaction;
namespace Waap {
namespace RateLimiting {
// Rate limiting policy: a list of rules plus the global enforcement switch.
// The policy and all of its nested parts are deserialized with cereal; an unknown enum value
// or an invalid regex pattern in the input surfaces as an exception thrown during deserialization.
struct Policy {
    // A single rate limiting rule: which requests it applies to (uriFilter, sourceFilter),
    // the allowed rate, and the action taken once the rate is exceeded.
    struct Rule {
        // Selects the URIs a rule applies to and tells whether events are counted per URI
        struct UriFilter {
            enum class GroupBy {
                GLOBAL, // "all uris": one counter shared by all URIs
                URL     // "single uri": separate counter per URI
            };

            enum class Scope {
                ALL,     // rule applies to every URI
                SPECIFIC // rule applies only to URIs matching one of specific_uri_regexes
            };

            // Deserialize the Scope enum (case-insensitive). Throws cereal::Exception on unknown value.
            Scope strScopeToEnum(std::string const &value)
            {
                if (boost::iequals(value, "all")) {
                    return Scope::ALL;
                }
                else if (boost::iequals(value, "specific")) {
                    return Scope::SPECIFIC;
                }
                else {
                    throw cereal::Exception(
                        "Invalid value for RateLimiting::Policy::Rule::UriFilter::Scope='" + value + "'");
                }
            }

            // Deserialize the GroupBy enum (case-insensitive). Throws cereal::Exception on unknown value.
            GroupBy strGroupByToEnum(std::string const &value)
            {
                if (boost::iequals(value, "all uris")) {
                    return GroupBy::GLOBAL;
                }
                else if (boost::iequals(value, "single uri")) {
                    return GroupBy::URL;
                }
                else {
                    throw cereal::Exception(
                        "Invalid value for RateLimiting::Policy::Rule::UriFilter::GroupBy='" + value + "'");
                }
            }

            template <typename _A>
            void serialize(_A &ar)
            {
                std::string groupByStr;
                ar(cereal::make_nvp("groupBy", groupByStr));
                groupBy = strGroupByToEnum(groupByStr);

                std::string scopeStr;
                ar(cereal::make_nvp("scope", scopeStr));
                scope = strScopeToEnum(scopeStr);

                // The list of URI patterns is only read (and compiled) for the SPECIFIC scope
                if(scope == Scope::SPECIFIC)
                {
                    ar(cereal::make_nvp("specific_uris", specific_uri_regexes_pattern));
                    specific_uri_regexes.clear();

                    for (auto &specific_uri_pattern : specific_uri_regexes_pattern)
                    {
                        specific_uri_regexes.push_back(std::make_shared<boost::regex>(specific_uri_pattern));
                    }
                }
            }

            bool operator==(const Policy::Rule::UriFilter &other) const;

            GroupBy groupBy;
            std::vector<std::shared_ptr<boost::regex>> specific_uri_regexes; // compiled from the patterns below
            std::vector<std::string> specific_uri_regexes_pattern;           // patterns as they appear in the policy
            Scope scope;
        };

        // Selects the sources a rule applies to and tells whether events are counted per source
        struct SourceFilter {
            enum class GroupBy {
                GLOBAL, // "all sources": one counter shared by all sources
                SOURCE  // "single source": separate counter per source
            };

            enum class Scope {
                ALL,     // rule applies to every source
                SPECIFIC // rule applies only to sources matching one of specific_source_regexes
            };

            // Deserialize the Scope enum (case-insensitive). Throws cereal::Exception on unknown value.
            Scope strScopeToEnum(std::string const &value)
            {
                if (boost::iequals(value, "all")) {
                    return Scope::ALL;
                }
                else if (boost::iequals(value, "specific")) {
                    return Scope::SPECIFIC;
                }
                else {
                    throw cereal::Exception(
                        "Invalid value for RateLimiting::Policy::Rule::SourceFilter::Scope='" + value + "'");
                }
            }

            // Deserialize the GroupBy enum (case-insensitive). Throws cereal::Exception on unknown value.
            GroupBy strToEnum(std::string const &value)
            {
                if (boost::iequals(value, "all sources")) {
                    return GroupBy::GLOBAL;
                }
                else if (boost::iequals(value, "single source")) {
                    return GroupBy::SOURCE;
                }
                else {
                    throw cereal::Exception(
                        "Invalid value for RateLimiting::Policy::Rule::SourceFilter::GroupBy='" + value + "'");
                }
            }

            template <typename _A>
            void serialize(_A &ar) {
                std::string groupByStr;
                ar(cereal::make_nvp("groupBy", groupByStr));
                groupBy = strToEnum(groupByStr);

                std::string scopeStr;
                ar(cereal::make_nvp("scope", scopeStr));
                scope = strScopeToEnum(scopeStr);

                // The list of source patterns is only read (and compiled) for the SPECIFIC scope
                if(scope == Scope::SPECIFIC)
                {
                    ar(cereal::make_nvp("specific_sources", specific_source_regexes_pattern));
                    specific_source_regexes.clear();

                    for (auto &specific_source_pattern : specific_source_regexes_pattern) {
                        specific_source_regexes.push_back(std::make_shared<boost::regex>(specific_source_pattern));
                    }
                }
            }

            bool operator==(const Policy::Rule::SourceFilter &other) const;

            GroupBy groupBy;
            std::vector<std::shared_ptr<boost::regex>> specific_source_regexes; // compiled from the patterns below
            std::vector<std::string> specific_source_regexes_pattern;           // patterns as they appear in the policy
            Scope scope;
        };

        // Allowed rate: up to `events` events per `interval` seconds
        struct Rate {
            template <typename _A>
            void serialize(_A &ar) {
                ar(cereal::make_nvp("interval", interval));
                ar(cereal::make_nvp("events", events));
            }

            bool operator==(const Policy::Rule::Rate &other) const;

            unsigned interval; // Interval in seconds
            unsigned events; // Events allowed during the interval
        };

        // What to do once the allowed rate is exceeded
        struct Action {
            enum class Type {
                DETECT,
                QUARANTINE,
                RATE_LIMIT
            };

            // Deserialize the Type enum (case-insensitive). Throws cereal::Exception on unknown value.
            Type strToEnum(std::string const &value)
            {
                if (boost::iequals(value, "detect")) {
                    return Type::DETECT;
                }
                else if (boost::iequals(value, "quarantine")) {
                    return Type::QUARANTINE;
                }
                else if (boost::iequals(value, "rate limit")) {
                    return Type::RATE_LIMIT;
                }
                else {
                    throw cereal::Exception(
                        "Invalid value for RateLimiting::Policy::Action::Type='" + value + "'");
                }
            }

            template <typename _A>
            void serialize(_A &ar) {
                std::string typeStr;
                ar(cereal::make_nvp("type", typeStr));
                type = strToEnum(typeStr);

                // The quarantine time is only read for the QUARANTINE action; otherwise it stays 0
                quarantineTimeSeconds = 0;
                if (type == Type::QUARANTINE) {
                    ar(cereal::make_nvp("quarantineTimeSeconds", quarantineTimeSeconds));
                }
            }

            bool operator==(const Policy::Rule::Action &other) const;

            Type type;
            unsigned quarantineTimeSeconds; // time to block (in seconds), relevant only for QUARANTINE action type
        };

        template <typename _A>
        void serialize(_A &ar) {
            ar(cereal::make_nvp("uriFilter", uriFilter));
            ar(cereal::make_nvp("sourceFilter", sourceFilter));
            ar(cereal::make_nvp("rate", rate));
            ar(cereal::make_nvp("action", action));
        }

        bool operator==(const Rule &other) const;

        UriFilter uriFilter;
        SourceFilter sourceFilter;
        Rate rate;
        Action action;
    };

    // Global enforcement switch: enabled only when "rateLimitingEnforcement" equals "prevent"
    // (compared case-insensitively); disabled by default.
    class RateLimitingEnforcement
    {
    public:
        RateLimitingEnforcement()
        :
        enable(false)
        {
        }

        template <typename _A>
        RateLimitingEnforcement(_A &ar)
        :
        enable(false)
        {
            std::string level;
            ar(cereal::make_nvp("rateLimitingEnforcement", level));
            level = boost::algorithm::to_lower_copy(level);
            if (level == "prevent") {
                enable = true;
            }
        }

        bool operator==(const Policy::RateLimitingEnforcement &other) const;

        bool enable;
    };

    std::vector<Rule> rules;
    RateLimitingEnforcement m_rateLimiting;

    Policy() {}

    bool getRateLimitingEnforcementStatus();
    bool operator==(const Policy &other) const;

    // Deserializing constructor: reads the enforcement switch first (via m_rateLimiting),
    // then the list of rules stored under the "rateLimiting" key.
    template <typename _A>
    Policy(_A& ar) : m_rateLimiting(ar) {
        ar(cereal::make_nvp("rateLimiting", rules));
    }
};
// Key used to identify a specific rate limiting entry within a rule's tracking table.
// Either part may be left empty, in which case it does not take part in telling entries apart.
struct EntryKey {
std::string url;
std::string source;
// comparison operator should be implemented to use this struct as a key in an LRU cache.
bool operator==(EntryKey const& other) const;
};
// Hash support for EntryKey, so it can serve as a key in unordered (hashed) containers such as the LRU.
// Both parts of the key are mixed into the hash: the url first, then the source.
inline std::size_t hash_value(EntryKey const &entryKey)
{
    std::size_t seed = 0;
    boost::hash_combine(seed, entryKey.url);
    boost::hash_combine(seed, entryKey.source);
    return seed;
}
// Rate limiting tracking entry: a rate limiter plus the quarantine state of one tracked key
struct TrackEntry {
enum State {
MEASURING,
QUARANTINED
};
// counts the events seen for this entry
Waap::Util::RateLimiter eventRateLimiter;
State state;
// time when the quarantine ends; meaningful only in the QUARANTINED state
std::chrono::seconds quarantinedUntil;
TrackEntry(unsigned int events, std::chrono::seconds interval);
// Count an event; returns true if within the allowed rate. Also lifts an expired quarantine.
bool event(std::chrono::seconds now);
// Enter the QUARANTINED state until the given time
void quarantineUntil(std::chrono::seconds until);
// True while the entry is not in the MEASURING state
bool isBlocked() const;
};
// Rate limiting state maintained per asset
class State {
public:
// LRU map of tracking entries, keyed by (url, source)
typedef LruCacheMap<EntryKey, std::shared_ptr<TrackEntry>> EntriesLru;
const std::shared_ptr<Policy> policy;
// For each rule - hold corresponding tracking state (EntriesLru) instance
std::vector<std::shared_ptr<EntriesLru>> perRuleTrackingTable;
State(const std::shared_ptr<Policy> &policy);
// Runs all rules for one request. Returns true to allow the request, false to block it;
// `log` is set to true when the request should be logged.
bool execute(
const std::string& sourceIdentifier,
const std::string& uriStr,
std::chrono::seconds now,
bool& log);
};
}
}

View File

@@ -0,0 +1,22 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "RateLimitingDecision.h"
// Builds a rate limiting decision; the decision type is handed to the SingleDecision base.
RateLimitingDecision::RateLimitingDecision(DecisionType type)
    : SingleDecision(type)
{
}
// Returns the display name of this decision kind.
std::string RateLimitingDecision::getTypeStr() const
{
    return std::string("Rate Limiting");
}

View File

@@ -0,0 +1,27 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __RATE_LIMITING_DECISION_H__
#define __RATE_LIMITING_DECISION_H__
#include "SingleDecision.h"
#include "DecisionType.h"
#include <string>
// Decision object for the rate limiting engine; a SingleDecision specialization.
class RateLimitingDecision: public SingleDecision
{
public:
// Constructs the decision with the given decision type.
explicit RateLimitingDecision(DecisionType type);
// Returns the display name of this decision kind.
std::string getTypeStr() const override;
};
#endif

View File

@@ -0,0 +1,79 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ScanResult.h"
Waf2ScanResult::Waf2ScanResult()
:
keyword_matches(),
regex_matches(),
filtered_keywords(),
found_patterns(),
unescaped_line(),
param_name(),
location(),
score(0.0f),
scoreArray(),
keywordCombinations(),
attack_types(),
m_isAttackInParam(false)
{
}
// Resets the scan result: empties every container and string and zeroes the score.
// Note: m_isAttackInParam is not touched by this method.
void Waf2ScanResult::clear()
{
    // Match collections
    keyword_matches.clear();
    regex_matches.clear();
    filtered_keywords.clear();
    found_patterns.clear();

    // Text fields
    unescaped_line.clear();
    param_name.clear();
    location.clear();

    // Scoring data
    score = 0.0;
    scoreArray.clear();
    keywordCombinations.clear();
    attack_types.clear();
}
// Merges the findings of `other` into this scan result.
//  - location and param_name are overwritten with the values from `other`.
//  - keyword_matches, regex_matches, found_patterns and scoreArray are merged without duplicates.
//  - unescaped_line: taken from `other` when this one is empty, otherwise `other`'s line is
//    prepended to the existing line with a "?" separator.
//  - attack_types receives every attack type of `other`.
// filtered_keywords, keywordCombinations, score and m_isAttackInParam are not modified here.
void Waf2ScanResult::mergeFrom(const Waf2ScanResult& other)
{
    location = other.location;
    param_name = other.param_name;

    Waap::Util::mergeFromVectorWithoutDuplicates(
        other.keyword_matches,
        keyword_matches
    );
    Waap::Util::mergeFromVectorWithoutDuplicates(
        other.regex_matches,
        regex_matches
    );
    Waap::Util::mergeFromMapOfVectorsWithoutDuplicates(
        other.found_patterns,
        found_patterns
    );

    if (unescaped_line.empty())
    {
        unescaped_line = other.unescaped_line;
    }
    else
    {
        // The two cases must be exclusive: concatenating after the plain copy above would
        // yield "<other>?<other>", duplicating the line.
        unescaped_line = other.unescaped_line + "?" + unescaped_line;
    }

    Waap::Util::mergeFromVectorWithoutDuplicates(
        other.scoreArray,
        scoreArray
    );
    attack_types.insert(other.attack_types.begin(), other.attack_types.end());
}

View File

@@ -0,0 +1,41 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __SCAN_RESULT_H__
#define __SCAN_RESULT_H__
#include "Waf2Util.h"
#include <string>
#include <vector>
#include <set>
// Result of scanning a single value: matched keywords/regexes/patterns, the scanned text,
// where it was found, and scoring data.
struct Waf2ScanResult {
std::vector<std::string> keyword_matches;
std::vector<std::string> regex_matches;
std::vector<std::string> filtered_keywords;
Waap::Util::map_of_stringlists_t found_patterns;
std::string unescaped_line;
std::string param_name;
std::string location;
double score;
std::vector<double> scoreArray;
std::vector<std::string> keywordCombinations;
std::set<std::string> attack_types;
bool m_isAttackInParam;
void clear(); // clear Waf2ScanResult
// Creates an empty result (score 0, m_isAttackInParam false).
Waf2ScanResult();
// Merges the content of `other` into this result.
void mergeFrom(const Waf2ScanResult& other);
};
#endif // __SCAN_RESULT_H__

View File

@@ -0,0 +1,235 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ScannersDetector.h"
#include "waap.h"
#include "i_messaging.h"
#include <boost/algorithm/string/predicate.hpp>
USE_DEBUG_FLAG(D_WAAP);
// Timing and threshold constants used by ScannerDetector.
// SYNC_WAIT_TIME and INTERVAL are handed to the SerializeToLocalAndRemoteSyncBase constructor.
#define SYNC_WAIT_TIME std::chrono::seconds(300) // 5 minutes in seconds
// Default interval; also the default for the "learnIndicators.intervalDuration" parameter.
#define INTERVAL std::chrono::minutes(120)
// Number of other keys with an identical keyword set required before a source is ignored.
#define EQUAL_VALUES_COUNT_THRESHOLD 2
// Once the monitor list holds more windows than this, the oldest one is dropped.
#define MAX_RETENTION 5
// Creates a scanner detector.
//  localPath  - local directory; the data file is "<localPath>/11.data".
//  remotePath - remote base path; when non-empty, "/ScannersDetector" is appended to it.
//  assetId    - asset identifier forwarded to the sync base class.
// The monitor list starts with a single empty window, and m_lastSync is explicitly
// zeroed because ready() treats a zero value as "never synchronized" (a default-initialized
// std::chrono duration would otherwise hold an indeterminate value).
ScannerDetector::ScannerDetector(const std::string& localPath, const std::string& remotePath,
    const std::string &assetId) :
    SerializeToLocalAndRemoteSyncBase(INTERVAL, SYNC_WAIT_TIME,
        localPath + "/11.data",
        (remotePath == "") ? remotePath : remotePath + "/ScannersDetector",
        assetId,
        "ScannerDetector"),
    m_lastSync(std::chrono::microseconds::zero())
{
    m_sources_monitor.push_front(SourceKeyValsMap());
}
// Reports whether the ignore list is usable: false when no sync has happened yet
// (m_lastSync is zero), otherwise true only while less than half of the interval
// has elapsed since the last sync.
bool ScannerDetector::ready()
{
    const bool neverSynced = (m_lastSync.count() == 0);
    if (neverSynced)
    {
        return false;
    }

    std::chrono::microseconds now = Singleton::Consume<I_TimeGet>::by<WaapComponent>()->getWalltime();
    return (now - m_lastSync < m_interval / 2);
}
// Returns a pointer to the internal list of sources to ignore (owned by this object).
std::vector<std::string>* ScannerDetector::getSourcesToIgnore()
{
    std::vector<std::string> &ignoredSources = m_sources;
    return &ignoredSources;
}
void ScannerDetector::log(const std::string& source, const std::string& key, Waap::Keywords::KeywordsSet& keywords)
{
m_sources_monitor.front()[source][key].insert(keywords.begin(), keywords.end());
}
// Applies configuration parameters:
//  - "learnIndicators.intervalDuration": interval in minutes (defaults to INTERVAL).
//  - "remoteSync": remote sync stays enabled unless the value equals "false" (case-insensitive).
void ScannerDetector::loadParams(std::shared_ptr<Waap::Parameters::WaapParameters> pParams)
{
    std::string intervalMinutes = pParams->getParamVal(
        "learnIndicators.intervalDuration",
        std::to_string(INTERVAL.count()));
    setInterval(std::chrono::minutes(std::stoul(intervalMinutes)));

    std::string remoteSyncValue = pParams->getParamVal("remoteSync", "true");
    const bool remoteSyncDisabled = boost::iequals(remoteSyncValue, "false");
    setRemoteSyncEnabled(!remoteSyncDisabled);
}
// REST object used by ScannerDetector::postData() to upload a sources monitor window.
class SourcesMonitorPost : public RestGetFile
{
public:
// Initializes the `monitor` parameter from the given map.
SourcesMonitorPost(ScannerDetector::SourceKeyValsMap& _monitor)
: monitor(_monitor)
{
}
private:
// Declares the `monitor` member through the C2S_PARAM macro.
C2S_PARAM(ScannerDetector::SourceKeyValsMap, monitor)
};
// REST object used by ScannerDetector::pullData() to download a sources monitor window.
class SourcesMonitorGet : public RestGetFile
{
public:
SourcesMonitorGet() : monitor()
{
}
// Returns the received monitor map wrapped in a Maybe.
Maybe<ScannerDetector::SourceKeyValsMap> getSourcesMonitor()
{
return monitor.get();
}
private:
// Declares the `monitor` member through the S2C_PARAM macro.
S2C_PARAM(ScannerDetector::SourceKeyValsMap, monitor)
};
bool ScannerDetector::postData()
{
m_sources_monitor_backup = m_sources_monitor.front();
m_sources_monitor.push_front(SourceKeyValsMap());
std::string url = getPostDataUrl();
dbgTrace(D_WAAP) << "Sending the data to: " << url;
SourcesMonitorPost currentWindow(m_sources_monitor_backup);
return sendNoReplyObjectWithRetry(currentWindow,
I_Messaging::Method::PUT,
url);
}
// Downloads the given monitor files and merges their content into the backup window.
// The entry of `files` that matches this instance's own upload (derived from the post-data URL) is skipped.
void ScannerDetector::pullData(const std::vector<std::string>& files)
{
std::string url = getPostDataUrl();
// Strip everything up to and including the first '/' of the post URL; what remains is
// compared with the entries of `files` to recognize our own upload.
std::string sentFile = url.erase(0, url.find_first_of('/') + 1);
dbgTrace(D_WAAP) << "pulling files, skipping: " << sentFile;
for (auto file : files)
{
if (file == sentFile)
{
continue;
}
dbgTrace(D_WAAP) << "Pulling the file: " << file;
SourcesMonitorGet getMonitor;
sendObjectWithRetry(getMonitor,
I_Messaging::Method::GET,
getUri() + "/" + file);
// NOTE(review): the result of sendObjectWithRetry() is not checked before unpack() - confirm
// that unpacking is safe when the GET fails.
SourceKeyValsMap remoteMonitor = getMonitor.getSourcesMonitor().unpack();
// Merge every (source, key) keyword set from the remote file into the backup window.
for (const auto& srcData : remoteMonitor)
{
for (const auto& keyData : srcData.second)
{
m_sources_monitor_backup[srcData.first][keyData.first].insert(
keyData.second.begin(),
keyData.second.end());
}
}
// update the sources monitor in previous "time window"
// The two front windows are popped, then the merged backup is pushed as the previous
// window and the saved front window is restored on top of it.
// NOTE(review): this runs once per pulled file and needs at least two windows in the list - confirm
// that postData() always runs first.
auto temp = m_sources_monitor.front();
m_sources_monitor.pop_front();
m_sources_monitor.pop_front();
m_sources_monitor.push_front(m_sources_monitor_backup);
m_sources_monitor.push_front(temp);
}
}
// No processed data is posted by this detector; the method is a no-op.
void ScannerDetector::postProcessedData() {}
// State updates are not used by this detector; the file list is ignored.
void ScannerDetector::updateState(const std::vector<std::string>&) {}
// Processed data is not pulled by this detector; the file list is ignored.
void ScannerDetector::pullProcessedData(const std::vector<std::string>&)
{
}
// Adds every keyword of every (source, key) pair found in `mergeFrom` to the matching
// entry of `mergeTo`, creating missing entries on the way.
void ScannerDetector::mergeMonitors(SourceKeyValsMap& mergeTo, SourceKeyValsMap& mergeFrom)
{
    for (auto srcIt = mergeFrom.begin(); srcIt != mergeFrom.end(); ++srcIt)
    {
        const std::string &src = srcIt->first;
        for (auto keyIt = srcIt->second.begin(); keyIt != srcIt->second.end(); ++keyIt)
        {
            const std::string &key = keyIt->first;
            const std::set<std::string> &keywords = keyIt->second;
            dbgTrace(D_WAAP) << "merging src: " << src << ", key: " << key <<
                ", keywords: " << Waap::Util::setToString(keywords);
            mergeTo[src][key].insert(keywords.begin(), keywords.end());
        }
    }
}
// Rebuilds the list of sources to ignore from the collected monitoring windows.
//  1. If no backup window is pending, the current window becomes the backup and a new
//     empty window is opened.
//  2. When more than two windows exist, every window except the front one is merged
//     into the backup.
//  3. A source with more than two keys is added to the ignore list when some key has the
//     exact same keyword set as at least EQUAL_VALUES_COUNT_THRESHOLD later keys.
//  4. The oldest window is dropped once more than MAX_RETENTION windows are kept, the
//     backup is cleared and the last-sync time is updated.
void ScannerDetector::processData()
{
    if (m_sources_monitor_backup.empty())
    {
        m_sources_monitor_backup = m_sources_monitor.front();
        m_sources_monitor.push_front(SourceKeyValsMap());
    }

    if (m_sources_monitor.size() > 2)
    {
        // Skip the front (live) window and merge all the older ones.
        auto window = m_sources_monitor.begin();
        ++window;
        while (window != m_sources_monitor.end())
        {
            mergeMonitors(m_sources_monitor_backup, *window);
            ++window;
        }
    }

    m_sources.clear();
    for (const auto &sourceEntry : m_sources_monitor_backup)
    {
        const std::string &sourceId = sourceEntry.first;
        const std::map<std::string, std::set<std::string>> &keyVals = sourceEntry.second;
        if (keyVals.size() <= 2)
        {
            continue;
        }

        for (auto key = keyVals.begin(); key != keyVals.end(); ++key)
        {
            // Count the later keys whose keyword set is identical to this key's set.
            int sameValsCount = 0;
            auto otherKey = key;
            for (++otherKey; otherKey != keyVals.end(); ++otherKey)
            {
                if (key->second == otherKey->second)
                {
                    dbgTrace(D_WAAP) << "source monitor: src: " << sourceId << ", key_1: " << key->first << ", key_2: "
                        << otherKey->first << ", vals: " << Waap::Util::setToString(key->second);
                    sameValsCount++;
                }
            }

            if (sameValsCount >= EQUAL_VALUES_COUNT_THRESHOLD)
            {
                dbgDebug(D_WAAP) << "source: " << sourceId << " will be ignored";
                m_sources.push_back(sourceId);
                break;
            }
        }
    }

    if (m_sources_monitor.size() > MAX_RETENTION)
    {
        m_sources_monitor.pop_back();
    }
    m_sources_monitor_backup.clear();
    m_lastSync = Singleton::Consume<I_TimeGet>::by<WaapComponent>()->getWalltime();
}
// Nothing is written to the stream: this detector has no local serialization.
void ScannerDetector::serialize(std::ostream&)
{
}
// Nothing is read from the stream: this detector has no local deserialization.
void ScannerDetector::deserialize(std::istream&)
{
}

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __SCANNERS_DETECTOR_H__
#define __SCANNERS_DETECTOR_H__
#include "WaapKeywords.h"
#include "i_serialize.h"
#include "i_ignoreSources.h"
#include "WaapParameters.h"
// Collects, per source, the keyword sets seen for each key across time windows, and
// derives from them a list of sources to ignore (see processData()).
class ScannerDetector : public SerializeToLocalAndRemoteSyncBase, public I_IgnoreSources
{
public:
// source -> key -> set of keywords
typedef std::map<std::string, std::map<std::string, std::set<std::string>>> SourceKeyValsMap;
ScannerDetector(const std::string& localPath, const std::string& remotePath = "", const std::string &assetId = "");
// True when a sync happened recently enough for the ignore list to be used.
virtual bool ready();
// Returns a pointer to the internally owned list of sources to ignore.
virtual std::vector<std::string>* getSourcesToIgnore();
// Records keywords for (source, key) in the current window.
void log(const std::string& source, const std::string& key, Waap::Keywords::KeywordsSet& keywords);
// Reads the interval and remote-sync settings from the given parameters.
void loadParams(std::shared_ptr<Waap::Parameters::WaapParameters> pParams);
virtual bool postData();
virtual void pullData(const std::vector<std::string>& files);
virtual void processData();
// The next three are implemented as no-ops.
virtual void postProcessedData();
virtual void pullProcessedData(const std::vector<std::string>& files);
virtual void updateState(const std::vector<std::string>& files);
// serialize/deserialize are implemented as no-ops.
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
private:
// Merges all keyword sets of mergeFrom into mergeTo.
void mergeMonitors(SourceKeyValsMap& mergeTo, SourceKeyValsMap& mergeFrom);
std::list<SourceKeyValsMap> m_sources_monitor; // list of map source -> key -> set of indicators
SourceKeyValsMap m_sources_monitor_backup; // stores data of the last window to process
// Sources currently selected to be ignored; rebuilt by processData().
std::vector<std::string> m_sources;
// Wall time of the last processData() run; ready() treats 0 as "never".
std::chrono::microseconds m_lastSync;
};
#endif

View File

@@ -0,0 +1,499 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ScoreBuilder.h"
#include "Waf2Regex.h"
#include <iostream>
#include <algorithm>
#include <math.h>
#include "WaapAssetState.h"
#include <cereal/types/unordered_map.hpp>
#include <cereal/archives/json.hpp>
#include <cereal/types/memory.hpp>
#include <sstream>
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP_SCORE_BUILDER);
// When the false positive store counter reaches this value, its keywords intersection is
// appended to the pending false positives list and the store is cleared.
#define GENERATE_FALSE_POSITIVES_LIST_THRESHOLD 100
// Number of pumped keywords after which scores are recalculated (when backup is enabled).
#define SCORE_CALCULATION_THRESHOLD 5000
using namespace std::chrono;
ScoreBuilderData::ScoreBuilderData() :
m_sourceIdentifier(),
m_userAgent(),
m_sample(),
m_relativeReputation(0.0),
m_fpClassification(UNKNOWN_TYPE)
{}
ScoreBuilderData::ScoreBuilderData(
const std::string &sourceIdentifier,
const std::string &userAgent,
const std::string &sample,
double relativeReputation,
PolicyCounterType type,
const std::vector<std::string> &keywordsMatches,
const std::vector<std::string> &keywordsCombinations)
:
m_sourceIdentifier(sourceIdentifier),
m_userAgent(userAgent),
m_sample(sample),
m_relativeReputation(relativeReputation),
m_fpClassification(type),
m_keywordsMatches(keywordsMatches),
m_keywordsCombinations(keywordsCombinations)
{}
KeywordsScorePool::KeywordsScorePool()
:
m_keywordsDataMap(),
m_stats()
{
}
void KeywordsScorePool::mergeScores(const KeywordsScorePool& baseScores)
{
// find all keywords that exist in base but not in this
std::vector<std::string> removedElements;
std::vector<std::string>::iterator removedElementsIt;
for (KeywordDataMap::const_iterator it = m_keywordsDataMap.begin();
it != m_keywordsDataMap.end(); ++it)
{
// key not found in base array
if (baseScores.m_keywordsDataMap.find(it->first) == baseScores.m_keywordsDataMap.end())
{
removedElements.push_back(it->first);
}
}
// removing elements that were deleted
for (removedElementsIt = removedElements.begin();
removedElementsIt != removedElements.end();
++removedElementsIt)
{
m_keywordsDataMap.erase(*removedElementsIt);
}
// learning new scores
for (KeywordDataMap::const_iterator it = baseScores.m_keywordsDataMap.begin();
it != baseScores.m_keywordsDataMap.end(); ++it)
{
if (m_keywordsDataMap.find(it->first) == m_keywordsDataMap.end())
{
m_keywordsDataMap[it->first] = it->second;
}
}
}
ScoreBuilder::ScoreBuilder(I_WaapAssetState* pWaapAssetState) :
SerializeToFilePeriodically(duration_cast<seconds>(minutes(10)), pWaapAssetState->getSignaturesScoresFilePath()),
m_scoreTrigger(0),
m_fpStore(),
m_keywordsScorePools(),
m_falsePositivesSetsIntersection(),
m_pWaapAssetState(pWaapAssetState)
{
restore();
}
ScoreBuilder::ScoreBuilder(I_WaapAssetState* pWaapAssetState, ScoreBuilder& baseScores) :
SerializeToFilePeriodically(duration_cast<seconds>(minutes(10)), pWaapAssetState->getSignaturesScoresFilePath()),
m_scoreTrigger(0),
m_fpStore(),
m_keywordsScorePools(),
m_falsePositivesSetsIntersection(),
m_pWaapAssetState(pWaapAssetState)
{
restore();
// merge
mergeScores(baseScores);
}
// Writes the score pools to the stream as JSON, tagged with format version 1.
void ScoreBuilder::serialize(std::ostream& stream)
{
    static const size_t version = 1;

    cereal::JSONOutputArchive archive(stream);
    archive(cereal::make_nvp("version", version), cereal::make_nvp("scorePools", m_keywordsScorePools));
}
// Loads the score pools from a JSON stream.
// A missing "version" field is treated as version 0 (single base pool at the top level);
// version 1 stores all pools under "scorePools". Any other version is only reported.
void ScoreBuilder::deserialize(std::istream& stream)
{
    cereal::JSONInputArchive iarchive(stream);

    size_t version = 0;
    try {
        iarchive(cereal::make_nvp("version", version));
    }
    catch (std::runtime_error & e) {
        // Reset the archive's pending name so the following reads start clean.
        iarchive.setNextName(nullptr);
        version = 0;
        dbgDebug(D_WAAP_SCORE_BUILDER) << "ScoreBuilder version absent, using version " << version <<
            " e.what() is " << e.what();
    }

    dbgDebug(D_WAAP_SCORE_BUILDER) << "Loading scores from file version " << version << "...";

    if (version == 1) {
        iarchive(cereal::make_nvp("scorePools", m_keywordsScorePools));
    }
    else if (version == 0) {
        m_keywordsScorePools[KEYWORDS_SCORE_POOL_BASE] = KeywordsScorePool(iarchive);
    }
    else {
        dbgDebug(D_WAAP_SCORE_BUILDER) << "Unknown scores file version: " << version;
    }
}
// Pumps the keyword scores of `data` into the named pool, unless the sample's
// classification is UNKNOWN_TYPE (in which case nothing is done).
void ScoreBuilder::analyzeFalseTruePositive(ScoreBuilderData& data, const std::string &poolName, bool doBackup)
{
    if (data.m_fpClassification != UNKNOWN_TYPE)
    {
        dbgTrace(D_WAAP_SCORE_BUILDER) << "ScoreBuilder::analyzeFalseTruePositive: pumping score pool=" << poolName;
        pumpKeywordScore(data, poolName, doBackup);
        return;
    }

    dbgTrace(D_WAAP_SCORE_BUILDER) <<
        "analyzeFalseTruePositive(): Got UNKNOWN_TYPE as false positive classification "
        ", will not pump keywords score";
}
// Heuristically decides whether the sample looks like HTML:
//  - more than 3 closing-element hints ("</") -> HTML;
//  - otherwise samples of 30 characters or fewer -> not HTML;
//  - otherwise more than 5 matches of common opening elements -> HTML.
bool ScoreBuilder::isHtmlContent(std::string sample)
{
    // Count the closing html elements.
    const std::string closingHint = "</";
    unsigned int closingCount = 0;
    for (std::string::size_type at = sample.find(closingHint, 0);
        at != std::string::npos;
        at = sample.find(closingHint, at + closingHint.length()))
    {
        ++closingCount;
    }
    if (closingCount > 3)
    {
        return true;
    }

    bool regexError = false;
    std::string reName = "html opening element regex";
    Regex htmlOpenElementRe("<html|<p|<div|<img|<ul|<li|<body|<a", regexError, reName);
    std::vector<RegexMatch> matches;

    if (sample.length() <= 30)
    {
        return false;
    }

    unsigned int openingCount = htmlOpenElementRe.findAllMatches(sample, matches);
    return openingCount > 5;
}
// Advances the false positive store while it is active (count != 0):
//  - the counter is incremented;
//  - for a reputation below 2.0 the entries of this user agent and source are dropped;
//  - when the counter reaches GENERATE_FALSE_POSITIVES_LIST_THRESHOLD the keywords
//    intersection is appended to the pending list and the store is cleared.
void ScoreBuilder::checkBadSourcesForLearning(double reputation, std::string& source, std::string& userAgent)
{
    if (m_fpStore.count == 0)
    {
        return;
    }
    ++m_fpStore.count;

    const bool lowReputation = reputation < 2.0;
    if (lowReputation && m_fpStore.hasUaItem(userAgent))
    {
        m_fpStore.uaItems.erase(userAgent);
    }
    if (lowReputation && m_fpStore.hasIpItem(source))
    {
        m_fpStore.ipItems.erase(source);
    }

    if (m_fpStore.count < GENERATE_FALSE_POSITIVES_LIST_THRESHOLD)
    {
        return;
    }
    m_fpStore.appendKeywordsSetsIntersectionToList(m_falsePositivesSetsIntersection);
    m_fpStore.clear();
}
// Feeds all keyword matches and keyword combinations of `data` into the named pool
// (the pool is created when missing). HTML-looking samples are skipped.
// When `doBackup` is set and enough keywords were pumped, scores are recalculated,
// the asset state is notified and the backup worker is invoked.
void ScoreBuilder::pumpKeywordScore(ScoreBuilderData& data, const std::string &poolName, bool doBackup)
{
    auto poolIt = m_keywordsScorePools.find(poolName);
    if (poolIt == m_keywordsScorePools.end()) {
        dbgDebug(D_WAAP_SCORE_BUILDER) << "pumpKeywordScore() is called with unknown poolName='" << poolName <<
            "'. Creating the pool.";
        m_keywordsScorePools[poolName] = KeywordsScorePool();
        poolIt = m_keywordsScorePools.find(poolName);
    }
    if (poolIt == m_keywordsScorePools.end()) {
        dbgWarning(D_WAAP_SCORE_BUILDER) << "pumpKeywordScore() failed to create pool '" << poolName << "'";
        return;
    }
    KeywordsScorePool &targetPool = poolIt->second;

    if (isHtmlContent(data.m_sample))
    {
        dbgTrace(D_WAAP_SCORE_BUILDER) << "pumpKeywordScore: isHtmlContent -> do not process";
        return;
    }

    for (const std::string &match : data.m_keywordsMatches) {
        pumpKeywordScorePerKeyword(data, match, KEYWORD_TYPE_KEYWORD, targetPool);
    }
    for (const std::string &combination : data.m_keywordsCombinations) {
        pumpKeywordScorePerKeyword(data, combination, KEYWORD_TYPE_COMBINATION, targetPool);
    }

    const bool recalculate = doBackup && m_scoreTrigger >= SCORE_CALCULATION_THRESHOLD;
    if (!recalculate)
    {
        return;
    }

    calcScore(poolName);
    if (m_pWaapAssetState != NULL)
    {
        m_pWaapAssetState->updateScores();
    }
    backupWorker();
}
void ScoreBuilder::calcScore(const std::string &poolName)
{
std::map<std::string, KeywordsScorePool>::iterator poolIt = m_keywordsScorePools.find(poolName);
if (poolIt == m_keywordsScorePools.end()) {
dbgDebug(D_WAAP_SCORE_BUILDER) << "calcScore() is called with unknown poolName='" << poolName <<
"'. Creating the pool.";
m_keywordsScorePools[poolName] = KeywordsScorePool();
}
poolIt = m_keywordsScorePools.find(poolName);
if (poolIt == m_keywordsScorePools.end()) {
dbgWarning(D_WAAP_SCORE_BUILDER) << "calcScore() failed to create pool '" << poolName << "'";
return;
}
KeywordsScorePool &keywordsScorePool = poolIt->second;
KeywordDataMap &keywordsDataMap = keywordsScorePool.m_keywordsDataMap;
KeywordsStats &keywordsStats = keywordsScorePool.m_stats;
m_scoreTrigger = 0;
for (auto fpKeyword : m_falsePositivesSetsIntersection)
{
if (keywordsDataMap.find(fpKeyword) == keywordsScorePool.m_keywordsDataMap.end())
{
keywordsDataMap[fpKeyword];
}
keywordsDataMap[fpKeyword].falsePositiveCtr++;
keywordsStats.falsePositiveCtr++;
}
m_falsePositivesSetsIntersection.clear();
KeywordDataMap newKeywordsDataMap;
double tpAverageLog = log(keywordsStats.truePositiveCtr / std::max(keywordsDataMap.size(), (size_t)1) + 101);
for (auto keyword : keywordsDataMap)
{
double tpLog = log(keyword.second.truePositiveCtr + 1);
double tpScore = tpLog / (tpLog + tpAverageLog / 4 + 1); // range [0,1)
int fpAvg = 1;
keyword.second.score = 10 * tpScore * (fpAvg + 1) / (fpAvg + (keyword.second.falsePositiveCtr * 5) + 2);
if (keyword.second.score > 1 ||
keyword.second.falsePositiveCtr < 10 ||
keyword.second.type == KEYWORD_TYPE_KEYWORD)
{
newKeywordsDataMap[keyword.first] = keyword.second;
}
}
keywordsDataMap = newKeywordsDataMap;
}
void ScoreBuilder::snap()
{
// Copy data from all mutable score pools to "snapshot" keyword->scores map
for (const std::pair<std::string, KeywordsScorePool> &pool : m_keywordsScorePools) {
const std::string &poolName = pool.first;
const KeywordsScorePool& keywordScorePool = pool.second;
m_snapshotKwScoreMap[poolName];
for (const std::pair<std::string, KeywordData> &kwData : keywordScorePool.m_keywordsDataMap)
{
const std::string &kwName = kwData.first;
double kwScore = kwData.second.score;
m_snapshotKwScoreMap[poolName][kwName] = kwScore;
}
}
}
double ScoreBuilder::getSnapshotKeywordScore(const std::string &keyword, double defaultScore,
const std::string &poolName) const
{
std::map<std::string, KeywordScoreMap>::const_iterator poolIt = m_snapshotKwScoreMap.find(poolName);
if (poolIt == m_snapshotKwScoreMap.end()) {
dbgTrace(D_WAAP_SCORE_BUILDER) << "pool " << poolName << " does not exist. Getting score from base pool";
poolIt = m_snapshotKwScoreMap.find(KEYWORDS_SCORE_POOL_BASE);
}
if (poolIt == m_snapshotKwScoreMap.end()) {
dbgDebug(D_WAAP_SCORE_BUILDER) <<
"base pool does not exist! This is probably a bug. Returning default score " << defaultScore;
return defaultScore;
}
const KeywordScoreMap &kwScoreMap = poolIt->second;
KeywordScoreMap::const_iterator kwScoreFound = kwScoreMap.find(keyword);
if (kwScoreFound == kwScoreMap.end()) {
dbgTrace(D_WAAP_SCORE_BUILDER) << "keywordScore:'" << keyword << "': " << defaultScore <<
" (default, keyword not found in pool '" << poolName << "')";
return defaultScore;
}
dbgTrace(D_WAAP_SCORE_BUILDER) << "keywordScore:'" << keyword << "': " << kwScoreFound->second << " (pool '" <<
poolName << "')";
return kwScoreFound->second;
}
// Returns a copy of the keywords stored for the given ip in the false positive store.
// Note: an empty entry is created in the store when the ip is not present.
keywords_set ScoreBuilder::getIpItemKeywordsSet(std::string ip)
{
    keywords_set &ipKeywords = m_fpStore.ipItems[ip];
    return ipKeywords;
}
// Returns a copy of the keywords stored for the given user agent in the false positive store.
// Note: an empty entry is created in the store when the user agent is not present.
keywords_set ScoreBuilder::getUaItemKeywordsSet(std::string userAgent)
{
    keywords_set &uaKeywords = m_fpStore.uaItems[userAgent];
    return uaKeywords;
}
// Returns the current counter of the false positive store.
unsigned int ScoreBuilder::getFpStoreCount()
{
    const unsigned int storeCount = m_fpStore.count;
    return storeCount;
}
// Merges every score pool of `baseScores` into the pool of the same name in this builder.
// A pool that does not exist here yet is created empty before merging.
void ScoreBuilder::mergeScores(const ScoreBuilder& baseScores)
{
    // Bind by `const auto &`: the map's value_type has a const key, so spelling the element
    // type as std::pair<std::string, KeywordsScorePool> would copy each whole pool per iteration.
    for (const auto &basePool : baseScores.m_keywordsScorePools) {
        // operator[] default-constructs the pool when it is missing.
        m_keywordsScorePools[basePool.first].mergeScores(basePool.second);
    }
}
// Accounts a single keyword of a classified sample in the given pool.
// The score trigger is always incremented. For UNKNOWN_TYPE nothing else happens.
// Otherwise the keyword entry is created if needed and its type is set, then:
//  - TRUE_POSITIVE with score < 8: keyword and pool true positive counters are incremented;
//  - FALSE_POSITIVE with score > 0.1 or fewer than 10 true positives: the keyword is
//    recorded in the false positive store for the sample's source and user agent.
void ScoreBuilder::pumpKeywordScorePerKeyword(ScoreBuilderData& data, const std::string& keyword,
    KeywordType keywordSource, KeywordsScorePool &keywordsScorePool)
{
    m_scoreTrigger++;

    if (data.m_fpClassification == UNKNOWN_TYPE) {
        dbgTrace(D_WAAP_SCORE_BUILDER) <<
            "pumpKeywordScorePerKeyword(): Got UNKNOWN_TYPE as false positive classifiaction "
            ", will not pump keywords score";
        return;
    }

    // operator[] creates a default entry for a keyword seen for the first time.
    KeywordData& keyData = keywordsScorePool.m_keywordsDataMap[keyword];
    keyData.type = keywordSource;

    const bool countTruePositive = data.m_fpClassification == TRUE_POSITIVE && keyData.score < 8;
    if (countTruePositive)
    {
        dbgTrace(D_WAAP_SCORE_BUILDER) <<
            "pumpKeywordScorePerKeyword(): fpClassification = TRUE_POSITIVE for keyword: " << keyword;
        keyData.truePositiveCtr++;
        keywordsScorePool.m_stats.truePositiveCtr++;
        return;
    }

    if (data.m_fpClassification == FALSE_POSITIVE && (keyData.score > 0.1 || keyData.truePositiveCtr < 10))
    {
        dbgTrace(D_WAAP_SCORE_BUILDER) <<
            "pumpKeywordScorePerKeyword(): fpClassification = FALSE_POSITIVE for keyword: " << keyword;
        m_fpStore.putFalsePositive(data.m_sourceIdentifier, data.m_userAgent, keyword);
    }
}
void FalsePoisitiveStore::putFalsePositive(const std::string& ip, const std::string& userAgent,
const std::string& keyword)
{
count = 1;
ipItems[ip].insert(keyword);
uaItems[userAgent].insert(keyword);
}
// Tells whether the store holds an entry for the given ip.
bool FalsePoisitiveStore::hasIpItem(const std::string& ip) const
{
    return ipItems.count(ip) != 0;
}
// Tells whether the store holds an entry for the given user agent.
bool FalsePoisitiveStore::hasUaItem(const std::string& ua) const
{
    return uaItems.count(ua) != 0;
}
// Appends to `keywordsList` every keyword recorded for some ip that was also recorded for
// some user agent. A keyword recorded for several ips is appended once per ip.
void FalsePoisitiveStore::appendKeywordsSetsIntersectionToList(std::list<std::string>& keywordsList)
{
    // All the keywords seen per ip, duplicates across ips included.
    std::list<std::string> ipKeywords;
    for (const auto &ipEntry : ipItems) {
        ipKeywords.insert(ipKeywords.end(), ipEntry.second.begin(), ipEntry.second.end());
    }

    // The distinct keywords seen for any user agent.
    std::unordered_set<std::string> uaKeywords;
    for (const auto &uaEntry : uaItems) {
        uaKeywords.insert(uaEntry.second.begin(), uaEntry.second.end());
    }

    for (const std::string &keyword : ipKeywords)
    {
        if (uaKeywords.count(keyword) != 0)
        {
            keywordsList.push_back(keyword);
        }
    }
}
// Empties the store: drops all ip and user agent entries and resets the counter to 0.
void FalsePoisitiveStore::clear()
{
    ipItems.clear();
    uaItems.clear();
    count = 0;
}

View File

@@ -0,0 +1,173 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <list>
#include "FpMitigation.h"
#include "Waf2Util.h"
#include "picojson.h"
#include "i_serialize.h"
#include <cereal/archives/json.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/types/string.hpp>
// Input record for the score builder: one classified sample together with its source,
// user agent and the keywords/combinations matched in it.
struct ScoreBuilderData {
std::string m_sourceIdentifier;
std::string m_userAgent;
std::string m_sample;
double m_relativeReputation;
// Classification of the sample; the score builder ignores samples with UNKNOWN_TYPE.
PolicyCounterType m_fpClassification;
std::vector<std::string> m_keywordsMatches;
std::vector<std::string> m_keywordsCombinations;
// Creates an empty record (reputation 0.0, classification UNKNOWN_TYPE).
ScoreBuilderData();
// Creates a record holding copies of the given values.
ScoreBuilderData(
const std::string &sourceIdentifier,
const std::string &userAgent,
const std::string &sample,
double relativeReputation,
PolicyCounterType type,
const std::vector<std::string> &keywordsMatches,
const std::vector<std::string> &keywordsCombinations);
};
// Origin of a scored keyword: a single keyword match or a keywords combination.
// KEYWORD_TYPE_UNKNOWN is the value of a default-constructed KeywordData.
enum KeywordType {
KEYWORD_TYPE_UNKNOWN,
KEYWORD_TYPE_KEYWORD,
KEYWORD_TYPE_COMBINATION
};
// Per-keyword learning data: true/false positive counters, the calculated score and the keyword type.
struct KeywordData {
KeywordData() : truePositiveCtr(0), falsePositiveCtr(0), score(0.0), type(KEYWORD_TYPE_UNKNOWN) {}
unsigned int truePositiveCtr;
unsigned int falsePositiveCtr;
double score;
KeywordType type;
// cereal serialization; fields are stored as "false_positives", "true_positives", "score" and "type".
template <class Archive>
void serialize(Archive& ar) {
ar(cereal::make_nvp("false_positives", falsePositiveCtr),
cereal::make_nvp("true_positives", truePositiveCtr),
cereal::make_nvp("score", score),
cereal::make_nvp("type", type));
}
};
// Pool-wide totals of true and false positive counts.
struct KeywordsStats {
KeywordsStats() : truePositiveCtr(0), falsePositiveCtr(0) {}
// cereal serialization; fields are stored as "false_positives" and "true_positives".
template <class Archive>
void serialize(Archive& ar) {
ar(cereal::make_nvp("false_positives", falsePositiveCtr),
cereal::make_nvp("true_positives", truePositiveCtr));
}
unsigned int truePositiveCtr;
unsigned int falsePositiveCtr;
};
// Set of distinct keywords.
typedef std::unordered_set<std::string> keywords_set;
// Temporary store of keywords reported as false positives, grouped by ip and by user agent.
struct FalsePoisitiveStore {
// 0 while the store is empty/inactive; set to 1 by putFalsePositive() and reset by clear().
unsigned int count;
std::unordered_map<std::string, keywords_set> ipItems;
std::unordered_map<std::string, keywords_set> uaItems;
FalsePoisitiveStore() : count(0), ipItems(), uaItems() {}
// Records the keyword under both the ip and the user agent.
void putFalsePositive(const std::string& ip, const std::string& userAgent, const std::string& keyword);
bool hasIpItem(const std::string& ip) const;
bool hasUaItem(const std::string& ua) const;
// Appends to keywordsList the ip keywords that also appear for some user agent.
void appendKeywordsSetsIntersectionToList(std::list<std::string>& keywordsList);
void clear();
};
class I_WaapAssetState;
// keyword -> learning data
typedef std::unordered_map<std::string, KeywordData> KeywordDataMap;
// A named pool of keyword scores: the per-keyword data plus pool-wide statistics.
struct KeywordsScorePool {
KeywordDataMap m_keywordsDataMap;
KeywordsStats m_stats;
KeywordsScorePool();
// Loads a pool directly from an input archive ("keyword_data" and "keyword_stats" fields).
// Used by ScoreBuilder::deserialize() for version 0 files.
template <typename _A>
KeywordsScorePool(_A &iarchive)
{
KeywordDataMap tmpKeyordsDataMap;
iarchive(cereal::make_nvp("keyword_data", tmpKeyordsDataMap),
cereal::make_nvp("keyword_stats", m_stats));
// Decode keys (originally urlencoded in the source file)
for (auto item : tmpKeyordsDataMap) {
std::string key = item.first;
// unquote_plus decodes in place and returns the new logical end; erase the leftover tail.
key.erase(unquote_plus(key.begin(), key.end()), key.end());
m_keywordsDataMap[key] = item.second;
}
}
// cereal serialization of the pool ("keyword_data" and "keyword_stats" fields).
template <class Archive>
void serialize(Archive& ar) {
ar(
cereal::make_nvp("keyword_data", m_keywordsDataMap),
cereal::make_nvp("keyword_stats", m_stats)
);
}
// Aligns this pool's keywords with those of baseScores (see the implementation).
void mergeScores(const KeywordsScorePool& baseScores);
};
// Learns keyword scores from classified traffic samples, keeps them in named pools,
// periodically saves them to file and exposes a read-only snapshot for score lookups.
class ScoreBuilder : public SerializeToFilePeriodically {
public:
ScoreBuilder(I_WaapAssetState* pWaapAssetState);
// Also merges the restored scores with the pools of baseScores.
ScoreBuilder(I_WaapAssetState* pWaapAssetState, ScoreBuilder& baseScores);
~ScoreBuilder() {}
// Entry point for a classified sample; ignores UNKNOWN_TYPE samples.
void analyzeFalseTruePositive(ScoreBuilderData& data, const std::string &poolName, bool doBackup=true);
// Heuristic check whether a sample looks like HTML.
bool isHtmlContent(std::string sample);
void checkBadSourcesForLearning(double reputation, std::string& source, std::string& userAgent);
void pumpKeywordScore(ScoreBuilderData& data, const std::string &poolName, bool doBackup=true);
// Recalculates the scores of the named pool.
void calcScore(const std::string &poolName);
// Copies the live scores into the snapshot map.
void snap();
// Reads a keyword score from the snapshot, falling back to the base pool and then to defaultScore.
double getSnapshotKeywordScore(const std::string &keyword, double defaultScore, const std::string &poolName) const;
keywords_set getIpItemKeywordsSet(std::string ip);
keywords_set getUaItemKeywordsSet(std::string userAgent);
unsigned int getFpStoreCount();
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
void mergeScores(const ScoreBuilder& baseScores);
protected:
// keyword -> score
typedef std::map<std::string, double> KeywordScoreMap;
void pumpKeywordScorePerKeyword(ScoreBuilderData& data,
const std::string& keyword,
KeywordType keywordSource,
KeywordsScorePool &keywordsScorePool);
// Number of keywords pumped since the last score calculation.
unsigned int m_scoreTrigger;
FalsePoisitiveStore m_fpStore;
std::map<std::string, KeywordsScorePool> m_keywordsScorePools; // live data continuously updated during traffic
std::map<std::string, KeywordScoreMap> m_snapshotKwScoreMap; // the snapshot is updated only by a call to snap()
// Keywords pending to be charged as false positives on the next calcScore().
std::list<std::string> m_falsePositivesSetsIntersection;
// May be NULL (checked before use in pumpKeywordScore()).
// NOTE(review): both constructors dereference it unconditionally - confirm callers never pass NULL.
I_WaapAssetState* m_pWaapAssetState;
};

View File

@@ -0,0 +1,104 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "SecurityHeadersPolicy.h"
#include "Waf2Util.h"
namespace Waap {
namespace SecurityHeaders {
void
Policy::StrictTransportSecurity::buildInjectStr() {
if (preload && includeSubDomains)
{
directivesStr = "max-age=" + maxAge + "; includeSubDomains; preload";
}
else if (includeSubDomains)
{
directivesStr = "max-age=" + maxAge + "; includeSubDomains";
}
else if (preload)
{
directivesStr = "max-age=" + maxAge + "; preload";
}
else
{
directivesStr = "max-age=" + maxAge;
}
headerDetails = std::make_pair(headerName, directivesStr);
}
// Cache the (header name, directive) pair that is injected for X-Frame-Options.
void
Policy::XFrameOptions::buildInjectStr() {
    headerDetails.first = headerName;
    headerDetails.second = directivesStr;
}
// Cache the (header name, directive) pair that is injected for X-Content-Type-Options.
void
Policy::XContentTypeOptions::buildInjectStr() {
    headerDetails = std::pair<std::string, std::string>(headerName, directivesStr);
}
// Two enforcement settings are equal when their `enable` flags match.
bool
Policy::SecurityHeadersEnforcement::operator==(const Policy::SecurityHeadersEnforcement &other) const
{
    const bool sameEnforcement = (other.enable == enable);
    return sameEnforcement;
}
// Field-by-field equality over every member of XFrameOptions.
bool
Policy::XFrameOptions::operator==(const XFrameOptions &other) const
{
    if (sameOrigin != other.sameOrigin || deny != other.deny || headerName != other.headerName) {
        return false;
    }
    if (directivesStr != other.directivesStr) {
        return false;
    }
    // std::pair equality compares both the first and the second element.
    return headerDetails == other.headerDetails;
}
// Equality over directivesStr, headerName and the cached header pair.
bool
Policy::XContentTypeOptions::operator==(const XContentTypeOptions &other) const
{
    if (directivesStr != other.directivesStr) {
        return false;
    }
    if (headerName != other.headerName) {
        return false;
    }
    // std::pair equality compares both the first and the second element.
    return headerDetails == other.headerDetails;
}
// Field-by-field equality over every member of StrictTransportSecurity.
bool
Policy::StrictTransportSecurity::operator==(const StrictTransportSecurity &other) const
{
    const bool sameFlags = includeSubDomains == other.includeSubDomains && preload == other.preload;
    if (!sameFlags) {
        return false;
    }
    const bool sameStrings =
        maxAge == other.maxAge &&
        directivesStr == other.directivesStr &&
        headerName == other.headerName;
    if (!sameStrings) {
        return false;
    }
    // std::pair equality compares both the first and the second element.
    return headerDetails == other.headerDetails;
}
// Headers are equal when the prepared injection list and each per-header configuration match.
bool
Policy::Headers::operator==(const Headers &other) const
{
    if (!(other.headersInjectStr == headersInjectStr)) {
        return false;
    }
    if (!(hsts == other.hsts)) {
        return false;
    }
    if (!(xContentTypeOptions == other.xContentTypeOptions)) {
        return false;
    }
    return xFrameOptions == other.xFrameOptions;
}
// Policies are equal when both the headers configuration and the enforcement setting match.
bool
Policy::operator==(const Policy &other) const
{
    if (!(headers == other.headers)) {
        return false;
    }
    return m_securityHeaders == other.m_securityHeaders;
}
// Builds the per-transaction state from a security-headers policy.
//
// The policy pointer is now stored in the `policy` member. Previously the member was left
// default-constructed (null) because the constructor parameter of the same name was never
// copied into it. A null policy is tolerated and yields an empty injection list instead of
// dereferencing a null pointer.
//
// @param policy shared policy whose prepared header pairs are copied into headersInjectStrs.
State::State(const std::shared_ptr<Policy> &policy)
    :
    policy(policy)
{
    // Inside the body `policy` names the constructor parameter (it shadows the member).
    if (!policy) {
        return;
    }
    const auto &policyHeaders = policy->headers.headersInjectStr;
    headersInjectStrs.insert(headersInjectStrs.end(), policyHeaders.begin(), policyHeaders.end());
}
}
}

View File

@@ -0,0 +1,225 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cereal/types/string.hpp>
#include <string>
#include <memory>
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP);
namespace Waap {
namespace SecurityHeaders {
// Security-headers policy: holds the configuration of each response header that can be
// injected, plus the enforcement switch. Everything is populated through cereal archives.
struct Policy {
// Configuration of the Strict-Transport-Security header.
struct StrictTransportSecurity {
// Loads the built-in defaults (max-age 31536000, includeSubDomains on, preload off)
// and rebuilds the cached header pair.
void setDefaults()
{
maxAge = "31536000";
includeSubDomains = true;
preload = false;
buildInjectStr();
}
// Reads "maxAge", "includeSubDomains" and "preload" from the archive, then rebuilds
// the cached header pair.
template <typename _A>
void serialize(_A &ar) {
ar(cereal::make_nvp("maxAge", maxAge));
ar(cereal::make_nvp("includeSubDomains", includeSubDomains));
ar(cereal::make_nvp("preload", preload));
buildInjectStr();
}
void buildInjectStr();
bool operator==(const StrictTransportSecurity &other) const;
const std::string headerName = "Strict-Transport-Security";
std::string maxAge;
bool includeSubDomains;
bool preload;
std::string directivesStr;
// (header name, header value) pair, exactly as it should be injected once all data is collected.
std::pair<std::string, std::string> headerDetails;
};
// Configuration of the X-Frame-Options header.
struct XFrameOptions {
// Defaults to the SAMEORIGIN directive.
void setDefaults()
{
directivesStr = sameOrigin;
buildInjectStr();
}
// Reads "directive" and maps it, case-insensitively, to SAMEORIGIN or DENY.
// Any other value is rejected with a cereal::Exception.
template <typename _A>
void serialize(_A &ar) {
std::string value;
ar(cereal::make_nvp("directive", value));
if(boost::iequals(value, "sameOrigin"))
{
directivesStr = sameOrigin;
}
else if(boost::iequals(value, "deny"))
{
directivesStr = deny;
}
else
{
throw cereal::Exception(
"Invalid value for SecurityHeaders::Policy::XFrameOptions::directive='" + value + "'");
}
buildInjectStr();
}
void buildInjectStr();
bool operator==(const XFrameOptions &other) const;
const std::string sameOrigin = "SAMEORIGIN";
const std::string deny = "DENY";
const std::string headerName = "X-Frame-Options";
std::string directivesStr;
// (header name, header value) pair, exactly as it should be injected once all data is collected.
std::pair<std::string, std::string> headerDetails;
};
// Configuration of the X-Content-Type-Options header.
struct XContentTypeOptions
{
// Defaults to the only supported directive, "nosniff".
void setDefaults()
{
directivesStr = nosniff;
buildInjectStr();
}
// Reads "directive"; only "nosniff" (case-insensitive) is accepted, anything else is
// rejected with a cereal::Exception.
template <typename _A>
void serialize(_A &ar) {
std::string value;
ar(cereal::make_nvp("directive", value));
if(boost::iequals(value, "nosniff"))
{
directivesStr = nosniff;
}
else
{
throw cereal::Exception(
"Invalid value for SecurityHeaders::Policy::XContentTypeOptions::directive='" + value + "'");
}
buildInjectStr();
}
void buildInjectStr();
bool operator==(const XContentTypeOptions &other) const;
const std::string headerName = "X-Content-Type-Options";
const std::string nosniff = "nosniff";
std::string directivesStr;
// (header name, header value) pair, exactly as it should be injected once all data is collected.
std::pair<std::string, std::string> headerDetails;
};
// Aggregates the three header configurations and the flat list of headers to inject.
struct Headers {
// Each header is read in its own try block; when reading throws std::runtime_error the
// header falls back to its defaults. In both cases its (name, value) pair is appended
// to headersInjectStr, so a successful call appends exactly three entries.
// NOTE(review): the catch presumably also covers the cereal::Exception thrown by the
// nested serialize() functions for an invalid directive (assuming cereal::Exception
// derives from std::runtime_error), in which case the "not configured" trace is
// emitted for invalid values too - confirm.
// NOTE(review): headersInjectStr is appended to and never cleared here.
template <typename _A>
void serialize(_A &ar) {
try
{
ar(cereal::make_nvp("strictTransportSecurity", hsts));
headersInjectStr.push_back(
std::make_pair(hsts.headerDetails.first, hsts.headerDetails.second));
}
catch (std::runtime_error& e)
{
dbgTrace(D_WAAP) << "Strict-Transport-Security header is not configured. Loading defaults.";
hsts.setDefaults();
headersInjectStr.push_back(
std::make_pair(hsts.headerDetails.first, hsts.headerDetails.second));
}
try
{
ar(cereal::make_nvp("xFrameOptions", xFrameOptions));
headersInjectStr.push_back(
std::make_pair(xFrameOptions.headerDetails.first, xFrameOptions.headerDetails.second));
}
catch (std::runtime_error& e)
{
dbgTrace(D_WAAP) << "X-Frame-Options header is not configured. Loading defaults.";
xFrameOptions.setDefaults();
headersInjectStr.push_back(
std::make_pair(xFrameOptions.headerDetails.first, xFrameOptions.headerDetails.second));
}
try
{
ar(cereal::make_nvp("xContentTypeOptions", xContentTypeOptions));
headersInjectStr.push_back(
std::make_pair(xContentTypeOptions.headerDetails.first, xContentTypeOptions.headerDetails.second));
}
catch (std::runtime_error& e)
{
dbgTrace(D_WAAP) << "X Content Type Options header is not configured. Loading defaults.";
xContentTypeOptions.setDefaults();
headersInjectStr.push_back(
std::make_pair(xContentTypeOptions.headerDetails.first, xContentTypeOptions.headerDetails.second));
}
}
bool operator==(const Headers &other) const;
// will contain all strings that should be injected as headers.
std::vector<std::pair<std::string, std::string>> headersInjectStr;
StrictTransportSecurity hsts;
XFrameOptions xFrameOptions;
XContentTypeOptions xContentTypeOptions;
};
// Enforcement switch: `enable` is true only when the "securityHeadersEnforcement" value,
// lower-cased, equals "prevent".
class SecurityHeadersEnforcement
{
public:
template <typename _A>
SecurityHeadersEnforcement(_A &ar)
:
enable(false)
{
std::string level;
ar(cereal::make_nvp("securityHeadersEnforcement", level));
level = boost::algorithm::to_lower_copy(level);
if (level == "prevent") {
enable = true;
}
}
bool operator==(const Policy::SecurityHeadersEnforcement &other) const;
bool enable;
};
Headers headers;
SecurityHeadersEnforcement m_securityHeaders;
bool operator==(const Policy &other) const;
// Reads the enforcement level first (member initializer), then the "securityHeaders" section.
template <typename _A>
Policy(_A& ar) : m_securityHeaders(ar) {
ar(cereal::make_nvp("securityHeaders", headers));
}
};
// State derived from a security-headers Policy: exposes the (header name, header value)
// pairs in headersInjectStrs.
class State {
public:
// NOTE(review): this declaration does not show whether the constructor stores its argument
// in this member - confirm in the constructor definition before relying on it being non-null.
const std::shared_ptr<Policy> policy;
State(const std::shared_ptr<Policy> &policy);
// (header name, header value) pairs.
std::vector<std::pair<std::string, std::string>> headersInjectStrs;
};
}
}

View File

@@ -0,0 +1,850 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "i_serialize.h"
#include "waap.h"
#include "Waf2Util.h"
#include "WaapAssetState.h"
#include "i_instance_awareness.h"
#include <sstream>
#include <fstream>
#include <functional>
#include "debug.h"
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlstring.h>
#include "SyncLearningNotification.h"
#include "report_messaging.h"
#include "compression_utils.h"
#include "config.h"
// Debug flag used by every dbg* statement in this file.
USE_DEBUG_FLAG(D_WAAP_CONFIDENCE_CALCULATOR);
namespace ch = std::chrono;
using namespace std;
// Duration type counting whole days (86400 seconds per tick).
typedef ch::duration<size_t, std::ratio<86400>> days;
// Length of one time slice; the sync routine offsets each asset's sync by a whole number
// of these slices (chosen by hashing the asset id) within the sync interval.
static const ch::minutes assetSyncTimeSliceLength(10);
// Maximum number of polling iterations when waiting for the remote service to publish a new state.
static const int remoteSyncMaxPollingAttempts = 10;
// Host names used when the corresponding environment variable is not set.
static const string defaultLearningHost = "appsec-learning-svc";
static const string defaultSharedStorageHost = "appsec-shared-storage-svc";
// Names of the environment variables that override the default hosts above.
#define SHARED_STORAGE_HOST_ENV_NAME "SHARED_STORAGE_HOST"
#define LEARNING_HOST_ENV_NAME "LEARNING_HOST"
// Returns true when the buffer starts with the two-byte gzip magic number (0x1f 0x8b).
// Buffers shorter than two bytes are never considered gzipped.
static bool
isGZipped(const std::string &stream)
{
    static const unsigned char gzip_magic[] = { 0x1f, 0x8b };
    if (stream.size() < sizeof(gzip_magic)) return false;
    // Compare as unsigned bytes: plain char may be signed, and 0x8b would then not match.
    const unsigned char first_byte = static_cast<unsigned char>(stream[0]);
    const unsigned char second_byte = static_cast<unsigned char>(stream[1]);
    return first_byte == gzip_magic[0] && second_byte == gzip_magic[1];
}
bool RestGetFile::loadJson(const std::string& json)
{
std::string json_str = json;
if (isGZipped(json_str) == 0)
{
return ClientRest::loadJson(json_str);
}
auto compression_stream = initCompressionStream();
DecompressionResult res = decompressData(
compression_stream,
json_str.size(),
reinterpret_cast<const unsigned char *>(json_str.c_str()));
if (res.ok){
json_str = std::string((const char *)res.output, res.num_output_bytes);
if (res.output) free(res.output);
res.output = nullptr;
res.num_output_bytes = 0;
}
finiCompressionStream(compression_stream);
return ClientRest::loadJson(json_str);
}
// Generates the JSON body through ClientRest::genJson() and gzip-compresses it.
// An error from the base implementation is returned unchanged; a compression failure
// is reported as a new error.
Maybe<std::string> RestGetFile::genJson() const
{
    Maybe<std::string> json = ClientRest::genJson();
    if (!json.ok()) {
        return json;
    }

    std::string payload = json.unpack();
    auto stream = initCompressionStream();
    CompressionResult deflated = compressData(
        stream,
        CompressionType::GZIP,
        payload.size(),
        reinterpret_cast<const unsigned char *>(payload.c_str()),
        true
    );
    finiCompressionStream(stream);

    if (!deflated.ok) {
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Failed to gzip data";
        return genError("Failed to compress data");
    }

    payload = std::string((const char *)deflated.output, deflated.num_output_bytes);
    json = payload;
    // The output buffer is released with free() once its content has been copied.
    if (deflated.output) free(deflated.output);
    deflated.output = nullptr;
    deflated.num_output_bytes = 0;
    return json;
}
// Stores the backup interval and, when a time provider is available, stamps the
// construction time as the moment of the last serialization.
SerializeToFilePeriodically::SerializeToFilePeriodically(std::chrono::seconds pollingIntervals, std::string filePath)
    :
    SerializeToFileBase(filePath),
    m_lastSerialization(0),
    m_interval(pollingIntervals)
{
    I_TimeGet *timeSource = Singleton::Consume<I_TimeGet>::by<WaapComponent>();
    if (timeSource == nullptr) {
        return;
    }
    m_lastSerialization = timeSource->getMonotonicTime();
}
// Nothing to release beyond what the members and the base class clean up themselves.
SerializeToFilePeriodically::~SerializeToFilePeriodically() = default;
void SerializeToFilePeriodically::backupWorker()
{
I_TimeGet* timer = Singleton::Consume<I_TimeGet>::by<WaapComponent>();
auto currentTime = timer->getMonotonicTime();
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "backup worker: current time: " << currentTime.count();
if (currentTime - m_lastSerialization >= m_interval)
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "backup worker: backing up data";
m_lastSerialization = currentTime;
// save data
saveData();
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "backup worker: data is backed up";
}
}
// Changes the backup interval. A real change also restarts the countdown from now;
// setting the same value again is a no-op.
void SerializeToFilePeriodically::setInterval(std::chrono::seconds newInterval)
{
    if (m_interval == newInterval) {
        return;
    }
    m_interval = newInterval;
    m_lastSerialization = Singleton::Consume<I_TimeGet>::by<WaapComponent>()->getMonotonicTime();
}
// Remembers the path of the backing file and traces it.
SerializeToFileBase::SerializeToFileBase(std::string fileName)
    :
    m_filePath(fileName)
{
    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR)
        << "SerializeToFileBase::SerializeToFileBase() fname='"
        << m_filePath
        << "'";
}
// Nothing to release beyond what the members clean up themselves.
SerializeToFileBase::~SerializeToFileBase() = default;
// Serializes the object (through the virtual serialize()) and writes the result to m_filePath,
// replacing the previous content.
// Failure to open the file is logged and aborts the save. A failure while writing or closing
// the file is now logged as well; previously it was silently ignored.
void SerializeToFileBase::saveData()
{
    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "saving to file: " << m_filePath;

    std::fstream filestream;
    filestream.open(m_filePath, std::fstream::out);
    if (!filestream.is_open()) {
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "failed to open file: " << m_filePath << " Error: "
            << strerror(errno);
        return;
    }

    std::stringstream ss;
    serialize(ss);
    filestream << ss.str();
    filestream.close();

    // close() flushes the buffer; a failed write or flush leaves failbit/badbit set on the stream.
    if (filestream.fail()) {
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "failed to write file: " << m_filePath;
    }
}
void SerializeToFileBase::loadFromFile(std::string filePath)
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "loadFromFile() file: " << filePath;
std::fstream filestream;
filestream.open(filePath, std::fstream::in);
if (filestream.is_open() == false) {
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "failed to open file: " << filePath << " Error: " <<
strerror(errno);
if (!Singleton::exists<I_InstanceAwareness>() || errno != ENOENT)
{
return;
}
// if we fail to open a file because it doesn't exist and instance awareness is present
// try to strip the unique ID from the path and load the file from the parent directory
// that might exist in previous run where instance awareness didn't exits.
I_InstanceAwareness* instanceAwareness = Singleton::Consume<I_InstanceAwareness>::by<WaapComponent>();
Maybe<std::string> id = instanceAwareness->getUniqueID();
if (!id.ok())
{
return;
}
std::string idStr = "/" + id.unpack() + "/";
size_t idPosition = filePath.find(idStr);
if (idPosition != std::string::npos)
{
filePath.erase(idPosition, idStr.length() - 1);
dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "retry to load file from : " << filePath;
loadFromFile(filePath);
}
return;
}
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "loading from file: " << filePath;
int length;
filestream.seekg(0, std::ios::end); // go to the end
length = filestream.tellg(); // report location (this is the length)
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "file length: " << length;
assert(length >= 0); // length -1 really happens if filePath is a directory (!)
char* buffer = new char[length]; // allocate memory for a buffer of appropriate dimension
filestream.seekg(0, std::ios::beg); // go back to the beginning
if (!filestream.read(buffer, length)) // read the whole file into the buffer
{
filestream.close();
delete[] buffer;
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Failed to read file, file: " << filePath;
return;
}
filestream.close();
std::string dataObfuscated(buffer, length);
delete[] buffer;
std::stringstream ss(dataObfuscated);
try
{
deserialize(ss);
}
catch (std::runtime_error & e) {
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "failed to deserialize file: " << m_filePath << ", error: " <<
e.what();
}
}
void SerializeToFileBase::restore()
{
loadFromFile(m_filePath);
}
// Replaces the path used by subsequent saveData()/restore() calls.
void SerializeToFileBase::setFilePath(const std::string& new_file_path)
{
    m_filePath.assign(new_file_path);
}
// Starts with an empty files list and an empty paths list.
RemoteFilesList::RemoteFilesList()
    :
    files(),
    filesPathsList()
{}
// parses xml instead of json
// extracts a file list in <Contents><Key>
bool RemoteFilesList::loadJson(const std::string& xml)
{
xmlDocPtr doc; // the resulting document tree
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "XML input: " << xml;
doc = xmlParseMemory(xml.c_str(), xml.length());
if (doc == NULL) {
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Failed to parse " << xml;
return false;
}
xmlNodePtr node = doc->children;
if (node->children == NULL)
{
return false;
}
node = node->children;
xmlChar *contents_name = xmlCharStrdup("Contents");
xmlChar *key_name = xmlCharStrdup("Key");
xmlChar *last_modified_name = xmlCharStrdup("LastModified");
// allows to get reference to the internal member and modify it
files.setActive(true);
while (node != NULL)
{
if (xmlStrEqual(contents_name, node->name) == 1)
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Found the Contents element";
xmlNodePtr contents_node = node->children;
std::string file;
std::string lastModified;
while (contents_node != NULL)
{
if (xmlStrEqual(key_name, contents_node->name) == 1)
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Found the Key element";
xmlChar* xml_file = xmlNodeGetContent(contents_node);
file = std::string(reinterpret_cast<const char*>(xml_file));
xmlFree(xml_file);
}
if (xmlStrEqual(last_modified_name, contents_node->name) == 1)
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Found the LastModified element";
xmlChar* xml_file = xmlNodeGetContent(contents_node);
lastModified = std::string(reinterpret_cast<const char*>(xml_file));
xmlFree(xml_file);
}
if (!file.empty() && !lastModified.empty())
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Adding the file: " << file <<
" last modified: " << lastModified;
break;
}
contents_node = contents_node->next;
}
files.get().push_back(FileMetaData{ file, lastModified });
filesPathsList.push_back(file);
}
node = node->next;
}
// free up memory
xmlFree(last_modified_name);
xmlFree(contents_name);
xmlFree(key_name);
xmlFreeDoc(doc);
return true;
}
const std::vector<std::string>& RemoteFilesList::getFilesList() const
{
return filesPathsList;
}
const std::vector<FileMetaData>& RemoteFilesList::getFilesMetadataList() const
{
return files.get();
}
// Sets up local-and-remote synchronization for one asset/owner.
//
// In HYBRID orchestration mode the shared-storage and learning host names are read from the
// SHARED_STORAGE_HOST / LEARNING_HOST environment variables (a warning is logged for each one
// that is missing). The type is derived from the remote path: every component after the first
// two (ignoring one leading empty component), re-joined with '/'. Finally the main loop is
// fetched and setInterval() is called with the requested interval.
//
// Fix: the warning for a missing learning host now names LEARNING_HOST_ENV_NAME; it used to
// print the shared-storage variable name.
SerializeToLocalAndRemoteSyncBase::SerializeToLocalAndRemoteSyncBase(
    std::chrono::minutes interval,
    std::chrono::seconds waitForSync,
    const std::string& filePath,
    const std::string& remotePath,
    const std::string& assetId,
    const std::string& owner)
    :
    SerializeToFileBase(filePath),
    m_remotePath(remotePath),
    m_interval(0),
    m_owner(owner),
    m_pMainLoop(nullptr),
    m_waitForSync(waitForSync),
    m_workerRoutineId(0),
    m_daysCount(0),
    m_windowsCount(0),
    m_intervalsCounter(0),
    m_remoteSyncEnabled(true),
    m_assetId(assetId),
    m_shared_storage_host(genError("not set")),
    m_learning_host(genError("not set"))
{
    dbgInfo(D_WAAP_CONFIDENCE_CALCULATOR) << "Create SerializeToLocalAndRemoteSyncBase. assetId='" << assetId <<
        "', owner='" << m_owner << "'";

    if (Singleton::exists<I_AgentDetails>() &&
        Singleton::Consume<I_AgentDetails>::by<WaapComponent>()->getOrchestrationMode() ==
        OrchestrationMode::HYBRID) {
        char* sharedStorageHost = getenv(SHARED_STORAGE_HOST_ENV_NAME);
        if (sharedStorageHost != nullptr) {
            m_shared_storage_host = string(sharedStorageHost);
        } else {
            dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) <<
                "shared storage host name(" <<
                SHARED_STORAGE_HOST_ENV_NAME <<
                ") is not set";
        }
        char* learningHost = getenv(LEARNING_HOST_ENV_NAME);
        if (learningHost != nullptr) {
            m_learning_host = string(learningHost);
        } else {
            dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) <<
                "learning host name(" <<
                LEARNING_HOST_ENV_NAME <<
                ") is not set";
        }
    }
    if (remotePath != "") {
        // remote path is /<tenantId>/<assetId>/<type>
        auto parts = split(remotePath, '/');
        if (parts.size() > 2) {
            // skip the empty component produced by a leading '/'
            size_t offset = 0;
            if (parts[0].empty()) {
                offset = 1;
            }
            std::string type = "";
            for (size_t i = offset + 2; i < parts.size(); i++)
            {
                type += type.empty() ? parts[i] : "/" + parts[i];
            }
            m_type = type;
        }
    }
    m_pMainLoop = Singleton::Consume<I_MainLoop>::by<WaapComponent>();
    setInterval(interval);
}
// An instance without a remote path is considered a "base" instance.
bool SerializeToLocalAndRemoteSyncBase::isBase()
{
    return m_remotePath.empty();
}
// Returns the URI prefix for storage requests: "/api" in HYBRID orchestration mode,
// "/storage/waap" otherwise (including when agent details are unavailable).
std::string SerializeToLocalAndRemoteSyncBase::getUri()
{
    static const string hybridModeUri = "/api";
    static const string onlineModeUri = "/storage/waap";

    const bool isHybrid =
        Singleton::exists<I_AgentDetails>() &&
        Singleton::Consume<I_AgentDetails>::by<WaapComponent>()->getOrchestrationMode() ==
            OrchestrationMode::HYBRID;

    return isHybrid ? hybridModeUri : onlineModeUri;
}
size_t SerializeToLocalAndRemoteSyncBase::getIntervalsCount()
{
return m_intervalsCounter;
}
// Nothing is released explicitly here.
SerializeToLocalAndRemoteSyncBase::~SerializeToLocalAndRemoteSyncBase() = default;
// Builds the identifier of the current sync window: "window_<daysCount>_<windowsCount>".
std::string SerializeToLocalAndRemoteSyncBase::getWindowId()
{
    std::string windowId = "window_";
    windowId += std::to_string(m_daysCount);
    windowId += "_";
    windowId += std::to_string(m_windowsCount);
    return windowId;
}
// Builds the URL this agent posts its data to:
// <uri>/<remotePath>/<windowId>/<agentId>[/<uniqueId>]/data.data
// The unique id is appended only when instance awareness exists and provides one.
std::string SerializeToLocalAndRemoteSyncBase::getPostDataUrl()
{
    std::string agentId = Singleton::Consume<I_AgentDetails>::by<WaapComponent>()->getAgentId();

    if (Singleton::exists<I_InstanceAwareness>())
    {
        Maybe<std::string> uniqueId =
            Singleton::Consume<I_InstanceAwareness>::by<WaapComponent>()->getUniqueID();
        if (uniqueId.ok())
        {
            agentId += "/" + uniqueId.unpack();
        }
    }

    const std::string windowId = getWindowId();

    std::string url = getUri();
    url += "/";
    url += m_remotePath;
    url += "/";
    url += windowId;
    url += "/";
    url += agentId;
    url += "/data.data";
    return url;
}
void SerializeToLocalAndRemoteSyncBase::setRemoteSyncEnabled(bool enabled)
{
m_remoteSyncEnabled = enabled;
}
// Sets the sync interval and, on the first effective call, registers the routine that runs
// syncWorker() once per interval.
// - An unchanged interval is a no-op.
// - If the routine is already registered (m_workerRoutineId != 0) only m_interval is updated;
//   the running routine reads m_interval on each iteration.
// NOTE(review): the routine divides by m_interval; an interval of zero seconds reaching the
// routine would divide by zero - confirm callers never pass 0 after the routine is started.
void SerializeToLocalAndRemoteSyncBase::setInterval(std::chrono::seconds newInterval)
{
dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "setInterval: from " << m_interval.count() << " to " <<
newInterval.count() << " seconds. assetId='" << m_assetId << "', owner='" << m_owner << "'";
if (newInterval == m_interval)
{
return;
}
m_interval = newInterval;
if (m_workerRoutineId != 0)
{
return;
}
// The routine body: loops forever, sleeping until the next window boundary (plus the
// per-asset slice offset) and then running syncWorker().
I_MainLoop::Routine syncRoutineOnLoad = [this]() {
I_TimeGet* timer = Singleton::Consume<I_TimeGet>::by<WaapComponent>();
// Before the first iteration both timestamps are equal, so the measured worker duration is zero.
ch::microseconds timeBeforeSyncWorker = timer->getWalltime();
ch::microseconds timeAfterSyncWorker = timeBeforeSyncWorker;
while (true)
{
// Window identification: whole days in the wall time, and the index of the interval
// within the remainder of that day.
m_daysCount = ch::duration_cast<days>(timeBeforeSyncWorker).count();
ch::microseconds timeSinceMidnight = timeBeforeSyncWorker - ch::duration_cast<days>(timeBeforeSyncWorker);
m_windowsCount = timeSinceMidnight / m_interval;
// Distribute syncWorker tasks for different assets spread over assetSyncTimeSliceLength intervals
// It is guaranteed that for the same asset, sync events will start at the same time on all
// http_transaction_host instances.
size_t slicesCount = m_interval / assetSyncTimeSliceLength;
size_t sliceIndex = 0;
if (slicesCount != 0 && m_assetId != "") {
sliceIndex = std::hash<std::string>{}(m_assetId) % slicesCount;
}
ch::seconds sliceOffset = assetSyncTimeSliceLength * sliceIndex;
// Time left until the next interval boundary, shifted by the slice offset and reduced by
// the time the previous syncWorker() run took.
ch::microseconds remainingTime = m_interval - (timeAfterSyncWorker - timeBeforeSyncWorker) -
timeBeforeSyncWorker % m_interval + sliceOffset;
if (remainingTime > m_interval) {
// on load between trigger and offset remaining time is larger than the interval itself
remainingTime -= m_interval;
dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "adjusting remaining time: " << remainingTime.count();
if (timeBeforeSyncWorker.count() != 0)
{
// Recompute the window identification from a timestamp one interval earlier.
auto updateTime = timeBeforeSyncWorker - m_interval;
m_daysCount = ch::duration_cast<days>(updateTime).count();
ch::microseconds timeSinceMidnight = updateTime - ch::duration_cast<days>(updateTime);
m_windowsCount = timeSinceMidnight / m_interval;
}
}
if (remainingTime < ch::seconds(0)) {
// syncWorker execution time was so large the remaining time became negative
remainingTime = ch::seconds(0);
dbgError(D_WAAP_CONFIDENCE_CALCULATOR) << "syncWorker execution time (owner='" << m_owner <<
"', assetId='" << m_assetId << "') is " <<
ch::duration_cast<ch::seconds>(timeAfterSyncWorker - timeBeforeSyncWorker).count() <<
" seconds, too long to cause negative remainingTime. Waiting 0 seconds...";
}
dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "current time: " << timeBeforeSyncWorker.count() << " \u00b5s" <<
": assetId='" << m_assetId << "'" <<
", owner='" << m_owner << "'" <<
", daysCount=" << m_daysCount <<
", windowsCount=" << m_windowsCount <<
", interval=" << m_interval.count() << " seconds"
", seconds till next window=" << ch::duration_cast<ch::seconds>(remainingTime - sliceOffset).count() <<
", sliceOffset=" << sliceOffset.count() << " seconds" <<
", hashIndex=" << sliceIndex <<
": next wakeup in " << ch::duration_cast<ch::seconds>(remainingTime).count() << " seconds";
// Sleep until the computed time, then run the worker and time it for the next iteration.
m_pMainLoop->yield(remainingTime);
timeBeforeSyncWorker = timer->getWalltime();
syncWorker();
timeAfterSyncWorker = timer->getWalltime();
}
};
m_workerRoutineId = m_pMainLoop->addOneTimeRoutine(
I_MainLoop::RoutineType::System,
syncRoutineOnLoad,
"Sync worker learning on load"
);
}
// Performs the whole sync locally: lists the files posted for the current window,
// pulls them, processes the data, saves it to file and posts the processed result.
// Returns false (after logging) when the list of files cannot be retrieved.
bool SerializeToLocalAndRemoteSyncBase::localSyncAndProcess()
{
    RemoteFilesList rawDataFiles;

    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Getting files of all agents";

    const std::string listUri =
        getUri() + "/?list-type=2&prefix=" + m_remotePath + "/" + getWindowId() + "/";
    if (!sendObjectWithRetry(rawDataFiles, I_Messaging::Method::GET, listUri))
    {
        dbgError(D_WAAP_CONFIDENCE_CALCULATOR) << "Failed to get the list of files";
        return false;
    }

    pullData(rawDataFiles.getFilesList());
    processData();
    saveData();
    postProcessedData();
    return true;
}
std::chrono::seconds SerializeToLocalAndRemoteSyncBase::getIntervalDuration() const
{
return m_interval;
}
void SerializeToLocalAndRemoteSyncBase::updateStateFromRemoteService()
{
for (int i = 0; i < remoteSyncMaxPollingAttempts; i++)
{
m_pMainLoop->yield(std::chrono::seconds(60));
RemoteFilesList remoteFiles = getRemoteProcessedFilesList();
if (remoteFiles.getFilesMetadataList().empty())
{
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "no files generated by the remote service were found";
continue;
}
std::string lastModified = remoteFiles.getFilesMetadataList().begin()->modified;
if (lastModified != m_lastProcessedModified)
{
m_lastProcessedModified = lastModified;
updateState(remoteFiles.getFilesList());
dbgInfo(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner <<
". updated state generated by remote at " << m_lastProcessedModified;
return;
}
}
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "polling for update state timeout. for assetId='"
<< m_assetId << "', owner='" << m_owner;
localSyncAndProcess();
}
// One synchronization cycle.
//
// 1. When remote sync is not possible (OFFLINE mode, sync disabled, base instance, or posting
//    the local data fails) the data is only processed and saved locally.
// 2. Otherwise: wait m_waitForSync for the other agents to post, probe whether the remote
//    service has produced a state yet, and fall back to localSyncAndProcess() when it has not
//    (or when the remote service is disabled by settings).
// 3. Notify the learning service: a direct POST in HYBRID mode, a report message otherwise.
// 4. When the remote service is in use, poll it for the updated state.
//
// Fix: the OFFLINE check is now evaluated before postData(), so no upload is attempted when the
// agent runs in offline mode (previously postData() ran first because of the || ordering).
void SerializeToLocalAndRemoteSyncBase::syncWorker()
{
    dbgInfo(D_WAAP_CONFIDENCE_CALCULATOR) << "Running the sync worker for assetId='" << m_assetId << "', owner='" <<
        m_owner << "'" << " last modified state: " << m_lastProcessedModified;
    m_intervalsCounter++;
    OrchestrationMode mode = Singleton::exists<I_AgentDetails>() ?
        Singleton::Consume<I_AgentDetails>::by<WaapComponent>()->getOrchestrationMode() : OrchestrationMode::ONLINE;

    // postData() has side effects (it uploads), so it must stay the last operand.
    if (mode == OrchestrationMode::OFFLINE || !m_remoteSyncEnabled || isBase() || !postData())
    {
        dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR)
            << "Did not synchronize the data. Remote URL: "
            << m_remotePath
            << " is enabled: "
            << std::to_string(m_remoteSyncEnabled);
        processData();
        saveData();
        return;
    }

    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Waiting for all agents to post their data";
    m_pMainLoop->yield(m_waitForSync);
    // check if learning service is operational
    if (m_lastProcessedModified == "")
    {
        dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "check if remote service is operational";
        RemoteFilesList remoteFiles = getRemoteProcessedFilesList();
        if (!remoteFiles.getFilesMetadataList().empty())
        {
            m_lastProcessedModified = remoteFiles.getFilesMetadataList()[0].modified;
            dbgInfo(D_WAAP_CONFIDENCE_CALCULATOR) << "First sync by remote service: " << m_lastProcessedModified;
        }
    }

    // check if learning service is enabled
    bool isRemoteServiceEnabled = getProfileAgentSettingWithDefault<bool>(
        true,
        "appsecLearningSettings.remoteServiceEnabled");

    dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "using remote service: " << isRemoteServiceEnabled;
    if ((m_lastProcessedModified == "" || !isRemoteServiceEnabled) && !localSyncAndProcess())
    {
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "local sync and process failed";
        return;
    }

    if (mode == OrchestrationMode::HYBRID) {
        dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "detected running in standalone mode";
        I_AgentDetails *agentDetails = Singleton::Consume<I_AgentDetails>::by<WaapComponent>();
        I_Messaging *messaging = Singleton::Consume<I_Messaging>::by<WaapComponent>();

        SyncLearningObject syncObj(m_assetId, m_type, getWindowId());

        Flags<MessageConnConfig> conn_flags;
        conn_flags.setFlag(MessageConnConfig::EXTERNAL);
        std::string tenant_header = "X-Tenant-Id: " + agentDetails->getTenantId();
        bool ok = messaging->sendNoReplyObject(syncObj,
            I_Messaging::Method::POST,
            getLearningHost(),
            80,
            conn_flags,
            "/api/sync",
            tenant_header);
        dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "sent learning sync notification ok: " << ok;
        if (!ok) {
            dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "failed to send learning notification";
        }
    } else {
        SyncLearningNotificationObject syncNotification(m_assetId, m_type, getWindowId());

        dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "sending sync notification: " << syncNotification;

        ReportMessaging(
            "sync notification for '" + m_assetId + "'",
            ReportIS::AudienceTeam::WAAP,
            syncNotification,
            false,
            MessageTypeTag::WAAP_LEARNING,
            ReportIS::Tags::WAF,
            ReportIS::Notification::SYNC_LEARNING
        );
    }

    if (m_lastProcessedModified != "" && isRemoteServiceEnabled)
    {
        updateStateFromRemoteService();
    }
}
// Restores the local state from file and, unless this is a base instance, also schedules
// a merge of the processed state held by the remote service.
void SerializeToLocalAndRemoteSyncBase::restore()
{
    SerializeToFileBase::restore();
    if (isBase())
    {
        return;
    }
    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "merge state from remote service";
    mergeProcessedFromRemote();
}
// Lists the files under "<remotePath>/remote". The list is returned empty when the remote
// service is disabled by settings; a failed request is logged and whatever was collected
// is returned.
RemoteFilesList SerializeToLocalAndRemoteSyncBase::getRemoteProcessedFilesList()
{
    RemoteFilesList remoteFiles;

    const bool remoteServiceEnabled = getProfileAgentSettingWithDefault<bool>(
        true,
        "appsecLearningSettings.remoteServiceEnabled");
    if (!remoteServiceEnabled)
    {
        dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "remote service is disabled";
        return remoteFiles;
    }

    const bool listed = sendObject(
        remoteFiles,
        I_Messaging::Method::GET,
        getUri() + "/?list-type=2&prefix=" + m_remotePath + "/remote");
    if (!listed)
    {
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Failed to get the list of files";
    }
    return remoteFiles;
}
// Finds the processed state files, trying three locations in order:
//  1. files generated by the remote service (also records their "last modified" stamp);
//  2. "<remotePath>/processed";
//  3. backward compatibility: the same "processed" prefix with the instance id inserted after
//     the second '/' of the remote path.
// The list from the first location that yields files is returned; otherwise the result of the
// last attempt made is returned.
RemoteFilesList SerializeToLocalAndRemoteSyncBase::getProcessedFilesList()
{
    RemoteFilesList processedFilesList = getRemoteProcessedFilesList();

    if (!processedFilesList.getFilesList().empty())
    {
        const std::vector<FileMetaData>& remoteStateFiles = processedFilesList.getFilesMetadataList();
        if (remoteStateFiles.size() > 1) {
            dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "got more than 1 expected processed file";
        }
        if (!remoteStateFiles.empty()) {
            m_lastProcessedModified = remoteStateFiles[0].modified;
        }
        dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "found " << remoteStateFiles.size() << " remote service state files. "
            "last modified: " << m_lastProcessedModified;

        return processedFilesList;
    }

    bool isSuccessful = sendObject(
        processedFilesList,
        I_Messaging::Method::GET,
        getUri() + "/?list-type=2&prefix=" + m_remotePath + "/processed");

    if (isSuccessful)
    {
        if (!processedFilesList.getFilesList().empty())
        {
            dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "found state files";
            return processedFilesList;
        }
    }
    else
    {
        dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "Failed to get the list of files";
    }

    // backward compatibility - try to get backup file with the buggy prefix tenantID/assetID/instanceID/
    std::string bcRemotePath = m_remotePath;
    // position of the second '/' in the remote path
    size_t secondSlash = bcRemotePath.find('/');
    secondSlash = bcRemotePath.find('/', secondSlash + 1);

    if (!Singleton::exists<I_InstanceAwareness>())
    {
        dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "missing instance of instance awareness,"
            " can't check backward compatibility";
        return processedFilesList;
    }

    I_InstanceAwareness* instanceAwareness = Singleton::Consume<I_InstanceAwareness>::by<WaapComponent>();
    Maybe<std::string> id = instanceAwareness->getUniqueID();
    if (!id.ok())
    {
        dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "failed to get instance id err: " << id.getErr() <<
            ". can't check backward compatibility";
        return processedFilesList;
    }

    std::string idStr = id.unpack();
    bcRemotePath.insert(secondSlash + 1, idStr + "/");
    dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "List of files is empty - trying to get the file from " <<
        bcRemotePath;

    isSuccessful = sendObject(
        processedFilesList,
        I_Messaging::Method::GET,
        getUri() + "/?list-type=2&prefix=" + bcRemotePath + "/processed");
    if (!isSuccessful)
    {
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Failed to get the list of files";
    }

    dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "backwards computability: got "
        << processedFilesList.getFilesList().size() << " state files";
    return processedFilesList;
}
void SerializeToLocalAndRemoteSyncBase::mergeProcessedFromRemote()
{
dbgDebug(D_WAAP_CONFIDENCE_CALCULATOR) << "Merging processed data from remote. assetId='" << m_assetId <<
"', owner='" << m_owner << "'";
m_pMainLoop->addOneTimeRoutine(
I_MainLoop::RoutineType::Offline,
[&]()
{
RemoteFilesList processedFiles = getProcessedFilesList();
pullProcessedData(processedFiles.getFilesList());
},
"Merge processed data from remote for asset Id: " + m_assetId + ", owner:" + m_owner
);
}
// Returns the learning host: the cached value when present, otherwise the
// value of the LEARNING_HOST_ENV_NAME environment variable (which is then
// cached), otherwise defaultLearningHost.
string
SerializeToLocalAndRemoteSyncBase::getLearningHost()
{
    if (m_learning_host.ok()) return *m_learning_host;

    const char *envHost = getenv(LEARNING_HOST_ENV_NAME);
    if (envHost == nullptr) {
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "learning host is not set. using default";
        return defaultLearningHost;
    }

    // Remember the environment value so later calls skip the lookup.
    m_learning_host = string(envHost);
    return envHost;
}
// Returns the shared storage host: the cached value when present, otherwise
// the value of the SHARED_STORAGE_HOST_ENV_NAME environment variable (which
// is then cached), otherwise defaultSharedStorageHost.
string
SerializeToLocalAndRemoteSyncBase::getSharedStorageHost()
{
    if (m_shared_storage_host.ok()) return *m_shared_storage_host;

    const char *envHost = getenv(SHARED_STORAGE_HOST_ENV_NAME);
    if (envHost == nullptr) {
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "shared storage host is not set. using default";
        return defaultSharedStorageHost;
    }

    // Remember the environment value so later calls skip the lookup.
    m_shared_storage_host = string(envHost);
    return envHost;
}

View File

@@ -0,0 +1,278 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "Signatures.h"
#include "i_encryptor.h"
#include "waap.h"
#include <fstream>
USE_DEBUG_FLAG(D_WAAP);
typedef picojson::value::object JsObj;
typedef picojson::value JsVal;
typedef picojson::value::array JsArr;
typedef std::map<std::string, std::vector<std::string>> filtered_parameters_t;
// Converts a JSON array of strings into a vector, preserving element order.
static std::vector<std::string> to_strvec(const picojson::value::array& jsV)
{
    std::vector<std::string> strings;
    for (const picojson::value& item : jsV) {
        strings.push_back(item.get<std::string>());
    }
    return strings;
}
// Converts a JSON array of strings into a set (duplicates collapse).
static std::set<std::string> to_strset(const picojson::value::array& jsA)
{
    std::set<std::string> strings;
    for (const picojson::value& item : jsA) {
        strings.insert(item.get<std::string>());
    }
    return strings;
}
// Builds a map from each key of the JSON object to a heap-allocated Regex
// compiled from the key's string value.
//
// jsO   - JSON object whose values are regex source strings.
// error - in/out error flag, also handed to every Regex constructor. Loading
//         stops as soon as the flag is set (either on entry or by a previous
//         Regex construction).
//
// The returned pointers are owned by the caller and must be freed by it.
static std::map<std::string, Regex*> to_regexmap(const picojson::value::object& jsO, bool& error)
{
    std::map<std::string, Regex*> r;
    for (auto it = jsO.begin(); it != jsO.end(); ++it) {
        if (error) {
            // stop loading regexes if there's previous error...
            break;
        }
        const std::string& n = it->first;
        // convert name to lowercase now (so we don't need to do it at runtime every time).
        // std::tolower requires a value representable as unsigned char; passing a negative
        // plain char is undefined behavior, hence the cast.
        std::string n_lower;
        n_lower.reserve(n.size());
        for (const char ch : n) {
            n_lower += static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
        }
        // Pointers to Regex instances are stored instead of instances themselves to avoid
        // the need to make the Regex objects copyable.
        // However, these pointers must be freed by the holder of the returned map!
        // note: in our case this freeing is happening in the destructor of the WaapAssetState class.
        r[n] = new Regex(it->second.get<std::string>(), error, n_lower);
    }
    return r;
}
// Converts a JSON object whose values are arrays of strings into a map from
// each key to the vector of its strings.
static filtered_parameters_t to_filtermap(const picojson::value::object& JsObj)
{
    filtered_parameters_t filters;
    for (const auto& entry : JsObj) {
        filters[entry.first] = to_strvec(entry.second.get<picojson::value::array>());
    }
    return filters;
}
// Builds a regex source string matching two or more "key=value" items
// separated by `delim`, with an optional trailing delimiter.
// `delim` is inserted verbatim, so it must already be regex-escaped.
std::string genDelimitedKeyValPattern(const std::string& delim)
{
    // One lazily-matched "key=value" item that contains no delimiter.
    const std::string keyVal = "[^" + delim + "]+?=[^" + delim + "]+?";
    return "^(" + keyVal + delim + ")+(" + keyVal + ")" + delim + "?$";
}
// Loads the signatures JSON from `filepath` and builds every regex / set / map
// member from the matching key of the parsed document. A few regexes
// (pipe/semicolon splitters, nospaces_long_value_re, php/html/uri parsers and
// the delimited key-value patterns) are built from hard-coded patterns instead.
// The shared `error` flag is passed to every Regex construction and to
// to_regexmap(); callers read it afterwards through fail().
//
// NOTE(review): members are initialized in declaration order, and Signatures.h
// declares sigsSource before error. loadSource() therefore runs first and may
// set error = true, after which the error(false) initializer below overwrites
// it - a load failure is not reflected in the flag. Confirm and fix the order.
Signatures::Signatures(const std::string& filepath) :
sigsSource(loadSource(filepath)),
error(false),
m_regexPreconditions(std::make_shared<Waap::RegexPreconditions>(sigsSource, error)),
// Keyword / pattern regex lists; these share the preconditions object.
words_regex(
to_strvec(sigsSource["words_regex_list"].get<picojson::value::array>()),
error,
"words_regex_list",
m_regexPreconditions
),
specific_acuracy_keywords_regex(
to_strvec(sigsSource["specific_acuracy_keywords_regex_list"].get<picojson::value::array>()),
error,
"specific_acuracy_keywords_regex_list",
m_regexPreconditions
),
pattern_regex(
to_strvec(sigsSource["pattern_regex_list"].get<picojson::value::array>()),
error,
"pattern_regex_list",
m_regexPreconditions
),
// Single-pattern regexes read from the document.
un_escape_pattern(sigsSource["un_escape_pattern"].get<std::string>(), error, "un_escape_pattern"),
quotes_ev_pattern(sigsSource["quotes_ev_pattern"].get<std::string>(), error, "quotes_ev_pattern"),
comment_ev_pattern(sigsSource["comment_ev_pattern"].get<std::string>(), error, "comment_ev_pattern"),
quotes_space_ev_pattern(
sigsSource["quotes_space_ev_fast_reg"].get<std::string>(), error,
"quotes_space_ev_fast_reg"
),
allowed_text_re(sigsSource["allowed_text_re"].get<std::string>(), error, "allowed_text_re"),
// Hard-coded splitter patterns (not taken from the signatures file).
pipe_split_re(
"([\\w\\=\\-\\_\\.\\,\\(\\)\\[\\]\\/\\%\\s]+?)\\||([\\w\\=\\-\\_\\.\\,\\(\\)\\[\\]\\/\\%\\s]+)|\\|()",
error,
"pipe_decode"),
semicolon_split_re("([\\w\\=\\-\\_\\.\\,\\(\\)\\%]+?);|([\\w\\=\\-\\_\\.\\,\\(\\)\\%]+)|;()", error, "sem_decode"),
longtext_re(sigsSource["longtext_re"].get<std::string>(), error, "longtext_re"),
nospaces_long_value_re("^[^\\s]{16,}$", error, "nospaces_long_value_re"),
good_header_name_re(sigsSource["good_header_name_re"].get<std::string>(), error, "good_header_name"),
good_header_value_re(sigsSource["good_header_value_re"].get<std::string>(), error, "good_header_value"),
// Ignore lists: global, per-URL and per-header keywords / patterns / regex.
ignored_for_nospace_long_value(
to_strset(sigsSource["ignored_for_nospace_long_value"].get<picojson::value::array>())),
global_ignored_keywords(
to_strset(
sigsSource["global_ignored"].get<picojson::value::object>()["keys"].get<picojson::value::array>()
)
),
global_ignored_patterns(
to_strset(
sigsSource["global_ignored"].get<picojson::value::object>()["patterns"].get<picojson::value::array>()
)
),
url_ignored_keywords(
to_strset(
sigsSource["ignored_for_url"].get<picojson::value::object>()["keys"].get<picojson::value::array>()
)
),
url_ignored_patterns(
to_strset(
sigsSource["ignored_for_url"].get<picojson::value::object>()["patterns"].get<picojson::value::array>()
)
),
url_ignored_re(
sigsSource["ignored_for_url"].get<picojson::value::object>()["regex"].get<std::string>(),
error,
"url_ignored"
),
header_ignored_keywords(
to_strset(
sigsSource["ignored_for_headers"].get<picojson::value::object>()["keys"].get<picojson::value::array>()
)
),
header_ignored_patterns(
to_strset(
sigsSource["ignored_for_headers"].get<picojson::value::object>()
["patterns"].get<picojson::value::array>()
)
),
header_ignored_re(
sigsSource["ignored_for_headers"].get<picojson::value::object>()["regex"].get<std::string>(),
error,
"header_ignored"
),
// Maps from a name to a list of strings.
filter_parameters(
to_filtermap(
sigsSource["filter_parameters"].get<picojson::object>()
)
),
m_attack_types(
to_filtermap(
sigsSource["attack_types_map"].get<picojson::object>()
)
),
// Removed by Pavel's request. Leaving here in case he'll want to add this back...
#if 0
cookie_ignored_keywords(
to_strset(
sigsSource["ignored_for_cookies"].get<picojson::value::object>()["keys"].get<picojson::value::array>()
)
),
cookie_ignored_patterns(
to_strset(
sigsSource["ignored_for_cookies"].get<picojson::value::object>()
["patterns"].get<picojson::value::array>()
)
),
cookie_ignored_re(
sigsSource["ignored_for_cookies"].get<picojson::value::object>()["regex"].get<std::string>(),
error,
"cookie_ignored"
),
#endif
// Hard-coded format-detection regexes.
php_serialize_identifier("^(N;)|^([ibdsOoCcRra]:\\d+)", error, "php_serialize_identifier"),
html_regex("(<(?>body|head)\\b.*>(?>.|[\\r\\n]){0,400}){2}|<html", error, "htmlRegex"),
uri_parser_regex("(http|https)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)", error, "uriParserRegex"),
// boost::regex members: these constructions do not take the `error` flag.
confluence_macro_re("{[^\"]+:(?>.+\\|)+.+}"),
pipes_delimited_key_val_re(genDelimitedKeyValPattern("\\|")),
semicolon_delimited_key_val_re(genDelimitedKeyValPattern(";")),
asterisk_delimited_key_val_re(genDelimitedKeyValPattern("\\*")),
comma_delimited_key_val_re(genDelimitedKeyValPattern(",")),
ampersand_delimited_key_val_re(genDelimitedKeyValPattern("&")),
// Name -> Regex* maps; the pointers are allocated with new inside to_regexmap().
headers_re(to_regexmap(sigsSource["headers_re"].get<JsObj>(), error)),
format_magic_binary_re(sigsSource["format_magic_binary_re"].get<std::string>(), error, "format_magic_binary_re"),
params_type_re(to_regexmap(sigsSource["format_types_regex_list"].get<JsObj>(), error)),
// Response signatures; built without a preconditions object (nullptr).
resp_hdr_pattern_regex_list(to_strvec(sigsSource["resp_hdr_pattern_regex_list"].get<JsArr>()),
error, "resp_hdr_pattern_regex_list", nullptr),
resp_hdr_words_regex_list(to_strvec(sigsSource["resp_hdr_words_regex_list"].get<JsArr>()),
error, "resp_hdr_words_regex_list", nullptr),
resp_body_pattern_regex_list(to_strvec(sigsSource["resp_body_pattern_regex_list"].get<JsArr>()),
error, "resp_body_pattern_regex_list", nullptr),
resp_body_words_regex_list(to_strvec(sigsSource["resp_body_words_regex_list"].get<JsArr>()),
error, "resp_body_words_regex_list", nullptr),
remove_keywords_always(
to_strset(sigsSource["remove_keywords_always"].get<JsArr>())),
user_agent_prefix_re(sigsSource["user_agent_prefix_re"].get<std::string>()),
binary_data_kw_filter(sigsSource["binary_data_kw_filter"].get<std::string>()),
wbxml_data_kw_filter(sigsSource["wbxml_data_kw_filter"].get<std::string>())
{
}
// Nothing to release explicitly here.
// NOTE(review): headers_re and params_type_re hold raw Regex* allocated by
// to_regexmap(); this destructor does not delete them - confirm they are
// freed by another owner.
Signatures::~Signatures() = default;
// Returns the current value of the error flag shared by the signature loaders.
bool
Signatures::fail()
{
    return error;
}
// Reads the whole file `sigsFname` and parses it as JSON.
//
// Returns the parsed top-level JSON object. On failure to open, size or parse
// the file, logs the problem, sets the `error` member and returns an empty
// object.
// NOTE(review): a document that parses but is not a JSON object still reaches
// doc.get<object>() below, exactly as before - confirm whether that should be
// reported through `error` as well.
picojson::value::object Signatures::loadSource(const std::string& sigsFname)
{
    std::ifstream f(sigsFname.c_str());

    if (f.fail()) {
        dbgError(D_WAAP) << "Failed to open json data file '" << sigsFname << "'!";
        error = true; // flag an error
        return picojson::value::object();
    }

    // Determine the file size. tellg() reports -1 on failure, which must not
    // be used as a buffer size.
    f.seekg(0, std::ios::end);
    const std::streamoff length = f.tellg();
    if (length < 0) {
        dbgError(D_WAAP) << "Failed to get the size of json data file '" << sigsFname << "'!";
        error = true; // flag an error
        return picojson::value::object();
    }
    f.seekg(0, std::ios::beg);

    // Read straight into a string so the buffer is released on every path.
    std::string dataObfuscated(static_cast<size_t>(length), '\0');
    if (length > 0) {
        f.read(&dataObfuscated[0], static_cast<std::streamsize>(length));
        // Keep only what was actually read in case of a short read.
        dataObfuscated.resize(static_cast<size_t>(f.gcount()));
    }
    f.close();

    picojson::value doc;
    std::stringstream ss(dataObfuscated);
    ss >> doc;

    if (!picojson::get_last_error().empty()) {
        dbgError(D_WAAP) << "WaapAssetState::loadSource('" << sigsFname << "') failed (parse error: '" <<
            picojson::get_last_error() << "').";
        error = true; // flag an error
        return picojson::value::object();
    }

    return doc.get<picojson::value::object>();
}

View File

@@ -0,0 +1,93 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __SIGNATURES_H__
#define __SIGNATURES_H__
#include "Waf2Regex.h"
#include "picojson.h"
#include <boost/regex.hpp>
// Holds all signature data loaded from a signatures JSON file: compiled
// regexes, ignore lists and name -> value maps. Every member is initialized in
// the constructor's initializer list, so the declaration order below is also
// the initialization order.
class Signatures {
private:
// json parsed sources (not really needed once data is loaded)
picojson::value::object sigsSource;
// Error flag passed by reference to the Regex constructions; read via fail().
// NOTE(review): declared after sigsSource, so it is initialized after
// loadSource() has already run - see the constructor.
bool error;
public:
// Loads and compiles all signatures from the JSON file at `filepath`.
Signatures(const std::string& filepath);
~Signatures();
// Returns the value of the error flag.
bool fail();
std::shared_ptr<Waap::RegexPreconditions> m_regexPreconditions;
// Regexes loaded from compiled signatures
const Regex words_regex;
const Regex specific_acuracy_keywords_regex;
const Regex pattern_regex;
const Regex un_escape_pattern;
const Regex quotes_ev_pattern;
const Regex comment_ev_pattern;
const Regex quotes_space_ev_pattern;
const Regex allowed_text_re;
const Regex pipe_split_re;
const Regex semicolon_split_re;
const Regex longtext_re;
const Regex nospaces_long_value_re;
const Regex good_header_name_re;
const Regex good_header_value_re;
// Ignore lists (keywords / patterns / regex) - global, per URL and per header.
const std::set<std::string> ignored_for_nospace_long_value;
const std::set<std::string> global_ignored_keywords;
const std::set<std::string> global_ignored_patterns;
const std::set<std::string> url_ignored_keywords;
const std::set<std::string> url_ignored_patterns;
const Regex url_ignored_re;
const std::set<std::string> header_ignored_keywords;
const std::set<std::string> header_ignored_patterns;
const Regex header_ignored_re;
// Name -> list of strings, loaded from "filter_parameters" / "attack_types_map".
const std::map<std::string, std::vector<std::string>> filter_parameters;
const std::map<std::string, std::vector<std::string>> m_attack_types;
const Regex php_serialize_identifier;
const Regex html_regex;
const Regex uri_parser_regex;
// Plain boost regexes (constructed without the error flag).
const boost::regex confluence_macro_re;
const boost::regex pipes_delimited_key_val_re;
const boost::regex semicolon_delimited_key_val_re;
const boost::regex asterisk_delimited_key_val_re;
const boost::regex comma_delimited_key_val_re;
const boost::regex ampersand_delimited_key_val_re;
#if 0 // Removed by Pavel's request. Leaving here in case he'll want to add this back...
const std::set<std::string> cookie_ignored_keywords;
const std::set<std::string> cookie_ignored_patterns;
const Regex cookie_ignored_re;
#endif
// Name -> heap-allocated Regex; the raw pointers are created in to_regexmap().
std::map<std::string, Regex*> headers_re;
const Regex format_magic_binary_re;
std::map<std::string, Regex*> params_type_re;
// Signatures for responses
const Regex resp_hdr_pattern_regex_list;
const Regex resp_hdr_words_regex_list;
const Regex resp_body_pattern_regex_list;
const Regex resp_body_words_regex_list;
const std::set<std::string> remove_keywords_always;
const boost::regex user_agent_prefix_re;
const boost::regex binary_data_kw_filter;
const boost::regex wbxml_data_kw_filter;
private:
// Reads and parses the JSON file; returns an empty object on failure.
picojson::value::object loadSource(const std::string& sigsFname);
};
#endif

View File

@@ -0,0 +1,53 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "SingleDecision.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP);
// Creates a decision of the given type that initially neither logs nor blocks.
SingleDecision::SingleDecision(DecisionType type)
        :
    m_type(type),
    m_log(false),
    m_block(false)
{
}
// No resources to release.
SingleDecision::~SingleDecision() = default;
// Returns the decision type given at construction.
DecisionType
SingleDecision::getType() const
{
    return m_type;
}
// Returns the current "log" flag of this decision.
bool
SingleDecision::shouldLog() const
{
    return m_log;
}
// Returns the current "block" flag of this decision.
bool
SingleDecision::shouldBlock() const
{
    return m_block;
}
// Updates the "log" flag, tracing the transition (old value -> new value).
void
SingleDecision::setLog(bool log)
{
    dbgTrace(D_WAAP)
        << "Decision "
        << getTypeStr()
        << " changes should log from "
        << m_log
        << " to "
        << log;
    m_log = log;
}
// Updates the "block" flag, tracing the transition (old value -> new value).
void
SingleDecision::setBlock(bool block)
{
    dbgTrace(D_WAAP)
        << "Decision "
        << getTypeStr()
        << " changes should block from "
        << m_block
        << " to "
        << block;
    m_block = block;
}

View File

@@ -0,0 +1,39 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __SINGLE_DECISION_H__
#define __SINGLE_DECISION_H__
#include "DecisionType.h"
#include <string>
// Abstract base for a single decision: carries its type together with a
// "should log" and a "should block" flag. Derived classes supply the
// human-readable type name through getTypeStr().
class SingleDecision
{
public:
// Creates a decision of the given type.
explicit SingleDecision(DecisionType type);
virtual ~SingleDecision();
// Setters for the log / block flags.
void setLog(bool log);
void setBlock(bool block);
// Accessors for the type and the two flags.
DecisionType getType() const;
bool shouldLog() const;
bool shouldBlock() const;
// Name of the decision type (used in the trace messages of the setters).
virtual std::string getTypeStr() const = 0;
protected:
DecisionType m_type;
bool m_log;
bool m_block;
};
#endif

View File

@@ -0,0 +1,58 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "SyncLearningNotification.h"
// Stores copies of the asset id, notification type and window id.
SyncLearningNotificationObject::SyncLearningNotificationObject(
    const std::string& asset_id,
    const std::string& type,
    const std::string& window_id)
        :
    m_asset_id(asset_id),
    m_type(type),
    m_window_id(window_id)
{
}
// No resources to release.
SyncLearningNotificationObject::~SyncLearningNotificationObject() = default;
// Writes the object as two nested JSON nodes:
// "notificationConsumerData" -> "syncLearnNotificationConsumers" ->
// { assetId, type, windowId }.
void SyncLearningNotificationObject::serialize(cereal::JSONOutputArchive& ar) const
{
    // Outer node.
    ar.setNextName("notificationConsumerData");
    ar.startNode();

    // Inner node holding the three fields.
    ar.setNextName("syncLearnNotificationConsumers");
    ar.startNode();
    ar(cereal::make_nvp("assetId", m_asset_id));
    ar(cereal::make_nvp("type", m_type));
    ar(cereal::make_nvp("windowId", m_window_id));
    ar.finishNode();

    ar.finishNode();
}
std::string SyncLearningNotificationObject::toString() const
{
std::stringstream ss;
{
cereal::JSONOutputArchive ar(ss);
serialize(ar);
}
return ss.str();
}
// Streams the JSON representation of the notification object.
std::ostream& operator<<(std::ostream& os, const SyncLearningNotificationObject& obj)
{
    os << obj.toString();
    return os;
}

View File

@@ -0,0 +1,59 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __SYNC_LEARNING_NOTIFICATION_OBJECT_H__
#define __SYNC_LEARNING_NOTIFICATION_OBJECT_H__
#include <string>
#include <ostream>
#include "cereal/archives/json.hpp"
#include "report/report.h"
#include "rest.h"
// Notification payload identifying an asset, a notification type and a window
// id. It can be serialized to a cereal JSON archive or streamed as JSON text.
class SyncLearningNotificationObject
{
public:
explicit SyncLearningNotificationObject(
const std::string& asset_id,
const std::string& type,
const std::string& window_id
);
~SyncLearningNotificationObject();
// Writes the three fields, wrapped in two nested named nodes, to `ar`.
void serialize(cereal::JSONOutputArchive& ar) const;
// Streams the JSON text produced by toString().
friend std::ostream& operator<<(std::ostream& os, const SyncLearningNotificationObject& obj);
private:
// Returns the object serialized as a JSON string.
std::string toString() const;
std::string m_asset_id;
std::string m_type;
std::string m_window_id;
};
// REST client object carrying the same three values (asset id, type, window
// id) as SyncLearningNotificationObject.
class SyncLearningObject : public ClientRest
{
public:
// Initializes the three parameters from the given values.
SyncLearningObject(
const std::string& _asset_id,
const std::string& _type,
const std::string& _window_id
) : assetId(_asset_id), type(_type), windowId(_window_id) {}
private:
// NOTE(review): C2S_PARAM is defined elsewhere (presumably rest.h); it
// presumably declares client-to-server parameters - confirm.
C2S_PARAM(std::string, assetId);
C2S_PARAM(std::string, type);
C2S_PARAM(std::string, windowId);
};
#endif

View File

@@ -0,0 +1,304 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "telemetry.h"
#include "waap.h"
#include "report/report.h"
#include "log_generator.h"
#include "generic_rulebase/triggers_config.h"
#include "config.h"
#include "maybe_res.h"
#include "LogGenWrapper.h"
#include <memory>
USE_DEBUG_FLAG(D_WAAP);
#define LOGGING_INTERVAL_IN_MINUTES 10
using namespace std;
// Reports a value of 0 to every metric of this object.
void
WaapTelemetrics::initMetrics()
{
    // Traffic counters.
    requests.report(0);
    sources.report(0);

    // Threat-level counters.
    threat_info.report(0);
    threat_low.report(0);
    threat_medium.report(0);
    threat_high.report(0);

    // Block-reason counters.
    api_blocked.report(0);
    bot_blocked.report(0);
    waf_blocked.report(0);
    force_and_block_exceptions.report(0);
}
// Accounts a single decision: one request, its source when not yet seen,
// its threat level (only for WAF_BLOCK / NOT_BLOCKING decisions) and its
// block type. `asset_id` is not used by this implementation.
void
WaapTelemetrics::updateMetrics(const string &asset_id, const DecisionTelemetryData &data)
{
    initMetrics();
    requests.report(1);

    const bool is_new_source = sources_seen.find(data.source) == sources_seen.end();
    if (is_new_source) {
        // When the sources counter reads 0 the remembered set is dropped
        // before the new source is recorded.
        if (sources.getCounter() == 0) sources_seen.clear();
        sources_seen.insert(data.source);
        sources.report(1);
    }

    const bool count_threat_level = data.blockType == WAF_BLOCK || data.blockType == NOT_BLOCKING;
    if (count_threat_level) {
        switch (data.threat) {
            case NO_THREAT:
                break;
            case THREAT_INFO:
                threat_info.report(1);
                break;
            case LOW_THREAT:
                threat_low.report(1);
                break;
            case MEDIUM_THREAT:
                threat_medium.report(1);
                break;
            case HIGH_THREAT:
                threat_high.report(1);
                break;
            default:
                dbgWarning(D_WAAP) << "Unexpected Enum value: " << data.threat;
                break;
        }
    }

    switch (data.blockType) {
        case API_BLOCK:
            api_blocked.report(1);
            break;
        case BOT_BLOCK:
            bot_blocked.report(1);
            break;
        case WAF_BLOCK:
            waf_blocked.report(1);
            break;
        case FORCE_BLOCK:
        case FORCE_EXCEPTION:
            force_and_block_exceptions.report(1);
            break;
        case NOT_BLOCKING:
            break;
        default:
            dbgWarning(D_WAAP) << "Unexpected Enum value: " << data.blockType;
            break;
    }
}
// Reports a value of 0 to every attack-type metric of this object.
void
WaapAttackTypesMetrics::initMetrics()
{
    // Injection attacks.
    sql_inj.report(0);
    vulnerability_scan.report(0);
    path_traversal.report(0);
    ldap_inj.report(0);

    // Remaining attack types.
    evasion_techs.report(0);
    remote_code_exec.report(0);
    xml_extern_entity.report(0);
    cross_site_scripting.report(0);
    general.report(0);
}
// Accounts the attack types of a single decision. Decisions whose block type
// is FORCE_EXCEPTION are ignored. `asset_id` is not used by this
// implementation.
void
WaapAttackTypesMetrics::updateMetrics(const string &asset_id, const DecisionTelemetryData &data)
{
    if (data.blockType == FORCE_EXCEPTION) {
        dbgInfo(D_WAAP) << "Data block type is FORCE_EXCEPTION, no update needed";
        return;
    }

    if (!data.attackTypes.empty()) initMetrics();

    // Each name matches at most one metric, so the checks are chained.
    for (const auto &attackType : data.attackTypes) {
        if (attackType == "SQL Injection") {
            sql_inj.report(1);
        } else if (attackType == "Vulnerability Scanning") {
            vulnerability_scan.report(1);
        } else if (attackType == "Path Traversal") {
            path_traversal.report(1);
        } else if (attackType == "LDAP Injection") {
            ldap_inj.report(1);
        } else if (attackType == "Evasion Techniques") {
            evasion_techs.report(1);
        } else if (attackType == "Remote Code Execution") {
            remote_code_exec.report(1);
        } else if (attackType == "XML External Entity") {
            xml_extern_entity.report(1);
        } else if (attackType == "Cross Site Scripting") {
            cross_site_scripting.report(1);
        } else if (attackType == "General") {
            general.report(1);
        }
    }
}
void
WaapMetricWrapper::upon(const WaapTelemetryEvent &event)
{
const string &asset_id = event.getAssetId();
const DecisionTelemetryData &data = event.getData();
dbgTrace(D_WAAP)
<< "Log the decision for telemetry. Asset ID: "
<< asset_id
<< ", Practice ID: "
<< data.practiceId
<< ", Source: "
<< data.source
<< ", Block type: "
<< data.blockType
<< ", Threat level: "
<< data.threat;
if (!telemetries.count(asset_id)) {
telemetries.emplace(asset_id, make_shared<WaapTelemetrics>());
telemetries[asset_id]->init(
"WAAP telemetry",
ReportIS::AudienceTeam::WAAP,
ReportIS::IssuingEngine::AGENT_CORE,
chrono::minutes(10),
true,
ReportIS::Audience::SECURITY
);
telemetries[asset_id]->registerContext<string>(
"pracitceType",
string("Threat Prevention"),
EnvKeyAttr::LogSection::SOURCE
);
telemetries[asset_id]->registerContext<string>(
"practiceSubType",
string("Web Application"),
EnvKeyAttr::LogSection::SOURCE
);
telemetries[asset_id]->registerContext<string>("assetId", asset_id, EnvKeyAttr::LogSection::SOURCE);
telemetries[asset_id]->registerContext<string>("assetName", data.assetName, EnvKeyAttr::LogSection::SOURCE);
telemetries[asset_id]->registerContext<string>("practiceId", data.practiceId, EnvKeyAttr::LogSection::SOURCE);
telemetries[asset_id]->registerContext<string>(
"practiceName",
data.practiceName,
EnvKeyAttr::LogSection::SOURCE
);
telemetries[asset_id]->registerListener();
}
if (!attack_types_telemetries.count(asset_id)) {
attack_types_telemetries.emplace(asset_id, make_shared<WaapAttackTypesMetrics>());
attack_types_telemetries[asset_id]->init(
"WAAP attack type telemetry",
ReportIS::AudienceTeam::WAAP,
ReportIS::IssuingEngine::AGENT_CORE,
chrono::minutes(10),
true,
ReportIS::Audience::SECURITY
);
attack_types_telemetries[asset_id]->registerContext<string>(
"pracitceType",
string("Threat Prevention"),
EnvKeyAttr::LogSection::SOURCE
);
attack_types_telemetries[asset_id]->registerContext<string>(
"practiceSubType",
string("Web Application"),
EnvKeyAttr::LogSection::SOURCE
);
attack_types_telemetries[asset_id]->registerContext<string>(
"assetId",
asset_id,
EnvKeyAttr::LogSection::SOURCE
);
attack_types_telemetries[asset_id]->registerContext<string>(
"assetName",
data.assetName,
EnvKeyAttr::LogSection::SOURCE
);
attack_types_telemetries[asset_id]->registerContext<string>(
"practiceId",
data.practiceId,
EnvKeyAttr::LogSection::SOURCE
);
attack_types_telemetries[asset_id]->registerContext<string>(
"practiceName",
data.practiceName,
EnvKeyAttr::LogSection::SOURCE
);
attack_types_telemetries[asset_id]->registerListener();
}
telemetries[asset_id]->updateMetrics(asset_id, data);
attack_types_telemetries[asset_id]->updateMetrics(asset_id, data);
auto agent_mode = Singleton::Consume<I_AgentDetails>::by<WaapMetricWrapper>()->getOrchestrationMode();
string tenant_id = Singleton::Consume<I_AgentDetails>::by<WaapMetricWrapper>()->getTenantId();
if (agent_mode == OrchestrationMode::HYBRID || tenant_id.rfind("org_", 0) == 0) {
if (!metrics.count(asset_id)) {
metrics.emplace(asset_id, make_shared<WaapTelemetrics>());
metrics[asset_id]->init(
"Waap Metrics",
ReportIS::AudienceTeam::WAAP,
ReportIS::IssuingEngine::AGENT_CORE,
chrono::minutes(10),
true,
ReportIS::Audience::INTERNAL
);
metrics[asset_id]->registerListener();
}
if (!attack_types.count(asset_id)) {
attack_types.emplace(asset_id, make_shared<WaapAttackTypesMetrics>());
attack_types[asset_id]->init(
"WAAP Attack Type Metrics",
ReportIS::AudienceTeam::WAAP,
ReportIS::IssuingEngine::AGENT_CORE,
chrono::minutes(10),
true,
ReportIS::Audience::INTERNAL
);
attack_types[asset_id]->registerListener();
}
metrics[asset_id]->updateMetrics(asset_id, data);
attack_types[asset_id]->updateMetrics(asset_id, data);
}
}
// Reports the asset count carried by the event to the metric matching the
// event's asset type; unknown types only produce a warning.
void
AssetsMetric::upon(const AssetCountEvent &event)
{
    const int reported_count = event.getAssetCount();
    switch (event.getAssetType()) {
        case AssetType::API:
            api_assets.report(reported_count);
            break;
        case AssetType::WEB:
            web_assets.report(reported_count);
            break;
        case AssetType::ALL:
            all_assets.report(reported_count);
            break;
        default:
            dbgWarning(D_WAAP) << "Invalid Asset Type was reported";
    }
}

View File

@@ -0,0 +1,217 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <boost/regex.hpp>
#include "TrustedSources.h"
#include "Waf2Util.h"
#include "CidrMatch.h"
#include "agent_core_utilities.h"
using namespace Waap::TrustedSources;
// Starts with no identifiers.
TrustedSourcesParameter::TrustedSourcesParameter()
        :
    m_identifiers()
{}
// Checks `source` against the first identifiers entry: IP-like types are
// matched as CIDRs, cookie / sm_user types as regexes. Returns false when
// there are no identifiers, the source is empty or the type is not handled.
bool TrustedSourcesParameter::isSourceTrusted(std::string source, TrustedSourceType srcType)
{
    if (m_identifiers.empty() || source.empty())
    {
        return false;
    }

    switch (srcType)
    {
        case SOURCE_IP:
        case X_FORWARDED_FOR:
            return m_identifiers[0].isCidrMatch(source, srcType);
        case COOKIE_OAUTH2_PROXY:
        case SM_USER:
            // srcType is exactly the enumerator of the matched case here.
            return m_identifiers[0].isRegexMatch(source, srcType);
        case UNKNOWN:
        default:
            break;
    }
    return false;
}
// Returns the number of sources of the first identifiers entry, or the
// maximum size_t value (i.e. (size_t)-1) when there are no identifiers.
size_t TrustedSourcesParameter::getNumOfSources()
{
    return m_identifiers.empty() ? static_cast<size_t>(-1) : m_identifiers[0].getNumOfSources();
}
// Returns a copy of the trusted types of the first identifiers entry, or an
// empty set when there are no identifiers.
std::set<Waap::TrustedSources::TrustedSourceType> Waap::TrustedSources::TrustedSourcesParameter::getTrustedTypes()
{
    if (!m_identifiers.empty())
    {
        return m_identifiers[0].getTrustedTypes();
    }
    return std::set<TrustedSourceType>();
}
// Returns true when `source` matches any CIDR configured for
// `trustedSourceType`; false when the type has no entry or nothing matches.
bool SourcesIdentifers::isCidrMatch(const std::string &source, const TrustedSourceType &trustedSourceType) const
{
    auto found = m_identifiersMap.find(trustedSourceType);
    if (found == m_identifiersMap.end())
    {
        return false;
    }

    // Iterate by reference: copying every CIDR string per check is needless.
    for (const std::string &cidr : found->second)
    {
        if (Waap::Util::cidrMatch(source, cidr))
        {
            dbgTrace(D_WAAP) << "source: " << source << " is trusted for type: " << trustedSourceType <<
                ", cidr: " << cidr;
            return true;
        }
    }
    return false;
}
bool SourcesIdentifers::isRegexMatch(const std::string &source, const TrustedSourceType& type) const
{
auto found = m_identifiersMap.find(type);
if (found == m_identifiersMap.end())
{
return false;
}
const std::vector<std::string>& regexes = found->second;
for (auto regex : regexes)
{
boost::regex expr{ regex };
boost::smatch matches;
if (NGEN::Regex::regexSearch(__FILE__, __LINE__, source, matches, expr))
{
dbgTrace(D_WAAP) << "source: " << source << " is trusted for type: " << type <<
", expr: " << regex;
return true;
}
}
return false;
}
// Returns the stored minimum number of sources.
size_t
SourcesIdentifers::getNumOfSources() const
{
    return m_minSources;
}
// Returns a reference to the stored set of trusted source types.
const std::set<TrustedSourceType>&
SourcesIdentifers::getTrustedTypes()
{
    return m_trustedTypes;
}
// Returns true when the two objects differ in their minimum number of
// sources or in their identifiers map (different keys, or different value
// lists for the same key); false when both are equal.
bool SourcesIdentifers::operator!=(const SourcesIdentifers& other) const
{
    if (m_identifiersMap.size() != other.m_identifiersMap.size())
    {
        return true;
    }
    if (m_minSources != other.m_minSources)
    {
        return true;
    }

    // Entries are visited by reference and looked up once in `other`, so no
    // map entry or value vector is copied during the comparison.
    for (const auto &identifier : m_identifiersMap)
    {
        const auto otherEntry = other.m_identifiersMap.find(identifier.first);
        if (otherEntry == other.m_identifiersMap.end())
        {
            return true;
        }
        // Vector inequality compares sizes first and then element by element.
        if (identifier.second != otherEntry->second)
        {
            return true;
        }
    }
    return false;
}
// Starts as an UNKNOWN identity source with a default-constructed value.
Identifer::Identifer()
        :
    identitySource(UNKNOWN),
    value()
{}
// Maps a textual identifier type to its TrustedSourceType using
// memcaseinsensitivecmp; returns UNKNOWN (after a trace message) when the
// text matches none of the known names.
TrustedSourceType Identifer::convertSourceIdentifierToEnum(std::string identifierType)
{
    struct KnownIdentifier
    {
        std::string name;
        TrustedSourceType type;
    };
    // Checked in this order; the first name accepted by the comparison wins.
    static const KnownIdentifier knownIdentifiers[] = {
        { "Source IP", SOURCE_IP },
        { "Cookie:_oauth2_proxy", COOKIE_OAUTH2_PROXY },
        { "X-Forwarded-For", X_FORWARDED_FOR },
        { "Header:sm_user", SM_USER }
    };

    for (const KnownIdentifier &known : knownIdentifiers)
    {
        if (memcaseinsensitivecmp(
                identifierType.c_str(),
                identifierType.size(),
                known.name.c_str(),
                known.name.size()))
        {
            return known.type;
        }
    }

    dbgTrace(D_WAAP) << identifierType << " is not a recognized identifier type";
    return UNKNOWN;
}
// Equality is defined as the negation of operator!=.
bool TrustedSourcesParameter::operator==(const TrustedSourcesParameter &other) const
{
    const bool differs = (*this != other);
    return !differs;
}
// Returns true when the two parameters hold a different number of identifier
// entries or any pair of entries at the same index differs.
bool TrustedSourcesParameter::operator!=(const TrustedSourcesParameter& other) const
{
    const size_t count = m_identifiers.size();
    if (count != other.m_identifiers.size())
    {
        return true;
    }

    for (size_t idx = 0; idx < count; ++idx)
    {
        if (m_identifiers[idx] != other.m_identifiers[idx])
        {
            return true;
        }
    }
    return false;
}

Some files were not shown because too many files have changed in this diff Show More