First release of open-appsec source code

This commit is contained in:
roybarda
2022-10-26 19:33:19 +03:00
parent 3883109caf
commit a883352f79
1353 changed files with 276290 additions and 1 deletions

View File

@@ -0,0 +1,13 @@
# Build the "waap" library target from the component, its implementation
# and the first-request notification object sources.
add_library(waap
waap_component.cc
waap_component_impl.cc
first_request_object.cc
)
# Descend into the sub-projects that live next to this file.
add_subdirectory(waap_clib)
add_subdirectory(reputation)
# Add the local header directories to the include search path.
include_directories(include)
include_directories(reputation)
# Install the resources directory, keeping the permissions it has in the source tree.
install(DIRECTORY resources DESTINATION http_transaction_handler_service USE_SOURCE_PERMISSIONS)

View File

@@ -0,0 +1,57 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "first_request_object.h"
#include "tag_and_enum_management.h"
FirstRequestNotificationObject::FirstRequestNotificationObject(
std::string asset_id,
std::string asset_name,
ReportIS::Severity severity
):
m_asset_id(asset_id),
m_asset_name(asset_name),
m_severity(severity)
{}
// Out-of-line destructor definition; the members clean up after themselves.
FirstRequestNotificationObject::~FirstRequestNotificationObject() = default;
// Writes this object into `ar` as two nested nodes,
// "notificationConsumerData" -> "firstRequestNotificationConsumers",
// the inner one holding the asset id, asset name and the severity as text.
void
FirstRequestNotificationObject::serialize(cereal::JSONOutputArchive &ar) const
{
    // Outer wrapper node.
    ar.setNextName("notificationConsumerData");
    ar.startNode();

    // Inner node that carries the actual fields.
    ar.setNextName("firstRequestNotificationConsumers");
    ar.startNode();

    ar(cereal::make_nvp("assetId", m_asset_id));
    ar(cereal::make_nvp("assetName", m_asset_name));

    // The severity is emitted in its textual form rather than as a raw enum value.
    const auto severity_text = TagAndEnumManagement::convertToString(m_severity);
    ar(cereal::make_nvp("originalEventSeverity", severity_text));

    // Close the inner node first, then the outer one.
    ar.finishNode();
    ar.finishNode();
}
std::string FirstRequestNotificationObject::toString() const
{
std::stringstream ss;
{
cereal::JSONOutputArchive ar(ss);
serialize(ar);
}
return ss.str();
}
// Streams the JSON text representation of `obj` into `os` and returns the stream.
std::ostream &
operator<<(std::ostream &os, const FirstRequestNotificationObject &obj)
{
    os << obj.toString();
    return os;
}

View File

@@ -0,0 +1,42 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __FIRST_REQUEST_NOTIFICATION_OBJECT_H__
#define __FIRST_REQUEST_NOTIFICATION_OBJECT_H__
#include <string>
#include <ostream>
#include "cereal/archives/json.hpp"
#include "report/report.h"
// Notification payload made of an asset id, an asset name and an event severity.
// It can be written to a cereal JSON archive or streamed to an std::ostream.
class FirstRequestNotificationObject
{
public:
// Stores the given asset id, asset name and severity in the object.
explicit FirstRequestNotificationObject(
std::string asset_id,
std::string asset_name,
ReportIS::Severity severity
);
virtual ~FirstRequestNotificationObject();
// Writes the object into the given JSON output archive.
void serialize(cereal::JSONOutputArchive& ar) const;
// Streams the object's string form into `os`; declared as a friend so it can reach the private toString().
friend std::ostream& operator<<(std::ostream& os, const FirstRequestNotificationObject& obj);
private:
// Returns the object's string representation (used by operator<<).
std::string toString() const;
std::string m_asset_id;
std::string m_asset_name;
ReportIS::Severity m_severity;
};
#endif

View File

@@ -0,0 +1,30 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// NOTE(review): presumably the directory in which WAAP backup files are kept - confirm against the users of this macro
#define BACKUP_DIRECTORY_PATH "/etc/cp/conf/waap/"
// reduced from 2048 in order to fit within the 10K max log size in Kibana
#define MAX_LOG_FIELD_SIZE 1536
// maximum bytes response body log field size can reduce from request body log
#define MIN_RESP_BODY_LOG_FIELD_SIZE (std::size_t{500})
// size of clean values LRU cache
#define SIGS_APPLY_CLEAN_CACHE_CAPACITY 4096
// size of suspicious values LRU cache
#define SIGS_APPLY_SUSPICIOUS_CACHE_CAPACITY 4096
// size of SampleType cache capacity
#define SIGS_SAMPLE_TYPE_CACHE_CAPACITY 4096
// ScoreBuilder pool names
#define KEYWORDS_SCORE_POOL_BASE "base_scores"
#define KEYWORDS_SCORE_POOL_HEADERS "headers_scores"

View File

@@ -0,0 +1,24 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// Types that are only named in the interface below; their definitions live elsewhere.
class IWaf2Transaction;
class IWaapConfig;
struct AnalysisResult;

// Abstract interface of an analyzer that inspects a transaction under a
// given site policy and returns an AnalysisResult.
class I_DeepAnalyzer
{
public:
    // Analyzes `waf2Trans` using the policy in `pSitePolicy` and returns the result.
    virtual AnalysisResult analyzeData(IWaf2Transaction *waf2Trans, const IWaapConfig *pSitePolicy) = 0;

    // Virtual so that implementations can be destroyed through this interface.
    virtual ~I_DeepAnalyzer() = default;
};

View File

@@ -0,0 +1,26 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __I_IGNORE_SOURCES_H__
#define __I_IGNORE_SOURCES_H__
#include <vector>
#include <string>
// Abstract interface of a provider of sources that should be ignored.
class I_IgnoreSources
{
public:
    // Virtual destructor: this is a polymorphic interface, so destroying an
    // implementation through an I_IgnoreSources pointer must run the
    // implementation's destructor (without it that is undefined behaviour).
    virtual ~I_IgnoreSources() = default;

    // Returns a pointer to the list of sources to ignore.
    // NOTE(review): ownership and nullability of the returned pointer are not visible here - confirm with implementations.
    virtual std::vector<std::string>* getSourcesToIgnore() = 0;

    // Reports whether the provider is ready to be queried.
    virtual bool ready() = 0;
};
#endif

View File

@@ -0,0 +1,39 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "../waap_clib/WaapKeywords.h"
#include "i_serialize.h"
#include <unordered_set>
#include <vector>
class IWaf2Transaction;
// Abstract interface of a per-key keyword (indicator) filter.
class I_IndicatorsFilter
{
public:
    virtual ~I_IndicatorsFilter() = default;

    // Filters the indicators found in `keywords` for the given `key`;
    // `filteredKeywords` receives the filtering output.
    virtual void filterKeywords(
        const std::string &key,
        Waap::Keywords::KeywordsSet &keywords,
        Waap::Keywords::KeywordsVec &filteredKeywords) = 0;

    // Registers the keywords seen for a specific key in the context of a transaction.
    virtual void registerKeywords(
        const std::string &key,
        Waap::Keywords::KeywordsSet &keyword,
        IWaf2Transaction *pTransaction) = 0;

    // Returns true if `keyword`, looked up under `key`, should be filtered out.
    virtual bool shouldFilterKeyword(const std::string &key, const std::string &keyword) const = 0;
};

View File

@@ -0,0 +1,281 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#include <fstream>
#include "i_time_get.h"
#include "i_encryptor.h"
#include "rest.h"
#include "i_messaging.h"
#include "i_mainloop.h"
#include "i_agent_details.h"
// Upper bound on the number of send attempts made by the *WithRetry helpers declared later in this header.
static const uint max_send_obj_retries = 3;
// Makes the D_WAAP debug flag usable by the dbg* statements in this header.
USE_DEBUG_FLAG(D_WAAP);
// REST client object (derives from ClientRest) that declares its own JSON load / generate entry points.
// The implementations are not part of this header; the notes below are the original author's descriptions.
class RestGetFile : public ClientRest
{
public:
// decrypts the given payload and loads it as JSON
bool loadJson(const std::string& json);
// generates the JSON and encrypts it
Maybe<std::string> genJson() const;
};
// Plain record describing one file: its name and a "modified" value.
// NOTE(review): `modified` is presumably a last-modified timestamp kept as text - confirm against the parser that fills it.
struct FileMetaData
{
std::string filename;
std::string modified;
};
// REST client object that holds a list of files together with their metadata.
class RemoteFilesList : public ClientRest
{
public:
RemoteFilesList();
// parses xml instead of json (despite the method name)
// extracts a file list in <Contents><Key>
bool loadJson(const std::string& xml);
// Returns the per-file metadata records.
const std::vector<FileMetaData>& getFilesMetadataList() const;
// Returns the list of file paths.
const std::vector<std::string>& getFilesList() const;
private:
RestParam<std::vector<FileMetaData>> files;
std::vector<std::string> filesPathsList;
};
// Abstract interface of an object that can write itself to, and read itself from, a stream.
class I_Serializable
{
public:
    // Virtual destructor: this is a polymorphic interface, so destroying an
    // implementation through an I_Serializable pointer must run the
    // implementation's destructor (without it that is undefined behaviour).
    virtual ~I_Serializable() = default;

    // Writes the object's state into `stream`.
    virtual void serialize(std::ostream& stream) = 0;

    // Restores the object's state from `stream`.
    virtual void deserialize(std::istream& stream) = 0;
};
// Abstract interface of the steps of a remote synchronization cycle:
// posting and pulling raw data, processing it, posting and pulling the
// processed result, and updating state from a list of files.
class I_RemoteSyncSerialize
{
public:
    // Virtual destructor: this is a polymorphic interface, so destroying an
    // implementation through an I_RemoteSyncSerialize pointer must run the
    // implementation's destructor (without it that is undefined behaviour).
    virtual ~I_RemoteSyncSerialize() = default;

    // Posts the local data; the boolean result reports the outcome.
    virtual bool postData() = 0;

    // Pulls data from the given files.
    virtual void pullData(const std::vector<std::string>& files) = 0;

    // Processes the data.
    virtual void processData() = 0;

    // Posts the processed data.
    virtual void postProcessedData() = 0;

    // Pulls processed data from the given files.
    virtual void pullProcessedData(const std::vector<std::string>& files) = 0;

    // Updates the state from the given files.
    virtual void updateState(const std::vector<std::string>& files) = 0;
};
// Abstract interface of an object that can back its data up and restore it.
class I_Backup
{
public:
    // Virtual destructor: this is a polymorphic interface, so destroying an
    // implementation through an I_Backup pointer must run the
    // implementation's destructor (without it that is undefined behaviour).
    virtual ~I_Backup() = default;

    // open stream and serialize data
    virtual void saveData() = 0;

    // open stream and deserialize data
    virtual void restore() = 0;
};
// Base class that ties the I_Backup operations (saveData / restore) to a file path.
// It inherits I_Serializable without declaring serialize()/deserialize() here, so concrete subclasses supply them.
class SerializeToFileBase :
public I_Backup,
public I_Serializable
{
public:
// Takes the path of the file this object is associated with.
SerializeToFileBase(std::string filePath);
virtual ~SerializeToFileBase();
// I_Backup operations (see the interface: open a stream and serialize / deserialize the data).
virtual void saveData();
virtual void restore();
// Replaces the file path used by this object.
virtual void setFilePath(const std::string &new_file_path);
protected:
// saved file name for testing
std::string m_filePath;
private:
// Loads the data from the given file path (implementation not visible in this header).
void loadFromFile(std::string filePath);
};
// SerializeToFileBase variant that is configured with a polling interval and keeps
// the time of its last serialization.
// NOTE(review): presumably backupWorker() performs the periodic save - its body is not visible in this header.
class SerializeToFilePeriodically : public SerializeToFileBase
{
public:
// Takes the polling interval and the path of the backing file.
SerializeToFilePeriodically(std::chrono::seconds pollingIntervals, std::string filePath);
virtual ~SerializeToFilePeriodically();
// Changes the interval.
void setInterval(std::chrono::seconds newInterval);
protected:
void backupWorker();
private:
std::chrono::microseconds m_lastSerialization;
std::chrono::seconds m_interval;
};
class WaapComponent;
// Base class that combines local file serialization (SerializeToFileBase) with the
// remote synchronization steps of I_RemoteSyncSerialize. It also provides template
// helpers for sending objects through the messaging interface, with or without retries.
class SerializeToLocalAndRemoteSyncBase : public I_RemoteSyncSerialize, public SerializeToFileBase
{
public:
// Takes the sync interval, the time to wait for a sync, the local file path, the remote path,
// the asset id and the owner name.
SerializeToLocalAndRemoteSyncBase(std::chrono::minutes interval,
std::chrono::seconds waitForSync,
const std::string& filePath,
const std::string& remotePath,
const std::string& assetId,
const std::string& owner);
virtual ~SerializeToLocalAndRemoteSyncBase();
virtual void restore();
virtual void syncWorker();
void setInterval(std::chrono::seconds newInterval);
std::chrono::seconds getIntervalDuration() const;
void setRemoteSyncEnabled(bool enabled);
protected:
void mergeProcessedFromRemote();
std::string getPostDataUrl();
std::string getUri();
size_t getIntervalsCount();
// Sends `obj` with the given method to `uri` through I_Messaging.
// - OFFLINE orchestration mode: nothing is sent and false is returned.
// - HYBRID orchestration mode: the object is sent to the shared storage host on port 80, with the
//   EXTERNAL connection flag and an "X-Tenant-Id" header built from the agent's tenant id.
// - otherwise: the object is sent through the messaging overload that takes only the uri.
// Returns the result reported by the messaging call.
template<typename T>
bool sendObject(T &obj, I_Messaging::Method method, std::string uri)
{
I_Messaging *messaging = Singleton::Consume<I_Messaging>::by<WaapComponent>();
I_AgentDetails *agentDetails = Singleton::Consume<I_AgentDetails>::by<WaapComponent>();
if (agentDetails->getOrchestrationMode() == OrchestrationMode::OFFLINE) {
dbgDebug(D_WAAP) << "offline mode not sending object";
return false;
}
if (agentDetails->getOrchestrationMode() == OrchestrationMode::HYBRID) {
Flags <MessageConnConfig> conn_flags;
conn_flags.setFlag(MessageConnConfig::EXTERNAL);
std::string tenant_header = "X-Tenant-Id: " + agentDetails->getTenantId();
return messaging->sendObject(
obj,
method,
getSharedStorageHost(),
80,
conn_flags,
uri,
tenant_header,
nullptr,
MessageTypeTag::WAAP_LEARNING);
}
return messaging->sendObject(
obj,
method,
uri,
"",
nullptr,
true,
MessageTypeTag::WAAP_LEARNING);
}
// Calls sendObject() up to max_send_obj_retries times, yielding to the main loop after each
// failed attempt. Returns true on the first success, false once all attempts have failed.
// The attempt number written to the log is zero-based.
template<typename T>
bool sendObjectWithRetry(T &obj, I_Messaging::Method method, std::string uri)
{
I_MainLoop *mainloop = Singleton::Consume<I_MainLoop>::by<WaapComponent>();
for (uint i = 0; i < max_send_obj_retries; i++)
{
if (sendObject(obj, method, uri))
{
dbgTrace(D_WAAP) <<
"object sent successfully after " << i << " retry attempts";
return true;
}
dbgWarning(D_WAAP) << "Failed to send object. Attempt: " << i;
mainloop->yield(true);
}
dbgError(D_WAAP) << "Failed to send object, reached maximum attempts: " <<
max_send_obj_retries;
return false;
}
// Same mode handling as sendObject(), but goes through the messaging sendNoReplyObject() calls.
template<typename T>
bool sendNoReplyObject(T &obj, I_Messaging::Method method, std::string uri)
{
I_Messaging *messaging = Singleton::Consume<I_Messaging>::by<WaapComponent>();
I_AgentDetails *agentDetails = Singleton::Consume<I_AgentDetails>::by<WaapComponent>();
if (agentDetails->getOrchestrationMode() == OrchestrationMode::OFFLINE) {
dbgDebug(D_WAAP) << "offline mode not sending object";
return false;
}
if (agentDetails->getOrchestrationMode() == OrchestrationMode::HYBRID) {
Flags<MessageConnConfig> conn_flags;
conn_flags.setFlag(MessageConnConfig::EXTERNAL);
std::string tenant_header = "X-Tenant-Id: " + agentDetails->getTenantId();
return messaging->sendNoReplyObject(
obj,
method,
getSharedStorageHost(),
80,
conn_flags,
uri,
tenant_header,
nullptr,
MessageTypeTag::WAAP_LEARNING);
}
return messaging->sendNoReplyObject(
obj,
method,
uri,
"",
nullptr,
true,
MessageTypeTag::WAAP_LEARNING);
}
// Retry wrapper around sendNoReplyObject(); same retry policy as sendObjectWithRetry().
template<typename T>
bool sendNoReplyObjectWithRetry(T &obj, I_Messaging::Method method, std::string uri)
{
I_MainLoop *mainloop= Singleton::Consume<I_MainLoop>::by<WaapComponent>();
for (uint i = 0; i < max_send_obj_retries; i++)
{
if (sendNoReplyObject(obj, method, uri))
{
dbgTrace(D_WAAP) <<
"object sent successfully after " << i << " retry attempts";
return true;
}
dbgWarning(D_WAAP) << "Failed to send object. Attempt: " << i;
mainloop->yield(true);
}
dbgError(D_WAAP) << "Failed to send object, reached maximum attempts: " <<
max_send_obj_retries;
return false;
}
const std::string m_remotePath; // Created from tenantId + / + assetId + / + class
std::chrono::seconds m_interval;
std::string m_owner;
private:
bool localSyncAndProcess();
void updateStateFromRemoteService();
RemoteFilesList getProcessedFilesList();
RemoteFilesList getRemoteProcessedFilesList();
std::string getWindowId();
bool isBase();
std::string getLearningHost();
// Host used as the destination of the HYBRID-mode sends above.
std::string getSharedStorageHost();
I_MainLoop* m_pMainLoop;
std::chrono::microseconds m_waitForSync;
uint m_workerRoutineId;
size_t m_daysCount;
size_t m_windowsCount;
size_t m_intervalsCounter;
bool m_remoteSyncEnabled;
const std::string m_assetId;
std::string m_type;
std::string m_lastProcessedModified;
Maybe<std::string> m_shared_storage_host;
Maybe<std::string> m_learning_host;
};

View File

@@ -0,0 +1,144 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "../waap_clib/WaapDecision.h"
#include "../include/WaapDefines.h"
#include "../waap_clib/Csrf.h"
#include "../waap_clib/Waf2Util.h"
#include "../waap_clib/WaapOpenRedirect.h"
#include "../waap_clib/FpMitigation.h"
#include "../waap_clib/DeepParser.h"
#include "http_inspection_events.h"
// Classification of an HTTP header as returned by IWaf2Transaction::detectHeaderType().
// The numeric values are spelled out; they equal the implicit numbering (0..7).
enum HeaderType
{
    UNKNOWN_HEADER = 0,
    HOST_HEADER = 1,
    USER_AGENT_HEADER = 2,
    COOKIE_HEADER = 3,
    REFERER_HEADER = 4,
    CONTENT_TYPE_HEADER = 5,
    CLEAN_HEADER = 6,
    OTHER_KNOWN_HEADERS = 7
};
struct AnalysisResult;
class WaapAssetState;
// Progress flags of a transaction; every flag starts out as false.
struct Waf2TransactionFlags
{
    bool endResponseHeadersCalled = false;
    bool requestDataPushStarted = false;
    bool responseDataPushStarted = false;

    // Kept user-provided (rather than defaulted) so the type's construction
    // rules stay exactly as before; the initial values come from the
    // member initializers above.
    Waf2TransactionFlags() {}
};
// Abstract interface of a single WAF transaction. It exposes accessors for the transaction's
// state and request properties, decision / injection hooks, and the event-style calls used to
// feed the request and the response into the transaction piece by piece.
class IWaf2Transaction {
public:
virtual ~IWaf2Transaction() {}
// --- Identity, shared state and scan reporting ---
virtual uint64_t getIndex() const = 0;
virtual void setIndex(uint64_t index) = 0;
virtual std::shared_ptr<WaapAssetState> getAssetState() = 0;
virtual IWaapConfig* getSiteConfig() = 0;
virtual DeepParser& getDeepParser() = 0;
virtual bool get_ignoreScore() const = 0;
virtual void addNote(const std::string &note) = 0;
virtual bool shouldIgnoreOverride(const Waf2ScanResult &res) = 0;
virtual bool reportScanResult(const Waf2ScanResult &res) = 0;
// --- Read accessors for request properties and scan results ---
virtual const std::string getHost() const = 0;
virtual Waap::OpenRedirect::State &getOpenRedirectState() = 0;
virtual const std::string getLocation() const = 0;
virtual const std::string getUserAgent() const = 0;
virtual const std::string getParam() const = 0;
virtual const std::vector<std::string> getKeywordMatches() const = 0;
virtual const std::vector<std::string> getKeywordsCombinations() const = 0;
virtual const std::string getContentTypeStr() const = 0;
virtual Waap::Util::ContentType getContentType() const = 0;
virtual const std::string getKeywordMatchesStr() const = 0;
virtual const std::string getSample() const = 0;
virtual const std::string getLastScanSample() const = 0;
virtual const std::string& getLastScanParamName() const = 0;
virtual const std::string getMethod() const = 0;
virtual const std::string getHdrContent(std::string hdrName) const = 0;
virtual const WaapDecision &getWaapDecision() const = 0;
virtual const std::string& getRemoteAddr() const = 0;
virtual const std::string getUri() const = 0;
virtual const std::string getUriStr() const = 0;
virtual const std::string& getSourceIdentifier() const = 0;
virtual double getScore() const = 0;
virtual const std::vector<double> getScoreArray() const = 0;
virtual Waap::CSRF::State& getCsrfState() = 0;
virtual ngx_http_cp_verdict_e getUserLimitVerdict() = 0;
virtual const std::string getUserLimitVerdictStr() const = 0;
virtual const std::string getViolatedUserLimitTypeStr() const = 0;
// --- Decisions, logging and response injection ---
virtual void checkShouldInject() = 0;
virtual void completeInjectionResponseBody(std::string& strInjection) = 0;
virtual void sendLog() = 0;
virtual bool decideAfterHeaders() = 0;
// NOTE(review): default arguments on a virtual function are bound by the static type of the
// call expression, so overriders should repeat exactly the same defaults.
virtual int decideFinal(
int mode,
AnalysisResult &transactionResult,
const std::string &poolName=KEYWORDS_SCORE_POOL_BASE,
PolicyCounterType fpClassification = UNKNOWN_TYPE) = 0;
virtual bool decideResponse() = 0;
virtual void clearAllInjectionReasons() = 0;
virtual bool shouldInspectResponse() = 0;
virtual bool shouldInjectResponse() = 0;
virtual bool shouldInjectCSRF() = 0;
virtual bool shouldInjectSecurityHeaders() = 0;
virtual void handleSecurityHeadersInjection(
std::vector<std::pair<std::string, std::string>>& injectHeaderStrs) = 0;
virtual void disableShouldInjectSecurityHeaders() = 0;
virtual void handleCsrfHeaderInjection(std::string& injectStr) = 0;
virtual bool findHtmlTagToInject(const char* data, int data_len, int& pos) = 0;
virtual bool isHtmlType(const char* data, int data_len) = 0;
virtual HeaderType detectHeaderType(const char* name, int name_len) = 0;
// --- Transaction start and connection details ---
virtual void start() = 0;
virtual void set_transaction_time(const char* log_time) = 0;
virtual void set_transaction_remote(const char* remote_addr, int remote_port) = 0;
virtual void set_transaction_local(const char* local_addr, int local_port) = 0;
// Request
virtual void set_method(const char* method) = 0;
virtual void set_uri(const char* uri) = 0;
virtual void start_request_hdrs() = 0;
virtual void add_request_hdr(const char* name, int name_len, const char* value, int value_len) = 0;
virtual void end_request_hdrs() = 0;
virtual void start_request_body() = 0;
virtual void add_request_body_chunk(const char* data, int data_len) = 0;
virtual void end_request_body() = 0;
virtual void end_request() = 0;
// Response
virtual void start_response(int response_status, int http_version) = 0;
virtual void start_response_hdrs() = 0;
virtual void add_response_hdr(const char* name, int name_len, const char* value, int value_len) = 0;
virtual void end_response_hdrs() = 0;
virtual void start_response_body() = 0;
virtual void add_response_body_chunk(const char* data, int data_len) = 0;
virtual void end_response_body() = 0;
virtual void end_response() = 0;
// --- End of transaction ---
virtual void collectFoundPatterns() = 0;
virtual ReportIS::Severity computeEventSeverityFromDecision() const = 0;
virtual void finish() = 0;
virtual Waf2TransactionFlags &getTransactionFlags() = 0;
};

View File

@@ -0,0 +1,69 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "../waap_clib/WaapOverride.h"
#include "../waap_clib/WaapTrigger.h"
#include "../waap_clib/TrustedSources.h"
#include "../waap_clib/WaapParameters.h"
#include "../waap_clib/WaapOpenRedirectPolicy.h"
#include "../waap_clib/WaapErrorDisclosurePolicy.h"
#include "../waap_clib/CsrfPolicy.h"
#include "../waap_clib/UserLimitsPolicy.h"
#include "../waap_clib/RateLimiting.h"
#include "../waap_clib/SecurityHeadersPolicy.h"
#include <memory>
// Blocking level of an asset, from no blocking at all up to the high level.
// The numeric values are spelled out; they equal the implicit numbering (0..3).
enum class BlockingLevel
{
    NO_BLOCKING = 0,
    LOW_BLOCKING_LEVEL = 1,
    MEDIUM_BLOCKING_LEVEL = 2,
    HIGH_BLOCKING_LEVEL = 3
};
// Attack mitigation mode of an asset.
// The numeric values are spelled out; they equal the implicit numbering (0..3).
enum class AttackMitigationMode
{
    DISABLED = 0,
    LEARNING = 1,
    PREVENT = 2,
    UNKNOWN = 3
};
// Read-only interface of a WAAP asset configuration: identifying strings, the blocking level,
// the web-attack mitigation settings and shared pointers to the individual policy objects.
class IWaapConfig
{
public:
    // Virtual destructor: this is a polymorphic interface, so destroying an
    // implementation through an IWaapConfig pointer must run the
    // implementation's destructor (without it that is undefined behaviour).
    virtual ~IWaapConfig() = default;

    // Asset, practice and rule identification.
    virtual const std::string& get_AssetId() const = 0;
    virtual const std::string& get_AssetName() const = 0;
    virtual const BlockingLevel& get_BlockingLevel() const = 0;
    virtual const std::string& get_PracticeId() const = 0;
    virtual const std::string& get_PracticeName() const = 0;
    virtual const std::string& get_PracticeSubType() const = 0;
    virtual const std::string& get_RuleId() const = 0;
    virtual const std::string& get_RuleName() const = 0;

    // Web-attack mitigation switch and action.
    virtual const bool& get_WebAttackMitigation() const = 0;
    virtual const std::string& get_WebAttackMitigationAction() const = 0;

    // Policy objects, each returned as a reference to the shared pointer that holds it.
    // NOTE(review): whether a pointer may be empty is not visible here - confirm before dereferencing.
    virtual const std::shared_ptr<Waap::Override::Policy>& get_OverridePolicy() const = 0;
    virtual const std::shared_ptr<Waap::Trigger::Policy>& get_TriggerPolicy() const = 0;
    virtual const std::shared_ptr<Waap::TrustedSources::TrustedSourcesParameter>& get_TrustedSourcesPolicy() const = 0;
    virtual const std::shared_ptr<Waap::Parameters::WaapParameters>& get_WaapParametersPolicy() const = 0;
    virtual const std::shared_ptr<Waap::OpenRedirect::Policy>& get_OpenRedirectPolicy() const = 0;
    virtual const std::shared_ptr<Waap::ErrorDisclosure::Policy>& get_ErrorDisclosurePolicy() const = 0;
    virtual const std::shared_ptr<Waap::Csrf::Policy>& get_CsrfPolicy() const = 0;
    virtual const std::shared_ptr<Waap::RateLimiting::Policy>& get_RateLimitingPolicy() const = 0;
    virtual const std::shared_ptr<Waap::RateLimiting::Policy>& get_ErrorLimitingPolicy() const = 0;
    virtual const std::shared_ptr<Waap::SecurityHeaders::Policy>& get_SecurityHeadersPolicy() const = 0;
    virtual const std::shared_ptr<Waap::UserLimits::Policy>& get_UserLimitsPolicy() const = 0;

    // Prints the configuration to `os`.
    virtual void printMe(std::ostream& os) const = 0;
};

View File

@@ -0,0 +1,27 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "WaapEnums.h"
// Abstract interface of the WAAP asset state: score updates, signature file locations,
// and keyword / sample type queries.
class I_WaapAssetState
{
public:
    // Virtual destructor: this is a polymorphic interface, so destroying an
    // implementation through an I_WaapAssetState pointer must run the
    // implementation's destructor (without it that is undefined behaviour).
    virtual ~I_WaapAssetState() = default;

    // Updates the scores.
    virtual void updateScores() = 0;

    // Path of the signatures scores file.
    virtual std::string getSignaturesScoresFilePath() const = 0;

    // Directory of the signatures filter.
    virtual std::string getSignaturesFilterDir() const = 0;

    // Returns true if `keyword` is of the given parameter type.
    virtual bool isKeywordOfType(const std::string& keyword, ParamType type) const = 0;

    // Returns true if `sample` is classified as binary.
    virtual bool isBinarySampleType(const std::string & sample) const = 0;

    // Returns true if `sample` is classified as WBXML.
    virtual bool isWBXMLSampleType(const std::string & sample) const = 0;

    // Returns the set of sample types detected for `sample`.
    virtual std::set<std::string> getSampleType(const std::string& sample) const = 0;
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,100 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __REPUTATION_FEATURES_EVENTS_H__
#define __REPUTATION_FEATURES_EVENTS_H__
#include "event.h"
#include "http_inspection_events.h"
using ResponseCode = uint16_t;
class ReputationFeaturesEntry;
// Event that carries a pointer to the ReputationFeaturesEntry it refers to.
// The pointer is stored as given; the event does not take ownership of it.
class TearDownEvent : public Event<TearDownEvent>
{
public:
    TearDownEvent(ReputationFeaturesEntry *pEntry) : m_pEntry{pEntry} {}

    // Returns the entry pointer passed to the constructor.
    ReputationFeaturesEntry * getEntry() const { return m_pEntry; }

private:
    ReputationFeaturesEntry *m_pEntry;
};
// Event that carries the source identifier and the asset identifier of a
// transaction. Both strings are copied at construction and never change.
class IdentifiersEvent : public Event<IdentifiersEvent>
{
public:
    IdentifiersEvent(const std::string &sourceId, const std::string &assetId)
            :
        m_sourceId{sourceId},
        m_assetId{assetId}
    {
    }

    // Identifier of the source.
    const std::string & getSourceId() const { return m_sourceId; }

    // Identifier of the asset.
    const std::string & getAssetId() const { return m_assetId; }

private:
    const std::string m_sourceId;
    const std::string m_assetId;
};
// Event that describes a detection: the location it was found in and the
// list of indicators that matched.
class DetectionEvent : public Event<DetectionEvent>
{
public:
    DetectionEvent(const std::string &location, const std::vector<std::string> &indicators)
            :
        m_location(location),
        m_indicators(indicators)
    {
    }

// LCOV_EXCL_START - sync functions, can only be tested once the sync module exists
    // Default construction leaves the location and the indicators empty.
    DetectionEvent() {}

    // Archive hook: passes the location followed by the indicators to `ar`.
    template <typename T>
    void
    serialize(T &ar)
    {
        ar(m_location, m_indicators);
    }
// LCOV_EXCL_STOP

    // Location in which the detection occurred.
    const std::string & getLocation() const { return m_location; }

private:
    std::string m_location;
    std::vector<std::string> m_indicators;
};
#endif // __REPUTATION_FEATURES_EVENTS_H__

View File

@@ -0,0 +1,3 @@
# Make the headers of the parent directory's include folder visible to this target.
include_directories(../include)
# Build the "reputation" library from the reputation features aggregator source.
add_library(reputation reputation_features_agg.cc)

View File

@@ -0,0 +1,379 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "reputation_features_agg.h"
#include <cereal/types/set.hpp>
#include <boost/algorithm/string.hpp>
#include "i_mainloop.h"
#include "i_time_get.h"
#include "i_serialize.h"
#include "../waap_clib/Waf2Util.h"
#include "customized_cereal_map.h"
USE_DEBUG_FLAG(D_WAAP_REPUTATION);
using namespace std;
// Listener that forwards every event to upon() and then answers with a fixed
// verdict chosen at construction time (TRAFFIC_VERDICT_IRRELEVANT by default).
template <typename EventType>
class DefaultListener : public Listener<EventType>
{
public:
    DefaultListener(EventVerdict defaultVerdict = EventVerdict(ngx_http_cp_verdict_e::TRAFFIC_VERDICT_IRRELEVANT))
            :
        m_default_verdict(defaultVerdict)
    {
    }

    // Handles the event through upon() and returns the configured verdict.
    EventVerdict
    respond(const EventType &event)
    {
        this->upon(event);
        return m_default_verdict;
    }

private:
    EventVerdict m_default_verdict;
};
// Implementation of ReputationFeaturesAgg. It listens to the reputation events
// (identifiers, detection, tear-down) and, through DefaultListener, to the HTTP transaction
// events (new transaction, request header, response code), and keeps the aggregated
// per-source data in m_agg_entries.
class ReputationFeaturesAgg::Impl
:
public Listener<IdentifiersEvent>,
public Listener<DetectionEvent>,
public Listener<TearDownEvent>,
public DefaultListener<NewHttpTransactionEvent>,
public DefaultListener<HttpRequestHeaderEvent>,
public DefaultListener<ResponseCodeEvent>
{
public:
// Only the ResponseCodeEvent listener is given its verdict explicitly; the other
// DefaultListener bases use their default constructor argument, which is the same verdict.
Impl()
:
DefaultListener<ResponseCodeEvent>(EventVerdict(ngx_http_cp_verdict_e::TRAFFIC_VERDICT_IRRELEVANT)),
m_agg_entries()
{
}
void reportReputationFeatures();
// Registers the listeners and schedules reportReputationFeatures() as a one-time
// routine of type Offline on the main loop.
void
init()
{
registerListener();
I_MainLoop* i_mainLoop = Singleton::Consume<I_MainLoop>::by<ReputationFeaturesAgg>();
// The routine captures `this`, so this object has to stay alive while the routine can still run.
I_MainLoop::Routine routine = [this]() { reportReputationFeatures(); };
i_mainLoop->addOneTimeRoutine(I_MainLoop::RoutineType::Offline, routine, "report reputation features");
}
// Unregisters the listeners.
void
fini()
{
unregisterListener();
}
// One handler per event type this class listens to.
void upon(const IdentifiersEvent &event) override;
void upon(const DetectionEvent &event) override;
void upon(const TearDownEvent &event) override;
void upon(const NewHttpTransactionEvent &event) override;
void upon(const HttpRequestHeaderEvent &event) override;
void upon(const ResponseCodeEvent &event) override;
string getListenerName() const { return "reputationFeaturesAgg"; }
private:
// Aggregated data, indexed first by asset id and then by source id (see upon(const TearDownEvent &)).
map<string, map<string, SourceReputationFeaturesAgg>> m_agg_entries;
};
void
SourceReputationFeaturesAgg::addEntry(const ReputationFeaturesEntry &entry)
{
m_requests++;
if (m_wall_time_hour == 0) {
chrono::hours wallTimeHour = chrono::duration_cast<chrono::hours>(entry.getTime());
m_wall_time_hour = wallTimeHour.count();
}
addMethod(entry.getMethod());
addResponseCode(entry.getResponseCode());
addDetections(entry.getDetections());
addUri(entry.getUri());
addHeaders(entry);
dbgTrace(D_WAAP_REPUTATION) << "aggregated request from: " << m_wall_time_hour % 24 <<
" count: " << m_requests << " for source: " << entry.getSourceId() << " on asset: " << entry.getAssetId();
}
// Returns the part of a cookie segment that precedes the first '='.
// A segment without '=' is returned unchanged.
string
SourceReputationFeaturesAgg::extractCookieKey(const string &cookie_seg)
{
    const size_t equals_pos = cookie_seg.find('=');
    if (equals_pos == string::npos) {
        return cookie_seg;
    }
    return cookie_seg.substr(0, equals_pos);
}
void
SourceReputationFeaturesAgg::addHeaders(const ReputationFeaturesEntry &entry)
{
const auto &headers = entry.getHeaders();
const auto &user_agent_header_itr = headers.find("user-agent");
if (user_agent_header_itr != headers.cend()) {
m_unique_user_agent.insert(user_agent_header_itr->second);
}
const auto &referer_header_itr = headers.find("referer");
if (referer_header_itr == headers.cend()) {
m_referer_count.na++;
} else {
const string &uri = referer_header_itr->second;
size_t scheme_end_pos = uri.find("://") + 3;
size_t authority_end_pos = uri.find("/", scheme_end_pos + 1);
string authority = uri.substr(scheme_end_pos + 1, authority_end_pos);
if (authority.find(entry.getHost()) != string::npos) {
m_referer_count.external_host++;
} else {
m_referer_count.internal_host++;
}
}
const auto &cookie_header_itr = headers.find("cookie");
if (cookie_header_itr == headers.cend()) {
return;
}
const string &cookie = cookie_header_itr->second;
const vector<string> &cookie_split = split(cookie, ';');
for (const auto& cookie_seg : cookie_split)
{
const string &key = extractCookieKey(cookie_seg);
m_unique_cookies.insert(key);
}
}
void
SourceReputationFeaturesAgg::addDetections(const vector<DetectionEvent> &detections)
{
for (const auto &detect : detections) {
m_hit_count_per_location[detect.getLocation()]++;
}
}
void
SourceReputationFeaturesAgg::addUri(const string &uri)
{
size_t pos = uri.find_first_of("?;");
if (pos == string::npos) {
m_unique_uris.insert(uri);
return;
}
string uri_path = uri.substr(0, pos);
m_unique_uris.insert(uri_path);
}
// Increments the request counter of the given HTTP method.
void
SourceReputationFeaturesAgg::addMethod(const string &method)
{
    // operator[] creates the counter on first use.
    auto &method_hits = m_method_count[method];
    method_hits++;
}
// Increments the counter of the class the response code belongs to:
// below 100 -> na, 1xx, 2xx, 3xx, 4xx, and 500 or above -> 5xx.
void
SourceReputationFeaturesAgg::addResponseCode(const ResponseCode &responseCode)
{
    // Dispatch on the hundreds part of the code.
    switch (responseCode / 100) {
        case 0: {
            m_response_code_count.response_na++;
            break;
        }
        case 1: {
            m_response_code_count.response_1xx++;
            break;
        }
        case 2: {
            m_response_code_count.response_2xx++;
            break;
        }
        case 3: {
            m_response_code_count.response_3xx++;
            break;
        }
        case 4: {
            m_response_code_count.response_4xx++;
            break;
        }
        default: {
            // Everything from 500 upwards is counted as 5xx.
            m_response_code_count.response_5xx++;
            break;
        }
    }
}
// REST object used to upload the aggregated reputation features.
// The payload is a two-level map; upon(TearDownEvent) in this file fills the source map as
// [asset id][source id].
// NOTE(review): RestGetFile and C2S_PARAM are declared elsewhere; presumably C2S_PARAM declares a
// client-to-server field named reputation_entries - confirm against the REST framework.
class ReputationFeaturesReport : public RestGetFile
{
using SourceAggPerAsset = map<string, map<string, SourceReputationFeaturesAgg>>;
public:
// Initializes the reputation_entries field from the given aggregation map.
ReputationFeaturesReport(SourceAggPerAsset &entries) :
reputation_entries(entries)
{
}
private:
C2S_PARAM(SourceAggPerAsset, reputation_entries);
};
void
ReputationFeaturesAgg::Impl::upon(const IdentifiersEvent &event)
{
I_Table *pTable = Singleton::Consume<I_Table>().by<ReputationFeaturesAgg>();
if (!pTable->hasState<ReputationFeaturesEntry>())
{
dbgWarning(D_WAAP_REPUTATION) << "reputation entry state is missing";
return;
}
ReputationFeaturesEntry &entry = pTable->getState<ReputationFeaturesEntry>();
entry.m_assetId = event.getAssetId();
entry.m_sourceId = event.getSourceId();
dbgTrace(D_WAAP_REPUTATION) << "assign identifiers to reputation entry. src: " << event.getSourceId() <<
", asset: " << event.getAssetId();
}
void
ReputationFeaturesAgg::Impl::upon(const DetectionEvent &event)
{
I_Table *pTable = Singleton::Consume<I_Table>().by<ReputationFeaturesAgg>();
if (!pTable->hasState<ReputationFeaturesEntry>())
{
dbgWarning(D_WAAP_REPUTATION) << "reputation entry state is missing";
return;
}
ReputationFeaturesEntry &entry = pTable->getState<ReputationFeaturesEntry>();
entry.m_detections.push_back(event);
dbgTrace(D_WAAP_REPUTATION) << "add a detection event. detection location: " << event.getLocation();
}
void
ReputationFeaturesAgg::Impl::upon(const TearDownEvent &event)
{
dbgDebug(D_WAAP_REPUTATION) << "aggregating reputation entry data";
ReputationFeaturesEntry *entry = event.getEntry();
SourceReputationFeaturesAgg &srvAgg = m_agg_entries[entry->getAssetId()][entry->getSourceId()];
srvAgg.addEntry(*entry);
}
void
ReputationFeaturesAgg::Impl::upon(const NewHttpTransactionEvent &event)
{
dbgDebug(D_WAAP_REPUTATION) << "new transaction";
I_Table *pTable = Singleton::Consume<I_Table>().by<ReputationFeaturesAgg>();
if (pTable->hasState<ReputationFeaturesEntry>())
{
dbgDebug(D_WAAP_REPUTATION) << "reputation entry state already exists";
return;
}
if (!pTable->createState<ReputationFeaturesEntry>())
{
dbgError(D_WAAP_REPUTATION) << "failed to create reputation entry state";
return;
}
if (!pTable->hasState<ReputationFeaturesEntry>())
{
dbgWarning(D_WAAP_REPUTATION) << "reputation entry state is missing";
return;
}
ReputationFeaturesEntry& entry = pTable->getState<ReputationFeaturesEntry>();
I_TimeGet *timeGet = Singleton::Consume<I_TimeGet>::by<ReputationFeaturesAgg>();
auto currentTime = timeGet->getWalltime();
entry.m_wallTime = currentTime;
entry.m_method = event.getHttpMethod();
entry.m_uri = event.getURI();
entry.m_host = event.getDestinationHost();
dbgTrace(D_WAAP_REPUTATION) << "created a new reputation entry state";
}
void
ReputationFeaturesAgg::Impl::upon(const HttpRequestHeaderEvent &event)
{
I_Table *pTable = Singleton::Consume<I_Table>().by<ReputationFeaturesAgg>();
if (!pTable->hasState<ReputationFeaturesEntry>())
{
dbgWarning(D_WAAP_REPUTATION) << "reputation entry state is missing";
return;
}
ReputationFeaturesEntry &entry = pTable->getState<ReputationFeaturesEntry>();
std::string key = event.getKey();
boost::algorithm::to_lower(key);
entry.m_headers[key] = event.getValue();
dbgTrace(D_WAAP_REPUTATION) << "add header: " << string(event.getKey());
}
void
ReputationFeaturesAgg::Impl::upon(const ResponseCodeEvent &event)
{
I_Table *pTable = Singleton::Consume<I_Table>().by<ReputationFeaturesAgg>();
if (!pTable->hasState<ReputationFeaturesEntry>())
{
dbgWarning(D_WAAP_REPUTATION) << "reputation entry state is missing";
return;
}
ReputationFeaturesEntry &entry = pTable->getState<ReputationFeaturesEntry>();
entry.m_responseCode = event.getResponseCode();
dbgTrace(D_WAAP_REPUTATION) << "add response code: " << entry.getResponseCode();
}
// Endless reporting loop: on every iteration it yields for the time remaining until the next
// full wall-clock hour, then sends the aggregated entries and clears them.
// NOTE(review): this function never returns; presumably it is registered as a main-loop routine
// by code outside this chunk - confirm.
void
ReputationFeaturesAgg::Impl::reportReputationFeatures()
{
I_TimeGet *timeGet = Singleton::Consume<I_TimeGet>::by<ReputationFeaturesAgg>();
I_Messaging *msg = Singleton::Consume<I_Messaging>::by<ReputationFeaturesAgg>();
I_AgentDetails *agentDetails = Singleton::Consume<I_AgentDetails>::by<ReputationFeaturesAgg>();
I_MainLoop *i_mainLoop = Singleton::Consume<I_MainLoop>::by<ReputationFeaturesAgg>();
string tenantId = agentDetails->getTenantId();
string agentId = agentDetails->getAgentId();
// When instance awareness is available and yields a unique id, the id is appended to the agent id
// so that the upload path below differs per instance.
if (Singleton::exists<I_InstanceAwareness>())
{
I_InstanceAwareness *instance = Singleton::Consume<I_InstanceAwareness>::by<ReputationFeaturesAgg>();
Maybe<string> uniqueId = instance->getUniqueID();
if (uniqueId.ok())
{
agentId += "/" + uniqueId.unpack();
}
}
while (true)
{
// currentTime is sampled BEFORE the yield, so the hour used in the URI below is the hour in
// which this aggregation window started, not the hour in which the report is sent.
auto currentTime = timeGet->getWalltime();
// Time left until the next hour boundary of the wall clock.
chrono::microseconds remainingTime = chrono::hours(1) - (currentTime % chrono::hours(1));
i_mainLoop->yield(remainingTime);
dbgDebug(D_WAAP_REPUTATION) << "sending features report";
// The report is built from m_agg_entries before the map is cleared for the next window.
// NOTE(review): this relies on the report holding its own copy of the map - confirm against
// the C2S_PARAM implementation.
ReputationFeaturesReport report(m_agg_entries);
m_agg_entries.clear();
// Path layout: /storage/waap/<tenant>/reputation/<hours since epoch>/<agent id>/data.data
string uri = "/storage/waap/" + tenantId + "/reputation/" +
to_string(chrono::duration_cast<chrono::hours>(currentTime).count()) +
"/" + agentId + "/data.data";
// NOTE(review): the meaning of the "" and true arguments is defined by I_Messaging, which is
// not visible here.
msg->sendObjectWithPersistence(report,
I_Messaging::Method::PUT,
uri,
"",
true,
MessageTypeTag::WAAP_LEARNING);
}
}
// Registers the component under the name "ReputationComp" and allocates the implementation object.
ReputationFeaturesAgg::ReputationFeaturesAgg()
        :
    Component("ReputationComp"),
    pimpl(make_unique<Impl>())
{}
// Defined out of line, where Impl is a complete type, so that pimpl can be destroyed.
ReputationFeaturesAgg::~ReputationFeaturesAgg() {}
void
ReputationFeaturesAgg::init()
{
pimpl->init();
}
void
ReputationFeaturesAgg::fini()
{
pimpl->fini();
}

View File

@@ -0,0 +1,221 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __REPUTATION_FEATURES_AGG_H__
#define __REPUTATION_FEATURES_AGG_H__
#include <cereal/types/map.hpp>
#include "reputation_features_events.h"
#include "component.h"
#include "table_opaque.h"
#include "i_table.h"
#include "i_agent_details.h"
#include "i_instance_awareness.h"
// Per-transaction table state that collects the raw data used for reputation features:
// wall time, source/asset identifiers, method, URI, host, response code, detections and headers.
// NOTE(review): TableOpaqueSerialize is declared elsewhere; presumably it ties the lifetime of
// this object to a table entry - confirm.
class ReputationFeaturesEntry : public TableOpaqueSerialize<ReputationFeaturesEntry>
{
public:
// m_host and m_headers are not listed here, so they are default-initialized (empty).
ReputationFeaturesEntry()
:
TableOpaqueSerialize<ReputationFeaturesEntry>(this),
m_wallTime(),
m_sourceId(),
m_assetId(),
m_method(),
m_uri(),
m_responseCode(),
m_detections()
{
}
// Destroying the entry emits a TearDownEvent that carries a pointer to this (still alive) object.
~ReputationFeaturesEntry()
{
TearDownEvent(this).notify();
}
// LCOV_EXCL_START - sync functions, can only be tested once the sync module exists
// Archives every member; the field order below is part of the serialized format.
template <typename T>
void serialize(T &ar, uint)
{
ar(m_wallTime,
m_sourceId,
m_assetId,
m_method,
m_uri,
m_host,
m_responseCode,
m_detections,
m_headers);
}
static std::string name() { return "ReputationFeatures"; }
static std::unique_ptr<TableOpaqueBase> prototype() { return std::make_unique<ReputationFeaturesEntry>(); }
static uint currVer() { return 0; }
static uint minVer() { return 0; }
// LCOV_EXCL_STOP
// Read-only accessors; each returns a reference to the stored member.
const std::chrono::microseconds & getTime() const { return m_wallTime; }
const std::string & getSourceId() const { return m_sourceId; }
const std::string & getAssetId() const { return m_assetId; }
const std::string & getMethod() const { return m_method; }
const std::string & getUri() const { return m_uri; }
const std::string & getHost() const { return m_host; }
const ResponseCode & getResponseCode() const { return m_responseCode; }
const std::vector<DetectionEvent> & getDetections() const { return m_detections; }
const std::map<std::string, std::string> & getHeaders() const { return m_headers; }
// The aggregator writes the private members directly from its event handlers.
friend class ReputationFeaturesAgg;
private:
std::chrono::microseconds m_wallTime;
std::string m_sourceId;
std::string m_assetId;
std::string m_method;
std::string m_uri;
std::string m_host;
ResponseCode m_responseCode;
std::vector<DetectionEvent> m_detections;
// Header name -> value; the aggregator stores names lower-cased.
std::map<std::string, std::string> m_headers;
};
// Request counters grouped by response status family; every counter starts at zero.
struct ResponseCodeCounters
{
    size_t response_na = 0;
    size_t response_1xx = 0;
    size_t response_2xx = 0;
    size_t response_3xx = 0;
    size_t response_4xx = 0;
    size_t response_5xx = 0;

    ResponseCodeCounters() {}

    // Writes each counter as a named value; note the "response_NA" spelling of the first key.
    template<class Archive>
    void
    serialize(Archive &ar)
    {
        ar(
            cereal::make_nvp("response_NA", response_na),
            cereal::make_nvp("response_1xx", response_1xx),
            cereal::make_nvp("response_2xx", response_2xx),
            cereal::make_nvp("response_3xx", response_3xx),
            cereal::make_nvp("response_4xx", response_4xx),
            cereal::make_nvp("response_5xx", response_5xx)
        );
    }
};
// Request counters grouped by the kind of Referer header seen; every counter starts at zero.
struct RefererCounters
{
    size_t na = 0;
    size_t internal_host = 0;
    size_t external_host = 0;

    RefererCounters() {}

    // Writes each counter as a named value; the "na" counter is emitted as "referer_NA".
    template<class Archive>
    void
    serialize(Archive &ar)
    {
        ar(
            cereal::make_nvp("referer_NA", na),
            cereal::make_nvp("internal_host", internal_host),
            cereal::make_nvp("external_host", external_host)
        );
    }
};
// Aggregated reputation features of a single source: request count, hits per detection location,
// per-method and per-response-family counters, referer counters and the sets of unique URIs,
// user agents and cookie keys.
class SourceReputationFeaturesAgg
{
public:
SourceReputationFeaturesAgg() : m_wall_time_hour(0), m_requests(0)
{
}
// Writes every aggregated member as a named value.
template<class Archive>
void
serialize(Archive &ar)
{
ar(
cereal::make_nvp("wall_time_hour", m_wall_time_hour),
cereal::make_nvp("requests_count", m_requests),
cereal::make_nvp("hits_per_location", m_hit_count_per_location),
cereal::make_nvp("method_counters", m_method_count),
cereal::make_nvp("response_code_counters", m_response_code_count),
cereal::make_nvp("referer_counters", m_referer_count),
cereal::make_nvp("uris", m_unique_uris),
cereal::make_nvp("user_agents", m_unique_user_agent),
cereal::make_nvp("cookies", m_unique_cookies)
);
}
// Adds the data of one finished transaction entry to this aggregation.
void addEntry(const ReputationFeaturesEntry &entry);
private:
// Helpers, each covering one kind of data from the entry.
std::string extractCookieKey(const std::string &cookie_seg);
void addHeaders(const ReputationFeaturesEntry &entry);
void addDetections(const std::vector<DetectionEvent> &detections);
void addUri(const std::string &uri);
void addMethod(const std::string &method);
void addResponseCode(const ResponseCode &responseCode);
size_t m_wall_time_hour;
size_t m_requests;
// Detection location -> number of detections reported for it.
std::map<std::string, size_t> m_hit_count_per_location;
// HTTP method -> number of requests using it.
std::map<std::string, size_t> m_method_count;
ResponseCodeCounters m_response_code_count;
RefererCounters m_referer_count;
// URIs are stored without the part that starts at the first '?' or ';'.
std::set<std::string> m_unique_uris;
std::set<std::string> m_unique_user_agent;
std::set<std::string> m_unique_cookies;
};
// Component facade of the reputation features aggregator. All logic lives in the private Impl
// class (pimpl); the Singleton::Consume bases list the interfaces the component consumes.
class ReputationFeaturesAgg
:
public Component,
Singleton::Consume<I_MainLoop>,
Singleton::Consume<I_TimeGet>,
Singleton::Consume<I_Table>,
Singleton::Consume<I_AgentDetails>,
Singleton::Consume<I_InstanceAwareness>,
Singleton::Consume<I_Messaging>
{
public:
ReputationFeaturesAgg();
~ReputationFeaturesAgg();
// Both hooks forward to the Impl object.
void init() override;
void fini() override;
private:
class Impl;
std::unique_ptr<Impl> pimpl;
};
#endif // __REPUTATION_FEATURES_AGG_H__

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,87 @@
{
"keywordsTypeMap": [
{
"key": "html_input",
"value": [
"document.",
"<input",
"<h3",
"<title",
"<center",
"<html",
"<hr",
"<form",
"high_acuracy_fast_reg_xss_1",
"/html",
"id",
"--",
"settimeout(",
"javascript:",
"<h2",
"encoded_\\n",
"to:",
"<h1",
"<h4",
"<h5",
"<table",
"<thead",
"high_acuracy_fast_reg_xss_0",
"<",
"<iframe",
"<strong",
"<span",
"<ul",
"<div",
"<link",
"quotes_ev_fast_reg_2",
"style",
"ssti_fast_reg_4",
"'",
"<body",
"<tbody",
"<!--",
"=",
"innerhtml",
";",
"href=",
"/images",
"table",
"<br",
">",
"$(",
"offset",
"<img",
"encoded_\\r\\n",
"<td",
"and",
"or",
"from",
"create",
"into",
"&",
"#",
"<colgroup",
"regex_postfix_0",
"regex_code_execution_2",
"<pre",
"<ol",
"comment_ev_fast_reg_0",
"regex_code_execution_1",
"|",
"os_cmd_high_acuracy_fast_reg_3",
"ldap_injection_regex_5",
"where",
"having",
"high_acuracy_fast_reg_evasion_1",
"regex_xss_8",
"regex_xss_3",
"regex_xss_0",
"regex_xss_1",
"regex_xss_2",
"regex_sqli_17",
"xpath_injeciton_regex_3",
"quotes_ev_fast_reg_0"
]
}
]
}

View File

@@ -0,0 +1,94 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "AutonomousSecurityDecision.h"
// Builds a decision of the given type with zeroed scores, NO_THREAT level and log override off.
AutonomousSecurityDecision::AutonomousSecurityDecision(DecisionType type)
        :
    SingleDecision(type),
    m_relativeReputation(0.0),
    m_fpMitigationScore(0.0),
    m_finalScore(0.0),
    m_threatLevel(NO_THREAT),
    m_overridesLog(false),
    m_relativeReputationMean(0.0),
    m_variance(0.0)
{
}
// Nothing to release; the class only holds plain values.
AutonomousSecurityDecision::~AutonomousSecurityDecision()
{
}
// Human-readable name of this decision type.
std::string AutonomousSecurityDecision::getTypeStr() const
{
    static const char type_name[] = "Autonomous Security";
    return std::string(type_name);
}
void AutonomousSecurityDecision::setRelativeReputation(double relativeReputation)
{
m_relativeReputation = relativeReputation;
}
void AutonomousSecurityDecision::setFpMitigationScore(double fpMitigationScore)
{
m_fpMitigationScore = fpMitigationScore;
}
void AutonomousSecurityDecision::setFinalScore(double finalScore)
{
m_finalScore = finalScore;
}
// Stores the threat level.
void AutonomousSecurityDecision::setThreatLevel(ThreatLevel threatLevel)
{
    this->m_threatLevel = threatLevel;
}
void AutonomousSecurityDecision::setOverridesLog(bool overridesLog)
{
m_overridesLog = overridesLog;
}
void AutonomousSecurityDecision::setRelativeReputationMean(double relativeReputationMean)
{
m_relativeReputationMean = relativeReputationMean;
}
void AutonomousSecurityDecision::setVariance(double variance)
{
m_variance = variance;
}
double AutonomousSecurityDecision::getRelativeReputation() const
{
return m_relativeReputation;
}
double AutonomousSecurityDecision::getFpMitigationScore() const
{
return m_fpMitigationScore;
}
double AutonomousSecurityDecision::getFinalScore() const
{
return m_finalScore;
}
// Returns the stored threat level.
ThreatLevel AutonomousSecurityDecision::getThreatLevel() const
{
    return this->m_threatLevel;
}
bool AutonomousSecurityDecision::getOverridesLog() const
{
return m_overridesLog;
}
double AutonomousSecurityDecision::getRelativeReputationMean() const
{
return m_relativeReputationMean;
}
double AutonomousSecurityDecision::getVariance() const
{
return m_variance;
}

View File

@@ -0,0 +1,53 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __AUTONOMOUS_SECURITY_DECISION_H__
#define __AUTONOMOUS_SECURITY_DECISION_H__
#include "SingleDecision.h"
#include "DecisionType.h"
#include "WaapEnums.h"
#include <string>
// Decision object of the "Autonomous Security" practice. It is a plain value holder: every
// setter stores its argument and every getter returns the stored value unchanged.
class AutonomousSecurityDecision: public SingleDecision
{
public:
// Scores start at 0, the threat level at NO_THREAT and the log override at false.
explicit AutonomousSecurityDecision(DecisionType type);
virtual ~AutonomousSecurityDecision();
// Returns the string "Autonomous Security".
std::string getTypeStr() const override;
void setRelativeReputation(double relativeReputation);
void setFpMitigationScore(double fpMitigationScore);
void setFinalScore(double finalScore);
void setThreatLevel(ThreatLevel threatLevel);
void setOverridesLog(bool overridesLog);
void setRelativeReputationMean(double relativeReputationMean);
void setVariance(double variance);
double getRelativeReputation() const;
double getFpMitigationScore() const;
double getFinalScore() const;
ThreatLevel getThreatLevel() const;
bool getOverridesLog() const;
double getRelativeReputationMean() const;
double getVariance() const;
private:
double m_relativeReputation;
double m_fpMitigationScore;
double m_finalScore;
ThreatLevel m_threatLevel;
bool m_overridesLog;
double m_relativeReputationMean;
double m_variance;
};
#endif

View File

@@ -0,0 +1,442 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// #define WAF2_LOGGING_ENABLE
#include "BehaviorAnalysis.h"
#include <string>
#include <vector>
#include <map>
#include <set>
#include <algorithm>
#include <set>
#include <functional>
#include <math.h>
#include <stdio.h>
#include <iostream>
#include <assert.h>
// NOTE(review): BUCKET_SIZE is not referenced in the visible part of this file.
static const int BUCKET_SIZE = 300;
// Seed values the BehaviorAnalyzer constructor uses for m_count, m_variance and m_reputation_mean.
#define INITIAL_COUNT 1000
#define INITIAL_VARIANCE 100.0
#define INITIAL_MEAN 40.0
// Counts one legitimate request for the IP, the user agent and the combined user-agent/IP key.
// The uri argument is accepted but not used.
void TopBucket::addKeys(std::string& uri, std::string& ip, std::string& ua, std::string& ua_ip)
{
    m_ips.addKeys(ip);
    m_userAgents.addKeys(ua);
    m_ipUserAgents.addKeys(ua_ip);
}
// Registers an attack against the three source keys, then refreshes their reputation info.
// The url argument is accepted but not used.
void TopBucket::putAttack(std::string& url,
    double score,
    std::string& ip,
    std::string& ua,
    std::string& ua_ip,
    const std::string &location)
{
    // Reputation is punished only if the score is above 1.0 and the attack was found in the URL,
    // a URL parameter or the body. Attacks found elsewhere (for example referer_param, header or
    // cookie) do not punish reputation.
    const bool punishableLocation = (location == "url" || location == "url_param" || location=="body");
    if (score > 1.0 && punishableLocation) {
        m_ips.putAttack(true, score, ip);
        m_userAgents.putAttack(true, score, ua);
        m_ipUserAgents.putAttack(true, score, ua_ip);
    }

    // The result is discarded; the call is kept for its side effects.
    getInfo(ip, ua, ua_ip);
}
void TopBucket::cleanSources()
{
m_ips.cleanSources();
m_userAgents.cleanSources();
m_ipUserAgents.cleanSources();
}
// Refreshes the cached average from the combined user-agent/IP table.
void TopBucket::evaluateAvg()
{
    const double freshAverage = m_ipUserAgents.getSourcesAvg();
    m_avgCount = freshAverage;
}
// Tells whether the table selected by sourceType holds no sources.
// An unrecognized sourceType yields false.
bool TopBucket::isSourceEmpty(SourceType sourceType)
{
    if (sourceType == IP_SOURCE_TYPE) {
        return m_ips.empty();
    }
    if (sourceType == UA_SOURCE_TYPE) {
        return m_userAgents.empty();
    }
    if (sourceType == UA_IP_SOURCE_TYPE) {
        return m_ipUserAgents.empty();
    }
    return false;
}
double TopBucket::getAvgCount()
{
return m_avgCount;
}
// Collects the reputation info of the three source keys and derives the absolute reputation
// (their plain average) and the relative reputation (as computed by the owning analyzer).
// Every call also feeds the absolute reputation into the analyzer's running mean and variance.
ReputationData TopBucket::getInfo(std::string& ip, std::string& ua, std::string& uaIp)
{
    ReputationData result;

    result.ipReputation = m_ips.getInfo(ip, m_avgCount);
    result.uaReputation = m_userAgents.getInfo(ua, m_avgCount);
    result.uaIpReputation = m_ipUserAgents.getInfo(uaIp, m_avgCount);

    const double reputationSum = result.ipReputation.reputation + result.uaReputation.reputation +
        result.uaIpReputation.reputation;
    result.absoluteReputation = reputationSum / 3;

    // The statistics are updated first, so the relative value already reflects this sample.
    m_behaviorAnalyzer->updateAvrageAndVariance(result.absoluteReputation);
    result.relativeReputation = m_behaviorAnalyzer->getRelativeReputation(result.absoluteReputation);

    return result;
}
// Starts with an empty source table.
Source::Source()
        :
    sources()
{}
// Frees every Counters object owned by the table, then empties the table.
Source::~Source()
{
    for (auto entry = sources.begin(); entry != sources.end(); ++entry) {
        delete entry->second;
    }
    sources.clear();
}
void Source::cleanSources()
{
for (auto source = sources.begin(); source != sources.end();) {
if (!source->second->to_remove) {
source->second->to_remove = true;
source++;
}
else {
delete source->second;
sources.erase(source++);
}
}
}
// Average number of legitimate requests per source; 0 when the table is empty.
double Source::getSourcesAvg()
{
    if (sources.empty()) {
        return 0;
    }

    unsigned int legitTotal = 0;
    for (auto entry = sources.begin(); entry != sources.end(); ++entry) {
        legitTotal += entry->second->countLegit;
    }
    return (double)legitTotal / sources.size();
}
void Source::putAttack(bool missedUrl, double score, std::string& source)
{
if (sources.find(source) == sources.end()) {
sources[source] = new Counters(); // init counters to 0
}
assert(missedUrl != false || score > 0);
if (missedUrl) {
sources[source]->missed_urls++;
}
// Larger value slows down the absolute score reduction during attacks.
const double velocity = 8;
sources[source]->attacksScoreSum += round(score, 5) * velocity;
}
// TODO: rename
// Counts one legitimate request for the given source, creating zeroed counters on first sight.
void Source::addKeys(std::string& source)
{
    auto entry = sources.find(source);
    if (entry == sources.end()) {
        entry = sources.emplace(source, new Counters()).first; // counters start at 0
    }
    entry->second->countLegit++;
}
// Computes 100 * (param + 1) / (param + count + 1), truncated to a whole number.
// Assuming count > 0 and param > 0 the result lies in (0, 100]:
//   count << param  ->  close to 100
//   count >> param  ->  close to 0
double Source::calcDiff(double count, double param)
{
    const double ratio = ((param + 1) * 100) / (param + count + 1);
    // Drop the fractional part, exactly as a cast to int does.
    return static_cast<double>(static_cast<int>(ratio));
}
// Computes the reputation info of a single source.
//
// source   - key of the source; zeroed counters are created if the key was never seen.
// avgCount - average legitimate-request count, used to bias the legit-vs-attacks term.
//
// Returns the reputation, coverage and legit-vs-attacks values together with the raw counters.
Source::Info Source::getInfo(std::string& source, double avgCount)
{
    // Make sure the counters exist BEFORE anything dereferences them. Previously the "to_remove"
    // branch ran first: for a never-seen key, operator[] inserted a null pointer that was
    // dereferenced immediately, and the later existence check no longer allocated the counters.
    Counters *&slot = sources[source];
    if (slot == nullptr) {
        slot = new Counters(); // init counters to 0
    }
    Counters *counters = slot;

    // NOTE(review): the flag is cleared only when the key TEXT contains "to_remove". This looks
    // like it was meant to keep an actively used source alive across cleanSources() - confirm.
    // The condition is kept as-is so that source retention does not change.
    if (source.find("to_remove") != std::string::npos) {
        counters->to_remove = false;
    }

    // Penalty term driven by the missed_urls counter.
    double missed_urls = 100 - calcDiff(5, counters->missed_urls);

    // Constant base coverage; the expression evaluates to 67 after truncation.
    double coverage = (int)((100 - calcDiff(4, 40)) * 4 / 5 + 60);

    // The legit count is padded with max(40 - avg, 1) + 20, so a source with few requests is
    // still compared against a non-trivial baseline.
    double spcDiff = calcDiff(counters->countLegit + std::max(40 - (int)avgCount, 1) + 20,
        counters->attacksScoreSum * 4);
    double legit_vs_attacks = (double)(100 - (spcDiff)) * 4 / 5 + 20;

    coverage = (int)((coverage + missed_urls) / 2);
    double reputation = (double)(coverage * legit_vs_attacks * missed_urls) / 100 / 100;

    Source::Info info = { reputation, coverage, legit_vs_attacks,
        {counters->countLegit, counters->attacksScoreSum}};
    return info;
}
bool Source::empty()
{
return sources.empty();
}
// Number of tracked sources.
size_t Source::size()
{
    const size_t trackedSources = sources.size();
    return trackedSources;
}
// Seeds the running statistics with the INITIAL_* values; no bucket exists yet.
BehaviorAnalyzer::BehaviorAnalyzer()
        :
    m_count(INITIAL_COUNT),
    m_variance(INITIAL_VARIANCE),
    m_reputation_mean(INITIAL_MEAN)
{}
// Frees every TopBucket owned by the analyzer, then empties the bucket map.
BehaviorAnalyzer::~BehaviorAnalyzer()
{
    for (auto entry = m_buckets.begin(); entry != m_buckets.end(); ++entry) {
        delete entry->second;
    }
    m_buckets.clear();
}
// Updates the per-site statistics with one request and returns the resulting reputation data.
// A request without keyword matches is learned as legitimate; a request with matches may punish
// the reputation of its source (see the probing rule below).
ReputationData BehaviorAnalyzer::analyze_behavior(BehaviorAnalysisInputData& data)
{
    std::string &siteId = data.site_id;

    // Periodic maintenance, triggered whenever the sample counter is a multiple of the threshold.
    if (m_count % COUNTER_BACKUP_THRESHOLD == 0) {
        // TODO: backup
        // First refresh the average of every bucket ...
        for (auto entry = m_buckets.begin(); entry != m_buckets.end(); ++entry) {
            entry->second->evaluateAvg();
        }
        // ... and only then run the cleanup pass on every bucket.
        for (auto entry = m_buckets.begin(); entry != m_buckets.end(); ++entry) {
            entry->second->cleanSources();
        }
    }

    auto bucketEntry = m_buckets.find(siteId);
    if (bucketEntry == m_buckets.end()) {
        bucketEntry = m_buckets.emplace(siteId, new TopBucket(this)).first;
    }
    TopBucket *bucket = bucketEntry->second;

    std::string& source = data.source_identifier;
    std::string& userAgent = data.user_agent;
    std::string userAgentSource = userAgent + " " + source;

    if (data.keyword_matches.empty()) {
        quickLearn(siteId, source, userAgent, data.uri);
    } else {
        // Two cases here:
        // 1. No probing - always punish reputation
        // 2. If there's probing - only punish if too many keyword matches (strong suspicion)
        const bool probing =
            std::find(data.keyword_matches.begin(), data.keyword_matches.end(), "probing") !=
            data.keyword_matches.end();
        if (data.keyword_matches.size() > 2 || !probing) {
            // Punish reputation conditionally, see TopBucket::putAttack() for the details
            bucket->putAttack(data.short_uri,
                data.score * data.fp_mitigation_score / 10,
                source,
                userAgent,
                userAgentSource,
                data.location);
        }
    }

    return bucket->getInfo(source, userAgent, userAgentSource);
}
bool BehaviorAnalyzer::isSourceEmpty(std::string siteId, SourceType sourceType)
{
return m_buckets[siteId]->isSourceEmpty(sourceType);
}
void BehaviorAnalyzer::clearSources()
{
for (auto bucket : m_buckets) {
bucket.second->cleanSources();
}
}
size_t BehaviorAnalyzer::getCount()
{
return m_count;
}
double BehaviorAnalyzer::getAvgCount(std::string& siteId)
{
if (m_buckets.find(siteId) == m_buckets.end())
{
return -1.0;
}
return m_buckets[siteId]->getAvgCount();
}
double BehaviorAnalyzer::getReputationMean() const
{
return m_reputation_mean;
}
double BehaviorAnalyzer::getVariance() const
{
return m_variance;
}
// Learns one legitimate request: counts it for the source, the user agent and their combination
// in the bucket of the given site (the bucket is created on first use).
void BehaviorAnalyzer::quickLearn(std::string& siteId, std::string& source, std::string& userAgent, std::string& uri)
{
    auto bucketEntry = m_buckets.find(siteId);
    if (bucketEntry == m_buckets.end()) {
        bucketEntry = m_buckets.emplace(siteId, new TopBucket(this)).first;
    }

    std::string userAgentSource = userAgent + " " + source;
    bucketEntry->second->addKeys(uri, source, userAgent, userAgentSource);
}
// Maps an absolute reputation to a relative score in [0, 10] by comparing it with the running
// mean and standard deviation through the error function.
double BehaviorAnalyzer::getRelativeReputation(double reputation)
{
    // Larger value slows down the relative score reduction during attacks.
    const double viscosity = 0.15;

    // The reference point is halfway between the running mean and 100.
    const double mean = (m_reputation_mean + 100) / 2;

    // The standard deviation (square root of the variance) is scaled by the viscosity and then
    // averaged with 5.
    const double standardDeviation = (sqrt(m_variance) / viscosity + 5) / 2;

    const double score = errorProbabilityScore((reputation - mean) / standardDeviation);
    return 10 * score;
}
double BehaviorAnalyzer::errorProbabilityScore(double score)
{
double probScore = 0.5 + 0.5 * erf(score / 2);
//round to 3 decimal points
probScore = round(probScore, 3);
return probScore;
}
// Feeds one reputation sample into the running mean and variance.
// Samples that are not above 1.0 are ignored and do not advance the sample counter.
void BehaviorAnalyzer::updateAvrageAndVariance(double reputation)
{
    if (!(reputation > 1.0)) {
        return;
    }

    const double prev_mean = m_reputation_mean;
    m_reputation_mean = (double)(m_reputation_mean * m_count + reputation) / (m_count + 1L);

    // variance induction step
    const double weightedSquares = m_variance * m_count + pow((reputation - prev_mean), 2);
    m_variance = weightedSquares / (m_count + 1)
        - pow((m_reputation_mean - prev_mean), 2);

    m_count++;
}
// All counters start at zero and the removal flag is off.
Counters::Counters()
        :
    countLegit(0),
    attacksScoreSum(0.0),
    missed_urls(0),
    to_remove(false)
{}
// Rounds val to the given number of decimal places.
//
// The scale factor is kept as a double. It used to be stored in an unsigned int, which cannot
// hold 10^precision for precision >= 10 (the conversion is undefined behaviour) and which would
// silently truncate an inexact pow() result.
double round(double val, unsigned char precision) {
    const double factor = pow(10.0, static_cast<int>(precision));
    // The one-argument call resolves to the standard library round().
    return round(val * factor) / factor;
}
// True when the two values differ by at most delta (inclusive).
bool compareWithDelta(double rhs, double lhs, double delta) {
    const double distance = fabs(lhs - rhs);
    return distance <= delta;
}
// Compares two ReputationData values: the three per-source infos must match and the absolute and
// relative reputations must be within 0.0001 of each other. A mismatch is printed to stdout.
bool operator==(const ReputationData& lhs, const ReputationData& rhs)
{
    const bool same =
        lhs.ipReputation == rhs.ipReputation &&
        lhs.uaReputation == rhs.uaReputation &&
        lhs.uaIpReputation == rhs.uaIpReputation &&
        compareWithDelta(lhs.absoluteReputation, rhs.absoluteReputation, 0.0001) &&
        compareWithDelta(lhs.relativeReputation, rhs.relativeReputation, 0.0001 );
    if (same) {
        return true;
    }

    std::printf("lhs: {absolute rep: %f, reputation: %f} , rhs: {absolute rep: %f, reputation: %f}\n",
        lhs.absoluteReputation, lhs.relativeReputation, rhs.absoluteReputation, rhs.relativeReputation);
    return false;
}
// Compares two Source::Info values: coverage, legit-vs-attacks and reputation must be within
// 0.0001 of each other and the raw stats must match. A mismatch is printed to stdout.
bool operator==(const Source::Info& lhs, const Source::Info& rhs)
{
    const bool same =
        compareWithDelta(lhs.coverage, rhs.coverage, 0.0001) &&
        compareWithDelta(lhs.legitVsAttacks, rhs.legitVsAttacks, 0.0001) &&
        compareWithDelta(lhs.reputation, rhs.reputation, 0.0001) &&
        lhs.stats == rhs.stats;
    if (same) {
        return true;
    }

    std::printf("\tlhs: {coverage: %f, legitVsAttack: %f, reputation: %f}\n",
        lhs.coverage, lhs.legitVsAttacks, lhs.reputation);
    std::printf("\trhs: {coverage: %f, legitVsAttack: %f, reputation: %f}\n",
        rhs.coverage, rhs.legitVsAttacks, rhs.reputation);
    return false;
}
// Compares two Source::Stats values: the attack sums must be within 0.0001 of each other and the
// legit counts must be equal. A mismatch is printed to stdout.
bool operator==(const Source::Stats& lhs, const Source::Stats& rhs) {
    const bool same =
        compareWithDelta(lhs.attacks, rhs.attacks, 0.0001) &&
        lhs.countLegit == rhs.countLegit;
    if (same) {
        return true;
    }

    std::printf("\t\tlhs: {attacks: %f, count: %u}\n",
        lhs.attacks, lhs.countLegit);
    std::printf("\t\trhs: {attacks: %f, count: %u}\n",
        rhs.attacks, rhs.countLegit);
    return false;
}

View File

@@ -0,0 +1,179 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __WAF2_BEHAVIOR_H__f1edd27e
#define __WAF2_BEHAVIOR_H__f1edd27e
#include <string>
#include <vector>
#include <map>
#include <set>
#include <string.h>
#include <math.h>
#include <boost/noncopyable.hpp>
// NOTE(review): MAX_NUM_OF_KEYS_IN_COUNTER is not referenced in the visible code.
#define MAX_NUM_OF_KEYS_IN_COUNTER 100
// analyze_behavior() runs its maintenance pass whenever the sample counter is a multiple of this.
#define COUNTER_BACKUP_THRESHOLD 200000
// NOTE(review): not referenced in the visible code; it matches the 10x scaling applied in
// getRelativeReputation(), so presumably it is the upper bound of that value - confirm.
#define MAX_RELATIVE_REPUTATION 10.0
// Per-source counters kept by Source; the constructor zeroes everything.
class Counters {
public:
Counters();
// Incremented by Source::addKeys().
unsigned int countLegit;
// Increased by Source::putAttack() with the rounded, scaled attack score.
double attacksScoreSum;
// Incremented by Source::putAttack() when its missedUrl argument is true.
long int missed_urls;
// Set by Source::cleanSources(); an entry still flagged on the next pass is deleted.
bool to_remove;
};
// Selects one of the three source tables held by TopBucket (see TopBucket::isSourceEmpty).
typedef enum _SourceType {
// Table keyed by source IP.
IP_SOURCE_TYPE,
// Table keyed by user agent.
UA_SOURCE_TYPE,
// Table keyed by the combined user-agent/IP string.
UA_IP_SOURCE_TYPE
}SourceType;
class BehaviorAnalyzer;
// Table of per-source counters. It owns the Counters objects it stores (they are deleted in the
// destructor and in cleanSources()), hence the class is non-copyable.
class Source : public boost::noncopyable
{
public:
// Raw counters reported together with the computed values.
typedef struct _Stats {
unsigned int countLegit;
double attacks;
}Stats;
// Result of getInfo(): computed values plus the raw counters they were derived from.
typedef struct _Info {
double reputation;
double coverage;
double legitVsAttacks;
Source::Stats stats;
}Info;
Source();
~Source();
// Deletes entries flagged on the previous pass and flags all remaining entries.
void cleanSources();
// Average countLegit over all entries; 0 when there are none.
double getSourcesAvg();
size_t size();
// Records an attack score for the source, creating its counters if needed.
void putAttack(bool missedUrl, double score, std::string& source);
// Counts one legitimate request for the source, creating its counters if needed.
void addKeys(std::string& source);
// Returns 100 * (param + 1) / (param + count + 1), truncated to a whole number.
static double calcDiff(double count, double param);
Info getInfo(std::string& source, double avgCount);
// function for unit tests
bool empty();
private:
std::map<std::string, Counters*> sources; // key is either (source_ip) or (useragent+source_ip)
//std::set<std::string> urls; // set of URLs visited by this source
};
// Compiled out: the Bucket class below is excluded from the build by the #if 0 guard.
#if 0
class Bucket {
public:
size_t add(const std::string& key);
void clean();
bool exist(const std::string& key) const;
size_t get(const std::string& key) const;
size_t size() const { return _data.size(); }
private:
std::map<std::string, size_t> _data;
};
#endif
// Result of a behavior analysis: the info of each of the three source keys plus the combined
// reputation values derived from them (see TopBucket::getInfo).
typedef struct _ReputationData {
Source::Info ipReputation;
Source::Info uaReputation;
Source::Info uaIpReputation;
double relativeReputation; // the absolute reputation relative to the average
// Plain average of the three per-source reputation values.
double absoluteReputation;
}ReputationData;
// Rounds val to the given number of decimal places (overloads the one-argument ::round).
double round(double val, unsigned char precision);
// Tolerant comparisons: floating point members are compared with a 0.0001 tolerance and a
// mismatch is printed to stdout.
bool operator==(const Source::Stats& lhs, const Source::Stats& rhs);
bool operator==(const Source::Info& lhs, const Source::Info& rhs);
bool operator==(const ReputationData& lhs, const ReputationData& rhs);
// Per-site statistics: one Source table per key kind (IP, user agent, user-agent/IP) plus a
// cached average that is passed to Source::getInfo().
class TopBucket {
private:
Source m_ips;
Source m_userAgents;
Source m_ipUserAgents;
// Not owned; used to update and query the running mean and variance.
BehaviorAnalyzer* m_behaviorAnalyzer;
// Refreshed by evaluateAvg(); starts at 20.
double m_avgCount;
public:
TopBucket(BehaviorAnalyzer* behaviorAnalyzer) : m_behaviorAnalyzer(behaviorAnalyzer), m_avgCount(20) {}
// Counts one legitimate request for the three keys (uri is not used by the implementation).
void addKeys(std::string& uri, std::string& ip, std::string& ua, std::string& ua_ip);
// Registers an attack for the three keys when the score and the location qualify.
void putAttack(std::string& uri,
double score, std::string& ip,
std::string& ua,
std::string& ua_ip,
const std::string& location);
void cleanSources();
void evaluateAvg();
bool isSourceEmpty(SourceType sourceType);
double getAvgCount();
// Also feeds the absolute reputation into the analyzer's running statistics.
ReputationData getInfo(std::string& ip, std::string& ua, std::string& uaIp);
};
// Input of BehaviorAnalyzer::analyze_behavior() for a single request.
// The two scores carry default member initializers so that a default-constructed object never
// exposes indeterminate values (analyze_behavior multiplies them); aggregate initialization
// still works as before.
struct BehaviorAnalysisInputData {
    std::string site_id;
    std::string source_identifier;
    std::string user_agent;
    std::string short_uri; // data['short_uri'] (see fix_data_keys...)
    std::string uri; // data['uri'] (see fix_data_keys...)
    // Names of the keywords matched in the request; an empty list means a legitimate request.
    std::vector<std::string> keyword_matches;
    double score = 0.0;
    double fp_mitigation_score = 0.0; // calculated outside before analyze_behavior() !!!
    // Where the attack was found; TopBucket::putAttack compares it with "url", "url_param", "body".
    std::string location;
};
// Tracks per-site source statistics and the running mean/variance of the absolute reputation.
// The analyzer owns the TopBucket objects stored in m_buckets (they are deleted in the destructor).
class BehaviorAnalyzer {
public:
BehaviorAnalyzer();
~BehaviorAnalyzer();
// Updates the statistics with one request and returns the resulting reputation data.
ReputationData analyze_behavior(BehaviorAnalysisInputData& data);
void clearSources();
// Counts one legitimate request for the given site (uri is forwarded but not used further).
void quickLearn(std::string& siteId, std::string& sourceIp, std::string& userAgent, std::string& uri);
// Maps an absolute reputation to a value in [0, 10] relative to the running statistics.
double getRelativeReputation(double reputation);
// Feeds one sample into the running mean and variance; samples not above 1.0 are ignored.
void updateAvrageAndVariance(double reputation);
// unit test related functions
bool isSourceEmpty(std::string siteId, SourceType sourceType);
size_t getCount();
// Returns -1.0 when the site has no bucket.
double getAvgCount(std::string& siteId);
double getReputationMean() const;
double getVariance() const;
private:
double errorProbabilityScore(double score);
// TODO: move to SMEM
size_t m_count;
double m_variance;
double m_reputation_mean;
// Site id -> statistics bucket of that site.
std::map<std::string, TopBucket*> m_buckets;
};
#endif // __WAF2_BEHAVIOR_H__f1edd27e

View File

@@ -0,0 +1,108 @@
# Make the headers in the parent directory's include/ folder visible to the
# targets declared in this directory.
include_directories(../include)
# The waap_clib library target, built from the sources listed below.
# No STATIC/SHARED keyword is given, so the library type follows the
# project-wide default (BUILD_SHARED_LIBS).
add_library(waap_clib
Csrf.cc
CsrfPolicy.cc
ContentTypeParser.cc
CidrMatch.cc
DeepParser.cc
KeyStack.cc
ParserBase.cc
ParserBinary.cc
ParserHdrValue.cc
ParserJson.cc
ParserMultipartForm.cc
ParserRaw.cc
ParserUrlEncode.cc
ParserXML.cc
ParserDelimiter.cc
ParserConfluence.cc
ParserHTML.cc
PatternMatcher.cc
PHPSerializedDataParser.cc
WaapScores.cc
WaapKeywords.cc
Waf2Engine.cc
Waf2EngineGetters.cc
WaapScanner.cc
WaapRegexPreconditions.cc
Waf2Regex.cc
WaapAssetState.cc
Signatures.cc
Waf2Util.cc
WaapConfigBase.cc
WaapConfigApi.cc
WaapConfigApplication.cc
BehaviorAnalysis.cc
FpMitigation.cc
D2Main.cc
DeepAnalyzer.cc
Telemetry.cc
WaapOverride.cc
ScoreBuilder.cc
WaapTrigger.cc
WaapDecision.cc
WaapResponseInspectReasons.cc
WaapResponseInjectReasons.cc
WaapResultJson.cc
WaapAssetStatesManager.cc
Serializator.cc
IndicatorsFilterBase.cc
TypeIndicatorsFilter.cc
KeywordIndicatorFilter.cc
WaapOverrideFunctor.cc
WaapValueStatsAnalyzer.cc
TrustedSources.cc
WaapParameters.cc
IndicatorsFiltersManager.cc
ConfidenceFile.cc
ConfidenceCalculator.cc
TrustedSourcesConfidence.cc
RateLimiter.cc
RateLimiting.cc
ErrorLimiting.cc
WaapErrorDisclosurePolicy.cc
WaapOpenRedirect.cc
WaapOpenRedirectPolicy.cc
KeywordTypeValidator.cc
SecurityHeadersPolicy.cc
UserLimitsPolicy.cc
ScannerDetector.cc
TuningDecision.cc
ScanResult.cc
SingleDecision.cc
DecisionFactory.cc
AutonomousSecurityDecision.cc
CsrfDecision.cc
OpenRedirectDecision.cc
ErrorDisclosureDecision.cc
RateLimitingDecision.cc
UserLimitsDecision.cc
ErrorLimitingDecision.cc
WaapConversions.cc
SyncLearningNotification.cc
LogGenWrapper.cc
WaapSampleValue.cc
)
# Warning-suppression flags added to the compile command line for this directory.
add_definitions("-Wno-unused-function")
add_definitions("-Wno-unused-parameter")
add_definitions("-Wno-deprecated-declarations")
#target_link_libraries(waap_clib pm buffers generic_rulebase ip_utilities connkey report_messaging compression_utils -lz)
# Include static headers
#target_link_libraries(waap_clib)
#target_link_libraries(waap_clib logging event_is metric)
#link_directories(${BOOST_ROOT}/lib)
#link_directories(${ng_module_osrc_zlib_path}/include)
# Link with all the libraries
#target_link_libraries(waap_clib -lresolv)
#target_link_libraries(waap_clib ${PCRE2_LIBRARIES})
#target_link_libraries(waap_clib ${YAJL_LIBRARIES})
#target_link_libraries(waap_clib ${LIBXML2_LIBRARIES})
#target_link_libraries(waap_clib "${ng_module_osrc_openssl_path}/lib/libcrypto.so")
#target_link_libraries(waap_clib "${ng_module_osrc_boost_path}/lib/libboost_context.so")
#target_link_libraries(waap_clib "${ng_module_osrc_boost_path}/lib/libboost_regex.so")

View File

@@ -0,0 +1,159 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "CidrMatch.h"
#include <string.h>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <iostream>
#include <errno.h>
#include "log_generator.h"
#include <stdexcept>
USE_DEBUG_FLAG(D_WAAP);
namespace Waap {
namespace Util {
// Two CIDRData values are equal when their textual form, prefix length,
// address family flag and stored network address all agree.
bool CIDRData::operator==(const CIDRData &other) const {
    if (cidrString != other.cidrString) {
        return false;
    }
    if (networkBits != other.networkBits || isIPV6 != other.isIPV6) {
        return false;
    }
    // The address family of *this* object selects which stored address is compared.
    if (isIPV6) {
        return memcmp(ipCIDRV6.s6_addr, other.ipCIDRV6.s6_addr, sizeof(ipCIDRV6.s6_addr)) == 0;
    }
    return ipCIDRV4.s_addr == other.ipCIDRV4.s_addr;
}
// Checks whether the IPv4 address `addr` belongs to the network `net`/`bits`.
//
// addr, net - addresses in network byte order (as filled in by inet_pton).
// bits      - prefix length; valid values are 0..32. A prefix of 0 matches
//             every address.
// Returns true when the leading `bits` bits of both addresses are identical.
// A prefix length above 32 is not a valid IPv4 prefix and never matches
// (it would otherwise produce a negative shift count, which is undefined).
bool cidr4_match(const in_addr &addr, const in_addr &net, uint8_t bits) {
    if (bits == 0) {
        // C99 6.5.7 (3): u32 << 32 is undefined behaviour
        return true;
    }
    if (bits > 32) {
        return false;
    }
    // Build the netmask in host order, then convert so it lines up with s_addr.
    const uint32_t mask = htonl(0xFFFFFFFFu << (32 - bits));
    return ((addr.s_addr ^ net.s_addr) & mask) == 0;
}
// Checks whether the IPv6 address `address` belongs to the network
// `network`/`bits`.
//
// bits - prefix length; valid values are 0..128. A prefix of 0 matches every
//        address.
// Returns true when the leading `bits` bits of both addresses are identical.
// A prefix length above 128 is not a valid IPv6 prefix and never matches
// (comparing that many bits would read past the 16-byte address).
//
// The comparison works on the portable byte view (s6_addr), which is already
// in network order, so no platform-specific 32-bit view is needed.
bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_t bits) {
    if (bits > 128) {
        return false;
    }
    const uint8_t *a = address.s6_addr;
    const uint8_t *n = network.s6_addr;
    const size_t whole_bytes = bits >> 3;       // number of fully covered bytes
    const unsigned remaining_bits = bits & 0x07; // bits covered in the next byte
    if (whole_bytes != 0 && memcmp(a, n, whole_bytes) != 0) {
        return false;
    }
    if (remaining_bits != 0) {
        // Keep only the top `remaining_bits` bits of the partially covered byte.
        const uint8_t mask = static_cast<uint8_t>(0xFFu << (8 - remaining_bits));
        if ((a[whole_bytes] ^ n[whole_bytes]) & mask) {
            return false;
        }
    }
    return true;
}
// Parses `strCIDR` ("address" or "address/prefix-length") into `cidr`.
//
// When no "/prefix-length" is present the prefix defaults to 32 for strings
// without a ':' and to 128 otherwise. The address part is tried as IPv4 first
// (prefix must be <= 32) and then as IPv6 (prefix must be <= 128).
//
// Returns true and fills every field of `cidr` on success; returns false when
// the prefix length or the address cannot be parsed. Note that `cidr` may be
// partially written even when false is returned.
bool isCIDR(const std::string& strCIDR, CIDRData& cidr)
{
    const size_t slashPos = strCIDR.find_last_of('/');

    // Split into the address part and the prefix-length part.
    std::string strPrefix;
    std::string strSuffix;
    if (slashPos != std::string::npos) {
        strPrefix = strCIDR.substr(0, slashPos);
        strSuffix = strCIDR.substr(slashPos + 1);
    } else {
        strPrefix = strCIDR;
        strSuffix = (strCIDR.find(':') == std::string::npos) ? "32" : "128";
    }

    size_t processedBits = 0;
    int bits = -1;
    try {
        bits = std::stoi(strSuffix, &processedBits);
        // convert int to uint8_t
        cidr.networkBits = (uint8_t)bits;
    } catch (const std::invalid_argument &) {
        dbgDebug(D_WAAP) << "Failed to convert CIDR number of bits from string to int (Invalid arguments)."
            << strCIDR;
        return false;
    } catch (const std::out_of_range &) {
        dbgDebug(D_WAAP) << "Failed to convert CIDR number of bits from string to int (out of range)."
            << strCIDR;
        return false;
    }

    // The whole suffix must be numeric and within the widest (IPv6) range.
    const bool wholeSuffixParsed = (processedBits == strSuffix.length());
    if (!wholeSuffixParsed || bits > 128 || bits < 0) {
        dbgDebug(D_WAAP) << "Failed to convert CIDR number of bits from string to int (out of range)."
            << strCIDR;
        return false;
    }

    memset(&cidr.ipCIDRV4, 0, sizeof(struct in_addr));
    memset(&cidr.ipCIDRV6, 0, sizeof(struct in6_addr));

    if (inet_pton(AF_INET, strPrefix.c_str(), &cidr.ipCIDRV4) == 1 && bits <= 32) {
        cidr.isIPV6 = false;
    } else if (inet_pton(AF_INET6, strPrefix.c_str(), &cidr.ipCIDRV6) == 1 && bits <= 128) {
        cidr.isIPV6 = true;
    } else {
        return false;
    }

    cidr.cidrString = strCIDR;
    return true;
}
bool cidrMatch(const std::string& sourceip, const std::string& targetCidr) {
CIDRData cidrData;
// check if target is valid input.
if (!isCIDR(targetCidr, cidrData))
{
return false;
}
return cidrMatch(sourceip, cidrData);
}
// Checks whether `sourceip` belongs to the already parsed network `cidr`.
// The source address is parsed using the same address family as `cidr`;
// when it cannot be parsed in that family the function logs and returns false.
bool cidrMatch(const std::string & sourceip, const CIDRData & cidr){
    if (cidr.isIPV6) {
        struct in6_addr sourceAddr6;
        if (inet_pton(AF_INET6, sourceip.c_str(), &sourceAddr6) == 1) {
            return cidr6_match(sourceAddr6, cidr.ipCIDRV6, cidr.networkBits);
        }
    } else {
        struct in_addr sourceAddr4;
        if (inet_pton(AF_INET, sourceip.c_str(), &sourceAddr4) == 1) {
            return cidr4_match(sourceAddr4, cidr.ipCIDRV4, cidr.networkBits);
        }
    }
    dbgDebug(D_WAAP) << "Source IP address does not match any of the CIDR definitions.";
    return false;
}
}
}

View File

@@ -0,0 +1,39 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <errno.h>
namespace Waap {
namespace Util {
// Parsed representation of a CIDR string, filled in by isCIDR().
struct CIDRData {
// The original text the structure was parsed from.
std::string cidrString;
// Network address; only the member matching isIPV6 is meaningful.
struct in_addr ipCIDRV4;
struct in6_addr ipCIDRV6;
// Prefix length (number of leading network bits).
uint8_t networkBits;
// true when the network is IPv6 (ipCIDRV6 is used), false for IPv4.
bool isIPV6;
bool operator==(const CIDRData &other) const;
};
// Parses strCIDR into cidr; returns false when strCIDR is not a valid CIDR.
bool isCIDR(const std::string& strCIDR, CIDRData& cidr);
// Returns true when sourceip belongs to the already parsed network cidr.
bool cidrMatch(const std::string& sourceip, const CIDRData& cidr);
// Same as above, but parses target as a CIDR string first; an invalid target never matches.
bool cidrMatch(const std::string &sourceip, const std::string &target);
}
}

View File

@@ -0,0 +1,738 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ConfidenceCalculator.h"
#include <cereal/types/unordered_set.hpp>
#include "waap.h"
#include "ConfidenceFile.h"
#include "i_agent_details.h"
#include "i_messaging.h"
#include "i_mainloop.h"
#include <math.h>
USE_DEBUG_FLAG(D_WAAP);
// Passed to the SerializeToLocalAndRemoteSyncBase constructor as its second argument.
#define SYNC_WAIT_TIME std::chrono::seconds(300) // 5 minutes in seconds
// Confidence level a value must reach to be added to the confident set.
#define SCORE_THRESHOLD 100.0
// Duration of a single yield while waiting for the ignore-sources provider.
#define BUSY_WAIT_TIME std::chrono::microseconds(100000) // 0.1 seconds
// Maximum number of BUSY_WAIT_TIME yields before giving up on the wait.
#define WAIT_LIMIT 10
// Multiplier applied to the confidence gain of parameters tuned as benign.
#define BENIGN_PARAM_FACTOR 2
// Logarithm of x in base n, computed with the change-of-base formula.
double logn(double x, double n)
{
    const double numerator = std::log(x);
    const double denominator = std::log(n);
    return numerator / denominator;
}
// Builds a calculator with the given learning parameters and immediately
// restores any previously saved state via restore().
// The remote path gets a "/Confidence" suffix unless it is empty, and
// learnPermanently is always initialised to true.
ConfidenceCalculator::ConfidenceCalculator(
    size_t minSources,
    size_t minIntervals,
    std::chrono::minutes intervalDuration,
    double ratioThreshold,
    const Val &nullObj,
    const std::string &backupPath,
    const std::string &remotePath,
    const std::string &assetId,
    TuningDecision* tuning,
    I_IgnoreSources* ignoreSrc)
    :
    SerializeToLocalAndRemoteSyncBase(
        intervalDuration,
        SYNC_WAIT_TIME,
        backupPath,
        remotePath.empty() ? remotePath : remotePath + "/Confidence",
        assetId,
        "ConfidenceCalculator"),
    m_params({ minSources, minIntervals, intervalDuration, ratioThreshold, true }),
    m_null_obj(nullObj),
    m_time_window_logger(),
    m_confident_sets(),
    m_confidence_level(),
    m_last_indicators_update(0),
    m_ignoreSources(ignoreSrc),
    m_tuning(tuning)
{
    restore();
}
// Drops the learned confident sets and the current window log before the
// members themselves are destroyed.
ConfidenceCalculator::~ConfidenceCalculator()
{
    m_confident_sets.clear();
    m_time_window_logger.clear();
}
// Forgets everything that was learned: clears the window log, the confidence
// levels and the confident sets, then deletes the backup file at m_filePath.
void ConfidenceCalculator::hardReset()
{
    m_confident_sets.clear();
    m_confidence_level.clear();
    m_time_window_logger.clear();

    std::remove(m_filePath.c_str());
}
// Clears the current window log. When indicators are not learned permanently
// the whole learned state is wiped as well (see hardReset()).
void ConfidenceCalculator::reset()
{
    m_time_window_logger.clear();
    if (m_params.learnPermanently)
    {
        return;
    }
    hardReset();
}
// Applies a new parameter set.
// Returns false (and does nothing) when `params` equals the current
// parameters; otherwise stores them, resets the calculator, updates the
// sync interval and returns true.
bool ConfidenceCalculator::reset(ConfidenceCalculatorParams& params)
{
    const bool unchanged = (params == m_params);
    if (unchanged)
    {
        return false;
    }

    dbgInfo(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -"
        " resetting the ConfidenceCalculatorParams: " << params;

    m_params = params;
    reset();
    setInterval(m_params.intervalDuration);
    return true;
}
// REST object used by ConfidenceCalculator::postData() to upload a window log.
// The payload member is declared through the C2S_PARAM macro
// (presumably "client to server" - TODO confirm against the REST framework).
class WindowLogPost : public RestGetFile
{
public:
// Initialises the payload from the given window logger.
WindowLogPost(ConfidenceCalculator::KeyValSourcesLogger& _window_logger)
: window_logger(_window_logger)
{
}
private:
C2S_PARAM(ConfidenceCalculator::KeyValSourcesLogger, window_logger)
};
// REST object used by ConfidenceCalculator::pullData() to download a window log.
// The payload member is declared through the S2C_PARAM macro
// (presumably "server to client" - TODO confirm against the REST framework).
class WindowLogGet : public RestGetFile
{
public:
WindowLogGet() : window_logger()
{
}
// Returns the received window logger wrapped in a Maybe; callers should check
// it before unpacking.
Maybe<ConfidenceCalculator::KeyValSourcesLogger> getWindowLogger()
{
return window_logger.get();
}
private:
S2C_PARAM(ConfidenceCalculator::KeyValSourcesLogger, window_logger)
};
bool ConfidenceCalculator::postData()
{
m_time_window_logger_backup = m_time_window_logger;
m_time_window_logger.clear();
std::string url = getPostDataUrl();
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Sending the data to: " << url;
WindowLogPost currentWindow(m_time_window_logger_backup);
return sendNoReplyObjectWithRetry(currentWindow,
I_Messaging::Method::PUT,
url);
}
void ConfidenceCalculator::pullData(const std::vector<std::string>& files)
{
if (getIntervalsCount() == m_params.minIntervals)
{
mergeProcessedFromRemote();
}
std::string url = getPostDataUrl();
std::string sentFile = url.erase(0, url.find_first_of('/') + 1);
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "pulling files, skipping: " << sentFile;
for (auto file : files)
{
if (file == sentFile)
{
continue;
}
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Pulling the file: " << file;
WindowLogGet getWindow;
sendObjectWithRetry(getWindow,
I_Messaging::Method::GET,
getUri() + "/" + file);
KeyValSourcesLogger remoteLogger = getWindow.getWindowLogger().unpack();
for (auto& log : remoteLogger)
{
std::string key = log.first;
for (auto& entry : log.second)
{
std::string value = entry.first;
for (auto& source : entry.second)
{
m_time_window_logger_backup[key][value].insert(source);
}
}
}
}
}
// Processes one learning window. When nothing was staged in the backup logger
// the current window log is moved there first; then the interval is calculated.
void ConfidenceCalculator::processData()
{
    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " - processing the confidence data";

    const bool nothingStaged = m_time_window_logger_backup.empty();
    if (nothingStaged)
    {
        m_time_window_logger_backup = m_time_window_logger;
        m_time_window_logger.clear();
    }

    calculateInterval();
}
// Refreshes the local state from the processed remote files; simply forwards
// to pullProcessedData().
void ConfidenceCalculator::updateState(const std::vector<std::string>& files)
{
    pullProcessedData(files);
}
// Downloads every processed confidence file in `files`.
// The first successfully received confidence set replaces the local confident
// sets, later ones are merged into them. Received confidence levels overwrite
// the local confidence levels.
void ConfidenceCalculator::pullProcessedData(const std::vector<std::string>& files)
{
    dbgTrace(D_WAAP) << "Fetching the confidence set object";

    bool mergedAnySet = false;
    for (const auto& file : files)
    {
        ConfidenceFileDecryptor getConfFile;
        const bool fetched = sendObjectWithRetry(
            getConfFile,
            I_Messaging::Method::GET,
            getUri() + "/" + file);
        if (!fetched)
        {
            continue;
        }

        if (getConfFile.getConfidenceSet().ok())
        {
            // Only the very first merged set clears the local sets beforehand.
            mergeFromRemote(getConfFile.getConfidenceSet().unpack(), !mergedAnySet);
            mergedAnySet = true;
        }

        if (getConfFile.getConfidenceLevels().ok())
        {
            m_confidence_level = getConfFile.getConfidenceLevels().unpackMove();
        }
    }
}
// Uploads the confident sets and confidence levels (HTTP PUT) to
// "<uri>/<remote path>/processed/confidence.data".
// Nothing is sent until at least minIntervals windows have been processed.
void ConfidenceCalculator::postProcessedData()
{
    const bool enoughWindows = !(getIntervalsCount() < m_params.minIntervals);
    if (!enoughWindows)
    {
        dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Not sending the processed data - not enough windows";
        return;
    }

    std::string postUrl = getUri() + "/" + m_remotePath + "/processed/confidence.data";
    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Posting the confidence set object to: " << postUrl;

    ConfidenceFileEncryptor postConfFile(m_confident_sets, m_confidence_level);
    sendNoReplyObjectWithRetry(postConfFile, I_Messaging::Method::PUT, postUrl);
}
void ConfidenceCalculator::serialize(std::ostream& stream)
{
cereal::JSONOutputArchive archive(stream);
archive(
cereal::make_nvp("version", 3),
cereal::make_nvp("params", m_params),
cereal::make_nvp("last_indicators_update", m_last_indicators_update),
cereal::make_nvp("confidence_levels", m_confidence_level),
cereal::make_nvp("confident_sets", m_confident_sets)
);
}
// Loads the calculator state from the JSON in `stream`.
// The "version" field selects the loader; a file without a readable version
// is treated as version 0. Unknown versions are logged and ignored.
void ConfidenceCalculator::deserialize(std::istream& stream)
{
    cereal::JSONInputArchive archive(stream);

    size_t version;
    try
    {
        archive(cereal::make_nvp("version", version));
    }
    catch (std::runtime_error & e) {
        // Drop the pending field name so the version-0 loader can start reading.
        archive.setNextName(nullptr);
        version = 0;
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
            " failed to load the file version: " << e.what();
    }

    if (version == 0)
    {
        loadVer0(archive);
    }
    else if (version == 1)
    {
        loadVer1(archive);
    }
    else if (version == 2)
    {
        loadVer2(archive);
    }
    else if (version == 3)
    {
        loadVer3(archive);
    }
    else
    {
        dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
            " failed to deserialize, unknown version: " << version;
    }
}
// Loads an unversioned (version 0) file. Two generations of field names
// exist: the plain names are tried first and, when any of them fails to
// parse, the "m_"-prefixed names are tried as a fallback.
void ConfidenceCalculator::loadVer0(cereal::JSONInputArchive& archive)
{
    const bool parsedWithPlainNames = tryParseVersionBasedOnNames(
        archive,
        std::string("params"),
        std::string("last_indicators_update"),
        std::string("windows_summary_list"),
        std::string("confident_sets")
    );
    if (parsedWithPlainNames)
    {
        return;
    }

    tryParseVersionBasedOnNames(
        archive,
        std::string("m_params"),
        std::string("m_lastIndicatorsUpdate"),
        std::string("m_windows_summary_list"),
        std::string("m_confident_sets")
    );
}
// Converts the legacy per-window summaries into confidence levels: every
// appearance of a value in a window adds ceil(SCORE_THRESHOLD / minIntervals)
// to that value's confidence level.
void ConfidenceCalculator::convertWindowSummaryToConfidenceLevel(const WindowsConfidentValuesList& windows)
{
    const double perWindowGain = std::ceil(SCORE_THRESHOLD / m_params.minIntervals);

    for (const auto& keyAndWindows : windows)
    {
        const auto& key = keyAndWindows.first;
        for (const auto& windowValues : keyAndWindows.second)
        {
            for (const auto& value : windowValues)
            {
                m_confidence_level[key][value] += perWindowGain;
            }
        }
    }
}
// Loads a version 2 file. Keys stored in the file are passed through
// normalize_param() before being stored. When several stored keys normalize
// to the same key, confident sets overwrite each other and confidence levels
// are combined by keeping the highest level per value.
void ConfidenceCalculator::loadVer2(cereal::JSONInputArchive& archive)
{
    ConfidenceCalculatorParams params;
    ConfidenceSet confidenceSets;
    ConfidenceLevels confidenceLevels;
    archive(
        cereal::make_nvp("params", params),
        cereal::make_nvp("last_indicators_update", m_last_indicators_update),
        cereal::make_nvp("confidence_levels", confidenceLevels),
        cereal::make_nvp("confident_sets", confidenceSets)
    );
    reset(params);

    for (auto& storedSet : confidenceSets)
    {
        m_confident_sets[normalize_param(storedSet.first)] = storedSet.second;
    }

    for (auto& storedLevels : confidenceLevels)
    {
        std::string normParam = normalize_param(storedLevels.first);

        auto knownParam = m_confidence_level.find(normParam);
        if (knownParam == m_confidence_level.end())
        {
            // First time this normalized key is seen: take the levels as they are.
            m_confidence_level[normParam] = storedLevels.second;
            continue;
        }

        auto& knownLevels = knownParam->second;
        for (auto& storedValueLevel : storedLevels.second)
        {
            auto knownValue = knownLevels.find(storedValueLevel.first);
            if (knownValue == knownLevels.end())
            {
                knownLevels[storedValueLevel.first] = storedValueLevel.second;
            }
            else
            {
                knownValue->second = std::max(knownValue->second, storedValueLevel.second);
            }
        }
    }
}
// Loads a version 3 file: the stored update time, confidence levels and
// confident sets are read straight into the members, then the stored
// parameters are applied through reset(params).
void ConfidenceCalculator::loadVer3(cereal::JSONInputArchive& archive)
{
    ConfidenceCalculatorParams storedParams;
    archive(
        cereal::make_nvp("params", storedParams),
        cereal::make_nvp("last_indicators_update", m_last_indicators_update),
        cereal::make_nvp("confidence_levels", m_confidence_level),
        cereal::make_nvp("confident_sets", m_confident_sets)
    );
    reset(storedParams);
}
// Loads a version 1 file. This layout stores per-window summaries instead of
// confidence levels, so the summaries are converted after the stored
// parameters have been applied.
void ConfidenceCalculator::loadVer1(cereal::JSONInputArchive& archive)
{
    ConfidenceCalculatorParams storedParams;
    WindowsConfidentValuesList storedWindows;
    archive(
        cereal::make_nvp("params", storedParams),
        cereal::make_nvp("last_indicators_update", m_last_indicators_update),
        cereal::make_nvp("windows_summary_list", storedWindows),
        cereal::make_nvp("confident_sets", m_confident_sets)
    );
    reset(storedParams);
    convertWindowSummaryToConfidenceLevel(storedWindows);
}
// Best-effort loader for unversioned files: reads four fields from `archive`
// using the caller-supplied field names.
// Each field is parsed independently; a field that fails to parse is logged,
// marks the overall result as failed, and parsing continues with the next one.
// Returns true only when all four fields were parsed successfully.
bool ConfidenceCalculator::tryParseVersionBasedOnNames(
cereal::JSONInputArchive& archive,
const std::string &params_field_name,
const std::string &indicators_update_field_name,
const std::string &windows_summary_field_name,
const std::string &confident_sets_field_name)
{
bool result = true;
// 1. Learning parameters.
try
{
ConfidenceCalculatorParams temp_params;
archive(cereal::make_nvp(params_field_name, temp_params));
// reset(params) already stores the parameters when they differ from the
// current ones, so the assignment below only matters when they are equal.
reset(temp_params);
m_params = temp_params;
}
catch (std::runtime_error & e) {
// Drop the pending field name so the next field can be read.
archive.setNextName(nullptr);
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" failed to load configuration of WAAP parameters from the learned data file: "
<< e.what();
result = false;
}
// 2. Time of the last indicators update.
try
{
size_t temp_last_indicator_update = 0;
archive(cereal::make_nvp(indicators_update_field_name, temp_last_indicator_update));
m_last_indicators_update = temp_last_indicator_update;
}
catch (std::runtime_error & e) {
archive.setNextName(nullptr);
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" failed to load the update date for indicators from the learned data file: "
<< e.what();
result = false;
}
// 3. Legacy per-window summaries, converted into confidence levels.
try
{
WindowsConfidentValuesList temp_windows_summary_list;
archive(cereal::make_nvp(windows_summary_field_name, temp_windows_summary_list));
convertWindowSummaryToConfidenceLevel(temp_windows_summary_list);
}
catch (std::runtime_error & e) {
archive.setNextName(nullptr);
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" failed to load windows summary list from the learned data file: " << e.what();
result = false;
}
// 4. Confident sets. The legacy layout has no per-key timestamp, so every
// loaded set is stamped with the current wall-clock time in seconds.
try
{
std::unordered_map<Key, ValuesSet> temp_confident_sets;
archive(cereal::make_nvp(confident_sets_field_name, temp_confident_sets));
size_t current_time = std::chrono::duration_cast<std::chrono::seconds>(
Singleton::Consume<I_TimeGet>::by<WaapComponent>()->getWalltime()).count();
for (auto setItr : temp_confident_sets)
{
m_confident_sets[setItr.first] = std::pair<ValuesSet, size_t>(setItr.second, current_time);
}
}
catch (std::runtime_error & e) {
archive.setNextName(nullptr);
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" failed to load confident sets from the learned data file: " << e.what();
result = false;
}
return result;
}
// Merges `confidence_set_to_merge` into `confidence_set`.
//
// confidence_set          - destination; entries are stored under the
//                           normalized form of each incoming key.
// confidence_set_to_merge - source sets; not modified.
// last_indicators_update  - raised to the newest timestamp seen while merging.
//
// For every incoming key the values are added to the destination entry and
// the entry's timestamp becomes the newer of the two. Values and timestamp
// are kept under the same (normalized) key, so no empty entry is created for
// the raw key.
void ConfidenceCalculator::mergeConfidenceSets(
    ConfidenceSet& confidence_set,
    const ConfidenceSet& confidence_set_to_merge,
    size_t& last_indicators_update
)
{
    for (const auto& set : confidence_set_to_merge)
    {
        // Resolve the destination entry once; creates it when it does not exist yet.
        ValueSetWithTime& merged = confidence_set[normalize_param(set.first)];
        dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Merging the set for the key: " << set.first <<
            ". Number of present values: " << merged.first.size();
        merged.first.insert(set.second.first.begin(), set.second.first.end());
        merged.second = std::max<size_t>(merged.second, set.second.second);
        last_indicators_update = std::max<size_t>(last_indicators_update, merged.second);
    }
}
// Merges a confidence set received from the remote side into the local
// confident sets. On the first pull the local sets are discarded beforehand,
// so the remote state replaces them instead of being added to them.
void ConfidenceCalculator::mergeFromRemote(const ConfidenceSet& remote_confidence_set, bool is_first_pull)
{
    if (is_first_pull)
    {
        m_confident_sets.clear();
    }

    mergeConfidenceSets(m_confident_sets, remote_confidence_set, m_last_indicators_update);
}
bool ConfidenceCalculator::is_confident(const Key &key, const Val &value) const
{
auto confidentSetItr = m_confident_sets.find(key);
if (confidentSetItr == m_confident_sets.end())
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" failed to find the key(" << key << ")";
return false;
}
const ValuesSet& confidentValues = confidentSetItr->second.first;
if (confidentValues.find(value) != confidentValues.end())
{
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" confident that " << value << " should be filtered for " << key;
return true;
}
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" failed to find the value(" << value << ")";
return false;
}
void ConfidenceCalculator::calcConfidentValues()
{
std::unordered_map<Key, ValueSetWithTime> confidenceSetCopy = m_confident_sets;
if (!m_params.learnPermanently)
{
m_confident_sets.clear();
}
for (auto& confidenceLevels : m_confidence_level)
{
Key key = confidenceLevels.first;
for (auto& valConfidenceLevel : confidenceLevels.second)
{
Val value = valConfidenceLevel.first;
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "key: " << key << ", value: " << value
<< ", confidence level: " << valConfidenceLevel.second;
if (valConfidenceLevel.second >= SCORE_THRESHOLD)
{
size_t confidenceValuesSize = m_confident_sets[key].first.size();
m_confident_sets[key].first.insert(value);
if (m_confident_sets[key].first.size() > confidenceValuesSize)
{
std::chrono::seconds current_time = std::chrono::duration_cast<std::chrono::seconds>(
Singleton::Consume<I_TimeGet>::by<WaapComponent>()->getWalltime());
m_confident_sets[key].second = current_time.count();
m_last_indicators_update = std::chrono::duration_cast<std::chrono::minutes>(current_time).count();
}
}
}
}
}
// Returns a copy of the confident values learned for `key`, or an empty set
// when the key is unknown.
ConfidenceCalculator::ValuesSet ConfidenceCalculator::getConfidenceValues(const Key &key) const
{
    const auto keyEntry = m_confident_sets.find(key);
    if (keyEntry != m_confident_sets.end())
    {
        return keyEntry->second.first;
    }

    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << ";" <<
        " failed to find the key(" << key << ")";
    return ValuesSet();
}
// Returns the stored time of the last change to the confident sets.
size_t ConfidenceCalculator::getLastConfidenceUpdate()
{
    const size_t lastUpdate = m_last_indicators_update;
    return lastUpdate;
}
// Records that `source` sent `value` for `key` in the current window.
// Every real value is additionally recorded under the null object, which
// therefore collects all sources that touched the key.
void ConfidenceCalculator::log(const Key &key, const Val &value, const std::string &source)
{
    m_time_window_logger[key][value].insert(source);

    if (value == m_null_obj)
    {
        // Already the null-object record; nothing more to add.
        return;
    }
    logSourceHit(key, source);
}
void ConfidenceCalculator::logSourceHit(const Key &key, const std::string &source)
{
log(key, m_null_obj, source);
}
void ConfidenceCalculator::mergeSourcesCounter(const Key& key, const SourcesCounters& counters)
{
if (key.rfind("url#", 0) == 0 && m_owner == "TypeIndicatorFilter")
{
return;
}
SourcesCounters& currentCounters = m_time_window_logger[key];
for (auto& counter : counters)
{
SourcesSet& srcSet = currentCounters[counter.first];
srcSet.insert(counter.second.begin(), counter.second.end());
}
}
// Removes every source listed in `badSources` from `sources`.
// A null `badSources` means there is nothing to remove.
void ConfidenceCalculator::removeBadSources(SourcesSet& sources, const std::vector<std::string>* badSources)
{
    if (badSources != nullptr)
    {
        for (const auto& ignoredSource : *badSources)
        {
            sources.erase(ignoredSource);
        }
    }
}
// Returns the total weight of `sources`. Every source weighs 1; a source that
// the tuning decisions mark as benign weighs minSources instead (one source
// plus a bonus of minSources - 1). Without tuning decisions the weight is
// simply the number of sources.
size_t ConfidenceCalculator::sumSourcesWeight(const SourcesSet& sources)
{
    size_t totalWeight = sources.size();

    if (m_tuning != nullptr)
    {
        for (const auto& source : sources)
        {
            const bool isBenign = (m_tuning->getDecision(source, SOURCE) == BENIGN);
            if (isBenign)
            {
                dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "increasing source weight";
                totalWeight += m_params.minSources - 1;
            }
        }
    }

    return totalWeight;
}
// Processes one learning window: folds the backup window log into the
// per-value confidence levels, decays values that did not appear in this
// window, clears the backup log and recomputes the confident sets.
void ConfidenceCalculator::calculateInterval()
{
std::vector<std::string>* sourcesToIgnore = nullptr;
if (m_ignoreSources != nullptr)
{
// Give the ignore-sources provider up to WAIT_LIMIT yields of BUSY_WAIT_TIME
// each to become ready; after that the list is fetched regardless.
int waitItr = 0;
while (!m_ignoreSources->ready() && waitItr < WAIT_LIMIT)
{
Singleton::Consume<I_MainLoop>::by<WaapComponent>()->yield(BUSY_WAIT_TIME);
waitItr++;
}
if (waitItr == WAIT_LIMIT && !m_ignoreSources->ready())
{
dbgWarning(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner <<
" - wait for ignore sources ready timeout";
}
sourcesToIgnore = m_ignoreSources->getSourcesToIgnore();
}
// The loop variable is a copy of each entry, so the source removal and the
// null-object lookup below change the copy, not m_time_window_logger_backup.
for (auto sourcesCtrItr : m_time_window_logger_backup)
{
SourcesCounters& srcCtrs = sourcesCtrItr.second;
Key key = sourcesCtrItr.first;
ValuesSet summary;
double factor = 1.0;
if (m_tuning != nullptr)
{
std::string param_name = key.substr(key.find("#") + 1); // not always accurate but good enough
if (m_tuning->getDecision(param_name, PARAM_NAME) == BENIGN)
{
// Parameters tuned as benign gain confidence faster.
factor = BENIGN_PARAM_FACTOR;
}
}
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" calculate window summary for the parameter: " << key;
// get all unique sources from the null object counter
SourcesSet& sourcesUnion = srcCtrs[m_null_obj];
removeBadSources(sourcesUnion, sourcesToIgnore);
size_t numOfSources = sumSourcesWeight(sourcesUnion);
m_windows_counter[key]++;
if (numOfSources < m_params.minSources)
{
// not enough sources to learn from
// The log below prints the plain number of sources, not the weighted
// numOfSources that was compared against minSources.
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " -" <<
" not enough sources to learn for " << key << " - needed: " <<
m_params.minSources << ", have: " << sourcesUnion.size();
// Carry the counters over into the current window log so they are
// considered again in the next window.
mergeSourcesCounter(key, srcCtrs);
continue;
}
// Again iterated by copy: removing bad sources only affects the local copy.
for (auto srcSets : srcCtrs)
{
// log the ratio of unique sources from all sources for each value
SourcesSet& currentSourcesSet = srcSets.second;
Val value = srcSets.first;
if (value == m_null_obj)
{
continue;
}
removeBadSources(currentSourcesSet, sourcesToIgnore);
size_t currentSourcesCount = sumSourcesWeight(currentSourcesSet);
auto& confidenceLevel = m_confidence_level[key][value];
if (currentSourcesCount == 0)
{
// Only ignored sources reported this value: lower its confidence by one step.
confidenceLevel -= std::ceil(SCORE_THRESHOLD / m_params.minIntervals);
continue;
}
// Gain = one step * (share of sources / ratio threshold)
//        * log base minSources of the source count * benign factor.
// NOTE(review): with minSources == 1 the logarithm base is 1 and logn()
// divides by zero - confirm that minSources is always greater than 1.
double ratio = ((double)currentSourcesCount / numOfSources);
double diff = std::ceil(SCORE_THRESHOLD / m_params.minIntervals) * (ratio / m_params.ratioThreshold) *
logn(currentSourcesCount, m_params.minSources) * factor;
confidenceLevel += diff;
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Owner: " << m_owner << " - key: " << key <<
" value: " << value << "confidence level: " << confidenceLevel;
}
// The key had enough sources in this window: restart its window counter.
m_windows_counter[key] = 0;
}
// Decay pass: for keys that appeared in this window, scale down the confidence
// of every known value that did not appear.
for (auto& keyMap : m_confidence_level)
{
for (auto& valMap : keyMap.second)
{
if (m_time_window_logger_backup.find(keyMap.first) != m_time_window_logger_backup.end() &&
m_time_window_logger_backup[keyMap.first].find(valMap.first) ==
m_time_window_logger_backup[keyMap.first].end())
{
// reduce confidence when value do not appear
valMap.second *= m_params.ratioThreshold;
}
}
}
m_time_window_logger_backup.clear();
calcConfidentValues();
}
// Sets the owner name used in log messages to "<owner>/ConfidenceCalculator".
void ConfidenceCalculator::setOwner(const std::string& owner)
{
    std::string qualifiedOwner = owner + "/ConfidenceCalculator";
    m_owner = qualifiedOwner;
}
// Two parameter sets are equal when all five fields are equal.
bool ConfidenceCalculatorParams::operator==(const ConfidenceCalculatorParams& other)
{
    if (minSources != other.minSources)
    {
        return false;
    }
    if (minIntervals != other.minIntervals)
    {
        return false;
    }
    if (intervalDuration != other.intervalDuration)
    {
        return false;
    }
    if (ratioThreshold != other.ratioThreshold)
    {
        return false;
    }
    return learnPermanently == other.learnPermanently;
}
// Prints the parameters on one line in a human-readable form (used in logs).
std::ostream& operator<<(std::ostream& os, const ConfidenceCalculatorParams& ccp)
{
    os << "min sources: " << ccp.minSources;
    os << " min intervals: " << ccp.minIntervals;
    os << " interval duration(minutes): " << ccp.intervalDuration.count();
    os << " ratio threshold: " << ccp.ratioThreshold;
    os << " should keep indicators permanently: " << ccp.learnPermanently;
    return os;
}

View File

@@ -0,0 +1,155 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#include <unordered_map>
#include <unordered_set>
#include <list>
#include <algorithm>
#include "i_serialize.h"
#include <cereal/archives/json.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/types/list.hpp>
#include <cereal/types/utility.hpp>
#include "debug.h"
#include "Waf2Util.h"
#include <ostream>
#include "i_ignoreSources.h"
#include "TuningDecisions.h"
USE_DEBUG_FLAG(D_WAAP_CONFIDENCE_CALCULATOR);
class WaapComponent;
// Learning parameters of a ConfidenceCalculator.
struct ConfidenceCalculatorParams
{
// Minimum (weighted) number of sources a key needs in a window to be learned from.
size_t minSources;
// Number of windows over which a value is expected to reach the confidence
// threshold; also the number of processed windows required before processed
// data is posted.
size_t minIntervals;
// Length of one learning window.
std::chrono::minutes intervalDuration;
// Divides the source ratio when confidence is gained and multiplies the
// confidence of values that did not appear in a window.
double ratioThreshold;
// When false, resetting the calculator also wipes everything learned so far.
bool learnPermanently;
// Shared save/load routine: the duration is serialized as a plain count of
// minutes and converted back after the archive call, so the same code works
// for both input and output archives.
template <class Archive>
void serialize(Archive& ar)
{
size_t duration = intervalDuration.count();
ar(cereal::make_nvp("minSources", minSources),
cereal::make_nvp("minIntervals", minIntervals),
cereal::make_nvp("intervalDuration", duration),
cereal::make_nvp("ratioThreshold", ratioThreshold),
cereal::make_nvp("learnPermanently", learnPermanently));
intervalDuration = std::chrono::minutes(duration);
}
// Field-by-field equality. Declared non-const, so it cannot be called on a
// const left-hand operand.
bool operator==(const ConfidenceCalculatorParams& other);
friend std::ostream& operator<<(std::ostream& os, const ConfidenceCalculatorParams& ccp);
};
// Learns, per key, which values are reported by enough distinct sources over
// enough time windows to be considered "confident".
// Sources are logged per window with log(); each processed window updates a
// confidence level per (key, value), and values whose level reaches the
// threshold are placed in the confident set of their key.
// State is saved, restored and synchronised through
// SerializeToLocalAndRemoteSyncBase.
class ConfidenceCalculator : public SerializeToLocalAndRemoteSyncBase
{
public:
typedef std::string Key;
typedef std::string Val;
template<typename K, typename V>
using UMap = std::unordered_map<K, V>;
// key -> val -> sources set
typedef std::unordered_set<std::string> SourcesSet;
typedef UMap<Val, SourcesSet> SourcesCounters;
typedef UMap<Key, SourcesCounters> KeyValSourcesLogger;
// key -> list of values sets
// NOTE(review): std::set and std::vector are used in this header but <set>
// and <vector> are not included directly - presumably they arrive through
// another header; confirm.
typedef std::set<Val> ValuesSet;
// Confident values of a key together with the time they were last extended.
typedef std::pair<ValuesSet, size_t> ValueSetWithTime;
typedef std::list<ValuesSet> ValuesList;
// Legacy (file version 0/1) per-window summaries.
typedef UMap<Key, ValuesList> WindowsConfidentValuesList;
// key -> value -> confidence level
typedef UMap<Key, UMap<Val, double>> ConfidenceLevels;
typedef UMap<Key, int> WindowsCounter;
typedef UMap<Key, ValueSetWithTime> ConfidenceSet;
// nullObj is the placeholder value under which all sources of a key are
// collected. tuning and ignoreSrc are optional and stored as raw pointers.
ConfidenceCalculator(size_t minSources,
size_t minIntervals,
std::chrono::minutes intervalDuration,
double ratioThreshold,
const Val &nullObj,
const std::string& backupPath,
const std::string& remotePath,
const std::string& assetId,
TuningDecision* tuning = nullptr,
I_IgnoreSources* ignoreSrc = nullptr);
~ConfidenceCalculator();
// Sets the owner name used as a prefix in log messages.
void setOwner(const std::string& owner);
// Wipes all learned state and deletes the backup file.
void hardReset();
// Clears the current window; wipes everything when not learning permanently.
void reset();
// Applies new parameters; returns false when they equal the current ones.
bool reset(ConfidenceCalculatorParams& params);
// Synchronisation and persistence hooks.
virtual bool postData();
virtual void pullData(const std::vector<std::string>& files);
virtual void processData();
virtual void postProcessedData();
virtual void pullProcessedData(const std::vector<std::string>& files);
virtual void updateState(const std::vector<std::string>& files);
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
// Merges a remote confidence set; the first pull replaces the local sets.
void mergeFromRemote(const ConfidenceSet& remote_confidence_set, bool is_first_pull);
// True when value is in the confident set of key.
bool is_confident(const Key &key, const Val &value) const;
// Rebuilds the confident sets from the confidence levels.
void calcConfidentValues();
// Copy of the confident values of key (empty when the key is unknown).
ValuesSet getConfidenceValues(const Key &key) const;
size_t getLastConfidenceUpdate();
// Records that source reported value for key in the current window.
void log(const Key &key, const Val &value, const std::string &source);
// Records that source touched key without a specific value.
void logSourceHit(const Key &key, const std::string &source);
// Processes the backup window log into confidence levels and confident sets.
void calculateInterval();
static void mergeConfidenceSets(ConfidenceSet& confidence_set,
const ConfidenceSet& confidence_set_to_merge,
size_t& last_indicators_update);
private:
// Loaders for the individual on-disk format versions.
void loadVer0(cereal::JSONInputArchive& archive);
void loadVer1(cereal::JSONInputArchive& archive);
void loadVer2(cereal::JSONInputArchive& archive);
void loadVer3(cereal::JSONInputArchive& archive);
bool tryParseVersionBasedOnNames(
cereal::JSONInputArchive& archive,
const std::string &params_field_name,
const std::string &indicators_update_field_name,
const std::string &windows_summary_field_name,
const std::string &confident_sets_field_name);
void convertWindowSummaryToConfidenceLevel(const WindowsConfidentValuesList& windows);
std::string getParamName(const Key& key);
size_t sumSourcesWeight(const SourcesSet& sources);
void mergeSourcesCounter(const Key& key, const SourcesCounters& counters);
void removeBadSources(SourcesSet& sources, const std::vector<std::string>* badSources);
ConfidenceCalculatorParams m_params;
Val m_null_obj;
// Sources logged during the current window.
KeyValSourcesLogger m_time_window_logger;
// Window log staged for processing (own data plus data pulled from remote).
KeyValSourcesLogger m_time_window_logger_backup;
ConfidenceSet m_confident_sets;
ConfidenceLevels m_confidence_level;
// Per key: incremented for every processed window, reset to 0 once a window
// had enough sources for the key.
WindowsCounter m_windows_counter;
size_t m_last_indicators_update;
// Optional collaborators; no ownership is visible from this header.
I_IgnoreSources* m_ignoreSources;
TuningDecision* m_tuning;
};

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ConfidenceFile.h"
// Nothing to set up explicitly: the members are default-constructed.
ConfidenceFileDecryptor::ConfidenceFileDecryptor() {}
// Returns the received confidence set; an empty set is reported as an error instead.
Maybe<ConfidenceCalculator::ConfidenceSet> ConfidenceFileDecryptor::getConfidenceSet() const
{
    if (confidence_set.get().empty()) {
        return genError("failed to get file");
    }
    return confidence_set.get();
}
// Returns the received confidence levels; an empty map is reported as an error instead.
Maybe<ConfidenceCalculator::ConfidenceLevels> ConfidenceFileDecryptor::getConfidenceLevels() const
{
    if (confidence_levels.get().empty()) {
        return genError("failed to get confidence levels");
    }
    return confidence_levels.get();
}
// Captures the confidence set and the confidence levels handed in by the caller.
ConfidenceFileEncryptor::ConfidenceFileEncryptor(
    const ConfidenceCalculator::ConfidenceSet& _confidence_set,
    const ConfidenceCalculator::ConfidenceLevels& _confidence_levels)
        :
    confidence_set(_confidence_set),
    confidence_levels(_confidence_levels)
{}

View File

@@ -0,0 +1,41 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "ConfidenceCalculator.h"
// Receiving side of a confidence file: exposes the confidence set and the (optional)
// confidence levels through Maybe<> accessors.
class ConfidenceFileDecryptor : public RestGetFile
{
public:
ConfidenceFileDecryptor();
// Yields the confidence set, or an error when the set is empty.
Maybe<ConfidenceCalculator::ConfidenceSet>
getConfidenceSet() const;
// Yields the confidence levels, or an error when the map is empty.
Maybe<ConfidenceCalculator::ConfidenceLevels>
getConfidenceLevels() const;
private:
// Members are declared through the REST parameter macros; confidence_levels is the optional one.
S2C_PARAM(ConfidenceCalculator::ConfidenceSet, confidence_set);
S2C_OPTIONAL_PARAM(ConfidenceCalculator::ConfidenceLevels, confidence_levels);
};
// Sending side of a confidence file: carries a confidence set and confidence levels
// supplied at construction time.
class ConfidenceFileEncryptor : public RestGetFile
{
public:
// Both arguments are stored in the members declared below.
ConfidenceFileEncryptor(const ConfidenceCalculator::ConfidenceSet& _confidence_set,
const ConfidenceCalculator::ConfidenceLevels& _confidence_levels);
private:
// Members are declared through the REST parameter macros.
C2S_PARAM(ConfidenceCalculator::ConfidenceSet, confidence_set);
C2S_PARAM(ConfidenceCalculator::ConfidenceLevels, confidence_levels);
};

View File

@@ -0,0 +1,75 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ContentTypeParser.h"
#include "Waf2Util.h"
USE_DEBUG_FLAG(D_WAAP_PARSER_CONTENT_TYPE);
// Parser name shared by all instances.
const std::string ContentTypeParser::m_parserName = "contentTypeParser";
// Receiver callback for one key/value pair of the header value being parsed.
// The first pair's key is taken as the content type; for later pairs only the
// "boundary" parameter's value is remembered. Always returns 0.
int ContentTypeParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags)
{
    dbgTrace(D_WAAP_PARSER_CONTENT_TYPE) << "ContentTypeParser::onKv(): " << std::string(v, v_len);
    assert((flags & BUFFERED_RECEIVER_F_BOTH) == BUFFERED_RECEIVER_F_BOTH);

    switch (ctParserState) {
        case CTP_STATE_CONTENT_TYPE: {
            contentTypeDetected = std::string(k, k_len);
            dbgTrace(D_WAAP_PARSER_CONTENT_TYPE) << "ContentTypeParser::onKv(): contentTypeDetected: '" <<
                contentTypeDetected << "'";
            // Everything that follows is a parameter of the content type.
            ctParserState = CTP_STATE_CONTENT_TYPE_PARAMS;
            break;
        }
        case CTP_STATE_CONTENT_TYPE_PARAMS: {
            if (my_strincmp(k, "boundary", k_len)) {
                boundaryFound = std::string(v, v_len);
            }
            break;
        }
        default: {
            // This should never occur
            m_error = true;
            dbgWarning(D_WAAP_PARSER_CONTENT_TYPE) << "ContentTypeParser::onKv(): '" << std::string(v, v_len) <<
                "': BUG: Unknown content type found: " << ctParserState;
            break;
        }
    }

    return 0; // ok
}
// Starts in the "content type" state and wires the header-value parser to this
// object through the buffered receiver.
ContentTypeParser::ContentTypeParser()
        :
    ctParserState(CTP_STATE_CONTENT_TYPE),
    m_rcvr(*this),
    m_hvp(m_rcvr),
    m_error(false)
{}
// Clears previous results, then feeds the data to the header-value parser and
// returns whatever that parser reports.
size_t ContentTypeParser::push(const char *data, size_t data_len)
{
    dbgTrace(D_WAAP_PARSER_CONTENT_TYPE) << "ContentTypeParser::push(): processing content type";

    // Start every push from a clean slate.
    ctParserState = CTP_STATE_CONTENT_TYPE;
    contentTypeDetected.clear();
    boundaryFound.clear();

    const size_t consumed = m_hvp.push(data, data_len);
    return consumed;
}
// Signals end of input to the underlying header-value parser.
void ContentTypeParser::finish()
{
    m_hvp.finish();
}
const std::string &
ContentTypeParser::name() const
{
return m_parserName;
}
// True once onKv() has been invoked in an unexpected parser state.
bool ContentTypeParser::error() const
{
    const bool failed = m_error;
    return failed;
}

View File

@@ -0,0 +1,49 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __CONTENT_TYPE_PARSER_H__aa67ad9a
#define __CONTENT_TYPE_PARSER_H__aa67ad9a
#include "ParserBase.h"
#include "ParserHdrValue.h"
#include "debug.h"
#include <string>
// Parses a Content-Type style header value: detects the content type itself and the
// "boundary" parameter, delegating tokenization to ParserHdrValue.
class ContentTypeParser : public ParserBase, private IParserReceiver {
// Parsing state: expecting the content type first, then its parameters.
enum CtParserState {
CTP_STATE_CONTENT_TYPE,
CTP_STATE_CONTENT_TYPE_PARAMS
} ctParserState;
private:
// Receiver callback invoked with each key/value pair.
virtual int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags);
public:
ContentTypeParser();
virtual size_t push(const char *data, size_t data_len);
virtual void finish();
virtual const std::string &name() const;
bool error() const;
virtual size_t depth() { return 1; }
// After a call to push(), parsing results can be picked up from these variables
// (push() clears them before parsing).
std::string contentTypeDetected;
std::string boundaryFound;
private:
// Declaration order matters: m_rcvr is constructed before m_hvp, which is built from it.
BufferedReceiver m_rcvr;
ParserHdrValue m_hvp;
bool m_error;
static const std::string m_parserName;
};
#endif // __CONTENT_TYPE_PARSER_H__aa67ad9a

View File

@@ -0,0 +1,111 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "Csrf.h"
#include <algorithm>
#include <boost/uuid/uuid.hpp> // uuid class
#include <boost/uuid/uuid_generators.hpp> // uuid generators
#include <boost/uuid/uuid_io.hpp>
namespace Waap {
namespace CSRF {
// All three tokens start out empty.
State::State()
        :
    csrf_token(),
    csrf_header_token(),
    csrf_form_token()
{}
// Decides whether the request fails CSRF validation.
//
// method     - request method; "get" (compared case-insensitively) is never blocked.
// decision   - container from which the CSRF decision object is fetched and updated.
// csrfPolicy - policy whose `enforce` flag selects between log-only and blocking.
//
// Returns true only when a violation was found AND the policy enforces blocking.
// A violation (missing token, or a token matching neither the header nor the form token)
// always marks the CSRF decision for logging.
bool
State::decide
    (const std::string &method, WaapDecision &decision, const std::shared_ptr<Waap::Csrf::Policy>& csrfPolicy) const
{
    dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): Start.";

    // tolower() is only defined for values representable as unsigned char (or EOF), so every
    // byte is routed through unsigned char; a plain char with the high bit set would be UB.
    std::string low_method = method;
    std::transform(
        low_method.begin(),
        low_method.end(),
        low_method.begin(),
        [] (unsigned char ch) -> char { return static_cast<char>(::tolower(ch)); }
    );

    if (low_method.compare("get") == 0)
    {
        dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): Should not block. Method : " << low_method;
        return false;
    }

    auto csrfDecision = decision.getDecision(CSRF_DECISION);

    // Shared handling for both violation kinds: always log, block only when enforcing.
    auto reportViolation = [&] () -> bool {
        csrfDecision->setLog(true);
        if (!csrfPolicy->enforce) {
            return false;
        }
        csrfDecision->setBlock(true);
        return true;
    };

    if (csrf_token.empty())
    {
        dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): missing token.";
        return reportViolation();
    }

    dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): CSRF compare: csrf_token: " << csrf_token
        << " csrf_header_token: " << csrf_header_token << " csrf_form_token: " << csrf_form_token;

    bool result = (csrf_token == csrf_header_token ||
        csrf_token == csrf_form_token);
    dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): CSRF result : " << result;

    if (!result)
    {
        dbgTrace(D_WAAP) << "Waap::CSRF::State::decide(): invalid token.";
        return reportViolation();
    }

    return false;
}
void State::injectCookieHeader(std::string& injectStr) const
{
// creating CSRF token
boost::uuids::random_generator csrfTokenRand;
boost::uuids::uuid csrfToken = csrfTokenRand();
injectStr = "x-chkp-csrf-token=" + boost::uuids::to_string(csrfToken) + "; Path=/; SameSite=Lax";
dbgTrace(D_WAAP) << "Waap::CSRF::State::injectCookieHeader(): CSRF Token was created:" <<
boost::uuids::to_string(csrfToken);
}
// Stores the CSRF token from a (pointer, length) buffer.
void
State::set_CsrfToken(const char* v, size_t v_len)
{
    csrf_token.assign(v, v_len);
    dbgTrace(D_WAAP) << "Waap::CSRF::State::set_CsrfToken(): set csrf_token : " << csrf_token;
}
// Stores the CSRF token taken from the request header, given as a (pointer, length) buffer.
void
State::set_CsrfHeaderToken(const char* v, size_t v_len)
{
    csrf_header_token = std::string(v, v_len);
    // The trace names the field actually written (it used to say "csrf_token").
    dbgTrace(D_WAAP) << "Waap::CSRF::State::set_CsrfHeaderToken(): set csrf_header_token : " << csrf_header_token;
}
// Stores the CSRF token taken from the form, given as a (pointer, length) buffer.
void
State::set_CsrfFormToken(const char* v, size_t v_len)
{
    csrf_form_token.assign(v, v_len);
    dbgTrace(D_WAAP) << "Waap::CSRF::State::set_CsrfFormToken(): set csrf_form_token : " << csrf_form_token;
}
}
}

View File

@@ -0,0 +1,47 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "debug.h"
#include "WaapDecision.h"
#include "i_waapConfig.h"
#include <iostream>
#include <string>
USE_DEBUG_FLAG(D_WAAP);
namespace Waap {
namespace CSRF {
// Per-request CSRF state: holds the token plus the copies found in the header and the form,
// and decides whether the request should be logged/blocked.
class State
{
public:
State();
// Returns true when the request must be blocked; see the definition for the exact rules.
bool decide(
const std::string &method,
WaapDecision &decision,
const std::shared_ptr<Waap::Csrf::Policy>& csrfPolicy) const;
// Fills injectStr with a cookie string carrying a newly generated token.
void injectCookieHeader(std::string& injectStr) const;
// Setters take a (pointer, length) buffer and copy it into the matching member.
void set_CsrfToken(const char* v, size_t v_len);
void set_CsrfHeaderToken(const char* v, size_t v_len);
void set_CsrfFormToken(const char* v, size_t v_len);
private:
std::string csrf_token;
std::string csrf_header_token;
std::string csrf_form_token;
};
}
}

View File

@@ -0,0 +1,22 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "CsrfDecision.h"
// Forwards the decision type to the SingleDecision base.
CsrfDecision::CsrfDecision(DecisionType type)
        :
    SingleDecision(type)
{
}
// Human-readable name of this decision type.
std::string CsrfDecision::getTypeStr() const
{
    static const char typeName[] = "CSRF Protection";
    return std::string(typeName);
}

View File

@@ -0,0 +1,27 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __CSRF_DECISION_H__
#define __CSRF_DECISION_H__
#include "SingleDecision.h"
#include "DecisionType.h"
#include <string>
// SingleDecision specialization for CSRF protection.
class CsrfDecision: public SingleDecision
{
public:
explicit CsrfDecision(DecisionType type);
// Returns the display name "CSRF Protection".
std::string getTypeStr() const override;
};
#endif

View File

@@ -0,0 +1,35 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "CsrfPolicy.h"
#include "Waf2Util.h"
namespace Waap {
namespace Csrf {
// Default policy: CSRF protection disabled and not enforcing.
Policy::Policy()
        :
    enable(false),
    enforce(false)
{}
// Two policies are equal when both flags match.
bool
Policy::operator==(const Policy &other) const
{
    if (enable != other.enable) {
        return false;
    }
    return enforce == other.enforce;
}
}
}

View File

@@ -0,0 +1,52 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cereal/types/string.hpp>
#include <string>
#include <memory>
#include <boost/algorithm/string/case_conv.hpp>
#include "debug.h"
namespace Waap {
namespace Csrf {
// CSRF policy flags.
//   enable  - protection is active (levels "detect" and "prevent").
//   enforce - violations are blocked rather than only logged (level "prevent").
struct Policy {
    Policy();

    // Loads the policy from an archive: reads the "csrfProtection" string and maps it,
    // case-insensitively, onto the two flags. Any other level leaves both flags false.
    template <typename Archive>
    Policy(Archive &ar)
            :
        enable(false),
        enforce(false)
    {
        std::string level;
        ar(cereal::make_nvp("csrfProtection", level));

        const std::string mode = boost::algorithm::to_lower_copy(level);
        const bool detectOnly = (mode == "detect");
        const bool prevent = (mode == "prevent");

        enable = detectOnly || prevent;
        enforce = prevent;
    }

    bool operator==(const Policy &other) const;

    bool enable;
    bool enforce;
};
}
}

View File

@@ -0,0 +1,73 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "D2Main.h"
#include "FpMitigation.h"
#include "BehaviorAnalysis.h"
#include "WaapDefines.h"
// Remembers the asset id and creates the false-positive mitigation scorer, backed by
// "<BACKUP_DIRECTORY_PATH><assetId>/3.data".
D2Main::D2Main(const std::string& assetId)
        :
    m_assetId(assetId),
    m_fpMitigation(
        std::make_unique<FpMitigationScore>(BACKUP_DIRECTORY_PATH + assetId + std::string("/3.data"))
    )
{}
// Releases the mitigation scorer explicitly, ahead of the remaining members.
D2Main::~D2Main()
{
    m_fpMitigation.reset(nullptr);
}
// Runs the second-stage analysis for one input record.
//
// The behavior analyzer is always consulted. The false-positive mitigation score, the
// false-positive classification/learning and the final score are only computed when the
// input carries keyword matches; otherwise those outputs keep their defaults.
D2OutputData D2Main::analyzeData(const D2InputData& inputData)
{
    const bool hasKeywordMatches = !inputData.keywordMatches.empty();

    D2OutputData result;
    if (hasKeywordMatches) {
        result.fpMitigationScore = m_fpMitigation->calculateFpMitigationScore(inputData.uri, inputData.param);
    }

    // Hand the relevant request fields (and the mitigation score) to the behavior analyzer.
    BehaviorAnalysisInputData behavior;
    behavior.fp_mitigation_score = result.fpMitigationScore;
    behavior.keyword_matches = inputData.keywordMatches;
    behavior.score = inputData.score;
    behavior.site_id = inputData.siteId;
    behavior.short_uri = inputData.uri;
    behavior.uri = inputData.uri;
    behavior.source_identifier = inputData.sourceIdentifier;
    behavior.user_agent = inputData.userAgent;
    behavior.location = inputData.location;

    ReputationData reputation = m_BehaviorAnalyzer.analyze_behavior(behavior);
    result.relativeReputation = reputation.relativeReputation;
    result.absoluteReputation = reputation.absoluteReputation;
    result.reputationMean = m_BehaviorAnalyzer.getReputationMean();
    result.variance = m_BehaviorAnalyzer.getVariance();

    PolicyCounterType classification = UNKNOWN_TYPE;
    if (hasKeywordMatches) {
        // The user agent and source identifier are concatenated into a single key.
        std::string userAgentSource = inputData.userAgent + inputData.sourceIdentifier;
        classification = m_fpMitigation->IdentifyFalseTruePositive(
            reputation.relativeReputation,
            inputData.uri,
            inputData.param,
            userAgentSource
        );
        m_fpMitigation->learnFalsePositive(inputData.keywordMatches, classification, inputData.uri, inputData.param);

        // Scale the raw score by reputation and mitigation score, then double it, capped at 10.
        result.finalScore = inputData.score * (10 - reputation.relativeReputation * 0.8) /
            10 * result.fpMitigationScore / 10;
        result.finalScore = std::min(result.finalScore * 2, 10.0);
    }
    result.fpClassification = classification;

    return result;
}

View File

@@ -0,0 +1,64 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include <functional>
#include "FpMitigation.h"
#include "BehaviorAnalysis.h"
// Input record for D2Main::analyzeData().
struct D2InputData {
std::string siteId;
std::string sourceIdentifier;
std::string userAgent;
std::string uri;
std::string param;
std::vector<std::string> keywordMatches;
// NOTE: no default initializer; callers must assign it before use.
double score;
std::string location;
};
// Result record produced by D2Main::analyzeData().
struct D2OutputData {
double finalScore;
double absoluteReputation;
double relativeReputation;
double fpMitigationScore;
PolicyCounterType fpClassification;
double reputationMean;
double variance;
// Every numeric field starts at 0.0 and the classification at UNKNOWN_TYPE.
D2OutputData() : finalScore(0.0),
absoluteReputation(0.0),
relativeReputation(0.0),
fpMitigationScore(0.0),
fpClassification(UNKNOWN_TYPE),
reputationMean(0.0),
variance(0.0)
{
}
};
// Second-stage analyzer for a single asset: combines the false-positive mitigation scorer
// with the behavior analyzer.
class D2Main {
public:
D2Main(const std::string& assetId);
virtual ~D2Main();
// Virtual so that it can be overridden (e.g. by a test double - presumably; TODO confirm).
virtual D2OutputData analyzeData(const D2InputData& inputData);
private:
std::string m_assetId;
std::unique_ptr<FpMitigationScore> m_fpMitigation;
BehaviorAnalyzer m_BehaviorAnalyzer;
};
// Equality for D2OutputData; declared here, defined in another translation unit.
bool operator==(const D2OutputData& lhs, const D2OutputData& rhs);

View File

@@ -0,0 +1,28 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __DATA_TYPES_H__
#define __DATA_TYPES_H__
// Data type tags. The numeric values are spelled out and equal the ones the implicit
// numbering assigns, so existing users of the raw values are unaffected.
enum DataType {
    EMPTY       = 0,
    NUMBER      = 1,
    BOOLEAN     = 2,
    STRING      = 3,
    MAP         = 4,
    ARRAY       = 5,
    COMPLEX     = 6,
    ALWAYSVALID = 7
};
#endif // __DATA_TYPES_H__

View File

@@ -0,0 +1,147 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "DecisionFactory.h"
#include "debug.h"
#include "AutonomousSecurityDecision.h"
#include "CsrfDecision.h"
#include "OpenRedirectDecision.h"
#include "ErrorDisclosureDecision.h"
#include "ErrorLimitingDecision.h"
#include "RateLimitingDecision.h"
#include "UserLimitsDecision.h"
USE_DEBUG_FLAG(D_WAAP);
// Creates one decision object for every slot of the decisions array.
DecisionFactory::DecisionFactory()
{
    const size_t decisionCount = getDecisions().size();
    for (size_t idx = 0; idx != decisionCount; ++idx) {
        initDecision(static_cast<DecisionType>(idx));
    }
}
// Dispatches to the dedicated initializer for the given decision type.
// An unknown value is reported through the debug log and otherwise ignored.
void DecisionFactory::initDecision(DecisionType type)
{
    switch (type)
    {
        case AUTONOMOUS_SECURITY_DECISION:
        {
            initAutonomousSecurityDecision();
            break;
        }
        case CSRF_DECISION:
        {
            initCsrfDecision();
            break;
        }
        case OPEN_REDIRECT_DECISION:
        {
            initOpenRedirectDecision();
            break;
        }
        case ERROR_DISCLOSURE_DECISION:
        {
            initErrorDisclosureDecision();
            break;
        }
        case ERROR_LIMITING_DECISION:
        {
            initErrorLimitingDecision();
            break;
        }
        case RATE_LIMITING_DECISION:
        {
            initRateLimitingDecision();
            break;
        }
        case USER_LIMITS_DECISION:
        {
            initUserLimitsDecision();
            break;
        }
        default:
            // A `static_assert(true, ...)` used to sit here; it could never fire, so it was
            // dropped. The runtime error log below is the actual safety net.
            dbgError(D_WAAP) << "Illegal DecisionType ENUM value " << type;
            break;
    }
}
void DecisionFactory::initAutonomousSecurityDecision()
{
DecisionType type = DecisionType::AUTONOMOUS_SECURITY_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<AutonomousSecurityDecision>(type);
}
}
void DecisionFactory::initCsrfDecision()
{
DecisionType type = DecisionType::CSRF_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<CsrfDecision>(type);
}
}
void DecisionFactory::initOpenRedirectDecision()
{
DecisionType type = DecisionType::OPEN_REDIRECT_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<OpenRedirectDecision>(type);
}
}
void DecisionFactory::initErrorDisclosureDecision()
{
DecisionType type = DecisionType::ERROR_DISCLOSURE_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<ErrorDisclosureDecision>(type);
}
}
void DecisionFactory::initErrorLimitingDecision()
{
DecisionType type = DecisionType::ERROR_LIMITING_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<ErrorLimitingDecision>(type);
}
}
void DecisionFactory::initRateLimitingDecision()
{
DecisionType type = DecisionType::RATE_LIMITING_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<RateLimitingDecision>(type);
}
}
void DecisionFactory::initUserLimitsDecision()
{
DecisionType type = DecisionType::USER_LIMITS_DECISION;
if (!m_decisions[type])
{
m_decisions[type] = std::make_shared<UserLimitsDecision>(type);
}
}
// Looks up the decision stored for `type`; values at or past NO_WAAP_DECISION yield nullptr.
std::shared_ptr<SingleDecision>
DecisionFactory::getDecision(DecisionType type) const
{
    if (type < NO_WAAP_DECISION) {
        return m_decisions[type];
    }
    return nullptr;
}

View File

@@ -0,0 +1,45 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __DECISION_FACTORY_H__
#define __DECISION_FACTORY_H__
#include "DecisionType.h"
#include "SingleDecision.h"
#include <array>
#include <memory>
// Fixed-size table of decisions, one slot per DecisionType value below NO_WAAP_DECISION.
typedef std::array<std::shared_ptr<SingleDecision>, NO_WAAP_DECISION> DecisionsArr;
// Owns one SingleDecision object per decision type and hands them out by type.
class DecisionFactory
{
public:
// Populates every slot of the decisions array.
DecisionFactory();
// Returns the decision for `type`, or nullptr when type is not below NO_WAAP_DECISION.
std::shared_ptr<SingleDecision> getDecision(DecisionType type) const;
const DecisionsArr& getDecisions() const
{
return m_decisions;
}
private:
// Dispatches to the matching init*Decision() helper below.
void initDecision(DecisionType type);
// Each helper creates its decision object only if the slot is still empty.
void initAutonomousSecurityDecision();
void initCsrfDecision();
void initOpenRedirectDecision();
void initErrorDisclosureDecision();
void initErrorLimitingDecision();
void initRateLimitingDecision();
void initUserLimitsDecision();
DecisionsArr m_decisions;
};
#endif

View File

@@ -0,0 +1,31 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __DECISION_TYPE_H__
#define __DECISION_TYPE_H__
// Decision kinds. Values are spelled out and equal the ones the implicit numbering assigns.
enum DecisionType
{
    // This order determines the priority of the decisions sent to management
    // Priority goes from top to bottom
    AUTONOMOUS_SECURITY_DECISION = 0,
    CSRF_DECISION                = 1,
    OPEN_REDIRECT_DECISION       = 2,
    ERROR_DISCLOSURE_DECISION    = 3,
    ERROR_LIMITING_DECISION      = 4,
    USER_LIMITS_DECISION         = 5,
    RATE_LIMITING_DECISION       = 6,
    // Must be kept last
    NO_WAAP_DECISION             = 7
};
#endif

View File

@@ -0,0 +1,160 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// #define WAF2_LOGGING_ENABLE
#include "DeepAnalyzer.h"
#include "Waf2Engine.h"
#include "WaapConversions.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP);
// Creates the implementation object that every public call is forwarded to.
DeepAnalyzer::DeepAnalyzer()
        :
    pimpl(std::make_unique<DeepAnalyzer::Impl>())
{}
// Nothing to do explicitly; pimpl releases the implementation object.
DeepAnalyzer::~DeepAnalyzer() {}
void DeepAnalyzer::reset()
{
pimpl->reset();
}
// Forwards the transaction-based analysis to the implementation object.
AnalysisResult DeepAnalyzer::analyzeData(IWaf2Transaction* pWaf2Trans, const IWaapConfig* pSitePolicy)
{
    AnalysisResult result = pimpl->analyzeData(pWaf2Trans, pSitePolicy);
    return result;
}
// Reports whether the source is covered by an exception. As written, no exception data is
// consulted, so the result is always false regardless of the arguments.
bool DeepAnalyzer::Impl::isException(const IWaapConfig* pWaapConfig, const std::string& sourceIp)
{
    bool exceptionFound = false;
    if (pWaapConfig == nullptr)
    {
        return exceptionFound;
    }
    exceptionFound |= false;
    return exceptionFound;
}
// Installs `d2main` as the analyzer for `assetId`, taking ownership of the pointer.
// An analyzer already registered under that id is destroyed and replaced.
void DeepAnalyzer::Impl::setD2Main(std::string assetId, D2Main* d2main)
{
    auto existing = m_d2MainMap.find(assetId);
    if (existing != m_d2MainMap.end())
    {
        existing->second.reset(d2main);
        return;
    }
    m_d2MainMap.insert(std::make_pair(assetId, std::unique_ptr<D2Main>(d2main)));
}
// Starts with an empty analyzer map.
DeepAnalyzer::Impl::Impl()
        :
    m_d2MainMap()
{}
// Nothing to do explicitly; the map releases the analyzers it owns.
DeepAnalyzer::Impl::~Impl() {}
// Destroys every per-asset analyzer and leaves the map empty.
void DeepAnalyzer::Impl::reset()
{
    // Release each analyzer before its map entry goes away.
    for (auto &entry : m_d2MainMap)
    {
        entry.second.reset();
    }
    m_d2MainMap.clear();
}
// True when no per-asset analyzer is currently held.
bool DeepAnalyzer::Impl::isMapEmpty()
{
    return m_d2MainMap.size() == 0;
}
// Runs the second-stage analysis for `data` with the analyzer of its site, converts the
// final score to a threat level and derives the block decision: block only when the
// policy asks for it and the source is not an exception.
AnalysisResult DeepAnalyzer::Impl::analyzeData(const D2InputData& data, const IWaapConfig* pSitePolicy)
{
    AnalysisResult result;
    result.d2Analysis = getD2Main(data.siteId)->analyzeData(data);
    const D2OutputData &d2 = result.d2Analysis;

    ThreatLevel threat = Waap::Conversions::convertFinalScoreToThreatLevel(d2.finalScore);
    bool blockRequested = Waap::Conversions::shouldDoWafBlocking(pSitePolicy, threat);
    bool exempted = isException(pSitePolicy, data.sourceIdentifier);

    dbgDebug(D_WAAP) << "stage2 analysis: final score: " << d2.finalScore << ", reputation: " <<
        d2.relativeReputation << ", false positive mitigation score: " <<
        d2.fpMitigationScore << ", threat level: " << threat << "\nWAF2 decision to block: " <<
        (blockRequested ? "block" : "pass") << ", is the request in exception list: " <<
        (exempted ? "true" : "false");

    result.threatLevel = threat;
    result.shouldBlock = blockRequested && !exempted;
    return result;
}
// Builds a D2InputData record from the transaction and the site policy and analyzes it.
// When either pointer is null a warning is logged and a value-initialized result is returned.
AnalysisResult DeepAnalyzer::Impl::analyzeData(IWaf2Transaction* pWaf2Trans, const IWaapConfig* pSitePolicy)
{
    D2InputData request;

    const bool argumentsValid = (pWaf2Trans != nullptr && pSitePolicy != nullptr);
    if (!argumentsValid)
    {
        dbgWarning(D_WAAP) << "invalid argument pWaf2Trans(0x" << std::hex << pWaf2Trans << "), pSitePolicy(0x" <<
            std::hex << pSitePolicy << ")";
        return AnalysisResult();
    }

    // Getters are called in the same order as before.
    request.sourceIdentifier = pWaf2Trans->getSourceIdentifier();
    request.userAgent = pWaf2Trans->getUserAgent();
    request.param = pWaf2Trans->getParam();
    request.location = pWaf2Trans->getLocation();
    request.siteId = pSitePolicy->get_AssetId();
    request.keywordMatches = pWaf2Trans->getKeywordMatches();
    request.uri = pWaf2Trans->getUriStr();
    request.score = pWaf2Trans->getScore();

    return analyzeData(request, pSitePolicy);
}
const std::unique_ptr<D2Main>& DeepAnalyzer::Impl::getD2Main(const std::string& assetId)
{
std::unordered_map<std::string, std::unique_ptr<D2Main>>::iterator it;
std::string mapKey = assetId;
if (Singleton::exists<I_InstanceAwareness>())
{
I_InstanceAwareness* instanceAwareness = Singleton::Consume<I_InstanceAwareness>::by<WaapComponent>();
Maybe<std::string> uniqueId = instanceAwareness->getUniqueID();
if (uniqueId.ok())
{
mapKey += "/" + uniqueId.unpack();
}
}
it = m_d2MainMap.find(mapKey);
if (it == m_d2MainMap.end())
{
m_d2MainMap.insert(std::make_pair(mapKey, std::unique_ptr<D2Main>(new D2Main(mapKey))));
}
const std::unique_ptr<D2Main>& result = m_d2MainMap[mapKey];
return result;
};

View File

@@ -0,0 +1,71 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "D2Main.h"
#include "i_waapConfig.h"
#include "WaapEnums.h"
#include "i_deepAnalyzer.h"
#include <memory>
#include <unordered_map>
// Plain aggregate describing one request handed to analysis.
// All string/vector members start empty; score starts at 0.
struct D1AnalysisInput {
    std::string siteId;
    std::string sourceIp;
    std::string userAgent;
    std::string uri;
    std::string shortUri;
    std::string param;
    std::vector<std::string> keywordMatches;
    // Explicit default so a default-constructed record never carries an indeterminate score.
    double score = 0.0;
};
// Value returned by the analyzeData() overloads declared in this header.
// NOTE(review): no member has a default initializer here; a plainly default-initialized
// instance leaves shouldBlock (and presumably threatLevel) unset - confirm callers always
// value-initialize, e.g. AnalysisResult().
struct AnalysisResult {
D2OutputData d2Analysis;
ThreatLevel threatLevel;
bool shouldBlock;
};
// Provider of the I_DeepAnalyzer interface. The implementation lives in the nested
// Impl class, which this object owns through `pimpl`.
class DeepAnalyzer : Singleton::Provide<I_DeepAnalyzer> {
public:
DeepAnalyzer();
virtual ~DeepAnalyzer();
// Analyzes the given transaction under the given site policy.
// NOTE(review): presumably forwards to pimpl - the definition is not visible here.
virtual AnalysisResult analyzeData(IWaf2Transaction* waf2Trans, const IWaapConfig* pSitePolicy);
void reset();
class Impl;
protected:
// Owning pointer to the implementation object.
std::unique_ptr<Impl> pimpl;
};
// Implementation side of DeepAnalyzer; keeps one D2Main engine per map key in m_d2MainMap.
class DeepAnalyzer::Impl : Singleton::Provide<I_DeepAnalyzer>::From<DeepAnalyzer>
{
public:
Impl();
virtual ~Impl();
void reset();
bool isMapEmpty();
// Analysis on an already-populated D2InputData record.
AnalysisResult analyzeData(const D2InputData& data, const IWaapConfig* pSitePolicy);
// Analysis driven by a transaction: fills a D2InputData from the transaction and the
// site policy, then delegates to the overload above. Returns a value-initialized
// AnalysisResult when either pointer is NULL.
virtual AnalysisResult analyzeData(IWaf2Transaction* waf2Trans, const IWaapConfig* pSitePolicy);
static bool isException(const IWaapConfig* pSitePolicy, const std::string& sourceIp);
// API for testing
void setD2Main(std::string assetId, D2Main* d2main);
protected:
// Returns the engine stored for the asset (key may be suffixed with the instance
// unique id), creating and storing a new one when none exists yet.
const std::unique_ptr<D2Main>& getD2Main(const std::string& assetId);
// Engines keyed by asset id (optionally "<assetId>/<instance unique id>").
std::unordered_map<std::string, std::unique_ptr<D2Main> > m_d2MainMap;
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,140 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_PARAMETER_DEEP_H__549cc3ee
#define __PARSER_PARAMETER_DEEP_H__549cc3ee
#include "ParserBase.h"
#include "KeyStack.h"
#include "WaapAssetState.h"
#include "Waf2Regex.h"
#include "maybe_res.h"
#include <deque>
// Deep (recursively) parses/dissects parameters based on input stream.
// Implements IParserReceiver itself (onKv) and reports to another IParserReceiver
// supplied at construction.
class DeepParser : public IParserReceiver
{
public:
DeepParser(std::shared_ptr<WaapAssetState> pWaapAssetState, IParserReceiver &receiver,
IWaf2Transaction* pTransaction);
virtual ~DeepParser();
void setWaapAssetState(std::shared_ptr<WaapAssetState> pWaapAssetState);
// This callback receives input key/value pairs, dissects, decodes and deep-scans these, recursively
// finally, it calls onDetected() on each detected parameter.
virtual int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags);
void clear();
void apiProcessKey(const char *v, size_t v_len);
size_t depth() const;
// Accessors for the global object-depth limit (plain get/set of m_globalMaxObjectDepth).
void setGlobalMaxObjectDepth(size_t depth) { m_globalMaxObjectDepth = depth; }
size_t getGlobalMaxObjectDepth() const { return m_globalMaxObjectDepth; }
bool isGlobalMaxObjectDepthReached() const;
size_t getLocalMaxObjectDepth() const { return m_localMaxObjectDepth; }
void setMultipartBoundary(const std::string &boundary);
const std::string &getMultipartBoundary() const;
bool isBinaryData() const;
bool isWBXmlData() const;
Maybe<std::string> getSplitType() const;
// NOTE(review): public data member; presumably the key/value pairs collected while parsing - confirm.
std::vector<std::pair<std::string, std::string> > kv_pairs;
// Represents information stored per-keyword
struct KeywordInfo
{
std::string type;
std::string name;
std::string val;
KeywordInfo() {}
// Copies v_len bytes starting at v into val.
KeywordInfo(
const std::string &type,
const std::string &name,
const char *v,
size_t v_len) :
type(type),
name(name),
val(std::string(v, v_len))
{
}
// Length of the stored value, in bytes.
size_t getLength() const
{
return val.size();
}
const std::string &getName() const
{
return name;
}
const std::string &getType() const
{
return type;
}
// Return the value itself
const std::string &getValue() const
{
return val;
}
};
// KeywordInfo maintained for each keyword name
std::vector<KeywordInfo> m_keywordInfo;
KeyStack m_key;
private:
// Scope guard over an int counter: increments it on construction and decrements it
// on destruction.
class Ref
{
public:
Ref(int &ref):m_ref(ref) { m_ref++; }
~Ref() { m_ref--; }
private:
int &m_ref;
};
std::shared_ptr<WaapAssetState> m_pWaapAssetState;
IWaf2Transaction* m_pTransaction;
IParserReceiver &m_receiver;
size_t m_depth;
int m_splitRefs; // incremented when entering recursion due to "split" action,
// decremented afterwards. If >0, apiProcessKey should not be called.
// Split a value by given regexp. Return true if split, false otherwise.
// note: This function calls onKv(), and the call can be recursive!
// TODO:: maybe convert this splitter to Parser-derived class?!
bool splitByRegex(const std::string &val, const Regex &r, const char *keyPrefix);
void createInternalParser(std::string& cur_val,
const ValueStatsAnalyzer &valueStats,
bool isBodyPayload,
bool isRefererPayload,
bool isRefererParamPayload,
bool isUrlPayload,
bool isUrlParamPayload);
int pushValueToTopParser(std::string& cur_val, int flags, bool base64ParamFound);
int parseBuffer(ValueStatsAnalyzer& valueStats, const std::string &cur_val, bool base64ParamFound,
bool shouldUpdateKeyStack);
bool shouldEnforceDepthLimit(const std::shared_ptr<ParserBase>& parser) const;
void setLocalMaxObjectDepth(size_t depth) { m_localMaxObjectDepth = depth; }
void setGlobalMaxObjectDepthReached() { m_globalMaxObjectDepthReached = true; }
bool m_deepParserFlag;
std::stack<std::tuple<size_t, size_t, std::string>> m_splitTypesStack; // depth, splitIndex, splitType
std::deque<std::shared_ptr<ParserBase>> m_parsersDeque;
std::string m_multipart_boundary;
size_t m_globalMaxObjectDepth;
size_t m_localMaxObjectDepth;
bool m_globalMaxObjectDepthReached;
bool m_is_wbxml;
};
#endif // __PARSER_PARAMETER_DEEP_H__549cc3ee

View File

@@ -0,0 +1,22 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ErrorDisclosureDecision.h"
// Builds an error-disclosure decision; the decision type is passed straight to SingleDecision.
ErrorDisclosureDecision::ErrorDisclosureDecision(DecisionType type)
    : SingleDecision(type)
{
}
// Human-readable label of this decision kind.
std::string ErrorDisclosureDecision::getTypeStr() const
{
    static const char kTypeName[] = "Error Disclosure";
    return std::string(kTypeName);
}

View File

@@ -0,0 +1,28 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __ERROR_DISCLOSURE_DECISION_H__
#define __ERROR_DISCLOSURE_DECISION_H__
#include "SingleDecision.h"
#include "DecisionType.h"
#include <string>
// SingleDecision specialization whose type label is "Error Disclosure".
class ErrorDisclosureDecision: public SingleDecision
{
public:
// `type` is forwarded to the SingleDecision base.
explicit ErrorDisclosureDecision(DecisionType type);
// Returns the label string for this decision kind.
std::string getTypeStr() const override;
};
#endif

View File

@@ -0,0 +1,59 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "WaapAssetState.h"
#include "waap.h"
#include <string>
#include <chrono>
#include <memory>
#include <cereal/types/vector.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/archives/json.hpp>
#include "ErrorLimiting.h"
namespace Waap
{
namespace ErrorLimiting
{
bool
ErrorLimiter::getErrorLimitingEnforcementStatus(){
return m_errorLimiting.enable;
}
bool enforce(
const std::string& sourceIdentifier,
const std::string& uriStr,
const std::shared_ptr<WaapAssetState>& pWaapAssetState,
bool& log)
{
dbgTrace(D_WAAP) << "ErrorLimiting::enforce:: response code: 404 :: error Limiting.";
// Get current clock time
I_TimeGet* timer = Singleton::Consume<I_TimeGet>::by<WaapComponent>();
// The error limiting state tracks error limiting information for all sources
std::shared_ptr<Waap::RateLimiting::State> errorLimitingState = pWaapAssetState->getErrorLimitingState();
std::chrono::seconds now = std::chrono::duration_cast<std::chrono::seconds>(timer->getMonotonicTime());
if (errorLimitingState && (errorLimitingState->execute(sourceIdentifier, uriStr, now, log) == false)) {
// block request due to error limiting
return true;
}
return false;
}
}
}

View File

@@ -0,0 +1,95 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "WaapAssetState.h"
#include "waap.h"
#include <string>
#include <chrono>
#include <memory>
#include <cereal/types/vector.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/archives/json.hpp>
namespace Waap
{
namespace ErrorLimiting
{
// Error-limiting configuration, populated from a cereal archive at construction.
struct ErrorLimiter
{
// Limiter parameters read from the "errorLimiter" node.
struct Policy
{
// Reads/writes "interval", "events" and "type"; "blockingTime" is processed only
// when type equals "quarantine".
template <typename _A>
void serialize(_A &ar)
{
ar(
cereal::make_nvp("interval", interval),
cereal::make_nvp("events", events),
cereal::make_nvp("type", type)
);
if(type == "quarantine")
{
ar(cereal::make_nvp("blockingTime", blockingTime));
}
}
unsigned interval = 0;
unsigned events = 0;
std::string type;
int blockingTime = 0;
};
// Enforcement switch read from the "errorLimitingEnforcement" value.
class ErrorLimitingEnforcement
{
public:
// `enable` becomes true only when the configured level, lower-cased, equals "prevent".
template <typename _A>
ErrorLimitingEnforcement(_A &ar)
:
enable(false)
{
std::string level;
ar(cereal::make_nvp("errorLimitingEnforcement", level));
level = boost::algorithm::to_lower_copy(level);
if (level == "prevent") {
enable = true;
}
}
bool operator==(const ErrorLimitingEnforcement &other) const;
bool enable;
};
Policy m_errorLimiterPolicy;
ErrorLimitingEnforcement m_errorLimiting;
// Returns m_errorLimiting.enable.
bool getErrorLimitingEnforcementStatus();
// Archive read order: "errorLimitingEnforcement" first (member initializer), then "errorLimiter".
template <typename _A>
ErrorLimiter(_A& ar) :
m_errorLimiting(ar)
{
ar(cereal::make_nvp("errorLimiter", m_errorLimiterPolicy));
};
};
// Returns true when the request should be blocked by error limiting.
bool enforce(
const std::string& sourceIdentifier,
const std::string& uriStr,
const std::shared_ptr<WaapAssetState>& pWaapAssetState,
bool& log);
}
}

View File

@@ -0,0 +1,22 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ErrorLimitingDecision.h"
// Builds an error-limiting decision; the decision type is passed straight to SingleDecision.
ErrorLimitingDecision::ErrorLimitingDecision(DecisionType type)
    : SingleDecision(type)
{
}
// Human-readable label of this decision kind.
std::string ErrorLimitingDecision::getTypeStr() const
{
    static const char kTypeName[] = "Error Limiting";
    return std::string(kTypeName);
}

View File

@@ -0,0 +1,27 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __ERROR_LIMITING_DECISION_H__
#define __ERROR_LIMITING_DECISION_H__
#include "DecisionType.h"
#include "SingleDecision.h"
#include <string>
// SingleDecision specialization whose type label is "Error Limiting".
class ErrorLimitingDecision: public SingleDecision
{
public:
// `type` is forwarded to the SingleDecision base.
explicit ErrorLimitingDecision(DecisionType type);
// Returns the label string for this decision kind.
std::string getTypeStr() const override;
};
#endif

View File

@@ -0,0 +1,226 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "FpMitigation.h"
#include <memory>
#include <algorithm>
#include <string.h>
// Score assumed for a URL or parameter that has no learned counter yet.
#define DEFAULT_SCORE 10.0
// Relative reputation strictly below this value is classified as a true positive
// (see IdentifyFalseTruePositive).
#define TRUE_POSITIVE_REPUTATION_THRESHOLD 1.5
// Relative reputation strictly above this value is classified as a false positive
// (see IdentifyFalseTruePositive).
#define FALSE_POSITIVE_REPUTATION_THRESHOLD 5
USE_DEBUG_FLAG(D_WAAP);
// Brings duration_cast/seconds/minutes into scope for the constructor below.
using namespace std::chrono;
// Sets up periodic serialization to `backupFilePath` with a 10 minute interval (expressed
// in seconds), starts with empty tables and a zero counter, then calls restore().
FpMitigationScore::FpMitigationScore(const std::string& backupFilePath)
    :
    SerializeToFilePeriodically(
        std::chrono::duration_cast<std::chrono::seconds>(std::chrono::minutes(10)),
        backupFilePath
    ),
    m_policyDataUrl(),
    m_policyDataParam(),
    m_history(),
    m_counter(0)
{
    dbgTrace(D_WAAP) << "False positive mitigation constructor";
    restore();
}
// Clears all learned data on destruction.
FpMitigationScore::~FpMitigationScore()
{
    this->reset();
}
// Drops every learned URL/parameter counter and the classification history.
// m_counter is left untouched.
void FpMitigationScore::reset()
{
    m_policyDataUrl.clear();
    m_policyDataParam.clear();
    m_history.clear();
}
void FpMitigationScore::serialize(std::ostream& stream) {
cereal::JSONOutputArchive archive(stream);
archive(cereal::make_nvp("version", 1),
cereal::make_nvp("policyDataUrl", m_policyDataUrl),
cereal::make_nvp("policyDataParam", m_policyDataParam));
}
void FpMitigationScore::deserialize(std::istream& stream) {
cereal::JSONInputArchive archive(stream);
size_t version = 0;
try
{
archive(cereal::make_nvp("version", version));
}
catch (std::runtime_error & e) {
archive.setNextName(nullptr);
version = 0;
dbgDebug(D_WAAP) << "Can't load file version: " << e.what();
}
switch (version)
{
case 0:
archive(cereal::make_nvp("m_policyDataUrl", m_policyDataUrl),
cereal::make_nvp("m_policyDataParam", m_policyDataParam));
break;
case 1:
archive(cereal::make_nvp("policyDataUrl", m_policyDataUrl),
cereal::make_nvp("policyDataParam", m_policyDataParam));
break;
default:
dbgWarning(D_WAAP) << "unknown file format version: " << version;
break;
}
}
// Combines the learned score of the URL and of the parameter into a single mitigation score.
// Either side falls back to DEFAULT_SCORE when it has no learned entry.
// Note: each "(score * 2) / 3" term is truncated to int and divided as an integer before
// 3.3 is added - this mirrors the established formula exactly.
double FpMitigationScore::calculateFpMitigationScore(const std::string& shortUri,
    const std::string& canonisedParam)
{
    double urlScore = DEFAULT_SCORE;
    double paramScore = DEFAULT_SCORE;

    auto urlEntry = m_policyDataUrl.find(shortUri);
    if (urlEntry != m_policyDataUrl.end())
    {
        urlScore = urlEntry->second->getScore();
    }

    auto paramEntry = m_policyDataParam.find(canonisedParam);
    if (paramEntry != m_policyDataParam.end())
    {
        paramScore = paramEntry->second->getScore();
    }

    const double paramTerm = static_cast<int>(paramScore * 2) / 3 + 3.3;
    const double urlTerm = static_cast<int>(urlScore * 2) / 3 + 3.3;
    return paramTerm * urlTerm / 10;
}
// Returns true when `elem` compares equal to at least one element of `vec`.
// Both arguments are taken by const reference: the vector is no longer copied on every
// call, and const elements are accepted.
template<typename T>
bool hasElement(const std::vector<T>& vec, const T& elem) {
    return (std::find(vec.begin(), vec.end(), elem) != vec.end());
}
// Feeds one classified sample into the per-URL and per-parameter counters.
// Samples with more than 3 keyword matches that include "probing" are ignored, as are
// samples classified UNKNOWN_TYPE. Every FP_SCORE_CALCULATION_INTERVALS accepted samples
// all scores are re-evaluated.
void FpMitigationScore::learnFalsePositive(
    const std::vector<std::string>& keywordMatches,
    PolicyCounterType rep,
    const std::string& shortUri,
    const std::string& canonisedParam)
{
    static std::string probing = "probing";

    const bool isHeavyProbing = keywordMatches.size() > 3 && hasElement(keywordMatches, probing);
    if (isHeavyProbing || rep == UNKNOWN_TYPE)
    {
        return;
    }

    // Make sure both tables hold a counter before incrementCounter() looks them up
    if (m_policyDataUrl.count(shortUri) == 0)
    {
        m_policyDataUrl.emplace(shortUri, std::make_shared<PolicyDataCounter>());
    }
    if (m_policyDataParam.count(canonisedParam) == 0)
    {
        m_policyDataParam.emplace(canonisedParam, std::make_shared<PolicyDataCounter>());
    }

    incrementCounter(shortUri, canonisedParam, rep);

    m_counter++;
    if (m_counter % FP_SCORE_CALCULATION_INTERVALS != 0)
    {
        return;
    }
    dbgTrace(D_WAAP) << "evaluating fp mitigation scores";
    evaluatePolicyDataCounterScore();
}
// Classifies a sample by its relative reputation, reporting each key at most once:
//  - below TRUE_POSITIVE_REPUTATION_THRESHOLD and URI+param not seen before  -> TRUE_POSITIVE
//  - above FALSE_POSITIVE_REPUTATION_THRESHOLD and userAgentIp not seen before -> FALSE_POSITIVE
//  - anything else -> UNKNOWN_TYPE
// Seen keys of both kinds are recorded in m_history.
PolicyCounterType FpMitigationScore::IdentifyFalseTruePositive(double relativeReputation,
    const std::string& shortUri, const std::string& canonisedParam, const std::string& userAgentIp)
{
    std::string uriParamCat = shortUri + canonisedParam;

    // insert().second is true only when the key was not in the history yet
    if (relativeReputation < TRUE_POSITIVE_REPUTATION_THRESHOLD && m_history.insert(uriParamCat).second)
    {
        return TRUE_POSITIVE;
    }

    if (relativeReputation > FALSE_POSITIVE_REPUTATION_THRESHOLD && m_history.insert(userAgentIp).second)
    {
        return FALSE_POSITIVE;
    }

    return UNKNOWN_TYPE;
}
// Bumps the counter of the given type for both the URL and the parameter.
// Precondition (guaranteed by the caller): both tables already hold an entry for
// shortUri / canonisedParam respectively.
void FpMitigationScore::incrementCounter(const std::string& shortUri,
    const std::string& canonisedParam,
    PolicyCounterType counterType)
{
    auto& urlSlot = m_policyDataUrl[shortUri];
    auto& paramSlot = m_policyDataParam[canonisedParam];

    urlSlot->incrementCounter(counterType);
    paramSlot->incrementCounter(counterType);
}
// Recomputes the score of every URL counter and then every parameter counter.
// Entries are visited by const reference so each iteration no longer copies the key
// string and the shared_ptr.
void FpMitigationScore::evaluatePolicyDataCounterScore()
{
    for (const auto& urlPolicy : m_policyDataUrl) {
        urlPolicy.second->evaluateScore();
    }
    for (const auto& paramPolicy : m_policyDataParam) {
        paramPolicy.second->evaluateScore();
    }
}
// A fresh counter has no observations and a score of 10.0.
PolicyDataCounter::PolicyDataCounter()
    :
    falsePositive(0),
    truePositive(0),
    score(10.0)
{}
double PolicyDataCounter::getScore()
{
return score;
}
// Records one observation:
//  - FALSE_POSITIVE and HTML_CONTENT count as false positives
//  - TRUE_POSITIVE and SPAM count as true positives
//  - UNKNOWN_TYPE and any other value are ignored
void PolicyDataCounter::incrementCounter(PolicyCounterType counterType)
{
    if (counterType == FALSE_POSITIVE || counterType == HTML_CONTENT)
    {
        falsePositive++;
    }
    else if (counterType == TRUE_POSITIVE || counterType == SPAM)
    {
        truePositive++;
    }
    // UNKNOWN_TYPE: add assert
}
void PolicyDataCounter::evaluateScore()
{
size_t tp = truePositive + 50 + 1, fp = falsePositive;
score = (double)(10.0 * tp) / (10.0 * fp + tp);
}

View File

@@ -0,0 +1,90 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include <map>
#include <string>
#include <unordered_set>
#include <boost/noncopyable.hpp>
#include <cereal/types/map.hpp>
#include <cereal/archives/json.hpp>
#include <cereal/types/memory.hpp>
#include "i_serialize.h"
// Number of learned samples between two re-evaluations of all counter scores
// (used by FpMitigationScore::learnFalsePositive).
#define FP_SCORE_CALCULATION_INTERVALS 20
// Classification of a learned sample.
// PolicyDataCounter::incrementCounter counts FALSE_POSITIVE and HTML_CONTENT as false
// positives, TRUE_POSITIVE and SPAM as true positives, and ignores UNKNOWN_TYPE.
enum PolicyCounterType {
UNKNOWN_TYPE = 0,
FALSE_POSITIVE,
HTML_CONTENT,
TRUE_POSITIVE,
SPAM
};
// False/true positive tallies for one URL or parameter, plus the score derived from them.
class PolicyDataCounter {
public:
PolicyDataCounter();
// Score as of the last evaluateScore() call.
double getScore();
// Adds one observation of the given type to the matching tally.
void incrementCounter(PolicyCounterType counterType);
// Recomputes `score` from the current tallies.
void evaluateScore();
// NOTE(review): takes a non-const reference and is a non-const member; the definition
// is not visible here.
bool operator==(PolicyDataCounter& other);
bool operator!=(PolicyDataCounter& other) { return !(*this == other); }
// cereal hook: reads/writes "falsePositive", "truePositive" and "score".
template <class Archive>
void serialize(Archive& ar) {
ar(cereal::make_nvp("falsePositive", falsePositive),
cereal::make_nvp("truePositive", truePositive),
cereal::make_nvp("score", score));
}
private:
size_t falsePositive;
size_t truePositive;
double score;
};
// Learns per-URL and per-parameter false/true positive statistics and derives a
// mitigation score from them. State is persisted through SerializeToFilePeriodically.
class FpMitigationScore : public boost::noncopyable, public SerializeToFilePeriodically {
public:
// `backupFilePath` is handed to SerializeToFilePeriodically as the backup file location.
FpMitigationScore(const std::string& backupFilePath);
~FpMitigationScore();
// Combined score for the URL/parameter pair; unknown entries use a default score.
double calculateFpMitigationScore(const std::string& shortUri, const std::string& canonisedParam);
// Records one classified sample (ignored when `rep` is UNKNOWN_TYPE).
void learnFalsePositive(const std::vector<std::string>& keywordMatches, PolicyCounterType rep,
const std::string& shortUri, const std::string& canonisedParam);
// Classifies a sample by relative reputation; each key is reported at most once.
PolicyCounterType IdentifyFalseTruePositive(double relativeReputation, const std::string& shortUri,
const std::string& canonisedParam, const std::string& userAgentIp);
// Clears both tables and the history.
void reset();
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
typedef std::map<std::string, std::shared_ptr<PolicyDataCounter>> PolicyDataMap;
protected:
void incrementCounter(const std::string& shortUri, const std::string& canonisedParam,
PolicyCounterType counterType);
void evaluatePolicyDataCounterScore();
// TODO: move to SMEM
PolicyDataMap m_policyDataUrl;
PolicyDataMap m_policyDataParam;
// Keys already reported by IdentifyFalseTruePositive (URI+param strings and userAgentIp values).
std::unordered_set<std::string> m_history;
// Number of samples accepted by learnFalsePositive.
size_t m_counter;
};

View File

@@ -0,0 +1,193 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "i_indicatorsFilter.h"
#include "IndicatorsFilterBase.h"
#include "Waf2Engine.h"
// Wires the two confidence calculators:
//  - m_confidence_calc receives the thresholds, the confidence file path, the remote path,
//    the asset id, the tuning decisions and the ignore-sources provider
//  - m_trusted_confidence_calc receives the trusted file path, the remote path and the asset id
// No trusted-sources policy is set until setTrustedSrcParameter() is called.
IndicatorFilterBase::IndicatorFilterBase(
    const std::string& confidence_path,
    const std::string& trusted_path,
    const std::string& remotePath,
    const std::string& assetId,
    size_t min_sources,
    size_t min_intervals,
    std::chrono::minutes interval_duration,
    double ratio_threshold,
    const std::string& null_obj,
    TuningDecision* tuning,
    I_IgnoreSources* ignoreSources)
    :
    m_confidence_calc(
        min_sources,
        min_intervals,
        interval_duration,
        ratio_threshold,
        null_obj,
        confidence_path,
        remotePath,
        assetId,
        tuning,
        ignoreSources
    ),
    m_trusted_confidence_calc(trusted_path, remotePath, assetId),
    m_policy(nullptr),
    m_tuning(tuning)
{}
void IndicatorFilterBase::filterKeywords(
const std::string &key,
Waap::Keywords::KeywordsSet& keywords,
std::vector<std::string>& filteredKeywords)
{
for (auto keyword = keywords.begin(); keyword != keywords.end(); )
{
if (shouldFilterKeyword(key, *keyword))
{
filteredKeywords.push_back(*keyword);
keyword = keywords.erase(keyword);
}
else
{
keyword++;
}
}
}
// Installs a new trusted-sources policy.
// Returns true when a previous policy existed and the new one differs from it; in that
// case the trusted confidence data is reset. The first policy ever set returns false.
// A null `policy` replacing an existing one is treated as a change instead of being
// dereferenced.
bool IndicatorFilterBase::setTrustedSrcParameter(
    std::shared_ptr<Waap::TrustedSources::TrustedSourcesParameter> policy)
{
    bool isChanged = false;
    if (m_policy != nullptr)
    {
        // Compare by value only when there is something to compare against
        isChanged = (policy == nullptr) || (*policy != *m_policy);
    }
    if (isChanged)
    {
        m_trusted_confidence_calc.reset();
    }
    m_policy = policy;
    return isChanged;
}
// Resets both calculators: hardReset() on the regular one, reset() on the trusted one.
void IndicatorFilterBase::reset() {
    m_confidence_calc.hardReset();
    m_trusted_confidence_calc.reset();
}
// Asks the policy whether `source` is trusted for the given identifier type.
// The value actually checked depends on the type:
//  - X_FORWARDED_FOR: the proxy IP from the environment context (empty string when unavailable)
//  - COOKIE_OAUTH2_PROXY: the "_oauth2_proxy" value extracted from the cookie string
//  - everything else: `source` as given
// Returns false when no policy is set.
bool IndicatorFilterBase::isTrustedSourceOfType(const std::string& source,
    Waap::TrustedSources::TrustedSourceType srcType)
{
    if (m_policy == nullptr)
    {
        dbgTrace(D_WAAP) << "missing policy";
        return false;
    }

    std::string trusted_src(source);
    switch (srcType)
    {
        case Waap::TrustedSources::TrustedSourceType::X_FORWARDED_FOR:
        {
            auto env = Singleton::Consume<I_Environment>::by<WaapComponent>();
            auto proxy_ip = env->get<std::string>(HttpTransactionData::proxy_ip_ctx);
            if (proxy_ip.ok())
            {
                trusted_src = proxy_ip.unpack();
            }
            else
            {
                trusted_src = "";
            }
            break;
        }
        case Waap::TrustedSources::TrustedSourceType::COOKIE_OAUTH2_PROXY:
            trusted_src = Waap::Util::extractKeyValueFromCookie(source, "_oauth2_proxy");
            break;
        case Waap::TrustedSources::TrustedSourceType::SM_USER:
            trusted_src = source;
            break;
        default:
            break;
    }

    return m_policy->isSourceTrusted(trusted_src, srcType);
}
// Walks the policy's trusted identifier types in order and returns the first identifier
// value of the transaction that the policy trusts:
//  - SOURCE_IP: the remote address
//  - X_FORWARDED_FOR / sm_user / Cookie: the raw header content
// Returns an empty string when no policy is set or nothing is trusted.
std::string IndicatorFilterBase::getTrustedSource(IWaf2Transaction* pTransaction)
{
    if (m_policy == nullptr)
    {
        dbgTrace(D_WAAP) << "Policy for trusted sources is not set";
        return "";
    }

    // Header contents are fetched lazily; a slot is (re)loaded while it is still empty.
    std::string forwardedFor;
    std::string cookieHeader;
    std::string smUser;
    const auto loadHeader = [pTransaction](std::string& slot, const char* headerName)
    {
        if (slot.empty())
        {
            slot = pTransaction->getHdrContent(headerName);
        }
    };

    auto configuredTypes = m_policy->getTrustedTypes();
    for (auto& candidateType : configuredTypes)
    {
        switch (candidateType)
        {
            case Waap::TrustedSources::TrustedSourceType::SOURCE_IP:
                if (isTrustedSourceOfType(pTransaction->getRemoteAddr(), candidateType))
                {
                    return pTransaction->getRemoteAddr();
                }
                break;
            case Waap::TrustedSources::TrustedSourceType::X_FORWARDED_FOR:
                loadHeader(forwardedFor, "X-Forwarded-For");
                if (isTrustedSourceOfType(forwardedFor, candidateType))
                {
                    return forwardedFor;
                }
                break;
            case Waap::TrustedSources::TrustedSourceType::SM_USER:
                loadHeader(smUser, "sm_user");
                if (isTrustedSourceOfType(smUser, candidateType))
                {
                    return smUser;
                }
                break;
            case Waap::TrustedSources::TrustedSourceType::COOKIE_OAUTH2_PROXY:
                loadHeader(cookieHeader, "Cookie");
                if (isTrustedSourceOfType(cookieHeader, candidateType))
                {
                    return cookieHeader;
                }
                break;
            default:
                dbgWarning(D_WAAP) << "unrecognized trusted source identifier type: " << candidateType;
                break;
        }
    }

    return "";
}
// Logs a (key, keyword) observation for `source` into the confidence calculator and,
// when a trusted source is given, into the trusted calculator as well.
// The "probing" and "repetition" keywords are never registered.
void IndicatorFilterBase::registerKeyword(const std::string& key,
    const std::string& keyword,
    const std::string& source,
    const std::string& trusted_src)
{
    dbgTrace(D_WAAP) << "registering keyword: " << keyword << " for parameter: " << key << " from source: " << source;

    const bool isIgnoredKeyword = (keyword == "probing") || (keyword == "repetition");
    if (isIgnoredKeyword)
    {
        dbgTrace(D_WAAP) << "ignoring keyword " << keyword;
        return;
    }

    m_confidence_calc.log(key, keyword, source);

    if (!trusted_src.empty())
    {
        m_trusted_confidence_calc.log(key, keyword, trusted_src);
    }
}

View File

@@ -0,0 +1,56 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "i_indicatorsFilter.h"
#include "i_messaging.h"
#include "waap.h"
#include "TrustedSources.h"
#include "TrustedSourcesConfidence.h"
#include "ConfidenceCalculator.h"
#include "TuningDecisions.h"
// Common base of the indicator filters: owns a regular and a trusted-sources confidence
// calculator plus the current trusted-sources policy.
class IndicatorFilterBase : public I_IndicatorsFilter
{
public:
// confidence_path / trusted_path: backing files of the two calculators.
// The remaining thresholds, `tuning` and `ignoreSources` go to the regular calculator.
IndicatorFilterBase(const std::string& confidence_path,
const std::string& trusted_path,
const std::string& remotePath,
const std::string& assetId,
size_t min_sources,
size_t min_intervals,
std::chrono::minutes interval_duration,
double ratio_threshold,
const std::string& null_obj,
TuningDecision* tuning,
I_IgnoreSources* ignoreSources = nullptr);
// Moves keywords rejected by shouldFilterKeyword() from `keywords` to `filteredKeywords`.
virtual void filterKeywords(const std::string &key, Waap::Keywords::KeywordsSet& keywords,
std::vector<std::string>& filteredKeywords);
// Installs a new policy; returns true when it differs from a previously set one.
bool setTrustedSrcParameter(std::shared_ptr<Waap::TrustedSources::TrustedSourcesParameter> policy);
// Resets both calculators.
void reset();
protected:
// First trusted identifier found for the transaction, or "" when none.
std::string getTrustedSource(IWaf2Transaction* pTransaction);
// Logs the keyword into the calculators ("probing"/"repetition" are skipped).
void registerKeyword(const std::string& key,
const std::string& keyword,
const std::string& source,
const std::string& trusted_src);
ConfidenceCalculator m_confidence_calc;
TrustedSourcesConfidenceCalculator m_trusted_confidence_calc;
// Current trusted-sources policy; nullptr until setTrustedSrcParameter() is called.
std::shared_ptr<Waap::TrustedSources::TrustedSourcesParameter> m_policy;
// Non-owning pointer supplied by the creator.
TuningDecision* m_tuning;
private:
bool isTrustedSourceOfType(const std::string& source, Waap::TrustedSources::TrustedSourceType srcType);
};

View File

@@ -0,0 +1,317 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "IndicatorsFiltersManager.h"
#include "WaapConfigApi.h"
#include "WaapConfigApplication.h"
#include <vector>
#include "Waf2Util.h"
#include "FpMitigation.h"
#include "Waf2Engine.h"
#include "WaapKeywords.h"
// Restores persisted state from "<signatures filter dir>/6.data", then creates the
// keyword-frequency filter and the type filter. Both filters receive pointers to this
// manager's tuning decisions; the keyword filter also receives the ignore-sources object.
IndicatorsFiltersManager::IndicatorsFiltersManager(
    const std::string& remotePath,
    const std::string &assetId,
    I_WaapAssetState* pWaapAssetState)
    :
    SerializeToFileBase(pWaapAssetState->getSignaturesFilterDir() + "/6.data"),
    m_ignoreSources(pWaapAssetState->getSignaturesFilterDir(), remotePath, assetId),
    m_tuning(remotePath)
{
    restore();

    m_keywordsFreqFilter = std::make_unique<KeywordIndicatorFilter>(
        pWaapAssetState->getSignaturesFilterDir(),
        remotePath,
        assetId,
        &m_ignoreSources,
        &m_tuning
    );

    m_typeFilter = std::make_unique<TypeIndicatorFilter>(
        pWaapAssetState,
        remotePath,
        assetId,
        &m_tuning
    );
}
// Nothing to release explicitly; members clean up after themselves.
IndicatorsFiltersManager::~IndicatorsFiltersManager() {}
// Registers the keywords seen for `key` with the filters.
// Nothing is registered when tuning marks the last scanned parameter name, the last
// scanned sample, the URI or the source identifier as MALICIOUS.
// Keys starting with "url#" are only fed to the keyword-frequency filter; other keys also
// go to the type filter and, for parameters typed "html_input", are registered once more
// under that type name.
void IndicatorsFiltersManager::registerKeywords(const std::string& key,
    Waap::Keywords::KeywordsSet& keywords,
    IWaf2Transaction* pWaapTransaction)
{
    const bool taggedMalicious =
        m_tuning.getDecision(pWaapTransaction->getLastScanParamName(), PARAM_NAME) == MALICIOUS ||
        m_tuning.getDecision(pWaapTransaction->getLastScanSample(), PARAM_VALUE) == MALICIOUS ||
        m_tuning.getDecision(pWaapTransaction->getUri(), URL) == MALICIOUS ||
        m_tuning.getDecision(pWaapTransaction->getSourceIdentifier(), SOURCE) == MALICIOUS;
    if (taggedMalicious)
    {
        return;
    }

    if (!keywords.empty())
    {
        m_ignoreSources.log(pWaapTransaction->getSourceIdentifier(), key, keywords);
    }

    m_keywordsFreqFilter->registerKeywords(key, keywords, pWaapTransaction);

    // rfind(prefix, 0) == 0  <=>  key starts with the prefix
    const bool isUrlKey = (key.rfind("url#", 0) == 0);
    if (isUrlKey)
    {
        return;
    }

    m_typeFilter->registerKeywords(key, keywords, pWaapTransaction);

    auto paramTypes = getParameterTypes(key);
    for (const auto& paramType : paramTypes)
    {
        if (paramType != "html_input")
        {
            continue;
        }
        m_keywordsFreqFilter->registerKeywords(paramType, keywords, pWaapTransaction);
    }
}
// Returns true when any filter votes to drop `keyword` for `key`:
//  - the keyword-frequency filter, queried with the key itself
//  - the type filter, queried with the key itself
//  - the keyword-frequency filter, queried with each parameter type of the key
// Every available filter is consulted (no short-circuit). A missing filter is skipped;
// the per-type queries need both filters and are skipped unless both exist.
bool IndicatorsFiltersManager::shouldFilterKeyword(const std::string &key, const std::string &keyword) const
{
    bool shouldFilter = false;
    if (m_keywordsFreqFilter != nullptr)
    {
        shouldFilter |= m_keywordsFreqFilter->shouldFilterKeyword(key, keyword);
    }
    if (m_typeFilter != nullptr)
    {
        shouldFilter |= m_typeFilter->shouldFilterKeyword(key, keyword);
        auto types = getParameterTypes(key);
        // Guard the frequency filter here too: it was only checked for the direct query above.
        if (m_keywordsFreqFilter != nullptr)
        {
            for (const auto& type : types)
            {
                shouldFilter |= m_keywordsFreqFilter->shouldFilterKeyword(type, keyword);
            }
        }
    }
    return shouldFilter;
}
void IndicatorsFiltersManager::serialize(std::ostream& stream)
{
cereal::JSONOutputArchive archive(stream);
archive(cereal::make_nvp("version", 1), cereal::make_nvp("trustedSrcParams", m_trustedSrcParams));
}
void IndicatorsFiltersManager::deserialize(std::istream& stream)
{
cereal::JSONInputArchive archive(stream);
size_t version = 0;
try
{
archive(cereal::make_nvp("version", version));
}
catch (std::runtime_error & e) {
archive.setNextName(nullptr);
version = 0;
dbgDebug(D_WAAP) << "Can't load file version: " << e.what();
}
switch (version)
{
case 0:
archive(cereal::make_nvp("m_trustedSrcParams", m_trustedSrcParams));
break;
case 1:
archive(cereal::make_nvp("trustedSrcParams", m_trustedSrcParams));
break;
default:
dbgWarning(D_WAAP) << "unknown file format version: " << version;
break;
}
}
// Parameter types the type filter reports for the given canonical parameter.
std::set<std::string> IndicatorsFiltersManager::getParameterTypes(const std::string& canonicParam) const
{
    std::set<std::string> paramTypes = m_typeFilter->getParamTypes(canonicParam);
    return paramTypes;
}
// Applies a new configuration to the filters.
//  - trusted-sources policy: pushed to both filters; state is saved when either reports a change
//  - WAAP parameters: pushed to both filters and to the ignore-sources object
// Returns false (after logging) when `pConfig` is NULL, true otherwise.
bool IndicatorsFiltersManager::loadPolicy(IWaapConfig* pConfig)
{
    if (pConfig == NULL)
    {
        dbgWarning(D_WAAP) << "Failed to get configuration";
        return false;
    }

    bool shouldSave = false;

    m_trustedSrcParams = pConfig->get_TrustedSourcesPolicy();
    if (m_trustedSrcParams != nullptr)
    {
        shouldSave = m_keywordsFreqFilter->setTrustedSrcParameter(m_trustedSrcParams);
        shouldSave |= m_typeFilter->setTrustedSrcParameter(m_trustedSrcParams);
    }

    auto waapParams = pConfig->get_WaapParametersPolicy();
    if (waapParams != nullptr)
    {
        m_keywordsFreqFilter->loadParams(waapParams);
        m_typeFilter->loadParams(waapParams);
        m_ignoreSources.loadParams(waapParams);
    }

    if (shouldSave)
    {
        saveData();
    }
    return true;
}
// Explains, per filter, why each keyword in `filteredKeywords` was filtered for `param`.
//
// `filteredKeywordsVerbose` always receives an entry (possibly empty) for each filter name.
// Entries are appended as:
//   - "<param>#<keyword>"         under the type filter, when the type filter matches;
//   - "<param>#<keyword>"         under the frequency filter, when it matches keyed by param;
//   - "<param>#<type>#<keyword>"  under the frequency filter, when it matches keyed by one of
//                                 the parameter's types.
//
// The per-type check is evaluated independently of the per-param frequency check, mirroring
// shouldFilterKeyword(): a keyword filtered only because of its parameter type must still be
// reported here.
void IndicatorsFiltersManager::filterVerbose(const std::string &param,
    std::vector<std::string>& filteredKeywords,
    std::map<std::string, std::vector<std::string>>& filteredKeywordsVerbose)
{
    static const std::string typeFilterName = "type indicators filter";
    static const std::string keywordsFilterName = "keywords frequency indicators filter";
    // operator[] is used on purpose: it creates the (empty) report entries up front.
    filteredKeywordsVerbose[typeFilterName];
    filteredKeywordsVerbose[keywordsFilterName];
    const auto types = getParameterTypes(param);
    for (const auto& keyword : filteredKeywords)
    {
        if (m_typeFilter->shouldFilterKeyword(param, keyword))
        {
            filteredKeywordsVerbose[typeFilterName].push_back(param + "#" + keyword);
        }
        if (m_keywordsFreqFilter->shouldFilterKeyword(param, keyword))
        {
            filteredKeywordsVerbose[keywordsFilterName].push_back(param + "#" + keyword);
        }
        for (const auto& type : types)
        {
            if (m_keywordsFreqFilter->shouldFilterKeyword(type, keyword))
            {
                filteredKeywordsVerbose[keywordsFilterName].push_back(param + "#" + type + "#" + keyword);
            }
        }
    }
}
// Resets both managed filters by forwarding to their reset() methods
// (type filter first, then the keywords-frequency filter).
void IndicatorsFiltersManager::reset()
{
m_typeFilter->reset();
m_keywordsFreqFilter->reset();
}
// Reduces a Referer value to the form used for key generation.
//
// The scheme ("...://") is removed when present. If what remains has no '/', it is returned
// as is. Otherwise, when the part before the first '/' equals the transaction's "host" header,
// only the path (starting at that '/') is returned; for any other host the scheme-less value
// is returned unchanged.
std::string IndicatorsFiltersManager::extractUri(const std::string& referer, const IWaf2Transaction* pTransaction)
{
    const size_t schemePos = referer.find("://");
    std::string withoutScheme =
        (schemePos == std::string::npos) ? referer : referer.substr(schemePos + 3);

    const size_t pathPos = withoutScheme.find('/');
    if (pathPos == std::string::npos)
    {
        return withoutScheme;
    }

    const std::string refererHost = withoutScheme.substr(0, pathPos);
    if (refererHost == pTransaction->getHdrContent("host"))
    {
        return withoutScheme.substr(pathPos);
    }
    return withoutScheme;
}
std::string IndicatorsFiltersManager::generateKey(const std::string& location,
const std::string& param_name,
const IWaf2Transaction* pTransaction)
{
std::string key = location;
static const std::string delim = "#";
std::string param = normalize_param(param_name);
if (location == "header" || location == "cookie" || location == "url_param")
{
key += delim + param;
}
else if (location == "referer_param")
{
key = "url_param" + delim + param;
}
else if (location == "body")
{
if (param == "")
{
key += delim + normalize_uri(pTransaction->getUriStr());
}
else
{
key += delim + param;
}
}
else if (location == "url")
{
key += delim + normalize_uri(pTransaction->getUriStr());
}
else if (location == "referer")
{
std::string referer = pTransaction->getHdrContent("referer");
std::string uri = extractUri(referer, pTransaction);
key = "url" + delim + normalize_uri(uri);
}
else
{
key = normalize_uri(pTransaction->getUriStr()) + delim + param;
}
return key;
}
// Returns the known location name that prefixes `canonicKey` (i.e. the key starts with
// "<location>#"), or an empty string when the key starts with none of them.
// `pTransaction` is not used by this function.
std::string IndicatorsFiltersManager::getLocationFromKey(const std::string& canonicKey, IWaf2Transaction* pTransaction)
{
    (void)pTransaction;
    // Built once instead of on every call; each entry already carries the '#' delimiter.
    static const std::vector<std::string> known_prefixes = {
        "header#", "cookie#", "url#", "body#", "referer#", "url_param#"
    };
    for (const auto& prefix : known_prefixes)
    {
        // rfind(prefix, 0) only tests position 0, unlike find() which scans the whole key
        // before reporting a miss (same idiom as pushSample()).
        if (canonicKey.rfind(prefix, 0) == 0)
        {
            return prefix.substr(0, prefix.size() - 1);
        }
    }
    return "";
}
void IndicatorsFiltersManager::filterKeywords(
const std::string &key,
Waap::Keywords::KeywordsSet& keywords,
std::vector<std::string>& filteredKeywords)
{
for (auto keyword = keywords.begin(); keyword != keywords.end(); )
{
if (shouldFilterKeyword(key, *keyword))
{
filteredKeywords.push_back(*keyword);
keyword = keywords.erase(keyword);
}
else
{
keyword++;
}
}
}
// Forwards `sample` to the type filter for `key`, except for keys that start with "url#",
// which are ignored.
void IndicatorsFiltersManager::pushSample(
    const std::string& key,
    const std::string& sample,
    IWaf2Transaction* pTransaction)
{
    const bool isUrlKey = (key.rfind("url#", 0) == 0);
    if (!isUrlKey)
    {
        m_typeFilter->registerKeywords(key, sample, pTransaction);
    }
}

View File

@@ -0,0 +1,70 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "IndicatorsFilterBase.h"
#include "TrustedSources.h"
#include "KeywordIndicatorFilter.h"
#include "TypeIndicatorsFilter.h"
#include "WaapParameters.h"
#include "i_waapConfig.h"
#include "i_messaging.h"
#include "ScannersDetector.h"
#include "TuningDecisions.h"
#include <cereal/cereal.hpp>
#include <cereal/types/memory.hpp>
#include <cereal/archives/json.hpp>
using namespace Waap::Parameters;
class IWaf2Transaction;
struct Waf2ScanResult;
// Facade over the keywords-frequency filter and the type filter: keyword registration and
// filtering queries are dispatched to the two filters, and the trusted-sources parameters
// are (de)serialized as JSON.
class IndicatorsFiltersManager : public I_IndicatorsFilter, public SerializeToFileBase
{
public:
IndicatorsFiltersManager(const std::string &remotePath, const std::string &assetId,
I_WaapAssetState* pWaapAssetState);
~IndicatorsFiltersManager();
// Registers the keywords seen for `key` in the given transaction.
virtual void registerKeywords(const std::string& key, Waap::Keywords::KeywordsSet& keywords,
IWaf2Transaction* pWaapTransaction);
// True when any managed filter votes to filter `keyword` for `key`.
virtual bool shouldFilterKeyword(const std::string &key, const std::string &keyword) const;
// Erases from `keywords` every keyword that should be filtered and appends it to `filteredKeywords`.
virtual void filterKeywords(const std::string &key, Waap::Keywords::KeywordsSet& keywords,
std::vector<std::string>& filteredKeywords);
// Forwards a sample to the type filter; keys starting with "url#" are ignored.
void pushSample(const std::string& key, const std::string& sample, IWaf2Transaction* pTransaction);
// Applies trusted-sources and WAAP parameters from `pConfig`; returns false when `pConfig` is null.
bool loadPolicy(IWaapConfig* pConfig);
// Resets both managed filters.
void reset();
// Fills `filteredKeywordsVerbose` with, per filter name, the entries explaining which
// filter matched each keyword of `filteredKeywords`.
void filterVerbose(const std::string &param,
std::vector<std::string>& filteredKeywords,
std::map<std::string, std::vector<std::string>>& filteredKeywordsVerbose);
// Returns the known location prefix of `canonicKey` ("header", "cookie", ...), or "" if none matches.
static std::string getLocationFromKey(const std::string& canonicKey, IWaf2Transaction* pTransaction);
// Builds the canonical "<location>#<name>" key for a parameter of the given transaction.
static std::string generateKey(const std::string& location,
const std::string& param,
const IWaf2Transaction* pTransaction);
// JSON (de)serialization of the version tag and the trusted-sources parameters.
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
// Parameter types reported by the type filter for `canonicParam`.
virtual std::set<std::string> getParameterTypes(const std::string& canonicParam) const;
private:
// Strips the scheme (and the host, when it equals the transaction's "host" header) from a Referer value.
static std::string extractUri(const std::string& referer, const IWaf2Transaction* pTransaction);
std::unique_ptr<KeywordIndicatorFilter> m_keywordsFreqFilter;
std::unique_ptr<TypeIndicatorFilter> m_typeFilter;
std::shared_ptr<Waap::TrustedSources::TrustedSourcesParameter> m_trustedSrcParams;
ScannerDetector m_ignoreSources;
TuningDecision m_tuning;
};

View File

@@ -0,0 +1,60 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// #define WAF2_LOGGING_ENABLE
#include "debug.h"
#include "KeyStack.h"
#include <string.h>
#include "assert.h"
USE_DEBUG_FLAG(D_WAAP);
// Creates an empty key stack with depth 0. `name` is stored as a raw pointer (not copied)
// and is only used to tag log messages.
KeyStack::KeyStack(const char* name)
:m_name(name), m_nameDepth(0) {
}
// Appends a subkey (`subkeySize` bytes starting at `subkey`) to the full key.
// The offset at which the new subkey starts is recorded so pop() can truncate back to it.
// When `countDepth` is true the name depth is incremented.
void KeyStack::push(const char* subkey, size_t subkeySize, bool countDepth) {
    const size_t previousSize = m_key.size();
    m_stack.push_back(previousSize);

    const std::string subkeyStr(subkey, subkeySize);
    // Every subkey except the first one is separated from its predecessor by '.'
    if (previousSize != 0) {
        m_key += '.';
    }
    m_key += subkeyStr;

    if (countDepth) {
        ++m_nameDepth;
    }

    dbgTrace(D_WAAP) << "KeyStack(" << m_name << ")::push(): '" << subkeyStr <<
        "' => full_key='" << m_key << "'";
}
// Removes the most recently pushed subkey from the full key.
// `log` is a caller-supplied tag that is only written to the debug/trace output.
// When `countDepth` is true the name depth is decremented.
//
// Only an empty offset stack counts as "nothing to pop". The key string itself may be
// empty while the stack is not (push() of an empty first subkey leaves the key empty),
// and such a push must still be undone here; otherwise the offset stack and the depth
// counter would stay unbalanced for the rest of the parse.
void KeyStack::pop(const char* log, bool countDepth) {
    if (m_stack.empty()) {
        dbgDebug(D_WAAP) << "KeyStack(" << m_name << ")::pop(): [ERROR] ATTEMPT TO POP FROM EMPTY KEY STACK! " << log;
        return;
    }

    if (countDepth) {
        m_nameDepth--;
    }

    // Remove last subkey: truncate the key back to where that subkey (and its '.') began.
    // The recorded offset never exceeds the current key size, so erase() cannot throw.
    m_key.erase(m_stack.back());
    m_stack.pop_back();
    dbgTrace(D_WAAP) << "KeyStack(" << m_name << ")::pop(): full_key='" <<
        m_key << "': pop_key='" << log << "'";
}

View File

@@ -0,0 +1,81 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __KEYSTACK_H__0a8039e6
#define __KEYSTACK_H__0a8039e6
#include <stddef.h>
#include <string>
#include <vector>
// Represent string (key) that is concatenation of substrings (subkeys) separated by '.' character.
// Mostly emulates API of C++ std::string class, with addition of push() and pop() methods
// that append individual subkey and delete last subkey from the string efficiently.
class KeyStack {
public:
    KeyStack(const char *name);

    // Append a subkey / remove the last subkey (defined in KeyStack.cc).
    void push(const char *subkey, size_t subkeySize, bool countDepth=true);
    void pop(const char* log, bool countDepth=true);

    // True when the full key string (including the first subkey) is empty.
    bool empty() const { return m_key.empty(); }
    // Drops the key string and the recorded subkey offsets (the depth counter is left untouched).
    void clear() { m_key.clear(); m_stack.clear(); }
    size_t depth() const { return m_nameDepth; }
    // Length of the string returned by str().
    size_t size() const {
        return str().size();
    }

    // c_str() and str() expose the key WITHOUT its first subkey:
    //   - nothing pushed, or pushed once: empty string (the first subkey is only available
    //     through first());
    //   - pushed twice or more: all subkeys starting from the second one;
    //   - pushed twice or more but with empty subkeys, so that nothing follows the first
    //     separator: empty string as well.
    const char *c_str() const {
        return hasTail() ? m_key.c_str() + m_stack[1] + 1 : "";
    }

    const std::string str() const {
        if (!hasTail()) {
            return "";
        }
        return m_key.substr(m_stack[1] + 1);
    }

    // Returns the first pushed subkey (empty string when nothing was pushed).
    const std::string first() const {
        switch (m_stack.size()) {
            case 0:
                return "";
            case 1:
                return m_key;
            default:
                // At least two entries, so m_stack[1] (start of the second subkey) is valid.
                return m_key.substr(0, m_stack[1]);
        }
    }

private:
    // True when at least two subkeys were pushed and there is text after the separator
    // that follows the first subkey.
    bool hasTail() const {
        return m_stack.size() > 1 && m_stack[1] + 1 < m_key.size();
    }

    const char *m_name;
    std::string m_key;
    std::vector<size_t> m_stack; // position of individual key name starts in m_key,
                                // used to backtrack 1 key at a time.
    int m_nameDepth;
};
#endif // __KEYSTACK_H__0a8039e6

View File

@@ -0,0 +1,125 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "KeywordIndicatorFilter.h"
#include "waap.h"
#include "WaapConfigApi.h"
#include "WaapConfigApplication.h"
#include "FpMitigation.h"
#include "i_transaction.h"
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/trim.hpp>
// File paths used by the keywords filter inside `dirPath`. Both path macros expand to an
// unparenthesized `dirPath + "<file>"` expression, so they must only be used where that
// expansion cannot bind to a neighbouring operator.
#define KEYWORDS_FILTER_PATH(dirPath) dirPath + "/5.data"
#define KEYWORDS_FILTER_TRUSTED_PATH(dirPath) dirPath + "/7.data"
// Prefixes a string literal with "KeywordsFilter." via literal concatenation.
#define KEYWORD_FILTER_PARAM(var) "KeywordsFilter." var
// Builds the keywords-frequency filter on top of IndicatorFilterBase.
// The base receives the two data-file paths derived from `dirPath` ("/5.data" and "/7.data"),
// the remote path with "/Indicators" appended (an empty remote path is passed through
// unchanged), the asset id, the confidence parameters, an empty string argument
// (NOTE(review): its meaning is defined by IndicatorFilterBase - confirm there),
// and the tuning / ignore-sources collaborators.
KeywordIndicatorFilter::KeywordIndicatorFilter(std::string dirPath,
const std::string& remotePath,
const std::string& assetId,
I_IgnoreSources* ignoreSources,
TuningDecision* tuning,
size_t minSources,
size_t minIntervals,
std::chrono::minutes intervalDuration,
double ratioThreshold) : IndicatorFilterBase(KEYWORDS_FILTER_PATH(dirPath),
KEYWORDS_FILTER_TRUSTED_PATH(dirPath),
(remotePath == "") ? remotePath : remotePath + "/Indicators",
assetId,
minSources,
minIntervals,
intervalDuration,
ratioThreshold,
"",
tuning,
ignoreSources)
{
// Tag the confidence calculator with this filter's name.
m_confidence_calc.setOwner("KeywordIndicatorFilter");
}
// Nothing to release explicitly here.
KeywordIndicatorFilter::~KeywordIndicatorFilter()
{
}
// Records in the confidence calculator that `source` was seen for parameter `key`
// (used by registerKeywords() when a request carries no keywords for that key).
void KeywordIndicatorFilter::registerSource(const std::string &key, const std::string &source)
{
dbgTrace(D_WAAP) << "registering source: " << source << " for parameter: " << key;
m_confidence_calc.logSourceHit(key, source);
}
// Returns true when `keyword` is considered confident for `key` by any of:
//   - the regular confidence calculator, for the keyword as given;
//   - the trusted-sources confidence calculator (only when a trusted-sources policy is set);
//   - the regular confidence calculator, for the whitespace-trimmed keyword.
// All applicable checks are always evaluated.
bool KeywordIndicatorFilter::shouldFilterKeyword(const std::string &key, const std::string &keyword) const
{
    const bool rawConfident = m_confidence_calc.is_confident(key, keyword);

    bool trustedConfident = false;
    if (m_policy != nullptr)
    {
        trustedConfident = m_trusted_confidence_calc.is_confident(key, keyword, m_policy->getNumOfSources());
    }

    const std::string trimmedKeyword = boost::algorithm::trim_copy(keyword);
    const bool trimmedConfident = m_confidence_calc.is_confident(key, trimmedKeyword);

    return rawConfident || trustedConfident || trimmedConfident;
}
// Reads the "learnIndicators.*" and "remoteSync" parameters from `pParams` (falling back to
// the CONFIDENCE_* defaults / "true"), applies the remote-sync flag to both confidence
// calculators and resets the regular calculator with the new parameters.
// Returns whatever m_confidence_calc.reset() returns.
// Numeric values are parsed with std::stoul / std::stod, which throw on malformed input.
bool KeywordIndicatorFilter::loadParams(std::shared_ptr<Waap::Parameters::WaapParameters> pParams)
{
    ConfidenceCalculatorParams calcParams;

    const std::string minSourcesStr =
        pParams->getParamVal("learnIndicators.minSources", std::to_string(CONFIDENCE_MIN_SOURCES));
    calcParams.minSources = std::stoul(minSourcesStr);

    const std::string minIntervalsStr =
        pParams->getParamVal("learnIndicators.minIntervals", std::to_string(CONFIDENCE_MIN_INTERVALS));
    calcParams.minIntervals = std::stoul(minIntervalsStr);

    const std::string intervalDurationStr =
        pParams->getParamVal("learnIndicators.intervalDuration",
            std::to_string(CONFIDENCE_WINDOW_INTERVAL.count()));
    calcParams.intervalDuration = std::chrono::minutes(std::stoul(intervalDurationStr));

    const std::string ratioStr =
        pParams->getParamVal("learnIndicators.ratio", std::to_string(CONFIDENCE_THRESHOLD));
    calcParams.ratioThreshold = std::stod(ratioStr);

    // Anything other than (case-insensitive) "false" keeps the flag enabled.
    const std::string learnPermanentlyValue = pParams->getParamVal("learnIndicators.learnPermanently", "true");
    calcParams.learnPermanently = !boost::iequals(learnPermanentlyValue.c_str(), "false");

    const std::string remoteSyncValue = pParams->getParamVal("remoteSync", "true");
    const bool remoteSyncOn = !boost::iequals(remoteSyncValue, "false");

    dbgTrace(D_WAAP) << calcParams << " remote sync: " << remoteSyncValue;

    m_confidence_calc.setRemoteSyncEnabled(remoteSyncOn);
    m_trusted_confidence_calc.setRemoteSyncEnabled(remoteSyncOn);

    return m_confidence_calc.reset(calcParams);
}
// Registers the keywords seen for `key` under the transaction's source identifier.
//
// - With no keywords, only the source hit is recorded.
// - Every keyword is registered in trimmed form.
// - When a tuning decision marks the transaction's URI, or its last scanned sample, as BENIGN,
//   the same keywords are registered a second time under "TuningDecisionSource_<source>".
void KeywordIndicatorFilter::registerKeywords(const std::string& key, Waap::Keywords::KeywordsSet& keywords,
    IWaf2Transaction* pTransaction)
{
    std::string source(pTransaction->getSourceIdentifier());
    std::string trusted_source = getTrustedSource(pTransaction);

    // Registers all keywords (trimmed copies) under the current value of `source`.
    const auto registerAllKeywords = [&]() {
        for (auto keyword : keywords)
        {
            boost::algorithm::trim(keyword);
            registerKeyword(key, keyword, source, trusted_source);
        }
    };

    if (keywords.empty())
    {
        registerSource(key, source);
    }
    registerAllKeywords();

    if (m_tuning == nullptr)
    {
        return;
    }
    const bool markedBenign =
        m_tuning->getDecision(pTransaction->getUri(), URL) == BENIGN ||
        m_tuning->getDecision(pTransaction->getLastScanSample(), PARAM_VALUE) == BENIGN;
    if (!markedBenign)
    {
        return;
    }

    source = "TuningDecisionSource_" + source;
    registerAllKeywords();
}

View File

@@ -0,0 +1,48 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "IndicatorsFilterBase.h"
#include "ConfidenceCalculator.h"
#include "WaapParameters.h"
// Default confidence parameters: used as the KeywordIndicatorFilter constructor defaults and
// as the fallback values when the corresponding "learnIndicators.*" parameters are not set.
#define CONFIDENCE_MIN_SOURCES 3
#define CONFIDENCE_MIN_INTERVALS 5
#define CONFIDENCE_THRESHOLD 0.8
#define CONFIDENCE_WINDOW_INTERVAL std::chrono::minutes(120)
// Indicator filter that decides, per parameter key, whether a keyword should be filtered
// based on the confidence calculators inherited from IndicatorFilterBase.
class KeywordIndicatorFilter : public IndicatorFilterBase
{
public:
// `dirPath` is the directory holding this filter's data files; the confidence parameters
// default to the CONFIDENCE_* macros above.
KeywordIndicatorFilter(std::string dirPath,
const std::string& remotePath,
const std::string& assetId,
I_IgnoreSources* ignoreSources,
TuningDecision* tuning = nullptr,
size_t minSources = CONFIDENCE_MIN_SOURCES,
size_t minIntervals = CONFIDENCE_MIN_INTERVALS,
std::chrono::minutes intervalDuration = CONFIDENCE_WINDOW_INTERVAL,
double ratioThreshold = CONFIDENCE_THRESHOLD);
~KeywordIndicatorFilter();
// Registers the keywords of a transaction for `key` (or just the source when there are none).
virtual void registerKeywords(const std::string& key, Waap::Keywords::KeywordsSet& keywords,
IWaf2Transaction* pTransaction);
// True when `keyword` (as given or trimmed) is confident for `key`.
virtual bool shouldFilterKeyword(const std::string &key, const std::string &keyword) const;
// Loads "learnIndicators.*" / "remoteSync" parameters and resets the confidence calculator.
bool loadParams(std::shared_ptr<Waap::Parameters::WaapParameters> pParams);
private:
// Records a source hit for `key` without any keyword.
void registerSource(const std::string &key, const std::string &source);
};

View File

@@ -0,0 +1,81 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "KeywordTypeValidator.h"
#include <cereal/archives/json.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/types/unordered_set.hpp>
#include "debug.h"
#include "Waf2Util.h"
USE_DEBUG_FLAG(D_WAAP);
// Binds the validator to `mapFilePath` and immediately calls restore() to populate the
// keyword-to-types map (the map content is parsed by deserialize()).
KeywordTypeValidator::KeywordTypeValidator(const std::string& mapFilePath) :
SerializeToFileBase(mapFilePath),
m_keywordTypeMap()
{
restore();
}
// Nothing to release explicitly here.
KeywordTypeValidator::~KeywordTypeValidator()
{
}
// Intentionally a no-op: this class never writes its map out (see saveData()).
void KeywordTypeValidator::serialize(std::ostream& stream)
{
(void)stream;
}
// Intentionally a no-op so that the existing map file is never overwritten.
void KeywordTypeValidator::saveData()
{
// do not override existing file
}
void KeywordTypeValidator::deserialize(std::istream& stream)
{
cereal::JSONInputArchive archive(stream);
std::unordered_map<std::string, std::unordered_set<std::string>> typesStrToKeysMap;
archive(cereal::make_nvp("keywordsTypeMap", typesStrToKeysMap));
for (auto typeStrItr : typesStrToKeysMap)
{
ParamType type = Waap::Util::convertTypeStrToEnum(typeStrItr.first);
for (auto keyword : typeStrItr.second)
{
if (m_keywordTypeMap.find(keyword) == m_keywordTypeMap.end())
{
// initialize type set
m_keywordTypeMap[keyword];
}
m_keywordTypeMap[keyword].insert(type);
}
}
}
// Returns true when `keyword` is present in the loaded map and `type` is one of the types
// recorded for it. Unknown keywords are traced and yield false.
bool KeywordTypeValidator::isKeywordOfType(const std::string& keyword, ParamType type) const
{
    const auto entry = m_keywordTypeMap.find(keyword);
    if (entry == m_keywordTypeMap.end())
    {
        dbgTrace(D_WAAP) << "keyword: " << keyword << " not found";
        return false;
    }

    const auto& knownTypes = entry->second;
    return knownTypes.find(type) != knownTypes.end();
}

View File

@@ -0,0 +1,35 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <unordered_map>
#include <unordered_set>
#include "WaapEnums.h"
#include "i_serialize.h"
// Read-only lookup of which parameter types a keyword belongs to, loaded from a map file.
class KeywordTypeValidator : public SerializeToFileBase
{
public:
// Loads the map from `mapFilePath` on construction.
KeywordTypeValidator(const std::string& mapFilePath);
~KeywordTypeValidator();
// True when `keyword` is known and `type` is among its recorded types.
bool isKeywordOfType(const std::string& keyword, ParamType type) const;
// serialize() and saveData() are no-ops; deserialize() fills the map from JSON.
virtual void serialize(std::ostream& stream);
virtual void deserialize(std::istream& stream);
virtual void saveData();
private:
// keyword -> set of parameter types the keyword is valid for.
std::unordered_map<std::string, std::unordered_set<ParamType>> m_keywordTypeMap;
};

View File

@@ -0,0 +1,63 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "LogGenWrapper.h"
#include "log_generator.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP);
// Creates the wrapped LogGen.
// When a log trigger is available it is used together with the security type and the
// drop/prevent flag; otherwise a warning is logged and the LogGen is built from the
// title, audience, severity and priority alone. Both variants are tagged WAF and
// THREAT_PREVENTION.
LogGenWrapper::LogGenWrapper(
    const Maybe<LogTriggerConf, Config::Errors>& maybe_trigger,
    const std::string& title,
    const ReportIS::Audience& audience,
    const LogTriggerConf::SecurityType& security_type,
    const ReportIS::Severity& severity,
    const ReportIS::Priority& priority,
    bool is_action_drop_or_prevent) : m_log_gen(nullptr)
{
    if (maybe_trigger.ok()) {
        m_log_gen = std::make_unique<LogGen>(
            maybe_trigger.unpack(),
            title,
            security_type,
            severity,
            priority,
            is_action_drop_or_prevent,
            ReportIS::Tags::WAF,
            ReportIS::Tags::THREAT_PREVENTION
        );
        return;
    }

    // No trigger: report why and fall back to the trigger-less LogGen constructor.
    dbgWarning(D_WAAP) << "Couldn't get log trigger from the I/S. "
        "Continuing with waap log trigger policy..."
        "Reason: " << maybe_trigger.getErr();
    m_log_gen = std::make_unique<LogGen>(
        title,
        audience,
        severity,
        priority,
        ReportIS::Tags::WAF,
        ReportIS::Tags::THREAT_PREVENTION
    );
}
// Defined out of line; the owned LogGen is released by the m_log_gen unique_ptr.
LogGenWrapper::~LogGenWrapper()
{
}
// Returns the wrapped LogGen (the constructor assigns it on both of its paths).
LogGen& LogGenWrapper::getLogGen()
{
return *m_log_gen;
}

View File

@@ -0,0 +1,46 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __LOG_GEN_WRAPPER_H__
#define __LOG_GEN_WRAPPER_H__
#include "report/report_enums.h"
#include "maybe_res.h"
#include "config.h"
#include "generic_rulebase/triggers_config.h"
#include <string>
#include <memory>
class LogTriggerConf;
class LogGen;
// Owns a LogGen that is built either from a log trigger (when one is available) or from
// the plain title/audience/severity/priority values.
class LogGenWrapper
{
public:
// `maybe_trigger` selects which LogGen constructor is used; see the definition for details.
LogGenWrapper(
const Maybe<LogTriggerConf, Config::Errors>& maybe_trigger,
const std::string& title,
const ReportIS::Audience& audience,
const LogTriggerConf::SecurityType& security_type,
const ReportIS::Severity& severity,
const ReportIS::Priority& priority,
bool is_action_drop_or_prevent);
~LogGenWrapper();
// Access to the wrapped LogGen.
LogGen& getLogGen();
private:
std::unique_ptr<LogGen> m_log_gen;
};
#endif

View File

@@ -0,0 +1,34 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "OpenRedirectDecision.h"
// Creates a decision of the given type with an empty link.
OpenRedirectDecision::OpenRedirectDecision(DecisionType type) :
SingleDecision(type),
m_link("")
{}
// Human-readable name of this decision type.
std::string OpenRedirectDecision::getTypeStr() const
{
return "Open Redirect";
}
// Stores the link associated with this decision.
void OpenRedirectDecision::setLink(const std::string& link)
{
m_link = link;
}
// Returns a copy of the stored link (empty until setLink() is called).
std::string OpenRedirectDecision::getLink() const
{
return m_link;
}

View File

@@ -0,0 +1,34 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __OPEN_REDIRECT_DEICSION_H__
#define __OPEN_REDIRECT_DEICSION_H__
#include "SingleDecision.h"
#include "DecisionType.h"
#include <string>
// SingleDecision specialization that additionally carries a link string.
class OpenRedirectDecision: public SingleDecision
{
public:
explicit OpenRedirectDecision(DecisionType type);
// Returns "Open Redirect".
std::string getTypeStr() const override;
// Setter / getter for the stored link.
void setLink(const std::string& link);
std::string getLink() const;
private:
std::string m_link;
};
#endif

View File

@@ -0,0 +1,804 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "PHPSerializedDataParser.h"
#include "log_generator.h"
#include <errno.h>
USE_DEBUG_FLAG(D_WAAP_PARSER_PHPSERIALIZE);
const std::string PHPSerializedDataParser::m_parserName = "PHPSerializedDataParser";
// Binds the parser to the receiver that will get the parsed keys/values, value-initializes
// the parser state and names the key stack "php_serialized".
PHPSerializedDataParser::PHPSerializedDataParser(IParserStreamReceiver &outReceiver)
: m_state(), m_outReceiver(outReceiver), m_keyStack("php_serialized")
{
}
// Feeds a chunk of PHP-serialized text into the parser state machine.
//
// @param buf  bytes to parse (not required to be NUL-terminated)
// @param len  number of bytes in `buf`; 0 signals end of input
// @return     (size_t)-1 on a parse error,
//             1 when the end-of-input call (len == 0) completed,
//             0 after a non-empty buffer was consumed.
//
// End of input is only valid while the parser is in the s_start phase; the pending key or
// value (if any) is then flushed to the receiver.
//
// Notes on input handling (the buffer is untrusted):
//   - the type character is passed to tolower() as unsigned char, because calling tolower()
//     with a negative plain-char value is undefined behavior;
//   - the textual length of strings/arrays/objects must be a complete, in-range,
//     non-negative decimal number, otherwise the input is rejected.
size_t
PHPSerializedDataParser::push(const char* buf, size_t len)
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push()";
    size_t i = 0;
    char c;

    if (len == 0)
    {
        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ";
        if(m_state.phase_state != s_start) {
            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ;"
                "phase_state != s_start ; m_state.phase_state: " << m_state.phase_state;
            m_error = true;
            return -1;
        }
        // Flush whatever key/value is still pending.
        switch (m_state.kv_state)
        {
            case (s_onKey):
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ; s_onKey";
                m_outReceiver.onKey(m_value.c_str(), m_value.length());
                break;
            }
            case (s_onValue):
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ; s_onValue";
                m_outReceiver.onValue(m_value.c_str(), m_value.length());
                m_outReceiver.onKvDone();
                break;
            }
            case (s_clear_kv):
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): len = 0 ; s_clear_kv;"
                    "State Finished has expected";
                // State Finished has expected.
                break;
            }
        }
        return 1;
    }

    while (i < len)
    {
        c = buf[i];
        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push():while(i<len)" "check: " << c
            << " state: " << m_state.phase_state;
        switch (m_state.phase_state)
        {
            case s_data_end:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_data_end";
                if (!onDataEnd(c, true))
                {
                    // Error
                    return -1;
                }
                break;
            }
            case s_class_data_end:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_class_data_end";
                if (!onDataEnd(c, false))
                {
                    // Error
                    return -1;
                }
                break;
            }
            case s_value:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_value";
                size_t result = handleValue(c);
                if ( result == (size_t)-1 )
                {
                    return -1;
                }
                break;
            }
            // Getting length of complex types like: array, string, object and custom.
            case s_length:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_length";
                if (c == ':')
                {
                    // convert length string to int.
                    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): m_length" <<
                        m_length;
                    char *pEnd = NULL;
                    // strtoll() reports overflow only through errno, so clear it first.
                    errno = 0;
                    const long long parsedLength = ::strtoll(m_length.c_str(), &pEnd, 10);
                    m_state.length = parsedLength;
                    // Reject trailing garbage, out-of-range values and negative lengths.
                    if (pEnd != m_length.c_str() + m_length.length() ||
                        errno == ERANGE ||
                        parsedLength < 0)
                    {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) <<
                            "Failed to convert length from string to integer (Invalid arguments).";
                        m_error = true;
                        return -1;
                    }
                    m_state.phase_state = s_value;
                    m_length.clear();
                    break;
                }
                m_length.push_back(c);
                break;
            }
            // primitive colon belongs to int, double, bool, ref which does not require length case
            case s_prim_colon:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_prim_colon";
                if (c != ':')
                {
                    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_prim_colon" <<
                        "Error: ':' should appear, instead " << c << " appeared";
                    m_error = true;
                    return -1;
                }
                m_state.phase_state = s_value;
                break;
            }
            // belongs to object, string, array, class which require length case
            case s_colon:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_colon";
                if (c != ':')
                {
                    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_colon" <<
                        "Error: ':' should appear, instead " << c << " appeared";
                    m_error = true;
                    return -1;
                }
                m_state.phase_state = s_length;
                break;
            }
            // s_start is being called every time we need to discover new object type
            // (state is first initialized by s_start).
            case s_start:
            {
                dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start";
                // Cast before tolower(): a negative char value would be undefined behavior.
                switch (tolower(static_cast<unsigned char>(c))) {
                    case 'n':
                    {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: NULL";
                        m_state.type_state = s_null;
                        m_state.phase_state = s_value;
                        break;
                    }
                    case 'a':
                    {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Array";
                        // Array cannot be key. Throw failure.
                        if (m_state.kv_state == s_onKey)
                        {
                            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: " <<
                                "Array cannot be a key";
                            m_error = true;
                            return -1;
                        }
                        // If stack not empty and Value is array then value should be empty.
                        // next key will be the key inside the array.
                        if (!m_stack.empty())
                        {
                            // Send empty value for the case of array/object as a subitem
                            m_value = "";
                            onStateValue();
                            m_state.kv_state = s_clear_kv;
                        }
                        m_state.type_state = s_start_array;
                        m_state.phase_state = s_colon;
                        break;
                    }
                    case 's': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: String";
                        m_state.type_state = s_start_string;
                        m_state.phase_state = s_colon;
                        break;
                    }
                    case 'b': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Boolean";
                        if (m_state.kv_state == s_onKey)
                        {
                            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: " <<
                                "Boolean cannot be a key";
                            m_error = true;
                            return -1;
                        }
                        m_state.type_state = s_boolean_OnValue;
                        m_state.phase_state = s_prim_colon;
                        break;
                    }
                    case 'i': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Integer";
                        m_state.type_state = s_integer_onValue;
                        m_state.phase_state = s_prim_colon;
                        break;
                    }
                    // parsing double as integer is ok in this case because integer are not really validated,
                    // instead they are reported as strings.
                    case 'd': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Double";
                        m_state.type_state = s_integer_onValue;
                        m_state.phase_state = s_prim_colon;
                        break;
                    }
                    case 'o': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Object";
                        if (m_state.kv_state == s_onKey)
                        {
                            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: " <<
                                "Object cannot be a key";
                            m_error = true;
                            return -1;
                        }
                        if (!m_stack.empty())
                        {
                            m_value = "";
                            onStateValue();
                            m_state.kv_state = s_clear_kv;
                        }
                        m_state.isObject = true;
                        m_state.type_state = s_start_string;
                        m_state.phase_state = s_colon;
                        break;
                    }
                    case 'c': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Class";
                        if (m_state.kv_state == s_onKey)
                        {
                            dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: " <<
                                "Class cannot be a key";
                            m_error = true;
                            return -1;
                        }
                        if (!m_stack.empty())
                        {
                            m_value = "";
                            onStateValue();
                            m_state.kv_state = s_clear_kv;
                        }
                        m_state.isClass = true;
                        m_state.type_state = s_start_string;
                        m_state.phase_state = s_colon;
                        break;
                    }
                    case 'r': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: Reference";
                        m_state.type_state = s_ref_onValue;
                        m_state.phase_state = s_prim_colon;
                        break;
                    }
                    case '}': {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start: }";
                        if (!onDataEnd(c, false))
                        {
                            // Error
                            return -1;
                        }
                        break;
                    }
                    default: {
                        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) <<
                            "PHPSerializedDataParser::push(): s_start: Unexpected Error. "
                            "Invalid char in s_start: " << c;
                        m_error = true;
                        return -1;
                    }
                }
            }
        }
        ++i;
    }
    return 0;
}
// Consumes a single character `c` of the value part of the current item, dispatching on
// m_state.type_state.
// Returns 0 when the character was accepted. On malformed input it sets m_error and returns -1;
// because the return type is size_t, that -1 is converted to the largest size_t value.
size_t PHPSerializedDataParser::handleValue (const char &c)
{
switch (m_state.type_state)
{
// Opening brace of a class body. The class name collected in m_value is reported as a key with an
// empty value, and the declared length becomes a countdown of the bytes to collect.
case s_start_class:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_class";
if (c != '{')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_class " <<
"Class start with: " << c << " instead of {";
m_error = true;
return -1;
}
std::string keyStack("Class");
std::string val("");
m_keyStack.push(keyStack.c_str(), keyStack.length());
// Remember the class name: it is re-used as the key if the class body fails to parse.
m_key = m_value;
m_outReceiver.onKey(m_value.c_str(), m_value.length());
m_outReceiver.onValue(val.c_str(), val.length());
m_outReceiver.onKvDone();
m_value.clear();
// changing isClass to false because this object handle class definition.
m_state.isClass = false;
m_state.current_length = m_state.length;
m_state.kv_state = s_onKey;
m_state.type_state = s_class_onValue;
State state = m_state;
m_stack.push(state);
break;
}
// Body of a class: the declared number of characters is accumulated into m_value.
case s_class_onValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_class_onValue";
// counting down the characters that we get on buffer.
// if we get all chars '}' should occur.
if (m_state.current_length != 0)
{
m_state.current_length--;
m_value.push_back(c);
break;
}
// Class object can handle string or more serialized data.
// If it's a string then the nested parser will return with an error,
// otherwise it will be parsed normally.
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): End of Class object" <<
" sending class object data to PHPSerializedDataParser";
// The collected body is fed to a nested parser that reports to the same receiver.
PHPSerializedDataParser psdp(m_outReceiver);
psdp.push(m_value.c_str(), m_value.length());
if(psdp.error())
{
// Nested parse failed: report the raw body as the value of the class-name key.
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): " <<
"class object data return with an error !";
m_outReceiver.onKey(m_key.c_str(), m_key.length());
m_outReceiver.onValue(m_value.c_str(), m_value.length());
m_outReceiver.onKvDone();
m_value.clear();
m_key.clear();
}
m_state.phase_state = s_class_data_end;
break;
}
// Opening brace of an object body. The object name collected in m_value is reported as a key
// with an empty value, then parsing continues with the object's members (s_start phase).
case s_start_object:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_object";
if (c != '{')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_object" <<
"Object start with: " << c << " instead of {";
m_error = true;
return -1;
}
std::string keyStack("Object");
std::string val("");
m_keyStack.push(keyStack.c_str(), keyStack.length());
m_outReceiver.onKey(m_value.c_str(), m_value.length());
m_outReceiver.onValue(val.c_str(), val.length());
m_outReceiver.onKvDone();
m_value.clear();
// changing isObject to false because this object handle class definition.
m_state.isObject = false;
m_state.kv_state = s_onKey;
m_state.phase_state = s_start;
State state = m_state;
m_stack.push(state);
break;
}
// Opening brace of an array body: push "array" on the key stack and start reading members.
case s_start_array:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_array";
if (c != '{')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_array" <<
"Array start with: " << c << " instead of {";
m_error = true;
return -1;
}
std::string keyVal("array");
m_keyStack.push(keyVal.c_str(), keyVal.length());
m_state.kv_state = s_onKey;
m_state.phase_state = s_start;
State state = m_state;
m_stack.push(state);
break;
}
// Opening quote of a string (also used for object and class names).
case s_start_string:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_string";
if (c != '"')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_start_string" <<
"string start with: " << c << " instead of \"";
m_error = true;
return -1;
}
m_state.current_length = 0;
m_state.type_state = s_string_onValue;
break;
}
// String content: characters are collected and counted until the closing quote.
case s_string_onValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue";
if (c != '"')
{
if (c == '\\')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue " <<
"escape ?: " << c;
// The backslash is counted here; it is appended to m_value later only if the
// sequence turns out not to be an escaped NUL.
m_state.current_length++;
m_state.type_state = s_string_escape;
break;
}
m_value.push_back(c);
m_state.current_length++;
break;
}
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue" <<
" End of String";
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue" <<
"m_state.isClass: " << m_state.isClass;
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_onValue" <<
" m_state.isObject: " << m_state.isObject;
// A quoted object/class name is followed by ':', a plain string by ';'.
if (m_state.isObject || m_state.isClass)
{
m_state.type_state = s_object_string_calc;
}
else
{
m_state.type_state = s_string_calc;
}
break;
}
// Character following a backslash inside a string.
case s_string_escape:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_escape";
if (c == 'x')
{
m_state.type_state = s_string_escape_x_1;
}
else if (c == '0')
{
// "\0" is stored as a single '@'; only the backslash was counted towards the length.
m_value.push_back('@');
m_state.type_state = s_string_onValue;
}
else
{
// Not a recognized escape: keep the backslash and the character as they are.
m_value.push_back('\\');
m_value.push_back(c);
m_state.current_length++;
m_state.type_state = s_string_onValue;
}
break;
}
// First character after "\x".
case s_string_escape_x_1:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_escape_x_1";
if (c == '0')
{
m_state.type_state = s_string_escape_x_2;
break;
}
// Not "\x0...": restore the literal text "\x" followed by the current character.
m_value = m_value + "\\x";
m_value.push_back(c);
m_state.current_length = m_state.current_length + 2;
m_state.type_state = s_string_onValue;
break;
}
// Second character after "\x0".
case s_string_escape_x_2:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_escape_x_2";
if (c == '0')
{
// "\x00" is stored as a single '@'; only the backslash was counted towards the length.
m_value.push_back('@');
m_state.type_state = s_string_onValue;
break;
}
// Not "\x00": restore the literal text "\x0" followed by the current character.
m_value = m_value + "\\x0";
m_value.push_back(c);
m_state.current_length = m_state.current_length + 3;
m_state.type_state = s_string_onValue;
break;
}
// Character after the closing quote of an object/class name: must be ':' and the counted
// length must match the declared one. The next length field is then read (s_length phase).
case s_object_string_calc:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_object_string_calc";
if (c != ':') {
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_object_string_calc" <<
" Error: After object name ':' should appear instead " << c << " appeared";
m_error = true;
return -1;
}
// check string length
if (m_state.current_length != m_state.length)
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_object_string_calc" <<
" m_state.current_length: " << m_state.current_length << "!=" << " m_state.length: "
<< m_state.length;
m_error = true;
return -1;
}
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_object_string_calc" <<
" Start object";
m_state.current_length = 0;
m_state.phase_state = s_length;
if (m_state.isObject)
{
m_state.type_state = s_start_object;
}
else
{
m_state.type_state = s_start_class;
}
break;
}
// Character after the closing quote of a plain string: must be ';' and the counted length
// must match the declared one.
case s_string_calc:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_calc";
if (c != ';') {
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_calc" <<
" Error: string should end with ';' not with " << c;
m_error = true;
return -1;
}
// check string length
if (m_state.current_length != m_state.length)
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_string_calc" <<
" m_state.current_length: " << m_state.current_length << "!=" << " m_state.length: "
<< m_state.length;
m_error = true;
return -1;
}
// When handleStateAfterFinish() returns true it has already set up the next state.
if (handleStateAfterFinish("String"))
{
break;
}
m_state.current_length = 0;
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Integer value: every character up to ';' is collected without validation.
case s_integer_onValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_integer_onValue";
if ( c != ';')
{
m_value.push_back(c);
break;
}
if (handleStateAfterFinish("Integer"))
{
break;
}
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Reference value: every character up to ';' is collected without validation.
case s_ref_onValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_ref_onValue";
if ( c != ';')
{
m_value.push_back(c);
break;
}
if (handleStateAfterFinish("Reference"))
{
break;
}
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Boolean value: collected up to ';' and then required to be exactly "0" or "1".
case s_boolean_OnValue:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_boolean_OnValue";
if (m_value.length() > 1)
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_boolean_OnValue" <<
" Error length is bigger than 1 : Boolean should be with 0 or 1";
m_error = true;
return -1;
}
if ( c != ';' )
{
m_value.push_back(c);
break;
}
// boolean can be 0 or 1 only.
if (m_value.compare("1") != 0 && m_value.compare("0") != 0)
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_boolean_OnValue" <<
" Error Boolean value is not 0 or 1 : " << m_value;
m_error = true;
return -1;
}
if (handleStateAfterFinish("Boolean"))
{
break;
}
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Null value: the only accepted character is the terminating ';'.
case s_null:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_null";
if (c != ';')
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): s_null" <<
" Null should end with ';' not with : " << c;
m_error = true;
return -1;
}
if (handleStateAfterFinish("Null"))
{
break;
}
m_value.clear();
m_state.phase_state = s_start;
break;
}
// Any other type_state value is treated as a parser error.
default:
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push(): default" <<
" Unexpected Error.";
m_error = true;
return -1;
}
}
return 0;
}
// Closes the container (array/object/class) that is currently being parsed.
//   termChar      - the character that terminated the container (e.g. '}')
//   checkEndBlock - when true, termChar is required to be '}'
// Fails (sets m_error, returns false) when the number of items seen differs from the declared
// length, or when the terminator check is enabled and does not pass. On success the enclosing
// state is restored from m_stack (if there is one) and parsing resumes in the s_start phase.
bool
PHPSerializedDataParser::onDataEnd(char termChar, bool checkEndBlock)
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE)
        << "PHPSerializedDataParser::onDataEnd (phase_state="
        << m_state.phase_state
        << ", termChar='"
        << termChar
        << "')";

    const bool lengthMismatch = (m_state.current_length != m_state.length);
    if (lengthMismatch) {
        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE)
            << "PHPSerializedDataParser::push():"
            << "current_length "
            << m_state.current_length
            << "!="
            << " m_state.length "
            << m_state.length;
        m_error = true;
        return false;
    }

    const bool badTerminator = checkEndBlock && (termChar != '}');
    if (badTerminator) {
        dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::push():" << "termChar is not }";
        m_error = true;
        return false;
    }

    // Nothing to unwind when we are not inside any container.
    if (!m_stack.empty()) {
        m_state.isObject = false;
        m_keyStack.pop(m_keyStack.first().c_str());
        // Resume the enclosing container's state and look for its next item.
        m_state = m_stack.top();
        m_stack.pop();
        m_state.phase_state = s_start;
    }
    return true;
}
// Reports a value that is not nested inside any container: the key is the type label passed in
// `type`, the value is the text collected in m_value. Afterwards the parser is reset to look for
// the next item.
void
PHPSerializedDataParser::onEmptyStack(std::string type)
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::onEmptyStack(): stack is empty.";

    // Emit the complete key/value pair.
    m_outReceiver.onKey(type.data(), type.size());
    m_outReceiver.onValue(m_value.data(), m_value.size());
    m_outReceiver.onKvDone();

    // Reset per-item state.
    m_state.phase_state = s_start;
    m_state.current_length = 0;
    m_value.clear();
}
// Reports the text collected in m_value as a key, prefixed with the current key-stack path and a
// '.' when the key stack is not empty, then switches the parser to expect the matching value.
void
PHPSerializedDataParser::onStateKey()
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::onStateKey()";

    const bool hasParentPath = (m_keyStack.size() >= 1);
    if (hasParentPath) {
        m_value = m_keyStack.str() + "." + m_value;
    }
    m_outReceiver.onKey(m_value.c_str(), m_value.length());

    // The key is done: drop the collected text and its length counter ...
    m_value.clear();
    m_state.current_length = 0;

    // ... and go on to read the value that belongs to it.
    m_state.phase_state = s_start;
    m_state.kv_state = s_onValue;
}
// Reports the text collected in m_value as the value of the current key and completes the pair.
// The enclosing container (top of m_stack) gets its item counter incremented, and the parser
// switches back to expecting a key.
// m_stack.top() is used unconditionally, so m_stack must not be empty here.
void
PHPSerializedDataParser::onStateValue()
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::onStateValue()";

    // The next item to be read is a key again.
    m_state.kv_state = s_onKey;

    // Count this item in the enclosing container.
    ++m_stack.top().current_length;
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "stack_state.current_length: " << m_stack.top().current_length;

    // Deliver the value and close the key/value pair.
    m_outReceiver.onValue(m_value.c_str(), m_value.length());
    m_outReceiver.onKvDone();
}
// checking if current length is equal to the length the object got
// and move it to s_data_end
bool
PHPSerializedDataParser::onCheckLength()
{
dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::onCheckLength()";
State &stack_state = m_stack.top();
if (stack_state.current_length == stack_state.length)
{
m_state.current_length = 0;
m_value.clear();
m_state = m_stack.top();
m_state.phase_state = s_data_end;
m_state.kv_state = s_clear_kv;
return true;
}
return false;
}
// Called once a scalar item has been read completely; `type` is the label of its type.
// Returns true when the follow-up state has been fully set up here, false when the caller still
// has to reset m_value and return to the s_start phase itself.
bool
PHPSerializedDataParser::handleStateAfterFinish(const std::string &type)
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::handleStateAfterFinish()";

    // Not inside an Object / Custom / Array: report the item on its own.
    if (m_stack.empty()) {
        onEmptyStack(type);
        return true;
    }

    // Inside a container and the item is a key.
    if (m_state.kv_state == s_onKey) {
        onStateKey();
        return true;
    }

    // Inside a container and the item is a value: report it, then check whether the container
    // has now received all of its declared items.
    onStateValue();
    return onCheckLength();
}
// Signals end of input by pushing an empty buffer into the parser.
void
PHPSerializedDataParser::finish()
{
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::finish()";
    const char *const noData = NULL;
    push(noData, 0);
}
const std::string &
PHPSerializedDataParser::name() const
{
return m_parserName;
}
// Tells whether the parser has hit malformed input (m_error is set); a trace line is written
// whenever the answer is yes.
bool
PHPSerializedDataParser::error() const
{
    if (!m_error) {
        return false;
    }
    dbgTrace(D_WAAP_PARSER_PHPSERIALIZE) << "PHPSerializedDataParser::error(): parser returned with an error";
    return true;
}

View File

@@ -0,0 +1,89 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <string.h>
#include "ParserBase.h"
#include "KeyStack.h"
#include <stack>
// Streaming parser for PHP-serialized data. Input is supplied through push() and the parsed items
// are reported to the receiver as key / value / kv-done events.
class PHPSerializedDataParser : public ParserBase {
public:
    PHPSerializedDataParser(IParserStreamReceiver &outReceiver);
    // Feeds a chunk of input to the parser.
    size_t push(const char* buf, size_t len);
    // Signals end of input (implemented as an empty push).
    void finish();
    virtual const std::string &name() const;
    // True once malformed input has been detected.
    bool error() const;
    virtual size_t depth() { return m_keyStack.depth(); }
private:
    // Returns true when the enclosing container has received all of its declared items.
    bool onCheckLength ();
    // Handles one character of an item's value; returns 0 on success, (size_t)-1 on error.
    size_t handleValue (const char &c);
    // Post-processing after a scalar item was read; `type` is the item's type label.
    bool handleStateAfterFinish (const std::string &type);
    void onStateValue (); // this function must never be called when the m_stack is empty
    void onStateKey ();
    // Reports an item that is not nested in any container, keyed by its type label.
    void onEmptyStack (std::string type);
    // Closes the current container; see the definition for the checks performed.
    bool onDataEnd (char termChar, bool checkEndBlock);

    bool m_error = false;

    // What kind of item is being read and which part of it.
    enum type_state
    {
        s_start_class,
        s_class_onValue,
        s_object_string_calc,
        s_start_object,
        s_start_array,
        s_null,
        s_start_string,
        s_string_calc,
        s_string_onValue,
        s_string_escape,
        s_string_escape_x_1,
        s_string_escape_x_2,
        s_integer_onValue,
        s_boolean_OnValue,
        s_ref_onValue
    };

    // Position inside the serialized representation of an item.
    enum phase_state {
        s_start,
        s_data_end,
        s_class_data_end,
        s_colon,
        s_length,
        s_value,
        s_prim_colon
    };

    // Whether the item being read is a key or a value of its enclosing container.
    enum key_value_state {
        s_clear_kv,
        s_onKey,
        s_onValue
    };

    // Complete parser state; copies are kept on m_stack, one per open container.
    struct State {
        enum phase_state phase_state = s_start;
        // Given a defined initial value so that copying a State never reads an indeterminate
        // enum. The parser assigns the real type before it is consulted.
        enum type_state type_state = s_start_class;
        enum key_value_state kv_state = s_clear_kv;
        size_t length = 0;          // declared length of the item
        size_t current_length = 0;  // length counted so far
        bool isObject = false;
        bool isClass = false;
    };

    State m_state;
    std::string m_value;    // text collected for the current item
    std::string m_key;      // class name remembered while a class body is collected
    std::string m_length;
    IParserStreamReceiver &m_outReceiver;
    KeyStack m_keyStack;
    std::stack <State> m_stack;
    static const std::string m_parserName;
};

View File

@@ -0,0 +1,88 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserBase.h"
#include <string.h>
// Max size for key and value that can be stored in memory (per thread).
// The expressions are parenthesized so the macros expand safely inside any larger expression
// (e.g. `x / MAX_KEY_SIZE` or `x % MAX_VALUE_SIZE`).
#define MAX_KEY_SIZE (64*1024)
#define MAX_VALUE_SIZE (64*1024)
// Binds the adapter to the final receiver. The flags start with BUFFERED_RECEIVER_F_FIRST set,
// so the first buffer forwarded for a key/value pair carries that flag.
BufferedReceiver::BufferedReceiver(IParserReceiver &receiver)
    : m_receiver(receiver), m_flags(BUFFERED_RECEIVER_F_FIRST)
{}
// Appends a piece of the key to the accumulated key. A piece that would bring the accumulated
// key to MAX_KEY_SIZE bytes or more is dropped entirely. Always returns 0.
int BufferedReceiver::onKey(const char *k, size_t k_len)
{
    const bool wouldExceedLimit = !(m_key.size() + k_len < MAX_KEY_SIZE);
    if (!wouldExceedLimit) {
        m_key.append(k, k_len);
    }
    return 0;
}
// Appends a piece of the value to the accumulated value. Every time the accumulated value reaches
// MAX_VALUE_SIZE bytes it is forwarded to the final receiver through onKv() and emptied; a partly
// filled buffer stays accumulated. Returns the last non-zero result returned by the receiver, or
// 0 when every call returned 0 (or no call was made).
int BufferedReceiver::onValue(const char *v, size_t v_len)
{
    int result = 0;
    const char *cursor = v;
    size_t remaining = v_len;

    while (remaining > 0) {
        // Take as much input as fits into the accumulated buffer.
        const size_t room = MAX_VALUE_SIZE - m_value.size();
        const size_t chunk = std::min(remaining, room);
        m_value.append(cursor, chunk);
        cursor += chunk;
        remaining -= chunk;

        // Only completely filled buffers are forwarded from here.
        if (m_value.size() != MAX_VALUE_SIZE) {
            continue;
        }

        // The first full buffer still carries BUFFERED_RECEIVER_F_FIRST.
        const int pushResult =
            m_receiver.onKv(m_key.data(), m_key.size(), m_value.data(), m_value.size(), m_flags);
        if (pushResult != 0) {
            result = pushResult;
        }

        // The forwarded data is no longer needed, and later buffers are not "first" anymore.
        m_value.clear();
        m_flags &= ~BUFFERED_RECEIVER_F_FIRST;
    }
    return result;
}
// Completes the current key/value pair: whatever is still accumulated is forwarded through onKv()
// with BUFFERED_RECEIVER_F_LAST set, then the object is reset for reuse. Returns the result of
// that onKv() call.
int BufferedReceiver::onKvDone()
{
    m_flags = m_flags | BUFFERED_RECEIVER_F_LAST;

    // This call is made even when m_value is empty, because the receiver has to be told about
    // BUFFERED_RECEIVER_F_LAST.
    const int result = onKv(m_key.data(), m_key.size(), m_value.data(), m_value.size(), m_flags);

    // Get ready for the next pair (possibly coming from another parser).
    clear();
    return result;
}
// Passes a complete key/value pair straight to the final receiver and returns its result.
int BufferedReceiver::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags)
{
    const int result = m_receiver.onKv(k, k_len, v, v_len, flags);
    return result;
}
// Resets the object to its initial state: empty key and value, BUFFERED_RECEIVER_F_FIRST only.
void BufferedReceiver::clear()
{
    m_value.clear();
    m_key.clear();
    m_flags = BUFFERED_RECEIVER_F_FIRST;
}

View File

@@ -0,0 +1,141 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_BASE_H__1106fa38
#define __PARSER_BASE_H__1106fa38
#include "DataTypes.h"
#include <string>
#include <stddef.h>
// Bit flags passed in the `flags` argument of IParserReceiver::onKv().
// FIRST marks the first buffer delivered for a key/value pair, LAST marks the final one.
#define BUFFERED_RECEIVER_F_FIRST 0x01
#define BUFFERED_RECEIVER_F_LAST 0x02
#define BUFFERED_RECEIVER_F_BOTH (BUFFERED_RECEIVER_F_FIRST | BUFFERED_RECEIVER_F_LAST)
// NOTE(review): not used in this header; the name suggests a pair without a key - confirm at the
// places where it is set.
#define BUFFERED_RECEIVER_F_UNNAMED 0x04
#if (DISTRO_centos6)
// pre c++11 compiler doesn't support the "final" keyword, so it is defined away
#define final
#else
// c++11 and beyond
#define final final
#endif
// Interface for receiver classes that accept full key/value pairs
struct IParserReceiver {
    // Virtual destructor so that an implementation can be destroyed safely through a pointer to
    // this interface.
    virtual ~IParserReceiver() {}
    // Delivers one key/value pair (or a part of a value, as indicated by the BUFFERED_RECEIVER_F_*
    // bits in `flags`). Keys and values are passed as pointer + length.
    virtual int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags) = 0;
};
// Interface for receivers of typed, structured parser output: key/value pairs that carry a
// DataType, plus start/end notifications for maps and arrays.
struct IParserReceiver2 {
    // Virtual destructor so that an implementation can be destroyed safely through a pointer to
    // this interface.
    virtual ~IParserReceiver2() {}
    // Delivers a key/value pair together with the type of the value.
    virtual void onKvt(const char *k, size_t k_len, const char *v, size_t v_len, const DataType &type) = 0;
    virtual void onStartMap() = 0;
    virtual void onMapKey(const char *k, size_t k_len) = 0;
    virtual void onEndMap() = 0;
    virtual void onStartArray() = 0;
    virtual void onEndArray() = 0;
};
// Interface for receiver classes that can accept not only full key/value pairs, but also partial content.
// Senders may make multiple calls to onKey() and onValue(), followed by a call to onKvDone() that signals
// that both key and value data are ready.
// Alternatively, when they can, senders make a single call to onKv(), bringing the whole data in a single
// buffer, which is normally faster because this way senders can avoid unnecessary memory copying.
struct IParserStreamReceiver : public IParserReceiver {
// Receives a piece of the key (pointer + length).
virtual int onKey(const char *k, size_t k_len) = 0;
// Receives a piece of the value (pointer + length).
virtual int onValue(const char *v, size_t v_len) = 0;
// Signals that the key and the value delivered so far form a complete pair.
virtual int onKvDone() = 0;
// NOTE(review): presumably discards partially received data and resets the receiver - confirm
// against the implementations.
virtual void clear() = 0;
};
// This class acts as an adapter between senders that require IParserStreamReceiver and receivers
// that can only accept IParserReceiver (and do not want to cope with buffering).
// When onKv is received by an instance of BufferedReceiver - it will be transparently forwarded to destination
// (without memory copying).
// However, if BufferedReceiver instance accepts onKey, onValue calls, it buffers the data until onKvDone
// is called, at which point it passes buffered data to onKv callback of the final (non stream capable) receiver.
// TODO:: 1) when constructing this class, pass limits on key and value lengths as constructor parameters?
// TODO:: 2) add extra callback like "onFlush()" to both IParserStreamReceiver and its implementation
// BufferedReceiver, which tells BufferedReceiver that it has last chance to copy data aside
// before the underlying buffer is dead. Without receiving this call, BufferedStreamReceiver
// can simply collect ptr+len pairs on buffer instead of copying stuff to m_key and m_value.
// Once onFlush() is received, the data must be collected from those spans, because the underlying buffer
// is going to be destroyed.
// Note that calls to onFlush() must be added to end of all parser functions before they lose control of their
// input buffer!
// However, this seems to be easy to implement: just call m_receiver.onFlush() before exiting parser's push()
// method, and we finally got zero-copy!
// Note that for optimization, the getAccumulatedKey() and getAccumulatedValue()
// should return pointers to the input buffer.
// This will in many cases cause sub-parsers to also work in zero-copy style too!
class BufferedReceiver : public IParserStreamReceiver {
public:
// `receiver` is held by reference and is the destination of all forwarded data.
BufferedReceiver(IParserReceiver &receiver);
virtual int onKey(const char *k, size_t k_len);
virtual int onValue(const char *v, size_t v_len);
virtual int onKvDone();
virtual int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags);
virtual void clear();
// Helper methods to access accumulated key and value (read-only)
const std::string &getAccumulatedKey() const { return m_key; }
const std::string &getAccumulatedValue() const { return m_value; }
private:
// Final receiver of the forwarded key/value data
IParserReceiver &m_receiver;
// Combination of BUFFERED_RECEIVER_F_* bits sent with the next forwarded buffer
int m_flags;
// Accumulated key/value pair
std::string m_key;
std::string m_value;
};
// Common interface of the streaming parsers. A parser receives its input in one or more push()
// calls and is told about the end of the stream with finish(). Parsers normally dissect/decode
// the input and hand the result, as a stream of key/value pairs, to an IParserReceiver or an
// IParserStreamReceiver.
class ParserBase {
public:
    virtual ~ParserBase()
    {
    }

    // Feeds the next piece of the input stream to the parser.
    virtual size_t push(const char *data, size_t data_len) = 0;

    // Signals the end of the input stream.
    // TODO: I think this should return status of some sort, just like push()
    virtual void finish() = 0;

    // Name of the parser.
    virtual const std::string &name() const = 0;

    // Whether the parser is in an error state.
    virtual bool error() const = 0;

    virtual size_t depth() = 0;

    // Accessors for the recursion flag, which is off for a newly created parser.
    virtual void setRecursionFlag()
    {
        m_recursionFlag = true;
    }

    virtual void clearRecursionFlag()
    {
        m_recursionFlag = false;
    }

    virtual bool getRecursionFlag() const
    {
        return m_recursionFlag;
    }

private:
    bool m_recursionFlag = false;
};
// Bundles a stream parser with the BufferedReceiver it reports to, so that the parser can be used
// with a receiver that only implements IParserReceiver. All ParserBase calls are delegated to the
// wrapped parser.
template<typename ParserT>
class BufferedParser : public ParserBase
{
public:
    // `receiver` gets the buffered output; any additional arguments are handed on to the wrapped
    // parser's constructor after the buffered receiver.
    template<typename ...ExtraArgs>
    explicit BufferedParser(IParserReceiver &receiver, ExtraArgs... extraArgs)
            :
        m_bufferedReceiver(receiver),
        m_parser(m_bufferedReceiver, extraArgs...)
    {
    }

    virtual ~BufferedParser()
    {
    }

    virtual size_t push(const char *data, size_t data_len)
    {
        return m_parser.push(data, data_len);
    }

    virtual void finish()
    {
        m_parser.finish();
    }

    virtual const std::string &name() const
    {
        return m_parser.name();
    }

    virtual bool error() const
    {
        return m_parser.error();
    }

    virtual size_t depth()
    {
        return m_parser.depth();
    }

private:
    // Declared before m_parser: the parser is constructed with a reference to it.
    BufferedReceiver m_bufferedReceiver;
    ParserT m_parser;
};
#endif // __PARSER_BASE_H__1106fa38

View File

@@ -0,0 +1,157 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserBinary.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP_PARSER_BINARY);
#define MIN_TEXT_SIZE 10
// Creates a parser named "binary" that reports to `receiver`. It starts in the s_start state with
// no carried-over text and a zero text-character count.
ParserBinary::ParserBinary(IParserStreamReceiver& receiver)
    : m_parserName("binary"),
      m_receiver(receiver),
      m_state(s_start),
      m_textFromLastBuffer(),
      m_textCharCount(0)
{}
// Nothing to release explicitly.
ParserBinary::~ParserBinary()
{}
// Scans a chunk of the stream and reports runs of printable characters to the receiver as pieces
// of the value of a single key named "text". Runs shorter than MIN_TEXT_SIZE characters are
// ignored. A run may span several push() calls: its beginning is kept in m_textFromLastBuffer
// until it is known whether the run is long enough.
// A call with data_len == 0 means end of stream: pending text is flushed and the pair is closed
// with onKvDone().
// Returns the number of bytes consumed, or 0 at end of stream or when in the error state.
size_t ParserBinary::push(const char* data, size_t data_len)
{
    if (data_len == 0)
    {
        dbgTrace(D_WAAP_PARSER_BINARY) << "ParserBinary::push(): end of stream. m_state=" << m_state <<
            ", m_textCharCount=" << m_textCharCount;
        if (m_state == s_text && m_textCharCount >= MIN_TEXT_SIZE) {
            // Flush text data collected from previous buffer
            flush();
        }
        m_receiver.onKvDone();
        return 0;
    }

    size_t i = 0, textStartIdx = 0;
    while (i < data_len)
    {
        char c = data[i];
        // isprint() must not be given a negative value other than EOF. Binary input routinely
        // contains bytes >= 0x80, which are negative when plain char is signed, so the byte is
        // converted to unsigned char first.
        const bool is_printable = ::isprint(static_cast<unsigned char>(c)) != 0;
        bool is_last = (i + 1 == data_len);
        switch (m_state) {
            case s_start:
                m_receiver.onKey("text", 4);
                m_state = s_binary;
                // fallthrough //
                CP_FALL_THROUGH;
            case s_binary:
                if (!is_printable) {
                    // Skip binary stuff
                    break;
                }
                textStartIdx = i; // remember index of potential text block start
                m_textCharCount = 0; // count consecutive text characters in the input stream
                if (!m_textFromLastBuffer.empty()) {
                    m_textFromLastBuffer.clear();
                }
                m_state = s_text;
                // fallthrough //
                CP_FALL_THROUGH;
            case s_text: {
                if (is_printable) {
                    m_textCharCount++;
                }
                else {
                    dbgTrace(D_WAAP_PARSER_BINARY) << "ParserBinary::push(): switch to binary at i=" << i <<
                        ", textStartIdx=" << textStartIdx << ", m_textCharCount=" << m_textCharCount;
                    // Transition from text to binary
                    // Only output text chunk when it is large enough, ignore small text chunks
                    if (m_textCharCount >= MIN_TEXT_SIZE) {
                        // Flush text data collected from previous buffer
                        flush();
                        // Output text data from current buffer
                        m_receiver.onValue(data+textStartIdx, i-textStartIdx); // do not include current character
                    }
                    m_textCharCount = 0;
                    m_state = s_binary;
                    break;
                }
                // Handle hitting buffer edge while collecting text.
                // Note that current buffer is going to be invalidated so we need to save everything needed to
                // be able to continue on next invocation.
                if (is_last) {
                    dbgTrace(D_WAAP_PARSER_BINARY) << "ParserBinary::push(): last char in buffer. m_textCharCount=" <<
                        m_textCharCount;
                    // If enough data collected so far no need to remember it - flush it to output right away
                    if (m_textCharCount >= MIN_TEXT_SIZE) {
                        // Flush text data collected from previous buffer
                        flush();
                        // Output text data from current buffer
                        m_receiver.onValue(data+textStartIdx, i-textStartIdx + 1); // +1 to include current character
                    }
                    else {
                        // If there's not enough text to decide - store the text data from current buffer for
                        // the next invocation
                        m_textFromLastBuffer.append(data+textStartIdx, i-textStartIdx + 1);
                    }
                }
                break;
            }
            case s_error:
                return 0;
            default:
                break;
        }
        i++;
    }
    return i;
}
// Signals end of stream by pushing an empty buffer.
void ParserBinary::finish()
{
    const char *const noData = NULL;
    push(noData, 0);
}
const std::string& ParserBinary::name() const
{
return m_parserName;
}
// The parser is in error exactly when its state is s_error.
bool ParserBinary::error() const
{
    const bool inErrorState = (m_state == s_error);
    return inErrorState;
}
// Sends the text carried over from the previous buffer (if any) to the receiver as value data and
// forgets it.
void ParserBinary::flush() {
    if (m_textFromLastBuffer.size() == 0) {
        return;
    }
    dbgTrace(D_WAAP_PARSER_BINARY) << "ParserBinary::flush() flushing " << m_textFromLastBuffer.size() <<
        " chars from last buf";
    m_receiver.onValue(m_textFromLastBuffer.data(), m_textFromLastBuffer.size());
    m_textFromLastBuffer.clear();
}

View File

@@ -0,0 +1,46 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_BINARY_H__
#define __PARSER_BINARY_H__
#include "ParserBase.h"
// Parser for binary content: it reports sufficiently long runs of printable characters found in
// the input to the receiver as the value of a key named "text".
class ParserBinary : public ParserBase
{
public:
ParserBinary(IParserStreamReceiver& receiver);
virtual ~ParserBinary();
// Feeds a chunk of input; a zero-length chunk means end of stream.
virtual size_t push(const char* data, size_t data_len);
virtual void finish();
virtual const std::string& name() const;
virtual bool error() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
enum state {
s_start, // nothing consumed yet
s_binary, // skipping non-printable bytes
s_text, // inside a run of printable characters
s_error
};
const std::string m_parserName;
IParserStreamReceiver& m_receiver;
state m_state;
// Beginning of a text run that started in an earlier buffer and is still too short to report
std::string m_textFromLastBuffer;
// Number of consecutive printable characters in the current run
size_t m_textCharCount;
// Sends m_textFromLastBuffer to the receiver and clears it
void flush();
};
#endif // __PARSER_BINARY_H__

View File

@@ -0,0 +1,180 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserConfluence.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP_PARSER_CONFLUENCE);
// Creates a parser named "confluence" that reports to `receiver`, starting in the s_start state
// with an empty element name.
ParserConfluence::ParserConfluence(IParserStreamReceiver& receiver)
    : m_parserName("confluence"),
      m_state(s_start),
      m_receiver(receiver),
      m_name()
{}
// Nothing to release explicitly.
ParserConfluence::~ParserConfluence()
{}
// Feeds one chunk of input into the state machine.
//
// Accepted grammar (as implemented below): '{' name ':' attr ['=' value] { '|' attr ['=' value] } '}'
// For every attribute the receiver gets onKey("<name>.") followed by onKey(<attr>), so the
// reported key is "<name>.<attr>"; then optional onValue() pieces and finally onKvDone().
// Names, attribute names and values that straddle a chunk boundary are forwarded piecewise.
//
// A zero-length push is the end-of-stream signal (this is what finish() sends): the parser
// enters s_error unless the closing '}' has already been consumed.
//
// Returns the index at which the error state was observed when the parser is (or becomes)
// in s_error while scanning this chunk, and 0 in all other cases.
size_t ParserConfluence::push(const char* data, size_t data_len)
{
    if (data_len == 0)
    {
        if (m_state != s_end)
        {
            m_state = s_error;
            return 0;
        }
    }

    // Offsets into the *current* chunk; they restart at 0 on every call because everything
    // seen in earlier chunks has already been forwarded (or accumulated into m_name).
    size_t i = 0, name_index = 0, attribute_index = 0;
    while (i < data_len)
    {
        char c = data[i];
        bool is_last = (i + 1 == data_len);
        dbgTrace(D_WAAP_PARSER_CONFLUENCE) << "parsing confluence: index: " << i << " char: " << c << " state: " <<
            m_state;
        switch (m_state)
        {
            case s_start:
                if (c != '{')
                {
                    m_state = s_error;
                    break;
                }
                i++;
                m_state = s_start_name;
                break;
            case s_start_name:
                // Does not consume input: only marks where the element name begins.
                m_name = "";
                name_index = i;
                m_state = s_name;
                break;
            case s_name:
                if (c == ':')
                {
                    m_name += std::string(data + name_index, i - name_index);
                    m_name += ".";
                    m_state = s_start_attributes;
                }
                else if (c == '"')
                {
                    m_state = s_error;
                    break;
                }
                else if (is_last)
                {
                    // Chunk ends inside the name: keep what we have and continue in the next push().
                    m_name += std::string(data + name_index, i - name_index + 1);
                    name_index = 0;
                }
                i++;
                break;
            case s_start_attributes:
                // Does not consume input: every attribute key is prefixed with "<name>.".
                attribute_index = i;
                m_receiver.onKey(m_name.c_str(), m_name.length());
                m_state = s_attribute_name;
                break;
            case s_attribute_name:
                if (c == '=')
                {
                    if (i > attribute_index)
                    {
                        m_receiver.onKey(data + attribute_index, i - attribute_index);
                    }
                    attribute_index = is_last ? 0 : i + 1;
                    m_state = s_attribute_value;
                }
                else if (c == '|')
                {
                    // Attribute without a value.
                    if (i > attribute_index)
                    {
                        m_receiver.onKey(data + attribute_index, i - attribute_index);
                    }
                    m_receiver.onKvDone();
                    m_state = s_start_attributes;
                }
                else if (c == '}')
                {
                    if (i > attribute_index)
                    {
                        m_receiver.onKey(data + attribute_index, i - attribute_index);
                    }
                    m_receiver.onKvDone();
                    m_state = s_end;
                    // Do not consume the '}' here: s_end verifies that it is the final byte,
                    // exactly as the s_attribute_value branch does. Previously the index was
                    // advanced first, which let a single trailing byte after '}' slip through.
                    break;
                }
                else if (is_last)
                {
                    m_receiver.onKey(data + attribute_index, i - attribute_index + 1);
                    attribute_index = 0;
                }
                i++;
                break;
            case s_attribute_value:
                if (c == '|')
                {
                    if (i > attribute_index)
                    {
                        m_receiver.onValue(data + attribute_index, i - attribute_index);
                    }
                    m_receiver.onKvDone();
                    m_state = s_start_attributes;
                }
                else if (c == '}')
                {
                    if (i > attribute_index)
                    {
                        m_receiver.onValue(data + attribute_index, i - attribute_index);
                    }
                    m_receiver.onKvDone();
                    m_state = s_end;
                    // '}' is left for s_end to validate.
                    break;
                }
                else if (is_last)
                {
                    m_receiver.onValue(data + attribute_index, i - attribute_index + 1);
                    attribute_index = 0;
                }
                i++;
                break;
            case s_end:
                // Anything following the current byte is unexpected trailing data.
                if (!is_last)
                {
                    m_state = s_error;
                }
                i++;
                break;
            case s_error:
                return i;
                break;
            default:
                break;
        }
    }
    return 0;
}
// Signals end-of-stream by pushing an empty chunk; push() turns that into s_error
// unless the closing '}' was already seen.
void
ParserConfluence::finish()
{
    push(nullptr, 0);
}
// Returns the fixed parser name set by the constructor ("confluence").
const std::string&
ParserConfluence::name() const
{
    return m_parserName;
}
bool ParserConfluence::error() const
{
return m_state == s_error;
}

View File

@@ -0,0 +1,48 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_CONFLUENCE_H__
#define __PARSER_CONFLUENCE_H__
#include "ParserBase.h"
// Streaming parser for brace-delimited input of the form
// "{name:attr=value|attr=value}" (grammar as implemented by the state machine in
// ParserConfluence.cc). Keys and values are reported to an IParserStreamReceiver.
class ParserConfluence : public ParserBase
{
public:
// Binds the parser to the receiver that gets the onKey/onValue/onKvDone calls.
ParserConfluence(IParserStreamReceiver& receiver);
virtual ~ParserConfluence();
// Feeds the next chunk of input; a zero-length chunk means end-of-stream.
virtual size_t push(const char* data, size_t data_len);
// Signals end-of-stream (implemented as push(NULL, 0)).
virtual void finish();
// Returns the parser name ("confluence").
virtual const std::string& name() const;
// True when the parser has entered s_error.
virtual bool error() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
// States of the parsing state machine.
enum state {
s_start, // expecting the opening '{'
s_start_name, // about to read the element name
s_name, // reading the element name, up to ':'
s_start_attributes, // about to read an attribute; emits the "<name>." key prefix
s_attribute_name, // reading an attribute name, up to '=', '|' or '}'
s_attribute_value, // reading an attribute value, up to '|' or '}'
s_end, // closing '}' reached
s_error // malformed input
};
const std::string m_parserName;
state m_state;
IParserStreamReceiver& m_receiver;
// Element name followed by '.', used as the prefix of every attribute key.
std::string m_name;
};
#endif

View File

@@ -0,0 +1,140 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserDelimiter.h"
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP_PARSER_DELIMITER);
// Creates a parser that splits its input on `delim`. `delimName` is used both as the
// key reported for every value (see pushKey()) and as the parser's name().
ParserDelimiter::ParserDelimiter(
    IParserStreamReceiver& receiver,
    char delim,
    const std::string& delimName)
    : ParserBase(),
      m_state(s_start),
      m_receiver(receiver),
      m_delim(delim),
      m_delim_name(delimName),
      m_found_delim(false)
{}
// Nothing to release: the receiver is only referenced, never owned.
ParserDelimiter::~ParserDelimiter() {}
// Reports the key for the next value. Every value produced by this parser is reported
// under the same key: the delimiter name given at construction.
void
ParserDelimiter::pushKey()
{
    const std::string& key = m_delim_name;
    dbgTrace(D_WAAP_PARSER_DELIMITER) << "parsing delimiter: send key='" << key << "'";
    m_receiver.onKey(key.c_str(), key.length());
}
// Feeds one chunk of input and splits it on m_delim.
//
// For each delimiter-separated piece the receiver gets onKey(<delimiter name>), one or more
// onValue() calls (a piece that straddles a chunk boundary is forwarded in parts) and
// onKvDone() once the following delimiter is consumed. The onKvDone() for the final piece is
// issued by the end-of-stream call.
//
// A zero-length push is the end-of-stream signal (this is what finish() sends): input in which
// the delimiter never occurred is treated as an error, otherwise the last pair is closed.
//
// Always returns 0; failures are reported through error().
size_t ParserDelimiter::push(const char* data, size_t data_len)
{
    if (data_len == 0)
    {
        if (!m_found_delim)
        {
            m_state = s_error;
            return 0;
        }
        if (m_receiver.onKvDone() != 0) {
            m_state = s_error;
        }
        return 0;
    }

    // value_start_index is relative to the current chunk; it restarts at 0 because the part
    // of a value seen in a previous chunk has already been forwarded.
    size_t i = 0, value_start_index = 0;
    while (i < data_len)
    {
        char c = data[i];
        switch (m_state)
        {
            case s_start:
                // Does not consume input: emits the first key and classifies the first byte.
                m_found_delim = false;
                pushKey();
                if (c == m_delim)
                {
                    m_state = s_start_with_delimiter;
                }
                else
                {
                    m_state = s_value_start;
                }
                break;
            case s_start_with_delimiter:
                // Input begins with the delimiter: skip it.
                m_found_delim = true;
                m_state = s_value_start;
                i++;
                break;
            case s_value_start:
                value_start_index = i;
                m_state = s_value;
                // fall through
            case s_value:
                if (c == m_delim)
                {
                    dbgTrace(D_WAAP_PARSER_DELIMITER) << "parsing delimiter: send val='" <<
                        std::string(data + value_start_index, i - value_start_index) << "'";
                    m_receiver.onValue(data + value_start_index, i - value_start_index);
                    // The delimiter itself is consumed by s_delimiter.
                    m_state = s_delimiter;
                    break;
                }
                else if (i + 1 == data_len)
                {
                    // Last byte of the chunk belongs to the value, hence the "+ 1". The trace
                    // now logs exactly the bytes handed to onValue() (it used to omit the last one).
                    dbgTrace(D_WAAP_PARSER_DELIMITER) << "parsing delimiter: send val='" <<
                        std::string(data + value_start_index, i - value_start_index + 1) << "'";
                    m_receiver.onValue(data + value_start_index, i - value_start_index + 1);
                }
                i++;
                break;
            case s_delimiter:
                m_found_delim = true;
                dbgTrace(D_WAAP_PARSER_DELIMITER) << "parsing delimiter: send onKvDone";
                if (m_receiver.onKvDone() != 0) {
                    m_state = s_error;
                    break;
                }
                i++;
                pushKey();
                m_state = s_value_start;
                break;
            case s_error:
                break;
            default:
                break;
        }
        if (m_state == s_error)
        {
            break;
        }
    }
    return 0;
}
// Signals end-of-stream by pushing an empty chunk.
void
ParserDelimiter::finish()
{
    push(nullptr, 0);
}
bool ParserDelimiter::error() const
{
return m_state == s_error;
}
// The parser is named after its delimiter (the name passed to the constructor).
const std::string&
ParserDelimiter::name() const
{
    return m_delim_name;
}

View File

@@ -0,0 +1,51 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_DELIMIETER_BASE_H__
#define __PARSER_DELIMIETER_BASE_H__
#include "ParserBase.h"
// Streaming parser that splits its input on a single delimiter character and reports
// every piece to an IParserStreamReceiver under the same key (the delimiter's name).
class ParserDelimiter : public ParserBase
{
public:
// receiver  - gets the onKey/onValue/onKvDone calls
// delim     - the delimiter character to split on
// delimName - key reported for every value; also returned by name()
ParserDelimiter(IParserStreamReceiver& receiver, char delim, const std::string& delimName);
virtual ~ParserDelimiter();
// Feeds the next chunk of input; a zero-length chunk means end-of-stream.
virtual size_t push(const char* data, size_t data_len);
// Signals end-of-stream (implemented as push(NULL, 0)).
virtual void finish();
// True when the parser has entered s_error.
virtual bool error() const;
// Returns the delimiter name.
virtual const std::string& name() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
// States of the parsing state machine.
enum state {
s_start, // nothing consumed yet
s_start_with_delimiter, // the very first byte is the delimiter
s_value_start, // about to read a value
s_delimiter, // positioned on a delimiter that terminated a value
s_value, // reading a value
s_error // receiver failure, or end-of-stream without any delimiter
};
// Sends the delimiter name to the receiver as the key of the next value.
void pushKey();
state m_state;
IParserStreamReceiver& m_receiver;
// NOTE(review): not referenced by the ParserDelimiter.cc code shown alongside this header.
std::string m_key;
char m_delim;
std::string m_delim_name;
// Set once at least one delimiter has been seen in the input.
bool m_found_delim;
};
#endif

View File

@@ -0,0 +1,304 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserHTML.h"
#include "Waf2Util.h"
#include "debug.h"
#include <assert.h>
USE_DEBUG_FLAG(D_WAAP_PARSER_HTML);
// Name returned by ParserHTML::name(); shared by all ParserHTML instances.
const std::string ParserHTML::m_parserName = "ParserHTML";
void ParserHTML::onStartElement(
void* ctx,
const xmlChar* localname,
const xmlChar** attributes)
{
ParserHTML* p = (ParserHTML*)ctx;
dbgTrace(D_WAAP_PARSER_HTML) << "HTML OPEN: '" << localname << "'";
p->m_key.push((const char*)localname, xmlStrlen(localname));
if (attributes != NULL) {
int i;
for (i = 0; attributes[i*2]; i++) {
const xmlChar* attr_localname = attributes[i * 2 + 0];
const xmlChar* attr_value = attributes[i * 2 + 1];
if (attr_value == NULL) {
attr_value = (const xmlChar*)"";
}
dbgTrace(D_WAAP_PARSER_HTML) << "\tHTML ATTR: elem='" << (char*)localname << "', " << attr_localname <<
"='" << std::string((char*)attr_value) << "'";
p->m_key.push((const char*)attr_localname, xmlStrlen(attr_localname));
if (p->m_receiver.onKv(
p->m_key.first().c_str(),
p->m_key.first().size(),
(const char*)attr_value, strlen((const char*)attr_value),
BUFFERED_RECEIVER_F_BOTH
) != 0) {
p->m_state = s_error;
}
p->m_key.pop("HTML end attribute");
}
}
// before we add new tracking element to the stack for this new element,
// set "children exists" flag to true for the parent element.
if (!p->m_elemTrackStack.empty()) {
p->m_elemTrackStack.back().hasChildren = true;
}
// when opening new element - start tracking its properties (internal text and existence of subelements)
p->m_elemTrackStack.push_back(ElemTrackInfo());
}
void
ParserHTML::onEndElement(
void* ctx,
const xmlChar* localname)
{
ParserHTML* p = (ParserHTML*)ctx;
dbgTrace(D_WAAP_PARSER_HTML) << "HTML CLOSE: '" << localname << "'";
if (p->m_elemTrackStack.empty()) {
dbgWarning(D_WAAP_PARSER_HTML) <<
"HTML closing tag and elem track stack is empty. This is probably sign of a bug!";
return;
}
ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back();
// Usability optimization: only output kv pair for HTML elements that had either sub children
// and/or value within.
// Those "wrapper elements" such as <wrapper><name>john</name><age>21</age></wrapper> only
// contain sub elements. For these we don't emit kv pair.
// However, for truly empty element such as <wrapper></wrapper>, or similar element with
// text: <wrapper>some text</wrapper>, we do output a kv pair.
bool isWrapperElement = elemTrackInfo.hasChildren && (elemTrackInfo.value.size() == 0);
if (!isWrapperElement) {
// Emit tag name as key
if (p->m_receiver.onKey(p->m_key.first().c_str(), p->m_key.first().size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onValue(elemTrackInfo.value.c_str(), elemTrackInfo.value.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onKvDone() != 0) {
p->m_state = s_error; // error
}
}
// when closing an element - pop its tracking info from the tracking stack
p->m_elemTrackStack.pop_back();
// Also, pop the element's name from m_key stack, so the key name always reflects
// current depth within the elements tree
p->m_key.pop("HTML end element");
}
void ParserHTML::onCharacters(void* ctx, const xmlChar* ch, int len) {
ParserHTML* p = (ParserHTML*)ctx;
if (p->m_elemTrackStack.empty()) {
dbgWarning(D_WAAP_PARSER_HTML) << "HTML text and elem track stack is empty. This is probably sign of a bug!";
return;
}
if ((ch == NULL) || (len == 0)) {
dbgTrace(D_WAAP_PARSER_HTML) << "Got empty HTML text element. Ignoring.";
return;
}
ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back();
dbgTrace(D_WAAP_PARSER_HTML) << "HTML TEXT: '[" << std::string((char*)ch, (size_t)len) << "]'";
std::string val = std::string((char*)ch, (size_t)len);
// trim isspace() characters around html text chunks.
// The chunks can occur multiple times within one value, when text value is intermixed with html sub-tags.
// for example, for HTML source "<a>sta<b>zzz</b>rt</a>", the "a" tag will include two text
// chunks "sta" and "rt"
// which are concatenated here to form the word "start".
// The trimming is done here to prevent false alarms on detection algorithm that sees
// "\n" characters in the HTML value.
// Example of input that causes false alarm without this trim is (multiline HTML):
// <html><script>\nclean_html_value '\n<\/script><\/html>
Waap::Util::trim(val);
elemTrackInfo.value += val;
}
// SAX error callback: formats the printf-style message (truncated to the local buffer size)
// and writes it to the trace log. The context pointer is not used.
static void onError(void* ctx, const char* msg, ...) {
    static const size_t TMP_BUF_SIZE = 4096;
    char formatted[TMP_BUF_SIZE];

    va_list args;
    va_start(args, msg);
    vsnprintf(formatted, sizeof(formatted), msg, args);
    va_end(args);

    dbgTrace(D_WAAP_PARSER_HTML) << "LIBXML (html) onError: " << std::string(formatted);
}
// Prepares the parser: clears the first-bytes buffer, installs the SAX callbacks and seeds
// the element-tracking stack and the key stack. The libxml parser context itself is created
// later, by push(), once the first bytes of the stream have been buffered.
ParserHTML::ParserHTML(IParserStreamReceiver& receiver)
    : m_receiver(receiver),
      m_state(s_start),
      m_bufLen(0),
      m_key("html_parser"),
      m_pushParserCtxPtr(NULL)
{
    dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::ParserHTML()";

    // TODO:: is zeroing this really needed?
    memset(m_buf, 0, sizeof(m_buf));

    // Custom SAX handler: start from an all-zero table and set only the callbacks we use.
    memset(&m_saxHandler, 0, sizeof(htmlSAXHandler));
    m_saxHandler.startElement = onStartElement;
    m_saxHandler.endElement = onEndElement;
    m_saxHandler.characters = onCharacters;
    m_saxHandler.error = onError;

    // "Dummy" tracking record so that text arriving before any element has somewhere to go.
    m_elemTrackStack.push_back(ElemTrackInfo());

    // Ugly: push first element into key (it will be ignored since we will never call
    // the "first()" method of this key within HTML parser object.
    m_key.push("html", 4);
}
// Releases the libxml push-parser context, if push() ever created one.
ParserHTML::~ParserHTML() {
    dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::~ParserHTML()";
    if (m_pushParserCtxPtr != NULL) {
        htmlFreeParserCtxt(m_pushParserCtxPtr);
    }
}
// Decides whether a libxml error should be acted upon.
// Returns true when the error must be treated as a real parse failure and false when it is
// one of the codes that are deliberately ignored.
bool ParserHTML::filterErrors(xmlErrorPtr xmlError) {
    dbgDebug(D_WAAP_PARSER_HTML) << "ParserHTML::filterErrors(): xmlError " << xmlError->code << ": '" <<
        xmlError->message << "'";

    // Ignore specific error: "HTML declaration allowed only at the start of the document".
    // This includes the case of "multiple HTML declarations" we've seen sent by some SOAP clients.
    // The parser runs in permissive mode (HTML_PARSE_RECOVER), so it recovers and parses the
    // document correctly but still reports the error code. Ignoring it keeps the WAAP code from
    // treating the HTML as "broken" and scanning the source as-is, which would cause false alarms.
    // Undeclared-entity errors are ignored the same way.
    const bool ignorable =
        (xmlError->code == XML_ERR_RESERVED_XML_NAME) ||
        (xmlError->code == XML_ERR_UNDECLARED_ENTITY);
    if (!ignorable) {
        return true;
    }

    dbgDebug(D_WAAP_PARSER_HTML) << "ParserHTML::filterErrors(): ignoring the '" << xmlError->code << ": " <<
        xmlError->message << "' html parser error.";
    return false;
}
size_t ParserHTML::push(const char* data, size_t data_len) {
size_t i = 0;
char c;
if (data_len == 0) {
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): end of data signal! m_state=" << m_state;
// Send zero-length chunk with "terminate" flag enabled to signify end-of-stream
if (htmlParseChunk(m_pushParserCtxPtr, m_buf, 0, 1)) {
xmlErrorPtr xmlError = xmlCtxtGetLastError(m_pushParserCtxPtr);
if (xmlError && filterErrors(xmlError)) {
dbgDebug(D_WAAP_PARSER_HTML) << "ParserHTML::push(): xmlError: code=" << xmlError->code << ": '" <<
xmlError->message << "'";
m_state = s_error; // error
return -1;
}
}
return m_bufLen;
}
int expected_buffer_len = FIRST_BUFFER_SIZE - 1;
while (i < data_len) {
c = data[i];
switch (m_state) {
case s_start:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_start";
m_state = s_accumulate_first_bytes;
// fall through //
CP_FALL_THROUGH;
case s_accumulate_first_bytes:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_accumulate_first_bytes. c='" << data[i] <<
"'; m_bufLen=" << m_bufLen << "; i=" << i;
m_buf[m_bufLen] = c;
m_bufLen++;
if (c == '?') {
expected_buffer_len = FIRST_BUFFER_SIZE;
}
if (m_bufLen == expected_buffer_len) {
m_state = s_start_parsing;
}
break;
case s_start_parsing:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_start_parsing. sending len=" << m_bufLen << ": '" <<
std::string(m_buf, m_bufLen) << "'; i=" << i;
// Create HTML SAX (push parser) context
// It is important to buffer at least first 4 bytes of input stream so libxml can determine text encoding!
m_pushParserCtxPtr = htmlCreatePushParserCtxt(&m_saxHandler, this, m_buf, m_bufLen, NULL,
XML_CHAR_ENCODING_UTF8);
// Enable "permissive mode" for HTML SAX parser.
// In this mode, the libxml parser doesn't stop on errors, but still reports them!
htmlCtxtUseOptions(m_pushParserCtxPtr, HTML_PARSE_RECOVER);
m_state = s_parsing;
// fall through //
CP_FALL_THROUGH;
case s_parsing:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_parsing. sending len=" << (int)(data_len - i) <<
": '" << std::string(data + i, data_len - i) << "'; i=" << i;
if (m_pushParserCtxPtr) {
if (htmlParseChunk(m_pushParserCtxPtr, data + i, data_len - i, 0)) {
xmlErrorPtr xmlError = xmlCtxtGetLastError(m_pushParserCtxPtr);
if (xmlError && filterErrors(xmlError)) {
dbgDebug(D_WAAP_PARSER_HTML) << "ParserHTML::push(): xmlError: code=" << xmlError->code <<
": '" << xmlError->message << "'";
m_state = s_error; // error
return 0;
}
}
// success (whole buffer consumed)
i = data_len - 1; // take into account ++i at the end of the state machine loop
}
break;
case s_error:
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): s_error";
return 0;
}
++i;
}
dbgTrace(D_WAAP_PARSER_HTML) << "ParserHTML::push(): exiting with param(len)=" << data_len << ": i=" << i;
return i;
}
// Signals end-of-stream by pushing an empty chunk.
void
ParserHTML::finish()
{
    push(nullptr, 0);
}
// Returns the parser name shared by all ParserHTML instances.
const std::string &
ParserHTML::name() const
{
    return m_parserName;
}
bool ParserHTML::error() const {
return m_state == s_error;
}

View File

@@ -0,0 +1,84 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "ParserBase.h"
#include "KeyStack.h"
#include <libxml/xmlstring.h>
#include <libxml/xmlerror.h>
#include <libxml/parser.h>
#include <libxml/HTMLparser.h>
#define FIRST_BUFFER_SIZE 5 // must buffer at least 4 first bytes to allow unicode autodetection (BOM).
// HTML parser built on libxml's SAX push interface. Element attributes and element text
// are reported to an IParserStreamReceiver as key/value pairs, keyed by the element path
// kept in m_key.
class ParserHTML : public ParserBase {
public:
ParserHTML(IParserStreamReceiver &receiver);
virtual ~ParserHTML();
// Feeds the next chunk of input; a zero-length chunk means end-of-stream.
size_t push(const char *data, size_t data_len);
// Signals end-of-stream (implemented as push(NULL, 0)).
void finish();
virtual const std::string &name() const;
// True when the parser has entered s_error.
bool error() const;
// Key-stack depth, not counting the placeholder entry ("html") pushed by the constructor.
virtual size_t depth() { return (m_key.depth() > 0) ? m_key.depth()-1 : m_key.depth(); }
private:
enum state {
s_start, // nothing consumed yet
s_accumulate_first_bytes, // buffering the first bytes of the stream in m_buf
s_start_parsing, // enough bytes buffered; the libxml context is created next
s_parsing, // data is passed straight to libxml
s_error // libxml or the receiver reported a failure
};
// Information tracked per each element in current stack of tracked HTML elements
struct ElemTrackInfo {
std::string value;
bool hasChildren;
ElemTrackInfo():hasChildren(false) {
// when element is just opened - we still didn't see any children,
// hence start with the "hasChildren" flag as false.
// This flag will be enabled once we meet opening of the a subelement.
// Also, we start from empty value string and gradually append to it each
// time we receive next piece of text from HTML parser.
// The collected value is then emitted when element finishes.
}
};
// SAX callbacks; ctx is the ParserHTML instance that registered them.
static void onStartElement(
void *ctx,
const xmlChar *localname,
const xmlChar **attributes);
static void onEndElement(
void* ctx,
const xmlChar* localname);
static void onCharacters(
void *ctx,
const xmlChar *ch,
int len);
// Filter out errors that should be ignored. Returns true if error should be treated,
// false if an error should be ignored
bool filterErrors(xmlErrorPtr xmlError);
IParserStreamReceiver &m_receiver;
enum state m_state;
// buffer first few bytes of stream (required before calling SAX parser for the first time)
char m_buf[FIRST_BUFFER_SIZE];
// number of bytes currently stored in m_buf
int m_bufLen;
// path of currently open elements (and, temporarily, the attribute being reported)
KeyStack m_key;
// one record per currently open element, innermost last
std::vector<ElemTrackInfo> m_elemTrackStack;
htmlSAXHandler m_saxHandler;
// libxml push-parser context; NULL until push() has buffered the first bytes
htmlParserCtxtPtr m_pushParserCtxPtr;
static const std::string m_parserName;
};

View File

@@ -0,0 +1,448 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserHdrValue.h"
#include "Waf2Util.h"
#include "debug.h"
#include <stdio.h>
#include <string.h>
USE_DEBUG_FLAG(D_WAAP_PARSER_HDRVALUE);
// Name returned by ParserHdrValue::name(); shared by all ParserHdrValue instances.
const std::string ParserHdrValue::m_parserName = "hdrValue";
// File-scope list of parser states. It repeats, enumerator for enumerator, the private
// `enum state` nested in class ParserHdrValue (ParserHdrValue.h).
// NOTE(review): inside the member functions the class-scope enumerators are found first,
// so this copy looks redundant - confirm before removing.
enum state {
s_start,
s_key_start,
s_key_restart,
s_key,
s_key_escaped1,
s_key_escaped2,
s_value_start,
s_value_restart,
s_value,
s_value_escaped1,
s_value_escaped2,
s_value_finishing_after_dblquotes,
s_end
};
// Creates a parser in its initial state with an empty (zeroed) escape buffer.
ParserHdrValue::ParserHdrValue(IParserStreamReceiver& receiver)
    : m_receiver(receiver),
      in_key(0),
      in_dbl_quotes(0),
      escaped_len(0),
      escapedCharCandidate(0)
{
    state = s_start;
    // TODO:: maybe remove?
    memset(escaped, 0, sizeof(escaped));
}
// Nothing to release: the receiver is only referenced, never owned.
ParserHdrValue::~ParserHdrValue()
{
}
// Feeds one chunk of a header value into the parser.
//
// The input is a ';'-separated list of "key" or "key=value" items. Leading whitespace of a
// key is skipped, a value may be enclosed in double quotes, "%hh" sequences are decoded in
// both keys and values, and '+' is converted to a space in values only. Decoded bytes are
// collected in escaped[] and flushed to the receiver in batches; undecoded runs are forwarded
// straight from buf. Keys and values that straddle a chunk boundary are forwarded piecewise.
//
// A zero-length push is the end-of-stream signal (this is what finish() sends).
//
// Returns len when the whole chunk was consumed, the index of the current byte when the
// receiver reports an error, and 0 for end-of-stream or an unknown state.
size_t ParserHdrValue::push(const char* buf, size_t len) {
    size_t i = 0;
    size_t mark = 0; // start (within buf) of the undecoded run not yet sent to the receiver
    char c;
    int is_last = 0;

    if (len == 0) {
        dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): end of data signal! state=" << state;
        // flush unescaped data collected (if any)
        if (escaped_len > 0) {
            if (state == s_key_restart) {
                if (m_receiver.onKey(escaped, escaped_len) != 0) {
                    return i;
                }
            }
            else if (state == s_value_restart) {
                if (m_receiver.onValue(escaped, escaped_len) != 0) {
                    return i;
                }
            }
            // NOTE(review): together with the unconditional call below, onKvDone() is issued
            // twice on this path - confirm that this is intended.
            if (m_receiver.onKvDone() != 0) {
                return i;
            }
            escaped_len = 0;
        }
        if (m_receiver.onKvDone() != 0) {
            return i;
        }
        return 0;
    }

    while (i < len) {
        c = buf[i];
        is_last = (i == (len - 1));
        switch (state) {
            case s_start: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_start";
                //state = s_key_start;
                // fallthrough //
                CP_FALL_THROUGH;
            }
            case s_key_start: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key_start";
                in_key = 0; // we are not parsing the key
                //state = s_key_restart;
                // fallthrough //
                CP_FALL_THROUGH;
            }
            case s_key_restart: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key_restart";
                mark = i;
                state = s_key;
                // fallthrough //
                CP_FALL_THROUGH;
            }
            case s_key: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key; c='" << c << "'; in_key=" << in_key;
                // skip leading spaces in the key
                if (isspace(c) && !in_key) {
                    state = s_key_restart; // skip the space character without including it in the output
                    break;
                }
                // Note that first non-space character is read
                in_key = 1;
                if (c == '%') {
                    if (i - mark > 0) {
                        if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                            return i;
                        }
                    }
                    state = s_key_escaped1;
                    break;
                }
#if 0 // '+' encoding is not done in header values (AFAIK)
                else if (c == '+') {
                    // convert plus character to space
                    if (i - mark > 0) {
                        EMIT_DATA_CB(key, i, buf + mark, i - mark);
                        mark = i;
                    }
                    escaped[escaped_len] = ' ';
                    escaped_len++;
                    if (escaped_len >= MAX_ESCAPED_SIZE) {
                        EMIT_DATA_CB(value, i, escaped, escaped_len);
                        escaped_len = 0;
                    }
                    state = s_key_restart;
                    break;
                }
#endif
                else {
                    // flush unescaped data collected (if any)
                    if (escaped_len > 0) {
                        if (m_receiver.onKey(escaped, escaped_len) != 0) {
                            return i;
                        }
                        escaped_len = 0;
                        mark = i;
                    }
                }
                if (c == ';') {
                    // name finished without value
                    if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                        return i;
                    }
                    if (m_receiver.onKvDone() != 0) {
                        return i;
                    }
                    state = s_key_start;
                    break;
                }
                else if (c == '=') {
                    if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                        return i;
                    }
                    state = s_value_start;
                    break;
                }
                if (is_last) {
                    if (m_receiver.onKey(buf + mark, (i - mark) + 1) != 0) {
                        return i;
                    }
                }
                break;
            }
            case s_key_escaped1: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key_escaped1";
                bool valid;
                unsigned char v = from_hex(c, valid);
                if (!valid) { // character right after the '%' is not a valid hex char.
                    // return the '%' character back to the output.
                    if (escaped_len > 0) {
                        if (m_receiver.onKey(escaped, escaped_len) != 0) {
                            return i;
                        }
                        escaped_len = 0;
                    }
                    if (m_receiver.onKey("%", 1) != 0) {
                        return i;
                    }
                    // If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences
                    if (c != '%') {
                        // pass the non-hex character back to the output too.
                        if (m_receiver.onKey(&c, 1) != 0) {
                            return i;
                        }
                        // Everything up to and including this character has been emitted, so
                        // resume through s_key_restart, which moves `mark` past it. Going
                        // straight to s_key left `mark` stale and the already-emitted bytes
                        // were sent again at the next ';', '=' or end of chunk.
                        state = s_key_restart;
                    }
                    break;
                }
                escapedCharCandidate = c;
                escaped[escaped_len] = v << 4;
                state = s_key_escaped2;
                break;
            }
            case s_key_escaped2: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_key_escaped2";
                bool valid;
                unsigned char v = from_hex(c, valid);
                if (!valid) {
                    // add converted escaped chars
                    if (escaped_len > 0 && m_receiver.onKey(escaped, escaped_len)) {
                        return i;
                    }
                    // return % to output
                    if (m_receiver.onKey("%", 1) != 0) {
                        return i;
                    }
                    // add the character that was thought to be escaped value
                    if (m_receiver.onKey(&escapedCharCandidate, 1)) {
                        return i;
                    }
                    // re parse the character as a key (i is incremented back to current value)
                    i--;
                    escaped_len = 0;
                    state = s_key_restart;
                    break;
                }
                escapedCharCandidate = 0;
                escaped[escaped_len] |= v;
                escaped_len++;
                if (escaped_len >= MAX_ESCAPED_SIZE) {
                    if (m_receiver.onKey(escaped, escaped_len) != 0) {
                        return i;
                    }
                    escaped_len = 0;
                }
                state = s_key_restart;
                break;
            }
            case s_value_start: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value_start";
                mark = i;
                state = s_value;
                in_dbl_quotes = 0; // we are not parsing the
                // detect first double-quotes
                if (c == '"' && !in_dbl_quotes) {
                    in_dbl_quotes = 1;
                    state = s_value_restart;
                    break; // skip the leading " character
                }
                // fallthrough //
                CP_FALL_THROUGH;
            }
            case s_value_restart: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value_restart";
                mark = i;
                state = s_value;
                // fallthrough //
                CP_FALL_THROUGH;
            }
            case s_value: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value; c='" << c << "', in_dbl_quotes=" <<
                    in_dbl_quotes;
                if (c == '%') {
                    if (i - mark > 0) {
                        if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                            return i;
                        }
                    }
                    state = s_value_escaped1;
                    break;
                }
                else if (c == '+') {
                    // convert plus character to space
                    if (i - mark > 0) {
                        if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                            return i;
                        }
                    }
                    escaped[escaped_len] = ' ';
                    escaped_len++;
                    if (escaped_len >= MAX_ESCAPED_SIZE) {
                        if (m_receiver.onValue(escaped, escaped_len) != 0) {
                            return i;
                        }
                        escaped_len = 0;
                    }
                    state = s_value_restart;
                    break;
                }
                else {
                    // flush unescaped data collected (if any)
                    if (escaped_len > 0) {
                        if (m_receiver.onValue(escaped, escaped_len) != 0) {
                            return i;
                        }
                        escaped_len = 0;
                        mark = i;
                    }
                }
                // detect end of dbl-quotes
                if (c == '"' && in_dbl_quotes) {
                    if (i - mark > 0) {
                        if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                            return i;
                        }
                    }
                    if (m_receiver.onKvDone() != 0) {
                        return i;
                    }
                    state = s_value_finishing_after_dblquotes;
                    break;
                }
                if (c == ';') {
                    if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                        return i;
                    }
                    if (m_receiver.onKvDone() != 0) {
                        return i;
                    }
                    state = s_key_start;
                    break;
                }
                if (is_last) {
                    if (m_receiver.onValue(buf + mark, (i - mark) + 1) != 0) {
                        return i;
                    }
                }
                break;
            }
            case s_value_escaped1: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value_escaped1";
                bool valid;
                unsigned char v = from_hex(c, valid);
                if (!valid) { // character right after the '%' is not a valid hex char.
                    // Decoded bytes that are still pending precede this '%' in the input, so
                    // they must reach the receiver first (the key path already does this).
                    // Without the flush they were emitted after the '%', out of order.
                    if (escaped_len > 0) {
                        if (m_receiver.onValue(escaped, escaped_len) != 0) {
                            return i;
                        }
                        escaped_len = 0;
                    }
                    // return the '%' character back to the output.
                    if (m_receiver.onValue("%", 1) != 0) {
                        return i;
                    }
                    // If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences
                    if (c != '%') {
                        // pass the non-hex character back to the output too.
                        if (m_receiver.onValue(&c, 1) != 0) {
                            return i;
                        }
                        // otherwise (the character is not '%'), switch back to the s_value state
                        state = s_value_restart;
                    }
                    break;
                }
                escapedCharCandidate = c;
                escaped[escaped_len] = v << 4;
                state = s_value_escaped2;
                break;
            }
            case s_value_escaped2: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value_escaped2";
                bool valid;
                unsigned char v = from_hex(c, valid);
                if (!valid) {
                    // add converted escaped chars
                    if (escaped_len > 0 && m_receiver.onValue(escaped, escaped_len) != 0) {
                        return i;
                    }
                    // return % to output
                    if (m_receiver.onValue("%", 1) != 0) {
                        return i;
                    }
                    // add the character that was thought to be escaped value
                    if (m_receiver.onValue(&escapedCharCandidate, 1)) {
                        return i;
                    }
                    // re parse the character as a value (i is incremented back to current value)
                    i--;
                    escaped_len = 0;
                    state = s_value_restart;
                    break;
                }
                escapedCharCandidate = 0;
                escaped[escaped_len] |= v;
                escaped_len++;
                if (escaped_len >= MAX_ESCAPED_SIZE) {
                    if (m_receiver.onValue(escaped, escaped_len) != 0) {
                        return i;
                    }
                    escaped_len = 0;
                }
                state = s_value_restart;
                break;
            }
            case s_value_finishing_after_dblquotes: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): s_value_finishing_after_dblquotes; c='" <<
                    c << "'";
                // Everything between the closing quote and the next ';' is skipped.
                if (c == ';') {
                    state = s_key_start;
                    break;
                }
                break;
            }
            default: {
                dbgTrace(D_WAAP_PARSER_HDRVALUE) << "ParserHdrValue::push(): hdrvalue parser unrecoverable error";
                return 0;
            }
        }// end switch()
        ++i;
    }
    return len;
}
// Signals end-of-stream by pushing an empty chunk, which flushes pending data.
void
ParserHdrValue::finish()
{
    push(nullptr, 0);
}
// Returns the parser name shared by all ParserHdrValue instances.
const std::string &
ParserHdrValue::name() const
{
    return m_parserName;
}
bool ParserHdrValue::error() const {
//return m_state == s_error;
return false; // TODO:: add error handling
}

View File

@@ -0,0 +1,58 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_HDRVALUE_H__7d37fe50
#define __PARSER_HDRVALUE_H__7d37fe50
#include "ParserBase.h"
#include <string.h>
// Streaming parser for header values made of ';'-separated "key" or "key=value" items
// (grammar as implemented in ParserHdrValue.cc). Percent-escapes are decoded and the
// resulting keys and values are reported to an IParserStreamReceiver.
class ParserHdrValue : public ParserBase{
public:
ParserHdrValue(IParserStreamReceiver &receiver);
virtual ~ParserHdrValue();
// Feeds the next chunk of input; a zero-length chunk means end-of-stream.
size_t push(const char *data, size_t len);
// Signals end-of-stream (implemented as push(NULL, 0)).
void finish();
virtual const std::string &name() const;
// Currently always returns false (no error state is implemented).
virtual bool error() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
// Capacity of escaped[]; decoded bytes are flushed to the receiver when it fills up.
static const int MAX_ESCAPED_SIZE = 16;
IParserStreamReceiver &m_receiver;
// States of the parsing state machine.
enum state {
s_start,
s_key_start,
s_key_restart,
s_key,
s_key_escaped1,
s_key_escaped2,
s_value_start,
s_value_restart,
s_value,
s_value_escaped1,
s_value_escaped2,
s_value_finishing_after_dblquotes,
s_end
};
// Current state. The elaborated type specifier is needed because the member has the
// same name as the enum type.
enum state state;
char in_key; // turns true when first non-space key character is read
char in_dbl_quotes; // turns true (1) during double-quoted value parsing
unsigned char escaped_len; // count of characters loaded in escaped[] buff
char escaped[MAX_ESCAPED_SIZE];
// first hex digit of a "%hh" sequence, kept so it can be emitted literally if the
// second character turns out not to be a hex digit
char escapedCharCandidate;
static const std::string m_parserName;
};
#endif // __PARSER_HDRVALUE_H__7d37fe50

View File

@@ -0,0 +1,342 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserJson.h"
#include "debug.h"
#include "yajl/yajl_parse.h"
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <assert.h>
// Debug flag used by the dbgTrace() statements of the JSON parser in this file.
USE_DEBUG_FLAG(D_WAAP_PARSER_JSON);
// Name returned by ParserJson::name().
const std::string ParserJson::m_parserName = "jsonParser";
// yajl callback: a JSON null was parsed.
// Reports the value as the text "null" under the current key. Returns 0 (which
// makes yajl cancel parsing) when the receiver rejects the pair, 1 otherwise.
int ParserJson::cb_null() {
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_null():";

    if (m_receiver2 != NULL) {
        m_receiver2->onKvt(m_key.c_str(), m_key.size(), "null", 4, DataType::EMPTY);
    }

    const int rc = m_receiver.onKv(m_key.c_str(), m_key.size(), "null", 4, BUFFERED_RECEIVER_F_BOTH);
    if (rc != 0) {
        return 0;
    }

    // A value directly inside a map consumes the key pushed by cb_map_key().
    const bool valueOfMapKey = !m_depthStack.empty() && m_depthStack.back() == js_map;
    if (valueOfMapKey) {
        m_key.pop("json null");
    }

    return 1;
}
// yajl callback: a JSON boolean was parsed.
// Reports the value as the text "true" or "false" under the current key. Returns 0
// (cancel parsing) when the receiver rejects the pair, 1 otherwise.
int ParserJson::cb_boolean(int boolean) {
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_boolean(): " << boolean;

    if (m_receiver2 != NULL) {
        m_receiver2->onKvt(m_key.c_str(), m_key.size(), NULL, boolean, DataType::BOOLEAN);
    }

    // Textual form of the value handed to the primary receiver.
    const char *text = boolean ? "true" : "false";
    const size_t textLen = boolean ? 4 : 5;

    if (m_receiver.onKv(m_key.c_str(), m_key.size(), text, textLen, BUFFERED_RECEIVER_F_BOTH) != 0) {
        return 0;
    }

    // A value directly inside a map consumes the key pushed by cb_map_key().
    const bool valueOfMapKey = !m_depthStack.empty() && m_depthStack.back() == js_map;
    if (valueOfMapKey) {
        m_key.pop("json boolean");
    }

    return 1;
}
// yajl callback: a JSON number was parsed; (s, slen) is its textual representation.
// Returns 0 (cancel parsing) when the receiver rejects the pair, 1 otherwise.
int ParserJson::cb_number(const char* s, yajl_size_t slen) {
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_number(): '" << std::string(s, slen) << "'";

    if (m_receiver2 != NULL) {
        m_receiver2->onKvt(m_key.c_str(), m_key.size(), s, slen, DataType::NUMBER);
    }

    const int rc = m_receiver.onKv(m_key.c_str(), m_key.size(), s, slen, BUFFERED_RECEIVER_F_BOTH);
    if (rc != 0) {
        return 0;
    }

    // A value directly inside a map consumes the key pushed by cb_map_key().
    const bool valueOfMapKey = !m_depthStack.empty() && m_depthStack.back() == js_map;
    if (valueOfMapKey) {
        m_key.pop("json number");
    }

    return 1;
}
// yajl callback: a JSON string value was parsed; (s, slen) is its content.
// Returns 0 (cancel parsing) when the receiver rejects the pair, 1 otherwise.
int ParserJson::cb_string(const unsigned char* s, yajl_size_t slen) {
    const char *text = (const char*)s;

    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_string(): '" << std::string(text, slen) << "'";

    if (m_receiver2 != NULL) {
        m_receiver2->onKvt(m_key.c_str(), m_key.size(), text, slen, DataType::STRING);
    }

    const int rc = m_receiver.onKv(m_key.c_str(), m_key.size(), text, slen, BUFFERED_RECEIVER_F_BOTH);
    if (rc != 0) {
        return 0;
    }

    // A value directly inside a map consumes the key pushed by cb_map_key().
    const bool valueOfMapKey = !m_depthStack.empty() && m_depthStack.back() == js_map;
    if (valueOfMapKey) {
        m_key.pop("json string");
    }

    return 1;
}
// yajl callback: a map key was parsed.
// Notifies the optional secondary receiver and pushes the key onto the key stack;
// it is popped again once the value belonging to it has been handled.
// Always returns 1 (continue parsing).
int ParserJson::cb_map_key(const unsigned char* s, yajl_size_t slen) {
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_map_key(): '" << std::string((const char*)s, slen) << "'";

    if (m_receiver2 != NULL) {
        m_receiver2->onMapKey((const char*)s, slen);
    }

    m_key.push((char*)s, slen);

    return 1;
}
int ParserJson::cb_start_map() {
dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_start_map():";
if (m_receiver2) {
m_receiver2->onStartMap();
}
m_depthStack.push_back(ParserJson::js_map);
return 1;
}
int ParserJson::cb_end_map() {
dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_end_map():";
if (m_receiver2) {
m_receiver2->onEndMap();
}
if (!m_depthStack.empty()) {
m_depthStack.pop_back();
}
if (!m_depthStack.empty() && m_depthStack.back() == js_map) {
m_key.pop("json end map");
}
return 1;
}
int ParserJson::cb_start_array() {
dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_start_array():";
if (m_receiver2) {
m_receiver2->onStartArray();
}
m_depthStack.push_back(ParserJson::js_array);
return 1;
}
int ParserJson::cb_end_array() {
dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::cb_end_array():";
if (m_receiver2) {
m_receiver2->onEndArray();
}
if (!m_depthStack.empty()) {
m_depthStack.pop_back();
}
if (!m_depthStack.empty() && m_depthStack.back() == js_map) {
m_key.pop("json end array");
}
return 1;
}
// Static functions to be called from C and forward the calls to respective class cb_* methods
int ParserJson::p_null(void* ctx)
{
return ((ParserJson*)ctx)->cb_null();
}
int ParserJson::p_boolean(void* ctx, int boolean)
{
return ((ParserJson*)ctx)->cb_boolean(boolean);
}
int ParserJson::p_number(void* ctx, const char* s, yajl_size_t slen)
{
return ((ParserJson*)ctx)->cb_number(s, slen);
}
int ParserJson::p_string(void* ctx, const unsigned char* s, yajl_size_t slen)
{
return ((ParserJson*)ctx)->cb_string(s, slen);
}
int ParserJson::p_map_key(void* ctx, const unsigned char* s, yajl_size_t slen)
{
return ((ParserJson*)ctx)->cb_map_key(s, slen);
}
int ParserJson::p_start_map(void* ctx)
{
return ((ParserJson*)ctx)->cb_start_map();
}
int ParserJson::p_end_map(void* ctx)
{
return ((ParserJson*)ctx)->cb_end_map();
}
int ParserJson::p_start_array(void* ctx)
{
return ((ParserJson*)ctx)->cb_start_array();
}
int ParserJson::p_end_array(void* ctx)
{
return ((ParserJson*)ctx)->cb_end_array();
}
// Creates a JSON parser that reports key/value pairs to `receiver` and, when
// `receiver2` is non-NULL, typed values and structure events to `receiver2`.
// If the yajl handle cannot be allocated the parser starts in the s_error state.
ParserJson::ParserJson(IParserReceiver& receiver, IParserReceiver2* receiver2) :
    m_receiver(receiver),
    m_receiver2(receiver2),
    m_state(s_start),
    m_bufLen(0),
    m_key("json_parser"),
    m_jsonHandler(NULL)
{
    // TODO:: do we really want to clear this?
    memset(m_buf, 0, sizeof(m_buf));

    // Callback table in the field order of yajl_callbacks. The integer and double
    // slots are left empty; numbers are delivered as text through p_number.
    static const yajl_callbacks callbacks = {
        p_null,         // null
        p_boolean,      // boolean
        NULL,           // integer
        NULL,           // double
        p_number,       // number (textual)
        p_string,       // string
        p_start_map,    // start map
        p_map_key,      // map key
        p_end_map,      // end map
        p_start_array,  // start array
        p_end_array     // end array
    };

    m_jsonHandler = yajl_alloc(&callbacks, NULL, this);

    if (!m_jsonHandler) {
        dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::ParserJson(): yajl_alloc() failed. Switching to s_error state.";
        m_state = s_error;
        return;
    }

    // Configure yajl parser
    yajl_config(m_jsonHandler, yajl_allow_comments, 1);
    yajl_config(m_jsonHandler, yajl_dont_validate_strings, 1); // disable utf8 checking
    yajl_config(m_jsonHandler, yajl_allow_multiple_values, 1);

    // Ugly: seed the key stack with a placeholder first element. It is ignored because
    // the JSON parser never calls the "first()" method of this key.
    m_key.push("json", 4);
}
// Releases the yajl handle, if one was successfully allocated by the constructor.
ParserJson::~ParserJson() {
    dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::~ParserJson():";

    if (m_jsonHandler != NULL) {
        yajl_free(m_jsonHandler);
    }
}
// Feeds the next chunk of the JSON stream to the parser.
//
// The first FIRST_JSON_BUFFER_SIZE bytes of the stream are accumulated in m_buf and
// handed to yajl in a single call; everything after that is forwarded to yajl as it
// arrives. A call with len == 0 signals end of data.
//
// Returns len when the chunk was consumed, and 0 for the end-of-data call or when
// the parser is already in the s_error state. Parse failures are reported through
// error().
size_t ParserJson::push(const char* buf, size_t len) {
    size_t i = 0;
    char c;

    if (len == 0) {
        dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): end of data signal! m_state=" << m_state;

        // The handle is NULL when yajl_alloc() failed in the constructor: nothing to finalize.
        if (m_jsonHandler == NULL) {
            m_state = s_error;
            return 0;
        }

        // A stream of FIRST_JSON_BUFFER_SIZE bytes or fewer never reaches s_parsing, so its
        // bytes are still sitting in m_buf. Hand them to yajl before closing the stream,
        // otherwise short documents such as "{}" or "[1]" would never be parsed.
        if (m_bufLen > 0) {
            if (yajl_parse(m_jsonHandler, (unsigned char*)m_buf, m_bufLen) != yajl_status_ok) {
                m_state = s_error;
            }
            m_bufLen = 0;
        }

        // Tell yajl that there's end of stream here
        if (yajl_complete_parse(m_jsonHandler) != yajl_status_ok) {
            m_state = s_error;
        }

        return 0;
    }

    while (i < len) {
        c = buf[i];

        switch (m_state) {
        case s_start:
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_start";
            m_state = s_accumulate_first_bytes;

            // fallthrough //
            CP_FALL_THROUGH;
        case s_accumulate_first_bytes:
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_accumulate_first_bytes. i=" << i <<
                " c='" << buf[i] << "'";
            m_buf[m_bufLen] = c;
            m_bufLen++;
            if (m_bufLen == FIRST_JSON_BUFFER_SIZE) {
                m_state = s_start_parsing;
            }
            break;

        case s_start_parsing:
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_start_parsing. sending len=" <<
                (int)m_bufLen << ": '" << std::string(m_buf, m_bufLen) << "'";
            m_state = s_parsing;

            // fallthrough //
            CP_FALL_THROUGH;
        case s_parsing:
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_parsing. sending len=" << (int)(len - i) << ": '" <<
                std::string(buf + i, len - i) << "'";
            if (m_bufLen > 0) {
                // Send accumulated bytes (if any)
                if (yajl_parse(m_jsonHandler, (unsigned char*)m_buf, m_bufLen) != yajl_status_ok) {
                    m_state = s_error;
                }
                // And reset buffer (so it's only get sent once)
                m_bufLen = 0;
            }
            if (yajl_parse(m_jsonHandler, (unsigned char*)(buf + i), len - i) != yajl_status_ok) {
                m_state = s_error;
            }
            // success (whole buffer consumed)
            i = len - 1; // take into account ++i at the end of the m_state machine loop
            break;

        case s_error: {
            dbgTrace(D_WAAP_PARSER_JSON) << "ParserJson::push(): s_error";
            return 0;
        }
        }

        ++i;
    }

    return len;
}
// Signals end of data: a zero-length push is the end-of-stream marker.
void ParserJson::finish()
{
    push(NULL, 0);
}
// Returns the parser name ("jsonParser").
const std::string &ParserJson::name() const
{
    return m_parserName;
}
bool ParserJson::error() const {
return m_state == s_error;
}

View File

@@ -0,0 +1,87 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_JSON_H__a94f1be2
#define __PARSER_JSON_H__a94f1be2
#include <string.h>
#include <vector>
#include "ParserBase.h"
#include "KeyStack.h"
#include "yajl/yajl_parse.h"
#define FIRST_JSON_BUFFER_SIZE 4 // must buffer at least 4 first bytes to allow unicode autodetection (BOM).
// Length type used in the signatures of the yajl callbacks below.
typedef size_t yajl_size_t;
// JSON parser built on top of yajl.
// Flattens a JSON document into key/value pairs reported to an IParserReceiver and,
// optionally, reports typed values and structure events to an IParserReceiver2.
// NOTE(review): the class owns a yajl handle (released in the destructor) but does not
// disable copy construction here - confirm whether ParserBase already prevents copying.
class ParserJson : public ParserBase {
public:
// receiver: sink for key/value pairs (held by reference).
// receiver2: optional sink for typed values and map/array events; may be NULL.
ParserJson(IParserReceiver &receiver, IParserReceiver2 *receiver2=NULL);
virtual ~ParserJson();
// Feeds the next chunk of the stream; a zero-length push signals end of data.
size_t push(const char *data, size_t data_len);
// Signals end of data (implemented as a zero-length push).
void finish();
virtual const std::string &name() const;
// True once the parser has entered the s_error state.
bool error() const;
// Key stack depth, not counting the placeholder element that the constructor pushes.
virtual size_t depth() { return (m_key.depth() > 0) ? m_key.depth()-1 : m_key.depth(); }
private:
// Per-event handlers; each returns 1 to continue parsing or 0 to cancel it.
int cb_null();
int cb_boolean(int boolean);
int cb_number(const char *s, yajl_size_t slen);
int cb_string(const unsigned char *s, yajl_size_t slen);
int cb_map_key(const unsigned char *s, yajl_size_t slen);
int cb_start_map();
int cb_end_map();
int cb_start_array();
int cb_end_array();
// Static callbacks to be called from C
// (ctx is the ParserJson instance; each forwards to the cb_* method of the same name)
static int p_null(void *ctx);
static int p_boolean(void *ctx, int boolean);
static int p_number(void *ctx, const char *s, yajl_size_t slen);
static int p_string(void *ctx, const unsigned char *s, yajl_size_t slen);
static int p_map_key(void *ctx, const unsigned char *s, yajl_size_t slen);
static int p_start_map(void *ctx);
static int p_end_map(void *ctx);
static int p_start_array(void *ctx);
static int p_end_array(void *ctx);
// States of push(): buffer the first bytes of the stream, then stream the rest to yajl.
enum state {
s_start,
s_accumulate_first_bytes,
s_start_parsing,
s_parsing,
s_error
};
// Kind of JSON container, as recorded in m_depthStack.
enum js_state {
js_array,
js_map
};
IParserReceiver &m_receiver;
IParserReceiver2 *m_receiver2;
enum state m_state;
// buffer first few bytes of stream
// (required before calling JSON parser for the first time so it can recognize stuff like UTF-8 BOM)
char m_buf[FIRST_JSON_BUFFER_SIZE];
// Number of valid bytes currently held in m_buf.
size_t m_bufLen;
// Key and structure depth stacks
KeyStack m_key;
// One entry per currently open map/array, innermost last.
std::vector<enum js_state> m_depthStack;
// yajl parser handle; NULL if allocation failed in the constructor.
yajl_handle m_jsonHandler;
public:
static const std::string m_parserName;
};
#endif // __PARSER_JSON_H__a94f1be2

View File

@@ -0,0 +1,485 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserMultipartForm.h"
#include "ParserHdrValue.h"
#include "Waf2Util.h"
#include "debug.h"
#include <stdlib.h>
#include <ctype.h>
// Debug flag used by the dbgTrace()/dbgWarning() statements of the multipart parser in this file.
USE_DEBUG_FLAG(D_WAAP_PARSER_MULTIPART_FORM);
// ASCII codes of the line feed and carriage return characters that terminate
// the boundary and header lines.
#define LF 10
#define CR 13
// Name returned by ParserMultipartForm::name().
const std::string ParserMultipartForm::m_parserName = "ParserMultipartForm";
// Receives one complete key/value pair dissected from a part header value and
// remembers the value of the "name" key as the part name.
//
// k, k_len - key buffer and its length (not required to be NUL-terminated)
// v, v_len - value buffer and its length (not required to be NUL-terminated)
// flags    - must have both BUFFERED_RECEIVER_F_BOTH bits set (whole pair delivered at once)
// Always returns 0.
int ParserMultipartForm::HdrValueAnalyzer::onKv(const char* k, size_t k_len, const char* v, size_t v_len, int flags)
{
    // Print both buffers by explicit length. The previous statement streamed a printf-style
    // format string, then k_len as a number and v as an unbounded C string, which produced
    // a garbled message and could read past the end of v.
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "HdrValueAnalyzer::onKv(): k='" << std::string(k, k_len) <<
        "' v='" << std::string(v, v_len) << "'";

    assert((flags & BUFFERED_RECEIVER_F_BOTH) == BUFFERED_RECEIVER_F_BOTH);

    // NOTE(review): my_strincmp() is used here as "true when equal"; confirm against Waf2Util.
    if (my_strincmp(k, "name", k_len)) {
        m_partName = std::string(v, v_len);
    }

    return 0;
}
// Forgets the part name collected so far.
void ParserMultipartForm::HdrValueAnalyzer::clear()
{
    m_partName.clear();
}
// Creates a multipart/form-data parser.
//
// receiver     - sink for part names (keys) and part bodies (values)
// boundary     - boundary string from the Content-Type header, without the leading "--"
// boundary_len - length of `boundary` in bytes
//
// One heap buffer holds two things: the NUL-terminated delimiter ("--" + boundary) at its
// start, and, right after it, the `lookbehind` area used by push() to keep bytes that
// might turn out to be part data rather than a delimiter. On allocation failure the
// parser starts in the s_error state.
ParserMultipartForm::ParserMultipartForm(
    IParserStreamReceiver& receiver,
    const char* boundary,
    size_t boundary_len)
    :
    m_receiver(receiver),
    m_partIdx(0),
    state(s_start),
    index(0),
    boundary_length(boundary_len + 2),
    lookbehind(NULL),
    multipart_boundary(NULL),
    m_headerValueParser(NULL),
    m_hdrValueAnalyzerBufferedReceiver(m_hdrValueAnalyzer)
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::ParserMultipartForm()";

    // Length of the delimiter as matched in the stream: two hyphens plus the boundary.
    const size_t delimiterLen = boundary_len + 2;

    multipart_boundary = static_cast<char*>(malloc(delimiterLen + delimiterLen + 9));

    if (!multipart_boundary) {
        dbgWarning(D_WAAP_PARSER_MULTIPART_FORM) <<
            "ParserMultipartForm::ParserMultipartForm(): failed allocation of multipart_boundary buffer.";
        state = s_error;
        return;
    }

    // Build "--<boundary>\0" at the start of the buffer.
    multipart_boundary[0] = '-';
    multipart_boundary[1] = '-';
    memcpy(multipart_boundary + 2, boundary, boundary_len);
    multipart_boundary[delimiterLen] = 0;

    // The lookbehind area begins right after the delimiter's terminating NUL.
    lookbehind = multipart_boundary + boundary_length + 1;
}
// Releases everything the parser owns.
ParserMultipartForm::~ParserMultipartForm() {
    // The header value parser is created lazily in on_form_part_hdr_value() and normally
    // destroyed in on_form_part_hdr_kv_done(). If the stream stops in the middle of a
    // header value it is still alive here and must be released to avoid a leak.
    delete m_headerValueParser;
    m_headerValueParser = NULL;

    // lookbehind points into the same allocation, so only multipart_boundary is freed.
    if (multipart_boundary != NULL) {
        free(multipart_boundary);
    }
}
// Feeds the next chunk of a multipart/form-data body to the parser.
//
// The body is consumed one byte at a time by a state machine: opening delimiter line,
// then for every part its header lines ("Key: value" CR LF), an empty line, and the part
// data up to the next CR LF delimiter. Header pieces and part data are reported through
// the on_form_* methods as soon as they are available, so one header or part may be
// delivered in several pieces when it spans chunks.
//
// buf, len - chunk to parse; len == 0 signals end of stream, which is an error unless
//            the closing delimiter ("--" after the boundary) has already been seen.
//
// Returns len when the whole chunk was consumed. On a parse error the parser enters
// s_error and the offset of the offending byte is returned; 0 is returned when the
// parser is unusable (no boundary buffer) or already in the error state.
size_t ParserMultipartForm::push(const char* buf, size_t len) {
    size_t i = 0;
    size_t mark = 0;  // start of the not yet reported piece within buf
    char c, cl;
    int is_last = 0;

    if (multipart_boundary == NULL) {
        dbgWarning(D_WAAP_PARSER_MULTIPART_FORM) <<
            "ParserMultipartForm::push(): can't parse. multipart_boundary=NULL.";
        state = s_error;
        return 0;
    }

    // Detect end of stream
    if (len == 0) {
        dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): len = 0";
        // end of stream
        if (state != s_end) {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
                "ParserMultipartForm::push(): MIME stream finished while inside part";
            state = s_error;
            return 0;
        }
    }

    while (i < len) {
        c = buf[i];
        is_last = (i == (len - 1));

        switch (state) {
        case s_start:
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_start";
            index = 0;
            state = s_start_boundary;

            // fallthrough //
            CP_FALL_THROUGH;
        case s_start_boundary: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_start_boundary";
            // The opening delimiter must be followed by CR LF.
            if (index == boundary_length) {
                if (c != CR) {
                    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
                        "ParserMultipartForm::push(): didn't get CR character";
                    state = s_error;
                    return i;
                }
                index++;
                break;
            }
            else if (index == (boundary_length + 1)) {
                if (c != LF) {
                    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
                        "ParserMultipartForm::push(): didn't get LF character";
                    state = s_error;
                    return i;
                }
                index = 0;
                if (on_form_part_begin() != 0) {
                    state = s_error;
                    return i;
                }
                state = s_key_start;
                break;
            }
            if (c != multipart_boundary[index]) {
                dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
                    "ParserMultipartForm::push(): boundary check failed at index=" << index <<
                    " char '" << c << "', must be '" << multipart_boundary[index] << "'";
                state = s_error;
                return i;
            }
            index++;
            break;
        }
        case s_key_start: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_key_start";
            mark = i;
            state = s_key;

            // fallthrough //
            CP_FALL_THROUGH;
        }
        case s_key: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_key";
            // CR at this point starts the empty line that ends the part headers.
            if (c == CR) {
                state = s_headers_almost_done;
                break;
            }
            if (c == ':') {
                if (on_form_part_hdr_key(buf + mark, i - mark) != 0) {
                    state = s_error;
                    return i;
                }
                state = s_value_start;
                break;
            }
            // tolower() requires a value representable as unsigned char; passing a negative
            // plain char is undefined behavior, hence the cast.
            cl = tolower(static_cast<unsigned char>(c));
            if ((c != '-') && (cl < 'a' || cl > 'z')) {
                dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) <<
                    "ParserMultipartForm::push(): invalid character in header name: " << int(c);
                state = s_error;
                return i;
            }
            // The chunk ends inside the header name: report the piece seen so far.
            if (is_last) {
                if (on_form_part_hdr_key(buf + mark, (i - mark) + 1) != 0) {
                    state = s_error;
                    return i;
                }
            }
            break;
        }
        case s_headers_almost_done: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_headers_almost_done";
            if (c != LF) {
                state = s_error;
                return i;
            }
            state = s_part_start;
            break;
        }
        case s_value_start: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_value_start";
            // Skip spaces between the colon and the value.
            if (c == ' ') {
                break;
            }
            mark = i;
            state = s_value;

            // fallthrough //
            CP_FALL_THROUGH;
        }
        case s_value: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_value";
            if (c == CR) {
                if (on_form_part_hdr_value(buf + mark, i - mark) != 0) {
                    state = s_error;
                    return i;
                }
                state = s_value_almost_done;
                break;
            }
            // The chunk ends inside the header value: report the piece seen so far.
            if (is_last) {
                if (on_form_part_hdr_value(buf + mark, (i - mark) + 1) != 0) {
                    state = s_error;
                    return i;
                }
            }
            break;
        }
        case s_value_almost_done: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_value_almost_done";
            if (c != LF) {
                state = s_error;
                return i;
            }
            state = s_key_start;
            if (this->on_form_part_hdr_kv_done() != 0) {
                state = s_error;
                return i;
            }
            break;
        }
        case s_part_start: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_start";
            if (on_form_headers_complete() != 0) {
                state = s_error;
                return i;
            }
            mark = i;
            state = s_part;

            // fallthrough //
            CP_FALL_THROUGH;
        }
        case s_part: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part";
            // CR may start a delimiter: report the data before it and hold the CR back in lookbehind.
            if (c == CR) {
                if (on_form_part_data(buf + mark, i - mark) != 0) {
                    state = s_error;
                    return i;
                }
                mark = i;
                state = s_part_almost_boundary;
                lookbehind[0] = CR;
                break;
            }
            if (is_last) {
                if (on_form_part_data(buf + mark, (i - mark) + 1) != 0) {
                    state = s_error;
                    return i;
                }
            }
            break;
        }
        case s_part_almost_boundary: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_almost_boundary";
            if (c == LF) {
                state = s_part_boundary;
                lookbehind[1] = LF;
                index = 0;
                break;
            }
            // Not a delimiter after all: the held back CR is ordinary part data.
            if (on_form_part_data(lookbehind, 1) != 0) {
                state = s_error;
                return i;
            }
            state = s_part;
            // Re-examine the current byte in s_part: the decrement cancels the ++i at the
            // bottom of the loop (unsigned wrap-around at i == 0 is intended).
            mark = i--;
            break;
        }
        case s_part_boundary: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_boundary";
            if (multipart_boundary[index] != c) {
                // Mismatch: CR LF and the delimiter bytes matched so far are part data.
                if (on_form_part_data(lookbehind, 2 + index) != 0) {
                    state = s_error;
                    return i;
                }
                state = s_part;
                // Re-examine the current byte in s_part (see s_part_almost_boundary).
                mark = i--;
                break;
            }
            lookbehind[2 + index] = c;
            if ((++index) == boundary_length) {
                if (on_form_part_end() != 0) {
                    state = s_error;
                    return i;
                }
                state = s_part_almost_end;
            }
            break;
        }
        case s_part_almost_end: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_almost_end";
            // "--" after the delimiter closes the whole body, CR LF starts another part.
            if (c == '-') {
                state = s_part_final_hyphen;
                break;
            }
            if (c == CR) {
                state = s_part_end;
                break;
            }
            state = s_error;
            return i;
        }
        case s_part_final_hyphen: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_final_hyphen";
            if (c == '-') {
                if (on_form_body_end() != 0) {
                    state = s_error;
                    return i;
                }
                state = s_end;
                break;
            }
            state = s_error;
            return i;
        }
        case s_part_end: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_part_end";
            if (c == LF) {
                state = s_key_start;
                if (on_form_part_begin() != 0) {
                    state = s_error;
                    return i;
                }
                break;
            }
            state = s_error;
            return i;
        }
        case s_end: {
            // Anything after the closing delimiter is ignored.
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_end";
            break;
        }
        case s_error: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): s_error";
            return 0;
        }
        default: {
            dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::push(): unknown state: " << state;
            state = s_error;
            return 0;
        }
        }

        ++i;
    }

    return len;
}
// Signals end of stream: a zero-length push is the end-of-stream marker.
void ParserMultipartForm::finish()
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::finish():";

    push(NULL, 0);
}
// Returns the parser name ("ParserMultipartForm").
const std::string &ParserMultipartForm::name() const
{
    return m_parserName;
}
bool ParserMultipartForm::error() const {
return state == s_error;
}
// MIME form parsing

// Receives a piece of the current part header's name and appends it to m_hdrName.
// May be called several times for one header when the name spans chunks.
// Always returns 0 (ok).
int ParserMultipartForm::on_form_part_hdr_key(const char* k, size_t k_len) {
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_hdr_key(): '" <<
        std::string(k, k_len) << "'";

    m_hdrName.append(k, k_len);

    return 0;
}
// Receives a piece of the current part header's value and feeds it to the header value
// parser, which reports dissected pairs to m_hdrValueAnalyzer (through its buffered receiver).
// May be called several times for one header; the parser is created on the first call.
// Always returns 0 (ok).
int ParserMultipartForm::on_form_part_hdr_value(const char* v, size_t v_len) {
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_hdr_value(): '" <<
        std::string(v, v_len) << "'";

    // Lazily create the value parser for this header.
    if (m_headerValueParser == NULL) {
        m_headerValueParser = new ParserHdrValue(m_hdrValueAnalyzerBufferedReceiver);
    }

    // Hand over this piece of the header value.
    if (m_headerValueParser != NULL) {
        m_headerValueParser->push(v, v_len);
    }

    return 0;
}
// Called when one complete part header line (name and value) has been consumed.
// Finishes and destroys the header value parser, and, for the Content-Disposition
// header, takes the part name extracted from it (or generates "part-NNN" when there is
// none). State kept per header is then reset.
// Always returns 0 (ok).
int ParserMultipartForm::on_form_part_hdr_kv_done() {
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_hdr_kv_done():";

    // Complete any header value parsing in progress; its results end up in m_hdrValueAnalyzer.
    if (m_headerValueParser != NULL) {
        m_headerValueParser->finish();
        delete m_headerValueParser;
        m_headerValueParser = NULL;
    }

    // The header name is only known to be complete at this point, which is why the
    // Content-Disposition check is done here rather than earlier.
    if (my_stricmp(m_hdrName.c_str(), "content-disposition")) {
        m_partName = m_hdrValueAnalyzer.getPartName();

        // Anonymous part: name it after its number within the MIME message.
        if (m_partName.empty()) {
            char generated[128];
            snprintf(generated, sizeof(generated), "part-%lu", (unsigned long int)m_partIdx);
            m_partName = generated;
        }
    }

    // Everything relevant was collected above; reset state kept per header before the next one.
    m_hdrValueAnalyzer.clear();
    m_hdrName.clear();

    return 0;
}
// Called after the empty line that ends a part's headers.
// Reports the part name to the receiver as the key and resets the header analyzer.
// Returns the receiver's onKey() result.
int ParserMultipartForm::on_form_headers_complete() {
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_headers_complete():";

    const int result = m_receiver.onKey(m_partName.data(), m_partName.size());

    m_hdrValueAnalyzer.clear();

    return result;
}
// Called when a new part starts (after a delimiter line).
// Advances the part counter and forgets the previous part's name.
// Always returns 0 (ok).
int ParserMultipartForm::on_form_part_begin() {
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_begin():";

    ++m_partIdx;
    m_partName.clear();

    return 0;
}
// Called when the delimiter that closes a part has been matched.
// Tells the receiver that the current key/value pair is complete and returns its result.
int ParserMultipartForm::on_form_part_end()
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_end():";

    const int result = m_receiver.onKvDone();
    return result;
}
// Forwards a piece of part body data to the receiver as (part of) the value.
// Returns the receiver's onValue() result.
int ParserMultipartForm::on_form_part_data(const char* at, size_t length)
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_part_data(): '" <<
        std::string(at, length) << "'";

    const int result = m_receiver.onValue(at, length);
    return result;
}
// Called when the closing delimiter of the whole body has been seen.
// Nothing to do besides tracing; always returns 0 (ok).
int ParserMultipartForm::on_form_body_end()
{
    dbgTrace(D_WAAP_PARSER_MULTIPART_FORM) << "ParserMultipartForm::on_form_body_end():";

    return 0;
}

View File

@@ -0,0 +1,93 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_MULTIPART_FORM_H__1c7eb4fa
#define __PARSER_MULTIPART_FORM_H__1c7eb4fa
#include "ParserBase.h"
#include "ParserHdrValue.h"
#include <boost/core/noncopyable.hpp>
// Streaming parser for multipart/form-data bodies.
// Every part is reported to the IParserStreamReceiver as a key (the part name taken from
// its Content-Disposition header, or a generated "part-NNN") followed by the part data
// as the value.
class ParserMultipartForm : public ParserBase, boost::noncopyable {
public:
// Receives the key/value pairs dissected from a part header value and keeps the
// value of the "name" key as the part name.
class HdrValueAnalyzer : public IParserReceiver {
public:
int onKv(const char *k, size_t k_len, const char *v, size_t v_len, int flags);
// Forgets the collected part name.
void clear();
const std::string &getPartName() const { return m_partName; }
private:
std::string m_partName;
};
// boundary/boundary_len: boundary string without the leading "--".
ParserMultipartForm(IParserStreamReceiver &receiver, const char *boundary, size_t boundary_len);
virtual ~ParserMultipartForm();
// Feeds the next chunk of the body; a zero-length push signals end of stream.
size_t push(const char *buf, size_t len);
// Signals end of stream (implemented as a zero-length push).
void finish();
virtual const std::string &name() const;
// True once the parser has entered the s_error state.
virtual bool error() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
// States of the byte-by-byte state machine in push().
enum state {
s_start,
s_start_boundary,
s_key_start,
s_key,
s_headers_almost_done,
s_value_start,
s_value,
s_value_almost_done,
s_part_start,
s_part,
s_part_almost_boundary,
s_part_boundary,
s_part_almost_end,
s_part_end,
s_part_final_hyphen,
s_end,
s_error
};
// MIME form parsing
// (event handlers called by push(); each returns 0 on success, non-zero to abort parsing)
int on_form_part_hdr_key(const char *k, size_t k_len);
int on_form_part_hdr_value(const char *v, size_t v_len);
int on_form_part_hdr_kv_done();
int on_form_headers_complete();
int on_form_part_begin();
int on_form_part_end();
int on_form_part_data(const char *at, size_t length);
int on_form_body_end();
IParserStreamReceiver &m_receiver;
// number of parts begun so far: starts at 0 and is incremented by on_form_part_begin(),
// so it is 1 while the first part is being processed
size_t m_partIdx;
// Current state of the state machine.
enum state state;
// Position within the delimiter currently being matched.
size_t index;
// Length of the delimiter including the two leading hyphens (boundary_len + 2).
size_t boundary_length;
// Bytes held back while a possible delimiter is being matched inside part data.
// Points into the multipart_boundary allocation (not separately owned).
char* lookbehind;
// Heap buffer holding the NUL-terminated "--" + boundary delimiter, followed by the lookbehind area.
char *multipart_boundary;
ParserHdrValue *m_headerValueParser; // Part Header's value parser/dissector.
// Reports dissected parts to m_hdrValueAnalyzer.
HdrValueAnalyzer m_hdrValueAnalyzer; // Receives and analyzes dissected parts of part header value,
// and extracts information like part name from it.
BufferedReceiver m_hdrValueAnalyzerBufferedReceiver; // Buffers partial header value data before
// it is available to m_hdrValueAnalyzer.
std::string m_hdrName; // Current part header name (accumulated until on_form_part_hdr_kv_done() is called).
std::string m_partName; // Part name
static const std::string m_parserName;
};
#endif // __PARSER_MULTIPART_FORM_H__1c7eb4fa

View File

@@ -0,0 +1,80 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserRaw.h"
#include "debug.h"
// Debug flag used by the dbgTrace() statements of the raw parser in this file.
USE_DEBUG_FLAG(D_WAAP_PARSER_RAW);
// Name returned by ParserRaw::name().
const std::string ParserRaw::m_parserName = "ParserRaw";
// Creates a parser that forwards the whole stream, unmodified, as the value of `key`.
ParserRaw::ParserRaw(IParserStreamReceiver &receiver, const std::string &key)
    : m_receiver(receiver),
      m_key(key),
      m_state(s_start)
{
}
// Nothing to release: the parser owns no resources beyond its members.
ParserRaw::~ParserRaw()
{
}
// Forwards a chunk of the stream to the receiver.
// The first non-empty chunk is preceded by an onKey() call carrying the configured key;
// every chunk is then passed on through onValue(). A zero-length push signals end of
// data and emits onKvDone() if anything was pushed before.
// Returns len for a data chunk, 0 for the end-of-data call, when onKey() is rejected,
// or when the parser is already in the s_error state.
size_t ParserRaw::push(const char *buf, size_t len) {
    dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): (len=" << (unsigned long int)len << ")";

    if (len == 0) {
        dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): end of data signal! m_state=" << m_state;
        // Close the pair only if at least something was pushed.
        const bool somethingPushed = (m_state != s_start);
        if (somethingPushed && m_receiver.onKvDone() != 0) {
            m_state = s_error;
        }
        return 0;
    }

    if (m_state == s_error) {
        dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): s_error";
        return 0;
    }

    if (m_state == s_start) {
        dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): s_start";
        // First data chunk: announce the key before any value data.
        if (m_receiver.onKey(m_key.data(), m_key.size()) != 0) {
            m_state = s_error;
            return 0;
        }
        m_state = s_forward;
    }

    if (m_state == s_forward) {
        dbgTrace(D_WAAP_PARSER_RAW) << "ParserRaw::push(): s_forward";
        if (m_receiver.onValue(buf, len) != 0) {
            m_state = s_error;
        }
    }

    return len;
}
// Signals end of data: a zero-length push is the end-of-stream marker.
void ParserRaw::finish()
{
    push(NULL, 0);
}
// Returns the parser name ("ParserRaw").
const std::string &ParserRaw::name() const
{
    return m_parserName;
}
bool ParserRaw::error() const {
return m_state == s_error;
}

View File

@@ -0,0 +1,43 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_RAW_H__7989ff78
#define __PARSER_RAW_H__7989ff78
#include "ParserBase.h"
#include <string.h>
// Pass-through parser: reports the whole pushed stream, unmodified, as the value of a
// single fixed key.
class ParserRaw : public ParserBase {
public:
// receiver: sink for the key and the forwarded data (held by reference).
// key: key under which the data is reported (copied).
ParserRaw(IParserStreamReceiver &receiver, const std::string &key);
virtual ~ParserRaw();
// Forwards a chunk of data; a zero-length push signals end of data.
size_t push(const char *data, size_t data_len);
// Signals end of data (implemented as a zero-length push).
void finish();
virtual const std::string &name() const;
// True once the parser has entered the s_error state.
bool error() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
enum state {
s_start,   // nothing pushed yet; the key has not been announced
s_forward, // key announced; data is being forwarded as the value
s_error    // the receiver rejected a call
};
IParserStreamReceiver &m_receiver;
std::string m_key;
state m_state;
static const std::string m_parserName;
};
#endif // __PARSER_RAW_H__7989ff78

View File

@@ -0,0 +1,439 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserUrlEncode.h"
#include "Waf2Util.h"
#include "debug.h"
// Debug flag used by the dbgTrace()/dbgDebug() statements of the URL-encoded parser in this file.
USE_DEBUG_FLAG(D_WAAP_PARSER_URLENCODE);
// Parser name of ParserUrlEncode.
const std::string ParserUrlEncode::m_parserName = "ParserUrlEncode";
// Creates a parser for URL-encoded key/value data.
//
// receiver          - sink for the decoded keys and values
// separatorChar     - character that separates one key/value pair from the next
// should_decode_per - when true, '%' starts a percent-escape that is decoded;
//                     when false, '%' is treated as an ordinary character
ParserUrlEncode::ParserUrlEncode(IParserStreamReceiver &receiver, char separatorChar, bool should_decode_per)
    :
    m_receiver(receiver),
    m_state(s_start),
    m_escapedLen(0),
    m_separatorChar(separatorChar),
    m_escapedCharCandidate(0),
    should_decode_percent(should_decode_per)
{
    dbgTrace(D_WAAP_PARSER_URLENCODE) << "should_decode_per=" << should_decode_per;

    // TODO:: is there a need for this?
    // Start with a zeroed buffer for decoded characters.
    memset(m_escaped, 0, sizeof(m_escaped));
}
// Nothing to release here.
ParserUrlEncode::~ParserUrlEncode()
{
}
size_t ParserUrlEncode::push(const char *buf, size_t len) {
size_t i = 0;
size_t mark = 0;
char c;
int is_last = 0;
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): starting (len=" << len << ")";
if (len == 0) {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): end of data signal! m_state=" << m_state;
// flush unescaped data collected (if any)
if (m_escapedLen > 0) {
if (m_state == s_key_start) {
if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
}
else if (m_state == s_value_start) {
if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
}
m_escapedLen = 0;
}
if (m_receiver.onKvDone() != 0) {
m_state = s_error;
return i;
}
return 0;
}
while (i < len) {
c = buf[i];
is_last = (i == (len - 1));
// Checking valid char urlencode
if (c < 32)
{
dbgDebug(D_WAAP_PARSER_URLENCODE) << "invalid URL encoding character: " << c;
m_state = s_error;
return i;
}
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): state=" << m_state << "; ch='" << c << "'";
switch (m_state) {
case s_start: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_start";
//m_state = s_key_start;
// fallthrough //
CP_FALL_THROUGH;
}
case s_key_start: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_key_start";
mark = i;
m_state = s_key;
// fallthrough //
CP_FALL_THROUGH;
}
case s_key: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_key";
// skip leading spaces in the key
if (isspace(c)) {
m_state = s_key_start; // skip the space character without including it in the output
break;
}
if (c == '%' && should_decode_percent) {
if (i - mark > 0) {
if (m_receiver.onKey(buf + mark, i - mark) != 0) {
m_state = s_error;
return i;
}
}
m_state = s_key_escaped1;
break;
}
else if (c == '+') {
// convert plus character to space
if (i - mark > 0) {
if (m_receiver.onKey(buf + mark, i - mark) != 0) {
m_state = s_error;
return i;
}
mark = i;
}
m_escaped[m_escapedLen] = ' ';
m_escapedLen++;
if (m_escapedLen >= MAX_URLENCODE_ESCAPED_SIZE) {
if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
}
m_state = s_key_start;
break;
}
else {
// flush unescaped data collected (if any)
if (m_escapedLen > 0) {
if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
mark = i;
}
}
if (c == m_separatorChar) {
// this happens when there is a key without value. Example: ?p&a=b&k&%61&blah
// in this case we emit the key, but not the value, and send onKvDone to cause
// the receiver to process the pair: key will be provided with no value.
if (m_receiver.onKey(buf + mark, i - mark) != 0) {
m_state = s_error;
return i;
}
if (m_receiver.onKvDone() != 0) {
m_state = s_error;
return i;
}
m_state = s_key_start;
break;
}
if (c == '=') {
if (m_receiver.onKey(buf + mark, i - mark) != 0) {
m_state = s_error;
return i;
}
m_state = s_value_start;
break;
}
if (is_last) {
if (m_receiver.onKey(buf + mark, (i - mark) + 1) != 0) {
m_state = s_error;
return i;
}
}
break;
}
case s_key_escaped1: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_key_escaped1";
bool valid;
unsigned char v = from_hex(c, valid);
if (!valid) { // character right after the '%' is not a valid hex char.
// dump escaped chars
if (m_escapedLen > 0 && m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
// return the '%' character back to the output.
if (m_receiver.onKey("%", 1) != 0) {
return i;
}
// If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences
if (c != '%') {
// pass the non-hex character back to the output too.
if (m_receiver.onKey(&c, 1) != 0) {
return i;
}
// otherwise (the character is not '%'), switch back to the s_key state
m_state = s_key_start;
}
break;
}
m_escapedCharCandidate = c;
m_escaped[m_escapedLen] = v << 4;
m_state = s_key_escaped2;
break;
}
case s_key_escaped2: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_key_escaped2";
bool valid;
unsigned char v = from_hex(c, valid);
if (!valid) {
// This situation (2nd character is not valid hex) is not treated right now.
// In this case, v will be equal to 0 and output character will be invalid one.
//dump escaped chars
if (m_escapedLen >0 && m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
// return the '%' character back to the output.
if (m_receiver.onKey("%", 1) != 0) {
return i;
}
// add the character that was thought to be escaped value
if (m_receiver.onKey(&m_escapedCharCandidate, 1)) {
return i;
}
// re parse the character as a key (i is incremented back to current value)
i--;
m_state = s_key_start;
break;
}
m_escapedCharCandidate = 0;
m_escaped[m_escapedLen] |= v;
m_escapedLen++;
if (m_escapedLen >= MAX_URLENCODE_ESCAPED_SIZE) {
if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
}
m_state = s_key_start;
break;
}
case s_value_start: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_value_start";
mark = i;
m_state = s_value;
// fallthrough //
CP_FALL_THROUGH;
}
case s_value: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_value";
if (c == '%' && should_decode_percent) {
if (i - mark > 0) {
if (m_receiver.onValue(buf + mark, i - mark) != 0) {
m_state = s_error;
return i;
}
}
m_state = s_value_escaped1;
break;
}
else if (c == '+') {
// convert plus character to space
if (i - mark > 0) {
if (m_receiver.onValue(buf + mark, i - mark) != 0) {
m_state = s_error;
return i;
}
}
m_escaped[m_escapedLen] = ' ';
m_escapedLen++;
if (m_escapedLen >= MAX_URLENCODE_ESCAPED_SIZE) {
if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
}
m_state = s_value_start;
break;
}
else {
// flush unescaped data collected (if any)
if (m_escapedLen > 0) {
if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
mark = i;
}
}
if (c == m_separatorChar) {
if (m_receiver.onValue(buf + mark, i - mark) != 0) {
dbgWarning(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push() s_value : failed on value";
m_state = s_error;
return i;
}
if (m_receiver.onKvDone() != 0) {
dbgWarning(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push() : s_value : failed on KV";
m_state = s_error;
return i;
}
m_state = s_key_start;
break;
}
if (is_last) {
if (m_receiver.onValue(buf + mark, (i - mark) + 1) != 0) {
m_state = s_error;
return i;
}
}
break;
}
case s_value_escaped1: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_value_escaped1";
bool valid;
unsigned char v = from_hex(c, valid);
if (!valid) { // character right after the '%' is not a valid hex char.
// dump escaped chars
if (m_escapedLen > 0 && m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
// return the '%' character back to the output.
if (m_receiver.onValue("%", 1) != 0) {
return i;
}
// If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences)
if (c != '%') {
// pass the non-hex character back to the output too.
if (m_receiver.onValue(&c, 1) != 0) {
return i;
}
// otherwise (the character is not '%'), switch back to the s_value state
m_state = s_value_start;
}
break;
}
m_escapedCharCandidate = c;
m_escaped[m_escapedLen] = v << 4;
m_state = s_value_escaped2;
break;
}
case s_value_escaped2: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_value_escaped2";
bool valid;
unsigned char v = from_hex(c, valid);
if (!valid) {
// This situation (2nd character is not valid hex) is not treated right now.
// In this case, v will be equal to 0 and output character will be invalid one.
//dump escaped chars
if (m_escapedLen > 0 && m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
// return the '%' character back to the output.
if (m_receiver.onValue("%", 1) != 0) {
return i;
}
// add the character that was thought to be escaped value
if (m_receiver.onValue(&m_escapedCharCandidate, 1)) {
return i;
}
// re parse the character as a key (i is incremented back to current value)
i--;
m_state = s_value_start;
break;
}
m_escapedCharCandidate = 0;
m_escaped[m_escapedLen] |= v;
m_escapedLen++;
if (m_escapedLen >= MAX_URLENCODE_ESCAPED_SIZE) {
if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
m_state = s_error;
return i;
}
m_escapedLen = 0;
}
m_state = s_value_start;
break;
}
case s_error: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): s_error";
return 0;
}
default: {
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): URL parser unrecoverable error";
m_state = s_error;
return 0;
}
}// end of switch()
++i;
}
dbgTrace(D_WAAP_PARSER_URLENCODE) << "ParserUrlEncode::push(): finished: len=" << len;
return len;
}
// Signals end-of-stream: pushes a zero-length chunk, which push() treats as the
// "end of data" signal (flushes pending decoded bytes and closes the last pair).
void ParserUrlEncode::finish()
{
    const char *noData = NULL;
    push(noData, 0);
}
const std::string &
ParserUrlEncode::name() const {
return m_parserName;
}
bool ParserUrlEncode::error() const {
return m_state == s_error;
}

View File

@@ -0,0 +1,57 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_URL_ENCODE_H__29ebe806
#define __PARSER_URL_ENCODE_H__29ebe806
#include "ParserBase.h"
#include <string.h>
// Size of the m_escaped[] staging buffer that holds decoded characters ('+' converted to
// space, and %XX sequences). push() flushes the buffer to the receiver once it fills up.
#define MAX_URLENCODE_ESCAPED_SIZE 16
// Streaming parser for URL-encoded "key=value" pairs.
// Decoded pieces of keys and values are reported to an IParserStreamReceiver through its
// onKey() / onValue() / onKvDone() callbacks as the input is consumed.
class ParserUrlEncode : public ParserBase {
public:
// receiver          - sink for the parsed key/value stream.
// separatorChar     - character that terminates a pair (default '&');
//                     presumably stored in m_separatorChar - constructor body is not visible here.
// should_decode_per - presumably stored in should_decode_percent, which gates %XX decoding in push().
ParserUrlEncode(IParserStreamReceiver &receiver, char separatorChar = '&', bool should_decode_per = true);
virtual ~ParserUrlEncode();
// Feeds the next chunk of input. A zero data_len is the end-of-data signal.
size_t push(const char *data, size_t data_len);
// Signals end of stream (implemented as push(NULL, 0)).
void finish();
// Returns m_parserName.
virtual const std::string &name() const;
// True once the state machine has entered s_error.
bool error() const;
// This parser always reports a depth of 1.
virtual size_t depth() { return 1; }
private:
// States of the push() state machine.
enum state {
s_start,
s_key_start,        // about to start (or resume) a raw run of key characters
s_key,              // inside a raw run of key characters
s_key_escaped1,     // saw '%' in a key, expecting the first hex digit
s_key_escaped2,     // expecting the second hex digit of a key escape
s_value_start,      // about to start (or resume) a raw run of value characters
s_value,            // inside a raw run of value characters
s_value_escaped1,   // saw '%' in a value, expecting the first hex digit
s_value_escaped2,   // expecting the second hex digit of a value escape
s_end,              // NOTE(review): not referenced by the code visible in this file section
s_error             // parsing failed; push() returns 0 from here on
};
IParserStreamReceiver &m_receiver;
enum state m_state;
unsigned char m_escapedLen; // count of characters loaded in m_escaped[] buffer
char m_escaped[MAX_URLENCODE_ESCAPED_SIZE];
char m_separatorChar; // pair separator compared against each input character in push()
char m_escapedCharCandidate; // first character after '%', re-emitted verbatim if the second one is not hex
bool should_decode_percent; // when false, '%' is treated as an ordinary character by push()
static const std::string m_parserName;
};
#endif // __PARSER_URL_ENCODE_H__29ebe806

View File

@@ -0,0 +1,334 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserXML.h"
#include "Waf2Util.h"
#include "debug.h"
#include <assert.h>
USE_DEBUG_FLAG(D_WAAP_PARSER_XML);
// Parser name returned by ParserXML::name().
const std::string ParserXML::m_parserName = "ParserXML";
void ParserXML::onStartElementNs(
void* ctx,
const xmlChar* localname,
const xmlChar* prefix,
const xmlChar* URI,
int nb_namespaces,
const xmlChar** namespaces,
int nb_attributes,
int nb_defaulted,
const xmlChar** attributes)
{
ParserXML* p = (ParserXML*)ctx;
dbgTrace(D_WAAP_PARSER_XML) << "XML OPEN: '" << localname << "'";
p->m_key.push((const char*)localname, xmlStrlen(localname));
int i;
for (i = 0; i < nb_attributes; i++) {
const xmlChar* attr_localname = attributes[i * 5 + 0];
//const xmlChar *attr_prefix = attributes[i*5+1];
//const xmlChar *attr_URI = attributes[i*5+2];
const xmlChar* attr_value_begin = attributes[i * 5 + 3];
const xmlChar* attr_value_end = attributes[i * 5 + 4];
dbgTrace(D_WAAP_PARSER_XML) << "\tXML ATTR: elem='" << (char*)localname << "', " << attr_localname <<
"='" << std::string((char*)attr_value_begin, (size_t)(attr_value_end - attr_value_begin)) << "'";
p->m_key.push((const char*)attr_localname, xmlStrlen(attr_localname));
if (p->m_receiver.onKv(
p->m_key.c_str(),
p->m_key.size(),
(const char*)attr_value_begin, attr_value_end - attr_value_begin,
BUFFERED_RECEIVER_F_BOTH
) != 0) {
p->m_state = s_error;
}
p->m_key.pop("XML end attribute");
}
// before we add new tracking element to the stack for this new element,
// set "children exists" flag to true for the parent element.
if (!p->m_elemTrackStack.empty()) {
p->m_elemTrackStack.back().hasChildren = true;
}
// when opening new element - start tracking its properties (internal text and existence of subelements)
p->m_elemTrackStack.push_back(ElemTrackInfo());
}
void
ParserXML::onEndElementNs(
void* ctx,
const xmlChar* localname,
const xmlChar* prefix,
const xmlChar* URI)
{
ParserXML* p = (ParserXML*)ctx;
dbgTrace(D_WAAP_PARSER_XML) << "XML CLOSE: '" << localname << "'";
if (p->m_elemTrackStack.empty()) {
dbgWarning(D_WAAP_PARSER_XML) <<
"XML closing tag and elem track stack is empty. This is probably sign of a bug!";
return;
}
ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back();
// Usability optimization: only output kv pair for XML elements that had either sub children
// and/or value within.
// Those "wrapper elements" such as <wrapper><name>john</name><age>21</age></wrapper> only
// contain sub elements. For these we don't emit kv pair.
// However, for truly empty element such as <wrapper></wrapper>, or similar element with
// text: <wrapper>some text</wrapper>, we do output a kv pair.
bool isWrapperElement = elemTrackInfo.hasChildren && (elemTrackInfo.value.size() == 0);
if (!isWrapperElement) {
// Emit tag name as key
if (p->m_receiver.onKey(p->m_key.c_str(), p->m_key.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onValue(elemTrackInfo.value.c_str(), elemTrackInfo.value.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onKvDone() != 0) {
p->m_state = s_error; // error
}
}
// when closing an element - pop its tracking info from the tracking stack
p->m_elemTrackStack.pop_back();
// Also, pop the element's name from m_key stack, so the key name always reflects
// current depth within the elements tree
p->m_key.pop("XML end element");
}
void ParserXML::onCharacters(void* ctx, const xmlChar* ch, int len) {
ParserXML* p = (ParserXML*)ctx;
if (p->m_elemTrackStack.empty()) {
dbgWarning(D_WAAP_PARSER_XML) << "XML text and elem track stack is empty. This is probably sign of a bug!";
return;
}
if ((ch == NULL) || (len == 0)) {
dbgTrace(D_WAAP_PARSER_XML) << "Got empty XML text element. Ignoring.";
return;
}
ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back();
dbgTrace(D_WAAP_PARSER_XML) << "XML TEXT: '[" << std::string((char*)ch, (size_t)len) << "]'";
std::string val = std::string((char*)ch, (size_t)len);
// trim isspace() characters around xml text chunks.
// The chunks can occur multiple times within one value, when text value is intermixed with xml sub-tags.
// for example, for XML source "<a>sta<b>zzz</b>rt</a>", the "a" tag will include two text
// chunks "sta" and "rt"
// which are concatenated here to form the word "start".
// The trimming is done here to prevent false alarms on detection algorithm that sees
// "\n" characters in the XML value.
// Example of input that causes false alarm without this trim is (multiline XML):
// <xml><script>\nclean_xml_value '\n<\/script><\/xml>
Waap::Util::trim(val);
elemTrackInfo.value += val;
}
void
ParserXML::onEntityDeclaration(
void* ctx,
const xmlChar* name,
int type,
const xmlChar* publicId,
const xmlChar* systmeid,
xmlChar* content)
{
dbgTrace(D_WAAP_PARSER_XML) << "ENTITY FOUND WITH VALUE: '" << (content ? (const char*)content : "null") << "'";
ParserXML* p = (ParserXML*)ctx;
std::string kw = "08a80340-06d3-11ea-9f87-0242ac11000f";
if (p->m_receiver.onKey(p->m_key.c_str(), p->m_key.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onValue(kw.data(), kw.size()) != 0) {
p->m_state = s_error;
}
if (p->m_receiver.onKvDone() != 0) {
p->m_state = s_error; // error
}
}
// SAX error handler (installed as m_saxHandler.error).
// Formats the printf-style message into a fixed-size stack buffer (truncating if needed)
// and writes it to the trace log. The parser state is not modified here.
static void onError(void* ctx, const char* msg, ...) {
    static const size_t TMP_BUF_SIZE = 4096;
    char formatted[TMP_BUF_SIZE];

    va_list args;
    va_start(args, msg);
    vsnprintf(formatted, sizeof(formatted), msg, args);
    va_end(args);

    dbgTrace(D_WAAP_PARSER_XML) << "LIBXML (xml) onError: " << std::string(formatted);
}
// Constructs the parser in the s_start state with an empty first-bytes buffer and no
// push parser context, and installs the custom SAX callbacks.
ParserXML::ParserXML(IParserStreamReceiver& receiver)
    : m_receiver(receiver),
      m_state(s_start),
      m_bufLen(0),
      m_key("xml_parser"),
      m_pushParserCtxPtr(NULL)
{
    dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::ParserXML()";

    // TODO:: is zeroing this really needed?
    memset(m_buf, 0, sizeof(m_buf));

    // Custom SAX handler: start from an all-zero table and set only the callbacks in use.
    memset(&m_saxHandler, 0, sizeof(m_saxHandler));
    m_saxHandler.initialized = XML_SAX2_MAGIC;
    m_saxHandler.startElementNs = onStartElementNs;
    m_saxHandler.endElementNs = onEndElementNs;
    m_saxHandler.characters = onCharacters;
    m_saxHandler.entityDecl = onEntityDeclaration;
    m_saxHandler.error = onError;

    // Ugly: push first element into key (it will be ignored since we will never call
    // the "first()" method of this key within XML parser object.
    m_key.push("xml", 3);
}
// Releases the libxml2 push parser context, if one was ever created by push().
ParserXML::~ParserXML() {
    dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::~ParserXML()";

    if (m_pushParserCtxPtr == NULL) {
        return;
    }
    xmlFreeParserCtxt(m_pushParserCtxPtr);
}
// Decides whether a libxml2 error should be treated as a parsing failure.
// Returns true if the error must be treated, false if it should be ignored.
bool ParserXML::filterErrors(xmlErrorPtr xmlError) {
    dbgDebug(D_WAAP_PARSER_XML) << "ParserXML::filterErrors(): xmlError " << xmlError->code << ": '" <<
        xmlError->message << "'";

    // Two error codes are ignored: XML_ERR_RESERVED_XML_NAME and XML_ERR_UNDECLARED_ENTITY.
    // The first is "XML declaration allowed only at the start of the document", which includes
    // the "multiple XML declarations" case seen from some SOAP clients. The parser runs in
    // permissive mode (XML_PARSE_RECOVER), so it recovers and parses such XML correctly, yet the
    // error code is still reported. Ignoring it keeps the WAAP code from treating the XML as
    // "broken" and scanning the XML source as-is, which prevents a false alarm on that source.
    const bool isReservedXmlName = (xmlError->code == XML_ERR_RESERVED_XML_NAME);
    const bool isUndeclaredEntity = (xmlError->code == XML_ERR_UNDECLARED_ENTITY);

    if (!isReservedXmlName && !isUndeclaredEntity) {
        return true;
    }

    dbgDebug(D_WAAP_PARSER_XML) << "ParserXML::filterErrors(): ignoring the '" << xmlError->code << ": " <<
        xmlError->message << "' xml parser error.";
    return false;
}
size_t ParserXML::push(const char* data, size_t data_len) {
size_t i = 0;
char c;
if (data_len == 0) {
dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): end of data signal! m_state=" << m_state;
// Send zero-length chunk with "terminate" flag enabled to signify end-of-stream
if (xmlParseChunk(m_pushParserCtxPtr, m_buf, 0, 1)) {
xmlErrorPtr xmlError = xmlCtxtGetLastError(m_pushParserCtxPtr);
if (xmlError && filterErrors(xmlError)) {
dbgDebug(D_WAAP_PARSER_XML) << "ParserXML::push(): xmlError: code=" << xmlError->code << ": '" <<
xmlError->message << "'";
m_state = s_error; // error
return -1;
}
}
return m_bufLen;
}
int expected_buffer_len = FIRST_BUFFER_SIZE - 1;
while (i < data_len) {
c = data[i];
switch (m_state) {
case s_start:
dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_start";
m_state = s_accumulate_first_bytes;
// fall through //
CP_FALL_THROUGH;
case s_accumulate_first_bytes:
dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_accumulate_first_bytes. c='" << data[i] <<
"'; m_bufLen=" << m_bufLen << "; i=" << i;
m_buf[m_bufLen] = c;
m_bufLen++;
if (c == '?') {
expected_buffer_len = FIRST_BUFFER_SIZE;
}
if (m_bufLen == expected_buffer_len) {
m_state = s_start_parsing;
}
break;
case s_start_parsing:
dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_start_parsing. sending len=" << m_bufLen << ": '" <<
std::string(m_buf, m_bufLen) << "'; i=" << i;
// Create XML SAX (push parser) context
// It is important to buffer at least first 4 bytes of input stream so libxml can determine text encoding!
m_pushParserCtxPtr = xmlCreatePushParserCtxt(&m_saxHandler, this, m_buf, m_bufLen, NULL);
// Enable "permissive mode" for XML SAX parser.
// In this mode, the libxml parser doesn't stop on errors, but still reports them!
xmlCtxtUseOptions(m_pushParserCtxPtr, XML_PARSE_RECOVER);
m_state = s_parsing;
// fall through //
CP_FALL_THROUGH;
case s_parsing:
dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_parsing. sending len=" << (int)(data_len - i) <<
": '" << std::string(data + i, data_len - i) << "'; i=" << i;
if (m_pushParserCtxPtr) {
if (xmlParseChunk(m_pushParserCtxPtr, data + i, data_len - i, 0)) {
xmlErrorPtr xmlError = xmlCtxtGetLastError(m_pushParserCtxPtr);
if (xmlError && filterErrors(xmlError)) {
dbgDebug(D_WAAP_PARSER_XML) << "ParserXML::push(): xmlError: code=" << xmlError->code <<
": '" << xmlError->message << "'";
m_state = s_error; // error
return 0;
}
}
// success (whole buffer consumed)
i = data_len - 1; // take into account ++i at the end of the state machine loop
}
break;
case s_error:
dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): s_error";
return 0;
}
++i;
}
dbgTrace(D_WAAP_PARSER_XML) << "ParserXML::push(): exiting with param(len)=" << data_len << ": i=" << i;
return i;
}
// Signals end-of-stream by pushing a zero-length chunk.
void ParserXML::finish()
{
    const char* noData = NULL;
    push(noData, 0);
}
const std::string &
ParserXML::name() const {
return m_parserName;
}
bool ParserXML::error() const {
return m_state == s_error;
}

View File

@@ -0,0 +1,101 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_XML_H__5bf3b834
#define __PARSER_XML_H__5bf3b834
#include "ParserBase.h"
#include "KeyStack.h"
#include <libxml/xmlstring.h>
#include <libxml/xmlerror.h>
#include <libxml/parser.h>
#define FIRST_BUFFER_SIZE 5 // must buffer at least 4 first bytes to allow unicode autodetection (BOM).
// Streaming XML parser built on the libxml2 SAX push interface.
// Elements and attributes are reported to an IParserStreamReceiver as key/value pairs; the key
// is maintained in a KeyStack that follows the current nesting of XML elements.
// NOTE(review): the class holds a raw xmlParserCtxtPtr that the destructor frees, and copying is
// not disabled here - confirm instances are never copied (or that ParserBase prevents it).
class ParserXML : public ParserBase {
public:
ParserXML(IParserStreamReceiver &receiver);
virtual ~ParserXML();
// Feeds the next chunk of the stream; a zero data_len signals end of stream.
size_t push(const char *data, size_t data_len);
// Signals end of stream (implemented as push(NULL, 0)).
void finish();
// Returns m_parserName.
virtual const std::string &name() const;
// True once the parser has entered the s_error state.
bool error() const;
// Depth of the key stack, not counting the first entry when the stack is non-empty.
virtual size_t depth() { return (m_key.depth() > 0) ? m_key.depth()-1 : m_key.depth(); }
private:
// States of the push() state machine.
enum state {
s_start,
s_accumulate_first_bytes, // still collecting the first bytes of the stream into m_buf
s_start_parsing,          // enough bytes buffered; the push parser context is created next
s_parsing,                // input is handed to xmlParseChunk() directly
s_error
};
// Information tracked per each element in current stack of tracked XML elements
struct ElemTrackInfo {
std::string value;
bool hasChildren;
ElemTrackInfo():hasChildren(false) {
// when element is just opened - we still didn't see any children,
// hence start with the "hasChildren" flag as false.
// This flag will be enabled once we meet the opening of a subelement.
// Also, we start from empty value string and gradually append to it each
// time we receive next piece of text from XML parser.
// The collected value is then emitted when element finishes.
}
};
// SAX callbacks. They are static because libxml2 calls them through plain function pointers;
// ctx is the ParserXML instance registered when the push parser context is created.
static void onStartElementNs(
void *ctx,
const xmlChar *localname,
const xmlChar *prefix,
const xmlChar *URI,
int nb_namespaces,
const xmlChar **namespaces,
int nb_attributes,
int nb_defaulted,
const xmlChar **attributes);
static void onEndElementNs(
void* ctx,
const xmlChar* localname,
const xmlChar* prefix,
const xmlChar* URI);
static void onCharacters(
void *ctx,
const xmlChar *ch,
int len);
static void onEntityDeclaration(
void* ctx,
const xmlChar* name,
int type,
const xmlChar* publicId,
const xmlChar* systmeid,
xmlChar* content);
// Filter out errors that should be ignored. Returns true if error should be treated,
// false if an error should be ignored
bool filterErrors(xmlErrorPtr xmlError);
IParserStreamReceiver &m_receiver;
enum state m_state;
// buffer first few bytes of stream (required before calling SAX parser for the first time)
char m_buf[FIRST_BUFFER_SIZE];
int m_bufLen; // number of bytes currently stored in m_buf
KeyStack m_key; // current key: element names (and, temporarily, an attribute name)
std::vector<ElemTrackInfo> m_elemTrackStack; // one entry per currently open element
xmlSAXHandler m_saxHandler; // callback table handed to libxml2
xmlParserCtxtPtr m_pushParserCtxPtr; // NULL until push() creates the context; freed in the destructor
public:
static const std::string m_parserName;
};
#endif // __PARSER_XML_H__5bf3b834

View File

@@ -0,0 +1,53 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// #define WAF2_LOGGING_ENABLE
#include "PatternMatcher.h"
#include "Waf2Regex.h"
#include <string>
#include <boost/algorithm/string.hpp>
#include <fnmatch.h>
#include "debug.h"
USE_DEBUG_FLAG(D_WAAP);
// PatternMatcherWildcard implementation
// Stores the wildcard (glob) pattern as-is; it is only interpreted when match() is called.
PatternMatcherWildcard::PatternMatcherWildcard(const std::string &pattern)
    : m_pattern(pattern)
{
    dbgTrace(D_WAAP) << "Compiled pattern: '" << m_pattern.c_str() << "'\n";
}
// Returns true if value matches the stored glob pattern.
bool PatternMatcherWildcard::match(const std::string &value) const {
    // Matching is delegated to the Unix filename (glob) matcher fnmatch(), with no flags.
    // fnmatch() returns 0 only for a successful match, FNM_NOMATCH when there is no match,
    // and some other non-zero value on error. Here an error counts as "no match".
    const int rc = ::fnmatch(m_pattern.c_str(), value.c_str(), 0);
    return rc == 0;
}
// PatternMatcherRegex implementation
// Compiles the regex pattern. A compilation failure is recorded in m_regexError (and logged)
// rather than reported to the caller.
PatternMatcherRegex::PatternMatcherRegex(const std::string &pattern)
    : m_pattern(pattern),
      m_regexError(false),
      m_regex(pattern, m_regexError, "patternMatcherRegex_" + pattern)
{
    if (!m_regexError) {
        return;
    }
    dbgWarning(D_WAAP) << "Failed compiling regex pattern: '" << m_pattern.c_str() << "'\n";
}
// Returns true if the compiled regex has a match in value.
// A pattern that failed to compile never matches.
bool PatternMatcherRegex::match(const std::string &value) const {
    return !m_regexError && m_regex.hasMatch(value);
}

View File

@@ -0,0 +1,47 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PATTERN_MATCHER_H__9baf179a
#define __PATTERN_MATCHER_H__9baf179a
#include "Waf2Regex.h"
#include <vector>
#include <string>
#include <memory>
// Abstract interface of a pattern matcher: decides whether a string value matches a pattern
// held by the concrete implementation.
class PatternMatcherBase {
public:
    // Virtual destructor: matchers are handled through base-class pointers, and destroying a
    // derived object through a base pointer is undefined behaviour without it.
    virtual ~PatternMatcherBase() {}
    // Returns true if value matches the pattern.
    virtual bool match(const std::string &value) const =0;
};
// Pattern matcher for wildcard (glob) patterns; match() is implemented with fnmatch().
class PatternMatcherWildcard : public PatternMatcherBase {
public:
PatternMatcherWildcard(const std::string &pattern);
// Returns true if value matches the stored glob pattern.
virtual bool match(const std::string &value) const;
private:
std::string m_pattern; // the glob pattern, stored verbatim
};
// Pattern matcher for regular expressions, backed by the project's Regex class.
class PatternMatcherRegex : public PatternMatcherBase {
public:
PatternMatcherRegex(const std::string &pattern);
// Returns true if the regex has a match in value; always false if the pattern failed to compile.
virtual bool match(const std::string &value) const;
private:
std::string m_pattern; // original pattern text (used in log messages)
// Set during construction of m_regex; must be declared before m_regex because the constructor
// passes it to the Regex constructor.
bool m_regexError;
Regex m_regex;
};
typedef std::shared_ptr<PatternMatcherBase> PatternMatcherBasePtr;
#endif // __PATTERN_MATCHER_H__9baf179a

View File

@@ -0,0 +1,80 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "RateLimiter.h"
namespace Waap {
namespace Util {
// Creates a rate limiter that allows up to `events` events per `interval` seconds.
// One hit counter is kept per second of the interval. A zero or negative interval leaves the
// counters buffer empty, in which case event() always lets the event pass.
RateLimiter::RateLimiter(unsigned events, std::chrono::seconds interval)
    :
    m_max_events(events),
    m_interval(interval),
    m_hitsPerSecond(),
    m_recentIdx(0),
    m_recentHitTime(0),
    m_hitsCount(0)
{
    // A negative count would convert to a huge unsigned size and make resize() throw,
    // so only positive intervals get a counters buffer.
    if (interval.count() > 0) {
        m_hitsPerSecond.resize(interval.count(), 0);
    }
}
// Forgets all recorded hits and restarts the measurement window at `now`.
void
RateLimiter::clear(const std::chrono::seconds& now)
{
    // zero every per-second counter while keeping the buffer size
    m_hitsPerSecond.assign(m_hitsPerSecond.size(), 0u);
    m_hitsCount = 0;
    m_recentHitTime = now;
    m_recentIdx = 0;
}
// Records one event that happened at time `now` (in seconds).
// Returns true while the number of events in the recent interval window does not exceed the
// configured maximum (the event should pass), false otherwise (the event should be blocked).
bool
RateLimiter::event(const std::chrono::seconds& now)
{
    const auto slotCount = m_hitsPerSecond.size();
    if (slotCount == 0) {
        // Rate limiter was initialized with a 0-seconds interval - always pass
        return true;
    }

    // Start from a clean buffer on the very first event, or after a whole interval has passed
    // without events.
    const bool isFirstEvent = (m_recentHitTime == std::chrono::seconds(0));
    if (isFirstEvent || now - m_recentHitTime >= m_interval) {
        clear(now);
    }

    // Advance the window one second at a time until it reaches `now`.
    for (; m_recentHitTime < now; m_recentHitTime += std::chrono::seconds(1)) {
        // Move to the next slot. The buffer is circular, so the slot after the head is the
        // tail, i.e. the oldest second: wrap --->[HEAD][TAIL]---> wrap
        ++m_recentIdx;
        if (!(m_recentIdx < slotCount)) {
            m_recentIdx = 0;
        }

        // Drop the hits of the oldest second from the running total and reuse its slot.
        m_hitsCount -= m_hitsPerSecond[m_recentIdx];
        m_hitsPerSecond[m_recentIdx] = 0;
    }

    // Count this hit in the most recent second's slot and in the running total.
    ++m_hitsPerSecond[m_recentIdx];
    ++m_hitsCount;

    return !(m_hitsCount > m_max_events);
}
}
}

View File

@@ -0,0 +1,42 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#include <vector>
namespace Waap {
namespace Util {
// Simple rate limiter primitive that collects events() and only allows up to X events per Y seconds.
// For each event, call RateLimiter::event() passing real or simulated timestamp (in seconds).
// The returned boolean value will tell the caller whether this event must pass (true) or be blocked (false).
// Hits are counted in a circular buffer with one slot per second of the interval, together
// with a running total of the hits currently held in the buffer.
class RateLimiter {
public:
// events   - maximum number of events allowed within one interval window.
// interval - length of the window in seconds; also the number of per-second slots allocated.
RateLimiter(unsigned int events, std::chrono::seconds interval);
// Resets all counters and restarts the window at `now`.
void clear(const std::chrono::seconds& now);
// Records an event at time `now`; returns true if it should pass, false if it should be blocked.
bool event(const std::chrono::seconds& now);
private:
unsigned m_max_events; // max events allowed during the recent interval window
std::chrono::seconds m_interval; // configured interval window
std::vector<unsigned> m_hitsPerSecond; // array of hitcounts per second (remembers up to interval recent seconds)
unsigned m_recentIdx; // index of recent second
std::chrono::seconds m_recentHitTime; // timestamp of recent second
unsigned m_hitsCount; // total events during last interval seconds (rolling update)
};
}
}

View File

@@ -0,0 +1,255 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <chrono>
#include <boost/regex.hpp>
#include "RateLimiting.h"
#include "Waf2Engine.h"
#include "agent_core_utilities.h"
// Value passed to the constructor of every per-rule EntriesLru tracking table in State::State()
// (presumably the maximum number of tracked entries per rule - confirm against EntriesLru).
#define RATE_LIMITING_LRU_SIZE 10000
namespace Waap {
namespace RateLimiting {
// Returns the "enable" flag of the policy's rate limiting enforcement settings.
bool Policy::getRateLimitingEnforcementStatus()
{
    const bool enforcementEnabled = m_rateLimiting.enable;
    return enforcementEnabled;
}
// Two entry keys are equal when both their url and their source are equal.
bool
EntryKey::operator==(EntryKey const& other) const
{
    if (!(url == other.url)) {
        return false;
    }
    return source == other.source;
}
// Enforcement settings are equal when their "enable" flags are equal.
bool
Policy::RateLimitingEnforcement::operator==(const Policy::RateLimitingEnforcement &other) const
{
    const bool sameEnableFlag = (enable == other.enable);
    return sameEnableFlag;
}
// Policies are equal when their rules and their enforcement settings are equal.
bool
Policy::operator==(const Policy &other) const
{
    if (!(rules == other.rules)) {
        return false;
    }
    return m_rateLimiting == other.m_rateLimiting;
}
// Rules are equal when action, rate, source filter and URI filter are all equal.
bool
Policy::Rule::operator==(const Policy::Rule &other) const
{
    if (!(action == other.action)) {
        return false;
    }
    if (!(rate == other.rate)) {
        return false;
    }
    if (!(sourceFilter == other.sourceFilter)) {
        return false;
    }
    return uriFilter == other.uriFilter;
}
// Actions are equal when both the quarantine time and the action type are equal.
bool
Policy::Rule::Action::operator==(const Policy::Rule::Action &other) const
{
    if (!(quarantineTimeSeconds == other.quarantineTimeSeconds)) {
        return false;
    }
    return type == other.type;
}
// Rates are equal when both the events count and the interval are equal.
bool
Policy::Rule::Rate::operator==(const Policy::Rule::Rate &other) const
{
    if (!(events == other.events)) {
        return false;
    }
    return interval == other.interval;
}
// Source filters are equal when groupBy, scope and the list of source regex patterns
// (same length, same elements in the same order) are all equal.
bool
Policy::Rule::SourceFilter::operator==(const Policy::Rule::SourceFilter &other) const
{
    if (!(groupBy == other.groupBy)) {
        return false;
    }
    if (!(scope == other.scope)) {
        return false;
    }

    const auto &mine = specific_source_regexes_pattern;
    const auto &theirs = other.specific_source_regexes_pattern;

    if (mine.size() != theirs.size()) {
        return false;
    }

    // same length: compare element by element
    for (size_t idx = 0; idx < mine.size(); ++idx) {
        if (mine[idx] != theirs[idx]) {
            return false;
        }
    }

    return true;
}
// URI filters are equal when groupBy, scope and the list of URI regex patterns
// (same length, same elements in the same order) are all equal.
bool
Policy::Rule::UriFilter::operator==(const Policy::Rule::UriFilter &other) const
{
    if (!(groupBy == other.groupBy)) {
        return false;
    }
    if (!(scope == other.scope)) {
        return false;
    }

    const auto &mine = specific_uri_regexes_pattern;
    const auto &theirs = other.specific_uri_regexes_pattern;

    if (mine.size() != theirs.size()) {
        return false;
    }

    // same length: compare element by element
    for (size_t idx = 0; idx < mine.size(); ++idx) {
        if (mine[idx] != theirs[idx]) {
            return false;
        }
    }

    return true;
}
// Creates a tracking entry in the MEASURING state with its own rate limiter, configured for
// `events` events per `interval`.
TrackEntry::TrackEntry(unsigned int events, std::chrono::seconds interval)
    : eventRateLimiter(events, interval),
      state(MEASURING),
      quarantinedUntil()
{
}
// Registers an event at time `now`.
// Returns true if the rate limiter is not saturated (the event should be allowed), or false
// if it is (the event should be blocked).
bool
TrackEntry::event(std::chrono::seconds now)
{
    // A quarantine that has reached its end time is lifted before the event is counted.
    const bool quarantineExpired =
        (state == TrackEntry::State::QUARANTINED) && (now >= quarantinedUntil);
    if (quarantineExpired) {
        state = TrackEntry::State::MEASURING;
    }

    // The event is always counted, whatever the state is.
    return eventRateLimiter.event(now);
}
// Puts the entry into the QUARANTINED state and records `until` as the time at which
// event() may release it.
void
TrackEntry::quarantineUntil(std::chrono::seconds until)
{
    quarantinedUntil = until;
    state = TrackEntry::State::QUARANTINED;
}
// Reports whether the entry is currently in any state other than MEASURING.
bool
TrackEntry::isBlocked() const
{
    const bool measuring = (state == TrackEntry::State::MEASURING);
    return !measuring;
}
// Builds the per-asset state for `policy`: one LRU tracking table (capacity
// RATE_LIMITING_LRU_SIZE) is created per policy rule, so that the table at index i
// belongs to policy->rules[i].
State::State(const std::shared_ptr<Policy> &policy)
    : policy(policy),
      perRuleTrackingTable()
{
    const size_t ruleCount = policy->rules.size();
    for (size_t created = 0; created < ruleCount; ++created) {
        perRuleTrackingTable.push_back(std::make_shared<EntriesLru>(RATE_LIMITING_LRU_SIZE));
    }
}
// Returns true as soon as `value` matches one of the regexes in `regexesList`.
// Null entries in the list are skipped. Returns false for an empty list or when
// nothing matches.
static bool
matchOneOfRegexes(const std::string& value, const std::vector<std::shared_ptr<boost::regex>> &regexesList)
{
    for (const auto &candidate : regexesList) {
        if (candidate == nullptr) {
            continue;
        }
        if (NGEN::Regex::regexMatch(__FILE__, __LINE__, value, *candidate)) {
            return true;
        }
    }
    return false;
}
// Evaluate all rate limiting rules of the policy against a single request.
//
// sourceIdentifier - identifier of the request source, matched against source filters and
//                    used as part of the tracking key for rules grouped per source.
// uriStr           - request URI, matched against URI filters and used as part of the
//                    tracking key for rules grouped per URL.
// now              - current time, used for event counting and quarantine expiration.
// log              - output: reset to false on entry; set to true when a DETECT or RATE_LIMIT
//                    rule is exceeded, or when the tracked entry is blocked (quarantined).
//
// Returns true if the request should be allowed, false if at least one rule blocks it.
// All rules are always evaluated (there is no early exit), so every matching rule counts
// the event.
bool
State::execute(const std::string& sourceIdentifier, const std::string& uriStr, std::chrono::seconds now, bool& log)
{
bool allow = true;
log = false;
// Run rules one by one.
for (unsigned ruleId=0; ruleId < policy->rules.size(); ++ruleId) {
const Policy::Rule &rule = policy->rules[ruleId];
const Policy::Rule::UriFilter &uriFilter = rule.uriFilter;
const Policy::Rule::SourceFilter &sourceFilter = rule.sourceFilter;
const Policy::Rule::Rate &rate = rule.rate;
const Policy::Rule::Action &action = rule.action;
// Get rate limiter states tracking table specific to current rule
std::shared_ptr<EntriesLru> table = perRuleTrackingTable[ruleId];
// Build a key to look up an entry. Both parts start empty; a part that stays empty
// means the rule counts events globally along that dimension.
EntryKey entryKey;
// Filter out unmatched Urls
if (uriFilter.scope == Waap::RateLimiting::Policy::Rule::UriFilter::Scope::SPECIFIC
&& !matchOneOfRegexes(uriStr, uriFilter.specific_uri_regexes))
{
continue;
}
// Filter out unmatched Sources
if (sourceFilter.scope == Waap::RateLimiting::Policy::Rule::SourceFilter::Scope::SPECIFIC
&& !matchOneOfRegexes(sourceIdentifier, sourceFilter.specific_source_regexes))
{
continue;
}
if (uriFilter.groupBy == Policy::Rule::UriFilter::GroupBy::URL) {
// Include the request URI in the key (each URI is tracked separately)
entryKey.url = uriStr;
}
if (sourceFilter.groupBy == Policy::Rule::SourceFilter::GroupBy::SOURCE) {
// Include the HTTP source ID in the key (each source is tracked separately)
entryKey.source = sourceIdentifier;
}
// Find entry in LRU, or create new
std::shared_ptr<TrackEntry> trackEntry;
if (!table->get(entryKey, trackEntry)) {
trackEntry = std::make_shared<TrackEntry>(rate.events, std::chrono::seconds(rate.interval));
}
// Insert or update an entry in LRU (this moves entry up if exist, or inserts new, possibly expiring old ones
// to keep the LRU size under control).
table->insert(std::make_pair(entryKey, trackEntry));
// Count this event in the entry's rate limiter. Release temporary block if time arrived.
if (trackEntry->event(now) == false) {
// TrackEntry's rate limiter is saturated (too many requests) - act according to rule's Action
switch (action.type) {
case Policy::Rule::Action::Type::DETECT:
// log block action.
log = true;
// Detect only - the request is not blocked by this rule
break;
case Policy::Rule::Action::Type::QUARANTINE:
// Mark this entry blocked temporarily, for at least X seconds.
// This runs on every saturated event, so the quarantine deadline is pushed forward
// for as long as the rate limiter stays saturated.
trackEntry->quarantineUntil(now + std::chrono::seconds(action.quarantineTimeSeconds));
break;
case Policy::Rule::Action::Type::RATE_LIMIT:
// log block action.
log = true;
// Block this event only
allow = false;
break;
}
}
// If at least one of the rules says "block" - block the request
if (trackEntry->isBlocked()) {
// log block action.
log = true;
allow = false;
}
}
return allow;
}
}
}

View File

@@ -0,0 +1,337 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lru_cache_map.h"
#include "RateLimiter.h"
#include <string>
#include <chrono>
#include <cereal/types/vector.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/archives/json.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/regex.hpp>
#include <memory>
class Waf2Transaction;
namespace Waap {
namespace RateLimiting {
struct Policy {
struct Rule {
// URI part of a rate limiting rule: which request URIs the rule applies to (scope) and
// whether matching requests are counted together or separately per URI (groupBy).
struct UriFilter {
    enum class GroupBy {
        GLOBAL, // "all uris": all matching requests share one counter
        URL     // "single uri": the URI becomes part of the tracking key
    };

    enum class Scope {
        ALL,     // "all": the rule applies to every URI
        SPECIFIC // "specific": the rule applies only to URIs matching one of the regexes
    };

    // Convert the textual scope value (case-insensitive) to the Scope enum.
    // Throws cereal::Exception for an unrecognized value.
    Scope strScopeToEnum(std::string const &value)
    {
        if (boost::iequals(value, "all")) {
            return Scope::ALL;
        }
        else if (boost::iequals(value, "specific")) {
            return Scope::SPECIFIC;
        }
        else {
            throw cereal::Exception(
                "Invalid value for RateLimiting::Policy::Rule::UriFilter::Scope='" + value + "'");
        }
    }

    // Convert the textual groupBy value (case-insensitive) to the GroupBy enum.
    // Throws cereal::Exception for an unrecognized value.
    GroupBy strGroupByToEnum(std::string const &value)
    {
        if (boost::iequals(value, "all uris")) {
            return GroupBy::GLOBAL;
        }
        else if (boost::iequals(value, "single uri")) {
            return GroupBy::URL;
        }
        else {
            throw cereal::Exception(
                "Invalid value for RateLimiting::Policy::Rule::UriFilter::GroupBy='" + value + "'");
        }
    }

    // Load the filter from an archive: reads "groupBy" and "scope", and - only when the
    // scope is SPECIFIC - the "specific_uris" pattern list, compiling each pattern into
    // a boost::regex.
    template <typename Archive>
    void serialize(Archive &ar)
    {
        std::string groupByStr;
        ar(cereal::make_nvp("groupBy", groupByStr));
        groupBy = strGroupByToEnum(groupByStr);

        std::string scopeStr;
        ar(cereal::make_nvp("scope", scopeStr));
        scope = strScopeToEnum(scopeStr);

        if(scope == Scope::SPECIFIC)
        {
            ar(cereal::make_nvp("specific_uris", specific_uri_regexes_pattern));
            // Rebuild the compiled regexes so they stay in sync with the pattern strings.
            specific_uri_regexes.clear();

            for (auto &specific_uri_pattern : specific_uri_regexes_pattern)
            {
                specific_uri_regexes.push_back(std::make_shared<boost::regex>(specific_uri_pattern));
            }
        }
    }

    bool operator==(const Policy::Rule::UriFilter &other) const;

    GroupBy groupBy;
    // Compiled form of specific_uri_regexes_pattern (same order).
    std::vector<std::shared_ptr<boost::regex>> specific_uri_regexes;
    // Pattern strings as read from the policy; these are what operator== compares.
    std::vector<std::string> specific_uri_regexes_pattern;
    Scope scope;
};
// Source part of a rate limiting rule: which request sources the rule applies to (scope)
// and whether matching requests are counted together or separately per source (groupBy).
struct SourceFilter {
    enum class GroupBy {
        GLOBAL, // "all sources": all matching requests share one counter
        SOURCE  // "single source": the source identifier becomes part of the tracking key
    };

    enum class Scope {
        ALL,     // "all": the rule applies to every source
        SPECIFIC // "specific": the rule applies only to sources matching one of the regexes
    };

    // Convert the textual scope value (case-insensitive) to the Scope enum.
    // Throws cereal::Exception for an unrecognized value.
    Scope strScopeToEnum(std::string const &value)
    {
        if (boost::iequals(value, "all")) {
            return Scope::ALL;
        }
        else if (boost::iequals(value, "specific")) {
            return Scope::SPECIFIC;
        }
        else {
            throw cereal::Exception(
                "Invalid value for RateLimiting::Policy::Rule::SourceFilter::Scope='" + value + "'");
        }
    }

    // Convert the textual groupBy value (case-insensitive) to the GroupBy enum.
    // Throws cereal::Exception for an unrecognized value.
    GroupBy strToEnum(std::string const &value)
    {
        if (boost::iequals(value, "all sources")) {
            return GroupBy::GLOBAL;
        }
        else if (boost::iequals(value, "single source")) {
            return GroupBy::SOURCE;
        }
        else {
            throw cereal::Exception(
                "Invalid value for RateLimiting::Policy::Rule::SourceFilter::GroupBy='" + value + "'");
        }
    }

    // Load the filter from an archive: reads "groupBy" and "scope", and - only when the
    // scope is SPECIFIC - the "specific_sources" pattern list, compiling each pattern
    // into a boost::regex.
    template <typename Archive>
    void serialize(Archive &ar) {
        std::string groupByStr;
        ar(cereal::make_nvp("groupBy", groupByStr));
        groupBy = strToEnum(groupByStr);

        std::string scopeStr;
        ar(cereal::make_nvp("scope", scopeStr));
        scope = strScopeToEnum(scopeStr);

        if(scope == Scope::SPECIFIC)
        {
            ar(cereal::make_nvp("specific_sources", specific_source_regexes_pattern));
            // Rebuild the compiled regexes so they stay in sync with the pattern strings.
            specific_source_regexes.clear();

            for (auto &specific_source_pattern : specific_source_regexes_pattern) {
                specific_source_regexes.push_back(std::make_shared<boost::regex>(specific_source_pattern));
            }
        }
    }

    bool operator==(const Policy::Rule::SourceFilter &other) const;

    GroupBy groupBy;
    // Compiled form of specific_source_regexes_pattern (same order).
    std::vector<std::shared_ptr<boost::regex>> specific_source_regexes;
    // Pattern strings as read from the policy; these are what operator== compares.
    std::vector<std::string> specific_source_regexes_pattern;
    Scope scope;
};
// Allowed event rate of a rule: at most `events` events per `interval` seconds.
struct Rate {
    // Reads "interval" first and then "events" from the archive.
    template <typename Archive>
    void serialize(Archive &ar)
    {
        ar(
            cereal::make_nvp("interval", interval),
            cereal::make_nvp("events", events)
        );
    }

    bool operator==(const Policy::Rule::Rate &other) const;

    unsigned interval; // Interval in seconds
    unsigned events; // Events allowed during the interval
};
// What to do when a rule's rate is exceeded.
struct Action {
    enum class Type {
        DETECT,
        QUARANTINE,
        RATE_LIMIT
    };

    // Convert the textual action type (case-insensitive) to the Type enum.
    // Throws cereal::Exception for an unrecognized value.
    Type strToEnum(std::string const &value)
    {
        if (boost::iequals(value, "detect")) {
            return Type::DETECT;
        }
        if (boost::iequals(value, "quarantine")) {
            return Type::QUARANTINE;
        }
        if (boost::iequals(value, "rate limit")) {
            return Type::RATE_LIMIT;
        }
        throw cereal::Exception(
            "Invalid value for RateLimiting::Policy::Action::Type='" + value + "'");
    }

    // Reads "type" from the archive. "quarantineTimeSeconds" is read only for the
    // QUARANTINE type; for every other type it is set to 0.
    template <typename Archive>
    void serialize(Archive &ar)
    {
        std::string typeName;
        ar(cereal::make_nvp("type", typeName));
        type = strToEnum(typeName);

        quarantineTimeSeconds = 0;
        if (type != Type::QUARANTINE) {
            return;
        }
        ar(cereal::make_nvp("quarantineTimeSeconds", quarantineTimeSeconds));
    }

    bool operator==(const Policy::Rule::Action &other) const;

    Type type;
    unsigned quarantineTimeSeconds; // time to block (in seconds), relevant only for QUARANTINE action type
};
// Reads the four parts of a rule from the archive, in this order:
// "uriFilter", "sourceFilter", "rate", "action".
template <typename Archive>
void serialize(Archive &ar)
{
    ar(
        cereal::make_nvp("uriFilter", uriFilter),
        cereal::make_nvp("sourceFilter", sourceFilter),
        cereal::make_nvp("rate", rate),
        cereal::make_nvp("action", action)
    );
}
bool operator==(const Rule &other) const;
UriFilter uriFilter;
SourceFilter sourceFilter;
Rate rate;
Action action;
};
// Holds whether rate limiting enforcement is switched on for the policy.
class RateLimitingEnforcement
{
public:
    // Enforcement is off by default.
    RateLimitingEnforcement() : enable(false) {}

    // Reads "rateLimitingEnforcement" from the archive; enforcement is enabled only when
    // the value equals "prevent" (compared after lower-casing).
    template <typename Archive>
    RateLimitingEnforcement(Archive &ar) : enable(false)
    {
        std::string level;
        ar(cereal::make_nvp("rateLimitingEnforcement", level));
        const std::string normalizedLevel = boost::algorithm::to_lower_copy(level);
        enable = (normalizedLevel == "prevent");
    }

    bool operator==(const Policy::RateLimitingEnforcement &other) const;

    bool enable;
};
std::vector<Rule> rules;
RateLimitingEnforcement m_rateLimiting;
Policy() {}
bool getRateLimitingEnforcementStatus();
bool operator==(const Policy &other) const;
// Loads a policy from an archive: the enforcement setting is read first (by the
// m_rateLimiting initializer), then the rule list stored under "rateLimiting".
template <typename Archive>
Policy(Archive& ar)
    : m_rateLimiting(ar)
{
    ar(cereal::make_nvp("rateLimiting", rules));
}
};
// Key used to identify specific rate limiting entry
struct EntryKey {
// Request URI part of the key. State::execute() fills it only for rules grouped per URL;
// otherwise it stays empty.
std::string url;
// Source identifier part of the key. State::execute() fills it only for rules grouped
// per source; otherwise it stays empty.
std::string source;
// comparison operator should be implemented to use this struct as a key in an LRU cache.
bool operator==(EntryKey const& other) const;
};
// Hash support for EntryKey so it can be used as a key in unordered (hashed) containers
// such as the LRU. The hash combines the url and source fields, in that order.
inline std::size_t hash_value(EntryKey const &entryKey)
{
    std::size_t seed = 0;

    boost::hash_combine(seed, entryKey.url);
    boost::hash_combine(seed, entryKey.source);

    return seed;
}
// Rate limiting tracking entry: a rate limiter plus a quarantine state for one EntryKey.
struct TrackEntry {
enum State {
// Normal state: events are counted and the entry is not blocked
MEASURING,
// Entry is blocked until the quarantinedUntil time is reached
QUARANTINED
};
// Counts events and reports whether the configured rate is exceeded
Waap::Util::RateLimiter eventRateLimiter;
State state;
// Time at which a QUARANTINED entry may return to MEASURING (checked in event())
std::chrono::seconds quarantinedUntil;
// Create an entry in the MEASURING state with a rate limiter built from (events, interval)
TrackEntry(unsigned int events, std::chrono::seconds interval);
// Count one event at time `now`, releasing an expired quarantine first.
// Returns the rate limiter's verdict for this event.
bool event(std::chrono::seconds now);
// Switch to the QUARANTINED state until the given time
void quarantineUntil(std::chrono::seconds until);
// True when the entry is in any state other than MEASURING
bool isBlocked() const;
};
// Rate limiting state maintained per asset
class State {
public:
    // LRU table mapping a tracking key to its tracking entry
    using EntriesLru = LruCacheMap<EntryKey, std::shared_ptr<TrackEntry>>;

    // Policy whose rules this state enforces
    const std::shared_ptr<Policy> policy;

    // For each rule - hold corresponding tracking state (EntriesLru) instance
    std::vector<std::shared_ptr<EntriesLru>> perRuleTrackingTable;

    // Creates one tracking table per rule of the given policy
    State(const std::shared_ptr<Policy> &policy);

    // Runs all rules against one request. Returns true if the request should be allowed;
    // `log` is set to true when the request should be logged.
    bool execute(
        const std::string& sourceIdentifier,
        const std::string& uriStr,
        std::chrono::seconds now,
        bool& log);
};
}
}

Some files were not shown because too many files have changed in this diff Show More