mirror of
https://github.com/openappsec/openappsec.git
synced 2025-09-30 11:44:29 +03:00
First release of open-appsec source code
This commit is contained in:
84
components/security_apps/waap/waap_clib/ParserHTML.h
Executable file
84
components/security_apps/waap/waap_clib/ParserHTML.h
Executable file
@@ -0,0 +1,84 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ParserBase.h"
|
||||
#include "KeyStack.h"
|
||||
#include <libxml/xmlstring.h>
|
||||
#include <libxml/xmlerror.h>
|
||||
#include <libxml/parser.h>
|
||||
#include <libxml/HTMLparser.h>
|
||||
|
||||
// Size, in bytes, of the ParserHTML::m_buf array that holds the first bytes of the stream.
// At least the first 4 bytes must be buffered to allow unicode autodetection (BOM).
#define FIRST_BUFFER_SIZE 5
|
||||
|
||||
class ParserHTML : public ParserBase {
|
||||
public:
|
||||
ParserHTML(IParserStreamReceiver &receiver);
|
||||
virtual ~ParserHTML();
|
||||
size_t push(const char *data, size_t data_len);
|
||||
void finish();
|
||||
virtual const std::string &name() const;
|
||||
bool error() const;
|
||||
virtual size_t depth() { return (m_key.depth() > 0) ? m_key.depth()-1 : m_key.depth(); }
|
||||
private:
|
||||
enum state {
|
||||
s_start,
|
||||
s_accumulate_first_bytes,
|
||||
s_start_parsing,
|
||||
s_parsing,
|
||||
s_error
|
||||
};
|
||||
|
||||
// Information tracked per each element in current stack of tracked HTML elements
|
||||
struct ElemTrackInfo {
|
||||
std::string value;
|
||||
bool hasChildren;
|
||||
ElemTrackInfo():hasChildren(false) {
|
||||
// when element is just opened - we still didn't see any children,
|
||||
// hence start with the "hasChildren" flag as false.
|
||||
// This flag will be enabled once we meet opening of the a subelement.
|
||||
// Also, we start from empty value string and gradually append to it each
|
||||
// time we receive next piece of text from HTML parser.
|
||||
// The collected value is then emitted when element finishes.
|
||||
}
|
||||
};
|
||||
|
||||
static void onStartElement(
|
||||
void *ctx,
|
||||
const xmlChar *localname,
|
||||
const xmlChar **attributes);
|
||||
static void onEndElement(
|
||||
void* ctx,
|
||||
const xmlChar* localname);
|
||||
static void onCharacters(
|
||||
void *ctx,
|
||||
const xmlChar *ch,
|
||||
int len);
|
||||
|
||||
// Filter out errors that should be ignored. Returns true if error should be treated,
|
||||
// false if an error should be ignored
|
||||
bool filterErrors(xmlErrorPtr xmlError);
|
||||
|
||||
IParserStreamReceiver &m_receiver;
|
||||
enum state m_state;
|
||||
// buffer first few bytes of stream (required before calling SAX parser for the first time)
|
||||
char m_buf[FIRST_BUFFER_SIZE];
|
||||
int m_bufLen;
|
||||
KeyStack m_key;
|
||||
std::vector<ElemTrackInfo> m_elemTrackStack;
|
||||
htmlSAXHandler m_saxHandler;
|
||||
htmlParserCtxtPtr m_pushParserCtxPtr;
|
||||
|
||||
static const std::string m_parserName;
|
||||
};
|
Reference in New Issue
Block a user