// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. // Licensed under the Apache License, Version 2.0 (the "License"); // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "ParserXML.h" #include "Waf2Util.h" #include "debug.h" #include USE_DEBUG_FLAG(D_WAAP_PARSER_XML); const std::string ParserXML::m_parserName = "ParserXML"; void ParserXML::onStartElementNs( void* ctx, const xmlChar* localname, const xmlChar* prefix, const xmlChar* URI, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes) { ParserXML* p = (ParserXML*)ctx; dbgTrace(D_WAAP_PARSER_XML) << "XML OPEN: '" << localname << "'"; p->m_key.push((const char*)localname, xmlStrlen(localname)); int i; for (i = 0; i < nb_attributes; i++) { const xmlChar* attr_localname = attributes[i * 5 + 0]; //const xmlChar *attr_prefix = attributes[i*5+1]; //const xmlChar *attr_URI = attributes[i*5+2]; const xmlChar* attr_value_begin = attributes[i * 5 + 3]; const xmlChar* attr_value_end = attributes[i * 5 + 4]; dbgTrace(D_WAAP_PARSER_XML) << "\tXML ATTR: elem='" << (char*)localname << "', " << attr_localname << "='" << std::string((char*)attr_value_begin, (size_t)(attr_value_end - attr_value_begin)) << "'"; p->m_key.push((const char*)attr_localname, xmlStrlen(attr_localname)); if (p->m_receiver.onKv( p->m_key.c_str(), p->m_key.size(), (const char*)attr_value_begin, attr_value_end - attr_value_begin, BUFFERED_RECEIVER_F_BOTH ) != 0) { p->m_state = s_error; } p->m_key.pop("XML end attribute"); } // before we add new tracking element to the stack for this new element, // set "children exists" flag to true for the parent element. if (!p->m_elemTrackStack.empty()) { p->m_elemTrackStack.back().hasChildren = true; } // when opening new element - start tracking its properties (internal text and existence of subelements) p->m_elemTrackStack.push_back(ElemTrackInfo()); } void ParserXML::onEndElementNs( void* ctx, const xmlChar* localname, const xmlChar* prefix, const xmlChar* URI) { ParserXML* p = (ParserXML*)ctx; dbgTrace(D_WAAP_PARSER_XML) << "XML CLOSE: '" << localname << "'"; if (p->m_elemTrackStack.empty()) { dbgWarning(D_WAAP_PARSER_XML) << "XML closing tag and elem track stack is empty. This is probably sign of a bug!"; return; } ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back(); // Usability optimization: only output kv pair for XML elements that had either sub children // and/or value within. // Those "wrapper elements" such as john21 only // contain sub elements. For these we don't emit kv pair. // However, for truly empty element such as , or similar element with // text: some text, we do output a kv pair. bool isWrapperElement = elemTrackInfo.hasChildren && (elemTrackInfo.value.size() == 0); if (!isWrapperElement) { // Emit tag name as key if (p->m_receiver.onKey(p->m_key.c_str(), p->m_key.size()) != 0) { p->m_state = s_error; } if (p->m_receiver.onValue(elemTrackInfo.value.c_str(), elemTrackInfo.value.size()) != 0) { p->m_state = s_error; } if (p->m_receiver.onKvDone() != 0) { p->m_state = s_error; // error } } // when closing an element - pop its tracking info from the tracking stack p->m_elemTrackStack.pop_back(); // Also, pop the element's name from m_key stack, so the key name always reflects // current depth within the elements tree p->m_key.pop("XML end element"); } void ParserXML::onCharacters(void* ctx, const xmlChar* ch, int len) { ParserXML* p = (ParserXML*)ctx; if (p->m_elemTrackStack.empty()) { dbgWarning(D_WAAP_PARSER_XML) << "XML text and elem track stack is empty. This is probably sign of a bug!"; return; } if ((ch == NULL) || (len == 0)) { dbgTrace(D_WAAP_PARSER_XML) << "Got empty XML text element. Ignoring."; return; } ElemTrackInfo& elemTrackInfo = p->m_elemTrackStack.back(); dbgTrace(D_WAAP_PARSER_XML) << "XML TEXT: '[" << std::string((char*)ch, (size_t)len) << "]'"; std::string val = std::string((char*)ch, (size_t)len); // trim isspace() characters around xml text chunks. // The chunks can occur multiple times within one value, when text value is intermixed with xml sub-tags. // for example, for XML source "stazzzrt", the "a" tag will include two text // chunks "sta" and "rt" // which are concatenated here to form the word "start". // The trimming is done here to prevent false alarms on detection algorithm that sees // "\n" characters in the XML value. // Example of input that causes false alarm without this trim is (multiline XML): //