• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/libxml/chromium/xml_reader.h"
6 
7 #include <libxml/xmlreader.h>
8 
9 #include <vector>
10 
11 #include "third_party/libxml/chromium/libxml_utils.h"
12 
13 using internal::XmlStringToStdString;
14 
15 namespace {
16 
17 // Same as XmlStringToStdString but also frees |xmlstring|.
XmlStringToStdStringWithDelete(xmlChar * xmlstring)18 std::string XmlStringToStdStringWithDelete(xmlChar* xmlstring) {
19   std::string result = XmlStringToStdString(xmlstring);
20   xmlFree(xmlstring);
21   return result;
22 }
23 
// Selects what GetNodeAttributeNames() collects from the current node.
enum GetAttributesQueryType {
  // Collect the names of attributes that are not namespace declarations.
  ATTRIBUTES,
  // Collect the prefixes of namespace declarations only.
  NAMESPACES_PREFIXES,
};
25 
26 // Populates |names| with the names of the attributes or prefix of namespaces
27 // (depending on |query_type|) for the current node in |reader|.
28 // Returns true if attribute names/namespace prefixes were retrieved, false
29 // otherwise.
30 // Note the strings in |names| are valid as long as |reader| is valid and should
31 // not be deleted.
GetNodeAttributeNames(xmlTextReaderPtr reader,GetAttributesQueryType query_type,std::vector<const xmlChar * > * names)32 bool GetNodeAttributeNames(xmlTextReaderPtr reader,
33                            GetAttributesQueryType query_type,
34                            std::vector<const xmlChar*>* names) {
35   if (xmlTextReaderHasAttributes(reader) <= 0)
36     return false;
37 
38   if (!xmlTextReaderMoveToFirstAttribute(reader))
39     return false;
40 
41   do {
42     bool is_namespace = xmlTextReaderIsNamespaceDecl(reader) == 1;
43     if (query_type == NAMESPACES_PREFIXES && is_namespace) {
44       // Use the local name for namespaces so we don't include 'xmlns:".
45       names->push_back(xmlTextReaderConstLocalName(reader));
46     } else if (query_type == ATTRIBUTES && !is_namespace) {
47       // Use the fully qualified name for attributes.
48       names->push_back(xmlTextReaderConstName(reader));
49     }
50   } while (xmlTextReaderMoveToNextAttribute(reader) > 0);
51 
52   // Move the reader from the attributes back to the containing element.
53   if (!xmlTextReaderMoveToElement(reader))
54     return false;
55 
56   return true;
57 }
58 
59 }  // namespace
60 
XmlReader()61 XmlReader::XmlReader() : reader_(nullptr) {}
62 
~XmlReader()63 XmlReader::~XmlReader() {
64   if (reader_)
65     xmlFreeTextReader(reader_);
66 }
67 
Load(const std::string & input)68 bool XmlReader::Load(const std::string& input) {
69   const int kParseOptions = XML_PARSE_NONET;  // forbid network access
70   // TODO(evanm): Verify it's OK to pass nullptr for the URL and encoding.
71   // The libxml code allows for these, but it's unclear what effect is has.
72   reader_ = xmlReaderForMemory(input.data(), static_cast<int>(input.size()),
73                                nullptr, nullptr, kParseOptions);
74   return reader_ != nullptr;
75 }
76 
LoadFile(const std::string & file_path)77 bool XmlReader::LoadFile(const std::string& file_path) {
78   const int kParseOptions = XML_PARSE_NONET;  // forbid network access
79   reader_ = xmlReaderForFile(file_path.c_str(), nullptr, kParseOptions);
80   return reader_ != nullptr;
81 }
82 
Read()83 bool XmlReader::Read() {
84   return xmlTextReaderRead(reader_) == 1;
85 }
86 
87 // Next(), when pointing at an opening tag, advances to the node after
88 // the matching closing tag.  Returns false on EOF or error.
Next()89 bool XmlReader::Next() {
90   return xmlTextReaderNext(reader_) == 1;
91 }
92 
93 // Return the depth in the tree of the current node.
Depth()94 int XmlReader::Depth() {
95   return xmlTextReaderDepth(reader_);
96 }
97 
NodeName()98 std::string XmlReader::NodeName() {
99   return XmlStringToStdString(xmlTextReaderConstLocalName(reader_));
100 }
101 
NodeFullName()102 std::string XmlReader::NodeFullName() {
103   return XmlStringToStdString(xmlTextReaderConstName(reader_));
104 }
105 
NodeAttribute(const char * name,std::string * out)106 bool XmlReader::NodeAttribute(const char* name, std::string* out) {
107   xmlChar* value = xmlTextReaderGetAttribute(reader_, BAD_CAST name);
108   if (!value)
109     return false;
110   *out = XmlStringToStdStringWithDelete(value);
111   return true;
112 }
113 
GetAllNodeAttributes(std::map<std::string,std::string> * attributes)114 bool XmlReader::GetAllNodeAttributes(
115     std::map<std::string, std::string>* attributes) {
116   std::vector<const xmlChar*> attribute_names;
117   if (!GetNodeAttributeNames(reader_, ATTRIBUTES, &attribute_names))
118     return false;
119 
120   // Retrieve the attribute values.
121   for (const auto* name : attribute_names) {
122     (*attributes)[XmlStringToStdString(name)] = XmlStringToStdStringWithDelete(
123         xmlTextReaderGetAttribute(reader_, name));
124   }
125   return true;
126 }
127 
GetAllDeclaredNamespaces(std::map<std::string,std::string> * namespaces)128 bool XmlReader::GetAllDeclaredNamespaces(
129     std::map<std::string, std::string>* namespaces) {
130   std::vector<const xmlChar*> prefixes;
131   if (!GetNodeAttributeNames(reader_, NAMESPACES_PREFIXES, &prefixes))
132     return false;
133 
134   // Retrieve the namespace URIs.
135   for (const auto* prefix : prefixes) {
136     bool default_namespace = xmlStrcmp(prefix, BAD_CAST "xmlns") == 0;
137 
138     std::string value = XmlStringToStdStringWithDelete(
139         xmlTextReaderLookupNamespace(reader_, prefix));
140     if (value.empty() && default_namespace) {
141       // Default namespace is treated as an attribute for some reason.
142       value = XmlStringToStdStringWithDelete(
143           xmlTextReaderGetAttribute(reader_, prefix));
144     }
145     (*namespaces)[default_namespace ? "" : XmlStringToStdString(prefix)] =
146         value;
147   }
148   return true;
149 }
150 
GetTextIfTextElement(std::string * content)151 bool XmlReader::GetTextIfTextElement(std::string* content) {
152   return GetTextFromNodeIfType(XML_READER_TYPE_TEXT, content);
153 }
154 
GetTextIfCDataElement(std::string * content)155 bool XmlReader::GetTextIfCDataElement(std::string* content) {
156   return GetTextFromNodeIfType(XML_READER_TYPE_CDATA, content);
157 }
158 
GetTextIfSignificantWhitespaceElement(std::string * content)159 bool XmlReader::GetTextIfSignificantWhitespaceElement(std::string* content) {
160   return GetTextFromNodeIfType(XML_READER_TYPE_SIGNIFICANT_WHITESPACE, content);
161 }
162 
IsElement()163 bool XmlReader::IsElement() {
164   return NodeType() == XML_READER_TYPE_ELEMENT;
165 }
166 
IsClosingElement()167 bool XmlReader::IsClosingElement() {
168   return NodeType() == XML_READER_TYPE_END_ELEMENT;
169 }
170 
IsEmptyElement()171 bool XmlReader::IsEmptyElement() {
172   return xmlTextReaderIsEmptyElement(reader_);
173 }
174 
ReadElementContent(std::string * content)175 bool XmlReader::ReadElementContent(std::string* content) {
176   const int start_depth = Depth();
177 
178   if (xmlTextReaderIsEmptyElement(reader_)) {
179     // Empty tag.  We succesfully read the content, but it's
180     // empty.
181     *content = "";
182     // Advance past this empty tag.
183     if (!Read())
184       return false;
185     return true;
186   }
187 
188   // Advance past opening element tag.
189   if (!Read())
190     return false;
191 
192   // Read the content.  We read up until we hit a closing tag at the
193   // same level as our starting point.
194   while (NodeType() != XML_READER_TYPE_END_ELEMENT || Depth() != start_depth) {
195     *content += XmlStringToStdString(xmlTextReaderConstValue(reader_));
196     if (!Read())
197       return false;
198   }
199 
200   // Advance past ending element tag.
201   if (!Read())
202     return false;
203 
204   return true;
205 }
206 
SkipToElement()207 bool XmlReader::SkipToElement() {
208   do {
209     switch (NodeType()) {
210       case XML_READER_TYPE_ELEMENT:
211         return true;
212       case XML_READER_TYPE_END_ELEMENT:
213         return false;
214       default:
215         // Skip all other node types.
216         continue;
217     }
218   } while (Read());
219   return false;
220 }
221 
NodeType()222 int XmlReader::NodeType() {
223   return xmlTextReaderNodeType(reader_);
224 }
225 
GetTextFromNodeIfType(int node_type,std::string * content)226 bool XmlReader::GetTextFromNodeIfType(int node_type, std::string* content) {
227   if (NodeType() != node_type)
228     return false;
229 
230   *content = XmlStringToStdString(xmlTextReaderConstValue(reader_));
231   return true;
232 }
233