1 // Copyright 2019 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include "third_party/libxml/chromium/xml_reader.h"

#include <libxml/xmlreader.h>

#include <limits>
#include <vector>

#include "third_party/libxml/chromium/libxml_utils.h"
12
13 using internal::XmlStringToStdString;
14
15 namespace {
16
17 // Same as XmlStringToStdString but also frees |xmlstring|.
XmlStringToStdStringWithDelete(xmlChar * xmlstring)18 std::string XmlStringToStdStringWithDelete(xmlChar* xmlstring) {
19 std::string result = XmlStringToStdString(xmlstring);
20 xmlFree(xmlstring);
21 return result;
22 }
23
// Selects what GetNodeAttributeNames() collects from the current node.
enum GetAttributesQueryType {
  // Collect the names of regular attributes; namespace declarations are
  // skipped.
  ATTRIBUTES,
  // Collect the prefixes of namespace declarations; regular attributes are
  // skipped.
  NAMESPACES_PREFIXES
};
25
26 // Populates |names| with the names of the attributes or prefix of namespaces
27 // (depending on |query_type|) for the current node in |reader|.
28 // Returns true if attribute names/namespace prefixes were retrieved, false
29 // otherwise.
30 // Note the strings in |names| are valid as long as |reader| is valid and should
31 // not be deleted.
GetNodeAttributeNames(xmlTextReaderPtr reader,GetAttributesQueryType query_type,std::vector<const xmlChar * > * names)32 bool GetNodeAttributeNames(xmlTextReaderPtr reader,
33 GetAttributesQueryType query_type,
34 std::vector<const xmlChar*>* names) {
35 if (xmlTextReaderHasAttributes(reader) <= 0)
36 return false;
37
38 if (!xmlTextReaderMoveToFirstAttribute(reader))
39 return false;
40
41 do {
42 bool is_namespace = xmlTextReaderIsNamespaceDecl(reader) == 1;
43 if (query_type == NAMESPACES_PREFIXES && is_namespace) {
44 // Use the local name for namespaces so we don't include 'xmlns:".
45 names->push_back(xmlTextReaderConstLocalName(reader));
46 } else if (query_type == ATTRIBUTES && !is_namespace) {
47 // Use the fully qualified name for attributes.
48 names->push_back(xmlTextReaderConstName(reader));
49 }
50 } while (xmlTextReaderMoveToNextAttribute(reader) > 0);
51
52 // Move the reader from the attributes back to the containing element.
53 if (!xmlTextReaderMoveToElement(reader))
54 return false;
55
56 return true;
57 }
58
59 } // namespace
60
XmlReader()61 XmlReader::XmlReader() : reader_(nullptr) {}
62
~XmlReader()63 XmlReader::~XmlReader() {
64 if (reader_)
65 xmlFreeTextReader(reader_);
66 }
67
Load(const std::string & input)68 bool XmlReader::Load(const std::string& input) {
69 const int kParseOptions = XML_PARSE_NONET; // forbid network access
70 // TODO(evanm): Verify it's OK to pass nullptr for the URL and encoding.
71 // The libxml code allows for these, but it's unclear what effect is has.
72 reader_ = xmlReaderForMemory(input.data(), static_cast<int>(input.size()),
73 nullptr, nullptr, kParseOptions);
74 return reader_ != nullptr;
75 }
76
LoadFile(const std::string & file_path)77 bool XmlReader::LoadFile(const std::string& file_path) {
78 const int kParseOptions = XML_PARSE_NONET; // forbid network access
79 reader_ = xmlReaderForFile(file_path.c_str(), nullptr, kParseOptions);
80 return reader_ != nullptr;
81 }
82
Read()83 bool XmlReader::Read() {
84 return xmlTextReaderRead(reader_) == 1;
85 }
86
87 // Next(), when pointing at an opening tag, advances to the node after
88 // the matching closing tag. Returns false on EOF or error.
Next()89 bool XmlReader::Next() {
90 return xmlTextReaderNext(reader_) == 1;
91 }
92
93 // Return the depth in the tree of the current node.
Depth()94 int XmlReader::Depth() {
95 return xmlTextReaderDepth(reader_);
96 }
97
NodeName()98 std::string XmlReader::NodeName() {
99 return XmlStringToStdString(xmlTextReaderConstLocalName(reader_));
100 }
101
NodeFullName()102 std::string XmlReader::NodeFullName() {
103 return XmlStringToStdString(xmlTextReaderConstName(reader_));
104 }
105
NodeAttribute(const char * name,std::string * out)106 bool XmlReader::NodeAttribute(const char* name, std::string* out) {
107 xmlChar* value = xmlTextReaderGetAttribute(reader_, BAD_CAST name);
108 if (!value)
109 return false;
110 *out = XmlStringToStdStringWithDelete(value);
111 return true;
112 }
113
GetAllNodeAttributes(std::map<std::string,std::string> * attributes)114 bool XmlReader::GetAllNodeAttributes(
115 std::map<std::string, std::string>* attributes) {
116 std::vector<const xmlChar*> attribute_names;
117 if (!GetNodeAttributeNames(reader_, ATTRIBUTES, &attribute_names))
118 return false;
119
120 // Retrieve the attribute values.
121 for (const auto* name : attribute_names) {
122 (*attributes)[XmlStringToStdString(name)] = XmlStringToStdStringWithDelete(
123 xmlTextReaderGetAttribute(reader_, name));
124 }
125 return true;
126 }
127
GetAllDeclaredNamespaces(std::map<std::string,std::string> * namespaces)128 bool XmlReader::GetAllDeclaredNamespaces(
129 std::map<std::string, std::string>* namespaces) {
130 std::vector<const xmlChar*> prefixes;
131 if (!GetNodeAttributeNames(reader_, NAMESPACES_PREFIXES, &prefixes))
132 return false;
133
134 // Retrieve the namespace URIs.
135 for (const auto* prefix : prefixes) {
136 bool default_namespace = xmlStrcmp(prefix, BAD_CAST "xmlns") == 0;
137
138 std::string value = XmlStringToStdStringWithDelete(
139 xmlTextReaderLookupNamespace(reader_, prefix));
140 if (value.empty() && default_namespace) {
141 // Default namespace is treated as an attribute for some reason.
142 value = XmlStringToStdStringWithDelete(
143 xmlTextReaderGetAttribute(reader_, prefix));
144 }
145 (*namespaces)[default_namespace ? "" : XmlStringToStdString(prefix)] =
146 value;
147 }
148 return true;
149 }
150
GetTextIfTextElement(std::string * content)151 bool XmlReader::GetTextIfTextElement(std::string* content) {
152 return GetTextFromNodeIfType(XML_READER_TYPE_TEXT, content);
153 }
154
GetTextIfCDataElement(std::string * content)155 bool XmlReader::GetTextIfCDataElement(std::string* content) {
156 return GetTextFromNodeIfType(XML_READER_TYPE_CDATA, content);
157 }
158
GetTextIfSignificantWhitespaceElement(std::string * content)159 bool XmlReader::GetTextIfSignificantWhitespaceElement(std::string* content) {
160 return GetTextFromNodeIfType(XML_READER_TYPE_SIGNIFICANT_WHITESPACE, content);
161 }
162
IsElement()163 bool XmlReader::IsElement() {
164 return NodeType() == XML_READER_TYPE_ELEMENT;
165 }
166
IsClosingElement()167 bool XmlReader::IsClosingElement() {
168 return NodeType() == XML_READER_TYPE_END_ELEMENT;
169 }
170
IsEmptyElement()171 bool XmlReader::IsEmptyElement() {
172 return xmlTextReaderIsEmptyElement(reader_);
173 }
174
ReadElementContent(std::string * content)175 bool XmlReader::ReadElementContent(std::string* content) {
176 const int start_depth = Depth();
177
178 if (xmlTextReaderIsEmptyElement(reader_)) {
179 // Empty tag. We succesfully read the content, but it's
180 // empty.
181 *content = "";
182 // Advance past this empty tag.
183 if (!Read())
184 return false;
185 return true;
186 }
187
188 // Advance past opening element tag.
189 if (!Read())
190 return false;
191
192 // Read the content. We read up until we hit a closing tag at the
193 // same level as our starting point.
194 while (NodeType() != XML_READER_TYPE_END_ELEMENT || Depth() != start_depth) {
195 *content += XmlStringToStdString(xmlTextReaderConstValue(reader_));
196 if (!Read())
197 return false;
198 }
199
200 // Advance past ending element tag.
201 if (!Read())
202 return false;
203
204 return true;
205 }
206
SkipToElement()207 bool XmlReader::SkipToElement() {
208 do {
209 switch (NodeType()) {
210 case XML_READER_TYPE_ELEMENT:
211 return true;
212 case XML_READER_TYPE_END_ELEMENT:
213 return false;
214 default:
215 // Skip all other node types.
216 continue;
217 }
218 } while (Read());
219 return false;
220 }
221
NodeType()222 int XmlReader::NodeType() {
223 return xmlTextReaderNodeType(reader_);
224 }
225
GetTextFromNodeIfType(int node_type,std::string * content)226 bool XmlReader::GetTextFromNodeIfType(int node_type, std::string* content) {
227 if (NodeType() != node_type)
228 return false;
229
230 *content = XmlStringToStdString(xmlTextReaderConstValue(reader_));
231 return true;
232 }
233