1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "core/fxcrt/xml/cfx_xmlparser.h"
6
7 #include <memory>
8
9 #include "core/fxcrt/cfx_readonlymemorystream.h"
10 #include "core/fxcrt/fx_codepage.h"
11 #include "core/fxcrt/xml/cfx_xmldocument.h"
12 #include "core/fxcrt/xml/cfx_xmlelement.h"
13 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15
16 class CFX_XMLParserTest : public testing::Test {
17 public:
Parse(pdfium::span<const char> input)18 std::unique_ptr<CFX_XMLDocument> Parse(pdfium::span<const char> input) {
19 CFX_XMLParser parser(
20 pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(pdfium::as_bytes(input)));
21 return parser.Parse();
22 }
23 };
24
// An attribute whose value is not enclosed in quotes must make parsing fail,
// i.e. Parse() must return null.
TEST_F(CFX_XMLParserTest, AttributesMustBeQuoted) {
  static constexpr char kInput[] =
      "<script display=1>\n"
      "</script>";
  ASSERT_FALSE(Parse(kInput));
}
31
// Quoted attributes on an element must be retrievable by name with their
// exact values.
TEST_F(CFX_XMLParserTest, Attributes) {
  static constexpr char kInput[] =
      "<script contentType=\"application/x-javascript\" display=\"1\">\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);

  EXPECT_EQ(L"application/x-javascript",
            script_element->GetAttribute(L"contentType"));
  EXPECT_EQ(L"1", script_element->GetAttribute(L"display"));
}
46
// The contents of a CDATA section must show up verbatim in the element's text
// data, together with the whitespace that surrounds the section.
TEST_F(CFX_XMLParserTest, CData) {
  static constexpr char kInput[] =
      "<script>\n"
      " <![CDATA[\n"
      " if (a[1] < 3)\n"
      " app.alert(\"Tclams\");\n"
      " ]]>\n"
      "</script>";

  // Text before the section, the section body, then the text after it.
  static constexpr wchar_t kExpectedText[] =
      L"\n \n"
      L" if (a[1] < 3)\n"
      L" app.alert(\"Tclams\");\n"
      L" \n";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(kExpectedText, script_element->GetTextData());
}
69
// A "</script>" sequence inside a CDATA section is plain text: it must not
// close the enclosing element and must be preserved in the text data.
TEST_F(CFX_XMLParserTest, CDataWithInnerScript) {
  static constexpr char kInput[] =
      "<script>\n"
      " <![CDATA[\n"
      " if (a[1] < 3)\n"
      " app.alert(\"Tclams\");\n"
      " </script>\n"
      " ]]>\n"
      "</script>";

  static constexpr wchar_t kExpectedText[] =
      L"\n \n"
      L" if (a[1] < 3)\n"
      L" app.alert(\"Tclams\");\n"
      L" </script>\n"
      L" \n";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(kExpectedText, script_element->GetTextData());
}
94
// A bare "<!>" must not break parsing and must contribute nothing to the
// element's text data; only the surrounding whitespace is expected.
TEST_F(CFX_XMLParserTest, ArrowBangArrow) {
  static constexpr char kInput[] =
      "<script>\n"
      " <!>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n \n", script_element->GetTextData());
}
108
// A truncated "<![>" must still yield a document; the element's text data is
// expected to hold only the text that precedes the sequence.
TEST_F(CFX_XMLParserTest, ArrowBangBracketArrow) {
  static constexpr char kInput[] =
      "<script>\n"
      " <![>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n ", script_element->GetTextData());
}
122
// A CDATA opener that lacks its second '[' ("<![CDATA>") must still yield a
// document; only the text preceding it is expected in the text data.
TEST_F(CFX_XMLParserTest, IncompleteCData) {
  static constexpr char kInput[] =
      "<script>\n"
      " <![CDATA>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n ", script_element->GetTextData());
}
136
// A CDATA section that is opened but never terminated with "]]>" must still
// yield a document; only the text preceding it is expected in the text data.
TEST_F(CFX_XMLParserTest, UnClosedCData) {
  static constexpr char kInput[] =
      "<script>\n"
      " <![CDATA[\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n ", script_element->GetTextData());
}
150
// An empty CDATA section adds nothing to the text data; only the whitespace
// around it is expected.
TEST_F(CFX_XMLParserTest, EmptyCData) {
  static constexpr char kInput[] =
      "<script>\n"
      " <![CDATA[]]>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n \n", script_element->GetTextData());
}
164
// A well-formed comment must not appear in the element's text data; only the
// whitespace around it is expected.
TEST_F(CFX_XMLParserTest, Comment) {
  static constexpr char kInput[] =
      "<script>\n"
      " <!-- A Comment -->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n \n", script_element->GetTextData());
}
178
// A comment opened with a single dash ("<!-") must still yield a document and
// must leave only the surrounding whitespace in the text data.
TEST_F(CFX_XMLParserTest, IncorrectCommentStart) {
  static constexpr char kInput[] =
      "<script>\n"
      " <!- A Comment -->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n \n", script_element->GetTextData());
}
192
// An empty comment ("<!---->") contributes nothing to the text data; only the
// whitespace around it is expected.
TEST_F(CFX_XMLParserTest, CommentEmpty) {
  static constexpr char kInput[] =
      "<script>\n"
      " <!---->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n \n", script_element->GetTextData());
}
206
// "<!--->" has only three dashes, so the comment is never terminated. A
// document must still be produced, and only the text preceding the sequence is
// expected in the text data.
TEST_F(CFX_XMLParserTest, CommentThreeDash) {
  static constexpr char kInput[] =
      "<script>\n"
      " <!--->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n ", script_element->GetTextData());
}
220
// "<!-->" has only two dashes, so the comment is never terminated. A document
// must still be produced, and only the text preceding the sequence is expected
// in the text data.
TEST_F(CFX_XMLParserTest, CommentTwoDash) {
  static const char input[] =
      "<script>\n"
      " <!-->\n"
      "</script>";

  std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
  ASSERT_TRUE(doc != nullptr);

  CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
  // Stop here if the element is missing so that the dereference below cannot
  // crash the test binary; every sibling test performs the same check.
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n ", script->GetTextData());
}
233
// Checks that character and entity references in element text are decoded:
// decimal and hexadecimal numeric references (including ones padded with many
// leading zeros) and the five predefined named entities. The expected string
// contains no output for the NUL reference or for the unknown named entity.
TEST_F(CFX_XMLParserTest, Entities) {
  // The references must be spelled out as escapes. Writing the decoded
  // characters here would put raw '<', '&' and '"' into the markup (and into
  // this C++ literal) and would not exercise entity decoding at all.
  static const char input[] =
      "<script>"
      "&#66;"                     // B
      "&#x54;"                    // T
      "&#x6a;"                    // j
      "&#x00000000000000000048;"  // H
      "&#x0000000000000000AB48;"  // \xab48
      "&#x0000000000000000000;"   // NUL, absent from the expected text.
      "&amp;"
      "&lt;"
      "&gt;"
      "&apos;"
      "&quot;"
      "&something_else;"  // Unknown entity, absent from the expected text.
      "</script>";

  std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
  ASSERT_TRUE(doc != nullptr);

  CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"BTjH\xab48&<>'\"", script->GetTextData());
}
258
// Hexadecimal character references whose value is far beyond any valid code
// point must not break parsing. The test expects each of the two references to
// be replaced by a single space.
TEST_F(CFX_XMLParserTest, EntityOverflowHex) {
  static const char input[] =
      "<script>"
      "&#xaDBDFFFFF;"                           // Slightly too large.
      "&#xafffffffffffffffffffffffffffffffff;"  // Overflows any integer type.
      "</script>";

  std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
  ASSERT_TRUE(doc != nullptr);

  CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  // One space per overflowing reference.
  EXPECT_EQ(L"  ", script->GetTextData());
}
273
// Decimal character references whose value is far beyond any valid code point
// must not break parsing. The test expects each of the two references to be
// replaced by a single space.
TEST_F(CFX_XMLParserTest, EntityOverflowDecimal) {
  static const char input[] =
      "<script>"
      "&#2914910205;"  // Slightly too large.
      "&#29149102052342342134521341234512351234213452315;"  // Overflows.
      "</script>";

  std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
  ASSERT_TRUE(doc != nullptr);

  CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  // One space per overflowing reference.
  EXPECT_EQ(L"  ", script->GetTextData());
}
288
// Spot-checks CFX_XMLParser::IsXMLNameChar() for '-' and for the code points
// on either side of the 0x2070-0x218F and 0xFDF0-0xFFFD boundaries.
TEST_F(CFX_XMLParserTest, IsXMLNameChar) {
  struct TestCase {
    wchar_t ch;
    // Second argument passed to IsXMLNameChar(); presumably "is this the first
    // character of the name" -- confirm against the declaration.
    bool flag;
    bool expected;
  };
  static const TestCase kCases[] = {
      // '-' is rejected with the flag set and accepted with it cleared.
      {L'-', true, false},
      {L'-', false, true},

      // Around 0x2070..0x218F.
      {0x2069, true, false},
      {0x2070, true, true},
      {0x2073, true, true},
      {0x218F, true, true},
      {0x2190, true, false},

      // Around 0xFDF0..0xFFFD.
      {0xFDEF, true, false},
      {0xFDF0, true, true},
      {0xFDF1, true, true},
      {0xFFFD, true, true},
      {0xFFFE, true, false},
  };

  for (const TestCase& test : kCases) {
    const bool actual = CFX_XMLParser::IsXMLNameChar(test.ch, test.flag);
    EXPECT_EQ(test.expected, actual)
        << "code point " << static_cast<unsigned int>(test.ch) << ", flag "
        << test.flag;
  }
}
305
// A closing tag with no matching opening tag must make parsing fail.
TEST_F(CFX_XMLParserTest, BadElementClose) {
  ASSERT_FALSE(Parse("</endtag>"));
}
309
// Closing the same element twice must make parsing fail.
TEST_F(CFX_XMLParserTest, DoubleElementClose) {
  ASSERT_FALSE(Parse("<p></p></p>"));
}
313
// A leading "<?originalXFAVersion ... ?>" processing instruction must become
// the root's first child, be typed as an instruction node, and report itself
// as the original-XFA-version instruction.
TEST_F(CFX_XMLParserTest, ParseInstruction) {
  static constexpr char kInput[] =
      "<?originalXFAVersion http://www.xfa.org/schema/xfa-template/3.3/ ?>"
      "<form></form>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document);

  CFX_XMLElement* const root = document->GetRoot();
  auto* const first_child = root->GetFirstChild();
  ASSERT_TRUE(first_child);
  // The node type is asserted before the node is converted below.
  ASSERT_EQ(CFX_XMLNode::Type::kInstruction, first_child->GetType());

  CFX_XMLInstruction* const instruction = ToXMLInstruction(first_child);
  EXPECT_TRUE(instruction->IsOriginalXFAVersion());
}
329
// An '&' that is followed by markup before its terminating ';' must make
// parsing fail.
TEST_F(CFX_XMLParserTest, BadEntity) {
  static constexpr char kInput[] =
      "<script>"
      "Test &<p>; thing"
      "</script>";
  ASSERT_FALSE(Parse(kInput));
}
337