• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fxcrt/xml/cfx_xmlparser.h"
6 
7 #include <memory>
8 
9 #include "core/fxcrt/cfx_read_only_span_stream.h"
10 #include "core/fxcrt/fx_codepage.h"
11 #include "core/fxcrt/xml/cfx_xmldocument.h"
12 #include "core/fxcrt/xml/cfx_xmlelement.h"
13 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15 
16 class CFXXMLParserTest : public testing::Test {
17  public:
Parse(pdfium::span<const char> input)18   std::unique_ptr<CFX_XMLDocument> Parse(pdfium::span<const char> input) {
19     CFX_XMLParser parser(
20         pdfium::MakeRetain<CFX_ReadOnlySpanStream>(pdfium::as_bytes(input)));
21     return parser.Parse();
22   }
23 };
24 
// An attribute value without quotes must make the parse fail (null document).
TEST_F(CFXXMLParserTest, AttributesMustBeQuoted) {
  static constexpr char kInput[] =
      "<script display=1>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document == nullptr);
}
31 
// Quoted attributes on an element are readable through GetAttribute().
TEST_F(CFXXMLParserTest, Attributes) {
  static constexpr char kInput[] =
      "<script contentType=\"application/x-javascript\" display=\"1\">\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);

  // Both attributes must come back with their quoted values.
  EXPECT_EQ(L"application/x-javascript", script->GetAttribute(L"contentType"));
  EXPECT_EQ(L"1", script->GetAttribute(L"display"));
}
46 
// The body of a CDATA section is reported verbatim as part of the element's
// text data, including characters such as '<' and '"'.
TEST_F(CFXXMLParserTest, CData) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <![CDATA[\n"
      "    if (a[1] < 3)\n"
      "      app.alert(\"Tclams\");\n"
      "  ]]>\n"
      "</script>";

  // Text surrounding the CDATA markers plus the CDATA payload itself.
  static constexpr wchar_t kExpectedText[] =
      L"\n  \n"
      L"    if (a[1] < 3)\n"
      L"      app.alert(\"Tclams\");\n"
      L"  \n";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(kExpectedText, script->GetTextData());
}
69 
// A "</script>" appearing inside a CDATA section is plain text and must not
// close the enclosing <script> element.
TEST_F(CFXXMLParserTest, CDataWithInnerScript) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <![CDATA[\n"
      "    if (a[1] < 3)\n"
      "      app.alert(\"Tclams\");\n"
      "    </script>\n"
      "  ]]>\n"
      "</script>";

  // The embedded closing tag is expected to survive in the text data.
  static constexpr wchar_t kExpectedText[] =
      L"\n  \n"
      L"    if (a[1] < 3)\n"
      L"      app.alert(\"Tclams\");\n"
      L"    </script>\n"
      L"  \n";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(kExpectedText, script->GetTextData());
}
94 
// A bare "<!>" still parses; it contributes nothing to the element's text, so
// only the surrounding whitespace remains.
TEST_F(CFXXMLParserTest, ArrowBangArrow) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <!>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  \n", script->GetTextData());
}
108 
// "<![>" still yields a document; the element's text is expected to be just
// the whitespace that precedes the malformed marker.
TEST_F(CFXXMLParserTest, ArrowBangBracketArrow) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <![>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  ", script->GetTextData());
}
122 
// A CDATA opener missing its second '[' ("<![CDATA>") still yields a document;
// the element's text is expected to be only the preceding whitespace.
TEST_F(CFXXMLParserTest, IncompleteCData) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <![CDATA>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  ", script->GetTextData());
}
136 
// A CDATA section that is opened but never terminated with "]]>" still yields
// a document; only the whitespace before the opener shows up as text.
TEST_F(CFXXMLParserTest, UnClosedCData) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <![CDATA[\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  ", script->GetTextData());
}
150 
// An empty CDATA section adds no characters; the text is the whitespace on
// either side of it.
TEST_F(CFXXMLParserTest, EmptyCData) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <![CDATA[]]>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  \n", script->GetTextData());
}
164 
// A well-formed comment is skipped: it does not appear in the element's text.
TEST_F(CFXXMLParserTest, Comment) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <!-- A Comment -->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  \n", script->GetTextData());
}
178 
// A comment opened with a single dash ("<!-") is still parsed, and its content
// is likewise absent from the element's text.
TEST_F(CFXXMLParserTest, IncorrectCommentStart) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <!- A Comment -->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  \n", script->GetTextData());
}
192 
// An empty comment ("<!---->") is skipped like any other comment.
TEST_F(CFXXMLParserTest, CommentEmpty) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <!---->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  \n", script->GetTextData());
}
206 
// "<!--->" has only three dashes; a document is still produced, and the text
// is expected to be just the whitespace before the marker.
TEST_F(CFXXMLParserTest, CommentThreeDash) {
  static constexpr char kInput[] =
      "<script>\n"
      "  <!--->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  ", script->GetTextData());
}
220 
// "<!-->" has only two dashes; a document is still produced, and the text is
// expected to be just the whitespace before the marker.
TEST_F(CFXXMLParserTest, CommentTwoDash) {
  static const char input[] =
      "<script>\n"
      "  <!-->\n"
      "</script>";

  std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
  ASSERT_TRUE(doc != nullptr);

  CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
  // Guard the dereference below, as the sibling tests do: without it a missing
  // <script> element would crash the test binary instead of failing the test.
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  ", script->GetTextData());
}
233 
// Numeric (decimal and hex, with arbitrary leading zeros) and the five named
// entities are decoded. The all-zero numeric entity and the unknown named
// entity are expected to contribute nothing to the text.
TEST_F(CFXXMLParserTest, Entities) {
  static constexpr char kInput[] =
      "<script>"
      "&#66;"                     // B
      "&#x54;"                    // T
      "&#x6a;"                    // j
      "&#x00000000000000000048;"  // H
      "&#x0000000000000000AB48;"  // \xab48
      "&#x0000000000000000000;"
      "&amp;"
      "&lt;"
      "&gt;"
      "&apos;"
      "&quot;"
      "&something_else;"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"BTjH\xab48&<>'\"", script->GetTextData());
}
258 
// Hex character references that are too large are each expected to decode to
// a single space rather than failing the parse.
TEST_F(CFXXMLParserTest, EntityOverflowHex) {
  static constexpr char kInput[] =
      "<script>"
      "&#xaDBDFFFFF;"
      "&#xafffffffffffffffffffffffffffffffff;"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"  ", script->GetTextData());
}
273 
// Decimal character references that are too large are each expected to decode
// to a single space rather than failing the parse.
TEST_F(CFXXMLParserTest, EntityOverflowDecimal) {
  static constexpr char kInput[] =
      "<script>"
      "&#2914910205;"
      "&#29149102052342342134521341234512351234213452315;"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root = document->GetRoot();
  CFX_XMLElement* const script = root->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"  ", script->GetTextData());
}
288 
// Table-driven check of CFX_XMLParser::IsXMLNameChar(). Each row gives a
// character, the flag passed as the second argument, and the expected result.
TEST_F(CFXXMLParserTest, IsXMLNameChar) {
  struct NameCharCase {
    wchar_t ch;
    bool flag;
    bool expected;
  };

  static constexpr NameCharCase kCases[] = {
      // '-', '.' and digits are rejected with flag=true, accepted otherwise.
      {L'-', true, false},
      {L'-', false, true},
      {L'.', true, false},
      {L'.', false, true},
      {L'0', true, false},
      {L'0', false, true},

      // ASCII letters are accepted with either flag value.
      {L'a', true, true},
      {L'a', false, true},
      {L'A', true, true},
      {L'A', false, true},

      // Brackets and parentheses are rejected with either flag value.
      {L'(', false, false},
      {L'(', true, false},
      {L')', false, false},
      {L')', true, false},
      {L'[', false, false},
      {L'[', true, false},
      {L']', false, false},
      {L']', true, false},

      // Boundaries of the accepted range 0x2070..0x218F.
      {0x2069, true, false},
      {0x2070, true, true},
      {0x2073, true, true},
      {0x218F, true, true},
      {0x2190, true, false},

      // Boundaries of the accepted range 0xFDF0..0xFFFD.
      {0xFDEF, true, false},
      {0xFDF0, true, true},
      {0xFDF1, true, true},
      {0xFFFD, true, true},
      {0xFFFE, true, false},
  };

  for (const NameCharCase& test_case : kCases) {
    EXPECT_EQ(test_case.expected,
              CFX_XMLParser::IsXMLNameChar(test_case.ch, test_case.flag))
        << "ch=" << static_cast<unsigned>(test_case.ch)
        << " flag=" << test_case.flag;
  }
}
326 
// A closing tag with no element open must make the parse fail.
TEST_F(CFXXMLParserTest, BadElementClose) {
  const std::unique_ptr<CFX_XMLDocument> document = Parse("</endtag>");
  ASSERT_TRUE(document == nullptr);
}
330 
// Closing the same element twice must make the parse fail.
TEST_F(CFXXMLParserTest, DoubleElementClose) {
  const std::unique_ptr<CFX_XMLDocument> document = Parse("<p></p></p>");
  ASSERT_TRUE(document == nullptr);
}
334 
// A leading processing instruction becomes the first child of the root and is
// recognised as the originalXFAVersion instruction.
TEST_F(CFXXMLParserTest, ParseInstruction) {
  static constexpr char kInput[] =
      "<?originalXFAVersion http://www.xfa.org/schema/xfa-template/3.3/ ?>"
      "<form></form>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document != nullptr);

  CFX_XMLElement* const root_element = document->GetRoot();
  ASSERT_TRUE(root_element->GetFirstChild() != nullptr);
  // Confirm the node type before converting it to an instruction.
  ASSERT_EQ(CFX_XMLNode::Type::kInstruction,
            root_element->GetFirstChild()->GetType());

  CFX_XMLInstruction* const xfa_instruction =
      ToXMLInstruction(root_element->GetFirstChild());
  EXPECT_TRUE(xfa_instruction->IsOriginalXFAVersion());
}
350 
// An entity reference interrupted by markup ("&<p>;") must make the parse
// fail.
TEST_F(CFXXMLParserTest, BadEntity) {
  static constexpr char kInput[] =
      "<script>"
      "Test &<p>; thing"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kInput);
  ASSERT_TRUE(document == nullptr);
}
358