• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fxcrt/xml/cfx_xmlparser.h"
6 
7 #include <memory>
8 
9 #include "core/fxcrt/cfx_readonlymemorystream.h"
10 #include "core/fxcrt/fx_codepage.h"
11 #include "core/fxcrt/xml/cfx_xmldocument.h"
12 #include "core/fxcrt/xml/cfx_xmlelement.h"
13 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15 
16 class CFX_XMLParserTest : public testing::Test {
17  public:
Parse(pdfium::span<const char> input)18   std::unique_ptr<CFX_XMLDocument> Parse(pdfium::span<const char> input) {
19     CFX_XMLParser parser(
20         pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(pdfium::as_bytes(input)));
21     return parser.Parse();
22   }
23 };
24 
// An attribute value written without quotes must make the parse fail.
TEST_F(CFX_XMLParserTest, AttributesMustBeQuoted) {
  static constexpr char kXml[] =
      "<script display=1>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_FALSE(document);
}
31 
// Quoted attributes on the root-level <script> element are readable by name.
TEST_F(CFX_XMLParserTest, Attributes) {
  static constexpr char kXml[] =
      "<script contentType=\"application/x-javascript\" display=\"1\">\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);

  EXPECT_EQ(L"application/x-javascript",
            script_element->GetAttribute(L"contentType"));
  EXPECT_EQ(L"1", script_element->GetAttribute(L"display"));
}
46 
// The contents of a CDATA section show up verbatim in the element's text
// data, surrounded by the whitespace that sits outside the section.
TEST_F(CFX_XMLParserTest, CData) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <![CDATA[\n"
      "    if (a[1] < 3)\n"
      "      app.alert(\"Tclams\");\n"
      "  ]]>\n"
      "</script>";

  static constexpr wchar_t kExpectedText[] =
      L"\n  \n"
      L"    if (a[1] < 3)\n"
      L"      app.alert(\"Tclams\");\n"
      L"  \n";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(kExpectedText, script_element->GetTextData());
}
69 
// A "</script>" that appears inside a CDATA section is expected to be kept as
// text rather than closing the enclosing element.
TEST_F(CFX_XMLParserTest, CDataWithInnerScript) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <![CDATA[\n"
      "    if (a[1] < 3)\n"
      "      app.alert(\"Tclams\");\n"
      "    </script>\n"
      "  ]]>\n"
      "</script>";

  static constexpr wchar_t kExpectedText[] =
      L"\n  \n"
      L"    if (a[1] < 3)\n"
      L"      app.alert(\"Tclams\");\n"
      L"    </script>\n"
      L"  \n";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(kExpectedText, script_element->GetTextData());
}
94 
// A bare "<!>" is expected to contribute nothing to the text data; only the
// whitespace around it remains.
TEST_F(CFX_XMLParserTest, ArrowBangArrow) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <!>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n  \n", script_element->GetTextData());
}
108 
// With "<![>" in the body the document still parses, and the text data is
// expected to hold only the whitespace that precedes the malformed markup.
TEST_F(CFX_XMLParserTest, ArrowBangBracketArrow) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <![>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n  ", script_element->GetTextData());
}
122 
// A CDATA opener missing its second '[' ("<![CDATA>") still yields a
// document; only the leading whitespace is expected as text data.
TEST_F(CFX_XMLParserTest, IncompleteCData) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <![CDATA>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n  ", script_element->GetTextData());
}
136 
// A CDATA section that is never terminated with "]]>" still yields a
// document; only the leading whitespace is expected as text data.
TEST_F(CFX_XMLParserTest, UnClosedCData) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <![CDATA[\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n  ", script_element->GetTextData());
}
150 
// An empty CDATA section adds no characters; the surrounding whitespace is
// all that is expected in the text data.
TEST_F(CFX_XMLParserTest, EmptyCData) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <![CDATA[]]>\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n  \n", script_element->GetTextData());
}
164 
// A well-formed comment is expected to be absent from the element's text
// data.
TEST_F(CFX_XMLParserTest, Comment) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <!-- A Comment -->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n  \n", script_element->GetTextData());
}
178 
// A comment opened with a single dash ("<!-") is expected to produce the same
// text data as a well-formed comment would.
TEST_F(CFX_XMLParserTest, IncorrectCommentStart) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <!- A Comment -->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n  \n", script_element->GetTextData());
}
192 
// An empty comment ("<!---->") is expected to leave only the surrounding
// whitespace in the text data.
TEST_F(CFX_XMLParserTest, CommentEmpty) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <!---->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n  \n", script_element->GetTextData());
}
206 
// "<!--->" has three dashes and so never closes as a comment. The document
// still parses and only the leading whitespace is expected as text data.
TEST_F(CFX_XMLParserTest, CommentThreeDash) {
  static constexpr char kXml[] =
      "<script>\n"
      "  <!--->\n"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"\n  ", script_element->GetTextData());
}
220 
// "<!-->" has only two dashes and so never closes as a comment. The document
// still parses and only the leading whitespace is expected as text data.
TEST_F(CFX_XMLParserTest, CommentTwoDash) {
  static const char input[] =
      "<script>\n"
      "  <!-->\n"
      "</script>";

  std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
  ASSERT_TRUE(doc != nullptr);

  CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
  // Guard the dereference below, as every other test in this file does; a
  // missing element should fail the test rather than crash the test binary.
  ASSERT_TRUE(script != nullptr);
  EXPECT_EQ(L"\n  ", script->GetTextData());
}
233 
// Decimal and hexadecimal character references and the five named entities
// are decoded. The all-zero reference and the unknown named entity are
// expected to contribute nothing to the text data.
TEST_F(CFX_XMLParserTest, Entities) {
  static constexpr char kXml[] =
      "<script>"
      "&#66;"                     // B
      "&#x54;"                    // T
      "&#x6a;"                    // j
      "&#x00000000000000000048;"  // H
      "&#x0000000000000000AB48;"  // \xab48
      "&#x0000000000000000000;"
      "&amp;"
      "&lt;"
      "&gt;"
      "&apos;"
      "&quot;"
      "&something_else;"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"BTjH\xab48&<>'\"", script_element->GetTextData());
}
258 
// Two hexadecimal character references with out-of-range values are expected
// to produce a text of exactly two spaces.
TEST_F(CFX_XMLParserTest, EntityOverflowHex) {
  static constexpr char kXml[] =
      "<script>"
      "&#xaDBDFFFFF;"
      "&#xafffffffffffffffffffffffffffffffff;"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"  ", script_element->GetTextData());
}
273 
// Two decimal character references with out-of-range values are expected to
// produce a text of exactly two spaces.
TEST_F(CFX_XMLParserTest, EntityOverflowDecimal) {
  static constexpr char kXml[] =
      "<script>"
      "&#2914910205;"
      "&#29149102052342342134521341234512351234213452315;"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const script_element =
      document->GetRoot()->GetFirstChildNamed(L"script");
  ASSERT_TRUE(script_element);
  EXPECT_EQ(L"  ", script_element->GetTextData());
}
288 
// Probes CFX_XMLParser::IsXMLNameChar() at the edges of two code point ranges
// (0x2070-0x218F and 0xFDF0-0xFFFD) and for '-' with both flag values.
// NOTE(review): the second argument presumably selects "first character of a
// name" rules; confirm against the declaration of IsXMLNameChar().
TEST_F(CFX_XMLParserTest, IsXMLNameChar) {
  static constexpr struct {
    wchar_t ch;
    bool flag;
    bool expected;
  } kCases[] = {
      {L'-', true, false},
      {L'-', false, true},

      {0x2069, true, false},
      {0x2070, true, true},
      {0x2073, true, true},
      {0x218F, true, true},
      {0x2190, true, false},

      {0xFDEF, true, false},
      {0xFDF0, true, true},
      {0xFDF1, true, true},
      {0xFFFD, true, true},
      {0xFFFE, true, false},
  };

  for (const auto& test_case : kCases) {
    EXPECT_EQ(test_case.expected,
              CFX_XMLParser::IsXMLNameChar(test_case.ch, test_case.flag))
        << "code point " << static_cast<unsigned>(test_case.ch) << ", flag "
        << test_case.flag;
  }
}
305 
// A closing tag with no matching opening tag must make the parse fail.
TEST_F(CFX_XMLParserTest, BadElementClose) {
  ASSERT_FALSE(Parse("</endtag>"));
}
309 
// Closing the same element twice must make the parse fail.
TEST_F(CFX_XMLParserTest, DoubleElementClose) {
  ASSERT_FALSE(Parse("<p></p></p>"));
}
313 
// A leading "<?originalXFAVersion ...?>" processing instruction becomes the
// first child of the document root, and that node reports
// IsOriginalXFAVersion().
TEST_F(CFX_XMLParserTest, ParseInstruction) {
  static constexpr char kXml[] =
      "<?originalXFAVersion http://www.xfa.org/schema/xfa-template/3.3/ ?>"
      "<form></form>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_TRUE(document);

  CFX_XMLElement* const root = document->GetRoot();
  auto* const first_child = root->GetFirstChild();
  ASSERT_TRUE(first_child);
  ASSERT_EQ(CFX_XMLNode::Type::kInstruction, first_child->GetType());

  CFX_XMLInstruction* const instruction = ToXMLInstruction(first_child);
  EXPECT_TRUE(instruction->IsOriginalXFAVersion());
}
329 
// An entity reference interrupted by markup ("&<p>;") must make the parse
// fail.
TEST_F(CFX_XMLParserTest, BadEntity) {
  static constexpr char kXml[] =
      "<script>"
      "Test &<p>; thing"
      "</script>";

  const std::unique_ptr<CFX_XMLDocument> document = Parse(kXml);
  ASSERT_FALSE(document);
}
337