1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "core/fxcrt/xml/cfx_xmlsyntaxparser.h"
6
7 #include <memory>
8
9 #include "core/fxcrt/cfx_seekablestreamproxy.h"
10 #include "core/fxcrt/fx_codepage.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12 #include "testing/test_support.h"
13
// Tokenizes a <script> element whose body is a CDATA section and checks that
// the section's content is reported verbatim as a single CData token, framed
// by the whitespace Text tokens that precede and follow it.
TEST(CFX_XMLSyntaxParserTest, CData) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <![CDATA[\n"
      " if (a[1] < 3)\n"
      " app.alert(\"Tclams\");\n"
      " ]]>\n"
      "</script>";

  // Everything between "<![CDATA[" and "]]>" in |input|.
  const wchar_t* cdata =
      L"\n"
      L" if (a[1] < 3)\n"
      L" app.alert(\"Tclams\");\n"
      L" ";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace between the end of the opening tag and the CDATA section.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse());
  ASSERT_EQ(cdata, parser.GetTextData());

  // Newline between "]]>" and the closing tag.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
59
// Same as the plain CDATA case, but the CDATA payload itself contains the text
// "</script>". The expectations require that text to stay inside the CData
// token; only the real closing tag after "]]>" produces ElementClose.
TEST(CFX_XMLSyntaxParserTest, CDataWithInnerScript) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <![CDATA[\n"
      " if (a[1] < 3)\n"
      " app.alert(\"Tclams\");\n"
      " </script>\n"
      " ]]>\n"
      "</script>";

  // Everything between "<![CDATA[" and "]]>" in |input|, including the
  // embedded "</script>".
  const wchar_t* cdata =
      L"\n"
      L" if (a[1] < 3)\n"
      L" app.alert(\"Tclams\");\n"
      L" </script>\n"
      L" ";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace between the end of the opening tag and the CDATA section.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse());
  ASSERT_EQ(cdata, parser.GetTextData());

  // Newline between "]]>" and the closing tag.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
107
// Feeds the degenerate construct "<!>" as element content. The expectations
// require it to produce no token of its own: the Text token before it is
// followed directly by the Text token after it, and parsing then continues
// normally to the closing tag.
TEST(CFX_XMLSyntaxParserTest, ArrowBangArrow) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <!>\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of "<!>".
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // Newline after "<!>"; nothing is reported for "<!>" itself.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
142
// Feeds the malformed construct "<![>" as element content. After the leading
// whitespace Text token the expectations require the very next result to be
// EndOfString, i.e. no ElementClose is reported for the trailing "</script>".
TEST(CFX_XMLSyntaxParserTest, ArrowBangBracketArrow) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <![>\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of "<![>".
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // Parser walks to end of input.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
172
// Feeds "<![CDATA>" (a CDATA opener missing its second '[') as element
// content. After the leading whitespace Text token the expectations require
// the very next result to be EndOfString; neither a CData token nor an
// ElementClose is reported.
TEST(CFX_XMLSyntaxParserTest, IncompleteCData) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <![CDATA>\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of the malformed CDATA opener.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // Parser walks to end of input.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
202
// Feeds a CDATA section that is opened with "<![CDATA[" but never terminated
// by "]]>". After the leading whitespace Text token the expectations require
// the very next result to be EndOfString; neither a CData token nor an
// ElementClose is reported.
TEST(CFX_XMLSyntaxParserTest, UnClosedCData) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <![CDATA[\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of the unterminated CDATA section.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // Parser walks to end of input.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
232
// Tokenizes a well-formed but empty CDATA section ("<![CDATA[]]>"). The
// expectations require a CData token to still be reported, carrying an empty
// string, with normal parsing continuing afterwards.
TEST(CFX_XMLSyntaxParserTest, EmptyCData) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <![CDATA[]]>\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of the CDATA section.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // The empty section is still surfaced, with no payload.
  ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse());
  ASSERT_EQ(L"", parser.GetTextData());

  // Newline between "]]>" and the closing tag.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
269
// Tokenizes element content containing a well-formed comment. The
// expectations require the comment to produce no token: the Text token before
// it is followed directly by the Text token after it.
TEST(CFX_XMLSyntaxParserTest, Comment) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <!-- A Comment -->\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of the comment.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // Newline after the comment; the comment itself is not reported.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
303
// Feeds a comment whose opener has only one dash ("<!- A Comment -->"). The
// expected token sequence is the same as for a well-formed comment: nothing is
// reported for the construct and parsing continues to the closing tag.
TEST(CFX_XMLSyntaxParserTest, IncorrectCommentStart) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <!- A Comment -->\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of the malformed comment.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // Newline after the malformed comment; the construct itself is not reported.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
337
// Tokenizes an empty comment ("<!---->"). As with a non-empty comment, the
// expectations require no token for it: the surrounding Text tokens are
// reported back to back and parsing continues to the closing tag.
TEST(CFX_XMLSyntaxParserTest, CommentEmpty) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <!---->\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of the comment.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // Newline after the comment; the comment itself is not reported.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
371
// Feeds "<!--->" (a comment opener followed by only one dash before '>').
// After the leading whitespace Text token the expectations require the very
// next result to be EndOfString, i.e. no ElementClose is reported for the
// trailing "</script>".
TEST(CFX_XMLSyntaxParserTest, CommentThreeDash) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <!--->\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of "<!--->".
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // Nothing further is tokenized after the unterminated comment opener.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
399
// Feeds "<!-->" (a comment opener immediately followed by '>'). After the
// leading whitespace Text token the expectations require the very next result
// to be EndOfString, i.e. no ElementClose is reported for the trailing
// "</script>".
TEST(CFX_XMLSyntaxParserTest, CommentTwoDash) {
  const char* input =
      "<script contentType=\"application/x-javascript\">\n"
      " <!-->\n"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // Whitespace in front of "<!-->".
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n ", parser.GetTextData());

  // Nothing further is tokenized after the unterminated comment opener.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
427
// Checks decoding of numeric character references in element text: decimal
// and hexadecimal forms, with and without leading zeros, must come back as
// the characters they denote in a single Text token.
//
// The references must be spelled out as "&#...;" in the source. If they are
// stored already decoded (e.g. after passing through an HTML renderer) the
// test no longer exercises entity handling at all.
// NOTE(review): the exact spellings below were reconstructed from the expected
// text "BTH\xab48"; confirm them against the upstream revision.
TEST(CFX_XMLSyntaxParserTest, Entities) {
  const char* input =
      "<script contentType=\"application/x-javascript\">"
      "&#66;"                       // 'B', decimal.
      "&#x54;"                      // 'T', hexadecimal.
      "&#x00000000000000000048;"    // 'H', zero-padded hexadecimal.
      "&#x0000000000000000AB48;"    // U+AB48, zero-padded hexadecimal.
      "&#x0000000000000000000000;"  // Code point 0; the expected text below
                                    // has no character for it.
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // All references collapse into one Text token holding the decoded text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"BTH\xab48", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
462
// Checks handling of hexadecimal character references whose value lies far
// beyond the Unicode range: one that is merely too large, and one with enough
// digits to overflow any fixed-width accumulator.
//
// The references must be spelled out as "&#x...;" in the source. If they are
// stored already decoded (as U+FFFD replacement characters) nothing overflows
// and the test is meaningless.
// NOTE(review): the reference values and the two-space expectation were
// reconstructed on the assumption that each out-of-range reference is replaced
// by a single space; confirm against the parser and the upstream revision.
TEST(CFX_XMLSyntaxParserTest, EntityOverflowHex) {
  const char* input =
      "<script contentType=\"application/x-javascript\">"
      "&#xaDBDFFFFF;"
      "&#xafffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff;"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // One space per overflowing reference (see NOTE above).
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"  ", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
494
// Checks handling of decimal character references whose value lies far beyond
// the Unicode range: one that is merely too large, and one with enough digits
// to overflow any fixed-width accumulator.
//
// The references must be spelled out as "&#...;" in the source. If they are
// stored already decoded (as U+FFFD replacement characters) nothing overflows
// and the test is meaningless.
// NOTE(review): the reference values and the two-space expectation were
// reconstructed on the assumption that each out-of-range reference is replaced
// by a single space; confirm against the parser and the upstream revision.
TEST(CFX_XMLSyntaxParserTest, EntityOverflowDecimal) {
  const char* input =
      "<script contentType=\"application/x-javascript\">"
      "&#2914910205;"
      "&#29149102052342342134521341234512351234213452315;"
      "</script>";

  // The stream is handed a uint8_t* view of the literal plus its byte length,
  // hence the casts.
  RetainPtr<CFX_SeekableStreamProxy> stream =
      pdfium::MakeRetain<CFX_SeekableStreamProxy>(
          reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
  stream->SetCodePage(FX_CODEPAGE_UTF8);

  CFX_XMLSyntaxParser parser(stream);

  // Opening tag and its single attribute.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // One space per overflowing reference (see NOTE above).
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"  ", parser.GetTextData());

  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
526
// Spot-checks CFX_XMLSyntaxParser::IsXMLNameChar() at the edges of two
// accepted code point ranges, plus the flag-dependent treatment of '-'.
// NOTE(review): the boolean argument is presumably the "first character of a
// name" flag; confirm against the declaration.
TEST(CFX_XMLSyntaxParserTest, IsXMLNameChar) {
  // '-' is rejected when the flag is true and accepted when it is false.
  EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(L'-', true));
  EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(L'-', false));

  // Range 0x2070..0x218F: one below, first, interior, last, one above.
  EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2069, true));
  EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2070, true));
  EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2073, true));
  EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0x218F, true));
  EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2190, true));

  // Range 0xFDF0..0xFFFD: one below, first, interior, last, one above.
  EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFDEF, true));
  EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFDF0, true));
  EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFDF1, true));
  EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFFFD, true));
  EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFFFE, true));
}
543