1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/xml/cfx_xmlsyntaxparser.h"
8
9 #include <algorithm>
10 #include <cwctype>
11 #include <iterator>
12
13 #include "core/fxcrt/fx_extension.h"
14 #include "core/fxcrt/fx_safe_types.h"
15
16 namespace {
17
// Upper bound accepted for a numeric character reference; ParseTextChar()
// substitutes a space for anything larger.
constexpr uint32_t kMaxCharRange = 0x10FFFF;
19
// Returns true when |ch| is one of the four characters treated as white space
// by this parser: space, horizontal tab, line feed or carriage return.
bool IsXMLWhiteSpace(wchar_t ch) {
  switch (ch) {
    case L' ':
    case 0x09:
    case 0x0A:
    case 0x0D:
      return true;
    default:
      return false;
  }
}
23
// One inclusive range [wStart, wEnd] of characters that may appear in an XML
// name. |bStartChar| is true when characters of the range are also accepted
// as the first character of a name.
struct FX_XMLNAMECHAR {
  uint16_t wStart;
  uint16_t wEnd;
  bool bStartChar;
};

// Name-character ranges, kept sorted by |wEnd| and non-overlapping so that
// IsXMLNameChar() can binary-search them.
const FX_XMLNAMECHAR g_XMLNameChars[] = {
    {L'-', L'.', false},
    {L'0', L'9', false},
    {L':', L':', false},
    {L'A', L'Z', true},
    {L'_', L'_', true},
    {L'a', L'z', true},
    {0xB7, 0xB7, false},
    {0xC0, 0xD6, true},
    {0xD8, 0xF6, true},
    {0xF8, 0x02FF, true},
    {0x0300, 0x036F, false},
    {0x0370, 0x037D, true},
    {0x037F, 0x1FFF, true},
    {0x200C, 0x200D, true},
    {0x203F, 0x2040, false},
    {0x2070, 0x218F, true},
    {0x2C00, 0x2FEF, true},
    {0x3001, 0xD7FF, true},
    {0xF900, 0xFDCF, true},
    {0xFDF0, 0xFFFD, true},
};
39
40
// Returns the number of bytes needed to UTF-8 encode the first |iSrcLen|
// elements of |src|.
//
// Each element is classified on its own by value: 1 byte below 0x80, 2 below
// 0x800, 3 below 0x10000, 4 below 0x200000, 5 below 0x4000000 and 6 otherwise
// (the 5- and 6-byte lengths match the legacy extended UTF-8 layout).
//
// A non-positive |iSrcLen| yields 0. A count larger than |src.size()| is
// clamped to the vector's size so the function never reads out of bounds.
int32_t GetUTF8EncodeLength(const std::vector<wchar_t>& src,
                            FX_FILESIZE iSrcLen) {
  if (iSrcLen <= 0)
    return 0;

  // Compare in 64-bit unsigned space so neither a 64-bit FX_FILESIZE nor a
  // 32-bit size_t is truncated before the clamp.
  const size_t count = static_cast<uint64_t>(iSrcLen) < src.size()
                           ? static_cast<size_t>(iSrcLen)
                           : src.size();

  int32_t iDstNum = 0;
  for (size_t i = 0; i < count; ++i) {
    const uint32_t unicode = static_cast<uint32_t>(src[i]);
    if (unicode < 0x80)
      iDstNum += 1;
    else if (unicode < 0x800)
      iDstNum += 2;
    else if (unicode < 0x10000)
      iDstNum += 3;
    else if (unicode < 0x200000)
      iDstNum += 4;
    else if (unicode < 0x4000000)
      iDstNum += 5;
    else
      iDstNum += 6;
  }
  return iDstNum;
}
66
67 } // namespace
68
69 // static
IsXMLNameChar(wchar_t ch,bool bFirstChar)70 bool CFX_XMLSyntaxParser::IsXMLNameChar(wchar_t ch, bool bFirstChar) {
71 auto* it = std::lower_bound(
72 std::begin(g_XMLNameChars), std::end(g_XMLNameChars), ch,
73 [](const FX_XMLNAMECHAR& arg, wchar_t ch) { return arg.wEnd < ch; });
74 return it != std::end(g_XMLNameChars) && ch >= it->wStart &&
75 (!bFirstChar || it->bStartChar);
76 }
77
CFX_XMLSyntaxParser(const RetainPtr<CFX_SeekableStreamProxy> & pStream)78 CFX_XMLSyntaxParser::CFX_XMLSyntaxParser(
79 const RetainPtr<CFX_SeekableStreamProxy>& pStream)
80 : m_pStream(pStream),
81 m_iXMLPlaneSize(32 * 1024),
82 m_iCurrentPos(0),
83 m_iCurrentNodeNum(-1),
84 m_iLastNodeNum(-1),
85 m_iParsedBytes(0),
86 m_ParsedChars(0),
87 m_iBufferChars(0),
88 m_bEOS(false),
89 m_Start(0),
90 m_End(0),
91 m_iAllocStep(m_BlockBuffer.GetAllocStep()),
92 m_pCurrentBlock(nullptr),
93 m_iIndexInBlock(0),
94 m_iTextDataLength(0),
95 m_syntaxParserResult(FX_XmlSyntaxResult::None),
96 m_syntaxParserState(FDE_XmlSyntaxState::Text),
97 m_wQuotationMark(0),
98 m_iEntityStart(-1) {
99 ASSERT(pStream);
100
101 m_CurNode.iNodeNum = -1;
102 m_CurNode.eNodeType = FX_XMLNODE_Unknown;
103
104 m_iXMLPlaneSize =
105 std::min(m_iXMLPlaneSize,
106 pdfium::base::checked_cast<size_t>(m_pStream->GetLength()));
107 m_iCurrentPos = m_pStream->GetBOMLength();
108
109 FX_SAFE_SIZE_T alloc_size_safe = m_iXMLPlaneSize;
110 alloc_size_safe += 1; // For NUL.
111 if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) {
112 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
113 return;
114 }
115
116 m_Buffer.resize(pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe));
117
118 m_BlockBuffer.InitBuffer();
119 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
120 m_BlockBuffer.GetAvailableBlock();
121 }
122
~CFX_XMLSyntaxParser()123 CFX_XMLSyntaxParser::~CFX_XMLSyntaxParser() {}
124
DoSyntaxParse()125 FX_XmlSyntaxResult CFX_XMLSyntaxParser::DoSyntaxParse() {
126 if (m_syntaxParserResult == FX_XmlSyntaxResult::Error ||
127 m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) {
128 return m_syntaxParserResult;
129 }
130
131 FX_FILESIZE iStreamLength = m_pStream->GetLength();
132 FX_FILESIZE iPos;
133
134 FX_XmlSyntaxResult syntaxParserResult = FX_XmlSyntaxResult::None;
135 while (true) {
136 if (m_Start >= m_End) {
137 if (m_bEOS || m_iCurrentPos >= iStreamLength) {
138 m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
139 return m_syntaxParserResult;
140 }
141 m_ParsedChars += m_End;
142 m_iParsedBytes = m_iCurrentPos;
143 if (m_pStream->GetPosition() != m_iCurrentPos)
144 m_pStream->Seek(CFX_SeekableStreamProxy::From::Begin, m_iCurrentPos);
145
146 m_iBufferChars =
147 m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS);
148 iPos = m_pStream->GetPosition();
149 if (m_iBufferChars < 1) {
150 m_iCurrentPos = iStreamLength;
151 m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
152 return m_syntaxParserResult;
153 }
154 m_iCurrentPos = iPos;
155 m_Start = 0;
156 m_End = m_iBufferChars;
157 }
158
159 while (m_Start < m_End) {
160 wchar_t ch = m_Buffer[m_Start];
161 switch (m_syntaxParserState) {
162 case FDE_XmlSyntaxState::Text:
163 if (ch == L'<') {
164 if (!m_BlockBuffer.IsEmpty()) {
165 m_iTextDataLength = m_BlockBuffer.GetDataLength();
166 m_BlockBuffer.Reset(true);
167 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
168 m_BlockBuffer.GetAvailableBlock();
169 m_iEntityStart = -1;
170 syntaxParserResult = FX_XmlSyntaxResult::Text;
171 } else {
172 m_Start++;
173 m_syntaxParserState = FDE_XmlSyntaxState::Node;
174 }
175 } else {
176 ParseTextChar(ch);
177 }
178 break;
179 case FDE_XmlSyntaxState::Node:
180 if (ch == L'!') {
181 m_Start++;
182 m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl;
183 } else if (ch == L'/') {
184 m_Start++;
185 m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
186 } else if (ch == L'?') {
187 m_iLastNodeNum++;
188 m_iCurrentNodeNum = m_iLastNodeNum;
189 m_CurNode.iNodeNum = m_iLastNodeNum;
190 m_CurNode.eNodeType = FX_XMLNODE_Instruction;
191 m_XMLNodeStack.push(m_CurNode);
192 m_Start++;
193 m_syntaxParserState = FDE_XmlSyntaxState::Target;
194 syntaxParserResult = FX_XmlSyntaxResult::InstructionOpen;
195 } else {
196 m_iLastNodeNum++;
197 m_iCurrentNodeNum = m_iLastNodeNum;
198 m_CurNode.iNodeNum = m_iLastNodeNum;
199 m_CurNode.eNodeType = FX_XMLNODE_Element;
200 m_XMLNodeStack.push(m_CurNode);
201 m_syntaxParserState = FDE_XmlSyntaxState::Tag;
202 syntaxParserResult = FX_XmlSyntaxResult::ElementOpen;
203 }
204 break;
205 case FDE_XmlSyntaxState::Target:
206 case FDE_XmlSyntaxState::Tag:
207 if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
208 if (m_BlockBuffer.IsEmpty()) {
209 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
210 return m_syntaxParserResult;
211 }
212
213 m_iTextDataLength = m_BlockBuffer.GetDataLength();
214 m_BlockBuffer.Reset(true);
215 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
216 m_BlockBuffer.GetAvailableBlock();
217 if (m_syntaxParserState != FDE_XmlSyntaxState::Target)
218 syntaxParserResult = FX_XmlSyntaxResult::TagName;
219 else
220 syntaxParserResult = FX_XmlSyntaxResult::TargetName;
221
222 m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
223 } else {
224 if (m_iIndexInBlock == m_iAllocStep) {
225 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
226 m_BlockBuffer.GetAvailableBlock();
227 if (!m_pCurrentBlock) {
228 return FX_XmlSyntaxResult::Error;
229 }
230 }
231 m_pCurrentBlock[m_iIndexInBlock++] = ch;
232 m_BlockBuffer.IncrementDataLength();
233 m_Start++;
234 }
235 break;
236 case FDE_XmlSyntaxState::AttriName:
237 if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) {
238 m_Start++;
239 break;
240 }
241 if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
242 if (m_BlockBuffer.IsEmpty()) {
243 if (m_CurNode.eNodeType == FX_XMLNODE_Element) {
244 if (ch == L'>' || ch == L'/') {
245 m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
246 break;
247 }
248 } else if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
249 if (ch == L'?') {
250 m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
251 m_Start++;
252 } else {
253 m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
254 }
255 break;
256 }
257 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
258 return m_syntaxParserResult;
259 } else {
260 if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
261 if (ch != '=' && !IsXMLWhiteSpace(ch)) {
262 m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
263 break;
264 }
265 }
266 m_iTextDataLength = m_BlockBuffer.GetDataLength();
267 m_BlockBuffer.Reset(true);
268 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
269 m_BlockBuffer.GetAvailableBlock();
270 m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign;
271 syntaxParserResult = FX_XmlSyntaxResult::AttriName;
272 }
273 } else {
274 if (m_iIndexInBlock == m_iAllocStep) {
275 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
276 m_BlockBuffer.GetAvailableBlock();
277 if (!m_pCurrentBlock) {
278 return FX_XmlSyntaxResult::Error;
279 }
280 }
281 m_pCurrentBlock[m_iIndexInBlock++] = ch;
282 m_BlockBuffer.IncrementDataLength();
283 m_Start++;
284 }
285 break;
286 case FDE_XmlSyntaxState::AttriEqualSign:
287 if (IsXMLWhiteSpace(ch)) {
288 m_Start++;
289 break;
290 }
291 if (ch != L'=') {
292 if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
293 m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
294 break;
295 }
296 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
297 return m_syntaxParserResult;
298 } else {
299 m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation;
300 m_Start++;
301 }
302 break;
303 case FDE_XmlSyntaxState::AttriQuotation:
304 if (IsXMLWhiteSpace(ch)) {
305 m_Start++;
306 break;
307 }
308 if (ch != L'\"' && ch != L'\'') {
309 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
310 return m_syntaxParserResult;
311 } else {
312 m_wQuotationMark = ch;
313 m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
314 m_Start++;
315 }
316 break;
317 case FDE_XmlSyntaxState::AttriValue:
318 if (ch == m_wQuotationMark) {
319 if (m_iEntityStart > -1) {
320 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
321 return m_syntaxParserResult;
322 }
323 m_iTextDataLength = m_BlockBuffer.GetDataLength();
324 m_wQuotationMark = 0;
325 m_BlockBuffer.Reset(true);
326 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
327 m_BlockBuffer.GetAvailableBlock();
328 m_Start++;
329 m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
330 syntaxParserResult = FX_XmlSyntaxResult::AttriValue;
331 } else {
332 ParseTextChar(ch);
333 }
334 break;
335 case FDE_XmlSyntaxState::CloseInstruction:
336 if (ch != L'>') {
337 if (m_iIndexInBlock == m_iAllocStep) {
338 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
339 m_BlockBuffer.GetAvailableBlock();
340 if (!m_pCurrentBlock) {
341 return FX_XmlSyntaxResult::Error;
342 }
343 }
344 m_pCurrentBlock[m_iIndexInBlock++] = ch;
345 m_BlockBuffer.IncrementDataLength();
346 m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
347 } else if (!m_BlockBuffer.IsEmpty()) {
348 m_iTextDataLength = m_BlockBuffer.GetDataLength();
349 m_BlockBuffer.Reset(true);
350 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
351 m_BlockBuffer.GetAvailableBlock();
352 syntaxParserResult = FX_XmlSyntaxResult::TargetData;
353 } else {
354 m_Start++;
355 if (m_XMLNodeStack.empty()) {
356 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
357 return m_syntaxParserResult;
358 }
359 m_XMLNodeStack.pop();
360 if (!m_XMLNodeStack.empty()) {
361 m_CurNode = m_XMLNodeStack.top();
362 } else {
363 m_CurNode.iNodeNum = -1;
364 m_CurNode.eNodeType = FX_XMLNODE_Unknown;
365 }
366 m_iCurrentNodeNum = m_CurNode.iNodeNum;
367 m_BlockBuffer.Reset(true);
368 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
369 m_BlockBuffer.GetAvailableBlock();
370 m_syntaxParserState = FDE_XmlSyntaxState::Text;
371 syntaxParserResult = FX_XmlSyntaxResult::InstructionClose;
372 }
373 break;
374 case FDE_XmlSyntaxState::BreakElement:
375 if (ch == L'>') {
376 m_syntaxParserState = FDE_XmlSyntaxState::Text;
377 syntaxParserResult = FX_XmlSyntaxResult::ElementBreak;
378 } else if (ch == L'/') {
379 m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
380 } else {
381 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
382 return m_syntaxParserResult;
383 }
384 m_Start++;
385 break;
386 case FDE_XmlSyntaxState::CloseElement:
387 if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
388 if (ch == L'>') {
389 if (m_XMLNodeStack.empty()) {
390 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
391 return m_syntaxParserResult;
392 }
393 m_XMLNodeStack.pop();
394 if (!m_XMLNodeStack.empty()) {
395 m_CurNode = m_XMLNodeStack.top();
396 } else {
397 m_CurNode.iNodeNum = -1;
398 m_CurNode.eNodeType = FX_XMLNODE_Unknown;
399 }
400 m_iCurrentNodeNum = m_CurNode.iNodeNum;
401 m_iTextDataLength = m_BlockBuffer.GetDataLength();
402 m_BlockBuffer.Reset(true);
403 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
404 m_BlockBuffer.GetAvailableBlock();
405 m_syntaxParserState = FDE_XmlSyntaxState::Text;
406 syntaxParserResult = FX_XmlSyntaxResult::ElementClose;
407 } else if (!IsXMLWhiteSpace(ch)) {
408 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
409 return m_syntaxParserResult;
410 }
411 } else {
412 if (m_iIndexInBlock == m_iAllocStep) {
413 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
414 m_BlockBuffer.GetAvailableBlock();
415 if (!m_pCurrentBlock) {
416 return FX_XmlSyntaxResult::Error;
417 }
418 }
419 m_pCurrentBlock[m_iIndexInBlock++] = ch;
420 m_BlockBuffer.IncrementDataLength();
421 }
422 m_Start++;
423 break;
424 case FDE_XmlSyntaxState::SkipCommentOrDecl:
425 if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"--", 2) == 0) {
426 m_Start += 2;
427 m_syntaxParserState = FDE_XmlSyntaxState::SkipComment;
428 } else if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"[CDATA[", 7) ==
429 0) {
430 m_Start += 7;
431 m_syntaxParserState = FDE_XmlSyntaxState::SkipCData;
432 } else {
433 m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode;
434 m_SkipChar = L'>';
435 m_SkipStack.push(L'>');
436 }
437 break;
438 case FDE_XmlSyntaxState::SkipCData: {
439 if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) {
440 m_Start += 3;
441 syntaxParserResult = FX_XmlSyntaxResult::CData;
442 m_iTextDataLength = m_BlockBuffer.GetDataLength();
443 m_BlockBuffer.Reset(true);
444 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
445 m_BlockBuffer.GetAvailableBlock();
446 m_syntaxParserState = FDE_XmlSyntaxState::Text;
447 } else {
448 if (m_iIndexInBlock == m_iAllocStep) {
449 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
450 m_BlockBuffer.GetAvailableBlock();
451 if (!m_pCurrentBlock)
452 return FX_XmlSyntaxResult::Error;
453 }
454 m_pCurrentBlock[m_iIndexInBlock++] = ch;
455 m_BlockBuffer.IncrementDataLength();
456 m_Start++;
457 }
458 break;
459 }
460 case FDE_XmlSyntaxState::SkipDeclNode:
461 if (m_SkipChar == L'\'' || m_SkipChar == L'\"') {
462 m_Start++;
463 if (ch != m_SkipChar)
464 break;
465
466 m_SkipStack.pop();
467 if (m_SkipStack.empty())
468 m_syntaxParserState = FDE_XmlSyntaxState::Text;
469 else
470 m_SkipChar = m_SkipStack.top();
471 } else {
472 switch (ch) {
473 case L'<':
474 m_SkipChar = L'>';
475 m_SkipStack.push(L'>');
476 break;
477 case L'[':
478 m_SkipChar = L']';
479 m_SkipStack.push(L']');
480 break;
481 case L'(':
482 m_SkipChar = L')';
483 m_SkipStack.push(L')');
484 break;
485 case L'\'':
486 m_SkipChar = L'\'';
487 m_SkipStack.push(L'\'');
488 break;
489 case L'\"':
490 m_SkipChar = L'\"';
491 m_SkipStack.push(L'\"');
492 break;
493 default:
494 if (ch == m_SkipChar) {
495 m_SkipStack.pop();
496 if (m_SkipStack.empty()) {
497 if (m_BlockBuffer.GetDataLength() >= 9)
498 (void)m_BlockBuffer.GetTextData(0, 7);
499
500 m_iTextDataLength = m_BlockBuffer.GetDataLength();
501 m_BlockBuffer.Reset(true);
502 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
503 m_BlockBuffer.GetAvailableBlock();
504 m_syntaxParserState = FDE_XmlSyntaxState::Text;
505 } else {
506 m_SkipChar = m_SkipStack.top();
507 }
508 }
509 break;
510 }
511 if (!m_SkipStack.empty()) {
512 if (m_iIndexInBlock == m_iAllocStep) {
513 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
514 m_BlockBuffer.GetAvailableBlock();
515 if (!m_pCurrentBlock) {
516 return FX_XmlSyntaxResult::Error;
517 }
518 }
519 m_pCurrentBlock[m_iIndexInBlock++] = ch;
520 m_BlockBuffer.IncrementDataLength();
521 }
522 m_Start++;
523 }
524 break;
525 case FDE_XmlSyntaxState::SkipComment:
526 if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"-->", 3) == 0) {
527 m_Start += 2;
528 m_syntaxParserState = FDE_XmlSyntaxState::Text;
529 }
530
531 m_Start++;
532 break;
533 case FDE_XmlSyntaxState::TargetData:
534 if (IsXMLWhiteSpace(ch)) {
535 if (m_BlockBuffer.IsEmpty()) {
536 m_Start++;
537 break;
538 }
539 if (m_wQuotationMark == 0) {
540 m_iTextDataLength = m_BlockBuffer.GetDataLength();
541 m_wQuotationMark = 0;
542 m_BlockBuffer.Reset(true);
543 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
544 m_BlockBuffer.GetAvailableBlock();
545 m_Start++;
546 syntaxParserResult = FX_XmlSyntaxResult::TargetData;
547 break;
548 }
549 }
550 if (ch == '?') {
551 m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
552 m_Start++;
553 } else if (ch == '\"') {
554 if (m_wQuotationMark == 0) {
555 m_wQuotationMark = ch;
556 m_Start++;
557 } else if (ch == m_wQuotationMark) {
558 m_iTextDataLength = m_BlockBuffer.GetDataLength();
559 m_wQuotationMark = 0;
560 m_BlockBuffer.Reset(true);
561 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
562 m_BlockBuffer.GetAvailableBlock();
563 m_Start++;
564 syntaxParserResult = FX_XmlSyntaxResult::TargetData;
565 } else {
566 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
567 return m_syntaxParserResult;
568 }
569 } else {
570 if (m_iIndexInBlock == m_iAllocStep) {
571 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
572 m_BlockBuffer.GetAvailableBlock();
573 if (!m_pCurrentBlock) {
574 return FX_XmlSyntaxResult::Error;
575 }
576 }
577 m_pCurrentBlock[m_iIndexInBlock++] = ch;
578 m_BlockBuffer.IncrementDataLength();
579 m_Start++;
580 }
581 break;
582 default:
583 break;
584 }
585 if (syntaxParserResult != FX_XmlSyntaxResult::None)
586 return syntaxParserResult;
587 }
588 }
589 return FX_XmlSyntaxResult::Text;
590 }
591
GetStatus() const592 int32_t CFX_XMLSyntaxParser::GetStatus() const {
593 if (!m_pStream)
594 return -1;
595
596 int32_t iStreamLength = m_pStream->GetLength();
597 if (iStreamLength < 1)
598 return 100;
599
600 if (m_syntaxParserResult == FX_XmlSyntaxResult::Error)
601 return -1;
602
603 if (m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString)
604 return 100;
605 return m_iParsedBytes * 100 / iStreamLength;
606 }
607
GetCurrentBinaryPos() const608 FX_FILESIZE CFX_XMLSyntaxParser::GetCurrentBinaryPos() const {
609 if (!m_pStream)
610 return 0;
611
612 int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start);
613 return m_iParsedBytes + nDstLen;
614 }
615
ParseTextChar(wchar_t character)616 void CFX_XMLSyntaxParser::ParseTextChar(wchar_t character) {
617 if (m_iIndexInBlock == m_iAllocStep) {
618 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
619 m_BlockBuffer.GetAvailableBlock();
620 if (!m_pCurrentBlock)
621 return;
622 }
623
624 m_pCurrentBlock[m_iIndexInBlock++] = character;
625 m_BlockBuffer.IncrementDataLength();
626 if (m_iEntityStart > -1 && character == L';') {
627 WideString csEntity = m_BlockBuffer.GetTextData(
628 m_iEntityStart + 1,
629 m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1);
630 int32_t iLen = csEntity.GetLength();
631 if (iLen > 0) {
632 if (csEntity[0] == L'#') {
633 uint32_t ch = 0;
634 wchar_t w;
635 if (iLen > 1 && csEntity[1] == L'x') {
636 for (int32_t i = 2; i < iLen; i++) {
637 w = csEntity[i];
638 if (std::iswdigit(w))
639 ch = (ch << 4) + w - L'0';
640 else if (w >= L'A' && w <= L'F')
641 ch = (ch << 4) + w - 55;
642 else if (w >= L'a' && w <= L'f')
643 ch = (ch << 4) + w - 87;
644 else
645 break;
646 }
647 } else {
648 for (int32_t i = 1; i < iLen; i++) {
649 w = csEntity[i];
650 if (!std::iswdigit(w))
651 break;
652 ch = ch * 10 + w - L'0';
653 }
654 }
655 if (ch > kMaxCharRange)
656 ch = ' ';
657
658 character = static_cast<wchar_t>(ch);
659 if (character != 0) {
660 m_BlockBuffer.SetTextChar(m_iEntityStart, character);
661 m_iEntityStart++;
662 }
663 } else {
664 if (csEntity.Compare(L"amp") == 0) {
665 m_BlockBuffer.SetTextChar(m_iEntityStart, L'&');
666 m_iEntityStart++;
667 } else if (csEntity.Compare(L"lt") == 0) {
668 m_BlockBuffer.SetTextChar(m_iEntityStart, L'<');
669 m_iEntityStart++;
670 } else if (csEntity.Compare(L"gt") == 0) {
671 m_BlockBuffer.SetTextChar(m_iEntityStart, L'>');
672 m_iEntityStart++;
673 } else if (csEntity.Compare(L"apos") == 0) {
674 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\'');
675 m_iEntityStart++;
676 } else if (csEntity.Compare(L"quot") == 0) {
677 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"');
678 m_iEntityStart++;
679 }
680 }
681 }
682 if (m_iEntityStart >= 0 &&
683 m_BlockBuffer.GetDataLength() > static_cast<size_t>(m_iEntityStart)) {
684 m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() -
685 m_iEntityStart);
686 }
687 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
688 m_BlockBuffer.GetAvailableBlock();
689 m_iEntityStart = -1;
690 } else if (m_iEntityStart < 0 && character == L'&') {
691 m_iEntityStart = m_BlockBuffer.GetDataLength() - 1;
692 }
693 m_Start++;
694 }
695