1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa/fxfa/cxfa_textparser.h"
8
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12
13 #include "core/fxcrt/css/cfx_css.h"
14 #include "core/fxcrt/css/cfx_csscomputedstyle.h"
15 #include "core/fxcrt/css/cfx_cssstyleselector.h"
16 #include "core/fxcrt/css/cfx_cssstylesheet.h"
17 #include "core/fxcrt/fx_codepage.h"
18 #include "core/fxcrt/xml/cfx_xmlelement.h"
19 #include "core/fxcrt/xml/cfx_xmlnode.h"
20 #include "core/fxge/fx_font.h"
21 #include "third_party/base/ptr_util.h"
22 #include "xfa/fgas/font/cfgas_fontmgr.h"
23 #include "xfa/fgas/font/cfgas_gefont.h"
24 #include "xfa/fxfa/cxfa_ffapp.h"
25 #include "xfa/fxfa/cxfa_ffdoc.h"
26 #include "xfa/fxfa/cxfa_fontmgr.h"
27 #include "xfa/fxfa/cxfa_textparsecontext.h"
28 #include "xfa/fxfa/cxfa_textprovider.h"
29 #include "xfa/fxfa/cxfa_texttabstopscontext.h"
30 #include "xfa/fxfa/parser/cxfa_font.h"
31 #include "xfa/fxfa/parser/cxfa_measurement.h"
32 #include "xfa/fxfa/parser/cxfa_para.h"
33
34 namespace {
35
// Scanner states used by CXFA_TextParser::GetTabstops() while it walks a
// tab-stops string. The enumerator order is significant only in that it fixes
// the underlying values; it is unchanged here.
enum class TabStopStatus {
  Error,        // Not assigned anywhere in the code visible in this file.
  EOS,          // Not assigned anywhere in the code visible in this file.
  None,         // Between entries: skipping characters <= ' '.
  Alignment,    // Collecting the alignment token up to the next ' '.
  StartLeader,  // Alignment read: checking for a section that starts with 'l'.
  Leader,       // Not assigned anywhere in the code visible in this file.
  Location,     // Collecting the position token up to the next ' '.
};
45
// Reads attribute |wsName| from |pElement|.
// - If the attribute value is empty, |wsDefaultValue| is returned as given
//   (it is NOT lower-cased).
// - Otherwise the attribute value is returned after MakeLower().
// |pElement| is dereferenced unconditionally and must not be null.
WideString GetLowerCaseElementAttributeOrDefault(
    const CFX_XMLElement* pElement,
    const WideString& wsName,
    const WideString& wsDefaultValue) {
  WideString wsResult = pElement->GetAttribute(wsName);
  if (wsResult.IsEmpty())
    return wsDefaultValue;

  wsResult.MakeLower();
  return wsResult;
}
57
58 } // namespace
59
CXFA_TextParser()60 CXFA_TextParser::CXFA_TextParser()
61 : m_bParsed(false), m_cssInitialized(false) {}
62
~CXFA_TextParser()63 CXFA_TextParser::~CXFA_TextParser() {}
64
Reset()65 void CXFA_TextParser::Reset() {
66 m_mapXMLNodeToParseContext.clear();
67 m_bParsed = false;
68 }
69
InitCSSData(CXFA_TextProvider * pTextProvider)70 void CXFA_TextParser::InitCSSData(CXFA_TextProvider* pTextProvider) {
71 if (!pTextProvider)
72 return;
73
74 if (!m_pSelector) {
75 m_pSelector = pdfium::MakeUnique<CFX_CSSStyleSelector>();
76
77 CXFA_Font* font = pTextProvider->GetFontIfExists();
78 m_pSelector->SetDefFontSize(font ? font->GetFontSize() : 10.0f);
79 }
80
81 if (m_cssInitialized)
82 return;
83
84 m_cssInitialized = true;
85 auto uaSheet = LoadDefaultSheetStyle();
86 m_pSelector->SetUAStyleSheet(std::move(uaSheet));
87 m_pSelector->UpdateStyleIndex();
88 }
89
LoadDefaultSheetStyle()90 std::unique_ptr<CFX_CSSStyleSheet> CXFA_TextParser::LoadDefaultSheetStyle() {
91 static const char kStyle[] =
92 "html,body,ol,p,ul{display:block}"
93 "li{display:list-item}"
94 "ol,ul{padding-left:33px;margin:1.12em 0}"
95 "ol{list-style-type:decimal}"
96 "a{color:#0000ff;text-decoration:underline}"
97 "b{font-weight:bolder}"
98 "i{font-style:italic}"
99 "sup{vertical-align:+15em;font-size:.66em}"
100 "sub{vertical-align:-15em;font-size:.66em}";
101 WideString ws = WideString::FromASCII(kStyle);
102 auto sheet = pdfium::MakeUnique<CFX_CSSStyleSheet>();
103 if (!sheet->LoadBuffer(ws.c_str(), ws.GetLength()))
104 return nullptr;
105
106 return sheet;
107 }
108
CreateRootStyle(CXFA_TextProvider * pTextProvider)109 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::CreateRootStyle(
110 CXFA_TextProvider* pTextProvider) {
111 CXFA_Para* para = pTextProvider->GetParaIfExists();
112 auto pStyle = m_pSelector->CreateComputedStyle(nullptr);
113 float fLineHeight = 0;
114 float fFontSize = 10;
115
116 if (para) {
117 fLineHeight = para->GetLineHeight();
118 CFX_CSSLength indent;
119 indent.Set(CFX_CSSLengthUnit::Point, para->GetTextIndent());
120 pStyle->SetTextIndent(indent);
121 CFX_CSSTextAlign hAlign = CFX_CSSTextAlign::Left;
122 switch (para->GetHorizontalAlign()) {
123 case XFA_AttributeValue::Center:
124 hAlign = CFX_CSSTextAlign::Center;
125 break;
126 case XFA_AttributeValue::Right:
127 hAlign = CFX_CSSTextAlign::Right;
128 break;
129 case XFA_AttributeValue::Justify:
130 hAlign = CFX_CSSTextAlign::Justify;
131 break;
132 case XFA_AttributeValue::JustifyAll:
133 hAlign = CFX_CSSTextAlign::JustifyAll;
134 break;
135 case XFA_AttributeValue::Left:
136 case XFA_AttributeValue::Radix:
137 break;
138 default:
139 NOTREACHED();
140 break;
141 }
142 pStyle->SetTextAlign(hAlign);
143 CFX_CSSRect rtMarginWidth;
144 rtMarginWidth.left.Set(CFX_CSSLengthUnit::Point, para->GetMarginLeft());
145 rtMarginWidth.top.Set(CFX_CSSLengthUnit::Point, para->GetSpaceAbove());
146 rtMarginWidth.right.Set(CFX_CSSLengthUnit::Point, para->GetMarginRight());
147 rtMarginWidth.bottom.Set(CFX_CSSLengthUnit::Point, para->GetSpaceBelow());
148 pStyle->SetMarginWidth(rtMarginWidth);
149 }
150
151 CXFA_Font* font = pTextProvider->GetFontIfExists();
152 if (font) {
153 pStyle->SetColor(font->GetColor());
154 pStyle->SetFontStyle(font->IsItalic() ? CFX_CSSFontStyle::Italic
155 : CFX_CSSFontStyle::Normal);
156 pStyle->SetFontWeight(font->IsBold() ? FXFONT_FW_BOLD : FXFONT_FW_NORMAL);
157 pStyle->SetNumberVerticalAlign(-font->GetBaselineShift());
158 fFontSize = font->GetFontSize();
159 CFX_CSSLength letterSpacing;
160 letterSpacing.Set(CFX_CSSLengthUnit::Point, font->GetLetterSpacing());
161 pStyle->SetLetterSpacing(letterSpacing);
162 uint32_t dwDecoration = 0;
163 if (font->GetLineThrough() > 0)
164 dwDecoration |= CFX_CSSTEXTDECORATION_LineThrough;
165 if (font->GetUnderline() > 1)
166 dwDecoration |= CFX_CSSTEXTDECORATION_Double;
167 else if (font->GetUnderline() > 0)
168 dwDecoration |= CFX_CSSTEXTDECORATION_Underline;
169
170 pStyle->SetTextDecoration(dwDecoration);
171 }
172 pStyle->SetLineHeight(fLineHeight);
173 pStyle->SetFontSize(fFontSize);
174 return pStyle;
175 }
176
CreateStyle(CFX_CSSComputedStyle * pParentStyle)177 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::CreateStyle(
178 CFX_CSSComputedStyle* pParentStyle) {
179 auto pNewStyle = m_pSelector->CreateComputedStyle(pParentStyle);
180 ASSERT(pNewStyle);
181 if (!pParentStyle)
182 return pNewStyle;
183
184 uint32_t dwDecoration = pParentStyle->GetTextDecoration();
185 float fBaseLine = 0;
186 if (pParentStyle->GetVerticalAlign() == CFX_CSSVerticalAlign::Number)
187 fBaseLine = pParentStyle->GetNumberVerticalAlign();
188
189 pNewStyle->SetTextDecoration(dwDecoration);
190 pNewStyle->SetNumberVerticalAlign(fBaseLine);
191
192 const CFX_CSSRect* pRect = pParentStyle->GetMarginWidth();
193 if (pRect)
194 pNewStyle->SetMarginWidth(*pRect);
195 return pNewStyle;
196 }
197
ComputeStyle(const CFX_XMLNode * pXMLNode,CFX_CSSComputedStyle * pParentStyle)198 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::ComputeStyle(
199 const CFX_XMLNode* pXMLNode,
200 CFX_CSSComputedStyle* pParentStyle) {
201 auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
202 if (it == m_mapXMLNodeToParseContext.end())
203 return nullptr;
204
205 CXFA_TextParseContext* pContext = it->second.get();
206 if (!pContext)
207 return nullptr;
208
209 pContext->m_pParentStyle.Reset(pParentStyle);
210
211 auto tagProvider = ParseTagInfo(pXMLNode);
212 if (tagProvider->m_bContent)
213 return nullptr;
214
215 auto pStyle = CreateStyle(pParentStyle);
216 m_pSelector->ComputeStyle(pContext->GetDecls(),
217 tagProvider->GetAttribute(L"style"),
218 tagProvider->GetAttribute(L"align"), pStyle.Get());
219 return pStyle;
220 }
221
DoParse(const CFX_XMLNode * pXMLContainer,CXFA_TextProvider * pTextProvider)222 void CXFA_TextParser::DoParse(const CFX_XMLNode* pXMLContainer,
223 CXFA_TextProvider* pTextProvider) {
224 if (!pXMLContainer || !pTextProvider || m_bParsed)
225 return;
226
227 m_bParsed = true;
228 InitCSSData(pTextProvider);
229 auto pRootStyle = CreateRootStyle(pTextProvider);
230 ParseRichText(pXMLContainer, pRootStyle.Get());
231 }
232
ParseRichText(const CFX_XMLNode * pXMLNode,CFX_CSSComputedStyle * pParentStyle)233 void CXFA_TextParser::ParseRichText(const CFX_XMLNode* pXMLNode,
234 CFX_CSSComputedStyle* pParentStyle) {
235 if (!pXMLNode)
236 return;
237
238 auto tagProvider = ParseTagInfo(pXMLNode);
239 if (!tagProvider->m_bTagAvailable)
240 return;
241
242 RetainPtr<CFX_CSSComputedStyle> pNewStyle;
243 if (!(tagProvider->GetTagName().EqualsASCII("body") &&
244 tagProvider->GetTagName().EqualsASCII("html"))) {
245 auto pTextContext = pdfium::MakeUnique<CXFA_TextParseContext>();
246 CFX_CSSDisplay eDisplay = CFX_CSSDisplay::Inline;
247 if (!tagProvider->m_bContent) {
248 auto declArray =
249 m_pSelector->MatchDeclarations(tagProvider->GetTagName());
250 pNewStyle = CreateStyle(pParentStyle);
251 m_pSelector->ComputeStyle(declArray, tagProvider->GetAttribute(L"style"),
252 tagProvider->GetAttribute(L"align"),
253 pNewStyle.Get());
254
255 if (!declArray.empty())
256 pTextContext->SetDecls(std::move(declArray));
257
258 eDisplay = pNewStyle->GetDisplay();
259 }
260 pTextContext->SetDisplay(eDisplay);
261 m_mapXMLNodeToParseContext[pXMLNode] = std::move(pTextContext);
262 }
263
264 for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
265 pXMLChild = pXMLChild->GetNextSibling()) {
266 ParseRichText(pXMLChild, pNewStyle.Get());
267 }
268 }
269
TagValidate(const WideString & wsName) const270 bool CXFA_TextParser::TagValidate(const WideString& wsName) const {
271 static const uint32_t s_XFATagName[] = {
272 0x61, // a
273 0x62, // b
274 0x69, // i
275 0x70, // p
276 0x0001f714, // br
277 0x00022a55, // li
278 0x000239bb, // ol
279 0x00025881, // ul
280 0x0bd37faa, // sub
281 0x0bd37fb8, // sup
282 0xa73e3af2, // span
283 0xb182eaae, // body
284 0xdb8ac455, // html
285 };
286 return std::binary_search(std::begin(s_XFATagName), std::end(s_XFATagName),
287 FX_HashCode_GetW(wsName.AsStringView(), true));
288 }
289
290 // static
ParseTagInfo(const CFX_XMLNode * pXMLNode)291 std::unique_ptr<CXFA_TextParser::TagProvider> CXFA_TextParser::ParseTagInfo(
292 const CFX_XMLNode* pXMLNode) {
293 auto tagProvider = pdfium::MakeUnique<TagProvider>();
294 const CFX_XMLElement* pXMLElement = ToXMLElement(pXMLNode);
295 if (pXMLElement) {
296 WideString wsName = pXMLElement->GetLocalTagName();
297 tagProvider->SetTagName(wsName);
298 tagProvider->m_bTagAvailable = TagValidate(wsName);
299 WideString wsValue = pXMLElement->GetAttribute(L"style");
300 if (!wsValue.IsEmpty())
301 tagProvider->SetAttribute(L"style", wsValue);
302
303 return tagProvider;
304 }
305 if (pXMLNode->GetType() == CFX_XMLNode::Type::kText) {
306 tagProvider->m_bTagAvailable = true;
307 tagProvider->m_bContent = true;
308 }
309 return tagProvider;
310 }
311
GetVAlign(CXFA_TextProvider * pTextProvider) const312 XFA_AttributeValue CXFA_TextParser::GetVAlign(
313 CXFA_TextProvider* pTextProvider) const {
314 CXFA_Para* para = pTextProvider->GetParaIfExists();
315 return para ? para->GetVerticalAlign() : XFA_AttributeValue::Top;
316 }
317
GetTabInterval(CFX_CSSComputedStyle * pStyle) const318 float CXFA_TextParser::GetTabInterval(CFX_CSSComputedStyle* pStyle) const {
319 WideString wsValue;
320 if (pStyle && pStyle->GetCustomStyle(L"tab-interval", &wsValue))
321 return CXFA_Measurement(wsValue.AsStringView()).ToUnit(XFA_Unit::Pt);
322 return 36;
323 }
324
CountTabs(CFX_CSSComputedStyle * pStyle) const325 int32_t CXFA_TextParser::CountTabs(CFX_CSSComputedStyle* pStyle) const {
326 WideString wsValue;
327 if (pStyle && pStyle->GetCustomStyle(L"xfa-tab-count", &wsValue))
328 return wsValue.GetInteger();
329 return 0;
330 }
331
IsSpaceRun(CFX_CSSComputedStyle * pStyle) const332 bool CXFA_TextParser::IsSpaceRun(CFX_CSSComputedStyle* pStyle) const {
333 WideString wsValue;
334 return pStyle && pStyle->GetCustomStyle(L"xfa-spacerun", &wsValue) &&
335 wsValue.EqualsASCIINoCase("yes");
336 }
337
GetFont(CXFA_FFDoc * doc,CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const338 RetainPtr<CFGAS_GEFont> CXFA_TextParser::GetFont(
339 CXFA_FFDoc* doc,
340 CXFA_TextProvider* pTextProvider,
341 CFX_CSSComputedStyle* pStyle) const {
342 WideString wsFamily = L"Courier";
343 uint32_t dwStyle = 0;
344 CXFA_Font* font = pTextProvider->GetFontIfExists();
345 if (font) {
346 wsFamily = font->GetTypeface();
347 if (font->IsBold())
348 dwStyle |= FXFONT_FORCE_BOLD;
349 if (font->IsItalic())
350 dwStyle |= FXFONT_FORCE_BOLD;
351 }
352
353 if (pStyle) {
354 int32_t iCount = pStyle->CountFontFamilies();
355 if (iCount > 0)
356 wsFamily = pStyle->GetFontFamily(iCount - 1).AsStringView();
357
358 dwStyle = 0;
359 if (pStyle->GetFontWeight() > FXFONT_FW_NORMAL)
360 dwStyle |= FXFONT_FORCE_BOLD;
361 if (pStyle->GetFontStyle() == CFX_CSSFontStyle::Italic)
362 dwStyle |= FXFONT_ITALIC;
363 }
364
365 CXFA_FontMgr* pFontMgr = doc->GetApp()->GetXFAFontMgr();
366 return pFontMgr->GetFont(doc, wsFamily.AsStringView(), dwStyle);
367 }
368
GetFontSize(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const369 float CXFA_TextParser::GetFontSize(CXFA_TextProvider* pTextProvider,
370 CFX_CSSComputedStyle* pStyle) const {
371 if (pStyle)
372 return pStyle->GetFontSize();
373
374 CXFA_Font* font = pTextProvider->GetFontIfExists();
375 return font ? font->GetFontSize() : 10;
376 }
377
GetHorScale(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,const CFX_XMLNode * pXMLNode) const378 int32_t CXFA_TextParser::GetHorScale(CXFA_TextProvider* pTextProvider,
379 CFX_CSSComputedStyle* pStyle,
380 const CFX_XMLNode* pXMLNode) const {
381 if (pStyle) {
382 WideString wsValue;
383 if (pStyle->GetCustomStyle(L"xfa-font-horizontal-scale", &wsValue))
384 return wsValue.GetInteger();
385
386 while (pXMLNode) {
387 auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
388 if (it != m_mapXMLNodeToParseContext.end()) {
389 CXFA_TextParseContext* pContext = it->second.get();
390 if (pContext && pContext->m_pParentStyle &&
391 pContext->m_pParentStyle->GetCustomStyle(
392 L"xfa-font-horizontal-scale", &wsValue)) {
393 return wsValue.GetInteger();
394 }
395 }
396 pXMLNode = pXMLNode->GetParent();
397 }
398 }
399
400 CXFA_Font* font = pTextProvider->GetFontIfExists();
401 return font ? static_cast<int32_t>(font->GetHorizontalScale()) : 100;
402 }
403
GetVerScale(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const404 int32_t CXFA_TextParser::GetVerScale(CXFA_TextProvider* pTextProvider,
405 CFX_CSSComputedStyle* pStyle) const {
406 if (pStyle) {
407 WideString wsValue;
408 if (pStyle->GetCustomStyle(L"xfa-font-vertical-scale", &wsValue))
409 return wsValue.GetInteger();
410 }
411
412 CXFA_Font* font = pTextProvider->GetFontIfExists();
413 return font ? static_cast<int32_t>(font->GetVerticalScale()) : 100;
414 }
415
GetUnderline(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,int32_t & iUnderline,XFA_AttributeValue & iPeriod) const416 void CXFA_TextParser::GetUnderline(CXFA_TextProvider* pTextProvider,
417 CFX_CSSComputedStyle* pStyle,
418 int32_t& iUnderline,
419 XFA_AttributeValue& iPeriod) const {
420 iUnderline = 0;
421 iPeriod = XFA_AttributeValue::All;
422 CXFA_Font* font = pTextProvider->GetFontIfExists();
423 if (!pStyle) {
424 if (font) {
425 iUnderline = font->GetUnderline();
426 iPeriod = font->GetUnderlinePeriod();
427 }
428 return;
429 }
430
431 uint32_t dwDecoration = pStyle->GetTextDecoration();
432 if (dwDecoration & CFX_CSSTEXTDECORATION_Double)
433 iUnderline = 2;
434 else if (dwDecoration & CFX_CSSTEXTDECORATION_Underline)
435 iUnderline = 1;
436
437 WideString wsValue;
438 if (pStyle->GetCustomStyle(L"underlinePeriod", &wsValue)) {
439 if (wsValue.EqualsASCII("word"))
440 iPeriod = XFA_AttributeValue::Word;
441 } else if (font) {
442 iPeriod = font->GetUnderlinePeriod();
443 }
444 }
445
GetLinethrough(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,int32_t & iLinethrough) const446 void CXFA_TextParser::GetLinethrough(CXFA_TextProvider* pTextProvider,
447 CFX_CSSComputedStyle* pStyle,
448 int32_t& iLinethrough) const {
449 iLinethrough = 0;
450 if (pStyle) {
451 uint32_t dwDecoration = pStyle->GetTextDecoration();
452 if (dwDecoration & CFX_CSSTEXTDECORATION_LineThrough)
453 iLinethrough = 1;
454 return;
455 }
456
457 CXFA_Font* font = pTextProvider->GetFontIfExists();
458 if (font)
459 iLinethrough = font->GetLineThrough();
460 }
461
GetColor(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const462 FX_ARGB CXFA_TextParser::GetColor(CXFA_TextProvider* pTextProvider,
463 CFX_CSSComputedStyle* pStyle) const {
464 if (pStyle)
465 return pStyle->GetColor();
466
467 CXFA_Font* font = pTextProvider->GetFontIfExists();
468 return font ? font->GetColor() : 0xFF000000;
469 }
470
GetBaseline(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const471 float CXFA_TextParser::GetBaseline(CXFA_TextProvider* pTextProvider,
472 CFX_CSSComputedStyle* pStyle) const {
473 if (pStyle) {
474 if (pStyle->GetVerticalAlign() == CFX_CSSVerticalAlign::Number)
475 return pStyle->GetNumberVerticalAlign();
476 } else {
477 CXFA_Font* font = pTextProvider->GetFontIfExists();
478 if (font)
479 return font->GetBaselineShift();
480 }
481 return 0;
482 }
483
GetLineHeight(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,bool bFirst,float fVerScale) const484 float CXFA_TextParser::GetLineHeight(CXFA_TextProvider* pTextProvider,
485 CFX_CSSComputedStyle* pStyle,
486 bool bFirst,
487 float fVerScale) const {
488 float fLineHeight = 0;
489 if (pStyle) {
490 fLineHeight = pStyle->GetLineHeight();
491 } else {
492 CXFA_Para* para = pTextProvider->GetParaIfExists();
493 if (para)
494 fLineHeight = para->GetLineHeight();
495 }
496
497 if (bFirst) {
498 float fFontSize = GetFontSize(pTextProvider, pStyle);
499 if (fLineHeight < 0.1f)
500 fLineHeight = fFontSize;
501 else
502 fLineHeight = std::min(fLineHeight, fFontSize);
503 } else if (fLineHeight < 0.1f) {
504 fLineHeight = GetFontSize(pTextProvider, pStyle) * 1.2f;
505 }
506 fLineHeight *= fVerScale;
507 return fLineHeight;
508 }
509
GetEmbeddedObj(const CXFA_TextProvider * pTextProvider,const CFX_XMLNode * pXMLNode)510 Optional<WideString> CXFA_TextParser::GetEmbeddedObj(
511 const CXFA_TextProvider* pTextProvider,
512 const CFX_XMLNode* pXMLNode) {
513 if (!pXMLNode)
514 return {};
515
516 const CFX_XMLElement* pElement = ToXMLElement(pXMLNode);
517 if (!pElement)
518 return {};
519
520 WideString wsAttr = pElement->GetAttribute(L"xfa:embed");
521 if (wsAttr.IsEmpty())
522 return {};
523
524 if (wsAttr[0] == L'#')
525 wsAttr.Delete(0);
526
527 WideString ws =
528 GetLowerCaseElementAttributeOrDefault(pElement, L"xfa:embedType", L"som");
529 if (!ws.EqualsASCII("uri"))
530 return {};
531
532 ws = GetLowerCaseElementAttributeOrDefault(pElement, L"xfa:embedMode",
533 L"formatted");
534 if (!(ws.EqualsASCII("raw") || ws.EqualsASCII("formatted")))
535 return {};
536
537 return pTextProvider->GetEmbeddedObj(wsAttr);
538 }
539
GetParseContextFromMap(const CFX_XMLNode * pXMLNode)540 CXFA_TextParseContext* CXFA_TextParser::GetParseContextFromMap(
541 const CFX_XMLNode* pXMLNode) {
542 auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
543 return it != m_mapXMLNodeToParseContext.end() ? it->second.get() : nullptr;
544 }
545
GetTabstops(CFX_CSSComputedStyle * pStyle,CXFA_TextTabstopsContext * pTabstopContext)546 bool CXFA_TextParser::GetTabstops(CFX_CSSComputedStyle* pStyle,
547 CXFA_TextTabstopsContext* pTabstopContext) {
548 if (!pStyle || !pTabstopContext)
549 return false;
550
551 WideString wsValue;
552 if (!pStyle->GetCustomStyle(L"xfa-tab-stops", &wsValue) &&
553 !pStyle->GetCustomStyle(L"tab-stops", &wsValue)) {
554 return false;
555 }
556
557 pdfium::span<const wchar_t> spTabStops = wsValue.span();
558 size_t iCur = 0;
559 size_t iLast = 0;
560 WideString wsAlign;
561 TabStopStatus eStatus = TabStopStatus::None;
562 while (iCur < spTabStops.size()) {
563 wchar_t ch = spTabStops[iCur];
564 switch (eStatus) {
565 case TabStopStatus::None:
566 if (ch <= ' ') {
567 iCur++;
568 } else {
569 eStatus = TabStopStatus::Alignment;
570 iLast = iCur;
571 }
572 break;
573 case TabStopStatus::Alignment:
574 if (ch == ' ') {
575 wsAlign = WideStringView(spTabStops.subspan(iLast, iCur - iLast));
576 eStatus = TabStopStatus::StartLeader;
577 iCur++;
578 while (iCur < spTabStops.size() && spTabStops[iCur] <= ' ')
579 iCur++;
580 iLast = iCur;
581 } else {
582 iCur++;
583 }
584 break;
585 case TabStopStatus::StartLeader:
586 if (ch != 'l') {
587 eStatus = TabStopStatus::Location;
588 } else {
589 int32_t iCount = 0;
590 while (iCur < spTabStops.size()) {
591 ch = spTabStops[iCur];
592 iCur++;
593 if (ch == '(') {
594 iCount++;
595 } else if (ch == ')') {
596 iCount--;
597 if (iCount == 0)
598 break;
599 }
600 }
601 while (iCur < spTabStops.size() && spTabStops[iCur] <= ' ')
602 iCur++;
603
604 iLast = iCur;
605 eStatus = TabStopStatus::Location;
606 }
607 break;
608 case TabStopStatus::Location:
609 if (ch == ' ') {
610 uint32_t dwHashCode = FX_HashCode_GetW(wsAlign.AsStringView(), true);
611 CXFA_Measurement ms(
612 WideStringView(spTabStops.subspan(iLast, iCur - iLast)));
613 float fPos = ms.ToUnit(XFA_Unit::Pt);
614 pTabstopContext->Append(dwHashCode, fPos);
615 wsAlign.clear();
616 eStatus = TabStopStatus::None;
617 }
618 iCur++;
619 break;
620 default:
621 break;
622 }
623 }
624
625 if (!wsAlign.IsEmpty()) {
626 uint32_t dwHashCode = FX_HashCode_GetW(wsAlign.AsStringView(), true);
627 CXFA_Measurement ms(
628 WideStringView(spTabStops.subspan(iLast, iCur - iLast)));
629 float fPos = ms.ToUnit(XFA_Unit::Pt);
630 pTabstopContext->Append(dwHashCode, fPos);
631 }
632 return true;
633 }
634
TagProvider()635 CXFA_TextParser::TagProvider::TagProvider()
636 : m_bTagAvailable(false), m_bContent(false) {}
637
~TagProvider()638 CXFA_TextParser::TagProvider::~TagProvider() {}
639