• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_textobject.h"
8 
9 #include <algorithm>
10 
11 #include "core/fpdfapi/font/cpdf_cidfont.h"
12 #include "core/fpdfapi/font/cpdf_font.h"
13 #include "core/fxcrt/check.h"
14 #include "core/fxcrt/fx_coordinates.h"
15 #include "core/fxcrt/span.h"
16 #include "core/fxcrt/span_util.h"
17 
// True when `u` is not the space character (0x20) and is at most 0x28FF.
// The argument is parenthesized so that expression arguments (for example a
// conditional or bitwise expression) are evaluated as a unit. Note that `u`
// is still evaluated twice, so it must not have side effects.
#define ISLATINWORD(u) ((u) != 0x20 && (u) <= 0x28FF)
19 
20 namespace {
21 
IsVertWritingCIDFont(const CPDF_CIDFont * font)22 bool IsVertWritingCIDFont(const CPDF_CIDFont* font) {
23   return font && font->IsVertWriting();
24 }
25 
26 }  // namespace
27 
28 CPDF_TextObject::Item::Item() = default;
29 
30 CPDF_TextObject::Item::Item(const Item& that) = default;
31 
32 CPDF_TextObject::Item::~Item() = default;
33 
CPDF_TextObject(int32_t content_stream)34 CPDF_TextObject::CPDF_TextObject(int32_t content_stream)
35     : CPDF_PageObject(content_stream) {}
36 
CPDF_TextObject()37 CPDF_TextObject::CPDF_TextObject() : CPDF_TextObject(kNoContentStream) {}
38 
39 CPDF_TextObject::~CPDF_TextObject() = default;
40 
CountItems() const41 size_t CPDF_TextObject::CountItems() const {
42   return m_CharCodes.size();
43 }
44 
GetItemInfo(size_t index) const45 CPDF_TextObject::Item CPDF_TextObject::GetItemInfo(size_t index) const {
46   DCHECK(index < m_CharCodes.size());
47 
48   Item info;
49   info.m_CharCode = m_CharCodes[index];
50   info.m_Origin = CFX_PointF(index > 0 ? m_CharPos[index - 1] : 0, 0);
51   if (info.m_CharCode == CPDF_Font::kInvalidCharCode)
52     return info;
53 
54   RetainPtr<CPDF_Font> pFont = GetFont();
55   const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
56   if (!IsVertWritingCIDFont(pCIDFont))
57     return info;
58 
59   uint16_t cid = pCIDFont->CIDFromCharCode(info.m_CharCode);
60   info.m_Origin = CFX_PointF(0, info.m_Origin.x);
61 
62   CFX_Point16 vertical_origin = pCIDFont->GetVertOrigin(cid);
63   float fontsize = GetFontSize();
64   info.m_Origin.x -= fontsize * vertical_origin.x / 1000;
65   info.m_Origin.y -= fontsize * vertical_origin.y / 1000;
66   return info;
67 }
68 
CountChars() const69 size_t CPDF_TextObject::CountChars() const {
70   size_t count = 0;
71   for (uint32_t charcode : m_CharCodes) {
72     if (charcode != CPDF_Font::kInvalidCharCode)
73       ++count;
74   }
75   return count;
76 }
77 
GetCharCode(size_t index) const78 uint32_t CPDF_TextObject::GetCharCode(size_t index) const {
79   size_t count = 0;
80   for (uint32_t code : m_CharCodes) {
81     if (code == CPDF_Font::kInvalidCharCode)
82       continue;
83     if (count++ != index)
84       continue;
85     return code;
86   }
87   return CPDF_Font::kInvalidCharCode;
88 }
89 
GetCharInfo(size_t index) const90 CPDF_TextObject::Item CPDF_TextObject::GetCharInfo(size_t index) const {
91   size_t count = 0;
92   for (size_t i = 0; i < m_CharCodes.size(); ++i) {
93     uint32_t charcode = m_CharCodes[i];
94     if (charcode == CPDF_Font::kInvalidCharCode)
95       continue;
96     if (count++ == index)
97       return GetItemInfo(i);
98   }
99   return Item();
100 }
101 
CountWords() const102 int CPDF_TextObject::CountWords() const {
103   RetainPtr<CPDF_Font> pFont = GetFont();
104   bool bInLatinWord = false;
105   int nWords = 0;
106   for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
107     uint32_t charcode = GetCharCode(i);
108 
109     WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
110     uint16_t unicode = 0;
111     if (swUnicode.GetLength() > 0)
112       unicode = swUnicode[0];
113 
114     bool bIsLatin = ISLATINWORD(unicode);
115     if (bIsLatin && bInLatinWord)
116       continue;
117 
118     bInLatinWord = bIsLatin;
119     if (unicode != 0x20)
120       nWords++;
121   }
122 
123   return nWords;
124 }
125 
GetWordString(int nWordIndex) const126 WideString CPDF_TextObject::GetWordString(int nWordIndex) const {
127   RetainPtr<CPDF_Font> pFont = GetFont();
128   WideString swRet;
129   int nWords = 0;
130   bool bInLatinWord = false;
131   for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
132     uint32_t charcode = GetCharCode(i);
133 
134     WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
135     uint16_t unicode = 0;
136     if (swUnicode.GetLength() > 0)
137       unicode = swUnicode[0];
138 
139     bool bIsLatin = ISLATINWORD(unicode);
140     if (!bIsLatin || !bInLatinWord) {
141       bInLatinWord = bIsLatin;
142       if (unicode != 0x20)
143         nWords++;
144     }
145     if (nWords - 1 == nWordIndex)
146       swRet += unicode;
147   }
148   return swRet;
149 }
150 
Clone() const151 std::unique_ptr<CPDF_TextObject> CPDF_TextObject::Clone() const {
152   auto obj = std::make_unique<CPDF_TextObject>();
153   obj->CopyData(this);
154   obj->m_CharCodes = m_CharCodes;
155   obj->m_CharPos = m_CharPos;
156   obj->m_Pos = m_Pos;
157   return obj;
158 }
159 
GetType() const160 CPDF_PageObject::Type CPDF_TextObject::GetType() const {
161   return Type::kText;
162 }
163 
Transform(const CFX_Matrix & matrix)164 void CPDF_TextObject::Transform(const CFX_Matrix& matrix) {
165   SetTextMatrix(GetTextMatrix() * matrix);
166   SetDirty(true);
167 }
168 
IsText() const169 bool CPDF_TextObject::IsText() const {
170   return true;
171 }
172 
AsText()173 CPDF_TextObject* CPDF_TextObject::AsText() {
174   return this;
175 }
176 
AsText() const177 const CPDF_TextObject* CPDF_TextObject::AsText() const {
178   return this;
179 }
180 
GetTextMatrix() const181 CFX_Matrix CPDF_TextObject::GetTextMatrix() const {
182   pdfium::span<const float> pTextMatrix = text_state().GetMatrix();
183   return CFX_Matrix(pTextMatrix[0], pTextMatrix[2], pTextMatrix[1],
184                     pTextMatrix[3], m_Pos.x, m_Pos.y);
185 }
186 
SetTextMatrix(const CFX_Matrix & matrix)187 void CPDF_TextObject::SetTextMatrix(const CFX_Matrix& matrix) {
188   pdfium::span<float> pTextMatrix = mutable_text_state().GetMutableMatrix();
189   pTextMatrix[0] = matrix.a;
190   pTextMatrix[1] = matrix.c;
191   pTextMatrix[2] = matrix.b;
192   pTextMatrix[3] = matrix.d;
193   m_Pos = CFX_PointF(matrix.e, matrix.f);
194   CalcPositionDataInternal(GetFont());
195 }
196 
SetSegments(pdfium::span<const ByteString> strings,pdfium::span<const float> kernings)197 void CPDF_TextObject::SetSegments(pdfium::span<const ByteString> strings,
198                                   pdfium::span<const float> kernings) {
199   size_t nSegs = strings.size();
200   CHECK(nSegs);
201   m_CharCodes.clear();
202   m_CharPos.clear();
203   RetainPtr<CPDF_Font> pFont = GetFont();
204   size_t nChars = nSegs - 1;
205   for (const auto& str : strings) {
206     nChars += pFont->CountChar(str.AsStringView());
207   }
208   CHECK(nChars);
209   m_CharCodes.resize(nChars);
210   m_CharPos.resize(nChars - 1);
211   size_t index = 0;
212   for (size_t i = 0; i < nSegs; ++i) {
213     ByteStringView segment = strings[i].AsStringView();
214     size_t offset = 0;
215     while (offset < segment.GetLength()) {
216       DCHECK(index < m_CharCodes.size());
217       m_CharCodes[index++] = pFont->GetNextChar(segment, &offset);
218     }
219     if (i != nSegs - 1) {
220       m_CharPos[index - 1] = kernings[i];
221       m_CharCodes[index++] = CPDF_Font::kInvalidCharCode;
222     }
223   }
224 }
225 
SetText(const ByteString & str)226 void CPDF_TextObject::SetText(const ByteString& str) {
227   SetSegments(pdfium::span_from_ref(str), pdfium::span<float>());
228   CalcPositionDataInternal(GetFont());
229   SetDirty(true);
230 }
231 
GetCharWidth(uint32_t charcode) const232 float CPDF_TextObject::GetCharWidth(uint32_t charcode) const {
233   const float fontsize = GetFontSize() / 1000;
234   RetainPtr<CPDF_Font> pFont = GetFont();
235   const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
236   if (!IsVertWritingCIDFont(pCIDFont))
237     return pFont->GetCharWidthF(charcode) * fontsize;
238 
239   uint16_t cid = pCIDFont->CIDFromCharCode(charcode);
240   return pCIDFont->GetVertWidth(cid) * fontsize;
241 }
242 
GetFont() const243 RetainPtr<CPDF_Font> CPDF_TextObject::GetFont() const {
244   return text_state().GetFont();
245 }
246 
GetFontSize() const247 float CPDF_TextObject::GetFontSize() const {
248   return text_state().GetFontSize();
249 }
250 
GetTextRenderMode() const251 TextRenderingMode CPDF_TextObject::GetTextRenderMode() const {
252   return text_state().GetTextMode();
253 }
254 
SetTextRenderMode(TextRenderingMode mode)255 void CPDF_TextObject::SetTextRenderMode(TextRenderingMode mode) {
256   mutable_text_state().SetTextMode(mode);
257   SetDirty(true);
258 }
259 
CalcPositionData(float horz_scale)260 CFX_PointF CPDF_TextObject::CalcPositionData(float horz_scale) {
261   RetainPtr<CPDF_Font> pFont = GetFont();
262   const float curpos = CalcPositionDataInternal(pFont);
263   if (IsVertWritingCIDFont(pFont->AsCIDFont()))
264     return {0, curpos};
265   return {curpos * horz_scale, 0};
266 }
267 
CalcPositionDataInternal(const RetainPtr<CPDF_Font> & pFont)268 float CPDF_TextObject::CalcPositionDataInternal(
269     const RetainPtr<CPDF_Font>& pFont) {
270   float curpos = 0;
271   float min_x = 10000.0f;
272   float max_x = -10000.0f;
273   float min_y = 10000.0f;
274   float max_y = -10000.0f;
275   const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
276   const bool bVertWriting = IsVertWritingCIDFont(pCIDFont);
277   const float fontsize = GetFontSize();
278 
279   for (size_t i = 0; i < m_CharCodes.size(); ++i) {
280     const uint32_t charcode = m_CharCodes[i];
281     if (i > 0) {
282       if (charcode == CPDF_Font::kInvalidCharCode) {
283         curpos -= (m_CharPos[i - 1] * fontsize) / 1000;
284         continue;
285       }
286       m_CharPos[i - 1] = curpos;
287     }
288 
289     FX_RECT char_rect = pFont->GetCharBBox(charcode);
290     float charwidth;
291     if (bVertWriting) {
292       uint16_t cid = pCIDFont->CIDFromCharCode(charcode);
293       CFX_Point16 vertical_origin = pCIDFont->GetVertOrigin(cid);
294       char_rect.Offset(-vertical_origin.x, -vertical_origin.y);
295       min_x = std::min({min_x, static_cast<float>(char_rect.left),
296                         static_cast<float>(char_rect.right)});
297       max_x = std::max({max_x, static_cast<float>(char_rect.left),
298                         static_cast<float>(char_rect.right)});
299       const float char_top = curpos + char_rect.top * fontsize / 1000;
300       const float char_bottom = curpos + char_rect.bottom * fontsize / 1000;
301       min_y = std::min({min_y, char_top, char_bottom});
302       max_y = std::max({max_y, char_top, char_bottom});
303       charwidth = pCIDFont->GetVertWidth(cid) * fontsize / 1000;
304     } else {
305       min_y = std::min({min_y, static_cast<float>(char_rect.top),
306                         static_cast<float>(char_rect.bottom)});
307       max_y = std::max({max_y, static_cast<float>(char_rect.top),
308                         static_cast<float>(char_rect.bottom)});
309       const float char_left = curpos + char_rect.left * fontsize / 1000;
310       const float char_right = curpos + char_rect.right * fontsize / 1000;
311       min_x = std::min({min_x, char_left, char_right});
312       max_x = std::max({max_x, char_left, char_right});
313       charwidth = pFont->GetCharWidthF(charcode) * fontsize / 1000;
314     }
315     curpos += charwidth;
316     if (charcode == ' ' && (!pCIDFont || pCIDFont->GetCharSize(' ') == 1))
317       curpos += text_state().GetWordSpace();
318 
319     curpos += text_state().GetCharSpace();
320   }
321 
322   if (bVertWriting) {
323     min_x = min_x * fontsize / 1000;
324     max_x = max_x * fontsize / 1000;
325   } else {
326     min_y = min_y * fontsize / 1000;
327     max_y = max_y * fontsize / 1000;
328   }
329 
330   SetOriginalRect(CFX_FloatRect(min_x, min_y, max_x, max_y));
331   CFX_FloatRect rect = GetTextMatrix().TransformRect(GetOriginalRect());
332   if (TextRenderingModeIsStrokeMode(text_state().GetTextMode())) {
333     // TODO(crbug.com/pdfium/1840): Does the original rect need a similar
334     // adjustment?
335     const float half_width = graph_state().GetLineWidth() / 2;
336     rect.Inflate(half_width, half_width);
337   }
338   SetRect(rect);
339 
340   return curpos;
341 }
342