• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_textobject.h"
8 
9 #include <algorithm>
10 
11 #include "core/fpdfapi/font/cpdf_cidfont.h"
12 #include "core/fpdfapi/font/cpdf_font.h"
13 #include "core/fxcrt/fx_coordinates.h"
14 #include "third_party/base/check.h"
15 #include "third_party/base/span.h"
16 
// Evaluates to true when |u| is not the space character (0x20) and is no
// greater than 0x28FF; the word-splitting code below treats such code units
// as part of a "Latin" word.
//
// Every use of |u| is parenthesized so that expression arguments (e.g. a
// conditional or a bitwise-or) expand with the intended precedence. Note that
// |u| is evaluated twice, so arguments must not have side effects.
#define ISLATINWORD(u) ((u) != 0x20 && (u) <= 0x28FF)
18 
19 namespace {
20 
IsVertWritingCIDFont(const CPDF_CIDFont * font)21 bool IsVertWritingCIDFont(const CPDF_CIDFont* font) {
22   return font && font->IsVertWriting();
23 }
24 
25 }  // namespace
26 
27 CPDF_TextObject::Item::Item() = default;
28 
29 CPDF_TextObject::Item::Item(const Item& that) = default;
30 
31 CPDF_TextObject::Item::~Item() = default;
32 
CPDF_TextObject(int32_t content_stream)33 CPDF_TextObject::CPDF_TextObject(int32_t content_stream)
34     : CPDF_PageObject(content_stream) {}
35 
CPDF_TextObject()36 CPDF_TextObject::CPDF_TextObject() : CPDF_TextObject(kNoContentStream) {}
37 
38 CPDF_TextObject::~CPDF_TextObject() = default;
39 
CountItems() const40 size_t CPDF_TextObject::CountItems() const {
41   return m_CharCodes.size();
42 }
43 
GetItemInfo(size_t index) const44 CPDF_TextObject::Item CPDF_TextObject::GetItemInfo(size_t index) const {
45   DCHECK(index < m_CharCodes.size());
46 
47   Item info;
48   info.m_CharCode = m_CharCodes[index];
49   info.m_Origin = CFX_PointF(index > 0 ? m_CharPos[index - 1] : 0, 0);
50   if (info.m_CharCode == CPDF_Font::kInvalidCharCode)
51     return info;
52 
53   RetainPtr<CPDF_Font> pFont = GetFont();
54   const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
55   if (!IsVertWritingCIDFont(pCIDFont))
56     return info;
57 
58   uint16_t cid = pCIDFont->CIDFromCharCode(info.m_CharCode);
59   info.m_Origin = CFX_PointF(0, info.m_Origin.x);
60 
61   CFX_Point16 vertical_origin = pCIDFont->GetVertOrigin(cid);
62   float fontsize = GetFontSize();
63   info.m_Origin.x -= fontsize * vertical_origin.x / 1000;
64   info.m_Origin.y -= fontsize * vertical_origin.y / 1000;
65   return info;
66 }
67 
CountChars() const68 size_t CPDF_TextObject::CountChars() const {
69   size_t count = 0;
70   for (uint32_t charcode : m_CharCodes) {
71     if (charcode != CPDF_Font::kInvalidCharCode)
72       ++count;
73   }
74   return count;
75 }
76 
GetCharCode(size_t index) const77 uint32_t CPDF_TextObject::GetCharCode(size_t index) const {
78   size_t count = 0;
79   for (uint32_t code : m_CharCodes) {
80     if (code == CPDF_Font::kInvalidCharCode)
81       continue;
82     if (count++ != index)
83       continue;
84     return code;
85   }
86   return CPDF_Font::kInvalidCharCode;
87 }
88 
GetCharInfo(size_t index) const89 CPDF_TextObject::Item CPDF_TextObject::GetCharInfo(size_t index) const {
90   size_t count = 0;
91   for (size_t i = 0; i < m_CharCodes.size(); ++i) {
92     uint32_t charcode = m_CharCodes[i];
93     if (charcode == CPDF_Font::kInvalidCharCode)
94       continue;
95     if (count++ == index)
96       return GetItemInfo(i);
97   }
98   return Item();
99 }
100 
CountWords() const101 int CPDF_TextObject::CountWords() const {
102   RetainPtr<CPDF_Font> pFont = GetFont();
103   bool bInLatinWord = false;
104   int nWords = 0;
105   for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
106     uint32_t charcode = GetCharCode(i);
107 
108     WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
109     uint16_t unicode = 0;
110     if (swUnicode.GetLength() > 0)
111       unicode = swUnicode[0];
112 
113     bool bIsLatin = ISLATINWORD(unicode);
114     if (bIsLatin && bInLatinWord)
115       continue;
116 
117     bInLatinWord = bIsLatin;
118     if (unicode != 0x20)
119       nWords++;
120   }
121 
122   return nWords;
123 }
124 
GetWordString(int nWordIndex) const125 WideString CPDF_TextObject::GetWordString(int nWordIndex) const {
126   RetainPtr<CPDF_Font> pFont = GetFont();
127   WideString swRet;
128   int nWords = 0;
129   bool bInLatinWord = false;
130   for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
131     uint32_t charcode = GetCharCode(i);
132 
133     WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
134     uint16_t unicode = 0;
135     if (swUnicode.GetLength() > 0)
136       unicode = swUnicode[0];
137 
138     bool bIsLatin = ISLATINWORD(unicode);
139     if (!bIsLatin || !bInLatinWord) {
140       bInLatinWord = bIsLatin;
141       if (unicode != 0x20)
142         nWords++;
143     }
144     if (nWords - 1 == nWordIndex)
145       swRet += unicode;
146   }
147   return swRet;
148 }
149 
Clone() const150 std::unique_ptr<CPDF_TextObject> CPDF_TextObject::Clone() const {
151   auto obj = std::make_unique<CPDF_TextObject>();
152   obj->CopyData(this);
153   obj->m_CharCodes = m_CharCodes;
154   obj->m_CharPos = m_CharPos;
155   obj->m_Pos = m_Pos;
156   return obj;
157 }
158 
GetType() const159 CPDF_PageObject::Type CPDF_TextObject::GetType() const {
160   return Type::kText;
161 }
162 
Transform(const CFX_Matrix & matrix)163 void CPDF_TextObject::Transform(const CFX_Matrix& matrix) {
164   SetTextMatrix(GetTextMatrix() * matrix);
165   SetDirty(true);
166 }
167 
IsText() const168 bool CPDF_TextObject::IsText() const {
169   return true;
170 }
171 
AsText()172 CPDF_TextObject* CPDF_TextObject::AsText() {
173   return this;
174 }
175 
AsText() const176 const CPDF_TextObject* CPDF_TextObject::AsText() const {
177   return this;
178 }
179 
GetTextMatrix() const180 CFX_Matrix CPDF_TextObject::GetTextMatrix() const {
181   pdfium::span<const float> pTextMatrix = m_TextState.GetMatrix();
182   return CFX_Matrix(pTextMatrix[0], pTextMatrix[2], pTextMatrix[1],
183                     pTextMatrix[3], m_Pos.x, m_Pos.y);
184 }
185 
SetTextMatrix(const CFX_Matrix & matrix)186 void CPDF_TextObject::SetTextMatrix(const CFX_Matrix& matrix) {
187   pdfium::span<float> pTextMatrix = m_TextState.GetMutableMatrix();
188   pTextMatrix[0] = matrix.a;
189   pTextMatrix[1] = matrix.c;
190   pTextMatrix[2] = matrix.b;
191   pTextMatrix[3] = matrix.d;
192   m_Pos = CFX_PointF(matrix.e, matrix.f);
193   CalcPositionDataInternal(GetFont());
194 }
195 
SetSegments(const ByteString * pStrs,const std::vector<float> & kernings,size_t nSegs)196 void CPDF_TextObject::SetSegments(const ByteString* pStrs,
197                                   const std::vector<float>& kernings,
198                                   size_t nSegs) {
199   CHECK(nSegs);
200   m_CharCodes.clear();
201   m_CharPos.clear();
202   RetainPtr<CPDF_Font> pFont = GetFont();
203   size_t nChars = nSegs - 1;
204   for (size_t i = 0; i < nSegs; ++i)
205     nChars += pFont->CountChar(pStrs[i].AsStringView());
206 
207   CHECK(nChars);
208   m_CharCodes.resize(nChars);
209   m_CharPos.resize(nChars - 1);
210   size_t index = 0;
211   for (size_t i = 0; i < nSegs; ++i) {
212     ByteStringView segment = pStrs[i].AsStringView();
213     size_t offset = 0;
214     while (offset < segment.GetLength()) {
215       DCHECK(index < m_CharCodes.size());
216       m_CharCodes[index++] = pFont->GetNextChar(segment, &offset);
217     }
218     if (i != nSegs - 1) {
219       m_CharPos[index - 1] = kernings[i];
220       m_CharCodes[index++] = CPDF_Font::kInvalidCharCode;
221     }
222   }
223 }
224 
SetText(const ByteString & str)225 void CPDF_TextObject::SetText(const ByteString& str) {
226   SetSegments(&str, std::vector<float>(), 1);
227   CalcPositionDataInternal(GetFont());
228   SetDirty(true);
229 }
230 
GetCharWidth(uint32_t charcode) const231 float CPDF_TextObject::GetCharWidth(uint32_t charcode) const {
232   const float fontsize = GetFontSize() / 1000;
233   RetainPtr<CPDF_Font> pFont = GetFont();
234   const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
235   if (!IsVertWritingCIDFont(pCIDFont))
236     return pFont->GetCharWidthF(charcode) * fontsize;
237 
238   uint16_t cid = pCIDFont->CIDFromCharCode(charcode);
239   return pCIDFont->GetVertWidth(cid) * fontsize;
240 }
241 
GetFont() const242 RetainPtr<CPDF_Font> CPDF_TextObject::GetFont() const {
243   return m_TextState.GetFont();
244 }
245 
GetFontSize() const246 float CPDF_TextObject::GetFontSize() const {
247   return m_TextState.GetFontSize();
248 }
249 
GetTextRenderMode() const250 TextRenderingMode CPDF_TextObject::GetTextRenderMode() const {
251   return m_TextState.GetTextMode();
252 }
253 
SetTextRenderMode(TextRenderingMode mode)254 void CPDF_TextObject::SetTextRenderMode(TextRenderingMode mode) {
255   m_TextState.SetTextMode(mode);
256   SetDirty(true);
257 }
258 
CalcPositionData(float horz_scale)259 CFX_PointF CPDF_TextObject::CalcPositionData(float horz_scale) {
260   RetainPtr<CPDF_Font> pFont = GetFont();
261   const float curpos = CalcPositionDataInternal(pFont);
262   if (IsVertWritingCIDFont(pFont->AsCIDFont()))
263     return {0, curpos};
264   return {curpos * horz_scale, 0};
265 }
266 
CalcPositionDataInternal(const RetainPtr<CPDF_Font> & pFont)267 float CPDF_TextObject::CalcPositionDataInternal(
268     const RetainPtr<CPDF_Font>& pFont) {
269   float curpos = 0;
270   float min_x = 10000.0f;
271   float max_x = -10000.0f;
272   float min_y = 10000.0f;
273   float max_y = -10000.0f;
274   const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
275   const bool bVertWriting = IsVertWritingCIDFont(pCIDFont);
276   const float fontsize = GetFontSize();
277 
278   for (size_t i = 0; i < m_CharCodes.size(); ++i) {
279     const uint32_t charcode = m_CharCodes[i];
280     if (i > 0) {
281       if (charcode == CPDF_Font::kInvalidCharCode) {
282         curpos -= (m_CharPos[i - 1] * fontsize) / 1000;
283         continue;
284       }
285       m_CharPos[i - 1] = curpos;
286     }
287 
288     FX_RECT char_rect = pFont->GetCharBBox(charcode);
289     float charwidth;
290     if (bVertWriting) {
291       uint16_t cid = pCIDFont->CIDFromCharCode(charcode);
292       CFX_Point16 vertical_origin = pCIDFont->GetVertOrigin(cid);
293       char_rect.Offset(-vertical_origin.x, -vertical_origin.y);
294       min_x = std::min(
295           min_x, static_cast<float>(std::min(char_rect.left, char_rect.right)));
296       max_x = std::max(
297           max_x, static_cast<float>(std::max(char_rect.left, char_rect.right)));
298       const float char_top = curpos + char_rect.top * fontsize / 1000;
299       const float char_bottom = curpos + char_rect.bottom * fontsize / 1000;
300       min_y = std::min(min_y, std::min(char_top, char_bottom));
301       max_y = std::max(max_y, std::max(char_top, char_bottom));
302       charwidth = pCIDFont->GetVertWidth(cid) * fontsize / 1000;
303     } else {
304       min_y = std::min(
305           min_y, static_cast<float>(std::min(char_rect.top, char_rect.bottom)));
306       max_y = std::max(
307           max_y, static_cast<float>(std::max(char_rect.top, char_rect.bottom)));
308       const float char_left = curpos + char_rect.left * fontsize / 1000;
309       const float char_right = curpos + char_rect.right * fontsize / 1000;
310       min_x = std::min(min_x, std::min(char_left, char_right));
311       max_x = std::max(max_x, std::max(char_left, char_right));
312       charwidth = pFont->GetCharWidthF(charcode) * fontsize / 1000;
313     }
314     curpos += charwidth;
315     if (charcode == ' ' && (!pCIDFont || pCIDFont->GetCharSize(' ') == 1))
316       curpos += m_TextState.GetWordSpace();
317 
318     curpos += m_TextState.GetCharSpace();
319   }
320 
321   if (bVertWriting) {
322     min_x = min_x * fontsize / 1000;
323     max_x = max_x * fontsize / 1000;
324   } else {
325     min_y = min_y * fontsize / 1000;
326     max_y = max_y * fontsize / 1000;
327   }
328 
329   SetOriginalRect(CFX_FloatRect(min_x, min_y, max_x, max_y));
330   CFX_FloatRect rect = GetTextMatrix().TransformRect(GetOriginalRect());
331   if (TextRenderingModeIsStrokeMode(m_TextState.GetTextMode())) {
332     // TODO(crbug.com/pdfium/1840): Does the original rect need a similar
333     // adjustment?
334     const float half_width = m_GraphState.GetLineWidth() / 2;
335     rect.Inflate(half_width, half_width);
336   }
337   SetRect(rect);
338 
339   return curpos;
340 }
341