1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/page/cpdf_textobject.h"
8
9 #include <algorithm>
10 #include <utility>
11
12 #include "core/fpdfapi/font/cpdf_cidfont.h"
13 #include "core/fpdfapi/font/cpdf_font.h"
14 #include "third_party/base/ptr_util.h"
15
16 #define ISLATINWORD(u) (u != 0x20 && u <= 0x28FF)
17
CPDF_TextObjectItem()18 CPDF_TextObjectItem::CPDF_TextObjectItem() : m_CharCode(0) {}
19
20 CPDF_TextObjectItem::~CPDF_TextObjectItem() = default;
21
CPDF_TextObject(int32_t content_stream)22 CPDF_TextObject::CPDF_TextObject(int32_t content_stream)
23 : CPDF_PageObject(content_stream) {}
24
CPDF_TextObject()25 CPDF_TextObject::CPDF_TextObject() : CPDF_TextObject(kNoContentStream) {}
26
~CPDF_TextObject()27 CPDF_TextObject::~CPDF_TextObject() {
28 // Move m_CharCodes to a local variable so it will be captured in crash dumps,
29 // to help with investigating crbug.com/782215.
30 auto char_codes_copy = std::move(m_CharCodes);
31 }
32
CountItems() const33 size_t CPDF_TextObject::CountItems() const {
34 return m_CharCodes.size();
35 }
36
GetItemInfo(size_t index,CPDF_TextObjectItem * pInfo) const37 void CPDF_TextObject::GetItemInfo(size_t index,
38 CPDF_TextObjectItem* pInfo) const {
39 ASSERT(index < m_CharCodes.size());
40 pInfo->m_CharCode = m_CharCodes[index];
41 pInfo->m_Origin = CFX_PointF(index > 0 ? m_CharPos[index - 1] : 0, 0);
42 if (pInfo->m_CharCode == CPDF_Font::kInvalidCharCode)
43 return;
44
45 RetainPtr<CPDF_Font> pFont = GetFont();
46 if (!pFont->IsCIDFont() || !pFont->AsCIDFont()->IsVertWriting())
47 return;
48
49 uint16_t CID = pFont->AsCIDFont()->CIDFromCharCode(pInfo->m_CharCode);
50 pInfo->m_Origin = CFX_PointF(0, pInfo->m_Origin.x);
51
52 short vx;
53 short vy;
54 pFont->AsCIDFont()->GetVertOrigin(CID, vx, vy);
55
56 float fontsize = GetFontSize();
57 pInfo->m_Origin.x -= fontsize * vx / 1000;
58 pInfo->m_Origin.y -= fontsize * vy / 1000;
59 }
60
CountChars() const61 size_t CPDF_TextObject::CountChars() const {
62 size_t count = 0;
63 for (uint32_t charcode : m_CharCodes) {
64 if (charcode != CPDF_Font::kInvalidCharCode)
65 ++count;
66 }
67 return count;
68 }
69
GetCharInfo(size_t index,uint32_t * charcode,float * kerning) const70 void CPDF_TextObject::GetCharInfo(size_t index,
71 uint32_t* charcode,
72 float* kerning) const {
73 size_t count = 0;
74 for (size_t i = 0; i < m_CharCodes.size(); ++i) {
75 if (m_CharCodes[i] == CPDF_Font::kInvalidCharCode)
76 continue;
77 if (count++ != index)
78 continue;
79 *charcode = m_CharCodes[i];
80 if (i == m_CharCodes.size() - 1 ||
81 m_CharCodes[i + 1] != CPDF_Font::kInvalidCharCode) {
82 *kerning = 0;
83 } else {
84 *kerning = m_CharPos[i];
85 }
86 return;
87 }
88 }
89
GetCharInfo(size_t index,CPDF_TextObjectItem * pInfo) const90 void CPDF_TextObject::GetCharInfo(size_t index,
91 CPDF_TextObjectItem* pInfo) const {
92 size_t count = 0;
93 for (size_t i = 0; i < m_CharCodes.size(); ++i) {
94 uint32_t charcode = m_CharCodes[i];
95 if (charcode == CPDF_Font::kInvalidCharCode)
96 continue;
97 if (count++ != index)
98 continue;
99 GetItemInfo(i, pInfo);
100 break;
101 }
102 }
103
CountWords() const104 int CPDF_TextObject::CountWords() const {
105 RetainPtr<CPDF_Font> pFont = GetFont();
106 bool bInLatinWord = false;
107 int nWords = 0;
108 for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
109 uint32_t charcode = CPDF_Font::kInvalidCharCode;
110 float unused_kerning;
111 GetCharInfo(i, &charcode, &unused_kerning);
112
113 WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
114 uint16_t unicode = 0;
115 if (swUnicode.GetLength() > 0)
116 unicode = swUnicode[0];
117
118 bool bIsLatin = ISLATINWORD(unicode);
119 if (bIsLatin && bInLatinWord)
120 continue;
121
122 bInLatinWord = bIsLatin;
123 if (unicode != 0x20)
124 nWords++;
125 }
126
127 return nWords;
128 }
129
GetWordString(int nWordIndex) const130 WideString CPDF_TextObject::GetWordString(int nWordIndex) const {
131 RetainPtr<CPDF_Font> pFont = GetFont();
132 WideString swRet;
133 int nWords = 0;
134 bool bInLatinWord = false;
135 for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
136 uint32_t charcode = CPDF_Font::kInvalidCharCode;
137 float unused_kerning;
138 GetCharInfo(i, &charcode, &unused_kerning);
139
140 WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
141 uint16_t unicode = 0;
142 if (swUnicode.GetLength() > 0)
143 unicode = swUnicode[0];
144
145 bool bIsLatin = ISLATINWORD(unicode);
146 if (!bIsLatin || !bInLatinWord) {
147 bInLatinWord = bIsLatin;
148 if (unicode != 0x20)
149 nWords++;
150 }
151 if (nWords - 1 == nWordIndex)
152 swRet += unicode;
153 }
154 return swRet;
155 }
156
Clone() const157 std::unique_ptr<CPDF_TextObject> CPDF_TextObject::Clone() const {
158 auto obj = pdfium::MakeUnique<CPDF_TextObject>();
159 obj->CopyData(this);
160 obj->m_CharCodes = m_CharCodes;
161 obj->m_CharPos = m_CharPos;
162 obj->m_Pos = m_Pos;
163 return obj;
164 }
165
GetType() const166 CPDF_PageObject::Type CPDF_TextObject::GetType() const {
167 return TEXT;
168 }
169
Transform(const CFX_Matrix & matrix)170 void CPDF_TextObject::Transform(const CFX_Matrix& matrix) {
171 CFX_Matrix text_matrix = GetTextMatrix() * matrix;
172
173 float* pTextMatrix = m_TextState.GetMutableMatrix();
174 pTextMatrix[0] = text_matrix.a;
175 pTextMatrix[1] = text_matrix.c;
176 pTextMatrix[2] = text_matrix.b;
177 pTextMatrix[3] = text_matrix.d;
178 m_Pos = CFX_PointF(text_matrix.e, text_matrix.f);
179 CalcPositionData(0);
180 SetDirty(true);
181 }
182
IsText() const183 bool CPDF_TextObject::IsText() const {
184 return true;
185 }
186
AsText()187 CPDF_TextObject* CPDF_TextObject::AsText() {
188 return this;
189 }
190
AsText() const191 const CPDF_TextObject* CPDF_TextObject::AsText() const {
192 return this;
193 }
194
GetTextMatrix() const195 CFX_Matrix CPDF_TextObject::GetTextMatrix() const {
196 const float* pTextMatrix = m_TextState.GetMatrix();
197 return CFX_Matrix(pTextMatrix[0], pTextMatrix[2], pTextMatrix[1],
198 pTextMatrix[3], m_Pos.x, m_Pos.y);
199 }
200
SetSegments(const ByteString * pStrs,const std::vector<float> & kernings,size_t nSegs)201 void CPDF_TextObject::SetSegments(const ByteString* pStrs,
202 const std::vector<float>& kernings,
203 size_t nSegs) {
204 m_CharCodes.clear();
205 m_CharPos.clear();
206 RetainPtr<CPDF_Font> pFont = GetFont();
207 int nChars = 0;
208 for (size_t i = 0; i < nSegs; ++i)
209 nChars += pFont->CountChar(pStrs[i].AsStringView());
210 nChars += nSegs - 1;
211 m_CharCodes.resize(nChars);
212 m_CharPos.resize(nChars - 1);
213 size_t index = 0;
214 for (size_t i = 0; i < nSegs; ++i) {
215 ByteStringView segment = pStrs[i].AsStringView();
216 size_t offset = 0;
217 while (offset < segment.GetLength()) {
218 ASSERT(index < m_CharCodes.size());
219 m_CharCodes[index++] = pFont->GetNextChar(segment, &offset);
220 }
221 if (i != nSegs - 1) {
222 m_CharPos[index - 1] = kernings[i];
223 m_CharCodes[index++] = CPDF_Font::kInvalidCharCode;
224 }
225 }
226 }
227
SetText(const ByteString & str)228 void CPDF_TextObject::SetText(const ByteString& str) {
229 SetSegments(&str, std::vector<float>(), 1);
230 RecalcPositionData();
231 SetDirty(true);
232 }
233
GetCharWidth(uint32_t charcode) const234 float CPDF_TextObject::GetCharWidth(uint32_t charcode) const {
235 float fontsize = GetFontSize() / 1000;
236 RetainPtr<CPDF_Font> pFont = GetFont();
237 bool bVertWriting = false;
238 CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
239 if (pCIDFont)
240 bVertWriting = pCIDFont->IsVertWriting();
241 if (!bVertWriting)
242 return pFont->GetCharWidthF(charcode) * fontsize;
243
244 uint16_t CID = pCIDFont->CIDFromCharCode(charcode);
245 return pCIDFont->GetVertWidth(CID) * fontsize;
246 }
247
GetFont() const248 RetainPtr<CPDF_Font> CPDF_TextObject::GetFont() const {
249 return m_TextState.GetFont();
250 }
251
GetFontSize() const252 float CPDF_TextObject::GetFontSize() const {
253 return m_TextState.GetFontSize();
254 }
255
GetTextRenderMode() const256 TextRenderingMode CPDF_TextObject::GetTextRenderMode() const {
257 return m_TextState.GetTextMode();
258 }
259
CalcPositionData(float horz_scale)260 CFX_PointF CPDF_TextObject::CalcPositionData(float horz_scale) {
261 float curpos = 0;
262 float min_x = 10000 * 1.0f;
263 float max_x = -10000 * 1.0f;
264 float min_y = 10000 * 1.0f;
265 float max_y = -10000 * 1.0f;
266 RetainPtr<CPDF_Font> pFont = GetFont();
267 bool bVertWriting = false;
268 CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
269 if (pCIDFont)
270 bVertWriting = pCIDFont->IsVertWriting();
271
272 float fontsize = GetFontSize();
273 for (size_t i = 0; i < m_CharCodes.size(); ++i) {
274 uint32_t charcode = m_CharCodes[i];
275 if (i > 0) {
276 if (charcode == CPDF_Font::kInvalidCharCode) {
277 curpos -= (m_CharPos[i - 1] * fontsize) / 1000;
278 continue;
279 }
280 m_CharPos[i - 1] = curpos;
281 }
282
283 FX_RECT char_rect = pFont->GetCharBBox(charcode);
284 float charwidth;
285 if (!bVertWriting) {
286 min_y = std::min(
287 min_y, static_cast<float>(std::min(char_rect.top, char_rect.bottom)));
288 max_y = std::max(
289 max_y, static_cast<float>(std::max(char_rect.top, char_rect.bottom)));
290 float char_left = curpos + char_rect.left * fontsize / 1000;
291 float char_right = curpos + char_rect.right * fontsize / 1000;
292 min_x = std::min(min_x, std::min(char_left, char_right));
293 max_x = std::max(max_x, std::max(char_left, char_right));
294 charwidth = pFont->GetCharWidthF(charcode) * fontsize / 1000;
295 } else {
296 uint16_t CID = pCIDFont->CIDFromCharCode(charcode);
297 short vx;
298 short vy;
299 pCIDFont->GetVertOrigin(CID, vx, vy);
300 char_rect.left -= vx;
301 char_rect.right -= vx;
302 char_rect.top -= vy;
303 char_rect.bottom -= vy;
304 min_x = std::min(
305 min_x, static_cast<float>(std::min(char_rect.left, char_rect.right)));
306 max_x = std::max(
307 max_x, static_cast<float>(std::max(char_rect.left, char_rect.right)));
308 float char_top = curpos + char_rect.top * fontsize / 1000;
309 float char_bottom = curpos + char_rect.bottom * fontsize / 1000;
310 min_y = std::min(min_y, std::min(char_top, char_bottom));
311 max_y = std::max(max_y, std::max(char_top, char_bottom));
312 charwidth = pCIDFont->GetVertWidth(CID) * fontsize / 1000;
313 }
314 curpos += charwidth;
315 if (charcode == ' ' && (!pCIDFont || pCIDFont->GetCharSize(' ') == 1))
316 curpos += m_TextState.GetWordSpace();
317
318 curpos += m_TextState.GetCharSpace();
319 }
320
321 CFX_PointF ret;
322 if (bVertWriting) {
323 ret.y = curpos;
324 min_x = min_x * fontsize / 1000;
325 max_x = max_x * fontsize / 1000;
326 } else {
327 ret.x = curpos * horz_scale;
328 min_y = min_y * fontsize / 1000;
329 max_y = max_y * fontsize / 1000;
330 }
331 SetRect(
332 GetTextMatrix().TransformRect(CFX_FloatRect(min_x, min_y, max_x, max_y)));
333
334 if (!TextRenderingModeIsStrokeMode(m_TextState.GetTextMode()))
335 return ret;
336
337 float half_width = m_GraphState.GetLineWidth() / 2;
338 m_Rect.left -= half_width;
339 m_Rect.right += half_width;
340 m_Rect.top += half_width;
341 m_Rect.bottom -= half_width;
342
343 return ret;
344 }
345
RecalcPositionData()346 void CPDF_TextObject::RecalcPositionData() {
347 CalcPositionData(1);
348 }
349