1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfdoc/ctypeset.h"
8
9 #include <algorithm>
10
11 #include "core/fpdfdoc/cline.h"
12 #include "core/fpdfdoc/cpdf_variabletext.h"
13 #include "core/fpdfdoc/cpvt_wordinfo.h"
14 #include "core/fpdfdoc/csection.h"
15 #include "third_party/base/stl_util.h"
16
namespace {

// Classification flags for the 128 ASCII code points, indexed by code point.
// Bits that the helpers in this namespace read:
//   0x01  Latin letter             (IsLatin)
//   0x04  open-style punctuation   (IsOpenStylePunctuation)
//   0x08  punctuation              (IsPunctuation)
//   0x20  connective symbol        (IsConnectiveSymbol)
// Bits 0x02 (set for '0'..'9') and 0x10 (set for '$') also appear in the
// table, but nothing in this namespace tests them.
const uint8_t special_chars[128] = {
    // 0x00 - 0x0F
    0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    // 0x10 - 0x1F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    // 0x20 - 0x2F:  space ! " # $ % & ' ( ) * + , - . /
    0x00, 0x08, 0x08, 0x00, 0x10, 0x00, 0x00, 0x28,
    0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28,
    // 0x30 - 0x3F:  0-9 : ; < = > ?
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
    0x02, 0x02, 0x08, 0x08, 0x00, 0x00, 0x00, 0x08,
    // 0x40 - 0x4F:  @ A-O
    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    // 0x50 - 0x5F:  P-Z [ \ ] ^ _
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
    // 0x60 - 0x6F:  ` a-o
    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    // 0x70 - 0x7F:  p-z { | } ~ DEL
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
};

// Returns true if |word| is classified as a Latin letter: via the 0x01 flag
// of |special_chars| for ASCII, otherwise by membership in the ranges below.
bool IsLatin(uint16_t word) {
  if (word <= 0x007F)
    return (special_chars[word] & 0x01) != 0;

  return (word >= 0x00C0 && word <= 0x00FF) ||  // Latin-1 Supplement (upper)
         (word >= 0x0100 && word <= 0x024F) ||  // Latin Extended-A / -B
         (word >= 0x1E00 && word <= 0x1EFF) ||  // Latin Extended Additional
         (word >= 0x2C60 && word <= 0x2C7F) ||  // Latin Extended-C
         (word >= 0xA720 && word <= 0xA7FF) ||  // Latin Extended-D
         (word >= 0xFF21 && word <= 0xFF3A) ||  // Fullwidth A-Z
         (word >= 0xFF41 && word <= 0xFF5A);    // Fullwidth a-z
}

// Returns true if |word| is one of the ASCII digits '0'..'9'.
bool IsDigit(uint32_t word) {
  return word >= 0x0030 && word <= 0x0039;
}

// Returns true if |word| falls in one of the CJK ranges below, is one of the
// selected code points of U+3000..U+303F, or lies in U+FF66..U+FF9D.
bool IsCJK(uint32_t word) {
  if ((word >= 0x1100 && word <= 0x11FF) ||
      (word >= 0x2E80 && word <= 0x2FFF) ||
      (word >= 0x3040 && word <= 0x9FBF) ||
      (word >= 0xAC00 && word <= 0xD7AF) ||
      (word >= 0xF900 && word <= 0xFAFF) ||
      (word >= 0xFE30 && word <= 0xFE4F) ||
      (word >= 0x20000 && word <= 0x2A6DF) ||
      (word >= 0x2F800 && word <= 0x2FA1F)) {
    return true;
  }

  // Only part of U+3000..U+303F counts as CJK here; the remainder of that
  // block is handled by IsPunctuation() or by neither function.
  if (word >= 0x3000 && word <= 0x303F) {
    switch (word) {
      case 0x3005: case 0x3006:
      case 0x3021: case 0x3022: case 0x3023: case 0x3024: case 0x3025:
      case 0x3026: case 0x3027: case 0x3028: case 0x3029:
      case 0x3031: case 0x3032: case 0x3033: case 0x3034: case 0x3035:
        return true;
      default:
        return false;
    }
  }

  return word >= 0xFF66 && word <= 0xFF9D;
}

// Returns true if |word| is classified as punctuation: via the 0x08 flag of
// |special_chars| for ASCII, otherwise by exact match against the per-range
// tables below. Code points outside every listed range are not punctuation.
bool IsPunctuation(uint32_t word) {
  if (word <= 0x007F)
    return (special_chars[word] & 0x08) != 0;

  if (word >= 0x0080 && word <= 0x00FF) {
    // Every entry is an exact match. 0x0094 used to be compared with `<=`,
    // which turned the whole 0x0080..0x0094 run into punctuation, including
    // 0x0080, which IsCurrencySymbol() also accepts.
    switch (word) {
      case 0x0082: case 0x0084: case 0x0085:
      case 0x0091: case 0x0092: case 0x0093: case 0x0094:
      case 0x0096:
      case 0x00B4: case 0x00B8:
        return true;
      default:
        return false;
    }
  }

  if (word >= 0x2000 && word <= 0x206F) {
    switch (word) {
      case 0x2010: case 0x2011: case 0x2012: case 0x2013:
      case 0x2018: case 0x2019: case 0x201A: case 0x201B:
      case 0x201C: case 0x201D: case 0x201E: case 0x201F:
      case 0x2032: case 0x2033: case 0x2034: case 0x2035:
      case 0x2036: case 0x2037:
      case 0x203C: case 0x203D: case 0x203E:
      case 0x2044:
        return true;
      default:
        return false;
    }
  }

  if (word >= 0x3000 && word <= 0x303F) {
    switch (word) {
      case 0x3001: case 0x3002: case 0x3003:
      case 0x3005:
      case 0x3009: case 0x300A: case 0x300B: case 0x300C: case 0x300D:
      case 0x300E: case 0x300F: case 0x3010: case 0x3011:
      case 0x3014: case 0x3015: case 0x3016: case 0x3017:
      case 0x3018: case 0x3019: case 0x301A: case 0x301B:
      case 0x301D: case 0x301E: case 0x301F:
        return true;
      default:
        return false;
    }
  }

  if (word >= 0xFE50 && word <= 0xFE6F)
    return word <= 0xFE5E || word == 0xFE63;

  if (word >= 0xFF00 && word <= 0xFFEF) {
    switch (word) {
      case 0xFF01: case 0xFF02:
      case 0xFF07: case 0xFF08: case 0xFF09:
      case 0xFF0C: case 0xFF0E: case 0xFF0F:
      case 0xFF1A: case 0xFF1B: case 0xFF1F:
      case 0xFF3B: case 0xFF3D: case 0xFF40:
      case 0xFF5B: case 0xFF5C: case 0xFF5D:
      case 0xFF61: case 0xFF62: case 0xFF63: case 0xFF64: case 0xFF65:
      case 0xFF9E: case 0xFF9F:
        return true;
      default:
        return false;
    }
  }

  return false;
}

// Returns true if |word| is an ASCII code point whose |special_chars| entry
// has the 0x20 (connective symbol) flag set.
bool IsConnectiveSymbol(uint32_t word) {
  return word <= 0x007F && (special_chars[word] & 0x20) != 0;
}

// Returns true if |word| is classified as open-style punctuation: via the
// 0x04 flag of |special_chars| for ASCII, otherwise by exact match against
// the table below.
bool IsOpenStylePunctuation(uint32_t word) {
  if (word <= 0x007F)
    return (special_chars[word] & 0x04) != 0;

  switch (word) {
    case 0x300A: case 0x300C: case 0x300E: case 0x3010:
    case 0x3014: case 0x3016: case 0x3018: case 0x301A:
    case 0xFF08: case 0xFF3B: case 0xFF5B: case 0xFF62:
      return true;
    default:
      return false;
  }
}

// Returns true if |word| is one of the listed currency code points or lies in
// U+20A0..U+20CF.
bool IsCurrencySymbol(uint16_t word) {
  if (word >= 0x20A0 && word <= 0x20CF)
    return true;

  switch (word) {
    case 0x0024: case 0x0080:
    case 0x00A2: case 0x00A3: case 0x00A4: case 0x00A5:
    case 0xFE69: case 0xFF04:
    case 0xFFE0: case 0xFFE1: case 0xFFE5: case 0xFFE6:
      return true;
    default:
      return false;
  }
}

// Returns true if |word| is a currency symbol or U+2116.
bool IsPrefixSymbol(uint16_t word) {
  return IsCurrencySymbol(word) || word == 0x2116;
}

// Returns true if |word| is U+0020 or U+3000.
bool IsSpace(uint16_t word) {
  return word == 0x0020 || word == 0x3000;
}

// Decides whether a new breakable unit starts at |curWord|, given that
// |prevWord| is the word before it. The rules are evaluated in order and the
// first one that matches wins, so their sequence is significant.
bool NeedDivision(uint16_t prevWord, uint16_t curWord) {
  // Letters and digits run together.
  const bool bPrevAlnum = IsLatin(prevWord) || IsDigit(prevWord);
  if (bPrevAlnum && (IsLatin(curWord) || IsDigit(curWord)))
    return false;

  // Spaces and punctuation stay attached to what precedes them.
  if (IsSpace(curWord) || IsPunctuation(curWord))
    return false;

  // A connective symbol on either side joins the two words.
  if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord))
    return false;

  // Anything else that follows a space or punctuation starts a new unit.
  if (IsSpace(prevWord) || IsPunctuation(prevWord))
    return true;

  // A prefix symbol stays attached to what follows it.
  if (IsPrefixSymbol(prevWord))
    return false;

  // A prefix symbol or CJK character starts a new unit, and so does whatever
  // follows a CJK character.
  return IsPrefixSymbol(curWord) || IsCJK(curWord) || IsCJK(prevWord);
}

}  // namespace
175
CTypeset(CSection * pSection)176 CTypeset::CTypeset(CSection* pSection)
177 : m_rcRet(0.0f, 0.0f, 0.0f, 0.0f),
178 m_pVT(pSection->m_pVT),
179 m_pSection(pSection) {}
180
~CTypeset()181 CTypeset::~CTypeset() {}
182
CharArray()183 CPVT_FloatRect CTypeset::CharArray() {
184 m_rcRet = CPVT_FloatRect(0, 0, 0, 0);
185 if (m_pSection->m_LineArray.empty())
186 return m_rcRet;
187
188 float fNodeWidth = m_pVT->GetPlateWidth() /
189 (m_pVT->GetCharArray() <= 0 ? 1 : m_pVT->GetCharArray());
190 float fLineAscent =
191 m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
192 float fLineDescent =
193 m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
194 float x = 0.0f;
195 float y = m_pVT->GetLineLeading() + fLineAscent;
196 int32_t nStart = 0;
197 CLine* pLine = m_pSection->m_LineArray.front().get();
198 switch (m_pVT->GetAlignment()) {
199 case 0:
200 pLine->m_LineInfo.fLineX = fNodeWidth * VARIABLETEXT_HALF;
201 break;
202 case 1:
203 nStart = (m_pVT->GetCharArray() -
204 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray)) /
205 2;
206 pLine->m_LineInfo.fLineX =
207 fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
208 break;
209 case 2:
210 nStart = m_pVT->GetCharArray() -
211 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
212 pLine->m_LineInfo.fLineX =
213 fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
214 break;
215 }
216 for (int32_t w = 0,
217 sz = pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
218 w < sz; w++) {
219 if (w >= m_pVT->GetCharArray())
220 break;
221
222 float fNextWidth = 0;
223 if (pdfium::IndexInBounds(m_pSection->m_WordArray, w + 1)) {
224 CPVT_WordInfo* pNextWord = m_pSection->m_WordArray[w + 1].get();
225 pNextWord->fWordTail = 0;
226 fNextWidth = m_pVT->GetWordWidth(*pNextWord);
227 }
228 CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
229 pWord->fWordTail = 0;
230 float fWordWidth = m_pVT->GetWordWidth(*pWord);
231 float fWordAscent = m_pVT->GetWordAscent(*pWord);
232 float fWordDescent = m_pVT->GetWordDescent(*pWord);
233 x = (float)(fNodeWidth * (w + nStart + 0.5) -
234 fWordWidth * VARIABLETEXT_HALF);
235 pWord->fWordX = x;
236 pWord->fWordY = y;
237 if (w == 0) {
238 pLine->m_LineInfo.fLineX = x;
239 }
240 if (w != pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1) {
241 pWord->fWordTail =
242 (fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF > 0
243 ? fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF
244 : 0);
245 } else {
246 pWord->fWordTail = 0;
247 }
248 x += fWordWidth;
249 fLineAscent = std::max(fLineAscent, fWordAscent);
250 fLineDescent = std::min(fLineDescent, fWordDescent);
251 }
252 pLine->m_LineInfo.nBeginWordIndex = 0;
253 pLine->m_LineInfo.nEndWordIndex =
254 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1;
255 pLine->m_LineInfo.fLineY = y;
256 pLine->m_LineInfo.fLineWidth = x - pLine->m_LineInfo.fLineX;
257 pLine->m_LineInfo.fLineAscent = fLineAscent;
258 pLine->m_LineInfo.fLineDescent = fLineDescent;
259 m_rcRet = CPVT_FloatRect(0, 0, x, y - fLineDescent);
260 return m_rcRet;
261 }
262
GetEditSize(float fFontSize)263 CFX_SizeF CTypeset::GetEditSize(float fFontSize) {
264 ASSERT(m_pSection);
265 ASSERT(m_pVT);
266 SplitLines(false, fFontSize);
267 return CFX_SizeF(m_rcRet.Width(), m_rcRet.Height());
268 }
269
Typeset()270 CPVT_FloatRect CTypeset::Typeset() {
271 ASSERT(m_pVT);
272 m_pSection->m_LineArray.clear();
273 SplitLines(true, 0.0f);
274 OutputLines();
275 return m_rcRet;
276 }
277
SplitLines(bool bTypeset,float fFontSize)278 void CTypeset::SplitLines(bool bTypeset, float fFontSize) {
279 ASSERT(m_pVT);
280 ASSERT(m_pSection);
281 int32_t nLineHead = 0;
282 int32_t nLineTail = 0;
283 float fMaxX = 0.0f, fMaxY = 0.0f;
284 float fLineWidth = 0.0f, fBackupLineWidth = 0.0f;
285 float fLineAscent = 0.0f, fBackupLineAscent = 0.0f;
286 float fLineDescent = 0.0f, fBackupLineDescent = 0.0f;
287 int32_t nWordStartPos = 0;
288 bool bFullWord = false;
289 int32_t nLineFullWordIndex = 0;
290 int32_t nCharIndex = 0;
291 CPVT_LineInfo line;
292 float fWordWidth = 0;
293 float fTypesetWidth =
294 std::max(m_pVT->GetPlateWidth() - m_pVT->GetLineIndent(), 0.0f);
295 int32_t nTotalWords =
296 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
297 bool bOpened = false;
298 if (nTotalWords > 0) {
299 int32_t i = 0;
300 while (i < nTotalWords) {
301 CPVT_WordInfo* pWord = m_pSection->m_WordArray[i].get();
302 CPVT_WordInfo* pOldWord = pWord;
303 if (i > 0) {
304 pOldWord = m_pSection->m_WordArray[i - 1].get();
305 }
306 if (pWord) {
307 if (bTypeset) {
308 fLineAscent = std::max(fLineAscent, m_pVT->GetWordAscent(*pWord));
309 fLineDescent = std::min(fLineDescent, m_pVT->GetWordDescent(*pWord));
310 fWordWidth = m_pVT->GetWordWidth(*pWord);
311 } else {
312 fLineAscent =
313 std::max(fLineAscent, m_pVT->GetWordAscent(*pWord, fFontSize));
314 fLineDescent =
315 std::min(fLineDescent, m_pVT->GetWordDescent(*pWord, fFontSize));
316 fWordWidth = m_pVT->GetWordWidth(
317 pWord->nFontIndex, pWord->Word, m_pVT->GetSubWord(),
318 m_pVT->GetCharSpace(), m_pVT->GetHorzScale(), fFontSize,
319 pWord->fWordTail);
320 }
321 if (!bOpened) {
322 if (IsOpenStylePunctuation(pWord->Word)) {
323 bOpened = true;
324 bFullWord = true;
325 } else if (pOldWord) {
326 if (NeedDivision(pOldWord->Word, pWord->Word)) {
327 bFullWord = true;
328 }
329 }
330 } else {
331 if (!IsSpace(pWord->Word) && !IsOpenStylePunctuation(pWord->Word)) {
332 bOpened = false;
333 }
334 }
335 if (bFullWord) {
336 bFullWord = false;
337 if (nCharIndex > 0) {
338 nLineFullWordIndex++;
339 }
340 nWordStartPos = i;
341 fBackupLineWidth = fLineWidth;
342 fBackupLineAscent = fLineAscent;
343 fBackupLineDescent = fLineDescent;
344 }
345 nCharIndex++;
346 }
347 if (m_pVT->IsAutoReturn() && fTypesetWidth > 0 &&
348 fLineWidth + fWordWidth > fTypesetWidth) {
349 if (nLineFullWordIndex > 0) {
350 i = nWordStartPos;
351 fLineWidth = fBackupLineWidth;
352 fLineAscent = fBackupLineAscent;
353 fLineDescent = fBackupLineDescent;
354 }
355 if (nCharIndex == 1) {
356 fLineWidth = fWordWidth;
357 i++;
358 }
359 nLineTail = i - 1;
360 if (bTypeset) {
361 line.nBeginWordIndex = nLineHead;
362 line.nEndWordIndex = nLineTail;
363 line.nTotalWord = nLineTail - nLineHead + 1;
364 line.fLineWidth = fLineWidth;
365 line.fLineAscent = fLineAscent;
366 line.fLineDescent = fLineDescent;
367 m_pSection->AddLine(line);
368 }
369 fMaxY += (fLineAscent + m_pVT->GetLineLeading());
370 fMaxY -= fLineDescent;
371 fMaxX = std::max(fLineWidth, fMaxX);
372 nLineHead = i;
373 fLineWidth = 0.0f;
374 fLineAscent = 0.0f;
375 fLineDescent = 0.0f;
376 nCharIndex = 0;
377 nLineFullWordIndex = 0;
378 bFullWord = false;
379 } else {
380 fLineWidth += fWordWidth;
381 i++;
382 }
383 }
384 if (nLineHead <= nTotalWords - 1) {
385 nLineTail = nTotalWords - 1;
386 if (bTypeset) {
387 line.nBeginWordIndex = nLineHead;
388 line.nEndWordIndex = nLineTail;
389 line.nTotalWord = nLineTail - nLineHead + 1;
390 line.fLineWidth = fLineWidth;
391 line.fLineAscent = fLineAscent;
392 line.fLineDescent = fLineDescent;
393 m_pSection->AddLine(line);
394 }
395 fMaxY += (fLineAscent + m_pVT->GetLineLeading());
396 fMaxY -= fLineDescent;
397 fMaxX = std::max(fLineWidth, fMaxX);
398 }
399 } else {
400 if (bTypeset) {
401 fLineAscent = m_pVT->GetLineAscent();
402 fLineDescent = m_pVT->GetLineDescent();
403 } else {
404 fLineAscent =
405 m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), fFontSize);
406 fLineDescent =
407 m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), fFontSize);
408 }
409 if (bTypeset) {
410 line.nBeginWordIndex = -1;
411 line.nEndWordIndex = -1;
412 line.nTotalWord = 0;
413 line.fLineWidth = 0;
414 line.fLineAscent = fLineAscent;
415 line.fLineDescent = fLineDescent;
416 m_pSection->AddLine(line);
417 }
418 fMaxY += m_pVT->GetLineLeading() + fLineAscent - fLineDescent;
419 }
420 m_rcRet = CPVT_FloatRect(0, 0, fMaxX, fMaxY);
421 }
422
OutputLines()423 void CTypeset::OutputLines() {
424 ASSERT(m_pVT);
425 ASSERT(m_pSection);
426 float fMinX = 0.0f, fMinY = 0.0f, fMaxX = 0.0f, fMaxY = 0.0f;
427 float fPosX = 0.0f, fPosY = 0.0f;
428 float fLineIndent = m_pVT->GetLineIndent();
429 float fTypesetWidth = std::max(m_pVT->GetPlateWidth() - fLineIndent, 0.0f);
430 switch (m_pVT->GetAlignment()) {
431 default:
432 case 0:
433 fMinX = 0.0f;
434 break;
435 case 1:
436 fMinX = (fTypesetWidth - m_rcRet.Width()) * VARIABLETEXT_HALF;
437 break;
438 case 2:
439 fMinX = fTypesetWidth - m_rcRet.Width();
440 break;
441 }
442 fMaxX = fMinX + m_rcRet.Width();
443 fMinY = 0.0f;
444 fMaxY = m_rcRet.Height();
445 int32_t nTotalLines =
446 pdfium::CollectionSize<int32_t>(m_pSection->m_LineArray);
447 if (nTotalLines > 0) {
448 for (int32_t l = 0; l < nTotalLines; l++) {
449 CLine* pLine = m_pSection->m_LineArray[l].get();
450 switch (m_pVT->GetAlignment()) {
451 default:
452 case 0:
453 fPosX = 0;
454 break;
455 case 1:
456 fPosX = (fTypesetWidth - pLine->m_LineInfo.fLineWidth) *
457 VARIABLETEXT_HALF;
458 break;
459 case 2:
460 fPosX = fTypesetWidth - pLine->m_LineInfo.fLineWidth;
461 break;
462 }
463 fPosX += fLineIndent;
464 fPosY += m_pVT->GetLineLeading();
465 fPosY += pLine->m_LineInfo.fLineAscent;
466 pLine->m_LineInfo.fLineX = fPosX - fMinX;
467 pLine->m_LineInfo.fLineY = fPosY - fMinY;
468 for (int32_t w = pLine->m_LineInfo.nBeginWordIndex;
469 w <= pLine->m_LineInfo.nEndWordIndex; w++) {
470 if (pdfium::IndexInBounds(m_pSection->m_WordArray, w)) {
471 CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
472 pWord->fWordX = fPosX - fMinX;
473 pWord->fWordY = fPosY - fMinY;
474
475 fPosX += m_pVT->GetWordWidth(*pWord);
476 }
477 }
478 fPosY -= pLine->m_LineInfo.fLineDescent;
479 }
480 }
481 m_rcRet = CPVT_FloatRect(fMinX, fMinY, fMaxX, fMaxY);
482 }
483