1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include <algorithm>
8
9 #include "reflowedtextpage.h"
10
CreateReflowTextPage(IPDF_ReflowedPage * pRefPage)11 IPDF_TextPage* IPDF_TextPage::CreateReflowTextPage(IPDF_ReflowedPage* pRefPage)
12 {
13 return new CRF_TextPage(pRefPage);
14 }
CRF_TextPage(IPDF_ReflowedPage * pRefPage)15 CRF_TextPage::CRF_TextPage(IPDF_ReflowedPage* pRefPage)
16 {
17 m_pRefPage = (CPDF_ReflowedPage*)(pRefPage);
18 m_pDataList = NULL;
19 m_CountBSArray = NULL;
20 }
~CRF_TextPage()21 CRF_TextPage::~CRF_TextPage()
22 {
23 if(m_pDataList) {
24 delete m_pDataList;
25 m_pDataList = NULL;
26 }
27 if(m_CountBSArray) {
28 delete m_CountBSArray;
29 m_CountBSArray = NULL;
30 }
31 }
ParseTextPage()32 FX_BOOL CRF_TextPage::ParseTextPage()
33 {
34 if(!m_pRefPage) {
35 return FALSE;
36 }
37 int count = m_pRefPage->m_pReflowed->GetSize();
38 m_pDataList = new CRF_CharDataPtrArray(std::min(count, 500));
39 for(int i = 0; i < count; i++) {
40 CRF_Data* pData = (*(m_pRefPage->m_pReflowed))[i];
41 if(pData->GetType() == CRF_Data::Text) {
42 m_pDataList->Add((CRF_CharData*)pData);
43 }
44 }
45 m_CountBSArray = new CFX_CountBSINT32Array(20);
46 return TRUE;
47 }
IsParsered() const48 FX_BOOL CRF_TextPage::IsParsered() const
49 {
50 if(m_pDataList) {
51 return TRUE;
52 }
53 return FALSE;
54 }
CharIndexFromTextIndex(int TextIndex) const55 int CRF_TextPage::CharIndexFromTextIndex(int TextIndex) const
56 {
57 return TextIndex;
58 }
TextIndexFromCharIndex(int CharIndex) const59 int CRF_TextPage::TextIndexFromCharIndex(int CharIndex) const
60 {
61 return CharIndex;
62 }
63
CountChars() const64 int CRF_TextPage::CountChars() const
65 {
66 if (NULL == m_pDataList) {
67 return -1;
68 }
69 return m_pDataList->GetSize();
70 }
GetCharInfo(int index,FPDF_CHAR_INFO & info) const71 void CRF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const
72 {
73 if(index >= CountChars() || index < 0 || !m_pDataList) {
74 return;
75 }
76 CRF_CharData* pData = (*m_pDataList)[index];
77 FX_FLOAT ReltiveCorddDs = pData->m_pCharState->m_fDescent;
78 FX_FLOAT ReltiveCorddAs = pData->m_pCharState->m_fAscent;
79 info.m_Flag = CHAR_NORMAL;
80 info.m_pTextObj = pData->m_pCharState->m_pTextObj;
81 info.m_OriginX = pData->m_PosX;
82 info.m_OriginY = pData->m_PosY - ReltiveCorddDs;
83 info.m_FontSize = pData->m_pCharState->m_fFontSize;
84 CFX_FloatRect FloatRectTmp(pData->m_PosX, pData->m_PosY, pData->m_PosX + pData->m_Width, pData->m_PosY + ReltiveCorddAs - ReltiveCorddDs);
85 info.m_CharBox = FloatRectTmp;
86 CFX_WideString str = pData->m_pCharState->m_pFont->UnicodeFromCharCode(pData->m_CharCode);
87 if(!str.IsEmpty()) {
88 info.m_Unicode = str.GetAt(0);
89 } else {
90 info.m_Unicode = -1;
91 }
92 info.m_Charcode = (FX_WCHAR)pData->m_CharCode;
93 info.m_Matrix = CFX_Matrix(1, 0, 0, 1, 0, 0);
94 }
95 extern FX_BOOL GetIntersection(FX_FLOAT low1, FX_FLOAT high1, FX_FLOAT low2, FX_FLOAT high2, FX_FLOAT& interlow, FX_FLOAT& interhigh);
_IsInsameline(const CFX_FloatRect & rectA,const CFX_FloatRect & rectB)96 inline FX_BOOL _IsInsameline(const CFX_FloatRect& rectA, const CFX_FloatRect& rectB)
97 {
98 if((rectA.top >= rectB.bottom && rectB.top >= rectA.bottom)) {
99 return TRUE;
100 } else {
101 return FALSE;
102 }
103 }
_IsIntersect(const CFX_FloatRect & rectA,const CFX_FloatRect & rectB)104 inline FX_BOOL _IsIntersect(const CFX_FloatRect& rectA, const CFX_FloatRect& rectB)
105 {
106 FX_FLOAT interlow = .0f, interhigh = .0f;
107 if(GetIntersection(rectA.bottom, rectA.top, rectB.bottom, rectB.top, interlow, interhigh)) {
108 if(GetIntersection(rectA.left, rectA.right, rectB.left, rectB.right, interlow, interhigh)) {
109 return TRUE;
110 } else {
111 return FALSE;
112 }
113 }
114 return FALSE;
115 }
GetRectArray(int start,int nCount,CFX_RectArray & rectArray) const116 void CRF_TextPage::GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const
117 {
118 int indexlen = start + nCount;
119 FPDF_CHAR_INFO info;
120 FX_BOOL bstart = TRUE;
121 CFX_FloatRect recttmp;
122 int i;
123 for(i = start; i < indexlen; i++) {
124 GetCharInfo(i, info);
125 if(bstart) {
126 recttmp = info.m_CharBox;
127 bstart = FALSE;
128 } else if(_IsInsameline(recttmp, info.m_CharBox)) {
129 recttmp.right = info.m_CharBox.right;
130 if(info.m_CharBox.top > recttmp.top) {
131 recttmp.top = info.m_CharBox.top;
132 }
133 if(info.m_CharBox.bottom < recttmp.bottom) {
134 recttmp.bottom = info.m_CharBox.bottom;
135 }
136 } else {
137 rectArray.Add(recttmp);
138 recttmp = info.m_CharBox;
139 }
140 }
141 rectArray.Add(recttmp);
142 }
_GetDistance(CFX_FloatRect floatRect,CPDF_Point point)143 inline FX_FLOAT _GetDistance(CFX_FloatRect floatRect, CPDF_Point point)
144 {
145 if(floatRect.right < point.x && floatRect.bottom > point.y) {
146 return FXSYS_sqrt(FXSYS_pow(point.x - floatRect.right, 2) + FXSYS_pow(floatRect.bottom - point.y, 2));
147 }
148 if (floatRect.right < point.x && floatRect.top < point.y) {
149 return FXSYS_sqrt(FXSYS_pow(point.x - floatRect.right, 2) + FXSYS_pow(point.y - floatRect.top, 2));
150 }
151 if(floatRect.left > point.x && floatRect.bottom > point.y) {
152 return FXSYS_sqrt(FXSYS_pow(floatRect.bottom - point.y, 2) + FXSYS_pow(floatRect.left - point.x, 2));
153 }
154 if((floatRect.right > point.x || FXSYS_fabs(floatRect.right - point.x) <= 0.0001f) &&
155 (floatRect.left < point.x || FXSYS_fabs(floatRect.left - point.x) <= 0.0001f) && floatRect.bottom > point.y) {
156 return FXSYS_fabs(floatRect.bottom - point.y);
157 }
158 if(floatRect.left > point.x && (floatRect.bottom < point.y || FXSYS_fabs(floatRect.bottom - point.y) <= 0.0001f) &&
159 (floatRect.top > point.y || FXSYS_fabs(floatRect.top - point.y) <= 0.0001f)) {
160 return FXSYS_fabs(floatRect.left - point.x);
161 }
162 if(floatRect.left > point.x && floatRect.top < point.y) {
163 return FXSYS_sqrt(FXSYS_pow(floatRect.left - point.x, 2) + FXSYS_pow(point.y - floatRect.top, 2));
164 }
165 if ((floatRect.left < point.x || FXSYS_fabs(floatRect.left - point.x) <= 0.0001f) &&
166 (floatRect.right > point.x || FXSYS_fabs(floatRect.right - point.x) <= 0.0001f) && floatRect.top < point.y) {
167 return FXSYS_fabs(point.y - floatRect.top);
168 }
169 if(floatRect.right < point.x && (floatRect.top > point.y || FXSYS_fabs(floatRect.top - point.y) <= 0.0001f) &&
170 (floatRect.bottom < point.y || FXSYS_fabs(floatRect.bottom - point.y) <= 0.0001f)) {
171 return point.x - floatRect.right;
172 }
173 return .0f;
174 }
GetIndexAtPos(CPDF_Point point,FX_FLOAT xTorelance,FX_FLOAT yTorelance) const175 int CRF_TextPage::GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const
176 {
177 int index = -1, i = 0, j = 0;
178 FPDF_CHAR_INFO info;
179 CFX_FloatRect rectTmp;
180 FX_FLOAT MinDistance = 1000, DistanceTmp = 0;
181 FX_FLOAT rect_bottom = point.x - xTorelance;
182 CFX_FloatRect TorelanceRect(rect_bottom <= 0 ? 0 : rect_bottom, point.y - yTorelance, point.x + xTorelance, point.y + yTorelance);
183 int count = CountChars();
184 for(i = 0; i < count; i++) {
185 GetCharInfo(i, info);
186 rectTmp = info.m_CharBox;
187 if(rectTmp.Contains(point.x, point.y)) {
188 index = i;
189 break;
190 } else if(_IsIntersect(rectTmp, TorelanceRect)) {
191 DistanceTmp = _GetDistance(rectTmp, point);
192 if(DistanceTmp < MinDistance) {
193 MinDistance = DistanceTmp;
194 index = i;
195 }
196 }
197 }
198 return index;
199 }
GetIndexAtPos(FX_FLOAT x,FX_FLOAT y,FX_FLOAT xTorelance,FX_FLOAT yTorelance) const200 int CRF_TextPage::GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const
201 {
202 int index = 0;
203 CPDF_Point point(x, y);
204 if((index = GetIndexAtPos(point, xTorelance, yTorelance)) < 0) {
205 return -1;
206 } else {
207 return index;
208 }
209 }
GetOrderByDirection(int index,int direction) const210 int CRF_TextPage::GetOrderByDirection(int index, int direction) const
211 {
212 return -1;
213 }
GetTextByRect(CFX_FloatRect rect) const214 CFX_WideString CRF_TextPage::GetTextByRect(CFX_FloatRect rect) const
215 {
216 int count;
217 FPDF_CHAR_INFO info;
218 CFX_WideString str;
219 CFX_FloatRect Recttmp;
220 FX_BOOL bstart = TRUE;
221 count = CountChars();
222 if(rect.IsEmpty()) {
223 return L"";
224 }
225 for(int i = 0; i < count; i++) {
226 GetCharInfo(i, info);
227 if(_IsIntersect(rect, info.m_CharBox)) {
228 if(bstart) {
229 Recttmp = info.m_CharBox;
230 str += info.m_Unicode;
231 bstart = FALSE;
232 } else if(_IsInsameline(Recttmp, info.m_CharBox)) {
233 str += info.m_Unicode;
234 } else {
235 str += L"\r\n";
236 Recttmp = info.m_CharBox;
237 str += info.m_Unicode;
238 }
239 }
240 }
241 if(str.IsEmpty()) {
242 return L"";
243 } else {
244 return str;
245 }
246 }
GetRectsArrayByRect(CFX_FloatRect rect,CFX_RectArray & resRectArray) const247 void CRF_TextPage::GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const
248 {
249 int count, i;
250 FX_BOOL bstart = TRUE;
251 FPDF_CHAR_INFO info;
252 CFX_FloatRect recttmp;
253 count = CountChars();
254 for(i = 0; i < count; i++) {
255 GetCharInfo(i, info);
256 if(_IsIntersect(rect, info.m_CharBox)) {
257 if(bstart) {
258 recttmp = info.m_CharBox;
259 bstart = FALSE;
260 } else if(_IsInsameline(recttmp, info.m_CharBox)) {
261 recttmp.right = info.m_CharBox.right;
262 if(info.m_CharBox.top > recttmp.top) {
263 recttmp.top = info.m_CharBox.top;
264 }
265 if(info.m_CharBox.bottom < recttmp.bottom) {
266 recttmp.bottom = info.m_CharBox.bottom;
267 }
268 } else {
269 resRectArray.Add(recttmp);
270 recttmp = info.m_CharBox;
271 }
272 }
273 }
274 resRectArray.Add(recttmp);
275 }
CountRects(int start,int nCount)276 int CRF_TextPage::CountRects(int start, int nCount)
277 {
278 m_rectArray.RemoveAll();
279 GetRectArray(start, nCount, m_rectArray);
280 return m_rectArray.GetSize();
281 }
GetRect(int rectIndex,FX_FLOAT & left,FX_FLOAT & top,FX_FLOAT & right,FX_FLOAT & bottom) const282 void CRF_TextPage::GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const
283 {
284 if(m_rectArray.GetSize() <= rectIndex) {
285 return;
286 }
287 left = m_rectArray[rectIndex].left;
288 top = m_rectArray[rectIndex].top;
289 right = m_rectArray[rectIndex].right;
290 bottom = m_rectArray[rectIndex].bottom;
291 }
GetBaselineRotate(int rectIndex,int & Rotate)292 FX_BOOL CRF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate)
293 {
294 Rotate = 0;
295 return TRUE;
296 }
GetBaselineRotate(CFX_FloatRect rect,int & Rotate)297 FX_BOOL CRF_TextPage::GetBaselineRotate(CFX_FloatRect rect, int& Rotate)
298 {
299 Rotate = 0;
300 return TRUE;
301 }
CountBoundedSegments(FX_FLOAT left,FX_FLOAT top,FX_FLOAT right,FX_FLOAT bottom,FX_BOOL bContains)302 int CRF_TextPage::CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains)
303 {
304 if (!m_CountBSArray) {
305 return -1;
306 }
307 m_CountBSArray->RemoveAll();
308 CFX_FloatRect floatrect(left, bottom, right, top);
309 int totalcount, i, j = 0, counttmp = 0;
310 FX_BOOL bstart = TRUE;
311 FPDF_CHAR_INFO info;
312 CFX_FloatRect recttmp;
313 totalcount = CountChars();
314 for(i = 0; i < totalcount; i++) {
315 GetCharInfo(i, info);
316 if(_IsIntersect(floatrect, info.m_CharBox)) {
317 if(bstart) {
318 m_CountBSArray->Add(i);
319 counttmp = 1;
320 recttmp = info.m_CharBox;
321 bstart = FALSE;
322 } else if(_IsInsameline(recttmp, info.m_CharBox)) {
323 recttmp.right = info.m_CharBox.right;
324 if(info.m_CharBox.top > recttmp.top) {
325 recttmp.top = info.m_CharBox.top;
326 }
327 if(info.m_CharBox.bottom < recttmp.bottom) {
328 recttmp.bottom = info.m_CharBox.bottom;
329 }
330 counttmp ++;
331 } else {
332 m_CountBSArray->Add(counttmp);
333 m_CountBSArray->Add(i);
334 counttmp = 1;
335 j++;
336 recttmp = info.m_CharBox;
337 }
338 }
339 }
340 m_CountBSArray->Add(counttmp);
341 j++;
342 return j;
343 }
GetBoundedSegment(int index,int & start,int & count) const344 void CRF_TextPage::GetBoundedSegment(int index, int& start, int& count) const
345 {
346 if (!m_CountBSArray) {
347 return;
348 }
349 if(m_CountBSArray->GetSize() <= index * 2) {
350 start = 0;
351 count = 0;
352 return;
353 }
354 start = *(int *)m_CountBSArray->GetAt(index * 2);
355 count = *(int *)m_CountBSArray->GetAt(index * 2 + 1);
356 }
357
GetWordBreak(int index,int direction) const358 int CRF_TextPage::GetWordBreak(int index, int direction) const
359 {
360 return -1;
361 }
GetPageText(int start,int nCount) const362 CFX_WideString CRF_TextPage::GetPageText(int start, int nCount ) const
363 {
364 if(nCount == -1) {
365 nCount = CountChars();
366 start = 0;
367 } else if(nCount < 1) {
368 return L"";
369 } else if(start >= CountChars()) {
370 return L"";
371 }
372 int i, index = start + nCount;
373 FPDF_CHAR_INFO info;
374 CFX_WideString str;
375 CFX_FloatRect recttmp;
376 FX_BOOL bstart = TRUE;
377 for(i = start; i < index; i++) {
378 GetCharInfo(i, info);
379 if(bstart) {
380 recttmp = info.m_CharBox;
381 str += info.m_Unicode;
382 bstart = FALSE;
383 } else if (_IsInsameline(recttmp, info.m_CharBox)) {
384 str += info.m_Unicode;
385 } else {
386 str += L"\r\n";
387 recttmp = info.m_CharBox;
388 str += info.m_Unicode;
389 }
390 }
391 if(str.IsEmpty()) {
392 return L"";
393 }
394 return str;
395 }
396