// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7 #ifndef _FPDF_TEXT_H_
8 #define _FPDF_TEXT_H_
9 #ifndef _FPDF_PARSER_
10 #include "../fpdfapi/fpdf_parser.h"
11 #endif
12 #ifndef _FPDF_PAGEOBJ_H_
13 #include "../fpdfapi/fpdf_pageobj.h"
14 #endif
15 #ifndef _FPDF_PAGE_
16 #include "../fpdfapi/fpdf_page.h"
17 #endif
class CPDF_PageObjects;

// PDF2TXT_* option bits. Every value is a distinct power of two, so any
// combination of options can be OR-ed into a single mask.
// NOTE(review): presumably these are the bits accepted by the `flags`
// parameter of the PDF_GetPageText* functions -- confirm against the
// implementation, which is not part of this header.
#define PDF2TXT_AUTO_ROTATE         1
#define PDF2TXT_AUTO_WIDTH          2
#define PDF2TXT_KEEP_COLUMN         4
#define PDF2TXT_USE_OCR             8
#define PDF2TXT_INCLUDE_INVISIBLE   16
24 void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
25                      int iMinWidth, FX_DWORD flags);
26 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
27                              int iMinWidth, FX_DWORD flags);
28 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
29                                FX_DWORD flags);
30 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage);
// Forward declarations of the interfaces defined later in this header.
class IPDF_TextPage;
class IPDF_LinkExtract;
class IPDF_TextPageFind;

// CHAR_* classification codes (one negative error code, three non-negative
// kinds).
// NOTE(review): presumably the values stored in FPDF_CHAR_INFO::m_Flag --
// confirm against the text page implementation.
#define CHAR_ERROR          -1
#define CHAR_NORMAL         0
#define CHAR_GENERATED      1
#define CHAR_UNUNICODE      2
38 typedef struct {
39     FX_WCHAR			m_Unicode;
40     FX_WCHAR			m_Charcode;
41     FX_INT32			m_Flag;
42     FX_FLOAT			m_FontSize;
43     FX_FLOAT			m_OriginX;
44     FX_FLOAT			m_OriginY;
45     CFX_FloatRect		m_CharBox;
46     CPDF_TextObject*	m_pTextObj;
47     CFX_AffineMatrix	m_Matrix;
48 } FPDF_CHAR_INFO;
49 typedef	CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray;
// Text layout codes (LRTB / RLTB / TBRL). These start at 0, whereas the
// FPDFTEXT_WRITINGMODE_* codes further down use 1..3 for the same three
// suffixes, so the two sets are not interchangeable.
#define FPDFTEXT_LRTB   0
#define FPDFTEXT_RLTB   1
#define FPDFTEXT_TBRL   2

// Direction codes; opposite directions are arithmetic negations of each other.
// NOTE(review): presumably the values expected by the `direction` parameter
// of IPDF_TextPage::GetOrderByDirection / GetWordBreak -- confirm.
#define FPDFTEXT_LEFT           -1
#define FPDFTEXT_RIGHT          1
#define FPDFTEXT_UP             -2
#define FPDFTEXT_DOWN           2

class IPDF_ReflowedPage;

// Writing-mode codes; 0 is reserved for "unknown". The spelling "UNKNOW" is
// part of the public macro name and is kept for source compatibility.
#define FPDFTEXT_WRITINGMODE_UNKNOW 0
#define FPDFTEXT_WRITINGMODE_LRTB   1
#define FPDFTEXT_WRITINGMODE_RLTB   2
#define FPDFTEXT_WRITINGMODE_TBRL   3
62 class CPDFText_ParseOptions : public CFX_Object
63 {
64 public:
65 
66     CPDFText_ParseOptions();
67     FX_BOOL			m_bGetCharCodeOnly;
68     FX_BOOL			m_bNormalizeObjs;
69     FX_BOOL			m_bOutputHyphen;
70 };
71 class IPDF_TextPage : public CFX_Object
72 {
73 public:
74 
~IPDF_TextPage()75     virtual ~IPDF_TextPage() {}
76     static IPDF_TextPage*	CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
77     static IPDF_TextPage*	CreateTextPage(const CPDF_Page* pPage, int flags = 0);
78     static IPDF_TextPage*	CreateTextPage(const CPDF_PageObjects* pObjs, int flags = 0);
79     static IPDF_TextPage*	CreateReflowTextPage(IPDF_ReflowedPage* pRefPage);
80 
81     virtual void			NormalizeObjects(FX_BOOL bNormalize) = 0;
82 
83     virtual FX_BOOL			ParseTextPage() = 0;
84 
85 
86     virtual FX_BOOL			IsParsered() const = 0;
87 public:
88 
89     virtual int CharIndexFromTextIndex(int TextIndex) const = 0;
90 
91     virtual int TextIndexFromCharIndex(int CharIndex) const = 0;
92 
93 
94     virtual int				CountChars() const = 0;
95 
96     virtual	void			GetCharInfo(int index, FPDF_CHAR_INFO & info) const = 0;
97 
98     virtual void			GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const = 0;
99 
100 
101 
102     virtual int				GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
103 
104     virtual int				GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
105 
106     virtual	int				GetOrderByDirection(int index, int direction) const = 0;
107 
108     virtual CFX_WideString	GetTextByRect(CFX_FloatRect rect) const = 0;
109 
110     virtual void			GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const = 0;
111 
112 
113     virtual int				CountRects(int start, int nCount) = 0;
114 
115     virtual	void			GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const = 0;
116 
117     virtual FX_BOOL			GetBaselineRotate(int rectIndex, int& Rotate) = 0;
118 
119     virtual FX_BOOL			GetBaselineRotate(CFX_FloatRect rect, int& Rotate) = 0;
120 
121     virtual	int				CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE) = 0;
122 
123     virtual	void			GetBoundedSegment(int index, int& start, int& count) const = 0;
124 
125 
126     virtual int				GetWordBreak(int index, int direction) const = 0;
127 
128     virtual CFX_WideString	GetPageText(int start = 0, int nCount = -1 ) const = 0;
129 };
// Search option bits; each value is a distinct single bit, so they can be
// OR-ed together.
// NOTE(review): presumably the bits accepted by the `flags` parameter of
// IPDF_TextPageFind::FindFirst -- confirm.
#define FPDFTEXT_MATCHCASE      0x00000001
#define FPDFTEXT_MATCHWHOLEWORD 0x00000002
#define FPDFTEXT_CONSECUTIVE    0x00000004
133 class IPDF_TextPageFind : public CFX_Object
134 {
135 public:
136 
~IPDF_TextPageFind()137     virtual	~IPDF_TextPageFind() {}
138 
139     static	IPDF_TextPageFind*	CreatePageFind(const IPDF_TextPage* pTextPage);
140 public:
141 
142     virtual	FX_BOOL				FindFirst(CFX_WideString findwhat, int flags, int startPos = 0) = 0;
143 
144     virtual	FX_BOOL				FindNext() = 0;
145 
146     virtual	FX_BOOL				FindPrev() = 0;
147 
148     virtual void				GetRectArray(CFX_RectArray& rects) const = 0;
149 
150     virtual int					GetCurOrder() const = 0;
151 
152     virtual int					GetMatchedCount() const = 0;
153 };
154 class IPDF_LinkExtract : public CFX_Object
155 {
156 public:
157 
~IPDF_LinkExtract()158     virtual	~IPDF_LinkExtract() {}
159 
160     static	IPDF_LinkExtract*	CreateLinkExtract();
161 
162     virtual FX_BOOL				ExtractLinks(const IPDF_TextPage* pTextPage) = 0;
163 public:
164 
165     virtual int					CountLinks() const = 0;
166 
167     virtual CFX_WideString		GetURL(int index) const = 0;
168 
169     virtual	void				GetBoundedSegment(int index, int& start, int& count) const = 0;
170 
171     virtual void				GetRects(int index, CFX_RectArray& rects) const = 0;
172 };
173 #endif
174