• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef _FPDFTEXT_H_
8 #define _FPDFTEXT_H_
9 
10 #include "fpdfview.h"
11 
12 // Exported functions. C++ translation units see them with C linkage.
13 #ifdef __cplusplus
14 extern "C" {
15 #endif
16 
17 // Function: FPDFText_LoadPage
18 //			Prepare information about all characters in a page.
19 // Parameters:
20 //			page	-	Handle to the page, as returned by the FPDF_LoadPage function (in the FPDFVIEW module).
21 // Return value:
22 //			A handle to the text page information structure,
23 //			or NULL if something goes wrong.
24 // Comments:
25 //			The application must call FPDFText_ClosePage to release the text page information.
26 //			If the Text Module has not been purchased, this function will return NULL.
27 //
28 DLLEXPORT FPDF_TEXTPAGE	STDCALL FPDFText_LoadPage(FPDF_PAGE page);
29 
30 // Function: FPDFText_ClosePage
31 //			Release all resources allocated for a text page information structure.
32 // Parameters:
33 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
34 // Return value:
35 //			None.
36 //
37 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page);
38 
39 // Function: FPDFText_CountChars
40 //			Get the number of characters in a page.
41 // Parameters:
42 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
43 // Return value:
44 //			Number of characters in the page, or -1 on error.
45 //			Generated characters, such as additional space and new-line characters, are also counted.
46 // Comments:
47 //			Characters in a page form a "stream"; inside the stream, each character has an index.
48 //			These indexes are used as parameters by many FPDFTEXT functions. The first character in the page
49 //			has an index value of zero.
50 //
51 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page);
52 
53 // Function: FPDFText_GetUnicode
54 //			Get the Unicode value of a character in a page.
55 // Parameters:
56 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
57 //			index		-	Zero-based index of the character.
58 // Return value:
59 //			The Unicode value of the specified character.
60 //			If the character is not encoded in Unicode and the Foxit engine cannot convert it to Unicode,
61 //			the return value will be zero.
62 //
63 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index);
64 
65 // Function: FPDFText_GetFontSize
66 //			Get the font size of a particular character.
67 // Parameters:
68 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
69 //			index		-	Zero-based index of the character.
70 // Return value:
71 //			The font size of the specified character, measured in points (about 1/72 inch).
72 //			This is the typographic size of the font (the so-called "em size").
73 //
74 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index);
75 
76 // Function: FPDFText_GetCharBox
77 //			Get the bounding box of a particular character.
78 // Parameters:
79 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
80 //			index		-	Zero-based index of the character.
81 //			left		-	Pointer to a double receiving the left position of the character box.
82 //			right		-	Pointer to a double receiving the right position of the character box.
83 //			bottom		-	Pointer to a double receiving the bottom position of the character box.
84 //			top			-	Pointer to a double receiving the top position of the character box.
85 // Return value:
86 //			None.
87 // Comments:
88 //			All positions are measured in PDF "user space".
89 //			Note: the output order (left, right, bottom, top) differs from that of FPDFText_GetRect.
90 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, double* left,
91 													double* right, double* bottom, double* top);
92 
93 // Function: FPDFText_GetCharIndexAtPos
94 //			Get the index of a character at or nearby a certain position on the page.
95 // Parameters:
96 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
97 //			x			-	X position in PDF "user space".
98 //			y			-	Y position in PDF "user space".
99 //			xTolerance	-	An x-axis tolerance value for character hit detection, in points.
100 //			yTolerance	-	A y-axis tolerance value for character hit detection, in points.
101 // Return value:
102 //			The zero-based index of the character at, or nearby, the point (x,y).
103 //			If there is no character at or nearby the point, the return value will be -1.
104 //			If an error occurs, -3 will be returned.
105 //
106 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
107 												 double x, double y, double xTolerance, double yTolerance);
108 
109 // Function: FPDFText_GetText
110 //			Extract a Unicode text string from the page.
111 // Parameters:
112 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
113 //			start_index	-	Index of the first character to extract.
114 //			count		-	Number of characters to be extracted.
115 //			result		-	A buffer (allocated by the application) receiving the extracted Unicode characters.
116 //							The buffer must be large enough to hold the number of characters plus a terminator.
117 // Return value:
118 //			Number of characters written into the result buffer, including the trailing terminator.
119 // Comments:
120 //			This function ignores characters without Unicode information.
121 //
122 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short* result);
123 
124 // Function: FPDFText_CountRects
125 //			Count the number of rectangular areas occupied by a segment of text.
126 // Parameters:
127 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
128 //			start_index	-	Index of the first character of the segment.
129 //			count		-	Number of characters.
130 // Return value:
131 //			Number of rectangles, or zero on error.
132 // Comments:
133 //			This function, along with FPDFText_GetRect, can be used by applications to detect the position
134 //			of a text segment on the page, so that the proper areas can be highlighted, for example.
135 //			FPDFTEXT will automatically merge small character boxes into a bigger one if those characters
136 //			are on the same line and use the same font settings.
137 //
138 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_index, int count);
139 
140 // Function: FPDFText_GetRect
141 //			Get one rectangular area from the result generated by FPDFText_CountRects.
142 // Parameters:
143 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
144 //			rect_index	-	Zero-based index of the rectangle.
145 //			left		-	Pointer to a double receiving the left boundary of the rectangle.
146 //			top			-	Pointer to a double receiving the top boundary of the rectangle.
147 //			right		-	Pointer to a double receiving the right boundary of the rectangle.
148 //			bottom		-	Pointer to a double receiving the bottom boundary of the rectangle.
149 // Return value:
150 //			None.
151 //
152 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index, double* left, double* top,
153 											double* right, double* bottom);
154 
155 // Function: FPDFText_GetBoundedText
156 //			Extract Unicode text within a rectangular boundary on the page.
157 // Parameters:
158 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
159 //			left		-	Left boundary.
160 //			top			-	Top boundary.
161 //			right		-	Right boundary.
162 //			bottom		-	Bottom boundary.
163 //			buffer		-	A Unicode buffer.
164 //			buflen		-	Size of the buffer in characters (not bytes), excluding an additional terminator.
165 // Return value:
166 //			If buffer is NULL or buflen is zero, return the number of characters (not bytes) needed;
167 //			otherwise, return the number of characters copied into the buffer.
168 //
169 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top,
170 											  double right, double bottom,unsigned short* buffer,int buflen);
171 
172 
173 // Flags used by the FPDFText_FindStart function (see its flags parameter).
174 #define FPDF_MATCHCASE      0x00000001		// Match case. If not set, the search does not match case.
175 #define FPDF_MATCHWHOLEWORD 0x00000002		// Match whole word. If not set, the search does not match the whole word.
176 
177 // Function: FPDFText_FindStart
178 //			Start a search.
179 // Parameters:
180 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
181 //			findwhat	-	A Unicode match pattern.
182 //			flags		-	Option flags (see FPDF_MATCHCASE and FPDF_MATCHWHOLEWORD).
183 //			start_index	-	Index of the character to start from; -1 means the end of the page.
184 // Return value:
185 //			A handle for the search context. FPDFText_FindClose must be called to release this handle.
186 //
187 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPDF_WIDESTRING findwhat,
188 													unsigned long flags, int start_index);
189 
190 // Function: FPDFText_FindNext
191 //			Search forward, in the direction from the start of the page to its end.
192 // Parameters:
193 //			handle		-	A search context handle, as returned by FPDFText_FindStart.
194 // Return value:
195 //			Whether a match is found.
196 //
197 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle);
198 
199 // Function: FPDFText_FindPrev
200 //			Search backward, in the direction from the end of the page to its start.
201 // Parameters:
202 //			handle		-	A search context handle, as returned by FPDFText_FindStart.
203 // Return value:
204 //			Whether a match is found.
205 //
206 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle);
207 
208 // Function: FPDFText_GetSchResultIndex
209 //			Get the index of the first character of the search result.
210 // Parameters:
211 //			handle		-	A search context handle, as returned by FPDFText_FindStart.
212 // Return value:
213 //			Index of the starting character.
214 //
215 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle);
216 
217 // Function: FPDFText_GetSchCount
218 //			Get the number of matched characters in the search result.
219 // Parameters:
220 //			handle		-	A search context handle, as returned by FPDFText_FindStart.
221 // Return value:
222 //			Number of matched characters.
223 //
224 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle);
225 
226 // Function: FPDFText_FindClose
227 //			Release a search context.
228 // Parameters:
229 //			handle		-	A search context handle, as returned by FPDFText_FindStart.
230 // Return value:
231 //			None.
232 //
233 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle);
234 
235 // Function: FPDFLink_LoadWebLinks
236 //			Prepare information about weblinks in a page.
237 // Parameters:
238 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
239 // Return value:
240 //			A handle to the page's links information structure,
241 //			or NULL if something goes wrong.
242 // Comments:
243 //			Weblinks are links implicitly embedded in PDF pages. PDF also has a type of
244 //			annotation called "link"; FPDFTEXT does not deal with that kind of link.
245 //			The FPDFTEXT weblink feature is useful for automatically detecting links in the page
246 //			contents. For example, things like "http://www.foxitsoftware.com" will be detected,
247 //			so applications can allow the user to click on those characters to activate the link,
248 //			even if the PDF doesn't come with link annotations.
249 //
250 //			FPDFLink_CloseWebLinks must be called to release resources.
251 //
252 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page);
253 
254 // Function: FPDFLink_CountWebLinks
255 //			Count the number of detected web links.
256 // Parameters:
257 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
258 // Return value:
259 //			Number of detected web links.
260 //
261 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page);
262 
263 // Function: FPDFLink_GetURL
264 //			Fetch the URL information for a detected web link.
265 // Parameters:
266 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
267 //			link_index	-	Zero-based index of the link.
268 //			buffer		-	A Unicode buffer.
269 //			buflen		-	Size of the buffer in characters (not bytes), including an additional terminator.
270 // Return value:
271 //			If buffer is NULL or buflen is zero, return the number of characters needed (characters, not bytes; the terminator is also counted);
272 //			otherwise, return the number of characters copied into the buffer.
273 //
274 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, unsigned short* buffer,int buflen);
275 
276 // Function: FPDFLink_CountRects
277 //			Count the number of rectangular areas for the link.
278 // Parameters:
279 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
280 //			link_index	-	Zero-based index of the link.
281 // Return value:
282 //			Number of rectangular areas for the link.
283 //
284 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index);
285 
286 // Function: FPDFLink_GetRect
287 //			Fetch the boundaries of one rectangle for a link.
288 // Parameters:
289 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
290 //			link_index	-	Zero-based index of the link.
291 //			rect_index	-	Zero-based index of the rectangle.
292 //			left		-	Pointer to a double receiving the left boundary of the rectangle.
293 //			top			-	Pointer to a double receiving the top boundary of the rectangle.
294 //			right		-	Pointer to a double receiving the right boundary of the rectangle.
295 //			bottom		-	Pointer to a double receiving the bottom boundary of the rectangle.
296 // Return value:
297 //			None.
298 //
299 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index,
300 										double* left, double* top,double* right, double* bottom);
301 
302 // Function: FPDFLink_CloseWebLinks
303 //			Release resources used by the weblink feature.
304 // Parameters:
305 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
306 // Return value:
307 //			None.
308 //
309 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page);
310 
311 
312 #ifdef __cplusplus
313 }  // extern "C" -- a linkage specification is closed by a brace alone, with no trailing semicolon
314 #endif
315 
316 #endif//_FPDFTEXT_H_
317