/*
 * Copyright (C) 2024 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_
18 #define MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_
19 
#include <stdint.h>

#include <memory>
#include <span>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "annotation.h"
#include "cpp/fpdf_scopers.h"
#include "form_filler.h"
#include "form_widget_info.h"
#include "fpdfview.h"
#include "page_object.h"
#include "rect.h"
36 
37 namespace pdfClient {
38 
// Render flags corresponding to each render flag defined in
// 'pdf/framework/java/android/graphics/pdf/RenderParams.java'.
// Each flag occupies its own bit. The values must stay in sync with the Java
// side, so do not renumber them here without changing RenderParams.java too.
// LINT.IfChange
inline constexpr int FLAG_RENDER_TEXT_ANNOTATIONS = 1 << 1;
inline constexpr int FLAG_RENDER_HIGHLIGHT_ANNOTATIONS = 1 << 2;
inline constexpr int FLAG_RENDER_STAMP_ANNOTATIONS = 1 << 3;
inline constexpr int FLAG_RENDER_FREETEXT_ANNOTATIONS = 1 << 4;
// LINT.ThenChange(packages/providers/MediaProvider/pdf/framework/java/android/graphics/pdf/RenderParams.java)
47 
48 static const std::unordered_map<int, std::vector<int>> renderFlagsAnnotsMap = {
49         {FLAG_RENDER_TEXT_ANNOTATIONS,
50          std::vector<int>{
51                  FPDF_ANNOT_TEXT,
52                  FPDF_ANNOT_FREETEXT}},  // TODO Remove FreeText from FLAG_RENDER_TEXT_ANNOTATIONS
53         {FLAG_RENDER_HIGHLIGHT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_HIGHLIGHT}},
54         {FLAG_RENDER_STAMP_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_STAMP}},
55         {FLAG_RENDER_FREETEXT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_FREETEXT}}};
// A half-open range into the string of codepoints that make up a range of
// text: `first` is the start index (inclusive) and `second` is the stop index
// (exclusive).
using TextRange = std::pair<int, int>;
59 
60 // A start index (inclusive) or stop index (exclusive) into the string of
61 // codepoints that make up a range of text, and a point on the boundary where
62 // the selection starts or stops.
63 struct SelectionBoundary {
64     int index;
65     Point_i point;
66     bool is_rtl;
67 
SelectionBoundarySelectionBoundary68     SelectionBoundary(int i, int x, int y, bool r) : index(i), is_rtl(r) { point = IntPoint(x, y); }
69 };
70 
// Destination description used by GotoLink: a page number plus an (x, y)
// position and a zoom value. Every field defaults to zero.
// NOTE(review): the coordinate space of x/y and the meaning of a zero zoom are
// not visible in this header - confirm with the code that fills this in.
struct GotoLinkDest {
    int page_number = 0;
    float x = 0.0f;
    float y = 0.0f;
    float zoom = 0.0f;

    // Setters simply overwrite the corresponding field; no validation is done.
    void set_page_number(int page_number) { this->page_number = page_number; }

    void set_x(float x) { this->x = x; }

    void set_y(float y) { this->y = y; }

    void set_zoom(float zoom) { this->zoom = zoom; }
};
85 
86 struct GotoLink {
87     std::vector<Rectangle_i> rect;
88     GotoLinkDest dest;
89 };
90 
91 // Interface for converting coordinates between two spaces.
92 class ICoordinateConverter {
93   public:
94     virtual ~ICoordinateConverter() = default;
95 
96     // Convert a point from page coordinates to device coordinates
97     virtual Point_f PageToDevice(const Point_f& in) const = 0;
98 
99     // Convert a point from device coordinates to page coordinates
100     virtual Point_f DeviceToPage(const Point_f& in) const = 0;
101 };
102 
103 // Wrapper on a FPDF_PAGE that adds rendering functionality.
104 class Page : public ICoordinateConverter {
105   public:
106     // FPDF_PAGE is opened when constructed.
107     Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler);
108 
109     // Move constructor.
110     Page(Page&& p);
111 
112     virtual ~Page();
113 
114     int Width() const;
115 
116     int Height() const;
117 
118     Rectangle_i Dimensions() const;
119 
120     // Render the page to the output bitmap, applying the appropriate transform, clip, and
121     // render mode as specified.
122     void Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top,
123          int clip_right, int clip_bottom, int render_mode, int show_annot_types,
124          bool render_form_fields);
125 
126     // The page has a transform that must be applied to all characters and objects
127     // on the page. This transforms from the page's internal co-ordinate system
128     // to the external co-ordinate system from (0, 0) to (Width(), Height()).
129     Point_i ApplyPageTransform(const Point_d& input) const;
130     Rectangle_i ApplyPageTransform(const Rectangle_d& input) const;
131     Rectangle_i ApplyPageTransform(const Rectangle_i& input) const;
132 
133     // Transform from the external co-ordinate system (0, 0)-(Width(), Height())
134     // back into the page's internal co-ordinate system.
135     Point_d UnapplyPageTransform(const Point_i& input) const;
136 
137     // ICoordinate Converter
138     Point_f PageToDevice(const Point_f& in) const override;
139 
140     Point_f DeviceToPage(const Point_f& in) const override;
141 
142     int NumChars();
143 
144     uint32_t GetUnicode(int char_index);
145 
146     // Returns the entire text of the given page in UTF-8.
147     std::string GetTextUtf8();
148 
149     // Returns part of the text of the given page in UTF-8.
150     std::string GetTextUtf8(const int start_index, const int stop_index);
151 
152     // Appends each alt-text instance on the page to |result|.
153     void GetAltTextUtf8(std::vector<std::string>* result) const;
154 
155     // Searches for the given word on the given page and returns the number of
156     // matches. Ignores case and accents when searching.
157     // If matches vector is not NULL, it is filled with the start and end indices
158     // of each match - these are character indices according to FPDFText API.
159     int FindMatchesUtf8(std::string_view utf8, std::vector<TextRange>* matches);
160 
161     // Same as above, but finds the bounding boxes of the matches. Returns the
162     // number of matches and fills in the rects vector. Each match can take more
163     // than one rect to bound, so the match_to_rect vector is filled so that
164     // rects[match_to_rect[i]] is the first rectangle that belongs with match i.
165     // Matches for which we cannot find a single bounding rectangle are discarded.
166     // The char_indexes vector is filled with the char index that each match
167     // starts at - the beginning of its TextRange.
168     int BoundsOfMatchesUtf8(std::string_view utf8, std::vector<Rectangle_i>* rects,
169                             std::vector<int>* match_to_rect, std::vector<int>* char_indexes);
170 
171     // Appends 0 or more rectangles to the given vector that surround the text
172     // of the given page from the start index and the stop index.
173     // Returns the number of rectangles used to surround the text.
174     int GetTextBounds(const int start_index, const int stop_index, std::vector<Rectangle_i>* rects);
175 
176     // If there is a word at the given point, returns true and modifies the given
177     // boundaries to point to each end of the word - otherwise returns false.
178     bool SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop);
179 
180     // Modifies the given selection boundary object in the following ways:
181     // - The resulting boundary will have an index that is within the range
182     // [0...n], where n is NumChars().
183     // - The resulting boundary will have a point that is at the outer corner
184     // of the char just inside the selection.
185     void ConstrainBoundary(SelectionBoundary* boundary);
186 
187     int GetFontSize(int index);
188     // Get the URLs and bounding rectangles for all links on the page.
189     int GetLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect,
190                      std::vector<std::string>* urls) const;
191 
192     // Returns the list of GotoLink for all GotoLinks on the page.
193     std::vector<GotoLink> GetGotoLinks() const;
194 
195     // Perform any operations required to prepare this page for form filling.
196     void InitializeFormFilling();
197 
198     // Perform any clean up operations after form filling is complete.
199     void TerminateFormFilling();
200 
201     // Obtain information about the form widget at |point| on the page, if any.
202     // |point| is in device coordinates.
203     FormWidgetInfo GetFormWidgetInfo(Point_i point);
204 
205     // Obtain information about the form widget with index |annotation_index| on
206     // the page, if any.
207     FormWidgetInfo GetFormWidgetInfo(int annotation_index);
208 
209     // Obtain form widget information for all form field annotations on the page,
210     // optionally restricting by |type_ids| and store in |widget_infos|. See
211     // fpdf_formfill.h for type constants. If |type_ids| is empty all form
212     // widgets on page will be added to |widget_infos|, if any.
213     void GetFormWidgetInfos(const std::unordered_set<int>& type_ids,
214                             std::vector<FormWidgetInfo>* widget_infos);
215 
216     // Perform a click at |point| on the page. Any focus in the document
217     // resulting from this operation will be killed before returning.  No-op if
218     // no widget present at |point| or widget cannot be edited. Returns true if
219     // click was performed. |point| is in device coordinates.
220     bool ClickOnPoint(Point_i point);
221 
222     // Set the value text of the widget at |annotation_index| on page. No-op if
223     // no widget present or widget cannot be edited. Returns true if text was
224     // set, false otherwise.
225     bool SetFormFieldText(int annotation_index, std::string_view text);
226 
227     // Set the |selected_indices| for the choice widget at |annotation_index| as
228     // selected and deselect all other indices. No-op if no widget present or
229     // widget cannot be edited. Returns true if indices were set, false otherwise.
230     bool SetChoiceSelection(int annotation_index, std::span<const int> selected_indices);
231 
232     // Informs the page that the |rect| of the page bitmap has been invalidated.
233     // This takes place following form filling operations. |Rect| must be in page
234     // coordinates.
235     void NotifyInvalidRect(Rectangle_i rect);
236 
237     // Return whether or not an area of the bitmap has been invalidated.
238     bool HasInvalidRect();
239 
240     // Returns the area of the page that has been invalidated and resets the
241     // field. Rect returned in device coordinates.
242     Rectangle_i ConsumeInvalidRect();
243 
244     // Returns FPDF_PAGE. This Page retains ownership. All operations that wish
245     // to access FPDF_PAGE should to call methods of this class instead of
246     // requesting the FPDF_PAGE directly through this method.
247     void* Get();
248 
249     // Get all PageObjects on this Page. Ownership of PageObjects is with Page.
250     std::vector<PageObject*> GetPageObjects(bool refetch = false);
251 
252     // Add PageObject to Page.
253     int AddPageObject(std::unique_ptr<PageObject> page_object);
254 
255     // Remove PageObject on Page.
256     bool RemovePageObject(int index);
257 
258     // Update the attributes of the PageObject on the Page. Ownership stays with
259     // the Page, we only modify the PageObject's attributes.
260     bool UpdatePageObject(int index, std::unique_ptr<PageObject> page_object);
261 
262     // Get all supported annotations. The list will contain null for the types of annotations
263     // which are not supported. Page will have ownership of annotations
264     std::vector<Annotation*> GetPageAnnotations();
265 
266     // Add an annotation to the page
267     int AddPageAnnotation(std::unique_ptr<Annotation> annotation);
268 
269     // Remove the annotation from the page at a given index
270     bool RemovePageAnnotation(int index);
271 
272     // Update the attributes of the annotation on the Page. Ownership stays with
273     // the Page, we only modify the Annotation's attributes.
274     bool UpdatePageAnnotation(int index, std::unique_ptr<Annotation> annotation);
275 
276   private:
277     // Convenience methods to access the variables dependent on an initialized
278     // ScopedFPDFTextPage. We lazy init text_page_ for efficiency because many
279     // page operations do not require it.
280     FPDF_TEXTPAGE text_page();
281     int first_printable_char_index();
282     int last_printable_char_index();
283 
284     // Check that text_page_ and first/last_printable_char_index_ have been
285     // initialized and do so if not.
286     void EnsureTextPageInitialized();
287 
288     // Android bitmaps are in ARGB order. pdfClient emits bitmaps which have red and
289     // blue swapped when treated as Android bitmaps - but this function fixes it.
290     // NOTE: This might rely on little-endian architecture.
291     void InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const;
292 
293     // Looks for an instance of the given UTF32 string on the given page, starting
294     // not before the page_start index and ending before the page_stop index.
295     // If found, returns true and updates the TextRange. Case/accent insensitive.
296     bool FindMatch(const std::u32string& query, const int page_start, const int page_stop,
297                    TextRange* match);
298 
299     // Checks if the page matches the given UTF32 string at the given match_start
300     // index that ends before the page_stop index. If it matches, returns true
301     // and updates the TextRange. Case/accent insensitive.
302     bool IsMatch(const std::u32string& query, const int match_start, const int page_stop,
303                  TextRange* match);
304 
305     // Returns a SelectionBoundary at a particular index - 0 means before the char
306     // at index 0, 1 means after char 0 but before the char at index 1, and so on.
307     SelectionBoundary GetBoundaryAtIndex(const int index);
308 
309     // Returns whether text is flowing left or right at a particular index.
310     bool IsRtlAtIndex(const int index);
311 
312     // Returns a SelectionBoundary at a particular index, once we already know
313     // which way the text is flowing at that index.
314     SelectionBoundary GetBoundaryAtIndex(const int index, bool is_rtl);
315 
316     // Returns a SelectionBoundary as near as possible to the given point.
317     SelectionBoundary GetBoundaryAtPoint(const Point_i& point);
318 
319     // Given a boundary index to the middle or either end of a word, returns
320     // the boundary index of the start of that word - which is the index of the
321     // first char that is part of that word.
322     int GetWordStartIndex(const int index);
323 
324     // Given a boundary index to the middle or either end of a word, returns
325     // the boundary index of the stop of that word - which is the index of the
326     // first char that is immediately after that word, but not part of it.
327     int GetWordStopIndex(const int index);
328 
329     // Returns the rectangle that bounds the given char - page transform is not
330     // yet applied, must be applied later.
331     Rectangle_d GetRawCharBounds(int char_index);
332 
333     // Returns the rectangle that bounds the given char, with the page transform
334     // already applied.
335     Rectangle_i GetCharBounds(int char_index);
336 
337     // Returns the origin of the given char, with the page transform applied.
338     Point_i GetCharOrigin(int char_index);
339 
340     // Get the URLs and bounding rectangles for annotation links only - text
341     // that has been annotated to link to some URL.
342     int GetAnnotatedLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect,
343                               std::vector<std::string>* urls) const;
344 
345     // Get the URLs and bounding rectangles for inferred links only - text that
346     // we recognize as a potential link since it starts with http:// or similar.
347     int GetInferredLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect,
348                              std::vector<std::string>* urls) const;
349 
350     bool IsGotoLink(FPDF_LINK link) const;
351 
352     bool IsUrlLink(FPDF_LINK link) const;
353 
354     // Get the URL of the given link, in UTF-8.
355     std::string GetUrlUtf8(FPDF_LINK link) const;
356 
357     // Get the bounds of the given link, in page co-ordinates.
358     Rectangle_i GetRect(FPDF_LINK link) const;
359 
360     FPDF_DOCUMENT document_;  // Not owned.
361 
362     ScopedFPDFPage page_;
363 
364     FormFiller* const form_filler_;  // Not owned.
365 
366     // these variables lazily initialized, should be accessed via corresponding
367     // accessor methods
368     ScopedFPDFTextPage text_page_;
369     int first_printable_char_index_;
370     int last_printable_char_index_;
371 
372     // Rectangle representing an area of the bitmap for this page that has been
373     // reported as invalidated. Will be coalesced from all rectangles that are
374     // reported as invalidated since the last time this rectangle was consumed.
375     // Rectangles are invalidated due to form filling operations.
376     // Rectangle is in Device Coordinates.
377     Rectangle_i invalid_rect_;
378 
379     // Page number that is opened.
380     int page_num_;
381 
382     // Page Objects
383     std::vector<std::unique_ptr<PageObject>> page_objects_;
384 
385     // Populates page_objects_ with PageObjects on Page.
386     void PopulatePageObjects(bool refetch);
387 
388     // Annotations
389     std::vector<std::unique_ptr<Annotation>> annotations_;
390 
391     void PopulateAnnotations();
392 };
393 
394 }  // namespace pdfClient
395 
396 #endif  // MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_