1 /* 2 * Copyright (C) 2024 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_ 18 #define MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_ 19 20 #include <stdint.h> 21 22 #include <span> 23 #include <string> 24 #include <unordered_map> 25 #include <unordered_set> 26 #include <utility> 27 #include <vector> 28 29 #include "annotation.h" 30 #include "cpp/fpdf_scopers.h" 31 #include "form_filler.h" 32 #include "form_widget_info.h" 33 #include "fpdfview.h" 34 #include "page_object.h" 35 #include "rect.h" 36 37 namespace pdfClient { 38 39 // Render Flags corresponding to each render flag defined in 40 // 'pdf/framework/java/android/graphics/pdf/RenderParams.java' 41 // LINT.IfChange 42 static const int FLAG_RENDER_TEXT_ANNOTATIONS = 1 << 1; 43 static const int FLAG_RENDER_HIGHLIGHT_ANNOTATIONS = 1 << 2; 44 static const int FLAG_RENDER_STAMP_ANNOTATIONS = 1 << 3; 45 static const int FLAG_RENDER_FREETEXT_ANNOTATIONS = 1 << 4; 46 // LINT.ThenChange(packages/providers/MediaProvider/pdf/framework/java/android/graphics/pdf/RenderParams.java) 47 48 static const std::unordered_map<int, std::vector<int>> renderFlagsAnnotsMap = { 49 {FLAG_RENDER_TEXT_ANNOTATIONS, 50 std::vector<int>{ 51 FPDF_ANNOT_TEXT, 52 FPDF_ANNOT_FREETEXT}}, // TODO Remove FreeText from FLAG_RENDER_TEXT_ANNOTATIONS 53 {FLAG_RENDER_HIGHLIGHT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_HIGHLIGHT}}, 54 {FLAG_RENDER_STAMP_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_STAMP}}, 55 {FLAG_RENDER_FREETEXT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_FREETEXT}}}; 56 // A start index (inclusive) and a stop index (exclusive) into the string of 57 // codepoints that make up a range of text. 58 typedef std::pair<int, int> TextRange; 59 60 // A start index (inclusive) or stop index (exclusive) into the string of 61 // codepoints that make up a range of text, and a point on the boundary where 62 // the selection starts or stops. 63 struct SelectionBoundary { 64 int index; 65 Point_i point; 66 bool is_rtl; 67 SelectionBoundarySelectionBoundary68 SelectionBoundary(int i, int x, int y, bool r) : index(i), is_rtl(r) { point = IntPoint(x, y); } 69 }; 70 71 struct GotoLinkDest { 72 int page_number = 0; 73 float x = 0; 74 float y = 0; 75 float zoom = 0; 76 set_page_numberGotoLinkDest77 void set_page_number(int page_number) { this->page_number = page_number; } 78 set_xGotoLinkDest79 void set_x(float x) { this->x = x; } 80 set_yGotoLinkDest81 void set_y(float y) { this->y = y; } 82 set_zoomGotoLinkDest83 void set_zoom(float zoom) { this->zoom = zoom; } 84 }; 85 86 struct GotoLink { 87 std::vector<Rectangle_i> rect; 88 GotoLinkDest dest; 89 }; 90 91 // Interface for converting coordinates between two spaces. 92 class ICoordinateConverter { 93 public: 94 virtual ~ICoordinateConverter() = default; 95 96 // Convert a point from page coordinates to device coordinates 97 virtual Point_f PageToDevice(const Point_f& in) const = 0; 98 99 // Convert a point from device coordinates to page coordinates 100 virtual Point_f DeviceToPage(const Point_f& in) const = 0; 101 }; 102 103 // Wrapper on a FPDF_PAGE that adds rendering functionality. 104 class Page : public ICoordinateConverter { 105 public: 106 // FPDF_PAGE is opened when constructed. 107 Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler); 108 109 // Move constructor. 110 Page(Page&& p); 111 112 virtual ~Page(); 113 114 int Width() const; 115 116 int Height() const; 117 118 Rectangle_i Dimensions() const; 119 120 // Render the page to the output bitmap, applying the appropriate transform, clip, and 121 // render mode as specified. 122 void Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top, 123 int clip_right, int clip_bottom, int render_mode, int show_annot_types, 124 bool render_form_fields); 125 126 // The page has a transform that must be applied to all characters and objects 127 // on the page. This transforms from the page's internal co-ordinate system 128 // to the external co-ordinate system from (0, 0) to (Width(), Height()). 129 Point_i ApplyPageTransform(const Point_d& input) const; 130 Rectangle_i ApplyPageTransform(const Rectangle_d& input) const; 131 Rectangle_i ApplyPageTransform(const Rectangle_i& input) const; 132 133 // Transform from the external co-ordinate system (0, 0)-(Width(), Height()) 134 // back into the page's internal co-ordinate system. 135 Point_d UnapplyPageTransform(const Point_i& input) const; 136 137 // ICoordinate Converter 138 Point_f PageToDevice(const Point_f& in) const override; 139 140 Point_f DeviceToPage(const Point_f& in) const override; 141 142 int NumChars(); 143 144 uint32_t GetUnicode(int char_index); 145 146 // Returns the entire text of the given page in UTF-8. 147 std::string GetTextUtf8(); 148 149 // Returns part of the text of the given page in UTF-8. 150 std::string GetTextUtf8(const int start_index, const int stop_index); 151 152 // Appends each alt-text instance on the page to |result|. 153 void GetAltTextUtf8(std::vector<std::string>* result) const; 154 155 // Searches for the given word on the given page and returns the number of 156 // matches. Ignores case and accents when searching. 157 // If matches vector is not NULL, it is filled with the start and end indices 158 // of each match - these are character indices according to FPDFText API. 159 int FindMatchesUtf8(std::string_view utf8, std::vector<TextRange>* matches); 160 161 // Same as above, but finds the bounding boxes of the matches. Returns the 162 // number of matches and fills in the rects vector. Each match can take more 163 // than one rect to bound, so the match_to_rect vector is filled so that 164 // rects[match_to_rect[i]] is the first rectangle that belongs with match i. 165 // Matches for which we cannot find a single bounding rectangle are discarded. 166 // The char_indexes vector is filled with the char index that each match 167 // starts at - the beginning of its TextRange. 168 int BoundsOfMatchesUtf8(std::string_view utf8, std::vector<Rectangle_i>* rects, 169 std::vector<int>* match_to_rect, std::vector<int>* char_indexes); 170 171 // Appends 0 or more rectangles to the given vector that surround the text 172 // of the given page from the start index and the stop index. 173 // Returns the number of rectangles used to surround the text. 174 int GetTextBounds(const int start_index, const int stop_index, std::vector<Rectangle_i>* rects); 175 176 // If there is a word at the given point, returns true and modifies the given 177 // boundaries to point to each end of the word - otherwise returns false. 178 bool SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop); 179 180 // Modifies the given selection boundary object in the following ways: 181 // - The resulting boundary will have an index that is within the range 182 // [0...n], where n is NumChars(). 183 // - The resulting boundary will have a point that is at the outer corner 184 // of the char just inside the selection. 185 void ConstrainBoundary(SelectionBoundary* boundary); 186 187 int GetFontSize(int index); 188 // Get the URLs and bounding rectangles for all links on the page. 189 int GetLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect, 190 std::vector<std::string>* urls) const; 191 192 // Returns the list of GotoLink for all GotoLinks on the page. 193 std::vector<GotoLink> GetGotoLinks() const; 194 195 // Perform any operations required to prepare this page for form filling. 196 void InitializeFormFilling(); 197 198 // Perform any clean up operations after form filling is complete. 199 void TerminateFormFilling(); 200 201 // Obtain information about the form widget at |point| on the page, if any. 202 // |point| is in device coordinates. 203 FormWidgetInfo GetFormWidgetInfo(Point_i point); 204 205 // Obtain information about the form widget with index |annotation_index| on 206 // the page, if any. 207 FormWidgetInfo GetFormWidgetInfo(int annotation_index); 208 209 // Obtain form widget information for all form field annotations on the page, 210 // optionally restricting by |type_ids| and store in |widget_infos|. See 211 // fpdf_formfill.h for type constants. If |type_ids| is empty all form 212 // widgets on page will be added to |widget_infos|, if any. 213 void GetFormWidgetInfos(const std::unordered_set<int>& type_ids, 214 std::vector<FormWidgetInfo>* widget_infos); 215 216 // Perform a click at |point| on the page. Any focus in the document 217 // resulting from this operation will be killed before returning. No-op if 218 // no widget present at |point| or widget cannot be edited. Returns true if 219 // click was performed. |point| is in device coordinates. 220 bool ClickOnPoint(Point_i point); 221 222 // Set the value text of the widget at |annotation_index| on page. No-op if 223 // no widget present or widget cannot be edited. Returns true if text was 224 // set, false otherwise. 225 bool SetFormFieldText(int annotation_index, std::string_view text); 226 227 // Set the |selected_indices| for the choice widget at |annotation_index| as 228 // selected and deselect all other indices. No-op if no widget present or 229 // widget cannot be edited. Returns true if indices were set, false otherwise. 230 bool SetChoiceSelection(int annotation_index, std::span<const int> selected_indices); 231 232 // Informs the page that the |rect| of the page bitmap has been invalidated. 233 // This takes place following form filling operations. |Rect| must be in page 234 // coordinates. 235 void NotifyInvalidRect(Rectangle_i rect); 236 237 // Return whether or not an area of the bitmap has been invalidated. 238 bool HasInvalidRect(); 239 240 // Returns the area of the page that has been invalidated and resets the 241 // field. Rect returned in device coordinates. 242 Rectangle_i ConsumeInvalidRect(); 243 244 // Returns FPDF_PAGE. This Page retains ownership. All operations that wish 245 // to access FPDF_PAGE should to call methods of this class instead of 246 // requesting the FPDF_PAGE directly through this method. 247 void* Get(); 248 249 // Get all PageObjects on this Page. Ownership of PageObjects is with Page. 250 std::vector<PageObject*> GetPageObjects(bool refetch = false); 251 252 // Add PageObject to Page. 253 int AddPageObject(std::unique_ptr<PageObject> page_object); 254 255 // Remove PageObject on Page. 256 bool RemovePageObject(int index); 257 258 // Update the attributes of the PageObject on the Page. Ownership stays with 259 // the Page, we only modify the PageObject's attributes. 260 bool UpdatePageObject(int index, std::unique_ptr<PageObject> page_object); 261 262 // Get all supported annotations. The list will contain null for the types of annotations 263 // which are not supported. Page will have ownership of annotations 264 std::vector<Annotation*> GetPageAnnotations(); 265 266 // Add an annotation to the page 267 int AddPageAnnotation(std::unique_ptr<Annotation> annotation); 268 269 // Remove the annotation from the page at a given index 270 bool RemovePageAnnotation(int index); 271 272 // Update the attributes of the annotation on the Page. Ownership stays with 273 // the Page, we only modify the Annotation's attributes. 274 bool UpdatePageAnnotation(int index, std::unique_ptr<Annotation> annotation); 275 276 private: 277 // Convenience methods to access the variables dependent on an initialized 278 // ScopedFPDFTextPage. We lazy init text_page_ for efficiency because many 279 // page operations do not require it. 280 FPDF_TEXTPAGE text_page(); 281 int first_printable_char_index(); 282 int last_printable_char_index(); 283 284 // Check that text_page_ and first/last_printable_char_index_ have been 285 // initialized and do so if not. 286 void EnsureTextPageInitialized(); 287 288 // Android bitmaps are in ARGB order. pdfClient emits bitmaps which have red and 289 // blue swapped when treated as Android bitmaps - but this function fixes it. 290 // NOTE: This might rely on little-endian architecture. 291 void InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const; 292 293 // Looks for an instance of the given UTF32 string on the given page, starting 294 // not before the page_start index and ending before the page_stop index. 295 // If found, returns true and updates the TextRange. Case/accent insensitive. 296 bool FindMatch(const std::u32string& query, const int page_start, const int page_stop, 297 TextRange* match); 298 299 // Checks if the page matches the given UTF32 string at the given match_start 300 // index that ends before the page_stop index. If it matches, returns true 301 // and updates the TextRange. Case/accent insensitive. 302 bool IsMatch(const std::u32string& query, const int match_start, const int page_stop, 303 TextRange* match); 304 305 // Returns a SelectionBoundary at a particular index - 0 means before the char 306 // at index 0, 1 means after char 0 but before the char at index 1, and so on. 307 SelectionBoundary GetBoundaryAtIndex(const int index); 308 309 // Returns whether text is flowing left or right at a particular index. 310 bool IsRtlAtIndex(const int index); 311 312 // Returns a SelectionBoundary at a particular index, once we already know 313 // which way the text is flowing at that index. 314 SelectionBoundary GetBoundaryAtIndex(const int index, bool is_rtl); 315 316 // Returns a SelectionBoundary as near as possible to the given point. 317 SelectionBoundary GetBoundaryAtPoint(const Point_i& point); 318 319 // Given a boundary index to the middle or either end of a word, returns 320 // the boundary index of the start of that word - which is the index of the 321 // first char that is part of that word. 322 int GetWordStartIndex(const int index); 323 324 // Given a boundary index to the middle or either end of a word, returns 325 // the boundary index of the stop of that word - which is the index of the 326 // first char that is immediately after that word, but not part of it. 327 int GetWordStopIndex(const int index); 328 329 // Returns the rectangle that bounds the given char - page transform is not 330 // yet applied, must be applied later. 331 Rectangle_d GetRawCharBounds(int char_index); 332 333 // Returns the rectangle that bounds the given char, with the page transform 334 // already applied. 335 Rectangle_i GetCharBounds(int char_index); 336 337 // Returns the origin of the given char, with the page transform applied. 338 Point_i GetCharOrigin(int char_index); 339 340 // Get the URLs and bounding rectangles for annotation links only - text 341 // that has been annotated to link to some URL. 342 int GetAnnotatedLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect, 343 std::vector<std::string>* urls) const; 344 345 // Get the URLs and bounding rectangles for inferred links only - text that 346 // we recognize as a potential link since it starts with http:// or similar. 347 int GetInferredLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect, 348 std::vector<std::string>* urls) const; 349 350 bool IsGotoLink(FPDF_LINK link) const; 351 352 bool IsUrlLink(FPDF_LINK link) const; 353 354 // Get the URL of the given link, in UTF-8. 355 std::string GetUrlUtf8(FPDF_LINK link) const; 356 357 // Get the bounds of the given link, in page co-ordinates. 358 Rectangle_i GetRect(FPDF_LINK link) const; 359 360 FPDF_DOCUMENT document_; // Not owned. 361 362 ScopedFPDFPage page_; 363 364 FormFiller* const form_filler_; // Not owned. 365 366 // these variables lazily initialized, should be accessed via corresponding 367 // accessor methods 368 ScopedFPDFTextPage text_page_; 369 int first_printable_char_index_; 370 int last_printable_char_index_; 371 372 // Rectangle representing an area of the bitmap for this page that has been 373 // reported as invalidated. Will be coalesced from all rectangles that are 374 // reported as invalidated since the last time this rectangle was consumed. 375 // Rectangles are invalidated due to form filling operations. 376 // Rectangle is in Device Coordinates. 377 Rectangle_i invalid_rect_; 378 379 // Page number that is opened. 380 int page_num_; 381 382 // Page Objects 383 std::vector<std::unique_ptr<PageObject>> page_objects_; 384 385 // Populates page_objects_ with PageObjects on Page. 386 void PopulatePageObjects(bool refetch); 387 388 // Annotations 389 std::vector<std::unique_ptr<Annotation>> annotations_; 390 391 void PopulateAnnotations(); 392 }; 393 394 } // namespace pdfClient 395 396 #endif // MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_