• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "page.h"
18 
19 #include <stddef.h>
20 #include <stdint.h>
21 
22 #include <algorithm>
23 #include <limits>
24 #include <span>
25 #include <string>
26 #include <vector>
27 
28 #include "cpp/fpdf_scopers.h"
29 #include "form_filler.h"
30 #include "form_widget_info.h"
31 #include "fpdf_annot.h"
32 #include "fpdf_doc.h"
33 #include "fpdf_text.h"
34 #include "fpdfview.h"
35 #include "image_object.h"
36 #include "logging.h"
37 #include "normalize.h"
38 #include "path_object.h"
39 #include "rect.h"
40 #include "text_object.h"
41 #include "utf.h"
42 #include "utils/annot_hider.h"
43 #include "utils/text.h"
44 
45 #define LOG_TAG "page"
46 
47 using pdfClient::Rectangle_f;
48 using std::vector;
49 
50 namespace pdfClient {
51 
52 static const int kBytesPerPixel = 4;
53 
54 static const Rectangle_i kEmptyIntRectangle = IntRect(0, 0, 0, 0);
55 
56 // The acceptable fatness / inaccuracy of a user's finger in points.
57 static const int kFingerTolerance = 10;
58 
59 static const int RENDER_MODE_FOR_DISPLAY = 1;
60 static const int RENDER_MODE_FOR_PRINT = 2;
61 
Page(FPDF_DOCUMENT doc,int page_num,FormFiller * form_filler)62 Page::Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler)
63     : document_(doc),
64       page_(FPDF_LoadPage(doc, page_num)),
65       form_filler_(form_filler),
66       invalid_rect_(kEmptyIntRectangle),
67       page_num_(page_num) {}
68 
69 Page::Page(Page&& p) = default;
70 
~Page()71 Page::~Page() {}
72 
Width() const73 int Page::Width() const {
74     return FPDF_GetPageWidth(page_.get());
75 }
76 
Height() const77 int Page::Height() const {
78     return FPDF_GetPageHeight(page_.get());
79 }
80 
Dimensions() const81 Rectangle_i Page::Dimensions() const {
82     return IntRect(0, 0, Width(), Height());
83 }
84 
Render(FPDF_BITMAP bitmap,FS_MATRIX transform,int clip_left,int clip_top,int clip_right,int clip_bottom,int render_mode,int show_annot_types,bool render_form_fields)85 void Page::Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top,
86                   int clip_right, int clip_bottom, int render_mode, int show_annot_types,
87                   bool render_form_fields) {
88     std::unordered_set<int> types;
89     for (auto renderFlag_annot : renderFlagsAnnotsMap) {
90         if ((renderFlag_annot.first & show_annot_types) != 0) {
91             for (int annot_type : renderFlag_annot.second) {
92                 types.insert(annot_type);
93             }
94         }
95     }
96     if (render_form_fields) types.insert(FPDF_ANNOT_WIDGET);
97     pdfClient_utils::AnnotHider annot_hider(page_.get(), types);
98     int renderFlags = FPDF_REVERSE_BYTE_ORDER;
99     if (render_mode == RENDER_MODE_FOR_DISPLAY) {
100         renderFlags |= FPDF_LCD_TEXT | FPDF_ANNOT;
101     } else if (render_mode == RENDER_MODE_FOR_PRINT) {
102         renderFlags |= FPDF_PRINTING;
103     }
104 
105     FS_RECTF clip = {(float)clip_left, (float)clip_top, (float)clip_right, (float)clip_bottom};
106     FPDF_RenderPageBitmapWithMatrix(bitmap, page_.get(), &transform, &clip, renderFlags);
107 
108     if (render_form_fields) {
109         form_filler_->RenderTile(page_.get(), bitmap, transform, clip, renderFlags);
110     }
111 }
112 
ApplyPageTransform(const Point_d & input) const113 Point_i Page::ApplyPageTransform(const Point_d& input) const {
114     Point_i output;
115     FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
116                       &output.y);
117     return output;
118 }
119 
ApplyPageTransform(const Rectangle_d & input) const120 Rectangle_i Page::ApplyPageTransform(const Rectangle_d& input) const {
121     return ApplyPageTransform(OuterIntRect(input));
122 }
123 
// Converts an integer page rectangle to device coordinates. The (left, top)
// and (right, bottom) corners are transformed separately, a rectangle is built
// from the two results, and that rectangle is intersected with Dimensions().
Rectangle_i Page::ApplyPageTransform(const Rectangle_i& input) const {
    const int device_width = Width();
    const int device_height = Height();

    Point_i first_corner, second_corner;
    FPDF_PageToDevice(page_.get(), 0, 0, device_width, device_height, 0, input.left, input.top,
                      &first_corner.x, &first_corner.y);
    FPDF_PageToDevice(page_.get(), 0, 0, device_width, device_height, 0, input.right,
                      input.bottom, &second_corner.x, &second_corner.y);

    // Constrain the result within the page.
    return Intersect(IntRect(first_corner, second_corner), Dimensions());
}
136 
UnapplyPageTransform(const Point_i & input) const137 Point_d Page::UnapplyPageTransform(const Point_i& input) const {
138     Point_d output;
139     FPDF_DeviceToPage(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
140                       &output.y);
141     return output;
142 }
143 
// Converts |in| from page coordinates to device coordinates. PDFium writes the
// result into an integer point, so the returned floats hold whole numbers.
Point_f Page::PageToDevice(const Point_f& in) const {
    const int device_width = Width();
    const int device_height = Height();

    Point_i device_point;
    FPDF_PageToDevice(page_.get(), 0, 0, device_width, device_height, 0, in.x, in.y,
                      &device_point.x, &device_point.y);

    return Point_f{static_cast<float>(device_point.x), static_cast<float>(device_point.y)};
}
151 
// Converts |in| from device coordinates to page coordinates. PDFium produces
// doubles, which are narrowed to float for the returned point.
Point_f Page::DeviceToPage(const Point_f& in) const {
    const int device_width = Width();
    const int device_height = Height();

    Point_d page_point;
    FPDF_DeviceToPage(page_.get(), 0, 0, device_width, device_height, 0, in.x, in.y,
                      &page_point.x, &page_point.y);

    return Point_f{static_cast<float>(page_point.x), static_cast<float>(page_point.y)};
}
159 
NumChars()160 int Page::NumChars() {
161     return FPDFText_CountChars(text_page());
162 }
163 
GetUnicode(int char_index)164 uint32_t Page::GetUnicode(int char_index) {
165     return FPDFText_GetUnicode(text_page(), char_index);
166 }
167 
GetTextUtf8()168 std::string Page::GetTextUtf8() {
169     return GetTextUtf8(first_printable_char_index(), last_printable_char_index() + 1);
170 }
171 
GetTextUtf8(const int start_index,const int stop_index)172 std::string Page::GetTextUtf8(const int start_index, const int stop_index) {
173     std::string result;
174     for (int i = start_index; i < stop_index; i++) {
175         AppendpdfClientCodepointAsUtf8(GetUnicode(i), &result);
176     }
177     return result;
178 }
179 
GetAltTextUtf8(vector<std::string> * result) const180 void Page::GetAltTextUtf8(vector<std::string>* result) const {
181     ::pdfClient_utils::GetAltText(page_.get(), result);
182 }
183 
FindMatchesUtf8(std::string_view utf8,vector<TextRange> * matches)184 int Page::FindMatchesUtf8(std::string_view utf8, vector<TextRange>* matches) {
185     std::u32string query(Utf8ToUtf32(utf8));
186     // Normalize characters of string for searching - ignore case and accents.
187     NormalizeStringForSearch(&query);
188     TextRange match;
189     int page_start = first_printable_char_index();
190     int page_stop = last_printable_char_index() + 1;
191     int num_matches = 0;
192     while (FindMatch(query, page_start, page_stop, &match)) {
193         if (matches != nullptr) {
194             matches->push_back(match);
195         }
196         num_matches++;
197         page_start = match.second;
198     }
199     return num_matches;
200 }
201 
BoundsOfMatchesUtf8(std::string_view utf8,vector<Rectangle_i> * rects,vector<int> * match_to_rect,vector<int> * char_indexes)202 int Page::BoundsOfMatchesUtf8(std::string_view utf8, vector<Rectangle_i>* rects,
203                               vector<int>* match_to_rect, vector<int>* char_indexes) {
204     vector<TextRange> matches;
205     int num_matches = FindMatchesUtf8(utf8, &matches);
206     int num_rects = 0;
207     int num_matches_with_rects = 0;
208     for (int i = 0; i < num_matches; i++) {
209         int start = matches[i].first, stop = matches[i].second;
210         int num_rects_for_match = GetTextBounds(start, stop, rects);
211         if (num_rects_for_match == 0) {
212             continue;
213         }
214         if (match_to_rect != nullptr) {
215             match_to_rect->push_back(num_rects);
216         }
217         if (char_indexes != nullptr) {
218             char_indexes->push_back(start);
219         }
220         num_rects += num_rects_for_match;
221         num_matches_with_rects++;
222     }
223     return num_matches_with_rects;
224 }
225 
GetTextBounds(const int start_index,const int stop_index,vector<Rectangle_i> * rects)226 int Page::GetTextBounds(const int start_index, const int stop_index, vector<Rectangle_i>* rects) {
227     int num_rects = 0;
228     Rectangle_d rect = DoubleRect(0, 0, 0, 0);
229     for (int index = start_index; index < stop_index; index++) {
230         double x1, x2, y1, y2;
231         // This call doesn't apply the page transform - have to apply later.
232         FPDFText_GetCharBox(text_page(), index, &x1, &x2, &y1, &y2);
233         if (x1 != x2 && y1 != y2) {
234             if (IsEmpty(rect)) {
235                 rect = DoubleRect(x1, y1, x2, y2);
236             } else {
237                 rect = Union(rect, DoubleRect(x1, y1, x2, y2));
238             }
239         }
240         // Starting a new line - push current rect, start a new rect.
241         if (IsLineBreak(GetUnicode(index))) {
242             if (!IsEmpty(rect)) {
243                 num_rects++;
244                 rects->push_back(ApplyPageTransform(rect));
245             }
246             rect = DoubleRect(0, 0, 0, 0);
247         }
248     }
249     // Push the last current rect.
250     if (!IsEmpty(rect)) {
251         num_rects++;
252         rects->push_back(ApplyPageTransform(rect));
253     }
254     return num_rects;
255 }
256 
SelectWordAt(const Point_i & point,SelectionBoundary * start,SelectionBoundary * stop)257 bool Page::SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop) {
258     Point_d char_point = UnapplyPageTransform(point);
259     int char_index = FPDFText_GetCharIndexAtPos(text_page(), char_point.x, char_point.y,
260                                                 kFingerTolerance, kFingerTolerance);
261     if (char_index < 0 || IsWordBreak(GetUnicode(char_index))) {
262         return false;  // No word at the given point to select.
263     }
264     start->index = GetWordStartIndex(char_index);
265     stop->index = GetWordStopIndex(char_index);
266     ConstrainBoundary(start);
267     ConstrainBoundary(stop);
268     return true;
269 }
270 
ConstrainBoundary(SelectionBoundary * boundary)271 void Page::ConstrainBoundary(SelectionBoundary* boundary) {
272     if (boundary->index < 0) {
273         // Index is not specified - find the nearest index to the given point.
274         *boundary = GetBoundaryAtPoint(boundary->point);
275     } else {
276         // Index is specified - find the point at that index.
277         int index = std::max(boundary->index, first_printable_char_index());
278         index = std::min(index, last_printable_char_index() + 1);
279         *boundary = GetBoundaryAtIndex(index);
280     }
281 }
282 
GetFontSize(int index)283 int Page::GetFontSize(int index) {
284     return FPDFText_GetFontSize(text_page(), index);
285 }
286 
GetLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const287 int Page::GetLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
288                        vector<std::string>* urls) const {
289     return GetAnnotatedLinksUtf8(rects, link_to_rect, urls) +
290            GetInferredLinksUtf8(rects, link_to_rect, urls);
291 }
292 
GetGotoLinks() const293 vector<GotoLink> Page::GetGotoLinks() const {
294     vector<GotoLink> links;
295 
296     FPDF_LINK link = nullptr;
297     int pos = 0;
298     while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
299         if (!IsGotoLink(link)) {
300             continue;
301         }
302         // Get the bounds of the actual link
303         vector<Rectangle_i> goto_link_rects;
304         Rectangle_i rect = GetRect(link);
305         goto_link_rects.push_back(rect);
306 
307         GotoLinkDest* goto_link_dest = new GotoLinkDest();
308 
309         // Get and parse the destination
310         FPDF_DEST fpdf_dest = FPDFLink_GetDest(document_, link);
311         int dest_page_index = FPDFDest_GetDestPageIndex(document_, fpdf_dest);
312         if (dest_page_index < 0) {
313             LOGE("Goto Link has invalid destination page index");
314             continue;
315         }
316         goto_link_dest->set_page_number(dest_page_index);
317 
318         FPDF_BOOL has_x_coord;
319         FPDF_BOOL has_y_coord;
320         FPDF_BOOL has_zoom;
321         FS_FLOAT x;
322         FS_FLOAT y;
323         FS_FLOAT zoom;
324         FPDF_BOOL success = FPDFDest_GetLocationInPage(fpdf_dest, &has_x_coord, &has_y_coord,
325                                                        &has_zoom, &x, &y, &zoom);
326 
327         if (!success) {
328             continue;
329         }
330         if (has_x_coord) {
331             auto point = DoublePoint(x, 0);
332             auto tPoint = ApplyPageTransform(point);
333             goto_link_dest->set_x(tPoint.x);
334         }
335         if (has_y_coord) {
336             auto point = DoublePoint(0, y);
337             auto tPoint = ApplyPageTransform(point);
338             goto_link_dest->set_y(tPoint.y);
339         }
340         if (has_zoom) {
341             goto_link_dest->set_zoom(zoom);
342         }
343 
344         GotoLink goto_link = GotoLink{goto_link_rects, *goto_link_dest};
345 
346         // Ensure that links are within page bounds
347         if (goto_link_dest->x >= 0 && goto_link_dest->y >= 0) {
348             links.push_back(goto_link);
349         } else {
350             LOGE("Goto Link out of bound (x=%f, y=%f). Page width=%d, height =%d",
351                  goto_link_dest->x, goto_link_dest->y, Width(), Height());
352         }
353     }
354     return links;
355 }
356 
InitializeFormFilling()357 void Page::InitializeFormFilling() {
358     form_filler_->NotifyAfterPageLoad(page_.get());
359 }
360 
TerminateFormFilling()361 void Page::TerminateFormFilling() {
362     form_filler_->NotifyBeforePageClose(page_.get());
363 }
364 
GetFormWidgetInfo(Point_i point)365 FormWidgetInfo Page::GetFormWidgetInfo(Point_i point) {
366     Point_d page_point = UnapplyPageTransform(point);
367     FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), page_point);
368     if (result.FoundWidget()) {
369         // widget_rect is in page coords, transform to device coords before
370         // returning to user.
371         Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
372         result.set_widget_rect(transformed_widget_rect);
373     }
374 
375     // Consume any rectangle that was invalidated by this action. Some
376     // info-gathering actions may cause temporary invalidation without
377     // actually doing anything that we need to redraw for.
378     ConsumeInvalidRect();
379     return result;
380 }
381 
GetFormWidgetInfo(int annotation_index)382 FormWidgetInfo Page::GetFormWidgetInfo(int annotation_index) {
383     FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), annotation_index);
384     if (result.FoundWidget()) {
385         // widget_rect is in page coords; transform to device coords before
386         // returning to user.
387         Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
388         result.set_widget_rect(transformed_widget_rect);
389     }
390 
391     // Consume any rectangle that was invalidated by this action. Some
392     // info-gathering actions may cause temporary invalidation without
393     // actually doing anything that we need to redraw for.
394     ConsumeInvalidRect();
395     return result;
396 }
397 
GetFormWidgetInfos(const std::unordered_set<int> & type_ids,std::vector<FormWidgetInfo> * widget_infos)398 void Page::GetFormWidgetInfos(const std::unordered_set<int>& type_ids,
399                               std::vector<FormWidgetInfo>* widget_infos) {
400     form_filler_->GetFormWidgetInfos(page_.get(), type_ids, widget_infos);
401     for (FormWidgetInfo& widget_info : *widget_infos) {
402         // widget_rect is in page coords; transform to device coords before
403         // returning to user.
404         Rectangle_i transformed_widget_rect = ApplyPageTransform(widget_info.widget_rect());
405         widget_info.set_widget_rect(transformed_widget_rect);
406     }
407 
408     // Consume any rectangles that were invalidated by this action. Some
409     // info-gathering actions may cause temporary invalidation without
410     // actually doing anything that we need to redraw for.
411     ConsumeInvalidRect();
412 }
413 
ClickOnPoint(Point_i point)414 bool Page::ClickOnPoint(Point_i point) {
415     Point_d page_point = UnapplyPageTransform(point);
416     return form_filler_->ClickOnPoint(page_.get(), page_point);
417 }
SetFormFieldText(int annotation_index,std::string_view text)418 bool Page::SetFormFieldText(int annotation_index, std::string_view text) {
419     return form_filler_->SetText(page_.get(), annotation_index, text);
420 }
421 
SetChoiceSelection(int annotation_index,std::span<const int> selected_indices)422 bool Page::SetChoiceSelection(int annotation_index, std::span<const int> selected_indices) {
423     return form_filler_->SetChoiceSelection(page_.get(), annotation_index, selected_indices);
424 }
NotifyInvalidRect(Rectangle_i rect)425 void Page::NotifyInvalidRect(Rectangle_i rect) {
426     if (rect.left < 0 || rect.top < 0 || rect.right < 0 || rect.bottom < 0 || IsEmpty(rect)) {
427         return;
428     }
429 
430     Rectangle_i device_rect = ApplyPageTransform(rect);
431     // If invalid_rect_ is currently empty, avoid unioning so we don't extend
432     // |rect|'s top left corner to (0,0) for no reason.
433     if (IsEmpty(invalid_rect_)) {
434         invalid_rect_ = device_rect;
435         return;
436     }
437 
438     invalid_rect_ = Union(invalid_rect_, device_rect);
439 }
440 
HasInvalidRect()441 bool Page::HasInvalidRect() {
442     return !IsEmpty(invalid_rect_);
443 }
444 
ConsumeInvalidRect()445 Rectangle_i Page::ConsumeInvalidRect() {
446     Rectangle_i copy = invalid_rect_;
447     invalid_rect_ = kEmptyIntRectangle;
448     return copy;
449 }
450 
Get()451 void* Page::Get() {
452     return page_.get();
453 }
454 
GetPageObjects(bool refetch)455 std::vector<PageObject*> Page::GetPageObjects(bool refetch) {
456     PopulatePageObjects(refetch);
457 
458     std::vector<PageObject*> page_objects;
459     for (const auto& page_object : page_objects_) {
460         page_objects.push_back(page_object.get());
461     }
462 
463     return page_objects;
464 }
465 
AddPageObject(std::unique_ptr<PageObject> pageObject)466 int Page::AddPageObject(std::unique_ptr<PageObject> pageObject) {
467     // Create a scoped PDFium page object.
468     ScopedFPDFPageObject scoped_page_object(pageObject->CreateFPDFInstance(document_, page_.get()));
469 
470     // Check if a FPDF page object was created.
471     if (!scoped_page_object) {
472         return -1;
473     }
474 
475     // Insert the FPDF page object into the FPDF page.
476     FPDFPage_InsertObject(page_.get(), scoped_page_object.release());
477     FPDFPage_GenerateContent(page_.get());
478 
479     // Add pageObject in stored list if populated.
480     if (!page_objects_.empty()) {
481         page_objects_.push_back(std::move(pageObject));
482     }
483 
484     return FPDFPage_CountObjects(page_.get()) - 1;
485 }
486 
RemovePageObject(int index)487 bool Page::RemovePageObject(int index) {
488     FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page_.get(), index);
489     // Remove FPDF PageObject
490     if (!FPDFPage_RemoveObject(page_.get(), page_object)) {
491         return false;
492     }
493 
494     FPDFPageObj_Destroy(page_object);
495     FPDFPage_GenerateContent(page_.get());
496 
497     // Remove pageObject from stored list if populated.
498     if (!page_objects_.empty()) {
499         page_objects_.erase(page_objects_.begin() + index);
500     }
501 
502     return true;
503 }
504 
UpdatePageObject(int index,std::unique_ptr<PageObject> pageObject)505 bool Page::UpdatePageObject(int index, std::unique_ptr<PageObject> pageObject) {
506     // Check for valid index
507     if (index < 0 || index >= FPDFPage_CountObjects(page_.get())) {
508         return false;
509     }
510 
511     // Get PDFium PageObject.
512     FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page_.get(), index);
513 
514     // Update PDFium PageObject
515     if (!pageObject->UpdateFPDFInstance(page_object, page_.get())) {
516         return false;
517     }
518 
519     FPDFPage_GenerateContent(page_.get());
520 
521     return true;
522 }
523 
text_page()524 FPDF_TEXTPAGE Page::text_page() {
525     EnsureTextPageInitialized();
526     return text_page_.get();
527 }
528 
first_printable_char_index()529 int Page::first_printable_char_index() {
530     EnsureTextPageInitialized();
531     return first_printable_char_index_;
532 }
533 
last_printable_char_index()534 int Page::last_printable_char_index() {
535     EnsureTextPageInitialized();
536     return last_printable_char_index_;
537 }
538 
EnsureTextPageInitialized()539 void Page::EnsureTextPageInitialized() {
540     if (text_page_) {
541         return;
542     }
543     if (!page_.get()) {
544         // Page should never be null but a partner has an unexplained bug b/376796346
545         LOGE("Null page (err=%lu). for (page_num=%d)", FPDF_GetLastError(), page_num_);
546         // since the text_page_ would not have a page to load from
547         // Initialize variables to -1, otherwise they carry over garbage values.
548         first_printable_char_index_ = -1;
549         last_printable_char_index_ = -1;
550         return;
551     }
552 
553     text_page_.reset(FPDFText_LoadPage(page_.get()));
554     if (!text_page_) {
555         // This will get into infinite recursion if not returned - b/376796346
556         LOGE("Failed to load text (err=%lu). for (page_num=%d)", FPDF_GetLastError(), page_num_);
557         // Initialize variables to -1, otherwise they carry over garbage values.
558         first_printable_char_index_ = -1;
559         last_printable_char_index_ = -1;
560         return;
561     }
562 
563     int num_chars = NumChars();
564 
565     int i;
566     for (i = 0; i < num_chars && IsWordBreak(GetUnicode(i)); i++) {
567     }
568     first_printable_char_index_ = i;
569 
570     for (i = num_chars - 1; i >= first_printable_char_index_ && IsWordBreak(GetUnicode(i)); i--) {
571     }
572     last_printable_char_index_ = i;
573 }
574 
InPlaceSwapRedBlueChannels(void * pixels,const int num_pixels) const575 void Page::InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const {
576     uint8_t* channels = static_cast<uint8_t*>(pixels);
577     uint8_t* channel1 = channels;
578     uint8_t* channel3 = channels + 2;
579 
580     for (int i = 0; i < num_pixels; ++i, channel1 += kBytesPerPixel, channel3 += kBytesPerPixel) {
581         std::swap(*channel1, *channel3);
582     }
583 }
584 
FindMatch(const std::u32string & query,const int page_start,const int page_stop,TextRange * match)585 bool Page::FindMatch(const std::u32string& query, const int page_start, const int page_stop,
586                      TextRange* match) {
587     if (query.empty()) {
588         return false;
589     }
590 
591     int max_match_start = page_stop - query.length();
592     for (int m = page_start; m <= max_match_start; m++) {
593         if (IsMatch(query, m, page_stop, match)) {
594             return true;
595         }
596     }
597     return false;
598 }
599 
IsMatch(const std::u32string & query,const int match_start,const int page_stop,TextRange * match)600 bool Page::IsMatch(const std::u32string& query, const int match_start, const int page_stop,
601                    TextRange* match) {
602     int page_index = match_start;
603     size_t query_index = 0;
604     uint32_t page_char = 0, prev_char = 0;
605     while (query_index < query.length()) {
606         prev_char = page_char;
607         page_char = GetUnicode(page_index);
608 
609         if (NormalizeForSearch(page_char) == query[query_index]) {
610             // This codepoint matches (ignoring case and accents). Move to next.
611             query_index++;
612             page_index++;
613         } else if (IsSkippableForSearch(page_char, prev_char) && query_index > 0) {
614             // Don't increment query index - skip over skippable character.
615             page_index++;
616             if ((page_stop - page_index) < (query.length() - query_index)) {
617                 return false;  // Not enough room for query string before page_stop.
618             }
619         } else {
620             return false;
621         }
622     }
623     // Update match to contain page indices of match start and match stop.
624     match->first = match_start;
625     match->second = page_index;
626     return true;
627 }
628 
GetBoundaryAtIndex(const int index)629 SelectionBoundary Page::GetBoundaryAtIndex(const int index) {
630     return GetBoundaryAtIndex(index, IsRtlAtIndex(index));
631 }
632 
IsRtlAtIndex(const int index)633 bool Page::IsRtlAtIndex(const int index) {
634     int start_index = GetWordStartIndex(index);
635     int stop_index = GetWordStopIndex(index);
636     int word_length = stop_index - start_index;
637     if (word_length <= 1) {
638         // Can't tell directionality from a single character, guess LTR.
639         return false;
640     }
641     Rectangle_i start_bounds = GetCharBounds(start_index);
642     Rectangle_i stop_bounds = GetCharBounds(stop_index - 1);
643     return start_bounds.Center().x > stop_bounds.Center().x;
644 }
645 
GetBoundaryAtIndex(const int index,bool is_rtl)646 SelectionBoundary Page::GetBoundaryAtIndex(const int index, bool is_rtl) {
647     // Normally we align the boundary on the start edge of next character:
648     int char_index = index;
649     bool use_end_edge = false;
650 
651     // Printable characters have well defined bounding boxes, word-breaks (spaces
652     // and newlines) may not - so we use the end edge of the previous printable
653     // character instead if the next character is not printable.
654     if (index == NumChars() || IsWordBreak(GetUnicode(index))) {
655         char_index = index - 1;
656         use_end_edge = true;
657     }
658     bool use_right_edge = use_end_edge ^ is_rtl;
659 
660     SelectionBoundary boundary(index, 0, 0, is_rtl);
661     Rectangle_i char_bounds = GetCharBounds(char_index);
662     boundary.point.x = use_right_edge ? char_bounds.right : char_bounds.left;
663     // Use the baseline (not the bottom) of the char as the y-value.
664     boundary.point.y = GetCharOrigin(char_index).y;
665     return boundary;
666 }
667 
GetBoundaryAtPoint(const Point_i & point)668 SelectionBoundary Page::GetBoundaryAtPoint(const Point_i& point) {
669     SelectionBoundary best_boundary(0, point.x, point.y, false);
670     int best_distance_sq = std::numeric_limits<int>::max();
671 
672     bool prev_char_is_word_char = false;
673     bool is_rtl = false;
674     for (int index = first_printable_char_index(); index <= last_printable_char_index() + 1;
675          index++) {
676         bool cur_char_is_word_char =
677                 (index <= last_printable_char_index()) && !IsWordBreak(GetUnicode(index));
678         // Starting a new word:
679         if (cur_char_is_word_char && !prev_char_is_word_char) {
680             // Finding out RTL involves looking at each end of the word,
681             // so we only do it at the start of each word:
682             is_rtl = IsRtlAtIndex(index);
683         }
684         if (cur_char_is_word_char || prev_char_is_word_char) {
685             SelectionBoundary boundary = GetBoundaryAtIndex(index, is_rtl);
686             int dx = boundary.point.x - point.x;
687             int dy = boundary.point.y - point.y;
688             int distance_sq = dx * dx + dy * dy;
689             if (distance_sq < best_distance_sq) {
690                 best_boundary = boundary;
691                 best_distance_sq = distance_sq;
692             }
693         }
694         prev_char_is_word_char = cur_char_is_word_char;
695     }
696     return best_boundary;
697 }
698 
GetWordStartIndex(const int index)699 int Page::GetWordStartIndex(const int index) {
700     int start_index = index;
701     while (start_index > 0 && !IsWordBreak(GetUnicode(start_index - 1))) {
702         --start_index;  // Move start_index to the start of the word.
703     }
704     return start_index;
705 }
706 
GetWordStopIndex(const int index)707 int Page::GetWordStopIndex(const int index) {
708     int stop_index = index;
709     int num_chars = NumChars();
710     while (stop_index < num_chars && !IsWordBreak(GetUnicode(stop_index))) {
711         ++stop_index;  // Move stop_index to the end of the word.
712     }
713     return stop_index;
714 }
715 
GetRawCharBounds(const int char_index)716 Rectangle_d Page::GetRawCharBounds(const int char_index) {
717     double x1, x2, y1, y2;
718     FPDFText_GetCharBox(text_page(), char_index, &x1, &x2, &y1, &y2);
719     return DoubleRect(x1, y1, x2, y2);
720 }
721 
GetCharBounds(const int char_index)722 Rectangle_i Page::GetCharBounds(const int char_index) {
723     return ApplyPageTransform(GetRawCharBounds(char_index));
724 }
725 
GetCharOrigin(const int char_index)726 Point_i Page::GetCharOrigin(const int char_index) {
727     double x = 0.0, y = 0.0;
728     FPDFText_GetCharOrigin(text_page(), char_index, &x, &y);
729     return ApplyPageTransform(DoublePoint(x, y));
730 }
731 
GetAnnotatedLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const732 int Page::GetAnnotatedLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
733                                 vector<std::string>* urls) const {
734     FPDF_LINK link = nullptr;
735     int pos = 0;
736     int num_links_with_rect = 0;
737     while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
738         if (!IsUrlLink(link)) {
739             continue;
740         }
741 
742         std::string url = GetUrlUtf8(link);
743         Rectangle_i rect = GetRect(link);
744         if (IsEmpty(rect)) {
745             continue;
746         }
747 
748         link_to_rect->push_back(rects->size());
749         rects->push_back(rect);
750         urls->push_back(url);
751         num_links_with_rect++;
752     }
753     return num_links_with_rect;
754 }
755 
GetInferredLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const756 int Page::GetInferredLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
757                                vector<std::string>* urls) const {
758     // TODO(b/312730882): Infer links by looking for http:// and similar and for
759     // email addresses to use as mailto: links. There are some pdfClient methods for
760     // doing this, but these have some bugs which need patching or working around.
761     return 0;
762 }
763 
GetUrlUtf8(FPDF_LINK link) const764 std::string Page::GetUrlUtf8(FPDF_LINK link) const {
765     FPDF_ACTION action = FPDFLink_GetAction(link);
766     // Allocate a string big enough to hold the URL.
767     std::string url(FPDFAction_GetURIPath(document_, action, nullptr, 0), '\0');
768     // Then write the URL to it.
769     FPDFAction_GetURIPath(document_, action, &url[0], url.length());
770     EraseTrailingNulls(&url);
771     return url;
772 }
773 
GetRect(FPDF_LINK link) const774 Rectangle_i Page::GetRect(FPDF_LINK link) const {
775     FS_RECTF r;
776     if (!FPDFLink_GetAnnotRect(link, &r)) {
777         return Rectangle_i();
778     }
779 
780     Rectangle_d rect_d = DoubleRect(r.left, r.top, r.right, r.bottom);
781     return ApplyPageTransform(rect_d);
782 }
783 
IsGotoLink(FPDF_LINK link) const784 bool Page::IsGotoLink(FPDF_LINK link) const {
785     FPDF_ACTION action = FPDFLink_GetAction(link);
786     return action != nullptr && FPDFAction_GetType(action) == PDFACTION_GOTO;
787 }
788 
IsUrlLink(FPDF_LINK link) const789 bool Page::IsUrlLink(FPDF_LINK link) const {
790     FPDF_ACTION action = FPDFLink_GetAction(link);
791     return action != nullptr && FPDFAction_GetType(action) == PDFACTION_URI;
792 }
793 
PopulatePageObjects(bool refetch)794 void Page::PopulatePageObjects(bool refetch) {
795     if (!refetch && !page_objects_.empty()) {
796         return;
797     }
798 
799     int object_count = FPDFPage_CountObjects(page_.get());
800     // Resize PageObjects
801     page_objects_.resize(object_count);
802 
803     for (int index = 0; index < object_count; ++index) {
804         FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page_.get(), index);
805         int type = FPDFPageObj_GetType(page_object);
806 
807         // Pointer to PageObject
808         std::unique_ptr<PageObject> page_object_ = nullptr;
809 
810         switch (type) {
811             case FPDF_PAGEOBJ_TEXT: {
812                 page_object_ = std::make_unique<TextObject>();
813                 break;
814             }
815             case FPDF_PAGEOBJ_PATH: {
816                 page_object_ = std::make_unique<PathObject>();
817                 break;
818             }
819             case FPDF_PAGEOBJ_IMAGE: {
820                 page_object_ = std::make_unique<ImageObject>();
821                 break;
822             }
823             default:
824                 break;
825         }
826 
827         // Populate PageObject From Page
828         if (page_object_ && page_object_->PopulateFromFPDFInstance(page_object, page_.get())) {
829             page_objects_[index] = std::move(page_object_);
830         }
831     }
832 }
833 
GetPageAnnotations()834 std::vector<Annotation*> Page::GetPageAnnotations() {
835     PopulateAnnotations();
836 
837     std::vector<Annotation*> result;
838 
839     result.reserve(annotations_.size());
840     for (const auto& annotation : annotations_) {
841         result.push_back(annotation.get());
842     }
843 
844     return result;
845 }
846 
PopulateAnnotations()847 void Page::PopulateAnnotations() {
848     // If page_ is null
849     if (!page_) {
850         LOGE("Page is null");
851         return;
852     }
853 
854     int num_of_annotations = FPDFPage_GetAnnotCount(page_.get());
855     annotations_.resize(num_of_annotations);
856 
857     for (int annotation_index = 0; annotation_index < num_of_annotations; annotation_index++) {
858         ScopedFPDFAnnotation scoped_annot(FPDFPage_GetAnnot(page_.get(), annotation_index));
859         int annotationType = FPDFAnnot_GetSubtype(scoped_annot.get());
860 
861         std::unique_ptr<Annotation> annotation = nullptr;
862 
863         switch (annotationType) {
864             case FPDF_ANNOT_STAMP: {
865                 FS_RECTF rect;
866                 if (!FPDFAnnot_GetRect(scoped_annot.get(), &rect)) {
867                     LOGE("Failed to get the bounds of the annotation");
868                     break;
869                 }
870                 auto bounds = Rectangle_f{rect.left, rect.top, rect.right, rect.bottom};
871                 annotation = std::make_unique<StampAnnotation>(bounds);
872                 break;
873             }
874             case FPDF_ANNOT_HIGHLIGHT: {
875                 vector<Rectangle_f> bounds;
876                 auto num_bounds = FPDFAnnot_CountAttachmentPoints(scoped_annot.get());
877                 if (num_bounds > 0) {
878                     bounds.resize(num_bounds);
879                     for (auto bound_index = 0; bound_index < num_bounds; bound_index++) {
880                         FS_QUADPOINTSF quad_points;
881                         if (!FPDFAnnot_GetAttachmentPoints(scoped_annot.get(), bound_index,
882                                                            &quad_points)) {
883                             LOGD("Failed to get quad points from pdfium");
884                             break;
885                         }
886 
887                         bounds[bound_index] = Rectangle_f(quad_points.x1, quad_points.y1,
888                                                           quad_points.x2, quad_points.y4);
889                     }
890                 } else {
891                     LOGD("Failed to find bounds for highlight annotation");
892                 }
893                 annotation = std::make_unique<HighlightAnnotation>(bounds);
894                 break;
895             }
896             case FPDF_ANNOT_FREETEXT: {
897                 FS_RECTF rect;
898                 if (!FPDFAnnot_GetRect(scoped_annot.get(), &rect)) {
899                     LOGE("Failed to get the bounds of the annotation");
900                     break;
901                 }
902                 auto bounds = Rectangle_f{rect.left, rect.top, rect.right, rect.bottom};
903                 annotation = std::make_unique<FreeTextAnnotation>(bounds);
904                 break;
905             }
906             default: {
907                 break;
908             }
909         }
910 
911         if (!annotation ||
912             !annotation->PopulateFromPdfiumInstance(scoped_annot.get(), page_.get())) {
913             LOGE("Failed to create a pdfClient's instance of annotation using pdfium "
914                  "instance");
915         }
916 
917         annotations_[annotation_index] = std::move(annotation);
918     }
919 }
920 
AddPageAnnotation(std::unique_ptr<Annotation> annotation)921 int Page::AddPageAnnotation(std::unique_ptr<Annotation> annotation) {
922     ScopedFPDFAnnotation scoped_annot = annotation->CreatePdfiumInstance(document_, page_.get());
923 
924     if (!scoped_annot) {
925         LOGE("Failed to add the given annotation to the page");
926         return -1;
927     }
928 
929     FPDFPage_GenerateContent(page_.get());
930 
931     // Add the object to the annotations_ list
932     annotations_.push_back(std::move(annotation));
933 
934     // Return the index of added annotation
935     return FPDFPage_GetAnnotIndex(page_.get(), scoped_annot.get());
936 }
937 
RemovePageAnnotation(int index)938 bool Page::RemovePageAnnotation(int index) {
939     PopulateAnnotations();
940     if (index >= annotations_.size() || index < 0) {
941         LOGE("Given index is out range for number of annotations on this page");
942         return false;
943     }
944     // Remove the annotation at given index
945     if (!FPDFPage_RemoveAnnot(page_.get(), index)) {
946         LOGE("Failed to remove the annotation at index - %d ", index);
947         return false;
948     }
949 
950     FPDFPage_GenerateContent(page_.get());
951 
952     // Remove from annotations_ list
953     annotations_.erase(annotations_.begin() + index);
954 
955     return true;
956 }
957 
UpdatePageAnnotation(int index,std::unique_ptr<Annotation> annotation)958 bool Page::UpdatePageAnnotation(int index, std::unique_ptr<Annotation> annotation) {
959     PopulateAnnotations();
960     // Check for valid index
961     if (index < 0 || index >= annotations_.size()) {
962         return false;
963     }
964 
965     // check if there in an annotation of supported type at given index
966     if (annotations_[index] == nullptr) {
967         return false;
968     }
969 
970     // Get the pdfium annotation
971     ScopedFPDFAnnotation scoped_annot = ScopedFPDFAnnotation(FPDFPage_GetAnnot(page_.get(), index));
972 
973     if (!scoped_annot) {
974         LOGE("Failed to get pdfium annotation's instance");
975         return false;
976     }
977 
978     if (!annotation->UpdatePdfiumInstance(scoped_annot.get(), document_, page_.get())) {
979         LOGE("Failed to update pdfium annotation's instance");
980         return false;
981     }
982 
983     FPDFPage_GenerateContent(page_.get());
984 
985     return true;
986 }
987 
988 }  // namespace pdfClient