1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "page.h"
18
19 #include <stddef.h>
20 #include <stdint.h>
21
22 #include <algorithm>
23 #include <limits>
24 #include <span>
25 #include <string>
26 #include <vector>
27
28 #include "cpp/fpdf_scopers.h"
29 #include "form_filler.h"
30 #include "form_widget_info.h"
31 #include "fpdf_annot.h"
32 #include "fpdf_doc.h"
33 #include "fpdf_text.h"
34 #include "fpdfview.h"
35 #include "image_object.h"
36 #include "logging.h"
37 #include "normalize.h"
38 #include "path_object.h"
39 #include "rect.h"
40 #include "text_object.h"
41 #include "utf.h"
42 #include "utils/annot_hider.h"
43 #include "utils/text.h"
44
45 #define LOG_TAG "page"
46
47 using pdfClient::Rectangle_f;
48 using std::vector;
49
50 namespace pdfClient {
51
52 static const int kBytesPerPixel = 4;
53
54 static const Rectangle_i kEmptyIntRectangle = IntRect(0, 0, 0, 0);
55
56 // The acceptable fatness / inaccuracy of a user's finger in points.
57 static const int kFingerTolerance = 10;
58
59 static const int RENDER_MODE_FOR_DISPLAY = 1;
60 static const int RENDER_MODE_FOR_PRINT = 2;
61
Page(FPDF_DOCUMENT doc,int page_num,FormFiller * form_filler)62 Page::Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler)
63 : document_(doc),
64 page_(FPDF_LoadPage(doc, page_num)),
65 form_filler_(form_filler),
66 invalid_rect_(kEmptyIntRectangle),
67 page_num_(page_num) {}
68
69 Page::Page(Page&& p) = default;
70
~Page()71 Page::~Page() {}
72
Width() const73 int Page::Width() const {
74 return FPDF_GetPageWidth(page_.get());
75 }
76
Height() const77 int Page::Height() const {
78 return FPDF_GetPageHeight(page_.get());
79 }
80
Dimensions() const81 Rectangle_i Page::Dimensions() const {
82 return IntRect(0, 0, Width(), Height());
83 }
84
Render(FPDF_BITMAP bitmap,FS_MATRIX transform,int clip_left,int clip_top,int clip_right,int clip_bottom,int render_mode,int show_annot_types,bool render_form_fields)85 void Page::Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top,
86 int clip_right, int clip_bottom, int render_mode, int show_annot_types,
87 bool render_form_fields) {
88 std::unordered_set<int> types;
89 for (auto renderFlag_annot : renderFlagsAnnotsMap) {
90 if ((renderFlag_annot.first & show_annot_types) != 0) {
91 for (int annot_type : renderFlag_annot.second) {
92 types.insert(annot_type);
93 }
94 }
95 }
96 if (render_form_fields) types.insert(FPDF_ANNOT_WIDGET);
97 pdfClient_utils::AnnotHider annot_hider(page_.get(), types);
98 int renderFlags = FPDF_REVERSE_BYTE_ORDER;
99 if (render_mode == RENDER_MODE_FOR_DISPLAY) {
100 renderFlags |= FPDF_LCD_TEXT | FPDF_ANNOT;
101 } else if (render_mode == RENDER_MODE_FOR_PRINT) {
102 renderFlags |= FPDF_PRINTING;
103 }
104
105 FS_RECTF clip = {(float)clip_left, (float)clip_top, (float)clip_right, (float)clip_bottom};
106 FPDF_RenderPageBitmapWithMatrix(bitmap, page_.get(), &transform, &clip, renderFlags);
107
108 if (render_form_fields) {
109 form_filler_->RenderTile(page_.get(), bitmap, transform, clip, renderFlags);
110 }
111 }
112
ApplyPageTransform(const Point_d & input) const113 Point_i Page::ApplyPageTransform(const Point_d& input) const {
114 Point_i output;
115 FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
116 &output.y);
117 return output;
118 }
119
ApplyPageTransform(const Rectangle_d & input) const120 Rectangle_i Page::ApplyPageTransform(const Rectangle_d& input) const {
121 return ApplyPageTransform(OuterIntRect(input));
122 }
123
// Converts a page-space rectangle to device space by transforming its
// (left, top) and (right, bottom) corners, then intersects the result with
// the page so it never extends beyond Dimensions().
Rectangle_i Page::ApplyPageTransform(const Rectangle_i& input) const {
    Point_i first_corner;
    FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.left, input.top,
                      &first_corner.x, &first_corner.y);

    Point_i second_corner;
    FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.right, input.bottom,
                      &second_corner.x, &second_corner.y);

    // Constrain the transformed rectangle to the page.
    return Intersect(IntRect(first_corner, second_corner), Dimensions());
}
136
UnapplyPageTransform(const Point_i & input) const137 Point_d Page::UnapplyPageTransform(const Point_i& input) const {
138 Point_d output;
139 FPDF_DeviceToPage(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
140 &output.y);
141 return output;
142 }
143
// Maps |in| from page coordinates to device coordinates. PDFium yields integer
// device coordinates, which are widened to float for the result.
Point_f Page::PageToDevice(const Point_f& in) const {
    Point_i device;
    FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, in.x, in.y, &device.x,
                      &device.y);

    const float device_x = static_cast<float>(device.x);
    const float device_y = static_cast<float>(device.y);
    return {device_x, device_y};
}
151
// Maps |in| from device coordinates to page coordinates. PDFium yields double
// page coordinates, which are narrowed to float for the result.
Point_f Page::DeviceToPage(const Point_f& in) const {
    Point_d page_point;
    FPDF_DeviceToPage(page_.get(), 0, 0, Width(), Height(), 0, in.x, in.y, &page_point.x,
                      &page_point.y);

    const float page_x = static_cast<float>(page_point.x);
    const float page_y = static_cast<float>(page_point.y);
    return {page_x, page_y};
}
159
NumChars()160 int Page::NumChars() {
161 return FPDFText_CountChars(text_page());
162 }
163
GetUnicode(int char_index)164 uint32_t Page::GetUnicode(int char_index) {
165 return FPDFText_GetUnicode(text_page(), char_index);
166 }
167
GetTextUtf8()168 std::string Page::GetTextUtf8() {
169 return GetTextUtf8(first_printable_char_index(), last_printable_char_index() + 1);
170 }
171
GetTextUtf8(const int start_index,const int stop_index)172 std::string Page::GetTextUtf8(const int start_index, const int stop_index) {
173 std::string result;
174 for (int i = start_index; i < stop_index; i++) {
175 AppendpdfClientCodepointAsUtf8(GetUnicode(i), &result);
176 }
177 return result;
178 }
179
GetAltTextUtf8(vector<std::string> * result) const180 void Page::GetAltTextUtf8(vector<std::string>* result) const {
181 ::pdfClient_utils::GetAltText(page_.get(), result);
182 }
183
FindMatchesUtf8(std::string_view utf8,vector<TextRange> * matches)184 int Page::FindMatchesUtf8(std::string_view utf8, vector<TextRange>* matches) {
185 std::u32string query(Utf8ToUtf32(utf8));
186 // Normalize characters of string for searching - ignore case and accents.
187 NormalizeStringForSearch(&query);
188 TextRange match;
189 int page_start = first_printable_char_index();
190 int page_stop = last_printable_char_index() + 1;
191 int num_matches = 0;
192 while (FindMatch(query, page_start, page_stop, &match)) {
193 if (matches != nullptr) {
194 matches->push_back(match);
195 }
196 num_matches++;
197 page_start = match.second;
198 }
199 return num_matches;
200 }
201
BoundsOfMatchesUtf8(std::string_view utf8,vector<Rectangle_i> * rects,vector<int> * match_to_rect,vector<int> * char_indexes)202 int Page::BoundsOfMatchesUtf8(std::string_view utf8, vector<Rectangle_i>* rects,
203 vector<int>* match_to_rect, vector<int>* char_indexes) {
204 vector<TextRange> matches;
205 int num_matches = FindMatchesUtf8(utf8, &matches);
206 int num_rects = 0;
207 int num_matches_with_rects = 0;
208 for (int i = 0; i < num_matches; i++) {
209 int start = matches[i].first, stop = matches[i].second;
210 int num_rects_for_match = GetTextBounds(start, stop, rects);
211 if (num_rects_for_match == 0) {
212 continue;
213 }
214 if (match_to_rect != nullptr) {
215 match_to_rect->push_back(num_rects);
216 }
217 if (char_indexes != nullptr) {
218 char_indexes->push_back(start);
219 }
220 num_rects += num_rects_for_match;
221 num_matches_with_rects++;
222 }
223 return num_matches_with_rects;
224 }
225
GetTextBounds(const int start_index,const int stop_index,vector<Rectangle_i> * rects)226 int Page::GetTextBounds(const int start_index, const int stop_index, vector<Rectangle_i>* rects) {
227 int num_rects = 0;
228 Rectangle_d rect = DoubleRect(0, 0, 0, 0);
229 for (int index = start_index; index < stop_index; index++) {
230 double x1, x2, y1, y2;
231 // This call doesn't apply the page transform - have to apply later.
232 FPDFText_GetCharBox(text_page(), index, &x1, &x2, &y1, &y2);
233 if (x1 != x2 && y1 != y2) {
234 if (IsEmpty(rect)) {
235 rect = DoubleRect(x1, y1, x2, y2);
236 } else {
237 rect = Union(rect, DoubleRect(x1, y1, x2, y2));
238 }
239 }
240 // Starting a new line - push current rect, start a new rect.
241 if (IsLineBreak(GetUnicode(index))) {
242 if (!IsEmpty(rect)) {
243 num_rects++;
244 rects->push_back(ApplyPageTransform(rect));
245 }
246 rect = DoubleRect(0, 0, 0, 0);
247 }
248 }
249 // Push the last current rect.
250 if (!IsEmpty(rect)) {
251 num_rects++;
252 rects->push_back(ApplyPageTransform(rect));
253 }
254 return num_rects;
255 }
256
SelectWordAt(const Point_i & point,SelectionBoundary * start,SelectionBoundary * stop)257 bool Page::SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop) {
258 Point_d char_point = UnapplyPageTransform(point);
259 int char_index = FPDFText_GetCharIndexAtPos(text_page(), char_point.x, char_point.y,
260 kFingerTolerance, kFingerTolerance);
261 if (char_index < 0 || IsWordBreak(GetUnicode(char_index))) {
262 return false; // No word at the given point to select.
263 }
264 start->index = GetWordStartIndex(char_index);
265 stop->index = GetWordStopIndex(char_index);
266 ConstrainBoundary(start);
267 ConstrainBoundary(stop);
268 return true;
269 }
270
ConstrainBoundary(SelectionBoundary * boundary)271 void Page::ConstrainBoundary(SelectionBoundary* boundary) {
272 if (boundary->index < 0) {
273 // Index is not specified - find the nearest index to the given point.
274 *boundary = GetBoundaryAtPoint(boundary->point);
275 } else {
276 // Index is specified - find the point at that index.
277 int index = std::max(boundary->index, first_printable_char_index());
278 index = std::min(index, last_printable_char_index() + 1);
279 *boundary = GetBoundaryAtIndex(index);
280 }
281 }
282
GetFontSize(int index)283 int Page::GetFontSize(int index) {
284 return FPDFText_GetFontSize(text_page(), index);
285 }
286
GetLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const287 int Page::GetLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
288 vector<std::string>* urls) const {
289 return GetAnnotatedLinksUtf8(rects, link_to_rect, urls) +
290 GetInferredLinksUtf8(rects, link_to_rect, urls);
291 }
292
GetGotoLinks() const293 vector<GotoLink> Page::GetGotoLinks() const {
294 vector<GotoLink> links;
295
296 FPDF_LINK link = nullptr;
297 int pos = 0;
298 while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
299 if (!IsGotoLink(link)) {
300 continue;
301 }
302 // Get the bounds of the actual link
303 vector<Rectangle_i> goto_link_rects;
304 Rectangle_i rect = GetRect(link);
305 goto_link_rects.push_back(rect);
306
307 GotoLinkDest* goto_link_dest = new GotoLinkDest();
308
309 // Get and parse the destination
310 FPDF_DEST fpdf_dest = FPDFLink_GetDest(document_, link);
311 int dest_page_index = FPDFDest_GetDestPageIndex(document_, fpdf_dest);
312 if (dest_page_index < 0) {
313 LOGE("Goto Link has invalid destination page index");
314 continue;
315 }
316 goto_link_dest->set_page_number(dest_page_index);
317
318 FPDF_BOOL has_x_coord;
319 FPDF_BOOL has_y_coord;
320 FPDF_BOOL has_zoom;
321 FS_FLOAT x;
322 FS_FLOAT y;
323 FS_FLOAT zoom;
324 FPDF_BOOL success = FPDFDest_GetLocationInPage(fpdf_dest, &has_x_coord, &has_y_coord,
325 &has_zoom, &x, &y, &zoom);
326
327 if (!success) {
328 continue;
329 }
330 if (has_x_coord) {
331 auto point = DoublePoint(x, 0);
332 auto tPoint = ApplyPageTransform(point);
333 goto_link_dest->set_x(tPoint.x);
334 }
335 if (has_y_coord) {
336 auto point = DoublePoint(0, y);
337 auto tPoint = ApplyPageTransform(point);
338 goto_link_dest->set_y(tPoint.y);
339 }
340 if (has_zoom) {
341 goto_link_dest->set_zoom(zoom);
342 }
343
344 GotoLink goto_link = GotoLink{goto_link_rects, *goto_link_dest};
345
346 // Ensure that links are within page bounds
347 if (goto_link_dest->x >= 0 && goto_link_dest->y >= 0) {
348 links.push_back(goto_link);
349 } else {
350 LOGE("Goto Link out of bound (x=%f, y=%f). Page width=%d, height =%d",
351 goto_link_dest->x, goto_link_dest->y, Width(), Height());
352 }
353 }
354 return links;
355 }
356
InitializeFormFilling()357 void Page::InitializeFormFilling() {
358 form_filler_->NotifyAfterPageLoad(page_.get());
359 }
360
TerminateFormFilling()361 void Page::TerminateFormFilling() {
362 form_filler_->NotifyBeforePageClose(page_.get());
363 }
364
GetFormWidgetInfo(Point_i point)365 FormWidgetInfo Page::GetFormWidgetInfo(Point_i point) {
366 Point_d page_point = UnapplyPageTransform(point);
367 FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), page_point);
368 if (result.FoundWidget()) {
369 // widget_rect is in page coords, transform to device coords before
370 // returning to user.
371 Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
372 result.set_widget_rect(transformed_widget_rect);
373 }
374
375 // Consume any rectangle that was invalidated by this action. Some
376 // info-gathering actions may cause temporary invalidation without
377 // actually doing anything that we need to redraw for.
378 ConsumeInvalidRect();
379 return result;
380 }
381
GetFormWidgetInfo(int annotation_index)382 FormWidgetInfo Page::GetFormWidgetInfo(int annotation_index) {
383 FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), annotation_index);
384 if (result.FoundWidget()) {
385 // widget_rect is in page coords; transform to device coords before
386 // returning to user.
387 Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
388 result.set_widget_rect(transformed_widget_rect);
389 }
390
391 // Consume any rectangle that was invalidated by this action. Some
392 // info-gathering actions may cause temporary invalidation without
393 // actually doing anything that we need to redraw for.
394 ConsumeInvalidRect();
395 return result;
396 }
397
GetFormWidgetInfos(const std::unordered_set<int> & type_ids,std::vector<FormWidgetInfo> * widget_infos)398 void Page::GetFormWidgetInfos(const std::unordered_set<int>& type_ids,
399 std::vector<FormWidgetInfo>* widget_infos) {
400 form_filler_->GetFormWidgetInfos(page_.get(), type_ids, widget_infos);
401 for (FormWidgetInfo& widget_info : *widget_infos) {
402 // widget_rect is in page coords; transform to device coords before
403 // returning to user.
404 Rectangle_i transformed_widget_rect = ApplyPageTransform(widget_info.widget_rect());
405 widget_info.set_widget_rect(transformed_widget_rect);
406 }
407
408 // Consume any rectangles that were invalidated by this action. Some
409 // info-gathering actions may cause temporary invalidation without
410 // actually doing anything that we need to redraw for.
411 ConsumeInvalidRect();
412 }
413
ClickOnPoint(Point_i point)414 bool Page::ClickOnPoint(Point_i point) {
415 Point_d page_point = UnapplyPageTransform(point);
416 return form_filler_->ClickOnPoint(page_.get(), page_point);
417 }
SetFormFieldText(int annotation_index,std::string_view text)418 bool Page::SetFormFieldText(int annotation_index, std::string_view text) {
419 return form_filler_->SetText(page_.get(), annotation_index, text);
420 }
421
SetChoiceSelection(int annotation_index,std::span<const int> selected_indices)422 bool Page::SetChoiceSelection(int annotation_index, std::span<const int> selected_indices) {
423 return form_filler_->SetChoiceSelection(page_.get(), annotation_index, selected_indices);
424 }
NotifyInvalidRect(Rectangle_i rect)425 void Page::NotifyInvalidRect(Rectangle_i rect) {
426 if (rect.left < 0 || rect.top < 0 || rect.right < 0 || rect.bottom < 0 || IsEmpty(rect)) {
427 return;
428 }
429
430 Rectangle_i device_rect = ApplyPageTransform(rect);
431 // If invalid_rect_ is currently empty, avoid unioning so we don't extend
432 // |rect|'s top left corner to (0,0) for no reason.
433 if (IsEmpty(invalid_rect_)) {
434 invalid_rect_ = device_rect;
435 return;
436 }
437
438 invalid_rect_ = Union(invalid_rect_, device_rect);
439 }
440
HasInvalidRect()441 bool Page::HasInvalidRect() {
442 return !IsEmpty(invalid_rect_);
443 }
444
ConsumeInvalidRect()445 Rectangle_i Page::ConsumeInvalidRect() {
446 Rectangle_i copy = invalid_rect_;
447 invalid_rect_ = kEmptyIntRectangle;
448 return copy;
449 }
450
Get()451 void* Page::Get() {
452 return page_.get();
453 }
454
GetPageObjects(bool refetch)455 std::vector<PageObject*> Page::GetPageObjects(bool refetch) {
456 PopulatePageObjects(refetch);
457
458 std::vector<PageObject*> page_objects;
459 for (const auto& page_object : page_objects_) {
460 page_objects.push_back(page_object.get());
461 }
462
463 return page_objects;
464 }
465
AddPageObject(std::unique_ptr<PageObject> pageObject)466 int Page::AddPageObject(std::unique_ptr<PageObject> pageObject) {
467 // Create a scoped PDFium page object.
468 ScopedFPDFPageObject scoped_page_object(pageObject->CreateFPDFInstance(document_, page_.get()));
469
470 // Check if a FPDF page object was created.
471 if (!scoped_page_object) {
472 return -1;
473 }
474
475 // Insert the FPDF page object into the FPDF page.
476 FPDFPage_InsertObject(page_.get(), scoped_page_object.release());
477 FPDFPage_GenerateContent(page_.get());
478
479 // Add pageObject in stored list if populated.
480 if (!page_objects_.empty()) {
481 page_objects_.push_back(std::move(pageObject));
482 }
483
484 return FPDFPage_CountObjects(page_.get()) - 1;
485 }
486
RemovePageObject(int index)487 bool Page::RemovePageObject(int index) {
488 FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page_.get(), index);
489 // Remove FPDF PageObject
490 if (!FPDFPage_RemoveObject(page_.get(), page_object)) {
491 return false;
492 }
493
494 FPDFPageObj_Destroy(page_object);
495 FPDFPage_GenerateContent(page_.get());
496
497 // Remove pageObject from stored list if populated.
498 if (!page_objects_.empty()) {
499 page_objects_.erase(page_objects_.begin() + index);
500 }
501
502 return true;
503 }
504
UpdatePageObject(int index,std::unique_ptr<PageObject> pageObject)505 bool Page::UpdatePageObject(int index, std::unique_ptr<PageObject> pageObject) {
506 // Check for valid index
507 if (index < 0 || index >= FPDFPage_CountObjects(page_.get())) {
508 return false;
509 }
510
511 // Get PDFium PageObject.
512 FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page_.get(), index);
513
514 // Update PDFium PageObject
515 if (!pageObject->UpdateFPDFInstance(page_object, page_.get())) {
516 return false;
517 }
518
519 FPDFPage_GenerateContent(page_.get());
520
521 return true;
522 }
523
text_page()524 FPDF_TEXTPAGE Page::text_page() {
525 EnsureTextPageInitialized();
526 return text_page_.get();
527 }
528
first_printable_char_index()529 int Page::first_printable_char_index() {
530 EnsureTextPageInitialized();
531 return first_printable_char_index_;
532 }
533
last_printable_char_index()534 int Page::last_printable_char_index() {
535 EnsureTextPageInitialized();
536 return last_printable_char_index_;
537 }
538
EnsureTextPageInitialized()539 void Page::EnsureTextPageInitialized() {
540 if (text_page_) {
541 return;
542 }
543 if (!page_.get()) {
544 // Page should never be null but a partner has an unexplained bug b/376796346
545 LOGE("Null page (err=%lu). for (page_num=%d)", FPDF_GetLastError(), page_num_);
546 // since the text_page_ would not have a page to load from
547 // Initialize variables to -1, otherwise they carry over garbage values.
548 first_printable_char_index_ = -1;
549 last_printable_char_index_ = -1;
550 return;
551 }
552
553 text_page_.reset(FPDFText_LoadPage(page_.get()));
554 if (!text_page_) {
555 // This will get into infinite recursion if not returned - b/376796346
556 LOGE("Failed to load text (err=%lu). for (page_num=%d)", FPDF_GetLastError(), page_num_);
557 // Initialize variables to -1, otherwise they carry over garbage values.
558 first_printable_char_index_ = -1;
559 last_printable_char_index_ = -1;
560 return;
561 }
562
563 int num_chars = NumChars();
564
565 int i;
566 for (i = 0; i < num_chars && IsWordBreak(GetUnicode(i)); i++) {
567 }
568 first_printable_char_index_ = i;
569
570 for (i = num_chars - 1; i >= first_printable_char_index_ && IsWordBreak(GetUnicode(i)); i--) {
571 }
572 last_printable_char_index_ = i;
573 }
574
InPlaceSwapRedBlueChannels(void * pixels,const int num_pixels) const575 void Page::InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const {
576 uint8_t* channels = static_cast<uint8_t*>(pixels);
577 uint8_t* channel1 = channels;
578 uint8_t* channel3 = channels + 2;
579
580 for (int i = 0; i < num_pixels; ++i, channel1 += kBytesPerPixel, channel3 += kBytesPerPixel) {
581 std::swap(*channel1, *channel3);
582 }
583 }
584
FindMatch(const std::u32string & query,const int page_start,const int page_stop,TextRange * match)585 bool Page::FindMatch(const std::u32string& query, const int page_start, const int page_stop,
586 TextRange* match) {
587 if (query.empty()) {
588 return false;
589 }
590
591 int max_match_start = page_stop - query.length();
592 for (int m = page_start; m <= max_match_start; m++) {
593 if (IsMatch(query, m, page_stop, match)) {
594 return true;
595 }
596 }
597 return false;
598 }
599
IsMatch(const std::u32string & query,const int match_start,const int page_stop,TextRange * match)600 bool Page::IsMatch(const std::u32string& query, const int match_start, const int page_stop,
601 TextRange* match) {
602 int page_index = match_start;
603 size_t query_index = 0;
604 uint32_t page_char = 0, prev_char = 0;
605 while (query_index < query.length()) {
606 prev_char = page_char;
607 page_char = GetUnicode(page_index);
608
609 if (NormalizeForSearch(page_char) == query[query_index]) {
610 // This codepoint matches (ignoring case and accents). Move to next.
611 query_index++;
612 page_index++;
613 } else if (IsSkippableForSearch(page_char, prev_char) && query_index > 0) {
614 // Don't increment query index - skip over skippable character.
615 page_index++;
616 if ((page_stop - page_index) < (query.length() - query_index)) {
617 return false; // Not enough room for query string before page_stop.
618 }
619 } else {
620 return false;
621 }
622 }
623 // Update match to contain page indices of match start and match stop.
624 match->first = match_start;
625 match->second = page_index;
626 return true;
627 }
628
GetBoundaryAtIndex(const int index)629 SelectionBoundary Page::GetBoundaryAtIndex(const int index) {
630 return GetBoundaryAtIndex(index, IsRtlAtIndex(index));
631 }
632
IsRtlAtIndex(const int index)633 bool Page::IsRtlAtIndex(const int index) {
634 int start_index = GetWordStartIndex(index);
635 int stop_index = GetWordStopIndex(index);
636 int word_length = stop_index - start_index;
637 if (word_length <= 1) {
638 // Can't tell directionality from a single character, guess LTR.
639 return false;
640 }
641 Rectangle_i start_bounds = GetCharBounds(start_index);
642 Rectangle_i stop_bounds = GetCharBounds(stop_index - 1);
643 return start_bounds.Center().x > stop_bounds.Center().x;
644 }
645
GetBoundaryAtIndex(const int index,bool is_rtl)646 SelectionBoundary Page::GetBoundaryAtIndex(const int index, bool is_rtl) {
647 // Normally we align the boundary on the start edge of next character:
648 int char_index = index;
649 bool use_end_edge = false;
650
651 // Printable characters have well defined bounding boxes, word-breaks (spaces
652 // and newlines) may not - so we use the end edge of the previous printable
653 // character instead if the next character is not printable.
654 if (index == NumChars() || IsWordBreak(GetUnicode(index))) {
655 char_index = index - 1;
656 use_end_edge = true;
657 }
658 bool use_right_edge = use_end_edge ^ is_rtl;
659
660 SelectionBoundary boundary(index, 0, 0, is_rtl);
661 Rectangle_i char_bounds = GetCharBounds(char_index);
662 boundary.point.x = use_right_edge ? char_bounds.right : char_bounds.left;
663 // Use the baseline (not the bottom) of the char as the y-value.
664 boundary.point.y = GetCharOrigin(char_index).y;
665 return boundary;
666 }
667
GetBoundaryAtPoint(const Point_i & point)668 SelectionBoundary Page::GetBoundaryAtPoint(const Point_i& point) {
669 SelectionBoundary best_boundary(0, point.x, point.y, false);
670 int best_distance_sq = std::numeric_limits<int>::max();
671
672 bool prev_char_is_word_char = false;
673 bool is_rtl = false;
674 for (int index = first_printable_char_index(); index <= last_printable_char_index() + 1;
675 index++) {
676 bool cur_char_is_word_char =
677 (index <= last_printable_char_index()) && !IsWordBreak(GetUnicode(index));
678 // Starting a new word:
679 if (cur_char_is_word_char && !prev_char_is_word_char) {
680 // Finding out RTL involves looking at each end of the word,
681 // so we only do it at the start of each word:
682 is_rtl = IsRtlAtIndex(index);
683 }
684 if (cur_char_is_word_char || prev_char_is_word_char) {
685 SelectionBoundary boundary = GetBoundaryAtIndex(index, is_rtl);
686 int dx = boundary.point.x - point.x;
687 int dy = boundary.point.y - point.y;
688 int distance_sq = dx * dx + dy * dy;
689 if (distance_sq < best_distance_sq) {
690 best_boundary = boundary;
691 best_distance_sq = distance_sq;
692 }
693 }
694 prev_char_is_word_char = cur_char_is_word_char;
695 }
696 return best_boundary;
697 }
698
GetWordStartIndex(const int index)699 int Page::GetWordStartIndex(const int index) {
700 int start_index = index;
701 while (start_index > 0 && !IsWordBreak(GetUnicode(start_index - 1))) {
702 --start_index; // Move start_index to the start of the word.
703 }
704 return start_index;
705 }
706
GetWordStopIndex(const int index)707 int Page::GetWordStopIndex(const int index) {
708 int stop_index = index;
709 int num_chars = NumChars();
710 while (stop_index < num_chars && !IsWordBreak(GetUnicode(stop_index))) {
711 ++stop_index; // Move stop_index to the end of the word.
712 }
713 return stop_index;
714 }
715
GetRawCharBounds(const int char_index)716 Rectangle_d Page::GetRawCharBounds(const int char_index) {
717 double x1, x2, y1, y2;
718 FPDFText_GetCharBox(text_page(), char_index, &x1, &x2, &y1, &y2);
719 return DoubleRect(x1, y1, x2, y2);
720 }
721
GetCharBounds(const int char_index)722 Rectangle_i Page::GetCharBounds(const int char_index) {
723 return ApplyPageTransform(GetRawCharBounds(char_index));
724 }
725
GetCharOrigin(const int char_index)726 Point_i Page::GetCharOrigin(const int char_index) {
727 double x = 0.0, y = 0.0;
728 FPDFText_GetCharOrigin(text_page(), char_index, &x, &y);
729 return ApplyPageTransform(DoublePoint(x, y));
730 }
731
GetAnnotatedLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const732 int Page::GetAnnotatedLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
733 vector<std::string>* urls) const {
734 FPDF_LINK link = nullptr;
735 int pos = 0;
736 int num_links_with_rect = 0;
737 while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
738 if (!IsUrlLink(link)) {
739 continue;
740 }
741
742 std::string url = GetUrlUtf8(link);
743 Rectangle_i rect = GetRect(link);
744 if (IsEmpty(rect)) {
745 continue;
746 }
747
748 link_to_rect->push_back(rects->size());
749 rects->push_back(rect);
750 urls->push_back(url);
751 num_links_with_rect++;
752 }
753 return num_links_with_rect;
754 }
755
GetInferredLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const756 int Page::GetInferredLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
757 vector<std::string>* urls) const {
758 // TODO(b/312730882): Infer links by looking for http:// and similar and for
759 // email addresses to use as mailto: links. There are some pdfClient methods for
760 // doing this, but these have some bugs which need patching or working around.
761 return 0;
762 }
763
GetUrlUtf8(FPDF_LINK link) const764 std::string Page::GetUrlUtf8(FPDF_LINK link) const {
765 FPDF_ACTION action = FPDFLink_GetAction(link);
766 // Allocate a string big enough to hold the URL.
767 std::string url(FPDFAction_GetURIPath(document_, action, nullptr, 0), '\0');
768 // Then write the URL to it.
769 FPDFAction_GetURIPath(document_, action, &url[0], url.length());
770 EraseTrailingNulls(&url);
771 return url;
772 }
773
GetRect(FPDF_LINK link) const774 Rectangle_i Page::GetRect(FPDF_LINK link) const {
775 FS_RECTF r;
776 if (!FPDFLink_GetAnnotRect(link, &r)) {
777 return Rectangle_i();
778 }
779
780 Rectangle_d rect_d = DoubleRect(r.left, r.top, r.right, r.bottom);
781 return ApplyPageTransform(rect_d);
782 }
783
IsGotoLink(FPDF_LINK link) const784 bool Page::IsGotoLink(FPDF_LINK link) const {
785 FPDF_ACTION action = FPDFLink_GetAction(link);
786 return action != nullptr && FPDFAction_GetType(action) == PDFACTION_GOTO;
787 }
788
IsUrlLink(FPDF_LINK link) const789 bool Page::IsUrlLink(FPDF_LINK link) const {
790 FPDF_ACTION action = FPDFLink_GetAction(link);
791 return action != nullptr && FPDFAction_GetType(action) == PDFACTION_URI;
792 }
793
PopulatePageObjects(bool refetch)794 void Page::PopulatePageObjects(bool refetch) {
795 if (!refetch && !page_objects_.empty()) {
796 return;
797 }
798
799 int object_count = FPDFPage_CountObjects(page_.get());
800 // Resize PageObjects
801 page_objects_.resize(object_count);
802
803 for (int index = 0; index < object_count; ++index) {
804 FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page_.get(), index);
805 int type = FPDFPageObj_GetType(page_object);
806
807 // Pointer to PageObject
808 std::unique_ptr<PageObject> page_object_ = nullptr;
809
810 switch (type) {
811 case FPDF_PAGEOBJ_TEXT: {
812 page_object_ = std::make_unique<TextObject>();
813 break;
814 }
815 case FPDF_PAGEOBJ_PATH: {
816 page_object_ = std::make_unique<PathObject>();
817 break;
818 }
819 case FPDF_PAGEOBJ_IMAGE: {
820 page_object_ = std::make_unique<ImageObject>();
821 break;
822 }
823 default:
824 break;
825 }
826
827 // Populate PageObject From Page
828 if (page_object_ && page_object_->PopulateFromFPDFInstance(page_object, page_.get())) {
829 page_objects_[index] = std::move(page_object_);
830 }
831 }
832 }
833
GetPageAnnotations()834 std::vector<Annotation*> Page::GetPageAnnotations() {
835 PopulateAnnotations();
836
837 std::vector<Annotation*> result;
838
839 result.reserve(annotations_.size());
840 for (const auto& annotation : annotations_) {
841 result.push_back(annotation.get());
842 }
843
844 return result;
845 }
846
PopulateAnnotations()847 void Page::PopulateAnnotations() {
848 // If page_ is null
849 if (!page_) {
850 LOGE("Page is null");
851 return;
852 }
853
854 int num_of_annotations = FPDFPage_GetAnnotCount(page_.get());
855 annotations_.resize(num_of_annotations);
856
857 for (int annotation_index = 0; annotation_index < num_of_annotations; annotation_index++) {
858 ScopedFPDFAnnotation scoped_annot(FPDFPage_GetAnnot(page_.get(), annotation_index));
859 int annotationType = FPDFAnnot_GetSubtype(scoped_annot.get());
860
861 std::unique_ptr<Annotation> annotation = nullptr;
862
863 switch (annotationType) {
864 case FPDF_ANNOT_STAMP: {
865 FS_RECTF rect;
866 if (!FPDFAnnot_GetRect(scoped_annot.get(), &rect)) {
867 LOGE("Failed to get the bounds of the annotation");
868 break;
869 }
870 auto bounds = Rectangle_f{rect.left, rect.top, rect.right, rect.bottom};
871 annotation = std::make_unique<StampAnnotation>(bounds);
872 break;
873 }
874 case FPDF_ANNOT_HIGHLIGHT: {
875 vector<Rectangle_f> bounds;
876 auto num_bounds = FPDFAnnot_CountAttachmentPoints(scoped_annot.get());
877 if (num_bounds > 0) {
878 bounds.resize(num_bounds);
879 for (auto bound_index = 0; bound_index < num_bounds; bound_index++) {
880 FS_QUADPOINTSF quad_points;
881 if (!FPDFAnnot_GetAttachmentPoints(scoped_annot.get(), bound_index,
882 &quad_points)) {
883 LOGD("Failed to get quad points from pdfium");
884 break;
885 }
886
887 bounds[bound_index] = Rectangle_f(quad_points.x1, quad_points.y1,
888 quad_points.x2, quad_points.y4);
889 }
890 } else {
891 LOGD("Failed to find bounds for highlight annotation");
892 }
893 annotation = std::make_unique<HighlightAnnotation>(bounds);
894 break;
895 }
896 case FPDF_ANNOT_FREETEXT: {
897 FS_RECTF rect;
898 if (!FPDFAnnot_GetRect(scoped_annot.get(), &rect)) {
899 LOGE("Failed to get the bounds of the annotation");
900 break;
901 }
902 auto bounds = Rectangle_f{rect.left, rect.top, rect.right, rect.bottom};
903 annotation = std::make_unique<FreeTextAnnotation>(bounds);
904 break;
905 }
906 default: {
907 break;
908 }
909 }
910
911 if (!annotation ||
912 !annotation->PopulateFromPdfiumInstance(scoped_annot.get(), page_.get())) {
913 LOGE("Failed to create a pdfClient's instance of annotation using pdfium "
914 "instance");
915 }
916
917 annotations_[annotation_index] = std::move(annotation);
918 }
919 }
920
AddPageAnnotation(std::unique_ptr<Annotation> annotation)921 int Page::AddPageAnnotation(std::unique_ptr<Annotation> annotation) {
922 ScopedFPDFAnnotation scoped_annot = annotation->CreatePdfiumInstance(document_, page_.get());
923
924 if (!scoped_annot) {
925 LOGE("Failed to add the given annotation to the page");
926 return -1;
927 }
928
929 FPDFPage_GenerateContent(page_.get());
930
931 // Add the object to the annotations_ list
932 annotations_.push_back(std::move(annotation));
933
934 // Return the index of added annotation
935 return FPDFPage_GetAnnotIndex(page_.get(), scoped_annot.get());
936 }
937
RemovePageAnnotation(int index)938 bool Page::RemovePageAnnotation(int index) {
939 PopulateAnnotations();
940 if (index >= annotations_.size() || index < 0) {
941 LOGE("Given index is out range for number of annotations on this page");
942 return false;
943 }
944 // Remove the annotation at given index
945 if (!FPDFPage_RemoveAnnot(page_.get(), index)) {
946 LOGE("Failed to remove the annotation at index - %d ", index);
947 return false;
948 }
949
950 FPDFPage_GenerateContent(page_.get());
951
952 // Remove from annotations_ list
953 annotations_.erase(annotations_.begin() + index);
954
955 return true;
956 }
957
UpdatePageAnnotation(int index,std::unique_ptr<Annotation> annotation)958 bool Page::UpdatePageAnnotation(int index, std::unique_ptr<Annotation> annotation) {
959 PopulateAnnotations();
960 // Check for valid index
961 if (index < 0 || index >= annotations_.size()) {
962 return false;
963 }
964
965 // check if there in an annotation of supported type at given index
966 if (annotations_[index] == nullptr) {
967 return false;
968 }
969
970 // Get the pdfium annotation
971 ScopedFPDFAnnotation scoped_annot = ScopedFPDFAnnotation(FPDFPage_GetAnnot(page_.get(), index));
972
973 if (!scoped_annot) {
974 LOGE("Failed to get pdfium annotation's instance");
975 return false;
976 }
977
978 if (!annotation->UpdatePdfiumInstance(scoped_annot.get(), document_, page_.get())) {
979 LOGE("Failed to update pdfium annotation's instance");
980 return false;
981 }
982
983 FPDFPage_GenerateContent(page_.get());
984
985 return true;
986 }
987
988 } // namespace pdfClient