1 /*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkPdfConfig.h"
9 #include "SkPdfDiffEncoder.h"
10 #include "SkPdfNativeObject.h"
11 #include "SkPdfNativeTokenizer.h"
12 #include "SkPdfUtils.h"
13
14 // TODO(edisonn): mac builder does not find the header ... but from headers is ok
15 //#include "SkPdfStreamCommonDictionary_autogen.h"
16 //#include "SkPdfImageDictionary_autogen.h"
17 #include "SkPdfHeaders_autogen.h"
18
19
20 // TODO(edisonn): Perf, Make this function run faster.
21 // There could be 0s between start and end.
22 // needle will not contain 0s.
strrstrk(char * hayStart,char * hayEnd,const char * needle)23 static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
24 size_t needleLen = strlen(needle);
25 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
26 strncmp(hayStart, needle, needleLen) == 0) {
27 return hayStart;
28 }
29
30 hayStart++;
31
32 while (hayStart < hayEnd) {
33 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
34 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) ||
35 (hayStart+needleLen == hayEnd)) &&
36 strncmp(hayStart, needle, needleLen) == 0) {
37 return hayStart;
38 }
39 hayStart++;
40 }
41 return NULL;
42 }
43
skipPdfWhiteSpaces(const unsigned char * start,const unsigned char * end)44 const unsigned char* skipPdfWhiteSpaces(const unsigned char* start, const unsigned char* end) {
45 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
46 TRACE_COMMENT(*start);
47 if (*start == kComment_PdfDelimiter) {
48 // skip the comment until end of line
49 while (start < end && !isPdfEOL(*start)) {
50 start++;
51 TRACE_COMMENT(*start);
52 }
53 } else {
54 start++;
55 }
56 }
57 return start;
58 }
59
endOfPdfToken(const unsigned char * start,const unsigned char * end)60 const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end) {
61 SkASSERT(!isPdfWhiteSpace(*start));
62
63 if (start < end && isPdfDelimiter(*start)) {
64 TRACE_TK(*start);
65 start++;
66 return start;
67 }
68
69 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
70 TRACE_TK(*start);
71 start++;
72 }
73 return start;
74 }
75
76 // The parsing should end with a ].
readArray(const unsigned char * start,const unsigned char * end,SkPdfNativeObject * array,SkPdfAllocator * allocator,SkPdfNativeDoc * doc)77 static const unsigned char* readArray(const unsigned char* start, const unsigned char* end,
78 SkPdfNativeObject* array,
79 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
80 SkPdfNativeObject::makeEmptyArray(array);
81 // PUT_TRACK_STREAM(array, start, start)
82
83 if (allocator == NULL) {
84 // TODO(edisonn): report/warning error/assert
85 return end;
86 }
87
88 while (start < end) {
89 // skip white spaces
90 start = skipPdfWhiteSpaces(start, end);
91
92 const unsigned char* endOfToken = endOfPdfToken(start, end);
93
94 if (endOfToken == start) {
95 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
96 return start;
97 }
98
99 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
100 return endOfToken;
101 }
102
103 SkPdfNativeObject* newObj = allocator->allocObject();
104 start = nextObject(start, end, newObj, allocator, doc);
105 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array
106 // only when we are sure they are not references!
107 if (newObj->isKeywordReference() && array->size() >= 2 &&
108 array->objAtAIndex(SkToInt(array->size() - 1))->isInteger() &&
109 array->objAtAIndex(SkToInt(array->size() - 2))->isInteger()) {
110 SkPdfNativeObject* gen = array->removeLastInArray();
111 SkPdfNativeObject* id = array->removeLastInArray();
112
113 SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(),
114 (unsigned int)gen->intValue(), newObj);
115 // newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj) - store end, as now
116 }
117 array->appendInArray(newObj);
118 }
119 // TODO(edisonn): report not reached, we should never get here
120 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
121 // or it might be that the files were actually corrupted
122 return start;
123 }
124
readString(const unsigned char * start,const unsigned char * end,unsigned char * out)125 static const unsigned char* readString(const unsigned char* start, const unsigned char* end,
126 unsigned char* out) {
127 const unsigned char* in = start;
128 bool hasOut = (out != NULL);
129
130 int openRoundBrackets = 1;
131 while (in < end) {
132 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
133 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
134 if (openRoundBrackets == 0) {
135 in++; // consumed )
136 break;
137 }
138
139 if (*in == kEscape_PdfSpecial) {
140 if (in + 1 < end) {
141 switch (in[1]) {
142 case 'n':
143 if (hasOut) { *out = kLF_PdfWhiteSpace; }
144 out++;
145 in += 2;
146 break;
147
148 case 'r':
149 if (hasOut) { *out = kCR_PdfWhiteSpace; }
150 out++;
151 in += 2;
152 break;
153
154 case 't':
155 if (hasOut) { *out = kHT_PdfWhiteSpace; }
156 out++;
157 in += 2;
158 break;
159
160 case 'b':
161 // TODO(edisonn): any special meaning to backspace?
162 if (hasOut) { *out = kBackspace_PdfSpecial; }
163 out++;
164 in += 2;
165 break;
166
167 case 'f':
168 if (hasOut) { *out = kFF_PdfWhiteSpace; }
169 out++;
170 in += 2;
171 break;
172
173 case kOpenedRoundBracket_PdfDelimiter:
174 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
175 out++;
176 in += 2;
177 break;
178
179 case kClosedRoundBracket_PdfDelimiter:
180 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
181 out++;
182 in += 2;
183 break;
184
185 case kEscape_PdfSpecial:
186 if (hasOut) { *out = kEscape_PdfSpecial; }
187 out++;
188 in += 2;
189 break;
190
191 case '0':
192 case '1':
193 case '2':
194 case '3':
195 case '4':
196 case '5':
197 case '6':
198 case '7': {
199 //read octals
200 in++; // consume backslash
201
202 int code = 0;
203 int i = 0;
204 while (in < end && *in >= '0' && *in < '8') {
205 code = (code << 3) + ((*in) - '0'); // code * 8 + d
206 i++;
207 in++;
208 if (i == 3) {
209 if (hasOut) { *out = code & 0xff; }
210 out++;
211 i = 0;
212 }
213 }
214 if (i > 0) {
215 if (hasOut) { *out = code & 0xff; }
216 out++;
217 }
218 }
219 break;
220
221 default:
222 // Per spec, backslash is ignored if escaped ch is unknown
223 in++;
224 break;
225 }
226 } else {
227 in++;
228 }
229 } else {
230 if (hasOut) { *out = *in; }
231 in++;
232 out++;
233 }
234 }
235
236 if (hasOut) {
237 return in; // consumed already ) at the end of the string
238 } else {
239 // return where the string would end if we reuse the string
240 return start + (out - (const unsigned char*)NULL);
241 }
242 }
243
readStringLength(const unsigned char * start,const unsigned char * end)244 static size_t readStringLength(const unsigned char* start, const unsigned char* end) {
245 return readString(start, end, NULL) - start;
246 }
247
readString(const unsigned char * start,const unsigned char * end,SkPdfNativeObject * str,SkPdfAllocator * allocator)248 static const unsigned char* readString(const unsigned char* start, const unsigned char* end,
249 SkPdfNativeObject* str, SkPdfAllocator* allocator) {
250 if (!allocator) {
251 // TODO(edisonn): report error/warn/assert
252 return end;
253 }
254
255 size_t outLength = readStringLength(start, end);
256 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
257 const unsigned char* now = readString(start, end, out);
258 SkPdfNativeObject::makeString(out, out + outLength, str);
259 // PUT_TRACK_STREAM(str, start, now)
260 TRACE_STRING(out, out + outLength);
261 return now; // consumed already ) at the end of the string
262 }
263
readHexString(const unsigned char * start,const unsigned char * end,unsigned char * out)264 static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end,
265 unsigned char* out) {
266 bool hasOut = (out != NULL);
267 const unsigned char* in = start;
268
269 unsigned char code = 0;
270
271 while (in < end) {
272 while (in < end && isPdfWhiteSpace(*in)) {
273 in++;
274 }
275
276 if (*in == kClosedInequityBracket_PdfDelimiter) {
277 in++; // consume >
278 // normal exit
279 break;
280 }
281
282 if (in >= end) {
283 // end too soon
284 break;
285 }
286
287 switch (*in) {
288 case '0':
289 case '1':
290 case '2':
291 case '3':
292 case '4':
293 case '5':
294 case '6':
295 case '7':
296 case '8':
297 case '9':
298 code = (*in - '0') << 4;
299 break;
300
301 case 'a':
302 case 'b':
303 case 'c':
304 case 'd':
305 case 'e':
306 case 'f':
307 code = (*in - 'a' + 10) << 4;
308 break;
309
310 case 'A':
311 case 'B':
312 case 'C':
313 case 'D':
314 case 'E':
315 case 'F':
316 code = (*in - 'A' + 10) << 4;
317 break;
318
319 // TODO(edisonn): spec does not say how to handle this error
320 default:
321 break;
322 }
323
324 in++; // advance
325
326 while (in < end && isPdfWhiteSpace(*in)) {
327 in++;
328 }
329
330 // TODO(edisonn): report error
331 if (in >= end) {
332 if (hasOut) { *out = code; }
333 out++;
334 break;
335 }
336
337 if (*in == kClosedInequityBracket_PdfDelimiter) {
338 if (hasOut) { *out = code; }
339 out++;
340 in++;
341 break;
342 }
343
344 switch (*in) {
345 case '0':
346 case '1':
347 case '2':
348 case '3':
349 case '4':
350 case '5':
351 case '6':
352 case '7':
353 case '8':
354 case '9':
355 code += (*in - '0');
356 break;
357
358 case 'a':
359 case 'b':
360 case 'c':
361 case 'd':
362 case 'e':
363 case 'f':
364 code += (*in - 'a' + 10);
365 break;
366
367 case 'A':
368 case 'B':
369 case 'C':
370 case 'D':
371 case 'E':
372 case 'F':
373 code += (*in - 'A' + 10);
374 break;
375
376 // TODO(edisonn): spec does not say how to handle this error
377 default:
378 break;
379 }
380
381 if (hasOut) { *out = code; }
382 out++;
383 in++;
384 }
385
386 if (hasOut) {
387 return in; // consumed already ) at the end of the string
388 } else {
389 // return where the string would end if we reuse the string
390 return start + (out - (const unsigned char*)NULL);
391 }
392 }
393
readHexStringLength(const unsigned char * start,const unsigned char * end)394 static size_t readHexStringLength(const unsigned char* start, const unsigned char* end) {
395 return readHexString(start, end, NULL) - start;
396 }
397
readHexString(const unsigned char * start,const unsigned char * end,SkPdfNativeObject * str,SkPdfAllocator * allocator)398 static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
399 if (!allocator) {
400 // TODO(edisonn): report error/warn/assert
401 return end;
402 }
403 size_t outLength = readHexStringLength(start, end);
404 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
405 const unsigned char* now = readHexString(start, end, out);
406 SkPdfNativeObject::makeHexString(out, out + outLength, str);
407 // str PUT_TRACK_STREAM(start, now)
408 TRACE_HEXSTRING(out, out + outLength);
409 return now; // consumed already > at the end of the string
410 }
411
412 // TODO(edisonn): add version parameter, before PDF 1.2 name could not have special characters.
readName(const unsigned char * start,const unsigned char * end,unsigned char * out)413 static const unsigned char* readName(const unsigned char* start, const unsigned char* end,
414 unsigned char* out) {
415 bool hasOut = (out != NULL);
416 const unsigned char* in = start;
417
418 unsigned char code = 0;
419
420 while (in < end) {
421 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
422 break;
423 }
424
425 if (*in == '#' && in + 2 < end) {
426 in++;
427 switch (*in) {
428 case '0':
429 case '1':
430 case '2':
431 case '3':
432 case '4':
433 case '5':
434 case '6':
435 case '7':
436 case '8':
437 case '9':
438 code = (*in - '0') << 4;
439 break;
440
441 case 'a':
442 case 'b':
443 case 'c':
444 case 'd':
445 case 'e':
446 case 'f':
447 code = (*in - 'a' + 10) << 4;
448 break;
449
450 case 'A':
451 case 'B':
452 case 'C':
453 case 'D':
454 case 'E':
455 case 'F':
456 code = (*in - 'A' + 10) << 4;
457 break;
458
459 // TODO(edisonn): spec does not say how to handle this error
460 default:
461 break;
462 }
463
464 in++; // advance
465
466 switch (*in) {
467 case '0':
468 case '1':
469 case '2':
470 case '3':
471 case '4':
472 case '5':
473 case '6':
474 case '7':
475 case '8':
476 case '9':
477 code += (*in - '0');
478 break;
479
480 case 'a':
481 case 'b':
482 case 'c':
483 case 'd':
484 case 'e':
485 case 'f':
486 code += (*in - 'a' + 10);
487 break;
488
489 case 'A':
490 case 'B':
491 case 'C':
492 case 'D':
493 case 'E':
494 case 'F':
495 code += (*in - 'A' + 10);
496 break;
497
498 // TODO(edisonn): spec does not say how to handle this error
499 default:
500 break;
501 }
502
503 if (hasOut) { *out = code; }
504 out++;
505 in++;
506 } else {
507 if (hasOut) { *out = *in; }
508 out++;
509 in++;
510 }
511 }
512
513 if (hasOut) {
514 return in; // consumed already ) at the end of the string
515 } else {
516 // return where the string would end if we reuse the string
517 return start + (out - (const unsigned char*)NULL);
518 }
519 }
520
readNameLength(const unsigned char * start,const unsigned char * end)521 static size_t readNameLength(const unsigned char* start, const unsigned char* end) {
522 return readName(start, end, NULL) - start;
523 }
524
readName(const unsigned char * start,const unsigned char * end,SkPdfNativeObject * name,SkPdfAllocator * allocator)525 static const unsigned char* readName(const unsigned char* start, const unsigned char* end,
526 SkPdfNativeObject* name, SkPdfAllocator* allocator) {
527 if (!allocator) {
528 // TODO(edisonn): report error/warn/assert
529 return end;
530 }
531 size_t outLength = readNameLength(start, end);
532 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
533 const unsigned char* now = readName(start, end, out);
534 SkPdfNativeObject::makeName(out, out + outLength, name);
535 //PUT_TRACK_STREAM(start, now)
536 TRACE_NAME(out, out + outLength);
537 return now;
538 }
539
540 // TODO(edisonn): pdf spec let Length to be an indirect object define after the stream
541 // that makes for an interesting scenario, where the stream itself contains endstream, together
542 // with a reference object with the length, but the real length object would be somewhere else
543 // it could confuse the parser
544 /*example:
545
546 7 0 obj
547 << /length 8 0 R>>
548 stream
549 ...............
550 endstream
551 8 0 obj #we are in stream actually, not a real object
552 << 10 >> #we are in stream actually, not a real object
553 endobj
554 endstream
555 8 0 obj #real obj
556 << 100 >> #real obj
557 endobj
558 and it could get worse, with multiple object like this
559 */
560
561 // right now implement the silly algorithm that assumes endstream is finishing the stream
562
readStream(const unsigned char * start,const unsigned char * end,SkPdfNativeObject * dict,SkPdfNativeDoc * doc)563 static const unsigned char* readStream(const unsigned char* start, const unsigned char* end,
564 SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {
565 start = skipPdfWhiteSpaces(start, end);
566 if (!( start[0] == 's' &&
567 start[1] == 't' &&
568 start[2] == 'r' &&
569 start[3] == 'e' &&
570 start[4] == 'a' &&
571 start[5] == 'm')) {
572 // no stream. return.
573 return start;
574 }
575
576 start += 6; // strlen("stream")
577 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
578 start += 2;
579 } else if (start[0] == kLF_PdfWhiteSpace) {
580 start += 1;
581 } else if (isPdfWhiteSpace(start[0])) {
582 start += 1;
583 } else {
584 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
585 }
586
587 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
588 // TODO(edisonn): load Length
589 int64_t length = -1;
590
591 // TODO(edisonn): very basic implementation
592 if (stream->has_Length() && stream->Length(doc) > 0) {
593 length = stream->Length(doc);
594 }
595
596 // TODO(edisonn): load external streams
597 // TODO(edisonn): look at the last filter, to determine how to deal with possible parsing
598 // issues. The last filter can have special rules to terminate a stream, which we could
599 // use to determine end of stream.
600
601 if (length >= 0) {
602 const unsigned char* endstream = start + length;
603
604 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
605 endstream += 2;
606 } else if (endstream[0] == kLF_PdfWhiteSpace) {
607 endstream += 1;
608 }
609
610 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
611 length = -1;
612 }
613 }
614
615 if (length < 0) {
616 // scan the buffer, until we find first endstream
617 // TODO(edisonn): all buffers must have a 0 at the end now,
618 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end,
619 "endstream");
620
621 if (endstream) {
622 length = endstream - start;
623 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
624 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
625 }
626 }
627 if (length >= 0) {
628 const unsigned char* endstream = start + length;
629
630 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
631 endstream += 2;
632 } else if (endstream[0] == kLF_PdfWhiteSpace) {
633 endstream += 1;
634 }
635
636 // TODO(edisonn): verify the next bytes are "endstream"
637
638 endstream += strlen("endstream");
639 // TODO(edisonn): Assert? report error/warning?
640 dict->addStream(start, (size_t)length);
641 return endstream;
642 }
643 return start;
644 }
645
readInlineImageStream(const unsigned char * start,const unsigned char * end,SkPdfImageDictionary * inlineImage,SkPdfNativeDoc * doc)646 static const unsigned char* readInlineImageStream(const unsigned char* start,
647 const unsigned char* end,
648 SkPdfImageDictionary* inlineImage,
649 SkPdfNativeDoc* doc) {
650 // We already processed ID keyword, and we should be positioned immediately after it
651
652 // TODO(edisonn): security: either make all streams to have extra 2 bytes at the end,
653 // instead of this if.
654 //if (end - start <= 2) {
655 // // TODO(edisonn): warning?
656 // return end; // but can we have a pixel image encoded in 1-2 bytes?
657 //}
658
659 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
660 start += 2;
661 } else if (start[0] == kLF_PdfWhiteSpace) {
662 start += 1;
663 } else if (isPdfWhiteSpace(start[0])) {
664 start += 1;
665 } else {
666 SkASSERT(isPdfDelimiter(start[0]));
667 // TODO(edisonn): warning?
668 }
669
670 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
671 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
672
673 if (endstream) {
674 size_t length = endstream - start;
675 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
676 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
677 inlineImage->addStream(start, (size_t)length);
678 } else {
679 // TODO(edisonn): report error in inline image stream (ID-EI) section
680 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
681 return end;
682 }
683 return endEI;
684 }
685
readDictionary(const unsigned char * start,const unsigned char * end,SkPdfNativeObject * dict,SkPdfAllocator * allocator,SkPdfNativeDoc * doc)686 static const unsigned char* readDictionary(const unsigned char* start, const unsigned char* end,
687 SkPdfNativeObject* dict,
688 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
689 if (allocator == NULL) {
690 // TODO(edisonn): report/warning error
691 return end;
692 }
693 SkPdfNativeObject::makeEmptyDictionary(dict);
694 // PUT_TRACK_STREAM(dict, start, start)
695
696 start = skipPdfWhiteSpaces(start, end);
697 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them after set.
698
699 while (start < end && *start == kNamed_PdfDelimiter) {
700 SkPdfNativeObject key;
701 //*start = '\0';
702 start++;
703 start = readName(start, end, &key, &tmpStorage);
704 start = skipPdfWhiteSpaces(start, end);
705
706 if (start < end) {
707 SkPdfNativeObject* value = allocator->allocObject();
708 start = nextObject(start, end, value, allocator, doc);
709
710 start = skipPdfWhiteSpaces(start, end);
711
712 if (start < end) {
713 // We should have an indirect reference
714 if (isPdfDigit(*start)) {
715 SkPdfNativeObject generation;
716 start = nextObject(start, end, &generation, allocator, doc);
717
718 SkPdfNativeObject keywordR;
719 start = nextObject(start, end, &keywordR, allocator, doc);
720
721 if (value->isInteger() && generation.isInteger() &&
722 keywordR.isKeywordReference()) {
723 int64_t id = value->intValue();
724 SkPdfNativeObject::resetAndMakeReference(
725 (unsigned int)id,
726 (unsigned int)generation.intValue(),
727 value);
728 // PUT_TRACK_PARAMETERS_OBJ2(value, &generation)
729 dict->set(&key, value);
730 } else {
731 // TODO(edisonn) error?, ignore it for now.
732 dict->set(&key, value);
733 }
734 } else {
735 // next elem is not a digit, but it might not be / either!
736 dict->set(&key, value);
737 }
738 } else {
739 // /key >>
740 dict->set(&key, value);
741 return end;
742 }
743 start = skipPdfWhiteSpaces(start, end);
744 } else {
745 dict->set(&key, &SkPdfNativeObject::kNull);
746 return end;
747 }
748 }
749
750 // now we should expect >>
751 start = skipPdfWhiteSpaces(start, end);
752 if (*start != kClosedInequityBracket_PdfDelimiter) {
753 // TODO(edisonn): report/warning
754 }
755
756 start++; // skip >
757 if (*start != kClosedInequityBracket_PdfDelimiter) {
758 // TODO(edisonn): report/warning
759 }
760
761 start++; // skip >
762
763 //STORE_TRACK_PARAMETER_OFFSET_END(dict,start);
764
765 start = readStream(start, end, dict, doc);
766
767 return start;
768 }
769
nextObject(const unsigned char * start,const unsigned char * end,SkPdfNativeObject * token,SkPdfAllocator * allocator,SkPdfNativeDoc * doc)770 const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,
771 SkPdfNativeObject* token,
772 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
773 const unsigned char* current;
774
775 // skip white spaces
776 start = skipPdfWhiteSpaces(start, end);
777
778 if (start >= end) {
779 return end;
780 }
781
782 current = endOfPdfToken(start, end);
783
784 // no token, len would be 0
785 if (current == start || current == end) {
786 return end;
787 }
788
789 size_t tokenLen = current - start;
790
791 if (tokenLen == 1) {
792 // start array
793 switch (*start) {
794 case kOpenedSquareBracket_PdfDelimiter:
795 return readArray(current, end, token, allocator, doc);
796
797 case kOpenedRoundBracket_PdfDelimiter:
798 return readString(start + 1, end, token, allocator);
799
800 case kOpenedInequityBracket_PdfDelimiter:
801 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
802 // TODO(edisonn): pass here the length somehow?
803 return readDictionary(start + 2, end, token, allocator, doc); // skip <<
804 } else {
805 return readHexString(start + 1, end, token, allocator); // skip <
806 }
807
808 case kNamed_PdfDelimiter:
809 return readName(start + 1, end, token, allocator);
810
811 // TODO(edisonn): what to do curly brackets?
812 case kOpenedCurlyBracket_PdfDelimiter:
813 default:
814 break;
815 }
816
817 SkASSERT(!isPdfWhiteSpace(*start));
818 if (isPdfDelimiter(*start)) {
819 // TODO(edisonn): how unexpected stream ] } > ) will be handled?
820 // for now ignore, and it will become a keyword to be ignored
821 }
822 }
823
824 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
825 SkPdfNativeObject::makeNull(token);
826 // PUT_TRACK_STREAM(start, start + 4)
827 return current;
828 }
829
830 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
831 SkPdfNativeObject::makeBoolean(true, token);
832 // PUT_TRACK_STREAM(start, start + 4)
833 return current;
834 }
835
836 // TODO(edisonn): again, make all buffers have 5 extra bytes
837 if (tokenLen == 5 && start[0] == 'f' &&
838 start[1] == 'a' &&
839 start[2] == 'l' &&
840 start[3] == 's' &&
841 start[4] == 'e') {
842 SkPdfNativeObject::makeBoolean(false, token);
843 // PUT_TRACK_STREAM(start, start + 5)
844 return current;
845 }
846
847 if (isPdfNumeric(*start)) {
848 SkPdfNativeObject::makeNumeric(start, current, token);
849 // PUT_TRACK_STREAM(start, current)
850 } else {
851 SkPdfNativeObject::makeKeyword(start, current, token);
852 // PUT_TRACK_STREAM(start, current)
853 }
854 return current;
855 }
856
allocBlock()857 SkPdfNativeObject* SkPdfAllocator::allocBlock() {
858 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);
859 return new SkPdfNativeObject[BUFFER_SIZE];
860 }
861
~SkPdfAllocator()862 SkPdfAllocator::~SkPdfAllocator() {
863 for (int i = 0 ; i < fHandles.count(); i++) {
864 free(fHandles[i]);
865 }
866 for (int i = 0 ; i < fHistory.count(); i++) {
867 for (int j = 0 ; j < BUFFER_SIZE; j++) {
868 fHistory[i][j].reset();
869 }
870 delete[] fHistory[i];
871 }
872 for (int j = 0 ; j < BUFFER_SIZE; j++) {
873 fCurrent[j].reset();
874 }
875 delete[] fCurrent;
876 }
877
allocObject()878 SkPdfNativeObject* SkPdfAllocator::allocObject() {
879 if (fCurrentUsed >= BUFFER_SIZE) {
880 fHistory.push(fCurrent);
881 fCurrent = allocBlock();
882 fCurrentUsed = 0;
883 fSizeInBytes += sizeof(SkPdfNativeObject*);
884 }
885 fCurrentUsed++;
886 return &fCurrent[fCurrentUsed - 1];
887 }
888
889 // TODO(edisonn): perf: do no copy the buffers, but reuse them, and mark cache the result,
890 // so there is no need of a second pass
SkPdfNativeTokenizer(SkPdfNativeObject * objWithStream,SkPdfAllocator * allocator,SkPdfNativeDoc * doc)891 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,
892 SkPdfAllocator* allocator,
893 SkPdfNativeDoc* doc)
894 : fDoc(doc)
895 , fAllocator(allocator)
896 , fUncompressedStream(NULL)
897 , fUncompressedStreamEnd(NULL)
898 , fEmpty(false)
899 , fHasPutBack(false) {
900 const unsigned char* buffer = NULL;
901 size_t len = 0;
902 objWithStream->GetFilteredStreamRef(&buffer, &len);
903 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)
904 // we need to do now for perf, and our generated pdfs do not have comments,
905 // but we need to remove this hack for pdfs in the wild
906 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
907 if (endobj) {
908 len = endobj - (char*)buffer + strlen("endobj");
909 }
910 fUncompressedStreamStart = fUncompressedStream = buffer;
911 fUncompressedStreamEnd = fUncompressedStream + len;
912 }
913
SkPdfNativeTokenizer(const unsigned char * buffer,int len,SkPdfAllocator * allocator,SkPdfNativeDoc * doc)914 SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len,
915 SkPdfAllocator* allocator,
916 SkPdfNativeDoc* doc) : fDoc(doc)
917 , fAllocator(allocator)
918 , fEmpty(false)
919 , fHasPutBack(false) {
920 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)
921 // we need to do now for perf, and our generated pdfs do not have comments,
922 // but we need to remove this hack for pdfs in the wild
923 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
924 if (endobj) {
925 len = SkToInt(endobj - (char*)buffer + strlen("endobj"));
926 }
927 fUncompressedStreamStart = fUncompressedStream = buffer;
928 fUncompressedStreamEnd = fUncompressedStream + len;
929 }
930
// The destructor body is empty: nothing is released here.
SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
}
933
readTokenCore(PdfToken * token)934 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
935 #ifdef PDF_TRACE_READ_TOKEN
936 static int read_op = 0;
937 #endif
938
939 token->fKeyword = NULL;
940 token->fObject = NULL;
941
942 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
943 if (fUncompressedStream >= fUncompressedStreamEnd) {
944 fEmpty = true;
945 return false;
946 }
947
948 SkPdfNativeObject obj;
949 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
950 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)
951
952 // If it is a keyword, we will only get the pointer of the string.
953 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {
954 token->fKeyword = obj.c_str();
955 token->fKeywordLength = obj.lenstr();
956 token->fType = kKeyword_TokenType;
957 } else {
958 SkPdfNativeObject* pobj = fAllocator->allocObject();
959 *pobj = obj;
960 token->fObject = pobj;
961 token->fType = kObject_TokenType;
962 }
963
964 #ifdef PDF_TRACE_READ_TOKEN
965 read_op++;
966 #if 0
967 if (548 == read_op) {
968 printf("break;\n");
969 }
970 #endif
971 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object",
972 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() :
973 token->fObject->toString().c_str());
974 #endif
975
976 return true;
977 }
978
PutBack(PdfToken token)979 void SkPdfNativeTokenizer::PutBack(PdfToken token) {
980 SkASSERT(!fHasPutBack);
981 fHasPutBack = true;
982 fPutBack = token;
983 #ifdef PDF_TRACE_READ_TOKEN
984 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object",
985 token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str() :
986 token.fObject->toString().c_str());
987 #endif
988 }
989
readToken(PdfToken * token,bool writeDiff)990 bool SkPdfNativeTokenizer::readToken(PdfToken* token, bool writeDiff) {
991 if (fHasPutBack) {
992 *token = fPutBack;
993 fHasPutBack = false;
994 #ifdef PDF_TRACE_READ_TOKEN
995 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object",
996 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() :
997 token->fObject->toString().c_str());
998 #endif
999 if (writeDiff) {
1000 SkPdfDiffEncoder::WriteToFile(token);
1001 }
1002 return true;
1003 }
1004
1005 if (fEmpty) {
1006 #ifdef PDF_TRACE_READ_TOKEN
1007 printf("EMPTY TOKENIZER\n");
1008 #endif
1009 return false;
1010 }
1011
1012 const bool result = readTokenCore(token);
1013 if (result && writeDiff) {
1014 SkPdfDiffEncoder::WriteToFile(token);
1015 }
1016 return result;
1017 }
1018
// Defines an SkPdfName variable called longName, constructed from the text of its own
// identifier.
#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)

// Long names of the inline image keys; the abbreviated keys are mapped to these by
// inlineImageKeyAbbreviationExpand().
DECLARE_PDF_NAME(BitsPerComponent);
DECLARE_PDF_NAME(ColorSpace);
DECLARE_PDF_NAME(Decode);
DECLARE_PDF_NAME(DecodeParms);
DECLARE_PDF_NAME(Filter);
DECLARE_PDF_NAME(Height);
DECLARE_PDF_NAME(ImageMask);
DECLARE_PDF_NAME(Intent);  // PDF 1.1 - the key, or the abbreviations?
DECLARE_PDF_NAME(Interpolate);
DECLARE_PDF_NAME(Width);

// Long names of the inline image values; the abbreviated values are mapped to these by
// inlineImageValueAbbreviationExpand().
DECLARE_PDF_NAME(DeviceGray);
DECLARE_PDF_NAME(DeviceRGB);
DECLARE_PDF_NAME(DeviceCMYK);
DECLARE_PDF_NAME(Indexed);
DECLARE_PDF_NAME(ASCIIHexDecode);
DECLARE_PDF_NAME(ASCII85Decode);
DECLARE_PDF_NAME(LZWDecode);
DECLARE_PDF_NAME(FlateDecode);  // PDF 1.2
DECLARE_PDF_NAME(RunLengthDecode);
DECLARE_PDF_NAME(CCITTFaxDecode);
DECLARE_PDF_NAME(DCTDecode);

// Inside a function returning a pointer: returns the address of the longName variable when
// obj is the name spelled shortName. The expansion is an unbraced if statement that ends
// with its own semicolon.
#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1047
1048
inlineImageKeyAbbreviationExpand(SkPdfNativeObject * key)1049 static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) {
1050 if (!key || !key->isName()) {
1051 return key;
1052 }
1053
1054 // TODO(edisonn): use autogenerated code!
1055 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1056 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1057 HANDLE_NAME_ABBR(key, Decode, D);
1058 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1059 HANDLE_NAME_ABBR(key, Filter, F);
1060 HANDLE_NAME_ABBR(key, Height, H);
1061 HANDLE_NAME_ABBR(key, ImageMask, IM);
1062 // HANDLE_NAME_ABBR(key, Intent, );
1063 HANDLE_NAME_ABBR(key, Interpolate, I);
1064 HANDLE_NAME_ABBR(key, Width, W);
1065
1066 return key;
1067 }
1068
inlineImageValueAbbreviationExpand(SkPdfNativeObject * value)1069 static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {
1070 if (!value || !value->isName()) {
1071 return value;
1072 }
1073
1074 // TODO(edisonn): use autogenerated code!
1075 HANDLE_NAME_ABBR(value, DeviceGray, G);
1076 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1077 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1078 HANDLE_NAME_ABBR(value, Indexed, I);
1079 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1080 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1081 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1082 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1083 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1084 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1085 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1086
1087 return value;
1088 }
1089
readInlineImage()1090 SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1091 // BI already processed
1092 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
1093 if (fUncompressedStream >= fUncompressedStreamEnd) {
1094 return NULL;
1095 }
1096
1097 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
1098 SkPdfNativeObject::makeEmptyDictionary(inlineImage);
1099 // PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart,
1100 // fUncompressedStream - fUncompressedStreamStart)
1101
1102 while (fUncompressedStream < fUncompressedStreamEnd) {
1103 SkPdfNativeObject* key = fAllocator->allocObject();
1104 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key,
1105 fAllocator, fDoc);
1106 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
1107
1108 if (key->isKeyword() && key->lenstr() == 2 &&
1109 key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
1110 fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd,
1111 inlineImage, fDoc);
1112 return inlineImage;
1113 } else {
1114 SkPdfNativeObject* obj = fAllocator->allocObject();
1115 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj,
1116 fAllocator, fDoc);
1117 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
1118 // TODO(edisonn): perf maybe we should not expand abBreviation like this
1119 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1120 inlineImageValueAbbreviationExpand(obj));
1121 }
1122 }
1123 // TODO(edisonn): report end of data with inline image without an EI
1124 return inlineImage;
1125 }
1126