1 /* 2 * Copyright 2013 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef SkPdfNativeTokenizer_DEFINED 9 #define SkPdfNativeTokenizer_DEFINED 10 11 #include <math.h> 12 #include <string.h> 13 14 #include "SkPdfConfig.h" 15 #include "SkTDArray.h" 16 #include "SkTDict.h" 17 18 // All these constants are defined by the PDF 1.4 Spec. 19 20 class SkPdfDictionary; 21 class SkPdfImageDictionary; 22 class SkPdfNativeDoc; 23 class SkPdfNativeObject; 24 25 26 // White Spaces 27 #define kNUL_PdfWhiteSpace '\x00' 28 #define kHT_PdfWhiteSpace '\x09' 29 #define kLF_PdfWhiteSpace '\x0A' 30 #define kFF_PdfWhiteSpace '\x0C' 31 #define kCR_PdfWhiteSpace '\x0D' 32 #define kSP_PdfWhiteSpace '\x20' 33 34 // PdfDelimiters 35 #define kOpenedRoundBracket_PdfDelimiter '(' 36 #define kClosedRoundBracket_PdfDelimiter ')' 37 #define kOpenedInequityBracket_PdfDelimiter '<' 38 #define kClosedInequityBracket_PdfDelimiter '>' 39 #define kOpenedSquareBracket_PdfDelimiter '[' 40 #define kClosedSquareBracket_PdfDelimiter ']' 41 #define kOpenedCurlyBracket_PdfDelimiter '{' 42 #define kClosedCurlyBracket_PdfDelimiter '}' 43 #define kNamed_PdfDelimiter '/' 44 #define kComment_PdfDelimiter '%' 45 46 #define kEscape_PdfSpecial '\\' 47 #define kBackspace_PdfSpecial '\x08' 48 49 // TODO(edisonn): what is the faster way for compiler/machine type to evaluate this expressions? 50 // we should evaluate all options. might be even different from one machine to another 51 // 1) expand expression, let compiler optimize it 52 // 2) binary search 53 // 3) linear search in array 54 // 4) vector (e.f. T type[256] .. return type[ch] ... 55 // 5) manually build the expression with least number of operators, e.g. for consecutive 56 // chars, we can use an binary equal ignoring last bit 57 #define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)|| \ 58 ((ch)==kHT_PdfWhiteSpace)|| \ 59 ((ch)==kLF_PdfWhiteSpace)|| \ 60 ((ch)==kFF_PdfWhiteSpace)|| \ 61 ((ch)==kCR_PdfWhiteSpace)|| \ 62 ((ch)==kSP_PdfWhiteSpace)) 63 64 #define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)) 65 66 67 #define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\ 68 ((ch)==kClosedRoundBracket_PdfDelimiter)||\ 69 ((ch)==kOpenedInequityBracket_PdfDelimiter)||\ 70 ((ch)==kClosedInequityBracket_PdfDelimiter)||\ 71 ((ch)==kOpenedSquareBracket_PdfDelimiter)||\ 72 ((ch)==kClosedSquareBracket_PdfDelimiter)||\ 73 ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\ 74 ((ch)==kClosedCurlyBracket_PdfDelimiter)||\ 75 ((ch)==kNamed_PdfDelimiter)||\ 76 ((ch)==kComment_PdfDelimiter)) 77 78 #define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch)) 79 80 #define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9') 81 #define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.') 82 83 const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer, const unsigned char* end); 84 const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end); 85 86 #define BUFFER_SIZE 1024 87 88 /** \class SkPdfAllocator 89 * 90 * An allocator only allocates memory, and it deletes it all when the allocator is destroyed. 91 * This strategy would allow us not to do any garbage collection while we parse and/or render 92 * a pdf. 93 * 94 */ 95 class SkPdfAllocator { 96 public: SkPdfAllocator()97 SkPdfAllocator() { 98 fSizeInBytes = sizeof(*this); 99 fCurrent = allocBlock(); 100 fCurrentUsed = 0; 101 } 102 103 ~SkPdfAllocator(); 104 105 // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called. 106 SkPdfNativeObject* allocObject(); 107 108 // Allocates a buffer. It will be freed automatically when ~SkPdfAllocator() is called. alloc(size_t bytes)109 void* alloc(size_t bytes) { 110 void* data = malloc(bytes); 111 fHandles.push(data); 112 fSizeInBytes += bytes; 113 return data; 114 } 115 116 // Returns the number of bytes used in this allocator. bytesUsed()117 size_t bytesUsed() const { 118 return fSizeInBytes; 119 } 120 121 private: 122 SkTDArray<SkPdfNativeObject*> fHistory; 123 SkTDArray<void*> fHandles; 124 SkPdfNativeObject* fCurrent; 125 int fCurrentUsed; 126 127 SkPdfNativeObject* allocBlock(); 128 size_t fSizeInBytes; 129 }; 130 131 // Type of a parsed token. 132 enum SkPdfTokenType { 133 kKeyword_TokenType, 134 kObject_TokenType, 135 }; 136 137 138 /** \struct PdfToken 139 * 140 * Stores the result of the parsing - a keyword or an object. 141 * 142 */ 143 struct PdfToken { 144 const char* fKeyword; 145 size_t fKeywordLength; 146 SkPdfNativeObject* fObject; 147 SkPdfTokenType fType; 148 PdfTokenPdfToken149 PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {} 150 }; 151 152 /** \class SkPdfNativeTokenizer 153 * 154 * Responsible to tokenize a stream in small tokens, eityh a keyword or an object. 155 * A renderer can feed on the tokens and render a pdf. 156 * 157 */ 158 class SkPdfNativeTokenizer { 159 public: 160 SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, 161 SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 162 SkPdfNativeTokenizer(const unsigned char* buffer, int len, 163 SkPdfAllocator* allocator, SkPdfNativeDoc* doc); 164 165 virtual ~SkPdfNativeTokenizer(); 166 167 // Reads one token. Returns false if there are no more tokens. 168 // If writeDiff is true, and a token was read, create a PNG highlighting 169 // the difference caused by this command in /tmp/log_step_by_step. 170 // If PDF_TRACE_DIFF_IN_PNG is not defined, writeDiff does nothing. 171 bool readToken(PdfToken* token, bool writeDiff = false); 172 173 // Put back a token to be read in the nextToken read. Only one token is allowed to be put 174 // back. Must not necesaarely be the last token read. 175 void PutBack(PdfToken token); 176 177 // Reads the inline image that is present in the stream. At this point we just consumed the ID 178 // token already. 179 SkPdfImageDictionary* readInlineImage(); 180 181 private: 182 bool readTokenCore(PdfToken* token); 183 184 SkPdfNativeDoc* fDoc; 185 SkPdfAllocator* fAllocator; 186 187 const unsigned char* fUncompressedStreamStart; 188 const unsigned char* fUncompressedStream; 189 const unsigned char* fUncompressedStreamEnd; 190 191 bool fEmpty; 192 bool fHasPutBack; 193 PdfToken fPutBack; 194 }; 195 196 const unsigned char* nextObject(const unsigned char* start, const unsigned char* end, 197 SkPdfNativeObject* token, 198 SkPdfAllocator* allocator, 199 SkPdfNativeDoc* doc); 200 201 #endif // SkPdfNativeTokenizer_DEFINED 202