1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
8
9 #include "core/fpdfapi/parser/cpdf_array.h"
10 #include "core/fpdfapi/parser/cpdf_boolean.h"
11 #include "core/fpdfapi/parser/cpdf_dictionary.h"
12 #include "core/fpdfapi/parser/cpdf_name.h"
13 #include "core/fpdfapi/parser/cpdf_number.h"
14 #include "core/fpdfapi/parser/cpdf_reference.h"
15 #include "core/fpdfapi/parser/cpdf_stream.h"
16 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
17 #include "core/fpdfapi/parser/cpdf_string.h"
18 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
19 #include "core/fxcrt/fx_extension.h"
20 #include "core/fxcrt/fx_stream.h"
21 #include "third_party/base/logging.h"
22
23 // Indexed by 8-bit character code, contains either:
24 // 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff
25 // 'N' - for numeric: 0123456789+-.
26 // 'D' - for delimiter: %()/<>[]{}
27 // 'R' - otherwise.
// Character classification table, 16 entries per row. The comment above each
// row gives the code of its first entry and, for the ASCII range, the
// characters the row covers.
const char PDF_CharType[256] = {
    // 0x00: NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',

    // 0x10: DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x20: SP ! " # $ % & ' ( ) * + , - . /
    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R',
    'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',

    // 0x30: 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',

    // 0x40: @ A B C D E F G H I J K L M N O
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x50: P Q R S T U V W X Y Z [ backslash ] ^ _
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',

    // 0x60: ` a b c d e f g h i j k l m n o
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x70: p q r s t u v w x y z { | } ~ DEL
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',

    // 0x80: the first entry (0x80) is classified as whitespace.
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x90
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xA0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xB0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xC0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xD0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xE0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xF0: the last entry (0xff) is classified as whitespace.
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
74
GetHeaderOffset(const RetainPtr<IFX_SeekableReadStream> & pFile)75 Optional<FX_FILESIZE> GetHeaderOffset(
76 const RetainPtr<IFX_SeekableReadStream>& pFile) {
77 static constexpr size_t kBufSize = 4;
78 uint8_t buf[kBufSize];
79 for (FX_FILESIZE offset = 0; offset <= 1024; ++offset) {
80 if (!pFile->ReadBlockAtOffset(buf, offset, kBufSize))
81 return {};
82
83 if (memcmp(buf, "%PDF", 4) == 0)
84 return offset;
85 }
86 return {};
87 }
88
GetDirectInteger(const CPDF_Dictionary * pDict,const ByteString & key)89 int32_t GetDirectInteger(const CPDF_Dictionary* pDict, const ByteString& key) {
90 const CPDF_Number* pObj = ToNumber(pDict->GetObjectFor(key));
91 return pObj ? pObj->GetInteger() : 0;
92 }
93
PDF_NameDecode(ByteStringView orig)94 ByteString PDF_NameDecode(ByteStringView orig) {
95 if (!orig.Contains('#'))
96 return ByteString(orig);
97
98 size_t src_size = orig.GetLength();
99 size_t out_index = 0;
100 ByteString result;
101 {
102 // Span's lifetime must end before ReleaseBuffer() below.
103 pdfium::span<char> pDest = result.GetBuffer(src_size);
104 for (size_t i = 0; i < src_size; i++) {
105 if (orig[i] == '#' && i + 2 < src_size) {
106 pDest[out_index++] = FXSYS_HexCharToInt(orig[i + 1]) * 16 +
107 FXSYS_HexCharToInt(orig[i + 2]);
108 i += 2;
109 } else {
110 pDest[out_index++] = orig[i];
111 }
112 }
113 }
114 result.ReleaseBuffer(out_index);
115 return result;
116 }
117
// Returns |orig| with every byte that needs escaping replaced by '#' followed
// by the two characters FXSYS_IntToTwoHexChars() writes for that byte. A byte
// needs escaping when it is >= 0x80, is '#', or is classified as whitespace or
// as a delimiter. If no byte needs escaping, |orig| is returned unchanged.
//
// Lengths and indices are kept as size_t, matching ByteString::GetLength(),
// so that long inputs are not truncated by a conversion to int.
ByteString PDF_NameEncode(const ByteString& orig) {
  const uint8_t* src_buf = reinterpret_cast<const uint8_t*>(orig.c_str());
  const size_t src_len = orig.GetLength();

  // Single definition of the escape rule, shared by the sizing pass and the
  // writing pass so the two cannot drift apart.
  const auto needs_escape = [](uint8_t ch) {
    return ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
           PDFCharIsDelimiter(ch);
  };

  // First pass: compute the encoded length (3 bytes per escaped byte).
  size_t dest_len = 0;
  for (size_t i = 0; i < src_len; ++i)
    dest_len += needs_escape(src_buf[i]) ? 3 : 1;

  // Equal lengths mean nothing needs escaping.
  if (dest_len == src_len)
    return orig;

  ByteString res;
  size_t written = 0;
  {
    // Span's lifetime must end before ReleaseBuffer() below.
    pdfium::span<char> dest_buf = res.GetBuffer(dest_len);
    for (size_t i = 0; i < src_len; ++i) {
      const uint8_t ch = src_buf[i];
      if (needs_escape(ch)) {
        dest_buf[written++] = '#';
        FXSYS_IntToTwoHexChars(ch, &dest_buf[written]);
        written += 2;
      } else {
        dest_buf[written++] = ch;
      }
    }
  }
  res.ReleaseBuffer(written);
  return res;
}
155
ReadArrayElementsToVector(const CPDF_Array * pArray,size_t nCount)156 std::vector<float> ReadArrayElementsToVector(const CPDF_Array* pArray,
157 size_t nCount) {
158 ASSERT(pArray);
159 ASSERT(pArray->size() >= nCount);
160 std::vector<float> ret(nCount);
161 for (size_t i = 0; i < nCount; ++i)
162 ret[i] = pArray->GetNumberAt(i);
163 return ret;
164 }
165
ValidateDictType(const CPDF_Dictionary * dict,const ByteString & type)166 bool ValidateDictType(const CPDF_Dictionary* dict, const ByteString& type) {
167 ASSERT(dict);
168 ASSERT(!type.IsEmpty());
169 const CPDF_Name* name_obj = ToName(dict->GetObjectFor("Type"));
170 return name_obj && name_obj->GetString() == type;
171 }
172
ValidateDictAllResourcesOfType(const CPDF_Dictionary * dict,const ByteString & type)173 bool ValidateDictAllResourcesOfType(const CPDF_Dictionary* dict,
174 const ByteString& type) {
175 if (!dict)
176 return false;
177
178 CPDF_DictionaryLocker locker(dict);
179 for (const auto& it : locker) {
180 const CPDF_Dictionary* entry = ToDictionary(it.second.Get()->GetDirect());
181 if (!entry || !ValidateDictType(entry, type))
182 return false;
183 }
184 return true;
185 }
186
ValidateFontResourceDict(const CPDF_Dictionary * dict)187 bool ValidateFontResourceDict(const CPDF_Dictionary* dict) {
188 return ValidateDictAllResourcesOfType(dict, "Font");
189 }
190
operator <<(std::ostream & buf,const CPDF_Object * pObj)191 std::ostream& operator<<(std::ostream& buf, const CPDF_Object* pObj) {
192 if (!pObj) {
193 buf << " null";
194 return buf;
195 }
196 switch (pObj->GetType()) {
197 case CPDF_Object::kNullobj:
198 buf << " null";
199 break;
200 case CPDF_Object::kBoolean:
201 case CPDF_Object::kNumber:
202 buf << " " << pObj->GetString();
203 break;
204 case CPDF_Object::kString:
205 buf << PDF_EncodeString(pObj->GetString(), pObj->AsString()->IsHex());
206 break;
207 case CPDF_Object::kName: {
208 ByteString str = pObj->GetString();
209 buf << "/" << PDF_NameEncode(str);
210 break;
211 }
212 case CPDF_Object::kReference: {
213 buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
214 break;
215 }
216 case CPDF_Object::kArray: {
217 const CPDF_Array* p = pObj->AsArray();
218 buf << "[";
219 for (size_t i = 0; i < p->size(); i++) {
220 const CPDF_Object* pElement = p->GetObjectAt(i);
221 if (pElement && !pElement->IsInline()) {
222 buf << " " << pElement->GetObjNum() << " 0 R";
223 } else {
224 buf << pElement;
225 }
226 }
227 buf << "]";
228 break;
229 }
230 case CPDF_Object::kDictionary: {
231 CPDF_DictionaryLocker locker(pObj->AsDictionary());
232 buf << "<<";
233 for (const auto& it : locker) {
234 const ByteString& key = it.first;
235 CPDF_Object* pValue = it.second.Get();
236 buf << "/" << PDF_NameEncode(key);
237 if (pValue && !pValue->IsInline()) {
238 buf << " " << pValue->GetObjNum() << " 0 R ";
239 } else {
240 buf << pValue;
241 }
242 }
243 buf << ">>";
244 break;
245 }
246 case CPDF_Object::kStream: {
247 const CPDF_Stream* p = pObj->AsStream();
248 buf << p->GetDict() << "stream\r\n";
249 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(p);
250 pAcc->LoadAllDataRaw();
251 buf.write(reinterpret_cast<const char*>(pAcc->GetData()),
252 pAcc->GetSize());
253 buf << "\r\nendstream";
254 break;
255 }
256 default:
257 NOTREACHED();
258 break;
259 }
260 return buf;
261 }
262