1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
8
9 #include <ostream>
10 #include <utility>
11
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_boolean.h"
14 #include "core/fpdfapi/parser/cpdf_dictionary.h"
15 #include "core/fpdfapi/parser/cpdf_number.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
19 #include "core/fpdfapi/parser/cpdf_string.h"
20 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
21 #include "core/fxcrt/check.h"
22 #include "core/fxcrt/fx_extension.h"
23 #include "core/fxcrt/fx_stream.h"
24
// Character classification table, indexed by 8-bit character code. Each entry
// is one of:
//   'W' - whitespace: NUL, TAB, LF, FF, CR, SPACE, 0x80, 0xff
//   'N' - numeric:    0123456789+-.
//   'D' - delimiter:  %()/<>[]{}
//   'R' - regular:    everything else.
// The table is laid out as 16 rows of 16 entries; the comment above each row
// gives the code of its first entry and names the characters it covers.
// clang-format off
const char kPDFCharTypes[256] = {
    // 0x00: NUL SOH STX ETX EOT ENQ ACK BEL BS  HT  LF  VT  FF  CR  SO  SI
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',
    // 0x10: DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM  SUB ESC FS  GS  RS  US
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x20: SP  !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /
    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R', 'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',
    // 0x30: 0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',
    // 0x40: @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x50: P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
    // 0x60: `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x70: p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~   DEL
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
    // 0x80: only 0x80 itself is whitespace.
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x90
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xA0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xB0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xC0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xD0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xE0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xF0: only 0xff, the final entry, is whitespace.
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W',
};
// clang-format on
76
GetHeaderOffset(const RetainPtr<IFX_SeekableReadStream> & pFile)77 std::optional<FX_FILESIZE> GetHeaderOffset(
78 const RetainPtr<IFX_SeekableReadStream>& pFile) {
79 static constexpr size_t kBufSize = 4;
80 uint8_t buf[kBufSize];
81 for (FX_FILESIZE offset = 0; offset <= 1024; ++offset) {
82 if (!pFile->ReadBlockAtOffset(buf, offset))
83 return std::nullopt;
84
85 if (memcmp(buf, "%PDF", 4) == 0)
86 return offset;
87 }
88 return std::nullopt;
89 }
90
PDF_NameDecode(ByteStringView orig)91 ByteString PDF_NameDecode(ByteStringView orig) {
92 size_t src_size = orig.GetLength();
93 size_t out_index = 0;
94 ByteString result;
95 {
96 // Span's lifetime must end before ReleaseBuffer() below.
97 pdfium::span<char> pDest = result.GetBuffer(src_size);
98 for (size_t i = 0; i < src_size; i++) {
99 if (orig[i] == '#' && i + 2 < src_size) {
100 pDest[out_index++] = FXSYS_HexCharToInt(orig[i + 1]) * 16 +
101 FXSYS_HexCharToInt(orig[i + 2]);
102 i += 2;
103 } else {
104 pDest[out_index++] = orig[i];
105 }
106 }
107 }
108 result.ReleaseBuffer(out_index);
109 return result;
110 }
111
// Encodes |orig| for output as a PDF name.
//
// Bytes >= 0x80, whitespace, delimiters and '#' itself are written as '#'
// followed by two hex characters produced by FXSYS_IntToTwoHexChars(); all
// other bytes are copied as-is. When nothing needs escaping, |orig| is
// returned without building a new buffer.
ByteString PDF_NameEncode(const ByteString& orig) {
  const auto needs_escape = [](uint8_t ch) {
    return ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
           PDFCharIsDelimiter(ch);
  };

  pdfium::span<const uint8_t> input = orig.unsigned_span();

  // First pass: compute the encoded length (3 bytes per escaped character).
  size_t encoded_len = 0;
  for (const uint8_t ch : input) {
    encoded_len += needs_escape(ch) ? 3 : 1;
  }
  if (encoded_len == input.size()) {
    return orig;
  }

  // Second pass: emit the encoded bytes.
  ByteString encoded;
  size_t cursor = 0;
  {
    // Span's lifetime must end before ReleaseBuffer() below.
    pdfium::span<char> out = encoded.GetBuffer(encoded_len);
    for (const uint8_t ch : input) {
      if (!needs_escape(ch)) {
        out[cursor++] = ch;
        continue;
      }
      out[cursor++] = '#';
      FXSYS_IntToTwoHexChars(ch, &out[cursor]);
      cursor += 2;
    }
  }
  encoded.ReleaseBuffer(cursor);
  return encoded;
}
145
ReadArrayElementsToVector(const CPDF_Array * pArray,size_t nCount)146 std::vector<float> ReadArrayElementsToVector(const CPDF_Array* pArray,
147 size_t nCount) {
148 DCHECK(pArray);
149 DCHECK(pArray->size() >= nCount);
150 std::vector<float> ret(nCount);
151 for (size_t i = 0; i < nCount; ++i)
152 ret[i] = pArray->GetFloatAt(i);
153 return ret;
154 }
155
ValidateDictType(const CPDF_Dictionary * dict,ByteStringView type)156 bool ValidateDictType(const CPDF_Dictionary* dict, ByteStringView type) {
157 DCHECK(!type.IsEmpty());
158 return dict && dict->GetNameFor("Type") == type;
159 }
160
ValidateDictAllResourcesOfType(const CPDF_Dictionary * dict,ByteStringView type)161 bool ValidateDictAllResourcesOfType(const CPDF_Dictionary* dict,
162 ByteStringView type) {
163 if (!dict)
164 return false;
165
166 CPDF_DictionaryLocker locker(dict);
167 for (const auto& it : locker) {
168 RetainPtr<const CPDF_Dictionary> entry =
169 ToDictionary(it.second->GetDirect());
170 if (!ValidateDictType(entry.Get(), type))
171 return false;
172 }
173 return true;
174 }
175
ValidateFontResourceDict(const CPDF_Dictionary * dict)176 bool ValidateFontResourceDict(const CPDF_Dictionary* dict) {
177 return ValidateDictAllResourcesOfType(dict, "Font");
178 }
179
ValidateDictOptionalType(const CPDF_Dictionary * dict,ByteStringView type)180 bool ValidateDictOptionalType(const CPDF_Dictionary* dict,
181 ByteStringView type) {
182 DCHECK(!type.IsEmpty());
183 return dict && (!dict->KeyExist("Type") || dict->GetNameFor("Type") == type);
184 }
185
operator <<(std::ostream & buf,const CPDF_Object * pObj)186 std::ostream& operator<<(std::ostream& buf, const CPDF_Object* pObj) {
187 if (!pObj) {
188 buf << " null";
189 return buf;
190 }
191 switch (pObj->GetType()) {
192 case CPDF_Object::kNullobj:
193 buf << " null";
194 break;
195 case CPDF_Object::kBoolean:
196 case CPDF_Object::kNumber:
197 buf << " " << pObj->GetString();
198 break;
199 case CPDF_Object::kString:
200 buf << pObj->AsString()->EncodeString();
201 break;
202 case CPDF_Object::kName: {
203 ByteString str = pObj->GetString();
204 buf << "/" << PDF_NameEncode(str);
205 break;
206 }
207 case CPDF_Object::kReference: {
208 buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
209 break;
210 }
211 case CPDF_Object::kArray: {
212 const CPDF_Array* p = pObj->AsArray();
213 buf << "[";
214 for (size_t i = 0; i < p->size(); i++) {
215 RetainPtr<const CPDF_Object> pElement = p->GetObjectAt(i);
216 if (!pElement->IsInline()) {
217 buf << " " << pElement->GetObjNum() << " 0 R";
218 } else {
219 buf << pElement.Get();
220 }
221 }
222 buf << "]";
223 break;
224 }
225 case CPDF_Object::kDictionary: {
226 CPDF_DictionaryLocker locker(pObj->AsDictionary());
227 buf << "<<";
228 for (const auto& it : locker) {
229 const ByteString& key = it.first;
230 const RetainPtr<CPDF_Object>& pValue = it.second;
231 buf << "/" << PDF_NameEncode(key);
232 if (!pValue->IsInline()) {
233 buf << " " << pValue->GetObjNum() << " 0 R ";
234 } else {
235 buf << pValue;
236 }
237 }
238 buf << ">>";
239 break;
240 }
241 case CPDF_Object::kStream: {
242 RetainPtr<const CPDF_Stream> p(pObj->AsStream());
243 buf << p->GetDict().Get() << "stream\r\n";
244 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(p));
245 pAcc->LoadAllDataRaw();
246 auto span = pdfium::as_chars(pAcc->GetSpan());
247 buf.write(span.data(), span.size());
248 buf << "\r\nendstream";
249 break;
250 }
251 }
252 return buf;
253 }
254