1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
8
9 #include <ostream>
10 #include <utility>
11
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_boolean.h"
14 #include "core/fpdfapi/parser/cpdf_dictionary.h"
15 #include "core/fpdfapi/parser/cpdf_number.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
19 #include "core/fpdfapi/parser/cpdf_string.h"
20 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
21 #include "core/fxcrt/fx_extension.h"
22 #include "core/fxcrt/fx_stream.h"
23 #include "third_party/base/check.h"
24 #include "third_party/base/notreached.h"
25
// Character classification table, indexed by 8-bit character code. Every
// entry is one of:
//   'W' - whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff
//   'N' - numeric: 0123456789+-.
//   'D' - delimiter: %()/<>[]{}
//   'R' - regular: everything else.
// Each group below covers 16 consecutive character codes.
const char PDF_CharType[256] = {
    // 0x00: NUL SOH STX ETX EOT ENQ ACK BEL | BS HT LF VT FF CR SO SI
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',

    // 0x10: DLE DC1 DC2 DC3 DC4 NAK SYN ETB | CAN EM SUB ESC FS GS RS US
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x20: SP ! " # $ % & ' | ( ) * + , - . /
    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R',
    'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',

    // 0x30: 0 1 2 3 4 5 6 7 | 8 9 : ; < = > ?
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',

    // 0x40: @ A B C D E F G | H I J K L M N O
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x50: P Q R S T U V W | X Y Z [ \ ] ^ _
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',

    // 0x60: ` a b c d e f g | h i j k l m n o
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x70: p q r s t u v w | x y z { | } ~ DEL
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',

    // 0x80: only 0x80 itself is whitespace in this row.
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x90
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xA0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xB0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xC0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xD0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xE0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xF0: only 0xFF, the final entry, is whitespace in this row.
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
77
GetHeaderOffset(const RetainPtr<IFX_SeekableReadStream> & pFile)78 absl::optional<FX_FILESIZE> GetHeaderOffset(
79 const RetainPtr<IFX_SeekableReadStream>& pFile) {
80 static constexpr size_t kBufSize = 4;
81 uint8_t buf[kBufSize];
82 for (FX_FILESIZE offset = 0; offset <= 1024; ++offset) {
83 if (!pFile->ReadBlockAtOffset(buf, offset))
84 return absl::nullopt;
85
86 if (memcmp(buf, "%PDF", 4) == 0)
87 return offset;
88 }
89 return absl::nullopt;
90 }
91
PDF_NameDecode(ByteStringView orig)92 ByteString PDF_NameDecode(ByteStringView orig) {
93 size_t src_size = orig.GetLength();
94 size_t out_index = 0;
95 ByteString result;
96 {
97 // Span's lifetime must end before ReleaseBuffer() below.
98 pdfium::span<char> pDest = result.GetBuffer(src_size);
99 for (size_t i = 0; i < src_size; i++) {
100 if (orig[i] == '#' && i + 2 < src_size) {
101 pDest[out_index++] = FXSYS_HexCharToInt(orig[i + 1]) * 16 +
102 FXSYS_HexCharToInt(orig[i + 2]);
103 i += 2;
104 } else {
105 pDest[out_index++] = orig[i];
106 }
107 }
108 }
109 result.ReleaseBuffer(out_index);
110 return result;
111 }
112
// Returns |orig| with every byte that cannot appear literally in a PDF name
// replaced by a '#' followed by two hex characters produced by
// FXSYS_IntToTwoHexChars(). A byte is escaped when it is >= 0x80, is
// classified as whitespace or as a delimiter, or is '#' itself. When no byte
// needs escaping, |orig| is returned as-is.
//
// Lengths are tracked as size_t rather than int so that the string length is
// not narrowed and the "3 output bytes per escaped input byte" accumulation
// cannot overflow a signed int on very large inputs.
ByteString PDF_NameEncode(const ByteString& orig) {
  const auto needs_escape = [](uint8_t ch) {
    return ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
           PDFCharIsDelimiter(ch);
  };

  const uint8_t* src_buf = reinterpret_cast<const uint8_t*>(orig.c_str());
  const size_t src_len = orig.GetLength();

  // First pass: compute the exact encoded length.
  size_t dest_len = 0;
  for (size_t i = 0; i < src_len; ++i)
    dest_len += needs_escape(src_buf[i]) ? 3 : 1;

  // Equal lengths mean no byte was escaped, so no new string is needed.
  if (dest_len == src_len)
    return orig;

  ByteString res;
  size_t written = 0;
  {
    // Span's lifetime must end before ReleaseBuffer() below.
    pdfium::span<char> dest_buf = res.GetBuffer(dest_len);
    for (size_t i = 0; i < src_len; ++i) {
      const uint8_t ch = src_buf[i];
      if (needs_escape(ch)) {
        dest_buf[written++] = '#';
        FXSYS_IntToTwoHexChars(ch, &dest_buf[written]);
        written += 2;
      } else {
        dest_buf[written++] = ch;
      }
    }
  }
  res.ReleaseBuffer(written);
  return res;
}
150
ReadArrayElementsToVector(const CPDF_Array * pArray,size_t nCount)151 std::vector<float> ReadArrayElementsToVector(const CPDF_Array* pArray,
152 size_t nCount) {
153 DCHECK(pArray);
154 DCHECK(pArray->size() >= nCount);
155 std::vector<float> ret(nCount);
156 for (size_t i = 0; i < nCount; ++i)
157 ret[i] = pArray->GetFloatAt(i);
158 return ret;
159 }
160
ValidateDictType(const CPDF_Dictionary * dict,ByteStringView type)161 bool ValidateDictType(const CPDF_Dictionary* dict, ByteStringView type) {
162 DCHECK(!type.IsEmpty());
163 return dict && dict->GetNameFor("Type") == type;
164 }
165
ValidateDictAllResourcesOfType(const CPDF_Dictionary * dict,ByteStringView type)166 bool ValidateDictAllResourcesOfType(const CPDF_Dictionary* dict,
167 ByteStringView type) {
168 if (!dict)
169 return false;
170
171 CPDF_DictionaryLocker locker(dict);
172 for (const auto& it : locker) {
173 RetainPtr<const CPDF_Dictionary> entry =
174 ToDictionary(it.second->GetDirect());
175 if (!ValidateDictType(entry.Get(), type))
176 return false;
177 }
178 return true;
179 }
180
ValidateFontResourceDict(const CPDF_Dictionary * dict)181 bool ValidateFontResourceDict(const CPDF_Dictionary* dict) {
182 return ValidateDictAllResourcesOfType(dict, "Font");
183 }
184
ValidateDictOptionalType(const CPDF_Dictionary * dict,ByteStringView type)185 bool ValidateDictOptionalType(const CPDF_Dictionary* dict,
186 ByteStringView type) {
187 DCHECK(!type.IsEmpty());
188 return dict && (!dict->KeyExist("Type") || dict->GetNameFor("Type") == type);
189 }
190
operator <<(std::ostream & buf,const CPDF_Object * pObj)191 std::ostream& operator<<(std::ostream& buf, const CPDF_Object* pObj) {
192 if (!pObj) {
193 buf << " null";
194 return buf;
195 }
196 switch (pObj->GetType()) {
197 case CPDF_Object::kNullobj:
198 buf << " null";
199 break;
200 case CPDF_Object::kBoolean:
201 case CPDF_Object::kNumber:
202 buf << " " << pObj->GetString();
203 break;
204 case CPDF_Object::kString:
205 buf << pObj->AsString()->EncodeString();
206 break;
207 case CPDF_Object::kName: {
208 ByteString str = pObj->GetString();
209 buf << "/" << PDF_NameEncode(str);
210 break;
211 }
212 case CPDF_Object::kReference: {
213 buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
214 break;
215 }
216 case CPDF_Object::kArray: {
217 const CPDF_Array* p = pObj->AsArray();
218 buf << "[";
219 for (size_t i = 0; i < p->size(); i++) {
220 RetainPtr<const CPDF_Object> pElement = p->GetObjectAt(i);
221 if (!pElement->IsInline()) {
222 buf << " " << pElement->GetObjNum() << " 0 R";
223 } else {
224 buf << pElement.Get();
225 }
226 }
227 buf << "]";
228 break;
229 }
230 case CPDF_Object::kDictionary: {
231 CPDF_DictionaryLocker locker(pObj->AsDictionary());
232 buf << "<<";
233 for (const auto& it : locker) {
234 const ByteString& key = it.first;
235 const RetainPtr<CPDF_Object>& pValue = it.second;
236 buf << "/" << PDF_NameEncode(key);
237 if (!pValue->IsInline()) {
238 buf << " " << pValue->GetObjNum() << " 0 R ";
239 } else {
240 buf << pValue;
241 }
242 }
243 buf << ">>";
244 break;
245 }
246 case CPDF_Object::kStream: {
247 RetainPtr<const CPDF_Stream> p(pObj->AsStream());
248 buf << p->GetDict().Get() << "stream\r\n";
249 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(p));
250 pAcc->LoadAllDataRaw();
251 pdfium::span<const uint8_t> span = pAcc->GetSpan();
252 buf.write(reinterpret_cast<const char*>(span.data()), span.size());
253 buf << "\r\nendstream";
254 break;
255 }
256 default:
257 NOTREACHED();
258 break;
259 }
260 return buf;
261 }
262