1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/page/cpdf_streamparser.h"
8
9 #include <limits.h>
10
11 #include <memory>
12 #include <utility>
13
14 #include "core/fpdfapi/cpdf_modulemgr.h"
15 #include "core/fpdfapi/page/cpdf_docpagedata.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_boolean.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_document.h"
20 #include "core/fpdfapi/parser/cpdf_name.h"
21 #include "core/fpdfapi/parser/cpdf_null.h"
22 #include "core/fpdfapi/parser/cpdf_number.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_string.h"
25 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcodec/fx_codec.h"
28 #include "core/fxcrt/fx_ext.h"
29
30 namespace {
31
32 const uint32_t kMaxNestedParsingLevel = 512;
33 const uint32_t kMaxWordBuffer = 256;
34 const FX_STRSIZE kMaxStringLength = 32767;
35
DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,uint8_t * & dest_buf,uint32_t & dest_size)36 uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,
37 uint8_t*& dest_buf,
38 uint32_t& dest_size) {
39 if (!pDecoder)
40 return FX_INVALID_OFFSET;
41 int ncomps = pDecoder->CountComps();
42 int bpc = pDecoder->GetBPC();
43 int width = pDecoder->GetWidth();
44 int height = pDecoder->GetHeight();
45 int pitch = (width * ncomps * bpc + 7) / 8;
46 if (height == 0 || pitch > (1 << 30) / height)
47 return FX_INVALID_OFFSET;
48
49 dest_buf = FX_Alloc2D(uint8_t, pitch, height);
50 dest_size = pitch * height; // Safe since checked alloc returned.
51 for (int row = 0; row < height; row++) {
52 const uint8_t* pLine = pDecoder->GetScanline(row);
53 if (!pLine)
54 break;
55
56 FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch);
57 }
58 return pDecoder->GetSrcOffset();
59 }
60
PDF_DecodeInlineStream(const uint8_t * src_buf,uint32_t limit,int width,int height,CFX_ByteString & decoder,CPDF_Dictionary * pParam,uint8_t * & dest_buf,uint32_t & dest_size)61 uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf,
62 uint32_t limit,
63 int width,
64 int height,
65 CFX_ByteString& decoder,
66 CPDF_Dictionary* pParam,
67 uint8_t*& dest_buf,
68 uint32_t& dest_size) {
69 if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
70 std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
71 FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
72 return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
73 }
74 if (decoder == "ASCII85Decode" || decoder == "A85")
75 return A85Decode(src_buf, limit, dest_buf, dest_size);
76 if (decoder == "ASCIIHexDecode" || decoder == "AHx")
77 return HexDecode(src_buf, limit, dest_buf, dest_size);
78 if (decoder == "FlateDecode" || decoder == "Fl") {
79 return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, dest_size,
80 dest_buf, dest_size);
81 }
82 if (decoder == "LZWDecode" || decoder == "LZW") {
83 return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf,
84 dest_size);
85 }
86 if (decoder == "DCTDecode" || decoder == "DCT") {
87 std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
88 CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
89 src_buf, limit, width, height, 0,
90 !pParam || pParam->GetIntegerFor("ColorTransform", 1));
91 return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
92 }
93 if (decoder == "RunLengthDecode" || decoder == "RL")
94 return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
95 dest_size = 0;
96 dest_buf = 0;
97 return (uint32_t)-1;
98 }
99
100 } // namespace
101
CPDF_StreamParser(const uint8_t * pData,uint32_t dwSize)102 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize)
103 : m_pBuf(pData),
104 m_Size(dwSize),
105 m_Pos(0),
106 m_pPool(nullptr) {}
107
CPDF_StreamParser(const uint8_t * pData,uint32_t dwSize,const CFX_WeakPtr<CFX_ByteStringPool> & pPool)108 CPDF_StreamParser::CPDF_StreamParser(
109 const uint8_t* pData,
110 uint32_t dwSize,
111 const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
112 : m_pBuf(pData),
113 m_Size(dwSize),
114 m_Pos(0),
115 m_pPool(pPool) {}
116
~CPDF_StreamParser()117 CPDF_StreamParser::~CPDF_StreamParser() {}
118
ReadInlineStream(CPDF_Document * pDoc,std::unique_ptr<CPDF_Dictionary> pDict,CPDF_Object * pCSObj)119 std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
120 CPDF_Document* pDoc,
121 std::unique_ptr<CPDF_Dictionary> pDict,
122 CPDF_Object* pCSObj) {
123 if (m_Pos == m_Size)
124 return nullptr;
125
126 if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
127 m_Pos++;
128
129 CFX_ByteString Decoder;
130 CPDF_Dictionary* pParam = nullptr;
131 CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter");
132 if (pFilter) {
133 if (CPDF_Array* pArray = pFilter->AsArray()) {
134 Decoder = pArray->GetStringAt(0);
135 CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms");
136 if (pParams)
137 pParam = pParams->GetDictAt(0);
138 } else {
139 Decoder = pFilter->GetString();
140 pParam = pDict->GetDictFor("DecodeParms");
141 }
142 }
143 uint32_t width = pDict->GetIntegerFor("Width");
144 uint32_t height = pDict->GetIntegerFor("Height");
145 uint32_t OrigSize = 0;
146 if (pCSObj) {
147 uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent");
148 uint32_t nComponents = 1;
149 CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
150 if (pCS) {
151 nComponents = pCS->CountComponents();
152 pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
153 } else {
154 nComponents = 3;
155 }
156 uint32_t pitch = width;
157 if (bpc && pitch > INT_MAX / bpc)
158 return nullptr;
159
160 pitch *= bpc;
161 if (nComponents && pitch > INT_MAX / nComponents)
162 return nullptr;
163
164 pitch *= nComponents;
165 if (pitch > INT_MAX - 7)
166 return nullptr;
167
168 pitch += 7;
169 pitch /= 8;
170 OrigSize = pitch;
171 } else {
172 if (width > INT_MAX - 7)
173 return nullptr;
174
175 OrigSize = ((width + 7) / 8);
176 }
177 if (height && OrigSize > INT_MAX / height)
178 return nullptr;
179
180 OrigSize *= height;
181 std::unique_ptr<uint8_t, FxFreeDeleter> pData;
182 uint32_t dwStreamSize;
183 if (Decoder.IsEmpty()) {
184 if (OrigSize > m_Size - m_Pos)
185 OrigSize = m_Size - m_Pos;
186 pData.reset(FX_Alloc(uint8_t, OrigSize));
187 FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, OrigSize);
188 dwStreamSize = OrigSize;
189 m_Pos += OrigSize;
190 } else {
191 uint8_t* pIgnore = nullptr;
192 uint32_t dwDestSize = OrigSize;
193 dwStreamSize =
194 PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
195 Decoder, pParam, pIgnore, dwDestSize);
196 FX_Free(pIgnore);
197 if (static_cast<int>(dwStreamSize) < 0)
198 return nullptr;
199
200 uint32_t dwSavePos = m_Pos;
201 m_Pos += dwStreamSize;
202 while (1) {
203 uint32_t dwPrevPos = m_Pos;
204 CPDF_StreamParser::SyntaxType type = ParseNextElement();
205 if (type == CPDF_StreamParser::EndOfData)
206 break;
207
208 if (type != CPDF_StreamParser::Keyword) {
209 dwStreamSize += m_Pos - dwPrevPos;
210 continue;
211 }
212 if (GetWord() == "EI") {
213 m_Pos = dwPrevPos;
214 break;
215 }
216 dwStreamSize += m_Pos - dwPrevPos;
217 }
218 m_Pos = dwSavePos;
219 pData.reset(FX_Alloc(uint8_t, dwStreamSize));
220 FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize);
221 m_Pos += dwStreamSize;
222 }
223 pDict->SetNewFor<CPDF_Number>("Length", (int)dwStreamSize);
224 return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize,
225 std::move(pDict));
226 }
227
ParseNextElement()228 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
229 m_pLastObj.reset();
230 m_WordSize = 0;
231 if (!PositionIsInBounds())
232 return EndOfData;
233
234 int ch = m_pBuf[m_Pos++];
235 while (1) {
236 while (PDFCharIsWhitespace(ch)) {
237 if (!PositionIsInBounds())
238 return EndOfData;
239
240 ch = m_pBuf[m_Pos++];
241 }
242
243 if (ch != '%')
244 break;
245
246 while (1) {
247 if (!PositionIsInBounds())
248 return EndOfData;
249
250 ch = m_pBuf[m_Pos++];
251 if (PDFCharIsLineEnding(ch))
252 break;
253 }
254 }
255
256 if (PDFCharIsDelimiter(ch) && ch != '/') {
257 m_Pos--;
258 m_pLastObj = ReadNextObject(false, false, 0);
259 return Others;
260 }
261
262 bool bIsNumber = true;
263 while (1) {
264 if (m_WordSize < kMaxWordBuffer)
265 m_WordBuffer[m_WordSize++] = ch;
266
267 if (!PDFCharIsNumeric(ch))
268 bIsNumber = false;
269
270 if (!PositionIsInBounds())
271 break;
272
273 ch = m_pBuf[m_Pos++];
274
275 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
276 m_Pos--;
277 break;
278 }
279 }
280
281 m_WordBuffer[m_WordSize] = 0;
282 if (bIsNumber)
283 return Number;
284
285 if (m_WordBuffer[0] == '/')
286 return Name;
287
288 if (m_WordSize == 4) {
289 if (memcmp(m_WordBuffer, "true", 4) == 0) {
290 m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true);
291 return Others;
292 }
293 if (memcmp(m_WordBuffer, "null", 4) == 0) {
294 m_pLastObj = pdfium::MakeUnique<CPDF_Null>();
295 return Others;
296 }
297 } else if (m_WordSize == 5) {
298 if (memcmp(m_WordBuffer, "false", 5) == 0) {
299 m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false);
300 return Others;
301 }
302 }
303 return Keyword;
304 }
305
ReadNextObject(bool bAllowNestedArray,bool bInArray,uint32_t dwRecursionLevel)306 std::unique_ptr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
307 bool bAllowNestedArray,
308 bool bInArray,
309 uint32_t dwRecursionLevel) {
310 bool bIsNumber;
311 // Must get the next word before returning to avoid infinite loops.
312 GetNextWord(bIsNumber);
313 if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
314 return nullptr;
315
316 if (bIsNumber) {
317 m_WordBuffer[m_WordSize] = 0;
318 return pdfium::MakeUnique<CPDF_Number>(
319 CFX_ByteStringC(m_WordBuffer, m_WordSize));
320 }
321
322 int first_char = m_WordBuffer[0];
323 if (first_char == '/') {
324 CFX_ByteString name =
325 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
326 return pdfium::MakeUnique<CPDF_Name>(m_pPool, name);
327 }
328
329 if (first_char == '(') {
330 CFX_ByteString str = ReadString();
331 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
332 }
333
334 if (first_char == '<') {
335 if (m_WordSize == 1)
336 return pdfium::MakeUnique<CPDF_String>(m_pPool, ReadHexString(), true);
337
338 auto pDict = pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
339 while (1) {
340 GetNextWord(bIsNumber);
341 if (m_WordSize == 2 && m_WordBuffer[0] == '>')
342 break;
343
344 if (!m_WordSize || m_WordBuffer[0] != '/')
345 return nullptr;
346
347 CFX_ByteString key =
348 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
349 std::unique_ptr<CPDF_Object> pObj =
350 ReadNextObject(true, bInArray, dwRecursionLevel + 1);
351 if (!pObj)
352 return nullptr;
353
354 if (!key.IsEmpty())
355 pDict->SetFor(key, std::move(pObj));
356 }
357 return std::move(pDict);
358 }
359
360 if (first_char == '[') {
361 if ((!bAllowNestedArray && bInArray))
362 return nullptr;
363
364 auto pArray = pdfium::MakeUnique<CPDF_Array>();
365 while (1) {
366 std::unique_ptr<CPDF_Object> pObj =
367 ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
368 if (pObj) {
369 pArray->Add(std::move(pObj));
370 continue;
371 }
372 if (!m_WordSize || m_WordBuffer[0] == ']')
373 break;
374 }
375 return std::move(pArray);
376 }
377
378 if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5))
379 return pdfium::MakeUnique<CPDF_Boolean>(false);
380
381 if (m_WordSize == 4) {
382 if (memcmp(m_WordBuffer, "true", 4) == 0)
383 return pdfium::MakeUnique<CPDF_Boolean>(true);
384 if (memcmp(m_WordBuffer, "null", 4) == 0)
385 return pdfium::MakeUnique<CPDF_Null>();
386 }
387
388 return nullptr;
389 }
390
391 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser
GetNextWord(bool & bIsNumber)392 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
393 m_WordSize = 0;
394 bIsNumber = true;
395 if (!PositionIsInBounds())
396 return;
397
398 int ch = m_pBuf[m_Pos++];
399 while (1) {
400 while (PDFCharIsWhitespace(ch)) {
401 if (!PositionIsInBounds()) {
402 return;
403 }
404 ch = m_pBuf[m_Pos++];
405 }
406
407 if (ch != '%')
408 break;
409
410 while (1) {
411 if (!PositionIsInBounds())
412 return;
413 ch = m_pBuf[m_Pos++];
414 if (PDFCharIsLineEnding(ch))
415 break;
416 }
417 }
418
419 if (PDFCharIsDelimiter(ch)) {
420 bIsNumber = false;
421 m_WordBuffer[m_WordSize++] = ch;
422 if (ch == '/') {
423 while (1) {
424 if (!PositionIsInBounds())
425 return;
426 ch = m_pBuf[m_Pos++];
427 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
428 m_Pos--;
429 return;
430 }
431
432 if (m_WordSize < kMaxWordBuffer)
433 m_WordBuffer[m_WordSize++] = ch;
434 }
435 } else if (ch == '<') {
436 if (!PositionIsInBounds())
437 return;
438 ch = m_pBuf[m_Pos++];
439 if (ch == '<')
440 m_WordBuffer[m_WordSize++] = ch;
441 else
442 m_Pos--;
443 } else if (ch == '>') {
444 if (!PositionIsInBounds())
445 return;
446 ch = m_pBuf[m_Pos++];
447 if (ch == '>')
448 m_WordBuffer[m_WordSize++] = ch;
449 else
450 m_Pos--;
451 }
452 return;
453 }
454
455 while (1) {
456 if (m_WordSize < kMaxWordBuffer)
457 m_WordBuffer[m_WordSize++] = ch;
458 if (!PDFCharIsNumeric(ch))
459 bIsNumber = false;
460
461 if (!PositionIsInBounds())
462 return;
463 ch = m_pBuf[m_Pos++];
464 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
465 m_Pos--;
466 break;
467 }
468 }
469 }
470
ReadString()471 CFX_ByteString CPDF_StreamParser::ReadString() {
472 if (!PositionIsInBounds())
473 return CFX_ByteString();
474
475 uint8_t ch = m_pBuf[m_Pos++];
476 CFX_ByteTextBuf buf;
477 int parlevel = 0;
478 int status = 0;
479 int iEscCode = 0;
480 while (1) {
481 switch (status) {
482 case 0:
483 if (ch == ')') {
484 if (parlevel == 0) {
485 if (buf.GetLength() > kMaxStringLength) {
486 return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
487 }
488 return buf.MakeString();
489 }
490 parlevel--;
491 buf.AppendChar(')');
492 } else if (ch == '(') {
493 parlevel++;
494 buf.AppendChar('(');
495 } else if (ch == '\\') {
496 status = 1;
497 } else {
498 buf.AppendChar((char)ch);
499 }
500 break;
501 case 1:
502 if (ch >= '0' && ch <= '7') {
503 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
504 status = 2;
505 break;
506 }
507 if (ch == 'n') {
508 buf.AppendChar('\n');
509 } else if (ch == 'r') {
510 buf.AppendChar('\r');
511 } else if (ch == 't') {
512 buf.AppendChar('\t');
513 } else if (ch == 'b') {
514 buf.AppendChar('\b');
515 } else if (ch == 'f') {
516 buf.AppendChar('\f');
517 } else if (ch == '\r') {
518 status = 4;
519 break;
520 } else if (ch == '\n') {
521 } else {
522 buf.AppendChar(ch);
523 }
524 status = 0;
525 break;
526 case 2:
527 if (ch >= '0' && ch <= '7') {
528 iEscCode =
529 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
530 status = 3;
531 } else {
532 buf.AppendChar(iEscCode);
533 status = 0;
534 continue;
535 }
536 break;
537 case 3:
538 if (ch >= '0' && ch <= '7') {
539 iEscCode =
540 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
541 buf.AppendChar(iEscCode);
542 status = 0;
543 } else {
544 buf.AppendChar(iEscCode);
545 status = 0;
546 continue;
547 }
548 break;
549 case 4:
550 status = 0;
551 if (ch != '\n') {
552 continue;
553 }
554 break;
555 }
556 if (!PositionIsInBounds())
557 break;
558
559 ch = m_pBuf[m_Pos++];
560 }
561 if (PositionIsInBounds())
562 ++m_Pos;
563
564 if (buf.GetLength() > kMaxStringLength) {
565 return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
566 }
567 return buf.MakeString();
568 }
569
ReadHexString()570 CFX_ByteString CPDF_StreamParser::ReadHexString() {
571 if (!PositionIsInBounds())
572 return CFX_ByteString();
573
574 CFX_ByteTextBuf buf;
575 bool bFirst = true;
576 int code = 0;
577 while (PositionIsInBounds()) {
578 int ch = m_pBuf[m_Pos++];
579
580 if (ch == '>')
581 break;
582
583 if (!std::isxdigit(ch))
584 continue;
585
586 int val = FXSYS_toHexDigit(ch);
587 if (bFirst) {
588 code = val * 16;
589 } else {
590 code += val;
591 buf.AppendByte((uint8_t)code);
592 }
593 bFirst = !bFirst;
594 }
595 if (!bFirst)
596 buf.AppendChar((char)code);
597
598 if (buf.GetLength() > kMaxStringLength)
599 return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
600
601 return buf.MakeString();
602 }
603
PositionIsInBounds() const604 bool CPDF_StreamParser::PositionIsInBounds() const {
605 return m_Pos < m_Size;
606 }
607