1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/page/cpdf_streamparser.h"
8
9 #include <limits.h>
10
11 #include <algorithm>
12 #include <memory>
13 #include <sstream>
14 #include <utility>
15
16 #include "core/fpdfapi/cpdf_modulemgr.h"
17 #include "core/fpdfapi/page/cpdf_docpagedata.h"
18 #include "core/fpdfapi/parser/cpdf_array.h"
19 #include "core/fpdfapi/parser/cpdf_boolean.h"
20 #include "core/fpdfapi/parser/cpdf_dictionary.h"
21 #include "core/fpdfapi/parser/cpdf_document.h"
22 #include "core/fpdfapi/parser/cpdf_name.h"
23 #include "core/fpdfapi/parser/cpdf_null.h"
24 #include "core/fpdfapi/parser/cpdf_number.h"
25 #include "core/fpdfapi/parser/cpdf_stream.h"
26 #include "core/fpdfapi/parser/cpdf_string.h"
27 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
28 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
29 #include "core/fxcodec/codec/ccodec_jpegmodule.h"
30 #include "core/fxcodec/codec/ccodec_scanlinedecoder.h"
31 #include "core/fxcrt/fx_extension.h"
32
33 namespace {
34
// Deepest nesting of arrays/dictionaries that ReadNextObject() will follow
// before it gives up and returns nullptr.
constexpr uint32_t kMaxNestedParsingLevel = 512;

// Limit used when storing the bytes of a single token in m_WordBuffer.
constexpr uint32_t kMaxWordBuffer = 256;

// Longest result produced by ReadString() / ReadHexString(); anything beyond
// this many bytes is dropped.
constexpr size_t kMaxStringLength = 32767;
38
DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,uint8_t ** dest_buf,uint32_t * dest_size)39 uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,
40 uint8_t** dest_buf,
41 uint32_t* dest_size) {
42 if (!pDecoder)
43 return FX_INVALID_OFFSET;
44 int ncomps = pDecoder->CountComps();
45 int bpc = pDecoder->GetBPC();
46 int width = pDecoder->GetWidth();
47 int height = pDecoder->GetHeight();
48 int pitch = (width * ncomps * bpc + 7) / 8;
49 if (height == 0 || pitch > (1 << 30) / height)
50 return FX_INVALID_OFFSET;
51
52 *dest_buf = FX_Alloc2D(uint8_t, pitch, height);
53 *dest_size = pitch * height; // Safe since checked alloc returned.
54 for (int row = 0; row < height; ++row) {
55 const uint8_t* pLine = pDecoder->GetScanline(row);
56 if (!pLine)
57 break;
58
59 memcpy(*dest_buf + row * pitch, pLine, pitch);
60 }
61 return pDecoder->GetSrcOffset();
62 }
63
DecodeInlineStream(const uint8_t * src_buf,uint32_t limit,int width,int height,const ByteString & decoder,CPDF_Dictionary * pParam,uint8_t ** dest_buf,uint32_t * dest_size)64 uint32_t DecodeInlineStream(const uint8_t* src_buf,
65 uint32_t limit,
66 int width,
67 int height,
68 const ByteString& decoder,
69 CPDF_Dictionary* pParam,
70 uint8_t** dest_buf,
71 uint32_t* dest_size) {
72 if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
73 std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
74 FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
75 return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
76 }
77 if (decoder == "ASCII85Decode" || decoder == "A85")
78 return A85Decode(src_buf, limit, dest_buf, dest_size);
79 if (decoder == "ASCIIHexDecode" || decoder == "AHx")
80 return HexDecode(src_buf, limit, dest_buf, dest_size);
81 if (decoder == "FlateDecode" || decoder == "Fl") {
82 return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, *dest_size,
83 dest_buf, dest_size);
84 }
85 if (decoder == "LZWDecode" || decoder == "LZW") {
86 return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf,
87 dest_size);
88 }
89 if (decoder == "DCTDecode" || decoder == "DCT") {
90 std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
91 CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
92 src_buf, limit, width, height, 0,
93 !pParam || pParam->GetIntegerFor("ColorTransform", 1));
94 return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
95 }
96 if (decoder == "RunLengthDecode" || decoder == "RL")
97 return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
98 *dest_size = 0;
99 *dest_buf = 0;
100 return 0xFFFFFFFF;
101 }
102
103 } // namespace
104
CPDF_StreamParser(const uint8_t * pData,uint32_t dwSize)105 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize)
106 : m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(nullptr) {}
107
CPDF_StreamParser(const uint8_t * pData,uint32_t dwSize,const WeakPtr<ByteStringPool> & pPool)108 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData,
109 uint32_t dwSize,
110 const WeakPtr<ByteStringPool>& pPool)
111 : m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(pPool) {}
112
~CPDF_StreamParser()113 CPDF_StreamParser::~CPDF_StreamParser() {}
114
ReadInlineStream(CPDF_Document * pDoc,std::unique_ptr<CPDF_Dictionary> pDict,CPDF_Object * pCSObj)115 std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
116 CPDF_Document* pDoc,
117 std::unique_ptr<CPDF_Dictionary> pDict,
118 CPDF_Object* pCSObj) {
119 if (m_Pos == m_Size)
120 return nullptr;
121
122 if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
123 m_Pos++;
124
125 ByteString Decoder;
126 CPDF_Dictionary* pParam = nullptr;
127 CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter");
128 if (pFilter) {
129 if (CPDF_Array* pArray = pFilter->AsArray()) {
130 Decoder = pArray->GetStringAt(0);
131 CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms");
132 if (pParams)
133 pParam = pParams->GetDictAt(0);
134 } else {
135 Decoder = pFilter->GetString();
136 pParam = pDict->GetDictFor("DecodeParms");
137 }
138 }
139 uint32_t width = pDict->GetIntegerFor("Width");
140 uint32_t height = pDict->GetIntegerFor("Height");
141 uint32_t OrigSize = 0;
142 if (pCSObj) {
143 uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent");
144 uint32_t nComponents = 1;
145 CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
146 if (pCS) {
147 nComponents = pCS->CountComponents();
148 pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
149 } else {
150 nComponents = 3;
151 }
152 uint32_t pitch = width;
153 if (bpc && pitch > INT_MAX / bpc)
154 return nullptr;
155
156 pitch *= bpc;
157 if (nComponents && pitch > INT_MAX / nComponents)
158 return nullptr;
159
160 pitch *= nComponents;
161 if (pitch > INT_MAX - 7)
162 return nullptr;
163
164 pitch += 7;
165 pitch /= 8;
166 OrigSize = pitch;
167 } else {
168 if (width > INT_MAX - 7)
169 return nullptr;
170
171 OrigSize = ((width + 7) / 8);
172 }
173 if (height && OrigSize > INT_MAX / height)
174 return nullptr;
175
176 OrigSize *= height;
177 std::unique_ptr<uint8_t, FxFreeDeleter> pData;
178 uint32_t dwStreamSize;
179 if (Decoder.IsEmpty()) {
180 if (OrigSize > m_Size - m_Pos)
181 OrigSize = m_Size - m_Pos;
182 pData.reset(FX_Alloc(uint8_t, OrigSize));
183 memcpy(pData.get(), m_pBuf + m_Pos, OrigSize);
184 dwStreamSize = OrigSize;
185 m_Pos += OrigSize;
186 } else {
187 uint8_t* pIgnore = nullptr;
188 uint32_t dwDestSize = OrigSize;
189 dwStreamSize =
190 DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
191 Decoder, pParam, &pIgnore, &dwDestSize);
192 FX_Free(pIgnore);
193 if (static_cast<int>(dwStreamSize) < 0)
194 return nullptr;
195
196 uint32_t dwSavePos = m_Pos;
197 m_Pos += dwStreamSize;
198 while (1) {
199 uint32_t dwPrevPos = m_Pos;
200 CPDF_StreamParser::SyntaxType type = ParseNextElement();
201 if (type == CPDF_StreamParser::EndOfData)
202 break;
203
204 if (type != CPDF_StreamParser::Keyword) {
205 dwStreamSize += m_Pos - dwPrevPos;
206 continue;
207 }
208 if (GetWord() == "EI") {
209 m_Pos = dwPrevPos;
210 break;
211 }
212 dwStreamSize += m_Pos - dwPrevPos;
213 }
214 m_Pos = dwSavePos;
215 pData.reset(FX_Alloc(uint8_t, dwStreamSize));
216 memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize);
217 m_Pos += dwStreamSize;
218 }
219 pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize));
220 return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize,
221 std::move(pDict));
222 }
223
ParseNextElement()224 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
225 m_pLastObj.reset();
226 m_WordSize = 0;
227 if (!PositionIsInBounds())
228 return EndOfData;
229
230 int ch = m_pBuf[m_Pos++];
231 while (1) {
232 while (PDFCharIsWhitespace(ch)) {
233 if (!PositionIsInBounds())
234 return EndOfData;
235
236 ch = m_pBuf[m_Pos++];
237 }
238
239 if (ch != '%')
240 break;
241
242 while (1) {
243 if (!PositionIsInBounds())
244 return EndOfData;
245
246 ch = m_pBuf[m_Pos++];
247 if (PDFCharIsLineEnding(ch))
248 break;
249 }
250 }
251
252 if (PDFCharIsDelimiter(ch) && ch != '/') {
253 m_Pos--;
254 m_pLastObj = ReadNextObject(false, false, 0);
255 return Others;
256 }
257
258 bool bIsNumber = true;
259 while (1) {
260 if (m_WordSize < kMaxWordBuffer)
261 m_WordBuffer[m_WordSize++] = ch;
262
263 if (!PDFCharIsNumeric(ch))
264 bIsNumber = false;
265
266 if (!PositionIsInBounds())
267 break;
268
269 ch = m_pBuf[m_Pos++];
270
271 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
272 m_Pos--;
273 break;
274 }
275 }
276
277 m_WordBuffer[m_WordSize] = 0;
278 if (bIsNumber)
279 return Number;
280
281 if (m_WordBuffer[0] == '/')
282 return Name;
283
284 if (m_WordSize == 4) {
285 if (memcmp(m_WordBuffer, "true", 4) == 0) {
286 m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true);
287 return Others;
288 }
289 if (memcmp(m_WordBuffer, "null", 4) == 0) {
290 m_pLastObj = pdfium::MakeUnique<CPDF_Null>();
291 return Others;
292 }
293 } else if (m_WordSize == 5) {
294 if (memcmp(m_WordBuffer, "false", 5) == 0) {
295 m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false);
296 return Others;
297 }
298 }
299 return Keyword;
300 }
301
ReadNextObject(bool bAllowNestedArray,bool bInArray,uint32_t dwRecursionLevel)302 std::unique_ptr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
303 bool bAllowNestedArray,
304 bool bInArray,
305 uint32_t dwRecursionLevel) {
306 bool bIsNumber;
307 // Must get the next word before returning to avoid infinite loops.
308 GetNextWord(bIsNumber);
309 if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
310 return nullptr;
311
312 if (bIsNumber) {
313 m_WordBuffer[m_WordSize] = 0;
314 return pdfium::MakeUnique<CPDF_Number>(
315 ByteStringView(m_WordBuffer, m_WordSize));
316 }
317
318 int first_char = m_WordBuffer[0];
319 if (first_char == '/') {
320 ByteString name =
321 PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
322 return pdfium::MakeUnique<CPDF_Name>(m_pPool, name);
323 }
324
325 if (first_char == '(') {
326 ByteString str = ReadString();
327 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
328 }
329
330 if (first_char == '<') {
331 if (m_WordSize == 1)
332 return pdfium::MakeUnique<CPDF_String>(m_pPool, ReadHexString(), true);
333
334 auto pDict = pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
335 while (1) {
336 GetNextWord(bIsNumber);
337 if (m_WordSize == 2 && m_WordBuffer[0] == '>')
338 break;
339
340 if (!m_WordSize || m_WordBuffer[0] != '/')
341 return nullptr;
342
343 ByteString key =
344 PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
345 std::unique_ptr<CPDF_Object> pObj =
346 ReadNextObject(true, bInArray, dwRecursionLevel + 1);
347 if (!pObj)
348 return nullptr;
349
350 if (!key.IsEmpty())
351 pDict->SetFor(key, std::move(pObj));
352 }
353 return std::move(pDict);
354 }
355
356 if (first_char == '[') {
357 if ((!bAllowNestedArray && bInArray))
358 return nullptr;
359
360 auto pArray = pdfium::MakeUnique<CPDF_Array>();
361 while (1) {
362 std::unique_ptr<CPDF_Object> pObj =
363 ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
364 if (pObj) {
365 pArray->Add(std::move(pObj));
366 continue;
367 }
368 if (!m_WordSize || m_WordBuffer[0] == ']')
369 break;
370 }
371 return std::move(pArray);
372 }
373
374 if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5))
375 return pdfium::MakeUnique<CPDF_Boolean>(false);
376
377 if (m_WordSize == 4) {
378 if (memcmp(m_WordBuffer, "true", 4) == 0)
379 return pdfium::MakeUnique<CPDF_Boolean>(true);
380 if (memcmp(m_WordBuffer, "null", 4) == 0)
381 return pdfium::MakeUnique<CPDF_Null>();
382 }
383
384 return nullptr;
385 }
386
387 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser
GetNextWord(bool & bIsNumber)388 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
389 m_WordSize = 0;
390 bIsNumber = true;
391 if (!PositionIsInBounds())
392 return;
393
394 int ch = m_pBuf[m_Pos++];
395 while (1) {
396 while (PDFCharIsWhitespace(ch)) {
397 if (!PositionIsInBounds()) {
398 return;
399 }
400 ch = m_pBuf[m_Pos++];
401 }
402
403 if (ch != '%')
404 break;
405
406 while (1) {
407 if (!PositionIsInBounds())
408 return;
409 ch = m_pBuf[m_Pos++];
410 if (PDFCharIsLineEnding(ch))
411 break;
412 }
413 }
414
415 if (PDFCharIsDelimiter(ch)) {
416 bIsNumber = false;
417 m_WordBuffer[m_WordSize++] = ch;
418 if (ch == '/') {
419 while (1) {
420 if (!PositionIsInBounds())
421 return;
422 ch = m_pBuf[m_Pos++];
423 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
424 m_Pos--;
425 return;
426 }
427
428 if (m_WordSize < kMaxWordBuffer)
429 m_WordBuffer[m_WordSize++] = ch;
430 }
431 } else if (ch == '<') {
432 if (!PositionIsInBounds())
433 return;
434 ch = m_pBuf[m_Pos++];
435 if (ch == '<')
436 m_WordBuffer[m_WordSize++] = ch;
437 else
438 m_Pos--;
439 } else if (ch == '>') {
440 if (!PositionIsInBounds())
441 return;
442 ch = m_pBuf[m_Pos++];
443 if (ch == '>')
444 m_WordBuffer[m_WordSize++] = ch;
445 else
446 m_Pos--;
447 }
448 return;
449 }
450
451 while (1) {
452 if (m_WordSize < kMaxWordBuffer)
453 m_WordBuffer[m_WordSize++] = ch;
454 if (!PDFCharIsNumeric(ch))
455 bIsNumber = false;
456
457 if (!PositionIsInBounds())
458 return;
459 ch = m_pBuf[m_Pos++];
460 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
461 m_Pos--;
462 break;
463 }
464 }
465 }
466
ReadString()467 ByteString CPDF_StreamParser::ReadString() {
468 if (!PositionIsInBounds())
469 return ByteString();
470
471 uint8_t ch = m_pBuf[m_Pos++];
472 std::ostringstream buf;
473 int parlevel = 0;
474 int status = 0;
475 int iEscCode = 0;
476 while (1) {
477 switch (status) {
478 case 0:
479 if (ch == ')') {
480 if (parlevel == 0) {
481 return ByteString(
482 buf.str().c_str(),
483 std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
484 }
485 parlevel--;
486 buf << ')';
487 } else if (ch == '(') {
488 parlevel++;
489 buf << '(';
490 } else if (ch == '\\') {
491 status = 1;
492 } else {
493 buf << static_cast<char>(ch);
494 }
495 break;
496 case 1:
497 if (ch >= '0' && ch <= '7') {
498 iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch));
499 status = 2;
500 break;
501 }
502 if (ch == '\r') {
503 status = 4;
504 break;
505 }
506 if (ch == '\n') {
507 // Do nothing.
508 } else if (ch == 'n') {
509 buf << '\n';
510 } else if (ch == 'r') {
511 buf << '\r';
512 } else if (ch == 't') {
513 buf << '\t';
514 } else if (ch == 'b') {
515 buf << '\b';
516 } else if (ch == 'f') {
517 buf << '\f';
518 } else {
519 buf << static_cast<char>(ch);
520 }
521 status = 0;
522 break;
523 case 2:
524 if (ch >= '0' && ch <= '7') {
525 iEscCode =
526 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
527 status = 3;
528 } else {
529 buf << static_cast<char>(iEscCode);
530 status = 0;
531 continue;
532 }
533 break;
534 case 3:
535 if (ch >= '0' && ch <= '7') {
536 iEscCode =
537 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
538 buf << static_cast<char>(iEscCode);
539 status = 0;
540 } else {
541 buf << static_cast<char>(iEscCode);
542 status = 0;
543 continue;
544 }
545 break;
546 case 4:
547 status = 0;
548 if (ch != '\n')
549 continue;
550 break;
551 }
552 if (!PositionIsInBounds())
553 break;
554
555 ch = m_pBuf[m_Pos++];
556 }
557 if (PositionIsInBounds())
558 ++m_Pos;
559
560 return ByteString(
561 buf.str().c_str(),
562 std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
563 }
564
ReadHexString()565 ByteString CPDF_StreamParser::ReadHexString() {
566 if (!PositionIsInBounds())
567 return ByteString();
568
569 std::ostringstream buf;
570 bool bFirst = true;
571 int code = 0;
572 while (PositionIsInBounds()) {
573 int ch = m_pBuf[m_Pos++];
574
575 if (ch == '>')
576 break;
577
578 if (!std::isxdigit(ch))
579 continue;
580
581 int val = FXSYS_HexCharToInt(ch);
582 if (bFirst) {
583 code = val * 16;
584 } else {
585 code += val;
586 buf << static_cast<uint8_t>(code);
587 }
588 bFirst = !bFirst;
589 }
590 if (!bFirst)
591 buf << static_cast<char>(code);
592
593 return ByteString(
594 buf.str().c_str(),
595 std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
596 }
597
PositionIsInBounds() const598 bool CPDF_StreamParser::PositionIsInBounds() const {
599 return m_Pos < m_Size;
600 }
601