• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_streamparser.h"
8 
9 #include <limits.h>
10 
11 #include <algorithm>
12 #include <memory>
13 #include <sstream>
14 #include <utility>
15 
16 #include "constants/stream_dict_common.h"
17 #include "core/fpdfapi/page/cpdf_docpagedata.h"
18 #include "core/fpdfapi/parser/cpdf_array.h"
19 #include "core/fpdfapi/parser/cpdf_boolean.h"
20 #include "core/fpdfapi/parser/cpdf_dictionary.h"
21 #include "core/fpdfapi/parser/cpdf_document.h"
22 #include "core/fpdfapi/parser/cpdf_name.h"
23 #include "core/fpdfapi/parser/cpdf_null.h"
24 #include "core/fpdfapi/parser/cpdf_number.h"
25 #include "core/fpdfapi/parser/cpdf_stream.h"
26 #include "core/fpdfapi/parser/cpdf_string.h"
27 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
28 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
29 #include "core/fxcodec/fx_codec.h"
30 #include "core/fxcodec/jpeg/jpegmodule.h"
31 #include "core/fxcodec/scanlinedecoder.h"
32 #include "core/fxcrt/fx_extension.h"
33 #include "core/fxcrt/fx_memory_wrappers.h"
34 #include "core/fxcrt/fx_safe_types.h"
35 #include "third_party/base/ptr_util.h"
36 
37 namespace {
38 
// Deepest recursion level ReadNextObject() accepts before it gives up on a
// nested array/dictionary and returns null.
constexpr uint32_t kMaxNestedParsingLevel = 512;
// ReadString() and ReadHexString() never return more than this many bytes.
constexpr size_t kMaxStringLength = 32767;

// Keyword spellings that are turned into boolean / null objects.
constexpr char kTrue[] = "true";
constexpr char kFalse[] = "false";
constexpr char kNull[] = "null";
45 
DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder)46 uint32_t DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder) {
47   if (!pDecoder)
48     return FX_INVALID_OFFSET;
49 
50   int ncomps = pDecoder->CountComps();
51   int bpc = pDecoder->GetBPC();
52   int width = pDecoder->GetWidth();
53   int height = pDecoder->GetHeight();
54   if (width <= 0 || height <= 0)
55     return FX_INVALID_OFFSET;
56 
57   FX_SAFE_UINT32 size = fxcodec::CalculatePitch8(bpc, ncomps, width);
58   size *= height;
59   if (size.ValueOrDefault(0) == 0)
60     return FX_INVALID_OFFSET;
61 
62   for (int row = 0; row < height; ++row) {
63     if (!pDecoder->GetScanline(row))
64       break;
65   }
66   return pDecoder->GetSrcOffset();
67 }
68 
DecodeInlineStream(pdfium::span<const uint8_t> src_span,int width,int height,const ByteString & decoder,const CPDF_Dictionary * pParam,uint32_t orig_size)69 uint32_t DecodeInlineStream(pdfium::span<const uint8_t> src_span,
70                             int width,
71                             int height,
72                             const ByteString& decoder,
73                             const CPDF_Dictionary* pParam,
74                             uint32_t orig_size) {
75   // |decoder| should not be an abbreviation.
76   ASSERT(decoder != "A85");
77   ASSERT(decoder != "AHx");
78   ASSERT(decoder != "CCF");
79   ASSERT(decoder != "DCT");
80   ASSERT(decoder != "Fl");
81   ASSERT(decoder != "LZW");
82   ASSERT(decoder != "RL");
83 
84   std::unique_ptr<uint8_t, FxFreeDeleter> ignored_result;
85   uint32_t ignored_size;
86   if (decoder == "FlateDecode") {
87     return FlateOrLZWDecode(false, src_span, pParam, orig_size, &ignored_result,
88                             &ignored_size);
89   }
90   if (decoder == "LZWDecode") {
91     return FlateOrLZWDecode(true, src_span, pParam, 0, &ignored_result,
92                             &ignored_size);
93   }
94   if (decoder == "DCTDecode") {
95     std::unique_ptr<ScanlineDecoder> pDecoder =
96         fxcodec::ModuleMgr::GetInstance()->GetJpegModule()->CreateDecoder(
97             src_span, width, height, 0,
98             !pParam || pParam->GetIntegerFor("ColorTransform", 1));
99     return DecodeAllScanlines(std::move(pDecoder));
100   }
101   if (decoder == "CCITTFaxDecode") {
102     std::unique_ptr<ScanlineDecoder> pDecoder =
103         CreateFaxDecoder(src_span, width, height, pParam);
104     return DecodeAllScanlines(std::move(pDecoder));
105   }
106 
107   if (decoder == "ASCII85Decode")
108     return A85Decode(src_span, &ignored_result, &ignored_size);
109   if (decoder == "ASCIIHexDecode")
110     return HexDecode(src_span, &ignored_result, &ignored_size);
111   if (decoder == "RunLengthDecode")
112     return RunLengthDecode(src_span, &ignored_result, &ignored_size);
113 
114   return FX_INVALID_OFFSET;
115 }
116 
117 }  // namespace
118 
CPDF_StreamParser(pdfium::span<const uint8_t> span)119 CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span)
120     : m_pBuf(span) {}
121 
CPDF_StreamParser(pdfium::span<const uint8_t> span,const WeakPtr<ByteStringPool> & pPool)122 CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span,
123                                      const WeakPtr<ByteStringPool>& pPool)
124     : m_pPool(pPool), m_pBuf(span) {}
125 
~CPDF_StreamParser()126 CPDF_StreamParser::~CPDF_StreamParser() {}
127 
ReadInlineStream(CPDF_Document * pDoc,RetainPtr<CPDF_Dictionary> pDict,const CPDF_Object * pCSObj)128 RetainPtr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
129     CPDF_Document* pDoc,
130     RetainPtr<CPDF_Dictionary> pDict,
131     const CPDF_Object* pCSObj) {
132   if (m_Pos < m_pBuf.size() && PDFCharIsWhitespace(m_pBuf[m_Pos]))
133     m_Pos++;
134 
135   if (m_Pos == m_pBuf.size())
136     return nullptr;
137 
138   ByteString decoder;
139   const CPDF_Dictionary* pParam = nullptr;
140   CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter");
141   if (pFilter) {
142     const CPDF_Array* pArray = pFilter->AsArray();
143     if (pArray) {
144       decoder = pArray->GetStringAt(0);
145       const CPDF_Array* pParams =
146           pDict->GetArrayFor(pdfium::stream::kDecodeParms);
147       if (pParams)
148         pParam = pParams->GetDictAt(0);
149     } else {
150       decoder = pFilter->GetString();
151       pParam = pDict->GetDictFor(pdfium::stream::kDecodeParms);
152     }
153   }
154   uint32_t width = pDict->GetIntegerFor("Width");
155   uint32_t height = pDict->GetIntegerFor("Height");
156   uint32_t bpc = 1;
157   uint32_t nComponents = 1;
158   if (pCSObj) {
159     RetainPtr<CPDF_ColorSpace> pCS =
160         CPDF_DocPageData::FromDocument(pDoc)->GetColorSpace(pCSObj, nullptr);
161     nComponents = pCS ? pCS->CountComponents() : 3;
162     bpc = pDict->GetIntegerFor("BitsPerComponent");
163   }
164   FX_SAFE_UINT32 size = fxcodec::CalculatePitch8(bpc, nComponents, width);
165   size *= height;
166   if (!size.IsValid())
167     return nullptr;
168 
169   uint32_t dwOrigSize = size.ValueOrDie();
170   std::unique_ptr<uint8_t, FxFreeDeleter> pData;
171   uint32_t dwStreamSize;
172   if (decoder.IsEmpty()) {
173     dwOrigSize = std::min<uint32_t>(dwOrigSize, m_pBuf.size() - m_Pos);
174     pData.reset(FX_Alloc(uint8_t, dwOrigSize));
175     auto copy_span = m_pBuf.subspan(m_Pos, dwOrigSize);
176     memcpy(pData.get(), copy_span.data(), copy_span.size());
177     dwStreamSize = dwOrigSize;
178     m_Pos += dwOrigSize;
179   } else {
180     dwStreamSize = DecodeInlineStream(m_pBuf.subspan(m_Pos), width, height,
181                                       decoder, pParam, dwOrigSize);
182     if (!pdfium::base::IsValueInRangeForNumericType<int>(dwStreamSize))
183       return nullptr;
184 
185     uint32_t dwSavePos = m_Pos;
186     m_Pos += dwStreamSize;
187     while (1) {
188       uint32_t dwPrevPos = m_Pos;
189       CPDF_StreamParser::SyntaxType type = ParseNextElement();
190       if (type == CPDF_StreamParser::EndOfData)
191         break;
192 
193       if (type != CPDF_StreamParser::Keyword) {
194         dwStreamSize += m_Pos - dwPrevPos;
195         continue;
196       }
197       if (GetWord() == "EI") {
198         m_Pos = dwPrevPos;
199         break;
200       }
201       dwStreamSize += m_Pos - dwPrevPos;
202     }
203     m_Pos = dwSavePos;
204     pData.reset(FX_Alloc(uint8_t, dwStreamSize));
205     auto copy_span = m_pBuf.subspan(m_Pos, dwStreamSize);
206     memcpy(pData.get(), copy_span.data(), copy_span.size());
207     m_Pos += dwStreamSize;
208   }
209   pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize));
210   return pdfium::MakeRetain<CPDF_Stream>(std::move(pData), dwStreamSize,
211                                          std::move(pDict));
212 }
213 
ParseNextElement()214 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
215   m_pLastObj.Reset();
216   m_WordSize = 0;
217   if (!PositionIsInBounds())
218     return EndOfData;
219 
220   uint8_t ch = m_pBuf[m_Pos++];
221   while (1) {
222     while (PDFCharIsWhitespace(ch)) {
223       if (!PositionIsInBounds())
224         return EndOfData;
225 
226       ch = m_pBuf[m_Pos++];
227     }
228 
229     if (ch != '%')
230       break;
231 
232     while (1) {
233       if (!PositionIsInBounds())
234         return EndOfData;
235 
236       ch = m_pBuf[m_Pos++];
237       if (PDFCharIsLineEnding(ch))
238         break;
239     }
240   }
241 
242   if (PDFCharIsDelimiter(ch) && ch != '/') {
243     m_Pos--;
244     m_pLastObj = ReadNextObject(false, false, 0);
245     return Others;
246   }
247 
248   bool bIsNumber = true;
249   while (1) {
250     if (m_WordSize < kMaxWordLength)
251       m_WordBuffer[m_WordSize++] = ch;
252 
253     if (!PDFCharIsNumeric(ch))
254       bIsNumber = false;
255 
256     if (!PositionIsInBounds())
257       break;
258 
259     ch = m_pBuf[m_Pos++];
260 
261     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
262       m_Pos--;
263       break;
264     }
265   }
266 
267   m_WordBuffer[m_WordSize] = 0;
268   if (bIsNumber)
269     return Number;
270 
271   if (m_WordBuffer[0] == '/')
272     return Name;
273 
274   if (m_WordSize == 4) {
275     if (WordBufferMatches(kTrue)) {
276       m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(true);
277       return Others;
278     }
279     if (WordBufferMatches(kNull)) {
280       m_pLastObj = pdfium::MakeRetain<CPDF_Null>();
281       return Others;
282     }
283   } else if (m_WordSize == 5) {
284     if (WordBufferMatches(kFalse)) {
285       m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(false);
286       return Others;
287     }
288   }
289   return Keyword;
290 }
291 
ReadNextObject(bool bAllowNestedArray,bool bInArray,uint32_t dwRecursionLevel)292 RetainPtr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
293     bool bAllowNestedArray,
294     bool bInArray,
295     uint32_t dwRecursionLevel) {
296   bool bIsNumber;
297   // Must get the next word before returning to avoid infinite loops.
298   GetNextWord(bIsNumber);
299   if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
300     return nullptr;
301 
302   if (bIsNumber) {
303     m_WordBuffer[m_WordSize] = 0;
304     return pdfium::MakeRetain<CPDF_Number>(
305         ByteStringView(m_WordBuffer, m_WordSize));
306   }
307 
308   int first_char = m_WordBuffer[0];
309   if (first_char == '/') {
310     ByteString name =
311         PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
312     return pdfium::MakeRetain<CPDF_Name>(m_pPool, name);
313   }
314 
315   if (first_char == '(') {
316     ByteString str = ReadString();
317     return pdfium::MakeRetain<CPDF_String>(m_pPool, str, false);
318   }
319 
320   if (first_char == '<') {
321     if (m_WordSize == 1)
322       return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadHexString(), true);
323 
324     auto pDict = pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
325     while (1) {
326       GetNextWord(bIsNumber);
327       if (m_WordSize == 2 && m_WordBuffer[0] == '>')
328         break;
329 
330       if (!m_WordSize || m_WordBuffer[0] != '/')
331         return nullptr;
332 
333       ByteString key =
334           PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
335       RetainPtr<CPDF_Object> pObj =
336           ReadNextObject(true, bInArray, dwRecursionLevel + 1);
337       if (!pObj)
338         return nullptr;
339 
340       if (!key.IsEmpty())
341         pDict->SetFor(key, std::move(pObj));
342     }
343     return pDict;
344   }
345 
346   if (first_char == '[') {
347     if ((!bAllowNestedArray && bInArray))
348       return nullptr;
349 
350     auto pArray = pdfium::MakeRetain<CPDF_Array>();
351     while (1) {
352       RetainPtr<CPDF_Object> pObj =
353           ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
354       if (pObj) {
355         pArray->Add(std::move(pObj));
356         continue;
357       }
358       if (!m_WordSize || m_WordBuffer[0] == ']')
359         break;
360     }
361     return pArray;
362   }
363 
364   if (WordBufferMatches(kFalse))
365     return pdfium::MakeRetain<CPDF_Boolean>(false);
366   if (WordBufferMatches(kTrue))
367     return pdfium::MakeRetain<CPDF_Boolean>(true);
368   if (WordBufferMatches(kNull))
369     return pdfium::MakeRetain<CPDF_Null>();
370   return nullptr;
371 }
372 
373 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser
GetNextWord(bool & bIsNumber)374 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
375   m_WordSize = 0;
376   bIsNumber = true;
377   if (!PositionIsInBounds())
378     return;
379 
380   uint8_t ch = m_pBuf[m_Pos++];
381   while (1) {
382     while (PDFCharIsWhitespace(ch)) {
383       if (!PositionIsInBounds()) {
384         return;
385       }
386       ch = m_pBuf[m_Pos++];
387     }
388 
389     if (ch != '%')
390       break;
391 
392     while (1) {
393       if (!PositionIsInBounds())
394         return;
395       ch = m_pBuf[m_Pos++];
396       if (PDFCharIsLineEnding(ch))
397         break;
398     }
399   }
400 
401   if (PDFCharIsDelimiter(ch)) {
402     bIsNumber = false;
403     m_WordBuffer[m_WordSize++] = ch;
404     if (ch == '/') {
405       while (1) {
406         if (!PositionIsInBounds())
407           return;
408         ch = m_pBuf[m_Pos++];
409         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
410           m_Pos--;
411           return;
412         }
413         if (m_WordSize < kMaxWordLength)
414           m_WordBuffer[m_WordSize++] = ch;
415       }
416     } else if (ch == '<') {
417       if (!PositionIsInBounds())
418         return;
419       ch = m_pBuf[m_Pos++];
420       if (ch == '<')
421         m_WordBuffer[m_WordSize++] = ch;
422       else
423         m_Pos--;
424     } else if (ch == '>') {
425       if (!PositionIsInBounds())
426         return;
427       ch = m_pBuf[m_Pos++];
428       if (ch == '>')
429         m_WordBuffer[m_WordSize++] = ch;
430       else
431         m_Pos--;
432     }
433     return;
434   }
435 
436   while (1) {
437     if (m_WordSize < kMaxWordLength)
438       m_WordBuffer[m_WordSize++] = ch;
439     if (!PDFCharIsNumeric(ch))
440       bIsNumber = false;
441     if (!PositionIsInBounds())
442       return;
443 
444     ch = m_pBuf[m_Pos++];
445     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
446       m_Pos--;
447       break;
448     }
449   }
450 }
451 
ReadString()452 ByteString CPDF_StreamParser::ReadString() {
453   if (!PositionIsInBounds())
454     return ByteString();
455 
456   uint8_t ch = m_pBuf[m_Pos++];
457   std::ostringstream buf;
458   int parlevel = 0;
459   int status = 0;
460   int iEscCode = 0;
461   while (1) {
462     switch (status) {
463       case 0:
464         if (ch == ')') {
465           if (parlevel == 0) {
466             if (buf.tellp() <= 0)
467               return ByteString();
468 
469             return ByteString(
470                 buf.str().c_str(),
471                 std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
472           }
473           parlevel--;
474           buf << ')';
475         } else if (ch == '(') {
476           parlevel++;
477           buf << '(';
478         } else if (ch == '\\') {
479           status = 1;
480         } else {
481           buf << static_cast<char>(ch);
482         }
483         break;
484       case 1:
485         if (FXSYS_IsOctalDigit(ch)) {
486           iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch));
487           status = 2;
488           break;
489         }
490         if (ch == '\r') {
491           status = 4;
492           break;
493         }
494         if (ch == '\n') {
495           // Do nothing.
496         } else if (ch == 'n') {
497           buf << '\n';
498         } else if (ch == 'r') {
499           buf << '\r';
500         } else if (ch == 't') {
501           buf << '\t';
502         } else if (ch == 'b') {
503           buf << '\b';
504         } else if (ch == 'f') {
505           buf << '\f';
506         } else {
507           buf << static_cast<char>(ch);
508         }
509         status = 0;
510         break;
511       case 2:
512         if (FXSYS_IsOctalDigit(ch)) {
513           iEscCode =
514               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
515           status = 3;
516         } else {
517           buf << static_cast<char>(iEscCode);
518           status = 0;
519           continue;
520         }
521         break;
522       case 3:
523         if (FXSYS_IsOctalDigit(ch)) {
524           iEscCode =
525               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
526           buf << static_cast<char>(iEscCode);
527           status = 0;
528         } else {
529           buf << static_cast<char>(iEscCode);
530           status = 0;
531           continue;
532         }
533         break;
534       case 4:
535         status = 0;
536         if (ch != '\n')
537           continue;
538         break;
539     }
540     if (!PositionIsInBounds())
541       break;
542 
543     ch = m_pBuf[m_Pos++];
544   }
545   if (PositionIsInBounds())
546     ++m_Pos;
547 
548   if (buf.tellp() <= 0)
549     return ByteString();
550 
551   return ByteString(
552       buf.str().c_str(),
553       std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
554 }
555 
ReadHexString()556 ByteString CPDF_StreamParser::ReadHexString() {
557   if (!PositionIsInBounds())
558     return ByteString();
559 
560   std::ostringstream buf;
561   bool bFirst = true;
562   int code = 0;
563   while (PositionIsInBounds()) {
564     uint8_t ch = m_pBuf[m_Pos++];
565     if (ch == '>')
566       break;
567 
568     if (!std::isxdigit(ch))
569       continue;
570 
571     int val = FXSYS_HexCharToInt(ch);
572     if (bFirst) {
573       code = val * 16;
574     } else {
575       code += val;
576       buf << static_cast<uint8_t>(code);
577     }
578     bFirst = !bFirst;
579   }
580   if (!bFirst)
581     buf << static_cast<char>(code);
582 
583   if (buf.tellp() <= 0)
584     return ByteString();
585 
586   return ByteString(
587       buf.str().c_str(),
588       std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
589 }
590 
PositionIsInBounds() const591 bool CPDF_StreamParser::PositionIsInBounds() const {
592   return m_Pos < m_pBuf.size();
593 }
594 
WordBufferMatches(const char * pWord) const595 bool CPDF_StreamParser::WordBufferMatches(const char* pWord) const {
596   const size_t iLength = strlen(pWord);
597   return m_WordSize == iLength && memcmp(m_WordBuffer, pWord, iLength) == 0;
598 }
599