• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_streamparser.h"
8 
9 #include <ctype.h>
10 
11 #include <algorithm>
12 #include <memory>
13 #include <utility>
14 
15 #include "constants/stream_dict_common.h"
16 #include "core/fpdfapi/page/cpdf_docpagedata.h"
17 #include "core/fpdfapi/parser/cpdf_array.h"
18 #include "core/fpdfapi/parser/cpdf_boolean.h"
19 #include "core/fpdfapi/parser/cpdf_dictionary.h"
20 #include "core/fpdfapi/parser/cpdf_name.h"
21 #include "core/fpdfapi/parser/cpdf_null.h"
22 #include "core/fpdfapi/parser/cpdf_number.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_string.h"
25 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcodec/jpeg/jpegmodule.h"
28 #include "core/fxcodec/scanlinedecoder.h"
29 #include "core/fxcrt/data_vector.h"
30 #include "core/fxcrt/fx_extension.h"
31 #include "core/fxcrt/fx_memory_wrappers.h"
32 #include "core/fxcrt/fx_safe_types.h"
33 #include "core/fxcrt/span_util.h"
34 #include "core/fxge/calculate_pitch.h"
35 #include "third_party/base/check.h"
36 
37 namespace {
38 
// Recursion depth beyond which ReadNextObject() gives up and returns nullptr.
constexpr uint32_t kMaxNestedParsingLevel = 512;
// Upper bound, in bytes, on the strings handed back by ReadString() and
// ReadHexString(); anything longer is cut down to this length.
constexpr size_t kMaxStringLength = 32767;

// Spellings of the literal keywords recognised by the parser.
constexpr char kTrue[] = "true";
constexpr char kFalse[] = "false";
constexpr char kNull[] = "null";
45 
DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder)46 uint32_t DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder) {
47   if (!pDecoder)
48     return FX_INVALID_OFFSET;
49 
50   int ncomps = pDecoder->CountComps();
51   int bpc = pDecoder->GetBPC();
52   int width = pDecoder->GetWidth();
53   int height = pDecoder->GetHeight();
54   if (width <= 0 || height <= 0)
55     return FX_INVALID_OFFSET;
56 
57   absl::optional<uint32_t> maybe_size =
58       fxge::CalculatePitch8(bpc, ncomps, width);
59   if (!maybe_size.has_value())
60     return FX_INVALID_OFFSET;
61 
62   FX_SAFE_UINT32 size = maybe_size.value();
63   size *= height;
64   if (size.ValueOrDefault(0) == 0)
65     return FX_INVALID_OFFSET;
66 
67   for (int row = 0; row < height; ++row) {
68     if (pDecoder->GetScanline(row).empty())
69       break;
70   }
71   return pDecoder->GetSrcOffset();
72 }
73 
DecodeInlineStream(pdfium::span<const uint8_t> src_span,int width,int height,const ByteString & decoder,RetainPtr<const CPDF_Dictionary> pParam,uint32_t orig_size)74 uint32_t DecodeInlineStream(pdfium::span<const uint8_t> src_span,
75                             int width,
76                             int height,
77                             const ByteString& decoder,
78                             RetainPtr<const CPDF_Dictionary> pParam,
79                             uint32_t orig_size) {
80   // |decoder| should not be an abbreviation.
81   DCHECK(decoder != "A85");
82   DCHECK(decoder != "AHx");
83   DCHECK(decoder != "CCF");
84   DCHECK(decoder != "DCT");
85   DCHECK(decoder != "Fl");
86   DCHECK(decoder != "LZW");
87   DCHECK(decoder != "RL");
88 
89   std::unique_ptr<uint8_t, FxFreeDeleter> ignored_result;
90   uint32_t ignored_size;
91   if (decoder == "FlateDecode") {
92     return FlateOrLZWDecode(false, src_span, pParam.Get(), orig_size,
93                             &ignored_result, &ignored_size);
94   }
95   if (decoder == "LZWDecode") {
96     return FlateOrLZWDecode(true, src_span, pParam.Get(), 0, &ignored_result,
97                             &ignored_size);
98   }
99   if (decoder == "DCTDecode") {
100     std::unique_ptr<ScanlineDecoder> pDecoder = JpegModule::CreateDecoder(
101         src_span, width, height, 0,
102         !pParam || pParam->GetIntegerFor("ColorTransform", 1));
103     return DecodeAllScanlines(std::move(pDecoder));
104   }
105   if (decoder == "CCITTFaxDecode") {
106     std::unique_ptr<ScanlineDecoder> pDecoder =
107         CreateFaxDecoder(src_span, width, height, pParam.Get());
108     return DecodeAllScanlines(std::move(pDecoder));
109   }
110 
111   if (decoder == "ASCII85Decode")
112     return A85Decode(src_span, &ignored_result, &ignored_size);
113   if (decoder == "ASCIIHexDecode")
114     return HexDecode(src_span, &ignored_result, &ignored_size);
115   if (decoder == "RunLengthDecode")
116     return RunLengthDecode(src_span, &ignored_result, &ignored_size);
117 
118   return FX_INVALID_OFFSET;
119 }
120 
121 }  // namespace
122 
CPDF_StreamParser(pdfium::span<const uint8_t> span)123 CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span)
124     : m_pBuf(span) {}
125 
CPDF_StreamParser(pdfium::span<const uint8_t> span,const WeakPtr<ByteStringPool> & pPool)126 CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span,
127                                      const WeakPtr<ByteStringPool>& pPool)
128     : m_pPool(pPool), m_pBuf(span) {}
129 
130 CPDF_StreamParser::~CPDF_StreamParser() = default;
131 
ReadInlineStream(CPDF_Document * pDoc,RetainPtr<CPDF_Dictionary> pDict,const CPDF_Object * pCSObj)132 RetainPtr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
133     CPDF_Document* pDoc,
134     RetainPtr<CPDF_Dictionary> pDict,
135     const CPDF_Object* pCSObj) {
136   if (m_Pos < m_pBuf.size() && PDFCharIsWhitespace(m_pBuf[m_Pos]))
137     m_Pos++;
138 
139   if (m_Pos == m_pBuf.size())
140     return nullptr;
141 
142   ByteString decoder;
143   RetainPtr<const CPDF_Dictionary> pParam;
144   RetainPtr<const CPDF_Object> pFilter = pDict->GetDirectObjectFor("Filter");
145   if (pFilter) {
146     const CPDF_Array* pArray = pFilter->AsArray();
147     if (pArray) {
148       decoder = pArray->GetByteStringAt(0);
149       RetainPtr<const CPDF_Array> pParams =
150           pDict->GetArrayFor(pdfium::stream::kDecodeParms);
151       if (pParams)
152         pParam = pParams->GetDictAt(0);
153     } else {
154       decoder = pFilter->GetString();
155       pParam = pDict->GetDictFor(pdfium::stream::kDecodeParms);
156     }
157   }
158   uint32_t width = pDict->GetIntegerFor("Width");
159   uint32_t height = pDict->GetIntegerFor("Height");
160   uint32_t bpc = 1;
161   uint32_t nComponents = 1;
162   if (pCSObj) {
163     RetainPtr<CPDF_ColorSpace> pCS =
164         CPDF_DocPageData::FromDocument(pDoc)->GetColorSpace(pCSObj, nullptr);
165     nComponents = pCS ? pCS->CountComponents() : 3;
166     bpc = pDict->GetIntegerFor("BitsPerComponent");
167   }
168   absl::optional<uint32_t> maybe_size =
169       fxge::CalculatePitch8(bpc, nComponents, width);
170   if (!maybe_size.has_value())
171     return nullptr;
172 
173   FX_SAFE_UINT32 size = maybe_size.value();
174   size *= height;
175   if (!size.IsValid())
176     return nullptr;
177 
178   uint32_t dwOrigSize = size.ValueOrDie();
179   DataVector<uint8_t> data;
180   uint32_t dwStreamSize;
181   if (decoder.IsEmpty()) {
182     dwOrigSize = std::min<uint32_t>(dwOrigSize, m_pBuf.size() - m_Pos);
183     auto src_span = m_pBuf.subspan(m_Pos, dwOrigSize);
184     data = DataVector<uint8_t>(src_span.begin(), src_span.end());
185     dwStreamSize = dwOrigSize;
186     m_Pos += dwOrigSize;
187   } else {
188     dwStreamSize = DecodeInlineStream(m_pBuf.subspan(m_Pos), width, height,
189                                       decoder, std::move(pParam), dwOrigSize);
190     if (!pdfium::base::IsValueInRangeForNumericType<int>(dwStreamSize))
191       return nullptr;
192 
193     uint32_t dwSavePos = m_Pos;
194     m_Pos += dwStreamSize;
195     while (true) {
196       uint32_t dwPrevPos = m_Pos;
197       ElementType type = ParseNextElement();
198       if (type == ElementType::kEndOfData)
199         break;
200 
201       if (type != ElementType::kKeyword) {
202         dwStreamSize += m_Pos - dwPrevPos;
203         continue;
204       }
205       if (GetWord() == "EI") {
206         m_Pos = dwPrevPos;
207         break;
208       }
209       dwStreamSize += m_Pos - dwPrevPos;
210     }
211     m_Pos = dwSavePos;
212     auto src_span = m_pBuf.subspan(m_Pos, dwStreamSize);
213     data = DataVector<uint8_t>(src_span.begin(), src_span.end());
214     m_Pos += dwStreamSize;
215   }
216   pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize));
217   return pdfium::MakeRetain<CPDF_Stream>(std::move(data), std::move(pDict));
218 }
219 
ParseNextElement()220 CPDF_StreamParser::ElementType CPDF_StreamParser::ParseNextElement() {
221   m_pLastObj.Reset();
222   m_WordSize = 0;
223   if (!PositionIsInBounds())
224     return ElementType::kEndOfData;
225 
226   uint8_t ch = m_pBuf[m_Pos++];
227   while (true) {
228     while (PDFCharIsWhitespace(ch)) {
229       if (!PositionIsInBounds())
230         return ElementType::kEndOfData;
231 
232       ch = m_pBuf[m_Pos++];
233     }
234 
235     if (ch != '%')
236       break;
237 
238     while (true) {
239       if (!PositionIsInBounds())
240         return ElementType::kEndOfData;
241 
242       ch = m_pBuf[m_Pos++];
243       if (PDFCharIsLineEnding(ch))
244         break;
245     }
246   }
247 
248   if (PDFCharIsDelimiter(ch) && ch != '/') {
249     m_Pos--;
250     m_pLastObj = ReadNextObject(false, false, 0);
251     return ElementType::kOther;
252   }
253 
254   bool bIsNumber = true;
255   while (true) {
256     if (m_WordSize < kMaxWordLength)
257       m_WordBuffer[m_WordSize++] = ch;
258 
259     if (!PDFCharIsNumeric(ch))
260       bIsNumber = false;
261 
262     if (!PositionIsInBounds())
263       break;
264 
265     ch = m_pBuf[m_Pos++];
266 
267     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
268       m_Pos--;
269       break;
270     }
271   }
272 
273   m_WordBuffer[m_WordSize] = 0;
274   if (bIsNumber)
275     return ElementType::kNumber;
276 
277   if (m_WordBuffer[0] == '/')
278     return ElementType::kName;
279 
280   if (m_WordSize == 4) {
281     if (GetWord() == kTrue) {
282       m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(true);
283       return ElementType::kOther;
284     }
285     if (GetWord() == kNull) {
286       m_pLastObj = pdfium::MakeRetain<CPDF_Null>();
287       return ElementType::kOther;
288     }
289   } else if (m_WordSize == 5) {
290     if (GetWord() == kFalse) {
291       m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(false);
292       return ElementType::kOther;
293     }
294   }
295   return ElementType::kKeyword;
296 }
297 
ReadNextObject(bool bAllowNestedArray,bool bInArray,uint32_t dwRecursionLevel)298 RetainPtr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
299     bool bAllowNestedArray,
300     bool bInArray,
301     uint32_t dwRecursionLevel) {
302   bool bIsNumber;
303   // Must get the next word before returning to avoid infinite loops.
304   GetNextWord(bIsNumber);
305   if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
306     return nullptr;
307 
308   if (bIsNumber) {
309     m_WordBuffer[m_WordSize] = 0;
310     return pdfium::MakeRetain<CPDF_Number>(GetWord());
311   }
312 
313   int first_char = m_WordBuffer[0];
314   if (first_char == '/') {
315     ByteString name = PDF_NameDecode(GetWord().Substr(1));
316     return pdfium::MakeRetain<CPDF_Name>(m_pPool, name);
317   }
318 
319   if (first_char == '(') {
320     ByteString str = ReadString();
321     return pdfium::MakeRetain<CPDF_String>(m_pPool, str, false);
322   }
323 
324   if (first_char == '<') {
325     if (m_WordSize == 1)
326       return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadHexString(), true);
327 
328     auto pDict = pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
329     while (true) {
330       GetNextWord(bIsNumber);
331       if (m_WordSize == 2 && m_WordBuffer[0] == '>')
332         break;
333 
334       if (!m_WordSize || m_WordBuffer[0] != '/')
335         return nullptr;
336 
337       ByteString key = PDF_NameDecode(GetWord().Substr(1));
338       RetainPtr<CPDF_Object> pObj =
339           ReadNextObject(true, bInArray, dwRecursionLevel + 1);
340       if (!pObj)
341         return nullptr;
342 
343       pDict->SetFor(key, std::move(pObj));
344     }
345     return pDict;
346   }
347 
348   if (first_char == '[') {
349     if ((!bAllowNestedArray && bInArray))
350       return nullptr;
351 
352     auto pArray = pdfium::MakeRetain<CPDF_Array>();
353     while (true) {
354       RetainPtr<CPDF_Object> pObj =
355           ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
356       if (pObj) {
357         pArray->Append(std::move(pObj));
358         continue;
359       }
360       if (!m_WordSize || m_WordBuffer[0] == ']')
361         break;
362     }
363     return pArray;
364   }
365 
366   if (GetWord() == kFalse)
367     return pdfium::MakeRetain<CPDF_Boolean>(false);
368   if (GetWord() == kTrue)
369     return pdfium::MakeRetain<CPDF_Boolean>(true);
370   if (GetWord() == kNull)
371     return pdfium::MakeRetain<CPDF_Null>();
372   return nullptr;
373 }
374 
375 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser
GetNextWord(bool & bIsNumber)376 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
377   m_WordSize = 0;
378   bIsNumber = true;
379   if (!PositionIsInBounds())
380     return;
381 
382   uint8_t ch = m_pBuf[m_Pos++];
383   while (true) {
384     while (PDFCharIsWhitespace(ch)) {
385       if (!PositionIsInBounds()) {
386         return;
387       }
388       ch = m_pBuf[m_Pos++];
389     }
390 
391     if (ch != '%')
392       break;
393 
394     while (true) {
395       if (!PositionIsInBounds())
396         return;
397       ch = m_pBuf[m_Pos++];
398       if (PDFCharIsLineEnding(ch))
399         break;
400     }
401   }
402 
403   if (PDFCharIsDelimiter(ch)) {
404     bIsNumber = false;
405     m_WordBuffer[m_WordSize++] = ch;
406     if (ch == '/') {
407       while (true) {
408         if (!PositionIsInBounds())
409           return;
410         ch = m_pBuf[m_Pos++];
411         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
412           m_Pos--;
413           return;
414         }
415         if (m_WordSize < kMaxWordLength)
416           m_WordBuffer[m_WordSize++] = ch;
417       }
418     } else if (ch == '<') {
419       if (!PositionIsInBounds())
420         return;
421       ch = m_pBuf[m_Pos++];
422       if (ch == '<')
423         m_WordBuffer[m_WordSize++] = ch;
424       else
425         m_Pos--;
426     } else if (ch == '>') {
427       if (!PositionIsInBounds())
428         return;
429       ch = m_pBuf[m_Pos++];
430       if (ch == '>')
431         m_WordBuffer[m_WordSize++] = ch;
432       else
433         m_Pos--;
434     }
435     return;
436   }
437 
438   while (true) {
439     if (m_WordSize < kMaxWordLength)
440       m_WordBuffer[m_WordSize++] = ch;
441     if (!PDFCharIsNumeric(ch))
442       bIsNumber = false;
443     if (!PositionIsInBounds())
444       return;
445 
446     ch = m_pBuf[m_Pos++];
447     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
448       m_Pos--;
449       break;
450     }
451   }
452 }
453 
ReadString()454 ByteString CPDF_StreamParser::ReadString() {
455   if (!PositionIsInBounds())
456     return ByteString();
457 
458   ByteString buf;
459   int parlevel = 0;
460   int status = 0;
461   int iEscCode = 0;
462   uint8_t ch = m_pBuf[m_Pos++];
463   while (true) {
464     switch (status) {
465       case 0:
466         if (ch == ')') {
467           if (parlevel == 0) {
468             return buf.First(std::min(buf.GetLength(), kMaxStringLength));
469           }
470           parlevel--;
471           buf += ')';
472         } else if (ch == '(') {
473           parlevel++;
474           buf += '(';
475         } else if (ch == '\\') {
476           status = 1;
477         } else {
478           buf += static_cast<char>(ch);
479         }
480         break;
481       case 1:
482         if (FXSYS_IsOctalDigit(ch)) {
483           iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch));
484           status = 2;
485           break;
486         }
487         if (ch == '\r') {
488           status = 4;
489           break;
490         }
491         if (ch == '\n') {
492           // Do nothing.
493         } else if (ch == 'n') {
494           buf += '\n';
495         } else if (ch == 'r') {
496           buf += '\r';
497         } else if (ch == 't') {
498           buf += '\t';
499         } else if (ch == 'b') {
500           buf += '\b';
501         } else if (ch == 'f') {
502           buf += '\f';
503         } else {
504           buf += static_cast<char>(ch);
505         }
506         status = 0;
507         break;
508       case 2:
509         if (FXSYS_IsOctalDigit(ch)) {
510           iEscCode =
511               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
512           status = 3;
513         } else {
514           buf += static_cast<char>(iEscCode);
515           status = 0;
516           continue;
517         }
518         break;
519       case 3:
520         if (FXSYS_IsOctalDigit(ch)) {
521           iEscCode =
522               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
523           buf += static_cast<char>(iEscCode);
524           status = 0;
525         } else {
526           buf += static_cast<char>(iEscCode);
527           status = 0;
528           continue;
529         }
530         break;
531       case 4:
532         status = 0;
533         if (ch != '\n')
534           continue;
535         break;
536     }
537     if (!PositionIsInBounds())
538       return buf.First(std::min(buf.GetLength(), kMaxStringLength));
539 
540     ch = m_pBuf[m_Pos++];
541   }
542 }
543 
ReadHexString()544 ByteString CPDF_StreamParser::ReadHexString() {
545   if (!PositionIsInBounds())
546     return ByteString();
547 
548   ByteString buf;
549   bool bFirst = true;
550   int code = 0;
551   while (PositionIsInBounds()) {
552     uint8_t ch = m_pBuf[m_Pos++];
553     if (ch == '>')
554       break;
555 
556     if (!isxdigit(ch))
557       continue;
558 
559     int val = FXSYS_HexCharToInt(ch);
560     if (bFirst) {
561       code = val * 16;
562     } else {
563       code += val;
564       buf += static_cast<uint8_t>(code);
565     }
566     bFirst = !bFirst;
567   }
568   if (!bFirst)
569     buf += static_cast<char>(code);
570 
571   return buf.First(std::min<size_t>(buf.GetLength(), kMaxStringLength));
572 }
573 
PositionIsInBounds() const574 bool CPDF_StreamParser::PositionIsInBounds() const {
575   return m_Pos < m_pBuf.size();
576 }
577