• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_streamparser.h"
8 
9 #include <ctype.h>
10 
11 #include <algorithm>
12 #include <memory>
13 #include <utility>
14 
15 #include "constants/stream_dict_common.h"
16 #include "core/fpdfapi/page/cpdf_docpagedata.h"
17 #include "core/fpdfapi/parser/cpdf_array.h"
18 #include "core/fpdfapi/parser/cpdf_boolean.h"
19 #include "core/fpdfapi/parser/cpdf_dictionary.h"
20 #include "core/fpdfapi/parser/cpdf_name.h"
21 #include "core/fpdfapi/parser/cpdf_null.h"
22 #include "core/fpdfapi/parser/cpdf_number.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_string.h"
25 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcodec/data_and_bytes_consumed.h"
28 #include "core/fxcodec/jpeg/jpegmodule.h"
29 #include "core/fxcodec/scanlinedecoder.h"
30 #include "core/fxcrt/check.h"
31 #include "core/fxcrt/data_vector.h"
32 #include "core/fxcrt/fx_extension.h"
33 #include "core/fxcrt/fx_safe_types.h"
34 #include "core/fxcrt/span_util.h"
35 #include "core/fxge/calculate_pitch.h"
36 
37 namespace {
38 
// Deepest nesting of arrays/dictionaries that ReadNextObject() will descend
// into before giving up and returning null.
constexpr uint32_t kMaxNestedParsingLevel = 512;

// Literal and hex strings produced by this parser are truncated to at most
// this many bytes.
constexpr size_t kMaxStringLength = 32767;

// Spellings of the keyword tokens that are turned into boolean/null objects.
constexpr char kTrue[] = "true";
constexpr char kFalse[] = "false";
constexpr char kNull[] = "null";
45 
DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder)46 uint32_t DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder) {
47   if (!pDecoder)
48     return FX_INVALID_OFFSET;
49 
50   int ncomps = pDecoder->CountComps();
51   int bpc = pDecoder->GetBPC();
52   int width = pDecoder->GetWidth();
53   int height = pDecoder->GetHeight();
54   if (width <= 0 || height <= 0)
55     return FX_INVALID_OFFSET;
56 
57   std::optional<uint32_t> maybe_size =
58       fxge::CalculatePitch8(bpc, ncomps, width);
59   if (!maybe_size.has_value())
60     return FX_INVALID_OFFSET;
61 
62   FX_SAFE_UINT32 size = maybe_size.value();
63   size *= height;
64   if (size.ValueOrDefault(0) == 0)
65     return FX_INVALID_OFFSET;
66 
67   for (int row = 0; row < height; ++row) {
68     if (pDecoder->GetScanline(row).empty())
69       break;
70   }
71   return pDecoder->GetSrcOffset();
72 }
73 
DecodeInlineStream(pdfium::span<const uint8_t> src_span,int width,int height,const ByteString & decoder,RetainPtr<const CPDF_Dictionary> pParam,uint32_t orig_size)74 uint32_t DecodeInlineStream(pdfium::span<const uint8_t> src_span,
75                             int width,
76                             int height,
77                             const ByteString& decoder,
78                             RetainPtr<const CPDF_Dictionary> pParam,
79                             uint32_t orig_size) {
80   // |decoder| should not be an abbreviation.
81   DCHECK(decoder != "A85");
82   DCHECK(decoder != "AHx");
83   DCHECK(decoder != "CCF");
84   DCHECK(decoder != "DCT");
85   DCHECK(decoder != "Fl");
86   DCHECK(decoder != "LZW");
87   DCHECK(decoder != "RL");
88 
89   if (decoder == "FlateDecode") {
90     return FlateOrLZWDecode(/*use_lzw=*/false, src_span, pParam.Get(),
91                             /*estimated_size=*/orig_size)
92         .bytes_consumed;
93   }
94   if (decoder == "LZWDecode") {
95     return FlateOrLZWDecode(
96                /*use_lzw=*/true, src_span, pParam.Get(),
97                /*estimated_size=*/0)
98         .bytes_consumed;
99   }
100   if (decoder == "DCTDecode") {
101     std::unique_ptr<ScanlineDecoder> pDecoder = JpegModule::CreateDecoder(
102         src_span, width, height, 0,
103         !pParam || pParam->GetIntegerFor("ColorTransform", 1));
104     return DecodeAllScanlines(std::move(pDecoder));
105   }
106   if (decoder == "CCITTFaxDecode") {
107     std::unique_ptr<ScanlineDecoder> pDecoder =
108         CreateFaxDecoder(src_span, width, height, pParam.Get());
109     return DecodeAllScanlines(std::move(pDecoder));
110   }
111 
112   if (decoder == "ASCII85Decode") {
113     return A85Decode(src_span).bytes_consumed;
114   }
115   if (decoder == "ASCIIHexDecode") {
116     return HexDecode(src_span).bytes_consumed;
117   }
118   if (decoder == "RunLengthDecode") {
119     return RunLengthDecode(src_span).bytes_consumed;
120   }
121 
122   return FX_INVALID_OFFSET;
123 }
124 
125 }  // namespace
126 
CPDF_StreamParser(pdfium::span<const uint8_t> span)127 CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span)
128     : m_pBuf(span) {}
129 
CPDF_StreamParser(pdfium::span<const uint8_t> span,const WeakPtr<ByteStringPool> & pPool)130 CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span,
131                                      const WeakPtr<ByteStringPool>& pPool)
132     : m_pPool(pPool), m_pBuf(span) {}
133 
134 CPDF_StreamParser::~CPDF_StreamParser() = default;
135 
ReadInlineStream(CPDF_Document * pDoc,RetainPtr<CPDF_Dictionary> pDict,const CPDF_Object * pCSObj)136 RetainPtr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
137     CPDF_Document* pDoc,
138     RetainPtr<CPDF_Dictionary> pDict,
139     const CPDF_Object* pCSObj) {
140   if (m_Pos < m_pBuf.size() && PDFCharIsWhitespace(m_pBuf[m_Pos]))
141     m_Pos++;
142 
143   if (m_Pos == m_pBuf.size())
144     return nullptr;
145 
146   ByteString decoder;
147   RetainPtr<const CPDF_Dictionary> pParam;
148   RetainPtr<const CPDF_Object> pFilter = pDict->GetDirectObjectFor("Filter");
149   if (pFilter) {
150     const CPDF_Array* pArray = pFilter->AsArray();
151     if (pArray) {
152       decoder = pArray->GetByteStringAt(0);
153       RetainPtr<const CPDF_Array> pParams =
154           pDict->GetArrayFor(pdfium::stream::kDecodeParms);
155       if (pParams)
156         pParam = pParams->GetDictAt(0);
157     } else {
158       decoder = pFilter->GetString();
159       pParam = pDict->GetDictFor(pdfium::stream::kDecodeParms);
160     }
161   }
162   uint32_t width = pDict->GetIntegerFor("Width");
163   uint32_t height = pDict->GetIntegerFor("Height");
164   uint32_t bpc = 1;
165   uint32_t nComponents = 1;
166   if (pCSObj) {
167     RetainPtr<CPDF_ColorSpace> pCS =
168         CPDF_DocPageData::FromDocument(pDoc)->GetColorSpace(pCSObj, nullptr);
169     nComponents = pCS ? pCS->ComponentCount() : 3;
170     bpc = pDict->GetIntegerFor("BitsPerComponent");
171   }
172   std::optional<uint32_t> maybe_size =
173       fxge::CalculatePitch8(bpc, nComponents, width);
174   if (!maybe_size.has_value())
175     return nullptr;
176 
177   FX_SAFE_UINT32 size = maybe_size.value();
178   size *= height;
179   if (!size.IsValid())
180     return nullptr;
181 
182   uint32_t dwOrigSize = size.ValueOrDie();
183   DataVector<uint8_t> data;
184   uint32_t dwStreamSize;
185   if (decoder.IsEmpty()) {
186     dwOrigSize = std::min<uint32_t>(dwOrigSize, m_pBuf.size() - m_Pos);
187     auto src_span = m_pBuf.subspan(m_Pos, dwOrigSize);
188     data = DataVector<uint8_t>(src_span.begin(), src_span.end());
189     dwStreamSize = dwOrigSize;
190     m_Pos += dwOrigSize;
191   } else {
192     dwStreamSize = DecodeInlineStream(m_pBuf.subspan(m_Pos), width, height,
193                                       decoder, std::move(pParam), dwOrigSize);
194     if (!pdfium::IsValueInRangeForNumericType<int>(dwStreamSize)) {
195       return nullptr;
196     }
197 
198     uint32_t dwSavePos = m_Pos;
199     m_Pos += dwStreamSize;
200     while (true) {
201       uint32_t dwPrevPos = m_Pos;
202       ElementType type = ParseNextElement();
203       if (type == ElementType::kEndOfData)
204         break;
205 
206       if (type != ElementType::kKeyword) {
207         dwStreamSize += m_Pos - dwPrevPos;
208         continue;
209       }
210       if (GetWord() == "EI") {
211         m_Pos = dwPrevPos;
212         break;
213       }
214       dwStreamSize += m_Pos - dwPrevPos;
215     }
216     m_Pos = dwSavePos;
217     auto src_span = m_pBuf.subspan(m_Pos, dwStreamSize);
218     data = DataVector<uint8_t>(src_span.begin(), src_span.end());
219     m_Pos += dwStreamSize;
220   }
221   pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize));
222   return pdfium::MakeRetain<CPDF_Stream>(std::move(data), std::move(pDict));
223 }
224 
ParseNextElement()225 CPDF_StreamParser::ElementType CPDF_StreamParser::ParseNextElement() {
226   m_pLastObj.Reset();
227   m_WordSize = 0;
228   if (!PositionIsInBounds())
229     return ElementType::kEndOfData;
230 
231   uint8_t ch = m_pBuf[m_Pos++];
232   while (true) {
233     while (PDFCharIsWhitespace(ch)) {
234       if (!PositionIsInBounds())
235         return ElementType::kEndOfData;
236 
237       ch = m_pBuf[m_Pos++];
238     }
239 
240     if (ch != '%')
241       break;
242 
243     while (true) {
244       if (!PositionIsInBounds())
245         return ElementType::kEndOfData;
246 
247       ch = m_pBuf[m_Pos++];
248       if (PDFCharIsLineEnding(ch))
249         break;
250     }
251   }
252 
253   if (PDFCharIsDelimiter(ch) && ch != '/') {
254     m_Pos--;
255     m_pLastObj = ReadNextObject(false, false, 0);
256     return ElementType::kOther;
257   }
258 
259   bool bIsNumber = true;
260   while (true) {
261     if (m_WordSize < kMaxWordLength)
262       m_WordBuffer[m_WordSize++] = ch;
263 
264     if (!PDFCharIsNumeric(ch))
265       bIsNumber = false;
266 
267     if (!PositionIsInBounds())
268       break;
269 
270     ch = m_pBuf[m_Pos++];
271 
272     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
273       m_Pos--;
274       break;
275     }
276   }
277 
278   m_WordBuffer[m_WordSize] = 0;
279   if (bIsNumber)
280     return ElementType::kNumber;
281 
282   if (m_WordBuffer[0] == '/')
283     return ElementType::kName;
284 
285   if (m_WordSize == 4) {
286     if (GetWord() == kTrue) {
287       m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(true);
288       return ElementType::kOther;
289     }
290     if (GetWord() == kNull) {
291       m_pLastObj = pdfium::MakeRetain<CPDF_Null>();
292       return ElementType::kOther;
293     }
294   } else if (m_WordSize == 5) {
295     if (GetWord() == kFalse) {
296       m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(false);
297       return ElementType::kOther;
298     }
299   }
300   return ElementType::kKeyword;
301 }
302 
ReadNextObject(bool bAllowNestedArray,bool bInArray,uint32_t dwRecursionLevel)303 RetainPtr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
304     bool bAllowNestedArray,
305     bool bInArray,
306     uint32_t dwRecursionLevel) {
307   bool bIsNumber;
308   // Must get the next word before returning to avoid infinite loops.
309   GetNextWord(bIsNumber);
310   if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
311     return nullptr;
312 
313   if (bIsNumber) {
314     m_WordBuffer[m_WordSize] = 0;
315     return pdfium::MakeRetain<CPDF_Number>(GetWord());
316   }
317 
318   int first_char = m_WordBuffer[0];
319   if (first_char == '/') {
320     ByteString name = PDF_NameDecode(GetWord().Substr(1));
321     return pdfium::MakeRetain<CPDF_Name>(m_pPool, name);
322   }
323 
324   if (first_char == '(') {
325     return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadString());
326   }
327 
328   if (first_char == '<') {
329     if (m_WordSize == 1) {
330       return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadHexString(),
331                                              CPDF_String::DataType::kIsHex);
332     }
333 
334     auto pDict = pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
335     while (true) {
336       GetNextWord(bIsNumber);
337       if (m_WordSize == 2 && m_WordBuffer[0] == '>')
338         break;
339 
340       if (!m_WordSize || m_WordBuffer[0] != '/')
341         return nullptr;
342 
343       ByteString key = PDF_NameDecode(GetWord().Substr(1));
344       RetainPtr<CPDF_Object> pObj =
345           ReadNextObject(true, bInArray, dwRecursionLevel + 1);
346       if (!pObj)
347         return nullptr;
348 
349       pDict->SetFor(key, std::move(pObj));
350     }
351     return pDict;
352   }
353 
354   if (first_char == '[') {
355     if ((!bAllowNestedArray && bInArray))
356       return nullptr;
357 
358     auto pArray = pdfium::MakeRetain<CPDF_Array>();
359     while (true) {
360       RetainPtr<CPDF_Object> pObj =
361           ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
362       if (pObj) {
363         pArray->Append(std::move(pObj));
364         continue;
365       }
366       if (!m_WordSize || m_WordBuffer[0] == ']')
367         break;
368     }
369     return pArray;
370   }
371 
372   if (GetWord() == kFalse)
373     return pdfium::MakeRetain<CPDF_Boolean>(false);
374   if (GetWord() == kTrue)
375     return pdfium::MakeRetain<CPDF_Boolean>(true);
376   if (GetWord() == kNull)
377     return pdfium::MakeRetain<CPDF_Null>();
378   return nullptr;
379 }
380 
381 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser
GetNextWord(bool & bIsNumber)382 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
383   m_WordSize = 0;
384   bIsNumber = true;
385   if (!PositionIsInBounds())
386     return;
387 
388   uint8_t ch = m_pBuf[m_Pos++];
389   while (true) {
390     while (PDFCharIsWhitespace(ch)) {
391       if (!PositionIsInBounds()) {
392         return;
393       }
394       ch = m_pBuf[m_Pos++];
395     }
396 
397     if (ch != '%')
398       break;
399 
400     while (true) {
401       if (!PositionIsInBounds())
402         return;
403       ch = m_pBuf[m_Pos++];
404       if (PDFCharIsLineEnding(ch))
405         break;
406     }
407   }
408 
409   if (PDFCharIsDelimiter(ch)) {
410     bIsNumber = false;
411     m_WordBuffer[m_WordSize++] = ch;
412     if (ch == '/') {
413       while (true) {
414         if (!PositionIsInBounds())
415           return;
416         ch = m_pBuf[m_Pos++];
417         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
418           m_Pos--;
419           return;
420         }
421         if (m_WordSize < kMaxWordLength)
422           m_WordBuffer[m_WordSize++] = ch;
423       }
424     } else if (ch == '<') {
425       if (!PositionIsInBounds())
426         return;
427       ch = m_pBuf[m_Pos++];
428       if (ch == '<')
429         m_WordBuffer[m_WordSize++] = ch;
430       else
431         m_Pos--;
432     } else if (ch == '>') {
433       if (!PositionIsInBounds())
434         return;
435       ch = m_pBuf[m_Pos++];
436       if (ch == '>')
437         m_WordBuffer[m_WordSize++] = ch;
438       else
439         m_Pos--;
440     }
441     return;
442   }
443 
444   while (true) {
445     if (m_WordSize < kMaxWordLength)
446       m_WordBuffer[m_WordSize++] = ch;
447     if (!PDFCharIsNumeric(ch))
448       bIsNumber = false;
449     if (!PositionIsInBounds())
450       return;
451 
452     ch = m_pBuf[m_Pos++];
453     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
454       m_Pos--;
455       break;
456     }
457   }
458 }
459 
ReadString()460 ByteString CPDF_StreamParser::ReadString() {
461   if (!PositionIsInBounds())
462     return ByteString();
463 
464   ByteString buf;
465   int parlevel = 0;
466   int status = 0;
467   int iEscCode = 0;
468   uint8_t ch = m_pBuf[m_Pos++];
469   while (true) {
470     switch (status) {
471       case 0:
472         if (ch == ')') {
473           if (parlevel == 0) {
474             return buf.First(std::min(buf.GetLength(), kMaxStringLength));
475           }
476           parlevel--;
477           buf += ')';
478         } else if (ch == '(') {
479           parlevel++;
480           buf += '(';
481         } else if (ch == '\\') {
482           status = 1;
483         } else {
484           buf += static_cast<char>(ch);
485         }
486         break;
487       case 1:
488         if (FXSYS_IsOctalDigit(ch)) {
489           iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch));
490           status = 2;
491           break;
492         }
493         if (ch == '\r') {
494           status = 4;
495           break;
496         }
497         if (ch == '\n') {
498           // Do nothing.
499         } else if (ch == 'n') {
500           buf += '\n';
501         } else if (ch == 'r') {
502           buf += '\r';
503         } else if (ch == 't') {
504           buf += '\t';
505         } else if (ch == 'b') {
506           buf += '\b';
507         } else if (ch == 'f') {
508           buf += '\f';
509         } else {
510           buf += static_cast<char>(ch);
511         }
512         status = 0;
513         break;
514       case 2:
515         if (FXSYS_IsOctalDigit(ch)) {
516           iEscCode =
517               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
518           status = 3;
519         } else {
520           buf += static_cast<char>(iEscCode);
521           status = 0;
522           continue;
523         }
524         break;
525       case 3:
526         if (FXSYS_IsOctalDigit(ch)) {
527           iEscCode =
528               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
529           buf += static_cast<char>(iEscCode);
530           status = 0;
531         } else {
532           buf += static_cast<char>(iEscCode);
533           status = 0;
534           continue;
535         }
536         break;
537       case 4:
538         status = 0;
539         if (ch != '\n')
540           continue;
541         break;
542     }
543     if (!PositionIsInBounds())
544       return buf.First(std::min(buf.GetLength(), kMaxStringLength));
545 
546     ch = m_pBuf[m_Pos++];
547   }
548 }
549 
ReadHexString()550 DataVector<uint8_t> CPDF_StreamParser::ReadHexString() {
551   if (!PositionIsInBounds()) {
552     return DataVector<uint8_t>();
553   }
554 
555   // TODO(thestig): Deduplicate CPDF_SyntaxParser::ReadHexString()?
556   DataVector<uint8_t> buf;
557   bool bFirst = true;
558   uint8_t code = 0;
559   while (PositionIsInBounds()) {
560     uint8_t ch = m_pBuf[m_Pos++];
561     if (ch == '>')
562       break;
563 
564     if (!isxdigit(ch))
565       continue;
566 
567     int val = FXSYS_HexCharToInt(ch);
568     if (bFirst) {
569       code = val * 16;
570     } else {
571       code += val;
572       buf.push_back(code);
573     }
574     bFirst = !bFirst;
575   }
576   if (!bFirst) {
577     buf.push_back(code);
578   }
579 
580   if (buf.size() > kMaxStringLength) {
581     buf.resize(kMaxStringLength);
582   }
583   return buf;
584 }
585 
PositionIsInBounds() const586 bool CPDF_StreamParser::PositionIsInBounds() const {
587   return m_Pos < m_pBuf.size();
588 }
589