• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_streamparser.h"
8 
9 #include <limits.h>
10 
11 #include <memory>
12 #include <utility>
13 
14 #include "core/fpdfapi/cpdf_modulemgr.h"
15 #include "core/fpdfapi/page/cpdf_docpagedata.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_boolean.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_document.h"
20 #include "core/fpdfapi/parser/cpdf_name.h"
21 #include "core/fpdfapi/parser/cpdf_null.h"
22 #include "core/fpdfapi/parser/cpdf_number.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_string.h"
25 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcodec/fx_codec.h"
28 #include "core/fxcrt/fx_ext.h"
29 
30 namespace {
31 
32 const uint32_t kMaxNestedParsingLevel = 512;
33 const uint32_t kMaxWordBuffer = 256;
34 const FX_STRSIZE kMaxStringLength = 32767;
35 
DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,uint8_t * & dest_buf,uint32_t & dest_size)36 uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,
37                             uint8_t*& dest_buf,
38                             uint32_t& dest_size) {
39   if (!pDecoder)
40     return FX_INVALID_OFFSET;
41   int ncomps = pDecoder->CountComps();
42   int bpc = pDecoder->GetBPC();
43   int width = pDecoder->GetWidth();
44   int height = pDecoder->GetHeight();
45   int pitch = (width * ncomps * bpc + 7) / 8;
46   if (height == 0 || pitch > (1 << 30) / height)
47     return FX_INVALID_OFFSET;
48 
49   dest_buf = FX_Alloc2D(uint8_t, pitch, height);
50   dest_size = pitch * height;  // Safe since checked alloc returned.
51   for (int row = 0; row < height; row++) {
52     const uint8_t* pLine = pDecoder->GetScanline(row);
53     if (!pLine)
54       break;
55 
56     FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch);
57   }
58   return pDecoder->GetSrcOffset();
59 }
60 
PDF_DecodeInlineStream(const uint8_t * src_buf,uint32_t limit,int width,int height,CFX_ByteString & decoder,CPDF_Dictionary * pParam,uint8_t * & dest_buf,uint32_t & dest_size)61 uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf,
62                                 uint32_t limit,
63                                 int width,
64                                 int height,
65                                 CFX_ByteString& decoder,
66                                 CPDF_Dictionary* pParam,
67                                 uint8_t*& dest_buf,
68                                 uint32_t& dest_size) {
69   if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
70     std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
71         FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
72     return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
73   }
74   if (decoder == "ASCII85Decode" || decoder == "A85")
75     return A85Decode(src_buf, limit, dest_buf, dest_size);
76   if (decoder == "ASCIIHexDecode" || decoder == "AHx")
77     return HexDecode(src_buf, limit, dest_buf, dest_size);
78   if (decoder == "FlateDecode" || decoder == "Fl") {
79     return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, dest_size,
80                                     dest_buf, dest_size);
81   }
82   if (decoder == "LZWDecode" || decoder == "LZW") {
83     return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf,
84                                     dest_size);
85   }
86   if (decoder == "DCTDecode" || decoder == "DCT") {
87     std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
88         CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
89             src_buf, limit, width, height, 0,
90             !pParam || pParam->GetIntegerFor("ColorTransform", 1));
91     return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
92   }
93   if (decoder == "RunLengthDecode" || decoder == "RL")
94     return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
95   dest_size = 0;
96   dest_buf = 0;
97   return (uint32_t)-1;
98 }
99 
100 }  // namespace
101 
CPDF_StreamParser(const uint8_t * pData,uint32_t dwSize)102 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize)
103     : m_pBuf(pData),
104       m_Size(dwSize),
105       m_Pos(0),
106       m_pPool(nullptr) {}
107 
CPDF_StreamParser(const uint8_t * pData,uint32_t dwSize,const CFX_WeakPtr<CFX_ByteStringPool> & pPool)108 CPDF_StreamParser::CPDF_StreamParser(
109     const uint8_t* pData,
110     uint32_t dwSize,
111     const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
112     : m_pBuf(pData),
113       m_Size(dwSize),
114       m_Pos(0),
115       m_pPool(pPool) {}
116 
~CPDF_StreamParser()117 CPDF_StreamParser::~CPDF_StreamParser() {}
118 
ReadInlineStream(CPDF_Document * pDoc,std::unique_ptr<CPDF_Dictionary> pDict,CPDF_Object * pCSObj)119 std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
120     CPDF_Document* pDoc,
121     std::unique_ptr<CPDF_Dictionary> pDict,
122     CPDF_Object* pCSObj) {
123   if (m_Pos == m_Size)
124     return nullptr;
125 
126   if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
127     m_Pos++;
128 
129   CFX_ByteString Decoder;
130   CPDF_Dictionary* pParam = nullptr;
131   CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter");
132   if (pFilter) {
133     if (CPDF_Array* pArray = pFilter->AsArray()) {
134       Decoder = pArray->GetStringAt(0);
135       CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms");
136       if (pParams)
137         pParam = pParams->GetDictAt(0);
138     } else {
139       Decoder = pFilter->GetString();
140       pParam = pDict->GetDictFor("DecodeParms");
141     }
142   }
143   uint32_t width = pDict->GetIntegerFor("Width");
144   uint32_t height = pDict->GetIntegerFor("Height");
145   uint32_t OrigSize = 0;
146   if (pCSObj) {
147     uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent");
148     uint32_t nComponents = 1;
149     CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
150     if (pCS) {
151       nComponents = pCS->CountComponents();
152       pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
153     } else {
154       nComponents = 3;
155     }
156     uint32_t pitch = width;
157     if (bpc && pitch > INT_MAX / bpc)
158       return nullptr;
159 
160     pitch *= bpc;
161     if (nComponents && pitch > INT_MAX / nComponents)
162       return nullptr;
163 
164     pitch *= nComponents;
165     if (pitch > INT_MAX - 7)
166       return nullptr;
167 
168     pitch += 7;
169     pitch /= 8;
170     OrigSize = pitch;
171   } else {
172     if (width > INT_MAX - 7)
173       return nullptr;
174 
175     OrigSize = ((width + 7) / 8);
176   }
177   if (height && OrigSize > INT_MAX / height)
178     return nullptr;
179 
180   OrigSize *= height;
181   std::unique_ptr<uint8_t, FxFreeDeleter> pData;
182   uint32_t dwStreamSize;
183   if (Decoder.IsEmpty()) {
184     if (OrigSize > m_Size - m_Pos)
185       OrigSize = m_Size - m_Pos;
186     pData.reset(FX_Alloc(uint8_t, OrigSize));
187     FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, OrigSize);
188     dwStreamSize = OrigSize;
189     m_Pos += OrigSize;
190   } else {
191     uint8_t* pIgnore = nullptr;
192     uint32_t dwDestSize = OrigSize;
193     dwStreamSize =
194         PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
195                                Decoder, pParam, pIgnore, dwDestSize);
196     FX_Free(pIgnore);
197     if (static_cast<int>(dwStreamSize) < 0)
198       return nullptr;
199 
200     uint32_t dwSavePos = m_Pos;
201     m_Pos += dwStreamSize;
202     while (1) {
203       uint32_t dwPrevPos = m_Pos;
204       CPDF_StreamParser::SyntaxType type = ParseNextElement();
205       if (type == CPDF_StreamParser::EndOfData)
206         break;
207 
208       if (type != CPDF_StreamParser::Keyword) {
209         dwStreamSize += m_Pos - dwPrevPos;
210         continue;
211       }
212       if (GetWord() == "EI") {
213         m_Pos = dwPrevPos;
214         break;
215       }
216       dwStreamSize += m_Pos - dwPrevPos;
217     }
218     m_Pos = dwSavePos;
219     pData.reset(FX_Alloc(uint8_t, dwStreamSize));
220     FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize);
221     m_Pos += dwStreamSize;
222   }
223   pDict->SetNewFor<CPDF_Number>("Length", (int)dwStreamSize);
224   return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize,
225                                          std::move(pDict));
226 }
227 
ParseNextElement()228 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
229   m_pLastObj.reset();
230   m_WordSize = 0;
231   if (!PositionIsInBounds())
232     return EndOfData;
233 
234   int ch = m_pBuf[m_Pos++];
235   while (1) {
236     while (PDFCharIsWhitespace(ch)) {
237       if (!PositionIsInBounds())
238         return EndOfData;
239 
240       ch = m_pBuf[m_Pos++];
241     }
242 
243     if (ch != '%')
244       break;
245 
246     while (1) {
247       if (!PositionIsInBounds())
248         return EndOfData;
249 
250       ch = m_pBuf[m_Pos++];
251       if (PDFCharIsLineEnding(ch))
252         break;
253     }
254   }
255 
256   if (PDFCharIsDelimiter(ch) && ch != '/') {
257     m_Pos--;
258     m_pLastObj = ReadNextObject(false, false, 0);
259     return Others;
260   }
261 
262   bool bIsNumber = true;
263   while (1) {
264     if (m_WordSize < kMaxWordBuffer)
265       m_WordBuffer[m_WordSize++] = ch;
266 
267     if (!PDFCharIsNumeric(ch))
268       bIsNumber = false;
269 
270     if (!PositionIsInBounds())
271       break;
272 
273     ch = m_pBuf[m_Pos++];
274 
275     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
276       m_Pos--;
277       break;
278     }
279   }
280 
281   m_WordBuffer[m_WordSize] = 0;
282   if (bIsNumber)
283     return Number;
284 
285   if (m_WordBuffer[0] == '/')
286     return Name;
287 
288   if (m_WordSize == 4) {
289     if (memcmp(m_WordBuffer, "true", 4) == 0) {
290       m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true);
291       return Others;
292     }
293     if (memcmp(m_WordBuffer, "null", 4) == 0) {
294       m_pLastObj = pdfium::MakeUnique<CPDF_Null>();
295       return Others;
296     }
297   } else if (m_WordSize == 5) {
298     if (memcmp(m_WordBuffer, "false", 5) == 0) {
299       m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false);
300       return Others;
301     }
302   }
303   return Keyword;
304 }
305 
ReadNextObject(bool bAllowNestedArray,bool bInArray,uint32_t dwRecursionLevel)306 std::unique_ptr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
307     bool bAllowNestedArray,
308     bool bInArray,
309     uint32_t dwRecursionLevel) {
310   bool bIsNumber;
311   // Must get the next word before returning to avoid infinite loops.
312   GetNextWord(bIsNumber);
313   if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
314     return nullptr;
315 
316   if (bIsNumber) {
317     m_WordBuffer[m_WordSize] = 0;
318     return pdfium::MakeUnique<CPDF_Number>(
319         CFX_ByteStringC(m_WordBuffer, m_WordSize));
320   }
321 
322   int first_char = m_WordBuffer[0];
323   if (first_char == '/') {
324     CFX_ByteString name =
325         PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
326     return pdfium::MakeUnique<CPDF_Name>(m_pPool, name);
327   }
328 
329   if (first_char == '(') {
330     CFX_ByteString str = ReadString();
331     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
332   }
333 
334   if (first_char == '<') {
335     if (m_WordSize == 1)
336       return pdfium::MakeUnique<CPDF_String>(m_pPool, ReadHexString(), true);
337 
338     auto pDict = pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
339     while (1) {
340       GetNextWord(bIsNumber);
341       if (m_WordSize == 2 && m_WordBuffer[0] == '>')
342         break;
343 
344       if (!m_WordSize || m_WordBuffer[0] != '/')
345         return nullptr;
346 
347       CFX_ByteString key =
348           PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
349       std::unique_ptr<CPDF_Object> pObj =
350           ReadNextObject(true, bInArray, dwRecursionLevel + 1);
351       if (!pObj)
352         return nullptr;
353 
354       if (!key.IsEmpty())
355         pDict->SetFor(key, std::move(pObj));
356     }
357     return std::move(pDict);
358   }
359 
360   if (first_char == '[') {
361     if ((!bAllowNestedArray && bInArray))
362       return nullptr;
363 
364     auto pArray = pdfium::MakeUnique<CPDF_Array>();
365     while (1) {
366       std::unique_ptr<CPDF_Object> pObj =
367           ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
368       if (pObj) {
369         pArray->Add(std::move(pObj));
370         continue;
371       }
372       if (!m_WordSize || m_WordBuffer[0] == ']')
373         break;
374     }
375     return std::move(pArray);
376   }
377 
378   if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5))
379     return pdfium::MakeUnique<CPDF_Boolean>(false);
380 
381   if (m_WordSize == 4) {
382     if (memcmp(m_WordBuffer, "true", 4) == 0)
383       return pdfium::MakeUnique<CPDF_Boolean>(true);
384     if (memcmp(m_WordBuffer, "null", 4) == 0)
385       return pdfium::MakeUnique<CPDF_Null>();
386   }
387 
388   return nullptr;
389 }
390 
// TODO(npm): The following methods are almost identical to their counterparts
// in cpdf_syntaxparser.
GetNextWord(bool & bIsNumber)392 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
393   m_WordSize = 0;
394   bIsNumber = true;
395   if (!PositionIsInBounds())
396     return;
397 
398   int ch = m_pBuf[m_Pos++];
399   while (1) {
400     while (PDFCharIsWhitespace(ch)) {
401       if (!PositionIsInBounds()) {
402         return;
403       }
404       ch = m_pBuf[m_Pos++];
405     }
406 
407     if (ch != '%')
408       break;
409 
410     while (1) {
411       if (!PositionIsInBounds())
412         return;
413       ch = m_pBuf[m_Pos++];
414       if (PDFCharIsLineEnding(ch))
415         break;
416     }
417   }
418 
419   if (PDFCharIsDelimiter(ch)) {
420     bIsNumber = false;
421     m_WordBuffer[m_WordSize++] = ch;
422     if (ch == '/') {
423       while (1) {
424         if (!PositionIsInBounds())
425           return;
426         ch = m_pBuf[m_Pos++];
427         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
428           m_Pos--;
429           return;
430         }
431 
432         if (m_WordSize < kMaxWordBuffer)
433           m_WordBuffer[m_WordSize++] = ch;
434       }
435     } else if (ch == '<') {
436       if (!PositionIsInBounds())
437         return;
438       ch = m_pBuf[m_Pos++];
439       if (ch == '<')
440         m_WordBuffer[m_WordSize++] = ch;
441       else
442         m_Pos--;
443     } else if (ch == '>') {
444       if (!PositionIsInBounds())
445         return;
446       ch = m_pBuf[m_Pos++];
447       if (ch == '>')
448         m_WordBuffer[m_WordSize++] = ch;
449       else
450         m_Pos--;
451     }
452     return;
453   }
454 
455   while (1) {
456     if (m_WordSize < kMaxWordBuffer)
457       m_WordBuffer[m_WordSize++] = ch;
458     if (!PDFCharIsNumeric(ch))
459       bIsNumber = false;
460 
461     if (!PositionIsInBounds())
462       return;
463     ch = m_pBuf[m_Pos++];
464     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
465       m_Pos--;
466       break;
467     }
468   }
469 }
470 
ReadString()471 CFX_ByteString CPDF_StreamParser::ReadString() {
472   if (!PositionIsInBounds())
473     return CFX_ByteString();
474 
475   uint8_t ch = m_pBuf[m_Pos++];
476   CFX_ByteTextBuf buf;
477   int parlevel = 0;
478   int status = 0;
479   int iEscCode = 0;
480   while (1) {
481     switch (status) {
482       case 0:
483         if (ch == ')') {
484           if (parlevel == 0) {
485             if (buf.GetLength() > kMaxStringLength) {
486               return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
487             }
488             return buf.MakeString();
489           }
490           parlevel--;
491           buf.AppendChar(')');
492         } else if (ch == '(') {
493           parlevel++;
494           buf.AppendChar('(');
495         } else if (ch == '\\') {
496           status = 1;
497         } else {
498           buf.AppendChar((char)ch);
499         }
500         break;
501       case 1:
502         if (ch >= '0' && ch <= '7') {
503           iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
504           status = 2;
505           break;
506         }
507         if (ch == 'n') {
508           buf.AppendChar('\n');
509         } else if (ch == 'r') {
510           buf.AppendChar('\r');
511         } else if (ch == 't') {
512           buf.AppendChar('\t');
513         } else if (ch == 'b') {
514           buf.AppendChar('\b');
515         } else if (ch == 'f') {
516           buf.AppendChar('\f');
517         } else if (ch == '\r') {
518           status = 4;
519           break;
520         } else if (ch == '\n') {
521         } else {
522           buf.AppendChar(ch);
523         }
524         status = 0;
525         break;
526       case 2:
527         if (ch >= '0' && ch <= '7') {
528           iEscCode =
529               iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
530           status = 3;
531         } else {
532           buf.AppendChar(iEscCode);
533           status = 0;
534           continue;
535         }
536         break;
537       case 3:
538         if (ch >= '0' && ch <= '7') {
539           iEscCode =
540               iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
541           buf.AppendChar(iEscCode);
542           status = 0;
543         } else {
544           buf.AppendChar(iEscCode);
545           status = 0;
546           continue;
547         }
548         break;
549       case 4:
550         status = 0;
551         if (ch != '\n') {
552           continue;
553         }
554         break;
555     }
556     if (!PositionIsInBounds())
557       break;
558 
559     ch = m_pBuf[m_Pos++];
560   }
561   if (PositionIsInBounds())
562     ++m_Pos;
563 
564   if (buf.GetLength() > kMaxStringLength) {
565     return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
566   }
567   return buf.MakeString();
568 }
569 
ReadHexString()570 CFX_ByteString CPDF_StreamParser::ReadHexString() {
571   if (!PositionIsInBounds())
572     return CFX_ByteString();
573 
574   CFX_ByteTextBuf buf;
575   bool bFirst = true;
576   int code = 0;
577   while (PositionIsInBounds()) {
578     int ch = m_pBuf[m_Pos++];
579 
580     if (ch == '>')
581       break;
582 
583     if (!std::isxdigit(ch))
584       continue;
585 
586     int val = FXSYS_toHexDigit(ch);
587     if (bFirst) {
588       code = val * 16;
589     } else {
590       code += val;
591       buf.AppendByte((uint8_t)code);
592     }
593     bFirst = !bFirst;
594   }
595   if (!bFirst)
596     buf.AppendChar((char)code);
597 
598   if (buf.GetLength() > kMaxStringLength)
599     return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
600 
601   return buf.MakeString();
602 }
603 
PositionIsInBounds() const604 bool CPDF_StreamParser::PositionIsInBounds() const {
605   return m_Pos < m_Size;
606 }
607