• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
8 
9 #include <ctype.h>
10 #include <limits.h>
11 #include <stddef.h>
12 
13 #include <algorithm>
14 #include <array>
15 #include <utility>
16 
17 #include "build/build_config.h"
18 #include "constants/stream_dict_common.h"
19 #include "core/fpdfapi/parser/cpdf_array.h"
20 #include "core/fpdfapi/parser/cpdf_dictionary.h"
21 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
22 #include "core/fxcodec/data_and_bytes_consumed.h"
23 #include "core/fxcodec/fax/faxmodule.h"
24 #include "core/fxcodec/flate/flatemodule.h"
25 #include "core/fxcodec/scanlinedecoder.h"
26 #include "core/fxcrt/check.h"
27 #include "core/fxcrt/compiler_specific.h"
28 #include "core/fxcrt/containers/contains.h"
29 #include "core/fxcrt/fx_extension.h"
30 #include "core/fxcrt/fx_memcpy_wrappers.h"
31 #include "core/fxcrt/fx_safe_types.h"
32 #include "core/fxcrt/numerics/safe_conversions.h"
33 #include "core/fxcrt/span.h"
34 #include "core/fxcrt/stl_util.h"
35 #include "core/fxcrt/utf16.h"
36 
37 namespace {
38 
// Size limit of 20 MiB; RunLengthDecode() refuses to allocate an output buffer
// whose computed size reaches this value.
constexpr uint32_t kMaxStreamSize = 20 * 1024 * 1024;
40 
CheckFlateDecodeParams(int Colors,int BitsPerComponent,int Columns)41 bool CheckFlateDecodeParams(int Colors, int BitsPerComponent, int Columns) {
42   if (Colors < 0 || BitsPerComponent < 0 || Columns < 0)
43     return false;
44 
45   FX_SAFE_INT32 check = Columns;
46   check *= Colors;
47   check *= BitsPerComponent;
48   if (!check.IsValid())
49     return false;
50 
51   return check.ValueOrDie() <= INT_MAX - 7;
52 }
53 
// Returns byte `i` of `res`, counting from the most significant byte: i == 0
// selects bits 31..24 and i == 3 selects bits 7..0. `i` must be in [0, 3];
// any larger value would produce an out-of-range shift.
uint8_t GetA85Result(uint32_t res, size_t i) {
  const size_t shift_bits = (3 - i) * 8;
  return static_cast<uint8_t>(res >> shift_bits);
}
57 
58 }  // namespace
59 
// Lookup table with one 16-bit code per possible byte value (256 entries).
// PDF_DecodeText() indexes it with each input byte when the text carries no
// byte-order mark, and PDF_EncodeText() searches it in reverse to turn a
// character back into a single byte.
// NOTE(review): judging by the name this is the PDFDocEncoding -> Unicode
// mapping from the PDF specification -- confirm against the spec's table.
// Besides index 0, the entries at indices 0x7f, 0x9f and 0xad hold 0x0000.
60 const std::array<uint16_t, 256> kPDFDocEncoding = {
61     0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
62     0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,
63     0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x02d8, 0x02c7, 0x02c6,
64     0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc, 0x0020, 0x0021, 0x0022, 0x0023,
65     0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c,
66     0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035,
67     0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e,
68     0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
69     0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050,
70     0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
71     0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062,
72     0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b,
73     0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074,
74     0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d,
75     0x007e, 0x0000, 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192,
76     0x2044, 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
77     0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, 0x0178,
78     0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000, 0x20ac, 0x00a1,
79     0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa,
80     0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3,
81     0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc,
82     0x00bd, 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5,
83     0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce,
84     0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
85     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 0x00e0,
86     0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9,
87     0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2,
88     0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb,
89     0x00fc, 0x00fd, 0x00fe, 0x00ff};
90 
ValidateDecoderPipeline(const CPDF_Array * pDecoders)91 bool ValidateDecoderPipeline(const CPDF_Array* pDecoders) {
92   size_t count = pDecoders->size();
93   if (count == 0)
94     return true;
95 
96   for (size_t i = 0; i < count; ++i) {
97     RetainPtr<const CPDF_Object> object = pDecoders->GetDirectObjectAt(i);
98     if (!object || !object->IsName()) {
99       return false;
100     }
101   }
102 
103   if (count == 1)
104     return true;
105 
106   // TODO(thestig): Consolidate all the places that use these filter names.
107   static const char kValidDecoders[][16] = {
108       "FlateDecode",    "Fl",  "LZWDecode",       "LZW", "ASCII85Decode", "A85",
109       "ASCIIHexDecode", "AHx", "RunLengthDecode", "RL"};
110   for (size_t i = 0; i < count - 1; ++i) {
111     if (!pdfium::Contains(kValidDecoders, pDecoders->GetByteStringAt(i)))
112       return false;
113   }
114   return true;
115 }
116 
A85Decode(pdfium::span<const uint8_t> src_span)117 DataAndBytesConsumed A85Decode(pdfium::span<const uint8_t> src_span) {
118   if (src_span.empty()) {
119     return {DataVector<uint8_t>(), 0u};
120   }
121 
122   // Count legal characters and zeros.
123   uint32_t zcount = 0;
124   uint32_t pos = 0;
125   while (pos < src_span.size()) {
126     uint8_t ch = src_span[pos];
127     if (ch == 'z') {
128       zcount++;
129     } else if ((ch < '!' || ch > 'u') && !PDFCharIsLineEnding(ch) &&
130                ch != ' ' && ch != '\t') {
131       break;
132     }
133     pos++;
134   }
135   // No content to decode.
136   if (pos == 0) {
137     return {DataVector<uint8_t>(), 0u};
138   }
139 
140   // Count the space needed to contain non-zero characters. The encoding ratio
141   // of Ascii85 is 4:5.
142   uint32_t space_for_non_zeroes = (pos - zcount) / 5 * 4 + 4;
143   FX_SAFE_UINT32 size = zcount;
144   size *= 4;
145   size += space_for_non_zeroes;
146   if (!size.IsValid()) {
147     return {DataVector<uint8_t>(), FX_INVALID_OFFSET};
148   }
149 
150   DataVector<uint8_t> dest_buf(size.ValueOrDie());
151   pdfium::span<uint8_t> dest_span(dest_buf);
152   size_t state = 0;
153   uint32_t res = 0;
154   pos = 0;
155   while (pos < src_span.size()) {
156     uint8_t ch = src_span[pos++];
157     if (PDFCharIsLineEnding(ch) || ch == ' ' || ch == '\t') {
158       continue;
159     }
160 
161     if (ch == 'z') {
162       fxcrt::Fill(dest_span.first(4), 0);
163       dest_span = dest_span.subspan(4);
164       state = 0;
165       res = 0;
166       continue;
167     }
168 
169     // Check for the end or illegal character.
170     if (ch < '!' || ch > 'u') {
171       break;
172     }
173 
174     res = res * 85 + ch - 33;
175     if (state < 4) {
176       ++state;
177       continue;
178     }
179 
180     for (size_t i = 0; i < 4; ++i) {
181       dest_span.front() = GetA85Result(res, i);
182       dest_span = dest_span.subspan(1);
183     }
184     state = 0;
185     res = 0;
186   }
187   // Handle partial group.
188   if (state) {
189     for (size_t i = state; i < 5; ++i) {
190       res = res * 85 + 84;
191     }
192     for (size_t i = 0; i < state - 1; ++i) {
193       dest_span.front() = GetA85Result(res, i);
194       dest_span = dest_span.subspan(1);
195     }
196   }
197   if (pos < src_span.size() && src_span[pos] == '>') {
198     ++pos;
199   }
200   dest_buf.resize(dest_buf.size() - dest_span.size());
201   return {std::move(dest_buf), pos};
202 }
203 
HexDecode(pdfium::span<const uint8_t> src_span)204 DataAndBytesConsumed HexDecode(pdfium::span<const uint8_t> src_span) {
205   if (src_span.empty()) {
206     return {DataVector<uint8_t>(), 0u};
207   }
208 
209   uint32_t i = 0;
210   // Find the end of data.
211   while (i < src_span.size() && src_span[i] != '>') {
212     ++i;
213   }
214 
215   DataVector<uint8_t> dest_buf(i / 2 + 1);
216   pdfium::span<uint8_t> dest_span(dest_buf);
217   bool is_first = true;
218   for (i = 0; i < src_span.size(); ++i) {
219     uint8_t ch = src_span[i];
220     if (PDFCharIsLineEnding(ch) || ch == ' ' || ch == '\t') {
221       continue;
222     }
223 
224     if (ch == '>') {
225       ++i;
226       break;
227     }
228     if (!isxdigit(ch)) {
229       continue;
230     }
231 
232     int digit = FXSYS_HexCharToInt(ch);
233     if (is_first) {
234       dest_span.front() = digit * 16;
235     } else {
236       dest_span.front() += digit;
237       dest_span = dest_span.subspan(1);
238     }
239     is_first = !is_first;
240   }
241   size_t dest_size = dest_buf.size() - dest_span.size();
242   if (!is_first) {
243     ++dest_size;
244   }
245   dest_buf.resize(dest_size);
246   return {std::move(dest_buf), i};
247 }
248 
RunLengthDecode(pdfium::span<const uint8_t> src_span)249 DataAndBytesConsumed RunLengthDecode(pdfium::span<const uint8_t> src_span) {
250   uint32_t dest_size = 0;
251   size_t i = 0;
252   while (i < src_span.size()) {
253     if (src_span[i] == 128)
254       break;
255 
256     uint32_t old = dest_size;
257     if (src_span[i] < 128) {
258       dest_size += src_span[i] + 1;
259       if (dest_size < old) {
260         return {DataVector<uint8_t>(), FX_INVALID_OFFSET};
261       }
262       i += src_span[i] + 2;
263     } else {
264       dest_size += 257 - src_span[i];
265       if (dest_size < old) {
266         return {DataVector<uint8_t>(), FX_INVALID_OFFSET};
267       }
268       i += 2;
269     }
270   }
271   if (dest_size >= kMaxStreamSize) {
272     return {DataVector<uint8_t>(), FX_INVALID_OFFSET};
273   }
274 
275   DataVector<uint8_t> dest_buf(dest_size);
276   auto dest_span = pdfium::make_span(dest_buf);
277   i = 0;
278   int dest_count = 0;
279   while (i < src_span.size()) {
280     if (src_span[i] == 128)
281       break;
282 
283     if (src_span[i] < 128) {
284       uint32_t copy_len = src_span[i] + 1;
285       uint32_t buf_left = src_span.size() - i - 1;
286       if (buf_left < copy_len) {
287         uint32_t delta = copy_len - buf_left;
288         copy_len = buf_left;
289         fxcrt::Fill(dest_span.subspan(dest_count + copy_len, delta), 0);
290       }
291       auto copy_span = src_span.subspan(i + 1, copy_len);
292       fxcrt::Copy(copy_span, dest_span.subspan(dest_count));
293       dest_count += src_span[i] + 1;
294       i += src_span[i] + 2;
295     } else {
296       const uint8_t fill = i + 1 < src_span.size() ? src_span[i + 1] : 0;
297       const size_t fill_size = 257 - src_span[i];
298       fxcrt::Fill(dest_span.subspan(dest_count, fill_size), fill);
299       dest_count += fill_size;
300       i += 2;
301     }
302   }
303   return {std::move(dest_buf),
304           pdfium::checked_cast<uint32_t>(std::min(i + 1, src_span.size()))};
305 }
306 
CreateFaxDecoder(pdfium::span<const uint8_t> src_span,int width,int height,const CPDF_Dictionary * pParams)307 std::unique_ptr<ScanlineDecoder> CreateFaxDecoder(
308     pdfium::span<const uint8_t> src_span,
309     int width,
310     int height,
311     const CPDF_Dictionary* pParams) {
312   int K = 0;
313   bool EndOfLine = false;
314   bool ByteAlign = false;
315   bool BlackIs1 = false;
316   int Columns = 1728;
317   int Rows = 0;
318   if (pParams) {
319     K = pParams->GetIntegerFor("K");
320     EndOfLine = !!pParams->GetIntegerFor("EndOfLine");
321     ByteAlign = !!pParams->GetIntegerFor("EncodedByteAlign");
322     BlackIs1 = !!pParams->GetIntegerFor("BlackIs1");
323     Columns = pParams->GetIntegerFor("Columns", 1728);
324     Rows = pParams->GetIntegerFor("Rows");
325     if (Rows > USHRT_MAX)
326       Rows = 0;
327   }
328   return FaxModule::CreateDecoder(src_span, width, height, K, EndOfLine,
329                                   ByteAlign, BlackIs1, Columns, Rows);
330 }
331 
CreateFlateDecoder(pdfium::span<const uint8_t> src_span,int width,int height,int nComps,int bpc,const CPDF_Dictionary * pParams)332 std::unique_ptr<ScanlineDecoder> CreateFlateDecoder(
333     pdfium::span<const uint8_t> src_span,
334     int width,
335     int height,
336     int nComps,
337     int bpc,
338     const CPDF_Dictionary* pParams) {
339   int predictor = 0;
340   int Colors = 0;
341   int BitsPerComponent = 0;
342   int Columns = 0;
343   if (pParams) {
344     predictor = pParams->GetIntegerFor("Predictor");
345     Colors = pParams->GetIntegerFor("Colors", 1);
346     BitsPerComponent = pParams->GetIntegerFor("BitsPerComponent", 8);
347     Columns = pParams->GetIntegerFor("Columns", 1);
348     if (!CheckFlateDecodeParams(Colors, BitsPerComponent, Columns))
349       return nullptr;
350   }
351   return FlateModule::CreateDecoder(src_span, width, height, nComps, bpc,
352                                     predictor, Colors, BitsPerComponent,
353                                     Columns);
354 }
355 
FlateOrLZWDecode(bool use_lzw,pdfium::span<const uint8_t> src_span,const CPDF_Dictionary * pParams,uint32_t estimated_size)356 DataAndBytesConsumed FlateOrLZWDecode(bool use_lzw,
357                                       pdfium::span<const uint8_t> src_span,
358                                       const CPDF_Dictionary* pParams,
359                                       uint32_t estimated_size) {
360   int predictor = 0;
361   int Colors = 0;
362   int BitsPerComponent = 0;
363   int Columns = 0;
364   bool bEarlyChange = true;
365   if (pParams) {
366     predictor = pParams->GetIntegerFor("Predictor");
367     bEarlyChange = !!pParams->GetIntegerFor("EarlyChange", 1);
368     Colors = pParams->GetIntegerFor("Colors", 1);
369     BitsPerComponent = pParams->GetIntegerFor("BitsPerComponent", 8);
370     Columns = pParams->GetIntegerFor("Columns", 1);
371     if (!CheckFlateDecodeParams(Colors, BitsPerComponent, Columns))
372       return {DataVector<uint8_t>(), FX_INVALID_OFFSET};
373   }
374   return FlateModule::FlateOrLZWDecode(use_lzw, src_span, bEarlyChange,
375                                        predictor, Colors, BitsPerComponent,
376                                        Columns, estimated_size);
377 }
378 
GetDecoderArray(RetainPtr<const CPDF_Dictionary> pDict)379 std::optional<DecoderArray> GetDecoderArray(
380     RetainPtr<const CPDF_Dictionary> pDict) {
381   RetainPtr<const CPDF_Object> pFilter = pDict->GetDirectObjectFor("Filter");
382   if (!pFilter)
383     return DecoderArray();
384 
385   if (!pFilter->IsArray() && !pFilter->IsName())
386     return std::nullopt;
387 
388   RetainPtr<const CPDF_Object> pParams =
389       pDict->GetDirectObjectFor(pdfium::stream::kDecodeParms);
390 
391   DecoderArray decoder_array;
392   if (const CPDF_Array* pDecoders = pFilter->AsArray()) {
393     if (!ValidateDecoderPipeline(pDecoders))
394       return std::nullopt;
395 
396     RetainPtr<const CPDF_Array> pParamsArray = ToArray(pParams);
397     for (size_t i = 0; i < pDecoders->size(); ++i) {
398       decoder_array.emplace_back(
399           pDecoders->GetByteStringAt(i),
400           pParamsArray ? pParamsArray->GetDictAt(i) : nullptr);
401     }
402   } else {
403     DCHECK(pFilter->IsName());
404     decoder_array.emplace_back(pFilter->GetString(),
405                                pParams ? pParams->GetDict() : nullptr);
406   }
407 
408   return decoder_array;
409 }
410 
411 PDFDataDecodeResult::PDFDataDecodeResult() = default;
412 
PDFDataDecodeResult(DataVector<uint8_t> data,ByteString image_encoding,RetainPtr<const CPDF_Dictionary> image_params)413 PDFDataDecodeResult::PDFDataDecodeResult(
414     DataVector<uint8_t> data,
415     ByteString image_encoding,
416     RetainPtr<const CPDF_Dictionary> image_params)
417     : data(std::move(data)),
418       image_encoding(std::move(image_encoding)),
419       image_params(std::move(image_params)) {}
420 
421 PDFDataDecodeResult::PDFDataDecodeResult(PDFDataDecodeResult&& that) noexcept =
422     default;
423 
424 PDFDataDecodeResult& PDFDataDecodeResult::operator=(
425     PDFDataDecodeResult&& that) noexcept = default;
426 
427 PDFDataDecodeResult::~PDFDataDecodeResult() = default;
428 
PDF_DataDecode(pdfium::span<const uint8_t> src_span,uint32_t last_estimated_size,bool bImageAcc,const DecoderArray & decoder_array)429 std::optional<PDFDataDecodeResult> PDF_DataDecode(
430     pdfium::span<const uint8_t> src_span,
431     uint32_t last_estimated_size,
432     bool bImageAcc,
433     const DecoderArray& decoder_array) {
434   PDFDataDecodeResult result;
435   // May be changed to point to `result.data` in the for-loop below. So put it
436   // below `result` and let it get destroyed first.
437   pdfium::span<const uint8_t> last_span = src_span;
438   const size_t nSize = decoder_array.size();
439   for (size_t i = 0; i < nSize; ++i) {
440     int estimated_size = i == nSize - 1 ? last_estimated_size : 0;
441     ByteString decoder = decoder_array[i].first;
442     RetainPtr<const CPDF_Dictionary> pParam =
443         ToDictionary(decoder_array[i].second);
444     DataVector<uint8_t> new_buf;
445     uint32_t bytes_consumed = FX_INVALID_OFFSET;
446     if (decoder == "Crypt")
447       continue;
448     if (decoder == "FlateDecode" || decoder == "Fl") {
449       if (bImageAcc && i == nSize - 1) {
450         result.image_encoding = "FlateDecode";
451         result.image_params = std::move(pParam);
452         return result;
453       }
454       DataAndBytesConsumed decode_result = FlateOrLZWDecode(
455           /*use_lzw=*/false, last_span, pParam, estimated_size);
456       new_buf = std::move(decode_result.data);
457       bytes_consumed = decode_result.bytes_consumed;
458     } else if (decoder == "LZWDecode" || decoder == "LZW") {
459       DataAndBytesConsumed decode_result =
460           FlateOrLZWDecode(/*use_lzw=*/true, last_span, pParam, estimated_size);
461       new_buf = std::move(decode_result.data);
462       bytes_consumed = decode_result.bytes_consumed;
463     } else if (decoder == "ASCII85Decode" || decoder == "A85") {
464       DataAndBytesConsumed decode_result = A85Decode(last_span);
465       new_buf = std::move(decode_result.data);
466       bytes_consumed = decode_result.bytes_consumed;
467     } else if (decoder == "ASCIIHexDecode" || decoder == "AHx") {
468       DataAndBytesConsumed decode_result = HexDecode(last_span);
469       new_buf = std::move(decode_result.data);
470       bytes_consumed = decode_result.bytes_consumed;
471     } else if (decoder == "RunLengthDecode" || decoder == "RL") {
472       if (bImageAcc && i == nSize - 1) {
473         result.image_encoding = "RunLengthDecode";
474         result.image_params = std::move(pParam);
475         return result;
476       }
477       DataAndBytesConsumed decode_result = RunLengthDecode(last_span);
478       new_buf = std::move(decode_result.data);
479       bytes_consumed = decode_result.bytes_consumed;
480     } else {
481       // If we get here, assume it's an image decoder.
482       if (decoder == "DCT") {
483         decoder = "DCTDecode";
484       } else if (decoder == "CCF") {
485         decoder = "CCITTFaxDecode";
486       }
487       result.image_encoding = std::move(decoder);
488       result.image_params = std::move(pParam);
489       return result;
490     }
491     if (bytes_consumed == FX_INVALID_OFFSET) {
492       return std::nullopt;
493     }
494 
495     last_span = pdfium::make_span(new_buf);
496     result.data = std::move(new_buf);
497   }
498 
499   result.image_encoding.clear();
500   result.image_params = nullptr;
501   return result;
502 }
503 
StripLanguageCodes(pdfium::span<wchar_t> s,size_t n)504 static size_t StripLanguageCodes(pdfium::span<wchar_t> s, size_t n) {
505   size_t dest_pos = 0;
506   for (size_t i = 0; i < n; ++i) {
507     // 0x001B is a begin/end marker for language metadata region that
508     // should not be in the decoded text.
509     if (s[i] == 0x001B) {
510       for (++i; i < n && s[i] != 0x001B; ++i) {
511         // No for-loop body. The loop searches for the terminating 0x001B.
512       }
513       continue;
514     }
515     s[dest_pos++] = s[i];
516   }
517   return dest_pos;
518 }
519 
PDF_DecodeText(pdfium::span<const uint8_t> span)520 WideString PDF_DecodeText(pdfium::span<const uint8_t> span) {
521   size_t dest_pos = 0;
522   WideString result;
523   if (span.size() >= 2 && ((span[0] == 0xfe && span[1] == 0xff) ||
524                            (span[0] == 0xff && span[1] == 0xfe))) {
525     if (span[0] == 0xfe) {
526       result = WideString::FromUTF16BE(span.subspan(2));
527     } else {
528       result = WideString::FromUTF16LE(span.subspan(2));
529     }
530     pdfium::span<wchar_t> dest_buf = result.GetBuffer(result.GetLength());
531     dest_pos = StripLanguageCodes(dest_buf, result.GetLength());
532   } else if (span.size() >= 3 && span[0] == 0xef && span[1] == 0xbb &&
533              span[2] == 0xbf) {
534     result = WideString::FromUTF8(ByteStringView(span.subspan(3)));
535     pdfium::span<wchar_t> dest_buf = result.GetBuffer(result.GetLength());
536     dest_pos = StripLanguageCodes(dest_buf, result.GetLength());
537   } else {
538     pdfium::span<wchar_t> dest_buf = result.GetBuffer(span.size());
539     for (size_t i = 0; i < span.size(); ++i)
540       dest_buf[i] = kPDFDocEncoding[span[i]];
541     dest_pos = span.size();
542   }
543   result.ReleaseBuffer(dest_pos);
544   return result;
545 }
546 
PDF_EncodeText(WideStringView str)547 ByteString PDF_EncodeText(WideStringView str) {
548   size_t i = 0;
549   size_t len = str.GetLength();
550   ByteString result;
551   {
552     pdfium::span<char> dest_buf = result.GetBuffer(len);
553     for (i = 0; i < len; ++i) {
554       int code;
555       for (code = 0; code < 256; ++code) {
556         if (kPDFDocEncoding[code] == str[i])
557           break;
558       }
559       if (code == 256)
560         break;
561 
562       dest_buf[i] = code;
563     }
564   }
565   result.ReleaseBuffer(i);
566   if (i == len)
567     return result;
568 
569   if (len > INT_MAX / 2 - 1) {
570     result.ReleaseBuffer(0);
571     return result;
572   }
573 
574   size_t dest_index = 0;
575   {
576     std::u16string utf16 = FX_UTF16Encode(str);
577     // 2 bytes required per UTF-16 code unit.
578     pdfium::span<uint8_t> dest_buf =
579         pdfium::as_writable_bytes(result.GetBuffer(utf16.size() * 2 + 2));
580 
581     dest_buf[dest_index++] = 0xfe;
582     dest_buf[dest_index++] = 0xff;
583     for (char16_t code_unit : utf16) {
584       dest_buf[dest_index++] = code_unit >> 8;
585       dest_buf[dest_index++] = static_cast<uint8_t>(code_unit);
586     }
587   }
588   result.ReleaseBuffer(dest_index);
589   return result;
590 }
591 
PDF_EncodeString(ByteStringView src)592 ByteString PDF_EncodeString(ByteStringView src) {
593   ByteString result;
594   result.Reserve(src.GetLength() + 2);
595   result += '(';
596   for (size_t i = 0; i < src.GetLength(); ++i) {
597     uint8_t ch = src[i];
598     if (ch == 0x0a) {
599       result += "\\n";
600       continue;
601     }
602     if (ch == 0x0d) {
603       result += "\\r";
604       continue;
605     }
606     if (ch == ')' || ch == '\\' || ch == '(')
607       result += '\\';
608     result += static_cast<char>(ch);
609   }
610   result += ')';
611   return result;
612 }
613 
PDF_HexEncodeString(ByteStringView src)614 ByteString PDF_HexEncodeString(ByteStringView src) {
615   ByteString result;
616   result.Reserve(2 * src.GetLength() + 2);
617   result += '<';
618   for (size_t i = 0; i < src.GetLength(); ++i) {
619     char buf[2];
620     FXSYS_IntToTwoHexChars(src[i], buf);
621     result += buf[0];
622     result += buf[1];
623   }
624   result += '>';
625   return result;
626 }
627