• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <iterator>
11 
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_indirect_object_holder.h"
15 #include "core/fpdfapi/parser/cpdf_name.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_string.h"
18 #include "core/fxcrt/bytestring.h"
19 #include "core/fxcrt/fx_memory_wrappers.h"
20 #include "core/fxcrt/string_view_template.h"
21 #include "core/fxcrt/widestring.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23 #include "testing/test_support.h"
24 #include "third_party/base/containers/span.h"
25 
26 namespace {
27 
28 // Converts a string literal into a `uint8_t` span.
29 template <size_t N>
ToSpan(const char (& array)[N])30 pdfium::span<const uint8_t> ToSpan(const char (&array)[N]) {
31   return pdfium::span(reinterpret_cast<const uint8_t*>(array), N - 1);
32 }
33 
34 // Converts a string literal into a `ByteString`.
35 template <size_t N>
ToByteString(const char (& array)[N])36 ByteString ToByteString(const char (&array)[N]) {
37   return ByteString(array, N - 1);
38 }
39 
40 }  // namespace
41 
TEST(ParserDecodeTest,ValidateDecoderPipeline)42 TEST(ParserDecodeTest, ValidateDecoderPipeline) {
43   {
44     // Empty decoder list is always valid.
45     auto decoders = pdfium::MakeRetain<CPDF_Array>();
46     EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
47   }
48   {
49     // 1 decoder is almost always valid.
50     auto decoders = pdfium::MakeRetain<CPDF_Array>();
51     decoders->AppendNew<CPDF_Name>("FlateEncode");
52     EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
53   }
54   {
55     // 1 decoder is almost always valid, even with an unknown decoder.
56     auto decoders = pdfium::MakeRetain<CPDF_Array>();
57     decoders->AppendNew<CPDF_Name>("FooBar");
58     EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
59   }
60   {
61     // Valid 2 decoder pipeline.
62     auto decoders = pdfium::MakeRetain<CPDF_Array>();
63     decoders->AppendNew<CPDF_Name>("AHx");
64     decoders->AppendNew<CPDF_Name>("LZWDecode");
65     EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
66   }
67   {
68     // Valid 2 decoder pipeline.
69     auto decoders = pdfium::MakeRetain<CPDF_Array>();
70     decoders->AppendNew<CPDF_Name>("ASCII85Decode");
71     decoders->AppendNew<CPDF_Name>("ASCII85Decode");
72     EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
73   }
74   {
75     // Valid 5 decoder pipeline.
76     auto decoders = pdfium::MakeRetain<CPDF_Array>();
77     decoders->AppendNew<CPDF_Name>("ASCII85Decode");
78     decoders->AppendNew<CPDF_Name>("A85");
79     decoders->AppendNew<CPDF_Name>("RunLengthDecode");
80     decoders->AppendNew<CPDF_Name>("FlateDecode");
81     decoders->AppendNew<CPDF_Name>("RL");
82     EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
83   }
84   {
85     // Valid 5 decoder pipeline, with an image decoder at the end.
86     auto decoders = pdfium::MakeRetain<CPDF_Array>();
87     decoders->AppendNew<CPDF_Name>("RunLengthDecode");
88     decoders->AppendNew<CPDF_Name>("ASCII85Decode");
89     decoders->AppendNew<CPDF_Name>("FlateDecode");
90     decoders->AppendNew<CPDF_Name>("LZW");
91     decoders->AppendNew<CPDF_Name>("DCTDecode");
92     EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
93   }
94   {
95     // Invalid 1 decoder pipeline due to wrong type.
96     auto decoders = pdfium::MakeRetain<CPDF_Array>();
97     decoders->AppendNew<CPDF_String>("FlateEncode", false);
98     EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
99   }
100   {
101     // Invalid 2 decoder pipeline, with 2 image decoders.
102     auto decoders = pdfium::MakeRetain<CPDF_Array>();
103     decoders->AppendNew<CPDF_Name>("DCTDecode");
104     decoders->AppendNew<CPDF_Name>("CCITTFaxDecode");
105     EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
106   }
107   {
108     // Invalid 2 decoder pipeline, with 1 image decoder at the start.
109     auto decoders = pdfium::MakeRetain<CPDF_Array>();
110     decoders->AppendNew<CPDF_Name>("DCTDecode");
111     decoders->AppendNew<CPDF_Name>("FlateDecode");
112     EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
113   }
114   {
115     // Invalid 2 decoder pipeline due to wrong type.
116     auto decoders = pdfium::MakeRetain<CPDF_Array>();
117     decoders->AppendNew<CPDF_String>("AHx", false);
118     decoders->AppendNew<CPDF_Name>("LZWDecode");
119     EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
120   }
121   {
122     // Invalid 5 decoder pipeline.
123     auto decoders = pdfium::MakeRetain<CPDF_Array>();
124     decoders->AppendNew<CPDF_Name>("FlateDecode");
125     decoders->AppendNew<CPDF_Name>("FlateDecode");
126     decoders->AppendNew<CPDF_Name>("DCTDecode");
127     decoders->AppendNew<CPDF_Name>("FlateDecode");
128     decoders->AppendNew<CPDF_Name>("FlateDecode");
129     EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
130   }
131   {
132     // Invalid 5 decoder pipeline due to wrong type.
133     auto decoders = pdfium::MakeRetain<CPDF_Array>();
134     decoders->AppendNew<CPDF_Name>("ASCII85Decode");
135     decoders->AppendNew<CPDF_Name>("A85");
136     decoders->AppendNew<CPDF_Name>("RunLengthDecode");
137     decoders->AppendNew<CPDF_Name>("FlateDecode");
138     decoders->AppendNew<CPDF_String>("RL", false);
139     EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
140   }
141 }
142 
// Exercises ValidateDecoderPipeline() on filter arrays in which one entry is
// a reference to an object registered with a CPDF_IndirectObjectHolder.
TEST(ParserDecodeTest, ValidateDecoderPipelineWithIndirectObjects) {
  {
    // Two filters; the first is a reference to a name object.
    CPDF_IndirectObjectHolder holder;
    const uint32_t obj_num = holder.AddIndirectObject(
        pdfium::MakeRetain<CPDF_Name>(nullptr, "FlateDecode"));

    auto filters = pdfium::MakeRetain<CPDF_Array>();
    filters->AppendNew<CPDF_Reference>(&holder, obj_num);
    filters->AppendNew<CPDF_Name>("LZW");
    EXPECT_TRUE(ValidateDecoderPipeline(filters.Get()));
  }
  {
    // Five filters; the fourth is a reference to a name object and the image
    // decoder comes last.
    CPDF_IndirectObjectHolder holder;
    const uint32_t obj_num =
        holder.AddIndirectObject(pdfium::MakeRetain<CPDF_Name>(nullptr, "LZW"));

    auto filters = pdfium::MakeRetain<CPDF_Array>();
    filters->AppendNew<CPDF_Name>("RunLengthDecode");
    filters->AppendNew<CPDF_Name>("ASCII85Decode");
    filters->AppendNew<CPDF_Name>("FlateDecode");
    filters->AppendNew<CPDF_Reference>(&holder, obj_num);
    filters->AppendNew<CPDF_Name>("DCTDecode");
    EXPECT_TRUE(ValidateDecoderPipeline(filters.Get()));
  }
  {
    // Rejected: the referenced object is a string rather than a name.
    CPDF_IndirectObjectHolder holder;
    const uint32_t obj_num = holder.AddIndirectObject(
        pdfium::MakeRetain<CPDF_String>(nullptr, "FlateDecode", false));

    auto filters = pdfium::MakeRetain<CPDF_Array>();
    filters->AppendNew<CPDF_Reference>(&holder, obj_num);
    filters->AppendNew<CPDF_Name>("LZW");
    EXPECT_FALSE(ValidateDecoderPipeline(filters.Get()));
  }
  {
    // Rejected: the referenced object names an image decoder that is not in
    // the final position.
    CPDF_IndirectObjectHolder holder;
    const uint32_t obj_num = holder.AddIndirectObject(
        pdfium::MakeRetain<CPDF_Name>(nullptr, "DCTDecode"));

    auto filters = pdfium::MakeRetain<CPDF_Array>();
    filters->AppendNew<CPDF_Reference>(&holder, obj_num);
    filters->AppendNew<CPDF_Name>("LZW");
    EXPECT_FALSE(ValidateDecoderPipeline(filters.Get()));
  }
}
198 
199 // TODO(thestig): Test decoder params.
TEST(ParserDecodeTest,GetDecoderArray)200 TEST(ParserDecodeTest, GetDecoderArray) {
201   {
202     // Treat no filter as an empty filter array.
203     auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
204     absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
205     ASSERT_TRUE(decoder_array.has_value());
206     EXPECT_TRUE(decoder_array.value().empty());
207   }
208   {
209     // Wrong filter type.
210     auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
211     dict->SetNewFor<CPDF_String>("Filter", "RL", false);
212     absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
213     EXPECT_FALSE(decoder_array.has_value());
214   }
215   {
216     // Filter name.
217     auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
218     dict->SetNewFor<CPDF_Name>("Filter", "RL");
219     absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
220     ASSERT_TRUE(decoder_array.has_value());
221     ASSERT_EQ(1u, decoder_array.value().size());
222     EXPECT_EQ("RL", decoder_array.value()[0].first);
223   }
224   {
225     // Empty filter array.
226     auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
227     dict->SetNewFor<CPDF_Array>("Filter");
228     absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
229     ASSERT_TRUE(decoder_array.has_value());
230     EXPECT_TRUE(decoder_array.value().empty());
231   }
232   {
233     // Valid 1 element filter array.
234     auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
235     auto filter_array = dict->SetNewFor<CPDF_Array>("Filter");
236     filter_array->AppendNew<CPDF_Name>("FooBar");
237     absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
238     ASSERT_TRUE(decoder_array.has_value());
239     ASSERT_EQ(1u, decoder_array.value().size());
240     EXPECT_EQ("FooBar", decoder_array.value()[0].first);
241   }
242   {
243     // Valid 2 element filter array.
244     auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
245     auto filter_array = dict->SetNewFor<CPDF_Array>("Filter");
246     filter_array->AppendNew<CPDF_Name>("AHx");
247     filter_array->AppendNew<CPDF_Name>("LZWDecode");
248     absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
249     ASSERT_TRUE(decoder_array.has_value());
250     ASSERT_EQ(2u, decoder_array.value().size());
251     EXPECT_EQ("AHx", decoder_array.value()[0].first);
252     EXPECT_EQ("LZWDecode", decoder_array.value()[1].first);
253   }
254   {
255     // Invalid 2 element filter array.
256     auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
257     auto invalid_filter_array = dict->SetNewFor<CPDF_Array>("Filter");
258     invalid_filter_array->AppendNew<CPDF_Name>("DCTDecode");
259     invalid_filter_array->AppendNew<CPDF_Name>("CCITTFaxDecode");
260     absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
261     EXPECT_FALSE(decoder_array.has_value());
262   }
263 }
264 
// Table-driven check of A85Decode(): each row lists the encoded input, the
// expected decoded bytes, and the expected return value of the call.
TEST(ParserDecodeTest, A85Decode) {
  const pdfium::DecodeTestData kCases[] = {
      // Nothing to decode.
      STR_IN_OUT_CASE("", "", 0),
      // Only the end-of-data marker.
      STR_IN_OUT_CASE("~>", "", 0),
      // One full group followed by the marker.
      STR_IN_OUT_CASE("FCfN8~>", "test", 7),
      // Data after the marker is not decoded.
      STR_IN_OUT_CASE("FCfN8~>FCfN8", "test", 7),
      // Whitespace between characters is ignored.
      STR_IN_OUT_CASE("\t F C\r\n \tf N 8 ~>", "test", 17),
      // Input without an end-of-data marker.
      STR_IN_OUT_CASE("@3B0)DJj_BF*)>@Gp#-s", "a funny story :)", 20),
      // Input whose length is not a whole group.
      STR_IN_OUT_CASE("12A", "2k", 3),
      // Decoding stops at characters outside the alphabet.
      STR_IN_OUT_CASE("FCfN8FCfN8vw", "testtest", 11),
  };
  for (const pdfium::DecodeTestData& item : kCases) {
    std::unique_ptr<uint8_t, FxFreeDeleter> decoded;
    uint32_t decoded_size = 0;
    EXPECT_EQ(item.processed_size,
              A85Decode({item.input, item.input_size}, &decoded, &decoded_size))
        << "for case " << item.input;
    // Stop this test if the sizes differ, so the byte loop stays in bounds.
    ASSERT_EQ(item.expected_size, decoded_size);
    const uint8_t* actual = decoded.get();
    for (size_t pos = 0; pos < decoded_size; ++pos) {
      EXPECT_EQ(item.expected[pos], actual[pos])
          << "for case " << item.input << " char " << pos;
    }
  }
}
299 
// NOTE: python's zlib.compress() and zlib.decompress() may be useful for
// external validation of the FlateDecode/FlateEncode test cases.
// Table-driven check of FlateDecode(): each row lists the compressed input,
// the expected inflated bytes, and the expected return value of the call.
TEST(FPDFParserDecodeEmbedderTest, FlateDecode) {
  static const pdfium::DecodeTestData flate_decode_cases[] = {
      STR_IN_OUT_CASE("", "", 0),
      STR_IN_OUT_CASE("preposterous nonsense", "", 2),
      STR_IN_OUT_CASE("\x78\x9c\x03\x00\x00\x00\x00\x01", "", 8),
      STR_IN_OUT_CASE("\x78\x9c\x53\x00\x00\x00\x21\x00\x21", " ", 9),
      STR_IN_OUT_CASE("\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97", "123", 11),
      STR_IN_OUT_CASE("\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00", "\x00\xff",
                      10),
      STR_IN_OUT_CASE(
          "\x78\x9c\x33\x54\x30\x00\x42\x5d\x43\x05\x23\x4b\x05\x73\x33\x63"
          "\x85\xe4\x5c\x2e\x90\x80\xa9\xa9\xa9\x82\xb9\xb1\xa9\x42\x51\x2a"
          "\x57\xb8\x42\x1e\x57\x21\x92\xa0\x89\x9e\xb1\xa5\x09\x92\x84\x9e"
          "\x85\x81\x81\x25\xd8\x14\x24\x26\xd0\x18\x43\x05\x10\x0c\x72\x57"
          "\x80\x30\x8a\xd2\xb9\xf4\xdd\x0d\x14\xd2\x8b\xc1\x46\x99\x59\x1a"
          "\x2b\x58\x1a\x9a\x83\x8c\x49\xe3\x0a\x04\x42\x00\x37\x4c\x1b\x42",
          "1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n"
          "W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n"
          "0 0 693 917 re\nf\nQ\nQ\n",
          96),
  };

  for (size_t i = 0; i < std::size(flate_decode_cases); ++i) {
    const pdfium::DecodeTestData& data = flate_decode_cases[i];
    std::unique_ptr<uint8_t, FxFreeDeleter> buf;
    // Zero-initialized, like the A85Decode/HexDecode tests, so the size
    // comparison below never reads an indeterminate value.
    uint32_t buf_size = 0;
    EXPECT_EQ(data.processed_size,
              FlateDecode({data.input, data.input_size}, &buf, &buf_size))
        << " for case " << i;
    ASSERT_TRUE(buf);
    EXPECT_EQ(data.expected_size, buf_size) << " for case " << i;
    // Only compare contents when the sizes agree, so memcmp() stays in
    // bounds for both buffers.
    if (data.expected_size != buf_size)
      continue;
    EXPECT_EQ(0, memcmp(data.expected, buf.get(), data.expected_size))
        << " for case " << i;
  }
}
339 
// Table-driven check of FlateEncode(): each row lists the raw input and the
// exact compressed bytes expected back.
TEST(ParserDecodeTest, FlateEncode) {
  static const pdfium::StrFuncTestData kCases[] = {
      STR_IN_OUT_CASE("", "\x78\x9c\x03\x00\x00\x00\x00\x01"),
      STR_IN_OUT_CASE(" ", "\x78\x9c\x53\x00\x00\x00\x21\x00\x21"),
      STR_IN_OUT_CASE("123", "\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97"),
      STR_IN_OUT_CASE("\x00\xff", "\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00"),
      STR_IN_OUT_CASE(
          "1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n"
          "W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n"
          "0 0 693 917 re\nf\nQ\nQ\n",
          "\x78\x9c\x33\x54\x30\x00\x42\x5d\x43\x05\x23\x4b\x05\x73\x33\x63"
          "\x85\xe4\x5c\x2e\x90\x80\xa9\xa9\xa9\x82\xb9\xb1\xa9\x42\x51\x2a"
          "\x57\xb8\x42\x1e\x57\x21\x92\xa0\x89\x9e\xb1\xa5\x09\x92\x84\x9e"
          "\x85\x81\x81\x25\xd8\x14\x24\x26\xd0\x18\x43\x05\x10\x0c\x72\x57"
          "\x80\x30\x8a\xd2\xb9\xf4\xdd\x0d\x14\xd2\x8b\xc1\x46\x99\x59\x1a"
          "\x2b\x58\x1a\x9a\x83\x8c\x49\xe3\x0a\x04\x42\x00\x37\x4c\x1b\x42"),
  };

  for (size_t index = 0; index < std::size(kCases); ++index) {
    const pdfium::StrFuncTestData& item = kCases[index];
    DataVector<uint8_t> encoded = FlateEncode({item.input, item.input_size});
    EXPECT_EQ(item.expected_size, encoded.size()) << " for case " << index;
    // Contents are compared only when the sizes agree, which keeps memcmp()
    // within both buffers.
    if (item.expected_size == encoded.size()) {
      EXPECT_EQ(0, memcmp(item.expected, encoded.data(), item.expected_size))
          << " for case " << index;
    }
  }
}
368 
// Table-driven check of HexDecode(): each row lists the hex input, the
// expected decoded bytes, and the expected return value of the call.
TEST(ParserDecodeTest, HexDecode) {
  const pdfium::DecodeTestData kCases[] = {
      // Nothing to decode.
      STR_IN_OUT_CASE("", "", 0),
      // Only the end-of-data marker.
      STR_IN_OUT_CASE(">", "", 1),
      // Whitespace followed by the marker.
      STR_IN_OUT_CASE("\t   \r\n>", "", 7),
      // Two full bytes, then the marker; the trailing text is not decoded.
      STR_IN_OUT_CASE("12Ac>zzz", "\x12\xac", 5),
      // Whitespace between digits is ignored.
      STR_IN_OUT_CASE("12 Ac\t02\r\nBF>zzz>", "\x12\xac\x02\xbf", 13),
      // An odd number of digits: the last byte's low nibble is zero.
      STR_IN_OUT_CASE("12A>zzz", "\x12\xa0", 4),
      // Non-hex characters before the marker are skipped.
      STR_IN_OUT_CASE("12tk  \tAc>zzz", "\x12\xac", 10),
      // Input without an end-of-data marker.
      STR_IN_OUT_CASE("12AcED3c3456", "\x12\xac\xed\x3c\x34\x56", 12),
  };
  for (const pdfium::DecodeTestData& item : kCases) {
    std::unique_ptr<uint8_t, FxFreeDeleter> decoded;
    uint32_t decoded_size = 0;
    EXPECT_EQ(item.processed_size,
              HexDecode({item.input, item.input_size}, &decoded, &decoded_size))
        << "for case " << item.input;
    // Stop this test if the sizes differ, so the byte loop stays in bounds.
    ASSERT_EQ(item.expected_size, decoded_size);
    const uint8_t* actual = decoded.get();
    for (size_t pos = 0; pos < decoded_size; ++pos) {
      EXPECT_EQ(item.expected[pos], actual[pos])
          << "for case " << item.input << " char " << pos;
    }
  }
}
403 
// Checks PDF_DecodeText() on plain byte strings and on strings that start
// with the 0xFE 0xFF byte-order mark (big-endian UTF-16 payload).
TEST(ParserDecodeTest, DecodeText) {
  // Empty src string.
  EXPECT_EQ(L"", PDF_DecodeText(ToSpan("")));

  // ASCII text.
  EXPECT_EQ(L"the quick\tfox", PDF_DecodeText(ToSpan("the quick\tfox")));

  // Unicode text.
  EXPECT_EQ(L"\x0330\x0331",
            PDF_DecodeText(ToSpan("\xFE\xFF\x03\x30\x03\x31")));

  // More Unicode text.
  EXPECT_EQ(
      L"\x7F51\x9875\x0020\x56FE\x7247\x0020"
      L"\x8D44\x8BAF\x66F4\x591A\x0020\x00BB",
      PDF_DecodeText(
          ToSpan("\xFE\xFF\x7F\x51\x98\x75\x00\x20\x56\xFE\x72\x47\x00"
                 "\x20\x8D\x44\x8B\xAF\x66\xF4\x59\x1A\x00\x20\x00\xBB")));

  // Supplementary Unicode text. The surrogate pair D83C DFA8 encodes
  // U+1F3A8; the expected value is spelled as a universal character name so
  // it does not depend on the source file's encoding.
  EXPECT_EQ(L"\U0001F3A8",
            PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x3C\xDF\xA8")));
}
426 
427 // https://crbug.com/pdfium/182
// Checks that PDF_DecodeText() drops the bytes enclosed between a pair of
// U+001B markers in UTF-16BE input and keeps the text around them.
TEST(ParserDecodeTest, DecodeTextWithUnicodeEscapes) {
  // The first three inputs decode to the same text.
  const wchar_t kSpaceAndTwoChars[] = L"\x0020\x5370\x5237";

  // Two bytes between the markers.
  EXPECT_EQ(kSpaceAndTwoChars,
            PDF_DecodeText(ToSpan(
                "\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37")));

  // Same input with one extra trailing byte.
  EXPECT_EQ(
      kSpaceAndTwoChars,
      PDF_DecodeText(ToSpan(
          "\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37\x29")));

  // Four bytes between the markers.
  EXPECT_EQ(
      kSpaceAndTwoChars,
      PDF_DecodeText(ToSpan(
          "\xFE\xFF\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x00\x20\x53\x70\x52\x37")));

  // Markers in the middle of the text rather than at the start.
  EXPECT_EQ(L"\x0020\x5237",
            PDF_DecodeText(ToSpan(
                "\xFE\xFF\x00\x20\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x52\x37")));
}
444 
445 // https://crbug.com/1001159
// Checks PDF_DecodeText() on UTF-16BE input where two U+001B markers appear
// back to back with nothing between them.
TEST(ParserDecodeTest, DecodeTextWithInvalidUnicodeEscapes) {
  // Only the two markers.
  const auto markers_only = PDF_DecodeText(ToSpan("\xFE\xFF\x00\x1B\x00\x1B"));
  EXPECT_EQ(L"", markers_only);

  // The two markers followed by a single stray byte.
  const auto markers_and_stray_byte =
      PDF_DecodeText(ToSpan("\xFE\xFF\x00\x1B\x00\x1B\x20"));
  EXPECT_EQ(L"", markers_and_stray_byte);

  // The two markers followed by a complete U+0020 code unit.
  const auto markers_and_space =
      PDF_DecodeText(ToSpan("\xFE\xFF\x00\x1B\x00\x1B\x00\x20"));
  EXPECT_EQ(L"\x0020", markers_and_space);
}
452 
// Checks that PDF_DecodeText() passes unpaired UTF-16 surrogates through
// unchanged, both alone and next to a valid surrogate pair. The valid pair
// D83C DFA8 encodes U+1F3A8, written here as a universal character name so
// the expectation does not depend on the source file's encoding.
TEST(ParserDecodeTest, DecodeTextWithUnpairedSurrogates) {
  EXPECT_EQ(L"\xD800", PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x00"))) << "High";
  EXPECT_EQ(L"\xDC00", PDF_DecodeText(ToSpan("\xFE\xFF\xDC\x00"))) << "Low";
  // A lone high surrogate directly before a valid pair.
  EXPECT_EQ(L"\xD800\U0001F3A8",
            PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x00\xD8\x3C\xDF\xA8")))
      << "High-high";
  // A lone low surrogate directly after a valid pair.
  EXPECT_EQ(L"\U0001F3A8\xDC00",
            PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x3C\xDF\xA8\xDC\x00")))
      << "Low-low";
}
463 
// Checks PDF_EncodeText(): ASCII input comes back unchanged, while the other
// inputs come back as 0xFE 0xFF followed by big-endian UTF-16 code units.
TEST(ParserDecodeTest, EncodeText) {
  // Empty src string.
  EXPECT_EQ("", PDF_EncodeText(L""));

  // ASCII text.
  EXPECT_EQ("the quick\tfox", PDF_EncodeText(L"the quick\tfox"));

  // Unicode text.
  EXPECT_EQ("\xFE\xFF\x03\x30\x03\x31", PDF_EncodeText(L"\x0330\x0331"));

  // More Unicode text. The expected bytes contain embedded NULs, so they are
  // built with ToByteString() to carry an explicit length.
  EXPECT_EQ(
      ToByteString("\xFE\xFF\x7F\x51\x98\x75\x00\x20\x56\xFE\x72\x47\x00"
                   "\x20\x8D\x44\x8B\xAF\x66\xF4\x59\x1A\x00\x20\x00\xBB"),
      PDF_EncodeText(L"\x7F51\x9875\x0020\x56FE\x7247\x0020"
                     L"\x8D44\x8BAF\x66F4\x591A\x0020\x00BB"));

  // Supplementary Unicode text. U+1F3A8 is encoded as the surrogate pair
  // D83C DFA8; the input is spelled as a universal character name so it does
  // not depend on the source file's encoding.
  EXPECT_EQ("\xFE\xFF\xD8\x3C\xDF\xA8", PDF_EncodeText(L"\U0001F3A8"));
}
484 
// Decodes and re-encodes every single-byte value and checks the result. All
// values are expected to survive unchanged except 0x7F, 0x9F and 0xAD, which
// are expected to come back as a single NUL byte.
TEST(ParserDecodeTest, RoundTripText) {
  for (int code = 0; code < 256; ++code) {
    ByteString input(static_cast<char>(code));
    const auto decoded = PDF_DecodeText(input.raw_span());
    ByteString round_tripped = PDF_EncodeText(decoded.AsStringView());

    const bool is_undefined = code == 0x7F || code == 0x9F || code == 0xAD;
    if (is_undefined) {
      EXPECT_EQ(ByteString('\0'), round_tripped) << "PDFDocEncoding undefined";
    } else {
      EXPECT_EQ(input, round_tripped) << "PDFDocEncoding: " << code;
    }
  }
}
504