1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
6
7 #include <stddef.h>
8 #include <stdint.h>
9
10 #include <iterator>
11
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_indirect_object_holder.h"
15 #include "core/fpdfapi/parser/cpdf_name.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_string.h"
18 #include "core/fxcrt/bytestring.h"
19 #include "core/fxcrt/fx_memory_wrappers.h"
20 #include "core/fxcrt/string_view_template.h"
21 #include "core/fxcrt/widestring.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23 #include "testing/test_support.h"
24 #include "third_party/base/containers/span.h"
25
26 namespace {
27
28 // Converts a string literal into a `uint8_t` span.
29 template <size_t N>
ToSpan(const char (& array)[N])30 pdfium::span<const uint8_t> ToSpan(const char (&array)[N]) {
31 return pdfium::span(reinterpret_cast<const uint8_t*>(array), N - 1);
32 }
33
34 // Converts a string literal into a `ByteString`.
35 template <size_t N>
ToByteString(const char (& array)[N])36 ByteString ToByteString(const char (&array)[N]) {
37 return ByteString(array, N - 1);
38 }
39
40 } // namespace
41
// Exercises ValidateDecoderPipeline() on filter arrays built from direct
// objects only: first the pipelines expected to be accepted, then the ones
// expected to be rejected.
TEST(ParserDecodeTest, ValidateDecoderPipeline) {
  {
    // A pipeline with no decoders is accepted.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    EXPECT_TRUE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // A single name entry is accepted.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("FlateEncode");
    EXPECT_TRUE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // A single name entry is accepted even when the decoder is unknown.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("FooBar");
    EXPECT_TRUE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Two decoders: abbreviated name followed by a full name.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("AHx");
    pipeline->AppendNew<CPDF_Name>("LZWDecode");
    EXPECT_TRUE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Two decoders: the same decoder repeated.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("ASCII85Decode");
    pipeline->AppendNew<CPDF_Name>("ASCII85Decode");
    EXPECT_TRUE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Five decoders, none of which is an image decoder.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("ASCII85Decode");
    pipeline->AppendNew<CPDF_Name>("A85");
    pipeline->AppendNew<CPDF_Name>("RunLengthDecode");
    pipeline->AppendNew<CPDF_Name>("FlateDecode");
    pipeline->AppendNew<CPDF_Name>("RL");
    EXPECT_TRUE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Five decoders where only the last one is an image decoder.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("RunLengthDecode");
    pipeline->AppendNew<CPDF_Name>("ASCII85Decode");
    pipeline->AppendNew<CPDF_Name>("FlateDecode");
    pipeline->AppendNew<CPDF_Name>("LZW");
    pipeline->AppendNew<CPDF_Name>("DCTDecode");
    EXPECT_TRUE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Rejected: the only entry is a string rather than a name.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_String>("FlateEncode", false);
    EXPECT_FALSE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Rejected: two image decoders in one pipeline.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("DCTDecode");
    pipeline->AppendNew<CPDF_Name>("CCITTFaxDecode");
    EXPECT_FALSE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Rejected: an image decoder that is not the last entry.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("DCTDecode");
    pipeline->AppendNew<CPDF_Name>("FlateDecode");
    EXPECT_FALSE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Rejected: the first of two entries is a string rather than a name.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_String>("AHx", false);
    pipeline->AppendNew<CPDF_Name>("LZWDecode");
    EXPECT_FALSE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Rejected: an image decoder in the middle of five entries.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("FlateDecode");
    pipeline->AppendNew<CPDF_Name>("FlateDecode");
    pipeline->AppendNew<CPDF_Name>("DCTDecode");
    pipeline->AppendNew<CPDF_Name>("FlateDecode");
    pipeline->AppendNew<CPDF_Name>("FlateDecode");
    EXPECT_FALSE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Rejected: the last of five entries is a string rather than a name.
    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("ASCII85Decode");
    pipeline->AppendNew<CPDF_Name>("A85");
    pipeline->AppendNew<CPDF_Name>("RunLengthDecode");
    pipeline->AppendNew<CPDF_Name>("FlateDecode");
    pipeline->AppendNew<CPDF_String>("RL", false);
    EXPECT_FALSE(ValidateDecoderPipeline(pipeline.Get()));
  }
}
142
// Exercises ValidateDecoderPipeline() on filter arrays in which one entry is
// a reference to an object registered with a CPDF_IndirectObjectHolder.
TEST(ParserDecodeTest, ValidateDecoderPipelineWithIndirectObjects) {
  {
    // Accepted: the referenced object is a non-image decoder name.
    CPDF_IndirectObjectHolder holder;
    const uint32_t obj_num = holder.AddIndirectObject(
        pdfium::MakeRetain<CPDF_Name>(nullptr, "FlateDecode"));

    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Reference>(&holder, obj_num);
    pipeline->AppendNew<CPDF_Name>("LZW");
    EXPECT_TRUE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Accepted: five entries, one of them a reference, with the only image
    // decoder in last position.
    CPDF_IndirectObjectHolder holder;
    const uint32_t obj_num =
        holder.AddIndirectObject(pdfium::MakeRetain<CPDF_Name>(nullptr, "LZW"));

    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Name>("RunLengthDecode");
    pipeline->AppendNew<CPDF_Name>("ASCII85Decode");
    pipeline->AppendNew<CPDF_Name>("FlateDecode");
    pipeline->AppendNew<CPDF_Reference>(&holder, obj_num);
    pipeline->AppendNew<CPDF_Name>("DCTDecode");
    EXPECT_TRUE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Rejected: the referenced object is a string rather than a name.
    CPDF_IndirectObjectHolder holder;
    const uint32_t obj_num = holder.AddIndirectObject(
        pdfium::MakeRetain<CPDF_String>(nullptr, "FlateDecode", false));

    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Reference>(&holder, obj_num);
    pipeline->AppendNew<CPDF_Name>("LZW");
    EXPECT_FALSE(ValidateDecoderPipeline(pipeline.Get()));
  }
  {
    // Rejected: the referenced object names an image decoder, and it is not
    // the last entry of the pipeline.
    CPDF_IndirectObjectHolder holder;
    const uint32_t obj_num = holder.AddIndirectObject(
        pdfium::MakeRetain<CPDF_Name>(nullptr, "DCTDecode"));

    auto pipeline = pdfium::MakeRetain<CPDF_Array>();
    pipeline->AppendNew<CPDF_Reference>(&holder, obj_num);
    pipeline->AppendNew<CPDF_Name>("LZW");
    EXPECT_FALSE(ValidateDecoderPipeline(pipeline.Get()));
  }
}
198
199 // TODO(thestig): Test decoder params.
// Exercises GetDecoderArray() with the different shapes the "Filter" entry
// of a dictionary can take: absent, a string, a single name, and arrays of
// names. Only the decoder names (the `first` member of each returned entry)
// are checked here.
TEST(ParserDecodeTest, GetDecoderArray) {
  {
    // A dictionary without "Filter" yields an empty decoder list.
    auto stream_dict = pdfium::MakeRetain<CPDF_Dictionary>();
    absl::optional<DecoderArray> result = GetDecoderArray(stream_dict);
    ASSERT_TRUE(result.has_value());
    EXPECT_TRUE(result.value().empty());
  }
  {
    // "Filter" as a string is rejected.
    auto stream_dict = pdfium::MakeRetain<CPDF_Dictionary>();
    stream_dict->SetNewFor<CPDF_String>("Filter", "RL", false);
    absl::optional<DecoderArray> result = GetDecoderArray(stream_dict);
    EXPECT_FALSE(result.has_value());
  }
  {
    // "Filter" as a single name yields a one-entry decoder list.
    auto stream_dict = pdfium::MakeRetain<CPDF_Dictionary>();
    stream_dict->SetNewFor<CPDF_Name>("Filter", "RL");
    absl::optional<DecoderArray> result = GetDecoderArray(stream_dict);
    ASSERT_TRUE(result.has_value());
    const DecoderArray& entries = result.value();
    ASSERT_EQ(1u, entries.size());
    EXPECT_EQ("RL", entries[0].first);
  }
  {
    // "Filter" as an empty array yields an empty decoder list.
    auto stream_dict = pdfium::MakeRetain<CPDF_Dictionary>();
    stream_dict->SetNewFor<CPDF_Array>("Filter");
    absl::optional<DecoderArray> result = GetDecoderArray(stream_dict);
    ASSERT_TRUE(result.has_value());
    EXPECT_TRUE(result.value().empty());
  }
  {
    // "Filter" as a one-element array.
    auto stream_dict = pdfium::MakeRetain<CPDF_Dictionary>();
    auto filters = stream_dict->SetNewFor<CPDF_Array>("Filter");
    filters->AppendNew<CPDF_Name>("FooBar");
    absl::optional<DecoderArray> result = GetDecoderArray(stream_dict);
    ASSERT_TRUE(result.has_value());
    const DecoderArray& entries = result.value();
    ASSERT_EQ(1u, entries.size());
    EXPECT_EQ("FooBar", entries[0].first);
  }
  {
    // "Filter" as a two-element array; order is preserved.
    auto stream_dict = pdfium::MakeRetain<CPDF_Dictionary>();
    auto filters = stream_dict->SetNewFor<CPDF_Array>("Filter");
    filters->AppendNew<CPDF_Name>("AHx");
    filters->AppendNew<CPDF_Name>("LZWDecode");
    absl::optional<DecoderArray> result = GetDecoderArray(stream_dict);
    ASSERT_TRUE(result.has_value());
    const DecoderArray& entries = result.value();
    ASSERT_EQ(2u, entries.size());
    EXPECT_EQ("AHx", entries[0].first);
    EXPECT_EQ("LZWDecode", entries[1].first);
  }
  {
    // "Filter" as a two-element array holding two image decoders is rejected.
    auto stream_dict = pdfium::MakeRetain<CPDF_Dictionary>();
    auto bad_filters = stream_dict->SetNewFor<CPDF_Array>("Filter");
    bad_filters->AppendNew<CPDF_Name>("DCTDecode");
    bad_filters->AppendNew<CPDF_Name>("CCITTFaxDecode");
    absl::optional<DecoderArray> result = GetDecoderArray(stream_dict);
    EXPECT_FALSE(result.has_value());
  }
}
264
// Table-driven check of A85Decode(). Each row gives the encoded input, the
// expected decoded bytes, and the value A85Decode() is expected to return.
TEST(ParserDecodeTest, A85Decode) {
  const pdfium::DecodeTestData kTestData[] = {
      // Nothing to decode.
      STR_IN_OUT_CASE("", "", 0),
      // End marker only.
      STR_IN_OUT_CASE("~>", "", 0),
      // One full group followed by the end marker.
      STR_IN_OUT_CASE("FCfN8~>", "test", 7),
      // Data after the end marker is not decoded.
      STR_IN_OUT_CASE("FCfN8~>FCfN8", "test", 7),
      // Whitespace between characters is skipped.
      STR_IN_OUT_CASE("\t F C\r\n \tf N 8 ~>", "test", 17),
      // Input without an end marker.
      STR_IN_OUT_CASE("@3B0)DJj_BF*)>@Gp#-s", "a funny story :)", 20),
      // Input whose length is not a whole number of groups.
      STR_IN_OUT_CASE("12A", "2k", 3),
      // Decoding stops at a character outside the alphabet.
      STR_IN_OUT_CASE("FCfN8FCfN8vw", "testtest", 11),
  };
  for (const auto& entry : kTestData) {
    std::unique_ptr<uint8_t, FxFreeDeleter> decoded;
    uint32_t decoded_size = 0;
    const auto consumed =
        A85Decode({entry.input, entry.input_size}, &decoded, &decoded_size);
    EXPECT_EQ(entry.processed_size, consumed) << "for case " << entry.input;
    // Bail out before the byte-wise comparison if the sizes disagree.
    ASSERT_EQ(entry.expected_size, decoded_size);
    const uint8_t* bytes = decoded.get();
    for (size_t pos = 0; pos < decoded_size; ++pos) {
      EXPECT_EQ(entry.expected[pos], bytes[pos])
          << "for case " << entry.input << " char " << pos;
    }
  }
}
299
// NOTE: python's zlib.compress() and zlib.decompress() may be useful for
// external validation of the FlateDecode/FlateEncode test cases.
// Table-driven check of FlateDecode(). Each row gives the compressed input,
// the expected decompressed bytes, and the value FlateDecode() is expected to
// return. Failures are reported by row index because the inputs are binary.
TEST(FPDFParserDecodeEmbedderTest, FlateDecode) {
  static const pdfium::DecodeTestData flate_decode_cases[] = {
      STR_IN_OUT_CASE("", "", 0),
      STR_IN_OUT_CASE("preposterous nonsense", "", 2),
      STR_IN_OUT_CASE("\x78\x9c\x03\x00\x00\x00\x00\x01", "", 8),
      STR_IN_OUT_CASE("\x78\x9c\x53\x00\x00\x00\x21\x00\x21", " ", 9),
      STR_IN_OUT_CASE("\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97", "123", 11),
      STR_IN_OUT_CASE("\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00", "\x00\xff",
                      10),
      STR_IN_OUT_CASE(
          "\x78\x9c\x33\x54\x30\x00\x42\x5d\x43\x05\x23\x4b\x05\x73\x33\x63"
          "\x85\xe4\x5c\x2e\x90\x80\xa9\xa9\xa9\x82\xb9\xb1\xa9\x42\x51\x2a"
          "\x57\xb8\x42\x1e\x57\x21\x92\xa0\x89\x9e\xb1\xa5\x09\x92\x84\x9e"
          "\x85\x81\x81\x25\xd8\x14\x24\x26\xd0\x18\x43\x05\x10\x0c\x72\x57"
          "\x80\x30\x8a\xd2\xb9\xf4\xdd\x0d\x14\xd2\x8b\xc1\x46\x99\x59\x1a"
          "\x2b\x58\x1a\x9a\x83\x8c\x49\xe3\x0a\x04\x42\x00\x37\x4c\x1b\x42",
          "1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n"
          "W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n"
          "0 0 693 917 re\nf\nQ\nQ\n",
          96),
  };

  for (size_t i = 0; i < std::size(flate_decode_cases); ++i) {
    const pdfium::DecodeTestData& data = flate_decode_cases[i];
    std::unique_ptr<uint8_t, FxFreeDeleter> buf;
    // Zero-initialized, like the out-params in the A85Decode and HexDecode
    // tests, so the size comparison below never reads an indeterminate value.
    uint32_t buf_size = 0;
    EXPECT_EQ(data.processed_size,
              FlateDecode({data.input, data.input_size}, &buf, &buf_size))
        << " for case " << i;
    ASSERT_TRUE(buf);
    EXPECT_EQ(data.expected_size, buf_size) << " for case " << i;
    // Only compare contents when the sizes agree; otherwise memcmp() could
    // read past the end of the shorter buffer.
    if (data.expected_size != buf_size)
      continue;
    EXPECT_EQ(0, memcmp(data.expected, buf.get(), data.expected_size))
        << " for case " << i;
  }
}
339
// Table-driven check of FlateEncode(). Each row pairs a plain input with the
// exact compressed byte sequence expected back. Failures are reported by row
// index because the data is binary.
TEST(ParserDecodeTest, FlateEncode) {
  static const pdfium::StrFuncTestData kCases[] = {
      STR_IN_OUT_CASE("", "\x78\x9c\x03\x00\x00\x00\x00\x01"),
      STR_IN_OUT_CASE(" ", "\x78\x9c\x53\x00\x00\x00\x21\x00\x21"),
      STR_IN_OUT_CASE("123", "\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97"),
      STR_IN_OUT_CASE("\x00\xff", "\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00"),
      STR_IN_OUT_CASE(
          "1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n"
          "W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n"
          "0 0 693 917 re\nf\nQ\nQ\n",
          "\x78\x9c\x33\x54\x30\x00\x42\x5d\x43\x05\x23\x4b\x05\x73\x33\x63"
          "\x85\xe4\x5c\x2e\x90\x80\xa9\xa9\xa9\x82\xb9\xb1\xa9\x42\x51\x2a"
          "\x57\xb8\x42\x1e\x57\x21\x92\xa0\x89\x9e\xb1\xa5\x09\x92\x84\x9e"
          "\x85\x81\x81\x25\xd8\x14\x24\x26\xd0\x18\x43\x05\x10\x0c\x72\x57"
          "\x80\x30\x8a\xd2\xb9\xf4\xdd\x0d\x14\xd2\x8b\xc1\x46\x99\x59\x1a"
          "\x2b\x58\x1a\x9a\x83\x8c\x49\xe3\x0a\x04\x42\x00\x37\x4c\x1b\x42"),
  };

  for (size_t row = 0; row < std::size(kCases); ++row) {
    const pdfium::StrFuncTestData& entry = kCases[row];
    DataVector<uint8_t> encoded = FlateEncode({entry.input, entry.input_size});
    EXPECT_EQ(entry.expected_size, encoded.size()) << " for case " << row;
    // Contents are only compared when the sizes already match.
    if (entry.expected_size == encoded.size()) {
      EXPECT_EQ(0, memcmp(entry.expected, encoded.data(), entry.expected_size))
          << " for case " << row;
    }
  }
}
368
// Table-driven check of HexDecode(). Each row gives the hex-encoded input,
// the expected decoded bytes, and the value HexDecode() is expected to
// return. In the rows with a '>' end marker that value is the number of input
// bytes up to and including the marker, so every whitespace byte in an input
// literal counts towards it.
TEST(ParserDecodeTest, HexDecode) {
  const pdfium::DecodeTestData kTestData[] = {
      // Empty src string.
      STR_IN_OUT_CASE("", "", 0),
      // Empty content in src string.
      STR_IN_OUT_CASE(">", "", 1),
      // Only whitespaces in src string: tab, three spaces, CR, LF, then '>'
      // is 7 bytes, matching the expected processed size.
      STR_IN_OUT_CASE("\t   \r\n>", "", 7),
      // Regular conversion.
      STR_IN_OUT_CASE("12Ac>zzz", "\x12\xac", 5),
      // Skip whitespaces.
      STR_IN_OUT_CASE("12 Ac\t02\r\nBF>zzz>", "\x12\xac\x02\xbf", 13),
      // Non-multiple length.
      STR_IN_OUT_CASE("12A>zzz", "\x12\xa0", 4),
      // Skips unknown characters. Two spaces precede the tab, so the '>' is
      // the 10th byte, matching the expected processed size.
      STR_IN_OUT_CASE("12tk  \tAc>zzz", "\x12\xac", 10),
      // No ending mark.
      STR_IN_OUT_CASE("12AcED3c3456", "\x12\xac\xed\x3c\x34\x56", 12),
  };
  for (const auto& test_case : kTestData) {
    std::unique_ptr<uint8_t, FxFreeDeleter> result;
    uint32_t result_size = 0;
    EXPECT_EQ(test_case.processed_size,
              HexDecode({test_case.input, test_case.input_size}, &result,
                        &result_size))
        << "for case " << test_case.input;
    // Bail out before the byte-wise comparison if the sizes disagree.
    ASSERT_EQ(test_case.expected_size, result_size);
    const uint8_t* result_ptr = result.get();
    for (size_t j = 0; j < result_size; ++j) {
      EXPECT_EQ(test_case.expected[j], result_ptr[j])
          << "for case " << test_case.input << " char " << j;
    }
  }
}
403
// Checks PDF_DecodeText() on plain text and on text that starts with the
// 0xFE 0xFF byte-order mark followed by big-endian 16-bit code units.
TEST(ParserDecodeTest, DecodeText) {
  // Empty src string.
  EXPECT_EQ(L"", PDF_DecodeText(ToSpan("")));

  // ASCII text.
  EXPECT_EQ(L"the quick\tfox", PDF_DecodeText(ToSpan("the quick\tfox")));

  // Unicode text.
  EXPECT_EQ(L"\x0330\x0331",
            PDF_DecodeText(ToSpan("\xFE\xFF\x03\x30\x03\x31")));

  // More Unicode text.
  EXPECT_EQ(
      L"\x7F51\x9875\x0020\x56FE\x7247\x0020"
      L"\x8D44\x8BAF\x66F4\x591A\x0020\x00BB",
      PDF_DecodeText(
          ToSpan("\xFE\xFF\x7F\x51\x98\x75\x00\x20\x56\xFE\x72\x47\x00"
                 "\x20\x8D\x44\x8B\xAF\x66\xF4\x59\x1A\x00\x20\x00\xBB")));

  // Supplementary Unicode text. The surrogate pair D83C DFA8 encodes
  // U+1F3A8; the expectation is spelled with an escape so that it does not
  // depend on the source file's encoding.
  EXPECT_EQ(L"\U0001F3A8",
            PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x3C\xDF\xA8")));
}
426
427 // https://crbug.com/pdfium/182
// Checks that PDF_DecodeText() drops the sections delimited by a pair of
// 0x001B code units from BOM-prefixed text and keeps the text around them.
TEST(ParserDecodeTest, DecodeTextWithUnicodeEscapes) {
  // Two-byte escape body at the start of the text.
  const auto leading_escape =
      ToSpan("\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37");
  EXPECT_EQ(L"\x0020\x5370\x5237", PDF_DecodeText(leading_escape));

  // Same as above with one extra trailing byte.
  const auto odd_length =
      ToSpan("\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37\x29");
  EXPECT_EQ(L"\x0020\x5370\x5237", PDF_DecodeText(odd_length));

  // Four-byte escape body at the start of the text.
  const auto longer_escape = ToSpan(
      "\xFE\xFF\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x00\x20\x53\x70\x52\x37");
  EXPECT_EQ(L"\x0020\x5370\x5237", PDF_DecodeText(longer_escape));

  // Four-byte escape body in the middle of the text.
  const auto inner_escape =
      ToSpan("\xFE\xFF\x00\x20\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x52\x37");
  EXPECT_EQ(L"\x0020\x5237", PDF_DecodeText(inner_escape));
}
444
445 // https://crbug.com/1001159
// Checks PDF_DecodeText() on BOM-prefixed text where two 0x001B code units
// appear back to back, with nothing between them.
TEST(ParserDecodeTest, DecodeTextWithInvalidUnicodeEscapes) {
  // Nothing follows the two markers.
  const auto markers_only = ToSpan("\xFE\xFF\x00\x1B\x00\x1B");
  EXPECT_EQ(L"", PDF_DecodeText(markers_only));

  // A single stray byte follows the two markers.
  const auto markers_and_stray_byte = ToSpan("\xFE\xFF\x00\x1B\x00\x1B\x20");
  EXPECT_EQ(L"", PDF_DecodeText(markers_and_stray_byte));

  // A complete code unit follows the two markers and is kept.
  const auto markers_and_space = ToSpan("\xFE\xFF\x00\x1B\x00\x1B\x00\x20");
  EXPECT_EQ(L"\x0020", PDF_DecodeText(markers_and_space));
}
452
// Checks PDF_DecodeText() on BOM-prefixed text containing surrogate code
// units that are not part of a valid high/low pair. The expectations state
// that an unpaired surrogate is passed through as-is, while the valid pair
// D83C DFA8 next to it still decodes to U+1F3A8.
TEST(ParserDecodeTest, DecodeTextWithUnpairedSurrogates) {
  EXPECT_EQ(L"\xD800", PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x00"))) << "High";
  EXPECT_EQ(L"\xDC00", PDF_DecodeText(ToSpan("\xFE\xFF\xDC\x00"))) << "Low";
  // Unpaired high surrogate D800 followed by the pair D83C DFA8.
  EXPECT_EQ(L"\xD800\U0001F3A8",
            PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x00\xD8\x3C\xDF\xA8")))
      << "High-high";
  // The pair D83C DFA8 followed by an unpaired low surrogate DC00.
  EXPECT_EQ(L"\U0001F3A8\xDC00",
            PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x3C\xDF\xA8\xDC\x00")))
      << "Low-low";
}
463
// Checks PDF_EncodeText(): the expectations show ASCII text coming back
// unchanged and other text coming back as a 0xFE 0xFF byte-order mark
// followed by big-endian 16-bit code units.
TEST(ParserDecodeTest, EncodeText) {
  // Empty src string.
  EXPECT_EQ("", PDF_EncodeText(L""));

  // ASCII text.
  EXPECT_EQ("the quick\tfox", PDF_EncodeText(L"the quick\tfox"));

  // Unicode text.
  EXPECT_EQ("\xFE\xFF\x03\x30\x03\x31", PDF_EncodeText(L"\x0330\x0331"));

  // More Unicode text. ToByteString() is needed because the expected bytes
  // contain embedded NULs.
  EXPECT_EQ(
      ToByteString("\xFE\xFF\x7F\x51\x98\x75\x00\x20\x56\xFE\x72\x47\x00"
                   "\x20\x8D\x44\x8B\xAF\x66\xF4\x59\x1A\x00\x20\x00\xBB"),
      PDF_EncodeText(L"\x7F51\x9875\x0020\x56FE\x7247\x0020"
                     L"\x8D44\x8BAF\x66F4\x591A\x0020\x00BB"));

  // Supplementary Unicode text. U+1F3A8 is expected as the surrogate pair
  // D83C DFA8. The input must not be the empty string, which the first
  // expectation above already maps to "".
  EXPECT_EQ("\xFE\xFF\xD8\x3C\xDF\xA8", PDF_EncodeText(L"\U0001F3A8"));
}
484
// Decodes and then re-encodes every single-byte string. All byte values are
// expected to survive the round trip except 0x7F, 0x9F and 0xAD, which are
// expected to come back as a single NUL byte.
TEST(ParserDecodeTest, RoundTripText) {
  for (int code = 0; code < 256; ++code) {
    ByteString original(static_cast<char>(code));
    WideString decoded = PDF_DecodeText(original.raw_span());
    ByteString reencoded = PDF_EncodeText(decoded.AsStringView());

    const bool is_undefined = code == 0x7F || code == 0x9F || code == 0xAD;
    if (is_undefined) {
      EXPECT_EQ(ByteString('\0'), reencoded) << "PDFDocEncoding undefined";
    } else {
      EXPECT_EQ(original, reencoded) << "PDFDocEncoding: " << code;
    }
  }
}
504