// Copyright 2015 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5 #include "core/fpdfapi/font/cpdf_tounicodemap.h"
6
7 #include "core/fpdfapi/parser/cpdf_stream.h"
8 #include "core/fxcrt/retain_ptr.h"
9 #include "core/fxcrt/span.h"
10 #include "testing/gmock/include/gmock/gmock.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12
// Exercises CPDF_ToUnicodeMap::StringToCode() with hex strings wrapped in
// angle brackets. Well-formed inputs are expected to produce the parsed
// unsigned value; malformed or overflowing inputs are expected to produce an
// empty optional.
TEST(CPDFToUnicodeMapTest, StringToCode) {
  // Well-formed inputs: leading zeros, lower/upper/mixed-case digits, an odd
  // digit count, and the largest 32-bit value.
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<0001>"), testing::Optional(1u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<c2>"), testing::Optional(194u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<A2>"), testing::Optional(162u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<Af2>"),
              testing::Optional(2802u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<FFFFFFFF>"),
              testing::Optional(4294967295u));

  // Whitespaces within the string are ignored.
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<00\n0\r1>"),
              testing::Optional(1u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<c 2>"),
              testing::Optional(194u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<A2\r\n>"),
              testing::Optional(162u));

  // Integer overflow: both values need more than 32 bits.
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<100000000>").has_value());
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<1abcdFFFF>").has_value());

  // Invalid string: empty input, empty brackets, missing opening and/or
  // closing bracket, and non-hex characters between the brackets.
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("").has_value());
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<>").has_value());
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("12").has_value());
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<12").has_value());
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("12>").has_value());
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<1-7>").has_value());
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("00AB").has_value());
  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<00NN>").has_value());
}
44
// Exercises CPDF_ToUnicodeMap::StringToWideString(), which is expected to
// turn a bracketed hex string into a wide string and to return an empty
// string for input that is not properly bracketed.
TEST(CPDFToUnicodeMapTest, StringToWideString) {
  // Empty input, or input missing the opening and/or closing bracket, is
  // expected to produce an empty result.
  EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString(""));
  EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("1234"));
  EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("<c2"));
  EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("<c2D2"));
  EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("c2ab>"));

  // One full group of four hex digits is expected to produce one character.
  // The trailing two-digit group in "<c2abab>" and the content after the
  // space in "<c2ab 1234>" are expected not to contribute to the result.
  WideString res = L"\xc2ab";
  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab>"));
  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abab>"));
  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab 1234>"));

  // Two full groups (mixed-case digits) are expected to produce two
  // characters; a trailing two-digit group is again expected to be dropped.
  res += L"\xfaab";
  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb>"));
  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb12>"));
}
61
// Builds maps from bfrange sections whose low and high codes are both
// 0xFFFFFFFF, the largest 32-bit value, and checks the lookup result for
// that code.
TEST(CPDFToUnicodeMapTest, HandleBeginBFRangeAvoidIntegerOverflow) {
  // Make sure there won't be infinite loops due to integer overflows in
  // HandleBeginBFRange().
  {
    // Destination given as a bracketed array holding a single string.
    static constexpr uint8_t kInput1[] =
        "beginbfrange<FFFFFFFF><FFFFFFFF>[<0041>]endbfrange";
    auto stream = pdfium::MakeRetain<CPDF_Stream>(kInput1);
    CPDF_ToUnicodeMap map(stream);
    EXPECT_EQ(L"A", map.Lookup(0xffffffff));
  }
  {
    // Destination given as a single four-digit hex string.
    static constexpr uint8_t kInput2[] =
        "beginbfrange<FFFFFFFF><FFFFFFFF><0042>endbfrange";
    auto stream = pdfium::MakeRetain<CPDF_Stream>(kInput2);
    CPDF_ToUnicodeMap map(stream);
    EXPECT_EQ(L"B", map.Lookup(0xffffffff));
  }
  {
    // Destination given as a single eight-digit hex string (two characters).
    static constexpr uint8_t kInput3[] =
        "beginbfrange<FFFFFFFF><FFFFFFFF><00410042>endbfrange";
    auto stream = pdfium::MakeRetain<CPDF_Stream>(kInput3);
    CPDF_ToUnicodeMap map(stream);
    EXPECT_EQ(L"AB", map.Lookup(0xffffffff));
  }
}
87
// Checks how many unicode entries are recorded per charcode, and what
// ReverseLookup() returns, for distinct, conflicting, and duplicate mappings.
TEST(CPDFToUnicodeMapTest, InsertIntoMultimap) {
  {
    // Both the CIDs and the unicodes are different.
    static constexpr uint8_t kInput1[] =
        "beginbfchar<1><0041><2><0042>endbfchar";
    auto stream = pdfium::MakeRetain<CPDF_Stream>(kInput1);
    CPDF_ToUnicodeMap map(stream);
    EXPECT_EQ(1u, map.ReverseLookup(0x0041));
    EXPECT_EQ(2u, map.ReverseLookup(0x0042));
    EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(1u));
    EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(2u));
  }
  {
    // The same CID with different unicodes.
    static constexpr uint8_t kInput2[] =
        "beginbfrange<0><0><0041><0><0><0042>endbfrange";
    auto stream = pdfium::MakeRetain<CPDF_Stream>(kInput2);
    CPDF_ToUnicodeMap map(stream);
    EXPECT_EQ(0u, map.ReverseLookup(0x0041));
    EXPECT_EQ(0u, map.ReverseLookup(0x0042));
    EXPECT_EQ(2u, map.GetUnicodeCountByCharcodeForTesting(0u));
  }
  {
    // Duplicate mappings of CID 0 to unicode "A". There should be only 1 entry
    // in `m_Multimap`.
    static constexpr uint8_t kInput3[] =
        "beginbfrange<0><0>[<0041>]endbfrange\n"
        "beginbfchar<0><0041>endbfchar";
    auto stream = pdfium::MakeRetain<CPDF_Stream>(kInput3);
    CPDF_ToUnicodeMap map(stream);
    EXPECT_EQ(0u, map.ReverseLookup(0x0041));
    EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(0u));
  }
}
122
// Maps charcode 0x01 to the two 16-bit units D841 DE76 and checks the forward
// lookup, plus the reverse lookup of 0x20676 where wchar_t is 32-bit.
TEST(CPDFToUnicodeMapTest, NonBmpUnicodeLookup) {
  static constexpr uint8_t kInput[] = "beginbfchar<01><d841de76>endbfchar";
  CPDF_ToUnicodeMap map(pdfium::MakeRetain<CPDF_Stream>(kInput));
  // The forward lookup is expected to return both units unchanged.
  EXPECT_EQ(L"\xd841\xde76", map.Lookup(0x01));
#if defined(WCHAR_T_IS_32_BIT)
  // TODO(crbug.com/374947848): Should work if wchar_t is 16-bit.
  // TODO(crbug.com/374947848): Should return 1u.
  // The expectation below pins the current result (0u), which the TODO above
  // marks as not the desired one.
  EXPECT_EQ(0u, map.ReverseLookup(0x20676));
#endif
}
133