1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "core/fpdfapi/font/cpdf_tounicodemap.h"
6
7 #include "core/fpdfapi/parser/cpdf_stream.h"
8 #include "core/fxcrt/retain_ptr.h"
9 #include "testing/gmock/include/gmock/gmock.h"
10 #include "testing/gtest/include/gtest/gtest.h"
11 #include "third_party/base/span.h"
12
// Exercises CPDF_ToUnicodeMap::StringToCode() with well-formed hex strings,
// values too large for 32 bits, and malformed input.
TEST(cpdf_tounicodemap, StringToCode) {
  using testing::Optional;

  // Well-formed "<hex>" strings. The expected values are written in hex so
  // they can be compared with the input digits at a glance; upper- and
  // lower-case digits are both accepted.
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<0001>"), Optional(0x1u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<c2>"), Optional(0xc2u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<A2>"), Optional(0xa2u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<Af2>"), Optional(0xaf2u));
  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<FFFFFFFF>"),
              Optional(0xffffffffu));

  // True when StringToCode() yields no value for `input`.
  const auto is_rejected = [](const char* input) {
    return !CPDF_ToUnicodeMap::StringToCode(input).has_value();
  };

  // Integer overflow: more than 32 bits worth of hex digits.
  EXPECT_TRUE(is_rejected("<100000000>"));
  EXPECT_TRUE(is_rejected("<1abcdFFFF>"));

  // Invalid strings: empty input, empty brackets, missing or unbalanced
  // brackets, and characters that are not hex digits.
  EXPECT_TRUE(is_rejected(""));
  EXPECT_TRUE(is_rejected("<>"));
  EXPECT_TRUE(is_rejected("12"));
  EXPECT_TRUE(is_rejected("<12"));
  EXPECT_TRUE(is_rejected("12>"));
  EXPECT_TRUE(is_rejected("<1-7>"));
  EXPECT_TRUE(is_rejected("00AB"));
  EXPECT_TRUE(is_rejected("<00NN>"));
}
36
// Exercises CPDF_ToUnicodeMap::StringToWideString() with malformed input and
// with bracketed hex strings holding one or two 4-digit groups.
TEST(cpdf_tounicodemap, StringToWideString) {
  const WideString kNothing;
  const WideString kOneUnit = L"\xc2ab";
  const WideString kTwoUnits = L"\xc2ab" L"\xfaab";

  // Empty input, or input lacking either the opening or the closing bracket,
  // is expected to produce an empty string.
  EXPECT_EQ(kNothing, CPDF_ToUnicodeMap::StringToWideString(""));
  EXPECT_EQ(kNothing, CPDF_ToUnicodeMap::StringToWideString("1234"));
  EXPECT_EQ(kNothing, CPDF_ToUnicodeMap::StringToWideString("<c2"));
  EXPECT_EQ(kNothing, CPDF_ToUnicodeMap::StringToWideString("<c2D2"));
  EXPECT_EQ(kNothing, CPDF_ToUnicodeMap::StringToWideString("c2ab>"));

  // One complete 4-digit group. A trailing partial group, or anything after
  // a space, is expected not to contribute to the result.
  EXPECT_EQ(kOneUnit, CPDF_ToUnicodeMap::StringToWideString("<c2ab>"));
  EXPECT_EQ(kOneUnit, CPDF_ToUnicodeMap::StringToWideString("<c2abab>"));
  EXPECT_EQ(kOneUnit, CPDF_ToUnicodeMap::StringToWideString("<c2ab 1234>"));

  // Two complete 4-digit groups with mixed-case digits; a trailing partial
  // group is again expected to be dropped.
  EXPECT_EQ(kTwoUnits, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb>"));
  EXPECT_EQ(kTwoUnits, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb12>"));
}
53
// Make sure there won't be infinite loops due to integer overflows in
// HandleBeginBFRange(). Every range below starts and ends at the largest
// 32-bit charcode.
TEST(cpdf_tounicodemap, HandleBeginBFRangeAvoidIntegerOverflow) {
  // Loads `cmap_data` into a fresh stream, builds a CPDF_ToUnicodeMap from
  // it, and returns what the map holds for charcode 0xffffffff.
  const auto lookup_max_charcode = [](auto cmap_data) {
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>();
    cmap_stream->SetData(cmap_data);
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    return to_unicode.Lookup(0xffffffff);
  };

  // Destination given as a bracketed array with one entry.
  static constexpr uint8_t kArrayDestination[] =
      "beginbfrange<FFFFFFFF><FFFFFFFF>[<0041>]endbfrange";
  EXPECT_STREQ(
      L"A",
      lookup_max_charcode(pdfium::make_span(kArrayDestination)).c_str());

  // Destination given as a single 4-digit hex string.
  static constexpr uint8_t kSingleUnitDestination[] =
      "beginbfrange<FFFFFFFF><FFFFFFFF><0042>endbfrange";
  EXPECT_STREQ(
      L"B",
      lookup_max_charcode(pdfium::make_span(kSingleUnitDestination)).c_str());

  // Destination given as an 8-digit hex string, i.e. two characters.
  static constexpr uint8_t kTwoUnitDestination[] =
      "beginbfrange<FFFFFFFF><FFFFFFFF><00410042>endbfrange";
  EXPECT_STREQ(
      L"AB",
      lookup_max_charcode(pdfium::make_span(kTwoUnitDestination)).c_str());
}
82
// Checks how charcode -> unicode mappings accumulate when a ToUnicode stream
// contains distinct, conflicting, or duplicated entries.
TEST(cpdf_tounicodemap, InsertIntoMultimap) {
  // Returns a new CPDF_Stream whose data is set to `cmap_data`.
  const auto make_stream = [](auto cmap_data) {
    auto new_stream = pdfium::MakeRetain<CPDF_Stream>();
    new_stream->SetData(cmap_data);
    return new_stream;
  };

  {
    // Both the CIDs and the unicodes are different.
    static constexpr uint8_t kDistinctEntries[] =
        "beginbfchar<1><0041><2><0042>endbfchar";
    auto cmap_stream = make_stream(pdfium::make_span(kDistinctEntries));
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_EQ(1u, to_unicode.ReverseLookup(0x0041));
    EXPECT_EQ(2u, to_unicode.ReverseLookup(0x0042));
    EXPECT_EQ(1u, to_unicode.GetUnicodeCountByCharcodeForTesting(1u));
    EXPECT_EQ(1u, to_unicode.GetUnicodeCountByCharcodeForTesting(2u));
  }

  {
    // The same CID with different unicodes: both mappings are kept.
    static constexpr uint8_t kSameCidTwoUnicodes[] =
        "beginbfrange<0><0><0041><0><0><0042>endbfrange";
    auto cmap_stream = make_stream(pdfium::make_span(kSameCidTwoUnicodes));
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_EQ(0u, to_unicode.ReverseLookup(0x0041));
    EXPECT_EQ(0u, to_unicode.ReverseLookup(0x0042));
    EXPECT_EQ(2u, to_unicode.GetUnicodeCountByCharcodeForTesting(0u));
  }

  {
    // Duplicate mappings of CID 0 to unicode "A", once through a bfrange and
    // once through a bfchar. There should be only 1 entry in `m_Multimap`.
    static constexpr uint8_t kDuplicateMapping[] =
        "beginbfrange<0><0>[<0041>]endbfrange\n"
        "beginbfchar<0><0041>endbfchar";
    auto cmap_stream = make_stream(pdfium::make_span(kDuplicateMapping));
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_EQ(0u, to_unicode.ReverseLookup(0x0041));
    EXPECT_EQ(1u, to_unicode.GetUnicodeCountByCharcodeForTesting(0u));
  }
}
120