1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <array>
16 #include <memory>
17 #include <string>
18 #include <string_view>
19 #include <thread> // NOLINT
20 #include <vector>
21
22 #include "gmock/gmock.h"
23 #include "gtest/gtest.h"
24 #include "icing/expand/expander.h"
25 #include "icing/expand/stemming/stemming-expander.h"
26 #include "icing/testing/common-matchers.h"
27
28 namespace icing {
29 namespace lib {
30
31 namespace {
32
33 using ::testing::ElementsAre;
34 using ::testing::Eq;
35 using ::testing::SizeIs;
36
// Language code passed to StemmingExpander::Create() by most tests below.
constexpr std::string_view kEnglishLanguageCode{"en"};
// Arbitrary language code; used by the LanguageCodeDoesNotMatter test to
// create a second expander alongside the English one.
constexpr std::string_view kRandomLanguageCode{"random"};
39
// Verifies that expanding an empty string, or a single space, yields exactly
// one term that equals the input and is not marked as stemmed.
TEST(NoneStemmingExpanderTest, EmptyTerm) {
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<Expander> expander,
      StemmingExpander::Create(std::string(kEnglishLanguageCode)));

  std::vector<ExpandedTerm> expanded_terms = expander->Expand("");
  // Fatal assertion: the element access below is only valid if the size
  // check holds.
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq(""));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);

  expanded_terms = expander->Expand(" ");
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq(" "));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);
}
55
// Verifies that terms made only of punctuation or digits are returned as a
// single term that equals the input and is not marked as stemmed.
TEST(NoneStemmingExpanderTest, NonAlphabetSymbols) {
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<Expander> expander,
      StemmingExpander::Create(std::string(kEnglishLanguageCode)));

  std::vector<ExpandedTerm> expanded_terms = expander->Expand("....");
  // Fatal assertion: the element access below is only valid if the size
  // check holds.
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("...."));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);

  expanded_terms = expander->Expand("928347");
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("928347"));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);
}
71
// Verifies that expanding ordinary words ("running", "abattement") yields
// exactly one term that equals the input and is not marked as stemmed.
TEST(NoneStemmingExpanderTest, ExpandTermReturnsOriginalTerm) {
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<Expander> expander,
      StemmingExpander::Create(std::string(kEnglishLanguageCode)));

  std::vector<ExpandedTerm> expanded_terms = expander->Expand("running");
  // Fatal assertion: the element access below is only valid if the size
  // check holds.
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("running"));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);

  expanded_terms = expander->Expand("abattement");
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("abattement"));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);
}
87
// Verifies that an expander created with the English language code and one
// created with an arbitrary code ("random") behave the same: each returns
// exactly one term that equals the input and is not marked as stemmed.
TEST(NoneStemmingExpanderTest, LanguageCodeDoesNotMatter) {
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<Expander> english_expander,
      StemmingExpander::Create(std::string(kEnglishLanguageCode)));

  std::vector<ExpandedTerm> expanded_terms =
      english_expander->Expand("running");
  // Fatal assertion: the element access below is only valid if the size
  // check holds.
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("running"));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);

  expanded_terms = english_expander->Expand("abattement");
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("abattement"));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);

  // Creation is also expected to succeed for the arbitrary language code.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<Expander> random_expander,
      StemmingExpander::Create(std::string(kRandomLanguageCode)));

  expanded_terms = random_expander->Expand("running");
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("running"));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);

  expanded_terms = random_expander->Expand("abattement");
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("abattement"));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);
}
118
// Verifies that multi-byte UTF-8 terms are returned as a single term that
// equals the input and is not marked as stemmed.
TEST(NoneStemmingExpanderTest, Utf8Characters) {
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<Expander> expander,
      StemmingExpander::Create(std::string(kEnglishLanguageCode)));

  std::vector<ExpandedTerm> expanded_terms = expander->Expand("我们");
  // Fatal assertion: the element access below is only valid if the size
  // check holds.
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("我们"));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);

  expanded_terms = expander->Expand("இக்கதையின்");
  ASSERT_THAT(expanded_terms, SizeIs(1));
  EXPECT_THAT(expanded_terms[0].text, Eq("இக்கதையின்"));
  EXPECT_FALSE(expanded_terms[0].is_stemmed_term);
}
134
// Verifies that many threads can call Expand() concurrently on one shared
// expander, and that every call returns the input term unstemmed.
TEST(StemmingExpanderTest, ThreadSafety) {
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<Expander> expander,
      StemmingExpander::Create(std::string(kEnglishLanguageCode)));

  constexpr std::array<std::string_view, 5> kTerms = {
      "running", "management", "tests", "asdfjgjjh", "!!!))))"};

  // Create kNumThreads threads. Each thread calls Expand() on the shared
  // expander in parallel, cycling through kTerms by thread id. There should
  // be no crashes.
  constexpr int kNumThreads = 50;
  // Sized up front so that each thread only assigns to its own slot and the
  // outer vector is never resized while threads are running.
  std::vector<std::vector<ExpandedTerm>> expanded_terms(kNumThreads);
  auto callable = [&](int thread_id) {
    expanded_terms[thread_id] =
        expander->Expand(kTerms[thread_id % kTerms.size()]);
  };

  // Spawn threads to call Expand() in parallel.
  std::vector<std::thread> thread_objs;
  thread_objs.reserve(kNumThreads);
  for (int i = 0; i < kNumThreads; ++i) {
    thread_objs.emplace_back(callable, i);
  }

  // Join threads and verify results. A thread's result is only read after
  // that thread has been joined.
  for (int i = 0; i < kNumThreads; ++i) {
    thread_objs[i].join();
    EXPECT_THAT(expanded_terms[i],
                ElementsAre(ExpandedTerm(std::string(kTerms[i % kTerms.size()]),
                                         /*is_stemmed_term_in=*/false)));
  }
}
166
167 } // namespace
168
169 } // namespace lib
170 } // namespace icing
171