1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/scoring/scoring-processor.h"
16
17 #include <cstdint>
18
19 #include "icing/text_classifier/lib3/utils/base/statusor.h"
20 #include "gmock/gmock.h"
21 #include "gtest/gtest.h"
22 #include "icing/document-builder.h"
23 #include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
24 #include "icing/proto/document.pb.h"
25 #include "icing/proto/schema.pb.h"
26 #include "icing/proto/scoring.pb.h"
27 #include "icing/proto/term.pb.h"
28 #include "icing/proto/usage.pb.h"
29 #include "icing/schema-builder.h"
30 #include "icing/scoring/scorer-test-utils.h"
31 #include "icing/testing/common-matchers.h"
32 #include "icing/testing/fake-clock.h"
33 #include "icing/testing/tmp-directory.h"
34
35 namespace icing {
36 namespace lib {
37
38 namespace {
39 using ::testing::ElementsAre;
40 using ::testing::Eq;
41 using ::testing::Gt;
42 using ::testing::IsEmpty;
43 using ::testing::SizeIs;
44
45 class ScoringProcessorTest
46 : public ::testing::TestWithParam<ScorerTestingMode> {
47 protected:
ScoringProcessorTest()48 ScoringProcessorTest()
49 : test_dir_(GetTestTempDir() + "/icing"),
50 doc_store_dir_(test_dir_ + "/doc_store"),
51 schema_store_dir_(test_dir_ + "/schema_store") {}
52
SetUp()53 void SetUp() override {
54 // Creates file directories
55 filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
56 filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str());
57 filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
58
59 ICING_ASSERT_OK_AND_ASSIGN(
60 schema_store_,
61 SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
62
63 ICING_ASSERT_OK_AND_ASSIGN(
64 DocumentStore::CreateResult create_result,
65 DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
66 schema_store_.get(),
67 /*force_recovery_and_revalidate_documents=*/false,
68 /*namespace_id_fingerprint=*/false,
69 PortableFileBackedProtoLog<
70 DocumentWrapper>::kDeflateCompressionLevel,
71 /*initialize_stats=*/nullptr));
72 document_store_ = std::move(create_result.document_store);
73
74 // Creates a simple email schema
75 SchemaProto test_email_schema =
76 SchemaBuilder()
77 .AddType(SchemaTypeConfigBuilder()
78 .SetType("email")
79 .AddProperty(
80 PropertyConfigBuilder()
81 .SetName("subject")
82 .SetDataTypeString(
83 TermMatchType::PREFIX,
84 StringIndexingConfig::TokenizerType::PLAIN)
85 .SetDataType(TYPE_STRING)
86 .SetCardinality(CARDINALITY_OPTIONAL))
87 .AddProperty(
88 PropertyConfigBuilder()
89 .SetName("body")
90 .SetDataTypeString(
91 TermMatchType::PREFIX,
92 StringIndexingConfig::TokenizerType::PLAIN)
93 .SetDataType(TYPE_STRING)
94 .SetCardinality(CARDINALITY_OPTIONAL)))
95 .Build();
96 ICING_ASSERT_OK(schema_store_->SetSchema(
97 test_email_schema, /*ignore_errors_and_delete_documents=*/false,
98 /*allow_circular_schema_definitions=*/false));
99 }
100
TearDown()101 void TearDown() override {
102 document_store_.reset();
103 schema_store_.reset();
104 filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
105 }
106
document_store()107 DocumentStore* document_store() { return document_store_.get(); }
108
schema_store()109 SchemaStore* schema_store() { return schema_store_.get(); }
110
fake_clock() const111 const FakeClock& fake_clock() const { return fake_clock_; }
112
113 private:
114 const std::string test_dir_;
115 const std::string doc_store_dir_;
116 const std::string schema_store_dir_;
117 Filesystem filesystem_;
118 FakeClock fake_clock_;
119 std::unique_ptr<DocumentStore> document_store_;
120 std::unique_ptr<SchemaStore> schema_store_;
121 };
122
// Document score given to test documents whose score is irrelevant to the
// behavior under test.
constexpr int kDefaultScore{0};
// Creation timestamp (milliseconds) shared by the documents created in these
// tests.
constexpr int64_t kDefaultCreationTimestampMs{1571100001111};
125
CreateDocument(const std::string & name_space,const std::string & uri,int score,int64_t creation_timestamp_ms)126 DocumentProto CreateDocument(const std::string& name_space,
127 const std::string& uri, int score,
128 int64_t creation_timestamp_ms) {
129 return DocumentBuilder()
130 .SetKey(name_space, uri)
131 .SetSchema("email")
132 .SetScore(score)
133 .SetCreationTimestampMs(creation_timestamp_ms)
134 .Build();
135 }
136
137 libtextclassifier3::StatusOr<
138 std::pair<std::vector<DocHitInfo>, std::vector<ScoredDocumentHit>>>
CreateAndInsertsDocumentsWithScores(DocumentStore * document_store,const std::vector<int> & scores)139 CreateAndInsertsDocumentsWithScores(DocumentStore* document_store,
140 const std::vector<int>& scores) {
141 std::vector<DocHitInfo> doc_hit_infos;
142 std::vector<ScoredDocumentHit> scored_document_hits;
143 for (int i = 0; i < scores.size(); i++) {
144 ICING_ASSIGN_OR_RETURN(DocumentId document_id,
145 document_store->Put(CreateDocument(
146 "icing", "email/" + std::to_string(i),
147 scores.at(i), kDefaultCreationTimestampMs)));
148 doc_hit_infos.emplace_back(document_id);
149 scored_document_hits.emplace_back(document_id, kSectionIdMaskNone,
150 scores.at(i));
151 }
152 return std::pair(doc_hit_infos, scored_document_hits);
153 }
154
CreateUsageReport(std::string name_space,std::string uri,int64_t timestamp_ms,UsageReport::UsageType usage_type)155 UsageReport CreateUsageReport(std::string name_space, std::string uri,
156 int64_t timestamp_ms,
157 UsageReport::UsageType usage_type) {
158 UsageReport usage_report;
159 usage_report.set_document_namespace(name_space);
160 usage_report.set_document_uri(uri);
161 usage_report.set_usage_timestamp_ms(timestamp_ms);
162 usage_report.set_usage_type(usage_type);
163 return usage_report;
164 }
165
CreateTypePropertyWeights(std::string schema_type,std::vector<PropertyWeight> property_weights)166 TypePropertyWeights CreateTypePropertyWeights(
167 std::string schema_type, std::vector<PropertyWeight> property_weights) {
168 TypePropertyWeights type_property_weights;
169 type_property_weights.set_schema_type(std::move(schema_type));
170 type_property_weights.mutable_property_weights()->Reserve(
171 property_weights.size());
172
173 for (PropertyWeight& property_weight : property_weights) {
174 *type_property_weights.add_property_weights() = std::move(property_weight);
175 }
176
177 return type_property_weights;
178 }
179
CreatePropertyWeight(std::string path,double weight)180 PropertyWeight CreatePropertyWeight(std::string path, double weight) {
181 PropertyWeight property_weight;
182 property_weight.set_path(std::move(path));
183 property_weight.set_weight(weight);
184 return property_weight;
185 }
186
// Creating a ScoringProcessor without a DocumentStore must be rejected with
// FAILED_PRECONDITION.
TEST_F(ScoringProcessorTest, CreationWithNullDocumentStoreShouldFail) {
  ScoringSpecProto default_spec;
  auto create_result = ScoringProcessor::Create(
      default_spec, /*document_store=*/nullptr, schema_store(),
      fake_clock().GetSystemTimeMilliseconds());
  EXPECT_THAT(create_result,
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
194
// Creating a ScoringProcessor without a SchemaStore must be rejected with
// FAILED_PRECONDITION.
TEST_F(ScoringProcessorTest, CreationWithNullSchemaStoreShouldFail) {
  ScoringSpecProto default_spec;
  auto create_result = ScoringProcessor::Create(
      default_spec, document_store(), /*schema_store=*/nullptr,
      fake_clock().GetSystemTimeMilliseconds());
  EXPECT_THAT(create_result,
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
203
// With valid stores and a DOCUMENT_SCORE spec, creation succeeds.
TEST_P(ScoringProcessorTest, ShouldCreateInstance) {
  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());

  ICING_EXPECT_OK(ScoringProcessor::Create(
      document_score_spec, document_store(), schema_store(),
      fake_clock().GetSystemTimeMilliseconds()));
}
211
// Scoring an iterator that yields no hits produces no scored hits.
TEST_P(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) {
  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(document_score_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // An iterator backed by an empty hit list.
  std::vector<DocHitInfo> no_hits;
  std::unique_ptr<DocHitInfoIterator> empty_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(no_hits);

  auto scored_hits = scoring_processor->Score(std::move(empty_iterator),
                                              /*num_to_score=*/5);
  EXPECT_THAT(scored_hits, IsEmpty());
}
231
// A negative or zero num_to_score yields no results even when hits exist.
TEST_P(ScoringProcessorTest, ShouldHandleNonPositiveNumToScore) {
  // Store a single document and wrap it in a hit list.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId only_document_id,
      document_store()->Put(CreateDocument("icing", "email/1", /*score=*/1,
                                           kDefaultCreationTimestampMs)));
  std::vector<DocHitInfo> single_hit = {DocHitInfo(only_document_id)};

  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(document_score_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Negative budget.
  std::unique_ptr<DocHitInfoIterator> iterator_for_negative =
      std::make_unique<DocHitInfoIteratorDummy>(single_hit);
  EXPECT_THAT(scoring_processor->Score(std::move(iterator_for_negative),
                                       /*num_to_score=*/-1),
              IsEmpty());

  // Zero budget, using a fresh iterator over the same hits.
  std::unique_ptr<DocHitInfoIterator> iterator_for_zero =
      std::make_unique<DocHitInfoIteratorDummy>(single_hit);
  EXPECT_THAT(scoring_processor->Score(std::move(iterator_for_zero),
                                       /*num_to_score=*/0),
              IsEmpty());
}
264
// Score() returns at most num_to_score results, and never more than the
// number of available hits.
TEST_P(ScoringProcessorTest, ShouldRespectNumToScore) {
  // Insert three documents and keep their hits.
  ICING_ASSERT_OK_AND_ASSIGN(
      auto hits_and_expectations,
      CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
  std::vector<DocHitInfo>& three_hits = hits_and_expectations.first;

  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(document_score_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Budget smaller than the number of hits: truncated to the budget.
  std::unique_ptr<DocHitInfoIterator> first_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(three_hits);
  EXPECT_THAT(scoring_processor->Score(std::move(first_iterator),
                                       /*num_to_score=*/2),
              SizeIs(2));

  // Budget larger than the number of hits: all three are returned.
  std::unique_ptr<DocHitInfoIterator> second_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(three_hits);
  EXPECT_THAT(scoring_processor->Score(std::move(second_iterator),
                                       /*num_to_score=*/4),
              SizeIs(3));
}
295
// With DOCUMENT_SCORE ranking, each hit is scored with its document's score
// and results come back in iterator order.
TEST_P(ScoringProcessorTest, ShouldScoreByDocumentScore) {
  // The helper returns both the input hits and the expected scored hits.
  ICING_ASSERT_OK_AND_ASSIGN(
      auto hits_and_expectations,
      CreateAndInsertsDocumentsWithScores(document_store(), {1, 3, 2}));
  auto& [input_hits, expected_hits] = hits_and_expectations;

  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(document_score_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Iterator over the three inserted hits.
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(input_hits);

  auto scored_hits = scoring_processor->Score(std::move(hit_iterator),
                                              /*num_to_score=*/3);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hits.at(0)),
                          EqualsScoredDocumentHit(expected_hits.at(1)),
                          EqualsScoredDocumentHit(expected_hits.at(2))));
}
324
// RELEVANCE_SCORE ranking: three documents that each match "foo" once but
// have different token counts receive different scores.
TEST_P(ScoringProcessorTest,
       ShouldScoreByRelevanceScore_DocumentsWithDifferentLength) {
  ScoringSpecProto relevance_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());

  // Store three documents with 10, 100 and 50 tokens respectively.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id1,
      document_store()->Put(
          CreateDocument("icing", "email/1", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/10));
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id2,
      document_store()->Put(
          CreateDocument("icing", "email/2", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/100));
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id3,
      document_store()->Put(
          CreateDocument("icing", "email/3", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/50));

  // Every document has exactly one hit, in section 0.
  SectionId section_id = 0;
  SectionIdMask section_id_mask = UINT64_C(1) << section_id;

  DocHitInfoTermFrequencyPair hit1 = DocHitInfo(document_id1);
  hit1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
  DocHitInfoTermFrequencyPair hit2 = DocHitInfo(document_id2);
  hit2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
  DocHitInfoTermFrequencyPair hit3 = DocHitInfo(document_id3);
  hit3.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
  std::vector<DocHitInfoTermFrequencyPair> hits = {hit1, hit2, hit3};

  // Iterator for the query "foo", plus the per-term iterator map passed to
  // Score().
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(hits, "foo");
  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
      query_term_iterators;
  query_term_iterators.emplace(
      "foo", std::make_unique<DocHitInfoIteratorDummy>(hits, "foo"));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(relevance_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // All three documents contain "foo" exactly once, so document length
  // decides the final score. Documents shorter than the average corpus length
  // are slightly boosted.
  ScoredDocumentHit expected_hit1(document_id1, section_id_mask,
                                  /*score=*/0.187114);
  ScoredDocumentHit expected_hit2(document_id2, section_id_mask,
                                  /*score=*/0.084904);
  ScoredDocumentHit expected_hit3(document_id3, section_id_mask,
                                  /*score=*/0.121896);

  auto scored_hits =
      scoring_processor->Score(std::move(hit_iterator),
                               /*num_to_score=*/3, &query_term_iterators);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit1),
                          EqualsScoredDocumentHit(expected_hit2),
                          EqualsScoredDocumentHit(expected_hit3)));
}
394
// RELEVANCE_SCORE ranking: three documents of equal length that each match
// "foo" once receive identical scores.
TEST_P(ScoringProcessorTest,
       ShouldScoreByRelevanceScore_DocumentsWithSameLength) {
  ScoringSpecProto relevance_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());

  // Store three documents, each with 10 tokens.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id1,
      document_store()->Put(
          CreateDocument("icing", "email/1", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/10));
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id2,
      document_store()->Put(
          CreateDocument("icing", "email/2", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/10));
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id3,
      document_store()->Put(
          CreateDocument("icing", "email/3", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/10));

  // Every document has exactly one hit, in section 0.
  SectionId section_id = 0;
  SectionIdMask section_id_mask = UINT64_C(1) << section_id;

  DocHitInfoTermFrequencyPair hit1 = DocHitInfo(document_id1);
  hit1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
  DocHitInfoTermFrequencyPair hit2 = DocHitInfo(document_id2);
  hit2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
  DocHitInfoTermFrequencyPair hit3 = DocHitInfo(document_id3);
  hit3.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
  std::vector<DocHitInfoTermFrequencyPair> hits = {hit1, hit2, hit3};

  // Iterator for the query "foo", plus the per-term iterator map passed to
  // Score().
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(hits, "foo");
  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
      query_term_iterators;
  query_term_iterators.emplace(
      "foo", std::make_unique<DocHitInfoIteratorDummy>(hits, "foo"));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(relevance_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // The three documents contain "foo" exactly once and have the same length,
  // so they all get the same BM25F score.
  ScoredDocumentHit expected_hit1(document_id1, section_id_mask,
                                  /*score=*/0.118455);
  ScoredDocumentHit expected_hit2(document_id2, section_id_mask,
                                  /*score=*/0.118455);
  ScoredDocumentHit expected_hit3(document_id3, section_id_mask,
                                  /*score=*/0.118455);

  auto scored_hits =
      scoring_processor->Score(std::move(hit_iterator),
                               /*num_to_score=*/3, &query_term_iterators);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit1),
                          EqualsScoredDocumentHit(expected_hit2),
                          EqualsScoredDocumentHit(expected_hit3)));
}
463
// RELEVANCE_SCORE ranking: three documents of equal length that match "foo"
// a different number of times receive different scores.
TEST_P(ScoringProcessorTest,
       ShouldScoreByRelevanceScore_DocumentsWithDifferentQueryFrequency) {
  ScoringSpecProto relevance_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());

  // Store three documents, each with 10 tokens.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id1,
      document_store()->Put(
          CreateDocument("icing", "email/1", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/10));
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id2,
      document_store()->Put(
          CreateDocument("icing", "email/2", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/10));
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id3,
      document_store()->Put(
          CreateDocument("icing", "email/3", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/10));

  // Document 1 contains the query term "foo" 5 times, all in section 0.
  DocHitInfoTermFrequencyPair hit1 = DocHitInfo(document_id1);
  hit1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/5);
  // Document 2 contains the query term "foo" 1 time, in section 0.
  DocHitInfoTermFrequencyPair hit2 = DocHitInfo(document_id2);
  hit2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
  // Document 3 contains the query term "foo" 3 times in total: once in
  // section 0 and twice in section 1.
  DocHitInfoTermFrequencyPair hit3 = DocHitInfo(document_id3);
  hit3.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
  hit3.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
  std::vector<DocHitInfoTermFrequencyPair> hits = {hit1, hit2, hit3};

  // Sections hit by each document.
  SectionIdMask section_id_mask1 = 0b00000001;
  SectionIdMask section_id_mask2 = 0b00000001;
  SectionIdMask section_id_mask3 = 0b00000011;

  // Iterator for the query "foo", plus the per-term iterator map passed to
  // Score().
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(hits, "foo");
  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
      query_term_iterators;
  query_term_iterators.emplace(
      "foo", std::make_unique<DocHitInfoIteratorDummy>(hits, "foo"));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(relevance_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // All three documents have the same length, so the score is decided by the
  // frequency of the query term "foo".
  ScoredDocumentHit expected_hit1(document_id1, section_id_mask1,
                                  /*score=*/0.226674);
  ScoredDocumentHit expected_hit2(document_id2, section_id_mask2,
                                  /*score=*/0.118455);
  ScoredDocumentHit expected_hit3(document_id3, section_id_mask3,
                                  /*score=*/0.196720);

  auto scored_hits =
      scoring_processor->Score(std::move(hit_iterator),
                               /*num_to_score=*/3, &query_term_iterators);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit1),
                          EqualsScoredDocumentHit(expected_hit2),
                          EqualsScoredDocumentHit(expected_hit3)));
}
537
// RELEVANCE_SCORE ranking: a hit whose term frequency is zero is scored 0.
TEST_P(ScoringProcessorTest,
       ShouldScoreByRelevanceScore_HitTermWithZeroFrequency) {
  ScoringSpecProto relevance_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());

  // Store a single 10-token document.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id1,
      document_store()->Put(
          CreateDocument("icing", "email/1", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/10));

  // Document 1 has a hit for "foo" in section 0 with a term frequency of 0.
  DocHitInfoTermFrequencyPair zero_frequency_hit = DocHitInfo(document_id1);
  zero_frequency_hit.UpdateSection(/*section_id=*/0,
                                   /*hit_term_frequency=*/0);
  std::vector<DocHitInfoTermFrequencyPair> hits = {zero_frequency_hit};
  SectionIdMask section_id_mask1 = 0b00000001;

  // Iterator for the query "foo", plus the per-term iterator map passed to
  // Score().
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(hits, "foo");
  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
      query_term_iterators;
  query_term_iterators.emplace(
      "foo", std::make_unique<DocHitInfoIteratorDummy>(hits, "foo"));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(relevance_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // The hit has zero frequency, so a score of zero is expected.
  ScoredDocumentHit expected_hit1(document_id1, section_id_mask1,
                                  /*score=*/0.000000);

  auto scored_hits =
      scoring_processor->Score(std::move(hit_iterator),
                               /*num_to_score=*/1, &query_term_iterators);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit1)));
}
583
// RELEVANCE_SCORE ranking with explicit property weights: with equal hit
// frequency, the document matching in the higher-weighted property scores
// higher.
TEST_P(ScoringProcessorTest,
       ShouldScoreByRelevanceScore_SameHitFrequencyDifferentPropertyWeights) {
  // Scoring spec weighting "body" at 0.5 and "subject" at 2.0 for "email".
  ScoringSpecProto relevance_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
  *relevance_spec.add_type_property_weights() = CreateTypePropertyWeights(
      /*schema_type=*/"email",
      {CreatePropertyWeight(/*path=*/"body", /*weight=*/0.5),
       CreatePropertyWeight(/*path=*/"subject", /*weight=*/2.0)});

  // Store two single-token documents.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id1,
      document_store()->Put(
          CreateDocument("icing", "email/1", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/1));
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id2,
      document_store()->Put(
          CreateDocument("icing", "email/2", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/1));

  SectionId body_section_id = 0;
  SectionId subject_section_id = 1;
  SectionIdMask body_section_id_mask = 1U << body_section_id;
  SectionIdMask subject_section_id_mask = 1U << subject_section_id;

  // Document 1 contains the term "foo" 1 time in the "body" property.
  DocHitInfoTermFrequencyPair body_hit = DocHitInfo(document_id1);
  body_hit.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
  // Document 2 contains the term "foo" 1 time in the "subject" property.
  DocHitInfoTermFrequencyPair subject_hit = DocHitInfo(document_id2);
  subject_hit.UpdateSection(subject_section_id, /*hit_term_frequency=*/1);
  std::vector<DocHitInfoTermFrequencyPair> hits = {body_hit, subject_hit};

  // Iterator for the query "foo", plus the per-term iterator map passed to
  // Score().
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(hits, "foo");
  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
      query_term_iterators;
  query_term_iterators.emplace(
      "foo", std::make_unique<DocHitInfoIteratorDummy>(hits, "foo"));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(relevance_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Document 2 is expected to score higher than document 1 because it matches
  // "foo" in the "subject" property, which is weighted higher than the "body"
  // property. Final scores are computed with smoothing applied.
  ScoredDocumentHit expected_hit1(document_id1, body_section_id_mask,
                                  /*score=*/0.053624);
  ScoredDocumentHit expected_hit2(document_id2, subject_section_id_mask,
                                  /*score=*/0.153094);

  auto scored_hits =
      scoring_processor->Score(std::move(hit_iterator),
                               /*num_to_score=*/2, &query_term_iterators);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit1),
                          EqualsScoredDocumentHit(expected_hit2)));
}
656
// RELEVANCE_SCORE ranking where only "body" has an explicit weight: the
// unweighted "subject" property still outranks the down-weighted "body".
TEST_P(ScoringProcessorTest,
       ShouldScoreByRelevanceScore_WithImplicitPropertyWeight) {
  // Scoring spec weighting only "body" (at 0.5) for the "email" type.
  ScoringSpecProto relevance_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
  *relevance_spec.add_type_property_weights() = CreateTypePropertyWeights(
      /*schema_type=*/"email",
      {CreatePropertyWeight(/*path=*/"body", /*weight=*/0.5)});

  // Store two single-token documents.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id1,
      document_store()->Put(
          CreateDocument("icing", "email/1", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/1));
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id2,
      document_store()->Put(
          CreateDocument("icing", "email/2", kDefaultScore,
                         /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/1));

  SectionId body_section_id = 0;
  SectionId subject_section_id = 1;
  SectionIdMask body_section_id_mask = 1U << body_section_id;
  SectionIdMask subject_section_id_mask = 1U << subject_section_id;

  // Document 1 contains the term "foo" 1 time in the "body" property.
  DocHitInfoTermFrequencyPair body_hit = DocHitInfo(document_id1);
  body_hit.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
  // Document 2 contains the term "foo" 1 time in the "subject" property.
  DocHitInfoTermFrequencyPair subject_hit = DocHitInfo(document_id2);
  subject_hit.UpdateSection(subject_section_id, /*hit_term_frequency=*/1);
  std::vector<DocHitInfoTermFrequencyPair> hits = {body_hit, subject_hit};

  // Iterator for the query "foo", plus the per-term iterator map passed to
  // Score().
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(hits, "foo");
  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
      query_term_iterators;
  query_term_iterators.emplace(
      "foo", std::make_unique<DocHitInfoIteratorDummy>(hits, "foo"));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(relevance_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Document 2 is expected to score higher than document 1 because it matches
  // "foo" in the "subject" property, which is weighted higher than the "body"
  // property: "subject" is implicitly given a weight of 1.0, the default
  // weight value. Final scores are computed with smoothing applied.
  ScoredDocumentHit expected_hit1(document_id1, body_section_id_mask,
                                  /*score=*/0.094601);
  ScoredDocumentHit expected_hit2(document_id2, subject_section_id_mask,
                                  /*score=*/0.153094);

  auto scored_hits =
      scoring_processor->Score(std::move(hit_iterator),
                               /*num_to_score=*/2, &query_term_iterators);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit1),
                          EqualsScoredDocumentHit(expected_hit2)));
}
729
// Checks that relevance scoring treats a property with no explicit weight the
// same as a property whose weight is explicitly set to 1.0: both processors
// must produce the identical scored hit for the same input.
TEST_P(ScoringProcessorTest,
       ShouldScoreByRelevanceScore_WithDefaultPropertyWeight) {
  DocumentProto document1 =
      CreateDocument("icing", "email/1", kDefaultScore,
                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);

  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id1,
      document_store()->Put(document1, /*num_tokens=*/1));

  // Document 1 contains the term "foo" 1 time in the "body" property
  SectionId body_section_id = 0;
  DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
  doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);

  // Creates the input doc_hit_infos
  std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1};

  // Creates a dummy DocHitInfoIterator with 1 result for the query "foo"
  std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");

  ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());

  *spec_proto.add_type_property_weights() =
      CreateTypePropertyWeights(/*schema_type=*/"email", {});

  // Creates a ScoringProcessor with no explicit weights set.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  ScoringSpecProto spec_proto_with_weights =
      CreateScoringSpecForRankingStrategy(
          ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());

  PropertyWeight body_property_weight = CreatePropertyWeight(/*path=*/"body",
                                                             /*weight=*/1.0);
  *spec_proto_with_weights.add_type_property_weights() =
      CreateTypePropertyWeights(/*schema_type=*/"email",
                                {body_property_weight});

  // Creates a ScoringProcessor with default weight set for "body" property.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor_with_weights,
      ScoringProcessor::Create(spec_proto_with_weights, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
      query_term_iterators;
  query_term_iterators["foo"] =
      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");

  SectionIdMask body_section_id_mask = 1U << body_section_id;

  // We expect document 1 to have the same score whether a weight is explicitly
  // set to 1.0 or implicitly scored with the default weight. Final scores are
  // computed with smoothing applied.
  ScoredDocumentHit expected_scored_doc_hit(document_id1, body_section_id_mask,
                                            /*score=*/0.208191);
  EXPECT_THAT(
      scoring_processor->Score(std::move(doc_hit_info_iterator),
                               /*num_to_score=*/1, &query_term_iterators),
      ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit)));

  // The doc hit iterator was moved into the first Score() call, so build a
  // fresh one. The "foo" query term iterator is re-created as well so that the
  // second processor starts from an unconsumed iterator.
  doc_hit_info_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
  query_term_iterators["foo"] =
      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");

  EXPECT_THAT(scoring_processor_with_weights->Score(
                  std::move(doc_hit_info_iterator),
                  /*num_to_score=*/1, &query_term_iterators),
              ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit)));
}
818
// Checks that a hit in a property weighted 0.0 contributes a relevance score
// of exactly 0.0, while a hit in a property weighted 1.0 scores positively.
TEST_P(ScoringProcessorTest,
       ShouldScoreByRelevanceScore_WithZeroPropertyWeight) {
  DocumentProto document1 =
      CreateDocument("icing", "email/1", kDefaultScore,
                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
  DocumentProto document2 =
      CreateDocument("icing", "email/2", kDefaultScore,
                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);

  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id1,
      document_store()->Put(document1, /*num_tokens=*/1));
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id2,
      document_store()->Put(document2, /*num_tokens=*/1));

  // Document 1 contains the term "foo" 1 time in the "body" property
  SectionId body_section_id = 0;
  DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
  doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);

  // Document 2 contains the term "foo" 1 time in the "subject" property
  SectionId subject_section_id = 1;
  DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
  doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1);

  // Creates the input doc_hit_infos
  std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1,
                                                            doc_hit_info2};

  // Creates a dummy DocHitInfoIterator with 2 results for the query "foo"
  std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");

  ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());

  // Sets property weight for "body" to 0.0.
  PropertyWeight body_property_weight =
      CreatePropertyWeight(/*path=*/"body", /*weight=*/0.0);
  // Sets property weight for "subject" to 1.0.
  PropertyWeight subject_property_weight =
      CreatePropertyWeight(/*path=*/"subject", /*weight=*/1.0);
  *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
      /*schema_type=*/"email", {body_property_weight, subject_property_weight});

  // Creates a ScoringProcessor
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> scoring_processor,
      ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
      query_term_iterators;
  query_term_iterators["foo"] =
      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");

  std::vector<ScoredDocumentHit> scored_document_hits =
      scoring_processor->Score(std::move(doc_hit_info_iterator),
                               /*num_to_score=*/2, &query_term_iterators);

  // We expect document1 to have a score of 0.0 as the query term "foo" matches
  // in the "body" property which has a weight of 0.0. This is a result of the
  // weighted term frequency being scaled down to 0.0 for the hit. We expect
  // document2 to have a positive score as the query term "foo" matches in the
  // "subject" property which has a weight of 1.0.
  //
  // The size check is fatal (ASSERT) because the element accesses below are
  // only valid when exactly two hits were returned.
  ASSERT_THAT(scored_document_hits, SizeIs(2));
  EXPECT_THAT(scored_document_hits.at(0).document_id(), Eq(document_id1));
  EXPECT_THAT(scored_document_hits.at(0).score(), Eq(0.0));
  EXPECT_THAT(scored_document_hits.at(1).document_id(), Eq(document_id2));
  EXPECT_THAT(scored_document_hits.at(1).score(), Gt(0.0));
}
891
// Scores three documents with the CREATION_TIMESTAMP ranking strategy and
// checks that every returned hit carries its document's creation timestamp as
// the score, in the same order the hits were supplied.
TEST_P(ScoringProcessorTest, ShouldScoreByCreationTimestamp) {
  DocumentProto document1 =
      CreateDocument("icing", "email/1", kDefaultScore,
                     /*creation_timestamp_ms=*/1571100001111);
  DocumentProto document2 =
      CreateDocument("icing", "email/2", kDefaultScore,
                     /*creation_timestamp_ms=*/1571100002222);
  DocumentProto document3 =
      CreateDocument("icing", "email/3", kDefaultScore,
                     /*creation_timestamp_ms=*/1571100003333);

  // The documents are deliberately stored out of order: 1, then 3, then 2.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id1,
                             document_store()->Put(document1));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id3,
                             document_store()->Put(document3));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id2,
                             document_store()->Put(document2));

  // Expected outputs: one scored hit per document, scored by its timestamp.
  ScoredDocumentHit expected_hit1(id1, kSectionIdMaskNone,
                                  document1.creation_timestamp_ms());
  ScoredDocumentHit expected_hit2(id2, kSectionIdMaskNone,
                                  document2.creation_timestamp_ms());
  ScoredDocumentHit expected_hit3(id3, kSectionIdMaskNone,
                                  document3.creation_timestamp_ms());

  // Input: a dummy iterator that yields the hits in the order 2, 3, 1.
  std::vector<DocHitInfo> input_hits = {DocHitInfo(id2), DocHitInfo(id3),
                                        DocHitInfo(id1)};
  std::unique_ptr<DocHitInfoIterator> input_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(input_hits);

  ScoringSpecProto scoring_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP, GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(scoring_spec, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  std::vector<ScoredDocumentHit> actual_hits =
      processor->Score(std::move(input_iterator), /*num_to_score=*/3);
  EXPECT_THAT(actual_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit2),
                          EqualsScoredDocumentHit(expected_hit3),
                          EqualsScoredDocumentHit(expected_hit1)));
}
940
// Scores three documents with the USAGE_TYPE1_COUNT ranking strategy and
// checks that each hit's score equals the number of USAGE_TYPE1 reports filed
// for that document (1, 2 and 0 respectively).
TEST_P(ScoringProcessorTest, ShouldScoreByUsageCount) {
  DocumentProto document1 =
      CreateDocument("icing", "email/1", kDefaultScore,
                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
  DocumentProto document2 =
      CreateDocument("icing", "email/2", kDefaultScore,
                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
  DocumentProto document3 =
      CreateDocument("icing", "email/3", kDefaultScore,
                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);

  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id1,
                             document_store()->Put(document1));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id2,
                             document_store()->Put(document2));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id3,
                             document_store()->Put(document3));

  // Usage: one report for doc1, two for doc2, none for doc3.
  UsageReport doc1_usage = CreateUsageReport(
      /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
      UsageReport::USAGE_TYPE1);
  UsageReport doc2_usage = CreateUsageReport(
      /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/0,
      UsageReport::USAGE_TYPE1);
  ICING_ASSERT_OK(document_store()->ReportUsage(doc1_usage));
  ICING_ASSERT_OK(document_store()->ReportUsage(doc2_usage));
  ICING_ASSERT_OK(document_store()->ReportUsage(doc2_usage));

  // Expected outputs: the score of each hit is its usage count.
  ScoredDocumentHit expected_hit1(id1, kSectionIdMaskNone, /*score=*/1);
  ScoredDocumentHit expected_hit2(id2, kSectionIdMaskNone, /*score=*/2);
  ScoredDocumentHit expected_hit3(id3, kSectionIdMaskNone, /*score=*/0);

  // Input: a dummy iterator that yields the hits in the order 1, 2, 3.
  std::vector<DocHitInfo> input_hits = {DocHitInfo(id1), DocHitInfo(id2),
                                        DocHitInfo(id3)};
  std::unique_ptr<DocHitInfoIterator> input_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(input_hits);

  ScoringSpecProto scoring_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(scoring_spec, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  std::vector<ScoredDocumentHit> actual_hits =
      processor->Score(std::move(input_iterator), /*num_to_score=*/3);
  EXPECT_THAT(actual_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit1),
                          EqualsScoredDocumentHit(expected_hit2),
                          EqualsScoredDocumentHit(expected_hit3)));
}
1001
// Scores three documents with the USAGE_TYPE1_LAST_USED_TIMESTAMP ranking
// strategy and checks that each hit's score equals the timestamp of the
// USAGE_TYPE1 report filed for that document (0 when none was filed).
TEST_P(ScoringProcessorTest, ShouldScoreByUsageTimestamp) {
  DocumentProto document1 =
      CreateDocument("icing", "email/1", kDefaultScore,
                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
  DocumentProto document2 =
      CreateDocument("icing", "email/2", kDefaultScore,
                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
  DocumentProto document3 =
      CreateDocument("icing", "email/3", kDefaultScore,
                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);

  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id1,
                             document_store()->Put(document1));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id2,
                             document_store()->Put(document2));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id3,
                             document_store()->Put(document3));

  // Usage: doc1 reported at 1000 ms, doc2 at 5000 ms, doc3 never reported.
  UsageReport doc1_usage = CreateUsageReport(
      /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
      UsageReport::USAGE_TYPE1);
  UsageReport doc2_usage = CreateUsageReport(
      /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/5000,
      UsageReport::USAGE_TYPE1);
  ICING_ASSERT_OK(document_store()->ReportUsage(doc1_usage));
  ICING_ASSERT_OK(document_store()->ReportUsage(doc2_usage));

  // Expected outputs: the score of each hit is the reported usage timestamp.
  ScoredDocumentHit expected_hit1(id1, kSectionIdMaskNone, /*score=*/1000);
  ScoredDocumentHit expected_hit2(id2, kSectionIdMaskNone, /*score=*/5000);
  ScoredDocumentHit expected_hit3(id3, kSectionIdMaskNone, /*score=*/0);

  // Input: a dummy iterator that yields the hits in the order 1, 2, 3.
  std::vector<DocHitInfo> input_hits = {DocHitInfo(id1), DocHitInfo(id2),
                                        DocHitInfo(id3)};
  std::unique_ptr<DocHitInfoIterator> input_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(input_hits);

  ScoringSpecProto scoring_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP,
      GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(scoring_spec, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  std::vector<ScoredDocumentHit> actual_hits =
      processor->Score(std::move(input_iterator), /*num_to_score=*/3);
  EXPECT_THAT(actual_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit1),
                          EqualsScoredDocumentHit(expected_hit2),
                          EqualsScoredDocumentHit(expected_hit3)));
}
1062
// Scores by DOCUMENT_SCORE with an extra hit (document id 4) that, per this
// test's setup, has no score of its own, and checks that it comes back with a
// score of 0 ahead of the three regularly scored hits.
TEST_P(ScoringProcessorTest, ShouldHandleNoScores) {
  // Builds three stored documents along with their matching input hits and
  // expected scored hits.
  ICING_ASSERT_OK_AND_ASSIGN(
      auto hits_and_expectations,
      CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
  std::vector<DocHitInfo> input_hits = std::move(hits_and_expectations.first);
  std::vector<ScoredDocumentHit> expected_scored_hits =
      std::move(hits_and_expectations.second);

  // Prepends a fourth hit, for document id 4, so the iterator yields 4 results
  // one of which doesn't have a score.
  input_hits.insert(input_hits.begin(),
                    DocHitInfo(/*document_id_in=*/4, kSectionIdMaskNone));
  std::unique_ptr<DocHitInfoIterator> input_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(input_hits);

  // The hit without a score is expected to receive the default score of 0.
  ScoredDocumentHit expected_unscored_hit(4, kSectionIdMaskNone,
                                          /*score=*/0.0);

  ScoringSpecProto scoring_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(scoring_spec, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  std::vector<ScoredDocumentHit> actual_hits =
      processor->Score(std::move(input_iterator), /*num_to_score=*/4);
  EXPECT_THAT(
      actual_hits,
      ElementsAre(EqualsScoredDocumentHit(expected_unscored_hit),
                  EqualsScoredDocumentHit(expected_scored_hits.at(0)),
                  EqualsScoredDocumentHit(expected_scored_hits.at(1)),
                  EqualsScoredDocumentHit(expected_scored_hits.at(2))));
}
1099
// With the NONE ranking strategy, checks that every input hit is still
// returned as a scored hit carrying kDefaultScore, regardless of the score
// stored on the document, and in the order the hits were supplied.
TEST_P(ScoringProcessorTest, ShouldWrapResultsWhenNoScoring) {
  DocumentProto document1 = CreateDocument("icing", "email/1", /*score=*/1,
                                           kDefaultCreationTimestampMs);
  DocumentProto document2 = CreateDocument("icing", "email/2", /*score=*/2,
                                           kDefaultCreationTimestampMs);
  DocumentProto document3 = CreateDocument("icing", "email/3", /*score=*/3,
                                           kDefaultCreationTimestampMs);

  // The documents are deliberately stored out of order: 1, then 3, then 2.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id1,
                             document_store()->Put(document1));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id3,
                             document_store()->Put(document3));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId id2,
                             document_store()->Put(document2));

  // Expected outputs: every hit carries kDefaultScore.
  ScoredDocumentHit expected_hit1(id1, kSectionIdMaskNone, kDefaultScore);
  ScoredDocumentHit expected_hit2(id2, kSectionIdMaskNone, kDefaultScore);
  ScoredDocumentHit expected_hit3(id3, kSectionIdMaskNone, kDefaultScore);

  // Input: a dummy iterator that yields the hits in the order 2, 3, 1.
  std::vector<DocHitInfo> input_hits = {DocHitInfo(id2), DocHitInfo(id3),
                                        DocHitInfo(id1)};
  std::unique_ptr<DocHitInfoIterator> input_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(input_hits);

  // A ScoringSpecProto with no scoring strategy
  ScoringSpecProto scoring_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::NONE, GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(scoring_spec, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  std::vector<ScoredDocumentHit> actual_hits =
      processor->Score(std::move(input_iterator), /*num_to_score=*/3);
  EXPECT_THAT(actual_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit2),
                          EqualsScoredDocumentHit(expected_hit3),
                          EqualsScoredDocumentHit(expected_hit1)));
}
1149
1150 INSTANTIATE_TEST_SUITE_P(ScoringProcessorTest, ScoringProcessorTest,
1151 testing::Values(ScorerTestingMode::kNormal,
1152 ScorerTestingMode::kAdvanced));
1153
1154 } // namespace
1155
1156 } // namespace lib
1157 } // namespace icing
1158