• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/scoring/scoring-processor.h"
16 
17 #include <cstdint>
18 
19 #include "icing/text_classifier/lib3/utils/base/statusor.h"
20 #include "gmock/gmock.h"
21 #include "gtest/gtest.h"
22 #include "icing/document-builder.h"
23 #include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
24 #include "icing/proto/document.pb.h"
25 #include "icing/proto/schema.pb.h"
26 #include "icing/proto/scoring.pb.h"
27 #include "icing/proto/term.pb.h"
28 #include "icing/proto/usage.pb.h"
29 #include "icing/schema-builder.h"
30 #include "icing/scoring/scorer-test-utils.h"
31 #include "icing/testing/common-matchers.h"
32 #include "icing/testing/fake-clock.h"
33 #include "icing/testing/tmp-directory.h"
34 
35 namespace icing {
36 namespace lib {
37 
38 namespace {
39 using ::testing::ElementsAre;
40 using ::testing::Eq;
41 using ::testing::Gt;
42 using ::testing::IsEmpty;
43 using ::testing::SizeIs;
44 
45 class ScoringProcessorTest
46     : public ::testing::TestWithParam<ScorerTestingMode> {
47  protected:
ScoringProcessorTest()48   ScoringProcessorTest()
49       : test_dir_(GetTestTempDir() + "/icing"),
50         doc_store_dir_(test_dir_ + "/doc_store"),
51         schema_store_dir_(test_dir_ + "/schema_store") {}
52 
SetUp()53   void SetUp() override {
54     // Creates file directories
55     filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
56     filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str());
57     filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
58 
59     ICING_ASSERT_OK_AND_ASSIGN(
60         schema_store_,
61         SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
62 
63     ICING_ASSERT_OK_AND_ASSIGN(
64         DocumentStore::CreateResult create_result,
65         DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
66                               schema_store_.get(),
67                               /*force_recovery_and_revalidate_documents=*/false,
68                               /*namespace_id_fingerprint=*/false,
69                               PortableFileBackedProtoLog<
70                                   DocumentWrapper>::kDeflateCompressionLevel,
71                               /*initialize_stats=*/nullptr));
72     document_store_ = std::move(create_result.document_store);
73 
74     // Creates a simple email schema
75     SchemaProto test_email_schema =
76         SchemaBuilder()
77             .AddType(SchemaTypeConfigBuilder()
78                          .SetType("email")
79                          .AddProperty(
80                              PropertyConfigBuilder()
81                                  .SetName("subject")
82                                  .SetDataTypeString(
83                                      TermMatchType::PREFIX,
84                                      StringIndexingConfig::TokenizerType::PLAIN)
85                                  .SetDataType(TYPE_STRING)
86                                  .SetCardinality(CARDINALITY_OPTIONAL))
87                          .AddProperty(
88                              PropertyConfigBuilder()
89                                  .SetName("body")
90                                  .SetDataTypeString(
91                                      TermMatchType::PREFIX,
92                                      StringIndexingConfig::TokenizerType::PLAIN)
93                                  .SetDataType(TYPE_STRING)
94                                  .SetCardinality(CARDINALITY_OPTIONAL)))
95             .Build();
96     ICING_ASSERT_OK(schema_store_->SetSchema(
97         test_email_schema, /*ignore_errors_and_delete_documents=*/false,
98         /*allow_circular_schema_definitions=*/false));
99   }
100 
TearDown()101   void TearDown() override {
102     document_store_.reset();
103     schema_store_.reset();
104     filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
105   }
106 
document_store()107   DocumentStore* document_store() { return document_store_.get(); }
108 
schema_store()109   SchemaStore* schema_store() { return schema_store_.get(); }
110 
fake_clock() const111   const FakeClock& fake_clock() const { return fake_clock_; }
112 
113  private:
114   const std::string test_dir_;
115   const std::string doc_store_dir_;
116   const std::string schema_store_dir_;
117   Filesystem filesystem_;
118   FakeClock fake_clock_;
119   std::unique_ptr<DocumentStore> document_store_;
120   std::unique_ptr<SchemaStore> schema_store_;
121 };
122 
// Document score given to test documents whose score is irrelevant to the test.
constexpr int kDefaultScore = 0;
// Creation timestamp (milliseconds) shared by all documents in these tests.
constexpr int64_t kDefaultCreationTimestampMs = 1'571'100'001'111;
125 
CreateDocument(const std::string & name_space,const std::string & uri,int score,int64_t creation_timestamp_ms)126 DocumentProto CreateDocument(const std::string& name_space,
127                              const std::string& uri, int score,
128                              int64_t creation_timestamp_ms) {
129   return DocumentBuilder()
130       .SetKey(name_space, uri)
131       .SetSchema("email")
132       .SetScore(score)
133       .SetCreationTimestampMs(creation_timestamp_ms)
134       .Build();
135 }
136 
137 libtextclassifier3::StatusOr<
138     std::pair<std::vector<DocHitInfo>, std::vector<ScoredDocumentHit>>>
CreateAndInsertsDocumentsWithScores(DocumentStore * document_store,const std::vector<int> & scores)139 CreateAndInsertsDocumentsWithScores(DocumentStore* document_store,
140                                     const std::vector<int>& scores) {
141   std::vector<DocHitInfo> doc_hit_infos;
142   std::vector<ScoredDocumentHit> scored_document_hits;
143   for (int i = 0; i < scores.size(); i++) {
144     ICING_ASSIGN_OR_RETURN(DocumentId document_id,
145                            document_store->Put(CreateDocument(
146                                "icing", "email/" + std::to_string(i),
147                                scores.at(i), kDefaultCreationTimestampMs)));
148     doc_hit_infos.emplace_back(document_id);
149     scored_document_hits.emplace_back(document_id, kSectionIdMaskNone,
150                                       scores.at(i));
151   }
152   return std::pair(doc_hit_infos, scored_document_hits);
153 }
154 
CreateUsageReport(std::string name_space,std::string uri,int64_t timestamp_ms,UsageReport::UsageType usage_type)155 UsageReport CreateUsageReport(std::string name_space, std::string uri,
156                               int64_t timestamp_ms,
157                               UsageReport::UsageType usage_type) {
158   UsageReport usage_report;
159   usage_report.set_document_namespace(name_space);
160   usage_report.set_document_uri(uri);
161   usage_report.set_usage_timestamp_ms(timestamp_ms);
162   usage_report.set_usage_type(usage_type);
163   return usage_report;
164 }
165 
CreateTypePropertyWeights(std::string schema_type,std::vector<PropertyWeight> property_weights)166 TypePropertyWeights CreateTypePropertyWeights(
167     std::string schema_type, std::vector<PropertyWeight> property_weights) {
168   TypePropertyWeights type_property_weights;
169   type_property_weights.set_schema_type(std::move(schema_type));
170   type_property_weights.mutable_property_weights()->Reserve(
171       property_weights.size());
172 
173   for (PropertyWeight& property_weight : property_weights) {
174     *type_property_weights.add_property_weights() = std::move(property_weight);
175   }
176 
177   return type_property_weights;
178 }
179 
CreatePropertyWeight(std::string path,double weight)180 PropertyWeight CreatePropertyWeight(std::string path, double weight) {
181   PropertyWeight property_weight;
182   property_weight.set_path(std::move(path));
183   property_weight.set_weight(weight);
184   return property_weight;
185 }
186 
// Creating a ScoringProcessor without a document store must be rejected with
// FAILED_PRECONDITION.
TEST_F(ScoringProcessorTest, CreationWithNullDocumentStoreShouldFail) {
  // A default-constructed spec is used; only the null document store is under
  // test here.
  ScoringSpecProto default_spec;
  const auto current_time_ms = fake_clock().GetSystemTimeMilliseconds();

  EXPECT_THAT(
      ScoringProcessor::Create(default_spec, /*document_store=*/nullptr,
                               schema_store(), current_time_ms),
      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
194 
// Creating a ScoringProcessor without a schema store must be rejected with
// FAILED_PRECONDITION.
TEST_F(ScoringProcessorTest, CreationWithNullSchemaStoreShouldFail) {
  // A default-constructed spec is used; only the null schema store is under
  // test here.
  ScoringSpecProto default_spec;
  const auto current_time_ms = fake_clock().GetSystemTimeMilliseconds();

  EXPECT_THAT(
      ScoringProcessor::Create(default_spec, document_store(),
                               /*schema_store=*/nullptr, current_time_ms),
      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
203 
// With both stores present and a DOCUMENT_SCORE spec, creation must succeed.
TEST_P(ScoringProcessorTest, ShouldCreateInstance) {
  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
  const auto current_time_ms = fake_clock().GetSystemTimeMilliseconds();

  ICING_EXPECT_OK(ScoringProcessor::Create(
      document_score_spec, document_store(), schema_store(), current_time_ms));
}
211 
// Scoring an iterator that yields no hits must produce no scored hits.
TEST_P(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) {
  // Processor under test, ranking by document score.
  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(document_score_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Iterator backed by an empty hit list.
  std::vector<DocHitInfo> no_hits;
  std::unique_ptr<DocHitInfoIterator> empty_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(no_hits);

  EXPECT_THAT(
      processor->Score(std::move(empty_iterator), /*num_to_score=*/5),
      IsEmpty());
}
231 
// A negative or zero num_to_score must yield no scored hits even though a
// matching document exists.
TEST_P(ScoringProcessorTest, ShouldHandleNonPositiveNumToScore) {
  // One stored document and a hit list referring to it.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId stored_document_id,
      document_store()->Put(CreateDocument("icing", "email/1", /*score=*/1,
                                           kDefaultCreationTimestampMs)));
  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(stored_document_id)};

  // Processor under test, ranking by document score.
  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(document_score_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Score() takes ownership of the iterator, so each attempt gets a fresh one.
  constexpr int kNonPositiveCounts[] = {-1, 0};
  for (const int num_to_score : kNonPositiveCounts) {
    std::unique_ptr<DocHitInfoIterator> hit_iterator =
        std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
    EXPECT_THAT(processor->Score(std::move(hit_iterator), num_to_score),
                IsEmpty());
  }
}
264 
// num_to_score caps the number of scored hits; asking for more than are
// available returns all of them.
TEST_P(ScoringProcessorTest, ShouldRespectNumToScore) {
  // Three stored documents; only the hit infos are needed here.
  ICING_ASSERT_OK_AND_ASSIGN(
      auto inserted,
      CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
  std::vector<DocHitInfo> doc_hit_infos = std::move(inserted.first);

  // Processor under test, ranking by document score.
  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(document_score_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Scores a fresh iterator over all three hits with the given limit.
  const auto score_with_limit = [&](int num_to_score) {
    std::unique_ptr<DocHitInfoIterator> hit_iterator =
        std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
    return processor->Score(std::move(hit_iterator), num_to_score);
  };

  // Limit below the number of hits: truncated to the limit.
  EXPECT_THAT(score_with_limit(/*num_to_score=*/2), SizeIs(2));
  // Limit above the number of hits: every hit is scored.
  EXPECT_THAT(score_with_limit(/*num_to_score=*/4), SizeIs(3));
}
295 
// With DOCUMENT_SCORE ranking, every hit is scored with its document's score
// and the results keep the iterator's order.
TEST_P(ScoringProcessorTest, ShouldScoreByDocumentScore) {
  // The helper returns both the input hits and the matching expected output.
  ICING_ASSERT_OK_AND_ASSIGN(
      auto inserted,
      CreateAndInsertsDocumentsWithScores(document_store(), {1, 3, 2}));
  std::vector<DocHitInfo> input_hits = std::move(inserted.first);
  std::vector<ScoredDocumentHit> expected_hits = std::move(inserted.second);

  // Processor under test, ranking by document score.
  ScoringSpecProto document_score_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(document_score_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Iterator over all three hits.
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(input_hits);

  EXPECT_THAT(processor->Score(std::move(hit_iterator), /*num_to_score=*/3),
              ElementsAre(EqualsScoredDocumentHit(expected_hits.at(0)),
                          EqualsScoredDocumentHit(expected_hits.at(1)),
                          EqualsScoredDocumentHit(expected_hits.at(2))));
}
324 
TEST_P(ScoringProcessorTest,ShouldScoreByRelevanceScore_DocumentsWithDifferentLength)325 TEST_P(ScoringProcessorTest,
326        ShouldScoreByRelevanceScore_DocumentsWithDifferentLength) {
327   DocumentProto document1 =
328       CreateDocument("icing", "email/1", kDefaultScore,
329                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
330   DocumentProto document2 =
331       CreateDocument("icing", "email/2", kDefaultScore,
332                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
333   DocumentProto document3 =
334       CreateDocument("icing", "email/3", kDefaultScore,
335                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
336 
337   ICING_ASSERT_OK_AND_ASSIGN(
338       DocumentId document_id1,
339       document_store()->Put(document1, /*num_tokens=*/10));
340   ICING_ASSERT_OK_AND_ASSIGN(
341       DocumentId document_id2,
342       document_store()->Put(document2, /*num_tokens=*/100));
343   ICING_ASSERT_OK_AND_ASSIGN(
344       DocumentId document_id3,
345       document_store()->Put(document3, /*num_tokens=*/50));
346 
347   DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
348   doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
349   DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
350   doc_hit_info2.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
351   DocHitInfoTermFrequencyPair doc_hit_info3 = DocHitInfo(document_id3);
352   doc_hit_info3.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
353 
354   SectionId section_id = 0;
355   SectionIdMask section_id_mask = UINT64_C(1) << section_id;
356 
357   // Creates input doc_hit_infos and expected output scored_document_hits
358   std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {
359       doc_hit_info1, doc_hit_info2, doc_hit_info3};
360 
361   // Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
362   std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
363       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
364 
365   ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
366       ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
367 
368   // Creates a ScoringProcessor
369   ICING_ASSERT_OK_AND_ASSIGN(
370       std::unique_ptr<ScoringProcessor> scoring_processor,
371       ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
372                                fake_clock().GetSystemTimeMilliseconds()));
373 
374   std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
375       query_term_iterators;
376   query_term_iterators["foo"] =
377       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
378   // Since the three documents all contain the query term "foo" exactly once,
379   // the document's length determines the final score. Document shorter than the
380   // average corpus length are slightly boosted.
381   ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask,
382                                              /*score=*/0.187114);
383   ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask,
384                                              /*score=*/0.084904);
385   ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask,
386                                              /*score=*/0.121896);
387   EXPECT_THAT(
388       scoring_processor->Score(std::move(doc_hit_info_iterator),
389                                /*num_to_score=*/3, &query_term_iterators),
390       ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
391                   EqualsScoredDocumentHit(expected_scored_doc_hit2),
392                   EqualsScoredDocumentHit(expected_scored_doc_hit3)));
393 }
394 
TEST_P(ScoringProcessorTest,ShouldScoreByRelevanceScore_DocumentsWithSameLength)395 TEST_P(ScoringProcessorTest,
396        ShouldScoreByRelevanceScore_DocumentsWithSameLength) {
397   DocumentProto document1 =
398       CreateDocument("icing", "email/1", kDefaultScore,
399                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
400   DocumentProto document2 =
401       CreateDocument("icing", "email/2", kDefaultScore,
402                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
403   DocumentProto document3 =
404       CreateDocument("icing", "email/3", kDefaultScore,
405                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
406 
407   ICING_ASSERT_OK_AND_ASSIGN(
408       DocumentId document_id1,
409       document_store()->Put(document1, /*num_tokens=*/10));
410   ICING_ASSERT_OK_AND_ASSIGN(
411       DocumentId document_id2,
412       document_store()->Put(document2, /*num_tokens=*/10));
413   ICING_ASSERT_OK_AND_ASSIGN(
414       DocumentId document_id3,
415       document_store()->Put(document3, /*num_tokens=*/10));
416 
417   DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
418   doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
419   DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
420   doc_hit_info2.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
421   DocHitInfoTermFrequencyPair doc_hit_info3 = DocHitInfo(document_id3);
422   doc_hit_info3.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
423 
424   SectionId section_id = 0;
425   SectionIdMask section_id_mask = UINT64_C(1) << section_id;
426 
427   // Creates input doc_hit_infos and expected output scored_document_hits
428   std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {
429       doc_hit_info1, doc_hit_info2, doc_hit_info3};
430 
431   // Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
432   std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
433       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
434 
435   ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
436       ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
437 
438   // Creates a ScoringProcessor
439   ICING_ASSERT_OK_AND_ASSIGN(
440       std::unique_ptr<ScoringProcessor> scoring_processor,
441       ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
442                                fake_clock().GetSystemTimeMilliseconds()));
443 
444   std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
445       query_term_iterators;
446   query_term_iterators["foo"] =
447       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
448   // Since the three documents all contain the query term "foo" exactly once
449   // and they have the same length, they will have the same BM25F scoret.
450   ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask,
451                                              /*score=*/0.118455);
452   ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask,
453                                              /*score=*/0.118455);
454   ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask,
455                                              /*score=*/0.118455);
456   EXPECT_THAT(
457       scoring_processor->Score(std::move(doc_hit_info_iterator),
458                                /*num_to_score=*/3, &query_term_iterators),
459       ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
460                   EqualsScoredDocumentHit(expected_scored_doc_hit2),
461                   EqualsScoredDocumentHit(expected_scored_doc_hit3)));
462 }
463 
TEST_P(ScoringProcessorTest,ShouldScoreByRelevanceScore_DocumentsWithDifferentQueryFrequency)464 TEST_P(ScoringProcessorTest,
465        ShouldScoreByRelevanceScore_DocumentsWithDifferentQueryFrequency) {
466   DocumentProto document1 =
467       CreateDocument("icing", "email/1", kDefaultScore,
468                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
469   DocumentProto document2 =
470       CreateDocument("icing", "email/2", kDefaultScore,
471                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
472   DocumentProto document3 =
473       CreateDocument("icing", "email/3", kDefaultScore,
474                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
475 
476   ICING_ASSERT_OK_AND_ASSIGN(
477       DocumentId document_id1,
478       document_store()->Put(document1, /*num_tokens=*/10));
479   ICING_ASSERT_OK_AND_ASSIGN(
480       DocumentId document_id2,
481       document_store()->Put(document2, /*num_tokens=*/10));
482   ICING_ASSERT_OK_AND_ASSIGN(
483       DocumentId document_id3,
484       document_store()->Put(document3, /*num_tokens=*/10));
485 
486   DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
487   // Document 1 contains the query term "foo" 5 times
488   doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/5);
489   DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
490   // Document 1 contains the query term "foo" 1 time
491   doc_hit_info2.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
492   DocHitInfoTermFrequencyPair doc_hit_info3 = DocHitInfo(document_id3);
493   // Document 1 contains the query term "foo" 3 times
494   doc_hit_info3.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
495   doc_hit_info3.UpdateSection(/*section_id*/ 1, /*hit_term_frequency=*/2);
496 
497   SectionIdMask section_id_mask1 = 0b00000001;
498   SectionIdMask section_id_mask2 = 0b00000001;
499   SectionIdMask section_id_mask3 = 0b00000011;
500 
501   // Creates input doc_hit_infos and expected output scored_document_hits
502   std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {
503       doc_hit_info1, doc_hit_info2, doc_hit_info3};
504 
505   // Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
506   std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
507       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
508 
509   ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
510       ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
511 
512   // Creates a ScoringProcessor
513   ICING_ASSERT_OK_AND_ASSIGN(
514       std::unique_ptr<ScoringProcessor> scoring_processor,
515       ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
516                                fake_clock().GetSystemTimeMilliseconds()));
517 
518   std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
519       query_term_iterators;
520   query_term_iterators["foo"] =
521       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
522   // Since the three documents all have the same length, the score is decided by
523   // the frequency of the query term "foo".
524   ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask1,
525                                              /*score=*/0.226674);
526   ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask2,
527                                              /*score=*/0.118455);
528   ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask3,
529                                              /*score=*/0.196720);
530   EXPECT_THAT(
531       scoring_processor->Score(std::move(doc_hit_info_iterator),
532                                /*num_to_score=*/3, &query_term_iterators),
533       ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
534                   EqualsScoredDocumentHit(expected_scored_doc_hit2),
535                   EqualsScoredDocumentHit(expected_scored_doc_hit3)));
536 }
537 
// RELEVANCE_SCORE ranking: a hit whose term frequency is zero must be scored
// as zero.
TEST_P(ScoringProcessorTest,
       ShouldScoreByRelevanceScore_HitTermWithZeroFrequency) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id1,
      document_store()->Put(
          CreateDocument(
              "icing", "email/1", kDefaultScore,
              /*creation_timestamp_ms=*/kDefaultCreationTimestampMs),
          /*num_tokens=*/10));

  // Document 1 has a hit for "foo" in section 0 with a term frequency of 0.
  // NOTE(review): other tests in this file use section id 0 for "body" and 1
  // for "subject" - confirm which property this hit is meant to represent.
  DocHitInfoTermFrequencyPair zero_frequency_hit = DocHitInfo(document_id1);
  zero_frequency_hit.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/0);
  const SectionIdMask section_id_mask1 = 0b00000001;
  std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {zero_frequency_hit};

  // Processor under test, ranking by relevance score.
  ScoringSpecProto relevance_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(relevance_spec, document_store(),
                               schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // The query "foo" needs two iterators over the same hit: the one being
  // scored and the per-term one handed over through the term map.
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
      query_term_iterators;
  query_term_iterators.emplace(
      "foo", std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"));

  // Since the document hit has zero frequency, expect a score of zero.
  const ScoredDocumentHit expected1(document_id1, section_id_mask1,
                                    /*score=*/0.000000);
  EXPECT_THAT(processor->Score(std::move(hit_iterator), /*num_to_score=*/1,
                               &query_term_iterators),
              ElementsAre(EqualsScoredDocumentHit(expected1)));
}
583 
TEST_P(ScoringProcessorTest,ShouldScoreByRelevanceScore_SameHitFrequencyDifferentPropertyWeights)584 TEST_P(ScoringProcessorTest,
585        ShouldScoreByRelevanceScore_SameHitFrequencyDifferentPropertyWeights) {
586   DocumentProto document1 =
587       CreateDocument("icing", "email/1", kDefaultScore,
588                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
589   DocumentProto document2 =
590       CreateDocument("icing", "email/2", kDefaultScore,
591                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
592 
593   ICING_ASSERT_OK_AND_ASSIGN(
594       DocumentId document_id1,
595       document_store()->Put(document1, /*num_tokens=*/1));
596   ICING_ASSERT_OK_AND_ASSIGN(
597       DocumentId document_id2,
598       document_store()->Put(document2, /*num_tokens=*/1));
599 
600   // Document 1 contains the term "foo" 1 time in the "body" property
601   SectionId body_section_id = 0;
602   DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
603   doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
604 
605   // Document 2 contains the term "foo" 1 time in the "subject" property
606   SectionId subject_section_id = 1;
607   DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
608   doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1);
609 
610   // Creates input doc_hit_infos and expected output scored_document_hits
611   std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1,
612                                                             doc_hit_info2};
613 
614   // Creates a dummy DocHitInfoIterator with 2 results for the query "foo"
615   std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
616       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
617 
618   ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
619       ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
620 
621   PropertyWeight body_property_weight =
622       CreatePropertyWeight(/*path=*/"body", /*weight=*/0.5);
623   PropertyWeight subject_property_weight =
624       CreatePropertyWeight(/*path=*/"subject", /*weight=*/2.0);
625   *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
626       /*schema_type=*/"email", {body_property_weight, subject_property_weight});
627 
628   // Creates a ScoringProcessor
629   ICING_ASSERT_OK_AND_ASSIGN(
630       std::unique_ptr<ScoringProcessor> scoring_processor,
631       ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
632                                fake_clock().GetSystemTimeMilliseconds()));
633 
634   std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
635       query_term_iterators;
636   query_term_iterators["foo"] =
637       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
638 
639   SectionIdMask body_section_id_mask = 1U << body_section_id;
640   SectionIdMask subject_section_id_mask = 1U << subject_section_id;
641 
642   // We expect document 2 to have a higher score than document 1 as it matches
643   // "foo" in the "subject" property, which is weighed higher than the "body"
644   // property. Final scores are computed with smoothing applied.
645   ScoredDocumentHit expected_scored_doc_hit1(document_id1, body_section_id_mask,
646                                              /*score=*/0.053624);
647   ScoredDocumentHit expected_scored_doc_hit2(document_id2,
648                                              subject_section_id_mask,
649                                              /*score=*/0.153094);
650   EXPECT_THAT(
651       scoring_processor->Score(std::move(doc_hit_info_iterator),
652                                /*num_to_score=*/2, &query_term_iterators),
653       ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
654                   EqualsScoredDocumentHit(expected_scored_doc_hit2)));
655 }
656 
TEST_P(ScoringProcessorTest,ShouldScoreByRelevanceScore_WithImplicitPropertyWeight)657 TEST_P(ScoringProcessorTest,
658        ShouldScoreByRelevanceScore_WithImplicitPropertyWeight) {
659   DocumentProto document1 =
660       CreateDocument("icing", "email/1", kDefaultScore,
661                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
662   DocumentProto document2 =
663       CreateDocument("icing", "email/2", kDefaultScore,
664                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
665 
666   ICING_ASSERT_OK_AND_ASSIGN(
667       DocumentId document_id1,
668       document_store()->Put(document1, /*num_tokens=*/1));
669   ICING_ASSERT_OK_AND_ASSIGN(
670       DocumentId document_id2,
671       document_store()->Put(document2, /*num_tokens=*/1));
672 
673   // Document 1 contains the term "foo" 1 time in the "body" property
674   SectionId body_section_id = 0;
675   DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
676   doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
677 
678   // Document 2 contains the term "foo" 1 time in the "subject" property
679   SectionId subject_section_id = 1;
680   DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
681   doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1);
682 
683   // Creates input doc_hit_infos and expected output scored_document_hits
684   std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1,
685                                                             doc_hit_info2};
686 
687   // Creates a dummy DocHitInfoIterator with 2 results for the query "foo"
688   std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
689       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
690 
691   ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
692       ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
693 
694   PropertyWeight body_property_weight =
695       CreatePropertyWeight(/*path=*/"body", /*weight=*/0.5);
696   *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
697       /*schema_type=*/"email", {body_property_weight});
698 
699   // Creates a ScoringProcessor
700   ICING_ASSERT_OK_AND_ASSIGN(
701       std::unique_ptr<ScoringProcessor> scoring_processor,
702       ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
703                                fake_clock().GetSystemTimeMilliseconds()));
704 
705   std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
706       query_term_iterators;
707   query_term_iterators["foo"] =
708       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
709 
710   SectionIdMask body_section_id_mask = 1U << body_section_id;
711   SectionIdMask subject_section_id_mask = 1U << subject_section_id;
712 
713   // We expect document 2 to have a higher score than document 1 as it matches
714   // "foo" in the "subject" property, which is weighed higher than the "body"
715   // property. This is because the "subject" property is implictly given a
716   // a weight of 1.0, the default weight value. Final scores are computed with
717   // smoothing applied.
718   ScoredDocumentHit expected_scored_doc_hit1(document_id1, body_section_id_mask,
719                                              /*score=*/0.094601);
720   ScoredDocumentHit expected_scored_doc_hit2(document_id2,
721                                              subject_section_id_mask,
722                                              /*score=*/0.153094);
723   EXPECT_THAT(
724       scoring_processor->Score(std::move(doc_hit_info_iterator),
725                                /*num_to_score=*/2, &query_term_iterators),
726       ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
727                   EqualsScoredDocumentHit(expected_scored_doc_hit2)));
728 }
729 
TEST_P(ScoringProcessorTest,ShouldScoreByRelevanceScore_WithDefaultPropertyWeight)730 TEST_P(ScoringProcessorTest,
731        ShouldScoreByRelevanceScore_WithDefaultPropertyWeight) {
732   DocumentProto document1 =
733       CreateDocument("icing", "email/1", kDefaultScore,
734                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
735   DocumentProto document2 =
736       CreateDocument("icing", "email/2", kDefaultScore,
737                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
738 
739   ICING_ASSERT_OK_AND_ASSIGN(
740       DocumentId document_id1,
741       document_store()->Put(document1, /*num_tokens=*/1));
742 
743   // Document 1 contains the term "foo" 1 time in the "body" property
744   SectionId body_section_id = 0;
745   DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
746   doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
747 
748   // Creates input doc_hit_infos and expected output scored_document_hits
749   std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1};
750 
751   // Creates a dummy DocHitInfoIterator with 1 result for the query "foo"
752   std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
753       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
754 
755   ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
756       ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
757 
758   *spec_proto.add_type_property_weights() =
759       CreateTypePropertyWeights(/*schema_type=*/"email", {});
760 
761   // Creates a ScoringProcessor with no explicit weights set.
762   ICING_ASSERT_OK_AND_ASSIGN(
763       std::unique_ptr<ScoringProcessor> scoring_processor,
764       ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
765                                fake_clock().GetSystemTimeMilliseconds()));
766 
767   ScoringSpecProto spec_proto_with_weights =
768       CreateScoringSpecForRankingStrategy(
769           ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
770 
771   PropertyWeight body_property_weight = CreatePropertyWeight(/*path=*/"body",
772                                                              /*weight=*/1.0);
773   *spec_proto_with_weights.add_type_property_weights() =
774       CreateTypePropertyWeights(/*schema_type=*/"email",
775                                 {body_property_weight});
776 
777   // Creates a ScoringProcessor with default weight set for "body" property.
778   ICING_ASSERT_OK_AND_ASSIGN(
779       std::unique_ptr<ScoringProcessor> scoring_processor_with_weights,
780       ScoringProcessor::Create(spec_proto_with_weights, document_store(),
781                                schema_store(),
782                                fake_clock().GetSystemTimeMilliseconds()));
783 
784   std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
785       query_term_iterators;
786   query_term_iterators["foo"] =
787       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
788 
789   // Create a doc hit iterator
790   std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
791       query_term_iterators_scoring_with_weights;
792   query_term_iterators_scoring_with_weights["foo"] =
793       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
794 
795   SectionIdMask body_section_id_mask = 1U << body_section_id;
796 
797   // We expect document 1 to have the same score whether a weight is explicitly
798   // set to 1.0 or implictly scored with the default weight. Final scores are
799   // computed with smoothing applied.
800   ScoredDocumentHit expected_scored_doc_hit(document_id1, body_section_id_mask,
801                                             /*score=*/0.208191);
802   EXPECT_THAT(
803       scoring_processor->Score(std::move(doc_hit_info_iterator),
804                                /*num_to_score=*/1, &query_term_iterators),
805       ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit)));
806 
807   // Restore ownership of doc hit iterator and query term iterator to test.
808   doc_hit_info_iterator =
809       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
810   query_term_iterators["foo"] =
811       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
812 
813   EXPECT_THAT(scoring_processor_with_weights->Score(
814                   std::move(doc_hit_info_iterator),
815                   /*num_to_score=*/1, &query_term_iterators),
816               ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit)));
817 }
818 
TEST_P(ScoringProcessorTest,ShouldScoreByRelevanceScore_WithZeroPropertyWeight)819 TEST_P(ScoringProcessorTest,
820        ShouldScoreByRelevanceScore_WithZeroPropertyWeight) {
821   DocumentProto document1 =
822       CreateDocument("icing", "email/1", kDefaultScore,
823                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
824   DocumentProto document2 =
825       CreateDocument("icing", "email/2", kDefaultScore,
826                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
827 
828   ICING_ASSERT_OK_AND_ASSIGN(
829       DocumentId document_id1,
830       document_store()->Put(document1, /*num_tokens=*/1));
831   ICING_ASSERT_OK_AND_ASSIGN(
832       DocumentId document_id2,
833       document_store()->Put(document2, /*num_tokens=*/1));
834 
835   // Document 1 contains the term "foo" 1 time in the "body" property
836   SectionId body_section_id = 0;
837   DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
838   doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
839 
840   // Document 2 contains the term "foo" 1 time in the "subject" property
841   SectionId subject_section_id = 1;
842   DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
843   doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1);
844 
845   // Creates input doc_hit_infos and expected output scored_document_hits
846   std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1,
847                                                             doc_hit_info2};
848 
849   // Creates a dummy DocHitInfoIterator with 2 results for the query "foo"
850   std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
851       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
852 
853   ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
854       ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
855 
856   // Sets property weight for "body" to 0.0.
857   PropertyWeight body_property_weight =
858       CreatePropertyWeight(/*path=*/"body", /*weight=*/0.0);
859   // Sets property weight for "subject" to 1.0.
860   PropertyWeight subject_property_weight =
861       CreatePropertyWeight(/*path=*/"subject", /*weight=*/1.0);
862   *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
863       /*schema_type=*/"email", {body_property_weight, subject_property_weight});
864 
865   // Creates a ScoringProcessor
866   ICING_ASSERT_OK_AND_ASSIGN(
867       std::unique_ptr<ScoringProcessor> scoring_processor,
868       ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
869                                fake_clock().GetSystemTimeMilliseconds()));
870 
871   std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
872       query_term_iterators;
873   query_term_iterators["foo"] =
874       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
875 
876   std::vector<ScoredDocumentHit> scored_document_hits =
877       scoring_processor->Score(std::move(doc_hit_info_iterator),
878                                /*num_to_score=*/2, &query_term_iterators);
879 
880   // We expect document1 to have a score of 0.0 as the query term "foo" matches
881   // in the "body" property which has a weight of 0.0. This is a result of the
882   // weighted term frequency being scaled down to 0.0 for the hit. We expect
883   // document2 to have a positive score as the query term "foo" matches in the
884   // "subject" property which has a weight of 1.0.
885   EXPECT_THAT(scored_document_hits, SizeIs(2));
886   EXPECT_THAT(scored_document_hits.at(0).document_id(), Eq(document_id1));
887   EXPECT_THAT(scored_document_hits.at(0).score(), Eq(0.0));
888   EXPECT_THAT(scored_document_hits.at(1).document_id(), Eq(document_id2));
889   EXPECT_THAT(scored_document_hits.at(1).score(), Gt(0.0));
890 }
891 
// With the CREATION_TIMESTAMP strategy each hit's score is expected to equal
// its document's creation timestamp, and the hits are expected to come back
// in the order the iterator produced them.
TEST_P(ScoringProcessorTest, ShouldScoreByCreationTimestamp) {
  // Three emails whose creation timestamps increase from email 1 to email 3.
  DocumentProto email1 =
      CreateDocument("icing", "email/1", kDefaultScore,
                     /*creation_timestamp_ms=*/1571100001111);
  DocumentProto email2 =
      CreateDocument("icing", "email/2", kDefaultScore,
                     /*creation_timestamp_ms=*/1571100002222);
  DocumentProto email3 =
      CreateDocument("icing", "email/3", kDefaultScore,
                     /*creation_timestamp_ms=*/1571100003333);

  // Stored deliberately out of order: 1, 3, 2.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email1_id,
                             document_store()->Put(email1));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email3_id,
                             document_store()->Put(email3));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email2_id,
                             document_store()->Put(email2));

  // The dummy iterator yields the three hits in the order 2, 3, 1.
  std::vector<DocHitInfo> hits = {DocHitInfo(email2_id), DocHitInfo(email3_id),
                                  DocHitInfo(email1_id)};
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(hits);

  ScoringSpecProto scoring_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP, GetParam());
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(scoring_spec, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Each expected hit carries the creation timestamp as its score.
  ScoredDocumentHit expected_hit1(email1_id, kSectionIdMaskNone,
                                  email1.creation_timestamp_ms());
  ScoredDocumentHit expected_hit2(email2_id, kSectionIdMaskNone,
                                  email2.creation_timestamp_ms());
  ScoredDocumentHit expected_hit3(email3_id, kSectionIdMaskNone,
                                  email3.creation_timestamp_ms());

  std::vector<ScoredDocumentHit> scored_hits =
      processor->Score(std::move(hit_iterator), /*num_to_score=*/3);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit2),
                          EqualsScoredDocumentHit(expected_hit3),
                          EqualsScoredDocumentHit(expected_hit1)));
}
940 
TEST_P(ScoringProcessorTest,ShouldScoreByUsageCount)941 TEST_P(ScoringProcessorTest, ShouldScoreByUsageCount) {
942   DocumentProto document1 =
943       CreateDocument("icing", "email/1", kDefaultScore,
944                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
945   DocumentProto document2 =
946       CreateDocument("icing", "email/2", kDefaultScore,
947                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
948   DocumentProto document3 =
949       CreateDocument("icing", "email/3", kDefaultScore,
950                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
951 
952   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
953                              document_store()->Put(document1));
954   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
955                              document_store()->Put(document2));
956   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
957                              document_store()->Put(document3));
958 
959   // Report usage for doc1 once and doc2 twice.
960   UsageReport usage_report_doc1 = CreateUsageReport(
961       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
962       UsageReport::USAGE_TYPE1);
963   UsageReport usage_report_doc2 = CreateUsageReport(
964       /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/0,
965       UsageReport::USAGE_TYPE1);
966   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc1));
967   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
968   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
969 
970   DocHitInfo doc_hit_info1(document_id1);
971   DocHitInfo doc_hit_info2(document_id2);
972   DocHitInfo doc_hit_info3(document_id3);
973   ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
974                                          /*score=*/1);
975   ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
976                                          /*score=*/2);
977   ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
978                                          /*score=*/0);
979 
980   // Creates a dummy DocHitInfoIterator with 3 results
981   std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
982                                            doc_hit_info3};
983   std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
984       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
985 
986   ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
987       ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam());
988 
989   // Creates a ScoringProcessor which ranks in descending order
990   ICING_ASSERT_OK_AND_ASSIGN(
991       std::unique_ptr<ScoringProcessor> scoring_processor,
992       ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
993                                fake_clock().GetSystemTimeMilliseconds()));
994 
995   EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
996                                        /*num_to_score=*/3),
997               ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
998                           EqualsScoredDocumentHit(scored_document_hit2),
999                           EqualsScoredDocumentHit(scored_document_hit3)));
1000 }
1001 
TEST_P(ScoringProcessorTest,ShouldScoreByUsageTimestamp)1002 TEST_P(ScoringProcessorTest, ShouldScoreByUsageTimestamp) {
1003   DocumentProto document1 =
1004       CreateDocument("icing", "email/1", kDefaultScore,
1005                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
1006   DocumentProto document2 =
1007       CreateDocument("icing", "email/2", kDefaultScore,
1008                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
1009   DocumentProto document3 =
1010       CreateDocument("icing", "email/3", kDefaultScore,
1011                      /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
1012 
1013   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
1014                              document_store()->Put(document1));
1015   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
1016                              document_store()->Put(document2));
1017   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
1018                              document_store()->Put(document3));
1019 
1020   // Report usage for doc1 and doc2.
1021   UsageReport usage_report_doc1 = CreateUsageReport(
1022       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
1023       UsageReport::USAGE_TYPE1);
1024   UsageReport usage_report_doc2 = CreateUsageReport(
1025       /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/5000,
1026       UsageReport::USAGE_TYPE1);
1027   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc1));
1028   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
1029 
1030   DocHitInfo doc_hit_info1(document_id1);
1031   DocHitInfo doc_hit_info2(document_id2);
1032   DocHitInfo doc_hit_info3(document_id3);
1033   ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
1034                                          /*score=*/1000);
1035   ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
1036                                          /*score=*/5000);
1037   ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
1038                                          /*score=*/0);
1039 
1040   // Creates a dummy DocHitInfoIterator with 3 results
1041   std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
1042                                            doc_hit_info3};
1043   std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
1044       std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
1045 
1046   ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
1047       ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP,
1048       GetParam());
1049 
1050   // Creates a ScoringProcessor which ranks in descending order
1051   ICING_ASSERT_OK_AND_ASSIGN(
1052       std::unique_ptr<ScoringProcessor> scoring_processor,
1053       ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
1054                                fake_clock().GetSystemTimeMilliseconds()));
1055 
1056   EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
1057                                        /*num_to_score=*/3),
1058               ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
1059                           EqualsScoredDocumentHit(scored_document_hit2),
1060                           EqualsScoredDocumentHit(scored_document_hit3)));
1061 }
1062 
// A hit whose document id was not produced by the insert helper below is
// expected to be scored with the default score 0 rather than dropped.
TEST_P(ScoringProcessorTest, ShouldHandleNoScores) {
  // Inserts three documents with scores 1, 2, 3 and collects both the hits to
  // feed in and the scored hits expected back for them.
  ICING_ASSERT_OK_AND_ASSIGN(
      auto hits_and_expected,
      CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
  std::vector<DocHitInfo> hits = std::move(hits_and_expected.first);
  std::vector<ScoredDocumentHit> expected_scored_hits =
      std::move(hits_and_expected.second);

  // Prepends a fourth hit, for document id 4, which has no score to look up.
  hits.insert(hits.begin(),
              DocHitInfo(/*document_id_in=*/4, kSectionIdMaskNone));
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(hits);

  // The hit without a score is expected to receive the default score 0.
  ScoredDocumentHit expected_default_hit(4, kSectionIdMaskNone,
                                         /*score=*/0.0);

  ScoringSpecProto scoring_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(scoring_spec, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Results follow the iterator order: the default-scored hit comes first.
  std::vector<ScoredDocumentHit> scored_hits =
      processor->Score(std::move(hit_iterator), /*num_to_score=*/4);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_default_hit),
                          EqualsScoredDocumentHit(expected_scored_hits.at(0)),
                          EqualsScoredDocumentHit(expected_scored_hits.at(1)),
                          EqualsScoredDocumentHit(expected_scored_hits.at(2))));
}
1099 
// With RankingStrategy::NONE the processor is expected to wrap each hit with
// kDefaultScore, ignoring the documents' own scores, and to keep the order
// in which the iterator produced the hits.
TEST_P(ScoringProcessorTest, ShouldWrapResultsWhenNoScoring) {
  // Three emails with distinct document scores 1, 2 and 3.
  DocumentProto email1 = CreateDocument("icing", "email/1", /*score=*/1,
                                        kDefaultCreationTimestampMs);
  DocumentProto email2 = CreateDocument("icing", "email/2", /*score=*/2,
                                        kDefaultCreationTimestampMs);
  DocumentProto email3 = CreateDocument("icing", "email/3", /*score=*/3,
                                        kDefaultCreationTimestampMs);

  // Stored deliberately out of order: 1, 3, 2.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email1_id,
                             document_store()->Put(email1));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email3_id,
                             document_store()->Put(email3));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email2_id,
                             document_store()->Put(email2));

  // The dummy iterator yields the three hits in the order 2, 3, 1.
  std::vector<DocHitInfo> hits = {DocHitInfo(email2_id), DocHitInfo(email3_id),
                                  DocHitInfo(email1_id)};
  std::unique_ptr<DocHitInfoIterator> hit_iterator =
      std::make_unique<DocHitInfoIteratorDummy>(hits);

  // A scoring spec that selects no ranking strategy.
  ScoringSpecProto scoring_spec = CreateScoringSpecForRankingStrategy(
      ScoringSpecProto::RankingStrategy::NONE, GetParam());
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ScoringProcessor> processor,
      ScoringProcessor::Create(scoring_spec, document_store(), schema_store(),
                               fake_clock().GetSystemTimeMilliseconds()));

  // Every expected hit carries kDefaultScore.
  ScoredDocumentHit expected_hit1(email1_id, kSectionIdMaskNone,
                                  kDefaultScore);
  ScoredDocumentHit expected_hit2(email2_id, kSectionIdMaskNone,
                                  kDefaultScore);
  ScoredDocumentHit expected_hit3(email3_id, kSectionIdMaskNone,
                                  kDefaultScore);

  std::vector<ScoredDocumentHit> scored_hits =
      processor->Score(std::move(hit_iterator), /*num_to_score=*/3);
  EXPECT_THAT(scored_hits,
              ElementsAre(EqualsScoredDocumentHit(expected_hit2),
                          EqualsScoredDocumentHit(expected_hit3),
                          EqualsScoredDocumentHit(expected_hit1)));
}
1149 
1150 INSTANTIATE_TEST_SUITE_P(ScoringProcessorTest, ScoringProcessorTest,
1151                          testing::Values(ScorerTestingMode::kNormal,
1152                                          ScorerTestingMode::kAdvanced));
1153 
1154 }  // namespace
1155 
1156 }  // namespace lib
1157 }  // namespace icing
1158