// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Encapsulates the configurations on how Icing should score and rank the search
// results.
// TODO(b/170347684): Change all timestamps to seconds.
// Next tag: 8
message ScoringSpecProto {
  // OPTIONAL: Indicates how the search results will be ranked.
  message RankingStrategy {
    enum Code {
      // No ranking strategy specified, documents may be returned in an
      // arbitrary order.
      NONE = 0;

      // Ranked by user-provided document scores.
      DOCUMENT_SCORE = 1;

      // Ranked by document creation timestamps.
      CREATION_TIMESTAMP = 2;

      // The following ranking strategies are based on usage reporting. Please
      // see usage.proto for more information. If one of the usage ranking
      // strategies is used but none of the result documents have reported
      // usage, the documents will be returned in the default reverse
      // insertion order.

      // Ranked by count of reports with usage type 1.
      USAGE_TYPE1_COUNT = 3;

      // Ranked by count of reports with usage type 2.
      USAGE_TYPE2_COUNT = 4;

      // Ranked by count of reports with usage type 3.
      USAGE_TYPE3_COUNT = 5;

      // Ranked by last used timestamp with usage type 1. The timestamps are
      // compared in seconds.
      USAGE_TYPE1_LAST_USED_TIMESTAMP = 6;

      // Ranked by last used timestamp with usage type 2. The timestamps are
      // compared in seconds.
      USAGE_TYPE2_LAST_USED_TIMESTAMP = 7;

      // Ranked by last used timestamp with usage type 3. The timestamps are
      // compared in seconds.
      USAGE_TYPE3_LAST_USED_TIMESTAMP = 8;

      // Ranked by relevance score, currently computed as BM25F score.
      RELEVANCE_SCORE = 9;

      // Ranked by the aggregated score of the joined documents.
      JOIN_AGGREGATE_SCORE = 10;

      // Ranked by the advanced scoring expression provided.
      ADVANCED_SCORING_EXPRESSION = 11;
    }
  }
  optional RankingStrategy.Code rank_by = 1;

  // OPTIONAL: Indicates the order of returned search results, the default is
  // DESC, meaning that results with higher scores come first. This order field
  // will be ignored if 'rank_by' is NONE.
  message Order {
    enum Code {
      // Search results will be returned in a descending order.
      DESC = 0;

      // Search results will be returned in an ascending order.
      ASC = 1;
    }
  }
  optional Order.Code order_by = 2;

  // OPTIONAL: Specifies property weights for RELEVANCE_SCORE scoring strategy.
  // Property weights are used for promoting or demoting query term matches in a
  // document property. When property weights are provided, the term frequency
  // is multiplied by the normalized property weight when computing the
  // normalized term frequency component of BM25F. To prefer query term matches
  // in the "subject" property over the "body" property of "Email" documents,
  // set a higher property weight value for "subject" than "body". By default,
  // all properties that are not specified are given a raw, pre-normalized
  // weight of 1.0 when scoring.
  repeated TypePropertyWeights type_property_weights = 3;

  // OPTIONAL: Specifies the scoring expression for ADVANCED_SCORING_EXPRESSION
  // RankingStrategy.
  optional string advanced_scoring_expression = 4;

  // OPTIONAL: Specifies additional scoring expressions to return extra helpful
  // scores for clients. The scores will not be used for ranking.
  //
  // To set this field, the ranking strategy must be set to
  // ADVANCED_SCORING_EXPRESSION.
  repeated string additional_advanced_scoring_expressions = 5;

  // OPTIONAL: Specifies the schema type alias map for advanced scoring
  // expression.
  //
  // The alias map must be provided for all alias schema types used in the
  // getScorableProperty function of the advanced scoring expression. Otherwise,
  // an error will be returned.
  //
  // For example, Icing clients can pass a SchemaTypeAliasMapProto to icing:
  //   schema_type_alias_map_protos {
  //     alias_schema_type: "person"
  //     schema_types: "package1$database1/person"
  //     schema_types: "package2$database1/person"
  //   }
  //   schema_type_alias_map_protos {
  //     alias_schema_type: "email"
  //     schema_types: "package3$database2/email"
  //     schema_types: "package4$database2/email"
  //   }
  //
  // When Icing processes the advanced scoring expression such as:
  //   "getScorableProperty('person', 'rfsScore')",
  // Icing will
  //   1. first look up documents with schema type "person".
  //   2. if the SchemaTypeAliasMapProto is provided, Icing will also look up
  //      documents with schema types from:
  //        - "package1$database1/person"
  //        - "package2$database1/person"
  //
  // TODO(b/357105837): Consider moving this to a higher level proto.
  // Currently, ScoringSpec from both parent and child queries need to provide
  // this proto, thus introducing duplicates. We can consider moving this to a
  // higher level proto and send it to downstreams.
  repeated SchemaTypeAliasMapProto schema_type_alias_map_protos = 6;

  // Features enabled for scoring.
  repeated ScoringFeatureType scoring_feature_types_enabled = 7
      [packed = true];
}

// Encapsulates the configurations on how suggestion terms are scored and
// ranked.
//
// Next tag: 3
message SuggestionScoringSpecProto {
  // Strategies available for ranking the suggested terms.
  message SuggestionRankingStrategy {
    enum Code {
      // No ranking strategy specified, terms may be returned in an arbitrary
      // order.
      NONE = 0;

      // Ranked by the term's hit count.
      DOCUMENT_COUNT = 1;

      // Ranked by the term's frequency.
      TERM_FREQUENCY = 2;
    }
  }

  // TermMatchType.Code=UNKNOWN
  //   Should never purposely be set and may lead to undefined behavior. This is
  //   used for backwards compatibility reasons.
  //
  // TermMatchType.Code=EXACT_ONLY
  //   Only exact hits will be counted to score a suggestion term.
  //
  // TermMatchType.Code=PREFIX
  //   Both exact hits and prefix hits will be counted to score a suggestion
  //   term.
  optional TermMatchType.Code scoring_match_type = 1;

  // Rank the output suggested result by given SuggestionRankingStrategy.
  optional SuggestionRankingStrategy.Code rank_by = 2;
}

// Property weights to apply to a single schema type. Referenced by
// ScoringSpecProto.type_property_weights.
//
// Next tag: 3
message TypePropertyWeights {
  // Schema type to apply property weights to.
  optional string schema_type = 1;

  // Property weights to apply to the schema type.
  repeated PropertyWeight property_weights = 2;
}

// Weight assigned to a single property path. Referenced by
// TypePropertyWeights.property_weights.
//
// Next tag: 3
message PropertyWeight {
  // Property path to assign property weight to. Property paths must be composed
  // only of property names and property separators (the '.' character).
  // For example, if an "Email" schema type has string property "subject" and
  // document property "sender", which has string property "name", the property
  // path for the email's subject would just be "subject" and the property path
  // for the sender's name would be "sender.name". If an invalid path is
  // specified, the property weight is discarded.
  optional string path = 1;

  // Property weight, valid values are positive and zero. Setting a zero
  // property weight will remove scoring contribution for a query term match in
  // the property. Negative weights are invalid and will result in an error.
  // By default, a property is given a raw, pre-normalized weight of 1.0.
  optional double weight = 2;
}

// Proto that maps an alias schema type to a list of Icing schema types.
//
// Next tag: 3
message SchemaTypeAliasMapProto {
  // Alias schema type provided by Icing clients.
  optional string alias_schema_type = 1;

  // Schema types in Icing.
  repeated string schema_types = 2;
}

// Scoring features that can be listed in
// ScoringSpecProto.scoring_feature_types_enabled.
//
// Next tag: 2
enum ScoringFeatureType {
  SCORING_FEATURE_TYPE_UNKNOWN = 0;

  // Ranking with getScorableProperty(), in advanced scoring expression.
  SCORABLE_PROPERTY_RANKING = 1;
}