//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

include "actions/actions-entity-data.fbs";
include "annotator/model.fbs";
include "utils/codepoint-range.fbs";
include "utils/flatbuffers/flatbuffers.fbs";
include "utils/grammar/rules.fbs";
include "utils/intents/intent-config.fbs";
include "utils/normalization.fbs";
include "utils/resources.fbs";
include "utils/tokenizer.fbs";
include "utils/zlib/buffer.fbs";

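// FlatBuffers file identifier: a 4-character tag stored in serialized
// buffers so that readers can check that a buffer plausibly contains an
// ActionsModel before parsing it.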
file_identifier "TC3A";

// Prediction type for a multi-task model.
namespace libtextclassifier3;
enum PredictionType : int {
  UNSUPPORTED = 0,
  NEXT_MESSAGE_PREDICTION = 1,
  INTENT_TRIGGERING = 2,
  ENTITY_ANNOTATION = 3,
}

namespace libtextclassifier3;
enum RankingOptionsSortType : int {
  SORT_TYPE_UNSPECIFIED = 0,

  // Rank results (or groups) by score, then type
  SORT_TYPE_SCORE = 1,

  // Rank results (or groups) by priority score, then score, then type
  SORT_TYPE_PRIORITY_SCORE = 2,
}

// Prediction metadata for an arbitrary task.
namespace libtextclassifier3;
table PredictionMetadata {
  prediction_type:PredictionType;
  task_spec:ActionSuggestionSpec;
  output_suggestions:int;
  output_suggestions_scores:int;
  output_suggestions_spans:int;
}

namespace libtextclassifier3.TensorflowLiteModelSpec_;
table InputNameIndexEntry {
  key:string (key, shared);
  value:int;
}

// TensorFlow Lite model for suggesting actions.
namespace libtextclassifier3;
table TensorflowLiteModelSpec {
  // TensorFlow Lite model for suggesting actions.
  tflite_model:[ubyte] (force_align: 16);

  // Input specification.
  // (num messages,) int32 tensor, the user id per message.
  input_user_id:int = 0;

  // (num messages,) string tensor, each message of the conversation.
  input_context:int = 1;

  // int, the number of messages in the conversation.
  input_context_length:int = 2;

  // (num messages,) float tensor, the time difference in seconds of the
  // messages in the conversation.
  input_time_diffs:int = 3;

  // int, the number of smart replies to produce.
  input_num_suggestions:int = 4;

  reserved_7:int (deprecated);

  reserved_8:int (deprecated);

  reserved_9:int (deprecated);

  // Input port for hashed and embedded tokens, a (num messages, max tokens,
  // embedding size) float tensor specifying the embeddings of each token of
  // each message in the conversation.
  input_token_embeddings:int = -1;

  // Input port for the number of tokens per message.
  // (num messages) int32 tensor specifying the number of tokens in each message
  // in the conversation.
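  // For example (hypothetical sizes): for a conversation of 3 messages with
  // at most 5 tokens per message, input_token_embeddings would be a
  // (3, 5, embedding size) float tensor and this input would be the (3,)
  // int32 tensor [4, 2, 5].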
  input_num_tokens:int = -1;

  // Output specification.
  output_replies:int = 0;

  output_replies_scores:int = 1;
  output_sensitive_topic_score:int = 3;
  output_triggering_score:int = 4;
  output_actions_scores:int = 5;

  // Model setup.
  // When true, the inputs are resized to the concrete input sizes before
  // inference; otherwise, it's assumed that the model has the correct input
  // shapes set.
  resize_inputs:bool = false;

  // Input port for the hashed, embedded and flattened/concatenated tokens.
  // A (max tokens, embedding_size) float tensor specifying the embeddings of
  // each token.
  input_flattened_token_embeddings:int = -1;

  // Generalized output specification that handles an arbitrary number of
  // prediction tasks.
  prediction_metadata:[PredictionMetadata];

  // Map of additional input tensor name to its index.
  input_name_index:[TensorflowLiteModelSpec_.InputNameIndexEntry];

  // If greater than 0, pad or truncate the input_user_id and input_context
  // tensors to the length of input_length_to_pad.
  input_length_to_pad:int = 0;
}

// Configuration for the tokenizer.
namespace libtextclassifier3;
table ActionsTokenizerOptions {
  type:TokenizationType = INTERNAL_TOKENIZER;

  // If true, whitespace tokens will be kept when using the ICU tokenizer.
  icu_preserve_whitespace_tokens:bool = false;

  // Codepoint ranges that determine what role the different codepoints play
  // during tokenization. The ranges must not overlap.
  tokenization_codepoint_config:[TokenizationCodepointRange];

  // A set of codepoint ranges to use in the mixed tokenization mode to identify
  // stretches of tokens to re-tokenize using the internal tokenizer.
  internal_tokenizer_codepoint_ranges:[CodepointRange];

  // If true, tokens will also be split when the codepoint's script_id changes,
  // as defined in TokenizationCodepointRange.
  tokenize_on_script_change:bool = false;
}

// Configuration for the feature processor.
namespace libtextclassifier3;
table ActionsTokenFeatureProcessorOptions {
  // Tokenizer options.
  tokenizer_options:ActionsTokenizerOptions;

  // Serialized TensorFlow Lite model with weights for the token embeddings.
  embedding_model:[ubyte] (force_align: 16);

  // Size of the embedding.
  embedding_size:int = -1;

  // Number of bits for quantization of the embeddings.
  embedding_quantization_bits:int = 8;

  // Number of buckets used for hashing charactergrams.
  num_buckets:int = -1;

  // Orders of charactergrams to extract, e.g. 2 means character bigrams,
  // 3 character trigrams, etc.
  chargram_orders:[int];

  // Whether to extract the token case feature.
  extract_case_feature:bool;

  // If true, will use the unicode-aware functionality for extracting features.
  unicode_aware_features:bool;

  // Regexp features to extract.
  regexp_features:[string];

  // Whether to remap digits to a single number.
  remap_digits:bool;

  // Whether to lowercase all tokens.
  lowercase_tokens:bool;

  // Maximum length of a word.
  max_token_length:int = 20;

  // The `max_num_tokens_per_message` and `min_num_tokens_per_message` limits
  // are applied when tokens are embedded per message.
  // If set and the number of tokens of a message is bigger than this limit,
  // tokens at the beginning of the message are dropped to fit the limit.
  max_num_tokens_per_message:int = -1;

  // If set, the tokens of each message will be padded to this fixed number of
  // tokens.
  min_num_tokens_per_message:int = -1;

  // If set and the total number of concatenated tokens is bigger than this
  // limit, tokens at the start of the conversation are dropped.
  max_num_total_tokens:int = -1;

  // If set and the total number of concatenated tokens is smaller than this
  // limit, the conversation is padded with padding tokens.
  min_num_total_tokens:int = -1;

  // Id that is used as encoding of the padding token.
  padding_token_id:int = 0;

  // Id that is used as encoding of the start of message token.
  start_token_id:int = 1;

  // Id that is used as encoding of the end of message token.
  end_token_id:int = 2;
}

// N-Gram based linear regression model.
namespace libtextclassifier3;
table NGramLinearRegressionModel {
  // A flat list of all the hashed n-grams concatenated back to back. Elements
  // should only ever be accessed via the offset table below.
  hashed_ngram_tokens:[uint];

  // Offsets to the start of the n-grams in hashed_ngram_tokens. The last
  // element in this array is the length of hashed_ngram_tokens, to make it
  // easier to compute n-gram lengths.
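  // For example (hypothetical values): with hashed_ngram_tokens = [11, 22,
  // 33, 44, 55] and ngram_start_offsets = [0, 2, 5], the model contains two
  // n-grams, [11, 22] (length 2) and [33, 44, 55] (length 3), and the final
  // offset 5 is the total length of hashed_ngram_tokens.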
  ngram_start_offsets:[ushort];

  // Weights of the n-grams.
  ngram_weights:[float];

  // The default weight assigned to n-grams that weren't matched.
  default_token_weight:float;

  // Maximum n-gram length to consider when calculating the denominator.
  // This should usually be the same as max_ngram_length but can diverge
  // if additional (longer) n-grams are added to a model as part of a minor
  // update.
  max_denom_ngram_length:int;

  // If non-zero, the order of the skip-gram to match.
  max_skips:int;

  // The threshold above which the model output is considered positive.
  threshold:float;

  // Model specific tokenizer options.
  // If not specified, will reuse the feature processor tokenizer.
  tokenizer_options:ActionsTokenizerOptions;
}

// TFLite based sensitive topic classifier model.
namespace libtextclassifier3;
table TFLiteSensitiveClassifierConfig {
  // Specification of the model.
  model_spec:TensorflowLiteModelSpec;

  // Triggering threshold: if a sensitive topic has a score higher than this
  // value, it triggers the classifier.
  threshold:float;
}

namespace libtextclassifier3;
table TriggeringPreconditions {
  // Lower bound thresholds for the smart reply model prediction output.
  min_smart_reply_triggering_score:float;

  // Maximum sensitive score for which actions and smart replies are shown.
  max_sensitive_topic_score:float = 1;

  // Whether to suppress all model output when a conversation is classified as
  // sensitive.
  suppress_on_sensitive_topic:bool = true;

  // Thresholds on the model prediction input.
  // The minimal length of input to consider for prediction.
  min_input_length:int = 0;

  // The maximal length of input to consider for prediction, -1 if unbounded.
  max_input_length:int = -1;

  // Minimal fraction of messages in the input conversation that need to match
  // a locale that the model can handle.
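  // For example, with the default of 0.75, a conversation in which only half
  // of the messages match a supported locale does not satisfy this
  // precondition.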
  min_locale_match_fraction:float = 0.75;

  handle_missing_locale_as_supported:bool = false;
  handle_unknown_locale_as_supported:bool = false;

  // Filter input with low-confidence triggers.
  suppress_on_low_confidence_input:bool = true;

  // Same as low_confidence_rules in ActionsModel.
  // NOTE: Only fill this when the TriggeringPreconditions are pushed separately
  // as a flag value (i.e. as an overlay).
  low_confidence_rules:RulesModel;

  reserved_11:float (deprecated);

  reserved_12:float (deprecated);

  reserved_13:float (deprecated);

  // Smart reply thresholds.
  min_reply_score_threshold:float = 0;
}

namespace libtextclassifier3;
table ActionSuggestionSpec {
  // Type of the action suggestion.
  type:string (shared);

  // Text of a smart reply action.
  response_text:string (shared);

  // Score.
  score:float;

  // Additional entity information.
  serialized_entity_data:string (shared);

  // For ranking and internal conflict resolution.
  priority_score:float = 0;

  entity_data:ActionsEntityData;
  response_text_blocklist:[string];
}

// Options to specify triggering behaviour per action class.
namespace libtextclassifier3;
table ActionTypeOptions {
  // The name of the predicted action.
  name:string (shared);

  // Triggering behaviour.
  // Whether the action class is considered in the model output or not.
  enabled:bool = true;

  // Minimal output score threshold.
  min_triggering_score:float = 0;

  // The action to trigger.
  action:ActionSuggestionSpec;
}

namespace libtextclassifier3.AnnotationActionsSpec_;
table AnnotationMapping {
  // The annotation collection.
  annotation_collection:string (shared);

  // The action name to use.
  action:ActionSuggestionSpec;

  // Whether to use the score of the annotation as the action score.
  use_annotation_score:bool = true;

  // Minimum threshold for the annotation score for filtering.
  min_annotation_score:float;

  // If set, the text of the annotation will be used to set a field in the
  // action entity data.
  entity_field:FlatbufferFieldPath;

  // If set, normalization to apply to the annotation text.
  normalization_options:NormalizationOptions;
}

// Configuration for actions based on annotations.
namespace libtextclassifier3;
table AnnotationActionsSpec {
  annotation_mapping:[AnnotationActionsSpec_.AnnotationMapping];

  // Whether to deduplicate annotations by type and text prior to generating
  // actions.
  deduplicate_annotations:bool = true;

  // Annotation usecase to specify for text annotation.
  annotation_usecase:AnnotationUsecase = ANNOTATION_USECASE_SMART;

  // Maximum number of recent messages to consider from any person.
  // We consider at most `max_history_from_any_person` many recent messages if
  // they were received from different users, or at most the maximum of this
  // and `max_history_from_last_person` if they are all from the same user.
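  // For example (hypothetical values): with max_history_from_any_person = 1
  // and max_history_from_last_person = 3, only the most recent message is
  // considered when the recent messages come from different users, but up to
  // the 3 most recent messages are considered when they all come from the
  // same user.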
  max_history_from_any_person:int = 1;

  // Maximum number of recent messages to consider from the last person.
  max_history_from_last_person:int = 1;

  // Whether to include messages from the local user.
  include_local_user_messages:bool = false;

  // Whether to only consider messages up to the last one sent by the local
  // user.
  only_until_last_sent:bool = true;

  // If true, the annotator will populate serialized_entity_data in the
  // results.
  is_serialized_entity_data_enabled:bool = true;
}

// Ranking options.
namespace libtextclassifier3;
table RankingOptions {
  // When true, action suggestions are deduplicated by `type`, `response_text`
  // and associated annotations, keeping the higher-scoring actions.
  deduplicate_suggestions:bool = true;

  // When true, actions are deduplicated by the span they are referring to.
  deduplicate_suggestions_by_span:bool = true;

  // Optional script to run for ranking and filtering the action suggestions.
  // The following global variables are available to the script:
  // * input: (optionally deduplicated) action suggestions, via the `actions`
  //   global
  // * output: indices of the actions to keep in the provided order.
  lua_ranking_script:string (shared);

  compressed_lua_ranking_script:CompressedBuffer;

  // If true, suppresses smart replies if other smart actions are suggested.
  suppress_smart_replies_with_actions:bool = false;

  // If true, keep actions from the same entities together for ranking.
  group_by_annotations:bool = true;

  sort_type:RankingOptionsSortType = SORT_TYPE_SCORE;
}

// Entity data to set from capturing groups.
namespace libtextclassifier3.RulesModel_.RuleActionSpec_;
table RuleCapturingGroup {
  // The id of the group.
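  // For example (hypothetical pattern): for the regex "call me at (\\d+)",
  // group_id 1 would refer to the digits captured by "(\\d+)".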
  group_id:int;

  // If set, the text of the capturing group will be used to set a field
  // in the action entity data.
  entity_field:FlatbufferFieldPath;

  // If set, the capturing group will be used to create a text annotation
  // with the given name and type.
  annotation_type:string (shared);

  annotation_name:string (shared);

  // If set, the capturing group text will be used to create a text
  // reply.
  text_reply:ActionSuggestionSpec;

  // If set, normalization to apply to the capturing group text.
  normalization_options:NormalizationOptions;

  // If set to true, an existing annotator annotation will be used to
  // create the action suggestion's text annotation.
  use_annotation_match:bool;

  // If set, merge in fixed entity data for a match.
  entity_data:ActionsEntityData;
}

// The actions to produce upon triggering.
namespace libtextclassifier3.RulesModel_;
table RuleActionSpec {
  // The action.
  action:ActionSuggestionSpec;

  capturing_group:[RuleActionSpec_.RuleCapturingGroup];
}

// List of regular expression matchers.
namespace libtextclassifier3.RulesModel_;
table RegexRule {
  // The regular expression pattern.
  pattern:string (shared);

  compressed_pattern:CompressedBuffer;
  actions:[RuleActionSpec];

  // Patterns for post-checking the outputs.
  output_pattern:string (shared);

  compressed_output_pattern:CompressedBuffer;
}

// Action configuration.
// Specifies the actions to produce for a rule match.
namespace libtextclassifier3.RulesModel_.GrammarRules_;
table RuleMatch {
  // The actions to produce as part of this match.
  // These are indices into the `actions` array below.
  action_id:[uint];
}

// Configuration for actions based on context-free grammars.
namespace libtextclassifier3.RulesModel_;
table GrammarRules {
  // The tokenizer config.
  tokenizer_options:ActionsTokenizerOptions;

  // The grammar.
  rules:grammar.RulesSet;

  rule_match:[GrammarRules_.RuleMatch];

  // The action specifications used by the rule matches.
  actions:[RuleActionSpec];
}

// Rule based actions.
namespace libtextclassifier3;
table RulesModel {
  regex_rule:[RulesModel_.RegexRule];

  // If true, the regexes will be compiled only on first use.
  lazy_regex_compilation:bool = true;

  grammar_rules:RulesModel_.GrammarRules;
}

namespace libtextclassifier3;
table ActionsModel {
  // Comma-separated list of locales supported by the model as BCP 47 tags.
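  // For example (hypothetical value): "en-US,en-GB,es".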
  locales:string (shared);

  // Version of the actions model.
  version:int;

  // A name for the model that can be used e.g. for logging.
  name:string (shared);

  tflite_model_spec:TensorflowLiteModelSpec;

  // Output classes.
  smart_reply_action_type:string (shared);

  action_type:[ActionTypeOptions];

  // Triggering conditions of the model.
  preconditions:TriggeringPreconditions;

  // Default number of smart reply predictions.
  num_smart_replies:int = 3;

  // Length of message history to consider, -1 if unbounded.
  max_conversation_history_length:int = 1;

  // Configuration for mapping annotations to action suggestions.
  annotation_actions_spec:AnnotationActionsSpec;

  // Configuration for rules.
  rules:RulesModel;

  // Configuration for intent generation on Android.
  android_intent_options:IntentFactoryModel;

  // Model resources.
  resources:ResourcePool;

  // Schema data for handling entity data.
  actions_entity_data_schema:[ubyte];

  // Action ranking options.
  ranking_options:RankingOptions;

  // Lua based actions.
  lua_actions_script:string (shared);

  compressed_lua_actions_script:CompressedBuffer;

  // Low confidence classifiers.
  low_confidence_rules:RulesModel;

  low_confidence_ngram_model:NGramLinearRegressionModel;

  // Feature processor options.
  feature_processor_options:ActionsTokenFeatureProcessorOptions;

  low_confidence_tflite_model:TFLiteSensitiveClassifierConfig;
}

root_type libtextclassifier3.ActionsModel;