//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Configuration schema for the text encoder op.

namespace libtextclassifier3;

// Sentence piece matcher types that `TextEncoderConfig.matcher_type` can
// take. Values are stored as a single byte.
enum SentencePieceMatcherType : byte {
  MAPPED_TRIE = 0,
  SORTED_STRING_TABLE = 1
}

// Options for the text encoder op. No explicit field ids are declared, so
// the declaration order of the fields below must be kept as is.
table TextEncoderConfig {
  // Code used as the encoding of the start marker.
  start_code:int32 = 0;

  // Code used as the encoding of the end marker.
  end_code:int32 = 1;

  // Offset added to all codes so that they do not intersect with
  // `start_code` and `end_code`.
  encoding_offset:int32 = 2;

  // Code used for characters that are not in the dictionary.
  unknown_code:int32 = -1;

  // Penalty associated with the unknown code.
  unknown_score:float;

  // Normalization options: the serialized normalization charsmap.
  normalization_charsmap:string;
  normalization_charsmap_values:string;

  // Whether to prepend dummy whitespace to the text, so that "world" is
  // treated the same way in "world" and in "hello world".
  add_dummy_prefix:bool = true;

  // Whether to strip leading and trailing whitespace and collapse duplicate
  // internal whitespace.
  remove_extra_whitespaces:bool = true;

  // Whether whitespace is replaced with a meta symbol.
  escape_whitespaces:bool = true;

  // Scores of the sentence pieces.
  pieces_scores:[float];

  // Serialized sentence pieces.
  pieces:string;
  pieces_offsets:[uint32];
  matcher_type:SentencePieceMatcherType = MAPPED_TRIE;
}