// Copyright (C) 2017 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Protos for performing inference with an EmbeddingNetwork.

syntax = "proto2";

option optimize_for = LITE_RUNTIME;

package libtextclassifier.nlp_core;

// Wrapper for storing a matrix of parameters.  These are stored in row-major
// order.
message MatrixParams {
  optional int32 rows = 1;  // # of rows in the matrix
  optional int32 cols = 2;  // # of columns in the matrix

  // Non-quantized matrix entries.
  repeated float value = 3 [packed = true];

  // Whether the matrix is quantized.
  optional bool is_quantized = 4 [default = false];

  // Bytes for all quantized values.  Each value (see "repeated float value"
  // field) is quantized to an uint8 (1 byte) value, and all these bytes are
  // concatenated into the string from this field.
  optional bytes bytes_for_quantized_values = 7;

  // Bytes for all scale factors for dequantizing the values.  The quantization
  // process generates a float16 scale factor for each column.  The 2 bytes for
  // each such float16 are put in little-endian order (least significant byte
  // first) and next all these pairs of bytes are concatenated into the string
  // from this field.
  optional bytes bytes_for_col_scales = 8;

  // Field numbers 5 and 6 were used by earlier revisions of this message;
  // keep them reserved so they are never reused with a different meaning.
  reserved 5, 6;
}

// Stores all parameters for a given EmbeddingNetwork.
// This can either be a EmbeddingNetwork or a PrecomputedEmbeddingNetwork: for
// precomputed networks, the embedding weights are actually the activations of
// the first hidden layer *before* the bias is added and the non-linear
// transform is applied.
//
// Thus, for PrecomputedEmbeddingNetwork storage, hidden layers are stored
// starting from the second hidden layer, while biases are stored for every
// hidden layer.
message EmbeddingNetworkProto {
  // Embeddings and hidden layers.  Note that if is_precomputed == true, then
  // the embeddings should store the activations of the first hidden layer, so
  // we must have hidden_bias_size() == hidden_size() + 1 (we store weights for
  // the first hidden layer bias, but not the layer itself).
  repeated MatrixParams embeddings = 1;
  repeated MatrixParams hidden = 2;
  repeated MatrixParams hidden_bias = 3;

  // Final layer of the network.
  optional MatrixParams softmax = 4;
  optional MatrixParams softmax_bias = 5;

  // Element i of the repeated field below indicates number of features that
  // use the i-th embedding space.
  repeated int32 embedding_num_features = 7;

  // Whether or not this is intended to store a precomputed network.
  optional bool is_precomputed = 11 [default = false];

  // True if this EmbeddingNetworkProto can be used for inference with no
  // additional matrix transposition.
  //
  // Given an EmbeddingNetworkProto produced by a Neurosis training pipeline, we
  // have to transpose a few matrices (e.g., the embedding matrices) before we
  // can perform inference.  When we do so, we negate this flag.  Note: we don't
  // simply set this to true: transposing twice takes us to the original state.
  optional bool is_transposed = 12 [default = false];

  // Allow extensions.
  extensions 100 to max;

  // Field numbers 6, 8, 9 and 10 were used by earlier revisions of this
  // message; keep them reserved so they are never reused with a different
  // meaning.
  reserved 6, 8, 9, 10;
}