// Copyright (C) 2017 The Android Open Source Project // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Protos for performing inference with an EmbeddingNetwork. syntax = "proto2"; option optimize_for = LITE_RUNTIME; package libtextclassifier.nlp_core; // Wrapper for storing a matrix of parameters. These are stored in row-major // order. message MatrixParams { optional int32 rows = 1; // # of rows in the matrix optional int32 cols = 2; // # of columns in the matrix // Non-quantized matrix entries. repeated float value = 3 [packed = true]; // Whether the matrix is quantized. optional bool is_quantized = 4 [default = false]; // Bytes for all quantized values. Each value (see "repeated float value" // field) is quantized to an uint8 (1 byte) value, and all these bytes are // concatenated into the string from this field. optional bytes bytes_for_quantized_values = 7; // Bytes for all scale factors for dequantizing the values. The quantization // process generates a float16 scale factor for each column. The 2 bytes for // each such float16 are put in little-endian order (least significant byte // first) and next all these pairs of bytes are concatenated into the string // from this field. optional bytes bytes_for_col_scales = 8; reserved 5, 6; } // Stores all parameters for a given EmbeddingNetwork. This can either be a // EmbeddingNetwork or a PrecomputedEmbeddingNetwork: for precomputed networks, // the embedding weights are actually the activations of the first hidden layer // *before* the bias is added and the non-linear transform is applied. // // Thus, for PrecomputedEmbeddingNetwork storage, hidden layers are stored // starting from the second hidden layer, while biases are stored for every // hidden layer. message EmbeddingNetworkProto { // Embeddings and hidden layers. Note that if is_precomputed == true, then the // embeddings should store the activations of the first hidden layer, so we // must have hidden_bias_size() == hidden_size() + 1 (we store weights for // first hidden layer bias, but no the layer itself.) repeated MatrixParams embeddings = 1; repeated MatrixParams hidden = 2; repeated MatrixParams hidden_bias = 3; // Final layer of the network. optional MatrixParams softmax = 4; optional MatrixParams softmax_bias = 5; // Element i of the repeated field below indicates number of features that use // the i-th embedding space. repeated int32 embedding_num_features = 7; // Whether or not this is intended to store a precomputed network. optional bool is_precomputed = 11 [default = false]; // True if this EmbeddingNetworkProto can be used for inference with no // additional matrix transposition. // // Given an EmbeddingNetworkProto produced by a Neurosis training pipeline, we // have to transpose a few matrices (e.g., the embedding matrices) before we // can perform inference. When we do so, we negate this flag. Note: we don't // simply set this to true: transposing twice takes us to the original state. optional bool is_transposed = 12 [default = false]; // Allow extensions. extensions 100 to max; reserved 6, 8, 9, 10; }