// Copyright (C) 2017 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Protos for performing inference with an EmbeddingNetwork.

syntax = "proto2";
option optimize_for = LITE_RUNTIME;

package libtextclassifier.nlp_core;

// Wrapper for storing a matrix of parameters. These are stored in row-major
// order.
message MatrixParams {
  optional int32 rows = 1;  // # of rows in the matrix
  optional int32 cols = 2;  // # of columns in the matrix

  // Non-quantized matrix entries, one float per cell, in row-major order.
  repeated float value = 3 [packed = true];

  // Whether the matrix is quantized.  When true, entries live in
  // bytes_for_quantized_values / bytes_for_col_scales instead of "value".
  optional bool is_quantized = 4 [default = false];

  // Bytes for all quantized values.  Each value (see "repeated float value"
  // field) is quantized to an uint8 (1 byte) value, and all these bytes are
  // concatenated into the string from this field.
  optional bytes bytes_for_quantized_values = 7;

  // Bytes for all scale factors for dequantizing the values.  The quantization
  // process generates a float16 scale factor for each column.  The 2 bytes for
  // each such float16 are put in little-endian order (least significant byte
  // first) and next all these pairs of bytes are concatenated into the string
  // from this field.
  optional bytes bytes_for_col_scales = 8;

  // Field numbers 5 and 6 belonged to deleted fields; reserved so they are
  // never reused with a different meaning.
  reserved 5, 6;
}

// Stores all parameters for a given EmbeddingNetwork. This can either be a
// EmbeddingNetwork or a PrecomputedEmbeddingNetwork: for precomputed networks,
// the embedding weights are actually the activations of the first hidden layer
// *before* the bias is added and the non-linear transform is applied.
//
// Thus, for PrecomputedEmbeddingNetwork storage, hidden layers are stored
// starting from the second hidden layer, while biases are stored for every
// hidden layer.
message EmbeddingNetworkProto {
  // Embeddings and hidden layers. Note that if is_precomputed == true, then the
  // embeddings should store the activations of the first hidden layer, so we
  // must have hidden_bias_size() == hidden_size() + 1 (we store weights for the
  // first hidden layer bias, but not the layer itself.)
  repeated MatrixParams embeddings = 1;
  repeated MatrixParams hidden = 2;
  repeated MatrixParams hidden_bias = 3;

  // Final (softmax) layer of the network: weights and bias.
  optional MatrixParams softmax = 4;
  optional MatrixParams softmax_bias = 5;

  // Element i of the repeated field below indicates the number of features
  // that use the i-th embedding space.
  repeated int32 embedding_num_features = 7;

  // Whether or not this is intended to store a precomputed network.
  optional bool is_precomputed = 11 [default = false];

  // True if this EmbeddingNetworkProto can be used for inference with no
  // additional matrix transposition.
  //
  // Given an EmbeddingNetworkProto produced by a Neurosis training pipeline, we
  // have to transpose a few matrices (e.g., the embedding matrices) before we
  // can perform inference.  When we do so, we negate this flag.  Note: we don't
  // simply set this to true: transposing twice takes us to the original state.
  optional bool is_transposed = 12 [default = false];

  // Allow extensions.
  extensions 100 to max;

  // Field numbers 6, 8, 9 and 10 belonged to deleted fields; reserved so they
  // are never reused with a different meaning.
  reserved 6, 8, 9, 10;
}
