//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Flatbuffer schema for Neurosis (FFNN with embeddings) parameters.
//
// Contains the same information as an EmbeddingNetworkProto.

namespace libtextclassifier3.saft_fbs;

// NS stands for NeurosiS.  The next two digits are meant to identify
// incompatible versions.  Ideally, we'll never have to go beyond 00.
file_identifier "NS00";

// Should be kept in sync with the C++ enum nlp_saft::QuantizationType.
enum QuantizationType : byte {
  NONE = 0,
  UINT8 = 1,
  UINT4 = 2,
  FLOAT16 = 3,
}

table Matrix {
  // Number of rows of this matrix.
  rows:int;

  // Number of columns of this matrix.
  cols:int;

  // Type of quantization used for the values from this matrix.
  //
  // If this is QuantizationType_NONE, then the unquantized values should be
  // stored in |values| below.  Otherwise, the bytes of the quantized values
  // should be stored in |quantized_values| and the float16 quantization scales
  // should be stored in |scales|.
  quantization_type:QuantizationType = NONE;

  // Non-quantized matrix elements, in row-major order.  See comments for
  // |quantization_type|.
  values:[float];

  // Quantized matrix elements, in row-major order.  See comments for
  // |quantization_type|.
  quantized_values:[ubyte];

  // Quantization factors (float16), one per matrix row.  There is no float16
  // primitive type for flatbuffers, we just use another 16 bit type.  See
  // comments for |quantization_type|.
  scales:[ushort];
}

// The input layer for a Neurosis network is composed of several parts (named
// "chunks" below, "embedding spaces" in some other parts, etc).  For each
// chunk, we have |num_features| features that extract feature values in that
// chunk.  All values extracted by a feature get projected via the embedding
// matrix |embedding| and summed together, producing a vector of
// |embedding.cols| elements.  The resulting vector gets concatenated with the
// similar vectors for other |num_features| features, producing a "chunk" of
// |num_features * embedding.cols| elements.  This chunk gets concatenated with
// the other chunks.
//
// Note: the specification that indicates what those |num_features| features are
// is stored elsewhere (usually in a ModelParameter, see model.fbs).  But we
// need to know |num_features| here, in order to specify the geometry of the
// Neurosis network.
table InputChunk {
  // Embedding matrix used to project the feature values for this chunk.
  embedding:Matrix;

  // Number of features that feed into this chunk.
  num_features:int;
}

// One layer of neurons from the Neurosis network.  This table can represent a
// hidden layer or the final (output / softmax) layer.
//
// Our formalism is a bit different, but equivalent to the usual description
// from the literature:
//
// Technically, in Neurosis, each layer takes an input (a vector of floats); if
// this is not the first layer, we apply a nonlinear function (ReLU); for the
// first layer, we skip ReLU.  Next, we multiply by |weights| and add |bias|,
// get the input for the next level and so on.  The output from the last layer
// is generally used for softmax classification.  That's why we say that the
// last layer is the "softmax layer".
table NeuralLayer {
  // Weight matrix for this layer.  Geometry: num_inputs x num_neurons, where
  // num_inputs is the number of values produced by previous layer (which can be
  // the input layer, or another hidden layer) and num_neurons is the number of
  // neurons from this layer.
  weights:Matrix;

  // Bias vector for this layer.
  //
  // NOTE: right now, we accept both 1 x num_neurons and num_neurons x 1
  // geometries: the layout of the elements is the same in both cases.
  bias:Matrix;
}

table EmbeddingNetwork {
  // Specification of the chunks that compose the input layer.
  input_chunks:[InputChunk];

  // Hidden layers, followed by the final (softmax) layer.
  layers:[NeuralLayer];
}

root_type EmbeddingNetwork;