//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// FlatBuffers schema for Neurosis (FFNN with embeddings) parameters.
//
// Contains the same information as an EmbeddingNetworkProto.

namespace libtextclassifier3.saft_fbs;

// NS stands for NeurosiS.  The next two digits are meant to identify
// incompatible versions.  Ideally, we'll never have to go beyond 00.
file_identifier "NS00";

// Should be kept in sync with the C++ enum nlp_saft::QuantizationType.
enum QuantizationType : byte {
  NONE = 0,
  UINT8 = 1,
  UINT4 = 2,
  FLOAT16 = 3,
}

table Matrix {
  // Number of rows of this matrix.
  rows:int;

  // Number of columns of this matrix.
  cols:int;

  // Type of quantization used for the values of this matrix.
  //
  // If this is QuantizationType_NONE, the unquantized values should be stored
  // in |values| below.  Otherwise, the bytes of the quantized values should be
  // stored in |quantized_values| and the float16 quantization scales should be
  // stored in |scales|.
  quantization_type:QuantizationType = NONE;

  // Non-quantized matrix elements, in row-major order.  See comments for
  // |quantization_type|.
  values:[float];

  // Quantized matrix elements, in row-major order.  See comments for
  // |quantization_type|.
  quantized_values:[ubyte];

  // Quantization factors (float16), one per matrix row.  There is no float16
  // primitive type in FlatBuffers, so we use another 16-bit type.  See
  // comments for |quantization_type|.
  scales:[ushort];
}
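
// For illustration only, a sketch of how one element of a UINT8-quantized
// matrix could be recovered, using the C++ accessors that flatc generates for
// this table.  This is not part of the schema: |DecodeFloat16| is a
// hypothetical helper that expands the 16-bit scale into a float, and the
// exact dequantization convention must match whatever tool produced the model.
//
//   float DequantizedValue(const Matrix& m, int row, int col) {
//     const uint8_t q = m.quantized_values()->Get(row * m.cols() + col);
//     const float scale = DecodeFloat16(m.scales()->Get(row));
//     return scale * static_cast<float>(q);
//   }
//
// Note that |scales| has one entry per row: all elements of a row share the
// same quantization factor.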

// The input layer of a Neurosis network is composed of several parts (named
// "chunks" below and "embedding spaces" in some other places).  For each
// chunk, we have |num_features| features that extract feature values in that
// chunk.  All values extracted by a feature get projected via the embedding
// matrix |embedding| and summed together, producing a vector of
// |embedding.cols| elements.  That vector is concatenated with the similar
// vectors for the other features of the chunk, producing a "chunk" of
// |num_features * embedding.cols| elements.  This chunk is in turn
// concatenated with the other chunks.  (A sketch of this computation appears
// after the root_type declaration at the end of this file.)
//
// Note: the specification of what those |num_features| features are is stored
// elsewhere (usually in a ModelParameter; see model.fbs).  But we need to know
// |num_features| here, in order to specify the geometry of the Neurosis
// network.
table InputChunk {
  embedding:Matrix;
  num_features:int;
}

// One layer of neurons of the Neurosis network.  This table can represent a
// hidden layer or the final (output / softmax) layer.
//
// Our formalism is a bit different from, but equivalent to, the usual
// description in the literature:
//
// Technically, in Neurosis, each layer takes an input (a vector of floats).
// If this is not the first layer, we apply a nonlinear function (ReLU); for
// the first layer, we skip ReLU.  Next, we multiply by |weights| and add
// |bias|; the result is the input for the next layer, and so on.  The output
// of the last layer is generally used for softmax classification, which is
// why we call the last layer the "softmax layer".
table NeuralLayer {
  // Weight matrix for this layer.  Geometry: num_inputs x num_neurons, where
  // num_inputs is the number of values produced by the previous layer (which
  // can be the input layer or another hidden layer) and num_neurons is the
  // number of neurons of this layer.
  weights:Matrix;

  // Bias vector for this layer.
  //
  // NOTE: right now, we accept both 1 x num_neurons and num_neurons x 1
  // geometries: the layout of the elements is the same in both cases.
  bias:Matrix;
}

table EmbeddingNetwork {
  // Specification of the chunks that compose the input layer.
  input_chunks:[InputChunk];

  // Hidden layers, followed by the final (softmax) layer.
  layers:[NeuralLayer];
}

root_type EmbeddingNetwork;
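
// For illustration only, a C++-like sketch of the forward pass described in
// the comments above.  None of these helpers exist in this schema:
// |FeatureRows|, |EmbeddingRowSum|, |MatVec|, |AddBias|, and |ReluInPlace| are
// hypothetical, and the real feature extraction is specified elsewhere (see
// model.fbs).
//
//   // 1. Input layer: for each chunk, each feature sums the embedding rows
//   //    for the values it extracted; the per-feature sums are concatenated
//   //    across features and across chunks.
//   std::vector<float> activations;
//   for (const InputChunk* chunk : *network->input_chunks()) {
//     for (int f = 0; f < chunk->num_features(); ++f) {
//       // Sum of |embedding| rows, one row per extracted feature value;
//       // has chunk->embedding()->cols() elements.
//       std::vector<float> sum =
//           EmbeddingRowSum(chunk->embedding(), FeatureRows(f));
//       activations.insert(activations.end(), sum.begin(), sum.end());
//     }
//   }
//
//   // 2. Layers: ReLU is applied before every layer except the first one.
//   for (int i = 0; i < network->layers()->size(); ++i) {
//     const NeuralLayer* layer = network->layers()->Get(i);
//     if (i > 0) ReluInPlace(&activations);  // x -> max(x, 0), elementwise.
//     activations = MatVec(activations, layer->weights());  // 1 x num_neurons.
//     AddBias(&activations, layer->bias());
//   }
//   // |activations| now holds the pre-softmax scores of the last layer.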