/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

// Declarations for the encoder's small neural-network helpers: the network
// configuration structures and the prediction / post-processing entry points.

#ifndef AOM_AV1_ENCODER_ML_H_
#define AOM_AV1_ENCODER_ML_H_

#ifdef __cplusplus
extern "C" {
#endif

// NOTE(review): the types FC_LAYER, ACTIVATION, LOSS and NN_CONFIG_V2 are used
// below without being declared in this header; presumably they come from this
// include (see the rtcd_defs remark after struct NN_CONFIG) -- confirm.
#include "config/av1_rtcd.h"

// Upper bound on the number of hidden layers; sizes the per-layer arrays in
// the configuration structures below.
#define NN_MAX_HIDDEN_LAYERS 10
// Upper bound on the number of nodes in a hidden layer that the prediction
// code is documented to assume (see av1_nn_predict_v2 below).
#define NN_MAX_NODES_PER_LAYER 128

// Configuration of a neural network: layer sizes plus read-only pointers to
// the weight and bias tables of each layer.
struct NN_CONFIG {
  int num_inputs;         // Number of input nodes, i.e. features.
  int num_outputs;        // Number of output nodes.
  int num_hidden_layers;  // Number of hidden layers, maximum 10
                          // (NN_MAX_HIDDEN_LAYERS).
  // Number of nodes for each hidden layer.
  int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS];
  // Weight parameters, indexed by layer.
  // The array has one slot more than the maximum number of hidden layers;
  // presumably the extra slot is for the output layer -- TODO confirm.
  const float *weights[NN_MAX_HIDDEN_LAYERS + 1];
  // Bias parameters, indexed by layer (same extent as weights).
  const float *bias[NN_MAX_HIDDEN_LAYERS + 1];
};
// Typedef from struct NN_CONFIG to NN_CONFIG is in rtcd_defs

// Everything in this section is compiled only when CONFIG_NN_V2 is non-zero.
#if CONFIG_NN_V2
// Fully-connected layer configuration.
// Holds the layer dimensions and parameters together with buffers for the
// layer output and for the gradients.
struct FC_LAYER {
  const int num_inputs;   // Number of input nodes, i.e. features.
  const int num_outputs;  // Number of output nodes.

  float *weights;               // Weight parameters.
  float *bias;                  // Bias parameters.
  const ACTIVATION activation;  // Activation function.

  float *output;  // The output array.
  float *dY;      // Gradient of outputs.
  float *dW;      // Gradient of weights.
  float *db;      // Gradient of bias.
};

// Neural network configuration structure, version 2: an array of
// fully-connected layers plus the logits buffer and the loss function.
struct NN_CONFIG_V2 {
  const int num_hidden_layers;  // Number of hidden layers, max = 10
                                // (NN_MAX_HIDDEN_LAYERS).
  // The layer array. It has one slot more than the maximum number of hidden
  // layers; presumably the extra slot is the output layer -- TODO confirm.
  FC_LAYER layer[NN_MAX_HIDDEN_LAYERS + 1];
  const int num_logits;  // Number of output nodes.
  float *logits;         // Raw prediction (same as output of final layer).
  const LOSS loss;       // Loss function.
};

// Calculate prediction based on the given input features and neural net config.
// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
// layer.
//   features    - input feature values (read-only).
//   nn_config   - network configuration; passed as a non-const pointer.
//   reduce_prec - NOTE(review): semantics are not visible in this header;
//                 presumably selects the precision reduction described for
//                 av1_nn_output_prec_reduce below -- confirm against the
//                 implementation.
//   output      - receives the prediction.
void av1_nn_predict_v2(const float *features, NN_CONFIG_V2 *nn_config,
                       int reduce_prec, float *output);
#endif  // CONFIG_NN_V2

// Applies the softmax normalization function to the input
// to get a valid probability distribution in the output:
// output[i] = exp(input[i]) / sum_{k \in [0,n)}(exp(input[k]))
//   input  - values to normalize (read-only).
//   output - receives the normalized values.
//   n      - number of elements the sum in the formula above runs over.
void av1_nn_softmax(const float *input, float *output, int n);

// Applies a precision reduction to output of av1_nn_predict to prevent
// mismatches between C and SIMD implementations.
//   output     - values to adjust; the pointer is to non-const data.
//   num_output - presumably the number of elements in output -- TODO confirm.
// NOTE(review): av1_nn_predict itself is not declared in this header.
void av1_nn_output_prec_reduce(float *const output, int num_output);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // AOM_AV1_ENCODER_ML_H_