1 /* 2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AV1_COMMON_CNN_H_ 13 #define AOM_AV1_COMMON_CNN_H_ 14 15 #ifdef __cplusplus 16 extern "C" { 17 #endif 18 19 #include <math.h> 20 #include <stdbool.h> 21 22 #include "aom_util/aom_thread.h" 23 #include "config/av1_rtcd.h" 24 25 struct AV1Common; 26 27 #define CNN_MAX_HIDDEN_LAYERS 64 28 #define CNN_MAX_LAYERS (CNN_MAX_HIDDEN_LAYERS + 1) 29 #define CNN_MAX_CHANNELS 256 30 #define CNN_MAX_BRANCHES 4 31 #define CNN_MAX_THREADS 32 32 33 #define NO_BRANCH_CONFIG \ 34 { 0, 0, 0 } 35 #define NO_BN_PARAMS \ 36 { NULL, NULL, NULL, NULL } 37 38 enum { 39 PADDING_SAME_ZERO, // tensorflow's SAME padding with pixels outside 40 // the image area assumed to be 0 (default) 41 PADDING_SAME_REPLICATE, // tensorflow's SAME padding with pixels outside 42 // the image area replicated from closest edge 43 PADDING_VALID // tensorflow's VALID padding 44 } UENUM1BYTE(PADDING_TYPE); 45 46 // enum { NONE, RELU, SOFTSIGN } UENUM1BYTE(ACTIVATION); 47 48 // Times when input tensor may be copied to branches given in input_to_branches. 49 // BRANCH_NO_COPY: doesn't copy any tensor. 50 // BRANCH_INPUT: copies the input tensor to branches. 51 // BRANCH_OUTPUT: copies the convolved tensor to branches. 52 // BRANCH_COMBINED: copies the combined (after convolving and branch combining) 53 // tensor. If no combinations happen at this layer, then this option 54 // has the same effect as COPY_OUTPUT. 55 enum { 56 BRANCH_NO_COPY, 57 BRANCH_INPUT, 58 BRANCH_OUTPUT, 59 BRANCH_COMBINED 60 } UENUM1BYTE(BRANCH_COPY); 61 62 // Types of combining branches with output of current layer: 63 // BRANCH_NOC: no branch combining 64 // BRANCH_ADD: Add previously stored branch tensor to output of layer 65 // BRANCH_CAT: Concatenate branch tensor to output of layer 66 enum { BRANCH_NOC, BRANCH_ADD, BRANCH_CAT } UENUM1BYTE(BRANCH_COMBINE); 67 68 // The parameters used to scale each channel in batch 69 // normalization. The processing in done on a per-channel basis. 70 // e.g. bn_mean[c] is the mean for all pixels in channel c. This 71 // is always applied after activation. The output is given by 72 // out[c,i,j] = norm[c,i,j] * bn_gamma[c] + bn_beta[c] where 73 // norm[c,i,j] = (in[c,i,j] - bn_mean[c]) / bn_std[c] 74 // here we assume that the effect of variance_epsilon is already 75 // taken into account when bn_std is calculated. The pointers 76 // needs to be either all zero or all valid. If all zero, then 77 // batchnorm is disabled, else batchnorm is applied. 78 struct CNN_BATCHNORM_PARAMS { 79 const float *bn_gamma; 80 const float *bn_beta; 81 const float *bn_mean; 82 const float *bn_std; 83 }; 84 85 struct CNN_BRANCH_CONFIG { 86 int input_to_branches; // If nonzero, copy the active tensor to the current 87 // layer and store for future use in branches 88 // specified in the field as a binary mask. For 89 // example, if input_to_branch = 0x06, it means the 90 // input tensor to the current branch is copied to 91 // branches 1 and 2 (where 0 represents the primary 92 // branch). One restriction is that the mask 93 // cannot indicate copying to the current branch. 94 // If greater than 0, only copies the channels up 95 // to the given index. 96 int channels_to_copy; // Within the layer, input a copy of active 97 // tensor to branches given in input_to_branches. 98 int branches_to_combine; // mask of branches to combine with output of 99 // current layer, if 100 // branch_combine_type != BRANCH_NOC 101 // For example, if branches_to_combine = 0x0A, 102 // it means that braches 1 and 3 are combined 103 // with the current branch. 104 }; 105 106 struct CNN_LAYER_CONFIG { 107 int in_channels; 108 int filter_width; 109 int filter_height; 110 int out_channels; 111 int skip_width; 112 int skip_height; 113 int maxpool; // whether to use maxpool or not (only effective when 114 // skip width or skip_height are > 1) 115 const float *weights; // array of length filter_height x filter_width x 116 // in_channels x out_channels where the inner-most 117 // scan is out_channels and the outer most scan is 118 // filter_height. 119 const float *bias; // array of length out_channels 120 PADDING_TYPE pad; // padding type 121 ACTIVATION activation; // the activation function to use after convolution 122 int deconvolve; // whether this is a deconvolution layer. 123 // 0: If skip_width or skip_height are > 1, then we 124 // reduce resolution 125 // 1: If skip_width or skip_height are > 1, then we 126 // increase resolution 127 int branch; // branch index in [0, CNN_MAX_BRANCHES - 1], where 128 // 0 refers to the primary branch. 129 BRANCH_COPY branch_copy_type; 130 BRANCH_COMBINE branch_combine_type; 131 struct CNN_BRANCH_CONFIG branch_config; 132 struct CNN_BATCHNORM_PARAMS 133 bn_params; // A struct that contains the parameters 134 // used for batch normalization. 135 int output_num; // The output buffer idx to which the layer output is 136 // written. Set to -1 to disable writing it to the output. In 137 // the case that branch_combine_type is BRANCH_CAT, all 138 // concatenated channels will be written to output. In the 139 // case of BRANCH_ADD, the output will be the result of 140 // summation. 141 }; 142 143 struct CNN_CONFIG { 144 int num_layers; // number of CNN layers ( = number of hidden layers + 1) 145 int is_residue; // whether the output activation is a residue 146 int ext_width, ext_height; // extension horizontally and vertically 147 int strict_bounds; // whether the input bounds are strict or not. 148 // If strict, the extension area is filled by 149 // replication; if not strict, image data is 150 // assumed available beyond the bounds. 151 CNN_LAYER_CONFIG layer_config[CNN_MAX_LAYERS]; 152 }; 153 154 struct CNN_THREAD_DATA { 155 int num_workers; 156 AVxWorker *workers; 157 }; 158 159 struct CNN_MULTI_OUT { 160 int num_outputs; 161 const int *output_channels; 162 const int *output_strides; 163 float **output_buffer; 164 }; 165 166 // Function to return size of output 167 void av1_find_cnn_output_size(int in_width, int in_height, 168 const CNN_CONFIG *cnn_config, int *out_width, 169 int *out_height, int *out_channels); 170 171 // Function to return output width and output height of given layer. 172 void av1_find_cnn_layer_output_size(int in_width, int in_height, 173 const CNN_LAYER_CONFIG *layer_config, 174 int *out_width, int *out_height); 175 176 // Prediction functions from set of input image buffers. This function supports 177 // CNN with multiple outputs. 178 bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height, 179 int stride, const CNN_CONFIG *cnn_config, 180 const CNN_THREAD_DATA *thread_data, 181 struct CNN_MULTI_OUT *output); 182 bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height, 183 int stride, 184 const CNN_CONFIG *cnn_config, 185 const CNN_THREAD_DATA *thread_data, 186 int bit_depth, CNN_MULTI_OUT *output); 187 188 // Prediction functions from set of input image buffers. This function only 189 // supports a single output. 190 bool av1_cnn_predict_img(uint8_t **dgd, int width, int height, int stride, 191 const CNN_CONFIG *cnn_config, 192 const CNN_THREAD_DATA *thread_data, float **output, 193 int out_stride); 194 bool av1_cnn_predict_img_highbd(uint16_t **dgd, int width, int height, 195 int stride, const CNN_CONFIG *cnn_config, 196 const CNN_THREAD_DATA *thread_data, 197 int bit_depth, float **output, int out_stride); 198 199 #ifdef __cplusplus 200 } // extern "C" 201 #endif 202 203 #endif // AOM_AV1_COMMON_CNN_H_ 204