1 /* 2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AV1_COMMON_CNN_H_ 13 #define AOM_AV1_COMMON_CNN_H_ 14 15 #ifdef __cplusplus 16 extern "C" { 17 #endif 18 19 #include <math.h> 20 21 #include "aom_util/aom_thread.h" 22 #include "config/av1_rtcd.h" 23 24 struct AV1Common; 25 26 #define CNN_MAX_HIDDEN_LAYERS 64 27 #define CNN_MAX_LAYERS (CNN_MAX_HIDDEN_LAYERS + 1) 28 #define CNN_MAX_CHANNELS 256 29 #define CNN_MAX_BRANCHES 4 30 #define CNN_MAX_THREADS 32 31 32 #define NO_BRANCH_CONFIG \ 33 { 0, 0, 0 } 34 #define NO_BN_PARAMS \ 35 { NULL, NULL, NULL, NULL } 36 37 enum { 38 PADDING_SAME_ZERO, // tensorflow's SAME padding with pixels outside 39 // the image area assumed to be 0 (default) 40 PADDING_SAME_REPLICATE, // tensorflow's SAME padding with pixels outside 41 // the image area replicated from closest edge 42 PADDING_VALID // tensorflow's VALID padding 43 } UENUM1BYTE(PADDING_TYPE); 44 45 // enum { NONE, RELU, SOFTSIGN } UENUM1BYTE(ACTIVATION); 46 47 // Times when input tensor may be copied to branches given in input_to_branches. 48 // BRANCH_NO_COPY: doesn't copy any tensor. 49 // BRANCH_INPUT: copies the input tensor to branches. 50 // BRANCH_OUTPUT: copies the convolved tensor to branches. 51 // BRANCH_COMBINED: copies the combined (after convolving and branch combining) 52 // tensor. If no combinations happen at this layer, then this option 53 // has the same effect as COPY_OUTPUT. 54 enum { 55 BRANCH_NO_COPY, 56 BRANCH_INPUT, 57 BRANCH_OUTPUT, 58 BRANCH_COMBINED 59 } UENUM1BYTE(BRANCH_COPY); 60 61 // Types of combining branches with output of current layer: 62 // BRANCH_NOC: no branch combining 63 // BRANCH_ADD: Add previously stored branch tensor to output of layer 64 // BRANCH_CAT: Concatenate branch tensor to output of layer 65 enum { BRANCH_NOC, BRANCH_ADD, BRANCH_CAT } UENUM1BYTE(BRANCH_COMBINE); 66 67 // The parameters used to scale each channel in batch 68 // normalization. The processing in done on a per-channel basis. 69 // e.g. bn_mean[c] is the mean for all pixels in channel c. This 70 // is always applied after activation. The output is given by 71 // out[c,i,j] = norm[c,i,j] * bn_gamma[c] + bn_beta[c] where 72 // norm[c,i,j] = (in[c,i,j] - bn_mean[c]) / bn_std[c] 73 // here we assume that the effect of variance_epsilon is already 74 // taken into account when bn_std is calculated. The pointers 75 // needs to be either all zero or all valid. If all zero, then 76 // batchnorm is disabled, else batchnorm is applied. 77 struct CNN_BATCHNORM_PARAMS { 78 const float *bn_gamma; 79 const float *bn_beta; 80 const float *bn_mean; 81 const float *bn_std; 82 }; 83 84 struct CNN_BRANCH_CONFIG { 85 int input_to_branches; // If nonzero, copy the active tensor to the current 86 // layer and store for future use in branches 87 // specified in the field as a binary mask. For 88 // example, if input_to_branch = 0x06, it means the 89 // input tensor to the current branch is copied to 90 // branches 1 and 2 (where 0 represents the primary 91 // branch). One restriction is that the mask 92 // cannot indicate copying to the current branch. 93 // If greater than 0, only copies the channels up 94 // to the given index. 95 int channels_to_copy; // Within the layer, input a copy of active 96 // tensor to branches given in input_to_branches. 97 int branches_to_combine; // mask of branches to combine with output of 98 // current layer, if 99 // branch_combine_type != BRANCH_NOC 100 // For example, if branches_to_combine = 0x0A, 101 // it means that braches 1 and 3 are combined 102 // with the current branch. 103 }; 104 105 struct CNN_LAYER_CONFIG { 106 int in_channels; 107 int filter_width; 108 int filter_height; 109 int out_channels; 110 int skip_width; 111 int skip_height; 112 int maxpool; // whether to use maxpool or not (only effective when 113 // skip width or skip_height are > 1) 114 const float *weights; // array of length filter_height x filter_width x 115 // in_channels x out_channels where the inner-most 116 // scan is out_channels and the outer most scan is 117 // filter_height. 118 const float *bias; // array of length out_channels 119 PADDING_TYPE pad; // padding type 120 ACTIVATION activation; // the activation function to use after convolution 121 int deconvolve; // whether this is a deconvolution layer. 122 // 0: If skip_width or skip_height are > 1, then we 123 // reduce resolution 124 // 1: If skip_width or skip_height are > 1, then we 125 // increase resolution 126 int branch; // branch index in [0, CNN_MAX_BRANCHES - 1], where 127 // 0 refers to the primary branch. 128 BRANCH_COPY branch_copy_type; 129 BRANCH_COMBINE branch_combine_type; 130 struct CNN_BRANCH_CONFIG branch_config; 131 struct CNN_BATCHNORM_PARAMS 132 bn_params; // A struct that contains the parameters 133 // used for batch normalization. 134 int output_num; // The output buffer idx to which the layer output is 135 // written. Set to -1 to disable writing it to the output. In 136 // the case that branch_combine_type is BRANCH_CAT, all 137 // concatenated channels will be written to output. In the 138 // case of BRANCH_ADD, the output will be the result of 139 // summation. 140 }; 141 142 struct CNN_CONFIG { 143 int num_layers; // number of CNN layers ( = number of hidden layers + 1) 144 int is_residue; // whether the output activation is a residue 145 int ext_width, ext_height; // extension horizontally and vertically 146 int strict_bounds; // whether the input bounds are strict or not. 147 // If strict, the extension area is filled by 148 // replication; if not strict, image data is 149 // assumed available beyond the bounds. 150 CNN_LAYER_CONFIG layer_config[CNN_MAX_LAYERS]; 151 }; 152 153 struct CNN_THREAD_DATA { 154 int num_workers; 155 AVxWorker *workers; 156 }; 157 158 struct CNN_MULTI_OUT { 159 int num_outputs; 160 const int *output_channels; 161 const int *output_strides; 162 float **output_buffer; 163 }; 164 165 // Function to return size of output 166 void av1_find_cnn_output_size(int in_width, int in_height, 167 const CNN_CONFIG *cnn_config, int *out_width, 168 int *out_height, int *out_channels); 169 170 // Prediction functions from set of input image buffers. This function supports 171 // CNN with multiple outputs. 172 void av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height, 173 int stride, const CNN_CONFIG *cnn_config, 174 const CNN_THREAD_DATA *thread_data, 175 struct CNN_MULTI_OUT *output); 176 void av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height, 177 int stride, 178 const CNN_CONFIG *cnn_config, 179 const CNN_THREAD_DATA *thread_data, 180 int bit_depth, CNN_MULTI_OUT *output); 181 182 // Prediction functions from set of input image buffers. This function only 183 // supports a single output. 184 void av1_cnn_predict_img(uint8_t **dgd, int width, int height, int stride, 185 const CNN_CONFIG *cnn_config, 186 const CNN_THREAD_DATA *thread_data, float **output, 187 int out_stride); 188 void av1_cnn_predict_img_highbd(uint16_t **dgd, int width, int height, 189 int stride, const CNN_CONFIG *cnn_config, 190 const CNN_THREAD_DATA *thread_data, 191 int bit_depth, float **output, int out_stride); 192 193 #ifdef __cplusplus 194 } // extern "C" 195 #endif 196 197 #endif // AOM_AV1_COMMON_CNN_H_ 198