| /third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ |
| D | batchnorm_fold_impl.cu | 32 __global__ void UpdateBatchStd(int channel_size, T* batch_std) { in UpdateBatchStd() 40 …x(const T* d_batch_mean, const T* d_batch_std, const T* x, const T* batch_mean, const T* batch_std, in CalDx() 62 void CalUpdateBatchStd(int channel_size, T* batch_std, cudaStream_t cuda_stream) { in CalUpdateBatchStd() 71 … const T* batch_std, int batch_size, int channel_size, int height, int width, T* dx, in CalBatchNormFoldGrad()
|
| D | batchnorm_fold2_impl.cu | 28 __global__ void BatchNormFold2Kernel(const T *x, const T *beta, const T *gamma, const T *batch_std,… in BatchNormFold2Kernel() 67 …NormFold2GradNotFreeze(const T *d_beta, const T *reduce_x, const T *batch_mean, const T *batch_std, in BatchNormFold2GradNotFreeze() 94 __global__ void DxMul(size_t N, size_t C, size_t HW, const T *batch_std, const T *running_std, T *d… in DxMul() 104 void BatchNormFold2Forward(const T *x, const T *beta, const T *gamma, const T *batch_std, const T *… in BatchNormFold2Forward() 132 …NormFold2GradNotFreeze(const T *d_beta, const T *reduce_x, const T *batch_mean, const T *batch_std, in CalBatchNormFold2GradNotFreeze() 146 …tchNormFold2GradFreeze(const T *d_beta, const T *reduce_x, const T *batch_mean, const T *batch_std, in CalBatchNormFold2GradFreeze() 162 void CalBatchNormFold2GradNotFreezeDxMul(const T *batch_std, const T *running_std, T *d_x, size_t N… in CalBatchNormFold2GradNotFreezeDxMul()
|
| /third_party/mindspore/mindspore/ops/_op_impl/_custom_op/ |
| D | correction_mul.py | 52 def correction_mul_compute(x, batch_std, running_std, kernel_name="correction_mul"): argument 62 def correction_mul(x, batch_std, running_std, y, channel, kernel_name="correction_mul"): argument
|
| D | batchnorm_fold_grad.py | 57 def _batchnorm_fold_grad_compute(d_batch_mean, d_batch_std, data_x, batch_mean, batch_std): argument 77 def batchnorm_fold_grad(d_batch_mean, d_batch_std, x, batch_mean, batch_std, dx, argument
|
| D | batchnorm_fold2.py | 58 def batchnorm_fold2_compute(x, beta, gamma, batch_std, batch_mean, running_std, kernel_name="batchn… argument 73 def batchnorm_fold2(x, beta, gamma, batch_std, batch_mean, running_std, y, kernel_name="batchnorm_f… argument
|
| D | batchnorm_fold2_grad.py | 57 def batchnorm_fold2_grad_compute(dout, dout_reduce, dout_x_reduce, gamma, batch_std, batch_mean, ru… argument 84 def batchnorm_fold2_grad(dout, dout_reduce, dout_x_reduce, gamma, batch_std, batch_mean, running_st… argument
|
| D | correction_mul_grad.py | 52 def correction_mul_grad_compute(dout, x, batch_std, running_std, channel, data_format, kernel_name=… argument 65 def correction_mul_grad(dout, x, batch_std, running_std, dx, mul_dx, channel, kernel_name="correcti… argument
|
| D | batchnorm_fold.py | 101 … y, batch_mean, batch_std, running_mean, running_std, mean_updated, variance_updated, argument
|
| /third_party/mindspore/tests/st/ops/gpu/ |
| D | test_batchnorm_fold_grad_op.py | 34 def construct(self, d_batch_mean, d_batch_std, x, batch_mean, batch_std, current_step): argument 39 def np_result(d_batch_mean, d_batch_std, x, batch_mean, batch_std): argument
|
| D | test_batchnorm_fold2_op.py | 35 …def construct(self, x, beta, gamma, batch_std, batch_mean, running_std, running_mean, current_step… argument 47 …def construct(self, x, beta, gamma, batch_std, batch_mean, running_std, running_mean, current_step… argument
|
| D | test_correction_mul_grad_op.py | 34 def construct(self, dy, x, batch_std, running_std): argument
|
| /third_party/mindspore/mindspore/ops/_grad/ |
| D | grad_quant_ops.py | 98 def bprop(x, batch_std, running_std, out, dout): argument 102 def bprop_npu(x, batch_std, running_std, out, dout): argument 118 …def bprop(x, beta, gamma, batch_std, batch_mean, running_std, running_mean, global_step, out, dout… argument 155 def bprop(x, beta, gamma, batch_std, batch_mean, running_std, out, dout): argument
|
| /third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/ |
| D | batchnorm_fold2_gpu_kernel.h | 55 auto *batch_std = GetDeviceAddress<T>(inputs, 3); in Launch() local
|
| D | batchnorm_fold_grad_gpu_kernel.h | 61 T *batch_std = GetDeviceAddress<T>(inputs, 4); in Launch() local
|
| D | batchnorm_fold_gpu_kernel.h | 71 auto batch_std = GetDeviceAddress<T>(outputs, 1); in Launch() local
|
| D | batchnorm_fold2_grad_gpu_kernel.h | 55 auto *batch_std = GetDeviceAddress<T>(inputs, 3); in Launch() local
|