| /third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ |
| D | correction_mul_impl.cu | 22 __global__ void CorrectionMul(const T* weight, const T* gamma, const T* running_std, const int batc… in CorrectionMul() 40 __global__ void Reduce(int N, int CHW, const T* tmp, const T* running_std, T* d_gamma) { in Reduce() 49 void CalCorrectionMul(const T* weight, const T* gamma, const T* running_std, int N, int C, int H, i… in CalCorrectionMul() 59 void CalCorrectionMulGrad(const T* d_out, const T* weight, const T* running_std, int N, int C, int … in CalCorrectionMulGrad()
|
| D | batchnorm_fold2_impl.cu | 29 … const T *running_std, const T *running_mean, const int *global_step, T *y, in BatchNormFold2Kernel() 68 … const T *running_mean, const T *running_std, const T *gamma, T *d_gamma, in BatchNormFold2GradNotFreeze() 79 … BatchNormFold2GradFreeze(const T *d_beta, const T *running_mean, const T *running_std, T *d_gamma, in BatchNormFold2GradFreeze() 94 __global__ void DxMul(size_t N, size_t C, size_t HW, const T *batch_std, const T *running_std, T *d… in DxMul() 105 … const T *running_std, const T *running_mean, const int *global_step, T *y, int freeze_bn, in BatchNormFold2Forward() 133 … const T *running_mean, const T *running_std, const T *gamma, T *d_gamma, in CalBatchNormFold2GradNotFreeze() 147 … const T *running_mean, const T *running_std, const T *gamma, T *d_gamma, in CalBatchNormFold2GradFreeze() 162 void CalBatchNormFold2GradNotFreezeDxMul(const T *batch_std, const T *running_std, T *d_x, size_t N… in CalBatchNormFold2GradNotFreezeDxMul()
|
| D | batchnorm_fold_impl.cu | 24 __global__ void UpdateRunningStd(int channel_size, const double epsilon, T* running_std) { in UpdateRunningStd() 53 void CalUpdateRunningStd(int channel_size, double epsilon, T* running_std, cudaStream_t cuda_stream… in CalUpdateRunningStd()
|
| /third_party/mindspore/mindspore/ops/_op_impl/_custom_op/ |
| D | correction_mul.py | 52 def correction_mul_compute(x, batch_std, running_std, kernel_name="correction_mul"): argument 62 def correction_mul(x, batch_std, running_std, y, channel, kernel_name="correction_mul"): argument
|
| D | batchnorm_fold2.py | 58 def batchnorm_fold2_compute(x, beta, gamma, batch_std, batch_mean, running_std, kernel_name="batchn… argument 73 def batchnorm_fold2(x, beta, gamma, batch_std, batch_mean, running_std, y, kernel_name="batchnorm_f… argument
|
| D | batchnorm_fold2_grad.py | 57 …orm_fold2_grad_compute(dout, dout_reduce, dout_x_reduce, gamma, batch_std, batch_mean, running_std, argument 84 …old2_grad(dout, dout_reduce, dout_x_reduce, gamma, batch_std, batch_mean, running_std, d_batch_std, argument
|
| D | correction_mul_grad.py | 52 def correction_mul_grad_compute(dout, x, batch_std, running_std, channel, data_format, kernel_name=… argument 65 def correction_mul_grad(dout, x, batch_std, running_std, dx, mul_dx, channel, kernel_name="correcti… argument
|
| D | batchnorm_fold.py | 101 … y, batch_mean, batch_std, running_mean, running_std, mean_updated, variance_updated, argument
|
| /third_party/mindspore/tests/st/ops/gpu/ |
| D | test_batchnorm_fold2_op.py | 35 …def construct(self, x, beta, gamma, batch_std, batch_mean, running_std, running_mean, current_step… argument 47 …def construct(self, x, beta, gamma, batch_std, batch_mean, running_std, running_mean, current_step… argument
|
| D | test_correction_mul_grad_op.py | 34 def construct(self, dy, x, batch_std, running_std): argument
|
| /third_party/mindspore/mindspore/ops/_grad/ |
| D | grad_quant_ops.py | 98 def bprop(x, batch_std, running_std, out, dout): argument 102 def bprop_npu(x, batch_std, running_std, out, dout): argument 118 …def bprop(x, beta, gamma, batch_std, batch_mean, running_std, running_mean, global_step, out, dout… argument 155 def bprop(x, beta, gamma, batch_std, batch_mean, running_std, out, dout): argument
|
| /third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/ |
| D | correction_mul_gpu_kernel.h | 44 auto *running_std = GetDeviceAddress<T>(inputs, 2); in Launch() local
|
| D | correction_mul_grad_gpu_kernel.h | 45 auto *running_std = GetDeviceAddress<T>(inputs, 3); in Launch() local
|
| D | batchnorm_fold2_gpu_kernel.h | 57 auto *running_std = GetDeviceAddress<T>(inputs, 5); in Launch() local
|
| D | batchnorm_fold_gpu_kernel.h | 73 auto running_std = GetDeviceAddress<T>(outputs, 3); in Launch() local
|
| D | batchnorm_fold2_grad_gpu_kernel.h | 57 auto *running_std = GetDeviceAddress<T>(inputs, 5); in Launch() local
|