/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ |
D | layer_norm_grad_grad_impl.cu |
      195  const T *grad_dx, const T *grad_dg, T *d_x) {  in InputThreadReduceOuterMean() argument
      219  d_x[pos] = part3;  in InputThreadReduceOuterMean()
      234  const half *grad_dg, half *d_x) {  in InputThreadReduceOuterMean() argument
      257  d_x[pos] = part3;  in InputThreadReduceOuterMean()
      305  … const T *grad_dx, const T *grad_dg, const T *grad_db, T *d_dy, T *d_x,  in InputProp() argument
      322  d_x[pos] += part5 + share_mem[6] * (1.0 / col_dim);  in InputProp()
      333  … const half *grad_dx, const half *grad_dg, const half *grad_db, half *d_dy, half *d_x,  in InputProp() argument
      351  d_x[pos] += part5 + share_mem[6] * __float2half(1.0 / col_dim);  in InputProp()
      362  … const T *grad_dx, const T *grad_dg, const T *grad_db, T *d_dy, T *d_x, T *global_sum1,  in InputPropKernel() argument
      380  var, gamma, grad_dx, grad_dg, d_x);  in InputPropKernel()
      [all …]
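The InputThreadReduceOuterMean() and InputProp() snippets show d_x being assembled in two passes: a per-element term is written first, then a second term plus a reduced sum scaled by 1 / col_dim is accumulated. A minimal NumPy sketch of that accumulation pattern, with part3, part5 and row_sum as placeholders for the actual terms the kernels compute:

    import numpy as np

    def accumulate_d_x(part3, part5, row_sum, col_dim):
        """Mirror of the two-pass pattern above (placeholder terms, not the
        real LayerNormGradGrad math): pass 1 writes d_x = part3, pass 2 adds
        part5 plus a reduced sum scaled by 1 / col_dim."""
        d_x = np.array(part3, dtype=np.float32)       # d_x[pos] = part3
        d_x += part5 + row_sum * (1.0 / col_dim)      # d_x[pos] += part5 + share_mem[6] * (1.0 / col_dim)
        return d_x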
|
D | batchnorm_fold2_impl.cu |
       94  …bal__ void DxMul(size_t N, size_t C, size_t HW, const T *batch_std, const T *running_std, T *d_x) {  in DxMul() argument
       99  d_x[i] = d_x[i] * running_std[c] / batch_std[c];  in DxMul()
      162  void CalBatchNormFold2GradNotFreezeDxMul(const T *batch_std, const T *running_std, T *d_x, size_t N…  in CalBatchNormFold2GradNotFreezeDxMul() argument
      164  …ET_BLOCKS(N * C * H * W), GET_THREADS, 0, cuda_stream>>>(N, C, H * W, batch_std, running_std, d_x);  in CalBatchNormFold2GradNotFreezeDxMul()
      167  …tchNormFold2GradNotFreezeDxMul<float>(const float *batch_std, const float *running_std, float *d_x,
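DxMul() rescales the incoming gradient per channel by running_std / batch_std over a flattened NCHW buffer. A NumPy sketch of that element-wise rescale, assuming the channel of flat index i is (i // HW) % C (the line that derives it is truncated in the listing):

    import numpy as np

    def dx_mul(d_x, batch_std, running_std, N, C, HW):
        """Element-wise sketch of DxMul: d_x is a flat buffer of length
        N*C*HW in NCHW order; each element is scaled by
        running_std[c] / batch_std[c]."""
        for i in range(N * C * HW):
            c = (i // HW) % C                 # channel of flat index i (assumed)
            d_x[i] = d_x[i] * running_std[c] / batch_std[c]
        return d_x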
|
D | layer_norm_grad_grad_impl.cuh |
       25  … const T* grad_dx, const T* grad_dg, const T* grad_db, T* d_dy, T* d_x, T* d_gamma,
|
D | batchnorm_fold2_impl.cuh |
       38  void CalBatchNormFold2GradNotFreezeDxMul(const T *batch_std, const T *running_std, T *d_x, size_t N…
|
/third_party/mindspore/mindspore/ops/_grad/ |
D | grad_quant_ops.py |
      119  …d_batch_std, d_batch_mean, d_beta, d_gamma, d_x = op_f(dout, x, gamma, batch_std, batch_mean, runn…
      121  …return d_x, d_beta, d_gamma, d_batch_std, d_batch_mean, zeros_like(running_std), zeros_like(runnin…
      157  … d_batch_std, d_batch_mean, d_gamma, d_x = op_f(dout, dout_reduce, dout_x_reduce, gamma, batch_std,
      159  return d_x, dout_reduce, d_gamma, d_batch_std, d_batch_mean, zeros_like(running_std)
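The bprop at lines 119-121 calls the fused grad primitive, which returns gradients in its own order, and then reorders them to match the forward inputs, filling zeros_like() for the running statistics, which receive no gradient. A framework-free sketch of that wiring (op_f and its argument list are placeholders; the real call is truncated in the listing):

    import numpy as np

    def zeros_like(t):
        return np.zeros_like(t)

    def batchnorm_fold2_bprop(dout, x, gamma, batch_std, batch_mean,
                              running_std, running_mean, op_f):
        """Sketch of the return wiring only: reorder the grad op's outputs to
        the forward-input order and zero the non-differentiable running
        statistics."""
        # op_f stands in for the registered grad primitive; its argument list
        # here is illustrative (the original call is truncated above).
        d_batch_std, d_batch_mean, d_beta, d_gamma, d_x = op_f(
            dout, x, gamma, batch_std, batch_mean, running_std)
        return (d_x, d_beta, d_gamma, d_batch_std, d_batch_mean,
                zeros_like(running_std), zeros_like(running_mean))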
|
D | grad_nn_ops.py |
      745  d_x, d_dy, d_gamma = layer_norm_grad_grad(
      747  return d_x, d_dy, zeros_like(variance), zeros_like(mean), d_gamma
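Lines 745-747 are the bprop of LayerNormGrad: the second-order op produces d_x, d_dy and d_gamma from the incoming gradients of LayerNormGrad's outputs, while variance and mean are treated as non-differentiable and get zero gradients. A sketch of that return wiring (the argument order of the call is illustrative, since the listing truncates it):

    import numpy as np

    def zeros_like(t):
        return np.zeros_like(t)

    def layer_norm_grad_bprop(x, dy, variance, mean, gamma,
                              grad_dx, grad_dg, grad_db, layer_norm_grad_grad):
        """Return wiring only: three real gradients from the second-order op,
        zeros for the statistics inputs."""
        # layer_norm_grad_grad stands in for the registered second-order op;
        # the argument order below is illustrative.
        d_x, d_dy, d_gamma = layer_norm_grad_grad(
            x, dy, variance, mean, gamma, grad_dx, grad_dg, grad_db)
        return d_x, d_dy, zeros_like(variance), zeros_like(mean), d_gamma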
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/ |
D | batchnorm_fold2_grad_gpu_kernel.h |
       64  auto *d_x = GetDeviceAddress<T>(outputs, 4);  in Launch() local
       79  …cudaMemcpyAsync(d_x, dout, x_size, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream…  in Launch()
       85  …CalBatchNormFold2GradNotFreezeDxMul(batch_std, running_std, d_x, batch_size_, channel_, height_, w…  in Launch()
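Launch() first copies dout into the d_x output buffer with cudaMemcpyAsync and then lets the DxMul kernel rescale it in place. A vectorised NumPy sketch of that copy-then-scale flow for an NCHW gradient (shapes assumed from batch_size_, channel_, height_, width_):

    import numpy as np

    def batchnorm_fold2_grad_not_freeze_dx(dout, batch_std, running_std):
        """Copy-then-scale flow of Launch(): d_x starts as a copy of dout
        (the cudaMemcpyAsync), then is rescaled per channel in place (DxMul).
        dout: (N, C, H, W); batch_std, running_std: (C,)."""
        d_x = dout.copy()                                      # cudaMemcpyAsync(d_x, dout, ...)
        d_x *= (running_std / batch_std)[None, :, None, None]  # CalBatchNormFold2GradNotFreezeDxMul(...)
        return d_x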
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ |
D | layer_norm_grad_grad_gpu_kernel.h |
       51  auto d_x = GetDeviceAddress<T>(outputs, 0);  in Launch() local
       66  … grad_dx, grad_dg, grad_db, d_dy, d_x, d_gamma, reinterpret_cast<cudaStream_t>(stream_ptr));  in Launch()
|
/third_party/ffmpeg/libavcodec/ |
D | cavs.c |
      539  static inline void scale_mv(AVSContext *h, int *d_x, int *d_y,  in scale_mv() argument
      543  *d_x = (src->x * distp * den + 256 + FF_SIGNBIT(src->x)) >> 9;  in scale_mv()
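scale_mv() rescales a motion-vector component by distp * den in fixed point and shifts back by 2^9. A Python sketch of the rounding arithmetic, assuming FF_SIGNBIT(x) evaluates to -1 for negative x and 0 otherwise, and that distp and den are non-negative, so the bias gives round-half-away-from-zero:

    def scale_mv_component(src_x, distp, den):
        """Fixed-point scaling as in cavs.c line 543: multiply, add the
        rounding bias (+256, minus 1 for negative src_x via FF_SIGNBIT),
        then shift right by 9. Python's >> on ints is arithmetic, like the
        shift the C code relies on."""
        signbit = -1 if src_x < 0 else 0      # assumed FF_SIGNBIT behaviour
        return (src_x * distp * den + 256 + signbit) >> 9

For example, scale_mv_component(-256, 1, 1) yields -1 rather than 0, i.e. -0.5 rounds away from zero, symmetrically with +0.5 on the positive side.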
|
/third_party/mindspore/tests/st/ops/gpu/ |
D | test_layer_norm_grad_grad_op.py |
       94  d_x = part3 + part4 + sum7
      107  return d_x, d_dy, d_gamma
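Line 94 builds the NumPy reference value of d_x as a sum of partial terms, and the function returns the full (d_x, d_dy, d_gamma) reference. A sketch of the comparison step a test like this typically performs (function name and tolerances here are illustrative, not taken from the test file):

    import numpy as np

    def check_against_reference(gpu_outputs, reference_fn, *inputs,
                                rtol=1e-4, atol=1e-4):
        """Compare (d_x, d_dy, d_gamma) from the GPU kernel against the NumPy
        reference (e.g. the function that returns d_x = part3 + part4 + sum7)."""
        d_x_ref, d_dy_ref, d_gamma_ref = reference_fn(*inputs)
        for got, want in zip(gpu_outputs, (d_x_ref, d_dy_ref, d_gamma_ref)):
            np.testing.assert_allclose(got, want, rtol=rtol, atol=atol)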
|