/external/ComputeLibrary/src/core/NEON/kernels/convolution/winograd/input_transforms/ |
D | a64_fp16_6x6.cpp | 181 …XTx[0][j] = vsub_f16(vadd_f16(x[4][j], vmul_f16(x[0][j], vdup_n_f16(4.0f))), vmul_f16(x[2][j], vdu… in a64_fp16_6x6() 184 …XTx[1][j] = vsub_f16(vadd_f16(x[3][j], x[4][j]), vmul_f16(vadd_f16(x[1][j], x[2][j]), vdup_n_f16(… in a64_fp16_6x6() 187 …XTx[2][j] = vadd_f16(vsub_f16(x[4][j], x[3][j]), vmul_f16(vsub_f16(x[1][j], x[2][j]), vdup_n_f16(4… in a64_fp16_6x6() 190 …XTx[3][j] = vadd_f16(vsub_f16(x[4][j], x[2][j]), vmul_f16(vsub_f16(x[3][j], x[1][j]), vdup_n_f16(2… in a64_fp16_6x6() 193 …XTx[4][j] = vadd_f16(vsub_f16(x[4][j], x[2][j]), vmul_f16(vsub_f16(x[1][j], x[3][j]), vdup_n_f16(2… in a64_fp16_6x6() 196 …XTx[5][j] = vsub_f16(vadd_f16(x[5][j], vmul_f16(x[1][j], vdup_n_f16(4.0f))), vmul_f16(x[3][j], vdu… in a64_fp16_6x6() 203 …U[i][0] = vsub_f16(vadd_f16(XTx[i][4], vmul_f16(XTx[i][0], vdup_n_f16(4.0f))), vmul_f16(XTx[i][2],… in a64_fp16_6x6() 206 …U[i][1] = vsub_f16(vadd_f16(XTx[i][3], XTx[i][4]), vmul_f16(vadd_f16(XTx[i][1], XTx[i][2]), vdup_n… in a64_fp16_6x6() 209 …U[i][2] = vadd_f16(vsub_f16(XTx[i][4], XTx[i][3]), vmul_f16(vsub_f16(XTx[i][1], XTx[i][2]), vdup_n… in a64_fp16_6x6() 212 …U[i][3] = vadd_f16(vsub_f16(XTx[i][4], XTx[i][2]), vmul_f16(vsub_f16(XTx[i][3], XTx[i][1]), vdup_n… in a64_fp16_6x6() [all …]
|
/external/ComputeLibrary/src/core/NEON/kernels/convolution/winograd/weight_transforms/ |
D | a64_fp16_4x4_3x3.cpp | 143 …Ww[3][j] = vadd_f16(vadd_f16(w[0][j], vmul_f16(w[1][j], vdup_n_f16(2.0f))), vmul_f16(w[2][j], vdup… in a64_fp16_4x4_3x3() 146 …Ww[4][j] = vadd_f16(vsub_f16(w[0][j], vmul_f16(w[1][j], vdup_n_f16(2.0f))), vmul_f16(w[2][j], vdup… in a64_fp16_4x4_3x3() 167 …V[i][3] = vmul_n_f16(vadd_f16(vadd_f16(Ww[i][0], vmul_f16(Ww[i][1], vdup_n_f16(2.0f))), vmul_f16(W… in a64_fp16_4x4_3x3() 170 …V[i][4] = vmul_n_f16(vadd_f16(vsub_f16(Ww[i][0], vmul_f16(Ww[i][1], vdup_n_f16(2.0f))), vmul_f16(W… in a64_fp16_4x4_3x3()
|
/external/ComputeLibrary/src/core/NEON/kernels/convolution/winograd/output_transforms/ |
D | a64_fp16_4x4_3x3.cpp | 155 …FZ[i][1] = vadd_f16(vsub_f16(F[i][1], F[i][2]), vmul_f16(vsub_f16(F[i][3], F[i][4]), vdup_n_f16(2.… in a64_fp16_4x4_3x3() 158 …FZ[i][2] = vadd_f16(vadd_f16(F[i][1], F[i][2]), vmul_f16(vadd_f16(F[i][3], F[i][4]), vdup_n_f16(4.… in a64_fp16_4x4_3x3() 161 …FZ[i][3] = vadd_f16(vadd_f16(vsub_f16(F[i][1], F[i][2]), vmul_f16(vsub_f16(F[i][3], F[i][4]), vdup… in a64_fp16_4x4_3x3() 171 …f[1][j] = vadd_f16(vsub_f16(FZ[1][j], FZ[2][j]), vmul_f16(vsub_f16(FZ[3][j], FZ[4][j]), vdup_n_f16… in a64_fp16_4x4_3x3() 174 …f[2][j] = vadd_f16(vadd_f16(FZ[1][j], FZ[2][j]), vmul_f16(vadd_f16(FZ[3][j], FZ[4][j]), vdup_n_f16… in a64_fp16_4x4_3x3() 177 …f[3][j] = vadd_f16(vadd_f16(vsub_f16(FZ[1][j], FZ[2][j]), vmul_f16(vsub_f16(FZ[3][j], FZ[4][j]), v… in a64_fp16_4x4_3x3()
|
/external/XNNPACK/src/f16-gavgpool-cw/ |
D | neonfp16arith-x4.c | 78 float16x4_t vout = vmul_f16(vsum, vmultiplier); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4() 113 float16x4_t vout = vmul_f16(vsum, vmultiplier); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4()
|
D | neonfp16arith-x8.c | 90 float16x4_t vout = vmul_f16(vsum, vmultiplier); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8() 126 float16x4_t vout = vmul_f16(vsum, vmultiplier); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
|
/external/ComputeLibrary/src/cpu/kernels/pool2d/neon/nchw/ |
D | all.cpp | 119 top_data = vmul_f16(top_data, top_data); in pooling3_fp16_neon_nchw() 120 middle_data = vmul_f16(middle_data, middle_data); in pooling3_fp16_neon_nchw() 121 bottom_data = vmul_f16(bottom_data, bottom_data); in pooling3_fp16_neon_nchw() 133 res = vmul_f16(vpadd_f16(res, res), scale_v); in pooling3_fp16_neon_nchw() 285 top_data = vmul_f16(top_data, top_data); in pooling2_fp16_neon_nchw() 286 bottom_data = vmul_f16(bottom_data, bottom_data); in pooling2_fp16_neon_nchw() 296 res = vmul_f16(vpadd_f16(sum_data, sum_data), scale_v); in pooling2_fp16_neon_nchw()
|
/external/ComputeLibrary/scripts/ |
D | clang-tidy.h | 35 inline float16x4_t vmul_f16 (float16x4_t, float16x4_t) in vmul_f16() function
|
/external/ComputeLibrary/src/core/NEON/ |
D | NEMath.inl | 530 …sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal),… 531 …sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal),… 546 recip = vmul_f16(vrecps_f16(x, recip), recip); 547 recip = vmul_f16(vrecps_f16(x, recip), recip);
|
/external/ComputeLibrary/src/cpu/kernels/gemm_matrix_mul/generic/neon/ |
D | impl.cpp | 198 vacc = vadd_f16(vacc, vmul_f16(a0l, b_col)); in vector_matrix_multiply_f16()
|
/external/eigen/Eigen/src/Core/arch/NEON/ |
D | PacketMath.h | 4074 return vmul_f16(a, b); 4460 prod = vmul_f16(a_lo, a_hi); 4461 prod = vmul_f16(prod, vrev64_f16(prod)); 4471 prod = vmul_f16(a, vrev64_f16(a));
|