/third_party/python/Modules/_blake2/impl/ |
D | blake2s-round.h | 44 #define G1(row1,row2,row3,row4,buf) \ argument 46 row4 = _mm_xor_si128( row4, row1 ); \ 47 row4 = _mm_roti_epi32(row4, -16); \ 48 row3 = _mm_add_epi32( row3, row4 ); \ 52 #define G2(row1,row2,row3,row4,buf) \ argument 54 row4 = _mm_xor_si128( row4, row1 ); \ 55 row4 = _mm_roti_epi32(row4, -8); \ 56 row3 = _mm_add_epi32( row3, row4 ); \ 60 #define DIAGONALIZE(row1,row2,row3,row4) \ argument 61 row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) ); \ [all …]
|
D | blake2s.c | 268 __m128i row1, row2, row3, row4; in blake2s_compress() local 307 …row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S-… in blake2s_compress() 319 STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); in blake2s_compress()
|
/third_party/libjpeg-turbo/simd/powerpc/ |
D | jquanti-altivec.c | 119 __vector short row0, row1, row2, row3, row4, row5, row6, row7, in jsimd_quantize_altivec() local 140 row4 = vec_ld(64, workspace); in jsimd_quantize_altivec() 150 row4s = vec_sra(row4, pw_word_bit_m1); in jsimd_quantize_altivec() 158 row4 = vec_xor(row4, row4s); in jsimd_quantize_altivec() 166 row4 = vec_sub(row4, row4s); in jsimd_quantize_altivec() 184 row4 = vec_add(row4, corr4); in jsimd_quantize_altivec() 202 MULTIPLY(row4, recip4, row4); in jsimd_quantize_altivec() 220 MULTIPLY(row4, scale4, row4); in jsimd_quantize_altivec() 229 row4 = vec_xor(row4, row4s); in jsimd_quantize_altivec() 237 row4 = vec_sub(row4, row4s); in jsimd_quantize_altivec() [all …]
|
D | jfdctfst-altivec.c | 92 __vector short row0, row1, row2, row3, row4, row5, row6, row7, in jsimd_fdct_ifast_altivec() local 113 row4 = vec_ld(64, data); in jsimd_fdct_ifast_altivec() 141 tmp3 = vec_add(row3, row4); in jsimd_fdct_ifast_altivec() 142 tmp4 = vec_sub(row3, row4); in jsimd_fdct_ifast_altivec()
|
D | jfdctint-altivec.c | 184 __vector short row0, row1, row2, row3, row4, row5, row6, row7, in jsimd_fdct_islow_altivec() local 217 row4 = vec_ld(64, data); in jsimd_fdct_islow_altivec() 245 tmp3 = vec_add(row3, row4); in jsimd_fdct_islow_altivec() 246 tmp4 = vec_sub(row3, row4); in jsimd_fdct_islow_altivec()
|
/third_party/flutter/skia/third_party/externals/libjpeg-turbo/simd/powerpc/ |
D | jquanti-altivec.c | 119 __vector short row0, row1, row2, row3, row4, row5, row6, row7, in jsimd_quantize_altivec() local 140 row4 = vec_ld(64, workspace); in jsimd_quantize_altivec() 150 row4s = vec_sra(row4, pw_word_bit_m1); in jsimd_quantize_altivec() 158 row4 = vec_xor(row4, row4s); in jsimd_quantize_altivec() 166 row4 = vec_sub(row4, row4s); in jsimd_quantize_altivec() 184 row4 = vec_add(row4, corr4); in jsimd_quantize_altivec() 202 MULTIPLY(row4, recip4, row4); in jsimd_quantize_altivec() 220 MULTIPLY(row4, scale4, row4); in jsimd_quantize_altivec() 229 row4 = vec_xor(row4, row4s); in jsimd_quantize_altivec() 237 row4 = vec_sub(row4, row4s); in jsimd_quantize_altivec() [all …]
|
D | jfdctfst-altivec.c | 92 __vector short row0, row1, row2, row3, row4, row5, row6, row7, in jsimd_fdct_ifast_altivec() local 113 row4 = vec_ld(64, data); in jsimd_fdct_ifast_altivec() 141 tmp3 = vec_add(row3, row4); in jsimd_fdct_ifast_altivec() 142 tmp4 = vec_sub(row3, row4); in jsimd_fdct_ifast_altivec()
|
D | jfdctint-altivec.c | 184 __vector short row0, row1, row2, row3, row4, row5, row6, row7, in jsimd_fdct_islow_altivec() local 217 row4 = vec_ld(64, data); in jsimd_fdct_islow_altivec() 245 tmp3 = vec_add(row3, row4); in jsimd_fdct_islow_altivec() 246 tmp4 = vec_sub(row3, row4); in jsimd_fdct_islow_altivec()
|
/third_party/skia/third_party/externals/libjpeg-turbo/simd/arm/aarch32/ |
D | jchuff-neon.c | 138 int16x8_t row4 = vld1q_dup_s16(block + 35); in jsimd_huff_encode_one_block_neon() local 139 row4 = vld1q_lane_s16(block + 42, row4, 1); in jsimd_huff_encode_one_block_neon() 140 row4 = vld1q_lane_s16(block + 49, row4, 2); in jsimd_huff_encode_one_block_neon() 141 row4 = vld1q_lane_s16(block + 56, row4, 3); in jsimd_huff_encode_one_block_neon() 142 row4 = vld1q_lane_s16(block + 57, row4, 4); in jsimd_huff_encode_one_block_neon() 143 row4 = vld1q_lane_s16(block + 50, row4, 5); in jsimd_huff_encode_one_block_neon() 144 row4 = vld1q_lane_s16(block + 43, row4, 6); in jsimd_huff_encode_one_block_neon() 145 row4 = vld1q_lane_s16(block + 36, row4, 7); in jsimd_huff_encode_one_block_neon() 174 int16x8_t abs_row4 = vabsq_s16(row4); in jsimd_huff_encode_one_block_neon() 200 vshlq_u16(vreinterpretq_u16_s16(vshrq_n_s16(row4, 15)), in jsimd_huff_encode_one_block_neon()
|
/third_party/mindspore/mindspore/lite/micro/coder/wrapper/base/ |
D | optimize_handler_wrapper.c | 19 extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, i… 31 void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, … in MatMulR4Int8_optimize_handler() argument 33 return MatMulOptR4Int8Neon64(a, b, dst, row4, col4, deep16, input_sum, bias); in MatMulR4Int8_optimize_handler()
|
/third_party/skia/third_party/externals/libjpeg-turbo/simd/arm/ |
D | jfdctfst-neon.c | 156 int16x8_t row4 = vreinterpretq_s16_s32(rows_04.val[1]); in jsimd_fdct_ifast_neon() local 169 tmp3 = vaddq_s16(row3, row4); in jsimd_fdct_ifast_neon() 170 tmp4 = vsubq_s16(row3, row4); in jsimd_fdct_ifast_neon() 179 row4 = vsubq_s16(tmp10, tmp11); in jsimd_fdct_ifast_neon() 210 vst1q_s16(data + 4 * DCTSIZE, row4); in jsimd_fdct_ifast_neon()
|
D | jidctfst-neon.c | 73 int16x8_t row4 = vld1q_s16(coef_block + 4 * DCTSIZE); in jsimd_idct_ifast_neon() local 86 bitmap = vorrq_s16(bitmap, row4); in jsimd_idct_ifast_neon() 105 row4 = dcval; in jsimd_idct_ifast_neon() 129 int16x4_t tmp2 = vmul_s16(vget_high_s16(row4), quant_row4); in jsimd_idct_ifast_neon() 182 row4 = vcombine_s16(dcval, vadd_s16(tmp3, tmp4)); in jsimd_idct_ifast_neon() 204 int16x4_t tmp2 = vmul_s16(vget_low_s16(row4), quant_row4); in jsimd_idct_ifast_neon() 257 row4 = vcombine_s16(vadd_s16(tmp3, tmp4), dcval); in jsimd_idct_ifast_neon() 274 int16x8_t tmp2 = vmulq_s16(row4, quant_row4); in jsimd_idct_ifast_neon() 327 row4 = vaddq_s16(tmp3, tmp4); in jsimd_idct_ifast_neon() 334 int16x8x2_t rows_45 = vtrnq_s16(row4, row5); in jsimd_idct_ifast_neon()
|
D | jidctint-neon.c | 101 int16x4_t row4, 205 int16x4_t row4 = vld1_s16(coef_block + 4 * DCTSIZE); in jsimd_idct_islow_neon() local 223 bitmap = vorr_s16(bitmap, row4); in jsimd_idct_islow_neon() 244 jsimd_idct_islow_pass1_regular(row0, row1, row2, row3, row4, row5, in jsimd_idct_islow_neon() 258 row4 = vld1_s16(coef_block + 4 * DCTSIZE + 4); in jsimd_idct_islow_neon() 276 bitmap = vorr_s16(bitmap, row4); in jsimd_idct_islow_neon() 304 jsimd_idct_islow_pass1_regular(row0, row1, row2, row3, row4, row5, in jsimd_idct_islow_neon() 342 int16x4_t row4, in jsimd_idct_islow_pass1_regular() argument 377 z3_s16 = vmul_s16(row4, quant_row4); in jsimd_idct_islow_pass1_regular()
|
D | jfdctint-neon.c | 255 int16x8_t row4 = vreinterpretq_s16_s32(rows_04.val[1]); in jsimd_fdct_islow_neon() local 268 tmp3 = vaddq_s16(row3, row4); in jsimd_fdct_islow_neon() 269 tmp4 = vsubq_s16(row3, row4); in jsimd_fdct_islow_neon() 278 row4 = vrshrq_n_s16(vsubq_s16(tmp10, tmp11), PASS1_BITS); in jsimd_fdct_islow_neon() 372 vst1q_s16(data + 4 * DCTSIZE, row4); in jsimd_fdct_islow_neon()
|
/third_party/ffmpeg/libavcodec/mips/ |
D | vp8_lpf_msa.c | 380 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in ff_vp8_h_loop_filter16_msa() local 388 LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7); in ff_vp8_h_loop_filter16_msa() 392 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in ff_vp8_h_loop_filter16_msa() 445 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in ff_vp8_h_loop_filter8uv_msa() local 453 LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7); in ff_vp8_h_loop_filter8uv_msa() 456 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in ff_vp8_h_loop_filter8uv_msa() 525 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in ff_vp8_h_loop_filter_simple_msa() local 531 LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7); in ff_vp8_h_loop_filter_simple_msa() 535 TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in ff_vp8_h_loop_filter_simple_msa() 595 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in ff_vp8_h_loop_filter8uv_inner_msa() local [all …]
|
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/int8/ |
D | opt_op_handler.cc | 27 void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, … in MatMulR4Int8_optimize_handler() argument 29 return MatMulOptR4Int8Neon64(a, b, dst, row4, col4, deep16, input_sum, bias); in MatMulR4Int8_optimize_handler()
|
D | opt_op_handler.h | 25 void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep… 39 void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, …
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/ |
D | pack_int8.h | 41 void PackInputSum16x4PerLayer(const int8_t *src, int32_t *dst, int32_t filter_zp, size_t row4, size… 48 void PreSum4x16Int8Pert(const int8_t *src, int32_t *sum, size_t row4, size_t col16, int32_t filter_…
|
D | matmul_int8.h | 69 void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16… 73 void MatMulR4Int8Neon64(const int8_t *a, const int8_t *b, int32_t *dst, int row4, int col4, int dee…
|
/third_party/flutter/skia/gm/ |
D | hardstop_gradients.cpp | 118 SkScalar row4[] = {0.00f, 0.25f, 0.50f, 0.50f, 1.00f}; in onDraw() local 128 row4, in onDraw()
|
/third_party/skia/gm/ |
D | hardstop_gradients.cpp | 118 SkScalar row4[] = {0.00f, 0.25f, 0.50f, 0.50f, 1.00f}; in onDraw() local 128 row4, in onDraw()
|
/third_party/skia/third_party/externals/d3d12allocator/src/ |
D | Common.h | 187 const vec4& row4) : in mat4() 191 _41(row4.x), _42(row4.y), _43(row4.z), _44(row4.w) in mat4()
|
/third_party/flutter/skia/third_party/externals/libwebp/src/dsp/ |
D | dec_msa.c | 364 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in HFilter16() local 371 LD_UB8(ptmp, stride, row0, row1, row2, row3, row4, row5, row6, row7); in HFilter16() 374 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in HFilter16() 446 v16u8 row0, row1, row2, row3, row4, row5, row6, row7; in HFilterVertEdge16i() local 453 LD_UB8(src - 4, stride, row0, row1, row2, row3, row4, row5, row6, row7); in HFilterVertEdge16i() 456 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in HFilterVertEdge16i() 530 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in HFilter8() local 537 LD_UB8(ptmp_src_u, stride, row0, row1, row2, row3, row4, row5, row6, row7); in HFilter8() 540 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in HFilter8() 595 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in HFilter8i() local [all …]
|
/third_party/skia/third_party/externals/libwebp/src/dsp/ |
D | dec_msa.c | 364 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in HFilter16() local 371 LD_UB8(ptmp, stride, row0, row1, row2, row3, row4, row5, row6, row7); in HFilter16() 374 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in HFilter16() 446 v16u8 row0, row1, row2, row3, row4, row5, row6, row7; in HFilterVertEdge16i() local 453 LD_UB8(src - 4, stride, row0, row1, row2, row3, row4, row5, row6, row7); in HFilterVertEdge16i() 456 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in HFilterVertEdge16i() 530 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in HFilter8() local 537 LD_UB8(ptmp_src_u, stride, row0, row1, row2, row3, row4, row5, row6, row7); in HFilter8() 540 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in HFilter8() 595 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in HFilter8i() local [all …]
|
/third_party/ffmpeg/libavcodec/ |
D | ivi_dsp.c | 669 int i, row2, row4, row8; in ff_ivi_col_slant8() local 673 row4 = pitch << 2; in ff_ivi_col_slant8() 680 out[0], out[pitch], out[row2], out[row2 + pitch], out[row4], in ff_ivi_col_slant8() 681 out[row4 + pitch], out[row4 + row2], out[row8 - pitch], in ff_ivi_col_slant8() 684 out[0] = out[pitch] = out[row2] = out[row2 + pitch] = out[row4] = in ff_ivi_col_slant8() 685 out[row4 + pitch] = out[row4 + row2] = out[row8 - pitch] = 0; in ff_ivi_col_slant8()
|