/external/XNNPACK/src/f16-gemm/gen/

D | 1x8-minmax-neonfp16arith-ld64.c | in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64():
     45  …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t));   (local)
     54  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
     58  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);
     63  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1);
     67  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);
     72  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2);
     76  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);
     81  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3);
     85  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3);
     96  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567);
    [all …]

D | 1x16-minmax-neonfp16arith-ld64.c | in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64():
     45  …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t));   (local)
     56  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
     61  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);
     68  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1);
     73  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);
     80  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2);
     85  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);
     92  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3);
     97  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3);
    110  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567);
    [all …]

D | 4x8-minmax-neonfp16arith-ld64.c | in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64():
     63  …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t));   (local)
     64  float16x8_t vacc1x01234567 = vacc0x01234567;
     65  float16x8_t vacc2x01234567 = vacc0x01234567;
     66  float16x8_t vacc3x01234567 = vacc0x01234567;
     78  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
     88  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);
     96  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1);
    106  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);
    114  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2);
    124  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);
    [all …]
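All three kernels above share the same "ld64" inner loop: four fp16 elements of A are loaded as one 64-bit float16x4_t, and each of the four K steps folds one row of B into the accumulator with vfmaq_lane_f16; the plain vfmaq_f16 lines are the generator's fallback path, which first duplicates the lane into a full vector (va0c0 and friends). A minimal standalone sketch of the pattern, not the XNNPACK source; it assumes an ARMv8.2-A FP16 toolchain (e.g. -march=armv8.2-a+fp16) and uses illustrative names:

```c
#include <arm_neon.h>

// One "ld64" step for an MR=1, NR=8 tile: 4 fp16 A elements against a
// 4x8 slice of packed B. Illustrative sketch, not the generated kernel.
float16x8_t gemm_1x8_ld64_step(const float16_t *a,  // 4 elements of A
                               const float16_t *w,  // 4 rows x 8 cols of B
                               float16x8_t vacc)    // running vacc0x01234567
{
  const float16x4_t va0 = vld1_f16(a);              // single 64-bit A load
  const float16x8_t vb_c0 = vld1q_f16(w + 0);
  const float16x8_t vb_c1 = vld1q_f16(w + 8);
  const float16x8_t vb_c2 = vld1q_f16(w + 16);
  const float16x8_t vb_c3 = vld1q_f16(w + 24);
  // vacc += vb_ck * va0[k]: one fused multiply-add per K step, matching the
  // vfmaq_lane_f16(vacc0x01234567, vb01234567ck, va0, k) lines above.
  vacc = vfmaq_lane_f16(vacc, vb_c0, va0, 0);
  vacc = vfmaq_lane_f16(vacc, vb_c1, va0, 1);
  vacc = vfmaq_lane_f16(vacc, vb_c2, va0, 2);
  vacc = vfmaq_lane_f16(vacc, vb_c3, va0, 3);
  return vacc;
}
```

The lane form keeps the A broadcast inside the FMA itself, which is why the generated kernels prefer it whenever the compiler supports the intrinsic.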
/external/XNNPACK/src/f16-gemm/gen-inc/

D | 1x8inc-minmax-neonfp16arith-ld64.c | in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64():
     47  …float16x8_t vacc0x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16…   (local)
     56  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
     60  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);
     65  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1);
     69  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);
     74  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2);
     78  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);
     83  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3);
     87  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3);
     98  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567);
    [all …]

D | 1x16inc-minmax-neonfp16arith-ld64.c | in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64():
     47  …float16x8_t vacc0x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16…   (local)
     58  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
     63  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);
     70  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1);
     75  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);
     82  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2);
     87  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);
     94  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3);
     99  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3);
    112  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567);
    [all …]

D | 4x8inc-minmax-neonfp16arith-ld64.c | in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64():
     65  …float16x8_t vacc0x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16…   (local)
     80  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
     90  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);
     98  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1);
    108  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);
    116  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2);
    126  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);
    134  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3);
    144  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3);
    161  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567);
    [all …]
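The gen-inc/ ("GEMMINC") kernels run the same inner loop as the gen/ kernels above; the one structural difference shows in their first listed line: the accumulator is seeded from a caller-provided partial-sum buffer acc rather than from the bias row of the packed weights w, which lets a long K dimension be split across several passes. A hedged sketch of the two seeding styles (helper names are ours, not XNNPACK's):

```c
#include <arm_neon.h>
#include <stdint.h>

// GEMM: first K slice. The accumulator starts at the packed bias in w,
// and w advances past it to the B panel.
static float16x8_t seed_from_weights(const void **w) {
  const float16x8_t vacc = vld1q_f16((const float16_t*) *w);
  *w = (const void*) ((uintptr_t) *w + sizeof(float16x8_t));
  return vacc;
}

// GEMMINC: a later K slice. The accumulator resumes from the partial sums
// written out by the previous slice.
static float16x8_t seed_from_acc(const float16_t **acc) {
  const float16x8_t vacc = vld1q_f16(*acc);
  *acc += 8;
  return vacc;
}
```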
/external/XNNPACK/src/f16-igemm/gen/

D | 1x8-minmax-neonfp16arith-ld64.c | in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64():
     47  …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t));   (local)
     65  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
     69  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);
     74  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1);
     78  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);
     83  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2);
     87  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);
     92  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3);
     96  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3);
    105  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567);
    [all …]

D | 1x16-minmax-neonfp16arith-ld64.c | in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64():
     47  …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t));   (local)
     67  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
     72  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);
     79  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1);
     84  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);
     91  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2);
     96  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);
    103  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3);
    108  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3);
    119  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567);
    [all …]

D | 4x8-minmax-neonfp16arith-ld64.c | in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64():
     59  …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t));   (local)
     60  float16x8_t vacc1x01234567 = vacc0x01234567;
     61  float16x8_t vacc2x01234567 = vacc0x01234567;
     62  float16x8_t vacc3x01234567 = vacc0x01234567;
     98  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
    108  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);
    116  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1);
    126  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);
    134  vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2);
    144  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);
    [all …]
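The igemm/ variants run the same FMA loop but address A through an indirection buffer of row pointers, one per convolution tap, so padded taps can all alias a single shared zero row and no im2col copy of A is ever materialized. A simplified MR=1 sketch under those assumptions; the names and pointer handling are illustrative, not the generated code:

```c
#include <arm_neon.h>
#include <stddef.h>

// One output row (MR = 1), NR = 8 columns. Illustrative sketch only.
float16x8_t igemm_accumulate(float16x8_t vacc,
                             const float16_t *const *indirect_a, // ks row pointers
                             size_t ks,          // taps: pointers to consume
                             size_t kc,          // fp16 elements per tap
                             const float16_t *w) // packed B, 8 cols per element
{
  for (size_t p = 0; p < ks; p++) {
    const float16_t *a0 = indirect_a[p];      // padding taps alias a zero buffer
    for (size_t k = 0; k < kc; k++) {
      const float16x8_t vb = vld1q_f16(w); w += 8;
      vacc = vfmaq_n_f16(vacc, vb, a0[k]);    // acc += B row * one A element
    }
  }
  return vacc;
}
```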
/external/XNNPACK/src/f32-gemm/gen/

D | 1x16s4-minmax-fma3-broadcast.c | in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast():
     42  __m256 vacc0x01234567 = _mm256_load_ps(w + 0);   (local)
     55  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567);
     63  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c1, vacc0x01234567);
     71  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567);
     79  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567);
     95  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567);
    103  vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax);
    107  vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin);
    111  _mm256_storeu_ps(c0, vacc0x01234567);
    120  _mm256_storeu_ps(c0, vacc0x01234567);
    [all …]

D | 1x16-minmax-fma3-broadcast.c | in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast():
     42  __m256 vacc0x01234567 = _mm256_load_ps(w + 0);   (local)
     55  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567);
     62  vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax);
     66  vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin);
     70  _mm256_storeu_ps(c0, vacc0x01234567);
     79  _mm256_storeu_ps(c0, vacc0x01234567);
     81  vacc0x01234567 = vacc0x89ABCDEF;
     85  __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567);
     89  vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1);

D | 1x16-minmax-avx-broadcast.c | in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast():
     42  __m256 vacc0x01234567 = _mm256_load_ps(w + 0);   (local)
     55  vacc0x01234567 = _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567));
     62  vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax);
     66  vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin);
     70  _mm256_storeu_ps(c0, vacc0x01234567);
     79  _mm256_storeu_ps(c0, vacc0x01234567);
     81  vacc0x01234567 = vacc0x89ABCDEF;
     85  __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567);
     89  vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1);
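The fma3-broadcast and avx-broadcast files above differ in exactly one operation: FMA3 fuses the multiply-accumulate into a single _mm256_fmadd_ps with one rounding, while the plain AVX kernel spells it as _mm256_add_ps(_mm256_mul_ps(...)) with two. A minimal sketch of one K step in both flavors (illustrative names; compile with -mfma or -mavx respectively):

```c
#include <immintrin.h>

// One broadcast K step for 8 output columns, FMA3 flavor.
__m256 step_fma3(__m256 vacc, const float *a, const float *w) {
  const __m256 va0 = _mm256_broadcast_ss(a);   // broadcast one A element
  const __m256 vb  = _mm256_load_ps(w);        // 8 packed B columns
  return _mm256_fmadd_ps(va0, vb, vacc);       // vacc += va0 * vb, fused
}

// Same step, plain AVX flavor: separate multiply and add, two roundings.
__m256 step_avx(__m256 vacc, const float *a, const float *w) {
  const __m256 va0 = _mm256_broadcast_ss(a);
  const __m256 vb  = _mm256_load_ps(w);
  return _mm256_add_ps(vacc, _mm256_mul_ps(va0, vb));
}
```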
/external/XNNPACK/src/f32-gemm/gen-inc/

D | 1x16s4inc-minmax-fma3-broadcast.c | in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast():
     44  __m256 vacc0x01234567 = _mm256_load_ps(acc + 0);   (local)
     57  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567);
     65  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c1, vacc0x01234567);
     73  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567);
     81  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567);
     97  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567);
    105  vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax);
    109  vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin);
    113  _mm256_storeu_ps(c0, vacc0x01234567);
    122  _mm256_storeu_ps(c0, vacc0x01234567);
    [all …]

D | 1x16inc-minmax-fma3-broadcast.c | in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast():
     44  __m256 vacc0x01234567 = _mm256_load_ps(acc + 0);   (local)
     57  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567);
     64  vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax);
     68  vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin);
     72  _mm256_storeu_ps(c0, vacc0x01234567);
     81  _mm256_storeu_ps(c0, vacc0x01234567);
     83  vacc0x01234567 = vacc0x89ABCDEF;
     87  __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567);
     91  vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1);

D | 1x16inc-minmax-avx-broadcast.c | in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast():
     44  __m256 vacc0x01234567 = _mm256_load_ps(acc + 0);   (local)
     57  vacc0x01234567 = _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567));
     64  vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax);
     68  vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin);
     72  _mm256_storeu_ps(c0, vacc0x01234567);
     81  _mm256_storeu_ps(c0, vacc0x01234567);
     83  vacc0x01234567 = vacc0x89ABCDEF;
     87  __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567);
     91  vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1);
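The tail lines of the 1x16 listings (vacc0x01234567 = vacc0x89ABCDEF;, _mm256_castps256_ps128, _mm256_extractf128_ps) are the column-remainder path: when fewer than 16 output columns remain, the kernel stores shrinking pieces of 8, 4, 2 and 1 floats, sliding the surviving lanes down after each store. A standalone sketch of that scheme, with our own names:

```c
#include <immintrin.h>
#include <stddef.h>

// Store nc (1..15) remaining columns from a 16-wide accumulator pair.
void store_tail(float *c, __m256 vlo, __m256 vhi, size_t nc) {
  if (nc & 8) {
    _mm256_storeu_ps(c, vlo);
    vlo = vhi;                        // surviving lanes slide down
    c += 8;
  }
  __m128 v = _mm256_castps256_ps128(vlo);
  if (nc & 4) {
    _mm_storeu_ps(c, v);
    v = _mm256_extractf128_ps(vlo, 1);
    c += 4;
  }
  if (nc & 2) {
    _mm_storel_pi((__m64*) c, v);     // store the low two lanes
    v = _mm_movehl_ps(v, v);
    c += 2;
  }
  if (nc & 1) {
    _mm_store_ss(c, v);
  }
}
```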
/external/XNNPACK/src/f16-vmulcaddc/gen/

D | c16-minmax-neonfp16arith-2x.c | in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x():
     53  float16x8_t vacc0x01234567 = vld1q_f16(i0); i0 += 8;   (local)
     61  vacc0x01234567 = vfmaq_f16(vbias01234567, vscale01234567, vacc0x01234567);
     66  vacc0x01234567 = vmaxq_f16(vacc0x01234567, vmin);
     71  vacc0x01234567 = vminq_f16(vacc0x01234567, vmax);
     76  vst1q_f16(o0, vacc0x01234567); o0 += 8;
     84  float16x8_t vacc0x01234567 = vld1q_f16(i0); i0 += 8;   (local)
     89  vacc0x01234567 = vfmaq_f16(vbias01234567, vscale01234567, vacc0x01234567);
     92  vacc0x01234567 = vmaxq_f16(vacc0x01234567, vmin);
     95  vacc0x01234567 = vminq_f16(vacc0x01234567, vmax);
     98  vst1q_f16(o0, vacc0x01234567); o0 += 8;
    [all …]

D | c8-minmax-neonfp16arith-2x.c | in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x():
     52  float16x8_t vacc0x01234567 = vld1q_f16(i0); i0 += 8;   (local)
     57  vacc0x01234567 = vfmaq_f16(vbias01234567, vscale01234567, vacc0x01234567);
     60  vacc0x01234567 = vmaxq_f16(vacc0x01234567, vmin);
     63  vacc0x01234567 = vminq_f16(vacc0x01234567, vmax);
     66  vst1q_f16(o0, vacc0x01234567); o0 += 8;
     72  float16x8_t vacc0x01234567 = vld1q_f16(i0); i0 = (const __fp16*) ((uintptr_t) i0 + c);   (local)
     77  vacc0x01234567 = vfmaq_f16(vbias01234567, vscale01234567, vacc0x01234567);
     80  vacc0x01234567 = vmaxq_f16(vacc0x01234567, vmin);
     83  vacc0x01234567 = vminq_f16(vacc0x01234567, vmax);
     86  float16x4_t vacc0x0123 = vget_low_f16(vacc0x01234567);
    [all …]
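vmulcaddc computes a per-channel y = clamp(scale * x + bias), and the listings show the trick: the loaded input itself sits in the accumulator register, and the argument order vfmaq_f16(vbias01234567, vscale01234567, vacc0x01234567) evaluates bias + scale * x in a single FMA before the max/min clamp. A small sketch of one 8-channel step (ARMv8.2-A FP16 assumed; names are ours):

```c
#include <arm_neon.h>

// One 8-channel multiply-add-clamp step: clamp(scale * x + bias).
float16x8_t vmulcaddc_step(float16x8_t vx,      // 8 input channels
                           float16x8_t vscale,  // per-channel scale
                           float16x8_t vbias,   // per-channel bias
                           float16x8_t vmin, float16x8_t vmax) {
  float16x8_t vacc = vfmaq_f16(vbias, vscale, vx); // bias + scale * x
  vacc = vmaxq_f16(vacc, vmin);                    // clamp from below,
  return vminq_f16(vacc, vmax);                    // then from above
}
```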
/external/XNNPACK/src/f32-igemm/gen/

D | 1x16s4-minmax-fma3-broadcast.c | in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast():
     46  __m256 vacc0x01234567 = _mm256_load_ps(w);   (local)
     68  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567);
     76  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c1, vacc0x01234567);
     84  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567);
     92  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567);
    108  vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567);
    118  vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax);
    122  vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin);
    126  _mm256_storeu_ps(c0, vacc0x01234567);
    134  _mm256_storeu_ps(c0, vacc0x01234567);
    [all …]

D | 1x16-minmax-avx-broadcast.c | in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast():
     46  __m256 vacc0x01234567 = _mm256_load_ps(w);   (local)
     68  vacc0x01234567 = _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567));
     76  vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax);
     80  vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin);
     84  _mm256_storeu_ps(c0, vacc0x01234567);
     92  _mm256_storeu_ps(c0, vacc0x01234567);
     94  vacc0x01234567 = vacc0x89ABCDEF;
     98  __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567);
    102  vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1);
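The s4 suffix on 1x16s4-minmax-fma3-broadcast.c marks a shuffle variant: four A elements are loaded once and the vector is rotated by one lane between the c0..c3 FMAs, instead of re-broadcasting a scalar every step; the B panel is packed to match the rotated layout. A hedged sketch of the rotation; the _MM_SHUFFLE(0, 3, 2, 1) rotate matches these generated kernels, but the surrounding names and the assumed B layout are ours:

```c
#include <immintrin.h>

// One 4-step "s4" block for an 8-column slice of C. Illustrative only;
// assumes B was packed to line up with each rotation of the A vector.
__m256 s4_block(__m256 vacc, const float *a, const float *w) {
  // Four A values, duplicated into both 128-bit halves.
  __m256 va0 = _mm256_broadcast_ps((const __m128*) a);
  for (int c = 0; c < 4; c++) {
    const __m256 vb = _mm256_load_ps(w + 8 * c);            // shuffled B panel
    vacc = _mm256_fmadd_ps(va0, vb, vacc);
    va0 = _mm256_permute_ps(va0, _MM_SHUFFLE(0, 3, 2, 1));  // rotate one lane
  }
  return vacc;
}
```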
/external/XNNPACK/src/qs8-gavgpool/gen/

D | 7p7x-minmax-sse41-c8-acc2.c | in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2():
     60  __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567);   (local)
     63  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567);
     65  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567);
     68  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567);
     70  const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x01234567));
     71  … = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, _mm_cmpgt_epi16(_mm_setzero_si128(), va…
    106  __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567);   (local)
    109  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567);
    111  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567);
    114  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567);
    [all …]

D | 7p7x-minmax-wasmsimd-c8-acc2.c | in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2():
     59  v128_t vacc0x01234567 = wasm_i16x8_add(vxi0x01234567, vxi1x01234567);   (local)
     62  vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567);
     64  vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi6x01234567);
     67  vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vacc1x01234567);
     69  const v128_t vacc0123 = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0x01234567));
     70  const v128_t vacc4567 = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0x01234567));
    104  v128_t vacc0x01234567 = wasm_i16x8_add(vxi0x01234567, vxi1x01234567);   (local)
    107  vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567);
    109  vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi6x01234567);
    112  vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vacc1x01234567);
    [all …]

D | 7p7x-minmax-neon-c8-acc2.c | in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2():
     52  int16x8_t vacc0x01234567 = vaddl_s8(vi0x01234567, vi1x01234567);   (local)
     55  vacc0x01234567 = vaddw_s8(vacc0x01234567, vi4x01234567);
     57  vacc0x01234567 = vaddw_s8(vacc0x01234567, vi6x01234567);
     60  vacc0x01234567 = vaddq_s16(vacc0x01234567, vacc1x01234567);
     62  const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567));
     63  const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567));
     89  int16x8_t vacc0x01234567 = vaddl_s8(vi0x01234567, vi1x01234567);   (local)
     92  vacc0x01234567 = vaddw_s8(vacc0x01234567, vi4x01234567);
     94  vacc0x01234567 = vaddw_s8(vacc0x01234567, vi6x01234567);
     97  vacc0x01234567 = vaddq_s16(vacc0x01234567, vacc1x01234567);
    [all …]

D | 7p7x-minmax-sse2-c8-acc2.c | in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2():
     67  __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567);   (local)
     70  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567);
     72  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567);
     75  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567);
     77  const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567);
     78  …const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567…
     79  …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567…
    121  __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567);   (local)
    124  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567);
    126  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567);
    [all …]

D | 7p7x-minmax-ssse3-c8-acc2.c | in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2():
     67  __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567);   (local)
     70  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567);
     72  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567);
     75  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567);
     77  const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567);
     78  …const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567…
     79  …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567…
    121  __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567);   (local)
    124  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567);
    126  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567);
    [all …]

D | 7p7x-minmax-sse41-c24-acc2.c | in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2():
     74  __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567);   (local)
     81  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567);
     87  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567);
     92  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567);
     96  const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x01234567));
     97  … = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, _mm_cmpgt_epi16(_mm_setzero_si128(), va…
    129  __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567);   (local)
    132  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567);
    134  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567);
    137  vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567);
    [all …]
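All six gavgpool kernels implement the same reduction: seven int8 rows are summed pairwise into a 16-bit accumulator (vaddl_s8/vaddw_s8 on NEON, _mm_add_epi16 on x86, wasm_i16x8_add on WebAssembly), and the result is widened to 32 bits and added to the bias. SSE2 and SSSE3 lack _mm_cvtepi16_epi32, which is why those two files materialize the sign with _mm_cmpgt_epi16 against zero and unpack it in, exactly as the truncated lines above show. A standalone SSE2 sketch of that widening step (helper name is ours):

```c
#include <emmintrin.h>  // SSE2

// Widen an int16x8 accumulator to two int32x4 halves and add the bias.
static void widen_acc_sse2(__m128i vacc16, __m128i vbias,
                           __m128i *lo32, __m128i *hi32) {
  // 0xFFFF in each lane where vacc16 is negative: the sign-extension bits.
  const __m128i vsgn = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc16);
  *lo32 = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vacc16, vsgn));
  *hi32 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc16, vsgn));
}
```

On SSE4.1 the low half collapses to a single _mm_cvtepi16_epi32, which is the only difference between the sse41 and sse2/ssse3 listings above.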