/external/XNNPACK/src/f32-dwconv/gen/ |
D | up16x25-fma3.c | 90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x25__fma3() 91 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x25__fma3() 98 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x25__fma3() 99 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x25__fma3() 107 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x25__fma3() 108 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x25__fma3() 116 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x25__fma3() 117 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x25__fma3() 125 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x25__fma3() 126 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x25__fma3() [all …]
|
D | up16x25-fma3-acc2.c | 90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() 91 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() 98 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() 99 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() 107 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() 108 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() 116 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() 117 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() 125 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() 126 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x25__fma3_acc2() [all …]
|
D | up16x25-avx-acc2.c | 90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 91 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 98 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 99 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 107 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 108 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 116 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 117 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 125 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 126 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() [all …]
|
D | up16x25-avx.c | 90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x25__avx() 91 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x25__avx() 98 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x25__avx() 99 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x25__avx() 107 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x25__avx() 108 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x25__avx() 116 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x25__avx() 117 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x25__avx() 125 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x25__avx() 126 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x25__avx() [all …]
|
D | up8x25-fma3-acc2.c | 90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() 96 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() 102 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() 108 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() 114 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() 120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() 126 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() 132 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() 138 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() 144 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__fma3_acc2() [all …]
|
D | up8x25-fma3.c | 90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__fma3() 96 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__fma3() 102 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__fma3() 108 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__fma3() 114 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x25__fma3() 120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__fma3() 126 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x25__fma3() 132 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x25__fma3() 138 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x25__fma3() 144 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__fma3() [all …]
|
D | up16x9-fma3-acc2.c | 58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() 59 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() 66 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() 67 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() 75 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() 76 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() 84 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() 85 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() 93 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() 94 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x9__fma3_acc2() [all …]
|
D | up16x9-fma3.c | 58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x9__fma3() 59 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x9__fma3() 66 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x9__fma3() 67 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x9__fma3() 75 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x9__fma3() 76 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x9__fma3() 84 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x9__fma3() 85 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x9__fma3() 93 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x9__fma3() 94 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x9__fma3() [all …]
|
D | up8x25-avx.c | 90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__avx() 96 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__avx() 102 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__avx() 108 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__avx() 114 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x25__avx() 120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__avx() 126 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x25__avx() 132 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x25__avx() 138 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x25__avx() 144 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__avx() [all …]
|
D | up8x25-avx-acc2.c | 90 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 96 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 102 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 108 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 114 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 120 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 126 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 132 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 138 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 144 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() [all …]
|
D | up16x9-avx-acc2.c | 58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 59 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 66 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 67 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 75 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 76 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 84 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 85 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 93 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 94 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() [all …]
|
D | up16x9-avx.c | 58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x9__avx() 59 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x9__avx() 66 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x9__avx() 67 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x9__avx() 75 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x9__avx() 76 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x9__avx() 84 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x9__avx() 85 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x9__avx() 93 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x9__avx() 94 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x9__avx() [all …]
|
D | up16x4-fma3.c | 48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x4__fma3() 49 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x4__fma3() 56 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x4__fma3() 57 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x4__fma3() 65 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x4__fma3() 66 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x4__fma3() 74 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x4__fma3() 75 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x4__fma3() 83 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x4__fma3() 84 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x4__fma3() [all …]
|
D | up16x4-avx-acc2.c | 48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 49 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 56 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 57 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 65 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 66 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 74 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 75 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 83 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 84 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() [all …]
|
D | up16x4-avx.c | 48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x4__avx() 49 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x4__avx() 56 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x4__avx() 57 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x4__avx() 65 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x4__avx() 66 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x4__avx() 74 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x4__avx() 75 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x4__avx() 83 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x4__avx() 84 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x4__avx() [all …]
|
D | up8x9-fma3-acc2.c | 58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() 64 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() 70 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() 76 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() 82 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() 88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() 94 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() 100 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() 106 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() 112 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__fma3_acc2() [all …]
|
D | up16x4-fma3-acc2.c | 48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() 49 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() 56 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() 57 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() 65 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() 66 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() 74 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() 75 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() 83 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() 84 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up16x4__fma3_acc2() [all …]
|
D | up8x9-fma3.c | 58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__fma3() 64 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__fma3() 70 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__fma3() 76 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__fma3() 82 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x9__fma3() 88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__fma3() 94 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x9__fma3() 100 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x9__fma3() 106 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x9__fma3() 112 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__fma3() [all …]
|
D | up8x9-avx.c | 58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__avx() 64 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__avx() 70 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__avx() 76 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__avx() 82 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x9__avx() 88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__avx() 94 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x9__avx() 100 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x9__avx() 106 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x9__avx() 112 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__avx() [all …]
|
D | up8x9-avx-acc2.c | 58 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 64 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 70 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 76 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 82 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 88 const __m256 vk4x01234567 = _mm256_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 94 const __m256 vk5x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 100 const __m256 vk6x01234567 = _mm256_load_ps(w + 56); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 106 const __m256 vk7x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 112 const __m256 vk8x01234567 = _mm256_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() [all …]
|
D | up8x4-fma3.c | 48 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__fma3() 54 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x4__fma3() 60 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x4__fma3() 66 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x4__fma3() 72 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x4__fma3() 89 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__fma3() 92 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x4__fma3() 96 const __m256 vk1x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x4__fma3() 100 const __m256 vk2x01234567 = _mm256_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x4__fma3() 104 const __m256 vk3x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_ukernel_up8x4__fma3()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 1x16s4-fma3-broadcast.c | 44 __m256 vacc0x01234567 = _mm256_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() 45 __m256 vacc0x89ABCDEF = _mm256_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() 54 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() 55 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() 62 const __m256 vb01234567c1 = _mm256_load_ps(w + 16); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() 63 const __m256 vb89ABCDEFc1 = _mm256_load_ps(w + 24); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() 70 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() 71 const __m256 vb89ABCDEFc2 = _mm256_load_ps(w + 40); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() 78 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() 79 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() [all …]
|
D | 3x16s4-fma3-broadcast.c | 56 __m256 vacc0x01234567 = _mm256_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() 57 __m256 vacc0x89ABCDEF = _mm256_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() 58 __m256 vacc1x01234567 = _mm256_load_ps(acc + 16); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() 59 __m256 vacc1x89ABCDEF = _mm256_load_ps(acc + 24); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() 60 __m256 vacc2x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() 61 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() 74 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() 75 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() 88 const __m256 vb01234567c1 = _mm256_load_ps(w + 16); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() 89 const __m256 vb89ABCDEFc1 = _mm256_load_ps(w + 24); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 1x16s4-fma3-broadcast.c | 42 __m256 vacc0x01234567 = _mm256_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() 43 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() 52 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() 53 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() 60 const __m256 vb01234567c1 = _mm256_load_ps(w + 16); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() 61 const __m256 vb89ABCDEFc1 = _mm256_load_ps(w + 24); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() 68 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() 69 const __m256 vb89ABCDEFc2 = _mm256_load_ps(w + 40); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() 76 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() 77 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() [all …]
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 1x16s4-fma3-broadcast.c | 46 __m256 vacc0x01234567 = _mm256_load_ps(w); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() 47 __m256 vacc0x89ABCDEF = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() 65 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() 66 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() 73 const __m256 vb01234567c1 = _mm256_load_ps(w + 16); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() 74 const __m256 vb89ABCDEFc1 = _mm256_load_ps(w + 24); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() 81 const __m256 vb01234567c2 = _mm256_load_ps(w + 32); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() 82 const __m256 vb89ABCDEFc2 = _mm256_load_ps(w + 40); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() 89 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() 90 const __m256 vb89ABCDEFc3 = _mm256_load_ps(w + 56); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() [all …]
|