/external/XNNPACK/src/f32-dwconv/gen/ |
D | up16x25-avx-acc2.c | 100 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 101 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 118 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 119 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 127 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 128 vacc89ABCDEFp1 = _mm256_add_ps(vacc89ABCDEFp1, _mm256_mul_ps(vi3x89ABCDEF, vk3x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 136 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 137 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi4x89ABCDEF, vk4x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 145 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() 146 vacc89ABCDEFp1 = _mm256_add_ps(vacc89ABCDEFp1, _mm256_mul_ps(vi5x89ABCDEF, vk5x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx_acc2() [all …]
|
D | up16x25-avx.c | 100 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx() 101 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx() 109 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx() 110 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi1x89ABCDEF, vk1x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx() 118 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx() 119 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx() 127 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx() 128 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi3x89ABCDEF, vk3x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx() 136 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x25__avx() 137 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi4x89ABCDEF, vk4x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x25__avx() [all …]
|
D | up8x25-avx.c | 97 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() 103 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() 109 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() 115 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() 121 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() 127 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() 133 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() 139 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() 145 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() 151 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi9x01234567, vk9x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx() [all …]
|
D | up8x25-avx-acc2.c | 97 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 109 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 115 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 121 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 127 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 133 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 139 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 145 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 151 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi9x01234567, vk9x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() 157 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi10x01234567, vk10x01234567)); in xnn_f32_dwconv_ukernel_up8x25__avx_acc2() [all …]
|
D | up16x9-avx-acc2.c | 68 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 69 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 86 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 87 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 95 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 96 vacc89ABCDEFp1 = _mm256_add_ps(vacc89ABCDEFp1, _mm256_mul_ps(vi3x89ABCDEF, vk3x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 104 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 105 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi4x89ABCDEF, vk4x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 113 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() 114 vacc89ABCDEFp1 = _mm256_add_ps(vacc89ABCDEFp1, _mm256_mul_ps(vi5x89ABCDEF, vk5x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx_acc2() [all …]
|
D | up16x9-avx.c | 68 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx() 69 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx() 77 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx() 78 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi1x89ABCDEF, vk1x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx() 86 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx() 87 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx() 95 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx() 96 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi3x89ABCDEF, vk3x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx() 104 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up16x9__avx() 105 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi4x89ABCDEF, vk4x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x9__avx() [all …]
|
D | up8x9-avx.c | 65 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() 71 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() 77 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() 83 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() 89 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() 95 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() 101 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() 107 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() 113 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() 133 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx() [all …]
|
D | up8x9-avx-acc2.c | 65 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 77 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 83 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 89 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 95 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 101 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 107 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 113 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 118 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() 135 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x9__avx_acc2() [all …]
|
D | up16x4-avx-acc2.c | 58 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 59 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 76 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 77 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 85 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 86 vacc89ABCDEFp1 = _mm256_add_ps(vacc89ABCDEFp1, _mm256_mul_ps(vi3x89ABCDEF, vk3x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 91 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 92 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, vacc89ABCDEFp1); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 110 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() 122 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx_acc2() [all …]
|
D | up16x4-avx.c | 58 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx() 59 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi0x89ABCDEF, vk0x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x4__avx() 67 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx() 68 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi1x89ABCDEF, vk1x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x4__avx() 76 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx() 77 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi2x89ABCDEF, vk2x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x4__avx() 85 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx() 86 vacc89ABCDEFp0 = _mm256_add_ps(vacc89ABCDEFp0, _mm256_mul_ps(vi3x89ABCDEF, vk3x89ABCDEF)); in xnn_f32_dwconv_ukernel_up16x4__avx() 107 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx() 113 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_ukernel_up16x4__avx() [all …]
|
D | up8x4-avx.c | 55 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx() 61 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx() 67 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx() 73 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx() 93 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx() 97 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx() 101 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx() 105 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx()
|
D | up8x4-avx-acc2.c | 55 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx_acc2() 67 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx_acc2() 73 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx_acc2() 78 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_ukernel_up8x4__avx_acc2() 95 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx_acc2() 103 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx_acc2() 107 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_ukernel_up8x4__avx_acc2() 110 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_ukernel_up8x4__avx_acc2()
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx2-p5-x96.c | 191 __m256 ve0 = _mm256_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 192 __m256 ve1 = _mm256_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 193 __m256 ve2 = _mm256_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 194 __m256 ve3 = _mm256_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 195 __m256 ve4 = _mm256_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 196 __m256 ve5 = _mm256_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 197 __m256 ve6 = _mm256_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 198 __m256 ve7 = _mm256_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 199 __m256 ve8 = _mm256_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 200 __m256 ve9 = _mm256_add_ps(vn9, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() [all …]
|
D | avx2-p5-x88.c | 181 __m256 ve0 = _mm256_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 182 __m256 ve1 = _mm256_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 183 __m256 ve2 = _mm256_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 184 __m256 ve3 = _mm256_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 185 __m256 ve4 = _mm256_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 186 __m256 ve5 = _mm256_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 187 __m256 ve6 = _mm256_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 188 __m256 ve7 = _mm256_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 189 __m256 ve8 = _mm256_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 190 __m256 ve9 = _mm256_add_ps(vn9, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() [all …]
|
D | avx2-p5-x80.c | 171 __m256 ve0 = _mm256_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 172 __m256 ve1 = _mm256_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 173 __m256 ve2 = _mm256_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 174 __m256 ve3 = _mm256_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 175 __m256 ve4 = _mm256_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 176 __m256 ve5 = _mm256_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 177 __m256 ve6 = _mm256_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 178 __m256 ve7 = _mm256_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 179 __m256 ve8 = _mm256_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() 180 __m256 ve9 = _mm256_add_ps(vn9, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() [all …]
|
D | avx2-p5-x72.c | 161 __m256 ve0 = _mm256_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 162 __m256 ve1 = _mm256_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 163 __m256 ve2 = _mm256_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 164 __m256 ve3 = _mm256_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 165 __m256 ve4 = _mm256_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 166 __m256 ve5 = _mm256_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 167 __m256 ve6 = _mm256_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 168 __m256 ve7 = _mm256_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 169 __m256 ve8 = _mm256_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() 188 …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() [all …]
|
D | avx2-p5-x56.c | 141 __m256 ve0 = _mm256_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 142 __m256 ve1 = _mm256_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 143 __m256 ve2 = _mm256_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 144 __m256 ve3 = _mm256_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 145 __m256 ve4 = _mm256_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 146 __m256 ve5 = _mm256_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 147 __m256 ve6 = _mm256_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 164 …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 165 …const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve1, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() 166 …const __m256 vs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve2, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() [all …]
|
D | avx2-p5-x64.c | 151 __m256 ve0 = _mm256_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 152 __m256 ve1 = _mm256_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 153 __m256 ve2 = _mm256_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 154 __m256 ve3 = _mm256_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 155 __m256 ve4 = _mm256_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 156 __m256 ve5 = _mm256_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 157 __m256 ve6 = _mm256_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 158 __m256 ve7 = _mm256_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 176 …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() 177 …const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve1, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() [all …]
|
D | avx2-p5-x48.c | 131 __m256 ve0 = _mm256_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 132 __m256 ve1 = _mm256_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 133 __m256 ve2 = _mm256_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 134 __m256 ve3 = _mm256_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 135 __m256 ve4 = _mm256_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 136 __m256 ve5 = _mm256_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 152 …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 153 …const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve1, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 154 …const __m256 vs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve2, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() 155 …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() [all …]
|
D | avx2-p5-x40.c | 121 __m256 ve0 = _mm256_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 122 __m256 ve1 = _mm256_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 123 __m256 ve2 = _mm256_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 124 __m256 ve3 = _mm256_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 125 __m256 ve4 = _mm256_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 140 …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve0, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 141 …const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve1, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 142 …const __m256 vs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve2, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 143 …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() 144 …const __m256 vs4 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve4, vm… in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() [all …]
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx2-p5-x96-acc6.c | 231 …const __m256 vaccs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 232 …const __m256 vaccs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 233 …const __m256 vaccs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 234 …const __m256 vaccs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 235 …const __m256 vaccs4 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 236 …const __m256 vaccs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 237 …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 238 …const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 239 …const __m256 vs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 240 …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() [all …]
|
D | avx2-p5-x64-acc4.c | 181 …const __m256 vaccs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 182 …const __m256 vaccs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 183 …const __m256 vaccs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 184 …const __m256 vaccs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 185 …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 186 …const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 187 …const __m256 vs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 188 …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 189 …const __m256 vs4 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 190 …const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() [all …]
|
D | avx2-p5-x80-acc5.c | 206 …const __m256 vaccs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 207 …const __m256 vaccs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 208 …const __m256 vaccs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 209 …const __m256 vaccs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 210 …const __m256 vaccs4 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 211 …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 212 …const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 213 …const __m256 vs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 214 …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 215 …const __m256 vs4 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() [all …]
|
D | avx2-p5-x72-acc3.c | 189 …const __m256 vaccs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 190 …const __m256 vaccs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 191 …const __m256 vaccs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdel… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 192 …const __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 193 …const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 194 …const __m256 vs2 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 195 …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 196 …const __m256 vs4 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 197 …const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 198 …const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_… in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() [all …]
|
/external/libaom/libaom/aom_dsp/x86/ |
D | fft_avx2.c | 24 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 27 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 30 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 50 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 53 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 56 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
|