/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 4x16inc-minmax-avx512f-broadcast.c | 66 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() local 77 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 91 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 97 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 100 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 120 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 5x16inc-minmax-avx512f-broadcast.c | 72 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() local 84 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 100 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 107 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 113 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 135 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 6x16inc-minmax-avx512f-broadcast.c | 78 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() local 91 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 109 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 117 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 126 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 150 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 7x16inc-minmax-avx512f-broadcast.c | 84 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() local 98 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 118 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 127 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 139 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 165 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 8x16inc-minmax-avx512f-broadcast.c | 90 __m512 vacc3x0123456789ABCDEF = _mm512_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() local 105 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 127 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 137 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 152 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 180 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x16-minmax-avx512f-broadcast.c | 64 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() local 75 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 89 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 95 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 98 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 118 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 5x16-minmax-avx512f-broadcast.c | 70 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() local 82 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 98 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 105 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 111 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 133 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 6x16-minmax-avx512f-broadcast.c | 76 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() local 89 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 107 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 115 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 124 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 148 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 7x16-minmax-avx512f-broadcast.c | 82 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() local 96 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 116 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 125 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 137 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 163 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 8x16-minmax-avx512f-broadcast.c | 88 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() local 103 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 125 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 135 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 150 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 178 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x16-minmax-avx512f-broadcast.c | 62 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() local 97 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 113 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 119 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 122 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 138 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 5x16-minmax-avx512f-broadcast.c | 66 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() local 107 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 125 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 132 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 138 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 155 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 6x16-minmax-avx512f-broadcast.c | 70 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() local 117 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 137 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 145 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 154 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 172 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 7x16-minmax-avx512f-broadcast.c | 74 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() local 127 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 149 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 158 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 170 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 189 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 8x16-minmax-avx512f-broadcast.c | 78 __m512 vacc3x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() local 137 …vacc3x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a3), vb0123456789ABCDEF, vacc3x0123456789… in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 161 vacc3x0123456789ABCDEF = _mm512_min_ps(vacc3x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 171 vacc3x0123456789ABCDEF = _mm512_max_ps(vacc3x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 186 _mm512_storeu_ps(c3, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 206 _mm512_mask_storeu_ps(c3, vmask, vacc3x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast()
|