/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 4x16inc-minmax-avx512f-broadcast.c | 65 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() local 76 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 90 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 96 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 102 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast() 121 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 5x16inc-minmax-avx512f-broadcast.c | 71 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() local 83 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 99 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 106 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 115 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast() 136 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 6x16inc-minmax-avx512f-broadcast.c | 77 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() local 90 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 108 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 116 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 128 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast() 151 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 7x16inc-minmax-avx512f-broadcast.c | 83 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() local 97 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 117 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 126 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 141 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast() 166 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 8x16inc-minmax-avx512f-broadcast.c | 89 __m512 vacc2x0123456789ABCDEF = _mm512_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() local 104 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 126 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 136 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 154 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast() 181 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x16-minmax-avx512f-broadcast.c | 63 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() local 74 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 88 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 94 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 100 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast() 119 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 5x16-minmax-avx512f-broadcast.c | 69 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() local 81 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 97 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 104 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 113 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast() 134 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 6x16-minmax-avx512f-broadcast.c | 75 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() local 88 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 106 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 114 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 126 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast() 149 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 7x16-minmax-avx512f-broadcast.c | 81 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() local 95 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 115 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 124 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 139 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast() 164 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 8x16-minmax-avx512f-broadcast.c | 87 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() local 102 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 124 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 134 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 152 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast() 179 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x16-minmax-avx512f-broadcast.c | 61 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() local 96 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 112 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 118 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 124 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast() 139 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast()
|
D | 5x16-minmax-avx512f-broadcast.c | 65 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() local 106 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 124 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 131 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 140 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast() 156 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast()
|
D | 6x16-minmax-avx512f-broadcast.c | 69 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() local 116 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 136 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 144 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 156 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast() 173 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast()
|
D | 7x16-minmax-avx512f-broadcast.c | 73 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() local 126 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 148 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 157 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 172 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast() 190 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast()
|
D | 8x16-minmax-avx512f-broadcast.c | 77 __m512 vacc2x0123456789ABCDEF = vacc0x0123456789ABCDEF; in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() local 136 …vacc2x0123456789ABCDEF = _mm512_fmadd_ps(_mm512_set1_ps(*a2), vb0123456789ABCDEF, vacc2x0123456789… in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 160 vacc2x0123456789ABCDEF = _mm512_min_ps(vacc2x0123456789ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 170 vacc2x0123456789ABCDEF = _mm512_max_ps(vacc2x0123456789ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 188 _mm512_storeu_ps(c2, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast() 207 _mm512_mask_storeu_ps(c2, vmask, vacc2x0123456789ABCDEF); in xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast()
|