/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x25-sse.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x25__sse() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x25__sse() 88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__sse() 89 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x25__sse() 96 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__sse() 97 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x25__sse() 105 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__sse() 106 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x25__sse() 114 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__sse() 115 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x25__sse() [all …]
|
D | up8x25-sse-acc2.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 89 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 96 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 97 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 105 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 106 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 114 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 115 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() [all …]
|
D | up4x25-sse.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x25__sse() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x25__sse() 88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x25__sse() 94 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x25__sse() 100 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x25__sse() 106 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x25__sse() 112 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x25__sse() 118 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x25__sse() 124 const __m128 vk5x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up4x25__sse() 130 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up4x25__sse() [all …]
|
D | up4x25-sse-acc2.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 94 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 100 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 106 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 112 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 118 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 124 const __m128 vk5x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() 130 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() [all …]
|
D | up8x9-sse-acc2.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 57 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 64 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 65 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 73 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 74 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 82 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 83 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() [all …]
|
D | up8x9-sse.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x9__sse() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x9__sse() 56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__sse() 57 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x9__sse() 64 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__sse() 65 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x9__sse() 73 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__sse() 74 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x9__sse() 82 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__sse() 83 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x9__sse() [all …]
|
D | up4x9-sse-acc2.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 62 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 68 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 74 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 80 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 86 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 92 const __m128 vk5x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() 98 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() [all …]
|
D | up8x4-sse.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x4__sse() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x4__sse() 46 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__sse() 47 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x4__sse() 54 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x4__sse() 55 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x4__sse() 63 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x4__sse() 64 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x4__sse() 72 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x4__sse() 73 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x4__sse() [all …]
|
D | up4x9-sse.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x9__sse() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x9__sse() 56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x9__sse() 62 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x9__sse() 68 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x9__sse() 74 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x9__sse() 80 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x9__sse() 86 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x9__sse() 92 const __m128 vk5x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up4x9__sse() 98 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up4x9__sse() [all …]
|
D | up8x4-sse-acc2.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 46 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 47 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 54 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 55 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 63 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 64 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 72 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 73 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() [all …]
|
D | up4x4-sse.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x4__sse() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x4__sse() 46 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x4__sse() 52 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x4__sse() 58 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x4__sse() 64 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x4__sse() 70 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x4__sse() 83 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x4__sse() 86 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x4__sse() 90 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x4__sse() [all …]
|
D | up4x4-sse-acc2.c | 30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 46 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 52 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 58 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 64 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 70 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 85 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 88 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 92 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 1x8s4-sse.c | 42 __m128 vacc0x0123 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8s4__sse() 43 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8s4__sse() 52 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8s4__sse() 53 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8s4__sse() 60 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x8s4__sse() 61 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemm_ukernel_1x8s4__sse() 68 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemm_ukernel_1x8s4__sse() 69 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemm_ukernel_1x8s4__sse() 76 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemm_ukernel_1x8s4__sse() 77 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemm_ukernel_1x8s4__sse() [all …]
|
D | 1x8-sse-dup.c | 42 __m128 vacc0x0123 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8__sse_dup() 43 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8__sse_dup() 54 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8__sse_dup() 55 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8__sse_dup() 62 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x8__sse_dup() 63 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemm_ukernel_1x8__sse_dup() 70 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemm_ukernel_1x8__sse_dup() 71 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemm_ukernel_1x8__sse_dup() 78 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemm_ukernel_1x8__sse_dup() 79 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemm_ukernel_1x8__sse_dup() [all …]
|
D | 1x8-sse-load1.c | 42 __m128 vacc0x0123 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8__sse_load1() 43 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8__sse_load1() 51 const __m128 vb0123 = _mm_load_ps(w); in xnn_f32_gemm_ukernel_1x8__sse_load1() 52 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8__sse_load1() 61 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_gemm_ukernel_1x8__sse_load1() 65 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_gemm_ukernel_1x8__sse_load1()
|
D | 4x8s4-sse.c | 60 __m128 vacc0x0123 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_4x8s4__sse() 61 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_4x8s4__sse() 82 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_4x8s4__sse() 83 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_4x8s4__sse() 99 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemm_ukernel_4x8s4__sse() 100 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemm_ukernel_4x8s4__sse() 116 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemm_ukernel_4x8s4__sse() 117 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemm_ukernel_4x8s4__sse() 133 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemm_ukernel_4x8s4__sse() 134 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemm_ukernel_4x8s4__sse() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 1x8s4-sse.c | 44 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_1x8s4__sse() 45 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_1x8s4__sse() 54 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemminc_ukernel_1x8s4__sse() 55 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_1x8s4__sse() 62 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemminc_ukernel_1x8s4__sse() 63 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemminc_ukernel_1x8s4__sse() 70 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemminc_ukernel_1x8s4__sse() 71 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemminc_ukernel_1x8s4__sse() 78 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemminc_ukernel_1x8s4__sse() 79 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemminc_ukernel_1x8s4__sse() [all …]
|
D | 1x8-sse-dup.c | 44 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_1x8__sse_dup() 45 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_1x8__sse_dup() 56 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemminc_ukernel_1x8__sse_dup() 57 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_1x8__sse_dup() 64 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemminc_ukernel_1x8__sse_dup() 65 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemminc_ukernel_1x8__sse_dup() 72 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemminc_ukernel_1x8__sse_dup() 73 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemminc_ukernel_1x8__sse_dup() 80 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemminc_ukernel_1x8__sse_dup() 81 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemminc_ukernel_1x8__sse_dup() [all …]
|
D | 4x8s4-sse.c | 62 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_4x8s4__sse() 63 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_4x8s4__sse() 64 __m128 vacc1x0123 = _mm_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_4x8s4__sse() 65 __m128 vacc1x4567 = _mm_load_ps(acc + 12); in xnn_f32_gemminc_ukernel_4x8s4__sse() 66 __m128 vacc2x0123 = _mm_load_ps(acc + 16); in xnn_f32_gemminc_ukernel_4x8s4__sse() 67 __m128 vacc2x4567 = _mm_load_ps(acc + 20); in xnn_f32_gemminc_ukernel_4x8s4__sse() 68 __m128 vacc3x0123 = _mm_load_ps(acc + 24); in xnn_f32_gemminc_ukernel_4x8s4__sse() 69 __m128 vacc3x4567 = _mm_load_ps(acc + 28); in xnn_f32_gemminc_ukernel_4x8s4__sse() 84 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemminc_ukernel_4x8s4__sse() 85 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_4x8s4__sse() [all …]
|
D | 4x8-sse-dup.c | 62 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_4x8__sse_dup() 63 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_4x8__sse_dup() 64 __m128 vacc1x0123 = _mm_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_4x8__sse_dup() 65 __m128 vacc1x4567 = _mm_load_ps(acc + 12); in xnn_f32_gemminc_ukernel_4x8__sse_dup() 66 __m128 vacc2x0123 = _mm_load_ps(acc + 16); in xnn_f32_gemminc_ukernel_4x8__sse_dup() 67 __m128 vacc2x4567 = _mm_load_ps(acc + 20); in xnn_f32_gemminc_ukernel_4x8__sse_dup() 68 __m128 vacc3x0123 = _mm_load_ps(acc + 24); in xnn_f32_gemminc_ukernel_4x8__sse_dup() 69 __m128 vacc3x4567 = _mm_load_ps(acc + 28); in xnn_f32_gemminc_ukernel_4x8__sse_dup() 89 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemminc_ukernel_4x8__sse_dup() 90 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_4x8__sse_dup() [all …]
|
D | 4x8-sse-load1.c | 62 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 63 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 64 __m128 vacc1x0123 = _mm_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 65 __m128 vacc1x4567 = _mm_load_ps(acc + 12); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 66 __m128 vacc2x0123 = _mm_load_ps(acc + 16); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 67 __m128 vacc2x4567 = _mm_load_ps(acc + 20); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 68 __m128 vacc3x0123 = _mm_load_ps(acc + 24); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 69 __m128 vacc3x4567 = _mm_load_ps(acc + 28); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 83 const __m128 vb0123 = _mm_load_ps(w); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 84 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_4x8__sse_load1() [all …]
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 1x8s4-sse.c | 46 __m128 vacc0x0123 = _mm_load_ps(w); in xnn_f32_igemm_ukernel_1x8s4__sse() 47 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_igemm_ukernel_1x8s4__sse() 65 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_igemm_ukernel_1x8s4__sse() 66 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_igemm_ukernel_1x8s4__sse() 73 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x8s4__sse() 74 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_igemm_ukernel_1x8s4__sse() 81 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_igemm_ukernel_1x8s4__sse() 82 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_igemm_ukernel_1x8s4__sse() 89 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_igemm_ukernel_1x8s4__sse() 90 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_igemm_ukernel_1x8s4__sse() [all …]
|
D | 1x8-sse-dup.c | 46 __m128 vacc0x0123 = _mm_load_ps(w); in xnn_f32_igemm_ukernel_1x8__sse_dup() 47 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_igemm_ukernel_1x8__sse_dup() 67 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_igemm_ukernel_1x8__sse_dup() 68 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_igemm_ukernel_1x8__sse_dup() 75 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x8__sse_dup() 76 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_igemm_ukernel_1x8__sse_dup() 83 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_igemm_ukernel_1x8__sse_dup() 84 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_igemm_ukernel_1x8__sse_dup() 91 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_igemm_ukernel_1x8__sse_dup() 92 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_igemm_ukernel_1x8__sse_dup() [all …]
|
/external/libaom/libaom/aom_dsp/x86/ |
D | fft_sse2.c | 20 __m128 row1 = _mm_load_ps(&A[0 * lda]); in transpose4x4() 21 __m128 row2 = _mm_load_ps(&A[1 * lda]); in transpose4x4() 22 __m128 row3 = _mm_load_ps(&A[2 * lda]); in transpose4x4() 23 __m128 row4 = _mm_load_ps(&A[3 * lda]); in transpose4x4() 71 __m128 real1 = _mm_load_ps(packed + r * n + c); in aom_fft_unpack_2d_output_sse2() 72 __m128 real2 = _mm_load_ps(packed + (r + n2) * n + c + n2); in aom_fft_unpack_2d_output_sse2() 73 __m128 imag1 = _mm_load_ps(packed + (r + n2) * n + c); in aom_fft_unpack_2d_output_sse2() 74 __m128 imag2 = _mm_load_ps(packed + r * n + c + n2); in aom_fft_unpack_2d_output_sse2() 94 __m128 real1 = _mm_load_ps(packed + r3 * n + c); in aom_fft_unpack_2d_output_sse2() 95 __m128 real2 = _mm_load_ps(packed + (r3 + n2) * n + c + n2); in aom_fft_unpack_2d_output_sse2() [all …]
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c8-sse-2x.c | 44 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 45 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 50 const __m128 vscale0123 = _mm_load_ps(w); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 51 const __m128 vscale4567 = _mm_load_ps(w + 4); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 65 const __m128 vbias0123 = _mm_load_ps(w + 8); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 66 const __m128 vbias4567 = _mm_load_ps(w + 12); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 93 const __m128 vscale0123 = _mm_load_ps(w); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 103 const __m128 vbias0123 = _mm_load_ps(w + 8); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 122 const __m128 vscale0123 = _mm_load_ps(w); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 132 const __m128 vbias0123 = _mm_load_ps(w + 8); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
|