
Searched refs: _mm_load_ps (Results 1 – 25 of 182), sorted by relevance
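For reference, _mm_load_ps is the SSE intrinsic (declared in xmmintrin.h) that loads four consecutive single-precision floats from a 16-byte-aligned address into an __m128 register; _mm_loadu_ps is the unaligned counterpart. A minimal, self-contained sketch of its use (the array names are illustrative, not taken from the results below):

#include <stdalign.h>
#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  /* _mm_load_ps requires a 16-byte-aligned address; use _mm_loadu_ps otherwise. */
  alignas(16) float data[8] = {1, 2, 3, 4, 5, 6, 7, 8};

  __m128 lo  = _mm_load_ps(data);      /* data[0..3] */
  __m128 hi  = _mm_load_ps(data + 4);  /* data[4..7] */
  __m128 sum = _mm_add_ps(lo, hi);     /* {6, 8, 10, 12} */

  alignas(16) float out[4];
  _mm_store_ps(out, sum);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}

Most of the hits below come from XNNPACK's generated SSE microkernels, where the aligned form is used for packed weights and for the min/max parameter vectors.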


/external/XNNPACK/src/f32-dwconv/gen/
up8x25-sse.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x25__sse()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x25__sse()
88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__sse()
89 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x25__sse()
96 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__sse()
97 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x25__sse()
105 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__sse()
106 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x25__sse()
114 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__sse()
115 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x25__sse()
[all …]
up8x25-sse-acc2.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
89 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
96 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
97 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
105 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
106 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
114 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
115 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
[all …]
up4x25-sse.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x25__sse()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x25__sse()
88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x25__sse()
94 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x25__sse()
100 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x25__sse()
106 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x25__sse()
112 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x25__sse()
118 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x25__sse()
124 const __m128 vk5x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up4x25__sse()
130 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up4x25__sse()
[all …]
up4x25-sse-acc2.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
94 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
100 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
106 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
112 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
118 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
124 const __m128 vk5x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
130 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
[all …]
up8x9-sse-acc2.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
57 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
64 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
65 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
73 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
74 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
82 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
83 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
[all …]
up8x9-sse.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x9__sse()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x9__sse()
56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__sse()
57 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x9__sse()
64 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x9__sse()
65 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x9__sse()
73 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x9__sse()
74 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x9__sse()
82 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x9__sse()
83 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x9__sse()
[all …]
up4x9-sse-acc2.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
62 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
68 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
74 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
80 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
86 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
92 const __m128 vk5x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
98 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
[all …]
up8x4-sse.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x4__sse()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x4__sse()
46 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__sse()
47 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x4__sse()
54 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x4__sse()
55 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x4__sse()
63 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x4__sse()
64 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x4__sse()
72 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x4__sse()
73 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x4__sse()
[all …]
up4x9-sse.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x9__sse()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x9__sse()
56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x9__sse()
62 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x9__sse()
68 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x9__sse()
74 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x9__sse()
80 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x9__sse()
86 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x9__sse()
92 const __m128 vk5x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up4x9__sse()
98 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up4x9__sse()
[all …]
up8x4-sse-acc2.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
46 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
47 __m128 vacc4567p0 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
54 const __m128 vk0x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
55 const __m128 vk0x4567 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
63 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
64 const __m128 vk1x4567 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
72 const __m128 vk2x0123 = _mm_load_ps(w + 24); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
73 const __m128 vk2x4567 = _mm_load_ps(w + 28); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
[all …]
up4x4-sse.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x4__sse()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x4__sse()
46 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x4__sse()
52 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x4__sse()
58 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x4__sse()
64 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x4__sse()
70 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x4__sse()
83 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x4__sse()
86 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x4__sse()
90 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x4__sse()
[all …]
up4x4-sse-acc2.c
30 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
31 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
46 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
52 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
58 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
64 const __m128 vk2x0123 = _mm_load_ps(w + 12); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
70 const __m128 vk3x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
85 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
88 const __m128 vk0x0123 = _mm_load_ps(w + 4); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
92 const __m128 vk1x0123 = _mm_load_ps(w + 8); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
[all …]
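The depthwise-convolution microkernels listed above share one load pattern: the output clamping bounds are read once from params->sse.min and params->sse.max, and the packed weight pointer w starts with a bias vector followed by one 4-float kernel vector per tap, so successive taps sit at w + 4, w + 8, ... (the up8 variants load two vectors per step: 0/4, 8/12, 16/20, ...). A reduced sketch of a 4-channel, 3-tap step under that layout; the function name and the input-row pointers i0..i2 are hypothetical, not XNNPACK's actual code:

#include <xmmintrin.h>

/* One output pixel, 4 channels, 3 taps. Packed weights are laid out as
 * [bias(4) | tap0(4) | tap1(4) | tap2(4)] and assumed 16-byte aligned. */
static void dwconv_4ch_3tap_sketch(const float* w,
                                   const float* i0, const float* i1, const float* i2,
                                   const float* clamp_min, const float* clamp_max,
                                   float* out) {
  const __m128 vmin = _mm_load_ps(clamp_min);
  const __m128 vmax = _mm_load_ps(clamp_max);

  __m128 vacc = _mm_load_ps(w);            /* bias */
  const __m128 vk0 = _mm_load_ps(w + 4);   /* tap 0 weights */
  const __m128 vk1 = _mm_load_ps(w + 8);   /* tap 1 weights */
  const __m128 vk2 = _mm_load_ps(w + 12);  /* tap 2 weights */

  vacc = _mm_add_ps(vacc, _mm_mul_ps(_mm_loadu_ps(i0), vk0));
  vacc = _mm_add_ps(vacc, _mm_mul_ps(_mm_loadu_ps(i1), vk1));
  vacc = _mm_add_ps(vacc, _mm_mul_ps(_mm_loadu_ps(i2), vk2));

  vacc = _mm_max_ps(vacc, vmin);           /* clamp to [min, max] */
  vacc = _mm_min_ps(vacc, vmax);
  _mm_storeu_ps(out, vacc);
}

The _acc2 variants differ only in keeping two partial accumulators per output vector (to shorten the dependency chain) and summing them at the end.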
/external/XNNPACK/src/f32-gemm/gen/
1x8s4-sse.c
42 __m128 vacc0x0123 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8s4__sse()
43 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8s4__sse()
52 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8s4__sse()
53 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8s4__sse()
60 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x8s4__sse()
61 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemm_ukernel_1x8s4__sse()
68 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemm_ukernel_1x8s4__sse()
69 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemm_ukernel_1x8s4__sse()
76 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemm_ukernel_1x8s4__sse()
77 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemm_ukernel_1x8s4__sse()
[all …]
1x8-sse-dup.c
42 __m128 vacc0x0123 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8__sse_dup()
43 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8__sse_dup()
54 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8__sse_dup()
55 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8__sse_dup()
62 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemm_ukernel_1x8__sse_dup()
63 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemm_ukernel_1x8__sse_dup()
70 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemm_ukernel_1x8__sse_dup()
71 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemm_ukernel_1x8__sse_dup()
78 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemm_ukernel_1x8__sse_dup()
79 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemm_ukernel_1x8__sse_dup()
[all …]
1x8-sse-load1.c
42 __m128 vacc0x0123 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_1x8__sse_load1()
43 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8__sse_load1()
51 const __m128 vb0123 = _mm_load_ps(w); in xnn_f32_gemm_ukernel_1x8__sse_load1()
52 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_1x8__sse_load1()
61 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_gemm_ukernel_1x8__sse_load1()
65 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_gemm_ukernel_1x8__sse_load1()
4x8s4-sse.c
60 __m128 vacc0x0123 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_4x8s4__sse()
61 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_4x8s4__sse()
82 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemm_ukernel_4x8s4__sse()
83 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemm_ukernel_4x8s4__sse()
99 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemm_ukernel_4x8s4__sse()
100 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemm_ukernel_4x8s4__sse()
116 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemm_ukernel_4x8s4__sse()
117 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemm_ukernel_4x8s4__sse()
133 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemm_ukernel_4x8s4__sse()
134 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemm_ukernel_4x8s4__sse()
[all …]
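In the GEMM microkernels the first _mm_load_ps calls seed the accumulators from the head of the packed weight stream (the 8 bias values at w and w + 4), after which each step along K loads the packed B panel four floats at a time (vb0123*, vb4567*). A rough 1x8 "load1"-style sketch with a hypothetical signature, not the library's kernel:

#include <stddef.h>
#include <xmmintrin.h>

/* c[0..7] = bias[0..7] + sum_k a[k] * B[k][0..7], with B pre-packed after the
 * bias in w. Packed weights assumed 16-byte aligned; no remainder handling. */
static void gemm_1x8_sketch(size_t kc, const float* a, const float* w, float* c) {
  __m128 vacc0123 = _mm_load_ps(w);       /* bias, columns 0-3 */
  __m128 vacc4567 = _mm_load_ps(w + 4);   /* bias, columns 4-7 */
  w += 8;

  for (size_t k = 0; k < kc; k++) {
    const __m128 va = _mm_set1_ps(a[k]);        /* broadcast one A element */
    const __m128 vb0123 = _mm_load_ps(w);       /* packed B, columns 0-3 */
    const __m128 vb4567 = _mm_load_ps(w + 4);   /* packed B, columns 4-7 */
    w += 8;
    vacc0123 = _mm_add_ps(vacc0123, _mm_mul_ps(va, vb0123));
    vacc4567 = _mm_add_ps(vacc4567, _mm_mul_ps(va, vb4567));
  }

  _mm_storeu_ps(c, vacc0123);
  _mm_storeu_ps(c + 4, vacc4567);
}

The s4 and dup variants differ in how the A element reaches the register (rotating a vector of four A values, or duplicating lanes with shuffles), but they load the packed weights the same way.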
/external/XNNPACK/src/f32-gemm/gen-inc/
1x8s4-sse.c
44 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_1x8s4__sse()
45 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_1x8s4__sse()
54 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemminc_ukernel_1x8s4__sse()
55 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_1x8s4__sse()
62 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemminc_ukernel_1x8s4__sse()
63 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemminc_ukernel_1x8s4__sse()
70 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemminc_ukernel_1x8s4__sse()
71 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemminc_ukernel_1x8s4__sse()
78 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemminc_ukernel_1x8s4__sse()
79 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemminc_ukernel_1x8s4__sse()
[all …]
1x8-sse-dup.c
44 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
45 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
56 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
57 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
64 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
65 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
72 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
73 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
80 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
81 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_gemminc_ukernel_1x8__sse_dup()
[all …]
4x8s4-sse.c
62 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_4x8s4__sse()
63 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_4x8s4__sse()
64 __m128 vacc1x0123 = _mm_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_4x8s4__sse()
65 __m128 vacc1x4567 = _mm_load_ps(acc + 12); in xnn_f32_gemminc_ukernel_4x8s4__sse()
66 __m128 vacc2x0123 = _mm_load_ps(acc + 16); in xnn_f32_gemminc_ukernel_4x8s4__sse()
67 __m128 vacc2x4567 = _mm_load_ps(acc + 20); in xnn_f32_gemminc_ukernel_4x8s4__sse()
68 __m128 vacc3x0123 = _mm_load_ps(acc + 24); in xnn_f32_gemminc_ukernel_4x8s4__sse()
69 __m128 vacc3x4567 = _mm_load_ps(acc + 28); in xnn_f32_gemminc_ukernel_4x8s4__sse()
84 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemminc_ukernel_4x8s4__sse()
85 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_4x8s4__sse()
[all …]
4x8-sse-dup.c
62 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
63 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
64 __m128 vacc1x0123 = _mm_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
65 __m128 vacc1x4567 = _mm_load_ps(acc + 12); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
66 __m128 vacc2x0123 = _mm_load_ps(acc + 16); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
67 __m128 vacc2x4567 = _mm_load_ps(acc + 20); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
68 __m128 vacc3x0123 = _mm_load_ps(acc + 24); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
69 __m128 vacc3x4567 = _mm_load_ps(acc + 28); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
89 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
90 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_4x8__sse_dup()
[all …]
4x8-sse-load1.c
62 __m128 vacc0x0123 = _mm_load_ps(acc + 0); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
63 __m128 vacc0x4567 = _mm_load_ps(acc + 4); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
64 __m128 vacc1x0123 = _mm_load_ps(acc + 8); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
65 __m128 vacc1x4567 = _mm_load_ps(acc + 12); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
66 __m128 vacc2x0123 = _mm_load_ps(acc + 16); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
67 __m128 vacc2x4567 = _mm_load_ps(acc + 20); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
68 __m128 vacc3x0123 = _mm_load_ps(acc + 24); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
69 __m128 vacc3x4567 = _mm_load_ps(acc + 28); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
83 const __m128 vb0123 = _mm_load_ps(w); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
84 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
[all …]
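The gen-inc variants (xnn_f32_gemminc_*) differ from the plain GEMM kernels only in how the accumulators are seeded: instead of loading the bias from the packed weights they load previously written partial sums from a separate acc buffer (acc + 0, acc + 4, ..., acc + 28 for the 4x8 tile), which allows the K dimension to be processed in several passes. A small sketch of that difference; the helper name and first_pass flag are hypothetical:

#include <stdbool.h>
#include <xmmintrin.h>

/* Seed one 1x8 accumulator tile either from the bias at the head of the packed
 * weights (plain gemm) or from partial sums left by an earlier pass (gemminc). */
static void seed_acc_1x8(bool first_pass, const float* w, const float* acc,
                         __m128* vacc0x0123, __m128* vacc0x4567) {
  if (first_pass) {
    *vacc0x0123 = _mm_load_ps(w);        /* bias, columns 0-3 */
    *vacc0x4567 = _mm_load_ps(w + 4);    /* bias, columns 4-7 */
  } else {
    *vacc0x0123 = _mm_load_ps(acc);      /* partial result, columns 0-3 */
    *vacc0x4567 = _mm_load_ps(acc + 4);  /* partial result, columns 4-7 */
  }
}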
/external/XNNPACK/src/f32-igemm/gen/
1x8s4-sse.c
46 __m128 vacc0x0123 = _mm_load_ps(w); in xnn_f32_igemm_ukernel_1x8s4__sse()
47 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_igemm_ukernel_1x8s4__sse()
65 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_igemm_ukernel_1x8s4__sse()
66 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_igemm_ukernel_1x8s4__sse()
73 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x8s4__sse()
74 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_igemm_ukernel_1x8s4__sse()
81 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_igemm_ukernel_1x8s4__sse()
82 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_igemm_ukernel_1x8s4__sse()
89 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_igemm_ukernel_1x8s4__sse()
90 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_igemm_ukernel_1x8s4__sse()
[all …]
1x8-sse-dup.c
46 __m128 vacc0x0123 = _mm_load_ps(w); in xnn_f32_igemm_ukernel_1x8__sse_dup()
47 __m128 vacc0x4567 = _mm_load_ps(w + 4); in xnn_f32_igemm_ukernel_1x8__sse_dup()
67 const __m128 vb0123c0 = _mm_load_ps(w + 0); in xnn_f32_igemm_ukernel_1x8__sse_dup()
68 const __m128 vb4567c0 = _mm_load_ps(w + 4); in xnn_f32_igemm_ukernel_1x8__sse_dup()
75 const __m128 vb0123c1 = _mm_load_ps(w + 8); in xnn_f32_igemm_ukernel_1x8__sse_dup()
76 const __m128 vb4567c1 = _mm_load_ps(w + 12); in xnn_f32_igemm_ukernel_1x8__sse_dup()
83 const __m128 vb0123c2 = _mm_load_ps(w + 16); in xnn_f32_igemm_ukernel_1x8__sse_dup()
84 const __m128 vb4567c2 = _mm_load_ps(w + 20); in xnn_f32_igemm_ukernel_1x8__sse_dup()
91 const __m128 vb0123c3 = _mm_load_ps(w + 24); in xnn_f32_igemm_ukernel_1x8__sse_dup()
92 const __m128 vb4567c3 = _mm_load_ps(w + 28); in xnn_f32_igemm_ukernel_1x8__sse_dup()
[all …]
/external/libaom/libaom/aom_dsp/x86/
fft_sse2.c
20 __m128 row1 = _mm_load_ps(&A[0 * lda]); in transpose4x4()
21 __m128 row2 = _mm_load_ps(&A[1 * lda]); in transpose4x4()
22 __m128 row3 = _mm_load_ps(&A[2 * lda]); in transpose4x4()
23 __m128 row4 = _mm_load_ps(&A[3 * lda]); in transpose4x4()
71 __m128 real1 = _mm_load_ps(packed + r * n + c); in aom_fft_unpack_2d_output_sse2()
72 __m128 real2 = _mm_load_ps(packed + (r + n2) * n + c + n2); in aom_fft_unpack_2d_output_sse2()
73 __m128 imag1 = _mm_load_ps(packed + (r + n2) * n + c); in aom_fft_unpack_2d_output_sse2()
74 __m128 imag2 = _mm_load_ps(packed + r * n + c + n2); in aom_fft_unpack_2d_output_sse2()
94 __m128 real1 = _mm_load_ps(packed + r3 * n + c); in aom_fft_unpack_2d_output_sse2()
95 __m128 real2 = _mm_load_ps(packed + (r3 + n2) * n + c + n2); in aom_fft_unpack_2d_output_sse2()
[all …]
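The libaom hits are of a different kind: transpose4x4() loads four rows of a row-major float matrix (&A[0 * lda] ... &A[3 * lda]) before transposing them, and aom_fft_unpack_2d_output_sse2() gathers the real and imaginary halves of a packed 2-D FFT output. A generic 4x4 transpose in the same spirit, using the _MM_TRANSPOSE4_PS macro from xmmintrin.h (the libaom code may do the shuffles by hand; this is only a sketch):

#include <xmmintrin.h>

/* Transpose a 4x4 block of a row-major float matrix. Both the input rows
 * (stride lda) and the output rows (stride ldo) are assumed 16-byte aligned. */
static void transpose4x4_sketch(const float* A, int lda, float* out, int ldo) {
  __m128 row1 = _mm_load_ps(&A[0 * lda]);
  __m128 row2 = _mm_load_ps(&A[1 * lda]);
  __m128 row3 = _mm_load_ps(&A[2 * lda]);
  __m128 row4 = _mm_load_ps(&A[3 * lda]);

  _MM_TRANSPOSE4_PS(row1, row2, row3, row4);  /* in-place 4x4 transpose */

  _mm_store_ps(&out[0 * ldo], row1);
  _mm_store_ps(&out[1 * ldo], row2);
  _mm_store_ps(&out[2 * ldo], row3);
  _mm_store_ps(&out[3 * ldo], row4);
}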
/external/XNNPACK/src/f32-vmulcaddc/gen/
c8-sse-2x.c
44 const __m128 vmin = _mm_load_ps(params->sse.min); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
45 const __m128 vmax = _mm_load_ps(params->sse.max); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
50 const __m128 vscale0123 = _mm_load_ps(w); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
51 const __m128 vscale4567 = _mm_load_ps(w + 4); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
65 const __m128 vbias0123 = _mm_load_ps(w + 8); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
66 const __m128 vbias4567 = _mm_load_ps(w + 12); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
93 const __m128 vscale0123 = _mm_load_ps(w); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
103 const __m128 vbias0123 = _mm_load_ps(w + 8); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
122 const __m128 vscale0123 = _mm_load_ps(w); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
132 const __m128 vbias0123 = _mm_load_ps(w + 8); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
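The vmulcaddc kernel (per-channel multiply-add with clamping) uses the same parameter layout as the dwconv kernels above: the clamping bounds come from params->sse.min/max, and w packs a per-channel scale (w, w + 4) followed by a per-channel bias (w + 8, w + 12) for the 8-channel variant. A reduced 4-channel sketch with a hypothetical signature and its own [scale(4) | bias(4)] packing:

#include <stddef.h>
#include <xmmintrin.h>

/* y[c] = clamp(x[c] * scale[c] + bias[c], min, max), 4 channels per step.
 * n is assumed to be a multiple of 4; w packs [scale(4) | bias(4)], aligned. */
static void vmulcaddc_c4_sketch(size_t n, const float* x, const float* w,
                                const float* clamp_min, const float* clamp_max,
                                float* y) {
  const __m128 vmin = _mm_load_ps(clamp_min);
  const __m128 vmax = _mm_load_ps(clamp_max);
  const __m128 vscale = _mm_load_ps(w);
  const __m128 vbias = _mm_load_ps(w + 4);

  for (size_t i = 0; i < n; i += 4) {
    __m128 vacc = _mm_loadu_ps(x + i);
    vacc = _mm_add_ps(_mm_mul_ps(vacc, vscale), vbias);
    vacc = _mm_max_ps(vacc, vmin);   /* clamp below */
    vacc = _mm_min_ps(vacc, vmax);   /* clamp above */
    _mm_storeu_ps(y + i, vacc);
  }
}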
