• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 
8 #include <wasm_simd128.h>
9 
10 #include <xnnpack/packx.h>
11 
12 
xnn_x32_packx_ukernel_4x__wasmsimd(size_t m,size_t k,const uint32_t * restrict x_ptr,size_t x_stride,uint32_t * restrict y_ptr)13 void xnn_x32_packx_ukernel_4x__wasmsimd(
14     size_t m,
15     size_t k,
16     const uint32_t* restrict x_ptr,
17     size_t x_stride,
18     uint32_t* restrict y_ptr)
19 {
20   assert(m != 0);
21   assert(k != 0);
22 
23   const float* x0 = (const float*) x_ptr;
24   const float* x1 = (const float*) ((uintptr_t) x0 + x_stride);
25   if (m < 2) {
26     x1 = x0;
27   }
28   const float* x2 = (const float*) ((uintptr_t) x1 + x_stride);
29   if (m <= 2) {
30     x2 = x1;
31   }
32   const float* x3 = (const float*) ((uintptr_t) x2 + x_stride);
33   if (m != 4) {
34     x3 = x2;
35   }
36   float* y = (float*) y_ptr;
37 
38   for (; k >= 4; k -= 4) {
39     const v128_t vx0 = wasm_v128_load(x0);
40     x0 += 4;
41     const v128_t vx1 = wasm_v128_load(x1);
42     x1 += 4;
43     const v128_t vx2 = wasm_v128_load(x2);
44     x2 += 4;
45     const v128_t vx3 = wasm_v128_load(x3);
46     x3 += 4;
47 
48     const v128_t vt0 = wasm_v32x4_shuffle(vx0, vx1, 0, 4, 1, 5);
49     const v128_t vt1 = wasm_v32x4_shuffle(vx0, vx1, 2, 6, 3, 7);
50     const v128_t vt2 = wasm_v32x4_shuffle(vx2, vx3, 0, 4, 1, 5);
51     const v128_t vt3 = wasm_v32x4_shuffle(vx2, vx3, 2, 6, 3, 7);
52 
53     const v128_t vy0 = wasm_v32x4_shuffle(vt0, vt2, 0, 1, 4, 5);
54     wasm_v128_store(y, vy0);
55 
56     const v128_t vy1 = wasm_v32x4_shuffle(vt0, vt2, 2, 3, 6, 7);
57     wasm_v128_store(y + 4, vy1);
58 
59     const v128_t vy2 = wasm_v32x4_shuffle(vt1, vt3, 0, 1, 4, 5);
60     wasm_v128_store(y + 8, vy2);
61 
62     const v128_t vy3 = wasm_v32x4_shuffle(vt1, vt3, 2, 3, 6, 7);
63     wasm_v128_store(y + 12, vy3);
64 
65     y += 16;
66   }
67   if XNN_UNLIKELY(k != 0) {
68     do {
69       const float vx0 = *x0++;
70       const float vx1 = *x1++;
71       const float vx2 = *x2++;
72       const float vx3 = *x3++;
73       y[0] = vx0;
74       y[1] = vx1;
75       y[2] = vx2;
76       y[3] = vx3;
77       y += 4;
78     } while (--k != 0);
79   }
80 }
81