• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <arm_neon.h>
10 
11 #include <xnnpack/zip.h>
12 
13 
// Interleaves four consecutive n-byte rows into a single byte stream.
//
// `input` is read as four back-to-back rows of n bytes each (x, y, z, w),
// i.e. 4 * n bytes in total. `output` receives 4 * n bytes laid out as
// x[0], y[0], z[0], w[0], x[1], y[1], z[1], w[1], ...
//
// n == 0 is a no-op.
void xnn_x8_zip_x4_ukernel__neon(
    size_t n,
    const uint8_t* input,
    uint8_t* output)
{
  if (n == 0) {
    // Nothing to interleave. Returning here also keeps the scalar loop
    // below from decrementing n past zero and running out of bounds.
    return;
  }

  const uint8_t* x = input;
  const uint8_t* y = x + n;
  const uint8_t* z = y + n;
  const uint8_t* w = z + n;
  uint8_t* o = output;

  if (n >= 8) {
    // Main loop: 8 elements from each row per iteration, written out as
    // 32 interleaved bytes by the 4-way structured store.
    do {
      uint8x8x4_t vxyzw;
      vxyzw.val[0] = vld1_u8(x); x += 8;
      vxyzw.val[1] = vld1_u8(y); y += 8;
      vxyzw.val[2] = vld1_u8(z); z += 8;
      vxyzw.val[3] = vld1_u8(w); w += 8;
      vst4_u8(o, vxyzw); o += 32;
      n -= 8;
    } while (n >= 8);
    if (n != 0) {
      // 1..7 elements remain. Step every pointer back so that one final
      // 8-lane block ends exactly at the end of each row. The lanes that
      // overlap the previous iteration store the same values again
      // (assuming input and output do not overlap). Because at least one
      // full block was already processed, the stepped-back pointers stay
      // inside their buffers.
      const size_t rewind = 8 - n;
      uint8x8x4_t vxyzw;
      vxyzw.val[0] = vld1_u8(x - rewind);
      vxyzw.val[1] = vld1_u8(y - rewind);
      vxyzw.val[2] = vld1_u8(z - rewind);
      vxyzw.val[3] = vld1_u8(w - rewind);
      vst4_u8(o - rewind * 4, vxyzw);
    }
  } else {
    // Fewer than 8 elements per row: an 8-lane load would read past the
    // end of the row, so interleave one element at a time.
    do {
      const uint8_t vx = *x++;
      const uint8_t vy = *y++;
      const uint8_t vz = *z++;
      const uint8_t vw = *w++;
      o[0] = vx;
      o[1] = vy;
      o[2] = vz;
      o[3] = vw;
      o += 4;
    } while (--n != 0);
  }
}
58