• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <arm_neon.h>
10 
11 #include <xnnpack/zip.h>
12 
13 
// Interleaves two byte streams of n elements each into a single output stream.
//
// The two inputs are read from one buffer: stream X occupies input[0 .. n-1]
// and stream Y occupies input[n .. 2n-1]. The output receives 2*n bytes in the
// order x0, y0, x1, y1, ...
//
//   n      - number of bytes in EACH input stream; n == 0 is a no-op.
//   input  - buffer of 2*n readable bytes (X followed immediately by Y).
//   output - buffer of 2*n writable bytes.
void xnn_x8_zip_x2_ukernel__neon(
    size_t n,
    const uint8_t* input,
    uint8_t* output)
{
  // Guard the empty case: the scalar loop below is a do/while that always
  // executes once and then decrements n, so n == 0 would wrap around to
  // SIZE_MAX and run far past the end of both buffers.
  if (n == 0) {
    return;
  }

  const uint8_t* x = input;
  const uint8_t* y = (const uint8_t*) ((uintptr_t) x + n);
  uint8_t* o = output;

  if (n >= 8) {
    // Main loop: take 8 bytes from each stream and emit 16 interleaved bytes.
    do {
      uint8x8x2_t vxy;
      vxy.val[0] = vld1_u8(x); x += 8;
      vxy.val[1] = vld1_u8(y); y += 8;
      vst2_u8(o, vxy); o += 16;
      n -= 8;
    } while (n >= 8);
    if (n != 0) {
      // Tail of 1..7 elements: step the pointers BACK so that one more full
      // 8-byte load ends exactly at the end of each stream. n - 8 wraps
      // (size_t is unsigned), and the arithmetic is done on uintptr_t so the
      // wrapped value behaves as a negative offset. The bytes that overlap the
      // previous iteration are simply rewritten with the same values. This is
      // in bounds because at least one full 8-byte block was processed above.
      const size_t address_increment = n - 8;
      uint8x8x2_t vxy;
      vxy.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment));
      vxy.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment));
      vst2_u8((uint8_t*) ((uintptr_t) o + address_increment * 2), vxy);
    }
  } else {
    // Fewer than 8 elements in total: the overlapping-load trick above would
    // read before the start of the buffers, so interleave one byte at a time.
    // n is known to be non-zero here thanks to the guard at the top.
    do {
      const uint8_t vx = *x++;
      const uint8_t vy = *y++;
      o[0] = vx;
      o[1] = vy;
      o += 2;
    } while (--n != 0);
  }
}
48