1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #include <arm_neon.h> 10 11 #include <xnnpack/zip.h> 12 13 xnn_x8_zip_x2_ukernel__neon(size_t n,const uint8_t * input,uint8_t * output)14void xnn_x8_zip_x2_ukernel__neon( 15 size_t n, 16 const uint8_t* input, 17 uint8_t* output) 18 { 19 const uint8_t* x = input; 20 const uint8_t* y = (const uint8_t*) ((uintptr_t) x + n); 21 uint8_t* o = output; 22 23 if (n >= 8) { 24 do { 25 uint8x8x2_t vxy; 26 vxy.val[0] = vld1_u8(x); x += 8; 27 vxy.val[1] = vld1_u8(y); y += 8; 28 vst2_u8(o, vxy); o += 16;; 29 n -= 8; 30 } while (n >= 8); 31 if (n != 0) { 32 const size_t address_increment = n - 8; 33 uint8x8x2_t vxy; 34 vxy.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment)); 35 vxy.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment)); 36 vst2_u8((uint8_t*) ((uintptr_t) o + address_increment * 2), vxy); 37 } 38 } else { 39 do { 40 const uint8_t vx = *x++; 41 const uint8_t vy = *y++; 42 o[0] = vx; 43 o[1] = vy; 44 o += 2; 45 } while (--n != 0); 46 } 47 } 48