1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
#include <assert.h>
#include <string.h>

#include <xnnpack/pad.h>
9
10
xnn_xx_pad_ukernel__scalar(size_t rows,size_t channels,size_t pre_padding,size_t post_padding,const void * input,size_t input_stride,void * output,size_t output_stride,const uint32_t fill_pattern)11 void xnn_xx_pad_ukernel__scalar(
12 size_t rows,
13 size_t channels,
14 size_t pre_padding,
15 size_t post_padding,
16 const void* input,
17 size_t input_stride,
18 void* output,
19 size_t output_stride,
20 const uint32_t fill_pattern) XNN_OOB_READS
21 {
22 const size_t input_increment = input_stride - channels;
23 const size_t output_increment = output_stride - (pre_padding + channels + post_padding);
24
25 do {
26 // Pre-pad input channels.
27 size_t l = pre_padding;
28 if XNN_LIKELY(l != 0) {
29 uint32_t vfill_pattern = fill_pattern;
30 for (; l >= 4 * sizeof(uint8_t); l -= 4 * sizeof(uint8_t)) {
31 *((uint32_t*) output) = vfill_pattern;
32 output = (uint8_t*) output + 4;
33 }
34 if XNN_LIKELY(l & (2 * sizeof(uint8_t))) {
35 *((uint16_t*) output) = (uint16_t) vfill_pattern;
36 vfill_pattern >>= 16;
37 output = (uint8_t*) output + 2;
38 }
39 if XNN_LIKELY(l & (1 * sizeof(uint8_t))) {
40 *((uint8_t*) output) = (uint8_t) vfill_pattern;
41 output = (uint8_t*) output + 1;
42 }
43 }
44
45 // Copy input channels.
46 size_t c = channels;
47 for (; c >= 16 * sizeof(uint8_t); c -= 16 * sizeof(uint8_t)) {
48 const uint32_t vdata0 = ((const uint32_t*) input)[0];
49 const uint32_t vdata1 = ((const uint32_t*) input)[1];
50 const uint32_t vdata2 = ((const uint32_t*) input)[2];
51 const uint32_t vdata3 = ((const uint32_t*) input)[3];
52 input = (const uint8_t*) input + 16;
53
54 ((uint32_t*) output)[0] = vdata0;
55 ((uint32_t*) output)[1] = vdata1;
56 ((uint32_t*) output)[2] = vdata2;
57 ((uint32_t*) output)[3] = vdata3;
58 output = (uint8_t*) output + 16;
59 }
60 if XNN_UNLIKELY(c != 0) {
61 for (; c >= 4 * sizeof(uint8_t); c -= 4 * sizeof(uint8_t)) {
62 *((uint32_t*) output) = *((const uint32_t*) input);
63 input = (const uint8_t*) input + 4;
64 output = (uint8_t*) output + 4;
65 }
66 if XNN_UNLIKELY(c != 0) {
67 uint32_t vdata = *((const uint32_t*) input);
68 input = (const void*) ((uintptr_t) input + c);
69
70 if XNN_LIKELY(c & (2 * sizeof(uint8_t))) {
71 *((uint16_t*) output) = (uint16_t) vdata;
72 vdata >>= 16;
73 output = (uint8_t*) output + 2;
74 }
75 if XNN_LIKELY(c & (1 * sizeof(uint8_t))) {
76 *((uint8_t*) output) = (uint8_t) vdata;
77 output = (uint8_t*) output + 1;
78 }
79 }
80 }
81
82 // Post-pad input channels.
83 size_t r = post_padding;
84 if XNN_LIKELY(r != 0) {
85 uint32_t vfill_pattern = fill_pattern;
86 for (; r >= 4 * sizeof(uint8_t); r -= 4 * sizeof(uint8_t)) {
87 *((uint32_t*) output) = vfill_pattern;
88 output = (uint8_t*) output + 4;
89 }
90 if XNN_LIKELY(r & (2 * sizeof(uint8_t))) {
91 *((uint16_t*) output) = (uint16_t) vfill_pattern;
92 vfill_pattern >>= 16;
93 output = (uint8_t*) output + 2;
94 }
95 if XNN_LIKELY(r & (1 * sizeof(uint8_t))) {
96 *((uint8_t*) output) = (uint8_t) vfill_pattern;
97 output = (uint8_t*) output + 1;
98 }
99 }
100
101 input = (const uint32_t*) ((uintptr_t) input + input_increment);
102 output = (uint32_t*) ((uintptr_t) output + output_increment);
103 } while (--rows != 0);
104 }
105