• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <xnnpack/pad.h>
9 
10 
// Writes `n` bytes of padding starting at `output` and returns the position
// just past the last byte written.
//
// Full 32-bit words of `fill_pattern` are stored first (in the machine's
// native byte order), followed by an optional 16-bit store of the low half
// and an optional 8-bit store of the next byte. All multi-byte stores go
// through memcpy so `output` may have any alignment.
static inline uint8_t* xx_pad_scalar_fill(uint8_t* output, size_t n, uint32_t fill_pattern)
{
  for (; n >= sizeof(uint32_t); n -= sizeof(uint32_t)) {
    memcpy(output, &fill_pattern, sizeof(uint32_t));
    output += sizeof(uint32_t);
  }
  if XNN_LIKELY(n & sizeof(uint16_t)) {
    const uint16_t vhalf = (uint16_t) fill_pattern;
    memcpy(output, &vhalf, sizeof(uint16_t));
    fill_pattern >>= 16;
    output += sizeof(uint16_t);
  }
  if XNN_LIKELY(n & sizeof(uint8_t)) {
    *output++ = (uint8_t) fill_pattern;
  }
  return output;
}

// Pads each row of a byte matrix on both sides with a repeating fill pattern.
//
// For each of the `rows` rows the kernel writes, in order:
//   - `pre_padding` bytes of fill,
//   - `channels` bytes copied verbatim from the input row,
//   - `post_padding` bytes of fill.
//
// Parameters (all sizes and strides are in bytes):
//   rows          - number of rows to process; nothing is written when 0.
//   channels      - bytes copied from each input row.
//   pre_padding   - fill bytes written before the copied data of each row.
//   post_padding  - fill bytes written after the copied data of each row.
//   input         - first input row; may have any alignment.
//   input_stride  - distance between the starts of consecutive input rows.
//   output        - first output row; may have any alignment.
//   output_stride - distance between the starts of consecutive output rows.
//   fill_pattern  - 32-bit pattern; it restarts from its first byte at the
//                   beginning of every padding region.
//
// Unlike a word-at-a-time tail load, this implementation reads exactly
// `channels` bytes per input row. XNN_OOB_READS is kept only so that the
// attributes of the definition stay as they were.
void xnn_xx_pad_ukernel__scalar(
    size_t rows,
    size_t channels,
    size_t pre_padding,
    size_t post_padding,
    const void* input,
    size_t input_stride,
    void* output,
    size_t output_stride,
    const uint32_t fill_pattern) XNN_OOB_READS
{
  // The do/while below would wrap `rows` around and run off the buffers.
  if XNN_UNLIKELY(rows == 0) {
    return;
  }

  // Distance from the end of one row's payload to the start of the next row.
  // Computed with unsigned wrap-around, matching the pointer updates below.
  const size_t input_increment = input_stride - channels;
  const size_t output_increment = output_stride - (pre_padding + channels + post_padding);

  const uint8_t* in = (const uint8_t*) input;
  uint8_t* out = (uint8_t*) output;

  do {
    // Pre-pad input channels.
    if XNN_LIKELY(pre_padding != 0) {
      out = xx_pad_scalar_fill(out, pre_padding, fill_pattern);
    }

    // Copy input channels, 16 bytes at a time. Each chunk is staged through a
    // temporary so that all loads of a chunk happen before its stores.
    size_t c = channels;
    for (; c >= 16 * sizeof(uint8_t); c -= 16 * sizeof(uint8_t)) {
      uint32_t vdata[4];
      memcpy(vdata, in, sizeof(vdata));
      in += sizeof(vdata);

      memcpy(out, vdata, sizeof(vdata));
      out += sizeof(vdata);
    }
    if XNN_UNLIKELY(c != 0) {
      for (; c >= 4 * sizeof(uint8_t); c -= 4 * sizeof(uint8_t)) {
        uint32_t vdata;
        memcpy(&vdata, in, sizeof(vdata));
        in += sizeof(vdata);

        memcpy(out, &vdata, sizeof(vdata));
        out += sizeof(vdata);
      }
      if XNN_UNLIKELY(c != 0) {
        // 1 to 3 bytes remain: read only those bytes, never a full word.
        uint8_t vtail[3];
        memcpy(vtail, in, c);
        in += c;

        memcpy(out, vtail, c);
        out += c;
      }
    }

    // Post-pad input channels.
    if XNN_LIKELY(post_padding != 0) {
      out = xx_pad_scalar_fill(out, post_padding, fill_pattern);
    }

    in = (const uint8_t*) ((uintptr_t) in + input_increment);
    out = (uint8_t*) ((uintptr_t) out + output_increment);
  } while (--rows != 0);
}
105