• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 
8 #include <xmmintrin.h>
9 
10 #include <xnnpack/fill.h>
11 
12 
// Fills a strided 2D region with a repeated 32-bit pattern using SSE stores.
//
//   rows          - number of rows to fill; must be non-zero.
//   channels      - number of BYTES filled in each row; must be a non-zero
//                   multiple of sizeof(uint32_t).
//   output        - address of the first row.
//   output_stride - distance in bytes from the start of one row to the start
//                   of the next row.
//   fill_value    - points to the 32-bit pattern copied into every element.
void xnn_x32_fill_ukernel__sse(
    size_t rows,
    size_t channels,
    uint32_t* output,
    size_t output_stride,
    const uint32_t* fill_value)
{
  assert(rows != 0);
  assert(channels != 0);
  assert(channels % sizeof(uint32_t) == 0);
  assert(fill_value != NULL);

  // Byte counts handled by one iteration of each store tier.
  const size_t block_bytes = 16 * sizeof(uint32_t);
  const size_t vector_bytes = 4 * sizeof(uint32_t);

  // Bytes to skip between the end of one row and the start of the next.
  const size_t row_gap = output_stride - channels;

  // Replicate the 32-bit pattern into all four lanes of the register.
  const __m128 vpattern = _mm_load1_ps((const float*) fill_value);

  float* dst = (float*) output;
  size_t rows_left = rows;
  do {
    // Bytes still to be written in the current row.
    size_t remaining = channels;

    // Main tier: four unaligned 128-bit stores cover 16 elements at a time.
    while (remaining >= block_bytes) {
      _mm_storeu_ps(dst, vpattern);
      _mm_storeu_ps(dst + 4, vpattern);
      _mm_storeu_ps(dst + 8, vpattern);
      _mm_storeu_ps(dst + 12, vpattern);
      dst += 16;
      remaining -= block_bytes;
    }

    // Secondary tier: one 128-bit store covers 4 elements at a time.
    while (remaining >= vector_bytes) {
      _mm_storeu_ps(dst, vpattern);
      dst += 4;
      remaining -= vector_bytes;
    }

    // Tail: 1 to 3 elements are left; bits of the byte count select a
    // 2-element store and/or a 1-element store.
    if XNN_UNLIKELY(remaining != 0) {
      if XNN_LIKELY(remaining & (2 * sizeof(uint32_t))) {
        _mm_storel_pi((__m64*) dst, vpattern);
        dst += 2;
      }
      if XNN_LIKELY(remaining & (1 * sizeof(uint32_t))) {
        _mm_store_ss(dst, vpattern);
        dst += 1;
      }
    }

    // The gap is expressed in bytes, so advance through an integer address.
    dst = (float*) ((uintptr_t) dst + row_gap);
  } while (--rows_left != 0);
}
55