• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 /**
21  * *******************************************************************************
22  * * @file
23  *  isvc_mem_fns_av8.c
24  *
25  * @brief
26  *  armv8 variants of
27  * functions used for memory operations
28  *
29  * *******************************************************************************
30  */
31 #include <arm_neon.h>
32 #include <string.h>
33 
34 #include "ih264_typedefs.h"
35 #include "isvc_mem_fns.h"
36 
/**
 *******************************************************************************
 *
 * @brief
 *  Fills a 2D block of bytes with a constant value
 *
 * @par Description:
 *  Writes u1_val into i4_blk_wd consecutive bytes on each of i4_blk_ht rows.
 *  Successive rows start i4_dst_stride bytes apart. Widths of 4, 8 and any
 *  multiple of 16 are written with NEON stores; every other width is written
 *  with memset. All paths honour i4_blk_ht, so exactly i4_blk_ht rows are
 *  touched regardless of the width.
 *
 * @param[out] pu1_dst
 *  Pointer to the first byte of the first row of the block
 *
 * @param[in] i4_dst_stride
 *  Distance in bytes between the starts of two successive rows
 *
 * @param[in] u1_val
 *  Byte value written to every position of the block
 *
 * @param[in] i4_blk_wd
 *  Block width in bytes; nothing is written when it is not positive
 *
 * @param[in] i4_blk_ht
 *  Block height in rows; nothing is written when it is not positive
 *
 * @returns
 *  None
 *
 *******************************************************************************
 */
void isvc_memset_2d_neon(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
                         WORD32 i4_blk_ht)
{
    WORD32 i;

    /* An empty block needs no stores. Rejecting it here also keeps a negative */
    /* width from reaching memset, where it would be converted to a huge size_t */
    if((i4_blk_wd <= 0) || (i4_blk_ht <= 0))
    {
        return;
    }

    if(i4_blk_wd == 4)
    {
        for(i = 0; i < i4_blk_ht; i++)
        {
            /* Lane 0 of the replicated vector carries the 4 bytes of one row */
            vst1_lane_u32((UWORD32 *) pu1_dst, vreinterpret_u32_u8(vdup_n_u8(u1_val)), 0);
            pu1_dst += i4_dst_stride;
        }
    }
    else if(i4_blk_wd == 8)
    {
        for(i = 0; i < i4_blk_ht; i++)
        {
            /* One 64 bit store covers the 8 bytes of one row */
            vst1_u8(pu1_dst, vdup_n_u8(u1_val));
            pu1_dst += i4_dst_stride;
        }
    }
    else if((i4_blk_wd % 16) == 0)
    {
        WORD32 j;

        for(i = 0; i < i4_blk_ht; i++)
        {
            /* Each row is covered by back to back 128 bit stores */
            for(j = 0; j < i4_blk_wd; j += 16)
            {
                vst1q_u8(&pu1_dst[j], vdupq_n_u8(u1_val));
            }

            pu1_dst += i4_dst_stride;
        }
    }
    else
    {
        /* Widths without a vector path are filled row by row with memset */
        for(i = 0; i < i4_blk_ht; i++)
        {
            memset(pu1_dst, u1_val, i4_blk_wd);
            pu1_dst += i4_dst_stride;
        }
    }
}
152