1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * ih264_mem_fns_atom_intr.c
24 *
25 * @brief
26 * Functions used for memory operations
27 *
28 * @author
29 * Ittiam
30 *
* @par List of Functions:
*  - ih264_memcpy_mul_8_ssse3()
*  - ih264_memset_mul_8_ssse3()
*  - ih264_memset_16bit_mul_8_ssse3()
*
33 * @remarks
34 * None
35 *
36 *******************************************************************************
37 */
38
39 /*****************************************************************************/
40 /* File Includes */
41 /*****************************************************************************/
42 #include <stdio.h>
43 #include <stddef.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <assert.h>
47
48 #include "ih264_typedefs.h"
49 #include "ih264_mem_fns.h"
50
51 #include <immintrin.h>
52
53 /**
54 *******************************************************************************
55 *
56 * @brief
57 * memcpy of a 8,16 or 32 bytes
58 *
59 * @par Description:
60 * Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes
61 *
62 * @param[in] pu1_dst
63 * UWORD8 pointer to the destination
64 *
65 * @param[in] pu1_src
66 * UWORD8 pointer to the source
67 *
68 * @param[in] num_bytes
69 * number of bytes to copy
70 * @returns
71 *
72 * @remarks
73 * None
74 *
75 *******************************************************************************
76 */
77
78
79
80
ih264_memcpy_mul_8_ssse3(UWORD8 * pu1_dst,UWORD8 * pu1_src,UWORD32 num_bytes)81 void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes)
82 {
83 int col;
84 for(col = num_bytes; col >= 8; col -= 8)
85 {
86 __m128i src_temp16x8b;
87 src_temp16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
88 pu1_src += 8;
89 _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b);
90 pu1_dst += 8;
91 }
92 }
93
94 /**
95 *******************************************************************************
96 *
97 * @brief
98 * memset of a 8,16 or 32 bytes
99 *
100 * @par Description:
101 * Does memset of 8bit data for 8,16 or 32 number of bytes
102 *
103 * @param[in] pu1_dst
104 * UWORD8 pointer to the destination
105 *
106 * @param[in] value
107 * UWORD8 value used for memset
108 *
109 * @param[in] num_bytes
110 * number of bytes to set
111 * @returns
112 *
113 * @remarks
114 * None
115 *
116 *******************************************************************************
117 */
118
119
ih264_memset_mul_8_ssse3(UWORD8 * pu1_dst,UWORD8 value,UWORD32 num_bytes)120 void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes)
121 {
122 int col;
123 __m128i src_temp16x8b;
124 src_temp16x8b = _mm_set1_epi8(value);
125 for(col = num_bytes; col >= 8; col -= 8)
126 {
127 _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b);
128 pu1_dst += 8;
129 }
130 }
131
132 /**
133 *******************************************************************************
134 *
135 * @brief
136 * memset of 16bit data of a 8,16 or 32 bytes
137 *
138 * @par Description:
139 * Does memset of 16bit data for 8,16 or 32 number of bytes
140 *
141 * @param[in] pu2_dst
142 * UWORD8 pointer to the destination
143 *
144 * @param[in] value
145 * UWORD16 value used for memset
146 *
147 * @param[in] num_words
148 * number of words to set
149 * @returns
150 *
151 * @remarks
152 * None
153 *
154 *******************************************************************************
155 */
156
157
ih264_memset_16bit_mul_8_ssse3(UWORD16 * pu2_dst,UWORD16 value,UWORD32 num_words)158 void ih264_memset_16bit_mul_8_ssse3(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words)
159 {
160 int col;
161 __m128i src_temp16x8b;
162 src_temp16x8b = _mm_set1_epi16(value);
163 for(col = num_words; col >= 8; col -= 8)
164 {
165 _mm_storeu_si128((__m128i *)(pu2_dst), src_temp16x8b);
166 pu2_dst += 8;
167 }
168 }
169
170