Lines Matching refs:s
105 dup v14.4s, w4 // populate the u4_qp_div_6
109 saddl v4.4s, v0.4h, v3.4h //x0 = x4 + x7
110 saddl v5.4s, v1.4h, v2.4h //x1 = x5 + x6
111 ssubl v6.4s, v1.4h, v2.4h //x2 = x5 - x6
112 ssubl v7.4s, v0.4h, v3.4h //x3 = x4 - x7
114 add v0.4s, v4.4s, v5.4s //pi4_tmp_ptr[0] = x0 + x1
115 add v1.4s, v7.4s, v6.4s //pi4_tmp_ptr[1] = x3 + x2
116 sub v2.4s, v4.4s, v5.4s //pi4_tmp_ptr[2] = x0 - x1
117 sub v3.4s, v7.4s, v6.4s //pi4_tmp_ptr[3] = x3 - x2
119 umull v15.4s, v15.4h, v16.4h
120 dup v15.4s, v15.s[0] //pu2_weigh_mat[0]*pu2_iscal_mat[0]
123 trn1 v4.4s, v0.4s, v1.4s
124 trn2 v5.4s, v0.4s, v1.4s
125 trn1 v6.4s, v2.4s, v3.4s
126 trn2 v7.4s, v2.4s, v3.4s
134 add v4.4s, v0.4s, v3.4s //x0 = x4+x7
135 add v5.4s, v1.4s, v2.4s //x1 = x5+x6
136 sub v6.4s, v1.4s, v2.4s //x2 = x5-x6
137 sub v7.4s, v0.4s, v3.4s //x3 = x4-x7
139 add v0.4s, v4.4s, v5.4s //pi4_tmp_ptr[0] = x0 + x1
140 add v1.4s, v7.4s, v6.4s //pi4_tmp_ptr[1] = x3 + x2
141 sub v2.4s, v4.4s, v5.4s //pi4_tmp_ptr[2] = x0 - x1
142 sub v3.4s, v7.4s, v6.4s //pi4_tmp_ptr[3] = x3 - x2
144 mul v0.4s, v0.4s, v15.4s // q0 = p[i] = (x[i] * scale) where i = 0..3, scale = pu2_weigh_mat[0]*pu2_iscal_mat[0] broadcast in v15
145 mul v1.4s, v1.4s, v15.4s // q1 = p[i] = (x[i] * scale) where i = 4..7
146 mul v2.4s, v2.4s, v15.4s // q2 = p[i] = (x[i] * scale) where i = 8..11
147 mul v3.4s, v3.4s, v15.4s // q3 = p[i] = (x[i] * scale) where i = 12..15
149 sshl v0.4s, v0.4s, v14.4s // q0 = q[i] = (p[i] << (qp/6)) where i = 0..3
150 sshl v1.4s, v1.4s, v14.4s // q1 = q[i] = (p[i] << (qp/6)) where i = 4..7
151 sshl v2.4s, v2.4s, v14.4s // q2 = q[i] = (p[i] << (qp/6)) where i = 8..11
152 sshl v3.4s, v3.4s, v14.4s // q3 = q[i] = (p[i] << (qp/6)) where i = 12..15
154 sqrshrn v0.4h, v0.4s, #6 // d0 = c[i] = ((q[i] + 32) >> 6) where i = 0..3
155 sqrshrn v1.4h, v1.4s, #6 // d1 = c[i] = ((q[i] + 32) >> 6) where i = 4..7
156 sqrshrn v2.4h, v2.4s, #6 // d2 = c[i] = ((q[i] + 32) >> 6) where i = 8..11
157 sqrshrn v3.4h, v3.4s, #6 // d3 = c[i] = ((q[i] + 32) >> 6) where i = 12..15
218 dup v28.4s, w4 //load qp/6
224 saddl v2.4s, v0.4h, v1.4h //i4_x0 = i4_x4 + i4_x5;...x2
225 ssubl v4.4s, v0.4h, v1.4h //i4_x1 = i4_x4 - i4_x5;...x3
227 umull v30.4s, v26.4h, v27.4h //pu2_iscal_mat[0]*pu2_weigh_mat[0]
228 dup v30.4s, v30.s[0]
230 trn1 v0.4s, v2.4s, v4.4s
231 trn2 v1.4s, v2.4s, v4.4s //i4_x0 i4_x1 -> q1
233 add v2.4s, v0.4s, v1.4s //i4_x4 = i4_x0+i4_x2;.. i4_x5
234 sub v3.4s, v0.4s, v1.4s //i4_x6 = i4_x0-i4_x2;.. i4_x7
236 mul v2.4s, v2.4s, v30.4s
237 mul v3.4s, v3.4s, v30.4s
239 sshl v2.4s, v2.4s, v28.4s
240 sshl v3.4s, v3.4s, v28.4s
242 xtn v0.4h, v2.4s //i4_x4 i4_x5 i4_y4 i4_y5
243 xtn v1.4h, v3.4s //i4_x6 i4_x7 i4_y6 i4_y7
245 st2 {v0.4s-v1.4s}, [x1]