/external/libvpx/vp8/encoder/arm/neon/ |
D | variance_neon.asm | 29 vmov.i8 q8, #0 ;q8 - sum 49 vpadal.s16 q8, q11 ;calculate sum 55 vpadal.s16 q8, q12 58 vpadal.s16 q8, q13 61 vpadal.s16 q8, q14 68 vpaddl.s32 q0, q8 ;accumulate sum 102 vmov.i8 q8, #0 ;q8 - sum 119 vpadal.s16 q8, q11 ;calculate sum 125 vpadal.s16 q8, q12 128 vpadal.s16 q8, q13 [all …]
|
D | vp8_subpixelvariance16x16s_neon.asm | 37 vmov.i8 q8, #0 ;q8 - sum 75 vpadal.s16 q8, q4 ;sum 81 vpadal.s16 q8, q5 84 vpadal.s16 q8, q6 87 vpadal.s16 q8, q7 91 vpadal.s16 q8, q0 ;sum 94 vpadal.s16 q8, q1 97 vpadal.s16 q8, q2 100 vpadal.s16 q8, q3 107 vpaddl.s32 q0, q8 ;accumulate sum [all …]
|
D | vp8_mse16x16_neon.asm | 31 vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse 32 vmov.i8 q8, #0 50 vmlal.s16 q8, d23, d23 57 vmlal.s16 q8, d27, d27 63 vadd.u32 q7, q7, q8 85 vmov.i8 q8, #0 ;q8 - sum 102 vpadal.s16 q8, q11 108 vpadal.s16 q8, q12 111 vpadal.s16 q8, q13 114 vpadal.s16 q8, q14 [all …]
|
D | vp8_subpixelvariance8x8_neon.asm | 51 vmull.u8 q8, d6, d0 61 vmlal.u8 q8, d7, d1 69 vqrshrn.u16 d24, q8, #7 78 vmull.u8 q8, d6, d0 90 vmlal.u8 q8, d7, d1 96 vqrshrn.u16 d28, q8, #7 120 vmull.u8 q8, d29, d0 129 vmlal.u8 q8, d30, d1 138 vqrshrn.u16 d29, q8, #7 159 vmov.i8 q8, #0 ;q8 - sum [all …]
|
D | fastfdct4x4_neon.asm | 56 vqdmulh.s16 q8, q4, d0[0] 60 vshr.s16 q8, q8, #1 62 …vadd.s16 q8, q4, q8 ;d16:temp2 = ((c1 * x_c1)>>16) + c1; d17:temp2 = ((d1 * x… 86 vqdmulh.s16 q8, q4, d0[0] 90 vshr.s16 q8, q8, #1 92 …vadd.s16 q8, q4, q8 ;d16:temp2 = ((c1 * x_c1)>>16) + c1; d17:temp2 = ((d1 * x…
|
D | vp8_subpixelvariance16x16_neon.asm | 66 vmull.u8 q8, d3, d0 89 vmlal.u8 q8, d3, d1 ;(src_ptr[0] * Filter[1]) 97 vqrshrn.u16 d15, q8, #7 202 vmull.u8 q8, d29, d0 211 vmlal.u8 q8, d31, d1 222 vqrshrn.u16 d9, q8, #7 254 vmull.u8 q8, d3, d0 277 vmlal.u8 q8, d3, d1 ;(src_ptr[0] * Filter[1]) 285 vqrshrn.u16 d15, q8, #7 329 vmull.u8 q8, d29, d0 [all …]
|
D | fastquantizeb_neon.asm | 56 vld1.s16 {q8, q9}, [r12] ;load quant_ptr [0-15] 63 vqdmulh.s16 q4, q8 ;y = ((Round + abs(z)) * Quant) >> 16 67 vceq.s16 q8, q8 ;set q8 to all 1
|
D | subtract_neon.asm | 65 vsubl.u8 q8, d0, d2 74 vst1.16 {q8}, [r0]! ;store diff 115 vsubl.u8 q8, d0, d1 124 vst1.16 {q8}, [r0]! ;store diff 151 vsubl.u8 q8, d0, d1 160 vst1.16 {q8}, [r0]! ;store diff
|
D | fastfdct8x4_neon.asm | 69 vqdmulh.s16 q8, q7, d0[1] 76 vshr.s16 q8, q8, #1 120 vqdmulh.s16 q8, q7, d0[1] 127 vshr.s16 q8, q8, #1 148 vclt.s16 q8, q4, #0 153 vsub.s16 q4, q4, q8
|
/external/libvpx/vp8/common/arm/neon/ |
D | sixtappredict8x4_neon.asm | 69 vmull.u8 q8, d8, d0 79 vmlsl.u8 q8, d29, d1 89 vmlsl.u8 q8, d29, d4 99 vmlal.u8 q8, d29, d2 109 vmlal.u8 q8, d29, d5 124 vqadd.s16 q8, q4 131 vqrshrun.s16 d23, q8, #7 145 vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 157 vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp8_filter[1]) 169 vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp8_filter[4]) [all …]
|
D | sixtappredict8x8_neon.asm | 73 vmull.u8 q8, d8, d0 83 vmlsl.u8 q8, d29, d1 93 vmlsl.u8 q8, d29, d4 103 vmlal.u8 q8, d29, d2 113 vmlal.u8 q8, d29, d5 130 vqadd.s16 q8, q4 137 vqrshrun.s16 d23, q8, #7 158 vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 170 vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp8_filter[1]) 182 vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp8_filter[4]) [all …]
|
D | sixtappredict4x4_neon.asm | 75 vmull.u8 q8, d20, d5 85 vmlal.u8 q8, d10, d0 92 vmlsl.u8 q8, d20, d1 99 vmlsl.u8 q8, d10, d4 106 vmlal.u8 q8, d20, d2 117 vqadd.s16 q8, q10 123 vqrshrun.s16 d28, q8, #7 140 vmull.u8 q8, d20, d5 152 vmlal.u8 q8, d10, d0 162 vmlsl.u8 q8, d20, d1 [all …]
|
D | mbloopfilter_neon.asm | 48 vld1.u8 {q8}, [r0], r1 ; q1 64 vst1.u8 {q8}, [r12], r1 ; store oq1 169 vtrn.32 q4, q8 176 vtrn.16 q8, q10 180 vtrn.8 q7, q8 202 vtrn.32 q4, q8 210 vtrn.16 q8, q10 214 vtrn.8 q7, q8 283 vtrn.32 q4, q8 290 vtrn.16 q8, q10 [all …]
|
D | sixtappredict16x16_neon.asm | 78 vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 89 vmlsl.u8 q8, d28, d1 ;-(src_ptr[-1] * vp8_filter[1]) 105 vmlsl.u8 q8, d28, d4 ;-(src_ptr[2] * vp8_filter[4]) 121 vmlal.u8 q8, d28, d5 ;(src_ptr[3] * vp8_filter[5]) 137 vmlal.u8 q8, d28, d2 ;(src_ptr[0] * vp8_filter[2]) 161 vqadd.s16 q8, q4 ;sum of all (src_data*filter_parameters) 175 vqrshrun.s16 d6, q8, #7 ;shift/round/saturate to u8 196 vabs.s32 q8, q6 247 vmull.u8 q8, d22, d3 254 vqadd.s16 q8, q4 [all …]
|
D | loopfilter_neon.asm | 48 vld1.u8 {q8}, [r2], r1 ; q1 59 vst1.u8 {q8}, [r0], r1 ; store oq1 163 vtrn.32 q4, q8 170 vtrn.16 q8, q10 174 vtrn.8 q7, q8 247 vtrn.32 q4, q8 254 vtrn.16 q8, q10 258 vtrn.8 q7, q8 307 ; q8 q1 317 vabd.u8 q14, q8, q7 ; abs(q1 - q0) [all …]
|
D | bilinearpredict8x8_neon.asm | 48 vmull.u8 q8, d6, d0 58 vmlal.u8 q8, d7, d1 66 vqrshrn.u16 d24, q8, #7 75 vmull.u8 q8, d6, d0 87 vmlal.u8 q8, d7, d1 93 vqrshrn.u16 d28, q8, #7 118 vmull.u8 q8, d29, d0 127 vmlal.u8 q8, d30, d1 136 vqrshrn.u16 d9, q8, #7
|
D | bilinearpredict16x16_neon.asm | 63 vmull.u8 q8, d3, d0 86 vmlal.u8 q8, d3, d1 ;(src_ptr[0] * vp8_filter[1]) 94 vqrshrn.u16 d15, q8, #7 196 vmull.u8 q8, d29, d0 205 vmlal.u8 q8, d31, d1 216 vqrshrn.u16 d9, q8, #7 248 vmull.u8 q8, d3, d0 271 vmlal.u8 q8, d3, d1 ;(src_ptr[0] * vp8_filter[1]) 279 vqrshrn.u16 d15, q8, #7 320 vmull.u8 q8, d29, d0 [all …]
|
D | loopfiltersimplehorizontaledge_neon.asm | 39 vld1.u8 {q8}, [r0] ; q1 43 vabd.u8 q14, q5, q8 ; abs(p1 - q1) 52 veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value 63 vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
D | recon16x16mb_neon.asm | 31 vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr 50 vadd.s16 q0, q0, q8 ;add Diff data and Pred data together 82 vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr 97 vadd.s16 q0, q0, q8 ;add Diff data and Pred data together
|
/external/libvpx/vp8/decoder/arm/neon/ |
D | idct_dequant_dc_full_2x_neon.asm | 69 ; q8: 4 * cospi 73 vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 87 vshr.s16 q8, q8, #1 92 vqadd.s16 q4, q4, q8 120 ; q8: 1 * sinpi : c1/temp1 124 vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2 141 ; q8: c1 = temp1 - temp2 143 vqsub.s16 q8, q8, q11 151 vqadd.s16 q5, q3, q8 152 vqsub.s16 q6, q3, q8
|
D | idct_dequant_full_2x_neon.asm | 61 ; q8: 4 * cospi 65 vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 79 vshr.s16 q8, q8, #1 84 vqadd.s16 q4, q4, q8 112 ; q8: 1 * sinpi : c1/temp1 116 vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2 133 ; q8: c1 = temp1 - temp2 135 vqsub.s16 q8, q8, q11 143 vqadd.s16 q5, q3, q8 144 vqsub.s16 q6, q3, q8
|
/external/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_copyframeyonly_neon.asm | 49 vld1.8 {q8, q9}, [r10]! 60 vst1.8 {q8, q9}, [r11]! 123 vmov q9, q8 134 vst1.8 {q8, q9}, [r5], lr 154 vld1.8 {q8, q9}, [r2]! 168 vst1.8 {q8, q9}, [r6]! 264 vst1.8 {q8}, [r5], lr 284 vld1.8 {q8, q9}, [r2]! 298 vst1.8 {q8, q9}, [r6]! 426 vld1.8 {q8, q9}, [r10]! [all …]
|
D | vp8_vpxyv12_extendframeborders_neon.asm | 65 vmov q9, q8 76 vst1.8 {q8, q9}, [r5], lr 96 vld1.8 {q8, q9}, [r2]! 110 vst1.8 {q8, q9}, [r6]! 185 vst1.8 {q8}, [r5], lr 206 vld1.8 {q8, q9}, [r2]! 216 vst1.8 {q8, q9}, [r6]! 347 vst1.8 {q8}, [r5], lr 367 vld1.8 {q8, q9}, [r2]! 381 vst1.8 {q8, q9}, [r6]! [all …]
|
D | vp8_vpxyv12_copyframe_func_neon.asm | 59 vld1.8 {q8, q9}, [r10]! 70 vst1.8 {q8, q9}, [r11]! 117 vld1.8 {q8, q9}, [r10]! 124 vst1.8 {q8, q9}, [r11]!
|
D | vp8_vpxyv12_copysrcframe_func_neon.asm | 56 vld1.8 {q8, q9}, [r2]! 66 vst1.8 {q8, q9}, [r3]! 111 vld1.8 {q8, q9}, [r2]! 117 vst1.8 {q8, q9}, [r3]!
|