/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/neon/ |
D | vp8_subpixelvariance16x16_neon.asm | 59 vdup.8 d0, d31[0] ;first_pass filter (d0 d1) 70 vmull.u8 q7, d2, d0 ;(src_ptr[0] * Filter[0]) 71 vmull.u8 q8, d3, d0 72 vmull.u8 q9, d5, d0 73 vmull.u8 q10, d6, d0 74 vmull.u8 q11, d8, d0 75 vmull.u8 q12, d9, d0 76 vmull.u8 q13, d11, d0 77 vmull.u8 q14, d12, d0 123 vmull.u8 q9, d2, d0 ;(src_ptr[0] * Filter[0]) [all …]
|
D | sad8_neon.asm | 28 vld1.8 {d0}, [r0], r1 34 vabdl.u8 q12, d0, d8 46 vld1.8 {d0}, [r0], r1 54 vabal.u8 q12, d0, d8 69 vadd.u32 d0, d0, d1 71 vmov.32 r0, d0[0] 85 vld1.8 {d0}, [r0], r1 91 vabdl.u8 q12, d0, d8 103 vld1.8 {d0}, [r0], r1 111 vabal.u8 q12, d0, d8 [all …]
|
D | variance_neon.asm | 41 vsubl.u8 q11, d0, d4 ;calculate diff 73 vadd.s64 d0, d0, d1 76 ;vmov.32 r0, d0[0] ;this instruction costs a lot 84 vmull.s32 q5, d0, d0 87 vsub.u32 d0, d1, d10 89 vmov.32 r0, d0[0] ;return 114 vsubl.u8 q11, d0, d4 ;calculate diff 143 vadd.s64 d0, d0, d1 146 vmull.s32 q5, d0, d0 149 vsub.u32 d0, d1, d10 [all …]
|
D | vp8_subpixelvariance8x8_neon.asm | 44 vdup.8 d0, d31[0] ;first_pass filter (d0 d1) 49 vmull.u8 q6, d2, d0 ;(src_ptr[0] * Filter[0]) 50 vmull.u8 q7, d4, d0 51 vmull.u8 q8, d6, d0 52 vmull.u8 q9, d8, d0 76 vmull.u8 q6, d2, d0 ;(src_ptr[0] * Filter[0]) 77 vmull.u8 q7, d4, d0 78 vmull.u8 q8, d6, d0 79 vmull.u8 q9, d8, d0 80 vmull.u8 q10, d10, d0 [all …]
|
D | iwalsh_neon.asm | 21 ; read in all four lines of values: d0->d3 25 vadd.s16 d4, d0, d3 ;a = [0] + [12] 27 vsub.s16 d5, d0, d3 ;d = [0] - [12] 33 vtrn.32 d0, d2 ;d0: 0 1 8 9 38 vtrn.16 d0, d1 ;d0: 0 4 8 12 45 vadd.s16 d4, d0, d3 ;a = [0] + [3] 47 vsub.s16 d5, d0, d3 ;d = [0] - [3] 64 vst1.i16 d0[0], [r1],r2 69 vst1.i16 d0[1], [r1],r2 74 vst1.i16 d0[2], [r1],r2 [all …]
|
D | vp8_subpixelvariance16x16s_neon.asm | 43 vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data 66 vsubl.u8 q4, d0, d22 ;diff 92 vmlal.s16 q9, d0, d0 ;sse 110 vadd.s64 d0, d0, d1 113 vmull.s32 q5, d0, d0 116 vsub.u32 d0, d1, d10 118 vmov.32 r0, d0[0] ;return 159 vsubl.u8 q11, d0, d2 ;diff 185 vmlal.s16 q9, d0, d0 ;sse 206 vadd.s64 d0, d0, d1 [all …]
|
D | sad16_neon.asm | 33 vabdl.u8 q12, d0, d8 58 vabal.u8 q12, d0, d8 83 vabal.u8 q12, d0, d8 108 vabal.u8 q12, d0, d8 131 vadd.u32 d0, d0, d1 133 vmov.32 r0, d0[0] 152 vabdl.u8 q12, d0, d8 176 vabal.u8 q12, d0, d8 199 vadd.u32 d0, d0, d1 201 vmov.32 r0, d0[0]
|
D | sixtappredict8x4_neon.asm | 62 vdup.8 d0, d24[0] ;first_pass filter (d0-d5) 79 vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 80 vmull.u8 q8, d8, d0 81 vmull.u8 q9, d10, d0 82 vmull.u8 q10, d12, d0 156 vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 157 vmull.u8 q9, d8, d0 158 vmull.u8 q10, d10, d0 159 vmull.u8 q11, d12, d0 160 vmull.u8 q12, d14, d0 [all …]
|
D | idct_dequant_full_2x_neon.asm | 49 vld1.16 {d0}, [r1] 61 vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2 62 vqdmulh.s16 q7, q5, d0[2] 63 vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 64 vqdmulh.s16 q9, q5, d0[0] 114 vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2 115 vqdmulh.s16 q9, q7, d0[2] 116 vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1 117 vqdmulh.s16 q11, q7, d0[0] 173 vqmovun.s16 d0, q4 ; lo [all …]
|
D | sixtappredict8x8_neon.asm | 65 vdup.8 d0, d24[0] ;first_pass filter (d0-d5) 83 vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 84 vmull.u8 q8, d8, d0 85 vmull.u8 q9, d10, d0 86 vmull.u8 q10, d12, d0 169 vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 170 vmull.u8 q9, d8, d0 171 vmull.u8 q10, d10, d0 172 vmull.u8 q11, d12, d0 173 vmull.u8 q12, d14, d0 [all …]
|
D | sixtappredict16x16_neon.asm | 72 vdup.8 d0, d24[0] ;first_pass filter (d0-d5) 89 vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 90 vmull.u8 q9, d7, d0 91 vmull.u8 q10, d9, d0 92 vmull.u8 q11, d10, d0 93 vmull.u8 q12, d12, d0 94 vmull.u8 q13, d13, d0 211 vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) 232 vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0]) 233 vmull.u8 q4, d19, d0 [all …]
|
D | sixtappredict4x4_neon.asm | 61 vdup.8 d0, d24[0] ;first_pass filter (d0-d5) 95 vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0]) 96 vmlal.u8 q8, d10, d0 162 vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0]) 163 vmlal.u8 q8, d10, d0 164 vmlal.u8 q12, d22, d0 ;(src_ptr[-2] * vp8_filter[0]) 224 vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) 231 vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp8_filter[0]) 232 vmull.u8 q4, d28, d0 276 vdup.8 d0, d24[0] ;first_pass filter (d0-d5) [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
D | vp8_shortwalsh4x4_neon.asm | 25 vld1.16 {d0}, [r0@64], r2 ; load input 31 ;transpose d0, d1, d2, d3. Then, d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3] 32 vtrn.32 d0, d2 37 vtrn.16 d0, d1 40 vadd.s16 d4, d0, d2 ; ip[0] + ip[2] 43 vsub.s16 d7, d0, d2 ; ip[0] - ip[2] 51 vadd.s16 d0, d4, d5 ; a1 + d1 56 vsub.s16 d0, d0, d16 ; op[0] = a1 + d1 + (a1 != 0) 59 ;transpose d0, d1, d2, d3, Then, d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12] 61 vtrn.32 d0, d2 [all …]
|
D | shortfdct_neon.asm | 35 vld1.16 {d0}, [r0@64], r2 43 ; transpose d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3] 44 vtrn.32 d0, d2 47 vtrn.16 d0, d1 50 vadd.s16 d4, d0, d3 ; a1 = ip[0] + ip[3] 53 vsub.s16 d7, d0, d3 ; d1 = ip[0] - ip[3] 58 vadd.s16 d0, d4, d5 ; op[0] = a1 + b1 72 ; transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12] 73 vtrn.32 d0, d2 75 vtrn.16 d0, d1 [all …]
|
D | vp8_mse16x16_neon.asm | 43 vsubl.u8 q11, d0, d4 69 vadd.u64 d0, d2, d3 71 vst1.32 {d0[0]}, [r12] 72 vmov.32 r0, d0[0] 85 vld1.8 {d0}, [r0], r1 ;Load up source and reference 94 vsubl.u8 q11, d0, d4 109 vadd.u64 d0, d2, d3 111 vmov.32 r0, d0[0]
|
D | fastquantizeb_neon.asm | 133 vmax.u16 d0, d0, d1 135 vmovl.u16 q0, d0 138 vmax.u32 d0, d0, d1 140 vpmax.u32 d0, d0, d0 146 vst1.8 {d0[0]}, [r4] ; store eob
|
D | subtract_neon.asm | 36 vld1.8 {d0}, [r3], r6 ;load src 45 vsubl.u8 q10, d0, d1 83 vsubl.u8 q8, d0, d2 123 vld1.8 {d0}, [r1], r3 ;load usrc 140 vsubl.u8 q8, d0, d1 159 vld1.8 {d0}, [r2], r3 ;load vsrc 176 vsubl.u8 q8, d0, d1
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
D | vp9_reconintra_neon.asm | 38 vld1.32 {d0[0]}, [r2] 39 vst1.32 {d0[0]}, [r0], r1 40 vst1.32 {d0[0]}, [r0], r1 41 vst1.32 {d0[0]}, [r0], r1 42 vst1.32 {d0[0]}, [r0], r1 55 vld1.8 {d0}, [r2] 56 vst1.8 {d0}, [r0], r1 57 vst1.8 {d0}, [r0], r1 58 vst1.8 {d0}, [r0], r1 59 vst1.8 {d0}, [r0], r1 [all …]
|
D | vp9_avg_neon.asm | 88 vld1.8 {d0}, [r0], r1 97 vst1.8 {d0}, [r2@64], r3 104 vld1.32 {d0[0]}, [r0], r1 105 vld1.32 {d0[1]}, [r0], r1 108 vrhadd.u8 d0, d0, d2 109 vst1.32 {d0[0]}, [r2@32], r3 110 vst1.32 {d0[1]}, [r2@32], r3
|
D | vp9_idct8x8_add_neon.asm | 26 vdup.16 d0, r3 ; duplicate cospi_28_64 32 vmull.s16 q2, d18, d0 33 vmull.s16 q3, d19, d0 64 vmlal.s16 q2, d30, d0 65 vmlal.s16 q3, d31, d0 76 vdup.16 d0, r7 ; duplicate cospi_16_64 83 vmull.s16 q2, d16, d0 84 vmull.s16 q3, d17, d0 87 vmull.s16 q13, d16, d0 88 vmull.s16 q15, d17, d0 [all …]
|
D | vp9_iht8x8_add_neon.asm | 125 vdup.16 d0, r3 ; duplicate cospi_28_64 131 vmull.s16 q2, d18, d0 132 vmull.s16 q3, d19, d0 163 vmlal.s16 q2, d30, d0 164 vmlal.s16 q3, d31, d0 175 vdup.16 d0, r7 ; duplicate cospi_16_64 182 vmull.s16 q2, d16, d0 183 vmull.s16 q3, d17, d0 186 vmull.s16 q13, d16, d0 187 vmull.s16 q15, d17, d0 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/ |
D | reconinter.c | 285 BLOCKD *d0 = &x->block[i]; in vp8_build_inter4x4_predictors_mbuv() local 288 if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) in vp8_build_inter4x4_predictors_mbuv() 289 build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride); in vp8_build_inter4x4_predictors_mbuv() 292 vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); in vp8_build_inter4x4_predictors_mbuv() 300 BLOCKD *d0 = &x->block[i]; in vp8_build_inter4x4_predictors_mbuv() local 303 if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) in vp8_build_inter4x4_predictors_mbuv() 304 build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride); in vp8_build_inter4x4_predictors_mbuv() 307 vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); in vp8_build_inter4x4_predictors_mbuv() 471 BLOCKD *d0 = &x->block[i]; in build_inter4x4_predictors_mb() local 483 if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) in build_inter4x4_predictors_mb() [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv6/ |
D | vp8_subtract_armv6.asm | 52 usub16 r4, r4, r5 ; [d2 | d0] 57 pkhbt r0, r4, r5, lsl #16 ; [d1 | d0] 98 usub16 r6, r8, r9 ; [d2 | d0] (A) 104 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) 115 usub16 r6, r8, r9 ; [d2 | d0] (B) 121 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) 143 usub16 r6, r8, r9 ; [d2 | d0] (A) 149 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) 160 usub16 r6, r8, r9 ; [d2 | d0] (B) 166 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_extendframeborders_neon.asm | 42 vld1.8 {d0[], d1[]}, [r1], lr 148 vld1.8 {d0[], d1[]}, [r1], lr 274 vld1.8 {d0}, [r1]! 282 vst1.8 {d0}, [r5], lr 284 vst1.8 {d0}, [r5], lr 286 vst1.8 {d0}, [r5], lr 288 vst1.8 {d0}, [r5], lr 290 vst1.8 {d0}, [r5], lr 292 vst1.8 {d0}, [r5], lr 294 vst1.8 {d0}, [r5], lr [all …]
|
D | vp8_vpxyv12_copysrcframe_func_neon.asm | 77 vld1.8 {d0}, [r2]! 81 vst1.8 {d0}, [r3]! 126 vld1.8 {d0}, [r2]! 129 vst1.8 {d0}, [r3]! 183 vld1.8 {d0}, [r2]! 187 vst1.8 {d0}, [r3]! 228 vld1.8 {d0}, [r2]! 231 vst1.8 {d0}, [r3]!
|