/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
D | vp9_reconintra_neon.asm | 76 vld1.8 {q0}, [r2] 77 vst1.8 {q0}, [r0], r1 78 vst1.8 {q0}, [r0], r1 79 vst1.8 {q0}, [r0], r1 80 vst1.8 {q0}, [r0], r1 81 vst1.8 {q0}, [r0], r1 82 vst1.8 {q0}, [r0], r1 83 vst1.8 {q0}, [r0], r1 84 vst1.8 {q0}, [r0], r1 85 vst1.8 {q0}, [r0], r1 [all …]
|
D | vp9_idct16x16_1_add_neon.asm | 47 vdup.s16 q0, r0 ; duplicate a1 61 vaddw.u8 q9, q0, d2 ; dest[x] + a1 62 vaddw.u8 q10, q0, d3 ; dest[x] + a1 63 vaddw.u8 q11, q0, d4 ; dest[x] + a1 64 vaddw.u8 q12, q0, d5 ; dest[x] + a1 74 vaddw.u8 q9, q0, d6 ; dest[x] + a1 75 vaddw.u8 q10, q0, d7 ; dest[x] + a1 76 vaddw.u8 q11, q0, d16 ; dest[x] + a1 77 vaddw.u8 q12, q0, d17 ; dest[x] + a1 97 vaddw.u8 q9, q0, d2 ; dest[x] + a1 [all …]
|
D | vp9_avg_neon.asm | 36 vld1.8 {q0-q1}, [r0]! 41 vrhadd.u8 q0, q0, q8 45 vst1.8 {q0-q1}, [r2@128]! 52 vld1.8 {q0-q1}, [r0], r1 57 vrhadd.u8 q0, q0, q8 64 vst1.8 {q0-q1}, [r2@128], r3 71 vld1.8 {q0}, [r0], r1 77 vrhadd.u8 q0, q0, q2 81 vst1.8 {q0}, [r2@128], r3 94 vrhadd.u8 q0, q0, q1
|
D | vp9_idct32x32_add_neon.asm | 358 ; into q0-q7 and the second one into q8-q15. There is a stride of 64, 361 vld1.s16 {q0}, [r3]! 397 vtrn.32 q0, q2 405 vtrn.16 q0, q1 420 vst1.16 {q0}, [r0]! 481 vadd.s16 q4, q0, q1 482 vsub.s16 q13, q0, q1 518 vsub.s16 q14, q1, q0 519 vadd.s16 q2, q1, q0 538 vadd.s16 q9, q5, q0 [all …]
|
D | vp9_idct8x8_1_add_neon.asm | 47 vdup.s16 q0, r0 ; duplicate a1 59 vaddw.u8 q9, q0, d2 ; dest[x] + a1 60 vaddw.u8 q10, q0, d3 ; dest[x] + a1 61 vaddw.u8 q11, q0, d4 ; dest[x] + a1 62 vaddw.u8 q12, q0, d5 ; dest[x] + a1 72 vaddw.u8 q9, q0, d6 ; dest[x] + a1 73 vaddw.u8 q10, q0, d7 ; dest[x] + a1 74 vaddw.u8 q11, q0, d16 ; dest[x] + a1 75 vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
D | vp9_idct16x16_add_neon.asm | 47 ; will be stored back into q8-q15 registers. This function will touch q0-q7 155 vmull.s16 q0, d24, d30 166 vadd.s32 q3, q2, q0 170 vsub.s32 q13, q2, q0 183 vmull.s16 q0, d20, d31 191 vmlal.s16 q0, d28, d30 199 vqrshrn.s32 d22, q0, #14 ; >> 14 216 vadd.s16 q0, q8, q11 ; step1[0] = step2[0] + step2[3]; 248 vadd.s16 q8, q0, q15 ; step2[0] = step1[0] + step1[7]; 255 vsub.s16 q15, q0, q15 ; step2[7] = step1[0] - step1[7]; [all …]
|
D | vp9_copy_neon.asm | 35 vld1.8 {q0-q1}, [r0]! 37 vst1.8 {q0-q1}, [r2@128]! 45 vld1.8 {q0-q1}, [r0], r1 48 vst1.8 {q0-q1}, [r2@128], r3 56 vld1.8 {q0}, [r0], r1 59 vst1.8 {q0}, [r2@128], r3
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/neon/ |
D | buildintrapredictorsmby_neon.asm | 45 vdup.u8 q0, r12 126 vdup.u8 q0, r5 129 vst1.u8 {q0}, [r1]! 130 vst1.u8 {q0}, [r1]! 131 vst1.u8 {q0}, [r1]! 132 vst1.u8 {q0}, [r1]! 133 vst1.u8 {q0}, [r1]! 134 vst1.u8 {q0}, [r1]! 135 vst1.u8 {q0}, [r1]! 136 vst1.u8 {q0}, [r1]! [all …]
|
D | loopfiltersimplehorizontaledge_neon.asm | 28 vld1.u8 {q7}, [r0@128], r1 ; q0 33 vabd.u8 q15, q6, q7 ; abs(p0 - q0) 36 vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2 38 vmov.u8 q0, #0x80 ; 0x80 40 vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 42 veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value 43 veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value 44 veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value 45 veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value 47 vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1 [all …]
|
D | vp8_subpixelvariance16x16s_neon.asm | 55 vext.8 q1, q0, q1, #1 ;construct src_ptr[1] 60 vrhadd.u8 q0, q0, q1 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 70 vsubl.u8 q0, d4, d26 91 vpadal.s16 q8, q0 ;sum 107 vpaddl.s32 q0, q8 ;accumulate sum 137 vld1.u8 {q0}, [r0], r1 ;load src data 153 vrhadd.u8 q0, q0, q2 163 vsubl.u8 q0, d8, d10 184 vpadal.s16 q8, q0 ;sum 194 vmov q0, q15 [all …]
|
D | sad16_neon.asm | 27 vld1.8 {q0}, [r0], r1 49 vld1.8 {q0}, [r0], r1 74 vld1.8 {q0}, [r0], r1 99 vld1.8 {q0}, [r0], r1 126 vadd.u16 q0, q12, q13 128 vpaddl.u16 q1, q0 129 vpaddl.u32 q0, q1 146 vld1.8 {q0}, [r0], r1 167 vld1.8 {q0}, [r0], r1 194 vadd.u16 q0, q12, q13 [all …]
|
D | loopfiltersimpleverticaledge_neon.asm | 53 vabd.u8 q15, q5, q4 ; abs(p0 - q0) 56 vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2 58 vmov.u8 q0, #0x80 ; 0x80 60 vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 62 veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value 63 veor q5, q5, q0 ; ps0: p0 offset to convert to a signed value 64 veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value 65 veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value 67 vcge.u8 q15, q1, q15 ; abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1 100 veor q6, q11, q0 ; *op0 = u^0x80 [all …]
|
D | iwalsh_neon.asm | 22 vld1.i16 {q0-q1}, [r0@128] 30 vadd.s16 q0, q2, q3 ; a+b d+c 52 vadd.s16 q0, q2, q3 ; a+b d+c 55 vadd.i16 q0, q0, q8 ;e/f += 3 58 vshr.s16 q0, q0, #3 ;e/f >> 3
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/ |
D | vp9_loopfilter_filters.c | 24 uint8_t q0, uint8_t q1, in filter_mask() argument 30 mask |= (abs(q1 - q0) > limit) * -1; in filter_mask() 33 mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; in filter_mask() 40 uint8_t q0, uint8_t q1, in flat_mask4() argument 44 mask |= (abs(q1 - q0) > thresh) * -1; in flat_mask4() 46 mask |= (abs(q2 - q0) > thresh) * -1; in flat_mask4() 48 mask |= (abs(q3 - q0) > thresh) * -1; in flat_mask4() 55 uint8_t p0, uint8_t q0, in flat_mask5() argument 58 int8_t mask = ~flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3); in flat_mask5() 60 mask |= (abs(q4 - q0) > thresh) * -1; in flat_mask5() [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/mips/dspr2/ |
D | vp9_loopfilter_masks_dspr2.h | 30 uint32_t q0, uint32_t q1, in vp9_filter_hev_mask_dspr2() argument 94 [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), in vp9_filter_hev_mask_dspr2() 124 : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), in vp9_filter_hev_mask_dspr2() 137 uint32_t q0, uint32_t q1, in vp9_filter_hev_mask_flatmask4_dspr2() argument 242 [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), in vp9_filter_hev_mask_flatmask4_dspr2() 273 : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), in vp9_filter_hev_mask_flatmask4_dspr2() 284 uint32_t p0, uint32_t q0, in vp9_flatmask5() argument 361 [p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1), in vp9_flatmask5()
|
D | vp9_mbloop_loopfilter_dspr2.c | 36 uint32_t p3, p2, p1, p0, q0, q1, q2, q3; in vp9_lpf_horizontal_8_dspr2() local 79 [q3] "=&r" (q3), [q2] "=&r" (q2), [q1] "=&r" (q1), [q0] "=&r" (q0) in vp9_lpf_horizontal_8_dspr2() 85 p1, p0, p3, p2, q0, q1, q2, q3, in vp9_lpf_horizontal_8_dspr2() 89 vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, in vp9_lpf_horizontal_8_dspr2() 127 [q0] "r" (q0), [q1] "r" (q1), [q2] "r" (q2), in vp9_lpf_horizontal_8_dspr2() 133 vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, in vp9_lpf_horizontal_8_dspr2() 232 [q0] "+r" (q0), [q1] "+r" (q1), [q2] "+r" (q2), in vp9_lpf_horizontal_8_dspr2() 334 uint32_t p3, p2, p1, p0, q3, q2, q1, q0; in vp9_lpf_vertical_8_dspr2() local 374 [q0] "=&r" (q0), [q1] "=&r" (q1), [q2] "=&r" (q2), [q3] "=&r" (q3) in vp9_lpf_vertical_8_dspr2() 449 [q3] "+r" (q3), [q2] "+r" (q2), [q1] "+r" (q1), [q0] "+r" (q0), in vp9_lpf_vertical_8_dspr2() [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/x86/ |
D | loopfilter_mmx.asm | 68 movq mm4, [rsi] ; q0 69 movq mm0, mm4 ; q0 70 psubusb mm4, mm3 ; q0-=q1 71 psubusb mm3, mm0 ; q1-=q0 72 por mm4, mm3 ; abs(q0-q1) 118 movq mm3, [rsi] ; q0 119 psubusb mm5, mm3 ; p0-=q0 120 psubusb mm3, mm6 ; q0-=p0 121 por mm5, mm3 ; abs(p0 - q0) 122 paddusb mm5, mm5 ; abs(p0-q0)*2 [all …]
|
D | loopfilter_sse2.asm | 33 movdqa xmm5, [rsi] ; q0 39 movlps xmm5, [rsi + rax] ; q0 66 movdqa xmm0, xmm5 ; q0 69 psubusb xmm5, xmm3 ; q0-=q1 70 psubusb xmm3, xmm0 ; q1-=q0 72 por xmm5, xmm3 ; abs(q0-q1) 141 movdqa xmm3, xmm0 ; q0 147 psubusb xmm5, xmm3 ; p0-=q0 148 psubusb xmm3, xmm6 ; q0-=p0 149 por xmm5, xmm3 ; abs(p0 - q0) [all …]
|
/hardware/samsung_slsi/exynos5/libswconverter/ |
D | csc_ARGB8888_to_YUV420SP_NEON.s | 34 @q0: temp1, R 84 vmls.u16 q8,q4,q11 @q0:U -(38 * R[k]) @128<<6+ 32 + u>>2 90 vmls.u16 q7,q5,q14 @q0:U -(94 * G[k]) @128<<6+ 32 + v>>2 106 vmul.u16 q7,q4,q14 @q0 = 66 *R[k] 107 vmla.u16 q7,q5,q15 @q0 += 129 *G[k] 108 vmla.u16 q7,q6,q8 @q0 += 25 *B[k] 124 vmul.u16 q0,q4,q14 @q0 = 66 *R[k] 125 vmla.u16 q0,q5,q15 @q0 += 129 *G[k] 126 vmla.u16 q0,q6,q8 @q0 += 25 *B[k] 127 vadd.u16 q0,q0,q10 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/x86/ |
D | vp9_loopfilter_mmx.asm | 68 movq mm4, [rsi] ; q0 69 movq mm0, mm4 ; q0 70 psubusb mm4, mm3 ; q0-=q1 71 psubusb mm3, mm0 ; q1-=q0 72 por mm4, mm3 ; abs(q0-q1) 118 movq mm3, [rsi] ; q0 119 psubusb mm5, mm3 ; p0-=q0 120 psubusb mm3, mm6 ; q0-=p0 121 por mm5, mm3 ; abs(p0 - q0) 122 paddusb mm5, mm5 ; abs(p0-q0)*2 [all …]
|
D | vp9_loopfilter_intrin_sse2.c | 388 __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; in mb_lpf_horizontal_edge_w_sse2_16() local 397 q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); in mb_lpf_horizontal_edge_w_sse2_16() 412 _mm_store_si128((__m128i *)&aq[0 * 16], q0); in mb_lpf_horizontal_edge_w_sse2_16() 418 const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), in mb_lpf_horizontal_edge_w_sse2_16() 419 _mm_subs_epu8(q0, q1)); in mb_lpf_horizontal_edge_w_sse2_16() 422 __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), in mb_lpf_horizontal_edge_w_sse2_16() 423 _mm_subs_epu8(q0, p0)); in mb_lpf_horizontal_edge_w_sse2_16() 465 __m128i qs0 = _mm_xor_si128(q0, t80); in mb_lpf_horizontal_edge_w_sse2_16() 514 _mm_or_si128(_mm_subs_epu8(q2, q0), in mb_lpf_horizontal_edge_w_sse2_16() 515 _mm_subs_epu8(q0, q2))); in mb_lpf_horizontal_edge_w_sse2_16() [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
D | fastquantizeb_neon.asm | 33 vld1.16 {q0, q1}, [r4@128] ; load z 37 vabs.s16 q4, q0 ; calculate x = abs(z) 41 vshr.s16 q2, q0, #15 ; sz 52 vld1.16 {q0, q1}, [r4@128] ; load z2 57 vabs.s16 q10, q0 ; calculate x2 = abs(z_2) 59 vshr.s16 q12, q0, #15 ; sz2 119 vand q0, q6, q14 ; get all valid numbers from scan array 127 vmax.u16 q0, q0, q1 ; find maximum value in q0, q1 135 vmovl.u16 q0, d0 164 vld1.16 {q0, q1}, [r3@128] ; load z [all …]
|
D | shortfdct_neon.asm | 105 vst1.16 {q0, q1}, [r1@128] 116 vld1.16 {q0}, [r0@128], r2 124 ; transpose q0=ip[0], q1=ip[1], q2=ip[2], q3=ip[3] 125 vtrn.32 q0, q2 ; [A0|B0] 127 vtrn.16 q0, q1 ; [A2|B2] 130 vadd.s16 q11, q0, q3 ; a1 = ip[0] + ip[3] 133 vsub.s16 q14, q0, q3 ; d1 = ip[0] - ip[3] 140 vadd.s16 q0, q11, q12 ; [A0 | B0] = a1 + b1 165 ; transpose q0=ip[0], q1=ip[4], q2=ip[8], q3=ip[12] 166 vtrn.32 q0, q2 ; q0=[A0 | B0] [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/ |
D | simpleloopfilter_v6.asm | 64 ldr r5, [src] ; q0 181 uqsub8 r9, r4, r5 ; p0 - q0 182 uqsub8 r10, r5, r4 ; q0 - p0 184 orr r9, r9, r10 ; abs(p0 - q0) 186 uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2 188 uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2 201 eor r5, r5, r2 ; q0 offset to convert to a signed value 204 qsub8 r6, r5, r4 ; q0 - p0 206 qadd8 r3, r3, r6 ; vp8_filter += q0 - p0 209 qadd8 r3, r3, r6 ; vp8_filter += q0 - p0 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_extendframeborders_neon.asm | 51 vmov q1, q0 62 vst1.8 {q0, q1}, [r5], lr 87 vld1.8 {q0, q1}, [r1]! 101 vst1.8 {q0, q1}, [r5]! 167 vst1.8 {q0}, [r5], lr 198 vld1.8 {q0, q1}, [r1]! 208 vst1.8 {q0, q1}, [r5]! 240 vld1.8 {q0}, [r1]! 248 vst1.8 {q0}, [r5], lr 250 vst1.8 {q0}, [r5], lr [all …]
|