/hardware/samsung_slsi/exynos5/libswconverter/ |
D | csc_tiled_to_linear_uv_deinterleave_neon.s | 69 @r12 temp1 72 stmfd sp!, {r4-r12,r14} @ backup registers 87 mov r12, r3, asr #1 @ temp1 = (width >> 1) * i + (j >> 1) 88 mul r12, r12, r5 92 add r12, r12, r6, asr #1 94 add r7, r0, r12 @ dst_offset = u_dst + temp1 109 add r7, r1, r12 @ dst_offset = v_dst + temp1 130 mov r12, r3, asr #1 @ temp1 = (width >> 1) * i + (j >> 1) 131 mul r12, r12, r5 132 add r12, r12, r6, asr #1 [all …]
|
D | csc_interleave_memcpy_neon.s | 62 @r12 src2_addr 65 stmfd sp!, {r8-r12,r14} @ backup registers 69 mov r12, r2 84 vld1.8 {q1}, [r12]! 85 vld1.8 {q3}, [r12]! 86 vld1.8 {q5}, [r12]! 87 vld1.8 {q7}, [r12]! 88 vld1.8 {q9}, [r12]! 89 vld1.8 {q11}, [r12]! 90 vld1.8 {q13}, [r12]! [all …]
|
D | csc_tiled_to_linear_uv_neon.s | 66 @r12 temp1 69 stmfd sp!, {r4-r12,r14} @ backup registers 86 mul r12, r2, r5 @ temp1 = width * i + j; 88 add r12, r12, r6 91 add r7, r0, r12 @ dst_offset = y_dst + temp1 114 mul r12, r2, r5 @ temp1 = width * i + j; 115 add r12, r12, r6 116 add r7, r0, r12 @ r7 = y_dst + temp1 155 mul r12, r2, r5 @ temp1 = width * i + j; 156 add r12, r12, r6 [all …]
|
D | csc_tiled_to_linear_y_neon.s | 67 @r12 temp1 70 stmfd sp!, {r4-r12,r14} @ backup registers 91 mul r12, r2, r5 @ temp1 = width * i + j; 94 add r12, r12, r6 96 add r7, r0, r12 @ dst_offset = y_dst + temp1 129 mul r12, r2, r5 @ temp1 = width * i + j; 130 add r12, r12, r6 131 add r7, r0, r12 @ r7 = y_dst + temp1 170 mul r12, r2, r5 @ temp1 = width * i + j; 171 add r12, r12, r6 [all …]
|
D | csc_ARGB8888_to_YUV420SP_NEON.s | 21 @r12 temp1, nHeight 27 stmfd sp!, {r4-r12,r14} @ backup registers 28 ldr r12, [sp, #40] @ load nHeight 60 stmfd sp!, {r12} @ backup registers 195 ldr r12, [r2], #4 @loadRGB int 198 mov r14, r12 @copy to r10 210 mov r14, r12,lsl #8 @copy to r10 222 mov r14, r12,lsl #16 @copy to r10 247 mov r12,r7 258 sub r12, r12, r11 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_copysrcframe_func_neon.asm | 50 mov r12, r5 61 sub r12, r12, #128 62 cmp r12, #128 73 cmp r12, #0 79 sub r12, r12, #8 80 cmp r12, #8 85 cmp r12, #0 90 subs r12, r12, #1 107 mov r12, r5 114 sub r12, r12, #128 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/neon/ |
D | buildintrapredictorsmby_neon.asm | 44 mov r12, #128 45 vdup.u8 q0, r12 48 mov r12, #0 67 add r12, r4, r6 84 add r12, r12, r3 85 add r12, r12, r4 86 add r12, r12, r5 87 add r12, r12, r6 94 add r12, r12, r3 95 add r12, r12, r4 [all …]
|
D | mbloopfilter_neon.asm | 32 ldr r12, [sp, #4] ; load thresh 34 vdup.u8 q2, r12 ; thresh 35 add r12, r0, r1, lsr #1 ; move src pointer up by 1 line 38 vld1.u8 {q4}, [r12@128], r1 ; p2 40 vld1.u8 {q6}, [r12@128], r1 ; p0 42 vld1.u8 {q8}, [r12@128], r1 ; q1 44 vld1.u8 {q10}, [r12@128], r1 ; q3 48 sub r12, r12, r1, lsl #2 49 add r0, r12, r1, lsr #1 51 vst1.u8 {q4}, [r12@128],r1 ; store op2 [all …]
|
D | loopfiltersimpleverticaledge_neon.asm | 26 add r12, r1, r1 29 vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r12 30 vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r3], r12 31 vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r12 32 vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r3], r12 33 vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r12 34 vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r3], r12 35 vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r12 36 vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r3], r12 38 vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r12 [all …]
|
D | loopfilter_neon.asm | 31 add r12, r2, r1 37 vld1.u8 {q4}, [r12@128], r1 ; p2 39 vld1.u8 {q6}, [r12@128], r1 ; p0 41 vld1.u8 {q8}, [r12@128], r1 ; q1 43 vld1.u8 {q10}, [r12@128] ; q3 46 sub r12, r12, r1, lsl #1 51 vst1.u8 {q6}, [r12@128], r1 ; store op0 53 vst1.u8 {q8}, [r12@128], r1 ; store oq1 69 ldr r12, [sp, #4] ; load thresh 71 vdup.u8 q2, r12 ; duplicate thresh [all …]
|
D | idct_dequant_0_2x_neon.asm | 26 add r12, r2, #4 28 vld1.32 {d8[0]}, [r12], r3 30 vld1.32 {d8[1]}, [r12], r3 32 vld1.32 {d10[0]}, [r12], r3 34 vld1.32 {d10[1]}, [r12], r3 36 ldrh r12, [r0] ; lo q 42 sxth r12, r12 ; lo 43 mul r0, r12, r1
|
D | variance_neon.asm | 33 mov r12, #8 53 subs r12, r12, #1 70 ldr r12, [sp] ;load *sse from stack 79 ;str r1, [r12] 85 vst1.32 {d1[0]}, [r12] ;store sse 106 mov r12, #4 123 subs r12, r12, #1 140 ldr r12, [sp] ;load *sse from stack 147 vst1.32 {d1[0]}, [r12] ;store sse 169 mov r12, #8 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/ |
D | loopfilter_v6.asm | 88 ldr r12, [src], pstep ; p0 99 uqsub8 r6, r11, r12 ; p1 - p0 101 uqsub8 r7, r12, r11 ; p0 - p1 111 uqsub8 r11, r12, r9 ; p0 - q0 112 uqsub8 r12, r9, r12 ; q0 - p0 115 orr r12, r11, r12 ; abs (p0-q0) 117 uqadd8 r12, r12, r12 ; abs (p0-q0) * 2 120 uqadd8 r12, r12, r6 ; abs (p0-q0)*2 + abs (p1-q1)/2 122 uqsub8 r12, r12, r4 ; compare to flimit 125 orr lr, lr, r12 [all …]
|
D | dequant_idct_v6.asm | 29 mov r12, #4 44 subs r12, r12, #1 63 ldr r12, [r0, #24] 70 smulwt r11, r3, r12 73 smulwt r7, r4, r12 74 smulwb r9, r3, r12 75 smulwb r10, r4, r12 80 uadd16 r7, r12, r9 104 smulwt r12, r4, r6 109 pkhbt r12, r10, r12, lsl #16 [all …]
|
D | intra4x4_predict_v6.asm | 34 push {r4-r12, lr} 38 pop {r4-r12, pc} ; default 57 usad8 r12, r8, r9 64 add r4, r4, r12 67 mov r12, r4, asr #3 ; (expected_dc + 4) >> 3 69 add r12, r12, r12, lsl #8 71 add r12, r12, r12, lsl #16 74 str r12, [r3], r0 75 str r12, [r3], r0 76 str r12, [r3], r0 [all …]
|
D | iwalsh_v6.asm | 22 stmdb sp!, {r4 - r12, lr} 35 qsub16 r12, r4, r6 ; c1 [5-9 | 4-8] 39 qadd16 r4, r12, lr ; c1 + d1 [5 | 4] 41 qsub16 r8, lr, r12 ; d1 - c1 [13 | 12] 45 qsub16 r12, r5, r7 ; c1 [7-11 | 6-10] 49 qadd16 r5, r12, lr ; c1 + d1 [7 | 6] 51 qsub16 r9, lr, r12 ; d1 - c1 [15 | 14] 57 qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7] 63 qaddsubx r4, r12, lr ; [b2|c2] [c1+d1 | a1-b1] 64 qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1] [all …]
|
D | copymem8x4_v6.asm | 39 mov r12, #4 48 subs r12, r12, #1 83 mov r12, #4 86 subs r12, r12, #1 108 mov r12, #4 115 subs r12, r12, #1
|
D | copymem8x8_v6.asm | 39 mov r12, #8 48 subs r12, r12, #1 83 mov r12, #8 86 subs r12, r12, #1 108 mov r12, #8 115 subs r12, r12, #1
|
D | copymem16x16_v6.asm | 42 mov r12, #16 55 subs r12, r12, #1 106 mov r12, #16 109 subs r12, r12, #1 137 mov r12, #16 147 subs r12, r12, #1 165 mov r12, #16 172 subs r12, r12, #1
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
D | vp9_idct16x16_1_add_neon.asm | 29 mov r12, #0x2d00 30 add r12, #0x41 33 mul r0, r0, r12 ; input[0] * cospi_16_64 38 mul r0, r0, r12 ; out * cospi_16_64 39 mov r12, r1 ; save dest 69 vst1.64 {d2}, [r12], r0 70 vst1.64 {d3}, [r12], r2 71 vst1.64 {d30}, [r12], r0 72 vst1.64 {d31}, [r12], r2 82 vst1.64 {d2}, [r12], r0 [all …]
|
D | vp9_idct8x8_1_add_neon.asm | 29 mov r12, #0x2d00 30 add r12, #0x41 33 mul r0, r0, r12 ; input[0] * cospi_16_64 38 mul r0, r0, r12 ; out * cospi_16_64 39 mov r12, r1 ; save dest 67 vst1.64 {d2}, [r12], r2 68 vst1.64 {d3}, [r12], r2 69 vst1.64 {d30}, [r12], r2 70 vst1.64 {d31}, [r12], r2 80 vst1.64 {d2}, [r12], r2 [all …]
|
D | vp9_idct4x4_1_add_neon.asm | 29 mov r12, #0x2d00 30 add r12, #0x41 33 mul r0, r0, r12 ; input[0] * cospi_16_64 38 mul r0, r0, r12 ; out * cospi_16_64 39 mov r12, r1 ; save dest 60 vst1.32 {d6[0]}, [r12], r2 61 vst1.32 {d6[1]}, [r12], r2 62 vst1.32 {d7[0]}, [r12], r2 63 vst1.32 {d7[1]}, [r12]
|
D | vp9_dc_only_idct_add_neon.asm | 30 mov r12, #0x2d00 31 add r12, #0x41 34 mul r0, r0, r12 ; input_dc * cospi_16_64 39 mul r0, r0, r12 ; out * cospi_16_64 48 ldr r12, [sp] ; load stride 61 vst1.32 {d2[0]}, [r2], r12 62 vst1.32 {d2[1]}, [r2], r12 63 vst1.32 {d4[0]}, [r2], r12
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
D | subtract_neon.asm | 68 mov r12, #4 101 subs r12, r12, #1 119 mov r12, #32 ; "diff" stride x2 149 vst1.16 {q8}, [r0], r12 ;store diff 150 vst1.16 {q9}, [r7], r12 151 vst1.16 {q10}, [r0], r12 152 vst1.16 {q11}, [r7], r12 153 vst1.16 {q12}, [r0], r12 154 vst1.16 {q13}, [r7], r12 155 vst1.16 {q14}, [r0], r12 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv6/ |
D | vp8_short_fdct4x4_armv6.asm | 21 stmfd sp!, {r4 - r12, lr} 30 ldr r12, c0x22a453a0 ; [2217*4 | 5352*4] 46 smlad r6, r7, r12, r11 ; o1 = (c1 * 2217 + d1 * 5352 + 14500) 47 smlsdx r7, r7, r12, r10 ; o3 = (d1 * 2217 - c1 * 5352 + 7500) 71 smlad r6, r7, r12, r11 ; o5 = (c1 * 2217 + d1 * 5352 + 14500) 72 smlsdx r7, r7, r12, r10 ; o7 = (d1 * 2217 - c1 * 5352 + 7500) 96 smlad r6, r7, r12, r11 ; o9 = (c1 * 2217 + d1 * 5352 + 14500) 97 smlsdx r7, r7, r12, r10 ; o11 = (d1 * 2217 - c1 * 5352 + 7500) 119 smlad r6, r7, r12, r11 ; o13 = (c1 * 2217 + d1 * 5352 + 14500) 120 smlsdx r7, r7, r12, r10 ; o15 = (d1 * 2217 - c1 * 5352 + 7500) [all …]
|