/hardware/samsung_slsi/exynos5/libswconverter/ |
D | csc_tiled_to_linear_y_neon.s | 63 @r8 src_offset 81 mul r8, r11, r5 @ src_offset = tiled_width * i 83 add r8, r1, r8 @ src_offset = y_src + src_offset 85 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)] 86 vld1.8 {q0, q1}, [r8]! 87 vld1.8 {q2, q3}, [r8]! 88 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)] 89 vld1.8 {q4, q5}, [r8]! 90 vld1.8 {q6, q7}, [r8]! 92 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)] [all …]
|
D | csc_tiled_to_linear_uv_neon.s | 62 @r8 src_offset 80 mul r8, r11, r5 @ src_offset = tiled_width * i 82 add r8, r1, r8 @ src_offset = y_src + src_offset 84 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)] 85 vld1.8 {q0, q1}, [r8]! 87 vld1.8 {q2, q3}, [r8]! 89 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)] 90 vld1.8 {q4, q5}, [r8]! 92 vld1.8 {q6, q7}, [r8]! 110 mul r8, r11, r5 @ src_offset = (tiled_width * i) + (j << 3); [all …]
|
D | csc_tiled_to_linear_uv_deinterleave_neon.s | 65 @r8 src_offset 83 mul r8, r11, r5 @ src_offset = tiled_width * i 85 add r8, r2, r8 @ src_offset = uv_src + src_offset 90 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)] 91 vld2.8 {q0, q1}, [r8]! 93 vld2.8 {q2, q3}, [r8]! 95 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)] 96 vld2.8 {q4, q5}, [r8]! 98 vld2.8 {q6, q7}, [r8]! 127 mul r8, r11, r5 @ src_offset = (tiled_width * i) + (j << 3) [all …]
|
D | csc_interleave_memcpy_neon.s | 58 @r8 temp1 65 stmfd sp!, {r8-r12,r14} @ backup registers 111 ldrb r8, [r11], #1 113 strb r8, [r10], #1 119 ldmfd sp!, {r8-r12,r15} @ restore registers
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/ |
D | dequant_idct_v6.asm | 68 smulwb r8, r4, r6 71 pkhbt r8, r8, r10, lsl #16 81 usub16 r7, r8, r7 84 usub16 r8, r11, r14 87 uadd16 r6, r8, r7 88 usub16 r7, r8, r7 101 ldr r8, [r0], #4 105 smulwt lr, r3, r8 106 smulwt r10, r4, r8 107 pkhbt r11, r8, r6, lsl #16 [all …]
|
D | loopfilter_v6.asm | 92 uqsub8 r8, r10, r11 ; p2 - p1 96 orr r8, r8, r10 ; abs (p2-p1) 98 uqsub8 r8, r8, r2 ; compare to limit 100 orr lr, lr, r8 106 uqsub8 r8, r6, r3 ; compare to thresh -- save r8 for later 159 orr r10, r6, r8 ; calculate vp8_hevmask 167 ldr r8, [src], pstep ; p0 173 eor r8, r8, r12 ; p0 offset to convert to a signed value 178 str r8, [sp, #4] ; store ps0 temporarily 183 qsub8 r8, r9, r8 ; vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) [all …]
|
D | intra4x4_predict_v6.asm | 52 ldr r8, [r0] ; Above 57 usad8 r12, r8, r9 82 ldr r8, [r0] ; Above 92 uxtb16 r10, r8 ; a[2|0] 93 uxtb16 r11, r8, ror #8 ; a[3|1] 141 ldr r8, [r0] ; a[3|2|1|0] 148 uxtb16 r4, r8 ; a[2|0] 149 uxtb16 r5, r8, ror #8 ; a[3|1] 178 ldrb r8, [sp, #48] ; top_left 183 add r8, r8, r4 ; tl + l[0] [all …]
|
D | filter_v6.asm | 54 ldrb r8, [r0, #-2] ; load source data 62 pkhbt lr, r8, r9, lsl #16 ; r9 | r8 63 pkhbt r8, r9, r10, lsl #16 ; r10 | r9 69 smuad r8, r8, r4 74 smlad r8, r11, r5, r8 83 smlad r11, r10, r6, r8 88 ldrneb r8, [r0, #-2] ; load data for next loop 144 ldrb r8, [r0, #-2] ; load source data 152 pkhbt lr, r8, r9, lsl #16 ; r9 | r8 153 pkhbt r8, r9, r10, lsl #16 ; r10 | r9 [all …]
|
D | bilinearfilter_v6.asm | 52 ldrb r8, [r0, #2] 60 pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1] 63 pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2] 67 smuad r8, r8, r5 80 add r8, r8, #0x40 ; round_shift_and_clamp 83 usat r8, #16, r8, asr #7 86 strh r8, [r1], r3 ; result is transposed and stored 92 ldrneb r8, [r0, #2] 116 ldrb r8, [r0, #2] 123 strh r8, [r1], r3 [all …]
|
D | vp8_variance8x8_armv6.asm | 41 usub8 r8, r6, r7 ; calculate difference 43 sel r10, r8, lr ; select bytes with positive difference 46 sel r8, r9, lr ; select bytes with negative difference 50 usad8 r7, r8, lr ; calculate sum of negative differences 51 orr r8, r8, r10 ; differences of all 4 pixels 57 uxtb16 r7, r8 ; byte (two pixels) to halfwords 58 uxtb16 r10, r8, ror #8 ; another two pixels to halfwords 66 usub8 r8, r6, r7 ; calculate difference 68 sel r10, r8, lr ; select bytes with positive difference 71 sel r8, r9, lr ; select bytes with negative difference [all …]
|
D | iwalsh_v6.asm | 30 ldr r8, [r0, #24] ; [13 | 12] 33 qadd16 r10, r2, r8 ; a1 [1+13 | 0+12] 36 qsub16 lr, r2, r8 ; d1 [1-13 | 0-12] 41 qsub16 r8, lr, r12 ; d1 - c1 [13 | 12] 95 qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15] 96 qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15] 100 qaddsubx r8, r4, r5 ; [b2|c2] [c1+d1 | a1-b1] 105 qadd16 r8, r8, r10 ; [b2+3|c2+3] 121 asr lr, r8, #19 ; [13] 123 sxth r8, r8 [all …]
|
D | vp8_sad16x16_armv6.asm | 39 ldr r8, [r2, #0x0] ; load 4 ref pixels (1A) 45 usada8 r4, r8, r6, r4 ; calculate sad for 4 pixels 46 usad8 r8, r7, r9 ; calculate sad for 4 pixels 58 usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels 62 add r4, r4, r8 ; add partial sad values 65 ldr r8, [r2, #0x0] ; load 4 ref pixels (2A) 70 usada8 r4, r6, r8, r4 ; calculate sad for 4 pixels 71 usad8 r8, r7, r9 ; calculate sad for 4 pixels 80 usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels 86 add r4, r4, r8 ; add partial sad values
|
D | vp8_variance16x16_armv6.asm | 32 mov r8, #0 ; initialize sum = 0 55 adds r8, r8, r4 ; add positive differences to sum 56 subs r8, r8, r5 ; subtract negative differences from sum 79 add r8, r8, r4 ; add positive differences to sum 80 sub r8, r8, r5 ; subtract negative differences from sum 103 add r8, r8, r4 ; add positive differences to sum 104 sub r8, r8, r5 ; subtract negative differences from sum 129 add r8, r8, r4 ; add positive differences to sum 130 sub r8, r8, r5 ; subtract negative differences from sum 145 mul r0, r8, r8 ; sum * sum
|
D | vp8_variance_halfpixvar16x16_v_armv6.asm | 32 mov r8, #0 ; initialize sum = 0 61 adds r8, r8, r4 ; add positive differences to sum 62 subs r8, r8, r5 ; subtract negative differences from sum 92 add r8, r8, r4 ; add positive differences to sum 93 sub r8, r8, r5 ; subtract negative differences from sum 123 add r8, r8, r4 ; add positive differences to sum 124 sub r8, r8, r5 ; subtract negative differences from sum 156 add r8, r8, r4 ; add positive differences to sum 157 sub r8, r8, r5 ; subtract negative differences from sum 172 mul r0, r8, r8 ; sum * sum
|
D | vp8_variance_halfpixvar16x16_h_armv6.asm | 32 mov r8, #0 ; initialize sum = 0 60 adds r8, r8, r4 ; add positive differences to sum 61 subs r8, r8, r5 ; subtract negative differences from sum 91 add r8, r8, r4 ; add positive differences to sum 92 sub r8, r8, r5 ; subtract negative differences from sum 122 add r8, r8, r4 ; add positive differences to sum 123 sub r8, r8, r5 ; subtract negative differences from sum 155 add r8, r8, r4 ; add positive differences to sum 156 sub r8, r8, r5 ; subtract negative differences from sum 170 mul r0, r8, r8 ; sum * sum
|
D | sixtappredict8x4_v6.asm | 55 ldrb r8, [r0, #-3] 62 pkhbt r7, r7, r8, lsl #16 ; r8 | r7 64 pkhbt r8, r8, r9, lsl #16 ; r9 | r8 73 smlad r11, r8, r4, r11 ; vp8_filter[2], vp8_filter[3] 96 movne r6, r8 98 movne r8, r10 152 ldr r8, [sp, #8] 159 smlad lr, r5, r8, lr 160 smladx r10, r11, r8, r10 177 movne r7, r8 [all …]
|
D | idct_v6.asm | 45 smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 49 pkhbt r8, r8, r10, lsl #16 ; 5s | 4s 63 usub16 r7, r8, r7 ; c 66 usub16 r8, r11, r14 ; b 70 uadd16 r6, r8, r7 ; b+c 71 usub16 r7, r8, r7 ; b-c 94 smulwt r8, r4, r0 ; (ip[1] * sinpi8sqrt2) >> 16 99 pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 118 usub16 r12, r8, r6 ; c (o1 | o5) 122 mov r8, #4 ; set up 4's
|
D | simpleloopfilter_v6.asm | 176 ;transpose r7, r8, r9, r10 to r3, r4, r5, r6 177 TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6 180 uqsub8 r8, r6, r3 ; q1 - p1 183 orr r7, r7, r8 ; abs(p1 - q1) 185 mov r8, #0 187 uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2 192 sel lr, r10, r8 ; filter mask 219 shadd8 r9 , r9 , r8 220 shadd8 r3 , r3 , r8 221 shadd8 r9 , r9 , r8 [all …]
|
D | vp8_variance_halfpixvar16x16_hv_armv6.asm | 32 mov r8, #0 ; initialize sum = 0 71 adds r8, r8, r4 ; add positive differences to sum 72 subs r8, r8, r5 ; subtract negative differences from sum 113 add r8, r8, r4 ; add positive differences to sum 114 sub r8, r8, r5 ; subtract negative differences from sum 155 add r8, r8, r4 ; add positive differences to sum 156 sub r8, r8, r5 ; subtract negative differences from sum 197 add r8, r8, r4 ; add positive differences to sum 198 sub r8, r8, r5 ; subtract negative differences from sum 211 mul r0, r8, r8 ; sum * sum
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv6/ |
D | vp8_mse16x16_armv6.asm | 44 usub8 r8, r5, r6 ; calculate difference 46 sel r7, r8, lr ; select bytes with positive difference 49 sel r8, r9, lr ; select bytes with negative difference 53 usad8 r6, r8, lr ; calculate sum of negative differences 54 orr r8, r8, r7 ; differences of all 4 pixels 59 uxtb16 r6, r8 ; byte (two pixels) to halfwords 60 uxtb16 r7, r8, ror #8 ; another two pixels to halfwords 67 usub8 r8, r5, r6 ; calculate difference 68 sel r7, r8, lr ; select bytes with positive difference 70 sel r8, r9, lr ; select bytes with negative difference [all …]
|
D | vp8_subtract_armv6.asm | 38 ldr r8, [r1, #vp8_blockd_predictor] 45 ldr r1, [r8], r2 ; pred 93 uxtb16 r8, r6 ; [s2 | s0] (A) 98 usub16 r6, r8, r9 ; [d2 | d0] (A) 104 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) 107 str r8, [r0], #4 ; diff (A) 108 uxtb16 r8, r10 ; [s2 | s0] (B) 115 usub16 r6, r8, r9 ; [d2 | d0] (B) 121 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) 124 str r8, [r0], #4 ; diff (B) [all …]
|
D | vp8_short_fdct4x4_armv6.asm | 49 ldrd r8, r9, [r0] ; [i5 | i4] [i7 | i6] 59 qadd16 r6, r8, r9 ; [i5+i6 | i4+i7] = [b1 | a1] without shift 60 qsub16 r7, r8, r9 ; [i5-i6 | i4-i7] = [c1 | d1] without shift 69 smusd r8, r6, lr ; o6 = (i5+i6)*8 - (i4+i7)*8 77 pkhbt r6, r8, r7, lsl #4 ; [o7 | o6] 94 smusd r8, r6, lr ; o10 = (i9+i10)*8 - (i8+i11)*8 102 pkhbt r6, r8, r7, lsl #4 ; [o11 | o10] 147 lsl r8, r2, #16 ; prepare bottom halfword for scaling 151 pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword 159 smlabb r8, r7, r12, r2 ; [ ------ | d1*5352] [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
D | vp9_mb_lpf_neon.asm | 28 push {r4-r8, lr} 38 sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines 40 vld1.u8 {d0}, [r8@64], r1 ; p7 41 vld1.u8 {d1}, [r8@64], r1 ; p6 42 vld1.u8 {d2}, [r8@64], r1 ; p5 43 vld1.u8 {d3}, [r8@64], r1 ; p4 44 vld1.u8 {d4}, [r8@64], r1 ; p3 45 vld1.u8 {d5}, [r8@64], r1 ; p2 46 vld1.u8 {d6}, [r8@64], r1 ; p1 47 vld1.u8 {d7}, [r8@64], r1 ; p0 [all …]
|
D | vp9_convolve8_avg_neon.asm | 69 sub r8, r1, r1, lsl #2 ; -src_stride * 3 70 add r8, r8, #4 ; -src_stride * 3 + 4 85 vld1.8 {d27}, [r0], r8 110 vld1.32 {d30[]}, [r0], r8 142 pld [r5, -r8] 191 push {r4-r8, lr} 210 add r8, r2, r3, asr #1 238 vld1.u32 {d6[1]}, [r8@32], r3 240 vld1.u32 {d7[1]}, [r8@32], r3 254 pld [r8] [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_copyframe_func_neon.asm | 31 ldr r8, [r0, #yv12_buffer_config_u_buffer] ;srcptr1 43 str r8, [sp] 52 mov r8, r2 60 vld1.8 {q0, q1}, [r8]! 62 vld1.8 {q2, q3}, [r8]! 64 vld1.8 {q4, q5}, [r8]! 66 vld1.8 {q6, q7}, [r8]! 112 mov r8, r2 120 vld1.8 {q0, q1}, [r8]! 122 vld1.8 {q2, q3}, [r8]! [all …]
|