/hardware/samsung_slsi/exynos5/libswconverter/ |
D | csc_tiled_to_linear_y_neon.s | 62 @r7 dst_offset 96 add r7, r0, r12 @ dst_offset = y_dst + temp1 101 vst1.8 {q0}, [r7], r2 102 vst1.8 {q1}, [r7], r2 103 vst1.8 {q2}, [r7], r2 104 vst1.8 {q3}, [r7], r2 105 vst1.8 {q4}, [r7], r2 106 vst1.8 {q5}, [r7], r2 107 vst1.8 {q6}, [r7], r2 108 vst1.8 {q7}, [r7], r2 [all …]
|
D | csc_tiled_to_linear_uv_deinterleave_neon.s | 64 @r7 dst_offset 94 add r7, r0, r12 @ dst_offset = u_dst + temp1 100 vst1.8 {d0}, [r7], r14 101 vst1.8 {d1}, [r7], r14 102 vst1.8 {d4}, [r7], r14 103 vst1.8 {d5}, [r7], r14 104 vst1.8 {d8}, [r7], r14 105 vst1.8 {d9}, [r7], r14 106 vst1.8 {d12}, [r7], r14 107 vst1.8 {d13}, [r7], r14 [all …]
|
D | csc_tiled_to_linear_uv_neon.s | 61 @r7 dst_offset 91 add r7, r0, r12 @ dst_offset = y_dst + temp1 94 vst1.8 {q0}, [r7], r2 95 vst1.8 {q1}, [r7], r2 96 vst1.8 {q2}, [r7], r2 97 vst1.8 {q3}, [r7], r2 98 vst1.8 {q4}, [r7], r2 99 vst1.8 {q5}, [r7], r2 100 vst1.8 {q6}, [r7], r2 101 vst1.8 {q7}, [r7], r2 [all …]
|
D | csc_ARGB8888_to_YUV420SP_NEON.s | 16 @r7 temp6, accumilator 207 mul r7, r10, r11 219 mla r7, r10, r11, r7 231 mla r7, r10, r11, r7 234 add r7, r6 236 lsr r7, #8 237 strb r7, [r0],#1 238 lsr r7,#16 239 strb r7, [r0],#1 246 ldr r7,=0x00008080 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/ |
D | dequant_idct_v6.asm | 33 smultt r7, r4, r5 39 strh r7, [r0], #2 42 smultt r7, r4, r5 50 strh r7, [r0], #2 66 smulwb r7, r3, r6 69 pkhbt r7, r7, r9, lsl #16 72 uadd16 r6, r6, r7 73 smulwt r7, r4, r12 79 pkhbt r10, r10, r7, lsl #16 80 uadd16 r7, r12, r9 [all …]
|
D | copymem16x16_v6.asm | 21 stmdb sp!, {r4 - r7} 22 ;push {r4-r7} 40 ldrb r7, [r0, #3] 48 strb r7, [r2, #3] 53 ldrb r7, [r0, #7] 60 strb r7, [r2, #7] 65 ldrb r7, [r0, #11] 70 strb r7, [r2, #11] 75 ldrb r7, [r0, #15] 82 strb r7, [r2, #15] [all …]
|
D | loopfilter_v6.asm | 91 uqsub8 r7, r10, r9 ; p2 - p3 95 orr r6, r6, r7 ; abs (p3-p2) 101 uqsub8 r7, r12, r11 ; p0 - p1 104 orr r6, r6, r7 ; abs (p1-p0) 105 uqsub8 r7, r6, r2 ; compare to limit 107 orr lr, lr, r7 110 uqsub8 r7, r10, r11 ; q1 - p1 113 orr r6, r6, r7 ; abs (p1-q1) 114 ldr r7, c0x7F7F7F7F 118 and r6, r7, r6, lsr #1 ; abs (p1-q1) / 2 [all …]
|
D | intra4x4_predict_v6.asm | 58 ldrb r7, [r1] ; Left[3] 63 add r4, r4, r7 87 ldrb r7, [r1] ; Left[3] 100 add r7, r7, r7, lsl #16 ; l[3|3] 125 sadd16 r4, r7, r10 ; l[3|3] + a[2|0] - [tl|tl] 126 sadd16 r5, r7, r11 ; l[3|3] + a[3|1] - [tl|tl] 181 ldrb r7, [r1] ; Left[3] 186 add r11, r6, r7 ; l[2] + l[3] 193 add r6, r11, r7, lsl #1 ; l[2] + 2*l[3] + l[3] 229 uxtb16 r7, r4, ror #8 ; a[3|1] [all …]
|
D | dc_only_idct_add_v6.asm | 24 stmdb sp!, {r4 - r7} 37 uxtab16 r7, r0, r6 41 usat16 r7, #8, r7 44 orr r7, r7, r6, lsl #8 48 str r7, [r3], r12 52 uxtab16 r7, r0, r6 56 usat16 r7, #8, r7 59 orr r7, r7, r6, lsl #8 61 str r7, [r3] 63 ldmia sp!, {r4 - r7}
|
D | idct_v6.asm | 43 smulbb r7, r5, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 48 pkhtb r7, r9, r7, asr #16 ; 5c | 4c 50 uadd16 r6, r6, r7 ; 5c+5 | 4c+4 52 smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 60 pkhbt r10, r10, r7, lsl #16 ; 13s | 12s 61 uadd16 r7, r12, r9 ; 13c+13 | 12c+12 63 usub16 r7, r8, r7 ; c 70 uadd16 r6, r8, r7 ; b+c 71 usub16 r7, r8, r7 ; b-c 75 str r7, [r0, #(8*2)] ; o9 | o8 [all …]
|
D | vp8_variance_halfpixvar16x16_hv_armv6.asm | 43 ldr r7, [r9, #1] ; load source pixels d, row N+1 50 mvn r7, r7 51 uhsub8 r5, r5, r7 61 sel r7, r6, lr ; select bytes with positive difference 67 usad8 r4, r7, lr ; calculate sum of positive differences 69 orr r6, r6, r7 ; differences of all 4 pixels 76 uxtb16 r7, r6, ror #8 ; another two pixels to halfwords 84 smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) 86 ldr r7, [r9, #5] ; load source pixels d, row N+1 93 mvn r7, r7 [all …]
|
D | bilinearfilter_v6.asm | 34 add r7, r2, r4 ; preload next row 35 pld [r0, r7] 51 ldrb r7, [r0, #1] 59 pkhbt r6, r6, r7, lsl #16 ; src[1] | src[0] 60 pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1] 64 smuad r7, r7, r5 74 add r7, r7, #0x40 76 usat r7, #16, r7, asr #7 81 strh r7, [r1], r3 91 ldrneb r7, [r0, #1] [all …]
|
D | filter_v6.asm | 35 ldr r7, [sp, #36] ; output height 50 mov r7, r7, lsl #16 ; height is top part of counter 57 orr r7, r7, r3, lsr #2 ; construct loop counter 77 sub r7, r7, #1 85 ands r10, r7, #0xff ; test loop counter 102 subs r7, r7, #0x10000 122 ldr r7, [sp, #36] ; output height 140 mov r7, r7, lsl #16 ; height is top part of counter 147 orr r7, r7, r3, lsr #2 ; construct loop counter 167 sub r7, r7, #1 [all …]
|
D | vp8_variance8x8_armv6.asm | 37 ldr r7, [r2, #0x0] ; load 4 ref pixels 41 usub8 r8, r6, r7 ; calculate difference 44 usub8 r9, r7, r6 ; calculate difference with reversed operands 50 usad8 r7, r8, lr ; calculate sum of negative differences 54 sub r4, r4, r7 ; subtract negative differences from sum 57 uxtb16 r7, r8 ; byte (two pixels) to halfwords 59 smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) 63 ldr r7, [r2, #0x4] ; load 4 ref pixels 66 usub8 r8, r6, r7 ; calculate difference 69 usub8 r9, r7, r6 ; calculate difference with reversed operands [all …]
|
D | vp8_variance_halfpixvar16x16_v_armv6.asm | 51 sel r7, r6, lr ; select bytes with positive difference 57 usad8 r4, r7, lr ; calculate sum of positive differences 59 orr r6, r6, r7 ; differences of all 4 pixels 66 uxtb16 r7, r6, ror #8 ; another two pixels to halfwords 79 smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) 82 sel r7, r6, lr ; select bytes with positive difference 87 usad8 r4, r7, lr ; calculate sum of positive differences 89 orr r6, r6, r7 ; differences of all 4 pixels 97 uxtb16 r7, r6, ror #8 ; another two pixels to halfwords 110 smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) [all …]
|
D | vp8_variance_halfpixvar16x16_h_armv6.asm | 50 sel r7, r6, lr ; select bytes with positive difference 56 usad8 r4, r7, lr ; calculate sum of positive differences 58 orr r6, r6, r7 ; differences of all 4 pixels 65 uxtb16 r7, r6, ror #8 ; another two pixels to halfwords 78 smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) 81 sel r7, r6, lr ; select bytes with positive difference 86 usad8 r4, r7, lr ; calculate sum of positive differences 88 orr r6, r6, r7 ; differences of all 4 pixels 96 uxtb16 r7, r6, ror #8 ; another two pixels to halfwords 109 smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) [all …]
|
D | iwalsh_v6.asm | 29 ldr r7, [r0, #20] ; [11 | 10] 44 qadd16 r11, r5, r7 ; b1 [7+11 | 6+10] 45 qsub16 r12, r5, r7 ; c1 [7-11 | 6-10] 50 qsub16 r7, r10, r11 ; a1 - b1 [11 | 10] 93 qsubaddx r2, r6, r7 ; [c1|a1] [9-10 | 8+11] 94 qaddsubx r3, r6, r7 ; [b1|d1] [9+10 | 8-11] 99 qaddsubx r7, r3, r2 ; [a2|d2] [b1+a1 | d1-c1] 104 qadd16 r7, r7, r10 ; [a2+3|d2+3] 108 asr r12, r7, #19 ; [8] 113 sxth r7, r7 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv6/ |
D | vp8_short_fdct4x4_armv6.asm | 35 qsub16 r7, r4, r5 ; [i1-i2 | i0-i3] = [c1 | d1] without shift 39 qadd16 r7, r7, r7 ; 2*[c1|d1] --> we can use smlad and smlsd 46 smlad r6, r7, r12, r11 ; o1 = (c1 * 2217 + d1 * 5352 + 14500) 47 smlsdx r7, r7, r12, r10 ; o3 = (d1 * 2217 - c1 * 5352 + 7500) 52 pkhbt r6, r5, r7, lsl #4 ; [o3 | o2] 60 qsub16 r7, r8, r9 ; [i5-i6 | i4-i7] = [c1 | d1] without shift 64 qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd 71 smlad r6, r7, r12, r11 ; o5 = (c1 * 2217 + d1 * 5352 + 14500) 72 smlsdx r7, r7, r12, r10 ; o7 = (d1 * 2217 - c1 * 5352 + 7500) 77 pkhbt r6, r8, r7, lsl #4 ; [o7 | o6] [all …]
|
D | vp8_mse16x16_armv6.asm | 46 sel r7, r8, lr ; select bytes with positive difference 52 usad8 r5, r7, lr ; calculate sum of positive differences 54 orr r8, r8, r7 ; differences of all 4 pixels 60 uxtb16 r7, r8, ror #8 ; another two pixels to halfwords 65 smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) 68 sel r7, r8, lr ; select bytes with positive difference 73 usad8 r5, r7, lr ; calculate sum of positive differences 75 orr r8, r8, r7 ; differences of all 4 pixels 79 uxtb16 r7, r8, ror #8 ; another two pixels to halfwords 84 smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) [all …]
|
D | vp8_subtract_armv6.asm | 36 ldr r7, [r0, #vp8_block_src_stride] 44 ldr r0, [r3], r7 ; src 91 ldr r7, [r5] ; upred (A) 94 uxtb16 r9, r7 ; [p2 | p0] (A) 96 uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) 99 usub16 r7, r10, r11 ; [d3 | d1] (A) 104 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) 105 pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) 116 usub16 r7, r10, r11 ; [d3 | d1] (B) 121 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv5te/ |
D | vp8_packtokens_partitions_armv5.asm | 63 ldr r7, [r4, #0] ; dereference cpi->tp_list 64 str r7, [sp, #32] ; store start of cpi->tp_list 100 ldr r1, [r7, #tokenlist_start] 101 ldr r9, [r7, #tokenlist_stop] 103 str r7, [sp, #16] ; tokenlist address for next time 116 ldrb r7, [r1, #tokenextra_skip_eob_node] 122 cmp r7, #0 135 sub r7, r5, #1 ; range-1 141 mul r6, r4, r7 ; ((range-1) * pp[i>>1])) 147 mov r7, #1 [all …]
|
D | vp8_packtokens_mbrow_armv5.asm | 61 ldr r7, [r4, #0] ; dereference cpi->tp_list 71 ldr r1, [r7, #tokenlist_start] 72 ldr r9, [r7, #tokenlist_stop] 74 str r7, [sp, #16] ; tokenlist address for next time 87 ldrb r7, [r1, #tokenextra_skip_eob_node] 93 cmp r7, #0 106 sub r7, r5, #1 ; range-1 112 mul r6, r4, r7 ; ((range-1) * pp[i>>1])) 118 mov r7, #1 120 add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) [all …]
|
D | vp8_packtokens_armv5.asm | 66 ldrb r7, [r1, #tokenextra_skip_eob_node] 72 cmp r7, #0 85 sub r7, r5, #1 ; range-1 91 mul r6, r4, r7 ; ((range-1) * pp[i>>1])) 97 mov r7, #1 99 add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) 124 strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 128 ldrge r7, [r0, #vp8_writer_buffer] 129 ldrb r11, [r7, r4] 133 ldr r7, [r0, #vp8_writer_buffer] [all …]
|
D | boolhuff_armv5te.asm | 68 sub r7, r5, #1 ; range-1 71 mul r6, r4, r7 ; ((range-1) * probability) 73 mov r7, #1 74 add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8) 99 strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 103 ldrge r7, [r0, #vp8_writer_buffer] 104 ldrb r1, [r7, r4] 108 ldr r7, [r0, #vp8_writer_buffer] 109 ldrb r9, [r7, r4] ; w->buffer[x] 111 strb r9, [r7, r4] ; w->buffer[x] + 1 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
D | subtract_neon.asm | 26 stmfd sp!, {r4-r7} 34 ldr r7, [r1, #vp8_blockd_predictor] 37 vld1.8 {d1}, [r7], r2 ;load pred 39 vld1.8 {d3}, [r7], r2 41 vld1.8 {d5}, [r7], r2 43 vld1.8 {d7}, [r7], r2 57 ldmfd sp!, {r4-r7} 67 push {r4-r7} 104 pop {r4-r7} 114 push {r4-r7} [all …]
|