/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/ |
D | copymem8x4_v6.asm | 21 ;push {r4-r5} 22 stmdb sp!, {r4-r5} 37 ldrb r5, [r0, #1] 43 strb r5, [r2, #1] 46 ldrb r5, [r0, #3] 51 strb r5, [r2, #3] 54 ldrb r5, [r0, #5] 57 strb r5, [r2, #5] 60 ldrb r5, [r0, #7] 65 strb r5, [r2, #7] [all …]
|
D | copymem8x8_v6.asm | 21 ;push {r4-r5} 22 stmdb sp!, {r4-r5} 37 ldrb r5, [r0, #1] 43 strb r5, [r2, #1] 46 ldrb r5, [r0, #3] 51 strb r5, [r2, #3] 54 ldrb r5, [r0, #5] 57 strb r5, [r2, #5] 60 ldrb r5, [r0, #7] 65 strb r5, [r2, #7] [all …]
|
D | vp8_variance_halfpixvar16x16_hv_armv6.asm | 42 ldr r5, [r9, #0] ; load source pixels c, row N+1 51 uhsub8 r5, r5, r7 52 eor r5, r5, r10 54 mvn r5, r5 55 uhsub8 r4, r4, r5 56 ldr r5, [r2, #0] ; load 4 ref pixels 59 usub8 r6, r4, r5 ; calculate difference 62 usub8 r6, r5, r4 ; calculate difference with reversed operands 68 usad8 r5, r6, lr ; calculate sum of negative differences 72 subs r8, r8, r5 ; subtract negative differences from sum [all …]
|
D | intra4x4_predict_v6.asm | 55 ldrb r5, [r1], r2 ; Left[1] 61 add r4, r4, r5 85 ldrb r5, [r1], r2 ; Left[1] 98 add r5, r5, r5, lsl #16 ; l[1|1] 107 sadd16 r4, r5, r10 ; l[1|1] + a[2|0] - [tl|tl] 108 sadd16 r5, r5, r11 ; l[1|1] + a[3|1] - [tl|tl] 114 usat16 r5, #8, r5 119 add r12, r4, r5, lsl #8 ; [3|2|1|0] 126 sadd16 r5, r7, r11 ; l[3|3] + a[3|1] - [tl|tl] 131 usat16 r5, #8, r5 [all …]
|
D | vp8_variance16x16_armv6.asm | 39 ldr r5, [r2, #0] ; load 4 ref pixels 43 usub8 r6, r4, r5 ; calculate difference 46 usub8 r9, r5, r4 ; calculate difference with reversed operands 52 usad8 r5, r6, lr ; calculate sum of negative differences 56 subs r8, r8, r5 ; subtract negative differences from sum 59 uxtb16 r5, r6 ; byte (two pixels) to halfwords 61 smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) 65 ldr r5, [r2, #4] ; load 4 ref pixels 68 usub8 r6, r4, r5 ; calculate difference 70 usub8 r9, r5, r4 ; calculate difference with reversed operands [all …]
|
D | vp8_variance_halfpixvar16x16_v_armv6.asm | 42 ldr r5, [r2, #0] ; load 4 ref pixels 49 usub8 r6, r4, r5 ; calculate difference 52 usub8 r6, r5, r4 ; calculate difference with reversed operands 58 usad8 r5, r6, lr ; calculate sum of negative differences 62 subs r8, r8, r5 ; subtract negative differences from sum 65 uxtb16 r5, r6 ; byte (two pixels) to halfwords 67 smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) 72 ldr r5, [r2, #4] ; load 4 ref pixels 81 usub8 r6, r4, r5 ; calculate difference 83 usub8 r6, r5, r4 ; calculate difference with reversed operands [all …]
|
D | vp8_variance_halfpixvar16x16_h_armv6.asm | 41 ldr r5, [r2, #0] ; load 4 ref pixels 48 usub8 r6, r4, r5 ; calculate difference 51 usub8 r6, r5, r4 ; calculate difference with reversed operands 57 usad8 r5, r6, lr ; calculate sum of negative differences 61 subs r8, r8, r5 ; subtract negative differences from sum 64 uxtb16 r5, r6 ; byte (two pixels) to halfwords 66 smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) 71 ldr r5, [r2, #4] ; load 4 ref pixels 80 usub8 r6, r4, r5 ; calculate difference 82 usub8 r6, r5, r4 ; calculate difference with reversed operands [all …]
|
D | simpleloopfilter_v6.asm | 64 ldr r5, [src] ; q0 174 pkhbt r10, r5, r6, lsl #16 176 ;transpose r7, r8, r9, r10 to r3, r4, r5, r6 177 TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6 181 uqsub8 r9, r4, r5 ; p0 - q0 182 uqsub8 r10, r5, r4 ; q0 - p0 201 eor r5, r5, r2 ; q0 offset to convert to a signed value 204 qsub8 r6, r5, r4 ; q0 - p0 230 qsub8 r5, r5, r3 ; u = q0 - Filter1 232 eor r5, r5, r2 ; *oq0 = u^0x80 [all …]
|
D | dc_only_idct_add_v6.asm | 35 uxtab16 r5, r0, r4 ; a1+2 | a1+0 39 usat16 r5, #8, r5 43 orr r5, r5, r4, lsl #8 46 str r5, [r3], r12 50 uxtab16 r5, r0, r4 54 usat16 r5, #8, r5 58 orr r5, r5, r4, lsl #8 60 str r5, [r3], r12
|
D | iwalsh_v6.asm | 27 ldr r5, [r0, #12] ; [7 | 6] 44 qadd16 r11, r5, r7 ; b1 [7+11 | 6+10] 45 qsub16 r12, r5, r7 ; c1 [7-11 | 6-10] 49 qadd16 r5, r12, lr ; c1 + d1 [7 | 6] 57 qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7] 58 qaddsubx lr, r4, r5 ; [b1|d1] [5+6 | 4-7] 64 qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1] 69 qadd16 r5, r5, r10 ; [a2+3|d2+3] 82 asr r12, r5, #19 ; [4] 87 sxth r5, r5 [all …]
|
D | idct_v6.asm | 33 mov r5, #0x00004E00 ; cos 34 orr r5, r5, #0x0000007B ; cospi8sqrt2minus1 35 orr r5, r5, #1<<31 ; loop counter on top bit 42 smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 43 smulbb r7, r5, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 47 smulbt r11, r5, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 53 smulbb r9, r5, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 56 subs r5, r5, #1<<31 ; i-- 91 smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 92 smulbt r7, r5, r0 ; (ip[1] * cospi8sqrt2minus1) >> 16 [all …]
|
D | copymem16x16_v6.asm | 38 ldrb r5, [r0, #1] 46 strb r5, [r2, #1] 51 ldrb r5, [r0, #5] 58 strb r5, [r2, #5] 63 ldrb r5, [r0, #9] 68 strb r5, [r2, #9] 73 ldrb r5, [r0, #13] 80 strb r5, [r2, #13] 87 ldrneb r5, [r0, #1] 102 ldr r5, [r0, #4] [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/ |
D | vp8_vpxyv12_extendframeborders_neon.asm | 37 sub r5, r1, #32 ; dest_ptr1 = src_ptr1 - Border 62 vst1.8 {q0, q1}, [r5], lr 64 vst1.8 {q4, q5}, [r5], lr 66 vst1.8 {q8, q9}, [r5], lr 68 vst1.8 {q12, q13}, [r5], lr 83 sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) 101 vst1.8 {q0, q1}, [r5]! 103 vst1.8 {q2, q3}, [r5]! 105 vst1.8 {q4, q5}, [r5]! 107 vst1.8 {q6, q7}, [r5]! [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/ |
D | vp9_copy_neon.asm | 19 push {r4-r5, lr} 20 ldrd r4, r5, [sp, #28] 39 subs r5, r5, #1 41 pop {r4-r5, pc} 50 subs r5, r5, #2 52 pop {r4-r5, pc} 61 subs r5, r5, #2 63 pop {r4-r5, pc} 72 subs r5, r5, #2 74 pop {r4-r5, pc} [all …]
|
D | vp9_avg_neon.asm | 20 ldrd r4, r5, [sp, #32] 47 subs r5, r5, #1 66 subs r5, r5, #2 83 subs r5, r5, #2 99 subs r5, r5, #2 111 subs r5, r5, #2
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv6/ |
D | vp8_short_fdct4x4_armv6.asm | 26 ldrd r4, r5, [r0] ; [i1 | i0] [i3 | i2] 32 ror r5, r5, #16 ; [i2 | i3] 34 qadd16 r6, r4, r5 ; [i1+i2 | i0+i3] = [b1 | a1] without shift 35 qsub16 r7, r4, r5 ; [i1-i2 | i0-i3] = [c1 | d1] without shift 44 smusd r5, r6, lr ; o2 = (i1+i2)*8 - (i0+i3)*8 52 pkhbt r6, r5, r7, lsl #4 ; [o3 | o2] 74 ldrd r4, r5, [r0] ; [i9 | i8] [i11 | i10] 82 ror r5, r5, #16 ; [i10 | i11] 84 qadd16 r6, r4, r5 ; [i9+i10 | i8+i11]=[b1 | a1] without shift 85 qsub16 r7, r4, r5 ; [i9-i10 | i8-i11]=[c1 | d1] without shift [all …]
|
D | vp8_mse16x16_armv6.asm | 39 ldr r5, [r0, #0x0] ; load 4 src pixels 44 usub8 r8, r5, r6 ; calculate difference 47 usub8 r9, r6, r5 ; calculate difference with reversed operands 52 usad8 r5, r7, lr ; calculate sum of positive differences 56 ldr r5, [r0, #0x4] ; load 4 src pixels 67 usub8 r8, r5, r6 ; calculate difference 69 usub8 r9, r6, r5 ; calculate difference with reversed operands 73 usad8 r5, r7, lr ; calculate sum of positive differences 76 ldr r5, [r0, #0x8] ; load 4 src pixels 86 usub8 r8, r5, r6 ; calculate difference [all …]
|
D | walsh_v6.asm | 27 ldrd r4, r5, [r0], r2 32 qadd16 r3, r4, r5 ; [d1|a1] [1+3 | 0+2] 33 qsub16 r4, r4, r5 ; [c1|b1] [1-3 | 0-2] 37 qadd16 r5, r6, r7 ; [d1|a1] [5+7 | 4+6] 61 lsls r2, r5, #16 62 smuad r12, r5, lr ; B0 = a1<<2 + d1<<2 105 smusd r5, r5, lr ; B3 = a1<<2 - d1<<2 107 add r7, r5, r9 ; d1_3 = B3 + D3 108 sub r5, r5, r9 ; c1_3 = B3 - D3 113 adds r9, r3, r5 ; b2 = b1_3 + c1_3 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/ppc/ |
D | sad_altivec.asm | 56 lvsl v3, 0, r5 ;# only needs to be done once per block 60 lvx v1, 0, r5 61 lvx v2, r10, r5 63 add r5, r5, r6 76 lvx v1, 0, r5 77 lvx v2, r10, r5 88 add r5, r5, r6 93 lvx v1, 0, r5 95 lvx v2, r10, r5 100 add r5, r5, r6 [all …]
|
D | variance_altivec.asm | 76 load_aligned_16 v5, r5, r10 80 add r5, r5, r6 109 load_aligned_16 v5, r5, r10 113 add r5, r5, r6 117 load_aligned_16 v0, r5, r10 121 add r5, r5, r6 152 ;# r5 unsigned char *ref_ptr 174 ;# r5 unsigned char *ref_ptr 195 ;# r5 unsigned char *ref_ptr 208 load_aligned_16 v5, r5, r10 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/neon/ |
D | buildintrapredictorsmby_neon.asm | 41 ldr r5, [sp, #28] ; Left 51 adds r7, r4, r5 73 cmp r5, #0 81 ldrb r5, [r0], r2 86 add r12, r12, r5 91 ldrb r5, [r0], r2 96 add r12, r12, r5 101 ldrb r5, [r0], r2 106 add r12, r12, r5 111 ldrb r5, [r0], r2 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/ |
D | subtract_neon.asm | 30 ldr r5, [r0, #vp8_block_src_diff] 52 vst1.16 {d20}, [r5], r2 ;store diff 53 vst1.16 {d22}, [r5], r2 54 vst1.16 {d24}, [r5], r2 55 vst1.16 {d26}, [r5], r2 71 add r5, r0, #16 ; second diff pointer 93 vst1.16 {q9}, [r5], r6 95 vst1.16 {q11}, [r5], r6 97 vst1.16 {q13}, [r5], r6 99 vst1.16 {q15}, [r5], r6 [all …]
|
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/ppc/ |
D | encodemb_altivec.asm | 17 ;# r5 unsigned char *vsrc 74 lvsl v5, 0, r5 ;# permutate value for alignment 75 lvx v1, 0, r5 ;# src 78 add r5, r5, r7 86 lvsl v5, 0, r5 ;# permutate value for alignment 87 lvx v1, 0, r5 ;# src 89 add r5, r5, r7 114 ;# r5 unsigned char *pred 128 lvx v2, 0, r5 ;# pred 131 addi r5, r5, 16
|
/hardware/samsung_slsi/exynos5/libswconverter/ |
D | csc_tiled_to_linear_uv_deinterleave_neon.s | 62 @r5 i 81 mov r5, #0 83 mul r8, r11, r5 @ src_offset = tiled_width * i 88 mul r12, r12, r5 127 mul r8, r11, r5 @ src_offset = (tiled_width * i) + (j << 3) 131 mul r12, r12, r5 163 add r5, r5, #8 164 cmp r5, r9 173 mul r8, r11, r5 @ src_offset = (tiled_width * i) + (j << 3) 178 mul r12, r12, r5 [all …]
|
D | csc_tiled_to_linear_uv_neon.s | 59 @r5 i 78 mov r5, #0 80 mul r8, r11, r5 @ src_offset = tiled_width * i 86 mul r12, r2, r5 @ temp1 = width * i + j; 110 mul r8, r11, r5 @ src_offset = (tiled_width * i) + (j << 3); 114 mul r12, r2, r5 @ temp1 = width * i + j; 141 add r5, r5, #8 142 cmp r5, r9 151 mul r8, r11, r5 @ src_offset = (tiled_width * i) + (j << 3) 155 mul r12, r2, r5 @ temp1 = width * i + j; [all …]
|