Home
last modified time | relevance | path

Searched refs:r8 (Results 1 – 25 of 63) sorted by relevance

123

/hardware/samsung_slsi/exynos5/libswconverter/
Dcsc_tiled_to_linear_y_neon.s63 @r8 src_offset
81 mul r8, r11, r5 @ src_offset = tiled_width * i
83 add r8, r1, r8 @ src_offset = y_src + src_offset
85 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
86 vld1.8 {q0, q1}, [r8]!
87 vld1.8 {q2, q3}, [r8]!
88 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
89 vld1.8 {q4, q5}, [r8]!
90 vld1.8 {q6, q7}, [r8]!
92 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
[all …]
Dcsc_tiled_to_linear_uv_neon.s62 @r8 src_offset
80 mul r8, r11, r5 @ src_offset = tiled_width * i
82 add r8, r1, r8 @ src_offset = y_src + src_offset
84 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
85 vld1.8 {q0, q1}, [r8]!
87 vld1.8 {q2, q3}, [r8]!
89 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
90 vld1.8 {q4, q5}, [r8]!
92 vld1.8 {q6, q7}, [r8]!
110 mul r8, r11, r5 @ src_offset = (tiled_width * i) + (j << 3);
[all …]
Dcsc_tiled_to_linear_uv_deinterleave_neon.s65 @r8 src_offset
83 mul r8, r11, r5 @ src_offset = tiled_width * i
85 add r8, r2, r8 @ src_offset = uv_src + src_offset
90 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
91 vld2.8 {q0, q1}, [r8]!
93 vld2.8 {q2, q3}, [r8]!
95 pld [r8, #(CACHE_LINE_SIZE*PRE_LOAD_OFFSET)]
96 vld2.8 {q4, q5}, [r8]!
98 vld2.8 {q6, q7}, [r8]!
127 mul r8, r11, r5 @ src_offset = (tiled_width * i) + (j << 3)
[all …]
Dcsc_interleave_memcpy_neon.s58 @r8 temp1
65 stmfd sp!, {r8-r12,r14} @ backup registers
111 ldrb r8, [r11], #1
113 strb r8, [r10], #1
119 ldmfd sp!, {r8-r12,r15} @ restore registers
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/
Ddequant_idct_v6.asm68 smulwb r8, r4, r6
71 pkhbt r8, r8, r10, lsl #16
81 usub16 r7, r8, r7
84 usub16 r8, r11, r14
87 uadd16 r6, r8, r7
88 usub16 r7, r8, r7
101 ldr r8, [r0], #4
105 smulwt lr, r3, r8
106 smulwt r10, r4, r8
107 pkhbt r11, r8, r6, lsl #16
[all …]
Dloopfilter_v6.asm92 uqsub8 r8, r10, r11 ; p2 - p1
96 orr r8, r8, r10 ; abs (p2-p1)
98 uqsub8 r8, r8, r2 ; compare to limit
100 orr lr, lr, r8
106 uqsub8 r8, r6, r3 ; compare to thresh -- save r8 for later
159 orr r10, r6, r8 ; calculate vp8_hevmask
167 ldr r8, [src], pstep ; p0
173 eor r8, r8, r12 ; p0 offset to convert to a signed value
178 str r8, [sp, #4] ; store ps0 temporarily
183 qsub8 r8, r9, r8 ; vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
[all …]
Dintra4x4_predict_v6.asm52 ldr r8, [r0] ; Above
57 usad8 r12, r8, r9
82 ldr r8, [r0] ; Above
92 uxtb16 r10, r8 ; a[2|0]
93 uxtb16 r11, r8, ror #8 ; a[3|1]
141 ldr r8, [r0] ; a[3|2|1|0]
148 uxtb16 r4, r8 ; a[2|0]
149 uxtb16 r5, r8, ror #8 ; a[3|1]
178 ldrb r8, [sp, #48] ; top_left
183 add r8, r8, r4 ; tl + l[0]
[all …]
Dfilter_v6.asm54 ldrb r8, [r0, #-2] ; load source data
62 pkhbt lr, r8, r9, lsl #16 ; r9 | r8
63 pkhbt r8, r9, r10, lsl #16 ; r10 | r9
69 smuad r8, r8, r4
74 smlad r8, r11, r5, r8
83 smlad r11, r10, r6, r8
88 ldrneb r8, [r0, #-2] ; load data for next loop
144 ldrb r8, [r0, #-2] ; load source data
152 pkhbt lr, r8, r9, lsl #16 ; r9 | r8
153 pkhbt r8, r9, r10, lsl #16 ; r10 | r9
[all …]
Dbilinearfilter_v6.asm52 ldrb r8, [r0, #2]
60 pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1]
63 pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2]
67 smuad r8, r8, r5
80 add r8, r8, #0x40 ; round_shift_and_clamp
83 usat r8, #16, r8, asr #7
86 strh r8, [r1], r3 ; result is transposed and stored
92 ldrneb r8, [r0, #2]
116 ldrb r8, [r0, #2]
123 strh r8, [r1], r3
[all …]
Dvp8_variance8x8_armv6.asm41 usub8 r8, r6, r7 ; calculate difference
43 sel r10, r8, lr ; select bytes with positive difference
46 sel r8, r9, lr ; select bytes with negative difference
50 usad8 r7, r8, lr ; calculate sum of negative differences
51 orr r8, r8, r10 ; differences of all 4 pixels
57 uxtb16 r7, r8 ; byte (two pixels) to halfwords
58 uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
66 usub8 r8, r6, r7 ; calculate difference
68 sel r10, r8, lr ; select bytes with positive difference
71 sel r8, r9, lr ; select bytes with negative difference
[all …]
Diwalsh_v6.asm30 ldr r8, [r0, #24] ; [13 | 12]
33 qadd16 r10, r2, r8 ; a1 [1+13 | 0+12]
36 qsub16 lr, r2, r8 ; d1 [1-13 | 0-12]
41 qsub16 r8, lr, r12 ; d1 - c1 [13 | 12]
95 qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15]
96 qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15]
100 qaddsubx r8, r4, r5 ; [b2|c2] [c1+d1 | a1-b1]
105 qadd16 r8, r8, r10 ; [b2+3|c2+3]
121 asr lr, r8, #19 ; [13]
123 sxth r8, r8
[all …]
Dvp8_sad16x16_armv6.asm39 ldr r8, [r2, #0x0] ; load 4 ref pixels (1A)
45 usada8 r4, r8, r6, r4 ; calculate sad for 4 pixels
46 usad8 r8, r7, r9 ; calculate sad for 4 pixels
58 usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
62 add r4, r4, r8 ; add partial sad values
65 ldr r8, [r2, #0x0] ; load 4 ref pixels (2A)
70 usada8 r4, r6, r8, r4 ; calculate sad for 4 pixels
71 usad8 r8, r7, r9 ; calculate sad for 4 pixels
80 usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
86 add r4, r4, r8 ; add partial sad values
Dvp8_variance16x16_armv6.asm32 mov r8, #0 ; initialize sum = 0
55 adds r8, r8, r4 ; add positive differences to sum
56 subs r8, r8, r5 ; subtract negative differences from sum
79 add r8, r8, r4 ; add positive differences to sum
80 sub r8, r8, r5 ; subtract negative differences from sum
103 add r8, r8, r4 ; add positive differences to sum
104 sub r8, r8, r5 ; subtract negative differences from sum
129 add r8, r8, r4 ; add positive differences to sum
130 sub r8, r8, r5 ; subtract negative differences from sum
145 mul r0, r8, r8 ; sum * sum
Dvp8_variance_halfpixvar16x16_v_armv6.asm32 mov r8, #0 ; initialize sum = 0
61 adds r8, r8, r4 ; add positive differences to sum
62 subs r8, r8, r5 ; subtract negative differences from sum
92 add r8, r8, r4 ; add positive differences to sum
93 sub r8, r8, r5 ; subtract negative differences from sum
123 add r8, r8, r4 ; add positive differences to sum
124 sub r8, r8, r5 ; subtract negative differences from sum
156 add r8, r8, r4 ; add positive differences to sum
157 sub r8, r8, r5 ; subtract negative differences from sum
172 mul r0, r8, r8 ; sum * sum
Dvp8_variance_halfpixvar16x16_h_armv6.asm32 mov r8, #0 ; initialize sum = 0
60 adds r8, r8, r4 ; add positive differences to sum
61 subs r8, r8, r5 ; subtract negative differences from sum
91 add r8, r8, r4 ; add positive differences to sum
92 sub r8, r8, r5 ; subtract negative differences from sum
122 add r8, r8, r4 ; add positive differences to sum
123 sub r8, r8, r5 ; subtract negative differences from sum
155 add r8, r8, r4 ; add positive differences to sum
156 sub r8, r8, r5 ; subtract negative differences from sum
170 mul r0, r8, r8 ; sum * sum
Dsixtappredict8x4_v6.asm55 ldrb r8, [r0, #-3]
62 pkhbt r7, r7, r8, lsl #16 ; r8 | r7
64 pkhbt r8, r8, r9, lsl #16 ; r9 | r8
73 smlad r11, r8, r4, r11 ; vp8_filter[2], vp8_filter[3]
96 movne r6, r8
98 movne r8, r10
152 ldr r8, [sp, #8]
159 smlad lr, r5, r8, lr
160 smladx r10, r11, r8, r10
177 movne r7, r8
[all …]
Didct_v6.asm45 smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16
49 pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
63 usub16 r7, r8, r7 ; c
66 usub16 r8, r11, r14 ; b
70 uadd16 r6, r8, r7 ; b+c
71 usub16 r7, r8, r7 ; b-c
94 smulwt r8, r4, r0 ; (ip[1] * sinpi8sqrt2) >> 16
99 pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1
118 usub16 r12, r8, r6 ; c (o1 | o5)
122 mov r8, #4 ; set up 4's
Dsimpleloopfilter_v6.asm176 ;transpose r7, r8, r9, r10 to r3, r4, r5, r6
177 TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6
180 uqsub8 r8, r6, r3 ; q1 - p1
183 orr r7, r7, r8 ; abs(p1 - q1)
185 mov r8, #0
187 uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2
192 sel lr, r10, r8 ; filter mask
219 shadd8 r9 , r9 , r8
220 shadd8 r3 , r3 , r8
221 shadd8 r9 , r9 , r8
[all …]
Dvp8_variance_halfpixvar16x16_hv_armv6.asm32 mov r8, #0 ; initialize sum = 0
71 adds r8, r8, r4 ; add positive differences to sum
72 subs r8, r8, r5 ; subtract negative differences from sum
113 add r8, r8, r4 ; add positive differences to sum
114 sub r8, r8, r5 ; subtract negative differences from sum
155 add r8, r8, r4 ; add positive differences to sum
156 sub r8, r8, r5 ; subtract negative differences from sum
197 add r8, r8, r4 ; add positive differences to sum
198 sub r8, r8, r5 ; subtract negative differences from sum
211 mul r0, r8, r8 ; sum * sum
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/armv6/
Dvp8_mse16x16_armv6.asm44 usub8 r8, r5, r6 ; calculate difference
46 sel r7, r8, lr ; select bytes with positive difference
49 sel r8, r9, lr ; select bytes with negative difference
53 usad8 r6, r8, lr ; calculate sum of negative differences
54 orr r8, r8, r7 ; differences of all 4 pixels
59 uxtb16 r6, r8 ; byte (two pixels) to halfwords
60 uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
67 usub8 r8, r5, r6 ; calculate difference
68 sel r7, r8, lr ; select bytes with positive difference
70 sel r8, r9, lr ; select bytes with negative difference
[all …]
Dvp8_subtract_armv6.asm38 ldr r8, [r1, #vp8_blockd_predictor]
45 ldr r1, [r8], r2 ; pred
93 uxtb16 r8, r6 ; [s2 | s0] (A)
98 usub16 r6, r8, r9 ; [d2 | d0] (A)
104 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
107 str r8, [r0], #4 ; diff (A)
108 uxtb16 r8, r10 ; [s2 | s0] (B)
115 usub16 r6, r8, r9 ; [d2 | d0] (B)
121 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
124 str r8, [r0], #4 ; diff (B)
[all …]
Dvp8_short_fdct4x4_armv6.asm49 ldrd r8, r9, [r0] ; [i5 | i4] [i7 | i6]
59 qadd16 r6, r8, r9 ; [i5+i6 | i4+i7] = [b1 | a1] without shift
60 qsub16 r7, r8, r9 ; [i5-i6 | i4-i7] = [c1 | d1] without shift
69 smusd r8, r6, lr ; o6 = (i5+i6)*8 - (i4+i7)*8
77 pkhbt r6, r8, r7, lsl #4 ; [o7 | o6]
94 smusd r8, r6, lr ; o10 = (i9+i10)*8 - (i8+i11)*8
102 pkhbt r6, r8, r7, lsl #4 ; [o11 | o10]
147 lsl r8, r2, #16 ; prepare bottom halfword for scaling
151 pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword
159 smlabb r8, r7, r12, r2 ; [ ------ | d1*5352]
[all …]
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/
Dvp9_mb_lpf_neon.asm28 push {r4-r8, lr}
38 sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines
40 vld1.u8 {d0}, [r8@64], r1 ; p7
41 vld1.u8 {d1}, [r8@64], r1 ; p6
42 vld1.u8 {d2}, [r8@64], r1 ; p5
43 vld1.u8 {d3}, [r8@64], r1 ; p4
44 vld1.u8 {d4}, [r8@64], r1 ; p3
45 vld1.u8 {d5}, [r8@64], r1 ; p2
46 vld1.u8 {d6}, [r8@64], r1 ; p1
47 vld1.u8 {d7}, [r8@64], r1 ; p0
[all …]
Dvp9_convolve8_avg_neon.asm69 sub r8, r1, r1, lsl #2 ; -src_stride * 3
70 add r8, r8, #4 ; -src_stride * 3 + 4
85 vld1.8 {d27}, [r0], r8
110 vld1.32 {d30[]}, [r0], r8
142 pld [r5, -r8]
191 push {r4-r8, lr}
210 add r8, r2, r3, asr #1
238 vld1.u32 {d6[1]}, [r8@32], r3
240 vld1.u32 {d7[1]}, [r8@32], r3
254 pld [r8]
[all …]
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/
Dvp8_vpxyv12_copyframe_func_neon.asm31 ldr r8, [r0, #yv12_buffer_config_u_buffer] ;srcptr1
43 str r8, [sp]
52 mov r8, r2
60 vld1.8 {q0, q1}, [r8]!
62 vld1.8 {q2, q3}, [r8]!
64 vld1.8 {q4, q5}, [r8]!
66 vld1.8 {q6, q7}, [r8]!
112 mov r8, r2
120 vld1.8 {q0, q1}, [r8]!
122 vld1.8 {q2, q3}, [r8]!
[all …]

123