Home
last modified time | relevance | path

Searched refs:q0 (Results 1 – 25 of 65) sorted by relevance

123

/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/arm/neon/
Dvp9_reconintra_neon.asm76 vld1.8 {q0}, [r2]
77 vst1.8 {q0}, [r0], r1
78 vst1.8 {q0}, [r0], r1
79 vst1.8 {q0}, [r0], r1
80 vst1.8 {q0}, [r0], r1
81 vst1.8 {q0}, [r0], r1
82 vst1.8 {q0}, [r0], r1
83 vst1.8 {q0}, [r0], r1
84 vst1.8 {q0}, [r0], r1
85 vst1.8 {q0}, [r0], r1
[all …]
Dvp9_idct16x16_1_add_neon.asm47 vdup.s16 q0, r0 ; duplicate a1
61 vaddw.u8 q9, q0, d2 ; dest[x] + a1
62 vaddw.u8 q10, q0, d3 ; dest[x] + a1
63 vaddw.u8 q11, q0, d4 ; dest[x] + a1
64 vaddw.u8 q12, q0, d5 ; dest[x] + a1
74 vaddw.u8 q9, q0, d6 ; dest[x] + a1
75 vaddw.u8 q10, q0, d7 ; dest[x] + a1
76 vaddw.u8 q11, q0, d16 ; dest[x] + a1
77 vaddw.u8 q12, q0, d17 ; dest[x] + a1
97 vaddw.u8 q9, q0, d2 ; dest[x] + a1
[all …]
Dvp9_avg_neon.asm36 vld1.8 {q0-q1}, [r0]!
41 vrhadd.u8 q0, q0, q8
45 vst1.8 {q0-q1}, [r2@128]!
52 vld1.8 {q0-q1}, [r0], r1
57 vrhadd.u8 q0, q0, q8
64 vst1.8 {q0-q1}, [r2@128], r3
71 vld1.8 {q0}, [r0], r1
77 vrhadd.u8 q0, q0, q2
81 vst1.8 {q0}, [r2@128], r3
94 vrhadd.u8 q0, q0, q1
Dvp9_idct32x32_add_neon.asm358 ; into q0-q7 and the second one into q8-q15. There is a stride of 64,
361 vld1.s16 {q0}, [r3]!
397 vtrn.32 q0, q2
405 vtrn.16 q0, q1
420 vst1.16 {q0}, [r0]!
481 vadd.s16 q4, q0, q1
482 vsub.s16 q13, q0, q1
518 vsub.s16 q14, q1, q0
519 vadd.s16 q2, q1, q0
538 vadd.s16 q9, q5, q0
[all …]
Dvp9_idct8x8_1_add_neon.asm47 vdup.s16 q0, r0 ; duplicate a1
59 vaddw.u8 q9, q0, d2 ; dest[x] + a1
60 vaddw.u8 q10, q0, d3 ; dest[x] + a1
61 vaddw.u8 q11, q0, d4 ; dest[x] + a1
62 vaddw.u8 q12, q0, d5 ; dest[x] + a1
72 vaddw.u8 q9, q0, d6 ; dest[x] + a1
73 vaddw.u8 q10, q0, d7 ; dest[x] + a1
74 vaddw.u8 q11, q0, d16 ; dest[x] + a1
75 vaddw.u8 q12, q0, d17 ; dest[x] + a1
Dvp9_idct16x16_add_neon.asm47 ; will be stored back into q8-q15 registers. This function will touch q0-q7
155 vmull.s16 q0, d24, d30
166 vadd.s32 q3, q2, q0
170 vsub.s32 q13, q2, q0
183 vmull.s16 q0, d20, d31
191 vmlal.s16 q0, d28, d30
199 vqrshrn.s32 d22, q0, #14 ; >> 14
216 vadd.s16 q0, q8, q11 ; step1[0] = step2[0] + step2[3];
248 vadd.s16 q8, q0, q15 ; step2[0] = step1[0] + step1[7];
255 vsub.s16 q15, q0, q15 ; step2[7] = step1[0] - step1[7];
[all …]
Dvp9_copy_neon.asm35 vld1.8 {q0-q1}, [r0]!
37 vst1.8 {q0-q1}, [r2@128]!
45 vld1.8 {q0-q1}, [r0], r1
48 vst1.8 {q0-q1}, [r2@128], r3
56 vld1.8 {q0}, [r0], r1
59 vst1.8 {q0}, [r2@128], r3
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/neon/
Dbuildintrapredictorsmby_neon.asm45 vdup.u8 q0, r12
126 vdup.u8 q0, r5
129 vst1.u8 {q0}, [r1]!
130 vst1.u8 {q0}, [r1]!
131 vst1.u8 {q0}, [r1]!
132 vst1.u8 {q0}, [r1]!
133 vst1.u8 {q0}, [r1]!
134 vst1.u8 {q0}, [r1]!
135 vst1.u8 {q0}, [r1]!
136 vst1.u8 {q0}, [r1]!
[all …]
Dloopfiltersimplehorizontaledge_neon.asm28 vld1.u8 {q7}, [r0@128], r1 ; q0
33 vabd.u8 q15, q6, q7 ; abs(p0 - q0)
36 vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
38 vmov.u8 q0, #0x80 ; 0x80
40 vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
42 veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
43 veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
44 veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
45 veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
47 vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1
[all …]
Dvp8_subpixelvariance16x16s_neon.asm55 vext.8 q1, q0, q1, #1 ;construct src_ptr[1]
60 vrhadd.u8 q0, q0, q1 ;(src_ptr[0]+src_ptr[1])/round/shift right 1
70 vsubl.u8 q0, d4, d26
91 vpadal.s16 q8, q0 ;sum
107 vpaddl.s32 q0, q8 ;accumulate sum
137 vld1.u8 {q0}, [r0], r1 ;load src data
153 vrhadd.u8 q0, q0, q2
163 vsubl.u8 q0, d8, d10
184 vpadal.s16 q8, q0 ;sum
194 vmov q0, q15
[all …]
Dsad16_neon.asm27 vld1.8 {q0}, [r0], r1
49 vld1.8 {q0}, [r0], r1
74 vld1.8 {q0}, [r0], r1
99 vld1.8 {q0}, [r0], r1
126 vadd.u16 q0, q12, q13
128 vpaddl.u16 q1, q0
129 vpaddl.u32 q0, q1
146 vld1.8 {q0}, [r0], r1
167 vld1.8 {q0}, [r0], r1
194 vadd.u16 q0, q12, q13
[all …]
Dloopfiltersimpleverticaledge_neon.asm53 vabd.u8 q15, q5, q4 ; abs(p0 - q0)
56 vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
58 vmov.u8 q0, #0x80 ; 0x80
60 vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
62 veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value
63 veor q5, q5, q0 ; ps0: p0 offset to convert to a signed value
64 veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value
65 veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value
67 vcge.u8 q15, q1, q15 ; abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
100 veor q6, q11, q0 ; *op0 = u^0x80
[all …]
Diwalsh_neon.asm22 vld1.i16 {q0-q1}, [r0@128]
30 vadd.s16 q0, q2, q3 ; a+b d+c
52 vadd.s16 q0, q2, q3 ; a+b d+c
55 vadd.i16 q0, q0, q8 ;e/f += 3
58 vshr.s16 q0, q0, #3 ;e/f >> 3
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/
Dvp9_loopfilter_filters.c24 uint8_t q0, uint8_t q1, in filter_mask() argument
30 mask |= (abs(q1 - q0) > limit) * -1; in filter_mask()
33 mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; in filter_mask()
40 uint8_t q0, uint8_t q1, in flat_mask4() argument
44 mask |= (abs(q1 - q0) > thresh) * -1; in flat_mask4()
46 mask |= (abs(q2 - q0) > thresh) * -1; in flat_mask4()
48 mask |= (abs(q3 - q0) > thresh) * -1; in flat_mask4()
55 uint8_t p0, uint8_t q0, in flat_mask5() argument
58 int8_t mask = ~flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3); in flat_mask5()
60 mask |= (abs(q4 - q0) > thresh) * -1; in flat_mask5()
[all …]
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/mips/dspr2/
Dvp9_loopfilter_masks_dspr2.h30 uint32_t q0, uint32_t q1, in vp9_filter_hev_mask_dspr2() argument
94 [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), in vp9_filter_hev_mask_dspr2()
124 : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), in vp9_filter_hev_mask_dspr2()
137 uint32_t q0, uint32_t q1, in vp9_filter_hev_mask_flatmask4_dspr2() argument
242 [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), in vp9_filter_hev_mask_flatmask4_dspr2()
273 : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), in vp9_filter_hev_mask_flatmask4_dspr2()
284 uint32_t p0, uint32_t q0, in vp9_flatmask5() argument
361 [p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1), in vp9_flatmask5()
Dvp9_mbloop_loopfilter_dspr2.c36 uint32_t p3, p2, p1, p0, q0, q1, q2, q3; in vp9_lpf_horizontal_8_dspr2() local
79 [q3] "=&r" (q3), [q2] "=&r" (q2), [q1] "=&r" (q1), [q0] "=&r" (q0) in vp9_lpf_horizontal_8_dspr2()
85 p1, p0, p3, p2, q0, q1, q2, q3, in vp9_lpf_horizontal_8_dspr2()
89 vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, in vp9_lpf_horizontal_8_dspr2()
127 [q0] "r" (q0), [q1] "r" (q1), [q2] "r" (q2), in vp9_lpf_horizontal_8_dspr2()
133 vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, in vp9_lpf_horizontal_8_dspr2()
232 [q0] "+r" (q0), [q1] "+r" (q1), [q2] "+r" (q2), in vp9_lpf_horizontal_8_dspr2()
334 uint32_t p3, p2, p1, p0, q3, q2, q1, q0; in vp9_lpf_vertical_8_dspr2() local
374 [q0] "=&r" (q0), [q1] "=&r" (q1), [q2] "=&r" (q2), [q3] "=&r" (q3) in vp9_lpf_vertical_8_dspr2()
449 [q3] "+r" (q3), [q2] "+r" (q2), [q1] "+r" (q1), [q0] "+r" (q0), in vp9_lpf_vertical_8_dspr2()
[all …]
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/x86/
Dloopfilter_mmx.asm68 movq mm4, [rsi] ; q0
69 movq mm0, mm4 ; q0
70 psubusb mm4, mm3 ; q0-=q1
71 psubusb mm3, mm0 ; q1-=q0
72 por mm4, mm3 ; abs(q0-q1)
118 movq mm3, [rsi] ; q0
119 psubusb mm5, mm3 ; p0-=q0
120 psubusb mm3, mm6 ; q0-=p0
121 por mm5, mm3 ; abs(p0 - q0)
122 paddusb mm5, mm5 ; abs(p0-q0)*2
[all …]
Dloopfilter_sse2.asm33 movdqa xmm5, [rsi] ; q0
39 movlps xmm5, [rsi + rax] ; q0
66 movdqa xmm0, xmm5 ; q0
69 psubusb xmm5, xmm3 ; q0-=q1
70 psubusb xmm3, xmm0 ; q1-=q0
72 por xmm5, xmm3 ; abs(q0-q1)
141 movdqa xmm3, xmm0 ; q0
147 psubusb xmm5, xmm3 ; p0-=q0
148 psubusb xmm3, xmm6 ; q0-=p0
149 por xmm5, xmm3 ; abs(p0 - q0)
[all …]
/hardware/samsung_slsi/exynos5/libswconverter/
Dcsc_ARGB8888_to_YUV420SP_NEON.s34 @q0: temp1, R
84 vmls.u16 q8,q4,q11 @q0:U -(38 * R[k]) @128<<6+ 32 + u>>2
90 vmls.u16 q7,q5,q14 @q0:U -(94 * G[k]) @128<<6+ 32 + v>>2
106 vmul.u16 q7,q4,q14 @q0 = 66 *R[k]
107 vmla.u16 q7,q5,q15 @q0 += 129 *G[k]
108 vmla.u16 q7,q6,q8 @q0 += 25 *B[k]
124 vmul.u16 q0,q4,q14 @q0 = 66 *R[k]
125 vmla.u16 q0,q5,q15 @q0 += 129 *G[k]
126 vmla.u16 q0,q6,q8 @q0 += 25 *B[k]
127 vadd.u16 q0,q0,q10
[all …]
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp9/common/x86/
Dvp9_loopfilter_mmx.asm68 movq mm4, [rsi] ; q0
69 movq mm0, mm4 ; q0
70 psubusb mm4, mm3 ; q0-=q1
71 psubusb mm3, mm0 ; q1-=q0
72 por mm4, mm3 ; abs(q0-q1)
118 movq mm3, [rsi] ; q0
119 psubusb mm5, mm3 ; p0-=q0
120 psubusb mm3, mm6 ; q0-=p0
121 por mm5, mm3 ; abs(p0 - q0)
122 paddusb mm5, mm5 ; abs(p0-q0)*2
[all …]
Dvp9_loopfilter_intrin_sse2.c388 __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; in mb_lpf_horizontal_edge_w_sse2_16() local
397 q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); in mb_lpf_horizontal_edge_w_sse2_16()
412 _mm_store_si128((__m128i *)&aq[0 * 16], q0); in mb_lpf_horizontal_edge_w_sse2_16()
418 const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), in mb_lpf_horizontal_edge_w_sse2_16()
419 _mm_subs_epu8(q0, q1)); in mb_lpf_horizontal_edge_w_sse2_16()
422 __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), in mb_lpf_horizontal_edge_w_sse2_16()
423 _mm_subs_epu8(q0, p0)); in mb_lpf_horizontal_edge_w_sse2_16()
465 __m128i qs0 = _mm_xor_si128(q0, t80); in mb_lpf_horizontal_edge_w_sse2_16()
514 _mm_or_si128(_mm_subs_epu8(q2, q0), in mb_lpf_horizontal_edge_w_sse2_16()
515 _mm_subs_epu8(q0, q2))); in mb_lpf_horizontal_edge_w_sse2_16()
[all …]
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/encoder/arm/neon/
Dfastquantizeb_neon.asm33 vld1.16 {q0, q1}, [r4@128] ; load z
37 vabs.s16 q4, q0 ; calculate x = abs(z)
41 vshr.s16 q2, q0, #15 ; sz
52 vld1.16 {q0, q1}, [r4@128] ; load z2
57 vabs.s16 q10, q0 ; calculate x2 = abs(z_2)
59 vshr.s16 q12, q0, #15 ; sz2
119 vand q0, q6, q14 ; get all valid numbers from scan array
127 vmax.u16 q0, q0, q1 ; find maximum value in q0, q1
135 vmovl.u16 q0, d0
164 vld1.16 {q0, q1}, [r3@128] ; load z
[all …]
Dshortfdct_neon.asm105 vst1.16 {q0, q1}, [r1@128]
116 vld1.16 {q0}, [r0@128], r2
124 ; transpose q0=ip[0], q1=ip[1], q2=ip[2], q3=ip[3]
125 vtrn.32 q0, q2 ; [A0|B0]
127 vtrn.16 q0, q1 ; [A2|B2]
130 vadd.s16 q11, q0, q3 ; a1 = ip[0] + ip[3]
133 vsub.s16 q14, q0, q3 ; d1 = ip[0] - ip[3]
140 vadd.s16 q0, q11, q12 ; [A0 | B0] = a1 + b1
165 ; transpose q0=ip[0], q1=ip[4], q2=ip[8], q3=ip[12]
166 vtrn.32 q0, q2 ; q0=[A0 | B0]
[all …]
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vp8/common/arm/armv6/
Dsimpleloopfilter_v6.asm64 ldr r5, [src] ; q0
181 uqsub8 r9, r4, r5 ; p0 - q0
182 uqsub8 r10, r5, r4 ; q0 - p0
184 orr r9, r9, r10 ; abs(p0 - q0)
186 uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
188 uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
201 eor r5, r5, r2 ; q0 offset to convert to a signed value
204 qsub8 r6, r5, r4 ; q0 - p0
206 qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
209 qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
[all …]
/hardware/intel/common/omx-components/videocodec/libvpx_internal/libvpx/vpx_scale/arm/neon/
Dvp8_vpxyv12_extendframeborders_neon.asm51 vmov q1, q0
62 vst1.8 {q0, q1}, [r5], lr
87 vld1.8 {q0, q1}, [r1]!
101 vst1.8 {q0, q1}, [r5]!
167 vst1.8 {q0}, [r5], lr
198 vld1.8 {q0, q1}, [r1]!
208 vst1.8 {q0, q1}, [r5]!
240 vld1.8 {q0}, [r1]!
248 vst1.8 {q0}, [r5], lr
250 vst1.8 {q0}, [r5], lr
[all …]

123