• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1@/******************************************************************************
2@ *
3@ * Copyright (C) 2015 The Android Open Source Project
4@ *
5@ * Licensed under the Apache License, Version 2.0 (the "License");
6@ * you may not use this file except in compliance with the License.
7@ * You may obtain a copy of the License at:
8@ *
9@ * http://www.apache.org/licenses/LICENSE-2.0
10@ *
11@ * Unless required by applicable law or agreed to in writing, software
12@ * distributed under the License is distributed on an "AS IS" BASIS,
13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@ * See the License for the specific language governing permissions and
15@ * limitations under the License.
16@ *
17@ *****************************************************************************
18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19@*/
20
@/*
@//----------------------------------------------------------------------------
@// File Name            : impeg2_format_conv.s
@//
@// Description          : This file has the format conversion routines
@//                        (YUV420P to YUV420SP, with UV- and VU-interleaved
@//                        chroma) for the NEON platform.
@//
@// Reference Document   :
@//
@// Revision History     :
@//      Date            Author                  Detail Description
@//   ------------    ----------------    ----------------------------------
@//   Jul 07, 2008     Naveen Kumar T                Created
@//
@//-------------------------------------------------------------------------
@*/
37
38@/*
39@// ----------------------------------------------------------------------------
40@// Include Files
41@// ----------------------------------------------------------------------------
42@*/
@// All code in this file is placed in the text section, 4-byte aligned
@// (2^2) as required for ARM-state instructions.
.text
.p2align 2

@// Shift-amount constants: log2(16) and log2(2).
@// NOTE(review): neither symbol is referenced by the routines visible in
@// this file; they are kept so that the set of defined symbols is unchanged.
.equ log2_16, 4
.equ log2_2, 1
47@/*
48@// ----------------------------------------------------------------------------
49@// Struct/Union Types and Define
50@// ----------------------------------------------------------------------------
51@*/
52
53@/*
54@// ----------------------------------------------------------------------------
55@// Static Global Data section variables
56@// ----------------------------------------------------------------------------
57@*/
58@//--------------------------- NONE --------------------------------------------
59
60@/*
61@// ----------------------------------------------------------------------------
62@// Static Prototype Functions
63@// ----------------------------------------------------------------------------
64@*/
65@// -------------------------- NONE --------------------------------------------
66
67@/*
68@// ----------------------------------------------------------------------------
69@// Exported functions
70@// ----------------------------------------------------------------------------
71@*/
72
@/*****************************************************************************
@*
@*  Function Name    : impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q()
@*
@*  Description      : Converts an image from YUV420P (planar) to YUV420SP
@*                     (semi-planar) with the chroma samples interleaved in
@*                     U, V order. The luma plane is copied to pu1_dest_y
@*                     unless convert_uv_only is 1, in which case only the
@*                     interleaved chroma plane is written.
@*
@*  Arguments        : R0           pu1_y
@*                     R1           pu1_u
@*                     R2           pu1_v
@*                     R3           pu1_dest_y
@*                     [SP, #0]     pu1_dest_uv
@*                     [SP, #4]     u2_height
@*                     [SP, #8]     u2_width
@*                     [SP, #12]    u2_stridey
@*                     [SP, #16]    u2_strideu
@*                     [SP, #20]    u2_stridev
@*                     [SP, #24]    u2_dest_stride_y
@*                     [SP, #28]    u2_dest_stride_uv
@*                     [SP, #32]    convert_uv_only
@*                     Stack offsets are relative to SP at function entry.
@*                     Inside the function they are 24 bytes larger because
@*                     of the register push (see the .equ list below).
@*
@*  Values Returned  : None
@*
@*  Register Usage   : R0 - R8, R12, Q0
@*                     R12 is the intra-procedure scratch register and is
@*                     therefore not saved.
@*
@*  Stack Usage      : 24 Bytes (R4 - R8, LR)
@*
@*  Interruptibility : Interruptible
@*
@*  Known Limitations
@*       Assumptions: Image Width:     Must be at least 16. Every row starts
@*                     with an unconditional 16-byte luma transfer
@*                     (8 + 8 bytes for chroma). A width that is not a
@*                     multiple of 16 is handled by one extra transfer that
@*                     overlaps the bytes already written.
@*                     Image Height:    Must be at least 1. The number of
@*                     chroma rows processed is (height + 1) / 2.
@*
@*  Revision History :
@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)
@*         07 06 2010   Varshita        Draft
@*         07 06 2010   Naveen Kr T     Completed
@*
@*****************************************************************************/

@// Offsets of the stack arguments after the 24-byte push of {r4-r8, lr}
.equ fc_uv_dest_uv_offset,         24
.equ fc_uv_height_offset,          28
.equ fc_uv_width_offset,           32
.equ fc_uv_stridey_offset,         36
.equ fc_uv_strideu_offset,         40
.equ fc_uv_stridev_offset,         44
.equ fc_uv_dest_stride_y_offset,   48
.equ fc_uv_dest_stride_uv_offset,  52
.equ fc_uv_convert_uv_only_offset, 56

                .global impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q
impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q:

    @// Save the callee-saved registers used below and the return address
    stmfd           sp!, {r4-r8, lr}

    @// Skip the luma copy when only the chroma conversion is requested
    ldr             r4, [sp, #fc_uv_convert_uv_only_offset]
    cmp             r4, #1
    beq             yuv420sp_uv_chroma

    @/* Luma set-up */
    ldr             r4, [sp, #fc_uv_height_offset]          @// r4 = rows left
    ldr             r5, [sp, #fc_uv_width_offset]           @// r5 = width
    ldr             r7, [sp, #fc_uv_stridey_offset]
    ldr             r8, [sp, #fc_uv_dest_stride_y_offset]

    @// After a row the pointers have advanced by exactly 'width' bytes,
    @// so the end-of-row adjustment is (stride - width).
    sub             r7, r7, r5          @// Source increment
    sub             r8, r8, r5          @// Destination increment

yuv420sp_uv_row_loop_y:
    mov             r6, r5              @// r6 = bytes left in this row

yuv420sp_uv_col_loop_y:
    pld             [r0, #128]
    vld1.8          {q0}, [r0]!         @// copy 16 luma bytes
    vst1.8          {q0}, [r3]!
    sub             r6, r6, #16
    cmp             r6, #15
    bgt             yuv420sp_uv_col_loop_y

    cmp             r6, #0
    beq             yuv420sp_uv_row_loop_end_y

    @// Width is not a multiple of 16: 1..15 bytes are left. Step both
    @// pointers back by (16 - remaining) so that one more full 16-byte
    @// transfer ends exactly at the end of the row.
    @// Ex: for a width of 162 the loop above copies bytes 0..159, the
    @// pointers are moved back to byte 146 and bytes 146..161 are copied
    @// (bytes 146..159 are rewritten with the same values).
    rsb             r6, r6, #16
    sub             r0, r0, r6
    sub             r3, r3, r6

    vld1.8          {q0}, [r0]!
    vst1.8          {q0}, [r3]!

yuv420sp_uv_row_loop_end_y:
    add             r0, r0, r7          @// next source row
    add             r3, r3, r8          @// next destination row
    subs            r4, r4, #1
    bgt             yuv420sp_uv_row_loop_y

yuv420sp_uv_chroma:

    @/* Chroma set-up */
    ldr             r3, [sp, #fc_uv_dest_uv_offset]         @// r3 = pu1_dest_uv
    ldr             r4, [sp, #fc_uv_height_offset]
    ldr             r5, [sp, #fc_uv_width_offset]
    ldr             r7, [sp, #fc_uv_strideu_offset]
    ldr             r12, [sp, #fc_uv_stridev_offset]
    ldr             r8, [sp, #fc_uv_dest_stride_uv_offset]

    add             r4, r4, #1
    mov             r4, r4, lsr #1      @// r4 = chroma rows = (height + 1) / 2

    add             r5, r5, #1
    bic             r5, r5, #1          @// r5 = width rounded up to even
                                        @//    = bytes per interleaved row

    sub             r8, r8, r5          @// Destination increment
    mov             r5, r5, lsr #1      @// r5 = samples per chroma row
    sub             r7, r7, r5          @// U source increment
    sub             r12, r12, r5        @// V source increment (own stride)

yuv420sp_uv_row_loop_uv:
    mov             r6, r5              @// r6 = chroma samples left in this row

yuv420sp_uv_col_loop_uv:
    pld             [r1, #128]
    pld             [r2, #128]
    vld1.8          d0, [r1]!           @// d0 = 8 U samples
    vld1.8          d1, [r2]!           @// d1 = 8 V samples
    vst2.8          {d0, d1}, [r3]!     @// store interleaved U0 V0 U1 V1 ...
    sub             r6, r6, #8
    cmp             r6, #7
    bgt             yuv420sp_uv_col_loop_uv

    cmp             r6, #0
    beq             yuv420sp_uv_row_loop_end_uv

    @// Chroma width is not a multiple of 8: 1..7 samples are left. Step
    @// the U and V pointers back by (8 - remaining) samples and the
    @// destination by twice that many bytes, so that one more 8 + 8 sample
    @// transfer ends exactly at the end of the row.
    @// Ex: for a width of 162 the chroma width is 81. The loop above
    @// handles samples 0..79, the pointers are moved back to sample 73 and
    @// samples 73..80 are transferred (73..79 are rewritten with the same
    @// values).
    rsb             r6, r6, #8
    sub             r1, r1, r6
    sub             r2, r2, r6
    sub             r3, r3, r6, lsl #1

    vld1.8          d0, [r1]!
    vld1.8          d1, [r2]!
    vst2.8          {d0, d1}, [r3]!

yuv420sp_uv_row_loop_end_uv:
    add             r1, r1, r7          @// next U row
    add             r2, r2, r12         @// next V row
    add             r3, r3, r8          @// next destination row
    subs            r4, r4, #1
    bgt             yuv420sp_uv_row_loop_uv

    @// Restore the saved registers and return
    ldmfd           sp!, {r4-r8, pc}
227
228
229
230
231
@/*****************************************************************************
@*
@*  Function Name    : impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q()
@*
@*  Description      : Converts an image from YUV420P (planar) to YUV420SP
@*                     (semi-planar) with the chroma samples interleaved in
@*                     V, U order. The luma plane is copied to pu1_dest_y
@*                     unless convert_uv_only is 1, in which case only the
@*                     interleaved chroma plane is written.
@*                     The routine matches
@*                     impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q except
@*                     that the chroma VLD1.8 loads use the two D registers
@*                     in swapped order, so V is stored first.
@*
@*  Arguments        : R0           pu1_y
@*                     R1           pu1_u
@*                     R2           pu1_v
@*                     R3           pu1_dest_y
@*                     [SP, #0]     pu1_dest_uv
@*                     [SP, #4]     u2_height
@*                     [SP, #8]     u2_width
@*                     [SP, #12]    u2_stridey
@*                     [SP, #16]    u2_strideu
@*                     [SP, #20]    u2_stridev
@*                     [SP, #24]    u2_dest_stride_y
@*                     [SP, #28]    u2_dest_stride_uv
@*                     [SP, #32]    convert_uv_only
@*                     Stack offsets are relative to SP at function entry.
@*                     Inside the function they are 24 bytes larger because
@*                     of the register push (see the .equ list below).
@*
@*  Values Returned  : None
@*
@*  Register Usage   : R0 - R8, R12, Q0
@*                     R12 is the intra-procedure scratch register and is
@*                     therefore not saved.
@*
@*  Stack Usage      : 24 Bytes (R4 - R8, LR)
@*
@*  Interruptibility : Interruptible
@*
@*  Known Limitations
@*       Assumptions: Image Width:     Must be at least 16. Every row starts
@*                     with an unconditional 16-byte luma transfer
@*                     (8 + 8 bytes for chroma). A width that is not a
@*                     multiple of 16 is handled by one extra transfer that
@*                     overlaps the bytes already written.
@*                     Image Height:    Must be at least 1. The number of
@*                     chroma rows processed is (height + 1) / 2.
@*
@*  Revision History :
@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)
@*         07 06 2010   Varshita        Draft
@*         07 06 2010   Naveen Kr T     Completed
@*
@*****************************************************************************/

@// Offsets of the stack arguments after the 24-byte push of {r4-r8, lr}
.equ fc_vu_dest_uv_offset,         24
.equ fc_vu_height_offset,          28
.equ fc_vu_width_offset,           32
.equ fc_vu_stridey_offset,         36
.equ fc_vu_strideu_offset,         40
.equ fc_vu_stridev_offset,         44
.equ fc_vu_dest_stride_y_offset,   48
.equ fc_vu_dest_stride_uv_offset,  52
.equ fc_vu_convert_uv_only_offset, 56

                .global impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q
impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q:

    @// Save the callee-saved registers used below and the return address
    stmfd           sp!, {r4-r8, lr}

    @// Skip the luma copy when only the chroma conversion is requested
    ldr             r4, [sp, #fc_vu_convert_uv_only_offset]
    cmp             r4, #1
    beq             yuv420sp_vu_chroma

    @/* Luma set-up */
    ldr             r4, [sp, #fc_vu_height_offset]          @// r4 = rows left
    ldr             r5, [sp, #fc_vu_width_offset]           @// r5 = width
    ldr             r7, [sp, #fc_vu_stridey_offset]
    ldr             r8, [sp, #fc_vu_dest_stride_y_offset]

    @// After a row the pointers have advanced by exactly 'width' bytes,
    @// so the end-of-row adjustment is (stride - width).
    sub             r7, r7, r5          @// Source increment
    sub             r8, r8, r5          @// Destination increment

yuv420sp_vu_row_loop_y:
    mov             r6, r5              @// r6 = bytes left in this row

yuv420sp_vu_col_loop_y:
    pld             [r0, #128]
    vld1.8          {q0}, [r0]!         @// copy 16 luma bytes
    vst1.8          {q0}, [r3]!
    sub             r6, r6, #16
    cmp             r6, #15
    bgt             yuv420sp_vu_col_loop_y

    cmp             r6, #0
    beq             yuv420sp_vu_row_loop_end_y

    @// Width is not a multiple of 16: 1..15 bytes are left. Step both
    @// pointers back by (16 - remaining) so that one more full 16-byte
    @// transfer ends exactly at the end of the row.
    @// Ex: for a width of 162 the loop above copies bytes 0..159, the
    @// pointers are moved back to byte 146 and bytes 146..161 are copied
    @// (bytes 146..159 are rewritten with the same values).
    rsb             r6, r6, #16
    sub             r0, r0, r6
    sub             r3, r3, r6

    vld1.8          {q0}, [r0]!
    vst1.8          {q0}, [r3]!

yuv420sp_vu_row_loop_end_y:
    add             r0, r0, r7          @// next source row
    add             r3, r3, r8          @// next destination row
    subs            r4, r4, #1
    bgt             yuv420sp_vu_row_loop_y

yuv420sp_vu_chroma:

    @/* Chroma set-up */
    ldr             r3, [sp, #fc_vu_dest_uv_offset]         @// r3 = pu1_dest_uv
    ldr             r4, [sp, #fc_vu_height_offset]
    ldr             r5, [sp, #fc_vu_width_offset]
    ldr             r7, [sp, #fc_vu_strideu_offset]
    ldr             r12, [sp, #fc_vu_stridev_offset]
    ldr             r8, [sp, #fc_vu_dest_stride_uv_offset]

    add             r4, r4, #1
    mov             r4, r4, lsr #1      @// r4 = chroma rows = (height + 1) / 2

    add             r5, r5, #1
    bic             r5, r5, #1          @// r5 = width rounded up to even
                                        @//    = bytes per interleaved row

    sub             r8, r8, r5          @// Destination increment
    mov             r5, r5, lsr #1      @// r5 = samples per chroma row
    sub             r7, r7, r5          @// U source increment
    sub             r12, r12, r5        @// V source increment (own stride)

yuv420sp_vu_row_loop_uv:
    mov             r6, r5              @// r6 = chroma samples left in this row

yuv420sp_vu_col_loop_uv:
    pld             [r1, #128]
    pld             [r2, #128]
    vld1.8          d1, [r1]!           @// d1 = 8 U samples
    vld1.8          d0, [r2]!           @// d0 = 8 V samples
    vst2.8          {d0, d1}, [r3]!     @// store interleaved V0 U0 V1 U1 ...
    sub             r6, r6, #8
    cmp             r6, #7
    bgt             yuv420sp_vu_col_loop_uv

    cmp             r6, #0
    beq             yuv420sp_vu_row_loop_end_uv

    @// Chroma width is not a multiple of 8: 1..7 samples are left. Step
    @// the U and V pointers back by (8 - remaining) samples and the
    @// destination by twice that many bytes, so that one more 8 + 8 sample
    @// transfer ends exactly at the end of the row.
    @// Ex: for a width of 162 the chroma width is 81. The loop above
    @// handles samples 0..79, the pointers are moved back to sample 73 and
    @// samples 73..80 are transferred (73..79 are rewritten with the same
    @// values).
    rsb             r6, r6, #8
    sub             r1, r1, r6
    sub             r2, r2, r6
    sub             r3, r3, r6, lsl #1

    vld1.8          d1, [r1]!
    vld1.8          d0, [r2]!
    vst2.8          {d0, d1}, [r3]!

yuv420sp_vu_row_loop_end_uv:
    add             r1, r1, r7          @// next U row
    add             r2, r2, r12         @// next V row
    add             r3, r3, r8          @// next destination row
    subs            r4, r4, #1
    bgt             yuv420sp_vu_row_loop_uv

    @// Restore the saved registers and return
    ldmfd           sp!, {r4-r8, pc}
391
392
393
394
395
396