• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2; Use of this source code is governed by a BSD-style license that can be
3; found in the LICENSE file.
4
5%include "media/base/simd/media_export.asm"
6
; Entry point of an MMX routine that linearly interpolates Y, U and V samples
; along one run of pixels and converts them to ARGB through a lookup table.
; SYMBOL is not defined in this file; presumably the including file defines it
; before including this code - confirm.
; Horizontal positions are fixed-point with 16 fractional bits (0x8000 = 0.5).
7  EXPORT    SYMBOL
8  align     function_align
9
10mangle(SYMBOL):
11  %assign   stack_offset 0
12
; Conversion table; the code below reads 8-byte entries from it.
13  extern    mangle(kCoefficientsRgbY)
14
15; Parameters are in the following order:
16; 1. Y plane    - address of the source Y samples (one byte each)
17; 2. U plane    - address of the source U samples (one byte each)
18; 3. V plane    - address of the source V samples (one byte each)
19; 4. ARGB frame - destination address; 4 bytes are written per pixel
20; 5. Width      - number of output pixels (multiplied by source dx below)
21; 6. Source dx  - fixed-point step added to the source position per pixel
22
; PROLOGUE is defined outside this file. Its three leading numbers are
; presumably the argument count, general-purpose register count and SIMD
; register count - confirm against the macro definition. The names that follow
; are bound in parameter order: Y, R0 (U), R1 (V), ARGB, R2 (width),
; R3 (source dx); TEMP is a seventh register used as scratch.
23PROLOGUE  6, 7, 3, Y, R0, R1, ARGB, R2, R3, TEMP
24
; WORD_SIZE is the operand-size keyword for one stack slot: QWORD when
; general-purpose registers are 8 bytes wide, DWORD otherwise.
25%if gprsize == 8
26%define     WORD_SIZE   QWORD
27%else
28%define     WORD_SIZE   DWORD
29%endif
30
31; Define register aliases. Several aliases name the same register: the
; *_ARG_REG names are only meaningful until the arguments have been pushed,
; after which the registers are reused for the loop variables.
32%define     Xq                  R1q     ; Current X position (fixed-point, 16 fractional bits)
33%define     COMPLq              R2q     ; Sample at the current index; also the interpolated result
34%define     COMPLd              R2d     ; 32-bit view of COMPLq
35%define     U_ARG_REGq          R0q     ; U plane address argument
36%define     V_ARG_REGq          R1q     ; V plane address argument
37%define     SOURCE_DX_ARG_REGq  R3q     ; Source dx argument
38%define     WIDTH_ARG_REGq      R2q     ; Width argument
39
40%ifdef PIC
41; PIC code shares one register (R0q) between COMPR, U and V, so the U and V
42; addresses are reloaded from the stack before each use. This leaves R3q free
; to hold the address of the YUV table.
43%define     COMPRq              R0q     ; Sample at the current index + 1
44%define     COMPRd              R0d     ; 32-bit view of COMPRq
45%define     Uq                  R0q     ; U plane address
46%define     Vq                  R0q     ; V plane address
; Stack slot holding the U plane address (fourth slot above rsp).
47%define     U_PLANE             WORD_SIZE [rsp + 3 * gprsize]
48%define     TABLE               R3q     ; Address of the table
49%else
50; Non-PIC code defines. COMPR and V share R3q, so only the V address has to be
; reloaded from the stack; U stays in R0q and the table is addressed by symbol.
51%define     COMPRq              R3q     ; Sample at the current index + 1
52%define     COMPRd              R3d     ; 32-bit view of COMPRq
53%define     Uq                  R0q     ; U plane address
54%define     Vq                  R3q     ; V plane address
55%define     TABLE               mangle(kCoefficientsRgbY)
56%endif
57
58; Defines for stack variables. These are used in both PIC and non-PIC code.
; The offsets follow the push order used at function entry: the last value
; pushed (source_width) is at [rsp].
59%define     V_PLANE             WORD_SIZE [rsp + 2 * gprsize]
60%define     SOURCE_DX           WORD_SIZE [rsp + gprsize]
61%define     SOURCE_WIDTH        WORD_SIZE [rsp]
62
63; Handle stack variables differently for PIC and non-PIC code.
; Resulting layout, from rsp upwards in gprsize-sized slots:
;   [rsp]               source_width (width * source_dx)
;   [rsp + gprsize]     source_dx
;   [rsp + 2 * gprsize] V plane address
;   [rsp + 3 * gprsize] U plane address (PIC only)
64
65%ifdef PIC
66; Define stack usage for PIC code. PIC code also pushes the U plane address,
; because its register (R0q) is reused inside the loop.
67  PUSH      U_ARG_REGq
68  PUSH      V_ARG_REGq
69  PUSH      SOURCE_DX_ARG_REGq
70  imul      WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq  ; source_width = width * source_dx
71  PUSH      WIDTH_ARG_REGq
72
73; Load the address of kCoefficientsRgbY into TABLE. TABLE is R3q in PIC code,
; the same register that holds source_dx, so source_dx is copied out first.
74  mov       TEMPq, SOURCE_DX_ARG_REGq    ; Need to save source_dx first
75  LOAD_SYM  TABLE, mangle(kCoefficientsRgbY)
76%define     SOURCE_DX_ARG_REGq  TEMPq   ; From here on SOURCE_DX_ARG_REGq names TEMPq
77%else
78; Define stack usage. Non-PIC code pushes just 3 registers onto the stack.
79  PUSH      V_ARG_REGq
80  PUSH      SOURCE_DX_ARG_REGq
81  imul      WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq  ; source_width = width * source_dx
82  PUSH      WIDTH_ARG_REGq
83%endif
84
; EPILOGUE: releases the stack slots pushed at function entry by adding their
; total size back to rsp - four slots in PIC code, three otherwise.
; It does not return; each use in this file is followed by RET.
85%macro EPILOGUE 0
86%ifdef PIC
87  ADD       rsp, 4 * gprsize
88%else
89  ADD       rsp, 3 * gprsize
90%endif
91%endmacro
92
; Choose the starting source position, then jump to the loop test so that a
; zero-length run writes nothing. 0x20000 is 2.0 and 0x8000 is 0.5 in the
; fixed-point format used for x (16 fractional bits).
93  xor       Xq, Xq                       ; x = 0
94  cmp       SOURCE_DX_ARG_REGq, 0x20000
95  jl        .lscaleend                   ; source_dx < 2.0 (signed): keep x = 0
96  mov       Xq, 0x8000                   ; x = 0.5 when scaling down to 1/2 or less
97  jmp       .lscaleend
98
; Top of the loop body; reached from the test at .lscaleend.
99.lscaleloop:
100%ifdef PIC
; In PIC code Uq shares its register with COMPR and Vq, so the U plane address
; has to be reloaded on every iteration.
101  mov       Uq, U_PLANE                  ; PIC code saves U_PLANE on stack.
102%endif
103
104; Define macros for scaling YUV components since they are reused.
;
; SCALEUV plane: linearly interpolates one chroma sample at position x.
;   Argument: register holding the base address of the chroma plane.
;   Reads the bytes at index (x >> 17) and index + 1 of that plane.
;   Result:   COMPLq = (left * (0x1fffe - frac) + right * frac) >> 17,
;             where frac = x & 0x1fffe.
;   Clobbers: TEMPq, COMPRq (and so any alias that shares their registers).
;   x itself is left unchanged.
105%macro SCALEUV 1
106  mov       TEMPq, Xq
107  sar       TEMPq, 0x11                  ; chroma index = x >> 17
108  movzx     COMPLd, BYTE [%1 + TEMPq]
109  movzx     COMPRd, BYTE [%1 + TEMPq + 1]
110  mov       TEMPq, Xq
111  and       TEMPq, 0x1fffe               ; frac = weight of the right sample
112  imul      COMPRq, TEMPq
113  xor       TEMPq, 0x1fffe               ; 0x1fffe - frac = weight of the left sample
114  imul      COMPLq, TEMPq
115  add       COMPLq, COMPRq
116  shr       COMPLq, 17
117%endmacro
; Interpolate U and V at the current x and accumulate their table entries in
; mm0. Entries are 8 bytes each; the U lookup starts 2048 bytes (256 entries)
; into the table and the V lookup 4096 bytes (512 entries) into it.
; NOTE(review): presumably the table holds 256 Y, then 256 U, then 256 V
; entries - confirm against the definition of kCoefficientsRgbY.
118  SCALEUV   Uq                           ; Use the above macro to scale U
119  movq      mm0, [TABLE + 2048 + 8 * COMPLq]
120
; SCALEUV overwrote COMPR, which shares a register with Vq in both PIC and
; non-PIC code, so the V address is fetched from its stack slot.
121  mov       Vq, V_PLANE                  ; Read V address from stack
122  SCALEUV   Vq                           ; Use the above macro to scale V
123  paddsw    mm0, [TABLE + 4096 + 8 * COMPLq]   ; mm0 = U entry + V entry (signed saturating)
124
; SCALEY: linearly interpolates one Y sample at position x, then advances x.
;   Reads the bytes at index (x >> 16) and index + 1 of the Y plane (Yq).
;   Result:   COMPLq = (left * (0xffff - frac) + right * frac) >> 16,
;             where frac = x & 0xffff (taken before x is advanced).
;   Side effect: x += source_dx (read from the stack).
;   Clobbers: TEMPq, COMPRq (and so any alias that shares their registers).
125%macro SCALEY 0
126  mov       TEMPq, Xq
127  sar       TEMPq, 0x10                   ; luma index = x >> 16
128  movzx     COMPLd, BYTE [Yq + TEMPq]
129  movzx     COMPRd, BYTE [Yq + TEMPq + 1]
130  mov       TEMPq, Xq                     ; keep the old x for the fraction
131  add       Xq, SOURCE_DX                 ; Add source_dx from stack
132  and       TEMPq, 0xffff                 ; frac = weight of the right sample
133  imul      COMPRq, TEMPq
134  xor       TEMPq, 0xffff                 ; 0xffff - frac = weight of the left sample
135  imul      COMPLq, TEMPq
136  add       COMPLq, COMPRq
137  shr       COMPLq, 16
138%endmacro
; Each iteration produces up to two output pixels that share the chroma sum
; already held in mm0. Y table entries are read from the start of the table,
; 8 bytes per entry.
139  SCALEY                                  ; Use the above macro to scale Y1
140  movq      mm1, [TABLE + 8 * COMPLq]
141
; SCALEY advanced x. If x has reached source_width, only this one pixel is
; left, so finish it on the single-pixel path.
142  cmp       Xq, SOURCE_WIDTH              ; Compare source_width from stack
143  jge       .lscalelastpixel
144
145  SCALEY                                  ; Use the above macro to scale Y2
146  movq      mm2, [TABLE + 8 * COMPLq]
147
; Add the chroma sum to each Y entry, shift every 16-bit lane right by 6, and
; pack both pixels to unsigned bytes with saturation.
; NOTE(review): the shift suggests the table values carry 6 fractional bits -
; confirm against the table definition.
148  paddsw    mm1, mm0
149  paddsw    mm2, mm0
150  psraw     mm1, 0x6
151  psraw     mm2, 0x6
152  packuswb  mm1, mm2
153  MOVQ      [ARGBq], mm1                  ; store two pixels (8 bytes)
154  add       ARGBq, 0x8
155
; Loop test, also the entry point from the setup code: continue while
; x < source_width (signed compare), otherwise release the stack and return.
; NOTE(review): no emms is executed on either return path; presumably the
; caller clears the MMX state - confirm.
156.lscaleend:
157  cmp       Xq, SOURCE_WIDTH     ; Compare source_width from stack
158  jl        .lscaleloop
159  EPILOGUE
160  RET
161
; Single remaining pixel: same arithmetic as above for mm1 only, then a
; 4-byte store. ARGBq is not advanced because the function returns here.
162.lscalelastpixel:
163  paddsw    mm1, mm0
164  psraw     mm1, 6
165  packuswb  mm1, mm1
166  movd      [ARGBq], mm1
167  EPILOGUE
168  RET
169