• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

; NOTE(review): EXPORT is not defined in this file; presumably it comes from
; media_export.asm - confirm.
%include "media/base/simd/media_export.asm"

  ; SYMBOL and function_align are not defined in this file either; the
  ; including file / macro package is expected to provide them - confirm.
  ; The alignment directive applies to the routine entry label that follows.
  EXPORT    SYMBOL
  align     function_align

; Non-PIC code is the fastest so use this if possible.
;
; Row converter: reads bytes from three separate streams (Y, U, V), looks each
; one up in kCoefficientsRgbY, and writes 4 output bytes per pixel to ARGB.
;
; Arguments (named by PROLOGUE, in order): Y, U, V, ARGB, WIDTH.
; Scratch registers: TEMPU, TEMPV (general purpose), mm0-mm2 (MMX).
; NOTE(review): PROLOGUE/RET and the q/d register suffixes are defined outside
; this file; they look like the x86inc.asm conventions (arg count, GPR count,
; vector register count, then names) - confirm.
;
; Table layout as used by the addressing below (8-byte entries, index * 8):
;   base + 0    : entries indexed by the Y byte
;   base + 2048 : entries indexed by the U byte
;   base + 4096 : entries indexed by the V byte
; Each entry is treated as four signed 16-bit lanes (paddsw / psraw 6), which
; are then packed to unsigned bytes with saturation (packuswb).
;
; One U byte and one V byte are shared by two consecutive Y bytes.
; NOTE(review): no emms is issued in this routine; presumably the caller is
; responsible for clearing MMX state - confirm.
%ifndef PIC
mangle(SYMBOL):
  %assign   stack_offset 0
  PROLOGUE  5, 7, 3, Y, U, V, ARGB, WIDTH, TEMPU, TEMPV
  extern    mangle(kCoefficientsRgbY)
  ; Enter at the loop test so that a width below 2 never runs the loop body.
  jmp       .convertend

  ; Two pixels per iteration. Loads and table lookups are interleaved, and
  ; TEMPU/TEMPV are reused for the two Y bytes once the U/V lookups are issued.
.convertloop:
  movzx     TEMPUd, BYTE [Uq]
  add       Uq, 1
  movzx     TEMPVd, BYTE [Vq]
  add       Vq, 1
  ; mm0 = U entry + V entry (saturating), shared by both pixels.
  movq      mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPUq]
  movzx     TEMPUd, BYTE [Yq]
  paddsw    mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPVq]
  movzx     TEMPVd, BYTE [Yq + 1]
  ; mm1 / mm2 = Y entries for the first / second pixel.
  movq      mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPUq]
  add       Yq, 2
  movq      mm2, [mangle(kCoefficientsRgbY) + 8 * TEMPVq]
  paddsw    mm1, mm0
  paddsw    mm2, mm0
  ; Drop the 6 low (fractional) bits, then pack both pixels into 8 bytes.
  psraw     mm1, 6
  psraw     mm2, 6
  packuswb  mm1, mm2
  ; NOTE(review): upper-case MOVQ is not defined in this file; presumably the
  ; includer chooses the actual store instruction - confirm.
  MOVQ      [ARGBq], mm1
  add       ARGBq, 8

.convertend:
  ; Continue while at least two pixels remained before the subtraction.
  sub       WIDTHq, 2
  jns       .convertloop

  ; If number of pixels is odd then compute it.
  ; For a non-negative starting width, WIDTH is now -2 (even) or -1 (odd),
  ; so bit 0 tells whether one pixel is left.
  and       WIDTHq, 1
  jz        .convertdone

  ; Single trailing pixel: same lookups, but only 4 bytes are stored and the
  ; source pointers are not advanced because nothing reads them afterwards.
  movzx     TEMPUd, BYTE [Uq]
  movq      mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPUq]
  movzx     TEMPVd, BYTE [Vq]
  paddsw    mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPVq]
  movzx     TEMPUd, BYTE [Yq]
  movq      mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPUq]
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1
  movd      [ARGBq], mm1

.convertdone:
  RET
%endif

; With PIC code we need to load the address of mangle(kCoefficientsRgbY).
; This code is slower than the non-PIC version.
;
; Row converter: reads bytes from three separate streams (Y, U, V), looks each
; one up in kCoefficientsRgbY, and writes 4 output bytes per pixel to ARGB.
;
; Arguments (named by PROLOGUE, in order): Y, U, V, ARGB, WIDTH.
; Scratch registers: TEMP (lookup index), TABLE (table base), mm0-mm2 (MMX).
; NOTE(review): PROLOGUE/RET, LOAD_SYM and the q/d register suffixes are
; defined outside this file; PROLOGUE looks like the x86inc.asm convention
; (arg count, GPR count, vector register count, then names) - confirm.
;
; Table layout as used by the addressing below (8-byte entries, index * 8):
;   TABLE + 0    : entries indexed by the Y byte
;   TABLE + 2048 : entries indexed by the U byte
;   TABLE + 4096 : entries indexed by the V byte
;
; One U byte and one V byte are shared by two consecutive Y bytes.
; NOTE(review): no emms is issued in this routine; presumably the caller is
; responsible for clearing MMX state - confirm.
%ifdef PIC
mangle(SYMBOL):
  %assign   stack_offset 0
  PROLOGUE  5, 7, 3, Y, U, V, ARGB, WIDTH, TEMP, TABLE

  ; The table cannot be addressed directly here, so its address is placed in
  ; TABLE once and every lookup below is relative to that register.
  extern    mangle(kCoefficientsRgbY)
  LOAD_SYM  TABLEq, mangle(kCoefficientsRgbY)

  ; Enter at the loop test so that a width below 2 never runs the loop body.
  jmp       .convertend

  ; Two pixels per iteration.
.convertloop:
  ; mm0 = U entry.
  movzx     TEMPd, BYTE [Uq]
  movq      mm0, [TABLEq + 2048 + 8 * TEMPq]
  add       Uq, 1

  ; mm0 += V entry (saturating); shared by both pixels.
  movzx     TEMPd, BYTE [Vq]
  paddsw    mm0, [TABLEq + 4096 + 8 * TEMPq]
  add       Vq, 1

  ; mm1 = Y entry for the first pixel.
  movzx     TEMPd, BYTE [Yq]
  movq      mm1, [TABLEq + 8 * TEMPq]

  ; mm2 = Y entry for the second pixel.
  movzx     TEMPd, BYTE [Yq + 1]
  movq      mm2, [TABLEq + 8 * TEMPq]
  add       Yq, 2

  ; Add UV components to Y component.
  paddsw    mm1, mm0
  paddsw    mm2, mm0

  ; Down shift and then pack.
  ; The shift drops the 6 low (fractional) bits; packuswb saturates each
  ; 16-bit lane to an unsigned byte, giving 8 output bytes for two pixels.
  psraw     mm1, 6
  psraw     mm2, 6
  packuswb  mm1, mm2
  ; NOTE(review): upper-case MOVQ is not defined in this file; presumably the
  ; includer chooses the actual store instruction - confirm.
  MOVQ      [ARGBq], mm1
  add       ARGBq, 8

.convertend:
  ; Continue while at least two pixels remained before the subtraction.
  sub       WIDTHq, 2
  jns       .convertloop

  ; If number of pixels is odd then compute it.
  ; For a non-negative starting width, WIDTH is now -2 (even) or -1 (odd),
  ; so bit 0 tells whether one pixel is left.
  and       WIDTHq, 1
  jz        .convertdone

  ; Single trailing pixel: same lookups, but only 4 bytes are stored and the
  ; source pointers are not advanced because nothing reads them afterwards.
  movzx     TEMPd, BYTE [Uq]
  movq      mm0, [TABLEq + 2048 + 8 * TEMPq]
  movzx     TEMPd, BYTE [Vq]
  paddsw    mm0, [TABLEq + 4096 + 8 * TEMPq]
  movzx     TEMPd, BYTE [Yq]
  movq      mm1, [TABLEq + 8 * TEMPq]
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1
  movd      [ARGBq], mm1

.convertdone:
  RET
%endif
