; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"

; Horizontally scales one row of planar YUV to packed ARGB using linear
; interpolation between neighbouring source samples (MMX).
;
; SYMBOL is not defined in this file; presumably the including file defines it
; to the name of the function to emit. EXPORT, mangle, PROLOGUE, LOAD_SYM,
; PUSH, ADD, RET and MOVQ are likewise not defined here and are presumably
; macros supplied through the include above - confirm before changing how
; they are used.
;
; The horizontal position X is fixed point with 16 fractional bits: the Y
; index is X >> 16, and the U/V index is X >> 17 (one chroma sample per two
; luma positions). X advances by source_dx after every output pixel and the
; row ends once X >= width * source_dx.
;
; Each interpolated component indexes kCoefficientsRgbY, whose entries are
; 8 bytes wide: Y entries start at byte offset 0, U entries at 2048 and V
; entries at 4096. The three entries are added with signed word saturation,
; shifted right arithmetically by 6 and packed to bytes with unsigned
; saturation.
;
; Both interpolation macros read the byte at index + 1 as well as the byte at
; index, so the source planes must be readable one byte past the last index
; that is sampled.
;
; MMX registers mm0-mm2 are used and no emms is issued in this file.
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0

  extern    mangle(kCoefficientsRgbY)

; Parameters are in the following order:
; 1. Y plane
; 2. U plane
; 3. V plane
; 4. ARGB frame
; 5. Width
; 6. Source dx

PROLOGUE  6, 7, 3, Y, R0, R1, ARGB, R2, R3, TEMP

; WORD_SIZE is the memory operand size of one general-purpose register, used
; for the stack slots defined below.
%if gprsize == 8
%define     WORD_SIZE   QWORD
%else
%define     WORD_SIZE   DWORD
%endif

; Define register aliases.
; Several aliases deliberately share a physical register. The *_ARG_REG names
; are only meaningful until the argument has been pushed to the stack below;
; after that the register is reused under its other name (R1: V argument,
; then X; R2: width argument, then COMPL).
%define     Xq                  R1q     ; Current X position
%define     COMPLq              R2q     ; Sample at index; also the blended result
%define     COMPLd              R2d     ; 32-bit view of COMPLq
%define     U_ARG_REGq          R0q     ; U plane address argument
%define     V_ARG_REGq          R1q     ; V plane address argument
%define     SOURCE_DX_ARG_REGq  R3q     ; Source dx argument
%define     WIDTH_ARG_REGq      R2q     ; Width argument

%ifdef PIC
; PIC code shares one register (R0) between COMPR, U and V, so each plane
; address is reloaded from the stack before it is used and is destroyed by the
; interpolation that follows. This frees R3q to hold the address of the
; coefficient table.
%define     COMPRq              R0q     ; Sample at index + 1
%define     COMPRd              R0d     ; 32-bit view of COMPRq
%define     Uq                  R0q     ; U plane address
%define     Vq                  R0q     ; V plane address
; Stack slot holding the U plane address (exists in PIC code only).
%define     U_PLANE             WORD_SIZE [rsp + 3 * gprsize]
%define     TABLE               R3q     ; Address of the table
%else
; Non-PIC code defines. COMPR and V share R3, so V is reloaded from the stack
; on every iteration; U keeps R0 for the whole loop and the table is addressed
; directly by symbol.
%define     COMPRq              R3q     ; Sample at index + 1
%define     COMPRd              R3d     ; 32-bit view of COMPRq
%define     Uq                  R0q     ; U plane address
%define     Vq                  R3q     ; V plane address
%define     TABLE               mangle(kCoefficientsRgbY)
%endif

; Defines for stack variables. These are used in both PIC and non-PIC code.
; Slot layout after the pushes below, from the top of the stack upwards:
;   [rsp]               source_width = width * source_dx
;   [rsp + gprsize]     source_dx
;   [rsp + 2 * gprsize] V plane address
;   [rsp + 3 * gprsize] U plane address (PIC only, see U_PLANE)
%define     V_PLANE             WORD_SIZE [rsp + 2 * gprsize]
%define     SOURCE_DX           WORD_SIZE [rsp + gprsize]
%define     SOURCE_WIDTH        WORD_SIZE [rsp]

; Handle stack variables differently for PIC and non-PIC code.

%ifdef PIC
; Stack usage for PIC code: four slots. The U plane address is pushed as well,
; because its register is shared with COMPR and V.
  PUSH      U_ARG_REGq
  PUSH      V_ARG_REGq
  PUSH      SOURCE_DX_ARG_REGq
  imul      WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq  ; source_width = width * source_dx
  PUSH      WIDTH_ARG_REGq

; Load the address of kCoefficientsRgbY into TABLE. TABLE is R3q, which still
; holds source_dx, so source_dx is copied to TEMPq first.
  mov       TEMPq, SOURCE_DX_ARG_REGq    ; Need to save source_dx first
  LOAD_SYM  TABLE, mangle(kCoefficientsRgbY)
; From here on SOURCE_DX_ARG_REGq names the saved copy in TEMPq.
%define     SOURCE_DX_ARG_REGq  TEMPq   ; Redirect SOURCE_DX_ARG_REGq to TEMPq
%else
; Stack usage for non-PIC code: three slots (V plane, source_dx, source_width).
  PUSH      V_ARG_REGq
  PUSH      SOURCE_DX_ARG_REGq
  imul      WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq  ; source_width = width * source_dx
  PUSH      WIDTH_ARG_REGq
%endif

; EPILOGUE releases the stack slots pushed above: four in PIC code, three
; otherwise. It is invoked before each RET.
%macro EPILOGUE 0
%ifdef PIC
  ADD       rsp, 4 * gprsize
%else
  ADD       rsp, 3 * gprsize
%endif
%endmacro

; Choose the starting X: 0 when source_dx is below 0x20000 (2.0 in 16.16
; fixed point), otherwise 0x8000 (0.5). Either way, enter the loop through its
; condition check at .lscaleend so that an empty row produces no output.
  xor       Xq, Xq                       ; x = 0
  cmp       SOURCE_DX_ARG_REGq, 0x20000
  jl        .lscaleend
  mov       Xq, 0x8000                   ; x = 0.5 for 1/2 or less
  jmp       .lscaleend

; Main loop: each iteration produces two ARGB pixels that share one
; interpolated U/V pair.
.lscaleloop:
%ifdef PIC
  mov       Uq, U_PLANE                  ; PIC code saves U_PLANE on stack.
%endif

; Define macros for scaling YUV components since they are reused.

; SCALEUV: linearly interpolates one chroma component at position X.
; The macro's single argument is the register holding the plane base address.
; The result is left in COMPLq; TEMPq and COMPRq are clobbered, and in PIC
; code COMPRq is the same register as the plane address argument. X is not
; modified.
%macro SCALEUV 1
  mov       TEMPq, Xq
  sar       TEMPq, 0x11                  ; Chroma index = x >> 17
  movzx     COMPLd, BYTE [%1 + TEMPq]
  movzx     COMPRd, BYTE [%1 + TEMPq + 1]
  mov       TEMPq, Xq
  and       TEMPq, 0x1fffe               ; Fraction: weight of sample index + 1
  imul      COMPRq, TEMPq
  xor       TEMPq, 0x1fffe               ; 0x1fffe - fraction: weight of sample index
  imul      COMPLq, TEMPq
  add       COMPLq, COMPRq
  shr       COMPLq, 17                   ; Drop the 17 fractional bits
%endmacro
  SCALEUV   Uq                           ; Use the above macro to scale U
  movq      mm0, [TABLE + 2048 + 8 * COMPLq]   ; mm0 = U table entry

  mov       Vq, V_PLANE                  ; Read V address from stack
  SCALEUV   Vq                           ; Use the above macro to scale V
  paddsw    mm0, [TABLE + 4096 + 8 * COMPLq]   ; mm0 += V table entry

; SCALEY: linearly interpolates luma at position X and then advances X by
; source_dx. The result is left in COMPLq; TEMPq and COMPRq are clobbered.
%macro SCALEY 0
  mov       TEMPq, Xq
  sar       TEMPq, 0x10                  ; Luma index = x >> 16
  movzx     COMPLd, BYTE [Yq + TEMPq]
  movzx     COMPRd, BYTE [Yq + TEMPq + 1]
  mov       TEMPq, Xq                    ; Keep the old x for the fraction
  add       Xq, SOURCE_DX                ; Add source_dx from stack
  and       TEMPq, 0xffff                ; Fraction: weight of sample index + 1
  imul      COMPRq, TEMPq
  xor       TEMPq, 0xffff                ; 0xffff - fraction: weight of sample index
  imul      COMPLq, TEMPq
  add       COMPLq, COMPRq
  shr       COMPLq, 16                   ; Drop the 16 fractional bits
%endmacro
  SCALEY                                 ; Use the above macro to scale Y1
  movq      mm1, [TABLE + 8 * COMPLq]    ; mm1 = Y1 table entry

; If the row ends after the first pixel of this pair, emit that single pixel.
  cmp       Xq, SOURCE_WIDTH             ; Compare source_width from stack
  jge       .lscalelastpixel

  SCALEY                                 ; Use the above macro to scale Y2
  movq      mm2, [TABLE + 8 * COMPLq]    ; mm2 = Y2 table entry

; Combine the luma and chroma contributions, scale down, pack both pixels to
; bytes and store 8 bytes.
  paddsw    mm1, mm0
  paddsw    mm2, mm0
  psraw     mm1, 0x6
  psraw     mm2, 0x6
  packuswb  mm1, mm2
  MOVQ      [ARGBq], mm1
  add       ARGBq, 0x8

; Loop condition; this is also the entry point of the loop.
.lscaleend:
  cmp       Xq, SOURCE_WIDTH             ; Compare source_width from stack
  jl        .lscaleloop
  EPILOGUE
  RET

; Tail: only one pixel remains, so store 4 bytes instead of 8.
.lscalelastpixel:
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1
  movd      [ARGBq], mm1
  EPILOGUE
  RET