; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"

; SCALE_LOAD_CHROMA
; Leaves in mm0 the saturating sum of two 8-byte table rows: the row at
; TABLE + 2048 selected by the U byte at index (Xq >> 17), and the row at
; TABLE + 4096 selected by the V byte at that same index.
; Overwrites TEMP; Xq is only read.
%macro SCALE_LOAD_CHROMA 0
  mov       TEMPq, Xq
  sar       TEMPq, 17
  movzx     TEMPd, BYTE [Uq + TEMPq]
  movq      mm0, [TABLE + 2048 + 8 * TEMPq]
  mov       TEMPq, Xq
  sar       TEMPq, 17
  movzx     TEMPd, BYTE [Vq + TEMPq]
  paddsw    mm0, [TABLE + 4096 + 8 * TEMPq]
%endmacro

; SCALE_LOAD_LUMA dst
; Loads into the MMX register given as the argument the 8-byte table row
; selected by the Y byte at index (Xq >> 16).
; Overwrites TEMP; Xq is only read.
%macro SCALE_LOAD_LUMA 1
  mov       TEMPq, Xq
  sar       TEMPq, 16
  movzx     TEMPd, BYTE [Yq + TEMPq]
  movq      %1, [TABLE + 8 * TEMPq]
%endmacro

  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0

  extern    mangle(kCoefficientsRgbY)

; Arguments, in the order they are received:
;   1. Y plane
;   2. U plane
;   3. V plane
;   4. ARGB frame (destination)
;   5. Width
;   6. Source dx
;
; Arguments 5 and 6 arrive in R1 and R2. R2 is recycled as the running source
; position (Xq), so the source step is kept on the stack and re-read from
; there on every use.
;
; NOTE(review): no emms is executed before returning; presumably the caller
; is responsible for clearing the MMX state - confirm.

PROLOGUE  6, 7, 3, Y, U, V, ARGB, R1, R2, TEMP

%ifdef ARCH_X86_64
%define     WORD_SIZE   QWORD
%else
%define     WORD_SIZE   DWORD
%endif

%ifdef PIC
  ; Position-independent build: R1 has to hold the table address, so the
  ; width is moved to the stack (above the source step) before R1 is
  ; overwritten.
  PUSH      R1q  ; Width
  PUSH      R2q  ; Source dx
  LOAD_SYM  R1q, mangle(kCoefficientsRgbY)
%define     WIDTH             WORD_SIZE [rsp + gprsize]
%define     TABLE             R1q
%define     SCALE_SPILL_SIZE  (2 * gprsize)
%else
  ; Non-PIC build: the table is addressed directly by symbol and the width
  ; stays in R1; only the source step is spilled.
  PUSH      R2q  ; Source dx
%define     WIDTH             R1q
%define     TABLE             mangle(kCoefficientsRgbY)
%define     SCALE_SPILL_SIZE  gprsize
%endif

; Common to both builds: the source step is the most recent push, and R2
; serves as the source position from here on.
%define     SOURCE_DX   WORD_SIZE [rsp]
%define     Xq          R2q

  ; Start at source position 0 and enter the loop at its exit test, so a
  ; width below 2 never runs the two-pixel body.
  xor       Xq, Xq
  jmp       .check_remaining

.convert_pair:
  ; Two output pixels per iteration. Both share the chroma term taken at
  ; the position of the first pixel. TABLE is a register in PIC builds and
  ; a symbol otherwise.
  SCALE_LOAD_CHROMA

  ; First pixel: look up luma at the current position, then step.
  SCALE_LOAD_LUMA mm1
  add       Xq, SOURCE_DX

  ; Second pixel: same again at the stepped position.
  SCALE_LOAD_LUMA mm2
  add       Xq, SOURCE_DX

  ; Add the shared chroma term to each luma row (signed saturation) and
  ; shift every 16-bit lane right arithmetically by 6.
  paddsw    mm1, mm0
  psraw     mm1, 6
  paddsw    mm2, mm0
  psraw     mm2, 6

  ; Pack both pixels to 8 unsigned-saturated bytes and store them.
  ; NOTE(review): upper-case MOVQ is not defined in this file; presumably
  ; the including file selects the store instruction through it - confirm.
  packuswb  mm1, mm2
  MOVQ      QWORD [ARGBq], mm1
  add       ARGBq, 8

.check_remaining:
  ; WIDTH is a stack slot in PIC builds and a register otherwise. Keep
  ; looping while the count is still non-negative after removing one pair.
  sub       WIDTH, 2
  jns       .convert_pair

  ; The count is now negative; its low bit is set exactly when one pixel is
  ; still outstanding.
  and       WIDTH, 1
  jz        .finished

  ; Single trailing pixel: same look-ups, but only 4 bytes are written and
  ; neither Xq nor ARGBq is advanced.
  SCALE_LOAD_CHROMA
  SCALE_LOAD_LUMA mm1
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1
  movd      DWORD [ARGBq], mm1

.finished:
  ; Drop the value(s) pushed after PROLOGUE, then return.
  ADD       rsp, SCALE_SPILL_SIZE
  RET