;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"


;-----------------------------------------------------------------------
;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
;                       short *qcoeff_ptr,short *dequant_ptr,
;                       short *round_ptr,
;                       short *quant_ptr, short *dqcoeff_ptr);
;
; Quantizes one block of 16 signed 16-bit coefficients and writes both
; the quantized values and their dequantized counterparts.
;
; Per coefficient i (all arithmetic on 16-bit lanes):
;   sz         = coeff[i] >> 15                       (0 or -1)
;   y          = high16((|coeff[i]| + round[i]) * quant[i])
;   qcoeff[i]  = (y ^ sz) - sz                        (sign restored)
;   dqcoeff[i] = low16(qcoeff[i] * dequant[i])
;
; In:    arg(0) = coeff_ptr    arg(1) = qcoeff_ptr   arg(2) = dequant_ptr
;        arg(3) = round_ptr    arg(4) = quant_ptr    arg(5) = dqcoeff_ptr
;        Every pointer is accessed with movdqa at offsets 0 and 16, so
;        each must reference 32 bytes aligned to 16 bytes.
; Out:   eax = eob: 1 + the highest index, in zz_shuf order, of a
;        non-zero quantized coefficient; 0 when every one is zero.
; Clobb: rax, rcx, rdx, xmm0-xmm5, flags (rsi and rdi are saved and
;        restored around the body).
;
; NOTE(review): SHADOW_ARGS_TO_STACK, GET_GOT, arg(), GLOBAL() and
; friends come from x86_abi_support.asm; they are presumed to hide the
; per-ABI argument passing and PIC addressing - confirm there.
;-----------------------------------------------------------------------
global sym(vp8_fast_quantize_b_impl_ssse3)
sym(vp8_fast_quantize_b_impl_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdx, arg(0)                 ;coeff_ptr
    mov         rdi, arg(3)                 ;round_ptr
    mov         rsi, arg(4)                 ;quant_ptr

    movdqa      xmm0, [rdx]                 ;z, coefficients 0..7
    movdqa      xmm4, [rdx + 16]            ;z, coefficients 8..15

    movdqa      xmm2, [rdi]                 ;round lo
    movdqa      xmm3, [rdi + 16]            ;round hi

    movdqa      xmm1, xmm0
    movdqa      xmm5, xmm4

    psraw       xmm0, 15                    ;sign of z (aka sz)
    psraw       xmm4, 15                    ;sign of z (aka sz)

    pabsw       xmm1, xmm1                  ;x = abs(z)
    pabsw       xmm5, xmm5

    paddw       xmm1, xmm2                  ;x + round
    paddw       xmm5, xmm3

    pmulhw      xmm1, [rsi]                 ;y = ((x + round) * quant) >> 16
    pmulhw      xmm5, [rsi + 16]

    ; rdi/rsi are free again: reuse them for the output pointers
    mov         rdi, arg(1)                 ;qcoeff_ptr
    mov         rcx, arg(2)                 ;dequant_ptr
    mov         rsi, arg(5)                 ;dqcoeff_ptr

    ; restore the sign: (y ^ sz) - sz negates y where sz == -1
    pxor        xmm1, xmm0
    pxor        xmm5, xmm4
    psubw       xmm1, xmm0
    psubw       xmm5, xmm4

    movdqa      [rdi], xmm1                 ;store qcoeff
    movdqa      [rdi + 16], xmm5            ;store qcoeff

    movdqa      xmm2, [rcx]                 ;dequant lo
    movdqa      xmm3, [rcx + 16]            ;dequant hi

    pxor        xmm4, xmm4                  ;all zeros, for the compare
    pmullw      xmm2, xmm1                  ;dqcoeff = qcoeff * dequant
    pmullw      xmm3, xmm5

    pcmpeqw     xmm1, xmm4                  ;0xffff where qcoeff == 0
    pcmpeqw     xmm5, xmm4                  ;0xffff where qcoeff == 0
    packsswb    xmm1, xmm5                  ;16 words -> 16 bytes (0x00/0xff)
    pshufb      xmm1, [ GLOBAL(zz_shuf)]    ;byte i <- byte zz_shuf[i]

    pmovmskb    edx, xmm1                   ;bit i set <=> i-th coeff is zero

    ; Scalar search that the bsr sequence below replaces, kept for
    ; reference only:
;    xor         ecx, ecx
;    mov         eax, -1
;find_eob_loop:
;    shr         edx, 1
;    jc          fq_skip
;    mov         eax, ecx
;fq_skip:
;    inc         ecx
;    cmp         ecx, 16
;    jne         find_eob_loop
    xor         rdi, rdi
    mov         eax, -1
    xor         dx, ax                      ;flip the bits for bsr
    bsr         eax, edx                    ;index of the last non-zero coeff
                                            ;(not meaningful if edx == 0;
                                            ; masked off below)

    movdqa      [rsi], xmm2                 ;store dqcoeff
    movdqa      [rsi + 16], xmm3            ;store dqcoeff

    sub         edi, edx                    ;check for all zeros in bit mask
    sar         edi, 31                     ;0 or -1
    add         eax, 1
    and         eax, edi                    ;if the bit mask was all zero,
                                            ;then eob = 0
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

SECTION_RODATA
align 16
; pshufb control bytes: output position i takes the flag of coefficient
; zz_shuf[i] (presumably the 4x4 zig-zag scan order - confirm against
; the C quantizer's scan table).
zz_shuf:
    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15