from peachpy import *
from peachpy.x86_64 import *


def fp16_alt_xmm_to_fp32_ymm(xmm_half):
    """Emit instructions that widen eight 16-bit halves to eight float32.

    The conversion is done purely with integer/bitwise operations plus one
    floating-point subtraction (used for the zero-exponent inputs).  No
    special case is made for an all-ones exponent field: it is rebiased
    like any other non-zero exponent (presumably this is what "alt" in the
    name refers to -- confirm against the callers).

    Args:
        xmm_half: register whose 128 bits hold eight 16-bit half-precision
            values; it is only read (via its ``as_ymm`` view).

    Returns:
        A freshly allocated ``YMMRegister`` that the emitted code fills with
        the eight converted single-precision values.
    """
    # Copy qword 0 into both qwords of the low 128-bit lane and qword 1 into
    # both qwords of the high lane, so that each lane's low 64 bits carry
    # the four halves that lane will expand.
    ymm_src = YMMRegister()
    VPERMQ(ymm_src, xmm_half.as_ymm, 0b01010000)

    ymm_zeros = YMMRegister()
    VPXOR(ymm_zeros.as_xmm, ymm_zeros.as_xmm, ymm_zeros.as_xmm)

    # Interleave (0, half): every 32-bit element becomes half << 16, i.e.
    # sign in bit 31, exponent in bits 30..26, mantissa in bits 25..16.
    ymm_hi16 = YMMRegister()
    VPUNPCKLWD(ymm_hi16, ymm_zeros, ymm_src)

    # x + x == x << 1: pushes the sign bit out of each 16-bit half.
    ymm_half_x2 = YMMRegister()
    VPADDW(ymm_half_x2, ymm_src, ymm_src)

    # Same doubling trick on the 32-bit elements to drop their sign bit.
    ymm_abs_shl1 = YMMRegister()
    VPADDD(ymm_abs_shl1, ymm_hi16, ymm_hi16)

    sign_mask = Constant.float32x8(-0.0)

    # Keep only bit 31 of each element (-0.0 is 0x80000000).
    ymm_sign_bits = YMMRegister()
    VANDPS(ymm_sign_bits, ymm_hi16, sign_mask)

    # (<< 1) followed by (>> 4) is a net (>> 3) with the sign cleared:
    # exponent lands in bits 27..23 and mantissa in bits 22..13.
    ymm_abs_shr3 = YMMRegister()
    VPSRLD(ymm_abs_shr3, ymm_abs_shl1, 4)

    # (127 - 15) << 23: difference between the float32 and half biases.
    exp_offset = Constant.uint32x8(0x38000000)

    # Result for inputs with a non-zero exponent field.
    ymm_abs_normal = YMMRegister()
    VPADDD(ymm_abs_normal, ymm_abs_shr3, exp_offset)

    # Zero-exponent path: build the float32 bit pattern 0x3E80_0000 | (half << 1),
    # which is 0.25 plus the mantissa placed in the low bits ...
    magic_mask = Constant.uint16x16(0x3E80)
    ymm_abs_denormal = YMMRegister()
    VPUNPCKLWD(ymm_abs_denormal, ymm_half_x2, magic_mask)

    # ... then subtract 0.25 so only the mantissa contribution remains.
    magic_bias = Constant.float32x8(0.25)
    VSUBPS(ymm_abs_denormal, ymm_abs_denormal, magic_bias)

    # 0x00800000 has exponent field 1, the smallest value whose exponent
    # bits are non-zero.
    ymm_min_normal = YMMRegister()
    VMOVDQA(ymm_min_normal, Constant.uint32x8(0x00800000))

    # All-ones in elements whose shifted magnitude is below the cutoff,
    # i.e. whose exponent field is zero.
    ymm_is_denormal = YMMRegister()
    VPCMPGTD(ymm_is_denormal, ymm_min_normal, ymm_abs_shr3)

    # Take the zero-exponent result where the mask is set, the rebiased
    # result everywhere else.
    ymm_abs = YMMRegister()
    VBLENDVPS(ymm_abs, ymm_abs_normal, ymm_abs_denormal, ymm_is_denormal)

    # Re-attach the sign bit.
    ymm_result = YMMRegister()
    VORPS(ymm_result, ymm_abs, ymm_sign_bits)

    return ymm_result