# PeachPy kernel definitions. Each kernel loads one XMM register from the
# `fp16` pointer, passes it to a conversion routine from the project's `fp16`
# package, and stores the register returned by that routine to the `fp32`
# pointer.
from peachpy import *
from peachpy.x86_64 import *

import fp16.avx
import fp16.avx2


# Arguments shared by both kernels: a pointer to const 16-bit inputs and a
# pointer to the 32-bit outputs.
arg_fp16 = Argument(ptr(const_uint16_t), name="fp16")
arg_fp32 = Argument(ptr(uint32_t), name="fp32")


def _emit_conversion_kernel(kernel_name, isa_extension, convert):
    """Emit one load -> convert -> store kernel.

    Args:
        kernel_name: Symbol name given to the generated function.
        isa_extension: ISA extension added to ``uarch.default`` to form the
            function's target.
        convert: Callable that receives the loaded XMM register and returns
            the register holding the converted values.
    """
    with Function(kernel_name, (arg_fp16, arg_fp32), target=uarch.default + isa_extension):
        src_ptr = GeneralPurposeRegister64()
        LOAD.ARGUMENT(src_ptr, arg_fp16)

        dst_ptr = GeneralPurposeRegister64()
        LOAD.ARGUMENT(dst_ptr, arg_fp32)

        # Unaligned load of a whole XMM register from the input pointer.
        packed_halves = XMMRegister()
        VMOVUPS(packed_halves, [src_ptr])

        # The store width follows whichever register the converter returned.
        converted = convert(packed_halves)
        VMOVUPS([dst_ptr], converted)

        RETURN()


# AVX2 variant: the converter name indicates an XMM input and a YMM result.
_emit_conversion_kernel(
    "fp16_alt_xmm_to_fp32_ymm_peachpy__avx2",
    isa.avx2,
    fp16.avx2.fp16_alt_xmm_to_fp32_ymm,
)

# AVX variant: the converter name indicates an XMM input and an XMM result.
# NOTE(review): a full XMM register is still loaded from the input here;
# presumably only part of it is consumed -- confirm callers always provide
# a full register's worth of readable input.
_emit_conversion_kernel(
    "fp16_alt_xmm_to_fp32_xmm_peachpy__avx",
    isa.avx,
    fp16.avx.fp16_alt_xmm_to_fp32_xmm,
)