;
; jsimdcpu.asm - SIMD instruction support check
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208

%include "jsimdext.inc"

; --------------------------------------------------------------------------
; CPUID leaf numbers and feature bits used by jpeg_simd_cpu_support().

%define CPUID_LEAF_FEATURES      1          ; ECX/EDX = feature flags
%define CPUID_LEAF_EXT_FEATURES  7          ; sub-leaf 0: EBX = extended flags
%define CPUID1_ECX_OSXSAVE       (1 << 27)  ; O/S has enabled XSAVE/XGETBV
%define CPUID1_ECX_AVX           (1 << 28)  ; CPU implements AVX
%define CPUID7_EBX_AVX2          (1 << 5)   ; CPU implements AVX2
%define XCR0_SSE_AVX_STATE       6          ; XCR0 bit 1 (XMM) | bit 2 (YMM)

; --------------------------------------------------------------------------
    SECTION     SEG_TEXT
    BITS        64
;
; Check if the CPU supports SIMD instructions
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support(void)
;
; In:     nothing
; Out:    eax = bitwise OR of JSIMD_* flags.  JSIMD_SSE and JSIMD_SSE2 are
;         always set; JSIMD_AVX2 is added only when the CPU reports AVX2
;         and AVX, the O/S has enabled XSAVE, and XCR0 shows that both XMM
;         and YMM state are managed by the O/S.
; Clobb:  rax, rcx, rdx, flags (CPUID and XGETBV overwrite them)
; Saved:  rbx (overwritten by CPUID, callee-saved in both 64-bit ABIs) and
;         rdi (callee-saved under the Microsoft x64 ABI), so the routine is
;         safe under either calling convention.
; Stack:  leaf function; two pushes, no calls, no locals.
;

    align       32
    GLOBAL_FUNCTION(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
    push        rbx
    push        rdi

    ; edi accumulates the result.  Assume that all x86-64 processors
    ; support SSE & SSE2 instructions.
    mov         edi, JSIMD_SSE | JSIMD_SSE2

    ; Check whether CPUID leaf 07H is supported
    ; (leaf 07H is used to check for AVX2 instruction support)
    xor         eax, eax                ; leaf 0: eax out = maximum basic leaf
    cpuid
    cmp         eax, CPUID_LEAF_EXT_FEATURES
    jb          short .return           ; Maximum leaf < 07H (unsigned)

    ; Check for AVX2 instruction support
    mov         eax, CPUID_LEAF_EXT_FEATURES
    xor         ecx, ecx                ; sub-leaf 0
    cpuid
    test        ebx, CPUID7_EBX_AVX2    ; ebx = Extended feature flags
    jz          short .return           ; CPU does not support AVX2

    ; Check for AVX2 O/S support
    mov         eax, CPUID_LEAF_FEATURES
    xor         ecx, ecx
    cpuid
    test        ecx, CPUID1_ECX_OSXSAVE
    jz          short .return           ; O/S does not support XSAVE
    test        ecx, CPUID1_ECX_AVX
    jz          short .return           ; CPU does not support AVX

    ; XGETBV is only executed once OSXSAVE has been confirmed above.
    xor         ecx, ecx                ; select XCR0
    xgetbv                              ; edx:eax = XCR0
    and         eax, XCR0_SSE_AVX_STATE
    cmp         eax, XCR0_SSE_AVX_STATE
    jne         short .return           ; O/S does not manage XMM/YMM state
                                        ; using XSAVE

    or          edi, JSIMD_AVX2

.return:
    mov         eax, edi                ; 32-bit write zero-extends into rax

    pop         rdi
    pop         rbx
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32