• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;
2; jsimdcpu.asm - SIMD instruction support check
3;
4; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5; Copyright (C) 2016, D. R. Commander.
6;
7; Based on
8; x86 SIMD extension for IJG JPEG library
9; Copyright (C) 1999-2006, MIYASAKA Masaru.
10; For conditions of distribution and use, see copyright notice in jsimdext.inc
11;
12; This file should be assembled with NASM (Netwide Assembler),
13; can *not* be assembled with Microsoft's MASM or any compatible
14; assembler (including Borland's Turbo Assembler).
15; NASM is available from http://nasm.sourceforge.net/ or
16; http://sourceforge.net/project/showfiles.php?group_id=6208
17;
18; [TAB8]
19
20%include "jsimdext.inc"
21
; --------------------------------------------------------------------------
    SECTION     SEG_TEXT
    BITS        64
;
; Check if the CPU supports SIMD instructions
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support(void)
;
; In:    nothing
; Out:   eax = bitwise OR of JSIMD_* flags.  JSIMD_SSE and JSIMD_SSE2 are
;        always set; JSIMD_AVX2 is set only if the CPU reports AVX2 and AVX,
;        and the O/S has enabled XSAVE management of the XMM and YMM state.
; Saved: rbx (overwritten by CPUID) and rdi (flag accumulator) are pushed on
;        entry and restored before returning.
; Clobb: rcx, rdx, flags
;
; CPUID and XGETBV only consume and produce 32-bit register values, and the
; return type is unsigned int, so 32-bit operand sizes are used throughout.
;

    align       32
    GLOBAL_FUNCTION(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
    push        rbx                     ; CPUID overwrites ebx
    push        rdi

    xor         edi, edi                ; edi = simd support flags

    ; Assume that all x86-64 processors support SSE & SSE2 instructions
    or          edi, JSIMD_SSE2
    or          edi, JSIMD_SSE

    ; Check whether CPUID leaf 07H is supported
    ; (leaf 07H is used to check for AVX2 instruction support)
    xor         eax, eax                ; leaf 00H: eax = maximum basic leaf
    cpuid
    cmp         eax, 7                  ; leaf numbers are unsigned
    jb          short .return           ; Maximum leaf < 07H

    ; Check for AVX2 instruction support
    mov         eax, 7
    xor         ecx, ecx                ; sub-leaf 0
    cpuid                               ; ebx = Extended feature flags
    test        ebx, 1<<5               ; bit5:AVX2
    jz          short .return           ; CPU does not support AVX2

    ; Check for AVX2 O/S support
    mov         eax, 1                  ; leaf 01H: ecx = feature flags
    cpuid
    test        ecx, 1<<27              ; bit27:OSXSAVE
    jz          short .return           ; O/S has not enabled XSAVE/XGETBV
    test        ecx, 1<<28              ; bit28:AVX
    jz          short .return           ; CPU does not support AVX

    xor         ecx, ecx                ; select XCR0
    xgetbv                              ; edx:eax = XCR0
    and         eax, 6                  ; bit1:XMM state, bit2:YMM state
    cmp         eax, 6
    jne         short .return           ; O/S does not manage XMM/YMM state
                                        ; using XSAVE

    or          edi, JSIMD_AVX2

.return:
    mov         eax, edi                ; return the accumulated flags

    pop         rdi
    pop         rbx
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32
89