• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "config.h"
29 
30 #include <stdint.h>
31 #include <string.h>
32 
33 #include "common/attributes.h"
34 
35 #include "src/x86/cpu.h"
36 
/*
 * The four 32-bit register values produced by one CPUID query.
 *
 * The field order is eax, ebx, edx, ecx (edx before ecx). This is deliberate:
 * dav1d_get_cpu_flags_x86() overlays this struct with
 * { uint32_t max_leaf; char vendor[12]; }, so the last three fields must be
 * contiguous and in this order to be read as the 12-byte vendor string.
 */
typedef struct {
    uint32_t eax;
    uint32_t ebx;
    uint32_t edx;
    uint32_t ecx;
} CpuidRegisters;
40 
41 void dav1d_cpu_cpuid(CpuidRegisters *regs, unsigned leaf, unsigned subleaf);
42 uint64_t dav1d_cpu_xgetbv(unsigned xcr);
43 
/* Nonzero when every bit set in `bits` is also set in `value`.
 * Note: `bits` is evaluated twice, so pass side-effect-free expressions. */
#define X(value, bits) (((value) & (bits)) == (bits))
45 
dav1d_get_cpu_flags_x86(void)46 COLD unsigned dav1d_get_cpu_flags_x86(void) {
47     union {
48         CpuidRegisters r;
49         struct {
50             uint32_t max_leaf;
51             char vendor[12];
52         };
53     } cpu;
54     dav1d_cpu_cpuid(&cpu.r, 0, 0);
55     unsigned flags = 0;
56 
57     if (cpu.max_leaf >= 1) {
58         CpuidRegisters r;
59         dav1d_cpu_cpuid(&r, 1, 0);
60         const unsigned family = ((r.eax >> 8) & 0x0f) + ((r.eax >> 20) & 0xff);
61 
62         if (X(r.edx, 0x06008000)) /* CMOV/SSE/SSE2 */ {
63             flags |= DAV1D_X86_CPU_FLAG_SSE2;
64             if (X(r.ecx, 0x00000201)) /* SSE3/SSSE3 */ {
65                 flags |= DAV1D_X86_CPU_FLAG_SSSE3;
66                 if (X(r.ecx, 0x00080000)) /* SSE4.1 */
67                     flags |= DAV1D_X86_CPU_FLAG_SSE41;
68             }
69         }
70 #if ARCH_X86_64
71         /* We only support >128-bit SIMD on x86-64. */
72         if (X(r.ecx, 0x18000000)) /* OSXSAVE/AVX */ {
73             const uint64_t xcr0 = dav1d_cpu_xgetbv(0);
74             if (X(xcr0, 0x00000006)) /* XMM/YMM */ {
75                 if (cpu.max_leaf >= 7) {
76                     dav1d_cpu_cpuid(&r, 7, 0);
77                     if (X(r.ebx, 0x00000128)) /* BMI1/BMI2/AVX2 */ {
78                         flags |= DAV1D_X86_CPU_FLAG_AVX2;
79                         if (X(xcr0, 0x000000e0)) /* ZMM/OPMASK */ {
80                             if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
81                                 flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
82                         }
83                     }
84                 }
85             }
86         }
87 #endif
88         if (!memcmp(cpu.vendor, "AuthenticAMD", sizeof(cpu.vendor))) {
89             if ((flags & DAV1D_X86_CPU_FLAG_AVX2) && family <= 0x19) {
90                 /* Excavator, Zen, Zen+, Zen 2, Zen 3, Zen 3+, Zen 4 */
91                 flags |= DAV1D_X86_CPU_FLAG_SLOW_GATHER;
92             }
93         }
94     }
95 
96     return flags;
97 }
98