1 #include <stdbool.h>
2 #include <stdint.h>
3 #include <stddef.h>
4
5 #include <irt.h>
6
7 #define NACL_CODE_BUNDLE_SIZE 32
8 #include <cpuinfo.h>
9 #include <x86/api.h>
10
/* Validator probe bundle: CMPXCHG16B (with the NaCl-mandated sandboxing MOV), HLT-padded to one 32-byte bundle. */
static const uint8_t cmpxchg16b_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* MOV edi, edi */
	0x89, 0xFF,
	/* CMPXCHG16B [r15 + rdi * 1] */
	0x49, 0x0F, 0xC7, 0x0C, 0x3F,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
20
/* Validator probe bundle: a single LZCNT instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t lzcnt_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* LZCNT eax, ecx */
	0xF3, 0x0F, 0xBD, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
28
/* Validator probe bundle: a single POPCNT instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t popcnt_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* POPCNT eax, ecx */
	0xF3, 0x0F, 0xB8, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
36
/* Validator probe bundle: MOVBE load (with the NaCl-mandated sandboxing MOV), HLT-padded to one 32-byte bundle. */
static const uint8_t movbe_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* MOV ecx, ecx */
	0x89, 0xC9,
	/* MOVBE eax, [r15 + rcx * 1] */
	0x41, 0x0F, 0x38, 0xF0, 0x04, 0x0F,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
46
/* Validator probe bundle: a single BMI (ANDN) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t bmi_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* ANDN eax, ecx, edx */
	0xC4, 0xE2, 0x70, 0xF2, 0xC2,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
54
/* Validator probe bundle: a single TBM (BLCS) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t tbm_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* BLCS eax, ecx */
	0x8F, 0xE9, 0x78, 0x01, 0xD9,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
62
/* Validator probe bundle: a single 3DNow! (PFADD) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t three_d_now_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* PFADD mm0, mm1 */
	0x0F, 0x0F, 0xC1, 0x9E,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
70
/* Validator probe bundle: a single 3DNow!+ (PFNACC) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t three_d_now_plus_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* PFNACC mm0, mm1 */
	0x0F, 0x0F, 0xC1, 0x8A,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
78
/* Validator probe bundle: a single SSE3 (HADDPS) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t sse3_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* HADDPS xmm0, xmm1 */
	0xF2, 0x0F, 0x7C, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
86
/* Validator probe bundle: a single SSSE3 (PSHUFB) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t ssse3_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* PSHUFB xmm0, xmm1 */
	0x66, 0x0F, 0x38, 0x00, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
94
/* Validator probe bundle: a single SSE4.1 (PMULLD) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t sse4_1_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* PMULLD xmm0, xmm1 */
	0x66, 0x0F, 0x38, 0x40, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
102
/* Validator probe bundle: a single SSE4.2 (PCMPGTQ) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t sse4_2_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* PCMPGTQ xmm0, xmm1 */
	0x66, 0x0F, 0x38, 0x37, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
110
/* Validator probe bundle: a single SSE4a (EXTRQ) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t sse4a_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* EXTRQ xmm0, xmm1 */
	0x66, 0x0F, 0x79, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
118
/* Validator probe bundle: a single AES-NI (AESENC) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t aes_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* AESENC xmm0, xmm1 */
	0x66, 0x0F, 0x38, 0xDC, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
126
/* Validator probe bundle: a single PCLMULQDQ instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t pclmulqdq_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* PCLMULQDQ xmm0, xmm1, 0 */
	0x66, 0x0F, 0x3A, 0x44, 0xC1, 0x00,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
134
/* Validator probe bundle: a single AVX (VPERMILPS) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t avx_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* VPERMILPS ymm0, ymm1, 0xAA */
	0xC4, 0xE3, 0x7D, 0x04, 0xC1, 0xAA,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
142
/* Validator probe bundle: a single FMA3 (VFMADDSUB213PS) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t fma3_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* VFMADDSUB213PS ymm0, ymm1, ymm2 */
	0xC4, 0xE2, 0x75, 0xA6, 0xC2,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
150
/* Validator probe bundle: a single FMA4 (VFMADDPS) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t fma4_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* VFMADDPS ymm0, ymm1, ymm2, ymm3 */
	0xC4, 0xE3, 0xF5, 0x68, 0xC3, 0x20,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
158
/* Validator probe bundle: a single XOP (VPHADDBQ) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t xop_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* VPHADDBQ xmm0, xmm1 */
	0x8F, 0xE9, 0x78, 0xC3, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
166
/* Validator probe bundle: a single F16C (VCVTPH2PS) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t f16c_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* VCVTPH2PS ymm0, xmm1 */
	0xC4, 0xE2, 0x7D, 0x13, 0xC1,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
174
/* Validator probe bundle: a single AVX2 (VPERMPS) instruction, HLT-padded to one 32-byte bundle. */
static const uint8_t avx2_bundle[NACL_CODE_BUNDLE_SIZE] = {
	/* VPERMPS ymm0, ymm1, ymm2 */
	0xC4, 0xE2, 0x75, 0x16, 0xC2,
	/* Fill remainder with HLTs */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
};
182
183
cpuinfo_x86_nacl_detect_isa(void)184 struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void) {
185 /*
186 * Under Native Client sandbox we can't just ask the CPU:
187 * - First, some instructions (XGETBV) necessary to query AVX support are not white-listed in the validator.
188 * - Secondly, even if CPU supports some instruction, but validator doesn't know about it (e.g. due a bug in the
189 * ISA detection in the validator), all instructions from the "unsupported" ISA extensions will be replaced by
190 * HLTs when the module is loaded.
191 * Thus, instead of quering the CPU about supported ISA extensions, we query the validator: we pass bundles with
192 * instructions from ISA extensions to dynamic code generation APIs, and test if they are accepted.
193 */
194
195 struct cpuinfo_x86_isa isa = { 0 };
196
197 struct nacl_irt_code_data_alloc nacl_irt_code_data_alloc = { 0 };
198 struct nacl_irt_dyncode nacl_irt_dyncode = { 0 };
199 if (sizeof(nacl_irt_code_data_alloc) != nacl_interface_query(NACL_IRT_CODE_DATA_ALLOC_v0_1,
200 &nacl_irt_code_data_alloc,
201 sizeof(nacl_irt_code_data_alloc)))
202 {
203 goto finish;
204 }
205
206 if (sizeof(nacl_irt_dyncode) != nacl_interface_query(NACL_IRT_DYNCODE_v0_1,
207 &nacl_irt_dyncode,
208 sizeof(nacl_irt_dyncode)))
209 {
210 goto finish;
211 }
212
213 const size_t allocation_size = 65536;
214 uintptr_t code_segment = 0;
215 if (0 != nacl_irt_code_data_alloc.allocate_code_data(0, allocation_size, 0, 0, &code_segment))
216 {
217 goto finish;
218 }
219
220 isa.cmpxchg16b = !nacl_irt_dyncode.dyncode_create((void*) code_segment, cmpxchg16b_bundle, NACL_CODE_BUNDLE_SIZE) &&
221 (*((const uint8_t*) code_segment) != 0xF4);
222 code_segment += NACL_CODE_BUNDLE_SIZE;
223
224 isa.lzcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, lzcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
225 (*((const uint8_t*) code_segment) != 0xF4);
226 code_segment += NACL_CODE_BUNDLE_SIZE;
227
228 isa.popcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, popcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
229 (*((const uint8_t*) code_segment) != 0xF4);
230 code_segment += NACL_CODE_BUNDLE_SIZE;
231
232 isa.movbe = !nacl_irt_dyncode.dyncode_create((void*) code_segment, movbe_bundle, NACL_CODE_BUNDLE_SIZE) &&
233 (*((const uint8_t*) code_segment) != 0xF4);
234 code_segment += NACL_CODE_BUNDLE_SIZE;
235
236 isa.bmi = !nacl_irt_dyncode.dyncode_create((void*) code_segment, bmi_bundle, NACL_CODE_BUNDLE_SIZE) &&
237 (*((const uint8_t*) code_segment) != 0xF4);
238 code_segment += NACL_CODE_BUNDLE_SIZE;
239
240 isa.tbm = !nacl_irt_dyncode.dyncode_create((void*) code_segment, tbm_bundle, NACL_CODE_BUNDLE_SIZE) &&
241 (*((const uint8_t*) code_segment) != 0xF4);
242 code_segment += NACL_CODE_BUNDLE_SIZE;
243
244 isa.three_d_now = !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_bundle, NACL_CODE_BUNDLE_SIZE) &&
245 (*((const uint8_t*) code_segment) != 0xF4);
246 code_segment += NACL_CODE_BUNDLE_SIZE;
247
248 isa.three_d_now_plus =
249 !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_plus_bundle, NACL_CODE_BUNDLE_SIZE) &&
250 (*((const uint8_t*) code_segment) != 0xF4);
251 code_segment += NACL_CODE_BUNDLE_SIZE;
252
253 isa.sse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
254 (*((const uint8_t*) code_segment) != 0xF4);
255 code_segment += NACL_CODE_BUNDLE_SIZE;
256
257 isa.ssse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, ssse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
258 (*((const uint8_t*) code_segment) != 0xF4);
259 code_segment += NACL_CODE_BUNDLE_SIZE;
260
261 isa.sse4_1 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_1_bundle, NACL_CODE_BUNDLE_SIZE) &&
262 (*((const uint8_t*) code_segment) != 0xF4);
263 code_segment += NACL_CODE_BUNDLE_SIZE;
264
265 isa.sse4_2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_2_bundle, NACL_CODE_BUNDLE_SIZE) &&
266 (*((const uint8_t*) code_segment) != 0xF4);
267 code_segment += NACL_CODE_BUNDLE_SIZE;
268
269 isa.sse4a = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4a_bundle, NACL_CODE_BUNDLE_SIZE) &&
270 (*((const uint8_t*) code_segment) != 0xF4);
271 code_segment += NACL_CODE_BUNDLE_SIZE;
272
273 isa.aes = !nacl_irt_dyncode.dyncode_create((void*) code_segment, aes_bundle, NACL_CODE_BUNDLE_SIZE) &&
274 (*((const uint8_t*) code_segment) != 0xF4);
275 code_segment += NACL_CODE_BUNDLE_SIZE;
276
277 isa.pclmulqdq = !nacl_irt_dyncode.dyncode_create((void*) code_segment, pclmulqdq_bundle, NACL_CODE_BUNDLE_SIZE) &&
278 (*((const uint8_t*) code_segment) != 0xF4);
279 code_segment += NACL_CODE_BUNDLE_SIZE;
280
281 isa.avx = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx_bundle, NACL_CODE_BUNDLE_SIZE) &&
282 (*((const uint8_t*) code_segment) != 0xF4);
283 code_segment += NACL_CODE_BUNDLE_SIZE;
284
285 isa.fma3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma3_bundle, NACL_CODE_BUNDLE_SIZE) &&
286 (*((const uint8_t*) code_segment) != 0xF4);
287 code_segment += NACL_CODE_BUNDLE_SIZE;
288
289 isa.fma4 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma4_bundle, NACL_CODE_BUNDLE_SIZE) &&
290 (*((const uint8_t*) code_segment) != 0xF4);
291 code_segment += NACL_CODE_BUNDLE_SIZE;
292
293 isa.xop = !nacl_irt_dyncode.dyncode_create((void*) code_segment, xop_bundle, NACL_CODE_BUNDLE_SIZE) &&
294 (*((const uint8_t*) code_segment) != 0xF4);
295 code_segment += NACL_CODE_BUNDLE_SIZE;
296
297 isa.f16c = !nacl_irt_dyncode.dyncode_create((void*) code_segment, f16c_bundle, NACL_CODE_BUNDLE_SIZE) &&
298 (*((const uint8_t*) code_segment) != 0xF4);
299 code_segment += NACL_CODE_BUNDLE_SIZE;
300
301 isa.avx2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx2_bundle, NACL_CODE_BUNDLE_SIZE) &&
302 (*((const uint8_t*) code_segment) != 0xF4);
303
304 finish:
305 return isa;
306 }
307