// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */

#include <linux/file.h>
#include <linux/freezer.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <asm/sgx.h>
#include "driver.h"
#include "encl.h"
#include "encls.h"

struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
static int sgx_nr_epc_sections;
static struct task_struct *ksgxd_tsk;
static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);

/*
 * These variables are part of the state of the reclaimer, and must be accessed
 * with sgx_reclaimer_lock acquired.
 */
static LIST_HEAD(sgx_active_page_list);
static DEFINE_SPINLOCK(sgx_reclaimer_lock);

static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);

/* Nodes with one or more EPC sections. */
static nodemask_t sgx_numa_mask;

/*
 * Array with one list_head for each possible NUMA node. Each
 * list contains all the sgx_epc_section's which are on that
 * node.
 */
static struct sgx_numa_node *sgx_numa_nodes;

static LIST_HEAD(sgx_dirty_page_list);

/*
 * Reset post-kexec EPC pages to the uninitialized state. The pages are removed
 * from the input list and made available to the page allocator. SECS pages
 * whose child pages are still queued behind them in the input list are left
 * intact.
 *
 * Return 0 when sanitization was successful or the kthread was stopped, and
 * the number of unsanitized pages otherwise.
 */
static unsigned long __sgx_sanitize_pages(struct list_head *dirty_page_list)
{
        unsigned long left_dirty = 0;
        struct sgx_epc_page *page;
        LIST_HEAD(dirty);
        int ret;

        /* dirty_page_list is thread-local, no need for a lock: */
        while (!list_empty(dirty_page_list)) {
                if (kthread_should_stop())
                        return 0;

                page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);

                ret = __eremove(sgx_get_epc_virt_addr(page));
                if (!ret) {
                        /*
                         * page is now sanitized. Make it available via the SGX
                         * page allocator:
                         */
                        list_del(&page->list);
                        sgx_free_epc_page(page);
                } else {
                        /* The page is not yet clean - move to the dirty list. */
                        list_move_tail(&page->list, &dirty);
                        left_dirty++;
                }

                cond_resched();
        }

        list_splice(&dirty, dirty_page_list);
        return left_dirty;
}

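/*
 * Check whether the page has been accessed since the last scan. Walk the
 * enclave's mm_list under SRCU and test-and-clear the accessed bit in every
 * mm that maps the page. Return true when no mm reports the page as young,
 * i.e. the page is a reclaim candidate.
 */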
static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
{
        struct sgx_encl_page *page = epc_page->owner;
        struct sgx_encl *encl = page->encl;
        struct sgx_encl_mm *encl_mm;
        bool ret = true;
        int idx;

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                mmap_read_lock(encl_mm->mm);
                ret = !sgx_encl_test_and_clear_young(encl_mm->mm, page);
                mmap_read_unlock(encl_mm->mm);

                mmput_async(encl_mm->mm);

                if (!ret)
                        break;
        }

        srcu_read_unlock(&encl->srcu, idx);

        if (!ret)
                return false;

        return true;
}

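/*
 * Zap the PTEs for the page in every mm that maps the enclave, and mark the
 * page as blocked with EBLOCK so that no new TLB entries to it can be
 * created. The mm_list walk is retried if the enclave's mm list changes
 * underneath it.
 */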
static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
{
        struct sgx_encl_page *page = epc_page->owner;
        unsigned long addr = page->desc & PAGE_MASK;
        struct sgx_encl *encl = page->encl;
        unsigned long mm_list_version;
        struct sgx_encl_mm *encl_mm;
        struct vm_area_struct *vma;
        int idx, ret;

        do {
                mm_list_version = encl->mm_list_version;

                /* Pairs with smp_wmb() in sgx_encl_mm_add(). */
                smp_rmb();

                idx = srcu_read_lock(&encl->srcu);

                list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                        if (!mmget_not_zero(encl_mm->mm))
                                continue;

                        mmap_read_lock(encl_mm->mm);

                        ret = sgx_encl_find(encl_mm->mm, addr, &vma);
                        if (!ret && encl == vma->vm_private_data)
                                zap_vma_ptes(vma, addr, PAGE_SIZE);

                        mmap_read_unlock(encl_mm->mm);

                        mmput_async(encl_mm->mm);
                }

                srcu_read_unlock(&encl->srcu, idx);
        } while (unlikely(encl->mm_list_version != mm_list_version));

        mutex_lock(&encl->lock);

        ret = __eblock(sgx_get_epc_virt_addr(epc_page));
        if (encls_failed(ret))
                ENCLS_WARN(ret, "EBLOCK");

        mutex_unlock(&encl->lock);
}

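/*
 * Write back the page's encrypted contents and its PCMD metadata to the
 * backing storage with EWB, using @va_slot to store the page's version.
 */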
static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
                          struct sgx_backing *backing)
{
        struct sgx_pageinfo pginfo;
        int ret;

        pginfo.addr = 0;
        pginfo.secs = 0;

        pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
        pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
                          backing->pcmd_offset;

        ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
        set_page_dirty(backing->pcmd);
        set_page_dirty(backing->contents);

        kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
                                              backing->pcmd_offset));
        kunmap_atomic((void *)(unsigned long)pginfo.contents);

        return ret;
}

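/*
 * The callback is deliberately empty: delivering the IPI is enough to force
 * an exit from the enclave on the targeted CPUs.
 */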
static void sgx_ipi_cb(void *info)
{
}

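/*
 * Build the set of CPUs that might be executing inside the enclave, i.e. the
 * union of mm_cpumask() over all mm's attached to the enclave. Used as the
 * IPI target mask in the EWB slow path.
 */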
static const cpumask_t *sgx_encl_ewb_cpumask(struct sgx_encl *encl)
{
        cpumask_t *cpumask = &encl->cpumask;
        struct sgx_encl_mm *encl_mm;
        int idx;

        /*
         * Can race with sgx_encl_mm_add(), but ETRACK has already been
         * executed, which means that the CPUs running in the new mm will enter
         * into the enclave with a fresh epoch.
         */
        cpumask_clear(cpumask);

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));

                mmput_async(encl_mm->mm);
        }

        srcu_read_unlock(&encl->srcu, idx);

        return cpumask;
}

/*
 * Swap the page to regular memory. The page has already been transformed to
 * the blocked state with EBLOCK, which means it can no longer be referenced
 * (no new TLB entries).
 *
 * The first attempt just tries to write the page, assuming that some other
 * thread has already reset the tracking epoch with ETRACK and the previous
 * thread count has been zeroed out. The second attempt calls ETRACK before
 * EWB. If that fails, all HW threads are kicked out of the enclave and EWB is
 * retried, which is then guaranteed to succeed.
 */
static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
                         struct sgx_backing *backing)
{
        struct sgx_encl_page *encl_page = epc_page->owner;
        struct sgx_encl *encl = encl_page->encl;
        struct sgx_va_page *va_page;
        unsigned int va_offset;
        void *va_slot;
        int ret;

        encl_page->desc &= ~SGX_ENCL_PAGE_BEING_RECLAIMED;

        va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
                                   list);
        va_offset = sgx_alloc_va_slot(va_page);
        va_slot = sgx_get_epc_virt_addr(va_page->epc_page) + va_offset;
        if (sgx_va_page_full(va_page))
                list_move_tail(&va_page->list, &encl->va_pages);

        ret = __sgx_encl_ewb(epc_page, va_slot, backing);
        if (ret == SGX_NOT_TRACKED) {
                ret = __etrack(sgx_get_epc_virt_addr(encl->secs.epc_page));
                if (ret) {
                        if (encls_failed(ret))
                                ENCLS_WARN(ret, "ETRACK");
                }

                ret = __sgx_encl_ewb(epc_page, va_slot, backing);
                if (ret == SGX_NOT_TRACKED) {
                        /*
                         * Slow path, send IPIs to kick cpus out of the
                         * enclave. Note, it's imperative that the cpu
                         * mask is generated *after* ETRACK, else we'll
                         * miss cpus that entered the enclave between
                         * generating the mask and incrementing epoch.
                         */
                        on_each_cpu_mask(sgx_encl_ewb_cpumask(encl),
                                         sgx_ipi_cb, NULL, 1);
                        ret = __sgx_encl_ewb(epc_page, va_slot, backing);
                }
        }

        if (ret) {
                if (encls_failed(ret))
                        ENCLS_WARN(ret, "EWB");

                sgx_free_va_slot(va_page, va_offset);
        } else {
                encl_page->desc |= va_offset;
                encl_page->va_page = va_page;
        }
}

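/*
 * Write the page out to its backing storage with sgx_encl_ewb() and detach it
 * from the enclave page. When the last child page of an initialized enclave
 * has been swapped out, also evict the SECS page.
 */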
static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
                                struct sgx_backing *backing)
{
        struct sgx_encl_page *encl_page = epc_page->owner;
        struct sgx_encl *encl = encl_page->encl;
        struct sgx_backing secs_backing;
        int ret;

        mutex_lock(&encl->lock);

        sgx_encl_ewb(epc_page, backing);
        encl_page->epc_page = NULL;
        encl->secs_child_cnt--;
        sgx_encl_put_backing(backing);

        if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
                ret = sgx_encl_alloc_backing(encl, PFN_DOWN(encl->size),
                                             &secs_backing);
                if (ret)
                        goto out;

                sgx_encl_ewb(encl->secs.epc_page, &secs_backing);

                sgx_encl_free_epc_page(encl->secs.epc_page);
                encl->secs.epc_page = NULL;

                sgx_encl_put_backing(&secs_backing);
        }

out:
        mutex_unlock(&encl->lock);
}

/*
 * Take a fixed number of pages from the head of the active page pool and
 * reclaim them to the enclave's private shmem files. Skip pages that have been
 * accessed since the last scan, and move them to the tail of the active page
 * pool so that the pages get scanned in an LRU-like fashion.
 *
 * Process the pages in batches (currently 16) in order to reduce the number of
 * IPIs and ETRACKs potentially required. sgx_encl_ewb() already mitigates this
 * with its three-stage EWB pipeline (EWB, ETRACK + EWB and IPI + EWB), but not
 * sufficiently. Reclaiming one page at a time would also be problematic, as it
 * would increase lock contention too much and halt forward progress.
 */
static void sgx_reclaim_pages(void)
{
        struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
        struct sgx_backing backing[SGX_NR_TO_SCAN];
        struct sgx_epc_section *section;
        struct sgx_encl_page *encl_page;
        struct sgx_epc_page *epc_page;
        struct sgx_numa_node *node;
        pgoff_t page_index;
        int cnt = 0;
        int ret;
        int i;

        spin_lock(&sgx_reclaimer_lock);
        for (i = 0; i < SGX_NR_TO_SCAN; i++) {
                if (list_empty(&sgx_active_page_list))
                        break;

                epc_page = list_first_entry(&sgx_active_page_list,
                                            struct sgx_epc_page, list);
                list_del_init(&epc_page->list);
                encl_page = epc_page->owner;

                if (kref_get_unless_zero(&encl_page->encl->refcount) != 0)
                        chunk[cnt++] = epc_page;
                else
                        /*
                         * The owner is freeing the page. No need to add the
                         * page back to the list of reclaimable pages.
                         */
                        epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
        }
        spin_unlock(&sgx_reclaimer_lock);

        for (i = 0; i < cnt; i++) {
                epc_page = chunk[i];
                encl_page = epc_page->owner;

                if (!sgx_reclaimer_age(epc_page))
                        goto skip;

                page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);

                mutex_lock(&encl_page->encl->lock);
                ret = sgx_encl_alloc_backing(encl_page->encl, page_index, &backing[i]);
                if (ret) {
                        mutex_unlock(&encl_page->encl->lock);
                        goto skip;
                }

                encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
                mutex_unlock(&encl_page->encl->lock);
                continue;

skip:
                spin_lock(&sgx_reclaimer_lock);
                list_add_tail(&epc_page->list, &sgx_active_page_list);
                spin_unlock(&sgx_reclaimer_lock);

                kref_put(&encl_page->encl->refcount, sgx_encl_release);

                chunk[i] = NULL;
        }

        for (i = 0; i < cnt; i++) {
                epc_page = chunk[i];
                if (epc_page)
                        sgx_reclaimer_block(epc_page);
        }

        for (i = 0; i < cnt; i++) {
                epc_page = chunk[i];
                if (!epc_page)
                        continue;

                encl_page = epc_page->owner;
                sgx_reclaimer_write(epc_page, &backing[i]);

                kref_put(&encl_page->encl->refcount, sgx_encl_release);
                epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;

                section = &sgx_epc_sections[epc_page->section];
                node = section->node;

                spin_lock(&node->lock);
                list_add_tail(&epc_page->list, &node->free_page_list);
                spin_unlock(&node->lock);
                atomic_long_inc(&sgx_nr_free_pages);
        }
}

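/*
 * Reclaim only when the free page count is below @watermark and there are
 * reclaimable pages on the active page list.
 */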
static bool sgx_should_reclaim(unsigned long watermark)
{
        return atomic_long_read(&sgx_nr_free_pages) < watermark &&
               !list_empty(&sgx_active_page_list);
}

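/*
 * Main loop of the reclaimer kthread: sanitize the EPC left over from a
 * previous kernel (kexec), then reclaim pages whenever the free page count
 * drops below SGX_NR_HIGH_PAGES.
 */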
static int ksgxd(void *p)
{
        set_freezable();

        /*
         * Sanitize pages in order to recover from kexec(). The 2nd pass is
         * required for SECS pages, whose child pages blocked EREMOVE.
         */
        __sgx_sanitize_pages(&sgx_dirty_page_list);
        WARN_ON(__sgx_sanitize_pages(&sgx_dirty_page_list));

        while (!kthread_should_stop()) {
                if (try_to_freeze())
                        continue;

                wait_event_freezable(ksgxd_waitq,
                                     kthread_should_stop() ||
                                     sgx_should_reclaim(SGX_NR_HIGH_PAGES));

                if (sgx_should_reclaim(SGX_NR_HIGH_PAGES))
                        sgx_reclaim_pages();

                cond_resched();
        }

        return 0;
}

static bool __init sgx_page_reclaimer_init(void)
{
        struct task_struct *tsk;

        tsk = kthread_run(ksgxd, NULL, "ksgxd");
        if (IS_ERR(tsk))
                return false;

        ksgxd_tsk = tsk;

        return true;
}

bool current_is_ksgxd(void)
{
        return current == ksgxd_tsk;
}

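/* Take the first free EPC page from @nid's free list, or NULL if it is empty. */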
static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
{
        struct sgx_numa_node *node = &sgx_numa_nodes[nid];
        struct sgx_epc_page *page = NULL;

        spin_lock(&node->lock);

        if (list_empty(&node->free_page_list)) {
                spin_unlock(&node->lock);
                return NULL;
        }

        page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
        list_del_init(&page->list);

        spin_unlock(&node->lock);
        atomic_long_dec(&sgx_nr_free_pages);

        return page;
}

/**
 * __sgx_alloc_epc_page() - Allocate an EPC page
 *
 * Iterate through the NUMA nodes and reserve a free EPC page for the caller.
 * Start from the NUMA node where the caller is executing.
 *
 * Return:
 * - an EPC page:       A free EPC page was available.
 * - ERR_PTR(-ENOMEM):  Out of EPC pages.
 */
struct sgx_epc_page *__sgx_alloc_epc_page(void)
{
        struct sgx_epc_page *page;
        int nid_of_current = numa_node_id();
        int nid = nid_of_current;

        if (node_isset(nid_of_current, sgx_numa_mask)) {
                page = __sgx_alloc_epc_page_from_node(nid_of_current);
                if (page)
                        return page;
        }

        /* Fall back to the non-local NUMA nodes: */
        while (true) {
                nid = next_node_in(nid, sgx_numa_mask);
                if (nid == nid_of_current)
                        break;

                page = __sgx_alloc_epc_page_from_node(nid);
                if (page)
                        return page;
        }

        return ERR_PTR(-ENOMEM);
}

/**
 * sgx_mark_page_reclaimable() - Mark a page as reclaimable
 * @page: EPC page
 *
 * Mark a page as reclaimable and add it to the active page list. Pages
 * are automatically removed from the active list when freed.
 */
void sgx_mark_page_reclaimable(struct sgx_epc_page *page)
{
        spin_lock(&sgx_reclaimer_lock);
        page->flags |= SGX_EPC_PAGE_RECLAIMER_TRACKED;
        list_add_tail(&page->list, &sgx_active_page_list);
        spin_unlock(&sgx_reclaimer_lock);
}

/**
 * sgx_unmark_page_reclaimable() - Remove a page from the reclaim list
 * @page: EPC page
 *
 * Clear the reclaimable flag and remove the page from the active page list.
 *
 * Return:
 *   0 on success,
 *   -EBUSY if the page is in the process of being reclaimed
 */
int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
{
        spin_lock(&sgx_reclaimer_lock);
        if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
                /* The page is being reclaimed. */
                if (list_empty(&page->list)) {
                        spin_unlock(&sgx_reclaimer_lock);
                        return -EBUSY;
                }

                list_del(&page->list);
                page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
        }
        spin_unlock(&sgx_reclaimer_lock);

        return 0;
}

/**
 * sgx_alloc_epc_page() - Allocate an EPC page
 * @owner: the owner of the EPC page
 * @reclaim: reclaim pages if necessary
 *
 * Iterate through the EPC sections and borrow a free EPC page for the caller.
 * When a page is no longer needed it must be released with sgx_free_epc_page().
 * If @reclaim is set to true, pages are reclaimed directly when the allocator
 * runs out of them. No mm's can be locked when @reclaim is set to true.
 *
 * Finally, wake up ksgxd when the number of free pages goes below the low
 * watermark before returning to the caller.
 *
 * Return:
 *   an EPC page,
 *   -errno on error
 */
struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
{
        struct sgx_epc_page *page;

        for ( ; ; ) {
                page = __sgx_alloc_epc_page();
                if (!IS_ERR(page)) {
                        page->owner = owner;
                        break;
                }

                if (list_empty(&sgx_active_page_list))
                        return ERR_PTR(-ENOMEM);

                if (!reclaim) {
                        page = ERR_PTR(-EBUSY);
                        break;
                }

                if (signal_pending(current)) {
                        page = ERR_PTR(-ERESTARTSYS);
                        break;
                }

                sgx_reclaim_pages();
                cond_resched();
        }

        if (sgx_should_reclaim(SGX_NR_LOW_PAGES))
                wake_up(&ksgxd_waitq);

        return page;
}

623
624 /**
625 * sgx_free_epc_page() - Free an EPC page
626 * @page: an EPC page
627 *
628 * Put the EPC page back to the list of free pages. It's the caller's
629 * responsibility to make sure that the page is in uninitialized state. In other
630 * words, do EREMOVE, EWB or whatever operation is necessary before calling
631 * this function.
632 */
sgx_free_epc_page(struct sgx_epc_page * page)633 void sgx_free_epc_page(struct sgx_epc_page *page)
634 {
635 struct sgx_epc_section *section = &sgx_epc_sections[page->section];
636 struct sgx_numa_node *node = section->node;
637
638 spin_lock(&node->lock);
639
640 list_add_tail(&page->list, &node->free_page_list);
641
642 spin_unlock(&node->lock);
643 atomic_long_inc(&sgx_nr_free_pages);
644 }
645
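/*
 * Map an EPC section into the kernel address space with memremap(), allocate
 * its page descriptors, and queue all of its pages on sgx_dirty_page_list so
 * that ksgxd sanitizes them before they are handed out.
 */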
static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
                                         unsigned long index,
                                         struct sgx_epc_section *section)
{
        unsigned long nr_pages = size >> PAGE_SHIFT;
        unsigned long i;

        section->virt_addr = memremap(phys_addr, size, MEMREMAP_WB);
        if (!section->virt_addr)
                return false;

        section->pages = vmalloc(nr_pages * sizeof(struct sgx_epc_page));
        if (!section->pages) {
                memunmap(section->virt_addr);
                return false;
        }

        section->phys_addr = phys_addr;

        for (i = 0; i < nr_pages; i++) {
                section->pages[i].section = index;
                section->pages[i].flags = 0;
                section->pages[i].owner = NULL;
                list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
        }

        return true;
}

/**
 * A section metric is concatenated in a way that @low bits 12-31 define the
 * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
 * metric.
 */
static inline u64 __init sgx_calc_section_metric(u64 low, u64 high)
{
        return (low & GENMASK_ULL(31, 12)) +
               ((high & GENMASK_ULL(19, 0)) << 32);
}

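/*
 * Enumerate the EPC sections from the SGX CPUID leaf, set up each section,
 * and bind it to the NUMA node that its physical address maps to (falling
 * back to node 0 if the node cannot be determined).
 */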
static bool __init sgx_page_cache_init(void)
{
        u32 eax, ebx, ecx, edx, type;
        u64 pa, size;
        int nid;
        int i;

        sgx_numa_nodes = kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes), GFP_KERNEL);
        if (!sgx_numa_nodes)
                return false;

        for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
                cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx);

                type = eax & SGX_CPUID_EPC_MASK;
                if (type == SGX_CPUID_EPC_INVALID)
                        break;

                if (type != SGX_CPUID_EPC_SECTION) {
                        pr_err_once("Unknown EPC section type: %u\n", type);
                        break;
                }

                pa = sgx_calc_section_metric(eax, ebx);
                size = sgx_calc_section_metric(ecx, edx);

                pr_info("EPC section 0x%llx-0x%llx\n", pa, pa + size - 1);

                if (!sgx_setup_epc_section(pa, size, i, &sgx_epc_sections[i])) {
                        pr_err("No free memory for an EPC section\n");
                        break;
                }

                nid = numa_map_to_online_node(phys_to_target_node(pa));
                if (nid == NUMA_NO_NODE) {
                        /* The physical address is already printed above. */
                        pr_warn(FW_BUG "Unable to map EPC section to online node. Fallback to the NUMA node 0.\n");
                        nid = 0;
                }

                if (!node_isset(nid, sgx_numa_mask)) {
                        spin_lock_init(&sgx_numa_nodes[nid].lock);
                        INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
                        node_set(nid, sgx_numa_mask);
                }

                sgx_epc_sections[i].node = &sgx_numa_nodes[nid];

                sgx_nr_epc_sections++;
        }

        if (!sgx_nr_epc_sections) {
                pr_err("There are zero EPC sections.\n");
                return false;
        }

        return true;
}

/*
 * Update the SGX_LEPUBKEYHASH MSRs to the values specified by the caller.
 * The bare-metal driver requires them to be set to the hash of the enclave's
 * signer before EINIT. KVM needs to set them to the guest's virtual MSR
 * values before doing EINIT on behalf of the guest.
 */
void sgx_update_lepubkeyhash(u64 *lepubkeyhash)
{
        int i;

        WARN_ON_ONCE(preemptible());

        for (i = 0; i < 4; i++)
                wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]);
}

const struct file_operations sgx_provision_fops = {
        .owner = THIS_MODULE,
};

static struct miscdevice sgx_dev_provision = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "sgx_provision",
        .nodename = "sgx_provision",
        .fops = &sgx_provision_fops,
};

/**
 * sgx_set_attribute() - Update allowed attributes given a file descriptor
 * @allowed_attributes: Pointer to the allowed enclave attributes
 * @attribute_fd: File descriptor for a specific attribute
 *
 * Append the enclave attribute indicated by the file descriptor to the
 * allowed attributes. Currently only SGX_ATTR_PROVISIONKEY, indicated by
 * /dev/sgx_provision, is supported.
 *
 * Return:
 * - 0:       SGX_ATTR_PROVISIONKEY is appended to @allowed_attributes
 * - -EINVAL: Invalid or unsupported file descriptor
 */
int sgx_set_attribute(unsigned long *allowed_attributes,
                      unsigned int attribute_fd)
{
        struct file *file;

        file = fget(attribute_fd);
        if (!file)
                return -EINVAL;

        if (file->f_op != &sgx_provision_fops) {
                fput(file);
                return -EINVAL;
        }

        *allowed_attributes |= SGX_ATTR_PROVISIONKEY;

        fput(file);
        return 0;
}
EXPORT_SYMBOL_GPL(sgx_set_attribute);

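/*
 * Set up the EPC page cache and the reclaimer kthread, register
 * /dev/sgx_provision, and initialize the native and KVM (virtual EPC)
 * drivers. Fail only if both drivers fail to initialize.
 */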
static int __init sgx_init(void)
{
        int ret;
        int i;

        if (!cpu_feature_enabled(X86_FEATURE_SGX))
                return -ENODEV;

        if (!sgx_page_cache_init())
                return -ENOMEM;

        if (!sgx_page_reclaimer_init()) {
                ret = -ENOMEM;
                goto err_page_cache;
        }

        ret = misc_register(&sgx_dev_provision);
        if (ret)
                goto err_kthread;

        /*
         * Always try to initialize the native *and* KVM drivers.
         * The KVM driver is less picky than the native one and
         * can function if the native one is not supported on the
         * current system or fails to initialize.
         *
         * Error out only if both fail to initialize.
         */
        ret = sgx_drv_init();

        if (sgx_vepc_init() && ret)
                goto err_provision;

        return 0;

err_provision:
        misc_deregister(&sgx_dev_provision);

err_kthread:
        kthread_stop(ksgxd_tsk);

err_page_cache:
        for (i = 0; i < sgx_nr_epc_sections; i++) {
                vfree(sgx_epc_sections[i].pages);
                memunmap(sgx_epc_sections[i].virt_addr);
        }

        return ret;
}

device_initcall(sgx_init);