// SPDX-License-Identifier: GPL-2.0-only
/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PCs
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
#include <linux/mem_encrypt.h>
#include <linux/efi.h>

#include <asm/set_memory.h>
#include <asm/e820/api.h>
#include <asm/efi.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>
#include <asm/setup.h>

#include "physaddr.h"

/*
 * Descriptor controlling ioremap() behavior.
 */
struct ioremap_desc {
	unsigned int flags;
};

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			enum page_cache_mode pcm)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WT:
		err = _set_memory_wt(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}

/* Does the range (or a subset of it) contain normal RAM? */
static unsigned int __ioremap_check_ram(struct resource *res)
{
	unsigned long start_pfn, stop_pfn;
	unsigned long i;

	if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
		return 0;

	start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
	stop_pfn = (res->end + 1) >> PAGE_SHIFT;
	if (stop_pfn > start_pfn) {
		for (i = 0; i < (stop_pfn - start_pfn); ++i)
			if (pfn_valid(start_pfn + i) &&
			    !PageReserved(pfn_to_page(start_pfn + i)))
				return IORES_MAP_SYSTEM_RAM;
	}

	return 0;
}

/*
 * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
 * in such a guest the whole memory is already encrypted.
 */
static unsigned int __ioremap_check_encrypted(struct resource *res)
{
	if (!sev_active())
		return 0;

	switch (res->desc) {
	case IORES_DESC_NONE:
	case IORES_DESC_RESERVED:
		break;
	default:
		return IORES_MAP_ENCRYPTED;
	}

	return 0;
}

/*
 * The EFI runtime services data area is not covered by walk_mem_res(), but must
 * be mapped encrypted when SEV is active.
 */
static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
{
	if (!sev_active())
		return;

	if (!IS_ENABLED(CONFIG_EFI))
		return;

	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
		desc->flags |= IORES_MAP_ENCRYPTED;
}

static int __ioremap_collect_map_flags(struct resource *res, void *arg)
{
	struct ioremap_desc *desc = arg;

	if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
		desc->flags |= __ioremap_check_ram(res);

	if (!(desc->flags & IORES_MAP_ENCRYPTED))
		desc->flags |= __ioremap_check_encrypted(res);

	return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
		(IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
}

/*
 * To avoid multiple resource walks, this function walks resources marked as
 * IORESOURCE_MEM and IORESOURCE_BUSY, looking for system RAM and/or a
 * resource whose description is not IORES_DESC_NONE (e.g.
 * IORES_DESC_ACPI_TABLES).
 *
 * After that, miscellaneous other ranges which do not fall into the above
 * category are handled in __ioremap_check_other().
 */
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
				struct ioremap_desc *desc)
{
	u64 start, end;

	start = (u64)addr;
	end = start + size - 1;
	memset(desc, 0, sizeof(struct ioremap_desc));

	walk_mem_res(start, end, desc, __ioremap_collect_map_flags);

	__ioremap_check_other(addr, desc);
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. It transparently creates kernel huge I/O mappings when
 * the physical address is aligned to a huge page size (1GB or 2MB) and
 * the requested size is at least the huge page size.
 *
 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
 * Therefore, the mapping code falls back to using smaller pages, down to
 * 4KB, when a mapping range is covered by a non-WB type of MTRR.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *
__ioremap_caller(resource_size_t phys_addr, unsigned long size,
		 enum page_cache_mode pcm, void *caller, bool encrypted)
{
	unsigned long offset, vaddr;
	resource_size_t last_addr;
	const resource_size_t unaligned_phys_addr = phys_addr;
	const unsigned long unaligned_size = size;
	struct ioremap_desc io_desc;
	struct vm_struct *area;
	enum page_cache_mode new_pcm;
	pgprot_t prot;
	int retval;
	void __iomem *ret_addr;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	__ioremap_check_mem(phys_addr, size, &io_desc);

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
			  &phys_addr, &last_addr);
		return NULL;
	}

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	/*
	 * Mask out any bits not part of the actual physical
	 * address, like memory encryption bits.
	 */
	phys_addr &= PHYSICAL_PAGE_MASK;

	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
				 pcm, &new_pcm);
	if (retval) {
		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
		return NULL;
	}

	if (pcm != new_pcm) {
		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
			printk(KERN_ERR
		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				pcm, new_pcm);
			goto err_free_memtype;
		}
		pcm = new_pcm;
	}

	/*
	 * If the page being mapped is in memory and SEV is active then
	 * make sure the memory encryption attribute is enabled in the
	 * resulting mapping.
	 */
	prot = PAGE_KERNEL_IO;
	if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
		prot = pgprot_encrypted(prot);

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC));
		break;
	case _PAGE_CACHE_MODE_UC_MINUS:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
		break;
	case _PAGE_CACHE_MODE_WC:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WC));
		break;
	case _PAGE_CACHE_MODE_WT:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WT));
		break;
	case _PAGE_CACHE_MODE_WB:
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		goto err_free_memtype;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;

	if (kernel_map_sync_memtype(phys_addr, size, pcm))
		goto err_free_area;

	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
		goto err_free_area;

	ret_addr = (void __iomem *) (vaddr + offset);
	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

	/*
	 * Check if the request spans more than a single BAR in the iomem
	 * resource tree.
	 */
	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
		pr_warn("caller %pS mapping multiple BARs\n", caller);

	return ret_addr;
err_free_area:
	free_vm_area(area);
err_free_memtype:
	free_memtype(phys_addr, phys_addr + size);
	return NULL;
}

/**
 * ioremap_nocache - map bus memory into CPU space
 * @phys_addr: bus address of the memory
 * @size: size of the resource to map
 *
 * ioremap_nocache performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncacheable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * buses. In particular, driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
	 *
	 * Until we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS. Drivers that are certain they need strong UC, or that
	 * can already be converted over to it, can use ioremap_uc().
	 */
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_nocache);
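
/*
 * Illustrative usage sketch (not taken from this file): a PCI driver would
 * typically map a BAR with ioremap_nocache(), use the mmio accessors on the
 * returned cookie, and tear the mapping down with iounmap(). The device
 * pointer "pdev" and the register offsets below are made-up placeholders.
 *
 *	void __iomem *regs;
 *	u32 status;
 *
 *	regs = ioremap_nocache(pci_resource_start(pdev, 0),
 *			       pci_resource_len(pdev, 0));
 *	if (!regs)
 *		return -ENOMEM;
 *
 *	status = readl(regs + 0x04);
 *	writel(status | 0x1, regs + 0x00);
 *
 *	iounmap(regs);
 */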

/**
 * ioremap_uc - map bus memory into CPU space as strongly uncacheable
 * @phys_addr: bus address of the memory
 * @size: size of the resource to map
 *
 * ioremap_uc performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked with a strong
 * preference as completely uncacheable on the CPU when possible. For non-PAT
 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 * systems this will set the PAT entry for the pages as strong UC. This call
 * will honor existing caching rules from things like the PCI bus. Note that
 * there are other caches and buffers on many buses. In particular, driver
 * authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL_GPL(ioremap_uc);

/**
 * ioremap_wc - map memory into CPU space write combined
 * @phys_addr: bus address of the memory
 * @size: size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_wc);

/**
 * ioremap_wt - map memory into CPU space write through
 * @phys_addr: bus address of the memory
 * @size: size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write through.
 * Write through stores data into memory while keeping the cache up-to-date.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_wt);

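/**
 * ioremap_encrypted - remap memory with the encryption attribute set
 * @phys_addr: bus address of the memory
 * @size: size of the resource to map
 *
 * Like ioremap_cache(), but the resulting write-back mapping always carries
 * the memory encryption attribute, regardless of what the resource walk
 * reports for the range.
 *
 * Must be freed with iounmap.
 */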
void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0), true);
}
EXPORT_SYMBOL(ioremap_encrypted);

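/**
 * ioremap_cache - map memory into CPU space as cacheable (write-back)
 * @phys_addr: bus address of the memory
 * @size: size of the resource to map
 *
 * This version of ioremap requests a write-back cacheable mapping and is
 * therefore meant for memory-like resources rather than device registers.
 *
 * Must be freed with iounmap.
 */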
void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_cache);

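/**
 * ioremap_prot - map memory into CPU space with caller-supplied protections
 * @phys_addr: bus address of the memory
 * @size: size of the resource to map
 * @prot_val: raw page protection value; only its cache mode is honoured
 *
 * The cache mode encoded in @prot_val is extracted with pgprot2cachemode()
 * and the mapping is then created like the other ioremap variants.
 *
 * Must be freed with iounmap.
 */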
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
			   unsigned long prot_val)
{
	return __ioremap_caller(phys_addr, size,
				pgprot2cachemode(__pgprot(prot_val)),
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_prot);

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * The PCI/ISA range special-casing was removed from __ioremap()
	 * so this check, in theory, can be removed. However, there are
	 * cases where iounmap() is called for addresses not obtained via
	 * ioremap() (vga16fb for example). Add a warning so that these
	 * cases can be caught and fixed.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
		return;
	}

	mmiotrace_iounmap(addr);

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	/*
	 * Use the vm area unlocked, assuming the caller ensures there isn't
	 * another iounmap for the same address in parallel. Reuse of the
	 * virtual address is prevented by leaving it in the global lists
	 * until we're done with it. cpa takes care of the direct mappings.
	 */
	p = find_vm_area((void __force *)addr);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

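/*
 * Report which page-table levels may be used for huge ioremap() mappings on
 * this CPU; the generic ioremap huge-page code consults these helpers.
 */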
int __init arch_ioremap_p4d_supported(void)
{
	return 0;
}

int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
	return boot_cpu_has(X86_FEATURE_GBPAGES);
#else
	return 0;
#endif
}

int __init arch_ioremap_pmd_supported(void)
{
	return boot_cpu_has(X86_FEATURE_PSE);
}

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
	unsigned long start = phys & PAGE_MASK;
	unsigned long offset = phys & ~PAGE_MASK;
	void *vaddr;

	/* memremap() maps if RAM, otherwise falls back to ioremap() */
	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);

	/* Only add the offset on success and return NULL if memremap() failed */
	if (vaddr)
		vaddr += offset;

	return vaddr;
}

void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
	memunmap((void *)((unsigned long)addr & PAGE_MASK));
}

/*
 * Examine the physical address to determine if it is an area of memory
 * that should be mapped decrypted. If the memory is not part of the
 * kernel usable area, it was accessed and created decrypted, so these
 * areas should be mapped decrypted. And since the encryption key can
 * change across reboots, persistent memory should also be mapped
 * decrypted.
 *
 * If SEV is active, that implies that BIOS/UEFI also ran encrypted, so
 * only persistent memory should be mapped decrypted.
 */
static bool memremap_should_map_decrypted(resource_size_t phys_addr,
					  unsigned long size)
{
	int is_pmem;

	/*
	 * Check if the address is part of a persistent memory region.
	 * This check covers areas added by E820, EFI and ACPI.
	 */
	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
				    IORES_DESC_PERSISTENT_MEMORY);
	if (is_pmem != REGION_DISJOINT)
		return true;

	/*
	 * Check if the non-volatile attribute is set for an EFI
	 * reserved area.
	 */
	if (efi_enabled(EFI_BOOT)) {
		switch (efi_mem_type(phys_addr)) {
		case EFI_RESERVED_TYPE:
			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
				return true;
			break;
		default:
			break;
		}
	}

	/* Check if the address is outside kernel usable area */
	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
	case E820_TYPE_RESERVED:
	case E820_TYPE_ACPI:
	case E820_TYPE_NVS:
	case E820_TYPE_UNUSABLE:
		/* For SEV, these areas are encrypted */
		if (sev_active())
			break;
		/* Fallthrough */

	case E820_TYPE_PRAM:
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is EFI data. Check
 * it against the boot params structure, the EFI tables and the EFI
 * memory types.
 */
static bool memremap_is_efi_data(resource_size_t phys_addr,
				 unsigned long size)
{
	u64 paddr;

	/* Check if the address is part of EFI boot/runtime data */
	if (!efi_enabled(EFI_BOOT))
		return false;

	paddr = boot_params.efi_info.efi_memmap_hi;
	paddr <<= 32;
	paddr |= boot_params.efi_info.efi_memmap;
	if (phys_addr == paddr)
		return true;

	paddr = boot_params.efi_info.efi_systab_hi;
	paddr <<= 32;
	paddr |= boot_params.efi_info.efi_systab;
	if (phys_addr == paddr)
		return true;

	if (efi_is_table_address(phys_addr))
		return true;

	switch (efi_mem_type(phys_addr)) {
	case EFI_BOOT_SERVICES_DATA:
	case EFI_RUNTIME_SERVICES_DATA:
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain.
 */
static bool memremap_is_setup_data(resource_size_t phys_addr,
				   unsigned long size)
{
	struct setup_data *data;
	u64 paddr, paddr_next;

	paddr = boot_params.hdr.setup_data;
	while (paddr) {
		unsigned int len;

		if (phys_addr == paddr)
			return true;

		data = memremap(paddr, sizeof(*data),
				MEMREMAP_WB | MEMREMAP_DEC);

		paddr_next = data->next;
		len = data->len;

		memunmap(data);

		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
			return true;

		paddr = paddr_next;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain (early boot version).
 */
static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
						unsigned long size)
{
	struct setup_data *data;
	u64 paddr, paddr_next;

	paddr = boot_params.hdr.setup_data;
	while (paddr) {
		unsigned int len;

		if (phys_addr == paddr)
			return true;

		data = early_memremap_decrypted(paddr, sizeof(*data));

		paddr_next = data->next;
		len = data->len;

		early_memunmap(data, sizeof(*data));

		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
			return true;

		paddr = paddr_next;
	}

	return false;
}

/*
 * Architecture function to determine if RAM remap is allowed. By default, a
 * RAM remap will map the data as encrypted. Determine if a RAM remap should
 * not be done so that the data will be mapped decrypted.
 */
bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
				 unsigned long flags)
{
	if (!mem_encrypt_active())
		return true;

	if (flags & MEMREMAP_ENC)
		return true;

	if (flags & MEMREMAP_DEC)
		return false;

	if (sme_active()) {
		if (memremap_is_setup_data(phys_addr, size) ||
		    memremap_is_efi_data(phys_addr, size))
			return false;
	}

	return !memremap_should_map_decrypted(phys_addr, size);
}

/*
 * Architecture override of __weak function to adjust the protection attributes
 * used when remapping memory. By default, early_memremap() will map the data
 * as encrypted. Determine if an encrypted mapping should not be done and set
 * the appropriate protection attributes.
 */
pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
					     unsigned long size,
					     pgprot_t prot)
{
	bool encrypted_prot;

	if (!mem_encrypt_active())
		return prot;

	encrypted_prot = true;

	if (sme_active()) {
		if (early_memremap_is_setup_data(phys_addr, size) ||
		    memremap_is_efi_data(phys_addr, size))
			encrypted_prot = false;
	}

	if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
		encrypted_prot = false;

	return encrypted_prot ? pgprot_encrypted(prot)
			      : pgprot_decrypted(prot);
}

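/*
 * Report whether a physical address range would be mapped encrypted when
 * remapped, by applying the arch_memremap_can_ram_remap() policy with no
 * explicit MEMREMAP_ENC/MEMREMAP_DEC request.
 */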
bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
{
	return arch_memremap_can_ram_remap(phys_addr, size, 0);
}

#ifdef CONFIG_AMD_MEM_ENCRYPT
/* Remap memory with encryption */
void __init *early_memremap_encrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
}

/*
 * Remap memory with encryption and write-protected - cannot be called
 * before pat_init() is called
 */
void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	/* Be sure the write-protect PAT entry is set for write-protect */
	if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
		return NULL;

	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
}

/* Remap memory without encryption */
void __init *early_memremap_decrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
}

/*
 * Remap memory without encryption and write-protected - cannot be called
 * before pat_init() is called
 */
void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	/* Be sure the write-protect PAT entry is set for write-protect */
	if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
		return NULL;

	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
}
#endif	/* CONFIG_AMD_MEM_ENCRYPT */

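/*
 * Page table backing the boot-time fixmap slots (FIX_BTMAP_*) used by the
 * early ioremap code; it is hooked into the page tables in
 * early_ioremap_init() below.
 */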
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3_pa());
	pgd_t *pgd = &base[pgd_index(addr)];
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

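/* Report whether @ptep points into the early ioremap page table (bm_pte). */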
bool __init is_early_ioremap_ptep(pte_t *ptep)
{
	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
		       fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n",
		       fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

void __init __early_set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	/* Sanitize 'flags' against any unsupported bits: */
	pgprot_val(flags) &= __supported_pte_mask;

	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	__flush_tlb_one_kernel(addr);
}
