• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**@file
2   Memory Detection for Virtual Machines.
3 
4   Copyright (c) 2006 - 2016, Intel Corporation. All rights reserved.<BR>
5   This program and the accompanying materials
6   are licensed and made available under the terms and conditions of the BSD License
7   which accompanies this distribution.  The full text of the license may be found at
8   http://opensource.org/licenses/bsd-license.php
9 
10   THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11   WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12 
13 Module Name:
14 
15   MemDetect.c
16 
17 **/
18 
19 //
20 // The package level header files this module uses
21 //
22 #include <PiPei.h>
23 
24 //
25 // The Library classes this module consumes
26 //
27 #include <Library/BaseMemoryLib.h>
28 #include <Library/DebugLib.h>
29 #include <Library/HobLib.h>
30 #include <Library/IoLib.h>
31 #include <Library/PcdLib.h>
32 #include <Library/PeimEntryPoint.h>
33 #include <Library/ResourcePublicationLib.h>
34 #include <Library/MtrrLib.h>
35 #include <Library/QemuFwCfgLib.h>
36 
37 #include "Platform.h"
38 #include "Cmos.h"
39 
//
// Physical address width in bits. Written by AddressWidthInitialization() and
// read by GetPeiMemoryCap() and PublishPeiMemory() in this file.
//
UINT8 mPhysMemAddressWidth;

//
// Base and size of the low-RAM range that serves as permanent PEI memory on
// the S3 resume path. Both are computed in PublishPeiMemory() when
// mS3Supported is set; InitializeRamRegions() covers the range with an
// EfiACPIMemoryNVS memory allocation HOB on the normal boot path.
//
STATIC UINT32 mS3AcpiReservedMemoryBase;
STATIC UINT32 mS3AcpiReservedMemorySize;
44 
45 UINT32
GetSystemMemorySizeBelow4gb(VOID)46 GetSystemMemorySizeBelow4gb (
47   VOID
48   )
49 {
50   UINT8 Cmos0x34;
51   UINT8 Cmos0x35;
52 
53   //
54   // CMOS 0x34/0x35 specifies the system memory above 16 MB.
55   // * CMOS(0x35) is the high byte
56   // * CMOS(0x34) is the low byte
57   // * The size is specified in 64kb chunks
58   // * Since this is memory above 16MB, the 16MB must be added
59   //   into the calculation to get the total memory size.
60   //
61 
62   Cmos0x34 = (UINT8) CmosRead8 (0x34);
63   Cmos0x35 = (UINT8) CmosRead8 (0x35);
64 
65   return (UINT32) (((UINTN)((Cmos0x35 << 8) + Cmos0x34) << 16) + SIZE_16MB);
66 }
67 
68 
69 STATIC
70 UINT64
GetSystemMemorySizeAbove4gb()71 GetSystemMemorySizeAbove4gb (
72   )
73 {
74   UINT32 Size;
75   UINTN  CmosIndex;
76 
77   //
78   // CMOS 0x5b-0x5d specifies the system memory above 4GB MB.
79   // * CMOS(0x5d) is the most significant size byte
80   // * CMOS(0x5c) is the middle size byte
81   // * CMOS(0x5b) is the least significant size byte
82   // * The size is specified in 64kb chunks
83   //
84 
85   Size = 0;
86   for (CmosIndex = 0x5d; CmosIndex >= 0x5b; CmosIndex--) {
87     Size = (UINT32) (Size << 8) + (UINT32) CmosRead8 (CmosIndex);
88   }
89 
90   return LShiftU64 (Size, 16);
91 }
92 
93 
94 /**
95   Return the highest address that DXE could possibly use, plus one.
96 **/
97 STATIC
98 UINT64
GetFirstNonAddress(VOID)99 GetFirstNonAddress (
100   VOID
101   )
102 {
103   UINT64               FirstNonAddress;
104   UINT64               Pci64Base, Pci64Size;
105   CHAR8                MbString[7 + 1];
106   EFI_STATUS           Status;
107   FIRMWARE_CONFIG_ITEM FwCfgItem;
108   UINTN                FwCfgSize;
109   UINT64               HotPlugMemoryEnd;
110   RETURN_STATUS        PcdStatus;
111 
112   FirstNonAddress = BASE_4GB + GetSystemMemorySizeAbove4gb ();
113 
114   //
115   // If DXE is 32-bit, then we're done; PciBusDxe will degrade 64-bit MMIO
116   // resources to 32-bit anyway. See DegradeResource() in
117   // "PciResourceSupport.c".
118   //
119 #ifdef MDE_CPU_IA32
120   if (!FeaturePcdGet (PcdDxeIplSwitchToLongMode)) {
121     return FirstNonAddress;
122   }
123 #endif
124 
125   //
126   // Otherwise, in order to calculate the highest address plus one, we must
127   // consider the 64-bit PCI host aperture too. Fetch the default size.
128   //
129   Pci64Size = PcdGet64 (PcdPciMmio64Size);
130 
131   //
132   // See if the user specified the number of megabytes for the 64-bit PCI host
133   // aperture. The number of non-NUL characters in MbString allows for
134   // 9,999,999 MB, which is approximately 10 TB.
135   //
136   // As signaled by the "X-" prefix, this knob is experimental, and might go
137   // away at any time.
138   //
139   Status = QemuFwCfgFindFile ("opt/ovmf/X-PciMmio64Mb", &FwCfgItem,
140              &FwCfgSize);
141   if (!EFI_ERROR (Status)) {
142     if (FwCfgSize >= sizeof MbString) {
143       DEBUG ((EFI_D_WARN,
144         "%a: ignoring malformed 64-bit PCI host aperture size from fw_cfg\n",
145         __FUNCTION__));
146     } else {
147       QemuFwCfgSelectItem (FwCfgItem);
148       QemuFwCfgReadBytes (FwCfgSize, MbString);
149       MbString[FwCfgSize] = '\0';
150       Pci64Size = LShiftU64 (AsciiStrDecimalToUint64 (MbString), 20);
151     }
152   }
153 
154   if (Pci64Size == 0) {
155     if (mBootMode != BOOT_ON_S3_RESUME) {
156       DEBUG ((EFI_D_INFO, "%a: disabling 64-bit PCI host aperture\n",
157         __FUNCTION__));
158       PcdStatus = PcdSet64S (PcdPciMmio64Size, 0);
159       ASSERT_RETURN_ERROR (PcdStatus);
160     }
161 
162     //
163     // There's nothing more to do; the amount of memory above 4GB fully
164     // determines the highest address plus one. The memory hotplug area (see
165     // below) plays no role for the firmware in this case.
166     //
167     return FirstNonAddress;
168   }
169 
170   //
171   // The "etc/reserved-memory-end" fw_cfg file, when present, contains an
172   // absolute, exclusive end address for the memory hotplug area. This area
173   // starts right at the end of the memory above 4GB. The 64-bit PCI host
174   // aperture must be placed above it.
175   //
176   Status = QemuFwCfgFindFile ("etc/reserved-memory-end", &FwCfgItem,
177              &FwCfgSize);
178   if (!EFI_ERROR (Status) && FwCfgSize == sizeof HotPlugMemoryEnd) {
179     QemuFwCfgSelectItem (FwCfgItem);
180     QemuFwCfgReadBytes (FwCfgSize, &HotPlugMemoryEnd);
181 
182     ASSERT (HotPlugMemoryEnd >= FirstNonAddress);
183     FirstNonAddress = HotPlugMemoryEnd;
184   }
185 
186   //
187   // SeaBIOS aligns both boundaries of the 64-bit PCI host aperture to 1GB, so
188   // that the host can map it with 1GB hugepages. Follow suit.
189   //
190   Pci64Base = ALIGN_VALUE (FirstNonAddress, (UINT64)SIZE_1GB);
191   Pci64Size = ALIGN_VALUE (Pci64Size, (UINT64)SIZE_1GB);
192 
193   //
194   // The 64-bit PCI host aperture should also be "naturally" aligned. The
195   // alignment is determined by rounding the size of the aperture down to the
196   // next smaller or equal power of two. That is, align the aperture by the
197   // largest BAR size that can fit into it.
198   //
199   Pci64Base = ALIGN_VALUE (Pci64Base, GetPowerOfTwo64 (Pci64Size));
200 
201   if (mBootMode != BOOT_ON_S3_RESUME) {
202     //
203     // The core PciHostBridgeDxe driver will automatically add this range to
204     // the GCD memory space map through our PciHostBridgeLib instance; here we
205     // only need to set the PCDs.
206     //
207     PcdStatus = PcdSet64S (PcdPciMmio64Base, Pci64Base);
208     ASSERT_RETURN_ERROR (PcdStatus);
209     PcdStatus = PcdSet64S (PcdPciMmio64Size, Pci64Size);
210     ASSERT_RETURN_ERROR (PcdStatus);
211 
212     DEBUG ((EFI_D_INFO, "%a: Pci64Base=0x%Lx Pci64Size=0x%Lx\n",
213       __FUNCTION__, Pci64Base, Pci64Size));
214   }
215 
216   //
217   // The useful address space ends with the 64-bit PCI host aperture.
218   //
219   FirstNonAddress = Pci64Base + Pci64Size;
220   return FirstNonAddress;
221 }
222 
223 
224 /**
225   Initialize the mPhysMemAddressWidth variable, based on guest RAM size.
226 **/
227 VOID
AddressWidthInitialization(VOID)228 AddressWidthInitialization (
229   VOID
230   )
231 {
232   UINT64 FirstNonAddress;
233 
234   //
235   // As guest-physical memory size grows, the permanent PEI RAM requirements
236   // are dominated by the identity-mapping page tables built by the DXE IPL.
237   // The DXL IPL keys off of the physical address bits advertized in the CPU
238   // HOB. To conserve memory, we calculate the minimum address width here.
239   //
240   FirstNonAddress      = GetFirstNonAddress ();
241   mPhysMemAddressWidth = (UINT8)HighBitSet64 (FirstNonAddress);
242 
243   //
244   // If FirstNonAddress is not an integral power of two, then we need an
245   // additional bit.
246   //
247   if ((FirstNonAddress & (FirstNonAddress - 1)) != 0) {
248     ++mPhysMemAddressWidth;
249   }
250 
251   //
252   // The minimum address width is 36 (covers up to and excluding 64 GB, which
253   // is the maximum for Ia32 + PAE). The theoretical architecture maximum for
254   // X64 long mode is 52 bits, but the DXE IPL clamps that down to 48 bits. We
255   // can simply assert that here, since 48 bits are good enough for 256 TB.
256   //
257   if (mPhysMemAddressWidth <= 36) {
258     mPhysMemAddressWidth = 36;
259   }
260   ASSERT (mPhysMemAddressWidth <= 48);
261 }
262 
263 
264 /**
265   Calculate the cap for the permanent PEI memory.
266 **/
267 STATIC
268 UINT32
GetPeiMemoryCap(VOID)269 GetPeiMemoryCap (
270   VOID
271   )
272 {
273   BOOLEAN Page1GSupport;
274   UINT32  RegEax;
275   UINT32  RegEdx;
276   UINT32  Pml4Entries;
277   UINT32  PdpEntries;
278   UINTN   TotalPages;
279 
280   //
281   // If DXE is 32-bit, then just return the traditional 64 MB cap.
282   //
283 #ifdef MDE_CPU_IA32
284   if (!FeaturePcdGet (PcdDxeIplSwitchToLongMode)) {
285     return SIZE_64MB;
286   }
287 #endif
288 
289   //
290   // Dependent on physical address width, PEI memory allocations can be
291   // dominated by the page tables built for 64-bit DXE. So we key the cap off
292   // of those. The code below is based on CreateIdentityMappingPageTables() in
293   // "MdeModulePkg/Core/DxeIplPeim/X64/VirtualMemory.c".
294   //
295   Page1GSupport = FALSE;
296   if (PcdGetBool (PcdUse1GPageTable)) {
297     AsmCpuid (0x80000000, &RegEax, NULL, NULL, NULL);
298     if (RegEax >= 0x80000001) {
299       AsmCpuid (0x80000001, NULL, NULL, NULL, &RegEdx);
300       if ((RegEdx & BIT26) != 0) {
301         Page1GSupport = TRUE;
302       }
303     }
304   }
305 
306   if (mPhysMemAddressWidth <= 39) {
307     Pml4Entries = 1;
308     PdpEntries = 1 << (mPhysMemAddressWidth - 30);
309     ASSERT (PdpEntries <= 0x200);
310   } else {
311     Pml4Entries = 1 << (mPhysMemAddressWidth - 39);
312     ASSERT (Pml4Entries <= 0x200);
313     PdpEntries = 512;
314   }
315 
316   TotalPages = Page1GSupport ? Pml4Entries + 1 :
317                                (PdpEntries + 1) * Pml4Entries + 1;
318   ASSERT (TotalPages <= 0x40201);
319 
320   //
321   // Add 64 MB for miscellaneous allocations. Note that for
322   // mPhysMemAddressWidth values close to 36, the cap will actually be
323   // dominated by this increment.
324   //
325   return (UINT32)(EFI_PAGES_TO_SIZE (TotalPages) + SIZE_64MB);
326 }
327 
328 
329 /**
330   Publish PEI core memory
331 
332   @return EFI_SUCCESS     The PEIM initialized successfully.
333 
334 **/
335 EFI_STATUS
PublishPeiMemory(VOID)336 PublishPeiMemory (
337   VOID
338   )
339 {
340   EFI_STATUS                  Status;
341   EFI_PHYSICAL_ADDRESS        MemoryBase;
342   UINT64                      MemorySize;
343   UINT32                      LowerMemorySize;
344   UINT32                      PeiMemoryCap;
345 
346   LowerMemorySize = GetSystemMemorySizeBelow4gb ();
347   if (FeaturePcdGet (PcdSmmSmramRequire)) {
348     //
349     // TSEG is chipped from the end of low RAM
350     //
351     LowerMemorySize -= FixedPcdGet8 (PcdQ35TsegMbytes) * SIZE_1MB;
352   }
353 
354   //
355   // If S3 is supported, then the S3 permanent PEI memory is placed next,
356   // downwards. Its size is primarily dictated by CpuMpPei. The formula below
357   // is an approximation.
358   //
359   if (mS3Supported) {
360     mS3AcpiReservedMemorySize = SIZE_512KB +
361       mMaxCpuCount *
362       PcdGet32 (PcdCpuApStackSize);
363     mS3AcpiReservedMemoryBase = LowerMemorySize - mS3AcpiReservedMemorySize;
364     LowerMemorySize = mS3AcpiReservedMemoryBase;
365   }
366 
367   if (mBootMode == BOOT_ON_S3_RESUME) {
368     MemoryBase = mS3AcpiReservedMemoryBase;
369     MemorySize = mS3AcpiReservedMemorySize;
370   } else {
371     PeiMemoryCap = GetPeiMemoryCap ();
372     DEBUG ((EFI_D_INFO, "%a: mPhysMemAddressWidth=%d PeiMemoryCap=%u KB\n",
373       __FUNCTION__, mPhysMemAddressWidth, PeiMemoryCap >> 10));
374 
375     //
376     // Determine the range of memory to use during PEI
377     //
378     // Technically we could lay the permanent PEI RAM over SEC's temporary
379     // decompression and scratch buffer even if "secure S3" is needed, since
380     // their lifetimes don't overlap. However, PeiFvInitialization() will cover
381     // RAM up to PcdOvmfDecompressionScratchEnd with an EfiACPIMemoryNVS memory
382     // allocation HOB, and other allocations served from the permanent PEI RAM
383     // shouldn't overlap with that HOB.
384     //
385     MemoryBase = mS3Supported && FeaturePcdGet (PcdSmmSmramRequire) ?
386       PcdGet32 (PcdOvmfDecompressionScratchEnd) :
387       PcdGet32 (PcdOvmfDxeMemFvBase) + PcdGet32 (PcdOvmfDxeMemFvSize);
388     MemorySize = LowerMemorySize - MemoryBase;
389     if (MemorySize > PeiMemoryCap) {
390       MemoryBase = LowerMemorySize - PeiMemoryCap;
391       MemorySize = PeiMemoryCap;
392     }
393   }
394 
395   //
396   // Publish this memory to the PEI Core
397   //
398   Status = PublishSystemMemory(MemoryBase, MemorySize);
399   ASSERT_EFI_ERROR (Status);
400 
401   return Status;
402 }
403 
404 
405 /**
406   Peform Memory Detection for QEMU / KVM
407 
408 **/
409 STATIC
410 VOID
QemuInitializeRam(VOID)411 QemuInitializeRam (
412   VOID
413   )
414 {
415   UINT64                      LowerMemorySize;
416   UINT64                      UpperMemorySize;
417   MTRR_SETTINGS               MtrrSettings;
418   EFI_STATUS                  Status;
419 
420   DEBUG ((EFI_D_INFO, "%a called\n", __FUNCTION__));
421 
422   //
423   // Determine total memory size available
424   //
425   LowerMemorySize = GetSystemMemorySizeBelow4gb ();
426   UpperMemorySize = GetSystemMemorySizeAbove4gb ();
427 
428   if (mBootMode == BOOT_ON_S3_RESUME) {
429     //
430     // Create the following memory HOB as an exception on the S3 boot path.
431     //
432     // Normally we'd create memory HOBs only on the normal boot path. However,
433     // CpuMpPei specifically needs such a low-memory HOB on the S3 path as
434     // well, for "borrowing" a subset of it temporarily, for the AP startup
435     // vector.
436     //
437     // CpuMpPei saves the original contents of the borrowed area in permanent
438     // PEI RAM, in a backup buffer allocated with the normal PEI services.
439     // CpuMpPei restores the original contents ("returns" the borrowed area) at
440     // End-of-PEI. End-of-PEI in turn is emitted by S3Resume2Pei before
441     // transferring control to the OS's wakeup vector in the FACS.
442     //
443     // We expect any other PEIMs that "borrow" memory similarly to CpuMpPei to
444     // restore the original contents. Furthermore, we expect all such PEIMs
445     // (CpuMpPei included) to claim the borrowed areas by producing memory
446     // allocation HOBs, and to honor preexistent memory allocation HOBs when
447     // looking for an area to borrow.
448     //
449     AddMemoryRangeHob (0, BASE_512KB + BASE_128KB);
450   } else {
451     //
452     // Create memory HOBs
453     //
454     AddMemoryRangeHob (0, BASE_512KB + BASE_128KB);
455 
456     if (FeaturePcdGet (PcdSmmSmramRequire)) {
457       UINT32 TsegSize;
458 
459       TsegSize = FixedPcdGet8 (PcdQ35TsegMbytes) * SIZE_1MB;
460       AddMemoryRangeHob (BASE_1MB, LowerMemorySize - TsegSize);
461       AddReservedMemoryBaseSizeHob (LowerMemorySize - TsegSize, TsegSize,
462         TRUE);
463     } else {
464       AddMemoryRangeHob (BASE_1MB, LowerMemorySize);
465     }
466 
467     if (UpperMemorySize != 0) {
468       AddMemoryBaseSizeHob (BASE_4GB, UpperMemorySize);
469     }
470   }
471 
472   //
473   // We'd like to keep the following ranges uncached:
474   // - [640 KB, 1 MB)
475   // - [LowerMemorySize, 4 GB)
476   //
477   // Everything else should be WB. Unfortunately, programming the inverse (ie.
478   // keeping the default UC, and configuring the complement set of the above as
479   // WB) is not reliable in general, because the end of the upper RAM can have
480   // practically any alignment, and we may not have enough variable MTRRs to
481   // cover it exactly.
482   //
483   if (IsMtrrSupported ()) {
484     MtrrGetAllMtrrs (&MtrrSettings);
485 
486     //
487     // MTRRs disabled, fixed MTRRs disabled, default type is uncached
488     //
489     ASSERT ((MtrrSettings.MtrrDefType & BIT11) == 0);
490     ASSERT ((MtrrSettings.MtrrDefType & BIT10) == 0);
491     ASSERT ((MtrrSettings.MtrrDefType & 0xFF) == 0);
492 
493     //
494     // flip default type to writeback
495     //
496     SetMem (&MtrrSettings.Fixed, sizeof MtrrSettings.Fixed, 0x06);
497     ZeroMem (&MtrrSettings.Variables, sizeof MtrrSettings.Variables);
498     MtrrSettings.MtrrDefType |= BIT11 | BIT10 | 6;
499     MtrrSetAllMtrrs (&MtrrSettings);
500 
501     //
502     // Set memory range from 640KB to 1MB to uncacheable
503     //
504     Status = MtrrSetMemoryAttribute (BASE_512KB + BASE_128KB,
505                BASE_1MB - (BASE_512KB + BASE_128KB), CacheUncacheable);
506     ASSERT_EFI_ERROR (Status);
507 
508     //
509     // Set memory range from the "top of lower RAM" (RAM below 4GB) to 4GB as
510     // uncacheable
511     //
512     Status = MtrrSetMemoryAttribute (LowerMemorySize,
513                SIZE_4GB - LowerMemorySize, CacheUncacheable);
514     ASSERT_EFI_ERROR (Status);
515   }
516 }
517 
518 /**
519   Publish system RAM and reserve memory regions
520 
521 **/
522 VOID
InitializeRamRegions(VOID)523 InitializeRamRegions (
524   VOID
525   )
526 {
527   if (!mXen) {
528     QemuInitializeRam ();
529   } else {
530     XenPublishRamRegions ();
531   }
532 
533   if (mS3Supported && mBootMode != BOOT_ON_S3_RESUME) {
534     //
535     // This is the memory range that will be used for PEI on S3 resume
536     //
537     BuildMemoryAllocationHob (
538       mS3AcpiReservedMemoryBase,
539       mS3AcpiReservedMemorySize,
540       EfiACPIMemoryNVS
541       );
542 
543     //
544     // Cover the initial RAM area used as stack and temporary PEI heap.
545     //
546     // This is reserved as ACPI NVS so it can be used on S3 resume.
547     //
548     BuildMemoryAllocationHob (
549       PcdGet32 (PcdOvmfSecPeiTempRamBase),
550       PcdGet32 (PcdOvmfSecPeiTempRamSize),
551       EfiACPIMemoryNVS
552       );
553 
554     //
555     // SEC stores its table of GUIDed section handlers here.
556     //
557     BuildMemoryAllocationHob (
558       PcdGet64 (PcdGuidedExtractHandlerTableAddress),
559       PcdGet32 (PcdGuidedExtractHandlerTableSize),
560       EfiACPIMemoryNVS
561       );
562 
563 #ifdef MDE_CPU_X64
564     //
565     // Reserve the initial page tables built by the reset vector code.
566     //
567     // Since this memory range will be used by the Reset Vector on S3
568     // resume, it must be reserved as ACPI NVS.
569     //
570     BuildMemoryAllocationHob (
571       (EFI_PHYSICAL_ADDRESS)(UINTN) PcdGet32 (PcdOvmfSecPageTablesBase),
572       (UINT64)(UINTN) PcdGet32 (PcdOvmfSecPageTablesSize),
573       EfiACPIMemoryNVS
574       );
575 #endif
576   }
577 
578   if (mBootMode != BOOT_ON_S3_RESUME) {
579     if (!FeaturePcdGet (PcdSmmSmramRequire)) {
580       //
581       // Reserve the lock box storage area
582       //
583       // Since this memory range will be used on S3 resume, it must be
584       // reserved as ACPI NVS.
585       //
586       // If S3 is unsupported, then various drivers might still write to the
587       // LockBox area. We ought to prevent DXE from serving allocation requests
588       // such that they would overlap the LockBox storage.
589       //
590       ZeroMem (
591         (VOID*)(UINTN) PcdGet32 (PcdOvmfLockBoxStorageBase),
592         (UINTN) PcdGet32 (PcdOvmfLockBoxStorageSize)
593         );
594       BuildMemoryAllocationHob (
595         (EFI_PHYSICAL_ADDRESS)(UINTN) PcdGet32 (PcdOvmfLockBoxStorageBase),
596         (UINT64)(UINTN) PcdGet32 (PcdOvmfLockBoxStorageSize),
597         mS3Supported ? EfiACPIMemoryNVS : EfiBootServicesData
598         );
599     }
600 
601     if (FeaturePcdGet (PcdSmmSmramRequire)) {
602       UINT32 TsegSize;
603 
604       //
605       // Make sure the TSEG area that we reported as a reserved memory resource
606       // cannot be used for reserved memory allocations.
607       //
608       TsegSize = FixedPcdGet8 (PcdQ35TsegMbytes) * SIZE_1MB;
609       BuildMemoryAllocationHob (
610         GetSystemMemorySizeBelow4gb() - TsegSize,
611         TsegSize,
612         EfiReservedMemoryType
613         );
614     }
615   }
616 }
617