//! Platform-specific, assembly instructions to avoid
//! intermediate rounding on architectures with FPUs.
//!
//! This is adapted from the implementation in the Rust core library,
//! the original implementation can be found [here](https://github.com/rust-lang/rust/blob/master/library/core/src/num/dec2flt/fpu.rs).
//!
//! It is therefore also subject to an Apache2.0/MIT license.

#![cfg(feature = "nightly")]
#![doc(hidden)]

pub use fpu_precision::set_precision;

// On x86, the x87 FPU is used for float operations if the SSE/SSE2 extensions are not available.
// The x87 FPU operates with 80 bits of precision by default, which means that operations will
// round to 80 bits causing double rounding to happen when values are eventually represented as
// 32/64 bit float values. To overcome this, the FPU control word can be set so that the
// computations are performed in the desired precision.
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
mod fpu_precision {
    // `asm!` is not part of the prelude: it has to be imported explicitly from `core::arch`.
    use core::arch::asm;
    use core::mem::size_of;

    /// Bits 8 and 9 of the control word: the Precision Control (PC) field.
    const PRECISION_CONTROL_MASK: u16 = 0x0300;

    /// A structure used to preserve the original value of the FPU control word, so that it can be
    /// restored when the structure is dropped.
    ///
    /// The x87 FPU control word is a 16-bit register whose fields are as follows:
    ///
    /// | 12-15 | 10-11 | 8-9 | 6-7 |  5 |  4 |  3 |  2 |  1 |  0 |
    /// |------:|------:|----:|----:|---:|---:|---:|---:|---:|---:|
    /// |       | RC    | PC  |     | PM | UM | OM | ZM | DM | IM |
    ///
    /// The documentation for all of the fields is available in the IA-32 Architectures Software
    /// Developer's Manual (Volume 1).
    ///
    /// The only field which is relevant for the following code is PC, Precision Control. This
    /// field determines the precision of the operations performed by the FPU. It can be set to:
    ///  - 0b00, single precision i.e., 32-bits
    ///  - 0b10, double precision i.e., 64-bits
    ///  - 0b11, double extended precision i.e., 80-bits (default state)
    ///
    /// The 0b01 value is reserved and should not be used.
    #[must_use = "the previous FPU control word is restored as soon as this guard is dropped"]
    pub struct FPUControlWord(u16);

    /// Loads `cw` into the x87 FPU control word.
    fn set_cw(cw: u16) {
        // SAFETY: the `fldcw` instruction has been audited to be able to work correctly with
        // any `u16`
        unsafe {
            asm!(
                "fldcw word ptr [{}]",
                in(reg) &cw,
                options(nostack),
            );
        }
    }

    /// Sets the precision field of the FPU to `T` and returns a `FPUControlWord`.
    ///
    /// The precision is chosen from `size_of::<T>()`: 4 bytes selects single precision,
    /// 8 bytes selects double precision, and any other size selects double extended precision.
    /// The returned guard holds the control word that was active before the call and writes it
    /// back when dropped, so it must be kept alive for as long as the precision is needed.
    pub fn set_precision<T>() -> FPUControlWord {
        let mut cw = 0_u16;

        // Compute the value for the Precision Control field that is appropriate for `T`.
        let cw_precision: u16 = match size_of::<T>() {
            4 => 0x0000, // 32 bits
            8 => 0x0200, // 64 bits
            _ => 0x0300, // default, 80 bits
        };

        // Get the original value of the control word to restore it later, when the
        // `FPUControlWord` structure is dropped
        // SAFETY: the `fnstcw` instruction has been audited to be able to work correctly with
        // any `u16`
        unsafe {
            asm!(
                "fnstcw word ptr [{}]",
                in(reg) &mut cw,
                options(nostack),
            );
        }

        // Set the control word to the desired precision. This is achieved by masking away the old
        // precision (bits 8 and 9, 0x300) and replacing it with the precision flag computed above.
        set_cw((cw & !PRECISION_CONTROL_MASK) | cw_precision);

        FPUControlWord(cw)
    }

    impl Drop for FPUControlWord {
        /// Restores the control word that was saved when the guard was created.
        fn drop(&mut self) {
            set_cw(self.0)
        }
    }
}

// In most architectures, floating point operations have an explicit bit size, therefore the
// precision of the computation is determined on a per-operation basis.
#[cfg(any(not(target_arch = "x86"), target_feature = "sse2"))]
mod fpu_precision {
    /// No-op counterpart of the x87 implementation: there is no control word to adjust here,
    /// so nothing is changed and nothing needs to be restored.
    pub fn set_precision<T>() {}
}