• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//! Platform-specific assembly instructions to avoid
//! intermediate rounding on architectures with FPUs.
//!
//! This is adapted from the implementation in the Rust core library;
//! the original implementation can be found [here](https://github.com/rust-lang/rust/blob/master/library/core/src/num/dec2flt/fpu.rs).
//!
//! It is therefore also subject to an Apache-2.0/MIT license.
8 
9 #![cfg(feature = "nightly")]
10 #![doc(hidden)]
11 
12 pub use fpu_precision::set_precision;
13 
// On x86, the x87 FPU is used for float operations if the SSE/SSE2 extensions are not available.
// The x87 FPU operates with 80 bits of precision by default, which means that operations will
// round to 80 bits, causing double rounding to happen when values are eventually represented as
// 32/64 bit float values. To overcome this, the FPU control word can be set so that the
// computations are performed in the desired precision.
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
mod fpu_precision {
    // `asm!` is not part of the prelude: since its stabilization (Rust 1.59) it has to be
    // imported from `core::arch`.
    use core::arch::asm;
    use core::mem::size_of;

    /// Bits 8 and 9 of the control word: the Precision Control (PC) field.
    const PRECISION_CONTROL_MASK: u16 = 0x0300;

    /// A structure used to preserve the original value of the FPU control word, so that it can be
    /// restored when the structure is dropped.
    ///
    /// The x87 FPU control word is a 16-bit register whose fields are as follows:
    ///
    /// | 12-15 | 10-11 | 8-9 | 6-7 |  5 |  4 |  3 |  2 |  1 |  0 |
    /// |------:|------:|----:|----:|---:|---:|---:|---:|---:|---:|
    /// |       | RC    | PC  |     | PM | UM | OM | ZM | DM | IM |
    ///
    /// The documentation for all of the fields is available in the IA-32 Architectures Software
    /// Developer's Manual (Volume 1).
    ///
    /// The only field which is relevant for the following code is PC, Precision Control. This
    /// field determines the precision of the operations performed by the FPU. It can be set to:
    ///  - 0b00, single precision i.e., 32-bits
    ///  - 0b10, double precision i.e., 64-bits
    ///  - 0b11, double extended precision i.e., 80-bits (default state)
    ///
    /// The 0b01 value is reserved and should not be used.
    #[must_use = "the previous FPU control word is restored as soon as this guard is dropped"]
    pub struct FPUControlWord(u16);

    /// Loads `cw` into the x87 FPU control word.
    fn set_cw(cw: u16) {
        // SAFETY: the `fldcw` instruction has been audited to be able to work correctly with
        // any `u16`
        unsafe {
            asm!(
                "fldcw word ptr [{}]",
                in(reg) &cw,
                options(nostack),
            )
        }
    }

    /// Sets the precision field of the FPU to `T` and returns a `FPUControlWord`.
    ///
    /// The precision is chosen from `size_of::<T>()`: 4 bytes selects single precision, 8 bytes
    /// selects double precision, and any other size selects double extended precision.
    ///
    /// The returned guard holds the control word that was active before the call and writes it
    /// back when dropped, so it must be bound to a variable for as long as the adjusted
    /// precision is needed.
    pub fn set_precision<T>() -> FPUControlWord {
        let mut cw = 0_u16;

        // Compute the value for the Precision Control field that is appropriate for `T`.
        let cw_precision = match size_of::<T>() {
            4 => 0x0000, // 32 bits
            8 => 0x0200, // 64 bits
            _ => 0x0300, // default, 80 bits
        };

        // Get the original value of the control word to restore it later, when the
        // `FPUControlWord` structure is dropped
        // SAFETY: the `fnstcw` instruction has been audited to be able to work correctly with
        // any `u16`
        unsafe {
            asm!(
                "fnstcw word ptr [{}]",
                in(reg) &mut cw,
                options(nostack),
            )
        }

        // Set the control word to the desired precision. This is achieved by masking away the old
        // precision (bits 8 and 9, 0x300) and replacing it with the precision flag computed above.
        set_cw((cw & !PRECISION_CONTROL_MASK) | cw_precision);

        FPUControlWord(cw)
    }

    impl Drop for FPUControlWord {
        /// Writes the saved control word back to the FPU.
        fn drop(&mut self) {
            set_cw(self.0)
        }
    }
}
91 
// On most architectures, floating point operations have an explicit bit size, therefore the
// precision of the computation is determined on a per-operation basis.
#[cfg(any(not(target_arch = "x86"), target_feature = "sse2"))]
mod fpu_precision {
    /// No-op counterpart of the x87 precision switch.
    ///
    /// This variant is compiled for every target other than x86 without SSE2. It does nothing
    /// and returns `()`, so callers can write `let _cw = set_precision::<T>();` regardless of
    /// the target.
    pub fn set_precision<T>() {}
}
99