• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! This uses the name `AsciiChar`, even though it's not exposed that way right now,
2 //! because it avoids a whole bunch of "are you sure you didn't mean `char`?"
3 //! suggestions from rustc if you get anything slightly wrong in here, and overall
4 //! helps with clarity as we're also referring to `char` intentionally in here.
5 
6 use crate::fmt;
7 use crate::mem::transmute;
8 
9 /// One of the 128 Unicode characters from U+0000 through U+007F,
10 /// often known as the [ASCII] subset.
11 ///
12 /// Officially, this is the first [block] in Unicode, _Basic Latin_.
13 /// For details, see the [*C0 Controls and Basic Latin*][chart] code chart.
14 ///
15 /// This block was based on older 7-bit character code standards such as
16 /// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2].
17 ///
18 /// # When to use this
19 ///
20 /// The main advantage of this subset is that it's always valid UTF-8.  As such,
21 /// the `&[ascii::Char]` -> `&str` conversion function (as well as other related
22 /// ones) are O(1): *no* runtime checks are needed.
23 ///
24 /// If you're consuming strings, you should usually handle Unicode and thus
25 /// accept `str`s, not limit yourself to `ascii::Char`s.
26 ///
27 /// However, certain formats are intentionally designed to produce ASCII-only
28 /// output in order to be 8-bit-clean.  In those cases, it can be simpler and
29 /// faster to generate `ascii::Char`s instead of dealing with the variable width
30 /// properties of general UTF-8 encoded strings, while still allowing the result
31 /// to be used freely with other Rust things that deal in general `str`s.
32 ///
33 /// For example, a UUID library might offer a way to produce the string
34 /// representation of a UUID as an `[ascii::Char; 36]` to avoid memory
35 /// allocation yet still allow it to be used as UTF-8 via `as_str` without
36 /// paying for validation (or needing `unsafe` code) the way it would if it
37 /// were provided as a `[u8; 36]`.
38 ///
39 /// # Layout
40 ///
41 /// This type is guaranteed to have a size and alignment of 1 byte.
42 ///
43 /// # Names
44 ///
45 /// The variants on this type are [Unicode names][NamesList] of the characters
46 /// in upper camel case, with a few tweaks:
47 /// - For `<control>` characters, the primary alias name is used.
48 /// - `LATIN` is dropped, as this block has no non-latin letters.
49 /// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block.
50 /// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc.
51 ///
52 /// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII
53 /// [block]: https://www.unicode.org/glossary/index.html#block
54 /// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf
55 /// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf
56 /// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt
57 #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
58 #[unstable(feature = "ascii_char", issue = "110998")]
59 #[repr(u8)]
60 pub enum AsciiChar {
61     /// U+0000
62     #[unstable(feature = "ascii_char_variants", issue = "110998")]
63     Null = 0,
64     /// U+0001
65     #[unstable(feature = "ascii_char_variants", issue = "110998")]
66     StartOfHeading = 1,
67     /// U+0002
68     #[unstable(feature = "ascii_char_variants", issue = "110998")]
69     StartOfText = 2,
70     /// U+0003
71     #[unstable(feature = "ascii_char_variants", issue = "110998")]
72     EndOfText = 3,
73     /// U+0004
74     #[unstable(feature = "ascii_char_variants", issue = "110998")]
75     EndOfTransmission = 4,
76     /// U+0005
77     #[unstable(feature = "ascii_char_variants", issue = "110998")]
78     Enquiry = 5,
79     /// U+0006
80     #[unstable(feature = "ascii_char_variants", issue = "110998")]
81     Acknowledge = 6,
82     /// U+0007
83     #[unstable(feature = "ascii_char_variants", issue = "110998")]
84     Bell = 7,
85     /// U+0008
86     #[unstable(feature = "ascii_char_variants", issue = "110998")]
87     Backspace = 8,
88     /// U+0009
89     #[unstable(feature = "ascii_char_variants", issue = "110998")]
90     CharacterTabulation = 9,
91     /// U+000A
92     #[unstable(feature = "ascii_char_variants", issue = "110998")]
93     LineFeed = 10,
94     /// U+000B
95     #[unstable(feature = "ascii_char_variants", issue = "110998")]
96     LineTabulation = 11,
97     /// U+000C
98     #[unstable(feature = "ascii_char_variants", issue = "110998")]
99     FormFeed = 12,
100     /// U+000D
101     #[unstable(feature = "ascii_char_variants", issue = "110998")]
102     CarriageReturn = 13,
103     /// U+000E
104     #[unstable(feature = "ascii_char_variants", issue = "110998")]
105     ShiftOut = 14,
106     /// U+000F
107     #[unstable(feature = "ascii_char_variants", issue = "110998")]
108     ShiftIn = 15,
109     /// U+0010
110     #[unstable(feature = "ascii_char_variants", issue = "110998")]
111     DataLinkEscape = 16,
112     /// U+0011
113     #[unstable(feature = "ascii_char_variants", issue = "110998")]
114     DeviceControlOne = 17,
115     /// U+0012
116     #[unstable(feature = "ascii_char_variants", issue = "110998")]
117     DeviceControlTwo = 18,
118     /// U+0013
119     #[unstable(feature = "ascii_char_variants", issue = "110998")]
120     DeviceControlThree = 19,
121     /// U+0014
122     #[unstable(feature = "ascii_char_variants", issue = "110998")]
123     DeviceControlFour = 20,
124     /// U+0015
125     #[unstable(feature = "ascii_char_variants", issue = "110998")]
126     NegativeAcknowledge = 21,
127     /// U+0016
128     #[unstable(feature = "ascii_char_variants", issue = "110998")]
129     SynchronousIdle = 22,
130     /// U+0017
131     #[unstable(feature = "ascii_char_variants", issue = "110998")]
132     EndOfTransmissionBlock = 23,
133     /// U+0018
134     #[unstable(feature = "ascii_char_variants", issue = "110998")]
135     Cancel = 24,
136     /// U+0019
137     #[unstable(feature = "ascii_char_variants", issue = "110998")]
138     EndOfMedium = 25,
139     /// U+001A
140     #[unstable(feature = "ascii_char_variants", issue = "110998")]
141     Substitute = 26,
142     /// U+001B
143     #[unstable(feature = "ascii_char_variants", issue = "110998")]
144     Escape = 27,
145     /// U+001C
146     #[unstable(feature = "ascii_char_variants", issue = "110998")]
147     InformationSeparatorFour = 28,
148     /// U+001D
149     #[unstable(feature = "ascii_char_variants", issue = "110998")]
150     InformationSeparatorThree = 29,
151     /// U+001E
152     #[unstable(feature = "ascii_char_variants", issue = "110998")]
153     InformationSeparatorTwo = 30,
154     /// U+001F
155     #[unstable(feature = "ascii_char_variants", issue = "110998")]
156     InformationSeparatorOne = 31,
157     /// U+0020
158     #[unstable(feature = "ascii_char_variants", issue = "110998")]
159     Space = 32,
160     /// U+0021
161     #[unstable(feature = "ascii_char_variants", issue = "110998")]
162     ExclamationMark = 33,
163     /// U+0022
164     #[unstable(feature = "ascii_char_variants", issue = "110998")]
165     QuotationMark = 34,
166     /// U+0023
167     #[unstable(feature = "ascii_char_variants", issue = "110998")]
168     NumberSign = 35,
169     /// U+0024
170     #[unstable(feature = "ascii_char_variants", issue = "110998")]
171     DollarSign = 36,
172     /// U+0025
173     #[unstable(feature = "ascii_char_variants", issue = "110998")]
174     PercentSign = 37,
175     /// U+0026
176     #[unstable(feature = "ascii_char_variants", issue = "110998")]
177     Ampersand = 38,
178     /// U+0027
179     #[unstable(feature = "ascii_char_variants", issue = "110998")]
180     Apostrophe = 39,
181     /// U+0028
182     #[unstable(feature = "ascii_char_variants", issue = "110998")]
183     LeftParenthesis = 40,
184     /// U+0029
185     #[unstable(feature = "ascii_char_variants", issue = "110998")]
186     RightParenthesis = 41,
187     /// U+002A
188     #[unstable(feature = "ascii_char_variants", issue = "110998")]
189     Asterisk = 42,
190     /// U+002B
191     #[unstable(feature = "ascii_char_variants", issue = "110998")]
192     PlusSign = 43,
193     /// U+002C
194     #[unstable(feature = "ascii_char_variants", issue = "110998")]
195     Comma = 44,
196     /// U+002D
197     #[unstable(feature = "ascii_char_variants", issue = "110998")]
198     HyphenMinus = 45,
199     /// U+002E
200     #[unstable(feature = "ascii_char_variants", issue = "110998")]
201     FullStop = 46,
202     /// U+002F
203     #[unstable(feature = "ascii_char_variants", issue = "110998")]
204     Solidus = 47,
205     /// U+0030
206     #[unstable(feature = "ascii_char_variants", issue = "110998")]
207     Digit0 = 48,
208     /// U+0031
209     #[unstable(feature = "ascii_char_variants", issue = "110998")]
210     Digit1 = 49,
211     /// U+0032
212     #[unstable(feature = "ascii_char_variants", issue = "110998")]
213     Digit2 = 50,
214     /// U+0033
215     #[unstable(feature = "ascii_char_variants", issue = "110998")]
216     Digit3 = 51,
217     /// U+0034
218     #[unstable(feature = "ascii_char_variants", issue = "110998")]
219     Digit4 = 52,
220     /// U+0035
221     #[unstable(feature = "ascii_char_variants", issue = "110998")]
222     Digit5 = 53,
223     /// U+0036
224     #[unstable(feature = "ascii_char_variants", issue = "110998")]
225     Digit6 = 54,
226     /// U+0037
227     #[unstable(feature = "ascii_char_variants", issue = "110998")]
228     Digit7 = 55,
229     /// U+0038
230     #[unstable(feature = "ascii_char_variants", issue = "110998")]
231     Digit8 = 56,
232     /// U+0039
233     #[unstable(feature = "ascii_char_variants", issue = "110998")]
234     Digit9 = 57,
235     /// U+003A
236     #[unstable(feature = "ascii_char_variants", issue = "110998")]
237     Colon = 58,
238     /// U+003B
239     #[unstable(feature = "ascii_char_variants", issue = "110998")]
240     Semicolon = 59,
241     /// U+003C
242     #[unstable(feature = "ascii_char_variants", issue = "110998")]
243     LessThanSign = 60,
244     /// U+003D
245     #[unstable(feature = "ascii_char_variants", issue = "110998")]
246     EqualsSign = 61,
247     /// U+003E
248     #[unstable(feature = "ascii_char_variants", issue = "110998")]
249     GreaterThanSign = 62,
250     /// U+003F
251     #[unstable(feature = "ascii_char_variants", issue = "110998")]
252     QuestionMark = 63,
253     /// U+0040
254     #[unstable(feature = "ascii_char_variants", issue = "110998")]
255     CommercialAt = 64,
256     /// U+0041
257     #[unstable(feature = "ascii_char_variants", issue = "110998")]
258     CapitalA = 65,
259     /// U+0042
260     #[unstable(feature = "ascii_char_variants", issue = "110998")]
261     CapitalB = 66,
262     /// U+0043
263     #[unstable(feature = "ascii_char_variants", issue = "110998")]
264     CapitalC = 67,
265     /// U+0044
266     #[unstable(feature = "ascii_char_variants", issue = "110998")]
267     CapitalD = 68,
268     /// U+0045
269     #[unstable(feature = "ascii_char_variants", issue = "110998")]
270     CapitalE = 69,
271     /// U+0046
272     #[unstable(feature = "ascii_char_variants", issue = "110998")]
273     CapitalF = 70,
274     /// U+0047
275     #[unstable(feature = "ascii_char_variants", issue = "110998")]
276     CapitalG = 71,
277     /// U+0048
278     #[unstable(feature = "ascii_char_variants", issue = "110998")]
279     CapitalH = 72,
280     /// U+0049
281     #[unstable(feature = "ascii_char_variants", issue = "110998")]
282     CapitalI = 73,
283     /// U+004A
284     #[unstable(feature = "ascii_char_variants", issue = "110998")]
285     CapitalJ = 74,
286     /// U+004B
287     #[unstable(feature = "ascii_char_variants", issue = "110998")]
288     CapitalK = 75,
289     /// U+004C
290     #[unstable(feature = "ascii_char_variants", issue = "110998")]
291     CapitalL = 76,
292     /// U+004D
293     #[unstable(feature = "ascii_char_variants", issue = "110998")]
294     CapitalM = 77,
295     /// U+004E
296     #[unstable(feature = "ascii_char_variants", issue = "110998")]
297     CapitalN = 78,
298     /// U+004F
299     #[unstable(feature = "ascii_char_variants", issue = "110998")]
300     CapitalO = 79,
301     /// U+0050
302     #[unstable(feature = "ascii_char_variants", issue = "110998")]
303     CapitalP = 80,
304     /// U+0051
305     #[unstable(feature = "ascii_char_variants", issue = "110998")]
306     CapitalQ = 81,
307     /// U+0052
308     #[unstable(feature = "ascii_char_variants", issue = "110998")]
309     CapitalR = 82,
310     /// U+0053
311     #[unstable(feature = "ascii_char_variants", issue = "110998")]
312     CapitalS = 83,
313     /// U+0054
314     #[unstable(feature = "ascii_char_variants", issue = "110998")]
315     CapitalT = 84,
316     /// U+0055
317     #[unstable(feature = "ascii_char_variants", issue = "110998")]
318     CapitalU = 85,
319     /// U+0056
320     #[unstable(feature = "ascii_char_variants", issue = "110998")]
321     CapitalV = 86,
322     /// U+0057
323     #[unstable(feature = "ascii_char_variants", issue = "110998")]
324     CapitalW = 87,
325     /// U+0058
326     #[unstable(feature = "ascii_char_variants", issue = "110998")]
327     CapitalX = 88,
328     /// U+0059
329     #[unstable(feature = "ascii_char_variants", issue = "110998")]
330     CapitalY = 89,
331     /// U+005A
332     #[unstable(feature = "ascii_char_variants", issue = "110998")]
333     CapitalZ = 90,
334     /// U+005B
335     #[unstable(feature = "ascii_char_variants", issue = "110998")]
336     LeftSquareBracket = 91,
337     /// U+005C
338     #[unstable(feature = "ascii_char_variants", issue = "110998")]
339     ReverseSolidus = 92,
340     /// U+005D
341     #[unstable(feature = "ascii_char_variants", issue = "110998")]
342     RightSquareBracket = 93,
343     /// U+005E
344     #[unstable(feature = "ascii_char_variants", issue = "110998")]
345     CircumflexAccent = 94,
346     /// U+005F
347     #[unstable(feature = "ascii_char_variants", issue = "110998")]
348     LowLine = 95,
349     /// U+0060
350     #[unstable(feature = "ascii_char_variants", issue = "110998")]
351     GraveAccent = 96,
352     /// U+0061
353     #[unstable(feature = "ascii_char_variants", issue = "110998")]
354     SmallA = 97,
355     /// U+0062
356     #[unstable(feature = "ascii_char_variants", issue = "110998")]
357     SmallB = 98,
358     /// U+0063
359     #[unstable(feature = "ascii_char_variants", issue = "110998")]
360     SmallC = 99,
361     /// U+0064
362     #[unstable(feature = "ascii_char_variants", issue = "110998")]
363     SmallD = 100,
364     /// U+0065
365     #[unstable(feature = "ascii_char_variants", issue = "110998")]
366     SmallE = 101,
367     /// U+0066
368     #[unstable(feature = "ascii_char_variants", issue = "110998")]
369     SmallF = 102,
370     /// U+0067
371     #[unstable(feature = "ascii_char_variants", issue = "110998")]
372     SmallG = 103,
373     /// U+0068
374     #[unstable(feature = "ascii_char_variants", issue = "110998")]
375     SmallH = 104,
376     /// U+0069
377     #[unstable(feature = "ascii_char_variants", issue = "110998")]
378     SmallI = 105,
379     /// U+006A
380     #[unstable(feature = "ascii_char_variants", issue = "110998")]
381     SmallJ = 106,
382     /// U+006B
383     #[unstable(feature = "ascii_char_variants", issue = "110998")]
384     SmallK = 107,
385     /// U+006C
386     #[unstable(feature = "ascii_char_variants", issue = "110998")]
387     SmallL = 108,
388     /// U+006D
389     #[unstable(feature = "ascii_char_variants", issue = "110998")]
390     SmallM = 109,
391     /// U+006E
392     #[unstable(feature = "ascii_char_variants", issue = "110998")]
393     SmallN = 110,
394     /// U+006F
395     #[unstable(feature = "ascii_char_variants", issue = "110998")]
396     SmallO = 111,
397     /// U+0070
398     #[unstable(feature = "ascii_char_variants", issue = "110998")]
399     SmallP = 112,
400     /// U+0071
401     #[unstable(feature = "ascii_char_variants", issue = "110998")]
402     SmallQ = 113,
403     /// U+0072
404     #[unstable(feature = "ascii_char_variants", issue = "110998")]
405     SmallR = 114,
406     /// U+0073
407     #[unstable(feature = "ascii_char_variants", issue = "110998")]
408     SmallS = 115,
409     /// U+0074
410     #[unstable(feature = "ascii_char_variants", issue = "110998")]
411     SmallT = 116,
412     /// U+0075
413     #[unstable(feature = "ascii_char_variants", issue = "110998")]
414     SmallU = 117,
415     /// U+0076
416     #[unstable(feature = "ascii_char_variants", issue = "110998")]
417     SmallV = 118,
418     /// U+0077
419     #[unstable(feature = "ascii_char_variants", issue = "110998")]
420     SmallW = 119,
421     /// U+0078
422     #[unstable(feature = "ascii_char_variants", issue = "110998")]
423     SmallX = 120,
424     /// U+0079
425     #[unstable(feature = "ascii_char_variants", issue = "110998")]
426     SmallY = 121,
427     /// U+007A
428     #[unstable(feature = "ascii_char_variants", issue = "110998")]
429     SmallZ = 122,
430     /// U+007B
431     #[unstable(feature = "ascii_char_variants", issue = "110998")]
432     LeftCurlyBracket = 123,
433     /// U+007C
434     #[unstable(feature = "ascii_char_variants", issue = "110998")]
435     VerticalLine = 124,
436     /// U+007D
437     #[unstable(feature = "ascii_char_variants", issue = "110998")]
438     RightCurlyBracket = 125,
439     /// U+007E
440     #[unstable(feature = "ascii_char_variants", issue = "110998")]
441     Tilde = 126,
442     /// U+007F
443     #[unstable(feature = "ascii_char_variants", issue = "110998")]
444     Delete = 127,
445 }
446 
447 impl AsciiChar {
448     /// Creates an ascii character from the byte `b`,
449     /// or returns `None` if it's too large.
450     #[unstable(feature = "ascii_char", issue = "110998")]
451     #[inline]
from_u8(b: u8) -> Option<Self>452     pub const fn from_u8(b: u8) -> Option<Self> {
453         if b <= 127 {
454             // SAFETY: Just checked that `b` is in-range
455             Some(unsafe { Self::from_u8_unchecked(b) })
456         } else {
457             None
458         }
459     }
460 
461     /// Creates an ASCII character from the byte `b`,
462     /// without checking whether it's valid.
463     ///
464     /// # Safety
465     ///
466     /// `b` must be in `0..=127`, or else this is UB.
467     #[unstable(feature = "ascii_char", issue = "110998")]
468     #[inline]
from_u8_unchecked(b: u8) -> Self469     pub const unsafe fn from_u8_unchecked(b: u8) -> Self {
470         // SAFETY: Our safety precondition is that `b` is in-range.
471         unsafe { transmute(b) }
472     }
473 
474     /// When passed the *number* `0`, `1`, …, `9`, returns the *character*
475     /// `'0'`, `'1'`, …, `'9'` respectively.
476     ///
477     /// If `d >= 10`, returns `None`.
478     #[unstable(feature = "ascii_char", issue = "110998")]
479     #[inline]
digit(d: u8) -> Option<Self>480     pub const fn digit(d: u8) -> Option<Self> {
481         if d < 10 {
482             // SAFETY: Just checked it's in-range.
483             Some(unsafe { Self::digit_unchecked(d) })
484         } else {
485             None
486         }
487     }
488 
489     /// When passed the *number* `0`, `1`, …, `9`, returns the *character*
490     /// `'0'`, `'1'`, …, `'9'` respectively, without checking that it's in-range.
491     ///
492     /// # Safety
493     ///
494     /// This is immediate UB if called with `d > 64`.
495     ///
496     /// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic.
497     /// Notably, it should not be expected to return hex digits, or any other
498     /// reasonable extension of the decimal digits.
499     ///
500     /// (This lose safety condition is intended to simplify soundness proofs
501     /// when writing code using this method, since the implementation doesn't
502     /// need something really specific, not to make those other arguments do
503     /// something useful. It might be tightened before stabilization.)
504     #[unstable(feature = "ascii_char", issue = "110998")]
505     #[inline]
digit_unchecked(d: u8) -> Self506     pub const unsafe fn digit_unchecked(d: u8) -> Self {
507         debug_assert!(d < 10);
508 
509         // SAFETY: `'0'` through `'9'` are U+00030 through U+0039,
510         // so because `d` must be 64 or less the addition can return at most
511         // 112 (0x70), which doesn't overflow and is within the ASCII range.
512         unsafe {
513             let byte = b'0'.unchecked_add(d);
514             Self::from_u8_unchecked(byte)
515         }
516     }
517 
518     /// Gets this ASCII character as a byte.
519     #[unstable(feature = "ascii_char", issue = "110998")]
520     #[inline]
as_u8(self) -> u8521     pub const fn as_u8(self) -> u8 {
522         self as u8
523     }
524 
525     /// Gets this ASCII character as a `char` Unicode Scalar Value.
526     #[unstable(feature = "ascii_char", issue = "110998")]
527     #[inline]
as_char(self) -> char528     pub const fn as_char(self) -> char {
529         self as u8 as char
530     }
531 
532     /// Views this ASCII character as a one-code-unit UTF-8 `str`.
533     #[unstable(feature = "ascii_char", issue = "110998")]
534     #[inline]
as_str(&self) -> &str535     pub const fn as_str(&self) -> &str {
536         crate::slice::from_ref(self).as_str()
537     }
538 }
539 
540 impl [AsciiChar] {
541     /// Views this slice of ASCII characters as a UTF-8 `str`.
542     #[unstable(feature = "ascii_char", issue = "110998")]
543     #[inline]
as_str(&self) -> &str544     pub const fn as_str(&self) -> &str {
545         let ascii_ptr: *const Self = self;
546         let str_ptr = ascii_ptr as *const str;
547         // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
548         // code unit having the same value as the ASCII byte.
549         unsafe { &*str_ptr }
550     }
551 
552     /// Views this slice of ASCII characters as a slice of `u8` bytes.
553     #[unstable(feature = "ascii_char", issue = "110998")]
554     #[inline]
as_bytes(&self) -> &[u8]555     pub const fn as_bytes(&self) -> &[u8] {
556         self.as_str().as_bytes()
557     }
558 }
559 
560 #[unstable(feature = "ascii_char", issue = "110998")]
561 impl fmt::Display for AsciiChar {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result562     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
563         <str as fmt::Display>::fmt(self.as_str(), f)
564     }
565 }
566