1 //! This uses the name `AsciiChar`, even though it's not exposed that way right now, 2 //! because it avoids a whole bunch of "are you sure you didn't mean `char`?" 3 //! suggestions from rustc if you get anything slightly wrong in here, and overall 4 //! helps with clarity as we're also referring to `char` intentionally in here. 5 6 use crate::fmt; 7 use crate::mem::transmute; 8 9 /// One of the 128 Unicode characters from U+0000 through U+007F, 10 /// often known as the [ASCII] subset. 11 /// 12 /// Officially, this is the first [block] in Unicode, _Basic Latin_. 13 /// For details, see the [*C0 Controls and Basic Latin*][chart] code chart. 14 /// 15 /// This block was based on older 7-bit character code standards such as 16 /// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2]. 17 /// 18 /// # When to use this 19 /// 20 /// The main advantage of this subset is that it's always valid UTF-8. As such, 21 /// the `&[ascii::Char]` -> `&str` conversion function (as well as other related 22 /// ones) are O(1): *no* runtime checks are needed. 23 /// 24 /// If you're consuming strings, you should usually handle Unicode and thus 25 /// accept `str`s, not limit yourself to `ascii::Char`s. 26 /// 27 /// However, certain formats are intentionally designed to produce ASCII-only 28 /// output in order to be 8-bit-clean. In those cases, it can be simpler and 29 /// faster to generate `ascii::Char`s instead of dealing with the variable width 30 /// properties of general UTF-8 encoded strings, while still allowing the result 31 /// to be used freely with other Rust things that deal in general `str`s. 32 /// 33 /// For example, a UUID library might offer a way to produce the string 34 /// representation of a UUID as an `[ascii::Char; 36]` to avoid memory 35 /// allocation yet still allow it to be used as UTF-8 via `as_str` without 36 /// paying for validation (or needing `unsafe` code) the way it would if it 37 /// were provided as a `[u8; 36]`. 38 /// 39 /// # Layout 40 /// 41 /// This type is guaranteed to have a size and alignment of 1 byte. 42 /// 43 /// # Names 44 /// 45 /// The variants on this type are [Unicode names][NamesList] of the characters 46 /// in upper camel case, with a few tweaks: 47 /// - For `<control>` characters, the primary alias name is used. 48 /// - `LATIN` is dropped, as this block has no non-latin letters. 49 /// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block. 50 /// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc. 51 /// 52 /// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII 53 /// [block]: https://www.unicode.org/glossary/index.html#block 54 /// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf 55 /// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf 56 /// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt 57 #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] 58 #[unstable(feature = "ascii_char", issue = "110998")] 59 #[repr(u8)] 60 pub enum AsciiChar { 61 /// U+0000 62 #[unstable(feature = "ascii_char_variants", issue = "110998")] 63 Null = 0, 64 /// U+0001 65 #[unstable(feature = "ascii_char_variants", issue = "110998")] 66 StartOfHeading = 1, 67 /// U+0002 68 #[unstable(feature = "ascii_char_variants", issue = "110998")] 69 StartOfText = 2, 70 /// U+0003 71 #[unstable(feature = "ascii_char_variants", issue = "110998")] 72 EndOfText = 3, 73 /// U+0004 74 #[unstable(feature = "ascii_char_variants", issue = "110998")] 75 EndOfTransmission = 4, 76 /// U+0005 77 #[unstable(feature = "ascii_char_variants", issue = "110998")] 78 Enquiry = 5, 79 /// U+0006 80 #[unstable(feature = "ascii_char_variants", issue = "110998")] 81 Acknowledge = 6, 82 /// U+0007 83 #[unstable(feature = "ascii_char_variants", issue = "110998")] 84 Bell = 7, 85 /// U+0008 86 #[unstable(feature = "ascii_char_variants", issue = "110998")] 87 Backspace = 8, 88 /// U+0009 89 #[unstable(feature = "ascii_char_variants", issue = "110998")] 90 CharacterTabulation = 9, 91 /// U+000A 92 #[unstable(feature = "ascii_char_variants", issue = "110998")] 93 LineFeed = 10, 94 /// U+000B 95 #[unstable(feature = "ascii_char_variants", issue = "110998")] 96 LineTabulation = 11, 97 /// U+000C 98 #[unstable(feature = "ascii_char_variants", issue = "110998")] 99 FormFeed = 12, 100 /// U+000D 101 #[unstable(feature = "ascii_char_variants", issue = "110998")] 102 CarriageReturn = 13, 103 /// U+000E 104 #[unstable(feature = "ascii_char_variants", issue = "110998")] 105 ShiftOut = 14, 106 /// U+000F 107 #[unstable(feature = "ascii_char_variants", issue = "110998")] 108 ShiftIn = 15, 109 /// U+0010 110 #[unstable(feature = "ascii_char_variants", issue = "110998")] 111 DataLinkEscape = 16, 112 /// U+0011 113 #[unstable(feature = "ascii_char_variants", issue = "110998")] 114 DeviceControlOne = 17, 115 /// U+0012 116 #[unstable(feature = "ascii_char_variants", issue = "110998")] 117 DeviceControlTwo = 18, 118 /// U+0013 119 #[unstable(feature = "ascii_char_variants", issue = "110998")] 120 DeviceControlThree = 19, 121 /// U+0014 122 #[unstable(feature = "ascii_char_variants", issue = "110998")] 123 DeviceControlFour = 20, 124 /// U+0015 125 #[unstable(feature = "ascii_char_variants", issue = "110998")] 126 NegativeAcknowledge = 21, 127 /// U+0016 128 #[unstable(feature = "ascii_char_variants", issue = "110998")] 129 SynchronousIdle = 22, 130 /// U+0017 131 #[unstable(feature = "ascii_char_variants", issue = "110998")] 132 EndOfTransmissionBlock = 23, 133 /// U+0018 134 #[unstable(feature = "ascii_char_variants", issue = "110998")] 135 Cancel = 24, 136 /// U+0019 137 #[unstable(feature = "ascii_char_variants", issue = "110998")] 138 EndOfMedium = 25, 139 /// U+001A 140 #[unstable(feature = "ascii_char_variants", issue = "110998")] 141 Substitute = 26, 142 /// U+001B 143 #[unstable(feature = "ascii_char_variants", issue = "110998")] 144 Escape = 27, 145 /// U+001C 146 #[unstable(feature = "ascii_char_variants", issue = "110998")] 147 InformationSeparatorFour = 28, 148 /// U+001D 149 #[unstable(feature = "ascii_char_variants", issue = "110998")] 150 InformationSeparatorThree = 29, 151 /// U+001E 152 #[unstable(feature = "ascii_char_variants", issue = "110998")] 153 InformationSeparatorTwo = 30, 154 /// U+001F 155 #[unstable(feature = "ascii_char_variants", issue = "110998")] 156 InformationSeparatorOne = 31, 157 /// U+0020 158 #[unstable(feature = "ascii_char_variants", issue = "110998")] 159 Space = 32, 160 /// U+0021 161 #[unstable(feature = "ascii_char_variants", issue = "110998")] 162 ExclamationMark = 33, 163 /// U+0022 164 #[unstable(feature = "ascii_char_variants", issue = "110998")] 165 QuotationMark = 34, 166 /// U+0023 167 #[unstable(feature = "ascii_char_variants", issue = "110998")] 168 NumberSign = 35, 169 /// U+0024 170 #[unstable(feature = "ascii_char_variants", issue = "110998")] 171 DollarSign = 36, 172 /// U+0025 173 #[unstable(feature = "ascii_char_variants", issue = "110998")] 174 PercentSign = 37, 175 /// U+0026 176 #[unstable(feature = "ascii_char_variants", issue = "110998")] 177 Ampersand = 38, 178 /// U+0027 179 #[unstable(feature = "ascii_char_variants", issue = "110998")] 180 Apostrophe = 39, 181 /// U+0028 182 #[unstable(feature = "ascii_char_variants", issue = "110998")] 183 LeftParenthesis = 40, 184 /// U+0029 185 #[unstable(feature = "ascii_char_variants", issue = "110998")] 186 RightParenthesis = 41, 187 /// U+002A 188 #[unstable(feature = "ascii_char_variants", issue = "110998")] 189 Asterisk = 42, 190 /// U+002B 191 #[unstable(feature = "ascii_char_variants", issue = "110998")] 192 PlusSign = 43, 193 /// U+002C 194 #[unstable(feature = "ascii_char_variants", issue = "110998")] 195 Comma = 44, 196 /// U+002D 197 #[unstable(feature = "ascii_char_variants", issue = "110998")] 198 HyphenMinus = 45, 199 /// U+002E 200 #[unstable(feature = "ascii_char_variants", issue = "110998")] 201 FullStop = 46, 202 /// U+002F 203 #[unstable(feature = "ascii_char_variants", issue = "110998")] 204 Solidus = 47, 205 /// U+0030 206 #[unstable(feature = "ascii_char_variants", issue = "110998")] 207 Digit0 = 48, 208 /// U+0031 209 #[unstable(feature = "ascii_char_variants", issue = "110998")] 210 Digit1 = 49, 211 /// U+0032 212 #[unstable(feature = "ascii_char_variants", issue = "110998")] 213 Digit2 = 50, 214 /// U+0033 215 #[unstable(feature = "ascii_char_variants", issue = "110998")] 216 Digit3 = 51, 217 /// U+0034 218 #[unstable(feature = "ascii_char_variants", issue = "110998")] 219 Digit4 = 52, 220 /// U+0035 221 #[unstable(feature = "ascii_char_variants", issue = "110998")] 222 Digit5 = 53, 223 /// U+0036 224 #[unstable(feature = "ascii_char_variants", issue = "110998")] 225 Digit6 = 54, 226 /// U+0037 227 #[unstable(feature = "ascii_char_variants", issue = "110998")] 228 Digit7 = 55, 229 /// U+0038 230 #[unstable(feature = "ascii_char_variants", issue = "110998")] 231 Digit8 = 56, 232 /// U+0039 233 #[unstable(feature = "ascii_char_variants", issue = "110998")] 234 Digit9 = 57, 235 /// U+003A 236 #[unstable(feature = "ascii_char_variants", issue = "110998")] 237 Colon = 58, 238 /// U+003B 239 #[unstable(feature = "ascii_char_variants", issue = "110998")] 240 Semicolon = 59, 241 /// U+003C 242 #[unstable(feature = "ascii_char_variants", issue = "110998")] 243 LessThanSign = 60, 244 /// U+003D 245 #[unstable(feature = "ascii_char_variants", issue = "110998")] 246 EqualsSign = 61, 247 /// U+003E 248 #[unstable(feature = "ascii_char_variants", issue = "110998")] 249 GreaterThanSign = 62, 250 /// U+003F 251 #[unstable(feature = "ascii_char_variants", issue = "110998")] 252 QuestionMark = 63, 253 /// U+0040 254 #[unstable(feature = "ascii_char_variants", issue = "110998")] 255 CommercialAt = 64, 256 /// U+0041 257 #[unstable(feature = "ascii_char_variants", issue = "110998")] 258 CapitalA = 65, 259 /// U+0042 260 #[unstable(feature = "ascii_char_variants", issue = "110998")] 261 CapitalB = 66, 262 /// U+0043 263 #[unstable(feature = "ascii_char_variants", issue = "110998")] 264 CapitalC = 67, 265 /// U+0044 266 #[unstable(feature = "ascii_char_variants", issue = "110998")] 267 CapitalD = 68, 268 /// U+0045 269 #[unstable(feature = "ascii_char_variants", issue = "110998")] 270 CapitalE = 69, 271 /// U+0046 272 #[unstable(feature = "ascii_char_variants", issue = "110998")] 273 CapitalF = 70, 274 /// U+0047 275 #[unstable(feature = "ascii_char_variants", issue = "110998")] 276 CapitalG = 71, 277 /// U+0048 278 #[unstable(feature = "ascii_char_variants", issue = "110998")] 279 CapitalH = 72, 280 /// U+0049 281 #[unstable(feature = "ascii_char_variants", issue = "110998")] 282 CapitalI = 73, 283 /// U+004A 284 #[unstable(feature = "ascii_char_variants", issue = "110998")] 285 CapitalJ = 74, 286 /// U+004B 287 #[unstable(feature = "ascii_char_variants", issue = "110998")] 288 CapitalK = 75, 289 /// U+004C 290 #[unstable(feature = "ascii_char_variants", issue = "110998")] 291 CapitalL = 76, 292 /// U+004D 293 #[unstable(feature = "ascii_char_variants", issue = "110998")] 294 CapitalM = 77, 295 /// U+004E 296 #[unstable(feature = "ascii_char_variants", issue = "110998")] 297 CapitalN = 78, 298 /// U+004F 299 #[unstable(feature = "ascii_char_variants", issue = "110998")] 300 CapitalO = 79, 301 /// U+0050 302 #[unstable(feature = "ascii_char_variants", issue = "110998")] 303 CapitalP = 80, 304 /// U+0051 305 #[unstable(feature = "ascii_char_variants", issue = "110998")] 306 CapitalQ = 81, 307 /// U+0052 308 #[unstable(feature = "ascii_char_variants", issue = "110998")] 309 CapitalR = 82, 310 /// U+0053 311 #[unstable(feature = "ascii_char_variants", issue = "110998")] 312 CapitalS = 83, 313 /// U+0054 314 #[unstable(feature = "ascii_char_variants", issue = "110998")] 315 CapitalT = 84, 316 /// U+0055 317 #[unstable(feature = "ascii_char_variants", issue = "110998")] 318 CapitalU = 85, 319 /// U+0056 320 #[unstable(feature = "ascii_char_variants", issue = "110998")] 321 CapitalV = 86, 322 /// U+0057 323 #[unstable(feature = "ascii_char_variants", issue = "110998")] 324 CapitalW = 87, 325 /// U+0058 326 #[unstable(feature = "ascii_char_variants", issue = "110998")] 327 CapitalX = 88, 328 /// U+0059 329 #[unstable(feature = "ascii_char_variants", issue = "110998")] 330 CapitalY = 89, 331 /// U+005A 332 #[unstable(feature = "ascii_char_variants", issue = "110998")] 333 CapitalZ = 90, 334 /// U+005B 335 #[unstable(feature = "ascii_char_variants", issue = "110998")] 336 LeftSquareBracket = 91, 337 /// U+005C 338 #[unstable(feature = "ascii_char_variants", issue = "110998")] 339 ReverseSolidus = 92, 340 /// U+005D 341 #[unstable(feature = "ascii_char_variants", issue = "110998")] 342 RightSquareBracket = 93, 343 /// U+005E 344 #[unstable(feature = "ascii_char_variants", issue = "110998")] 345 CircumflexAccent = 94, 346 /// U+005F 347 #[unstable(feature = "ascii_char_variants", issue = "110998")] 348 LowLine = 95, 349 /// U+0060 350 #[unstable(feature = "ascii_char_variants", issue = "110998")] 351 GraveAccent = 96, 352 /// U+0061 353 #[unstable(feature = "ascii_char_variants", issue = "110998")] 354 SmallA = 97, 355 /// U+0062 356 #[unstable(feature = "ascii_char_variants", issue = "110998")] 357 SmallB = 98, 358 /// U+0063 359 #[unstable(feature = "ascii_char_variants", issue = "110998")] 360 SmallC = 99, 361 /// U+0064 362 #[unstable(feature = "ascii_char_variants", issue = "110998")] 363 SmallD = 100, 364 /// U+0065 365 #[unstable(feature = "ascii_char_variants", issue = "110998")] 366 SmallE = 101, 367 /// U+0066 368 #[unstable(feature = "ascii_char_variants", issue = "110998")] 369 SmallF = 102, 370 /// U+0067 371 #[unstable(feature = "ascii_char_variants", issue = "110998")] 372 SmallG = 103, 373 /// U+0068 374 #[unstable(feature = "ascii_char_variants", issue = "110998")] 375 SmallH = 104, 376 /// U+0069 377 #[unstable(feature = "ascii_char_variants", issue = "110998")] 378 SmallI = 105, 379 /// U+006A 380 #[unstable(feature = "ascii_char_variants", issue = "110998")] 381 SmallJ = 106, 382 /// U+006B 383 #[unstable(feature = "ascii_char_variants", issue = "110998")] 384 SmallK = 107, 385 /// U+006C 386 #[unstable(feature = "ascii_char_variants", issue = "110998")] 387 SmallL = 108, 388 /// U+006D 389 #[unstable(feature = "ascii_char_variants", issue = "110998")] 390 SmallM = 109, 391 /// U+006E 392 #[unstable(feature = "ascii_char_variants", issue = "110998")] 393 SmallN = 110, 394 /// U+006F 395 #[unstable(feature = "ascii_char_variants", issue = "110998")] 396 SmallO = 111, 397 /// U+0070 398 #[unstable(feature = "ascii_char_variants", issue = "110998")] 399 SmallP = 112, 400 /// U+0071 401 #[unstable(feature = "ascii_char_variants", issue = "110998")] 402 SmallQ = 113, 403 /// U+0072 404 #[unstable(feature = "ascii_char_variants", issue = "110998")] 405 SmallR = 114, 406 /// U+0073 407 #[unstable(feature = "ascii_char_variants", issue = "110998")] 408 SmallS = 115, 409 /// U+0074 410 #[unstable(feature = "ascii_char_variants", issue = "110998")] 411 SmallT = 116, 412 /// U+0075 413 #[unstable(feature = "ascii_char_variants", issue = "110998")] 414 SmallU = 117, 415 /// U+0076 416 #[unstable(feature = "ascii_char_variants", issue = "110998")] 417 SmallV = 118, 418 /// U+0077 419 #[unstable(feature = "ascii_char_variants", issue = "110998")] 420 SmallW = 119, 421 /// U+0078 422 #[unstable(feature = "ascii_char_variants", issue = "110998")] 423 SmallX = 120, 424 /// U+0079 425 #[unstable(feature = "ascii_char_variants", issue = "110998")] 426 SmallY = 121, 427 /// U+007A 428 #[unstable(feature = "ascii_char_variants", issue = "110998")] 429 SmallZ = 122, 430 /// U+007B 431 #[unstable(feature = "ascii_char_variants", issue = "110998")] 432 LeftCurlyBracket = 123, 433 /// U+007C 434 #[unstable(feature = "ascii_char_variants", issue = "110998")] 435 VerticalLine = 124, 436 /// U+007D 437 #[unstable(feature = "ascii_char_variants", issue = "110998")] 438 RightCurlyBracket = 125, 439 /// U+007E 440 #[unstable(feature = "ascii_char_variants", issue = "110998")] 441 Tilde = 126, 442 /// U+007F 443 #[unstable(feature = "ascii_char_variants", issue = "110998")] 444 Delete = 127, 445 } 446 447 impl AsciiChar { 448 /// Creates an ascii character from the byte `b`, 449 /// or returns `None` if it's too large. 450 #[unstable(feature = "ascii_char", issue = "110998")] 451 #[inline] from_u8(b: u8) -> Option<Self>452 pub const fn from_u8(b: u8) -> Option<Self> { 453 if b <= 127 { 454 // SAFETY: Just checked that `b` is in-range 455 Some(unsafe { Self::from_u8_unchecked(b) }) 456 } else { 457 None 458 } 459 } 460 461 /// Creates an ASCII character from the byte `b`, 462 /// without checking whether it's valid. 463 /// 464 /// # Safety 465 /// 466 /// `b` must be in `0..=127`, or else this is UB. 467 #[unstable(feature = "ascii_char", issue = "110998")] 468 #[inline] from_u8_unchecked(b: u8) -> Self469 pub const unsafe fn from_u8_unchecked(b: u8) -> Self { 470 // SAFETY: Our safety precondition is that `b` is in-range. 471 unsafe { transmute(b) } 472 } 473 474 /// When passed the *number* `0`, `1`, …, `9`, returns the *character* 475 /// `'0'`, `'1'`, …, `'9'` respectively. 476 /// 477 /// If `d >= 10`, returns `None`. 478 #[unstable(feature = "ascii_char", issue = "110998")] 479 #[inline] digit(d: u8) -> Option<Self>480 pub const fn digit(d: u8) -> Option<Self> { 481 if d < 10 { 482 // SAFETY: Just checked it's in-range. 483 Some(unsafe { Self::digit_unchecked(d) }) 484 } else { 485 None 486 } 487 } 488 489 /// When passed the *number* `0`, `1`, …, `9`, returns the *character* 490 /// `'0'`, `'1'`, …, `'9'` respectively, without checking that it's in-range. 491 /// 492 /// # Safety 493 /// 494 /// This is immediate UB if called with `d > 64`. 495 /// 496 /// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic. 497 /// Notably, it should not be expected to return hex digits, or any other 498 /// reasonable extension of the decimal digits. 499 /// 500 /// (This lose safety condition is intended to simplify soundness proofs 501 /// when writing code using this method, since the implementation doesn't 502 /// need something really specific, not to make those other arguments do 503 /// something useful. It might be tightened before stabilization.) 504 #[unstable(feature = "ascii_char", issue = "110998")] 505 #[inline] digit_unchecked(d: u8) -> Self506 pub const unsafe fn digit_unchecked(d: u8) -> Self { 507 debug_assert!(d < 10); 508 509 // SAFETY: `'0'` through `'9'` are U+00030 through U+0039, 510 // so because `d` must be 64 or less the addition can return at most 511 // 112 (0x70), which doesn't overflow and is within the ASCII range. 512 unsafe { 513 let byte = b'0'.unchecked_add(d); 514 Self::from_u8_unchecked(byte) 515 } 516 } 517 518 /// Gets this ASCII character as a byte. 519 #[unstable(feature = "ascii_char", issue = "110998")] 520 #[inline] as_u8(self) -> u8521 pub const fn as_u8(self) -> u8 { 522 self as u8 523 } 524 525 /// Gets this ASCII character as a `char` Unicode Scalar Value. 526 #[unstable(feature = "ascii_char", issue = "110998")] 527 #[inline] as_char(self) -> char528 pub const fn as_char(self) -> char { 529 self as u8 as char 530 } 531 532 /// Views this ASCII character as a one-code-unit UTF-8 `str`. 533 #[unstable(feature = "ascii_char", issue = "110998")] 534 #[inline] as_str(&self) -> &str535 pub const fn as_str(&self) -> &str { 536 crate::slice::from_ref(self).as_str() 537 } 538 } 539 540 impl [AsciiChar] { 541 /// Views this slice of ASCII characters as a UTF-8 `str`. 542 #[unstable(feature = "ascii_char", issue = "110998")] 543 #[inline] as_str(&self) -> &str544 pub const fn as_str(&self) -> &str { 545 let ascii_ptr: *const Self = self; 546 let str_ptr = ascii_ptr as *const str; 547 // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte 548 // code unit having the same value as the ASCII byte. 549 unsafe { &*str_ptr } 550 } 551 552 /// Views this slice of ASCII characters as a slice of `u8` bytes. 553 #[unstable(feature = "ascii_char", issue = "110998")] 554 #[inline] as_bytes(&self) -> &[u8]555 pub const fn as_bytes(&self) -> &[u8] { 556 self.as_str().as_bytes() 557 } 558 } 559 560 #[unstable(feature = "ascii_char", issue = "110998")] 561 impl fmt::Display for AsciiChar { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result562 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 563 <str as fmt::Display>::fmt(self.as_str(), f) 564 } 565 } 566