/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package std.core;

/**
 * Integer carrier used by the code point helpers of {@link Char}.
 */
export type UTF_16_CodePoint = int;

/**
 * Represents boxed char value and related operations
 */
export final class Char extends Object implements Comparable<Char>, JSONable<Char> {
    private value: char;

    /**
     * constructor() creates a default Char object holding `\u0000`.
     */
    public constructor() {
        this.value = c'\u0000';
    }

    /**
     * constructor(char) creates a Char object from a specified primitive char.
     *
     * @param value a primitive char to create Char from.
     */
    public constructor(value: char) {
        this.value = value;
    }

    /**
     * constructor(Char) creates a Char object from another Char object.
     *
     * @param value a Char object to copy from.
     */
    public constructor(value: Char) {
        this.value = value.unboxed();
    }

    /**
     * unboxed() returns an underlying primitive char.
     *
     * @returns the underlying primitive char.
     */
    public unboxed(): char {
        return this.value;
    }

    /**
     * valueOf(char) creates a Char object from a primitive char.
     * This method is preferred over {@link <constructor(char)>} since this method can use a cached Char object,
     * i.e. this method might be more efficient.
     * At present no cache is implemented and a new object is created on every call.
     *
     * @param value a primitive char to create Char from.
     *
     * @returns an instance of Char created from the primitive char.
     */
    public static valueOf(value: char): Char {
        // TODO(ivan-tyulyandin): caching is possible
        return new Char(value);
    }

    /**
     * equals(Object) compares two Chars by their underlying primitive chars.
     *
     * @param other a reference to object to be compared with.
     *
     * @returns comparison result of underlying primitive chars.
     * Returns true if the argument is the same reference as this object.
     * Returns false if the argument is not an instance of Char.
     */
    public equals(other: NullishType): boolean {
        if (__runtimeIsSameReference(this, other)) {
            return true;
        }

        if (!(other instanceof Char)) {
            return false;
        }

        return this.value == (other as Char).value;
    }

    /**
     * toString() converts Char to String object that contains a single element with the underlying char.
     *
     * @returns the String representation of the Char.
     */
    public override toString(): String {
        // TODO(ivan-tyulyandin): if String(char) will be implemented, rewrite code below
        let buf = new char[1];
        buf[0] = this.value;
        return new String(buf);
    }

    /**
     * compare(Char, Char) checks two Chars for equality of their underlying chars.
     * Despite its name this method does not produce an ordering; use {@link compareTo} for that.
     *
     * @param lhs the first Char to compare.
     *
     * @param rhs the second Char to compare.
     *
     * @returns true if the underlying chars are equal, false otherwise.
     */
    public static compare(lhs: Char, rhs: Char): boolean {
        return (lhs.unboxed() == rhs.unboxed());
    }

    /**
     * Compares this instance to other Char object
     * The result is less than 0 if this instance lesser than provided object
     * 0 if they are equal
     * and greater than 0 otherwise.
     * The result is the difference of the two chars converted to int.
     *
     * @param other Char object to compare with
     *
     * @returns result of the comparison
     */
    public override compareTo(other: Char): int {
        return (this.value as int) - (other.unboxed() as int);
    }

    /**
     * $_hashCode(char) returns a hashcode of the char.
     * The hash code is the char converted to int.
     *
     * @param value the char to be hash coded.
     *
     * @returns the hash code.
     */
    public static $_hashCode(value: char): int {
        return value as int;
    }

    /**
     * $_hashCode() returns a hashcode of the underlying char.
     *
     * @returns the hash code.
     */
    public override $_hashCode(): int {
        return Char.$_hashCode(this.value);
    }

    /**
     * `CHAR_BIT_SIZE` is the shift width used to move a code unit into the upper half of an int.
     */
    public static readonly CHAR_BIT_SIZE = 16;

    /**
     * `HIGH_SURROGATE_MIN` is the smallest char accepted by {@link <isHighSurrogate(char)>}.
     */
    public static readonly HIGH_SURROGATE_MIN: char = c'\uD800';

    /**
     * `HIGH_SURROGATE_MAX` is the largest char accepted by {@link <isHighSurrogate(char)>}.
     */
    public static readonly HIGH_SURROGATE_MAX: char = c'\uDBFF';

    /**
     * `LOW_SURROGATE_MIN` is the smallest char accepted by {@link <isLowSurrogate(char)>}.
     */
    public static readonly LOW_SURROGATE_MIN: char = c'\uDC00';

    /**
     * `LOW_SURROGATE_MAX` is the largest char accepted by {@link <isLowSurrogate(char)>}.
     */
    public static readonly LOW_SURROGATE_MAX: char = c'\uDFFF';

    // Masks for the upper and the lower 16 bits of an int.
    // Neither mask is referenced by the methods of this class.
    private static readonly HIGHEST_16_BITS_MASK: int = 0xFFFF0000;
    private static readonly LOWEST_16_BITS_MASK: int = 0x0000FFFF;

    // HIGH_SURROGATE_MIN shifted into the upper 16 bits and LOW_SURROGATE_MIN in the lower 16 bits;
    // both are used as bit patterns by isValidCodePoint().
    private static readonly HIGH_SURROGATE_BITS: int = (Char.HIGH_SURROGATE_MIN as int) << Char.CHAR_BIT_SIZE;
    private static readonly LOW_SURROGATE_BITS: int = Char.LOW_SURROGATE_MIN as int;

    // First value that no longer fits into a single 16-bit code unit.
    private static readonly BMP_BORDER: UTF_16_CodePoint = 0x10000;

    /**
     * `MIN_VALUE` is a smallest value of type `char`
     */
    public static readonly MIN_VALUE: char = c'\u0000';

    /**
     * `MAX_VALUE` is a largest value of type `char`
     */
    public static readonly MAX_VALUE: char = c'\uFFFF';

    /**
     * isInBasicMultilingualPlane(char) checks if the char is in Basic Multilingual Plane.
     * A char is reported as being in the plane when it lies outside of the surrogate range
     * `HIGH_SURROGATE_MIN`..`LOW_SURROGATE_MAX`.
     * See UTF-16 for more details.
     *
     * @param value the char to be checked.
     *
     * @returns if the char is in Basic Multilingual Plane.
     */
    public static isInBasicMultilingualPlane(value: char): boolean {
        return (value < Char.HIGH_SURROGATE_MIN) || (value > Char.LOW_SURROGATE_MAX);
    }

    /**
     * isInBasicMultilingualPlane(UTF_16_CodePoint) checks if the code point is in Basic Multilingual Plane.
     * Only values in the range `0`..`0xFFFF` that are not surrogates are reported as being in the plane;
     * negative values and values of `0x10000` and above are never in the plane.
     * See UTF-16 for more details.
     *
     * @param value the code point to be checked.
     *
     * @returns if the code point is in Basic Multilingual Plane.
     */
    public static isInBasicMultilingualPlane(value: UTF_16_CodePoint): boolean {
        // The lower bound matters: narrowing a negative int to char drops the upper bits,
        // so without it the answer would be decided by the low 16 bits of an out-of-range value.
        if ((value >= 0) && (value < Char.BMP_BORDER)) {
            return Char.isInBasicMultilingualPlane(value as char);
        }
        return false;
    }

    /**
     * isInBasicMultilingualPlane() checks if the underlying char is in Basic Multilingual Plane.
     * See UTF-16 for more details.
     *
     * @returns if the char is in Basic Multilingual Plane.
     */
    public isInBasicMultilingualPlane(): boolean {
        return Char.isInBasicMultilingualPlane(this.value);
    }

    /**
     * isValidCodePoint() checks if the code point is correctly encoded.
     * The value is accepted if it is in Basic Multilingual Plane, or if it has all bits of
     * `HIGH_SURROGATE_MIN` set in its upper 16 bits and all bits of `LOW_SURROGATE_MIN` set in its lower 16 bits.
     * See UTF-16 for more details.
     *
     * NOTE(review): the bit test reads the value as two packed code units, while
     * {@link <getHighSurrogate(UTF_16_CodePoint)>} and {@link <getLowSurrogate(UTF_16_CodePoint)>}
     * treat their argument as a scalar code point. It is also a mask test rather than a range test.
     * Confirm the intended representation before relying on this method.
     *
     * @param value the code point to be checked.
     *
     * @returns true if the code point is not malformed.
     */
    public static isValidCodePoint(value: UTF_16_CodePoint): boolean {
        return (Char.isInBasicMultilingualPlane(value))
            || ((value & Char.HIGH_SURROGATE_BITS) == Char.HIGH_SURROGATE_BITS)
            && ((value & Char.LOW_SURROGATE_BITS) == Char.LOW_SURROGATE_BITS);
    }

    /**
     * codeUnitsToEncode(UTF_16_CodePoint) counts a number of code units to encode the UTF-16 code point.
     * See UTF-16 for more details.
     *
     * @param value UTF-16 code point to be examined.
     *
     * @returns 1 if the code point is in Basic Multilingual Plane, 2 otherwise.
     */
    public static codeUnitsToEncode(value: UTF_16_CodePoint): int {
        if (Char.isInBasicMultilingualPlane(value)) {
            return 1;
        }
        return 2;
    }

    /**
     * isHighSurrogate(char) checks if the char is a high surrogate.
     *
     * @param value the char to be checked.
     *
     * @returns true if the char is a high surrogate, false otherwise.
     */
    public static isHighSurrogate(value: char): boolean {
        return Char.HIGH_SURROGATE_MIN <= value
            && Char.HIGH_SURROGATE_MAX >= value;
    }

    /**
     * isLowSurrogate(char) checks if the char is a low surrogate.
     *
     * @param value the char to be checked.
     *
     * @returns true if the char is a low surrogate, false otherwise.
     */
    public static isLowSurrogate(value: char): boolean {
        return Char.LOW_SURROGATE_MIN <= value
            && Char.LOW_SURROGATE_MAX >= value;
    }

    /**
     * getHighSurrogate(UTF_16_CodePoint) splits code point as a two code units and return the first one.
     * The result can be malformed and thus has to be checked with {@link <isHighSurrogate(char)>}.
     *
     * @param value an encoded code point.
     *
     * @returns the high surrogate, possibly malformed.
     */
    public static getHighSurrogate(value: UTF_16_CodePoint): char {
        return (((value - 0x10000) >>> 10) + Char.HIGH_SURROGATE_MIN) as char;
    }

    /**
     * getLowSurrogate(UTF_16_CodePoint) splits code point as a two code units and return the second one.
     * The result can be malformed and thus has to be checked with {@link <isLowSurrogate(char)>}.
     *
     * @param value an encoded code point.
     *
     * @returns the low surrogate, possibly malformed.
     */
    public static getLowSurrogate(value: UTF_16_CodePoint): char {
        return (((value - 0x10000) & 0x3FF) + Char.LOW_SURROGATE_MIN) as char;
    }

    /**
     * isPartOfSurrogatePair(char) checks whether the char is low or high surrogate.
     *
     * @param value the char to be tested.
     *
     * @returns true if the argument is correctly encoded low or high surrogate.
     */
    public static isPartOfSurrogatePair(value: char): boolean {
        return Char.isHighSurrogate(value) || Char.isLowSurrogate(value);
    }

    /**
     * isPartOfSurrogatePair() checks whether the underlying char is low or high surrogate.
     *
     * @returns true if the underlying char is correctly encoded low or high surrogate.
     */
    public isPartOfSurrogatePair(): boolean {
        return Char.isPartOfSurrogatePair(this.value);
    }

    /**
     * charsToCodePoint(char, char) combines two chars to code point.
     * The arguments are not validated; callers are expected to pass a high surrogate
     * followed by a low surrogate.
     *
     * @param highValue the first code unit of the pair.
     *
     * @param lowValue the second code unit of the pair.
     *
     * @returns codepoint decoded from UTF-16.
     */
    public static charsToCodePoint(highValue: char, lowValue: char): UTF_16_CodePoint {
        return ((highValue - Char.HIGH_SURROGATE_MIN) * 0x400) + (lowValue - Char.LOW_SURROGATE_MIN) + 0x10000;
    }

    /**
     * isBinDigit(char) checks whether the char represents a binary digit.
     *
     * @param value a char to check.
     *
     * @returns true if the char is a binary digit.
     */
    public static isBinDigit(value: char): boolean {
        let diff: int = value - c'0';
        return (0 == diff) || (diff == 1);
    }

    /**
     * isBinDigit() checks whether the underlying char represents a binary digit.
     *
     * @returns true if the char is a binary digit.
     */
    public isBinDigit(): boolean {
        return Char.isBinDigit(this.value);
    }

    /**
     * isDecDigit(char) checks whether the char represents a decimal digit.
     *
     * @param value a char to check.
     *
     * @returns true if the char is a decimal digit.
     */
    public static isDecDigit(value: char): boolean {
        let diff: int = value - c'0';
        return (0 <= diff) && (diff <= 9);
    }

    /**
     * isDecDigit() checks whether the underlying char represents a decimal digit.
     *
     * @returns true if the char is a decimal digit.
     */
    public isDecDigit(): boolean {
        return Char.isDecDigit(this.value);
    }

    /**
     * isHexDigit(char) checks whether the char represents a hexadecimal digit.
     * Both lower case (`a`..`f`) and upper case (`A`..`F`) letters are accepted.
     *
     * @param value a char to check.
     *
     * @returns true if the char is a hexadecimal digit.
     */
    public static isHexDigit(value: char): boolean {
        let isDigit = (c'0' <= value) && (value <= c'9');
        let isChar = ((c'a' <= value) && (value <= c'f'))
            || ((c'A' <= value) && (value <= c'F'));
        return isDigit || isChar;
    }

    /**
     * isHexDigit() checks whether the underlying char represents a hexadecimal digit.
     *
     * @returns true if the char is a hexadecimal digit.
     */
    public isHexDigit(): boolean {
        return Char.isHexDigit(this.value);
    }

    /**
     * isLetter(char) checks whether the char is a letter.
     * Only the ranges `A`..`Z` and `a`..`z` are recognized.
     *
     * @param value a char to be tested.
     *
     * @returns true if the char is a letter.
     */
    public static isLetter(value: char): boolean {
        return (c'A' <= value) && (value <= c'Z')
            || (c'a' <= value) && (value <= c'z');
    }

    /**
     * isLetter() checks whether the underlying char is a letter.
     *
     * @returns true if the char is a letter.
     */
    public isLetter(): boolean {
        return Char.isLetter(this.value);
    }

    /**
     * isUpperCase(char) checks whether the char is an upper case letter.
     *
     * @param value a char to be tested.
     *
     * @returns true if the char is an upper case letter.
     */
    public static native isUpperCase(value: char): boolean;

    /**
     * isUpperCase() checks whether the underlying char is an upper case letter.
     *
     * @returns true if the char is an upper case letter.
     */
    public isUpperCase(): boolean {
        return Char.isUpperCase(this.value);
    }

    /**
     * isLowerCase(char) checks whether the char is a lower case letter.
     *
     * @param value a char to be tested.
     *
     * @returns true if the char is a lower case letter.
     */
    public static native isLowerCase(value: char): boolean;

    /**
     * isLowerCase() checks whether the underlying char is a lower case letter.
     *
     * @returns true if the char is a lower case letter.
     */
    public isLowerCase(): boolean {
        return Char.isLowerCase(this.value);
    }

    /**
     * toUpperCase(char) converts the char to upper case if it is in lower case, otherwise the char returned itself
     *
     * @param value a char to transform to upper case.
     *
     * @returns if char is in lower case then an upper case of it is returned, otherwise the argument itself returned.
     */
    public static native toUpperCase(value: char): char;

    /**
     * toUpperCase() converts the underlying char to upper case if it is in lower case, otherwise the char unchanged.
     * The conversion is done in place: the value stored in this object is replaced and nothing is returned.
     */
    public toUpperCase(): void {
        this.value = Char.toUpperCase(this.value);
    }

    /**
     * toLowerCase(char) converts the char to lower case if it is in upper case, otherwise the char returned itself
     *
     * @param value a char to transform to lower case.
     *
     * @returns if char is in upper case then a lower case of it is returned, otherwise the argument itself returned.
     */
    public static native toLowerCase(value: char): char;

    /**
     * toLowerCase() converts the underlying char to a lower case if it is in upper case, otherwise the char unchanged.
     * The conversion is done in place: the value stored in this object is replaced and nothing is returned.
     */
    public toLowerCase(): void {
        this.value = Char.toLowerCase(this.value);
    }

    /**
     * isWhiteSpace(char) checks whether the char is a whitespace char.
     *
     * @param value a char to be tested.
     *
     * @returns true if the char is a whitespace.
     */
    public static native isWhiteSpace(value: char): boolean;

    /**
     * isWhiteSpace() checks whether the underlying char is a whitespace char.
     *
     * @returns true if the char is a whitespace.
     */
    public isWhiteSpace(): boolean {
        return Char.isWhiteSpace(this.value);
    }

    /**
     * Creates a Char instance based on JSONValue.
     * Only a JSONString whose value has a length of exactly 1 is accepted.
     *
     * @param json: JSONValue - a JSON representation
     *
     * @throws JSONTypeError if json does not encode a valid char
     *
     * @returns Char - Char instance decoded from JSON
     */
    static createFromJSONValue(json: JSONValue): Char {
        if (json instanceof JSONString) {
            let str = (json as JSONString).value;
            if (str.getLength() == 1) {
                return Char.valueOf(str.charAt(0));
            }
        }
        throw new JSONTypeError("Cannot create Char from JSON", new ErrorOptions(json as Object));
    }
}