1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 11 package ohos.global.icu.util; 12 13 import java.util.Enumeration; 14 import java.util.NoSuchElementException; 15 16 import ohos.global.icu.text.UTF16; 17 import ohos.global.icu.text.UnicodeSet; 18 19 /** 20 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.util.Calendar}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 21 * 22 * <p>The string tokenizer class allows an application to break a string 23 * into tokens by performing code point comparison. 24 * The <code>StringTokenizer</code> methods do not distinguish 25 * among identifiers, numbers, and quoted strings, nor do they recognize 26 * and skip comments. 27 * <p> 28 * The set of delimiters (the codepoints that separate tokens) may be 29 * specified either at creation time or on a per-token basis. 30 * <p> 31 * An instance of <code>StringTokenizer</code> behaves in one of three ways, 32 * depending on whether it was created with the <code>returnDelims</code> 33 * and <code>coalesceDelims</code> 34 * flags having the value <code>true</code> or <code>false</code>: 35 * <ul> 36 * <li>If returnDelims is <code>false</code>, delimiter code points serve to 37 * separate tokens. A token is a maximal sequence of consecutive 38 * code points that are not delimiters. 39 * <li>If returnDelims is <code>true</code>, delimiter code points are 40 * themselves considered to be tokens. In this case, if coalesceDelims is 41 * <code>true</code>, such tokens will be the maximal sequence of consecutive 42 * code points that <em>are</em> delimiters. If coalesceDelims is false, 43 * a token will be received for each delimiter code point. 44 * </ul> 45 * <p>A token is thus either one 46 * delimiter code point, a maximal sequence of consecutive code points that 47 * are delimiters, or a maximal sequence of consecutive code 48 * points that are not delimiters. 49 * <p> 50 * A <tt>StringTokenizer</tt> object internally maintains a current 51 * position within the string to be tokenized. Some operations advance this 52 * current position past the code point processed. 53 * <p> 54 * A token is returned by taking a substring of the string that was used to 55 * create the <tt>StringTokenizer</tt> object. 56 * <p> 57 * Example of the use of the default delimiter tokenizer. 58 * <blockquote><pre> 59 * StringTokenizer st = new StringTokenizer("this is a test"); 60 * while (st.hasMoreTokens()) { 61 * println(st.nextToken()); 62 * } 63 * </pre></blockquote> 64 * <p> 65 * prints the following output: 66 * <blockquote><pre> 67 * this 68 * is 69 * a 70 * test 71 * </pre></blockquote> 72 * <p> 73 * Example of the use of the tokenizer with user specified delimiter. 74 * <blockquote><pre> 75 * StringTokenizer st = new StringTokenizer( 76 * "this is a test with supplementary characters \ud800\ud800\udc00\udc00", 77 * " \ud800\udc00"); 78 * while (st.hasMoreTokens()) { 79 * println(st.nextToken()); 80 * } 81 * </pre></blockquote> 82 * <p> 83 * prints the following output: 84 * <blockquote><pre> 85 * this 86 * is 87 * a 88 * test 89 * with 90 * supplementary 91 * characters 92 * \ud800 93 * \udc00 94 * </pre></blockquote> 95 * 96 * @author syn wee 97 * @hide exposed on OHOS 98 */ 99 public final class StringTokenizer implements Enumeration<Object> 100 { 101 // public constructors --------------------------------------------- 102 103 /** 104 * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. All 105 * characters in the delim argument are the delimiters for separating 106 * tokens. 107 * <p>If the returnDelims flag is false, the delimiter characters are 108 * skipped and only serve as separators between tokens. 109 * <p>If the returnDelims flag is true, then the delimiter characters 110 * are also returned as tokens, one per delimiter. 111 * @param str a string to be parsed. 112 * @param delim the delimiters. 113 * @param returndelims flag indicating whether to return the delimiters 114 * as tokens. 115 * @exception NullPointerException if str is null 116 */ StringTokenizer(String str, UnicodeSet delim, boolean returndelims)117 public StringTokenizer(String str, UnicodeSet delim, boolean returndelims) 118 { 119 this(str, delim, returndelims, false); 120 } 121 122 /** 123 * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. All 124 * characters in the delim argument are the delimiters for separating 125 * tokens. 126 * <p>If the returnDelims flag is false, the delimiter characters are 127 * skipped and only serve as separators between tokens. 128 * <p>If the returnDelims flag is true, then the delimiter characters 129 * are also returned as tokens. If coalescedelims is true, one token 130 * is returned for each run of delimiter characters, otherwise one 131 * token is returned per delimiter. Since surrogate pairs can be 132 * delimiters, the returned token might be two chars in length. 133 * @param str a string to be parsed. 134 * @param delim the delimiters. 135 * @param returndelims flag indicating whether to return the delimiters 136 * as tokens. 137 * @param coalescedelims flag indicating whether to return a run of 138 * delimiters as a single token or as one token per delimiter. 139 * This only takes effect if returndelims is true. 140 * @exception NullPointerException if str is null 141 * @deprecated This API is ICU internal only. 142 * @hide draft / provisional / internal are hidden on OHOS 143 */ 144 @Deprecated StringTokenizer(String str, UnicodeSet delim, boolean returndelims, boolean coalescedelims)145 public StringTokenizer(String str, UnicodeSet delim, boolean returndelims, boolean coalescedelims) 146 { 147 m_source_ = str; 148 m_length_ = str.length(); 149 if (delim == null) { 150 m_delimiters_ = EMPTY_DELIMITER_; 151 } 152 else { 153 m_delimiters_ = delim; 154 } 155 m_returnDelimiters_ = returndelims; 156 m_coalesceDelimiters_ = coalescedelims; 157 m_tokenOffset_ = -1; 158 m_tokenSize_ = -1; 159 if (m_length_ == 0) { 160 // string length 0, no tokens 161 m_nextOffset_ = -1; 162 } 163 else { 164 m_nextOffset_ = 0; 165 if (!returndelims) { 166 m_nextOffset_ = getNextNonDelimiter(0); 167 } 168 } 169 } 170 171 /** 172 * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. The 173 * characters in the delim argument are the delimiters for separating 174 * tokens. 175 * <p>Delimiter characters themselves will not be treated as tokens. 176 * @param str a string to be parsed. 177 * @param delim the delimiters. 178 * @exception NullPointerException if str is null 179 */ StringTokenizer(String str, UnicodeSet delim)180 public StringTokenizer(String str, UnicodeSet delim) 181 { 182 this(str, delim, false, false); 183 } 184 185 /** 186 * <p>Constructs a string tokenizer for the specified string. All 187 * characters in the delim argument are the delimiters for separating 188 * tokens. 189 * <p>If the returnDelims flag is false, the delimiter characters are 190 * skipped and only serve as separators between tokens. 191 * <p>If the returnDelims flag is true, then the delimiter characters 192 * are also returned as tokens, one per delimiter. 193 * @param str a string to be parsed. 194 * @param delim the delimiters. 195 * @param returndelims flag indicating whether to return the delimiters 196 * as tokens. 197 * @exception NullPointerException if str is null 198 */ StringTokenizer(String str, String delim, boolean returndelims)199 public StringTokenizer(String str, String delim, boolean returndelims) 200 { 201 this(str, delim, returndelims, false); // java default behavior 202 } 203 204 /** 205 * <p>Constructs a string tokenizer for the specified string. All 206 * characters in the delim argument are the delimiters for separating 207 * tokens. 208 * <p>If the returnDelims flag is false, the delimiter characters are 209 * skipped and only serve as separators between tokens. 210 * <p>If the returnDelims flag is true, then the delimiter characters 211 * are also returned as tokens. If coalescedelims is true, one token 212 * is returned for each run of delimiter characters, otherwise one 213 * token is returned per delimiter. Since surrogate pairs can be 214 * delimiters, the returned token might be two chars in length. 215 * @param str a string to be parsed. 216 * @param delim the delimiters. 217 * @param returndelims flag indicating whether to return the delimiters 218 * as tokens. 219 * @param coalescedelims flag indicating whether to return a run of 220 * delimiters as a single token or as one token per delimiter. 221 * This only takes effect if returndelims is true. 222 * @exception NullPointerException if str is null 223 * @deprecated This API is ICU internal only. 224 * @hide draft / provisional / internal are hidden on OHOS 225 */ 226 @Deprecated StringTokenizer(String str, String delim, boolean returndelims, boolean coalescedelims)227 public StringTokenizer(String str, String delim, boolean returndelims, boolean coalescedelims) 228 { 229 // don't ignore whitespace 230 m_delimiters_ = EMPTY_DELIMITER_; 231 if (delim != null && delim.length() > 0) { 232 m_delimiters_ = new UnicodeSet(); 233 m_delimiters_.addAll(delim); 234 checkDelimiters(); 235 } 236 m_coalesceDelimiters_ = coalescedelims; 237 m_source_ = str; 238 m_length_ = str.length(); 239 m_returnDelimiters_ = returndelims; 240 m_tokenOffset_ = -1; 241 m_tokenSize_ = -1; 242 if (m_length_ == 0) { 243 // string length 0, no tokens 244 m_nextOffset_ = -1; 245 } 246 else { 247 m_nextOffset_ = 0; 248 if (!returndelims) { 249 m_nextOffset_ = getNextNonDelimiter(0); 250 } 251 } 252 } 253 254 /** 255 * <p>Constructs a string tokenizer for the specified string. The 256 * characters in the delim argument are the delimiters for separating 257 * tokens. 258 * <p>Delimiter characters themselves will not be treated as tokens. 259 * @param str a string to be parsed. 260 * @param delim the delimiters. 261 * @exception NullPointerException if str is null 262 */ StringTokenizer(String str, String delim)263 public StringTokenizer(String str, String delim) 264 { 265 // don't ignore whitespace 266 this(str, delim, false, false); 267 } 268 269 /** 270 * <p>Constructs a string tokenizer for the specified string. 271 * The tokenizer uses the default delimiter set, which is 272 * " \t\n\r\f": 273 * the space character, the tab character, the newline character, the 274 * carriage-return character, and the form-feed character. 275 * <p>Delimiter characters themselves will not be treated as tokens. 276 * @param str a string to be parsed 277 * @exception NullPointerException if str is null 278 */ StringTokenizer(String str)279 public StringTokenizer(String str) 280 { 281 this(str, DEFAULT_DELIMITERS_, false, false); 282 } 283 284 // public methods -------------------------------------------------- 285 286 /** 287 * Tests if there are more tokens available from this tokenizer's 288 * string. 289 * If this method returns <tt>true</tt>, then a subsequent call to 290 * <tt>nextToken</tt> with no argument will successfully return a token. 291 * @return <code>true</code> if and only if there is at least one token 292 * in the string after the current position; <code>false</code> 293 * otherwise. 294 */ hasMoreTokens()295 public boolean hasMoreTokens() 296 { 297 return m_nextOffset_ >= 0; 298 } 299 300 /** 301 * Returns the next token from this string tokenizer. 302 * @return the next token from this string tokenizer. 303 * @exception NoSuchElementException if there are no more tokens in 304 * this tokenizer's string. 305 */ nextToken()306 public String nextToken() 307 { 308 if (m_tokenOffset_ < 0) { 309 if (m_nextOffset_ < 0) { 310 throw new NoSuchElementException("No more tokens in String"); 311 } 312 // pre-calculations of tokens not done 313 if (m_returnDelimiters_) { 314 int tokenlimit = 0; 315 int c = UTF16.charAt(m_source_, m_nextOffset_); 316 boolean contains = delims == null 317 ? m_delimiters_.contains(c) 318 : c < delims.length && delims[c]; 319 if (contains) { 320 if (m_coalesceDelimiters_) { 321 tokenlimit = getNextNonDelimiter(m_nextOffset_); 322 } else { 323 tokenlimit = m_nextOffset_ + UTF16.getCharCount(c); 324 if (tokenlimit == m_length_) { 325 tokenlimit = -1; 326 } 327 } 328 } 329 else { 330 tokenlimit = getNextDelimiter(m_nextOffset_); 331 } 332 String result; 333 if (tokenlimit < 0) { 334 result = m_source_.substring(m_nextOffset_); 335 } 336 else { 337 result = m_source_.substring(m_nextOffset_, tokenlimit); 338 } 339 m_nextOffset_ = tokenlimit; 340 return result; 341 } 342 else { 343 int tokenlimit = getNextDelimiter(m_nextOffset_); 344 String result; 345 if (tokenlimit < 0) { 346 result = m_source_.substring(m_nextOffset_); 347 m_nextOffset_ = tokenlimit; 348 } 349 else { 350 result = m_source_.substring(m_nextOffset_, tokenlimit); 351 m_nextOffset_ = getNextNonDelimiter(tokenlimit); 352 } 353 354 return result; 355 } 356 } 357 // count was called before and we have all the tokens 358 if (m_tokenOffset_ >= m_tokenSize_) { 359 throw new NoSuchElementException("No more tokens in String"); 360 } 361 String result; 362 if (m_tokenLimit_[m_tokenOffset_] >= 0) { 363 result = m_source_.substring(m_tokenStart_[m_tokenOffset_], 364 m_tokenLimit_[m_tokenOffset_]); 365 } 366 else { 367 result = m_source_.substring(m_tokenStart_[m_tokenOffset_]); 368 } 369 m_tokenOffset_ ++; 370 m_nextOffset_ = -1; 371 if (m_tokenOffset_ < m_tokenSize_) { 372 m_nextOffset_ = m_tokenStart_[m_tokenOffset_]; 373 } 374 return result; 375 } 376 377 /** 378 * Returns the next token in this string tokenizer's string. First, 379 * the set of characters considered to be delimiters by this 380 * <tt>StringTokenizer</tt> object is changed to be the characters in 381 * the string <tt>delim</tt>. Then the next token in the string 382 * after the current position is returned. The current position is 383 * advanced beyond the recognized token. The new delimiter set 384 * remains the default after this call. 385 * @param delim the new delimiters. 386 * @return the next token, after switching to the new delimiter set. 387 * @exception NoSuchElementException if there are no more tokens in 388 * this tokenizer's string. 389 */ 390 public String nextToken(String delim) 391 { 392 m_delimiters_ = EMPTY_DELIMITER_; 393 if (delim != null && delim.length() > 0) { 394 m_delimiters_ = new UnicodeSet(); 395 m_delimiters_.addAll(delim); 396 } 397 return nextToken(m_delimiters_); 398 } 399 400 /** 401 * <strong>[icu]</strong> Returns the next token in this string tokenizer's string. First, 402 * the set of characters considered to be delimiters by this 403 * <tt>StringTokenizer</tt> object is changed to be the characters in 404 * the string <tt>delim</tt>. Then the next token in the string 405 * after the current position is returned. The current position is 406 * advanced beyond the recognized token. The new delimiter set 407 * remains the default after this call. 408 * @param delim the new delimiters. 409 * @return the next token, after switching to the new delimiter set. 410 * @exception NoSuchElementException if there are no more tokens in 411 * this tokenizer's string. 412 */ 413 public String nextToken(UnicodeSet delim) 414 { 415 m_delimiters_ = delim; 416 checkDelimiters(); 417 m_tokenOffset_ = -1; 418 m_tokenSize_ = -1; 419 if (!m_returnDelimiters_) { 420 m_nextOffset_ = getNextNonDelimiter(m_nextOffset_); 421 } 422 return nextToken(); 423 } 424 425 /** 426 * Returns the same value as the <code>hasMoreTokens</code> method. 427 * It exists so that this class can implement the 428 * <code>Enumeration</code> interface. 429 * @return <code>true</code> if there are more tokens; 430 * <code>false</code> otherwise. 431 * @see #hasMoreTokens() 432 */ 433 public boolean hasMoreElements() 434 { 435 return hasMoreTokens(); 436 } 437 438 /** 439 * Returns the same value as the <code>nextToken</code> method, except 440 * that its declared return value is <code>Object</code> rather than 441 * <code>String</code>. It exists so that this class can implement the 442 * <code>Enumeration</code> interface. 443 * @return the next token in the string. 444 * @exception NoSuchElementException if there are no more tokens in 445 * this tokenizer's string. 446 * @see #nextToken() 447 */ 448 public Object nextElement() 449 { 450 return nextToken(); 451 } 452 453 /** 454 * Calculates the number of times that this tokenizer's 455 * <code>nextToken</code> method can be called before it generates an 456 * exception. The current position is not advanced. 457 * @return the number of tokens remaining in the string using the 458 * current delimiter set. 459 * @see #nextToken() 460 */ 461 public int countTokens() 462 { 463 int result = 0; 464 if (hasMoreTokens()) { 465 if (m_tokenOffset_ >= 0) { 466 return m_tokenSize_ - m_tokenOffset_; 467 } 468 if (m_tokenStart_ == null) { 469 m_tokenStart_ = new int[TOKEN_SIZE_]; 470 m_tokenLimit_ = new int[TOKEN_SIZE_]; 471 } 472 do { 473 if (m_tokenStart_.length == result) { 474 int temptokenindex[] = m_tokenStart_; 475 int temptokensize[] = m_tokenLimit_; 476 int originalsize = temptokenindex.length; 477 int newsize = originalsize + TOKEN_SIZE_; 478 m_tokenStart_ = new int[newsize]; 479 m_tokenLimit_ = new int[newsize]; 480 System.arraycopy(temptokenindex, 0, m_tokenStart_, 0, 481 originalsize); 482 System.arraycopy(temptokensize, 0, m_tokenLimit_, 0, 483 originalsize); 484 } 485 m_tokenStart_[result] = m_nextOffset_; 486 if (m_returnDelimiters_) { 487 int c = UTF16.charAt(m_source_, m_nextOffset_); 488 boolean contains = delims == null 489 ? m_delimiters_.contains(c) 490 : c < delims.length && delims[c]; 491 if (contains) { 492 if (m_coalesceDelimiters_) { 493 m_tokenLimit_[result] = getNextNonDelimiter( 494 m_nextOffset_); 495 } else { 496 int p = m_nextOffset_ + 1; 497 if (p == m_length_) { 498 p = -1; 499 } 500 m_tokenLimit_[result] = p; 501 502 } 503 } 504 else { 505 m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_); 506 } 507 m_nextOffset_ = m_tokenLimit_[result]; 508 } 509 else { 510 m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_); 511 m_nextOffset_ = getNextNonDelimiter(m_tokenLimit_[result]); 512 } 513 result ++; 514 } while (m_nextOffset_ >= 0); 515 m_tokenOffset_ = 0; 516 m_tokenSize_ = result; 517 m_nextOffset_ = m_tokenStart_[0]; 518 } 519 return result; 520 } 521 522 // private data members ------------------------------------------------- 523 524 /** 525 * Current offset to the token array. If the array token is not set up yet, 526 * this value is a -1 527 */ 528 private int m_tokenOffset_; 529 /** 530 * Size of the token array. If the array token is not set up yet, 531 * this value is a -1 532 */ 533 private int m_tokenSize_; 534 /** 535 * Array of pre-calculated tokens start indexes in source string terminated 536 * by -1. 537 * This is only set up during countTokens() and only stores the remaining 538 * tokens, not all tokens including parsed ones 539 */ 540 private int m_tokenStart_[]; 541 /** 542 * Array of pre-calculated tokens limit indexes in source string. 543 * This is only set up during countTokens() and only stores the remaining 544 * tokens, not all tokens including parsed ones 545 */ 546 private int m_tokenLimit_[]; 547 /** 548 * UnicodeSet containing delimiters 549 */ 550 private UnicodeSet m_delimiters_; 551 /** 552 * String to parse for tokens 553 */ 554 private String m_source_; 555 /** 556 * Length of m_source_ 557 */ 558 private int m_length_; 559 /** 560 * Current position in string to parse for tokens 561 */ 562 private int m_nextOffset_; 563 /** 564 * Flag indicator if delimiters are to be treated as tokens too 565 */ 566 private boolean m_returnDelimiters_; 567 568 /** 569 * Flag indicating whether to coalesce runs of delimiters into single tokens 570 */ 571 private boolean m_coalesceDelimiters_; 572 573 /** 574 * Default set of delimiters \t\n\r\f 575 */ 576 private static final UnicodeSet DEFAULT_DELIMITERS_ 577 = new UnicodeSet(0x09, 0x0a, 0x0c, 0x0d, 0x20, 0x20); // UnicodeSet("[ \t\n\r\f]", false) 578 /** 579 * Array size increments 580 */ 581 private static final int TOKEN_SIZE_ = 100; 582 /** 583 * A empty delimiter UnicodeSet, used when user specified null delimiters 584 */ 585 private static final UnicodeSet EMPTY_DELIMITER_ = UnicodeSet.EMPTY; 586 587 // private methods ------------------------------------------------------ 588 589 /** 590 * Gets the index of the next delimiter after offset 591 * @param offset to the source string 592 * @return offset of the immediate next delimiter, otherwise 593 * (- source string length - 1) if there 594 * are no more delimiters after m_nextOffset 595 */ getNextDelimiter(int offset)596 private int getNextDelimiter(int offset) 597 { 598 if (offset >= 0) { 599 int result = offset; 600 int c = 0; 601 if (delims == null) { 602 do { 603 c = UTF16.charAt(m_source_, result); 604 if (m_delimiters_.contains(c)) { 605 break; 606 } 607 result ++; 608 } while (result < m_length_); 609 } else { 610 do { 611 c = UTF16.charAt(m_source_, result); 612 if (c < delims.length && delims[c]) { 613 break; 614 } 615 result ++; 616 } while (result < m_length_); 617 } 618 if (result < m_length_) { 619 return result; 620 } 621 } 622 return -1 - m_length_; 623 } 624 625 /** 626 * Gets the index of the next non-delimiter after m_nextOffset_ 627 * @param offset to the source string 628 * @return offset of the immediate next non-delimiter, otherwise 629 * (- source string length - 1) if there 630 * are no more delimiters after m_nextOffset 631 */ getNextNonDelimiter(int offset)632 private int getNextNonDelimiter(int offset) 633 { 634 if (offset >= 0) { 635 int result = offset; 636 int c = 0; 637 if (delims == null) { 638 do { 639 c = UTF16.charAt(m_source_, result); 640 if (!m_delimiters_.contains(c)) { 641 break; 642 } 643 result ++; 644 } while (result < m_length_); 645 } else { 646 do { 647 c = UTF16.charAt(m_source_, result); 648 if (!(c < delims.length && delims[c])) { 649 break; 650 } 651 result ++; 652 } while (result < m_length_); 653 } 654 if (result < m_length_) { 655 return result; 656 } 657 } 658 return -1 - m_length_; 659 } 660 checkDelimiters()661 void checkDelimiters() { 662 if (m_delimiters_ == null || m_delimiters_.size() == 0) { 663 delims = new boolean[0]; 664 } else { 665 int maxChar = m_delimiters_.getRangeEnd(m_delimiters_.getRangeCount()-1); 666 if (maxChar < 0x7f) { 667 delims = new boolean[maxChar+1]; 668 for (int i = 0, ch; -1 != (ch = m_delimiters_.charAt(i)); ++i) { 669 delims[ch] = true; 670 } 671 } else { 672 delims = null; 673 } 674 } 675 } 676 private boolean[] delims; 677 } 678