1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. Oracle designates this 9 * particular file as subject to the "Classpath" exception as provided 10 * by Oracle in the LICENSE file that accompanied this code. 11 * 12 * This code is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 * version 2 for more details (a copy is included in the LICENSE file that 16 * accompanied this code). 17 * 18 * You should have received a copy of the GNU General Public License version 19 * 2 along with this work; if not, write to the Free Software Foundation, 20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 23 * or visit www.oracle.com if you need additional information or have any 24 * questions. 
25 */ 26 27 package java.util.regex; 28 29 import android.compat.Compatibility; 30 import android.compat.annotation.ChangeId; 31 import android.compat.annotation.EnabledSince; 32 import com.android.icu.util.regex.MatcherNative; 33 import dalvik.annotation.compat.VersionCodes; 34 import dalvik.system.VMRuntime; 35 import java.util.ConcurrentModificationException; 36 import java.util.Iterator; 37 import java.util.NoSuchElementException; 38 import java.util.Objects; 39 import java.util.Spliterator; 40 import java.util.Spliterators; 41 import java.util.function.Consumer; 42 import java.util.function.Function; 43 import java.util.stream.Stream; 44 import java.util.stream.StreamSupport; 45 46 /** 47 * An engine that performs match operations on a {@linkplain 48 * java.lang.CharSequence character sequence} by interpreting a {@link Pattern}. 49 * 50 * <p> A matcher is created from a pattern by invoking the pattern's {@link 51 * Pattern#matcher matcher} method. Once created, a matcher can be used to 52 * perform three different kinds of match operations: 53 * 54 * <ul> 55 * 56 * <li><p> The {@link #matches matches} method attempts to match the entire 57 * input sequence against the pattern. </p></li> 58 * 59 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the 60 * input sequence, starting at the beginning, against the pattern. </p></li> 61 * 62 * <li><p> The {@link #find find} method scans the input sequence looking 63 * for the next subsequence that matches the pattern. </p></li> 64 * 65 * </ul> 66 * 67 * <p> Each of these methods returns a boolean indicating success or failure. 68 * More information about a successful match can be obtained by querying the 69 * state of the matcher. 70 * 71 * <p> A matcher finds matches in a subset of its input called the 72 * <i>region</i>. By default, the region contains all of the matcher's input. 
 * The region can be modified via the {@link #region(int, int) region} method
 * and queried via the {@link #regionStart() regionStart} and {@link
 * #regionEnd() regionEnd} methods. The way that the region boundaries interact
 * with some pattern constructs can be changed. See {@link
 * #useAnchoringBounds(boolean) useAnchoringBounds} and {@link
 * #useTransparentBounds(boolean) useTransparentBounds} for more details.
 *
 * <p> This class also defines methods for replacing matched subsequences with
 * new strings whose contents can, if desired, be computed from the match
 * result.  The {@link #appendReplacement appendReplacement} and {@link
 * #appendTail appendTail} methods can be used in tandem in order to collect
 * the result into an existing string buffer or string builder. Alternatively,
 * the more convenient {@link #replaceAll replaceAll} method can be used to
 * create a string in which every matching subsequence in the input sequence
 * is replaced.
 *
 * <p> The explicit state of a matcher includes the start and end indices of
 * the most recent successful match.  It also includes the start and end
 * indices of the input subsequence captured by each <a
 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
 * count of such subsequences.  As a convenience, methods are also provided for
 * returning these captured subsequences in string form.
 *
 * <p> The explicit state of a matcher is initially undefined; attempting to
 * query any part of it before a successful match will cause an {@link
 * IllegalStateException} to be thrown.  The explicit state of a matcher is
 * recomputed by every match operation.
 *
 * <p> The implicit state of a matcher includes the input character sequence as
 * well as the <i>append position</i>, which is initially zero and is updated
 * by the {@link #appendReplacement appendReplacement} method.
104 * 105 * <p> A matcher may be reset explicitly by invoking its {@link #reset()} 106 * method or, if a new input sequence is desired, its {@link 107 * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a 108 * matcher discards its explicit state information and sets the append position 109 * to zero. 110 * 111 * <p> Instances of this class are not safe for use by multiple concurrent 112 * threads. </p> 113 * 114 * 115 * @author Mike McCloskey 116 * @author Mark Reinhold 117 * @author JSR-51 Expert Group 118 * @since 1.4 119 */ 120 121 public final class Matcher implements MatchResult { 122 123 /** 124 * The Pattern object that created this Matcher. 125 */ 126 private Pattern parentPattern; 127 128 /** 129 * Holds the offsets for the most recent match. 130 */ 131 int[] groups; 132 133 /** 134 * The range within the sequence that is to be matched (between 0 135 * and text.length()). 136 */ 137 int from, to; 138 139 /** 140 * Holds the input text. 141 */ 142 String text; 143 144 /** 145 * Reflects whether a match has been found during the most recent find 146 * operation. 147 */ 148 private boolean matchFound; 149 150 private MatcherNative nativeMatcher; 151 152 /** 153 * The index of the last position appended in a substitution. 154 */ 155 int appendPos = 0; 156 157 /** 158 * Holds the original CharSequence for use in {@link #reset}. {@link #text} is used during 159 * matching. Note that CharSequence is mutable while String is not, so reset can cause the input 160 * to match to change. 161 */ 162 private CharSequence originalInput; 163 164 /** 165 * If transparentBounds is true then the boundaries of this 166 * matcher's region are transparent to lookahead, lookbehind, 167 * and boundary matching constructs that try to see beyond them. 168 */ 169 boolean transparentBounds = false; 170 171 /** 172 * If anchoringBounds is true then the boundaries of this 173 * matcher's region match anchors such as ^ and $. 
174 */ 175 boolean anchoringBounds = true; 176 177 /** 178 * Number of times this matcher's state has been modified 179 */ 180 int modCount; 181 182 // BEGIN Android-removed: Remove unused default constructor. 183 /* 184 * No default constructor. 185 * 186 Matcher() { 187 } 188 */ 189 // END Android-removed: Remove unused default constructor. 190 191 /** 192 * All matchers have the state used by Pattern during a match. 193 */ Matcher(Pattern parent, CharSequence text)194 Matcher(Pattern parent, CharSequence text) { 195 // Android-changed: Use ICU4C as the regex backend. 196 /* 197 this.parentPattern = parent; 198 this.text = text; 199 200 // Allocate state storage 201 int parentGroupCount = Math.max(parent.capturingGroupCount, 10); 202 groups = new int[parentGroupCount * 2]; 203 locals = new int[parent.localCount]; 204 localsPos = new IntHashSet[parent.localTCNCount]; 205 206 // Put fields into initial states 207 reset(); 208 */ 209 usePattern(parent); 210 reset(text); 211 } 212 213 /** 214 * Returns the pattern that is interpreted by this matcher. 215 * 216 * @return The pattern for which this matcher was created 217 */ pattern()218 public Pattern pattern() { 219 return parentPattern; 220 } 221 222 /** 223 * Returns the match state of this matcher as a {@link MatchResult}. 224 * The result is unaffected by subsequent operations performed upon this 225 * matcher. 226 * 227 * @return a {@code MatchResult} with the state of this matcher 228 * @throws IllegalStateException if no match is found. 229 * @since 1.5 230 */ toMatchResult()231 public MatchResult toMatchResult() { 232 // Android-added: Throw IllegalStateException if not matched. 233 ensureMatch(); 234 return toMatchResult(text.toString()); 235 } 236 toMatchResult(String text)237 private MatchResult toMatchResult(String text) { 238 // Android-changed: Replace first and end field usages with our implementation. 239 return new ImmutableMatchResult(matchFound ? start() : -1, // this.first, 240 matchFound ? 
end() : -1, // this.last, 241 groupCount(), 242 this.groups.clone(), 243 text); 244 } 245 246 private static class ImmutableMatchResult implements MatchResult { 247 private final int first; 248 private final int last; 249 private final int[] groups; 250 private final int groupCount; 251 private final String text; 252 ImmutableMatchResult(int first, int last, int groupCount, int groups[], String text)253 ImmutableMatchResult(int first, int last, int groupCount, 254 int groups[], String text) 255 { 256 this.first = first; 257 this.last = last; 258 this.groupCount = groupCount; 259 this.groups = groups; 260 this.text = text; 261 } 262 263 @Override start()264 public int start() { 265 checkMatch(); 266 return first; 267 } 268 269 @Override start(int group)270 public int start(int group) { 271 checkMatch(); 272 if (group < 0 || group > groupCount) 273 throw new IndexOutOfBoundsException("No group " + group); 274 return groups[group * 2]; 275 } 276 277 @Override end()278 public int end() { 279 checkMatch(); 280 return last; 281 } 282 283 @Override end(int group)284 public int end(int group) { 285 checkMatch(); 286 if (group < 0 || group > groupCount) 287 throw new IndexOutOfBoundsException("No group " + group); 288 return groups[group * 2 + 1]; 289 } 290 291 @Override groupCount()292 public int groupCount() { 293 return groupCount; 294 } 295 296 @Override group()297 public String group() { 298 checkMatch(); 299 return group(0); 300 } 301 302 @Override group(int group)303 public String group(int group) { 304 checkMatch(); 305 if (group < 0 || group > groupCount) 306 throw new IndexOutOfBoundsException("No group " + group); 307 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 308 return null; 309 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 310 } 311 checkMatch()312 private void checkMatch() { 313 if (first < 0) 314 throw new IllegalStateException("No match found"); 315 316 } 317 } 318 319 /** 320 * Changes the {@code Pattern} that 
this {@code Matcher} uses to 321 * find matches with. 322 * 323 * <p> This method causes this matcher to lose information 324 * about the groups of the last match that occurred. The 325 * matcher's position in the input is maintained and its 326 * last append position is unaffected.</p> 327 * 328 * @param newPattern 329 * The new pattern used by this matcher 330 * @return This matcher 331 * @throws IllegalArgumentException 332 * If newPattern is {@code null} 333 * @since 1.5 334 */ usePattern(Pattern newPattern)335 public Matcher usePattern(Pattern newPattern) { 336 if (newPattern == null) 337 throw new IllegalArgumentException("Pattern cannot be null"); 338 339 synchronized (this) { 340 // may throw 341 nativeMatcher = MatcherNative.create(newPattern.nativePattern); 342 } 343 parentPattern = newPattern; 344 345 if (text != null) { 346 resetForInput(); 347 } 348 349 groups = new int[(groupCount() + 1) * 2]; 350 matchFound = false; 351 modCount++; 352 return this; 353 } 354 355 /** 356 * Resets this matcher. 357 * 358 * <p> Resetting a matcher discards all of its explicit state information 359 * and sets its append position to zero. The matcher's region is set to the 360 * default region, which is its entire character sequence. The anchoring 361 * and transparency of this matcher's region boundaries are unaffected. 362 * 363 * @return This matcher 364 */ reset()365 public Matcher reset() { 366 Matcher matcher = reset(originalInput, 0, originalInput.length()); 367 modCount++; 368 return matcher; 369 } 370 371 /** 372 * Resets this matcher with a new input sequence. 373 * 374 * <p> Resetting a matcher discards all of its explicit state information 375 * and sets its append position to zero. The matcher's region is set to 376 * the default region, which is its entire character sequence. The 377 * anchoring and transparency of this matcher's region boundaries are 378 * unaffected. 
379 * 380 * @param input 381 * The new input character sequence 382 * 383 * @return This matcher 384 */ reset(CharSequence input)385 public Matcher reset(CharSequence input) { 386 return reset(input, 0, input.length()); 387 } 388 389 /** 390 * Returns the start index of the previous match. 391 * 392 * @return The index of the first character matched 393 * 394 * @throws IllegalStateException 395 * If no match has yet been attempted, 396 * or if the previous match operation failed 397 */ start()398 public int start() { 399 return start(0); 400 } 401 402 /** 403 * Returns the start index of the subsequence captured by the given group 404 * during the previous match operation. 405 * 406 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 407 * to right, starting at one. Group zero denotes the entire pattern, so 408 * the expression <i>m.</i>{@code start(0)} is equivalent to 409 * <i>m.</i>{@code start()}. </p> 410 * 411 * @param group 412 * The index of a capturing group in this matcher's pattern 413 * 414 * @return The index of the first character captured by the group, 415 * or {@code -1} if the match was successful but the group 416 * itself did not match anything 417 * 418 * @throws IllegalStateException 419 * If no match has yet been attempted, 420 * or if the previous match operation failed 421 * 422 * @throws IndexOutOfBoundsException 423 * If there is no capturing group in the pattern 424 * with the given index 425 */ start(int group)426 public int start(int group) { 427 ensureMatch(); 428 if (group < 0 || group > groupCount()) 429 throw new IndexOutOfBoundsException("No group " + group); 430 return groups[group * 2]; 431 } 432 433 /** 434 * Returns the start index of the subsequence captured by the given 435 * <a href="Pattern.html#groupname">named-capturing group</a> during the 436 * previous match operation. 
437 * 438 * @param name 439 * The name of a named-capturing group in this matcher's pattern 440 * 441 * @return The index of the first character captured by the group, 442 * or {@code -1} if the match was successful but the group 443 * itself did not match anything 444 * 445 * @throws IllegalStateException 446 * If no match has yet been attempted, 447 * or if the previous match operation failed 448 * 449 * @throws IllegalArgumentException 450 * If there is no capturing group in the pattern 451 * with the given name 452 * @since 1.8 453 */ start(String name)454 public int start(String name) { 455 return groups[getMatchedGroupIndex(name) * 2]; 456 } 457 458 /** 459 * Returns the offset after the last character matched. 460 * 461 * @return The offset after the last character matched 462 * 463 * @throws IllegalStateException 464 * If no match has yet been attempted, 465 * or if the previous match operation failed 466 */ end()467 public int end() { 468 return end(0); 469 } 470 471 /** 472 * Returns the offset after the last character of the subsequence 473 * captured by the given group during the previous match operation. 474 * 475 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 476 * to right, starting at one. Group zero denotes the entire pattern, so 477 * the expression <i>m.</i>{@code end(0)} is equivalent to 478 * <i>m.</i>{@code end()}. 
</p> 479 * 480 * @param group 481 * The index of a capturing group in this matcher's pattern 482 * 483 * @return The offset after the last character captured by the group, 484 * or {@code -1} if the match was successful 485 * but the group itself did not match anything 486 * 487 * @throws IllegalStateException 488 * If no match has yet been attempted, 489 * or if the previous match operation failed 490 * 491 * @throws IndexOutOfBoundsException 492 * If there is no capturing group in the pattern 493 * with the given index 494 */ end(int group)495 public int end(int group) { 496 ensureMatch(); 497 if (group < 0 || group > groupCount()) 498 throw new IndexOutOfBoundsException("No group " + group); 499 return groups[group * 2 + 1]; 500 } 501 502 /** 503 * Returns the offset after the last character of the subsequence 504 * captured by the given <a href="Pattern.html#groupname">named-capturing 505 * group</a> during the previous match operation. 506 * 507 * @param name 508 * The name of a named-capturing group in this matcher's pattern 509 * 510 * @return The offset after the last character captured by the group, 511 * or {@code -1} if the match was successful 512 * but the group itself did not match anything 513 * 514 * @throws IllegalStateException 515 * If no match has yet been attempted, 516 * or if the previous match operation failed 517 * 518 * @throws IllegalArgumentException 519 * If there is no capturing group in the pattern 520 * with the given name 521 * @since 1.8 522 */ end(String name)523 public int end(String name) { 524 return groups[getMatchedGroupIndex(name) * 2 + 1]; 525 } 526 527 /** 528 * Returns the input subsequence matched by the previous match. 529 * 530 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, 531 * the expressions <i>m.</i>{@code group()} and 532 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),} <i>m.</i> 533 * {@code end())} are equivalent. 
</p> 534 * 535 * <p> Note that some patterns, for example {@code a*}, match the empty 536 * string. This method will return the empty string when the pattern 537 * successfully matches the empty string in the input. </p> 538 * 539 * @return The (possibly empty) subsequence matched by the previous match, 540 * in string form 541 * 542 * @throws IllegalStateException 543 * If no match has yet been attempted, 544 * or if the previous match operation failed 545 */ group()546 public String group() { 547 return group(0); 548 } 549 550 /** 551 * Returns the input subsequence captured by the given group during the 552 * previous match operation. 553 * 554 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index 555 * <i>g</i>, the expressions <i>m.</i>{@code group(}<i>g</i>{@code )} and 556 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code 557 * ),} <i>m.</i>{@code end(}<i>g</i>{@code ))} 558 * are equivalent. </p> 559 * 560 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 561 * to right, starting at one. Group zero denotes the entire pattern, so 562 * the expression {@code m.group(0)} is equivalent to {@code m.group()}. 563 * </p> 564 * 565 * <p> If the match was successful but the group specified failed to match 566 * any part of the input sequence, then {@code null} is returned. Note 567 * that some groups, for example {@code (a*)}, match the empty string. 568 * This method will return the empty string when such a group successfully 569 * matches the empty string in the input. 
</p> 570 * 571 * @param group 572 * The index of a capturing group in this matcher's pattern 573 * 574 * @return The (possibly empty) subsequence captured by the group 575 * during the previous match, or {@code null} if the group 576 * failed to match part of the input 577 * 578 * @throws IllegalStateException 579 * If no match has yet been attempted, 580 * or if the previous match operation failed 581 * 582 * @throws IndexOutOfBoundsException 583 * If there is no capturing group in the pattern 584 * with the given index 585 */ group(int group)586 public String group(int group) { 587 ensureMatch(); 588 if (group < 0 || group > groupCount()) 589 throw new IndexOutOfBoundsException("No group " + group); 590 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 591 return null; 592 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 593 } 594 595 /** 596 * Returns the input subsequence captured by the given 597 * <a href="Pattern.html#groupname">named-capturing group</a> during the 598 * previous match operation. 599 * 600 * <p> If the match was successful but the group specified failed to match 601 * any part of the input sequence, then {@code null} is returned. Note 602 * that some groups, for example {@code (a*)}, match the empty string. 603 * This method will return the empty string when such a group successfully 604 * matches the empty string in the input. 
</p> 605 * 606 * @param name 607 * The name of a named-capturing group in this matcher's pattern 608 * 609 * @return The (possibly empty) subsequence captured by the named group 610 * during the previous match, or {@code null} if the group 611 * failed to match part of the input 612 * 613 * @throws IllegalStateException 614 * If no match has yet been attempted, 615 * or if the previous match operation failed 616 * 617 * @throws IllegalArgumentException 618 * If there is no capturing group in the pattern 619 * with the given name 620 * @since 1.7 621 */ group(String name)622 public String group(String name) { 623 int group = getMatchedGroupIndex(name); 624 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 625 return null; 626 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 627 } 628 629 /** 630 * Returns the number of capturing groups in this matcher's pattern. 631 * 632 * <p> Group zero denotes the entire pattern by convention. It is not 633 * included in this count. 634 * 635 * <p> Any non-negative integer smaller than or equal to the value 636 * returned by this method is guaranteed to be a valid group index for 637 * this matcher. </p> 638 * 639 * @return The number of capturing groups in this matcher's pattern 640 */ groupCount()641 public int groupCount() { 642 synchronized (this) { 643 return nativeMatcher.groupCount(); 644 } 645 } 646 647 /** 648 * Attempts to match the entire region against the pattern. 649 * 650 * <p> If the match succeeds then more information can be obtained via the 651 * {@code start}, {@code end}, and {@code group} methods. 
</p> 652 * 653 * @return {@code true} if, and only if, the entire region sequence 654 * matches this matcher's pattern 655 */ matches()656 public boolean matches() { 657 synchronized (this) { 658 matchFound = nativeMatcher.matches(groups); 659 } 660 modCount++; 661 return matchFound; 662 } 663 664 /** 665 * Attempts to find the next subsequence of the input sequence that matches 666 * the pattern. 667 * 668 * <p> This method starts at the beginning of this matcher's region, or, if 669 * a previous invocation of the method was successful and the matcher has 670 * not since been reset, at the first character not matched by the previous 671 * match. 672 * 673 * <p> If the match succeeds then more information can be obtained via the 674 * {@code start}, {@code end}, and {@code group} methods. </p> 675 * 676 * @return {@code true} if, and only if, a subsequence of the input 677 * sequence matches this matcher's pattern 678 */ find()679 public boolean find() { 680 synchronized (this) { 681 matchFound = nativeMatcher.findNext(groups); 682 } 683 modCount++; 684 return matchFound; 685 } 686 687 /** 688 * Resets this matcher and then attempts to find the next subsequence of 689 * the input sequence that matches the pattern, starting at the specified 690 * index. 691 * 692 * <p> If the match succeeds then more information can be obtained via the 693 * {@code start}, {@code end}, and {@code group} methods, and subsequent 694 * invocations of the {@link #find()} method will start at the first 695 * character not matched by this match. </p> 696 * 697 * @param start the index to start searching for a match 698 * @throws IndexOutOfBoundsException 699 * If start is less than zero or if start is greater than the 700 * length of the input sequence. 
701 * 702 * @return {@code true} if, and only if, a subsequence of the input 703 * sequence starting at the given index matches this matcher's 704 * pattern 705 */ find(int start)706 public boolean find(int start) { 707 int limit = getTextLength(); 708 if ((start < 0) || (start > limit)) 709 throw new IndexOutOfBoundsException("Illegal start index"); 710 reset(); 711 synchronized (this) { 712 matchFound = nativeMatcher.find(start, groups); 713 } 714 modCount++; 715 return matchFound; 716 } 717 718 /** 719 * Attempts to match the input sequence, starting at the beginning of the 720 * region, against the pattern. 721 * 722 * <p> Like the {@link #matches matches} method, this method always starts 723 * at the beginning of the region; unlike that method, it does not 724 * require that the entire region be matched. 725 * 726 * <p> If the match succeeds then more information can be obtained via the 727 * {@code start}, {@code end}, and {@code group} methods. </p> 728 * 729 * @return {@code true} if, and only if, a prefix of the input 730 * sequence matches this matcher's pattern 731 */ lookingAt()732 public boolean lookingAt() { 733 synchronized (this) { 734 matchFound = nativeMatcher.lookingAt(groups); 735 } 736 modCount++; 737 return matchFound; 738 } 739 740 /** 741 * Returns a literal replacement {@code String} for the specified 742 * {@code String}. 743 * 744 * This method produces a {@code String} that will work 745 * as a literal replacement {@code s} in the 746 * {@code appendReplacement} method of the {@link Matcher} class. 747 * The {@code String} produced will match the sequence of characters 748 * in {@code s} treated as a literal sequence. Slashes ('\') and 749 * dollar signs ('$') will be given no special meaning. 
750 * 751 * @param s The string to be literalized 752 * @return A literal string replacement 753 * @since 1.5 754 */ quoteReplacement(String s)755 public static String quoteReplacement(String s) { 756 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) 757 return s; 758 StringBuilder sb = new StringBuilder(); 759 for (int i=0; i<s.length(); i++) { 760 char c = s.charAt(i); 761 if (c == '\\' || c == '$') { 762 sb.append('\\'); 763 } 764 sb.append(c); 765 } 766 return sb.toString(); 767 } 768 769 /** 770 * Implements a non-terminal append-and-replace step. 771 * 772 * <p> This method performs the following actions: </p> 773 * 774 * <ol> 775 * 776 * <li><p> It reads characters from the input sequence, starting at the 777 * append position, and appends them to the given string buffer. It 778 * stops after reading the last character preceding the previous match, 779 * that is, the character at index {@link 780 * #start()} {@code -} {@code 1}. </p></li> 781 * 782 * <li><p> It appends the given replacement string to the string buffer. 783 * </p></li> 784 * 785 * <li><p> It sets the append position of this matcher to the index of 786 * the last character matched, plus one, that is, to {@link #end()}. 787 * </p></li> 788 * 789 * </ol> 790 * 791 * <p> The replacement string may contain references to subsequences 792 * captured during the previous match: Each occurrence of 793 * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i> 794 * will be replaced by the result of evaluating the corresponding 795 * {@link #group(String) group(name)} or {@link #group(int) group(g)} 796 * respectively. For {@code $}<i>g</i>, 797 * the first number after the {@code $} is always treated as part of 798 * the group reference. Subsequent numbers are incorporated into g if 799 * they would form a legal group reference. Only the numerals '0' 800 * through '9' are considered as potential components of the group 801 * reference. 
If the second group matched the string {@code "foo"}, for 802 * example, then passing the replacement string {@code "$2bar"} would 803 * cause {@code "foobar"} to be appended to the string buffer. A dollar 804 * sign ({@code $}) may be included as a literal in the replacement 805 * string by preceding it with a backslash ({@code \$}). 806 * 807 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 808 * the replacement string may cause the results to be different than if it 809 * were being treated as a literal replacement string. Dollar signs may be 810 * treated as references to captured subsequences as described above, and 811 * backslashes are used to escape literal characters in the replacement 812 * string. 813 * 814 * <p> This method is intended to be used in a loop together with the 815 * {@link #appendTail(StringBuffer) appendTail} and {@link #find() find} 816 * methods. The following code, for example, writes {@code one dog two dogs 817 * in the yard} to the standard-output stream: </p> 818 * 819 * <blockquote><pre> 820 * Pattern p = Pattern.compile("cat"); 821 * Matcher m = p.matcher("one cat two cats in the yard"); 822 * StringBuffer sb = new StringBuffer(); 823 * while (m.find()) { 824 * m.appendReplacement(sb, "dog"); 825 * } 826 * m.appendTail(sb); 827 * System.out.println(sb.toString());</pre></blockquote> 828 * 829 * @param sb 830 * The target string buffer 831 * 832 * @param replacement 833 * The replacement string 834 * 835 * @return This matcher 836 * 837 * @throws IllegalStateException 838 * If no match has yet been attempted, 839 * or if the previous match operation failed 840 * 841 * @throws IllegalArgumentException 842 * If the replacement string refers to a named-capturing 843 * group that does not exist in the pattern 844 * 845 * @throws IndexOutOfBoundsException 846 * If the replacement string refers to a capturing group 847 * that does not exist in the pattern 848 */ appendReplacement(StringBuffer sb, String 
replacement)849 public Matcher appendReplacement(StringBuffer sb, String replacement) { 850 // TODO: Throw IllegalStateException after an SDK level check. 851 // Android-removed: Don't throw IllegalStateException due to app compat 852 // If no match, return error 853 // if (first < 0) 854 // throw new IllegalStateException("No match available"); 855 StringBuilder result = new StringBuilder(); 856 // Android-changed: Use Android's appendEvaluated due to app compat. 857 // appendExpandedReplacement(replacement, result); 858 appendReplacementInternal(result, replacement); 859 // Append the intervening text 860 // Android-changed: Android has no lastAppendPosition. 861 // sb.append(text, lastAppendPosition, first); 862 sb.append(text, appendPos, start()); 863 // Append the match substitution 864 sb.append(result); 865 // Android-changed: Android has no lastAppendPosition. 866 // lastAppendPosition = last; 867 appendPos = end(); 868 modCount++; 869 return this; 870 } 871 872 // BEGIN Android-added: Backward-compatible codes for appendReplacement(). 873 /** 874 * Since Android 14, {@link Matcher} becomes stricter for the replacement syntax and 875 * group references used by its methods, e.g. {@link #appendReplacement(StringBuffer, String)}. 876 * 877 * This flag is enabled for apps targeting Android 14+. 878 * 879 * @hide 880 */ 881 @ChangeId 882 @EnabledSince(targetSdkVersion = VersionCodes.UPSIDE_DOWN_CAKE) 883 public static final long DISALLOW_INVALID_GROUP_REFERENCE = 247079863L; 884 appendReplacementInternal(StringBuilder sb, String replacement)885 private void appendReplacementInternal(StringBuilder sb, String replacement) { 886 if (VMRuntime.getSdkVersion() >= VersionCodes.UPSIDE_DOWN_CAKE 887 && Compatibility.isChangeEnabled(DISALLOW_INVALID_GROUP_REFERENCE)) { 888 appendExpandedReplacement(replacement, sb); 889 } else { 890 appendEvaluated(sb, replacement); 891 } 892 } 893 894 /** 895 * Internal helper method to append a given string to a given string buffer. 
896 * If the string contains any references to groups, these are replaced by 897 * the corresponding group's contents. 898 * 899 * @param buffer the string builder. 900 * @param s the string to append. 901 * 902 * @hide 903 */ appendEvaluated(StringBuilder buffer, String s)904 public void appendEvaluated(StringBuilder buffer, String s) { 905 boolean escape = false; 906 boolean dollar = false; 907 boolean escapeNamedGroup = false; 908 int escapeNamedGroupStart = -1; 909 910 for (int i = 0; i < s.length(); i++) { 911 char c = s.charAt(i); 912 if (c == '\\' && !escape) { 913 escape = true; 914 } else if (c == '$' && !escape) { 915 dollar = true; 916 } else if (c >= '0' && c <= '9' && dollar && !escapeNamedGroup) { 917 String groupValue = group(c - '0'); 918 if (groupValue != null) { 919 buffer.append(groupValue); 920 } 921 dollar = false; 922 } else if (c == '{' && dollar) { 923 escapeNamedGroup = true; 924 escapeNamedGroupStart = i; 925 } else if (c == '}' && dollar && escapeNamedGroup) { 926 String groupValue = group(s.substring(escapeNamedGroupStart + 1, i)); 927 if (groupValue != null) { 928 buffer.append(groupValue); 929 } 930 dollar = false; 931 escapeNamedGroup = false; 932 } else if (c != '}' && dollar && escapeNamedGroup) { 933 continue; 934 } else { 935 buffer.append(c); 936 dollar = false; 937 escape = false; 938 escapeNamedGroup = false; 939 } 940 } 941 942 if (escape) { 943 throw new IllegalArgumentException("character to be escaped is missing"); 944 } 945 946 if (dollar) { 947 throw new IllegalArgumentException("Illegal group reference: group index is missing"); 948 } 949 950 if (escapeNamedGroup) { 951 throw new IllegalArgumentException("Missing ending brace '}' from replacement string"); 952 } 953 } 954 // END Android-added: Backward-compatible codes for appendReplacement(). 955 956 /** 957 * Implements a non-terminal append-and-replace step. 
958 * 959 * <p> This method performs the following actions: </p> 960 * 961 * <ol> 962 * 963 * <li><p> It reads characters from the input sequence, starting at the 964 * append position, and appends them to the given string builder. It 965 * stops after reading the last character preceding the previous match, 966 * that is, the character at index {@link 967 * #start()} {@code -} {@code 1}. </p></li> 968 * 969 * <li><p> It appends the given replacement string to the string builder. 970 * </p></li> 971 * 972 * <li><p> It sets the append position of this matcher to the index of 973 * the last character matched, plus one, that is, to {@link #end()}. 974 * </p></li> 975 * 976 * </ol> 977 * 978 * <p> The replacement string may contain references to subsequences 979 * captured during the previous match: Each occurrence of 980 * {@code $}<i>g</i> will be replaced by the result of 981 * evaluating {@link #group(int) group}{@code (}<i>g</i>{@code )}. 982 * The first number after the {@code $} is always treated as part of 983 * the group reference. Subsequent numbers are incorporated into g if 984 * they would form a legal group reference. Only the numerals '0' 985 * through '9' are considered as potential components of the group 986 * reference. If the second group matched the string {@code "foo"}, for 987 * example, then passing the replacement string {@code "$2bar"} would 988 * cause {@code "foobar"} to be appended to the string builder. A dollar 989 * sign ({@code $}) may be included as a literal in the replacement 990 * string by preceding it with a backslash ({@code \$}). 991 * 992 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 993 * the replacement string may cause the results to be different than if it 994 * were being treated as a literal replacement string. Dollar signs may be 995 * treated as references to captured subsequences as described above, and 996 * backslashes are used to escape literal characters in the replacement 997 * string. 
998 * 999 * <p> This method is intended to be used in a loop together with the 1000 * {@link #appendTail(StringBuilder) appendTail} and 1001 * {@link #find() find} methods. The following code, for example, writes 1002 * {@code one dog two dogs in the yard} to the standard-output stream: </p> 1003 * 1004 * <blockquote><pre> 1005 * Pattern p = Pattern.compile("cat"); 1006 * Matcher m = p.matcher("one cat two cats in the yard"); 1007 * StringBuilder sb = new StringBuilder(); 1008 * while (m.find()) { 1009 * m.appendReplacement(sb, "dog"); 1010 * } 1011 * m.appendTail(sb); 1012 * System.out.println(sb.toString());</pre></blockquote> 1013 * 1014 * @param sb 1015 * The target string builder 1016 * @param replacement 1017 * The replacement string 1018 * @return This matcher 1019 * 1020 * @throws IllegalStateException 1021 * If no match has yet been attempted, 1022 * or if the previous match operation failed 1023 * @throws IllegalArgumentException 1024 * If the replacement string refers to a named-capturing 1025 * group that does not exist in the pattern 1026 * @throws IndexOutOfBoundsException 1027 * If the replacement string refers to a capturing group 1028 * that does not exist in the pattern 1029 * @since 9 1030 */ appendReplacement(StringBuilder sb, String replacement)1031 public Matcher appendReplacement(StringBuilder sb, String replacement) { 1032 // If no match, return error 1033 // Android-changed: Android has no first field. 1034 // if (first < 0) 1035 // throw new IllegalStateException("No match available"); 1036 ensureMatch(); 1037 StringBuilder result = new StringBuilder(); 1038 // Android-changed: Use Android's appendEvaluated due to app compat. 1039 // appendExpandedReplacement(replacement, result); 1040 appendReplacementInternal(result, replacement); 1041 // Append the intervening text 1042 // Android-changed: Android has no lastAppendPosition. 
1043 // sb.append(text, lastAppendPosition, first); 1044 sb.append(text, appendPos, start()); 1045 // Append the match substitution 1046 sb.append(result); 1047 // Android-changed: Android has no lastAppendPosition. 1048 // lastAppendPosition = last; 1049 appendPos = end(); 1050 modCount++; 1051 return this; 1052 } 1053 1054 // Android-changed: Make public for testing. 1055 /** 1056 * Processes replacement string to replace group references with 1057 * groups. 1058 * 1059 * @hide 1060 */ appendExpandedReplacement( String replacement, StringBuilder result)1061 public StringBuilder appendExpandedReplacement( 1062 String replacement, StringBuilder result) { 1063 int cursor = 0; 1064 while (cursor < replacement.length()) { 1065 char nextChar = replacement.charAt(cursor); 1066 if (nextChar == '\\') { 1067 cursor++; 1068 if (cursor == replacement.length()) 1069 throw new IllegalArgumentException( 1070 "character to be escaped is missing"); 1071 nextChar = replacement.charAt(cursor); 1072 result.append(nextChar); 1073 cursor++; 1074 } else if (nextChar == '$') { 1075 // Skip past $ 1076 cursor++; 1077 // Throw IAE if this "$" is the last character in replacement 1078 if (cursor == replacement.length()) 1079 throw new IllegalArgumentException( 1080 "Illegal group reference: group index is missing"); 1081 nextChar = replacement.charAt(cursor); 1082 int refNum = -1; 1083 if (nextChar == '{') { 1084 cursor++; 1085 StringBuilder gsb = new StringBuilder(); 1086 while (cursor < replacement.length()) { 1087 nextChar = replacement.charAt(cursor); 1088 if (ASCII.isLower(nextChar) || 1089 ASCII.isUpper(nextChar) || 1090 ASCII.isDigit(nextChar)) { 1091 gsb.append(nextChar); 1092 cursor++; 1093 } else { 1094 break; 1095 } 1096 } 1097 if (gsb.length() == 0) 1098 throw new IllegalArgumentException( 1099 "named capturing group has 0 length name"); 1100 if (nextChar != '}') 1101 throw new IllegalArgumentException( 1102 "named capturing group is missing trailing '}'"); 1103 String gname = 
gsb.toString(); 1104 if (ASCII.isDigit(gname.charAt(0))) 1105 throw new IllegalArgumentException( 1106 "capturing group name {" + gname + 1107 "} starts with digit character"); 1108 // Android-changed: Use ICU4C as the regex backend. 1109 // if (!parentPattern.namedGroups().containsKey(gname)) 1110 int groupIndex = nativeMatcher.getMatchedGroupIndex(gname); 1111 if (groupIndex < 0) 1112 throw new IllegalArgumentException( 1113 "No group with name {" + gname + "}"); 1114 refNum = groupIndex; 1115 cursor++; 1116 } else { 1117 // The first number is always a group 1118 refNum = nextChar - '0'; 1119 if ((refNum < 0) || (refNum > 9)) 1120 throw new IllegalArgumentException( 1121 "Illegal group reference"); 1122 cursor++; 1123 // Capture the largest legal group string 1124 boolean done = false; 1125 while (!done) { 1126 if (cursor >= replacement.length()) { 1127 break; 1128 } 1129 int nextDigit = replacement.charAt(cursor) - '0'; 1130 if ((nextDigit < 0) || (nextDigit > 9)) { // not a number 1131 break; 1132 } 1133 int newRefNum = (refNum * 10) + nextDigit; 1134 if (groupCount() < newRefNum) { 1135 done = true; 1136 } else { 1137 refNum = newRefNum; 1138 cursor++; 1139 } 1140 } 1141 } 1142 // Append group 1143 if (start(refNum) != -1 && end(refNum) != -1) 1144 result.append(text, start(refNum), end(refNum)); 1145 } else { 1146 result.append(nextChar); 1147 cursor++; 1148 } 1149 } 1150 return result; 1151 } 1152 1153 /** 1154 * Implements a terminal append-and-replace step. 1155 * 1156 * <p> This method reads characters from the input sequence, starting at 1157 * the append position, and appends them to the given string buffer. It is 1158 * intended to be invoked after one or more invocations of the {@link 1159 * #appendReplacement(StringBuffer, String) appendReplacement} method in 1160 * order to copy the remainder of the input sequence. 
</p> 1161 * 1162 * @param sb 1163 * The target string buffer 1164 * 1165 * @return The target string buffer 1166 */ appendTail(StringBuffer sb)1167 public StringBuffer appendTail(StringBuffer sb) { 1168 // Android-changed: Android has no lastAppendPosition. 1169 // sb.append(text, lastAppendPosition, getTextLength()); 1170 if (appendPos < to) { 1171 sb.append(text.substring(appendPos, to)); 1172 } 1173 return sb; 1174 } 1175 1176 /** 1177 * Implements a terminal append-and-replace step. 1178 * 1179 * <p> This method reads characters from the input sequence, starting at 1180 * the append position, and appends them to the given string builder. It is 1181 * intended to be invoked after one or more invocations of the {@link 1182 * #appendReplacement(StringBuilder, String) 1183 * appendReplacement} method in order to copy the remainder of the input 1184 * sequence. </p> 1185 * 1186 * @param sb 1187 * The target string builder 1188 * 1189 * @return The target string builder 1190 * 1191 * @since 9 1192 */ appendTail(StringBuilder sb)1193 public StringBuilder appendTail(StringBuilder sb) { 1194 // Android-changed: Android has no lastAppendPosition. 1195 // sb.append(text, lastAppendPosition, getTextLength()); 1196 if (appendPos < to) { 1197 sb.append(text.substring(appendPos, to)); 1198 } 1199 return sb; 1200 } 1201 1202 /** 1203 * Replaces every subsequence of the input sequence that matches the 1204 * pattern with the given replacement string. 1205 * 1206 * <p> This method first resets this matcher. It then scans the input 1207 * sequence looking for matches of the pattern. Characters that are not 1208 * part of any match are appended directly to the result string; each match 1209 * is replaced in the result by the replacement string. The replacement 1210 * string may contain references to captured subsequences as in the {@link 1211 * #appendReplacement appendReplacement} method. 
1212 * 1213 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1214 * the replacement string may cause the results to be different than if it 1215 * were being treated as a literal replacement string. Dollar signs may be 1216 * treated as references to captured subsequences as described above, and 1217 * backslashes are used to escape literal characters in the replacement 1218 * string. 1219 * 1220 * <p> Given the regular expression {@code a*b}, the input 1221 * {@code "aabfooaabfooabfoob"}, and the replacement string 1222 * {@code "-"}, an invocation of this method on a matcher for that 1223 * expression would yield the string {@code "-foo-foo-foo-"}. 1224 * 1225 * <p> Invoking this method changes this matcher's state. If the matcher 1226 * is to be used in further matching operations then it should first be 1227 * reset. </p> 1228 * 1229 * @param replacement 1230 * The replacement string 1231 * 1232 * @return The string constructed by replacing each matching subsequence 1233 * by the replacement string, substituting captured subsequences 1234 * as needed 1235 */ replaceAll(String replacement)1236 public String replaceAll(String replacement) { 1237 reset(); 1238 boolean result = find(); 1239 if (result) { 1240 StringBuilder sb = new StringBuilder(); 1241 do { 1242 appendReplacement(sb, replacement); 1243 result = find(); 1244 } while (result); 1245 appendTail(sb); 1246 return sb.toString(); 1247 } 1248 return text.toString(); 1249 } 1250 1251 /** 1252 * Replaces every subsequence of the input sequence that matches the 1253 * pattern with the result of applying the given replacer function to the 1254 * match result of this matcher corresponding to that subsequence. 1255 * Exceptions thrown by the function are relayed to the caller. 1256 * 1257 * <p> This method first resets this matcher. It then scans the input 1258 * sequence looking for matches of the pattern. 
 * Characters that are not
 * part of any match are appended directly to the result string; each match
 * is replaced in the result by applying the replacer function that
 * returns a replacement string. Each replacement string may contain
 * references to captured subsequences as in the {@link #appendReplacement
 * appendReplacement} method.
 *
 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
 * a replacement string may cause the results to be different than if it
 * were being treated as a literal replacement string. Dollar signs may be
 * treated as references to captured subsequences as described above, and
 * backslashes are used to escape literal characters in the replacement
 * string.
 *
 * <p> Given the regular expression {@code dog}, the input
 * {@code "zzzdogzzzdogzzz"}, and the function
 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
 * a matcher for that expression would yield the string
 * {@code "zzzDOGzzzDOGzzz"}.
 *
 * <p> Invoking this method changes this matcher's state. If the matcher
 * is to be used in further matching operations then it should first be
 * reset. </p>
 *
 * <p> The replacer function should not modify this matcher's state during
 * replacement. This method will, on a best-effort basis, throw a
 * {@link java.util.ConcurrentModificationException} if such modification is
 * detected.
 *
 * <p> The state of each match result passed to the replacer function is
 * guaranteed to be constant only for the duration of the replacer function
 * call and only if the replacer function does not modify this matcher's
 * state.
 *
 * @implNote
 * This implementation applies the replacer function to this matcher, which
 * is an instance of {@code MatchResult}.
1295 * 1296 * @param replacer 1297 * The function to be applied to the match result of this matcher 1298 * that returns a replacement string. 1299 * @return The string constructed by replacing each matching subsequence 1300 * with the result of applying the replacer function to that 1301 * matched subsequence, substituting captured subsequences as 1302 * needed. 1303 * @throws NullPointerException if the replacer function is null 1304 * @throws ConcurrentModificationException if it is detected, on a 1305 * best-effort basis, that the replacer function modified this 1306 * matcher's state 1307 * @since 9 1308 */ replaceAll(Function<MatchResult, String> replacer)1309 public String replaceAll(Function<MatchResult, String> replacer) { 1310 Objects.requireNonNull(replacer); 1311 reset(); 1312 boolean result = find(); 1313 if (result) { 1314 StringBuilder sb = new StringBuilder(); 1315 do { 1316 int ec = modCount; 1317 String replacement = replacer.apply(this); 1318 if (ec != modCount) 1319 throw new ConcurrentModificationException(); 1320 appendReplacement(sb, replacement); 1321 result = find(); 1322 } while (result); 1323 appendTail(sb); 1324 return sb.toString(); 1325 } 1326 return text.toString(); 1327 } 1328 1329 /** 1330 * Returns a stream of match results for each subsequence of the input 1331 * sequence that matches the pattern. The match results occur in the 1332 * same order as the matching subsequences in the input sequence. 1333 * 1334 * <p> Each match result is produced as if by {@link #toMatchResult()}. 1335 * 1336 * <p> This method does not reset this matcher. Matching starts on 1337 * initiation of the terminal stream operation either at the beginning of 1338 * this matcher's region, or, if the matcher has not since been reset, at 1339 * the first character not matched by a previous match. 1340 * 1341 * <p> If the matcher is to be used for further matching operations after 1342 * the terminal stream operation completes then it should be first reset. 
1343 * 1344 * <p> This matcher's state should not be modified during execution of the 1345 * returned stream's pipeline. The returned stream's source 1346 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort 1347 * basis, throw a {@link java.util.ConcurrentModificationException} if such 1348 * modification is detected. 1349 * 1350 * @return a sequential stream of match results. 1351 * @since 9 1352 */ results()1353 public Stream<MatchResult> results() { 1354 class MatchResultIterator implements Iterator<MatchResult> { 1355 // -ve for call to find, 0 for not found, 1 for found 1356 int state = -1; 1357 // State for concurrent modification checking 1358 // -1 for uninitialized 1359 int expectedCount = -1; 1360 // The input sequence as a string, set once only after first find 1361 // Avoids repeated conversion from CharSequence for each match 1362 String textAsString; 1363 1364 @Override 1365 public MatchResult next() { 1366 if (expectedCount >= 0 && expectedCount != modCount) 1367 throw new ConcurrentModificationException(); 1368 1369 if (!hasNext()) 1370 throw new NoSuchElementException(); 1371 1372 state = -1; 1373 return toMatchResult(textAsString); 1374 } 1375 1376 @Override 1377 public boolean hasNext() { 1378 if (state >= 0) 1379 return state == 1; 1380 1381 // Defer throwing ConcurrentModificationException to when next 1382 // or forEachRemaining is called. The is consistent with other 1383 // fail-fast implementations. 1384 if (expectedCount >= 0 && expectedCount != modCount) 1385 return true; 1386 1387 boolean found = find(); 1388 // Capture the input sequence as a string on first find 1389 if (found && state < 0) 1390 textAsString = text.toString(); 1391 state = found ? 1 : 0; 1392 expectedCount = modCount; 1393 return found; 1394 } 1395 1396 @Override 1397 public void forEachRemaining(Consumer<? 
super MatchResult> action) { 1398 if (expectedCount >= 0 && expectedCount != modCount) 1399 throw new ConcurrentModificationException(); 1400 1401 int s = state; 1402 if (s == 0) 1403 return; 1404 1405 // Set state to report no more elements on further operations 1406 state = 0; 1407 expectedCount = -1; 1408 1409 // Perform a first find if required 1410 if (s < 0 && !find()) 1411 return; 1412 1413 // Capture the input sequence as a string on first find 1414 textAsString = text.toString(); 1415 1416 do { 1417 int ec = modCount; 1418 action.accept(toMatchResult(textAsString)); 1419 if (ec != modCount) 1420 throw new ConcurrentModificationException(); 1421 } while (find()); 1422 } 1423 } 1424 return StreamSupport.stream(Spliterators.spliteratorUnknownSize( 1425 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false); 1426 } 1427 1428 /** 1429 * Replaces the first subsequence of the input sequence that matches the 1430 * pattern with the given replacement string. 1431 * 1432 * <p> This method first resets this matcher. It then scans the input 1433 * sequence looking for a match of the pattern. Characters that are not 1434 * part of the match are appended directly to the result string; the match 1435 * is replaced in the result by the replacement string. The replacement 1436 * string may contain references to captured subsequences as in the {@link 1437 * #appendReplacement appendReplacement} method. 1438 * 1439 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1440 * the replacement string may cause the results to be different than if it 1441 * were being treated as a literal replacement string. Dollar signs may be 1442 * treated as references to captured subsequences as described above, and 1443 * backslashes are used to escape literal characters in the replacement 1444 * string. 
1445 * 1446 * <p> Given the regular expression {@code dog}, the input 1447 * {@code "zzzdogzzzdogzzz"}, and the replacement string 1448 * {@code "cat"}, an invocation of this method on a matcher for that 1449 * expression would yield the string {@code "zzzcatzzzdogzzz"}. </p> 1450 * 1451 * <p> Invoking this method changes this matcher's state. If the matcher 1452 * is to be used in further matching operations then it should first be 1453 * reset. </p> 1454 * 1455 * @param replacement 1456 * The replacement string 1457 * @return The string constructed by replacing the first matching 1458 * subsequence by the replacement string, substituting captured 1459 * subsequences as needed 1460 */ replaceFirst(String replacement)1461 public String replaceFirst(String replacement) { 1462 if (replacement == null) 1463 throw new NullPointerException("replacement"); 1464 reset(); 1465 if (!find()) 1466 return text.toString(); 1467 StringBuilder sb = new StringBuilder(); 1468 appendReplacement(sb, replacement); 1469 appendTail(sb); 1470 return sb.toString(); 1471 } 1472 1473 /** 1474 * Replaces the first subsequence of the input sequence that matches the 1475 * pattern with the result of applying the given replacer function to the 1476 * match result of this matcher corresponding to that subsequence. 1477 * Exceptions thrown by the replace function are relayed to the caller. 1478 * 1479 * <p> This method first resets this matcher. It then scans the input 1480 * sequence looking for a match of the pattern. Characters that are not 1481 * part of the match are appended directly to the result string; the match 1482 * is replaced in the result by the applying the replacer function that 1483 * returns a replacement string. The replacement string may contain 1484 * references to captured subsequences as in the {@link #appendReplacement 1485 * appendReplacement} method. 
1486 * 1487 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1488 * the replacement string may cause the results to be different than if it 1489 * were being treated as a literal replacement string. Dollar signs may be 1490 * treated as references to captured subsequences as described above, and 1491 * backslashes are used to escape literal characters in the replacement 1492 * string. 1493 * 1494 * <p> Given the regular expression {@code dog}, the input 1495 * {@code "zzzdogzzzdogzzz"}, and the function 1496 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on 1497 * a matcher for that expression would yield the string 1498 * {@code "zzzDOGzzzdogzzz"}. 1499 * 1500 * <p> Invoking this method changes this matcher's state. If the matcher 1501 * is to be used in further matching operations then it should first be 1502 * reset. 1503 * 1504 * <p> The replacer function should not modify this matcher's state during 1505 * replacement. This method will, on a best-effort basis, throw a 1506 * {@link java.util.ConcurrentModificationException} if such modification is 1507 * detected. 1508 * 1509 * <p> The state of the match result passed to the replacer function is 1510 * guaranteed to be constant only for the duration of the replacer function 1511 * call and only if the replacer function does not modify this matcher's 1512 * state. 1513 * 1514 * @implNote 1515 * This implementation applies the replacer function to this matcher, which 1516 * is an instance of {@code MatchResult}. 1517 * 1518 * @param replacer 1519 * The function to be applied to the match result of this matcher 1520 * that returns a replacement string. 1521 * @return The string constructed by replacing the first matching 1522 * subsequence with the result of applying the replacer function to 1523 * the matched subsequence, substituting captured subsequences as 1524 * needed. 
1525 * @throws NullPointerException if the replacer function is null 1526 * @throws ConcurrentModificationException if it is detected, on a 1527 * best-effort basis, that the replacer function modified this 1528 * matcher's state 1529 * @since 9 1530 */ replaceFirst(Function<MatchResult, String> replacer)1531 public String replaceFirst(Function<MatchResult, String> replacer) { 1532 Objects.requireNonNull(replacer); 1533 reset(); 1534 if (!find()) 1535 return text.toString(); 1536 StringBuilder sb = new StringBuilder(); 1537 int ec = modCount; 1538 String replacement = replacer.apply(this); 1539 if (ec != modCount) 1540 throw new ConcurrentModificationException(); 1541 appendReplacement(sb, replacement); 1542 appendTail(sb); 1543 return sb.toString(); 1544 } 1545 1546 /** 1547 * Sets the limits of this matcher's region. The region is the part of the 1548 * input sequence that will be searched to find a match. Invoking this 1549 * method resets the matcher, and then sets the region to start at the 1550 * index specified by the {@code start} parameter and end at the 1551 * index specified by the {@code end} parameter. 1552 * 1553 * <p>Depending on the transparency and anchoring being used (see 1554 * {@link #useTransparentBounds(boolean) useTransparentBounds} and 1555 * {@link #useAnchoringBounds(boolean) useAnchoringBounds}), certain 1556 * constructs such as anchors may behave differently at or around the 1557 * boundaries of the region. 1558 * 1559 * @param start 1560 * The index to start searching at (inclusive) 1561 * @param end 1562 * The index to end searching at (exclusive) 1563 * @throws IndexOutOfBoundsException 1564 * If start or end is less than zero, if 1565 * start is greater than the length of the input sequence, if 1566 * end is greater than the length of the input sequence, or if 1567 * start is greater than end. 
1568 * @return this matcher 1569 * @since 1.5 1570 */ region(int start, int end)1571 public Matcher region(int start, int end) { 1572 return reset(originalInput, start, end); 1573 } 1574 1575 /** 1576 * Reports the start index of this matcher's region. The 1577 * searches this matcher conducts are limited to finding matches 1578 * within {@link #regionStart() regionStart} (inclusive) and 1579 * {@link #regionEnd() regionEnd} (exclusive). 1580 * 1581 * @return The starting point of this matcher's region 1582 * @since 1.5 1583 */ regionStart()1584 public int regionStart() { 1585 return from; 1586 } 1587 1588 /** 1589 * Reports the end index (exclusive) of this matcher's region. 1590 * The searches this matcher conducts are limited to finding matches 1591 * within {@link #regionStart() regionStart} (inclusive) and 1592 * {@link #regionEnd() regionEnd} (exclusive). 1593 * 1594 * @return the ending point of this matcher's region 1595 * @since 1.5 1596 */ regionEnd()1597 public int regionEnd() { 1598 return to; 1599 } 1600 1601 /** 1602 * Queries the transparency of region bounds for this matcher. 1603 * 1604 * <p> This method returns {@code true} if this matcher uses 1605 * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i> 1606 * bounds. 1607 * 1608 * <p> See {@link #useTransparentBounds(boolean) useTransparentBounds} for a 1609 * description of transparent and opaque bounds. 1610 * 1611 * <p> By default, a matcher uses opaque region boundaries. 1612 * 1613 * @return {@code true} iff this matcher is using transparent bounds, 1614 * {@code false} otherwise. 1615 * @see java.util.regex.Matcher#useTransparentBounds(boolean) 1616 * @since 1.5 1617 */ hasTransparentBounds()1618 public boolean hasTransparentBounds() { 1619 return transparentBounds; 1620 } 1621 1622 /** 1623 * Sets the transparency of region bounds for this matcher. 
1624 * 1625 * <p> Invoking this method with an argument of {@code true} will set this 1626 * matcher to use <i>transparent</i> bounds. If the boolean 1627 * argument is {@code false}, then <i>opaque</i> bounds will be used. 1628 * 1629 * <p> Using transparent bounds, the boundaries of this 1630 * matcher's region are transparent to lookahead, lookbehind, 1631 * and boundary matching constructs. Those constructs can see beyond the 1632 * boundaries of the region to see if a match is appropriate. 1633 * 1634 * <p> Using opaque bounds, the boundaries of this matcher's 1635 * region are opaque to lookahead, lookbehind, and boundary matching 1636 * constructs that may try to see beyond them. Those constructs cannot 1637 * look past the boundaries so they will fail to match anything outside 1638 * of the region. 1639 * 1640 * <p> By default, a matcher uses opaque bounds. 1641 * 1642 * @param b a boolean indicating whether to use opaque or transparent 1643 * regions 1644 * @return this matcher 1645 * @see java.util.regex.Matcher#hasTransparentBounds 1646 * @since 1.5 1647 */ useTransparentBounds(boolean b)1648 public Matcher useTransparentBounds(boolean b) { 1649 synchronized (this) { 1650 transparentBounds = b; 1651 nativeMatcher.useTransparentBounds(b); 1652 } 1653 return this; 1654 } 1655 1656 /** 1657 * Queries the anchoring of region bounds for this matcher. 1658 * 1659 * <p> This method returns {@code true} if this matcher uses 1660 * <i>anchoring</i> bounds, {@code false} otherwise. 1661 * 1662 * <p> See {@link #useAnchoringBounds(boolean) useAnchoringBounds} for a 1663 * description of anchoring bounds. 1664 * 1665 * <p> By default, a matcher uses anchoring region boundaries. 1666 * 1667 * @return {@code true} iff this matcher is using anchoring bounds, 1668 * {@code false} otherwise. 
1669 * @see java.util.regex.Matcher#useAnchoringBounds(boolean) 1670 * @since 1.5 1671 */ hasAnchoringBounds()1672 public boolean hasAnchoringBounds() { 1673 return anchoringBounds; 1674 } 1675 1676 /** 1677 * Sets the anchoring of region bounds for this matcher. 1678 * 1679 * <p> Invoking this method with an argument of {@code true} will set this 1680 * matcher to use <i>anchoring</i> bounds. If the boolean 1681 * argument is {@code false}, then <i>non-anchoring</i> bounds will be 1682 * used. 1683 * 1684 * <p> Using anchoring bounds, the boundaries of this 1685 * matcher's region match anchors such as ^ and $. 1686 * 1687 * <p> Without anchoring bounds, the boundaries of this 1688 * matcher's region will not match anchors such as ^ and $. 1689 * 1690 * <p> By default, a matcher uses anchoring region boundaries. 1691 * 1692 * @param b a boolean indicating whether or not to use anchoring bounds. 1693 * @return this matcher 1694 * @see java.util.regex.Matcher#hasAnchoringBounds 1695 * @since 1.5 1696 */ useAnchoringBounds(boolean b)1697 public Matcher useAnchoringBounds(boolean b) { 1698 synchronized (this) { 1699 anchoringBounds = b; 1700 nativeMatcher.useAnchoringBounds(b); 1701 } 1702 return this; 1703 } 1704 1705 /** 1706 * <p>Returns the string representation of this matcher. The 1707 * string representation of a {@code Matcher} contains information 1708 * that may be useful for debugging. The exact format is unspecified. 1709 * 1710 * @return The string representation of this matcher 1711 * @since 1.5 1712 */ toString()1713 public String toString() { 1714 StringBuilder sb = new StringBuilder(); 1715 sb.append("java.util.regex.Matcher") 1716 .append("[pattern=").append(pattern()) 1717 .append(" region=") 1718 .append(regionStart()).append(',').append(regionEnd()) 1719 .append(" lastmatch="); 1720 // Android-changed: Android has no first field. 
1721 // if ((first >= 0) && (group() != null)) { 1722 if (matchFound && (group() != null)) { 1723 sb.append(group()); 1724 } 1725 sb.append(']'); 1726 return sb.toString(); 1727 } 1728 1729 /** 1730 * <p>Returns true if the end of input was hit by the search engine in 1731 * the last match operation performed by this matcher. 1732 * 1733 * <p>When this method returns true, then it is possible that more input 1734 * would have changed the result of the last search. 1735 * 1736 * @return true iff the end of input was hit in the last match; false 1737 * otherwise 1738 * @since 1.5 1739 */ hitEnd()1740 public boolean hitEnd() { 1741 synchronized (this) { 1742 return nativeMatcher.hitEnd(); 1743 } 1744 } 1745 1746 /** 1747 * <p>Returns true if more input could change a positive match into a 1748 * negative one. 1749 * 1750 * <p>If this method returns true, and a match was found, then more 1751 * input could cause the match to be lost. If this method returns false 1752 * and a match was found, then more input might change the match but the 1753 * match won't be lost. If a match was not found, then requireEnd has no 1754 * meaning. 1755 * 1756 * @return true iff more input could change a positive match into a 1757 * negative one. 1758 * @since 1.5 1759 */ requireEnd()1760 public boolean requireEnd() { 1761 synchronized (this) { 1762 return nativeMatcher.requireEnd(); 1763 } 1764 } 1765 1766 /** 1767 * Returns the end index of the text. 1768 * 1769 * @return the index after the last character in the text 1770 */ getTextLength()1771 int getTextLength() { 1772 return text.length(); 1773 } 1774 1775 /** 1776 * Generates a String from this matcher's input in the specified range. 
1777 * 1778 * @param beginIndex the beginning index, inclusive 1779 * @param endIndex the ending index, exclusive 1780 * @return A String generated from this matcher's input 1781 */ getSubSequence(int beginIndex, int endIndex)1782 CharSequence getSubSequence(int beginIndex, int endIndex) { 1783 return text.subSequence(beginIndex, endIndex); 1784 } 1785 1786 /** 1787 * Resets the Matcher. A new input sequence and a new region can be 1788 * specified. Results of a previous find get lost. The next attempt to find 1789 * an occurrence of the Pattern in the string will start at the beginning of 1790 * the region. This is the internal version of reset() to which the several 1791 * public versions delegate. 1792 * 1793 * @param input 1794 * the input sequence. 1795 * @param start 1796 * the start of the region. 1797 * @param end 1798 * the end of the region. 1799 * 1800 * @return the matcher itself. 1801 */ reset(CharSequence input, int start, int end)1802 private Matcher reset(CharSequence input, int start, int end) { 1803 if (input == null) { 1804 throw new IllegalArgumentException("input == null"); 1805 } 1806 1807 if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) { 1808 throw new IndexOutOfBoundsException(); 1809 } 1810 1811 this.originalInput = input; 1812 this.text = input.toString(); 1813 this.from = start; 1814 this.to = end; 1815 resetForInput(); 1816 1817 matchFound = false; 1818 appendPos = 0; 1819 modCount++; 1820 1821 return this; 1822 } 1823 resetForInput()1824 private void resetForInput() { 1825 synchronized (this) { 1826 nativeMatcher.setInput(text, from, to); 1827 nativeMatcher.useAnchoringBounds(anchoringBounds); 1828 nativeMatcher.useTransparentBounds(transparentBounds); 1829 } 1830 } 1831 1832 /** 1833 * Makes sure that a successful match has been made. Is invoked internally 1834 * from various places in the class. 1835 * 1836 * @throws IllegalStateException 1837 * if no successful match has been made. 
1838 */ ensureMatch()1839 private void ensureMatch() { 1840 if (!matchFound) { 1841 throw new IllegalStateException("No successful match so far"); 1842 } 1843 } 1844 getMatchedGroupIndex(String name)1845 private int getMatchedGroupIndex(String name) { 1846 ensureMatch(); 1847 int result = nativeMatcher.getMatchedGroupIndex(name); 1848 if (result < 0) { 1849 throw new IllegalArgumentException("No capturing group in the pattern " + 1850 "with the name " + name); 1851 } 1852 return result; 1853 } 1854 1855 }