• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.  Oracle designates this
9  * particular file as subject to the "Classpath" exception as provided
10  * by Oracle in the LICENSE file that accompanied this code.
11  *
12  * This code is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15  * version 2 for more details (a copy is included in the LICENSE file that
16  * accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License version
19  * 2 along with this work; if not, write to the Free Software Foundation,
20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23  * or visit www.oracle.com if you need additional information or have any
24  * questions.
25  */
26 
27 package java.util.regex;
28 
29 import android.compat.Compatibility;
30 import android.compat.annotation.ChangeId;
31 import android.compat.annotation.EnabledSince;
32 import com.android.icu.util.regex.MatcherNative;
33 import dalvik.annotation.compat.VersionCodes;
34 import dalvik.system.VMRuntime;
35 import java.util.ConcurrentModificationException;
36 import java.util.Iterator;
37 import java.util.NoSuchElementException;
38 import java.util.Objects;
39 import java.util.Spliterator;
40 import java.util.Spliterators;
41 import java.util.function.Consumer;
42 import java.util.function.Function;
43 import java.util.stream.Stream;
44 import java.util.stream.StreamSupport;
45 
46 /**
47  * An engine that performs match operations on a {@linkplain
48  * java.lang.CharSequence character sequence} by interpreting a {@link Pattern}.
49  *
50  * <p> A matcher is created from a pattern by invoking the pattern's {@link
51  * Pattern#matcher matcher} method.  Once created, a matcher can be used to
52  * perform three different kinds of match operations:
53  *
54  * <ul>
55  *
56  *   <li><p> The {@link #matches matches} method attempts to match the entire
57  *   input sequence against the pattern.  </p></li>
58  *
59  *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
60  *   input sequence, starting at the beginning, against the pattern.  </p></li>
61  *
62  *   <li><p> The {@link #find find} method scans the input sequence looking
63  *   for the next subsequence that matches the pattern.  </p></li>
64  *
65  * </ul>
66  *
67  * <p> Each of these methods returns a boolean indicating success or failure.
68  * More information about a successful match can be obtained by querying the
69  * state of the matcher.
70  *
71  * <p> A matcher finds matches in a subset of its input called the
72  * <i>region</i>. By default, the region contains all of the matcher's input.
73  * The region can be modified via the {@link #region(int, int) region} method
74  * and queried via the {@link #regionStart() regionStart} and {@link
75  * #regionEnd() regionEnd} methods. The way that the region boundaries interact
76  * with some pattern constructs can be changed. See {@link
77  * #useAnchoringBounds(boolean) useAnchoringBounds} and {@link
78  * #useTransparentBounds(boolean) useTransparentBounds} for more details.
79  *
80  * <p> This class also defines methods for replacing matched subsequences with
81  * new strings whose contents can, if desired, be computed from the match
82  * result.  The {@link #appendReplacement appendReplacement} and {@link
83  * #appendTail appendTail} methods can be used in tandem in order to collect
84  * the result into an existing string buffer or string builder. Alternatively,
85  * the more convenient {@link #replaceAll replaceAll} method can be used to
86  * create a string in which every matching subsequence in the input sequence
87  * is replaced.
88  *
89  * <p> The explicit state of a matcher includes the start and end indices of
90  * the most recent successful match.  It also includes the start and end
91  * indices of the input subsequence captured by each <a
92  * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
93  * count of such subsequences.  As a convenience, methods are also provided for
94  * returning these captured subsequences in string form.
95  *
96  * <p> The explicit state of a matcher is initially undefined; attempting to
97  * query any part of it before a successful match will cause an {@link
98  * IllegalStateException} to be thrown.  The explicit state of a matcher is
99  * recomputed by every match operation.
100  *
101  * <p> The implicit state of a matcher includes the input character sequence as
102  * well as the <i>append position</i>, which is initially zero and is updated
103  * by the {@link #appendReplacement appendReplacement} method.
104  *
105  * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
106  * method or, if a new input sequence is desired, its {@link
107  * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
108  * matcher discards its explicit state information and sets the append position
109  * to zero.
110  *
111  * <p> Instances of this class are not safe for use by multiple concurrent
112  * threads. </p>
113  *
114  *
115  * @author      Mike McCloskey
116  * @author      Mark Reinhold
117  * @author      JSR-51 Expert Group
118  * @since       1.4
119  */
120 
121 public final class Matcher implements MatchResult {
122 
123     /**
124      * The Pattern object that created this Matcher.
125      */
126     private Pattern parentPattern;
127 
128     /**
129      * Holds the offsets for the most recent match.
130      */
131     int[] groups;
132 
133     /**
134      * The range within the sequence that is to be matched (between  0
135      * and text.length()).
136      */
137     int from, to;
138 
139     /**
140      * Holds the input text.
141      */
142     String text;
143 
144     /**
145      * Reflects whether a match has been found during the most recent find
146      * operation.
147      */
148     private boolean matchFound;
149 
150     private MatcherNative nativeMatcher;
151 
152     /**
153      * The index of the last position appended in a substitution.
154      */
155     int appendPos = 0;
156 
157     /**
     * Holds the original CharSequence for use in {@link #reset}. {@link #text} is used during
     * matching. Note that a CharSequence may be mutable while a String is not, so a reset can
     * change the input that is matched against.
161      */
162     private CharSequence originalInput;
163 
164     /**
165      * If transparentBounds is true then the boundaries of this
166      * matcher's region are transparent to lookahead, lookbehind,
167      * and boundary matching constructs that try to see beyond them.
168      */
169     boolean transparentBounds = false;
170 
171     /**
172      * If anchoringBounds is true then the boundaries of this
173      * matcher's region match anchors such as ^ and $.
174      */
175     boolean anchoringBounds = true;
176 
177     /**
178      * Number of times this matcher's state has been modified
179      */
180     int modCount;
181 
182     // BEGIN Android-removed: Remove unused default constructor.
183     /*
184      * No default constructor.
185      *
186     Matcher() {
187     }
188     */
189     // END Android-removed: Remove unused default constructor.
190 
191     /**
192      * All matchers have the state used by Pattern during a match.
193      */
Matcher(Pattern parent, CharSequence text)194     Matcher(Pattern parent, CharSequence text) {
195         // Android-changed: Use ICU4C as the regex backend.
196         /*
197         this.parentPattern = parent;
198         this.text = text;
199 
200         // Allocate state storage
201         int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
202         groups = new int[parentGroupCount * 2];
203         locals = new int[parent.localCount];
204         localsPos = new IntHashSet[parent.localTCNCount];
205 
206         // Put fields into initial states
207         reset();
208         */
209         usePattern(parent);
210         reset(text);
211     }
212 
213     /**
214      * Returns the pattern that is interpreted by this matcher.
215      *
216      * @return  The pattern for which this matcher was created
217      */
pattern()218     public Pattern pattern() {
219         return parentPattern;
220     }
221 
222     /**
223      * Returns the match state of this matcher as a {@link MatchResult}.
224      * The result is unaffected by subsequent operations performed upon this
225      * matcher.
226      *
227      * @return  a {@code MatchResult} with the state of this matcher
228      * @throws IllegalStateException if no match is found.
229      * @since 1.5
230      */
toMatchResult()231     public MatchResult toMatchResult() {
232         // Android-added: Throw IllegalStateException if not matched.
233         ensureMatch();
234         return toMatchResult(text.toString());
235     }
236 
toMatchResult(String text)237     private MatchResult toMatchResult(String text) {
238         // Android-changed: Replace first and end field usages with our implementation.
239         return new ImmutableMatchResult(matchFound ? start() : -1, // this.first,
240                                         matchFound ? end() : -1, // this.last,
241                                          groupCount(),
242                                          this.groups.clone(),
243                                          text);
244     }
245 
246     private static class ImmutableMatchResult implements MatchResult {
247         private final int first;
248         private final int last;
249         private final int[] groups;
250         private final int groupCount;
251         private final String text;
252 
ImmutableMatchResult(int first, int last, int groupCount, int groups[], String text)253         ImmutableMatchResult(int first, int last, int groupCount,
254                 int groups[], String text)
255         {
256             this.first = first;
257             this.last = last;
258             this.groupCount = groupCount;
259             this.groups = groups;
260             this.text = text;
261         }
262 
263         @Override
start()264         public int start() {
265             checkMatch();
266             return first;
267         }
268 
269         @Override
start(int group)270         public int start(int group) {
271             checkMatch();
272             if (group < 0 || group > groupCount)
273                 throw new IndexOutOfBoundsException("No group " + group);
274             return groups[group * 2];
275         }
276 
277         @Override
end()278         public int end() {
279             checkMatch();
280             return last;
281         }
282 
283         @Override
end(int group)284         public int end(int group) {
285             checkMatch();
286             if (group < 0 || group > groupCount)
287                 throw new IndexOutOfBoundsException("No group " + group);
288             return groups[group * 2 + 1];
289         }
290 
291         @Override
groupCount()292         public int groupCount() {
293             return groupCount;
294         }
295 
296         @Override
group()297         public String group() {
298             checkMatch();
299             return group(0);
300         }
301 
302         @Override
group(int group)303         public String group(int group) {
304             checkMatch();
305             if (group < 0 || group > groupCount)
306                 throw new IndexOutOfBoundsException("No group " + group);
307             if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
308                 return null;
309             return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString();
310         }
311 
checkMatch()312         private void checkMatch() {
313             if (first < 0)
314                 throw new IllegalStateException("No match found");
315 
316         }
317     }
318 
319     /**
320      * Changes the {@code Pattern} that this {@code Matcher} uses to
321      * find matches with.
322      *
323      * <p> This method causes this matcher to lose information
324      * about the groups of the last match that occurred. The
325      * matcher's position in the input is maintained and its
326      * last append position is unaffected.</p>
327      *
328      * @param  newPattern
329      *         The new pattern used by this matcher
330      * @return  This matcher
331      * @throws  IllegalArgumentException
332      *          If newPattern is {@code null}
333      * @since 1.5
334      */
usePattern(Pattern newPattern)335     public Matcher usePattern(Pattern newPattern) {
336         if (newPattern == null)
337             throw new IllegalArgumentException("Pattern cannot be null");
338 
339         synchronized (this) {
340             // may throw
341             nativeMatcher = MatcherNative.create(newPattern.nativePattern);
342         }
343         parentPattern = newPattern;
344 
345         if (text != null) {
346             resetForInput();
347         }
348 
349         groups = new int[(groupCount() + 1) * 2];
350         matchFound = false;
351         modCount++;
352         return this;
353     }
354 
355     /**
356      * Resets this matcher.
357      *
358      * <p> Resetting a matcher discards all of its explicit state information
359      * and sets its append position to zero. The matcher's region is set to the
360      * default region, which is its entire character sequence. The anchoring
361      * and transparency of this matcher's region boundaries are unaffected.
362      *
363      * @return  This matcher
364      */
reset()365     public Matcher reset() {
366         Matcher matcher = reset(originalInput, 0, originalInput.length());
367         modCount++;
368         return matcher;
369     }
370 
371     /**
372      * Resets this matcher with a new input sequence.
373      *
374      * <p> Resetting a matcher discards all of its explicit state information
375      * and sets its append position to zero.  The matcher's region is set to
376      * the default region, which is its entire character sequence.  The
377      * anchoring and transparency of this matcher's region boundaries are
378      * unaffected.
379      *
380      * @param  input
381      *         The new input character sequence
382      *
383      * @return  This matcher
384      */
reset(CharSequence input)385     public Matcher reset(CharSequence input) {
386         return reset(input, 0, input.length());
387     }
388 
389     /**
390      * Returns the start index of the previous match.
391      *
392      * @return  The index of the first character matched
393      *
394      * @throws  IllegalStateException
395      *          If no match has yet been attempted,
396      *          or if the previous match operation failed
397      */
start()398     public int start() {
399         return start(0);
400     }
401 
402     /**
403      * Returns the start index of the subsequence captured by the given group
404      * during the previous match operation.
405      *
406      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
407      * to right, starting at one.  Group zero denotes the entire pattern, so
408      * the expression <i>m.</i>{@code start(0)} is equivalent to
409      * <i>m.</i>{@code start()}.  </p>
410      *
411      * @param  group
412      *         The index of a capturing group in this matcher's pattern
413      *
414      * @return  The index of the first character captured by the group,
415      *          or {@code -1} if the match was successful but the group
416      *          itself did not match anything
417      *
418      * @throws  IllegalStateException
419      *          If no match has yet been attempted,
420      *          or if the previous match operation failed
421      *
422      * @throws  IndexOutOfBoundsException
423      *          If there is no capturing group in the pattern
424      *          with the given index
425      */
start(int group)426     public int start(int group) {
427         ensureMatch();
428         if (group < 0 || group > groupCount())
429             throw new IndexOutOfBoundsException("No group " + group);
430         return groups[group * 2];
431     }
432 
433     /**
434      * Returns the start index of the subsequence captured by the given
435      * <a href="Pattern.html#groupname">named-capturing group</a> during the
436      * previous match operation.
437      *
438      * @param  name
439      *         The name of a named-capturing group in this matcher's pattern
440      *
441      * @return  The index of the first character captured by the group,
442      *          or {@code -1} if the match was successful but the group
443      *          itself did not match anything
444      *
445      * @throws  IllegalStateException
446      *          If no match has yet been attempted,
447      *          or if the previous match operation failed
448      *
449      * @throws  IllegalArgumentException
450      *          If there is no capturing group in the pattern
451      *          with the given name
452      * @since 1.8
453      */
start(String name)454     public int start(String name) {
455         return groups[getMatchedGroupIndex(name) * 2];
456     }
457 
458     /**
459      * Returns the offset after the last character matched.
460      *
461      * @return  The offset after the last character matched
462      *
463      * @throws  IllegalStateException
464      *          If no match has yet been attempted,
465      *          or if the previous match operation failed
466      */
end()467     public int end() {
468         return end(0);
469     }
470 
471     /**
472      * Returns the offset after the last character of the subsequence
473      * captured by the given group during the previous match operation.
474      *
475      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
476      * to right, starting at one.  Group zero denotes the entire pattern, so
477      * the expression <i>m.</i>{@code end(0)} is equivalent to
478      * <i>m.</i>{@code end()}.  </p>
479      *
480      * @param  group
481      *         The index of a capturing group in this matcher's pattern
482      *
483      * @return  The offset after the last character captured by the group,
484      *          or {@code -1} if the match was successful
485      *          but the group itself did not match anything
486      *
487      * @throws  IllegalStateException
488      *          If no match has yet been attempted,
489      *          or if the previous match operation failed
490      *
491      * @throws  IndexOutOfBoundsException
492      *          If there is no capturing group in the pattern
493      *          with the given index
494      */
end(int group)495     public int end(int group) {
496         ensureMatch();
497         if (group < 0 || group > groupCount())
498             throw new IndexOutOfBoundsException("No group " + group);
499         return groups[group * 2 + 1];
500     }
501 
502     /**
503      * Returns the offset after the last character of the subsequence
504      * captured by the given <a href="Pattern.html#groupname">named-capturing
505      * group</a> during the previous match operation.
506      *
507      * @param  name
508      *         The name of a named-capturing group in this matcher's pattern
509      *
510      * @return  The offset after the last character captured by the group,
511      *          or {@code -1} if the match was successful
512      *          but the group itself did not match anything
513      *
514      * @throws  IllegalStateException
515      *          If no match has yet been attempted,
516      *          or if the previous match operation failed
517      *
518      * @throws  IllegalArgumentException
519      *          If there is no capturing group in the pattern
520      *          with the given name
521      * @since 1.8
522      */
end(String name)523     public int end(String name) {
524         return groups[getMatchedGroupIndex(name) * 2 + 1];
525     }
526 
527     /**
528      * Returns the input subsequence matched by the previous match.
529      *
530      * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
531      * the expressions <i>m.</i>{@code group()} and
532      * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),}&nbsp;<i>m.</i>
533      * {@code end())} are equivalent.  </p>
534      *
535      * <p> Note that some patterns, for example {@code a*}, match the empty
536      * string.  This method will return the empty string when the pattern
537      * successfully matches the empty string in the input.  </p>
538      *
539      * @return The (possibly empty) subsequence matched by the previous match,
540      *         in string form
541      *
542      * @throws  IllegalStateException
543      *          If no match has yet been attempted,
544      *          or if the previous match operation failed
545      */
group()546     public String group() {
547         return group(0);
548     }
549 
550     /**
551      * Returns the input subsequence captured by the given group during the
552      * previous match operation.
553      *
554      * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
555      * <i>g</i>, the expressions <i>m.</i>{@code group(}<i>g</i>{@code )} and
556      * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code
557      * ),}&nbsp;<i>m.</i>{@code end(}<i>g</i>{@code ))}
558      * are equivalent.  </p>
559      *
560      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
561      * to right, starting at one.  Group zero denotes the entire pattern, so
562      * the expression {@code m.group(0)} is equivalent to {@code m.group()}.
563      * </p>
564      *
565      * <p> If the match was successful but the group specified failed to match
566      * any part of the input sequence, then {@code null} is returned. Note
567      * that some groups, for example {@code (a*)}, match the empty string.
568      * This method will return the empty string when such a group successfully
569      * matches the empty string in the input.  </p>
570      *
571      * @param  group
572      *         The index of a capturing group in this matcher's pattern
573      *
574      * @return  The (possibly empty) subsequence captured by the group
575      *          during the previous match, or {@code null} if the group
576      *          failed to match part of the input
577      *
578      * @throws  IllegalStateException
579      *          If no match has yet been attempted,
580      *          or if the previous match operation failed
581      *
582      * @throws  IndexOutOfBoundsException
583      *          If there is no capturing group in the pattern
584      *          with the given index
585      */
group(int group)586     public String group(int group) {
587         ensureMatch();
588         if (group < 0 || group > groupCount())
589             throw new IndexOutOfBoundsException("No group " + group);
590         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
591             return null;
592         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
593     }
594 
595     /**
596      * Returns the input subsequence captured by the given
597      * <a href="Pattern.html#groupname">named-capturing group</a> during the
598      * previous match operation.
599      *
600      * <p> If the match was successful but the group specified failed to match
601      * any part of the input sequence, then {@code null} is returned. Note
602      * that some groups, for example {@code (a*)}, match the empty string.
603      * This method will return the empty string when such a group successfully
604      * matches the empty string in the input.  </p>
605      *
606      * @param  name
607      *         The name of a named-capturing group in this matcher's pattern
608      *
609      * @return  The (possibly empty) subsequence captured by the named group
610      *          during the previous match, or {@code null} if the group
611      *          failed to match part of the input
612      *
613      * @throws  IllegalStateException
614      *          If no match has yet been attempted,
615      *          or if the previous match operation failed
616      *
617      * @throws  IllegalArgumentException
618      *          If there is no capturing group in the pattern
619      *          with the given name
620      * @since 1.7
621      */
group(String name)622     public String group(String name) {
623         int group = getMatchedGroupIndex(name);
624         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
625             return null;
626         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
627     }
628 
629     /**
630      * Returns the number of capturing groups in this matcher's pattern.
631      *
632      * <p> Group zero denotes the entire pattern by convention. It is not
633      * included in this count.
634      *
635      * <p> Any non-negative integer smaller than or equal to the value
636      * returned by this method is guaranteed to be a valid group index for
637      * this matcher.  </p>
638      *
639      * @return The number of capturing groups in this matcher's pattern
640      */
groupCount()641     public int groupCount() {
642         synchronized (this) {
643             return nativeMatcher.groupCount();
644         }
645     }
646 
647     /**
648      * Attempts to match the entire region against the pattern.
649      *
650      * <p> If the match succeeds then more information can be obtained via the
651      * {@code start}, {@code end}, and {@code group} methods.  </p>
652      *
653      * @return  {@code true} if, and only if, the entire region sequence
654      *          matches this matcher's pattern
655      */
matches()656     public boolean matches() {
657         synchronized (this) {
658             matchFound = nativeMatcher.matches(groups);
659         }
660         modCount++;
661         return matchFound;
662     }
663 
664     /**
665      * Attempts to find the next subsequence of the input sequence that matches
666      * the pattern.
667      *
668      * <p> This method starts at the beginning of this matcher's region, or, if
669      * a previous invocation of the method was successful and the matcher has
670      * not since been reset, at the first character not matched by the previous
671      * match.
672      *
673      * <p> If the match succeeds then more information can be obtained via the
674      * {@code start}, {@code end}, and {@code group} methods.  </p>
675      *
676      * @return  {@code true} if, and only if, a subsequence of the input
677      *          sequence matches this matcher's pattern
678      */
find()679     public boolean find() {
680         synchronized (this) {
681             matchFound = nativeMatcher.findNext(groups);
682         }
683         modCount++;
684         return matchFound;
685     }
686 
687     /**
688      * Resets this matcher and then attempts to find the next subsequence of
689      * the input sequence that matches the pattern, starting at the specified
690      * index.
691      *
692      * <p> If the match succeeds then more information can be obtained via the
693      * {@code start}, {@code end}, and {@code group} methods, and subsequent
694      * invocations of the {@link #find()} method will start at the first
695      * character not matched by this match.  </p>
696      *
697      * @param start the index to start searching for a match
698      * @throws  IndexOutOfBoundsException
699      *          If start is less than zero or if start is greater than the
700      *          length of the input sequence.
701      *
702      * @return  {@code true} if, and only if, a subsequence of the input
703      *          sequence starting at the given index matches this matcher's
704      *          pattern
705      */
find(int start)706     public boolean find(int start) {
707         int limit = getTextLength();
708         if ((start < 0) || (start > limit))
709             throw new IndexOutOfBoundsException("Illegal start index");
710         reset();
711         synchronized (this) {
712             matchFound = nativeMatcher.find(start, groups);
713         }
714         modCount++;
715         return matchFound;
716     }
717 
718     /**
719      * Attempts to match the input sequence, starting at the beginning of the
720      * region, against the pattern.
721      *
722      * <p> Like the {@link #matches matches} method, this method always starts
723      * at the beginning of the region; unlike that method, it does not
724      * require that the entire region be matched.
725      *
726      * <p> If the match succeeds then more information can be obtained via the
727      * {@code start}, {@code end}, and {@code group} methods.  </p>
728      *
729      * @return  {@code true} if, and only if, a prefix of the input
730      *          sequence matches this matcher's pattern
731      */
lookingAt()732     public boolean lookingAt() {
733         synchronized (this) {
734             matchFound = nativeMatcher.lookingAt(groups);
735         }
736         modCount++;
737         return matchFound;
738     }
739 
740     /**
741      * Returns a literal replacement {@code String} for the specified
742      * {@code String}.
743      *
744      * This method produces a {@code String} that will work
745      * as a literal replacement {@code s} in the
746      * {@code appendReplacement} method of the {@link Matcher} class.
747      * The {@code String} produced will match the sequence of characters
748      * in {@code s} treated as a literal sequence. Slashes ('\') and
749      * dollar signs ('$') will be given no special meaning.
750      *
751      * @param  s The string to be literalized
752      * @return  A literal string replacement
753      * @since 1.5
754      */
quoteReplacement(String s)755     public static String quoteReplacement(String s) {
756         if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
757             return s;
758         StringBuilder sb = new StringBuilder();
759         for (int i=0; i<s.length(); i++) {
760             char c = s.charAt(i);
761             if (c == '\\' || c == '$') {
762                 sb.append('\\');
763             }
764             sb.append(c);
765         }
766         return sb.toString();
767     }
768 
769     /**
770      * Implements a non-terminal append-and-replace step.
771      *
772      * <p> This method performs the following actions: </p>
773      *
774      * <ol>
775      *
776      *   <li><p> It reads characters from the input sequence, starting at the
777      *   append position, and appends them to the given string buffer.  It
778      *   stops after reading the last character preceding the previous match,
779      *   that is, the character at index {@link
780      *   #start()}&nbsp;{@code -}&nbsp;{@code 1}.  </p></li>
781      *
782      *   <li><p> It appends the given replacement string to the string buffer.
783      *   </p></li>
784      *
785      *   <li><p> It sets the append position of this matcher to the index of
786      *   the last character matched, plus one, that is, to {@link #end()}.
787      *   </p></li>
788      *
789      * </ol>
790      *
791      * <p> The replacement string may contain references to subsequences
792      * captured during the previous match: Each occurrence of
793      * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i>
794      * will be replaced by the result of evaluating the corresponding
795      * {@link #group(String) group(name)} or {@link #group(int) group(g)}
796      * respectively. For {@code $}<i>g</i>,
797      * the first number after the {@code $} is always treated as part of
798      * the group reference. Subsequent numbers are incorporated into g if
799      * they would form a legal group reference. Only the numerals '0'
800      * through '9' are considered as potential components of the group
801      * reference. If the second group matched the string {@code "foo"}, for
802      * example, then passing the replacement string {@code "$2bar"} would
803      * cause {@code "foobar"} to be appended to the string buffer. A dollar
804      * sign ({@code $}) may be included as a literal in the replacement
805      * string by preceding it with a backslash ({@code \$}).
806      *
807      * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
808      * the replacement string may cause the results to be different than if it
809      * were being treated as a literal replacement string. Dollar signs may be
810      * treated as references to captured subsequences as described above, and
811      * backslashes are used to escape literal characters in the replacement
812      * string.
813      *
814      * <p> This method is intended to be used in a loop together with the
815      * {@link #appendTail(StringBuffer) appendTail} and {@link #find() find}
816      * methods.  The following code, for example, writes {@code one dog two dogs
817      * in the yard} to the standard-output stream: </p>
818      *
819      * <blockquote><pre>
820      * Pattern p = Pattern.compile("cat");
821      * Matcher m = p.matcher("one cat two cats in the yard");
822      * StringBuffer sb = new StringBuffer();
823      * while (m.find()) {
824      *     m.appendReplacement(sb, "dog");
825      * }
826      * m.appendTail(sb);
827      * System.out.println(sb.toString());</pre></blockquote>
828      *
829      * @param  sb
830      *         The target string buffer
831      *
832      * @param  replacement
833      *         The replacement string
834      *
835      * @return  This matcher
836      *
837      * @throws  IllegalStateException
838      *          If no match has yet been attempted,
839      *          or if the previous match operation failed
840      *
841      * @throws  IllegalArgumentException
842      *          If the replacement string refers to a named-capturing
843      *          group that does not exist in the pattern
844      *
845      * @throws  IndexOutOfBoundsException
846      *          If the replacement string refers to a capturing group
847      *          that does not exist in the pattern
848      */
appendReplacement(StringBuffer sb, String replacement)849     public Matcher appendReplacement(StringBuffer sb, String replacement) {
850         // TODO: Throw IllegalStateException after an SDK level check.
851         // Android-removed: Don't throw IllegalStateException due to app compat
852         // If no match, return error
853         // if (first < 0)
854         //     throw new IllegalStateException("No match available");
855         StringBuilder result = new StringBuilder();
856         // Android-changed: Use Android's appendEvaluated due to app compat.
857         // appendExpandedReplacement(replacement, result);
858         appendReplacementInternal(result, replacement);
859         // Append the intervening text
860         // Android-changed: Android has no lastAppendPosition.
861         // sb.append(text, lastAppendPosition, first);
862         sb.append(text, appendPos, start());
863         // Append the match substitution
864         sb.append(result);
865         // Android-changed: Android has no lastAppendPosition.
866         // lastAppendPosition = last;
867         appendPos = end();
868         modCount++;
869         return this;
870     }
871 
872     // BEGIN Android-added: Backward-compatible codes for appendReplacement().
    /**
     * Compatibility change ID for the stricter handling of replacement strings.
     *
     * <p>Since Android 14, {@link Matcher} is stricter about the replacement syntax and the
     * group references accepted by its methods, e.g.
     * {@link #appendReplacement(StringBuffer, String)}.
     *
     * <p>This change is enabled for apps targeting Android 14
     * ({@code VersionCodes.UPSIDE_DOWN_CAKE}) or later.
     *
     * @hide
     */
    @ChangeId
    @EnabledSince(targetSdkVersion = VersionCodes.UPSIDE_DOWN_CAKE)
    public static final long DISALLOW_INVALID_GROUP_REFERENCE = 247079863L;
884 
appendReplacementInternal(StringBuilder sb, String replacement)885     private void appendReplacementInternal(StringBuilder sb, String replacement) {
886         if (VMRuntime.getSdkVersion() >= VersionCodes.UPSIDE_DOWN_CAKE
887                 && Compatibility.isChangeEnabled(DISALLOW_INVALID_GROUP_REFERENCE)) {
888             appendExpandedReplacement(replacement, sb);
889         } else {
890             appendEvaluated(sb, replacement);
891         }
892     }
893 
894     /**
895      * Internal helper method to append a given string to a given string buffer.
896      * If the string contains any references to groups, these are replaced by
897      * the corresponding group's contents.
898      *
899      * @param buffer the string builder.
900      * @param s the string to append.
901      *
902      * @hide
903      */
appendEvaluated(StringBuilder buffer, String s)904     public void appendEvaluated(StringBuilder buffer, String s) {
905         boolean escape = false;
906         boolean dollar = false;
907         boolean escapeNamedGroup = false;
908         int escapeNamedGroupStart = -1;
909 
910         for (int i = 0; i < s.length(); i++) {
911             char c = s.charAt(i);
912             if (c == '\\' && !escape) {
913                 escape = true;
914             } else if (c == '$' && !escape) {
915                 dollar = true;
916             } else if (c >= '0' && c <= '9' && dollar && !escapeNamedGroup) {
917                 String groupValue = group(c - '0');
918                 if (groupValue != null) {
919                     buffer.append(groupValue);
920                 }
921                 dollar = false;
922             } else if (c == '{' && dollar) {
923                 escapeNamedGroup = true;
924                 escapeNamedGroupStart = i;
925             } else if (c == '}' && dollar && escapeNamedGroup) {
926                 String groupValue = group(s.substring(escapeNamedGroupStart + 1, i));
927                 if (groupValue != null) {
928                     buffer.append(groupValue);
929                 }
930                 dollar = false;
931                 escapeNamedGroup = false;
932             } else if (c != '}' && dollar && escapeNamedGroup) {
933                 continue;
934             } else {
935                 buffer.append(c);
936                 dollar = false;
937                 escape = false;
938                 escapeNamedGroup = false;
939             }
940         }
941 
942         if (escape) {
943             throw new IllegalArgumentException("character to be escaped is missing");
944         }
945 
946         if (dollar) {
947             throw new IllegalArgumentException("Illegal group reference: group index is missing");
948         }
949 
950         if (escapeNamedGroup) {
951             throw new IllegalArgumentException("Missing ending brace '}' from replacement string");
952         }
953     }
954     // END Android-added: Backward-compatible codes for appendReplacement().
955 
956     /**
957      * Implements a non-terminal append-and-replace step.
958      *
959      * <p> This method performs the following actions: </p>
960      *
961      * <ol>
962      *
963      *   <li><p> It reads characters from the input sequence, starting at the
964      *   append position, and appends them to the given string builder.  It
965      *   stops after reading the last character preceding the previous match,
966      *   that is, the character at index {@link
967      *   #start()}&nbsp;{@code -}&nbsp;{@code 1}.  </p></li>
968      *
969      *   <li><p> It appends the given replacement string to the string builder.
970      *   </p></li>
971      *
972      *   <li><p> It sets the append position of this matcher to the index of
973      *   the last character matched, plus one, that is, to {@link #end()}.
974      *   </p></li>
975      *
976      * </ol>
977      *
978      * <p> The replacement string may contain references to subsequences
979      * captured during the previous match: Each occurrence of
980      * {@code $}<i>g</i> will be replaced by the result of
981      * evaluating {@link #group(int) group}{@code (}<i>g</i>{@code )}.
982      * The first number after the {@code $} is always treated as part of
983      * the group reference. Subsequent numbers are incorporated into g if
984      * they would form a legal group reference. Only the numerals '0'
985      * through '9' are considered as potential components of the group
986      * reference. If the second group matched the string {@code "foo"}, for
987      * example, then passing the replacement string {@code "$2bar"} would
988      * cause {@code "foobar"} to be appended to the string builder. A dollar
989      * sign ({@code $}) may be included as a literal in the replacement
990      * string by preceding it with a backslash ({@code \$}).
991      *
992      * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
993      * the replacement string may cause the results to be different than if it
994      * were being treated as a literal replacement string. Dollar signs may be
995      * treated as references to captured subsequences as described above, and
996      * backslashes are used to escape literal characters in the replacement
997      * string.
998      *
999      * <p> This method is intended to be used in a loop together with the
1000      * {@link #appendTail(StringBuilder) appendTail} and
1001      * {@link #find() find} methods. The following code, for example, writes
1002      * {@code one dog two dogs in the yard} to the standard-output stream: </p>
1003      *
1004      * <blockquote><pre>
1005      * Pattern p = Pattern.compile("cat");
1006      * Matcher m = p.matcher("one cat two cats in the yard");
1007      * StringBuilder sb = new StringBuilder();
1008      * while (m.find()) {
1009      *     m.appendReplacement(sb, "dog");
1010      * }
1011      * m.appendTail(sb);
1012      * System.out.println(sb.toString());</pre></blockquote>
1013      *
1014      * @param  sb
1015      *         The target string builder
1016      * @param  replacement
1017      *         The replacement string
1018      * @return  This matcher
1019      *
1020      * @throws  IllegalStateException
1021      *          If no match has yet been attempted,
1022      *          or if the previous match operation failed
1023      * @throws  IllegalArgumentException
1024      *          If the replacement string refers to a named-capturing
1025      *          group that does not exist in the pattern
1026      * @throws  IndexOutOfBoundsException
1027      *          If the replacement string refers to a capturing group
1028      *          that does not exist in the pattern
1029      * @since 9
1030      */
appendReplacement(StringBuilder sb, String replacement)1031     public Matcher appendReplacement(StringBuilder sb, String replacement) {
1032         // If no match, return error
1033         // Android-changed: Android has no first field.
1034         // if (first < 0)
1035         //     throw new IllegalStateException("No match available");
1036         ensureMatch();
1037         StringBuilder result = new StringBuilder();
1038         // Android-changed: Use Android's appendEvaluated due to app compat.
1039         // appendExpandedReplacement(replacement, result);
1040         appendReplacementInternal(result, replacement);
1041         // Append the intervening text
1042         // Android-changed: Android has no lastAppendPosition.
1043         // sb.append(text, lastAppendPosition, first);
1044         sb.append(text, appendPos, start());
1045         // Append the match substitution
1046         sb.append(result);
1047         // Android-changed: Android has no lastAppendPosition.
1048         // lastAppendPosition = last;
1049         appendPos = end();
1050         modCount++;
1051         return this;
1052     }
1053 
1054     // Android-changed: Make public for testing.
1055     /**
1056      * Processes replacement string to replace group references with
1057      * groups.
1058      *
1059      * @hide
1060      */
appendExpandedReplacement( String replacement, StringBuilder result)1061     public StringBuilder appendExpandedReplacement(
1062             String replacement, StringBuilder result) {
1063         int cursor = 0;
1064         while (cursor < replacement.length()) {
1065             char nextChar = replacement.charAt(cursor);
1066             if (nextChar == '\\') {
1067                 cursor++;
1068                 if (cursor == replacement.length())
1069                     throw new IllegalArgumentException(
1070                             "character to be escaped is missing");
1071                 nextChar = replacement.charAt(cursor);
1072                 result.append(nextChar);
1073                 cursor++;
1074             } else if (nextChar == '$') {
1075                 // Skip past $
1076                 cursor++;
1077                 // Throw IAE if this "$" is the last character in replacement
1078                 if (cursor == replacement.length())
1079                     throw new IllegalArgumentException(
1080                             "Illegal group reference: group index is missing");
1081                 nextChar = replacement.charAt(cursor);
1082                 int refNum = -1;
1083                 if (nextChar == '{') {
1084                     cursor++;
1085                     StringBuilder gsb = new StringBuilder();
1086                     while (cursor < replacement.length()) {
1087                         nextChar = replacement.charAt(cursor);
1088                         if (ASCII.isLower(nextChar) ||
1089                                 ASCII.isUpper(nextChar) ||
1090                                 ASCII.isDigit(nextChar)) {
1091                             gsb.append(nextChar);
1092                             cursor++;
1093                         } else {
1094                             break;
1095                         }
1096                     }
1097                     if (gsb.length() == 0)
1098                         throw new IllegalArgumentException(
1099                                 "named capturing group has 0 length name");
1100                     if (nextChar != '}')
1101                         throw new IllegalArgumentException(
1102                                 "named capturing group is missing trailing '}'");
1103                     String gname = gsb.toString();
1104                     if (ASCII.isDigit(gname.charAt(0)))
1105                         throw new IllegalArgumentException(
1106                                 "capturing group name {" + gname +
1107                                         "} starts with digit character");
1108                     // Android-changed: Use ICU4C as the regex backend.
1109                     // if (!parentPattern.namedGroups().containsKey(gname))
1110                     int groupIndex = nativeMatcher.getMatchedGroupIndex(gname);
1111                     if (groupIndex < 0)
1112                         throw new IllegalArgumentException(
1113                                 "No group with name {" + gname + "}");
1114                     refNum = groupIndex;
1115                     cursor++;
1116                 } else {
1117                     // The first number is always a group
1118                     refNum = nextChar - '0';
1119                     if ((refNum < 0) || (refNum > 9))
1120                         throw new IllegalArgumentException(
1121                                 "Illegal group reference");
1122                     cursor++;
1123                     // Capture the largest legal group string
1124                     boolean done = false;
1125                     while (!done) {
1126                         if (cursor >= replacement.length()) {
1127                             break;
1128                         }
1129                         int nextDigit = replacement.charAt(cursor) - '0';
1130                         if ((nextDigit < 0) || (nextDigit > 9)) { // not a number
1131                             break;
1132                         }
1133                         int newRefNum = (refNum * 10) + nextDigit;
1134                         if (groupCount() < newRefNum) {
1135                             done = true;
1136                         } else {
1137                             refNum = newRefNum;
1138                             cursor++;
1139                         }
1140                     }
1141                 }
1142                 // Append group
1143                 if (start(refNum) != -1 && end(refNum) != -1)
1144                     result.append(text, start(refNum), end(refNum));
1145             } else {
1146                 result.append(nextChar);
1147                 cursor++;
1148             }
1149         }
1150         return result;
1151     }
1152 
1153     /**
1154      * Implements a terminal append-and-replace step.
1155      *
1156      * <p> This method reads characters from the input sequence, starting at
1157      * the append position, and appends them to the given string buffer.  It is
1158      * intended to be invoked after one or more invocations of the {@link
1159      * #appendReplacement(StringBuffer, String) appendReplacement} method in
1160      * order to copy the remainder of the input sequence.  </p>
1161      *
1162      * @param  sb
1163      *         The target string buffer
1164      *
1165      * @return  The target string buffer
1166      */
appendTail(StringBuffer sb)1167     public StringBuffer appendTail(StringBuffer sb) {
1168         // Android-changed: Android has no lastAppendPosition.
1169         // sb.append(text, lastAppendPosition, getTextLength());
1170         if (appendPos < to) {
1171             sb.append(text.substring(appendPos, to));
1172         }
1173         return sb;
1174     }
1175 
1176     /**
1177      * Implements a terminal append-and-replace step.
1178      *
1179      * <p> This method reads characters from the input sequence, starting at
1180      * the append position, and appends them to the given string builder.  It is
1181      * intended to be invoked after one or more invocations of the {@link
1182      * #appendReplacement(StringBuilder, String)
1183      * appendReplacement} method in order to copy the remainder of the input
1184      * sequence.  </p>
1185      *
1186      * @param  sb
1187      *         The target string builder
1188      *
1189      * @return  The target string builder
1190      *
1191      * @since 9
1192      */
appendTail(StringBuilder sb)1193     public StringBuilder appendTail(StringBuilder sb) {
1194         // Android-changed: Android has no lastAppendPosition.
1195         // sb.append(text, lastAppendPosition, getTextLength());
1196         if (appendPos < to) {
1197             sb.append(text.substring(appendPos, to));
1198         }
1199         return sb;
1200     }
1201 
1202     /**
1203      * Replaces every subsequence of the input sequence that matches the
1204      * pattern with the given replacement string.
1205      *
1206      * <p> This method first resets this matcher.  It then scans the input
1207      * sequence looking for matches of the pattern.  Characters that are not
1208      * part of any match are appended directly to the result string; each match
1209      * is replaced in the result by the replacement string.  The replacement
1210      * string may contain references to captured subsequences as in the {@link
1211      * #appendReplacement appendReplacement} method.
1212      *
1213      * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1214      * the replacement string may cause the results to be different than if it
1215      * were being treated as a literal replacement string. Dollar signs may be
1216      * treated as references to captured subsequences as described above, and
1217      * backslashes are used to escape literal characters in the replacement
1218      * string.
1219      *
1220      * <p> Given the regular expression {@code a*b}, the input
1221      * {@code "aabfooaabfooabfoob"}, and the replacement string
1222      * {@code "-"}, an invocation of this method on a matcher for that
1223      * expression would yield the string {@code "-foo-foo-foo-"}.
1224      *
1225      * <p> Invoking this method changes this matcher's state.  If the matcher
1226      * is to be used in further matching operations then it should first be
1227      * reset.  </p>
1228      *
1229      * @param  replacement
1230      *         The replacement string
1231      *
1232      * @return  The string constructed by replacing each matching subsequence
1233      *          by the replacement string, substituting captured subsequences
1234      *          as needed
1235      */
replaceAll(String replacement)1236     public String replaceAll(String replacement) {
1237         reset();
1238         boolean result = find();
1239         if (result) {
1240             StringBuilder sb = new StringBuilder();
1241             do {
1242                 appendReplacement(sb, replacement);
1243                 result = find();
1244             } while (result);
1245             appendTail(sb);
1246             return sb.toString();
1247         }
1248         return text.toString();
1249     }
1250 
1251     /**
1252      * Replaces every subsequence of the input sequence that matches the
1253      * pattern with the result of applying the given replacer function to the
1254      * match result of this matcher corresponding to that subsequence.
1255      * Exceptions thrown by the function are relayed to the caller.
1256      *
1257      * <p> This method first resets this matcher.  It then scans the input
1258      * sequence looking for matches of the pattern.  Characters that are not
1259      * part of any match are appended directly to the result string; each match
     * is replaced in the result by applying the replacer function that
1261      * returns a replacement string.  Each replacement string may contain
1262      * references to captured subsequences as in the {@link #appendReplacement
1263      * appendReplacement} method.
1264      *
1265      * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1266      * a replacement string may cause the results to be different than if it
1267      * were being treated as a literal replacement string. Dollar signs may be
1268      * treated as references to captured subsequences as described above, and
1269      * backslashes are used to escape literal characters in the replacement
1270      * string.
1271      *
1272      * <p> Given the regular expression {@code dog}, the input
1273      * {@code "zzzdogzzzdogzzz"}, and the function
1274      * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1275      * a matcher for that expression would yield the string
1276      * {@code "zzzDOGzzzDOGzzz"}.
1277      *
1278      * <p> Invoking this method changes this matcher's state.  If the matcher
1279      * is to be used in further matching operations then it should first be
1280      * reset.  </p>
1281      *
1282      * <p> The replacer function should not modify this matcher's state during
1283      * replacement.  This method will, on a best-effort basis, throw a
1284      * {@link java.util.ConcurrentModificationException} if such modification is
1285      * detected.
1286      *
1287      * <p> The state of each match result passed to the replacer function is
1288      * guaranteed to be constant only for the duration of the replacer function
1289      * call and only if the replacer function does not modify this matcher's
1290      * state.
1291      *
1292      * @implNote
1293      * This implementation applies the replacer function to this matcher, which
1294      * is an instance of {@code MatchResult}.
1295      *
1296      * @param  replacer
1297      *         The function to be applied to the match result of this matcher
1298      *         that returns a replacement string.
1299      * @return  The string constructed by replacing each matching subsequence
1300      *          with the result of applying the replacer function to that
1301      *          matched subsequence, substituting captured subsequences as
1302      *          needed.
1303      * @throws NullPointerException if the replacer function is null
1304      * @throws ConcurrentModificationException if it is detected, on a
1305      *         best-effort basis, that the replacer function modified this
1306      *         matcher's state
1307      * @since 9
1308      */
replaceAll(Function<MatchResult, String> replacer)1309     public String replaceAll(Function<MatchResult, String> replacer) {
1310         Objects.requireNonNull(replacer);
1311         reset();
1312         boolean result = find();
1313         if (result) {
1314             StringBuilder sb = new StringBuilder();
1315             do {
1316                 int ec = modCount;
1317                 String replacement =  replacer.apply(this);
1318                 if (ec != modCount)
1319                     throw new ConcurrentModificationException();
1320                 appendReplacement(sb, replacement);
1321                 result = find();
1322             } while (result);
1323             appendTail(sb);
1324             return sb.toString();
1325         }
1326         return text.toString();
1327     }
1328 
1329     /**
1330      * Returns a stream of match results for each subsequence of the input
1331      * sequence that matches the pattern.  The match results occur in the
1332      * same order as the matching subsequences in the input sequence.
1333      *
1334      * <p> Each match result is produced as if by {@link #toMatchResult()}.
1335      *
1336      * <p> This method does not reset this matcher.  Matching starts on
1337      * initiation of the terminal stream operation either at the beginning of
1338      * this matcher's region, or, if the matcher has not since been reset, at
1339      * the first character not matched by a previous match.
1340      *
1341      * <p> If the matcher is to be used for further matching operations after
1342      * the terminal stream operation completes then it should be first reset.
1343      *
1344      * <p> This matcher's state should not be modified during execution of the
1345      * returned stream's pipeline.  The returned stream's source
1346      * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort
1347      * basis, throw a {@link java.util.ConcurrentModificationException} if such
1348      * modification is detected.
1349      *
1350      * @return a sequential stream of match results.
1351      * @since 9
1352      */
results()1353     public Stream<MatchResult> results() {
1354         class MatchResultIterator implements Iterator<MatchResult> {
1355             // -ve for call to find, 0 for not found, 1 for found
1356             int state = -1;
1357             // State for concurrent modification checking
1358             // -1 for uninitialized
1359             int expectedCount = -1;
1360             // The input sequence as a string, set once only after first find
1361             // Avoids repeated conversion from CharSequence for each match
1362             String textAsString;
1363 
1364             @Override
1365             public MatchResult next() {
1366                 if (expectedCount >= 0 && expectedCount != modCount)
1367                     throw new ConcurrentModificationException();
1368 
1369                 if (!hasNext())
1370                     throw new NoSuchElementException();
1371 
1372                 state = -1;
1373                 return toMatchResult(textAsString);
1374             }
1375 
1376             @Override
1377             public boolean hasNext() {
1378                 if (state >= 0)
1379                     return state == 1;
1380 
1381                 // Defer throwing ConcurrentModificationException to when next
1382                 // or forEachRemaining is called.  The is consistent with other
1383                 // fail-fast implementations.
1384                 if (expectedCount >= 0 && expectedCount != modCount)
1385                     return true;
1386 
1387                 boolean found = find();
1388                 // Capture the input sequence as a string on first find
1389                 if (found && state < 0)
1390                     textAsString = text.toString();
1391                 state = found ? 1 : 0;
1392                 expectedCount = modCount;
1393                 return found;
1394             }
1395 
1396             @Override
1397             public void forEachRemaining(Consumer<? super MatchResult> action) {
1398                 if (expectedCount >= 0 && expectedCount != modCount)
1399                     throw new ConcurrentModificationException();
1400 
1401                 int s = state;
1402                 if (s == 0)
1403                     return;
1404 
1405                 // Set state to report no more elements on further operations
1406                 state = 0;
1407                 expectedCount = -1;
1408 
1409                 // Perform a first find if required
1410                 if (s < 0 && !find())
1411                     return;
1412 
1413                 // Capture the input sequence as a string on first find
1414                 textAsString = text.toString();
1415 
1416                 do {
1417                     int ec = modCount;
1418                     action.accept(toMatchResult(textAsString));
1419                     if (ec != modCount)
1420                         throw new ConcurrentModificationException();
1421                 } while (find());
1422             }
1423         }
1424         return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
1425                 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false);
1426     }
1427 
1428     /**
1429      * Replaces the first subsequence of the input sequence that matches the
1430      * pattern with the given replacement string.
1431      *
1432      * <p> This method first resets this matcher.  It then scans the input
1433      * sequence looking for a match of the pattern.  Characters that are not
1434      * part of the match are appended directly to the result string; the match
1435      * is replaced in the result by the replacement string.  The replacement
1436      * string may contain references to captured subsequences as in the {@link
1437      * #appendReplacement appendReplacement} method.
1438      *
1439      * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1440      * the replacement string may cause the results to be different than if it
1441      * were being treated as a literal replacement string. Dollar signs may be
1442      * treated as references to captured subsequences as described above, and
1443      * backslashes are used to escape literal characters in the replacement
1444      * string.
1445      *
1446      * <p> Given the regular expression {@code dog}, the input
1447      * {@code "zzzdogzzzdogzzz"}, and the replacement string
1448      * {@code "cat"}, an invocation of this method on a matcher for that
1449      * expression would yield the string {@code "zzzcatzzzdogzzz"}.  </p>
1450      *
1451      * <p> Invoking this method changes this matcher's state.  If the matcher
1452      * is to be used in further matching operations then it should first be
1453      * reset.  </p>
1454      *
1455      * @param  replacement
1456      *         The replacement string
1457      * @return  The string constructed by replacing the first matching
1458      *          subsequence by the replacement string, substituting captured
1459      *          subsequences as needed
1460      */
replaceFirst(String replacement)1461     public String replaceFirst(String replacement) {
1462         if (replacement == null)
1463             throw new NullPointerException("replacement");
1464         reset();
1465         if (!find())
1466             return text.toString();
1467         StringBuilder sb = new StringBuilder();
1468         appendReplacement(sb, replacement);
1469         appendTail(sb);
1470         return sb.toString();
1471     }
1472 
1473     /**
1474      * Replaces the first subsequence of the input sequence that matches the
1475      * pattern with the result of applying the given replacer function to the
1476      * match result of this matcher corresponding to that subsequence.
1477      * Exceptions thrown by the replace function are relayed to the caller.
1478      *
1479      * <p> This method first resets this matcher.  It then scans the input
1480      * sequence looking for a match of the pattern.  Characters that are not
1481      * part of the match are appended directly to the result string; the match
1482      * is replaced in the result by applying the replacer function that
1483      * returns a replacement string.  The replacement string may contain
1484      * references to captured subsequences as in the {@link #appendReplacement
1485      * appendReplacement} method.
1486      *
1487      * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1488      * the replacement string may cause the results to be different than if it
1489      * were being treated as a literal replacement string. Dollar signs may be
1490      * treated as references to captured subsequences as described above, and
1491      * backslashes are used to escape literal characters in the replacement
1492      * string.
1493      *
1494      * <p> Given the regular expression {@code dog}, the input
1495      * {@code "zzzdogzzzdogzzz"}, and the function
1496      * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1497      * a matcher for that expression would yield the string
1498      * {@code "zzzDOGzzzdogzzz"}.
1499      *
1500      * <p> Invoking this method changes this matcher's state.  If the matcher
1501      * is to be used in further matching operations then it should first be
1502      * reset.
1503      *
1504      * <p> The replacer function should not modify this matcher's state during
1505      * replacement.  This method will, on a best-effort basis, throw a
1506      * {@link java.util.ConcurrentModificationException} if such modification is
1507      * detected.
1508      *
1509      * <p> The state of the match result passed to the replacer function is
1510      * guaranteed to be constant only for the duration of the replacer function
1511      * call and only if the replacer function does not modify this matcher's
1512      * state.
1513      *
1514      * @implNote
1515      * This implementation applies the replacer function to this matcher, which
1516      * is an instance of {@code MatchResult}.
1517      *
1518      * @param  replacer
1519      *         The function to be applied to the match result of this matcher
1520      *         that returns a replacement string.
1521      * @return  The string constructed by replacing the first matching
1522      *          subsequence with the result of applying the replacer function to
1523      *          the matched subsequence, substituting captured subsequences as
1524      *          needed.
1525      * @throws NullPointerException if the replacer function is null
1526      * @throws ConcurrentModificationException if it is detected, on a
1527      *         best-effort basis, that the replacer function modified this
1528      *         matcher's state
1529      * @since 9
1530      */
replaceFirst(Function<MatchResult, String> replacer)1531     public String replaceFirst(Function<MatchResult, String> replacer) {
1532         Objects.requireNonNull(replacer);
1533         reset();
1534         if (!find())
1535             return text.toString();
1536         StringBuilder sb = new StringBuilder();
1537         int ec = modCount;
1538         String replacement = replacer.apply(this);
1539         if (ec != modCount)
1540             throw new ConcurrentModificationException();
1541         appendReplacement(sb, replacement);
1542         appendTail(sb);
1543         return sb.toString();
1544     }
1545 
1546     /**
1547      * Sets the limits of this matcher's region. The region is the part of the
1548      * input sequence that will be searched to find a match. Invoking this
1549      * method resets the matcher, and then sets the region to start at the
1550      * index specified by the {@code start} parameter and end at the
1551      * index specified by the {@code end} parameter.
1552      *
1553      * <p>Depending on the transparency and anchoring being used (see
1554      * {@link #useTransparentBounds(boolean) useTransparentBounds} and
1555      * {@link #useAnchoringBounds(boolean) useAnchoringBounds}), certain
1556      * constructs such as anchors may behave differently at or around the
1557      * boundaries of the region.
1558      *
1559      * @param  start
1560      *         The index to start searching at (inclusive)
1561      * @param  end
1562      *         The index to end searching at (exclusive)
1563      * @throws  IndexOutOfBoundsException
1564      *          If start or end is less than zero, if
1565      *          start is greater than the length of the input sequence, if
1566      *          end is greater than the length of the input sequence, or if
1567      *          start is greater than end.
1568      * @return  this matcher
1569      * @since 1.5
1570      */
region(int start, int end)1571     public Matcher region(int start, int end) {
1572         return reset(originalInput, start, end);
1573     }
1574 
1575     /**
1576      * Reports the start index of this matcher's region. The
1577      * searches this matcher conducts are limited to finding matches
1578      * within {@link #regionStart() regionStart} (inclusive) and
1579      * {@link #regionEnd() regionEnd} (exclusive).
1580      *
1581      * @return  The starting point of this matcher's region
1582      * @since 1.5
1583      */
regionStart()1584     public int regionStart() {
1585         return from;
1586     }
1587 
1588     /**
1589      * Reports the end index (exclusive) of this matcher's region.
1590      * The searches this matcher conducts are limited to finding matches
1591      * within {@link #regionStart() regionStart} (inclusive) and
1592      * {@link #regionEnd() regionEnd} (exclusive).
1593      *
1594      * @return  the ending point of this matcher's region
1595      * @since 1.5
1596      */
regionEnd()1597     public int regionEnd() {
1598         return to;
1599     }
1600 
1601     /**
1602      * Queries the transparency of region bounds for this matcher.
1603      *
1604      * <p> This method returns {@code true} if this matcher uses
1605      * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i>
1606      * bounds.
1607      *
1608      * <p> See {@link #useTransparentBounds(boolean) useTransparentBounds} for a
1609      * description of transparent and opaque bounds.
1610      *
1611      * <p> By default, a matcher uses opaque region boundaries.
1612      *
1613      * @return {@code true} iff this matcher is using transparent bounds,
1614      *         {@code false} otherwise.
1615      * @see java.util.regex.Matcher#useTransparentBounds(boolean)
1616      * @since 1.5
1617      */
hasTransparentBounds()1618     public boolean hasTransparentBounds() {
1619         return transparentBounds;
1620     }
1621 
1622     /**
1623      * Sets the transparency of region bounds for this matcher.
1624      *
1625      * <p> Invoking this method with an argument of {@code true} will set this
1626      * matcher to use <i>transparent</i> bounds. If the boolean
1627      * argument is {@code false}, then <i>opaque</i> bounds will be used.
1628      *
1629      * <p> Using transparent bounds, the boundaries of this
1630      * matcher's region are transparent to lookahead, lookbehind,
1631      * and boundary matching constructs. Those constructs can see beyond the
1632      * boundaries of the region to see if a match is appropriate.
1633      *
1634      * <p> Using opaque bounds, the boundaries of this matcher's
1635      * region are opaque to lookahead, lookbehind, and boundary matching
1636      * constructs that may try to see beyond them. Those constructs cannot
1637      * look past the boundaries so they will fail to match anything outside
1638      * of the region.
1639      *
1640      * <p> By default, a matcher uses opaque bounds.
1641      *
1642      * @param  b a boolean indicating whether to use opaque or transparent
1643      *         regions
1644      * @return this matcher
1645      * @see java.util.regex.Matcher#hasTransparentBounds
1646      * @since 1.5
1647      */
useTransparentBounds(boolean b)1648     public Matcher useTransparentBounds(boolean b) {
1649         synchronized (this) {
1650             transparentBounds = b;
1651             nativeMatcher.useTransparentBounds(b);
1652         }
1653         return this;
1654     }
1655 
1656     /**
1657      * Queries the anchoring of region bounds for this matcher.
1658      *
1659      * <p> This method returns {@code true} if this matcher uses
1660      * <i>anchoring</i> bounds, {@code false} otherwise.
1661      *
1662      * <p> See {@link #useAnchoringBounds(boolean) useAnchoringBounds} for a
1663      * description of anchoring bounds.
1664      *
1665      * <p> By default, a matcher uses anchoring region boundaries.
1666      *
1667      * @return {@code true} iff this matcher is using anchoring bounds,
1668      *         {@code false} otherwise.
1669      * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
1670      * @since 1.5
1671      */
hasAnchoringBounds()1672     public boolean hasAnchoringBounds() {
1673         return anchoringBounds;
1674     }
1675 
1676     /**
1677      * Sets the anchoring of region bounds for this matcher.
1678      *
1679      * <p> Invoking this method with an argument of {@code true} will set this
1680      * matcher to use <i>anchoring</i> bounds. If the boolean
1681      * argument is {@code false}, then <i>non-anchoring</i> bounds will be
1682      * used.
1683      *
1684      * <p> Using anchoring bounds, the boundaries of this
1685      * matcher's region match anchors such as ^ and $.
1686      *
1687      * <p> Without anchoring bounds, the boundaries of this
1688      * matcher's region will not match anchors such as ^ and $.
1689      *
1690      * <p> By default, a matcher uses anchoring region boundaries.
1691      *
1692      * @param  b a boolean indicating whether or not to use anchoring bounds.
1693      * @return this matcher
1694      * @see java.util.regex.Matcher#hasAnchoringBounds
1695      * @since 1.5
1696      */
useAnchoringBounds(boolean b)1697     public Matcher useAnchoringBounds(boolean b) {
1698         synchronized (this) {
1699             anchoringBounds = b;
1700             nativeMatcher.useAnchoringBounds(b);
1701         }
1702         return this;
1703     }
1704 
1705     /**
1706      * <p>Returns the string representation of this matcher. The
1707      * string representation of a {@code Matcher} contains information
1708      * that may be useful for debugging. The exact format is unspecified.
1709      *
1710      * @return  The string representation of this matcher
1711      * @since 1.5
1712      */
toString()1713     public String toString() {
1714         StringBuilder sb = new StringBuilder();
1715         sb.append("java.util.regex.Matcher")
1716                 .append("[pattern=").append(pattern())
1717                 .append(" region=")
1718                 .append(regionStart()).append(',').append(regionEnd())
1719                 .append(" lastmatch=");
1720         // Android-changed: Android has no first field.
1721         // if ((first >= 0) && (group() != null)) {
1722         if (matchFound && (group() != null)) {
1723             sb.append(group());
1724         }
1725         sb.append(']');
1726         return sb.toString();
1727     }
1728 
1729     /**
1730      * <p>Returns true if the end of input was hit by the search engine in
1731      * the last match operation performed by this matcher.
1732      *
1733      * <p>When this method returns true, then it is possible that more input
1734      * would have changed the result of the last search.
1735      *
1736      * @return  true iff the end of input was hit in the last match; false
1737      *          otherwise
1738      * @since 1.5
1739      */
hitEnd()1740     public boolean hitEnd() {
1741         synchronized (this) {
1742             return nativeMatcher.hitEnd();
1743         }
1744     }
1745 
1746     /**
1747      * <p>Returns true if more input could change a positive match into a
1748      * negative one.
1749      *
1750      * <p>If this method returns true, and a match was found, then more
1751      * input could cause the match to be lost. If this method returns false
1752      * and a match was found, then more input might change the match but the
1753      * match won't be lost. If a match was not found, then requireEnd has no
1754      * meaning.
1755      *
1756      * @return  true iff more input could change a positive match into a
1757      *          negative one.
1758      * @since 1.5
1759      */
requireEnd()1760     public boolean requireEnd() {
1761         synchronized (this) {
1762             return nativeMatcher.requireEnd();
1763         }
1764     }
1765 
1766     /**
1767      * Returns the end index of the text.
1768      *
1769      * @return the index after the last character in the text
1770      */
getTextLength()1771     int getTextLength() {
1772         return text.length();
1773     }
1774 
1775     /**
1776      * Generates a String from this matcher's input in the specified range.
1777      *
1778      * @param  beginIndex   the beginning index, inclusive
1779      * @param  endIndex     the ending index, exclusive
1780      * @return A String generated from this matcher's input
1781      */
getSubSequence(int beginIndex, int endIndex)1782     CharSequence getSubSequence(int beginIndex, int endIndex) {
1783         return text.subSequence(beginIndex, endIndex);
1784     }
1785 
1786     /**
1787      * Resets the Matcher. A new input sequence and a new region can be
1788      * specified. Results of a previous find get lost. The next attempt to find
1789      * an occurrence of the Pattern in the string will start at the beginning of
1790      * the region. This is the internal version of reset() to which the several
1791      * public versions delegate.
1792      *
1793      * @param input
1794      *            the input sequence.
1795      * @param start
1796      *            the start of the region.
1797      * @param end
1798      *            the end of the region.
1799      *
1800      * @return the matcher itself.
1801      */
reset(CharSequence input, int start, int end)1802     private Matcher reset(CharSequence input, int start, int end) {
1803         if (input == null) {
1804             throw new IllegalArgumentException("input == null");
1805         }
1806 
1807         if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) {
1808             throw new IndexOutOfBoundsException();
1809         }
1810 
1811         this.originalInput = input;
1812         this.text = input.toString();
1813         this.from = start;
1814         this.to = end;
1815         resetForInput();
1816 
1817         matchFound = false;
1818         appendPos = 0;
1819         modCount++;
1820 
1821         return this;
1822     }
1823 
resetForInput()1824     private void resetForInput() {
1825         synchronized (this) {
1826             nativeMatcher.setInput(text, from, to);
1827             nativeMatcher.useAnchoringBounds(anchoringBounds);
1828             nativeMatcher.useTransparentBounds(transparentBounds);
1829         }
1830     }
1831 
1832     /**
1833      * Makes sure that a successful match has been made. Is invoked internally
1834      * from various places in the class.
1835      *
1836      * @throws IllegalStateException
1837      *             if no successful match has been made.
1838      */
ensureMatch()1839     private void ensureMatch() {
1840         if (!matchFound) {
1841             throw new IllegalStateException("No successful match so far");
1842         }
1843     }
1844 
getMatchedGroupIndex(String name)1845     private int getMatchedGroupIndex(String name) {
1846         ensureMatch();
1847         int result = nativeMatcher.getMatchedGroupIndex(name);
1848         if (result < 0) {
1849             throw new IllegalArgumentException("No capturing group in the pattern " +
1850                                                "with the name " + name);
1851         }
1852         return result;
1853     }
1854 
1855 }