• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  * Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.  Oracle designates this
9  * particular file as subject to the "Classpath" exception as provided
10  * by Oracle in the LICENSE file that accompanied this code.
11  *
12  * This code is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15  * version 2 for more details (a copy is included in the LICENSE file that
16  * accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License version
19  * 2 along with this work; if not, write to the Free Software Foundation,
20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23  * or visit www.oracle.com if you need additional information or have any
24  * questions.
25  */
26 
27 package java.util.regex;
28 
29 import libcore.util.NativeAllocationRegistry;
30 
31 /**
32  * An engine that performs match operations on a {@link java.lang.CharSequence
33  * character sequence} by interpreting a {@link Pattern}.
34  *
35  * <p> A matcher is created from a pattern by invoking the pattern's {@link
36  * Pattern#matcher matcher} method.  Once created, a matcher can be used to
37  * perform three different kinds of match operations:
38  *
39  * <ul>
40  *
41  *   <li><p> The {@link #matches matches} method attempts to match the entire
42  *   input sequence against the pattern.  </p></li>
43  *
44  *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
45  *   input sequence, starting at the beginning, against the pattern.  </p></li>
46  *
47  *   <li><p> The {@link #find find} method scans the input sequence looking for
48  *   the next subsequence that matches the pattern.  </p></li>
49  *
50  * </ul>
51  *
52  * <p> Each of these methods returns a boolean indicating success or failure.
53  * More information about a successful match can be obtained by querying the
54  * state of the matcher.
55  *
56  * <p> A matcher finds matches in a subset of its input called the
57  * <i>region</i>. By default, the region contains all of the matcher's input.
58  * The region can be modified via the {@link #region region} method and queried
59  * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
60  * methods. The way that the region boundaries interact with some pattern
61  * constructs can be changed. See {@link #useAnchoringBounds
62  * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
63  * for more details.
64  *
65  * <p> This class also defines methods for replacing matched subsequences with
66  * new strings whose contents can, if desired, be computed from the match
67  * result.  The {@link #appendReplacement appendReplacement} and {@link
68  * #appendTail appendTail} methods can be used in tandem in order to collect
69  * the result into an existing string buffer, or the more convenient {@link
70  * #replaceAll replaceAll} method can be used to create a string in which every
71  * matching subsequence in the input sequence is replaced.
72  *
73  * <p> The explicit state of a matcher includes the start and end indices of
74  * the most recent successful match.  It also includes the start and end
75  * indices of the input subsequence captured by each <a
76  * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
77  * count of such subsequences.  As a convenience, methods are also provided for
78  * returning these captured subsequences in string form.
79  *
80  * <p> The explicit state of a matcher is initially undefined; attempting to
81  * query any part of it before a successful match will cause an {@link
82  * IllegalStateException} to be thrown.  The explicit state of a matcher is
83  * recomputed by every match operation.
84  *
85  * <p> The implicit state of a matcher includes the input character sequence as
86  * well as the <i>append position</i>, which is initially zero and is updated
87  * by the {@link #appendReplacement appendReplacement} method.
88  *
89  * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
90  * method or, if a new input sequence is desired, its {@link
91  * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
92  * matcher discards its explicit state information and sets the append position
93  * to zero.
94  *
95  * <p> Instances of this class are not safe for use by multiple concurrent
96  * threads. </p>
97  *
98  *
99  * @author      Mike McCloskey
100  * @author      Mark Reinhold
101  * @author      JSR-51 Expert Group
102  * @since       1.4
103  * @spec        JSR-51
104  */
105 
106 public final class Matcher implements MatchResult {
107     /**
108      * The Pattern object that created this Matcher.
109      */
110     private Pattern pattern;
111 
112     /**
113      * The address of the native peer.
114      * Uses of this must be manually synchronized to avoid native crashes.
115      */
116     private long address;
117 
118     /**
119      * If non-null, a Runnable that can be used to explicitly deallocate address.
120      */
121     private Runnable nativeFinalizer;
122 
123     private static final NativeAllocationRegistry registry = new NativeAllocationRegistry(
124             Matcher.class.getClassLoader(), getNativeFinalizer(), nativeSize());
125 
126     /**
127      * Holds the input text.
128      */
129     private String input;
130 
131     /**
132      * Holds the start of the region, or 0 if the matching should start at the
133      * beginning of the text.
134      */
135     private int regionStart;
136 
137     /**
138      * Holds the end of the region, or input.length() if the matching should
139      * go until the end of the input.
140      */
141     private int regionEnd;
142 
143     /**
144      * Holds the position where the next append operation will take place.
145      */
146     private int appendPos;
147 
148     /**
149      * Reflects whether a match has been found during the most recent find
150      * operation.
151      */
152     private boolean matchFound;
153 
154     /**
155      * Holds the offsets for the most recent match.
156      */
157     private int[] matchOffsets;
158 
159     /**
160      * Reflects whether the bounds of the region are anchoring.
161      */
162     private boolean anchoringBounds = true;
163 
164     /**
165      * Reflects whether the bounds of the region are transparent.
166      */
167     private boolean transparentBounds;
168 
169     /**
170      * All matchers have the state used by Pattern during a match.
171      */
Matcher(Pattern parent, CharSequence text) {
    // Bind the pattern first: usePattern() opens the native peer that the
    // subsequent reset(text) call operates on.
    this.usePattern(parent);
    this.reset(text);
}
176 
177     /**
178      * Returns the pattern that is interpreted by this matcher.
179      *
180      * @return  The pattern for which this matcher was created
181      */
pattern()182     public Pattern pattern() {
183         return pattern;
184     }
185 
186     /**
187      * Returns the match state of this matcher as a {@link MatchResult}.
188      * The result is unaffected by subsequent operations performed upon this
189      * matcher.
190      *
191      * @return  a <code>MatchResult</code> with the state of this matcher
192      * @since 1.5
193      */
toMatchResult()194     public MatchResult toMatchResult() {
195         ensureMatch();
196         return new OffsetBasedMatchResult(input, matchOffsets);
197     }
198 
199     /**
200       * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
201       * find matches with.
202       *
203       * <p> This method causes this matcher to lose information
204       * about the groups of the last match that occurred. The
205       * matcher's position in the input is maintained and its
206       * last append position is unaffected.</p>
207       *
208       * @param  newPattern
209       *         The new pattern used by this matcher
210       * @return  This matcher
211       * @throws  IllegalArgumentException
212       *          If newPattern is <tt>null</tt>
213       * @since 1.5
214       */
usePattern(Pattern newPattern)215     public Matcher usePattern(Pattern newPattern) {
216         if (newPattern == null) {
217             throw new IllegalArgumentException("newPattern == null");
218         }
219 
220         this.pattern = newPattern;
221 
222         synchronized (this) {
223             if (nativeFinalizer != null) {
224                 nativeFinalizer.run();
225                 address = 0; // In case openImpl throws.
226                 nativeFinalizer = null;
227             }
228             address = openImpl(pattern.address);
229             nativeFinalizer = registry.registerNativeAllocation(this, address);
230         }
231 
232         if (input != null) {
233             resetForInput();
234         }
235 
236         matchOffsets = new int[(groupCount() + 1) * 2];
237         matchFound = false;
238         return this;
239     }
240 
241     /**
242      * Returns the offset after the last character matched.
243      *
244      * @return  The offset after the last character matched
245      *
246      * @throws  IllegalStateException
247      *          If no match has yet been attempted,
248      *          or if the previous match operation failed
249      */
end()250     public int end() {
251         return end(0);
252     }
253 
254     /**
255      * Returns the offset after the last character of the subsequence
256      * captured by the given group during the previous match operation.
257      *
258      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
259      * to right, starting at one.  Group zero denotes the entire pattern, so
260      * the expression <i>m.</i><tt>end(0)</tt> is equivalent to
261      * <i>m.</i><tt>end()</tt>.  </p>
262      *
263      * @param  group
264      *         The index of a capturing group in this matcher's pattern
265      *
266      * @return  The offset after the last character captured by the group,
267      *          or <tt>-1</tt> if the match was successful
268      *          but the group itself did not match anything
269      *
270      * @throws  IllegalStateException
271      *          If no match has yet been attempted,
272      *          or if the previous match operation failed
273      *
274      * @throws  IndexOutOfBoundsException
275      *          If there is no capturing group in the pattern
276      *          with the given index
277      */
end(int group)278     public int end(int group) {
279         ensureMatch();
280         return matchOffsets[(group * 2) + 1];
281     }
282 
283     /**
284      * Returns the input subsequence matched by the previous match.
285      *
286      * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
287      * the expressions <i>m.</i><tt>group()</tt> and
288      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt>&nbsp;<i>m.</i><tt>end())</tt>
289      * are equivalent.  </p>
290      *
291      * <p> Note that some patterns, for example <tt>a*</tt>, match the empty
292      * string.  This method will return the empty string when the pattern
293      * successfully matches the empty string in the input.  </p>
294      *
295      * @return The (possibly empty) subsequence matched by the previous match,
296      *         in string form
297      *
298      * @throws  IllegalStateException
299      *          If no match has yet been attempted,
300      *          or if the previous match operation failed
301      */
group()302     public String group() {
303         return group(0);
304     }
305 
306     /**
307      * Returns the input subsequence captured by the given group during the
308      * previous match operation.
309      *
310      * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
311      * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and
312      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt>&nbsp;<i>m.</i><tt>end(</tt><i>g</i><tt>))</tt>
313      * are equivalent.  </p>
314      *
315      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
316      * to right, starting at one.  Group zero denotes the entire pattern, so
317      * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>.
318      * </p>
319      *
320      * <p> If the match was successful but the group specified failed to match
321      * any part of the input sequence, then <tt>null</tt> is returned. Note
322      * that some groups, for example <tt>(a*)</tt>, match the empty string.
323      * This method will return the empty string when such a group successfully
324      * matches the empty string in the input.  </p>
325      *
326      * @param  group
327      *         The index of a capturing group in this matcher's pattern
328      *
329      * @return  The (possibly empty) subsequence captured by the group
330      *          during the previous match, or <tt>null</tt> if the group
331      *          failed to match part of the input
332      *
333      * @throws  IllegalStateException
334      *          If no match has yet been attempted,
335      *          or if the previous match operation failed
336      *
337      * @throws  IndexOutOfBoundsException
338      *          If there is no capturing group in the pattern
339      *          with the given index
340      */
group(int group)341     public String group(int group) {
342         ensureMatch();
343         int from = matchOffsets[group * 2];
344         int to = matchOffsets[(group * 2) + 1];
345         if (from == -1 || to == -1) {
346             return null;
347         } else {
348             return input.substring(from, to);
349         }
350     }
351 
352     /**
353      * Returns the input subsequence captured by the given
354      * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
355      * match operation.
356      *
357      * <p> If the match was successful but the group specified failed to match
358      * any part of the input sequence, then <tt>null</tt> is returned. Note
359      * that some groups, for example <tt>(a*)</tt>, match the empty string.
360      * This method will return the empty string when such a group successfully
361      * matches the empty string in the input.  </p>
362      *
363      * @param  name
364      *         The name of a named-capturing group in this matcher's pattern
365      *
366      * @return  The (possibly empty) subsequence captured by the named group
367      *          during the previous match, or <tt>null</tt> if the group
368      *          failed to match part of the input
369      *
370      * @throws  IllegalStateException
371      *          If no match has yet been attempted,
372      *          or if the previous match operation failed
373      *
374      * @throws  IllegalArgumentException
375      *          If there is no capturing group in the pattern
376      *          with the given name
377      * @since 1.7
378      *
379      * @hide
380      */
group(String name)381     public String group(String name) {
382         // TODO: Implement this - ICU55 supports named regex groups.
383         throw new UnsupportedOperationException();
384     }
385 
386     /**
387      * Returns the number of capturing groups in this matcher's pattern.
388      *
389      * <p> Group zero denotes the entire pattern by convention. It is not
390      * included in this count.
391      *
392      * <p> Any non-negative integer smaller than or equal to the value
393      * returned by this method is guaranteed to be a valid group index for
394      * this matcher.  </p>
395      *
396      * @return The number of capturing groups in this matcher's pattern
397      */
groupCount()398     public int groupCount() {
399         synchronized (this) {
400             return groupCountImpl(address);
401         }
402     }
403 
404     /**
405      * Attempts to match the entire region against the pattern.
406      *
407      * <p> If the match succeeds then more information can be obtained via the
408      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
409      *
410      * @return  <tt>true</tt> if, and only if, the entire region sequence
411      *          matches this matcher's pattern
412      */
matches()413     public boolean matches() {
414         synchronized (this) {
415             matchFound = matchesImpl(address, input, matchOffsets);
416         }
417         return matchFound;
418     }
419 
420     /**
421      * Attempts to find the next subsequence of the input sequence that matches
422      * the pattern.
423      *
424      * <p> This method starts at the beginning of this matcher's region, or, if
425      * a previous invocation of the method was successful and the matcher has
426      * not since been reset, at the first character not matched by the previous
427      * match.
428      *
429      * <p> If the match succeeds then more information can be obtained via the
430      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
431      *
432      * @return  <tt>true</tt> if, and only if, a subsequence of the input
433      *          sequence matches this matcher's pattern
434      */
find()435     public boolean find() {
436         synchronized (this) {
437             matchFound = findNextImpl(address, input, matchOffsets);
438         }
439         return matchFound;
440     }
441 
442     /**
443      * Resets this matcher and then attempts to find the next subsequence of
444      * the input sequence that matches the pattern, starting at the specified
445      * index.
446      *
447      * <p> If the match succeeds then more information can be obtained via the
448      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent
449      * invocations of the {@link #find()} method will start at the first
450      * character not matched by this match.  </p>
451      *
452      * @throws  IndexOutOfBoundsException
453      *          If start is less than zero or if start is greater than the
454      *          length of the input sequence.
455      *
456      * @return  <tt>true</tt> if, and only if, a subsequence of the input
457      *          sequence starting at the given index matches this matcher's
458      *          pattern
459      */
find(int start)460     public boolean find(int start) {
461         if (start < 0 || start > input.length()) {
462             throw new IndexOutOfBoundsException("start=" + start + "; length=" + input.length());
463         }
464 
465         synchronized (this) {
466             matchFound = findImpl(address, input, start, matchOffsets);
467         }
468         return matchFound;
469     }
470 
471     /**
472      * Attempts to match the input sequence, starting at the beginning of the
473      * region, against the pattern.
474      *
475      * <p> Like the {@link #matches matches} method, this method always starts
476      * at the beginning of the region; unlike that method, it does not
477      * require that the entire region be matched.
478      *
479      * <p> If the match succeeds then more information can be obtained via the
480      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
481      *
482      * @return  <tt>true</tt> if, and only if, a prefix of the input
483      *          sequence matches this matcher's pattern
484      */
lookingAt()485     public boolean lookingAt() {
486         synchronized (this) {
487             matchFound = lookingAtImpl(address, input, matchOffsets);
488         }
489         return matchFound;
490     }
491 
492     /**
493      * Returns a literal replacement <code>String</code> for the specified
494      * <code>String</code>.
495      *
496      * This method produces a <code>String</code> that will work
497      * as a literal replacement <code>s</code> in the
498      * <code>appendReplacement</code> method of the {@link Matcher} class.
499      * The <code>String</code> produced will match the sequence of characters
500      * in <code>s</code> treated as a literal sequence. Slashes ('\') and
501      * dollar signs ('$') will be given no special meaning.
502      *
503      * @param  s The string to be literalized
504      * @return  A literal string replacement
505      * @since 1.5
506      */
quoteReplacement(String s)507     public static String quoteReplacement(String s) {
508         if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
509             return s;
510         StringBuilder sb = new StringBuilder();
511         for (int i=0; i<s.length(); i++) {
512             char c = s.charAt(i);
513             if (c == '\\' || c == '$') {
514                 sb.append('\\');
515             }
516             sb.append(c);
517         }
518         return sb.toString();
519     }
520 
521     /**
522      * Implements a non-terminal append-and-replace step.
523      *
524      * <p> This method performs the following actions: </p>
525      *
526      * <ol>
527      *
528      *   <li><p> It reads characters from the input sequence, starting at the
529      *   append position, and appends them to the given string buffer.  It
530      *   stops after reading the last character preceding the previous match,
531      *   that is, the character at index {@link
532      *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>
533      *
534      *   <li><p> It appends the given replacement string to the string buffer.
535      *   </p></li>
536      *
537      *   <li><p> It sets the append position of this matcher to the index of
538      *   the last character matched, plus one, that is, to {@link #end()}.
539      *   </p></li>
540      *
541      * </ol>
542      *
543      * <p> The replacement string may contain references to subsequences
544      * captured during the previous match: Each occurrence of
545      * <tt>$</tt><i>g</i> will be replaced by the result of evaluating the corresponding
546      * {@link #group(int) group(g)}. For <tt>$</tt><i>g</i>,
547      * the first number after the <tt>$</tt> is always treated as part of
548      * the group reference. Subsequent numbers are incorporated into g if
549      * they would form a legal group reference. Only the numerals '0'
550      * through '9' are considered as potential components of the group
551      * reference. If the second group matched the string <tt>"foo"</tt>, for
552      * example, then passing the replacement string <tt>"$2bar"</tt> would
553      * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
554      * sign (<tt>$</tt>) may be included as a literal in the replacement
555      * string by preceding it with a backslash (<tt>\$</tt>).
556      *
557      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
558      * the replacement string may cause the results to be different than if it
559      * were being treated as a literal replacement string. Dollar signs may be
560      * treated as references to captured subsequences as described above, and
561      * backslashes are used to escape literal characters in the replacement
562      * string.
563      *
564      * <p> This method is intended to be used in a loop together with the
565      * {@link #appendTail appendTail} and {@link #find find} methods.  The
566      * following code, for example, writes <tt>one dog two dogs in the
567      * yard</tt> to the standard-output stream: </p>
568      *
569      * <blockquote><pre>
570      * Pattern p = Pattern.compile("cat");
571      * Matcher m = p.matcher("one cat two cats in the yard");
572      * StringBuffer sb = new StringBuffer();
573      * while (m.find()) {
574      *     m.appendReplacement(sb, "dog");
575      * }
576      * m.appendTail(sb);
577      * System.out.println(sb.toString());</pre></blockquote>
578      *
579      * @param  sb
580      *         The target string buffer
581      *
582      * @param  replacement
583      *         The replacement string
584      *
585      * @return  This matcher
586      *
587      * @throws  IllegalStateException
588      *          If no match has yet been attempted,
589      *          or if the previous match operation failed
590      *
591      * @throws  IllegalArgumentException
592      *          If the replacement string refers to a named-capturing
593      *          group that does not exist in the pattern
594      *
595      * @throws  IndexOutOfBoundsException
596      *          If the replacement string refers to a capturing group
597      *          that does not exist in the pattern
598      */
appendReplacement(StringBuffer sb, String replacement)599     public Matcher appendReplacement(StringBuffer sb, String replacement) {
600         sb.append(input.substring(appendPos, start()));
601         appendEvaluated(sb, replacement);
602         appendPos = end();
603 
604         return this;
605     }
606 
607     /**
608      * Internal helper method to append a given string to a given string buffer.
609      * If the string contains any references to groups, these are replaced by
610      * the corresponding group's contents.
611      *
612      * @param buffer the string buffer.
613      * @param s the string to append.
614      */
appendEvaluated(StringBuffer buffer, String s)615     private void appendEvaluated(StringBuffer buffer, String s) {
616         boolean escape = false;
617         boolean dollar = false;
618 
619         for (int i = 0; i < s.length(); i++) {
620             char c = s.charAt(i);
621             if (c == '\\' && !escape) {
622                 escape = true;
623             } else if (c == '$' && !escape) {
624                 dollar = true;
625             } else if (c >= '0' && c <= '9' && dollar) {
626                 buffer.append(group(c - '0'));
627                 dollar = false;
628             } else {
629                 buffer.append(c);
630                 dollar = false;
631                 escape = false;
632             }
633         }
634 
635         if (escape) {
636             throw new ArrayIndexOutOfBoundsException(s.length());
637         }
638     }
639 
640 
641     /**
642      * Implements a terminal append-and-replace step.
643      *
644      * <p> This method reads characters from the input sequence, starting at
645      * the append position, and appends them to the given string buffer.  It is
646      * intended to be invoked after one or more invocations of the {@link
647      * #appendReplacement appendReplacement} method in order to copy the
648      * remainder of the input sequence.  </p>
649      *
650      * @param  sb
651      *         The target string buffer
652      *
653      * @return  The target string buffer
654      */
appendTail(StringBuffer sb)655     public StringBuffer appendTail(StringBuffer sb) {
656         if (appendPos < regionEnd) {
657             sb.append(input.substring(appendPos, regionEnd));
658         }
659         return sb;
660     }
661 
662     /**
663      * Replaces every subsequence of the input sequence that matches the
664      * pattern with the given replacement string.
665      *
666      * <p> This method first resets this matcher.  It then scans the input
667      * sequence looking for matches of the pattern.  Characters that are not
668      * part of any match are appended directly to the result string; each match
669      * is replaced in the result by the replacement string.  The replacement
670      * string may contain references to captured subsequences as in the {@link
671      * #appendReplacement appendReplacement} method.
672      *
673      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
674      * the replacement string may cause the results to be different than if it
675      * were being treated as a literal replacement string. Dollar signs may be
676      * treated as references to captured subsequences as described above, and
677      * backslashes are used to escape literal characters in the replacement
678      * string.
679      *
680      * <p> Given the regular expression <tt>a*b</tt>, the input
681      * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string
682      * <tt>"-"</tt>, an invocation of this method on a matcher for that
683      * expression would yield the string <tt>"-foo-foo-foo-"</tt>.
684      *
685      * <p> Invoking this method changes this matcher's state.  If the matcher
686      * is to be used in further matching operations then it should first be
687      * reset.  </p>
688      *
689      * @param  replacement
690      *         The replacement string
691      *
692      * @return  The string constructed by replacing each matching subsequence
693      *          by the replacement string, substituting captured subsequences
694      *          as needed
695      */
replaceAll(String replacement)696     public String replaceAll(String replacement) {
697         reset();
698         StringBuffer buffer = new StringBuffer(input.length());
699         while (find()) {
700             appendReplacement(buffer, replacement);
701         }
702         return appendTail(buffer).toString();
703     }
704 
705     /**
706      * Replaces the first subsequence of the input sequence that matches the
707      * pattern with the given replacement string.
708      *
709      * <p> This method first resets this matcher.  It then scans the input
710      * sequence looking for a match of the pattern.  Characters that are not
711      * part of the match are appended directly to the result string; the match
712      * is replaced in the result by the replacement string.  The replacement
713      * string may contain references to captured subsequences as in the {@link
714      * #appendReplacement appendReplacement} method.
715      *
716      * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
717      * the replacement string may cause the results to be different than if it
718      * were being treated as a literal replacement string. Dollar signs may be
719      * treated as references to captured subsequences as described above, and
720      * backslashes are used to escape literal characters in the replacement
721      * string.
722      *
723      * <p> Given the regular expression <tt>dog</tt>, the input
724      * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string
725      * <tt>"cat"</tt>, an invocation of this method on a matcher for that
726      * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>.  </p>
727      *
728      * <p> Invoking this method changes this matcher's state.  If the matcher
729      * is to be used in further matching operations then it should first be
730      * reset.  </p>
731      *
732      * @param  replacement
733      *         The replacement string
734      * @return  The string constructed by replacing the first matching
735      *          subsequence by the replacement string, substituting captured
736      *          subsequences as needed
737      */
replaceFirst(String replacement)738     public String replaceFirst(String replacement) {
739         reset();
740         StringBuffer buffer = new StringBuffer(input.length());
741         if (find()) {
742             appendReplacement(buffer, replacement);
743         }
744         return appendTail(buffer).toString();
745     }
746 
747     /**
748      * Sets the limits of this matcher's region. The region is the part of the
749      * input sequence that will be searched to find a match. Invoking this
750      * method resets the matcher, and then sets the region to start at the
751      * index specified by the <code>start</code> parameter and end at the
752      * index specified by the <code>end</code> parameter.
753      *
754      * <p>Depending on the transparency and anchoring being used (see
755      * {@link #useTransparentBounds useTransparentBounds} and
756      * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
757      * as anchors may behave differently at or around the boundaries of the
758      * region.
759      *
760      * @param  start
761      *         The index to start searching at (inclusive)
762      * @param  end
763      *         The index to end searching at (exclusive)
764      * @throws  IndexOutOfBoundsException
765      *          If start or end is less than zero, if
766      *          start is greater than the length of the input sequence, if
767      *          end is greater than the length of the input sequence, or if
768      *          start is greater than end.
769      * @return  this matcher
770      * @since 1.5
771      */
region(int start, int end)772     public Matcher region(int start, int end) {
773         return reset(input, start, end);
774     }
775 
776     /**
777      * Reports the start index of this matcher's region. The
778      * searches this matcher conducts are limited to finding matches
779      * within {@link #regionStart regionStart} (inclusive) and
780      * {@link #regionEnd regionEnd} (exclusive).
781      *
782      * @return  The starting point of this matcher's region
783      * @since 1.5
784      */
regionStart()785     public int regionStart() {
786         return regionStart;
787     }
788 
789     /**
790      * Reports the end index (exclusive) of this matcher's region.
791      * The searches this matcher conducts are limited to finding matches
792      * within {@link #regionStart regionStart} (inclusive) and
793      * {@link #regionEnd regionEnd} (exclusive).
794      *
795      * @return  the ending point of this matcher's region
796      * @since 1.5
797      */
regionEnd()798     public int regionEnd() {
799         return regionEnd;
800     }
801 
802     /**
803      * Queries the transparency of region bounds for this matcher.
804      *
805      * <p> This method returns <tt>true</tt> if this matcher uses
806      * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i>
807      * bounds.
808      *
809      * <p> See {@link #useTransparentBounds useTransparentBounds} for a
810      * description of transparent and opaque bounds.
811      *
812      * <p> By default, a matcher uses opaque region boundaries.
813      *
814      * @return <tt>true</tt> iff this matcher is using transparent bounds,
815      *         <tt>false</tt> otherwise.
816      * @see java.util.regex.Matcher#useTransparentBounds(boolean)
817      * @since 1.5
818      */
hasTransparentBounds()819     public boolean hasTransparentBounds() {
820         return transparentBounds;
821     }
822 
823     /**
824      * Sets the transparency of region bounds for this matcher.
825      *
826      * <p> Invoking this method with an argument of <tt>true</tt> will set this
827      * matcher to use <i>transparent</i> bounds. If the boolean
828      * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used.
829      *
830      * <p> Using transparent bounds, the boundaries of this
831      * matcher's region are transparent to lookahead, lookbehind,
832      * and boundary matching constructs. Those constructs can see beyond the
833      * boundaries of the region to see if a match is appropriate.
834      *
835      * <p> Using opaque bounds, the boundaries of this matcher's
836      * region are opaque to lookahead, lookbehind, and boundary matching
837      * constructs that may try to see beyond them. Those constructs cannot
838      * look past the boundaries so they will fail to match anything outside
839      * of the region.
840      *
841      * <p> By default, a matcher uses opaque bounds.
842      *
843      * @param  value a boolean indicating whether to use opaque or transparent
844      *         regions
845      * @return this matcher
846      * @see java.util.regex.Matcher#hasTransparentBounds
847      * @since 1.5
848      */
useTransparentBounds(boolean value)849     public Matcher useTransparentBounds(boolean value) {
850         synchronized (this) {
851             transparentBounds = value;
852             useTransparentBoundsImpl(address, value);
853         }
854         return this;
855     }
856 
857     /**
858      * Queries the anchoring of region bounds for this matcher.
859      *
860      * <p> This method returns <tt>true</tt> if this matcher uses
861      * <i>anchoring</i> bounds, <tt>false</tt> otherwise.
862      *
863      * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
864      * description of anchoring bounds.
865      *
866      * <p> By default, a matcher uses anchoring region boundaries.
867      *
868      * @return <tt>true</tt> iff this matcher is using anchoring bounds,
869      *         <tt>false</tt> otherwise.
870      * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
871      * @since 1.5
872      */
hasAnchoringBounds()873     public boolean hasAnchoringBounds() {
874         return anchoringBounds;
875     }
876 
877     /**
878      * Sets the anchoring of region bounds for this matcher.
879      *
880      * <p> Invoking this method with an argument of <tt>true</tt> will set this
881      * matcher to use <i>anchoring</i> bounds. If the boolean
882      * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be
883      * used.
884      *
885      * <p> Using anchoring bounds, the boundaries of this
886      * matcher's region match anchors such as ^ and $.
887      *
888      * <p> Without anchoring bounds, the boundaries of this
889      * matcher's region will not match anchors such as ^ and $.
890      *
891      * <p> By default, a matcher uses anchoring region boundaries.
892      *
893      * @param  value a boolean indicating whether or not to use anchoring bounds.
894      * @return this matcher
895      * @see java.util.regex.Matcher#hasAnchoringBounds
896      * @since 1.5
897      */
useAnchoringBounds(boolean value)898     public Matcher useAnchoringBounds(boolean value) {
899         synchronized (this) {
900             anchoringBounds = value;
901             useAnchoringBoundsImpl(address, value);
902         }
903         return this;
904     }
905 
906     /**
907      * <p>Returns the string representation of this matcher. The
908      * string representation of a <code>Matcher</code> contains information
909      * that may be useful for debugging. The exact format is unspecified.
910      *
911      * @return  The string representation of this matcher
912      * @since 1.5
913      */
toString()914     public String toString() {
915         StringBuilder sb = new StringBuilder();
916         sb.append("java.util.regex.Matcher");
917         sb.append("[pattern=" + pattern());
918         sb.append(" region=");
919         sb.append(regionStart() + "," + regionEnd());
920         sb.append(" lastmatch=");
921         if (matchFound && (group() != null)) {
922             sb.append(group());
923         }
924         sb.append("]");
925         return sb.toString();
926     }
927 
928     /**
929      * <p>Returns true if the end of input was hit by the search engine in
930      * the last match operation performed by this matcher.
931      *
932      * <p>When this method returns true, then it is possible that more input
933      * would have changed the result of the last search.
934      *
935      * @return  true iff the end of input was hit in the last match; false
936      *          otherwise
937      * @since 1.5
938      */
hitEnd()939     public boolean hitEnd() {
940         synchronized (this) {
941             return hitEndImpl(address);
942         }
943     }
944 
945 
946     /**
947      * <p>Returns true if more input could change a positive match into a
948      * negative one.
949      *
950      * <p>If this method returns true, and a match was found, then more
951      * input could cause the match to be lost. If this method returns false
952      * and a match was found, then more input might change the match but the
953      * match won't be lost. If a match was not found, then requireEnd has no
954      * meaning.
955      *
956      * @return  true iff more input could change a positive match into a
957      *          negative one.
958      * @since 1.5
959      */
requireEnd()960     public boolean requireEnd() {
961         synchronized (this) {
962             return requireEndImpl(address);
963         }
964     }
965 
966     /**
967      * Resets this matcher.
968      *
969      * <p> Resetting a matcher discards all of its explicit state information
970      * and sets its append position to zero. The matcher's region is set to the
971      * default region, which is its entire character sequence. The anchoring
972      * and transparency of this matcher's region boundaries are unaffected.
973      *
974      * @return  This matcher
975      */
reset()976     public Matcher reset() {
977         return reset(input, 0, input.length());
978     }
979 
980     /**
981      * Resets this matcher with a new input sequence.
982      *
983      * <p> Resetting a matcher discards all of its explicit state information
984      * and sets its append position to zero.  The matcher's region is set to
985      * the default region, which is its entire character sequence.  The
986      * anchoring and transparency of this matcher's region boundaries are
987      * unaffected.
988      *
989      * @param  input
990      *         The new input character sequence
991      *
992      * @return  This matcher
993      */
reset(CharSequence input)994     public Matcher reset(CharSequence input) {
995         return reset(input, 0, input.length());
996     }
997 
998     /**
999      * Resets the Matcher. A new input sequence and a new region can be
1000      * specified. Results of a previous find get lost. The next attempt to find
1001      * an occurrence of the Pattern in the string will start at the beginning of
1002      * the region. This is the internal version of reset() to which the several
1003      * public versions delegate.
1004      *
1005      * @param input
1006      *            the input sequence.
1007      * @param start
1008      *            the start of the region.
1009      * @param end
1010      *            the end of the region.
1011      *
1012      * @return the matcher itself.
1013      */
reset(CharSequence input, int start, int end)1014     private Matcher reset(CharSequence input, int start, int end) {
1015         if (input == null) {
1016             throw new IllegalArgumentException("input == null");
1017         }
1018 
1019         if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) {
1020             throw new IndexOutOfBoundsException();
1021         }
1022 
1023         this.input = input.toString();
1024         this.regionStart = start;
1025         this.regionEnd = end;
1026         resetForInput();
1027 
1028         matchFound = false;
1029         appendPos = 0;
1030 
1031         return this;
1032     }
1033 
resetForInput()1034     private void resetForInput() {
1035         synchronized (this) {
1036             setInputImpl(address, input, regionStart, regionEnd);
1037             useAnchoringBoundsImpl(address, anchoringBounds);
1038             useTransparentBoundsImpl(address, transparentBounds);
1039         }
1040     }
1041 
1042     /**
1043      * Makes sure that a successful match has been made. Is invoked internally
1044      * from various places in the class.
1045      *
1046      * @throws IllegalStateException
1047      *             if no successful match has been made.
1048      */
ensureMatch()1049     private void ensureMatch() {
1050         if (!matchFound) {
1051             throw new IllegalStateException("No successful match so far");
1052         }
1053     }
1054 
1055     /**
     * Returns the start index of the previous match.
1057      *
1058      * @return  The index of the first character matched
1059      *
1060      * @throws  IllegalStateException
1061      *          If no match has yet been attempted,
1062      *          or if the previous match operation failed
1063      */
start()1064     public int start() {
1065         return start(0);
1066     }
1067 
1068     /**
1069      * Returns the start index of the subsequence captured by the given group
1070      * during the previous match operation.
1071      *
1072      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
1073      * to right, starting at one.  Group zero denotes the entire pattern, so
1074      * the expression <i>m.</i><tt>start(0)</tt> is equivalent to
1075      * <i>m.</i><tt>start()</tt>.  </p>
1076      *
1077      * @param  group
1078      *         The index of a capturing group in this matcher's pattern
1079      *
1080      * @return  The index of the first character captured by the group,
1081      *          or <tt>-1</tt> if the match was successful but the group
1082      *          itself did not match anything
1083      *
1084      * @throws  IllegalStateException
1085      *          If no match has yet been attempted,
1086      *          or if the previous match operation failed
1087      *
1088      * @throws  IndexOutOfBoundsException
1089      *          If there is no capturing group in the pattern
1090      *          with the given index
1091      */
start(int group)1092     public int start(int group) throws IllegalStateException {
1093         ensureMatch();
1094         return matchOffsets[group * 2];
1095     }
1096 
findImpl(long addr, String s, int startIndex, int[] offsets)1097     private static native boolean findImpl(long addr, String s, int startIndex, int[] offsets);
findNextImpl(long addr, String s, int[] offsets)1098     private static native boolean findNextImpl(long addr, String s, int[] offsets);
getNativeFinalizer()1099     private static native long getNativeFinalizer();
groupCountImpl(long addr)1100     private static native int groupCountImpl(long addr);
hitEndImpl(long addr)1101     private static native boolean hitEndImpl(long addr);
lookingAtImpl(long addr, String s, int[] offsets)1102     private static native boolean lookingAtImpl(long addr, String s, int[] offsets);
matchesImpl(long addr, String s, int[] offsets)1103     private static native boolean matchesImpl(long addr, String s, int[] offsets);
nativeSize()1104     private static native int nativeSize();
openImpl(long patternAddr)1105     private static native long openImpl(long patternAddr);
requireEndImpl(long addr)1106     private static native boolean requireEndImpl(long addr);
setInputImpl(long addr, String s, int start, int end)1107     private static native void setInputImpl(long addr, String s, int start, int end);
useAnchoringBoundsImpl(long addr, boolean value)1108     private static native void useAnchoringBoundsImpl(long addr, boolean value);
useTransparentBoundsImpl(long addr, boolean value)1109     private static native void useTransparentBoundsImpl(long addr, boolean value);
1110 
1111     /**
1112      * A trivial match result implementation that's based on an array of integers
1113      * representing match offsets. The array is of the form
     * {@code { start1, end1, start2, end2 ....}} where each consecutive pair of elements represents
1115      * the start and end of a match respectively.
1116      */
1117     static final class OffsetBasedMatchResult implements MatchResult {
1118         private final String input;
1119         private final int[] offsets;
1120 
OffsetBasedMatchResult(String input, int[] offsets)1121         OffsetBasedMatchResult(String input, int[] offsets) {
1122             this.input = input;
1123             this.offsets = offsets.clone();
1124         }
1125 
1126         @Override
start()1127         public int start() {
1128             return start(0);
1129         }
1130 
1131         @Override
start(int group)1132         public int start(int group) {
1133             return offsets[2 * group];
1134         }
1135 
1136         @Override
end()1137         public int end() {
1138             return end(0);
1139         }
1140 
1141         @Override
end(int group)1142         public int end(int group) {
1143             return offsets[2 * group + 1];
1144         }
1145 
1146         @Override
group()1147         public String group() {
1148             return group(0);
1149         }
1150 
1151         @Override
group(int group)1152         public String group(int group) {
1153             final int start = start(group);
1154             final int end = end(group);
1155             if (start == -1 || end == -1) {
1156                 return null;
1157             }
1158 
1159             return input.substring(start, end);
1160         }
1161 
1162         @Override
groupCount()1163         public int groupCount() {
1164             return (offsets.length / 2) - 1;
1165         }
1166     }
1167 }
1168