• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package java.util.regex;
18 
19 /**
20  * The result of applying a {@code Pattern} to a given input. See {@link Pattern} for
21  * example uses.
22  */
23 public final class Matcher implements MatchResult {
24 
25     /**
26      * Holds the pattern, that is, the compiled regular expression.
27      */
28     private Pattern pattern;
29 
30     /**
31      * The address of the native peer.
32      * Uses of this must be manually synchronized to avoid native crashes.
33      */
34     private long address;
35 
36     /**
37      * Holds the input text.
38      */
39     private String input;
40 
41     /**
42      * Holds the start of the region, or 0 if the matching should start at the
43      * beginning of the text.
44      */
45     private int regionStart;
46 
47     /**
48      * Holds the end of the region, or input.length() if the matching should
49      * go until the end of the input.
50      */
51     private int regionEnd;
52 
53     /**
54      * Holds the position where the next append operation will take place.
55      */
56     private int appendPos;
57 
58     /**
59      * Reflects whether a match has been found during the most recent find
60      * operation.
61      */
62     private boolean matchFound;
63 
64     /**
65      * Holds the offsets for the most recent match.
66      */
67     private int[] matchOffsets;
68 
69     /**
70      * Reflects whether the bounds of the region are anchoring.
71      */
72     private boolean anchoringBounds = true;
73 
74     /**
75      * Reflects whether the bounds of the region are transparent.
76      */
77     private boolean transparentBounds;
78 
79     /**
80      * Creates a matcher for a given combination of pattern and input. Both
81      * elements can be changed later on.
82      *
83      * @param pattern
84      *            the pattern to use.
85      * @param input
86      *            the input to use.
87      */
Matcher(Pattern pattern, CharSequence input)88     Matcher(Pattern pattern, CharSequence input) {
89         usePattern(pattern);
90         reset(input);
91     }
92 
93     /**
94      * Appends a literal part of the input plus a replacement for the current
95      * match to a given {@link StringBuffer}. The literal part is exactly the
96      * part of the input between the previous match and the current match. The
97      * method can be used in conjunction with {@link #find()} and
98      * {@link #appendTail(StringBuffer)} to walk through the input and replace
99      * all occurrences of the {@code Pattern} with something else.
100      *
101      * @param buffer
102      *            the {@code StringBuffer} to append to.
103      * @param replacement
104      *            the replacement text.
105      * @return the {@code Matcher} itself.
106      * @throws IllegalStateException
107      *             if no successful match has been made.
108      */
appendReplacement(StringBuffer buffer, String replacement)109     public Matcher appendReplacement(StringBuffer buffer, String replacement) {
110         buffer.append(input.substring(appendPos, start()));
111         appendEvaluated(buffer, replacement);
112         appendPos = end();
113 
114         return this;
115     }
116 
117     /**
118      * Internal helper method to append a given string to a given string buffer.
119      * If the string contains any references to groups, these are replaced by
120      * the corresponding group's contents.
121      *
122      * @param buffer
123      *            the string buffer.
124      * @param s
125      *            the string to append.
126      */
appendEvaluated(StringBuffer buffer, String s)127     private void appendEvaluated(StringBuffer buffer, String s) {
128         boolean escape = false;
129         boolean dollar = false;
130 
131         for (int i = 0; i < s.length(); i++) {
132             char c = s.charAt(i);
133             if (c == '\\' && !escape) {
134                 escape = true;
135             } else if (c == '$' && !escape) {
136                 dollar = true;
137             } else if (c >= '0' && c <= '9' && dollar) {
138                 buffer.append(group(c - '0'));
139                 dollar = false;
140             } else {
141                 buffer.append(c);
142                 dollar = false;
143                 escape = false;
144             }
145         }
146 
147         // This seemingly stupid piece of code reproduces a JDK bug.
148         if (escape) {
149             throw new ArrayIndexOutOfBoundsException(s.length());
150         }
151     }
152 
153     /**
154      * Resets the {@code Matcher}. This results in the region being set to the
155      * whole input. Results of a previous find get lost. The next attempt to
156      * find an occurrence of the {@link Pattern} in the string will start at the
157      * beginning of the input.
158      *
159      * @return the {@code Matcher} itself.
160      */
reset()161     public Matcher reset() {
162         return reset(input, 0, input.length());
163     }
164 
165     /**
166      * Provides a new input and resets the {@code Matcher}. This results in the
167      * region being set to the whole input. Results of a previous find get lost.
168      * The next attempt to find an occurrence of the {@link Pattern} in the
169      * string will start at the beginning of the input.
170      *
171      * @param input
172      *            the new input sequence.
173      *
174      * @return the {@code Matcher} itself.
175      */
reset(CharSequence input)176     public Matcher reset(CharSequence input) {
177         return reset(input, 0, input.length());
178     }
179 
180     /**
181      * Resets the Matcher. A new input sequence and a new region can be
182      * specified. Results of a previous find get lost. The next attempt to find
183      * an occurrence of the Pattern in the string will start at the beginning of
184      * the region. This is the internal version of reset() to which the several
185      * public versions delegate.
186      *
187      * @param input
188      *            the input sequence.
189      * @param start
190      *            the start of the region.
191      * @param end
192      *            the end of the region.
193      *
194      * @return the matcher itself.
195      */
reset(CharSequence input, int start, int end)196     private Matcher reset(CharSequence input, int start, int end) {
197         if (input == null) {
198             throw new IllegalArgumentException("input == null");
199         }
200 
201         if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) {
202             throw new IndexOutOfBoundsException();
203         }
204 
205         this.input = input.toString();
206         this.regionStart = start;
207         this.regionEnd = end;
208         resetForInput();
209 
210         matchFound = false;
211         appendPos = 0;
212 
213         return this;
214     }
215 
216     /**
217      * Sets a new pattern for the {@code Matcher}. Results of a previous find
218      * get lost. The next attempt to find an occurrence of the {@link Pattern}
219      * in the string will start at the beginning of the input.
220      *
221      * @param pattern
222      *            the new {@code Pattern}.
223      *
224      * @return the {@code Matcher} itself.
225      */
usePattern(Pattern pattern)226     public Matcher usePattern(Pattern pattern) {
227         if (pattern == null) {
228             throw new IllegalArgumentException("pattern == null");
229         }
230 
231         this.pattern = pattern;
232 
233         synchronized (this) {
234             if (address != 0) {
235                 closeImpl(address);
236                 address = 0; // In case openImpl throws.
237             }
238             address = openImpl(pattern.address);
239         }
240 
241         if (input != null) {
242             resetForInput();
243         }
244 
245         matchOffsets = new int[(groupCount() + 1) * 2];
246         matchFound = false;
247         return this;
248     }
249 
resetForInput()250     private void resetForInput() {
251         synchronized (this) {
252             setInputImpl(address, input, regionStart, regionEnd);
253             useAnchoringBoundsImpl(address, anchoringBounds);
254             useTransparentBoundsImpl(address, transparentBounds);
255         }
256     }
257 
258     /**
259      * Resets this matcher and sets a region. Only characters inside the region
260      * are considered for a match.
261      *
262      * @param start
263      *            the first character of the region.
264      * @param end
265      *            the first character after the end of the region.
266      * @return the {@code Matcher} itself.
267      */
region(int start, int end)268     public Matcher region(int start, int end) {
269         return reset(input, start, end);
270     }
271 
272     /**
273      * Appends the (unmatched) remainder of the input to the given
274      * {@link StringBuffer}. The method can be used in conjunction with
275      * {@link #find()} and {@link #appendReplacement(StringBuffer, String)} to
276      * walk through the input and replace all matches of the {@code Pattern}
277      * with something else.
278      *
279      * @param buffer
280      *            the {@code StringBuffer} to append to.
281      * @return the {@code StringBuffer}.
282      * @throws IllegalStateException
283      *             if no successful match has been made.
284      */
appendTail(StringBuffer buffer)285     public StringBuffer appendTail(StringBuffer buffer) {
286         if (appendPos < regionEnd) {
287             buffer.append(input.substring(appendPos, regionEnd));
288         }
289         return buffer;
290     }
291 
292     /**
293      * Replaces the first occurrence of this matcher's pattern in the input with
294      * a given string.
295      *
296      * @param replacement
297      *            the replacement text.
298      * @return the modified input string.
299      */
replaceFirst(String replacement)300     public String replaceFirst(String replacement) {
301         reset();
302         StringBuffer buffer = new StringBuffer(input.length());
303         if (find()) {
304             appendReplacement(buffer, replacement);
305         }
306         return appendTail(buffer).toString();
307     }
308 
309     /**
310      * Replaces all occurrences of this matcher's pattern in the input with a
311      * given string.
312      *
313      * @param replacement
314      *            the replacement text.
315      * @return the modified input string.
316      */
replaceAll(String replacement)317     public String replaceAll(String replacement) {
318         reset();
319         StringBuffer buffer = new StringBuffer(input.length());
320         while (find()) {
321             appendReplacement(buffer, replacement);
322         }
323         return appendTail(buffer).toString();
324     }
325 
326     /**
327      * Returns the {@link Pattern} instance used inside this matcher.
328      *
329      * @return the {@code Pattern} instance.
330      */
pattern()331     public Pattern pattern() {
332         return pattern;
333     }
334 
335     /**
336      * Returns the text that matched a given group of the regular expression.
337      * Explicit capturing groups in the pattern are numbered left to right in order
338      * of their <i>opening</i> parenthesis, starting at 1.
339      * The special group 0 represents the entire match (as if the entire pattern is surrounded
340      * by an implicit capturing group).
341      * For example, "a((b)c)" matching "abc" would give the following groups:
342      * <pre>
343      * 0 "abc"
344      * 1 "bc"
345      * 2 "b"
346      * </pre>
347      *
348      * <p>An optional capturing group that failed to match as part of an overall
349      * successful match (for example, "a(b)?c" matching "ac") returns null.
350      * A capturing group that matched the empty string (for example, "a(b?)c" matching "ac")
351      * returns the empty string.
352      *
353      * @throws IllegalStateException
354      *             if no successful match has been made.
355      */
group(int group)356     public String group(int group) {
357         ensureMatch();
358         int from = matchOffsets[group * 2];
359         int to = matchOffsets[(group * 2) + 1];
360         if (from == -1 || to == -1) {
361             return null;
362         } else {
363             return input.substring(from, to);
364         }
365     }
366 
367     /**
368      * Returns the text that matched the whole regular expression.
369      *
370      * @return the text.
371      * @throws IllegalStateException
372      *             if no successful match has been made.
373      */
group()374     public String group() {
375         return group(0);
376     }
377 
378     /**
379      * Returns true if there is another match in the input, starting
380      * from the given position. The region is ignored.
381      *
382      * @throws IndexOutOfBoundsException if {@code start < 0 || start > input.length()}
383      */
find(int start)384     public boolean find(int start) {
385         if (start < 0 || start > input.length()) {
386             throw new IndexOutOfBoundsException("start=" + start + "; length=" + input.length());
387         }
388 
389         synchronized (this) {
390             matchFound = findImpl(address, input, start, matchOffsets);
391         }
392         return matchFound;
393     }
394 
395     /**
396      * Returns the next occurrence of the {@link Pattern} in the input. If a
397      * previous match was successful, the method continues the search from the
398      * first character following that match in the input. Otherwise it searches
399      * either from the region start (if one has been set), or from position 0.
400      *
401      * @return true if (and only if) a match has been found.
402      */
find()403     public boolean find() {
404         synchronized (this) {
405             matchFound = findNextImpl(address, input, matchOffsets);
406         }
407         return matchFound;
408     }
409 
410     /**
411      * Tries to match the {@link Pattern}, starting from the beginning of the
412      * region (or the beginning of the input, if no region has been set).
413      * Doesn't require the {@code Pattern} to match against the whole region.
414      *
415      * @return true if (and only if) the {@code Pattern} matches.
416      */
lookingAt()417     public boolean lookingAt() {
418         synchronized (this) {
419             matchFound = lookingAtImpl(address, input, matchOffsets);
420         }
421         return matchFound;
422     }
423 
424     /**
425      * Tries to match the {@link Pattern} against the entire region (or the
426      * entire input, if no region has been set).
427      *
428      * @return true if (and only if) the {@code Pattern} matches the entire
429      *         region.
430      */
matches()431     public boolean matches() {
432         synchronized (this) {
433             matchFound = matchesImpl(address, input, matchOffsets);
434         }
435         return matchFound;
436     }
437 
438     /**
439      * Returns the index of the first character of the text that matched a given
440      * group.
441      *
442      * @param group
443      *            the group, ranging from 0 to groupCount() - 1, with 0
444      *            representing the whole pattern.
445      * @return the character index.
446      * @throws IllegalStateException
447      *             if no successful match has been made.
448      */
start(int group)449     public int start(int group) throws IllegalStateException {
450         ensureMatch();
451         return matchOffsets[group * 2];
452     }
453 
454     /**
455      * Returns the index of the first character following the text that matched
456      * a given group.
457      *
458      * @param group
459      *            the group, ranging from 0 to groupCount() - 1, with 0
460      *            representing the whole pattern.
461      * @return the character index.
462      * @throws IllegalStateException
463      *             if no successful match has been made.
464      */
end(int group)465     public int end(int group) {
466         ensureMatch();
467         return matchOffsets[(group * 2) + 1];
468     }
469 
470     /**
471      * Returns a replacement string for the given one that has all backslashes
472      * and dollar signs escaped.
473      *
474      * @param s
475      *            the input string.
476      * @return the input string, with all backslashes and dollar signs having
477      *         been escaped.
478      */
quoteReplacement(String s)479     public static String quoteReplacement(String s) {
480         StringBuilder result = new StringBuilder(s.length());
481         for (int i = 0; i < s.length(); i++) {
482             char c = s.charAt(i);
483             if (c == '\\' || c == '$') {
484                 result.append('\\');
485             }
486             result.append(c);
487         }
488         return result.toString();
489     }
490 
491     /**
492      * Returns the index of the first character of the text that matched the
493      * whole regular expression.
494      *
495      * @return the character index.
496      * @throws IllegalStateException
497      *             if no successful match has been made.
498      */
start()499     public int start() {
500         return start(0);
501     }
502 
503     /**
504      * Returns the number of groups in the results, which is always equal to
505      * the number of groups in the original regular expression.
506      *
507      * @return the number of groups.
508      */
groupCount()509     public int groupCount() {
510         synchronized (this) {
511             return groupCountImpl(address);
512         }
513     }
514 
515     /**
516      * Returns the index of the first character following the text that matched
517      * the whole regular expression.
518      *
519      * @return the character index.
520      * @throws IllegalStateException
521      *             if no successful match has been made.
522      */
end()523     public int end() {
524         return end(0);
525     }
526 
527     /**
528      * Converts the current match into a separate {@link MatchResult} instance
529      * that is independent from this matcher. The new object is unaffected when
530      * the state of this matcher changes.
531      *
532      * @return the new {@code MatchResult}.
533      * @throws IllegalStateException
534      *             if no successful match has been made.
535      */
toMatchResult()536     public MatchResult toMatchResult() {
537         ensureMatch();
538         return new MatchResultImpl(input, matchOffsets);
539     }
540 
541     /**
542      * Determines whether this matcher has anchoring bounds enabled or not. When
543      * anchoring bounds are enabled, the start and end of the input match the
544      * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
545      * by default.
546      *
547      * @param value
548      *            the new value for anchoring bounds.
549      * @return the {@code Matcher} itself.
550      */
useAnchoringBounds(boolean value)551     public Matcher useAnchoringBounds(boolean value) {
552         synchronized (this) {
553             anchoringBounds = value;
554             useAnchoringBoundsImpl(address, value);
555         }
556         return this;
557     }
558 
559     /**
560      * Indicates whether this matcher has anchoring bounds enabled. When
561      * anchoring bounds are enabled, the start and end of the input match the
562      * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
563      * by default.
564      *
565      * @return true if (and only if) the {@code Matcher} uses anchoring bounds.
566      */
hasAnchoringBounds()567     public boolean hasAnchoringBounds() {
568         return anchoringBounds;
569     }
570 
571     /**
572      * Determines whether this matcher has transparent bounds enabled or not.
573      * When transparent bounds are enabled, the parts of the input outside the
574      * region are subject to lookahead and lookbehind, otherwise they are not.
575      * Transparent bounds are disabled by default.
576      *
577      * @param value
578      *            the new value for transparent bounds.
579      * @return the {@code Matcher} itself.
580      */
useTransparentBounds(boolean value)581     public Matcher useTransparentBounds(boolean value) {
582         synchronized (this) {
583             transparentBounds = value;
584             useTransparentBoundsImpl(address, value);
585         }
586         return this;
587     }
588 
589     /**
590      * Makes sure that a successful match has been made. Is invoked internally
591      * from various places in the class.
592      *
593      * @throws IllegalStateException
594      *             if no successful match has been made.
595      */
ensureMatch()596     private void ensureMatch() {
597         if (!matchFound) {
598             throw new IllegalStateException("No successful match so far");
599         }
600     }
601 
602     /**
603      * Indicates whether this matcher has transparent bounds enabled. When
604      * transparent bounds are enabled, the parts of the input outside the region
605      * are subject to lookahead and lookbehind, otherwise they are not.
606      * Transparent bounds are disabled by default.
607      *
608      * @return true if (and only if) the {@code Matcher} uses anchoring bounds.
609      */
hasTransparentBounds()610     public boolean hasTransparentBounds() {
611         return transparentBounds;
612     }
613 
614     /**
615      * Returns this matcher's region start, that is, the index of the first character that is
616      * considered for a match.
617      */
regionStart()618     public int regionStart() {
619         return regionStart;
620     }
621 
622     /**
623      * Returns this matcher's region end, that is, the index of the first character that is
624      * not considered for a match.
625      */
regionEnd()626     public int regionEnd() {
627         return regionEnd;
628     }
629 
630     /**
631      * Returns true if and only if more input might change a successful match into an
632      * unsuccessful one.
633      */
requireEnd()634     public boolean requireEnd() {
635         synchronized (this) {
636             return requireEndImpl(address);
637         }
638     }
639 
640     /**
641      * Returns true if and only if the last match hit the end of the input.
642      */
hitEnd()643     public boolean hitEnd() {
644         synchronized (this) {
645             return hitEndImpl(address);
646         }
647     }
648 
finalize()649     @Override protected void finalize() throws Throwable {
650         try {
651             synchronized (this) {
652                 closeImpl(address);
653             }
654         } finally {
655             super.finalize();
656         }
657     }
658 
closeImpl(long addr)659     private static native void closeImpl(long addr);
findImpl(long addr, String s, int startIndex, int[] offsets)660     private static native boolean findImpl(long addr, String s, int startIndex, int[] offsets);
findNextImpl(long addr, String s, int[] offsets)661     private static native boolean findNextImpl(long addr, String s, int[] offsets);
groupCountImpl(long addr)662     private static native int groupCountImpl(long addr);
hitEndImpl(long addr)663     private static native boolean hitEndImpl(long addr);
lookingAtImpl(long addr, String s, int[] offsets)664     private static native boolean lookingAtImpl(long addr, String s, int[] offsets);
matchesImpl(long addr, String s, int[] offsets)665     private static native boolean matchesImpl(long addr, String s, int[] offsets);
openImpl(long patternAddr)666     private static native long openImpl(long patternAddr);
requireEndImpl(long addr)667     private static native boolean requireEndImpl(long addr);
setInputImpl(long addr, String s, int start, int end)668     private static native void setInputImpl(long addr, String s, int start, int end);
useAnchoringBoundsImpl(long addr, boolean value)669     private static native void useAnchoringBoundsImpl(long addr, boolean value);
useTransparentBoundsImpl(long addr, boolean value)670     private static native void useTransparentBoundsImpl(long addr, boolean value);
671 }
672