• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2007 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package java.util.regex;
18 
19 /**
20  * The result of applying a {@code Pattern} to a given input. See {@link Pattern} for
21  * example uses.
22  */
23 public final class Matcher implements MatchResult {
24 
25     /**
26      * Holds the pattern, that is, the compiled regular expression.
27      */
28     private Pattern pattern;
29 
30     /**
31      * Holds the handle for the native version of the pattern.
32      */
33     private int address;
34 
35     /**
36      * Holds the input text.
37      */
38     private String input;
39 
40     /**
41      * Holds the start of the region, or 0 if the matching should start at the
42      * beginning of the text.
43      */
44     private int regionStart;
45 
46     /**
47      * Holds the end of the region, or input.length() if the matching should
48      * go until the end of the input.
49      */
50     private int regionEnd;
51 
52     /**
53      * Holds the position where the next find operation will take place.
54      */
55     private int findPos;
56 
57     /**
58      * Holds the position where the next append operation will take place.
59      */
60     private int appendPos;
61 
62     /**
63      * Reflects whether a match has been found during the most recent find
64      * operation.
65      */
66     private boolean matchFound;
67 
68     /**
69      * Holds the offsets for the most recent match.
70      */
71     private int[] matchOffsets;
72 
73     /**
74      * Reflects whether the bounds of the region are anchoring.
75      */
76     private boolean anchoringBounds = true;
77 
78     /**
79      * Reflects whether the bounds of the region are transparent.
80      */
81     private boolean transparentBounds;
82 
83     /**
84      * Creates a matcher for a given combination of pattern and input. Both
85      * elements can be changed later on.
86      *
87      * @param pattern
88      *            the pattern to use.
89      * @param input
90      *            the input to use.
91      */
Matcher(Pattern pattern, CharSequence input)92     Matcher(Pattern pattern, CharSequence input) {
93         usePattern(pattern);
94         reset(input);
95     }
96 
97     /**
98      * Appends a literal part of the input plus a replacement for the current
99      * match to a given {@link StringBuffer}. The literal part is exactly the
100      * part of the input between the previous match and the current match. The
101      * method can be used in conjunction with {@link #find()} and
102      * {@link #appendTail(StringBuffer)} to walk through the input and replace
103      * all occurrences of the {@code Pattern} with something else.
104      *
105      * @param buffer
106      *            the {@code StringBuffer} to append to.
107      * @param replacement
108      *            the replacement text.
109      * @return the {@code Matcher} itself.
110      * @throws IllegalStateException
111      *             if no successful match has been made.
112      */
appendReplacement(StringBuffer buffer, String replacement)113     public Matcher appendReplacement(StringBuffer buffer, String replacement) {
114         buffer.append(input.substring(appendPos, start()));
115         appendEvaluated(buffer, replacement);
116         appendPos = end();
117 
118         return this;
119     }
120 
121     /**
122      * Internal helper method to append a given string to a given string buffer.
123      * If the string contains any references to groups, these are replaced by
124      * the corresponding group's contents.
125      *
126      * @param buffer
127      *            the string buffer.
128      * @param s
129      *            the string to append.
130      */
appendEvaluated(StringBuffer buffer, String s)131     private void appendEvaluated(StringBuffer buffer, String s) {
132         boolean escape = false;
133         boolean dollar = false;
134 
135         for (int i = 0; i < s.length(); i++) {
136             char c = s.charAt(i);
137             if (c == '\\' && !escape) {
138                 escape = true;
139             } else if (c == '$' && !escape) {
140                 dollar = true;
141             } else if (c >= '0' && c <= '9' && dollar) {
142                 buffer.append(group(c - '0'));
143                 dollar = false;
144             } else {
145                 buffer.append(c);
146                 dollar = false;
147                 escape = false;
148             }
149         }
150 
151         // This seemingly stupid piece of code reproduces a JDK bug.
152         if (escape) {
153             throw new ArrayIndexOutOfBoundsException(s.length());
154         }
155     }
156 
157     /**
158      * Resets the {@code Matcher}. This results in the region being set to the
159      * whole input. Results of a previous find get lost. The next attempt to
160      * find an occurrence of the {@link Pattern} in the string will start at the
161      * beginning of the input.
162      *
163      * @return the {@code Matcher} itself.
164      */
reset()165     public Matcher reset() {
166         return reset(input, 0, input.length());
167     }
168 
169     /**
170      * Provides a new input and resets the {@code Matcher}. This results in the
171      * region being set to the whole input. Results of a previous find get lost.
172      * The next attempt to find an occurrence of the {@link Pattern} in the
173      * string will start at the beginning of the input.
174      *
175      * @param input
176      *            the new input sequence.
177      *
178      * @return the {@code Matcher} itself.
179      */
reset(CharSequence input)180     public Matcher reset(CharSequence input) {
181         return reset(input, 0, input.length());
182     }
183 
184     /**
185      * Resets the Matcher. A new input sequence and a new region can be
186      * specified. Results of a previous find get lost. The next attempt to find
187      * an occurrence of the Pattern in the string will start at the beginning of
188      * the region. This is the internal version of reset() to which the several
189      * public versions delegate.
190      *
191      * @param input
192      *            the input sequence.
193      * @param start
194      *            the start of the region.
195      * @param end
196      *            the end of the region.
197      *
198      * @return the matcher itself.
199      */
reset(CharSequence input, int start, int end)200     private Matcher reset(CharSequence input, int start, int end) {
201         if (input == null) {
202             throw new IllegalArgumentException();
203         }
204 
205         if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) {
206             throw new IndexOutOfBoundsException();
207         }
208 
209         this.input = input.toString();
210         this.regionStart = start;
211         this.regionEnd = end;
212         resetForInput();
213 
214         matchFound = false;
215         findPos = regionStart;
216         appendPos = 0;
217 
218         return this;
219     }
220 
221     /**
222      * Sets a new pattern for the {@code Matcher}. Results of a previous find
223      * get lost. The next attempt to find an occurrence of the {@link Pattern}
224      * in the string will start at the beginning of the input.
225      *
226      * @param pattern
227      *            the new {@code Pattern}.
228      *
229      * @return the {@code Matcher} itself.
230      */
usePattern(Pattern pattern)231     public Matcher usePattern(Pattern pattern) {
232         if (pattern == null) {
233             throw new IllegalArgumentException();
234         }
235 
236         this.pattern = pattern;
237 
238         if (address != 0) {
239             closeImpl(address);
240             address = 0;
241         }
242         address = openImpl(pattern.address);
243 
244         if (input != null) {
245             resetForInput();
246         }
247 
248         matchOffsets = new int[(groupCount() + 1) * 2];
249         matchFound = false;
250         return this;
251     }
252 
resetForInput()253     private void resetForInput() {
254         setInputImpl(address, input, regionStart, regionEnd);
255         useAnchoringBoundsImpl(address, anchoringBounds);
256         useTransparentBoundsImpl(address, transparentBounds);
257     }
258 
259     /**
260      * Resets this matcher and sets a region. Only characters inside the region
261      * are considered for a match.
262      *
263      * @param start
264      *            the first character of the region.
265      * @param end
266      *            the first character after the end of the region.
267      * @return the {@code Matcher} itself.
268      */
region(int start, int end)269     public Matcher region(int start, int end) {
270         return reset(input, start, end);
271     }
272 
273     /**
274      * Appends the (unmatched) remainder of the input to the given
275      * {@link StringBuffer}. The method can be used in conjunction with
276      * {@link #find()} and {@link #appendReplacement(StringBuffer, String)} to
277      * walk through the input and replace all matches of the {@code Pattern}
278      * with something else.
279      *
280      * @param buffer
281      *            the {@code StringBuffer} to append to.
282      * @return the {@code StringBuffer}.
283      * @throws IllegalStateException
284      *             if no successful match has been made.
285      */
appendTail(StringBuffer buffer)286     public StringBuffer appendTail(StringBuffer buffer) {
287         if (appendPos < regionEnd) {
288             buffer.append(input.substring(appendPos, regionEnd));
289         }
290         return buffer;
291     }
292 
293     /**
294      * Replaces the first occurrence of this matcher's pattern in the input with
295      * a given string.
296      *
297      * @param replacement
298      *            the replacement text.
299      * @return the modified input string.
300      */
replaceFirst(String replacement)301     public String replaceFirst(String replacement) {
302         reset();
303         StringBuffer buffer = new StringBuffer(input.length());
304         if (find()) {
305             appendReplacement(buffer, replacement);
306         }
307         return appendTail(buffer).toString();
308     }
309 
310     /**
311      * Replaces all occurrences of this matcher's pattern in the input with a
312      * given string.
313      *
314      * @param replacement
315      *            the replacement text.
316      * @return the modified input string.
317      */
replaceAll(String replacement)318     public String replaceAll(String replacement) {
319         reset();
320         StringBuffer buffer = new StringBuffer(input.length());
321         while (find()) {
322             appendReplacement(buffer, replacement);
323         }
324         return appendTail(buffer).toString();
325     }
326 
327     /**
328      * Returns the {@link Pattern} instance used inside this matcher.
329      *
330      * @return the {@code Pattern} instance.
331      */
pattern()332     public Pattern pattern() {
333         return pattern;
334     }
335 
336     /**
337      * Returns the text that matched a given group of the regular expression.
338      * Explicit capturing groups in the pattern are numbered left to right in order
339      * of their <i>opening</i> parenthesis, starting at 1.
340      * The special group 0 represents the entire match (as if the entire pattern is surrounded
341      * by an implicit capturing group).
342      * For example, "a((b)c)" matching "abc" would give the following groups:
343      * <pre>
344      * 0 "abc"
345      * 1 "bc"
346      * 2 "b"
347      * </pre>
348      *
349      * <p>An optional capturing group that failed to match as part of an overall
350      * successful match (for example, "a(b)?c" matching "ac") returns null.
351      * A capturing group that matched the empty string (for example, "a(b?)c" matching "ac")
352      * returns the empty string.
353      *
354      * @throws IllegalStateException
355      *             if no successful match has been made.
356      */
group(int group)357     public String group(int group) {
358         ensureMatch();
359         int from = matchOffsets[group * 2];
360         int to = matchOffsets[(group * 2) + 1];
361         if (from == -1 || to == -1) {
362             return null;
363         } else {
364             return input.substring(from, to);
365         }
366     }
367 
368     /**
369      * Returns the text that matched the whole regular expression.
370      *
371      * @return the text.
372      * @throws IllegalStateException
373      *             if no successful match has been made.
374      */
group()375     public String group() {
376         return group(0);
377     }
378 
379     /**
380      * Returns the next occurrence of the {@link Pattern} in the input. The
381      * method starts the search from the given character in the input.
382      *
383      * @param start
384      *            The index in the input at which the find operation is to
385      *            begin. If this is less than the start of the region, it is
386      *            automatically adjusted to that value. If it is beyond the end
387      *            of the region, the method will fail.
388      * @return true if (and only if) a match has been found.
389      */
find(int start)390     public boolean find(int start) {
391         findPos = start;
392 
393         if (findPos < regionStart) {
394             findPos = regionStart;
395         } else if (findPos >= regionEnd) {
396             matchFound = false;
397             return false;
398         }
399 
400         matchFound = findImpl(address, input, findPos, matchOffsets);
401         if (matchFound) {
402             findPos = matchOffsets[1];
403         }
404         return matchFound;
405     }
406 
407     /**
408      * Returns the next occurrence of the {@link Pattern} in the input. If a
409      * previous match was successful, the method continues the search from the
410      * first character following that match in the input. Otherwise it searches
411      * either from the region start (if one has been set), or from position 0.
412      *
413      * @return true if (and only if) a match has been found.
414      */
find()415     public boolean find() {
416         matchFound = findNextImpl(address, input, matchOffsets);
417         if (matchFound) {
418             findPos = matchOffsets[1];
419         }
420         return matchFound;
421     }
422 
423     /**
424      * Tries to match the {@link Pattern}, starting from the beginning of the
425      * region (or the beginning of the input, if no region has been set).
426      * Doesn't require the {@code Pattern} to match against the whole region.
427      *
428      * @return true if (and only if) the {@code Pattern} matches.
429      */
lookingAt()430     public boolean lookingAt() {
431         matchFound = lookingAtImpl(address, input, matchOffsets);
432         if (matchFound) {
433             findPos = matchOffsets[1];
434         }
435         return matchFound;
436     }
437 
438     /**
439      * Tries to match the {@link Pattern} against the entire region (or the
440      * entire input, if no region has been set).
441      *
442      * @return true if (and only if) the {@code Pattern} matches the entire
443      *         region.
444      */
matches()445     public boolean matches() {
446         matchFound = matchesImpl(address, input, matchOffsets);
447         if (matchFound) {
448             findPos = matchOffsets[1];
449         }
450         return matchFound;
451     }
452 
453     /**
454      * Returns the index of the first character of the text that matched a given
455      * group.
456      *
457      * @param group
458      *            the group, ranging from 0 to groupCount() - 1, with 0
459      *            representing the whole pattern.
460      * @return the character index.
461      * @throws IllegalStateException
462      *             if no successful match has been made.
463      */
start(int group)464     public int start(int group) throws IllegalStateException {
465         ensureMatch();
466         return matchOffsets[group * 2];
467     }
468 
469     /**
470      * Returns the index of the first character following the text that matched
471      * a given group.
472      *
473      * @param group
474      *            the group, ranging from 0 to groupCount() - 1, with 0
475      *            representing the whole pattern.
476      * @return the character index.
477      * @throws IllegalStateException
478      *             if no successful match has been made.
479      */
end(int group)480     public int end(int group) {
481         ensureMatch();
482         return matchOffsets[(group * 2) + 1];
483     }
484 
485     /**
486      * Returns a replacement string for the given one that has all backslashes
487      * and dollar signs escaped.
488      *
489      * @param s
490      *            the input string.
491      * @return the input string, with all backslashes and dollar signs having
492      *         been escaped.
493      */
quoteReplacement(String s)494     public static String quoteReplacement(String s) {
495         StringBuilder result = new StringBuilder(s.length());
496         for (int i = 0; i < s.length(); i++) {
497             char c = s.charAt(i);
498             if (c == '\\' || c == '$') {
499                 result.append('\\');
500             }
501             result.append(c);
502         }
503         return result.toString();
504     }
505 
506     /**
507      * Returns the index of the first character of the text that matched the
508      * whole regular expression.
509      *
510      * @return the character index.
511      * @throws IllegalStateException
512      *             if no successful match has been made.
513      */
start()514     public int start() {
515         return start(0);
516     }
517 
518     /**
519      * Returns the number of groups in the results, which is always equal to
520      * the number of groups in the original regular expression.
521      *
522      * @return the number of groups.
523      */
groupCount()524     public int groupCount() {
525         return groupCountImpl(address);
526     }
527 
528     /**
529      * Returns the index of the first character following the text that matched
530      * the whole regular expression.
531      *
532      * @return the character index.
533      * @throws IllegalStateException
534      *             if no successful match has been made.
535      */
end()536     public int end() {
537         return end(0);
538     }
539 
540     /**
541      * Converts the current match into a separate {@link MatchResult} instance
542      * that is independent from this matcher. The new object is unaffected when
543      * the state of this matcher changes.
544      *
545      * @return the new {@code MatchResult}.
546      * @throws IllegalStateException
547      *             if no successful match has been made.
548      */
toMatchResult()549     public MatchResult toMatchResult() {
550         ensureMatch();
551         return new MatchResultImpl(input, matchOffsets);
552     }
553 
554     /**
555      * Determines whether this matcher has anchoring bounds enabled or not. When
556      * anchoring bounds are enabled, the start and end of the input match the
557      * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
558      * by default.
559      *
560      * @param value
561      *            the new value for anchoring bounds.
562      * @return the {@code Matcher} itself.
563      */
useAnchoringBounds(boolean value)564     public Matcher useAnchoringBounds(boolean value) {
565         anchoringBounds = value;
566         useAnchoringBoundsImpl(address, value);
567         return this;
568     }
569 
570     /**
571      * Indicates whether this matcher has anchoring bounds enabled. When
572      * anchoring bounds are enabled, the start and end of the input match the
573      * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
574      * by default.
575      *
576      * @return true if (and only if) the {@code Matcher} uses anchoring bounds.
577      */
hasAnchoringBounds()578     public boolean hasAnchoringBounds() {
579         return anchoringBounds;
580     }
581 
582     /**
583      * Determines whether this matcher has transparent bounds enabled or not.
584      * When transparent bounds are enabled, the parts of the input outside the
585      * region are subject to lookahead and lookbehind, otherwise they are not.
586      * Transparent bounds are disabled by default.
587      *
588      * @param value
589      *            the new value for transparent bounds.
590      * @return the {@code Matcher} itself.
591      */
useTransparentBounds(boolean value)592     public Matcher useTransparentBounds(boolean value) {
593         transparentBounds = value;
594         useTransparentBoundsImpl(address, value);
595         return this;
596     }
597 
598     /**
599      * Makes sure that a successful match has been made. Is invoked internally
600      * from various places in the class.
601      *
602      * @throws IllegalStateException
603      *             if no successful match has been made.
604      */
ensureMatch()605     private void ensureMatch() {
606         if (!matchFound) {
607             throw new IllegalStateException("No successful match so far");
608         }
609     }
610 
611     /**
612      * Indicates whether this matcher has transparent bounds enabled. When
613      * transparent bounds are enabled, the parts of the input outside the region
614      * are subject to lookahead and lookbehind, otherwise they are not.
615      * Transparent bounds are disabled by default.
616      *
617      * @return true if (and only if) the {@code Matcher} uses anchoring bounds.
618      */
hasTransparentBounds()619     public boolean hasTransparentBounds() {
620         return transparentBounds;
621     }
622 
623     /**
624      * Returns this matcher's region start, that is, the first character that is
625      * considered for a match.
626      *
627      * @return the start of the region.
628      */
regionStart()629     public int regionStart() {
630         return regionStart;
631     }
632 
633     /**
634      * Returns this matcher's region end, that is, the first character that is
635      * not considered for a match.
636      *
637      * @return the end of the region.
638      */
regionEnd()639     public int regionEnd() {
640         return regionEnd;
641     }
642 
643     /**
644      * Indicates whether more input might change a successful match into an
645      * unsuccessful one.
646      *
647      * @return true if (and only if) more input might change a successful match
648      *         into an unsuccessful one.
649      */
requireEnd()650     public boolean requireEnd() {
651         return requireEndImpl(address);
652     }
653 
654     /**
655      * Indicates whether the last match hit the end of the input.
656      *
657      * @return true if (and only if) the last match hit the end of the input.
658      */
hitEnd()659     public boolean hitEnd() {
660         return hitEndImpl(address);
661     }
662 
finalize()663     @Override protected void finalize() throws Throwable {
664         try {
665             closeImpl(address);
666         } finally {
667             super.finalize();
668         }
669     }
670 
closeImpl(int addr)671     private static native void closeImpl(int addr);
findImpl(int addr, String s, int startIndex, int[] offsets)672     private static native boolean findImpl(int addr, String s, int startIndex, int[] offsets);
findNextImpl(int addr, String s, int[] offsets)673     private static native boolean findNextImpl(int addr, String s, int[] offsets);
groupCountImpl(int addr)674     private static native int groupCountImpl(int addr);
hitEndImpl(int addr)675     private static native boolean hitEndImpl(int addr);
lookingAtImpl(int addr, String s, int[] offsets)676     private static native boolean lookingAtImpl(int addr, String s, int[] offsets);
matchesImpl(int addr, String s, int[] offsets)677     private static native boolean matchesImpl(int addr, String s, int[] offsets);
openImpl(int patternAddr)678     private static native int openImpl(int patternAddr);
requireEndImpl(int addr)679     private static native boolean requireEndImpl(int addr);
setInputImpl(int addr, String s, int start, int end)680     private static native void setInputImpl(int addr, String s, int start, int end);
useAnchoringBoundsImpl(int addr, boolean value)681     private static native void useAnchoringBoundsImpl(int addr, boolean value);
useTransparentBoundsImpl(int addr, boolean value)682     private static native void useTransparentBoundsImpl(int addr, boolean value);
683 }
684