/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package org.apache.commons.lang3.text;
18 
19 import java.util.Arrays;
20 
21 import org.apache.commons.lang3.ArraySorter;
22 import org.apache.commons.lang3.ArrayUtils;
23 import org.apache.commons.lang3.StringUtils;
24 
25 /**
26  * A matcher class that can be queried to determine if a character array
27  * portion matches.
28  * <p>
29  * This class comes complete with various factory methods.
30  * If these do not suffice, you can subclass and implement your own matcher.
31  * </p>
32  *
33  * @since 2.2
34  * @deprecated As of 3.6, use Apache Commons Text
35  * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
36  * StringMatcherFactory</a> instead
37  */
38 @Deprecated
39 public abstract class StrMatcher {
40 
41     /**
42      * Matches the comma character.
43      */
44     private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
45     /**
46      * Matches the tab character.
47      */
48     private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
49     /**
50      * Matches the space character.
51      */
52     private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
53     /**
54      * Matches the same characters as StringTokenizer,
55      * namely space, tab, newline, formfeed.
56      */
57     private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
58     /**
59      * Matches the String trim() whitespace characters.
60      */
61     private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
62     /**
63      * Matches the double quote character.
64      */
65     private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
66     /**
67      * Matches the double quote character.
68      */
69     private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
70     /**
71      * Matches the single or double quote character.
72      */
73     private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
74     /**
75      * Matches no characters.
76      */
77     private static final StrMatcher NONE_MATCHER = new NoMatcher();
78 
79     /**
80      * Returns a matcher which matches the comma character.
81      *
82      * @return a matcher for a comma
83      */
commaMatcher()84     public static StrMatcher commaMatcher() {
85         return COMMA_MATCHER;
86     }
87 
88     /**
89      * Returns a matcher which matches the tab character.
90      *
91      * @return a matcher for a tab
92      */
tabMatcher()93     public static StrMatcher tabMatcher() {
94         return TAB_MATCHER;
95     }
96 
97     /**
98      * Returns a matcher which matches the space character.
99      *
100      * @return a matcher for a space
101      */
spaceMatcher()102     public static StrMatcher spaceMatcher() {
103         return SPACE_MATCHER;
104     }
105 
106     /**
107      * Matches the same characters as StringTokenizer,
108      * namely space, tab, newline and formfeed.
109      *
110      * @return the split matcher
111      */
splitMatcher()112     public static StrMatcher splitMatcher() {
113         return SPLIT_MATCHER;
114     }
115 
116     /**
117      * Matches the String trim() whitespace characters.
118      *
119      * @return the trim matcher
120      */
trimMatcher()121     public static StrMatcher trimMatcher() {
122         return TRIM_MATCHER;
123     }
124 
125     /**
126      * Returns a matcher which matches the single quote character.
127      *
128      * @return a matcher for a single quote
129      */
singleQuoteMatcher()130     public static StrMatcher singleQuoteMatcher() {
131         return SINGLE_QUOTE_MATCHER;
132     }
133 
134     /**
135      * Returns a matcher which matches the double quote character.
136      *
137      * @return a matcher for a double quote
138      */
doubleQuoteMatcher()139     public static StrMatcher doubleQuoteMatcher() {
140         return DOUBLE_QUOTE_MATCHER;
141     }
142 
143     /**
144      * Returns a matcher which matches the single or double quote character.
145      *
146      * @return a matcher for a single or double quote
147      */
quoteMatcher()148     public static StrMatcher quoteMatcher() {
149         return QUOTE_MATCHER;
150     }
151 
152     /**
153      * Matches no characters.
154      *
155      * @return a matcher that matches nothing
156      */
noneMatcher()157     public static StrMatcher noneMatcher() {
158         return NONE_MATCHER;
159     }
160 
161     /**
162      * Constructor that creates a matcher from a character.
163      *
164      * @param ch  the character to match, must not be null
165      * @return a new Matcher for the given char
166      */
charMatcher(final char ch)167     public static StrMatcher charMatcher(final char ch) {
168         return new CharMatcher(ch);
169     }
170 
171     /**
172      * Constructor that creates a matcher from a set of characters.
173      *
174      * @param chars  the characters to match, null or empty matches nothing
175      * @return a new matcher for the given char[]
176      */
charSetMatcher(final char... chars)177     public static StrMatcher charSetMatcher(final char... chars) {
178         if (ArrayUtils.isEmpty(chars)) {
179             return NONE_MATCHER;
180         }
181         if (chars.length == 1) {
182             return new CharMatcher(chars[0]);
183         }
184         return new CharSetMatcher(chars);
185     }
186 
187     /**
188      * Constructor that creates a matcher from a string representing a set of characters.
189      *
190      * @param chars  the characters to match, null or empty matches nothing
191      * @return a new Matcher for the given characters
192      */
charSetMatcher(final String chars)193     public static StrMatcher charSetMatcher(final String chars) {
194         if (StringUtils.isEmpty(chars)) {
195             return NONE_MATCHER;
196         }
197         if (chars.length() == 1) {
198             return new CharMatcher(chars.charAt(0));
199         }
200         return new CharSetMatcher(chars.toCharArray());
201     }
202 
203     /**
204      * Constructor that creates a matcher from a string.
205      *
206      * @param str  the string to match, null or empty matches nothing
207      * @return a new Matcher for the given String
208      */
stringMatcher(final String str)209     public static StrMatcher stringMatcher(final String str) {
210         if (StringUtils.isEmpty(str)) {
211             return NONE_MATCHER;
212         }
213         return new StringMatcher(str);
214     }
215 
216     /**
217      * Constructor.
218      */
StrMatcher()219     protected StrMatcher() {
220     }
221 
222     /**
223      * Returns the number of matching characters, zero for no match.
224      * <p>
225      * This method is called to check for a match.
226      * The parameter {@code pos} represents the current position to be
227      * checked in the string {@code buffer} (a character array which must
228      * not be changed).
229      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
230      * </p>
231      * <p>
232      * The character array may be larger than the active area to be matched.
233      * Only values in the buffer between the specified indices may be accessed.
234      * </p>
235      * <p>
236      * The matching code may check one character or many.
237      * It may check characters preceding {@code pos} as well as those
238      * after, so long as no checks exceed the bounds specified.
239      * </p>
240      * <p>
241      * It must return zero for no match, or a positive number if a match was found.
242      * The number indicates the number of characters that matched.
243      * </p>
244      *
245      * @param buffer  the text content to match against, do not change
246      * @param pos  the starting position for the match, valid for buffer
247      * @param bufferStart  the first active index in the buffer, valid for buffer
248      * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
249      * @return the number of matching characters, zero for no match
250      */
isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd)251     public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
252 
253     /**
254      * Returns the number of matching characters, zero for no match.
255      * <p>
256      * This method is called to check for a match.
257      * The parameter {@code pos} represents the current position to be
258      * checked in the string {@code buffer} (a character array which must
259      * not be changed).
260      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
261      * </p>
262      * <p>
263      * The matching code may check one character or many.
264      * It may check characters preceding {@code pos} as well as those after.
265      * </p>
266      * <p>
267      * It must return zero for no match, or a positive number if a match was found.
268      * The number indicates the number of characters that matched.
269      * </p>
270      *
271      * @param buffer  the text content to match against, do not change
272      * @param pos  the starting position for the match, valid for buffer
273      * @return the number of matching characters, zero for no match
274      * @since 2.4
275      */
isMatch(final char[] buffer, final int pos)276     public int isMatch(final char[] buffer, final int pos) {
277         return isMatch(buffer, pos, 0, buffer.length);
278     }
279 
280     /**
281      * Class used to define a set of characters for matching purposes.
282      */
283     static final class CharSetMatcher extends StrMatcher {
284         /** The set of characters to match. */
285         private final char[] chars;
286 
287         /**
288          * Constructor that creates a matcher from a character array.
289          *
290          * @param chars  the characters to match, must not be null
291          */
CharSetMatcher(final char[] chars)292         CharSetMatcher(final char[] chars) {
293             this.chars = ArraySorter.sort(chars.clone());
294         }
295 
296         /**
297          * Returns whether or not the given character matches.
298          *
299          * @param buffer  the text content to match against, do not change
300          * @param pos  the starting position for the match, valid for buffer
301          * @param bufferStart  the first active index in the buffer, valid for buffer
302          * @param bufferEnd  the end index of the active buffer, valid for buffer
303          * @return the number of matching characters, zero for no match
304          */
305         @Override
isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd)306         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
307             return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
308         }
309     }
310 
311     /**
312      * Class used to define a character for matching purposes.
313      */
314     static final class CharMatcher extends StrMatcher {
315         /** The character to match. */
316         private final char ch;
317 
318         /**
319          * Constructor that creates a matcher that matches a single character.
320          *
321          * @param ch  the character to match
322          */
CharMatcher(final char ch)323         CharMatcher(final char ch) {
324             this.ch = ch;
325         }
326 
327         /**
328          * Returns whether or not the given character matches.
329          *
330          * @param buffer  the text content to match against, do not change
331          * @param pos  the starting position for the match, valid for buffer
332          * @param bufferStart  the first active index in the buffer, valid for buffer
333          * @param bufferEnd  the end index of the active buffer, valid for buffer
334          * @return the number of matching characters, zero for no match
335          */
336         @Override
isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd)337         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
338             return ch == buffer[pos] ? 1 : 0;
339         }
340     }
341 
342     /**
343      * Class used to define a set of characters for matching purposes.
344      */
345     static final class StringMatcher extends StrMatcher {
346         /** The string to match, as a character array. */
347         private final char[] chars;
348 
349         /**
350          * Constructor that creates a matcher from a String.
351          *
352          * @param str  the string to match, must not be null
353          */
StringMatcher(final String str)354         StringMatcher(final String str) {
355             chars = str.toCharArray();
356         }
357 
358         /**
359          * Returns whether or not the given text matches the stored string.
360          *
361          * @param buffer  the text content to match against, do not change
362          * @param pos  the starting position for the match, valid for buffer
363          * @param bufferStart  the first active index in the buffer, valid for buffer
364          * @param bufferEnd  the end index of the active buffer, valid for buffer
365          * @return the number of matching characters, zero for no match
366          */
367         @Override
isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd)368         public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
369             final int len = chars.length;
370             if (pos + len > bufferEnd) {
371                 return 0;
372             }
373             for (int i = 0; i < chars.length; i++, pos++) {
374                 if (chars[i] != buffer[pos]) {
375                     return 0;
376                 }
377             }
378             return len;
379         }
380 
381         @Override
toString()382         public String toString() {
383             return super.toString() + ' ' + Arrays.toString(chars);
384         }
385 
386     }
387 
388     /**
389      * Class used to match no characters.
390      */
391     static final class NoMatcher extends StrMatcher {
392 
393         /**
394          * Constructs a new instance of {@link NoMatcher}.
395          */
NoMatcher()396         NoMatcher() {
397         }
398 
399         /**
400          * Always returns {@code false}.
401          *
402          * @param buffer  the text content to match against, do not change
403          * @param pos  the starting position for the match, valid for buffer
404          * @param bufferStart  the first active index in the buffer, valid for buffer
405          * @param bufferEnd  the end index of the active buffer, valid for buffer
406          * @return the number of matching characters, zero for no match
407          */
408         @Override
isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd)409         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
410             return 0;
411         }
412     }
413 
414     /**
415      * Class used to match whitespace as per trim().
416      */
417     static final class TrimMatcher extends StrMatcher {
418 
419         /**
420          * Constructs a new instance of {@link TrimMatcher}.
421          */
TrimMatcher()422         TrimMatcher() {
423         }
424 
425         /**
426          * Returns whether or not the given character matches.
427          *
428          * @param buffer  the text content to match against, do not change
429          * @param pos  the starting position for the match, valid for buffer
430          * @param bufferStart  the first active index in the buffer, valid for buffer
431          * @param bufferEnd  the end index of the active buffer, valid for buffer
432          * @return the number of matching characters, zero for no match
433          */
434         @Override
isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd)435         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
436             return buffer[pos] <= 32 ? 1 : 0;
437         }
438     }
439 
440 }
441