• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 package org.apache.commons.lang3;
18 
19 import java.util.regex.Pattern;
20 
/**
 * Helpers to process Strings using regular expressions.
 *
 * <p>Every method is {@code null} safe: if any argument is {@code null} the input text is
 * returned unchanged (which is {@code null} when the text itself is {@code null}).</p>
 *
 * @see java.util.regex.Pattern
 * @since 3.8
 */
public class RegExUtils {

    /** Replacement used by the {@code remove*} methods; same value as {@code StringUtils.EMPTY}. */
    private static final String EMPTY = "";

    /**
     * Removes each substring of the text String that matches the given regular expression pattern.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceAll(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.removeAll(null, *)      = null
     * RegExUtils.removeAll("any", (Pattern) null)  = "any"
     * RegExUtils.removeAll("any", Pattern.compile(""))    = "any"
     * RegExUtils.removeAll("any", Pattern.compile(".*"))  = ""
     * RegExUtils.removeAll("any", Pattern.compile(".+"))  = ""
     * RegExUtils.removeAll("abc", Pattern.compile(".?"))  = ""
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("&lt;.*&gt;"))      = "A\nB"
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("(?s)&lt;.*&gt;"))  = "AB"
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("&lt;.*&gt;", Pattern.DOTALL))  = "AB"
     * RegExUtils.removeAll("ABCabc123abc", Pattern.compile("[a-z]"))     = "ABC123"
     * </pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression pattern to which this string is to be matched, may be null
     * @return  the text with any removes processed,
     *              {@code null} if null String input
     *
     * @see #replaceAll(String, Pattern, String)
     * @see java.util.regex.Matcher#replaceAll(String)
     * @see java.util.regex.Pattern
     */
    public static String removeAll(final String text, final Pattern regex) {
        return replaceAll(text, regex, EMPTY);
    }

    /**
     * Removes each substring of the text String that matches the given regular expression.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code text.replaceAll(regex, StringUtils.EMPTY)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>Unlike in the {@link #removePattern(String, String)} method, the {@link Pattern#DOTALL} option
     * is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>
     * RegExUtils.removeAll(null, *)      = null
     * RegExUtils.removeAll("any", (String) null)  = "any"
     * RegExUtils.removeAll("any", "")    = "any"
     * RegExUtils.removeAll("any", ".*")  = ""
     * RegExUtils.removeAll("any", ".+")  = ""
     * RegExUtils.removeAll("abc", ".?")  = ""
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", "&lt;.*&gt;")      = "A\nB"
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", "(?s)&lt;.*&gt;")  = "AB"
     * RegExUtils.removeAll("ABCabc123abc", "[a-z]")     = "ABC123"
     * </pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression to which this string is to be matched, may be null
     * @return  the text with any removes processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replaceAll(String, String, String)
     * @see #removePattern(String, String)
     * @see String#replaceAll(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String removeAll(final String text, final String regex) {
        return replaceAll(text, regex, EMPTY);
    }

    /**
     * Removes the first substring of the text string that matches the given regular expression pattern.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceFirst(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.removeFirst(null, *)      = null
     * RegExUtils.removeFirst("any", (Pattern) null)  = "any"
     * RegExUtils.removeFirst("any", Pattern.compile(""))    = "any"
     * RegExUtils.removeFirst("any", Pattern.compile(".*"))  = ""
     * RegExUtils.removeFirst("any", Pattern.compile(".+"))  = ""
     * RegExUtils.removeFirst("abc", Pattern.compile(".?"))  = "bc"
     * RegExUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("&lt;.*&gt;"))      = "A\n&lt;__&gt;B"
     * RegExUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("(?s)&lt;.*&gt;"))  = "AB"
     * RegExUtils.removeFirst("ABCabc123", Pattern.compile("[a-z]"))          = "ABCbc123"
     * RegExUtils.removeFirst("ABCabc123abc", Pattern.compile("[a-z]+"))      = "ABC123abc"
     * </pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression pattern to which this string is to be matched, may be null
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @see #replaceFirst(String, Pattern, String)
     * @see java.util.regex.Matcher#replaceFirst(String)
     * @see java.util.regex.Pattern
     */
    public static String removeFirst(final String text, final Pattern regex) {
        return replaceFirst(text, regex, EMPTY);
    }

    /**
     * Removes the first substring of the text string that matches the given regular expression.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code text.replaceFirst(regex, StringUtils.EMPTY)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>
     * RegExUtils.removeFirst(null, *)      = null
     * RegExUtils.removeFirst("any", (String) null)  = "any"
     * RegExUtils.removeFirst("any", "")    = "any"
     * RegExUtils.removeFirst("any", ".*")  = ""
     * RegExUtils.removeFirst("any", ".+")  = ""
     * RegExUtils.removeFirst("abc", ".?")  = "bc"
     * RegExUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", "&lt;.*&gt;")      = "A\n&lt;__&gt;B"
     * RegExUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", "(?s)&lt;.*&gt;")  = "AB"
     * RegExUtils.removeFirst("ABCabc123", "[a-z]")          = "ABCbc123"
     * RegExUtils.removeFirst("ABCabc123abc", "[a-z]+")      = "ABC123abc"
     * </pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression to which this string is to be matched, may be null
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replaceFirst(String, String, String)
     * @see String#replaceFirst(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String removeFirst(final String text, final String regex) {
        return replaceFirst(text, regex, EMPTY);
    }

    /**
     * Removes each substring of the source String that matches the given regular expression using the DOTALL option.
     *
     * <p>This call is a {@code null} safe equivalent to:</p>
     * <ul>
     * <li>{@code text.replaceAll("(?s)" + regex, StringUtils.EMPTY)}</li>
     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.removePattern(null, *)       = null
     * RegExUtils.removePattern("any", (String) null)   = "any"
     * RegExUtils.removePattern("A&lt;__&gt;\n&lt;__&gt;B", "&lt;.*&gt;")  = "AB"
     * RegExUtils.removePattern("ABCabc123", "[a-z]")    = "ABC123"
     * </pre>
     *
     * @param text
     *            the source string, may be null
     * @param regex
     *            the regular expression to which this string is to be matched, may be null
     * @return The resulting {@link String}, {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replacePattern(String, String, String)
     * @see String#replaceAll(String, String)
     * @see Pattern#DOTALL
     */
    public static String removePattern(final String text, final String regex) {
        return replacePattern(text, regex, EMPTY);
    }

    /**
     * Replaces each substring of the text String that matches the given regular expression pattern with the given replacement.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceAll(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.replaceAll(null, *, *)       = null
     * RegExUtils.replaceAll("any", (Pattern) null, *)   = "any"
     * RegExUtils.replaceAll("any", *, null)   = "any"
     * RegExUtils.replaceAll("", Pattern.compile(""), "zzz")    = "zzz"
     * RegExUtils.replaceAll("", Pattern.compile(".*"), "zzz")  = "zzz"
     * RegExUtils.replaceAll("", Pattern.compile(".+"), "zzz")  = ""
     * RegExUtils.replaceAll("abc", Pattern.compile(""), "ZZ")  = "ZZaZZbZZcZZ"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("&lt;.*&gt;"), "z")                 = "z\nz"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("&lt;.*&gt;", Pattern.DOTALL), "z") = "z"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("(?s)&lt;.*&gt;"), "z")             = "z"
     * RegExUtils.replaceAll("ABCabc123", Pattern.compile("[a-z]"), "_")       = "ABC___123"
     * RegExUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123"
     * RegExUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123"
     * RegExUtils.replaceAll("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum_dolor_sit"
     * </pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression pattern to which this string is to be matched, may be null
     * @param replacement  the string to be substituted for each match, may be null
     * @return  the text with any replacements processed,
     *              {@code null} if null String input
     *
     * @see java.util.regex.Matcher#replaceAll(String)
     * @see java.util.regex.Pattern
     */
    public static String replaceAll(final String text, final Pattern regex, final String replacement) {
        // Explicit comparisons, matching the replaceFirst overloads; any null argument is a no-op.
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return regex.matcher(text).replaceAll(replacement);
    }

    /**
     * Replaces each substring of the text String that matches the given regular expression
     * with the given replacement.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code text.replaceAll(regex, replacement)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>Unlike in the {@link #replacePattern(String, String, String)} method, the {@link Pattern#DOTALL} option
     * is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>
     * RegExUtils.replaceAll(null, *, *)       = null
     * RegExUtils.replaceAll("any", (String) null, *)   = "any"
     * RegExUtils.replaceAll("any", *, null)   = "any"
     * RegExUtils.replaceAll("", "", "zzz")    = "zzz"
     * RegExUtils.replaceAll("", ".*", "zzz")  = "zzz"
     * RegExUtils.replaceAll("", ".+", "zzz")  = ""
     * RegExUtils.replaceAll("abc", "", "ZZ")  = "ZZaZZbZZcZZ"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", "&lt;.*&gt;", "z")      = "z\nz"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", "(?s)&lt;.*&gt;", "z")  = "z"
     * RegExUtils.replaceAll("ABCabc123", "[a-z]", "_")       = "ABC___123"
     * RegExUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
     * RegExUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
     * RegExUtils.replaceAll("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
     * </pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression to which this string is to be matched, may be null
     * @param replacement  the string to be substituted for each match, may be null
     * @return  the text with any replacements processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replacePattern(String, String, String)
     * @see String#replaceAll(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String replaceAll(final String text, final String regex, final String replacement) {
        // Explicit comparisons, matching the replaceFirst overloads; any null argument is a no-op.
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return text.replaceAll(regex, replacement);
    }

    /**
     * Replaces the first substring of the text string that matches the given regular expression pattern
     * with the given replacement.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceFirst(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.replaceFirst(null, *, *)       = null
     * RegExUtils.replaceFirst("any", (Pattern) null, *)   = "any"
     * RegExUtils.replaceFirst("any", *, null)   = "any"
     * RegExUtils.replaceFirst("", Pattern.compile(""), "zzz")    = "zzz"
     * RegExUtils.replaceFirst("", Pattern.compile(".*"), "zzz")  = "zzz"
     * RegExUtils.replaceFirst("", Pattern.compile(".+"), "zzz")  = ""
     * RegExUtils.replaceFirst("abc", Pattern.compile(""), "ZZ")  = "ZZabc"
     * RegExUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("&lt;.*&gt;"), "z")      = "z\n&lt;__&gt;"
     * RegExUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("(?s)&lt;.*&gt;"), "z")  = "z"
     * RegExUtils.replaceFirst("ABCabc123", Pattern.compile("[a-z]"), "_")          = "ABC_bc123"
     * RegExUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123abc"
     * RegExUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123abc"
     * RegExUtils.replaceFirst("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum  dolor   sit"
     * </pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression pattern to which this string is to be matched, may be null
     * @param replacement  the string to be substituted for the first match, may be null
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @see java.util.regex.Matcher#replaceFirst(String)
     * @see java.util.regex.Pattern
     */
    public static String replaceFirst(final String text, final Pattern regex, final String replacement) {
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return regex.matcher(text).replaceFirst(replacement);
    }

    /**
     * Replaces the first substring of the text string that matches the given regular expression
     * with the given replacement.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code text.replaceFirst(regex, replacement)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>
     * RegExUtils.replaceFirst(null, *, *)       = null
     * RegExUtils.replaceFirst("any", (String) null, *)   = "any"
     * RegExUtils.replaceFirst("any", *, null)   = "any"
     * RegExUtils.replaceFirst("", "", "zzz")    = "zzz"
     * RegExUtils.replaceFirst("", ".*", "zzz")  = "zzz"
     * RegExUtils.replaceFirst("", ".+", "zzz")  = ""
     * RegExUtils.replaceFirst("abc", "", "ZZ")  = "ZZabc"
     * RegExUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", "&lt;.*&gt;", "z")      = "z\n&lt;__&gt;"
     * RegExUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", "(?s)&lt;.*&gt;", "z")  = "z"
     * RegExUtils.replaceFirst("ABCabc123", "[a-z]", "_")          = "ABC_bc123"
     * RegExUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "_")  = "ABC_123abc"
     * RegExUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "")   = "ABC123abc"
     * RegExUtils.replaceFirst("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum  dolor   sit"
     * </pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression to which this string is to be matched, may be null
     * @param replacement  the string to be substituted for the first match, may be null
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see String#replaceFirst(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String replaceFirst(final String text, final String regex, final String replacement) {
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return text.replaceFirst(regex, replacement);
    }

    /**
     * Replaces each substring of the source String that matches the given regular expression with the given
     * replacement using the {@link Pattern#DOTALL} option. DOTALL is also known as single-line mode in Perl.
     *
     * <p>This call is a {@code null} safe equivalent to:</p>
     * <ul>
     * <li>{@code text.replaceAll("(?s)" + regex, replacement)}</li>
     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.replacePattern(null, *, *)       = null
     * RegExUtils.replacePattern("any", (String) null, *)   = "any"
     * RegExUtils.replacePattern("any", *, null)   = "any"
     * RegExUtils.replacePattern("", "", "zzz")    = "zzz"
     * RegExUtils.replacePattern("", ".*", "zzz")  = "zzz"
     * RegExUtils.replacePattern("", ".+", "zzz")  = ""
     * RegExUtils.replacePattern("&lt;__&gt;\n&lt;__&gt;", "&lt;.*&gt;", "z")       = "z"
     * RegExUtils.replacePattern("ABCabc123", "[a-z]", "_")       = "ABC___123"
     * RegExUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
     * RegExUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
     * RegExUtils.replacePattern("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
     * </pre>
     *
     * @param text
     *            the source string, may be null
     * @param regex
     *            the regular expression to which this string is to be matched, may be null
     * @param replacement
     *            the string to be substituted for each match, may be null
     * @return The resulting {@link String}, {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replaceAll(String, String, String)
     * @see String#replaceAll(String, String)
     * @see Pattern#DOTALL
     */
    public static String replacePattern(final String text, final String regex, final String replacement) {
        // Explicit comparisons, matching the replaceFirst overloads; any null argument is a no-op.
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        // The pattern is compiled on every call; DOTALL makes '.' match line terminators too.
        return Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement);
    }

}
459