• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package android.text.util;
18 
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21 
/**
 * Pre-compiled regular expressions for spotting top-level domains, web URLs,
 * IP addresses, domain names, e-mail addresses and phone numbers in text,
 * plus helpers for post-processing a successful {@link Matcher}.
 *
 * <p>None of the patterns is anchored; use {@link Matcher#matches()} to test
 * a whole string or {@link Matcher#find()} to scan within a longer text.
 *
 * @hide
 */
public class Regex {
    /**
     *  Regular expression pattern to match all IANA top-level domains.
     *  List accurate as of 2007/06/15.  List taken from:
     *  http://data.iana.org/TLD/tlds-alpha-by-domain.txt
     *  This pattern is auto-generated by //device/tools/make-iana-tld-pattern.py
     */
    public static final Pattern TOP_LEVEL_DOMAIN_PATTERN
        = Pattern.compile(
                "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
                + "|(biz|b[abdefghijmnorstvwyz])"
                + "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
                + "|d[ejkmoz]"
                + "|(edu|e[cegrstu])"
                + "|f[ijkmor]"
                + "|(gov|g[abdefghilmnpqrstuwy])"
                + "|h[kmnrtu]"
                + "|(info|int|i[delmnoqrst])"
                + "|(jobs|j[emop])"
                + "|k[eghimnrwyz]"
                + "|l[abcikrstuvy]"
                + "|(mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
                + "|(name|net|n[acefgilopruz])"
                + "|(org|om)"
                + "|(pro|p[aefghklmnrstwy])"
                + "|qa"
                + "|r[eouw]"
                + "|s[abcdeghijklmnortuvyz]"
                + "|(tel|travel|t[cdfghjklmnoprtvwz])"
                + "|u[agkmsyz]"
                + "|v[aceginu]"
                + "|w[fs]"
                + "|y[etu]"
                + "|z[amw])");

    /**
     *  Regular expression pattern to match RFC 1738 style web URLs.
     *
     *  <p>The scheme ({@code http}, {@code https} or {@code rtsp}, with an
     *  optionally capitalised first letter) and the {@code user:password@}
     *  part are optional.  The host is either a named host ending in one of
     *  the top-level domains listed in {@link #TOP_LEVEL_DOMAIN_PATTERN}
     *  (the list is repeated inline here with non-capturing groups) or a
     *  dotted-quad IP address, optionally followed by a port, a path and
     *  query parameters.
     *
     *  <p>Capturing groups:
     *  <ol>
     *  <li>everything before the path: optional scheme and credentials,
     *      host and optional port
     *  <li>the scheme name
     *  <li>the host (named host or IP address, without the port)
     *  <li>the path and query parameters, starting with {@code /}
     *  </ol>
     */
    public static final Pattern WEB_URL_PATTERN
        = Pattern.compile(
            "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
            + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
            + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
            + "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+"   // named host
            + "(?:"   // plus top level domain
            + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
            + "|(?:biz|b[abdefghijmnorstvwyz])"
            + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
            + "|d[ejkmoz]"
            + "|(?:edu|e[cegrstu])"
            + "|f[ijkmor]"
            + "|(?:gov|g[abdefghilmnpqrstuwy])"
            + "|h[kmnrtu]"
            + "|(?:info|int|i[delmnoqrst])"
            + "|(?:jobs|j[emop])"
            + "|k[eghimnrwyz]"
            + "|l[abcikrstuvy]"
            + "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
            + "|(?:name|net|n[acefgilopruz])"
            + "|(?:org|om)"
            + "|(?:pro|p[aefghklmnrstwy])"
            + "|qa"
            + "|r[eouw]"
            + "|s[abcdeghijklmnortuvyz]"
            + "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
            + "|u[agkmsyz]"
            + "|v[aceginu]"
            + "|w[fs]"
            + "|y[etu]"
            + "|z[amw]))"
            + "|(?:(?:25[0-5]|2[0-4]" // or ip address
            + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
            + "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
            + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
            + "|[1-9][0-9]|[0-9])))"
            + "(?:\\:\\d{1,5})?)" // plus optional port number
            + "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~"  // plus optional path and query params
            + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
            + "(?:\\b|$)"); // and finally, a word boundary or end of
                            // input.  This is to stop foo.sure from
                            // matching as foo.su

    /**
     * Regular expression pattern to match a dotted-quad IPv4 address whose
     * four parts are each in the range 0-255.  A single {@code 0} is not
     * accepted as the first part.  Groups 2 to 5 capture the four parts.
     */
    public static final Pattern IP_ADDRESS_PATTERN
        = Pattern.compile(
            "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
            + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
            + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
            + "|[1-9][0-9]|[0-9]))");

    /**
     * Regular expression pattern to match either a host name made of one or
     * more dot-terminated labels followed by a top-level domain, or an IP
     * address.
     *
     * <p>The expression is assembled by string concatenation with
     * {@link #TOP_LEVEL_DOMAIN_PATTERN} and {@link #IP_ADDRESS_PATTERN};
     * concatenating a {@link Pattern} inserts the regular expression it was
     * compiled from.
     */
    public static final Pattern DOMAIN_NAME_PATTERN
        = Pattern.compile(
            "(((([a-zA-Z0-9][a-zA-Z0-9\\-]*)*[a-zA-Z0-9]\\.)+"
            + TOP_LEVEL_DOMAIN_PATTERN + ")|"
            + IP_ADDRESS_PATTERN + ")");

    /**
     * Regular expression pattern to match things that look like e-mail
     * addresses: a local part of 1 to 256 letters, digits or any of
     * {@code + . _ % -}, an {@code @} sign, and a host made of an initial
     * label followed by one or more dot-separated labels.  Each label starts
     * with a letter or digit and may continue with letters, digits and
     * dashes.  The top-level domain is not checked against the IANA list.
     */
    public static final Pattern EMAIL_ADDRESS_PATTERN
        = Pattern.compile(
            "[a-zA-Z0-9\\+\\.\\_\\%\\-]{1,256}" +
            "\\@" +
            "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}" +
            "(" +
                "\\." +
                "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25}" +
            ")+"
        );

    /**
     * This pattern is intended for searching for things that look like they
     * might be phone numbers in arbitrary text, not for validating whether
     * something is in fact a phone number.  It will miss many things that
     * are legitimate phone numbers.
     *
     * <p> The pattern matches the following:
     * <ul>
     * <li>Optionally, a + sign followed immediately by one or more digits. Spaces, dots, or dashes
     * may follow.
     * <li>Optionally, sets of digits in parentheses, separated by spaces, dots, or dashes.
     * <li>A string starting and ending with a digit, containing digits, spaces, dots, and/or dashes.
     * </ul>
     */
    public static final Pattern PHONE_PATTERN
        = Pattern.compile(                                  // sdd = space, dot, or dash
                "(\\+[0-9]+[\\- \\.]*)?"                    // +<digits><sdd>*
                + "(\\([0-9]+\\)[\\- \\.]*)?"               // (<digits>)<sdd>*
                + "([0-9][0-9\\- \\.][0-9\\- \\.]+[0-9])"); // <digit><digit|sdd>+<digit>

    /**
     *  Convenience method to take all of the non-null matching groups in a
     *  regex Matcher and return them as a concatenated string.  Group 0 (the
     *  whole match) is not included; groups that did not take part in the
     *  match are skipped.
     *
     *  @param matcher      The Matcher object from which grouped text will
     *                      be extracted; it must hold a successful match
     *
     *  @return             A String comprising all of the non-null matched
     *                      groups concatenated together
     */
    public static final String concatGroups(Matcher matcher) {
        StringBuilder b = new StringBuilder();
        final int numGroups = matcher.groupCount();

        for (int i = 1; i <= numGroups; i++) {
            String s = matcher.group(i);

            // group(i) is null when the group did not participate in the match.
            if (s != null) {
                b.append(s);
            }
        }

        return b.toString();
    }

    /**
     * Convenience method to return only the digits and plus signs
     * in the matching string.
     *
     * @param matcher      The Matcher object from which digits and plus will
     *                     be extracted; it must hold a successful match
     *
     * @return             A String comprising all of the digits and plus in
     *                     the match
     */
    public static final String digitsAndPlusOnly(Matcher matcher) {
        String matchingRegion = matcher.group();
        // The result can never be longer than the matched text.
        StringBuilder buffer = new StringBuilder(matchingRegion.length());

        for (int i = 0, size = matchingRegion.length(); i < size; i++) {
            char character = matchingRegion.charAt(i);

            if (character == '+' || Character.isDigit(character)) {
                buffer.append(character);
            }
        }
        return buffer.toString();
    }
}
205