• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package java.util.regex;
18 
19 import java.util.ArrayList;
20 import java.util.List;
21 
/**
 * Used to make {@code String.split} fast (and to help {@code Pattern.split} too).
 *
 * <p>All methods are static; the class cannot be instantiated. Splitting follows the
 * {@code String.split(regex, limit)} contract:
 * <ul>
 *   <li>{@code limit > 0}: at most {@code limit} elements are returned, the last one holding
 *       the unsplit remainder of the input;</li>
 *   <li>{@code limit < 0}: the input is split as many times as possible and trailing empty
 *       strings are kept;</li>
 *   <li>{@code limit == 0}: the input is split as many times as possible and trailing empty
 *       strings are removed.</li>
 * </ul>
 * @hide
 */
public final class Splitter {
    // The RI allows regular expressions beginning with ] or }, but that's probably a bug.
    private static final String METACHARACTERS = "\\?*+[](){}^$.|";

    private Splitter() {
    }

    /**
     * Returns a result equivalent to {@code input.split(re, limit)} if it's able
     * to compute it more cheaply than the regular expression engine, or null if the
     * caller should fall back to using the regular expression engine.
     *
     * <p>The cheap path is taken only when {@code re} is either a single character that is
     * neither a regular expression metacharacter nor a UTF-16 surrogate, or a backslash
     * followed by a single metacharacter (i.e. a quoted metacharacter).
     *
     * @param re the regular expression used as the separator
     * @param input the string to split
     * @param limit the split limit, interpreted as for {@code String.split}
     * @return the split result, or null if {@code re} is not simple enough for the fast path
     */
    public static String[] fastSplit(String re, String input, int limit) {
        // Can we do it cheaply?
        int len = re.length();
        if (len == 0) {
            return null;
        }
        char ch = re.charAt(0);
        if (len == 1 && METACHARACTERS.indexOf(ch) == -1) {
            // We're looking for a single non-metacharacter. Easy, unless it is half of a
            // surrogate pair: a plain char search would then split a supplementary code
            // point in the input down the middle, so leave that case to the regex engine.
            if (ch >= Character.MIN_SURROGATE && ch <= Character.MAX_SURROGATE) {
                return null;
            }
        } else if (len == 2 && ch == '\\') {
            // We're looking for a quoted character.
            // Quoted metacharacters are effectively single non-metacharacters.
            ch = re.charAt(1);
            if (METACHARACTERS.indexOf(ch) == -1) {
                return null;
            }
        } else {
            return null;
        }

        // We can do this cheaply...

        // Unlike Perl, which considers the result of splitting the empty string to be the empty
        // array, Java returns an array containing the empty string.
        if (input.isEmpty()) {
            return new String[] { "" };
        }

        // Collect text preceding each occurrence of the separator, while there's enough space.
        // The size check comes first so that no search is performed once the limit is reached.
        List<String> list = new ArrayList<String>();
        int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit;
        int begin = 0;
        int end;
        while (list.size() + 1 < maxSize && (end = input.indexOf(ch, begin)) != -1) {
            list.add(input.substring(begin, end));
            begin = end + 1;
        }
        return finishSplit(list, input, begin, maxSize, limit);
    }

    /**
     * Splits {@code input} around matches of {@code pattern}, trying the fast path first.
     *
     * @param pattern the compiled pattern, used when the fast path does not apply
     * @param re the regular expression text, used to decide whether the fast path applies
     * @param input the string to split
     * @param limit the split limit, interpreted as for {@code String.split}
     * @return the pieces of {@code input}; never null
     */
    public static String[] split(Pattern pattern, String re, String input, int limit) {
        String[] fastResult = fastSplit(re, input, limit);
        if (fastResult != null) {
            return fastResult;
        }

        // Unlike Perl, which considers the result of splitting the empty string to be the empty
        // array, Java returns an array containing the empty string.
        if (input.isEmpty()) {
            return new String[] { "" };
        }

        // Collect text preceding each occurrence of the separator, while there's enough space.
        // The size check comes first so that no match is attempted once the limit is reached.
        List<String> list = new ArrayList<String>();
        int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit;
        Matcher matcher = pattern.matcher(input);
        int begin = 0;
        while (list.size() + 1 < maxSize && matcher.find()) {
            list.add(input.substring(begin, matcher.start()));
            begin = matcher.end();
        }
        return finishSplit(list, input, begin, maxSize, limit);
    }

    /**
     * Completes a split: appends the text following the last consumed separator, drops
     * trailing empty strings when {@code limit == 0}, and converts the list to an array.
     *
     * @param list the pieces collected so far; modified in place
     * @param input the string being split
     * @param begin index in {@code input} just past the last consumed separator
     * @param maxSize the effective maximum number of pieces (not used here)
     * @param limit the caller's original limit
     */
    private static String[] finishSplit(List<String> list, String input, int begin, int maxSize, int limit) {
        // Add trailing text.
        if (begin < input.length()) {
            list.add(input.substring(begin));
        } else if (limit != 0) { // No point adding the empty string if limit == 0, just to remove it below.
            list.add("");
        }
        // Remove all trailing empty matches in the limit == 0 case.
        if (limit == 0) {
            for (int i = list.size() - 1; i >= 0 && list.get(i).isEmpty(); --i) {
                list.remove(i);
            }
        }
        // Convert to an array.
        return list.toArray(new String[list.size()]);
    }
}
121