• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu.ant;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static com.google.common.base.Preconditions.checkState;
7 import static com.google.common.collect.ImmutableSet.toImmutableSet;
8 import static java.nio.charset.StandardCharsets.UTF_8;
9 import static java.nio.file.LinkOption.NOFOLLOW_LINKS;
10 import static java.util.stream.Collectors.joining;
11 import static java.util.stream.Collectors.partitioningBy;
12 
13 import java.io.BufferedReader;
14 import java.io.IOException;
15 import java.io.InputStream;
16 import java.io.InputStreamReader;
17 import java.io.UncheckedIOException;
18 import java.nio.file.Files;
19 import java.nio.file.Path;
20 import java.nio.file.Paths;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.HashSet;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.Set;
27 import java.util.TreeSet;
28 import java.util.stream.Collectors;
29 import java.util.stream.Stream;
30 
31 import org.apache.tools.ant.BuildException;
32 import org.apache.tools.ant.Task;
33 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
34 
35 import com.google.common.base.CharMatcher;
36 import com.google.common.collect.ImmutableList;
37 import com.google.common.collect.ImmutableSet;
38 import com.google.common.collect.Iterables;
39 import com.google.common.io.CharStreams;
40 
41 // Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
42 public final class CleanOutputDirectoryTask extends Task {
43     private static final ImmutableSet<String> ALLOWED_DIRECTORIES =
44         Stream
45             .concat(
46                 Stream.of("misc", "translit"),
47                 Arrays.stream(IcuLocaleDir.values()).map(IcuLocaleDir::getOutputDir))
48             .sorted()
49             .collect(toImmutableSet());
50 
51     private static final CharMatcher NOT_WHITESPACE = CharMatcher.whitespace().negate();
52 
53     private static final String HEADER_FILE = "ldml2icu_header.txt";
54 
55     // If present in the header of a file, this line is used to determine that the file was
56     // auto-generated. This allows us to change the rest of the header freely without issue.
57     // However if it's not present in the file, we fallback to comparing the rest of the
58     // header without it (since that's the old behaviour).
59     // Once there's been an ICU release with this line included in the headers of all data
60     // files, we can remove the fallback and just test for this line and nothing else.
61     private static final String WAS_GENERATED_LABEL =
62         "Generated using tools/cldr/cldr-to-icu/build-icu-data.xml";
63 
64     // The number of header lines to check before giving up if we don't find the generated
65     // label.
66     private static final int MAX_HEADER_CHECK_LINES = 20;
67 
68     private Path root = null;
69     private boolean forceDelete = false;
70     private final List<Dir> outputDirs = new ArrayList<>();
71     private final ImmutableList<String> headerLines;
72 
CleanOutputDirectoryTask()73     public CleanOutputDirectoryTask() {
74         // TODO: Consider passing in header lines via Ant?
75         this.headerLines = readLinesFromResource("/" + HEADER_FILE);
76         // For now assume that the generated label is the last line of the header.
77         checkState(Iterables.getLast(headerLines).equals(WAS_GENERATED_LABEL),
78             "Expected last line of %s header file to be:\n\t%s", HEADER_FILE, WAS_GENERATED_LABEL);
79         // Make sure we check at least a few more lines than is in the current header.
80         checkState(MAX_HEADER_CHECK_LINES >= headerLines.size() + 5,
81             "Unexpectedly large header file; please increase MAX_HEADER_CHECK_LINES constant");
82     }
83 
84     public static final class Retain extends Task {
85         private Path path = null;
86 
87         // Don't use "Path" for the argument type because that always makes an absolute path (e.g.
88         // relative to the working directory for the Ant task). We want relative paths.
89         @SuppressWarnings("unused")
setPath(String path)90         public void setPath(String path) {
91             Path p = Paths.get(path).normalize();
92             checkBuild(!p.isAbsolute() && !p.startsWith(".."), "invalid path: %s", path);
93             this.path = p;
94         }
95 
96         @Override
init()97         public void init() throws BuildException {
98             checkBuild(path != null, "missing 'path' attribute");
99         }
100     }
101 
102     public static final class Dir extends Task {
103         private String name;
104         private final Set<Path> retained = new HashSet<>();
105 
106         @SuppressWarnings("unused")
setName(String name)107         public void setName(String name) {
108             checkBuild(ALLOWED_DIRECTORIES.contains(name),
109                 "unknown directory name '%s'; allowed values: %s", name, ALLOWED_DIRECTORIES);
110             this.name = name;
111         }
112 
113         @SuppressWarnings("unused")
addConfiguredRetain(Retain retain)114         public void addConfiguredRetain(Retain retain) {
115             retained.add(retain.path);
116         }
117 
118         @Override
init()119         public void init() throws BuildException {
120             checkBuild(name != null, "missing 'name' attribute");
121         }
122     }
123 
124     @SuppressWarnings("unused")
setRoot(String root)125     public void setRoot(String root) {
126         // Use String here since on some systems Ant doesn't support automatically converting Path instances.
127         this.root = Paths.get(root);
128     }
129 
130     @SuppressWarnings("unused")
setForceDelete(boolean forceDelete)131     public void setForceDelete(boolean forceDelete) {
132         this.forceDelete = forceDelete;
133     }
134 
135     @SuppressWarnings("unused")
addConfiguredDir(Dir dir)136     public void addConfiguredDir(Dir dir) {
137         outputDirs.add(dir);
138     }
139 
140     @Override
execute()141     public void execute() throws BuildException {
142         checkBuild(root != null, "missing 'root' attribute");
143         checkBuild(!outputDirs.isEmpty(), "missing <dir> elements");
144 
145         if (!Files.exists(root)) {
146             log("Root directory '" + root + "' does not exist (nothing to clean)");
147             return;
148         }
149         checkBuild(Files.isDirectory(root), "specified root '%s' is not a directory", root);
150 
151         Set<Path> autogenFiles = new TreeSet<>();
152         Set<Path> unknownFiles = new TreeSet<>();
153         for (Dir dirInfo : outputDirs) {
154             Path dirPath = root.resolve(dirInfo.name);
155             if (!Files.exists(dirPath)) {
156                 continue;
157             }
158             checkBuild(Files.isDirectory(dirPath), "'%s' is not a directory", dirPath);
159 
160             // Note: For now we just walk the immediate contents of each output directory and don't
161             // attempt to recursively process things. Only a couple of output directories have
162             // sub-directories anyway, and we never write files into them anyway.
163             try (Stream<Path> files = Files.list(dirPath)) {
164                 Map<Boolean, List<Path>> map = files
165                     .filter(p -> couldDelete(p, dirPath, dirInfo))
166                     .parallel()
167                     .collect(partitioningBy(this::wasAutoGenerated));
168                 unknownFiles.addAll(map.get(false));
169                 autogenFiles.addAll(map.get(true));
170             } catch (IOException e) {
171                 throw new BuildException("Error processing directory: " + dirPath, e);
172             }
173         }
174 
175         if (!unknownFiles.isEmpty() && !forceDelete) {
176             // If there are NO safe files, then something weird is going on (perhaps a change in
177             // the header file).
178             if (autogenFiles.isEmpty()) {
179                 log("Error determining 'safe' files for deletion (no auto-generated files found).");
180                 log(unknownFiles.size() + " files would be deleted for 'clean' task");
181                 logPartioned(unknownFiles);
182                 log("Set '-DforceDelete=true' to delete all files not listed in"
183                     + " <outputDirectories>.");
184             } else {
185                 // A mix of safe and unsafe files is weird, but in this case it should be a
186                 // relatively small number of files (e.g. adding a new manually maintained file or
187                 // accidental editing of header lines).
188                 log("Unknown files exist which cannot be determined to be auto-generated");
189                 log("Files:");
190                 logPartioned(unknownFiles);
191                 log(String.format("%d unknown files or directories found", unknownFiles.size()));
192                 log("Set '-DforceDelete=true' to delete these files, or add them to"
193                     + " <outputDirectories>.");
194             }
195             throw new BuildException("Unsafe files cannot be deleted");
196         }
197         if (!unknownFiles.isEmpty()) {
198             checkState(forceDelete, "unexpected flag state (forceDelete should be true here)");
199             List<Path> filesToDelete =
200                 unknownFiles.stream()
201                     .filter(p -> !Files.isDirectory(p))
202                     .collect(Collectors.toList());
203             log(String.format("Force deleting %,d files...\n", filesToDelete.size()));
204             deleteAllFiles(filesToDelete);
205 
206             List<Path> unknownDirs =
207                 unknownFiles.stream()
208                     .filter(p -> Files.isDirectory(p))
209                     .collect(Collectors.toList());
210             if (!unknownDirs.isEmpty()) {
211                 log("Add the following directories to the <outputDirectories> task:");
212                 logPartioned(unknownDirs);
213                 throw new BuildException("Unsafe directories cannot be deleted");
214             }
215         }
216         if (!autogenFiles.isEmpty()) {
217             log(String.format("Deleting %,d auto-generated files...\n", autogenFiles.size()));
218             deleteAllFiles(autogenFiles);
219         }
220     }
221 
logPartioned(Iterable<Path> files)222     private void logPartioned(Iterable<Path> files) {
223         Iterables.partition(files, 5)
224             .forEach(f -> log(
225                 f.stream().map(p -> root.relativize(p).toString()).collect(joining(", "))));
226     }
227 
couldDelete(Path path, Path dir, Dir dirInfo)228     private boolean couldDelete(Path path, Path dir, Dir dirInfo) {
229         return !dirInfo.retained.contains(dir.relativize(path));
230     }
231 
wasAutoGenerated(Path path)232     private boolean wasAutoGenerated(Path path) {
233         if (!Files.isRegularFile(path, NOFOLLOW_LINKS)) {
234             // Directories, symbolic links, devices etc.
235             return false;
236         }
237         try (BufferedReader r = Files.newBufferedReader(path, UTF_8)) {
238             return wasFileAutoGenerated(r, headerLines);
239         } catch (IOException e) {
240             throw new UncheckedIOException(e);
241         }
242     }
243 
244     // TODO: Once the WAS_GENERATED_LABEL is in all auto-generated ICU data files, simplify this.
245     // Static and non-private for testing.
wasFileAutoGenerated(BufferedReader fileReader, ImmutableList<String> headerLines)246     static boolean wasFileAutoGenerated(BufferedReader fileReader, ImmutableList<String> headerLines)
247             throws IOException {
248         // A byte-order-mark (BOM) is added to ICU data files, but not JSON deps files, so just
249         // treat it as optional everywhere (it's not the important thing we check here).
250         fileReader.mark(1);
251         int maybeByteOrderMark = fileReader.read();
252         if (maybeByteOrderMark != '\uFEFF') {
253             // Also reset if the file was empty, but that should be harmless.
254             fileReader.reset();
255         }
256         boolean isLenientHeaderMatchSoFar = true;
257         for (int n = 0; n < MAX_HEADER_CHECK_LINES ; n++) {
258             String line = fileReader.readLine();
259             // True if we have processed the header, not including the trailing generated label.
260             boolean headerIsProcessed = n >= headerLines.size() - 1;
261             boolean isCompleteLenientMatch = isLenientHeaderMatchSoFar && headerIsProcessed;
262             if (line == null) {
263                 // We ran off the end of the file, so we're done.
264                 return isCompleteLenientMatch;
265             }
266             int headerStart = skipComment(line);
267             if (headerStart < 0) {
268                 // We ran off the end of the expected comment section, so we're done.
269                 return isCompleteLenientMatch;
270             }
271             if (matchesToEndOfLine(line, headerStart, WAS_GENERATED_LABEL)) {
272                 // Finding the generated label trumps any lenient matching.
273                 return true;
274             }
275             if (!isLenientHeaderMatchSoFar) {
276                 // We already failed at lenient matching, so keep going in the hope of finding
277                 // the generated label (we don't need to check the header any more).
278                 continue;
279             }
280             if (headerIsProcessed) {
281                 // We finishing processing the header and it matched (not including the
282                 // generated label) but for older data files, that's fine.
283                 return true;
284             }
285             // Check the next header line (not including the trailing generated label).
286             isLenientHeaderMatchSoFar = matchesToEndOfLine(line, headerStart, headerLines.get(n));
287         }
288         // This is actually an unusual case. It corresponds to a file which:
289         // * has a leading comment section at least as long as MAX_HEADER_CHECK_LINES
290         // * does not contain WAS_GENERATED_LABEL anywhere in the first MAX_HEADER_CHECK_LINES
291         // Most files checked are expected to match, and so will not get here, but instead
292         // return via one of the return statements above.
293         return false;
294     }
295 
matchesToEndOfLine(String line, int start, String expected)296     private static boolean matchesToEndOfLine(String line, int start, String expected) {
297         return line.length() - start == expected.length()
298                 && line.regionMatches(start, expected, 0, expected.length());
299     }
300 
skipComment(String line)301     private static int skipComment(String line) {
302         if (line.startsWith("#")) {
303             return toCommentStart(line, 1);
304         } else if (line.startsWith("//")) {
305             return toCommentStart(line, 2);
306         }
307         return -1;
308     }
309 
310     // Not just "index-of" since a comment start followed by only whitespace is NOT a failure to
311     // find a comment (since the header might have an empty line in it, which should be okay).
toCommentStart(String line, int offset)312     private static int toCommentStart(String line, int offset) {
313         int index = NOT_WHITESPACE.indexIn(line, offset);
314         return index >= 0 ? index : line.length();
315     }
316 
deleteAllFiles(Iterable<Path> files)317     private static void deleteAllFiles(Iterable<Path> files) {
318         for (Path p : files) {
319             try {
320                 // This is a code error, since only files should be passed here.
321                 checkArgument(!Files.isDirectory(p), "Cannot delete directories: %s", p);
322                 Files.deleteIfExists(p);
323             } catch (IOException e) {
324                 throw new BuildException("Error deleting file: " + p, e);
325             }
326         }
327     }
328 
checkBuild(boolean condition, String message, Object... args)329     private static void checkBuild(boolean condition, String message, Object... args) {
330         if (!condition) {
331             throw new BuildException(String.format(message, args));
332         }
333     }
334 
readLinesFromResource(String name)335     private static ImmutableList<String> readLinesFromResource(String name) {
336         try (InputStream in = CleanOutputDirectoryTask.class.getResourceAsStream(name)) {
337             return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8)));
338         } catch (IOException e) {
339             throw new RuntimeException("cannot read resource: " + name, e);
340         }
341     }
342 }
343