• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 package org.apache.commons.io;
18 
19 import java.io.File;
20 import java.util.ArrayDeque;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.Deque;
25 import java.util.List;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28 import java.util.stream.Stream;
29 
30 /**
31  * General file name and file path manipulation utilities.
32  * <p>
33  * When dealing with file names you can hit problems when moving from a Windows
34  * based development machine to a Unix based production machine.
35  * This class aims to help avoid those problems.
36  * </p>
37  * <p>
38  * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
39  * using JDK {@link java.io.File File} objects and the two argument constructor
40  * {@link java.io.File#File(java.io.File, String) File(File,String)}.
41  * </p>
42  * <p>
43  * Most methods on this class are designed to work the same on both Unix and Windows.
 * Those that do not behave the same on both have 'System', 'Unix' or 'Windows' in their name.
45  * </p>
46  * <p>
47  * Most methods recognize both separators (forward and back), and both
48  * sets of prefixes. See the Javadoc of each method for details.
49  * </p>
50  * <p>
51  * This class defines six components within a file name
52  * (example C:\dev\project\file.txt):
53  * </p>
54  * <ul>
55  * <li>the prefix - C:\</li>
56  * <li>the path - dev\project\</li>
57  * <li>the full path - C:\dev\project\</li>
58  * <li>the name - file.txt</li>
59  * <li>the base name - file</li>
60  * <li>the extension - txt</li>
61  * </ul>
62  * <p>
63  * Note that this class works best if directory file names end with a separator.
64  * If you omit the last separator, it is impossible to determine if the file name
65  * corresponds to a file or a directory. As a result, we have chosen to say
66  * it corresponds to a file.
67  * </p>
68  * <p>
69  * This class only supports Unix and Windows style names.
70  * Prefixes are matched as follows:
71  * </p>
72  * <pre>
73  * Windows:
74  * a\b\c.txt           --&gt; ""          --&gt; relative
75  * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
76  * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
77  * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
78  * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
79  *
80  * Unix:
81  * a/b/c.txt           --&gt; ""          --&gt; relative
82  * /a/b/c.txt          --&gt; "/"         --&gt; absolute
83  * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
84  * ~                   --&gt; "~/"        --&gt; current user (slash added)
85  * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
86  * ~user               --&gt; "~user/"    --&gt; named user (slash added)
87  * </pre>
88  * <p>
89  * Both prefix styles are matched always, irrespective of the machine that you are
90  * currently running on.
91  * </p>
92  * <p>
93  * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
94  * </p>
95  *
96  * @since 1.1
97  */
98 public class FilenameUtils {
99 
100     private static final String[] EMPTY_STRING_ARRAY = {};
101 
102     private static final String EMPTY_STRING = "";
103 
104     private static final int NOT_FOUND = -1;
105 
106     /**
107      * The extension separator character.
108      * @since 1.4
109      */
110     public static final char EXTENSION_SEPARATOR = '.';
111 
112     /**
113      * The extension separator String.
114      * @since 1.4
115      */
116     public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
117 
118     /**
119      * The Unix separator character.
120      */
121     private static final char UNIX_NAME_SEPARATOR = '/';
122 
123     /**
124      * The Windows separator character.
125      */
126     private static final char WINDOWS_NAME_SEPARATOR = '\\';
127 
128     /**
129      * The system separator character.
130      */
131     private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;
132 
133     /**
134      * The separator character that is the opposite of the system separator.
135      */
136     private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);
137 
138     private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");
139 
140     private static final int IPV4_MAX_OCTET_VALUE = 255;
141 
142     private static final int IPV6_MAX_HEX_GROUPS = 8;
143 
144     private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;
145 
146     private static final int MAX_UNSIGNED_SHORT = 0xffff;
147 
148     private static final int BASE_16 = 16;
149 
150     private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
151 
152     /**
153      * Concatenates a fileName to a base path using normal command line style rules.
154      * <p>
155      * The effect is equivalent to resultant directory after changing
156      * directory to the first argument, followed by changing directory to
157      * the second argument.
158      * </p>
159      * <p>
160      * The first argument is the base path, the second is the path to concatenate.
161      * The returned path is always normalized via {@link #normalize(String)},
162      * thus {@code ..} is handled.
163      * </p>
164      * <p>
165      * If {@code pathToAdd} is absolute (has an absolute prefix), then
166      * it will be normalized and returned.
167      * Otherwise, the paths will be joined, normalized and returned.
168      * </p>
169      * <p>
170      * The output will be the same on both Unix and Windows except
171      * for the separator character.
172      * </p>
173      * <pre>
174      * /foo/      + bar        --&gt;  /foo/bar
175      * /foo       + bar        --&gt;  /foo/bar
176      * /foo       + /bar       --&gt;  /bar
177      * /foo       + C:/bar     --&gt;  C:/bar
178      * /foo       + C:bar      --&gt;  C:bar [1]
179      * /foo/a/    + ../bar     --&gt;  /foo/bar
180      * /foo/      + ../../bar  --&gt;  null
181      * /foo/      + /bar       --&gt;  /bar
182      * /foo/..    + /bar       --&gt;  /bar
183      * /foo       + bar/c.txt  --&gt;  /foo/bar/c.txt
184      * /foo/c.txt + bar        --&gt;  /foo/c.txt/bar [2]
185      * </pre>
186      * <p>
187      * [1] Note that the Windows relative drive prefix is unreliable when
188      * used with this method.
189      * </p>
190      * <p>
191      * [2] Note that the first parameter must be a path. If it ends with a name, then
192      * the name will be built into the concatenated path. If this might be a problem,
193      * use {@link #getFullPath(String)} on the base path argument.
194      * </p>
195      *
196      * @param basePath  the base path to attach to, always treated as a path
197      * @param fullFileNameToAdd  the fileName (or path) to attach to the base
198      * @return the concatenated path, or null if invalid
199      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
200      */
concat(final String basePath, final String fullFileNameToAdd)201     public static String concat(final String basePath, final String fullFileNameToAdd) {
202         final int prefix = getPrefixLength(fullFileNameToAdd);
203         if (prefix < 0) {
204             return null;
205         }
206         if (prefix > 0) {
207             return normalize(fullFileNameToAdd);
208         }
209         if (basePath == null) {
210             return null;
211         }
212         final int len = basePath.length();
213         if (len == 0) {
214             return normalize(fullFileNameToAdd);
215         }
216         final char ch = basePath.charAt(len - 1);
217         if (isSeparator(ch)) {
218             return normalize(basePath + fullFileNameToAdd);
219         }
220         return normalize(basePath + '/' + fullFileNameToAdd);
221     }
222 
223     /**
224      * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory).
225      * <p>
226      * The files names are expected to be normalized.
227      * </p>
228      *
229      * Edge cases:
230      * <ul>
     * <li>A null or empty parent does not contain anything: return false</li>
232      * <li>A directory does not contain itself: return false</li>
233      * <li>A null child file is not contained in any parent: return false</li>
234      * </ul>
235      *
236      * @param canonicalParent
237      *            the file to consider as the parent.
238      * @param canonicalChild
239      *            the file to consider as the child.
     * @return true if the child is under the specified parent, false otherwise.
241      * @since 2.2
242      * @see FileUtils#directoryContains(File, File)
243      */
directoryContains(final String canonicalParent, final String canonicalChild)244     public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
245         if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) {
246             return false;
247         }
248 
249         if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
250             return false;
251         }
252 
253         final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR);
254         final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator;
255 
256         return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator);
257     }
258 
259     /**
260      * Does the work of getting the path.
261      *
262      * @param fileName  the fileName
263      * @param includeSeparator  true to include the end separator
264      * @return the path
265      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
266      */
doGetFullPath(final String fileName, final boolean includeSeparator)267     private static String doGetFullPath(final String fileName, final boolean includeSeparator) {
268         if (fileName == null) {
269             return null;
270         }
271         final int prefix = getPrefixLength(fileName);
272         if (prefix < 0) {
273             return null;
274         }
275         if (prefix >= fileName.length()) {
276             if (includeSeparator) {
277                 return getPrefix(fileName);  // add end slash if necessary
278             }
279             return fileName;
280         }
281         final int index = indexOfLastSeparator(fileName);
282         if (index < 0) {
283             return fileName.substring(0, prefix);
284         }
285         int end = index + (includeSeparator ?  1 : 0);
286         if (end == 0) {
287             end++;
288         }
289         return fileName.substring(0, end);
290     }
291 
292     /**
293      * Does the work of getting the path.
294      *
295      * @param fileName  the fileName
296      * @param separatorAdd  0 to omit the end separator, 1 to return it
297      * @return the path
298      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
299      */
doGetPath(final String fileName, final int separatorAdd)300     private static String doGetPath(final String fileName, final int separatorAdd) {
301         if (fileName == null) {
302             return null;
303         }
304         final int prefix = getPrefixLength(fileName);
305         if (prefix < 0) {
306             return null;
307         }
308         final int index = indexOfLastSeparator(fileName);
309         final int endIndex = index + separatorAdd;
310         if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
311             return EMPTY_STRING;
312         }
313         return requireNonNullChars(fileName.substring(prefix, endIndex));
314     }
315 
316     /**
317      * Internal method to perform the normalization.
318      *
319      * @param fileName  the fileName
320      * @param separator The separator character to use
321      * @param keepSeparator  true to keep the final separator
322      * @return the normalized fileName
323      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
324      */
doNormalize(final String fileName, final char separator, final boolean keepSeparator)325     private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
326         if (fileName == null) {
327             return null;
328         }
329 
330         requireNonNullChars(fileName);
331 
332         int size = fileName.length();
333         if (size == 0) {
334             return fileName;
335         }
336         final int prefix = getPrefixLength(fileName);
337         if (prefix < 0) {
338             return null;
339         }
340 
341         final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
342         fileName.getChars(0, fileName.length(), array, 0);
343 
344         // fix separators throughout
345         final char otherSeparator = flipSeparator(separator);
346         for (int i = 0; i < array.length; i++) {
347             if (array[i] == otherSeparator) {
348                 array[i] = separator;
349             }
350         }
351 
352         // add extra separator on the end to simplify code below
353         boolean lastIsDirectory = true;
354         if (array[size - 1] != separator) {
355             array[size++] = separator;
356             lastIsDirectory = false;
357         }
358 
359         // adjoining slashes
360         // If we get here, prefix can only be 0 or greater, size 1 or greater
361         // If prefix is 0, set loop start to 1 to prevent index errors
362         for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
363             if (array[i] == separator && array[i - 1] == separator) {
364                 System.arraycopy(array, i, array, i - 1, size - i);
365                 size--;
366                 i--;
367             }
368         }
369 
370         // dot slash
371         for (int i = prefix + 1; i < size; i++) {
372             if (array[i] == separator && array[i - 1] == '.' &&
373                     (i == prefix + 1 || array[i - 2] == separator)) {
374                 if (i == size - 1) {
375                     lastIsDirectory = true;
376                 }
377                 System.arraycopy(array, i + 1, array, i - 1, size - i);
378                 size -=2;
379                 i--;
380             }
381         }
382 
383         // double dot slash
384         outer:
385         for (int i = prefix + 2; i < size; i++) {
386             if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
387                     (i == prefix + 2 || array[i - 3] == separator)) {
388                 if (i == prefix + 2) {
389                     return null;
390                 }
391                 if (i == size - 1) {
392                     lastIsDirectory = true;
393                 }
394                 int j;
395                 for (j = i - 4 ; j >= prefix; j--) {
396                     if (array[j] == separator) {
397                         // remove b/../ from a/b/../c
398                         System.arraycopy(array, i + 1, array, j + 1, size - i);
399                         size -= i - j;
400                         i = j + 1;
401                         continue outer;
402                     }
403                 }
404                 // remove a/../ from a/../c
405                 System.arraycopy(array, i + 1, array, prefix, size - i);
406                 size -= i + 1 - prefix;
407                 i = prefix + 1;
408             }
409         }
410 
411         if (size <= 0) {  // should never be less than 0
412             return EMPTY_STRING;
413         }
414         if (size <= prefix) {  // should never be less than prefix
415             return new String(array, 0, size);
416         }
417         if (lastIsDirectory && keepSeparator) {
418             return new String(array, 0, size);  // keep trailing separator
419         }
420         return new String(array, 0, size - 1);  // lose trailing separator
421     }
422 
423     /**
424      * Checks whether two fileNames are equal exactly.
425      * <p>
426      * No processing is performed on the fileNames other than comparison,
427      * thus this is merely a null-safe case-sensitive equals.
428      * </p>
429      *
430      * @param fileName1  the first fileName to query, may be null
431      * @param fileName2  the second fileName to query, may be null
432      * @return true if the fileNames are equal, null equals null
433      * @see IOCase#SENSITIVE
434      */
equals(final String fileName1, final String fileName2)435     public static boolean equals(final String fileName1, final String fileName2) {
436         return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
437     }
438 
439     /**
440      * Checks whether two fileNames are equal, optionally normalizing and providing
441      * control over the case-sensitivity.
442      *
443      * @param fileName1  the first fileName to query, may be null
444      * @param fileName2  the second fileName to query, may be null
445      * @param normalize  whether to normalize the fileNames
446      * @param ioCase  what case sensitivity rule to use, null means case-sensitive
447      * @return true if the fileNames are equal, null equals null
448      * @since 1.3
449      */
equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase)450     public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) {
451 
452         if (fileName1 == null || fileName2 == null) {
453             return fileName1 == null && fileName2 == null;
454         }
455         if (normalize) {
456             fileName1 = normalize(fileName1);
457             if (fileName1 == null) {
458                 return false;
459             }
460             fileName2 = normalize(fileName2);
461             if (fileName2 == null) {
462                 return false;
463             }
464         }
465         return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2);
466     }
467 
468     /**
469      * Checks whether two fileNames are equal after both have been normalized.
470      * <p>
471      * Both fileNames are first passed to {@link #normalize(String)}.
472      * The check is then performed in a case-sensitive manner.
473      * </p>
474      *
475      * @param fileName1  the first fileName to query, may be null
476      * @param fileName2  the second fileName to query, may be null
477      * @return true if the fileNames are equal, null equals null
478      * @see IOCase#SENSITIVE
479      */
equalsNormalized(final String fileName1, final String fileName2)480     public static boolean equalsNormalized(final String fileName1, final String fileName2) {
481         return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
482     }
483 
484     /**
485      * Checks whether two fileNames are equal after both have been normalized
486      * and using the case rules of the system.
487      * <p>
488      * Both fileNames are first passed to {@link #normalize(String)}.
489      * The check is then performed case-sensitive on Unix and
490      * case-insensitive on Windows.
491      * </p>
492      *
493      * @param fileName1  the first fileName to query, may be null
494      * @param fileName2  the second fileName to query, may be null
495      * @return true if the fileNames are equal, null equals null
496      * @see IOCase#SYSTEM
497      */
equalsNormalizedOnSystem(final String fileName1, final String fileName2)498     public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
499         return equals(fileName1, fileName2, true, IOCase.SYSTEM);
500     }
501 
502     /**
503      * Checks whether two fileNames are equal using the case rules of the system.
504      * <p>
505      * No processing is performed on the fileNames other than comparison.
506      * The check is case-sensitive on Unix and case-insensitive on Windows.
507      * </p>
508      *
509      * @param fileName1  the first fileName to query, may be null
510      * @param fileName2  the second fileName to query, may be null
511      * @return true if the fileNames are equal, null equals null
512      * @see IOCase#SYSTEM
513      */
equalsOnSystem(final String fileName1, final String fileName2)514     public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
515         return equals(fileName1, fileName2, false, IOCase.SYSTEM);
516     }
517 
518     /**
519      * Flips the Windows name separator to Linux and vice-versa.
520      *
521      * @param ch The Windows or Linux name separator.
522      * @return The Windows or Linux name separator.
523      */
flipSeparator(final char ch)524     static char flipSeparator(final char ch) {
525         if (ch == UNIX_NAME_SEPARATOR) {
526             return WINDOWS_NAME_SEPARATOR;
527         }
528         if (ch == WINDOWS_NAME_SEPARATOR) {
529             return UNIX_NAME_SEPARATOR;
530         }
531         throw new IllegalArgumentException(String.valueOf(ch));
532     }
533 
534     /**
535      * Special handling for NTFS ADS: Don't accept colon in the fileName.
536      *
537      * @param fileName a file name
     * @return the offset of the first character after the last name separator, or 0 if the fileName contains no separator.
539      */
getAdsCriticalOffset(final String fileName)540     private static int getAdsCriticalOffset(final String fileName) {
541         // Step 1: Remove leading path segments.
542         final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR);
543         final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
544         if (offset1 == -1) {
545             if (offset2 == -1) {
546                 return 0;
547             }
548             return offset2 + 1;
549         }
550         if (offset2 == -1) {
551             return offset1 + 1;
552         }
553         return Math.max(offset1, offset2) + 1;
554     }
555 
556     /**
557      * Gets the base name, minus the full path and extension, from a full fileName.
558      * <p>
559      * This method will handle a file in either Unix or Windows format.
560      * The text after the last forward or backslash and before the last dot is returned.
561      * </p>
562      * <pre>
563      * a/b/c.txt --&gt; c
564      * a.txt     --&gt; a
565      * a/b/c     --&gt; c
566      * a/b/c/    --&gt; ""
567      * </pre>
568      * <p>
569      * The output will be the same irrespective of the machine that the code is running on.
570      * </p>
571      *
572      * @param fileName  the fileName to query, null returns null
573      * @return the name of the file without the path, or an empty string if none exists
574      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
575      */
getBaseName(final String fileName)576     public static String getBaseName(final String fileName) {
577         return removeExtension(getName(fileName));
578     }
579 
580     /**
581      * Gets the extension of a fileName.
582      * <p>
583      * This method returns the textual part of the fileName after the last dot.
584      * There must be no directory separator after the dot.
585      * </p>
586      * <pre>
587      * foo.txt      --&gt; "txt"
588      * a/b/c.jpg    --&gt; "jpg"
589      * a/b.txt/c    --&gt; ""
590      * a/b/c        --&gt; ""
591      * </pre>
592      * <p>
593      * The output will be the same irrespective of the machine that the code is running on, with the
594      * exception of a possible {@link IllegalArgumentException} on Windows (see below).
595      * </p>
596      * <p>
597      * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt".
598      * In this case, the name wouldn't be the name of a file, but the identifier of an
599      * alternate data stream (bar.txt) on the file foo.exe. The method used to return
600      * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
601      * an {@link IllegalArgumentException} for names like this.
602      * </p>
603      *
604      * @param fileName the fileName to retrieve the extension of.
605      * @return the extension of the file or an empty string if none exists or {@code null}
606      * if the fileName is {@code null}.
607      * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact,
608      * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
609      */
getExtension(final String fileName)610     public static String getExtension(final String fileName) throws IllegalArgumentException {
611         if (fileName == null) {
612             return null;
613         }
614         final int index = indexOfExtension(fileName);
615         if (index == NOT_FOUND) {
616             return EMPTY_STRING;
617         }
618         return fileName.substring(index + 1);
619     }
620 
621     /**
622      * Gets the full path from a full fileName, which is the prefix + path.
623      * <p>
624      * This method will handle a file in either Unix or Windows format.
625      * The method is entirely text based, and returns the text before and
626      * including the last forward or backslash.
627      * </p>
628      * <pre>
629      * C:\a\b\c.txt --&gt; C:\a\b\
630      * ~/a/b/c.txt  --&gt; ~/a/b/
631      * a.txt        --&gt; ""
632      * a/b/c        --&gt; a/b/
633      * a/b/c/       --&gt; a/b/c/
634      * C:           --&gt; C:
635      * C:\          --&gt; C:\
636      * ~            --&gt; ~/
637      * ~/           --&gt; ~/
638      * ~user        --&gt; ~user/
639      * ~user/       --&gt; ~user/
640      * </pre>
641      * <p>
642      * The output will be the same irrespective of the machine that the code is running on.
643      * </p>
644      *
645      * @param fileName  the fileName to query, null returns null
646      * @return the path of the file, an empty string if none exists, null if invalid
647      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
648      */
getFullPath(final String fileName)649     public static String getFullPath(final String fileName) {
650         return doGetFullPath(fileName, true);
651     }
652 
653     /**
654      * Gets the full path from a full fileName, which is the prefix + path,
655      * and also excluding the final directory separator.
656      * <p>
657      * This method will handle a file in either Unix or Windows format.
658      * The method is entirely text based, and returns the text before the
659      * last forward or backslash.
660      * </p>
661      * <pre>
662      * C:\a\b\c.txt --&gt; C:\a\b
663      * ~/a/b/c.txt  --&gt; ~/a/b
664      * a.txt        --&gt; ""
665      * a/b/c        --&gt; a/b
666      * a/b/c/       --&gt; a/b/c
667      * C:           --&gt; C:
668      * C:\          --&gt; C:\
669      * ~            --&gt; ~
670      * ~/           --&gt; ~
671      * ~user        --&gt; ~user
672      * ~user/       --&gt; ~user
673      * </pre>
674      * <p>
675      * The output will be the same irrespective of the machine that the code is running on.
676      * </p>
677      *
678      * @param fileName  the fileName to query, null returns null
679      * @return the path of the file, an empty string if none exists, null if invalid
680      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
681      */
getFullPathNoEndSeparator(final String fileName)682     public static String getFullPathNoEndSeparator(final String fileName) {
683         return doGetFullPath(fileName, false);
684     }
685 
686     /**
687      * Gets the name minus the path from a full fileName.
688      * <p>
689      * This method will handle a file in either Unix or Windows format.
690      * The text after the last forward or backslash is returned.
691      * </p>
692      * <pre>
693      * a/b/c.txt --&gt; c.txt
694      * a.txt     --&gt; a.txt
695      * a/b/c     --&gt; c
696      * a/b/c/    --&gt; ""
697      * </pre>
698      * <p>
699      * The output will be the same irrespective of the machine that the code is running on.
700      * </p>
701      *
702      * @param fileName  the fileName to query, null returns null
703      * @return the name of the file without the path, or an empty string if none exists
704      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
705      */
getName(final String fileName)706     public static String getName(final String fileName) {
707         if (fileName == null) {
708             return null;
709         }
710         return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1);
711     }
712 
713     /**
714      * Gets the path from a full fileName, which excludes the prefix.
715      * <p>
716      * This method will handle a file in either Unix or Windows format.
717      * The method is entirely text based, and returns the text before and
718      * including the last forward or backslash.
719      * </p>
720      * <pre>
721      * C:\a\b\c.txt --&gt; a\b\
722      * ~/a/b/c.txt  --&gt; a/b/
723      * a.txt        --&gt; ""
724      * a/b/c        --&gt; a/b/
725      * a/b/c/       --&gt; a/b/c/
726      * </pre>
727      * <p>
728      * The output will be the same irrespective of the machine that the code is running on.
729      * </p>
730      * <p>
731      * This method drops the prefix from the result.
732      * See {@link #getFullPath(String)} for the method that retains the prefix.
733      * </p>
734      *
735      * @param fileName  the fileName to query, null returns null
736      * @return the path of the file, an empty string if none exists, null if invalid
737      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
738      */
getPath(final String fileName)739     public static String getPath(final String fileName) {
740         return doGetPath(fileName, 1);
741     }
742 
743     /**
744      * Gets the path from a full fileName, which excludes the prefix, and
745      * also excluding the final directory separator.
746      * <p>
747      * This method will handle a file in either Unix or Windows format.
748      * The method is entirely text based, and returns the text before the
749      * last forward or backslash.
750      * </p>
751      * <pre>
752      * C:\a\b\c.txt --&gt; a\b
753      * ~/a/b/c.txt  --&gt; a/b
754      * a.txt        --&gt; ""
755      * a/b/c        --&gt; a/b
756      * a/b/c/       --&gt; a/b/c
757      * </pre>
758      * <p>
759      * The output will be the same irrespective of the machine that the code is running on.
760      * </p>
761      * <p>
762      * This method drops the prefix from the result.
763      * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
764      * </p>
765      *
766      * @param fileName  the fileName to query, null returns null
767      * @return the path of the file, an empty string if none exists, null if invalid
768      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
769      */
getPathNoEndSeparator(final String fileName)770     public static String getPathNoEndSeparator(final String fileName) {
771         return doGetPath(fileName, 0);
772     }
773 
774     /**
775      * Gets the prefix from a full fileName, such as {@code C:/}
776      * or {@code ~/}.
777      * <p>
778      * This method will handle a file in either Unix or Windows format.
779      * The prefix includes the first slash in the full fileName where applicable.
780      * </p>
781      * <pre>
782      * Windows:
783      * a\b\c.txt           --&gt; ""          --&gt; relative
784      * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
785      * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
786      * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
787      * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
788      *
789      * Unix:
790      * a/b/c.txt           --&gt; ""          --&gt; relative
791      * /a/b/c.txt          --&gt; "/"         --&gt; absolute
792      * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
793      * ~                   --&gt; "~/"        --&gt; current user (slash added)
794      * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
795      * ~user               --&gt; "~user/"    --&gt; named user (slash added)
796      * </pre>
797      * <p>
798      * The output will be the same irrespective of the machine that the code is running on.
799      * ie. both Unix and Windows prefixes are matched regardless.
800      * </p>
801      *
802      * @param fileName  the fileName to query, null returns null
803      * @return the prefix of the file, null if invalid
804      * @throws IllegalArgumentException if the result contains the null character ({@code U+0000})
805      */
getPrefix(final String fileName)806     public static String getPrefix(final String fileName) {
807         if (fileName == null) {
808             return null;
809         }
810         final int len = getPrefixLength(fileName);
811         if (len < 0) {
812             return null;
813         }
814         if (len > fileName.length()) {
815             requireNonNullChars(fileName);
816             return fileName + UNIX_NAME_SEPARATOR;
817         }
818         return requireNonNullChars(fileName.substring(0, len));
819     }
820 
821     /**
822      * Returns the length of the fileName prefix, such as {@code C:/} or {@code ~/}.
823      * <p>
824      * This method will handle a file in either Unix or Windows format.
825      * </p>
826      * <p>
827      * The prefix length includes the first slash in the full fileName
828      * if applicable. Thus, it is possible that the length returned is greater
829      * than the length of the input string.
830      * </p>
831      * <pre>
832      * Windows:
833      * a\b\c.txt           --&gt; 0           --&gt; relative
834      * \a\b\c.txt          --&gt; 1           --&gt; current drive absolute
835      * C:a\b\c.txt         --&gt; 2           --&gt; drive relative
836      * C:\a\b\c.txt        --&gt; 3           --&gt; absolute
837      * \\server\a\b\c.txt  --&gt; 9           --&gt; UNC
838      * \\\a\b\c.txt        --&gt; -1          --&gt; error
839      *
840      * Unix:
841      * a/b/c.txt           --&gt; 0           --&gt; relative
842      * /a/b/c.txt          --&gt; 1           --&gt; absolute
843      * ~/a/b/c.txt         --&gt; 2           --&gt; current user
844      * ~                   --&gt; 2           --&gt; current user (slash added)
845      * ~user/a/b/c.txt     --&gt; 6           --&gt; named user
846      * ~user               --&gt; 6           --&gt; named user (slash added)
847      * //server/a/b/c.txt  --&gt; 9
848      * ///a/b/c.txt        --&gt; -1          --&gt; error
849      * C:                  --&gt; 0           --&gt; valid filename as only null character and / are reserved characters
850      * </pre>
851      * <p>
852      * The output will be the same irrespective of the machine that the code is running on.
853      * ie. both Unix and Windows prefixes are matched regardless.
854      * </p>
855      * <p>
856      * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
857      * These must be followed by a server name, so double-slashes are not collapsed
858      * to a single slash at the start of the fileName.
859      * </p>
860      *
861      * @param fileName  the fileName to find the prefix in, null returns -1
862      * @return the length of the prefix, -1 if invalid or null
863      */
getPrefixLength(final String fileName)864     public static int getPrefixLength(final String fileName) {
865         if (fileName == null) {
866             return NOT_FOUND;
867         }
868         final int len = fileName.length();
869         if (len == 0) {
870             return 0;
871         }
872         char ch0 = fileName.charAt(0);
873         if (ch0 == ':') {
874             return NOT_FOUND;
875         }
876         if (len == 1) {
877             if (ch0 == '~') {
878                 return 2;  // return a length greater than the input
879             }
880             return isSeparator(ch0) ? 1 : 0;
881         }
882         if (ch0 == '~') {
883             int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1);
884             int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1);
885             if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
886                 return len + 1;  // return a length greater than the input
887             }
888             posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
889             posWin = posWin == NOT_FOUND ? posUnix : posWin;
890             return Math.min(posUnix, posWin) + 1;
891         }
892         final char ch1 = fileName.charAt(1);
893         if (ch1 == ':') {
894             ch0 = Character.toUpperCase(ch0);
895             if (ch0 >= 'A' && ch0 <= 'Z') {
896                 if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
897                     return 0;
898                 }
899                 if (len == 2 || !isSeparator(fileName.charAt(2))) {
900                     return 2;
901                 }
902                 return 3;
903             }
904             if (ch0 == UNIX_NAME_SEPARATOR) {
905                 return 1;
906             }
907             return NOT_FOUND;
908 
909         }
910         if (!isSeparator(ch0) || !isSeparator(ch1)) {
911             return isSeparator(ch0) ? 1 : 0;
912         }
913         int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2);
914         int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2);
915         if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
916             return NOT_FOUND;
917         }
918         posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
919         posWin = posWin == NOT_FOUND ? posUnix : posWin;
920         final int pos = Math.min(posUnix, posWin) + 1;
921         final String hostnamePart = fileName.substring(2, pos - 1);
922         return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
923     }
924 
925     /**
926      * Returns the index of the last extension separator character, which is a dot.
927      * <p>
928      * This method also checks that there is no directory separator after the last dot. To do this it uses
929      * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
930      * </p>
931      * <p>
932      * The output will be the same irrespective of the machine that the code is running on, with the
933      * exception of a possible {@link IllegalArgumentException} on Windows (see below).
934      * </p>
935      * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt".
936      * In this case, the name wouldn't be the name of a file, but the identifier of an
937      * alternate data stream (bar.txt) on the file foo.exe. The method used to return
938      * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
939      * an {@link IllegalArgumentException} for names like this.
940      *
941      * @param fileName
942      *            the fileName to find the last extension separator in, null returns -1
943      * @return the index of the last extension separator character, or -1 if there is no such character
944      * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact,
945      * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
946      */
indexOfExtension(final String fileName)947     public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
948         if (fileName == null) {
949             return NOT_FOUND;
950         }
951         if (isSystemWindows()) {
952             // Special handling for NTFS ADS: Don't accept colon in the fileName.
953             final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
954             if (offset != -1) {
955                 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
956             }
957         }
958         final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
959         final int lastSeparator = indexOfLastSeparator(fileName);
960         return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
961     }
962 
963     /**
964      * Returns the index of the last directory separator character.
965      * <p>
966      * This method will handle a file in either Unix or Windows format.
967      * The position of the last forward or backslash is returned.
968      * <p>
969      * The output will be the same irrespective of the machine that the code is running on.
970      *
971      * @param fileName  the fileName to find the last path separator in, null returns -1
972      * @return the index of the last separator character, or -1 if there
973      * is no such character
974      */
indexOfLastSeparator(final String fileName)975     public static int indexOfLastSeparator(final String fileName) {
976         if (fileName == null) {
977             return NOT_FOUND;
978         }
979         final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR);
980         final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR);
981         return Math.max(lastUnixPos, lastWindowsPos);
982     }
983 
isEmpty(final String string)984     private static boolean isEmpty(final String string) {
985         return string == null || string.isEmpty();
986     }
987 
988     /**
989      * Checks whether the extension of the fileName is one of those specified.
990      * <p>
991      * This method obtains the extension as the textual part of the fileName
992      * after the last dot. There must be no directory separator after the dot.
993      * The extension check is case-sensitive on all platforms.
994      *
995      * @param fileName  the fileName to query, null returns false
996      * @param extensions  the extensions to check for, null checks for no extension
997      * @return true if the fileName is one of the extensions
998      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
999      */
isExtension(final String fileName, final Collection<String> extensions)1000     public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1001         if (fileName == null) {
1002             return false;
1003         }
1004         requireNonNullChars(fileName);
1005 
1006         if (extensions == null || extensions.isEmpty()) {
1007             return indexOfExtension(fileName) == NOT_FOUND;
1008         }
1009         return extensions.contains(getExtension(fileName));
1010     }
1011 
1012     /**
1013      * Checks whether the extension of the fileName is that specified.
1014      * <p>
1015      * This method obtains the extension as the textual part of the fileName
1016      * after the last dot. There must be no directory separator after the dot.
1017      * The extension check is case-sensitive on all platforms.
1018      *
1019      * @param fileName  the fileName to query, null returns false
1020      * @param extension  the extension to check for, null or empty checks for no extension
1021      * @return true if the fileName has the specified extension
1022      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1023      */
isExtension(final String fileName, final String extension)1024     public static boolean isExtension(final String fileName, final String extension) {
1025         if (fileName == null) {
1026             return false;
1027         }
1028         requireNonNullChars(fileName);
1029 
1030         if (isEmpty(extension)) {
1031             return indexOfExtension(fileName) == NOT_FOUND;
1032         }
1033         return getExtension(fileName).equals(extension);
1034     }
1035 
1036     /**
1037      * Checks whether the extension of the fileName is one of those specified.
1038      * <p>
1039      * This method obtains the extension as the textual part of the fileName
1040      * after the last dot. There must be no directory separator after the dot.
1041      * The extension check is case-sensitive on all platforms.
1042      *
1043      * @param fileName  the fileName to query, null returns false
1044      * @param extensions  the extensions to check for, null checks for no extension
1045      * @return true if the fileName is one of the extensions
1046      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1047      */
isExtension(final String fileName, final String... extensions)1048     public static boolean isExtension(final String fileName, final String... extensions) {
1049         if (fileName == null) {
1050             return false;
1051         }
1052         requireNonNullChars(fileName);
1053 
1054         if (extensions == null || extensions.length == 0) {
1055             return indexOfExtension(fileName) == NOT_FOUND;
1056         }
1057         final String fileExt = getExtension(fileName);
1058         return Stream.of(extensions).anyMatch(fileExt::equals);
1059     }
1060 
1061     /**
1062      * Checks whether a given string represents a valid IPv4 address.
1063      *
1064      * @param name the name to validate
1065      * @return true if the given name is a valid IPv4 address
1066      */
1067     // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
isIPv4Address(final String name)1068     private static boolean isIPv4Address(final String name) {
1069         final Matcher m = IPV4_PATTERN.matcher(name);
1070         if (!m.matches() || m.groupCount() != 4) {
1071             return false;
1072         }
1073 
1074         // verify that address subgroups are legal
1075         for (int i = 1; i <= 4; i++) {
1076             final String ipSegment = m.group(i);
1077             final int iIpSegment = Integer.parseInt(ipSegment);
1078             if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
1079                 return false;
1080             }
1081 
1082             if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1083                 return false;
1084             }
1085 
1086         }
1087 
1088         return true;
1089     }
1090 
    // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
    /**
     * Checks whether a given string represents a valid IPv6 address.
     * <p>
     * The address is split on {@code ':'} into groups (called "octets" in the code below).
     * At most one {@code "::"} (compressed zeroes) is allowed, a single leading or trailing
     * colon is rejected, and the last group may be a dotted IPv4 address, which counts as
     * two groups.
     * </p>
     *
     * @param inet6Address the name to validate
     * @return true if the given name is a valid IPv6 address
     */
    private static boolean isIPv6Address(final String inet6Address) {
        final boolean containsCompressedZeroes = inet6Address.contains("::");
        // more than one "::" (including overlapping ones such as ":::") is invalid
        if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
            return false;
        }
        // a leading or trailing colon is only legal as part of "::"
        if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
                || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
            return false;
        }
        String[] octets = inet6Address.split(":");
        if (containsCompressedZeroes) {
            // normalize the split result so that "::" is represented by exactly one empty entry
            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
            if (inet6Address.endsWith("::")) {
                // String.split() drops ending empty segments
                octetList.add("");
            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
                // a leading "::" yields two empty entries at the front; drop one of them
                octetList.remove(0);
            }
            octets = octetList.toArray(EMPTY_STRING_ARRAY);
        }
        if (octets.length > IPV6_MAX_HEX_GROUPS) {
            return false;
        }
        // note: an empty entry is counted in validOctets as well (see the increment at the loop end)
        int validOctets = 0;
        int emptyOctets = 0; // consecutive empty chunks
        for (int index = 0; index < octets.length; index++) {
            final String octet = octets[index];
            if (octet.isEmpty()) {
                emptyOctets++;
                if (emptyOctets > 1) {
                    return false;
                }
            } else {
                emptyOctets = 0;
                // Is last chunk an IPv4 address?
                if (index == octets.length - 1 && octet.contains(".")) {
                    if (!isIPv4Address(octet)) {
                        return false;
                    }
                    // an embedded IPv4 address counts as two groups; "continue" skips the increment below
                    validOctets += 2;
                    continue;
                }
                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
                    return false;
                }
                final int octetInt;
                try {
                    // NOTE(review): Integer.parseInt also accepts a leading '+' or '-', so groups such as
                    // "+1" or "-0" appear to pass the checks below - confirm whether that is intended.
                    octetInt = Integer.parseInt(octet, BASE_16);
                } catch (final NumberFormatException e) {
                    return false;
                }
                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
                    return false;
                }
            }
            validOctets++;
        }
        // never more than the maximum number of groups; fewer only when "::" stands in for the missing ones
        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
    }
1157 
1158     /**
1159      * Checks whether a given string is a valid host name according to
1160      * RFC 3986 - not accepting IP addresses.
1161      *
1162      * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1163      * @param name the hostname to validate
1164      * @return true if the given name is a valid host name
1165      */
isRFC3986HostName(final String name)1166     private static boolean isRFC3986HostName(final String name) {
1167         final String[] parts = name.split("\\.", -1);
1168         for (int i = 0; i < parts.length; i++) {
1169             if (parts[i].isEmpty()) {
1170                 // trailing dot is legal, otherwise we've hit a .. sequence
1171                 return i == parts.length - 1;
1172             }
1173             if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1174                 return false;
1175             }
1176         }
1177         return true;
1178     }
1179 
1180     /**
1181      * Checks if the character is a separator.
1182      *
1183      * @param ch  the character to check
1184      * @return true if it is a separator character
1185      */
isSeparator(final char ch)1186     private static boolean isSeparator(final char ch) {
1187         return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR;
1188     }
1189 
1190     /**
1191      * Determines if Windows file system is in use.
1192      *
1193      * @return true if the system is Windows
1194      */
isSystemWindows()1195     static boolean isSystemWindows() {
1196         return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR;
1197     }
1198 
1199     /**
1200      * Checks whether a given string is a valid host name according to
1201      * RFC 3986.
1202      *
1203      * <p>Accepted are IP addresses (v4 and v6) as well as what the
1204      * RFC calls a "reg-name". Percent encoded names don't seem to be
1205      * valid names in UNC paths.</p>
1206      *
1207      * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1208      * @param name the hostname to validate
1209      * @return true if the given name is a valid host name
1210      */
isValidHostName(final String name)1211     private static boolean isValidHostName(final String name) {
1212         return isIPv6Address(name) || isRFC3986HostName(name);
1213     }
1214 
1215     /**
1216      * Normalizes a path, removing double and single dot path steps.
1217      * <p>
1218      * This method normalizes a path to a standard format.
1219      * The input may contain separators in either Unix or Windows format.
1220      * The output will contain separators in the format of the system.
1221      * <p>
1222      * A trailing slash will be retained.
1223      * A double slash will be merged to a single slash (but UNC names are handled).
1224      * A single dot path segment will be removed.
1225      * A double dot will cause that path segment and the one before to be removed.
1226      * If the double dot has no parent path segment to work with, {@code null}
1227      * is returned.
1228      * <p>
1229      * The output will be the same on both Unix and Windows except
1230      * for the separator character.
1231      * <pre>
1232      * /foo//               --&gt;   /foo/
1233      * /foo/./              --&gt;   /foo/
1234      * /foo/../bar          --&gt;   /bar
1235      * /foo/../bar/         --&gt;   /bar/
1236      * /foo/../bar/../baz   --&gt;   /baz
1237      * //foo//./bar         --&gt;   //foo/bar
1238      * /../                 --&gt;   null
1239      * ../foo               --&gt;   null
1240      * foo/bar/..           --&gt;   foo/
1241      * foo/../../bar        --&gt;   null
1242      * foo/../bar           --&gt;   bar
1243      * //server/foo/../bar  --&gt;   //server/bar
1244      * //server/../bar      --&gt;   null
1245      * C:\foo\..\bar        --&gt;   C:\bar
1246      * C:\..\bar            --&gt;   null
1247      * ~/foo/../bar/        --&gt;   ~/bar/
1248      * ~/../bar             --&gt;   null
1249      * </pre>
1250      * (Note the file separator returned will be correct for Windows/Unix)
1251      *
1252      * @param fileName  the fileName to normalize, null returns null
1253      * @return the normalized fileName, or null if invalid
1254      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1255      */
normalize(final String fileName)1256     public static String normalize(final String fileName) {
1257         return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true);
1258     }
1259 
1260     /**
1261      * Normalizes a path, removing double and single dot path steps.
1262      * <p>
1263      * This method normalizes a path to a standard format.
1264      * The input may contain separators in either Unix or Windows format.
1265      * The output will contain separators in the format specified.
1266      * <p>
1267      * A trailing slash will be retained.
1268      * A double slash will be merged to a single slash (but UNC names are handled).
1269      * A single dot path segment will be removed.
1270      * A double dot will cause that path segment and the one before to be removed.
1271      * If the double dot has no parent path segment to work with, {@code null}
1272      * is returned.
1273      * <p>
1274      * The output will be the same on both Unix and Windows except
1275      * for the separator character.
1276      * <pre>
1277      * /foo//               --&gt;   /foo/
1278      * /foo/./              --&gt;   /foo/
1279      * /foo/../bar          --&gt;   /bar
1280      * /foo/../bar/         --&gt;   /bar/
1281      * /foo/../bar/../baz   --&gt;   /baz
1282      * //foo//./bar         --&gt;   /foo/bar
1283      * /../                 --&gt;   null
1284      * ../foo               --&gt;   null
1285      * foo/bar/..           --&gt;   foo/
1286      * foo/../../bar        --&gt;   null
1287      * foo/../bar           --&gt;   bar
1288      * //server/foo/../bar  --&gt;   //server/bar
1289      * //server/../bar      --&gt;   null
1290      * C:\foo\..\bar        --&gt;   C:\bar
1291      * C:\..\bar            --&gt;   null
1292      * ~/foo/../bar/        --&gt;   ~/bar/
1293      * ~/../bar             --&gt;   null
1294      * </pre>
1295      * The output will be the same on both Unix and Windows including
1296      * the separator character.
1297      *
1298      * @param fileName  the fileName to normalize, null returns null
1299      * @param unixSeparator {@code true} if a Unix separator should
1300      * be used or {@code false} if a Windows separator should be used.
1301      * @return the normalized fileName, or null if invalid
1302      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1303      * @since 2.0
1304      */
normalize(final String fileName, final boolean unixSeparator)1305     public static String normalize(final String fileName, final boolean unixSeparator) {
1306         return doNormalize(fileName, toSeparator(unixSeparator), true);
1307     }
1308 
1309     /**
1310      * Normalizes a path, removing double and single dot path steps,
1311      * and removing any final directory separator.
1312      * <p>
1313      * This method normalizes a path to a standard format.
1314      * The input may contain separators in either Unix or Windows format.
1315      * The output will contain separators in the format of the system.
1316      * <p>
1317      * A trailing slash will be removed.
1318      * A double slash will be merged to a single slash (but UNC names are handled).
1319      * A single dot path segment will be removed.
1320      * A double dot will cause that path segment and the one before to be removed.
1321      * If the double dot has no parent path segment to work with, {@code null}
1322      * is returned.
1323      * <p>
1324      * The output will be the same on both Unix and Windows except
1325      * for the separator character.
1326      * <pre>
1327      * /foo//               --&gt;   /foo
1328      * /foo/./              --&gt;   /foo
1329      * /foo/../bar          --&gt;   /bar
1330      * /foo/../bar/         --&gt;   /bar
1331      * /foo/../bar/../baz   --&gt;   /baz
1332      * //foo//./bar         --&gt;   /foo/bar
1333      * /../                 --&gt;   null
1334      * ../foo               --&gt;   null
1335      * foo/bar/..           --&gt;   foo
1336      * foo/../../bar        --&gt;   null
1337      * foo/../bar           --&gt;   bar
1338      * //server/foo/../bar  --&gt;   //server/bar
1339      * //server/../bar      --&gt;   null
1340      * C:\foo\..\bar        --&gt;   C:\bar
1341      * C:\..\bar            --&gt;   null
1342      * ~/foo/../bar/        --&gt;   ~/bar
1343      * ~/../bar             --&gt;   null
1344      * </pre>
1345      * (Note the file separator returned will be correct for Windows/Unix)
1346      *
1347      * @param fileName  the fileName to normalize, null returns null
1348      * @return the normalized fileName, or null if invalid
1349      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1350      */
normalizeNoEndSeparator(final String fileName)1351     public static String normalizeNoEndSeparator(final String fileName) {
1352         return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false);
1353     }
1354 
1355     /**
1356      * Normalizes a path, removing double and single dot path steps,
1357      * and removing any final directory separator.
1358      * <p>
1359      * This method normalizes a path to a standard format.
1360      * The input may contain separators in either Unix or Windows format.
1361      * The output will contain separators in the format specified.
1362      * <p>
1363      * A trailing slash will be removed.
1364      * A double slash will be merged to a single slash (but UNC names are handled).
1365      * A single dot path segment will be removed.
1366      * A double dot will cause that path segment and the one before to be removed.
1367      * If the double dot has no parent path segment to work with, {@code null}
1368      * is returned.
1369      * <p>
1370      * The output will be the same on both Unix and Windows including
1371      * the separator character.
1372      * <pre>
1373      * /foo//               --&gt;   /foo
1374      * /foo/./              --&gt;   /foo
1375      * /foo/../bar          --&gt;   /bar
1376      * /foo/../bar/         --&gt;   /bar
1377      * /foo/../bar/../baz   --&gt;   /baz
1378      * //foo//./bar         --&gt;   /foo/bar
1379      * /../                 --&gt;   null
1380      * ../foo               --&gt;   null
1381      * foo/bar/..           --&gt;   foo
1382      * foo/../../bar        --&gt;   null
1383      * foo/../bar           --&gt;   bar
1384      * //server/foo/../bar  --&gt;   //server/bar
1385      * //server/../bar      --&gt;   null
1386      * C:\foo\..\bar        --&gt;   C:\bar
1387      * C:\..\bar            --&gt;   null
1388      * ~/foo/../bar/        --&gt;   ~/bar
1389      * ~/../bar             --&gt;   null
1390      * </pre>
1391      *
1392      * @param fileName  the fileName to normalize, null returns null
1393      * @param unixSeparator {@code true} if a Unix separator should
1394      * be used or {@code false} if a Windows separator should be used.
1395      * @return the normalized fileName, or null if invalid
1396      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1397      * @since 2.0
1398      */
normalizeNoEndSeparator(final String fileName, final boolean unixSeparator)1399     public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
1400          return doNormalize(fileName, toSeparator(unixSeparator), false);
1401     }
1402 
1403     /**
1404      * Removes the extension from a fileName.
1405      * <p>
1406      * This method returns the textual part of the fileName before the last dot.
1407      * There must be no directory separator after the dot.
1408      * <pre>
1409      * foo.txt    --&gt; foo
1410      * a\b\c.jpg  --&gt; a\b\c
1411      * a\b\c      --&gt; a\b\c
1412      * a.b\c      --&gt; a.b\c
1413      * </pre>
1414      * <p>
1415      * The output will be the same irrespective of the machine that the code is running on.
1416      *
1417      * @param fileName  the fileName to query, null returns null
1418      * @return the fileName minus the extension
1419      * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1420      */
removeExtension(final String fileName)1421     public static String removeExtension(final String fileName) {
1422         if (fileName == null) {
1423             return null;
1424         }
1425         requireNonNullChars(fileName);
1426 
1427         final int index = indexOfExtension(fileName);
1428         if (index == NOT_FOUND) {
1429             return fileName;
1430         }
1431         return fileName.substring(0, index);
1432     }
1433 
1434     /**
1435      * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions.
1436      *
1437      * This may be used for poison byte attacks.
1438      *
1439      * @param path the path to check
1440      * @return The input
1441      * @throws IllegalArgumentException if path contains the null character ({@code U+0000})
1442      */
requireNonNullChars(final String path)1443     private static String requireNonNullChars(final String path) {
1444         if (path.indexOf(0) >= 0) {
1445             throw new IllegalArgumentException(
1446                 "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
1447         }
1448         return path;
1449     }
1450 
1451     /**
1452      * Converts all separators to the system separator.
1453      *
1454      * @param path the path to be changed, null ignored.
1455      * @return the updated path.
1456      */
separatorsToSystem(final String path)1457     public static String separatorsToSystem(final String path) {
1458         return FileSystem.getCurrent().normalizeSeparators(path);
1459     }
1460 
1461     /**
1462      * Converts all separators to the Unix separator of forward slash.
1463      *
1464      * @param path the path to be changed, null ignored.
1465      * @return the new path.
1466      */
separatorsToUnix(final String path)1467     public static String separatorsToUnix(final String path) {
1468         return FileSystem.LINUX.normalizeSeparators(path);
1469     }
1470 
1471     /**
1472      * Converts all separators to the Windows separator of backslash.
1473      *
1474      * @param path the path to be changed, null ignored.
1475      * @return the updated path.
1476      */
separatorsToWindows(final String path)1477     public static String separatorsToWindows(final String path) {
1478         return FileSystem.WINDOWS.normalizeSeparators(path);
1479     }
1480 
1481     /**
1482      * Splits a string into a number of tokens.
1483      * The text is split by '?' and '*'.
1484      * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1485      *
1486      * @param text  the text to split
1487      * @return the array of tokens, never null
1488      */
splitOnTokens(final String text)1489     static String[] splitOnTokens(final String text) {
1490         // used by wildcardMatch
1491         // package level so a unit test may run on this
1492 
1493         if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1494             return new String[] { text };
1495         }
1496 
1497         final char[] array = text.toCharArray();
1498         final ArrayList<String> list = new ArrayList<>();
1499         final StringBuilder buffer = new StringBuilder();
1500         char prevChar = 0;
1501         for (final char ch : array) {
1502             if (ch == '?' || ch == '*') {
1503                 if (buffer.length() != 0) {
1504                     list.add(buffer.toString());
1505                     buffer.setLength(0);
1506                 }
1507                 if (ch == '?') {
1508                     list.add("?");
1509                 } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*'
1510                     list.add("*");
1511                 }
1512             } else {
1513                 buffer.append(ch);
1514             }
1515             prevChar = ch;
1516         }
1517         if (buffer.length() != 0) {
1518             list.add(buffer.toString());
1519         }
1520 
1521         return list.toArray(EMPTY_STRING_ARRAY);
1522     }
1523 
1524     /**
1525      * Returns '/' if given true, '\\' otherwise.
1526      *
1527      * @param unixSeparator which separator to return.
1528      * @return '/' if given true, '\\' otherwise.
1529      */
toSeparator(final boolean unixSeparator)1530     private static char toSeparator(final boolean unixSeparator) {
1531         return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR;
1532     }
1533 
1534     /**
1535      * Checks a fileName to see if it matches the specified wildcard matcher,
1536      * always testing case-sensitive.
1537      * <p>
1538      * The wildcard matcher uses the characters '?' and '*' to represent a
1539      * single or multiple (zero or more) wildcard characters.
1540      * This is the same as often found on DOS/Unix command lines.
1541      * The check is case-sensitive always.
1542      * <pre>
1543      * wildcardMatch("c.txt", "*.txt")      --&gt; true
1544      * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1545      * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1546      * wildcardMatch("c.txt", "*.???")      --&gt; true
1547      * wildcardMatch("c.txt", "*.????")     --&gt; false
1548      * </pre>
1549      * N.B. the sequence "*?" does not work properly at present in match strings.
1550      *
1551      * @param fileName  the fileName to match on
1552      * @param wildcardMatcher  the wildcard string to match against
1553      * @return true if the fileName matches the wildcard string
1554      * @see IOCase#SENSITIVE
1555      */
wildcardMatch(final String fileName, final String wildcardMatcher)1556     public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1557         return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1558     }
1559 
1560     /**
1561      * Checks a fileName to see if it matches the specified wildcard matcher
1562      * allowing control over case-sensitivity.
1563      * <p>
1564      * The wildcard matcher uses the characters '?' and '*' to represent a
1565      * single or multiple (zero or more) wildcard characters.
1566      * N.B. the sequence "*?" does not work properly at present in match strings.
1567      *
1568      * @param fileName  the fileName to match on
1569      * @param wildcardMatcher  the wildcard string to match against
1570      * @param ioCase  what case sensitivity rule to use, null means case-sensitive
1571      * @return true if the fileName matches the wildcard string
1572      * @since 1.3
1573      */
wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase)1574     public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
1575         if (fileName == null && wildcardMatcher == null) {
1576             return true;
1577         }
1578         if (fileName == null || wildcardMatcher == null) {
1579             return false;
1580         }
1581         ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
1582         final String[] wcs = splitOnTokens(wildcardMatcher);
1583         boolean anyChars = false;
1584         int textIdx = 0;
1585         int wcsIdx = 0;
1586         final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1587 
1588         // loop around a backtrack stack, to handle complex * matching
1589         do {
1590             if (!backtrack.isEmpty()) {
1591                 final int[] array = backtrack.pop();
1592                 wcsIdx = array[0];
1593                 textIdx = array[1];
1594                 anyChars = true;
1595             }
1596 
1597             // loop whilst tokens and text left to process
1598             while (wcsIdx < wcs.length) {
1599 
1600                 if (wcs[wcsIdx].equals("?")) {
1601                     // ? so move to next text char
1602                     textIdx++;
1603                     if (textIdx > fileName.length()) {
1604                         break;
1605                     }
1606                     anyChars = false;
1607 
1608                 } else if (wcs[wcsIdx].equals("*")) {
1609                     // set any chars status
1610                     anyChars = true;
1611                     if (wcsIdx == wcs.length - 1) {
1612                         textIdx = fileName.length();
1613                     }
1614 
1615                 } else {
1616                     // matching text token
1617                     if (anyChars) {
1618                         // any chars then try to locate text token
1619                         textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1620                         if (textIdx == NOT_FOUND) {
1621                             // token not found
1622                             break;
1623                         }
1624                         final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1625                         if (repeat >= 0) {
1626                             backtrack.push(new int[] {wcsIdx, repeat});
1627                         }
1628                     } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1629                         // matching from current position
1630                         // couldn't match token
1631                         break;
1632                     }
1633 
1634                     // matched text token, move text index to end of matched token
1635                     textIdx += wcs[wcsIdx].length();
1636                     anyChars = false;
1637                 }
1638 
1639                 wcsIdx++;
1640             }
1641 
1642             // full match
1643             if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1644                 return true;
1645             }
1646 
1647         } while (!backtrack.isEmpty());
1648 
1649         return false;
1650     }
1651 
1652     /**
1653      * Checks a fileName to see if it matches the specified wildcard matcher
1654      * using the case rules of the system.
1655      * <p>
1656      * The wildcard matcher uses the characters '?' and '*' to represent a
1657      * single or multiple (zero or more) wildcard characters.
1658      * This is the same as often found on DOS/Unix command lines.
1659      * The check is case-sensitive on Unix and case-insensitive on Windows.
1660      * <pre>
1661      * wildcardMatch("c.txt", "*.txt")      --&gt; true
1662      * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1663      * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1664      * wildcardMatch("c.txt", "*.???")      --&gt; true
1665      * wildcardMatch("c.txt", "*.????")     --&gt; false
1666      * </pre>
1667      * N.B. the sequence "*?" does not work properly at present in match strings.
1668      *
1669      * @param fileName  the fileName to match on
1670      * @param wildcardMatcher  the wildcard string to match against
1671      * @return true if the fileName matches the wildcard string
1672      * @see IOCase#SYSTEM
1673      */
wildcardMatchOnSystem(final String fileName, final String wildcardMatcher)1674     public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1675         return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1676     }
1677 
1678     /**
1679      * Instances should NOT be constructed in standard programming.
1680      */
FilenameUtils()1681     public FilenameUtils() {
1682     }
1683 }
1684