001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 The ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package org.conqat.lib.commons.filesystem;
018
019import java.io.File;
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Set;
026import java.util.regex.Pattern;
027import java.util.regex.PatternSyntaxException;
028
029import org.conqat.lib.commons.assertion.CCSMAssert;
030import org.conqat.lib.commons.collections.BasicPatternList;
031import org.conqat.lib.commons.string.StringUtils;
032
033/**
034 * * This class performs directory scanning, i.e. returns all files residing
035 * within a certain directory. The list of files returned can be narrowed using
036 * include/exclude pattern, which use the same syntax as the pattern known from
037 * ANT (see http://ant.apache.org/manual/dirtasks.html#patterns).
038 * <p>
039 * This class is meant to be a faster and more memory efficient replacement for
040 * ANT's DirectoryScanner.
041 * <p>
042 * Internally this works entirely with '/' as path separator. However, this is
043 * not visible from the outside.
044 * <p>
045 * Internally, the implementation uses Java's RegEx engine by translating ANT
046 * patterns to regular expressions.
047 */
048public class AntPatternDirectoryScanner {
049
050        /** The base directory. */
051        private final File baseDir;
052
053        /** Stores whether we are case sensitive. */
054        private final boolean caseSensitive;
055
056        /** The list of files found (result of scanning). */
057        private final List<String> filesFound = new ArrayList<String>();
058
059        /** Include pattern used with files. */
060        private final BasicPatternList fileIncludes = new BasicPatternList();
061
062        /** Exclude pattern used with files. */
063        private final BasicPatternList fileExcludes = new BasicPatternList();
064
065        /**
066         * Exclude patterns that are greedy, i.e. that end in '**'. These are
067         * interesting, because as soon as they match, every extension of the
068         * matched string will match as well. So these pattern can be used to skip
069         * entire directories. This list contains a subset of the pattern from
070         * {@link #fileExcludes}.
071         */
072        private final BasicPatternList greedyExcludes = new BasicPatternList();
073
074        /**
075         * List of required prefixes. This can be used if no include pattern starts
076         * with a '*'. In this case, directories not starting with the prefix may be
077         * skipped. Otherwise this attribute is null. This is an array so we can use
078         * it with the {@link StringUtils#startsWithOneOf(String, String...)}
079         * method.
080         */
081        private final String[] requiredPrefixes;
082
083        /** Constructor. */
084        private AntPatternDirectoryScanner(File baseDir, boolean caseSensitive, String[] includePatterns,
085                        String[] excludePatterns) throws PatternSyntaxException {
086                CCSMAssert.isTrue(baseDir.isDirectory(), "Can only scan in directories: " + baseDir);
087                this.baseDir = baseDir;
088                this.caseSensitive = caseSensitive;
089
090                boolean hadStarPrefix = false;
091                List<String> prefixes = new ArrayList<String>();
092                for (String include : includePatterns) {
093                        fileIncludes.add(AntPatternUtils.convertPattern(include, caseSensitive));
094                        if (include.startsWith("*")) {
095                                hadStarPrefix = true;
096                        } else {
097                                prefixes.add(extractPlainPrefix(include, caseSensitive));
098                        }
099                }
100                if (hadStarPrefix || prefixes.isEmpty()) {
101                        requiredPrefixes = null;
102                } else {
103                        requiredPrefixes = prefixes.toArray(new String[prefixes.size()]);
104                }
105
106                for (String exclude : excludePatterns) {
107                        Pattern pattern = AntPatternUtils.convertPattern(exclude, caseSensitive);
108                        fileExcludes.add(pattern);
109                        if (exclude.endsWith("**")) {
110                                greedyExcludes.add(pattern);
111                        }
112                }
113        }
114
115        /**
116         * Extract the plain prefix, i.e. the prefix of the pattern without wildcard
117         * characters or directory separators; this prefix can be used to speed up
118         * scanning, as only directories starting with one of the prefixes are
119         * relevant at all.
120         */
121        private static String extractPlainPrefix(String include, boolean caseSensitive) {
122                String prefix = include.replaceFirst("([\\*/\\?]|" + Pattern.quote(File.separator) + ").*$", "");
123                if (!caseSensitive) {
124                        prefix = prefix.toLowerCase();
125                }
126                return prefix;
127        }
128
129        /** Performs scanning starting from the given file. */
130        private String[] scan() throws IOException {
131                for (String path : listChildren(baseDir)) {
132                        String testPath = path;
133                        if (!caseSensitive) {
134                                testPath = testPath.toLowerCase();
135                        }
136
137                        if (requiredPrefixes != null && !StringUtils.startsWithOneOf(testPath, requiredPrefixes)) {
138                                continue;
139                        }
140
141                        doScan(path);
142                }
143                return filesFound.toArray(new String[filesFound.size()]);
144        }
145
146        /**
147         * Performs scanning in the directory denoted by the given relative path
148         * name.
149         */
150        private void doScan(String relativePath) throws IOException {
151                File file = new File(baseDir, relativePath);
152
153                if (file.isDirectory()) {
154                        if (!skipDirectory(relativePath)) {
155                                for (String name : listChildren(file)) {
156                                        doScan(relativePath + "/" + name);
157                                }
158                        }
159                } else if (isIncluded(relativePath) && !isExcluded(relativePath)) {
160                        String foundFile = relativePath.replace('/', File.separatorChar);
161                        filesFound.add(foundFile);
162                }
163        }
164
165        /**
166         * Lists the children of a directory. If this fails, a {@link IOException}
167         * is thrown.
168         */
169        private static Set<String> listChildren(File dir) throws IOException {
170                String[] list = dir.list();
171                if (list == null) {
172                        throw new IOException("Cannot scan in directory " + dir + "! Maybe read permissions are missing?");
173                }
174
175                // although occurring rarely, it happens that the build machine returns
176                // duplicate entries in java.io.File.list(), hence remove these here via
177                // a set. See also CR#4916.
178                return new HashSet<String>(Arrays.asList(list));
179        }
180
181        /** Heuristic used to skip entire directories. */
182        private boolean skipDirectory(String relativePath) {
183                return greedyExcludes.matchesAny(relativePath);
184        }
185
186        /** Returns whether a relative path is included. */
187        private boolean isIncluded(String relativePath) {
188                return fileIncludes.isEmpty() || fileIncludes.matchesAny(relativePath);
189        }
190
191        /** Returns whether a relative path is excluded. */
192        private boolean isExcluded(String relativePath) {
193                return fileExcludes.matchesAny(relativePath);
194        }
195
196        /**
197         * Performs directory scanning.
198         * 
199         * @param baseDir
200         *            the directory to start scanning in. All file names returned
201         *            will be relative to this file.
202         * @param caseSensitive
203         *            whether pattern should be applied case sensitive or not.
204         * @param includePatterns
205         *            the include pattern (use ANT's pattern syntax)
206         * @param excludePatterns
207         *            the exclude pattern (use ANT's pattern syntax)
208         * @throws IOException
209         *             in case of invalid pattern provided.
210         */
211        public static String[] scan(String baseDir, boolean caseSensitive, String[] includePatterns,
212                        String[] excludePatterns) throws IOException {
213                if (includePatterns == null) {
214                        includePatterns = new String[0];
215                }
216
217                if (excludePatterns == null) {
218                        excludePatterns = new String[0];
219                }
220
221                return new AntPatternDirectoryScanner(new File(baseDir), caseSensitive, includePatterns, excludePatterns)
222                                .scan();
223        }
224}