001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 The ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package org.conqat.lib.commons.filesystem; 018 019import java.io.File; 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.HashSet; 024import java.util.List; 025import java.util.Set; 026import java.util.regex.Pattern; 027import java.util.regex.PatternSyntaxException; 028 029import org.conqat.lib.commons.assertion.CCSMAssert; 030import org.conqat.lib.commons.collections.BasicPatternList; 031import org.conqat.lib.commons.string.StringUtils; 032 033/** 034 * * This class performs directory scanning, i.e. returns all files residing 035 * within a certain directory. The list of files returned can be narrowed using 036 * include/exclude pattern, which use the same syntax as the pattern known from 037 * ANT (see http://ant.apache.org/manual/dirtasks.html#patterns). 038 * <p> 039 * This class is meant to be a faster and more memory efficient replacement for 040 * ANT's DirectoryScanner. 041 * <p> 042 * Internally this works entirely with '/' as path separator. However, this is 043 * not visible from the outside. 044 * <p> 045 * Internally, the implementation uses Java's RegEx engine by translating ANT 046 * patterns to regular expressions. 047 */ 048public class AntPatternDirectoryScanner { 049 050 /** The base directory. */ 051 private final File baseDir; 052 053 /** Stores whether we are case sensitive. */ 054 private final boolean caseSensitive; 055 056 /** The list of files found (result of scanning). */ 057 private final List<String> filesFound = new ArrayList<String>(); 058 059 /** Include pattern used with files. */ 060 private final BasicPatternList fileIncludes = new BasicPatternList(); 061 062 /** Exclude pattern used with files. */ 063 private final BasicPatternList fileExcludes = new BasicPatternList(); 064 065 /** 066 * Exclude patterns that are greedy, i.e. that end in '**'. These are 067 * interesting, because as soon as they match, every extension of the 068 * matched string will match as well. So these pattern can be used to skip 069 * entire directories. This list contains a subset of the pattern from 070 * {@link #fileExcludes}. 071 */ 072 private final BasicPatternList greedyExcludes = new BasicPatternList(); 073 074 /** 075 * List of required prefixes. This can be used if no include pattern starts 076 * with a '*'. In this case, directories not starting with the prefix may be 077 * skipped. Otherwise this attribute is null. This is an array so we can use 078 * it with the {@link StringUtils#startsWithOneOf(String, String...)} 079 * method. 080 */ 081 private final String[] requiredPrefixes; 082 083 /** Constructor. */ 084 private AntPatternDirectoryScanner(File baseDir, boolean caseSensitive, String[] includePatterns, 085 String[] excludePatterns) throws PatternSyntaxException { 086 CCSMAssert.isTrue(baseDir.isDirectory(), "Can only scan in directories: " + baseDir); 087 this.baseDir = baseDir; 088 this.caseSensitive = caseSensitive; 089 090 boolean hadStarPrefix = false; 091 List<String> prefixes = new ArrayList<String>(); 092 for (String include : includePatterns) { 093 fileIncludes.add(AntPatternUtils.convertPattern(include, caseSensitive)); 094 if (include.startsWith("*")) { 095 hadStarPrefix = true; 096 } else { 097 prefixes.add(extractPlainPrefix(include, caseSensitive)); 098 } 099 } 100 if (hadStarPrefix || prefixes.isEmpty()) { 101 requiredPrefixes = null; 102 } else { 103 requiredPrefixes = prefixes.toArray(new String[prefixes.size()]); 104 } 105 106 for (String exclude : excludePatterns) { 107 Pattern pattern = AntPatternUtils.convertPattern(exclude, caseSensitive); 108 fileExcludes.add(pattern); 109 if (exclude.endsWith("**")) { 110 greedyExcludes.add(pattern); 111 } 112 } 113 } 114 115 /** 116 * Extract the plain prefix, i.e. the prefix of the pattern without wildcard 117 * characters or directory separators; this prefix can be used to speed up 118 * scanning, as only directories starting with one of the prefixes are 119 * relevant at all. 120 */ 121 private static String extractPlainPrefix(String include, boolean caseSensitive) { 122 String prefix = include.replaceFirst("([\\*/\\?]|" + Pattern.quote(File.separator) + ").*$", ""); 123 if (!caseSensitive) { 124 prefix = prefix.toLowerCase(); 125 } 126 return prefix; 127 } 128 129 /** Performs scanning starting from the given file. */ 130 private String[] scan() throws IOException { 131 for (String path : listChildren(baseDir)) { 132 String testPath = path; 133 if (!caseSensitive) { 134 testPath = testPath.toLowerCase(); 135 } 136 137 if (requiredPrefixes != null && !StringUtils.startsWithOneOf(testPath, requiredPrefixes)) { 138 continue; 139 } 140 141 doScan(path); 142 } 143 return filesFound.toArray(new String[filesFound.size()]); 144 } 145 146 /** 147 * Performs scanning in the directory denoted by the given relative path 148 * name. 149 */ 150 private void doScan(String relativePath) throws IOException { 151 File file = new File(baseDir, relativePath); 152 153 if (file.isDirectory()) { 154 if (!skipDirectory(relativePath)) { 155 for (String name : listChildren(file)) { 156 doScan(relativePath + "/" + name); 157 } 158 } 159 } else if (isIncluded(relativePath) && !isExcluded(relativePath)) { 160 String foundFile = relativePath.replace('/', File.separatorChar); 161 filesFound.add(foundFile); 162 } 163 } 164 165 /** 166 * Lists the children of a directory. If this fails, a {@link IOException} 167 * is thrown. 168 */ 169 private static Set<String> listChildren(File dir) throws IOException { 170 String[] list = dir.list(); 171 if (list == null) { 172 throw new IOException("Cannot scan in directory " + dir + "! Maybe read permissions are missing?"); 173 } 174 175 // although occurring rarely, it happens that the build machine returns 176 // duplicate entries in java.io.File.list(), hence remove these here via 177 // a set. See also CR#4916. 178 return new HashSet<String>(Arrays.asList(list)); 179 } 180 181 /** Heuristic used to skip entire directories. */ 182 private boolean skipDirectory(String relativePath) { 183 return greedyExcludes.matchesAny(relativePath); 184 } 185 186 /** Returns whether a relative path is included. */ 187 private boolean isIncluded(String relativePath) { 188 return fileIncludes.isEmpty() || fileIncludes.matchesAny(relativePath); 189 } 190 191 /** Returns whether a relative path is excluded. */ 192 private boolean isExcluded(String relativePath) { 193 return fileExcludes.matchesAny(relativePath); 194 } 195 196 /** 197 * Performs directory scanning. 198 * 199 * @param baseDir 200 * the directory to start scanning in. All file names returned 201 * will be relative to this file. 202 * @param caseSensitive 203 * whether pattern should be applied case sensitive or not. 204 * @param includePatterns 205 * the include pattern (use ANT's pattern syntax) 206 * @param excludePatterns 207 * the exclude pattern (use ANT's pattern syntax) 208 * @throws IOException 209 * in case of invalid pattern provided. 210 */ 211 public static String[] scan(String baseDir, boolean caseSensitive, String[] includePatterns, 212 String[] excludePatterns) throws IOException { 213 if (includePatterns == null) { 214 includePatterns = new String[0]; 215 } 216 217 if (excludePatterns == null) { 218 excludePatterns = new String[0]; 219 } 220 221 return new AntPatternDirectoryScanner(new File(baseDir), caseSensitive, includePatterns, excludePatterns) 222 .scan(); 223 } 224}