001/*-------------------------------------------------------------------------+ 002| | 003| Copyright (c) 2009-2017 CQSE GmbH | 004| | 005+-------------------------------------------------------------------------*/ 006package eu.cqse.check.framework.util.python; 007 008import eu.cqse.check.framework.scanner.ETokenType; 009import eu.cqse.check.framework.scanner.IToken; 010import eu.cqse.check.framework.shallowparser.SubTypeNames; 011import eu.cqse.check.framework.shallowparser.TokenStreamUtils; 012import eu.cqse.check.framework.shallowparser.framework.ShallowEntity; 013import org.conqat.lib.commons.collections.CollectionUtils; 014 015import java.util.ArrayList; 016import java.util.Arrays; 017import java.util.Collections; 018import java.util.EnumSet; 019import java.util.List; 020 021import static eu.cqse.check.framework.scanner.ETokenType.AS; 022import static eu.cqse.check.framework.scanner.ETokenType.COLON; 023import static eu.cqse.check.framework.scanner.ETokenType.COMMA; 024import static eu.cqse.check.framework.scanner.ETokenType.DOT; 025import static eu.cqse.check.framework.scanner.ETokenType.EQ; 026import static eu.cqse.check.framework.scanner.ETokenType.FOR; 027import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER; 028import static eu.cqse.check.framework.scanner.ETokenType.IN; 029import static eu.cqse.check.framework.scanner.ETokenType.LAMBDA; 030import static eu.cqse.check.framework.scanner.ETokenType.LBRACE; 031import static eu.cqse.check.framework.scanner.ETokenType.LBRACK; 032import static eu.cqse.check.framework.scanner.ETokenType.LPAREN; 033import static eu.cqse.check.framework.scanner.ETokenType.MULT; 034import static eu.cqse.check.framework.scanner.ETokenType.POWER; 035import static eu.cqse.check.framework.scanner.ETokenType.RBRACE; 036import static eu.cqse.check.framework.scanner.ETokenType.RBRACK; 037import static eu.cqse.check.framework.scanner.ETokenType.RPAREN; 038 039/** 040 * A class that extracts declared variable names from shallow entities in 041 * python. Currently extraction is implemented for. 042 * 043 * <ul> 044 * <li>expect statements</li> 045 * <li>for statements</li> 046 * <li>with statements</li> 047 * <li>method parameters</li> 048 * <li>lambda parameters</li> 049 * <li>variable and attribute declarations (with destructuring)</li> 050 * </ul> 051 * <p> 052 * For loops within list comprehensions are currently not supported. 053 */ 054public class PythonVariableNameExtractor { 055 056 /** 057 * Marker tokens in method argument lists, as of <a href= 058 * "https://docs.python.org/dev/tutorial/controlflow.html#function-examples">Python 059 * Documentation</a> 060 */ 061 private static final EnumSet<ETokenType> MARKER_TOKENS = EnumSet.of(MULT, ETokenType.SLASH); 062 063 /** 064 * Extracts declared variable names from the given entity. 065 */ 066 public List<IToken> extractVariableNames(ShallowEntity entity) { 067 switch (entity.getType()) { 068 case ATTRIBUTE: 069 return extractFromDeclarationStatement(entity.ownStartTokens()); 070 case STATEMENT: 071 return extractFromStatement(entity); 072 case METHOD: 073 return extractFromMethodOrLambda(entity); 074 default: 075 return CollectionUtils.emptyList(); 076 } 077 } 078 079 /** 080 * Extracts declared variable names from the given statement. 081 */ 082 private static List<IToken> extractFromStatement(ShallowEntity entity) { 083 switch (entity.getSubtype()) { 084 case SubTypeNames.SIMPLE_STATEMENT: 085 return extractFromDeclarationStatement(entity.ownStartTokens()); 086 case SubTypeNames.FOR: 087 return extractFromForStatement(entity.ownStartTokens()); 088 case SubTypeNames.WITH: 089 case SubTypeNames.EXCEPT: 090 return extractFromStatementWithAs(entity.ownStartTokens()); 091 default: 092 return CollectionUtils.emptyList(); 093 } 094 } 095 096 /** 097 * Extracts declared variable names from a declaration statement. 098 */ 099 private static List<IToken> extractFromDeclarationStatement(List<IToken> tokens) { 100 int index = TokenStreamUtils.findFirstTopLevel(tokens, 101 // EQ for regular assignment, COLON for variable annotation 102 EnumSet.of(EQ, COLON), 103 Collections.singletonList(LPAREN), 104 Collections.singletonList(RPAREN)); 105 if (index == TokenStreamUtils.NOT_FOUND) { 106 // In this case there is no variable declaration 107 return CollectionUtils.emptyList(); 108 } 109 110 List<IToken> leftHandSideTokens = tokens.subList(0, index); 111 112 // If the left-hand-side contains any commas, it must be a tuple assignment 113 // (possible with or without parentheses/brackets) 114 // e.g. a,b = 1,2 ; [a, b] = [1, 2] ; (1,2) = (1,2) 115 if (TokenStreamUtils.containsAny(leftHandSideTokens, COMMA)) { 116 return extractIdentifiersFromTupleAssignment(leftHandSideTokens); 117 } 118 119 return extractIdentifiers(leftHandSideTokens); 120 } 121 122 /** 123 * Extract the identifiers from a tuple assignment which are actually used to declare a new variable. 124 * @param tokens list of tokens from the left-hand-side of a tuple assignment 125 * (possible nested with parentheses or brackets) 126 * @return list of tokens which are identifiers that are actually used as the declaration of a new variable 127 */ 128 private static List<IToken> extractIdentifiersFromTupleAssignment(List<IToken> tokens) { 129 // Remove parentheses/brackets at beginning and end, then recursive call with remaining tokens 130 if (TokenStreamUtils.startsWith(tokens, LPAREN)) { 131 tokens = TokenStreamUtils.removeAtFront(tokens, LPAREN); 132 tokens = TokenStreamUtils.removeAtEnd(tokens, RPAREN); 133 return extractIdentifiersFromTupleAssignment(tokens); 134 } else if (TokenStreamUtils.startsWith(tokens, LBRACK)) { 135 tokens = TokenStreamUtils.removeAtFront(tokens, LBRACK); 136 tokens = TokenStreamUtils.removeAtEnd(tokens, RBRACK); 137 return extractIdentifiersFromTupleAssignment(tokens); 138 } 139 140 // Plain tuple assignment (possibly still nested), e.g. 141 // a, (b, c) = 1, (2, 3) 142 if (TokenStreamUtils.contains(tokens, COMMA)) { 143 List<List<IToken>> splitTokenLists = TokenStreamUtils.split(tokens, COMMA); 144 List<List<IToken>> plainTokenLists = new ArrayList<>(); 145 for (List<IToken> tokenList : splitTokenLists) { 146 plainTokenLists.add(extractIdentifiersFromTupleAssignment(tokenList)); 147 } 148 List<IToken> identifiers = new ArrayList<>(); 149 for (List<IToken> plainTokenList : plainTokenLists) { 150 identifiers.addAll(extractIdentifiers(plainTokenList)); 151 } 152 return identifiers; 153 } 154 155 // No more commas left 156 // -> we have reached the plain identifiers 157 // or access of attributes or array/dictionary entries, i.e. a.str or a[int("1")] 158 return extractIdentifiers(tokens); 159 } 160 161 /** 162 * Extracts declared variable names from a for statement. 163 */ 164 private static List<IToken> extractFromForStatement(List<IToken> tokens) { 165 return CollectionUtils.filter(TokenStreamUtils.tokensBetween(tokens, FOR, IN), 166 token -> token.getType() == IDENTIFIER); 167 } 168 169 /** 170 * Extracts declared variables names from a statement that introduces variables 171 * with as. This includes with and except statements. 172 */ 173 private static List<IToken> extractFromStatementWithAs(List<IToken> tokens) { 174 int index = TokenStreamUtils.firstTokenOfTypeSequence(tokens, 2, AS, IDENTIFIER); 175 if (index == TokenStreamUtils.NOT_FOUND) { 176 return CollectionUtils.emptyList(); 177 } 178 return tokens.subList(index + 1, index + 2); 179 } 180 181 /** 182 * Extracts parameter names from a method or lambda. 183 */ 184 public List<IToken> extractFromMethodOrLambda(ShallowEntity methodOrLambda) { 185 if (SubTypeNames.LAMBDA.equals(methodOrLambda.getSubtype())) { 186 return extractFromLambda(methodOrLambda); 187 } 188 return extractFromMethod(methodOrLambda); 189 } 190 191 /** 192 * Extracts parameter names from a lambda. 193 */ 194 private static List<IToken> extractFromLambda(ShallowEntity lambda) { 195 return extractIdentifiers(TokenStreamUtils.tokensBetween(lambda.ownStartTokens(), LAMBDA, COLON)); 196 } 197 198 /** 199 * Extracts parameter names from a method. 200 */ 201 private static List<IToken> extractFromMethod(ShallowEntity method) { 202 List<List<IToken>> splitParameterTokens = getSplitParameterTokens(method); 203 return CollectionUtils.filterAndMap(splitParameterTokens, 204 tokens -> !tokens.isEmpty() && !(tokens.size() == 1 && MARKER_TOKENS.contains(tokens.get(0).getType())), 205 tokens -> { 206 IToken idToken = tokens.get(0); 207 if (idToken.getType() == MULT || idToken.getType() == POWER) { 208 return tokens.get(1); 209 } 210 return idToken; 211 }); 212 } 213 214 /** 215 * Returns the split parameter tokens from the given method. Returns a list of 216 * token list where each inner list represents all tokens of one parameter. 217 */ 218 private static List<List<IToken>> getSplitParameterTokens(ShallowEntity method) { 219 List<IToken> parameterTokens = TokenStreamUtils.tokensBetweenWithNesting(method.includedTokens(), 2, LPAREN, 220 RPAREN); 221 if (parameterTokens.isEmpty()) { 222 return CollectionUtils.emptyList(); 223 } 224 225 List<ETokenType> openingTypes = new ArrayList<>(Arrays.asList(LPAREN, LBRACK, LBRACE, LAMBDA)); 226 List<ETokenType> closingTypes = new ArrayList<>(Arrays.asList(RPAREN, RBRACK, RBRACE, COLON)); 227 228 // we need to handle nesting between lambdas and colon to handle lambda 229 // default parameter properly 230 return TokenStreamUtils.splitWithNesting(parameterTokens, COMMA, openingTypes, closingTypes); 231 232 } 233 234 /** 235 * Extracts all identifiers from the given tokens. 236 */ 237 private static List<IToken> extractIdentifiers(List<IToken> tokens) { 238 // If the remaining token list contains 239 // - a bracket, it is an array or dictionary access 240 // - a parenthesis, it is a function call 241 // - a dot, it is an attribute access 242 // None of these declare a new variable. 243 if (TokenStreamUtils.containsAny(tokens, EnumSet.of(LBRACK, LPAREN, DOT))) { 244 return CollectionUtils.emptyList(); 245 } 246 247 return CollectionUtils.filter(tokens, token -> token.getType() == IDENTIFIER); 248 } 249 250}