001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 the ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package eu.cqse.check.framework.util.tokens; 018 019import java.util.Collection; 020import java.util.EnumSet; 021import java.util.List; 022import java.util.Objects; 023import java.util.Set; 024 025import org.conqat.lib.commons.collections.Pair; 026import org.conqat.lib.commons.string.StringUtils; 027 028import eu.cqse.check.framework.scanner.ETokenType; 029import eu.cqse.check.framework.scanner.ETokenType.ETokenClass; 030import eu.cqse.check.framework.scanner.IToken; 031 032/** 033 * Utility methods for {@link IToken}s. 034 */ 035public class TokenUtils { 036 037 /** 038 * Set of token types that always have to be treated case-sensitive, e.g. String 039 * literals. 040 */ 041 public static final EnumSet<ETokenType> ALWAYS_CASE_SENSITIVE_TOKEN_TYPES = EnumSet.of(ETokenType.STRING_LITERAL, 042 ETokenType.CHARACTER_LITERAL, ETokenType.UNTERMINATED_STRING_LITERAL, 043 ETokenType.UNTERMINATED_CHARACTER_LITERAL); 044 045 /** 046 * Estimates the zero-based end line of the specified token by looking where the 047 * provided next token begins. Note that this cannot check, if the provided next 048 * token is really the next token, nor if there are blank lines between the two 049 * tokens. 050 * <p> 051 * This method is more efficient than 052 * {@link #calculateEndLineByCountingLines(IToken)}, but not as precise. 053 * 054 * @param token 055 * the token whose end line should be calculated. 056 * @param nextToken 057 * the lookahead token. 058 * @return the end line of the specified first token. 059 */ 060 public static int estimateEndLineByLookahead(IToken token, IToken nextToken) { 061 int tokenEndLine = nextToken.getLineNumber() - 1; 062 return Math.max(tokenEndLine, token.getLineNumber()); 063 } 064 065 /** 066 * Calculates the zero-based end line of the specified token by counting the 067 * lines of the text contained within. 068 * <p> 069 * This method is less efficient than 070 * {@link #estimateEndLineByLookahead(IToken, IToken)}, but always yields the 071 * correct result. 072 * 073 * @param token 074 * the token whose end line should be calculated. 075 * @return the end line of the specified token. 076 */ 077 public static int calculateEndLineByCountingLines(IToken token) { 078 int tokenEndLine = token.getLineNumber() + StringUtils.countLines(token.getText()) - 1; 079 return Math.max(tokenEndLine, token.getLineNumber()); 080 } 081 082 /** 083 * Checks if the given tokens are of equal type and have the equal text. The 084 * method is <code>null</code>-safe. 085 * 086 * @return <code>true</code> if both tokens are <code>null</code> or if both 087 * tokens are of the same {@link ETokenType} and have the same text, 088 * <code>false</code> otherwise. 089 */ 090 public static boolean isEqualTypeAndText(IToken t1, IToken t2) { 091 if (t1 == t2) { 092 return true; 093 } 094 if (t1 == null || t2 == null) { 095 return false; 096 } 097 return t1.getType() == t2.getType() && Objects.equals(t1.getText(), t2.getText()); 098 } 099 100 /** 101 * Returns the first matching pattern and the match or null if no pattern 102 * matches. 103 */ 104 public static Pair<TokenPattern, TokenPatternMatch> getMatchingPattern(Collection<TokenPattern> patterns, 105 List<IToken> tokens) { 106 for (TokenPattern pattern : patterns) { 107 TokenPatternMatch match = pattern.findFirstMatch(tokens); 108 if (match != null) { 109 return new Pair<>(pattern, match); 110 } 111 } 112 return null; 113 } 114 115 /** 116 * Returns true if a given token is a comment token. 117 */ 118 public static boolean isCommentToken(IToken token) { 119 return token.getType().getTokenClass() == ETokenClass.COMMENT; 120 } 121 122 /** 123 * Creates a representation of the tokens that contains only the tokens 124 * specified in the parameter. This method keeps the original line breaks. 125 * 126 * @param coveredLines 127 * the method will only count lines that are not already in the given 128 * set. The set will afterwards contain all lines covered by the 129 * remaining tokens. 130 */ 131 public static int getTokenLineCount(EnumSet<ETokenClass> tokenClassesToPreserve, List<IToken> tokens, 132 Set<Integer> coveredLines) { 133 134 int count = 0; 135 for (IToken token : tokens) { 136 if (!tokenClassesToPreserve.contains(token.getType().getTokenClass())) { 137 continue; 138 } 139 140 int startLine = token.getLineNumber(); 141 int endLine = token.getLineNumber() + StringUtils.countLines(token.getText()); 142 for (int line = startLine; line < endLine; line++) { 143 if (coveredLines.add(line)) { 144 count += 1; 145 } 146 } 147 } 148 149 return count; 150 } 151 152}