package eu.cqse.check.framework.shallowparser;

import static eu.cqse.check.framework.shallowparser.TokenStreamUtils.NOT_FOUND;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.string.StringUtils;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;

/**
 * Utility methods for {@link IToken} lists that work on the tokens' text.
 * <p>
 * All methods are static and none of them modifies the token lists passed in.
 * Search methods signal "no match" with {@link TokenStreamUtils#NOT_FOUND}.
 */
public class TokenStreamTextUtils {

    /**
     * Returns the index of the first token whose text equals the given text in
     * the given token list.
     *
     * @param tokens
     *            the tokens to search
     * @param text
     *            the exact (case sensitive) text to look for
     * @return the index of the first match, or {@link TokenStreamUtils#NOT_FOUND}
     *         if no token has the given text
     */
    public static int findFirst(List<IToken> tokens, String text) {
        return findFirst(tokens, 0, tokens.size(), text);
    }

    /**
     * Returns the index of the first token whose text equals one of the given
     * texts.
     *
     * @param tokens
     *            the tokens to search
     * @param texts
     *            the set of accepted token texts
     * @return the index of the first match, or {@link TokenStreamUtils#NOT_FOUND}
     *         if none of the texts is found
     */
    public static int findFirst(List<IToken> tokens, Set<String> texts) {
        return findFirst(tokens, 0, tokens.size(), texts);
    }

    /**
     * Returns the index of the first token with the given text that is not before
     * the given start index and before the given end index.
     *
     * @param tokens
     *            the tokens to search
     * @param startOffset
     *            first index to inspect (inclusive)
     * @param endOffset
     *            index at which the search stops (exclusive)
     * @param text
     *            the exact (case sensitive) text to look for
     * @return the index of the first match, or {@link TokenStreamUtils#NOT_FOUND}
     *         if the text is not found in the range
     */
    public static int findFirst(List<IToken> tokens, int startOffset, int endOffset, String text) {
        for (int i = startOffset; i < endOffset; i++) {
            if (tokens.get(i).getText().equals(text)) {
                return i;
            }
        }
        return NOT_FOUND;
    }

    /**
     * Returns the index of the first token that is not before the given start
     * index and before the given end index and whose text equals one of the given
     * texts.
     *
     * @param tokens
     *            the tokens to search
     * @param startOffset
     *            first index to inspect (inclusive)
     * @param endOffset
     *            index at which the search stops (exclusive)
     * @param texts
     *            the set of accepted token texts
     * @return the index of the first match, or {@link TokenStreamUtils#NOT_FOUND}
     *         if none of the texts is found in the range
     */
    public static int findFirst(List<IToken> tokens, int startOffset, int endOffset, Set<String> texts) {
        for (int i = startOffset; i < endOffset; i++) {
            if (texts.contains(tokens.get(i).getText())) {
                return i;
            }
        }
        return NOT_FOUND;
    }

    /**
     * Returns the index of the first token whose text equals the given text
     * ignoring case, not before the given start index and before the given end
     * index.
     *
     * @param tokens
     *            the tokens to search
     * @param startOffset
     *            first index to inspect (inclusive)
     * @param endOffset
     *            index at which the search stops (exclusive)
     * @param text
     *            the text to look for; compared with
     *            {@link String#equalsIgnoreCase(String)}
     * @return the index of the first match, or {@link TokenStreamUtils#NOT_FOUND}
     *         if the text is not found in the range
     */
    public static int findFirstCaseInsensitive(List<IToken> tokens, int startOffset, int endOffset, String text) {
        for (int i = startOffset; i < endOffset; i++) {
            if (tokens.get(i).getText().equalsIgnoreCase(text)) {
                return i;
            }
        }
        return NOT_FOUND;
    }

    /**
     * Returns all indices of tokens with the given text within the given token
     * list, in ascending order. If none is found, an empty list is returned.
     *
     * @param tokens
     *            the tokens to search
     * @param text
     *            the exact (case sensitive) text to look for
     * @return a new list holding the index of every matching token
     */
    public static List<Integer> findAll(List<IToken> tokens, String text) {
        List<Integer> indices = new ArrayList<>();
        for (int i = 0; i < tokens.size(); i++) {
            if (tokens.get(i).getText().equals(text)) {
                indices.add(i);
            }
        }
        return indices;
    }

    /**
     * Returns whether the given token list contains a token with the given text.
     */
    public static boolean contains(List<IToken> tokens, String text) {
        return findFirst(tokens, text) != NOT_FOUND;
    }

    /**
     * Returns whether the given token list contains a token whose text equals one
     * of the texts from the given Set.
     */
    public static boolean containsAny(List<IToken> tokens, Set<String> texts) {
        return findFirst(tokens, texts) != NOT_FOUND;
    }

    /**
     * Returns whether the given token list contains tokens with the given sequence
     * of strings at the given start offset.
     * <p>
     * An empty sequence matches at every offset up to and including
     * {@code tokens.size()}. A negative start offset is rejected via
     * {@link CCSMAssert}.
     *
     * @param tokens
     *            the tokens to inspect
     * @param startOffset
     *            index of the token that must match {@code sequence[0]}; must not
     *            be negative
     * @param sequence
     *            the expected token texts, in order
     * @return {@code true} if the sequence fits into the list at the offset and
     *         every token text equals the corresponding sequence entry
     */
    public static boolean hasSequence(List<IToken> tokens, int startOffset, String... sequence) {
        CCSMAssert.isFalse(startOffset < 0, "startOffset must not be less than zero");
        // Compare against the remaining length instead of adding to startOffset:
        // the sum could overflow int for very large offsets, the difference cannot
        // because startOffset is known to be non-negative here.
        if (sequence.length > tokens.size() - startOffset) {
            return false;
        }

        for (int i = 0; i < sequence.length; i++) {
            if (!tokens.get(startOffset + i).getText().equals(sequence[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the index of the first occurrence of the given sequence of strings in
     * the given token list, beginning from the given start offset. The given start
     * type is the type of the sequence's first token.
     * <p>
     * For an empty sequence this returns the index of the first token of the given
     * start type at or after the start offset.
     *
     * @param tokens
     *            the tokens to search
     * @param startOffset
     *            first index at which a match may begin
     * @param startType
     *            required type of the token at the beginning of a match
     * @param sequence
     *            the expected token texts, in order
     * @return the index at which the first match begins, or
     *         {@link TokenStreamUtils#NOT_FOUND} if the sequence is not found
     */
    public static int findSequence(List<IToken> tokens, int startOffset, ETokenType startType, String... sequence) {
        // A match can only begin where the whole sequence still fits. The bound is
        // additionally capped at tokens.size() so that an empty sequence does not
        // make the loop read one element past the end of the list.
        int endExclusive = Math.min(tokens.size(), tokens.size() - sequence.length + 1);
        for (int i = startOffset; i < endExclusive; i++) {
            if (tokens.get(i).getType().equals(startType) && hasSequence(tokens, i, sequence)) {
                return i;
            }
        }
        return NOT_FOUND;
    }

    /**
     * Returns all indices of occurrences of the given sequence of token texts in
     * the given token list, beginning from the given offset. The given start token
     * type is the type of the first token of the sequence. If the sequence is not
     * found, an empty list is returned.
     * <p>
     * After each match the search resumes one token after the start of that match,
     * so overlapping occurrences are reported as well.
     *
     * @param tokens
     *            the tokens to search
     * @param startOffset
     *            first index at which a match may begin
     * @param startType
     *            required type of the token at the beginning of a match
     * @param sequence
     *            the expected token texts, in order
     * @return a new list with the start index of every match, in ascending order
     */
    public static List<Integer> findAllSequences(List<IToken> tokens, int startOffset, ETokenType startType,
            String... sequence) {
        List<Integer> indices = new ArrayList<>();

        int searchFrom = startOffset;
        while (searchFrom < tokens.size() - sequence.length + 1) {
            int match = findSequence(tokens, searchFrom, startType, sequence);
            if (match == NOT_FOUND) {
                break;
            }
            indices.add(match);
            searchFrom = match + 1;
        }

        return indices;
    }

    /**
     * Concatenates the tokens' texts without any separator and returns them as
     * string.
     */
    public static String concatTokenTexts(List<IToken> tokens) {
        return concatTokenTexts(tokens, "");
    }

    /**
     * Concatenates the tokens' texts and returns them as string. The texts and the
     * given separator are handed to
     * {@code StringUtils.concat(texts, separator)}.
     */
    public static String concatTokenTexts(List<IToken> tokens, String separator) {
        return StringUtils.concat(CollectionUtils.map(tokens, IToken::getText), separator);
    }

    /**
     * Concatenates the inner lists' token texts (without separator) and returns
     * them as list, one string per inner list.
     */
    public static List<String> concatAllTokenTexts(List<List<IToken>> tokenLists) {
        return CollectionUtils.map(tokenLists, TokenStreamTextUtils::concatTokenTexts);
    }

    /**
     * Converts the sublist of the given token list from the given start index
     * (inclusive) to the given end index (exclusive) to a list of token texts.
     *
     * @param tokens
     *            the tokens to convert
     * @param startIndex
     *            first index to include; must be greater or equal to zero
     * @param endIndex
     *            index after the last token to include; must be less or equal to
     *            {@code tokens.size()}
     * @return the texts of the tokens in the given range, in order
     */
    public static List<String> getTokenTexts(List<IToken> tokens, int startIndex, int endIndex) {
        CCSMAssert.isTrue(startIndex >= 0, "startIndex must be greater or equal to zero");
        CCSMAssert.isTrue(endIndex <= tokens.size(), "endIndex must be less or equal to tokens.size()");
        return CollectionUtils.map(tokens.subList(startIndex, endIndex), IToken::getText);
    }

    /** Converts the given token list to a list of corresponding token texts. */
    public static List<String> getTokenTexts(List<IToken> tokens) {
        return getTokenTexts(tokens, 0, tokens.size());
    }

    /**
     * Returns whether the given token has the given type and text. The text
     * comparison is case insensitive if the token's language is case insensitive.
     *
     * @param token
     *            the token to check
     * @param tokenType
     *            the expected token type
     * @param tokenText
     *            the expected token text
     * @return {@code true} if both type and text match
     */
    public static boolean is(IToken token, ETokenType tokenType, String tokenText) {
        if (!token.getType().equals(tokenType)) {
            return false;
        }
        if (token.getLanguage().isCaseSensitive()) {
            return token.getText().equals(tokenText);
        }
        return token.getText().equalsIgnoreCase(tokenText);
    }
}