001package eu.cqse.check.framework.shallowparser;
002
003import static eu.cqse.check.framework.shallowparser.TokenStreamUtils.NOT_FOUND;
004
005import java.util.ArrayList;
006import java.util.List;
007import java.util.Set;
008
009import org.conqat.lib.commons.assertion.CCSMAssert;
010import org.conqat.lib.commons.collections.CollectionUtils;
011import org.conqat.lib.commons.string.StringUtils;
012
013import eu.cqse.check.framework.scanner.ETokenType;
014import eu.cqse.check.framework.scanner.IToken;
015
016/**
017 * Utility methods for {@link IToken} lists that work on the tokens' text.
018 */
019public class TokenStreamTextUtils {
020
021        /**
022         * Returns the index of the first token whose token text equals the given text
023         * in the given token list. If the text is not found, {@link #NOT_FOUND} is
024         * returned.
025         */
026        public static int findFirst(List<IToken> tokens, String text) {
027                return findFirst(tokens, 0, tokens.size(), text);
028        }
029
030        /**
031         * Returns the index of the first token whose token text equals one of the given
032         * texts from the Set. If none of the texts are found, {@link #NOT_FOUND} is
033         * returned.
034         */
035        public static int findFirst(List<IToken> tokens, Set<String> texts) {
036                return findFirst(tokens, 0, tokens.size(), texts);
037        }
038
039        /**
040         * Returns the first index of the given text not before the given start index
041         * and before the given end index. If the text is not found, {@link #NOT_FOUND}
042         * is returned.
043         */
044        public static int findFirst(List<IToken> tokens, int startOffset, int endOffset, String text) {
045                for (int i = startOffset; i < endOffset; i++) {
046                        if (tokens.get(i).getText().equals(text)) {
047                                return i;
048                        }
049                }
050                return NOT_FOUND;
051        }
052
053        /**
054         * Returns the first index of the first token not before the given start index
055         * and after the given end index whose text equals one of the given texts from
056         * the Set. If none of the texts are found, {@link #NOT_FOUND} is returned.
057         */
058        public static int findFirst(List<IToken> tokens, int startOffset, int endOffset, Set<String> texts) {
059                for (int i = startOffset; i < endOffset; i++) {
060                        if (texts.contains(tokens.get(i).getText())) {
061                                return i;
062                        }
063                }
064                return NOT_FOUND;
065        }
066
067        /**
068         * Returns the first index of the given text not before the given start index
069         * and before the given end index. If the text is not found, {@link #NOT_FOUND}
070         * is returned.
071         */
072        public static int findFirstCaseInsensitive(List<IToken> tokens, int startOffset, int endOffset, String text) {
073                for (int i = startOffset; i < endOffset; i++) {
074                        if (tokens.get(i).getText().equalsIgnoreCase(text)) {
075                                return i;
076                        }
077                }
078                return NOT_FOUND;
079        }
080
081        /**
082         * Returns all indices of the given text within the given token list. If none is
083         * found, an empty list is returned.
084         */
085        public static List<Integer> findAll(List<IToken> tokens, String text) {
086                List<Integer> indices = new ArrayList<>();
087                for (int i = 0; i < tokens.size(); i++) {
088                        if (tokens.get(i).getText().equals(text)) {
089                                indices.add(i);
090                        }
091                }
092                return indices;
093        }
094
095        /**
096         * Returns whether the given token list contains a token with the given text.
097         */
098        public static boolean contains(List<IToken> tokens, String text) {
099                return findFirst(tokens, text) != NOT_FOUND;
100        }
101
102        /**
103         * Returns whether the given token list contains a token whose text equals one
104         * of the texts from the given Set.
105         */
106        public static boolean containsAny(List<IToken> tokens, Set<String> texts) {
107                return findFirst(tokens, texts) != NOT_FOUND;
108        }
109
110        /**
111         * Returns whether the given token list contains tokens with the given sequence
112         * of strings at the given start offset.
113         */
114        public static boolean hasSequence(List<IToken> tokens, int startOffset, String... sequence) {
115                CCSMAssert.isFalse(startOffset < 0, "startOffset must not be less than zero");
116                if (startOffset + sequence.length > tokens.size()) {
117                        return false;
118                }
119
120                for (int i = 0; i < sequence.length; i++) {
121                        if (!tokens.get(startOffset + i).getText().equals(sequence[i])) {
122                                return false;
123                        }
124                }
125                return true;
126        }
127
128        /**
129         * Returns the index of the first occurrence of the given sequence of strings in
130         * the given token list, beginning from the given start offset. The given start
131         * type is the type of the sequence's first token. If the sequence is not found,
132         * {@link #NOT_FOUND} is returned.
133         */
134        public static int findSequence(List<IToken> tokens, int startOffset, ETokenType startType, String... sequence) {
135                for (int i = startOffset; i < tokens.size() - sequence.length + 1; i++) {
136                        if (tokens.get(i).getType().equals(startType)) {
137                                if (hasSequence(tokens, i, sequence)) {
138                                        return i;
139                                }
140                        }
141                }
142                return NOT_FOUND;
143        }
144
145        /**
146         * Returns all indices of occurrences of the given sequence of token texts in
147         * the given token list, beginning from the given offset. The given start token
148         * type is the type of the first token of the sequence. If the sequence is not
149         * found, an empty list is returned.
150         */
151        public static List<Integer> findAllSequences(List<IToken> tokens, int startOffset, ETokenType startType,
152                        String... sequence) {
153                List<Integer> indices = new ArrayList<>();
154
155                while (startOffset < tokens.size() - sequence.length + 1) {
156                        startOffset = findSequence(tokens, startOffset, startType, sequence);
157                        if (startOffset == NOT_FOUND) {
158                                break;
159                        }
160                        indices.add(startOffset);
161                        startOffset++;
162                }
163
164                return indices;
165        }
166
167        /** Concatenates the token's texts and returns them as string. */
168        public static String concatTokenTexts(List<IToken> tokens) {
169                return concatTokenTexts(tokens, "");
170        }
171
172        /** Concatenates the token's texts and returns them as string. */
173        public static String concatTokenTexts(List<IToken> tokens, String separator) {
174                return StringUtils.concat(CollectionUtils.map(tokens, IToken::getText), separator);
175        }
176
177        /** Concatenates the inner lists' token texts and returns them as list. */
178        public static List<String> concatAllTokenTexts(List<List<IToken>> tokenLists) {
179                return CollectionUtils.map(tokenLists, TokenStreamTextUtils::concatTokenTexts);
180
181        }
182
183        /**
184         * Converts the sublist of the given token list from the given start index
185         * (inclusive) to the given end index (exclusive) to a list of token texts.
186         */
187        public static List<String> getTokenTexts(List<IToken> tokens, int startIndex, int endIndex) {
188                CCSMAssert.isTrue(startIndex >= 0, "startIndex must be greater or equal to zero");
189                CCSMAssert.isTrue(endIndex <= tokens.size(), "endIndex must be less or equal to tokens.size()");
190                return CollectionUtils.map(tokens.subList(startIndex, endIndex), IToken::getText);
191        }
192
193        /** Converts the given token list to a list of corresponding token texts. */
194        public static List<String> getTokenTexts(List<IToken> tokens) {
195                return getTokenTexts(tokens, 0, tokens.size());
196        }
197
198        /**
199         * Returns whether the given token has the given type and text. The text
200         * comparison is case insensitive if the token's language is case insensitive.
201         */
202        public static boolean is(IToken token, ETokenType tokenType, String tokenText) {
203                if (!token.getType().equals(tokenType)) {
204                        return false;
205                }
206                if (token.getLanguage().isCaseSensitive()) {
207                        return token.getText().equals(tokenText);
208                }
209                return token.getText().equalsIgnoreCase(tokenText);
210        }
211}