001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.preprocessor.abap;
018
019import static eu.cqse.check.framework.scanner.ETokenType.DOT;
020import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
021import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
022
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.List;
026
027import org.conqat.lib.commons.collections.UnmodifiableList;
028
029import eu.cqse.check.framework.preprocessor.IPreprocessor;
030import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
031import eu.cqse.check.framework.scanner.IToken;
032import eu.cqse.check.framework.shallowparser.NestingAwareTokenIterator;
033import eu.cqse.check.framework.shallowparser.TokenStreamUtils;
034
035/**
036 * Preprocessor for ABAP. It rolls out so-called "chain sentences" in order to
037 * make parsing easier.
038 *
039 * For example, the following two code snippets are equivalent:
040 * <ul>
041 * <li><code>DATA: a TYPE i, b TYPE x.</code></li>
042 * <li><code>DATA a TYPE i. DATA b TYPE x.</code></li>
043 * </ul>
044 *
045 * Basically, whenever there is a colon, this means that every statement after a
046 * comma begins with whatever came before the colon.
047 */
048public class AbapPreprocessor implements IPreprocessor {
049
050        /**
051         * Used to indicate that a certain token was not (yet) found.
052         */
053        private static final int NO_POSITION = -1;
054
055        /** {@inheritDoc} */
056        @Override
057        public List<IToken> preprocess(String uniformPath, List<IToken> tokens) {
058                List<IToken> result = new ArrayList<>();
059                int lastDot = NO_POSITION;
060                int lastColon = NO_POSITION;
061
062                NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, 0, Arrays.asList(LPAREN),
063                                Arrays.asList(RPAREN));
064                while (iterator.hasNext()) {
065                        IToken token = iterator.next();
066                        int currentIndex = iterator.getCurrentIndex();
067                        switch (token.getType()) {
068                        case COLON:
069                                lastColon = currentIndex;
070                                // Remove the colon by not inserting it into the result list.
071                                break;
072                        case COMMA:
073                                if (!iterator.isTopLevel() || lastColon == NO_POSITION) {
074                                        result.add(token);
075                                        break;
076                                }
077                                result.add(TokenStreamUtils.createToken(token, ".", DOT));
078
079                                IToken reference = getReferenceToken(tokens, currentIndex);
080                                List<IToken> toRepeat = tokens.subList(lastDot + 1, lastColon);
081                                result.addAll(TokenStreamUtils.copyTokens(reference, toRepeat));
082                                break;
083                        case DOT:
084                                lastDot = currentIndex;
085                                lastColon = NO_POSITION;
086                                // fall-through intended
087                        default:
088                                result.add(token);
089                        }
090                }
091
092                return result;
093        }
094
095        /**
096         * Gets the best possible reference token for the tokens to be duplicated.
097         * Usually the comma is at the end of the line, and the new statement starts in
098         * the next line, so we return the token following the comma, if it exists. We
099         * also take care to skip over any comment tokens.
100         */
101        private static IToken getReferenceToken(List<IToken> tokens, int commaIndex) {
102                for (IToken token : tokens.subList(commaIndex + 1, tokens.size())) {
103                        if (token.getType().getTokenClass() != ETokenClass.COMMENT) {
104                                return token;
105                        }
106                }
107                return tokens.get(commaIndex);
108        }
109
110        /**
111         * Returns whether these tokens have been generated by the ABAP Preprocessor
112         * during a "Kettensatz" expansion.
113         *
114         * The expansion allows to write code like this: <code>
115         *     one two three : four five, six seven.
116         * </code> which is expanded to <code>
117         *        one two three four five. one two three six seven.
118         * </code> Of course, more segments are allowed and each segment can contain one
119         * or more tokens. Imporant are the colon, commas, and dot.
120         *
121         * Common use case: <code>WRITE: x = 5, y = 1.</code>
122         */
123        public static boolean isGeneratedFromAbapKettensatzStatement(UnmodifiableList<IToken> includedTokens) {
124                if (includedTokens.size() < 2) {
125                        return false;
126                }
127                // we use the fact that the Abap preprocessor gives the same offset (the offset
128                // of the colon) to each generated token.
129                return includedTokens.get(0).getOffset() == includedTokens.get(1).getOffset();
130        }
131}