/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package eu.cqse.check.framework.preprocessor.abap;

import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.conqat.lib.commons.collections.UnmodifiableList;

import eu.cqse.check.framework.preprocessor.IPreprocessor;
import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.NestingAwareTokenIterator;
import eu.cqse.check.framework.shallowparser.TokenStreamUtils;

/**
 * Preprocessor for ABAP. It rolls out so-called "chained statements" in order
 * to make parsing easier.
 *
 * For example, the following two code snippets are equivalent:
 * <ul>
 * <li><code>DATA: a TYPE i, b TYPE x.</code></li>
 * <li><code>DATA a TYPE i. DATA b TYPE x.</code></li>
 * </ul>
 *
 * Basically, whenever there is a colon, this means that every statement after a
 * comma begins with whatever came before the colon.
 */
public class AbapPreprocessor implements IPreprocessor {

    /**
     * Used to indicate that a certain token was not (yet) found.
     */
    private static final int NO_POSITION = -1;

    /**
     * {@inheritDoc}
     * <p>
     * Rolls out chained statements. The colon tokens are dropped. Each comma that
     * follows a colon and is not nested inside parentheses is replaced by a dot,
     * followed by a copy of the tokens between the previous dot (or the start of
     * the token list) and the first colon of the statement. All other tokens are
     * passed through unchanged. The input list is not modified.
     */
    @Override
    public List<IToken> preprocess(String uniformPath, List<IToken> tokens) {
        List<IToken> result = new ArrayList<>();
        // Index of the most recent dot; NO_POSITION + 1 == 0 makes the first
        // statement's prefix start at the beginning of the token list.
        int lastDot = NO_POSITION;
        // Index of the chain colon of the current statement, if any.
        int lastColon = NO_POSITION;

        NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, 0, Arrays.asList(LPAREN),
                Arrays.asList(RPAREN));
        while (iterator.hasNext()) {
            IToken token = iterator.next();
            int currentIndex = iterator.getCurrentIndex();
            switch (token.getType()) {
            case COLON:
                // Only the first colon of a statement marks the end of the repeated
                // prefix. Remembering a later colon instead would make the prefix
                // contain the first colon and the tokens of the first segment.
                if (lastColon == NO_POSITION) {
                    lastColon = currentIndex;
                }
                // Remove the colon by not inserting it into the result list.
                break;
            case COMMA:
                if (!iterator.isTopLevel() || lastColon == NO_POSITION) {
                    // Ordinary comma (nested in parentheses or not part of a chain).
                    result.add(token);
                    break;
                }
                // The comma terminates the current segment, so it becomes a dot.
                result.add(TokenStreamUtils.createToken(token, ".", DOT));

                // Start the next segment with a copy of the statement prefix.
                IToken reference = getReferenceToken(tokens, currentIndex);
                List<IToken> toRepeat = tokens.subList(lastDot + 1, lastColon);
                result.addAll(TokenStreamUtils.copyTokens(reference, toRepeat));
                break;
            case DOT:
                // End of statement: a new statement (and possibly a new chain) begins.
                lastDot = currentIndex;
                lastColon = NO_POSITION;
                // fall-through intended
            default:
                result.add(token);
            }
        }

        return result;
    }

    /**
     * Gets the best possible reference token for the tokens to be duplicated.
     * Usually the comma is at the end of the line, and the new statement starts in
     * the next line, so we return the first non-comment token following the comma.
     * If there is no such token, the comma itself is returned.
     *
     * @param tokens
     *            the complete, unprocessed token list
     * @param commaIndex
     *            index of the comma within <code>tokens</code>
     */
    private static IToken getReferenceToken(List<IToken> tokens, int commaIndex) {
        for (IToken token : tokens.subList(commaIndex + 1, tokens.size())) {
            if (token.getType().getTokenClass() != ETokenClass.COMMENT) {
                return token;
            }
        }
        return tokens.get(commaIndex);
    }

    /**
     * Returns whether these tokens have been generated by the ABAP Preprocessor
     * during a "Kettensatz" (chained statement) expansion.
     *
     * The expansion allows to write code like this: <code>
     * one two three : four five, six seven.
     * </code> which is expanded to <code>
     * one two three four five. one two three six seven.
     * </code> Of course, more segments are allowed and each segment can contain one
     * or more tokens. Important are the colon, commas, and dot.
     *
     * Common use case: <code>WRITE: x = 5, y = 1.</code>
     *
     * @return <code>true</code> if the first two tokens have the same offset;
     *         <code>false</code> otherwise, or if fewer than two tokens are given
     */
    public static boolean isGeneratedFromAbapKettensatzStatement(UnmodifiableList<IToken> includedTokens) {
        if (includedTokens.size() < 2) {
            return false;
        }
        // We use the fact that the ABAP preprocessor gives the same offset to each
        // generated token. NOTE(review): presumably this is the offset of the
        // reference token handed to TokenStreamUtils.copyTokens (the first
        // non-comment token after the comma), not that of the colon - confirm
        // against TokenStreamUtils.
        return includedTokens.get(0).getOffset() == includedTokens.get(1).getOffset();
    }
}