001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.prettyprint;
018
019import static eu.cqse.check.framework.scanner.ELanguage.COBOL;
020import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
021import static eu.cqse.check.framework.scanner.ETokenType.DOT;
022import static eu.cqse.check.framework.scanner.ETokenType.END_OF_LINE_COMMENT;
023import static eu.cqse.check.framework.scanner.ETokenType.GT;
024import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
025import static eu.cqse.check.framework.scanner.ETokenType.LT;
026import static eu.cqse.check.framework.scanner.ETokenType.SIX_COLUMNS_COMMENT;
027import static eu.cqse.check.framework.scanner.ETokenType.TRADITIONAL_COMMENT;
028import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.ATTRIBUTE;
029import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.META;
030import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.METHOD;
031import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.MODULE;
032import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.STATEMENT;
033
034import java.util.ArrayList;
035import java.util.EnumSet;
036import java.util.List;
037
038import org.conqat.lib.commons.assertion.CCSMAssert;
039import org.conqat.lib.commons.collections.CollectionUtils;
040import org.conqat.lib.commons.collections.UnmodifiableList;
041import org.conqat.lib.commons.string.StringUtils;
042
043import eu.cqse.check.framework.scanner.ELanguage;
044import eu.cqse.check.framework.scanner.ETokenType;
045import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
046import eu.cqse.check.framework.scanner.IToken;
047import eu.cqse.check.framework.scanner.ScannerUtils;
048import eu.cqse.check.framework.shallowparser.ShallowParserException;
049import eu.cqse.check.framework.shallowparser.ShallowParserFactory;
050import eu.cqse.check.framework.shallowparser.SubTypeNames;
051import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
052import eu.cqse.check.framework.shallowparser.framework.IShallowEntityVisitor;
053import eu.cqse.check.framework.shallowparser.framework.ShallowEntity;
054
055/**
056 * Class for pretty printing code based on the shallow parser.
057 */
058public class PrettyPrinter implements IShallowEntityVisitor {
059
060        /** Indentation depth in spaces. */
061        private static final int INDENTATION_DEPTH = 4;
062
063        /** Extra indent for continued long lines. */
064        private static final int LONG_LINE_EXTRA_INDENT = 2 * INDENTATION_DEPTH;
065
066        /** Max length of a line. */
067        private static final int MAX_LINE_LENGTH = 80;
068
069        /** Four space characters */
070        private static final String FOUR_SPACES = "    ";
071
072        /** The language. */
073        private final ELanguage language;
074
075        /** The tokens. */
076        private final List<IToken> tokens;
077
078        /** Index into {@link #tokens}. */
079        private int tokenIndex = 0;
080
081        /** Builder for output code. */
082        private final StringBuilder builder = new StringBuilder();
083
084        /** Current indentation level. */
085        private int indent = 0;
086
087        /** Current length of a line. */
088        private int currentLineLength = 0;
089
090        /** The previously printed token. */
091        private IToken previousToken = null;
092
093        /** Intended spacing between statements. */
094        private ESpacing statementSpacing = ESpacing.NONE;
095
096        /** Whether we continue a long line. */
097        private boolean inLongLineContinuation = false;
098
099        /** The currently processed shallow entity. */
100        private ShallowEntity currentEntity;
101
102        /**
103         * Whether the currently processed token is the first in the sequence of a
104         * statement.
105         */
106        private boolean isFirstTokenInSequence = false;
107
108        /**
109         * Indentation depth for ABAP chained statements. If we are not in a chained
110         * statement, this is 0.
111         */
112        private int abapChainedStatementDepth = 0;
113
114        /** Constructor. */
115        public PrettyPrinter(List<IToken> tokens, ELanguage language) {
116                this.language = language;
117                this.tokens = tokens;
118        }
119
120        /** Formats and returns the code. */
121        public String format() throws ShallowParserException {
122                List<ShallowEntity> entities = ShallowParserFactory.createParser(language).parseTopLevel(tokens);
123                ShallowEntity.traverse(entities, this);
124                return builder.toString();
125        }
126
127        /** {@inheritDoc} */
128        @Override
129        public boolean visit(ShallowEntity entity) {
130                currentEntity = entity;
131                setStatementSpacing(ESpacing.NEW_LINE);
132                if (language == ELanguage.ABAP && SubTypeNames.VISIBILITY.equals(entity.getSubtype())) {
133                        setStatementSpacing(ESpacing.EMPTY_LINE);
134                }
135
136                appendTokens(entity.ownStartTokens());
137                indent += 1;
138                return true;
139        }
140
141        /** {@inheritDoc} */
142        @Override
143        public void endVisit(ShallowEntity entity) {
144                indent -= 1;
145                if (!entity.getChildren().isEmpty()) {
146                        appendTokens(entity.ownEndTokens());
147                }
148                setStatementSpacing(PrettyPrintingUtils.getStatementSpacing(entity, language));
149        }
150
151        /** Sets the statement spacing. */
152        private void setStatementSpacing(ESpacing spacing) {
153                statementSpacing = PrettyPrintingUtils.maxSpacing(statementSpacing, spacing);
154        }
155
156        /** Appends the given tokens into a single line. */
157        private void appendTokens(UnmodifiableList<IToken> tokens) {
158                isFirstTokenInSequence = true;
159                for (IToken token : completeTokens(tokens)) {
160                        appendToken(token);
161                        if (token.getType().getTokenClass() != ETokenClass.COMMENT) {
162                                isFirstTokenInSequence = false;
163                        }
164                }
165                inLongLineContinuation = false;
166                abapChainedStatementDepth = 0;
167        }
168
169        /** Appends a single token to the output. */
170        private void appendToken(IToken token) {
171                ESpacing spacing = statementSpacing;
172                statementSpacing = ESpacing.NONE;
173                boolean addAdditionalIndentForCobolToken = false;
174                if (previousToken != null) {
175                        spacing = PrettyPrintingUtils.maxSpacing(spacing,
176                                        PrettyPrintingUtils.getPostTokenSpacing(previousToken, abapChainedStatementDepth));
177                        spacing = PrettyPrintingUtils.maxSpacing(spacing, getInterTokenSpacing(previousToken, token));
178                        addAdditionalIndentForCobolToken = addAdditionalIndentForCobolToken(token);
179                }
180                boolean isLastAndUnparsedDotInCobol = isLastAndUnparsedDotInCobolMethod(token.getType(), spacing);
181                previousToken = token;
182
183                boolean noIndent = PrettyPrintingUtils.determineNoIndent(token);
184                spacing = PrettyPrintingUtils.maxSpacing(spacing, getPreTokenSpacing(token));
185                spacing = reckonSpacingForCobolEntity(spacing, token.getType(), isLastAndUnparsedDotInCobol);
186                realizeSpacing(spacing, noIndent);
187
188                // never format literals (e.g. multiline strings from HEREDOC)
189                if (token.getType().getTokenClass() == ETokenClass.LITERAL) {
190                        appendLiteral(token, StringUtils.splitLinesAsList(token.getText()), spacing);
191                        return;
192                }
193
194                appendNonLiteral(token, noIndent, addAdditionalIndentForCobolToken);
195        }
196
197        /**
198         * Returns true if the current token is the last and an unparsed dot in a Cobol
199         * method. Otherwise false. Such dots need to be tracked to ensure they don't
200         * appear on new lines.
201         */
202        private boolean isLastAndUnparsedDotInCobolMethod(ETokenType type, ESpacing oldSpacing) {
203                return currentEntity.getType() == METHOD && type == DOT && previousToken.getType() != DOT
204                                && oldSpacing == ESpacing.NEW_LINE;
205        }
206
207        /**
208         * Recomputes and returns no spacing. This applies to COBOL meta and module
209         * entities that have just started to be printed, or COBOL statements ending
210         * with a DOT.
211         */
212        private ESpacing reckonSpacingForCobolEntity(ESpacing oldSpacing, ETokenType tokenType,
213                        boolean isLastAndUnparsedDotInCobolMethod) {
214                if (language == COBOL) {
215                        EShallowEntityType currentEntityType = currentEntity.getType();
216                        boolean isFirstTokenInMethod = (EnumSet.of(META, MODULE).contains(currentEntityType) || PrettyPrintingUtils
217                                        .methodEntityIsNotSectionOrParagraph(currentEntityType, currentEntity.getSubtype()))
218                                        && !EnumSet.of(TRADITIONAL_COMMENT, SIX_COLUMNS_COMMENT).contains(tokenType)
219                                        && isFirstTokenInSequence;
220                        if ((tokenType == ETokenType.DOT && currentEntityType == STATEMENT) || isFirstTokenInMethod
221                                        || isLastAndUnparsedDotInCobolMethod) {
222                                return ESpacing.NONE;
223                        }
224                }
225
226                return oldSpacing;
227        }
228
229        /** Append text content for non-literal tokens into the pretty-print. */
230        private void appendNonLiteral(IToken token, boolean noIndent, boolean addAdditionalIndentForCobol) {
231                String additionalIndent = PrettyPrintingUtils.determineAdditionalIndent(token, language);
232                ECasing casing = PrettyPrintingUtils.determineCasing(token);
233
234                boolean first = true;
235                for (String line : StringUtils.splitLinesAsList(token.getText())) {
236                        if (!first) {
237                                realizeSpacing(ESpacing.NEW_LINE, noIndent);
238                                builder.append(additionalIndent);
239                                currentLineLength += additionalIndent.length();
240                        }
241                        first = false;
242                        String text = casing.apply(PrettyPrintingUtils.trimTokenTextLine(line, language));
243                        if (addAdditionalIndentForCobol) {
244                                builder.append(FOUR_SPACES);
245                        }
246                        builder.append(text);
247                        currentLineLength += text.length();
248                }
249        }
250
251        /**
252         * Returns true if to add additional indentation for Cobol statements found
253         * outside of (or before) the procedure division. They are usually in other
254         * divisions and are not well aligned in pretty-print mode. Otherwise false.
255         */
256        private boolean addAdditionalIndentForCobolToken(IToken token) {
257                ETokenType type = token.getType();
258                return currentEntity.getType() == STATEMENT && isFirstTokenInSequence
259                                && PrettyPrintingUtils.isOutsideOfProcedureDivision(currentEntity)
260                                && previousToken.getType() != SIX_COLUMNS_COMMENT
261                                && !EnumSet.of(TRADITIONAL_COMMENT, SIX_COLUMNS_COMMENT).contains(type) && type != DOT;
262        }
263
264        /**
265         * Returns true if we are currently starting to pretty-print a Shallow entity
266         * attribute in Cobol. This is usually a file or data entry mostly starting with
267         * a numeric literal
268         */
269        private boolean isStartOfCobolFileOrDataEntry(IToken token, ESpacing spacing) {
270                return language == COBOL && currentEntity.getType() == ATTRIBUTE && spacing == ESpacing.NEW_LINE
271                                && !EnumSet.of(SIX_COLUMNS_COMMENT, TRADITIONAL_COMMENT).contains(token.getType());
272        }
273
274        /** Appends a literal. */
275        private void appendLiteral(IToken token, List<String> lines, ESpacing spacing) {
276                if (isStartOfCobolFileOrDataEntry(token, spacing)) {
277                        builder.append(FOUR_SPACES);
278                }
279
280                builder.append(token.getText());
281                if (lines.size() == 1) {
282                        currentLineLength += lines.get(0).length();
283                } else {
284                        currentLineLength = CollectionUtils.getLast(lines).length();
285                }
286        }
287
288        /** Returns the spacing to be used before the given token. */
289        private ESpacing getPreTokenSpacing(IToken token) {
290                ETokenType tokenType = token.getType();
291                ETokenClass tokenClass = tokenType.getTokenClass();
292
293                if (language == COBOL) {
294                        EShallowEntityType currentEntityType = currentEntity.getType();
295                        if (EnumSet.of(META, MODULE).contains(currentEntityType)
296                                        && EnumSet.of(COMMA, IDENTIFIER).contains(token.getType())) {
297                                return ESpacing.SPACE;
298                        } else if (shouldUseNewLineBeforeCobolToken(token)) {
299                                return ESpacing.NEW_LINE;
300                        } else if (currentEntityType == STATEMENT && EnumSet.of(LT, GT).contains(tokenType)) {
301                                return ESpacing.SPACE;
302                        }
303                }
304
305                if (tokenType == ETokenType.DOCUMENTATION_COMMENT) {
306                        return ESpacing.EMPTY_LINE;
307                }
308
309                if (tokenClass == ETokenClass.COMMENT
310                                && (PrettyPrintingUtils.isMultiLine(token) || language == ELanguage.ABAP)) {
311                        return ESpacing.NEW_LINE;
312                }
313
314                if (tokenType == ETokenType.LBRACE) {
315                        return ESpacing.SPACE;
316                }
317
318                // long lines
319                if (currentLineLength + token.getText().length() > MAX_LINE_LENGTH && !isFirstTokenInSequence) {
320                        if (tokenClass != ETokenClass.DELIMITER && tokenClass != ETokenClass.OPERATOR) {
321                                inLongLineContinuation = true;
322                                return ESpacing.NEW_LINE;
323                        }
324                }
325
326                return ESpacing.NONE;
327        }
328
329        private boolean shouldUseNewLineBeforeCobolToken(IToken token) {
330                return (token.getType().getTokenClass() == ETokenClass.KEYWORD
331                                && token.getText().toLowerCase().trim().startsWith("end-")
332                                && !PrettyPrintingUtils.isOutsideOfProcedureDivision(currentEntity))
333                                || EnumSet.of(TRADITIONAL_COMMENT, END_OF_LINE_COMMENT, SIX_COLUMNS_COMMENT).contains(token.getType());
334        }
335
336        /** Returns the spacing to be used between the given tokens. */
337        private ESpacing getInterTokenSpacing(IToken token1, IToken token2) {
338                ETokenType tokenType1 = token1.getType();
339                ETokenType tokenType2 = token2.getType();
340
341                if (language == COBOL && EnumSet.of(TRADITIONAL_COMMENT, ETokenType.DOT).contains(tokenType1)
342                                && currentEntity.getType() == STATEMENT) {
343                        return ESpacing.NEW_LINE;
344                }
345
346                // preserve empty lines
347                int token1EndLine = token1.getLineNumber() + StringUtils.countLines(token1.getText()) - 1;
348                if (token2.getLineNumber() > token1EndLine + 1) {
349                        return ESpacing.EMPTY_LINE;
350                }
351
352                if (language == ELanguage.ABAP && tokenType2 == ETokenType.COLON) {
353                        abapChainedStatementDepth = currentLineLength + 2;
354                        return ESpacing.NONE;
355                }
356
357                // no space around ABAP method separators
358                EnumSet<ETokenType> methodSeparators = EnumSet.of(ETokenType.ARROW, ETokenType.EQGT, ETokenType.TILDE);
359                if (language == ELanguage.ABAP
360                                && (methodSeparators.contains(tokenType1) || methodSeparators.contains(tokenType2))) {
361                        return ESpacing.NONE;
362                }
363
364                // No space around generics
365                if (tokenType1 == LT || tokenType2 == LT || tokenType2 == GT) {
366                        return ESpacing.NONE;
367                }
368
369                // No space in annotation names
370                if (tokenType1 == ETokenType.AT_OPERATOR) {
371                        return ESpacing.NONE;
372                }
373
374                if (!PrettyPrintingUtils.isDelimiter(token1) && !PrettyPrintingUtils.isDelimiter(token2)) {
375                        return ESpacing.SPACE;
376                }
377
378                return ESpacing.NONE;
379        }
380
381        /** Realizes the given spacing. */
382        private void realizeSpacing(ESpacing spacing, boolean noIndent) {
383                // no spacing at start of output
384                if (builder.length() == 0) {
385                        return;
386                }
387
388                switch (spacing) {
389                case NONE:
390                        break;
391                case SPACE:
392                        builder.append(StringUtils.SPACE);
393                        currentLineLength += 1;
394                        break;
395                case EMPTY_LINE:
396                        builder.append(StringUtils.LINE_SEPARATOR);
397                        // fallthrough intended
398                case NEW_LINE:
399                        builder.append(StringUtils.LINE_SEPARATOR);
400                        if (noIndent) {
401                                currentLineLength = 0;
402                        } else {
403                                indent();
404                        }
405                        break;
406                default:
407                        CCSMAssert.fail("Unknown spacing: " + spacing);
408                }
409        }
410
411        /** Performs indentation. */
412        private void indent() {
413                int spaceCount = indent * INDENTATION_DEPTH;
414                if (abapChainedStatementDepth > 0) {
415                        spaceCount = abapChainedStatementDepth;
416                }
417                if (inLongLineContinuation) {
418                        spaceCount += LONG_LINE_EXTRA_INDENT;
419                }
420
421                builder.append(StringUtils.fillString(spaceCount, StringUtils.SPACE_CHAR));
422                currentLineLength = spaceCount;
423        }
424
425        /** Completes the given tokens with suppressed tokens from {@link #tokens}. */
426        private List<IToken> completeTokens(List<IToken> tokens) {
427                List<IToken> result = new ArrayList<>();
428                for (IToken token : tokens) {
429                        while (tokenIndex < this.tokens.size() && this.tokens.get(tokenIndex) != token) {
430                                result.add(this.tokens.get(tokenIndex));
431                                tokenIndex += 1;
432                        }
433                        result.add(token);
434                        tokenIndex += 1;
435                }
436                return result;
437        }
438
439        /** Formats the given code. */
440        public static String format(String code, ELanguage language) throws ShallowParserException {
441                return new PrettyPrinter(ScannerUtils.getTokens(code, language), language).format();
442        }
443}