001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 the ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package eu.cqse.check.framework.shallowparser.prettyprint; 018 019import static eu.cqse.check.framework.scanner.ELanguage.COBOL; 020import static eu.cqse.check.framework.scanner.ETokenType.COMMA; 021import static eu.cqse.check.framework.scanner.ETokenType.DOT; 022import static eu.cqse.check.framework.scanner.ETokenType.END_OF_LINE_COMMENT; 023import static eu.cqse.check.framework.scanner.ETokenType.GT; 024import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER; 025import static eu.cqse.check.framework.scanner.ETokenType.LT; 026import static eu.cqse.check.framework.scanner.ETokenType.SIX_COLUMNS_COMMENT; 027import static eu.cqse.check.framework.scanner.ETokenType.TRADITIONAL_COMMENT; 028import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.ATTRIBUTE; 029import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.META; 030import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.METHOD; 031import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.MODULE; 032import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.STATEMENT; 033 034import java.util.ArrayList; 035import java.util.EnumSet; 036import java.util.List; 037 038import org.conqat.lib.commons.assertion.CCSMAssert; 039import org.conqat.lib.commons.collections.CollectionUtils; 040import org.conqat.lib.commons.collections.UnmodifiableList; 041import org.conqat.lib.commons.string.StringUtils; 042 043import eu.cqse.check.framework.scanner.ELanguage; 044import eu.cqse.check.framework.scanner.ETokenType; 045import eu.cqse.check.framework.scanner.ETokenType.ETokenClass; 046import eu.cqse.check.framework.scanner.IToken; 047import eu.cqse.check.framework.scanner.ScannerUtils; 048import eu.cqse.check.framework.shallowparser.ShallowParserException; 049import eu.cqse.check.framework.shallowparser.ShallowParserFactory; 050import eu.cqse.check.framework.shallowparser.SubTypeNames; 051import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType; 052import eu.cqse.check.framework.shallowparser.framework.IShallowEntityVisitor; 053import eu.cqse.check.framework.shallowparser.framework.ShallowEntity; 054 055/** 056 * Class for pretty printing code based on the shallow parser. 057 */ 058public class PrettyPrinter implements IShallowEntityVisitor { 059 060 /** Indentation depth in spaces. */ 061 private static final int INDENTATION_DEPTH = 4; 062 063 /** Extra indent for continued long lines. */ 064 private static final int LONG_LINE_EXTRA_INDENT = 2 * INDENTATION_DEPTH; 065 066 /** Max length of a line. */ 067 private static final int MAX_LINE_LENGTH = 80; 068 069 /** Four space characters */ 070 private static final String FOUR_SPACES = " "; 071 072 /** The language. */ 073 private final ELanguage language; 074 075 /** The tokens. */ 076 private final List<IToken> tokens; 077 078 /** Index into {@link #tokens}. */ 079 private int tokenIndex = 0; 080 081 /** Builder for output code. */ 082 private final StringBuilder builder = new StringBuilder(); 083 084 /** Current indentation level. */ 085 private int indent = 0; 086 087 /** Current length of a line. */ 088 private int currentLineLength = 0; 089 090 /** The previously printed token. */ 091 private IToken previousToken = null; 092 093 /** Intended spacing between statements. */ 094 private ESpacing statementSpacing = ESpacing.NONE; 095 096 /** Whether we continue a long line. */ 097 private boolean inLongLineContinuation = false; 098 099 /** The currently processed shallow entity. */ 100 private ShallowEntity currentEntity; 101 102 /** 103 * Whether the currently processed token is the first in the sequence of a 104 * statement. 105 */ 106 private boolean isFirstTokenInSequence = false; 107 108 /** 109 * Indentation depth for ABAP chained statements. If we are not in a chained 110 * statement, this is 0. 111 */ 112 private int abapChainedStatementDepth = 0; 113 114 /** Constructor. */ 115 public PrettyPrinter(List<IToken> tokens, ELanguage language) { 116 this.language = language; 117 this.tokens = tokens; 118 } 119 120 /** Formats and returns the code. */ 121 public String format() throws ShallowParserException { 122 List<ShallowEntity> entities = ShallowParserFactory.createParser(language).parseTopLevel(tokens); 123 ShallowEntity.traverse(entities, this); 124 return builder.toString(); 125 } 126 127 /** {@inheritDoc} */ 128 @Override 129 public boolean visit(ShallowEntity entity) { 130 currentEntity = entity; 131 setStatementSpacing(ESpacing.NEW_LINE); 132 if (language == ELanguage.ABAP && SubTypeNames.VISIBILITY.equals(entity.getSubtype())) { 133 setStatementSpacing(ESpacing.EMPTY_LINE); 134 } 135 136 appendTokens(entity.ownStartTokens()); 137 indent += 1; 138 return true; 139 } 140 141 /** {@inheritDoc} */ 142 @Override 143 public void endVisit(ShallowEntity entity) { 144 indent -= 1; 145 if (!entity.getChildren().isEmpty()) { 146 appendTokens(entity.ownEndTokens()); 147 } 148 setStatementSpacing(PrettyPrintingUtils.getStatementSpacing(entity, language)); 149 } 150 151 /** Sets the statement spacing. */ 152 private void setStatementSpacing(ESpacing spacing) { 153 statementSpacing = PrettyPrintingUtils.maxSpacing(statementSpacing, spacing); 154 } 155 156 /** Appends the given tokens into a single line. */ 157 private void appendTokens(UnmodifiableList<IToken> tokens) { 158 isFirstTokenInSequence = true; 159 for (IToken token : completeTokens(tokens)) { 160 appendToken(token); 161 if (token.getType().getTokenClass() != ETokenClass.COMMENT) { 162 isFirstTokenInSequence = false; 163 } 164 } 165 inLongLineContinuation = false; 166 abapChainedStatementDepth = 0; 167 } 168 169 /** Appends a single token to the output. */ 170 private void appendToken(IToken token) { 171 ESpacing spacing = statementSpacing; 172 statementSpacing = ESpacing.NONE; 173 boolean addAdditionalIndentForCobolToken = false; 174 if (previousToken != null) { 175 spacing = PrettyPrintingUtils.maxSpacing(spacing, 176 PrettyPrintingUtils.getPostTokenSpacing(previousToken, abapChainedStatementDepth)); 177 spacing = PrettyPrintingUtils.maxSpacing(spacing, getInterTokenSpacing(previousToken, token)); 178 addAdditionalIndentForCobolToken = addAdditionalIndentForCobolToken(token); 179 } 180 boolean isLastAndUnparsedDotInCobol = isLastAndUnparsedDotInCobolMethod(token.getType(), spacing); 181 previousToken = token; 182 183 boolean noIndent = PrettyPrintingUtils.determineNoIndent(token); 184 spacing = PrettyPrintingUtils.maxSpacing(spacing, getPreTokenSpacing(token)); 185 spacing = reckonSpacingForCobolEntity(spacing, token.getType(), isLastAndUnparsedDotInCobol); 186 realizeSpacing(spacing, noIndent); 187 188 // never format literals (e.g. multiline strings from HEREDOC) 189 if (token.getType().getTokenClass() == ETokenClass.LITERAL) { 190 appendLiteral(token, StringUtils.splitLinesAsList(token.getText()), spacing); 191 return; 192 } 193 194 appendNonLiteral(token, noIndent, addAdditionalIndentForCobolToken); 195 } 196 197 /** 198 * Returns true if the current token is the last and an unparsed dot in a Cobol 199 * method. Otherwise false. Such dots need to be tracked to ensure they don't 200 * appear on new lines. 201 */ 202 private boolean isLastAndUnparsedDotInCobolMethod(ETokenType type, ESpacing oldSpacing) { 203 return currentEntity.getType() == METHOD && type == DOT && previousToken.getType() != DOT 204 && oldSpacing == ESpacing.NEW_LINE; 205 } 206 207 /** 208 * Recomputes and returns no spacing. This applies to COBOL meta and module 209 * entities that have just started to be printed, or COBOL statements ending 210 * with a DOT. 211 */ 212 private ESpacing reckonSpacingForCobolEntity(ESpacing oldSpacing, ETokenType tokenType, 213 boolean isLastAndUnparsedDotInCobolMethod) { 214 if (language == COBOL) { 215 EShallowEntityType currentEntityType = currentEntity.getType(); 216 boolean isFirstTokenInMethod = (EnumSet.of(META, MODULE).contains(currentEntityType) || PrettyPrintingUtils 217 .methodEntityIsNotSectionOrParagraph(currentEntityType, currentEntity.getSubtype())) 218 && !EnumSet.of(TRADITIONAL_COMMENT, SIX_COLUMNS_COMMENT).contains(tokenType) 219 && isFirstTokenInSequence; 220 if ((tokenType == ETokenType.DOT && currentEntityType == STATEMENT) || isFirstTokenInMethod 221 || isLastAndUnparsedDotInCobolMethod) { 222 return ESpacing.NONE; 223 } 224 } 225 226 return oldSpacing; 227 } 228 229 /** Append text content for non-literal tokens into the pretty-print. */ 230 private void appendNonLiteral(IToken token, boolean noIndent, boolean addAdditionalIndentForCobol) { 231 String additionalIndent = PrettyPrintingUtils.determineAdditionalIndent(token, language); 232 ECasing casing = PrettyPrintingUtils.determineCasing(token); 233 234 boolean first = true; 235 for (String line : StringUtils.splitLinesAsList(token.getText())) { 236 if (!first) { 237 realizeSpacing(ESpacing.NEW_LINE, noIndent); 238 builder.append(additionalIndent); 239 currentLineLength += additionalIndent.length(); 240 } 241 first = false; 242 String text = casing.apply(PrettyPrintingUtils.trimTokenTextLine(line, language)); 243 if (addAdditionalIndentForCobol) { 244 builder.append(FOUR_SPACES); 245 } 246 builder.append(text); 247 currentLineLength += text.length(); 248 } 249 } 250 251 /** 252 * Returns true if to add additional indentation for Cobol statements found 253 * outside of (or before) the procedure division. They are usually in other 254 * divisions and are not well aligned in pretty-print mode. Otherwise false. 255 */ 256 private boolean addAdditionalIndentForCobolToken(IToken token) { 257 ETokenType type = token.getType(); 258 return currentEntity.getType() == STATEMENT && isFirstTokenInSequence 259 && PrettyPrintingUtils.isOutsideOfProcedureDivision(currentEntity) 260 && previousToken.getType() != SIX_COLUMNS_COMMENT 261 && !EnumSet.of(TRADITIONAL_COMMENT, SIX_COLUMNS_COMMENT).contains(type) && type != DOT; 262 } 263 264 /** 265 * Returns true if we are currently starting to pretty-print a Shallow entity 266 * attribute in Cobol. This is usually a file or data entry mostly starting with 267 * a numeric literal 268 */ 269 private boolean isStartOfCobolFileOrDataEntry(IToken token, ESpacing spacing) { 270 return language == COBOL && currentEntity.getType() == ATTRIBUTE && spacing == ESpacing.NEW_LINE 271 && !EnumSet.of(SIX_COLUMNS_COMMENT, TRADITIONAL_COMMENT).contains(token.getType()); 272 } 273 274 /** Appends a literal. */ 275 private void appendLiteral(IToken token, List<String> lines, ESpacing spacing) { 276 if (isStartOfCobolFileOrDataEntry(token, spacing)) { 277 builder.append(FOUR_SPACES); 278 } 279 280 builder.append(token.getText()); 281 if (lines.size() == 1) { 282 currentLineLength += lines.get(0).length(); 283 } else { 284 currentLineLength = CollectionUtils.getLast(lines).length(); 285 } 286 } 287 288 /** Returns the spacing to be used before the given token. */ 289 private ESpacing getPreTokenSpacing(IToken token) { 290 ETokenType tokenType = token.getType(); 291 ETokenClass tokenClass = tokenType.getTokenClass(); 292 293 if (language == COBOL) { 294 EShallowEntityType currentEntityType = currentEntity.getType(); 295 if (EnumSet.of(META, MODULE).contains(currentEntityType) 296 && EnumSet.of(COMMA, IDENTIFIER).contains(token.getType())) { 297 return ESpacing.SPACE; 298 } else if (shouldUseNewLineBeforeCobolToken(token)) { 299 return ESpacing.NEW_LINE; 300 } else if (currentEntityType == STATEMENT && EnumSet.of(LT, GT).contains(tokenType)) { 301 return ESpacing.SPACE; 302 } 303 } 304 305 if (tokenType == ETokenType.DOCUMENTATION_COMMENT) { 306 return ESpacing.EMPTY_LINE; 307 } 308 309 if (tokenClass == ETokenClass.COMMENT 310 && (PrettyPrintingUtils.isMultiLine(token) || language == ELanguage.ABAP)) { 311 return ESpacing.NEW_LINE; 312 } 313 314 if (tokenType == ETokenType.LBRACE) { 315 return ESpacing.SPACE; 316 } 317 318 // long lines 319 if (currentLineLength + token.getText().length() > MAX_LINE_LENGTH && !isFirstTokenInSequence) { 320 if (tokenClass != ETokenClass.DELIMITER && tokenClass != ETokenClass.OPERATOR) { 321 inLongLineContinuation = true; 322 return ESpacing.NEW_LINE; 323 } 324 } 325 326 return ESpacing.NONE; 327 } 328 329 private boolean shouldUseNewLineBeforeCobolToken(IToken token) { 330 return (token.getType().getTokenClass() == ETokenClass.KEYWORD 331 && token.getText().toLowerCase().trim().startsWith("end-") 332 && !PrettyPrintingUtils.isOutsideOfProcedureDivision(currentEntity)) 333 || EnumSet.of(TRADITIONAL_COMMENT, END_OF_LINE_COMMENT, SIX_COLUMNS_COMMENT).contains(token.getType()); 334 } 335 336 /** Returns the spacing to be used between the given tokens. */ 337 private ESpacing getInterTokenSpacing(IToken token1, IToken token2) { 338 ETokenType tokenType1 = token1.getType(); 339 ETokenType tokenType2 = token2.getType(); 340 341 if (language == COBOL && EnumSet.of(TRADITIONAL_COMMENT, ETokenType.DOT).contains(tokenType1) 342 && currentEntity.getType() == STATEMENT) { 343 return ESpacing.NEW_LINE; 344 } 345 346 // preserve empty lines 347 int token1EndLine = token1.getLineNumber() + StringUtils.countLines(token1.getText()) - 1; 348 if (token2.getLineNumber() > token1EndLine + 1) { 349 return ESpacing.EMPTY_LINE; 350 } 351 352 if (language == ELanguage.ABAP && tokenType2 == ETokenType.COLON) { 353 abapChainedStatementDepth = currentLineLength + 2; 354 return ESpacing.NONE; 355 } 356 357 // no space around ABAP method separators 358 EnumSet<ETokenType> methodSeparators = EnumSet.of(ETokenType.ARROW, ETokenType.EQGT, ETokenType.TILDE); 359 if (language == ELanguage.ABAP 360 && (methodSeparators.contains(tokenType1) || methodSeparators.contains(tokenType2))) { 361 return ESpacing.NONE; 362 } 363 364 // No space around generics 365 if (tokenType1 == LT || tokenType2 == LT || tokenType2 == GT) { 366 return ESpacing.NONE; 367 } 368 369 // No space in annotation names 370 if (tokenType1 == ETokenType.AT_OPERATOR) { 371 return ESpacing.NONE; 372 } 373 374 if (!PrettyPrintingUtils.isDelimiter(token1) && !PrettyPrintingUtils.isDelimiter(token2)) { 375 return ESpacing.SPACE; 376 } 377 378 return ESpacing.NONE; 379 } 380 381 /** Realizes the given spacing. */ 382 private void realizeSpacing(ESpacing spacing, boolean noIndent) { 383 // no spacing at start of output 384 if (builder.length() == 0) { 385 return; 386 } 387 388 switch (spacing) { 389 case NONE: 390 break; 391 case SPACE: 392 builder.append(StringUtils.SPACE); 393 currentLineLength += 1; 394 break; 395 case EMPTY_LINE: 396 builder.append(StringUtils.LINE_SEPARATOR); 397 // fallthrough intended 398 case NEW_LINE: 399 builder.append(StringUtils.LINE_SEPARATOR); 400 if (noIndent) { 401 currentLineLength = 0; 402 } else { 403 indent(); 404 } 405 break; 406 default: 407 CCSMAssert.fail("Unknown spacing: " + spacing); 408 } 409 } 410 411 /** Performs indentation. */ 412 private void indent() { 413 int spaceCount = indent * INDENTATION_DEPTH; 414 if (abapChainedStatementDepth > 0) { 415 spaceCount = abapChainedStatementDepth; 416 } 417 if (inLongLineContinuation) { 418 spaceCount += LONG_LINE_EXTRA_INDENT; 419 } 420 421 builder.append(StringUtils.fillString(spaceCount, StringUtils.SPACE_CHAR)); 422 currentLineLength = spaceCount; 423 } 424 425 /** Completes the given tokens with suppressed tokens from {@link #tokens}. */ 426 private List<IToken> completeTokens(List<IToken> tokens) { 427 List<IToken> result = new ArrayList<>(); 428 for (IToken token : tokens) { 429 while (tokenIndex < this.tokens.size() && this.tokens.get(tokenIndex) != token) { 430 result.add(this.tokens.get(tokenIndex)); 431 tokenIndex += 1; 432 } 433 result.add(token); 434 tokenIndex += 1; 435 } 436 return result; 437 } 438 439 /** Formats the given code. */ 440 public static String format(String code, ELanguage language) throws ShallowParserException { 441 return new PrettyPrinter(ScannerUtils.getTokens(code, language), language).format(); 442 } 443}