001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 The ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.scanner;
018
019import static eu.cqse.check.framework.scanner.ELanguage.ABAP;
020import static eu.cqse.check.framework.scanner.ELanguage.ABAP_DDIC;
021import static eu.cqse.check.framework.scanner.ELanguage.ADA;
022import static eu.cqse.check.framework.scanner.ELanguage.COBOL;
023import static eu.cqse.check.framework.scanner.ELanguage.CPP;
024import static eu.cqse.check.framework.scanner.ELanguage.CS;
025import static eu.cqse.check.framework.scanner.ELanguage.DELPHI;
026import static eu.cqse.check.framework.scanner.ELanguage.FORTRAN;
027import static eu.cqse.check.framework.scanner.ELanguage.GO;
028import static eu.cqse.check.framework.scanner.ELanguage.GOSU;
029import static eu.cqse.check.framework.scanner.ELanguage.GROOVY;
030import static eu.cqse.check.framework.scanner.ELanguage.HANA_VIEW;
031import static eu.cqse.check.framework.scanner.ELanguage.IEC61131;
032import static eu.cqse.check.framework.scanner.ELanguage.JAVA;
033import static eu.cqse.check.framework.scanner.ELanguage.JAVADOC;
034import static eu.cqse.check.framework.scanner.ELanguage.JAVASCRIPT;
035import static eu.cqse.check.framework.scanner.ELanguage.JPL;
036import static eu.cqse.check.framework.scanner.ELanguage.KOTLIN;
037import static eu.cqse.check.framework.scanner.ELanguage.LINE;
038import static eu.cqse.check.framework.scanner.ELanguage.MATLAB;
039import static eu.cqse.check.framework.scanner.ELanguage.MTEXT;
040import static eu.cqse.check.framework.scanner.ELanguage.NL_REQUIREMENTS;
041import static eu.cqse.check.framework.scanner.ELanguage.NL_TESTS;
042import static eu.cqse.check.framework.scanner.ELanguage.OBJECTIVE_C;
043import static eu.cqse.check.framework.scanner.ELanguage.OCAML;
044import static eu.cqse.check.framework.scanner.ELanguage.OPEN_CL;
045import static eu.cqse.check.framework.scanner.ELanguage.OSCRIPT;
046import static eu.cqse.check.framework.scanner.ELanguage.PHP;
047import static eu.cqse.check.framework.scanner.ELanguage.PL1;
048import static eu.cqse.check.framework.scanner.ELanguage.PLSQL;
049import static eu.cqse.check.framework.scanner.ELanguage.PYTHON;
050import static eu.cqse.check.framework.scanner.ELanguage.RUBY;
051import static eu.cqse.check.framework.scanner.ELanguage.RUST;
052import static eu.cqse.check.framework.scanner.ELanguage.SIMULINK;
053import static eu.cqse.check.framework.scanner.ELanguage.SQLSCRIPT;
054import static eu.cqse.check.framework.scanner.ELanguage.SWIFT;
055import static eu.cqse.check.framework.scanner.ELanguage.TEXT;
056import static eu.cqse.check.framework.scanner.ELanguage.TSQL;
057import static eu.cqse.check.framework.scanner.ELanguage.VB;
058import static eu.cqse.check.framework.scanner.ELanguage.XML;
059import static eu.cqse.check.framework.scanner.ELanguage.XTEND;
060import static eu.cqse.check.framework.scanner.ELanguageConstants.CLIKE_COMMENT_REGEX;
061import static eu.cqse.check.framework.scanner.ETokenType.COLON;
062import static eu.cqse.check.framework.scanner.ETokenType.DOT;
063import static eu.cqse.check.framework.scanner.ETokenType.EOF;
064import static eu.cqse.check.framework.scanner.ETokenType.EOL;
065import static eu.cqse.check.framework.scanner.ETokenType.EXCLAMATION;
066import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
067import static eu.cqse.check.framework.scanner.ETokenType.LEFT_ANGLE_BRACKET;
068import static eu.cqse.check.framework.scanner.ETokenType.MULTIPLE_EOL;
069import static eu.cqse.check.framework.scanner.ETokenType.QUESTION;
070import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
071import static eu.cqse.check.framework.scanner.ETokenType.RIGHT_ANGLE_BRACKET;
072import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
073import static eu.cqse.check.framework.scanner.ETokenType.SLASH;
074import static eu.cqse.check.framework.scanner.ETokenType.THEN;
075import static org.conqat.lib.commons.collections.CollectionUtils.asHashSet;
076
077import java.util.HashMap;
078import java.util.Map;
079import java.util.Set;
080import java.util.regex.Pattern;
081
082import org.conqat.lib.commons.string.StringUtils;
083
084/**
085 * Class which provides parser relevant properties for the languages supported
086 * by the scanner framework. These are intentionally stored separately from
087 * {@link eu.cqse.check.framework.scanner.ELanguage} as ELanguage does not have
088 * access to the language's token classes.
089 */
090public class LanguageProperties {
091
092        /** This maps from extensions to languages. */
093        private static final Map<ELanguage, LanguageProperties> LANGUAGE_PROPERTIES_MAPPING = new HashMap<>();
094
095        static {
096                register(JAVA, JavaToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, true);
097                register(CPP, CPPToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, true);
098                register(OPEN_CL, OpenCLToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, true);
099                register(RUST, RustToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, true);
100                register(VB, VBToken::new, asHashSet(COLON, EOL), "^ *'", false);
101                register(PL1, PL1Token::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, false);
102                register(COBOL, CobolToken::new, new CobolStatementOracle(), "^ *[*/]+", false);
103                register(CS, CSToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, true);
104                register(ABAP, ABAPToken::new, asHashSet(DOT), "^ *[*\"]+", false);
105                register(ABAP_DDIC, TextToken::new, asHashSet(EOL), "", false);
106                register(ADA, AdaToken::new, asHashSet(SEMICOLON, THEN), "^ *--+", false);
107                register(TEXT, TextToken::new, asHashSet(DOT, QUESTION, EXCLAMATION, COLON, MULTIPLE_EOL), "", false);
108                register(XML, XMLToken::new, asHashSet(LEFT_ANGLE_BRACKET, SLASH, RIGHT_ANGLE_BRACKET), "(^ *<!--+)|(--+>$)",
109                                false);
110                register(SQLSCRIPT, HanaSQLScriptToken::new, asHashSet(SEMICOLON), "(^ *(--+|/[*]+))|([*]+/ *$)", false);
111                register(HANA_VIEW, XMLToken::new, asHashSet(LEFT_ANGLE_BRACKET, SLASH, RIGHT_ANGLE_BRACKET),
112                                "(^ *<!--+)|(--+>$)", false);
113                register(PLSQL, PLSQLToken::new, asHashSet(SEMICOLON), "(^ *(--+|/[*]+))|([*]+/ *$)", false);
114                register(PYTHON, PythonToken::new, asHashSet(EOL, EOF), "^ *#+", false);
115                register(TSQL, TSQLToken::new, asHashSet(EOL), "(^ *(--+|/[*]+))|([*]+/ *$)", false);
116                register(MATLAB, MatlabToken::new, asHashSet(EOL, SEMICOLON), "^ *%+", false);
117                register(PHP, PHPToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, false);
118                register(RUBY, RubyToken::new, asHashSet(EOL), "^ *#+", false);
119                register(JAVASCRIPT, JavaScriptToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, true);
120                register(MTEXT, MTextToken::new, asHashSet(EOL), "(?i)^ *[.]DSC", false);
121                register(JPL, JPLToken::new, asHashSet(EOL), CLIKE_COMMENT_REGEX, false);
122                register(LINE, LineToken::new, asHashSet(EOL), "", false);
123                register(DELPHI, DelphiToken::new, asHashSet(SEMICOLON), "(^ *([(][*]+|[{]|//+)|([*]+[)]|[}])$)", false);
124                register(IEC61131, Iec61131Token::new, asHashSet(SEMICOLON, ETokenType.ELEMENT),
125                                "(^ *([(][*]+|//+)|([*]+[)])$)", false);
126                register(FORTRAN, FortranToken::new, asHashSet(EOL), "^ *!+", false);
127                register(XTEND, XtendToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE, EOL), CLIKE_COMMENT_REGEX, true);
128                register(SWIFT, SwiftToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE, EOL), CLIKE_COMMENT_REGEX, true);
129                register(OCAML, OCamlToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), "", false);
130                register(OSCRIPT, OScriptToken::new, asHashSet(EOL, EOF), CLIKE_COMMENT_REGEX, false);
131                register(GROOVY, GroovyToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, true);
132                register(NL_REQUIREMENTS, TextToken::new, asHashSet(DOT, QUESTION, EXCLAMATION, COLON, MULTIPLE_EOL), "",
133                                false);
134                register(NL_TESTS, TextToken::new, asHashSet(DOT, QUESTION, EXCLAMATION, COLON, MULTIPLE_EOL), "", false);
135                register(SIMULINK, TextToken::new, asHashSet(EOL), "", false);
136                register(GOSU, GosuToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE), CLIKE_COMMENT_REGEX, true);
137                register(KOTLIN, KotlinToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE, EOL, EOF), CLIKE_COMMENT_REGEX, true);
138                register(OBJECTIVE_C, ObjectiveCToken::new, asHashSet(SEMICOLON, RBRACE, LBRACE, EOL, EOF), CLIKE_COMMENT_REGEX,
139                                true);
140                register(JAVADOC, JavaDocToken::new, asHashSet(), "", false);
141                register(GO, GoToken::new, asHashSet(RBRACE, LBRACE, EOL, EOF), "", false);
142        }
143
144        private static void register(ELanguage language, ITokenFactory tokenFactory, Set<ETokenType> statementDelimiters,
145                        String commentLineTrimRegex, boolean cLike) {
146                LANGUAGE_PROPERTIES_MAPPING.put(language, new LanguageProperties(tokenFactory,
147                                new StatementOracle(statementDelimiters), commentLineTrimRegex, cLike));
148        }
149
150        private static void register(ELanguage language, ITokenFactory tokenFactory, IStatementOracle oracle,
151                        String commentLineTrimRegex, boolean cLike) {
152                LANGUAGE_PROPERTIES_MAPPING.put(language,
153                                new LanguageProperties(tokenFactory, oracle, commentLineTrimRegex, cLike));
154        }
155
156        /**
157         * @return The {@link LanguageProperties} associated with the given language.
158         */
159        public static LanguageProperties of(ELanguage language) {
160                return LANGUAGE_PROPERTIES_MAPPING.get(language);
161        }
162
163        /** The statement oracle for this language. */
164        private final IStatementOracle statementOracle;
165
166        /**
167         * Pattern describing the parts of a comment line that should be trimmed to
168         * reveal the text.
169         */
170        private final Pattern commentLineTrimPattern;
171
172        /** Whether the language is C-like, i.e. based on curly braces, etc. */
173        private final boolean cLike;
174
175        private final ITokenFactory tokenFactory;
176
177        /** Create language properties. */
178        private LanguageProperties(ITokenFactory tokenFactory, IStatementOracle oracle, String commentLineTrimRegex,
179                        boolean cLike) {
180                this.statementOracle = oracle;
181                this.commentLineTrimPattern = Pattern.compile(commentLineTrimRegex);
182                this.cLike = cLike;
183                this.tokenFactory = tokenFactory;
184        }
185
186        /** Get statement oracle for this language. */
187        public IStatementOracle getStatementOracle() {
188                return statementOracle;
189        }
190
191        /**
192         * Get the content of a comment, i.e. with the comment delimiters removed.
193         */
194        public String getCommentContent(String commentText) {
195                StringBuilder content = new StringBuilder();
196                for (String line : StringUtils.splitLinesAsList(commentText)) {
197                        if (content.length() > 0) {
198                                content.append(StringUtils.LINE_SEPARATOR);
199                        }
200                        content.append(commentLineTrimPattern.matcher(line).replaceAll(StringUtils.EMPTY_STRING).trim());
201                }
202                return content.toString();
203        }
204
205        /** @see #cLike */
206        public boolean isCLike() {
207                return cLike;
208        }
209
210        /** Creates a new token for the language. */
211        public IToken createToken(ETokenType type, int offset, int lineNumber, String text, String originId) {
212                return tokenFactory.create(type, offset, lineNumber, text, originId);
213        }
214
215        /** Factory interface for token creation. */
216        @FunctionalInterface
217        private interface ITokenFactory {
218
219                /** Creates a token. */
220                IToken create(ETokenType type, int offset, int lineNumber, String text, String originId);
221        }
222}