001/*-----------------------------------------------------------------------+ 002 | org.conqat.engine.index.incubator 003 | | 004 $Id$ 005 | | 006 | Copyright (c) 2009-2013 CQSE GmbH | 007 +-----------------------------------------------------------------------*/ 008package eu.cqse.check.framework.util.tokens; 009 010import java.util.ArrayList; 011import java.util.Collections; 012import java.util.List; 013import java.util.Stack; 014 015import org.conqat.lib.commons.assertion.CCSMAssert; 016 017import eu.cqse.check.framework.scanner.ELanguage; 018import eu.cqse.check.framework.scanner.ETokenType; 019import eu.cqse.check.framework.scanner.IToken; 020import eu.cqse.check.framework.scanner.Token; 021 022/** 023 * Splits a token stream into multiple parts. 024 */ 025public class TokenStreamSplitter { 026 027 /** The sentinel token to use when splitting nested structures. */ 028 public static final IToken SENTINEL_TOKEN = new SentinelToken(); 029 030 /** The split parts. */ 031 private List<List<IToken>> tokenStreams = new ArrayList<>(); 032 033 /** Constructor. */ 034 public TokenStreamSplitter(List<IToken> tokens) { 035 tokenStreams.add(tokens); 036 } 037 038 /** Returns the split streams. */ 039 public List<List<IToken>> getTokenStreams() { 040 return tokenStreams; 041 } 042 043 /** 044 * Splits all streams at the given open and close tokens, e.g. parentheses. The 045 * part inside the parentheses will become one new stream and the rest of the 046 * original stream as well. In the outside part, the inside part is replaced by 047 * the {@link #SENTINEL_TOKEN} token. 048 * 049 * For example: <code>a ( b ) c</code> would be split into 050 * <code>a ( sentinel ) c</code> and <code>b</code>. 051 * 052 * Outer parts are guaranteed to be stored before inner parts in the 053 * {@link #tokenStreams} list. 054 */ 055 public void splitNested(ETokenType openToken, ETokenType closeToken) { 056 List<List<IToken>> splitStreams = new ArrayList<>(); 057 for (List<IToken> tokens : tokenStreams) { 058 splitStreams.addAll(split(tokens, openToken, closeToken)); 059 } 060 tokenStreams = splitStreams; 061 } 062 063 /** 064 * Splits a single token list at the open/close tokens by performing a depth 065 * first traversal of the nesting tree created by these tokens. 066 */ 067 private static List<List<IToken>> split(List<IToken> tokens, ETokenType openToken, ETokenType closeToken) { 068 List<List<IToken>> result = new ArrayList<>(); 069 Stack<List<IToken>> splits = new Stack<>(); 070 splits.push(new ArrayList<IToken>()); 071 result.add(splits.peek()); 072 073 for (IToken token : tokens) { 074 ETokenType tokenType = token.getType(); 075 if (tokenType == openToken) { 076 splits.peek().add(SENTINEL_TOKEN); 077 splits.push(new ArrayList<IToken>()); 078 result.add(splits.peek()); 079 } else if (tokenType == closeToken) { 080 splits.pop(); 081 if (splits.isEmpty()) { 082 // too many closes (error condition), so just return 083 // original input 084 return Collections.singletonList(tokens); 085 } 086 } else { 087 splits.peek().add(token); 088 } 089 } 090 091 return result; 092 } 093 094 /** The sentinel token. */ 095 private static class SentinelToken extends Token { 096 097 /** default */ 098 private static final long serialVersionUID = 1L; 099 100 /** Constructor. */ 101 public SentinelToken() { 102 super(ETokenType.SENTINEL, 0, 0, "sentinel", "sentinel"); 103 } 104 105 /** {@inheritDoc} */ 106 @Override 107 public ELanguage getLanguage() { 108 CCSMAssert.fail("Operation not supported for sentinel token"); 109 return null; 110 } 111 112 /** {@inheritDoc} */ 113 @Override 114 public IToken newToken(ETokenType type, int offset, int lineNumber, String text, String originId) { 115 CCSMAssert.fail("Operation not supported for sentinel token"); 116 return null; 117 } 118 119 } 120 121}