/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 The ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package org.conqat.lib.commons.string;

import java.util.Iterator;

/**
 * This class is used to split a string in lines using an {@link Iterator}. The
 * default setting is to not return trailing empty lines. Use
 * {@link #setIncludeTrailingEmptyLine(boolean)} to include them.
 * <p>
 * Recognized line separators are <code>\n</code>, <code>\r</code>,
 * <code>\r\n</code> and the Unicode character 'NEXT LINE (NEL)' (U+0085).
 * <p>
 * The object is its own iterator: {@link #iterator()} returns
 * <code>this</code>, so iterating consumes the content. Call
 * {@link #setContent(String)} to start over.
 * <p>
 * <b>Note:</b> According to tests I performed this is the fastest method to
 * split a string. It is about nine times faster than the regex-based split
 * with:
 *
 * <pre>
 * Pattern pattern = Pattern.compile("\r\n|\r|\n");
 * pattern.split(content);
 * </pre>
 */
public class LineSplitter implements Iterator<String>, Iterable<String> {

	/** Unicode Character 'NEXT LINE (NEL)' */
	private static final char UNICODE_NEL = '\u0085';

	/** The string content to split. */
	private String content;

	/** Index of the first character of the next line to return. */
	private int startIndex;

	/** Flag for returning the trailing empty line. */
	private boolean includeTrailingEmptyLine = false;

	/**
	 * Constructor for empty content.
	 */
	public LineSplitter() {
		// Does nothing as content is empty.
	}

	/**
	 * Constructor which calls {@link #setContent(String)}.
	 */
	public LineSplitter(String content) {
		setContent(content);
	}

	/**
	 * Set the string to split and reset the iterator.
	 *
	 * @param content
	 *            The string to split. If string is <code>null</code> or the
	 *            empty string, {@link #next()} will return <code>null</code>.
	 */
	public void setContent(String content) {
		this.content = content;
		startIndex = 0;
	}

	/**
	 * {@inheritDoc}
	 * <p>
	 * Once the content is exhausted, the reference to the input string is
	 * dropped so that it is free for garbage collection.
	 */
	@Override
	public boolean hasNext() {
		if (content == null) {
			return false;
		}

		if (includeTrailingEmptyLine && isTrailingEmptyLine()) {
			return true;
		}

		if (startIndex >= content.length()) {
			// drop the reference to the string to allow garbage collection
			content = null;
			return false;
		}

		return true;
	}

	/**
	 * Obtain next identified line.
	 *
	 * @return the next line without its line separator, or <code>null</code> if
	 *         all lines were returned. Note that, unlike the general
	 *         {@link Iterator} contract, no exception is thrown in this case.
	 */
	@Override
	public String next() {
		if (!hasNext()) {
			return null;
		}

		if (includeTrailingEmptyLine && isTrailingEmptyLine()) {
			startIndex++; // shift index, so it is beyond the content length
			return StringUtils.EMPTY_STRING;
		}

		// length to skip may vary due to the length of the line separator (\r,
		// \n or \r\n)
		int skip = 0;

		int endIndex = startIndex;

		// scan until a separator was consumed or the content ends
		while (skip == 0 && endIndex < content.length()) {
			char c = content.charAt(endIndex);

			endIndex++;

			if (c == '\n' || c == UNICODE_NEL) {
				skip = 1;
			} else if (c == '\r') {
				skip = 1;
				// treat \r\n as a single separator
				if (endIndex < content.length() && content.charAt(endIndex) == '\n') {
					skip = 2;
					endIndex++;
				}
			}
		}

		// endIndex points behind the separator; cut the separator off the line
		String result = content.substring(startIndex, endIndex - skip);

		startIndex = endIndex;
		return result;
	}

	/**
	 * @return <code>true</code> if the iterator is at the end of the string
	 *         content and the content ends with a line separator, i.e. contains
	 *         an empty trailing line.
	 */
	private boolean isTrailingEmptyLine() {
		if (startIndex > 0 && startIndex == content.length()) {
			return isLineSeparator(content.charAt(startIndex - 1));
		}
		return false;
	}

	/**
	 * @return <code>true</code> if the character is one of the characters
	 *         {@link #next()} treats as (part of) a line separator. Keeping this
	 *         in one place ensures that a content ending with NEL is handled
	 *         like one ending with <code>\n</code> or <code>\r</code>.
	 */
	private static boolean isLineSeparator(char c) {
		return c == '\n' || c == '\r' || c == UNICODE_NEL;
	}

	/**
	 * Enables returning of trailing empty lines during the iteration. Default is
	 * <code>false</code>
	 * <p>
	 * If <code>true</code> the string <code>Foo\nBar\n</code> will yield three
	 * items (Foo, Bar and the empty string), otherwise two items (Foo and Bar).
	 */
	public void setIncludeTrailingEmptyLine(boolean includeTrailingEmptyLine) {
		this.includeTrailingEmptyLine = includeTrailingEmptyLine;
	}

	/**
	 * Not supported.
	 *
	 * @throws UnsupportedOperationException
	 *             always.
	 */
	@Override
	public void remove() {
		throw new UnsupportedOperationException();
	}

	/**
	 * {@inheritDoc}
	 * <p>
	 * Returns this object itself, so the iteration state is shared with it.
	 */
	@Override
	public Iterator<String> iterator() {
		return this;
	}

}