001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 The ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package org.conqat.lib.commons.string; 018 019import java.util.Arrays; 020 021import org.conqat.lib.commons.assertion.CCSMAssert; 022import org.conqat.lib.commons.collections.ManagedIntArray; 023 024/** 025 * A class that helps to convert between line numbers and character offsets. The 026 * character offset is zero based while the line number is one based. 027 * <p> 028 * This class works for strings with arbitrary line terminators. 029 * <p> 030 * The implementation works by storing all character offsets of the newline 031 * characters. For multi-character line endings (i.e. CR+LF on windows) the last 032 * offset is stored. For space/performance reasons, these offsets are stored in 033 * a bare array, which is managed by the base class. 034 * <p> 035 * <i>Note:</i> This class extends {@link ManagedIntArray} although delegation 036 * would be more elegant. The problem, however, is, that {@link ManagedIntArray} 037 * does not support delegation. The idea of this class was to provide protected 038 * access to its internals, which only works via inheritance. Actually, this is 039 * not interface inheritance ({@link ManagedIntArray} has no public method, 040 * btw.) but implementation inheritance. 041 */ 042public class LineOffsetConverter extends ManagedIntArray { 043 044 /** Constructor */ 045 public LineOffsetConverter(String s) { 046 char[] chars = s.toCharArray(); 047 for (int i = 0; i < chars.length; ++i) { 048 if (chars[i] == '\n') { 049 addArrayElement(); 050 array[size - 1] = i; 051 } else if (chars[i] == '\r') { 052 if (i + 1 < chars.length && chars[i + 1] == '\n') { 053 // for \r\n just store the position of the \n 054 continue; 055 } 056 addArrayElement(); 057 array[size - 1] = i; 058 } 059 } 060 061 // append implicit '\n' at the end to allow querying start of last line 062 // (to determine size of last line) 063 if (chars.length == 0 || chars[chars.length - 1] != '\n') { 064 addArrayElement(); 065 array[size - 1] = chars.length; 066 } 067 } 068 069 /** Returns the number of lines of the input string. */ 070 public int getLineCount() { 071 return size; 072 } 073 074 /** 075 * Returns the (zero based) offset of the first character of the given line 076 * (starting at 1). 077 * 078 * @throws AssertionError 079 * if the line is not valid for the string. 080 */ 081 public int getOffset(int line) { 082 CCSMAssert.isTrue(isValidLine(line), "This is not a valid line: " + line + " Valid: [0-" + (size + 1) + "]"); 083 084 if (line == 1) { 085 return 0; 086 } 087 088 // first character of line is directly behind newline character 089 return array[line - 2] + 1; 090 } 091 092 /** Returns whether the given line is valid for the converter. */ 093 public boolean isValidLine(int line) { 094 return 1 <= line && line <= size + 1; 095 } 096 097 /** 098 * Returns the (one based) line for the character at the given (zero based) 099 * offset. The newline at the end of a line is counted as a part of the line 100 * (i.e. for the very first newline, we would return 1). If the offset is 101 * larger than the length of the string, the index of the last line is 102 * returned. 103 */ 104 public int getLine(int offset) { 105 CCSMAssert.isTrue(offset >= 0, "Negative offsets not supported!"); 106 107 int index = Arrays.binarySearch(array, 0, size, offset); 108 if (index >= 0) { 109 return index + 1; 110 } 111 112 return Math.min(-index, size); 113 } 114}