001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 The ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package org.conqat.lib.commons.string;
018
019import java.util.Arrays;
020
021import org.conqat.lib.commons.assertion.CCSMAssert;
022import org.conqat.lib.commons.collections.ManagedIntArray;
023
024/**
025 * A class that helps to convert between line numbers and character offsets. The
026 * character offset is zero based while the line number is one based.
027 * <p>
028 * This class works for strings with arbitrary line terminators.
029 * <p>
030 * The implementation works by storing all character offsets of the newline
031 * characters. For multi-character line endings (i.e. CR+LF on windows) the last
032 * offset is stored. For space/performance reasons, these offsets are stored in
033 * a bare array, which is managed by the base class.
034 * <p>
035 * <i>Note:</i> This class extends {@link ManagedIntArray} although delegation
036 * would be more elegant. The problem, however, is, that {@link ManagedIntArray}
037 * does not support delegation. The idea of this class was to provide protected
038 * access to its internals, which only works via inheritance. Actually, this is
039 * not interface inheritance ({@link ManagedIntArray} has no public method,
040 * btw.) but implementation inheritance.
041 */
042public class LineOffsetConverter extends ManagedIntArray {
043
044        /** Constructor */
045        public LineOffsetConverter(String s) {
046                char[] chars = s.toCharArray();
047                for (int i = 0; i < chars.length; ++i) {
048                        if (chars[i] == '\n') {
049                                addArrayElement();
050                                array[size - 1] = i;
051                        } else if (chars[i] == '\r') {
052                                if (i + 1 < chars.length && chars[i + 1] == '\n') {
053                                        // for \r\n just store the position of the \n
054                                        continue;
055                                }
056                                addArrayElement();
057                                array[size - 1] = i;
058                        }
059                }
060
061                // append implicit '\n' at the end to allow querying start of last line
062                // (to determine size of last line)
063                if (chars.length == 0 || chars[chars.length - 1] != '\n') {
064                        addArrayElement();
065                        array[size - 1] = chars.length;
066                }
067        }
068
069        /** Returns the number of lines of the input string. */
070        public int getLineCount() {
071                return size;
072        }
073
074        /**
075         * Returns the (zero based) offset of the first character of the given line
076         * (starting at 1).
077         * 
078         * @throws AssertionError
079         *             if the line is not valid for the string.
080         */
081        public int getOffset(int line) {
082                CCSMAssert.isTrue(isValidLine(line), "This is not a valid line: " + line + " Valid: [0-" + (size + 1) + "]");
083
084                if (line == 1) {
085                        return 0;
086                }
087
088                // first character of line is directly behind newline character
089                return array[line - 2] + 1;
090        }
091
092        /** Returns whether the given line is valid for the converter. */
093        public boolean isValidLine(int line) {
094                return 1 <= line && line <= size + 1;
095        }
096
097        /**
098         * Returns the (one based) line for the character at the given (zero based)
099         * offset. The newline at the end of a line is counted as a part of the line
100         * (i.e. for the very first newline, we would return 1). If the offset is
101         * larger than the length of the string, the index of the last line is
102         * returned.
103         */
104        public int getLine(int offset) {
105                CCSMAssert.isTrue(offset >= 0, "Negative offsets not supported!");
106
107                int index = Arrays.binarySearch(array, 0, size, offset);
108                if (index >= 0) {
109                        return index + 1;
110                }
111
112                return Math.min(-index, size);
113        }
114}