001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package org.conqat.lib.commons.string;
018
019/**
020 * Enumeration of newline types. This deals only with the most common subset of
021 * new line types. For more details see http://en.wikipedia.org/wiki/Newline.
022 */
023public enum ENewline {
024
025        /** Unix (and Linux) are using LF. */
026        UNIX("\n"),
027
028        /** Windows uses CR+LF. */
029        WINDOWS("\r\n"),
030
031        /** MAC up to version 9 uses CR. MacOS X uses {@link #UNIX} newlines. */
032        MAC("\r");
033
034        /** The character(s) used to represent newline. */
035        private final String newline;
036
037        /** Constructor. */
038        private ENewline(String newline) {
039                this.newline = newline;
040        }
041
042        /** Returns character(s) used to represent newline. */
043        public String getNewline() {
044                return newline;
045        }
046
047        /**
048         * Converts the input string to using the specified line breaks and returns
049         * the result.
050         */
051        public String convertNewlines(String input) {
052                return StringUtils.replaceLineBreaks(input, newline);
053        }
054
055        /**
056         * Attempts to guess the newline style from a string. This performs a simple
057         * majority guess, where CR+lF are simply assumed to be next to each other.
058         * If no line breaks are found, the newline for {@link StringUtils#LINE_SEPARATOR} is
059         * returned.
060         */
061        public static ENewline guessNewline(String string) {
062                int crCount = 0;
063                int lfCount = 0;
064                int crlfCount = 0;
065                boolean previousWasCR = false;
066                for (char c : string.toCharArray()) {
067                        if (c == '\r') {
068                                previousWasCR = true;
069                                crCount += 1;
070                        } else if (c == '\n') {
071                                if (previousWasCR) {
072                                        crlfCount += 1;
073                                        crCount -= 1;
074                                } else {
075                                        lfCount += 1;
076                                }
077                                previousWasCR = false;
078                        } else {
079                                previousWasCR = false;
080                        }
081                }
082
083                // in case of equals, we chose arbitrarily, so using < is ok
084                if (crlfCount > lfCount && crlfCount > crCount) {
085                        return WINDOWS;
086                }
087                if (lfCount > crCount) {
088                        return UNIX;
089                }
090
091                if (crCount == 0) {
092                        // in this case, all are 0
093                        return guessNewline(StringUtils.LINE_SEPARATOR);
094                }
095
096                return MAC;
097        }
098}