001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 The ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package org.conqat.lib.commons.assessment;
018
019import java.security.MessageDigest;
020import java.util.List;
021import java.util.regex.Matcher;
022import java.util.regex.Pattern;
023
024import org.conqat.lib.commons.digest.Digester;
025import org.conqat.lib.commons.enums.EnumUtils;
026import org.conqat.lib.commons.string.StringUtils;
027
028/**
029 * A rating is essentially an {@link ETrafficLightColor} assigned to a file. The
030 * rating can be stored in the file or externally. To protect the stored color,
031 * it is protected by a hash sum over the file's content, i.e. if the file
032 * changes this will be detected and result in a RED rating. There are some
033 * exclusions when calculating the check sum to make it more stable with respect
034 * to a versioning system or encoding problems.
035 * <p>
036 * While the typical application is to files (and their content), the actual
037 * implementation has no notion of a file and works on arbitrary strings.
038 * <p>
039 * A note on compatibility: Over time there were different versions of the
040 * rating algorithm.
041 * <ul>
042 * <li>The earliest version used no check sums, but rather compared to the
043 * CVS/SVN revision stored in the file. This is no longer supported.</li>
044 * <li>Another previous version used the tag '@levd.rating' and did not include
045 * the color itself in the calculation of the hash (i.e. the color could be
046 * changed). Ratings created with this version can still be read.</li>
047 * <li>The current version uses the tag '@ConQAT.Rating' and includes the color
048 * in the hash code calculation. Ratings for this version can be read and
049 * written. Migration to this version is supported.</li>
050 * </ul>
051 */
052public class Rating {
053
054        /** The rating tag used. */
055        public static final String RATING_TAG = "@ConQAT.Rating";
056
057        /** The old rating tag used for backwards compatibility. */
058        private static final String OLD_RATING_TAG = "@levd.rating";
059
060        /** Regular expression that matches the colors */
061        /* package */static final String COLORS_REGEX = StringUtils.concat(ETrafficLightColor.values(), "|");
062
063        /** Pattern used to extract the rating. */
064        private static final Pattern RATING_PATTERN = Pattern.compile(
065                        "(" + RATING_TAG + "|" + OLD_RATING_TAG + ") +(" + COLORS_REGEX + ") +Hash:? *([a-fA-F0-9]*)",
066                        Pattern.CASE_INSENSITIVE);
067
068        /**
069         * Pattern used to determine whether hash calculation should be restarted
070         * (see {@link #updateHash(String, MessageDigest)}). Currently this matches
071         * Java package declarations to exclude the file header.
072         */
073        private static final Pattern RESET_PATTERN = Pattern.compile("^package .*;$");
074
075        /** The rating stored in the file. */
076        private ETrafficLightColor storedRating;
077
078        /**
079         * Stores whether this uses the {@link #OLD_RATING_TAG}. If so, the color is
080         * not included in the hash calculation.
081         */
082        private boolean oldStyleRating = false;
083
084        /** The stored hash value. */
085        private String storedHash;
086
087        /** The expected hash value. */
088        private String expectedHash;
089
090        /** Constructor. */
091        public Rating(String content) {
092                this(content, null);
093        }
094
095        /**
096         * Constructor.
097         * 
098         * @param forcedRating
099         *            if this is non-null, the rating of the file is overwritten by
100         *            this color. This is useful to calculate the expected hash
101         *            value for a given (new) color.
102         */
103        public Rating(String content, ETrafficLightColor forcedRating) {
104                this(StringUtils.splitLinesAsList(content), forcedRating);
105        }
106
107        /** Constructor. */
108        public Rating(List<String> lines) {
109                this(lines, null);
110        }
111
112        /**
113         * Constructor.
114         * 
115         * @param forcedRating
116         *            if this is non-null, the rating of the file is overwritten by
117         *            this color. This is useful to calculate the expected hash
118         *            value for a given (new) color.
119         */
120        public Rating(List<String> lines, ETrafficLightColor forcedRating) {
121                MessageDigest md5 = Digester.getMD5();
122                for (String line : lines) {
123                        // ignore line containing rating
124                        if (storedRating == null && checkRating(line)) {
125                                continue;
126                        }
127
128                        updateHash(line, md5);
129                }
130
131                if (forcedRating != null) {
132                        storedRating = forcedRating;
133                        oldStyleRating = false;
134                }
135
136                if (!oldStyleRating && storedRating != null) {
137                        md5.update(StringUtils.stringToBytes(storedRating.name()));
138                }
139
140                expectedHash = StringUtils.encodeAsHex(md5.digest());
141        }
142
143        /**
144         * Checks whether the line contains a rating and updates the fields
145         * accordingly.
146         * 
147         * @return true if a rating was found.
148         */
149        private boolean checkRating(String line) {
150                Matcher ratingMatcher = RATING_PATTERN.matcher(line);
151                if (!ratingMatcher.find()) {
152                        return false;
153                }
154
155                oldStyleRating = ratingMatcher.group(1).equalsIgnoreCase(OLD_RATING_TAG);
156                storedRating = EnumUtils.valueOf(ETrafficLightColor.class, ratingMatcher.group(2));
157                storedHash = ratingMatcher.group(3);
158                return true;
159        }
160
161        /**
162         * Updates the hash using the following rules:
163         * 
164         * <ul>
165         * <li>Only characters in the US-ASCII range which are not control
166         * characters (including whitespace) are considered. Thus the hash value is
167         * invariant about changes in whitespace or the use of different
168         * encodings.</li>
169         * <li>Lines containing a rating tag (new or old) are ignored. This is for
170         * backwards compatibility, but also allows to be tolerant when managing
171         * multiple ratings in a file.</li>
172         * <li>Lines matching the {@link #RESET_PATTERN} cause the hash calculation
173         * to restart, effectively ignoring everything encountered so far.</li>
174         * <li>Everything between dollar sign is ignored, as these are often contain
175         * information updated by the version management system.</li>
176         * </ul>
177         * 
178         */
179        private static void updateHash(String line, MessageDigest md5) {
180                if (line.contains(RATING_TAG) || line.contains(OLD_RATING_TAG)) {
181                        return;
182                }
183
184                if (RESET_PATTERN.matcher(line).matches()) {
185                        md5.reset();
186                        return;
187                }
188
189                boolean inDollar = false;
190                for (int i = 0; i < line.length(); ++i) {
191                        int c = line.charAt(i);
192                        if (c == '$') {
193                                inDollar = !inDollar;
194                        } else if (!inDollar && c > 32 && c < 128) {
195                                // this cast is ok, as we checked before
196                                md5.update((byte) c);
197                        }
198                }
199        }
200
201        /**
202         * Returns the rating stored in the file (which may be null if no old rating
203         * was found).
204         */
205        public ETrafficLightColor getStoredRating() {
206                return storedRating;
207        }
208
209        /**
210         * Returns the actual rating determined based on the stored value and a
211         * comparison of hash codes.
212         */
213        public ETrafficLightColor getRating() {
214                if (getStoredRating() == null || !isValidRating()) {
215                        return ETrafficLightColor.RED;
216                }
217
218                return getStoredRating();
219        }
220
221        /** Determines based on the attributes whether the rating is valid. */
222        private boolean isValidRating() {
223                return storedHash != null && storedHash.equalsIgnoreCase(expectedHash);
224        }
225
226        /** Returns the expected hash code as a string. */
227        public String getExpectedHashString() {
228                return expectedHash;
229        }
230
231        /**
232         * Updates the first found rating tag of a given string with the new color
233         * and returns the new content. If no rating tag is found, null is returned.
234         */
235        public static String updateRating(String content, ETrafficLightColor newColor) {
236                Matcher m = RATING_PATTERN.matcher(content);
237                if (!m.find()) {
238                        return null;
239                }
240
241                String replacement = constructRatingTag(newColor, new Rating(content, newColor).getExpectedHashString());
242
243                StringBuffer sb = new StringBuffer();
244                m.appendReplacement(sb, replacement);
245                m.appendTail(sb);
246                return sb.toString();
247        }
248
249        /**
250         * Returns the rating tag starting with {@value #RATING_TAG} and containing
251         * the color and hash values.
252         */
253        public static String constructRatingTag(ETrafficLightColor color, String hash) {
254                return RATING_TAG + " " + color.name() + " Hash: " + hash;
255        }
256}