001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package org.conqat.lib.commons.xml;
018
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;

import org.conqat.lib.commons.enums.EnumUtils;
import org.conqat.lib.commons.xml.ElementEnumSaxHandler.IElementEnum;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
030
031/**
032 * SAX XML Parser {@link DefaultHandler} implementation that matches XML
033 * elements to enumeration values using the enumeration name. The enumeration
034 * has to implement the interface {@link IElementEnum} and thus defines a state
035 * graph of an XML element sequence for parsing the document.
036 * <p>
037 * The parser offers registration of {@link ElementHandler}s for each of the
038 * enumeration elements that will be called for the start and end of the
039 * element. If capturing the inner text of elements is desired a
040 * {@link TextElementHandler} has to be registered.
041 * <p>
042 * Resolving element names to enum constants is performed using a
043 * {@link JavaConstantResolver} but can be altered using
044 * {@link #setElementResolver(IElementResolver)}.
045 */
046public class ElementEnumSaxHandler<ELEMENT extends Enum<ELEMENT> & IElementEnum<ELEMENT>> extends DefaultHandler {
047
048        /**
049         * The currently parsed element or null if {@link #hasReachedStartElement} is
050         * <code>false</code>.
051         */
052        private ELEMENT element;
053
054        /** Flag that indicates if the start element has been reached yet. */
055        private boolean hasReachedStartElement = false;
056
057        /**
058         * The map of registered handler callbacks.
059         */
060        private final Map<ELEMENT, ElementHandler<ELEMENT>> handlers = new HashMap<>();
061
062        /**
063         * Stack of opened (and handled) elements. Will be popped from the stack on
064         * close.
065         */
066        private final Deque<ELEMENT> openedElements = new ArrayDeque<>();
067
068        /**
069         * Stack of opened elements a {@link TextElementHandler} is registered for. Text
070         * is always captured for the top element of the stack.
071         */
072        private final Deque<StringBuffer> textBuffers = new ArrayDeque<>();
073
074        /**
075         * Resolver from XML element names to element enum names. <code>null</code> if
076         * no resolver is used. The default resolver is a {@link JavaConstantResolver}.
077         */
078        private IElementResolver resolver = new JavaConstantResolver();
079
080        /** Constructor. */
081        public ElementEnumSaxHandler(ELEMENT initialElement) {
082                this.element = initialElement;
083        }
084
085        /** Sets (or overrides) the element handler for a given element. */
086        public void setElementHandler(ELEMENT element, ElementHandler<ELEMENT> handler) {
087                handlers.put(element, handler);
088        }
089
090        /** @see #resolver */
091        public void setElementResolver(IElementResolver resolver) {
092                this.resolver = resolver;
093        }
094
095        /** {@inheritDoc} */
096        @Override
097        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
098
099                ELEMENT nextElement = getEnumForElement(localName);
100                if (nextElement == null) {
101                        return;
102                }
103
104                if (!hasReachedStartElement) {
105                        if (element != nextElement) {
106                                return;
107                        }
108
109                        hasReachedStartElement = true;
110
111                } else if (!element.nextElements().contains(nextElement)) {
112                        return;
113                }
114
115                element = nextElement;
116                openedElements.push(element);
117
118                ElementHandler<ELEMENT> handler = handlers.get(element);
119                if (handlers.containsKey(element)) {
120                        handler.onStartElement(element, attributes);
121
122                        if (handler instanceof TextElementHandler) {
123                                textBuffers.push(new StringBuffer());
124                        }
125                }
126        }
127
128        /** {@inheritDoc} */
129        @Override
130        public void characters(char[] ch, int start, int length) {
131                StringBuffer buffer = textBuffers.peek();
132                if (buffer != null) {
133                        buffer.append(ch, start, length);
134                }
135        }
136
137        /** {@inheritDoc} */
138        @Override
139        public void endElement(String uri, String localName, String qName) throws SAXException {
140                ELEMENT endedElement = getEnumForElement(localName);
141
142                if (endedElement == null || endedElement != openedElements.peek()) {
143                        return;
144                }
145                openedElements.pop();
146
147                ElementHandler<ELEMENT> handler = handlers.get(endedElement);
148                if (handler == null) {
149                        return;
150                }
151
152                if (handler instanceof TextElementHandler) {
153                        TextElementHandler<ELEMENT> textHandler = (TextElementHandler<ELEMENT>) handler;
154
155                        StringBuffer buffer = textBuffers.pop();
156                        textHandler.onText(endedElement, buffer.toString());
157                }
158
159                handler.onEndElement(endedElement);
160        }
161
162        /**
163         * Returns the enum value for a given element name or <code>null</code> if no
164         * element with this name exists. Respects normalization rules provided by
165         * {@link #resolver}.
166         */
167        @SuppressWarnings("unchecked")
168        private ELEMENT getEnumForElement(String elementName) throws AssertionError {
169                if (resolver != null) {
170                        elementName = resolver.resolve(elementName);
171                }
172
173                Class<ELEMENT> enumClass = null;
174                if (element.getClass().isEnum()) {
175                        // Enum is a top-level enumeration
176                        enumClass = (Class<ELEMENT>) element.getClass();
177                } else {
178                        // Enum constant that overrides methods from the enum class
179                        enumClass = (Class<ELEMENT>) element.getClass().getSuperclass();
180                }
181
182                return EnumUtils.valueOf(enumClass, elementName);
183        }
184
185        /**
186         * Interface that defines methods for getting the next elements for parsing the
187         * document.
188         */
189        public static interface IElementEnum<E extends Enum<E>> {
190                /**
191                 * Set of elements that are expected to be parsed after this element.
192                 */
193                public Set<E> nextElements();
194        }
195
196        /** Handler for start and element callbacks. */
197        @SuppressWarnings("unused")
198        public static class ElementHandler<ELEMENT> {
199
200                /**
201                 * Being called each time the handled element is opened.
202                 */
203                public void onStartElement(ELEMENT element, Attributes attributes) throws SAXException {
204                        // stub
205                }
206
207                /**
208                 * Being called each time the handled element is closed.
209                 */
210                public void onEndElement(ELEMENT element) throws SAXException {
211                        // stub
212                }
213        }
214
215        /**
216         * Handler for text callbacks in addition to the callbacks defined in
217         * {@link org.conqat.lib.commons.xml.ElementEnumSaxHandler.ElementHandler}.
218         * <p>
219         * As capturing text may reduce performance, please consider using
220         * {@link org.conqat.lib.commons.xml.ElementEnumSaxHandler.ElementHandler} if
221         * not interested in text.
222         */
223        @SuppressWarnings("unused")
224        public static class TextElementHandler<ELEMENT> extends ElementHandler<ELEMENT> {
225
226                /**
227                 * Will be called exactly once for an element with all the text that has
228                 * occurred in the XML between the start element and end element callback. Text
229                 * of descendant elements is also captured unless a
230                 * {@link org.conqat.lib.commons.xml.ElementEnumSaxHandler.TextElementHandler}
231                 * for these elements is registered.
232                 */
233                public void onText(ELEMENT element, String text) throws SAXException {
234                        // stub
235                }
236        }
237
238        /** Resolver from XML element names to enumeration names. */
239        public static interface IElementResolver {
240                /**
241                 * Takes the name of an XML element and resolves it to the name of a possible
242                 * enumeration value.
243                 */
244                public String resolve(String elementName);
245        }
246
247        /**
248         * Resolver that normalizes XML element names to valid Java identifiers.
249         */
250        public static class JavaConstantResolver implements IElementResolver {
251
252                /**
253                 * {@inheritDoc}
254                 * 
255                 * Converts the given string to a valid Java identifier by replacing all non
256                 * word characters by underscores and converting the string to uppercase.
257                 */
258                @Override
259                public String resolve(String elementName) {
260                        return elementName.toUpperCase().replaceAll("\\W+", "_");
261                }
262        }
263}