/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package org.conqat.lib.commons.xml;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;

import org.conqat.lib.commons.enums.EnumUtils;
import org.conqat.lib.commons.xml.ElementEnumSaxHandler.IElementEnum;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX XML Parser {@link DefaultHandler} implementation that matches XML
 * elements to enumeration values using the enumeration name. The enumeration
 * has to implement the interface {@link IElementEnum} and thus defines a state
 * graph of an XML element sequence for parsing the document.
 * <p>
 * The parser offers registration of {@link ElementHandler}s for each of the
 * enumeration elements that will be called for the start and end of the
 * element. If capturing the inner text of elements is desired a
 * {@link TextElementHandler} has to be registered.
 * <p>
 * Resolving element names to enum constants is performed using a
 * {@link JavaConstantResolver} but can be altered using
 * {@link #setElementResolver(IElementResolver)}.
 */
public class ElementEnumSaxHandler<ELEMENT extends Enum<ELEMENT> & IElementEnum<ELEMENT>> extends DefaultHandler {

    /**
     * The most recently accepted element. Until {@link #hasReachedStartElement}
     * becomes <code>true</code> this holds the initial element passed to the
     * constructor, i.e. the element parsing is expected to start with.
     */
    private ELEMENT element;

    /** Flag that indicates if the start element has been reached yet. */
    private boolean hasReachedStartElement = false;

    /** The map of registered handler callbacks. */
    private final Map<ELEMENT, ElementHandler<ELEMENT>> handlers = new HashMap<>();

    /**
     * Stack of opened (and handled) elements. Will be popped from the stack on
     * close.
     */
    private final Deque<ELEMENT> openedElements = new ArrayDeque<>();

    /**
     * Stack of text buffers, one per opened element a
     * {@link TextElementHandler} is registered for. Text is always captured
     * for the top element of the stack.
     */
    private final Deque<StringBuilder> textBuffers = new ArrayDeque<>();

    /**
     * Resolver from XML element names to element enum names. <code>null</code>
     * if no resolver is used. The default resolver is a
     * {@link JavaConstantResolver}.
     */
    private IElementResolver resolver = new JavaConstantResolver();

    /**
     * Constructor.
     *
     * @param initialElement
     *            the element at which handling starts; everything before its
     *            first occurrence is ignored. Must not be <code>null</code>,
     *            as the enum type is derived from it.
     * @throws NullPointerException
     *             if <code>initialElement</code> is <code>null</code>
     */
    public ElementEnumSaxHandler(ELEMENT initialElement) {
        this.element = Objects.requireNonNull(initialElement, "initialElement");
    }

    /** Sets (or overrides) the element handler for a given element. */
    public void setElementHandler(ELEMENT element, ElementHandler<ELEMENT> handler) {
        handlers.put(element, handler);
    }

    /** @see #resolver */
    public void setElementResolver(IElementResolver resolver) {
        this.resolver = resolver;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Elements whose local name does not resolve to an enum constant are
     * ignored. Before the start element has been reached, only the initial
     * element is accepted; afterwards only elements contained in
     * {@link IElementEnum#nextElements()} of the current element are
     * accepted. An accepted element becomes the current element, is pushed on
     * the stack of opened elements and its handler (if any) is notified.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        ELEMENT nextElement = getEnumForElement(localName);
        if (nextElement == null) {
            return;
        }

        if (!hasReachedStartElement) {
            if (element != nextElement) {
                return;
            }
            hasReachedStartElement = true;
        } else if (!element.nextElements().contains(nextElement)) {
            return;
        }

        element = nextElement;
        openedElements.push(element);

        // Check the looked-up handler itself rather than containsKey(), so
        // that a handler registered as null is skipped here exactly as it is
        // in endElement() instead of causing a NullPointerException.
        ElementHandler<ELEMENT> handler = handlers.get(element);
        if (handler == null) {
            return;
        }

        handler.onStartElement(element, attributes);
        if (handler instanceof TextElementHandler) {
            textBuffers.push(new StringBuilder());
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Appends the characters to the topmost text buffer. Characters are
     * dropped if no text buffer is currently open.
     */
    @Override
    public void characters(char[] ch, int start, int length) {
        StringBuilder buffer = textBuffers.peek();
        if (buffer != null) {
            buffer.append(ch, start, length);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Only reacts if the closed element resolves to the enum constant on top
     * of the stack of opened elements. For a {@link TextElementHandler} the
     * captured text is delivered before the end callback is invoked. Note
     * that the current element is not reset to the enclosing element here.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        ELEMENT endedElement = getEnumForElement(localName);
        if (endedElement == null || endedElement != openedElements.peek()) {
            return;
        }
        openedElements.pop();

        ElementHandler<ELEMENT> handler = handlers.get(endedElement);
        if (handler == null) {
            return;
        }

        if (handler instanceof TextElementHandler) {
            TextElementHandler<ELEMENT> textHandler = (TextElementHandler<ELEMENT>) handler;
            StringBuilder buffer = textBuffers.pop();
            textHandler.onText(endedElement, buffer.toString());
        }

        handler.onEndElement(endedElement);
    }

    /**
     * Returns the enum value for a given element name or <code>null</code> if
     * no element with this name exists. Respects normalization rules provided
     * by {@link #resolver}.
     */
    private ELEMENT getEnumForElement(String elementName) throws AssertionError {
        if (resolver != null) {
            elementName = resolver.resolve(elementName);
        }

        // getDeclaringClass() yields the enum type even for constants with a
        // constant-specific class body, where getClass() would return the
        // anonymous subclass instead.
        Class<ELEMENT> enumClass = element.getDeclaringClass();
        return EnumUtils.valueOf(enumClass, elementName);
    }

    /**
     * Interface that defines methods for getting the next elements for
     * parsing the document.
     */
    public static interface IElementEnum<E extends Enum<E>> {
        /**
         * Set of elements that are expected to be parsed after this element.
         */
        public Set<E> nextElements();
    }

    /** Handler for start and end element callbacks. */
    @SuppressWarnings("unused")
    public static class ElementHandler<ELEMENT> {

        /**
         * Being called each time the handled element is opened.
         */
        public void onStartElement(ELEMENT element, Attributes attributes) throws SAXException {
            // stub
        }

        /**
         * Being called each time the handled element is closed.
         */
        public void onEndElement(ELEMENT element) throws SAXException {
            // stub
        }
    }

    /**
     * Handler for text callbacks in addition to the callbacks defined in
     * {@link org.conqat.lib.commons.xml.ElementEnumSaxHandler.ElementHandler}.
     * <p>
     * As capturing text may reduce performance, please consider using
     * {@link org.conqat.lib.commons.xml.ElementEnumSaxHandler.ElementHandler}
     * if not interested in text.
     */
    @SuppressWarnings("unused")
    public static class TextElementHandler<ELEMENT> extends ElementHandler<ELEMENT> {

        /**
         * Will be called exactly once for an element with all the text that
         * has occurred in the XML between the start element and end element
         * callback. Text of descendant elements is also captured unless a
         * {@link org.conqat.lib.commons.xml.ElementEnumSaxHandler.TextElementHandler}
         * for these elements is registered.
         */
        public void onText(ELEMENT element, String text) throws SAXException {
            // stub
        }
    }

    /** Resolver from XML element names to enumeration names. */
    public static interface IElementResolver {
        /**
         * Takes the name of an XML element and resolves it to the name of a
         * possible enumeration value.
         */
        public String resolve(String elementName);
    }

    /**
     * Resolver that normalizes XML element names to valid Java identifiers.
     */
    public static class JavaConstantResolver implements IElementResolver {

        /** Matches each run of one or more non-word characters. */
        private static final Pattern NON_WORD_CHARACTERS = Pattern.compile("\\W+");

        /**
         * {@inheritDoc}
         *
         * Converts the given string to a valid Java identifier by converting
         * the string to uppercase and replacing each run of non-word
         * characters by a single underscore. The conversion uses
         * {@link Locale#ROOT} so that the result does not depend on the
         * default locale of the JVM.
         */
        @Override
        public String resolve(String elementName) {
            String upperCased = elementName.toUpperCase(Locale.ROOT);
            return NON_WORD_CHARACTERS.matcher(upperCased).replaceAll("_");
        }
    }
}