001 /*--------------------------------------------------------------------------+
002 $Id: XMLUtils.java 26369 2010-03-01 18:28:42Z hummelb $
003 | |
004 | Copyright 2005-2010 Technische Universitaet Muenchen |
005 | |
006 | Licensed under the Apache License, Version 2.0 (the "License"); |
007 | you may not use this file except in compliance with the License. |
008 | You may obtain a copy of the License at |
009 | |
010 | http://www.apache.org/licenses/LICENSE-2.0 |
011 | |
012 | Unless required by applicable law or agreed to in writing, software |
013 | distributed under the License is distributed on an "AS IS" BASIS, |
014 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
015 | See the License for the specific language governing permissions and |
016 | limitations under the License. |
017 +--------------------------------------------------------------------------*/
018 package edu.tum.cs.commons.xml;
019
020 import java.io.File;
021 import java.io.FileInputStream;
022 import java.io.IOException;
023 import java.io.StringWriter;
024 import java.net.MalformedURLException;
025 import java.net.URL;
026 import java.util.ArrayList;
027 import java.util.List;
028
029 import javax.xml.parsers.DocumentBuilder;
030 import javax.xml.parsers.DocumentBuilderFactory;
031 import javax.xml.parsers.ParserConfigurationException;
032 import javax.xml.parsers.SAXParser;
033 import javax.xml.parsers.SAXParserFactory;
034 import javax.xml.transform.Transformer;
035 import javax.xml.transform.TransformerException;
036 import javax.xml.transform.TransformerFactory;
037 import javax.xml.transform.dom.DOMSource;
038 import javax.xml.transform.stream.StreamResult;
039 import javax.xml.transform.stream.StreamSource;
040
041 import org.w3c.dom.Document;
042 import org.w3c.dom.Element;
043 import org.w3c.dom.Node;
044 import org.w3c.dom.NodeList;
045 import org.xml.sax.ErrorHandler;
046 import org.xml.sax.InputSource;
047 import org.xml.sax.SAXException;
048 import org.xml.sax.SAXParseException;
049 import org.xml.sax.helpers.DefaultHandler;
050
051 import edu.tum.cs.commons.assertion.CCSMPre;
052 import edu.tum.cs.commons.string.StringUtils;
053
054 /**
055 * Collection of utility methods for XML.
056 *
057 * @author Florian Deissenboeck
058 * @author $Author: hummelb $
059 * @version $Rev: 26369 $
060 * @levd.rating GREEN Hash: 3E4CDEC5A0EAAC290A6EDF7279A9C811
061 */
062 public class XMLUtils {
063
064 /** Identifier for schema source. */
065 private static final String ATTRIBUTE_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
066
067 /** Schema URL */
068 private static final String SCHEMA_URL = "http://www.w3.org/2001/XMLSchema";
069
070 /** Identifier for schema language. */
071 private static final String ATTRIBUTE_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
072
073 /**
074 * Parse a file without validation.
075 *
076 * @param file
077 * the file to parse.
078 * @return the DOM document.
079 *
080 * @throws SAXException
081 * if a parsing exception occurs, i.e. if the file is not
082 * well-formed.
083 * @throws IOException
084 * if an IO exception occurs.
085 */
086 public static Document parse(File file) throws SAXException, IOException {
087 return createSchemaUnawareParser().parse(file);
088 }
089
090 /**
091 * Parse an input source without validation.
092 *
093 * @param input
094 * the input source to parse
095 * @return the DOM document.
096 *
097 * @throws SAXException
098 * if a parsing exception occurs, i.e. if the file is not
099 * well-formed.
100 * @throws IOException
101 * if an IO exception occurs.
102 */
103 public static Document parse(InputSource input) throws SAXException,
104 IOException {
105
106 return createSchemaUnawareParser().parse(input);
107 }
108
109 /**
110 * Parse an input source using SAX without validation.
111 *
112 *
113 * @throws SAXException
114 * if a parsing exception occurs, i.e. if the file is not
115 * well-formed.
116 * @throws IOException
117 * if an IO exception occurs.
118 */
119 public static void parseSAX(File file, DefaultHandler handler)
120 throws SAXException, IOException {
121 createSchemaUnawareSAXParser().parse(file, handler);
122 }
123
124 /**
125 * Parse an input source using SAX without validation.
126 *
127 *
128 * @throws SAXException
129 * if a parsing exception occurs, i.e. if the file is not
130 * well-formed.
131 * @throws IOException
132 * if an IO exception occurs.
133 */
134 public static void parseSAX(InputSource input, DefaultHandler handler)
135 throws SAXException, IOException {
136 createSchemaUnawareSAXParser().parse(input, handler);
137 }
138
139 /**
140 * Parse and validate file using schema. This implements a custom error
141 * handler to avoid different behaviour between the JAXP implementations
142 * shipping with Java 1.5 and Java 1.6.
143 *
144 * @param file
145 * the file to parse.
146 * @param schemaURL
147 * URL point to schema, may not be null
148 * @return the DOM document.
149 *
150 * @throws SAXException
151 * if a parsing exception occurs, i.e. if the file is not
152 * well-formed or not valid
153 * @throws IOException
154 * if an IO exception occurs.
155 */
156 public static Document parse(File file, URL schemaURL) throws SAXException,
157 IOException {
158
159 FileInputStream stream = new FileInputStream(file);
160 try {
161 return parse(new InputSource(stream), schemaURL);
162 } finally {
163 stream.close();
164 }
165 }
166
167 /**
168 * Parse and validate file using schema. This implements a custom error
169 * handler to avoid different behaviour between the JAXP implementations
170 * shipping with Java 1.5 and Java 1.6.
171 *
172 * @param input
173 * the input to parse.
174 * @param schemaURL
175 * URL point to schema, may not be null
176 * @return the DOM document.
177 *
178 * @throws SAXException
179 * if a parsing exception occurs, i.e. if the file is not
180 * well-formed or not valid
181 * @throws IOException
182 * if an IO exception occurs.
183 */
184 public static Document parse(InputSource input, URL schemaURL)
185 throws SAXException, IOException {
186
187 CCSMPre.isTrue(schemaURL != null, "Schema URL may not be null!");
188
189 DocumentBuilder parser = createSchemaAwareParser(schemaURL);
190
191 XMLErrorHandler errorHandler = new XMLErrorHandler();
192 parser.setErrorHandler(errorHandler);
193 Document document = parser.parse(input);
194
195 if (errorHandler.exception != null) {
196 throw errorHandler.exception;
197 }
198
199 return document;
200 }
201
202 /**
203 * Parse and validate file using SAX and schema.
204 *
205 * @param file
206 * the file to parse.
207 * @param schemaURL
208 * URL point to schema, may not be null
209 *
210 * @throws SAXException
211 * if a parsing exception occurs, i.e. if the file is not
212 * well-formed or not valid
213 * @throws IOException
214 * if an IO exception occurs.
215 */
216 public static void parseSAX(File file, URL schemaURL, DefaultHandler handler)
217 throws SAXException, IOException {
218
219 FileInputStream stream = new FileInputStream(file);
220 try {
221 parseSAX(new InputSource(stream), schemaURL, handler);
222 } finally {
223 stream.close();
224 }
225 }
226
227 /**
228 * Parse and validate file using SAX and schema.
229 *
230 * @param input
231 * the input to parse.
232 * @param schemaURL
233 * URL point to schema, may not be null
234 *
235 * @throws SAXException
236 * if a parsing exception occurs, i.e. if the file is not
237 * well-formed or not valid
238 * @throws IOException
239 * if an IO exception occurs.
240 */
241 public static void parseSAX(InputSource input, URL schemaURL,
242 DefaultHandler handler) throws SAXException, IOException {
243
244 CCSMPre.isTrue(schemaURL != null, "Schema URL may not be null!");
245 createSchemaAwareSAXParser(schemaURL).parse(input, handler);
246 }
247
248 /** Creates a schema-unaware XML parser */
249 private static DocumentBuilder createSchemaUnawareParser() {
250
251 try {
252 return createNamespaceAwareDocumentBuilderFactory()
253 .newDocumentBuilder();
254 } catch (ParserConfigurationException e) {
255 throw new IllegalStateException(
256 "No document builder found, probably Java is misconfigured!",
257 e);
258 }
259 }
260
261 /** Creates a schema-unaware SAX parser */
262 private static SAXParser createSchemaUnawareSAXParser() throws SAXException {
263 try {
264 return createNamespaceAwareSAXParserFactory().newSAXParser();
265 } catch (ParserConfigurationException e) {
266 throw new IllegalStateException(
267 "No SAX parser found, probably Java is misconfigured!", e);
268 }
269 }
270
271 /** Creates a schema-aware XML parser */
272 private static DocumentBuilder createSchemaAwareParser(URL schemaURL) {
273 DocumentBuilderFactory dbf = createNamespaceAwareDocumentBuilderFactory();
274 dbf.setValidating(true);
275 dbf.setAttribute(ATTRIBUTE_SCHEMA_LANGUAGE, SCHEMA_URL);
276 dbf.setAttribute(ATTRIBUTE_SCHEMA_SOURCE, schemaURL.toString());
277
278 try {
279 return dbf.newDocumentBuilder();
280 } catch (ParserConfigurationException e) {
281 throw new IllegalStateException(
282 "No document builder found, probably Java is misconfigured!",
283 e);
284 }
285 }
286
287 /** Creates a schema-aware SAX parser */
288 private static SAXParser createSchemaAwareSAXParser(URL schemaURL)
289 throws SAXException {
290 SAXParserFactory spf = createNamespaceAwareSAXParserFactory();
291 spf.setValidating(true);
292 try {
293 SAXParser parser = spf.newSAXParser();
294 parser.setProperty(ATTRIBUTE_SCHEMA_LANGUAGE, SCHEMA_URL);
295 parser.setProperty(ATTRIBUTE_SCHEMA_SOURCE, schemaURL.toString());
296 return parser;
297 } catch (ParserConfigurationException e) {
298 throw new IllegalStateException(
299 "No SAX parser found, probably Java is misconfigured!", e);
300 }
301 }
302
303 /** Creates a namespace-aware {@link DocumentBuilderFactory} */
304 private static DocumentBuilderFactory createNamespaceAwareDocumentBuilderFactory() {
305 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
306 dbf.setNamespaceAware(true);
307
308 return dbf;
309 }
310
311 /** Creates a namespace-aware {@link SAXParserFactory} */
312 private static SAXParserFactory createNamespaceAwareSAXParserFactory() {
313 SAXParserFactory spf = SAXParserFactory.newInstance();
314 spf.setNamespaceAware(true);
315 return spf;
316 }
317
318 /**
319 * Same as {@link #parse(File, URL)} but with schema file.
320 *
321 * @throws IllegalArgumentException
322 * if the schema file could not be converted to an URL
323 */
324 public static Document parse(File file, File schema) throws SAXException,
325 IOException {
326 try {
327 return parse(file, schema.toURI().toURL());
328 } catch (MalformedURLException e) {
329 throw new IllegalArgumentException(
330 "Schema file could not be converted to URL: " + e);
331 }
332 }
333
334 /**
335 * Returns a string representation of the given XML document, which is
336 * "pretty printed", i.e. the tags are indented.
337 */
338 public static String prettyPrint(Document doc) throws TransformerException {
339 URL url = XMLUtils.class.getResource("pretty.xsl");
340 StreamSource xslSource = new StreamSource(url.toExternalForm());
341 Transformer transformer = TransformerFactory.newInstance()
342 .newTransformer(xslSource);
343
344 DOMSource source = new DOMSource(doc);
345 StringWriter writer = new StringWriter();
346 StreamResult result = new StreamResult(writer);
347 transformer.transform(source, result);
348 return StringUtils.normalizeLineBreaks(writer.toString());
349 }
350
351 /**
352 * Determines the index (starting at 0) of the given element relative to
353 * other element nodes for the same parent.
354 */
355 public static int getElementPosition(Element element) {
356 int num = -1;
357 Node node = element;
358 while (node != null) {
359 if (node.getNodeType() == Node.ELEMENT_NODE) {
360 ++num;
361 }
362 node = node.getPreviousSibling();
363 }
364 return num;
365 }
366
367 /**
368 * Returns all children of the given element which are element named as
369 * specified.
370 */
371 public static List<Element> getNamedChildren(Element element,
372 String elementNames) {
373 List<Element> result = new ArrayList<Element>();
374 NodeList children = element.getChildNodes();
375 for (int i = 0; i < children.getLength(); ++i) {
376 Node node = children.item(i);
377 if (node.getNodeType() == Node.ELEMENT_NODE
378 && node.getNodeName().equals(elementNames)) {
379 result.add((Element) node);
380 }
381 }
382 return result;
383 }
384
385 /**
386 * Returns the first child of the given element which is an element named as
387 * specified. Returns null if none are found.
388 */
389 public static Element getNamedChild(Element element, String name) {
390 List<Element> children = XMLUtils.getNamedChildren(element, name);
391 if (children.size() > 0) {
392 return children.get(0);
393 }
394 return null;
395 }
396
397 /**
398 * Get the text content of the given element's first child that is an
399 * element named as specified. If none is found, the empty string is
400 * returned.
401 */
402 public static String getNamedChildContent(Element parent, String name) {
403 Element element = XMLUtils.getNamedChild(parent, name);
404 if (element == null) {
405 return StringUtils.EMPTY_STRING;
406 }
407 return element.getTextContent();
408 }
409
410 /**
411 * Extracts all ElementNodes from a NodeList and returns the result as a
412 * list.
413 *
414 * @param nodeList
415 * the NodeList to be searched for ElementNodes.
416 * @return an array containing all ElementNodes stored in the given node
417 * list or null if the input has been null.
418 */
419 public static List<Element> elementNodes(NodeList nodeList) {
420 if (nodeList == null) {
421 return null;
422 }
423 List<Element> result = new ArrayList<Element>();
424 int len = nodeList.getLength();
425 for (int i = 0; i < len; ++i) {
426 Node node = nodeList.item(i);
427 if (node.getNodeType() == Node.ELEMENT_NODE) {
428 result.add((Element) node);
429 }
430 }
431 return result;
432 }
433
434 /**
435 * Get all leaf elements of an XML tree rooted at an element
436 *
437 * @param root
438 * The root element
439 * @return List of all leaf elements
440 */
441 public static List<Element> leafElementNodes(Element root) {
442 List<Element> leafElementNodes = new ArrayList<Element>();
443 leafElementNodes(root, leafElementNodes);
444 return leafElementNodes;
445 }
446
447 /** Add all leaf element nodes of an XML tree rooted at an element to a list */
448 private static void leafElementNodes(Element root,
449 List<Element> leafElementNodes) {
450 List<Element> children = XMLUtils.elementNodes(root.getChildNodes());
451 if (children.isEmpty()) {
452 leafElementNodes.add(root);
453 } else {
454 for (Element child : children) {
455 leafElementNodes(child, leafElementNodes);
456 }
457 }
458 }
459
460 /** Creates an empty XML document. */
461 public static Document createEmptyDocument() {
462 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
463 DocumentBuilder builder;
464 try {
465 builder = factory.newDocumentBuilder();
466 } catch (ParserConfigurationException e) {
467 throw new IllegalStateException(
468 "No document builder found, probably Java is misconfigured!",
469 e);
470 }
471 return builder.newDocument();
472 }
473
474 /**
475 * Simple error handler for handling validation errors. This handler stores
476 * the first problem raised during parsing.
477 */
478 private static class XMLErrorHandler implements ErrorHandler {
479
480 /**
481 * The stored exception. Value unequal <code>null</code> signals a
482 * validation problem.
483 */
484 private SAXParseException exception;
485
486 /** {@inheritDoc} */
487 public void error(SAXParseException exception) {
488 if (this.exception == null) {
489 this.exception = exception;
490 }
491 }
492
493 /** {@inheritDoc} */
494 public void fatalError(SAXParseException exception) {
495 error(exception);
496 }
497
498 /** {@inheritDoc} */
499 public void warning(SAXParseException exception) {
500 System.out.println(exception);
501 // ignore
502 }
503 }
504 }