Provisional new XML loader class. The code is obviously not polished (notice
the TODO notes, etc.), but it works, the difficult work is done, and I don't
want to spend time fine-tuning before I know where its permanent home will be.
Please do speak up if you think the new features are unnecessary or object to
adoption for any other reason. Please do read the root post below about the
wider goal for this endeavor.
I would like to add the class below to JME.
This class takes an XML file as input and provides XPaths to a datatype-
specific importer. Example consumers that I have prototyped with are JME XML
and Collada importers. (You can see evidence of this in the JavaDocced
examples). This is one critical piece of the new Importer/Exporter design
which I have proposed earlier in this topic. It encapsulates this one
purpose well, and can be leveraged in the future to simplify XML reading tasks
(importing or otherwise).
The feature list is presented in the class JavaDoc.
Getting all of these features to work together took a lot of work. It's
hard to overstate how much XPath can simplify coding and maintenance of
importers. XPath is rarely used with modern apps that need rigorous data
consistency enforcement, simply because it is difficult to get XPath to
work with dynamic namespace validation. These difficulties are discussed
at http://blog.davber.com/2006/09/17/xpath-with-namespaces-in-java/
and elsewhere. Not only does the class provided here solve this problem
once-and-for-all, transparently to the user, but it does so without any
third party products or libraries-- just J2SE. It can also validate our
existing namespace-less JME XML files against their schema.
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Schema;
import javax.xml.XMLConstants;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Node;
import javax.xml.xpath.XPathConstants;
import javax.xml.transform.stream.StreamSource;
import org.xml.sax.helpers.DefaultHandler;
import java.net.URL;
import javax.xml.namespace.NamespaceContext;
import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;
import java.util.Collections;
/**
 * An XML Document factory with the following features:
 * <OL>
 * <LI>No library requirements beyond Sun J2SE 5 or later
 * <LI>XML Schema validation
 * <LI>Namespace-less input files (legacy support of *.xml files).
 *     Still validated against *.xsd file specified as file/url/input stream
 * <LI>Automatic network retrieval of any number of schemas through standard
 *     schemaLocation settings in the XML file.
 * <LI>Use namespace prefix mappings in XML file, but validate against
 *     user-specified *.xsd file (overriding any schemaLocation settings if
 *     present).  This allows eliminating the network dependency if desired.
 * <LI>Namespace-capable XPath that works with the features above.
 * </OL>
 *
 * <P>
 * The instance doubles as the SAX error handler for the parsers it creates
 * (every warning, error and fatal error is rethrown) and as the
 * {@link NamespaceContext} for the XPath objects it hands out.
 * </P> <P>
 * {@link #genDoc(InputStream)} reconfigures the shared {@link #factory}
 * field on every call, so one instance should not be used to parse from
 * several threads at once.
 * </P>
 */
public class ModernDocFactory
        extends DefaultHandler implements NamespaceContext {
    /** Schema to validate against; null means "resolve from the document". */
    protected Schema schema = null;
    protected DocumentBuilderFactory factory =
            DocumentBuilderFactory.newInstance();
    /** XPath prefix to namespace URI bindings supplied via setPrefixMap. */
    protected Map<String, String> prefixToUri = new HashMap<String, String>();
    /** Reverse of prefixToUri; holds a single prefix per URI. */
    protected Map<String, String> uriToPrefix = new HashMap<String, String>();
    // TODO:  Consider changing uriToPrefix to a Map<String, List<String>>,
    // since it should accommodate mapping URL to multiple prefixes for the
    // getPrefixes method below.  Note that this only affects XPaths, not
    // XML validation.

    /**
     * Sets the schema that subsequent genDoc calls validate against.
     *
     * @param schema the schema to use, or null to fall back to automatic
     *               schema resolution
     */
    public void setSchema(Schema schema) {
        this.schema = schema;
    }

    /*
     * The FOLLOWING 3 METHODS ARE IMPLEMENTATION OF NamespaceContext INTERFACE
     * This is only needed for prefixes used in XPath expressions, not for XML
     * validation.
     * For all anticipated use cases, we will only need one namespace prefix
     * mapping for XPaths.
     */

    /**
     * Returns the namespace URI bound to the given prefix.
     *
     * @param prefix the prefix to look up
     * @return the bound URI; the reserved bindings for "xml" and "xmlns";
     *         or XMLConstants.NULL_NS_URI if the prefix is unbound
     * @throws IllegalArgumentException if prefix is null
     * @see NamespaceContext#getNamespaceURI(String)
     */
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException(
                    "Namespace prefix may not be null");
        }
        // The two reserved prefixes have fixed bindings per the interface.
        if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
            return XMLConstants.XML_NS_URI;
        }
        if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
            return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
        }
        String uri = prefixToUri.get(prefix);
        return (uri == null) ? XMLConstants.NULL_NS_URI : uri;
    }

    /**
     * Returns the prefix bound to the given namespace URI.
     *
     * @param uri the namespace URI to look up
     * @return the bound prefix; "xml" or "xmlns" for the reserved URIs;
     *         or null if the URI is unbound
     * @throws IllegalArgumentException if uri is null
     * @see NamespaceContext#getPrefix(String)
     */
    public String getPrefix(String uri) {
        if (uri == null) {
            throw new IllegalArgumentException(
                    "Namespace URI may not be null");
        }
        if (XMLConstants.XML_NS_URI.equals(uri)) {
            return XMLConstants.XML_NS_PREFIX;
        }
        if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri)) {
            return XMLConstants.XMLNS_ATTRIBUTE;
        }
        return uriToPrefix.get(uri);
    }

    /**
     * Returns all prefixes bound to the given namespace URI.  Since only one
     * prefix per URI is tracked (see TODO on uriToPrefix), the iterator
     * yields at most one element.
     *
     * @param uri the namespace URI to look up
     * @return an iterator over the bound prefixes; empty (never null) if
     *         the URI is unbound
     * @throws IllegalArgumentException if uri is null
     * @see NamespaceContext#getPrefixes(String)
     */
    public Iterator<String> getPrefixes(String uri) {
        String prefix = getPrefix(uri);
        if (prefix == null) {
            // The interface requires an iterator here; null breaks callers.
            return Collections.<String>emptyList().iterator();
        }
        return Collections.singletonList(prefix).iterator();
    }

    /**
     * Creates a new XPath which resolves prefixes through this instance.
     *
     * @return a fresh XPath using the current prefix map
     */
    public XPath getPrefixableXPath() {
        XPath xpath = XPathFactory.newInstance().newXPath();
        xpath.setNamespaceContext(this);
        return xpath;
    }

    /**
     * Adds prefix-to-namespace-URI bindings for use in XPath expressions.
     * Bindings accumulate across calls; rebinding a prefix replaces its
     * previous URI in both lookup directions.
     *
     * @param map prefix (key) to namespace URI (value) bindings
     */
    public void setPrefixMap(Map<String, String> map) {
        for (Map.Entry<String, String> binding : map.entrySet()) {
            String prefix = binding.getKey();
            String uri = binding.getValue();
            String oldUri = prefixToUri.put(prefix, uri);
            // Drop the reverse entry left behind when a prefix is rebound,
            // otherwise getPrefix(oldUri) keeps reporting a dead binding.
            if (oldUri != null && !oldUri.equals(uri)
                    && prefix.equals(uriToPrefix.get(oldUri))) {
                uriToPrefix.remove(oldUri);
            }
            uriToPrefix.put(uri, prefix);
        }
    }

    /**
     * This tests a narrow but deep slice of ModernDocFactory functionality.
     *
     * <P>
     * It provides a single namespace mapping for the predefined prefix "dum"
     * (short for "dummy prefix") for use with namespaced XML files (this
     * has no effect on non-namespaced XML files).
     * More specific mapping than this would be impractical from the
     * command-line, but is simple to do programmatically (by supplying a
     * String-to-String map).
     * </P> <P>
     * If the system property SHOWTEXT is set (to any value), the XPath is
     * evaluated as a string; otherwise the local name of the selected node
     * is printed.
     * </P> <P>
     * Example invocations:<PRE><CODE>
     * java ModernDocFactory -s file:jme.xsd x triv.xml /com/jme.animation.Bone/localRotation
     * java -DSHOWTEXT=true ModernDocFactory -s file:collada_1.4.xsd http://www.collada.org/2005/11/COLLADASchema tst1.xml '/+/+/+/dum:author[1]/text()'
     * java -DSHOWTEXT=true ModernDocFactory -s http://www.khronos.org/files/collada_schema_1_4 http://www.collada.org/2005/11/COLLADASchema tst1.xml '/+/+/+/dum:author[1]/text()'
     * </CODE></PRE>
     * Note that the +'s above should be asterisks.
     * It's difficult to show slash-asterisk in JavaDoc.
     * </P>
     *
     * @param sa [-s schemaUrl] dumNsUri file.xml... xpathQuery
     * @throws Exception on any I/O, parse, validation or XPath failure
     */
    static public void main(String[] sa) throws Exception {
        boolean showText = System.getProperty("SHOWTEXT") != null;
        if (sa.length < 3 || (sa.length < 5 && sa[0].equals("-s"))) {
            // getName() avoids the "class " that Class.toString() prepends.
            System.err.println("SYNTAX:  java "
                    + ModernDocFactory.class.getName()
                    + " [-s file:file.xsd] dumNsUri file.xml... '/dum:query'");
            System.exit(1);
        }
        boolean explicitSchema = sa[0].equals("-s");
        String query = sa[sa.length - 1];

        ModernDocFactory p = new ModernDocFactory();
        p.setPrefixMap(Collections.singletonMap("dum",
                explicitSchema ? sa[2] : sa[0]));
        if (explicitSchema) {
            p.setSchema(loadSchema(sa[1]));
        }

        // Everything between the namespace URI and the query is an XML file.
        for (int i = (explicitSchema ? 3 : 1); i < sa.length - 1; i++) {
            Document doc;
            // try/finally rather than try-with-resources: the class targets
            // J2SE 5.
            InputStream xmlStream = new FileInputStream(sa[i]);
            try {
                doc = p.genDoc(xmlStream);
            } finally {
                xmlStream.close();
            }
            if (showText) {
                System.out.println("Select text = "
                        + p.getPrefixableXPath().evaluate(
                                query, doc, XPathConstants.STRING));
            } else {
                Node n = (Node) p.getPrefixableXPath().evaluate(
                        query, doc, XPathConstants.NODE);
                // evaluate() yields null when nothing matches the query.
                System.out.println("Selected node = "
                        + (n == null ? "<no match>" : n.getLocalName()));
            }
        }
    }

    /** Reads and compiles the W3C XML Schema found at the given URL. */
    private static Schema loadSchema(String schemaUrl)
            throws IOException, SAXException {
        InputStream istream = new URL(schemaUrl).openStream();
        if (istream == null) {
            throw new IllegalArgumentException(
                    "Failed to get byte stream from URL:  " + schemaUrl);
        }
        try {
            // Supplying the system id lets relative include/import
            // references inside the schema resolve against its own URL.
            return SchemaFactory
                    .newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
                    .newSchema(new StreamSource(istream, schemaUrl));
        } finally {
            istream.close();
        }
    }

    /**
     * Creates a namespace-aware factory with DOCTYPE declarations
     * disallowed and XInclude processing enabled.
     * See http://xerces.apache.org/xerces2-j/features.html about these
     * feature settings.
     *
     * @throws ParserConfigurationException if the underlying parser does
     *         not support one of the features
     */
    public ModernDocFactory() throws ParserConfigurationException {
        factory.setNamespaceAware(true);
        factory.setFeature(
                "http://apache.org/xml/features/disallow-doctype-decl", true);
        factory.setFeature(
                "http://apache.org/xml/features/xinclude", true);
    }

    /**
     * Parses and validates the given stream into a DOM Document.
     *
     * <P>
     * With no schema set, validation is switched on and schemas are
     * resolved from the document itself.  With a schema set, the parser's
     * own validation switches are turned off and the user-specified schema
     * is installed on the factory instead.
     * </P>
     * The stream is not closed here; that remains the caller's job.
     *
     * @param is the XML input
     * @return the parsed document
     * @throws ParserConfigurationException if the parser cannot be
     *         configured as requested
     * @throws SAXException on any parse or validation warning or error
     *         (see the handler methods below)
     * @throws IOException if reading the stream fails
     */
    protected Document genDoc(InputStream is)
            throws ParserConfigurationException, SAXException, IOException {
        // TODO:  Consider handling a Reader as well as an InputStream, to
        // facilitate use case of getting XML from a database CLOB field.
        boolean autoResolve = (schema == null);
        if (autoResolve) {
            System.err.println("Automatic schema resolution");
            // Design notes carried over from the original experiments:
            //  - validation/dynamic (validate only if a grammar is given)
            //    was rejected: better to REQUIRE validation, either by
            //    resolving through Internet, explicit xsi:schemaLocations,
            //    or programmatically.
            //  - honour-all-schemaLocations did not do what was hoped for.
            //  - The JAXP schemaLanguage attribute IS INCOMPATIBLE WITH
            //    factory.setSchema(), so it is not set.
        } else {
            System.err.println("User-specified schema resolution");
            factory.setSchema(schema);
        }
        factory.setValidating(autoResolve);
        // According to docs, these settings are supposed to be ignored if
        // schema is set explicitly, but in fact they are not; hence they
        // are forced off in that case.
        factory.setFeature(
                "http://xml.org/sax/features/validation", autoResolve);
        factory.setFeature(
                "http://apache.org/xml/features/validation/schema",
                autoResolve);

        javax.xml.parsers.DocumentBuilder builder =
                factory.newDocumentBuilder();
        builder.setErrorHandler(this);
        return builder.parse(is);
    }

    // FOLLOWING 3 METHODS ARE OVERRIDES OF DefaultHandler METHODS
    // All three rethrow, so any problem reported by the parser or the
    // validator (warnings included) aborts the parse.

    /**
     * @see DefaultHandler#error(SAXParseException)
     */
    @Override
    public void error(SAXParseException e) throws SAXParseException {
        throw e;
    }

    /**
     * @see DefaultHandler#fatalError(SAXParseException)
     */
    @Override
    public void fatalError(SAXParseException e) throws SAXParseException {
        throw e;
    }

    /**
     * @see DefaultHandler#warning(SAXParseException)
     */
    @Override
    public void warning(SAXParseException e) throws SAXParseException {
        throw e;
    }
}