<< July 2013 | Home | September 2013 >>

JAXP Document Validation

Here's a small reusable class that validates a DOM Document against an XSD schema using the JAXP validation API:

package co.wlv.xml.validation;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import javax.xml.XMLConstants;
import javax.xml.transform.dom.DOMSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.ls.LSResourceResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Validates DOM documents against a W3C XML Schema using the JAXP validation API.
 *
 * <p>The schema is compiled once, in the constructor, from the supplied URL. Each call to
 * {@link #validateDocumentWithXsd(Document)} creates a fresh {@link Validator} from that
 * compiled schema and collects every warning, error and fatal error that is reported.
 */
public class XsdValidator {
    private static final Logger logger = LoggerFactory.getLogger(XsdValidator.class);

    private URL schemaUrl;
    private Schema schema;
    private LSResourceResolver resourceResolver;

    /**
     * Creates a validator for the schema at {@code schemaUrl} using the default resource
     * resolution of the schema factory.
     *
     * @param schemaUrl location of the XSD to compile
     * @throws SAXException if the schema cannot be compiled
     */
    public XsdValidator(URL schemaUrl) throws SAXException {
        this(schemaUrl, null);
    }

    /**
     * Creates a validator for the schema at {@code schemaUrl}, resolving resources referenced
     * by the schema through {@code resourceResolver}.
     *
     * @param schemaUrl        location of the XSD to compile
     * @param resourceResolver resolver installed on the schema factory; may be {@code null},
     *                         in which case no resolver is installed
     * @throws SAXException if the schema cannot be compiled
     */
    public XsdValidator(URL schemaUrl, LSResourceResolver resourceResolver) throws SAXException {
        // Assign the fields directly: calling the public (overridable) setters from a
        // constructor would run subclass code on a partially constructed object.
        this.schemaUrl = schemaUrl;
        this.resourceResolver = resourceResolver;
        init();
    }

    /**
     * Validates {@code doc} against the compiled schema.
     *
     * <p>If validation itself aborts with a {@link SAXException} or {@link IOException}, the
     * failure is logged and also appended to the returned list (wrapped in a
     * {@link SAXParseException} when necessary), so that an aborted run cannot be mistaken
     * for a valid document.
     *
     * @param doc the document to validate
     * @return every problem reported during validation, in the order reported; empty when
     *         nothing was reported
     */
    public List<SAXParseException> validateDocumentWithXsd(Document doc) {
        XsdErrorHandler xsdErrorHandler = new XsdErrorHandler();
        Validator validator = getSchema().newValidator();
        validator.setErrorHandler(xsdErrorHandler);

        List<SAXParseException> problems = xsdErrorHandler.getSaxParseExceptions();
        try {
            validator.validate(new DOMSource(doc));
        }
        catch (SAXException e) {
            logger.error("ERROR: validateDocumentWithXsd", e);
            recordAbortedValidation(problems, e);
        }
        catch (IOException e) {
            logger.error("ERROR: validateDocumentWithXsd", e);
            recordAbortedValidation(problems, e);
        }
        return problems;
    }

    /**
     * Returns the URL last stored via the constructor or {@link #setSchemaUrl(URL)}.
     *
     * @return the schema URL
     */
    public URL getSchemaUrl() {
        return schemaUrl;
    }

    /**
     * Stores a new schema URL. This only updates the field; the schema compiled in the
     * constructor is not recompiled.
     *
     * @param schemaUrl the schema URL to store
     */
    public void setSchemaUrl(URL schemaUrl) {
        this.schemaUrl = schemaUrl;
    }

    /**
     * Returns the resolver last stored via the constructor or
     * {@link #setResourceResolver(LSResourceResolver)}.
     *
     * @return the resource resolver, possibly {@code null}
     */
    public LSResourceResolver getResourceResolver() {
        return resourceResolver;
    }

    /**
     * Stores a new resource resolver. This only updates the field; the schema compiled in
     * the constructor is not recompiled.
     *
     * @param resourceResolver the resolver to store
     */
    public void setResourceResolver(LSResourceResolver resourceResolver) {
        this.resourceResolver = resourceResolver;
    }

    /**
     * Compiles the schema at {@link #getSchemaUrl()}, installing the resource resolver on
     * the factory first when one was supplied.
     *
     * @throws SAXException if the schema cannot be compiled
     */
    private void init() throws SAXException {
        SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        if (resourceResolver != null) {
            sf.setResourceResolver(resourceResolver);
        }
        setSchema(sf.newSchema(getSchemaUrl()));
    }

    private Schema getSchema() {
        return schema;
    }

    private void setSchema(Schema schema) {
        this.schema = schema;
    }

    /**
     * Adds the exception that aborted a validation run to {@code problems}, unless that very
     * exception instance was already recorded by the error handler.
     */
    private static void recordAbortedValidation(List<SAXParseException> problems, Exception cause) {
        if (cause instanceof SAXParseException) {
            if (!problems.contains(cause)) {
                problems.add((SAXParseException) cause);
            }
            return;
        }
        // Not a parse exception: wrap it so it fits the declared element type while keeping
        // the original exception as the cause.
        problems.add(new SAXParseException(cause.getMessage(), null, cause));
    }

    /**
     * Error handler that never aborts validation: every callback logs the message at debug
     * level and stores the exception for later retrieval.
     */
    private static class XsdErrorHandler implements ErrorHandler {
        private final List<SAXParseException> saxParseExceptions = new ArrayList<SAXParseException>();

        @Override
        public void warning(SAXParseException e) throws SAXException {
            logger.debug("WARNING: {}", e.getMessage());
            saxParseExceptions.add(e);
        }

        @Override
        public void fatalError(SAXParseException e) throws SAXException {
            logger.debug("FATAL: {}", e.getMessage());
            saxParseExceptions.add(e);
        }

        @Override
        public void error(SAXParseException e) throws SAXException {
            logger.debug("ERROR: {}", e.getMessage());
            saxParseExceptions.add(e);
        }

        /**
         * Returns the live list of everything reported so far.
         *
         * @return the collected exceptions, in the order reported
         */
        public List<SAXParseException> getSaxParseExceptions() {
            return saxParseExceptions;
        }
    }
}

And a small driver program that exercises it (a standalone class with a main method rather than a unit test):

package co.wlv.spike.SpikeSchematron;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import org.apache.log4j.xml.DOMConfigurator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.ls.LSInput;
import org.w3c.dom.ls.LSResourceResolver;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Command-line driver that parses a sample APIP item document and validates it repeatedly
 * against the APIP item XSD, resolving selected schema references to local files.
 */
public class Spike {
    private static final Logger logger = LoggerFactory.getLogger(Spike.class);
    private static final File APIP_BASE = new File("etc/APIPv1p0_FinalRelease_20121017/Core_Level/Package");
    private static final File APIP_ITEM_XSD = new File(APIP_BASE, "apipv1p0_qtiitemv2p1_v1p0.xsd");

    private static final File APIP_ITEM_XML = new File("etc/samples/Item/VF656330.qti.xml");

    /**
     * Configures log4j from {@code etc/log4j.xml}, runs the validation loop and logs
     * {@code DONE!} when it finishes.
     *
     * @param args ignored
     * @throws TransformerException declared for compatibility; nothing in this method raises it
     * @throws ParserConfigurationException if no namespace-aware DOM parser can be created
     * @throws IOException if the sample document cannot be read
     * @throws SAXException if the sample document or the schema cannot be parsed
     */
    public static void main(String[] args) throws TransformerException, ParserConfigurationException, SAXException, IOException {
        DOMConfigurator.configure("etc/log4j.xml");

        Spike spike = new Spike();
        spike.doit();

        logger.debug("DONE!");
    }

    /**
     * Parses the sample item once, compiles the schema once, then validates the same
     * document 1000 times, logging the number of reported problems on each pass.
     */
    private void doit() throws ParserConfigurationException, SAXException, IOException {
        URL schemaUrl = APIP_ITEM_XSD.toURI().toURL();

        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(APIP_ITEM_XML);

        XsdValidator xsdValidator = new XsdValidator(schemaUrl, new ApipLSResourceResolver());

        for (int i = 0; i < 1000; i++) {
            List<SAXParseException> saxParseExceptions = xsdValidator.validateDocumentWithXsd(doc);
            logger.debug("{}) COUNT: {}", i, saxParseExceptions.size());
        }
    }

    /**
     * Resource resolver that redirects a fixed set of schema system ids to files on disk.
     *
     * <p>Each system id is attempted at most once per resolver instance; later requests for
     * the same id return {@code null}.
     */
    private static class ApipLSResourceResolver implements LSResourceResolver {
        // Dots are escaped so that only the literal host www.imsglobal.org matches.
        private static final Pattern APIP_PATTERN =
                Pattern.compile("^http://www\\.imsglobal\\.org/profile/apip/apipv1p0/[^/]*$");

        private final Set<String> resolvedResources = new HashSet<String>();

        /**
         * Returns an input backed by a local file when {@code systemId} is one of the known
         * ids and the file exists, otherwise {@code null}.
         */
        @Override
        public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId, String baseURI) {
            logger.debug("============================================= resolveResource =============================================");
            logger.debug("NS_URI: {}; ", namespaceURI);
            logger.debug("PUB_ID: {}; ", publicId);
            logger.debug("SYS_ID: {};", systemId);
            logger.debug("BASE_URI: {}", baseURI);

            if (systemId == null || resolvedResources.contains(systemId)) {
                // NOTE(review): a repeated request for an id that was already served also
                // ends up here and yields null - confirm that this is intended.
                return null;
            }

            LSInput lsInput = null;
            try {
                String name = localPathFor(systemId);
                if (name != null) {
                    File file = new File(name);
                    logger.debug("FILE: {}; EXISTS: {}; URI: {}", file, file.exists(), file.toURI());
                    if (file.exists()) {
                        // NOTE(review): this stream is handed to the caller and is never
                        // closed here - presumably the consumer closes it; verify.
                        lsInput = new LSInputImpl(new FileInputStream(file));
                        lsInput.setSystemId(file.toURI().toString());
                    }
                }
                // Reached for unknown ids and missing files too, but not when opening the
                // file failed, so a failed open is retried on the next request.
                resolvedResources.add(systemId);
            }
            catch (FileNotFoundException e) {
                logger.error("Error resolving resource.", e);
            }
            return lsInput;
        }

        /** Maps a known system id to its local file path, or returns null for unknown ids. */
        private static String localPathFor(String systemId) {
            if (systemId.equals("http://www.imsglobal.org/xsd/w3/2001/xml.xsd")) {
                return "etc/xsd/xml.xsd";
            }
            if (systemId.equals("http://www.imsglobal.org/xsd/w3/2001/XInclude.xsd")) {
                return "etc/xsd/XInclude.xsd";
            }
            if (systemId.equals("http://www.w3.org/Math/XMLSchema/mathml2/mathml2.xsd")) {
                return "etc/xsd/mathml2/mathml2.xsd";
            }
            if (APIP_PATTERN.matcher(systemId).matches()) {
                // Keep only the last path segment and look it up under APIP_BASE.
                return String.format("%s/%s", APIP_BASE, systemId.replaceAll("^.*/", ""));
            }
            return null;
        }
    }

    /**
     * Minimal {@link LSInput} backed by a byte stream. Base URI, encoding, public id and
     * system id are stored as given; character stream, string data and certified text are
     * not supported and their setters discard the value.
     */
    private static class LSInputImpl implements LSInput {

        private String baseUri, encoding, publicId, systemId;
        private InputStream byteStream;

        public LSInputImpl(InputStream byteStream) {
            this.byteStream = byteStream;
        }

        @Override
        public String getBaseURI() {
            return baseUri;
        }

        @Override
        public InputStream getByteStream() {
            return byteStream;
        }

        @Override
        public boolean getCertifiedText() {
            // Not supported: always false.
            return false;
        }

        @Override
        public Reader getCharacterStream() {
            // Not supported: always null.
            return null;
        }

        @Override
        public String getEncoding() {
            return encoding;
        }

        @Override
        public String getPublicId() {
            return publicId;
        }

        @Override
        public String getStringData() {
            // Not supported: always null.
            return null;
        }

        @Override
        public String getSystemId() {
            return systemId;
        }

        @Override
        public void setBaseURI(String baseURI) {
            baseUri = baseURI;
        }

        @Override
        public void setByteStream(InputStream byteStream) {
            this.byteStream = byteStream;
        }

        @Override
        public void setCertifiedText(boolean certifiedText) {
            // Not supported: the value is discarded.
        }

        @Override
        public void setCharacterStream(Reader characterStream) {
            // Not supported: the value is discarded.
        }

        @Override
        public void setEncoding(String encoding) {
            this.encoding = encoding;
        }

        @Override
        public void setPublicId(String publicId) {
            this.publicId = publicId;
        }

        @Override
        public void setStringData(String stringData) {
            // Not supported: the value is discarded.
        }

        @Override
        public void setSystemId(String systemId) {
            this.systemId = systemId;
        }
    }
}
Tags : , ,
Social Bookmarks :  Add this post to Slashdot    Add this post to Digg    Add this post to Reddit    Add this post to Delicious    Add this post to Stumble it    Add this post to Google    Add this post to Technorati    Add this post to Bloglines    Add this post to Facebook    Add this post to Furl    Add this post to Windows Live    Add this post to Yahoo!

Export this post as PDF document  Export this post to PDF document