CSci 4608 Principles of Web Programming: XML schema validation

Below are a sample XML file and a sample XML Schema, followed by a program that validates the file against the schema using Xerces-J, a SAX ("Simple API for XML") parser.

The additional class ShowValidationErrors defines an error handler that reports errors. If you want parsing to stop at the first validation error and throw an exception, uncomment the line "throw exception;" in the error method (the method must then be declared "throws SAXException", because SAXParseException is a checked exception). Currently the parsing continues when validation errors are encountered, so that all validation errors are displayed. Well-formedness errors (i.e. when the source file is not well-formed XML) stop the program.

The XML file shiporder.xml (it contains a couple of errors to demonstrate the validation). Note the reference to shiporder.xsd as an attribute of the root element.



<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- Sample ship order. The root element points at shiporder.xsd through
     xsi:noNamespaceSchemaLocation. Two quantity values below are deliberately
     invalid so that the validator has something to report. -->
<shiporder orderid="889923"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="shiporder.xsd">
 <orderperson>John Smith</orderperson>
 <shipto>
  <name>Ola Nordmann</name>
  <address>Langgt 23</address>
  <city>4000 Stavanger</city>
  <country>Norway</country>
 </shipto>
 <item>
  <title>Empire Burlesque</title>
  <note>Special Edition</note>
  <!-- Deliberate error: the schema types quantity as xs:positiveInteger, and -1 is not positive. -->
  <quantity>-1</quantity>
  <price>10.90</price>
 </item>
 <item>
  <title>Hide your heart</title>
  <!-- Deliberate error: the schema types quantity as xs:positiveInteger, and abc is not a number. -->
  <quantity>abc</quantity>
  <price>9.90</price>
  </item>
</shiporder>


The XML Schema file shiporder.xsd:

<?xml version="1.0" encoding="ISO-8859-1" ?>
<!-- Schema for a ship order document. It declares no target namespace, so
     instance documents refer to it with xsi:noNamespaceSchemaLocation. -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">

<!-- Root element: an ordered sequence of orderperson, shipto and item
     children, plus a required orderid attribute. -->
<xs:element name="shiporder">
 <xs:complexType>
  <xs:sequence>
   <xs:element name="orderperson" type="xs:string"/>
   <!-- Delivery address: name, address, city and country, in that order. -->
   <xs:element name="shipto">
    <xs:complexType>
     <xs:sequence>
      <xs:element name="name" type="xs:string"/>
      <xs:element name="address" type="xs:string"/>
      <xs:element name="city" type="xs:string"/>
      <xs:element name="country" type="xs:string"/>
     </xs:sequence>
    </xs:complexType>
   </xs:element>
   <!-- Order line; maxOccurs="unbounded" lets it repeat without limit. -->
   <xs:element name="item" maxOccurs="unbounded">
    <xs:complexType>
     <xs:sequence>
      <xs:element name="title" type="xs:string"/>
      <!-- minOccurs="0" makes the note optional. -->
      <xs:element name="note" type="xs:string" minOccurs="0"/>
      <!-- xs:positiveInteger rejects zero, negative and non-numeric values. -->
      <xs:element name="quantity" type="xs:positiveInteger"/>
      <xs:element name="price" type="xs:decimal"/>
     </xs:sequence>
    </xs:complexType>
   </xs:element>
  </xs:sequence>
  <!-- Attribute declarations follow the content model; orderid must be present. -->
  <xs:attribute name="orderid" type="xs:string" use="required"/>
 </xs:complexType>
</xs:element>

</xs:schema>


Java validation program (see compilation/running instructions below):

import java.util.*;
import java.io.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import javax.xml.parsers.SAXParser;

/**
 * Validates an XML document against a W3C XML Schema using the Xerces-J
 * SAX parser and prints every problem that the parser reports.
 *
 * <p>Usage: {@code java CheckSchema [xmlFile [xsdFile]]}. When the arguments
 * are omitted the program validates {@code shiporder.xml} against
 * {@code shiporder.xsd}.
 */
public class CheckSchema {

    /** Document validated when no file name is given on the command line. */
    private static final String DEFAULT_XML_SOURCE = "shiporder.xml";
    /** Schema used when no schema name is given on the command line. */
    private static final String DEFAULT_XSD_SCHEMA = "shiporder.xsd";

    /** Parser implementation requested from the SAX factory. */
    private static final String XERCES_SAX_PARSER =
        "org.apache.xerces.parsers.SAXParser";

    // strings to set up properties
    private static final String JAXP_SCHEMA_LANGUAGE =
        "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
    private static final String W3C_XML_SCHEMA =
        "http://www.w3.org/2001/XMLSchema";
    private static final String JAXP_SCHEMA_SOURCE =
        "http://java.sun.com/xml/jaxp/properties/schemaSource";

    /**
     * Creates a validating parser and runs it over the XML document.
     *
     * @param args optional: {@code args[0]} is the XML file to validate,
     *             {@code args[1]} is the schema file; each falls back to
     *             its default when absent
     * @throws IOException  if the XML file cannot be opened or read
     * @throws SAXException if the parser cannot be created or configured,
     *                      or if the error handler aborts the parse
     */
    public static void main(String [] args) throws IOException, SAXException {

        String xmlSource = args.length > 0 ? args[0] : DEFAULT_XML_SOURCE;
        String xsdSchema = args.length > 1 ? args[1] : DEFAULT_XSD_SCHEMA;

        // create the XML parser
        XMLReader parser;
        try {
            parser = createValidatingParser(xsdSchema);
        } catch (SAXException e) {
            // print out the error message
            System.out.println("Cannot create a parser");
            // rethrow the exception
            throw e;
        }

        FileInputStream theStream = null;
        try {
            theStream = new FileInputStream(xmlSource);
            parser.parse(new InputSource(theStream));
        } catch (IOException e) {
            System.out.println("Cannot open a file");
            throw e;
        } catch (SAXException e) {
            System.out.println("The document is invalid");
            throw e;
        } finally {
            // Always release the file handle, whether or not parsing succeeded.
            if (theStream != null) {
                try {
                    theStream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a stream that was
                    // only read from fails, and reporting it here would hide
                    // the exception that is already propagating.
                }
            }
        }
    }

    /**
     * Builds a Xerces SAX parser configured to validate against the given
     * XML Schema and to report problems through ShowValidationErrors.
     *
     * @param xsdSchema location of the schema file
     * @return the configured parser
     * @throws SAXException if the parser class cannot be instantiated or
     *                      does not accept one of the properties or features
     */
    private static XMLReader createValidatingParser(String xsdSchema)
        throws SAXException {
        // get a new parser from the factory
        XMLReader parser = XMLReaderFactory.createXMLReader(XERCES_SAX_PARSER);

        // XML Schema is used as the schema language
        parser.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
        // Set up the schema file
        parser.setProperty(JAXP_SCHEMA_SOURCE, xsdSchema);

        // both features are needed to make the program
        // validate against the schema.
        // Otherwise it just checks well-formedness
        parser.setFeature("http://xml.org/sax/features/validation", true);
        parser.setFeature("http://apache.org/xml/features/validation/schema", true);

        // default contents handler
        parser.setContentHandler(new DefaultHandler());
        // set customized error handler (needed to display validation errors)
        parser.setErrorHandler(new ShowValidationErrors());

        return parser;
    }

}

The error handling class:

import org.xml.sax.*;

/**
 * SAX error handler that prints every diagnostic reported by the parser to
 * standard output. Warnings and validation errors are printed and parsing
 * continues; fatal errors are printed and then rethrown, which stops the
 * parse.
 */
public class ShowValidationErrors implements ErrorHandler {

    /**
     * Prints the warning; parsing continues.
     *
     * @param exception the warning reported by the parser
     */
    public void warning(SAXParseException exception) {
        System.out.println("WARNING: " + exception.getMessage());
    }

    /**
     * Prints a validation error; parsing continues so that all validation
     * errors in the document are displayed.
     *
     * @param exception the validation error reported by the parser
     * @throws SAXException not thrown as written; declared so that the
     *                      {@code throw} below compiles once uncommented
     *                      (SAXParseException is a checked exception)
     */
    public void error(SAXParseException exception)
        throws SAXException {
        System.out.println("VALIDATION ERROR: " + exception.getMessage());
        // IF YOU WANT TO STOP AT AN ERROR, UNCOMMENT THE LINE BELOW
        // throw exception;

    }

    /**
     * Prints a fatal error and rethrows it to abort the parse.
     *
     * @param exception the fatal error reported by the parser
     * @throws SAXException always; the same {@code exception} is rethrown
     */
    public void fatalError(SAXParseException exception)
        throws SAXException {
        System.out.println("WELL-FORMEDNESS ERROR: " + exception.getMessage());
        // A well-formedness error
        throw exception;

    }

}

How to compile and run this code:

javac CheckSchema.java -classpath .:/home/elenam/4608_playpen/xerces-2_5_0/xercesImpl.jar
java -classpath .:/home/elenam/4608_playpen/xerces-2_5_0/xercesImpl.jar CheckSchema

Note that the code uses the SAX parser defined in the Xerces-2_5_0 package, which is currently available in my home directory. The standard package in /usr/share/java doesn't have the correct classes. If you get an error message about SAXParser class not found, double-check your compilation/running commands.
CSci 2101 home page