GAE/JでXML解析(xpath)を行う方法

セットアップ

ここ（Apache Xalan-Java のバイナリ配布物）から下記のjarを取得する

  • serializer.jar
  • xalan.jar
  • xercesImpl.jar
  • xml-apis.jar
  • xsltc.jar

war/WEB-INF/libにコピーする

サンプルソース

  // Sample inventory document that the XPath query below runs against.
  String inventoryXml = " <inventory> "
                + "     <book year=\"2000\"> "
                + "         <title>Snow Crash</title> "
                + "         <author>Neal Stephenson</author> "
                + "         <publisher>Spectra</publisher> "
                + "         <isbn>0553380958</isbn> "
                + "         <price>14.95</price> "
                + "     </book> "
                + "     <book year=\"2005\"> "
                + "         <title>Burning Tower</title> "
                + "         <author>Larry Niven</author> "
                + "         <author>Jerry Pournelle</author> "
                + "         <publisher>Pocket</publisher> "
                + "         <isbn>0743416910</isbn> "
                + "         <price>5.99</price> "
                + "     </book> "
                + "     <book year=\"1995\"> "
                + "         <title>Zodiac</title> "
                + "         <author>Neal Stephenson</author> "
                + "         <publisher>Spectra</publisher> "
                + "         <isbn>0553573862</isbn> "
                + "         <price>7.50</price> "
                + "     </book> "
                + " </inventory> ";

  // Parse the XML text into a namespace-aware DOM document.
  DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  factory.setNamespaceAware(true);
  InputSource source = new InputSource(new StringReader(inventoryXml));
  Document document = factory.newDocumentBuilder().parse(source);

  // The Xalan XPath factory implementation is instantiated directly here.
  XPath xpath = new org.apache.xpath.jaxp.XPathFactoryImpl().newXPath();
  String query = "//book[author='Neal Stephenson']/title/text()";
  NodeList titles = (NodeList) xpath.evaluate(query, document, XPathConstants.NODESET);

  // Print the text of every title node selected by the query.
  int count = titles.getLength();
  for (int index = 0; index < count; index++) {
      System.out.println(titles.item(index).getNodeValue());
  }

実行結果

Snow Crash
Zodiac

簡易的なXPathUtilを作った。

import java.io.StringReader;
import java.util.logging.Logger;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import org.slim3.util.ThrowableUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * Static helpers for evaluating XPath expressions against XML held in a string.
 *
 * <p>Each call parses the XML text into a fresh namespace-aware DOM document and
 * evaluates the expression against that document. Failures during parsing or
 * evaluation are handed to slim3's {@code ThrowableUtil.wrapAndThrow}
 * (presumably rethrown as an unchecked exception; confirm against slim3).
 *
 * <p>No {@link XPath} instance is shared between calls: the JAXP documentation
 * states that {@code XPath} objects are neither thread-safe nor reentrant, so a
 * new one is created for every evaluation of a string expression.
 */
public class XPathUtil {
    private static final Logger logger =
        Logger.getLogger(XPathUtil.class.getName());

    /** Not instantiable; all members are static. */
    private XPathUtil() {
    }

    /**
     * Creates a new XPath evaluator from the Xalan factory implementation.
     *
     * @return a fresh {@link XPath} that is used by a single evaluation only
     */
    private static XPath newXPath() {
        return new org.apache.xpath.jaxp.XPathFactoryImpl().newXPath();
    }

    /**
     * Parses {@code xml} and evaluates the XPath expression string against it.
     *
     * @param xml the XML document text
     * @param expression the XPath expression to evaluate
     * @param resultType the desired result type, e.g. {@link XPathConstants#NODE}
     * @return the evaluation result
     */
    private static Object get(String xml, String expression, QName resultType) {
        Object result = null;
        try {
            // A per-call XPath avoids concurrent use of one shared instance.
            result =
                newXPath().evaluate(expression, stringToDocument(xml), resultType);
        } catch (Throwable t) {
            ThrowableUtil.wrapAndThrow(t); // ThrowableUtil comes from the slim3 library
        }
        return result;
    }

    /**
     * Parses {@code xml} and evaluates an already compiled XPath expression against it.
     *
     * @param xml the XML document text
     * @param expr the compiled XPath expression supplied by the caller
     * @param resultType the desired result type, e.g. {@link XPathConstants#NODESET}
     * @return the evaluation result
     */
    private static Object get(String xml, XPathExpression expr, QName resultType) {
        Object result = null;
        try {
            result = expr.evaluate(stringToDocument(xml), resultType);
        } catch (Throwable t) {
            ThrowableUtil.wrapAndThrow(t);
        }
        return result;
    }

    /**
     * Evaluates {@code expression} against {@code xml} requesting a single node.
     *
     * @param xml the XML document text
     * @param expression the XPath expression to evaluate
     * @return the evaluation result cast to {@link Node}
     */
    public static Node getNode(String xml, String expression) {
        return (Node) get(xml, expression, XPathConstants.NODE);
    }

    /**
     * Evaluates the compiled {@code expr} against {@code xml} requesting a single node.
     *
     * @param xml the XML document text
     * @param expr the compiled XPath expression
     * @return the evaluation result cast to {@link Node}
     */
    public static Node getNode(String xml, XPathExpression expr) {
        return (Node) get(xml, expr, XPathConstants.NODE);
    }

    /**
     * Evaluates {@code expression} against {@code xml} requesting a node set.
     *
     * @param xml the XML document text
     * @param expression the XPath expression to evaluate
     * @return the evaluation result cast to {@link NodeList}
     */
    public static NodeList getNodeList(String xml, String expression) {
        return (NodeList) get(xml, expression, XPathConstants.NODESET);
    }

    /**
     * Evaluates the compiled {@code expr} against {@code xml} requesting a node set.
     *
     * @param xml the XML document text
     * @param expr the compiled XPath expression
     * @return the evaluation result cast to {@link NodeList}
     */
    public static NodeList getNodeList(String xml, XPathExpression expr) {
        return (NodeList) get(xml, expr, XPathConstants.NODESET);
    }

    /**
     * Parses XML text into a namespace-aware DOM document.
     *
     * @param xml the XML document text
     * @return the parsed document
     */
    private static Document stringToDocument(String xml) {
        Document result = null;
        try {
            DocumentBuilderFactory domFactory =
                DocumentBuilderFactory.newInstance();
            domFactory.setNamespaceAware(true);
            // NOTE(review): DTD and external-entity handling are left at the
            // parser defaults; harden the factory if xml can come from
            // untrusted sources.
            DocumentBuilder builder = domFactory.newDocumentBuilder();
            result = builder.parse(new InputSource(new StringReader(xml)));
        } catch (Throwable t) {
            ThrowableUtil.wrapAndThrow(t);
        }
        return result;
    }
}

XPathUtilを使ったサンプルソース

  // Sample inventory document to query.
  String inventoryXml = " <inventory> "
                + "     <book year=\"2000\"> "
                + "         <title>Snow Crash</title> "
                + "         <author>Neal Stephenson</author> "
                + "         <publisher>Spectra</publisher> "
                + "         <isbn>0553380958</isbn> "
                + "         <price>14.95</price> "
                + "     </book> "
                + "     <book year=\"2005\"> "
                + "         <title>Burning Tower</title> "
                + "         <author>Larry Niven</author> "
                + "         <author>Jerry Pournelle</author> "
                + "         <publisher>Pocket</publisher> "
                + "         <isbn>0743416910</isbn> "
                + "         <price>5.99</price> "
                + "     </book> "
                + "     <book year=\"1995\"> "
                + "         <title>Zodiac</title> "
                + "         <author>Neal Stephenson</author> "
                + "         <publisher>Spectra</publisher> "
                + "         <isbn>0553573862</isbn> "
                + "         <price>7.50</price> "
                + "     </book> "
                + " </inventory> ";

  // XPathUtil parses the string and evaluates the expression in one call.
  String query = "//book[author='Neal Stephenson']/title/text()";
  NodeList titles = XPathUtil.getNodeList(inventoryXml, query);

  // Print the text of every title node selected by the query.
  for (int index = 0, count = titles.getLength(); index < count; index++) {
      System.out.println(titles.item(index).getNodeValue());
  }