diff --git a/pom.xml b/pom.xml
index eed6aa4a..e97cbc98 100644
--- a/pom.xml
+++ b/pom.xml
@@ -361,6 +361,11 @@
unxml
${unxml.version}
+
+ net.sf.saxon
+ Saxon-HE
+ 12.4
+
org.python
jython-standalone
diff --git a/src/main/java/io/cdap/plugin/http/common/pagination/page/XmlPage.java b/src/main/java/io/cdap/plugin/http/common/pagination/page/XmlPage.java
index f7b81e07..1eb4b1a3 100644
--- a/src/main/java/io/cdap/plugin/http/common/pagination/page/XmlPage.java
+++ b/src/main/java/io/cdap/plugin/http/common/pagination/page/XmlPage.java
@@ -15,23 +15,24 @@
*/
package io.cdap.plugin.http.common.pagination.page;
-import com.fasterxml.jackson.databind.node.ArrayNode;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
-import com.nerdforge.unxml.Parsing;
-import com.nerdforge.unxml.factory.ParsingFactory;
-import com.nerdforge.unxml.parsers.Parser;
-import com.nerdforge.unxml.parsers.builders.ObjectNodeParserBuilder;
+import com.google.gson.JsonObject;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.format.StructuredRecordStringConverter;
import io.cdap.plugin.http.common.http.HttpResponse;
import io.cdap.plugin.http.source.common.BaseHttpSourceConfig;
-import org.w3c.dom.Document;
+import net.sf.saxon.s9api.Processor;
+import net.sf.saxon.s9api.SaxonApiException;
+import net.sf.saxon.s9api.XPathCompiler;
+import net.sf.saxon.s9api.XdmItem;
+import net.sf.saxon.s9api.XdmNode;
+import net.sf.saxon.s9api.XdmValue;
+import net.sf.saxon.trans.XPathException;
import java.util.Iterator;
import java.util.Map;
-import javax.xml.xpath.XPathConstants;
/**
* Returns sub elements which are specified by XPath, one by one.
@@ -41,15 +42,17 @@
class XmlPage extends BasePage {
private final Map fieldsMapping;
private final Iterator iterator;
- private final Document document;
+ private final XdmNode document;
private final Schema schema;
private final BaseHttpSourceConfig config;
+ // Saxon processor of this page; it is used both to parse the response body and to compile XPath expressions.
+ private final Processor processor = new Processor(false);
+
+ /**
+ * Parses the response body into a Saxon document and builds the iterator over its result elements.
+ *
+ * @param config source configuration supplying the fields mapping, the result path and the schema
+ * @param httpResponse response whose body is parsed as xml
+ */
XmlPage(BaseHttpSourceConfig config, HttpResponse httpResponse) {
super(httpResponse);
this.config = config;
this.fieldsMapping = config.getFullFieldsMapping();
- this.document = XmlUtil.createXmlDocument(httpResponse.getBody());
+ this.document = XmlUtil.createXmlDocument(processor, httpResponse.getBody());
+ // The iterator is built right here, so config, fieldsMapping and document must already be assigned above.
this.iterator = getDocumentElementsIterator();
this.schema = config.getSchema();
}
@@ -79,33 +82,48 @@ public PageEntry next() {
*/
@Override
public String getPrimitiveByPath(String path) {
- return (String) XmlUtil.getByXPath(document, path, XPathConstants.STRING);
+ // The path is evaluated against the whole parsed document; XmlUtil.getByXPath yields null when evaluation fails.
+ return XmlUtil.getByXPath(processor, document, path);
}
/**
- * 1. Converts xml to a structure which is defined by "Fields Mapping" configuration. This is done using unxml.
+ * 1. Converts xml to a structure which is defined by "Fields Mapping" configuration. This is done using saxon.
* 2. The result entity is a json array.
* 3. An iterator for elements of json array is returned.
*
+ * Every item selected by the configured result path becomes one json object; the XPath of each mapped field is
+ * evaluated with that item as the context.
+ *
* @return an iterator for elements of result json array.
+ * @throws IllegalStateException if the result path or one of the field paths cannot be evaluated
*/
private Iterator getDocumentElementsIterator() {
- Parsing parsing = ParsingFactory.getInstance().create();
- ObjectNodeParserBuilder obj = parsing.obj();
-
- for (Map.Entry entry : fieldsMapping.entrySet()) {
- String schemaFieldName = entry.getKey();
- String fieldPath = entry.getValue();
-
- obj = obj.attribute(schemaFieldName, fieldPath, XmlUtil.xmlTextNodeParser());
+    XPathCompiler xPathCompiler = processor.newXPathCompiler();
+    // The same field paths are evaluated for every result element; caching keeps the compiler from
+    // recompiling an identical expression on each iteration.
+    xPathCompiler.setCaching(true);
+    String resultPath = config.getResultPath();
+    JsonArray jsonArray = new JsonArray();
+    try {
+      for (XdmItem entry : xPathCompiler.evaluate(resultPath, document)) {
+        JsonObject jsonObject = new JsonObject();
+        for (Map.Entry<String, String> mapping : fieldsMapping.entrySet()) {
+          XdmValue xdmItems = xPathCompiler.evaluate(mapping.getValue(), entry);
+          jsonObject.addProperty(mapping.getKey(), getValueFromXdmItem(xdmItems));
+        }
+        jsonArray.add(jsonObject);
+      }
+    } catch (SaxonApiException | XPathException e) {
+      throw new IllegalStateException(
+        String.format("Failed to extract fields from xml document using result path '%s'", resultPath), e);
}
-
- Parser parser = parsing.arr(config.getResultPath(), obj).build();
- ArrayNode node = parser.apply(document);
- JsonArray jsonArray = JSONUtil.toJsonArray(node.toString());
return jsonArray.iterator();
}
+  /**
+   * Turns the result of a field XPath into the string stored in the output json object.
+   *
+   * <ul>
+   *   <li>empty result (the path matched nothing): {@code null};</li>
+   *   <li>a single node with more than one child: the node's {@code toString()} form;</li>
+   *   <li>anything else (leaf node, atomic value, several items): the string value of the result.</li>
+   * </ul>
+   *
+   * @param xdmItems value produced by evaluating a field XPath
+   * @return the string for the field, or {@code null} when the XPath matched nothing
+   * @throws XPathException if the string value of the result cannot be obtained
+   */
+  private String getValueFromXdmItem(XdmValue xdmItems) throws XPathException {
+    if (xdmItems.size() == 0) {
+      return null;
+    }
+    // Only a single node can be inspected for children. An atomic value or a multi-item sequence is
+    // not an XdmNode, and casting it unconditionally would throw ClassCastException.
+    if (xdmItems instanceof XdmNode) {
+      int childCount = 0;
+      for (XdmNode ignored : ((XdmNode) xdmItems).children()) {
+        childCount++;
+      }
+      // If main node contains child node, return full node else value of the node
+      if (childCount > 1) {
+        return xdmItems.toString();
+      }
+    }
+    return xdmItems.getUnderlyingValue().getStringValue();
+  }
+
@Override
public void close() {
diff --git a/src/main/java/io/cdap/plugin/http/common/pagination/page/XmlUtil.java b/src/main/java/io/cdap/plugin/http/common/pagination/page/XmlUtil.java
index 71ef81c9..ad17950a 100644
--- a/src/main/java/io/cdap/plugin/http/common/pagination/page/XmlUtil.java
+++ b/src/main/java/io/cdap/plugin/http/common/pagination/page/XmlUtil.java
@@ -17,28 +17,24 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.TextNode;
-import com.google.common.base.Charsets;
import com.nerdforge.unxml.parsers.Parser;
-import org.w3c.dom.Document;
+import net.sf.saxon.s9api.DocumentBuilder;
+import net.sf.saxon.s9api.Processor;
+import net.sf.saxon.s9api.SaxonApiException;
+import net.sf.saxon.s9api.XPathCompiler;
+import net.sf.saxon.s9api.XdmNode;
+import net.sf.saxon.trans.XPathException;
import org.w3c.dom.Node;
-import org.xml.sax.SAXException;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
+import java.io.StringReader;
import java.io.StringWriter;
-import javax.xml.namespace.QName;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathExpression;
-import javax.xml.xpath.XPathExpressionException;
+import javax.xml.transform.stream.StreamSource;
import javax.xml.xpath.XPathFactory;
/**
@@ -50,19 +46,19 @@ public class XmlUtil {
/**
* Create xml document instance out of a String.
*
+ * @param processor Saxon processor with xml document configuration
* @param xmlString xml in string format
- * @return a Document instance representing input xml
+ * @return an XdmNode document instance representing input xml
+ * @throws IllegalStateException if the xml cannot be parsed
*/
- public static Document createXmlDocument(String xmlString) {
- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
- factory.setIgnoringComments(true);
-
+  public static XdmNode createXmlDocument(Processor processor, String xmlString) {
+    // NOTE(review): the xml comes from an HTTP response body; confirm that the processor's parser
+    // configuration rejects DTDs / external entities (XXE) - nothing in this method restricts them.
+    DocumentBuilder documentBuilder = processor.newDocumentBuilder();
try {
- InputStream input = new ByteArrayInputStream(xmlString.getBytes(Charsets.UTF_8));
- return factory.newDocumentBuilder().parse(input);
- } catch (ParserConfigurationException | SAXException | IOException e) {
+      return documentBuilder.build(new StreamSource(new StringReader(xmlString)));
+    } catch (SaxonApiException e) {
+      // Same exception type and message as before the switch to Saxon, so failures stay recognisable.
throw new IllegalStateException("Failed to parse xml document", e);
}
}
/**
@@ -106,17 +102,17 @@ public static String nodeToString(Node node) {
* Throws an exception if element is not of given path.
* Returns null if element not found
*
- * @param document document instance
+ * @param processor Saxon processor with xml document configuration
+ * @param document XdmNode document instance
* @param path xpath string representation
- * @param returnType a type of element expected to be returned
* @return element found by XPath or null if not found.
*/
- public static Object getByXPath(Document document, String path, QName returnType) {
- XPath xpath = xPathfactory.newXPath();
+ public static String getByXPath(Processor processor, XdmNode document, String path) {
+ // A fresh compiler is created per call; the path is compiled and evaluated with the document as context.
+ XPathCompiler xPathCompiler = processor.newXPathCompiler();
try {
- XPathExpression expr = xpath.compile(path);
- return expr.evaluate(document, returnType);
- } catch (XPathExpressionException e) {
+ // The whole result sequence is reduced to its string value.
+ return xPathCompiler.evaluate(path, document).getUnderlyingValue()
+ .getStringValue();
+ } catch (XPathException | SaxonApiException e) {
+ // Compilation and evaluation failures are reported to the caller as null instead of being propagated.
return null;
}
}