Skip to content

Commit

Permalink
#135: added Saxon XSLT lib, removed slow kludge for XML-in-HTML-fixing
Browse files Browse the repository at this point in the history
  • Loading branch information
S1artie committed Jun 9, 2017
1 parent 023add6 commit e0f235a
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 110 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/de.gebit.integrity.remoting/kryo/minlog-1.3.0.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/de.gebit.integrity.remoting/kryo/objenesis-2.2.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/de.gebit.integrity.remoting/kryo/reflectasm-1.11.3.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/de.gebit.integrity.runner/saxon/saxon-6.5.5-patched.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
</listAttribute>
<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
<booleanAttribute key="org.eclipse.jdt.launching.DEFAULT_CLASSPATH" value="false"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/de.gebit.integrity.remoting/kryo/minlog-1.3.0.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/de.gebit.integrity.remoting/kryo/objenesis-2.2.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/de.gebit.integrity.remoting/kryo/reflectasm-1.11.3.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry path=&quot;3&quot; projectName=&quot;de.gebit.integrity.runner&quot; type=&quot;1&quot;/&gt;&#13;&#10;"/>
<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/de.gebit.integrity.runner/saxon/saxon-6.5.5-patched.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
</listAttribute>
<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
<booleanAttribute key="org.eclipse.jdt.launching.DEFAULT_CLASSPATH" value="false"/>
Expand Down
1 change: 1 addition & 0 deletions de.gebit.integrity.runner/.classpath
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
<classpathentry kind="src" path="src"/>
<classpathentry kind="lib" path="saxon/saxon-6.5.5-patched.jar"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>
3 changes: 2 additions & 1 deletion de.gebit.integrity.runner/META-INF/MANIFEST.MF
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ Bundle-ManifestVersion: 2
Bundle-Name: Integrity Test Framework - Test Runner
Bundle-SymbolicName: de.gebit.integrity.runner
Bundle-Version: 0.16.0.qualifier
Bundle-ClassPath: .
Require-Bundle: org.eclipse.xtext;bundle-version="2.8.3",
de.gebit.integrity.dsl,
org.eclipse.xtext.common.types;bundle-version="2.8.3",
Expand Down Expand Up @@ -41,3 +40,5 @@ Import-Package: org.apache.log4j;version="1.2.15",
org.jdom.xpath;version="1.0.0",
org.slf4j;version="1.4.0"
Bundle-Vendor: GEBIT Solutions GmbH
Bundle-ClassPath: .,
saxon/saxon-6.5.5-patched.jar
3 changes: 2 additions & 1 deletion de.gebit.integrity.runner/build.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
bin.includes = META-INF/,\
.
.,\
saxon/
source.. = src/
88 changes: 88 additions & 0 deletions de.gebit.integrity.runner/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,94 @@
<resource>
<directory>${basedir}/src</directory>
</resource>
<resource>
<directory>${project.build.directory}/saxon</directory>
</resource>
</resources>

<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>truezip-maven-plugin</artifactId>
<version>1.2</version>
<executions>
<execution>
<id>copy-package</id>
<goals>
<goal>copy</goal>
</goals>
<phase>validate</phase>
<configuration>
<verbose>true</verbose>
<fileset>
<directory>${basedir}/saxon/saxon-6.5.5-patched.jar/</directory>
<outputDirectory>${project.build.directory}/saxon</outputDirectory>
<include>**/*.class</include>
</fileset>
</configuration>
</execution>
</executions>
</plugin>

<plugin>
<groupId>org.eclipse.tycho</groupId>
<artifactId>tycho-compiler-plugin</artifactId>
<executions>
<execution>
<id>default-compile</id>
<phase>compile</phase>
<goals>
<goal>compile</goal>
</goals>
<configuration>
<extraClasspathElements>
<extraClasspathElement>
<groupId>de.gebit.integrity.runner.internaldep</groupId>
<artifactId>saxon</artifactId>
<version>6.5.5</version>
<systemPath>${basedir}/saxon/saxon-6.5.5-patched.jar</systemPath>
</extraClasspathElement>
</extraClasspathElements>
</configuration>
</execution>
</executions>
</plugin>

</plugins>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>
org.codehaus.mojo
</groupId>
<artifactId>
truezip-maven-plugin
</artifactId>
<versionRange>
[1.2,)
</versionRange>
<goals>
<goal>copy</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/HTMLEmitter.java b/HTMLEmitter.java
index 033b585..754b892 100644
--- a/HTMLEmitter.java
+++ b/HTMLEmitter.java
@@ -326,7 +326,7 @@ public class HTMLEmitter extends XMLEmitter {

if (inAttribute) {
if (ch[i]=='<') {
- writer.write('<'); // not escaped
+ writer.write("&lt;"); // escaped
} else if (ch[i]=='>') {
writer.write("&gt;"); // recommended for older browsers
} else if (ch[i]=='&') {
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
Expand Down Expand Up @@ -558,6 +557,11 @@ public class XmlWriterTestCallback extends AbstractTestRunnerCallback {
protected static final String XSLT_RESOURCE_NAME = System.getProperty(SYSPARAM_XSLT_RESOURCE,
"resource/xhtml.xslt");

/**
* The name of the system property that selects the {@link TransformerFactory} implementation to be used for XSLT
* transformations.
*/
protected static final String XSLT_TRANSFORMER_FACTORY_PROPERTY = "javax.xml.transform.TransformerFactory";

/**
* Creates a new instance.
*
Expand Down Expand Up @@ -1654,121 +1658,34 @@ protected int determineTransformThreadStackSize() {
*/
protected void transformResult(FileOutputStream aTargetStream) {
try {
if (System.getProperty("javax.xml.transform.TransformerFactory") == null) {
// Explicitly specify the JRE-bundled XSLT transformer if nothing else was specified via the
// system property, so we at least know for sure what to expect
System.setProperty("javax.xml.transform.TransformerFactory",
"com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl");
}
/*
* Explicitly use the bundled, patched Saxon XSLT transformer. The system property selecting the transformer
* factory is only overridden while the factory is created; afterwards its previous value is restored (or the
* property is cleared if it was not set before).
*
* Background: the XML source data is copied into the transformed HTML result (into the "xmldata" element).
* Since the output method of the serializer is set to HTML, Saxon assumes that it is okay to leave the '<'
* character unescaped in attribute values. That is no problem for HTML, but strict XML requires this
* character to be replaced by its corresponding entity. Outputting strict XML instead would solve this, but
* it makes the result unrenderable by browsers (for reasons that are not fully understood). Therefore, the
* HTMLEmitter inside the bundled Saxon XSLT transformer has been patched to escape this character as well,
* which seems to work just fine with browsers as well as with XML parsers. Note that an XML parser should
* only parse until the end of the xmldata element, because everything after it is HTML that is not
* well-formed XML. The patch is provided in the file com.icl.saxon.output.HTMLEmitter.diff.
*/
String tempOldProperty = System.getProperty(XSLT_TRANSFORMER_FACTORY_PROPERTY);
System.setProperty(XSLT_TRANSFORMER_FACTORY_PROPERTY, "com.icl.saxon.TransformerFactoryImpl");
TransformerFactory tempTransformerFactory = TransformerFactory.newInstance();
if (tempOldProperty != null) {
System.setProperty(XSLT_TRANSFORMER_FACTORY_PROPERTY, tempOldProperty);
} else {
System.clearProperty(XSLT_TRANSFORMER_FACTORY_PROPERTY);
}

Transformer tempTransformer = tempTransformerFactory.newTransformer(new StreamSource(getXsltStream()));
tempTransformer.setOutputProperty(OutputKeys.METHOD, "html");
tempTransformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

Source tempSource = new JDOMSource(document);

/*
* There is a problem with the XML source data that is copied to the transformed HTML result (into the
* "xmldata" element): since the output method for the serializer is set to HTML, it seems to inevitably
* output '<' and '>' in attributes as characters, which is no problem for HTML, but strict XML requires
* those to be replaced by their corresponding entities. I could solve this by outputting strict XML, but
* that renders the output unrenderable by browsers :-( well, for some reason I don't fully understand at
* least, I'm no browser developer. To solve this, the following very ugly hack replaces the characters by
* their entities in all attribute values inside the xmldata section on a character stream level. That makes
* the xmldata content valid XML and thus conveniently parseable for example by a SAX parser (just be sure
* to stop the parsing when reaching the end of that section, because the HTML afterwards definitely doesn't
* parse as XML!), while keeping viewability on all browsers. It should not have any negative side-effects,
* apart from being disgusting and everything, but well, this can still be replaced by a more elegant
* solution if someone comes up with one.
*/
StreamResult tempResult = new StreamResult(new FilterOutputStream(aTargetStream) {

private final char[] triggerOpenTagName = new char[] { 'x', 'm', 'l', 'd', 'a', 't', 'a' };

private final char[] triggerCloseTagName = new char[] { '/', 'x', 'm', 'l', 'd', 'a', 't', 'a' };

private static final char TRIGGER_TAG_START = '<';

private static final char TRIGGER_TAG_END = '<';

private static final char TRIGGER_ATTRIBUTE = '"';

private boolean insideXmlPart;

private boolean insideAttribute;

private boolean pastXmlPart;

private int tagPosition;

@Override
public void write(int aByte) throws IOException {
char tempChar = (char) aByte;

if (!pastXmlPart) {
if (!insideAttribute) {
if (tempChar == TRIGGER_TAG_START) {
tagPosition = 0;
} else if (tempChar == TRIGGER_TAG_END) {
tagPosition = -1;
} else if (tagPosition >= 0) {
if (insideXmlPart && tempChar == TRIGGER_ATTRIBUTE) {
insideAttribute = true;
} else {
tagPosition++;
if (insideXmlPart) {
if (tagPosition < triggerCloseTagName.length - 1) {
if (tempChar != triggerCloseTagName[tagPosition]) {
tagPosition = 0;
}
} else if (tagPosition == triggerCloseTagName.length - 1) {
insideXmlPart = false;
pastXmlPart = true;
tagPosition = 0;
}
} else {
if (tagPosition < triggerOpenTagName.length - 1) {
if (tempChar != triggerOpenTagName[tagPosition]) {
tagPosition = 0;
}
} else if (tagPosition == triggerOpenTagName.length - 1) {
insideXmlPart = true;
pastXmlPart = false;
tagPosition = 0;
}
}
}
}
} else {
if (insideXmlPart) {
if (tempChar == TRIGGER_ATTRIBUTE) {
insideAttribute = false;
} else {
if (tempChar == '<') {
super.write("&lt;".getBytes("UTF-8"));
return;
} else if (tempChar == '>') {
super.write("&gt;".getBytes("UTF-8"));
return;
}
}
}
}
}

super.write(aByte);
}

@Override
public void write(byte[] someBytes, int anOffset, int aLength) throws IOException {
if (!pastXmlPart) {
super.write(someBytes, anOffset, aLength);
} else {
out.write(someBytes, anOffset, aLength);
}
}
});
StreamResult tempResult = new StreamResult(aTargetStream);

tempTransformer.transform(tempSource, tempResult);
} catch (TransformerConfigurationException exc) {
Expand Down

0 comments on commit e0f235a

Please sign in to comment.