Skip to content

Commit

Permalink
Merge pull request #3901 from maxonfjvipon/bug/#3864/unhex-via-xsl
Browse files Browse the repository at this point in the history
bug(#3864): unhexing via XSL
  • Loading branch information
yegor256 authored Feb 10, 2025
2 parents e6d7329 + 49daa09 commit c9ff38c
Show file tree
Hide file tree
Showing 11 changed files with 438 additions and 58 deletions.
34 changes: 13 additions & 21 deletions eo-maven-plugin/src/main/java/org/eolang/maven/LintMojo.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import java.util.concurrent.ConcurrentHashMap;
import org.apache.maven.plugins.annotations.LifecyclePhase;
import org.apache.maven.plugins.annotations.Mojo;
import org.apache.maven.plugins.annotations.Parameter;
import org.cactoos.list.ListOf;
import org.eolang.lints.Defect;
import org.eolang.lints.Program;
Expand Down Expand Up @@ -72,26 +71,20 @@ public final class LintMojo extends SafeMojo {
*/
static final String CACHE = "linted";

/**
* Whether we should fail on warning.
*
* @checkstyle MemberNameCheck (11 lines)
*/
@SuppressWarnings("PMD.ImmutableField")
@Parameter(property = "eo.failOnWarning", required = true, defaultValue = "true")
private boolean failOnWarning;
/**
 * Entry point of the mojo: lints the sources unless the
 * {@code skipLinting} flag is set, in which case it only logs a notice.
 * @throws IOException If linting fails
 */
@Override
void exec() throws IOException {
    if (this.skipLinting) {
        Logger.info(this, "Linting is skipped because eo:skipLinting is TRUE");
        return;
    }
    this.lint();
}

/**
* Whether we should lint all the sources together as package.
*
* @checkstyle MemberNameCheck (11 lines)
* Lint.
* @throws IOException If fails
*/
@SuppressWarnings("PMD.ImmutableField")
@Parameter(property = "eo.lintAsPackage", required = true, defaultValue = "true")
private boolean lintAsPackage;

@Override
void exec() throws IOException {
private void lint() throws IOException {
final long start = System.currentTimeMillis();
final Collection<ForeignTojo> tojos = this.scopedTojos().withShaken();
final ConcurrentHashMap<Severity, Integer> counts = new ConcurrentHashMap<>();
Expand Down Expand Up @@ -156,7 +149,7 @@ private int lintOne(final ForeignTojo tojo,
final Path target = new Place(name).make(base, AssembleMojo.XMIR);
tojo.withLinted(
new FpDefault(
src -> LintMojo.lint(xmir, counts).toString(),
src -> LintMojo.linted(xmir, counts).toString(),
this.cache.toPath().resolve(LintMojo.CACHE),
this.plugin.getVersion(),
new TojoHash(tojo),
Expand Down Expand Up @@ -283,8 +276,7 @@ private static String summary(final ConcurrentHashMap<Severity, Integer> counts)
* @param counts Counts of errors, warnings, and critical
* @return XML after linting
*/
private static XML lint(final XML xmir,
final ConcurrentHashMap<Severity, Integer> counts) {
private static XML linted(final XML xmir, final ConcurrentHashMap<Severity, Integer> counts) {
final Directives dirs = new Directives();
final Collection<Defect> defects = new Program(xmir).defects();
if (!defects.isEmpty()) {
Expand Down
30 changes: 30 additions & 0 deletions eo-maven-plugin/src/main/java/org/eolang/maven/SafeMojo.java
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,36 @@ abstract class SafeMojo extends AbstractMojo {
@SuppressWarnings("PMD.ImmutableField")
protected boolean ignoreTransitive;

/**
* Whether we should fail on warning.
*
* <p>Injected by Maven from the {@code eo.failOnWarning} property;
* the declared default value is {@code true}.</p>
*
* @checkstyle MemberNameCheck (10 lines)
* @checkstyle VisibilityModifierCheck (7 lines)
*/
@SuppressWarnings("PMD.ImmutableField")
@Parameter(property = "eo.failOnWarning", required = true, defaultValue = "true")
protected boolean failOnWarning;

/**
* Whether we should lint all the sources together as package.
*
* <p>Injected by Maven from the {@code eo.lintAsPackage} property;
* the declared default value is {@code true}.</p>
*
* @checkstyle MemberNameCheck (10 lines)
* @checkstyle VisibilityModifierCheck (7 lines)
*/
@SuppressWarnings("PMD.ImmutableField")
@Parameter(property = "eo.lintAsPackage", required = true, defaultValue = "true")
protected boolean lintAsPackage;

/**
* Whether we should skip linting at all.
*
* <p>Injected by Maven from the {@code eo.skipLinting} property;
* the declared default value is {@code false}, i.e. linting runs
* unless it is explicitly switched off.</p>
*
* @checkstyle MemberNameCheck (10 lines)
* @checkstyle VisibilityModifierCheck (7 lines)
*/
@SuppressWarnings("PMD.ImmutableField")
@Parameter(property = "eo.skipLinting", required = true, defaultValue = "false")
protected boolean skipLinting;

/**
* The current version of eo-maven-plugin.
* Maven 3 only.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ SOFTWARE.
<!--
For every cti objects add error messages.
-->
<xsl:import href="/org/eolang/parser/_funcs.xsl"/>
<xsl:output encoding="UTF-8" method="xml"/>
<xsl:template match="/program/errors">
<xsl:copy>
Expand All @@ -52,35 +53,14 @@ SOFTWARE.
<xsl:value-of select="@line"/>
</xsl:attribute>
<xsl:attribute name="severity">
<xsl:value-of select="eo:hex-to-utf8(o[last() - 1]/o[1]/text())"/>
<xsl:value-of select="eo:bytes-to-string(o[last() - 1]/o[1]/text())"/>
</xsl:attribute>
<xsl:value-of select="eo:hex-to-utf8(o[last()]/o[1]/text())"/>
<xsl:value-of select="eo:bytes-to-string(o[last()]/o[1]/text())"/>
</xsl:element>
</xsl:template>
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
</xsl:template>
<!--
Converts a hex string into a readable string.
Dashes are removed first, then the input is consumed two hex digits at a
time and every pair is turned directly into ONE code point via
codepoints-to-string(). No multi-byte UTF-8 decoding is performed here, so
despite the name only single-byte characters come out as expected.
Recursion stops (and the rest of the input is dropped) as soon as fewer
than two digits remain or a pair evaluates to code point 0.
-->
<xsl:function name="eo:hex-to-utf8">
<xsl:param name="str"/>
<!-- Lookup alphabet: the position of a digit in this string is its value.
     Only upper-case digits are listed; any other character is not found by
     substring-before() and therefore counts as 0. -->
<xsl:variable name="hex" select="'0123456789ABCDEF'"/>
<xsl:variable name="tail" select="translate($str, '-', '')"/>
<!-- Base case: Return empty string if input is empty or invalid -->
<xsl:if test="string-length($tail) &gt;= 2">
<!-- Extract first 2 digits -->
<xsl:variable name="first" select="substring($tail, 1, 1)"/>
<xsl:variable name="second" select="substring($tail, 2, 1)"/>
<!-- Get their hex values -->
<xsl:variable name="val1" select="string-length(substring-before($hex, $first))"/>
<xsl:variable name="val2" select="string-length(substring-before($hex, $second))"/>
<!-- Ensure valid character range -->
<xsl:variable name="codepoint" select="$val1 * 16 + $val2"/>
<xsl:if test="$codepoint &gt; 0">
<!-- Emit this character, then recurse on everything after the pair -->
<xsl:variable name="head" select="codepoints-to-string($codepoint)"/>
<xsl:value-of select="concat($head, eo:hex-to-utf8(substring($tail, 3)))"/>
</xsl:if>
</xsl:if>
</xsl:function>
</xsl:stylesheet>
26 changes: 13 additions & 13 deletions eo-parser/src/main/java/org/eolang/parser/Xmir.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.yegor256.xsline.StEndless;
import com.yegor256.xsline.TrClasspath;
import com.yegor256.xsline.TrDefault;
import com.yegor256.xsline.TrJoined;
import com.yegor256.xsline.Train;
import com.yegor256.xsline.Xsline;
import java.util.Collection;
Expand All @@ -57,23 +58,22 @@
*/
@SuppressWarnings("PMD.TooManyMethods")
public final class Xmir implements XML {
/**
* Unhex transformation.
*/
private static final Shift UNHEX = new StUnhex();

/**
* Train of transformations that prepare XMIR for conversion to EO.
*/
private static final Train<Shift> FOR_EO = new TrFull(
new TrDefault<>(
new StEndless(
new StClasspath("/org/eolang/parser/print/tuples-to-stars.xsl")
new TrJoined<>(
new TrDefault<>(
new StEndless(
new StClasspath("/org/eolang/parser/print/tuples-to-stars.xsl")
)
),
new StClasspath("/org/eolang/parser/print/dataized-to-const.xsl"),
Xmir.UNHEX,
new StClasspath("/org/eolang/parser/print/wrap-data.xsl"),
new StClasspath("/org/eolang/parser/print/to-eo.xsl")
new TrClasspath<>(
"/org/eolang/parser/print/dataized-to-const.xsl",
"/org/eolang/parser/print/unhex-data.xsl",
"/org/eolang/parser/print/wrap-data.xsl",
"/org/eolang/parser/print/to-eo.xsl"
).back()
)
);

Expand Down Expand Up @@ -166,7 +166,7 @@ public String toPhi(final boolean conservative) {
return this.converted(
new TrFull(
new TrDefault<>(
Xmir.UNHEX,
new StClasspath("/org/eolang/parser/print/unhex-data.xsl"),
new StClasspath(
"/org/eolang/parser/phi/to-phi.xsl",
String.format("conservative %b", conservative)
Expand Down
133 changes: 132 additions & 1 deletion eo-parser/src/main/resources/org/eolang/parser/_funcs.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:eo="https://www.eolang.org" xmlns:xs="http://www.w3.org/2001/XMLSchema" id="_funcs" version="2.0">
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:math="http://www.w3.org/2005/xpath-functions/math" xmlns:eo="https://www.eolang.org" xmlns:xs="http://www.w3.org/2001/XMLSchema" id="_funcs" version="2.0">
<xsl:function name="eo:has-data" as="xs:boolean">
<xsl:param name="o" as="element()"/>
<xsl:sequence select="normalize-space(string-join($o/text(), '')) != ''"/>
Expand All @@ -35,4 +35,135 @@ SOFTWARE.
<xsl:param name="o" as="element()"/>
<xsl:sequence select="$o/@base='∅'"/>
</xsl:function>
<!--
BYTES TO STRING
Turns a dash-separated sequence of hex bytes into an escaped string literal body.
The input consisting of exactly two dashes yields the empty string.
Otherwise every hex token is converted to an integer with eo:hex-to-utf8,
the integers are decoded as UTF-8 into code points with eo:decode-bytes,
and each code point is rendered as follows:
  10, 9, 13        : the two-character escapes \n, \t, \r
  32 to 126        : the character itself, with a backslash put in front of
                     a backslash or a double quote
  up to 65535      : \uXXXX
  above 65535      : two \uXXXX escapes forming a UTF-16 surrogate pair
-->
<xsl:function name="eo:bytes-to-string" as="xs:string">
<xsl:param name="bytes" as="xs:string"/>
<xsl:choose>
<!-- Two dashes alone stand for "no bytes" -->
<xsl:when test="$bytes = '--'">
<xsl:sequence select="''"/>
</xsl:when>
<xsl:otherwise>
<!-- The pieces produced by the loop below are collected here; the result
     is read back as one string when the function returns. -->
<xsl:variable name="decoded">
<!-- An input that ends with a dash is reduced to the text before its
     first dash (single token); any other input is split on dashes. -->
<xsl:for-each select="eo:decode-bytes(for $byte in (if (ends-with($bytes, '-')) then substring-before($bytes, '-') else tokenize($bytes, '-')) return eo:hex-to-utf8($byte))">
<xsl:choose>
<!-- Line feed -->
<xsl:when test=".=10">
<xsl:value-of select="'\n'"/>
</xsl:when>
<!-- Horizontal tab -->
<xsl:when test=".=9">
<xsl:value-of select="'\t'"/>
</xsl:when>
<!-- Carriage return -->
<xsl:when test=".=13">
<xsl:value-of select="'\r'"/>
</xsl:when>
<!-- Keep ASCII characters -->
<xsl:when test=". ge 32 and . le 126">
<xsl:variable name="char" select="codepoints-to-string(.)"/>
<!-- Backslash and double quote get a leading backslash -->
<xsl:if test="$char='\' or $char='&quot;'">
<xsl:text>\</xsl:text>
</xsl:if>
<xsl:value-of select="$char"/>
</xsl:when>
<!-- Convert non-ASCII to \uXXXX -->
<xsl:when test=". le 65535">
<xsl:value-of select="concat('\u', eo:int-to-hex(xs:int(.)))"/>
</xsl:when>
<!-- Handle surrogate pairs for code points above U+FFFF -->
<xsl:otherwise>
<!-- 55296 = 0xD800 -->
<xsl:variable name="cp1" select="xs:int(floor((. - 65536) div 1024) + 55296)"/>
<!-- 56320 = 0xDC00 -->
<xsl:variable name="cp2" select="xs:int(((. - 65536) mod 1024) + 56320)"/>
<xsl:value-of select="concat('\u', eo:int-to-hex($cp1), '\u', eo:int-to-hex($cp2))"/>
</xsl:otherwise>
</xsl:choose>
</xsl:for-each>
</xsl:variable>
<xsl:sequence select="$decoded"/>
</xsl:otherwise>
</xsl:choose>
</xsl:function>
<!--
  BYTES TO NUMBER, e.g. 40-14-00-00-00-00-00-00 => 5
  Interprets the dash-separated hex bytes as one big-endian IEEE 754
  double: 1 sign bit, 11 exponent bits (bias 1023), 52 fraction bits.
  All three encodings are handled:
    exponent bits all zero : zero and subnormals, sign * fraction * 2^(-1022)
    exponent bits all one  : INF / -INF when the fraction is zero, NaN otherwise
    anything else          : normal numbers, sign * (1 + fraction) * 2^(e - 1023)
  The result is an xs:double.
-->
<xsl:function name="eo:bytes-to-number" as="xs:anyAtomicType">
  <xsl:param name="bytes"/>
  <!-- Undash -->
  <xsl:variable name="hex" select="translate($bytes, '-', '')"/>
  <!-- Hex digit to 4-bit binary string lookup table -->
  <xsl:variable name="map" as="element()*">
    <entry h="0" b="0000"/>
    <entry h="1" b="0001"/>
    <entry h="2" b="0010"/>
    <entry h="3" b="0011"/>
    <entry h="4" b="0100"/>
    <entry h="5" b="0101"/>
    <entry h="6" b="0110"/>
    <entry h="7" b="0111"/>
    <entry h="8" b="1000"/>
    <entry h="9" b="1001"/>
    <entry h="A" b="1010"/>
    <entry h="B" b="1011"/>
    <entry h="C" b="1100"/>
    <entry h="D" b="1101"/>
    <entry h="E" b="1110"/>
    <entry h="F" b="1111"/>
  </xsl:variable>
  <!-- The whole value as a string of 0/1 characters -->
  <xsl:variable name="bin" as="xs:string" select="string-join(for $c in string-to-codepoints(upper-case($hex)) return $map[@h = codepoints-to-string($c)]/@b, '')"/>
  <!-- Sign bit (1 for negative, 0 for positive) -->
  <xsl:variable name="sign" select="if (substring($bin, 1, 1) = '1') then -1 else 1"/>
  <!-- Biased exponent (11 bits) as a number in the range 0..2047 -->
  <xsl:variable name="exponentBits" select="substring($bin, 2, 11)"/>
  <xsl:variable name="biased" as="xs:double" select="xs:double(sum(for $i in 1 to string-length($exponentBits) return xs:double(substring($exponentBits, $i, 1)) * math:pow(2, string-length($exponentBits) - $i)))"/>
  <!-- Fraction (52 bits) as a value in [0, 1); kept as a typed double so no
       round trip through a text node is needed -->
  <xsl:variable name="mantissaBits" select="substring($bin, 13, 52)"/>
  <xsl:variable name="mantissaValue" as="xs:double" select="xs:double(sum(for $i in 1 to string-length($mantissaBits) return xs:double(substring($mantissaBits, $i, 1)) div math:pow(2, $i)))"/>
  <xsl:choose>
    <!-- All exponent bits set: infinity or NaN -->
    <xsl:when test="$biased = 2047">
      <xsl:sequence select="if ($mantissaValue = 0) then $sign * xs:double('INF') else xs:double('NaN')"/>
    </xsl:when>
    <!-- No exponent bits set: zero or a subnormal, there is no implicit leading 1 -->
    <xsl:when test="$biased = 0">
      <xsl:sequence select="$sign * $mantissaValue * math:pow(2, -1022)"/>
    </xsl:when>
    <!-- Normal number -->
    <xsl:otherwise>
      <xsl:sequence select="$sign * (1 + $mantissaValue) * math:pow(2, $biased - 1023)"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
<!-- HELPER FUNCTIONS -->
<!--
  Decodes a sequence of UTF-8 byte values into Unicode code points.
  The leading byte selects the width of the current character (1 to 4 bytes);
  its code point is emitted and the function recurses on the remaining bytes.
  A leading byte outside the four recognised ranges (or an empty input)
  ends the decoding with nothing more emitted.
-->
<xsl:function name="eo:decode-bytes" as="xs:integer*">
  <xsl:param name="bytes" as="xs:integer*"/>
  <xsl:variable name="lead" as="xs:integer?" select="$bytes[1]"/>
  <!-- Number of bytes taken by the current character, 0 when not decodable:
       0xxxxxxx => 1, 110xxxxx => 2, 1110xxxx => 3, 11110xxx => 4 -->
  <xsl:variable name="width" as="xs:integer" select="if ($lead lt 128) then 1 else if ($lead ge 192 and $lead lt 224) then 2 else if ($lead ge 224 and $lead lt 240) then 3 else if ($lead ge 240 and $lead lt 248) then 4 else 0"/>
  <xsl:if test="$width gt 0">
    <!-- Payload bits of the lead byte followed by 6 bits of every continuation byte -->
    <xsl:variable name="code-point" as="xs:integer?" select="if ($width eq 1) then $lead else if ($width eq 2) then (($lead - 192) * 64) + ($bytes[2] - 128) else if ($width eq 3) then (($lead - 224) * 4096) + (($bytes[2] - 128) * 64) + ($bytes[3] - 128) else (($lead - 240) * 262144) + (($bytes[2] - 128) * 4096) + (($bytes[3] - 128) * 64) + ($bytes[4] - 128)"/>
    <xsl:sequence select="$code-point"/>
    <xsl:sequence select="eo:decode-bytes(subsequence($bytes, $width + 1))"/>
  </xsl:if>
</xsl:function>
<!--
  Renders an integer as four upper-case hex digits, e.g. 255 => 00FF.
  The value is cut into four 4-bit groups, most significant first, and every
  group is looked up in the digit alphabet.
-->
<xsl:function name="eo:int-to-hex" as="xs:string">
  <xsl:param name="value" as="xs:integer"/>
  <xsl:variable name="alphabet" select="'0123456789ABCDEF'"/>
  <xsl:variable name="nibbles" as="xs:integer*" select="($value idiv 4096, ($value mod 4096) idiv 256, ($value mod 256) idiv 16, $value mod 16)"/>
  <xsl:sequence select="string-join(for $n in $nibbles return substring($alphabet, $n + 1, 1), '')"/>
</xsl:function>
<!--
  Converts a hexadecimal string into its integer value, e.g. E2 => 226.
  Despite the name no UTF-8 decoding happens here: the function only parses
  hex digits (the result is later fed to eo:decode-bytes as one byte value).
  Surrounding whitespace is ignored and lower-case digits are accepted.
  Characters that are not hex digits contribute nothing to the sum, and an
  empty input gives 0.
-->
<xsl:function name="eo:hex-to-utf8" as="xs:integer">
  <xsl:param name="hex" as="xs:string"/>
  <xsl:variable name="hex-upper" select="upper-case(normalize-space($hex))"/>
  <xsl:variable name="length" select="string-length($hex-upper)"/>
  <xsl:variable name="hex-digits" select="string-to-codepoints('0123456789ABCDEF')"/>
  <!-- Positional sum: digit value times 16 to the power of its distance from the right end -->
  <xsl:variable name="decimal" select="sum(for $i in 1 to $length return (index-of($hex-digits, string-to-codepoints(substring($hex-upper, $i, 1))) - 1) * math:pow(16, $length - $i))"/>
  <!-- Return a real integer instead of a text node that has to be atomized and cast again -->
  <xsl:sequence select="xs:integer($decimal)"/>
</xsl:function>
</xsl:stylesheet>
Loading

0 comments on commit c9ff38c

Please sign in to comment.