From 23432a3add6a745a1789d5db591b299a1f452dac Mon Sep 17 00:00:00 2001
From: scivision
Date: Tue, 1 Oct 2024 22:12:25 -0400
Subject: [PATCH] non-java normalize, more than 10x faster

---
Review notes (this commentary is ignored by git am):
 * native branch: the first component joined onto the root "/" no longer
   produces "//name"; ".." at, or directly below, "/" now resolves to "/"
   instead of "."; a component that merely ends in ".." (for example "a..")
   is no longer mistaken for a parent reference.
 * Windows drive-letter handling is unchanged by these notes' edits.
 * Line breaks, indentation and blank context lines were restored from the
   hunk counts; confirm against the tree before applying.
 * The normalize.m post-image blob id on its index line predates these edits.

 +stdlib/canonical.m   |  2 +-
 +stdlib/normalize.m   | 50 +++++++++++++++++++++++++++++++++++++++++--
 +stdlib/relative_to.m |  4 ++--
 3 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/+stdlib/canonical.m b/+stdlib/canonical.m
index 947e7fa..36127cf 100644
--- a/+stdlib/canonical.m
+++ b/+stdlib/canonical.m
@@ -45,7 +45,7 @@
   else
     % for non-existing path, return normalized relative path
     % like C++ filesystem weakly_canonical()
-    c = stdlib.normalize(c);
+    c = stdlib.normalize(c, use_java);
     return
   end
 end
diff --git a/+stdlib/normalize.m b/+stdlib/normalize.m
index 2caac93..29d22ca 100644
--- a/+stdlib/normalize.m
+++ b/+stdlib/normalize.m
@@ -1,4 +1,5 @@
-function n = normalize(p)
+function n = normalize(p, use_java)
 %% normalize(p)
 % normalize(p) remove redundant elements of path p
+% use_java: true uses Java Path.normalize(); false (default) uses native string handling
 % path need not exist, normalized path is returned
@@ -10,9 +11,54 @@
 % https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/nio/file/Path.html#normalize()
 arguments
   p (1,1) string
+  use_java (1,1) logical = false
 end
 
-n = stdlib.posix(java.io.File(p).toPath().normalize());
+if use_java
+  n = stdlib.posix(java.io.File(p).toPath().normalize());
+else
+
+  n = stdlib.posix(p);
+
+  % use split to remove /../ and /./ and duplicated /
+  parts = split(n, "/");
+  i0 = 1;
+  if startsWith(n, "/")
+    n = "/";
+  elseif ispc && strlength(n) >= 2 && isletter(extractBetween(n, 1, 1)) && extractBetween(n, 2, 2) == ":"
+    n = parts(1);
+    i0 = 2;
+  else
+    n = "";
+  end
+
+  for i = i0:length(parts)
+    if parts(i) == ".."
+      if n == ""
+        n = parts(i);
+      elseif n == ".." || endsWith(n, "/..")
+        n = n + "/" + parts(i);
+      elseif n ~= "/"
+        % drop the last component; at the root "/" there is nothing to drop
+        j = strfind(n, "/");
+        if isempty(j)
+          n = "";
+        else
+          % max() keeps the leading "/" when the parent is the root
+          n = extractBefore(n, max(j(end), 2));
+        end
+      end
+    elseif all(parts(i) ~= [".", ""])
+      if n == "" || n == "/"
+        % first component, or directly under the root: no separator to add
+        n = n + parts(i);
+      else
+        n = n + "/" + parts(i);
+      end
+    end
+  end
+
+end
 
 if(strlength(n) == 0), n = "."; end
 
diff --git a/+stdlib/relative_to.m b/+stdlib/relative_to.m
index 56d94a4..6033363 100644
--- a/+stdlib/relative_to.m
+++ b/+stdlib/relative_to.m
@@ -5,8 +5,8 @@
 end
 
 % must remove trailing slashes
-base = stdlib.normalize(base);
-other = stdlib.normalize(other);
+base = stdlib.normalize(base, true);
+other = stdlib.normalize(other, true);
 
 if base == other
   r = ".";