Skip to content

Commit

Permalink
fix windows glob matching
Browse files Browse the repository at this point in the history
  • Loading branch information
msbarry committed May 19, 2024
1 parent b50ac5d commit 50ad84d
Show file tree
Hide file tree
Showing 9 changed files with 153 additions and 126 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -478,8 +478,7 @@ public Planetiler addNaturalEarthSource(String name, Path defaultPath, String de
* {@link #run()} is called.
*
* @param name string to use in stats and logs to identify this stage
* @param pattern path to the geoparquet file to read, possibly including
* {@linkplain FileSystem#getPathMatcher(String) glob patterns}
* @param paths paths to the geoparquet files to read.
* @param hivePartitioning Set to true to parse extra feature tags from the file path, for example
* {@code {theme="buildings", type="part"}} from
* {@code base/theme=buildings/type=part/file.parquet}
Expand All @@ -490,31 +489,32 @@ public Planetiler addNaturalEarthSource(String name, Path defaultPath, String de
* @return this runner instance for chaining
* @see ParquetReader
*/
public Planetiler addParquetSource(String name, Path pattern, boolean hivePartitioning,
public Planetiler addParquetSource(String name, List<Path> paths, boolean hivePartitioning,
Function<Map<String, Object>, Object> getId, Function<Map<String, Object>, Object> getLayer) {
// TODO handle auto-downloading
Path path = getPath(name, "parquet", pattern, null, true);
return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> {
var sourcePaths = FileUtils.walkPathWithPattern(path).stream().filter(Files::isRegularFile).toList();
new ParquetReader(name, profile, stats, getId, getLayer, hivePartitioning).process(sourcePaths, featureGroup,
for (var path : paths) {
inputPaths.add(new InputPath(name, path, false));
}
return addStage(name, "Process features in " + paths, ifSourceUsed(name, () -> {

Check warning on line 498 in planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java

View workflow job for this annotation

GitHub Actions / Analyze with Sonar

MINOR CODE_SMELL

Remove useless curly braces around statement rule: java:S1602 (https://sonarcloud.io/organizations/onthegomap/rules?open=java%3AS1602&rule_key=java%3AS1602) issue url: https://sonarcloud.io/project/issues?pullRequest=888&open=AY-SazMs_nuKm1h1lzJj&id=onthegomap_planetiler
new ParquetReader(name, profile, stats, getId, getLayer, hivePartitioning).process(paths, featureGroup,
config);
}));
}

/**
* Alias for {@link #addParquetSource(String, Path, boolean, Function, Function)} using the default layer and ID
* Alias for {@link #addParquetSource(String, List, boolean, Function, Function)} using the default layer and ID
* extractors.
*/
public Planetiler addParquetSource(String name, Path pattern, boolean hivePartitioning) {
return addParquetSource(name, pattern, hivePartitioning, null, null);
public Planetiler addParquetSource(String name, List<Path> paths, boolean hivePartitioning) {
return addParquetSource(name, paths, hivePartitioning, null, null);
}

/**
* Alias for {@link #addParquetSource(String, Path, boolean, Function, Function)} without hive partitioning and using
* Alias for {@link #addParquetSource(String, List, boolean, Function, Function)} without hive partitioning and using
* the default layer and ID extractors.
*/
public Planetiler addParquetSource(String name, Path pattern) {
return addParquetSource(name, pattern, false);
public Planetiler addParquetSource(String name, List<Path> paths) {
return addParquetSource(name, paths, false);
}

/**
Expand Down Expand Up @@ -818,7 +818,7 @@ public void run() throws Exception {
for (var inputPath : inputPaths) {
if (inputPath.freeAfterReading()) {
LOGGER.info("Deleting {} ({}) to make room for output file", inputPath.id, inputPath.path);
inputPath.delete();
FileUtils.delete(inputPath.path());
}
}

Expand Down Expand Up @@ -858,7 +858,7 @@ private void checkDiskSpace() {
// if the user opts to remove an input source after reading to free up additional space for the output...
for (var input : inputPaths) {
if (input.freeAfterReading()) {
writePhase.addDisk(input.path, -input.size(), "delete " + input.id + " source after reading");
writePhase.addDisk(input.path, -FileUtils.size(input.path), "delete " + input.id + " source after reading");
}
}

Expand Down Expand Up @@ -941,23 +941,18 @@ private RunnableThatThrows ifSourceUsed(String name, RunnableThatThrows task) {
}

private Path getPath(String name, String type, Path defaultPath, String defaultUrl) {
return getPath(name, type, defaultPath, defaultUrl, false);
}

private Path getPath(String name, String type, Path defaultPath, String defaultUrl, boolean wildcard) {
Path path = arguments.file(name + "_path", name + " " + type + " path", defaultPath);
boolean refresh =
arguments.getBoolean("refresh_" + name, "Download new version of " + name + " if changed", refreshSources);
boolean freeAfterReading = arguments.getBoolean("free_" + name + "_after_read",
"delete " + name + " input file after reading to make space for output (reduces peak disk usage)", false);
var inputPath = new InputPath(name, path, freeAfterReading, wildcard);
inputPaths.add(inputPath);
if (downloadSources || refresh) {
String url = arguments.getString(name + "_url", name + " " + type + " url", defaultUrl);
if ((refresh || inputPath.isEmpty()) && url != null) {
toDownload.add(new ToDownload(name, url, path, wildcard));
if ((!Files.exists(path) || refresh) && url != null) {
toDownload.add(new ToDownload(name, url, path));
}
}
inputPaths.add(new InputPath(name, path, freeAfterReading));
return path;
}

Expand All @@ -975,7 +970,7 @@ private void download() {

private void ensureInputFilesExist() {
for (InputPath inputPath : inputPaths) {
if (profile.caresAboutSource(inputPath.id) && inputPath.isEmpty()) {
if (profile.caresAboutSource(inputPath.id) && !Files.exists(inputPath.path)) {
throw new IllegalArgumentException(inputPath.path + " does not exist. Run with --download to fetch it");
}
}
Expand All @@ -988,24 +983,7 @@ private record Stage(String id, List<String> details, RunnableThatThrows task) {
}
}

private record ToDownload(String id, String url, Path path, boolean wildcard) {}

private record InputPath(String id, Path path, boolean freeAfterReading, boolean wildcard) {

public boolean isEmpty() {
return wildcard ? FileUtils.walkPathWithPattern(path).isEmpty() : !Files.exists(path);
}

public long size() {
return wildcard ? FileUtils.size(FileUtils.getPatternBase(path)) : FileUtils.fileSize(path);
}
private record ToDownload(String id, String url, Path path) {}

public void delete() {
if (wildcard) {
FileUtils.delete(FileUtils.getPatternBase(path));
} else {
FileUtils.delete(path);
}
}
}
private record InputPath(String id, Path path, boolean freeAfterReading) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import java.util.zip.ZipEntry;
Expand All @@ -43,7 +42,6 @@ public class FileUtils {
private static final double ZIP_THRESHOLD_RATIO = 1_000;

private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class);
private static final Pattern GLOB_PATTERN = Pattern.compile("[?*{\\[].*$");

private FileUtils() {}

Expand Down Expand Up @@ -115,38 +113,6 @@ public static List<Path> walkPathWithPattern(Path basePath, String pattern) {
return walkPathWithPattern(basePath, pattern, List::of);
}

/**
* Returns list of paths matching {@param pathWithPattern} where {@param pathWithPattern} can contain glob patterns.
*
* @param pathWithPattern path that can contain glob patterns
*/
public static List<Path> walkPathWithPattern(Path pathWithPattern) {
var parsed = parsePattern(pathWithPattern);
return parsed.pattern == null ? List.of(parsed.base) : walkPathWithPattern(parsed.base, parsed.pattern, List::of);
}


/**
* Returns list of base of {@param pathWithPattern} before any glob patterns.
*/
public static Path getPatternBase(Path pathWithPattern) {
return parsePattern(pathWithPattern).base;
}

static BaseWithPattern parsePattern(Path pattern) {
String string = pattern.toString();
var matcher = GLOB_PATTERN.matcher(string);
if (!matcher.find()) {
return new BaseWithPattern(pattern, null);
}
matcher.reset();
String base = matcher.replaceAll("");
int idx = base.lastIndexOf(pattern.getFileSystem().getSeparator());
if (idx > 0) {
base = base.substring(0, idx);
}
return new BaseWithPattern(Path.of(base), string.substring(idx + 1));
}

/** Returns true if {@code path} ends with ".extension" (case-insensitive). */
public static boolean hasExtension(Path path, String extension) {
Expand Down Expand Up @@ -419,6 +385,4 @@ public static void setLength(Path path, long size) {
throw new UncheckedIOException(e);
}
}

record BaseWithPattern(Path base, String pattern) {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package com.onthegomap.planetiler.util;

import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;


/**
 * Utility for constructing base+glob paths for matching many files.
 *
 * <p>
 * A glob is split into a literal {@code base} path and an optional {@code pattern} that starts at the first path
 * segment containing one of the glob characters {@code ?}, {@code *}, <code>{</code> or {@code [}.
 *
 * <p>
 * NOTE(review): pattern segments are joined with the file system's separator. Where that separator is a backslash,
 * the resulting pattern contains backslashes, which {@code java.nio} glob syntax treats as an escape character -
 * confirm that {@code FileUtils.walkPathWithPattern} accounts for this.
 *
 * @param base    literal path that the pattern is evaluated beneath
 * @param pattern glob pattern relative to {@code base}, or {@code null} when no glob has been added yet
 */
public record Glob(Path base, String pattern) {

  /** Matches from the first glob character through the end of the input. */
  private static final Pattern GLOB_PATTERN = Pattern.compile("[?*{\\[].*$");

  /** Wrap a base path with no globs in it yet. */
  public static Glob of(Path path) {
    return new Glob(path, null);
  }

  /**
   * Resolves path segments beneath this glob.
   *
   * <p>
   * Literal leading segments extend {@link #base()}. From the first segment that contains a glob character onward
   * (or if this glob already has a pattern) the segments are joined with the separator of the base path's file
   * system and become part of {@link #pattern()}.
   *
   * @param subPath individual path segments to append; {@code null} or empty leaves this glob unchanged
   * @return a glob with {@code subPath} appended, or {@code this} when there is nothing to append
   */
  public Glob resolve(String... subPath) {
    // Nothing to append: must be checked first so that a glob which already has a pattern does not gain a
    // dangling trailing separator, and so that a null array does not reach String.join.
    if (subPath == null || subPath.length == 0) {
      return this;
    }
    String separator = base.getFileSystem().getSeparator();
    if (pattern != null) {
      // once a pattern has started, everything that follows belongs to the pattern
      return new Glob(base, pattern + separator + String.join(separator, subPath));
    } else if (GLOB_PATTERN.matcher(subPath[0]).find()) {
      // first segment containing a glob character starts the pattern
      return new Glob(base, String.join(separator, subPath));
    } else {
      // literal segment: fold it into the base and continue with the remaining segments
      return of(base.resolve(subPath[0])).resolve(Arrays.copyOfRange(subPath, 1, subPath.length));
    }
  }

  /**
   * Parse a string containing platform-specific file separators into a base+glob pattern.
   *
   * <p>
   * The base is the directory portion that precedes the first path segment containing a glob character. For example
   * with {@code /} as the separator: {@code a/b/*} has base {@code a/b} and pattern {@code *}, {@code /ab*} has base
   * {@code /} and pattern {@code ab*}, and {@code ab*} has an empty base and pattern {@code ab*}.
   *
   * @param path path that may contain glob characters
   * @return the parsed glob; {@link #pattern()} is {@code null} when {@code path} contains no glob characters
   */
  public static Glob parse(String path) {
    var matcher = GLOB_PATTERN.matcher(path);
    if (!matcher.find()) {
      return of(Path.of(path));
    }
    // everything before the first glob character (replaceAll resets the matcher before it runs)
    String literalPrefix = matcher.replaceAll("");
    String separator = Path.of(literalPrefix).getFileSystem().getSeparator();
    int idx = literalPrefix.lastIndexOf(separator);
    String base;
    if (idx > 0) {
      // drop the separator and the partial segment that the glob character belongs to
      base = literalPrefix.substring(0, idx);
    } else if (idx == 0) {
      // the only separator is the leading one, so the base is the root
      base = separator;
    } else {
      // no separator before the glob: the whole input is a pattern relative to the empty path
      base = "";
    }
    return of(Path.of(base)).resolve(path.substring(idx + 1).split(Pattern.quote(separator)));
  }

  /**
   * Returns the paths for this glob: just {@link #base()} when there is no pattern, otherwise the result of
   * {@code FileUtils.walkPathWithPattern(base, pattern)}.
   */
  public List<Path> find() {
    return pattern == null ? List.of(base) : FileUtils.walkPathWithPattern(base, pattern);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2266,7 +2266,7 @@ public void processFeature(SourceFeature source, FeatureCollector features) {
.setAttr("id", source.getString("id"));
}
})
.addParquetSource("parquet", TestUtils.pathToResource("parquet").resolve("boston.parquet"))
.addParquetSource("parquet", List.of(TestUtils.pathToResource("parquet").resolve("boston.parquet")))
.setOutput(mbtiles)
.run();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import com.onthegomap.planetiler.TestUtils;
import com.onthegomap.planetiler.config.Bounds;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.util.Glob;
import java.nio.file.Path;
import java.time.Instant;
import java.time.LocalDate;
Expand All @@ -28,7 +28,7 @@
class ParquetInputFileTest {

static List<Path> bostons() {
return FileUtils.walkPathWithPattern(TestUtils.pathToResource("parquet").resolve("boston*.parquet"));
return Glob.of(TestUtils.pathToResource("parquet")).resolve("boston*.parquet").find();
}

@ParameterizedTest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import com.onthegomap.planetiler.geo.TileOrder;
import com.onthegomap.planetiler.reader.SourceFeature;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.util.Glob;
import com.onthegomap.planetiler.util.Parse;
import java.nio.file.Path;
import java.util.List;
Expand All @@ -32,7 +32,7 @@ class ParquetReaderTest {
private final Stats stats = Stats.inMemory();

static List<Path> bostons() {
return FileUtils.walkPathWithPattern(TestUtils.pathToResource("parquet").resolve("boston*.parquet"));
return Glob.of(TestUtils.pathToResource("parquet")).resolve("boston*.parquet").find();
}

@ParameterizedTest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
Expand All @@ -18,8 +17,6 @@
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

class FileUtilsTest {

Expand Down Expand Up @@ -123,7 +120,7 @@ void testWalkPathWithPatternDirectory() throws IOException {
matchingPaths.stream().sorted().toList()
);

matchingPaths = FileUtils.walkPathWithPattern(parent.resolve("*.txt"));
matchingPaths = Glob.of(parent).resolve("*.txt").find();

assertEquals(
txtFiles.stream().sorted().toList(),
Expand Down Expand Up @@ -152,7 +149,7 @@ void testWalkPathWithPatternDirectoryZip() throws IOException {
FileUtils.walkPathWithPattern(parent, "*.zip", mockWalkZipFile));


assertEquals(List.of(zipFile), FileUtils.walkPathWithPattern(parent.resolve("*.zip")));
assertEquals(List.of(zipFile), Glob.of(parent).resolve("*.zip").find());
}

@Test
Expand All @@ -165,7 +162,7 @@ void testWalkPathWithPatternSingleZip() {
List.of("/shapefile/stations.shp", "/shapefile/stations.shx"),
matchingPaths.stream().map(Path::toString).sorted().toList());

matchingPaths = FileUtils.walkPathWithPattern(zipPath.resolve("stations.sh[px]"));
matchingPaths = Glob.of(zipPath).resolve("stations.sh[px]").find();

assertEquals(
List.of("/shapefile/stations.shp", "/shapefile/stations.shx"),
Expand All @@ -178,38 +175,4 @@ void testExpandFile() throws IOException {
FileUtils.setLength(path, 1000);
assertEquals(1000, Files.size(path));
}

@ParameterizedTest
@CsvSource(value = {
"a/b/c; a/b/c;",
"a/b/*; a/b; *",
"a/*/b; a; */b",
"*/b/*; ; */b/*",
"/*/test; /; */test",
"a/b={c,d}/other; a; b={c,d}/other",
"./a/b=?/other; ./a; b=?/other",
}, delimiter = ';')
void testParsePathWithPattern(String input, String base, String pattern) {
var separator = FileSystems.getDefault().getSeparator();
input = input.replace("/", separator);
base = base == null ? "" : base.replace("/", separator);
pattern = pattern == null ? null : pattern.replace("/", separator);
assertEquals(
new FileUtils.BaseWithPattern(
Path.of(base),
pattern
),
FileUtils.parsePattern(Path.of(input))
);
}

@Test
void testWalkPathWithPattern() throws IOException {
var path = tmpDir.resolve("a").resolve("b").resolve("c.txt");
FileUtils.createParentDirectories(path);
Files.writeString(path, "test");
assertEquals(List.of(path), FileUtils.walkPathWithPattern(tmpDir.resolve(Path.of("a", "*", "c.txt"))));
assertEquals(List.of(path), FileUtils.walkPathWithPattern(tmpDir.resolve(Path.of("*", "*", "c.txt"))));
assertEquals(List.of(path), FileUtils.walkPathWithPattern(tmpDir.resolve(Path.of("a", "b", "c.txt"))));
}
}
Loading

0 comments on commit 50ad84d

Please sign in to comment.