From bbea298da615a6fca8a06b8a108b4a56d9417e1e Mon Sep 17 00:00:00 2001 From: Michael Barry Date: Tue, 2 Jul 2024 07:33:20 -0400 Subject: [PATCH] Handle no content length in downloader (#944) --- .../planetiler/util/Downloader.java | 39 +++++++++------ .../planetiler/util/DownloaderTest.java | 47 ++++++++++++------- 2 files changed, 53 insertions(+), 33 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java index 43e10961e3..7bad09b2d4 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; +import java.util.OptionalLong; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -165,7 +166,7 @@ public void run() { for (var toDownload : toDownloadList) { try { - long size = toDownload.metadata.get(10, TimeUnit.SECONDS).size; + long size = toDownload.metadata.get(10, TimeUnit.SECONDS).size.orElse(0); loggers.addStorageRatePercentCounter(toDownload.id, size, toDownload::bytesDownloaded, true); } catch (InterruptedException e) { Thread.currentThread().interrupt(); @@ -183,14 +184,19 @@ CompletableFuture downloadIfNecessary(ResourceToDownload resourceToDownloa return CompletableFuture.runAsync(RunnableThatThrows.wrap(() -> { LogUtil.setStage("download", resourceToDownload.id); long existingSize = FileUtils.size(resourceToDownload.output); - var metadata = httpHeadFollowRedirects(resourceToDownload.url, 0); - Path tmpPath = resourceToDownload.tmpPath(); - resourceToDownload.metadata.complete(metadata); - if (metadata.size == existingSize) { - LOGGER.info("Skipping {}: {} already up-to-date", resourceToDownload.id, resourceToDownload.output); - return; + try { + resourceToDownload.metadata.complete(httpHeadFollowRedirects(resourceToDownload.url, 0)); + } catch (Exception e) { + resourceToDownload.metadata.completeExceptionally(e); + throw e; } + Path tmpPath = resourceToDownload.tmpPath(); try { + var metadata = resourceToDownload.metadata.get(); + if (metadata.size.orElse(-1) == existingSize) { + LOGGER.info("Skipping {}: {} already up-to-date", resourceToDownload.id, resourceToDownload.output); + return; + } String redirectInfo = metadata.canonicalUrl.equals(resourceToDownload.url) ? "" : " (redirected to " + metadata.canonicalUrl + ")"; LOGGER.info("Downloading {}{} to {}", resourceToDownload.url, redirectInfo, resourceToDownload.output); @@ -198,7 +204,9 @@ CompletableFuture downloadIfNecessary(ResourceToDownload resourceToDownloa FileUtils.createParentDirectories(resourceToDownload.output); FileUtils.delete(tmpPath); FileUtils.deleteOnExit(tmpPath); - diskSpaceCheck.addDisk(tmpPath, metadata.size, resourceToDownload.id); + if (metadata.size.isPresent()) { + diskSpaceCheck.addDisk(tmpPath, metadata.size.getAsLong(), resourceToDownload.id); + } diskSpaceCheck.checkAgainstLimits(config.force(), false); httpDownload(resourceToDownload, tmpPath); Files.move(tmpPath, resourceToDownload.output); @@ -225,7 +233,7 @@ ResourceMetadata httpHead(String url) throws IOException, InterruptedException { responseInfo -> { int status = responseInfo.statusCode(); Optional location = Optional.empty(); - long contentLength = 0; + OptionalLong contentLength = OptionalLong.empty(); HttpHeaders headers = responseInfo.headers(); if (status >= 300 && status < 400) { location = responseInfo.headers().firstValue(LOCATION); @@ -235,7 +243,7 @@ ResourceMetadata httpHead(String url) throws IOException, InterruptedException { } else if (responseInfo.statusCode() != 200) { throw new IllegalStateException("Bad response: " + responseInfo.statusCode()); } else { - contentLength = headers.firstValueAsLong(CONTENT_LENGTH).orElseThrow(); + contentLength = headers.firstValueAsLong(CONTENT_LENGTH); } boolean supportsRangeRequest = headers.allValues(ACCEPT_RANGES).contains("bytes"); ResourceMetadata metadata = new ResourceMetadata(location, url, contentLength, supportsRangeRequest); @@ -250,12 +258,13 @@ private void httpDownload(ResourceToDownload resource, Path tmpPath) record Range(long start, long end) {} List chunks = new ArrayList<>(); boolean ranges = metadata.acceptRange && config.downloadThreads() > 1; - long chunkSize = ranges ? chunkSizeBytes : metadata.size; - for (long start = 0; start < metadata.size; start += chunkSize) { - long end = Math.min(start + chunkSize, metadata.size); + long fileSize = metadata.size.orElse(Long.MAX_VALUE); + long chunkSize = ranges ? chunkSizeBytes : fileSize; + for (long start = 0; start < fileSize; start += chunkSize) { + long end = Math.min(start + chunkSize, fileSize); chunks.add(new Range(start, end)); } - FileUtils.setLength(tmpPath, metadata.size); + FileUtils.setLength(tmpPath, metadata.size.orElse(1)); Semaphore perFileLimiter = new Semaphore(config.downloadThreads()); Worker.joinFutures(chunks.stream().map(range -> CompletableFuture.runAsync(RunnableThatThrows.wrap(() -> { LogUtil.setStage("download", resource.id); @@ -299,7 +308,7 @@ private HttpRequest.Builder newHttpRequest(String url) { .header(USER_AGENT, config.httpUserAgent()); } - record ResourceMetadata(Optional redirect, String canonicalUrl, long size, boolean acceptRange) {} + record ResourceMetadata(Optional redirect, String canonicalUrl, OptionalLong size, boolean acceptRange) {} record ResourceToDownload( String id, String url, Path output, CompletableFuture metadata, diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/DownloaderTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/DownloaderTest.java index f5b285b265..37ff10b7e0 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/DownloaderTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/DownloaderTest.java @@ -10,6 +10,7 @@ import java.nio.file.Path; import java.util.Map; import java.util.Optional; +import java.util.OptionalLong; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicLong; @@ -25,7 +26,8 @@ class DownloaderTest { private final PlanetilerConfig config = PlanetilerConfig.defaults(); private AtomicLong downloads = new AtomicLong(0); - private Downloader mockDownloader(Map resources, boolean supportsRange) { + private Downloader mockDownloader(Map resources, boolean supportsRange, + boolean supportsContentLength) { return new Downloader(config, 2L) { @Override @@ -55,23 +57,30 @@ ResourceMetadata httpHead(String url) { if (parts.length > 1) { int redirectNum = Integer.parseInt(parts[1]); String next = redirectNum <= 1 ? parts[0] : (parts[0] + "#" + (redirectNum - 1)); - return new ResourceMetadata(Optional.of(next), url, 0, supportsRange); + return new ResourceMetadata(Optional.of(next), url, + supportsContentLength ? OptionalLong.of(0) : OptionalLong.empty(), supportsRange); } byte[] bytes = resources.get(url); - return new ResourceMetadata(Optional.empty(), url, bytes.length, supportsRange); + return new ResourceMetadata(Optional.empty(), url, + supportsContentLength ? OptionalLong.of(bytes.length) : OptionalLong.empty(), supportsRange); } }; } @ParameterizedTest @CsvSource({ - "false,0", - "true,0", - "false,1", - "false,2", - "true,4", + "false,0,true", + "true,0,true", + "false,1,true", + "false,2,true", + "true,4,true", + + "false,0,false", + "true,0,false", + "false,1,false", + "true,1,false", }) - void testDownload(boolean range, int redirects) throws Exception { + void testDownload(boolean range, int redirects, boolean supportsContentLength) throws Exception { Path dest = path.resolve("out"); String string = "0123456789"; String url = "http://url"; @@ -79,7 +88,7 @@ void testDownload(boolean range, int redirects) throws Exception { Map resources = new ConcurrentHashMap<>(); byte[] bytes = string.getBytes(StandardCharsets.UTF_8); - Downloader downloader = mockDownloader(resources, range); + Downloader downloader = mockDownloader(resources, range, supportsContentLength); // fails if no data var resource1 = new Downloader.ResourceToDownload("resource", initialUrl, dest); @@ -96,13 +105,15 @@ void testDownload(boolean range, int redirects) throws Exception { assertEquals(10, resource2.bytesDownloaded()); // does not re-request if size is the same - downloads.set(0); - var resource3 = new Downloader.ResourceToDownload("resource", initialUrl, dest); - downloader.downloadIfNecessary(resource3).get(); - assertEquals(0, downloads.get()); - assertEquals(string, Files.readString(dest)); - assertEquals(FileUtils.size(path), FileUtils.size(dest)); - assertEquals(0, resource3.bytesDownloaded()); + if (supportsContentLength) { + downloads.set(0); + var resource3 = new Downloader.ResourceToDownload("resource", initialUrl, dest); + downloader.downloadIfNecessary(resource3).get(); + assertEquals(0, downloads.get()); + assertEquals(string, Files.readString(dest)); + assertEquals(FileUtils.size(path), FileUtils.size(dest)); + assertEquals(0, resource3.bytesDownloaded()); + } // does re-download if size changes var resource4 = new Downloader.ResourceToDownload("resource", initialUrl, dest); @@ -131,7 +142,7 @@ InputStream openStreamRange(String url, long start, long end) { @Override ResourceMetadata httpHead(String url) { - return new ResourceMetadata(Optional.empty(), url, Long.MAX_VALUE, true); + return new ResourceMetadata(Optional.empty(), url, OptionalLong.of(Long.MAX_VALUE), true); } };