From ed923efcf9e8c724ae2b22354d16f1fabb496599 Mon Sep 17 00:00:00 2001 From: fmizzell Date: Fri, 13 Sep 2019 15:29:33 -0500 Subject: [PATCH] Try fetching files from incompatible servers. (#5) --- composer.json | 2 +- src/FileFetcher.php | 51 +++++++++++++++++++++++++++++++++++----- test/FileFetcherTest.php | 23 ++++++++++++++---- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git a/composer.json b/composer.json index a2b6850..c74e4e6 100644 --- a/composer.json +++ b/composer.json @@ -17,7 +17,7 @@ } }, "require": { - "getdkan/procrastinator": "~1.0.2", + "getdkan/procrastinator": "~2.0.0", "ext-curl": "*" } } diff --git a/src/FileFetcher.php b/src/FileFetcher.php index cc89ef3..26ea000 100644 --- a/src/FileFetcher.php +++ b/src/FileFetcher.php @@ -9,6 +9,7 @@ class FileFetcher extends Job { private $temporaryDirectory; private $chunkSizeInBytes = (1024 * 100); + private $compatibleServer = true; public function __construct($filePath, $temporaryDirectory = "/tmp") { @@ -27,11 +28,13 @@ public function __construct($filePath, $temporaryDirectory = "/tmp") $state['destination'] = $file->isFile() ? $filePath : $this->getTemporaryFilePath($filePath); if (!$file->isFile() && $this->serverIsNotCompatible($filePath)) { - throw new \Exception("The server hosting the file does not support ranged requests."); + $this->compatibleServer = false; + $state['total_bytes'] = PHP_INT_MAX; + $this->deleteFile($state['destination']); + } else { + $state['total_bytes'] = $file->isFile() ? $file->getSize() : $this->getRemoteFileSize($filePath); } - $state['total_bytes'] = $file->isFile() ? $file->getSize() : $this->getRemoteFileSize($filePath); - if (file_exists($state['destination'])) { $state['total_bytes_copied'] = filesize($state['destination']); } @@ -39,10 +42,22 @@ public function __construct($filePath, $temporaryDirectory = "/tmp") $this->setState($state); } + public function setTimeLimit(int $seconds): bool + { + if (!$this->compatibleServer) { + return false; + } + return parent::setTimeLimit($seconds); + } + protected function runIt() { try { - $this->copy(); + if ($this->compatibleServer) { + $this->copy(); + } else { + $this->copyIncompatible(); + } $result = $this->getResult(); $result->setStatus(Result::DONE); } catch (FileCopyInterruptedException $e) { @@ -149,6 +164,23 @@ private function copy() } } + private function copyIncompatible() + { + $from = $this->getStateProperty('source'); + $to = $this->getStateProperty('destination'); + + $bufferSize = 1048576; + $bytesCopied = 0; + $fin = fopen($from, "rb"); + $fout = fopen($to, "w"); + while (!feof($fin)) { + $bytesCopied += fwrite($fout, fread($fin, $bufferSize)); + } + fclose($fin); + fclose($fout); + $this->setStateProperty('total_bytes_copied', $bytesCopied); + } + private function getChunk() { $url = $this->getStateProperty('source'); @@ -175,8 +207,8 @@ private function getChunk() private function getTemporaryFilePath($sourceFileUrl) { - $pieces = explode("/", $sourceFileUrl); - $file_name = end($pieces); + $info = parse_url($sourceFileUrl); + $file_name = str_replace(".", "_", $info["host"]) . str_replace("/", "_", $info['path']); return $this->getTemporaryFile($file_name); } @@ -218,6 +250,13 @@ private function setState($state) $this->getResult()->setData(json_encode($state)); } + private function deleteFile($file) + { + if (file_exists($file)) { + unlink($file); + } + } + public function setStateProperty($property, $value) { $state = $this->getState(); diff --git a/test/FileFetcherTest.php b/test/FileFetcherTest.php index 30a35c6..fd405c2 100644 --- a/test/FileFetcherTest.php +++ b/test/FileFetcherTest.php @@ -2,6 +2,9 @@ namespace FileFetcherTest; +use FileFetcher\FileFetcher; +use Procrastinator\Result; + class FileFetcherTest extends \PHPUnit\Framework\TestCase { @@ -15,7 +18,8 @@ public function testRemote() ); $result = $fetcher->run(); $data = json_decode($result->getData()); - $this->assertEquals("/tmp/sacramentorealestatetransactions.csv", $data->destination); + $filepath = "/tmp/samplecsvs_s3_amazonaws_com_sacramentorealestatetransactions.csv"; + $this->assertEquals($filepath, $data->destination); $this->assertTrue($data->temporary); } @@ -50,18 +54,29 @@ public function testTimeOut() $fetcher2->run(); $this->assertEquals($file_size, $fetcher2->getStateProperty('total_bytes_copied')); $this->assertEquals( - filesize("/tmp/{$this->sampleCsvSize}_mb_sample.csv"), + filesize("/tmp/dkan_default_content_files_s3_amazonaws_com_{$this->sampleCsvSize}_mb_sample.csv"), $fetcher2->getStateProperty('total_bytes_copied') ); $this->assertEquals($fetcher2->getResult()->getStatus(), \Procrastinator\Result::DONE); } + public function testIncompatibleServer() + { + $url = "https://data.medicare.gov/api/views/42wc-33ci/rows.csv?accessType=DOWNLOAD&sorting=true"; + $fetcher = new FileFetcher($url); + $fetcher->setTimeLimit(1); + $result = $fetcher->run(); + $this->assertEquals(Result::DONE, $result->getStatus()); + $this->assertEquals(2853, json_decode($result->getData())->total_bytes_copied); + } + public function tearDown(): void { parent::tearDown(); $files = [ - "/tmp/{$this->sampleCsvSize}_mb_sample.csv", - "/tmp/sacramentorealestatetransactions.csv" + "/tmp/samplecsvs_s3_amazonaws_com_sacramentorealestatetransactions.csv", + "/tmp/dkan_default_content_files_s3_amazonaws_com_{$this->sampleCsvSize}_mb_sample.csv", + "/tmp/data_medicare_gov_api_views_42wc_33ci_rows.csv", ]; foreach ($files as $file) {