Skip to content

Commit

Permalink
Try fetching files from incompatible servers. (#5)
Browse files: browse the repository at this point in the history
  • Loading branch information
fmizzell authored Sep 13, 2019
1 parent 3193983 commit ed923ef
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 11 deletions.
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
}
},
"require": {
"getdkan/procrastinator": "~1.0.2",
"getdkan/procrastinator": "~2.0.0",
"ext-curl": "*"
}
}
51 changes: 45 additions & 6 deletions src/FileFetcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class FileFetcher extends Job
{
private $temporaryDirectory;
private $chunkSizeInBytes = (1024 * 100);
private $compatibleServer = true;

public function __construct($filePath, $temporaryDirectory = "/tmp")
{
Expand All @@ -27,22 +28,36 @@ public function __construct($filePath, $temporaryDirectory = "/tmp")
$state['destination'] = $file->isFile() ? $filePath : $this->getTemporaryFilePath($filePath);

if (!$file->isFile() && $this->serverIsNotCompatible($filePath)) {
throw new \Exception("The server hosting the file does not support ranged requests.");
$this->compatibleServer = false;
$state['total_bytes'] = PHP_INT_MAX;
$this->deleteFile($state['destination']);
} else {
$state['total_bytes'] = $file->isFile() ? $file->getSize() : $this->getRemoteFileSize($filePath);
}

$state['total_bytes'] = $file->isFile() ? $file->getSize() : $this->getRemoteFileSize($filePath);

if (file_exists($state['destination'])) {
$state['total_bytes_copied'] = filesize($state['destination']);
}

$this->setState($state);
}

/**
 * Set the job's time limit, but only for servers flagged as compatible.
 *
 * When the server has been flagged as incompatible the limit is not
 * forwarded to the parent and the call reports failure.
 * NOTE(review): presumably because the single-pass copy used for such
 * servers cannot be resumed after a timeout — confirm.
 *
 * @param int $seconds
 *   The requested time limit, in seconds.
 *
 * @return bool
 *   False when the server is incompatible; otherwise whatever the parent
 *   implementation returns.
 */
public function setTimeLimit(int $seconds): bool
{
    return $this->compatibleServer
        ? parent::setTimeLimit($seconds)
        : false;
}

protected function runIt()
{
try {
$this->copy();
if ($this->compatibleServer) {
$this->copy();
} else {
$this->copyIncompatible();
}
$result = $this->getResult();
$result->setStatus(Result::DONE);
} catch (FileCopyInterruptedException $e) {
Expand Down Expand Up @@ -149,6 +164,23 @@ private function copy()
}
}

/**
 * Copy the whole source to the destination in one sequential pass.
 *
 * Reads the 'source' and 'destination' state properties, streams the source
 * into the destination in fixed-size buffers, and records the number of
 * bytes written in the 'total_bytes_copied' state property.
 *
 * @throws \RuntimeException
 *   When either stream cannot be opened, or a read or write fails.
 */
private function copyIncompatible()
{
    $from = $this->getStateProperty('source');
    $to = $this->getStateProperty('destination');

    $bufferSize = 1048576;
    $bytesCopied = 0;

    // fopen() returns false on failure; passing that to feof()/fread() would
    // either raise a TypeError or loop forever, so fail fast instead.
    $fin = fopen($from, "rb");
    if ($fin === false) {
        throw new \RuntimeException("Unable to open {$from} for reading.");
    }

    $fout = fopen($to, "wb");
    if ($fout === false) {
        fclose($fin);
        throw new \RuntimeException("Unable to open {$to} for writing.");
    }

    try {
        while (!feof($fin)) {
            $buffer = fread($fin, $bufferSize);
            if ($buffer === false) {
                throw new \RuntimeException("Failed reading from {$from}.");
            }

            $written = fwrite($fout, $buffer);
            if ($written === false) {
                throw new \RuntimeException("Failed writing to {$to}.");
            }

            $bytesCopied += $written;
        }
    } finally {
        // Always release both handles, even when the copy aborts midway.
        fclose($fin);
        fclose($fout);
    }

    $this->setStateProperty('total_bytes_copied', $bytesCopied);
}

private function getChunk()
{
$url = $this->getStateProperty('source');
Expand All @@ -175,8 +207,8 @@ private function getChunk()

/**
 * Build the temporary file path used to store a remote source locally.
 *
 * The file name is derived from the URL's host (dots replaced with
 * underscores) followed by its path (slashes replaced with underscores).
 * NOTE(review): presumably the host is included so that equally named files
 * from different hosts do not collide — confirm.
 *
 * @param string $sourceFileUrl
 *   The URL of the file being fetched.
 *
 * @return mixed
 *   Whatever getTemporaryFile() returns for the derived file name.
 */
private function getTemporaryFilePath($sourceFileUrl)
{
    // parse_url() returns false for seriously malformed URLs and omits
    // components that are absent, so read both parts defensively.
    $info = parse_url($sourceFileUrl);
    $host = is_array($info) && isset($info['host']) ? $info['host'] : '';
    $path = is_array($info) && isset($info['path']) ? $info['path'] : '';

    $file_name = str_replace(".", "_", $host) . str_replace("/", "_", $path);

    if ($file_name === '') {
        // Nothing usable was parsed: fall back to the last "/"-separated
        // segment of the raw string rather than an empty file name.
        $pieces = explode("/", $sourceFileUrl);
        $file_name = end($pieces);
    }

    return $this->getTemporaryFile($file_name);
}

Expand Down Expand Up @@ -218,6 +250,13 @@ private function setState($state)
$this->getResult()->setData(json_encode($state));
}

/**
 * Remove a file from disk if it is present.
 *
 * @param string $file
 *   Path of the file to remove; nothing happens when it does not exist.
 */
private function deleteFile($file)
{
    if (!file_exists($file)) {
        return;
    }

    unlink($file);
}

public function setStateProperty($property, $value)
{
$state = $this->getState();
Expand Down
23 changes: 19 additions & 4 deletions test/FileFetcherTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

namespace FileFetcherTest;

use FileFetcher\FileFetcher;
use Procrastinator\Result;

class FileFetcherTest extends \PHPUnit\Framework\TestCase
{

Expand All @@ -15,7 +18,8 @@ public function testRemote()
);
$result = $fetcher->run();
$data = json_decode($result->getData());
$this->assertEquals("/tmp/sacramentorealestatetransactions.csv", $data->destination);
$filepath = "/tmp/samplecsvs_s3_amazonaws_com_sacramentorealestatetransactions.csv";
$this->assertEquals($filepath, $data->destination);
$this->assertTrue($data->temporary);
}

Expand Down Expand Up @@ -50,18 +54,29 @@ public function testTimeOut()
$fetcher2->run();
$this->assertEquals($file_size, $fetcher2->getStateProperty('total_bytes_copied'));
$this->assertEquals(
filesize("/tmp/{$this->sampleCsvSize}_mb_sample.csv"),
filesize("/tmp/dkan_default_content_files_s3_amazonaws_com_{$this->sampleCsvSize}_mb_sample.csv"),
$fetcher2->getStateProperty('total_bytes_copied')
);
$this->assertEquals($fetcher2->getResult()->getStatus(), \Procrastinator\Result::DONE);
}

/**
 * Runs a fetch against a live URL with a one-second time limit requested,
 * then checks that the job reports DONE and that 2853 bytes were copied.
 *
 * NOTE(review): going by the test name, the target server is expected to be
 * flagged as incompatible — confirm it still behaves that way.
 */
public function testIncompatibleServer()
{
    $fetcher = new FileFetcher(
        "https://data.medicare.gov/api/views/42wc-33ci/rows.csv?accessType=DOWNLOAD&sorting=true"
    );
    $fetcher->setTimeLimit(1);

    $outcome = $fetcher->run();

    $this->assertEquals(Result::DONE, $outcome->getStatus());

    $state = json_decode($outcome->getData());
    $this->assertEquals(2853, $state->total_bytes_copied);
}

public function tearDown(): void
{
parent::tearDown();
$files = [
"/tmp/{$this->sampleCsvSize}_mb_sample.csv",
"/tmp/sacramentorealestatetransactions.csv"
"/tmp/samplecsvs_s3_amazonaws_com_sacramentorealestatetransactions.csv",
"/tmp/dkan_default_content_files_s3_amazonaws_com_{$this->sampleCsvSize}_mb_sample.csv",
"/tmp/data_medicare_gov_api_views_42wc_33ci_rows.csv",
];

foreach ($files as $file) {
Expand Down

0 comments on commit ed923ef

Please sign in to comment.