From 92de261d01c61981d9ce9df1ca14c2f20c532c5d Mon Sep 17 00:00:00 2001 From: Dan Feder Date: Wed, 31 Jul 2019 12:16:08 -0500 Subject: [PATCH] Remove functions and interface now in Job class (#1) --- composer.json | 5 +- src/FileCopyInterruptedException.php | 3 +- src/FileFetcher.php | 349 ++++++++++++++------------- test/FileFetcherTest.php | 121 +++++----- 4 files changed, 248 insertions(+), 230 deletions(-) diff --git a/composer.json b/composer.json index b130c6b..a2b6850 100644 --- a/composer.json +++ b/composer.json @@ -12,11 +12,12 @@ ], "autoload": { "psr-4": { - "FileFetcher\\": "src/" + "FileFetcher\\": "src/", + "FileFetcherTest\\": "test/" } }, "require": { - "getdkan/procrastinator": "~1.0.1", + "getdkan/procrastinator": "~1.0.2", "ext-curl": "*" } } diff --git a/src/FileCopyInterruptedException.php b/src/FileCopyInterruptedException.php index bf104a9..a234bfa 100644 --- a/src/FileCopyInterruptedException.php +++ b/src/FileCopyInterruptedException.php @@ -3,7 +3,6 @@ namespace FileFetcher; - class FileCopyInterruptedException extends \Exception { -} \ No newline at end of file +} diff --git a/src/FileFetcher.php b/src/FileFetcher.php index 513a57c..e41565b 100644 --- a/src/FileFetcher.php +++ b/src/FileFetcher.php @@ -5,177 +5,179 @@ use Procrastinator\Job\Job; use Procrastinator\Result; -class FileFetcher extends Job implements \JsonSerializable { +class FileFetcher extends Job +{ - private $chunkSizeInBytes = (1024 * 100); - private $timeLimit; + private $chunkSizeInBytes = (1024 * 100); + private $timeLimit; - public function __construct($filePath) - { - parent::__construct(); + public function __construct($filePath) + { + parent::__construct(); - $state = [ - 'source' => $filePath, - 'total_bytes_copied' => 0 - ]; + $state = [ + 'source' => $filePath, + 'total_bytes_copied' => 0 + ]; - $file = new \SplFileObject($filePath); + $file = new \SplFileObject($filePath); - $state['temporary'] = !$file->isFile(); - $state['destination'] = $file->isFile() ? $filePath : $this->getTemporaryFilePath($filePath); + $state['temporary'] = !$file->isFile(); + $state['destination'] = $file->isFile() ? $filePath : $this->getTemporaryFilePath($filePath); - if (!$file->isFile() && $this->serverIsNotCompatible($filePath)) { - throw new \Exception("The server hosting the file does not support ranged requests."); - } - - $state['total_bytes'] = $file->isFile() ? $file->getSize() : $this->getRemoteFileSize($filePath); - - $this->setState($state); - } - - public function setTimeLimit($seconds) - { - $this->timeLimit = $seconds; - } - - public function getState() { - return (array) json_decode($this->getResult()->getData()); - } + if (!$file->isFile() && $this->serverIsNotCompatible($filePath)) { + throw new \Exception("The server hosting the file does not support ranged requests."); + } - public function getStateProperty($property) { - return $this->getState()[$property]; - } + $state['total_bytes'] = $file->isFile() ? $file->getSize() : $this->getRemoteFileSize($filePath); - protected function runIt() - { - try { - $this->copy(); - $result = $this->getResult(); - $result->setStatus(Result::DONE); - } - catch (FileCopyInterruptedException $e) { - $result = $this->getResult(); - $result->setStatus(Result::STOPPED); + $this->setState($state); } - return $result; - } + public function setTimeLimit($seconds) + { + $this->timeLimit = $seconds; + } - private function serverIsNotCompatible($url) { - $headers = $this->getHeaders($url); + protected function runIt() + { + try { + $this->copy(); + $result = $this->getResult(); + $result->setStatus(Result::DONE); + } catch (FileCopyInterruptedException $e) { + $result = $this->getResult(); + $result->setStatus(Result::STOPPED); + } - if(!isset($headers['Accept-Ranges']) || !isset($headers['Content-Length'])) { - return TRUE; + return $result; } - return FALSE; - } + private function serverIsNotCompatible($url) + { + $headers = $this->getHeaders($url); - private function getRemoteFileSize($url) { - $headers = $this->getHeaders($url); - return $headers['Content-Length']; - } + if (!isset($headers['Accept-Ranges']) || !isset($headers['Content-Length'])) { + return true; + } - private function getHeaders($url) { - $ch = curl_init(); - curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt ($ch, CURLOPT_URL, $url); - curl_setopt ($ch, CURLOPT_CONNECTTIMEOUT, 20); - curl_setopt($ch, CURLOPT_HEADER, true); - curl_setopt($ch, CURLOPT_NOBODY, true); + return false; + } - $headers = $this->parseHeaders(curl_exec ($ch)); - curl_close ($ch); - return $headers; - } + private function getRemoteFileSize($url) + { + $headers = $this->getHeaders($url); + return $headers['Content-Length']; + } - private function parseHeaders($string) { - $headers = []; - $lines = explode(PHP_EOL, $string); - foreach ($lines as $line) { - $line = trim($line); - $parts = explode(":", $line); - if (count($parts) > 1) { - $key = array_shift($parts); - $value = trim(implode(":", $parts)); - $headers[$key] = $value; - } - else { + private function getHeaders($url) + { + $ch = curl_init(); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 20); + curl_setopt($ch, CURLOPT_HEADER, true); + curl_setopt($ch, CURLOPT_NOBODY, true); + + $headers = $this->parseHeaders(curl_exec($ch)); + curl_close($ch); + return $headers; + } - if (!empty($value)) { - $headers[] = $value; + private function parseHeaders($string) + { + $headers = []; + $lines = explode(PHP_EOL, $string); + foreach ($lines as $line) { + $line = trim($line); + $parts = explode(":", $line); + if (count($parts) > 1) { + $key = array_shift($parts); + $value = trim(implode(":", $parts)); + $headers[$key] = $value; + } else { + if (!empty($value)) { + $headers[] = $value; + } + } } - } + return $headers; } - return $headers; - } /** * Copy the remote file locally. */ - private function copy() { - - if ($this->getStateProperty('temporary') == FALSE) { - return; - } - - $destination_file = $this->getStateProperty('destination'); - $time_limit = ($this->timeLimit) ? time() + $this->timeLimit : time() + PHP_INT_MAX; - $total = $this->getStateProperty('total_bytes_copied'); - - while ($chunk = $this->getChunk()) { + private function copy() + { - if (!file_exists($destination_file)) { - $bytesWritten = file_put_contents($destination_file, $chunk); - } - else{ - $bytesWritten = file_put_contents($destination_file, $chunk, FILE_APPEND); - } + if ($this->getStateProperty('temporary') == false) { + return; + } - if ($bytesWritten !== strlen($chunk)) { - throw new \RuntimeException("Unable to fetch {$this->setStateProperty('source')}. Reason: Failed to write to destination " . $dest->getPath(), 0); - } + $destination_file = $this->getStateProperty('destination'); + $time_limit = ($this->timeLimit) ? time() + $this->timeLimit : time() + PHP_INT_MAX; + $total = $this->getStateProperty('total_bytes_copied'); + + while ($chunk = $this->getChunk()) { + if (!file_exists($destination_file)) { + $bytesWritten = file_put_contents($destination_file, $chunk); + } else { + $bytesWritten = file_put_contents($destination_file, $chunk, FILE_APPEND); + } + + if ($bytesWritten !== strlen($chunk)) { + throw new \RuntimeException( + "Unable to fetch {$this->setStateProperty('source')}. " . + " Reason: Failed to write to destination " . $dest->getPath(), + 0 + ); + } + + $total += $bytesWritten; + $this->setStateProperty('total_bytes_copied', $total); + + if (time() > $time_limit) { + $this->setStateProperty('total_bytes_copied', $total); + throw new FileCopyInterruptedException( + "Stopped copying file after {$total} bytes. Time limit of " . + "{$this->timeLimit} second(s) reached." + ); + } + } + } - $total += $bytesWritten; - $this->setStateProperty('total_bytes_copied', $total); + private function getChunk() + { + $url = $this->getStateProperty('source'); + $start = $this->getStateProperty('total_bytes_copied'); + $end = $start + $this->chunkSizeInBytes; - if (time() > $time_limit) { - $this->setStateProperty('total_bytes_copied', $total); - throw new FileCopyInterruptedException("Stopped copying file after {$total} bytes. Time limit of {$this->timeLimit} second(s) reached."); - } - } - } + if ($end > $this->getStateProperty('total_bytes')) { + $end = $this->getStateProperty('total_bytes'); + } - private function getChunk() { - $url = $this->getStateProperty('source'); - $start = $this->getStateProperty('total_bytes_copied'); - $end = $start + $this->chunkSizeInBytes; + if ($start == $end) { + return false; + } - if ($end > $this->getStateProperty('total_bytes')) { - $end = $this->getStateProperty('total_bytes'); + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_RANGE, "{$start}-{$end}"); + curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); + $result = curl_exec($ch); + curl_close($ch); + return $result; } - if ($start == $end) { - return FALSE; + private function getTemporaryFilePath($sourceFileUrl) + { + $pieces = explode("/", $sourceFileUrl); + $file_name = end($pieces); + return $this->getTemporaryFile($file_name); } - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_RANGE, "{$start}-{$end}"); - curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - $result = curl_exec($ch); - curl_close($ch); - return $result; - } - - private function getTemporaryFilePath($sourceFileUrl) { - $pieces = explode("/", $sourceFileUrl); - $file_name = end($pieces); - return $this->getTemporaryFile($file_name); - } - /** * Generate a tmp filepath for a given $uuid. * @@ -184,61 +186,66 @@ private function getTemporaryFilePath($sourceFileUrl) { * * @return string */ - private function getTemporaryFile(string $filename): string { - return $this->getTemporaryDirectory() . '/' . $this->sanitizeString($filename); - } + private function getTemporaryFile(string $filename): string + { + return $this->getTemporaryDirectory() . '/' . $this->sanitizeString($filename); + } /** * returns the temporary directory used by drupal. * * @return string */ - private function getTemporaryDirectory() { - return "/tmp"; - } + private function getTemporaryDirectory() + { + return "/tmp"; + } /** * * @param string $string * @return string */ - private function sanitizeString($string) { - return preg_replace('~[^a-z0-9.]+~', '_', strtolower($string)); - } - - private function setState($state) { - $this->getResult()->setData(json_encode($state)); - } + private function sanitizeString($string) + { + return preg_replace('~[^a-z0-9.]+~', '_', strtolower($string)); + } - public function setStateProperty($property, $value) { - $state = $this->getState(); - $state[$property] = $value; - $this->setState($state); - } + private function setState($state) + { + $this->getResult()->setData(json_encode($state)); + } - public function jsonSerialize() - { - return (object) ['timeLimit' => $this->timeLimit, 'result' => $this->getResult()]; - } + public function setStateProperty($property, $value) + { + $state = $this->getState(); + $state[$property] = $value; + $this->setState($state); + } - public static function hydrate($json) { - $data = json_decode($json); + public function jsonSerialize() + { + return (object) ['timeLimit' => $this->timeLimit, 'result' => $this->getResult()]; + } - $reflector = new \ReflectionClass(self::class); - $object = $reflector->newInstanceWithoutConstructor(); + public static function hydrate($json) + { + $data = json_decode($json); - $reflector = new \ReflectionClass($object); + $reflector = new \ReflectionClass(self::class); + $object = $reflector->newInstanceWithoutConstructor(); - $p = $reflector->getProperty('timeLimit'); - $p->setAccessible(true); - $p->setValue($object, $data->timeLimit); + $reflector = new \ReflectionClass($object); - $class = $reflector->getParentClass(); - $p = $class->getProperty('result'); - $p->setAccessible(true); - $p->setValue($object, Result::hydrate(json_encode($data->result))); + $p = $reflector->getProperty('timeLimit'); + $p->setAccessible(true); + $p->setValue($object, $data->timeLimit); - return $object; - } + $class = $reflector->getParentClass(); + $p = $class->getProperty('result'); + $p->setAccessible(true); + $p->setValue($object, Result::hydrate(json_encode($data->result))); + return $object; + } } diff --git a/test/FileFetcherTest.php b/test/FileFetcherTest.php index 27ddd10..30a35c6 100644 --- a/test/FileFetcherTest.php +++ b/test/FileFetcherTest.php @@ -1,62 +1,73 @@ run(); - $data = json_decode($result->getData()); - $this->assertEquals("/tmp/sacramentorealestatetransactions.csv", $data->destination); - $this->assertTrue($data->temporary); - } - - public function testLocal() { - $local_file = __DIR__ . "/files/tiny.csv"; - $fetcher = new \FileFetcher\FileFetcher($local_file); - $result = $fetcher->run(); - $data = json_decode($result->getData()); - $this->assertEquals($local_file, $data->destination); - $this->assertFalse($data->temporary); - } - - public function testTimeOut() { - $fetcher = new \FileFetcher\FileFetcher("https://dkan-default-content-files.s3.amazonaws.com/{$this->sampleCsvSize}_mb_sample.csv"); - $file_size = $fetcher->getStateProperty('total_bytes'); - $this->assertLessThan($file_size, $fetcher->getStateProperty('total_bytes_copied')); - - $fetcher->setTimeLimit(1); - $fetcher->run(); - $this->assertLessThan($file_size, $fetcher->getStateProperty('total_bytes_copied')); - $this->assertGreaterThan(0, $fetcher->getStateProperty('total_bytes_copied')); - $this->assertEquals($fetcher->getResult()->getStatus(), \Procrastinator\Result::STOPPED); - - $json = json_encode($fetcher); - $fetcher2 = \FileFetcher\FileFetcher::hydrate($json); - - $fetcher2->setTimeLimit(PHP_INT_MAX); - $fetcher2->run(); - $this->assertEquals($file_size, $fetcher2->getStateProperty('total_bytes_copied')); - $this->assertEquals(filesize("/tmp/{$this->sampleCsvSize}_mb_sample.csv"), $fetcher2->getStateProperty('total_bytes_copied')); - $this->assertEquals($fetcher2->getResult()->getStatus(), \Procrastinator\Result::DONE); - } - - public function tearDown(): void - { - parent::tearDown(); - $files = [ - "/tmp/{$this->sampleCsvSize}_mb_sample.csv", - "/tmp/sacramentorealestatetransactions.csv" - ]; - - foreach ($files as $file) { - if (file_exists($file)) { - unlink($file); - } + private $sampleCsvSize = 50; + + public function testRemote() + { + // https://drive.google.com/uc?export=download&confirm=-NkI&id=1-9N00dZkOipIAkXMl2D0cdWaVlqfF0E5 + $fetcher = new \FileFetcher\FileFetcher( + "http://samplecsvs.s3.amazonaws.com/Sacramentorealestatetransactions.csv" + ); + $result = $fetcher->run(); + $data = json_decode($result->getData()); + $this->assertEquals("/tmp/sacramentorealestatetransactions.csv", $data->destination); + $this->assertTrue($data->temporary); + } + + public function testLocal() + { + $local_file = __DIR__ . "/files/tiny.csv"; + $fetcher = new \FileFetcher\FileFetcher($local_file); + $result = $fetcher->run(); + $data = json_decode($result->getData()); + $this->assertEquals($local_file, $data->destination); + $this->assertFalse($data->temporary); + } + + public function testTimeOut() + { + $fetcher = new \FileFetcher\FileFetcher( + "https://dkan-default-content-files.s3.amazonaws.com/{$this->sampleCsvSize}_mb_sample.csv" + ); + $file_size = $fetcher->getStateProperty('total_bytes'); + $this->assertLessThan($file_size, $fetcher->getStateProperty('total_bytes_copied')); + + $fetcher->setTimeLimit(1); + $fetcher->run(); + $this->assertLessThan($file_size, $fetcher->getStateProperty('total_bytes_copied')); + $this->assertGreaterThan(0, $fetcher->getStateProperty('total_bytes_copied')); + $this->assertEquals($fetcher->getResult()->getStatus(), \Procrastinator\Result::STOPPED); + + $json = json_encode($fetcher); + $fetcher2 = \FileFetcher\FileFetcher::hydrate($json); + + $fetcher2->setTimeLimit(PHP_INT_MAX); + $fetcher2->run(); + $this->assertEquals($file_size, $fetcher2->getStateProperty('total_bytes_copied')); + $this->assertEquals( + filesize("/tmp/{$this->sampleCsvSize}_mb_sample.csv"), + $fetcher2->getStateProperty('total_bytes_copied') + ); + $this->assertEquals($fetcher2->getResult()->getStatus(), \Procrastinator\Result::DONE); + } + + public function tearDown(): void + { + parent::tearDown(); + $files = [ + "/tmp/{$this->sampleCsvSize}_mb_sample.csv", + "/tmp/sacramentorealestatetransactions.csv" + ]; + + foreach ($files as $file) { + if (file_exists($file)) { + unlink($file); + } + } } - } -} \ No newline at end of file +}