From 6395cb4a30bb6f620a54d53d8b0bac379b5c7cbe Mon Sep 17 00:00:00 2001 From: Ares Date: Fri, 7 Dec 2018 13:47:49 +0800 Subject: [PATCH 1/6] add getCacheFile() --- demo/auto_cache.php | 1 + demo/basic.php | 1 + demo/chained_task.php | 1 + demo/download.php | 1 + demo/http_clone.php | 1 + demo/task_callback.php | 1 + src/Curl.php | 30 ++++++++++++++++++++++-------- 7 files changed, 28 insertions(+), 8 deletions(-) diff --git a/demo/auto_cache.php b/demo/auto_cache.php index 459fdfa..fc4d5aa 100644 --- a/demo/auto_cache.php +++ b/demo/auto_cache.php @@ -2,6 +2,7 @@ require_once '_inc.php'; use Ares333\Curl\Toolkit; $toolkit = new Toolkit(); +$toolkit->setCurl(); $curl = $toolkit->getCurl(); $curl->maxThread = 2; $curl->cache['enable'] = true; diff --git a/demo/basic.php b/demo/basic.php index a354135..6b00585 100644 --- a/demo/basic.php +++ b/demo/basic.php @@ -2,6 +2,7 @@ require_once '_inc.php'; use Ares333\Curl\Toolkit; $toolkit = new Toolkit(); +$toolkit->setCurl(); $curl = $toolkit->getCurl(); $curl->onInfo = null; $responseCode = null; diff --git a/demo/chained_task.php b/demo/chained_task.php index 9f47441..b512a6b 100644 --- a/demo/chained_task.php +++ b/demo/chained_task.php @@ -2,6 +2,7 @@ require_once '_inc.php'; use Ares333\Curl\Toolkit; $toolkit = new Toolkit(); +$toolkit->setCurl(); $curl = $toolkit->getCurl(); $curl->onInfo = null; $url = 'http://baidu.com'; diff --git a/demo/download.php b/demo/download.php index dfe791a..39ec80e 100644 --- a/demo/download.php +++ b/demo/download.php @@ -2,6 +2,7 @@ require '_inc.php'; use Ares333\Curl\Toolkit; $toolkit = new Toolkit(); +$toolkit->setCurl(); $curl = $toolkit->getCurl(); $curl->onInfo = null; $url = 'http://www.baidu.com/img/bd_logo1.png'; diff --git a/demo/http_clone.php b/demo/http_clone.php index 24a3e5a..4055b8f 100644 --- a/demo/http_clone.php +++ b/demo/http_clone.php @@ -33,6 +33,7 @@ function onProcess($r, $args) } } $clone = new HttpCloneDemo($dir); +$clone->setCurl(); 
$clone->getCurl()->opt[CURLOPT_CONNECTTIMEOUT] = 3; $clone->getCurl()->opt[CURLOPT_ENCODING] = 'gzip,deflate'; $clone->getCurl()->cache['enable'] = true; diff --git a/demo/task_callback.php b/demo/task_callback.php index 71eb7cb..d917bd7 100644 --- a/demo/task_callback.php +++ b/demo/task_callback.php @@ -2,6 +2,7 @@ require_once '_inc.php'; use Ares333\Curl\Toolkit; $toolkit = new Toolkit(); +$toolkit->setCurl(); $curl = $toolkit->getCurl(); $curl->maxThread = 1; $curl->onTask = function ($curl) { diff --git a/src/Curl.php b/src/Curl.php index daea5ce..536a999 100644 --- a/src/Curl.php +++ b/src/Curl.php @@ -440,6 +440,26 @@ protected function onProcess($task, $param) } } + /** + * Build the cache file path (relative to the cache dir) for a request: md5 of the URL plus POST data, split as xxx/yyy/rest. + * @param string $url request URL + * @param string|array $post optional POST payload; an array is key-sorted and then URL-encoded, so key order does not change the path + * @return string relative path made of two 3-character directories and the remaining 26 hash characters + */ + public function getCacheFile($url, $post = null) + { + $suffix = ''; + if (isset($post)) { + if (is_array($post)) { + ksort($post); + $post = http_build_query($post); + } + $suffix .= $post; + } + $key = md5($url . $suffix); + return substr($key, 0, 3) . '/' . substr($key, 3, 3) . '/' . substr($key, 6); + } + /** * Set or get file cache. * @@ -458,18 +478,12 @@ protected function cache($task, $data = null) return; } $url = $task['opt'][CURLOPT_URL]; - // verify post - $suffix = ''; + $post = null; if (true == $config['verifyPost'] && ! empty($task['opt'][CURLOPT_POSTFIELDS])) { $post = $task['opt'][CURLOPT_POSTFIELDS]; - if (is_array($post)) { - $post = http_build_query($post); - } - $suffix .= $post; } - $key = md5($url . $suffix); $file = rtrim($config['dir'], '/') . '/'; - $file .= substr($key, 0, 3) . '/' . substr($key, 3, 3) . '/' . substr($key, 6); + $file .= $this->getCacheFile($url, $post); if (! 
isset($data)) { if (file_exists($file)) { $time = time(); From ba483488f91144cffe359b4dc59b780d7f8d083e Mon Sep 17 00:00:00 2001 From: Ares Date: Fri, 7 Dec 2018 14:07:42 +0800 Subject: [PATCH 2/6] optimize function name --- src/HttpClone.php | 14 +++++++------- src/Toolkit.php | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/HttpClone.php b/src/HttpClone.php index fa29733..4b79921 100644 --- a/src/HttpClone.php +++ b/src/HttpClone.php @@ -66,7 +66,7 @@ function __construct($dir) */ function add($url, $depth = null) { - $url = $this->urlFormater($url); + $url = $this->formatUrl($url); if (! isset($url)) { user_error('invalid url(' . $url . ')', E_USER_ERROR); } @@ -87,9 +87,9 @@ function add($url, $depth = null) * * @see \Ares333\Curl\Toolkit::formatUrl() */ - function urlFormater($url) + function formatUrl($url) { - $url = parent::urlFormater($url); + $url = parent::formatUrl($url); $parse = parse_url($url); if (! isset($parse['path'])) { $parse['path'] = '/'; @@ -103,7 +103,7 @@ function urlFormater($url) function start() { foreach ($this->blacklist as $k => $v) { - $this->blacklist[$k] = $this->urlFormater($v); + $this->blacklist[$k] = $this->formatUrl($v); } foreach (array_keys($this->_task) as $v) { if ($this->checkUrl($v)) { @@ -139,7 +139,7 @@ function start() */ protected function url2src($url, $urlCurrent, $isLocal, $fixQuery = true) { - $url = $this->urlFormater($url); + $url = $this->formatUrl($url); if (in_array($url, $this->blacklist)) { return ''; } @@ -295,7 +295,7 @@ function onProcess($r, $args) continue; } $url = $this->uri2url($href, $urlCurrent); - if ($this->isProcess($this->urlFormater($url))) { + if ($this->isProcess($this->formatUrl($url))) { if (in_array(pathinfo($url, PATHINFO_EXTENSION), $this->downloadExtension)) { $urlDownload[$url] = array(); @@ -332,7 +332,7 @@ function onProcess($r, $args) $urlParse ) as $k => $v) { foreach ($v as $k1 => $v1) { - $k1 = $this->urlFormater($k1); + $k1 = $this->formatUrl($k1); if 
($this->checkUrl($k1)) { $file = $this->url2file($k1); if (null == $file) { diff --git a/src/Toolkit.php b/src/Toolkit.php index ce72893..b033c5a 100644 --- a/src/Toolkit.php +++ b/src/Toolkit.php @@ -289,7 +289,7 @@ function isUrl($url) * @param string $url * @return string */ - function urlFormater($url) + function formatUrl($url) { if (! $this->isUrl($url)) { return; From 441e9ae5acb9744c640551b2e030d5c72b671653 Mon Sep 17 00:00:00 2001 From: Ares Date: Fri, 7 Dec 2018 15:08:24 +0800 Subject: [PATCH 3/6] add parseResponse --- src/Curl.php | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/Curl.php b/src/Curl.php index 536a999..9638caa 100644 --- a/src/Curl.php +++ b/src/Curl.php @@ -253,13 +253,7 @@ public function start() if (! isset($task['opt'][CURLOPT_FILE])) { $param['body'] = curl_multi_getcontent($ch); if (isset($task['opt'][CURLOPT_HEADER])) { - preg_match_all("/HTTP\/.+(?=\r\n\r\n)/Usm", $param['body'], $param['header']); - $param['header'] = $param['header'][0]; - $pos = 0; - foreach ($param['header'] as $v) { - $pos += strlen($v) + 4; - } - $param['body'] = substr($param['body'], $pos); + $param = array_merge($param, $this->parseResponse($param['body'])); } } } @@ -311,6 +305,19 @@ public function start() $this->_mh = null; } + public function parseResponse($content) + { + $res = []; + preg_match_all("/HTTP\/.+(?=\r\n\r\n)/Usm", $content, $res['header']); + $res['header'] = $res['header'][0]; + $pos = 0; + foreach ($res['header'] as $v) { + $pos += strlen($v) + 4; + } + $res['body'] = substr($content, $pos); + return $res; + } + /** * Call $this->onInfo * From 727a27c88f8ab98929d2a78dccf4304eabe174e9 Mon Sep 17 00:00:00 2001 From: Ares Date: Fri, 7 Dec 2018 15:15:40 +0800 Subject: [PATCH 4/6] optimize code --- src/Curl.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Curl.php b/src/Curl.php index 9638caa..a4e2840 100644 --- a/src/Curl.php +++ b/src/Curl.php @@ -305,16 +305,16 @@ 
public function start() $this->_mh = null; } - public function parseResponse($content) + public function parseResponse($response) { $res = []; - preg_match_all("/HTTP\/.+(?=\r\n\r\n)/Usm", $content, $res['header']); + preg_match_all("/HTTP\/.+(?=\r\n\r\n)/Usm", $response, $res['header']); $res['header'] = $res['header'][0]; $pos = 0; foreach ($res['header'] as $v) { $pos += strlen($v) + 4; } - $res['body'] = substr($content, $pos); + $res['body'] = substr($response, $pos); return $res; } From 081d933d17276bc2537a59d1bca3af32cc736dfd Mon Sep 17 00:00:00 2001 From: Ares Date: Mon, 10 Dec 2018 19:52:54 +0800 Subject: [PATCH 5/6] Update README_CN.md --- README_CN.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README_CN.md b/README_CN.md index d3ee4ce..edcaa09 100644 --- a/README_CN.md +++ b/README_CN.md @@ -12,9 +12,7 @@ composer require ares333/php-curl ## 联系我们 QQ群: -2️⃣ 744854777 - -1⃣️ 215348766(满) +215348766(满) ## 特性 1. 极低的CPU、内存使用率和高性能(实测抓取html速度达到3000+页每秒,下载速度1000Mbps)。 From 2d57aee309618e45b9eeee39542b2b101d9a4161 Mon Sep 17 00:00:00 2001 From: Ares Date: Mon, 10 Dec 2018 19:53:08 +0800 Subject: [PATCH 6/6] Update README_CN.md --- README_CN.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README_CN.md b/README_CN.md index edcaa09..92d16b6 100644 --- a/README_CN.md +++ b/README_CN.md @@ -10,9 +10,7 @@ composer require ares333/php-curl ``` ## 联系我们 -QQ群: - -215348766(满) +QQ群:215348766 ## 特性 1. 极低的CPU、内存使用率和高性能(实测抓取html速度达到3000+页每秒,下载速度1000Mbps)。