From c6564084fdfe4f1c50444fc64e76d37b1445bdb0 Mon Sep 17 00:00:00 2001 From: Krzysztof Madejski Date: Tue, 6 Jun 2017 17:59:31 +0200 Subject: [PATCH] Add decoding of incoming URLs, fixes #46 --- ckanext/archiver/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/archiver/tasks.py b/ckanext/archiver/tasks.py index ffcbc490..bd51d9bf 100644 --- a/ckanext/archiver/tasks.py +++ b/ckanext/archiver/tasks.py @@ -13,7 +13,6 @@ import re import routes import time -import urlparse from requests.packages import urllib3 @@ -555,6 +554,7 @@ def archive_resource(context, resource, log, result=None, url_timeout=30): parsed_url = urlparse.urlparse(resource.get('url')) try: file_name = parsed_url.path.split('/')[-1] or 'resource' + file_name = urllib.unquote(file_name) file_name = file_name.strip() # trailing spaces cause problems file_name = file_name.encode('ascii', 'ignore') # e.g. u'\xa3' signs except: