Skip to content

Commit

Permalink
Fix URL checking for Windows OS
Browse files Browse the repository at this point in the history
  - Add tests for URL helper function and remove unused code
  • Loading branch information
brunato committed Jun 12, 2020
1 parent 2c3032f commit 201bffe
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 60 deletions.
24 changes: 24 additions & 0 deletions tests/test_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
etree_element, py_etree_element
from xmlschema.namespaces import XSD_NAMESPACE
from xmlschema.helpers import is_etree_element
from xmlschema.resources import is_url, is_local_url, is_remote_url, url_path_is_file
from xmlschema.documents import get_context
from xmlschema.testing import SKIP_REMOTE_TESTS

Expand Down Expand Up @@ -94,6 +95,29 @@ def check_url(self, url, expected):
expected_path = PurePath(expected_parts.path)
self.assertEqual(path, expected_path, "%r: Paths differ." % url)

def test_url_helper_functions(self):
    """Exercise is_url, is_local_url, is_remote_url and url_path_is_file on sample inputs."""
    xsd_file = self.col_xsd_file

    # is_url: a file path is accepted, XML-like text, multi-line text and None are not
    self.assertTrue(is_url(xsd_file))
    for not_a_url in (' \t<root/>', 'line1\nline2', None):
        self.assertFalse(is_url(not_a_url))

    # is_local_url: plain paths (POSIX and Windows style, also with surrounding
    # spaces) and file:// URLs are local, an http:// URL is not
    local_samples = (
        xsd_file,
        '/home/user/',
        '/home/user/schema.xsd',
        ' /home/user/schema.xsd ',
        'C:\\Users\\foo\\schema.xsd',
        ' file:///home/user/schema.xsd',
    )
    for sample in local_samples:
        self.assertTrue(is_local_url(sample))
    self.assertFalse(is_local_url('http://example.com/schema.xsd'))

    # is_remote_url: the opposite classification for the same kind of samples
    not_remote_samples = (
        xsd_file,
        '/home/user/',
        '/home/user/schema.xsd',
        ' file:///home/user/schema.xsd',
    )
    for sample in not_remote_samples:
        self.assertFalse(is_remote_url(sample))
    self.assertTrue(is_remote_url(' http://example.com/schema.xsd'))

    # url_path_is_file: true for an existing file, false for a directory
    self.assertTrue(url_path_is_file(self.col_xml_file))
    self.assertFalse(url_path_is_file(self.col_dir))

def test_normalize_url_posix(self):
url1 = "https://example.com/xsd/other_schema.xsd"
self.check_url(normalize_url(url1, base_url="/path_my_schema/schema.xsd"), url1)
Expand Down
119 changes: 60 additions & 59 deletions xmlschema/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,65 +42,7 @@ class XmlResourceXPathParser(XPath1Parser):


###
# Internal helper functions

def is_url(obj):
    """
    Checks if an object can be a URL, restricting to strings that cannot be XML data.

    :param obj: the object to check. Only `str` and `bytes` instances can be URLs.
    :return: `False` if the argument is not a str/bytes instance, if it contains a
    newline, if it starts with '<' after optional leading whitespace, or if
    `urlsplit` raises a `ValueError` on it; `True` otherwise.
    """
    if isinstance(obj, str):
        newline, xml_start = '\n', '<'
    elif isinstance(obj, bytes):
        # Membership and prefix tests on bytes need bytes operands, otherwise
        # a TypeError is raised instead of returning a boolean.
        newline, xml_start = b'\n', b'<'
    else:
        return False

    if newline in obj or obj.lstrip().startswith(xml_start):
        return False

    try:
        urlsplit(obj)
    except ValueError:
        return False
    else:
        return True


def is_remote_url(url):
    """
    Checks if the argument is a URL that refers to a remote resource.

    :param url: the object to check.
    :return: `True` if the argument passes `is_url` and its scheme is neither
    empty nor 'file', `False` otherwise.
    """
    if not is_url(url):
        return False

    scheme = urlsplit(url).scheme
    if isinstance(scheme, bytes):
        # urlsplit() returns bytes components for bytes input: decode before
        # comparing with str literals, otherwise an empty or 'file' scheme
        # would never be matched and a local path would be reported as remote.
        scheme = scheme.decode('ascii')
    return scheme not in ('', 'file')


def is_local_url(url):
    """
    Checks if the argument is a URL that refers to a local resource.

    :param url: the object to check.
    :return: `True` if the argument passes `is_url` and its scheme is empty
    or 'file', `False` otherwise.
    """
    if not is_url(url):
        return False

    scheme = urlsplit(url).scheme
    if isinstance(scheme, bytes):
        # urlsplit() returns bytes components for bytes input: decode before
        # comparing with str literals, otherwise a bytes path could never be
        # recognized as local.
        scheme = scheme.decode('ascii')
    return scheme in ('', 'file')


def url_path_is_directory(url):
    """Tells if the argument is a local URL whose path component is an existing directory."""
    if not is_local_url(url):
        return False
    path = urlsplit(url).path
    return os.path.isdir(path)


def url_path_is_file(url):
    """Tells if the argument is a local URL whose path component is an existing regular file."""
    if not is_local_url(url):
        return False
    path = urlsplit(url).path
    return os.path.isfile(path)


def update_prefix(namespaces, prefix, uri):
    """
    Updates a namespace map with a prefix-URI pair without overwriting an
    existing registration.

    An empty prefix is registered under the '' key when that key is free. If
    the key is already bound to a different URI the prefix 'default' is tried
    instead. When a prefix is already bound to a different URI, a numeric
    suffix is appended or incremented ('p' -> 'p0' -> 'p1' ...) until a free
    prefix, or a prefix already bound to the same URI, is found.

    :param namespaces: a mutable mapping from prefixes to URIs, updated in place.
    :param prefix: the candidate prefix. Any falsy value (e.g. '' or `None`)
    stands for the default namespace.
    :param uri: the namespace URI to register.
    """
    if not prefix:
        if '' not in namespaces:
            # Always store under the '' key that has just been tested: using
            # another falsy key (e.g. None) would leave '' free and let a later
            # default declaration overwrite this one.
            namespaces[''] = uri
            return
        elif namespaces[''] == uri:
            return
        prefix = 'default'

    while prefix in namespaces:
        if namespaces[prefix] == uri:
            # Already registered with this prefix (or a derived one)
            return
        match = re.search(r'(\d+)$', prefix)
        if match:
            # Increment the trailing number: 'p0' -> 'p1', 'n9' -> 'n10'
            index = int(match.group()) + 1
            prefix = prefix[:match.start()] + str(index)
        else:
            prefix += '0'
    namespaces[prefix] = uri


###
# API for XML resources
# URL normalization (that fixes many headaches :)

def normalize_url(url, base_url=None, keep_relative=False):
"""
Expand Down Expand Up @@ -191,6 +133,65 @@ def filter_url(x):
return filter_url(normalized_url)


###
# Internal helper functions

def is_url(obj):
    """
    Checks if an object can be a URL, restricting to strings that cannot be XML data.

    :param obj: the object to check. Only `str` and `bytes` instances can be URLs.
    :return: `False` if the argument is not a str/bytes instance, if it contains a
    newline, if it starts with '<' after optional leading whitespace, or if
    `urlsplit` raises a `ValueError` on the stripped value; `True` otherwise.
    """
    if isinstance(obj, str):
        newline, xml_start = '\n', '<'
    elif isinstance(obj, bytes):
        # Membership and prefix tests on bytes need bytes operands, otherwise
        # a TypeError is raised instead of returning a boolean.
        newline, xml_start = b'\n', b'<'
    else:
        return False

    if newline in obj or obj.lstrip().startswith(xml_start):
        return False

    try:
        urlsplit(obj.strip())
    except ValueError:
        return False
    else:
        return True


def is_remote_url(url):
    """
    Tells if the argument passes `is_url` and, once normalized, has a scheme
    that is neither empty nor 'file'.
    """
    if not is_url(url):
        return False
    scheme = urlsplit(normalize_url(url)).scheme
    return scheme not in ('', 'file')


def is_local_url(url):
    """
    Tells if the argument passes `is_url` and, once normalized, has a scheme
    that is empty or 'file'.
    """
    if not is_url(url):
        return False
    scheme = urlsplit(normalize_url(url)).scheme
    return scheme in ('', 'file')


def url_path_is_file(url):
    """
    Tells if the argument is a local URL that points to an existing regular file.

    The raw argument is tried first as a filesystem path, then the path
    component of the normalized URL is tried.
    """
    if is_local_url(url):
        if os.path.isfile(url):
            return True
        normalized_path = urlsplit(normalize_url(url)).path
        return os.path.isfile(normalized_path)
    return False


def update_prefix(namespaces, prefix, uri):
    """
    Updates a namespace map with a prefix-URI pair without overwriting an
    existing registration.

    An empty prefix is registered under the '' key when that key is free. If
    the key is already bound to a different URI the prefix 'default' is tried
    instead. When a prefix is already bound to a different URI, a numeric
    suffix is appended or incremented ('p' -> 'p0' -> 'p1' ...) until a free
    prefix, or a prefix already bound to the same URI, is found.

    :param namespaces: a mutable mapping from prefixes to URIs, updated in place.
    :param prefix: the candidate prefix. Any falsy value (e.g. '' or `None`)
    stands for the default namespace.
    :param uri: the namespace URI to register.
    """
    if not prefix:
        if '' not in namespaces:
            # Always store under the '' key that has just been tested: using
            # another falsy key (e.g. None) would leave '' free and let a later
            # default declaration overwrite this one.
            namespaces[''] = uri
            return
        elif namespaces[''] == uri:
            return
        prefix = 'default'

    while prefix in namespaces:
        if namespaces[prefix] == uri:
            # Already registered with this prefix (or a derived one)
            return
        match = re.search(r'(\d+)$', prefix)
        if match:
            # Increment the trailing number: 'p0' -> 'p1', 'n9' -> 'n10'
            index = int(match.group()) + 1
            prefix = prefix[:match.start()] + str(index)
        else:
            prefix += '0'
    namespaces[prefix] = uri


###
# API for XML resources

def normalize_locations(locations, base_url=None, keep_relative=False):
"""
Returns a list of normalized locations. The locations are normalized using
Expand Down
3 changes: 2 additions & 1 deletion xmlschema/validators/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,8 @@ def __init__(self, source, namespace=None, validation='strict', global_maps=None
logger.setLevel(logging.WARNING)

if allow == 'sandbox' and base_url is None and is_local_url(source):
base_url = os.path.dirname(source)
# Allow sandbox mode without a base_url using the initial schema URL as base
base_url = os.path.dirname(normalize_url(source))

self.source = XMLResource(source, base_url, allow, defuse, timeout, lazy=False)
logger.debug("Read schema from %r", self.source)
Expand Down

0 comments on commit 201bffe

Please sign in to comment.