-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
filesystem.resolve_filesystem_and_path: Fix host for HDFS
Using `fsspec.url_to_fs` resulted in an incorrect host value being used to instantiate HDFS filesystems. For example, `fsspec.url_to_fs("viewfs://root/user/someone")` would call `fsspec.filesystem("viewfs", host="root")`, which could cause errors. In this case the host needs to be `viewfs://root`, so we restore most of that function's code from before #128. However, since we now use fsspec, the function generalizes to all of its supported filesystem implementations.
- Loading branch information
1 parent
087d658
commit 1729706
Showing
2 changed files
with
44 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,32 @@ | ||
import logging | ||
from typing import Tuple, Any | ||
from urllib.parse import urlparse | ||
|
||
import fsspec | ||
import fsspec.implementations.arrow | ||
|
||
# Support the viewfs:// protocol for HDFS: register fsspec's Arrow-based | ||
# HadoopFileSystem under the "viewfs" protocol name. | ||
fsspec.register_implementation("viewfs", fsspec.implementations.arrow.HadoopFileSystem) | ||
|
||
# Module-level logger, named after this module. | ||
_logger = logging.getLogger(__name__) | ||
|
||
|
||
def resolve_filesystem_and_path(uri: str, **kwargs: Any) -> Tuple[fsspec.AbstractFileSystem, str]:
    """Resolve ``uri`` to an fsspec filesystem and the path component of the URI.

    The filesystem is chosen from the URI scheme:

    * ``hdfs://`` and ``viewfs://`` -- an ``"hdfs"`` filesystem is created with
      ``host`` set to ``"<scheme>://<host>"`` (or ``"default"`` when the URI has
      no host) and ``port`` set to the port given in the URI (``0`` when the
      port is absent or not a plain decimal number).
    * no scheme (e.g. ``/home/user/myfile.parquet``) -- the local ``"file"``
      filesystem.
    * any other scheme -- ``fsspec.filesystem(<scheme>)``.

    Args:
        uri: URI or local path to resolve.
        **kwargs: Extra keyword arguments forwarded to ``fsspec.filesystem``.

    Returns:
        A ``(filesystem, path)`` tuple, where ``path`` is the path component of
        ``uri`` as parsed by ``urllib.parse.urlparse``. The network location
        (host/port) is not part of the returned path.
    """
    parsed_uri = urlparse(uri)
    scheme = parsed_uri.scheme

    if scheme in ('hdfs', 'viewfs'):
        # Split "host[:port]" by hand; anything after the first ':' that is not
        # a plain decimal number is ignored and the port falls back to 0.
        host, _, port_text = parsed_uri.netloc.partition(':')

        # The host must carry the scheme (e.g. "viewfs://root"): passing the
        # bare host ("root") is what `fsspec.url_to_fs` did, and it could
        # cause errors when instantiating the filesystem.
        host = f"{scheme}://{host}" if host else 'default'

        # `isdecimal` (unlike `isnumeric`) only accepts strings `int` can parse.
        port = int(port_text) if port_text.isdecimal() else 0

        fs = fsspec.filesystem("hdfs", host=host, port=port, **kwargs)
    elif scheme == '':
        # Input is local path such as /home/user/myfile.parquet
        fs = fsspec.filesystem('file', **kwargs)
    else:
        # NOTE(review): for schemes whose netloc is part of the object path
        # (bucket-style URIs), the returned path omits the netloc -- confirm
        # that callers expect this.
        fs = fsspec.filesystem(scheme, **kwargs)

    _logger.info("Resolved base filesystem: %s", type(fs))
    return fs, parsed_uri.path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters