diff --git a/python/data_download/ceda_ftp_download.py b/python/data_download/ceda_ftp_download.py index 5d95b66e..ab7969af 100644 --- a/python/data_download/ceda_ftp_download.py +++ b/python/data_download/ceda_ftp_download.py @@ -7,31 +7,35 @@ from pathlib import Path -def download_ftp(input, output, username, password, order): +def download_ftp(input: str, output: str, username: str, password: str, order: int) -> None: """ - Function to connect to the CEDA archive and download data. Note you need to have a user account and - provide your username and FTP password. + Function to connect to the CEDA archive and download data. + + Note + ---- + You need to have a user account and provide your username and `FTP` password. Parameters ---------- - input: str + input Path where the CEDA data to download is located - (e.g '/badc/ukmo-hadobs/data/insitu/MOHC/HadOBS/HadUK-Grid/v1.1.0.0/1km/tasmin/day/v20220310' - or top level folder like '/badc/ukcp18/data/land-cpm/uk/2.2km/rcp85' if you want to + (e.g `/badc/ukmo-hadobs/data/insitu/MOHC/HadOBS/HadUK-Grid/v1.1.0.0/1km/tasmin/day/v20220310` + or top level folder like `/badc/ukcp18/data/land-cpm/uk/2.2km/rcp85` if you want to download all files in all sub-directories). - output: str + output Path to save the downloaded data - sub-directories will be created automatically under the output directory. - username: str + username CEDA registered username - password: str - CEDA FPT password (obtained as explained in https://help.ceda.ac.uk/article/280-ftp) - order: int - Order in which to run download 0: default order of file from ftp server 1: reverse order 2: shuffle. - This functionality allows to run several downloads in parallel without rewriting files that are being downloaded. + password + CEDA `FTP` password (obtained as explained in `https://help.ceda.ac.uk/article/280-ftp`) + order + Order in which to run download - Returns - ------- + `0`: default order of files from `FTP` server + `1`: reverse order + `2`: shuffle. 
+ This functionality allows running several downloads in parallel without rewriting files that are being downloaded. """ # If directory doesn't exist make it diff --git a/python/utils.py b/python/utils.py index e0ea784b..eca94930 100644 --- a/python/utils.py +++ b/python/utils.py @@ -20,9 +20,17 @@ def date_to_str( ) -> str: """Return a `str` in `date_format_str` of `date_obj`. + Parameters + ---------- + date_obj + A `datetime.date` or `str` object to convert. + in_format_str + A `strftime` format `str` to convert `date_obj` from if `date_obj` is a `str`. + out_format_str + A `strftime` format `str` to convert `date_obj` to. + Examples -------- - >>> date_to_str('20100101') '20100101' >>> date_to_str(date(2010, 1, 1)) @@ -43,9 +51,21 @@ def date_range_to_str( ) -> str: """Take `start_date` and `end_date` `str` or `date` instances and return a range `str`. + Parameters + ---------- + start_date + First date in range. + end_date + Last date in range. + split_str + `char` to split returned date range `str`. + in_format_str + A `strftime` format `str` to convert `start_date` and `end_date` from. + out_format_str + A `strftime` format `str` to convert `start_date` and `end_date` to. + Examples -------- - >>> date_range_to_str('20100101', '20100330') '20100101-20100330' >>> date_range_to_str(date(2010, 1, 1), '20100330') @@ -64,9 +84,13 @@ def date_range_to_str( def iter_to_tuple_strs(iter_var: Iterable[Any]) -> tuple[str, ...]: """Return a `tuple` with all components converted to `strs`. + Parameters + ---------- + iter_var + Iterable of objects that can be converted into `strs`. + Examples -------- - >>> iter_to_tuple_strs(['cat', 1, Path('a/path')]) ('cat', '1', 'a/path') @@ -79,9 +103,25 @@ def path_iterdir( ) -> Generator[Optional[Path], None, None]: """Return an `Generator` after ensuring `path` exists. + Parameters + ---------- + path + `Path` of folder to iterate through. + strict + Whether to raise `FileNotFoundError` if `path` not found. 
+ + Returns + ------- + A `Generator` of `Paths` within folder `path`. + + Raises + ------ + FileNotFoundError + Raised if `strict = True` and `path` does not exist. + + Examples -------- - >>> tmp_path = getfixture('tmp_path') >>> from os import chdir >>> chdir(tmp_path) @@ -101,7 +141,6 @@ def path_iterdir( >>> example_path.unlink() >>> tuple(path_iterdir(example_path.parent)) () - """ try: yield from path.iterdir() @@ -120,27 +159,29 @@ def make_user( ) -> Path: """Make user account and copy code to that environment. - Args: - user: user and home folder name - password: login password - code_path: path to copy code from to user path - - Example: - ```pycon - >>> import os - >>> if os.geteuid() != 0: - ... pytest.skip('requires root permission to run') - >>> user_name: str = 'very_unlinkely_test_user' - >>> password: str = 'test_pass' - >>> code_path: Path = Path('/home/jovyan') - >>> make_user(user_name, password, code_path=JUPYTER_DOCKER_USER_PATH) - PosixPath('/home/very_unlinkely_test_user') - >>> Path(f'/home/{user_name}/python/conftest.py').is_file() - True - >>> rm_user(user_name) - 'very_unlinkely_test_user' - - ``` + Parameters + ---------- + user + user and home folder name + password + login password + code_path + path to copy code from to user path + + Examples + -------- + >>> import os + >>> if os.geteuid() != 0: + ... 
pytest.skip('requires root permission to run') + >>> user_name: str = 'very_unlinkely_test_user' + >>> password: str = 'test_pass' + >>> code_path: Path = Path('/home/jovyan') + >>> make_user(user_name, password, code_path=JUPYTER_DOCKER_USER_PATH) + PosixPath('/home/very_unlinkely_test_user') + >>> Path(f'/home/{user_name}/python/conftest.py').is_file() + True + >>> rm_user(user_name) + 'very_unlinkely_test_user' """ home_path: Path = user_home_path / user subprocess.run(f"useradd {user}", shell=True) @@ -154,23 +195,24 @@ def make_user( def rm_user(user: str, user_home_path: Path = DEBIAN_HOME_PATH) -> str: """Remove user and user home folder. - Args: - user: user and home folder name - password: login password - - Example: - ```pycon - >>> import os - >>> if os.geteuid() != 0: - ... pytest.skip('requires root permission to run') - >>> user_name: str = 'very_unlinkely_test_user' - >>> password: str = 'test_pass' - >>> make_user(user_name, password, code_path=JUPYTER_DOCKER_USER_PATH) - PosixPath('/home/very_unlinkely_test_user') - >>> rm_user(user_name) - 'very_unlinkely_test_user' - - ``` + Parameters + ---------- + user + User home folder name (usually the same as the user login name). + user_home_path + Parent path of `user` folder name. + + Examples + -------- + >>> import os + >>> if os.geteuid() != 0: + ... pytest.skip('requires root permission to run') + >>> user_name: str = 'very_unlinkely_test_user' + >>> password: str = 'test_pass' + >>> make_user(user_name, password, code_path=JUPYTER_DOCKER_USER_PATH) + PosixPath('/home/very_unlinkely_test_user') + >>> rm_user(user_name) + 'very_unlinkely_test_user' """ subprocess.run(f"userdel {user}", shell=True) rmtree(user_home_path / user) @@ -182,37 +224,41 @@ def make_users( ) -> Generator[Path, None, None]: """Load a file of usernames and passwords and to pass to make_user. 
- Args: - file_path: path to collumned file including user names and passwords per row - user_col: str of column name for user names - password_col: name of column name for passwords - file_reader: function to read `file_path` - **kwargs: additional parameters for to pass to `file_reader` - - Example: - ```pycon - >>> import os - >>> if os.geteuid() != 0: - ... pytest.skip('requires root permission to run') - >>> from pandas import read_excel - >>> code_path: Path = Path('/home/jovyan') - >>> def excel_row_iter(path: Path, **kwargs) -> dict: - ... df: DataFrame = read_excel(path, **kwargs) - ... return df.to_dict(orient="records") - >>> test_accounts_path: Path = Path('tests/test_user_accounts.xlsx') - >>> user_paths: tuple[Path, ...] = tuple(make_users( - ... file_path=test_accounts_path, - ... user_col="User Name", - ... password_col="Password", - ... file_reader=excel_row_iter, - ... code_path=JUPYTER_DOCKER_USER_PATH, - ... )) - >>> [(path / 'python' / 'conftest.py').is_file() for path in user_paths] - [True, True, True, True, True] - >>> [rm_user(user_path.name) for user_path in user_paths] - ['sally', 'george', 'jean', 'felicity', 'frank'] - - ``` + Parameters + ---------- + file_path + `Path` to columned file including user names and passwords per row. + user_col + `str` of column name for user names. + password_col + `str` of column name for passwords. + file_reader + Callable (function) to read `file_path`. + **kwargs + Additional parameters to pass to `make_user` (e.g. `code_path`). + + Examples + -------- + >>> import os + >>> if os.geteuid() != 0: + ... pytest.skip('requires root permission to run') + >>> from pandas import read_excel + >>> code_path: Path = Path('/home/jovyan') + >>> def excel_row_iter(path: Path, **kwargs) -> dict: + ... df: DataFrame = read_excel(path, **kwargs) + ... return df.to_dict(orient="records") + >>> test_accounts_path: Path = Path('tests/test_user_accounts.xlsx') + >>> user_paths: tuple[Path, ...] = tuple(make_users( + ... 
file_path=test_accounts_path, + ... user_col="User Name", + ... password_col="Password", + ... file_reader=excel_row_iter, + ... code_path=JUPYTER_DOCKER_USER_PATH, + ... )) + >>> [(path / 'python' / 'conftest.py').is_file() for path in user_paths] + [True, True, True, True, True] + >>> [rm_user(user_path.name) for user_path in user_paths] + ['sally', 'george', 'jean', 'felicity', 'frank'] """ for record in file_reader(file_path): yield make_user(user=record[user_col], password=record[password_col], **kwargs)