From 18cc7e0c58a360f0b1b15837a866e48a4951ca33 Mon Sep 17 00:00:00 2001 From: Leo Schick Date: Wed, 29 Nov 2023 10:20:01 +0100 Subject: [PATCH 1/4] refactoring drop mara-config integration --- mara_cli/_mara_modules.py | 68 ++++++++++++++++++++++++++++++++++++++ mara_cli/cli.py | 55 +++++++++++++++--------------- setup.cfg | 3 +- tests/test_mara_main.py | 4 --- tests/test_print_config.py | 32 ------------------ 5 files changed, 97 insertions(+), 65 deletions(-) create mode 100644 mara_cli/_mara_modules.py delete mode 100644 tests/test_print_config.py diff --git a/mara_cli/_mara_modules.py b/mara_cli/_mara_modules.py new file mode 100644 index 0000000..914ca40 --- /dev/null +++ b/mara_cli/_mara_modules.py @@ -0,0 +1,68 @@ +"""Internal functions interacting with mara modules""" + +import copy +from logging import Logger +import sys +from types import ModuleType +from typing import Callable, Dict, Iterable + +import click + + +_mara_modules_imported = False + +def import_mara_modules(log: Logger): + """ + Import all installed mara modules + + Args: + log: The application logger. + """ + global _mara_modules_imported + if _mara_modules_imported: + return + + import pkg_resources + import importlib + + for i in pkg_resources.working_set: + package_name = i.key + #version = i.version + if package_name.startswith('mara-'): + log.debug(f"Import module {package_name}") + importlib.import_module(name=package_name.replace('-', '_'), package=package_name) + + _mara_modules_imported = True + + +def module_functionalities(module: ModuleType, MARA_XXX: str, type) -> []: + """ + Returns some functionalities of a module that is declared in a MARA_XXX variable or function + + `module.MARA_XXX` can be + - a function that returns a list or dict + - a list + - a dict + """ + if MARA_XXX in dir(module): + functionalities = getattr(module, MARA_XXX) + if isinstance(functionalities, Callable): + functionalities = functionalities() + if isinstance(functionalities, Dict): + functionalities = functionalities.values() + if not isinstance(functionalities, Iterable): + raise TypeError( + f'{module.__name__}.{MARA_XXX} should be or return a list or dict of {type.__name__}. Got "{functionalities}".') + for functionality in functionalities: + if not isinstance(functionality, type): + raise TypeError(f'In {module.__name__}.{MARA_XXX}: Expected a {type.__name__}, got "{functionality}"') + return functionalities + else: + return [] + + +def get_contributed_functionality(name: str, type) -> Dict[ModuleType, object]: + """Gets the contributed functionality for one MARA_ variable""" + for module in copy.copy(sys.modules).values(): + for obj in module_functionalities(module, name, click.Command): + yield (module, obj) diff --git a/mara_cli/cli.py b/mara_cli/cli.py index 8efffad..3f2bc04 100644 --- a/mara_cli/cli.py +++ b/mara_cli/cli.py @@ -7,15 +7,18 @@ log = logging.getLogger(__name__) +RED = '\033[31m' +RESET = '\033[0m' -@click.group(help="""\ -Runs contributed commandline commands - -Contributed functionality (ETL runners, downloader,...) are available as subcommands. +@click.group(help=(""" +The Mara ETL Framework is a Python framework to build data pipelines. + +Contributed functionality (ETL runners, downloader,...) are available as subcommands.""" + + (""" + To run the flask webapp, use 'flask run'. - -""") +""") if 'mara_app' in sys.modules else '')) @click.option('--debug', default=False, is_flag=True, help="Show debug output") @click.option('--log-stderr', default=False, is_flag=True, help="Send log output to stderr") def cli(debug: bool, log_stderr: bool): @@ -26,6 +29,7 @@ def cli(debug: bool, log_stderr: bool): def setup_commandline_commands(): """Needs to be run before click itself is run so the config which contributes click commands is available""" + from ._mara_modules import import_mara_modules, get_contributed_functionality commandline_debug = '--debug' in sys.argv # makefiles expect all log in stdout. Send to stderr only if asked to log_stream = sys.stderr if '--log-stderr' in sys.argv else sys.stdout @@ -38,33 +42,20 @@ def setup_commandline_commands(): logging.root.setLevel(logging.DEBUG) log.debug("Enabled debug output via commandline") - # Initialize the config system - from mara_config import init_mara_config_once - init_mara_config_once() - - # The order basically means that the we only get information about the config system startup - # when --debug is given on the commandline, but not when mara_config.config.debug() is configured - # in the config system itself. - # I think we can live with that... - from mara_config.config import debug as configured_debug - if configured_debug(): - logging.root.setLevel(logging.DEBUG) - log.debug("Enabled debug output via config") - - # overwrite any config system with commandline debug switch - if commandline_debug and not configured_debug(): - from mara_config.config_system import set_config - set_config('debug', function=lambda: True) + # Import all installed mara packages + import_mara_modules(log) - from mara_config import get_contributed_functionality known_names = [] - for module, command in get_contributed_functionality('MARA_CLICK_COMMANDS'): + for module, command in get_contributed_functionality('MARA_CLICK_COMMANDS', click.Command): if command and 'callback' in command.__dict__ and command.__dict__['callback']: package = command.__dict__['callback'].__module__.rpartition('.')[0] # Give a package a chance to put all their commands as subcommands of the main package name. # For that to work we have to make sure we do not add multiple commands with the same name - if isinstance(command, click.Group): - name = command.name + if isinstance(command, click.MultiCommand): + if command.name.startswith('mara-'): + name = command.name[5:] + else: + name = command.name else: name = package + '.' + command.name if name in known_names: @@ -74,6 +65,16 @@ def setup_commandline_commands(): known_names.append(name) command.name = name cli.add_command(command) + + if not cli.commands: + # Could not find any command in the installed modules + print(RED + "No mara package is installed which provide commands" + RESET, file=sys.stderr) + print(""" +Please install the packages you want to use, e.g. by calling + + pip install mara-pipelines +""", file=sys.stderr) + sys.exit(1) def main(): diff --git a/setup.cfg b/setup.cfg index 6bf1b1c..2c5188d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,9 +11,8 @@ license = MIT [options] packages = mara_cli install_requires = - mara-config>=0.2.0 click -dependency_links = git+https://github.com/mara/mara-config.git@main#egg=mara-config + setuptools [options.extras_require] test = diff --git a/tests/test_mara_main.py b/tests/test_mara_main.py index 978e915..d0a86aa 100644 --- a/tests/test_mara_main.py +++ b/tests/test_mara_main.py @@ -1,9 +1,6 @@ from mara_cli.cli import cli import re -import os -# needed workaorund because mara expects a MARA_APP be importable -os.environ['MARA_APP'] = 'mara_cli' def test_without_argument(cli_runner): @@ -12,4 +9,3 @@ def test_without_argument(cli_runner): # here we get the name as 'cli' instead of 'mara' assert 'Usage: cli [OPTIONS] COMMAND [ARGS]' in result.output assert re.search(r'--debug\s+Show debug output',result.output) is not None - diff --git a/tests/test_print_config.py b/tests/test_print_config.py deleted file mode 100644 index 5df17ea..0000000 --- a/tests/test_print_config.py +++ /dev/null @@ -1,32 +0,0 @@ -from mara_cli.cli import cli, setup_commandline_commands -import re -import os - -import mara_config - -# needed workaorund because mara expects a MARA_APP be importable -os.environ['MARA_APP'] = 'mara_cli' - - -def test_print_config(cli_runner): - # needed to get the debug into the config ouput - mara_config.register_functionality(mara_config) - # Needed to get the click commands registered - setup_commandline_commands() - result = cli_runner.invoke(cli , ['config', 'print']) - assert result.exit_code == 0 - assert 'Config:' in result.output - assert re.search(r'debug.+-D--.+->.+False',result.output) is not None - -def test_print_config_debug(cli_runner): - mara_config.register_functionality(mara_config) - # Needed to get the click commands registered - setup_commandline_commands() - # unfortunately, you cannot simply specify ['--debug', 'config', 'print'] - # because '--debug is handled outside of click - mara_config.set_config('debug', function=lambda: True) - result = cli_runner.invoke(cli , ['config', 'print']) - #assert result.exit_code == 0 - assert 'Config:' in result.output - print(result.output) - assert re.search(r'debug.+SD--.+->.+True',result.output) is not None From 478be841d0507e19fe309baa45387240f8e4bbbf Mon Sep 17 00:00:00 2001 From: Leo Schick Date: Wed, 29 Nov 2023 10:30:38 +0100 Subject: [PATCH 2/4] fix global help message --- mara_cli/cli.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/mara_cli/cli.py b/mara_cli/cli.py index 3f2bc04..bd1a19e 100644 --- a/mara_cli/cli.py +++ b/mara_cli/cli.py @@ -11,14 +11,10 @@ RESET = '\033[0m' -@click.group(help=(""" +@click.group(help=""" The Mara ETL Framework is a Python framework to build data pipelines. - -Contributed functionality (ETL runners, downloader,...) are available as subcommands.""" - + (""" - -To run the flask webapp, use 'flask run'. -""") if 'mara_app' in sys.modules else '')) + +Contributed functionality (ETL runners, downloader,...) are available as subcommands.""") @click.option('--debug', default=False, is_flag=True, help="Show debug output") @click.option('--log-stderr', default=False, is_flag=True, help="Send log output to stderr") def cli(debug: bool, log_stderr: bool): From 8f299611bf4d7aeec86d35ec807128696bece386 Mon Sep 17 00:00:00 2001 From: Leo Schick Date: Wed, 29 Nov 2023 16:04:08 +0100 Subject: [PATCH 3/4] refactoring to discover click commands from entry point mara.commands --- mara_cli/_mara_modules.py | 68 --------------------------------------- mara_cli/cli.py | 37 ++++++++------------- 2 files changed, 13 insertions(+), 92 deletions(-) delete mode 100644 mara_cli/_mara_modules.py diff --git a/mara_cli/_mara_modules.py b/mara_cli/_mara_modules.py deleted file mode 100644 index 914ca40..0000000 --- a/mara_cli/_mara_modules.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Internal functions interacting with mara modules""" - -import copy -from logging import Logger -import sys -from types import ModuleType -from typing import Callable, Dict, Iterable - -import click - - -_mara_modules_imported = False - -def import_mara_modules(log: Logger): - """ - Import all installed mara modules - - Args: - log: The application logger. - """ - global _mara_modules_imported - if _mara_modules_imported: - return - - import pkg_resources - import importlib - - for i in pkg_resources.working_set: - package_name = i.key - #version = i.version - if package_name.startswith('mara-'): - log.debug(f"Import module {package_name}") - importlib.import_module(name=package_name.replace('-', '_'), package=package_name) - - _mara_modules_imported = True - - -def module_functionalities(module: ModuleType, MARA_XXX: str, type) -> []: - """ - Returns some functionalities of a module that is declared in a MARA_XXX variable or function - - `module.MARA_XXX` can be - - a function that returns a list or dict - - a list - - a dict - """ - if MARA_XXX in dir(module): - functionalities = getattr(module, MARA_XXX) - if isinstance(functionalities, Callable): - functionalities = functionalities() - if isinstance(functionalities, Dict): - functionalities = functionalities.values() - if not isinstance(functionalities, Iterable): - raise TypeError( - f'{module.__name__}.{MARA_XXX} should be or return a list or dict of {type.__name__}. Got "{functionalities}".') - for functionality in functionalities: - if not isinstance(functionality, type): - raise TypeError(f'In {module.__name__}.{MARA_XXX}: Expected a {type.__name__}, got "{functionality}"') - return functionalities - else: - return [] - - -def get_contributed_functionality(name: str, type) -> Dict[ModuleType, object]: - """Gets the contributed functionality for one MARA_ variable""" - for module in copy.copy(sys.modules).values(): - for obj in module_functionalities(module, name, click.Command): - yield (module, obj) diff --git a/mara_cli/cli.py b/mara_cli/cli.py index bd1a19e..315cc6d 100644 --- a/mara_cli/cli.py +++ b/mara_cli/cli.py @@ -25,7 +25,6 @@ def cli(debug: bool, log_stderr: bool): def setup_commandline_commands(): """Needs to be run before click itself is run so the config which contributes click commands is available""" - from ._mara_modules import import_mara_modules, get_contributed_functionality commandline_debug = '--debug' in sys.argv # makefiles expect all log in stdout. Send to stderr only if asked to log_stream = sys.stderr if '--log-stderr' in sys.argv else sys.stdout @@ -38,30 +37,20 @@ def setup_commandline_commands(): logging.root.setLevel(logging.DEBUG) log.debug("Enabled debug output via commandline") - # Import all installed mara packages - import_mara_modules(log) - - known_names = [] - for module, command in get_contributed_functionality('MARA_CLICK_COMMANDS', click.Command): - if command and 'callback' in command.__dict__ and command.__dict__['callback']: - package = command.__dict__['callback'].__module__.rpartition('.')[0] - # Give a package a chance to put all their commands as subcommands of the main package name. - # For that to work we have to make sure we do not add multiple commands with the same name - if isinstance(command, click.MultiCommand): - if command.name.startswith('mara-'): - name = command.name[5:] - else: - name = command.name - else: - name = package + '.' + command.name - if name in known_names: - callback = command.__dict__['callback'] - func_name = f"{callback.__module__}{callback.__name__}" - raise RuntimeError(f"Attempting to add conflicting click.Commands for name '{name}': {func_name}") - known_names.append(name) - command.name = name + if sys.version_info < (3, 10): + from importlib_metadata import entry_points + else: + from importlib.metadata import entry_points + + discovered_plugins = entry_points(group='mara.commands') + for entry_point in discovered_plugins: + command = entry_point.load() + command.name = entry_point.name + if not isinstance(command, click.Command): + log.warn(f"Entry point '{entry_point}' is ignored because it does not return a click command.") + else: cli.add_command(command) - + if not cli.commands: # Could not find any command in the installed modules print(RED + "No mara package is installed which provide commands" + RESET, file=sys.stderr) From e84a19d8e7c4d08b1a492b3f2912bcf43b631dff Mon Sep 17 00:00:00 2001 From: Leo Schick Date: Fri, 1 Dec 2023 10:28:28 +0100 Subject: [PATCH 4/4] change ETL to ELT --- mara_cli/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mara_cli/cli.py b/mara_cli/cli.py index 315cc6d..d4bac32 100644 --- a/mara_cli/cli.py +++ b/mara_cli/cli.py @@ -12,7 +12,7 @@ @click.group(help=""" -The Mara ETL Framework is a Python framework to build data pipelines. +The Mara ELT Framework is a Python framework to build data pipelines. Contributed functionality (ETL runners, downloader,...) are available as subcommands.""") @click.option('--debug', default=False, is_flag=True, help="Show debug output")