diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..02192a4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +unittests/** linguist-vendored \ No newline at end of file diff --git a/alloy/collector.py b/alloy/collector.py index bb2890c..437a241 100644 --- a/alloy/collector.py +++ b/alloy/collector.py @@ -1,52 +1,20 @@ import logging import os -_logger = logging.getLogger(__name__) - - -def read_alloyignore(project_root): - """ - Excludes all files, extensions and directories specified in .alloyignore. - """ - alloyignore = os.path.join(project_root, ".alloyignore") +from alloy.filter import filter_extensions, read_alloyignore - if not os.path.exists(alloyignore): - return lambda _: False - - ignore_list = [] - with open(alloyignore, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if line and not line.startswith("#"): - ignore_list.append(line) # ignore comments in .alloyignore - - def exclude_files(file_path): - for pattern in ignore_list: - if pattern.startswith("/"): # covers absolute paths from the root - if file_path.startswith(pattern[1:]): - return True - elif pattern.endswith("/"): # ignores certain directories - if any(part == pattern[:-1] for part in file_path.split(os.sep)): - return True - elif pattern.startswith("*."): # ignores certain file extensions - if file_path.endswith(pattern[1:]): - return True - elif pattern.endswith("*"): # ignores certain files with depending on their prefixes - if os.path.basename(file_path).startswith(pattern[:-1]): - return True - elif pattern in file_path or pattern == os.path.basename(file_path): - return True - return False - - return exclude_files +_logger = logging.getLogger(__name__) -def consolidate(path): +def consolidate(path, extensions=None): """ - Consolidates the content of all files from a given directory into a single markdown file. + Consolidates the content of all files from a given directory into a single markdown file. Any files, directories and + extensions specified in .alloyignore are excluded. If optional file extensions are provided, only files with these + extensions will be included in the consolidated markdown file, regardless of whether they are listed in .alloyignore + or not. """ project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - exclude_files = read_alloyignore(project_root) + exclude_files = read_alloyignore(project_root, extensions) codebase = "" for root, dirs, files in os.walk(path): @@ -56,9 +24,8 @@ def consolidate(path): file_path = os.path.join(root, file) relative_path = os.path.relpath(str(file_path), path) - if exclude_files(relative_path): + if (extensions and not filter_extensions(file_path, extensions)) or exclude_files(relative_path): continue - _, file_extension = os.path.splitext(file) try: diff --git a/alloy/filter.py b/alloy/filter.py new file mode 100644 index 0000000..606320c --- /dev/null +++ b/alloy/filter.py @@ -0,0 +1,66 @@ +import os + + +def read_alloyignore(project_root, extension_filter): + """ + Excludes all files, extensions and directories specified in .alloyignore, located inside the root directory. + """ + alloyignore = os.path.join(project_root, ".alloyignore") + + if not os.path.exists(alloyignore): + return lambda _: False + + ignore_list = [] + with open(alloyignore, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line and not line.startswith("#"): + ignore_list.append(line) # ignore comments in .alloyignore + + # pylint: disable=too-many-return-statements + def exclude_files(file_path): + if extension_filter: + _, file_extension = os.path.splitext(file_path) + if file_extension[1:] in extension_filter: + return False + + for pattern in ignore_list: + if pattern.startswith("/"): # covers absolute paths from the root + if file_path.startswith(pattern[1:]): + return True + elif pattern.endswith("/"): # ignores certain directories + if any(part == pattern[:-1] for part in file_path.split(os.sep)): + return True + elif pattern.startswith("*."): # ignores certain file extensions + if file_path.endswith(pattern[1:]): + return True + elif pattern.endswith("*"): # ignores certain files with depending on their prefixes + if os.path.basename(file_path).startswith(pattern[:-1]): + return True + elif pattern in file_path or pattern == os.path.basename(file_path): + return True + return False + + return exclude_files + + +def filter_extensions(file_path, extensions): + """ + Optional filter to include only certain provided extensions in the consolidated markdown file. If no extensions are + provided, all files are considered except files, extensions and directories that are explicitly excluded in the + specified .alloyignore file, located inside the root directory. + """ + if not extensions: + return True + _, file_extension = os.path.splitext(file_path) + return file_extension[1:] in extensions + + +def parse_extensions(_csx, _param, value): + """ + Converts a comma-separated string of file extensions into a list of individual extensions, which - in turn - is + parsed to the main function to filter files during the consolidation process. + """ + if not value: + return None + return [ext.strip() for item in value for ext in item.split(",")] diff --git a/alloy/main.py b/alloy/main.py index b5cf763..6b2e093 100644 --- a/alloy/main.py +++ b/alloy/main.py @@ -4,6 +4,7 @@ import click from alloy.collector import consolidate +from alloy.filter import parse_extensions GLOBAL_LOG_LEVEL = logging.INFO logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") @@ -14,11 +15,17 @@ @click.command() @click.argument("path", type=click.Path(exists=True)) -def main(path): - """ - Generates a consolidated markdown file. - """ - markdown_content = consolidate(path) +@click.option( + "--filter", + "-f", + "extensions", + callback=parse_extensions, + multiple=True, + help="Filter files by extension via an optional '-f' flag, for instance: -f py,json,yml", +) +def generate_markdown(path, extensions): + extensions = list(extensions) if extensions else None + markdown_content = consolidate(path, extensions) project_root = os.path.dirname(os.path.abspath(__file__)) output_file = os.path.join(project_root, "../codebase.md") @@ -29,4 +36,4 @@ def main(path): if __name__ == "__main__": - main() # pylint: disable=no-value-for-parameter + generate_markdown() # pylint: disable=no-value-for-parameter diff --git a/pyproject.toml b/pyproject.toml index d1787a7..689551b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,3 @@ truethy-bool = true [tool.mypy] disable_error_code = ["no-untyped-def", "no-untyped-call"] - -[tool.pytest.ini_options] -asyncio_mode = "auto" diff --git a/unittests/conftest.py b/unittests/conftest.py index 94475a5..680ccb4 100644 --- a/unittests/conftest.py +++ b/unittests/conftest.py @@ -4,8 +4,13 @@ @pytest.fixture -def setup_paths(request): +def unittests_directory(request): unittests_dir = os.path.dirname(request.module.__file__) return { "test_data": os.path.join(unittests_dir, "data"), } + + +@pytest.fixture +def alloyignore_path(unittests_directory): + return os.path.join(unittests_directory["test_data"], ".alloyignore") diff --git a/unittests/data/dummy/dummy.svg b/unittests/data/dummy_directory/dummy_svg.svg similarity index 100% rename from unittests/data/dummy/dummy.svg rename to unittests/data/dummy_directory/dummy_svg.svg diff --git a/unittests/data/dummy.txt b/unittests/data/dummy_directory/dummy_yml.yml similarity index 100% rename from unittests/data/dummy.txt rename to unittests/data/dummy_directory/dummy_yml.yml diff --git a/unittests/data/dummy.md b/unittests/data/dummy_md.md similarity index 100% rename from unittests/data/dummy.md rename to unittests/data/dummy_md.md diff --git a/unittests/data/dummy.png b/unittests/data/dummy_png.png similarity index 100% rename from unittests/data/dummy.png rename to unittests/data/dummy_png.png diff --git a/unittests/data/dummy.py b/unittests/data/dummy_py.py similarity index 100% rename from unittests/data/dummy.py rename to unittests/data/dummy_py.py diff --git a/unittests/data/dummy/dummy.yml b/unittests/data/dummy_txt.txt similarity index 100% rename from unittests/data/dummy/dummy.yml rename to unittests/data/dummy_txt.txt diff --git a/unittests/test_file_collector.py b/unittests/test_file_collector.py index 677b2ce..c6eb772 100644 --- a/unittests/test_file_collector.py +++ b/unittests/test_file_collector.py @@ -1,25 +1,34 @@ -from alloy.collector import consolidate, read_alloyignore +from alloy.collector import consolidate +from alloy.filter import read_alloyignore -def test_consolidate_excludes_png_and_svg(setup_paths): - codebase = consolidate(setup_paths["test_data"]) - - assert "dummy.md" in codebase - assert "dummy.txt" in codebase - assert "dummy.py" in codebase - assert "dummy.yml" in codebase - - assert "dummy.png" not in codebase - assert "dummy.svg" not in codebase - - -def test_read_alloyignore(setup_paths): - exclude = read_alloyignore(setup_paths["test_data"]) +def test_read_alloyignore(unittests_directory, alloyignore_path): + exclude = read_alloyignore(unittests_directory["test_data"], []) + with open(alloyignore_path, encoding="utf-8") as f: + alloyignore = f.read() + assert ".png" in alloyignore assert exclude("test.png") is True + assert ".svg" in alloyignore assert exclude("test.svg") is True assert exclude("test.md") is False assert exclude("test.txt") is False assert exclude("test.py") is False assert exclude("test.yml") is False + + +def test_consolidate_excludes_png_and_svg(unittests_directory, alloyignore_path): + codebase = consolidate(unittests_directory["test_data"]) + with open(alloyignore_path, encoding="utf-8") as f: + alloyignore = f.read() + + assert "dummy_md.md" in codebase + assert "dummy_txt.txt" in codebase + assert "dummy_py.py" in codebase + assert "dummy_yml.yml" in codebase + + assert ".png" in alloyignore + assert "dummy_png.png" not in codebase + assert ".svg" in alloyignore + assert "dummy_svg.svg" not in codebase diff --git a/unittests/test_file_filter.py b/unittests/test_file_filter.py new file mode 100644 index 0000000..a8ef8f8 --- /dev/null +++ b/unittests/test_file_filter.py @@ -0,0 +1,33 @@ +from alloy.collector import consolidate + + +def test_consolidate_specified_filter_extensions(unittests_directory, alloyignore_path): + filtered_codebase = consolidate(unittests_directory["test_data"], extensions=["md", "txt"]) + with open(alloyignore_path, encoding="utf-8") as f: + alloyignore = f.read() + + assert not any(ext in alloyignore for ext in [".md", ".txt", ".py", ".yml"]) + assert "dummy_md.md" in filtered_codebase + assert "dummy_txt.txt" in filtered_codebase + assert "dummy_py.py" not in filtered_codebase + assert "dummy_yml.yml" not in filtered_codebase + + assert ".png" in alloyignore + assert "dummy_png.png" not in filtered_codebase + assert ".svg" in alloyignore + assert "dummy_svg.svg" not in filtered_codebase + + +def test_extension_filter_bypasses_alloyignore(unittests_directory, alloyignore_path): + filtered_codebase = consolidate(unittests_directory["test_data"], extensions=["svg"]) + with open(alloyignore_path, encoding="utf-8") as f: + alloyignore = f.read() + + assert ".svg" in alloyignore + assert "dummy_svg.svg" in filtered_codebase + + assert "dummy_md.md" not in filtered_codebase + assert "dummy_txt.txt" not in filtered_codebase + assert "dummy_py.py" not in filtered_codebase + assert "dummy_yml.yml" not in filtered_codebase + assert "dummy_png.png" not in filtered_codebase