Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🔍 Add optional extension filter #4

Merged
merged 7 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
unittests/** linguist-vendored
51 changes: 9 additions & 42 deletions alloy/collector.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,20 @@
import logging
import os

_logger = logging.getLogger(__name__)


def read_alloyignore(project_root):
"""
Excludes all files, extensions and directories specified in .alloyignore.
"""
alloyignore = os.path.join(project_root, ".alloyignore")
from alloy.filter import filter_extensions, read_alloyignore

if not os.path.exists(alloyignore):
return lambda _: False

ignore_list = []
with open(alloyignore, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line and not line.startswith("#"):
ignore_list.append(line) # ignore comments in .alloyignore

def exclude_files(file_path):
for pattern in ignore_list:
if pattern.startswith("/"): # covers absolute paths from the root
if file_path.startswith(pattern[1:]):
return True
elif pattern.endswith("/"): # ignores certain directories
if any(part == pattern[:-1] for part in file_path.split(os.sep)):
return True
elif pattern.startswith("*."): # ignores certain file extensions
if file_path.endswith(pattern[1:]):
return True
elif pattern.endswith("*"): # ignores certain files with depending on their prefixes
if os.path.basename(file_path).startswith(pattern[:-1]):
return True
elif pattern in file_path or pattern == os.path.basename(file_path):
return True
return False

return exclude_files
_logger = logging.getLogger(__name__)


def consolidate(path):
def consolidate(path, extensions=None):
"""
Consolidates the content of all files from a given directory into a single markdown file.
Consolidates the content of all files from a given directory into a single markdown file. Any files, directories and
extensions specified in .alloyignore are excluded. If optional file extensions are provided, only files with these
extensions will be included in the consolidated markdown file, regardless of whether they are listed in .alloyignore
or not.
"""
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
exclude_files = read_alloyignore(project_root)
exclude_files = read_alloyignore(project_root, extensions)
codebase = ""

for root, dirs, files in os.walk(path):
Expand All @@ -56,9 +24,8 @@ def consolidate(path):
file_path = os.path.join(root, file)
relative_path = os.path.relpath(str(file_path), path)

if exclude_files(relative_path):
if (extensions and not filter_extensions(file_path, extensions)) or exclude_files(relative_path):
continue

_, file_extension = os.path.splitext(file)

try:
Expand Down
66 changes: 66 additions & 0 deletions alloy/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os


def read_alloyignore(project_root, extension_filter=None):
    """
    Builds a predicate that tells whether a file is excluded by the .alloyignore file located in ``project_root``.

    Blank lines and lines starting with ``#`` in .alloyignore are skipped. Every remaining line is a pattern; the
    first matching rule decides how it is interpreted:

    * ``/prefix``  - the path starts with ``prefix``
    * ``name/``    - one of the path components equals ``name``
    * ``*.ext``    - the path ends with ``.ext``
    * ``prefix*``  - the file name starts with ``prefix``
    * anything else - the pattern occurs anywhere in the path

    :param project_root: directory that is expected to contain the .alloyignore file.
    :param extension_filter: optional collection of extensions without the leading dot (e.g. ``["py", "md"]``). Files
        with one of these extensions are never excluded, even if a pattern in .alloyignore matches them. Defaults to
        ``None`` (no bypass), which keeps the former one-argument call working.
    :return: a function that takes a file path and returns ``True`` if the file has to be skipped. If there is no
        .alloyignore file, the returned function never excludes anything.
    """
    alloyignore = os.path.join(project_root, ".alloyignore")

    if not os.path.exists(alloyignore):
        return lambda _: False

    with open(alloyignore, "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        # drop blank lines and comments in .alloyignore
        ignore_list = [line for line in stripped_lines if line and not line.startswith("#")]

    # pylint: disable=too-many-return-statements
    def exclude_files(file_path):
        # explicitly requested extensions take precedence over .alloyignore
        if extension_filter:
            _, file_extension = os.path.splitext(file_path)
            if file_extension[1:] in extension_filter:
                return False

        for pattern in ignore_list:
            if pattern.startswith("/"):  # covers paths anchored at the root
                if file_path.startswith(pattern[1:]):
                    return True
            elif pattern.endswith("/"):  # ignores certain directories
                if any(part == pattern[:-1] for part in file_path.split(os.sep)):
                    return True
            elif pattern.startswith("*."):  # ignores certain file extensions
                if file_path.endswith(pattern[1:]):
                    return True
            elif pattern.endswith("*"):  # ignores certain files depending on their name prefix
                if os.path.basename(file_path).startswith(pattern[:-1]):
                    return True
            elif pattern in file_path:  # substring match; also covers an exact file name match
                return True
        return False

    return exclude_files


def filter_extensions(file_path, extensions):
    """
    Tells whether a file passes the optional extension filter.

    Without any extensions every file passes; excluding files is then left entirely to the rules of the .alloyignore
    file. Otherwise a file only passes if its extension (compared without the leading dot) is one of ``extensions``.
    """
    if extensions:
        suffix = os.path.splitext(file_path)[1]
        return suffix[1:] in extensions
    return True


def parse_extensions(_csx, _param, value):
    """
    Converts the raw values of the extension filter option into a flat list of individual extensions, which - in
    turn - is passed to the main function to filter files during the consolidation process.

    Every value may hold several comma-separated extensions (``"py,json"``), and the option may be given multiple
    times. Surrounding whitespace and a leading dot are removed (``" .py"`` becomes ``"py"``). Empty entries, e.g.
    caused by a trailing comma, are dropped, because an empty extension would match every file without extension.

    :param _csx: unused; first positional argument of the option callback.
    :param _param: unused; second positional argument of the option callback.
    :param value: a sequence of strings, or a single string.
    :return: list of extensions without leading dot, or ``None`` if no usable extension was provided.
    """
    if not value:
        return None
    if isinstance(value, str):
        # a bare string would otherwise be iterated character by character
        value = (value,)

    extensions = []
    for item in value:
        for raw_extension in item.split(","):
            extension = raw_extension.strip().lstrip(".")
            if extension:
                extensions.append(extension)
    return extensions or None
19 changes: 13 additions & 6 deletions alloy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import click

from alloy.collector import consolidate
from alloy.filter import parse_extensions

GLOBAL_LOG_LEVEL = logging.INFO
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
Expand All @@ -14,11 +15,17 @@

@click.command()
@click.argument("path", type=click.Path(exists=True))
def main(path):
"""
Generates a consolidated markdown file.
"""
markdown_content = consolidate(path)
@click.option(
"--filter",
"-f",
"extensions",
callback=parse_extensions,
multiple=True,
help="Filter files by extension via an optional '-f' flag, for instance: -f py,json,yml",
)
def generate_markdown(path, extensions):
extensions = list(extensions) if extensions else None
markdown_content = consolidate(path, extensions)
project_root = os.path.dirname(os.path.abspath(__file__))
output_file = os.path.join(project_root, "../codebase.md")

Expand All @@ -29,4 +36,4 @@ def main(path):


if __name__ == "__main__":
main() # pylint: disable=no-value-for-parameter
generate_markdown() # pylint: disable=no-value-for-parameter
3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,3 @@ truethy-bool = true

[tool.mypy]
disable_error_code = ["no-untyped-def", "no-untyped-call"]

[tool.pytest.ini_options]
asyncio_mode = "auto"
7 changes: 6 additions & 1 deletion unittests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@


@pytest.fixture
def setup_paths(request):
def unittests_directory(request):
unittests_dir = os.path.dirname(request.module.__file__)
return {
"test_data": os.path.join(unittests_dir, "data"),
}


@pytest.fixture
def alloyignore_path(unittests_directory):
    """Provides the path of the .alloyignore file inside the unit test data directory."""
    test_data_dir = unittests_directory["test_data"]
    return os.path.join(test_data_dir, ".alloyignore")
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
39 changes: 24 additions & 15 deletions unittests/test_file_collector.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
from alloy.collector import consolidate, read_alloyignore
from alloy.collector import consolidate
from alloy.filter import read_alloyignore


def test_consolidate_excludes_png_and_svg(setup_paths):
codebase = consolidate(setup_paths["test_data"])

assert "dummy.md" in codebase
assert "dummy.txt" in codebase
assert "dummy.py" in codebase
assert "dummy.yml" in codebase

assert "dummy.png" not in codebase
assert "dummy.svg" not in codebase


def test_read_alloyignore(setup_paths):
exclude = read_alloyignore(setup_paths["test_data"])
def test_read_alloyignore(unittests_directory, alloyignore_path):
    """Without an extension filter, exactly the extensions listed in .alloyignore are excluded."""
    exclude = read_alloyignore(unittests_directory["test_data"], [])
    with open(alloyignore_path, encoding="utf-8") as ignore_file:
        ignore_rules = ignore_file.read()

    # extensions listed in .alloyignore have to be excluded
    for extension in (".png", ".svg"):
        assert extension in ignore_rules
        assert exclude("test" + extension) is True

    # everything else has to be kept
    for kept_file in ("test.md", "test.txt", "test.py", "test.yml"):
        assert exclude(kept_file) is False


def test_consolidate_excludes_png_and_svg(unittests_directory, alloyignore_path):
    """Without an extension filter, the consolidated output omits the files ignored via .alloyignore."""
    codebase = consolidate(unittests_directory["test_data"])
    with open(alloyignore_path, encoding="utf-8") as ignore_file:
        ignore_rules = ignore_file.read()

    for included in ("dummy_md.md", "dummy_txt.txt", "dummy_py.py", "dummy_yml.yml"):
        assert included in codebase

    ignored_files = ((".png", "dummy_png.png"), (".svg", "dummy_svg.svg"))
    for extension, file_name in ignored_files:
        assert extension in ignore_rules
        assert file_name not in codebase
33 changes: 33 additions & 0 deletions unittests/test_file_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from alloy.collector import consolidate


def test_consolidate_specified_filter_extensions(unittests_directory, alloyignore_path):
    """With an extension filter, only files with one of the requested extensions are consolidated."""
    filtered_codebase = consolidate(unittests_directory["test_data"], extensions=["md", "txt"])
    with open(alloyignore_path, encoding="utf-8") as ignore_file:
        ignore_rules = ignore_file.read()

    # none of these extensions is listed in .alloyignore, so only the filter decides about them
    for extension in (".md", ".txt", ".py", ".yml"):
        assert extension not in ignore_rules
    for included in ("dummy_md.md", "dummy_txt.txt"):
        assert included in filtered_codebase
    for omitted in ("dummy_py.py", "dummy_yml.yml"):
        assert omitted not in filtered_codebase

    # ignored in .alloyignore and not requested via the filter
    ignored_files = ((".png", "dummy_png.png"), (".svg", "dummy_svg.svg"))
    for extension, file_name in ignored_files:
        assert extension in ignore_rules
        assert file_name not in filtered_codebase


def test_extension_filter_bypasses_alloyignore(unittests_directory, alloyignore_path):
    """An explicitly requested extension is consolidated even though .alloyignore lists it."""
    filtered_codebase = consolidate(unittests_directory["test_data"], extensions=["svg"])
    with open(alloyignore_path, encoding="utf-8") as ignore_file:
        ignore_rules = ignore_file.read()

    assert ".svg" in ignore_rules
    assert "dummy_svg.svg" in filtered_codebase

    not_requested = (
        "dummy_md.md",
        "dummy_txt.txt",
        "dummy_py.py",
        "dummy_yml.yml",
        "dummy_png.png",
    )
    for file_name in not_requested:
        assert file_name not in filtered_codebase