From 939f7b153b7bcebb3ae8d34fa9b59f9639a80470 Mon Sep 17 00:00:00 2001 From: olli <144932831+OLILHR@users.noreply.github.com> Date: Wed, 21 Aug 2024 08:29:43 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=96=20Prepare=20project=20for=20releas?= =?UTF-8?q?e=20and=20publishing=20(#19)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix license classifier * Rebrand from `codebase` to `epitaxy` * Simplify `CaseInsensitivePathCompleter` using `expanduser` * Simplify `parse_extensions()` function * Add type hints to utilities and filter functions --- ...seignore.example => .epitaxyignore.example | 8 +- .gitignore | 6 +- README.md | 30 ++--- codebase.svg | 1 - epitaxy.svg | 1 + {codebase => epitaxy}/__init__.py | 0 {codebase => epitaxy}/__main__.py | 0 {codebase => epitaxy}/filter.py | 41 ++++--- {codebase => epitaxy}/main.py | 105 ++++++++---------- {codebase => epitaxy}/py.typed | 2 +- {codebase => epitaxy}/utilities.py | 33 +++--- pyproject.toml | 12 +- tox.ini | 6 +- unittests/conftest.py | 14 +-- unittests/test_codebaseignore.py | 57 ---------- unittests/test_epitaxyignore.py | 57 ++++++++++ ...est_extension_filter.py => test_filter.py} | 20 ++-- unittests/test_utilities.py | 53 ++++----- 18 files changed, 214 insertions(+), 232 deletions(-) rename .codebaseignore.example => .epitaxyignore.example (83%) delete mode 100644 codebase.svg create mode 100644 epitaxy.svg rename {codebase => epitaxy}/__init__.py (100%) rename {codebase => epitaxy}/__main__.py (100%) rename {codebase => epitaxy}/filter.py (72%) rename {codebase => epitaxy}/main.py (57%) rename {codebase => epitaxy}/py.typed (67%) rename {codebase => epitaxy}/utilities.py (81%) delete mode 100644 unittests/test_codebaseignore.py create mode 100644 unittests/test_epitaxyignore.py rename unittests/{test_extension_filter.py => test_filter.py} (79%) diff --git a/.codebaseignore.example b/.epitaxyignore.example similarity index 83% rename from .codebaseignore.example rename to .epitaxyignore.example index ec8d3d5..9a1322f 100644 --- a/.codebaseignore.example +++ b/.epitaxyignore.example @@ -53,8 +53,8 @@ README.md # "node_modules/", # # Python # "*.pyc", -# # codebase-specific files -# ".codebaseignore", -# ".codebaseignore.example", -# "codebase.md", +# # epitaxy-specific files +# ".epitaxyignore", +# ".epitaxyignore.example", +# "epitaxy.md", # ] \ No newline at end of file diff --git a/.gitignore b/.gitignore index e9de942..9b6d44c 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,6 @@ htmlcov/ .DS_Store Thumbs.db -# coodebase-specific files -.codebaseignore -codebase.md +# epitaxy-specific files +.epitaxyignore +epitaxy.md diff --git a/README.md b/README.md index 909602d..69c6cdf 100644 --- a/README.md +++ b/README.md @@ -1,40 +1,40 @@
-codebase.svg
+epitaxy.svg
-šŸ§Š data consolidation.
+šŸ§Š codebase consolidation.
-![PyPI status badge](https://img.shields.io/pypi/v/alloy?labelColor=30363D&color=fccccc) -![Unittests status badge](https://github.com/OLILHR/codebase/workflows/Unittests/badge.svg) -![Coverage status badge](https://github.com/OLILHR/codebase/workflows/Coverage/badge.svg) -![Pylint status badge](https://github.com/OLILHR/codebase/workflows/Linting/badge.svg) -![Formatting status badge](https://github.com/OLILHR/codebase/workflows/Formatting/badge.svg) +![PyPI status badge](https://img.shields.io/pypi/v/epitaxy?labelColor=30363D&color=fccccc) +![Unittests status badge](https://github.com/OLILHR/epitaxy/workflows/Unittests/badge.svg) +![Coverage status badge](https://github.com/OLILHR/epitaxy/workflows/Coverage/badge.svg) +![Pylint status badge](https://github.com/OLILHR/epitaxy/workflows/Linting/badge.svg) +![Formatting status badge](https://github.com/OLILHR/epitaxy/workflows/Formatting/badge.svg)
## ā„¹ļø Installation ```sh -$ pip install codebase +$ pip install epitaxy ``` > [!NOTE] -> It is generally recommended to add a `.codebaseignore` file to the root directory of the codebase you'd like to consolidate. -> All files, folders and file extensions specified in `.codebaseignore` will be excluded from the output file. -> Please refer to the `.codebaseignore.example` for suggestions regarding what to include in `.codebaseignore`. +> It is generally recommended to add an `.epitaxyignore` file to the root directory of the codebase you'd like to consolidate. +> All files, folders and file extensions specified in `.epitaxyignore` will be excluded from the output file. +> Please refer to the `.epitaxyignore.example` for suggestions regarding what to include in `.epitaxyignore`. To execute the script, simply run ```sh -$ codebase +$ epitaxy ``` and follow the prompts by providing an input directory, an output file destination and optional filters. -Alternatively, the script can also be executed using a single command with the appropriate flags: +Alternatively, the script can be executed using a single command with the appropriate flags: ```sh -$ codebase -i -o -f <(optional) filters> +$ epitaxy -i -o -f <(optional) filters> ``` -For further information, run `$ codebase --help`. +For further information, run `$ epitaxy --help`. diff --git a/codebase.svg b/codebase.svg deleted file mode 100644 index 9bd9208..0000000 --- a/codebase.svg +++ /dev/null @@ -1 +0,0 @@ -Artboard 7 \ No newline at end of file diff --git a/epitaxy.svg b/epitaxy.svg new file mode 100644 index 0000000..9476d66 --- /dev/null +++ b/epitaxy.svg @@ -0,0 +1 @@ +Artboard 7 \ No newline at end of file diff --git a/codebase/__init__.py b/epitaxy/__init__.py similarity index 100% rename from codebase/__init__.py rename to epitaxy/__init__.py diff --git a/codebase/__main__.py b/epitaxy/__main__.py similarity index 100% rename from codebase/__main__.py rename to epitaxy/__main__.py diff --git a/codebase/filter.py b/epitaxy/filter.py similarity index 72% rename from codebase/filter.py rename to epitaxy/filter.py index 5d26557..7e27f0e 100644 --- a/codebase/filter.py +++ b/epitaxy/filter.py @@ -1,31 +1,32 @@ import os +from typing import Any, Callable, List, Optional -def skip_ignore_list_comments(file_path): - ignore_list = [] +def skip_ignore_list_comments(file_path: str) -> List[str]: + ignore_list: List[str] = [] with open(file_path, "r", encoding="utf-8") as f: for line in f: line = line.strip() - if line and not line.startswith("#"): # ignore comments in .codebaseignore and DEFAULT_IGNORE_LIST + if line and not line.startswith("#"): # ignore comments in .epitaxyignore and DEFAULT_IGNORE_LIST ignore_list.append(line) return ignore_list -def read_codebaseignore(project_root, extension_filter): +def read_epitaxyignore(project_root: str, extension_filter: Optional[List[str]]) -> Callable[[str], bool]: """ - Excludes all files, extensions and directories specified in .codebaseignore, located inside the root directory. + Excludes all files, extensions and directories specified in .epitaxyignore, located inside the root directory. 
""" - codebaseignore = os.path.join(project_root, ".codebaseignore") + epitaxyignore = os.path.join(project_root, ".epitaxyignore") default_ignore_list = DEFAULT_IGNORE_LIST.copy() - ignore_list = [] - if os.path.exists(codebaseignore): - with open(codebaseignore, "r", encoding="utf-8") as f: + ignore_list: List[str] = [] + if os.path.exists(epitaxyignore): + with open(epitaxyignore, "r", encoding="utf-8") as f: ignore_list = [line.strip() for line in f if line.strip() and not line.startswith("#")] default_ignore_list.extend(ignore_list) - def exclude_files(file_path): + def exclude_files(file_path: str) -> bool: file_path = file_path.replace(os.sep, "/") if extension_filter: @@ -58,11 +59,11 @@ def exclude_files(file_path): return exclude_files -def filter_extensions(file_path, extensions): +def filter_extensions(file_path: str, extensions: Optional[List[str]]) -> bool: """ Optional filter to include only certain provided extensions in the consolidated markdown file. If no extensions are provided, all files are considered except files, extensions and directories that are explicitly excluded in the - specified .codebaseignore file, located inside the root directory. + specified .epitaxyignore file, located inside the root directory. """ if not extensions: return True @@ -70,17 +71,15 @@ def filter_extensions(file_path, extensions): return file_extension[1:] in extensions -def parse_extensions(_csx, _param, value): +def parse_extensions(_csx: Any, _param: Any, value: Optional[List[str]]) -> Optional[List[str]]: """ Converts a comma-separated string of file extensions into a list of individual extensions, which - in turn - is parsed to the main function to filter files during the consolidation process. """ - if not value: - return None - return [ext.strip() for item in value for ext in item.split(",")] + return [ext.strip() for item in value for ext in item.split(",")] if value else None -DEFAULT_IGNORE_LIST = [ +DEFAULT_IGNORE_LIST: List[str] = [ ".cache/", ".coverage", "dist/", @@ -94,8 +93,8 @@ def parse_extensions(_csx, _param, value): "node_modules/", # Python "*.pyc", - # codebase-specific files - ".codebaseignore", - ".codebaseignore.example", - "codebase.md", + # epitaxy-specific files + ".epitaxyignore", + ".epitaxyignore.example", + "epitaxy.md", ] diff --git a/codebase/main.py b/epitaxy/main.py similarity index 57% rename from codebase/main.py rename to epitaxy/main.py index a5d9b48..1602d26 100644 --- a/codebase/main.py +++ b/epitaxy/main.py @@ -1,10 +1,12 @@ import logging import os from dataclasses import dataclass +from typing import Any, Iterable, List, Optional import click from prompt_toolkit import prompt from prompt_toolkit.completion import Completer, Completion +from prompt_toolkit.document import Document from .filter import parse_extensions from .utilities import NoMatchingExtensionError, consolidate @@ -15,17 +17,13 @@ _logger = logging.getLogger(__name__) _logger.setLevel(GLOBAL_LOG_LEVEL) -MAX_FILE_SIZE = 1024 * 1024 * 10 # 10 MB +MAX_FILE_SIZE: int = 1024 * 1024 * 10 # 10 MB -def get_project_root(): - """ - Required for input/output path prompts to display the project root as default path. 
- """ - - current_dir = os.path.abspath(os.getcwd()) +def get_project_root() -> str: + current_dir: str = os.path.abspath(os.getcwd()) - root_indicators = [ + root_indicators: List[str] = [ ".git", "package.json", "pdm.lock", @@ -45,23 +43,19 @@ def get_project_root(): @dataclass class CaseInsensitivePathCompleter(Completer): only_directories: bool = False - expanduser: bool = True - def get_completions(self, document, complete_event): - text = document.text_before_cursor - if len(text) == 0: + def get_completions(self, document: Document, complete_event: Any) -> Iterable[Completion]: + text: str = os.path.expanduser(document.text_before_cursor) + if not text: return - directory = os.path.dirname(text) - prefix = os.path.basename(text) + directory: str = os.path.dirname(text) + prefix: str = os.path.basename(text) - if os.path.isabs(text): - full_directory = os.path.abspath(directory) - else: - full_directory = os.path.abspath(os.path.join(os.getcwd(), directory)) + full_directory: str = os.path.abspath(directory) try: - suggestions = os.listdir(full_directory) + suggestions: List[str] = os.listdir(full_directory) except OSError: return @@ -69,14 +63,12 @@ def get_completions(self, document, complete_event): if suggestion.lower().startswith(prefix.lower()): if self.only_directories and not os.path.isdir(os.path.join(full_directory, suggestion)): continue - completion = suggestion[len(prefix) :] - display = suggestion - yield Completion(completion, start_position=0, display=display) + yield Completion(suggestion[len(prefix) :], start_position=0, display=suggestion) -def path_prompt(message, default, exists=False): +def path_prompt(message: str, default: str, exists: bool = False) -> str: """ - Enables basic shell features, like relative path suggestion and autocompletion, for CLI prompts. + Required by prompt_toolkit to enable basic shell features for CLI prompts, like path suggestion and autocompletion. 
""" path_completer = CaseInsensitivePathCompleter() @@ -84,11 +76,11 @@ def path_prompt(message, default, exists=False): default += os.path.sep while True: - path = prompt(f"{message} ", default=default, completer=path_completer) - full_path = os.path.abspath(os.path.expanduser(path)) + path: str = prompt(f"{message} ", default=default, completer=path_completer) + full_path: str = os.path.abspath(os.path.expanduser(path)) if not exists or os.path.exists(full_path): return full_path - print(f"šŸ”“ {full_path} DOES NOT EXIST.") + logging.error("šŸ”“ %s DOES NOT EXIST.", full_path) @click.command() @@ -100,26 +92,23 @@ def path_prompt(message, default, exists=False): "extension_filter", callback=parse_extensions, multiple=True, - help="enables optional filtering by extensions, for instance: -f py,json", # markdown contains only .py/.json files + help="enables optional filtering by extensions, for instance: -f py,json", ) # pylint: disable=too-many-locals -def generate_markdown(input_path, output_path, extension_filter): - no_flags_provided = input_path is None and output_path is None and not extension_filter - project_root = get_project_root() - - if input_path is None: - input_path = path_prompt("šŸ“ INPUT PATH OF YOUR TARGET DIRECTORY -", default=project_root, exists=True) - else: - input_path = os.path.abspath(input_path) - - if output_path is None: - output_path = path_prompt("šŸ“ OUTPUT PATH FOR THE MARKDOWN FILE -", default=project_root) - else: - output_path = os.path.abspath(output_path) +def generate_markdown( + input_path: Optional[str], output_path: Optional[str], extension_filter: Optional[List[str]] +) -> None: + no_flags_provided: bool = input_path is None and output_path is None and not extension_filter + project_root: str = get_project_root() + + input_path = input_path or path_prompt( + "šŸ“ INPUT PATH OF YOUR TARGET DIRECTORY -", default=project_root, exists=True + ) + output_path = output_path or path_prompt("šŸ“ OUTPUT PATH FOR THE MARKDOWN FILE -", default=project_root) - extensions = extension_filter + extensions: Optional[List[str]] = extension_filter if no_flags_provided: - extensions_input = click.prompt( + extensions_input: str = click.prompt( "šŸ”Ž OPTIONAL FILTER FOR SPECIFIC EXTENSIONS (COMMA-SEPARATED)", default="", show_default=False, @@ -134,31 +123,29 @@ def generate_markdown(input_path, output_path, extension_filter): input_path, extensions ) except NoMatchingExtensionError: - _logger.error("\nāš ļø NO FILES MATCH THE SPECIFIED EXTENSION(S) - PLEASE REVIEW YOUR .codebaseignore FILE.") + _logger.error("\nāš ļø NO FILES MATCH THE SPECIFIED EXTENSION(S) - PLEASE REVIEW YOUR .epitaxyignore FILE.") _logger.error("šŸ”“ NO MARKDOWN FILE GENERATED.\n") return if len(markdown_content.encode("utf-8")) > MAX_FILE_SIZE: - _logger.error( - "\n" + "šŸ”“ GENERATED CONTENT EXCEEDS 10 MB. CONSIDER ADDING LARGER FILES TO YOUR .codebaseignore." - ) + _logger.error("\n" + "šŸ”“ GENERATED CONTENT EXCEEDS 10 MB. 
CONSIDER ADDING LARGER FILES TO YOUR .epitaxyignore.") return - codebase = os.path.join(output_path, "codebase.md") + epitaxy: str = os.path.join(output_path, "epitaxy.md") os.makedirs(output_path, exist_ok=True) - with open(codebase, "w", encoding="utf-8") as f: + with open(epitaxy, "w", encoding="utf-8") as f: f.write(markdown_content) - codebase_size = os.path.getsize(codebase) - if codebase_size < 1024: - file_size = f"{codebase_size} bytes" - elif codebase_size < 1024 * 1024: - file_size = f"{codebase_size / 1024:.2f} KB" + epitaxy_size: int = os.path.getsize(epitaxy) + if epitaxy_size < 1024: + file_size: str = f"{epitaxy_size} bytes" + elif epitaxy_size < 1024 * 1024: + file_size = f"{epitaxy_size / 1024:.2f} KB" else: - file_size = f"{codebase_size / (1024 * 1024):.2f} MB" + file_size = f"{epitaxy_size / (1024 * 1024):.2f} MB" - file_type_distribution = " ".join( + file_type_distribution: str = " ".join( f".{file_type} ({percentage:.0f}%)" for file_type, percentage in type_distribution ) @@ -178,7 +165,7 @@ def generate_markdown(input_path, output_path, extension_filter): + "\n" + "šŸŖ™ TOKEN COUNT: %d" + "\n", - codebase, + epitaxy, file_size, file_count, file_type_distribution, @@ -187,7 +174,7 @@ def generate_markdown(input_path, output_path, extension_filter): ) -# to run the script during local development, either execute $ python -m codebase -# or install codebase locally via `pdm install` and simply run $ codebase +# to run the script during local development, either execute $ python -m epitaxy +# or install epitaxy locally via `pdm install` and simply run $ epitaxy if __name__ == "__main__": generate_markdown.main(standalone_mode=False) diff --git a/codebase/py.typed b/epitaxy/py.typed similarity index 67% rename from codebase/py.typed rename to epitaxy/py.typed index 3e7cc69..098a69f 100644 --- a/codebase/py.typed +++ b/epitaxy/py.typed @@ -1,2 +1,2 @@ -# py.typed marks codebase as PEP561 compatible. +# py.typed marks epitaxy as PEP561 compatible. # https://mypy.readthedocs.io/en/stable/installed_packages.html#creating-pep-561-compatible-packages \ No newline at end of file diff --git a/codebase/utilities.py b/epitaxy/utilities.py similarity index 81% rename from codebase/utilities.py rename to epitaxy/utilities.py index de3cdee..6c79523 100644 --- a/codebase/utilities.py +++ b/epitaxy/utilities.py @@ -3,22 +3,23 @@ import re from collections import Counter from dataclasses import dataclass +from typing import List, Optional, Tuple import tiktoken from tqdm import tqdm -from .filter import filter_extensions, read_codebaseignore +from .filter import filter_extensions, read_epitaxyignore _logger = logging.getLogger(__name__) -def remove_trailing_whitespace(content): +def remove_trailing_whitespace(content: str) -> str: content = re.sub(r"\n{3,}", "\n\n", content) content = re.sub(r" +$", "", content, flags=re.MULTILINE) return content -def escape_markdown_characters(file_name): +def escape_markdown_characters(file_name: str) -> str: """ Escapes special characters in file names such as "__init__.py" in order to display paths correctly inside the output markdown file. @@ -27,7 +28,7 @@ def escape_markdown_characters(file_name): return re.sub(special_chars, r"\\\1", file_name) -def count_lines_of_code(content): +def count_lines_of_code(content: str) -> int: """ Counts the lines of code within each code blocks in the output markdown file. 
""" @@ -36,7 +37,7 @@ def count_lines_of_code(content): return lines_of_code -def get_file_type_distribution(markdown_content): +def get_file_type_distribution(markdown_content: str) -> List[Tuple[str, float]]: """ Returns a distribution of the four most common file types in the output markdown file. """ @@ -56,7 +57,7 @@ def get_file_type_distribution(markdown_content): return type_distribution -def count_tokens(text): +def count_tokens(text: str) -> int: """ Encoding for GPT-3.5/GPT-4.0. """ @@ -74,19 +75,21 @@ class NoMatchingExtensionError(Exception): # pylint: disable=too-many-locals -def consolidate(path, extensions=None): +def consolidate( + path: str, extensions: Optional[List[str]] = None +) -> Tuple[str, int, int, int, List[Tuple[str, float]]]: """ Gathers and formats the content and metadata of all files inside a provided input directory, - while taking into account optional extension filters as well as .codebaseignore specific exceptions. + while taking into account optional extension filters as well as .epitaxyignore specific exceptions. """ project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - exclude_files = read_codebaseignore(project_root, extensions) - codebase = "" + exclude_files = read_epitaxyignore(project_root, extensions) + epitaxy = "" file_count = 0 token_count = 0 lines_of_code_count = 0 - matching_filter_extensions = [] + matching_filter_extensions: List[str] = [] for root, dirs, files in os.walk(path): dirs[:] = [d for d in dirs if not exclude_files(os.path.relpath(str(os.path.join(root, d)), path))] for file in files: @@ -122,13 +125,13 @@ def consolidate(path, extensions=None): escaped_relative_path = escape_markdown_characters(relative_path) file_content = f"\n#### {escaped_relative_path}\n\n```{file_extension[1:]}\n{content.rstrip()}\n```\n" - codebase += file_content + epitaxy += file_content token_count += count_tokens(file_content) lines_of_code_count += len(content.split("\n")) progress_bar.update(1) - codebase = remove_trailing_whitespace(codebase) - type_distribution = get_file_type_distribution(codebase) + epitaxy = remove_trailing_whitespace(epitaxy) + type_distribution = get_file_type_distribution(epitaxy) - return codebase, file_count, token_count, lines_of_code_count, type_distribution + return epitaxy, file_count, token_count, lines_of_code_count, type_distribution diff --git a/pyproject.toml b/pyproject.toml index 2755afb..5a65587 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "codebase" +name = "epitaxy" description = "codebase consolidation tool" license = { file = "LICENSE" } requires-python = ">=3.12" @@ -8,7 +8,7 @@ classifiers = [ "Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: Developers", - "License :: OSI Approved :: GPL-3.0 License", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", @@ -26,11 +26,11 @@ dynamic = ["version"] readme = "README.md" [project.urls] -Changelog = "https://github.com/OLILHR/codebase/releases" -Homepage = "https://github.com/OLILHR/codebase" +Changelog = "https://github.com/OLILHR/epitaxy/releases" +Homepage = "https://github.com/OLILHR/epitaxy" [project.scripts] -codebase = "codebase.main:generate_markdown" +epitaxy = "epitaxy.main:generate_markdown" [build-system] requires = ["pdm-backend"] @@ -40,7 +40,7 @@ build-backend = "pdm.backend" source = "scm" [tool.pdm.build] -includes = 
["codebase"] +includes = ["epitaxy"] excludes = ["/unittests"] [tool.pdm.readme] diff --git a/tox.ini b/tox.ini index 77fe41a..fc7cad4 100644 --- a/tox.ini +++ b/tox.ini @@ -28,7 +28,7 @@ deps = -r dev_requirements/requirements-linting.txt setenv = PYTHONPATH = {toxinidir} commands = - pylint codebase + pylint epitaxy pylint unittests --rcfile=unittests/.pylintrc [testenv:spellchecking] @@ -37,7 +37,7 @@ deps = -r requirements.txt -r dev_requirements/requirements-spellchecking.txt commands = - codespell codebase/ + codespell epitaxy/ codespell unittests/ codespell README.md @@ -54,7 +54,7 @@ deps = {[testenv:testing]deps} -r dev_requirements/requirements-typechecking.txt commands = - mypy --show-error-codes codebase --strict + mypy --show-error-codes epitaxy --strict mypy --show-error-codes unittests --strict [testenv:dev] diff --git a/unittests/conftest.py b/unittests/conftest.py index b7f4ce4..82c4403 100644 --- a/unittests/conftest.py +++ b/unittests/conftest.py @@ -11,7 +11,7 @@ def project_root(): @pytest.fixture(scope="function") -def mock_codebaseignore_content(): +def mock_epitaxyignore_content(): return [ ".png", ".svg", @@ -22,12 +22,12 @@ def mock_codebaseignore_content(): @pytest.fixture(scope="function") -def mock_codebaseignore(mock_codebaseignore_content): - return "\n".join(mock_codebaseignore_content) + "\n" +def mock_epitaxyignore(mock_epitaxyignore_content): + return "\n".join(mock_epitaxyignore_content) + "\n" @pytest.fixture(scope="function") -def mock_project(project_root, mock_codebaseignore): +def mock_project(project_root, mock_epitaxyignore): files = { os.path.join(project_root, "markdown.md"): "# markdown content", os.path.join(project_root, "python.py"): 'print("python content")', @@ -35,7 +35,7 @@ def mock_project(project_root, mock_codebaseignore): os.path.join(project_root, "image.png"): "", os.path.join(project_root, "subdirectory", "markup.yml"): "key: value", os.path.join(project_root, "subdirectory", "vector.svg"): "", - os.path.join(project_root, ".codebaseignore"): mock_codebaseignore, + os.path.join(project_root, ".epitaxyignore"): mock_epitaxyignore, } return files @@ -58,7 +58,7 @@ def _mock_walk(top): parts = relpath.split(os.sep) if len(parts) > 1: directories.add(parts[0]) - elif len(parts) == 1 and parts[0] != ".codebaseignore": + elif len(parts) == 1 and parts[0] != ".epitaxyignore": files.append(parts[0]) yield top, list(directories), files @@ -80,4 +80,4 @@ def _mock_walk(top): monkeypatch.setattr("tiktoken.get_encoding", mock_tiktoken.get_encoding) # Mock the entire tiktoken module - monkeypatch.setattr("codebase.utilities.tiktoken", mock_tiktoken) + monkeypatch.setattr("epitaxy.utilities.tiktoken", mock_tiktoken) diff --git a/unittests/test_codebaseignore.py b/unittests/test_codebaseignore.py deleted file mode 100644 index e534073..0000000 --- a/unittests/test_codebaseignore.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -from unittest.mock import mock_open, patch - -from codebase.filter import read_codebaseignore, skip_ignore_list_comments - - -def test_read_codebaseignore( - mock_codebaseignore, - mock_codebaseignore_content, - project_root, -): - - assert ".png" in mock_codebaseignore_content - assert ".svg" in mock_codebaseignore_content - - expected_path = os.path.join(project_root, ".codebaseignore") - - with patch("os.path.exists", return_value=True): - with patch("builtins.open", mock_open(read_data=mock_codebaseignore)) as mock_file: - exclude = read_codebaseignore(project_root, []) - - test_files = [ - "test.png", - "test.svg", - 
"test.log", - "node_modules/test.json", - "test.md", - "test.txt", - "test.py", - "test.yml", - ] - for file in test_files: - result = exclude(file) - if file in ["test.png", "test.svg", "test.log"] or file.startswith("node_modules/"): - assert result is True - else: - assert result is False - - mock_file.assert_called_once_with(expected_path, "r", encoding="utf-8") - - -def test_skip_ignore_list_comments(mock_codebaseignore, mock_codebaseignore_content, project_root): - codebaseignore_path = os.path.join(project_root, ".codebaseignore") - - with patch("builtins.open", mock_open(read_data=mock_codebaseignore)) as mock_file: - result = skip_ignore_list_comments(codebaseignore_path) - - mock_file.assert_called_once_with(codebaseignore_path, "r", encoding="utf-8") - expected_result = [line for line in mock_codebaseignore_content if line and not line.startswith("#")] - - assert result == expected_result, f"Expected {expected_result}, but got {result}" - - for content in mock_codebaseignore_content: - if content and not content.startswith("#"): - assert content in result - - assert "# comment" not in result diff --git a/unittests/test_epitaxyignore.py b/unittests/test_epitaxyignore.py new file mode 100644 index 0000000..2698e83 --- /dev/null +++ b/unittests/test_epitaxyignore.py @@ -0,0 +1,57 @@ +import os +from unittest.mock import mock_open, patch + +from epitaxy.filter import read_epitaxyignore, skip_ignore_list_comments + + +def test_read_epitaxyignore( + mock_epitaxyignore, + mock_epitaxyignore_content, + project_root, +): + + assert ".png" in mock_epitaxyignore_content + assert ".svg" in mock_epitaxyignore_content + + expected_path = os.path.join(project_root, ".epitaxyignore") + + with patch("os.path.exists", return_value=True): + with patch("builtins.open", mock_open(read_data=mock_epitaxyignore)) as mock_file: + exclude = read_epitaxyignore(project_root, []) + + test_files = [ + "test.png", + "test.svg", + "test.log", + "node_modules/test.json", + "test.md", + "test.txt", + "test.py", + "test.yml", + ] + for file in test_files: + result = exclude(file) + if file in ["test.png", "test.svg", "test.log"] or file.startswith("node_modules/"): + assert result is True + else: + assert result is False + + mock_file.assert_called_once_with(expected_path, "r", encoding="utf-8") + + +def test_skip_ignore_list_comments(mock_epitaxyignore, mock_epitaxyignore_content, project_root): + epitaxyignore_path = os.path.join(project_root, ".epitaxyignore") + + with patch("builtins.open", mock_open(read_data=mock_epitaxyignore)) as mock_file: + result = skip_ignore_list_comments(epitaxyignore_path) + + mock_file.assert_called_once_with(epitaxyignore_path, "r", encoding="utf-8") + expected_result = [line for line in mock_epitaxyignore_content if line and not line.startswith("#")] + + assert result == expected_result, f"Expected {expected_result}, but got {result}" + + for content in mock_epitaxyignore_content: + if content and not content.startswith("#"): + assert content in result + + assert "# comment" not in result diff --git a/unittests/test_extension_filter.py b/unittests/test_filter.py similarity index 79% rename from unittests/test_extension_filter.py rename to unittests/test_filter.py index 48c1fb1..58bf604 100644 --- a/unittests/test_extension_filter.py +++ b/unittests/test_filter.py @@ -1,16 +1,16 @@ import os import re -from codebase.filter import filter_extensions, parse_extensions -from codebase.utilities import consolidate, escape_markdown_characters +from epitaxy.filter import 
filter_extensions, parse_extensions +from epitaxy.utilities import consolidate, escape_markdown_characters def test_consolidate_only_specified_filters( - project_root, mock_project, mock_operations, mock_codebaseignore + project_root, mock_project, mock_operations, mock_epitaxyignore ): # pylint: disable=unused-argument filtered_codebase, *_ = consolidate(project_root, extensions=["md", "txt"]) - assert not any(extension in mock_codebaseignore for extension in [".md", ".txt", ".py", ".yml"]) + assert not any(extension in mock_epitaxyignore for extension in [".md", ".txt", ".py", ".yml"]) assert re.search(rf"#### {re.escape(escape_markdown_characters('markdown.md'))}", filtered_codebase) assert re.search(rf"#### {re.escape(escape_markdown_characters('text.txt'))}", filtered_codebase) @@ -20,21 +20,21 @@ def test_consolidate_only_specified_filters( filtered_codebase, ) - assert ".png" in mock_codebaseignore + assert ".png" in mock_epitaxyignore assert not re.search(rf"#### {re.escape(escape_markdown_characters('image.png'))}", filtered_codebase) - assert ".svg" in mock_codebaseignore + assert ".svg" in mock_epitaxyignore assert not re.search( rf"#### {re.escape(escape_markdown_characters(os.path.join('subdirectory', 'vector.svg')))}", filtered_codebase, ) -def test_extension_filter_bypasses_codebaseignore( - project_root, mock_project, mock_operations, mock_codebaseignore +def test_filter_bypasses_epitaxyignore( + project_root, mock_project, mock_operations, mock_epitaxyignore ): # pylint: disable=unused-argument filtered_codebase, *_ = consolidate(project_root, extensions=["svg"]) - assert ".svg" in mock_codebaseignore + assert ".svg" in mock_epitaxyignore assert re.search( rf"#### {re.escape(escape_markdown_characters(os.path.join('subdirectory', 'vector.svg')))}", filtered_codebase, @@ -60,6 +60,6 @@ def test_filter_extensions_edge_cases(): def test_parse_extensions_edge_cases(): - assert parse_extensions(None, None, "") is None + assert parse_extensions(None, None, []) is None assert parse_extensions(None, None, ["py, js, css"]) == ["py", "js", "css"] assert parse_extensions(None, None, ["py", "js", "css"]) == ["py", "js", "css"] diff --git a/unittests/test_utilities.py b/unittests/test_utilities.py index 852b184..1e8ab43 100644 --- a/unittests/test_utilities.py +++ b/unittests/test_utilities.py @@ -3,49 +3,42 @@ import pytest -from codebase.utilities import consolidate, escape_markdown_characters, remove_trailing_whitespace +from epitaxy.utilities import consolidate, escape_markdown_characters, remove_trailing_whitespace def test_consolidate_excludes_ignored_files( project_root, mock_project, mock_operations ): # pylint: disable=unused-argument - codebase, *_ = consolidate(project_root) - codebaseignore = mock_project[os.path.join(project_root, ".codebaseignore")] + epitaxy, *_ = consolidate(project_root) + epitaxyignore = mock_project[os.path.join(project_root, ".epitaxyignore")] - assert ".png" in codebaseignore - assert ".svg" in codebaseignore - assert not re.search(rf"#### {re.escape(escape_markdown_characters('image.png'))}", codebase) - assert not re.search(rf"#### {re.escape(escape_markdown_characters('vector.svg'))}", codebase) + assert ".png" in epitaxyignore + assert ".svg" in epitaxyignore + assert not re.search(rf"#### {re.escape(escape_markdown_characters('image.png'))}", epitaxy) + assert not re.search(rf"#### {re.escape(escape_markdown_characters('vector.svg'))}", epitaxy) - assert ".markdown.md" not in codebaseignore - assert ".python.py" not in codebaseignore - assert 
"text.txt" not in codebaseignore - assert re.search(rf"#### {re.escape(escape_markdown_characters('markdown.md'))}", codebase) - assert re.search(rf"#### {re.escape(escape_markdown_characters('python.py'))}", codebase) - assert re.search(rf"#### {re.escape(escape_markdown_characters('text.txt'))}", codebase) + assert ".markdown.md" not in epitaxyignore + assert ".python.py" not in epitaxyignore + assert "text.txt" not in epitaxyignore + assert re.search(rf"#### {re.escape(escape_markdown_characters('markdown.md'))}", epitaxy) + assert re.search(rf"#### {re.escape(escape_markdown_characters('python.py'))}", epitaxy) + assert re.search(rf"#### {re.escape(escape_markdown_characters('text.txt'))}", epitaxy) def test_consolidate_considers_subdirectories( project_root, mock_project, mock_operations ): # pylint: disable=unused-argument - codebase, *_ = consolidate(project_root) + epitaxy, *_ = consolidate(project_root) - print(f"Mock project structure: {mock_project}") - print(f"Consolidated codebase:\n{codebase}") - - assert re.search(rf"#### {re.escape(escape_markdown_characters('markdown.md'))}", codebase) - assert re.search(rf"#### {re.escape(escape_markdown_characters('text.txt'))}", codebase) - assert re.search(rf"#### {re.escape(escape_markdown_characters('python.py'))}", codebase) + assert re.search(rf"#### {re.escape(escape_markdown_characters('markdown.md'))}", epitaxy) + assert re.search(rf"#### {re.escape(escape_markdown_characters('text.txt'))}", epitaxy) + assert re.search(rf"#### {re.escape(escape_markdown_characters('python.py'))}", epitaxy) subdir_yml_path = os.path.join("subdirectory", "markup.yml") - assert re.search( - rf"#### {re.escape(escape_markdown_characters(subdir_yml_path))}", codebase - ), f"File {subdir_yml_path} not found in consolidated output" + assert re.search(rf"#### {re.escape(escape_markdown_characters(subdir_yml_path))}", epitaxy) subdir_svg_path = os.path.join("subdirectory", "vector.svg") - assert not re.search( - rf"#### {re.escape(escape_markdown_characters(subdir_svg_path))}", codebase - ), f"File {subdir_svg_path} should be excluded as per .codebaseignore" + assert not re.search(rf"#### {re.escape(escape_markdown_characters(subdir_svg_path))}", epitaxy) def test_consolidate_file_token_count(project_root, mock_project, mock_operations): # pylint: disable=unused-argument @@ -55,7 +48,7 @@ def test_consolidate_file_token_count(project_root, mock_project, mock_operation [ f for f in mock_project.keys() - if not f.endswith(".codebaseignore") and not f.endswith(".png") and not f.endswith(".svg") + if not f.endswith(".epitaxyignore") and not f.endswith(".png") and not f.endswith(".svg") ] ) @@ -69,7 +62,7 @@ def test_consolidate_line_of_code_count(project_root, mock_project, mock_operati expected_lines_of_code_count = sum( len(content.split("\n")) for file_path, content in mock_project.items() - if not file_path.endswith((".codebaseignore", ".png", ".svg")) + if not file_path.endswith((".epitaxyignore", ".png", ".svg")) ) assert lines_of_code_count == expected_lines_of_code_count @@ -78,7 +71,7 @@ def test_consolidate_line_of_code_count(project_root, mock_project, mock_operati def test_consolidate_file_type_distribution( project_root, mock_project, mock_operations ): # pylint: disable=unused-argument - codebase, file_count, *_ = consolidate(project_root) + epitaxy, file_count, *_ = consolidate(project_root) expected_types = { "py": 1, # mock_project/python.py @@ -91,7 +84,7 @@ def test_consolidate_file_type_distribution( assert file_count == 
file_type_distribution for file_type in expected_types: - assert re.search(rf"#### .*\.{file_type.lower()}", codebase, re.IGNORECASE) + assert re.search(rf"#### .*\.{file_type.lower()}", epitaxy, re.IGNORECASE) def test_consolidate_removes_trailing_whitespace():