diff --git a/tests/conftest.py b/tests/conftest.py
index b82aa5a..eb7d50f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -118,6 +118,14 @@ def repo_data():
             },
             "changelog_file": None,
         },
+        "foobar/baz": {
+            "last_changed": "2018-09-17 15:15:15",
+            "readme_file": {
+                "path": "roles/foobar/baz/README.rst",
+                "content": raw_file("repo_files/roles/foobar/baz/README.rst"),
+            },
+            "changelog_file": None,
+        },
     }
 
     return repo, tenants, job_files, role_files
diff --git a/tests/scraper/test_integration.py b/tests/scraper/test_integration.py
index dc583ef..66f2038 100644
--- a/tests/scraper/test_integration.py
+++ b/tests/scraper/test_integration.py
@@ -73,6 +73,7 @@ class MockContents:
     DIR = "dir"
 
     def __init__(self, path, type):
+        self.name = path.split("/")[-1]
         self.path = path
         self.type = type
 
@@ -101,9 +102,10 @@ class MockGitHubRepository(GitHubRepository):
         "zuul.d/jobs.yaml": MOCKED_JOB_CONTENT,
         "roles": {"docker-run": MockContents("roles/docker-run", MockContents.DIR)},
         "roles/docker-run": {
+            "tasks": MockContents("roles/docker-run/tasks", MockContents.DIR),
            "README.rst": MockContents(
                "roles/docker-run/README.rst", MockContents.FILE
-            )
+            ),
         },
         "roles/docker-run/README.rst": MOCKED_ROLE_DESCRIPTION,
         "roles/ignored": MockContents("not a valid role", MockContents.FILE),
@@ -117,25 +119,40 @@ class MockGitHubRepository(GitHubRepository):
             "bar": MockContents("roles/bar", MockContents.DIR),
             "foobar": MockContents("roles/foobar", MockContents.DIR),
             "empty-dir": MockContents("roles/empty-dir", MockContents.DIR),
+            "foobaz": MockContents("roles/foobaz", MockContents.DIR),
         },
         "roles/foo": {
-            "README.md": MockContents("roles/foo/README.md", MockContents.FILE)
+            "defaults": MockContents("roles/foo/defaults", MockContents.DIR),
+            "README.md": MockContents("roles/foo/README.md", MockContents.FILE),
         },
         "roles/bar": {
-            "README.txt": MockContents("roles/bar/README.txt", MockContents.FILE)
+            "library": MockContents("roles/bar/library", MockContents.DIR),
+            "README.txt": MockContents("roles/bar/README.txt", MockContents.FILE),
         },
         "roles/foobar": {
-            "README": MockContents("roles/foobar/README", MockContents.FILE)
+            "handlers": MockContents("roles/foobar/handlers", MockContents.DIR),
+            "README": MockContents("roles/foobar/README", MockContents.FILE),
         },
         "roles/empty-dir": {
+            "meta": MockContents("roles/empty-dir/meta", MockContents.DIR),
             "README.whatever": MockContents(
                 "roles/empty-dir/README.whatever", MockContents.FILE
-            )
+            ),
+        },
+        "roles/foobaz": {
+            "baz": MockContents("roles/foobaz/baz", MockContents.DIR),
+        },
+        "roles/foobaz/baz": {
+            "vars": MockContents("roles/foobaz/baz/vars", MockContents.DIR),
+            "README.rst": MockContents(
+                "roles/foobaz/baz/README.rst", MockContents.FILE
+            ),
         },
         "roles/foo/README.md": "# Just some Markdown",
         "roles/bar/README.txt": "Just some simple text\nwith a line break",
         "roles/foobar/README": "Simple text in a file without extension",
-        "roles/empty-dir/REAMDE.whatever": "This file won't be checked out",
+        "roles/empty-dir/README.whatever": "This file won't be checked out",
+        "roles/foobaz/baz/README.rst": "Simple readme file from nested role",
     },
     "orga1/repo3": {
         REPO_ROOT: {
@@ -215,19 +232,24 @@ def test_scrape():
         "orga1/repo2": (
             {},
             {
-                "foo": {
+                "bar": {
                     "last_changed": "2018-09-17 15:15:15",
                     "readme_file": {
-                        "path": "roles/foo/README.md",
-                        "content": "# Just some Markdown",
+                        "path": "roles/bar/README.txt",
+                        "content": "Just some simple text\nwith a line break",
                     },
                     "changelog_file": None,
                 },
-                "bar": {
+                "empty-dir": {
+                    "last_changed": "2018-09-17 15:15:15",
+                    "readme_file": None,
+                    "changelog_file": None,
+                },
+                "foo": {
                     "last_changed": "2018-09-17 15:15:15",
                     "readme_file": {
-                        "path": "roles/bar/README.txt",
-                        "content": "Just some simple text\nwith a line break",
+                        "path": "roles/foo/README.md",
+                        "content": "# Just some Markdown",
                     },
                     "changelog_file": None,
                 },
@@ -239,9 +261,12 @@ def test_scrape():
                 },
                 "changelog_file": None,
             },
-                "empty-dir": {
+                "foobaz/baz": {
                     "last_changed": "2018-09-17 15:15:15",
-                    "readme_file": None,
+                    "readme_file": {
+                        "path": "roles/foobaz/baz/README.rst",
+                        "content": "Simple readme file from nested role",
+                    },
                     "changelog_file": None,
                 },
             },
diff --git a/tests/scraper/test_repo_parser.py b/tests/scraper/test_repo_parser.py
index 8434775..baf5201 100644
--- a/tests/scraper/test_repo_parser.py
+++ b/tests/scraper/test_repo_parser.py
@@ -47,6 +47,7 @@ def test_parse(repo_data):
     job_5 = jobs[4]
     role_1 = [r for r in roles if r["role_name"] == "foo"][0]
     role_2 = [r for r in roles if r["role_name"] == "bar"][0]
+    role_3 = [r for r in roles if r["role_name"] == "foobar/baz"][0]
 
     expected_job_1 = {
         "job_name": "my-cool-new-job",
@@ -215,6 +216,39 @@ def test_parse(repo_data):
         "last_updated": "2018-09-17 15:15:15",
     }
 
+    expected_role_3 = {
+        "role_name": "foobar/baz",
+        "repo": "my/project",
+        "tenants": ["foo", "bar"],
+        "description": (
+            "Yet another simple description\n\n"
+            "**Role variables**\n\n"
+            ".. zuul:rolevar:: my_mandatory_variable\n\n"
+            "   This variable is mandatory.\n"
+        ),
+        "description_html": (
+            "<p>Yet another simple description</p>\n"
+            "<p><strong>Role variables</strong></p>\n"
+            '<dl class="zuul rolevar">\n'
+            '<dt id="rolevar-my_mandatory_variable">\n'
+            '<code class="descname">'
+            'my_mandatory_variable'
+            "</code>"
+            '<a class="headerlink" href="#rolevar-my_mandatory_variable" '
+            'title="Permalink to this definition">'
+            "¶</a></dt>\n"
+            "<dd><p>This variable is mandatory.</p>\n"
+            "</dd></dl>\n"
+            "\n"
+        ),
+        "url": "https://github/my/project/tree/master/roles/foobar/baz",
+        "private": False,
+        "platforms": [],
+        "reusable": False,
+        "scrape_time": scrape_time,
+        "last_updated": "2018-09-17 15:15:15",
+    }
+
     # NOTE (fschmidt): Without the skip_empty flag, empty (= None) keys will
     # be stripped from the resulting dict.
     assert job_1.to_dict(skip_empty=False) == expected_job_1
@@ -224,6 +258,7 @@ def test_parse(repo_data):
     assert job_5.to_dict(skip_empty=False) == expected_job_5
     assert role_1.to_dict(skip_empty=False) == expected_role_1
     assert role_2.to_dict(skip_empty=False) == expected_role_2
+    assert role_3.to_dict(skip_empty=False) == expected_role_3
 
 
 def test_parse_reusable_repo(repo_data):
diff --git a/tests/testdata/repo_files/roles/foobar/baz/README.rst b/tests/testdata/repo_files/roles/foobar/baz/README.rst
new file mode 100644
index 0000000..7ecbbd3
--- /dev/null
+++ b/tests/testdata/repo_files/roles/foobar/baz/README.rst
@@ -0,0 +1,7 @@
+Yet another simple description
+
+**Role variables**
+
+.. zuul:rolevar:: my_mandatory_variable
+
+   This variable is mandatory.
diff --git a/zubbi/scraper/scraper.py b/zubbi/scraper/scraper.py
index b272b55..ece56af 100644
--- a/zubbi/scraper/scraper.py
+++ b/zubbi/scraper/scraper.py
@@ -29,6 +29,19 @@
 
 ROLES_DIRECTORY = "roles"
 
+# A role needs to contain at least one of these directories to be considered a role:
+# https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_reuse_roles.html#role-directory-structure
+ROLE_MANDATORY_DIRS = [
+    "tasks",
+    "handlers",
+    "library",
+    "defaults",
+    "vars",
+    "files",
+    "templates",
+    "meta",
+]
+
 REPO_ROOT = "/"
 
 
@@ -146,29 +159,44 @@ def get_file_info(self, path):
 
     def scrape_role_files(self):
         role_files = {}
-        # We are only interested in the role name (=> directory name)
-        # and the README and CHANGELOG files. Thus, we don't need to
-        # iterate recursively over the roles directory as those files
-        # should be on the top-level per role.
+        # Roles might be grouped in a parent directory, so we need to
+        # check nested directories recursively.
+
+        # Listing repository files is based on the example in the PyGithub docs:
+        # https://pygithub.readthedocs.io/en/v2.1.1/examples/Repository.html#get-all-of-the-contents-of-the-repository-recursively
         try:
-            roles = self.repo.directory_contents(ROLES_DIRECTORY)
-            for role_name, remote_file in roles.items():
+            # Ansible requires roles to be defined in a certain directory structure.
+            # Thus, we are only interested in directories.
+            dirs_to_search = [
+                item
+                for item in self.repo.directory_contents(ROLES_DIRECTORY).values()
+                if item.type == "dir"
+            ]
+            while dirs_to_search:
                 try:
-                    if remote_file.type != "dir":
-                        # Ansible requires the role to be defined in a
-                        # certain directory structure. Thus, we can
-                        # ignore it in case it's not a directory.
-                        continue
-                    last_changed = self.repo.last_changed(remote_file.path)
-                    existing_files = self.repo.directory_contents(remote_file.path)
-                    # Skip empty directories or files
-                    if not existing_files:
+                    dir = dirs_to_search.pop(0)
+                    dir_items = self.repo.directory_contents(dir.path)
+
+                    subdirs = [
+                        item for item in dir_items.values() if item.type == "dir"
+                    ]
+                    # If a directory does not contain any of the mandatory
+                    # role directories, it is not a role itself and its
+                    # subdirectories (if any) should be scanned further.
+                    # This is done by appending those subdirectories to the
+                    # 'dirs_to_search' list (this implements the recursive search).
+                    if set([d.name for d in subdirs]).isdisjoint(ROLE_MANDATORY_DIRS):
+                        dirs_to_search.extend(subdirs)
                         continue
-                    readme_file = self.find_matching_file(README_FILES, existing_files)
-                    changelog_file = self.find_matching_file(
-                        CHANGELOG_FILES, existing_files
-                    )
-                    role_files[role_name] = {
+
+                    # Once a role is found, we are only interested in the timestamp of
+                    # its latest update (the last git change) and its README and
+                    # CHANGELOG files, which should be at the top level of the role.
+                    last_changed = self.repo.last_changed(dir.path)
+                    readme_file = self.find_matching_file(README_FILES, dir_items)
+                    changelog_file = self.find_matching_file(CHANGELOG_FILES, dir_items)
+                    # The role path (relative to ROLES_DIRECTORY) is the role name
+                    role_files[dir.path.replace(f"{ROLES_DIRECTORY}/", "", 1)] = {
                         "last_changed": last_changed,
                         "readme_file": readme_file,
                         "changelog_file": changelog_file,
@@ -178,7 +206,8 @@ def scrape_role_files(self):
                 except CheckoutError as e:
                     LOGGER.debug(e)
 
-        return role_files
+        # Sort the keys (role names) alphabetically
+        return {key: value for key, value in sorted(role_files.items())}
 
     def find_matching_file(self, file_filter, existing_files):
         for filename, file_content in existing_files.items():
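
For reference, here is a minimal standalone sketch of the nested-role detection that the scrape_role_files() changes above implement. It is an illustration only and not part of the patch: it walks a local checkout with pathlib instead of going through the GitHubRepository API, and the helper name find_roles is made up for this sketch.

from pathlib import Path

# Mirrors ROLE_MANDATORY_DIRS from zubbi/scraper/scraper.py: a directory is
# only treated as a role if it contains at least one of these subdirectories.
ROLE_MANDATORY_DIRS = {
    "tasks",
    "handlers",
    "library",
    "defaults",
    "vars",
    "files",
    "templates",
    "meta",
}


def find_roles(roles_dir):
    """Return role names (paths relative to roles_dir) using the same
    breadth-first search as scrape_role_files(), so that nested roles
    such as 'foobaz/baz' are found as well."""
    dirs_to_search = [d for d in Path(roles_dir).iterdir() if d.is_dir()]
    roles = []
    while dirs_to_search:
        current = dirs_to_search.pop(0)
        subdirs = [d for d in current.iterdir() if d.is_dir()]
        # No mandatory role directory here -> not a role itself,
        # so descend into its subdirectories instead.
        if {d.name for d in subdirs}.isdisjoint(ROLE_MANDATORY_DIRS):
            dirs_to_search.extend(subdirs)
            continue
        roles.append(current.relative_to(roles_dir).as_posix())
    return sorted(roles)


if __name__ == "__main__":
    # With the directory layout from tests/scraper/test_integration.py this
    # prints ['bar', 'empty-dir', 'foo', 'foobar', 'foobaz/baz'].
    print(find_roles("roles"))

The path relative to the roles directory doubles as the role name, which is exactly the key format the new role_files dict (and the "foobar/baz" and "foobaz/baz" test expectations) rely on.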