Skip to content

Commit

Permalink
Crawler connectors: consider empty directories up-to-date
Browse files Browse the repository at this point in the history
Fixes: #437

Signed-off-by: Aurélien Bompard <[email protected]>
  • Loading branch information
abompard committed Jan 14, 2025
1 parent 5184902 commit d5b3b11
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 1 deletion.
2 changes: 2 additions & 0 deletions mirrormanager2/crawler/ftp_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def _check_dir(self, url, directory):

with mmlib.instance_attribute(directory, "files") as files:
# Getting Directory.files is a bit expensive, involves json decoding
# files can be None in case of empty directories
files = files or []
for filename in files:
if filename not in results:
return False # Missing file, we don't need to go over other files
Expand Down
2 changes: 2 additions & 0 deletions mirrormanager2/crawler/http_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ def _check_dir(self, url, directory):
return None
with mmlib.instance_attribute(directory, "files") as files:
# Getting Directory.files is a bit expensive, involves json decoding
# files can be None in case of empty directories
files = files or []
for filename in files:
file_url = f"{url}/{filename}"
exists = self._check_file(conn, file_url, files[filename], directory.readable)
Expand Down
2 changes: 2 additions & 0 deletions mirrormanager2/crawler/rsync_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ def _check_file(self, current_file_info, db_file_info):
def _check_dir(self, dirname, directory):
with mmlib.instance_attribute(directory, "files") as files:
# Getting Directory.files is a bit expensive, involves json decoding
# files can be None in case of empty directories
files = files or []
for filename in sorted(files):
if len(dirname) == 0:
key = filename
Expand Down
1 change: 0 additions & 1 deletion mirrormanager2/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,6 @@ def _get_directories_by_category_query(category, only_repodata=False):
.where(
model.Category.id == category.id,
model.Directory.readable.is_(True),
model.Directory.files.is_not(None),
)
)
if only_repodata:
Expand Down
33 changes: 33 additions & 0 deletions tests/test_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,15 @@ def test_scan_missing_files_rsync(db, dir_obj_with_files, config):
assert result is False


def test_scan_empty_directory_rsync(db, dir_obj):
    """An empty directory checked over rsync must be reported as up to date."""
    # NOTE(review): dir_obj is presumably the fixture variant without a file
    # listing (as opposed to dir_obj_with_files) -- confirm against conftest.
    rsync_connector = ConnectionPool({}).get(f"rsync://{FOLDER}/../testdata/")
    target_url = f"rsync://{FOLDER}/../testdata/pub/fedora/linux"
    outcome = rsync_connector.check_dir(target_url, dir_obj)
    assert outcome is True


def test_scan_http(db, dir_obj_with_files):
"""Test scanning directories with http"""
connection_pool = ConnectionPool({})
Expand Down Expand Up @@ -175,6 +184,19 @@ def test_scan_missing_files_http(db, dir_obj_with_files):
)


def test_scan_empty_directory_http(db, dir_obj):
    """An empty directory checked over http is up to date with no per-file check."""
    target_url = "http://localhost/testdata/pub/fedora/linux"
    http_connector = ConnectionPool({}).get("http://localhost/testdata/")
    # Replace both collaborators with mocks: the connection getter so its call
    # count can be checked, and the per-file check so we can prove it is never
    # reached.
    http_connector._check_file = Mock(return_value=True)
    http_connector.get_connection = Mock()
    outcome = http_connector.check_dir(target_url, dir_obj)
    assert outcome is True
    http_connector.get_connection.assert_called_once()
    http_connector._check_file.assert_not_called()


def test_scan_ftp(db, dir_obj_with_files):
"""Test scanning directories with ftp"""
connection_pool = ConnectionPool({})
Expand All @@ -195,3 +217,14 @@ def test_scan_missing_files_ftp(db, dir_obj_with_files):
result = connector.check_dir(dir_url, dir_obj_with_files)
assert result is False
connector.get_ftp_dir.assert_called_once_with(dir_url, True)


def test_scan_empty_directory_ftp(db, dir_obj):
    """An empty directory checked over ftp is up to date after one listing call."""
    target_url = "ftp://localhost/testdata/pub/fedora/linux"
    ftp_connector = ConnectionPool({}).get("ftp://localhost/testdata/")
    # The remote listing is stubbed to come back empty.
    ftp_connector.get_ftp_dir = Mock(return_value={})
    outcome = ftp_connector.check_dir(target_url, dir_obj)
    assert outcome is True
    ftp_connector.get_ftp_dir.assert_called_once_with(target_url, True)

0 comments on commit d5b3b11

Please sign in to comment.