# Review notes for this patch. These lines sit before the first "diff --git"
# header, so they are not part of any hunk.
#
# What the patch changes, per file:
# - mirrormanager2/crawler/ftp_connector.py, http_connector.py and
#   rsync_connector.py: inside _check_dir, `files` is rebound to
#   `files or []` before the loop over it, so a None value (described by the
#   added comment as the empty-directory case) is iterated as "no files".
# - mirrormanager2/lib/__init__.py: the query built in
#   _get_directories_by_category_query drops the
#   `model.Directory.files.is_not(None)` condition from its .where(...) call.
# - tests/test_crawler.py: adds test_scan_empty_directory_rsync,
#   test_scan_empty_directory_http and test_scan_empty_directory_ftp; each
#   calls connector.check_dir(dir_url, dir_obj) and asserts the result is True.
#   The http test also asserts _check_file is never called, and the ftp test
#   asserts get_ftp_dir is called once with (dir_url, True).
#
# NOTE(review): the line breaks of this patch appear to have been collapsed
#   into spaces. The hunks below are left untouched because the original
#   indentation of the context lines cannot be recovered from this view.
# NOTE(review): http_connector indexes `files[filename]` inside the loop, which
#   suggests `files` is normally a mapping. With the `[]` fallback the loop body
#   never runs, so this looks harmless; `{}` may be the more consistent
#   fallback -- confirm against Directory.files.
diff --git a/mirrormanager2/crawler/ftp_connector.py b/mirrormanager2/crawler/ftp_connector.py index 0bb8f772..bb6775a0 100755 --- a/mirrormanager2/crawler/ftp_connector.py +++ b/mirrormanager2/crawler/ftp_connector.py @@ -114,6 +114,8 @@ def _check_dir(self, url, directory): with mmlib.instance_attribute(directory, "files") as files: # Getting Directory.files is a bit expensive, involves json decoding + # files can be None in case of empty directories + files = files or [] for filename in files: if filename not in results: return False # Missing file, we don't need to go over other files diff --git a/mirrormanager2/crawler/http_connector.py b/mirrormanager2/crawler/http_connector.py index eb7af71b..b6d27694 100755 --- a/mirrormanager2/crawler/http_connector.py +++ b/mirrormanager2/crawler/http_connector.py @@ -89,6 +89,8 @@ def _check_dir(self, url, directory): return None with mmlib.instance_attribute(directory, "files") as files: # Getting Directory.files is a bit expensive, involves json decoding + # files can be None in case of empty directories + files = files or [] for filename in files: file_url = f"{url}/{filename}" exists = self._check_file(conn, file_url, files[filename], directory.readable) diff --git a/mirrormanager2/crawler/rsync_connector.py b/mirrormanager2/crawler/rsync_connector.py index d5edaac1..ab087d7c 100755 --- a/mirrormanager2/crawler/rsync_connector.py +++ b/mirrormanager2/crawler/rsync_connector.py @@ -78,6 +78,8 @@ def _check_file(self, current_file_info, db_file_info): def _check_dir(self, dirname, directory): with mmlib.instance_attribute(directory, "files") as files: # Getting Directory.files is a bit expensive, involves json decoding + # files can be None in case of empty directories + files = files or [] for filename in sorted(files): if len(dirname) == 0: key = filename diff --git a/mirrormanager2/lib/__init__.py b/mirrormanager2/lib/__init__.py index da8b8243..a3038fb6 100644 --- a/mirrormanager2/lib/__init__.py +++ 
b/mirrormanager2/lib/__init__.py @@ -772,7 +772,6 @@ def _get_directories_by_category_query(category, only_repodata=False): .where( model.Category.id == category.id, model.Directory.readable.is_(True), - model.Directory.files.is_not(None), ) ) if only_repodata: diff --git a/tests/test_crawler.py b/tests/test_crawler.py index 5e6540b9..258f5e34 100644 --- a/tests/test_crawler.py +++ b/tests/test_crawler.py @@ -143,6 +143,15 @@ def test_scan_missing_files_rsync(db, dir_obj_with_files, config): assert result is False +def test_scan_empty_directory_rsync(db, dir_obj): + """Test scanning empty directories with rsync""" + connection_pool = ConnectionPool({}) + connector = connection_pool.get(f"rsync://{FOLDER}/../testdata/") + dir_url = f"rsync://{FOLDER}/../testdata/pub/fedora/linux" + result = connector.check_dir(dir_url, dir_obj) + assert result is True + + def test_scan_http(db, dir_obj_with_files): """Test scanning directories with http""" connection_pool = ConnectionPool({}) @@ -175,6 +184,19 @@ def test_scan_missing_files_http(db, dir_obj_with_files): ) +def test_scan_empty_directory_http(db, dir_obj): + """Test scanning empty directories with http""" + connection_pool = ConnectionPool({}) + connector = connection_pool.get("http://localhost/testdata/") + connector.get_connection = Mock() + connector._check_file = Mock(return_value=True) + dir_url = "http://localhost/testdata/pub/fedora/linux" + result = connector.check_dir(dir_url, dir_obj) + assert result is True + connector.get_connection.assert_called_once() + connector._check_file.assert_not_called() + + def test_scan_ftp(db, dir_obj_with_files): """Test scanning directories with ftp""" connection_pool = ConnectionPool({}) @@ -195,3 +217,14 @@ def test_scan_missing_files_ftp(db, dir_obj_with_files): result = connector.check_dir(dir_url, dir_obj_with_files) assert result is False connector.get_ftp_dir.assert_called_once_with(dir_url, True) + + +def test_scan_empty_directory_ftp(db, dir_obj): + """Test scanning 
empty directories with ftp""" + connection_pool = ConnectionPool({}) + connector = connection_pool.get("ftp://localhost/testdata/") + connector.get_ftp_dir = Mock(return_value={}) + dir_url = "ftp://localhost/testdata/pub/fedora/linux" + result = connector.check_dir(dir_url, dir_obj) + assert result is True + connector.get_ftp_dir.assert_called_once_with(dir_url, True)