import logging
import os
import shutil
from pathlib import Path

from external.scrapers import nat, public_transport, roomfinder, tumonline
from external.scraping_utils import CACHE_PATH
from utils import setup_logging


def ensure_is_empty(directory: Path):
    """
    Make sure the specified directory exists and contains nothing.

    The directory is created (including missing parents) if it does not exist yet.
    Otherwise, everything inside it is deleted while the directory itself is kept.
    Symbolic links inside the directory are unlinked, never followed, so nothing
    outside of `directory` is touched.

    Args:
    ----
        directory: The directory path to be emptied.

    Returns:
    -------
        None

    Raises:
    ------
        FileExistsError: If `directory` exists but is not a directory.
        OSError: If an entry cannot be deleted (e.g. missing permissions).

    """
    # A fresh checkout has no cache yet; creating it first keeps iterdir() from failing.
    directory.mkdir(parents=True, exist_ok=True)
    for item in directory.iterdir():
        # is_dir() follows symlinks, so links have to be excluded explicitly:
        # a link to a directory is removed as a link, its target stays untouched.
        if item.is_dir() and not item.is_symlink():
            # Deliberately not os.removedirs(): that also prunes every parent
            # directory that becomes empty, i.e. `directory` and its ancestors.
            shutil.rmtree(item)
        else:
            item.unlink()


if __name__ == "__main__":
    setup_logging(level=logging.INFO)
    # Every run starts from an empty cache directory.
    ensure_is_empty(CACHE_PATH)

    # NOTE(review): no `tumonline` subdirectory is created here (unlike for the other
    # scrapers below); presumably the tumonline scraper creates what it needs - confirm.
    tumonline.scrape_buildings()
    tumonline.scrape_rooms()
    tumonline.scrape_usages()
    tumonline.scrape_orgs(lang="de")
    tumonline.scrape_orgs(lang="en")

    os.makedirs(CACHE_PATH / "nat", exist_ok=True)
    nat.scrape_buildings()
    nat.scrape_rooms()

    os.makedirs(CACHE_PATH / "maps" / "roomfinder", exist_ok=True)
    os.makedirs(CACHE_PATH / "maps" / "roomfinder" / "kmz", exist_ok=True)
    roomfinder.scrape_buildings()
    roomfinder.scrape_rooms()
    roomfinder.scrape_maps()

    os.makedirs(CACHE_PATH / "public_transport", exist_ok=True)
    public_transport.scrape_stations()