Skip to content

Commit

Permalink
made sure that the scraper is sorted by scraped service
Browse files Browse the repository at this point in the history
  • Loading branch information
CommanderStorm authored May 5, 2024
1 parent 1b277bb commit 1e15e35
Showing 1 changed file with 39 additions and 20 deletions.
59 changes: 39 additions & 20 deletions data/external/main.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,56 @@
import logging
import os
from pathlib import Path

from external.scrapers import nat, public_transport, roomfinder, tumonline
from external.scraping_utils import CACHE_PATH
from utils import setup_logging


def ensure_is_empty(directory: Path):
    """
    Make the specified directory exist and be empty.

    Everything inside ``directory`` is deleted recursively; the directory itself
    (and everything above it) is left in place. A directory that does not exist
    yet is created, including missing parents, so the function can be called on
    a first run. Symbolic links are removed as links; their targets are not
    entered or deleted.

    Args:
    ----
        directory: The directory path to be emptied.

    Returns:
    -------
        None

    Raises:
    ------
        FileExistsError: If ``directory`` exists but is not a directory.

    """
    # Creating the directory up front covers the first run, where there is nothing to empty yet.
    directory.mkdir(parents=True, exist_ok=True)
    for item in directory.iterdir():
        # `is_dir()` follows symlinks, so links are checked first and only unlinked.
        if item.is_symlink() or not item.is_dir():
            item.unlink()
            continue
        # Empty the child first, then remove exactly that one directory.
        # `os.removedirs` is deliberately not used here: it also prunes parent
        # directories that became empty, which would delete `directory` itself.
        ensure_is_empty(item)
        item.rmdir()


if __name__ == "__main__":
    setup_logging(level=logging.INFO)
    # Start from a clean cache: make sure the cache root exists, then drop
    # everything a previous run left behind. Sub-directories are only created
    # further down, next to the scrapers of the same service, because anything
    # created before this point would be wiped again immediately.
    os.makedirs(CACHE_PATH, exist_ok=True)
    ensure_is_empty(CACHE_PATH)

    # You can comment out steps that should be skipped.
    # The downloader will automatically create a cache in `cache/`.
    # Steps are grouped by scraped service; every scraper runs exactly once.
    tumonline.scrape_buildings()
    tumonline.scrape_rooms()
    tumonline.scrape_usages()
    tumonline.scrape_orgs(lang="de")
    tumonline.scrape_orgs(lang="en")

    os.makedirs(CACHE_PATH / "nat", exist_ok=True)
    nat.scrape_buildings()
    nat.scrape_rooms()

    os.makedirs(CACHE_PATH / "maps" / "roomfinder", exist_ok=True)
    os.makedirs(CACHE_PATH / "maps" / "roomfinder" / "kmz", exist_ok=True)
    roomfinder.scrape_buildings()
    roomfinder.scrape_rooms()
    roomfinder.scrape_maps()

    os.makedirs(CACHE_PATH / "public_transport", exist_ok=True)
    public_transport.scrape_stations()

0 comments on commit 1e15e35

Please sign in to comment.