Skip to content

Commit

Permalink
removed unnecessarily letting steps of the data pipeline not/only run …
Browse files Browse the repository at this point in the history
…in prod
  • Loading branch information
CommanderStorm committed May 5, 2024
1 parent 2e1307a commit d2d3f9b
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 20 deletions.
4 changes: 2 additions & 2 deletions data/compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
structure,
tumonline,
)
from utils import DEBUG_MODE, setup_logging
from utils import DEV_MODE, setup_logging


# pylint: disable=too-many-locals,too-many-statements
Expand Down Expand Up @@ -130,7 +130,7 @@ def main() -> None:


if __name__ == "__main__":
setup_logging(level=logging.DEBUG if DEBUG_MODE else logging.INFO)
setup_logging(level=logging.DEBUG if DEV_MODE else logging.INFO)

# Pillow prints all imported modules to the debug stream
logging.getLogger("PIL").setLevel(logging.INFO)
Expand Down
21 changes: 9 additions & 12 deletions data/processors/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
from typing import Any, NamedTuple, TypeVar

import pydantic
import utils
import yaml
from external.models.common import PydanticConfiguration
from PIL import Image
from pydantic import Field
from pydantic.networks import HttpUrl

import utils
from external.models.common import PydanticConfiguration


class UrlStr(PydanticConfiguration):
text: str
Expand Down Expand Up @@ -219,9 +220,6 @@ def _extract_offsets(_id: str, _index: int, img_path: Path, img_sources: dict[st

def _get_hash_lut() -> dict[str, str]:
"""Get a lookup table for the hash of the image files content and offset if present"""
if not DEV_MODE:
return {}
logging.info("Since GIT_COMMIT_SHA is unset, we assume this is acting in In Dev mode.")
logging.info("Only files, with sha256(file-content)_sha256(offset) not present in the .hash_lut.json will be used")
if HASH_LUT.is_file():
with open(HASH_LUT, encoding="utf-8") as file:
Expand Down Expand Up @@ -268,13 +266,12 @@ def resize_and_crop() -> None:
for img_path in IMAGE_SOURCE.glob("*.webp"):
_id, _index = parse_image_filename(img_path.name)
offsets = _extract_offsets(_id, _index, img_path, img_sources)
actual_hash = _gen_file_hash(img_path, offsets)
if actual_hash == expected_hashes_lut.get(img_path.name, ""):
continue # skip this image, since it (and its offsets) have not changed
if DEV_MODE:
actual_hash = _gen_file_hash(img_path, offsets)
if actual_hash == expected_hashes_lut.get(img_path.name, ""):
continue # skip this image, since it (and its offsets) have not changed
logging.debug(f"Image '{img_path.name}' has changed, resizing and cropping...")
executor.submit(_refresh_for_all_resolutions, RefreshResolutionOrder(img_path, offsets))
if DEV_MODE:
_save_hash_lut(img_sources)
resize_and_crop_time = time.time() - start_time
logging.info(f"Resize and crop took {resize_and_crop_time:.2f}s")
_save_hash_lut(img_sources)
resize_and_crop_time = time.time() - start_time
logging.info(f"Resize and crop took {resize_and_crop_time:.2f}s")
5 changes: 0 additions & 5 deletions data/processors/sitemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
import requests
from defusedxml import ElementTree as defusedET

from utils import DEBUG_MODE

OLD_DATA_URL = "https://nav.tum.de/cdn/api_data.json"


Expand All @@ -35,9 +33,6 @@ class SimplifiedSitemaps(TypedDict):

def generate_sitemap() -> None:
"""Generate a sitemap that diffs changes since to the currently online data"""
if DEBUG_MODE:
logging.info("Skipping sitemap generation in Dev Mode (GIT_COMMIT_SHA is unset)")
return

# Load exported data. This function is intentionally not using the data object
# directly, but re-parsing the output file instead, because the export not
Expand Down
2 changes: 1 addition & 1 deletion data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
with open(TRANSLATION_BUFFER_PATH, encoding="utf-8") as yaml_file:
TRANSLATION_BUFFER = yaml.load(yaml_file)

DEBUG_MODE = "GIT_COMMIT_SHA" not in os.environ
DEV_MODE = "GIT_COMMIT_SHA" not in os.environ


class TranslatableStr(dict):
Expand Down

0 comments on commit d2d3f9b

Please sign in to comment.