diff --git a/data/Dockerfile b/data/Dockerfile
index b0beb3bb8..b976ba5e3 100644
--- a/data/Dockerfile
+++ b/data/Dockerfile
@@ -16,8 +16,12 @@ COPY *.py ./
 COPY translations.yaml translations.yaml
 RUN mkdir output \
     && python3 compile.py \
+    && test -f "./output/status_data.json" \
+    && test -f "./output/status_data.parquet" \
     && test -f "./output/search_data.json" \
+    && test -f "./output/search_data.parquet" \
     && test -f "./output/api_data.json" \
+    && test -f "./output/api_data.parquet" \
     && cp -r sources/img/* output
 
 COPY output/openapi.yaml output/openapi.yaml
diff --git a/data/processors/export.py b/data/processors/export.py
index 2f7e307fd..0280b3c98 100644
--- a/data/processors/export.py
+++ b/data/processors/export.py
@@ -160,10 +160,12 @@ def export_for_status() -> None:
     """Generate hashes for the contents of data"""
     with open("output/api_data.json", encoding="utf-8") as file:
         export_data = json.load(file)
-    export_data = [(d["id"], d["hash"]) for d in export_data]
+    export_json_data = [(d["id"], d["hash"]) for d in export_data]
     with open("output/status_data.json", "w", encoding="utf-8") as file:
-        json.dump(export_data, file)
-    df = pl.read_json("output/status_data.json")
+        json.dump(export_json_data, file)
+
+    export_polars_data = [{'id': d["id"], 'hash': d["hash"]} for d in export_data]
+    df = pl.DataFrame(export_polars_data)
     df.write_parquet("output/status_data.parquet", use_pyarrow=True, compression_level=22)
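
A minimal sketch (not part of the patch; the sample ids and hashes below are made up) of why the Parquet export now builds the frame directly instead of round-tripping through `status_data.json`: `json.dump` serialises the `(id, hash)` tuples as JSON arrays, so the removed `pl.read_json` call presumably saw a list of lists with no column names, whereas constructing `pl.DataFrame` from a list of dicts yields the intended `id`/`hash` schema:

```python
import polars as pl

# Hypothetical rows standing in for entries of output/api_data.json.
export_data = [
    {"id": "room-001", "hash": "abc123"},
    {"id": "room-002", "hash": "def456"},
]

# List of dicts -> one row per dict, columns named after the keys,
# mirroring what the patched export_for_status() now does.
df = pl.DataFrame([{"id": d["id"], "hash": d["hash"]} for d in export_data])
assert df.columns == ["id", "hash"]

# Written to the current directory here rather than output/ as in the patch.
df.write_parquet("status_data.parquet", use_pyarrow=True, compression_level=22)

# Round-trip check: the named-column schema survives, which is what the
# new `test -f` guards in the Dockerfile ultimately exist to protect.
print(pl.read_parquet("status_data.parquet").schema)
```

The JSON export keeps its original tuple/array shape for existing consumers; only the Parquet path switches to named columns.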