diff --git a/automation/start_minio.py b/automation/start_minio.py
index 19532698..6fe3d8cc 100644
--- a/automation/start_minio.py
+++ b/automation/start_minio.py
@@ -7,9 +7,9 @@
 import shutil
 from tempfile import NamedTemporaryFile
 import pandas as pd
-from vegafusion.transformer import to_feather
 from csv import QUOTE_ALL
 from io import BytesIO
+import pyarrow as pa
 
 root = Path(__file__).parent.parent
 
@@ -61,13 +61,14 @@ def main():
     )
 
     # Convert to arrow
-    with NamedTemporaryFile("wb") as f:
-        to_feather(df, f)
-        client.fput_object(
-            "data",
-            "movies.arrow",
-            f.name,
-        )
+    tbl = pa.Table.from_pandas(df)
+    b = arrow_table_to_ipc_bytes(tbl)
+    client.put_object(
+        "data",
+        "movies.arrow",
+        BytesIO(b),
+        len(b),
+    )
 
     # Convert to parquet. For some reason, uploading to minio with client.fput_object
     # (as above for arrow) results in a parquet file with corrupt footer.
@@ -111,6 +112,18 @@ def start_minio_server(access_key, secret_key):
     return process
 
 
+def arrow_table_to_ipc_bytes(table, max_chunksize=8096):
+    """Serialize a pyarrow Table to Arrow IPC file-format bytes.
+
+    The table is written in record batches of at most ``max_chunksize`` rows.
+    """
+    bytes_buffer = BytesIO()
+    with pa.ipc.new_file(bytes_buffer, table.schema) as f:
+        f.write_table(table, max_chunksize=max_chunksize)
+
+    return bytes_buffer.getvalue()
+
+
 if __name__ == "__main__":
     try:
         main()