Skip to content

Commit

Permalink
don't import vegafusion from start_minio.py
Browse files Browse the repository at this point in the history
  • Loading branch information
jonmmease committed Nov 12, 2023
1 parent c158359 commit 9df43e8
Showing 1 changed file with 18 additions and 8 deletions.
26 changes: 18 additions & 8 deletions automation/start_minio.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import shutil
from tempfile import NamedTemporaryFile
import pandas as pd
from vegafusion.transformer import to_feather
from csv import QUOTE_ALL
from io import BytesIO
import pyarrow as pa

root = Path(__file__).parent.parent

Expand Down Expand Up @@ -61,13 +61,14 @@ def main():
)

# Convert to arrow
with NamedTemporaryFile("wb") as f:
to_feather(df, f)
client.fput_object(
"data",
"movies.arrow",
f.name,
)
tbl = pa.Table.from_pandas(df)
b = arrow_table_to_ipc_bytes(tbl)
client.put_object(
"data",
"movies.arrow",
BytesIO(b),
len(b)
)

# Convert to parquet. For some reason, uploading to minio with client.fput_object
# (as above for arrow) results in a parquet file with corrupt footer.
Expand Down Expand Up @@ -111,6 +112,15 @@ def start_minio_server(access_key, secret_key):
return process


def arrow_table_to_ipc_bytes(table, max_chunksize=8096):
    """Serialize a pyarrow Table into Arrow IPC file-format bytes.

    Args:
        table: The pyarrow Table to serialize. Its schema is used to open
            the IPC file writer.
        max_chunksize: Value forwarded to ``write_table`` as
            ``max_chunksize``. Defaults to 8096, the value that was
            previously hard-coded in this function.

    Returns:
        bytes: The full contents of the in-memory IPC file.
    """
    sink = BytesIO()
    with pa.ipc.new_file(sink, table.schema) as writer:
        writer.write_table(table, max_chunksize=max_chunksize)

    # Read the buffer only after the writer context has exited, so the
    # writer has finished with the stream before its contents are copied.
    return sink.getvalue()


if __name__ == "__main__":
try:
main()
Expand Down

0 comments on commit 9df43e8

Please sign in to comment.