Skip to content

Commit

Permalink
fix mistakes in dump and add csv creation script
Browse files Browse the repository at this point in the history
  • Loading branch information
berinaniesh committed Mar 3, 2025
1 parent f25da90 commit cfd33c8
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 12 deletions.
86 changes: 86 additions & 0 deletions csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import os
import subprocess
from dotenv import load_dotenv
from urllib.parse import urlparse

import glob
import zstandard as zstd
import os

# Read settings from the .env file before anything looks at the environment
load_dotenv()

# PostgreSQL connection URL taken from the environment (None when unset)
DATABASE_URL = os.environ.get('DATABASE_URL')

# Translation codes that may be exported
translations = [
    "TOVBSI",
    "KJV",
    "MLSVP",
    "ASV",
    "WEB",
    "WEBU",
]

def _build_psql_uri(database_url):
    """Rebuild a ``postgresql://`` connection URI from *database_url*.

    Only components that are actually present in the parsed URL are emitted,
    so a URL without an explicit port or without credentials does not end up
    with a literal ``None`` in the result.
    """
    parsed_url = urlparse(database_url)

    host = parsed_url.hostname or ""
    # ``hostname`` drops the brackets around IPv6 literals; restore them so
    # the address is not confused with a host:port pair.
    if ":" in host:
        host = f"[{host}]"
    if parsed_url.port is not None:
        host = f"{host}:{parsed_url.port}"

    credentials = ""
    if parsed_url.username:
        credentials = parsed_url.username
        if parsed_url.password is not None:
            credentials += f":{parsed_url.password}"
        credentials += "@"

    dbname = parsed_url.path.lstrip('/')
    return f"postgresql://{credentials}{host}/{dbname}"


def dump_to_csv(translation):
    """Export the rows of one translation from ``fulltable`` to a CSV file.

    Runs ``psql`` with a client-side ``\\copy`` that writes
    ``csv/<translation>.csv`` (relative to the current working directory)
    with a header row. Problems are reported on stdout; nothing is raised
    for an unknown translation, a missing ``DATABASE_URL``, a missing
    ``psql`` executable, or a failing ``psql`` run.

    Args:
        translation: Translation code; must be one of ``translations``.

    Returns:
        None.
    """
    if translation not in translations:
        print(f"Invalid translation: {translation}")
        return

    if not DATABASE_URL:
        print("Error: DATABASE_URL is not set.")
        return

    # NOTE(review): the URI (including the password) is passed on the psql
    # command line, where other local users may be able to see it.
    connection_uri = _build_psql_uri(DATABASE_URL)

    # \copy writes on the client side and needs the target directory to
    # exist already, so create it up front.
    os.makedirs("csv", exist_ok=True)

    # ``translation`` was checked against the fixed ``translations`` list
    # above, so interpolating it into the statement is safe here.
    query = f"\\copy (SELECT * FROM fulltable WHERE translation='{translation}') TO 'csv/{translation}.csv' WITH CSV HEADER;"

    try:
        subprocess.run(
            ['psql', connection_uri, '-c', query],
            check=True,
            text=True,
            capture_output=True,
        )
    except FileNotFoundError:
        # Raised by subprocess when the psql executable is not on PATH.
        print("Error: psql executable not found.")
    except subprocess.CalledProcessError as e:
        print(f"Error: {e.stderr}")
    else:
        print(f"Data exported to {translation}.csv successfully.")

def compress_csv_files(directory):
    """Compress every ``*.csv`` file in *directory* to ``<name>.csv.zst``.

    Each file is compressed into a temporary ``.part`` file that is renamed
    over the final ``.zst`` path only after compression succeeded, so a
    failure never leaves a truncated archive behind and never destroys an
    archive from an earlier run. Failures are reported on stdout and the
    remaining files are still processed. The source CSV files are left in
    place.

    Args:
        directory: Directory to search (non-recursively) for CSV files.

    Returns:
        None.
    """
    # Sorted so files are processed (and reported) in a stable order.
    csv_files = sorted(glob.glob(os.path.join(directory, "*.csv")))
    if not csv_files:
        return

    # One compressor is enough; it is reused for every file.
    compressor = zstd.ZstdCompressor()

    for csv_file in csv_files:
        output_file = csv_file + ".zst"
        partial_file = output_file + ".part"

        try:
            with open(csv_file, 'rb') as f_in, open(partial_file, 'wb') as f_out:
                compressor.copy_stream(f_in, f_out)
            # Publish the archive only once it has been written completely.
            os.replace(partial_file, output_file)
            print(f"Compressed {csv_file} to {output_file}")
        except (OSError, zstd.ZstdError) as e:
            print(f"Error compressing {csv_file}: {e}")
            # Drop the incomplete output, if it was created at all.
            try:
                os.remove(partial_file)
            except FileNotFoundError:
                pass

def delete_csv_files(directory):
    """Remove every ``*.csv`` file found directly inside *directory*.

    The outcome for each file is printed; a file that cannot be removed is
    reported and skipped so the remaining files are still handled.

    Args:
        directory: Directory whose CSV files should be removed.

    Returns:
        None.
    """
    pattern = os.path.join(directory, "*.csv")

    for path in glob.glob(pattern):
        try:
            os.unlink(path)
            print(f"Deleted {path}")
        except Exception as err:
            print(f"Error deleting {path}: {err}")


# Export each translation to its own CSV file.
for translation_code in translations:
    dump_to_csv(translation_code)

# Compress the exported files, then remove the uncompressed CSVs.
for step in (compress_csv_files, delete_csv_files):
    step("csv")
4 changes: 2 additions & 2 deletions csv/ASV.csv.zst
Git LFS file not shown
4 changes: 2 additions & 2 deletions csv/KJV.csv.zst
Git LFS file not shown
4 changes: 2 additions & 2 deletions csv/MLSVP.csv.zst
Git LFS file not shown
4 changes: 2 additions & 2 deletions csv/TOVBSI.csv.zst
Git LFS file not shown
4 changes: 2 additions & 2 deletions csv/WEB.csv.zst
Git LFS file not shown
4 changes: 2 additions & 2 deletions csv/WEBU.csv.zst
Git LFS file not shown
Binary file modified sql/latest_full.sql.zst
Binary file not shown.

0 comments on commit cfd33c8

Please sign in to comment.