Skip to content

Commit

Permalink
profile insertion script and chunk queries
Browse files Browse the repository at this point in the history
  • Loading branch information
GISRedeDev committed Jan 25, 2025
1 parent 883bd47 commit 96c3ff5
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
12 changes: 8 additions & 4 deletions scripts/reinsert_all_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pycountry
import numpy as np
from contextlib import contextmanager
from memory_profiler import profile

sys.path.append(str(Path(__file__).resolve().parent.parent / "api"))

Expand All @@ -28,7 +29,7 @@
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "")
POSTGRES_DB = os.getenv("POSTGRES_DB", "")

engine = create_engine(DATABASE_URL)
engine = create_engine(DATABASE_URL, pool_size=50, max_overflow=10)
Session = sessionmaker(bind=engine)


Expand Down Expand Up @@ -66,13 +67,16 @@ def get_country_name(row):
return row['gid_0']


# memory_profiler instrumentation (imported at the top of the file).
@profile
def upload_csv_to_indicators(csv_path, table_model, session, chunk_size=1000):
    """Load an indicators CSV and bulk-insert its rows in fixed-size chunks.

    The CSV is read fully into a DataFrame, its ``date`` column is
    normalised to ``YYYY-MM`` strings, a ``country`` column is derived for
    every row via ``get_country_name``, and NaN / null-like string markers
    are replaced with ``None``. Rows are then converted to dicts and
    inserted ``chunk_size`` at a time, so only one chunk of record dicts is
    built at once instead of one list covering the whole file.

    Args:
        csv_path: Path (or anything ``pd.read_csv`` accepts) of the CSV to
            load. It must contain a ``date`` column in ``%Y-%m`` format.
        table_model: Mapped class handed to ``session.bulk_insert_mappings``.
        session: Session object providing ``bulk_insert_mappings`` and
            ``commit``.
        chunk_size: Maximum number of rows per insert. Must be >= 1.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A negative step would make the range below empty and silently insert
    # nothing; zero would fail later with a less helpful range() error.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size!r}")

    df = pd.read_csv(csv_path)
    # Round-trip through datetime so malformed dates fail loudly and every
    # value ends up in the same 'YYYY-MM' text form.
    df['date'] = pd.to_datetime(df['date'], format="%Y-%m").dt.strftime('%Y-%m')
    df['country'] = df.apply(get_country_name, axis=1)
    # Normalise the various textual spellings of "missing" to a real None.
    df = df.replace({np.nan: None, 'NaN': None, 'nan': None, 'null': None, 'NULL': None, 'None': None})

    for start in range(0, len(df), chunk_size):
        data_chunk = df.iloc[start:start + chunk_size].to_dict(orient='records')
        session.bulk_insert_mappings(table_model, data_chunk)
        # NOTE(review): committing after every chunk means a failure part-way
        # through leaves earlier chunks persisted -- confirm that per-chunk
        # (rather than one final) commit is the intended behaviour.
        session.commit()


def main():
Expand Down
1 change: 1 addition & 0 deletions scripts/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,4 @@ tzdata==2024.1
urllib3==2.2.2
Werkzeug==2.3.8
wrapt==1.16.0
memory-profiler==0.61.0

0 comments on commit 96c3ff5

Please sign in to comment.