Skip to content

Commit

Permalink
Always write out xlsx files, even when there are no rows
Browse files Browse the repository at this point in the history
  • Loading branch information
simon-20 committed May 28, 2024
1 parent 2a0deb8 commit 65948b4
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 30 deletions.
46 changes: 17 additions & 29 deletions iatiflattener/group_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,12 @@ def write_dataframe_to_excel(self, dataframe, filename, lang):

def get_dataframe(self, country_code, transaction_budget, lang):
full_df = pd.DataFrame()
print("Read CSV {}-{}.csv".format(transaction_budget, country_code))
print("Read CSV {}-{}.csv (for {})".format(transaction_budget, country_code, lang))

for df in pd.read_csv(f"{self.output_folder}/csv/{transaction_budget}-{country_code}.csv",
dtype=self.CSV_HEADER_DTYPES,
chunksize=100000):
print("Reading chunk...")
if (not "iati_identifier" in df.columns.values) or (len(df)==0):
print("df length is 0")
return
headers_with_langs = variables.group_by_headers_with_langs([lang])
all_relevant_headers = headers_with_langs + ['value_usd', 'value_eur', 'value_local']
df = df[all_relevant_headers]
Expand All @@ -127,33 +125,23 @@ def group_results(self, country_code):
for lang in self.langs:
df_transaction = self.get_dataframe(country_code, 'transaction', lang)
df_budget = self.get_dataframe(country_code, 'budget', lang)
if (df_transaction is None) and (df_budget is None):
continue
elif (df_transaction is not None) and (df_budget is not None):
df = pd.concat([df_transaction, df_budget], ignore_index=True)
else:
if df_transaction is not None:
df = df_transaction
else:
df = df_budget

# write out the results to an Excel file, putting a maximum of 500000 rows in each file
if df is not None:
num_rows = len(df)
output_rows = 500000
if num_rows > output_rows:
for start in range(0, num_rows, output_rows):
df_part = df.iloc[start:start+output_rows, :]
page = (start/output_rows)+1
self.write_dataframe_to_excel(
dataframe = df_part,
filename = f"{self.output_folder}/xlsx/{lang}/{country_code}-{page}.xlsx",
lang = lang)
else:
df = pd.concat([df_transaction, df_budget], ignore_index=True)

num_rows = len(df)
output_rows = 500000
if num_rows > output_rows:
for start in range(0, num_rows, output_rows):
df_part = df.iloc[start:start+output_rows, :]
page = (start/output_rows)+1
self.write_dataframe_to_excel(
dataframe = df,
filename = f"{self.output_folder}/xlsx/{lang}/{country_code}.xlsx",
dataframe = df_part,
filename = f"{self.output_folder}/xlsx/{lang}/{country_code}-{page}.xlsx",
lang = lang)
else:
self.write_dataframe_to_excel(
dataframe = df,
filename = f"{self.output_folder}/xlsx/{lang}/{country_code}.xlsx",
lang = lang)


def group_data(self):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
setup(
name="iatiflattener",
packages=find_packages(exclude=['ez_setup', 'examples']),
version='0.10.9',
version='0.10.10',
description="A set of tools to flatten IATI data.",
author="Mark Brough",
author_email="[email protected]",
Expand Down

0 comments on commit 65948b4

Please sign in to comment.