Skip to content

Commit

Permalink
Merge pull request mbari-org#214 from MBARIMike/main
Browse files Browse the repository at this point in the history
Fix --last_n_days for SurveyTally execution from load.py
  • Loading branch information
MBARIMike authored May 16, 2024
2 parents a169dec + d91ce39 commit b2f2c7d
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 24 deletions.
8 changes: 5 additions & 3 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@
//"args": ["--compilation", "-v", "1", "--skipuntil", "2022/AxialSeamount/FiguresCaress", "--limit", "10", "--log_file", "compilation.txt"],
//"args": ["--compilation", "-v", "1"],
//"args": ["--spreadsheets", "-v", "1"],
//"args": ["-v", "2", "--last_n_days", "60"],
"args": ["-v", "1", "--compilation", "--last_n_days", "30"],
"args": ["-v", "1", "--last_n_days", "0.5"],
//"args": ["-v", "1", "--spreadsheets", "--last_n_days", "0.5"],
//"args": ["-v", "1", "--compilation", "--last_n_days", "30"],
"justMyCode": false
},
{
Expand All @@ -94,7 +95,8 @@
//"args": ["-v", "2", "--parent_dir", "2022", "--read_xlsx"],
//"args": ["-v", "2", "--parent_dir", "2022", "--write_csv"],
//"args": ["-v", "1", "--write_csv"],
"args": ["-v", "1", "--parent_dir", "2022", "--read_xlsx", "--last_n_days", ".5"],
//"args": ["-v", "1", "--parent_dir", "2022", "--read_xlsx", "--last_n_days", ".5"],
"args": ["-v", "1", "--last_n_days", "0.5"],
}
]
}
12 changes: 7 additions & 5 deletions smdb/scripts/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,7 @@ def mbgrids_from_cmd_to_compilations(
return compilations

def link_compilation_to_missions(self):
self.logger.info("Linking Compilations to Missions")
for count, cmd_filename in enumerate(self.comp_dirs()):
comp_dir = os.path.dirname(cmd_filename)
self.logger.debug("%4d. %s", count, cmd_filename)
Expand Down Expand Up @@ -1529,13 +1530,13 @@ def run(*args):
elif bl.args.compilation:
compilation_load()
elif bl.args.spreadsheets:
spreadsheets_load()
spreadsheets_load(bl.args)
else:
missions_saved = bootstrap_load()
notes_load(missions_saved)
fnv_load(missions_saved)
compilation_load()
spreadsheets_load()
spreadsheets_load(bl.args)
bl.save_logger_output()


Expand Down Expand Up @@ -1570,14 +1571,15 @@ def compilation_load():
comp.link_compilation_to_missions()


def spreadsheets_load():
def spreadsheets_load(args: argparse.Namespace):
st = SurveyTally()
st.args = argparse.Namespace()
st.args.parent_dir = ""
st.args.verbose = 1
st.args.last_n_days = args.last_n_days
st.setup_logging()
st.process_xlsx()
st.process_csv()
xlsx_files_processed = st.process_xlsx()
st.process_csv(xlsx_files_processed)


if __name__ == "__main__":
Expand Down
45 changes: 29 additions & 16 deletions smdb/scripts/survey_tally.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,26 +132,25 @@ def read_xlsx_into_df(self, parent_dir: str) -> pd.DataFrame:
"SMDB",
f"SMDB_{parent_dir}_survey_tally.xlsx",
)
if not os.path.exists(xlsx_file):
self.logger.debug(f"File {xlsx_file} not found")
return pd.DataFrame(), xlsx_file
if self.args.last_n_days:
if os.path.getmtime(xlsx_file) < time() - self.args.last_n_days * 86400:
self.logger.debug(
f"Skipping file {xlsx_file} older than {self.args.last_n_days = }"
)
return pd.DataFrame()
return pd.DataFrame(), xlsx_file
self.logger.info(f"Reading {xlsx_file}")
try:
df = pd.read_excel(xlsx_file, engine="openpyxl")
except FileNotFoundError:
self.logger.warning(f"File {xlsx_file} not found")
return pd.DataFrame()
df = pd.read_excel(xlsx_file, engine="openpyxl")
df = pd.read_excel(xlsx_file, engine="openpyxl")
df = df.fillna("") # Replace NaN with empty string

# The df (from sheet index_col=0) looks like (from print(df.head(2).to_csv())):
# Mission,Route,Location,Vehicle,Comment,AUV,LASS,Status*,Patch_test**,km of trackline,MGDS_compilation
# 20230310m1,PuyDesFolles_1v7,MAR PdF,MAUV1,pressure-depth problem with code,x,,production_survey,,78.4,FKt230303_MBARI_AUV
# 20230310m2,PuyDesFolles_2v7,MAR PdF,MAUV2,pressure-depth problem with code,x,,production_survey,,79.6,FKt230303_MBARI_AUV
return df
return df, xlsx_file

def update_db_from_df(self, df: pd.DataFrame, parent_dir: str) -> None:
# Loop through rows in data frame and update the appropriate database fields
Expand All @@ -172,15 +171,19 @@ def update_db_from_df(self, df: pd.DataFrame, parent_dir: str) -> None:
# mission.track_length = row["km of trackline"]
mission.mgds_compilation = row["MGDS_compilation"]
mission.save()
self.logger.info(f"Updated {mission}")
self.logger.info(f"Updated {mission = }")
except Mission.DoesNotExist:
self.logger.warning(f"Mission {row['Mission']} not found in database")

def process_xlsx(self) -> None:
xlsx_files_processed = []
for parent_dir in self.get_parent_dirs():
self.logger.info(f"Processing {parent_dir}")
df = self.read_xlsx_into_df(parent_dir)
self.update_db_from_df(df, parent_dir)
self.logger.debug(f"Processing {parent_dir}")
df, xlsx_file = self.read_xlsx_into_df(parent_dir)
if not df.empty:
self.update_db_from_df(df, parent_dir)
xlsx_files_processed.append(xlsx_file)
return xlsx_files_processed

def get_parent_dirs(self) -> List[str]:
"""Return a list of parent directories to process. Check if they are in the database.
Expand Down Expand Up @@ -249,10 +252,18 @@ def read_from_db_into_rows(self, parent_dir: str) -> pd.DataFrame:
rows.append(row)
return cols, rows

def process_csv(self):
def process_csv(self, xlsx_files_processed: List[str]):
for parent_dir in self.get_parent_dirs():
cols, rows = self.read_from_db_into_rows(parent_dir)
csv_dir = os.path.join(MBARI_DIR, parent_dir, "SMDB")
if (
os.path.join(csv_dir, f"SMDB_{parent_dir}_survey_tally.xlsx")
not in xlsx_files_processed
):
self.logger.debug(
f"No .xlsx file processed for {parent_dir}. Skipping .csv file creation."
)
continue
cols, rows = self.read_from_db_into_rows(parent_dir)
if not os.path.exists(csv_dir):
os.makedirs(csv_dir)
csv_file = os.path.join(csv_dir, f"SMDB_{parent_dir}_survey_tally.csv")
Expand All @@ -268,10 +279,12 @@ def process_csv(self):
st = SurveyTally()
st.process_command_line()
st.setup_logging()
xlsx_files_processed = []
if st.args.read_xlsx:
st.process_xlsx()
xlsx_files_processed = st.process_xlsx()
elif st.args.write_csv:
st.process_csv()
st.process_csv(xlsx_files_processed)
else:
st.logger.error("No action specified. Use --read_xlsx or --write_csv")
xlsx_files_processed = st.process_xlsx()
st.process_csv(xlsx_files_processed)
sys.exit(0)

0 comments on commit b2f2c7d

Please sign in to comment.