Skip to content

Commit

Permalink
Optimise name checks
Browse files — browse the repository at this point in the history
  • Loading branch information
alrichardbollans committed Dec 12, 2023
1 parent 22251fb commit 76e3d88
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions wcvp_download/get_distributions_from_wcvp.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,10 +19,12 @@ def get_distributions_for_accepted_taxa(df: pd.DataFrame, acc_name_col: str, inc
wcvp_with_dists = wcvp_with_dists[
[wcvp_accepted_columns['name'], native_code_column, introduced_code_column]]
# relevant_data = wcvp_with_dists[wcvp_with_dists[wcvp_columns['wcvp_id'].isin(df[wcvp_id_col].values)]]
for name in df[acc_name_col].unique():
if name not in wcvp_with_dists[wcvp_accepted_columns['name']].unique():
raise ValueError(
f'{name} not an accepted name in your WCVP version when checking for distribution data. This could be an issue with incorrectly specified version.\n Or could be a result of inclusion of Artifical Hyrbids. Also check spelling')
interstn = set(df[acc_name_col].tolist()).intersection(wcvp_with_dists[wcvp_accepted_columns['name']].tolist())
df_names = df[acc_name_col].unique()
problems = [name for name in df_names if name not in interstn]
if len(problems) > 0:
raise ValueError(
f'{problems}: not accepted names in your WCVP version when checking for distribution data. This could be an issue with incorrectly specified version.\n Or could be a result of inclusion of Artifical Hyrbids. Also check spelling')

output = pd.merge(df, wcvp_with_dists, how='left', left_on=acc_name_col,
right_on=wcvp_accepted_columns['name'])
Expand Down

0 comments on commit 76e3d88

Please sign in to comment.