From 7cfb0ee4950365658321b789b00143935c8f7585 Mon Sep 17 00:00:00 2001 From: alrichardbollans <38588335+alrichardbollans@users.noreply.github.com> Date: Thu, 5 Oct 2023 17:27:57 +0100 Subject: [PATCH] Allow specifying versions in matching methods --- wcvp_name_matching/get_accepted_info.py | 6 +++--- wcvp_name_matching/wcvp_matching.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/wcvp_name_matching/get_accepted_info.py b/wcvp_name_matching/get_accepted_info.py index 93881c4..88cc7fb 100644 --- a/wcvp_name_matching/get_accepted_info.py +++ b/wcvp_name_matching/get_accepted_info.py @@ -287,7 +287,7 @@ def get_accepted_info_from_names_in_column(in_df: pd.DataFrame, name_col: str, families_of_interest: List[str] = None, family_column: str = None, manual_resolution_csv: str = None, - match_level: str = 'full') -> pd.DataFrame: + match_level: str = 'full', wcvp_version: str = None) -> pd.DataFrame: """ First tries to match names in df to wcvp directly to obtain accepted info and then matches names in df using knms and gets corresponding accepted info from wcvp @@ -343,7 +343,7 @@ def get_accepted_info_from_names_in_column(in_df: pd.DataFrame, name_col: str, # Check families of interest and in family column are in wcvp, and remove if not if families_of_interest is not None or family_column is not None: - wcvp_families_df = get_all_taxa() + wcvp_families_df = get_all_taxa(version=wcvp_version) wcvp_families = list(wcvp_families_df[wcvp_columns['family']].unique()) wcvp_acc_families = list(wcvp_families_df[wcvp_accepted_columns['family']].unique()) wcvp_all_families = wcvp_families + wcvp_acc_families @@ -379,7 +379,7 @@ def get_accepted_info_from_names_in_column(in_df: pd.DataFrame, name_col: str, in_df[unique_submission_index_col] = in_df[unique_submission_index_col].astype(str) df = df.drop_duplicates(subset=[unique_submission_index_col]) - all_taxa = get_all_taxa(families_of_interest=families_of_interest) + all_taxa = get_all_taxa(families_of_interest=families_of_interest, version=wcvp_version) # First get manual matches using given ipni ids if manual_resolution_csv is not None: manual_match_df = pd.read_csv(manual_resolution_csv) diff --git a/wcvp_name_matching/wcvp_matching.py b/wcvp_name_matching/wcvp_matching.py index 56a306c..e8948ed 100644 --- a/wcvp_name_matching/wcvp_matching.py +++ b/wcvp_name_matching/wcvp_matching.py @@ -92,7 +92,7 @@ def match_name_to_concatenated_columns(df: pd.DataFrame, matching_name_col: str, def get_wcvp_info_for_names_in_column(df: pd.DataFrame, matching_name_col: str, unique_submission_id_col: str, - all_taxa: pd.DataFrame = None, family_column: str = None): + all_taxa: pd.DataFrame = None, family_column: str = None, wcvp_version:str = None): """ Appends accepted info columns to df from list of taxa, based on names in matching_name_col :param df: @@ -101,7 +101,7 @@ def get_wcvp_info_for_names_in_column(df: pd.DataFrame, matching_name_col: str, :return: """ if all_taxa is None: - all_taxa = get_all_taxa() + all_taxa = get_all_taxa(version=wcvp_version) # First try with author info i.e. taxon name + taxon_authors and then # taxon name + parenthetical_author + primary_author