From 7619b4487f410e9e370b9d266c182693e39158cc Mon Sep 17 00:00:00 2001
From: tcnichol
Date: Mon, 6 May 2019 10:18:19 -0500
Subject: [PATCH] initial commit, add first todo

new method for getting exp id
adding brapi endpoints
adding methods for brapi calls
brapi get studies, brapi get studies/germplasm added
removing todo since experiment_id added to sites json
method for getting map of site_ids and cultivar info per experiment id.
using brapi to get map of cultivars by site_id, this is added to cached sites json file and then added to site metadata in lemnatec
fixing unicode u
adding site names, this will help since brapi endpoint that gives treatments has studyDbId, but not siteid - it has sitename
adding treatments to cached sites file
treatments added to metadata
some plots do not have cultivar or treatment data available. in these cases the site entry will have 'no info available' as the value for cultivar and treatments
all dict entries now strings
check for keys replaces try/except, ending W or E removed
these changes work with an issue and pull request created in brapi.
pagination did not work properly for the observationunits endpoint
using observationUnitName and not location_abbrevation
using 'definition' instead of 'treatment_description' to match actual key in bety db
adding page size to call to brapi observationunits
using one method brapi_get to make all requests
still need to fix getting all the observationunits
iterate through results for observation units page by page
one method brapi_get handles the brapi url, and version
method also paginates through results
new brapi.py class - brapi methods moved there
v1 added to endpoints, not read from environment variable
no need to remove ending W or E from plot names
---
 terrautils/betydb.py   |  17 +++++-
 terrautils/brapi.py    | 150 +++++++++++++++++++++++++++++++++++++++++
 terrautils/lemnatec.py |   4 ++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 terrautils/brapi.py

diff --git a/terrautils/betydb.py b/terrautils/betydb.py
index d96ec58..cb21f63 100644
--- a/terrautils/betydb.py
+++ b/terrautils/betydb.py
@@ -8,9 +8,13 @@
 from datetime import datetime
 
 import requests
+import urllib
 import json
 from osgeo import ogr
+import brapi
 
+BRAPI_URL="https://brapi.workbench.terraref.org/brapi"
+BRAPI_VERSION="v1"
 BETYDB_URL="https://terraref.ncsa.illinois.edu/bety"
 BETYDB_LOCAL_CACHE_FOLDER = os.environ.get('BETYDB_LOCAL_CACHE_FOLDER', '/home/extractor/')
 
@@ -236,12 +240,23 @@ def get_sites(filter_date='', include_halves=False, **kwargs):
         if query_data:
             results = []
             for exp in query_data:
+                exp_site_cultivar_map = brapi.get_site_id_cultivar_info_map(exp['id'])
                 start = datetime.strptime(exp['start_date'], '%Y-%m-%d')
                 end = datetime.strptime(exp['end_date'], '%Y-%m-%d')
                 if start <= targ_date <= end:
                     if 'sites' in exp:
                         for t in exp['sites']:
                             s = t['site']
+                            s['experiment_id'] = exp['id']
+                            current_site_id = s['id']
+                            if current_site_id in exp_site_cultivar_map:
+                                cultivar_info_for_site = exp_site_cultivar_map[s['id']]['cultivar']
+                                treatment_info_for_site = exp_site_cultivar_map[s['id']]['treatments']
+                                s['cultivar'] = cultivar_info_for_site
+                                s['treatments'] = treatment_info_for_site
+                            else:
+                                s['cultivar'] = 'no info'
+                                s['treatments'] = 'no info'
                             # TODO: Eventually find better solution for S4 half-plots - they are omitted here
                             if (s["sitename"].endswith(" W") or s["sitename"].endswith(" E")) and not include_halves:
                                 continue
@@ -327,4 +342,4 @@ def submit_traits(csv, filetype='csv', betykey='', betyurl=''):
         return resp.json()['data']['ids_of_new_traits']
     else:
         logging.error("Error submitting data to BETYdb: %s -- %s" % (resp.status_code, resp.reason))
-        resp.raise_for_status()
+        resp.raise_for_status()
\ No newline at end of file
diff --git a/terrautils/brapi.py b/terrautils/brapi.py
new file mode 100644
index 0000000..777e1a2
--- /dev/null
+++ b/terrautils/brapi.py
@@ -0,0 +1,150 @@
+"""brapi
+
+This module provides wrappers to BRAPI API for getting and posting data.
+"""
+
+import os
+
+import requests
+
+try:
+    from urllib.parse import urljoin  # Python 3
+except ImportError:
+    from urlparse import urljoin  # Python 2
+
+BRAPI_URL = "https://brapi.workbench.terraref.org/brapi"
+BRAPI_VERSION = "v1"
+
+# Placeholder stored for a site when BrAPI has no matching record.
+NO_INFO = 'no info'
+
+
+def brapi_get(path='', request_params=None):
+    """Return the rows of a BrAPI list endpoint, combined across all pages.
+
+    The server is read from the BRAPI_URL environment variable, falling back
+    to the module-level BRAPI_URL.
+
+    Args:
+        path: endpoint path below "brapi/", e.g. "v1/studies".
+        request_params: optional dict of query parameters. It is copied, so
+            the caller's dict is never modified.
+
+    Returns:
+        A list with the ['result']['data'] entries of every page.
+
+    Raises:
+        requests.HTTPError: if the server answers with an error status.
+    """
+    brapi_url = os.environ.get('BRAPI_URL', BRAPI_URL)
+    # urljoin resolves against the last '/' of brapi_url, so the trailing
+    # 'brapi' segment of the default URL is replaced by 'brapi/' + path.
+    request_url = urljoin(brapi_url, 'brapi/' + path)
+
+    # Work on a copy: 'page' is added below and request_params may be None.
+    params = dict(request_params) if request_params else {}
+
+    result = []
+    page = 0
+    total_pages = 1
+    # BrAPI pages are zero-based: fetch pages 0 .. totalPages - 1.
+    while page < total_pages:
+        if page:
+            params['page'] = page
+        response = requests.get(url=request_url, params=params)
+        response.raise_for_status()
+        payload = response.json()
+        result.extend(payload['result']['data'])
+        total_pages = payload['metadata']['pagination']['totalPages']
+        page += 1
+    return result
+
+
+def get_brapi_study(studyDbId):
+    """Return the BrAPI studies entries matching studyDbId."""
+    return brapi_get(path=BRAPI_VERSION + '/studies',
+                     request_params={'studyDbId': studyDbId})
+
+
+def get_brapi_observationunits(studyDbId):
+    """Return the BrAPI observation units matching studyDbId."""
+    return brapi_get(path=BRAPI_VERSION + '/observationunits',
+                     request_params={'studyDbId': studyDbId})
+
+
+def get_brapi_study_layouts(studyDbId):
+    """Return a map of site id to layout info for the given study.
+
+    Returns:
+        dict keyed by str(observationUnitDbId); each value is a dict with the
+        string entries 'sitename' and 'germplasmDbId'.
+    """
+    data = brapi_get(path='%s/studies/%s/layouts' % (BRAPI_VERSION, studyDbId))
+
+    site_id_layouts_map = {}
+    for entry in data:
+        # Accept the 'germplasmDbId' spelling used by the germplasm endpoint
+        # as well as the 'germPlasmDbId' spelling this client originally read.
+        if 'germplasmDbId' in entry:
+            cultivar_id = entry['germplasmDbId']
+        else:
+            cultivar_id = entry['germPlasmDbId']
+        site_id_layouts_map[str(entry['observationUnitDbId'])] = {
+            'sitename': str(entry['observationUnitName']),
+            'germplasmDbId': str(cultivar_id),
+        }
+    return site_id_layouts_map
+
+
+def get_brapi_study_germplasm(studyDbId):
+    """Return a map of germplasm id to germplasm info for the given study.
+
+    Returns:
+        dict keyed by str(germplasmDbId); each value is a dict with the string
+        entries 'germplasmName', 'species', 'genus' and 'germplasmDbId'.
+    """
+    data = brapi_get(path='%s/studies/%s/germplasm' % (BRAPI_VERSION, studyDbId))
+
+    fields = ('germplasmName', 'species', 'genus', 'germplasmDbId')
+    germplasm_id_data_map = {}
+    for entry in data:
+        germplasm = {field: str(entry[field]) for field in fields}
+        germplasm_id_data_map[germplasm['germplasmDbId']] = germplasm
+    return germplasm_id_data_map
+
+
+def get_experiment_observation_units_map(studyDbId):
+    """Return a map of observation unit name to treatment info for the study.
+
+    Returns:
+        dict keyed by str(observationUnitName); each value is a dict with the
+        string entries 'definition', 'id' and 'experiment_id'.
+    """
+    location_name_treatments_map = {}
+    for entry in get_brapi_observationunits(studyDbId):
+        location_name_treatments_map[str(entry['observationUnitName'])] = {
+            'definition': str(entry['observationtreatment']),
+            'id': str(entry['treatmentDbId']),
+            'experiment_id': str(entry['studyDbId']),
+        }
+    return location_name_treatments_map
+
+
+def get_site_id_cultivar_info_map(studyDbId):
+    """Return per-site cultivar and treatment info for the given study.
+
+    Combines the study's layouts, germplasm and observation units.
+
+    Returns:
+        dict keyed by site id (str). Each value is the layout entry ('sitename',
+        'germplasmDbId') extended with 'cultivar' and 'treatments'; either is
+        the string 'no info' when no matching record was found.
+    """
+    layouts = get_brapi_study_layouts(studyDbId)
+    germplasm = get_brapi_study_germplasm(studyDbId)
+    observationunits = get_experiment_observation_units_map(studyDbId)
+
+    for site_info in layouts.values():
+        site_info['cultivar'] = germplasm.get(site_info['germplasmDbId'], NO_INFO)
+        site_info['treatments'] = observationunits.get(site_info['sitename'], NO_INFO)
+    return layouts
diff --git a/terrautils/lemnatec.py b/terrautils/lemnatec.py
index 0b7cf3a..5bf6df2 100644
--- a/terrautils/lemnatec.py
+++ b/terrautils/lemnatec.py
@@ -116,6 +116,10 @@ def _get_sites(cleaned_md, date, sensorId):
         site_id = str(bety_site["id"])
         sites[site_id] = {}
         sites[site_id]["sitename"] = bety_site["sitename"]
+        sites[site_id]["experiment_id"] = bety_site["experiment_id"]
+        sites[site_id]["treatments"] = bety_site["treatments"]
+        sites[site_id]["cultivar"] = bety_site["cultivar"]
+
         if "view_url" in bety_site:
             sites[site_id]["url"] = bety_site["view_url"]
         else: