Skip to content

Commit

Permalink
Merge pull request #9 from terraref/germplasm_contrib
Browse files Browse the repository at this point in the history
Germplasm contrib
  • Loading branch information
robkooper authored Oct 9, 2018
2 parents f1f5e71 + 61afc69 commit 7cb57ed
Show file tree
Hide file tree
Showing 6 changed files with 996 additions and 0 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ for TERRA-REF.
The server code was generated using [swagger-codegen](https://github.com/swagger-api/swagger-codegen).

## Requirements

Python 3.5.2+

## Usage

To run, first start an instance of the BETY database:

```
Expand Down Expand Up @@ -42,6 +44,11 @@ preliminary mapping of BETY fields to BRAPI objects.
| /germplasm | cultivars. | |
| /observations | traits | |

## Contributed Data

This repository provides the canonical reference for data that is outside the scope of the databases used in the TERRA-REF program. Such data can be found in the `/contrib/` folder.

Genomics data in `contrib/germplasm` is in a set of CSVs that were previously only available in the [experimental design section of the TERRA-REF documentation](https://docs.terraref.org/scientific-objectives-and-experimental-design/experimental-design). These files provide metadata that describe the germplasm used in the sorghum trials, and were originally prepared by Noah Fahlgren.

## How to add an endpoint

Expand Down
32 changes: 32 additions & 0 deletions contrib/germplasm/commercial_hybrids.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
Entry,Source,Common name,Ecotype
301,PI651496,RIO,Sweet Sorghum
302,PI651491,BAILEY,Sweet Sorghum
303,PI651495,DALE,Sweet Sorghum
304,PI653616,WRAY,Sweet Sorghum
305,PI653411,M 81E,Sweet Sorghum
306,PI586537,ATLAS,Sweet Sorghum
307,PI535785,N100,Sweet Sorghum
308,PI583832,TOP_76_6,Sweet Sorghum
309,PI641824,KS_ORANGE,Sweet Sorghum
310,PI641825,LEOTI,Sweet Sorghum
311,PI566819,DELLA,Sweet Sorghum
312,PI571107,COLMAN,Sweet Sorghum
313,PI505722,ZM/A 5298,Biomass Sorghum
314,PI505735,ZM/A 5345,Biomass Sorghum
315,PI562730,Grain Grass 3A,Biomass Sorghum
316,PI297171,IS13647,Biomass Sorghum
317,PI506122,Epo,Biomass Sorghum
318,PI297130,IS13613,Biomass Sorghum
319,PI154844,GRASSL,Biomass Sorghum
320,PI508366,MA 38,Biomass Sorghum
321,PI506114,Kwete-Mila,Biomass Sorghum
322,PI506030,Tchinlouol,Biomass Sorghum
323,PI506069,Mbonou,Biomass Sorghum
324,PI267573,IS 2983,Biomass Sorghum
325,PI564163,BTx623,Grain Sorghum
326,PI656056,P850029,Grain Sorghum
327,PI533964,"Safara, Kordafan",Grain Sorghum
328,PI533759,Mugbash 56/56,Grain Sorghum
329,PI35038,SUMAC,Grain Sorghum
330,PI561472,SURENO,Grain Sorghum
401,Richardson Seeds,700D BMR,Forage Sorghum
205 changes: 205 additions & 0 deletions contrib/germplasm/germplasm.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
---
title: "Building Data for Germplasm Endpoint"
output: html_document
---


```{r}
library(tidyverse)
# Load the genomics metadata for the sorghum lines and print a JSON preview
# of the first records.
genotypes <- read_csv('sorghum_lines_genomics.csv')
# head() returns at most ten rows, so a file with fewer than ten lines is
# previewed as-is instead of being indexed past its last row by `[1:10, ]`.
g_json <- jsonlite::toJSON(head(genotypes, 10))
print(g_json)
```


Search for germplasm from https://www.genesys-pgr.org BrAPI endpoint ...

https://www.genesys-pgr.org/brapi/v1/germplasm/0ba28636-e634-428a-aa58-4346a20de326


```{r}
# Build one BrAPI germplasm record per row of `genotypes` by querying the
# Genesys BrAPI endpoint. Rows whose lookup fails get a blank record that is
# pre-filled with the sorghum taxonomy defaults below. The result is `b`, a
# list with one record per row, in the same order as `genotypes`.

# Install the BrAPI client only when it is missing, so re-running this chunk
# does not reinstall it every time. The repository is given as "owner/repo".
if (!requireNamespace("brapi", quietly = TRUE)) {
  devtools::install_github("CIP-RIU/brapi")
}
library(brapi)
library(jsonlite)
genesys_con <- ba_connect(brapiDb = as.ba_db(protocol = 'https://', db = 'www.genesys-pgr.org', secure = TRUE))

# TRUE when a lookup produced nothing usable (an error or an empty result).
lookup_failed <- function(x) {
  is.null(x) || inherits(x, "try-error")
}

source_ids <- genotypes$`source material identifiers`

# Responses are stored by position so that a[[i]] always belongs to row i of
# `genotypes`, even when accession names repeat. Each request is wrapped in
# try() so one failed lookup is recorded as a "try-error" instead of aborting
# the whole loop; the fallback branch further down relies on that.
a <- vector("list", length(source_ids))
for (z in seq_along(source_ids)) {
  response <- try(
    brapi::ba_germplasm_details(
      con = genesys_con,
      rclass = "json",
      germplasmDbId = source_ids[z]
    ),
    silent = TRUE
  )
  # `a[z] <- list(...)` keeps the slot even if the response is NULL.
  a[z] <- list(response)
}
names(a) <- genotypes$accession

# Derive the blank template from the first lookup that succeeded, so a failure
# on the very first row does not break template construction.
succeeded <- !vapply(a, lookup_failed, logical(1))
if (!any(succeeded)) {
  stop(
    "No germplasm record could be retrieved from Genesys; ",
    "cannot derive the record template.",
    call. = FALSE
  )
}
empty <- lapply(fromJSON(a[[which(succeeded)[1]]])$result, function(x) "")
empty$commonCropName <- "sorghum"
empty$genus <- "Sorghum"
empty$species <- "bicolor"
empty$subtaxa <- "subsp. bicolor"
empty$speciesAuthority <- "(L.) Moench"
empty$biologicalStatusOfAccessionCode <- "412"

# Fields kept in each final record, in output order.
record_fields <- c(
  "accessionNumber", "acquisitionDate", "biologicalStatusOfAccessionCode",
  "breedingMethodDbId", "commonCropName", "defaultDisplayName", "donors",
  "genus", "germplasmDbId", "germplasmName", "germplasmPUI", "instituteCode",
  "instituteName", "pedigree", "seedSource", "species", "speciesAuthority",
  "subtaxa", "subtaxaAuthority", "synonyms", "taxonIds",
  "typeOfGermplasmStorageCode"
)

b <- vector("list", length(a))
for (i in seq_along(a)) {
  if (lookup_failed(a[[i]])) {
    # Lookup failed: start from the blank template and keep the source id.
    res <- empty
    accession <- genotypes$accession[i]
    res$germplasmDbId <- source_ids[i]
  } else {
    res <- fromJSON(a[[i]])$result
    # NOTE(review): spaces are stripped from the accession only on this
    # branch, as in the original logic; confirm the fallback branch should
    # keep them.
    accession <- gsub(" ", "", genotypes$accession[i])
  }
  res$donors <- list(donorAccessionNumber = "", donorInstituteCode = "", germplasmPUI = "")
  res$breedingMethodDbId <- ""
  res$instituteName <- ""
  res$germplasmName <- res$accessionNumber <- res$defaultDisplayName <- accession
  res$germplasmPUI <- paste0("https://purl.org/germplasm/id/", res$germplasmDbId)
  res$germplasmSeedSource <- res$seedSource
  res$subtaxaAuthority <- res$speciesAuthority
  res$taxonIds <- list(
    list(sourceName = "ncbiTaxon", taxonId = "http://purl.obolibrary.org/obo/NCBITaxon_4558"),
    list(sourceName = "USDA Plants", taxonId = "https://plants.usda.gov/core/profile?symbol=SOBIB")
  )
  b[[i]] <- res[record_fields]
}
```

Get IDs from BETY

```{r, eval=FALSE}
library(tidyverse)
# Request the cultivar listing from the BETYdb API (no row limit) and keep the
# second element of the parsed response as `cultivars`.
# NOTE(review): presumably element 2 is the data payload -- confirm against
# the API response layout.
cultivars <- jsonlite::fromJSON(
  "https://terraref.ncsa.illinois.edu/bety/api/v1/cultivars?key=9999999999999999999999999999999999999999&limit=none",
  simplifyVector = TRUE,
  flatten = TRUE
)[[2]]
```

```{r}
# Cross-reference every record in `b` with the BETYdb cultivar of the same
# name: attach an `xref` (BETYdb id and view URL) and replace `germplasmDbId`
# with the BETYdb cultivar id.
for (i in seq_along(b)) {
  # match() gives the position of the first cultivar with this name, or NA.
  hit <- match(b[[i]]$accessionNumber, cultivars$cultivar.name)
  if (length(hit) != 1 || is.na(hit)) {
    # Without a match there is no BETYdb id to assign; keep the record's
    # existing germplasmDbId instead of overwriting it with an empty value.
    warning(
      "No BETYdb cultivar found for record ", i, "; xref not added.",
      call. = FALSE
    )
    next
  }
  cultivars_id <- cultivars$cultivar.id[hit]
  b[[i]][["xref"]] <- list(
    id = cultivars_id,
    source = cultivars$cultivar.view_url[hit]
  )
  b[[i]]$germplasmDbId <- cultivars_id
}
```


### Add other cultivars

```{r}
# Create records for the BETYdb cultivars that are not yet represented in `b`,
# using the first record of `b` as a template, and combine both sets in `cc`.

# Accession numbers already covered by the records in `b`.
known_accessions <- unlist(lapply(b, function(rec) rec$accessionNumber))
remaining_cultivars <- cultivars[!cultivars$cultivar.name %in% known_accessions, ]

# NOTE(review): fields that are not overwritten below (for example pedigree,
# seedSource, donors, typeOfGermplasmStorageCode) are inherited unchanged from
# b[[1]] -- confirm that is intended for every remaining cultivar.
template <- b[[1]]

# seq_len() makes the loop a no-op when no cultivars remain; `1:nrow()` would
# iterate over c(1, 0) in that case.
bb <- vector("list", nrow(remaining_cultivars))
for (i in seq_len(nrow(remaining_cultivars))) {
  rec <- template
  cultivar <- remaining_cultivars[i, ]
  # Species id 2588 selects the sorghum values; every other id gets the
  # wheat values.
  is_sorghum <- cultivar$cultivar.specie_id == 2588

  rec$accessionNumber <- cultivar$cultivar.name
  rec$acquisitionDate <- ""
  rec$commonCropName <- ifelse(is_sorghum, "sorghum", "wheat")
  rec$defaultDisplayName <- cultivar$cultivar.name
  rec$genus <- ifelse(is_sorghum, "Sorghum", "Triticum")
  rec$germplasmDbId <- cultivar$cultivar.id
  rec$germplasmName <- cultivar$cultivar.name
  rec$germplasmPUI <- ""
  rec$instituteCode <- ""
  # `species` holds the specific epithet. The wheat value was the genus name
  # 'Triticum'; the wheat taxon links below (NCBITaxon_4565, USDA symbol TRAE)
  # refer to Triticum aestivum, so the epithet is 'aestivum'.
  rec$species <- ifelse(is_sorghum, "bicolor", "aestivum")
  rec$speciesAuthority <- ifelse(is_sorghum, "(L.) Moench", "L.")
  rec$subtaxa <- ifelse(is_sorghum, "subsp. bicolor", "")
  rec$subtaxaAuthority <- ifelse(is_sorghum, "(L.) Moench", "")
  rec$synonyms <- ""
  rec$taxonIds[[1]]$taxonId <- ifelse(
    is_sorghum,
    "http://purl.obolibrary.org/obo/NCBITaxon_4558",
    "http://purl.obolibrary.org/obo/NCBITaxon_4565"
  )
  rec$taxonIds[[2]]$taxonId <- ifelse(
    is_sorghum,
    "https://plants.usda.gov/core/profile?symbol=SOBIB",
    "https://plants.usda.gov/core/profile?symbol=TRAE"
  )
  rec$xref <- list(
    id = cultivar$cultivar.id,
    source = cultivar$cultivar.view_url
  )
  bb[[i]] <- rec
}
cc <- append(b, bb)
```


```{r}
# Placeholder: the loop body is empty, so iterating over `cc` has no effect.
for(i in seq_along(cc)){
}
```


```{r}
# Serialise the combined records in `cc` to pretty-printed JSON and write them
# to the data folder of the BrAPI server checkout.

# Drop any element names from `cc` (not expected to be needed; defensive).
cc <- unname(cc)
ccc <- toJSON(cc, auto_unbox = TRUE)
germplasm_json_path <- '~/dev/brapi/bety_brapi/data/germplasm.json'
write_lines(jsonlite::prettify(ccc), germplasm_json_path)

# Earlier alternatives, kept for reference:
# jsonlite::write_json(c, 'germplasm.txt')
# dir.create('~/dev/brapi/data')
# jsonlite::write_json(b, '~/dev/brapi/germplasm.json')
```


## Further updates

```{r}
# Re-read the germplasm records as nested lists; simplifyVector = FALSE stops
# jsonlite from collapsing JSON arrays into vectors or data frames.
# NOTE(review): this reads 'germplasm.json' from the working directory, while
# the chunks in this document write to
# '~/dev/brapi/bety_brapi/data/germplasm.json' -- confirm both refer to the
# same file.
germplasm <- jsonlite::fromJSON('germplasm.json', simplifyVector = FALSE)
```


## Add latitudes and longitudes for referenced baps

```{r}
# Attach longitude/latitude from 'referenced_bap.txt' to every germplasm
# record whose accession number appears in the `Taxa` column, then rewrite
# germplasm.json.
locations <- readr::read_tsv('referenced_bap.txt')
for (i in seq_along(germplasm)) {
  accession <- germplasm[[i]]$accessionNumber
  # Skip records without a single accession number or without a location.
  if (length(accession) != 1 || !accession %in% locations$Taxa) {
    next
  }
  message("Adding coordinates to record ", i)
  # %in% (rather than ==) never yields NA, so rows with a missing Taxa value
  # are not pulled in as all-NA rows.
  location <- locations[locations$Taxa %in% accession, ]
  germplasm[[i]][['additionalInfo']] <- list(
    longitude = location$Longitude,
    latitude = location$Latitude
  )
}
# auto_unbox = TRUE matches the first export of this file: without it every
# scalar field of the records would be rewritten as a one-element JSON array.
# NOTE(review): toJSON() rounds numbers to 4 decimal digits by default; pass
# `digits = NA` if full coordinate precision is required.
readr::write_lines(
  jsonlite::prettify(jsonlite::toJSON(germplasm, auto_unbox = TRUE)),
  '~/dev/brapi/bety_brapi/data/germplasm.json'
)
```

##

### Construct inserts for attributes table in betydb


```{r}
# Build one SQL INSERT statement per record in `b` that stores the record's
# JSON in the `attributes` table.
#   insert_germplasm: list keyed by accession number; each entry holds the
#                     BETYdb cultivar id and the record serialised as JSON.
#   inserts:          list of SQL statements, one per entry of
#                     insert_germplasm.
insert_germplasm <- list()
for (i in seq_along(b)) {
  cultivars.name <- b[[i]]$accessionNumber
  # `b` is an unnamed list, so the record is taken by position. The BETYdb id
  # is the one stored in the record's xref by the cross-reference chunk.
  id <- b[[i]]$xref$id
  if (length(id) != 1 || is.na(id)) {
    warning(
      "Record ", i, " has no single BETYdb cultivar id; no insert generated.",
      call. = FALSE
    )
    next
  }
  json <- jsonlite::toJSON(b[[i]], auto_unbox = TRUE)
  insert_germplasm[[cultivars.name]] <- list(id = id, json = as.character(json))
}

# NOTE(review): the column names (table, table_id, json) are kept from the
# original statement -- verify them against the actual `attributes` schema.
inserts <- lapply(insert_germplasm, function(x) {
  # Double any single quote so the JSON is a valid SQL string literal.
  json_literal <- gsub("'", "''", x$json, fixed = TRUE)
  paste0(
    "insert into attributes (table, table_id, json) values (",
    "'cultivars', ", x$id, ", '", json_literal, "');"
  )
})
```
Loading

0 comments on commit 7cb57ed

Please sign in to comment.