Skip to content

Commit

Permalink
version 1.1.3
Browse files Browse the repository at this point in the history
  • Loading branch information
phoeguo committed Aug 25, 2020
1 parent 2ce3a5e commit ae52b8e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
4 changes: 2 additions & 2 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.onAttach <- function(libname, pkgname){
packageStartupMessage(
"g3viz: visualizing gene/genome/gentics data for fun.\n
- Pfam (v33.1)\n
"g3viz: visualizing gene/genome/gentics data for fun.
- Pfam (v33.1)
- UniProt (date: 2020/08/24)\n
Any questions, please send emails to <[email protected]> or post on GitHub <https://github.com/G3viz/g3viz/issues>."
)
Expand Down
22 changes: 11 additions & 11 deletions prepare_data/generate_hgnc2pfam.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ message("Parsing filtered human data from UniProt ...")
# - Length
# --------------------------

uniprot_fn = "uniprot-filtered-organism__Homo+sapiens+(Human)+[9606]_+AND+review--.tab.gz"
uniprot_fn <- "uniprot-filtered-organism__Homo+sapiens+(Human)+[9606]_+AND+review--.tab.gz"

uniprot.file <- gzcon(file(uniprot_fn, "r"))
uniprot.txt <- readLines(uniprot.file)
Expand Down Expand Up @@ -57,8 +57,8 @@ for(idx in 1:nrow(uniprot.to.parse.df)){
# Date: 2020-08-24
# Version: 33.1
message("Download Pfam data from Pfam website ...")
pfam_url = "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/proteomes/9606.tsv.gz"
pfam_fn = "data-raw/hgnc_to_pfam/9606.tsv.gz"
pfam_url <- "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/proteomes/9606.tsv.gz"
pfam_fn <- "9606.tsv.gz"
download.file(pfam_url, pfam_fn)

# --------------------------
Expand All @@ -75,17 +75,17 @@ pfam.df <- read.table(
header = FALSE
)

colnames(pfam.df) = c("id", "align.start", "align.end", "start", "end",
"hmm.acc", "hmm.name", "type", "hmm.start", "hmm.end", "hmm.length",
"bit.score", "e.value", "clan")
pfam.sub.df = pfam.df[, c("id", "start", "end", "hmm.acc", "hmm.name", "type")]
colnames(pfam.df) <- c("id", "align.start", "align.end", "start", "end",
"hmm.acc", "hmm.name", "type", "hmm.start", "hmm.end", "hmm.length",
"bit.score", "e.value", "clan")
pfam.sub.df <- pfam.df[, c("id", "start", "end", "hmm.acc", "hmm.name", "type")]

# Left-join the Pfam rows onto the UniProt table by UniProt ID (all.x = TRUE keeps entries without a Pfam match)
message("Generating mapping table ...")
hgnc2pfam.df = merge(uniprot.single.df, pfam.sub.df, by.x="uniprot", by.y = "id", all.x=TRUE, sort = FALSE)
hgnc2pfam.df = hgnc2pfam.df[with(hgnc2pfam.df, order(symbol, uniprot, start, end)), ]
hgnc2pfam.df = hgnc2pfam.df[, c("symbol", "uniprot", "length",
"start", "end", "hmm.acc", "hmm.name", "type")]
hgnc2pfam.df <- merge(uniprot.single.df, pfam.sub.df, by.x="uniprot", by.y = "id", all.x=TRUE, sort = FALSE)
hgnc2pfam.df <- hgnc2pfam.df[with(hgnc2pfam.df, order(symbol, uniprot, start, end)), ]
hgnc2pfam.df <- hgnc2pfam.df[, c("symbol", "uniprot", "length",
"start", "end", "hmm.acc", "hmm.name", "type")]

# Save the mapping table as an .rda file; afterwards, move that file to the "data" directory
save(hgnc2pfam.df, file="hgnc2pfam.df.rda")
Expand Down

0 comments on commit ae52b8e

Please sign in to comment.