forked from shankarkshakya/GBS-Pcinnamomi
-
Notifications
You must be signed in to change notification settings - Fork 2
/
vcfR2migrate.R
111 lines (80 loc) · 3.08 KB
/
vcfR2migrate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#' @title Convert vcfR object to MigrateN format
#' @description The function converts vcfR format data to text format that can be used as infile for MigrateN.
#'
#' @param vcf a vcfR object.
#' @param pop factor of population.
#' @param in_pop population of interest to report in output file.
#' @param outfile name of output file.
#' @return a text file that can be used as an input for MigrateN software (SNP format).
#'
#' @details
#' This function converts the vcfR object to a text file which can be used downstream for MigrateN.
#' The function will remove loci with missing data, indels, and multiallelic loci. Thus, only SNPs data are analysed
#' where length of each locus is 1.
#'
#'
#' @example
#' library(vcfR)
#' pkg <- "pinfsc50"
#' my_vcf <- system.file("extdata", "pinf_sc50.vcf.gz", package = pkg)
#' my_vcf <- read.vcfR( my_vcf, verbose = FALSE )
#' my_pop <- as.factor(rep(c("A", "B", "C"), each = 6))
#' vcfR2migrate(vcf = my_vcf , pop = my_pop , in_pop = c("A","C"), out_file = "my2pop.txt")
#'
#'
#' @export
vcfR2migrate <- function(vcf, pop, in_pop, out_file) {
library(vcfR)
#Removing indels and multiallelic loci
vcf <- extract.indels(vcf, return.indels = F)
vcf <- vcf[is.biallelic(vcf)]
gt <- extract.gt(vcf, return.alleles = T, convertNA = T)
gt <- gt[!rowSums((is.na(gt))),]
FORMAT <- vcf@gt[1:nrow(gt),1]
vcf@gt <- cbind(FORMAT, gt)
vcf_list <- vector("list", length(unique(pop)))
names(vcf_list) <- unique(pop)
for (i in (1:length(vcf_list))) {
temp_pop <- names(vcf_list[i])
temp_vcf <- vcf
FORMAT <- vcf@gt[,1]
gt <- temp_vcf@gt[, -1]
cols <- gt[ , which(names(vcf_list[i]) == pop)]
temp_vcf@gt <- cbind(FORMAT, cols)
vcf_list[[i]] <- temp_vcf
}
n.pop <- vector("list", length(vcf_list))
names(n.pop) <- names(vcf_list)
for ( i in 1:length(vcf_list)) {
pop1 <- vcf_list[[i]]
gt <- pop1@gt
all <- vector("list", nrow(gt))
for (j in 1:nrow(gt)){
ind <- gt[j, -1]
all[[j]] <- as.data.frame(unlist(strsplit(ind, "\\||/")))
}
n.pop[[i]] <- do.call("rbind", all)
}
migrate_pop <- n.pop
for (i in 1:length(n.pop)) {
temp.pop <- n.pop[[i]]
names <- rownames(temp.pop)
rownames(temp.pop) <- NULL
temp.pop <- cbind(names,temp.pop)
indnum <- nrow(temp.pop)/ (nrow(vcf@gt))
colnames(temp.pop) <- c(indnum, names(n.pop)[[i]])
migrate_pop[[i]] <- temp.pop
}
loci_length <- rep(1, nrow(vcf@gt))
x <- list(loci_length = loci_length, migrate_pop = migrate_pop)
in_pop <- in_pop
output_path <- file.path(out_file)
data_to_write <- x$migrate_pop[sapply(x$migrate_pop, function(x) any(colnames(x) %in% in_pop))]
# write header
header <- paste0("N ", length(data_to_write), " ", length(x$loci_length), "\n", paste(x$loci_length, collapse = " "))
writeLines(header, output_path)
# write data
for (frame in data_to_write) {
write.table(frame, file = output_path, append = TRUE, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
}
}