diff --git a/articles/a01_overview.html b/articles/a01_overview.html index 7c23adfe..04af8c99 100644 --- a/articles/a01_overview.html +++ b/articles/a01_overview.html @@ -188,12 +188,12 @@

The gen_tibble loci=example_loci, backingfile = tempfile()) #> -#> gen_tibble saved to /tmp/RtmpKIkufM/file20a81f3d3e36.gt -#> using bigSNP file: /tmp/RtmpKIkufM/file20a81f3d3e36.rds -#> with backing file: /tmp/RtmpKIkufM/file20a81f3d3e36.bk +#> gen_tibble saved to /tmp/RtmppJAnLk/file1ce2794f8b8a.gt +#> using bigSNP file: /tmp/RtmppJAnLk/file1ce2794f8b8a.rds +#> with backing file: /tmp/RtmppJAnLk/file1ce2794f8b8a.bk #> make sure that you do NOT delete those files! #> to reload the gen_tibble in another session, use: -#> gt_load('/tmp/RtmpKIkufM/file20a81f3d3e36.gt') +#> gt_load('/tmp/RtmppJAnLk/file1ce2794f8b8a.gt')

We are provided information on where the three files underlying the genotype information are stored. As we don’t want to keep the files, we used the tmp directory; normally you will want to use your working @@ -230,15 +230,15 @@

The gen_tibble
 example_gt %>% show_loci()
-#> # A tibble: 6 × 7
-#>   big_index name  chromosome position genetic_dist allele_ref allele_alt
-#>       <int> <chr>      <dbl>    <dbl>        <dbl> <chr>      <chr>     
-#> 1         1 rs1            1        3            0 A          T         
-#> 2         2 rs2            1        5            0 T          C         
-#> 3         3 rs3            1       65            0 C          NA        
-#> 4         4 rs4            1      343            0 G          C         
-#> 5         5 x1             2       23            0 C          G         
-#> 6         6 x2             2      456            0 T          A
+#> # A tibble: 6 × 8 +#> big_index name chromosome position genetic_dist allele_ref allele_alt chr_int +#> <int> <chr> <dbl> <dbl> <dbl> <chr> <chr> <int> +#> 1 1 rs1 1 3 0 A T 1 +#> 2 2 rs2 1 5 0 T C 1 +#> 3 3 rs3 1 65 0 C NA 1 +#> 4 4 rs4 1 343 0 G C 1 +#> 5 5 x1 2 23 0 C G 2 +#> 6 6 x2 2 456 0 T A 2

Note that, if we are passing a gen_tibble to a function that works on genotypes, it is generally not necessary to pass the column genotypes in the call:

@@ -370,12 +370,12 @@

Using verbs on loci
 example_gt %>% select_loci (c(2,6,1)) %>% show_loci()
-#> # A tibble: 3 × 7
-#>   big_index name  chromosome position genetic_dist allele_ref allele_alt
-#>       <int> <chr>      <dbl>    <dbl>        <dbl> <chr>      <chr>     
-#> 1         2 rs2            1        5            0 T          C         
-#> 2         6 x2             2      456            0 T          A         
-#> 3         1 rs1            1        3            0 A          T
+#> # A tibble: 3 × 8 +#> big_index name chromosome position genetic_dist allele_ref allele_alt chr_int +#> <int> <chr> <dbl> <dbl> <dbl> <chr> <chr> <int> +#> 1 2 rs2 1 5 0 T C 1 +#> 2 6 x2 2 456 0 T A 2 +#> 3 1 rs1 1 3 0 A T 1

This operation could be helpful when merging datasets that do not fully overlap on their loci (more on that later).

@@ -399,13 +399,13 @@ 

Using verbs on loci
 sel_indices <- which((example_gt %>% loci_maf())>0.2)
 example_gt %>% select_loci (all_of(sel_indices)) %>% show_loci()
-#> # A tibble: 4 × 7
-#>   big_index name  chromosome position genetic_dist allele_ref allele_alt
-#>       <int> <chr>      <dbl>    <dbl>        <dbl> <chr>      <chr>     
-#> 1         1 rs1            1        3            0 A          T         
-#> 2         2 rs2            1        5            0 T          C         
-#> 3         4 rs4            1      343            0 G          C         
-#> 4         5 x1             2       23            0 C          G

+#> # A tibble: 4 × 8 +#> big_index name chromosome position genetic_dist allele_ref allele_alt chr_int +#> <int> <chr> <dbl> <dbl> <dbl> <chr> <chr> <int> +#> 1 1 rs1 1 3 0 A T 1 +#> 2 2 rs2 1 5 0 T C 1 +#> 3 4 rs4 1 343 0 G C 1 +#> 4 5 x1 2 23 0 C G 2

Note that passing a variable directly to select is deprecated, and so we have to use all_of to wrap it.

select_loci_if allows us to avoid creating a temporary @@ -432,10 +432,10 @@

Using verbs on loci
 example_gt %>% select_loci_if(loci_chromosomes(genotypes)==2 &
                              loci_maf(genotypes)>0.2) %>% show_loci()
-#> # A tibble: 1 × 7
-#>   big_index name  chromosome position genetic_dist allele_ref allele_alt
-#>       <int> <chr>      <dbl>    <dbl>        <dbl> <chr>      <chr>     
-#> 1         5 x1             2       23            0 C          G
+#> # A tibble: 1 × 8 +#> big_index name chromosome position genetic_dist allele_ref allele_alt chr_int +#> <int> <chr> <dbl> <dbl> <dbl> <chr> <chr> <int> +#> 1 5 x1 2 23 0 C G 2

Incidentally, loci_maf() is one of several functions that compute quantities by locus; they can be identified as they start with loci_.

@@ -535,22 +535,22 @@

Saving and reading data
 gt_file_name <- gt_save(example_gt)
 #> 
-#> gen_tibble saved to /tmp/RtmpKIkufM/file20a81f3d3e36.gt
-#> using bigSNP file: /tmp/RtmpKIkufM/file20a81f3d3e36.rds
-#> with backing file: /tmp/RtmpKIkufM/file20a81f3d3e36.bk
+#> gen_tibble saved to /tmp/RtmppJAnLk/file1ce2794f8b8a.gt
+#> using bigSNP file: /tmp/RtmppJAnLk/file1ce2794f8b8a.rds
+#> with backing file: /tmp/RtmppJAnLk/file1ce2794f8b8a.bk
 #> make sure that you do NOT delete those files!
 #> to reload the gen_tibble in another session, use:
-#> gt_load('/tmp/RtmpKIkufM/file20a81f3d3e36.gt')
+#> gt_load('/tmp/RtmppJAnLk/file1ce2794f8b8a.gt')
 gt_file_name
-#> [1] "/tmp/RtmpKIkufM/file20a81f3d3e36.gt" 
-#> [2] "/tmp/RtmpKIkufM/file20a81f3d3e36.rds"
-#> [3] "/tmp/RtmpKIkufM/file20a81f3d3e36.bk"
+#> [1] "/tmp/RtmppJAnLk/file1ce2794f8b8a.gt" +#> [2] "/tmp/RtmppJAnLk/file1ce2794f8b8a.rds" +#> [3] "/tmp/RtmppJAnLk/file1ce2794f8b8a.bk"

And if we ever need to retrieve the location of the .bk and .rds files for a gen_tibble, we can use:

 gt_get_file_names(example_gt)
-#> [1] "/tmp/RtmpKIkufM/file20a81f3d3e36.rds"
-#> [2] "/tmp/RtmpKIkufM/file20a81f3d3e36.bk"
+#> [1] "/tmp/RtmppJAnLk/file1ce2794f8b8a.rds" +#> [2] "/tmp/RtmppJAnLk/file1ce2794f8b8a.bk"

In a later session, we could reload the data with:

 new_example_gt <- gt_load(gt_file_name[1])
@@ -574,12 +574,12 @@ 

Saving and reading databed_path_pop_a <- system.file("extdata/pop_a.bed", package = "tidypopgen") pop_a_gt <- gen_tibble(bed_path_pop_a, backingfile = tempfile("pop_a_")) #> -#> gen_tibble saved to /tmp/RtmpKIkufM/pop_a_20a87136c5e5.gt -#> using bigSNP file: /tmp/RtmpKIkufM/pop_a_20a87136c5e5.rds -#> with backing file: /tmp/RtmpKIkufM/pop_a_20a87136c5e5.bk +#> gen_tibble saved to /tmp/RtmppJAnLk/pop_a_1ce29d4844b.gt +#> using bigSNP file: /tmp/RtmppJAnLk/pop_a_1ce29d4844b.rds +#> with backing file: /tmp/RtmppJAnLk/pop_a_1ce29d4844b.bk #> make sure that you do NOT delete those files! #> to reload the gen_tibble in another session, use: -#> gt_load('/tmp/RtmpKIkufM/pop_a_20a87136c5e5.gt')

+#> gt_load('/tmp/RtmppJAnLk/pop_a_1ce29d4844b.gt')

For this vignette, we don’t want to keep files, so we are again using a temporary path for the backing files, but in normal instances, we can simply omit the backingfile parameter, and the @@ -595,7 +595,7 @@

Saving and reading data
 gt_as_plink(example_gt, file =  tempfile("new_bed_"))
-#> [1] "/tmp/RtmpKIkufM/new_bed_20a864557320.bed"
+#> [1] "/tmp/RtmppJAnLk/new_bed_1ce2760677e7.bed"

This will also write a .bim and .fam file and save them together with the .bed file. Note that, from the main tibble, only id, population and sex will be preserved in the @@ -629,22 +629,22 @@

Merging databigsnp_path_a <- bigsnpr::snp_readBed(bed_path_pop_a, backingfile = tempfile("pop_a_")) pop_a_gt <- gen_tibble(bigsnp_path_a) #> -#> gen_tibble saved to /tmp/RtmpKIkufM/pop_a_20a87876275c.gt -#> using bigSNP file: /tmp/RtmpKIkufM/pop_a_20a87876275c.rds -#> with backing file: /tmp/RtmpKIkufM/pop_a_20a87876275c.bk +#> gen_tibble saved to /tmp/RtmppJAnLk/pop_a_1ce22be9cd86.gt +#> using bigSNP file: /tmp/RtmppJAnLk/pop_a_1ce22be9cd86.rds +#> with backing file: /tmp/RtmppJAnLk/pop_a_1ce22be9cd86.bk #> make sure that you do NOT delete those files! #> to reload the gen_tibble in another session, use: -#> gt_load('/tmp/RtmpKIkufM/pop_a_20a87876275c.gt') +#> gt_load('/tmp/RtmppJAnLk/pop_a_1ce22be9cd86.gt') bed_path_pop_b <- system.file("extdata/pop_b.bed", package = "tidypopgen") bigsnp_path_b <- bigsnpr::snp_readBed(bed_path_pop_b, backingfile = tempfile("pop_b_")) pop_b_gt <- gen_tibble(bigsnp_path_b) #> -#> gen_tibble saved to /tmp/RtmpKIkufM/pop_b_20a86af47285.gt -#> using bigSNP file: /tmp/RtmpKIkufM/pop_b_20a86af47285.rds -#> with backing file: /tmp/RtmpKIkufM/pop_b_20a86af47285.bk +#> gen_tibble saved to /tmp/RtmppJAnLk/pop_b_1ce25f2a0339.gt +#> using bigSNP file: /tmp/RtmppJAnLk/pop_b_1ce25f2a0339.rds +#> with backing file: /tmp/RtmppJAnLk/pop_b_1ce25f2a0339.bk #> make sure that you do NOT delete those files! #> to reload the gen_tibble in another session, use: -#> gt_load('/tmp/RtmpKIkufM/pop_b_20a86af47285.gt') +#> gt_load('/tmp/RtmppJAnLk/pop_b_1ce25f2a0339.gt')

And inspect them:

 pop_a_gt
@@ -713,12 +713,12 @@ 

Merging data#> ( 5 were flipped to match the reference set) #> ( 2 are ambiguous, of which 2 were removed) #> -#> gen_tibble saved to /tmp/RtmpKIkufM/gt_merged.gt -#> using bigSNP file: /tmp/RtmpKIkufM/gt_merged.rds -#> with backing file: /tmp/RtmpKIkufM/gt_merged.bk +#> gen_tibble saved to /tmp/RtmppJAnLk/gt_merged.gt +#> using bigSNP file: /tmp/RtmppJAnLk/gt_merged.rds +#> with backing file: /tmp/RtmppJAnLk/gt_merged.bk #> make sure that you do NOT delete those files! #> to reload the gen_tibble in another session, use: -#> gt_load('/tmp/RtmpKIkufM/gt_merged.gt')

+#> gt_load('/tmp/RtmppJAnLk/gt_merged.gt')

Let’s check the resulting gen_tibble:

 merged_gt
@@ -742,7 +742,7 @@ 

Merging data
 merged_gt %>% show_loci()
-#> # A tibble: 12 × 7
+#> # A tibble: 12 × 8
 #>    big_index name       chromosome  position genetic_dist allele_ref allele_alt
 #>        <int> <chr>           <int>     <int>        <int> <chr>      <chr>     
 #>  1         1 rs3094315           1    752566            0 A          G         
@@ -756,7 +756,8 @@ 

Merging data#> 9 9 rs28569024 2 139008811 0 T C #> 10 10 rs10106770 2 235832763 0 G A #> 11 11 rs11942835 3 155913651 0 T C -#> 12 12 rs5945676 23 51433071 0 T G

+#> 12 12 rs5945676 23 51433071 0 T G +#> # ℹ 1 more variable: chr_int <int>

Again, note that the big_index values have changed compared to the original files, as we generated a new FBM with the merged data.

@@ -779,12 +780,12 @@

Imputationbed_file <- system.file("extdata", "example-missing.bed", package = "bigsnpr") missing_gt <- gen_tibble(bed_file, backingfile = tempfile("missing_")) #> -#> gen_tibble saved to /tmp/RtmpKIkufM/missing_20a863ecc42.gt -#> using bigSNP file: /tmp/RtmpKIkufM/missing_20a863ecc42.rds -#> with backing file: /tmp/RtmpKIkufM/missing_20a863ecc42.bk +#> gen_tibble saved to /tmp/RtmppJAnLk/missing_1ce225759cf5.gt +#> using bigSNP file: /tmp/RtmppJAnLk/missing_1ce225759cf5.rds +#> with backing file: /tmp/RtmppJAnLk/missing_1ce225759cf5.bk #> make sure that you do NOT delete those files! #> to reload the gen_tibble in another session, use: -#> gt_load('/tmp/RtmpKIkufM/missing_20a863ecc42.gt') +#> gt_load('/tmp/RtmppJAnLk/missing_1ce225759cf5.gt') missing_gt #> # A gen_tibble: 500 loci #> # A tibble: 200 × 3 diff --git a/articles/a02_qc.html b/articles/a02_qc.html index fbdcceb4..dc9177e9 100644 --- a/articles/a02_qc.html +++ b/articles/a02_qc.html @@ -316,14 +316,15 @@

Save
 gt_save(ld_data, file_name = tempfile())
## 
-## gen_tibble saved to /tmp/RtmpSWl8gy/file20fa3e8758e9.gt
-
## using bigSNP file: /tmp/RtmpSWl8gy/file20fa36194e6.rds
-
## with backing file: /tmp/RtmpSWl8gy/file20fa36194e6.bk
+## gen_tibble saved to /tmp/RtmpOMrOpV/file1d3961adf25.gt +
## using bigSNP file: /tmp/RtmpOMrOpV/file1d3950ef3695.rds
+
## with backing file: /tmp/RtmpOMrOpV/file1d3950ef3695.bk
## make sure that you do NOT delete those files!
## to reload the gen_tibble in another session, use:
-
## gt_load('/tmp/RtmpSWl8gy/file20fa3e8758e9.gt')
-
## [1] "/tmp/RtmpSWl8gy/file20fa3e8758e9.gt" "/tmp/RtmpSWl8gy/file20fa36194e6.rds"
-## [3] "/tmp/RtmpSWl8gy/file20fa36194e6.bk"
+
## gt_load('/tmp/RtmpOMrOpV/file1d3961adf25.gt')
+
## [1] "/tmp/RtmpOMrOpV/file1d3961adf25.gt"  
+## [2] "/tmp/RtmpOMrOpV/file1d3950ef3695.rds"
+## [3] "/tmp/RtmpOMrOpV/file1d3950ef3695.bk"
diff --git a/articles/a03_example_clustering_and_dapc.html b/articles/a03_example_clustering_and_dapc.html index a8d1c1c6..d07c8428 100644 --- a/articles/a03_example_clustering_and_dapc.html +++ b/articles/a03_example_clustering_and_dapc.html @@ -475,7 +475,7 @@

Clustering with sNMF
 geno_file <- gt_as_geno_lea(anole_gt)
 geno_file
-#> [1] "/tmp/RtmpUILpcW/anolis_213d56945c4f.geno"
+#> [1] "/tmp/RtmpqB5xqU/anolis_1d7d54bd870f.geno"

Note that, by default, the .geno file is placed in the same directory as the backing file of the gen_tibble and is given the same name.

diff --git a/articles/a99_plink_cheatsheet.html b/articles/a99_plink_cheatsheet.html index f9ee7ad6..e7277adc 100644 --- a/articles/a99_plink_cheatsheet.html +++ b/articles/a99_plink_cheatsheet.html @@ -179,14 +179,14 @@

Quality control:my_snps <- c("rs4477212","rs3094315","rs3131972","rs12124819","rs11240777") data %>% select_loci_if(loci_names(genotypes) %in% my_snps) %>% show_loci() -#> # A tibble: 5 × 7 -#> big_index name chromosome position genetic_dist allele_ref allele_alt -#> <int> <chr> <int> <int> <int> <chr> <chr> -#> 1 1 rs4477212 1 82154 0 A NA -#> 2 2 rs3094315 1 752566 0 A G -#> 3 3 rs3131972 1 752721 0 G A -#> 4 4 rs12124819 1 776546 0 A NA -#> 5 5 rs11240777 1 798959 0 G A +#> # A tibble: 5 × 8 +#> big_index name chromosome position genetic_dist allele_ref allele_alt chr_int +#> <int> <chr> <int> <int> <int> <chr> <chr> <int> +#> 1 1 rs44… 1 82154 0 A NA 1 +#> 2 2 rs30… 1 752566 0 A G 1 +#> 3 3 rs31… 1 752721 0 G A 1 +#> 4 4 rs12… 1 776546 0 A NA 1 +#> 5 5 rs11… 1 798959 0 G A 1

will select loci from a previously defined set in the same way as --extract.

Similarly, to filter out individuals, as might be performed with diff --git a/pkgdown.yml b/pkgdown.yml index 30152be8..4d0b3b84 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -6,4 +6,4 @@ articles: a02_qc: a02_qc.html a03_example_clustering_and_dapc: a03_example_clustering_and_dapc.html a99_plink_cheatsheet: a99_plink_cheatsheet.html -last_built: 2024-09-25T09:23Z +last_built: 2024-09-26T11:16Z