diff --git a/R/filter_high_relatedness.R b/R/filter_high_relatedness.R
index f91a2893..b3825f70 100644
--- a/R/filter_high_relatedness.R
+++ b/R/filter_high_relatedness.R
@@ -4,7 +4,6 @@
 #' coefficients and returns the maximum set of individuals that contains no
 #' relationships above the given threshold.
 #'
-#' TODO this function needs a test
 #'
 #' @param matrix a square symmetric matrix of individuals containing relationship coefficients
 #' @param .x a [`gen_tibble`] object
diff --git a/R/gen_tibble.R b/R/gen_tibble.R
index cfff22a2..90e07577 100644
--- a/R/gen_tibble.R
+++ b/R/gen_tibble.R
@@ -196,7 +196,12 @@ gen_tibble.matrix <- function(x, indiv_meta, loci, ...,
     stop ("'0' can not be a valid allele (it is the default missing allele value!)")
   }
 
-  # TODO check object types
+  if (!inherits(loci, "data.frame") || inherits(x, "tbl")){
+    stop("loci must be one of data.frame or tbl")
+  }
+  if (!inherits(indiv_meta, "data.frame") || inherits(x, "tbl") || is.list(x)){
+    stop("indiv_meta must be one of data.frame, tbl, or list")
+  }
   if (!all(c("id", "population") %in% names(indiv_meta))){
     stop("ind_meta does not include the compulsory columns 'id' and 'population")
   }
diff --git a/R/gt_has_imputed.R b/R/gt_has_imputed.R
index bfb45d97..d4b98bf4 100644
--- a/R/gt_has_imputed.R
+++ b/R/gt_has_imputed.R
@@ -28,7 +28,7 @@ gt_uses_imputed <- function (x){
     x <- x$genotypes
   }
   if (!gt_has_imputed(x)){
-    stop("this dataset does not have any imputated values to use!")
+    stop("this dataset does not have any imputed values to use!")
   }
   if (identical(attr(x,"bigsnp")$genotypes$code256, bigsnpr::CODE_IMPUTE_PRED) |
       identical(attr(x,"bigsnp")$genotypes$code256, bigsnpr::CODE_DOSAGE)){
diff --git a/R/gt_impute_simple.R b/R/gt_impute_simple.R
index 097d3577..1fddd504 100644
--- a/R/gt_impute_simple.R
+++ b/R/gt_impute_simple.R
@@ -7,7 +7,7 @@
 #'
 #' @param x a [gen_tibble] with missing data
 #' @param method one of
-#' - 'median': the most frequent genotype
+#' - 'mode': the most frequent genotype
 #' - 'mean0': the mean rounded to the nearest integer
 #' - 'mean2': the mean rounded to 2 decimal places
 #' - 'random': randomly sample a genotype based on the observed allele frequencies
diff --git a/R/gt_pca_tidiers.R b/R/gt_pca_tidiers.R
index c9757217..6430c70b 100644
--- a/R/gt_pca_tidiers.R
+++ b/R/gt_pca_tidiers.R
@@ -172,7 +172,7 @@ augment_loci.gt_pca <- function(x, data = NULL, k= NULL, ...) {
   ret <- if (!missing(data) && !is.null(data)) {
     #check that names of the two columns are in sync
 # @TODO reinstate this check once we have rownames in the pca object for loadings
-   if (!all.equal(loci_names(data), rownames(as.data.frame(x$v)))){
+   if (!identical(loci_names(data), rownames(as.data.frame(x$v)))){
      stop("the loci names in 'data' do not correspond to the loci in the pca object 'x'")
    }
     show_loci(data) <- show_loci(data) %>% tibble::add_column(loadings)
diff --git a/R/loci_ld_clump.R b/R/loci_ld_clump.R
index 822c528d..658ac855 100644
--- a/R/loci_ld_clump.R
+++ b/R/loci_ld_clump.R
@@ -63,13 +63,18 @@ loci_ld_clump.vctrs_bigSNP <- function(.x,
                                        ...)
 {
   rlang::check_dots_empty()
-
+  stopifnot_diploid(.x)
+  
   if (gt_has_imputed(.x) && gt_uses_imputed(.x)==FALSE){ #but not uses_imputed
     gt_set_imputed(.x, set = TRUE)
     on.exit(gt_set_imputed(.x, set = FALSE))
   }
 
-  stopifnot_diploid(.x)
+  if(!identical(show_loci(.x),.x %>% show_loci() %>% arrange(show_loci(.x)$chromosome,show_loci(.x)$position))){
+    stop("Your loci are not sorted, try using: show_loci(.data) <- .data %>% show_loci() %>% arrange(chromosome,position)")
+
+  }
+
   # get the FBM
   geno_fbm <- attr(.x,"bigsnp")$genotypes
   # rows (individuals) that we want to use
diff --git a/R/loci_transitions.R b/R/loci_transitions.R
index 80d9d9d8..df87bf2d 100644
--- a/R/loci_transitions.R
+++ b/R/loci_transitions.R
@@ -18,6 +18,7 @@ loci_transitions <- function(.x, ...) {
 loci_transitions.tbl_df <- function(.x, ...) {
   #TODO this is a hack to deal with the class being dropped when going through group_map
   stopifnot_gen_tibble(.x)
+  check_allele_alphabet(.x$genotypes)
   loci_transitions(.x$genotypes, ...)
 }
 
diff --git a/R/loci_transversions.R b/R/loci_transversions.R
index 36472c8d..000adbd0 100644
--- a/R/loci_transversions.R
+++ b/R/loci_transversions.R
@@ -18,6 +18,7 @@ loci_transversions <- function(.x, ...) {
 loci_transversions.tbl_df <- function(.x, ...) {
   #TODO this is a hack to deal with the class being dropped when going through group_map
   stopifnot_gen_tibble(.x)
+  check_allele_alphabet(.x$genotypes)
   loci_transversions(.x$genotypes, ...)
 }
 
diff --git a/R/qc_report_indiv.R b/R/qc_report_indiv.R
index 66f37ea5..8440bc34 100644
--- a/R/qc_report_indiv.R
+++ b/R/qc_report_indiv.R
@@ -46,7 +46,7 @@ qc_report_indiv <- function(.x, kings_threshold = NULL, ...){
 #' ready plots.
 #'
 #' @param object an object of class `qc_report_indiv`
-#' @param type the type of plot (`scatter`)
+#' @param type the type of plot (`scatter`,`relatedness`)
 #' @param miss_threshold a threshold for the accepted rate of missingness within
 #' individuals
 #' @param kings_threshold an optional numeric, a threshold of relatedness for the sample
@@ -96,7 +96,6 @@ autoplot_qc_report_indiv <- function(object, miss_threshold = miss_threshold){
 
 autoplot_qc_report_indiv_king <- function(object, kings_threshold = kings_threshold){
 
-  browser()
   king <- as.data.frame(attr(object$to_keep, "king"))
   num_samples <- nrow(king)
   king$row <- colnames(king)
diff --git a/R/select_loci_if.R b/R/select_loci_if.R
index bf99873b..e24b5e53 100644
--- a/R/select_loci_if.R
+++ b/R/select_loci_if.R
@@ -20,7 +20,7 @@ select_loci_if <-function(.data, .sel_logical){
   # and now evaluate it, allowing it to see the data
   loci_sel <- rlang::eval_tidy(sel_defused,data=.data)
   if (!inherits(loci_sel,"logical")){
-    stop(".sel_logical should be a logical (boolean) vector")
+    stop(".sel_logical should be a logical boolean vector")
   }
   if (length(loci_sel) != ncol(show_genotypes(.data$genotypes))){
     stop(".sel_logical should be the same length as the number of loci")
diff --git a/inst/extdata/pop_b.ped b/inst/extdata/pop_b.ped
new file mode 100644
index 00000000..71093746
--- /dev/null
+++ b/inst/extdata/pop_b.ped
@@ -0,0 +1,3 @@
+pop_b SA073 0 0 2 -9 A A G G A A A A G G C C G G G G T T G G G G C C C C A A C T A G A A
+pop_b SA1021 0 0 2 -9 G A A G A A G A G G C C T G A G 0 0 G G G G C C C C G A T T G G A A
+pop_b SA1008 0 0 1 -9 A A A G A A G A G G C C T G G G T T G G G G C C C C A A C T G G A A
diff --git a/inst/extdata/related/families_hwe.hwe b/inst/extdata/related/families_hwe.hwe
new file mode 100644
index 00000000..85ff2289
--- /dev/null
+++ b/inst/extdata/related/families_hwe.hwe
@@ -0,0 +1,11 @@
+ CHR  SNP     TEST   A1   A2                 GENO   O(HET)   E(HET)            P 
+   1    1  ALL(NP)    2    1                0/4/6      0.4     0.32            1
+   1    2  ALL(NP)    2    1                2/4/6   0.3333   0.4444       0.5176
+   1    3  ALL(NP)    2    1                0/5/7   0.4167   0.3299            1
+   1    4  ALL(NP)    1    2                2/4/5   0.3636   0.4628       0.5377
+   1    5  ALL(NP)    2    1                0/7/5   0.5833   0.4132       0.4874
+   1    6  ALL(NP)    2    1                1/6/3      0.6     0.48            1
+   1    7  ALL(NP)    2    1                1/5/5   0.4545   0.4339            1
+   1    8  ALL(NP)    2    1                0/8/4   0.6667   0.4444        0.216
+   1    9  ALL(NP)    2    1                0/6/5   0.5455   0.3967       0.5046
+   1   10  ALL(NP)    1    2                2/7/3   0.5833   0.4965            1
diff --git a/inst/extdata/related/families_hwe.nosex b/inst/extdata/related/families_hwe.nosex
new file mode 100644
index 00000000..22ee7fca
--- /dev/null
+++ b/inst/extdata/related/families_hwe.nosex
@@ -0,0 +1,12 @@
+1	1
+2	2
+3	3
+4	4
+5	5
+6	6
+7	7
+8	8
+9	9
+10	10
+11	11
+12	12
diff --git a/inst/extdata/related/families_hwe_midp.hwe b/inst/extdata/related/families_hwe_midp.hwe
new file mode 100644
index 00000000..fa72488f
--- /dev/null
+++ b/inst/extdata/related/families_hwe_midp.hwe
@@ -0,0 +1,11 @@
+ CHR  SNP     TEST   A1   A2                 GENO   O(HET)   E(HET)            P 
+   1    1  ALL(NP)    2    1                0/4/6      0.4     0.32       0.6533
+   1    2  ALL(NP)    2    1                2/4/6   0.3333   0.4444       0.3668
+   1    3  ALL(NP)    2    1                0/5/7   0.4167   0.3299       0.7019
+   1    4  ALL(NP)    1    2                2/4/5   0.3636   0.4628       0.3643
+   1    5  ALL(NP)    2    1                0/7/5   0.5833   0.4132        0.341
+   1    6  ALL(NP)    2    1                1/6/3      0.6     0.48       0.7866
+   1    7  ALL(NP)    2    1                1/5/5   0.4545   0.4339       0.7399
+   1    8  ALL(NP)    2    1                0/8/4   0.6667   0.4444       0.1299
+   1    9  ALL(NP)    2    1                0/6/5   0.5455   0.3967       0.3065
+   1   10  ALL(NP)    1    2                2/7/3   0.5833   0.4965       0.7969
diff --git a/inst/extdata/related/families_hwe_midp.nosex b/inst/extdata/related/families_hwe_midp.nosex
new file mode 100644
index 00000000..22ee7fca
--- /dev/null
+++ b/inst/extdata/related/families_hwe_midp.nosex
@@ -0,0 +1,12 @@
+1	1
+2	2
+3	3
+4	4
+5	5
+6	6
+7	7
+8	8
+9	9
+10	10
+11	11
+12	12
diff --git a/man/autoplot.qc_report_indiv.Rd b/man/autoplot.qc_report_indiv.Rd
index e87735e8..47bf6a9e 100644
--- a/man/autoplot.qc_report_indiv.Rd
+++ b/man/autoplot.qc_report_indiv.Rd
@@ -15,7 +15,7 @@
 \arguments{
 \item{object}{an object of class \code{qc_report_indiv}}
 
-\item{type}{the type of plot (\code{scatter})}
+\item{type}{the type of plot (\code{scatter},\code{relatedness})}
 
 \item{miss_threshold}{a threshold for the accepted rate of missingness within
 individuals}
diff --git a/man/filter_high_relatedness.Rd b/man/filter_high_relatedness.Rd
index c9e29afd..a9ee8121 100644
--- a/man/filter_high_relatedness.Rd
+++ b/man/filter_high_relatedness.Rd
@@ -30,6 +30,3 @@ This function takes a matrix of x by y individuals containing relatedness
 coefficients and returns the maximum set of individuals that contains no
 relationships above the given threshold.
 }
-\details{
-TODO this function needs a test
-}
diff --git a/man/gt_impute_simple.Rd b/man/gt_impute_simple.Rd
index 832d5687..fba31f5d 100644
--- a/man/gt_impute_simple.Rd
+++ b/man/gt_impute_simple.Rd
@@ -15,7 +15,7 @@ gt_impute_simple(
 
 \item{method}{one of
 \itemize{
-\item 'median': the most frequent genotype
+\item 'mode': the most frequent genotype
 \item 'mean0': the mean rounded to the nearest integer
 \item 'mean2': the mean rounded to 2 decimal places
 \item 'random': randomly sample a genotype based on the observed allele frequencies
diff --git a/tests/testthat/test_augment_loci.R b/tests/testthat/test_augment_loci.R
new file mode 100644
index 00000000..297672f0
--- /dev/null
+++ b/tests/testthat/test_augment_loci.R
@@ -0,0 +1,16 @@
+test_that("augment_loci adds to loci_table",{
+  bed_file <- system.file("extdata", "example-missing.bed", package = "bigsnpr")
+  missing_gt <- gen_tibble(bed_file,  backingfile = tempfile("missing_"),quiet=TRUE)
+  missing_gt <- gt_impute_simple(missing_gt)
+  missing_pca <- missing_gt %>% gt_pca_partialSVD()
+
+  #Add loadings to loci table
+  missing_pca_load <- augment_loci(missing_pca, data = missing_gt)
+  expect_true(all(colnames(missing_pca_load) == c(colnames(show_loci(missing_gt)),paste0(".loadingPC", c(1:10)))))
+
+  #Try assigning loadings to object with fewer loci
+  missing_gt_rm <- missing_gt %>% select_loci(c(1:400))
+  expect_error(augment_loci(missing_pca, data = missing_gt_rm),"the loci names in 'data' do not correspond to the loci in the pca object ")
+
+
+})
diff --git a/tests/testthat/test_gen_tibble.R b/tests/testthat/test_gen_tibble.R
index a0d3750a..2ce54980 100644
--- a/tests/testthat/test_gen_tibble.R
+++ b/tests/testthat/test_gen_tibble.R
@@ -32,16 +32,19 @@ test_that("create gen_tibble from dfs",{
 
 })
 
-test_genotypes_c <- rbind(c("1","1","0","1","1","0"),
-                          c("2","1","0","0","0","0"),
-                          c("2","2","0","0","1","1"))
 
-
-test_that("gen_tibble does not accept character matrix",{
-  expect_error(test_dfs_gt <- gen_tibble(test_genotypes_c, indiv_meta = test_indiv_meta,
-                                         loci = test_loci, quiet = TRUE),"'x' is not a matrix of integers")
+# now create it directly from the dfs
+test_that("create gen_tibble from dfs",{
+  test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta,
+             loci = test_loci, quiet = TRUE)
+  # because of the different backing file info, we cannot use identical on the whole object
+  expect_true(identical(show_genotypes(test_gt), show_genotypes(test_dfs_gt)))
+  expect_true(identical(show_loci(test_gt), show_loci(test_dfs_gt)))
+  expect_true(identical(test_gt %>% select(-genotypes),
+                        test_dfs_gt %>% select(-genotypes)))
 })
 
+
 test_that("gen_tibble catches invalid alleles",{
   test_loci_wrong <- test_loci
   test_loci_wrong$allele_alt[1] <- "N"
@@ -70,6 +73,92 @@ test_that("gen_tibble catches invalid alleles",{
 
 })
 
+
+test_that("if order of loci is changed, order of genotypes also changes",{
+
+  pop_b <- gen_tibble(system.file("extdata/pop_b.bed", package="tidypopgen"),backingfile = tempfile(), quiet = TRUE)
+  #original genotypes
+  pop_b_gen <- show_genotypes(pop_b)
+
+  #now scramble the loci
+  set.seed(123)
+  random_order <- sample(1:17)
+  show_loci(pop_b) <- pop_b %>% select_loci(all_of(random_order)) %>% show_loci()
+
+  #reorder the original genotypes according to 'random_order'
+  pop_b_gen_reordered <- pop_b_gen[,random_order]
+
+  #check that genotypes are now reordered according to random order
+  expect_equal(pop_b_gen_reordered, show_genotypes(pop_b))
+
+
+})
+
+test_that("gen_tibble does not accept character matrix",{
+  test_genotypes_c <- rbind(c("1","1","0","1","1","0"),
+                            c("2","1","0","0","0","0"),
+                            c("2","2","0","0","1","1"))
+  expect_error(test_dfs_gt <- gen_tibble(test_genotypes_c, indiv_meta = test_indiv_meta,
+                                         loci = test_loci, quiet = TRUE),"'x' is not a matrix of integers")
+})
+
+test_that("gen_tibble wrong filetype error",{
+  expect_error(test_dfs_gt <- gen_tibble(system.file("extdata/related/test_king.kin0", package = "tidypopgen")),
+               "file_path should be pointing ")
+})
+
+test_that("gen_tibble loci is dataframe or tbl",{
+
+  test_loci <- data.frame(name=paste0("rs",1:6),
+                          chromosome=paste0("chr",c(1,1,1,1,2,2)),
+                          position=as.integer(c(3,5,65,343,23,456)),
+                          genetic_dist = as.integer(rep(0,6)),
+                          allele_ref = c("A","T","C","G","C","T"),
+                          allele_alt = c("T","C", NA,"C","G","A"))
+  wrong_loci_matrix <- as.matrix(test_loci)
+
+  expect_error(test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta,
+                                         loci = wrong_loci_matrix, quiet = TRUE),"loci must be one of data.frame or tbl")
+})
+
+test_that("gen_tibble required id and population",{
+  wrong_indiv_meta <- data.frame (x =c("a","b","c"),
+                                  y = c("pop1","pop1","pop2"))
+  expect_error(test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = wrong_indiv_meta,
+                                         loci = test_loci, quiet = TRUE),"ind_meta does not include the compulsory columns")
+})
+
+test_that("gen_tibble indiv_meta is list, dataframe, or tbl",{
+  wrong_indiv_meta <- data.frame (id=c("a","b","c"),
+                                  population = c("pop1","pop1","pop2"))
+  wrong_indiv_meta_matrix <- as.matrix(wrong_indiv_meta)
+
+  expect_error(test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = wrong_indiv_meta_matrix,
+                                         loci = test_loci, quiet = TRUE),"indiv_meta must be one of data.frame, tbl, or list")
+})
+
+test_that("gen_tibble identifies wrong dimensions in genotypes",{
+  wrong_genotypes <- rbind(c(1,1,0,1,1,0),
+                          c(2,1,0,0,0,0))
+  expect_error(test_dfs_gt <- gen_tibble(wrong_genotypes, indiv_meta = test_indiv_meta,
+                                         loci = test_loci, quiet = TRUE),
+               "there is a mismatch between the number of loci in the genotype table x and in the loci table")
+
+})
+
+test_that("gen_tibble identifies wrong loci table columns",{
+  wrong_loci <- data.frame(a=paste0("rs",1:6),
+                          b=paste0("chr",c(1,1,1,1,2,2)),
+                          c=as.integer(c(3,5,65,343,23,456)),
+                          d = as.integer(rep(0,6)),
+                          e = c("A","T","C","G","C","T"),
+                          f = c("T","C", NA,"C","G","A"))
+  expect_error(test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta,
+                                         loci = wrong_loci, quiet = TRUE),
+               "loci does not include the compulsory columns")
+})
+
+
 test_that("gen_tibble from files",{
   bed_path <- system.file("extdata/pop_a.bed", package = "tidypopgen")
   pop_a_gt <- gen_tibble(bed_path, quiet=TRUE, backingfile = tempfile())
diff --git a/tests/testthat/test_gt_impute_simple.R b/tests/testthat/test_gt_impute_simple.R
index 65899882..ec469829 100644
--- a/tests/testthat/test_gt_impute_simple.R
+++ b/tests/testthat/test_gt_impute_simple.R
@@ -6,7 +6,7 @@ test_that("impute and use the imputation",{
                "You can't have missing values")
   expect_false(gt_has_imputed(missing_gt))
   expect_error(gt_uses_imputed(missing_gt),
-               "this dataset does not have any imputated")
+               "this dataset does not have any imputed")
   # now impute
   missing_gt <- gt_impute_simple(missing_gt)
   # we have imputed
@@ -19,3 +19,98 @@ test_that("impute and use the imputation",{
   expect_error(gt_set_imputed(missing_gt),
                "set should be either")
 })
+
+
+test_that("gt_impute imputes properly",{
+  test_indiv_meta <- data.frame (id=c("a","b","c","d","e","f"),
+                                 population = c("pop1","pop1","pop2","pop1","pop1","pop2"))
+
+  test_genotypes <- matrix(c(
+    0, 2, 1, 1, 0,  #
+    0, 0, 2, 0, 1,  #
+    2, 0, 0, 1, 1,  #
+    1, 1, 2, 2, 2,  #
+    0, 0, 2, 1, 1,  #
+    NA,NA,NA,NA,NA
+  ), nrow = 6, byrow = TRUE)
+
+
+
+  test_loci <- data.frame(name=paste0("rs",1:5),
+                          chromosome=c(1,1,1,2,2),
+                          position=c(3,65,343,23,456),
+                          genetic_dist = as.integer(rep(0,5)),
+                          allele_ref = c("A","T","C","G","C"),
+                          allele_alt = c("T","C", NA,"C","G"))
+
+  test_gt <- gen_tibble(x = test_genotypes, loci = test_loci, indiv_meta = test_indiv_meta, quiet = TRUE)
+
+  #test errors on non-imputed set
+  expect_error(gt_uses_imputed(test_gt),"this dataset does not have any imputed values")
+  expect_error(gt_set_imputed(test_gt, TRUE),"this dataset does not have imputed values")
+
+
+  #impute method = 'mode'
+  imputed_gt_mode <- gt_impute_simple(test_gt, method = "mode")
+
+  #check imputation
+  expect_false(gt_has_imputed(test_gt))
+  expect_true(gt_has_imputed(imputed_gt_mode))
+
+  #set imputation
+  gt_set_imputed(imputed_gt_mode, TRUE)
+  expect_false(any(is.na(show_genotypes(imputed_gt_mode))))
+
+  #test error trying to impute an already imputed set
+  #expect_error(gt_impute_simple(imputed_gt_mode),"object x is already imputed")
+
+  #Check imputed 'mode' method
+  mode_function <- function(x){
+    unique_x <- unique(x)
+    return(unique_x[which.max(tabulate(match(x, unique_x)))])
+
+  }
+
+  modes <- apply(test_genotypes, 2, mode_function)
+  expect_true(all(show_genotypes(imputed_gt_mode)[6,] == modes))
+
+
+  #impute method = 'mean0'
+  imputed_gt_mean0 <- gt_impute_simple(test_gt, method = "mean0")
+
+  #set imputation
+  gt_set_imputed(imputed_gt_mean0, TRUE)
+  expect_false(any(is.na(show_genotypes(imputed_gt_mean0))))
+
+  #check imputed 'mean0' method
+  means <- round(colMeans(test_genotypes, na.rm = TRUE), digit = 0)
+  expect_true(all(means == show_genotypes(imputed_gt_mean0)[6,]))
+
+
+  #impute method = 'random'
+  imputed_gt_random <- gt_impute_simple(test_gt, method = "random")
+
+  #set imputation
+  gt_set_imputed(imputed_gt_random, TRUE)
+  expect_false(any(is.na(show_genotypes(imputed_gt_random))))
+
+  # Problem with method 'mean2'?
+
+
+  #impute method = 'mean2'
+  imputed_gt_mean2 <- gt_impute_simple(test_gt, method = "mean2")
+
+  #set imputation
+  gt_set_imputed(imputed_gt_mean2, TRUE)
+  #expect_false(any(is.na(show_genotypes(imputed_gt_mean2)))) #?
+
+  #check imputed 'mean2' method
+  means2 <- round(colMeans(test_genotypes, na.rm = TRUE), digit = 2)
+  #expect_true(all(means2 == show_genotypes(imputed_gt_mean2)[6,])) #?
+
+
+
+})
+
+
+
diff --git a/tests/testthat/test_indiv_het_obs.R b/tests/testthat/test_indiv_het_obs.R
index 1219f853..190ff05e 100644
--- a/tests/testthat/test_indiv_het_obs.R
+++ b/tests/testthat/test_indiv_het_obs.R
@@ -22,3 +22,30 @@ test_that("indiv_het_obs computes correctly",{
   expect_true(all(indiv_het_obs(test_gt)==
                     rowMeans(test_genotypes==1,na.rm=TRUE)))
 })
+
+test_that("indiv_het_obs returns 0's when all genotypes are homozygous", {
+
+  test_indiv_meta <- data.frame (id=c("a","b","c"),
+                                 population = c("pop1","pop1","pop2"))
+  test_genotypes_homozyg <- rbind(c(2,2,0,0,2,0),
+                                  c(2,2,0,0,2,0),
+                                  c(2,2,0,0,2,0))
+  test_loci <- data.frame(name=paste0("rs",1:6),
+                          chromosome=c(1,1,1,1,2,2),
+                          position=c(3,5,65,343,23,456),
+                          genetic_dist = as.integer(rep(0,6)),
+                          allele_ref = c("A","T","C","G","C","T"),
+                          allele_alt = c("T","C", NA,"C","G","A"))
+
+  test_gt_homozyg <- gen_tibble(x = test_genotypes_homozyg, loci = test_loci, indiv_meta = test_indiv_meta, quiet = TRUE)
+
+  # feeding the list of SNPbin directly
+  expect_true(all(indiv_het_obs(test_gt_homozyg$genotypes)==
+                    rowMeans(test_genotypes_homozyg==1,na.rm=TRUE)))
+
+  # passing tibble
+  expect_true(all(indiv_het_obs(test_gt_homozyg)==
+                    rowMeans(test_genotypes_homozyg==1,na.rm=TRUE)))
+
+})
+
diff --git a/tests/testthat/test_indiv_missingness.R b/tests/testthat/test_indiv_missingness.R
index f9b69a3c..249b4e27 100644
--- a/tests/testthat/test_indiv_missingness.R
+++ b/tests/testthat/test_indiv_missingness.R
@@ -41,4 +41,24 @@ test_that("as_counts switch",{
   expect_false(identical(qc_rates,qc_counts))
 })
 
+test_genotypes_moreNA <- rbind(c(1,1,0,1,1,2),
+                        c(2,NA,NA,NA,NA,NA),
+                        c(2,2,0,0,1,NA))
+
+test_gt2 <- gen_tibble(x = test_genotypes_moreNA, loci = test_loci, indiv_meta = test_indiv_meta, quiet = TRUE)
+
+
+test_that("behaviour of indiv_missingness",{
+
+  #create a subset gt
+  test_gt2_subset <- test_gt2 %>% select_loci(c(2,3,4,5,6))
+  test_genotypes_moreNA_subset <- test_genotypes_moreNA[,c(2:6)]
+
+  sum_na <- function(x){sum(is.na(x))}
+  # feeding the genotypes directly
+  expect_true(all(indiv_missingness(test_gt2_subset$genotypes)==
+                    round(apply(test_genotypes_moreNA_subset, MARGIN = 1,sum_na)/ncol(test_genotypes_moreNA_subset), digits = 1)))
+
+})
+
 
diff --git a/tests/testthat/test_loci_freq.R b/tests/testthat/test_loci_freq.R
index 9a8ac109..a190e185 100644
--- a/tests/testthat/test_loci_freq.R
+++ b/tests/testthat/test_loci_freq.R
@@ -23,11 +23,46 @@ test_that("loci_alt_freq and loci_maf computes correctly",{
 
   # repeat the tests for a subset of the data
   # remove the 2nd individual and the 3rd and 5th snp
-  test_genotypes <- test_genotypes[-2,c(-3,-5)]
-  test_gt <- test_gt %>% filter(id!="b") %>% select_loci(c(-3,-5))
-  freq <- colSums(test_genotypes, na.rm=TRUE)/(c(2,2,2,1)*2)
-  expect_true(all(loci_alt_freq(test_gt$genotypes)==freq))
+  test_genotypes_subset1 <- test_genotypes[-2,c(-3,-5)]
+  test_gt_subset1 <- test_gt %>% filter(id!="b") %>% select_loci(c(-3,-5))
+  freq <- colSums(test_genotypes_subset1, na.rm=TRUE)/(c(2,2,2,1)*2)
+  expect_true(all(loci_alt_freq(test_gt_subset1$genotypes)==freq))
   # convert to minor frequencies
   freq[freq>0.5] <- 1 - freq[freq>0.5]
-  expect_true(all(loci_maf(test_gt$genotypes)==freq))
+  expect_true(all(loci_maf(test_gt_subset1$genotypes)==freq))
+
+  # repeat the tests for a subset where for loci 6, all genotypes are missing
+  # remove the 1st individual and the 3rd and 4th snp
+  test_genotypes_subset2 <- test_genotypes[-1,c(-3,-4)]
+  test_gt_subset2 <- test_gt %>% filter(id!="a") %>% select_loci(c(-3,-4))
+
+  #we expect NaN for loci 6 - as both genotypes are NA
+  freq <- colSums(test_genotypes_subset2, na.rm=TRUE)/(c(2,2,2,NA)*2)
+  expect_equal(loci_alt_freq(test_gt_subset2$genotypes),freq)
+
+  # convert to minor frequencies
+  freq[freq>0.5 & !is.na(freq)] <- 1-freq[freq>0.5 & !is.na(freq)]
+  expect_equal(loci_maf(test_gt_subset2$genotypes),freq)
+
+  #Test NA in centre
+  test_genotypes2 <- rbind(c(1,1,0,1,1,2),
+                          c(2,1,0,NA,0,NA),
+                          c(2,2,0,NA,1,NA))
+
+  test_gt2 <- gen_tibble(x = test_genotypes2, loci = test_loci, indiv_meta = test_indiv_meta, quiet = TRUE)
+
+  # repeat the tests for a subset where for loci 6, all genotypes are missing
+  # remove the 1st individual
+  test_genotypes_subset3 <- test_genotypes2[-1,]
+  test_gt_subset3 <- test_gt2 %>% filter(id!="a") #%>% select_loci(c(-3,-4))
+
+  #we expect NaN for loci 4 and 6 - as both genotypes are NA
+  freq2 <- colSums(test_genotypes_subset3, na.rm=TRUE)/(c(2,2,2,NA,2,NA)*2)
+  expect_equal(loci_alt_freq(test_gt_subset3$genotypes),freq2)
+
+
+  # convert to minor frequencies
+  freq2[freq2>0.5 & !is.na(freq2)] <- 1-freq2[freq2>0.5 & !is.na(freq2)]
+  expect_equal(loci_maf(test_gt_subset3$genotypes),freq2)
+
 })
diff --git a/tests/testthat/test_loci_hwe.R b/tests/testthat/test_loci_hwe.R
new file mode 100644
index 00000000..e21b6951
--- /dev/null
+++ b/tests/testthat/test_loci_hwe.R
@@ -0,0 +1,45 @@
+test_that("loci_hwe produces the same output as plink --hardy midp",{
+
+  #Create gentibble for our data
+  bed_path <- system.file("extdata/related/families.bed", package = "tidypopgen")
+  families_bigsnp_path <- bigsnpr::snp_readBed(bed_path, backingfile = tempfile()) #bigsnpr::sub_bed(bed_path)
+  #families_bigsnp_path <- system.file("extdata/related/families.rds", package = "tidypopgen")
+  families <- gen_tibble(families_bigsnp_path, quiet = TRUE, valid_alleles = c("1","2"))
+
+  #Read in plink results (first 10 snps)
+  plink_hwe <- read.table(system.file("extdata/related/families_hwe_midp.hwe", package = "tidypopgen"),header = TRUE)
+
+  #Calculate hwe in tidypopgen
+  tidy_hwe <- families %>% select_loci(c(1:10)) %>% loci_hwe(mid_p = TRUE)
+
+  #Compare
+  result <- all.equal(plink_hwe$P, tidy_hwe, tolerance = 0.0001)
+
+  #Check results are the same to 4 decimals
+  expect_true(result)
+
+})
+
+
+test_that("loci_hwe mid_p = FALSE produces the same output as plink --hardy ",{
+
+  #Create gentibble for our data
+  bed_path <- system.file("extdata/related/families.bed", package = "tidypopgen")
+  families_bigsnp_path <- bigsnpr::snp_readBed(bed_path, backingfile = tempfile()) #bigsnpr::sub_bed(bed_path)
+  #families_bigsnp_path <- system.file("extdata/related/families.rds", package = "tidypopgen")
+  families <- gen_tibble(families_bigsnp_path, quiet = TRUE, valid_alleles = c("1","2"))
+
+  #Read in plink results (first 10 snps)
+  plink_hwe <- read.table(system.file("extdata/related/families_hwe.hwe", package = "tidypopgen"),header = TRUE)
+
+  #Calculate hwe in tidypopgen
+  tidy_hwe <- families %>% select_loci(c(1:10)) %>% loci_hwe(mid_p = FALSE)
+
+  #Compare
+  result <- all.equal(plink_hwe$P, tidy_hwe, tolerance = 0.0001)
+
+  #Check results are the same to 4 decimals
+  expect_true(result)
+
+})
+
diff --git a/tests/testthat/test_loci_ld_clump.R b/tests/testthat/test_loci_ld_clump.R
index 4d5e83f8..7fd27932 100644
--- a/tests/testthat/test_loci_ld_clump.R
+++ b/tests/testthat/test_loci_ld_clump.R
@@ -10,8 +10,9 @@ test_loci <- data.frame(name=paste0("rs",1:6),
                         allele_ref = c("A","T","C","G","C","T"),
                         allele_alt = c("T","C", NA,"C","G","A"))
 
+
 test_gt <- gen_tibble(x = test_genotypes, indiv_meta = test_indiv_meta, loci = test_loci,
-                      backingfile = tempfile())
+                      backingfile = tempfile(), quiet = TRUE)
 
 test_that("ld clumping runs",{
 
@@ -22,3 +23,71 @@ test_that("ld clumping runs",{
 
 
 
+test_that("loci_ld_clump returns the same as bigsnpr",{
+
+  bedfile <- system.file("extdata/related/families.bed", package="tidypopgen")
+  rds <- bigsnpr::snp_readBed(bedfile, backingfile = tempfile())
+
+  bigsnp <- bigsnpr::snp_attach(rds)
+
+  gen_tbl <- gen_tibble(bedfile, quiet = TRUE,
+                        backingfile = tempfile(), valid_alleles = c("1","2"))
+
+  #also tests imputation
+  bigsnp_imputed <- bigsnpr::snp_fastImputeSimple(bigsnp$genotypes,method = "mode")
+  bigsnp$genotypes <- bigsnp_imputed
+  bigsnpr::snp_save(bigsnp)
+  bigsnp_imputed <- bigsnpr::snp_attach(rds)
+
+
+  gen_tbl_imputed <- gt_impute_simple(gen_tbl, method = "mode")
+  gt_set_imputed(gen_tbl_imputed, TRUE)
+
+  #set up bigsnpr
+  G <- bigsnp_imputed$genotypes
+  POS <- bigsnp_imputed$map$physical.pos
+  CHR <- bigsnp_imputed$map$chromosome
+
+
+  ind.keep <- bigsnpr::snp_clumping(G, infos.chr = CHR, infos.pos = POS,thr.r2 = 0.2)
+  to_keep <- loci_ld_clump(gen_tbl_imputed, thr_r2 = 0.2)
+
+  #convert our output to indices
+  ind <- which(to_keep == TRUE)
+
+  #check they match
+  expect_true(all(ind.keep == ind))
+
+
+})
+
+
+test_that("loci_ld_clump error unsorted loci",{
+
+  pop_b <- gen_tibble(system.file("extdata/pop_b.bed", package="tidypopgen"),backingfile = tempfile(), quiet = TRUE)
+
+  #now scramble the loci
+  set.seed(123)
+  random_order <- sample(1:17)
+  show_loci(pop_b) <- pop_b %>% select_loci(all_of(random_order)) %>% show_loci()
+
+  #impute
+  pop_b_imputed <- gt_impute_simple(pop_b, method = "mode")
+
+  #ld
+  expect_error(loci_ld_clump(pop_b_imputed, thr_r2 = 0.2), "Your loci are not sorted, try using:")
+  expect_false(identical(show_loci(pop_b_imputed), pop_b_imputed %>% show_loci() %>% arrange(chromosome,position)))
+
+  #reorder the loci
+  show_loci(pop_b_imputed) <- pop_b_imputed %>% show_loci() %>% arrange(chromosome,position)
+
+  #try again
+  expect_equal(loci_ld_clump(pop_b_imputed, thr_r2 = 0.2),
+               c(FALSE,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE))
+  expect_true(identical(show_loci(pop_b_imputed), pop_b_imputed %>% show_loci() %>% arrange(chromosome,position)))
+
+})
+
+
+
+
diff --git a/tests/testthat/test_loci_missingness.R b/tests/testthat/test_loci_missingness.R
index f17f07ea..c7dd631f 100644
--- a/tests/testthat/test_loci_missingness.R
+++ b/tests/testthat/test_loci_missingness.R
@@ -20,4 +20,17 @@ test_that("snpbin_list_means computes correctly",{
   expect_true(all(loci_missingness(test_gt$genotypes, as_counts = TRUE)==n_na))
   # convert to frequencies
   expect_true(all(loci_missingness(test_gt$genotypes)==n_na/nrow(test_genotypes)))
+
+  #create a subset gt
+  test_gt_subset1 <- test_gt %>% filter(id!="a")
+  #subset genotypes
+  test_genotypes_subset1 <- test_genotypes[-1,]
+
+  n_na <- apply(test_genotypes_subset1, 2, count_na)
+  expect_true(all(loci_missingness(test_gt_subset1$genotypes, as_counts = TRUE)==n_na))
+  # convert to frequencies
+  expect_true(all(loci_missingness(test_gt_subset1$genotypes)==n_na/nrow(test_genotypes_subset1)))
+
+
+
 })
diff --git a/tests/testthat/test_loci_transversions_transitions.R b/tests/testthat/test_loci_transversions_transitions.R
index 5eedd551..f091f7f5 100644
--- a/tests/testthat/test_loci_transversions_transitions.R
+++ b/tests/testthat/test_loci_transversions_transitions.R
@@ -16,3 +16,27 @@ test_that("find transitions and transversions",{
   expect_true(all.equal(loci_transversions(test_gt), transv_bool))
   expect_true(all.equal(loci_transitions(test_gt), !transv_bool))
 })
+
+test_that("check warning message for different alleles",{
+  test_indiv_meta <- data.frame (id=c("a","b","c"),
+                                 population = c("pop1","pop1","pop2"))
+  test_genotypes <- rbind(c(1,1,0,1,1,2),
+                          c(2,1,0,NA,0,NA),
+                          c(2,2,0,0,1,NA))
+  test_loci <- data.frame(name=paste0("rs",1:6),
+                          chromosome=c(1,1,1,1,2,2),
+                          position=c(3,5,65,343,23,456),
+                          genetic_dist = as.integer(rep(0,6)),
+                          allele_ref = c("a","t","c","g","c","t"),
+                          allele_alt = c("t","c", NA,"c","g","a"))
+
+  #Create a gen_tibble with alleles that are not "A" "T" "C" "G" using valid_alleles
+  test_gt <- gen_tibble(x = test_genotypes, loci = test_loci, indiv_meta = test_indiv_meta, quiet = TRUE, valid_alleles = c("a", "t", "c", "g"))
+
+  testthat::expect_error(loci_transversions(test_gt), "valid alleles are A T C G 0 . but ")
+  testthat::expect_error(loci_transitions(test_gt), "valid alleles are A T C G 0 . but ")
+
+})
+
+
+
diff --git a/tests/testthat/test_select_loci_if.R b/tests/testthat/test_select_loci_if.R
index 503472cb..078a77a1 100644
--- a/tests/testthat/test_select_loci_if.R
+++ b/tests/testthat/test_select_loci_if.R
@@ -31,6 +31,14 @@ test_that("select_loci_if subsets correctly",{
   criterion_na <- c(TRUE, TRUE, NA, TRUE, TRUE, FALSE)
   expect_identical(test_gen_sub,
                    test_gt %>% select_loci_if(criterion_na))
-  #TODO test errors
+
+  #Test error warnings: boolean criteria too short
+  criterion_na <- c(TRUE, TRUE, NA, TRUE, TRUE)
+  expect_error(test_gt %>% select_loci_if(criterion_na), ".sel_logical should be the same length ")
+
+  #Test error warnings: selection criteria not boolean
+  rsID <- c("rs1", "rs2", "rs3", "rs4", "x1", "x2")
+  expect_error(test_gt %>% select_loci_if(rsID), ".sel_logical should be a logical boolean vector")
+
 
 })
diff --git a/vignettes/plink_cheatsheet.Rmd b/vignettes/plink_cheatsheet.Rmd
index e58ae56f..0e328a3c 100644
--- a/vignettes/plink_cheatsheet.Rmd
+++ b/vignettes/plink_cheatsheet.Rmd
@@ -25,10 +25,10 @@ data <- gen_tibble(bigsnp_path, quiet = TRUE)
 
 ## File management and reading data:
 
-| PLINK                         | `tidypopgen`                                   |
+| PLINK                         | `tidypopgen`                                 |
 |-------------------------------|----------------------------------------------|
-| --make-bed --out              | `gt_as_plink(data, bedfile = my_file)`    |
-| --recode                      | (TODO? generate .ped and .raw again)         |
+| --make-bed --out              | `gt_as_plink(data, bedfile = my_file)`       |
+| --recode                      | `gt_as_plink(data, bedfile = my_file         |
 | --allele1234 and --alleleACGT | See `gen_tibble()` parameter 'valid_alleles' |
 
 PLINK flags --update-alleles, --allele1234, and --alleleACGT, all alter the coding of alleles. In `tidypopgen`, valid alleles are supplied when reading in a `gen_tibble`.