From f6b96e5b89c1aa0c5ebf67acc7a28ca58a2fa1c0 Mon Sep 17 00:00:00 2001
From: Matthew Wells <mattwells9@shaw.ca>
Date: Thu, 30 May 2024 14:11:29 -0500
Subject: [PATCH 01/21] updated report table_values function

Previously, missing values would cause a shift in the index of the map due to using the tokenize function instead of split. This has been resolved and tests have been added.
---
 modules/local/report.nf                    | 41 +++++++++++++++++++---
 tests/data/tables/header_missing_val.csv   |  2 ++
 tests/data/tables/mock_missing_value.csv   |  2 ++
 tests/data/tables/mock_missing_value.tab   |  2 ++
 tests/data/tables/mock_missing_value_2.tab |  2 ++
 tests/data/tables/no_header.csv            |  1 +
 tests/data/tables/no_missing.csv           |  2 ++
 tests/data/tables/no_missing.tab           |  2 ++
 tests/data/tables/two_missing_headers.csv  |  2 ++
 tests/data/tables/vector.csv               |  4 +++
 10 files changed, 56 insertions(+), 4 deletions(-)
 create mode 100644 tests/data/tables/header_missing_val.csv
 create mode 100644 tests/data/tables/mock_missing_value.csv
 create mode 100644 tests/data/tables/mock_missing_value.tab
 create mode 100644 tests/data/tables/mock_missing_value_2.tab
 create mode 100644 tests/data/tables/no_header.csv
 create mode 100644 tests/data/tables/no_missing.csv
 create mode 100644 tests/data/tables/no_missing.tab
 create mode 100644 tests/data/tables/two_missing_headers.csv
 create mode 100644 tests/data/tables/vector.csv

diff --git a/modules/local/report.nf b/modules/local/report.nf
index d1f39a69..7dc35f62 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -804,24 +804,57 @@ def table_values(file_path, header_p, seperator, headers=null){
     def converted_data = [:]
     def idx = 0
     def lines_read = false
+    def missing_value = "NoData"
+    def default_index_col = "__default_index__"
     file_path.withReader{
         String line
         if(header_p){
             header = it.readLine()
-            split_header = header.tokenize(seperator)
+            split_header = header.split(seperator)
+
+            def missing_headers = 0
+            if(split_header.size() > 1){
+                for(col_header in split_header[1..-1]){ // skip first column as it is allowed to be empty
+                    if(!col_header){
+                        missing_headers++;
+                    }
+                }
+            }
+
+            if(missing_headers){
+                error("Missing multiple column headers in ${file_path}. You may need to pass in column headers in the nextflow.config file.")
+            }
+
+            if(!split_header[0] && ( split_header.size() == 1 || split_header[1] != default_index_col)){
+                // Missing column headers could arise from the first column serving as and index, if this is the case
+                // verify that the split_split header size is greater == 1 (e.g is it a vector) or that the next column
+                // value is not equal to the value of "default_index_col"
+                split_header[0] = default_index_col
+            }
+
         }
         if(headers){
             split_header = headers
         }
         while(line = it.readLine()){
-            split_line = line.tokenize(seperator)
+            split_line = line.split(seperator) // split will allow for missing values
             // Transpose, and collect converts the data to a map
-            converted_data[idx] = [split_header, split_line].transpose().collectEntries()
+            if(split_line.size() != split_header.size()){
+                error("The number of values in ${file_path} differs from number of columns headers ${split_header}")
+            }
+
+            def new_row = [split_header, split_line].transpose().collectEntries()
+            new_row.each{
+                if(!it.value){
+                    it.value = missing_value;
+                }
+            }
+            converted_data[idx] = new_row
             idx++
             lines_read = true
         }
         if(!lines_read){
-            converted_data[idx] = [split_header, Collections.nCopies(split_header.size, "NoData")].transpose().collectEntries()
+            converted_data[idx] = [split_header, Collections.nCopies(split_header.size, missing_value)].transpose().collectEntries()
         }
 
     }
diff --git a/tests/data/tables/header_missing_val.csv b/tests/data/tables/header_missing_val.csv
new file mode 100644
index 00000000..f1e36c71
--- /dev/null
+++ b/tests/data/tables/header_missing_val.csv
@@ -0,0 +1,2 @@
+,header2,header3
+stuff1,stuff2,stuff3
diff --git a/tests/data/tables/mock_missing_value.csv b/tests/data/tables/mock_missing_value.csv
new file mode 100644
index 00000000..0afedaa4
--- /dev/null
+++ b/tests/data/tables/mock_missing_value.csv
@@ -0,0 +1,2 @@
+header1,header2,header3
+,stuff2,stuff3
diff --git a/tests/data/tables/mock_missing_value.tab b/tests/data/tables/mock_missing_value.tab
new file mode 100644
index 00000000..4d0244c8
--- /dev/null
+++ b/tests/data/tables/mock_missing_value.tab
@@ -0,0 +1,2 @@
+header1	header2	header3
+	stuff2	stuff3
diff --git a/tests/data/tables/mock_missing_value_2.tab b/tests/data/tables/mock_missing_value_2.tab
new file mode 100644
index 00000000..e505b6fc
--- /dev/null
+++ b/tests/data/tables/mock_missing_value_2.tab
@@ -0,0 +1,2 @@
+header1	header2	header3
+		stuff3
diff --git a/tests/data/tables/no_header.csv b/tests/data/tables/no_header.csv
new file mode 100644
index 00000000..7b4a1978
--- /dev/null
+++ b/tests/data/tables/no_header.csv
@@ -0,0 +1 @@
+stuff1,stuff2,stuff3
diff --git a/tests/data/tables/no_missing.csv b/tests/data/tables/no_missing.csv
new file mode 100644
index 00000000..1e99cd35
--- /dev/null
+++ b/tests/data/tables/no_missing.csv
@@ -0,0 +1,2 @@
+header1,header2,header3
+stuff1,stuff2,stuff3
diff --git a/tests/data/tables/no_missing.tab b/tests/data/tables/no_missing.tab
new file mode 100644
index 00000000..5116b305
--- /dev/null
+++ b/tests/data/tables/no_missing.tab
@@ -0,0 +1,2 @@
+header1	header2	header3
+stuff1	stuff2	stuff3
diff --git a/tests/data/tables/two_missing_headers.csv b/tests/data/tables/two_missing_headers.csv
new file mode 100644
index 00000000..a3b2e951
--- /dev/null
+++ b/tests/data/tables/two_missing_headers.csv
@@ -0,0 +1,2 @@
+,,header3
+stuff1,stuff2,stuff3
diff --git a/tests/data/tables/vector.csv b/tests/data/tables/vector.csv
new file mode 100644
index 00000000..692f198b
--- /dev/null
+++ b/tests/data/tables/vector.csv
@@ -0,0 +1,4 @@
+header1
+stuff1
+stuff2
+stuff3

From 837f27072905b92d469caeb14014377c464786c8 Mon Sep 17 00:00:00 2001
From: Matthew Wells <mattwells9@shaw.ca>
Date: Thu, 30 May 2024 14:14:51 -0500
Subject: [PATCH 02/21] updated changelog

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 644393ac..31fa6c42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,12 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### `Fixed`
+
+- Parsed table values would not show up properly if values were missing resolving issue [Issue 82](https://github.com/phac-nml/mikrokondo/issues/82)
+
 ## v0.2.0 - [2024-05-14]
 
 ### `Added`

From 3eb2c81eaa873a55b9ea7e13c487d3a61ad4a91c Mon Sep 17 00:00:00 2001
From: Matthew Wells <mattwells9@shaw.ca>
Date: Thu, 30 May 2024 14:22:34 -0500
Subject: [PATCH 03/21] added test modules...

---
 tests/functions/report.nf.test | 202 +++++++++++++++++++++++++++++++++
 1 file changed, 202 insertions(+)
 create mode 100644 tests/functions/report.nf.test

diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
new file mode 100644
index 00000000..ca92f560
--- /dev/null
+++ b/tests/functions/report.nf.test
@@ -0,0 +1,202 @@
+/*
+Tests for functions in the report module.
+*/
+
+
+nextflow_function {
+    name "Test report.nf functions"
+    script "modules/local/report.nf"
+    function "table_values"
+
+    test("Test tab missing column value, header supplied"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/mock_missing_value.tab")
+                input[1] = true
+                input[2] = '\t'
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']]
+            assert function.success
+        }
+    }
+
+    test("Test tab missing 2 column values, header supplied"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/mock_missing_value_2.tab")
+                input[1] = true
+                input[2] = '\t'
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'stuff3']]
+            assert function.success
+        }
+    }
+
+
+    test("Test tab, header supplied"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/no_missing.tab")
+                input[1] = true
+                input[2] = '\t'
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'stuff3']]
+            assert function.success
+        }
+    }
+
+    test("Test csv missing column value, header supplied"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/mock_missing_value.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']]
+            assert function.success
+        }
+    }
+
+
+    test("Test csv, header supplied"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/no_missing.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'stuff3']]
+            assert function.success
+        }
+    }
+
+    test("Test csv, header missing one value"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/header_missing_val.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.success
+            assert function.result == ['0':['__default_index':'stuff1', 'header2':'stuff2', 'header3':'stuff3']]
+        }
+    }
+
+
+    test("Test csv, no header"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/no_header.csv")
+                input[1] = false
+                input[2] = ','
+                input[3] = ['header1', 'header2', 'header3']
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.success
+            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'stuff3']]
+        }
+    }
+
+
+    test("Test csv, two headers missing values"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/two_missing_headers.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.failed
+        }
+    }
+
+        test("Test csv, vector with header"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/vector.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.result == ['0':['header1':'stuff1'], '1': ['header1':'stuff2'], '2':['header1':'stuff3']]
+            assert function.success
+        }
+    }
+}

From 049c43c917af70478960c9ec3f1a83b49b931cf9 Mon Sep 17 00:00:00 2001
From: Matthew Wells <mattwells9@shaw.ca>
Date: Thu, 30 May 2024 15:52:09 -0500
Subject: [PATCH 04/21] added additional test cases

---
 modules/local/report.nf                       |   8 +-
 nextflow.config                               |   6 +-
 tests/data/tables/all_values_missing.csv      |   1 +
 tests/data/tables/empty.csv                   |   0
 tests/data/tables/missing_all_headers.csv     |   2 +
 .../data/tables/mistmatch_headers_values.csv  |   2 +
 tests/data/tables/vector_no_hdr.csv           |   4 +
 tests/functions/report.nf.test                | 112 +++++++++++++++++-
 8 files changed, 123 insertions(+), 12 deletions(-)
 create mode 100644 tests/data/tables/all_values_missing.csv
 create mode 100644 tests/data/tables/empty.csv
 create mode 100644 tests/data/tables/missing_all_headers.csv
 create mode 100644 tests/data/tables/mistmatch_headers_values.csv
 create mode 100644 tests/data/tables/vector_no_hdr.csv

diff --git a/modules/local/report.nf b/modules/local/report.nf
index 7dc35f62..e5299a28 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -804,7 +804,7 @@ def table_values(file_path, header_p, seperator, headers=null){
     def converted_data = [:]
     def idx = 0
     def lines_read = false
-    def missing_value = "NoData"
+    def missing_value = ''
     def default_index_col = "__default_index__"
     file_path.withReader{
         String line
@@ -815,13 +815,13 @@ def table_values(file_path, header_p, seperator, headers=null){
             def missing_headers = 0
             if(split_header.size() > 1){
                 for(col_header in split_header[1..-1]){ // skip first column as it is allowed to be empty
-                    if(!col_header){
+                    if(col_header == null || col_header == ''){
                         missing_headers++;
                     }
                 }
             }
 
-            if(missing_headers){
+            if(missing_headers != 0){
                 error("Missing multiple column headers in ${file_path}. You may need to pass in column headers in the nextflow.config file.")
             }
 
@@ -854,7 +854,7 @@ def table_values(file_path, header_p, seperator, headers=null){
             lines_read = true
         }
         if(!lines_read){
-            converted_data[idx] = [split_header, Collections.nCopies(split_header.size, missing_value)].transpose().collectEntries()
+            converted_data[idx] = [split_header, Collections.nCopies(split_header.size(), missing_value)].transpose().collectEntries()
         }
 
     }
diff --git a/nextflow.config b/nextflow.config
index 529ac098..2dd1498c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1078,12 +1078,12 @@ dag {
 
 manifest {
     name            = 'phac-nml/mikrokondo'
-    author          = """matthew wells"""
+    author          = """Matthew Wells, James Robertson, Aaron Petkau, Christy-Lynn Peterson, Eric Marinier"""
     homePage        = 'https://github.com/phac-nml/mikrokondo'
-    description     = """Mikrokondo beta"""
+    description     = """Mikrokondo"""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=23.04.0'
-    version         = '0.2.0'
+    version         = '0.2.1'
     defaultBranch   = 'main'
     doi             = ''
 }
diff --git a/tests/data/tables/all_values_missing.csv b/tests/data/tables/all_values_missing.csv
new file mode 100644
index 00000000..e099d617
--- /dev/null
+++ b/tests/data/tables/all_values_missing.csv
@@ -0,0 +1 @@
+header1,header2,header3
diff --git a/tests/data/tables/empty.csv b/tests/data/tables/empty.csv
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/data/tables/missing_all_headers.csv b/tests/data/tables/missing_all_headers.csv
new file mode 100644
index 00000000..bb3ccac4
--- /dev/null
+++ b/tests/data/tables/missing_all_headers.csv
@@ -0,0 +1,2 @@
+
+stuff1,stuff2,stuff3
diff --git a/tests/data/tables/mistmatch_headers_values.csv b/tests/data/tables/mistmatch_headers_values.csv
new file mode 100644
index 00000000..37ff0287
--- /dev/null
+++ b/tests/data/tables/mistmatch_headers_values.csv
@@ -0,0 +1,2 @@
+header1,header2,header3
+stuff1,stuff2,stuff3,stuff4
diff --git a/tests/data/tables/vector_no_hdr.csv b/tests/data/tables/vector_no_hdr.csv
new file mode 100644
index 00000000..d861c49f
--- /dev/null
+++ b/tests/data/tables/vector_no_hdr.csv
@@ -0,0 +1,4 @@
+
+stuff1
+stuff2
+stuff3
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index ca92f560..888f28ce 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -24,7 +24,7 @@ nextflow_function {
             }
         }
         then{
-            assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']]
+            assert function.result == ['0':['header1':'', 'header2':'stuff2', 'header3':'stuff3']]
             assert function.success
         }
     }
@@ -45,7 +45,7 @@ nextflow_function {
             }
         }
         then{
-            assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'stuff3']]
+            assert function.result == ['0':['header1':'', 'header2':'', 'header3':'stuff3']]
             assert function.success
         }
     }
@@ -88,7 +88,7 @@ nextflow_function {
             }
         }
         then{
-            assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']]
+            assert function.result == ['0':['header1':'', 'header2':'stuff2', 'header3':'stuff3']]
             assert function.success
         }
     }
@@ -132,7 +132,7 @@ nextflow_function {
         }
         then{
             assert function.success
-            assert function.result == ['0':['__default_index':'stuff1', 'header2':'stuff2', 'header3':'stuff3']]
+            assert function.result == ['0':['__default_index__':'stuff1', 'header2':'stuff2', 'header3':'stuff3']]
         }
     }
 
@@ -179,7 +179,7 @@ nextflow_function {
         }
     }
 
-        test("Test csv, vector with header"){
+    test("Test csv, vector with header"){
 
         when {
             function {
@@ -199,4 +199,106 @@ nextflow_function {
             assert function.success
         }
     }
+
+    test("Test empty file"){
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/empty.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.failed
+        }
+    }
+
+
+    test("Test more values than columns"){ // row holds 4 values under 3 headers, so table_values must error
+        // NOTE: the fixture committed with this patch is spelled "mistmatch_..."; the path must match it, otherwise the test "fails" on a missing file and passes vacuously.
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/mistmatch_headers_values.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.failed
+        }
+    }
+
+    test("Vector no column header"){
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/vector_no_hdr.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.result == ['0':['__default_index__':'stuff1'], '1': ['__default_index__':'stuff2'], '2':['__default_index__':'stuff3']]
+            assert function.success
+        }
+    }
+
+
+    test("Missing all column headers"){
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/missing_all_headers.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.failed
+        }
+    }
+
+    test("Missing all values"){
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/all_values_missing.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.success
+            assert function.result == ['0':['header1':'', 'header2':'', 'header3':'']]
+
+        }
+    }
 }

From e35451093675decefda2e34c1de5b0952812cf35 Mon Sep 17 00:00:00 2001
From: Matthew Wells <mattwells9@shaw.ca>
Date: Thu, 30 May 2024 15:55:00 -0500
Subject: [PATCH 05/21] updated changelog

---
 CHANGELOG.md                   | 2 +-
 tests/functions/report.nf.test | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 31fa6c42..94c49409 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## v0.2.1 - [Unreleased]
 
 ### `Fixed`
 
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index 888f28ce..9029c04a 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -298,7 +298,6 @@ nextflow_function {
         then{
             assert function.success
             assert function.result == ['0':['header1':'', 'header2':'', 'header3':'']]
-
         }
     }
 }

From 14558a9190bae33dc30cf35d227dd33494960a25 Mon Sep 17 00:00:00 2001
From: Matthew Wells <mattwells9@shaw.ca>
Date: Fri, 31 May 2024 13:22:42 -0500
Subject: [PATCH 06/21] reverted to old commit

---
 modules/local/report.nf        | 2 +-
 tests/functions/report.nf.test | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index e5299a28..d36fbd30 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -804,7 +804,7 @@ def table_values(file_path, header_p, seperator, headers=null){
     def converted_data = [:]
     def idx = 0
     def lines_read = false
-    def missing_value = ''
+    def missing_value = 'NoData'
     def default_index_col = "__default_index__"
     file_path.withReader{
         String line
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index 9029c04a..b6fd5c9a 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -24,7 +24,7 @@ nextflow_function {
             }
         }
         then{
-            assert function.result == ['0':['header1':'', 'header2':'stuff2', 'header3':'stuff3']]
+            assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']]
             assert function.success
         }
     }
@@ -45,7 +45,7 @@ nextflow_function {
             }
         }
         then{
-            assert function.result == ['0':['header1':'', 'header2':'', 'header3':'stuff3']]
+            assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'stuff3']]
             assert function.success
         }
     }
@@ -88,7 +88,7 @@ nextflow_function {
             }
         }
         then{
-            assert function.result == ['0':['header1':'', 'header2':'stuff2', 'header3':'stuff3']]
+            assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']]
             assert function.success
         }
     }
@@ -297,7 +297,7 @@ nextflow_function {
         }
         then{
             assert function.success
-            assert function.result == ['0':['header1':'', 'header2':'', 'header3':'']]
+            assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'NoData']]
         }
     }
 }

From 201a4d4b4406384769de47508ca71d8fca0e8df1 Mon Sep 17 00:00:00 2001
From: Matthew Wells <mattwells9@shaw.ca>
Date: Fri, 31 May 2024 14:10:03 -0500
Subject: [PATCH 07/21] flipped inequality allowing more headers than values

---
 modules/local/report.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index d36fbd30..15a52e3a 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -839,7 +839,7 @@ def table_values(file_path, header_p, seperator, headers=null){
         while(line = it.readLine()){
             split_line = line.split(seperator) // split will allow for missing values
             // Transpose, and collect converts the data to a map
-            if(split_line.size() != split_header.size()){
+            if(split_line.size() > split_header.size()){
                 error("The number of values in ${file_path} differs from number of columns headers ${split_header}")
             }
 

From 34269842f7147c0d23106cb0e481c25374ae5729 Mon Sep 17 00:00:00 2001
From: Matthew Wells <mattwells9@shaw.ca>
Date: Fri, 31 May 2024 15:27:19 -0500
Subject: [PATCH 08/21] added additional tests to table parser

---
 tests/data/tables/missing_last_value.tab      |  2 +
 .../missing_multiple_value_separators.tab     |  2 +
 ..._multiple_value_separators_extra_field.tab |  2 +
 tests/functions/report.nf.test                | 60 +++++++++++++++++++
 4 files changed, 66 insertions(+)
 create mode 100644 tests/data/tables/missing_last_value.tab
 create mode 100644 tests/data/tables/missing_multiple_value_separators.tab
 create mode 100644 tests/data/tables/missing_multiple_value_separators_extra_field.tab

diff --git a/tests/data/tables/missing_last_value.tab b/tests/data/tables/missing_last_value.tab
new file mode 100644
index 00000000..e2f48fcb
--- /dev/null
+++ b/tests/data/tables/missing_last_value.tab
@@ -0,0 +1,2 @@
+header1	header2	header3
+stuff1	stuff2
diff --git a/tests/data/tables/missing_multiple_value_separators.tab b/tests/data/tables/missing_multiple_value_separators.tab
new file mode 100644
index 00000000..71217d05
--- /dev/null
+++ b/tests/data/tables/missing_multiple_value_separators.tab
@@ -0,0 +1,2 @@
+header1	header2	header3	header4
+stuff1	stuff2
diff --git a/tests/data/tables/missing_multiple_value_separators_extra_field.tab b/tests/data/tables/missing_multiple_value_separators_extra_field.tab
new file mode 100644
index 00000000..74402fd7
--- /dev/null
+++ b/tests/data/tables/missing_multiple_value_separators_extra_field.tab
@@ -0,0 +1,2 @@
+header1	header2	header3	header4
+stuff1	stuff2		stuff4
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index b6fd5c9a..d3421806 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -300,4 +300,64 @@ nextflow_function {
             assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'NoData']]
         }
     }
+
+    test("Missing last two values"){
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/missing_last_value.tab")
+                input[1] = true
+                input[2] = '\t'
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.success
+            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData']]
+        }
+    }
+
+    test("Missing multiple terminal value separators"){
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/missing_multiple_value_separators.tab")
+                input[1] = true
+                input[2] = '\t'
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.success
+            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'NoData']]
+        }
+    }
+
+    test("Missing internal value separator"){ // fixture row has an empty third field between two tabs; it must map to 'NoData' while header4 keeps 'stuff4'
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/missing_multiple_value_separators_extra_field.tab")
+                input[1] = true
+                input[2] = '\t'
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.success
+            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'stuff4']]
+        }
+    }
 }

From 7df332fa40f8a9eb8d036986f56a512b6664f0c9 Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Fri, 31 May 2024 16:19:05 -0500
Subject: [PATCH 09/21] Switched to splitCsv

---
 modules/local/report.nf        | 59 +++-------------------------------
 tests/functions/report.nf.test |  2 ++
 2 files changed, 6 insertions(+), 55 deletions(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index 15a52e3a..32bdcf5f 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -799,64 +799,13 @@ def table_values(file_path, header_p, seperator, headers=null){
 
         returns a map
     */
-    def split_header = null
-    def split_line = null
-    def converted_data = [:]
-    def idx = 0
-    def lines_read = false
     def missing_value = 'NoData'
     def default_index_col = "__default_index__"
-    file_path.withReader{
-        String line
-        if(header_p){
-            header = it.readLine()
-            split_header = header.split(seperator)
-
-            def missing_headers = 0
-            if(split_header.size() > 1){
-                for(col_header in split_header[1..-1]){ // skip first column as it is allowed to be empty
-                    if(col_header == null || col_header == ''){
-                        missing_headers++;
-                    }
-                }
-            }
-
-            if(missing_headers != 0){
-                error("Missing multiple column headers in ${file_path}. You may need to pass in column headers in the nextflow.config file.")
-            }
-
-            if(!split_header[0] && ( split_header.size() == 1 || split_header[1] != default_index_col)){
-                // Missing column headers could arise from the first column serving as and index, if this is the case
-                // verify that the split_split header size is greater == 1 (e.g is it a vector) or that the next column
-                // value is not equal to the value of "default_index_col"
-                split_header[0] = default_index_col
-            }
-
-        }
-        if(headers){
-            split_header = headers
-        }
-        while(line = it.readLine()){
-            split_line = line.split(seperator) // split will allow for missing values
-            // Transpose, and collect converts the data to a map
-            if(split_line.size() > split_header.size()){
-                error("The number of values in ${file_path} differs from number of columns headers ${split_header}")
-            }
-
-            def new_row = [split_header, split_line].transpose().collectEntries()
-            new_row.each{
-                if(!it.value){
-                    it.value = missing_value;
-                }
-            }
-            converted_data[idx] = new_row
-            idx++
-            lines_read = true
-        }
-        if(!lines_read){
-            converted_data[idx] = [split_header, Collections.nCopies(split_header.size(), missing_value)].transpose().collectEntries()
-        }
+    def replace_missing = { it == null || it == '' ? missing_value : it }
 
+    def rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator)
+    def converted_data = rows_list.indexed().collectEntries { idx, row -> 
+        ["${idx}": row.collectEntries { k, v -> [(k): replace_missing(v)] }]
     }
     return converted_data
 }
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index d3421806..5d7ea71c 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -95,6 +95,7 @@ nextflow_function {
 
 
     test("Test csv, header supplied"){
+        tag "no_missing"
 
         when {
             function {
@@ -138,6 +139,7 @@ nextflow_function {
 
 
     test("Test csv, no header"){
+        tag "csv_no_header"
 
         when {
             function {

From 54f23048d6ef9c4f63525d5d57fe788b613cb9f3 Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Fri, 31 May 2024 16:59:39 -0500
Subject: [PATCH 10/21] Added ability to set default index

---
 modules/local/report.nf        | 24 ++++++++++++++++++++----
 tests/functions/report.nf.test |  2 ++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index 32bdcf5f..b3dd528a 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -801,14 +801,30 @@ def table_values(file_path, header_p, seperator, headers=null){
     */
     def missing_value = 'NoData'
     def default_index_col = "__default_index__"
+    def rows_list = null
     def replace_missing = { it == null || it == '' ? missing_value : it }
 
-    def rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator)
+    try {
+        rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator)
+    } catch (java.lang.IllegalStateException e) {
+        // Probably not the best solution since messages could change with different versions
+        // of Nextflow, but there isn't a way to get any more specific exception type
+        if (header_p && e.getMessage() == "Empty header columns are not allowed in CSV file") {
+            // Attempt to read file assuming first line is header line with missing value
+            def header_line = file_path.splitText()[0].trim()
+            def headers_from_file = header_line.split(seperator)
+            if (headers_from_file[0] == null || headers_from_file[0] == '') {
+                headers_from_file[0] = default_index_col
+                rows_list = file_path.splitCsv(header: headers_from_file as List, sep:seperator, skip: 1)
+            } else {
+                throw e
+            }
+        } else {
+            throw e
+        }
+    }
     def converted_data = rows_list.indexed().collectEntries { idx, row -> 
         ["${idx}": row.collectEntries { k, v -> [(k): replace_missing(v)] }]
     }
     return converted_data
 }
-
-
-
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index 5d7ea71c..a72b14a4 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -9,6 +9,7 @@ nextflow_function {
     function "table_values"
 
     test("Test tab missing column value, header supplied"){
+        tag "tab_missing_value"
 
         when {
             function {
@@ -117,6 +118,7 @@ nextflow_function {
     }
 
     test("Test csv, header missing one value"){
+        tag "header_missing_one_value"
 
         when {
             function {

From 88a65c674d80b7f9a90722a61fc184930a4aa340 Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 08:01:24 -0500
Subject: [PATCH 11/21] Fixed all values missing test

---
 tests/data/tables/all_values_missing.csv | 1 +
 tests/functions/report.nf.test           | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/tests/data/tables/all_values_missing.csv b/tests/data/tables/all_values_missing.csv
index e099d617..519dd61e 100644
--- a/tests/data/tables/all_values_missing.csv
+++ b/tests/data/tables/all_values_missing.csv
@@ -1 +1,2 @@
 header1,header2,header3
+,,
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index a72b14a4..2996ef64 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -184,6 +184,7 @@ nextflow_function {
     }
 
     test("Test csv, vector with header"){
+        tag "csv_vector_header"
 
         when {
             function {
@@ -286,6 +287,8 @@ nextflow_function {
     }
 
     test("Missing all values"){
+        tag "missing_all_values"
+
         when {
             function {
                 """

From c4a418ffc198f18af0f7e67e6f0fe90a59554b3b Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 08:06:46 -0500
Subject: [PATCH 12/21] Fixed up internal separator test

---
 tests/functions/report.nf.test | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index 2996ef64..480aff0e 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -349,6 +349,8 @@ nextflow_function {
     }
 
     test("Missing internal value separator"){
+        tag "missing_internal_separator"
+
         when {
             function {
                 """
@@ -364,7 +366,7 @@ nextflow_function {
         }
         then{
             assert function.success
-            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'stuff4']]
+            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'NoData']]
         }
     }
 }

From 4e7226a1a00a793d97c8833a3cfddc0dff6559e5 Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 08:14:56 -0500
Subject: [PATCH 13/21] Fixed default headers issue

---
 modules/local/report.nf        | 4 +---
 tests/functions/report.nf.test | 2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index b3dd528a..60eda82c 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -807,9 +807,7 @@ def table_values(file_path, header_p, seperator, headers=null){
     try {
         rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator)
     } catch (java.lang.IllegalStateException e) {
-        // Probably not the best solution since messages could change with different versions
-        // of Nextflow, but there isn't a way to get any more specific exception type
-        if (header_p && e.getMessage() == "Empty header columns are not allowed in CSV file") {
+        if (header_p) {
             // Attempt to read file assuming first line is header line with missing value
             def header_line = file_path.splitText()[0].trim()
             def headers_from_file = header_line.split(seperator)
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index 480aff0e..c110aba5 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -247,6 +247,8 @@ nextflow_function {
     }
 
     test("Vector no column header"){
+        tag "vector_no_column_header"
+
         when {
             function {
                 """

From 7b4e1f9bef101768a1448ad6b46c757c6f51954a Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 08:26:21 -0500
Subject: [PATCH 14/21] Fixed up situation of more than one missing header

---
 modules/local/report.nf        | 9 +++++++--
 tests/functions/report.nf.test | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index 60eda82c..097c3c9c 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -802,16 +802,21 @@ def table_values(file_path, header_p, seperator, headers=null){
     def missing_value = 'NoData'
     def default_index_col = "__default_index__"
     def rows_list = null
-    def replace_missing = { it == null || it == '' ? missing_value : it }
+    def is_missing = { it == null || it == '' }
+    def replace_missing = { is_missing(it) ? missing_value : it }
 
     try {
         rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator)
     } catch (java.lang.IllegalStateException e) {
+        // Catch exception here to deal with situation where the very first header is missing
         if (header_p) {
             // Attempt to read file assuming first line is header line with missing value
             def header_line = file_path.splitText()[0].trim()
             def headers_from_file = header_line.split(seperator)
-            if (headers_from_file[0] == null || headers_from_file[0] == '') {
+            def count_missing_headers = headers_from_file.collect{ is_missing(it) ? 1 : 0 }.sum()
+            if (count_missing_headers > 1) {
+                throw e
+            } else if (is_missing(headers_from_file[0])) {
                 headers_from_file[0] = default_index_col
                 rows_list = file_path.splitCsv(header: headers_from_file as List, sep:seperator, skip: 1)
             } else {
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index c110aba5..393d55ea 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -164,6 +164,7 @@ nextflow_function {
 
 
     test("Test csv, two headers missing values"){
+        tag "two_headers_missing_values"
 
         when {
             function {

From 2f840b5ce6b68a48a35374a62040c0038288e291 Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 08:55:40 -0500
Subject: [PATCH 15/21] Fixed up issue with mismatched number of headers and
 values

---
 modules/local/report.nf        | 12 +++++++++++-
 tests/functions/report.nf.test |  3 +++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index 097c3c9c..65f1b63f 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -811,8 +811,18 @@ def table_values(file_path, header_p, seperator, headers=null){
         // Catch exception here to deal with situation where the very first header is missing
         if (header_p) {
             // Attempt to read file assuming first line is header line with missing value
-            def header_line = file_path.splitText()[0].trim()
+            def file_lines = file_path.splitText()
+            def header_line = file_lines[0].trim()
+            def values_line1 = file_lines[1].trim()
             def headers_from_file = header_line.split(seperator)
+            def value1_columns = values_line1.split(seperator)
+
+            // If you pass a list of headers, then splitCsv does not seem to check to make sure
+            // the list has the same number as the values columns in the file, so I need to check this here
+            if (headers_from_file.size() != value1_columns.size()) {
+                throw new java.lang.IllegalStateException("Mismatched number of headers ${headers_from_file} and column values ${value1_columns} for file ${file_path}")
+            }
+
             def count_missing_headers = headers_from_file.collect{ is_missing(it) ? 1 : 0 }.sum()
             if (count_missing_headers > 1) {
                 throw e
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index 393d55ea..90445755 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -271,6 +271,8 @@ nextflow_function {
 
 
     test("Missing all column headers"){
+        tag "missing_all_column_headers"
+
         when {
             function {
                 """
@@ -286,6 +288,7 @@ nextflow_function {
         }
         then{
             assert function.failed
+            assert function.stdout.any { it.contains("ERROR ~ Mismatched number of headers [] and column values [stuff1, stuff2, stuff3] for file") }
         }
     }
 

From df4def6524b958eedf4f4e3b584a1dd98c7a7bc1 Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 10:01:41 -0500
Subject: [PATCH 16/21] Fixed up issue with row index value

---
 modules/local/report.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index 65f1b63f..4caa8ca6 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -837,7 +837,7 @@ def table_values(file_path, header_p, seperator, headers=null){
         }
     }
     def converted_data = rows_list.indexed().collectEntries { idx, row -> 
-        ["${idx}": row.collectEntries { k, v -> [(k): replace_missing(v)] }]
+        [(idx): row.collectEntries { k, v -> [(k): replace_missing(v)] }]
     }
     return converted_data
 }

From 2cbac13461bf038c001612763f7610ff06ecb2ec Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 10:08:11 -0500
Subject: [PATCH 17/21] Fixed up mismatched text in schema and changelog

---
 CHANGELOG.md         | 19 +++++++++++++------
 nextflow_schema.json |  4 ++--
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 94c49409..1e9f65fd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,13 +3,14 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v0.2.1 - [Unreleased]
+## [0.2.1] - 2024-06-03
 
 ### `Fixed`
 
-- Parsed table values would not show up properly if values were missing resolving issue [Issue 82](https://github.com/phac-nml/mikrokondo/issues/82)
+- Fixed an issue where parsed table values would not show up properly if values were missing. See [PR 83](https://github.com/phac-nml/mikrokondo/pull/83)
+- Fixed mismatched description for minimap2 and mash databases. See [PR 83](https://github.com/phac-nml/mikrokondo/pull/83)
 
-## v0.2.0 - [2024-05-14]
+## [0.2.0] - 2024-05-14
 
 ### `Added`
 
@@ -43,7 +44,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Updated StarAMR to version 0.10.0. See [PR 74](https://github.com/phac-nml/mikrokondo/pull/74)
 
-## v0.1.2 - [2024-05-02]
+## [0.1.2] - 2024-05-02
 
 ### Changed
 
@@ -52,13 +53,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Set `--kraken2_db` to be a required parameter for the pipeline. See [PR 71](https://github.com/phac-nml/mikrokondo/pull/71)
 - Hide bakta parameters from IRIDA Next UI. See [PR 71](https://github.com/phac-nml/mikrokondo/pull/71)
 
-## v0.1.1 - [2024-04-22]
+## [0.1.1] - 2024-04-22
 
 ### Changed
 
 - Switched the resource labels for **parse_fastp**, **select_pointfinder**, **report**, and **parse_kat** from `process_low` to `process_single` as they are all configured to run on the local Nextflow machine. See [PR 67](https://github.com/phac-nml/mikrokondo/pull/67)
 
-## v0.1.0 - [2024-03-22]
+## [0.1.0] - 2024-03-22
 
 Initial release of phac-nml/mikrokondo. Mikrokondo currently supports: read trimming and quality control, contamination detection, assembly (isolate, metagenomic or hybrid), annotation, AMR detection and subtyping of genomic sequencing data targeting bacterial or metagenomic data.
 
@@ -85,3 +86,9 @@ Initial release of phac-nml/mikrokondo. Mikrokondo currently supports: read trim
 - Changed salmonella default default coverage to 40
 
 - Added integration testing using [nf-test](https://www.nf-test.com/).
+
+[0.2.1]: https://github.com/phac-nml/mikrokondo/releases/tag/0.2.1
+[0.2.0]: https://github.com/phac-nml/mikrokondo/releases/tag/0.2.0
+[0.1.2]: https://github.com/phac-nml/mikrokondo/releases/tag/0.1.2
+[0.1.1]: https://github.com/phac-nml/mikrokondo/releases/tag/0.1.1
+[0.1.0]: https://github.com/phac-nml/mikrokondo/releases/tag/0.1.0
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 722de4a4..b7864817 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -76,14 +76,14 @@
             "properties": {
                 "dehosting_idx": {
                     "type": "string",
-                    "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)",
+                    "description": "Minimap2 index for dehosting and kitome removal",
                     "pattern": "^\\S+$",
                     "exists": true,
                     "format": "file-path"
                 },
                 "mash_sketch": {
                     "type": "string",
-                    "description": "Minimpa2 index for dehosting and kitome removal",
+                    "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)",
                     "pattern": "^\\S+$",
                     "exists": true,
                     "format": "file-path"

From ac72001aea23aab9329a3be101f0fe330af3099d Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 15:14:23 -0500
Subject: [PATCH 18/21] Fixed up tests/tabular parsing

---
 modules/local/report.nf        | 77 ++++++++++++++++++++++------------
 tests/functions/report.nf.test |  2 +-
 2 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index 4caa8ca6..62912598 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -802,42 +802,67 @@ def table_values(file_path, header_p, seperator, headers=null){
     def missing_value = 'NoData'
     def default_index_col = "__default_index__"
     def rows_list = null
+    def use_modified_headers_from_file = false
     def is_missing = { it == null || it == '' }
     def replace_missing = { is_missing(it) ? missing_value : it }
 
-    try {
-        rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator)
-    } catch (java.lang.IllegalStateException e) {
-        // Catch exception here to deal with situation where the very first header is missing
-        if (header_p) {
-            // Attempt to read file assuming first line is header line with missing value
-            def file_lines = file_path.splitText()
-            def header_line = file_lines[0].trim()
-            def values_line1 = file_lines[1].trim()
-            def headers_from_file = header_line.split(seperator)
-            def value1_columns = values_line1.split(seperator)
-
-            // If you pass a list of headers, then splitCsv does not seem to check to make sure
-            // the list has the same number as the values columns in the file, so I need to check this here
-            if (headers_from_file.size() != value1_columns.size()) {
-                throw new java.lang.IllegalStateException("Mismatched number of headers ${headers_from_file} and column values ${value1_columns} for file ${file_path}")
+    // Reads two lines (up to one header line + one row) for making decisions on how to parse the file
+    def file_lines = file_path.splitText(limit: 2)
+    if (!header_p) {
+        if (file_lines.size() == 0) {
+            // headers were not in the file, and file size is 0, so return missing data based
+            // on passed headers
+            rows_list = headers.collectEntries { [(it): null] }
+        } else {
+            // verify that passed headers and rows have same number
+            def row_line = file_lines[0].replaceAll('(\n|\r\n)$', '')
+            def row_line_columns = row_line.split(seperator, -1)
+            if (headers.size() != row_line_columns.size()) {
+                throw new Exception("Mismatched number of passed headers ${headers} and column values ${row_line_columns} for file ${file_path}")
+            } else {
+                rows_list = file_path.splitCsv(header: headers, sep:seperator)
+            }
+        }
+    } else {
+        // Headers exist in file
+
+        if (file_lines.size() == 0) {
+            throw new Exception("Attempting to parse empty file [${file_path}] as a table where header_p=${header_p}")
+        }
+
+        def header_line = file_lines[0].replaceAll('(\n|\r\n)$', '')
+        def headers_from_file = header_line.split(seperator, -1)
+        def total_missing_headers = headers_from_file.collect{ is_missing(it) ? 1 : 0 }.sum()
+
+        if (total_missing_headers > 1) {
+            throw new Exception("Attempting to parse tabular file with more than one missing header: [${file_path}]")
+        } else if (is_missing(headers_from_file[0])) {
+            // Case, single missing header as first column
+            headers_from_file[0] = default_index_col
+            use_modified_headers_from_file = true
+        }
+
+        if (file_lines.size() == 1) {
+            // There is no row lines, only headers, so return missing data
+            rows_list = headers_from_file.collectEntries { [(it): null] }
+        } else {
+            // If there exists a row line, then make sure rows + headers match
+
+            def row_line1 = file_lines[1].replaceAll('(\n|\r\n)$', '')
+            def row_line1_columns = row_line1.split(seperator, -1)
+            if (headers_from_file.size() != row_line1_columns.size()) {
+                throw new java.lang.IllegalStateException("Mismatched number of headers ${headers_from_file} and column values ${row_line1_columns} for file ${file_path}")
             }
 
-            def count_missing_headers = headers_from_file.collect{ is_missing(it) ? 1 : 0 }.sum()
-            if (count_missing_headers > 1) {
-                throw e
-            } else if (is_missing(headers_from_file[0])) {
-                headers_from_file[0] = default_index_col
+            if (use_modified_headers_from_file) {
                 rows_list = file_path.splitCsv(header: headers_from_file as List, sep:seperator, skip: 1)
             } else {
-                throw e
+                rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator)
             }
-        } else {
-            throw e
         }
     }
-    def converted_data = rows_list.indexed().collectEntries { idx, row -> 
+
+    return rows_list.indexed().collectEntries { idx, row -> 
         [(idx): row.collectEntries { k, v -> [(k): replace_missing(v)] }]
     }
-    return converted_data
 }
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index 90445755..cda9544e 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -288,7 +288,7 @@ nextflow_function {
         }
         then{
             assert function.failed
-            assert function.stdout.any { it.contains("ERROR ~ Mismatched number of headers [] and column values [stuff1, stuff2, stuff3] for file") }
+            assert function.stdout.any { it.contains("ERROR ~ Mismatched number of headers [__default_index__] and column values [stuff1, stuff2, stuff3] for file") }
         }
     }
 

From ef1aa4934e5d3e1b71a5438415fdf8d6a261b076 Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 15:36:46 -0500
Subject: [PATCH 19/21] Fixed up tests

---
 modules/local/report.nf        |  8 +++++---
 tests/functions/report.nf.test | 32 +++++++-------------------------
 2 files changed, 12 insertions(+), 28 deletions(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index 62912598..d61ae712 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -808,7 +808,9 @@ def table_values(file_path, header_p, seperator, headers=null){
 
     // Reads two lines (up to one header line + one row) for making decisions on how to parse the file
     def file_lines = file_path.splitText(limit: 2)
-    if (!header_p) {
+    if (!header_p && headers == null) {
+        throw new Exception("Header is not provided in file [header_p=${header_p}], but headers passed to function is null")
+    } else if (!header_p) {
         if (file_lines.size() == 0) {
             // headers were not in the file, and file size is 0, so return missing data based
             // on passed headers
@@ -851,13 +853,13 @@ def table_values(file_path, header_p, seperator, headers=null){
             def row_line1 = file_lines[1].replaceAll('(\n|\r\n)$', '')
             def row_line1_columns = row_line1.split(seperator, -1)
             if (headers_from_file.size() != row_line1_columns.size()) {
-                throw new java.lang.IllegalStateException("Mismatched number of headers ${headers_from_file} and column values ${row_line1_columns} for file ${file_path}")
+                throw new Exception("Mismatched number of headers ${headers_from_file} and column values ${row_line1_columns} for file ${file_path}")
             }
 
             if (use_modified_headers_from_file) {
                 rows_list = file_path.splitCsv(header: headers_from_file as List, sep:seperator, skip: 1)
             } else {
-                rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator)
+                rows_list = file_path.splitCsv(header: true, sep:seperator)
             }
         }
     }
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index cda9544e..baae3b66 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -315,6 +315,8 @@ nextflow_function {
     }
 
     test("Missing last two values"){
+        tag "missing_last_two_values"
+
         when {
             function {
                 """
@@ -329,33 +331,13 @@ nextflow_function {
             }
         }
         then{
-            assert function.success
-            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData']]
+            assert function.failed
+            assert function.stdout.any { it.contains("Mismatched number of headers [header1, header2, header3] and column values [stuff1, stuff2]") }
         }
     }
 
     test("Missing multiple terminal value separators"){
-        when {
-            function {
-                """
-                input[0] = file("$baseDir/tests/data/tables/missing_multiple_value_separators.tab")
-                input[1] = true
-                input[2] = '\t'
-                input[3] = null
-                """
-            }
-            params {
-                outdir = "results"
-            }
-        }
-        then{
-            assert function.success
-            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'NoData']]
-        }
-    }
-
-    test("Missing internal value separator"){
-        tag "missing_internal_separator"
+        tag "missing_multiple_terminal_value_separators"
 
         when {
             function {
@@ -371,8 +353,8 @@ nextflow_function {
             }
         }
         then{
-            assert function.success
-            assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'NoData']]
+            assert function.failed
+            assert function.stdout.any { it.contains("Mismatched number of headers [header1, header2, header3, header4] and column values [stuff1, stuff2]") }
         }
     }
 }

From 0e85c667cd761eb0ff2c759aae72f6adadfabe98 Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 16:10:23 -0500
Subject: [PATCH 20/21] Fixed up structure of returned empty tables

---
 modules/local/report.nf        |  7 ++++---
 tests/functions/report.nf.test | 23 +++++++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/modules/local/report.nf b/modules/local/report.nf
index d61ae712..adf1fe77 100644
--- a/modules/local/report.nf
+++ b/modules/local/report.nf
@@ -813,8 +813,8 @@ def table_values(file_path, header_p, seperator, headers=null){
     } else if (!header_p) {
         if (file_lines.size() == 0) {
             // headers were not in the file, and file size is 0, so return missing data based
-            // on passed headers
-            rows_list = headers.collectEntries { [(it): null] }
+            // on passed headers (i.e., single row of empty values)
+            rows_list = [headers.collectEntries { [(it): null] }]
         } else {
             // verify that passed headers and rows have same number
             def row_line = file_lines[0].replaceAll('(\n|\r\n)$', '')
@@ -846,7 +846,8 @@ def table_values(file_path, header_p, seperator, headers=null){
 
         if (file_lines.size() == 1) {
             // There is no row lines, only headers, so return missing data
-            rows_list = headers_from_file.collectEntries { [(it): null] }
+            // (single row of empty values)
+            rows_list = [headers_from_file.collectEntries { [(it): null] }]
         } else {
             // If there exists a row line, then make sure rows + headers match
 
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index baae3b66..2218c7d1 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -207,6 +207,7 @@ nextflow_function {
     }
 
     test("Test empty file"){
+        tag "test_empty"
 
         when {
             function {
@@ -223,9 +224,31 @@ nextflow_function {
         }
         then{
             assert function.failed
+            assert function.stdout.any { it.contains("ERROR ~ Attempting to parse empty file") }
         }
     }
 
+    test("Test empty file pass header"){
+        tag "test_empty_pass_header"
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/empty.csv")
+                input[1] = false
+                input[2] = ','
+                input[3] = ['header1', 'header2']
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.success
+            assert function.result == ['0':['header1':'NoData', 'header2':'NoData']]
+        }
+    }
 
     test("Test more values than columns"){
 

From e5a75700084f64b6c4ee8bc04507c529432d9790 Mon Sep 17 00:00:00 2001
From: Aaron Petkau <aaron.petkau@gmail.com>
Date: Mon, 3 Jun 2024 16:20:01 -0500
Subject: [PATCH 21/21] Added additional test case

---
 .../missing_all_headers_single_line.csv       |  1 +
 tests/functions/report.nf.test                | 24 +++++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 tests/data/tables/missing_all_headers_single_line.csv

diff --git a/tests/data/tables/missing_all_headers_single_line.csv b/tests/data/tables/missing_all_headers_single_line.csv
new file mode 100644
index 00000000..7b4a1978
--- /dev/null
+++ b/tests/data/tables/missing_all_headers_single_line.csv
@@ -0,0 +1 @@
+stuff1,stuff2,stuff3
diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test
index 2218c7d1..1df91d55 100644
--- a/tests/functions/report.nf.test
+++ b/tests/functions/report.nf.test
@@ -315,6 +315,30 @@ nextflow_function {
         }
     }
 
+    test("Missing all column headers, only single line"){
+        tag "missing_all_column_headers_single_line"
+
+        when {
+            function {
+                """
+                input[0] = file("$baseDir/tests/data/tables/missing_all_headers_single_line.csv")
+                input[1] = true
+                input[2] = ','
+                input[3] = null
+                """
+            }
+            params {
+                outdir = "results"
+            }
+        }
+        then{
+            assert function.success
+            // When headers are expected in the file but it contains only a single line,
+            // that line is treated as the header row, yielding one row of missing values
+            assert function.result == ['0':['stuff1':'NoData', 'stuff2':'NoData', 'stuff3':'NoData']]
+        }
+    }
+
     test("Missing all values"){
         tag "missing_all_values"