From f6b96e5b89c1aa0c5ebf67acc7a28ca58a2fa1c0 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 30 May 2024 14:11:29 -0500 Subject: [PATCH 01/21] updated report table_values function Previously missing values would cause a shift in the index of the map due to using the tokenize function instead of split. This has been resolved and tests have been added --- modules/local/report.nf | 41 +++++++++++++++++++--- tests/data/tables/header_missing_val.csv | 2 ++ tests/data/tables/mock_missing_value.csv | 2 ++ tests/data/tables/mock_missing_value.tab | 2 ++ tests/data/tables/mock_missing_value_2.tab | 2 ++ tests/data/tables/no_header.csv | 1 + tests/data/tables/no_missing.csv | 2 ++ tests/data/tables/no_missing.tab | 2 ++ tests/data/tables/two_missing_headers.csv | 2 ++ tests/data/tables/vector.csv | 4 +++ 10 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 tests/data/tables/header_missing_val.csv create mode 100644 tests/data/tables/mock_missing_value.csv create mode 100644 tests/data/tables/mock_missing_value.tab create mode 100644 tests/data/tables/mock_missing_value_2.tab create mode 100644 tests/data/tables/no_header.csv create mode 100644 tests/data/tables/no_missing.csv create mode 100644 tests/data/tables/no_missing.tab create mode 100644 tests/data/tables/two_missing_headers.csv create mode 100644 tests/data/tables/vector.csv diff --git a/modules/local/report.nf b/modules/local/report.nf index d1f39a69..7dc35f62 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -804,24 +804,57 @@ def table_values(file_path, header_p, seperator, headers=null){ def converted_data = [:] def idx = 0 def lines_read = false + def missing_value = "NoData" + def default_index_col = "__default_index__" file_path.withReader{ String line if(header_p){ header = it.readLine() - split_header = header.tokenize(seperator) + split_header = header.split(seperator) + + def missing_headers = 0 + if(split_header.size() > 1){ + for(col_header in 
split_header[1..-1]){ // skip first column as it is allowed to be empty + if(!col_header){ + missing_headers++; + } + } + } + + if(missing_headers){ + error("Missing multiple column headers in ${file_path}. You may need to pass in column headers in the nextflow.config file.") + } + + if(!split_header[0] && ( split_header.size() == 1 || split_header[1] != default_index_col)){ + // Missing column headers could arise from the first column serving as and index, if this is the case + // verify that the split_split header size is greater == 1 (e.g is it a vector) or that the next column + // value is not equal to the value of "default_index_col" + split_header[0] = default_index_col + } + } if(headers){ split_header = headers } while(line = it.readLine()){ - split_line = line.tokenize(seperator) + split_line = line.split(seperator) // split will allow for missing values // Transpose, and collect converts the data to a map - converted_data[idx] = [split_header, split_line].transpose().collectEntries() + if(split_line.size() != split_header.size()){ + error("The number of values in ${file_path} differs from number of columns headers ${split_header}") + } + + def new_row = [split_header, split_line].transpose().collectEntries() + new_row.each{ + if(!it.value){ + it.value = missing_value; + } + } + converted_data[idx] = new_row idx++ lines_read = true } if(!lines_read){ - converted_data[idx] = [split_header, Collections.nCopies(split_header.size, "NoData")].transpose().collectEntries() + converted_data[idx] = [split_header, Collections.nCopies(split_header.size, missing_value)].transpose().collectEntries() } } diff --git a/tests/data/tables/header_missing_val.csv b/tests/data/tables/header_missing_val.csv new file mode 100644 index 00000000..f1e36c71 --- /dev/null +++ b/tests/data/tables/header_missing_val.csv @@ -0,0 +1,2 @@ +,header2,header3 +stuff1,stuff2,stuff3 diff --git a/tests/data/tables/mock_missing_value.csv b/tests/data/tables/mock_missing_value.csv new file mode 
100644 index 00000000..0afedaa4 --- /dev/null +++ b/tests/data/tables/mock_missing_value.csv @@ -0,0 +1,2 @@ +header1,header2,header3 +,stuff2,stuff3 diff --git a/tests/data/tables/mock_missing_value.tab b/tests/data/tables/mock_missing_value.tab new file mode 100644 index 00000000..4d0244c8 --- /dev/null +++ b/tests/data/tables/mock_missing_value.tab @@ -0,0 +1,2 @@ +header1 header2 header3 + stuff2 stuff3 diff --git a/tests/data/tables/mock_missing_value_2.tab b/tests/data/tables/mock_missing_value_2.tab new file mode 100644 index 00000000..e505b6fc --- /dev/null +++ b/tests/data/tables/mock_missing_value_2.tab @@ -0,0 +1,2 @@ +header1 header2 header3 + stuff3 diff --git a/tests/data/tables/no_header.csv b/tests/data/tables/no_header.csv new file mode 100644 index 00000000..7b4a1978 --- /dev/null +++ b/tests/data/tables/no_header.csv @@ -0,0 +1 @@ +stuff1,stuff2,stuff3 diff --git a/tests/data/tables/no_missing.csv b/tests/data/tables/no_missing.csv new file mode 100644 index 00000000..1e99cd35 --- /dev/null +++ b/tests/data/tables/no_missing.csv @@ -0,0 +1,2 @@ +header1,header2,header3 +stuff1,stuff2,stuff3 diff --git a/tests/data/tables/no_missing.tab b/tests/data/tables/no_missing.tab new file mode 100644 index 00000000..5116b305 --- /dev/null +++ b/tests/data/tables/no_missing.tab @@ -0,0 +1,2 @@ +header1 header2 header3 +stuff1 stuff2 stuff3 diff --git a/tests/data/tables/two_missing_headers.csv b/tests/data/tables/two_missing_headers.csv new file mode 100644 index 00000000..a3b2e951 --- /dev/null +++ b/tests/data/tables/two_missing_headers.csv @@ -0,0 +1,2 @@ +,,header3 +stuff1,stuff2,stuff3 diff --git a/tests/data/tables/vector.csv b/tests/data/tables/vector.csv new file mode 100644 index 00000000..692f198b --- /dev/null +++ b/tests/data/tables/vector.csv @@ -0,0 +1,4 @@ +header1 +stuff1 +stuff2 +stuff3 From 837f27072905b92d469caeb14014377c464786c8 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 30 May 2024 14:14:51 -0500 Subject: [PATCH 02/21] 
updated changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 644393ac..31fa6c42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### `Fixed` + +- Parsed table values would not show up properly if values were missing resolving issue [Issue 82](https://github.com/phac-nml/mikrokondo/issues/82) + ## v0.2.0 - [2024-05-14] ### `Added` From 3eb2c81eaa873a55b9ea7e13c487d3a61ad4a91c Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 30 May 2024 14:22:34 -0500 Subject: [PATCH 03/21] added test modules... --- tests/functions/report.nf.test | 202 +++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 tests/functions/report.nf.test diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test new file mode 100644 index 00000000..ca92f560 --- /dev/null +++ b/tests/functions/report.nf.test @@ -0,0 +1,202 @@ +/* +Tests for functions in the report module. 
+*/ + + +nextflow_function { + name "Test report.nf functions" + script "modules/local/report.nf" + function "table_values" + + test("Test tab missing column value, header supplied"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/mock_missing_value.tab") + input[1] = true + input[2] = '\t' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']] + assert function.success + } + } + + test("Test tab missing 2 column values, header supplied"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/mock_missing_value_2.tab") + input[1] = true + input[2] = '\t' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'stuff3']] + assert function.success + } + } + + + test("Test tab, header supplied"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/no_missing.tab") + input[1] = true + input[2] = '\t' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'stuff3']] + assert function.success + } + } + + test("Test csv missing column value, header supplied"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/mock_missing_value.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']] + assert function.success + } + } + + + test("Test csv, header supplied"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/no_missing.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.result == 
['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'stuff3']] + assert function.success + } + } + + test("Test csv, header missing one value"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/header_missing_val.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.success + assert function.result == ['0':['__default_index':'stuff1', 'header2':'stuff2', 'header3':'stuff3']] + } + } + + + test("Test csv, no header"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/no_header.csv") + input[1] = false + input[2] = ',' + input[3] = ['header1', 'header2', 'header3'] + """ + } + params { + outdir = "results" + } + } + then{ + assert function.success + assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'stuff3']] + } + } + + + test("Test csv, two headers missing values"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/two_missing_headers.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.failed + } + } + + test("Test csv, vector with header"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/vector.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.result == ['0':['header1':'stuff1'], '1': ['header1':'stuff2'], '2':['header1':'stuff3']] + assert function.success + } + } +} From 049c43c917af70478960c9ec3f1a83b49b931cf9 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 30 May 2024 15:52:09 -0500 Subject: [PATCH 04/21] added additional test cases --- modules/local/report.nf | 8 +- nextflow.config | 6 +- tests/data/tables/all_values_missing.csv | 1 + tests/data/tables/empty.csv | 0 tests/data/tables/missing_all_headers.csv | 2 + .../data/tables/mistmatch_headers_values.csv | 2 + 
tests/data/tables/vector_no_hdr.csv | 4 + tests/functions/report.nf.test | 112 +++++++++++++++++- 8 files changed, 123 insertions(+), 12 deletions(-) create mode 100644 tests/data/tables/all_values_missing.csv create mode 100644 tests/data/tables/empty.csv create mode 100644 tests/data/tables/missing_all_headers.csv create mode 100644 tests/data/tables/mistmatch_headers_values.csv create mode 100644 tests/data/tables/vector_no_hdr.csv diff --git a/modules/local/report.nf b/modules/local/report.nf index 7dc35f62..e5299a28 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -804,7 +804,7 @@ def table_values(file_path, header_p, seperator, headers=null){ def converted_data = [:] def idx = 0 def lines_read = false - def missing_value = "NoData" + def missing_value = '' def default_index_col = "__default_index__" file_path.withReader{ String line @@ -815,13 +815,13 @@ def table_values(file_path, header_p, seperator, headers=null){ def missing_headers = 0 if(split_header.size() > 1){ for(col_header in split_header[1..-1]){ // skip first column as it is allowed to be empty - if(!col_header){ + if(col_header == null || col_header == ''){ missing_headers++; } } } - if(missing_headers){ + if(missing_headers != 0){ error("Missing multiple column headers in ${file_path}. 
You may need to pass in column headers in the nextflow.config file.") } @@ -854,7 +854,7 @@ def table_values(file_path, header_p, seperator, headers=null){ lines_read = true } if(!lines_read){ - converted_data[idx] = [split_header, Collections.nCopies(split_header.size, missing_value)].transpose().collectEntries() + converted_data[idx] = [split_header, Collections.nCopies(split_header.size(), missing_value)].transpose().collectEntries() } } diff --git a/nextflow.config b/nextflow.config index 529ac098..2dd1498c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1078,12 +1078,12 @@ dag { manifest { name = 'phac-nml/mikrokondo' - author = """matthew wells""" + author = """Matthew Wells, James Robertson, Aaron Petkau, Christy-Lynn Peterson, Eric Marinier""" homePage = 'https://github.com/phac-nml/mikrokondo' - description = """Mikrokondo beta""" + description = """Mikrokondo""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.2.0' + version = '0.2.1' defaultBranch = 'main' doi = '' } diff --git a/tests/data/tables/all_values_missing.csv b/tests/data/tables/all_values_missing.csv new file mode 100644 index 00000000..e099d617 --- /dev/null +++ b/tests/data/tables/all_values_missing.csv @@ -0,0 +1 @@ +header1,header2,header3 diff --git a/tests/data/tables/empty.csv b/tests/data/tables/empty.csv new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/tables/missing_all_headers.csv b/tests/data/tables/missing_all_headers.csv new file mode 100644 index 00000000..bb3ccac4 --- /dev/null +++ b/tests/data/tables/missing_all_headers.csv @@ -0,0 +1,2 @@ + +stuff1,stuff2,stuff3 diff --git a/tests/data/tables/mistmatch_headers_values.csv b/tests/data/tables/mistmatch_headers_values.csv new file mode 100644 index 00000000..37ff0287 --- /dev/null +++ b/tests/data/tables/mistmatch_headers_values.csv @@ -0,0 +1,2 @@ +header1,header2,header3 +stuff1,stuff2,stuff3,stuff4 diff --git a/tests/data/tables/vector_no_hdr.csv 
b/tests/data/tables/vector_no_hdr.csv new file mode 100644 index 00000000..d861c49f --- /dev/null +++ b/tests/data/tables/vector_no_hdr.csv @@ -0,0 +1,4 @@ + +stuff1 +stuff2 +stuff3 diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index ca92f560..888f28ce 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -24,7 +24,7 @@ nextflow_function { } } then{ - assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']] + assert function.result == ['0':['header1':'', 'header2':'stuff2', 'header3':'stuff3']] assert function.success } } @@ -45,7 +45,7 @@ nextflow_function { } } then{ - assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'stuff3']] + assert function.result == ['0':['header1':'', 'header2':'', 'header3':'stuff3']] assert function.success } } @@ -88,7 +88,7 @@ nextflow_function { } } then{ - assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']] + assert function.result == ['0':['header1':'', 'header2':'stuff2', 'header3':'stuff3']] assert function.success } } @@ -132,7 +132,7 @@ nextflow_function { } then{ assert function.success - assert function.result == ['0':['__default_index':'stuff1', 'header2':'stuff2', 'header3':'stuff3']] + assert function.result == ['0':['__default_index__':'stuff1', 'header2':'stuff2', 'header3':'stuff3']] } } @@ -179,7 +179,7 @@ nextflow_function { } } - test("Test csv, vector with header"){ + test("Test csv, vector with header"){ when { function { @@ -199,4 +199,106 @@ nextflow_function { assert function.success } } + + test("Test empty file"){ + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/empty.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.failed + } + } + + + test("Test more values than columns"){ + + when { + function { + """ + input[0] = 
file("$baseDir/tests/data/tables/mismatch_headers_values.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.failed + } + } + + test("Vector no column header"){ + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/vector_no_hdr.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.result == ['0':['__default_index__':'stuff1'], '1': ['__default_index__':'stuff2'], '2':['__default_index__':'stuff3']] + assert function.success + } + } + + + test("Missing all column headers"){ + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/missing_all_headers.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.failed + } + } + + test("Missing all values"){ + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/all_values_missing.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.success + assert function.result == ['0':['header1':'', 'header2':'', 'header3':'']] + + } + } } From e35451093675decefda2e34c1de5b0952812cf35 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 30 May 2024 15:55:00 -0500 Subject: [PATCH 05/21] updated changelog --- CHANGELOG.md | 2 +- tests/functions/report.nf.test | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31fa6c42..94c49409 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [Unreleased] +## v0.2.1 - [Unreleased] ### `Fixed` diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index 888f28ce..9029c04a 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -298,7 +298,6 @@ nextflow_function { then{ assert function.success assert function.result == ['0':['header1':'', 'header2':'', 'header3':'']] - } } } From 14558a9190bae33dc30cf35d227dd33494960a25 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Fri, 31 May 2024 13:22:42 -0500 Subject: [PATCH 06/21] reverted to old commit --- modules/local/report.nf | 2 +- tests/functions/report.nf.test | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index e5299a28..d36fbd30 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -804,7 +804,7 @@ def table_values(file_path, header_p, seperator, headers=null){ def converted_data = [:] def idx = 0 def lines_read = false - def missing_value = '' + def missing_value = 'NoData' def default_index_col = "__default_index__" file_path.withReader{ String line diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index 9029c04a..b6fd5c9a 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -24,7 +24,7 @@ nextflow_function { } } then{ - assert function.result == ['0':['header1':'', 'header2':'stuff2', 'header3':'stuff3']] + assert function.result == ['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']] assert function.success } } @@ -45,7 +45,7 @@ nextflow_function { } } then{ - assert function.result == ['0':['header1':'', 'header2':'', 'header3':'stuff3']] + assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'stuff3']] assert function.success } } @@ -88,7 +88,7 @@ nextflow_function { } } then{ - assert function.result == ['0':['header1':'', 'header2':'stuff2', 'header3':'stuff3']] + assert function.result == 
['0':['header1':'NoData', 'header2':'stuff2', 'header3':'stuff3']] assert function.success } } @@ -297,7 +297,7 @@ nextflow_function { } then{ assert function.success - assert function.result == ['0':['header1':'', 'header2':'', 'header3':'']] + assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'NoData']] } } } From 201a4d4b4406384769de47508ca71d8fca0e8df1 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Fri, 31 May 2024 14:10:03 -0500 Subject: [PATCH 07/21] flipped inequality allowing more headers than values --- modules/local/report.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index d36fbd30..15a52e3a 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -839,7 +839,7 @@ def table_values(file_path, header_p, seperator, headers=null){ while(line = it.readLine()){ split_line = line.split(seperator) // split will allow for missing values // Transpose, and collect converts the data to a map - if(split_line.size() != split_header.size()){ + if(split_line.size() > split_header.size()){ error("The number of values in ${file_path} differs from number of columns headers ${split_header}") } From 34269842f7147c0d23106cb0e481c25374ae5729 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Fri, 31 May 2024 15:27:19 -0500 Subject: [PATCH 08/21] added additional tests to table parser --- tests/data/tables/missing_last_value.tab | 2 + .../missing_multiple_value_separators.tab | 2 + ..._multiple_value_separators_extra_field.tab | 2 + tests/functions/report.nf.test | 60 +++++++++++++++++++ 4 files changed, 66 insertions(+) create mode 100644 tests/data/tables/missing_last_value.tab create mode 100644 tests/data/tables/missing_multiple_value_separators.tab create mode 100644 tests/data/tables/missing_multiple_value_separators_extra_field.tab diff --git a/tests/data/tables/missing_last_value.tab b/tests/data/tables/missing_last_value.tab new file mode 100644 
index 00000000..e2f48fcb --- /dev/null +++ b/tests/data/tables/missing_last_value.tab @@ -0,0 +1,2 @@ +header1 header2 header3 +stuff1 stuff2 diff --git a/tests/data/tables/missing_multiple_value_separators.tab b/tests/data/tables/missing_multiple_value_separators.tab new file mode 100644 index 00000000..71217d05 --- /dev/null +++ b/tests/data/tables/missing_multiple_value_separators.tab @@ -0,0 +1,2 @@ +header1 header2 header3 header4 +stuff1 stuff2 diff --git a/tests/data/tables/missing_multiple_value_separators_extra_field.tab b/tests/data/tables/missing_multiple_value_separators_extra_field.tab new file mode 100644 index 00000000..74402fd7 --- /dev/null +++ b/tests/data/tables/missing_multiple_value_separators_extra_field.tab @@ -0,0 +1,2 @@ +header1 header2 header3 header4 +stuff1 stuff2 stuff4 diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index b6fd5c9a..d3421806 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -300,4 +300,64 @@ nextflow_function { assert function.result == ['0':['header1':'NoData', 'header2':'NoData', 'header3':'NoData']] } } + + test("Missing last two values"){ + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/missing_last_value.tab") + input[1] = true + input[2] = '\t' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.success + assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData']] + } + } + + test("Missing multiple terminal value separators"){ + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/missing_multiple_value_separators.tab") + input[1] = true + input[2] = '\t' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.success + assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'NoData']] + } + } + + test("Missing internal value separator"){ + when { + 
function { + """ + input[0] = file("$baseDir/tests/data/tables/missing_multiple_value_separators.tab") + input[1] = true + input[2] = '\t' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.success + assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'stuff4']] + } + } } From 7df332fa40f8a9eb8d036986f56a512b6664f0c9 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Fri, 31 May 2024 16:19:05 -0500 Subject: [PATCH 09/21] Switched to splitCsv --- modules/local/report.nf | 59 +++------------------------------- tests/functions/report.nf.test | 2 ++ 2 files changed, 6 insertions(+), 55 deletions(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index 15a52e3a..32bdcf5f 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -799,64 +799,13 @@ def table_values(file_path, header_p, seperator, headers=null){ returns a map */ - def split_header = null - def split_line = null - def converted_data = [:] - def idx = 0 - def lines_read = false def missing_value = 'NoData' def default_index_col = "__default_index__" - file_path.withReader{ - String line - if(header_p){ - header = it.readLine() - split_header = header.split(seperator) - - def missing_headers = 0 - if(split_header.size() > 1){ - for(col_header in split_header[1..-1]){ // skip first column as it is allowed to be empty - if(col_header == null || col_header == ''){ - missing_headers++; - } - } - } - - if(missing_headers != 0){ - error("Missing multiple column headers in ${file_path}. 
You may need to pass in column headers in the nextflow.config file.") - } - - if(!split_header[0] && ( split_header.size() == 1 || split_header[1] != default_index_col)){ - // Missing column headers could arise from the first column serving as and index, if this is the case - // verify that the split_split header size is greater == 1 (e.g is it a vector) or that the next column - // value is not equal to the value of "default_index_col" - split_header[0] = default_index_col - } - - } - if(headers){ - split_header = headers - } - while(line = it.readLine()){ - split_line = line.split(seperator) // split will allow for missing values - // Transpose, and collect converts the data to a map - if(split_line.size() > split_header.size()){ - error("The number of values in ${file_path} differs from number of columns headers ${split_header}") - } - - def new_row = [split_header, split_line].transpose().collectEntries() - new_row.each{ - if(!it.value){ - it.value = missing_value; - } - } - converted_data[idx] = new_row - idx++ - lines_read = true - } - if(!lines_read){ - converted_data[idx] = [split_header, Collections.nCopies(split_header.size(), missing_value)].transpose().collectEntries() - } + def replace_missing = { it == null || it == '' ? missing_value : it } + def rows_list = file_path.splitCsv(header: (header_p ? 
true : headers), sep:seperator) + def converted_data = rows_list.indexed().collectEntries { idx, row -> + ["${idx}": row.collectEntries { k, v -> [(k): replace_missing(v)] }] } return converted_data } diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index d3421806..5d7ea71c 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -95,6 +95,7 @@ nextflow_function { test("Test csv, header supplied"){ + tag "no_missing" when { function { @@ -138,6 +139,7 @@ nextflow_function { test("Test csv, no header"){ + tag "csv_no_header" when { function { From 54f23048d6ef9c4f63525d5d57fe788b613cb9f3 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Fri, 31 May 2024 16:59:39 -0500 Subject: [PATCH 10/21] Added ability to set default index --- modules/local/report.nf | 24 ++++++++++++++++++++---- tests/functions/report.nf.test | 2 ++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index 32bdcf5f..b3dd528a 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -801,14 +801,30 @@ def table_values(file_path, header_p, seperator, headers=null){ */ def missing_value = 'NoData' def default_index_col = "__default_index__" + def rows_list = null def replace_missing = { it == null || it == '' ? missing_value : it } - def rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator) + try { + rows_list = file_path.splitCsv(header: (header_p ? 
true : headers), sep:seperator) + } catch (java.lang.IllegalStateException e) { + // Probably not the best solution since messages could change with different versions + // of Nextflow, but there isn't a way to get any more specific exception type + if (header_p && e.getMessage() == "Empty header columns are not allowed in CSV file") { + // Attempt to read file assuming first line is header line with missing value + def header_line = file_path.splitText()[0].trim() + def headers_from_file = header_line.split(seperator) + if (headers_from_file[0] == null || headers_from_file[0] == '') { + headers_from_file[0] = default_index_col + rows_list = file_path.splitCsv(header: headers_from_file as List, sep:seperator, skip: 1) + } else { + throw e + } + } else { + throw e + } + } def converted_data = rows_list.indexed().collectEntries { idx, row -> ["${idx}": row.collectEntries { k, v -> [(k): replace_missing(v)] }] } return converted_data } - - - diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index 5d7ea71c..a72b14a4 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -9,6 +9,7 @@ nextflow_function { function "table_values" test("Test tab missing column value, header supplied"){ + tag "tab_missing_value" when { function { @@ -117,6 +118,7 @@ nextflow_function { } test("Test csv, header missing one value"){ + tag "header_missing_one_value" when { function { From 88a65c674d80b7f9a90722a61fc184930a4aa340 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 08:01:24 -0500 Subject: [PATCH 11/21] Fixed all values missing test --- tests/data/tables/all_values_missing.csv | 1 + tests/functions/report.nf.test | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tests/data/tables/all_values_missing.csv b/tests/data/tables/all_values_missing.csv index e099d617..519dd61e 100644 --- a/tests/data/tables/all_values_missing.csv +++ b/tests/data/tables/all_values_missing.csv @@ -1 +1,2 @@ header1,header2,header3 
+,, diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index a72b14a4..2996ef64 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -184,6 +184,7 @@ nextflow_function { } test("Test csv, vector with header"){ + tag "csv_vector_header" when { function { @@ -286,6 +287,8 @@ nextflow_function { } test("Missing all values"){ + tag "missing_all_values" + when { function { """ From c4a418ffc198f18af0f7e67e6f0fe90a59554b3b Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 08:06:46 -0500 Subject: [PATCH 12/21] Fixed up internal separator test --- tests/functions/report.nf.test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index 2996ef64..480aff0e 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -349,6 +349,8 @@ nextflow_function { } test("Missing internal value separator"){ + tag "missing_internal_separator" + when { function { """ @@ -364,7 +366,7 @@ nextflow_function { } then{ assert function.success - assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'stuff4']] + assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'NoData']] } } } From 4e7226a1a00a793d97c8833a3cfddc0dff6559e5 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 08:14:56 -0500 Subject: [PATCH 13/21] Fixed default headers issue --- modules/local/report.nf | 4 +--- tests/functions/report.nf.test | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index b3dd528a..60eda82c 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -807,9 +807,7 @@ def table_values(file_path, header_p, seperator, headers=null){ try { rows_list = file_path.splitCsv(header: (header_p ? 
true : headers), sep:seperator) } catch (java.lang.IllegalStateException e) { - // Probably not the best solution since messages could change with different versions - // of Nextflow, but there isn't a way to get any more specific exception type - if (header_p && e.getMessage() == "Empty header columns are not allowed in CSV file") { + if (header_p) { // Attempt to read file assuming first line is header line with missing value def header_line = file_path.splitText()[0].trim() def headers_from_file = header_line.split(seperator) diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index 480aff0e..c110aba5 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -247,6 +247,8 @@ nextflow_function { } test("Vector no column header"){ + tag "vector_no_column_header" + when { function { """ From 7b4e1f9bef101768a1448ad6b46c757c6f51954a Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 08:26:21 -0500 Subject: [PATCH 14/21] Fixed up situation of more than one missing header --- modules/local/report.nf | 9 +++++++-- tests/functions/report.nf.test | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index 60eda82c..097c3c9c 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -802,16 +802,21 @@ def table_values(file_path, header_p, seperator, headers=null){ def missing_value = 'NoData' def default_index_col = "__default_index__" def rows_list = null - def replace_missing = { it == null || it == '' ? missing_value : it } + def is_missing = { it == null || it == '' } + def replace_missing = { is_missing(it) ? missing_value : it } try { rows_list = file_path.splitCsv(header: (header_p ? 
true : headers), sep:seperator) } catch (java.lang.IllegalStateException e) { + // Catch exception here to deal with situation where the very first header is missing if (header_p) { // Attempt to read file assuming first line is header line with missing value def header_line = file_path.splitText()[0].trim() def headers_from_file = header_line.split(seperator) - if (headers_from_file[0] == null || headers_from_file[0] == '') { + def count_missing_headers = headers_from_file.collect{ is_missing(it) ? 1 : 0 }.sum() + if (count_missing_headers > 1) { + throw e + } else if (is_missing(headers_from_file[0])) { headers_from_file[0] = default_index_col rows_list = file_path.splitCsv(header: headers_from_file as List, sep:seperator, skip: 1) } else { diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index c110aba5..393d55ea 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -164,6 +164,7 @@ nextflow_function { test("Test csv, two headers missing values"){ + tag "two_headers_missing_values" when { function { From 2f840b5ce6b68a48a35374a62040c0038288e291 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 08:55:40 -0500 Subject: [PATCH 15/21] Fixed up issue with mismatched number of headers and values --- modules/local/report.nf | 12 +++++++++++- tests/functions/report.nf.test | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index 097c3c9c..65f1b63f 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -811,8 +811,18 @@ def table_values(file_path, header_p, seperator, headers=null){ // Catch exception here to deal with situation where the very first header is missing if (header_p) { // Attempt to read file assuming first line is header line with missing value - def header_line = file_path.splitText()[0].trim() + def file_lines = file_path.splitText() + def header_line = file_lines[0].trim() + def values_line1 = 
file_lines[1].trim() def headers_from_file = header_line.split(seperator) + def value1_columns = values_line1.split(seperator) + + // If you pass a list of headers, then splitCsv does not seem to check to make sure + // the list has the same number as the values columns in the file, so I need to check this here + if (headers_from_file.size() != value1_columns.size()) { + throw new java.lang.IllegalStateException("Mismatched number of headers ${headers_from_file} and column values ${value1_columns} for file ${file_path}") + } + def count_missing_headers = headers_from_file.collect{ is_missing(it) ? 1 : 0 }.sum() if (count_missing_headers > 1) { throw e diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index 393d55ea..90445755 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -271,6 +271,8 @@ nextflow_function { test("Missing all column headers"){ + tag "missing_all_column_headers" + when { function { """ @@ -286,6 +288,7 @@ nextflow_function { } then{ assert function.failed + assert function.stdout.any { it.contains("ERROR ~ Mismatched number of headers [] and column values [stuff1, stuff2, stuff3] for file") } } } From df4def6524b958eedf4f4e3b584a1dd98c7a7bc1 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 10:01:41 -0500 Subject: [PATCH 16/21] Fixed up issue with row index value --- modules/local/report.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index 65f1b63f..4caa8ca6 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -837,7 +837,7 @@ def table_values(file_path, header_p, seperator, headers=null){ } } def converted_data = rows_list.indexed().collectEntries { idx, row -> - ["${idx}": row.collectEntries { k, v -> [(k): replace_missing(v)] }] + [(idx): row.collectEntries { k, v -> [(k): replace_missing(v)] }] } return converted_data } From 2cbac13461bf038c001612763f7610ff06ecb2ec Mon Sep 17 
00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 10:08:11 -0500 Subject: [PATCH 17/21] Fixed up mismatched text in schema and changelog --- CHANGELOG.md | 19 +++++++++++++------ nextflow_schema.json | 4 ++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94c49409..1e9f65fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v0.2.1 - [Unreleased] +## [0.2.1] - 2024-06-03 ### `Fixed` -- Parsed table values would not show up properly if values were missing resolving issue [Issue 82](https://github.com/phac-nml/mikrokondo/issues/82) +- Parsed table values would not show up properly if values were missing resolving issue See [PR 83](https://github.com/phac-nml/mikrokondo/pull/83) +- Fixed mismatched description for minimap2 and mash databases. See [PR 83](https://github.com/phac-nml/mikrokondo/pull/83) -## v0.2.0 - [2024-05-14] +## [0.2.0] - 2024-05-14 ### `Added` @@ -43,7 +44,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated StarAMR to version 0.10.0. See [PR 74](https://github.com/phac-nml/mikrokondo/pull/74) -## v0.1.2 - [2024-05-02] +## [0.1.2] - 2024-05-02 ### Changed @@ -52,13 +53,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Set `--kraken2_db` to be a required parameter for the pipeline. See [PR 71](https://github.com/phac-nml/mikrokondo/pull/71) - Hide bakta parameters from IRIDA Next UI. See [PR 71](https://github.com/phac-nml/mikrokondo/pull/71) -## v0.1.1 - [2024-04-22] +## [0.1.1] - 2024-04-22 ### Changed - Switched the resource labels for **parse_fastp**, **select_pointfinder**, **report**, and **parse_kat** from `process_low` to `process_single` as they are all configured to run on the local Nextflow machine. 
See [PR 67](https://github.com/phac-nml/mikrokondo/pull/67) -## v0.1.0 - [2024-03-22] +## [0.1.0] - 2024-03-22 Initial release of phac-nml/mikrokondo. Mikrokondo currently supports: read trimming and quality control, contamination detection, assembly (isolate, metagenomic or hybrid), annotation, AMR detection and subtyping of genomic sequencing data targeting bacterial or metagenomic data. @@ -85,3 +86,9 @@ Initial release of phac-nml/mikrokondo. Mikrokondo currently supports: read trim - Changed salmonella default default coverage to 40 - Added integration testing using [nf-test](https://www.nf-test.com/). + +[0.2.1]: https://github.com/phac-nml/mikrokondo/releases/tag/0.2.1 +[0.2.0]: https://github.com/phac-nml/mikrokondo/releases/tag/0.2.0 +[0.1.2]: https://github.com/phac-nml/mikrokondo/releases/tag/0.1.2 +[0.1.1]: https://github.com/phac-nml/mikrokondo/releases/tag/0.1.1 +[0.1.0]: https://github.com/phac-nml/mikrokondo/releases/tag/0.1.0 diff --git a/nextflow_schema.json b/nextflow_schema.json index 722de4a4..b7864817 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -76,14 +76,14 @@ "properties": { "dehosting_idx": { "type": "string", - "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)", + "description": "Minimpa2 index for dehosting and kitome removal", "pattern": "^\\S+$", "exists": true, "format": "file-path" }, "mash_sketch": { "type": "string", - "description": "Minimpa2 index for dehosting and kitome removal", + "description": "Mash sketch used for contamination detection and speciation (Sketch comments must be a taxonomic string similar to what Kraken2 outputs)", "pattern": "^\\S+$", "exists": true, "format": "file-path" From ac72001aea23aab9329a3be101f0fe330af3099d Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 15:14:23 -0500 Subject: [PATCH 18/21] Fixed up tests/tabular parsing --- modules/local/report.nf | 77 
++++++++++++++++++++++------------ tests/functions/report.nf.test | 2 +- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index 4caa8ca6..62912598 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -802,42 +802,67 @@ def table_values(file_path, header_p, seperator, headers=null){ def missing_value = 'NoData' def default_index_col = "__default_index__" def rows_list = null + def use_modified_headers_from_file = false def is_missing = { it == null || it == '' } def replace_missing = { is_missing(it) ? missing_value : it } - try { - rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator) - } catch (java.lang.IllegalStateException e) { - // Catch exception here to deal with situation where the very first header is missing - if (header_p) { - // Attempt to read file assuming first line is header line with missing value - def file_lines = file_path.splitText() - def header_line = file_lines[0].trim() - def values_line1 = file_lines[1].trim() - def headers_from_file = header_line.split(seperator) - def value1_columns = values_line1.split(seperator) - - // If you pass a list of headers, then splitCsv does not seem to check to make sure - // the list has the same number as the values columns in the file, so I need to check this here - if (headers_from_file.size() != value1_columns.size()) { - throw new java.lang.IllegalStateException("Mismatched number of headers ${headers_from_file} and column values ${value1_columns} for file ${file_path}") + // Reads two lines (up to one header line + one row) for making decisions on how to parse the file + def file_lines = file_path.splitText(limit: 2) + if (!header_p) { + if (file_lines.size() == 0) { + // headers were not in the file, and file size is 0, so return missing data based + // on passed headers + rows_list = headers.collectEntries { [(it): null] } + } else { + // verify that passed headers and rows have same number 
+ def row_line = file_lines[0].replaceAll('(\n|\r\n)$', '') + def row_line_columns = row_line.split(seperator, -1) + if (headers.size() != row_line_columns.size()) { + throw new Exception("Mismatched number of passed headers ${headers} and column values ${row_line_columns} for file ${file_path}") + } else { + rows_list = file_path.splitCsv(header: headers, sep:seperator) + } + } + } else { + // Headers exist in file + + if (file_lines.size() == 0) { + throw new Exception("Attempting to parse empty file [${file_path}] as a table where header_p=${header_p}") + } + + def header_line = file_lines[0].replaceAll('(\n|\r\n)$', '') + def headers_from_file = header_line.split(seperator, -1) + def total_missing_headers = headers_from_file.collect{ is_missing(it) ? 1 : 0 }.sum() + + if (total_missing_headers > 1) { + throw new Exception("Attempting to parse tabular file with more than one missing header: [${file_path}]") + } else if (is_missing(headers_from_file[0])) { + // Case, single missing header as first column + headers_from_file[0] = default_index_col + use_modified_headers_from_file = true + } + + if (file_lines.size() == 1) { + // There is no row lines, only headers, so return missing data + rows_list = headers_from_file.collectEntries { [(it): null] } + } else { + // If there exists a row line, then make sure rows + headers match + + def row_line1 = file_lines[1].replaceAll('(\n|\r\n)$', '') + def row_line1_columns = row_line1.split(seperator, -1) + if (headers_from_file.size() != row_line1_columns.size()) { + throw new java.lang.IllegalStateException("Mismatched number of headers ${headers_from_file} and column values ${row_line1_columns} for file ${file_path}") } - def count_missing_headers = headers_from_file.collect{ is_missing(it) ? 
1 : 0 }.sum() - if (count_missing_headers > 1) { - throw e - } else if (is_missing(headers_from_file[0])) { - headers_from_file[0] = default_index_col + if (use_modified_headers_from_file) { rows_list = file_path.splitCsv(header: headers_from_file as List, sep:seperator, skip: 1) } else { - throw e + rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator) } - } else { - throw e } } - def converted_data = rows_list.indexed().collectEntries { idx, row -> + + return rows_list.indexed().collectEntries { idx, row -> [(idx): row.collectEntries { k, v -> [(k): replace_missing(v)] }] } - return converted_data } diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index 90445755..cda9544e 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -288,7 +288,7 @@ nextflow_function { } then{ assert function.failed - assert function.stdout.any { it.contains("ERROR ~ Mismatched number of headers [] and column values [stuff1, stuff2, stuff3] for file") } + assert function.stdout.any { it.contains("ERROR ~ Mismatched number of headers [__default_index__] and column values [stuff1, stuff2, stuff3] for file") } } } From ef1aa4934e5d3e1b71a5438415fdf8d6a261b076 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 15:36:46 -0500 Subject: [PATCH 19/21] Fixed up tests --- modules/local/report.nf | 8 +++++--- tests/functions/report.nf.test | 32 +++++++------------------------- 2 files changed, 12 insertions(+), 28 deletions(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index 62912598..d61ae712 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -808,7 +808,9 @@ def table_values(file_path, header_p, seperator, headers=null){ // Reads two lines (up to one header line + one row) for making decisions on how to parse the file def file_lines = file_path.splitText(limit: 2) - if (!header_p) { + if (!header_p && headers == null) { + throw new Exception("Header is not 
provided in file [header_p=${header_p}], but headers passed to function is null") + } else if (!header_p) { if (file_lines.size() == 0) { // headers were not in the file, and file size is 0, so return missing data based // on passed headers @@ -851,13 +853,13 @@ def table_values(file_path, header_p, seperator, headers=null){ def row_line1 = file_lines[1].replaceAll('(\n|\r\n)$', '') def row_line1_columns = row_line1.split(seperator, -1) if (headers_from_file.size() != row_line1_columns.size()) { - throw new java.lang.IllegalStateException("Mismatched number of headers ${headers_from_file} and column values ${row_line1_columns} for file ${file_path}") + throw new Exception("Mismatched number of headers ${headers_from_file} and column values ${row_line1_columns} for file ${file_path}") } if (use_modified_headers_from_file) { rows_list = file_path.splitCsv(header: headers_from_file as List, sep:seperator, skip: 1) } else { - rows_list = file_path.splitCsv(header: (header_p ? true : headers), sep:seperator) + rows_list = file_path.splitCsv(header: true, sep:seperator) } } } diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index cda9544e..baae3b66 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -315,6 +315,8 @@ nextflow_function { } test("Missing last two values"){ + tag "missing_last_two_values" + when { function { """ @@ -329,33 +331,13 @@ nextflow_function { } } then{ - assert function.success - assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData']] + assert function.failed + assert function.stdout.any { it.contains("Mismatched number of headers [header1, header2, header3] and column values [stuff1, stuff2]") } } } test("Missing multiple terminal value separators"){ - when { - function { - """ - input[0] = file("$baseDir/tests/data/tables/missing_multiple_value_separators.tab") - input[1] = true - input[2] = '\t' - input[3] = null - """ - } - params { - outdir = 
"results" - } - } - then{ - assert function.success - assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'NoData']] - } - } - - test("Missing internal value separator"){ - tag "missing_internal_separator" + tag "missing_multiple_terminal_value_separators" when { function { @@ -371,8 +353,8 @@ nextflow_function { } } then{ - assert function.success - assert function.result == ['0':['header1':'stuff1', 'header2':'stuff2', 'header3':'NoData', 'header4': 'NoData']] + assert function.failed + assert function.stdout.any { it.contains("Mismatched number of headers [header1, header2, header3, header4] and column values [stuff1, stuff2]") } } } } From 0e85c667cd761eb0ff2c759aae72f6adadfabe98 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 16:10:23 -0500 Subject: [PATCH 20/21] Fixed up structure of returned empty tables --- modules/local/report.nf | 7 ++++--- tests/functions/report.nf.test | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/modules/local/report.nf b/modules/local/report.nf index d61ae712..adf1fe77 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -813,8 +813,8 @@ def table_values(file_path, header_p, seperator, headers=null){ } else if (!header_p) { if (file_lines.size() == 0) { // headers were not in the file, and file size is 0, so return missing data based - // on passed headers - rows_list = headers.collectEntries { [(it): null] } + // on passed headers (i.e., single row of empty values) + rows_list = [headers.collectEntries { [(it): null] }] } else { // verify that passed headers and rows have same number def row_line = file_lines[0].replaceAll('(\n|\r\n)$', '') @@ -846,7 +846,8 @@ def table_values(file_path, header_p, seperator, headers=null){ if (file_lines.size() == 1) { // There is no row lines, only headers, so return missing data - rows_list = headers_from_file.collectEntries { [(it): null] } + // (single row of empty 
values) + rows_list = [headers_from_file.collectEntries { [(it): null] }] } else { // If there exists a row line, then make sure rows + headers match diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index baae3b66..2218c7d1 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -207,6 +207,7 @@ nextflow_function { } test("Test empty file"){ + tag "test_empty" when { function { @@ -223,9 +224,31 @@ nextflow_function { } then{ assert function.failed + assert function.stdout.any { it.contains("ERROR ~ Attempting to parse empty file") } } } + test("Test empty file pass header"){ + tag "test_empty_pass_header" + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/empty.csv") + input[1] = false + input[2] = ',' + input[3] = ['header1', 'header2'] + """ + } + params { + outdir = "results" + } + } + then{ + assert function.success + assert function.result == ['0':['header1':'NoData', 'header2':'NoData']] + } + } test("Test more values than columns"){ From e5a75700084f64b6c4ee8bc04507c529432d9790 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Mon, 3 Jun 2024 16:20:01 -0500 Subject: [PATCH 21/21] Added additional test case --- .../missing_all_headers_single_line.csv | 1 + tests/functions/report.nf.test | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/data/tables/missing_all_headers_single_line.csv diff --git a/tests/data/tables/missing_all_headers_single_line.csv b/tests/data/tables/missing_all_headers_single_line.csv new file mode 100644 index 00000000..7b4a1978 --- /dev/null +++ b/tests/data/tables/missing_all_headers_single_line.csv @@ -0,0 +1 @@ +stuff1,stuff2,stuff3 diff --git a/tests/functions/report.nf.test b/tests/functions/report.nf.test index 2218c7d1..1df91d55 100644 --- a/tests/functions/report.nf.test +++ b/tests/functions/report.nf.test @@ -315,6 +315,30 @@ nextflow_function { } } + test("Missing all column headers, only single line"){ + tag 
"missing_all_column_headers_single_line" + + when { + function { + """ + input[0] = file("$baseDir/tests/data/tables/missing_all_headers_single_line.csv") + input[1] = true + input[2] = ',' + input[3] = null + """ + } + params { + outdir = "results" + } + } + then{ + assert function.success + // When headers are supposed to exist in the file, but only a single line of values + // will assume first line is headers + assert function.result == ['0':['stuff1':'NoData', 'stuff2':'NoData', 'stuff3':'NoData']] + } + } + test("Missing all values"){ tag "missing_all_values"