From 623cf323aa8201fe5c5644f769a301d3f06d2698 Mon Sep 17 00:00:00 2001 From: Victor Payno Date: Mon, 11 Sep 2023 18:17:50 -0700 Subject: [PATCH] awk/etl: 2nd solution --- awk/etl/etl.awk | 65 ++++++++------------ awk/etl/etl_test.awk | 59 +++++++++++++++++++ awk/etl/run-tests-awk.txt | 121 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 203 insertions(+), 42 deletions(-) create mode 100644 awk/etl/etl_test.awk create mode 100644 awk/etl/run-tests-awk.txt diff --git a/awk/etl/etl.awk b/awk/etl/etl.awk index fc1bc860..e0387517 100644 --- a/awk/etl/etl.awk +++ b/awk/etl/etl.awk @@ -1,58 +1,39 @@ #!/usr/bin/gawk --bignum --lint --file -function etl(input) { - uppercase = "" - score = 0 +function etl() { + PROCINFO["sorted_in"] = "@ind_str_asc" - if (length(input) == 0) { - return uppercase "," score + for (i=2; i<=NF; i++) { + key = tolower($i) + score = $1 + scores[key] = score } +} + +# arrays keep getting passed as a scalar, using global +function prettyEtl() { + PROCINFO["sorted_in"] = "@ind_str_asc" - _ = split(input, chars, "") - - for (i=1; i<=length(input); ++i) { - letter = toupper(chars[i]) - - # intentionally using strings with fallthrough cases and single regex cases - switch(letter) { - case /^[AEIOULNRST]$/: - score += 1 - break - case "D": - case "G": - score += 2 - break - case /^[BCMP]$/: - score += 3 - break - case /^[FHVWY]$/: - score += 4 - break - case "K": - score += 5 - break - case /^[JX]$/: - score += 8 - break - case /^[QZ]$/: - score += 10 - break - default: - break - } - - uppercase = uppercase letter + for (key in scores) { + lines = lines key "," scores[key] "\n" } - return uppercase "," score + return lines } BEGIN { + FPAT = "[[:alnum:]]+" } -{ - print etl($0) +# score: keys +#$1 $2 $3 $4 $5 $6 -> $0 +# 1: "A", "E", "I", "O", "U" +NF > 1 { + # uses $0-9 + etl() } END { + # uses scores array + print prettyEtl() } diff --git a/awk/etl/etl_test.awk b/awk/etl/etl_test.awk new file mode 100644 index 00000000..f549218e --- /dev/null +++ b/awk/etl/etl_test.awk @@ -0,0 +1,59 @@ +#!/usr/bin/gawk --bignum --lint --file + +@include "awkunit" +@include "etl" + +passed = 0 +testCount = 0 + +function _debugTestPre() { + printf "Test %s:\n", (passed + 1) + printf " input -> [%s]\n", input +} + +function _debugTestPost() { + passed = passed + 1 + printf " output -> [%s]\n", got + printf " result -> passed\n\n" +} + +function testEtl() { + want = "a,1\ne,1\ni,1\no,1\nu,1\n" + + # _ = split(input, a, " ") + + _debugTestPre() + NF = 6 + $1 = 1 + $2 = "A" + $3 = "E" + $4 = "I" + $5 = "O" + $6 = "U" + etl() # creates scores + got = prettyEtl() # uses scores + + assertEquals(got, want) + _debugTestPost() +} + +BEGIN { + exit 0 +} + +END { + cmd = "grep --no-filename --count ^function\\ test *_test.awk" + cmd | getline testCount + + printf "\nRunning %d tests...\n\n", testCount + + testCount = testCount + length(cases) + + # running tests with a lot of duplication + testEtl() + + print "\n" passed " out of " testCount " tests passed!" + + # add exit here to keep it from looping + exit 0 +} diff --git a/awk/etl/run-tests-awk.txt b/awk/etl/run-tests-awk.txt new file mode 100644 index 00000000..82a40fb1 --- /dev/null +++ b/awk/etl/run-tests-awk.txt @@ -0,0 +1,121 @@ +Running automated test file(s): + + +=============================================================================== + +AWKLIBPATH=/usr/lib/x86_64-linux-gnu/gawk:../.lib + +/usr/lib/x86_64-linux-gnu/gawk +filefuncs.so +fnmatch.so +fork.so +inplace.so +intdiv.so +ordchr.so +readdir.so +readfile.so +revoutput.so +revtwoway.so +rwarray.so +time.so + +../.lib +awkunit.awk +awkunit.so + +gawk --lint --file=./awkunit.awk < /dev/null > /dev/null +gawk: ./awkunit.awk:3: warning: `load' is a gawk extension +gawk: warning: function `assertEquals' defined but never called directly +gawk: warning: function `assert' defined but never called directly +gawk: ./awkunit.awk:26: warning: reference to uninitialized variable `_assert_exit' + +real 0m0.005s +user 0m0.003s +sys 0m0.002s + +gawk --lint --file=./etl.awk < /dev/null > /dev/null +gawk: ./etl.awk:25: warning: `FPAT' is a gawk extension +gawk: ./etl.awk:17: warning: behavior of `for' loop on untyped variable is not defined by POSIX +gawk: ./etl.awk:21: warning: reference to uninitialized variable `lines' + +real 0m0.005s +user 0m0.001s +sys 0m0.003s + +exit 0 + +=============================================================================== + +Running: bats ./test-etl.bats +1..6 +ok 1 single letter +ok 2 single score with multiple letters +ok 3 a score with no letters +ok 4 multiple scores with multiple letters +ok 5 multiple scores with multiple letters, blank lines +ok 6 multiple scores with differing numbers of letters + +real 0m0.367s +user 0m0.276s +sys 0m0.107s + +exit 0 + +=============================================================================== + +AWKLIBPATH=/usr/lib/x86_64-linux-gnu/gawk:../.lib + +/usr/lib/x86_64-linux-gnu/gawk +filefuncs.so +fnmatch.so +fork.so +inplace.so +intdiv.so +ordchr.so +readdir.so +readfile.so +revoutput.so +revtwoway.so +rwarray.so +time.so + +../.lib +awkunit.awk +awkunit.so + +Running: gawk --file ./etl_test.awk && printf \n%s\n Tests Passed! || printf \n%s\n Tests Failed! + + +Running 1 tests... + +Test 1: + input -> [] + output -> [a,1 +e,1 +i,1 +o,1 +u,1 +] + result -> passed + + +1 out of 1 tests passed! + +real 0m0.004s +user 0m0.003s +sys 0m0.001s + +Tests Passed! + +exit 0 + +=============================================================================== + +Running: misspell . + +real 0m0.025s +user 0m0.029s +sys 0m0.009s + +=============================================================================== +