Skip to content

Commit

Permalink
awk/etl: 2nd solution
Browse files Browse the repository at this point in the history
  • Loading branch information
vpayno committed Sep 12, 2023
1 parent ec32680 commit 623cf32
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 42 deletions.
65 changes: 23 additions & 42 deletions awk/etl/etl.awk
Original file line number Diff line number Diff line change
@@ -1,58 +1,39 @@
#!/usr/bin/gawk --bignum --lint --file

function etl(input) {
uppercase = ""
score = 0
function etl() {
PROCINFO["sorted_in"] = "@ind_str_asc"

if (length(input) == 0) {
return uppercase "," score
for (i=2; i<=NF; i++) {
key = tolower($i)
score = $1
scores[key] = score
}
}

# arrays keep getting passed as a scalar, using global
function prettyEtl() {
PROCINFO["sorted_in"] = "@ind_str_asc"

_ = split(input, chars, "")

for (i=1; i<=length(input); ++i) {
letter = toupper(chars[i])

# intentionally using strings with fallthrough cases and single regex cases
switch(letter) {
case /^[AEIOULNRST]$/:
score += 1
break
case "D":
case "G":
score += 2
break
case /^[BCMP]$/:
score += 3
break
case /^[FHVWY]$/:
score += 4
break
case "K":
score += 5
break
case /^[JX]$/:
score += 8
break
case /^[QZ]$/:
score += 10
break
default:
break
}

uppercase = uppercase letter
for (key in scores) {
lines = lines key "," scores[key] "\n"
}

return uppercase "," score
return lines
}

BEGIN {
# gawk extension: define fields by their content rather than by a separator.
# Every run of alphanumeric characters becomes one field, so whatever
# punctuation or whitespace sits between them is ignored.
FPAT = "[[:alnum:]]+"
}

{
print etl($0)
# score: keys
#$1 $2 $3 $4 $5 $6 -> $0
# 1: "A", "E", "I", "O", "U"
NF > 1 {
# uses $0-9
etl()
}

END {
# uses scores array
# prettyEtl() returns the "key,score" lines it builds from the global scores
# array, each one already terminated by "\n"; print then appends ORS on top
# of that.
print prettyEtl()
}
59 changes: 59 additions & 0 deletions awk/etl/etl_test.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/gawk --bignum --lint --file

@include "awkunit"
@include "etl"

# Initialise the test counters once, before anything else runs.
# Written as bare top-level expressions these would be pattern rules
# (evaluated against each input record) rather than one-time assignments,
# so they have to live inside a BEGIN rule to act as initialisation.
BEGIN {
    passed = 0
    testCount = 0
}

# Print the header for the test that is about to run: its 1-based number
# (one more than the global `passed` counter) and the global `input` string.
function _debugTestPre() {
    printf("Test %s:\n", passed + 1)
    printf(" input -> [%s]\n", input)
}

# Report the global `got` value for the test that just finished, mark it as
# passed, and bump the global `passed` counter.
function _debugTestPost() {
    printf(" output -> [%s]\n", got)
    printf(" result -> passed\n\n")
    passed++
}

# Test: a single score record "1: A E I O U" must be transformed into one
# lowercase "letter,score" line per letter, sorted by letter.
#
# etl() reads the current record ($1 = score, $2..$NF = letters) and fills the
# global `scores` array; prettyEtl() renders `scores` by appending to the
# global `lines` string. Both globals are therefore reset first so the result
# does not depend on anything that ran earlier.
function testEtl() {
    want = "a,1\ne,1\ni,1\no,1\nu,1\n"

    # start from a clean slate: prettyEtl() appends to `lines`, never resets it
    delete scores
    lines = ""

    # build the record etl() will consume
    NF = 6
    $1 = 1
    $2 = "A"
    $3 = "E"
    $4 = "I"
    $5 = "O"
    $6 = "U"

    # record the input before printing the header; previously `input` was
    # never assigned, so the header always showed "input -> []"
    input = $0
    _debugTestPre()

    etl()             # creates scores
    got = prettyEtl() # uses scores

    assertEquals(got, want)
    _debugTestPost()
}

# The test file has no input to read: calling exit from BEGIN skips the main
# input loop and passes control straight to the END rules, where the tests run.
BEGIN {
exit 0
}

# Test driver: count the test functions, run them, and print a summary.
END {
    # Ask grep how many lines in the *_test.awk files start with
    # "function test"; only the first line of its output is read.
    cmd = "grep --no-filename --count ^function\\ test *_test.awk"
    if ((cmd | getline testCount) <= 0) {
        # grep could not be run or produced no output
        testCount = 0
    }
    # release the pipe instead of leaving it open until gawk exits
    close(cmd)

    printf "\nRunning %d tests...\n\n", testCount

    # NOTE(review): `cases` is not assigned anywhere in this file, so this
    # adds 0 unless another included file defines it -- confirm and remove.
    testCount = testCount + length(cases)

    # running tests with a lot of duplication
    testEtl()

    print "\n" passed " out of " testCount " tests passed!"

    # add exit here to keep it from looping
    exit 0
}
121 changes: 121 additions & 0 deletions awk/etl/run-tests-awk.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
Running automated test file(s):


===============================================================================

AWKLIBPATH=/usr/lib/x86_64-linux-gnu/gawk:../.lib

/usr/lib/x86_64-linux-gnu/gawk
filefuncs.so
fnmatch.so
fork.so
inplace.so
intdiv.so
ordchr.so
readdir.so
readfile.so
revoutput.so
revtwoway.so
rwarray.so
time.so

../.lib
awkunit.awk
awkunit.so

gawk --lint --file=./awkunit.awk < /dev/null > /dev/null
gawk: ./awkunit.awk:3: warning: `load' is a gawk extension
gawk: warning: function `assertEquals' defined but never called directly
gawk: warning: function `assert' defined but never called directly
gawk: ./awkunit.awk:26: warning: reference to uninitialized variable `_assert_exit'

real 0m0.005s
user 0m0.003s
sys 0m0.002s

gawk --lint --file=./etl.awk < /dev/null > /dev/null
gawk: ./etl.awk:25: warning: `FPAT' is a gawk extension
gawk: ./etl.awk:17: warning: behavior of `for' loop on untyped variable is not defined by POSIX
gawk: ./etl.awk:21: warning: reference to uninitialized variable `lines'

real 0m0.005s
user 0m0.001s
sys 0m0.003s

exit 0

===============================================================================

Running: bats ./test-etl.bats
1..6
ok 1 single letter
ok 2 single score with multiple letters
ok 3 a score with no letters
ok 4 multiple scores with multiple letters
ok 5 multiple scores with multiple letters, blank lines
ok 6 multiple scores with differing numbers of letters

real 0m0.367s
user 0m0.276s
sys 0m0.107s

exit 0

===============================================================================

AWKLIBPATH=/usr/lib/x86_64-linux-gnu/gawk:../.lib

/usr/lib/x86_64-linux-gnu/gawk
filefuncs.so
fnmatch.so
fork.so
inplace.so
intdiv.so
ordchr.so
readdir.so
readfile.so
revoutput.so
revtwoway.so
rwarray.so
time.so

../.lib
awkunit.awk
awkunit.so

Running: gawk --file ./etl_test.awk && printf \n%s\n Tests Passed! || printf \n%s\n Tests Failed!


Running 1 tests...

Test 1:
input -> []
output -> [a,1
e,1
i,1
o,1
u,1
]
result -> passed


1 out of 1 tests passed!

real 0m0.004s
user 0m0.003s
sys 0m0.001s

Tests Passed!

exit 0

===============================================================================

Running: misspell .

real 0m0.025s
user 0m0.029s
sys 0m0.009s

===============================================================================

0 comments on commit 623cf32

Please sign in to comment.