From ccb70263c15a60bc25f3dd5cd89a731c9bca0535 Mon Sep 17 00:00:00 2001
From: Dyson Simmons <dysonsimmons@gmail.com>
Date: Sun, 3 Mar 2024 01:52:28 +1100
Subject: [PATCH] Add --help, --version, and improved errors

---
 README.md                          |   8 +-
 cmd/pipesore/main.go               |  23 ++---
 internal/pipesore/ast.go           |   9 +-
 internal/pipesore/cmd.go           |  60 ++++++++++++
 internal/pipesore/errors.go        |  99 +++++++++++++++++++
 internal/pipesore/executor.go      |  59 ++++++++----
 internal/pipesore/executor_test.go |   2 +-
 internal/pipesore/help.go          |  86 +++++++++++++++++
 internal/pipesore/lexer.go         |  43 ++++++---
 internal/pipesore/lexer_test.go    |  43 ++++-----
 internal/pipesore/parser.go        |  30 +++---
 internal/pipesore/parser_test.go   |   8 +-
 internal/pipesore/token.go         |  25 +++--
 pkg/levenshtein/levenshtein.go     |  33 +++++++
 pkg/pipeline/filters.go            | 149 +++++++++++++++++++++++++----
 pkg/pipeline/filters_test.go       |   4 +
 pkg/pipeline/pipeline.go           |   1 -
 17 files changed, 562 insertions(+), 120 deletions(-)
 create mode 100644 internal/pipesore/cmd.go
 create mode 100644 internal/pipesore/errors.go
 create mode 100644 internal/pipesore/help.go
 create mode 100644 pkg/levenshtein/levenshtein.go
diff --git a/README.md b/README.md
index f5f355c..a490547 100644
--- a/README.md
+++ b/README.md
@@ -68,14 +68,14 @@ first line of the input and return all other lines.
 
 | Filter                                          |         |
 | ------                                          | ------- |
-| Columns(delimiter *string*, columns *string*)   | Returns the selected `columns` in order where `columns` is a 1-indexed comma separated list of column positions. Columns are defined by splitting with the 'delimiter'. |
+| Columns(delimiter *string*, columns *string*)   | Returns the selected `columns` in order where `columns` is a 1-indexed comma separated list of column positions. Columns are defined by splitting with the `delimiter`. |
 | ColumnsCSV(delimiter *string*, columns *string*)| Returns the selected `columns` in order where `columns` is a 1-indexed comma separated list of column positions. Parsing is CSV aware so quoted columns containing the `delimiter` when splitting are preserved. |
 | CountLines()                                    | Returns the line count. Lines are delimited by `\r?\n`. |
 | CountRunes()                                    | Returns the rune (Unicode code points) count. Erroneous and short encodings are treated as single runes of width 1 byte. |
-| CountWords()                                    | Returns the word count. Words are delimited by<br />`\t\|\n\|\v\|\f\|\r\| \|0x85\|0xA0`. |
+| CountWords()                                    | Returns the word count. Words are delimited by `\t\|\n\|\v\|\f\|\r\|&nbsp;\|0x85\|0xA0`. |
 | First(n int)                                    | Returns first `n` lines where `n` is a positive integer. If the input has less than `n` lines, all lines are returned. |
 | !First(n int)                                   | Returns all but the the first `n` lines where `n` is a positive integer. If the input has less than `n` lines, no lines are returned. |
-| Frequency()                                     | Ruturns a descending list containing frequency and unique line. Lines with equal frequency are sorted alphabetically. |
+| Frequency()                                     | Returns a descending list containing frequency and unique line. Lines with equal frequency are sorted alphabetically. |
 | Join(delimiter *string*)                        | Joins all lines together seperated by `delimiter`. |
 | Last(n int)                                     | Returns last `n` lines where `n` is a positive integer. If the input has less than `n` lines, all lines are returned. |
 | !Last(n int)                                    | Returns all but the last `n` lines where `n` is a positive integer. If the input has less than `n` lines, no lines are returned. |
@@ -84,7 +84,7 @@ first line of the input and return all other lines.
 | MatchRegex(regex *string*)                      | Returns all lines that match the compiled regular expression 'regex'. Regex is in the form of [Re2](https://github.com/google/re2/wiki/Syntax). |
 | !MatchRegex(regex *string*)                     | Returns all lines that don't match the compiled regular expression 'regex'. Regex is in the form of [Re2](https://github.com/google/re2/wiki/Syntax). |
 | Replace(old *string*, replace *string*)         | Replaces all non-overlapping instances of `old` with `replace`. |
-| ReplaceRegex(regex *string*, replace *string*)  | Replaces all matches of the compiled regular expression `regex` with `replace`. Inside `replace`, `$` signs represent submatches. For example `$1` represents the text of the first submatch. |
+| ReplaceRegex(regex *string*, replace *string*)  | Replaces all matches of the compiled regular expression `regex` with `replace`. Inside `replace`, `$` signs represent submatches. For example `$1` represents the text of the first submatch. Regex is in the form of [Re2](https://github.com/google/re2/wiki/Syntax). |
 
 ## License
 See [LICENSE](https://github.com/dyson/pipesore/blob/master/LICENSE) file.
diff --git a/cmd/pipesore/main.go b/cmd/pipesore/main.go
index 17f0737..f7d15ff 100644
--- a/cmd/pipesore/main.go
+++ b/cmd/pipesore/main.go
@@ -7,24 +7,17 @@ import (
 	"github.com/dyson/pipesore/internal/pipesore"
 )
 
+var (
+	version = "dev"
+	commit  = "none"
+	date    = "unknown"
+)
+
 func main() {
-	s, err := run()
+	s, err := pipesore.Run(version, commit, date)
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		fmt.Fprintf(os.Stderr, "%v\n", err)
 	}
 
 	os.Exit(s)
 }
-
-func run() (int, error) {
-	if len(os.Args) != 2 {
-		return 1, fmt.Errorf("use a single string to define pipeline")
-	}
-
-	err := pipesore.Execute(os.Args[1], os.Stdin, os.Stdout)
-	if err != nil {
-		return 1, fmt.Errorf("error executing pipeline: %w", err)
-	}
-
-	return 0, nil
-}
diff --git a/internal/pipesore/ast.go b/internal/pipesore/ast.go
index 23f65d5..daac5b1 100644
--- a/internal/pipesore/ast.go
+++ b/internal/pipesore/ast.go
@@ -1,20 +1,21 @@
 package pipesore
 
 type ast struct {
-	functions []function
+	filters []filter
 }
 
 func newAST() *ast {
 	return &ast{
-		functions: []function{},
+		filters: []filter{},
 	}
 }
 
-type function struct {
+type filter struct {
 	name      string
 	arguments []any
+	position
 }
 
-func (f function) isNot() bool {
+func (f filter) isNot() bool {
 	return f.name[0:1] == "!"
 }
diff --git a/internal/pipesore/cmd.go b/internal/pipesore/cmd.go
new file mode 100644
index 0000000..eea71d8
--- /dev/null
+++ b/internal/pipesore/cmd.go
@@ -0,0 +1,60 @@
+package pipesore
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/dyson/pipesore/pkg/pipeline"
+)
+
+func Run(version, commit, date string) (int, error) {
+	seeHelp := fmt.Sprintf("See '%s --help'", filepath.Base(os.Args[0]))
+
+	if len(os.Args) != 2 {
+		return 1, fmt.Errorf("error: define a single pipeline or option.\n%s.", seeHelp)
+	}
+
+	input := os.Args[1]
+
+	switch input {
+	case "-h", "--help":
+		printHelp()
+		return 0, nil
+	case "-v", "--version":
+		fmt.Printf("pipesore version %s, commit %s, date %s\n", version, commit, date)
+		return 0, nil
+	case "":
+		return 1, fmt.Errorf("error: no pipeline defined.\n%s.", seeHelp)
+	}
+
+	err := execute(input, os.Stdin, os.Stdout)
+	if err != nil {
+		var syntaxError *syntaxError
+		if errors.As(err, &syntaxError) {
+			return 1, newFormattedError(err, input, syntaxError.position, seeHelp)
+		}
+
+		var filterNameError *filterNameError
+		if errors.As(err, &filterNameError) {
+			if filterNameError.suggestion != "" {
+				definition := pipeline.Filters[filterNameError.suggestion].Definition
+				seeHelp = fmt.Sprintf("Did you mean '%s'?\n%s", definition, seeHelp)
+			}
+
+			return 1, newFormattedError(err, input, filterNameError.position, seeHelp)
+		}
+
+		var filterArgumentError *filterArgumentError
+		if errors.As(err, &filterArgumentError) {
+			help := fmt.Sprintf("%s. %s", pipeline.Filters[filterArgumentError.name].Definition, seeHelp)
+
+			return 1, newFormattedError(err, input, filterArgumentError.position, help)
+		}
+
+		return 1, fmt.Errorf("%w.\n%s.", err, seeHelp)
+	}
+
+	return 0, nil
+}
diff --git a/internal/pipesore/errors.go b/internal/pipesore/errors.go
new file mode 100644
index 0000000..8de2750
--- /dev/null
+++ b/internal/pipesore/errors.go
@@ -0,0 +1,99 @@
+package pipesore
+
+import "fmt"
+
+type syntaxError struct {
+	err error
+	position
+}
+
+func newSyntaxError(err error, position position) *syntaxError {
+	return &syntaxError{
+		err:      err,
+		position: position,
+	}
+}
+
+func (pe *syntaxError) Error() string {
+	return pe.err.Error()
+}
+
+type filterNameError struct {
+	err error
+	position
+	name       string
+	suggestion string
+}
+
+func newFilterNameError(err error, position position, name, suggestion string) *filterNameError {
+	return &filterNameError{
+		err:        err,
+		position:   position,
+		name:       name,
+		suggestion: suggestion,
+	}
+}
+
+func (fne *filterNameError) Error() string {
+	return fne.err.Error()
+}
+
+type filterArgumentError struct {
+	err  error
+	name string
+	position
+}
+
+func newFilterArgumentError(err error, position position, name string) *filterArgumentError {
+	return &filterArgumentError{
+		err:      err,
+		position: position,
+		name:     name,
+	}
+}
+
+func (fne *filterArgumentError) Error() string {
+	return fne.err.Error()
+}
+
+func newFormattedError(err error, input string, position position, help string) error {
+	red := "\x1b[31m"
+	undercurl := "\x1b[4:3m"
+	reset := "\x1b[0m"
+
+	var inputBefore, inputAfter string
+
+	start := position.start
+	end := position.end
+
+	// handle EOF
+	if len(input) == start {
+		input += " "
+	}
+
+	if start > 0 {
+		inputBefore = input[:start]
+	}
+
+	inputError := input[start:end]
+
+	if len(input) > end {
+		inputAfter = input[end:]
+	}
+
+	if help != "" {
+		help = "\n" + help
+	}
+
+	return fmt.Errorf(
+		"%w:\n\t%s%s%s%s%s%s%s.",
+		err,
+		inputBefore,
+		red,
+		undercurl,
+		inputError,
+		reset,
+		inputAfter,
+		help,
+	)
+}
diff --git a/internal/pipesore/executor.go b/internal/pipesore/executor.go
index aff84cc..da81e04 100644
--- a/internal/pipesore/executor.go
+++ b/internal/pipesore/executor.go
@@ -7,10 +7,11 @@ import (
 	"regexp"
 	"strings"
 
+	"github.com/dyson/pipesore/pkg/levenshtein"
 	"github.com/dyson/pipesore/pkg/pipeline"
 )
 
-func Execute(input string, in io.Reader, out io.Writer) error {
+func execute(input string, in io.Reader, out io.Writer) error {
 	tree, err := newParser(newLexer(input)).parse()
 	if err != nil {
 		return fmt.Errorf("error parsing pipeline: %w", err)
@@ -32,22 +33,41 @@ func newExecutor(tree *ast, r io.Reader, w io.Writer) *executor {
 func (e executor) execute() error {
 	p := pipeline.NewPipeline(e.reader)
 
-	for _, inFunction := range e.tree.functions {
-		name := strings.ToLower(inFunction.name)
+	for _, inFilter := range e.tree.filters {
+		name := strings.ToLower(inFilter.name)
 
 		filter, ok := pipeline.Filters[name]
 		if !ok {
-			return fmt.Errorf("unknown function %s()", inFunction.name)
+			lowestScore := len(name)
+			suggestion := ""
+			for f := range pipeline.Filters {
+				distance := levenshtein.Distance(name, f)
+				if distance < lowestScore {
+					lowestScore = distance
+					suggestion = f
+				}
+			}
+
+			return newFilterNameError(
+				fmt.Errorf("error running pipeline: unknown filter '%s()'", inFilter.name),
+				inFilter.position,
+				inFilter.name,
+				suggestion,
+			)
 		}
 
-		filterType := filter.Type()
+		filterType := filter.Value.Type()
 
-		args, err := e.convertArguments(inFunction, filterType)
+		args, err := e.convertArguments(inFilter, filterType)
 		if err != nil {
-			return err
+			return newFilterArgumentError(
+				fmt.Errorf("error running pipeline: %w", err),
+				inFilter.position,
+				name,
+			)
 		}
 
-		p.Filter(filter.Call(args)[0].Interface().(func(io.Reader, io.Writer) error))
+		p.Filter(filter.Value.Call(args)[0].Interface().(func(io.Reader, io.Writer) error))
 	}
 
 	if _, err := p.Output(e.writer); err != nil {
@@ -57,40 +77,45 @@ func (e executor) execute() error {
 	return nil
 }
 
-func (e executor) convertArguments(inFunction function, filterType reflect.Type) ([]reflect.Value, error) {
-	if len(inFunction.arguments) != filterType.NumIn() {
-		return nil, fmt.Errorf("wrong number of arguments in call to %s(): expected %d, got %d", inFunction.name, filterType.NumIn(), len(inFunction.arguments))
+func (e executor) convertArguments(inFilter filter, filterType reflect.Type) ([]reflect.Value, error) {
+	if len(inFilter.arguments) != filterType.NumIn() {
+		argument := "argument"
+		if filterType.NumIn() > 1 {
+			argument += "s"
+		}
+
+		return nil, fmt.Errorf("expected %d %s in call to '%s()', got %d", filterType.NumIn(), argument, inFilter.name, len(inFilter.arguments))
 	}
 
 	args := []reflect.Value{}
 
-	for i := 0; i < len(inFunction.arguments); i++ {
-		inArg := inFunction.arguments[i]
+	for i := 0; i < len(inFilter.arguments); i++ {
+		inArg := inFilter.arguments[i]
 		filterArgType := filterType.In(i)
 
 		switch filterArgType.String() {
 		case "string":
 			if reflect.TypeOf(inArg).String() != "string" {
-				return nil, fmt.Errorf("expected argument %d in call to %s() to be string, got: %v (%T)", i+1, inFunction.name, inArg, inArg)
+				return nil, fmt.Errorf("expected argument %d in call to '%s()' to be a string, got %v (%T)", i+1, inFilter.name, inArg, inArg)
 			}
 
 			args = append(args, reflect.ValueOf(inArg))
 
 		case "int":
 			if reflect.TypeOf(inArg).String() != "int" {
-				return nil, fmt.Errorf("expected argument %d in call to %s() to be int, got: %v (%T)", i+1, inFunction.name, inArg, inArg)
+				return nil, fmt.Errorf("expected argument %d in call to '%s()' to be an int, got %v (%T)", i+1, inFilter.name, inArg, inArg)
 			}
 
 			args = append(args, reflect.ValueOf(inArg))
 
 		case "*regexp.Regexp":
 			if reflect.TypeOf(inArg).String() != "string" {
-				return nil, fmt.Errorf("expected argument %d in call to %s() to be valid regex.Regexp string, got: %v (%T)", i+1, inFunction.name, inArg, inArg)
+				return nil, fmt.Errorf("expected argument %d in call to '%s()' to be a valid regex.Regexp string, got %v (%T)", i+1, inFilter.name, inArg, inArg)
 			}
 
 			re, err := regexp.Compile(inArg.(string))
 			if err != nil {
-				return nil, fmt.Errorf("expected argument %d in call to %s() to be valid regex.Regexp string, got: %v (%T), err: %v", i+1, inFunction.name, inArg, inArg, err)
+				return nil, fmt.Errorf("expected argument %d in call to '%s()' to be a valid regex.Regexp string, got %v (%T), err %v", i+1, inFilter.name, inArg, inArg, err)
 			}
 
 			args = append(args, reflect.ValueOf(re))
diff --git a/internal/pipesore/executor_test.go b/internal/pipesore/executor_test.go
index 9ee1ec0..c9ba489 100644
--- a/internal/pipesore/executor_test.go
+++ b/internal/pipesore/executor_test.go
@@ -16,7 +16,7 @@ func TestExecute(t *testing.T) {
 	want := "4 bird\n"
 	got := &bytes.Buffer{}
 
-	err := Execute(filters, strings.NewReader(input), got)
+	err := execute(filters, strings.NewReader(input), got)
 	if err != nil {
 		t.Fatal(err)
 	}
diff --git a/internal/pipesore/help.go b/internal/pipesore/help.go
new file mode 100644
index 0000000..fc5dde2
--- /dev/null
+++ b/internal/pipesore/help.go
@@ -0,0 +1,86 @@
+package pipesore
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/dyson/pipesore/pkg/pipeline"
+)
+
+func printHelp() {
+	sb := &strings.Builder{}
+
+	w := func(s string) {
+		wrap(sb, s)
+	}
+
+	w("pipesore - command-line text processor")
+	w("")
+	w("Usage:")
+	w("  pipesore '<filter>[ | <filter>]...'")
+	w("  pipesore [option]")
+	w("")
+	w("Example:")
+	w("  $ echo \"cat cat cat dog bird bird bird bird\" | \\")
+	w("  pipesore 'Replace(\" \", \"\\n\") | Frequency() | First(1)'")
+	w("  4 bird")
+	w("")
+	w("Filters:")
+	w("  All filters can be '|' (piped) together in any order, although not all ordering is logical.")
+	w("")
+	w("  All filter arguments are required. There are no assumptions about default values.")
+	w("")
+	w("  A filter prefixed with an \"!\" will return the opposite result of the non prefixed filter of the same name. For example `First(1)` would return only the first line of the input and `!First(1)` (read as not first) would skip the first line of the input and return all other lines.")
+	w("")
+	w("  ---")
+	w("")
+	for _, name := range pipeline.Filters.GetOrderedNames() {
+		filter := pipeline.Filters[name]
+		w("  " + filter.Definition)
+		w("    " + filter.Description)
+		w("")
+	}
+	w("Options:")
+	w("  -h, --help     show this help message")
+	w("  -v, --version  show pipesore version")
+
+	fmt.Printf(sb.String())
+}
+
+func wrap(sb *strings.Builder, s string) {
+	width := 80
+
+	prefix := ""
+	for i := 0; i < len(s); i++ {
+		if s[i] != ' ' {
+			prefix = s[:i]
+			s = s[i:]
+			break
+		}
+	}
+
+	width = width - len(prefix)
+
+	lastSpace := 0
+	lastBreak := 0
+
+	for i := 0; i < len(s); {
+		if i == lastBreak+width {
+			sb.WriteString(prefix)
+			sb.WriteString(s[lastBreak:lastSpace])
+			sb.WriteString("\n")
+			lastBreak = lastSpace + 1
+			i = lastBreak
+		}
+
+		if s[i] == ' ' {
+			lastSpace = i
+		}
+
+		i++
+	}
+
+	sb.WriteString(prefix)
+	sb.WriteString(s[lastBreak:])
+	sb.WriteString("\n")
+}
diff --git a/internal/pipesore/lexer.go b/internal/pipesore/lexer.go
index 33a06a6..9b62aef 100644
--- a/internal/pipesore/lexer.go
+++ b/internal/pipesore/lexer.go
@@ -4,6 +4,7 @@ import (
 	"errors"
 	"fmt"
 	"strconv"
+	"unicode"
 )
 
 type lexer struct {
@@ -11,8 +12,6 @@ type lexer struct {
 	position int
 }
 
-type filter func(ch byte) bool
-
 func newLexer(input string) *lexer {
 	l := &lexer{input: input}
 	return l
@@ -23,30 +22,41 @@ func (l *lexer) getToken() token {
 	var tl string
 
 	ch := l.getSignificantChar()
-
-	if isFunction(ch) {
+	start := l.position
+	if isFilter(ch) {
 		return token{
-			ttype:   FUNCTION,
-			literal: l.getString(isFunction),
+			ttype:   FILTER,
+			literal: l.getString(isFilter),
+			position: position{
+				start: start,
+				end:   l.position,
+			},
 		}
 	} else if isDigit(ch) {
 		return token{
 			ttype:   INT,
 			literal: l.getString(isDigit),
+			position: position{
+				start: start,
+				end:   l.position,
+			},
 		}
 	} else if isQuote(ch) {
 		str, err := l.getQuotedString()
-
 		if err == nil {
 			tt = STRING
 			tl = str
 		} else {
 			tt = ILLEGAL
-			tl = fmt.Sprintf("%v in %s", err, str)
+			tl = fmt.Sprintf("%v '%s'", err, str)
 		}
 		return token{
 			ttype:   tt,
 			literal: tl,
+			position: position{
+				start: start,
+				end:   l.position,
+			},
 		}
 	}
 
@@ -63,9 +73,20 @@ func (l *lexer) getToken() token {
 		tt = EOF
 	}
 
+	if unicode.IsPrint(rune(ch)) {
+		tl = string(ch)
+	}
+
 	l.position++
 
-	return token{ttype: tt, literal: string(ch)}
+	return token{
+		ttype:   tt,
+		literal: tl,
+		position: position{
+			start: start,
+			end:   l.position,
+		},
+	}
 }
 
 func (l *lexer) getSignificantChar() byte {
@@ -79,7 +100,7 @@ func (l *lexer) getSignificantChar() byte {
 	return ch
 }
 
-func (l *lexer) getString(fn filter) string {
+func (l *lexer) getString(fn func(byte) bool) string {
 	startPosition := l.position
 	l.position++
 
@@ -125,7 +146,7 @@ func isWhitespace(ch byte) bool {
 	return ch == ' '
 }
 
-func isFunction(ch byte) bool {
+func isFilter(ch byte) bool {
 	return ch == '!' || 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z'
 }
 
diff --git a/internal/pipesore/lexer_test.go b/internal/pipesore/lexer_test.go
index 1cef59f..555c872 100644
--- a/internal/pipesore/lexer_test.go
+++ b/internal/pipesore/lexer_test.go
@@ -2,6 +2,7 @@ package pipesore
 
 import (
 	"fmt"
+	"reflect"
 	"testing"
 )
 
@@ -10,31 +11,28 @@ func TestGetToken(t *testing.T) {
 
 	filters := `Replace(" ", "\n") | Freq() | First(1)`
 
-	tests := []struct {
-		wantedType    tokenType
-		wantedLiteral string
-	}{
-		{FUNCTION, "Replace"},
-		{LPAREN, "("},
-		{STRING, " "},
-		{COMMA, ","},
-		{STRING, "\n"},
-		{RPAREN, ")"},
+	tests := []token{
+		{ttype: FILTER, literal: "Replace", position: position{start: 0, end: 7}},
+		{ttype: LPAREN, literal: "(", position: position{start: 7, end: 8}},
+		{ttype: STRING, literal: " ", position: position{start: 8, end: 11}},
+		{ttype: COMMA, literal: ",", position: position{start: 11, end: 12}},
+		{ttype: STRING, literal: "\n", position: position{start: 13, end: 17}},
+		{ttype: RPAREN, literal: ")", position: position{start: 17, end: 18}},
 
-		{PIPE, "|"},
+		{ttype: PIPE, literal: "|", position: position{start: 19, end: 20}},
 
-		{FUNCTION, "Freq"},
-		{LPAREN, "("},
-		{RPAREN, ")"},
+		{ttype: FILTER, literal: "Freq", position: position{start: 21, end: 25}},
+		{ttype: LPAREN, literal: "(", position: position{start: 25, end: 26}},
+		{ttype: RPAREN, literal: ")", position: position{start: 26, end: 27}},
 
-		{PIPE, "|"},
+		{ttype: PIPE, literal: "|", position: position{start: 28, end: 29}},
 
-		{FUNCTION, "First"},
-		{LPAREN, "("},
-		{INT, "1"},
-		{RPAREN, ")"},
+		{ttype: FILTER, literal: "First", position: position{start: 30, end: 35}},
+		{ttype: LPAREN, literal: "(", position: position{start: 35, end: 36}},
+		{ttype: INT, literal: "1", position: position{start: 36, end: 37}},
+		{ttype: RPAREN, literal: ")", position: position{start: 37, end: 38}},
 
-		{EOF, "\000"},
+		{ttype: EOF, literal: "", position: position{start: 38, end: 39}},
 	}
 
 	l := newLexer(filters)
@@ -48,9 +46,8 @@ func TestGetToken(t *testing.T) {
 		t.Run(fmt.Sprint(k), func(t *testing.T) {
 			t.Parallel()
 
-			if got.ttype != tc.wantedType || got.literal != tc.wantedLiteral {
-				t.Logf("tokentype wanted=%q, got=%q", tc.wantedType, got.ttype)
-				t.Fatalf("literal wanted=%q, got=%q", tc.wantedLiteral, got.literal)
+			if !reflect.DeepEqual(tc, got) {
+				t.Fatalf("wanted: %#v, got: %#v", tc, got)
 			}
 		})
 	}
diff --git a/internal/pipesore/parser.go b/internal/pipesore/parser.go
index d172e65..030ae77 100644
--- a/internal/pipesore/parser.go
+++ b/internal/pipesore/parser.go
@@ -22,17 +22,15 @@ func (p *parser) nextToken() *parser {
 func (p *parser) parse() (*ast, error) {
 	tree := newAST()
 
-	if p.nextToken().tokenIsType(EOF) {
-		return tree, nil
-	}
+	p.nextToken()
 
 	for {
-		f, err := p.parseFunction()
+		f, err := p.parseFilter()
 		if err != nil {
 			return nil, err
 		}
 
-		tree.functions = append(tree.functions, *f)
+		tree.filters = append(tree.filters, *f)
 
 		if p.nextToken().tokenIsType(EOF) {
 			break
@@ -65,27 +63,32 @@ func (p *parser) tokenIsTypes(tts ...tokenType) bool {
 
 func (p *parser) tokenMustType(tt tokenType) error {
 	if !p.tokenIsType(tt) {
-		return fmt.Errorf("unexpected %s (%s), expected %s", p.t.ttype, p.t.literal, tt)
+		return newSyntaxError(
+			fmt.Errorf("unexpected %s: expected '%s'", p.t, tt),
+			p.t.position,
+		)
 	}
-
 	return nil
 }
 
 func (p *parser) tokenMustTypes(tts ...tokenType) error {
 	if !p.tokenIsTypes(tts...) {
-		return fmt.Errorf("unexpected %s (%s), expected one of %s", p.t.ttype, p.t.literal, tts)
+		return newSyntaxError(
+			fmt.Errorf("unexpected %s: expected one of '%s'", p.t, tts),
+			p.t.position,
+		)
 	}
 
 	return nil
 }
 
-func (p *parser) parseFunction() (*function, error) {
-	err := p.tokenMustType(FUNCTION)
+func (p *parser) parseFilter() (*filter, error) {
+	err := p.tokenMustType(FILTER)
 	if err != nil {
 		return nil, err
 	}
 
-	name := p.t.literal
+	filterToken := p.t
 
 	err = p.nextToken().tokenMustType(LPAREN)
 	if err != nil {
@@ -102,9 +105,10 @@ func (p *parser) parseFunction() (*function, error) {
 		return nil, err
 	}
 
-	f := function{
-		name:      name,
+	f := filter{
+		name:      filterToken.literal,
 		arguments: args,
+		position:  filterToken.position,
 	}
 
 	return &f, nil
diff --git a/internal/pipesore/parser_test.go b/internal/pipesore/parser_test.go
index 8aa0644..9656c51 100644
--- a/internal/pipesore/parser_test.go
+++ b/internal/pipesore/parser_test.go
@@ -12,10 +12,10 @@ func TestParse(t *testing.T) {
 	filters := `Replace(" ", "\n") | Freq() | First(1)`
 
 	want := &ast{
-		functions: []function{
-			{name: "Replace", arguments: []any{" ", "\n"}},
-			{name: "Freq", arguments: nil},
-			{name: "First", arguments: []any{1}},
+		filters: []filter{
+			{name: "Replace", arguments: []any{" ", "\n"}, position: position{start: 0, end: 7}},
+			{name: "Freq", arguments: nil, position: position{start: 21, end: 25}},
+			{name: "First", arguments: []any{1}, position: position{start: 30, end: 35}},
 		},
 	}
 
diff --git a/internal/pipesore/token.go b/internal/pipesore/token.go
index d55f692..7e8f0ce 100644
--- a/internal/pipesore/token.go
+++ b/internal/pipesore/token.go
@@ -6,9 +6,9 @@ const (
 	ILLEGAL = iota
 	EOF
 
-	FUNCTION // First
-	INT      // 1234
-	STRING   // hello, world!
+	FILTER // First
+	INT    // 1234
+	STRING // hello, world!
 
 	QUOTE // "
 
@@ -23,9 +23,9 @@ var tokens = [...]string{
 	ILLEGAL: "ILLEGAL",
 	EOF:     "EOF",
 
-	FUNCTION: "FUNCTION",
-	INT:      "INT",
-	STRING:   "STRING",
+	FILTER: "FILTER",
+	INT:    "INT",
+	STRING: "STRING",
 
 	QUOTE: "\"",
 
@@ -42,11 +42,22 @@ func (t tokenType) String() string {
 	return tokens[t]
 }
 
+type position struct {
+	start int
+	end   int
+}
+
 type token struct {
 	ttype   tokenType
 	literal string
+	position
 }
 
 func (t token) String() string {
-	return fmt.Sprintf("{%s %v}", t.ttype, t.literal)
+	s := fmt.Sprintf("'%s'", t.ttype)
+	if t.literal != "" && t.literal != t.ttype.String() {
+		s += fmt.Sprintf(" (%s)", t.literal)
+	}
+
+	return s
 }
diff --git a/pkg/levenshtein/levenshtein.go b/pkg/levenshtein/levenshtein.go
new file mode 100644
index 0000000..89d207c
--- /dev/null
+++ b/pkg/levenshtein/levenshtein.go
@@ -0,0 +1,33 @@
+package levenshtein
+
+// https://rosettacode.org/wiki/Levenshtein_distance#Go
+func Distance(s, t string) int {
+	d := make([][]int, len(s)+1)
+	for i := range d {
+		d[i] = make([]int, len(t)+1)
+	}
+	for i := range d {
+		d[i][0] = i
+	}
+	for j := range d[0] {
+		d[0][j] = j
+	}
+	for j := 1; j <= len(t); j++ {
+		for i := 1; i <= len(s); i++ {
+			if s[i-1] == t[j-1] {
+				d[i][j] = d[i-1][j-1]
+			} else {
+				min := d[i-1][j]
+				if d[i][j-1] < min {
+					min = d[i][j-1]
+				}
+				if d[i-1][j-1] < min {
+					min = d[i-1][j-1]
+				}
+				d[i][j] = min + 1
+			}
+		}
+
+	}
+	return d[len(s)][len(t)]
+}
diff --git a/pkg/pipeline/filters.go b/pkg/pipeline/filters.go
index 8da1635..665ab52 100644
--- a/pkg/pipeline/filters.go
+++ b/pkg/pipeline/filters.go
@@ -14,25 +14,136 @@ import (
 	"unicode/utf8"
 )
 
+type filters map[string]filter
+
+func (f filters) GetOrderedNames() []string {
+	names := []string{}
+	for name := range f {
+		names = append(names, name)
+	}
+
+	sort.SliceStable(names, func(i, j int) bool {
+		iName := names[i]
+		iNot := false
+
+		jName := names[j]
+
+		if iName[0] == '!' {
+			iName = iName[1:]
+			iNot = true
+		}
+
+		if jName[0] == '!' {
+			jName = jName[1:]
+		}
+
+		if iName != jName {
+			return iName < jName
+		}
+
+		if iNot == true {
+			return false
+		}
+
+		return true
+	})
+
+	return names
+}
+
+type filter struct {
+	Value       reflect.Value
+	Definition  string
+	Description string
+}
+
 var (
-	Filters = map[string]reflect.Value{
-		"columns":      reflect.ValueOf(Columns),
-		"columnscsv":   reflect.ValueOf(ColumnsCSV),
-		"countlines":   reflect.ValueOf(CountLines),
-		"countrunes":   reflect.ValueOf(CountRunes),
-		"countwords":   reflect.ValueOf(CountWords),
-		"first":        reflect.ValueOf(First),
-		"!first":       reflect.ValueOf(NotFirst),
-		"frequency":    reflect.ValueOf(Frequency),
-		"join":         reflect.ValueOf(Join),
-		"last":         reflect.ValueOf(Last),
-		"!last":        reflect.ValueOf(NotLast),
-		"match":        reflect.ValueOf(Match),
-		"!match":       reflect.ValueOf(NotMatch),
-		"matchregex":   reflect.ValueOf(MatchRegex),
-		"!matchregex":  reflect.ValueOf(NotMatchRegex),
-		"replace":      reflect.ValueOf(Replace),
-		"replaceregex": reflect.ValueOf(ReplaceRegex),
+	Filters = filters{
+		"columns": {
+			reflect.ValueOf(Columns),
+			"Columns(delimiter string, columns string)",
+			"Returns the selected `columns` in order where `columns` is a 1-indexed comma separated list of column positions. Columns are defined by splitting with the `delimiter`.",
+		},
+		"columnscsv": {
+			reflect.ValueOf(ColumnsCSV),
+			"ColumnsCSV(delimiter string, columns string)",
+			"Returns the selected `columns` in order where `columns` is a 1-indexed comma separated list of column positions. Parsing is CSV aware so quoted columns containing the `delimiter` when splitting are preserved.",
+		},
+		"countlines": {
+			reflect.ValueOf(CountLines),
+			"CountLines()",
+			"Returns the line count. Lines are delimited by `\\r?\\n`.",
+		},
+		"countrunes": {
+			reflect.ValueOf(CountRunes),
+			"CountRunes()",
+			"Returns the rune (Unicode code points) count. Erroneous and short encodings are treated as single runes of width 1 byte.",
+		},
+		"countwords": {
+			reflect.ValueOf(CountWords),
+			"CountWords()",
+			"Returns the word count. Words are delimited by `\\t|\\n|\\v|\\f|\\r|\u00A0|0x85|0xA0`.",
+		},
+		"first": {
+			reflect.ValueOf(First),
+			"First(n int)",
+			"Returns first `n` lines where `n` is a positive integer. If the input has less than `n` lines, all lines are returned.",
+		},
+		"!first": {
+			reflect.ValueOf(NotFirst),
+			"!First(n int)",
+			"Returns all but the first `n` lines where `n` is a positive integer. If the input has less than `n` lines, no lines are returned.",
+		},
+		"frequency": {
+			reflect.ValueOf(Frequency),
+			"Frequency()",
+			"Returns a descending list containing frequency and unique line. Lines with equal frequency are sorted alphabetically.",
+		},
+		"join": {
+			reflect.ValueOf(Join),
+			"Join(delimiter string)",
+			"Joins all lines together separated by `delimiter`.",
+		},
+		"last": {
+			reflect.ValueOf(Last),
+			"Last(n int)",
+			"Returns last `n` lines where `n` is a positive integer. If the input has less than `n` lines, all lines are returned.",
+		},
+		"!last": {
+			reflect.ValueOf(NotLast),
+			"!Last(n int)",
+			"Returns all but the last `n` lines where `n` is a positive integer. If the input has less than `n` lines, no lines are returned.",
+		},
+		"match": {
+			reflect.ValueOf(Match),
+			"Match(substring string)",
+			"Returns all lines that contain `substring`.",
+		},
+		"!match": {
+			reflect.ValueOf(NotMatch),
+			"!Match(substring string)",
+			"Returns all lines that don't contain `substring`.",
+		},
+		"matchregex": {
+			reflect.ValueOf(MatchRegex),
+			"MatchRegex(regex string)",
+			"Returns all lines that match the compiled regular expression 'regex'. Regex is in the form of Re2 (https://github.com/google/re2/wiki/Syntax).",
+		},
+		"!matchregex": {
+			reflect.ValueOf(NotMatchRegex),
+			"!MatchRegex(regex string)",
+			"Returns all lines that don't match the compiled regular expression 'regex'. Regex is in the form of Re2 (https://github.com/google/re2/wiki/Syntax).",
+		},
+		"replace": {
+			reflect.ValueOf(Replace),
+			"Replace(old string, replace string)",
+			"Replaces all non-overlapping instances of `old` with `replace`.",
+		},
+		"replaceregex": {
+			reflect.ValueOf(ReplaceRegex),
+			"ReplaceRegex(regex string, replace string)",
+			"Replaces all matches of the compiled regular expression `regex` with `replace`. Inside `replace`, `$` signs represent submatches. For example `$1` represents the text of the first submatch. Regex is in the form of Re2 (https://github.com/google/re2/wiki/Syntax).",
+		},
 	}
 )
 
@@ -429,8 +540,6 @@ func ReplaceRegex(regex *regexp.Regexp, replace string) func(io.Reader, io.Write
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
 
-// MIT License
-
 // Frequency returns a filter that writes unique lines from the input, prefixed
 // with a frequency count, in descending numerical order (most frequent lines
 // first). Lines with equal frequency will be sorted alphabetically.
diff --git a/pkg/pipeline/filters_test.go b/pkg/pipeline/filters_test.go
index 81cb4b2..2746737 100644
--- a/pkg/pipeline/filters_test.go
+++ b/pkg/pipeline/filters_test.go
@@ -67,6 +67,10 @@ func TestFilters(t *testing.T) {
 	}
 
 	for k, tc := range tests {
+		// TODO: remove shadowing once using go v1.22
+		k := k
+		tc := tc
+
 		t.Run(fmt.Sprint(k), func(t *testing.T) {
 			t.Parallel()
 
diff --git a/pkg/pipeline/pipeline.go b/pkg/pipeline/pipeline.go
index b421b6e..96b0d3b 100644
--- a/pkg/pipeline/pipeline.go
+++ b/pkg/pipeline/pipeline.go
@@ -33,7 +33,6 @@ func (p *pipeline) SetError(err error) {
 // the filter errors the error is set on the pipeline.
 func (p *pipeline) Filter(filter func(io.Reader, io.Writer) error) {
 	r := p.r
-
 	pr, pw := io.Pipe()
 
 	go func() {