From ccb70263c15a60bc25f3dd5cd89a731c9bca0535 Mon Sep 17 00:00:00 2001 From: Dyson Simmons Date: Sun, 3 Mar 2024 01:52:28 +1100 Subject: [PATCH] Add --help, --version, and improved errors --- README.md | 8 +- cmd/pipesore/main.go | 23 ++--- internal/pipesore/ast.go | 9 +- internal/pipesore/cmd.go | 60 ++++++++++++ internal/pipesore/errors.go | 99 +++++++++++++++++++ internal/pipesore/executor.go | 59 ++++++++---- internal/pipesore/executor_test.go | 2 +- internal/pipesore/help.go | 86 +++++++++++++++++ internal/pipesore/lexer.go | 43 ++++++--- internal/pipesore/lexer_test.go | 43 ++++----- internal/pipesore/parser.go | 30 +++--- internal/pipesore/parser_test.go | 8 +- internal/pipesore/token.go | 25 +++-- pkg/levenshtein/levenshtein.go | 33 +++++++ pkg/pipeline/filters.go | 149 +++++++++++++++++++++++++---- pkg/pipeline/filters_test.go | 4 + pkg/pipeline/pipeline.go | 1 - 17 files changed, 562 insertions(+), 120 deletions(-) create mode 100644 internal/pipesore/cmd.go create mode 100644 internal/pipesore/errors.go create mode 100644 internal/pipesore/help.go create mode 100644 pkg/levenshtein/levenshtein.go diff --git a/README.md b/README.md index f5f355c..a490547 100644 --- a/README.md +++ b/README.md @@ -68,14 +68,14 @@ first line of the input and return all other lines. | Filter | | | ------ | ------- | -| Columns(delimiter *string*, columns *string*) | Returns the selected `columns` in order where `columns` is a 1-indexed comma separated list of column positions. Columns are defined by splitting with the 'delimiter'. | +| Columns(delimiter *string*, columns *string*) | Returns the selected `columns` in order where `columns` is a 1-indexed comma separated list of column positions. Columns are defined by splitting with the `delimiter`. | | ColumnsCSV(delimiter *string*, columns *string*)| Returns the selected `columns` in order where `columns` is a 1-indexed comma separated list of column positions. Parsing is CSV aware so quoted columns containing the `delimiter` when splitting are preserved. | | CountLines() | Returns the line count. Lines are delimited by `\r?\n`. | | CountRunes() | Returns the rune (Unicode code points) count. Erroneous and short encodings are treated as single runes of width 1 byte. | -| CountWords() | Returns the word count. Words are delimited by
`\t\|\n\|\v\|\f\|\r\| \|0x85\|0xA0`. | +| CountWords() | Returns the word count. Words are delimited by `\t\|\n\|\v\|\f\|\r\| \|0x85\|0xA0`. | | First(n int) | Returns first `n` lines where `n` is a positive integer. If the input has less than `n` lines, all lines are returned. | | !First(n int) | Returns all but the the first `n` lines where `n` is a positive integer. If the input has less than `n` lines, no lines are returned. | -| Frequency() | Ruturns a descending list containing frequency and unique line. Lines with equal frequency are sorted alphabetically. | +| Frequency() | Returns a descending list containing frequency and unique line. Lines with equal frequency are sorted alphabetically. | | Join(delimiter *string*) | Joins all lines together seperated by `delimiter`. | | Last(n int) | Returns last `n` lines where `n` is a positive integer. If the input has less than `n` lines, all lines are returned. | | !Last(n int) | Returns all but the last `n` lines where `n` is a positive integer. If the input has less than `n` lines, no lines are returned. | @@ -84,7 +84,7 @@ first line of the input and return all other lines. | MatchRegex(regex *string*) | Returns all lines that match the compiled regular expression 'regex'. Regex is in the form of [Re2](https://github.com/google/re2/wiki/Syntax). | | !MatchRegex(regex *string*) | Returns all lines that don't match the compiled regular expression 'regex'. Regex is in the form of [Re2](https://github.com/google/re2/wiki/Syntax). | | Replace(old *string*, replace *string*) | Replaces all non-overlapping instances of `old` with `replace`. | -| ReplaceRegex(regex *string*, replace *string*) | Replaces all matches of the compiled regular expression `regex` with `replace`. Inside `replace`, `$` signs represent submatches. For example `$1` represents the text of the first submatch. | +| ReplaceRegex(regex *string*, replace *string*) | Replaces all matches of the compiled regular expression `regex` with `replace`. Inside `replace`, `$` signs represent submatches. For example `$1` represents the text of the first submatch. Regex is in the form of [Re2](https://github.com/google/re2/wiki/Syntax). | ## License See [LICENSE](https://github.com/dyson/pipesore/blob/master/LICENSE) file. diff --git a/cmd/pipesore/main.go b/cmd/pipesore/main.go index 17f0737..f7d15ff 100644 --- a/cmd/pipesore/main.go +++ b/cmd/pipesore/main.go @@ -7,24 +7,17 @@ import ( "github.com/dyson/pipesore/internal/pipesore" ) +var ( + version = "dev" + commit = "none" + date = "unknown" +) + func main() { - s, err := run() + s, err := pipesore.Run(version, commit, date) if err != nil { - fmt.Fprintf(os.Stderr, "error: %v\n", err) + fmt.Fprintf(os.Stderr, "%v\n", err) } os.Exit(s) } - -func run() (int, error) { - if len(os.Args) != 2 { - return 1, fmt.Errorf("use a single string to define pipeline") - } - - err := pipesore.Execute(os.Args[1], os.Stdin, os.Stdout) - if err != nil { - return 1, fmt.Errorf("error executing pipeline: %w", err) - } - - return 0, nil -} diff --git a/internal/pipesore/ast.go b/internal/pipesore/ast.go index 23f65d5..daac5b1 100644 --- a/internal/pipesore/ast.go +++ b/internal/pipesore/ast.go @@ -1,20 +1,21 @@ package pipesore type ast struct { - functions []function + filters []filter } func newAST() *ast { return &ast{ - functions: []function{}, + filters: []filter{}, } } -type function struct { +type filter struct { name string arguments []any + position } -func (f function) isNot() bool { +func (f filter) isNot() bool { return f.name[0:1] == "!" } diff --git a/internal/pipesore/cmd.go b/internal/pipesore/cmd.go new file mode 100644 index 0000000..eea71d8 --- /dev/null +++ b/internal/pipesore/cmd.go @@ -0,0 +1,60 @@ +package pipesore + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/dyson/pipesore/pkg/pipeline" +) + +func Run(version, commit, date string) (int, error) { + seeHelp := fmt.Sprintf("See '%s --help'", filepath.Base(os.Args[0])) + + if len(os.Args) != 2 { + return 1, fmt.Errorf("error: define a single pipeline or option.\n%s.", seeHelp) + } + + input := os.Args[1] + + switch input { + case "-h", "--help": + printHelp() + return 0, nil + case "-v", "--version": + fmt.Printf("pipesore version %s, commit %s, date %s\n", version, commit, date) + return 0, nil + case "": + return 1, fmt.Errorf("error: no pipeline defined.\n%s.", seeHelp) + } + + err := execute(input, os.Stdin, os.Stdout) + if err != nil { + var syntaxError *syntaxError + if errors.As(err, &syntaxError) { + return 1, newFormattedError(err, input, syntaxError.position, seeHelp) + } + + var filterNameError *filterNameError + if errors.As(err, &filterNameError) { + if filterNameError.suggestion != "" { + definition := pipeline.Filters[filterNameError.suggestion].Definition + seeHelp = fmt.Sprintf("Did you mean '%s'?\n%s", definition, seeHelp) + } + + return 1, newFormattedError(err, input, filterNameError.position, seeHelp) + } + + var filterArgumentError *filterArgumentError + if errors.As(err, &filterArgumentError) { + help := fmt.Sprintf("%s. %s", pipeline.Filters[filterArgumentError.name].Definition, seeHelp) + + return 1, newFormattedError(err, input, filterArgumentError.position, help) + } + + return 1, fmt.Errorf("%w.\n%s.", err, seeHelp) + } + + return 0, nil +} diff --git a/internal/pipesore/errors.go b/internal/pipesore/errors.go new file mode 100644 index 0000000..8de2750 --- /dev/null +++ b/internal/pipesore/errors.go @@ -0,0 +1,99 @@ +package pipesore + +import "fmt" + +type syntaxError struct { + err error + position +} + +func newSyntaxError(err error, position position) *syntaxError { + return &syntaxError{ + err: err, + position: position, + } +} + +func (pe *syntaxError) Error() string { + return pe.err.Error() +} + +type filterNameError struct { + err error + position + name string + suggestion string +} + +func newFilterNameError(err error, position position, name, suggestion string) *filterNameError { + return &filterNameError{ + err: err, + position: position, + name: name, + suggestion: suggestion, + } +} + +func (fne *filterNameError) Error() string { + return fne.err.Error() +} + +type filterArgumentError struct { + err error + name string + position +} + +func newFilterArgumentError(err error, position position, name string) *filterArgumentError { + return &filterArgumentError{ + err: err, + position: position, + name: name, + } +} + +func (fne *filterArgumentError) Error() string { + return fne.err.Error() +} + +func newFormattedError(err error, input string, position position, help string) error { + red := "\x1b[31m" + undercurl := "\x1b[4:3m" + reset := "\x1b[0m" + + var inputBefore, inputAfter string + + start := position.start + end := position.end + + // handle EOF + if len(input) == start { + input += " " + } + + if start > 0 { + inputBefore = input[:start] + } + + inputError := input[start:end] + + if len(input) > end { + inputAfter = input[end:] + } + + if help != "" { + help = "\n" + help + } + + return fmt.Errorf( + "%w:\n\t%s%s%s%s%s%s%s.", + err, + inputBefore, + red, + undercurl, + inputError, + reset, + inputAfter, + help, + ) +} diff --git a/internal/pipesore/executor.go b/internal/pipesore/executor.go index aff84cc..da81e04 100644 --- a/internal/pipesore/executor.go +++ b/internal/pipesore/executor.go @@ -7,10 +7,11 @@ import ( "regexp" "strings" + "github.com/dyson/pipesore/pkg/levenshtein" "github.com/dyson/pipesore/pkg/pipeline" ) -func Execute(input string, in io.Reader, out io.Writer) error { +func execute(input string, in io.Reader, out io.Writer) error { tree, err := newParser(newLexer(input)).parse() if err != nil { return fmt.Errorf("error parsing pipeline: %w", err) @@ -32,22 +33,41 @@ func newExecutor(tree *ast, r io.Reader, w io.Writer) *executor { func (e executor) execute() error { p := pipeline.NewPipeline(e.reader) - for _, inFunction := range e.tree.functions { - name := strings.ToLower(inFunction.name) + for _, inFilter := range e.tree.filters { + name := strings.ToLower(inFilter.name) filter, ok := pipeline.Filters[name] if !ok { - return fmt.Errorf("unknown function %s()", inFunction.name) + lowestScore := len(name) + suggestion := "" + for f := range pipeline.Filters { + distance := levenshtein.Distance(name, f) + if distance < lowestScore { + lowestScore = distance + suggestion = f + } + } + + return newFilterNameError( + fmt.Errorf("error running pipeline: unknown filter '%s()'", inFilter.name), + inFilter.position, + inFilter.name, + suggestion, + ) } - filterType := filter.Type() + filterType := filter.Value.Type() - args, err := e.convertArguments(inFunction, filterType) + args, err := e.convertArguments(inFilter, filterType) if err != nil { - return err + return newFilterArgumentError( + fmt.Errorf("error running pipeline: %w", err), + inFilter.position, + name, + ) } - p.Filter(filter.Call(args)[0].Interface().(func(io.Reader, io.Writer) error)) + p.Filter(filter.Value.Call(args)[0].Interface().(func(io.Reader, io.Writer) error)) } if _, err := p.Output(e.writer); err != nil { @@ -57,40 +77,45 @@ func (e executor) execute() error { return nil } -func (e executor) convertArguments(inFunction function, filterType reflect.Type) ([]reflect.Value, error) { - if len(inFunction.arguments) != filterType.NumIn() { - return nil, fmt.Errorf("wrong number of arguments in call to %s(): expected %d, got %d", inFunction.name, filterType.NumIn(), len(inFunction.arguments)) +func (e executor) convertArguments(inFilter filter, filterType reflect.Type) ([]reflect.Value, error) { + if len(inFilter.arguments) != filterType.NumIn() { + argument := "argument" + if filterType.NumIn() > 1 { + argument += "s" + } + + return nil, fmt.Errorf("expected %d %s in call to '%s()', got %d", filterType.NumIn(), argument, inFilter.name, len(inFilter.arguments)) } args := []reflect.Value{} - for i := 0; i < len(inFunction.arguments); i++ { - inArg := inFunction.arguments[i] + for i := 0; i < len(inFilter.arguments); i++ { + inArg := inFilter.arguments[i] filterArgType := filterType.In(i) switch filterArgType.String() { case "string": if reflect.TypeOf(inArg).String() != "string" { - return nil, fmt.Errorf("expected argument %d in call to %s() to be string, got: %v (%T)", i+1, inFunction.name, inArg, inArg) + return nil, fmt.Errorf("expected argument %d in call to '%s()' to be a string, got %v (%T)", i+1, inFilter.name, inArg, inArg) } args = append(args, reflect.ValueOf(inArg)) case "int": if reflect.TypeOf(inArg).String() != "int" { - return nil, fmt.Errorf("expected argument %d in call to %s() to be int, got: %v (%T)", i+1, inFunction.name, inArg, inArg) + return nil, fmt.Errorf("expected argument %d in call to '%s()' to be an int, got %v (%T)", i+1, inFilter.name, inArg, inArg) } args = append(args, reflect.ValueOf(inArg)) case "*regexp.Regexp": if reflect.TypeOf(inArg).String() != "string" { - return nil, fmt.Errorf("expected argument %d in call to %s() to be valid regex.Regexp string, got: %v (%T)", i+1, inFunction.name, inArg, inArg) + return nil, fmt.Errorf("expected argument %d in call to '%s()' to be a valid regex.Regexp string, got %v (%T)", i+1, inFilter.name, inArg, inArg) } re, err := regexp.Compile(inArg.(string)) if err != nil { - return nil, fmt.Errorf("expected argument %d in call to %s() to be valid regex.Regexp string, got: %v (%T), err: %v", i+1, inFunction.name, inArg, inArg, err) + return nil, fmt.Errorf("expected argument %d in call to '%s()' to be a valid regex.Regexp string, got %v (%T), err %v", i+1, inFilter.name, inArg, inArg, err) } args = append(args, reflect.ValueOf(re)) diff --git a/internal/pipesore/executor_test.go b/internal/pipesore/executor_test.go index 9ee1ec0..c9ba489 100644 --- a/internal/pipesore/executor_test.go +++ b/internal/pipesore/executor_test.go @@ -16,7 +16,7 @@ func TestExecute(t *testing.T) { want := "4 bird\n" got := &bytes.Buffer{} - err := Execute(filters, strings.NewReader(input), got) + err := execute(filters, strings.NewReader(input), got) if err != nil { t.Fatal(err) } diff --git a/internal/pipesore/help.go b/internal/pipesore/help.go new file mode 100644 index 0000000..fc5dde2 --- /dev/null +++ b/internal/pipesore/help.go @@ -0,0 +1,86 @@ +package pipesore + +import ( + "fmt" + "strings" + + "github.com/dyson/pipesore/pkg/pipeline" +) + +func printHelp() { + sb := &strings.Builder{} + + w := func(s string) { + wrap(sb, s) + } + + w("pipesore - command-line text processor") + w("") + w("Usage:") + w(" pipesore '[ | ]...'") + w(" pipesore [option]") + w("") + w("Example:") + w(" $ echo \"cat cat cat dog bird bird bird bird\" | \\") + w(" pipesore 'Replace(\" \", \"\\n\") | Frequency() | First(1)'") + w(" 4 bird") + w("") + w("Filters:") + w(" All filters can be '|' (piped) together in any order, although not all ordering is logical.") + w("") + w(" All filter arguments are required. There a no assumptions about default values.") + w("") + w(" A filter prefixed with an \"!\" will return the opposite result of the non prefixed filter of the same name. For example `First(1)` would return only the first line of the input and `!First(1)` (read as not first) would skip the first line of the input and return all other lines.") + w("") + w(" ---") + w("") + for _, name := range pipeline.Filters.GetOrderedNames() { + filter := pipeline.Filters[name] + w(" " + filter.Definition) + w(" " + filter.Description) + w("") + } + w("Options:") + w(" -h, --help show this help message") + w(" -v, --version show pipesore version") + + fmt.Printf(sb.String()) +} + +func wrap(sb *strings.Builder, s string) { + width := 80 + + prefix := "" + for i := 0; i < len(s); i++ { + if s[i] != ' ' { + prefix = s[:i] + s = s[i:] + break + } + } + + width = width - len(prefix) + + lastSpace := 0 + lastBreak := 0 + + for i := 0; i < len(s); { + if i == lastBreak+width { + sb.WriteString(prefix) + sb.WriteString(s[lastBreak:lastSpace]) + sb.WriteString("\n") + lastBreak = lastSpace + 1 + i = lastBreak + } + + if s[i] == ' ' { + lastSpace = i + } + + i++ + } + + sb.WriteString(prefix) + sb.WriteString(s[lastBreak:]) + sb.WriteString("\n") +} diff --git a/internal/pipesore/lexer.go b/internal/pipesore/lexer.go index 33a06a6..9b62aef 100644 --- a/internal/pipesore/lexer.go +++ b/internal/pipesore/lexer.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "strconv" + "unicode" ) type lexer struct { @@ -11,8 +12,6 @@ type lexer struct { position int } -type filter func(ch byte) bool - func newLexer(input string) *lexer { l := &lexer{input: input} return l @@ -23,30 +22,41 @@ func (l *lexer) getToken() token { var tl string ch := l.getSignificantChar() - - if isFunction(ch) { + start := l.position + if isFilter(ch) { return token{ - ttype: FUNCTION, - literal: l.getString(isFunction), + ttype: FILTER, + literal: l.getString(isFilter), + position: position{ + start: start, + end: l.position, + }, } } else if isDigit(ch) { return token{ ttype: INT, literal: l.getString(isDigit), + position: position{ + start: start, + end: l.position, + }, } } else if isQuote(ch) { str, err := l.getQuotedString() - if err == nil { tt = STRING tl = str } else { tt = ILLEGAL - tl = fmt.Sprintf("%v in %s", err, str) + tl = fmt.Sprintf("%v '%s'", err, str) } return token{ ttype: tt, literal: tl, + position: position{ + start: start, + end: l.position, + }, } } @@ -63,9 +73,20 @@ func (l *lexer) getToken() token { tt = EOF } + if unicode.IsPrint(rune(ch)) { + tl = string(ch) + } + l.position++ - return token{ttype: tt, literal: string(ch)} + return token{ + ttype: tt, + literal: tl, + position: position{ + start: start, + end: l.position, + }, + } } func (l *lexer) getSignificantChar() byte { @@ -79,7 +100,7 @@ func (l *lexer) getSignificantChar() byte { return ch } -func (l *lexer) getString(fn filter) string { +func (l *lexer) getString(fn func(byte) bool) string { startPosition := l.position l.position++ @@ -125,7 +146,7 @@ func isWhitespace(ch byte) bool { return ch == ' ' } -func isFunction(ch byte) bool { +func isFilter(ch byte) bool { return ch == '!' || 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' } diff --git a/internal/pipesore/lexer_test.go b/internal/pipesore/lexer_test.go index 1cef59f..555c872 100644 --- a/internal/pipesore/lexer_test.go +++ b/internal/pipesore/lexer_test.go @@ -2,6 +2,7 @@ package pipesore import ( "fmt" + "reflect" "testing" ) @@ -10,31 +11,28 @@ func TestGetToken(t *testing.T) { filters := `Replace(" ", "\n") | Freq() | First(1)` - tests := []struct { - wantedType tokenType - wantedLiteral string - }{ - {FUNCTION, "Replace"}, - {LPAREN, "("}, - {STRING, " "}, - {COMMA, ","}, - {STRING, "\n"}, - {RPAREN, ")"}, + tests := []token{ + {ttype: FILTER, literal: "Replace", position: position{start: 0, end: 7}}, + {ttype: LPAREN, literal: "(", position: position{start: 7, end: 8}}, + {ttype: STRING, literal: " ", position: position{start: 8, end: 11}}, + {ttype: COMMA, literal: ",", position: position{start: 11, end: 12}}, + {ttype: STRING, literal: "\n", position: position{start: 13, end: 17}}, + {ttype: RPAREN, literal: ")", position: position{start: 17, end: 18}}, - {PIPE, "|"}, + {ttype: PIPE, literal: "|", position: position{start: 19, end: 20}}, - {FUNCTION, "Freq"}, - {LPAREN, "("}, - {RPAREN, ")"}, + {ttype: FILTER, literal: "Freq", position: position{start: 21, end: 25}}, + {ttype: LPAREN, literal: "(", position: position{start: 25, end: 26}}, + {ttype: RPAREN, literal: ")", position: position{start: 26, end: 27}}, - {PIPE, "|"}, + {ttype: PIPE, literal: "|", position: position{start: 28, end: 29}}, - {FUNCTION, "First"}, - {LPAREN, "("}, - {INT, "1"}, - {RPAREN, ")"}, + {ttype: FILTER, literal: "First", position: position{start: 30, end: 35}}, + {ttype: LPAREN, literal: "(", position: position{start: 35, end: 36}}, + {ttype: INT, literal: "1", position: position{start: 36, end: 37}}, + {ttype: RPAREN, literal: ")", position: position{start: 37, end: 38}}, - {EOF, "\000"}, + {ttype: EOF, literal: "", position: position{start: 38, end: 39}}, } l := newLexer(filters) @@ -48,9 +46,8 @@ func TestGetToken(t *testing.T) { t.Run(fmt.Sprint(k), func(t *testing.T) { t.Parallel() - if got.ttype != tc.wantedType || got.literal != tc.wantedLiteral { - t.Logf("tokentype wanted=%q, got=%q", tc.wantedType, got.ttype) - t.Fatalf("literal wanted=%q, got=%q", tc.wantedLiteral, got.literal) + if !reflect.DeepEqual(tc, got) { + t.Fatalf("wanted: %#v, got: %#v", tc, got) } }) } diff --git a/internal/pipesore/parser.go b/internal/pipesore/parser.go index d172e65..030ae77 100644 --- a/internal/pipesore/parser.go +++ b/internal/pipesore/parser.go @@ -22,17 +22,15 @@ func (p *parser) nextToken() *parser { func (p *parser) parse() (*ast, error) { tree := newAST() - if p.nextToken().tokenIsType(EOF) { - return tree, nil - } + p.nextToken() for { - f, err := p.parseFunction() + f, err := p.parseFilter() if err != nil { return nil, err } - tree.functions = append(tree.functions, *f) + tree.filters = append(tree.filters, *f) if p.nextToken().tokenIsType(EOF) { break @@ -65,27 +63,32 @@ func (p *parser) tokenIsTypes(tts ...tokenType) bool { func (p *parser) tokenMustType(tt tokenType) error { if !p.tokenIsType(tt) { - return fmt.Errorf("unexpected %s (%s), expected %s", p.t.ttype, p.t.literal, tt) + return newSyntaxError( + fmt.Errorf("unexpected %s: expected '%s'", p.t, tt), + p.t.position, + ) } - return nil } func (p *parser) tokenMustTypes(tts ...tokenType) error { if !p.tokenIsTypes(tts...) { - return fmt.Errorf("unexpected %s (%s), expected one of %s", p.t.ttype, p.t.literal, tts) + return newSyntaxError( + fmt.Errorf("unexpected %s: expected one of '%s'", p.t, tts), + p.t.position, + ) } return nil } -func (p *parser) parseFunction() (*function, error) { - err := p.tokenMustType(FUNCTION) +func (p *parser) parseFilter() (*filter, error) { + err := p.tokenMustType(FILTER) if err != nil { return nil, err } - name := p.t.literal + filterToken := p.t err = p.nextToken().tokenMustType(LPAREN) if err != nil { @@ -102,9 +105,10 @@ func (p *parser) parseFunction() (*function, error) { return nil, err } - f := function{ - name: name, + f := filter{ + name: filterToken.literal, arguments: args, + position: filterToken.position, } return &f, nil diff --git a/internal/pipesore/parser_test.go b/internal/pipesore/parser_test.go index 8aa0644..9656c51 100644 --- a/internal/pipesore/parser_test.go +++ b/internal/pipesore/parser_test.go @@ -12,10 +12,10 @@ func TestParse(t *testing.T) { filters := `Replace(" ", "\n") | Freq() | First(1)` want := &ast{ - functions: []function{ - {name: "Replace", arguments: []any{" ", "\n"}}, - {name: "Freq", arguments: nil}, - {name: "First", arguments: []any{1}}, + filters: []filter{ + {name: "Replace", arguments: []any{" ", "\n"}, position: position{start: 0, end: 7}}, + {name: "Freq", arguments: nil, position: position{start: 21, end: 25}}, + {name: "First", arguments: []any{1}, position: position{start: 30, end: 35}}, }, } diff --git a/internal/pipesore/token.go b/internal/pipesore/token.go index d55f692..7e8f0ce 100644 --- a/internal/pipesore/token.go +++ b/internal/pipesore/token.go @@ -6,9 +6,9 @@ const ( ILLEGAL = iota EOF - FUNCTION // First - INT // 1234 - STRING // hello, world! + FILTER // First + INT // 1234 + STRING // hello, world! QUOTE // " @@ -23,9 +23,9 @@ var tokens = [...]string{ ILLEGAL: "ILLEGAL", EOF: "EOF", - FUNCTION: "FUNCTION", - INT: "INT", - STRING: "STRING", + FILTER: "FILTER", + INT: "INT", + STRING: "STRING", QUOTE: "\"", @@ -42,11 +42,22 @@ func (t tokenType) String() string { return tokens[t] } +type position struct { + start int + end int +} + type token struct { ttype tokenType literal string + position } func (t token) String() string { - return fmt.Sprintf("{%s %v}", t.ttype, t.literal) + s := fmt.Sprintf("'%s'", t.ttype) + if t.literal != "" && t.literal != t.ttype.String() { + s += fmt.Sprintf(" (%s)", t.literal) + } + + return s } diff --git a/pkg/levenshtein/levenshtein.go b/pkg/levenshtein/levenshtein.go new file mode 100644 index 0000000..89d207c --- /dev/null +++ b/pkg/levenshtein/levenshtein.go @@ -0,0 +1,33 @@ +package levenshtein + +// https://rosettacode.org/wiki/Levenshtein_distance#Go +func Distance(s, t string) int { + d := make([][]int, len(s)+1) + for i := range d { + d[i] = make([]int, len(t)+1) + } + for i := range d { + d[i][0] = i + } + for j := range d[0] { + d[0][j] = j + } + for j := 1; j <= len(t); j++ { + for i := 1; i <= len(s); i++ { + if s[i-1] == t[j-1] { + d[i][j] = d[i-1][j-1] + } else { + min := d[i-1][j] + if d[i][j-1] < min { + min = d[i][j-1] + } + if d[i-1][j-1] < min { + min = d[i-1][j-1] + } + d[i][j] = min + 1 + } + } + + } + return d[len(s)][len(t)] +} diff --git a/pkg/pipeline/filters.go b/pkg/pipeline/filters.go index 8da1635..665ab52 100644 --- a/pkg/pipeline/filters.go +++ b/pkg/pipeline/filters.go @@ -14,25 +14,136 @@ import ( "unicode/utf8" ) +type filters map[string]filter + +func (f filters) GetOrderedNames() []string { + names := []string{} + for name := range f { + names = append(names, name) + } + + sort.SliceStable(names, func(i, j int) bool { + iName := names[i] + iNot := false + + jName := names[j] + + if iName[0] == '!' { + iName = iName[1:] + iNot = true + } + + if jName[0] == '!' { + jName = jName[1:] + } + + if iName != jName { + return iName < jName + } + + if iNot == true { + return false + } + + return true + }) + + return names +} + +type filter struct { + Value reflect.Value + Definition string + Description string +} + var ( - Filters = map[string]reflect.Value{ - "columns": reflect.ValueOf(Columns), - "columnscsv": reflect.ValueOf(ColumnsCSV), - "countlines": reflect.ValueOf(CountLines), - "countrunes": reflect.ValueOf(CountRunes), - "countwords": reflect.ValueOf(CountWords), - "first": reflect.ValueOf(First), - "!first": reflect.ValueOf(NotFirst), - "frequency": reflect.ValueOf(Frequency), - "join": reflect.ValueOf(Join), - "last": reflect.ValueOf(Last), - "!last": reflect.ValueOf(NotLast), - "match": reflect.ValueOf(Match), - "!match": reflect.ValueOf(NotMatch), - "matchregex": reflect.ValueOf(MatchRegex), - "!matchregex": reflect.ValueOf(NotMatchRegex), - "replace": reflect.ValueOf(Replace), - "replaceregex": reflect.ValueOf(ReplaceRegex), + Filters = filters{ + "columns": { + reflect.ValueOf(Columns), + "Columns(delimiter string, columns string)", + "Returns the selected `columns` in order where `columns is a 1-indexed comma separated list of column positions. Columns are defined by splitting with the `delimiter`.", + }, + "columnscsv": { + reflect.ValueOf(ColumnsCSV), + "ColumnsCSV(delimiter string, columns string)", + "Returns the selected `columns` in order where `columns` is a 1-indexed comma separated list of column positions. Parsing is CSV aware so quoted columns containing the `delimiter` when splitting are preserved.", + }, + "countlines": { + reflect.ValueOf(CountLines), + "CountLines()", + "Returns the line count. Lines are delimited by `\\r\\n`.", + }, + "countrunes": { + reflect.ValueOf(CountRunes), + "CountRunes()", + "Returns the rune (Unicode code points) count. Erroneous and short encodings are treated as single runes of width 1 byte.", + }, + "countwords": { + reflect.ValueOf(CountWords), + "CountWords()", + "Returns the word count. Words are delimited by `\\t|\\n|\\v|\\f|\\r|\u00A0|0x85|0xA0`.", + }, + "first": { + reflect.ValueOf(First), + "First(n int)", + "Returns first `n` lines where `n` is a positive integer. If the input has less than `n` lines, all lines are returned.", + }, + "!first": { + reflect.ValueOf(NotFirst), + "!First(n int)", + "Returns all but the the first `n` lines where `n` is a positive integer. If the input has less than `n` lines, no lines are returned.", + }, + "frequency": { + reflect.ValueOf(Frequency), + "Frequency()", + "Returns a descending list containing frequency and unique line. Lines with equal frequency are sorted alphabetically.", + }, + "join": { + reflect.ValueOf(Join), + "Join(delimiter string)", + "Joins all lines together seperated by `delimiter`.", + }, + "last": { + reflect.ValueOf(Last), + "Last(n int)", + "Returns last `n` lines where `n` is a positive integer. If the input has less than `n` lines, all lines are returned.", + }, + "!last": { + reflect.ValueOf(NotLast), + "!Last(n int)", + "Returns all but the last `n` lines where `n` is a positive integer. If the input has less than `n` lines, no lines are returned.", + }, + "match": { + reflect.ValueOf(Match), + "Match(substring string)", + "Returns all lines that contain `substring`.", + }, + "!match": { + reflect.ValueOf(NotMatch), + "!Match(substring string)", + "Returns all lines that don't contain `substring`.", + }, + "matchregex": { + reflect.ValueOf(MatchRegex), + "MatchRegex(regex string)", + "Returns all lines that match the compiled regular expression 'regex'. Regex is in the form of Re2 (https://github.com/google/re2/wiki/Syntax).", + }, + "!matchregex": { + reflect.ValueOf(NotMatchRegex), + "!MatchRegex(regex string)", + "Returns all lines that don't match the compiled regular expression 'regex'. Regex is in the form of Re2 (https://github.com/google/re2/wiki/Syntax).", + }, + "replace": { + reflect.ValueOf(Replace), + "Replace(old string, replace string)", + "Replaces all non-overlapping instances of `old` with `replace`.", + }, + "replaceregex": { + reflect.ValueOf(ReplaceRegex), + "ReplaceRegex(regex string, replace string)", + "Replaces all matches of the compiled regular expression `regex` with `replace`. Inside `replace`, `$` signs represent submatches. For example `$1` represents the text of the first submatch. Regex is in the form of Re2 (https://github.com/google/re2/wiki/Syntax).", + }, } ) @@ -429,8 +540,6 @@ func ReplaceRegex(regex *regexp.Regexp, replace string) func(io.Reader, io.Write // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -// MIT License - // Frequency returns a filter that writes unique lines from the input, prefixed // with a frequency count, in descending numerical order (most frequent lines // first). Lines with equal frequency will be sorted alphabetically. diff --git a/pkg/pipeline/filters_test.go b/pkg/pipeline/filters_test.go index 81cb4b2..2746737 100644 --- a/pkg/pipeline/filters_test.go +++ b/pkg/pipeline/filters_test.go @@ -67,6 +67,10 @@ func TestFilters(t *testing.T) { } for k, tc := range tests { + // TODO: remove shadowing once using go v1.22 + k := k + tc := tc + t.Run(fmt.Sprint(k), func(t *testing.T) { t.Parallel() diff --git a/pkg/pipeline/pipeline.go b/pkg/pipeline/pipeline.go index b421b6e..96b0d3b 100644 --- a/pkg/pipeline/pipeline.go +++ b/pkg/pipeline/pipeline.go @@ -33,7 +33,6 @@ func (p *pipeline) SetError(err error) { // the filter errors the error is set on the pipeline. func (p *pipeline) Filter(filter func(io.Reader, io.Writer) error) { r := p.r - pr, pw := io.Pipe() go func() {