From 10bf35412e00bcf7ce6ec6e805ceac3337d5b5e7 Mon Sep 17 00:00:00 2001 From: pongzu Date: Thu, 18 Jun 2020 19:40:23 +0900 Subject: [PATCH 01/15] edit .gitignore --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 51987b0..95dd308 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,4 @@ vendor # for GoLand user -.idea/**/workspace.xml -.idea/**/tasks.xmlq +.idea From 572ae8cd1d03aff01df45eff5f54cb0f8924d79f Mon Sep 17 00:00:00 2001 From: pongzu Date: Thu, 18 Jun 2020 19:46:26 +0900 Subject: [PATCH 02/15] fixed go.mod --- go.mod | 4 ++-- go.sum | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 4cb1d58..dfbad23 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module github.com/kanmu/go-sqlfmt -go 1.13 +go 1.14 -require github.com/pkg/errors v0.8.1 +require github.com/pkg/errors v0.9.1 diff --git a/go.sum b/go.sum index f29ab35..d026a67 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,4 @@ github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= From e639143902dfb11367e58452859be86e62d02619 Mon Sep 17 00:00:00 2001 From: pongzu Date: Thu, 18 Jun 2020 22:54:58 +0900 Subject: [PATCH 03/15] refactored ast.go --- go.mod | 5 ++- go.sum | 20 ++++++++-- sqlfmt.go | 20 +++------- sqlfmt/ast.go | 97 ++++++++++++++++++++++++++++++------------------ sqlfmt/errors.go | 14 ------- sqlfmt/format.go | 3 +- sqlfmt/sqlfmt.go | 15 +++----- 7 files changed, 93 insertions(+), 81 deletions(-) delete mode 100644 sqlfmt/errors.go diff --git a/go.mod b/go.mod index dfbad23..4cdd34f 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,7 @@ module github.com/kanmu/go-sqlfmt go 1.14 -require github.com/pkg/errors v0.9.1 +require ( + github.com/pkg/errors v0.9.1 + golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2 +) diff --git a/go.sum b/go.sum index d026a67..b9ac73a 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,18 @@ -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/sqlfmt.go b/sqlfmt.go
index 275d34e..e5efc00 100644
--- a/sqlfmt.go
+++ b/sqlfmt.go
@@ -45,8 +45,7 @@ func visitFile(path string, info os.FileInfo, err error) error {
 		err = processFile(path, nil, os.Stdout)
 	}
 	if err != nil {
-		processError(errors.Wrap(err, "visit file failed"))
-
+		log.Fatal(err)
 	}
 	return nil
 }
@@ -111,7 +110,7 @@ func sqlfmtMain() {
 			log.Fatal("can not use -w while using pipeline")
 		}
 		if err := processFile("", os.Stdin, os.Stdout); err != nil {
-			processError(errors.Wrap(err, "processFile failed"))
+			log.Fatal(err)
 		}
 		return
 	}
@@ -120,18 +119,18 @@ func sqlfmtMain() {
 		path := flag.Arg(i)
 		switch dir, err := os.Stat(path); {
 		case err != nil:
-			processError(err)
+			log.Fatal(err)
 		case dir.IsDir():
 			walkDir(path)
 		default:
 			info, err := os.Stat(path)
 			if err != nil {
-				processError(err)
+				log.Fatal(err)
 			}
 			if isGoFile(info) {
 				err = processFile(path, nil, os.Stdout)
 				if err != nil {
-					processError(err)
+					log.Fatal(err)
 				}
 			}
 		}
@@ -169,12 +168,3 @@ func diff(b1, b2 []byte) (data []byte, err error) {
 	}
 	return
 }
-
-func processError(err error) {
-	switch err.(type) {
-	case *sqlfmt.FormatError:
-		log.Println(err)
-	default:
-		log.Fatal(err)
-	}
-}
diff --git a/sqlfmt/ast.go b/sqlfmt/ast.go
index b003c36..594465d 100644
--- a/sqlfmt/ast.go
+++ b/sqlfmt/ast.go
@@ -1,50 +1,75 @@
 package sqlfmt
 
 import (
-	"fmt"
 	"go/ast"
 	"go/token"
+	"golang.org/x/tools/go/ast/astutil"
 	"log"
-	"strings"
-
-	"github.com/kanmu/go-sqlfmt/sqlfmt/parser/group"
-)
-
-// sqlfmt retrieves all strings from "Query" and "QueryRow" and "Exec" functions in .go file
-const (
-	QUERY    = "Query"
-	QUERYROW = "QueryRow"
-	EXEC     = "Exec"
 )
 
-// replaceAst replace ast node with formatted SQL statement
-func replaceAst(f *ast.File, fset *token.FileSet, options *Options) {
+// Replace replaces each ast node with its formatted SQL statement
+func Replace(f *ast.File, options *Options) {
 	ast.Inspect(f, func(n ast.Node) bool {
-		if x, ok := n.(*ast.CallExpr); ok {
-			if fun, ok := x.Fun.(*ast.SelectorExpr); ok {
-				funcName := fun.Sel.Name
-				if funcName == QUERY || funcName == QUERYROW || funcName == EXEC {
-					// not for parsing url.Query
-					if len(x.Args) > 0 {
-						if arg, ok := x.Args[0].(*ast.BasicLit); ok {
-							sqlStmt := arg.Value
-							if !strings.HasPrefix(sqlStmt, "`") {
-								return true
-							}
-							src := strings.Trim(sqlStmt, "`")
-							res, err := Format(src, options)
-							if err != nil {
-								log.Println(fmt.Sprintf("Format failed at %s: %v", fset.Position(arg.Pos()), err))
-								return true
-							}
-							// FIXME
-							// more elegant
-							arg.Value = "`" + res + strings.Repeat(group.WhiteSpace, options.Distance) + "`"
-						}
-					}
-				}
+		sql, found := findSQL(n)
+		if found {
+			sql, err := Format(sql, options)
+			if err != nil {
+				log.Println(err)
+			} else {
+				replace(n, sql)
 			}
 		}
 		return true
 	})
}

func replace(n ast.Node, sql string) {
	
replaceFunc := func(cr *astutil.Cursor) bool { + cr.Replace(&ast.BasicLit{ + Kind: token.STRING, + Value: sql, + }) + return true + } + astutil.Apply(n, replaceFunc, nil) +} + +func findSQL(n ast.Node) (string, bool) { + ce, ok := n.(*ast.CallExpr) + if !ok { + return "", false + } + se, ok := ce.Fun.(*ast.SelectorExpr) + if !ok { + return "", false + } + + // check func name + ok = validateFuncName(se.Sel.Name) + if !ok { + return "", false + } + + // check length of the parameter + // this is not for parsing "url.Query()" + // FIXME: very adhoc + if len(ce.Args) == 0 { + return "", false + } + + // SQL statement should appear in the first parameter + arg, ok := ce.Args[0].(*ast.BasicLit) + if !ok { + return "", false + } + return arg.Value, true +} + +// go-sqlfmt only formats the value passed as the parameter of "Exec(string, ... any type)", "Query(string, ... any type)" and "QueryRow(string, ... any type)" +func validateFuncName(name string) bool { + switch name { + case "Exec", "Query", "QueryRow": + return true + } + return false +} diff --git a/sqlfmt/errors.go b/sqlfmt/errors.go deleted file mode 100644 index 0fa3c90..0000000 --- a/sqlfmt/errors.go +++ /dev/null @@ -1,14 +0,0 @@ -package sqlfmt - -import ( - "fmt" -) - -// FormatError is an error that occurred while sqlfmt.Process -type FormatError struct { - msg string -} - -func (e *FormatError) Error() string { - return fmt.Sprint(e.msg) -} diff --git a/sqlfmt/format.go b/sqlfmt/format.go index 1fffd06..5983d5c 100644 --- a/sqlfmt/format.go +++ b/sqlfmt/format.go @@ -4,12 +4,11 @@ import ( "bufio" "bytes" "fmt" - "strings" - "github.com/kanmu/go-sqlfmt/sqlfmt/lexer" "github.com/kanmu/go-sqlfmt/sqlfmt/parser" "github.com/kanmu/go-sqlfmt/sqlfmt/parser/group" "github.com/pkg/errors" + "strings" ) // Format formats src in 3 steps diff --git a/sqlfmt/sqlfmt.go b/sqlfmt/sqlfmt.go index c23a9e7..8efef17 100644 --- a/sqlfmt/sqlfmt.go +++ b/sqlfmt/sqlfmt.go @@ -20,26 +20,21 @@ func Process(filename string, src []byte, options *Options) ([]byte, error) { fset := token.NewFileSet() parserMode := parser.ParseComments - astFile, err := parser.ParseFile(fset, filename, src, parserMode) + f, err := parser.ParseFile(fset, filename, src, parserMode) if err != nil { - return nil, formatErr(errors.Wrap(err, "parser.ParseFile failed")) + return nil, errors.Wrap(err, "parser.ParseFile failed") } - replaceAst(astFile, fset, options) + Replace(f, options) var buf bytes.Buffer - if err = printer.Fprint(&buf, fset, astFile); err != nil { - return nil, formatErr(errors.Wrap(err, "printer.Fprint failed")) + return nil, errors.Wrap(err, "printer.Fprint failed") } out, err := format.Source(buf.Bytes()) if err != nil { - return nil, formatErr(errors.Wrap(err, "format.Source failed")) + return nil, errors.Wrap(err, "format.Source failed") } return out, nil } - -func formatErr(err error) error { - return &FormatError{msg: err.Error()} -} From 5b76d4e050cd68d48923dce11ac5f00ebc357a29 Mon Sep 17 00:00:00 2001 From: pongzu Date: Fri, 19 Jun 2020 01:00:41 +0900 Subject: [PATCH 04/15] fixed tiny bug --- go.sum | 2 ++ sqlfmt/sqlfmt.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.sum b/go.sum index b9ac73a..3eaeeed 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,4 @@ +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= golang.org/x/crypto 
v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -12,6 +13,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2 h1:FD4wDsP+CQUqh2V12OBOt90pLHVToe58P++fUu3ggV4= golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/sqlfmt/sqlfmt.go b/sqlfmt/sqlfmt.go index 8efef17..9522f59 100644 --- a/sqlfmt/sqlfmt.go +++ b/sqlfmt/sqlfmt.go @@ -28,7 +28,7 @@ func Process(filename string, src []byte, options *Options) ([]byte, error) { Replace(f, options) var buf bytes.Buffer - if err = printer.Fprint(&buf, fset, astFile); err != nil { + if err = printer.Fprint(&buf, fset, f); err != nil { return nil, errors.Wrap(err, "printer.Fprint failed") } From 3aee94e348293872eaa8e8a1f54c38008f56efe9 Mon Sep 17 00:00:00 2001 From: pongzu Date: Fri, 19 Jun 2020 01:06:59 +0900 Subject: [PATCH 05/15] fixed README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5888658..775e803 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ ORDER BY ## Installation ```bash -run git clone and go build -o sqlfmt +go get github.com/kanmu/go-sqlfmt/cmd/sqlfmt ``` ## Usage From 3d6c289fd6117726714e6dfeb7345126c0188076 Mon Sep 17 00:00:00 2001 From: pongzu Date: Fri, 19 Jun 2020 01:27:51 +0900 Subject: [PATCH 06/15] fixed lexer --- go.mod | 1 + go.sum | 10 + sqlfmt/format.go | 3 +- sqlfmt/lexer/token.go | 5 +- sqlfmt/lexer/tokenizer.go | 563 +++++++++++++++------------------ sqlfmt/lexer/tokenizer_test.go | 242 ++++++-------- sqlfmt/sqlfmt.go | 1 + 7 files changed, 383 insertions(+), 442 deletions(-) diff --git a/go.mod b/go.mod index 4cdd34f..d50c32c 100644 --- a/go.mod +++ b/go.mod @@ -4,5 +4,6 @@ go 1.14 require ( github.com/pkg/errors v0.9.1 + github.com/stretchr/testify v1.6.1 golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2 ) diff --git a/go.sum b/go.sum index 3eaeeed..659224f 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,12 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod 
h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -18,3 +25,6 @@ golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roY golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/sqlfmt/format.go b/sqlfmt/format.go index 5983d5c..4c90d6c 100644 --- a/sqlfmt/format.go +++ b/sqlfmt/format.go @@ -16,8 +16,7 @@ import ( // 2: parse tokens by SQL clause group // 3: for each clause group (Reindenter), add indentation or new line in the correct position func Format(src string, options *Options) (string, error) { - t := lexer.NewTokenizer(src) - tokens, err := t.GetTokens() + tokens, err := lexer.Tokenize(src) if err != nil { return src, errors.Wrap(err, "Tokenize failed") } diff --git a/sqlfmt/lexer/token.go b/sqlfmt/lexer/token.go index a5443b1..ef74f0d 100644 --- a/sqlfmt/lexer/token.go +++ b/sqlfmt/lexer/token.go @@ -90,6 +90,9 @@ const ( QUOTEAREA SURROUNDING + COLON + DOUBLECOLON + SPACE ) // TokenType is an alias type that represents a kind of token @@ -186,4 +189,4 @@ func (t Token) IsNeedNewLineBefore() bool { // IsKeyWordInSelect returns true if token is a keyword in select group func (t Token) IsKeyWordInSelect() bool { return t.Type == SELECT || t.Type == EXISTS || t.Type == DISTINCT || t.Type == DISTINCTROW || t.Type == INTO || t.Type == AS || t.Type == GROUP || t.Type == ORDER || t.Type == BY || t.Type == ON || t.Type == RETURNING || t.Type == SET || t.Type == UPDATE -} +} \ No newline at end of file diff --git a/sqlfmt/lexer/tokenizer.go b/sqlfmt/lexer/tokenizer.go index 3fa575c..d92a97d 100644 --- a/sqlfmt/lexer/tokenizer.go +++ b/sqlfmt/lexer/tokenizer.go @@ -3,398 +3,293 @@ package lexer import ( "bufio" "bytes" + "fmt" "strings" "github.com/pkg/errors" ) -// Tokenizer tokenizes SQL statements -type Tokenizer struct { - r *bufio.Reader - w *bytes.Buffer // w writes token value. 
It resets its value when the end of token appears
-	result []Token
+type tokenizer struct {
+	r *bufio.Reader
 }
 
-// rune that can't be contained in SQL statement
-// TODO: I have to make better solution of making rune of eof in stead of using '∂'
-var eof = '∂'
-
-// value of literal
-const (
-	Comma            = ","
-	StartParenthesis = "("
-	EndParenthesis   = ")"
-	StartBracket     = "["
-	EndBracket       = "]"
-	StartBrace       = "{"
-	EndBrace         = "}"
-	SingleQuote      = "'"
-	NewLine          = "\n"
-)
-
-// NewTokenizer creates Tokenizer
-func NewTokenizer(src string) *Tokenizer {
-	return &Tokenizer{
+// Tokenize tokenizes src and returns a slice of Token
+// It ignores Tokens of white-space, new-line and tab
+func Tokenize(src string) ([]Token, error) {
+	t := &tokenizer{
 		r: bufio.NewReader(strings.NewReader(src)),
-		w: &bytes.Buffer{},
 	}
-}
 
-// GetTokens returns tokens for parsing
-func (t *Tokenizer) GetTokens() ([]Token, error) {
-	var result []Token
-
-	tokens, err := t.Tokenize()
+	tokens, err := t.tokenize()
 	if err != nil {
-		return nil, errors.Wrap(err, "Tokenize failed")
-	}
-	// replace all tokens without whitespaces and new lines
-	// if "AND" or "OR" appears after new line, token value will be ANDGROUP, ORGROUP
-	for i, tok := range tokens {
-		if tok.Type == AND && tokens[i-1].Type == NEWLINE {
-			andGroupToken := Token{Type: ANDGROUP, Value: tok.Value}
-			result = append(result, andGroupToken)
-			continue
-		}
-		if tok.Type == OR && tokens[i-1].Type == NEWLINE {
-			orGroupToken := Token{Type: ORGROUP, Value: tok.Value}
-			result = append(result, orGroupToken)
-			continue
-		}
-		if tok.Type == WS || tok.Type == NEWLINE {
-			continue
-		}
-		result = append(result, tok)
+		return nil, errors.Wrap(err, "failed to tokenize")
 	}
-	return result, nil
+
+	return tokens, nil
 }
 
-// Tokenize analyses every rune in SQL statement
-// every token is identified when whitespace appears
-func (t *Tokenizer) Tokenize() ([]Token, error) {
+// scan until END OF FILE
+func (t *tokenizer) tokenize() ([]Token, error) {
+	var tokens []Token
 	for {
-		isEOF, err := t.scan()
-
-		if isEOF {
-			break
-		}
+		token, err := t.scan()
 		if err != nil {
 			return nil, err
 		}
+
+		// ignorig space (white-space, new-line and tab)
+		// go-sqlfmt formats src consistent with any space forcibly so far, but should I make a option to choose whether to ignore space..?
+ if !(token.Type == SPACE) { + tokens = append(tokens, token) + } + if token.Type == EOF { + return tokens, nil + } } - return t.result, nil } // unread undoes t.r.readRune method to get last character -func (t *Tokenizer) unread() { t.r.UnreadRune() } - -func isWhiteSpace(ch rune) bool { - return ch == ' ' || ch == '\t' || ch == '\n' || ch == ' ' -} - -func isComma(ch rune) bool { - return ch == ',' -} - -func isStartParenthesis(ch rune) bool { - return ch == '(' -} - -func isEndParenthesis(ch rune) bool { - return ch == ')' -} - -func isSingleQuote(ch rune) bool { - return ch == '\'' -} - -func isStartBracket(ch rune) bool { - return ch == '[' +func (t *tokenizer) unread() error { + if err := t.r.UnreadRune(); err != nil { + return err + } + return nil } -func isEndBracket(ch rune) bool { - return ch == ']' -} +// firstCharactor returns the first charactor of t.r without reading t.r +func (t *tokenizer) firstCharactor() (rune, error) { + ch, _, err := t.r.ReadRune() + if err != nil { + return ch, err + } -func isStartBrace(ch rune) bool { - return ch == '{' + // unread one charactor consumed already + t.unread() + return ch, nil } -func isEndBrace(ch rune) bool { - return ch == '}' -} +// scan reads the first charactor of t.r and creates Token +func (t *tokenizer) scan() (Token, error) { + ch, err := t.firstCharactor() -// scan scans each character and appends to result until "eof" appears -// when it finishes scanning all characters, it returns true -func (t *Tokenizer) scan() (bool, error) { - ch, _, err := t.r.ReadRune() + // create EOF Token if END OF FILE if err != nil { if err.Error() == "EOF" { - ch = eof - } else { - return false, errors.Wrap(err, "read rune failed") + return Token{Type: EOF, Value: "EOF"}, nil } + return Token{}, err } + var buf bytes.Buffer switch { - case ch == eof: - tok := Token{Type: EOF, Value: "EOF"} - t.result = append(t.result, tok) - return true, nil - case isWhiteSpace(ch): - if err := t.scanWhiteSpace(); err != nil { - return false, err + case isSpace(ch): + token, err := t.scanSpace(&buf) + if err != nil { + return Token{}, err } - return false, nil - // extract string + return token, nil + case isPunctuation(ch): + token, err := t.scanPunctuation(&buf) + if err != nil { + return Token{}, err + } + return token, nil + // scan string surrounded by single quote such as 'xxxxxxxx' case isSingleQuote(ch): - if err := t.scanString(); err != nil { - return false, err + token, err := t.scanString(&buf) + if err != nil { + return Token{}, err } - return false, nil - case isComma(ch): - token := Token{Type: COMMA, Value: Comma} - t.result = append(t.result, token) - return false, nil - case isStartParenthesis(ch): - token := Token{Type: STARTPARENTHESIS, Value: StartParenthesis} - t.result = append(t.result, token) - return false, nil - case isEndParenthesis(ch): - token := Token{Type: ENDPARENTHESIS, Value: EndParenthesis} - t.result = append(t.result, token) - return false, nil - case isStartBracket(ch): - token := Token{Type: STARTBRACKET, Value: StartBracket} - t.result = append(t.result, token) - return false, nil - case isEndBracket(ch): - token := Token{Type: ENDBRACKET, Value: EndBracket} - t.result = append(t.result, token) - return false, nil - case isStartBrace(ch): - token := Token{Type: STARTBRACE, Value: StartBrace} - t.result = append(t.result, token) - return false, nil - case isEndBrace(ch): - token := Token{Type: ENDBRACE, Value: EndBrace} - t.result = append(t.result, token) - return false, nil + return token, nil default: - if err := 
t.scanIdent(); err != nil { - return false, err + token, err := t.scanIdent(&buf) + if err != nil { + return Token{}, err } - return false, nil + return token, err } } -func (t *Tokenizer) scanWhiteSpace() error { - t.unread() - +// create token of space +func (t *tokenizer) scanSpace(buf *bytes.Buffer) (Token, error) { for { ch, _, err := t.r.ReadRune() if err != nil { if err.Error() == "EOF" { break } else { - return err + return Token{}, err } } - if !isWhiteSpace(ch) { + if !isSpace(ch) { t.unread() break } else { - t.w.WriteRune(ch) + buf.WriteRune(ch) } } - if strings.Contains(t.w.String(), "\n") { - tok := Token{Type: NEWLINE, Value: "\n"} - t.result = append(t.result, tok) - } else { - tok := Token{Type: WS, Value: t.w.String()} - t.result = append(t.result, tok) + return Token{Type: SPACE, Value: buf.String()}, nil +} + +// create token of punctuation +func (t *tokenizer) scanPunctuation(buf *bytes.Buffer) (Token, error) { + // token of punctuation is consisted of one charactor, so it reads t.r once except DOUBLECOLON token + ch, _, err := t.r.ReadRune() + if err != nil { + return Token{}, err } - t.w.Reset() - return nil + buf.WriteRune(ch) + + // create token of colon or double-colon + // TODO: more elegant + if isColon(ch) { + nextCh, _, err := t.r.ReadRune() + if err != nil { + return Token{}, err + } + // double-colon + if isColon(nextCh) { + return Token{Type: DOUBLECOLON, Value: fmt.Sprintf("%s%s", string(ch), string(nextCh))}, nil + } else { + // it already read the charactor of next token when colon does not appear twice + // t.unread() makes it possible for caller function to scan next charactor that consumed already + t.unread() + return Token{Type: COLON, Value: string(ch)}, nil + } + } + + if ttype, ok := punctuationMap[buf.String()]; ok { + return Token{Type: ttype, Value: buf.String()}, nil + } + + return Token{}, fmt.Errorf("unexpected value: %v", buf.String()) } -// scan string token including single quotes -func (t *Tokenizer) scanString() error { - var counter int - t.unread() +// create token of string +// scan value surrounded with single-quote and return STRING token +func (t *tokenizer) scanString(buf *bytes.Buffer) (Token, error) { + // read and write the first charactor before scanning so that it can ignore the first single quote and read until the last single-quote appears + // TODO: more elegant way to scan string in the SQL + sq, _, err := t.r.ReadRune() + if err != nil { + return Token{}, err + } + buf.WriteRune(sq) + // read until next single-quote appears for { ch, _, err := t.r.ReadRune() if err != nil { if err.Error() == "EOF" { break } else { - return err + return Token{}, err } } - // ignore the first single quote - if counter != 0 && isSingleQuote(ch) { - t.w.WriteRune(ch) + + buf.WriteRune(ch) + if isSingleQuote(ch) { break - } else { - t.w.WriteRune(ch) } - counter++ } - tok := Token{Type: STRING, Value: t.w.String()} - t.result = append(t.result, tok) - t.w.Reset() - return nil -} -// append all ch to result until ch is a white space -// if ident is keyword, Type will be the keyword and value will be the uppercase keyword -func (t *Tokenizer) scanIdent() error { - t.unread() + return Token{Type: STRING, Value: buf.String()}, nil +} +// create token of iden +// append all ch to result until ch is a white-space, new-line or punctuation +// if ident is SQL keyword, it returns Token of the keyword +func (t *tokenizer) scanIdent(buf *bytes.Buffer) (Token, error) { for { ch, _, err := t.r.ReadRune() if err != nil { if err.Error() == "EOF" { break } 
else { - return err + return Token{}, err } } - if isWhiteSpace(ch) { - t.unread() - break - } else if isComma(ch) { - t.unread() - break - } else if isStartParenthesis(ch) { - t.unread() - break - } else if isEndParenthesis(ch) { - t.unread() - break - } else if isSingleQuote(ch) { - t.unread() - break - } else if isStartBracket(ch) { - t.unread() - break - } else if isEndBracket(ch) { - t.unread() - break - } else if isStartBrace(ch) { - t.unread() - break - } else if isEndBrace(ch) { + if isPunctuation(ch) || isSpace(ch) || isSingleQuote(ch) { t.unread() break } else { - t.w.WriteRune(ch) + buf.WriteRune(ch) } } - t.append(t.w.String()) - return nil -} -func (t *Tokenizer) append(v string) { - upperValue := strings.ToUpper(v) - - if ttype, ok := t.isSQLKeyWord(upperValue); ok { - t.result = append(t.result, Token{ - Type: ttype, - Value: upperValue, - }) - } else { - t.result = append(t.result, Token{ - Type: ttype, - Value: v, - }) + upperValue := strings.ToUpper(buf.String()) + if ttype, ok := keywordMap[upperValue]; ok { + return Token{Type: ttype, Value: upperValue}, nil } - t.w.Reset() -} -func (t *Tokenizer) isSQLKeyWord(v string) (TokenType, bool) { - if ttype, ok := sqlKeywordMap[v]; ok { - return ttype, ok - } else if ttype, ok := typeWithParenMap[v]; ok { - if r, _, err := t.r.ReadRune(); err == nil && string(r) == StartParenthesis { - t.unread() - return ttype, ok - } - t.unread() - return IDENT, ok - } - return IDENT, false + return Token{Type: IDENT, Value: buf.String()}, nil } -var sqlKeywordMap = map[string]TokenType{ - "SELECT": SELECT, - "FROM": FROM, - "WHERE": WHERE, - "CASE": CASE, - "ORDER": ORDER, - "BY": BY, - "AS": AS, - "JOIN": JOIN, - "LEFT": LEFT, - "RIGHT": RIGHT, - "INNER": INNER, - "OUTER": OUTER, - "ON": ON, - "WHEN": WHEN, - "END": END, - "GROUP": GROUP, - "DESC": DESC, - "ASC": ASC, - "LIMIT": LIMIT, - "AND": AND, - "OR": OR, - "IN": IN, - "IS": IS, - "NOT": NOT, - "NULL": NULL, - "DISTINCT": DISTINCT, - "LIKE": LIKE, - "BETWEEN": BETWEEN, - "UNION": UNION, - "ALL": ALL, - "HAVING": HAVING, - "EXISTS": EXISTS, - "UPDATE": UPDATE, - "SET": SET, - "RETURNING": RETURNING, - "DELETE": DELETE, - "INSERT": INSERT, - "INTO": INTO, - "DO": DO, - "VALUES": VALUES, - "FOR": FOR, - "THEN": THEN, - "ELSE": ELSE, - "DISTINCTROW": DISTINCTROW, - "FILTER": FILTER, - "WITHIN": WITHIN, - "COLLATE": COLLATE, - "INTERSECT": INTERSECT, - "EXCEPT": EXCEPT, - "OFFSET": OFFSET, - "FETCH": FETCH, - "FIRST": FIRST, - "ROWS": ROWS, - "USING": USING, - "OVERLAPS": OVERLAPS, - "NATURAL": NATURAL, - "CROSS": CROSS, - "ZONE": ZONE, - "NULLS": NULLS, - "LAST": LAST, - "AT": AT, - "LOCK": LOCK, - "WITH": WITH, -} - -var typeWithParenMap = map[string]TokenType{ +var keywordMap = map[string]TokenType{ + "SELECT": SELECT, + "FROM": FROM, + "WHERE": WHERE, + "CASE": CASE, + "ORDER": ORDER, + "BY": BY, + "AS": AS, + "JOIN": JOIN, + "LEFT": LEFT, + "RIGHT": RIGHT, + "INNER": INNER, + "OUTER": OUTER, + "ON": ON, + "WHEN": WHEN, + "END": END, + "GROUP": GROUP, + "DESC": DESC, + "ASC": ASC, + "LIMIT": LIMIT, + "AND": AND, + "OR": OR, + "IN": IN, + "IS": IS, + "NOT": NOT, + "NULL": NULL, + "DISTINCT": DISTINCT, + "LIKE": LIKE, + "BETWEEN": BETWEEN, + "UNION": UNION, + "ALL": ALL, + "HAVING": HAVING, + "EXISTS": EXISTS, + "UPDATE": UPDATE, + "SET": SET, + "RETURNING": RETURNING, + "DELETE": DELETE, + "INSERT": INSERT, + "INTO": INTO, + "DO": DO, + "VALUES": VALUES, + "FOR": FOR, + "THEN": THEN, + "ELSE": ELSE, + "DISTINCTROW": DISTINCTROW, + "FILTER": FILTER, + "WITHIN": WITHIN, + "COLLATE": 
COLLATE,
+	"INTERSECT":   INTERSECT,
+	"EXCEPT":      EXCEPT,
+	"OFFSET":      OFFSET,
+	"FETCH":       FETCH,
+	"FIRST":       FIRST,
+	"ROWS":        ROWS,
+	"USING":       USING,
+	"OVERLAPS":    OVERLAPS,
+	"NATURAL":     NATURAL,
+	"CROSS":       CROSS,
+	"ZONE":        ZONE,
+	"NULLS":       NULLS,
+	"LAST":        LAST,
+	"AT":          AT,
+	"LOCK":        LOCK,
+	"WITH":        WITH,
+	"SUM":         FUNCTION,
+	"AVG":         FUNCTION,
+	"MAX":         FUNCTION,
@@ -438,3 +333,69 @@ var typeWithParenMap = map[string]TokenType{
 	"SECOND":   TYPE,
 	"INTERVAL": TYPE,
 }
+
+var punctuationMap = map[string]TokenType{
+	"(": STARTPARENTHESIS,
+	")": ENDPARENTHESIS,
+	"[": STARTBRACKET,
+	"]": ENDBRACKET,
+	"{": STARTBRACE,
+	"}": ENDBRACE,
+	",": COMMA,
+}
+
+func isWhiteSpace(ch rune) bool {
+	return ch == ' ' || ch == '　'
+}
+
+func isTab(ch rune) bool {
+	return ch == '\t'
+}
+
+func isNewLine(ch rune) bool {
+	return ch == '\n'
+}
+
+func isSpace(ch rune) bool {
+	return isWhiteSpace(ch) || isNewLine(ch) || isTab(ch)
+}
+
+func isComma(ch rune) bool {
+	return ch == ','
+}
+
+func isStartParenthesis(ch rune) bool {
+	return ch == '('
+}
+
+func isEndParenthesis(ch rune) bool {
+	return ch == ')'
+}
+
+func isSingleQuote(ch rune) bool {
+	return ch == '\''
+}
+
+func isStartBracket(ch rune) bool {
+	return ch == '['
+}
+
+func isEndBracket(ch rune) bool {
+	return ch == ']'
+}
+
+func isStartBrace(ch rune) bool {
+	return ch == '{'
+}
+
+func isEndBrace(ch rune) bool {
+	return ch == '}'
+}
+
+func isColon(ch rune) bool {
+	return ch == ':'
+}
+
+func isPunctuation(ch rune) bool {
+	return isStartParenthesis(ch) || isEndParenthesis(ch) || isStartBracket(ch) || isEndBracket(ch) || isStartBrace(ch) || isEndBrace(ch) || isComma(ch) || isColon(ch)
+}
\ No newline at end of file
diff --git a/sqlfmt/lexer/tokenizer_test.go b/sqlfmt/lexer/tokenizer_test.go
index 873d11a..962503c 100644
--- a/sqlfmt/lexer/tokenizer_test.go
+++ b/sqlfmt/lexer/tokenizer_test.go
@@ -1,12 +1,13 @@
 package lexer
 
 import (
-	"reflect"
 	"strings"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
-func TestGetTokens(t *testing.T) {
+func TestTokenize(t *testing.T) {
 	var testingSQLStatement = strings.Trim(`select name, age,sum, sum(case xxx) from user where name xxx and age = 'xxx' limit 100 except 100`, "`")
 	want := []Token{
 		{Type: SELECT, Value: "SELECT"},
 		{Type: IDENT, Value: "name"},
 		{Type: COMMA, Value: ","},
 		{Type: IDENT, Value: "age"},
 		{Type: COMMA, Value: ","},
-		{Type: IDENT, Value: "SUM"},
+		{Type: FUNCTION, Value: "SUM"},
 		{Type: COMMA, Value: ","},
 		{Type: FUNCTION, Value: "SUM"},
 		{Type: STARTPARENTHESIS, Value: "("},
 		{Type: CASE, Value: "CASE"},
 		{Type: IDENT, Value: "xxx"},
 		{Type: ENDPARENTHESIS, Value: ")"},
-
 		{Type: FROM, Value: "FROM"},
 		{Type: IDENT, Value: "user"},
 		{Type: WHERE, Value: "WHERE"},
 		{Type: IDENT, Value: "name"},
 		{Type: IDENT, Value: "xxx"},
 		{Type: AND, Value: "AND"},
 		{Type: IDENT, Value: "age"},
 		{Type: IDENT, Value: "="},
 		{Type: STRING, Value: "'xxx'"},
 		{Type: LIMIT, Value: "LIMIT"},
 		{Type: IDENT, Value: "100"},
 		{Type: EXCEPT, Value: "EXCEPT"},
 		{Type: IDENT, Value: "100"},
-		{Type: EOF, Value: "EOF"},
 	}
 
-	tnz := NewTokenizer(testingSQLStatement)
-	got, err := tnz.GetTokens()
-	if err != nil {
-		t.Fatalf("\nERROR: %#v", err)
-	} else if !reflect.DeepEqual(want, got) {
-		t.Errorf("\nwant %#v, \ngot %#v", want, got)
-	}
+	got, err := Tokenize(testingSQLStatement)
+	assert.Nil(t, err)
+	assert.Equal(t, want, got)
 }
 
-func TestIsWhiteSpace(t *testing.T) {
-	tests := []struct {
-		name string
-		src  rune
-		want bool
-	}{
-		{
-			name: "normal test case 1",
-			src:  '\n',
-			want: true,
-		},
-		{
-			name: "normal test case 2",
-			src:  '\t',
-			want: true,
-		},
-		{
-			name: "normal test case 3",
-			src:  ' ',
-			want: true,
-		},
-		{
-			name: "abnormal
case", - src: 'a', - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := isWhiteSpace(tt.src); got != tt.want { - t.Errorf("\nwant %v, \ngot %v", tt.want, got) - } - }) - } -} +// func TestIsWhiteSpace(t *testing.T) { +// tests := []struct { +// name string +// src rune +// want bool +// }{ +// { +// name: "normal test case 1", +// src: '\n', +// want: true, +// }, +// { +// name: "normal test case 2", +// src: '\t', +// want: true, +// }, +// { +// name: "normal test case 3", +// src: ' ', +// want: true, +// }, +// { +// name: "abnormal case", +// src: 'a', +// want: false, +// }, +// } +// for _, tt := range tests { +// t.Run(tt.name, func(t *testing.T) { +// if got := isWhiteSpace(tt.src); got != tt.want { +// t.Errorf("\nwant %v, \ngot %v", tt.want, got) +// } +// }) +// } +// } -func TestScan(t *testing.T) { - tests := []struct { - name string - src string - want bool - }{ - { - name: "normal test case 1", - src: `select`, - want: false, - }, - { - name: "normal test case 2", - src: `table`, - want: false, - }, - { - name: "normal test case 3", - src: ` `, - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tnz := NewTokenizer(tt.src) +// func TestScan(t *testing.T) { +// tests := []struct { +// name string +// src string +// want bool +// }{ +// { +// name: "normal test case 1", +// src: `select`, +// want: false, +// }, +// { +// name: "normal test case 2", +// src: `table`, +// want: false, +// }, +// { +// name: "normal test case 3", +// src: ` `, +// want: false, +// }, +// } +// for _, tt := range tests { +// t.Run(tt.name, func(t *testing.T) { +// tnz := NewTokenizer(tt.src) - got, err := tnz.scan() - if err != nil { - t.Errorf("\nERROR: %#v", err) - } - if got != tt.want { - t.Errorf("\nwant %v, \ngot %v", tt.want, got) - } - }) - } -} - -func TestScanWhiteSpace(t *testing.T) { - tests := []struct { - name string - src string - want Token - }{ - { - name: "normal test case 1", - src: ` `, - want: Token{Type: WS, Value: " "}, - }, - { - name: "normal test case 2", - src: "\n", - want: Token{Type: NEWLINE, Value: "\n"}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tnz := NewTokenizer(tt.src) - tnz.scanWhiteSpace() +// got, err := tnz.scan() +// if err != nil { +// t.Errorf("\nERROR: %#v", err) +// } +// if got != tt.want { +// t.Errorf("\nwant %v, \ngot %v", tt.want, got) +// } +// }) +// } +// } - if got := tnz.result[0]; got != tt.want { - t.Errorf("\nwant %v, \ngot %v", tt.want, got) - } - }) - } -} +// func TestScanIdent(t *testing.T) { +// tests := []struct { +// name string +// src string +// want Token +// }{ +// { +// name: "normal test case 1", +// src: `select`, +// want: Token{Type: SELECT, Value: "SELECT"}, +// }, +// { +// name: "normal test case 2", +// src: "table", +// want: Token{Type: IDENT, Value: "table"}, +// }, +// } +// for _, tt := range tests { +// t.Run(tt.name, func(t *testing.T) { +// tnz := NewTokenizer(tt.src) +// tnz.scanIdent() -func TestScanIdent(t *testing.T) { - tests := []struct { - name string - src string - want Token - }{ - { - name: "normal test case 1", - src: `select`, - want: Token{Type: SELECT, Value: "SELECT"}, - }, - { - name: "normal test case 2", - src: "table", - want: Token{Type: IDENT, Value: "table"}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tnz := NewTokenizer(tt.src) - tnz.scanIdent() - - if got := tnz.result[0]; got != tt.want { - t.Errorf("\nwant %v, \ngot %v", 
tt.want, got) - } - }) - } -} +// if got := tnz.result[0]; got != tt.want { +// t.Errorf("\nwant %v, \ngot %v", tt.want, got) +// } +// }) +// } +// } \ No newline at end of file diff --git a/sqlfmt/sqlfmt.go b/sqlfmt/sqlfmt.go index 9522f59..b2aa3aa 100644 --- a/sqlfmt/sqlfmt.go +++ b/sqlfmt/sqlfmt.go @@ -28,6 +28,7 @@ func Process(filename string, src []byte, options *Options) ([]byte, error) { Replace(f, options) var buf bytes.Buffer + if err = printer.Fprint(&buf, fset, f); err != nil { return nil, errors.Wrap(err, "printer.Fprint failed") } From accc9d9bced29750cf8acadf27b323b73b1cf7bc Mon Sep 17 00:00:00 2001 From: pongzu Date: Fri, 19 Jun 2020 01:45:19 +0900 Subject: [PATCH 07/15] fixed comments and tiny bugs --- sqlfmt/lexer/tokenizer.go | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/sqlfmt/lexer/tokenizer.go b/sqlfmt/lexer/tokenizer.go index d92a97d..039b507 100644 --- a/sqlfmt/lexer/tokenizer.go +++ b/sqlfmt/lexer/tokenizer.go @@ -37,7 +37,7 @@ func (t *tokenizer) tokenize() ([]Token, error) { return nil, err } - // ignorig space (white-space, new-line and tab) + // go-sqlfmt ignores any spaces (white-space, new-line and tab) // go-sqlfmt formats src consistent with any space forcibly so far, but should I make a option to choose whether to ignore space..? if !(token.Type == SPACE) { tokens = append(tokens, token) @@ -56,23 +56,26 @@ func (t *tokenizer) unread() error { return nil } -// firstCharactor returns the first charactor of t.r without reading t.r -func (t *tokenizer) firstCharactor() (rune, error) { +// firstCharacter returns the first character of t.r without reading t.r +func (t *tokenizer) firstCharacter() (rune, error) { ch, _, err := t.r.ReadRune() if err != nil { return ch, err } - // unread one charactor consumed already - t.unread() + // unread already consumed character + if err = t.unread(); err != nil{ + return ch, err + } + return ch, nil } -// scan reads the first charactor of t.r and creates Token +// scan reads the first character of t.r and creates Token func (t *tokenizer) scan() (Token, error) { - ch, err := t.firstCharactor() + ch, err := t.firstCharacter() - // create EOF Token if END OF FILE + // create EOF Token if it gets eof if err != nil { if err.Error() == "EOF" { return Token{Type: EOF, Value: "EOF"}, nil @@ -134,7 +137,7 @@ func (t *tokenizer) scanSpace(buf *bytes.Buffer) (Token, error) { // create token of punctuation func (t *tokenizer) scanPunctuation(buf *bytes.Buffer) (Token, error) { - // token of punctuation is consisted of one charactor, so it reads t.r once except DOUBLECOLON token + // token of punctuation is consisted of a character, so it reads t.r only once except DOUBLECOLON token ch, _, err := t.r.ReadRune() if err != nil { return Token{}, err @@ -148,12 +151,12 @@ func (t *tokenizer) scanPunctuation(buf *bytes.Buffer) (Token, error) { if err != nil { return Token{}, err } - // double-colon + // in case double-colon appears if isColon(nextCh) { return Token{Type: DOUBLECOLON, Value: fmt.Sprintf("%s%s", string(ch), string(nextCh))}, nil } else { - // it already read the charactor of next token when colon does not appear twice - // t.unread() makes it possible for caller function to scan next charactor that consumed already + // it already read the character of next token when colon does not appear twice + // t.unread() makes it possible for caller function to scan next character that consumed already t.unread() return Token{Type: COLON, Value: string(ch)}, nil } @@ -169,8 
+172,8 @@ func (t *tokenizer) scanPunctuation(buf *bytes.Buffer) (Token, error) { // create token of string // scan value surrounded with single-quote and return STRING token func (t *tokenizer) scanString(buf *bytes.Buffer) (Token, error) { - // read and write the first charactor before scanning so that it can ignore the first single quote and read until the last single-quote appears - // TODO: more elegant way to scan string in the SQL + // read and write the first character before scanning, so that it can ignore the first single quote and read until the last single-quote appears + // FIXME: more elegant way to scan string in the SQL sq, _, err := t.r.ReadRune() if err != nil { return Token{}, err @@ -197,7 +200,7 @@ func (t *tokenizer) scanString(buf *bytes.Buffer) (Token, error) { return Token{Type: STRING, Value: buf.String()}, nil } -// create token of iden +// create token of identifier // append all ch to result until ch is a white-space, new-line or punctuation // if ident is SQL keyword, it returns Token of the keyword func (t *tokenizer) scanIdent(buf *bytes.Buffer) (Token, error) { From 0034a60a3e6c9872c2cb2e347dde3ec79f9b3767 Mon Sep 17 00:00:00 2001 From: pongzu Date: Fri, 19 Jun 2020 01:50:04 +0900 Subject: [PATCH 08/15] fixed comment --- sqlfmt/lexer/tokenizer.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sqlfmt/lexer/tokenizer.go b/sqlfmt/lexer/tokenizer.go index 039b507..f29baf5 100644 --- a/sqlfmt/lexer/tokenizer.go +++ b/sqlfmt/lexer/tokenizer.go @@ -97,7 +97,7 @@ func (t *tokenizer) scan() (Token, error) { return Token{}, err } return token, nil - // scan string surrounded by single quote such as 'xxxxxxxx' + // scan string which appears in the SQL statement surrounded by single quote such as 'xxxxxxxx' case isSingleQuote(ch): token, err := t.scanString(&buf) if err != nil { @@ -145,7 +145,7 @@ func (t *tokenizer) scanPunctuation(buf *bytes.Buffer) (Token, error) { buf.WriteRune(ch) // create token of colon or double-colon - // TODO: more elegant + // FIXME: more elegant if isColon(ch) { nextCh, _, err := t.r.ReadRune() if err != nil { @@ -172,7 +172,7 @@ func (t *tokenizer) scanPunctuation(buf *bytes.Buffer) (Token, error) { // create token of string // scan value surrounded with single-quote and return STRING token func (t *tokenizer) scanString(buf *bytes.Buffer) (Token, error) { - // read and write the first character before scanning, so that it can ignore the first single quote and read until the last single-quote appears + // read and write the first character before scanning, so that it can ignore the first single quote and read until the last single-quote appears // FIXME: more elegant way to scan string in the SQL sq, _, err := t.r.ReadRune() if err != nil { From f1e4489517d591702fdf075852d58cf901571f6c Mon Sep 17 00:00:00 2001 From: pongzu Date: Fri, 19 Jun 2020 02:30:54 +0900 Subject: [PATCH 09/15] add print debug --- sqlfmt/ast.go | 7 +++++-- sqlfmt/format.go | 5 +---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sqlfmt/ast.go b/sqlfmt/ast.go index 594465d..634539b 100644 --- a/sqlfmt/ast.go +++ b/sqlfmt/ast.go @@ -12,11 +12,14 @@ func Replace(f *ast.File, options *Options) { ast.Inspect(f, func(n ast.Node) bool { sql, found := findSQL(n) if found { - sql, err := Format(sql, options) + res, err := Format(sql, options) if err != nil { log.Println(err) + + // XXX for debugging + log.Println(sql) } else { - replace(n, sql) + replace(n, res) } } return true diff --git a/sqlfmt/format.go b/sqlfmt/format.go index 
4c90d6c..1f02dbf 100644 --- a/sqlfmt/format.go +++ b/sqlfmt/format.go @@ -11,10 +11,7 @@ import ( "strings" ) -// Format formats src in 3 steps -// 1: tokenize src -// 2: parse tokens by SQL clause group -// 3: for each clause group (Reindenter), add indentation or new line in the correct position +// Format parse tokens, and build func Format(src string, options *Options) (string, error) { tokens, err := lexer.Tokenize(src) if err != nil { From 078813e099bd0477543375f7c50dae61a3faa911 Mon Sep 17 00:00:00 2001 From: pongzu Date: Sat, 20 Jun 2020 13:41:54 +0900 Subject: [PATCH 10/15] create parserR directory fix token.go, format.go --- sqlfmt/format.go | 20 ++++++++- sqlfmt/lexer/token.go | 9 ++-- sqlfmt/parserR/parser.go | 91 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 sqlfmt/parserR/parser.go diff --git a/sqlfmt/format.go b/sqlfmt/format.go index 1f02dbf..2cf38a9 100644 --- a/sqlfmt/format.go +++ b/sqlfmt/format.go @@ -13,17 +13,33 @@ import ( // Format parse tokens, and build func Format(src string, options *Options) (string, error) { + // 最初と最後を取り除く的な事をする ` or " tokens, err := lexer.Tokenize(src) if err != nil { return src, errors.Wrap(err, "Tokenize failed") } - rs, err := parser.ParseTokens(tokens) + exprs, err := parser.ParseTokens(tokens) if err != nil { return src, errors.Wrap(err, "ParseTokens failed") } - res, err := getFormattedStmt(rs, options.Distance) + /* + var buf &bytes.Buffer{} + for _, expr := range exprs { + stmt, err = expr.Build(options) + // err + buf.WriteString(stmt) + } + + res := buf.String() + if bytes.Compare(src, res) { + // 崩れたよ!って教えるやつ + return err + } + return res + */ + res, err := getFormattedStmt(exprs, options.Distance) if err != nil { return src, errors.Wrap(err, "getFormattedStmt failed") } diff --git a/sqlfmt/lexer/token.go b/sqlfmt/lexer/token.go index ef74f0d..c2ce0e5 100644 --- a/sqlfmt/lexer/token.go +++ b/sqlfmt/lexer/token.go @@ -123,16 +123,17 @@ var ( EndOfHaving = []TokenType{LIMIT, OFFSET, FETCH, ORDER, UNION, EXCEPT, INTERSECT, EOF, ENDPARENTHESIS} EndOfOrderBy = []TokenType{LIMIT, FETCH, OFFSET, UNION, EXCEPT, INTERSECT, EOF, ENDPARENTHESIS} EndOfLimitClause = []TokenType{UNION, EXCEPT, INTERSECT, EOF, ENDPARENTHESIS} - EndOfParenthesis = []TokenType{ENDPARENTHESIS} - EndOfTieClause = []TokenType{SELECT} + EndOfParenthesis = []TokenType{ENDPARENTHESIS, EOF} + // 微妙 + EndOfTieClause = []TokenType{SELECT, EOF} EndOfUpdate = []TokenType{WHERE, SET, RETURNING, EOF} EndOfSet = []TokenType{WHERE, RETURNING, EOF} EndOfReturning = []TokenType{EOF} EndOfDelete = []TokenType{WHERE, FROM, EOF} EndOfInsert = []TokenType{VALUES, EOF} EndOfValues = []TokenType{UPDATE, RETURNING, EOF} - EndOfFunction = []TokenType{ENDPARENTHESIS} - EndOfTypeCast = []TokenType{ENDPARENTHESIS} + EndOfFunction = []TokenType{ENDPARENTHESIS, EOF} + EndOfTypeCast = []TokenType{ENDPARENTHESIS, EOF} EndOfLock = []TokenType{EOF} EndOfWith = []TokenType{EOF} ) diff --git a/sqlfmt/parserR/parser.go b/sqlfmt/parserR/parser.go new file mode 100644 index 0000000..0a8169c --- /dev/null +++ b/sqlfmt/parserR/parser.go @@ -0,0 +1,91 @@ +package parserR + +import ( + "fmt" + "github.com/kanmu/go-sqlfmt/sqlfmt/lexer" +) + +type Expr interface { + Build()string +} + +func ParseTokens(tokens []lexer.Token) ([]Expr, error) { + var ( + err error + expr Expr + exprs []Expr + consumed int + ) + + restTokens := tokens + for t := restTokens[0]; t.Type == lexer.EOF; { + switch t.Type { + case lexer.FUNCTION: + expr, consumed, err = 
parseFunction(restTokens) + if err != nil{ + fmt.Println(err) + } + case lexer.IDENT: + // ... + } + + restTokens = restTokens[consumed:] + exprs = append(exprs, expr) + } + + return exprs, nil +} + +// parentがない場合はnilを入れる +type FunctionExpr struct { + Values []interface{} + Parent Expr + SubQueryCnt int +} + +func (f FunctionExpr) Build() string { + return "" +} + +func parseFunction(tokens []lexer.Token)(*FunctionExpr, int, error){ + var ( + expr = &FunctionExpr{} + consumed = 0 + restTokens = tokens + ) + + // parseのそれぞれの関数がExprとconsumeしたcntだけを返すというインターフェースはそれで良さそう + for t := restTokens[0]; expr.endTType(t.Type); { + switch t.Type { + case restTokens[0].Type: + // 一発目は自分自身をパースしてまうので、そのままTokenを入れておく + expr.Values = append(expr.Values, t) + consumed ++ + case lexer.FUNCTION: + cExpr, cConsumed, err := parseFunction(tokens[consumed:]) + if err != nil { + // FIXME: エラーハンドリングする + return nil, 0, err + } + + cExpr.Parent = expr + expr.Values = append(expr.Values, cExpr) + consumed += cConsumed + default: + expr.Values = append(expr.Values, t) + consumed ++ + } + restTokens = restTokens[consumed:] + } + + return expr, consumed, nil +} + +func (expr *FunctionExpr) endTType(ttype lexer.TokenType) bool{ + for _, end := range lexer.EndOfFunction{ + if ttype == end { + return true + } + } + return false +} \ No newline at end of file From 863da4f3cc1e8f4fca44612e106887284ee23fae Mon Sep 17 00:00:00 2001 From: pongzu Date: Sat, 20 Jun 2020 13:46:40 +0900 Subject: [PATCH 11/15] move function process to another file --- sqlfmt/parserR/function.go | 56 +++++++++++++++++++++++++++++++++ sqlfmt/parserR/function_test.go | 0 sqlfmt/parserR/parser.go | 56 +-------------------------------- sqlfmt/parserR/select.go | 0 sqlfmt/parserR/select_test.go | 0 5 files changed, 57 insertions(+), 55 deletions(-) create mode 100644 sqlfmt/parserR/function.go create mode 100644 sqlfmt/parserR/function_test.go create mode 100644 sqlfmt/parserR/select.go create mode 100644 sqlfmt/parserR/select_test.go diff --git a/sqlfmt/parserR/function.go b/sqlfmt/parserR/function.go new file mode 100644 index 0000000..cf0a160 --- /dev/null +++ b/sqlfmt/parserR/function.go @@ -0,0 +1,56 @@ +package parserR + +import "github.com/kanmu/go-sqlfmt/sqlfmt/lexer" + +type FunctionExpr struct { + Values []interface{} + Parent Expr + SubQueryCnt int +} + +func parseFunction(tokens []lexer.Token)(*FunctionExpr, int, error){ + var ( + expr = &FunctionExpr{} + consumed = 0 + restTokens = tokens + ) + + // parseのそれぞれの関数がExprとconsumeしたcntだけを返すというインターフェースはそれで良さそう + for t := restTokens[0]; expr.endTType(t.Type); { + switch t.Type { + case restTokens[0].Type: + // 一発目は自分自身をパースしてまうので、そのままTokenを入れておく + expr.Values = append(expr.Values, t) + consumed ++ + case lexer.FUNCTION: + cExpr, cConsumed, err := parseFunction(tokens[consumed:]) + if err != nil { + // FIXME: エラーハンドリングする + return nil, 0, err + } + + cExpr.Parent = expr + expr.Values = append(expr.Values, cExpr) + consumed += cConsumed + default: + expr.Values = append(expr.Values, t) + consumed ++ + } + restTokens = restTokens[consumed:] + } + + return expr, consumed, nil +} + +func (expr *FunctionExpr) endTType(ttype lexer.TokenType) bool{ + for _, end := range lexer.EndOfFunction{ + if ttype == end { + return true + } + } + return false +} + +func (f FunctionExpr) Build() string { + return "" +} diff --git a/sqlfmt/parserR/function_test.go b/sqlfmt/parserR/function_test.go new file mode 100644 index 0000000..e69de29 diff --git a/sqlfmt/parserR/parser.go b/sqlfmt/parserR/parser.go index 
0a8169c..fc20959 100644 --- a/sqlfmt/parserR/parser.go +++ b/sqlfmt/parserR/parser.go @@ -26,7 +26,7 @@ func ParseTokens(tokens []lexer.Token) ([]Expr, error) { fmt.Println(err) } case lexer.IDENT: - // ... + } restTokens = restTokens[consumed:] @@ -35,57 +35,3 @@ func ParseTokens(tokens []lexer.Token) ([]Expr, error) { return exprs, nil } - -// parentがない場合はnilを入れる -type FunctionExpr struct { - Values []interface{} - Parent Expr - SubQueryCnt int -} - -func (f FunctionExpr) Build() string { - return "" -} - -func parseFunction(tokens []lexer.Token)(*FunctionExpr, int, error){ - var ( - expr = &FunctionExpr{} - consumed = 0 - restTokens = tokens - ) - - // parseのそれぞれの関数がExprとconsumeしたcntだけを返すというインターフェースはそれで良さそう - for t := restTokens[0]; expr.endTType(t.Type); { - switch t.Type { - case restTokens[0].Type: - // 一発目は自分自身をパースしてまうので、そのままTokenを入れておく - expr.Values = append(expr.Values, t) - consumed ++ - case lexer.FUNCTION: - cExpr, cConsumed, err := parseFunction(tokens[consumed:]) - if err != nil { - // FIXME: エラーハンドリングする - return nil, 0, err - } - - cExpr.Parent = expr - expr.Values = append(expr.Values, cExpr) - consumed += cConsumed - default: - expr.Values = append(expr.Values, t) - consumed ++ - } - restTokens = restTokens[consumed:] - } - - return expr, consumed, nil -} - -func (expr *FunctionExpr) endTType(ttype lexer.TokenType) bool{ - for _, end := range lexer.EndOfFunction{ - if ttype == end { - return true - } - } - return false -} \ No newline at end of file diff --git a/sqlfmt/parserR/select.go b/sqlfmt/parserR/select.go new file mode 100644 index 0000000..e69de29 diff --git a/sqlfmt/parserR/select_test.go b/sqlfmt/parserR/select_test.go new file mode 100644 index 0000000..e69de29 From 90da7c2155443cdc28a8132b8ee8d82148353895 Mon Sep 17 00:00:00 2001 From: pongzu Date: Wed, 24 Jun 2020 00:48:59 +0900 Subject: [PATCH 12/15] passed test --- sqlfmt/parserR/from.go | 60 +++++++++++++++++++++++++++++ sqlfmt/parserR/function_test.go | 1 + sqlfmt/parserR/parenthesis.go | 60 +++++++++++++++++++++++++++++ sqlfmt/parserR/parser.go | 22 +++++++---- sqlfmt/parserR/parser_test.go | 27 +++++++++++++ sqlfmt/parserR/select.go | 68 +++++++++++++++++++++++++++++++++ sqlfmt/parserR/select_test.go | 1 + 7 files changed, 231 insertions(+), 8 deletions(-) create mode 100644 sqlfmt/parserR/from.go create mode 100644 sqlfmt/parserR/parenthesis.go create mode 100644 sqlfmt/parserR/parser_test.go diff --git a/sqlfmt/parserR/from.go b/sqlfmt/parserR/from.go new file mode 100644 index 0000000..835b5c3 --- /dev/null +++ b/sqlfmt/parserR/from.go @@ -0,0 +1,60 @@ +package parserR + +import ( + "github.com/kanmu/go-sqlfmt/sqlfmt/lexer" +) + +type FromExpr struct { + Values []interface{} + Parent Expr + SubQueryCnt int +} + +func parseFrom(tokens []lexer.Token)(*FromExpr, int, error){ + var ( + expr = &FromExpr{} + consumed = 0 + restTokens = tokens + ) + + idx := 0 + // parseのそれぞれの関数がExprとconsumeしたcntだけを返すというインターフェースはそれで良さそう + for { + t := restTokens[idx] + + if expr.endTType(t.Type) { + return expr, idx, nil + } + + switch t.Type { + case lexer.STARTPARENTHESIS: + parseParenthesis(restTokens) + case lexer.FUNCTION: + cExpr, consumed, err := parseFunction(tokens[consumed:]) + if err != nil { + // FIXME: エラーハンドリングする + return nil, 0, err + } + + cExpr.Parent = expr + expr.Values = append(expr.Values, cExpr) + idx += consumed + default: + expr.Values = append(expr.Values, t) + idx++ + } + } +} + +func (expr *FromExpr) endTType(ttype lexer.TokenType) bool{ + for _, end := range lexer.EndOfFrom{ + if ttype == end { + 
return true + } + } + return false +} + +func (f *FromExpr) Build() string { + return "" +} \ No newline at end of file diff --git a/sqlfmt/parserR/function_test.go b/sqlfmt/parserR/function_test.go index e69de29..83d53b9 100644 --- a/sqlfmt/parserR/function_test.go +++ b/sqlfmt/parserR/function_test.go @@ -0,0 +1 @@ +package parserR_test \ No newline at end of file diff --git a/sqlfmt/parserR/parenthesis.go b/sqlfmt/parserR/parenthesis.go new file mode 100644 index 0000000..98daa18 --- /dev/null +++ b/sqlfmt/parserR/parenthesis.go @@ -0,0 +1,60 @@ +package parserR + +import "github.com/kanmu/go-sqlfmt/sqlfmt/lexer" + +type ParenthesisExpr struct { + Values []interface{} + Parent Expr + SubQueryCnt int +} + +func parseParenthesis(tokens []lexer.Token)(*ParenthesisExpr, int, error){ + var ( + expr = &ParenthesisExpr{} + consumed = 0 + restTokens = tokens + ) + + // parseのそれぞれの関数がExprとconsumeしたcntだけを返すというインターフェースはそれで良さそう + for t := restTokens[0]; expr.endTType(t.Type); { + switch t.Type { + case restTokens[0].Type: + // 一発目は自分自身をパースしてまうので、そのままTokenを入れておく + expr.Values = append(expr.Values, t) + consumed ++ + case lexer.SELECT: + // ParseSubquery的な関数を読んだら良さそう + case lexer.FUNCTION: + cExpr, cConsumed, err := parseFunction(tokens[consumed:]) + if err != nil { + // FIXME: エラーハンドリングする + return nil, 0, err + } + + cExpr.Parent = expr + expr.Values = append(expr.Values, cExpr) + consumed += cConsumed + default: + expr.Values = append(expr.Values, t) + consumed ++ + } + restTokens = restTokens[consumed:] + } + + return expr, consumed, nil +} + +func (expr *ParenthesisExpr) endTType(ttype lexer.TokenType) bool{ + for _, end := range lexer.EndOfParenthesis{ + if ttype == end { + return true + } + } + return false +} + +func (f *ParenthesisExpr) Build() string { + return "" +} + + diff --git a/sqlfmt/parserR/parser.go b/sqlfmt/parserR/parser.go index fc20959..5c1d1e4 100644 --- a/sqlfmt/parserR/parser.go +++ b/sqlfmt/parserR/parser.go @@ -18,20 +18,26 @@ func ParseTokens(tokens []lexer.Token) ([]Expr, error) { ) restTokens := tokens - for t := restTokens[0]; t.Type == lexer.EOF; { + idx := 0 + for { + t := restTokens[idx] + switch t.Type { + case lexer.SELECT: + expr, consumed, err = parseSelect(restTokens[idx:]) + case lexer.FROM: + expr, consumed, err = parseFrom(restTokens[idx:]) case lexer.FUNCTION: expr, consumed, err = parseFunction(restTokens) - if err != nil{ - fmt.Println(err) - } - case lexer.IDENT: + case lexer.EOF: + return exprs, nil + } + if err != nil{ + fmt.Println(err) } - restTokens = restTokens[consumed:] + idx += consumed exprs = append(exprs, expr) } - - return exprs, nil } diff --git a/sqlfmt/parserR/parser_test.go b/sqlfmt/parserR/parser_test.go new file mode 100644 index 0000000..2cfdf28 --- /dev/null +++ b/sqlfmt/parserR/parser_test.go @@ -0,0 +1,27 @@ +package parserR + +import ( + "fmt" + "github.com/kanmu/go-sqlfmt/sqlfmt/lexer" + "testing" +) + + +func TestParseTokens(t *testing.T) { + testTokens := []lexer.Token{ + {Type: lexer.SELECT, Value: "SELECT"}, + {Type: lexer.IDENT, Value: "name"}, + {Type: lexer.COMMA, Value: ","}, + {Type: lexer.IDENT, Value: "age"}, + {Type: lexer.FROM, Value: "FROM"}, + {Type: lexer.IDENT, Value: "user"}, + {Type: lexer.EOF, Value: "EOF"}, + } + + res, err := ParseTokens(testTokens) + if err != nil { + fmt.Println("Error") + t.Fatal(err) + } + fmt.Println(res) +} \ No newline at end of file diff --git a/sqlfmt/parserR/select.go b/sqlfmt/parserR/select.go index e69de29..9738ca8 100644 --- a/sqlfmt/parserR/select.go +++ 
diff --git a/sqlfmt/parserR/select.go b/sqlfmt/parserR/select.go
index e69de29..9738ca8 100644
--- a/sqlfmt/parserR/select.go
+++ b/sqlfmt/parserR/select.go
@@ -0,0 +1,68 @@
+package parserR
+
+import (
+   "github.com/kanmu/go-sqlfmt/sqlfmt/lexer"
+)
+
+type SelectExpr struct {
+   Values []interface{}
+   Parent Expr
+   SubQueryCnt int
+}
+
+func parseSelect(tokens []lexer.Token)(*SelectExpr, int, error){
+   var (
+       expr = &SelectExpr{}
+       consumed = 0
+       restTokens = tokens
+   )
+
+   // having each parse function return just the Expr and the count of consumed tokens seems like a good interface
+   idx := 0
+   for {
+       t := restTokens[idx]
+
+       if expr.endTType(t.Type) {
+           return expr, idx, nil
+       }
+
+
+       // append the very first token as-is
+       // but is this only needed for functions?
+       if idx == 0 {
+           expr.Values = append(expr.Values, t)
+           idx++
+       } else {
+           switch t.Type {
+           case lexer.STARTPARENTHESIS:
+               parseParenthesis(restTokens)
+           case lexer.FUNCTION:
+               cExpr, consumed, err := parseFunction(tokens[consumed:])
+               if err != nil {
+                   // FIXME: handle the error properly
+                   return nil, 0, err
+               }
+
+               cExpr.Parent = expr
+               expr.Values = append(expr.Values, cExpr)
+               idx += consumed
+           default:
+               expr.Values = append(expr.Values, t)
+               idx++
+           }
+       }
+   }
+}
+
+func (expr *SelectExpr) endTType(ttype lexer.TokenType) bool{
+   for _, end := range lexer.EndOfSelect{
+       if ttype == end {
+           return true
+       }
+   }
+   return false
+}
+
+func (f *SelectExpr) Build() string {
+   return ""
+}
\ No newline at end of file
diff --git a/sqlfmt/parserR/select_test.go b/sqlfmt/parserR/select_test.go
index e69de29..c89290c 100644
--- a/sqlfmt/parserR/select_test.go
+++ b/sqlfmt/parserR/select_test.go
@@ -0,0 +1 @@
+package parserR
\ No newline at end of file

From 04d097254eb80130c8a04f48f90670d63cc7eff9 Mon Sep 17 00:00:00 2001
From: pongzu
Date: Wed, 24 Jun 2020 02:13:55 +0900
Subject: [PATCH 13/15] refactored

---
 sqlfmt/parserR/from.go        |  11 ----
 sqlfmt/parserR/function.go    | 107 +++++++++++++++++-----------------
 sqlfmt/parserR/parenthesis.go |   9 ---
 sqlfmt/parserR/parser.go      |  51 +++++++++++-----
 sqlfmt/parserR/select.go      |  54 ++++++++---------
 5 files changed, 113 insertions(+), 119 deletions(-)

diff --git a/sqlfmt/parserR/from.go b/sqlfmt/parserR/from.go
index 835b5c3..fc85d8d 100644
--- a/sqlfmt/parserR/from.go
+++ b/sqlfmt/parserR/from.go
@@ -13,7 +13,6 @@ type FromExpr struct {
 func parseFrom(tokens []lexer.Token)(*FromExpr, int, error){
    var (
        expr = &FromExpr{}
-       consumed = 0
        restTokens = tokens
    )

@@ -28,17 +27,7 @@ func parseFrom(tokens []lexer.Token)(*FromExpr, int, error){

        switch t.Type {
        case lexer.STARTPARENTHESIS:
-           parseParenthesis(restTokens)
        case lexer.FUNCTION:
-           cExpr, consumed, err := parseFunction(tokens[consumed:])
-           if err != nil {
-               // FIXME: handle the error properly
-               return nil, 0, err
-           }
-
-           cExpr.Parent = expr
-           expr.Values = append(expr.Values, cExpr)
-           idx += consumed
        default:
            expr.Values = append(expr.Values, t)
            idx++
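FromExpr above, and SelectExpr and ParenthesisExpr alongside it, each carry a structurally identical endTType method scanning a terminator slice. One possible consolidation is a single membership helper shared by every clause parser; a sketch under that assumption, with illustrative terminator lists standing in for the real ones in sqlfmt/lexer/token.go:

package main

import "fmt"

type TokenType int

const (
    SELECT TokenType = iota
    FROM
    WHERE
    EOF
)

// Illustrative copies of the clause-terminator lists; the real
// EndOfSelect and EndOfFrom live in sqlfmt/lexer/token.go.
var (
    endOfSelect = []TokenType{FROM, EOF}
    endOfFrom   = []TokenType{WHERE, EOF}
)

// isEndOf replaces the per-expression endTType methods with one
// shared membership check over a terminator list.
func isEndOf(ttype TokenType, ends []TokenType) bool {
    for _, end := range ends {
        if ttype == end {
            return true
        }
    }
    return false
}

func main() {
    fmt.Println(isEndOf(FROM, endOfSelect))  // true: FROM terminates a SELECT clause
    fmt.Println(isEndOf(WHERE, endOfSelect)) // false
}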
diff --git a/sqlfmt/parserR/function.go b/sqlfmt/parserR/function.go
index cf0a160..589f31d 100644
--- a/sqlfmt/parserR/function.go
+++ b/sqlfmt/parserR/function.go
@@ -1,56 +1,55 @@
 package parserR

-import "github.com/kanmu/go-sqlfmt/sqlfmt/lexer"
-
-type FunctionExpr struct {
-   Values []interface{}
-   Parent Expr
-   SubQueryCnt int
-}
-
-func parseFunction(tokens []lexer.Token)(*FunctionExpr, int, error){
-   var (
-       expr = &FunctionExpr{}
-       consumed = 0
-       restTokens = tokens
-   )
-
-   // having each parse function return just the Expr and the count of consumed tokens seems like a good interface
-   for t := restTokens[0]; expr.endTType(t.Type); {
-       switch t.Type {
-       case restTokens[0].Type:
-           // the first token would get parsed as the expression itself, so append the Token as-is
-           expr.Values = append(expr.Values, t)
-           consumed ++
-       case lexer.FUNCTION:
-           cExpr, cConsumed, err := parseFunction(tokens[consumed:])
-           if err != nil {
-               // FIXME: handle the error properly
-               return nil, 0, err
-           }
-
-           cExpr.Parent = expr
-           expr.Values = append(expr.Values, cExpr)
-           consumed += cConsumed
-       default:
-           expr.Values = append(expr.Values, t)
-           consumed ++
-       }
-       restTokens = restTokens[consumed:]
-   }
-
-   return expr, consumed, nil
-}
-
-func (expr *FunctionExpr) endTType(ttype lexer.TokenType) bool{
-   for _, end := range lexer.EndOfFunction{
-       if ttype == end {
-           return true
-       }
-   }
-   return false
-}
-
-func (f FunctionExpr) Build() string {
-   return ""
-}
+//import "github.com/kanmu/go-sqlfmt/sqlfmt/lexer"
+//
+//type FunctionExpr struct {
+//  Values []interface{}
+//  SubQueryCnt int
+//}
+//
+//func parseFunction(tokens []lexer.Token)(*FunctionExpr, int, error){
+//  var (
+//      expr = &FunctionExpr{}
+//      consumed = 0
+//      restTokens = tokens
+//  )
+//
+//  // having each parse function return just the Expr and the count of consumed tokens seems like a good interface
+//  for t := restTokens[0]; expr.endTType(t.Type); {
+//      switch t.Type {
+//      case restTokens[0].Type:
+//          // the first token would get parsed as the expression itself, so append the Token as-is
+//          expr.Values = append(expr.Values, t)
+//          consumed ++
+//      case lexer.FUNCTION:
+//          cExpr, cConsumed, err := parseFunction(tokens[consumed:])
+//          if err != nil {
+//              // FIXME: handle the error properly
+//              return nil, 0, err
+//          }
+//
+//          cExpr.Parent = expr
+//          expr.Values = append(expr.Values, cExpr)
+//          consumed += cConsumed
+//      default:
+//          expr.Values = append(expr.Values, t)
+//          consumed ++
+//      }
+//      restTokens = restTokens[consumed:]
+//  }
+//
+//  return expr, consumed, nil
+//}
+//
+//func (expr *FunctionExpr) endTType(ttype lexer.TokenType) bool{
+//  for _, end := range lexer.EndOfFunction{
+//      if ttype == end {
+//          return true
+//      }
+//  }
+//  return false
+//}
+//
+//func (f FunctionExpr) Build() string {
+//  return ""
+//}
diff --git a/sqlfmt/parserR/parenthesis.go b/sqlfmt/parserR/parenthesis.go
index 98daa18..fcfdeff 100644
--- a/sqlfmt/parserR/parenthesis.go
+++ b/sqlfmt/parserR/parenthesis.go
@@ -25,15 +25,6 @@ func parseParenthesis(tokens []lexer.Token)(*ParenthesisExpr, int, error){
        case lexer.SELECT:
            // calling something like a ParseSubquery function here would be good
        case lexer.FUNCTION:
-           cExpr, cConsumed, err := parseFunction(tokens[consumed:])
-           if err != nil {
-               // FIXME: handle the error properly
-               return nil, 0, err
-           }
-
-           cExpr.Parent = expr
-           expr.Values = append(expr.Values, cExpr)
-           consumed += cConsumed
        default:
            expr.Values = append(expr.Values, t)
            consumed ++
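The parenthesis parser's comment suggests calling something like a ParseSubquery function when a SELECT follows the opening parenthesis. A minimal sketch of how that dispatch could be detected by peeking one token ahead; isSubQuery is hypothetical and the types are simplified stand-ins, not the repository's lexer package:

package main

import "fmt"

type TokenType int

const (
    STARTPARENTHESIS TokenType = iota
    ENDPARENTHESIS
    SELECT
    IDENT
)

type Token struct {
    Type  TokenType
    Value string
}

// isSubQuery peeks one token past the opening parenthesis: a SELECT
// there means the parenthesis opens a subquery rather than a plain
// grouped expression.
func isSubQuery(tokens []Token) bool {
    return len(tokens) > 1 &&
        tokens[0].Type == STARTPARENTHESIS &&
        tokens[1].Type == SELECT
}

func main() {
    sub := []Token{{STARTPARENTHESIS, "("}, {SELECT, "SELECT"}, {IDENT, "id"}, {ENDPARENTHESIS, ")"}}
    grp := []Token{{STARTPARENTHESIS, "("}, {IDENT, "id"}, {ENDPARENTHESIS, ")"}}
    fmt.Println(isSubQuery(sub), isSubQuery(grp)) // true false
}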
diff --git a/sqlfmt/parserR/parser.go b/sqlfmt/parserR/parser.go
index 5c1d1e4..69c6db2 100644
--- a/sqlfmt/parserR/parser.go
+++ b/sqlfmt/parserR/parser.go
@@ -1,7 +1,6 @@
 package parserR

 import (
-   "fmt"
    "github.com/kanmu/go-sqlfmt/sqlfmt/lexer"
 )

@@ -9,35 +8,59 @@ type Expr interface {
    Build()string
 }

+type Result struct {
+   Values []Expr
+}
+
+func (pr *Result) Build() string {
+   return ""
+}
+
 func ParseTokens(tokens []lexer.Token) ([]Expr, error) {
+   rslt := &Result{}
    var (
-       err error
+       idx int
        expr Expr
-       exprs []Expr
        consumed int
+       err error
    )
-   restTokens := tokens

-   idx := 0
    for {
-       t := restTokens[idx]
+       t := tokens[idx]
+       if rslt.endTType(t.Type){
+           return rslt.Values, nil
+       }

        switch t.Type {
        case lexer.SELECT:
-           expr, consumed, err = parseSelect(restTokens[idx:])
+           expr, consumed, err = parseSelect(tokens[idx:])
        case lexer.FROM:
-           expr, consumed, err = parseFrom(restTokens[idx:])
+           expr, consumed, err = parseFrom(tokens[idx:])
        case lexer.FUNCTION:
-           expr, consumed, err = parseFunction(restTokens)
        case lexer.EOF:
-           return exprs, nil
+           return rslt.Values, nil
        }

-       if err != nil{
-           fmt.Println(err)
+       if err != nil {
+           return nil, err
        }

-       idx += consumed
-       exprs = append(exprs, expr)
+       rslt.append(expr)
+       idx = nextIDX(idx, consumed)
    }
 }
+
+
+func (rslt *Result) append(elm Expr){
+   rslt.Values = append(rslt.Values, elm)
+}
+
+func nextIDX(idx, consumed int) int{
+   return idx+consumed
+}
+
+func (rslt *Result) endTType(ttype lexer.TokenType) bool {
+   if ttype == lexer.EOF{
+       return true
+   }
+   return false
+}
\ No newline at end of file
diff --git a/sqlfmt/parserR/select.go b/sqlfmt/parserR/select.go
index 9738ca8..100d1d9 100644
--- a/sqlfmt/parserR/select.go
+++ b/sqlfmt/parserR/select.go
@@ -1,56 +1,44 @@
 package parserR

 import (
+   "fmt"
    "github.com/kanmu/go-sqlfmt/sqlfmt/lexer"
 )

 type SelectExpr struct {
    Values []interface{}
-   Parent Expr
    SubQueryCnt int
 }

 func parseSelect(tokens []lexer.Token)(*SelectExpr, int, error){
-   var (
-       expr = &SelectExpr{}
-       consumed = 0
-       restTokens = tokens
-   )
+   expr := &SelectExpr{}

-   // having each parse function return just the Expr and the count of consumed tokens seems like a good interface
-   idx := 0
+   var(
+       idx int
+       consumed int
+       value interface{}
+       err error
+   )
    for {
-       t := restTokens[idx]
-
+       t := tokens[idx]
        if expr.endTType(t.Type) {
            return expr, idx, nil
        }

-
-       // append the very first token as-is
-       // but is this only needed for functions?
-       if idx == 0 {
-           expr.Values = append(expr.Values, t)
-           idx++
-       } else {
+       value = t
+       consumed = 1
+       if idx > 0 {
            switch t.Type {
            case lexer.STARTPARENTHESIS:
-               parseParenthesis(restTokens)
+               // TODO
            case lexer.FUNCTION:
-               cExpr, consumed, err := parseFunction(tokens[consumed:])
-               if err != nil {
-                   // FIXME: handle the error properly
-                   return nil, 0, err
-               }
-
-               cExpr.Parent = expr
-               expr.Values = append(expr.Values, cExpr)
-               idx += consumed
-           default:
-               expr.Values = append(expr.Values, t)
-               idx++
+               // TODO
            }
        }
+
+       fmt.Println(err)
+       expr.append(value)
+       idx = nextIDX(idx, consumed)
    }
 }
@@ -63,6 +51,10 @@ func (expr *SelectExpr) endTType(ttype lexer.TokenType) bool{
    return false
 }

-func (f *SelectExpr) Build() string {
+func (expr *SelectExpr) append(elm interface{}) {
+   expr.Values = append(expr.Values, elm)
+}
+
+func (expr *SelectExpr) Build() string {
    return ""
 }
\ No newline at end of file

From 0e9024cbb18a3aa52830ca5155b88ccfb2567714 Mon Sep 17 00:00:00 2001
From: pongzu
Date: Wed, 24 Jun 2020 13:11:04 +0900
Subject: [PATCH 14/15] test passed

---
 sqlfmt/lexer/token.go    |  2 +-
 sqlfmt/parserR/from.go   | 34 ++++++++++++++++++++++------------
 sqlfmt/parserR/parser.go |  6 +-----
 sqlfmt/parserR/select.go |  6 +++---
 4 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/sqlfmt/lexer/token.go b/sqlfmt/lexer/token.go
index c2ce0e5..ecb24ed 100644
--- a/sqlfmt/lexer/token.go
+++ b/sqlfmt/lexer/token.go
@@ -113,7 +113,7 @@ func (t Token) IncrementIndentLevel(lev int) {}
 // end keywords of each clause
 var (
    EndOfSelect = []TokenType{FROM, UNION, EOF}
-   EndOfCase = []TokenType{END}
+   EndOfCase = []TokenType{END, EOF}
    EndOfFrom = []TokenType{WHERE, INNER, OUTER, LEFT, RIGHT, JOIN, NATURAL, CROSS, ORDER, GROUP, UNION, OFFSET, LIMIT, FETCH, EXCEPT, INTERSECT, EOF, ENDPARENTHESIS}
    EndOfJoin = []TokenType{WHERE, ORDER, GROUP, LIMIT, OFFSET, FETCH, ANDGROUP, ORGROUP, LEFT, RIGHT, INNER, OUTER, NATURAL, CROSS, UNION, EXCEPT, INTERSECT, EOF, ENDPARENTHESIS}
    EndOfWhere = []TokenType{GROUP, ORDER, LIMIT, OFFSET, FETCH, ANDGROUP, OR, UNION, EXCEPT, INTERSECT, RETURNING, EOF, ENDPARENTHESIS}
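The token.go change above adds EOF to EndOfCase, presumably because every clause parser loops until it sees a terminator, so a terminator list without EOF lets the loop index run past the end of the token slice on malformed input. A small sketch of a check that would enforce this invariant; the lists here are illustrative copies and checkContainsEOF is hypothetical, not part of the repository:

package main

import "fmt"

type TokenType int

const (
    END TokenType = iota
    FROM
    UNION
    EOF
)

// Illustrative copies; the real lists live in sqlfmt/lexer/token.go.
var endLists = map[string][]TokenType{
    "EndOfSelect": {FROM, UNION, EOF},
    "EndOfCase":   {END, EOF},
}

// checkContainsEOF verifies that every clause-terminator list can stop
// a parser at end of input.
func checkContainsEOF() error {
    for name, list := range endLists {
        hasEOF := false
        for _, t := range list {
            if t == EOF {
                hasEOF = true
                break
            }
        }
        if !hasEOF {
            return fmt.Errorf("%s is missing EOF", name)
        }
    }
    return nil
}

func main() {
    fmt.Println(checkContainsEOF()) // <nil>
}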
diff --git a/sqlfmt/parserR/from.go b/sqlfmt/parserR/from.go
index fc85d8d..839048d 100644
--- a/sqlfmt/parserR/from.go
+++ b/sqlfmt/parserR/from.go
@@ -1,6 +1,7 @@
 package parserR

 import (
+   "fmt"
    "github.com/kanmu/go-sqlfmt/sqlfmt/lexer"
 )

@@ -11,27 +12,32 @@ type FromExpr struct {
 }

 func parseFrom(tokens []lexer.Token)(*FromExpr, int, error){
+   expr := &FromExpr{}
    var (
-       expr = &FromExpr{}
-       restTokens = tokens
+       idx int
+       value interface{}
+       consumed int
+       err error
    )
-
-   idx := 0
    // having each parse function return just the Expr and the count of consumed tokens seems like a good interface
    for {
-       t := restTokens[idx]
-
+       t := tokens[idx]
        if expr.endTType(t.Type) {
            return expr, idx, nil
        }

-       switch t.Type {
-       case lexer.STARTPARENTHESIS:
-       case lexer.FUNCTION:
-       default:
-           expr.Values = append(expr.Values, t)
-           idx++
+       value = t
+       consumed = 1
+       if idx > 0{
+           switch t.Type {
+           case lexer.STARTPARENTHESIS:
+           case lexer.FUNCTION:
+           }
        }
+
+       fmt.Println(err)
+       expr.append(value)
+       idx += consumed
    }
 }
@@ -46,4 +52,8 @@ func (expr *FromExpr) endTType(ttype lexer.TokenType) bool{

 func (f *FromExpr) Build() string {
    return ""
+}
+
+func (f *FromExpr) append(elm interface{}) {
+   f.Values = append(f.Values, elm)
 }
\ No newline at end of file
diff --git a/sqlfmt/parserR/parser.go b/sqlfmt/parserR/parser.go
index 69c6db2..34f0c06 100644
--- a/sqlfmt/parserR/parser.go
+++ b/sqlfmt/parserR/parser.go
@@ -45,7 +45,7 @@ func ParseTokens(tokens []lexer.Token) ([]Expr, error) {
        }

        rslt.append(expr)
-       idx = nextIDX(idx, consumed)
+       idx += consumed
    }
 }

@@ -54,10 +54,6 @@ func (rslt *Result) append(elm Expr){
    rslt.Values = append(rslt.Values, elm)
 }

-func nextIDX(idx, consumed int) int{
-   return idx+consumed
-}
-
 func (rslt *Result) endTType(ttype lexer.TokenType) bool {
    if ttype == lexer.EOF{
        return true
diff --git a/sqlfmt/parserR/select.go b/sqlfmt/parserR/select.go
index 100d1d9..d7022d5 100644
--- a/sqlfmt/parserR/select.go
+++ b/sqlfmt/parserR/select.go
@@ -13,10 +13,10 @@ type SelectExpr struct {
 func parseSelect(tokens []lexer.Token)(*SelectExpr, int, error){
    expr := &SelectExpr{}

-   var(
+   var (
        idx int
-       consumed int
        value interface{}
+       consumed int
        err error
    )
    for {
@@ -38,7 +38,7 @@ func parseSelect(tokens []lexer.Token)(*SelectExpr, int, error){

        fmt.Println(err)
        expr.append(value)
-       idx = nextIDX(idx, consumed)
+       idx += consumed
    }
 }

From edf902304f15aaaf8dd6d3a642437119f111f7fe Mon Sep 17 00:00:00 2001
From: pongzu
Date: Thu, 25 Jun 2020 00:15:53 +0900
Subject: [PATCH 15/15] add comment

---
 sqlfmt/parserR/select.go | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sqlfmt/parserR/select.go b/sqlfmt/parserR/select.go
index d7022d5..2ee1dd1 100644
--- a/sqlfmt/parserR/select.go
+++ b/sqlfmt/parserR/select.go
@@ -20,15 +20,17 @@ func parseSelect(tokens []lexer.Token)(*SelectExpr, int, error){
        err error
    )
    for {
-       t := tokens[idx]
-       if expr.endTType(t.Type) {
+       token := tokens[idx]
+       if expr.endTType(token.Type) {
            return expr, idx, nil
        }

-       value = t
+       // if any expr appears from the second token, it should be parsed as one expr and consumed will be the count of tokens in the expr
+       // in other cases, value will be the token and consumed will be 1
+       value = token
        consumed = 1
        if idx > 0 {
-           switch t.Type {
+           switch token.Type {
            case lexer.STARTPARENTHESIS:
                // TODO
            case lexer.FUNCTION:
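The comment added in this final patch pins down the consumption rule: from the second token onward, a nested expression is parsed as a single value and consumed is the number of tokens inside it; in every other case the value is the token itself and consumed is 1. A self-contained sketch of that accounting, with simplified types and a hypothetical parseNested standing in for the parseFunction and parseParenthesis branches that are still marked TODO above:

package main

import "fmt"

type TokenType int

const (
    SELECT TokenType = iota
    FUNCTION
    STARTPARENTHESIS
    IDENT
    ENDPARENTHESIS
    FROM
    EOF
)

type Token struct {
    Type  TokenType
    Value string
}

// parseNested swallows everything through the closing parenthesis and
// reports the token count, mimicking a nested-expression parser.
func parseNested(tokens []Token) ([]Token, int) {
    for i, t := range tokens {
        if t.Type == ENDPARENTHESIS {
            return tokens[:i+1], i + 1
        }
    }
    return tokens, len(tokens)
}

// parseSelect applies the rule from the comment: single tokens consume 1,
// nested expressions consume their full token count.
func parseSelect(tokens []Token) ([]interface{}, int) {
    var values []interface{}
    idx := 0
    for {
        t := tokens[idx]
        if t.Type == FROM || t.Type == EOF {
            return values, idx
        }
        value, consumed := interface{}(t), 1
        if idx > 0 && t.Type == FUNCTION {
            value, consumed = parseNested(tokens[idx:])
        }
        values = append(values, value)
        idx += consumed
    }
}

func main() {
    tokens := []Token{
        {SELECT, "SELECT"},
        {FUNCTION, "COUNT"}, {STARTPARENTHESIS, "("}, {IDENT, "id"}, {ENDPARENTHESIS, ")"},
        {FROM, "FROM"}, {IDENT, "user"}, {EOF, "EOF"},
    }
    values, consumed := parseSelect(tokens)
    fmt.Println(len(values), consumed) // 2 5: SELECT plus one nested expression, five tokens consumed
}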