From 6f345ac9d4c0a602df8925c4994f2d9cdb3e9810 Mon Sep 17 00:00:00 2001 From: Hiroya Fujinami Date: Sat, 21 Dec 2024 09:42:38 +0900 Subject: [PATCH] Introduce `BadNode` and `MultiError` (#230) * Introduce `BadNode` and `ErrorList` * Fix ErrorList.String * Fix test to expect parsing errors for testdata/*/bad_*.sql * Use direct returns instead https://github.com/cloudspannerecosystem/memefish/pull/189#discussion_r1835347015 * Make the `bad_` prefix more evil * Store tokens in `BadNode` instead of a raw string * Rename noError to noPanic * Add Bad{Statement,QueryExpr,Expr,Type,DDL,DML} ASTs https://github.com/cloudspannerecosystem/memefish/pull/230#issuecomment-2556336229 Co-Authored-By: apstndb <803393+apstndb@users.noreply.github.com> * Add the doc about error recovering * Rename `ErrorList` to `MultiError` and improve error messages * Shorten error message more --------- Co-authored-by: apstndb <803393+apstndb@users.noreply.github.com> --- ast/ast.go | 96 +++- ast/pos.go | 56 ++ ast/sql.go | 24 + docs/content/error-recover/_index.md | 137 +++++ docs/content/example-parse/_index.md | 4 - docs/hugo.toml | 2 + error.go | 40 +- error_test.go | 88 +++ lexer.go | 177 ++++-- lexer_test.go | 20 + parser.go | 543 ++++++++++++------ parser_test.go | 23 +- .../input/ddl/!bad_alter_table_add_column.sql | 1 + testdata/input/dml/!bad_insert.sql | 3 + .../expr/!bad_new_braced_constructor.sql | 1 + testdata/input/expr/!bad_plus.sql | 1 + testdata/input/expr/!bad_plus2.sql | 1 + testdata/input/expr/!bad_typed_struct.sql | 1 + testdata/input/query/!bad_hint_select.sql | 1 + testdata/input/query/!bad_hint_select_2.sql | 1 + testdata/input/query/!bad_select.sql | 1 + testdata/input/query/!bad_select_order.sql | 1 + .../input/query/!bad_select_union_select.sql | 1 + .../ddl/!bad_alter_table_add_column.sql.txt | 41 ++ testdata/result/dml/!bad_insert.sql.txt | 158 +++++ .../expr/!bad_new_braced_constructor.sql.txt | 63 ++ testdata/result/expr/!bad_plus.sql.txt | 33 ++ 
testdata/result/expr/!bad_plus2.sql.txt | 70 +++ .../result/expr/!bad_typed_struct.sql.txt | 62 ++ .../result/query/!bad_hint_select.sql.txt | 42 ++ .../result/query/!bad_hint_select_2.sql.txt | 45 ++ testdata/result/query/!bad_select.sql.txt | 27 + .../result/query/!bad_select_order.sql.txt | 57 ++ .../query/!bad_select_union_select.sql.txt | 48 ++ .../!bad_alter_table_add_column.sql.txt | 41 ++ .../result/statement/!bad_hint_select.sql.txt | 42 ++ .../statement/!bad_hint_select_2.sql.txt | 45 ++ testdata/result/statement/!bad_insert.sql.txt | 158 +++++ testdata/result/statement/!bad_select.sql.txt | 27 + .../statement/!bad_select_order.sql.txt | 57 ++ .../!bad_select_union_select.sql.txt | 48 ++ token/file.go | 9 +- token/file_test.go | 22 +- token/quote.go | 4 +- token/token.go | 1 + tools/parse/main.go | 22 +- 46 files changed, 2090 insertions(+), 255 deletions(-) create mode 100644 docs/content/error-recover/_index.md create mode 100644 error_test.go create mode 100644 testdata/input/ddl/!bad_alter_table_add_column.sql create mode 100644 testdata/input/dml/!bad_insert.sql create mode 100644 testdata/input/expr/!bad_new_braced_constructor.sql create mode 100644 testdata/input/expr/!bad_plus.sql create mode 100644 testdata/input/expr/!bad_plus2.sql create mode 100644 testdata/input/expr/!bad_typed_struct.sql create mode 100644 testdata/input/query/!bad_hint_select.sql create mode 100644 testdata/input/query/!bad_hint_select_2.sql create mode 100644 testdata/input/query/!bad_select.sql create mode 100644 testdata/input/query/!bad_select_order.sql create mode 100644 testdata/input/query/!bad_select_union_select.sql create mode 100644 testdata/result/ddl/!bad_alter_table_add_column.sql.txt create mode 100644 testdata/result/dml/!bad_insert.sql.txt create mode 100644 testdata/result/expr/!bad_new_braced_constructor.sql.txt create mode 100644 testdata/result/expr/!bad_plus.sql.txt create mode 100644 testdata/result/expr/!bad_plus2.sql.txt create mode 100644 
testdata/result/expr/!bad_typed_struct.sql.txt create mode 100644 testdata/result/query/!bad_hint_select.sql.txt create mode 100644 testdata/result/query/!bad_hint_select_2.sql.txt create mode 100644 testdata/result/query/!bad_select.sql.txt create mode 100644 testdata/result/query/!bad_select_order.sql.txt create mode 100644 testdata/result/query/!bad_select_union_select.sql.txt create mode 100644 testdata/result/statement/!bad_alter_table_add_column.sql.txt create mode 100644 testdata/result/statement/!bad_hint_select.sql.txt create mode 100644 testdata/result/statement/!bad_hint_select_2.sql.txt create mode 100644 testdata/result/statement/!bad_insert.sql.txt create mode 100644 testdata/result/statement/!bad_select.sql.txt create mode 100644 testdata/result/statement/!bad_select_order.sql.txt create mode 100644 testdata/result/statement/!bad_select_union_select.sql.txt diff --git a/ast/ast.go b/ast/ast.go index 61408b1d..396a0b96 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -14,6 +14,7 @@ // - sqlIdentQuote x: Quotes the given identifier string if needed. // - sqlStringQuote s: Returns the SQL quoted string of s. // - sqlBytesQuote bs: Returns the SQL quotes bytes of bs. +// - tokenJoin toks: Concateates the string representations of tokens. // - isnil v: Checks whether v is nil or others. // // Each Node's documentation has pos and end information using the following EBNF. 
@@ -59,6 +60,9 @@ type Statement interface { // - https://cloud.google.com/spanner/docs/reference/standard-sql/data-definition-language // - https://cloud.google.com/spanner/docs/reference/standard-sql/dml-syntax +func (BadStatement) isStatement() {} +func (BadDDL) isStatement() {} +func (BadDML) isStatement() {} func (QueryStatement) isStatement() {} func (CreateSchema) isStatement() {} func (DropSchema) isStatement() {} @@ -109,6 +113,7 @@ type QueryExpr interface { isQueryExpr() } +func (BadQueryExpr) isQueryExpr() {} func (Select) isQueryExpr() {} func (Query) isQueryExpr() {} func (FromQuery) isQueryExpr() {} @@ -174,6 +179,7 @@ type Expr interface { isExpr() } +func (BadExpr) isExpr() {} func (BinaryExpr) isExpr() {} func (UnaryExpr) isExpr() {} func (InExpr) isExpr() {} @@ -296,6 +302,7 @@ type Type interface { isType() } +func (BadType) isType() {} func (SimpleType) isType() {} func (ArrayType) isType() {} func (StructType) isType() {} @@ -343,6 +350,7 @@ type DDL interface { // // - https://cloud.google.com/spanner/docs/reference/standard-sql/data-definition-language +func (BadDDL) isDDL() {} func (CreateSchema) isDDL() {} func (DropSchema) isDDL() {} func (CreateDatabase) isDDL() {} @@ -501,6 +509,7 @@ type DML interface { isDML() } +func (BadDML) isDML() {} func (Insert) isDML() {} func (Delete) isDML() {} func (Update) isDML() {} @@ -533,6 +542,84 @@ func (ChangeStreamSetFor) isChangeStreamAlteration() {} func (ChangeStreamDropForAll) isChangeStreamAlteration() {} func (ChangeStreamSetOptions) isChangeStreamAlteration() {} +// ================================================================================ +// +// Bad Node +// +// ================================================================================ + +// BadNode is a placeholder node for a source code containing syntax errors. 
+// +// {{.Tokens | tokenJoin}} +type BadNode struct { + // pos = NodePos + // end = NodeEnd + + NodePos, NodeEnd token.Pos + + Tokens []*token.Token +} + +// BadStatement is a BadNode for Statement. +// +// {{.BadNode | sql}} +type BadStatement struct { + // pos = BadNode.pos + // end = BadNode.end + + BadNode *BadNode +} + +// BadQueryExpr is a BadNode for QueryExpr. +// +// {{.BadNode | sql}} +type BadQueryExpr struct { + // pos = BadNode.pos + // end = BadNode.end + + BadNode *BadNode +} + +// BadExpr is a BadNode for Expr. +// +// {{.BadNode | sql}} +type BadExpr struct { + // pos = BadNode.pos + // end = BadNode.end + + BadNode *BadNode +} + +// BadType is a BadNode for Type. +// +// {{.BadNode | sql}} +type BadType struct { + // pos = BadNode.pos + // end = BadNode.end + + BadNode *BadNode +} + +// BadDDL is a BadNode for DDL. +// +// {{.BadNode | sql}} +type BadDDL struct { + // pos = BadNode.pos + // end = BadNode.end + + BadNode *BadNode +} + +// BadDML is a BadNode for DML. +// +// {{.BadNode | sql}} +type BadDML struct { + // pos = BadNode.pos + // end = BadNode.end + + BadNode *BadNode +} + // ================================================================================ // // SELECT @@ -1265,9 +1352,10 @@ type SelectorExpr struct { // IndexExpr is a subscript operator expression node. // This node can be: -// - array subscript operator -// - struct subscript operator -// - JSON subscript operator +// - array subscript operator +// - struct subscript operator +// - JSON subscript operator +// // Note: The name IndexExpr is a historical reason, maybe better to rename to SubscriptExpr. // // {{.Expr | sql}}[{{.Index | sql}}] @@ -2259,8 +2347,6 @@ type DropProtoBundle struct { Bundle token.Pos // position of "BUNDLE" pseudo keyword } -// end of PROTO BUNDLE statements - // CreateTable is CREATE TABLE statement node. 
// // CREATE TABLE {{if .IfNotExists}}IF NOT EXISTS{{end}} {{.Name | sql}} ( diff --git a/ast/pos.go b/ast/pos.go index 8993b82a..7ae4f66a 100644 --- a/ast/pos.go +++ b/ast/pos.go @@ -6,6 +6,62 @@ import ( "github.com/cloudspannerecosystem/memefish/token" ) +func (b *BadNode) Pos() token.Pos { + return b.NodePos +} + +func (b *BadNode) End() token.Pos { + return b.NodeEnd +} + +func (b *BadStatement) Pos() token.Pos { + return nodePos(wrapNode(b.BadNode)) +} + +func (b *BadStatement) End() token.Pos { + return nodeEnd(wrapNode(b.BadNode)) +} + +func (b *BadQueryExpr) Pos() token.Pos { + return nodePos(wrapNode(b.BadNode)) +} + +func (b *BadQueryExpr) End() token.Pos { + return nodeEnd(wrapNode(b.BadNode)) +} + +func (b *BadExpr) Pos() token.Pos { + return nodePos(wrapNode(b.BadNode)) +} + +func (b *BadExpr) End() token.Pos { + return nodeEnd(wrapNode(b.BadNode)) +} + +func (b *BadType) Pos() token.Pos { + return nodePos(wrapNode(b.BadNode)) +} + +func (b *BadType) End() token.Pos { + return nodeEnd(wrapNode(b.BadNode)) +} + +func (b *BadDDL) Pos() token.Pos { + return nodePos(wrapNode(b.BadNode)) +} + +func (b *BadDDL) End() token.Pos { + return nodeEnd(wrapNode(b.BadNode)) +} + +func (b *BadDML) Pos() token.Pos { + return nodePos(wrapNode(b.BadNode)) +} + +func (b *BadDML) End() token.Pos { + return nodeEnd(wrapNode(b.BadNode)) +} + func (q *QueryStatement) Pos() token.Pos { return nodePos(nodeChoice(wrapNode(q.Hint), wrapNode(q.Query))) } diff --git a/ast/sql.go b/ast/sql.go index 7bf4caba..87e383f9 100644 --- a/ast/sql.go +++ b/ast/sql.go @@ -155,6 +155,30 @@ func paren(p prec, e Expr) string { } } +// ================================================================================ +// +// Bad Node +// +// ================================================================================ + +func (b *BadNode) SQL() string { + var sql string + for _, tok := range b.Tokens { + if sql != "" && len(tok.Space) > 0 { + sql += " " + } + sql += tok.Raw + } + return sql +} 
+ +func (b *BadStatement) SQL() string { return b.BadNode.SQL() } +func (b *BadQueryExpr) SQL() string { return b.BadNode.SQL() } +func (b *BadExpr) SQL() string { return b.BadNode.SQL() } +func (b *BadType) SQL() string { return b.BadNode.SQL() } +func (b *BadDDL) SQL() string { return b.BadNode.SQL() } +func (b *BadDML) SQL() string { return b.BadNode.SQL() } + // ================================================================================ // // SELECT diff --git a/docs/content/error-recover/_index.md b/docs/content/error-recover/_index.md new file mode 100644 index 00000000..bd9ce958 --- /dev/null +++ b/docs/content/error-recover/_index.md @@ -0,0 +1,137 @@ +--- +date: 2024-12-20 00:00:00 +0900 +title: "Error recovering" +weight: 2 +--- + +Since v0.1.0, `memefish.ParseXXX` methods returns AST node(s) even if an error is reproted. +That is, if we try to parse incomplete SQL such as: + +```sql +SELECT (1 +) + (* 2) +``` + +Then, the following two errors are reported: + +```sql +syntax error: :1:12: unexpected token: ) + 1| SELECT (1 +) + (* 2) + | ^ +syntax error: :1:17: unexpected token: * + 1| SELECT (1 +) + (* 2) + | ^ +``` + +Hoever, the AST is also returned: + +```go {hl_lines=["10-31","36-57"]} +&ast.QueryStatement{ + Query: &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.BinaryExpr{ + Op: "+", + Left: &ast.ParenExpr{ + Lparen: 7, + Rparen: 11, + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 8, + NodeEnd: 11, + Tokens: []*token.Token{ + &token.Token{ + Kind: "", + Raw: "1", + Base: 10, + Pos: 8, + End: 9, + }, + &token.Token{ + Kind: "+", + Space: " ", + Raw: "+", + Pos: 10, + End: 11, + }, + }, + }, + }, + }, + Right: &ast.ParenExpr{ + Lparen: 15, + Rparen: 19, + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 16, + NodeEnd: 19, + Tokens: []*token.Token{ + &token.Token{ + Kind: "*", + Raw: "*", + Pos: 16, + End: 17, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "2", + Base: 10, + Pos: 18, + End: 
19, + }, + }, + }, + }, + }, + }, + }, + }, + }, +} +``` + +Thus, the places where the error occurred are filled with the `ast.BadXXX` nodes (`ast.BadExpr` in this example). + +## How méméfish performs error recovery + +This section explains how méméfish performs error recovery. + +In méméfish, a *recovery point* is set when parsing a syntax where some multiple types of AST nodes are the result. +For example, when parsing an parenthesized expression, the recovery point is set after the open parenthesis `(`. +If an error occurs in the parenthesized expression, the parser backtracks to the recovery point and skips the tokens until the parenthesized expression ends. +The skipped tokens are then collectively `ast.BadNode` and this node is wrapped up a specific `ast.BadXXX` node (e.g., `ast.BadExpr`). + +```sql +SELECT (1 + 2 *) + ^--- error point + ^---------- recovery point + |~~~~~| --- skipped tokens +``` + +Recovery points are set where: + +- the beginning of statements, queries, DDLs, DMLs, +- the beginning of expressions (e.g., after an open parenthesis `(`, `SELECT`, `WHERE` etc.), and +- the beginning of types. + +Token skipping is performed as follows. + +- For `ast.Statement`, `ast.DDL`, and `ast.DML`, + * skip tokens until a semicolon `;` appears. +- For `ast.QueryExpr`, + * skip tokens until a semicolon `;` appears, or + * skip tokens with counting the nest of parentheses `(` + + until the closing symbol (`)`) appears at no nestings, or + + until the symbol that is supposed to be the end of the expression (`UNION`, `INTERSECT`, `EXCEPT`) appears at no nestings. 
+- For `ast.Expr`, + * skip tokens until a semicolon `;` appears, or + * skip tokens with counting the nest of parentheses `(`, brackets `[`, `CASE` and `WHEN` + + until the closing symbol (`)`, `]`, `END`, `THEN`) appears at no nestings or + + until the symbol that is supposed to be the end of the expression (`,`, `AS`, `FROM`, `GROUP`, `HAVING`, `ORDER`, `LIMIT`, `OFFSET`, `AT`, `UNION`, `INTERSECT`, `EXCEPT`) appears at no nestings. +- For `ast.Type`, + * skip tokens until the semicolon `;` or the closing parenthesis `)` appears, or + * skip tokens with counting the nest of triangle brackets `<` + * until the closing symbol (`>`) appears at no nestings. + +Note that this skipping rules are just heuristics and may not be perfect. +In some cases, there is a possibility of skipping too many tokens. diff --git a/docs/content/example-parse/_index.md b/docs/content/example-parse/_index.md index 0d820e95..bcde9055 100644 --- a/docs/content/example-parse/_index.md +++ b/docs/content/example-parse/_index.md @@ -6,10 +6,6 @@ weight: 1 This example shows how to parse a Spanner SQL and unparse it. - - - ## Code - ```go package main diff --git a/docs/hugo.toml b/docs/hugo.toml index c881e529..d7d89cff 100644 --- a/docs/hugo.toml +++ b/docs/hugo.toml @@ -21,6 +21,8 @@ summaryLength = 30 startLevel = 2 endLevel = 6 ordered = false + [markup.highlight] + style = 'catppuccin-frappe' [params] description = "Spanner SQL parser for Go" diff --git a/error.go b/error.go index 8be99bf6..66cd5d4a 100644 --- a/error.go +++ b/error.go @@ -7,6 +7,41 @@ import ( "github.com/cloudspannerecosystem/memefish/token" ) +// MultiError is a list of errors occured on parsing. +// +// Note that `ParseXXX` methods returns this wrapped error even if the error is just one. +type MultiError []*Error + +func (list MultiError) String() string { + return list.Error() +} + +// Error returns an error message. +// +// This message only shows the first error's message and other errors' messages are omitted. 
+// If you want to obtain all messages of errors at once, you can use FullError instead. +func (list MultiError) Error() string { + switch len(list) { + case 0: + return "(0 errors)" + case 1: + return list[0].Error() + case 2: + return list[0].Error() + "\n(and 1 other error)" + default: + return fmt.Sprintf("%s\n(and %d other errors)", list[0].Error(), len(list)) + } +} + +// FullError returns a full error message. +func (list MultiError) FullError() string { + var message bytes.Buffer + for _, err := range list { + fmt.Fprintln(&message, err.Error()) + } + return message.String() +} + type Error struct { Message string Position *token.Position @@ -18,10 +53,9 @@ func (e *Error) String() string { func (e *Error) Error() string { var message bytes.Buffer - fmt.Fprintf(&message, "syntax error: %s: %s\n", e.Position, e.Message) + fmt.Fprintf(&message, "syntax error: %s: %s", e.Position, e.Message) if e.Position.Source != "" { - fmt.Fprintln(&message) - fmt.Fprint(&message, e.Position.Source) + fmt.Fprintf(&message, "\n%s", e.Position.Source) } return message.String() } diff --git a/error_test.go b/error_test.go new file mode 100644 index 00000000..1da85ddb --- /dev/null +++ b/error_test.go @@ -0,0 +1,88 @@ +package memefish + +import ( + "strings" + "testing" + + "github.com/MakeNowJust/heredoc/v2" + "github.com/cloudspannerecosystem/memefish/token" +) + +func TestMultiError(t *testing.T) { + err1 := &Error{ + Message: "error 1", + Position: &token.Position{ + FilePath: "foo", + Pos: 0, + End: 1, + Line: 0, + Column: 0, + EndLine: 0, + EndColumn: 1, + Source: " 1| a b\n | ^", + }, + } + err2 := &Error{ + Message: "error 2", + Position: &token.Position{ + FilePath: "foo", + Pos: 2, + End: 3, + Line: 0, + Column: 2, + EndLine: 0, + EndColumn: 3, + Source: " 1| a b\n | ^", + }, + } + + for _, testCase := range []struct { + list MultiError + error string + fullError string + }{ + { + MultiError{}, + "(0 errors)", + "", + }, + { + MultiError{err1}, + heredoc.Doc(` + 
syntax error: foo:1:1: error 1 + 1| a b + | ^ + `), + heredoc.Doc(` + syntax error: foo:1:1: error 1 + 1| a b + | ^ + `), + }, + { + MultiError{err1, err2}, + heredoc.Doc(` + syntax error: foo:1:1: error 1 + 1| a b + | ^ + (and 1 other error) + `), + heredoc.Doc(` + syntax error: foo:1:1: error 1 + 1| a b + | ^ + syntax error: foo:1:3: error 2 + 1| a b + | ^ + `), + }, + } { + if testCase.list.Error() != strings.TrimRight(testCase.error, "\n") { + t.Errorf("error on MultiError.Error():\n%s", testCase.list.Error()) + } + + if testCase.list.FullError() != testCase.fullError { + t.Errorf("error on MultiError.FullError():\n%s", testCase.list.FullError()) + } + } +} diff --git a/lexer.go b/lexer.go index 216aafb2..0c202e96 100644 --- a/lexer.go +++ b/lexer.go @@ -59,11 +59,11 @@ func (l *Lexer) NextToken() (err error) { } }() - l.nextToken() + l.nextToken(false) return } -func (l *Lexer) nextToken() { +func (l *Lexer) nextToken(noPanic bool) { l.lastTokenKind = l.Token.Kind l.Token = token.Token{} @@ -75,7 +75,8 @@ func (l *Lexer) nextToken() { space = l.Buffer[i:l.pos] i = l.pos - l.skipComment() + hasError := l.skipComment(noPanic) + if l.pos == i { break } @@ -85,6 +86,13 @@ func (l *Lexer) nextToken() { Pos: token.Pos(i), End: token.Pos(l.pos), }) + + if hasError { + l.Token.Pos = token.Pos(l.pos) + l.Token.End = token.Pos(l.pos) + l.Token.Kind = token.TokenBad + return + } } l.Token.Space = space @@ -93,16 +101,16 @@ func (l *Lexer) nextToken() { l.Token.Pos = token.Pos(l.pos) i := l.pos if l.dotIdent { - l.consumeFieldToken() + l.consumeFieldToken(noPanic) l.dotIdent = false } else { - l.consumeToken() + l.consumeToken(noPanic) } l.Token.Raw = l.Buffer[i:l.pos] l.Token.End = token.Pos(l.pos) } -func (l *Lexer) consumeToken() { +func (l *Lexer) consumeToken(noPanic bool) { if l.eof() { l.Token.Kind = token.TokenEOF return @@ -117,7 +125,7 @@ func (l *Lexer) consumeToken() { case '.': nextDotIdent := isNextDotIdent(l.lastTokenKind) if !nextDotIdent && l.peekOk(1) && 
char.IsDigit(l.peek(1)) { - l.consumeNumber() + l.consumeNumber(noPanic) } else { l.skip() l.Token.Kind = "." @@ -233,10 +241,15 @@ func (l *Lexer) consumeToken() { return case '`': l.Token.Kind = token.TokenIdent - l.Token.AsString = l.consumeQuotedContent("`", false, true, "identifier") + + var hasError bool + l.Token.AsString, hasError = l.consumeQuotedContent("`", false, true, "identifier", noPanic) + if hasError { + l.Token.Kind = token.TokenBad + } return case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - l.consumeNumber() + l.consumeNumber(noPanic) return case 'B', 'b', 'R', 'r', '"', '\'': bytes, raw := false, false @@ -251,13 +264,13 @@ func (l *Lexer) consumeToken() { l.skipN(i) switch { case bytes && raw: - l.consumeRawBytes() + l.consumeRawBytes(noPanic) case bytes: - l.consumeBytes() + l.consumeBytes(noPanic) case raw: - l.consumeRawString() + l.consumeRawString(noPanic) default: - l.consumeString() + l.consumeString(noPanic) } return default: @@ -283,10 +296,16 @@ func (l *Lexer) consumeToken() { return } + if noPanic { + l.skip() + l.Token.Kind = token.TokenBad + return + } + panic(l.errorf("illegal input character: %q", l.peek(0))) } -func (l *Lexer) consumeFieldToken() { +func (l *Lexer) consumeFieldToken(noPanic bool) { if l.peekOk(0) && char.IsIdentPart(l.peek(0)) { i := 0 for l.peekOk(i) && char.IsIdentPart(l.peek(i)) { @@ -298,10 +317,10 @@ func (l *Lexer) consumeFieldToken() { return } - l.consumeToken() + l.consumeToken(noPanic) } -func (l *Lexer) consumeNumber() { +func (l *Lexer) consumeNumber(noPanic bool) { // https://cloud.google.com/spanner/docs/lexical#integer-literals // https://cloud.google.com/spanner/docs/lexical#floating-point-literals @@ -354,35 +373,61 @@ func (l *Lexer) consumeNumber() { } if l.peekOk(0) && char.IsIdentPart(l.peek(0)) { + if noPanic { + l.Token.Kind = token.TokenBad + return + } + l.panicf("number literal cannot follow identifier without any spaces") } } -func (l *Lexer) consumeRawBytes() { +func (l 
*Lexer) consumeRawBytes(noPanic bool) { l.Token.Kind = token.TokenBytes - l.Token.AsString = l.consumeQuotedContent(l.peekDelimiter(), true, false, "raw bytes literal") + + var hasError bool + l.Token.AsString, hasError = l.consumeQuotedContent(l.peekDelimiter(), true, false, "raw bytes literal", noPanic) + if hasError { + l.Token.Kind = token.TokenBad + } } -func (l *Lexer) consumeBytes() { +func (l *Lexer) consumeBytes(noPanic bool) { l.Token.Kind = token.TokenBytes - l.Token.AsString = l.consumeQuotedContent(l.peekDelimiter(), false, false, "bytes literal") + + var hasError bool + l.Token.AsString, hasError = l.consumeQuotedContent(l.peekDelimiter(), false, false, "bytes literal", noPanic) + if hasError { + l.Token.Kind = token.TokenBad + } } -func (l *Lexer) consumeRawString() { +func (l *Lexer) consumeRawString(noPanic bool) { l.Token.Kind = token.TokenString - l.Token.AsString = l.consumeQuotedContent(l.peekDelimiter(), true, true, "raw string literal") + + var hasError bool + l.Token.AsString, hasError = l.consumeQuotedContent(l.peekDelimiter(), true, true, "raw string literal", noPanic) + if hasError { + l.Token.Kind = token.TokenBad + } } -func (l *Lexer) consumeString() { +func (l *Lexer) consumeString(noPanic bool) { l.Token.Kind = token.TokenString - l.Token.AsString = l.consumeQuotedContent(l.peekDelimiter(), false, true, "string literal") + + var hasError bool + l.Token.AsString, hasError = l.consumeQuotedContent(l.peekDelimiter(), false, true, "string literal", noPanic) + if hasError { + l.Token.Kind = token.TokenBad + } } func (l *Lexer) peekDelimiter() string { i := 0 c := l.peek(i) if c != '"' && c != '\'' { - l.panicf("invalid delimiter: %v", c) + // This error is unreachable + panic(fmt.Sprintf("BUG: invalid delimiter: %v", c)) } i++ @@ -405,7 +450,7 @@ func (l *Lexer) peekDelimiter() string { } } -func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) string { +func (l *Lexer) consumeQuotedContent(q string, raw, unicode 
bool, name string, noPanic bool) (string, bool) { // https://cloud.google.com/spanner/docs/lexical#string-and-bytes-literals if len(q) == 3 { @@ -414,20 +459,33 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s i := len(q) var content []byte + hasError := false for l.peekOk(i) { if l.slice(i, i+len(q)) == q { if len(content) == 0 && name == "identifier" { - l.panicfAtPosition(token.Pos(l.pos), token.Pos(l.pos+i+len(q)), "invalid empty identifier") + if noPanic { + hasError = true + } else { + l.panicfAtPosition(token.Pos(l.pos), token.Pos(l.pos+i+len(q)), "invalid empty identifier") + } } l.skipN(i + len(q)) - return string(content) + + if hasError { + return "", true + } + return string(content), false } c := l.peek(i) if c == '\\' { i++ if !l.peekOk(i) { + if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-1), token.Pos(l.pos+i), "invalid escape sequence: \\") } @@ -459,17 +517,29 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s case 'x', 'X': for j := 0; j < 2; j++ { if !(l.peekOk(i+j) && char.IsHexDigit(l.peek(i+j))) { + if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+j+1), "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits") } } u, err := strconv.ParseUint(l.slice(i, i+2), 16, 8) if err != nil { + if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+2), "invalid escape sequence: %v", err) } content = append(content, byte(u)) i += 2 case 'u', 'U': if !unicode { + if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i), "invalid escape sequence: \\%c is not allowed in %s", c, name) } size := 4 @@ -478,14 +548,26 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s } for j := 0; j < size; j++ { if !(l.peekOk(i+j) && char.IsHexDigit(l.peek(i+j))) { + 
if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+j+1), "invalid escape sequence: \\%c must be followed by %d hex digits", c, size) } } u, err := strconv.ParseUint(l.slice(i, i+size), 16, 32) if err != nil { + if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+size), "invalid escape sequence: %v", err) } if 0xD800 <= u && u <= 0xDFFF || 0x10FFFF < u { + if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+size), "invalid escape sequence: invalid code point: U+%04X", u) } var buf [utf8.MaxRune]byte @@ -495,16 +577,28 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s case '0', '1', '2', '3': for j := 0; j < 2; j++ { if !(l.peekOk(i+j) && char.IsOctalDigit(l.peek(i+j))) { + if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+j+1), "invalid escape sequence: octal escape sequence must be follwed by 3 octal digits") } } u, err := strconv.ParseUint(l.slice(i-1, i+2), 8, 8) if err != nil { + if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+2), "invalid escape sequence: %v", err) } content = append(content, byte(u)) i += 2 default: + if noPanic { + hasError = true + continue + } l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i), "invalid escape sequence: \\%c", c) } @@ -512,6 +606,11 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s } if c == '\n' && len(q) != 3 { + if noPanic { + hasError = true + i++ + continue + } l.panicfAtPosition(token.Pos(l.pos), token.Pos(l.pos+i), "unclosed %s: newline appears in non triple-quoted", name) } @@ -519,6 +618,11 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s i++ } + if noPanic { + l.skipN(i) + return "", true + } + panic(l.errorfAtPosition(token.Pos(l.pos), 
token.Pos(l.pos+i), "unclosed %s", name)) } @@ -534,30 +638,35 @@ func (l *Lexer) skipSpaces() { } } -func (l *Lexer) skipComment() { +func (l *Lexer) skipComment(noPanic bool) bool { r, _ := utf8.DecodeRuneInString(l.Buffer[l.pos:]) switch { case r == '#' || r == '/' && l.peekIs(1, '/') || r == '-' && l.peekIs(1, '-'): - l.skipCommentUntil("\n", false) + return l.skipCommentUntil("\n", false, noPanic) case r == '/' && l.peekIs(1, '*'): - l.skipCommentUntil("*/", true) + return l.skipCommentUntil("*/", true, noPanic) default: - return + return false } } -func (l *Lexer) skipCommentUntil(end string, mustEnd bool) { +func (l *Lexer) skipCommentUntil(end string, mustEnd bool, noPanic bool) bool { pos := token.Pos(l.pos) for !l.eof() { if l.slice(0, len(end)) == end { l.skipN(len(end)) - return + return false } l.skip() } if mustEnd { + if noPanic { + return true + } l.panicfAtPosition(pos, token.Pos(l.pos), "unclosed comment") } + + return false } func (l *Lexer) peek(i int) byte { diff --git a/lexer_test.go b/lexer_test.go index e69aef1e..2149d4de 100644 --- a/lexer_test.go +++ b/lexer_test.go @@ -252,3 +252,23 @@ func TestLexerWrong(t *testing.T) { }) } } + +func TestLexerWrongNoError(t *testing.T) { + for _, tc := range lexerWrongTestCase { + t.Run(fmt.Sprintf("testcase/%q", tc.source), func(t *testing.T) { + l := &Lexer{ + File: &File{FilePath: "[test]", Buffer: tc.source}, + } + hasBad := false + for l.Token.Kind != TokenEOF { + l.nextToken(true) + if l.Token.Kind == TokenBad { + hasBad = true + } + } + if !hasBad { + t.Errorf("expected ") + } + }) + } +} diff --git a/parser.go b/parser.go index 912fd800..9ba84e9f 100644 --- a/parser.go +++ b/parser.go @@ -11,204 +11,153 @@ import ( type Parser struct { *Lexer + + errors []*Error } // ParseStatement parses a SQL statement. 
-func (p *Parser) ParseStatement() (stmt ast.Statement, err error) { - defer func() { - if r := recover(); r != nil { - stmt = nil - if e, ok := r.(*Error); ok { - err = e - } else { - panic(r) - } - } - }() - +func (p *Parser) ParseStatement() (ast.Statement, error) { p.nextToken() - stmt = p.parseStatement() + stmt := p.parseStatement() if p.Token.Kind != token.TokenEOF { - p.panicfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind) + p.errors = append(p.errors, p.errorfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind)) } - return + + if len(p.errors) > 0 { + return stmt, MultiError(p.errors) + } + + return stmt, nil } // ParseStatements parses SQL statements list separated by semi-colon. -func (p *Parser) ParseStatements() (stmts []ast.Statement, err error) { - defer func() { - if r := recover(); r != nil { - stmts = nil - if e, ok := r.(*Error); ok { - err = e - } else { - panic(r) - } - } - }() - +func (p *Parser) ParseStatements() ([]ast.Statement, error) { p.nextToken() - p.parseStatements(func() { - stmts = append(stmts, p.parseStatement()) - }) + stmts := parseStatements(p, p.parseStatement) if p.Token.Kind != token.TokenEOF { - p.panicfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind) + p.errors = append(p.errors, p.errorfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind)) } - return + + if len(p.errors) > 0 { + return stmts, MultiError(p.errors) + } + + return stmts, nil } // ParseQuery parses a query statement. 
-func (p *Parser) ParseQuery() (stmt *ast.QueryStatement, err error) { - defer func() { - if r := recover(); r != nil { - stmt = nil - if e, ok := r.(*Error); ok { - err = e - } else { - panic(r) - } - } - }() - +func (p *Parser) ParseQuery() (*ast.QueryStatement, error) { p.nextToken() - stmt = p.parseQueryStatement() + stmt := p.parseQueryStatement() if p.Token.Kind != token.TokenEOF { - p.panicfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind) + p.errors = append(p.errors, p.errorfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind)) } - return + + if len(p.errors) > 0 { + return stmt, MultiError(p.errors) + } + + return stmt, nil } // ParseExpr parses a SQL expression. -func (p *Parser) ParseExpr() (expr ast.Expr, err error) { - defer func() { - if r := recover(); r != nil { - expr = nil - if e, ok := r.(*Error); ok { - err = e - } else { - panic(r) - } - } - }() - +func (p *Parser) ParseExpr() (ast.Expr, error) { p.nextToken() - expr = p.parseExpr() + expr := p.parseExpr() if p.Token.Kind != token.TokenEOF { - p.panicfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind) + p.errors = append(p.errors, p.errorfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind)) } - return + + if len(p.errors) > 0 { + return expr, MultiError(p.errors) + } + + return expr, nil } // ParseType parses a type name. 
-func (p *Parser) ParseType() (typ ast.Type, err error) { - defer func() { - if r := recover(); r != nil { - typ = nil - if e, ok := r.(*Error); ok { - err = e - } else { - panic(r) - } - } - }() - +func (p *Parser) ParseType() (ast.Type, error) { p.nextToken() - typ = p.parseType() + t := p.parseType() if p.Token.Kind != token.TokenEOF { - p.panicfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind) + p.errors = append(p.errors, p.errorfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind)) } - return + + if len(p.errors) > 0 { + return t, MultiError(p.errors) + } + + return t, nil } // ParseDDL parses a CREATE/ALTER/DROP statement. -func (p *Parser) ParseDDL() (ddl ast.DDL, err error) { - defer func() { - if r := recover(); r != nil { - ddl = nil - if e, ok := r.(*Error); ok { - err = e - } else { - panic(r) - } - } - }() - +func (p *Parser) ParseDDL() (ast.DDL, error) { p.nextToken() - ddl = p.parseDDL() + ddl := p.parseDDL() if p.Token.Kind != token.TokenEOF { - p.panicfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind) + p.errors = append(p.errors, p.errorfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind)) } - return + + if len(p.errors) > 0 { + return ddl, MultiError(p.errors) + } + + return ddl, nil } // ParseDDLs parses CREATE/ALTER/DROP statements list separated by semi-colon. 
-func (p *Parser) ParseDDLs() (ddls []ast.DDL, err error) { - defer func() { - if r := recover(); r != nil { - ddls = nil - if e, ok := r.(*Error); ok { - err = e - } else { - panic(r) - } - } - }() - +func (p *Parser) ParseDDLs() ([]ast.DDL, error) { p.nextToken() - p.parseStatements(func() { - ddls = append(ddls, p.parseDDL()) - }) + ddls := parseStatements(p, p.parseDDL) if p.Token.Kind != token.TokenEOF { - p.panicfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind) + p.errors = append(p.errors, p.errorfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind)) } - return + + if len(p.errors) > 0 { + return ddls, MultiError(p.errors) + } + + return ddls, nil } // ParseDML parses a INSERT/DELETE/UPDATE statement. -func (p *Parser) ParseDML() (dml ast.DML, err error) { - defer func() { - if r := recover(); r != nil { - dml = nil - if e, ok := r.(*Error); ok { - err = e - } else { - panic(r) - } - } - }() - +func (p *Parser) ParseDML() (ast.DML, error) { p.nextToken() - dml = p.parseDML() + dml := p.parseDML() if p.Token.Kind != token.TokenEOF { - p.panicfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind) + p.errors = append(p.errors, p.errorfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind)) } - return + + if len(p.errors) > 0 { + return dml, MultiError(p.errors) + } + + return dml, nil } // ParseDMLs parses INSERT/DELETE/UPDATE statements list separated by semi-colon. 
-func (p *Parser) ParseDMLs() (dmls []ast.DML, err error) { - defer func() { - if r := recover(); r != nil { - dmls = nil - if e, ok := r.(*Error); ok { - err = e - } else { - panic(r) - } - } - }() - +func (p *Parser) ParseDMLs() ([]ast.DML, error) { p.nextToken() - p.parseStatements(func() { - dmls = append(dmls, p.parseDML()) - }) + dmls := parseStatements(p, p.parseDML) if p.Token.Kind != token.TokenEOF { - p.panicfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind) + p.errors = append(p.errors, p.errorfAtToken(&p.Token, "expected token: , but: %s", p.Token.Kind)) } - return + + if len(p.errors) > 0 { + return dmls, MultiError(p.errors) + } + + return dmls, nil } -func (p *Parser) parseStatement() ast.Statement { +func (p *Parser) parseStatement() (stmt ast.Statement) { + l := p.Lexer.Clone() + defer func() { + if r := recover(); r != nil { + stmt = &ast.BadStatement{BadNode: p.handleParseStatementError(r, l)} + } + }() + switch { case p.Token.Kind == "SELECT" || p.Token.Kind == "@" || p.Token.Kind == "WITH" || p.Token.Kind == "(" || p.Token.Kind == "FROM": return p.parseQueryStatement() @@ -253,19 +202,22 @@ func (p *Parser) parseCall() *ast.Call { Args: args, } } -func (p *Parser) parseStatements(doParse func()) { + +func parseStatements[T ast.Node](p *Parser, doParse func() T) []T { + var nodes []T for p.Token.Kind != token.TokenEOF { if p.Token.Kind == ";" { p.nextToken() continue } - doParse() + nodes = append(nodes, doParse()) if p.Token.Kind != ";" { break } } + return nodes } // ================================================================================ @@ -274,7 +226,18 @@ func (p *Parser) parseStatements(doParse func()) { // // ================================================================================ -func (p *Parser) parseQueryStatement() *ast.QueryStatement { +func (p *Parser) parseQueryStatement() (stmt *ast.QueryStatement) { + l := p.Lexer.Clone() + defer func() { + if r := recover(); r != nil { + // When parsing is failed on 
tryParseHint or tryParseWith, the results of these methods are discarded + // because they are concrete structs and we cannot fill them with *ast.BadNode. + stmt = &ast.QueryStatement{ + Query: &ast.BadQueryExpr{BadNode: p.handleParseStatementError(r, l)}, + } + } + }() + + hint := p.tryParseHint() + query := p.parseQueryExpr() @@ -420,13 +383,20 @@ func (p *Parser) parseCTE() *ast.CTE { } } -func (p *Parser) parseQueryExpr() ast.QueryExpr { +func (p *Parser) parseQueryExpr() (query ast.QueryExpr) { + l := p.Lexer.Clone() + defer func() { + if r := recover(); r != nil { + query = p.handleParseQueryExprError(false, r, l) + } + }() + // If WITH is appeared, it is treated as an outer node than compound query. if p.Token.Kind == "WITH" { return p.parseQuery() } - query := p.parseSimpleQueryExpr() + query = p.parseSimpleQueryExpr() // If the query is directly followed by ORDER BY, LIMIT or pipe operators, it won't be a compound query switch p.Token.Kind { @@ -491,77 +461,84 @@ func (p *Parser) parseFromQuery() *ast.FromQuery { } // parseSimpleQueryExpr parses simple QueryExpr, which can be wrapped in Query or CompoundQuery. 
-func (p *Parser) parseSimpleQueryExpr() ast.QueryExpr { +func (p *Parser) parseSimpleQueryExpr() (query ast.QueryExpr) { + l := p.Lexer.Clone() + defer func() { + if r := recover(); r != nil { + query = p.handleParseQueryExprError(true, r, l) + } + }() + switch p.Token.Kind { // FROM and SELECT are the most primitive query form case "FROM": return p.parseFromQuery() case "SELECT": return p.parseSelect() - // Query with paren - case "(": + case "(": // Query with paren lparen := p.expect("(").Pos - query := p.parseQueryExpr() + q := p.parseQueryExpr() rparen := p.expect(")").Pos return &ast.SubQuery{ Lparen: lparen, Rparen: rparen, - Query: query, + Query: q, } default: panic(p.errorfAtToken(&p.Token, `expected beginning of simple query "(", SELECT, FROM, but: %q`, p.Token.AsString)) } } +func (p *Parser) parseSelect() *ast.Select { + sel := p.expect("SELECT").Pos + allOrDistinct := p.tryParseAllOrDistinct() + selectAs := p.tryParseSelectAs() + results := p.parseSelectResults() + from := p.tryParseFrom() + where := p.tryParseWhere() + groupBy := p.tryParseGroupBy() + having := p.tryParseHaving() + + return &ast.Select{ + Select: sel, + AllOrDistinct: allOrDistinct, + As: selectAs, + Results: results, + From: from, + Where: where, + GroupBy: groupBy, + Having: having, + } +} + func (p *Parser) tryParseSelectAs() ast.SelectAs { if p.Token.Kind != "AS" { return nil } - asPos := p.expect("AS").Pos + pos := p.expect("AS").Pos + switch { case p.Token.Kind == "STRUCT": structPos := p.expect("STRUCT").Pos return &ast.AsStruct{ - As: asPos, + As: pos, Struct: structPos, } case p.Token.IsKeywordLike("VALUE"): valuePos := p.expectKeywordLike("VALUE").Pos return &ast.AsValue{ - As: asPos, + As: pos, Value: valuePos, } default: namedType := p.parseNamedType() return &ast.AsTypeName{ - As: asPos, + As: pos, TypeName: namedType, } } } -func (p *Parser) parseSelect() *ast.Select { - sel := p.expect("SELECT").Pos - allOrDistinct := p.tryParseAllOrDistinct() - selectAs := 
p.tryParseSelectAs() - results := p.parseSelectResults() - from := p.tryParseFrom() - where := p.tryParseWhere() - groupBy := p.tryParseGroupBy() - having := p.tryParseHaving() - - return &ast.Select{ - Select: sel, - AllOrDistinct: allOrDistinct, - As: selectAs, - Results: results, - From: from, - Where: where, - GroupBy: groupBy, - Having: having, - } -} - func (p *Parser) parseSelectResults() []ast.SelectItem { results := []ast.SelectItem{p.parseSelectItem()} for p.Token.Kind != token.TokenEOF { @@ -1274,7 +1251,14 @@ func (p *Parser) parseTableSampleSize() *ast.TableSampleSize { // // ================================================================================ -func (p *Parser) parseExpr() ast.Expr { +func (p *Parser) parseExpr() (expr ast.Expr) { + l := p.Lexer.Clone() + defer func() { + if r := recover(); r != nil { + expr = p.handleParseExprError(r, l) + } + }() + return p.parseOr() } @@ -2524,7 +2508,14 @@ func (p *Parser) parseNamedType() *ast.NamedType { return &ast.NamedType{Path: path} } -func (p *Parser) parseType() ast.Type { +func (p *Parser) parseType() (t ast.Type) { + l := p.Lexer.Clone() + defer func() { + if r := recover(); r != nil { + t = p.handleParseTypeError(r, l) + } + }() + switch p.Token.Kind { case token.TokenIdent: if !p.lookaheadSimpleType() { @@ -2776,7 +2767,14 @@ func (p *Parser) lookaheadSimpleType() bool { // // ================================================================================ -func (p *Parser) parseDDL() ast.DDL { +func (p *Parser) parseDDL() (ddl ast.DDL) { + l := p.Lexer.Clone() + defer func() { + if r := recover(); r != nil { + ddl = &ast.BadDDL{BadNode: p.handleParseStatementError(r, l)} + } + }() + pos := p.Token.Pos switch { case p.Token.Kind == "CREATE": @@ -4661,7 +4659,14 @@ func (p *Parser) parseIfExists() bool { // // ================================================================================ -func (p *Parser) parseDML() ast.DML { +func (p *Parser) parseDML() (dml ast.DML) { + l := 
p.Lexer.Clone() + defer func() { + if r := recover(); r != nil { + dml = &ast.BadDML{BadNode: p.handleParseStatementError(r, l)} + } + }() + id := p.expect(token.TokenIdent) pos := id.Pos switch { @@ -5049,6 +5054,176 @@ func (p *Parser) parseStringValue() ast.StringValue { panic(p.errorfAtToken(&p.Token, "expected token: , , but: %s", p.Token.Kind)) } +// ================================================================================ +// +// Error Handlers +// +// ================================================================================ + +func (p *Parser) handleError(r any, l *Lexer) { + e, ok := r.(*Error) + if !ok { + panic(r) + } + + p.errors = append(p.errors, e) + p.Lexer = l +} + +func (p *Parser) handleParseStatementError(r any, l *Lexer) *ast.BadNode { + p.handleError(r, l) + + var tokens []*token.Token + pos := p.Token.Pos + end := p.Token.Pos +skip: + for p.Token.Kind != token.TokenEOF { + switch p.Token.Kind { + case ";": + break skip + } + end = p.Token.End + tokens = append(tokens, p.Token.Clone()) + p.Lexer.nextToken(true) + } + + return &ast.BadNode{ + NodePos: pos, + NodeEnd: end, + Tokens: tokens, + } +} + +func (p *Parser) handleParseQueryExprError(simple bool, r any, l *Lexer) *ast.BadQueryExpr { + p.handleError(r, l) + + var tokens []*token.Token + pos := p.Token.Pos + end := p.Token.Pos + nesting := 0 +skip: + for p.Token.Kind != token.TokenEOF { + switch p.Token.Kind { + case ";": + break skip + case "(": + nesting += 1 + case ")": + if nesting == 0 { + break skip + } + nesting -= 1 + case "UNION", "INTERSECT", "EXCEPT": + if simple && nesting == 0 { + break skip + } + } + end = p.Token.End + tokens = append(tokens, p.Token.Clone()) + p.Lexer.nextToken(true) + } + + return &ast.BadQueryExpr{ + BadNode: &ast.BadNode{ + NodePos: pos, + NodeEnd: end, + Tokens: tokens, + }, + } +} + +func (p *Parser) handleParseExprError(r any, l *Lexer) *ast.BadExpr { + p.handleError(r, l) + + var tokens []*token.Token + pos := p.Token.Pos + end := 
p.Token.Pos + nesting := 0 +skip: + for p.Token.Kind != token.TokenEOF { + switch p.Token.Kind { + case ";": + break skip + case "(", "[", "CASE", "WHEN": + nesting += 1 + case ")", "]", "}", "END", "THEN": + if nesting == 0 { + break skip + } + nesting -= 1 + case ",", "AS", "FROM", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET", "AT", "UNION", "INTERSECT", "EXCEPT": + if nesting == 0 { + break skip + } + } + end = p.Token.End + tokens = append(tokens, p.Token.Clone()) + p.Lexer.nextToken(true) + } + + return &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: pos, + NodeEnd: end, + Tokens: tokens, + }, + } +} + +func (p *Parser) handleParseTypeError(r any, l *Lexer) *ast.BadType { + p.handleError(r, l) + + var tokens []*token.Token + pos := p.Token.Pos + end := p.Token.Pos + nesting := 0 +skip: + for p.Token.Kind != token.TokenEOF { + switch p.Token.Kind { + case ";", ")": + break skip + case "<": + nesting += 1 + case ">": + if nesting == 0 { + break skip + } + nesting -= 1 + case ">>": + if nesting == 0 { + break skip + } + if nesting == 1 { + p.Token.Kind = ">" + p.Token.Pos += 1 + break skip + } + nesting -= 2 + case ",": + if nesting == 0 { + break skip + } + } + tokens = append(tokens, p.Token.Clone()) + end = p.Token.End + p.Lexer.nextToken(true) + } + + return &ast.BadType{ + BadNode: &ast.BadNode{ + NodePos: pos, + NodeEnd: end, + Tokens: tokens, + }, + } +} + +// ================================================================================ +// +// Utilities +// +// ================================================================================ + // parseCommaSeparatedList parses a comma separated list of nodes parsed by `doParse`. // // `doParse` should be a reference to a method of `Parser`. 
That is, this function should always be used on a single line, e.g.: @@ -5126,3 +5301,7 @@ func (p *Parser) parseRenameTable(pos token.Pos) *ast.RenameTable { } } + +func (p *Parser) nextToken() { + p.Lexer.nextToken(false) +} diff --git a/parser_test.go b/parser_test.go index 6db86502..e8e07b2e 100644 --- a/parser_test.go +++ b/parser_test.go @@ -7,6 +7,7 @@ import ( "log" "os" "path/filepath" + "strings" "testing" "github.com/k0kubun/pp/v3" @@ -41,6 +42,7 @@ func testParser(t *testing.T, inputPath, resultPath string, parse func(p *memefi for _, in := range inputs { in := in + bad := strings.HasPrefix(in.Name(), "!bad_") t.Run(in.Name(), func(t *testing.T) { t.Parallel() @@ -57,9 +59,6 @@ func testParser(t *testing.T, inputPath, resultPath string, parse func(p *memefi } node, err := parse(p) - if err != nil { - log.Fatalf("error on parsing input file: %v", err) - } pprinter := pp.New() pprinter.SetColoringEnabled(false) @@ -71,6 +70,19 @@ func testParser(t *testing.T, inputPath, resultPath string, parse func(p *memefi fmt.Fprint(&buf, string(b)) fmt.Fprintln(&buf) + if err != nil { + list, ok := err.(memefish.MultiError) + if bad && ok { + fmt.Fprintf(&buf, "--- Error\n%s\n\n", list.FullError()) + } else { + t.Errorf("unexpected error: %v", err) + } + } else { + if bad { + t.Errorf("error is expected, but parsing succeeded") + } + } + fmt.Fprintf(&buf, "--- AST\n") _, _ = pprinter.Fprintln(&buf, node) fmt.Fprintln(&buf) @@ -115,10 +127,7 @@ func testParser(t *testing.T, inputPath, resultPath string, parse func(p *memefi }, } - node1, err := parse(p1) - if err != nil { - log.Fatalf("error on parsing unparsed SQL: %v", err) - } + node1, _ := parse(p1) s2 := node1.SQL() if s1 != s2 { diff --git a/testdata/input/ddl/!bad_alter_table_add_column.sql b/testdata/input/ddl/!bad_alter_table_add_column.sql new file mode 100644 index 00000000..f2564a0c --- /dev/null +++ b/testdata/input/ddl/!bad_alter_table_add_column.sql @@ -0,0 +1 @@ +alter table foo add column baz 
string(max) null \ No newline at end of file diff --git a/testdata/input/dml/!bad_insert.sql b/testdata/input/dml/!bad_insert.sql new file mode 100644 index 00000000..c91365c2 --- /dev/null +++ b/testdata/input/dml/!bad_insert.sql @@ -0,0 +1,3 @@ +insert foo (foo, bar, baz) +vales (1, 2, 3), + (4, 5, 6) \ No newline at end of file diff --git a/testdata/input/expr/!bad_new_braced_constructor.sql b/testdata/input/expr/!bad_new_braced_constructor.sql new file mode 100644 index 00000000..b0d884f4 --- /dev/null +++ b/testdata/input/expr/!bad_new_braced_constructor.sql @@ -0,0 +1 @@ +NEW foo { bar: 1 + } diff --git a/testdata/input/expr/!bad_plus.sql b/testdata/input/expr/!bad_plus.sql new file mode 100644 index 00000000..ca49acb0 --- /dev/null +++ b/testdata/input/expr/!bad_plus.sql @@ -0,0 +1 @@ +1 + diff --git a/testdata/input/expr/!bad_plus2.sql b/testdata/input/expr/!bad_plus2.sql new file mode 100644 index 00000000..31fb00a7 --- /dev/null +++ b/testdata/input/expr/!bad_plus2.sql @@ -0,0 +1 @@ +(1 +) + (2 +) diff --git a/testdata/input/expr/!bad_typed_struct.sql b/testdata/input/expr/!bad_typed_struct.sql new file mode 100644 index 00000000..a9bcc918 --- /dev/null +++ b/testdata/input/expr/!bad_typed_struct.sql @@ -0,0 +1 @@ +STRUCT<1>(2 +) diff --git a/testdata/input/query/!bad_hint_select.sql b/testdata/input/query/!bad_hint_select.sql new file mode 100644 index 00000000..6c38aebb --- /dev/null +++ b/testdata/input/query/!bad_hint_select.sql @@ -0,0 +1 @@ +@ select 1 diff --git a/testdata/input/query/!bad_hint_select_2.sql b/testdata/input/query/!bad_hint_select_2.sql new file mode 100644 index 00000000..636947f2 --- /dev/null +++ b/testdata/input/query/!bad_hint_select_2.sql @@ -0,0 +1 @@ +@{hint = 1} select \ No newline at end of file diff --git a/testdata/input/query/!bad_select.sql b/testdata/input/query/!bad_select.sql new file mode 100644 index 00000000..f1b62cf1 --- /dev/null +++ b/testdata/input/query/!bad_select.sql @@ -0,0 +1 @@ +select diff --git 
a/testdata/input/query/!bad_select_order.sql b/testdata/input/query/!bad_select_order.sql new file mode 100644 index 00000000..b2e0a826 --- /dev/null +++ b/testdata/input/query/!bad_select_order.sql @@ -0,0 +1 @@ +select 1 order x asc diff --git a/testdata/input/query/!bad_select_union_select.sql b/testdata/input/query/!bad_select_union_select.sql new file mode 100644 index 00000000..c0fb7fed --- /dev/null +++ b/testdata/input/query/!bad_select_union_select.sql @@ -0,0 +1 @@ +select union all select \ No newline at end of file diff --git a/testdata/result/ddl/!bad_alter_table_add_column.sql.txt b/testdata/result/ddl/!bad_alter_table_add_column.sql.txt new file mode 100644 index 00000000..ce6c29ac --- /dev/null +++ b/testdata/result/ddl/!bad_alter_table_add_column.sql.txt @@ -0,0 +1,41 @@ +--- !bad_alter_table_add_column.sql +alter table foo add column baz string(max) null +--- Error +syntax error: testdata/input/ddl/!bad_alter_table_add_column.sql:1:44: expected token: , but: NULL + 1| alter table foo add column baz string(max) null + | ^~~~ + + +--- AST +&ast.AlterTable{ + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 12, + NameEnd: 15, + Name: "foo", + }, + }, + }, + TableAlteration: &ast.AddColumn{ + Add: 16, + Column: &ast.ColumnDef{ + Null: -1, + Name: &ast.Ident{ + NamePos: 27, + NameEnd: 30, + Name: "baz", + }, + Type: &ast.SizedSchemaType{ + NamePos: 31, + Rparen: 41, + Name: "STRING", + Max: true, + }, + Hidden: -1, + }, + }, +} + +--- SQL +ALTER TABLE foo ADD COLUMN baz STRING(MAX) diff --git a/testdata/result/dml/!bad_insert.sql.txt b/testdata/result/dml/!bad_insert.sql.txt new file mode 100644 index 00000000..ddcb4be3 --- /dev/null +++ b/testdata/result/dml/!bad_insert.sql.txt @@ -0,0 +1,158 @@ +--- !bad_insert.sql +insert foo (foo, bar, baz) +vales (1, 2, 3), + (4, 5, 6) +--- Error +syntax error: testdata/input/dml/!bad_insert.sql:2:1: expected beginning of simple query "(", SELECT, FROM, but: "vales" + 2| vales (1, 2, 3), + | ^~~~~ 
+ + +--- AST +&ast.Insert{ + TableName: &ast.Ident{ + NamePos: 7, + NameEnd: 10, + Name: "foo", + }, + Columns: []*ast.Ident{ + &ast.Ident{ + NamePos: 12, + NameEnd: 15, + Name: "foo", + }, + &ast.Ident{ + NamePos: 17, + NameEnd: 20, + Name: "bar", + }, + &ast.Ident{ + NamePos: 22, + NameEnd: 25, + Name: "baz", + }, + }, + Input: &ast.SubQueryInput{ + Query: &ast.BadQueryExpr{ + BadNode: &ast.BadNode{ + NodePos: 27, + NodeEnd: 59, + Tokens: []*token.Token{ + &token.Token{ + Kind: "", + Space: "\n", + Raw: "vales", + AsString: "vales", + Pos: 27, + End: 32, + }, + &token.Token{ + Kind: "(", + Space: " ", + Raw: "(", + Pos: 33, + End: 34, + }, + &token.Token{ + Kind: "", + Raw: "1", + Base: 10, + Pos: 34, + End: 35, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 35, + End: 36, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "2", + Base: 10, + Pos: 37, + End: 38, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 38, + End: 39, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "3", + Base: 10, + Pos: 40, + End: 41, + }, + &token.Token{ + Kind: ")", + Raw: ")", + Pos: 41, + End: 42, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 42, + End: 43, + }, + &token.Token{ + Kind: "(", + Space: "\n ", + Raw: "(", + Pos: 50, + End: 51, + }, + &token.Token{ + Kind: "", + Raw: "4", + Base: 10, + Pos: 51, + End: 52, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 52, + End: 53, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "5", + Base: 10, + Pos: 54, + End: 55, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 55, + End: 56, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "6", + Base: 10, + Pos: 57, + End: 58, + }, + &token.Token{ + Kind: ")", + Raw: ")", + Pos: 58, + End: 59, + }, + }, + }, + }, + }, +} + +--- SQL +INSERT INTO foo (foo, bar, baz) vales (1, 2, 3), (4, 5, 6) diff --git a/testdata/result/expr/!bad_new_braced_constructor.sql.txt b/testdata/result/expr/!bad_new_braced_constructor.sql.txt new file mode 100644 index 
00000000..668ad259 --- /dev/null +++ b/testdata/result/expr/!bad_new_braced_constructor.sql.txt @@ -0,0 +1,63 @@ +--- !bad_new_braced_constructor.sql +NEW foo { bar: 1 + } + +--- Error +syntax error: testdata/input/expr/!bad_new_braced_constructor.sql:1:20: unexpected token: } + 1| NEW foo { bar: 1 + } + | ^ + + +--- AST +&ast.BracedNewConstructor{ + Type: &ast.NamedType{ + Path: []*ast.Ident{ + &ast.Ident{ + NamePos: 4, + NameEnd: 7, + Name: "foo", + }, + }, + }, + Body: &ast.BracedConstructor{ + Lbrace: 8, + Rbrace: 19, + Fields: []*ast.BracedConstructorField{ + &ast.BracedConstructorField{ + Name: &ast.Ident{ + NamePos: 10, + NameEnd: 13, + Name: "bar", + }, + Value: &ast.BracedConstructorFieldValueExpr{ + Colon: 13, + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 15, + NodeEnd: 18, + Tokens: []*token.Token{ + &token.Token{ + Kind: "", + Space: " ", + Raw: "1", + Base: 10, + Pos: 15, + End: 16, + }, + &token.Token{ + Kind: "+", + Space: " ", + Raw: "+", + Pos: 17, + End: 18, + }, + }, + }, + }, + }, + }, + }, + }, +} + +--- SQL +NEW foo {bar: 1 +} diff --git a/testdata/result/expr/!bad_plus.sql.txt b/testdata/result/expr/!bad_plus.sql.txt new file mode 100644 index 00000000..ccd96afc --- /dev/null +++ b/testdata/result/expr/!bad_plus.sql.txt @@ -0,0 +1,33 @@ +--- !bad_plus.sql +1 + + +--- Error +syntax error: testdata/input/expr/!bad_plus.sql:2:1: unexpected token: + 2| + | ^ + + +--- AST +&ast.BadExpr{ + BadNode: &ast.BadNode{ + NodeEnd: 3, + Tokens: []*token.Token{ + &token.Token{ + Kind: "", + Raw: "1", + Base: 10, + End: 1, + }, + &token.Token{ + Kind: "+", + Space: " ", + Raw: "+", + Pos: 2, + End: 3, + }, + }, + }, +} + +--- SQL +1 + diff --git a/testdata/result/expr/!bad_plus2.sql.txt b/testdata/result/expr/!bad_plus2.sql.txt new file mode 100644 index 00000000..aa09007a --- /dev/null +++ b/testdata/result/expr/!bad_plus2.sql.txt @@ -0,0 +1,70 @@ +--- !bad_plus2.sql +(1 +) + (2 +) + +--- Error +syntax error: 
testdata/input/expr/!bad_plus2.sql:1:5: unexpected token: ) + 1| (1 +) + (2 +) + | ^ +syntax error: testdata/input/expr/!bad_plus2.sql:1:13: unexpected token: ) + 1| (1 +) + (2 +) + | ^ + + +--- AST +&ast.BinaryExpr{ + Op: "+", + Left: &ast.ParenExpr{ + Rparen: 4, + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 1, + NodeEnd: 4, + Tokens: []*token.Token{ + &token.Token{ + Kind: "", + Raw: "1", + Base: 10, + Pos: 1, + End: 2, + }, + &token.Token{ + Kind: "+", + Space: " ", + Raw: "+", + Pos: 3, + End: 4, + }, + }, + }, + }, + }, + Right: &ast.ParenExpr{ + Lparen: 8, + Rparen: 12, + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 9, + NodeEnd: 12, + Tokens: []*token.Token{ + &token.Token{ + Kind: "", + Raw: "2", + Base: 10, + Pos: 9, + End: 10, + }, + &token.Token{ + Kind: "+", + Space: " ", + Raw: "+", + Pos: 11, + End: 12, + }, + }, + }, + }, + }, +} + +--- SQL +(1 +) + (2 +) diff --git a/testdata/result/expr/!bad_typed_struct.sql.txt b/testdata/result/expr/!bad_typed_struct.sql.txt new file mode 100644 index 00000000..4c821820 --- /dev/null +++ b/testdata/result/expr/!bad_typed_struct.sql.txt @@ -0,0 +1,62 @@ +--- !bad_typed_struct.sql +STRUCT<1>(2 +) + +--- Error +syntax error: testdata/input/expr/!bad_typed_struct.sql:1:8: expected token: , ARRAY, STRUCT, but: + 1| STRUCT<1>(2 +) + | ^ +syntax error: testdata/input/expr/!bad_typed_struct.sql:1:14: unexpected token: ) + 1| STRUCT<1>(2 +) + | ^ + + +--- AST +&ast.TypedStructLiteral{ + Rparen: 13, + Fields: []*ast.StructField{ + &ast.StructField{ + Type: &ast.BadType{ + BadNode: &ast.BadNode{ + NodePos: 7, + NodeEnd: 8, + Tokens: []*token.Token{ + &token.Token{ + Kind: "", + Raw: "1", + Base: 10, + Pos: 7, + End: 8, + }, + }, + }, + }, + }, + }, + Values: []ast.Expr{ + &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 10, + NodeEnd: 13, + Tokens: []*token.Token{ + &token.Token{ + Kind: "", + Raw: "2", + Base: 10, + Pos: 10, + End: 11, + }, + &token.Token{ + Kind: "+", + Space: " ", + Raw: "+", + 
Pos: 12, + End: 13, + }, + }, + }, + }, + }, +} + +--- SQL +STRUCT<1>(2 +) diff --git a/testdata/result/query/!bad_hint_select.sql.txt b/testdata/result/query/!bad_hint_select.sql.txt new file mode 100644 index 00000000..1db96187 --- /dev/null +++ b/testdata/result/query/!bad_hint_select.sql.txt @@ -0,0 +1,42 @@ +--- !bad_hint_select.sql +@ select 1 + +--- Error +syntax error: testdata/input/query/!bad_hint_select.sql:1:3: expected token: {, but: SELECT + 1| @ select 1 + | ^~~~~~ + + +--- AST +&ast.QueryStatement{ + Query: &ast.BadQueryExpr{ + BadNode: &ast.BadNode{ + NodeEnd: 10, + Tokens: []*token.Token{ + &token.Token{ + Kind: "@", + Raw: "@", + End: 1, + }, + &token.Token{ + Kind: "SELECT", + Space: " ", + Raw: "select", + Pos: 2, + End: 8, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "1", + Base: 10, + Pos: 9, + End: 10, + }, + }, + }, + }, +} + +--- SQL +@ select 1 diff --git a/testdata/result/query/!bad_hint_select_2.sql.txt b/testdata/result/query/!bad_hint_select_2.sql.txt new file mode 100644 index 00000000..5f06efa5 --- /dev/null +++ b/testdata/result/query/!bad_hint_select_2.sql.txt @@ -0,0 +1,45 @@ +--- !bad_hint_select_2.sql +@{hint = 1} select +--- Error +syntax error: testdata/input/query/!bad_hint_select_2.sql:1:19: unexpected token: + 1| @{hint = 1} select + | ^ + + +--- AST +&ast.QueryStatement{ + Hint: &ast.Hint{ + Rbrace: 10, + Records: []*ast.HintRecord{ + &ast.HintRecord{ + Key: &ast.Ident{ + NamePos: 2, + NameEnd: 6, + Name: "hint", + }, + Value: &ast.IntLiteral{ + ValuePos: 9, + ValueEnd: 10, + Base: 10, + Value: "1", + }, + }, + }, + }, + Query: &ast.Select{ + Select: 12, + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 18, + NodeEnd: 18, + }, + }, + }, + }, + }, +} + +--- SQL +@{hint=1} SELECT diff --git a/testdata/result/query/!bad_select.sql.txt b/testdata/result/query/!bad_select.sql.txt new file mode 100644 index 00000000..446ffdab --- /dev/null +++ 
b/testdata/result/query/!bad_select.sql.txt @@ -0,0 +1,27 @@ +--- !bad_select.sql +select + +--- Error +syntax error: testdata/input/query/!bad_select.sql:2:1: unexpected token: + 2| + | ^ + + +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 7, + NodeEnd: 7, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT diff --git a/testdata/result/query/!bad_select_order.sql.txt b/testdata/result/query/!bad_select_order.sql.txt new file mode 100644 index 00000000..0e4fc2ae --- /dev/null +++ b/testdata/result/query/!bad_select_order.sql.txt @@ -0,0 +1,57 @@ +--- !bad_select_order.sql +select 1 order x asc + +--- Error +syntax error: testdata/input/query/!bad_select_order.sql:1:16: expected token: BY, but: + 1| select 1 order x asc + | ^ + + +--- AST +&ast.QueryStatement{ + Query: &ast.BadQueryExpr{ + BadNode: &ast.BadNode{ + NodeEnd: 20, + Tokens: []*token.Token{ + &token.Token{ + Kind: "SELECT", + Raw: "select", + End: 6, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "1", + Base: 10, + Pos: 7, + End: 8, + }, + &token.Token{ + Kind: "ORDER", + Space: " ", + Raw: "order", + Pos: 9, + End: 14, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "x", + AsString: "x", + Pos: 15, + End: 16, + }, + &token.Token{ + Kind: "ASC", + Space: " ", + Raw: "asc", + Pos: 17, + End: 20, + }, + }, + }, + }, +} + +--- SQL +select 1 order x asc diff --git a/testdata/result/query/!bad_select_union_select.sql.txt b/testdata/result/query/!bad_select_union_select.sql.txt new file mode 100644 index 00000000..913e0587 --- /dev/null +++ b/testdata/result/query/!bad_select_union_select.sql.txt @@ -0,0 +1,48 @@ +--- !bad_select_union_select.sql +select union all select +--- Error +syntax error: testdata/input/query/!bad_select_union_select.sql:1:8: unexpected token: UNION + 1| select union all select + | ^~~~~ +syntax error: testdata/input/query/!bad_select_union_select.sql:1:24: 
unexpected token: + 1| select union all select + | ^ + + +--- AST +&ast.QueryStatement{ + Query: &ast.CompoundQuery{ + Op: "UNION", + AllOrDistinct: "ALL", + Queries: []ast.QueryExpr{ + &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 7, + NodeEnd: 7, + }, + }, + }, + }, + }, + &ast.Select{ + Select: 17, + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 23, + NodeEnd: 23, + }, + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT UNION ALL SELECT diff --git a/testdata/result/statement/!bad_alter_table_add_column.sql.txt b/testdata/result/statement/!bad_alter_table_add_column.sql.txt new file mode 100644 index 00000000..ce6c29ac --- /dev/null +++ b/testdata/result/statement/!bad_alter_table_add_column.sql.txt @@ -0,0 +1,41 @@ +--- !bad_alter_table_add_column.sql +alter table foo add column baz string(max) null +--- Error +syntax error: testdata/input/ddl/!bad_alter_table_add_column.sql:1:44: expected token: , but: NULL + 1| alter table foo add column baz string(max) null + | ^~~~ + + +--- AST +&ast.AlterTable{ + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 12, + NameEnd: 15, + Name: "foo", + }, + }, + }, + TableAlteration: &ast.AddColumn{ + Add: 16, + Column: &ast.ColumnDef{ + Null: -1, + Name: &ast.Ident{ + NamePos: 27, + NameEnd: 30, + Name: "baz", + }, + Type: &ast.SizedSchemaType{ + NamePos: 31, + Rparen: 41, + Name: "STRING", + Max: true, + }, + Hidden: -1, + }, + }, +} + +--- SQL +ALTER TABLE foo ADD COLUMN baz STRING(MAX) diff --git a/testdata/result/statement/!bad_hint_select.sql.txt b/testdata/result/statement/!bad_hint_select.sql.txt new file mode 100644 index 00000000..1db96187 --- /dev/null +++ b/testdata/result/statement/!bad_hint_select.sql.txt @@ -0,0 +1,42 @@ +--- !bad_hint_select.sql +@ select 1 + +--- Error +syntax error: testdata/input/query/!bad_hint_select.sql:1:3: expected token: {, but: SELECT 
+ 1| @ select 1 + | ^~~~~~ + + +--- AST +&ast.QueryStatement{ + Query: &ast.BadQueryExpr{ + BadNode: &ast.BadNode{ + NodeEnd: 10, + Tokens: []*token.Token{ + &token.Token{ + Kind: "@", + Raw: "@", + End: 1, + }, + &token.Token{ + Kind: "SELECT", + Space: " ", + Raw: "select", + Pos: 2, + End: 8, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "1", + Base: 10, + Pos: 9, + End: 10, + }, + }, + }, + }, +} + +--- SQL +@ select 1 diff --git a/testdata/result/statement/!bad_hint_select_2.sql.txt b/testdata/result/statement/!bad_hint_select_2.sql.txt new file mode 100644 index 00000000..5f06efa5 --- /dev/null +++ b/testdata/result/statement/!bad_hint_select_2.sql.txt @@ -0,0 +1,45 @@ +--- !bad_hint_select_2.sql +@{hint = 1} select +--- Error +syntax error: testdata/input/query/!bad_hint_select_2.sql:1:19: unexpected token: + 1| @{hint = 1} select + | ^ + + +--- AST +&ast.QueryStatement{ + Hint: &ast.Hint{ + Rbrace: 10, + Records: []*ast.HintRecord{ + &ast.HintRecord{ + Key: &ast.Ident{ + NamePos: 2, + NameEnd: 6, + Name: "hint", + }, + Value: &ast.IntLiteral{ + ValuePos: 9, + ValueEnd: 10, + Base: 10, + Value: "1", + }, + }, + }, + }, + Query: &ast.Select{ + Select: 12, + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 18, + NodeEnd: 18, + }, + }, + }, + }, + }, +} + +--- SQL +@{hint=1} SELECT diff --git a/testdata/result/statement/!bad_insert.sql.txt b/testdata/result/statement/!bad_insert.sql.txt new file mode 100644 index 00000000..ddcb4be3 --- /dev/null +++ b/testdata/result/statement/!bad_insert.sql.txt @@ -0,0 +1,158 @@ +--- !bad_insert.sql +insert foo (foo, bar, baz) +vales (1, 2, 3), + (4, 5, 6) +--- Error +syntax error: testdata/input/dml/!bad_insert.sql:2:1: expected beginning of simple query "(", SELECT, FROM, but: "vales" + 2| vales (1, 2, 3), + | ^~~~~ + + +--- AST +&ast.Insert{ + TableName: &ast.Ident{ + NamePos: 7, + NameEnd: 10, + Name: "foo", + }, + Columns: []*ast.Ident{ + &ast.Ident{ + 
NamePos: 12, + NameEnd: 15, + Name: "foo", + }, + &ast.Ident{ + NamePos: 17, + NameEnd: 20, + Name: "bar", + }, + &ast.Ident{ + NamePos: 22, + NameEnd: 25, + Name: "baz", + }, + }, + Input: &ast.SubQueryInput{ + Query: &ast.BadQueryExpr{ + BadNode: &ast.BadNode{ + NodePos: 27, + NodeEnd: 59, + Tokens: []*token.Token{ + &token.Token{ + Kind: "", + Space: "\n", + Raw: "vales", + AsString: "vales", + Pos: 27, + End: 32, + }, + &token.Token{ + Kind: "(", + Space: " ", + Raw: "(", + Pos: 33, + End: 34, + }, + &token.Token{ + Kind: "", + Raw: "1", + Base: 10, + Pos: 34, + End: 35, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 35, + End: 36, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "2", + Base: 10, + Pos: 37, + End: 38, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 38, + End: 39, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "3", + Base: 10, + Pos: 40, + End: 41, + }, + &token.Token{ + Kind: ")", + Raw: ")", + Pos: 41, + End: 42, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 42, + End: 43, + }, + &token.Token{ + Kind: "(", + Space: "\n ", + Raw: "(", + Pos: 50, + End: 51, + }, + &token.Token{ + Kind: "", + Raw: "4", + Base: 10, + Pos: 51, + End: 52, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 52, + End: 53, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "5", + Base: 10, + Pos: 54, + End: 55, + }, + &token.Token{ + Kind: ",", + Raw: ",", + Pos: 55, + End: 56, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "6", + Base: 10, + Pos: 57, + End: 58, + }, + &token.Token{ + Kind: ")", + Raw: ")", + Pos: 58, + End: 59, + }, + }, + }, + }, + }, +} + +--- SQL +INSERT INTO foo (foo, bar, baz) vales (1, 2, 3), (4, 5, 6) diff --git a/testdata/result/statement/!bad_select.sql.txt b/testdata/result/statement/!bad_select.sql.txt new file mode 100644 index 00000000..446ffdab --- /dev/null +++ b/testdata/result/statement/!bad_select.sql.txt @@ -0,0 +1,27 @@ +--- !bad_select.sql +select + +--- Error +syntax error: 
testdata/input/query/!bad_select.sql:2:1: unexpected token: + 2| + | ^ + + +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 7, + NodeEnd: 7, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT diff --git a/testdata/result/statement/!bad_select_order.sql.txt b/testdata/result/statement/!bad_select_order.sql.txt new file mode 100644 index 00000000..0e4fc2ae --- /dev/null +++ b/testdata/result/statement/!bad_select_order.sql.txt @@ -0,0 +1,57 @@ +--- !bad_select_order.sql +select 1 order x asc + +--- Error +syntax error: testdata/input/query/!bad_select_order.sql:1:16: expected token: BY, but: + 1| select 1 order x asc + | ^ + + +--- AST +&ast.QueryStatement{ + Query: &ast.BadQueryExpr{ + BadNode: &ast.BadNode{ + NodeEnd: 20, + Tokens: []*token.Token{ + &token.Token{ + Kind: "SELECT", + Raw: "select", + End: 6, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "1", + Base: 10, + Pos: 7, + End: 8, + }, + &token.Token{ + Kind: "ORDER", + Space: " ", + Raw: "order", + Pos: 9, + End: 14, + }, + &token.Token{ + Kind: "", + Space: " ", + Raw: "x", + AsString: "x", + Pos: 15, + End: 16, + }, + &token.Token{ + Kind: "ASC", + Space: " ", + Raw: "asc", + Pos: 17, + End: 20, + }, + }, + }, + }, +} + +--- SQL +select 1 order x asc diff --git a/testdata/result/statement/!bad_select_union_select.sql.txt b/testdata/result/statement/!bad_select_union_select.sql.txt new file mode 100644 index 00000000..913e0587 --- /dev/null +++ b/testdata/result/statement/!bad_select_union_select.sql.txt @@ -0,0 +1,48 @@ +--- !bad_select_union_select.sql +select union all select +--- Error +syntax error: testdata/input/query/!bad_select_union_select.sql:1:8: unexpected token: UNION + 1| select union all select + | ^~~~~ +syntax error: testdata/input/query/!bad_select_union_select.sql:1:24: unexpected token: + 1| select union all select + | ^ + + +--- AST +&ast.QueryStatement{ + Query: 
&ast.CompoundQuery{ + Op: "UNION", + AllOrDistinct: "ALL", + Queries: []ast.QueryExpr{ + &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 7, + NodeEnd: 7, + }, + }, + }, + }, + }, + &ast.Select{ + Select: 17, + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.BadExpr{ + BadNode: &ast.BadNode{ + NodePos: 23, + NodeEnd: 23, + }, + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT UNION ALL SELECT diff --git a/token/file.go b/token/file.go index 0d08fe5f..098f9ff3 100644 --- a/token/file.go +++ b/token/file.go @@ -61,12 +61,15 @@ func (f *File) Position(pos, end Pos) *Position { if count < 0 { count = 0 } - fmt.Fprintf(&source, "%3d: %s\n", line+1, lineBuffer) - fmt.Fprintf(&source, " %s^%s\n", strings.Repeat(" ", column), strings.Repeat("~", count)) + fmt.Fprintf(&source, "%3d| %s\n", line+1, lineBuffer) + fmt.Fprintf(&source, " | %s^%s", strings.Repeat(" ", column), strings.Repeat("~", count)) case line < endLine: for l := line; l <= endLine; l++ { + if l > 0 { + fmt.Fprintln(&source) + } lineBuffer := f.Buffer[f.lines[l] : f.lines[l+1]-1] - fmt.Fprintf(&source, "%3d: %s\n", l+1, lineBuffer) + fmt.Fprintf(&source, "%3d| %s", l+1, lineBuffer) } } diff --git a/token/file_test.go b/token/file_test.go index cda3b2ae..627077d0 100644 --- a/token/file_test.go +++ b/token/file_test.go @@ -16,7 +16,7 @@ func stripMargin(s string) string { lines[i] = line } } - return strings.Join(lines, "\n") + return strings.TrimRight(strings.Join(lines, "\n"), "\n") } var file = &File{ @@ -56,8 +56,8 @@ var positionTestCases = []struct { line: 0, column: 0, endLine: 0, endColumn: 0, source: stripMargin(heredoc.Doc(` - | 1: select 1 union all - | ^ + | 1| select 1 union all + | | ^ `)), }, { @@ -65,8 +65,8 @@ var positionTestCases = []struct { line: 0, column: 0, endLine: 0, endColumn: 1, source: stripMargin(heredoc.Doc(` - | 1: select 1 union all - | ^ + | 1| select 1 union all + | | ^ `)), }, { @@ 
-74,8 +74,8 @@ var positionTestCases = []struct { line: 0, column: 0, endLine: 0, endColumn: 6, source: stripMargin(heredoc.Doc(` - | 1: select 1 union all - | ^~~~~~ + | 1| select 1 union all + | | ^~~~~~ `)), }, { @@ -83,8 +83,8 @@ var positionTestCases = []struct { line: 0, column: 9, endLine: 0, endColumn: 18, source: stripMargin(heredoc.Doc(` - | 1: select 1 union all - | ^~~~~~~~~ + | 1| select 1 union all + | | ^~~~~~~~~ `)), }, { @@ -92,8 +92,8 @@ var positionTestCases = []struct { line: 0, column: 18, endLine: 1, endColumn: 0, source: stripMargin(heredoc.Doc(` - | 1: select 1 union all - | 2: select 2 + | 1| select 1 union all + | 2| select 2 `)), }, } diff --git a/token/quote.go b/token/quote.go index 78187b2f..13ed1657 100644 --- a/token/quote.go +++ b/token/quote.go @@ -43,7 +43,7 @@ func QuoteSQLBytes(bs []byte) string { buf.WriteString("b") buf.WriteRune(quote) for _, b := range bs { - q := quoteSingleEscape(rune(b), quote, /* isString */ false) + q := quoteSingleEscape(rune(b), quote /* isString */, false) if q != "" { buf.WriteString(q) continue @@ -75,7 +75,7 @@ func QuoteSQLIdent(s string) string { func quoteSQLStringContent(s string, quote rune, buf *bytes.Buffer) { for _, r := range s { - q := quoteSingleEscape(r, quote, /* isString */ true) + q := quoteSingleEscape(r, quote /* isString */, true) if q != "" { buf.WriteString(q) continue diff --git a/token/token.go b/token/token.go index fbb0d73a..a953117f 100644 --- a/token/token.go +++ b/token/token.go @@ -36,6 +36,7 @@ type TokenComment struct { type TokenKind string const ( + TokenBad TokenKind = "" TokenEOF TokenKind = "" TokenIdent TokenKind = "" TokenParam TokenKind = "" diff --git a/tools/parse/main.go b/tools/parse/main.go index 17d4a6ac..f3a3c4f9 100644 --- a/tools/parse/main.go +++ b/tools/parse/main.go @@ -15,7 +15,7 @@ import ( "github.com/cloudspannerecosystem/memefish/ast" "github.com/cloudspannerecosystem/memefish/token" 
"github.com/cloudspannerecosystem/memefish/tools/util/poslang" - "github.com/k0kubun/pp" + "github.com/k0kubun/pp/v3" ) var usage = heredoc.Doc(` @@ -77,15 +77,26 @@ func main() { node, err = p.ParseQuery() case "expr": node, err = p.ParseExpr() + case "type": + node, err = p.ParseType() case "ddl": node, err = p.ParseDDL() case "dml": node, err = p.ParseDML() + default: + log.Fatalf("unknown mode: %s", *mode) } + logf("finish parsing successfully") + if err != nil { - log.Fatal(err) + fmt.Println("--- Error") + if list, ok := err.(memefish.MultiError); ok { + fmt.Print(list.FullError()) + } else { + fmt.Print(err) + } + fmt.Println() } - logf("finish parsing successfully") if *dig != "" { value := reflect.ValueOf(node) @@ -114,8 +125,11 @@ func main() { } fmt.Println("--- AST") - _, _ = pp.Println(node) + pprinter := pp.New() + pprinter.SetOmitEmpty(true) + _, _ = pprinter.Println(node) fmt.Println() + fmt.Println("--- SQL") fmt.Println(node.SQL())