Merge pull request #360 from mindsdb/error-info
Better error messaging
ea-rus authored Mar 14, 2024
2 parents ac6406b + edad4a8 commit 727cd00
Showing 8 changed files with 239 additions and 15 deletions.
26 changes: 26 additions & 0 deletions README.md
@@ -73,6 +73,32 @@ SLY does not support inheritance, therefore every dialect is described completel
- get_string - to return object as sql expression (or sub-expression)
- copy - to copy AST-tree to new object

### Error handling

To improve the user experience, a parsing error contains useful information about the location of the problem and possible ways to fix it.
1. It shows the location of the error when
   - a character cannot be tokenized (by the lexer)
   - a token is unexpected (by the parser)
2. It tries to suggest a correct token in place of (or right before) the error location. Possible suggestions:
   - A keyword is shown as is.
   - '[number]' - if a float or integer is expected
   - '[string]' - if a string is expected
   - '[identifier]' - if the name of an object is expected. For example, the identifiers are the bold words here:
     - "select **x** as **name** from **tbl1** where **col**=1"

How the suggestion works (see the sketch after this list):
It uses the next possible tokens defined by the syntax rules.
If the error is at the end of the query, it simply shows these tokens.
Otherwise:
- it replaces the bad token with a token from the list of possible tokens
- and tries to parse the query again; if there is no error:
  - the token is added to the suggestion list
- on a second iteration, the possible token is put before the bad token (instead of replacing it) and the same check is repeated.
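
A simplified sketch of that loop, assuming a flat list of `tokens`, the index of the bad token, a set of candidate tokens, and a `parse` callable (the real logic lives in `ErrorHandling.make_suggestion` in `mindsdb_sql/__init__.py` below):

```python
def suggest(tokens, bad_index, candidates, parse):
    """Return the candidate tokens that make the query parse again."""
    suggestions = []
    for candidate in candidates:
        # 1) replace the bad token with the candidate
        replaced = tokens[:bad_index] + [candidate] + tokens[bad_index + 1:]
        # 2) or insert the candidate right before the bad token
        inserted = tokens[:bad_index] + [candidate] + tokens[bad_index:]
        if parse(replaced) is not None or parse(inserted) is not None:
            suggestions.append(candidate)
    return suggestions
```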

Example:
![image](https://github.com/mindsdb/mindsdb_sql/assets/8502631/c4707087-ca6e-47f6-aaba-db3a641947a6)
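
A minimal usage sketch (the query and the printed text are only illustrative; `parse_sql` and `ParsingException` are the entry points touched by this change):

```python
from mindsdb_sql import parse_sql
from mindsdb_sql.exceptions import ParsingException

try:
    # a deliberately malformed query
    parse_sql("select from tbl1", dialect="mindsdb")
except ParsingException as e:
    # The message shows the offending line(s), a caret marker under the
    # error position and, when possible, a list of suggested tokens.
    print(e)
```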


# Planner


165 changes: 165 additions & 0 deletions mindsdb_sql/__init__.py
@@ -1,9 +1,166 @@
import re
from collections import defaultdict

from sly.lex import Token

from mindsdb_sql.exceptions import ParsingException
from mindsdb_sql.parser.ast import *


class ErrorHandling:
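"""Builds a human-readable parse error message (location and suggestions)
from the error info collected by the parser; used by parse_sql() below."""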

def __init__(self, lexer, parser):
self.parser = parser
self.lexer = lexer

def process(self, error_info):
self.tokens = [t for t in error_info['tokens'] if t is not None]
self.bad_token = error_info['bad_token']
self.expected_tokens = error_info['expected_tokens']

if len(self.tokens) == 0:
return 'Empty input'

# show error location
msgs = self.error_location()

# suggestion
suggestions = self.make_suggestion()

if suggestions:
prefix = 'Possible inputs: ' if len(suggestions) > 1 else 'Expected symbol: '
msgs.append(prefix + ', '.join([f'"{item}"' for item in suggestions]))
return '\n'.join(msgs)

def error_location(self):

# restore query text
lines_idx = defaultdict(str)

# used + unused tokens
for token in self.tokens:
if token is None:
continue
line = lines_idx[token.lineno]

if len(line) > token.index:
line = line[: token.index]
else:
line = line.ljust(token.index)

line += token.value
lines_idx[token.lineno] = line

msgs = []

# error message and location
if self.bad_token is None:
msgs.append('Syntax error, unexpected end of query:')
error_len = 1
# last line
error_line_num = list(lines_idx.keys())[-1]
error_index = len(lines_idx[error_line_num])
else:
msgs.append('Syntax error, unknown input:')
error_len = len(self.bad_token.value)
error_line_num = self.bad_token.lineno
error_index = self.bad_token.index

# shift line indexes (strips the accumulated padding from the beginning of each line)
lines = []
shift = 0
error_line = 0
for i, line_num in enumerate(lines_idx.keys()):
if line_num == error_line_num:
error_index -= shift
error_line = i

line = lines_idx[line_num]
lines.append(line[shift:])
shift = len(line)

# add source code
first_line = error_line - 2 if error_line > 1 else 0
for line in lines[first_line: error_line + 1]:
msgs.append('>' + line)

# error position
msgs.append('-' * (error_index + 1) + '^' * error_len)
return msgs

def make_suggestion(self):
if len(self.expected_tokens) == 0:
return []

# find error index
error_index = None
for i, token in enumerate(self.tokens):
if token is self.bad_token:
error_index = i

expected = {} # value: token

for token_name in self.expected_tokens:
value = getattr(self.lexer, token_name, None)
if token_name == 'ID':
# many other token types can also be matched as an identifier
expected = {'[identifier]': token_name}
break
elif token_name in ('FLOAT', 'INTEGER'):
expected['[number]'] = token_name

elif token_name in ('DQUOTE_STRING', 'QUOTE_STRING'):
expected['[string]'] = token_name

elif isinstance(value, str):
value = value.replace('\\b', '').replace('\\', '')

# only suggest plain values that don't contain regexp syntax
if '\\s' not in value and '|' not in value:
expected[value] = token_name

suggestions = []
if len(expected) == 1:
# use the single expected token as the suggestion
first_value = list(expected.keys())[0]
suggestions.append(first_value)

elif 1 < len(expected) < 20:
if self.bad_token is None:
# if this is the end of the query, just show the next expected keywords
return list(expected.keys())

# not every suggestion satisfies the rest of the query, so check whether it actually parses
for value, token_name in expected.items():
# make up a token
token = Token()
token.type = token_name
token.value = value
token.end = 0
token.index = 0
token.lineno = 0

# try to add token
tokens2 = self.tokens[:error_index] + [token] + self.tokens[error_index:]
if self.query_is_valid(tokens2):
suggestions.append(value)
continue

# try to replace token
tokens2 = self.tokens[:error_index - 1] + [token] + self.tokens[error_index:]
if self.query_is_valid(tokens2):
suggestions.append(value)
continue

return suggestions

def query_is_valid(self, tokens):
# try to parse list of tokens

ast = self.parser.parse(iter(tokens))
return ast is not None


def get_lexer_parser(dialect):
if dialect == 'sqlite':
from mindsdb_sql.parser.lexer import SQLLexer
@@ -29,4 +186,12 @@ def parse_sql(sql, dialect='mindsdb'):
lexer, parser = get_lexer_parser(dialect)
tokens = lexer.tokenize(sql)
ast = parser.parse(tokens)

if ast is None:

eh = ErrorHandling(lexer, parser)
message = eh.process(parser.error_info)

raise ParsingException(message)

return ast
23 changes: 23 additions & 0 deletions mindsdb_sql/parser/dialects/mindsdb/lexer.py
@@ -1,5 +1,6 @@
import re
from sly import Lexer
from sly.lex import LexError

"""
Unfortunately we can't inherit from base SQLLexer, because the order of rules is important.
@@ -355,3 +356,25 @@ def SYSTEM_VARIABLE(self, t):
t.value = t.value.strip('`')
return t

def error(self, t):

# split the text into lines and locate the error position
lines = []
shift = 0
error_line = 0
error_index = 0
for i, line in enumerate(self.text.split('\n')):
if 0 <= t.index - shift < len(line):
error_line = i
error_index = t.index - shift
lines.append(line)
shift += len(line) + 1

msgs = [f'Illegal character {t.value[0]!r}:']
# show the source code around the error
for line in lines[max(error_line - 1, 0): error_line + 1]:
msgs.append('>' + line)

msgs.append('-' * (error_index + 1) + '^')

raise LexError('\n'.join(msgs), t.value, self.index)
28 changes: 19 additions & 9 deletions mindsdb_sql/parser/dialects/mindsdb/parser.py
@@ -1219,8 +1219,8 @@ def result_columns(self, p):
'result_column quote_string')
def result_column(self, p):
col = p.result_column
if col.alias:
raise ParsingException(f'Attempt to provide two aliases for {str(col)}')
# if col.alias:
# raise ParsingException(f'Attempt to provide two aliases for {str(col)}')
if hasattr(p, 'dquote_string'):
alias = Identifier(p.dquote_string)
elif hasattr(p, 'quote_string'):
@@ -1599,7 +1599,6 @@ def function_name(self, p):
'COLUMNS',
'COMMIT',
'COMMITTED',
'CONCAT',
'DATASET',
'DATASETS',
'DATABASE',
@@ -1629,7 +1628,6 @@ def function_name(self, p):
'OFFSET',
'ONLY',
'OPEN',
'PARAMETER',
'PARAMETERS',
'PERSIST',
'PLUGINS',
@@ -1764,8 +1762,20 @@ def raw_query(self, p):
def empty(self, p):
pass

def error(self, p):
if p:
raise ParsingException(f"Syntax error at token {p.type}: \"{p.value}\"")
else:
raise ParsingException("Syntax error at EOF")
def error(self, p, expected_tokens=None):

if not hasattr(self, 'used_tokens'):
# fallback mode if the user has a different version of the sly module installed
if p:
raise ParsingException(f"Syntax error at token {p.type}: \"{p.value}\"")
else:
raise ParsingException("Syntax error at EOF")

# save error info for future usage
self.error_info = dict(
tokens=self.used_tokens.copy() + list(self.tokens),
bad_token=p,
expected_tokens=expected_tokens
)
# don't raise exception
return
2 changes: 0 additions & 2 deletions mindsdb_sql/parser/dialects/mysql/parser.py
@@ -972,7 +972,6 @@ def parameter(self, p):
'COLUMNS',
'COMMIT',
'COMMITTED',
'CONCAT',
'DATABASES',
'DATABASE',
'ENGINE',
@@ -992,7 +991,6 @@ def parameter(self, p):
'OFFSET',
'ONLY',
'OPEN',
'PARAMETER',
'PERSIST',
'PLUGINS',
'PRIVILEGES',
2 changes: 1 addition & 1 deletion mindsdb_sql/parser/parser.py
@@ -766,7 +766,7 @@ def dquote_string(self, p):
def empty(self, p):
pass

def error(self, p):
def error(self, p, expected_tokens=None):
if p:
raise ParsingException(f"Syntax error at token {p.type}: \"{p.value}\"")
else:
2 changes: 1 addition & 1 deletion sly/lex.py
@@ -31,7 +31,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------

__all__ = ['Lexer', 'LexerStateChange']
__all__ = ['Lexer', 'LexerStateChange', 'Token']

import re
import copy
6 changes: 4 additions & 2 deletions sly/yacc.py
@@ -2030,7 +2030,7 @@ def _build(cls, definitions):
# ----------------------------------------------------------------------
# Parsing Support. This is the parsing runtime that users use to
# ----------------------------------------------------------------------
def error(self, token):
def error(self, token, expected_tokens=None):
'''
Default error handling function. This may be subclassed.
'''
@@ -2076,6 +2076,7 @@ def parse(self, tokens):

# Set up the state and symbol stacks
self.tokens = tokens
self.used_tokens = []
self.statestack = statestack = [] # Stack of parsing states
self.symstack = symstack = [] # Stack of grammar symbols
pslice._stack = symstack # Associate the stack with the production
@@ -2096,6 +2097,7 @@ def parse(self, tokens):
if not lookahead:
if not lookaheadstack:
lookahead = next(tokens, None) # Get the next token
self.used_tokens.append(lookahead)
else:
lookahead = lookaheadstack.pop()
if not lookahead:
@@ -2187,7 +2189,7 @@ def parse(self, tokens):
else:
errtoken = lookahead

tok = self.error(errtoken)
tok = self.error(errtoken, expected_tokens=list(actions[self.state].keys()))
if tok:
# User must have done some kind of panic
# mode recovery on their own. The
