completed lexer component

NickSolante · Nov 13, 2024 · 2a311d1 · 2a311d1
1 parent eb3cee0
commit 2a311d1
Show file tree

Hide file tree

Showing 2 changed files with 130 additions and 31 deletions.
diff --git a/lib/lexer/lexer.go b/lib/lexer/lexer.go
@@ -18,6 +18,8 @@ func New(input string) *Lexer {
 func (l *Lexer) NextToken() token.Token {
 	var tok token.Token
 
+	l.skipWhitespace()
+
 	switch l.ch {
 	case '{':
 		tok = newToken(token.OBRACKET, l.ch)
@@ -31,7 +33,22 @@ func (l *Lexer) NextToken() token.Token {
 		tok = newToken(token.OPAREN, l.ch)
 	case ')':
 		tok = newToken(token.CPAREN, l.ch)
+	case ',':
+		tok = newToken(token.COMMA, l.ch)
+	case ':':
+		tok = newToken(token.COLON, l.ch)
+	case '"':
+		tok = l.readString()
+	case 0:
+		tok = token.Token{Type: token.EOF, Literal: ""}
+	default:
+		if l.isDigit(l.ch) || l.ch == token.MINUS {
+			tok = l.readNumber()
+		} else if l.isLiteralName(l.ch) {
+			tok = l.readLiteral()
+		}
 	}
+
 	l.readChar()
 	return tok
 }
@@ -46,29 +63,97 @@ func (l *Lexer) readChar() {
 	l.readPosition += 1
 }
 
-func (l *Lexer) readIdentifier() string {
-	position := l.position
-	for isLetter(l.ch) {
-		l.readChar()
-	}
-	return l.input[position:l.position]
-}
-
+// peekChar returns the byte at readPosition without changing any lexer
+// state. It returns 0 when readPosition is at or past the end of the input.
 func (l *Lexer) peekChar() byte {
 	if l.readPosition >= len(l.input) {
 		return 0
 	}
 	return l.input[l.readPosition]
 }
 
-func isLetter(ch byte) bool {
-	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
+// readString scans a double-quoted string. It must be called with l.ch on
+// the opening quote.
+//
+// On success it returns a STRING token whose Literal is the raw text between
+// the quotes (escape sequences are kept verbatim, not decoded) and leaves
+// l.ch on the closing quote; NextToken's trailing readChar steps past it.
+//
+// If the input ends before a closing quote is found, it returns an ILLEGAL
+// token carrying everything after the opening quote. This includes the case
+// where the opening quote is the last byte of the input.
+func (l *Lexer) readString() token.Token {
+	start := l.position + 1 // first byte after the opening quote
+	for {
+		l.readChar()
+		switch l.ch {
+		case token.QUOTE:
+			return token.Token{Type: token.STRING, Literal: l.input[start:l.position]}
+		case token.BACKSLASH:
+			// Consume the escaped byte as well, so that \" does not end
+			// the string and \\ is not mistaken for an escape of the
+			// byte that follows it.
+			l.readChar()
+			if l.ch == 0 {
+				return token.Token{Type: token.ILLEGAL, Literal: l.input[start:l.position]}
+			}
+		case 0:
+			// readChar reports end of input as 0: the string was never closed.
+			return token.Token{Type: token.ILLEGAL, Literal: l.input[start:l.position]}
+		}
+	}
 }
 
-func isDigit(ch byte) bool {
+// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
+// The receiver is not used.
+func (l *Lexer) isDigit(ch byte) bool {
 	return '0' <= ch && ch <= '9'
 }
 
+// isLiteralName reports whether ch is the first byte of one of the literal
+// names true, false or null, i.e. 't', 'f' or 'n'.
+func (l *Lexer) isLiteralName(ch byte) bool {
+	switch ch {
+	case 't', 'f', 'n':
+		return true
+	}
+	return false
+}
+
+// readLiteral scans one of the literal names true, false or null, selected
+// by the current byte l.ch.
+//
+// On success it returns the matching TRUE, FALSE or NULL token and leaves
+// l.ch on the last byte of the name. If the input diverges from the expected
+// name, it returns an ILLEGAL token whose Literal is the text consumed so
+// far (for example "tru" for the input "trux"). If l.ch does not start any
+// literal name, it returns an ILLEGAL token holding just that byte.
+func (l *Lexer) readLiteral() token.Token {
+	switch l.ch {
+	case 't':
+		return l.readKeyword(token.TRUE)
+	case 'f':
+		return l.readKeyword(token.FALSE)
+	case 'n':
+		return l.readKeyword(token.NULL)
+	default:
+		return token.Token{Type: token.ILLEGAL, Literal: string(l.ch)}
+	}
+}
+
+// readKeyword matches the rest of keyword against the input, assuming l.ch
+// already equals keyword[0]. Bytes are consumed only while they match, so on
+// a mismatch the offending byte is left for the next token.
+func (l *Lexer) readKeyword(keyword string) token.Token {
+	start := l.position
+	for i := 1; i < len(keyword); i++ {
+		if l.peekChar() != keyword[i] {
+			// l.position is the last byte that matched; include it so the
+			// literal reflects everything this call consumed.
+			return token.Token{Type: token.ILLEGAL, Literal: l.input[start : l.position+1]}
+		}
+		l.readChar()
+	}
+	return token.Token{Type: token.TokenType(keyword), Literal: keyword}
+}
+
+// readNumber scans a JSON number starting at the current byte, which must be
+// a digit or a minus sign.
+//
+// It consumes the longest run of bytes that can appear in a number and then
+// checks that run against the JSON grammar:
+//
+//	-? (0 | [1-9][0-9]*) (. [0-9]+)? ([eE] [+-]? [0-9]+)?
+//
+// A well-formed run yields a NUMBER token; anything else (a lone "-",
+// "1-2", "1..2", "01", "1e") yields an ILLEGAL token with the same text.
+// Either way l.ch is left on the last consumed byte.
+func (l *Lexer) readNumber() token.Token {
+	start := l.position
+	for isNumberByte(l.peekChar()) {
+		l.readChar()
+	}
+	lexeme := l.input[start:l.readPosition]
+	if !isJSONNumber(lexeme) {
+		return token.Token{Type: token.ILLEGAL, Literal: lexeme}
+	}
+	return token.Token{Type: token.NUMBER, Literal: lexeme}
+}
+
+// isNumberByte reports whether ch may occur somewhere inside a JSON number.
+func isNumberByte(ch byte) bool {
+	switch {
+	case '0' <= ch && ch <= '9':
+		return true
+	case ch == token.DECIMALPOINT, ch == token.MINUS, ch == '+', ch == 'e', ch == 'E':
+		return true
+	}
+	return false
+}
+
+// isJSONNumber reports whether s is exactly one well-formed JSON number.
+func isJSONNumber(s string) bool {
+	i := 0
+	// digits advances i over a run of decimal digits and returns its length.
+	digits := func() int {
+		n := 0
+		for i < len(s) && '0' <= s[i] && s[i] <= '9' {
+			i++
+			n++
+		}
+		return n
+	}
+
+	if i < len(s) && s[i] == token.MINUS {
+		i++
+	}
+	// Integer part: a single 0, or a run of digits not starting with 0.
+	if i < len(s) && s[i] == '0' {
+		i++
+	} else if digits() == 0 {
+		return false
+	}
+	// Optional fraction: at least one digit must follow the point.
+	if i < len(s) && s[i] == token.DECIMALPOINT {
+		i++
+		if digits() == 0 {
+			return false
+		}
+	}
+	// Optional exponent with optional sign and at least one digit.
+	if i < len(s) && (s[i] == 'e' || s[i] == 'E') {
+		i++
+		if i < len(s) && (s[i] == '+' || s[i] == token.MINUS) {
+			i++
+		}
+		if digits() == 0 {
+			return false
+		}
+	}
+	return i == len(s)
+}
+
+// newToken builds a Token of the given type whose Literal is string(ch).
 func newToken(tokenType token.TokenType, ch byte) token.Token {
 	return token.Token{Type: tokenType, Literal: string(ch)}
 }
+
+// skipWhitespace advances the lexer past insignificant whitespace so that
+// l.ch is the first byte of the next token (or 0 at end of input).
+//
+// It skips the four JSON whitespace bytes: space, horizontal tab, line feed
+// and carriage return. For backward compatibility it also skips the
+// two-character texts `\t`, `\n` and `\r` (a backslash followed by the
+// letter), which earlier versions of this lexer treated as whitespace.
+func (l *Lexer) skipWhitespace() {
+	for {
+		switch {
+		case l.ch == token.SPACE, l.ch == '\t', l.ch == '\n', l.ch == '\r':
+			l.readChar()
+		case l.ch == token.BACKSLASH &&
+			(l.peekChar() == 't' || l.peekChar() == 'n' || l.peekChar() == 'r'):
+			// Legacy form: drop both the backslash and the letter.
+			l.readChar()
+			l.readChar()
+		default:
+			return
+		}
+	}
+}
diff --git a/lib/token/token.go b/lib/token/token.go
@@ -2,28 +2,42 @@ package token
 
 type TokenType string
 
+// Token is one lexical unit produced by the lexer.
+type Token struct {
+	// Type is the kind of token (one of the constants declared in this package).
+	Type    TokenType
+	// Literal is the text associated with the token.
+	Literal string
+}
+
 const (
 	// structural characters
-	OBRACKET       string = "{"
-	CBRACKET       string = "}"
-	OPAREN         string = "("
-	CPAREN         string = ")"
-	OSQRBRACKET    string = "["
-	CSQRBRACKET    string = "]"
-	COLON          string = ":"
-	DOUBLEQOUTE    string = "\""
-	BACKSLASH      string = "\\"
-	DOUBLBACKSLASH string = "\\\\"
+	OBRACKET         = "{"
+	CBRACKET         = "}"
+	OPAREN           = "("
+	CPAREN           = ")"
+	OSQRBRACKET      = "["
+	CSQRBRACKET      = "]"
+	COLON            = ":"
+	COMMA            = ","
+	DOUBLE_BACKSLASH = "\\\\"
 
-	IDENT string = "IDENT"
-	INT   string = "INT"
+	IDENT = "IDENT"
+	INT   = "INT"
 
-	FALSE string = "false"
-	NULL  string = "null"
-	TRUE  string = "true"
-)
+	//values
+	FALSE  = "false"
+	NULL   = "null"
+	TRUE   = "true"
+	OBJECT = "object"
+	ARRAY  = "array"
+	NUMBER = "number"
+	STRING = "string"
 
-type Token struct {
-	Type    TokenType
-	Literal string
-}
+	EOF     = "EOF"
+	ILLEGAL = "ILLEGAL"
+
+	// single-byte characters the lexer compares directly against input bytes
+	BACKSLASH    byte = '\\'
+	MINUS        byte = '-'
+	DECIMALPOINT byte = '.'
+	SPACE        byte = ' '
+	QUOTE        byte = '"'
+)