Skip to content

Commit

Permalink
completed lexer component
Browse files Browse the repository at this point in the history
  • Loading branch information
NickSolante committed Nov 13, 2024
1 parent eb3cee0 commit 2a311d1
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 31 deletions.
107 changes: 96 additions & 11 deletions lib/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ func New(input string) *Lexer {
func (l *Lexer) NextToken() token.Token {
var tok token.Token

l.skipWhitespace()

switch l.ch {
case '{':
tok = newToken(token.OBRACKET, l.ch)
Expand All @@ -31,7 +33,22 @@ func (l *Lexer) NextToken() token.Token {
tok = newToken(token.OPAREN, l.ch)
case ')':
tok = newToken(token.CPAREN, l.ch)
case ',':
tok = newToken(token.COMMA, l.ch)
case ':':
tok = newToken(token.COLON, l.ch)
case '"':
tok = l.readString()
case 0:
tok = token.Token{Type: token.EOF, Literal: ""}
default:
if l.isDigit(l.ch) || l.ch == token.MINUS {
tok = l.readNumber()
} else if l.isLiteralName(l.ch) {
tok = l.readLiteral()
}
}

l.readChar()
return tok
}
Expand All @@ -46,29 +63,97 @@ func (l *Lexer) readChar() {
l.readPosition += 1
}

// readIdentifier consumes characters for as long as isLetter accepts the
// current one and returns the slice of the input that was consumed. On
// return the lexer is positioned on the first character that is not part
// of the identifier.
func (l *Lexer) readIdentifier() string {
	start := l.position
	for ; isLetter(l.ch); l.readChar() {
	}
	return l.input[start:l.position]
}

// peekChar returns the byte at readPosition without advancing the lexer.
// It returns 0 when readPosition is at or beyond the end of the input.
func (l *Lexer) peekChar() byte {
	if next := l.readPosition; next < len(l.input) {
		return l.input[next]
	}
	return 0
}

func isLetter(ch byte) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
// readString lexes a double-quoted string. It must be called with the lexer
// positioned on the opening quote.
//
// On success it returns a STRING token whose Literal is the raw text between
// the quotes (escape sequences are kept verbatim, not decoded) and leaves the
// lexer on the closing quote. If the input ends before a closing quote is
// found it returns an ILLEGAL token holding the text read so far.
func (l *Lexer) readString() token.Token {
	start := l.position + 1 // first byte after the opening quote
	for {
		l.readChar()
		switch l.ch {
		case token.QUOTE:
			return token.Token{Type: token.STRING, Literal: l.input[start:l.position]}
		case token.BACKSLASH:
			// A backslash escapes the following byte, so an escaped quote
			// (\") must not terminate the string. Only skip when there is
			// a byte to skip, so the end-of-input case below still fires.
			if l.peekChar() != 0 {
				l.readChar()
			}
		case 0:
			// End of input without a closing quote: always report it,
			// including when nothing follows the opening quote.
			end := l.position
			if end > len(l.input) {
				end = len(l.input)
			}
			return token.Token{Type: token.ILLEGAL, Literal: l.input[start:end]}
		}
	}
}

func isDigit(ch byte) bool {
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func (l *Lexer) isDigit(ch byte) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}

// isLiteralName reports whether ch is one of 't', 'f' or 'n', the
// characters that readLiteral dispatches on.
func (l *Lexer) isLiteralName(ch byte) bool {
	switch ch {
	case 't', 'f', 'n':
		return true
	default:
		return false
	}
}

// readLiteral lexes one of the literal names true, false or null. It must be
// called with the lexer positioned on the first character of the name.
//
// On success it returns a token whose Type and Literal are both the matched
// word and leaves the lexer on the word's last character. If the input
// diverges from the expected word it returns an ILLEGAL token whose Literal
// is the prefix that did match, leaving the lexer on the last matching
// character. If the current character starts none of the three words, the
// ILLEGAL token carries just that character.
func (l *Lexer) readLiteral() token.Token {
	var word string
	switch l.ch {
	case 't':
		word = token.TRUE
	case 'f':
		word = token.FALSE
	case 'n':
		word = token.NULL
	default:
		return token.Token{Type: token.ILLEGAL, Literal: string(l.ch)}
	}

	start := l.position
	// The first character is already matched by the switch above; i counts
	// how many characters of word have been matched so far.
	for i := 1; i < len(word); i++ {
		if l.peekChar() != word[i] {
			// Report every character consumed so far, including the
			// first one (start+i, not start+i-1).
			return token.Token{Type: token.ILLEGAL, Literal: l.input[start : start+i]}
		}
		l.readChar()
	}
	return token.Token{Type: token.TokenType(word), Literal: word}
}

// readNumber lexes a JSON number following the RFC 8259 grammar:
//
//	number = [ "-" ] int [ "." 1*DIGIT ] [ ("e" / "E") [ "+" / "-" ] 1*DIGIT ]
//	int    = "0" / ( digit1-9 *DIGIT )
//
// It must be called with the lexer positioned on a digit or a minus sign.
// It returns a NUMBER token for a well-formed number, or an ILLEGAL token
// when the text is malformed (a lone "-", a leading zero such as "01", or a
// "." / exponent marker with no digits after it). In both cases Literal is
// the text that was consumed and the lexer is left on its last character.
func (l *Lexer) readNumber() token.Token {
	start := l.position
	valid := true

	// skipDigits consumes the run of digits following the current
	// character and reports how many were consumed.
	skipDigits := func() int {
		n := 0
		for l.isDigit(l.peekChar()) {
			l.readChar()
			n++
		}
		return n
	}

	// Optional sign: it must be followed directly by a digit.
	if l.ch == token.MINUS {
		if !l.isDigit(l.peekChar()) {
			return token.Token{Type: token.ILLEGAL, Literal: l.input[start:l.readPosition]}
		}
		l.readChar()
	}

	// Integer part: a single "0", or a non-zero digit followed by digits.
	if l.ch == '0' {
		if skipDigits() > 0 {
			valid = false // leading zeros are not allowed
		}
	} else {
		skipDigits()
	}

	// Optional fraction: "." followed by at least one digit.
	if l.peekChar() == token.DECIMALPOINT {
		l.readChar()
		if skipDigits() == 0 {
			valid = false
		}
	}

	// Optional exponent: e/E, optional sign, at least one digit.
	if c := l.peekChar(); c == 'e' || c == 'E' {
		l.readChar()
		if sign := l.peekChar(); sign == '+' || sign == token.MINUS {
			l.readChar()
		}
		if skipDigits() == 0 {
			valid = false
		}
	}

	tokType := token.TokenType(token.NUMBER)
	if !valid {
		tokType = token.ILLEGAL
	}
	return token.Token{Type: tokType, Literal: l.input[start:l.readPosition]}
}

// newToken builds a token of the given type whose literal is the single
// character ch converted to a string.
func newToken(tokenType token.TokenType, ch byte) token.Token {
	tok := token.Token{Type: tokenType}
	tok.Literal = string(ch)
	return tok
}

// skipWhitespace advances the lexer past insignificant whitespace so that
// l.ch is the first character of the next token (or 0 at end of input).
//
// The JSON whitespace characters are space, horizontal tab, line feed and
// carriage return; all four are skipped.
func (l *Lexer) skipWhitespace() {
	for {
		next := l.peekChar()
		switch {
		case l.ch == token.SPACE, l.ch == '\t', l.ch == '\n', l.ch == '\r':
			l.readChar()
		case l.ch == token.BACKSLASH && (next == 't' || next == 'n' || next == 'r'):
			// The two-character sequences backslash+t/n/r were skipped
			// before real control characters were handled; kept so inputs
			// that relied on it still lex the same way.
			// NOTE(review): this is not JSON whitespace; consider removing
			// once callers are confirmed not to depend on it.
			l.readChar()
			l.readChar()
		default:
			return
		}
	}
}
54 changes: 34 additions & 20 deletions lib/token/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,42 @@ package token

// TokenType identifies the kind of a Token.
type TokenType string

// Token is a single lexical unit produced by the lexer.
type Token struct {
// Type is the kind of the token.
Type TokenType
// Literal is the text associated with the token.
Literal string
}

const (
// structural characters
OBRACKET string = "{"
CBRACKET string = "}"
OPAREN string = "("
CPAREN string = ")"
OSQRBRACKET string = "["
CSQRBRACKET string = "]"
COLON string = ":"
DOUBLEQOUTE string = "\""
BACKSLASH string = "\\"
DOUBLBACKSLASH string = "\\\\"
OBRACKET = "{"
CBRACKET = "}"
OPAREN = "("
CPAREN = ")"
OSQRBRACKET = "["
CSQRBRACKET = "]"
COLON = ":"
COMMA = ","
DOUBLE_BACKSLASH = "\\\\"

IDENT string = "IDENT"
INT string = "INT"
IDENT = "IDENT"
INT = "INT"

FALSE string = "false"
NULL string = "null"
TRUE string = "true"
)
// token types for values
FALSE = "false"
NULL = "null"
TRUE = "true"
OBJECT = "object"
ARRAY = "array"
NUMBER = "number"
STRING = "string"

type Token struct {
Type TokenType
Literal string
}
EOF = "EOF"
ILLEGAL = "ILLEGAL"

// single-byte characters the lexer compares against the current input byte
BACKSLASH byte = '\\'
MINUS byte = '-'
DECIMALPOINT byte = '.'
SPACE byte = ' '
QUOTE byte = '"'
)

0 comments on commit 2a311d1

Please sign in to comment.