-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.c
144 lines (108 loc) · 2.92 KB
/
lexer.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#include "lexer.h"
#include "error.h"
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
// Builds a `struct Token` compound literal whose `type` is `name` and whose
// `position` is the current offset of the lexer's input stream (via ftell).
// NOTE: the expansion refers to a variable called `lexer`, so the macro can
// only be used where a `struct Lexer*` with that exact name is in scope.
#define new_token(name) (struct Token){.type = name, .position = ftell(lexer->fptr)}
struct Lexer init_lexer(const char* const file_name) {
FILE* fptr = fopen(file_name, "r");
if(fptr == NULL) {
return (struct Lexer){0};
}
struct Lexer lexer = {
.fptr = fptr,
.file_name = file_name
};
next_token(&lexer);
return lexer;
}
/*
 * Consumes characters from `fptr` for as long as they are whitespace
 * (as classified by isspace). The first non-whitespace character is pushed
 * back so the next read returns it; at end of file nothing is pushed back
 * (ungetc ignores EOF).
 */
void skip_whitespace(FILE* const fptr) {
    int ch = getc(fptr);
    while(ch != EOF && isspace(ch)) {
        ch = getc(fptr);
    }
    ungetc(ch, fptr);
}
/*
 * Discards the remainder of the current line from `fptr`: reads up to and
 * including the next '\n', or until end of file if no newline follows.
 */
void skip_comment(FILE* const fptr) {
    for(;;) {
        const int ch = getc(fptr);
        if(ch == EOF || ch == '\n') {
            return;
        }
    }
}
/*
 * Consumes a run of consecutive '-' characters from `fptr`. The first
 * character that is not '-' is pushed back so the next read returns it;
 * at end of file nothing is pushed back (ungetc ignores EOF).
 */
void skip_delimiter(FILE* const fptr) {
    int ch;
    do {
        ch = getc(fptr);
    } while(ch == '-');
    ungetc(ch, fptr);
}
/*
 * Reads an identifier from `fptr` and returns it as a newly allocated,
 * NUL-terminated string. The caller owns the returned buffer and must
 * free() it.
 *
 * `c` is the first character of the identifier, already consumed by the
 * caller. Further characters are taken from the stream while they are
 * alphanumeric or '_'. The first character that does not belong to the
 * identifier is pushed back onto the stream.
 *
 * At most 255 characters are stored. If the identifier continues beyond
 * that, a diagnostic is written to stderr, the result is truncated to 255
 * characters and the first unstored character is pushed back.
 *
 * Exits with status 100 if the buffer cannot be allocated or grown.
 */
char* get_identifier(FILE* const fptr, int c) {
    enum { MAX_IDENTIFIER_LENGTH = 255 };

    size_t size = 16;
    char* buf = calloc(size, sizeof(char));
    if(buf == NULL) {
        fprintf(stderr, "Couldn't allocate enough memory.\n");
        exit(100);
    }
    buf[0] = (char)c;
    size_t i = 1;
    for(;;) {
        c = getc(fptr);
        if(!isalnum(c) && c != '_') {
            break;
        }
        if(i >= MAX_IDENTIFIER_LENGTH) {
            // `c` has NOT been stored, so pushing it back below is correct.
            fprintf(stderr, "Identifiers cannot be larger than '255' characters long.");
            break;
        }
        // Keep room for this character AND the terminating NUL, so that the
        // final `buf[i] = '\0'` can never land one past the allocation.
        if(i + 1 >= size) {
            size *= 2;
            char* grown = realloc(buf, size);
            if(grown == NULL) {
                fprintf(stderr, "Couldn't allocate enough memory.\n");
                exit(100);
            }
            buf = grown;
        }
        buf[i++] = (char)c;
    }
    // Return the look-ahead character to the stream (no-op when it is EOF).
    ungetc(c, fptr);
    buf[i] = '\0';
    // Trim to the exact length; if shrinking fails the larger buffer is
    // still valid, so keep it rather than returning NULL.
    char* trimmed = realloc(buf, i + 1);
    if(trimmed != NULL) {
        buf = trimmed;
    }
    return buf;
}
/*
 * Advances the lexer by one token: the previous `next_token` becomes
 * `curr_token`, and a freshly scanned token becomes `next_token`.
 *
 * Whitespace and '#' comments (to end of line) are skipped first.
 * Recognised tokens:
 *   end of file -> TOK_EOF
 *   '='         -> TOK_EQUALS
 *   ','         -> TOK_COMMA
 *   '_'         -> TOK_UNDERSCORE
 *   '{' / '}'   -> TOK_OPEN_CURLY / TOK_CLOSE_CURLY
 *   run of '-'  -> TOK_DELIMITER
 *   letter or digit followed by identifier characters -> TOK_IDENTIFIER,
 *       with `content` set to the string returned by get_identifier()
 *
 * Every token except TOK_EOF records the stream offset (ftell) at the
 * moment it is created. Any other character is reported through
 * print_parser_error() and the program exits with status 5.
 */
void next_token(struct Lexer* const lexer) {
    int c;

    // Comments carry no information for the parser, so they are consumed
    // here, together with surrounding whitespace, instead of being emitted.
    for(;;) {
        skip_whitespace(lexer->fptr);
        c = getc(lexer->fptr);
        if(c != '#') {
            break;
        }
        skip_comment(lexer->fptr);
    }

    struct Token next;
    if(c == EOF) {
        next = (struct Token){.type = TOK_EOF};
    } else if(c == '=') {
        next = new_token(TOK_EQUALS);
    } else if(c == ',') {
        next = new_token(TOK_COMMA);
    } else if(c == '_') {
        next = new_token(TOK_UNDERSCORE);
    } else if(c == '{') {
        next = new_token(TOK_OPEN_CURLY);
    } else if(c == '}') {
        next = new_token(TOK_CLOSE_CURLY);
    } else if(c == '-') {
        // Swallow the whole run of dashes before recording the position.
        skip_delimiter(lexer->fptr);
        next = new_token(TOK_DELIMITER);
    } else if(isalnum(c)) {
        // Position is taken right after the first character, before the
        // rest of the identifier is read.
        next = new_token(TOK_IDENTIFIER);
        next.content = get_identifier(lexer->fptr, c);
    } else {
        print_parser_error(lexer->fptr, lexer->file_name, "Lexer Error", "Cannot decypher this token.");
        exit(5);
    }

    lexer->curr_token = lexer->next_token;
    lexer->next_token = next;
}