Compare commits

...

6 Commits

Author SHA1 Message Date
chloe 82e68e92a9 core: lexer: Return -1 on bad ident
Signed-off-by: Chloe M. <chloe@mirocom.org>
2026-05-23 08:13:45 -04:00
chloe f6e7360fd5 core: lexer: Add token for 'return' keyword
Signed-off-by: Chloe M. <chloe@mirocom.org>
2026-05-23 08:13:45 -04:00
chloe 86645eb137 core: lexer: Add arrow token
Signed-off-by: Chloe M. <chloe@mirocom.org>
2026-05-23 08:13:45 -04:00
chloe 3e3ccae003 core: lexer: Add comment skipping
Signed-off-by: Chloe M. <chloe@mirocom.org>
2026-05-23 08:13:45 -04:00
chloe 50bd6324fe core: lexer: Fix up whitespace skipping
Signed-off-by: Chloe M. <chloe@mirocom.org>
2026-05-23 08:13:45 -04:00
chloe 9bc47590c3 core: lexer: Add keyword tokens
Signed-off-by: Chloe M. <chloe@mirocom.org>
2026-05-23 08:13:45 -04:00
3 changed files with 109 additions and 13 deletions
+94 -2
View File
@@ -7,6 +7,7 @@
#include <stdbool.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "cescal/lexer.h"
#include "cescal/log.h"
#include "cescal/ptrbox.h"
@@ -72,13 +73,17 @@ lexer_consume_single(struct cescal_state *state, bool skip_ws)
}
if ((c = lexer_putback_pop(state)) != '\0') {
if (lexer_is_ws(c) && !skip_ws) {
if (!skip_ws) {
return c;
}
if (skip_ws && !lexer_is_ws(c)) {
return c;
}
}
while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
if (lexer_is_ws(c)) {
if (lexer_is_ws(c) && skip_ws) {
continue;
}
@@ -107,6 +112,7 @@ lexer_scan_ident(struct cescal_state *state, char lc, struct token *res)
if (lc != '_' && !isalpha(lc)) {
cc_error("bad identifier\n");
return -1;
}
buf[bufsz++] = lc;
@@ -135,6 +141,75 @@ lexer_scan_ident(struct cescal_state *state, char lc, struct token *res)
return 0;
}
/*
 * Promote an identifier token to a keyword token when its text is
 * exactly one of the reserved words: "pub", "proc", "begin", "end"
 * or "return". Any other text leaves the token untouched.
 *
 * @state: Compiler state (only checked against NULL here)
 * @res: Token whose string `s` is inspected and whose `type` may be
 *       rewritten to the matching keyword type
 *
 * Nothing happens if either argument is NULL.
 */
static void
lexer_check_kw(struct cescal_state *state, struct token *res)
{
    const char *ident;

    if (state == NULL || res == NULL) {
        return;
    }

    /* Matches are exact and case-sensitive; at most one can succeed */
    ident = res->s;
    if (strcmp(ident, "pub") == 0) {
        res->type = TT_PUB;
    } else if (strcmp(ident, "proc") == 0) {
        res->type = TT_PROC;
    } else if (strcmp(ident, "begin") == 0) {
        res->type = TT_BEGIN;
    } else if (strcmp(ident, "end") == 0) {
        res->type = TT_END;
    } else if (strcmp(ident, "return") == 0) {
        res->type = TT_RETURN;
    }
}
/*
 * Discard the remainder of a comment line.
 *
 * Characters are pulled one at a time, with whitespace skipping
 * disabled so the line terminator is actually seen, until either a
 * newline or a '\0' comes back from lexer_consume_single().
 * NOTE(review): '\0' presumably signals end of input -- confirm
 * against lexer_consume_single().
 *
 * @state: Compiler state (no-op when NULL)
 */
static void
lexer_skip_comment(struct cescal_state *state)
{
    char ch;

    if (state == NULL) {
        return;
    }

    do {
        ch = lexer_consume_single(state, false);
    } while (ch != '\n' && ch != '\0');
}
int
lexer_nom(struct cescal_state *state, struct token *res)
{
@@ -162,8 +237,25 @@ lexer_nom(struct cescal_state *state, struct token *res)
res->type = TT_COMMA;
res->c = c;
return 0;
case '/':
if (lexer_consume_single(state, true) == '/') {
res->type = TT_COMMENT;
res->c = c;
lexer_skip_comment(state);
return 0;
}
return -1;
case '-':
res->c = c;
if (lexer_consume_single(state, true) == '>') {
res->type = TT_ARROW;
return 0;
}
return -1;
default:
if (lexer_scan_ident(state, c, res) == 0) {
lexer_check_kw(state, res);
return 0;
}
}
+2
View File
@@ -32,10 +32,12 @@
static const char *toktab[] = {
[TT_NONE] = symtok("none"),
[TT_IDENT] = symtok("ident"),
[TT_COMMENT] = symtok("comment"),
[TT_INTLIT] = symtok("number"),
[TT_LPAREN] = qtok("("),
[TT_RPAREN] = qtok(")"),
[TT_COMMA] = qtok(","),
[TT_ARROW] = qtok("->"),
[TT_RETURN] = qtok("return"),
[TT_PUB] = qtok("pub"),
[TT_PROC] = qtok("proc"),
+2
View File
@@ -12,10 +12,12 @@
typedef enum {
TT_NONE, /* [none] */
TT_IDENT, /* [identifier] */
TT_COMMENT, /* [comment : ignored] */
TT_INTLIT, /* [0-9]+ */
TT_LPAREN, /* '(' */
TT_RPAREN, /* ')' */
TT_COMMA, /* ',' */
TT_ARROW, /* '->' */
TT_RETURN, /* 'return' */
TT_PUB, /* 'pub' */
TT_PROC, /* 'proc' */