core: Add lexer + parser groundwork

Signed-off-by: Ian Moffett <ian@mirocom.org>
This commit is contained in:
2026-05-23 02:21:09 -04:00
parent 659dd38932
commit 74e2e8c772
6 changed files with 232 additions and 0 deletions
+5
View File
@@ -6,6 +6,7 @@
#include <stdio.h>
#include <unistd.h>
#include "cescal/state.h"
#include "cescal/parser.h"
#include "cescal/log.h"
static void
@@ -28,6 +29,10 @@ compile(const char *pathname)
return -1;
}
if (parser_parse(&st) < 0) {
return -1;
}
state_close(&st);
return 0;
}
+84
View File
@@ -0,0 +1,84 @@
#include <errno.h>
#include <stdbool.h>
#include "cescal/lexer.h"
#include "cescal/log.h"
/*
 * Tests whether a character counts as whitespace for the lexer
 *
 * @c: Character to test
 *
 * Returns true for tab, newline, space, form feed and carriage
 * return; false for every other character
 */
static inline bool
lexer_is_ws(char c)
{
    return c == ' '  || c == '\t' || c == '\n' ||
           c == '\f' || c == '\r';
}
/*
 * Consume a single character from the input source file and
 * optionally skip whitespace
 *
 * @state: Compiler state
 * @skip_ws: If true, whitespace characters are discarded and the
 *           next non-whitespace character is returned; if false,
 *           the next character is returned as-is
 *
 * Returns the character read, or '\0' if @state is NULL or
 * readbuf_read() yields '\0'
 */
static char
lexer_consume_single(struct cescal_state *state, bool skip_ws)
{
    char c;

    if (state == NULL) {
        return '\0';
    }

    while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
        /* Only drop whitespace when the caller asked for it */
        if (skip_ws && lexer_is_ws(c)) {
            continue;
        }

        return c;
    }

    return '\0';
}
/*
 * Lex the next token from the input source file
 *
 * @state: Compiler state
 * @res: Receives the token on success; left untouched on failure
 *
 * Returns zero on success. Returns -1 with errno set to EINVAL if
 * either argument is NULL, and -1 if no further character could be
 * read or the character read does not match a known token.
 */
int
lexer_nom(struct cescal_state *state, struct token *res)
{
    char ch;

    if (state == NULL || res == NULL) {
        errno = EINVAL;
        return -1;
    }

    ch = lexer_consume_single(state, true);
    if (ch == '\0') {
        return -1;
    }

    /* Map single-character punctuation onto its token type */
    if (ch == '(') {
        res->type = TT_LPAREN;
    } else if (ch == ')') {
        res->type = TT_RPAREN;
    } else if (ch == ',') {
        res->type = TT_COMMA;
    } else {
        cc_error("got unknown token '%c'\n", ch);
        return -1;
    }

    /* Every token recognized above carries its source character */
    res->c = ch;
    return 0;
}
+61
View File
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2026, Chloe M.
* Provided under the BSD-3 clause
*/
#include <stdint.h>
#include <errno.h>
#include "cescal/log.h"
#include "cescal/parser.h"
#include "cescal/state.h"
#include "cescal/lexer.h"
/* Symbolic token: wraps a descriptive name in brackets, e.g. "[ident]" */
#define symtok(tok) \
    "[" tok "]"

/* Quoted token: wraps literal source text in single quotes, e.g. "'('" */
#define qtok(tok) \
    "'" tok "'"

/* Convert a token type value to its human readable string */
#define tokstr1(tt) \
    toktab[(tt)]

/* Convert a token (pointer to struct token) to its human readable string */
#define tokstr(tok) \
    toktab[(tok)->type]

/*
 * Converts numeric tokens into human readable strings
 *
 * Indexed by token type. This is a read-only lookup table, so both
 * the strings and the pointer slots themselves are const.
 */
static const char *const toktab[] = {
    [TT_NONE]   = symtok("none"),
    [TT_IDENT]  = symtok("ident"),
    [TT_INTLIT] = symtok("number"),
    [TT_LPAREN] = qtok("("),
    [TT_RPAREN] = qtok(")"),
    [TT_COMMA]  = qtok(","),
    [TT_RETURN] = qtok("return"),
    [TT_PUB]    = qtok("pub"),
    [TT_PROC]   = qtok("proc"),
    [TT_BEGIN]  = qtok("begin"),
    [TT_END]    = qtok("end")
};
int
parser_parse(struct cescal_state *state)
{
struct token tok;
if (state == NULL) {
errno = EINVAL;
return -1;
}
while (lexer_nom(state, &tok) == 0) {
cc_trace("got token %s\n", tokstr(&tok));
}
return 0;
}
+24
View File
@@ -0,0 +1,24 @@
/*
* Copyright (c) 2026, Chloe M.
* Provided under the BSD-3 clause
*/
#ifndef CESCAL_LEXER_H
#define CESCAL_LEXER_H 1
#include <stdint.h>
#include <stddef.h>
#include "cescal/token.h"
#include "cescal/state.h"
/*
 * Consume a single token from the input source file
 *
 * @state: Compiler state
 * @res: Token result is written here
 *
 * Returns zero on success and a negative value on failure; if
 * either argument is NULL, errno is set to EINVAL
 */
int lexer_nom(struct cescal_state *state, struct token *res);
#endif /* !CESCAL_LEXER_H */
+20
View File
@@ -0,0 +1,20 @@
/*
* Copyright (c) 2026, Chloe M.
* Provided under the BSD-3 clause
*/
#ifndef CESCAL_PARSER_H
#define CESCAL_PARSER_H 1
#include "cescal/state.h"
/*
 * Begin parsing the input source file
 *
 * @state: Compiler state
 *
 * Returns zero on success, or a negative value with errno set to
 * EINVAL if @state is NULL
 */
int parser_parse(struct cescal_state *state);
#endif /* !CESCAL_PARSER_H */
+38
View File
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2026, Chloe M.
* Provided under the BSD-3 clause
*/
#ifndef CESCAL_TOKEN_H
#define CESCAL_TOKEN_H 1
/*
 * Represents valid source file token types
 */
typedef enum {
TT_NONE, /* [none] */
TT_IDENT, /* [identifier] */
TT_INTLIT, /* [0-9]+ */
TT_LPAREN, /* '(' */
TT_RPAREN, /* ')' */
TT_COMMA, /* ',' */
TT_RETURN, /* 'return' */
TT_PUB, /* 'pub' */
TT_PROC, /* 'proc' */
TT_BEGIN, /* 'begin' */
TT_END, /* 'end' */
} tt_t;
/*
 * Represents a source file token
 *
 * @type: Token type
 * @c: Source character of the token; the lexer stores it for the
 *     single-character tokens '(', ')' and ','
 */
struct token {
tt_t type;
union {
char c;
};
};
#endif /* !CESCAL_TOKEN_H */