core: Add lexer + parser groundwork

Signed-off-by: Ian Moffett <ian@mirocom.org>
This commit is contained in:
2026-05-23 02:21:09 -04:00
parent 659dd38932
commit 74e2e8c772
6 changed files with 232 additions and 0 deletions
+5
View File
@@ -6,6 +6,7 @@
#include <stdio.h>
#include <unistd.h>
#include "cescal/state.h"
#include "cescal/parser.h"
#include "cescal/log.h"
static void
@@ -28,6 +29,10 @@ compile(const char *pathname)
return -1;
}
if (parser_parse(&st) < 0) {
return -1;
}
state_close(&st);
return 0;
}
+84
View File
@@ -0,0 +1,84 @@
#include <errno.h>
#include <stdbool.h>
#include "cescal/lexer.h"
#include "cescal/log.h"
/*
 * Tests whether a character counts as whitespace for the lexer
 *
 * @c: Character to check
 *
 * Returns true for space, horizontal tab, newline, form feed and
 * carriage return; false for every other character.
 */
static inline bool
lexer_is_ws(char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
}
/*
 * Consume a single character from the input source file and
 * optionally skip whitespace
 *
 * @state: Compiler state
 * @skip_ws: If true skip whitespace, otherwise whitespace characters
 *           are returned like any other character
 *
 * Returns the consumed character, or '\0' if @state is NULL or
 * readbuf_read() yields '\0' (presumably end of input; confirm
 * against readbuf_read()).
 */
static char
lexer_consume_single(struct cescal_state *state, bool skip_ws)
{
    char c;

    if (state == NULL) {
        return '\0';
    }

    while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
        /* Only discard whitespace when the caller asked for it */
        if (skip_ws && lexer_is_ws(c)) {
            continue;
        }

        return c;
    }

    return '\0';
}
/*
 * Scan the next token from the input, skipping leading whitespace
 *
 * @state: Compiler state
 * @res: Filled with the token type and its character on success
 *
 * Returns 0 when a token was recognized. Returns -1 with errno set to
 * EINVAL if either argument is NULL, -1 when no further character
 * could be consumed, and -1 after reporting an error when the
 * character does not start a known token (@res is left untouched).
 */
int
lexer_nom(struct cescal_state *state, struct token *res)
{
    char ch;

    if (state == NULL || res == NULL) {
        errno = EINVAL;
        return -1;
    }

    ch = lexer_consume_single(state, true);
    if (ch == '\0') {
        return -1;
    }

    switch (ch) {
    case '(':
        res->type = TT_LPAREN;
        break;
    case ')':
        res->type = TT_RPAREN;
        break;
    case ',':
        res->type = TT_COMMA;
        break;
    default:
        cc_error("got unknown token '%c'\n", ch);
        return -1;
    }

    /* Every recognized token so far is a single character */
    res->c = ch;
    return 0;
}
+61
View File
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2026, Chloe M.
* Provided under the BSD-3 clause
*/
#include <stdint.h>
#include <errno.h>
#include "cescal/log.h"
#include "cescal/parser.h"
#include "cescal/state.h"
#include "cescal/lexer.h"
/*
 * Symbolic token: wraps @tok in square brackets, e.g. symtok("ident")
 * yields "[ident]". Relies on adjacent string literal concatenation,
 * so @tok must be a string literal.
 */
#define symtok(tok) \
"[" tok "]"
/*
 * Quoted token: wraps @tok in single quotes, e.g. qtok("(") yields
 * "'('". Relies on adjacent string literal concatenation, so @tok
 * must be a string literal.
 */
#define qtok(tok) \
"'" tok "'"
/*
 * Convert a token type value (TT_*) to its string by indexing toktab
 * directly; the index is not range checked.
 */
#define tokstr1(tt) \
toktab[(tt)]
/*
 * Convert a token to its string; @tok is a pointer to a token whose
 * `type` field indexes toktab. The index is not range checked.
 */
#define tokstr(tok) \
toktab[(tok)->type]
/*
 * Converts numeric tokens into human readable strings
 *
 * Indexed by token type (TT_*). Symbolic entries are rendered in
 * square brackets and literal entries in single quotes. Both the
 * strings and the table slots are const so the table is immutable.
 * Any slot below the highest listed index that has no initializer
 * is NULL.
 */
static const char *const toktab[] = {
    [TT_NONE]       = symtok("none"),
    [TT_IDENT]      = symtok("ident"),
    [TT_INTLIT]     = symtok("number"),
    [TT_LPAREN]     = qtok("("),
    [TT_RPAREN]     = qtok(")"),
    [TT_COMMA]      = qtok(","),
    [TT_RETURN]     = qtok("return"),
    [TT_PUB]        = qtok("pub"),
    [TT_PROC]       = qtok("proc"),
    [TT_BEGIN]      = qtok("begin"),
    [TT_END]        = qtok("end")
};
int
parser_parse(struct cescal_state *state)
{
struct token tok;
if (state == NULL) {
errno = EINVAL;
return -1;
}
while (lexer_nom(state, &tok) == 0) {
cc_trace("got token %s\n", tokstr(&tok));
}
return 0;
}