50bd6324fe
Signed-off-by: Chloe M. <chloe@mirocom.org>
223 lines
3.9 KiB
C
223 lines
3.9 KiB
C
/*
 * Copyright (c) 2026, Chloe M.
 * Provided under the BSD-3 clause
 */
|
|
|
|
#include <errno.h>
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
#include "cescal/lexer.h"
|
|
#include "cescal/log.h"
|
|
#include "cescal/ptrbox.h"
|
|
|
|
static inline void
|
|
lexer_putback(struct cescal_state *state, char c)
|
|
{
|
|
if (state == NULL) {
|
|
return;
|
|
}
|
|
|
|
state->lex_putback = c;
|
|
}
|
|
|
|
static inline char
|
|
lexer_putback_pop(struct cescal_state *state)
|
|
{
|
|
char retc;
|
|
|
|
if (state == NULL) {
|
|
return '\0';
|
|
}
|
|
|
|
retc = state->lex_putback;
|
|
state->lex_putback = '\0';
|
|
return retc;
|
|
}
|
|
|
|
/*
 * Tells whether a character counts as whitespace for the lexer:
 * space, horizontal tab, newline, form feed or carriage return
 *
 * @c: Character to check
 *
 * Returns true for whitespace, false for anything else.
 */
static inline bool
lexer_is_ws(char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
}
|
|
|
|
/*
|
|
* Consume a single character from the input source file and
|
|
* optionally skip whitespace
|
|
*
|
|
* @state: Compiler state
|
|
* @skip_ws: If true skip whitespace
|
|
*/
|
|
static char
|
|
lexer_consume_single(struct cescal_state *state, bool skip_ws)
|
|
{
|
|
char c;
|
|
|
|
if (state == NULL) {
|
|
return '\0';
|
|
}
|
|
|
|
if ((c = lexer_putback_pop(state)) != '\0') {
|
|
if (!skip_ws) {
|
|
return c;
|
|
}
|
|
|
|
if (skip_ws && !lexer_is_ws(c)) {
|
|
return c;
|
|
}
|
|
}
|
|
|
|
while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
|
|
if (lexer_is_ws(c) && skip_ws) {
|
|
continue;
|
|
}
|
|
|
|
return c;
|
|
}
|
|
|
|
return '\0';
|
|
}
|
|
|
|
static int
|
|
lexer_scan_ident(struct cescal_state *state, char lc, struct token *res)
|
|
{
|
|
char *buf, c;
|
|
size_t bufsz, bufcap;
|
|
|
|
if (state == NULL || res == NULL) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
bufsz = 0;
|
|
bufcap = 8;
|
|
if ((buf = malloc(bufcap)) == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
if (lc != '_' && !isalpha(lc)) {
|
|
cc_error("bad identifier\n");
|
|
}
|
|
|
|
buf[bufsz++] = lc;
|
|
for (;;) {
|
|
c = lexer_consume_single(state, false);
|
|
if (c != '_' && !isalnum(c)) {
|
|
lexer_putback(state, c);
|
|
buf[bufsz] = '\0';
|
|
break;
|
|
}
|
|
|
|
buf[bufsz++] = c;
|
|
if (bufsz >= bufcap) {
|
|
bufcap += 8;
|
|
buf = realloc(buf, bufcap);
|
|
}
|
|
|
|
if (buf == NULL) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
res->s = ptrbox_strdup(&state->ptrbox, buf);
|
|
res->type = TT_IDENT;
|
|
free(buf);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Checks if an identifier token is actually a keyword
|
|
*
|
|
* @state: Compiler state
|
|
* @res: Token result
|
|
*/
|
|
static void
|
|
lexer_check_kw(struct cescal_state *state, struct token *res)
|
|
{
|
|
if (state == NULL || res == NULL) {
|
|
return;
|
|
}
|
|
|
|
switch (*res->s) {
|
|
case 'p':
|
|
if (strcmp(res->s, "pub") == 0) {
|
|
res->type = TT_PUB;
|
|
return;
|
|
}
|
|
|
|
if (strcmp(res->s, "proc") == 0) {
|
|
res->type = TT_PROC;
|
|
return;
|
|
}
|
|
|
|
break;
|
|
case 'b':
|
|
if (strcmp(res->s, "begin") == 0) {
|
|
res->type = TT_BEGIN;
|
|
return;
|
|
}
|
|
|
|
break;
|
|
case 'e':
|
|
if (strcmp(res->s, "end") == 0) {
|
|
res->type = TT_END;
|
|
return;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
int
|
|
lexer_nom(struct cescal_state *state, struct token *res)
|
|
{
|
|
char c;
|
|
|
|
if (state == NULL || res == NULL) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if ((c = lexer_consume_single(state, true)) == '\0') {
|
|
return -1;
|
|
}
|
|
|
|
switch (c) {
|
|
case '(':
|
|
res->type = TT_LPAREN;
|
|
res->c = c;
|
|
return 0;
|
|
case ')':
|
|
res->type = TT_RPAREN;
|
|
res->c = c;
|
|
return 0;
|
|
case ',':
|
|
res->type = TT_COMMA;
|
|
res->c = c;
|
|
return 0;
|
|
default:
|
|
if (lexer_scan_ident(state, c, res) == 0) {
|
|
lexer_check_kw(state, res);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
cc_error("got unknown token '%c'\n", c);
|
|
return -1;
|
|
}
|