From f462f66c61e70dedcc4f9ee319457df142e83f6b Mon Sep 17 00:00:00 2001 From: Trent Huber Date: Mon, 30 Jun 2025 20:58:59 -0400 Subject: [PATCH] Even more improved tokenizer --- .gitignore | 1 + src/.gitignore | 1 - src/input.c | 40 +++++++++++++------------ src/lex.c | 80 ++++++++++++++++++-------------------------------- src/lex.h | 18 ++++++++++-- 5 files changed, 66 insertions(+), 74 deletions(-) delete mode 100644 src/.gitignore diff --git a/.gitignore b/.gitignore index 378eac2..7353e7e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ build +*.o diff --git a/src/.gitignore b/src/.gitignore deleted file mode 100644 index 5761abc..0000000 --- a/src/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.o diff --git a/src/input.c b/src/input.c index f08463a..96b0f79 100644 --- a/src/input.c +++ b/src/input.c @@ -25,23 +25,23 @@ enum character { BACKSPACE = '\177', }; -static char buffer[BUFLEN + 1]; +static char buffer[1 + BUFLEN + 2]; char *input(void) { - char *cursor, *end; + char *start, *cursor, *end; int c, i; signal(SIGCHLD, waitbg); // TODO: Use sigaction for portability reset: - end = cursor = buffer; - *history.t = *buffer = '\0'; + end = cursor = start = buffer + 1; + *history.t = *start = '\0'; history.c = history.t; - while (buffer == end) { + while (start == end) { fputs(PROMPT, stdout); while ((c = getchar()) != '\n') { if (c >= ' ' && c <= '~') { - if (end - buffer == BUFLEN) continue; + if (end - start == BUFLEN) continue; memmove(cursor + 1, cursor, end - cursor); *cursor++ = c; *++end = '\0'; @@ -60,7 +60,7 @@ reset: case CLEAR: fputs("\033[H\033[J", stdout); fputs(PROMPT, stdout); - fputs(buffer, stdout); + fputs(start, stdout); continue; case ESCAPE: switch ((c = getchar())) { @@ -69,8 +69,8 @@ reset: while (cursor != end && *cursor == ' ') putchar(*cursor++); break; case BACKWARD: - while (cursor != buffer && *(cursor - 1) == ' ') putchar((--cursor, '\b')); - while (cursor != buffer && *(cursor - 1) != ' ') putchar((--cursor, '\b')); + while (cursor != start && *(cursor - 1) == ' ') putchar((--cursor, '\b')); + while (cursor != start && *(cursor - 1) != ' ') putchar((--cursor, '\b')); break; case ARROW: switch ((c = getchar())) { @@ -79,19 +79,19 @@ reset: if (history.c == (c == UP ? history.b : history.t)) continue; putchar('\r'); - for (i = end - buffer + strlen(PROMPT); i > 0; --i) putchar(' '); + for (i = end - start + strlen(PROMPT); i > 0; --i) putchar(' '); putchar('\r'); - if (strcmp(history.c, buffer) != 0) strcpy(history.t, buffer); + if (strcmp(history.c, start) != 0) strcpy(history.t, start); if (c == UP) DEC(history, c); else INC(history, c); - strcpy(buffer, history.c); - end = cursor = buffer + strlen(buffer); + strcpy(start, history.c); + end = cursor = start + strlen(start); fputs(PROMPT, stdout); - fputs(buffer, stdout); + fputs(start, stdout); break; case LEFT: - if (cursor > buffer) putchar((--cursor, '\b')); + if (cursor > start) putchar((--cursor, '\b')); break; case RIGHT: if (cursor < end) putchar(*cursor++); @@ -103,7 +103,7 @@ reset: } break; case BACKSPACE: - if (cursor == buffer) continue; + if (cursor == start) continue; memmove(cursor - 1, cursor, end - cursor); --cursor; *--end = '\0'; @@ -118,10 +118,12 @@ reset: } } fpurge(stdout); - push(&history, buffer); + push(&history, start); + + *end = ';'; + *++end = '\0'; signal(SIGCHLD, SIG_DFL); - return buffer; + return start; } - diff --git a/src/lex.c b/src/lex.c index fe20f40..2843d65 100644 --- a/src/lex.c +++ b/src/lex.c @@ -1,72 +1,50 @@ +#include #include +#include #include "history.h" #include "lex.h" -static char *tokens[1 + BUFLEN + 1]; -static struct cmd cmds[1 + (BUFLEN + 1) / 2 + 1]; - -static int delimiter(char c) { - return c == ' ' || c == '&' || c == '|' || c == ';' || c == '`' || c == '\0'; -} - -static enum terminator strp2term(char **strp) { - char *p; - enum terminator term; - - switch (*(p = (*strp)++)) { - case '&': - term = **strp == '&' ? (++*strp, AND) : BG; - break; - case '|': - term = **strp == '|' ? (++*strp, OR) : PIPE; - break; - default: - term = SEMI; - } - *p = '\0'; - - return term; -} +static char *tokens[BUFLEN + 1]; +static struct cmd cmds[1 + (BUFLEN + 1) / 2 + 1] = {{.type = SEMI}}; struct cmd *lex(char *b) { char **t; struct cmd *c; - + if (!b) return NULL; - t = tokens + 1; - c = cmds + 1; - while (*b == ' ') ++b; - c->args = t; + t = tokens; + c = cmds; while (*b) switch (*b) { default: - *t++ = b; - while (!delimiter(*b)) ++b; + if (!*(b - 1)) { + if (c->type) { + (++c)->args = t; + c->type = NONE; + } + *t++ = b; + } + ++b; break; - case ' ': - *b++ = '\0'; - while (*b == ' ') ++b; + case '<': + case '>': break; - case ';': case '&': case '|': - if (*(t - 1)) { + if (*b == *(b + 1)) *b = '\0'; + case ';': + if (c->type == NONE) { *t++ = NULL; - c++->type = strp2term(&b); - c->args = t; - } else strp2term(&b); - break; - case '`': - *t++ = ++b; - while (*b != '\'') ++b; - *b = '\0'; - break; - } - if (*(t - 1)) { - *t = NULL; - c++->type = SEMI; + if (!*b) { + ++b; + ++c->type; + } + c->type += *b; + } + case ' ': + *b++ = '\0'; } - *c = (struct cmd){0}; + *++c = (struct cmd){0}; return cmds; } diff --git a/src/lex.h b/src/lex.h index d5b862d..b865490 100644 --- a/src/lex.h +++ b/src/lex.h @@ -1,15 +1,27 @@ enum terminator { - SEMI, - BG, + NONE, + SEMI = ';', + BG = '&', AND, - PIPE, + PIPE = '|', OR, }; +struct fred { + int newfd, mode; + char *oldfd; +}; + +/* a>&b -> dup2(b, a); reopen(a, "w"); + * a<&b -> dup2(b, a); reopen(a, "r"); + * x >a >b >c ... + */ + struct cmd { char **args; enum terminator type; int pipe[2]; + struct fred freds[(BUFLEN - 1) / 3 + 1]; }; struct cmd *lex(char *b); -- 2.51.0