From: Trent Huber Date: Tue, 28 Oct 2025 03:31:53 +0000 (-0400) Subject: Quotes and variables made reasonable X-Git-Url: https://trenthuber.com/code?a=commitdiff_plain;h=a683081c5ec70da3080ce7c45e021438a3e13320;p=thus.git Quotes and variables made reasonable --- diff --git a/README.md b/README.md index ca18c4f..6fccdbc 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,8 @@ thus is a custom Unix shell for POSIX platforms. - Conditional execution (`&&`, `||`) - File redirection (`&1`, etc.) - Globbing (`*`, `?`, `[...]`) -- Quoting with escape sequences (`"\r...\n"`) -- Environment variables (`set`, `unset`, `$VAR$`, etc.) +- Quoting (`'...', "..."`) +- Variables (`set`, `unset`, `$VAR$`, etc.) - Aliasing (`alias`, `unalias`) - Configuration files (`~.thuslogin`, `~.thusrc`) - Cached history (`~.thushistory`) @@ -49,33 +49,41 @@ where it takes a subtly different approach. ### Quotes -Quoting is done with double quotes (`"..."`) and undergoes no shell -substitution, similar to single quotes in Bourne shell. In place of -substitution, quotes can be concatenated with surrounding tokens not separated -by whitespace. +Like most other shells, variables, tildes, and escape sequences will be expanded +inside of double quotes, but not single quotes. *Unlike* other shells however, +quotes do not concatenate with other arguments that are not separated from the +quote by whitespace. For example, the command `echo "abc"def` would print +`abc def` whereas other shells would combine them into a single argument and +print `abcdef`. -### Environment variables and aliases +### Variables and aliases -Environment variables are referred to by tokens that begin and end with a `$`. -For example, evaluating the path would look like `$PATH$`. Setting environment +Variables are referred to by strings of characters that begin and end with a +`$`. For example, evaluating the path variable would look like `$PATH$`. Setting variables is done with the `set` built-in command, not with the `name=value` syntax. This syntax is similarly avoided when declaring aliases with the `alias` built-in command. +Additionally, all shell variables are automatically made to be environment +variables, so there's no need to `export` variables. + ### Leading and trailing slashes Prepending `./` to executables located in the current directory is not mandatory unless there already exists an executable in `$PATH$` with the same name that you would like to override. -The `$HOME$`, `$PWD$`, and `$PATH$` environment variables are always initialized -with trailing slashes. Therefore, whenever one of these variables or `~` is +The `$HOME$`, `$PWD$`, and `$PATH$` variables are always initialized with +trailing slashes. Therefore, whenever one of these variables or `~` is substituted in the shell, it will retain the trailing slash. ### File redirection -For the sake of syntactic consistency, there is no whitespace between a file -redirection operator and the filename that comes after it. +If there is whitespace between a file redirection operator and a filename +following it, then it is *not* parsed as a file redirection, but instead as two +separate arguments. Something like `ls >file` would redirect the output of the +`ls` command to `file`, whereas `ls > file` would list any files named `>` and +`file`. ## Resources diff --git a/src/builtins/unset.c b/src/builtins/unset.c index 28dfbcd..f86f39f 100644 --- a/src/builtins/unset.c +++ b/src/builtins/unset.c @@ -7,7 +7,7 @@ BUILTIN(unset) { if (argc != 2) return usage(argv[0], "name"); if (!getenv(argv[1])) { - note("Environment variable does not exist"); + note("$%s$ does not exist", argv[1]); return EXIT_FAILURE; } if (unsetenv(argv[1]) == -1) { diff --git a/src/parse.c b/src/parse.c index ac78b00..dbd2dfd 100644 --- a/src/parse.c +++ b/src/parse.c @@ -10,9 +10,9 @@ #include "utils.h" int parse(struct context *c) { - int globbing, e, offset, globflags; + int globbing, quote, v, offset, globflags; size_t prevsublen, sublen; - char *stlend, *p, *end, *env, term, **sub; + char *stlend, *p, *end, *var, term, **sub; long l; static glob_t globs; @@ -25,30 +25,23 @@ int parse(struct context *c) { c->r->mode = NONE; c->prev = c->current; + for (end = c->b; *end; ++end); + prevsublen = globbing = quote = 0; + sub = NULL; if (globs.gl_pathc) { globfree(&globs); globs.gl_pathc = 0; } - prevsublen = globbing = 0; for (*c->t = c->b; *c->b; ++c->b) switch (*c->b) { case '<': case '>': - if (c->r->mode) { - note("Invalid syntax for file redirection"); - return quit(c); - } - if (c->r - c->redirects == MAXREDIRECTS) { - note("Too many file redirects, exceeds %d redirect limit", MAXREDIRECTS); - return quit(c); - } - if (*c->t != c->b) { - if ((l = strtol(*c->t, &stlend, 10)) < 0 || l > INT_MAX || stlend != c->b) { - note("Invalid value for a file redirection"); - return quit(c); - } - c->r->newfd = l; - } else c->r->newfd = *c->b == '>'; + if (quote || c->r->mode) break; + + if (*c->t == c->b) c->r->newfd = *c->b == '>'; + else if ((c->r->newfd = strtol(*c->t, &stlend, 10)) < 0 + || c->r->newfd > INT_MAX || stlend != c->b) + break; c->r->mode = *c->b; if (*(c->b + 1) == '>') { ++c->r->mode; @@ -56,116 +49,129 @@ int parse(struct context *c) { } c->r->oldname = c->b + 1; if (*(c->b + 1) == '&') ++c->b; + break; case '$': p = c->b++; while (*c->b && *c->b != '$') ++c->b; if (!*c->b) { - note("Environment variable lacks a terminating `$'"); + note("Variable left open-ended"); return quit(c); } *c->b++ = '\0'; - for (end = c->b; *end; ++end); l = strtol(p + 1, &stlend, 10); errno = 0; - if (stlend == c->b - 1) env = l >= 0 && l < argcount ? arglist[l] : c->b - 1; + if (stlend == c->b - 1) var = l >= 0 && l < argcount ? arglist[l] : c->b - 1; else if (strcmp(p + 1, "^") == 0) { - if (!sprintf(env = (char [12]){0}, "%d", status)) { + if (!sprintf(var = (char [12]){0}, "%d", status)) { note("Unable to get previous command status"); return quit(c); } - } else if (!(env = getenv(p + 1))) { - note("Environment variable does not exist"); - return quit(c); - } + } else if (!(var = getenv(p + 1))) var = ""; - e = strlen(env); - offset = e - (c->b - p); + v = strlen(var); + offset = v - (c->b - p); memmove(c->b + offset, c->b, end - c->b + 1); - strncpy(p, env, e); + strncpy(p, var, v); c->b += offset - 1; + end += offset; break; case '~': - for (end = c->b; *end; ++end); offset = strlen(home); memmove(c->b + offset, c->b + 1, end - c->b); strncpy(c->b, home, offset); c->b += offset - 1; + end += offset; break; - case '[': - while (*c->b && *c->b != ']') ++c->b; + case '\'': + if (quote) break; + + *c->b = '\0'; + if (*c->t != c->b) ++c->t; + *c->t = ++c->b; + + while (*c->b && *c->b != '\'') ++c->b; if (!*c->b) { - note("Range in glob left open-ended"); + note("Quote left open-ended"); return quit(c); } - case '*': - case '?': - globbing = 1; + + *c->b = '\0'; + ++c->t; + *c->t = c->b + 1; + break; case '"': - for (end = (p = c->b) + 1, c->b = NULL; *end; ++end) if (!c->b) { - if (*end == '"') c->b = end; - if (*end == '\\') ++end; - } - if (!c->b) { - note("Quote left open-ended"); - return quit(c); + *c->b = '\0'; + if (quote || *c->t != c->b) ++c->t; + *c->t = c->b + 1; + + quote = !quote; + + break; + case '\\': + if (!quote) break; + switch (*(c->b + 1)) { + case '$': + case '"': + case '\\': + break; + default: + memmove(c->b, c->b + 1, end-- - c->b--); + *(c->b + 1) = *c->b; } - memmove(p, p + 1, end-- - p); - --c->b; + memmove(c->b, c->b + 1, end-- - c->b); + break; + case '*': + case '?': + case '[': + if (quote) break; - while (p != c->b) if (*p++ == '\\') { - switch (*p) { - case 't': - *p = '\t'; - break; - case 'v': - *p = '\v'; - break; - case 'r': - *p = '\r'; - break; - case 'n': - *p = '\n'; - break; - } - memmove(p - 1, p, end-- - p + 1); - --c->b; + if (*c->b == '[') { + p = c->b; + while (*p && *p != ' ' && *p != ']') ++p; + if (*p != ']') break; + c->b = p; } - memmove(p, p + 1, end-- - p); - --c->b; + globbing = 1; break; case '#': - *(c->b + 1) = '\0'; case '&': case '|': case ';': case ' ': + if (quote) break; + if (*c->b == '#') *(c->b + 1) = '\0'; + term = *c->b; *c->b = '\0'; - if (c->r->mode) { - switch (*c->r->oldname) { - case '&': - if ((l = strtol(++c->r->oldname, &stlend, 10)) < 0 || l > INT_MAX - || *stlend) { - case '\0': - note("Invalid syntax for file redirection"); - return quit(c); - } + if (c->r->mode) switch (*c->r->oldname) { + case '&': + ++c->r->oldname; + if (*c->r->oldname && (l = strtol(c->r->oldname, &stlend, 10)) >= 0 + && l <= INT_MAX && !*stlend) { c->r->oldfd = l; c->r->oldname = NULL; + default: + if (c->r - c->redirects == MAXREDIRECTS) { + note("Too many file redirects, exceeds %d redirect limit", MAXREDIRECTS); + return quit(c); + } + ++c->r; + *c->t = c->b; } - (++c->r)->mode = NONE; - globbing = 0; + case '\0': + c->r->mode = NONE; + } - *c->t = c->b; - } else if (!c->alias && c->t == c->tokens && (sub = getalias(c->tokens[0])) - || globbing) { - if (globbing) { + if (*c->t != c->b) { + if (!c->alias && c->t == c->tokens && (sub = getalias(c->tokens[0]))) + for (sublen = 0; sub[sublen]; ++sublen); + else if (globbing) { globflags = GLOB_MARK; if (prevsublen) globflags |= GLOB_APPEND; switch (glob(*c->t, globflags, NULL, &globs)) { @@ -179,12 +185,14 @@ int parse(struct context *c) { sub = globs.gl_pathv + prevsublen; prevsublen = globs.gl_pathc; globbing = 0; - } else for (sublen = 0; sub[sublen]; ++sublen); - - memcpy(c->t, sub, sublen * sizeof*c->t); - c->t += sublen; - *c->t = c->b; - } else if (*c->t != c->b) ++c->t; + } + if (sub) { + memcpy(c->t, sub, sublen * sizeof*c->t); + c->t += sublen - 1; + sub = NULL; + } + ++c->t; + } if (term != ' ') { if (c->t != c->tokens) { @@ -208,9 +216,15 @@ int parse(struct context *c) { return 1; } + *c->t = c->b + 1; } + if (quote) { + note("Quote left open-ended"); + return quit(c); + } + switch (c->current.term) { case AND: case PIPE: diff --git a/src/utils.c b/src/utils.c index 55d3502..154bd32 100644 --- a/src/utils.c +++ b/src/utils.c @@ -48,7 +48,7 @@ void fatal(char *fmt, ...) { } void init(void) { - char buffer[PATH_MAX], *shlvlstr; + char *shlvlstr, buffer[PATH_MAX]; size_t l; long shlvl; @@ -59,28 +59,27 @@ void init(void) { note("Unable to update $SHLVL$ environment variable"); if (!(home = getenv("HOME"))) fatal("Unable to find home directory"); - if (shlvl == 1) { - l = strlen(home); - if (home[l - 1] != '/') { - strcpy(buffer, home); - buffer[l] = '/'; - buffer[l + 1] = '\0'; - if (setenv("HOME", buffer, 1) == -1 || !(home = getenv("HOME"))) - fatal("Unable to append trailing slash to $HOME$"); - } - - if (!getcwd(buffer, PATH_MAX)) fatal("Unable to find current directory"); - l = strlen(buffer); + l = strlen(home); + if (home[l - 1] != '/') { + strcpy(buffer, home); buffer[l] = '/'; buffer[l + 1] = '\0'; - if (setenv("PWD", buffer, 1) == -1) - fatal("Unable to append trailing slash to $PWD$"); - - if (setenv("PATH", "/usr/local/bin/:/usr/local/sbin/" - ":/usr/bin/:/usr/sbin/:/bin/:/sbin/", 1) == -1) - fatal("Unable to initialize $PATH$"); + if (setenv("HOME", buffer, 1) == -1 || !(home = getenv("HOME"))) + note("Unable to append trailing slash to $HOME$"); } + if (!getcwd(buffer, PATH_MAX)) fatal("Unable to find current directory"); + l = strlen(buffer); + buffer[l] = '/'; + buffer[l + 1] = '\0'; + if (setenv("PWD", buffer, 1) == -1) + note("Unable to append trailing slash to $PWD$"); + + if (shlvl == 1 + && setenv("PATH", "/usr/local/bin/:/usr/local/sbin/:" + "/usr/bin/:/usr/sbin/:/bin/:/sbin/", 1) == -1) + note("Unable to initialize $PATH$"); + initfg(); initbg(); inithistory();