Logo  

CS456 - Systems Programming

Parsing the command input

To parse the input we use the lexer built for assignment 6. The main loop consists of:

/* Skeleton of the shell's main loop: prompt, read a line, tokenise it. */
int main(void)
{
  char *args[K], buf[K], wbuf[K], *input = NULL, *output = NULL;
  int append = FALSE, w = 0, stop;
  token_t tok;

  printf("> ");
  while(fgets(buf, K, stdin) != NULL) {  // one pass per input line; ends when fgets returns NULL
    startlex(buf);  // hand the freshly read line to the lexer

    stop = FALSE;
    do {
      tok = lex(wbuf);  // fetch the next token; wbuf is passed for the lexer to fill in
      switch(tok) {

        ...

      }
    } while (!stop);  // keep consuming tokens until a case sets stop

    // Execute the command here perhaps?

    printf("> ");
  }

  return 0;
}

The local variables and their purposes are:

Variable Purpose
char buf[K] Holds the input string read with fgets
char wbuf[K] Holds the word returned from the lexer, if applicable
char *args[K] Holds the command words (temporarily) before they are pushed to a command stack.
int w The current word pointer into args
char *input Points to an input filename if a < token is encountered, NULL otherwise.
char *output Points to an output filename if a > or >> token is encountered, NULL otherwise
int append Determines if > or >> was for output.
int stop Used to tell when to stop processing tokens (usually only when end of input is encountered)

Normally words or strings, when encountered are appended to the args array:

        /* Single-quoted strings, double-quoted strings and bare words are
           all stored the same way: a heap copy of wbuf is appended to args. */
        case T_SSTRING:
        case T_DSTRING:
        case T_WORD:
          args[w++] = strdup(wbuf);  // copy needed: wbuf is reused by every lex() call
          break;

We duplicate the wbuf buffer with strdup; the copy will need to be freed after we've executed the command. We could alter the lexer to return just T_WORD for both kinds of string as well, which would simplify the parser code, and we may yet do so. However, most shells treat variable and wildcard expansion differently depending on what kind of string it is, although it may be the case that the lexer should make that determination.

To handle > or >> we have the following code:

        case T_APPOUT:
          append = TRUE;
          /* fallthrough: >> shares the filename handling of > below */
        case T_OUTPUT:
          tok = lex(wbuf);  // the redirect target must be the very next token
          if (tok != T_WORD && tok != T_DSTRING && tok != T_SSTRING) {
            printf("Missing name for redirect.\n");
            stop = flushcmds();  // discard pending commands; flushcmds returns TRUE, so this also sets stop
            break;
          }
          output = strdup(wbuf);  // heap copy of the filename
          break;

The append case falls through to the main output code, which checks that the next token is a valid word or string and sets the output variable to that string. However, if the token isn't a word or string, we abort the entire command and all pending commands in the command stack (to be discussed later). stop will be set to TRUE in that case as well. The input case is virtually identical.

        case T_EOI:
          stop = TRUE;
          /* fallthrough: end of input also finishes and runs the current command */
        case T_EOC:
          pushcmd(args, &w, &input, &output, &append);
          sys();
          break;
        case T_PIPE:
          /* A pipe only pushes the command; nothing is run yet. */
          pushcmd(args, &w, &input, &output, &append);
          break;

For pipes we push the current command to the command stack via pushcmd to be discussed below. For a ; (T_EOC) or end of input (in this case a newline) we push the current command then call sys to execute the command stack. We only stop processing if end of input is reached, so if we have 'cmda ; cmdb ; cmdc' on one line, then the entire command sequence can be executed.

The pushcmd function is as follows:

/* Stack of parsed commands: pushcmd stores at cmdstack[csp++] and flushcmds
   pops with cmdstack[--csp], so csp is the number of commands currently held. */
cmd_t *cmdstack[MAXCMDS];
int csp = 0;

void pushcmd(char **args, int *w, char **input, char **output, int *append)
{
  cmd_t *cmd = malloc(sizeof(cmd_t));
  cmd->input  = *input;
  cmd->output = *output;
  cmd->append = *append;
  cmd->argv   = malloc(sizeof(char *) * (*w)+1);
  for(int i=0; i < *w; i++) cmd->argv[i] = args[i];
  cmd->argv[*w] = NULL;
  *input = *output = NULL;
  *append = *w = 0;

  cmdstack[csp++] = cmd;
}

The pushcmd function allocates a new cmd_t structure, populates it with the inputs and then pushes it onto the cmdstack array. The purpose of the inputs being passed by reference instead of by value is to reset the inputs to their defaults, rather than duplicating the code to reset them in the main loop.

Finally we have a few functions to free the command stack:

/*
 * freecmd - release a command together with everything it owns: the two
 * redirect filenames, every word of the NULL-terminated argv, the argv
 * array itself and finally the cmd_t.
 */
void freecmd(cmd_t *c)
{
  /* free(NULL) does nothing, so unset redirects need no test. */
  free(c->input);
  free(c->output);

  if (c->argv != NULL) {
    int idx = 0;
    while (c->argv[idx] != NULL) {
      free(c->argv[idx]);
      idx++;
    }
    free(c->argv);
  }

  free(c);
}

/*
 * flushcmds - pop every command off the command stack and free it.
 * Always returns TRUE, so a caller can write `stop = flushcmds();`.
 */
bool flushcmds(void)
{
  while (csp > 0) {
    csp -= 1;
    freecmd(cmdstack[csp]);
  }

  return TRUE;
}

flushcmds is the main entry point: it pops each command off the command stack and passes it to freecmd, which frees it. Note that flushcmds returns the Boolean TRUE so we can flush the command stack and set the stop variable in the main loop in one invocation.

At the end of the token processing loop we have the following code to clean up in the event of an error:

    /* Release whatever was not handed to pushcmd (pushcmd resets these after
       taking them over), then restore the per-line defaults. */
    if (input) free(input);
    if (output) free(output);
    for(int i = 0; i < w; i++) free(args[i]);
    input = output = NULL;
    append = w = 0;

Note we are not doing all the error checking we should, such as determining if we have ambiguous redirects (< on the right side of a pipe or > on the left,) or making sure that there is in fact a command on the right side of a pipe (|.) Checking for ambiguity when pushing a command (and checking the command stack level) should be sufficient to error check those conditions.

main.c

#include "shell.h"

/* Printable token names; main indexes this table with a token_t value when
   reporting an unhandled token.
   NOTE(review): the order presumably mirrors the token_t enumeration in
   shell.h - confirm before adding or reordering entries. */
char *tstring[] = {
  "EOI", "<", "<<", "<<-", "<<<", ">", ">>", "&>", "&>>", "|", "|&", "&",
  "||", "&&", ";", "'", "\"", "word"
};

/* Stack of parsed commands: pushcmd stores at cmdstack[csp++] and flushcmds
   pops with cmdstack[--csp], so csp is the number of commands currently held. */
cmd_t *cmdstack[MAXCMDS];
int csp = 0;

void pushcmd(char **args, int *w, char **input, char **output, int *append)
{
  cmd_t *cmd = malloc(sizeof(cmd_t));
  cmd->input  = *input;
  cmd->output = *output;
  cmd->append = *append;
  cmd->argv   = malloc(sizeof(char *) * (*w)+1);
  for(int i=0; i < *w; i++) cmd->argv[i] = args[i];
  cmd->argv[*w] = NULL;
  *input = *output = NULL;
  *append = *w = 0;

  cmdstack[csp++] = cmd;
}

/*
 * freecmd - release a command together with everything it owns: the two
 * redirect filenames, every word of the NULL-terminated argv, the argv
 * array itself and finally the cmd_t.
 */
void freecmd(cmd_t *c)
{
  /* free(NULL) does nothing, so unset redirects need no test. */
  free(c->input);
  free(c->output);

  if (c->argv != NULL) {
    int idx = 0;
    while (c->argv[idx] != NULL) {
      free(c->argv[idx]);
      idx++;
    }
    free(c->argv);
  }

  free(c);
}

/*
 * flushcmds - pop every command off the command stack and free it.
 * Always returns TRUE, so a caller can write `stop = flushcmds();`.
 */
bool flushcmds(void)
{
  while (csp > 0) {
    csp -= 1;
    freecmd(cmdstack[csp]);
  }

  return TRUE;
}

/*
 * main - the shell's read/parse/execute loop.
 *
 * Prints a prompt, reads one line with fgets and feeds it to the lexer.
 * Words and strings are collected into args; < > >> record a redirect
 * filename; | pushes the current command onto the command stack; ; and end
 * of input push the current command and call sys().  Any parse error flushes
 * the command stack and abandons the rest of the line.
 *
 * Returns 0 once fgets reports end of file or an error.
 */
int main(void)
{
  char *args[K], buf[K], wbuf[K], *input = NULL, *output = NULL;
  int append = FALSE, w = 0, stop;
  token_t tok;

  printf("> ");
  fflush(stdout);  /* the prompt has no newline, so push it out explicitly */
  while(fgets(buf, K, stdin) != NULL) {

    startlex(buf);

    stop = FALSE;
    do {
      tok = lex(wbuf);
      switch(tok) {
        case T_EOI:
          stop = TRUE;
          /* fallthrough: end of input also finishes and runs the command */
        case T_EOC:
          pushcmd(args, &w, &input, &output, &append);
          sys();
          break;
        case T_PIPE:
          pushcmd(args, &w, &input, &output, &append);
          break;
        case T_SSTRING:
        case T_DSTRING:
        case T_WORD:
          /* wbuf is reused by every lex() call, so keep a heap copy. */
          args[w++] = strdup(wbuf);
          break;
        case T_INPUT:
          tok = lex(wbuf);
          if (tok != T_WORD && tok != T_DSTRING && tok != T_SSTRING) {
            printf("Missing name for redirect.\n");
            stop = flushcmds();
            break;
          }
          free(input);  /* a repeated < replaces the earlier name; don't leak it */
          input = strdup(wbuf);
          break;
        case T_APPOUT:
        case T_OUTPUT:
          /* Record the mode of THIS redirect so that a later > overrides an
             earlier >> instead of inheriting its append flag. */
          append = (tok == T_APPOUT) ? TRUE : FALSE;
          tok = lex(wbuf);
          if (tok != T_WORD && tok != T_DSTRING && tok != T_SSTRING) {
            printf("Missing name for redirect.\n");
            stop = flushcmds();
            break;
          }
          free(output);  /* a repeated > or >> replaces the earlier name */
          output = strdup(wbuf);
          break;
        default:
          /* NOTE(review): assumes every token_t value is a valid index into
             tstring - confirm against the enum in shell.h. */
          printf("Unhandled token [%s].\n",tstring[tok]);
          stop = flushcmds();
          break;
      }
    } while (!stop);

    /* Anything still held here was never handed to pushcmd (error paths);
       release it and restore the per-line defaults. */
    free(input);
    free(output);
    for(int i = 0; i < w; i++) free(args[i]);
    input = output = NULL;
    append = w = 0;

    printf("> ");
    fflush(stdout);
  }

  return 0;
}