Logo  

CS456 - Systems Programming

Interpreting part 4

lang.c

In main() we want to take input from either stdin or from a FILE stream, then start accepting a list of statements as if the program were inside of a block statement (just w/o the {}'s.)

  FILE *fp = stdin;
  stnode_t *hp = NULL, *ep = NULL, *st;

  if (argc > 1) {
    if ((fp = fopen(argv[1], "r")) == NULL) {
      perror("fopen");
      exit(1);
    }
  }

  startlex(fp);

  // Main parse loop:
  while(_cur.tok != T_EOI) {
    st = parse_stmt();
    if (hp == NULL) hp = ep = st;
    else {
      ep->next = st;
      ep = st;
    }
  }

  if (fp != stdin) fclose(fp);

  // At this point we can either "run" or "compile" the AST:


Next we add two following two functions:

void match(token_t tok)
{
  if (tok == _cur.tok) _cur.tok = lex(&_cur.value, _cur.buf);
  else {
    printf("Syntax error. Expected %s, got %s\n", tokstr[tok], tokstr[_cur.tok]);
    exit(1);
  }
}

int accept(token_t tok)
{
  if (_cur.tok == tok) {
    match(tok);
    return TRUE;
  }
  return FALSE;
}

The match function expects the current token to match the token value passed to it and then moves to the next token, bailing out of the program with a syntax error (this should really be improved to show at least the line number the error occurred on.) tokstr is the array of strings representing each token. The match function will be the only way to advance the token in the lang.c file.

The accept function is almost exactly analogous to the lexers next function except for tokens rather than characters.

Then we add a function to allocate a new statement node of a particular type (while or if statements need to have their sub-structure allocated as well):

stnode_t *new_stnode(stmt_t type)
{
  stnode_t *n = malloc(sizeof(stnode_t));
  n->type = type;
  switch(type) {
    case ST_WHILE:
      n->w = malloc(sizeof(struct while_st));
      break;
    case ST_IF:
      n->i = malloc(sizeof(struct if_st));
      break;
    default:
      break;
  }
  n->next = NULL;
  return n;
}

What follows is the full parse_stmt() code, to see how it works lets consider just the if / if-else case:

// The grammar (from interpreter2):
//  "if" "(" expression ")"  statement [ "else" statement ]

    case T_IF:
      match(T_IF);
      st = new_stnode(ST_IF);
      match(T_OPAREN);
      st->i->expr = expr(&empty);
      match(T_CPAREN);
      st->i->body = parse_stmt();
      st->i->elsebody = NULL;
      if (accept(T_ELSE)) {
        st->i->elsebody = parse_stmt();
      }

When we switch on the current token (_cur.tok) we'll match the if keyword and allocate a new statement node of type ST_IF, and force a match on the open parenthesis then extract an expression tree using the expr() function, which should stop at the closing parenthesis which we force a match on. At that point the current token should point to the first token of the statement body, and so we recursively call parse_stmt() to get the statement that follows the if (expression) part. After that the current token either points to an else keyword or it doesn't, we'll use the accept() function to accept an else if it is there. A second recursive call to parse_stmt() is made only in the event that there was an else clause.

Comparing the grammar from interpreter2 we can see that match() is used to match required tokens in the grammar and accept() used for optional paths. Non-terminals in the grammar are calls to those functions that handle those non-terminals.

stnode_t *parse_stmt(void) {
  bool empty;
  stnode_t *hp = NULL, *ep = NULL, *st;

  switch(_cur.tok) {
    case T_WHILE:
      match(T_WHILE);
      st = new_stnode(ST_WHILE);
      match(T_OPAREN);
      st->w->expr = expr(&empty);
      match(T_CPAREN);
      st->w->body = parse_stmt();
      break;
    case T_IF:
      match(T_IF);
      st = new_stnode(ST_IF);
      match(T_OPAREN);
      st->i->expr = expr(&empty);
      match(T_CPAREN);
      st->i->body = parse_stmt();
      st->i->elsebody = NULL;
      if (accept(T_ELSE)) {
        st->i->elsebody = parse_stmt();
      }
      break;
    case T_PRINT:
      match(T_PRINT);
      st = new_stnode(ST_PRINT);
      match(T_OPAREN);
      st->expr = expr(&empty);
      match(T_CPAREN);
      match(T_EOS);
      break;
    case T_BREAK:
      match(T_BREAK);
      st = new_stnode(ST_BREAK);
      match(T_EOS);
      break;
    case T_CONTINUE:
      match(T_CONTINUE);
      st = new_stnode(ST_CONTINUE);
      match(T_EOS);
      break;
    case T_OCBRACE:
      match(T_OCBRACE);
      while (!accept(T_CCBRACE)) {
        st = parse_stmt();
        if (hp == NULL) hp = ep = st;
        else {
          ep->next = st;
          ep = st;
        }
      }
      st = hp;
      break;
    default:
      st = new_stnode(ST_EXPRESSION);
      st->expr = expr(&empty);
      match(T_EOS);
      break;
  }

  return st;
}


In the next lesson we'll make a function to recursively walk the AST (Abstract Syntax Tree) and expression trees to either "run" the code immediately or generate assembly code.