Logo  

CS456 - Systems Programming

Interpreting part 3

expr.c

This file is the shunting yard algorithm for evaluating an expression we change the main function to expr to evaluate an expression and the return type is modified to return the following enode structures:

typedef struct enode enode_t;

struct enode {
  token_t op;
  union {
    int64_t value;
    struct var *var;
  };
  enode_t *left, *right;
};


Here we use an anonymous union to hold either a value or a pointer to a variable depending on whether the op is a T_NUMBER or T_IDENTIFIER.

In the expr function we need to make the following changes:

  1. Use _cur.tok for the current token everywhere tok would have been used.

  2. Change the main while loop to a do-while loop to only call the lexer after a token has been accepted.

    do {
      switch(_cur.tok) {
       ...
      }
    } while (!stop && ((_cur.tok = lex(&_cur.value,_cur.buf)) != T_EOI));
  1. Add a stop Boolean (as shown above) to indicate when to stop processing the expression. This will be set to true on any token not normally handled by an expression or a closing parenthesis that has no matching open parenthesis.

  2. Push an enode to a enode stack for numbers and variables, add some functions to create a new enode or free it. e_zero is a predefined T_NUMBER enode with the value of zero that is used for the left side of a unary expression.

enode_t e_zero = {.op = T_NUMBER, .value = 0, .left = NULL, .right = NULL};

enode_t *new_enode(token_t tok, int64_t value)
{
  enode_t *e = malloc(sizeof(enode_t));
  e->op = tok;
  e->value = value;
  e->left = e->right = NULL;
  return e;
}

void free_enode(enode_t *e)
{
  if (e->left) free_enode(e->left);
  if (e->right) free_enode(e->right);
  if (e != &e_zero) free(e);
}

  1. Modify the action() function to pop/push enodes instead of values. We can perform opportunistic constant folding if both the right and left enodes are of T_NUMBER types, perform the operation and push a T_NUMBER result instead of constructing a new expression sub-tree node and pushing that.

The expr() function:

Almost all the necessary code changes are in the outer loop structure and the first four cases which switch from pushing numbers/values to enodes and keeping track of the ()'s. empty is a boolean that will be true if the enode stack is empty after the final result has been pop'ed off the stack, this would be used for error checking to make sure the expression is valid, however we'll mostly ignore it for this version of the program. It could just be removed and an error thrown if esp != 0 prior to returning the enode.

enode_t *expr(bool *empty)
{
  enode_t *e;
  int stop = 0, parens = 0, p, d;

  flag_t flag = NUMBER;
  // Reset the stacks:
  osp = esp = 0;

  do {
    switch(_cur.tok) {
      case T_NUMBER:
        if (flag != NUMBER) die("Syntax error");
        push_enode(new_enode(T_NUMBER, _cur.value));
        flag = OPERATOR;
        break;
      case T_IDENTIFIER:
        e = new_enode(_cur.tok, 0);
        e->var = lookup(_cur.buf);
        push_enode(e);
        flag = OPERATOR;
        break;
      case T_OPAREN:
        parens++;   // keep track of parens
        push_op(T_OPAREN, 0, 0, 0);
        flag = NUMBER;
        break;
      case T_CPAREN:
        // A close parenthesis may signal the end of the expression:
        if (parens-- == 0) {
          stop = 1;
          break;
        }
        // Drain the op stack until it reaches it's matching open paren:
        while ((_cur.tok = peek_op()) != T_UNKNOWN) {
          if (_cur.tok == T_OPAREN) break;
          action();
        }
        if (_cur.tok == T_UNKNOWN) die("Mismatched parenthesis.");
        // Removes the opening paren:
        pop_op();
        flag = OPERATOR;
        break;
      case T_PLUS:
      case T_MINUS:
      case T_MULT:
      case T_DIV:
      case T_MOD:
      case T_OR:
      case T_AND:
      case T_LT:
      case T_GT:
      case T_EQUAL:
      case T_ASSIGN:
        // Get the precedence and direction of the current operator:
        p = (flag == NUMBER)? uprec[_cur.tok] : prec[_cur.tok];
        if (p == -1) die("Malformed expression, operator not a valid unary operator.");
        d = (flag == NUMBER)? udir[_cur.tok] : dir[_cur.tok];

        // Drain the op stack of operators of higher precedence (and equal to if
        // this op is left-assoc):
        if (d == 1) {   // Right associative
          while (peek_prec() > p)
            action();
        } else {    // Left associative
          while (peek_prec() >= p)
            action();
        }
        // Finally push the operator to the op stack:
        push_op(_cur.tok, flag==NUMBER, p, d);
        flag = NUMBER;
        break;
      default:
        stop = 1;
        break;
    }  
  } while (!stop && ((_cur.tok = lex(&_cur.value,_cur.buf)) != T_EOI));

  // Drain the operator stack until it's empty:
  while(peek_op() != T_UNKNOWN)
    action();

  e = pop_enode();
  // The result of the entire expression should be the last and only value left:
  *empty = (esp == 0);
  return e;
}


The action() function:

void action(void)
{
  struct op op;
  enode_t *l, *r;
  int64_t n;

  // Get the operator:
  op = pop_op();

  // Right side is at the top of the stack, left would be underneath it,
  // unless it's a unary op
  r = pop_enode();
  if (op.unary == FALSE) l = pop_enode();
  else l = &e_zero;

  if (uprec[op.op] == -1 && op.unary)
    die("Malformed unary expression in action.");

  // Perform constant folding if both sides of the operator are constants, this
  // is where we normally would have performed the action.
  if (l->op == T_NUMBER && r->op == T_NUMBER) {
    switch(op.op) {
      case T_PLUS:  n = l->value  + r->value; break;
      case T_MINUS: n = l->value  - r->value; break;
      case T_MULT:  n = l->value  * r->value; break;
      case T_DIV:   n = l->value  / r->value; break;
      case T_MOD:   n = l->value  % r->value; break;
      case T_OR:    n = l->value || r->value; break;
      case T_AND:   n = l->value && r->value; break;
      case T_LT:    n = l->value  < r->value; break;
      case T_GT:    n = l->value  > r->value; break;
      case T_EQUAL: n = l->value == r->value; break;
      default:
        break;
    }
    // Free the original enodes and push the folded result:
    free_enode(l);
    free_enode(r);
    push_enode(new_enode(T_NUMBER, n));
    return;
  }

  enode_t *e = new_enode(op.op, 0);
  e->left = l;
  if (op.op == T_ASSIGN && l->op != T_IDENTIFIER) die("Left side of assignment is not an lval.\n");
  e->right = r;
  push_enode(e);

  return;
}