Logo  

CS456 - Systems Programming

The strtok(3) C library function:

#include <string.h>
char *strtok(char *str, const char *delim);

The strtok() C library function returns the next token (sub-string) of the original string, or NULL when there are no more tokens to return (the end of the string was reached). On the first invocation of strtok(), the first parameter should be the string to be tokenized (which will be modified by strtok()); on subsequent calls, the first parameter should be NULL, which tells strtok() to fetch the next token from the same string.

An example of a typical strtok() loop, splitting a string into words separated by spaces, tabs, or newlines:

// First call: hand strtok() the string to split. The delimiter set is
// space, tab and newline.
char *token = strtok(string, " \t\n");
// Every later call passes NULL so strtok() continues with the same string;
// the loop stops when strtok() returns NULL (no tokens left).
for ( ; token != NULL; token = strtok(NULL, " \t\n")) {
  // do something with the "token":
  ...
}

A strtok() example:

#include <stdio.h>
#include <unistd.h>
#include <string.h>

#define K   1024

/**
 * Breaks the string in buf into words separated by spaces, tabs or
 * newlines using strtok(), and prints every word on its own line,
 * numbered from zero.
 */
void tokenize(char *buf)
{
  static const char separators[] = " \t\n";
  int count = 0;
  char *word = strtok(buf, separators);

  while (word != NULL) {
    printf("%d = '%s'\n", count, word);
    count++;
    // Passing NULL asks strtok() for the next word of the same string.
    word = strtok(NULL, separators);
  }
}

int main(void)
{
  ssize_t r;          // read() returns ssize_t: byte count, 0 at EOF, -1 on error
  char buf[K+1];      // one extra byte for the terminating '\0'

  /**
   * Read text from stdin using the read syscall, up to K bytes at a time.
   * read() is not line-oriented, so a chunk may end in the middle of a word;
   * using fgets() would probably be better.
   */
  while ( (r = read(STDIN_FILENO, buf, K)) > 0) {
    // Null terminate the string, because read() does not do this.
    buf[r] = '\0';
    tokenize(buf);
  }

  // The loop also ends when read() fails; do not report that as success.
  if (r < 0) {
    perror("read");
    return 1;
  }

  return 0;
}

Constructing an array of strings using strtok():

/**
 * Splits the string in buf into words separated by spaces, tabs or newlines
 * and returns them as a NULL-terminated array of newly allocated strings.
 *
 * buf is modified by strtok(). The caller owns the result and should free()
 * every word and then the array itself. Returns NULL if memory runs out.
 */
char **tokenize(char *buf)
{
  int w = 0, mw = 10;
  char **words = malloc(sizeof *words * mw);
  if (words == NULL) return NULL;

  char *token = strtok(buf, " \t\n");
  for(; token != NULL; token = strtok(NULL, " \t\n")) {
    // Grow while one slot is still free, so the terminating NULL always fits.
    if (w == mw-1) {
      // Use a temporary: on failure realloc() leaves the old array intact.
      char **bigger = realloc(words, sizeof *words * (mw + 10));
      if (bigger == NULL) goto fail;
      words = bigger;
      mw += 10;
    }
    words[w] = strdup(token);
    if (words[w] == NULL) goto fail;
    w++;
  }
  // Mark the end of the array so callers can tell how many words there are.
  words[w] = NULL;
  return words;

fail:
  // Release everything built so far.
  while (w > 0) free(words[--w]);
  free(words);
  return NULL;
}

Implementing strtok():

/**
 * Reports whether the character c is one of the characters in the string
 * delim. Returns 1 if it is and 0 if it is not.
 *
 * delim is only read, so it is const-qualified. The '\0' that terminates
 * delim is never compared against c.
 */
int isdelim(char c, const char *delim)
{
  for (; *delim != '\0'; delim++) {
    if (*delim == c) return 1;
  }
  return 0;
}

/**
 * A small re-implementation of strtok().
 *
 * str is the string to split on the first call, and NULL on later calls to
 * continue with the string given earlier. delim is the set of delimiter
 * characters. Returns a pointer to the next token, or NULL when no tokens
 * remain (or when no string has been supplied yet).
 *
 * The string is modified: the delimiter that ends each token is overwritten
 * with '\0'. The scan position is kept in a static variable, so only one
 * string can be tokenized at a time.
 */
char *mystrtok(char *str, char *delim)
{
  static char *p = NULL;  // where the next call resumes scanning
  char *s;

  if (str != NULL) p = str;
  // Called with NULL before any string was supplied: nothing to scan.
  if (p == NULL) return NULL;

  // Skip any delimiters in front of the token.
  while(*p && isdelim(*p,delim)) p++;
  if (*p == '\0') return NULL;

  // s marks the start of the token; advance p to its end.
  s = p++;
  while(*p && !isdelim(*p,delim)) p++;
  if (*p) {
    // Cut the token off here and resume just past the delimiter next time.
    *(p++) = '\0';
  }
  return s;
}