/*
 *  A lexical analyzer for the C language.
 *  John Viega
 *
 *  Jul 28, 1999
 *
 *  Lexical Analyzer based on the reference manual in K+R, 2nd Edition.
 *  Should handle all ANSI C just fine.  I think this also happens to 
 *  scan all valid ANSI C++.
 *
 *  This program is a big state machine.  Often we will jump from
 *  state to state with a goto.  IMHO, the gotos aren't a big deal.
 *  However, this file remains pretty unreadable for other reasons.
 *  I'm basically implementing a fully compliant ANSI C(++) lexer 
 *  (minus some non-important stuff) in a single pass.  Usually it's
 *  done in multiple passes, probably for clarity's sake...
 *  I was going for minimizing time of implementation, myself.
 *  Multiple passes would have taken me a lot longer to get right wrt
 *  line numbers in some situations.
 *  As a result, there are some gross things such as checking for \ 
 *  followed by a newline EVERYWHERE.
 *  
 *  What isn't done:
 *    <><><> Necessary for "correct" functionality for this app <><><>
 *
 *  - Trigraphs need to be handled.
 *      ??= #
 *      ??/ \
 *      ??' ^
 *      ??( [
 *      ??) ]
 *      ??! |
 *      ??< {
 *      ??> }
 *      ??- ~
 *      ??? supposedly goes to ?
 *  - Trigraphs are supposed to work inside a string; digraphs are not.
 *  - I sometimes make an assumption that EOF is going to come after a \n.
 *
 *    <><><> Desirable, but not necessary <><><>
 *  - Wide strings are processed, but treated like regular strings.
 *  - Keywords are treated as identifiers.  That's OK for my purposes, tho.
 *  - See also "TODO:" items in place.  
 *
 *
 *  Jan 25, 2000:
 *  Apparently, gcc allows $ in identifiers.  Might as well recognize it.
 *
 *  Feb 1,  2000:
 *  Go ahead and treat \r as whitespace in the same context as \t, \n, etc.
 */

#include "lex.H"
#include "fatal.H"
#include "config.H"
#include <ctype.h>  // For isalanum(x)
#include <errno.h>
#include <stdio.h>
#include <string.h>

// Release everything the lexer owns.
//
// The input buffer is only released when free_input is set (the FILE*
// constructor sets it; the char* constructor clears it).  The scratch
// buffer and the two token containers are always released; deleting a null
// pointer is a no-op, so no explicit null tests are needed for them.
Lex::~Lex()
{
  if(free_input)
    {
      delete[] input;
    }
  delete[] str;
  delete token_box;
  delete comment_box;
}

// Build a lexer from an open stdio stream.
//
// The whole stream is read into a heap buffer, which is grown by file_incr
// bytes every time a read fills the free space completely.  Once fread()
// comes back short, the buffer is handed to Init() with a starting line
// number of 1.  free_input is set so that ~Lex() releases the buffer.
//
//   f      - stream to read; it is read until fread() returns a short count.
//   srcid  - stored by Init() as source_id.
//
// A short read that is not an end-of-file is reported on stderr, and
// lexing proceeds with whatever was read.
Lex::Lex(FILE *f, char *srcid)
{
  return_on_error = 0;
  free_input = 1;
  const size_t file_incr = 1024*10; // alloc 10K at a time.
  char *buf = new char[file_incr];
  if(!buf)
    OutOfMemory();
  char *buf_pos = buf;
  long size = 0;

  while(1)
    {
      // Clear errno first so that a non-zero value after the call can be
      // attributed to this fread().
      errno = 0;
      size_t s = fread(buf_pos, sizeof(char), file_incr, f);
      const int read_errno = errno;
      size += s;
      if(s != file_incr)
        {
          // feof() only promises "non-zero" at end of file, so test it for
          // truth instead of "> 0".
          if(!feof(f))
            {
              // ferror() is just a flag, not an errno value, so it must not
              // be fed to strerror(); use the errno captured above.
              fprintf(stderr, "%s" NEWLINE,
                      read_errno ? strerror(read_errno) : "Read error");
              fprintf(stderr, "End of file not reached.  Progressing anyway."
                      NEWLINE);
            }
          Init(buf, size, srcid, 1);
          return;
        }

      // The free space was filled completely: move to a buffer that has
      // room for another file_incr bytes after the data read so far.
      char *tmp = new char[file_incr + size];
      if(!tmp)
        OutOfMemory();
      memcpy(tmp, buf, size);
      delete[] buf;
      buf = tmp;
      buf_pos = &tmp[size];
    }
}

// Build a lexer over a caller-supplied buffer.
//
//   s, len - the text to scan and its length.
//   srcid  - stored by Init() as source_id.
//   l      - line number the first line of s is counted as.
//   err    - initial value of return_on_error.
//
// free_input is cleared, so ~Lex() leaves the buffer alone.
Lex::Lex(char* s, long len, char *srcid, unsigned int l, int err)
{
  return_on_error = err;
  free_input = 0;

  Init(s, len, srcid, l);
}

// Shared tail of both constructors: record the input, reset all scanner
// state, create the two token containers and immediately scan the input.
//
//   s, len - input buffer and its size.
//   srcid  - stored as source_id.
//   l      - initial value of lineno.
void Lex::Init(char *s, long len, char *srcid, int l)
{
  // Where the text comes from.
  input      = s;
  input_size = len;
  source_id  = srcid;

  // Output containers: one for tokens, one for comments.
  token_box = new TokenContainer();
  if(token_box == NULL)
    {
      OutOfMemory();
    }
  comment_box = new TokenContainer();
  if(comment_box == NULL)
    {
      OutOfMemory();
    }

  // Scratch buffer starts out empty and unallocated.
  str     = NULL;
  str_len = 0;
  str_pos = 0;

  // Read cursor and line bookkeeping.
  pos                   = 0;
  lineno                = l;
  lineno_offset         = 0;
  comment_lineno_offset = 0;

  Scan();
}

// Collect the body of a C comment, up to and including the closing
// star-slash.  The opening slash-star has already been consumed by the
// caller.
//
// Every character of the body, newlines included, is appended to the
// comment text.  Each newline bumps lineno, comment_lineno_offset and
// lineno_offset.
//
// Returns 1 once the terminator has been seen and EndComment() has been
// called; returns 0 if the input ends first.
int Lex::LexCComment()
{
  StartCComment();
  for(;;)
    {
      int ch = GetChar();

      if(ch == EOF)
        {
          return 0;
        }

      if(ch == '*')
        {
          int following = GetChar();
          if(following == '/')
            {
              EndComment();
              return 1;
            }
          // A lone '*': keep it and look at the next character again.
          AddCharToComment('*');
          UngetChar(following);
          continue;
        }

      AddCharToComment(ch);
      if(ch == '\n')
        {
          lineno++;
          comment_lineno_offset++;
          lineno_offset++;
        }
    }
}

// Collect the body of a "//" comment; the two slashes have already been
// consumed by the caller.
//
// The comment runs to the next newline that is not preceded by a
// backslash.  A backslash-newline pair is dropped from the text and only
// bumps the line counters.  The terminating newline is consumed, lineno is
// incremented, and the function returns so the caller starts a fresh line.
//
// If the input ends before a newline is seen, the text gathered so far is
// still emitted as a comment; otherwise the last comment of a file without
// a trailing newline would be lost and its characters left behind in the
// scratch buffer.
void Lex::LexCPPComment()
{
  int t;

  StartCPPComment();
  while(1)
    {
      switch (t=GetChar())
        {
        case EOF:
          // No newline before end of input: flush what we have.
          EndComment();
          return;
        case '\\':
          switch(t=GetChar())
            {
            case '\n':
              // Line continuation inside the comment.
              lineno++;
              comment_lineno_offset++;
              lineno_offset++;
              continue;
            default:
              // Ordinary backslash: keep it, re-read what followed.
              UngetChar(t);
              AddCharToComment('\\');
              continue;
            }
        case '\n':
          EndComment();
          lineno++;
          /* Return here, rather than continuing to scan, for input such as:
           * x = x + // foo
           * #if 1
           * 2;
           * #endif
           */
          return;
        default:
          AddCharToComment(t);
          continue;
        }
    }
}

// Begin a hexadecimal character escape: chr_val becomes the value of the
// first hex digit c.  Anything that is neither a decimal digit nor a
// lower-case letter is treated as an upper-case hex letter.
void Lex::StartHexChr(char c)
{
  if(isdigit(c))
    {
      chr_val = c - '0';
      return;
    }
  const char ten = islower(c) ? 'a' : 'A';
  chr_val = c - ten + 10;
}

// Append one more hex digit c to the character escape being built:
// chr_val is shifted left by four bits and the digit's value is added.
void Lex::AddHexChr(char c)
{
  int nibble;
  if(isdigit(c))
    nibble = c - '0';
  else
    nibble = c - (islower(c) ? 'a' : 'A') + 10;

  chr_val = (chr_val << 4) + nibble;
}

// Finish a hexadecimal character escape: emit a character token carrying
// the value accumulated in chr_val.
void Lex::EndHexChr()
{
  GenChr(chr_val);
}

// Begin an octal character escape: chr_val becomes the value of the first
// digit c.
void Lex::StartOctChr(char c)
{
  chr_val = c - '0';
}

// Append one more octal digit c to the character escape being built:
// chr_val is shifted left by three bits and the digit's value is added.
void Lex::AddOctChr(char c)
{
  chr_val = (chr_val << 3) + (c - '0');
}

// Finish an octal character escape: emit a character token carrying the
// value accumulated in chr_val.
void Lex::EndOctChr()
{
  GenChr(chr_val);
}

// Begin an identifier: store its first character ch in the scratch buffer.
void Lex::StartIdentifier(char ch)
{
  AddCharToStr(ch);
}

// Append the next identifier character ch to the scratch buffer.
void Lex::ContinueIdentifier(char ch)
{
  AddCharToStr(ch);
}

void Lex::EndIdentifier()
{
  str[str_pos++] = '\0';
  char *tmp = new char[str_pos];
  if(!tmp)
    OutOfMemory();
  strncpy(tmp, str, str_pos);
  IdTok *tok = new IdTok(tmp, str_pos, lineno-lineno_offset, lineno);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
  str_pos = 0;
}

// Begin a hexadecimal integer literal: the value starts at zero, and the
// literal is neither real nor octal-looking and carries no 'u'/'l' suffix
// yet.
void Lex::StartHexNum()
{
  num_val = 0;

  real        = 0;
  looks_octal = 0;

  long_flag     = 0;
  unsigned_flag = 0;
}

// Append hex digit c to the integer literal being built: num_val is
// shifted left by four bits and the digit's value is added.  Anything that
// is neither a decimal digit nor a lower-case letter is treated as an
// upper-case hex letter.
void Lex::AddHexDigit(char c)
{
  int nibble;
  if(isdigit(c))
    nibble = c - '0';
  else
    nibble = c - (islower(c) ? 'a' : 'A') + 10;

  num_val = (num_val << 4) + nibble;
}

void Lex::EndNum()
{
  if(!real)
    {
      IntegerTok *tok = new IntegerTok( looks_octal ? oct_val : num_val, 
				     unsigned_flag, long_flag, 
				     lineno-lineno_offset);
      if(!tok)
	OutOfMemory();
      token_box->Add(tok);
      lineno_offset = 0;
    }
  else
    {
      RealTok *tok = new RealTok(num_val, mant_val, 
			       exp_neg_flag ? -exp : exp, 
			       float_flag ? FLOAT : (long_flag ? LONG_DOUBLE
						     : DOUBLE),
			       lineno - lineno_offset);
      if(!tok)
	OutOfMemory();
      token_box->Add(tok);
      lineno_offset = 0;
    }
}

// Begin a decimal, octal or floating-point literal whose first character
// is c.
//
// All per-number state is reset first.  Then:
//   '.'        - the literal is real from the start;
//   '0' or 0   - the literal looks octal and oct_val starts at zero;
//   otherwise  - c is the first decimal digit and seeds num_val.
//
// The NUL case exists because ScanLine() has a call site that passes the
// integer 0 rather than the character '0' (a lone "0" followed by a stray
// backslash).  Without it, the default branch would seed num_val with
// 0 - '0', i.e. -48.
void Lex::StartBase10OrLowerNum(char c)
{
  real = 0;
  mant_val = 0;
  exp = 0;
  unsigned_flag = 0;
  long_flag = 0;
  float_flag = 0;
  exp_neg_flag = 0;
  num_val = 0;

  switch(c)
    {
    case '.':
      real = 1;
      return;
    case '\0':   // tolerate callers that pass 0 instead of '0'
    case '0':
      looks_octal = 1;
      oct_val = 0;
      return;
    default:
      looks_octal = 0;
      num_val = c - '0';
      return;
    }
}

// Handle the first character after the 'e'/'E' of a floating literal.
// '+' changes nothing, '-' marks the exponent as negative, and any other
// character is taken to be the first exponent digit.
void Lex::BeginExponent(char c)
{
  if(c == '+')
    return;

  if(c == '-')
    {
      exp_neg_flag = 1;
      return;
    }

  exp = c - '0';
}

// Append decimal digit c to the exponent being accumulated in exp.
void Lex::AddExponent(char c)
{
  exp = exp * 10 + (c - '0');
}

// Append digit c to a literal that started with '0'.  The digit is folded
// into oct_val as an octal digit and, in parallel, into num_val as a
// decimal digit, in case the literal later turns out to be a float.
void Lex::AddOctDigit(char c)
{
  const int digit = c - '0';

  oct_val = (oct_val << 3) + digit;
  num_val = (num_val*10) + digit;
}

// Feed the next character of a decimal literal.  A '.' only switches the
// literal to real.  Any other character is taken as a decimal digit and
// appended to num_val before the '.' has been seen, or to mant_val after.
void Lex::AddDecDigit(char c)
{
  if(c == '.')
    {
      real = 1;
      return;
    }

  const int digit = c - '0';
  if(real == 0)
    num_val = num_val * 10 + digit;
  else
    mant_val = mant_val * 10 + digit;
}

// Record that the current numeric literal carries an 'l'/'L' suffix.
void Lex::MakeLong()
{
  long_flag = 1;
}

// Record that the current numeric literal carries a 'u'/'U' suffix.
void Lex::MakeUnsigned()
{
  unsigned_flag = 1;
}

// Record that the current numeric literal carries an 'f'/'F' suffix.
void Lex::MakeFloat()    
{
  float_flag = 1;
}

void Lex::GenChr(long c)
{
  CharTok *tok = new CharTok(c, lineno-lineno_offset);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
}

// Append c to the comment being collected.  Comments share the scratch
// buffer used for strings and identifiers.
void Lex::AddCharToComment(char c)
{
  AddCharToStr(c);
}

// Append c to the scratch buffer, growing it by BUFFER_SIZE bytes first
// when it is full (or has not been allocated yet).
void Lex::AddCharToStr(char c)
{
  if(str_pos == str_len) 
    {
      char *tmp = new char[str_len + BUFFER_SIZE];
      if(!tmp)
        OutOfMemory();
      // On the first growth str is still NULL.  memcpy() must not be given
      // a null pointer even for a zero-length copy, so copy and release
      // only when an old buffer exists.
      if(str)
        {
          memcpy(tmp, str, str_pos);
          delete[] str;
        }
      str_len += BUFFER_SIZE;
      str = tmp;
    }
  str[str_pos++] = c;
}

void Lex::EndStr()
{
  if(!str_pos && !str)
  {
    str = new char[1];
    if(!str)
      OutOfMemory();
    str[0] = 0;
  }
  str[str_pos++] = 0;
  char *tmp = new char[str_pos];
  if(!tmp)
    OutOfMemory();
  strncpy(tmp, str, str_pos);
  StringTok *tok = new StringTok(tmp, str_pos-1, lineno - lineno_offset);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
  str_pos = 0;
}

void Lex::EndComment()
{
  if(!str_pos && !str)
    {
      str = new char[1];
      if(!str)
	OutOfMemory();
      str[0] = 0;
    }
  str[str_pos++] = 0;
  char *tmp = new char[str_pos];
  if(!tmp)
    OutOfMemory();

  strncpy(tmp, str, str_pos);
  CommentTok *tok = new CommentTok(tmp, str_pos, lineno-lineno_offset,
				   cpp_comment, token_box->GetCurrentSize(), 
				   lineno, 0);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  // In the comment box, we don't free the string, so we pass the 1 param
  // to say this.
  tok = new CommentTok(tmp, str_pos, lineno-lineno_offset, cpp_comment,
		       token_box->GetCurrentSize(), lineno, 1);
  comment_box->Add(tok);
  comment_lineno_offset = 0;
  str_pos = 0;
}

void Lex::GenOp(char *s)
{
  OperatorTok *tok = new OperatorTok(s, lineno - lineno_offset);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
}

// Return the next input byte and advance the read position, or EOF once
// the whole input has been consumed (the position is not advanced then).
//
// As with getc(), the byte is returned as an unsigned char converted to
// int.  Returning the plain char would sign-extend bytes >= 0x80 where
// char is signed, making a 0xFF byte in the input indistinguishable from
// EOF.
int Lex::GetChar()
{
  if(input_size <= pos) 
    {
      return EOF;
    }
  return static_cast<unsigned char>(input[pos++]);
}

// Push c back onto the input: step the read position back by one and
// store c there.  Nothing happens if c is EOF or the position is already
// at the start of the input.
void Lex::UngetChar(int c)
{
  if(c != EOF && pos > 0)
    {
      input[--pos] = c;
    }
}

// Look at the start of a line for a preprocessor directive.
//
// Leading blanks, C comments and backslash-newline continuations are
// skipped.  If the first significant character is '#', a PreprocStartToken
// is emitted, the rest of the directive is lexed with ScanLine() (with
// return_on_error forced to 1 for the duration), a PreprocEndToken is
// emitted, and 1 is returned.
//
// In every other case 0 is returned.  Characters that were read but do not
// belong to skipped material are pushed back first, so ScanLine() sees
// them again.  A "//" comment is lexed in full before returning 0.
//
// An unterminated C comment returns 0 when return_on_error is set;
// otherwise it is reported on stderr and the process exits.
int Lex::LexPreprocessorStuff() 
{
  // Must be an int: GetChar() signals end of input with EOF.  Stored in a
  // plain char, EOF turns into 255 where char is unsigned, and the
  // UngetChar(t) calls below would then step back and overwrite the last
  // input byte instead of being ignored.
  int t;
  while(1)
    {
      switch(t = GetChar())
        {
        case '/':
        try_again:
          switch(t = GetChar())
            {
            case '\\':
              switch(t = GetChar())
                {
                case '\n':
                  lineno++;
                  // Don't lineno_offset++; not in a token yet.
                  goto try_again;
                default:
                  // Not a continuation: restore all three characters.
                  UngetChar(t);
                  UngetChar('\\');
                  UngetChar('/');
                  return 0;
                }
            case '/':
              LexCPPComment();
              return 0;
            case '*':
              if(!LexCComment())
                {
                  // TODO: Make sure this is ok behavior.
                  if(return_on_error)
                    return 0;
                  fprintf(stderr, "%s: Error: Unterminated comment." NEWLINE, 
                          source_id); 
                  exit(0); 

                }
              else
                {
                  // Comment skipped; keep looking on the same line.
                  continue;
                }
            default:
              // A lone '/': leave it for ScanLine().
              UngetChar(t);
              UngetChar('/');
              return 0;
            }
        case '\\':
          switch(t = GetChar())
            {
            case '\n':
              lineno++;
              // Don't lineno_offset++; not in a token yet.
              continue;
            default:
              UngetChar(t);
              UngetChar('\\');
              return 0;
            }
        case '#':
          goto remove_directive;
        case ' ':
        case '\t':
        case '\v':
        case '\r':
        case '\f':
          continue;
        default:
          // Anything else (including EOF, which UngetChar() ignores) means
          // this line is not a directive.
          UngetChar(t);
          return 0;
        }
    }

 remove_directive:
  Token *tok = new PreprocStartToken(lineno-lineno_offset);
  if(!tok) OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
  // Lex the body of the directive with errors returned rather than fatal,
  // then restore the caller's setting.
  int old_return_on_error = return_on_error;
  return_on_error = 1;
  ScanLine();
  return_on_error = old_return_on_error;
  // Preprocessor guy ended on the previous line, since ScanLine
  // bumped it up by 1.
  tok = new PreprocEndToken(lineno-lineno_offset-1);
  if(!tok) OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
  return 1;
}

// Top-level driver: scan the whole input.
//
// Before each ordinary line, consecutive preprocessor directives are
// consumed for as long as LexPreprocessorStuff() keeps finding them; then
// one line is scanned with ScanLine().  Scanning stops when ScanLine()
// returns 0.
void Lex::Scan() 
{
  for(;;)
    {
      while(LexPreprocessorStuff())
        {
          // Each successful call has consumed one complete directive.
        }

      if(!ScanLine())
        return;
    }
}

int Lex::ScanLine()
{
  char t, t2;

 next:
  t = GetChar();

  switch(t){
  case EOF:
    return 0;
  case '\n':
    lineno++;
    return 1;
  case '\t':
  case '\v':
  case '\r':
  case '\f':
  case ' ':
    goto next;
  case ';':
    GenOp(";");
    goto next;
  case '/':
  slash_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto slash_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("/");
	    goto next;
	  }
      case '*':
	if(!LexCComment())
	  goto unterminatedCommentError;
	else
	  {
	    lineno_offset = 0;
	    goto next;
	  }
      case '/':
	LexCPPComment();
	lineno_offset = 0;
	return 1;
      case '=':
	GenOp("/=");
	goto next;
      default:
	UngetChar(t);
	GenOp("/");
	goto next;
      }
  case '-':
  minus_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto minus_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("-");
	    goto next;
	  }

      case '-':
	GenOp("--");
	goto next;
      case '>':
	GenOp("->");
	goto next;
      case '=':
	GenOp("-=");
	goto next;
      default:
	UngetChar(t);
	GenOp("-");
	goto next;
      }
  case '+':
  plus_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto plus_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("+");
	    goto next;
	  }
      case '+':
	GenOp("++");
	goto next;
      case '=':
	GenOp("+=");
	goto next;
      default:
	UngetChar(t);
	GenOp("+");
	goto next;
      }
  case '*':
  star_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto star_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("*");
	    goto next;
	  }
      case '=':
	GenOp("*=");
	goto next;
      default:
	UngetChar(t);
	GenOp("*");
	goto next;
      }
  case '%':
  mod_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto mod_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("%");
	    goto next;
	  }
      case '=':
	GenOp("%=");
	goto next;
      default:
	UngetChar(t);
	GenOp("%");
	goto next;
      }
  case '>':
  gt_start:
    switch (t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto gt_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp(">");
	    goto next;
	  }
      case '=':
	GenOp(">=");
	goto next;
      case '>':
      sr_start:
	switch (t = GetChar())
	  {
	  case '\\':
	    switch(t = GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto sr_start;
	      default:
		UngetChar(t);
		UngetChar('\\');
		GenOp(">>");
		goto next;
	      }
	  case '=':
	    GenOp(">>=");
	    goto next;
	  default:
	    UngetChar (t);
	    GenOp(">>");
	    goto next;
	  }
      default:
	UngetChar (t);
	GenOp(">");
	goto next;
      }
  case '<':
  lt_start:
    switch (t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto lt_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("<");
	    goto next;
	  }
      case '=':
	GenOp("<=");
	goto next;
      case '<':
      sl_start:
	switch (t = GetChar())
	  {
	  case '\\':
	    switch(t = GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto sl_start;
	      default:
		UngetChar(t);
		UngetChar('\\');
		GenOp("<<");
		goto next;
	      }
	  case '=':
	    GenOp("<<=");
	    goto next;
	  default:
	    UngetChar (t);
	    GenOp("<<");
	    goto next;
	  }
      default:
	UngetChar (t);
	GenOp("<");
	goto next;
      }
  case '=':
  equals_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto equals_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("=");
	    goto next;
	  }
      case '=':
	GenOp("==");
	goto next;
      default:
	UngetChar(t);
	GenOp("=");
	goto next;
      }
  case '!':
  bang_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto bang_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("!");
	    goto next;
	  }
      case '=':
	GenOp("!=");
	goto next;
      default:
	UngetChar(t);
	GenOp("!");
	goto next;
      }
  case '^':
  xor_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto xor_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("^");
	    goto next;
	  }
      case '=':
	GenOp("^=");
	goto next;
      default:
	UngetChar(t);
	GenOp("^");
	goto next;
      }
  case '|':
  or_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto or_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("|");
	    goto next;
	  }
      case '=':
	GenOp("|=");
	goto next;
      case '|':
	GenOp("||");
	goto next;
      default:
	UngetChar(t);
	GenOp("|");
	goto next;
      }
  case '&':
  and_start:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto and_start;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp("&");
	    goto next;
	  }
      case '=':
	GenOp("&=");
	goto next;
      case '&':
	GenOp("&&");
	goto next;
      default:
	UngetChar(t);
	GenOp("&");
	goto next;
      }

    // Pretend # is an operator, as it probably indicates that we're parsing
    // a macro definition with some stringize crap.
    // It doesn't really hurt for the scanner to be this liberal.
  case '#':
    GenOp("#");
    goto next;
  case '~':
    GenOp("~");
    goto next;
  case '(':
    GenOp("(");
    goto next;
  case ')':
    GenOp(")");
    goto next;
  case '{':
    GenOp("{");
    goto next;
  case '}':
    GenOp("}");
    goto next;
  case '[':
    GenOp("[");
    goto next;
  case ']':
    GenOp("]");
    goto next;
  case ',':
    GenOp(",");
    goto next;
  case ':':
    GenOp(":");
    goto next;
  case '?':
    GenOp("?");
    goto next;
  case '"':
  stringpart:
    switch(t = GetChar())
      {
      case EOF:
	goto eofError;
      case '\\':
	switch(t = GetChar())
	  {
	  case EOF:
	    goto eofError;
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto stringpart;
	  default:
	    AddCharToStr('\\');
	    AddCharToStr(t);
	    goto stringpart;
	  }
      case '"':
	EndStr();
	goto next;
      default:
	AddCharToStr(t);
	goto stringpart;
      }
  case '\'':
  char_const:
  switch(t = GetChar())
    {
    case '\\':
      switch(t = GetChar())
	{
	case '\n':
	  lineno++;
	  lineno_offset++;
	  goto char_const;
	case '\\':
	case '\'':
	  if(GetChar() != '\'')
	    {
	      goto errBadToken;
	    }
	  GenChr(t);
	  goto next;
	case 'x':
	case 'X':
	reading_hex_char:
	  switch(t = GetChar())
	    {
	    case '\\':
	      switch(t = GetChar())
		{
		case '\n':
		  lineno_offset++;
		  lineno++;
		  goto reading_hex_char;
		default:
		  goto errBadToken;
		}

	    case '0':
	    case '1':
	    case '2':
	    case '3':
	    case '4':
	    case '5':
	    case '6':
	    case '7':
	    case '8':
	    case '9':
	    case 'a':
	    case 'A':
	    case 'b':
	    case 'B':
	    case 'c':
	    case 'C':
	    case 'd':
	    case 'D':
	    case 'e':
	    case 'E':
	    case 'f':
	    case 'F':
	      StartHexChr(t);
	    hex:
	      switch(t = GetChar())
		{
		case '\\':
		  switch(t = GetChar())
		    {
		    case '\n':
		      lineno_offset++;
		      lineno++;
		      goto hex;
		    default:
		      goto errBadToken;
		    }

		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
		case 'a':
		case 'A':
		case 'b':
		case 'B':
		case 'c':
		case 'C':
		case 'd':
		case 'D':
		case 'e':
		case 'E':
		case 'f':
		case 'F':
		  AddHexChr(t);
		  goto hex;
		default:
		  if(t != '\'')
		    {
		      goto errBadToken;
		    }
		  EndHexChr();
		  goto next;
		}
	    default:
	      goto errBadToken;
	    }
	case '0':    
	case '1':    
	case '2':    
	case '3': 
	case '4':    
	case '5':    
	case '6':    
	case '7': 
	  StartOctChr(t);
	start_octal_char:
	  switch (t = GetChar())
	    { 
	    case '\\':
	      switch(t = GetChar())
		{
		case '\n':
		  lineno++;
		  lineno_offset++;
		  goto start_octal_char;
		default:
		  goto errBadToken;
		}
	    case '\'': 
	      EndOctChr();
	      goto next; 
	    case '0':        
	    case '1':        
	    case '2':        
	    case '3': 
	    case '4':        
	    case '5':        
	    case '6':        
	    case '7': 
	      AddOctChr(t); 
	    grab_octal_char:
	      switch (t = GetChar()) 
		{ 
		case '\\':
		  switch(t = GetChar())
		    {
		    case '\n':
		      lineno++;
		      lineno_offset++;
		      goto grab_octal_char;
		    default:
		      goto errBadToken;
		    }
		case '\'': 
		  EndOctChr();
		  goto next; 
		case '0':    
		case '1':    
		case '2':    
		case '3': 
		case '4':    
		case '5':    
		case '6':    
		case '7': 
		finish_octal:
		  switch(GetChar())
		    {
		    case '\'':
		      AddOctChr(t);
		      EndOctChr();
		      goto next;
		    case '\\':
		      switch(GetChar())
			{
			case '\n':
			  lineno++;
			  lineno_offset++;
			  goto finish_octal;
			default:
			  goto errBadToken;
			}
		    default:
		      goto errBadToken;
		    }
		} 
	    default: 
	      goto errBadToken; 
	    } 
	case 'n':
	finish_newline:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_newline;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	  GenChr('\n');
	  goto next;
	  default:
	    goto errBadToken;
	  }
	case 't':
	finish_tab:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_tab;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	  GenChr('\t');
	  goto next;
	  default:
	    goto errBadToken;
	  }
	case 'v':
	finish_vtab:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_vtab;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	  GenChr('\v');
	  goto next;
	  default:
	    goto errBadToken;
	  }
	case 'b':
	finish_backspace:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_backspace;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	  GenChr('\b');
	  goto next;
	  default:
	    goto errBadToken;
	  }
	case 'r':
	finish_cr:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_cr;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	  GenChr('\r');
	  goto next;
	  default:
	    goto errBadToken;
	  }
	case 'f':
	finish_formfeed:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_formfeed;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	  GenChr('\f');
	  goto next;
	  default:
	    goto errBadToken;
	  }
	case 'a':
	finish_alarm:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_alarm;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	  GenChr('\a');
	  goto next;
	  default:
	    goto errBadToken;
	  }
	case '?':
	finish_qmark:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_qmark;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	  GenChr('\?');
	  goto next;
	  default:
	    goto errBadToken;
	  }
	case '"':
	finish_quote:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_quote;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	  GenChr('\"');
	  goto next;
	  default:
	    goto errBadToken;
	  }
	default: 
	  goto errBadToken; 
	} 
    case '\'': 
      // TODO: Is the right?
      GenChr(0);
      goto next; 
    default:         
	finish_regular:
	switch(GetChar())
	  {
	  case '\\':
	    switch(GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto finish_regular;
	      default:
		goto errBadToken;
	      }
	  case '\'':
	    GenChr(t);
	    goto next;
	  default:
	    goto errBadToken;
	  }
    } 
  case 'l': 
  case 'L':
  start_long:
    switch(t2 = GetChar())
      {
      case '\\':
	switch(GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto start_long;
	  default:
	    goto errBadToken;
	  }
      case '"':
	goto stringpart;
      case '\'':
	goto char_const;
      default:
	UngetChar(t2);
      }
  case 'a': 
  case 'b': 
  case 'c': 
  case 'd': 
  case 'e': 
  case 'f': 
  case 'g': 
  case 'h': 
  case 'i': 
  case 'j': 
  case 'k':
  case 'm': 
  case 'n':    
  case 'o': 
  case 'p': 
  case 'q': 
  case 'r': 
  case 's': 
  case 't': 
  case 'u': 
  case 'v': 
  case 'w': 
  case 'x': 
  case 'y': 
  case 'z': 
  case 'A': 
  case 'B': 
  case 'C': 
  case 'D': 
  case 'E': 
  case 'F': 
  case 'G': 
  case 'H': 
  case 'I': 
  case 'J': 
  case 'K': 
  case 'M': 
  case 'N': 
  case 'O': 
  case 'P': 
  case 'Q': 
  case 'R': 
  case 'S': 
  case 'T': 
  case 'U': 
  case 'V': 
  case 'W': 
  case 'X': 
  case 'Y': 
  case 'Z': 
  case '_': 
  case '$':
    StartIdentifier(t);
    t = GetChar();
    while (isalnum(t) || t == '_' || t == '$' || t == '\\') 
      { 
	if(t == '\\')
	  {
	    switch(t = GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		t = GetChar();
		continue;
	      default:
		UngetChar(t);
		UngetChar('\\');
		EndIdentifier();
		goto next;
	      }
	  }
	ContinueIdentifier(t);
	t = GetChar(); 
      } 
    UngetChar(t); 
    EndIdentifier();
    goto next; 
  case '0': 
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto next;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    StartBase10OrLowerNum(0);
	    EndNum();
	    goto next;
	  }
      case 'x':
      case 'X':
	StartHexNum();
      starting_hex_num:
	switch(t = GetChar())
	  {
	  case '\\':
	    switch(t = GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto starting_hex_num;
	      default:
		goto errBadToken;
	      }
	  case '0':
	  case '1':
	  case '2':
	  case '3':
	  case '4':
	  case '5':
	  case '6':
	  case '7':
	  case '8':
	  case '9':
	  case 'a':
	  case 'A':
	  case 'b':
	  case 'B':
	  case 'c':
	  case 'C':
	  case 'd':
	  case 'D':
	  case 'e':
	  case 'E':
	  case 'f':
	  case 'F':
	    AddHexDigit(t);
	  next_hex_digit:
	    switch(t = GetChar())
	      {
	      case '\\':
		switch(t = GetChar())
		  {
		  case '\n':
		    lineno++;
		    lineno_offset++;
		    goto next_hex_digit;
		  default:
		    EndNum();
		    UngetChar(t);
		    UngetChar('\\');
		    goto next;
		  }
	      case '0':
	      case '1':
	      case '2':
	      case '3':
	      case '4':
	      case '5':
	      case '6':
	      case '7':
	      case '8':
	      case '9':
	      case 'a':
	      case 'A':
	      case 'b':
	      case 'B':
	      case 'c':
	      case 'C':
	      case 'd':
	      case 'D':
	      case 'e':
	      case 'E':
	      case 'f':
	      case 'F':
		AddHexDigit(t);		
		goto next_hex_digit;
	      case 'l':
	      case 'L':
		MakeLong();
	      check_for_unsigned:
		switch(t = GetChar())
		  {
		  case '\\':
		    switch(t = GetChar())
		      {
		      case '\n':
			lineno++;
			lineno_offset++;
			goto check_for_unsigned;
		      default:
			EndNum();
			UngetChar(t);
			UngetChar('\\');
			goto next;
		      }
		  case 'u':
		  case 'U':
		    MakeUnsigned();
		    EndNum();
		    goto next;
		  default:
		    UngetChar(t);
		    EndNum();
		    goto next;
		  }
	      case 'u':
	      case 'U':
		MakeUnsigned();
	      check_for_long:
		switch(t = GetChar())
		  {
		  case '\\':
		    switch(t = GetChar())
		      {
		      case '\n':
			lineno++;
			lineno_offset++;
			goto check_for_long;
		      default:
			EndNum();
			UngetChar(t);
			UngetChar('\\');
			goto next;
		      }
		  case 'l':
		  case 'L':
		    MakeLong();
		    EndNum();
		    goto next;
		  default:
		    UngetChar(t);
		    EndNum();
		    goto next;
		  }
	      default:
		EndNum();
		UngetChar(t);
		goto next;
	      }
	  default:
	    goto errBadToken;
	  }
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
	StartBase10OrLowerNum('0');
	AddOctDigit(t);
      next_oct_digit:
	switch(t = GetChar())
	  {
	  case '\\':
	    switch(t = GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto next_oct_digit;
	      default:
		EndNum();
		UngetChar(t);
		UngetChar('\\');
		goto next;
	      }
	  case '0':
	  case '1':
	  case '2':
	  case '3':
	  case '4':
	  case '5':
	  case '6':
	  case '7':
	    AddOctDigit(t);
	    goto next_oct_digit;
	  case '.':
	    AddDecDigit(t);
	    goto next_mantissa_digit;
	  case 'l':
	  case 'L':
	    MakeLong();
		  check_for_unsigned_2:
	    switch(t = GetChar())
	      {
	      case '\\':
		switch(t = GetChar())
		  {
		  case '\n':
		    lineno++;
		    lineno_offset++;
		    goto check_for_unsigned_2;
		  default:
		    EndNum();
		    UngetChar(t);
		    UngetChar('\\');
		    goto next;
		  }
	      case 'u':
	      case 'U':
		MakeUnsigned();
		EndNum();
		goto next;
	      default:
		UngetChar(t);
		EndNum();
		goto next;
	      }
	  case 'u':
	  case 'U':
	    MakeUnsigned();
		  check_for_long_2:
	    switch(t = GetChar())
	      {
	      case '\\':
		switch(t = GetChar())
		  {
		  case '\n':
		    lineno++;
		    lineno_offset++;
		    goto check_for_long_2;
		  default:
		    EndNum();
		    UngetChar(t);
		    UngetChar('\\');
		    goto next;
		  }
	      case 'l':
	      case 'L':
		MakeLong();
		EndNum();
		goto next;
	      default:
		UngetChar(t);
		EndNum();
		goto next;
	      }
	  default:
	    EndNum();
	    UngetChar(t);
	    goto next;
	  }
      case '.':
	StartBase10OrLowerNum('0');
	AddDecDigit('.');
      next_mantissa_digit:
	switch(t = GetChar())
	  { 
	  case '\\':
	    switch(t = GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto next_mantissa_digit;
	      default:
		EndNum();
		UngetChar(t);
		UngetChar('\\');
		goto next;
	      }
	  case '0':
	  case '1':
	  case '2':
	  case '3':
	  case '4':
	  case '5':
	  case '6':
	  case '7':
	  case '8':
	  case '9':
	    AddDecDigit(t);
	    goto next_mantissa_digit;
	  case 'e':
	  case 'E':
	  collect_exponent:
	  switch(t = GetChar())
	    {
	    case '\\':
	      switch(t = GetChar())
		{
		case '\n':
		  lineno++;
		  lineno_offset++;
		  goto collect_exponent;
		default:
		  goto errBadToken;
		}
	    case '+':
	    case '-':
	    got_sign:
	      switch(t2 = GetChar())
		{
		case '\\':
		  switch(t = GetChar())
		    {
		    case '\n':
		      lineno++;
		      lineno_offset++;
		      goto got_sign;
		    default:
		      goto errBadToken;
		    }
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
		  UngetChar(t2);		
		  break;
		default:
		  goto errBadToken; 
		}
	    case '0':
	    case '1':
	    case '2':
	    case '3':
	    case '4':
	    case '5':
	    case '6':
	    case '7':
	    case '8':
	    case '9':
	      BeginExponent(t);
	    next_exp_digit_v1:
	      switch(t = GetChar())
		{
		case '\\':
		  switch(t = GetChar())
		    {
		    case '\n':
		      lineno++;
		      lineno_offset++;
		      goto next_exp_digit_v1;
		    default:
		      EndNum();	
		      UngetChar(t);
		      UngetChar('\\');
		    }
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
		  AddExponent(t);
		  goto next_exp_digit_v1;
		case 'f':
		case 'F':
		  MakeFloat();
		  EndNum();
		  goto next;
		case 'l':
		case 'L':
		  MakeLong();
		  EndNum();
		  goto next;
		default:
		  EndNum();
		  UngetChar(t);
		  goto next;
		}
	    default:
	      goto errBadToken;
	    }
	  case 'f':
	  case 'F':
	    MakeFloat();
	    EndNum();
	    goto next;
	  case 'l':
	  case 'L':
	    MakeLong();
	    EndNum();
	    goto next;
	  default:
	    EndNum();
	    UngetChar(t);
	    goto next;
	  }
      default:
	UngetChar(t);
	StartBase10OrLowerNum('0');
	EndNum();
	goto next;
      }
  case '1': 
  case '2': 
  case '3': 
  case '4': 
  case '5': 
  case '6': 
  case '7': 
  case '8': 
  case '9': 
    StartBase10OrLowerNum(t);
  got_first_digit:
    switch(t = GetChar())
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto got_first_digit;
	  default:
	    EndNum();
	    UngetChar(t);
	    UngetChar('\\');
	    goto next;
	  }
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
	AddDecDigit(t);
      next_dec_digit:
	switch(t = GetChar())
	  {
	  case '\\':
	    switch(t = GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto next_dec_digit;
	      default:
		EndNum();
		UngetChar(t);
		UngetChar('\\');
		goto next;
	      }
	  case '0':
	  case '1':
	  case '2':
	  case '3':
	  case '4':
	  case '5':
	  case '6':
	  case '7':
	  case '8':
	  case '9':
	    AddDecDigit(t);
	    goto next_dec_digit;
	  case 'e':
	  case 'E':
	    goto collect_exponent;
	  case '.':
	    AddDecDigit('.');
	    goto next_mantissa_digit;
	  case 'l':
	  case 'L':
	    MakeLong();
	  check_unsigned_3:
	    switch(t = GetChar())
	      {
	      case '\\':
		switch(t = GetChar())
		  {
		  case '\n':
		    lineno++;
		    lineno_offset++;
		    goto check_unsigned_3;
		  default:
		    EndNum();
		    UngetChar(t);
		    UngetChar('\\');
		    goto next;
		  }
	      case 'u':
	      case 'U':
		MakeUnsigned();
		EndNum();
		goto next;
	      default:
		UngetChar(t);
		EndNum();
		goto next;
	      }
	  case 'u':
	  case 'U':
	    MakeUnsigned();
	  check_long_3:
	    switch(t = GetChar())
	      {
	      case '\\':
		switch(t = GetChar())
		  {
		  case '\n':
		    lineno++;
		    lineno_offset++;
		    goto check_long_3;
		  default:
		    EndNum();
		    UngetChar(t);
		    UngetChar('\\');
		    goto next;
		  }
	      case 'l':
	      case 'L':
		MakeLong();
		EndNum();
		goto next;
	      default:
		UngetChar(t);
		EndNum();
		goto next;
	      }
	  default:
	    UngetChar(t);
	    EndNum();
	    goto next;
	  }
      case 'e':
      case 'E': 
	goto collect_exponent;
      case '.':
	AddDecDigit('.');
	goto next_mantissa_digit;
      case 'l':
      case 'L':
	MakeLong();
      check_unsigned_4:
	switch(t = GetChar())
	  {
	  case '\\':
	    switch(t = GetChar())
	      {
	      case '\n':
		lineno++;
		lineno_offset++;
		goto check_unsigned_4;
	      default:
		EndNum();
		UngetChar(t);
		UngetChar('\\');
		goto next;
	      }
	  case 'u':
	  case 'U':
	    MakeUnsigned();
	    EndNum();
	    goto next;
	  default:
	    UngetChar(t);
	    EndNum();
	    goto next;
	  }
      case 'u':
      case 'U':
	MakeUnsigned();
      check_long_4:
	switch(t = GetChar())
	  {
	      case '\\':
		switch(t = GetChar())
		  {
		  case '\n':
		    lineno++;
		    lineno_offset++;
		    goto check_long_4;
		  default:
		    EndNum();
		    UngetChar(t);
		    UngetChar('\\');
		    goto next;
		  }
	  case 'l':
	  case 'L':
	    MakeLong();
	    EndNum();
	    goto next;
	  default:
	    UngetChar(t);
	    EndNum();
	    goto next;
	  }
      default:
	UngetChar(t);
	EndNum();
	goto next;
      }
  case '.':
  got_period:
    t = GetChar();
    switch(t)
      {
      case '\\':
	switch(t = GetChar())
	  {
	  case '\n':
	    lineno++;
	    lineno_offset++;
	    goto got_period;
	  default:
	    UngetChar(t);
	    UngetChar('\\');
	    GenOp(".");
	    goto next;
	  }
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
	StartBase10OrLowerNum('0');
	AddDecDigit('.');
	UngetChar(t);
	goto next_mantissa_digit;
      default:
	UngetChar(t);
	GenOp(".");
	goto next;
      }

  case '\\':
    switch(t = GetChar())
      {
      case '\n':
	lineno++;
	// Don't bump lineno_offset; haven't started a token.
	goto next;
      default:
	goto errBadToken;
      }

  default: 
    goto errBadToken; 
  } 
 unterminatedCommentError: 
  if(return_on_error)
    return EOF;
  fprintf(stderr, "%s: Error: Unterminated comment." NEWLINE, source_id); 
  exit(0); 
 eofError: 
  if(return_on_error)
    return EOF;
  fprintf(stderr, "%s: Error: Unexpected end of file encountered." NEWLINE,
	  source_id); 
  exit(0);
 errBadToken: 
  if(return_on_error)
    return EOF;
  fprintf(stderr, "%s:%d: Unrecognized token." NEWLINE
	  "Skipping to the next line and continuing..." NEWLINE, source_id,
	  lineno); 
  while((t = GetChar())) 
    { 
      switch(t) 
        { 
        case '\n': 
          lineno++; 
	  return 1;
        case EOF: 
	  return 0;
        default: 
          continue; 
        } 
    } 
  // Should not happen.
  abort();
  return 0;
} 
