// Integer arithmetic interpreter lexical analyzer. // // File: lexeme.cc // Author: course // Version: 2 // // This file defines the arithmetic interpreter lexical // analyzer. #include "basic.h" #include "lexeme.h" // Output a lexeme to a stream: s << l where s is // typically cout. (This is used only for debugging.) // ostream& operator << (ostream& s, lexeme l) { switch (l->type) { case LEX_SYMBOL: if (l->inited) return (s << l->pname << "[value = " << l->value << "]"); else return (s << l->pname); case LEX_NUMBER: return (s << l->value); case LEX_OP: switch (l->op) { case OP_PLUS: return (s << "OP_PLUS"); case OP_MINUS: return (s << "OP_MINUS"); case OP_TIMES: return (s << "OP_TIMES"); case OP_DIVIDE: return (s << "OP_DIVIDE"); } case LEX_ASSIGN: return (s << "LEX_ASSIGN"); case LEX_EOF: return (s << "LEX_EOF"); case LEX_LEFT_PAREN: return (s << "LEX_LEFT_PAREN"); case LEX_RIGHT_PAREN: return (s << "LEX_RIGHT_PAREN"); case LEX_SEMICOLON: return (s << "LEX_SEMICOLON"); } } // Symbol table: // // This is just the head of a list of lexeme_classes // threaded through their next fields. // static lexeme symtab = NULL; // Initialize lexeme reader // (i.e. initialize symbol table). // void init_lexemes(void) { // Empty symbol table // lexeme lex; while (lex = symtab) { symtab = lex->next; delete lex; } } // Lookup a symbol in the symbol table. Return NULL if // none. // static lexeme lookup(char * pname) { lexeme l = symtab; while (l != NULL && strcmp(l->pname, pname) != 0) l = l->next; return l; } // Input lexeme from a stream: s >> l. Remove preceding // space. If end of string, set l = NULL. Otherwise if // not legal lexeme, call error. // istream& operator >> (istream& s, lexeme& l) { // Skip whitespace. s >> ws; char c = s.get(); if (s.eof()) l = NULL; // End of file. else if (isalpha(c)) { // Lexeme is a symbol. char buffer[MAX_SYMBOL_LENGTH + 1]; char * p = buffer; // Skip to end of symbol. 
// while (isalpha(c) || isdigit(c)) { *p++ = c; c = s.get(); } // Put symbol terminating character back // into stream. // s.putback (c); // Put '\0' at end of symbol. // *p = '\0'; // Try to lookup symbol. // l = lookup (buffer); if (l == NULL) { // Symbol does NOT exist. Create symbol. l = new lexeme_class; l->type = LEX_SYMBOL; l->pname = new char[p + 1 - buffer]; l->inited = false; strcpy (l->pname, buffer); // Insert symbol in symbol table. // l->next = symtab; symtab = l; } } else if (isdigit(c)) { // Lexeme is a number. Create lexeme. l = new lexeme_class; l->type = LEX_NUMBER; s.putback (c); s >> l->value; } else { // Lexeme is a single character. // Create lexeme and then fill in type and op. l = new lexeme_class; switch (c) { case EOF_MARKER: l->type = LEX_EOF; break; case PLUS_SIGN: l->type = LEX_OP; l->op = OP_PLUS; break; case MINUS_SIGN: l->type = LEX_OP; l->op = OP_MINUS; break; case TIMES_SIGN: l->type = LEX_OP; l->op = OP_TIMES; break; case DIVIDE_SIGN: l->type = LEX_OP; l->op = OP_DIVIDE; break; case LEFT_PAREN: l->type = LEX_LEFT_PAREN; break; case RIGHT_PAREN: l->type = LEX_RIGHT_PAREN; break; case SEMICOLON: l->type = LEX_SEMICOLON; break; case EQUAL_SIGN: l->type = LEX_ASSIGN; break; default: error ("Bad character: ", c); } } return s; } // Input stream lexeme functions. static lexeme lastlex = NULL; // Last lexeme read. static bool backed_up = false; // True if lastlex // holds the next // lexeme. // Test to see if there are no more lexemes in input // stream ein. // bool end_lexemes(void) { if (! backed_up) { ein >> lastlex; if (lastlex != NULL) backed_up = true; } return ! backed_up; } // Get the next lexeme from the input stream ein. // lexeme get_lexeme(void) { if (end_lexemes()) error ("Premature end of file"); assert (backed_up); backed_up = false; return lastlex; } // Backup over last lexeme gotten. // void unget_lexeme(void) { assert (lastlex != NULL && ! backed_up); backed_up = true; } // Flush line of lexemes (e.g. after error). 
void flush_lexemes(void)
{
    // Discard the rest of the current input line from ein, up to and
    // including the next newline.
    //
    // Stop as soon as the stream is no longer good.  Testing eof() alone
    // is not enough: if failbit or badbit is set without eofbit, get()
    // keeps returning EOF without ever setting eofbit, and the loop
    // would never terminate.
    //
    while (ein.good() && ein.get() != '\n')
	;

    // Forget any lexeme that was read ahead or backed up, so the next
    // read takes fresh input from the stream.
    //
    backed_up = false;
}