// Lexical Scanner for the LISP Interpreter.
//
// File:        scanner.cc
// Author:      {your name} <{your e-mail address}>
// Assignment:  7

// Notes on Grammar:
//
//      A file is a string of ASCII characters followed
//      by an infinite number of EOF characters.  An EOF
//      is NOT an ASCII character (it is represented
//      internally by -1).
//
//      A graphic character c is an ASCII character that
//      makes a mark on the paper, or one for which
//      isgraph (c) is true.
//
//      A space character c is an ASCII character that
//      makes no mark on the paper, or one for which
//      isspace (c) is true.
//
//      At each point where a token begins, the longest
//      possible token is identified.

// Grammar:
//
//      file ::= { space token }*
//
//      token ::= symbol | end-of-file | error-token
//
//      symbol ::= symbol-character*
//
//      symbol-character ::=
//              <any graphic character other than
//               " ' ( ) \ [ ] ; ` , and |>
//
//      end-of-file ::= EOF
//
//      error-token ::=
//              <any single ASCII character that is neither
//               a space character nor a symbol-character>
//
//      space ::= <space character>*

// Deterministic Finite Automaton (with putback):
//
// Notes:  Upon any transition, the current input
//         character must be accepted, discarded, or
//         putback.
//
//         `c' denotes the current input character.
//
//         symbolic (c) is true if c can be in a symbol,
//         or is `|'.  It is true of `#'.
//
//         EOF is not an ASCII character, and cannot be
//         accepted.  It can be discarded or putback.
//
//         accept_upper is a special form of accept
//         that converts c from lower to upper case
//         before accepting it, if c is a letter.

// BEGIN:
//      EOF   ----> BEGIN       discard
//                              output (EOF_TOKEN)
//
//      |     ----> BEGIN       discard
//                              output (ERROR_TOKEN)
//
//      isspace (c)             discard
//            ----> BEGIN
//
//      symbolic (c)            putback
//            ----> SYMBOL
//
//      other ----> BEGIN       discard
//                              output (ERROR_TOKEN)
//
// SYMBOL:
//      |     ----> BEGIN       putback
//                              output (SYMBOL_TOKEN)
//
//      symbolic (c)            accept_upper
//            ----> SYMBOL
//
//      other                   putback
//            ----> BEGIN       output (SYMBOL_TOKEN)

#include "lisp.h"
#include "scanner.h"
#include <ctype.h>
                // Macros to test the class of
                // a character.

// Characters that can be used in symbolic and numeric // atoms, or the '|' character. `#' is included. // inline bool symbolic (char c) { return isgraph(c) && (c) != '"' && (c) != '\'' && (c) != '(' && (c) != ')' && (c) != '\\' && (c) != '[' && (c) != ']' && (c) != ';' && (c) != '`' && (c) != ','; } // History of last token read and whether we have // backed up over it. // // TBW // Function to backup over last token. Assumes at // least one token has been read. // void backup_token() { // TBW } struct scanner_state // Stores the state of a token scanner. { // TBW istream * in; // The input stream from which characters // are gotten. scanner_state ( istream & s) // Construct a scanner state for a given // input stream. All operations on the // input stream must be done through the // scanner state until the scanner state // is no longer needed. { in = & s; // TBW } // The basic operations are `next', accept, // `discard', and `putback'. Rule: after doing a // `next', exactly one of the other basic // operations MUST be done before doing any other // operation on either the scanner state OR the // input stream. // // An end of file is represented as an EOF // character. This is actually the integer -1, // so a character is an int and not a char (the // latter may not be able to store a -1 value). // // Rule: an EOF cannot be accepted (it may be // discarded or putback). int next ( void ) // Get the next character. May return EOF. { return in->peek(); } void accept ( void ) // Accept the character gotten by next(); { in->get(); // TBW } void accept_upper ( void ) // Accept the character gotten by next(), after // converting that character from lower to upper // case IF that character is a letter. { in->get(); // TBW } void discard ( void ) // Discard the character gotten by next(). { in->get(); } void putback ( void ) {} // Put back the character gotten by next(). 
char * output (void) // Return a string consisting of all characters // accepted since the scanner state was // constructed. { return "FOOBAR"; // TBW } }; // Function to read a token. // token get_token (istream& s) { token tok; scanner_state st (s); int c; // Last character gotten. BEGIN: switch ( c = st.next() ) { case EOF: st.discard(); tok.type = EOF_TOKEN; goto OUTPUT; case '|': st.discard(); tok.type = ERROR_TOKEN; goto OUTPUT; default: if ( isspace (c) ) { st.discard(); goto BEGIN; } else if ( symbolic (c) ) { st.putback(); goto SYMBOL; } else { st.discard(); tok.type = ERROR_TOKEN; goto OUTPUT; } } SYMBOL: switch ( c = st.next() ) { case '|': st.putback(); goto OUTPUT_SYMBOL; default: if ( symbolic (c) ) { st.accept_upper(); goto SYMBOL; } else { st.putback(); goto OUTPUT_SYMBOL; } } OUTPUT_SYMBOL: tok.type = SYMBOL_TOKEN; tok.value = make_symbol ( st.output() ); goto OUTPUT; OUTPUT: return tok; }