%{ /* -*-C++-*- */
/* lexer.l
 * by Will Wagner
 * last updated 20 Sep 1997, 17:21:04 wwagner
 *
 * This file contains the lexical analyzer for the POVRay raytracer.
 *
 * If in_comment is > 0, we ignore *everything* until it == 0.
 *
 * Changes
 *  25 Jul 1997
 *  - Created the file.
 *
 *  27 Jul 1997
 *  - We now return the semicolon, since somebody might want it;
 *    deleted the directives, since they will be intercepted by the
 *    parser, not here.
 *
 *  04 Aug 1997
 *  - Tweaked the identifier rule - if the identifier is found in
 *    the user symbol table (meaning it has already been defined
 *    somewhere), we return the type, put the actual token number in
 *    token_float, and put the token text in token_string; added a
 *    set of rules for transparently handling includes; we are now a
 *    C++ scanner derived from the FlexLexer class.
 *
 *  11 Aug 1997
 *  - Added some rules to transparently handle exec; switched to
 *    exclusive use of LexerError instead of throwing the exceptions
 *    by hand.
 *
 *  16 Aug 1997
 *  - Added a string-terminated-by-newline rule (BADSTRING); added
 *    the WS definition of whitespace.
 *
 *  06 Sep 1997
 *  - Fixed the fname_stack declaration; changed a reference to
 *    Registry::LocateIncludeFile to the current
 *    Registry::LocateLibraryFile in {STRING} state; it now
 *    compiles errorlessly.
 *
 *  07 Sep 1997
 *  - Got rid of the exec rules, since it requires a bit more
 *    handling than we can give it here; for purposes of
 *    orthogonality, removed the include rules as well, which will
 *    also move to the parser.
 *
 *  20 Sep 1997
 *  - Minor changes to STRING and BADSTRING definitions.
 *
 * Things to do
 *  - When resetting the file position in ResetFilePosition, make
 *    the lexer dump the current buffer contents.  These functions
 *    are for doing looping constructs.
 *
 *  - Modify the STRING and BADSTRING definitions to allow \" in
 *    the strings; as it stands now, they don't, which is incorrect.
*/

/* NOTE(review): in this copy of the file everything between '<' and '>'
 * was stripped, so the five system header names were lost.  The names
 * below are reconstructed from what the file uses (atof; strlen and
 * memcpy; streampos; list; stack) in the pre-standard spelling that
 * matches the unqualified stack< list<...> > declarations further down.
 * Confirm against the original sources / toolchain. */
#include <stdlib.h>
#include <string.h>
#include <iostream.h>
#include <list.h>
#include <stack.h>
#include "registry.h"
#include "lexer.h"

/* Block-comment nesting depth.  Every rule below discards its match while
 * this is non-zero. */
int in_comment = 0;

/* NOTE(review): the list<...> element types were stripped as well.
 * include_stack has to hold flex buffer handles, because its top() is
 * handed to yy_switch_to_buffer() in the end-of-file rule; the element
 * type of fname_stack is a guess.  Confirm both. */
stack< list<char *> > fname_stack;
stack< list<YY_BUFFER_STATE> > include_stack;

/* Store the first `len` characters of `src` in `buf` as a NUL-terminated
 * string.
 *
 * The existing buffer is reused when the string it currently holds is at
 * least `len` characters long (so the allocation is known to be big
 * enough); otherwise it is released and replaced.  The pointer is cleared
 * right after delete[] so it can never be written through, or deleted
 * again, once freed. */
static void set_token_text(char *&buf, const char *src, size_t len)
{
    if (buf != NULL && strlen(buf) < len) {
        delete[] buf;
        buf = NULL;
    }
    if (buf == NULL)
        buf = new char[len + 1];    /* +1 for the terminating NUL */
    memcpy(buf, src, len);
    buf[len] = '\0';
}
%}

%option yylineno
%option yyclass="Tokenizer"

/* BADSTRING is an opening quote that reaches the end of its line without
 * a closing quote.  It stops at the first newline so that yylineno still
 * points at the offending line when the error is raised. */
DIGIT       [0-9]
EXPONENT    [Ee][+-]?{DIGIT}+
ID          [A-Za-z_][A-Za-z_0-9]*
STRING      \"[^\"\n]*\"
BADSTRING   \"[^\"\n]*\n
WS          [ \t\n]

%%

{ID}    {
            if (!in_comment) {
                TOKEN token_val;

                /* A hit in registry.res() is returned unchanged
                 * (presumably the reserved-word table - confirm).
                 * Anything else goes through the symbol table. */
                if ((token_val = registry.res().Find(yytext)) == -1) {
                    if ((token_val = registry.sym().Find(yytext)) == -1)
                        token_val = registry.sym().Add(yytext);
                    else {
                        /* Known symbol: hand back its type as the token
                         * and keep its table number in token_float. */
                        token_float = (double)token_val;
                        token_val = registry.sym().Type(yytext);
                    }
                    set_token_text(token_string, yytext, (size_t)yyleng);
                }
                return (token_id = token_val);
            }
        }

{STRING}    {
                if (in_comment) {
                    /* Inside a comment the quotes mean nothing and the
                     * match may contain the end-of-comment marker, so
                     * keep only the first character and rescan. */
                    yyless(1);
                } else {
                    /* Copy the text between the two quotes. */
                    set_token_text(token_string, yytext + 1,
                                   (size_t)(yyleng - 2));
                    return (token_id = STRING_LITERAL_TOKEN);
                }
            }

{BADSTRING} {
                if (in_comment)
                    yyless(1);      /* same reasoning as for STRING */
                else
                    LexerError("Illegal string constant");
            }

[+-]?({DIGIT}+|{DIGIT}+"."{DIGIT}*|"."{DIGIT}+){EXPONENT}?  {
                if (!in_comment) {
                    token_float = atof(yytext);
                    return (token_id = FLOAT_TOKEN);
                }
            }

"//".*  {
            /* Single-line comment; eat it.  Inside a block comment the
             * rest of the line may hold the end-of-comment marker, so
             * there only the first slash is consumed. */
            if (in_comment)
                yyless(1);
        }

"/*"    ++in_comment;

"*/"    {
            if (in_comment > 0)
                --in_comment;
            else {
                /* Nothing to close: this is a star followed by a slash,
                 * and the slash may itself begin a comment.  Return the
                 * star and rescan from the slash instead of driving the
                 * nesting depth negative. */
                yyless(1);
                return (token_id = STAR_TOKEN);
            }
        }

"!="    if (!in_comment) return (token_id = REL_NE_TOKEN);
"<="    if (!in_comment) return (token_id = REL_LE_TOKEN);
">="    if (!in_comment) return (token_id = REL_GE_TOKEN);
"{"     if (!in_comment) return (token_id = LEFT_CURLY_TOKEN);
"}"     if (!in_comment) return (token_id = RIGHT_CURLY_TOKEN);
"<"     if (!in_comment) return (token_id = LEFT_ANGLE_TOKEN);
">"     if (!in_comment) return (token_id = RIGHT_ANGLE_TOKEN);
"("     if (!in_comment) return (token_id = LEFT_PAREN_TOKEN);
")"     if (!in_comment) return (token_id = RIGHT_PAREN_TOKEN);
"["     if (!in_comment) return (token_id = LEFT_SQUARE_TOKEN);
"]"     if (!in_comment) return (token_id = RIGHT_SQUARE_TOKEN);
"="     if (!in_comment) return (token_id = EQUALS_TOKEN);
"+"     if (!in_comment) return (token_id = PLUS_TOKEN);
"-"     if (!in_comment) return (token_id = DASH_TOKEN);
"*"     if (!in_comment) return (token_id = STAR_TOKEN);
"/"     if (!in_comment) return (token_id = SLASH_TOKEN);
"&"     if (!in_comment) return (token_id = AMPERSAND_TOKEN);
"|"     if (!in_comment) return (token_id = BAR_TOKEN);
"!"     if (!in_comment) return (token_id = EXCLAMATION_TOKEN);
","     if (!in_comment) return (token_id = COMMA_TOKEN);
"#"     if (!in_comment) return (token_id = HASH_TOKEN);
"%"     if (!in_comment) return (token_id = PERCENT_TOKEN);
"?"     if (!in_comment) return (token_id = QUESTION_TOKEN);
":"     if (!in_comment) return (token_id = COLON_TOKEN);
"."     if (!in_comment) return (token_id = PERIOD_TOKEN);
"@"     if (!in_comment) return (token_id = AT_TOKEN);
"`"     if (!in_comment) return (token_id = BACK_QUOTE_TOKEN);
"\\"    if (!in_comment) return (token_id = BACK_SLASH_TOKEN);
"^"     if (!in_comment) return (token_id = HAT_TOKEN);
"'"     if (!in_comment) return (token_id = SINGLE_QUOTE_TOKEN);
"\""    if (!in_comment) return (token_id = DOUBLE_QUOTE_TOKEN);
"~"     if (!in_comment) return (token_id = TILDE_TOKEN);
"$"     if (!in_comment) return (token_id = DOLLAR_TOKEN);
";"     if (!in_comment) return (token_id = SEMICOLON_TOKEN);

{WS}+   /* whitespace; munch, munch */

<<EOF>> {
            if (!in_comment) {
                if (!include_stack.empty()) {
                    /* End of an included file: drop its buffer and
                     * resume the one that was active before it. */
                    yy_delete_buffer(YY_CURRENT_BUFFER);
                    yy_switch_to_buffer(include_stack.top());
                    fname_stack.pop();
                    include_stack.pop();
                } else
                    return (token_id = END_OF_FILE_TOKEN);
            } else
                LexerError("Unexpected EOF in comment");
        }

.       {
            /* Characters no other rule accepts are an error in code,
             * but harmless inside a comment. */
            if (!in_comment)
                LexerError("Unrecognized character");
        }

%%

/* Report a scanning error: throws a TokenizerError built from `msg`, the
 * current token_id and the current line number (yylineno). */
void Tokenizer::LexerError(const char *msg)
{
    throw TokenizerError("yylex", msg, token_id, yylineno);
}

/* Start with no token text, a zero token value and no pushed-back token.
 * unget_token must be cleared here because GetToken() reads it before
 * anything else writes it. */
Tokenizer::Tokenizer()
{
    token_string = NULL;
    token_float = 0.0;
    unget_token = 0;
}

/* Release the token text buffer (delete[] on a null pointer is a no-op). */
Tokenizer::~Tokenizer()
{
    delete[] token_string;
}

/* Return the next token.  If UngetToken() was called since the last
 * GetToken(), the previous token_id is returned again instead of
 * scanning; token_string and token_float are left as they were. */
TOKEN Tokenizer::GetToken(void)
{
    if (!unget_token)
        return this->yylex();

    unget_token = 0;
    return token_id;
}

/* Make the next GetToken() return the current token again.  Only one
 * token of push-back is kept: this sets a flag, not a counter. */
void Tokenizer::UngetToken(void)
{
    unget_token = 1;
}

/* Text stored by the identifier and string rules.  The buffer belongs to
 * the Tokenizer and is overwritten or replaced by later tokens; it is
 * NULL until the first such token has been scanned. */
char *Tokenizer::GetTokenString(void) const
{
    return token_string;
}

/* Value stored by the number rule, or the symbol-table number stored by
 * the identifier rule for an already-known symbol. */
DBL Tokenizer::GetTokenFloat(void) const
{
    return token_float;
}

/* Current read position of the input stream.
 * NOTE(review): this is the stream's position, not the scanner's; flex
 * reads ahead into its own buffer, so the two presumably differ - confirm
 * before relying on it for exact token positions. */
streampos Tokenizer::GetFilePosition(void) const
{
    return yyin->tellg();
}

/* Seek the input stream to `sp`.  Only the stream is moved; text flex
 * has already buffered is not discarded here. */
void Tokenizer::ResetFilePosition(streampos sp)
{
    yyin->seekg(sp);
}