/*
 * Copyright (c) 2004-2005 CAS Software AG, 
 * 10 Wilhelm-Schickard Street, 76131 Karlsruhe, Germany
 * 
 * Copyright (c) 2004-2006 FZI Forschungszentrum Informatik, 
 * 10-14 Haid-und-Neu Street, 76131 Karlsruhe, Germany
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
/*
	$Header: /cvsroot/sissy/SISSy/src/de/fzi/delphi/parser/op_lexer.g,v 1.3 2008/04/23 18:49:34 mtrifu Exp $
*/

/*
	WARNING:
	warnings have been disabled in the following rules
	- elseBlock -> ok according to the documentation / no check
	- caseStatement -> runtime check / not tested
	- statementList -> no check
*/
header{	// Package
	package de.fzi.delphi.parser;
	// Imports
	import de.fzi.delphi.OPDebug;
	import de.fzi.delphi.OPProjectManager;
    import java.util.Vector;
    import antlr.*;
}

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////    
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Lexer 
//
// The Delphi Language uses the ASCII character set, including
// the letters A through Z and a through z, the digits 0 through 9,
// and other standard characters. It is not case-sensitive. The
// space character (ASCII 32) and the control characters (ASCII 0
// through 31--including ASCII 13, the return or end-of-line
// character) are called blanks.
// Fundamental syntactic elements, called tokens, combine to form
// expressions, declarations, and statements. A statement describes
// an algorithmic action that can be executed within a program. An
// expression is a syntactic unit that occurs within a statement and
// denotes a value. A declaration defines an identifier (such as the
// name of a function or variable) that can be used in expressions
// and statements, and, where appropriate, allocates memory for the
// identifier.
// Lexer class for Delphi source text. The options below apply to every rule
// of the class unless a rule overrides them (IDENT overrides testLiterals).
class OPLexer extends Lexer;
options {
            charVocabulary = '\0'..'\377';	// characters 0..255 (377 is octal)
            testLiterals=false;    			// don't automatically test for literals
			caseSensitive=false;			// the rules below are written with lower-case characters only
            caseSensitiveLiterals=false;	// literals of the tokens section are compared ignoring case
	        importVocab=Common;				// token types are taken over from the vocabulary named Common
			exportVocab=OPLexer;	
//            k=3;                			// needed for Strings
            k=4;                			// needed for ASM
  }
	
	
	
	
	
// Token vocabulary of the lexer.
// Entries of the form NAME = "text" bind a literal to a token type. IDENT is
// the only rule with testLiterals=true, so these literals are recognised when
// the text matched by IDENT equals one of them (case is ignored, see
// caseSensitiveLiterals=false in the options).
// Entries without a literal only declare a token type name.
// The declaration order is left untouched on purpose.
tokens {
	// Integer division and remainder operators
	DIV		    = "div" ;
	MOD    		= "mod";

	// Boolean operators
	NOT			= "not";
	AND			= "and";
	OR			= "or";
	XOR			= "xor";

	// Shift operators
	SHL			= "shl";
	SHR			= "shr";
	
	// Typecast
	AS			= "as";

	// Set Op
	IN			= "in";
	//IS;
	IS			= "is";

	// Keywords
	PROGRAM		= "program";
	UNIT		= "unit";
	LIBRARY		= "library";
	PACKAGE		= "package";
	IMPLEMENTATION = "implementation";
	INITIALIZATION = "initialization";	
	LABEL		= "label";	
	USES 		= "uses";
	VAR			= "var";
	OUT			= "out";
	TYPE		= "type";
	CONSTANT	= "constant";
	INTERFACE	= "interface";
	DISPINTERFACE = "dispinterface";
	ABSOLUTE	= "absolute";
	BEGIN		= "begin";
	CASE		= "case";
	OF			= "of";
	ELSE		= "else";
	ON			= "on";
	FINALLY		= "finally";
	EXCEPT 		= "except";
	TRY			= "try";
	RAISE		= "raise";
	AT			= "at";
	WITH		= "with";
	UNTIL		= "until";
	REPEAT		= "repeat";
	WHILE		= "while";
	DO			= "do";
	FOR 		= "for";
	TO 			= "to";
	DOWNTO 		= "downto";	
	THEN		= "then";
	IF			= "if";
	GOTO		= "goto";
	FUNCTION	= "function";
	PROCEDURE	= "procedure";
	CONSTRUCTOR = "constructor";
	DESTRUCTOR  = "destructor";
	OBJECT		= "object";
	CONST		= "const";
	RESOURCESTRING = "resourcestring";
	ARRAY		= "array";
	RECORD		= "record";
	SET			= "set";
	PROPERTY	= "property";

	IMPLEMENTS	= "implements";
	INHERITED	= "inherited";
	FINALIZATION = "finalization";
	CLASS		= "class";
	PRIVATE		= "private";
	PUBLIC		= "public";
	PROTECTED	= "protected";
	PUBLISHED	= "published";
	AUTOMATED	= "automated";
 	NIL			= "nil";

	// Predefined type names and related words
	REAL48		= "real48";
	SINGLE		= "single";
	DOUBLE		= "double";
	INTEGER		= "integer";
	CARDINAL	= "cardinal";
	SHORTINT	= "shortint";
	SMALLINT	= "smallint";
	LONGINT		= "longint";
	INT64		= "int64";
	BYTE		= "byte";
	WORD		= "word";
	LONGWORD	= "longword";
	OLEVARIANT	= "olevariant";
	VARIANT		= "variant";
 	STRING		= "string";
	ANSISTRING	= "ansistring";
	WIDESTRING	= "widestring";
	PACKED		= "packed";
	REAL		= "real";
	BOOLEAN		= "boolean";
	TRUE		= "true";	
	FALSE		= "false";
	FILE		= "file";
	
	// directives
	CDECL		= "cdecl";
	DYNAMIC		= "dynamic";
	VIRTUAL		= "virtual";
	EXPORT		= "export";
	EXTERNAL	= "external";
	NEAR		= "near";
	FAR			= "far";
	FORWARD		= "forward";
	OVERRIDE	= "override";
	OVERLOAD	= "overload";
	PASCAL		= "pascal";
	REINTRODUCE	= "reintroduce";
	SAFECALL	= "safecall";
	STDCALL		= "stdcall";
	VARARGS		= "varargs";
	LOCAL		= "local";
	ABSTRACT	= "abstract";
	PLATFORM 	= "platform";
	DEPRECATED	= "deprecated";
	MESSAGE		= "message"; 
	NAME		= "name";

	REGISTER	= "register";

	DISPID 		= "dispid";
	READONLY	= "readonly";
	WRITEONLY	= "writeonly";

	ASSEMBLER	= "assembler";

// PropertySpecs
	INDEX		= "index"	; 
	READ		= "read";
	WRITE		= "write";
	STORED		= "stored";
	DEFAULT		= "default";
	NODEFAULT	= "nodefault";

	// Package clauses
	REQUIRES 	= "requires";
	CONTAINS	= "contains";
	EXPORTS		= "exports";
// Disabled literal-free declarations; each of these names is declared with
// a literal further up in this section.
//	ABSTRACT;
//	CDECL;
//	DEPRECATED;
//	DYNAMIC;
//	EXPORT;
// parser	EXTERNAL;
//	FAR;
// parser	FORWARD;
//	ASSEMBLER;
// parser	INDEX;
//	LOCAL;
// parser	MESSAGE;
// parser	NAME;
//	NEAR;
//	OVERLOAD;
//	OVERRIDE;
//	PASCAL;
//	REGISTER;
//	REINTRODUCE;
//	SAFECALL;
//	STDCALL;
//	VARARGS;
//	VIRTUAL;
	// Token types without a literal. EXPONENT is also a protected rule of
	// this lexer and REAL_LIT is assigned by INT_LIT via $setType; no active
	// rule in this file produces the remaining ones.
	PORTAB_DIRECTIVE;
//	PRPTY_SPEC;
	COMPILER_DIRECTIVE;
	EXPONENT;
	REAL_LIT;
	CHAR_LIT;

	// Misc
	// NOTE(review): nothing in this file emits the types from here to the end
	// of the section; presumably they label nodes built by the parser - confirm
	// against the parser grammar.
// parser	REAL_LIT;
// parser	CHAR_LIT;
	FUNC_CALL;
//	PROC_CALL;
	PARAM_LIST;
	STMNT_LIST;
	EMPTY_STMNT;
	TYPE_NODE;
	VALUE_NODE;
	IDLIST;
	PREDEF_TYPE;
	FILE_TYPE;
	UNIT_IDENT;

	// Declarations
	CLASS_DECL;
	TYPE_DECL;
	LABEL_DECL;
	ARRAY_DECL;
	ENUM_DECL;
	RECORD_DECL;
	FUNC_DECL;
	PROC_DECL;
	INNER_DECL;
	CONSTR_DECL;
	DESTR_DECL;
	PRPTY_DECL;
	VAR_DECL;
	PARAM_SPEC;
	DECL_SECT;
	VARIANT_SECT;
	
	PRPTY_PARAM;
	PRPTY_PARAM_LIST;
	DIRECTIVE;
	RANGE;
	COM_GUID;

	// Expressions
	EXPR;
	TERM;
	SIMP_EXPR;
	SIG_FACT;
	FACTOR;
	ARRAY_INDEX;
	UNARY_PLUS;
	UNARY_MINUS;
	SET_CONSTR;
	CONST_EXPR;
	OBJ_INIT;
	EXPR_LIST;
	MAIN_PROGRAM;
	EXTENDS;
	TYPE_CAST;
	
	AST_LIST;
	META_INFO;
	// "end" is not declared here (entry disabled).
	//END			= "end";
}

{ ////////////////////////////////
  ////  ACTION
	// Stream selector shared by all instances of this lexer (static). The ASM
	// rule pushes the stream registered as "asmlexer" onto it.
	static public TokenStreamSelector selector =  new TokenStreamSelector();

	// set this to 'true' if you want to check for preprocessor directives
	// (the flag is read by the ML_COMMENT1 rule only)
	private static final boolean DETECT_PREPROCESSOR_DIRECTIVES = false; 
			
	// Number of line breaks reported through newline() so far.
	private int currentLine = 0;
	// Comment line counter; incremented by COMMENT, ML_COMMENT1 and ML_COMMENT2.
	private int commentLines = 0;
	// Exposed by getAsmLines(); no active code in this file modifies it.
	private int asmLines = 0;
//	public boolean directiveMode = false;
//	public boolean propertyMode = false;
//	public boolean dispInterfaceDirectiveMode = false;

	/**
	 * @return the number of line breaks this lexer has counted through
	 *         newline(); the counter starts at 0
	 */
	public int getCurrentLine() {
		return this.currentLine;
	}
	
	/**
	 * @return the current value of the comment line counter that the comment
	 *         rules of this lexer increment
	 */
	public int getCommentLines() {
		return this.commentLines;
	}

	/**
	 * @return the current value of the asm line counter
	 */
	public int getAsmLines() {
		return this.asmLines;
	}

	/**
	 * Counts one more line break in currentLine and then delegates to the
	 * inherited newline() implementation.
	 */
	public void newline() {
		this.currentLine += 1;
		super.newline();
	}
	
	
	// Position of the token that is currently being matched. Lexer rules that
	// track positions store getLine()/getColumn() here before and after
	// matching; makeToken() copies the four values into the new token.
	private int currentStartLine = 0;
	private int currentStartColumn = 0;
	private int currentEndLine = 0;
	private int currentEndColumn = 0;
	
	/**
	 * Creates the token through the inherited factory and, when the result is
	 * a TokenWithStartPosition, copies the recorded start/end line and column
	 * into it.
	 *
	 * The four values are whatever the lexer rules recorded most recently; a
	 * rule that records nothing leaves the previous values in place.
	 *
	 * @param i token type handed on to the inherited makeToken
	 * @return the token created by the inherited makeToken
	 */
	public Token makeToken(int i) {
		final Token created = super.makeToken(i);
		if (!(created instanceof de.fzi.delphi.symbols.TokenWithStartPosition)) {
			return created;
		}

		final de.fzi.delphi.symbols.TokenWithStartPosition positioned =
				(de.fzi.delphi.symbols.TokenWithStartPosition) created;
		positioned.setStartLine(currentStartLine);
		positioned.setEndLine(currentEndLine);
		positioned.setStartColumn(currentStartColumn);
		positioned.setEndColumn(currentEndColumn);
		return created;
	}
	
	
	/**
	 * Tells whether the first non-whitespace character after the first
	 * character of the given text is a dollar sign.
	 *
	 * The character at index 0 is never inspected (the only caller in this
	 * file passes text that starts with the opening brace of a comment).
	 *
	 * @param s text to inspect; must not be null
	 * @return true if a dollar sign is the first non-whitespace character
	 *         found from index 1 on, false otherwise (also for texts shorter
	 *         than two characters)
	 */
	private boolean containsPossiblePreprocessorDirective(String s){
		final int end = s.length();
		int pos = 1;
		while (pos < end && Character.isWhitespace(s.charAt(pos))) {
			pos++;
		}
		return pos < end && s.charAt(pos) == '$';
	}
  ////  END OF ACTION
  ////////////////////////////////
	}

////////////////////////////////
//// Lexer 


// Literals
// Integer literal; becomes REAL_LIT when a fraction and/or an exponent follows.
// The predicate in front of the '.' keeps the dot out of the number when the
// character after it is another '.' or a ')', so that the ".." and ".)" tokens
// can still be formed after an integer.
// Start and end positions are recorded like the operator and IDENT rules do;
// without that, makeToken() would stamp the literal with the position of the
// previously matched operator or identifier.
INT_LIT
	:	{currentStartLine = getLine(); currentStartColumn = getColumn(); }
		('0'..'9')+ 
		(	(	{(LA(2)!='.')&&(LA(2)!=')')}?
				'.' {$setType(REAL_LIT);}	
				('0'..'9')+ (EXPONENT)?
			)?
		|	EXPONENT {$setType(REAL_LIT);}
		)
		{  currentEndLine = getLine(); currentEndColumn = getColumn(); }
	;
// a protected helper rule to assist in matching floating point numbers
// Exponent part of a real literal: the letter e, an optional sign and at
// least one digit. Protected, i.e. only used from other lexer rules (INT_LIT).
protected
EXPONENT
	:	'e'
		( '+' | '-' )?
		( '0'..'9' )+
	;

// Hexadecimal constant: '$' followed by one or more hex digits. Only lower-case
// digits are listed because the lexer runs with caseSensitive=false.
// The rule is also referenced from STRING_LIT for the "#$.." form.
// Start and end positions are recorded like the operator and IDENT rules do;
// without that, makeToken() would stamp the constant with the position of the
// previously matched operator or identifier.
HEX_CONST
	:	{currentStartLine = getLine(); currentStartColumn = getColumn(); }
		'$' ('0'..'9'|'a'..'f')+ 
		{  currentEndLine = getLine(); currentEndColumn = getColumn(); }
	;
	
// string literals

// String literal in one of three forms:
//   1. a quoted string; the delimiting quotes are dropped from the token text
//      and a doubled quote inside the string contributes a single quote,
//   2. one or more decimal character codes (#13#10 ...); the token text becomes
//      the characters with these codes,
//   3. a single hexadecimal character code (#$0d); the text is kept as written.
// The rule records the token position for makeToken(); the other position
// tracking rules do the same, and without it the literal would carry the
// position of the previously matched operator or identifier.
// NOTE(review): form 2 reads at most three digits per code, so a longer code
// such as #8364 is split - confirm whether wider codes have to be supported.
STRING_LIT
		{	// Start position of the literal. Locals keep it safe from any
			// sub-rule that records positions of its own.
			int startLine = getLine();
			int startColumn = getColumn();
		}
	:
	(
		( // string in quotation marks
			'\''!
	   		( 	'\'' '\''!
			|	~('\''|'\n'|'\r')
	    	)*
	    	(	'\''!
	    	|	// nothing -- write error message (illegal, so we'll assume it ends the string)
	    	)
		)
		|	{ StringBuffer tempString = new StringBuffer(); 
			  int l;	// number of digits of the character code read last
			}
			( '#'! 
				( '0'..'9' { l=1; } )
				(('0'..'9' { l=2; } )
				 ('0'..'9' { l=3; } )?
				 )?
			  { 
			  	// the last l characters of the text matched so far are the digits
			  	tempString.append((char)Integer.parseInt($getText.substring($getText.length()-l))); 
			  }
			 (' ')*
			)+
			
			{ 
				$setText(tempString.toString()); 
			}
		|	( '#' HEX_CONST )
    )
		{
			currentStartLine = startLine;
			currentStartColumn = startColumn;
			currentEndLine = getLine();
			currentEndColumn = getColumn();
		}
    ;

// Characters
//CHAR_LIT
//	:	 '#'! ('0'..'9')+ 
//		
//	;

// Comments
// Single-line comments
// Single-line comment: "//" up to, but not including, the end of the line.
// The token is skipped and counted as one comment line.
COMMENT
	:	"//"
		( ~('\r'|'\n') )*
		{
			commentLines++;
			$setType(Token.SKIP);
		}
	;


//COMPILER_DIRECTIVE_OPEN
//	:
//		"{$"! 
////		{ 	System.err.println("new Lexer"); 
////			// BREAK comdir open
////			OPMain.selector.push("compilerDirective"); 
////			OPCompilerDirectiveParser p 
////				= new OPCompilerDirectiveParser(OPMain.selector.getCurrentStream());
////
////			p.start();
////		}
//		 ( ~('}') )+ '}' // for skipping
//        {$setType(Token.SKIP);} //ignore this token
//	;


//protected COMPILER_SUB_DIRECTIVE
//	:
//		    '$' IDENT
//		    {
//		    	System.out.println();
//				String str=$getText;
//
//				if( str.toLowerCase().startsWith("$if",1) )
//				{
//					System.out.println("$$$ Sorry, can't handle conditional compiler-directives yet. "+$getText+"}"); 
//					//conditionalCompilerDirective = true;
//					//System.exit(2);
//				}
//				else if( str.toLowerCase().startsWith("$else",1) )
//				{
//				}
//				else if( str.toLowerCase().startsWith("$endif",1) )
//				{
//				//	conditionalCompilerDirective = false;
//				}
//				else System.out.println($getText); 
//		    }
//		;

// multiple-line comments
// Brace comment: everything from '{' to the next '}'. The token is skipped.
// commentLines grows by one for every line break inside the comment plus one
// for the comment itself, i.e. by the number of lines the comment touches.
// Text of the form {$...} is consumed by this rule as well.
ML_COMMENT1 // { }
    : '{'
   // 	~('$')
	    (   /*  '\r' '\n' can be matched in one alternative or by matching
	                '\r' in one iteration and '\n' in another.  I am trying to
	                handle any flavor of newline that comes in, but the language
	                that allows both "\r\n" and "\r" and "\n" to all be valid
	                newline is ambiguous.  Consequently, the resulting grammar
	                must be ambiguous.  I'm shutting this warning off.
	             */
	        options { generateAmbigWarnings=false; }
	    	:	'\r' '\n'{	newline();
	    					commentLines++;
	    				}
	        |	'\r'	{	newline();
	        				commentLines++;
	        				}
	        |	'\n'	{	newline();
	        				commentLines++;
	        				}
	        |	~('}'|'\n'|'\r')
	    )*
	    {$setType(Token.SKIP);
	    	commentLines++;
	    }
    '}'
	{   // detect preprocessor directives
		// Inactive while DETECT_PREPROCESSOR_DIRECTIVES is false. The text
		// handed to the check is read from the text buffer starting at _begin;
		// _begin is not declared in this file - presumably the offset of this
		// token in the generated rule method, TODO confirm.
		if( DETECT_PREPROCESSOR_DIRECTIVES ){
			if( containsPossiblePreprocessorDirective(
					new String(text.getBuffer(), _begin, text.length()-_begin) ) )
				OPDebug.debugPrintln(1,"Warning! Preprocessor-directive found in line "+(getCurrentLine()+1)+": '"+(new String(text.getBuffer(), _begin, text.length()-_begin))+"'");
		}
	}
;

// Parenthesis-star comment: everything from "(*" to the next "*)". The token
// is skipped. commentLines grows by one for every line break inside the
// comment plus one for the comment itself.
ML_COMMENT2 // (* *)
    : "(*"
        (   /*  '\r' '\n' can be matched in one alternative or by matching
                '\r' in one iteration and '\n' in another.  I am trying to
                handle any flavor of newline that comes in, but the language
                that allows both "\r\n" and "\r" and "\n" to all be valid
                newline is ambiguous.  Consequently, the resulting grammar
                must be ambiguous.  I'm shutting this warning off.
             */
        options { generateAmbigWarnings=false; }
    // a '*' belongs to the comment body only if it is not followed by ')'
    : { LA(2)!=')' }? '*'
        |'\r' '\n'{newline();
	        				commentLines++;
			        }
        |'\r'{newline();
	        				commentLines++;
        }
        |'\n'{newline();
	        				commentLines++;
        }
        |~('*'|'\n'|'\r')
        )*
        "*)"
	{$setType(Token.SKIP);
	        				commentLines++;
	}
;

// Start of an inline assembler section. After matching "asm" the rule pushes
// the stream registered as "asmlexer" onto the shared selector, so the tokens
// that follow are delivered by that lexer.
// Start and end positions are recorded like the operator and IDENT rules do;
// without that, makeToken() would stamp the ASM token with the position of the
// previously matched operator or identifier.
// NOTE(review): the rule does not look at the character after "asm"; check how
// an identifier that merely starts with these three letters is tokenized.
ASM // ASM ... END
    :  	{currentStartLine = getLine(); currentStartColumn = getColumn(); }
    	"asm"
    	{	currentEndLine = getLine(); currentEndColumn = getColumn();
    		OPDebug.debugPrintln(3,"Switching to ASM Lexer ..."); 
    		selector.push("asmlexer"); 
    	}
	;
	
//ASM_DIREC // ASM ... END
//        :  	'a''s''m' WS
//        (   /*  '\r' '\n' can be matched in one alternative or by matching
//                '\r' in one iteration and '\n' in another.  I am trying to
//                handle any flavor of newline that comes in, but the language
//                that allows both "\r\n" and "\r" and "\n" to all be valid
//                newline is ambiguous.  Consequently, the resulting grammar
//                must be ambiguous.  I'm shutting this warning off.
//             */
//        options { generateAmbigWarnings=false; }
//    : 	
//	    { LA(2)!='n' && LA(3)!='d' }? 'e'
//  	|	'\r' '\n'{newline();
//  				asmLines++;
//  		}
//	|	'\r'{newline();
//	  				asmLines++;
//	  				}
//	|	'\n'{newline();
//  				asmLines++;	
//	}
//	|	COMMENT
//	|	ML_COMMENT1
//	|	ML_COMMENT2
//	|	~('e'|'\n'|'\r')
//	)*
//	"end"
////        {$setType(Token.SKIP);} //ignore this token
//  				{ asmLines+=2; }
//	;

// Operators
// Every operator rule has the same shape: record line/column before the match
// as the token start, match the operator text, record line/column after the
// match as the token end. makeToken() copies these values into the token.
// Several operators share a prefix ('.' ".." ".)", '(' "(.", ':' ":=",
// '<' "<>" "<=", '>' ">="); telling them apart is left to the lookahead the
// generator computes for this lexer (k=4).
// NOTE(review): '{' also starts ML_COMMENT1 - check whether LCURLY can ever
// be selected.
DOT        : {currentStartLine = getLine(); currentStartColumn = getColumn(); }  '.'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
EOF_DOT		: {currentStartLine = getLine(); currentStartColumn = getColumn(); } '!'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
ASSIGN    : {currentStartLine = getLine(); currentStartColumn = getColumn(); } ":=" {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
//IS        : {currentStartLine = getLine(); currentStartColumn = getColumn(); }  " is "  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
COLON      : {currentStartLine = getLine(); currentStartColumn = getColumn(); } ':'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
SEMI!       : {currentStartLine = getLine(); currentStartColumn = getColumn(); } ';'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
COMMA      : {currentStartLine = getLine(); currentStartColumn = getColumn(); } ','  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
EQUALS     : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '='  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
LBRACKET   : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '['  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
RBRACKET   : {currentStartLine = getLine(); currentStartColumn = getColumn(); } ']'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
LCURLY   : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '{'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
RCURLY   : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '}'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
// "(." and ".)" are alternative spellings of the square brackets
LBRACKET_ALT   : {currentStartLine = getLine(); currentStartColumn = getColumn(); } "(." {  currentEndLine = getLine(); currentEndColumn = getColumn(); } ;
RBRACKET_ALT   : {currentStartLine = getLine(); currentStartColumn = getColumn(); } ".)" {  currentEndLine = getLine(); currentEndColumn = getColumn(); } ;
DOTDOT     : {currentStartLine = getLine(); currentStartColumn = getColumn(); } ".." {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
LPAREN     : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '('  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
RPAREN     : {currentStartLine = getLine(); currentStartColumn = getColumn(); } ')'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
NOT_EQUALS : {currentStartLine = getLine(); currentStartColumn = getColumn(); } "<>" {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
LT         : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '<'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
LTE        : {currentStartLine = getLine(); currentStartColumn = getColumn(); } "<=" {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
GT         : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '>'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
GTE        : {currentStartLine = getLine(); currentStartColumn = getColumn(); } ">=" {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
DEREF		: {currentStartLine = getLine(); currentStartColumn = getColumn(); } '^' {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
ADDR_OP		: {currentStartLine = getLine(); currentStartColumn = getColumn(); } '@' {  currentEndLine = getLine(); currentEndColumn = getColumn(); } ;
// Arithmetic operators
PLUS       : {currentStartLine = getLine(); currentStartColumn = getColumn(); }'+'  {  currentEndLine = getLine(); currentEndColumn = getColumn(); };
MINUS      : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '-' {  currentEndLine = getLine(); currentEndColumn = getColumn(); } ;
TIMES      : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '*' {  currentEndLine = getLine(); currentEndColumn = getColumn(); } ;
DIVIDE     : {currentStartLine = getLine(); currentStartColumn = getColumn(); } '/'{  currentEndLine = getLine(); currentEndColumn = getColumn(); };
// misc
// APOSTROPHE:	'\'';
// Whitespace: exactly one blank, tab or line break per token. Line breaks are
// reported through newline(); the token itself is always skipped.
// The CR LF alternative has to stay in front of the lone CR alternative.
WS
	:	(	options { generateAmbigWarnings=false; }
		:	' '
		|	'\t'
		|	'\r' '\n'	{ newline(); }	// CR LF (DOS)
		|	'\n'		{ newline(); }	// LF (Unix)
		|	'\r'		{ newline(); }	// lone CR (Mac)
		)
		{ $setType(Token.SKIP); }	// never handed to the parser
	;
	
// match an upper/lower case name of any length
// An identifier can be of any length, but only the first 255 characters are significant. An identifier must begin with a letter or an underscore (_) and cannot contain spaces; letters, digits, and underscores are allowed after the first character. 
// Reserved words cannot be used as identifiers.
// Identifier: a letter or underscore followed by any number of letters,
// digits and underscores. Only lower-case letters are listed because the
// lexer runs with caseSensitive=false.
// testLiterals is switched on for this rule, so the matched text is looked up
// among the literals of the tokens section. The start and end positions are
// recorded for makeToken().
IDENT
options {
	testLiterals=true;
}
	:	{ currentStartLine = getLine(); currentStartColumn = getColumn(); }
		( '_' | 'a'..'z' )
		( '_' | '0'..'9' | 'a'..'z' )*
		{ currentEndLine = getLine(); currentEndColumn = getColumn(); }
	;

