lexer grammar PreprocessorLexer;

/*
 * Author: Stephen F. Siegel, University of Delaware
 * Last changed: June 2012
 *
 * This is a grammar for lexical analysis for a preprocessor
 * file.  It follows the C11 Standard.  This grammar assumes
 * that the stream of characters being scanned has already
 * gone through translation phases 1 and 2.  In particular
 * backslash followed by newline sequences have been removed.
 *
 * This lexer grammar will not contain C keywords and ones for
 * CIVL-C, ACSL, GNU and CUDA extensions of C.
 * Those keywords are defined in
 * dev.civl.abc.front.c.parse.PP2CivlcTokenCConverter
 * A private function named `initCKeywordMap` shall identify
 * and set tokens in preprocessed streams as their corresponding
 * token-types.
 *
 * NOTE: the order of the rules below is significant; when more than
 * one token rule can match the same text, the rule listed first is
 * preferred.  Do not reorder rules without checking the consequences.
 */

@header
{
package dev.civl.abc.front.c.preproc;
}

@members
{
@Override
public void emitErrorMessage(String msg) { // don't try to recover!
    throw new RuntimeException(msg);
}
}

/****** White space ******/

NEWLINE		:	'\r'? '\n' ;
WS		:	' ' | '\t' ;

/* Words that are used in both C and the preprocessor */

IF		:	'if' ;
ELSE		:	'else' ;

/* Words used in preprocessor but not in C */

DEFINE		:	'define' ;
DEFINED		:	'defined' ;
ELIF		:	'elif' ;
ENDIF		:	'endif' ;
ERROR		:	'error' ;
IFDEF		:	'ifdef' ;
IFNDEF		:	'ifndef' ;
INCLUDE		:	'include' ;
LINE		:	'line' ;
PRAGMA		:	'pragma' ;
UNDEF		:	'undef' ;

/****** Punctuators: C11 Sec. 6.4.6 ******/

ELLIPSIS	:	'...' ;
DOTDOT		:	'..' ;
DOT		:	'.' ;
AMPERSAND	:	'&' ;
AND		:	'&&' ;
ARROW		:	'->' ;
ASSIGN		:	'=' ;
BITANDEQ	:	'&=' ;
BITOR		:	'|' ;
BITOREQ		:	'|=' ;
BITXOR		:	'^' ;
BITXOREQ	:	'^=' ;
COLON		:	':' ;
COMMA		:	',' ;
DIV		:	'/' ;
DIVEQ		:	'/=' ;
EQUALS		:	'==' ;
GT		:	'>' ;
GTE		:	'>=' ;
HASH		:	'#' | '%:' ;
HASHHASH	:	'##' | '%:%:' ;
LCURLY		:	'{' | '<%' ;
LPAREN		:	'(' ;
LSQUARE		:	'[' | '<:' ;
LT		:	'<' ;
LTE		:	'<=' ;
MINUSMINUS	:	'--' ;
MOD		:	'%' ;
MODEQ		:	'%=' ;
NEQ		:	'!=' ;
NOT		:	'!' ;
OR		:	'||' ;
PLUS		:	'+' ;
PLUSEQ		:	'+=' ;
PLUSPLUS	:	'++' ;
QMARK		:	'?' ;
RCURLY		:	'}' | '%>' ;
RPAREN		:	')' ;
RSQUARE		:	']' | ':>' ;
SEMI		:	';' ;
SHIFTLEFT	:	'<<' ;
SHIFTLEFTEQ	:	'<<=' ;
SHIFTRIGHT	:	'>>' ;
SHIFTRIGHTEQ	:	'>>=' ;
STAR		:	'*' ;
STAREQ		:	'*=' ;
SUB		:	'-' ;
SUBEQ		:	'-=' ;
TILDE		:	'~' ;

/* CIVL-C and ACSL Punctuators */

ANNOTATION_START
		:	'/*@' ;
ANNOTATION_END	:	'*/' ;
AT		:	'@' ;
EQUIV_ACSL	:	'<==>' ;
IMPLIES		:	'=>' ;
IMPLIES_ACSL	:	'==>' ;
INLINE_ANNOTATION_START
		:	'//@' ;
// LSLIST and RSLIST enclose a scope list
LSLIST		:	'<|' ;
RSLIST		:	'|>' ;
XOR_ACSL	:	'^^' ;

/* CUDA Punctuators */

LEXCON		:	'<<<' ;
REXCON		:	'>>>' ;

/****** Identifiers: C11 Sec. 6.4.2 ******/

IDENTIFIER	:	IdentifierNonDigit (IdentifierNonDigit | Digit)* ;

fragment
IdentifierNonDigit
		:	NonDigit | UniversalCharacterName ;

fragment
Zero		:	'0' ;

fragment
Digit		:	Zero | NonZeroDigit ;

fragment
NonZeroDigit	:	'1' .. '9' ;

// In addition to letters and underscore, '$' is accepted in identifiers.
fragment
NonDigit	:	'A'..'Z' | 'a'..'z' | '_' | '$';

fragment
UniversalCharacterName
		:	'\\' 'u' HexQuad
		|	'\\' 'U' HexQuad HexQuad
		;

fragment
HexQuad		:	HexadecimalDigit HexadecimalDigit
			HexadecimalDigit HexadecimalDigit ;

fragment
HexadecimalDigit
		:	'0'..'9' | 'a'..'f' | 'A'..'F' ;

/****** Sec. 6.4.4.1: Integer constants ******/

// The optional integer suffix is attached exactly once, here.  The
// DecimalConstant, OctalConstant and HexadecimalConstant fragments match
// the digits (and prefix) only, so that a doubled suffix such as "0x1uu"
// is not accepted as an INTEGER_CONSTANT.
INTEGER_CONSTANT
		:	DecimalConstant IntegerSuffix?
		|	OctalConstant IntegerSuffix?
		|	HexadecimalConstant IntegerSuffix?
		;

fragment
DecimalConstant	:	NonZeroDigit Digit* ;

fragment
IntegerSuffix	:	UnsignedSuffix LongSuffix?
		|	UnsignedSuffix LongLongSuffix
		|	LongSuffix UnsignedSuffix?
		|	LongLongSuffix UnsignedSuffix?
		;

fragment
UnsignedSuffix	:	'u' | 'U' ;

fragment
LongSuffix	:	'l' | 'L' ;

fragment
LongLongSuffix	:	'll' | 'LL' ;

// A lone "0" is an octal constant (zero octal digits after the leading 0).
fragment
OctalConstant	:	Zero OctalDigit* ;

fragment
HexadecimalConstant
		:	HexPrefix HexadecimalDigit+ ;

fragment
HexPrefix	:	Zero ('x' | 'X') ;

/****** Sec. 6.4.4.2: Floating Constants ******/

FLOATING_CONSTANT
		:	DecimalFloatingConstant
		|	HexadecimalFloatingConstant
		;

fragment
DecimalFloatingConstant
		:	FractionalConstant ExponentPart? FloatingSuffix?
		|	Digit+ ExponentPart FloatingSuffix?
		;

fragment
FractionalConstant
		:	Digit* DOT Digit+
		|	Digit+ DOT
		;

fragment
ExponentPart	:	('e' | 'E') ('+' | '-')? Digit+ ;

fragment
FloatingSuffix	:	'f' | 'l' | 'F' | 'L' ;

// The binary exponent is mandatory in a hexadecimal floating constant.
fragment
HexadecimalFloatingConstant
		:	HexPrefix HexFractionalConstant
			BinaryExponentPart FloatingSuffix?
		|	HexPrefix HexadecimalDigit+
			BinaryExponentPart FloatingSuffix?
		;

fragment
HexFractionalConstant
		:	HexadecimalDigit* DOT HexadecimalDigit+
		|	HexadecimalDigit+ DOT
		;

fragment
BinaryExponentPart
		:	('p' | 'P') ('+' | '-')? Digit+ ;

/****** Preprocessing Numbers: C11 Sec 6.4.8 ******/

/* PP_NUMBER should be anything that doesn't match the previous
 * rules but does match this one.
 */
PP_NUMBER	:	'.'? Digit
			( '.'
			| IdentifierNonDigit
			| Digit
			| ('e' | 'E' | 'p' | 'P') ('+' | '-')
			)*
		;

/****** Sec. 6.4.4.4: Character Constants ******/

CHARACTER_CONSTANT
		:	('L' | 'U' | 'u')? '\'' CChar+ '\'' ;

fragment
CChar		:	~('\'' | '\\' | '\n') | EscapeSequence ;

fragment
EscapeSequence	:	'\\' ( '\'' | '"' | '\?' | '\\' |
			       'a' | 'b' | 'f' | 'n' |'r' | 't' | 'v'
			     )
		|	OctalEscape
		|	HexEscape
		;

// An octal escape has one to three octal digits.
fragment
OctalEscape	:	'\\' OctalDigit (OctalDigit OctalDigit?)? ;

fragment
OctalDigit	:	'0' .. '7';

fragment
HexEscape	:	'\\' 'x' HexadecimalDigit+ ;

/****** 6.4.5: String Literals *****/

STRING_LITERAL	:	('u8' | 'u' | 'U' | 'L')? '"' SChar* '"' ;

fragment
SChar		:	~('"' | '\\' | '\n') | EscapeSequence ;

/* ***** Comments: C11 Sec 6.4.9 ******/

// the following is not quite perfect because in the case of the \n or \r
// immediately following the // it counts that white space as part of the
// comment, otherwise it doesn't.  Would like to make the \n or \r NOT
// part of the comment always, but how --- need to look ahead one character?
//
// A '@' directly after the two slashes is excluded here; that sequence is
// matched by INLINE_ANNOTATION_START instead.
fragment
INLINE_COMMENT	:	'//'
			( (~('@' | '\n' | '\r')
			   ( options {greedy=true;} : ~('\n'|'\r') )*)
			| NEWLINE
			| EOF
			)
		;

// A '@' directly after the opening delimiter is excluded here; that
// sequence is matched by ANNOTATION_START instead.  The non-greedy loop
// stops at the first closing delimiter.
fragment
BLOCK_COMMENT	:	'/*'
			( '*/'
			| ~('@') ( options {greedy=false;} : . )* '*/'
			)
		;

COMMENT		:	INLINE_COMMENT | BLOCK_COMMENT ;

/* Special keywords starting with backslash reserved for extensions
 * such as ACSL
 */
EXTENDED_IDENTIFIER
		:	'\\' IdentifierNonDigit (IdentifierNonDigit | Digit)* ;

/****** Other characters: C11 Sec. 6.4 ******/

OTHER		:	. ;