source: CIVL/mods/dev.civl.abc/grammar/c/PreprocessorLexer.g@ b66600f

main test-branch
Last change on this file since b66600f was aad342c, checked in by Stephen Siegel <siegel@…>, 3 years ago

Performing huge refactor to incorporate ABC, GMC, and SARL into CIVL repo and use Java modules.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5664 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 6.5 KB
Line 
1lexer grammar PreprocessorLexer;
2
3/*
4 * Author: Stephen F. Siegel, University of Delaware
5 * Last changed: June 2012
6 *
7 * This is a grammar for lexical analysis for a preprocessor
8 * file. It follows the C11 Standard. This grammar assumes
9 * that the stream of characters being scanned has already
10 * gone through translation phases 1 and 2. In particular
11 * backslash followed by newline sequences have been removed.
12 *
13 * This lexer grammar does not define the keywords of C or of
14 * its CIVL-C, ACSL, GNU and CUDA extensions.
15 * Those keywords are defined in
16 * dev.civl.abc.front.c.parse.PP2CivlcTokenCConverter.
17 * A private function named `initCKeywordMap` identifies
18 * keyword tokens in preprocessed streams and assigns them their
19 * corresponding token types.
20 */
21
22@header
23{
24package dev.civl.abc.front.c.preproc;
25}
26
27@members
28{
29@Override // replaces the recognizer's default error reporting
30public void emitErrorMessage(String msg) { // don't try to recover!
31 throw new RuntimeException(msg); // fail fast: every reported lexical error becomes an unchecked exception carrying msg
32}
33}
34
35/****** White space ******/
36NEWLINE : '\r'? '\n' ; // LF or CRLF; a lone '\r' is not matched by this rule
37WS : ' ' | '\t' ; // exactly one space or tab per token, not a run of white space
38
39/* Words that are used in both C and the preprocessor */
40IF : 'if' ;
41ELSE : 'else' ;
42
43/* Words used in preprocessor but not in C */
44DEFINE : 'define' ;
45DEFINED : 'defined' ;
46ELIF : 'elif' ;
47ENDIF : 'endif' ;
48ERROR : 'error' ;
49IFDEF : 'ifdef' ;
50IFNDEF : 'ifndef' ;
51INCLUDE : 'include' ;
52LINE : 'line' ;
53PRAGMA : 'pragma' ;
54UNDEF : 'undef' ;
55
56/****** Punctuators: C11 Sec. 6.4.6 ******/
57ELLIPSIS : '...' ;
58DOTDOT : '..' ; // NOTE(review): not a C11 6.4.6 punctuator; presumably a language extension -- confirm
59DOT : '.' ;
60AMPERSAND : '&' ;
61AND : '&&' ;
62ARROW : '->' ;
63ASSIGN : '=' ;
64BITANDEQ : '&=' ;
65BITOR : '|' ;
66BITOREQ : '|=' ;
67BITXOR : '^' ;
68BITXOREQ : '^=' ;
69COLON : ':' ;
70COMMA : ',' ;
71DIV : '/' ;
72DIVEQ : '/=' ;
73EQUALS : '==' ;
74GT : '>' ;
75GTE : '>=' ;
76HASH : '#' | '%:' ; // second alternative is the C11 digraph spelling
77HASHHASH : '##' | '%:%:' ; // second alternative is the C11 digraph spelling
78LCURLY : '{' | '<%' ; // second alternative is the C11 digraph spelling
79LPAREN : '(' ;
80LSQUARE : '[' | '<:' ; // second alternative is the C11 digraph spelling
81LT : '<' ;
82LTE : '<=' ;
83MINUSMINUS : '--' ;
84MOD : '%' ;
85MODEQ : '%=' ;
86NEQ : '!=' ;
87NOT : '!' ;
88OR : '||' ;
89PLUS : '+' ;
90PLUSEQ : '+=' ;
91PLUSPLUS : '++' ;
92QMARK : '?' ;
93RCURLY : '}' | '%>' ; // second alternative is the C11 digraph spelling
94RPAREN : ')' ;
95RSQUARE : ']' | ':>' ; // second alternative is the C11 digraph spelling
96SEMI : ';' ;
97SHIFTLEFT : '<<' ;
98SHIFTLEFTEQ : '<<=' ;
99SHIFTRIGHT : '>>' ;
100SHIFTRIGHTEQ : '>>=' ;
101STAR : '*' ;
102STAREQ : '*=' ;
103SUB : '-' ;
104SUBEQ : '-=' ;
105TILDE : '~' ;
106
107/* CIVL-C and ACSL Punctuators */
108ANNOTATION_START : '/*@' ;
109ANNOTATION_END : '*/' ;
110AT : '@' ;
111EQUIV_ACSL : '<==>' ;
112IMPLIES : '=>' ;
113IMPLIES_ACSL : '==>' ;
114INLINE_ANNOTATION_START : '//@' ;
115// LSLIST and RSLIST enclose a scope list
116LSLIST : '<|' ;
117RSLIST : '|>' ;
118XOR_ACSL : '^^' ;
119
120/* CUDA Punctuators */
121LEXCON : '<<<' ;
122REXCON : '>>>' ;
123
124/****** Identifiers: C11 Sec. 6.4.2 ******/
125IDENTIFIER : IdentifierNonDigit
126 (IdentifierNonDigit | Digit)*
127 ;
128
129fragment
130IdentifierNonDigit
131 : NonDigit | UniversalCharacterName ;
132
133fragment
134Zero : '0' ;
135
136fragment
137Digit : Zero | NonZeroDigit ;
138
139fragment
140NonZeroDigit : '1' .. '9' ;
141
142fragment
143NonDigit : 'A'..'Z' | 'a'..'z' | '_' | '$'; // '$' is accepted in addition to the C11 nondigit set (letters and '_')
144
145fragment
146UniversalCharacterName
147 : '\\' 'u' HexQuad
148 | '\\' 'U' HexQuad HexQuad
149 ;
150
151fragment
152HexQuad : HexadecimalDigit HexadecimalDigit HexadecimalDigit HexadecimalDigit ;
153
154fragment
155HexadecimalDigit
156 : '0'..'9' | 'a'..'f' | 'A'..'F' ;
157
158/****** Sec. 6.4.4.1: Integer constants ******/
159INTEGER_CONSTANT
160 : DecimalConstant IntegerSuffix?
161 | OctalConstant IntegerSuffix?
162 | HexadecimalConstant IntegerSuffix?
163 ; // C11 6.4.4.1: every form takes at most ONE optional suffix, attached here and nowhere else
164
165fragment
166DecimalConstant : NonZeroDigit Digit* ;
167
168
169fragment
170IntegerSuffix : UnsignedSuffix LongSuffix?
171 | UnsignedSuffix LongLongSuffix
172 | LongSuffix UnsignedSuffix?
173 | LongLongSuffix UnsignedSuffix?
174 ;
175
176fragment
177UnsignedSuffix : 'u' | 'U' ;
178
179fragment
180LongSuffix : 'l' | 'L' ;
181
182fragment
183LongLongSuffix : 'll' | 'LL' ;
184
185fragment
186OctalConstant : Zero OctalDigit* ; // digits only; a second suffix here made inputs such as 0uu lex as INTEGER_CONSTANT
187
188fragment
189HexadecimalConstant
190 : HexPrefix HexadecimalDigit+ ; // digits only; the suffix is attached by INTEGER_CONSTANT
191
192fragment
193HexPrefix : Zero ('x' | 'X') ;
194
195/****** Sec. 6.4.4.2: Floating Constants ******/
196
197FLOATING_CONSTANT
198 : DecimalFloatingConstant
199 | HexadecimalFloatingConstant
200 ;
201
202fragment
203DecimalFloatingConstant
204 : FractionalConstant ExponentPart? FloatingSuffix?
205 | Digit+ ExponentPart FloatingSuffix?
206 ;
207
208fragment
209FractionalConstant
210 : Digit* DOT Digit+
211 | Digit+ DOT
212 ;
213
214fragment
215ExponentPart : ('e' | 'E') ('+' | '-')? Digit+ ;
216
217fragment
218FloatingSuffix : 'f' | 'l' | 'F' | 'L' ;
219
220fragment
221HexadecimalFloatingConstant
222 : HexPrefix HexFractionalConstant BinaryExponentPart
223 FloatingSuffix?
224 | HexPrefix HexadecimalDigit+ BinaryExponentPart
225 FloatingSuffix?
226 ;
227
228fragment
229HexFractionalConstant
230 : HexadecimalDigit* DOT HexadecimalDigit+
231 | HexadecimalDigit+ DOT
232 ;
233
234fragment
235BinaryExponentPart
236 : ('p' | 'P') ('+' | '-')? Digit+ ;
237
238
239/****** Preprocessing Numbers: C11 Sec 6.4.8 ******/
240
241/* PP_NUMBER should be anything that doesn't match the previous
242 * rules but does match this one.
243 */
244PP_NUMBER : '.'? Digit // optional leading dot, then one mandatory digit
245 ( '.'
246 | IdentifierNonDigit
247 | Digit
248 | ('e' | 'E' | 'p' | 'P') ('+' | '-') // a sign belongs to the number only directly after e/E/p/P
249 )*
250 ;
251
252
253/****** Sec. 6.4.4.4: Character Constants ******/
254
255CHARACTER_CONSTANT
256 : ('L' | 'U' | 'u')? '\'' CChar+ '\'' ;
257
258fragment
259CChar : ~('\'' | '\\' | '\n') | EscapeSequence ;
260
261fragment
262EscapeSequence : '\\' ( '\'' | '"' | '\?' | '\\' |
263 'a' | 'b' | 'f' | 'n' |'r' | 't' | 'v'
264 )
265 | OctalEscape
266 | HexEscape
267 | UniversalCharacterName ; // C11 6.4.4.4: \uXXXX and \UXXXXXXXX are escape sequences too, so they are legal in character and string literals
268fragment
269OctalEscape : '\\' OctalDigit (OctalDigit OctalDigit?)? ;
270
271fragment
272OctalDigit : '0' .. '7';
273
274fragment
275HexEscape : '\\' 'x' HexadecimalDigit+ ;
276
277
278/****** 6.4.5: String Literals *****/
279
280
281STRING_LITERAL : ('u8' | 'u' | 'U' | 'L')? '"' SChar* '"'
282 ;
283
284fragment
285SChar : ~('"' | '\\' | '\n') | EscapeSequence ;
286
287
288
289/* ***** Comments: C11 Sec 6.4.9 ******/
290
291// The following is not quite perfect: when a \n or \r immediately follows
292// the //, that line terminator is counted as part of the comment; in all
293// other cases it is not. Ideally the \n or \r would NEVER be part of the
294// comment, but how? Would that need one character of lookahead?
295
296fragment
297INLINE_COMMENT : '//'
298 ( (~('@' | '\n' | '\r') ( options {greedy=true;} : ~('\n'|'\r') )*) // first char must not be '@' ('//@' is INLINE_ANNOTATION_START); then everything up to, not including, the line terminator
299 | NEWLINE // empty comment: here the line terminator IS consumed as part of the comment
300 | EOF // empty comment at the very end of the input
301 )
302 ;
303
304fragment
305BLOCK_COMMENT : '/*'
306 ( '*/' | ~('@') ( options {greedy=false;} : . )* '*/') // either an immediately closed comment, or a body not starting with '@' ('/*@' is ANNOTATION_START); the non-greedy loop stops at the earliest '*/'
307 ;
308
309COMMENT : INLINE_COMMENT | BLOCK_COMMENT ; // the only comment token emitted; both alternatives are fragments
310
311/* Special keywords starting with backslash reserved for extensions
312 * such as ACSL */
313EXTENDED_IDENTIFIER
314 :
315 '\\' IdentifierNonDigit (IdentifierNonDigit | Digit)*
316 ;
317
318/****** Other characters: C11 Sec. 6.4 ******/
319OTHER : . ;
Note: See TracBrowser for help on using the repository browser.