source: CIVL/mods/dev.civl.abc/grammar/c/PreprocessorParser.g

main
Last change on this file was aad342c, checked in by Stephen Siegel <siegel@…>, 3 years ago

Performing huge refactor to incorporate ABC, GMC, and SARL into CIVL repo and use Java modules.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5664 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 9.6 KB
RevLine 
parser grammar PreprocessorParser;

/*
 * Author:        Stephen F. Siegel, University of Delaware
 * Last modified: July 15, 2016
 *
 * Grammar for the C preprocessor.
 *
 * The grammar describes a C source file as it looks *before*
 * preprocessing.  No preprocessor directive is executed here; the
 * file is merely represented in a structured (tree) form.
 *
 * Reference: C11 Standard, Sec. 6.10.
 *
 * Input comes from PreprocessorLexer, which has already formed the
 * preprocessor tokens.
 *
 * Extensions from other languages (beyond C11) are included.
 */

// TODO: use things like this:
// bar : ID{$ID.setText("HELLO");$ID.setType(0);} WS INT -> ID INT;

options {
    tokenVocab = PreprocessorLexer;   // reuse the lexer's token vocabulary
    output     = AST;                 // rules construct AST nodes
}
26
/* "Imaginary" tokens that will be used in the tree.
 * The declaration order below is the same as it has always been;
 * do not reorder entries without checking how token types are used. */
tokens {
    /* Structural nodes created by tree rewrites in this grammar */
    FILE;        // root node
    TEXT_BLOCK;  // a list of tokens
    PARAMLIST;   // x1,x2,x3
    EXPR;        // an expression used in a conditional (#if)
    SEQUENCE;    // true branch of conditional directive
    BODY;        // body of macro definition
    PIF;         // preprocessor if: #if
    PELSE;       // preprocessor else: #else
    PPRAGMA;     // preprocessor pragma: #pragma

    /* C, CIVL, ACSL, and CUDA keywords */
    AUTO; ASM; BREAK; CASE; CHAR; CONST; CONTINUE; DEFAULT;
    DO; DOUBLE; ENUM; EXTERN; FLOAT; FOR; GOTO; INLINE;
    INT; LONG; REGISTER; RESTRICT; RETURN; SHORT; SIGNED; SIZEOF;
    STATIC; STRUCT; SWITCH; TYPEDEF; UNION; UNSIGNED; VOID; VOLATILE;
    WHILE; ALIGNAS; ALIGNOF; ATOMIC; BOOL; COMPLEX; GENERIC; IMAGINARY;
    NORETURN; STATICASSERT; THREADLOCAL;

    /* NOTE(review): the separator comment here was empty in the
     * original; the names below look like CIVL-C / ACSL keywords --
     * confirm before relying on this grouping. */
    ABSTRACT; ASSIGNS; BIG_O; CALLS; CATCH; CHOOSE; CIVLATOMIC; CIVLFOR;
    COLLECTIVE;  // dummy
    CONTIN; DEPENDS; DERIV; DIFFERENTIABLE; DOMAIN; ENSURES; EXISTS;
    FORALL; FATOMIC; GUARD; HERE; INPUT; INVARIANT; LAMBDA; MEM_TYPE;
    OUTPUT; ORIGINAL; PARFOR; PROCNULL; PURE; RANGE; REAL; REQUIRES;
    RESULT; RUN; SCOPEOF; SELF; STATE_F; STATE_NULL; READS; SPAWN;
    SYSTEM; UNIFORM; UPDATE; VALUE_AT; WHEN; WITH;

    /* NOTE(review): empty separator in the original; presumably the
     * CUDA keywords -- confirm. */
    DEVICE; GLOBAL; SHARED;

    /* NOTE(review): empty separator in the original; purpose of this
     * final group is not stated in this file. */
    TYPEOF;
}
136
/* Java package declaration emitted at the top of the generated parser. */
@header {
package dev.civl.abc.front.c.preproc;
}
141
@members {
    /**
     * Turns every reported error message into an unchecked exception
     * instead of continuing: this parser does not try to recover.
     *
     * @param msg the error message to report
     * @throws RuntimeException always, carrying {@code msg}
     */
    @Override
    public void emitErrorMessage(String msg) {
        throw new RuntimeException(msg);
    }
}
148
/* Start rule.
 *
 * A file is an optional leading block of white space, a list of
 * items, and EOF.  An item is either a preprocessor directive or a
 * text block.  For compound directives such as #ifdef ... #endif,
 * everything between the opening directive and the closing #endif
 * belongs to the directive.  A text block is a maximal sequence of
 * plain text lines.
 *
 * Tree: ^(FILE whiteBlock? itemList EOF)
 */
file
    : whiteBlock? itemList EOF
        -> ^(FILE whiteBlock? itemList EOF)
    ;
160
/* Earlier, non-recursive sketch of the same language (kept for
 * reference, not used):
 *
 *   items : directiveBlock*
 *           (textBlock directiveBlock+)*
 *           textBlock?
 *         ;
 */

/* A possibly empty sequence of directive blocks and text blocks.
 * It starts with a non-white-space token (# or something other than
 * #) and ends just before a non-white-space token that starts neither
 * a directive block nor a text block.
 *
 * A text block is followed either by a directive block or by nothing,
 * so two text blocks are never adjacent.
 */
itemList
    : directiveBlock itemList
    | textBlock ( directiveBlock itemList | )
    |
    ;
174
/* One or more white tokens (see rule "white"), grouped under a
 * TEXT_BLOCK node. */
whiteBlock
    : white+
        -> ^(TEXT_BLOCK white+)
    ;
177
/* One or more consecutive text segments (plain, non-directive lines),
 * grouped under a TEXT_BLOCK node. */
textBlock
    : textSegment+
        -> ^(TEXT_BLOCK textSegment+)
    ;
180
/* One plain text line plus any white tokens that follow its NEWLINE.
 *   Alt 1: a bare NEWLINE.
 *   Alt 2: a line whose first token is not HASH, WS, COMMENT or
 *          NEWLINE, followed by anything up to and including the
 *          NEWLINE.
 * Because the first token may not be HASH, a segment never starts a
 * directive.
 */
textSegment
    : NEWLINE white*
    | ~(HASH|WS|COMMENT|NEWLINE) (~NEWLINE)* NEWLINE white*
    ;
184
/* A directive together with the optional block of white tokens that
 * follows it. */
directiveBlock
    : directive whiteBlock?
    ;
187
/* A directive: '#', optional white tokens, then the directive proper.
 * The HASH and the white tokens are matched but left out of the tree
 * (the "!" suffix). */
directive
    : HASH! white!* directiveSuffix
    ;
190
/* Everything in a directive after the leading '#' and white space:
 * one of the recognized directive forms, or, failing those, a
 * nondirective. */
directiveSuffix
    : macrodef
    | macroundef
    | includeline
    | pragmaline
    | errorline
    | lineline
    | ifdefblock
    | ifblock
    | ifndefblock
    | nondirective
    ;
202
/* A line starting with # that does not match any of the ordinary
 * directive forms.
 *   Alt 1: a first token that is not a preprocessor keyword, then any
 *          tokens up to NEWLINE.  Tree: ^(HASH tokens...), with the
 *          NEWLINE dropped.
 *   Alt 2: nothing but NEWLINE (a lone '#').  Tree: ^(HASH).
 */
nondirective
    : toks+=not_directive toks+=wpptoken* NEWLINE
        -> ^(HASH $toks+)
    | NEWLINE
        -> ^(HASH)
    ;
209
/* A macro definition, function-like or object-like.
 *   - name immediately followed by a parameter list: function-like,
 *       ^(DEFINE name paramlist macrobody)
 *   - name immediately followed by NEWLINE: object-like, empty body,
 *       ^(DEFINE name ^(BODY))
 *   - name followed by a white token: object-like,
 *       ^(DEFINE name macrobody)
 */
macrodef
    : DEFINE white+ macroName=identifier
      ( paramlist macrobody
            -> ^(DEFINE $macroName paramlist macrobody)
      | NEWLINE
            -> ^(DEFINE $macroName ^(BODY))
      | white macrobody
            -> ^(DEFINE $macroName macrobody)
      )
    ;
217
/* The replacement list of a macro, through the terminating NEWLINE.
 * Leading and trailing white tokens are matched but not collected;
 * white tokens between the first and last pptoken are kept.
 * Tree: ^(BODY tokens...), or an empty ^(BODY) when the line holds no
 * pptoken.
 */
macrobody
    : white*
      ( toks+=pptoken (toks+=wpptoken* toks+=pptoken)? white* NEWLINE
            -> ^(BODY $toks+)
      | NEWLINE
            -> ^(BODY)
      )
    ;
224
/* Parenthesized parameter list of a function-like macro.
 * Accepted shapes (white tokens allowed between elements):
 *   ()                 -> ^(PARAMLIST)
 *   (...)              -> ^(PARAMLIST ELLIPSIS)
 *   (x1, ..., xn)      -> ^(PARAMLIST identifier+)
 *   (x1, ..., xn, ...) -> ^(PARAMLIST identifier+ ELLIPSIS)
 * Parentheses, commas and white tokens do not appear in the tree.
 */
paramlist
    : LPAREN white*
      ( RPAREN
            -> ^(PARAMLIST)
      | ELLIPSIS white* RPAREN
            -> ^(PARAMLIST ELLIPSIS)
      | identifier (white* COMMA white* identifier)* white*
        ( RPAREN
              -> ^(PARAMLIST identifier+)
        | COMMA white* ELLIPSIS white* RPAREN
              -> ^(PARAMLIST identifier+ ELLIPSIS)
        )
      )
    ;
235
/* #undef NAME.  Tree: ^(UNDEF identifier); white tokens and the
 * NEWLINE are dropped. */
macroundef
    : UNDEF white+ identifier white* NEWLINE
        -> ^(UNDEF identifier)
    ;
239
/* #include line.  At least one pptoken is required.  The tree keeps
 * everything from the first pptoken through the last one (interior
 * white tokens included) under INCLUDE; leading and trailing white
 * tokens and the NEWLINE are dropped.
 */
includeline
    : INCLUDE white*
      toks+=pptoken (toks+=wpptoken* toks+=pptoken)?
      white* NEWLINE
        -> ^(INCLUDE $toks+)
    ;
244
/* #pragma line.  The embedded action changes the type of the matched
 * PRAGMA token to PPRAGMA.  Unlike the other single-line directives,
 * the tree keeps the white tokens and the terminating NEWLINE:
 * ^(PRAGMA wpptoken* NEWLINE).
 */
pragmaline
    : PRAGMA{$PRAGMA.setType(PPRAGMA);} wpptoken* NEWLINE
        -> ^(PRAGMA wpptoken* NEWLINE)
    ;
248
/* #error line.  Tree: ^(ERROR wpptoken*); the NEWLINE is dropped. */
errorline
    : ERROR wpptoken* NEWLINE
        -> ^(ERROR wpptoken*)
    ;
251
/* #line line.  Tree: ^(LINE wpptoken*); the NEWLINE is dropped. */
lineline
    : LINE wpptoken* NEWLINE
        -> ^(LINE wpptoken*)
    ;
254
/* #ifdef X ... #elif ... #elif ... #else ... #endif.
 *
 * Tree:
 *   ^(IFDEF identifier ^(SEQUENCE item*))             or
 *   ^(IFDEF identifier ^(SEQUENCE item*) elseblock)
 * where the items come from the section after the #ifdef line and the
 * optional else block is whatever if_suffix produces.
 */
ifdefblock
    : IFDEF white* macro=identifier white* NEWLINE
      thenPart=if_section rest=if_suffix
        -> ^(IFDEF $macro ^(SEQUENCE $thenPart?) $rest?)
    ;
264
/* Same shape as ifdefblock, with #ifndef in place of #ifdef.
 * Tree: ^(IFNDEF identifier ^(SEQUENCE item*) elseblock?) */
ifndefblock
    : IFNDEF white* macro=identifier white* NEWLINE
      thenPart=if_section rest=if_suffix
        -> ^(IFNDEF $macro ^(SEQUENCE $thenPart?) $rest?)
    ;
270
/* #if expr ... #elif ... #elif ... #else ... #endif.
 * Very similar to #ifdef, but with an expression where #ifdef has an
 * identifier.  The embedded action changes the type of the matched IF
 * token to PIF.
 * Tree: ^(IF expr ^(SEQUENCE item*) elseblock?)
 */
ifblock
    : IF{$IF.setType(PIF);}
      white* cond=expr white* NEWLINE
      thenPart=if_section rest=if_suffix
        -> ^(IF $cond ^(SEQUENCE $thenPart?) $rest?)
    ;
279
/* One section of a conditional directive.
 * It begins right after the line containing #ifdef, #ifndef, #if,
 * #elif or #else, and it ends with the HASH white* that immediately
 * precedes the first matching endif, elif or else (that HASH is
 * consumed here, by way of rule "subsection").
 */
if_section
    : whiteBlock? section_body
    ;
290
/* Body of a conditional section.
 * It begins with the first non-white token on a line inside the
 * section and ends with the HASH white* immediately preceding the
 * endif, elif or else that closes the section.
 * The tree is a flat list of TEXT_BLOCKs and directives.
 */
section_body
    : textBlock? subsection
    ;
300
/* Begins with a # at the start of a line (after possible white space)
 * inside a conditional directive body, and ends with the HASH white*
 * immediately preceding the closing endif, elif or else.
 *
 * After the HASH, either a nested directive follows (and the section
 * body continues recursively), or nothing is matched, in which case
 * this HASH is the one that introduces the closing endif/elif/else.
 * HASH and the white tokens after it are kept out of the tree; the
 * result is a flat list of TEXT_BLOCKs and directives.
 */
subsection
    : HASH! white!*
      ( directiveSuffix whiteBlock? section_body )?
    ;
310
/* Tail of a conditional directive: begins with endif, elif or else
 * and ends with the NEWLINE after the closing #endif.
 *
 * Tree, one per alternative:
 *   1. (endif)  empty
 *   2. (elif)   ^(ELIF ^(ELIF expr ^(SEQUENCE items) elseblock?))
 *   3. (else)   ^(ELSE items)
 *
 * Form 2 makes the tree for "#elif ..." look like the tree that
 * "#else #if ..." would give: read the outer ELIF as ELSE and the
 * inner ELIF as IF.  In form 3 the embedded action changes the type
 * of the matched ELSE token to PELSE.
 */
if_suffix
    : ENDIF white* NEWLINE
        ->
    | kw=ELIF white* expr white* NEWLINE if_section if_suffix
        -> ^($kw ^($kw expr ^(SEQUENCE if_section?) if_suffix?))
    | ELSE{$ELSE.setType(PELSE);}
      white* NEWLINE if_section ENDIF white* NEWLINE
        -> ^(ELSE if_section?)
    ;
330
/* White space within a line: a WS token (space or tab) or a COMMENT.
 * NEWLINE is not included. */
white
    : WS
    | COMMENT
    ;
333
/* Either a preprocessor token or a white token; never NEWLINE. */
wpptoken
    : pptoken
    | white
    ;
336
/* The controlling expression of #if or #elif.  The grammar is very
 * permissive: any non-empty sequence of ppdExprs is accepted.  White
 * tokens between the elements are not part of the tree.
 * Tree: ^(EXPR ppdExpr+)
 */
expr
    : ppdExpr (white* ppdExpr)*
        -> ^(EXPR ppdExpr+)
    ;
340
/* "defined X" or "defined ( X )".  White tokens and the parentheses
 * are matched but excluded from the tree, leaving DEFINED followed by
 * the identifier. */
definedExpr
    : DEFINED white!*
      ( identifier
      | LPAREN! white!* identifier white!* RPAREN!
      )
    ;
346
/* One element of an #if / #elif expression: a "defined" expression or
 * a plain preprocessor token.
 * DEFINED is also reachable through pptoken (identifier -> pp_keyword),
 * so the syntactic predicate makes the parser take the definedExpr
 * alternative whenever the next token is DEFINED.
 */
ppdExpr
    : (DEFINED)=> definedExpr
    | pptoken
    ;
352
/* A "preprocessor token" in the sense of the C11 Standard, widened
 * with the extensions from the other supported languages.
 * Header names are not listed: in our lexer they are composed of
 * smaller tokens.
 */
pptoken
    : identifier
    | pp_number
    | CHARACTER_CONSTANT
    | STRING_LITERAL
    | punctuator
    | OTHER
    ;
364
/* Any preprocessor token that is not a preprocessor keyword.  This is
 * pptoken with "identifier" narrowed to IDENTIFIER and
 * EXTENDED_IDENTIFIER, i.e. without the pp_keyword alternative. */
not_directive
    : pp_number
    | CHARACTER_CONSTANT
    | STRING_LITERAL
    | punctuator
    | OTHER
    | IDENTIFIER
    | EXTENDED_IDENTIFIER
    ;
374
/* For the preprocessor, an "identifier" is an IDENTIFIER, an
 * EXTENDED_IDENTIFIER, or any of the reserved words listed under
 * pp_keyword. */
identifier
    : IDENTIFIER
    | EXTENDED_IDENTIFIER
    | pp_keyword
    ;
382
/* ---- C and preprocessor keywords ---- */

/* Words that are keywords both in C and in the preprocessor. */
c_pp_keyword
    : IF
    | ELSE
    ;
390
/* Words that are keywords in the preprocessor but not in C. */
pp_notc_keyword
    : DEFINE
    | DEFINED
    | ELIF
    | ENDIF
    | ERROR
    | IFDEF
    | IFNDEF
    | INCLUDE
    | LINE
    | PRAGMA
    | UNDEF
    ;
404
/* Every word the preprocessor treats as a keyword: the
 * preprocessor-only ones plus those shared with C. */
pp_keyword
    : pp_notc_keyword
    | c_pp_keyword
    ;
408
/* A "pp_number" is any one of INTEGER_CONSTANT, FLOATING_CONSTANT or
 * PP_NUMBER. */
pp_number
    : INTEGER_CONSTANT
    | FLOATING_CONSTANT
    | PP_NUMBER
    ;
414
/* Punctuators are the symbols that are not words.  This rule is the
 * union of the punctuators of all supported languages. */
punctuator
    : c_punctuator
    | civl_punctuator
    | cuda_punctuator
    ;
421
/* C punctuators.  HASH and HASHHASH are members, so '#' and '##' can
 * occur wherever a pptoken is allowed.  Alternatives are listed in
 * their original order, several per line.
 */
c_punctuator
    : AMPERSAND | AND | ARROW | ASSIGN
    | BITANDEQ | BITOR | BITOREQ | BITXOR | BITXOREQ
    | COLON | COMMA
    | DIV | DIVEQ
    | ELLIPSIS | DOTDOT | DOT
    | EQUALS
    | GT | GTE
    | HASH | HASHHASH
    | LCURLY | LPAREN | LSQUARE
    | LT | LTE
    | MINUSMINUS | MOD | MODEQ
    | NEQ | NOT
    | OR
    | PLUS | PLUSEQ | PLUSPLUS
    | QMARK
    | RCURLY | RPAREN | RSQUARE
    | SEMI
    | SHIFTLEFT | SHIFTLEFTEQ | SHIFTRIGHT | SHIFTRIGHTEQ
    | STAR | STAREQ
    | SUB | SUBEQ
    | TILDE
    ;
473
/* Punctuators beyond the C set; going by the rule name and the
 * *_ACSL token names these serve CIVL and ACSL annotations. */
civl_punctuator
    : ANNOTATION_END
    | ANNOTATION_START
    | AT
    | EQUIV_ACSL
    | IMPLIES
    | IMPLIES_ACSL
    | INLINE_ANNOTATION_START
    | LSLIST
    | RSLIST
    | XOR_ACSL
    ;
485
/* Punctuators added for CUDA (per the rule name).
 * NOTE(review): presumably the kernel-launch brackets -- the lexer
 * definitions of LEXCON / REXCON are not visible here; confirm. */
cuda_punctuator
    : LEXCON
    | REXCON
    ;
Note: See TracBrowser for help on using the repository browser.