Context Navigation

PreprocessorParser.g

main

Last change on this file was aad342c, checked in by Stephen Siegel <siegel@…>, 3 years ago

Performing huge refactor to incorporate ABC, GMC, and SARL into CIVL repo and use Java modules.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5664 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 9.6 KB

Rev	Line
[aad342c]	1	parser grammar PreprocessorParser;
	2
	3	/* Author: Stephen F. Siegel, University of Delaware
	4	* Last modified: July 15, 2016
	5	*
	6	* Grammar for C preprocessor.
	7	* This grammar describes a C source file before preprocessing.
	8	* It does not execute any preprocessor directives.
	9	* It simply represents the file in a structured way.
	10	*
	11	* See the C11 Standard, Sec. 6.10.
	12	*
	13	* This grammar uses the PreprocessorLexer, which has already
	14	* formed the preprocessor tokens.
	15	*
	16	* Extensions from other languages (beyond C11) are included.
	17	*/
	18
	19	// TODO: use things like this:
	20	// bar : ID{$ID.setText("HELLO");$ID.setType(0);} WS INT -> ID INT;
	21
	22	options {
	23	tokenVocab=PreprocessorLexer; // take token types from PreprocessorLexer, which has already formed the tokens
	24	output=AST; // rules build trees; the -> rewrites in this grammar shape them
	25	}
	26
	27	/* "imaginary" tokens that will be used in the tree */
	28	tokens {
	29	FILE; // root node
	30	TEXT_BLOCK; // a list of tokens
	31	PARAMLIST; // x1,x2,x3
	32	EXPR; // an expression used in a conditional (#if or #elif)
	33	SEQUENCE; // true branch of conditional directive
	34	BODY; // body of macro definition
	35	PIF; // preprocessor if: #if
	36	PELSE; // preprocessor else: #else
	37	PPRAGMA; // preprocessor pragma: #pragma
	38	/* C, CIVL, ACSL, and CUDA keywords */
	39	AUTO;
	40	ASM;
	41	BREAK;
	42	CASE;
	43	CHAR;
	44	CONST;
	45	CONTINUE;
	46	DEFAULT;
	47	DO;
	48	DOUBLE;
	49	ENUM;
	50	EXTERN;
	51	FLOAT;
	52	FOR;
	53	GOTO;
	54	INLINE;
	55	INT;
	56	LONG;
	57	REGISTER;
	58	RESTRICT;
	59	RETURN;
	60	SHORT;
	61	SIGNED;
	62	SIZEOF;
	63	STATIC;
	64	STRUCT;
	65	SWITCH;
	66	TYPEDEF;
	67	UNION;
	68	UNSIGNED;
	69	VOID;
	70	VOLATILE;
	71	WHILE;
	72	ALIGNAS;
	73	ALIGNOF;
	74	ATOMIC;
	75	BOOL;
	76	COMPLEX;
	77	GENERIC;
	78	IMAGINARY;
	79	NORETURN;
	80	STATICASSERT;
	81	THREADLOCAL;
	82	/* end of the C keywords; the next group is presumably the CIVL-C and ACSL keywords -- TODO confirm */
	83	ABSTRACT;
	84	ASSIGNS;
	85	BIG_O;
	86	CALLS;
	87	CATCH;
	88	CHOOSE;
	89	CIVLATOMIC;
	90	CIVLFOR;
	91	COLLECTIVE; //dummy
	92	CONTIN;
	93	DEPENDS;
	94	DERIV;
	95	DIFFERENTIABLE;
	96	DOMAIN;
	97	ENSURES;
	98	EXISTS;
	99	FORALL;
	100	FATOMIC;
	101	GUARD;
	102	HERE;
	103	INPUT;
	104	INVARIANT;
	105	LAMBDA;
	106	MEM_TYPE;
	107	OUTPUT;
	108	ORIGINAL;
	109	PARFOR;
	110	PROCNULL;
	111	PURE;
	112	RANGE;
	113	REAL;
	114	REQUIRES;
	115	RESULT;
	116	RUN;
	117	SCOPEOF;
	118	SELF;
	119	STATE_F;
	120	STATE_NULL;
	121	READS;
	122	SPAWN;
	123	SYSTEM;
	124	UNIFORM;
	125	UPDATE;
	126	VALUE_AT;
	127	WHEN;
	128	WITH;
	129	/* presumably the CUDA keywords -- TODO confirm */
	130	DEVICE;
	131	GLOBAL;
	132	SHARED;
	133	/* presumably the GNU C typeof extension -- TODO confirm */
	134	TYPEOF;
	135	}
	136
	137	@header
	138	{
	139	package dev.civl.abc.front.c.preproc;
	140	} // header action: declares the Java package of the generated parser
	141
	142	@members{
	143	@Override
	144	public void emitErrorMessage(String msg) { // don't try to recover: every emitted error message becomes an exception
	145	throw new RuntimeException(msg);
	146	}
	147	}
	148
	149	/* An item is either a preprocessor directive
	150	* or a text block. For compound directives,
	151	* such as #ifdef ... #endif, all of the text
	152	* between the opening #if/#ifdef/#ifndef and the closing #endif
	153	* is considered part of the directive.
	154	* A text block is a maximal sequence of plain
	155	* text lines. Tree: ^(FILE whiteBlock? itemList EOF).
	156	*/
	157	file : whiteBlock? itemList EOF
	158	-> ^(FILE whiteBlock? itemList EOF)
	159	;
	160
	161	/*
	162	items : directiveBlock*
	163	(textBlock directiveBlock+)*
	164	textBlock?
	165	;
	166	*/
	167
	168	/* Starts with a non-white token (either # or something other than #) and ends just before
	169	* a non-white token that does not start a directive block or text block. A textBlock is always followed by a directiveBlock or by the end of the list. */
	170	itemList : directiveBlock itemList
	171	\| textBlock ( directiveBlock itemList \| )
	172	\|
	173	;
	174
	175	whiteBlock : white+ -> ^(TEXT_BLOCK white+) // one or more white tokens (WS or COMMENT) wrapped in a TEXT_BLOCK node
	176	;
	177
	178	textBlock : textSegment+ -> ^(TEXT_BLOCK textSegment+) // one or more consecutive text segments under a single TEXT_BLOCK node
	179	;
	180
	181	textSegment : NEWLINE white* // a bare NEWLINE, plus any white tokens that follow it
	182	\| ~(HASH\|WS\|COMMENT\|NEWLINE) (~NEWLINE)* NEWLINE white* // a line whose first token is not #, white space, or NEWLINE, through its NEWLINE and any following white tokens
	183	;
	184
	185	directiveBlock : directive whiteBlock? // a directive plus any white tokens that follow it
	186	;
	187
	188	directive : HASH! white!* directiveSuffix // the ! suffix keeps the HASH and the white tokens out of the tree
	189	;
	190
	191	directiveSuffix : macrodef // whatever may follow the # (and optional white tokens) of a directive
	192	\| macroundef
	193	\| includeline
	194	\| pragmaline
	195	\| errorline
	196	\| lineline
	197	\| ifdefblock
	198	\| ifblock
	199	\| ifndefblock
	200	\| nondirective
	201	;
	202
	203	/* A nondirective is any line starting with # that
	204	* doesn't fall into one of the ordinary directive
	205	* forms. Tree: ^(HASH t...) holding the line's tokens without the NEWLINE, or a bare ^(HASH) for the NEWLINE-only alternative. */
	206	nondirective : t+=not_directive t+=wpptoken* NEWLINE -> ^(HASH $t+)
	207	\| NEWLINE -> ^(HASH)
	208	;
	209
	210	/* A function-like or object-like macro definition. Tree: ^(DEFINE name paramlist body) when a paramlist directly follows the name, otherwise ^(DEFINE name body); the body is an empty ^(BODY) when NEWLINE directly follows the name. */
	211	macrodef : DEFINE white+ i=identifier
	212	( paramlist macrobody -> ^(DEFINE $i paramlist macrobody)
	213	\| NEWLINE -> ^(DEFINE $i ^(BODY))
	214	\| white macrobody -> ^(DEFINE $i macrobody)
	215	)
	216	;
	217
	218	macrobody : white* // replacement list through its NEWLINE: ^(BODY t...) without leading/trailing white tokens but with interior ones kept; ^(BODY) if empty
	219	( t+=pptoken (t+=wpptoken* t+=pptoken)? white* NEWLINE
	220	-> ^(BODY $t+)
	221	\| NEWLINE -> ^(BODY)
	222	)
	223	;
	224
	225	paramlist : LPAREN white* // macro parameters: ^(PARAMLIST identifier* ELLIPSIS?); parentheses, commas, and white tokens are not kept
	226	( RPAREN -> ^(PARAMLIST)
	227	\| ELLIPSIS white* RPAREN -> ^(PARAMLIST ELLIPSIS)
	228	\| identifier (white* COMMA white* identifier)* white*
	229	( RPAREN -> ^(PARAMLIST identifier+)
	230	\| COMMA white* ELLIPSIS white* RPAREN
	231	-> ^(PARAMLIST identifier+ ELLIPSIS)
	232	)
	233	)
	234	;
	235
	236	macroundef : UNDEF white+ identifier white* NEWLINE // undef directive; the tree keeps only the identifier under UNDEF
	237	-> ^(UNDEF identifier)
	238	;
	239
	240	includeline : INCLUDE white* t+=pptoken (t+=wpptoken* t+=pptoken)? // tree: ^(INCLUDE t...), the tokens after the keyword minus leading and trailing white tokens
	241	white* NEWLINE
	242	-> ^(INCLUDE $t+)
	243	;
	244
	245	pragmaline : PRAGMA{$PRAGMA.setType(PPRAGMA);} wpptoken* NEWLINE -> // retypes the PRAGMA token to PPRAGMA; the tree keeps every following token, white tokens and NEWLINE included
	246	^(PRAGMA wpptoken* NEWLINE)
	247	;
	248
	249	errorline : ERROR wpptoken* NEWLINE -> ^(ERROR wpptoken*) // tree keeps the tokens (white tokens included) but not the NEWLINE
	250	;
	251
	252	lineline : LINE wpptoken* NEWLINE -> ^(LINE wpptoken*) // tree keeps the tokens (white tokens included) but not the NEWLINE
	253	;
	254
	255	/* #ifdef X ... #elif ... #elif ... #else ... #endif.
	256	* Tree:
	257	* ^(IFDEF identifier ^(SEQUENCE item*)), or
	258	* ^(IFDEF identifier ^(SEQUENCE item*) elseblock), where elseblock is the tree built by if_suffix.
	259	*/
	260	ifdefblock : IFDEF white* i=identifier white* NEWLINE
	261	t=if_section f=if_suffix
	262	-> ^(IFDEF $i ^(SEQUENCE $t?) $f?)
	263	;
	264
	265	/* Exactly like ifdefblock, except with #ifndef instead of #ifdef. Tree: ^(IFNDEF identifier ^(SEQUENCE item*) elseblock?). */
	266	ifndefblock : IFNDEF white* i=identifier white* NEWLINE
	267	t=if_section f=if_suffix
	268	-> ^(IFNDEF $i ^(SEQUENCE $t?) $f?)
	269	;
	270
	271	/* #if expr ... #elif ... #elif ... #else ... #endif.
	272	* Very similar to #ifdef, but with an expression in place
	273	* of an identifier. The IF token is retyped to PIF before it becomes the tree root. */
	274	ifblock : IF{$IF.setType(PIF);}
	275	white* e=expr white* NEWLINE
	276	t=if_section f=if_suffix
	277	-> ^(IF $e ^(SEQUENCE $t?) $f?)
	278	;
	279
	280	/* A section of a conditional directive.
	281	* Begins just after the line containing
	282	* one of #ifdef, #ifndef, #if, #elif,
	283	* or #else.
	284	* Ends with the HASH white*
	285	* immediately preceding the first matching
	286	* endif, elif, or else; that HASH and the white tokens are
	287	* matched by subsection and left out of the tree. */
	288	if_section : whiteBlock? section_body
	289	;
	290
	291	/* Begins with the first non-white token on a line inside a
	292	* conditional section, and
	293	* ends with the HASH white* immediately preceding the
	294	* endif, elif, or else closing that section.
	295	* Tree is just a flat
	296	* list of TEXT_BLOCKs and directives.
	297	*/
	298	section_body : textBlock? subsection // optional text, then the next # line; subsection recurses back into this rule
	299	;
	300
	301	/* Begins with a # at beginning of a line (after possible
	302	* white space) inside a conditional directive body.
	303	* Ends with the HASH white* immediately preceding
	304	* the closing endif, elif, or else. Tree is just a
	305	* flat list of TEXT_BLOCKs and directives.
	306	*/
	307	subsection : HASH! white!*
	308	( directiveSuffix whiteBlock? section_body)? // absent when this # introduces the closing endif/elif/else, which if_suffix then matches
	309	;
	310
	311	/* Begins with endif, elif, or else. Ends with NEWLINE after
	312	* closing #endif.
	313	* Tree: one of
	314	* 1. empty
	315	* 2. (ELIF (ELIF expr (SEQUENCE items) elseblock?))
	316	* 3. (ELSE items)
	317	* respectively. The reason for #2 is to make the tree
	318	* for a #elif ... look the same as what would be obtained from
	319	* #else #if .... The first ELIF
	320	* should be interpreted as ELSE and the second as IF. In case 3 the ELSE token is retyped to PELSE.
	321	*/
	322	if_suffix : ENDIF white* NEWLINE
	323	->
	324	\| c=ELIF white* expr white* NEWLINE if_section if_suffix
	325	-> ^($c ^($c expr ^(SEQUENCE if_section?) if_suffix?))
	326	\| ELSE{$ELSE.setType(PELSE);}
	327	white* NEWLINE if_section ENDIF white* NEWLINE
	328	-> ^(ELSE if_section?)
	329	;
	330
	331	/* A space, tab, or comment, i.e. a WS or COMMENT token; NEWLINE is not included */
	332	white : WS \| COMMENT ;
	333
	334	/* A preprocessor token or white space token (but not NEWLINE). Used where interior white space must be kept, e.g. macro bodies and pragma lines. */
	335	wpptoken : pptoken \| white ;
	336
	337	/* An expression that can be used with #if or #elif.
	338	* This grammar will accept just about anything here. White tokens between the parts are not kept in the ^(EXPR ...) tree. */
	339	expr : ppdExpr (white* ppdExpr)* -> ^(EXPR ppdExpr+) ;
	340
	341	definedExpr : DEFINED white!* // DEFINED followed by an identifier, parenthesized or not; the ! suffix keeps parentheses and white tokens out of the result
	342	( identifier
	343	\| LPAREN! white!* identifier white!* RPAREN!
	344	)
	345	;
	346
	347	/* A preprocessor token or a defined-expression. These are the
	348	* things that can occur in an #if or #elif directive. The syntactic predicate is needed because DEFINED can also be matched through pptoken as an ordinary identifier. */
	349	ppdExpr : (DEFINED)=> definedExpr
	350	\| pptoken
	351	;
	352
	353	/* A "preprocessor token" as defined in the C11 Standard.
	354	* This rule includes all of the extensions from the other
	355	* languages too. We got rid of header names because
	356	* those are composed of smaller tokens in our lexer. WS, COMMENT, and NEWLINE are not matched by this rule. */
	357	pptoken : identifier
	358	\| pp_number
	359	\| CHARACTER_CONSTANT
	360	\| STRING_LITERAL
	361	\| punctuator
	362	\| OTHER
	363	;
	364
	365	/* Any pptoken that is not a preprocessor keyword: the same alternatives as pptoken, but with identifier narrowed to IDENTIFIER and EXTENDED_IDENTIFIER */
	366	not_directive : pp_number
	367	\| CHARACTER_CONSTANT
	368	\| STRING_LITERAL
	369	\| punctuator
	370	\| OTHER
	371	\| IDENTIFIER
	372	\| EXTENDED_IDENTIFIER
	373	;
	374
	375	/* An "identifier" for the preprocessor is an IDENTIFIER, an EXTENDED_IDENTIFIER,
	376	* or any preprocessor keyword (pp_keyword). NOTE(review): the reserved words of the other languages presumably arrive from the lexer as IDENTIFIER tokens -- confirm.
	377	*/
	378	identifier : IDENTIFIER
	379	\| EXTENDED_IDENTIFIER
	380	\| pp_keyword
	381	;
	382
	383	/* C and preprocessor keywords: */
	384
	385
	386	/* Words that are used in both C and the preprocessor. When one of them heads a directive it is retyped to PIF or PELSE (see ifblock and if_suffix). */
	387	c_pp_keyword : IF
	388	\| ELSE
	389	;
	390
	391	/* Words used in preprocessor but not in C. Through pp_keyword each of them is still accepted wherever an identifier is. */
	392	pp_notc_keyword : DEFINE
	393	\| DEFINED
	394	\| ELIF
	395	\| ENDIF
	396	\| ERROR
	397	\| IFDEF
	398	\| IFNDEF
	399	\| INCLUDE
	400	\| LINE
	401	\| PRAGMA
	402	\| UNDEF
	403	;
	404
	405	/* Every word used by the preprocessor, whether or not it is also a C keyword */
	406	pp_keyword : pp_notc_keyword \| c_pp_keyword
	407	;
	408
	409	/* A "pp_number" is any PP_NUMBER, INTEGER_CONSTANT, or FLOATING_CONSTANT token. */
	410	pp_number : INTEGER_CONSTANT
	411	\| FLOATING_CONSTANT
	412	\| PP_NUMBER
	413	;
	414
	415	/* The punctuators are the symbols which are not words.
	416	* These are the punctuators from all languages, grouped into C, CIVL, and CUDA sub-rules: */
	417	punctuator : c_punctuator
	418	\| civl_punctuator
	419	\| cuda_punctuator
	420	;
	421
	422	/* C punctuators. HASH is included, so a # inside a directive line (e.g. in a macro body) is an ordinary pptoken. NOTE(review): DOTDOT is not a C11 punctuator; it may belong in civl_punctuator -- confirm. */
	423	c_punctuator : AMPERSAND
	424	\| AND
	425	\| ARROW
	426	\| ASSIGN
	427	\| BITANDEQ
	428	\| BITOR
	429	\| BITOREQ
	430	\| BITXOR
	431	\| BITXOREQ
	432	\| COLON
	433	\| COMMA
	434	\| DIV
	435	\| DIVEQ
	436	\| ELLIPSIS
	437	\| DOTDOT
	438	\| DOT
	439	\| EQUALS
	440	\| GT
	441	\| GTE
	442	\| HASH
	443	\| HASHHASH
	444	\| LCURLY
	445	\| LPAREN
	446	\| LSQUARE
	447	\| LT
	448	\| LTE
	449	\| MINUSMINUS
	450	\| MOD
	451	\| MODEQ
	452	\| NEQ
	453	\| NOT
	454	\| OR
	455	\| PLUS
	456	\| PLUSEQ
	457	\| PLUSPLUS
	458	\| QMARK
	459	\| RCURLY
	460	\| RPAREN
	461	\| RSQUARE
	462	\| SEMI
	463	\| SHIFTLEFT
	464	\| SHIFTLEFTEQ
	465	\| SHIFTRIGHT
	466	\| SHIFTRIGHTEQ
	467	\| STAR
	468	\| STAREQ
	469	\| SUB
	470	\| SUBEQ
	471	\| TILDE
	472	;
	473
	474	civl_punctuator : ANNOTATION_END // punctuators beyond the C set; presumably from CIVL-C and ACSL annotations -- TODO confirm against the lexer
	475	\| ANNOTATION_START
	476	\| AT
	477	\| EQUIV_ACSL
	478	\| IMPLIES
	479	\| IMPLIES_ACSL
	480	\| INLINE_ANNOTATION_START
	481	\| LSLIST
	482	\| RSLIST
	483	\| XOR_ACSL
	484	;
	485
	486	cuda_punctuator : LEXCON // presumably the opening and closing CUDA execution-configuration brackets -- TODO confirm against the lexer
	487	\| REXCON
	488	;

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format