PreprocessorTokenSource.java
package edu.udel.cis.vsl.abc.front.c.preproc;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.TokenSource;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.Tree;
import edu.udel.cis.vsl.abc.front.IF.IllegalMacroArgumentException;
import edu.udel.cis.vsl.abc.front.IF.Preprocessor;
import edu.udel.cis.vsl.abc.front.IF.PreprocessorException;
import edu.udel.cis.vsl.abc.front.IF.PreprocessorRuntimeException;
import edu.udel.cis.vsl.abc.front.c.preproc.PreprocessorParser.file_return;
import edu.udel.cis.vsl.abc.front.common.preproc.CTokenIterator;
import edu.udel.cis.vsl.abc.token.IF.CivlcToken;
import edu.udel.cis.vsl.abc.token.IF.CivlcToken.TokenVocabulary;
import edu.udel.cis.vsl.abc.token.IF.CivlcTokenSource;
import edu.udel.cis.vsl.abc.token.IF.FileIndexer;
import edu.udel.cis.vsl.abc.token.IF.Formation;
import edu.udel.cis.vsl.abc.token.IF.FunctionMacro;
import edu.udel.cis.vsl.abc.token.IF.Macro;
import edu.udel.cis.vsl.abc.token.IF.ObjectMacro;
import edu.udel.cis.vsl.abc.token.IF.SourceFile;
import edu.udel.cis.vsl.abc.token.IF.StringToken;
import edu.udel.cis.vsl.abc.token.IF.SyntaxException;
import edu.udel.cis.vsl.abc.token.IF.TokenFactory;
import edu.udel.cis.vsl.abc.token.IF.TokenUtils;
import edu.udel.cis.vsl.abc.util.IF.Pair;
import edu.udel.cis.vsl.abc.util.IF.StringPredicate;
/**
* <p>
* A {@link PreprocessorTokenSource} is created by scanning a sequence of
* character streams and executing the preprocessor directives in those streams
* to produce a stream of (output) tokens. The directives may include
* <code>#include</code> directives, which cause additional files to be opened
* and scanned.
* </p>
*
* <p>
* The tokens produced are instances of {@link CivlcToken}.
* </p>
*
* @author Stephen F. Siegel
*/
public class PreprocessorTokenSource implements CivlcTokenSource {
// Fields...
/** The list of character streams to be parsed and preprocessed */
private CharStream[] theStreams;
/**
* The list of formations corresponding to {@link #theStreams}. The
* {@link Formation} corresponding to a {@link CharStream} provides information
* on where the stream came from.
*/
private Formation[] theFormations;
/**
* Object to track of all source files encountered by this preprocessing
* instance.
*/
private FileIndexer indexer;
/**
* The index of the current stream being processed. Runs from 0 to
* <code>theStreams.length - 1</code>.
*/
private int currentSource = 0;
/**
* The source files used to build this token stream only. These do not
* necessarily include all of the source files seen by the preprocessor creating
* this token source, because the preprocessor can be re-used multiple times to
* create many token sources.
*/
private Set<SourceFile> sourceFiles = new LinkedHashSet<>();
/**
* Factory used to produce new {@link CivlcToken}s.
*/
private TokenFactory tokenFactory;
/**
* Stack of included source file information objects. When a #include directive
* is executed, an element is pushed on to this stack. When the EOF of a file is
* reached, the stack is popped. The stack is initialized with the initial given
* source file.
*/
private Stack<PreprocessorSourceFileInfo> sourceStack = new Stack<PreprocessorSourceFileInfo>();
/**
* The directories which should be searched for files that are included using
*
* <pre>
* #include <filename>
* </pre>
*
* syntax.
*/
private File[] systemIncludePaths;
/**
* The directories which should be searched for files that are included using
*
* <pre>
* #include "filename"
* </pre>
*
* syntax. If the file is not found in one of these directories, the system
* paths are then searched.
*/
private File[] userIncludePaths;
/**
* Should we save all output tokens in some kind of array?
*/
private boolean saveTokens = true;
/**
* The output tokens, if we are saving them, else null.
*/
private ArrayList<CivlcToken> theTokens = null;
/**
* Adjacent string literal tokens will be accumulated in a buffer before being
* added to the output stream because adjacent string literal must be
* concatenated to form one token.
*/
private LinkedList<CivlcToken> stringLiteralBuffer = new LinkedList<CivlcToken>();
/**
* The last token, which will be the EOF token, once it is reached.
*/
private CivlcToken finalToken = null;
/**
* The expression analyzer is used to analyze and evaluate integer expressions
* in preprocessor conditionals.
*/
private PreprocessorExpressionAnalyzer expressionAnalyzer;
/**
* A mapping of macro names to the Macro object. An entry is created and added
* to this map whenever a '#define' directive is processed. An entry is removed
* by a '#undef'
*/
Map<String, Macro> macroMap;
/** Is the current node inside a text block ? */
private boolean inTextBlock = false;
/**
* <p>
* A stack of "ACSL" pragmas (under the CIVL command-universe). An "ACSL" pragma
* denotes that all the annotations, in the same source file and coming after
* this pragma, will be parsed and be a part of the final AST if they are
* written in ACSL syntax.
* </p>
*
* <p>
* This stack is used to keep track of when to parse ACSL annotations: A
* {@link #pushStream(CharStream, Formation)} will cause a push of a default
* PARSE_PRAGMA value (false); A "EOF" will cause a pop on the stack;
* Encountering a "#pragma CIVL ACSL" changes the value on the top frame to
* true.
* </p>
*/
private Stack<Boolean> parseACSLPragmaStack = new Stack<>();
/**
* Is the current node inside a pragma line? We need to know this because then
* closing NEWLINE that terminates the pragma must be made VISIBLE (usually all
* white space is rendered invisible).
*/
private boolean inPragma = false;
/**
* Is the current node inside an inline annotation, i.e., one that starts with
* "//@"? We need to know this for two reasons: (1) the NEWLINE that terminates
* the annotation must be made visible, (2) inside annotations, "@" tokens are
* rendered invisible.
*/
private boolean inInlineAnnotation = false;
/**
* Is the current node inside a block annotation, i.e., one delimited by "/*@"
* and "*" "/"? We need to know this because inside annotations, "@" tokens are
* rendered invisible.
*/
private boolean inBlockAnnotation = false;
/**
* First and last elements of the output buffer, which forms a linked list. This
* is where tokens stay temporarily until they are removed by an invocation of
* nextToken().
*/
private CivlcToken firstOutput, lastOutput;
/**
* The number of tokens that have been output.
*/
private int outputTokenCount = 0;
// Constructors...
/**
*
* Instantiates new CTokenSource object. The given source file is parsed, a file
* info object is created for it and pushed onto the stack. The output tokens
* are not generated; this does not begin until "getToken" is called.
*
* @param config the ABC configuration
* @param FileIndexer indexer the file indexer that will be used to index
* all source files encountered by this preprocessing
* session
* @param streams the input character streams that will be
* concatenated to form a single stream that will be
* the input to the preprocessing algorithm
* @param formations an array specifying the formation history of each
* input stream, basically used to give a name to that
* stream
* @param systemIncludePaths the directories where files included with angle
* brackets are searched
* @param userIncludePaths the directories where files included with double
* quotes are searched
* @param macroMap the predefined macros; maps a macro name to its
* definition body
* @param tokenFactory the token factory to be used for producing new
* {@link CivlcToken}s
* @throws PreprocessorException if and IOException or RecognitionException
* occurs while scanning and parsing the source
* file
*/
public PreprocessorTokenSource(FileIndexer indexer, CharStream[] streams, Formation[] formations,
File[] systemIncludePaths, File[] userIncludePaths, Map<String, Macro> macroMap, TokenFactory tokenFactory)
throws PreprocessorException {
int numStreams = streams.length;
assert systemIncludePaths != null;
assert userIncludePaths != null;
this.indexer = indexer;
this.tokenFactory = tokenFactory;
assert numStreams == formations.length;
this.theStreams = streams;
this.theFormations = formations;
this.systemIncludePaths = systemIncludePaths;
this.userIncludePaths = userIncludePaths;
this.expressionAnalyzer = new PreprocessorExpressionAnalyzer(new MacroDefinedPredicate(macroMap));
this.macroMap = macroMap;
if (saveTokens)
theTokens = new ArrayList<CivlcToken>();
for (Formation formation : formations) {
SourceFile sourceFile = formation.getLastFile();
sourceFiles.add(sourceFile);
indexer.getOrAdd(sourceFile.getFile());
}
pushStream(streams[0], formations[0]);
}
// Helpers...
/**
* Searches for an internal file, i.e., one which is stored in a directory in
* the ABC class path. Returns a new character stream obtained by reading the
* file, or <code>null</code> if the file cannot be found.
*
* @param path the path leading to the file; this path is relative to the
* class path and starts with "/", e.g., "/include/abc".
* @param filename the name of the file proper, e.g., "stdlib.h"
* @return a new character stream obtained from the file or <code>null</code>
*/
private Pair<File, CharStream> findInternalSystemFile(File path, String filename) {
File file = new File(path, filename);
String resource = file.getAbsolutePath();
try {
CharStream stream = PreprocessorUtils.newFilteredCharStreamFromResource(resource, resource);
if (stream != null)
return new Pair<File, CharStream>(file, stream);
} catch (IOException e) {
}
return null;
}
/**
* Finds an internal system file in the default ABC include path,
* {@link Preprocessor#ABC_INCLUDE_PATH}.
*
* @param filename name of file, e.g., "stdlib.h"
* @return new character stream obtained from the file or <code>null</code> if
* no such file is found in the default include paths
*/
private Pair<File, CharStream> findInternalSystemFile(String filename) {
for (File systemPath : systemIncludePaths) {
Pair<File, CharStream> result = findInternalSystemFile(systemPath, filename);
if (result != null)
return result;
}
// look in directory "/include/abc" in the class path:
return findInternalSystemFile(Preprocessor.ABC_INCLUDE_PATH, filename);
}
/**
* Pushes a new character stream onto the {@link #sourceStack}.
*
* @param charStream the new character stream to push
* @param formation a formation which specifies the origin of the character
* stream
* @throws PreprocessorException if something goes wrong parsing the character
* stream
*/
private void pushStream(CharStream charStream, Formation formation) throws PreprocessorException {
String name = formation.getLastFile().getName();
try {
PreprocessorLexer lexer = new PreprocessorLexer(charStream);
PreprocessorParser parser = new PreprocessorParser(new CommonTokenStream(lexer));
file_return fileReturn = parser.file();
int numErrors = parser.getNumberOfSyntaxErrors();
if (numErrors != 0)
throw new PreprocessorException(
numErrors + " syntax errors occurred while scanning included file " + name);
Tree tree = (Tree) fileReturn.getTree();
sourceStack.push(new PreprocessorSourceFileInfo(formation, tree));
parseACSLPragmaStack.push(false);
incrementNextNode(); // skip root "FILE" node
} catch (RecognitionException e) {
throw new PreprocessorException("Preprocessing " + formation.toString() + " failed: " + e);
} catch (RuntimeException e) {
throw new PreprocessorException(e.getMessage());
}
}
/**
* Returns current file being processed.
*
* @return current file or <code>null</code> if the source stack is currently
* empty
*/
private SourceFile getCurrentSource() {
if (sourceStack.isEmpty())
return theFormations[currentSource].getLastFile();
else
return sourceStack.peek().getFile();
}
/**
* Process the next node in the CPP AST. This may actually involve processing
* more nodes, but it will at least involve processing one node and incrementing
* the current position by at least one token.
*
* @throws PreprocessorException
*/
private void processNextNode() throws PreprocessorException {
CommonTree node = (CommonTree) getNextInputNode();
if (inTextBlock) {
// If you are in a TEXT_BLOCK, you don't have to check for any
// directives.
processText(node);
} else {
Token token = node.getToken();
int type = token.getType();
switch (type) {
case PreprocessorParser.EOF:
processEOF(node);
break;
case PreprocessorParser.TEXT_BLOCK:
processTextBlock(node);
break;
case PreprocessorParser.DEFINE:
processMacroDefinition(node);
break;
case PreprocessorParser.ERROR:
processError(node);
break;
case PreprocessorParser.ELIF:
case PreprocessorParser.PIF:
processIf(node);
break;
case PreprocessorParser.IFDEF:
processIfdef(node);
break;
case PreprocessorParser.IFNDEF:
processIfndef(node);
break;
case PreprocessorParser.INCLUDE:
processInclude(node);
break;
case PreprocessorParser.PPRAGMA:
processPragma(node);
break;
case PreprocessorParser.UNDEF:
processUndef(node);
break;
case PreprocessorParser.HASH:
processNondirective(node);
break;
case PreprocessorParser.LINE:
processLine(node);
break;
default:
processText(node);
}
}
}
// processText...
/**
* <p>
* Processes a text node, i.e., a node for a token which is not a preprocessor
* directive and does not have any special meaning to the preprocessor, though
* it may be a macro invocation. This node may or may not be in a text block.
* </p>
*
* <p>
* Precondition: This method should be called only from the outermost scope of
* expansion, i.e., we are not currently in a macro expansion.
* </p>
*
* <p>
* Postcondition: if the node is not an identifier for a macro, a
* {@link CivlcToken} for it is created and added to the output buffer. If it is
* a macro, macro expansion takes place. For a function macro, this involves
* consuming more input tokens (the tokens comprising the left parenthesis, the
* arguments, and the final right parenthesis). The current position in the
* input AST is moved to the point just after that last token. The tokens
* resulting from the expansion are added to the output buffer.
* </p>
*
* @param textNode the text node to process
* @throws PreprocessorException if something goes wrong with macro expansion,
* or if a ppnumber token does not have the form
* of a CIVL-C range expression (a..b)
*/
private void processText(Tree textNode) throws PreprocessorException {
Token token = ((CommonTree) textNode).getToken();
if (PreprocessorUtils.isIdentifier(token)) {
processIdentifier(textNode);
} else {
// Wether parse ACSL annotation or not:
boolean parseACSL = parseACSLPragmaStack.peek();
switch (token.getType()) {
case PreprocessorLexer.INLINE_ANNOTATION_START:
assert (inTextBlock);
inInlineAnnotation = true;
// will be set to false at next NEWLINE
break;
case PreprocessorLexer.NEWLINE:
assert (inTextBlock);
if (inInlineAnnotation) {
// if parse ACSL, NEWLINE in an annotation line must be
// added to output when "inInlineAnnotation" flag is
// true: Otherwise, nothing in annotation line goes to
// output
if (parseACSL)
shiftToOutput(textNode);
inInlineAnnotation = false;
incrementNextNode();
return;
}
break;
case PreprocessorLexer.ANNOTATION_START:
assert (inTextBlock);
inBlockAnnotation = true;
break;
case PreprocessorLexer.ANNOTATION_END:
assert (inTextBlock && inBlockAnnotation);
inBlockAnnotation = false;
// if parse ACSL, ANNOTATION_END in an annotation block must
// be added to output when "inBlockAnnotation" flag is true;
// Otherwise, nothing in annotation block goes to output:
if (parseACSL)
shiftToOutput(textNode);
incrementNextNode();
return;
case PreprocessorLexer.PP_NUMBER:
String sourceName = token.getInputStream().getSourceName();
Boolean isFortran = sourceName.toUpperCase().contains(".F");
if (isFortran) {
processIdentifier(textNode);
} else
processPPNumber(token);
return;
default:
}
// If current control is NOT in block annotation and line
// annotation, or ACSL will be parsed anyway, put the node to
// output:
if ((!inBlockAnnotation && !inInlineAnnotation) || parseACSL)
shiftToOutput(textNode);
incrementNextNode();
}
}
/**
* Processes a "PPNumber", which is any preprocessor number that is not a
* standard integer or floating constant. The only possibility currently is the
* CIVL-C range or fragment thereof, which has the form "a..b", or "a..", or
* "..b", where a and b are integer constants. Ideally we would like to have
* been parsed as 3 (or 2) separate tokens, but we just couldn't find any way to
* do that with ANTLR. So we fix it here.
*
* @param token the ppnumber token which should be a CIVL-C range expression or
* part thereof
* @throws PreprocessorException if the text of that token does not have the
* form of a CIVL-C range expression or part
* thereof
*/
private void processPPNumber(Token token) throws PreprocessorException {
String text = token.getText();
int line = token.getLine();
int pos = token.getCharPositionInLine();
int length = text.length();
int index = text.indexOf("..");
int startIndex = ((CommonToken) token).getStartIndex();
int stopIndex = ((CommonToken) token).getStopIndex();
int chan = token.getChannel();
CharStream stream = token.getInputStream();
Formation formation = getIncludeHistory();
if (index < 0)
throw new PreprocessorException("Unknown preprocessor number", token);
if (index > 0) {
CivlcToken leftToken = tokenFactory.newCivlcToken(stream, PreprocessorLexer.INTEGER_CONSTANT, chan,
startIndex, startIndex + index - 1, formation, line, pos, TokenVocabulary.PREPROC);
// should not be necessary since the text is obtained from the
// stream by default, using the start and stop indexes:
// leftToken.setText(text.substring(0, index));
addOutput(leftToken);
}
CivlcToken dotdot = tokenFactory.newCivlcToken(stream, PreprocessorLexer.DOTDOT, chan, startIndex + index,
startIndex + index + 1, formation, line, pos + index, TokenVocabulary.PREPROC);
addOutput(dotdot);
if (index + 2 < length) {
CivlcToken rightToken = tokenFactory.newCivlcToken(stream, PreprocessorLexer.INTEGER_CONSTANT, chan,
startIndex + index + 2, stopIndex, formation, line, pos + index + 2, TokenVocabulary.PREPROC);
String rightText = rightToken.getText();
char firstChar = rightText.charAt(0);
// rightText should be either an integer literal or
// identifier ... how to tell
if (firstChar == '+' || firstChar == '-' || (firstChar >= '0' && firstChar <= '9')) {
// OK: should be integer
} else {
rightToken.setType(PreprocessorLexer.IDENTIFIER);
}
addOutput(rightToken);
}
incrementNextNode();
}
/**
* Is this one of the special macros __LINE__ or __FILE__, predefined macros
* whose values change automatically.
*
* @param name an identifier string
* @return <code>true</code> iff name is one of the special macro names
*/
private boolean isSpecialMacro(String name) {
return "__LINE__".equals(name) || "__FILE__".equals(name);
}
/**
* If the identifier is a macro, do macro expansion. Else, it's just a regular
* token that gets shifted to output.
*
* @throws PreprocessorException
*/
private void processIdentifier(Tree identifierNode) throws PreprocessorException {
String name = identifierNode.getText();
Macro macro = macroMap.get(name);
// If the control is in block annotation or inline annotation but ACSL
// shall not be parsed, skip:
if ((inBlockAnnotation || inInlineAnnotation) && !parseACSLPragmaStack.peek()) {
incrementNextNode();
} else if (macro != null
&& (macro instanceof ObjectMacro || peekAheadSkipWSHasType(PreprocessorLexer.LPAREN))) {
processInvocation(macro, identifierNode);
} else if (isSpecialMacro(name)) {
addOutput(processSpecialInvocation(identifierNode));
incrementNextNode();
} else {
shiftToOutput(identifierNode);
incrementNextNode();
}
}
/**
* <p>
* Processes an object or function-like macro invocation node. Continues walking
* the input tree to find the macro arguments (if the macro is a function
* macro). Then expands the macro using the macro's definition. The resulting
* token sequence is added to the end of the outputBuffer and the position in
* the input sequence is moved to the point just after the complete macro
* invocation.
* </p>
*
* <p>
* This method is recursive: if any macro expansions occur within arguments,
* they are also expanded, following the rules laid out in the C11
* specification. See C11 6.10.3.
* </p>
*
* <p>
* Implementation notes: for function-like macros: calls
* {@link #findInvocationArguments(FunctionMacro, Tree)}, then calls
* {@link #processInvocation(FunctionMacro, CivlcToken, CivlcToken[])}.
* </p>
*
* @param macro a Macro object
* @param invocationNode the node containing the identifier token whose string
* value is the name of the macro
* @throws PreprocessorException if something goes wrong expanding the macro,
* such as the wrong number of arguments is
* provided
*/
private void processInvocation(Macro macro, Tree invocationNode) throws PreprocessorException {
Token token = ((CommonTree) invocationNode).getToken();
CivlcToken cToken = tokenFactory.newCivlcToken(token, getIncludeHistory(), TokenVocabulary.PREPROC);
Pair<CivlcToken, CivlcToken> result;
if (macro instanceof ObjectMacro) {
result = processInvocation((ObjectMacro) macro, cToken);
incrementNextNode();
} else {
Iterator<CivlcToken> argumentIter = getArgumentIterator((FunctionMacro) macro);
// note the call to findInvocationArguments updated
// position correctly.
result = processInvocation((FunctionMacro) macro, cToken, argumentIter);
}
addOutputList(result);
}
/**
* Processes the invocation of a "special" macro, one of the built-ins, such as
* "__LINE__" or "__FILE__".
*
* @param invocationNode the parse tree node corresponding to the use of the
* macro name
* @throws PreprocessorException if something goes wrong adding the new string
* or integer constant token to the current output
* buffer
*/
private CivlcToken processSpecialInvocation(Tree invocationNode) throws PreprocessorException {
Token token = ((CommonTree) invocationNode).getToken();
CivlcToken cToken = tokenFactory.newCivlcToken(token, getIncludeHistory(), TokenVocabulary.PREPROC);
return expandSpecial(cToken);
}
private CivlcToken expandSpecial(CivlcToken origin) {
String name = origin.getText();
Formation formation = tokenFactory.newBuiltinMacroExpansion(origin);
CivlcToken newToken;
switch (name) {
case "__LINE__":
newToken = tokenFactory.newCivlcToken(PreprocessorLexer.INTEGER_CONSTANT, "" + origin.getLine(), formation,
TokenVocabulary.PREPROC);
break;
case "__FILE__":
newToken = tokenFactory.newCivlcToken(PreprocessorLexer.STRING_LITERAL,
'"' + getCurrentSource().getFile().getAbsolutePath() + '"', formation, TokenVocabulary.PREPROC);
break;
default:
throw new PreprocessorRuntimeException("unreachable");
}
return newToken;
}
/**
* <p>
* Performs expansion of an object macro. Returns a list of tokens obtained from
* the macro's replacement list.
* </p>
*
* <p>
* The list of tokens returned consists entirely of newly created tokens (i.e.,
* tokens which did not exist upon entry to this method). They will have correct
* include and expansion histories.
* </p>
*
* <p>
* Following the C Standard, the list of replacement tokens are expanded a
* second time, after marking all tokens corresponding to the given macro
* identifier as "do not expand".
* </p>
*
* @param macro the object macro
* @param origin the expansion history of the identifier before getting to the
* current expansion.
* @return a pair consisting of the first and last elements of the new list of
* tokens
* @throws PreprocessorException if something goes wrong while expanding the
* list of replacement tokens (i.e., the second
* expansion)
*/
private Pair<CivlcToken, CivlcToken> processInvocation(ObjectMacro macro, CivlcToken origin)
throws PreprocessorException {
return performSecondExpansion(instantiate(macro, origin), macro);
}
/**
* <p>
* Processes a function macro invocation, given the macro and the actual
* arguments. Each actual argument is given as a null-terminated list of tokens.
* This returns the list of tokens which is the result of expanding the macros.
* This procedure is recursive: if any macro invocations occur within the
* arguments, they are also expanded.
* </p>
*
* <p>
* It is assumed that the input argument tokens will have the correct expansion
* histories, i.e., for all expansions up to but not including the current one.
* </p>
*
* <p>
* The list of tokens returned consists entirely of newly created tokens (i.e.,
* tokens which did not exist upon entry to this method). They will have correct
* include and expansion histories.
* </p>
*
* <p>
* NOTE: in a function-like macro replacement list, each '#' must be immediately
* followed by a parameter name, else error --- C11 6.10.3.2(1). The '#' may
* also occur in an object-like macro replacement list, but has no special
* meaning or restrictions.
* </p>
*
* <p>
* NOTE: if "# A" occurs in the replacement sequence, where A is one of the
* parameter names, then the actual argument corresponding to A is NOT expanded.
* Instead, the exact actual argument is stringified, and the resulting string
* token replaces the "# A". Stringification involves escaping characters such
* as " and \. See C11 6.10.3.2.
* </p>
*
* <p>
* NOTE: The "##" may be used in object and function-like macro replacement
* lists. Also, "[a] ## preprocessing token shall not occur at the beginning or
* at the end of a replacement list for either form of macro definition." C11
* 6.10.3.3(1).
* </p>
*
* <p>
* NOTE: if "A ##" or "## A" occurs, then A is replaced by the non-expanded
* actual argument token list UNLESS actual is empty (no tokens), in which case
* A is replaced by PLACEMARKER, a special symbol. After that and before second
* expansion: each "X ## Y" --- if the ## is from the replacement list and NOT
* from an argument --- is replaced by concatenation of X and Y, where
* PLACEMARKER is the identity element for concatenation. Then all PLACEMARKERs
* are removed. Hence before second expansion all placemarkers and all of the
* original ## tokens should be gone. ISN'T THIS THE SAME as saying: if actual
* argument is empty then delete "A ##" or "## A" from the replacement list.
* What about "B ## A ## C"? It becomes "B ## C".
* </p>
*
*
* Formations: concatenation, stringification. Both part of macro expansion.
*
* After all of this, second expansion.
* </p>
*
* @param macro the function-like macro which is being invoked
* @param arguments the actual arguments in the invocation of the macro
* @return a pair consisting of the first and last elements of the linked list
* which is the new token sequence produced by the expansion
* @throws PreprocessorException
*/
private Pair<CivlcToken, CivlcToken> processInvocation(FunctionMacro macro, CivlcToken origin,
Iterator<CivlcToken> argumentIter) throws PreprocessorException {
return performSecondExpansion(instantiate(macro, origin, argumentIter), macro);
}
/**
* In the second expansion of a macro, the result of the first expansion is
* first scanned for instances of the macro being expanded. Any such instances
* are marked to not be re-expanded, to prevent an infinite recursion. See C11
* 6.10.3.4(2): "If the name of the macro being replaced is found during this
* scan of the replacement list (not including the rest of the source file’s
* preprocessing tokens), it is not replaced."
*
* @param tokenList a list of tokens to be expanded given as pair consisting of
* first and last elements
* @param macro the macro being expanded
* @return result of second expansion of tokenList (first and last elements of
* the linked list of tokens)
* @throws PreprocessorException any improper macro invocations occur in the
* token list
*/
private Pair<CivlcToken, CivlcToken> performSecondExpansion(Pair<CivlcToken, CivlcToken> tokenList, Macro macro)
throws PreprocessorException {
String name = macro.getName();
// mark all occurrences of identifier M as "do not expand"...
for (CivlcToken token = tokenList.left; token != null; token = token.getNext()) {
if (PreprocessorUtils.isIdentifier(token) && name.equals(token.getText()))
token.makeNonExpandable();
}
return expandList(tokenList.left);
}
private Pair<CivlcToken, CivlcToken> instantiate(FunctionMacro macro, CivlcToken origin,
Iterator<CivlcToken> argumentIter) throws PreprocessorException {
MacroExpander expander = new MacroExpander(this, macro, origin, argumentIter);
Pair<CivlcToken, CivlcToken> result = expander.expand();
return result;
}
/**
* Creates a new instance of the object macro's body. Creates a sequence of
* CTokens corresponding to the sequence of input tokens in the macro's
* replacement list.
*
* The result is a linked list of CToken using the "next" field of CToken. The
* elements of the Pair returned are the first and last element of the list.
*
* @param macro any object macro
* @param origin the original expansion history for the identifier which is the
* macro's name and led to its expansion
* @return first and last element in a null-terminated linked list of fresh
* CTokens obtained from the macro's body
*/
private Pair<CivlcToken, CivlcToken> instantiate(ObjectMacro macro, CivlcToken origin)
throws PreprocessorException {
MacroExpander expander = new MacroExpander(this, macro, origin);
Pair<CivlcToken, CivlcToken> result = expander.expand();
return result;
}
/**
* <p>
* Expands all macro invocations in a null-terminated linked list of
* {@link CivlcToken}, modifying the list in the process. Does not care about
* preprocessor directives, or anything else other than macro invocations. Even
* if this method comes across a directive, it does not treat the directive as a
* directive---it is treated as just another token.
* </p>
*
* <p>
* Exception: do not expand macros if they occur within a "defined" operator!
* </p>
*
* <p>
* Note that the left token in the pair returned by this method MAY be the first
* token in the given list. It also may NOT be the first token in the given
* list, because that token was a macro invocation and was replaced.
* </p>
*
* <p>
* Implementation: iterate over the tokens looking for macro invocations. When
* you find one, call processInvocation and insert its output into list, in
* place of the the invocation.
*
* <pre>
* ... X X X M ( A0 A0 A1 A2 A2 A2 ) X X X ... ->
* ... X X X R R R ... R X X X ...
* </pre>
* </p>
*
* @param token the first element in a null-terminated list of tokens; may be
* <code>null</code> for empty list
* @return pair consisting of the first and last elements of the expanded list;
* the two components of this pair will be <code>null</node> if the
* expanded list is empty
* @throws PreprocessorException if something goes wrong in a macro expansion
* (e.g., the wrong number of arguments)
*/
Pair<CivlcToken, CivlcToken> expandList(CivlcToken first) throws PreprocessorException {
CivlcToken current = first, previous = null;
while (current != null) {
if (!current.isExpandable()) {
previous = current;
current = current.getNext();
continue;
}
int type = current.getType();
if (type == PreprocessorLexer.DEFINED) {
// ignore it and next token
previous = current.getNext();
current = previous.getNext();
continue;
}
Macro macro = null;
boolean isInvocation = false;
boolean isSpecial = false;
if (PreprocessorUtils.isIdentifier(current)) {
macro = macroMap.get(current.getText());
if (macro == null) {
isInvocation = isSpecialMacro(current.getText());
isSpecial = isInvocation;
} else if (macro instanceof ObjectMacro) {
isInvocation = true;
} else {
CivlcToken next = current.getNext();
// skip all white space
while (next != null && PreprocessorUtils.isWhiteSpace(next))
next = next.getNext();
isInvocation = (next != null && next.getType() == PreprocessorLexer.LPAREN);
}
}
if (!isInvocation) { // ignore it
previous = current;
current = current.getNext();
continue;
}
if (isSpecial) {
CivlcToken newToken = expandSpecial(current);
newToken.setNext(current.getNext());
if (previous == null)
first = newToken;
else
previous.setNext(newToken);
current = current.getNext();
continue;
}
Pair<CivlcToken, CivlcToken> replacements;
if (macro instanceof ObjectMacro) {
replacements = processInvocation((ObjectMacro) macro, current);
current = current.getNext();
} else {
Iterator<CivlcToken> iter = new CTokenIterator(current.getNext());
// CivlcToken[] arguments = findInvocationArguments(
// (FunctionMacro) macro, iter);
replacements = processInvocation((FunctionMacro) macro, current, iter);
// move current to the token just after the closing ')'
// of the invocation, or null if the ')' was the last token:
if (iter.hasNext())
current = iter.next();
else
current = null;
}
if (replacements.left == null) {
if (previous == null)
first = current;
else
previous.setNext(current);
} else {
if (previous == null)
first = replacements.left;
else
previous.setNext(replacements.left);
previous = replacements.right;
previous.setNext(current);
}
}
return new Pair<>(first, previous);
}
/**
* <p>
* When current position in preprocessor parse tree is at a macro application
* node, this method will continue walking through the tree from that point in
* order to find the macro invocation arguments. It creates new
* {@link CivlcToken}s for the tokens occurring in those arguments. Returns an
* array (whose length is the number of inputs to the macro) in which element i
* is first element in the list of {@link CivlcToken}s for the tokens which
* comprise argument i.
* </p>
*
* <p>
* The new tokens will have the correct include history. They will have the
* "trivial" expansion history of length 0.
* </p>
*
* <p>
* From C11 Sec. 6.10.3.11: "If there are sequences of preprocessing tokens
* within the list of arguments that would otherwise act as preprocessing
* directives, the behavior is undefined." This method will throw a
* {@link PreprocessorException} if that occurs.
* </p>
*
* <p>
* Updates position to point just after closing right parenthesis.
* </p>
*
* <p>
* Implementation notes: creates an {@link Iterator} of {@link CivlcToken} by
* iterating over the {@link Tree} nodes starting from the node immediately
* following the <code>invocationNode</code> in DFS order and creating new
* tokens. Feeds this to method
* {@link #findInvocationArguments(FunctionMacro, Iterator)}.
* </p>
*
* @param macro the function-like macro which is being applied
* @param invocationNode the node in the preprocessor parse tree representing an
* invocation of the macro
* @return array of length number of formal parameters of macro; i-th element of
* array is the first element of a list of tokens that comprise the i-th
* argument
* @throws PreprocessorException if a sequence of tokens that would otherwise
* act as preprocessing directives occurs in the
* argument list
*/
private Iterator<CivlcToken> getArgumentIterator(FunctionMacro macro) throws PreprocessorException {
Iterator<CivlcToken> iter;
incrementNextNode();
iter = new Iterator<CivlcToken>() {
@Override
public boolean hasNext() {
return getNextInputNode() != null;
}
@Override
public CivlcToken next() {
Tree node = getNextInputNode();
Token inputToken = ((CommonTree) node).getToken();
CivlcToken result = tokenFactory.newCivlcToken(inputToken, getIncludeHistory(),
TokenVocabulary.PREPROC);
if (node.getChildCount() > 0)
throw new IllegalMacroArgumentException(result);
incrementNextNode();
return result;
}
@Override
public void remove() { // should never be called
throw new UnsupportedOperationException();
}
};
return iter;
}
/**
* Process and End-of-file token. If this is the root file, you are all done,
* and the EOF token is moved to the output buffer. Otherwise, you pop stack and
* throw away the EOF token---it does not get output.
*
* @throws PreprocessorException
*/
private void processEOF(Tree node) throws PreprocessorException {
Token eof = ((CommonTree) node).getToken();
PreprocessorSourceFileInfo o = sourceStack.pop();
parseACSLPragmaStack.pop();
assert parseACSLPragmaStack.size() == sourceStack.size();
if (sourceStack.isEmpty()) {
if (currentSource == theStreams.length - 1) {
CivlcToken myEof = tokenFactory.newCivlcToken(eof, o.getIncludeHistory(), TokenVocabulary.PREPROC);
addOutput(myEof);
} else {
currentSource++;
inTextBlock = false;
inPragma = false;
inInlineAnnotation = false;
inBlockAnnotation = false;
pushStream(theStreams[currentSource], theFormations[currentSource]);
processNextNode();
}
} else {
// you were at the include node. jump to next node ignoring
// children of include node.
jumpNextNode();
}
}
/**
* <p>
* Processes a text block node by moving the current position to the first child
* (the first token in the text block).
* </p>
*
* <p>
* A text block consists of a sequence of tokens that do not contain any
* preprocessor directives. The sequence may, however, contains macro
* invocations which need to be expanded.
* </p>
*
* @param node a node in the tree with TEXT_BLOCK token
*/
private void processTextBlock(Tree textBlockNode) {
int numChildren = textBlockNode.getChildCount();
if (numChildren != 0)
inTextBlock = true;
incrementNextNode();
}
/**
* Processes a '#define' for an (object or function) macro. For an object macro,
* child 0 is the identifier being defined. Child 1 is the "body" node, whose
* children form a (possibly empty) list of tokens.
*
* For a function macro. Child 0 is the identifier being defined. Child 1 is the
* formal parameter list node. Child 2 is the body.
*
* @param node A node in the tree with token of type PDEFINE.
* @throws PreprocessorException if the macro has already been defined
* differently
*/
private void processMacroDefinition(Tree node) throws PreprocessorException {
SourceFile sourceFile = getCurrentSource();
if (node.getChildCount() == 3)
processMacroDefinition(tokenFactory.newFunctionMacro(node, sourceFile));
else
processMacroDefinition(tokenFactory.newObjectMacro(node, sourceFile));
}
/**
* Takes a newly created Macro object and checks whether it has been defined
* previously. If it has, checks that the two definitions are equivalent. If the
* checks pass, an entry for the macro is added to the macroMap.
*
* @param newMacro a new Macro object
* @throws PreprocessorException if a macro with the same name has been defined
* previously (and not undefined) in a different
* way
*/
private void processMacroDefinition(Macro newMacro) throws PreprocessorException {
String name = newMacro.getName();
Macro oldMacro = macroMap.get(name);
if (oldMacro != null) {
if (!oldMacro.equals(newMacro))
throw new PreprocessorException("Attempt to redefine macro in new way: " + newMacro
+ "\nOriginal defintion was at: " + oldMacro.getDefinitionNode());
} else {
macroMap.put(name, newMacro);
}
jumpNextNode();
}
/**
* Processes a "#error" node. Always throws an exception with a message formed
* from the error node's list of tokens.
*
* @param errorNode a node of type "#error"
* @throws PreprocessorException always, with a message formed from the #error
* line's tokens
*/
private void processError(Tree errorNode) throws PreprocessorException {
String message = "Preprocessor #error directive encountered:\n";
int numChildren = errorNode.getChildCount();
Token errorToken = ((CommonTree) errorNode).getToken();
Token betterErrorToken = tokenFactory.newCivlcToken(errorToken, getIncludeHistory(), TokenVocabulary.PREPROC);
PreprocessorException e = null;
for (int i = 0; i < numChildren; i++) {
Tree child = errorNode.getChild(i);
String text = child.getText();
message += text;
}
e = new PreprocessorException(message, betterErrorToken);
e.setStackTrace(new StackTraceElement[0]);
throw e;
}
private void processIf(Tree ifNode) throws PreprocessorException {
CommonTree expressionNode = (CommonTree) ifNode.getChild(0);
CivlcToken first, expandedFirst;
TokenSource source;
int result;
assert expressionNode.getType() == PreprocessorParser.EXPR;
// form a list of new non-whitespace tokens from the expression node...
first = nonWhiteSpaceTokenListFromChildren(expressionNode);
// expand all macro invocations in the expression...
expandedFirst = expandList(first).left;
// form a TokenSource from this list...
source = TokenUtils.makeTokenSourceFromList(expandedFirst);
// evaluate to get integer result...
result = expressionAnalyzer.evaluate(source);
// move to the appropriate point based on result...
processConditional(ifNode, result != 0);
}
private void processIfdef(Tree ifdefNode) {
processDefOrNdefNode(ifdefNode, true);
}
private void processIfndef(Tree ifndefNode) {
processDefOrNdefNode(ifndefNode, false);
}
private void processDefOrNdefNode(Tree node, boolean isIfDef) {
String macroName = node.getChild(0).getText();
Macro macro = macroMap.get(macroName);
boolean trueBranch = (isIfDef ? macro != null : macro == null);
processConditional(node, trueBranch);
}
private void processConditional(Tree node, boolean takeTrueBranch) {
if (takeTrueBranch) {
Tree next = node.getChild(1); // TRUE_BRANCH node
if (next.getChildCount() > 0)
setNextInputNode(next.getChild(0));
else
jumpNextNode();
} else if (node.getChildCount() > 2) {
Tree next = node.getChild(2);
if (next.getChildCount() > 0)
setNextInputNode(next.getChild(0));
else
jumpNextNode();
} else
jumpNextNode();
}
/**
* Given a node in the tree, this method creates a list of new CTokens formed
* from children of the given node. It returns the first element in the list.
* White space tokens (including new lines) are filtered out.
*
* @param root a node in the AST of a preprocessor source file
* @return the first node in a list of new CTokens formed from the children of
* root, or null if the root has 0 children
* @throws PreprocessorException if one of the children has a null token
*/
private CivlcToken nonWhiteSpaceTokenListFromChildren(CommonTree root) throws PreprocessorException {
int numChildren = root.getChildCount();
CivlcToken first = null, prev = null;
for (int i = 0; i < numChildren; i++) {
Token token = ((CommonTree) root.getChild(i)).getToken();
if (token == null)
throw new PreprocessorException("Encountered null token as child " + i + " of node " + root);
if (!PreprocessorUtils.isWhiteSpace(token)) {
CivlcToken newToken = tokenFactory.newCivlcToken(token, getIncludeHistory(), TokenVocabulary.PREPROC);
if (prev == null)
first = newToken;
else
prev.setNext(newToken);
prev = newToken;
}
}
return first;
}
/**
* Processes a #include directive. Locates the file by searching the appropriate
* paths, parses it, creates a source file info for it and pushes it onto the
* sourceStack.
*
* @param includeNode node in source tree of type PINCLUDE
* @throws PreprocessorException if the file can't be found, or if it does not
* conform to the Preprocessor grammar
*/
private void processInclude(Tree includeNode) throws PreprocessorException {
int numChildren = includeNode.getChildCount();
CommonToken firstToken = (CommonToken) ((CommonTree) includeNode.getChild(0)).getToken();
CommonToken lastToken = (CommonToken) ((CommonTree) includeNode.getChild(numChildren - 1)).getToken();
CivlcToken filenameToken = tokenFactory.newCivlcToken(firstToken.getInputStream(),
PreprocessorLexer.STRING_LITERAL, firstToken.getChannel(), firstToken.getStartIndex(),
lastToken.getStopIndex(), getIncludeHistory(), firstToken.getLine(), firstToken.getCharPositionInLine(),
TokenVocabulary.PREPROC);
String fullName = filenameToken.getText();
int numChars = fullName.length();
String name;
boolean system;
if (numChars < 3)
throw new PreprocessorException("Improper file name in #include: " + fullName, filenameToken);
int firstChar = fullName.charAt(0), lastChar = fullName.charAt(numChars - 1);
if (firstChar == '<') {
if (lastChar != '>')
throw new PreprocessorException("Improper file name in #include: " + fullName, filenameToken);
name = fullName.substring(1, numChars - 1);
system = true;
} else if (firstChar == '"') {
if (lastChar != '"')
throw new PreprocessorException("Improper file name in #include: " + fullName, filenameToken);
name = fullName.substring(1, numChars - 1);
system = false;
} else {
throw new PreprocessorException("Improper file name in #include: " + fullName, filenameToken);
}
Pair<File, CharStream> pair;
try {
pair = findIncludeStream(name, system);
} catch (IOException e) {
throw new PreprocessorException("I/O error when attempting to include " + name + ":\n" + e, filenameToken);
}
if (pair == null)
throw new PreprocessorException("Cannot find included file " + name, filenameToken);
SourceFile sourceFile = indexer.getOrAdd(pair.left);
sourceFiles.add(sourceFile);
pushStream(pair.right, tokenFactory.newInclusion(sourceFile, filenameToken));
}
/**
* Locates an include file, opens it, and creates a {@link CharStream} from it.
*
* @param filename the name of the file, as extracted from the token
* @param system <code>true</code> if angular brackets were used around the
* filename in the <code>#include</code> directive,
* <code>false</code> if double quotes were used
* @return if the file can't be found, returns <code>null</code>, otherwise,
* returns a pair consisting of the {@link File} object which wraps the
* file name and the {@link CharStream} generated from the file
* @throws IOException if something goes wrong trying to read or open the file
*/
private Pair<File, CharStream> findIncludeStream(String filename, boolean system) throws IOException {
File file = null;
CharStream charStream = null;
if (!system) {
// first look in dir containing current file, then
// user's include paths
File currentDir = sourceStack.peek().getFile().getFile().getParentFile();
file = new File(currentDir, filename);
if (!file.isFile())
file = PreprocessorUtils.findFile(userIncludePaths, filename);
}
if (file == null)
file = PreprocessorUtils.findFile(systemIncludePaths, filename);
if (file == null) {
// last but not least: look internally in the class path:
return findInternalSystemFile(filename);
} else {
charStream = PreprocessorUtils.newFilteredCharStreamFromFile(file);
}
if (charStream == null)
return null;
return new Pair<>(file, charStream);
}
/**
* <p>
* Processes a <code>#pragma</code> node. For now, the whole pragma is just
* going to be sent to the output stream. Macro replacement will take place in
* the pragma tokens as usual.
* </p>
*
* <p>
* This method sends the <code>#pragma</code> node to the output list, and moves
* the current position to the first child (or the next token if there are no
* children).
* </p>
*
* <p>
* This sets <code>inPragma</code> to <code>true</code>. This is so that when
* the newline is reached that terminates the pragma, that newline will not have
* its channel set to the hidden channel. Hence the newline will be visible to
* an ANTLR parser that consumes from this source. The newline is necessary to
* know where the pragma ends.
* </p>
*
* <p>
* Also sets <code>inTextBlock</code> to <code>true</code>, to avoid the expense
* of checking for preprocessor directives in the body of the pragma. It will
* get set back to <code>false</code> after leaving the pragma body.
* </p>
*
* @param pragmaNode A <code>#pragma</code> node in the preprocessor tree
* @throws PreprocessorException should not happen
*/
private void processPragma(Tree pragmaNode) throws PreprocessorException {
Token token = ((CommonTree) pragmaNode).getToken();
CivlcToken pragmaToken = tokenFactory.newCivlcToken(token, getIncludeHistory(), TokenVocabulary.PREPROC);
Token pragmaUniverseName = ((CommonTree) pragmaNode.getChild(1)).getToken();
// Assign the top ACSL parse mark to true:
if (pragmaUniverseName.getText().equals("CIVL")) {
Token pragmaCommandName = ((CommonTree) pragmaNode.getChild(3)).getToken();
if (pragmaCommandName.getText().equals("ACSL")) {
parseACSLPragmaStack.pop();
parseACSLPragmaStack.push(true);
}
}
addOutput(pragmaToken);
inPragma = true;
inTextBlock = true;
incrementNextNode();
}
/**
* Processes a <code>#undef</code> node. This node has 1 child, which is the
* identifier of the macro which is to be undefined. This removes the macro from
* the {@link #macroMap}. According to C11 Sec. 6.10.3.5.2, the operation is
* just ignored if the macro is not already defined, so this cannot lead to an
* error.
*
* @param undefNode the <code>#undef</code> node
*/
private void processUndef(Tree undefNode) {
Tree identifierNode = undefNode.getChild(0);
String name = identifierNode.getText();
macroMap.remove(name);
jumpNextNode();
}
/**
* Processes a preprocessor "nondirective" (which, yes, is a kind of directive).
* For now, a no-op.
*
* @param nondirectiveNode a preprocessor nondirective node
*/
private void processNondirective(Tree nondirectiveNode) {
jumpNextNode();
}
/**
* Process a <code>#line</code> directive.
*
* Ignored, for now.
*
* @param lineNode preprocessor parse tree node for <code>#line</code> directive
*/
private void processLine(Tree lineNode) {
// TODO
// strategy: get the real line number from the "line" token.
// let delta = new line number - real line number
// when creating new CivlcTokens: add delta to line number.
// this delta goes on the include stack: each entry has its own.
// setting the file also affects the entry on include stack
//
// macro definition: when processing the macro definition, common
// tokens from the preprocessor parse tree are used. These will
// have original (unadjusted) line numbers. And filenames.
// could pass arguments to CommonMacro to adjust these things.
// or do it before calling CommonMacro.
// macro expansion: creates new tokens by copying replacement
// tokens and argument tokens. The replacement token copy already
// has the right line number and should not be further modified.
// The argument tokens should be adjusted when they are created
// the first time and not further changed.
// Moral: tokens should have the correct (adjusted) line/file
// information when they are created, not just before
// being shifted to output stream.
// alternatively: if all line/file info ultimately comes from
// common tokens, adjust them first before using them
// to create CivlcTokens.
jumpNextNode();
}
// Utility methods...
/**
* Determines whether a node in the preprocessor parse tree is one of the
* conditional nodes "#if", "#ifdef", "#ifndef", "#elif".
*
* @param node a non-<code>null</code> node in a preprocessor parse tree
* @return <code>true</code> iff the node is one of the preprocessor conditional
* nodes, else <code>false</code>
*/
private static boolean isConditional(CommonTree node) {
Token token = node.getToken();
if (token == null)
return false;
else {
int type = token.getType();
return type == PreprocessorParser.PIF || type == PreprocessorParser.IFDEF
|| type == PreprocessorParser.IFNDEF || type == PreprocessorParser.ELIF;
}
}
/**
* Returns next input tree node, or <code>null</code> if end of file has been
* reached. Does not modify anything.
*
* @return tree position for top entry of <code>sourceStack</code> or
* <code>null</code> if <code>sourceStack</code> is empty
*/
private Tree getNextInputNode() {
if (sourceStack.isEmpty())
return null;
return sourceStack.peek().getPosition();
}
private void setNextInputNode(Tree node) {
sourceStack.peek().setPosition(node);
}
/**
* Move next node to next node in Tree in DFS order, or null if at last node.
*
* When backtracking through a conditional node, this method will skip over the
* alternative branch. In other words, if we start in the "true" branch and then
* backtrack, we will NOT go down the false branch. This is because we only want
* to traverse one of the two branches when we preprocess a file.
*
* The inTextBlock flag is turned off when we exit a block.
*
* If given null this method is a no-op. If we are at the last node, it returns
* null. It should never throw an exception.
*/
private void incrementNextNode() {
Tree node = getNextInputNode();
if (node != null) {
if (node.getChildCount() > 0)
setNextInputNode(node.getChild(0));
else
jumpNextNode();
}
}
/**
* <p>
* Move next node to the next sibling. If there is no next sibling, proceed as
* in DFS. In other words, this is the same as DFS after removing all children
* of the current next node.
* </p>
*
* <p>
* When backtracking through a conditional node, this method will skip over the
* alternative branch. In other words, if we start in the "true" branch and then
* backtrack, we will NOT go down the false branch. This is because we only want
* to traverse one of the two branches when we preprocess a file.
* </p>
*
* <p>
* The {@link #inTextBlock}, {@link #inInlineAnnotation}, and
* {@link #inBlockAnnotation} flags are turned off when we exit a block.
* </p>
*
* <p>
* If given null this method is a no-op. If there is no node to jump to, it
* returns null. It should never throw an exception.
* </p>
*/
private void jumpNextNode() {
CommonTree node = (CommonTree) getNextInputNode();
if (node != null) {
while (true) {
int index = node.getChildIndex() + 1;
node = (CommonTree) node.getParent();
if (node == null)
break;
if (!isConditional(node) && index < node.getChildCount()) {
// move to next sibling
node = (CommonTree) node.getChild(index);
break;
}
// a nontrivial backtrack is taking place
// if you were in a TEXT_BLOCK (including in
// an annotation) or PRAGMA, now you're not...
inTextBlock = false;
inInlineAnnotation = false;
inBlockAnnotation = false;
}
setNextInputNode(node);
}
}
private boolean peekAheadSkipWSHasType(int tokenType) {
CommonTree node = (CommonTree) getSuccessorNode(getNextInputNode());
while (node != null) {
Token token = node.getToken();
if (!PreprocessorUtils.isWhiteSpace(token)) {
return token != null && token.getType() == tokenType;
}
node = (CommonTree) getSuccessorNode(node);
}
return false;
}
/**
* Returns the node that follows the given node in DFS order without modifying
* the state. Alternative conditional branches are skipped, as usual. It is
* useful for peeking ahead.
*
* @param node any node in the tree.
* @return the next node in the DFS traversal
*/
private Tree getSuccessorNode(Tree node) {
if (node == null)
return null;
else {
int numChildren = node.getChildCount();
if (numChildren > 0)
return node.getChild(0);
while (true) {
int index = node.getChildIndex() + 1;
node = node.getParent();
if (node == null)
return null;
if (!isConditional((CommonTree) node) && index < node.getChildCount()) {
return node.getChild(index);
}
}
}
}
private Formation getIncludeHistory() {
return sourceStack.peek().getIncludeHistory();
}
// Methods modifying the output list...
/**
* <p>
* Empties the string literal buffer. The buffer contains a sequence of string
* literal tokens and possibly some white space tokens at the end. The string
* literals are concatenated to from a single string token which is inserted
* into the output buffer by method {@link #addOutputHelper(CivlcToken)}. Then
* the white space tokens are all added to the output buffer as well. Note that
* any white space between two string literals was already removed from this
* buffer, so only the white space after the last string literal in the sequence
* is actually sent to the output stream.
* </p>
*
* <p>
* Precondition: <code>stringLiteralBuffer</code> is not empty.
* </p>
*
* @throws PreprocessorException if the strings cannot be concatenated for some
* reason
*/
private void emptyStringLiteralBuffer() throws PreprocessorException {
assert !stringLiteralBuffer.isEmpty();
List<CivlcToken> pureStringTokens = new LinkedList<>();
List<CivlcToken> extraWhiteSpaces = new LinkedList<>();
for (CivlcToken stringToken : stringLiteralBuffer) {
if (PreprocessorUtils.isWhiteSpace(stringToken))
extraWhiteSpaces.add(stringToken);
else
pureStringTokens.add(stringToken);
}
stringLiteralBuffer.clear();
try {
StringToken result = pureStringTokens.size() != 1 ? tokenFactory.newStringToken(pureStringTokens)
: tokenFactory.newStringToken(pureStringTokens.get(0));
addOutputHelper(result);
} catch (SyntaxException e) {
throw new PreprocessorException(e.getMessage(), pureStringTokens.get(0));
}
for (CivlcToken ws : extraWhiteSpaces)
addOutputHelper(ws);
}
/**
* <p>
* Adds a single token to output buffer. Special handling is needed for string
* literals and some other cases.
* </p>
*
* <p>
* The string literals may be separated by some white space tokens. All of these
* consecutive literals (ignoring the possible white space) must be concatenated
* into a single string literal token. Therefore they are inserted into a
* separate buffer, the string literal buffer, until a token is reached that is
* not a string literal or white space and the buffer should be processed to
* form a single token which is inserted into the output stream.
* </p>
*
* <p>
* White space is made invisible, with the following exception: a NEWLINE
* terminating an inline annotation or a pragma. Such a NEWLINE is part of the
* grammar of the CIVL-C language and is needed to delineate the end of the
* annotation/pragma.
* </p>
*
* @param token a token to add to output buffer
* @throws PreprocessorException if something goes wrong concatenating strings
* or forming a character
*/
private void addOutput(CivlcToken token) throws PreprocessorException {
int type = token.getType();
if (type == PreprocessorParser.STRING_LITERAL) {
// first remove any white space tokens at the end of the list, as
// we throw away any white space between two adjacent string
// literals. Then add the literal to the SLB.
while (!stringLiteralBuffer.isEmpty() && PreprocessorUtils.isWhiteSpace(stringLiteralBuffer.getLast()))
stringLiteralBuffer.removeLast();
stringLiteralBuffer.add(token);
} else if (type == PreprocessorParser.NEWLINE && (inInlineAnnotation || inPragma)) {
// a NEWLINE while in an inlineAnnotation or pragma always
// ends that annotation or pragma and clears the string literal
// buffer...
if (!stringLiteralBuffer.isEmpty())
emptyStringLiteralBuffer();
addOutputHelper(token); // keep that NEWLINE visible
inInlineAnnotation = false;
inPragma = false;
} else if (type == PreprocessorParser.AT && (inInlineAnnotation || inBlockAnnotation)) {
// ignore @s in annotations (that is the ACSL way)
token.setChannel(Token.HIDDEN_CHANNEL);
addOutputHelper(token);
} else { // cases:
// 1. white, empty : invisible, output
// 2. white, nonempty : invisible, SLB
// 3. not white: emptySLB, output
if (PreprocessorUtils.isWhiteSpace(token)) {
token.setChannel(Token.HIDDEN_CHANNEL);
if (stringLiteralBuffer.isEmpty())
addOutputHelper(token);
else
stringLiteralBuffer.add(token);
} else {
if (!stringLiteralBuffer.isEmpty())
emptyStringLiteralBuffer();
addOutputHelper(token);
}
}
}
/**
* Actually adds the token to the linked list which forms the output buffer.
* Makes adjustments to character tokens. Sets the index of the token to the
* current outputTokenCount. Increments outputTokenCount.
*
* @param token
* @throws PreprocessorException
*/
private void addOutputHelper(CivlcToken token) throws PreprocessorException {
int type = token.getType();
if (type == PreprocessorParser.CHARACTER_CONSTANT) {
try {
token = tokenFactory.characterToken(token);
} catch (SyntaxException e) {
throw new PreprocessorException(e.getMessage(), e.getSource().getFirstToken());
}
}
token.setIndex(outputTokenCount);
if (saveTokens)
theTokens.add(token);
outputTokenCount++;
if (firstOutput == null) {
firstOutput = lastOutput = token;
} else {
lastOutput.setNext(token);
token.setNext(null);
lastOutput = token;
}
}
private void addOutputList(Pair<CivlcToken, CivlcToken> list) throws PreprocessorException {
CivlcToken previous = null, current = list.left;
while (current != null) {
previous = current;
current = current.getNext();
addOutput(previous);
}
}
/**
* Creates a new {@link CivlcToken} from the given {@link Tree} node and adds
* the token to the end of the output list. Tokens for preprocessor keywords
* (e.g., "define") have their types changed to IDENTIFIER.
*
* @param node a {@link CommonTree} node
* @throws PreprocessorException if something goes wrong concatenating strings
* when the token is shifted to output buffer
*/
private void shiftToOutput(Tree node) throws PreprocessorException {
Token token = ((CommonTree) node).getToken();
PreprocessorUtils.convertPreprocessorIdentifiers(token);
CivlcToken output = tokenFactory.newCivlcToken(token, getIncludeHistory(), TokenVocabulary.PREPROC);
addOutput(output);
}
/**
* Removes a token from the front of the output list.
*
* @return the first token, i.e., the one removed
*/
private CivlcToken removeOutput() {
CivlcToken result = firstOutput;
if (result == null)
throw new PreprocessorRuntimeException("Internal error: no output to remove");
firstOutput = result.getNext();
if (firstOutput == null)
lastOutput = null;
return result;
}
// Public methods...
@Override
public String toString() {
return "PreprocessorTokenSource[" + getSourceName() + "]";
}
@Override
public TokenFactory getTokenFactory() {
return tokenFactory;
}
@Override
public FileIndexer getIndexer() {
return indexer;
}
/**
* Returns name of current file being scanned. I.e., the name returned by this
* method will change dynamically as new files are included.
*/
@Override
public String getSourceName() {
SourceFile sourceFile = getCurrentSource();
if (sourceFile != null)
return sourceFile.getPath();
return "<unknown source>";
}
@Override
public int getNumTokens() {
return theTokens.size();
}
@Override
public CivlcToken getToken(int index) {
return theTokens.get(index);
}
@Override
public Collection<SourceFile> getSourceFiles() {
return sourceFiles;
}
/**
* {@inheritDoc}
*
* <p>
* Returns the next token in the post-preprocessing stream. This is the main
* method that must be implemented in order to implement ANTLR's
* {@link TokenSource} interface.
* </p>
*
* <p>
* EOF will be the last token returned, and if subsequent calls to this method
* are made, it will continue to return EOF forever. This seems to be what
* ANTLR's parsers expect.
* </p>
*
* @exception PreprocessorRuntimeException if anything goes wrong in trying to
* find the next token. Note that this
* method cannot throw a
* {@link PreprocessorException} because
* the interface that it implements does
* not specify any exception being
* thrown
*/
@Override
public CivlcToken nextToken() {
// nextToken_calls++;
if (finalToken != null)
return finalToken;
while (firstOutput == null || firstOutput.getType() != PreprocessorLexer.EOF && firstOutput == lastOutput)
try {
processNextNode();
} catch (PreprocessorException e) {
PreprocessorRuntimeException pre = new PreprocessorRuntimeException(e);
pre.setStackTrace(e.getStackTrace());
throw pre;
} catch (PreprocessorRuntimeException e) {
throw e;
} catch (RuntimeException e) {
PreprocessorRuntimeException pre = new PreprocessorRuntimeException(e.toString(), firstOutput);
pre.setStackTrace(e.getStackTrace());
throw pre;
}
if (firstOutput.getType() == PreprocessorLexer.EOF)
finalToken = firstOutput;
return removeOutput();
}
}
class MacroDefinedPredicate implements StringPredicate {
private Map<String, Macro> macroMap;
MacroDefinedPredicate(Map<String, Macro> macroMap) {
this.macroMap = macroMap;
}
@Override
public boolean holds(String string) {
return macroMap.containsKey(string);
}
}