MacroExpander.java
package edu.udel.cis.vsl.abc.front.c.preproc;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.Token;
import edu.udel.cis.vsl.abc.front.IF.CivlcTokenConstant;
import edu.udel.cis.vsl.abc.front.IF.PreprocessorException;
import edu.udel.cis.vsl.abc.token.IF.CivlcToken;
import edu.udel.cis.vsl.abc.token.IF.Formation;
import edu.udel.cis.vsl.abc.token.IF.FunctionMacro;
import edu.udel.cis.vsl.abc.token.IF.FunctionMacro.FunctionReplacementUnit;
import edu.udel.cis.vsl.abc.token.IF.Macro;
import edu.udel.cis.vsl.abc.token.IF.Macro.ReplacementUnit;
import edu.udel.cis.vsl.abc.token.IF.ObjectMacro;
import edu.udel.cis.vsl.abc.token.IF.TokenFactory;
import edu.udel.cis.vsl.abc.token.IF.CivlcToken.TokenVocabulary;
import edu.udel.cis.vsl.abc.util.IF.Pair;
/**
* <p>
* This class is used to execute a single {@link Macro} invocation. It is used
* for both {@link ObjectMacro}s and {@link FunctionMacro}s. For both kinds of
* {@link Macro}s, it performs concatenation ("##"). For {@link FunctionMacro}s,
* it also substitutes arguments for corresponding formal parameters in the
* macro definition body, and performs stringification ("#").
* </p>
*
* <p>
* The result, returned by method {@link #expand()}, is a null-terminated linked
* list of {@link CivlcToken}. The elements of the {@link Pair} returned are the
* first and last elements of the list.
* </p>
*
* <p>
* For {@link FunctionMacro}s, the given arguments will not be modified and will
* not be re-used in the returned result. I.e., each node in the result will be
* a fresh new token.
* </p>
*
* <p>
* The histories (@link Formation}s) of the new tokens will all be set
* appropriately. The history of a new token generated from a replacement token
* which was not a formal parameter is obtained by appending a new expansion
* record (macro, replacement-index) to the history of the origin. The history
* of a new token generated from an argument token (which is substituted for a
* formal parameter) is obtained by appending the new expansion record to the
* history of the argument token.
* </p>
*/
public class MacroExpander {
// Static constants...
/**
* This is the integer token type for the temporary "placemarker" tokens
* used to perform token concatenation. All placemarker tokens will be
* removed before the final result is returned. Used in
* {@link FunctionMacro}s only.
*/
private final static int placemarkerType = -999;
// Instance fields...
/**
* The preprocessor token source that will be used to expand a list of
* tokens (method {@link PreprocessorTokenSource#expandList(CivlcToken)}.
* This is needed for {@link FunctionMacro} invocations, because the
* arguments may need to be expanded before being substituted for the formal
* parameters. Hence there is a mutual recursion between this class and the
* token source.
*/
private PreprocessorTokenSource ts;
/**
* The token factory that will be used to produce new {@link CivlcToken}s
* and {@link Formation}s.
*/
private TokenFactory tokenFactory;
/**
* The macro which is being applied.
*/
private Macro macro;
/**
* The original {@link CivlcToken} which is an identifier spelling the
* macro's name and which led to this expansion.
*/
private CivlcToken origin;
/**
* The given iterator over the tokens that comprise the arguments of a
* function macro. Not used for object macro.
*/
private Iterator<CivlcToken> argumentIter;
/**
* For {@link FunctionMacro}s, the number of formal parameters, including
* the possible "..." parameter. 0 for {@link ObjectMacro}s.
*/
private int numParameters;
/**
* For {@link FunctionMacro}s, actual arguments to macro invocation. The
* length must equal {@link #numParameters} --- if there are more actual
* arguments occurring in the text than the number of parameters in a
* variadic macro, the final arguments beyond the n-th are treated as a
* single argument, to ensure in the end that the number of arguments is
* exactly equal to the number of formals. Element i is a null-terminated
* linked list of tokens. Element i may be <code>null</code>, indicating an
* empty argument. Not used for {@link ObjectMacro}s.
*/
private CivlcToken[] arguments;
/**
* For a function-like macro, the argument list begins with a "(" token; the
* arguments are separated by "," tokens, and the argument list is
* terminated with a closing ")" token. If there are n arguments, there are
* n+1 such delimiter tokens. (Exception: if there are 0 arguments, there
* are still 2 delimiter tokens.)
*/
private CivlcToken[] delimiters;
/**
* For {@link FunctionMacro}s, the cached results of macro expanding each of
* the arguments. Each entry gives the first and last elements in a
* null-terminated list. In case the expansion is empty, the first and last
* elements will both be null. Not used for {@link ObjectMacro}s.
*/
private Pair<CivlcToken, CivlcToken>[] expandedArguments;
/**
* Special token used in a temporary phase to perform concatenations ("##");
* see C11. The type of this token is {@link #placemarkerType}. Not used for
* {@link ObjectMacro}s.
*/
private CivlcToken placemarker;
// Constructor...
/**
* Constructs new expander for a {@link FunctionMacro} application. No work
* will be done at this time. The work will be done when method
* {@link #expand()} is invoked.
*
* @param ts
* the preprocessor token source that will be used to macro
* expand the arguments
* @param macro
* the function-like macro that is being applied
* @param origin
* the original {@link CivlcToken} which is an identifier
* spelling the macro's name and which led to its expansion.
* @param arguments
* actual arguments to macro invocation. Element i is a
* null-terminated linked list of tokens. Element i may be null,
* indicating an empty argument.
*/
public MacroExpander(PreprocessorTokenSource ts, FunctionMacro macro,
CivlcToken origin, Iterator<CivlcToken> argumentIter) {
this.ts = ts;
this.tokenFactory = ts.getTokenFactory();
this.macro = macro;
this.origin = origin;
this.argumentIter = argumentIter;
this.placemarker = tokenFactory.newCivlcToken(placemarkerType, "", null,
TokenVocabulary.PREPROC);
if (macro instanceof FunctionMacro)
this.numParameters = ((FunctionMacro) macro).getNumFormals();
else
this.numParameters = 0;
}
/**
* Constructs new expander for an invocation of an {@link ObjectMacro}.
*
* @param ts
* the preprocessor token source that will be used to macro
* expand the arguments
* @param macro
* the object-like macro that is being applied
* @param origin
* the original {@link CivlcToken} which is an identifier
* spelling the macro's name and which led to its expansion.
*/
public MacroExpander(PreprocessorTokenSource ts, ObjectMacro macro,
CivlcToken origin) {
this.ts = ts;
this.tokenFactory = ts.getTokenFactory();
this.macro = macro;
this.origin = origin;
this.arguments = null;
this.numParameters = 0;
this.expandedArguments = null;
this.placemarker = null;
}
// Helpers...
/**
* A class used to represent a single preprocessing token resulting from
* expansion, along with possible white space before and after.
*
* @author siegel
*/
private class ExpandedToken {
/**
* Possible whitespace preceding the token. Always non-<code>null</code>
* but may be empty.
*/
ArrayList<CivlcToken> prews;
/**
* The token itself. May be <code>null</code>.
*/
CivlcToken token;
/**
* Possible whitespace following the token. Always non-<code>null</code>
* but may be empty.
*/
ArrayList<CivlcToken> postws;
/**
* Is this a concatenation operator ("##") which should be executed?
* Default is <code>false</code>.
*/
private boolean concat;
/**
* Constructs new instance from the given fields. Nothing is copied.
*
* @param prews
* the whitespace preceding the token; non-<code>null</code>
* @param token
* the token itself; may be <code>null</code>
* @param postws
* the whitespace following the token; non-<code>null</code>
*/
ExpandedToken(ArrayList<CivlcToken> prews, CivlcToken token,
ArrayList<CivlcToken> postws) {
this.prews = prews;
this.token = token;
this.postws = postws;
}
/**
* Constructs new instance with no preceding whitespace.
*
* @param token
* the token itself; may be <code>null</code>
* @param postws
* the whitespace following the token; non-<code>null</code>
*/
ExpandedToken(CivlcToken token, ArrayList<CivlcToken> postws) {
this(new ArrayList<CivlcToken>(), token, postws);
}
/**
* Constructs new instance with no preceding or following whitespace.
*
* @param token
* the token itself; may be <code>null</code>
*/
ExpandedToken(CivlcToken token) {
this(new ArrayList<CivlcToken>(), token,
new ArrayList<CivlcToken>());
}
/**
* Is this a concatenation operator ("##") which should be executed?
* Default is <code>false</code>. Note that "##" tokens that come from
* arguments are NOT executed. Only those that come from the macro
* replacement sequence are executed.
*
* @return <code>true</code> iff is a concatenation operator ("##")
* which should be executed
*/
boolean isConcat() {
return concat;
}
/**
* Specifies whether this is a concatenation operator ("##") which
* should be executed. Default is <code>false</code>.
*
* @param flag
* Is this a concatenation operator ("##") which should be
* executed?
*/
void setConcat(boolean flag) {
this.concat = flag;
}
}
/**
* <p>
* Finds the invocation arguments from a sequence of tokens beginning just
* after the macro name. The first token may be "(" or some white space
* preceding the "(". This method will consume from the
* {@link #argumentIter} stopping just after consuming the closing ")"
* token. It will modify the tokens obtained from the iterator by changing
* the next token field of the last token in an argument to
* <code>null</code>. The effect is to set {@link #arguments} to an array
* whose length is the number of arguments, in which element i points to the
* first token of the i-th actual argument, or is <code>null</code> if that
* actual argument is empty.
* </p>
*
* <p>
* In the case of a variadic macro with n parameters, where n includes the
* final formal parameter "...": the number of actual arguments must be at
* least n. Any arguments beyond the n-th will be joined together (including
* the commas) to form a single n-th argument. So in the end, the number of
* arguments will be n.
* </p>
*
* <p>
* This method does NOT create any new tokens. It DOES modify the tokens
* coming out of the iterator by setting their "next" fields appropriately.
* The "next" field in the last token for each argument is set to null, in
* order to terminate the new list. The commas separating arguments are
* spliced out of the list.
* </p>
*
* <p>
* Note that any or all of the entries in the array returned may be
* <code>null</code>. This is because a macro argument can be empty, as in
* "f()" or "f(1,,3)".
* </p>
*
* <p>
* The first token in the given argumentIterator had better be LPAREN (left
* parenthesis). Otherwise this is not an invocation of a function macro.
* The arguments finish when the matching RPAREN is reached.
* </p>
*
* <p>
* It is guaranteed that this method will invoke next() on the
* argumentIterator until it reaches the final ')' (unless that is not
* possible for some reason, in which case an exception is thrown). The
* method will not iterate beyond that point. In other words, after this
* method returns, the "cursor" in the iterator will be at the point just
* after the ')'.
* </p>
*
* <p>
* Implementation notes: doing a little parsing here. Scan the tokens
* dividing into the following classes: LPAREN, RPAREN, COMMA, and OTHER.
* </p>
*
* @exception PreprocessorException
* if the number of actual arguments does not match the
* number of formals in the macro definition
*/
private void findInvocationArguments() throws PreprocessorException {
int argCount = 0, type, parenDepth;
CivlcToken token, previousToken;
List<CivlcToken> delimiterList = new LinkedList<>();
boolean variadic = ((FunctionMacro) macro).isVariadic();
arguments = new CivlcToken[numParameters];
// first, skip through all the white space to get to the '(':
while (true) {
if (!argumentIter.hasNext())
throw new PreprocessorException("Macro invocation " + macro
+ " does not contain any arguments");
token = argumentIter.next();
if (!PreprocessorUtils.isWhiteSpace(token))
break;
}
if (token.getType() != PreprocessorLexer.LPAREN)
throw new PreprocessorException(
"Invocation of function macro does not begin with '(': "
+ macro,
token);
delimiterList.add(token);
if (numParameters == 0) {
if (!argumentIter.hasNext())
throw new PreprocessorException(
"Macro invocation " + macro + " does not end in ')'",
token);
token = argumentIter.next();
if (token.getType() != PreprocessorLexer.RPAREN)
throw new PreprocessorException(
"Invocation of function macro with 0 arguments"
+ "must have form f() with no spaces between the parentheses",
token);
delimiterList.add(token);
} else { // numParameters > 0
parenDepth = 1;
// argCount is the total number of actual arguments completely
// consumed = number of separating commas consumed
while (true) {
if (!argumentIter.hasNext())
throw new PreprocessorException(
"Not enough arguments to macro " + macro, token);
previousToken = token;
token = argumentIter.next();
type = token.getType();
// variadic macro: f(x,y,...) : numParameters=3
// consumed: (a,b,c : argCount=2
// see ",": (a,b,c, : not a separating comma. argCount remains 2
if (parenDepth == 1 && type == PreprocessorLexer.COMMA
&& (!variadic || argCount < numParameters - 1)) {
// this is a separating comma
if (arguments[argCount] != null)
previousToken.setNext(null);
argCount++;
if (argCount >= numParameters)
throw new PreprocessorException(
"Number of arguments to macro " + macro
+ " exceeds " + "expected number "
+ numParameters,
token);
delimiterList.add(token);
} else {
if (type == PreprocessorLexer.LPAREN)
parenDepth++;
else if (type == PreprocessorLexer.RPAREN) {
parenDepth--;
if (parenDepth < 0)
throw new PreprocessorException("Extra ')'", token);
if (parenDepth == 0) {
if (arguments[argCount] != null)
previousToken.setNext(null);
argCount++;
delimiterList.add(token);
break;
}
}
if (arguments[argCount] != null)
previousToken.setNext(token);
else
arguments[argCount] = token;
}
}
if (argCount != numParameters)
throw new PreprocessorException("Invocation of macro "
+ macro.getName() + ": expected " + numParameters
+ " arguments but saw " + argCount, token);
}
delimiters = delimiterList
.toArray(new CivlcToken[delimiterList.size()]);
if (numParameters == 0)
assert delimiters.length == 2;
else
assert delimiters.length == numParameters + 1;
}
/**
* Creates clone of null-terminated linked list of {@link CivlcToken}.
*
* @param token
* first element in linked list; could be <code>null</code>
* @return first element in cloned list
*/
private CivlcToken cloneList(CivlcToken token) {
CivlcToken current, previous = null, result = null;
for (CivlcToken t = token; t != null; t = t.getNext()) {
current = tokenFactory.newCivlcToken(t, t.getFormation(),
t.getTokenVocab());
if (previous != null)
previous.setNext(current);
if (result == null)
result = current;
previous = current;
}
return result;
}
/**
* Returns the result of macro-expanding the token sequence of the index-th
* argument, using the {@link PreprocessorTokenSource} to perform the
* expansions.
*
* @param index
* index into {@link #arguments} array
* @return the result of macro-expanding the token sequence which is the
* link list headed by the index-th argument
* @throws PreprocessorException
* if an error occurs in expansion, such as wrong number of
* arguments in a macro invocation
*/
private CivlcToken getExpandedArgument(int index)
throws PreprocessorException {
if (expandedArguments[index] == null)
expandedArguments[index] = ts
.expandList(cloneList(arguments[index]));
return expandedArguments[index].left;
}
/**
* Gets the index-th argument that should be substituted for a formal
* parameter in the case where that formal parameter immediately precedes or
* follows a concatenation operation ("##"). In that case the plain
* (unexpanded) argument is used, unless it is empty, in which case a single
* placemarker token is used.
*
* @param index
* integer in [0,numArgs-1]
* @return first token in index-th argument's token sequence, or placemarker
*/
private CivlcToken getSpecialArgument(int index) {
CivlcToken result = arguments[index];
if (result == null)
return placemarker;
return result;
}
/**
* Creates a single token by concatenating the text of a sequence of a
* non-whitespace tokens. A placemarker token is treated as the identity
* element for concatenation.
*
* @param tokens
* list of tokens, all of which are non-whitespace. List may
* contain placemarkers. List may be empty.
* @return a single token whose text is obtained by concatenating the text
* of the give tokens; result may be placemarker
* @throws PreprocessorException
* if the text resulting from concatenation is does not spell a
* token name
*/
private CivlcToken concatenate(List<CivlcToken> tokens)
throws PreprocessorException {
StringBuffer concatBuffer = new StringBuffer();
Formation formation = tokenFactory.newConcatenation(tokens);
for (CivlcToken t : tokens) {
int type = t.getType();
if (type != placemarkerType)
concatBuffer.append(t.getText());
}
if (concatBuffer.length() == 0)
return placemarker;
String concatString = concatBuffer.toString();
CharStream charStream = new ANTLRStringStream(concatString);
PreprocessorLexer lexer = new PreprocessorLexer(charStream);
Token newToken = null;
try {
newToken = lexer.nextToken();
} catch (Exception e) {
}
if (newToken != null) {
Token nextToken = lexer.nextToken();
if (nextToken != null && nextToken.getType() == Token.EOF)
return tokenFactory.newCivlcToken(newToken, formation,
TokenVocabulary.PREPROC);
}
throw new PreprocessorException(
"Result of # concatenation not token: " + concatString,
tokens.get(tokens.size() - 1));
}
/**
* Places a backslash before every backslash and double quote in a string.
*
* @param s
* the given string
* @return the result of placing a backslash before every backslash and
* double quote in <code>s</code>
*/
private String escape(String s) {
s = s.replaceAll("\\", "\\\\");
s = s.replaceAll("\"", "\\\"");
return s;
}
/**
* <p>
* Given a null-terminated list of tokens, this methods takes the text of
* the non-whitespace tokens in that list and concatenates them using a
* single space as separator, finally forming a single string literal token
* from the resulting text. The exact rules are specified in C11
* 6.10.3.2(2):
* </p>
*
* <blockquote>Each occurrence of white space between the argument’s
* preprocessing tokens becomes a single space character in the character
* string literal. White space before the first preprocessing token and
* after the last preprocessing token composing the argument is deleted.
* Otherwise, the original spelling of each preprocessing token in the
* argument is retained in the character string literal, except for special
* handling for producing the spelling of string literals and character
* constants: a \ character is inserted before each " and \ character of a
* character constant or string literal (including the delimiting "
* characters), except that it is implementation-defined whether a \
* character is inserted before the \ character beginning a universal
* character name. If the replacement that results is not a valid character
* string literal, the behavior is undefined. The character string literal
* corresponding to an empty argument is "".</blockquote>
*
* Example:
*
* <pre>
* #define f(X) # X
* f(this "is" not a "newline: \n") :
*
* "this \"is\" not a \"newline: \\n\""
* </pre>
*
* <p>
* This is used for function-like macros only. Object-like macros do not use
* stringification since the argument to the stringification operator "#" is
* a formal parameter.
* </p>
*
* @param tokenIndex
* the index of the occurrence of the parameter in the macro body
* token sequence
* @return the string literal token formed by concatenating the token text
* as specified in C11 6.10.3.2 (2)
* @throws PreprocessException
* if the tokenIndex-th replacement token in the macro
* definition is not an occurrence of a formal parameter
*/
private CivlcToken stringifyTokenSequence(int tokenIndex)
throws PreprocessorException {
FunctionReplacementUnit ru = ((FunctionMacro) macro)
.getReplacementUnit(tokenIndex);
if (ru.formalIndex < 0)
throw new PreprocessorException(
"# must be followed by macro parameter in function-like macro definition",
ru.token);
CivlcToken token = arguments[ru.formalIndex];
StringBuffer concatBuffer = new StringBuffer();
boolean first = true;
ArrayList<CivlcToken> tokenList = new ArrayList<>();
concatBuffer.append('"'); // open " of new string literal
for (CivlcToken t = token; t != null; t = t.getNext()) {
if (PreprocessorUtils.isWhiteSpace(t))
continue;
String tokenText = t.getText();
int type = t.getType();
if (type == PreprocessorLexer.STRING_LITERAL
|| type == PreprocessorLexer.CHARACTER_CONSTANT)
tokenText = escape(tokenText);
if (first)
first = false;
else
concatBuffer.append(" ");
concatBuffer.append(tokenText);
tokenList.add(t);
}
concatBuffer.append('"'); // close " of new string literal
String text = concatBuffer.toString();
Formation formation = tokenFactory.newStringification(
(FunctionMacro) macro, tokenIndex, tokenList);
// delimiter immediately preceding empty argument: "(" or ","
CivlcToken delimiter = delimiters[ru.formalIndex];
int start, stop, line, charPositionInLine;
// RETHINk THIS...
if (token == null) { // empty argument
start = ((CommonToken) delimiter).getStartIndex() + 1;
stop = ((CommonToken) delimiters[ru.formalIndex + 1])
.getStartIndex() - 1;
line = delimiter.getLine();
charPositionInLine = delimiter.getCharPositionInLine();
} else {
start = ((CommonToken) token).getStartIndex();
stop = ((CommonToken) tokenList.get(tokenList.size() - 1))
.getStopIndex();
line = token.getLine();
charPositionInLine = token.getCharPositionInLine();
}
CivlcToken newToken = tokenFactory.newCivlcToken(
delimiter.getInputStream(), CivlcTokenConstant.STRING_LITERAL,
delimiter.getChannel(), start, stop, formation, line,
charPositionInLine, TokenVocabulary.PREPROC);
newToken.setText(text);
return newToken;
}
/**
* Determines whether the replacement token at given index immediately
* precedes or follows a "##" token in the replacement token sequence.
*
* @param index
* an index in the macro body token sequence for a replacement
* token
* @return <code>true</code> iff the replacement token immediately preceding
* or following this position is "##"
*/
private boolean isSpecial(int index) {
if (index > 0 && macro.getReplacementUnit(index - 1).token
.getType() == PreprocessorLexer.HASHHASH)
return true;
if (index < macro.getNumReplacements() - 1
&& macro.getReplacementUnit(index + 1).token
.getType() == PreprocessorLexer.HASHHASH)
return true;
return false;
}
/**
* Adds a token to an expansion list. Deals with white space properly.
*
* @param elist
* an expansion list. Non-<code>null</code>. May be empty. If not
* empty, then only the last element may have a <code>null</code>
* token field.
* @param token
* token that should be added to the <code>elist</code>
* @return the entry that contains the newly added token; may be an existing
* entry or a newly created one
*/
ExpandedToken add(ArrayList<ExpandedToken> elist, CivlcToken token) {
ExpandedToken result;
if (PreprocessorUtils.isWhiteSpace(token)) {
if (elist.isEmpty()) {
ArrayList<CivlcToken> prews = new ArrayList<CivlcToken>(),
postws = new ArrayList<CivlcToken>();
prews.add(token);
result = new ExpandedToken(prews, null, postws);
elist.add(result);
} else {
result = elist.get(elist.size() - 1);
if (result.token == null)
result.prews.add(token);
else
result.postws.add(token);
}
} else {
if (elist.isEmpty()) {
result = new ExpandedToken(token);
elist.add(result);
} else {
result = elist.get(elist.size() - 1);
if (result.token == null) {
result.token = token;
} else {
result = new ExpandedToken(token);
elist.add(result);
}
}
}
return result;
}
/**
* Creates a new white space token sequence by duplicating a white space
* token sequence from the macro definition body.
*
* @param ru
* a replacement unit in a macro definition
* @return new array of white space tokens formed by cloning the whitespace
* immediately following the replacement token specified by ru in
* the macro definition; the formation used will be a macro
* expansion based on ru
*/
private ArrayList<CivlcToken> cloneWhitespace(ReplacementUnit ru) {
int index = ru.index;
Token[] wstokens = ru.whitespace;
ArrayList<CivlcToken> result = new ArrayList<CivlcToken>(
wstokens.length);
Formation formation = tokenFactory.newMacroExpansion(origin, macro,
index);
for (int i = 0; i < wstokens.length; i++) {
Token token = wstokens[i];
result.add(tokenFactory.newCivlcToken(token, formation,
TokenVocabulary.PREPROC));
}
return result;
}
/**
* Adds a clone of the replacement unit to an expansion list.
*
* @param elist
* an expansion list
* @param ru
* a replacement unit
* @return the entry containing the token added; may be an existing entry
* (which must be the last one in the list) or a newly created one
*/
private ExpandedToken addClone(ArrayList<ExpandedToken> elist,
ReplacementUnit ru) {
CivlcToken newToken = tokenFactory.newCivlcToken(ru.token,
tokenFactory.newMacroExpansion(origin, macro, ru.index),
TokenVocabulary.PREPROC);
ArrayList<CivlcToken> whitespace = cloneWhitespace(ru);
ExpandedToken result;
if (elist.isEmpty()) {
result = new ExpandedToken(newToken, whitespace);
elist.add(result);
} else {
result = elist.get(elist.size() - 1);
if (result.token == null) {
result.token = newToken;
result.postws.addAll(whitespace);
} else {
result = new ExpandedToken(newToken, whitespace);
elist.add(result);
}
}
return result;
}
/**
* <p>
* Creates a sequence of new {@link CivlcToken}s (with associated white
* space) by copying the macro definition replacement list, replacing formal
* parameters with actual arguments. The stringification operator '#' is
* also executed.
* </p>
*
* <p>
* As specified in C11, an occurrence of a formal parameter in the macro
* replacement sequence that immediately follows a "#" or immediately
* follows or precedes a "##" is first replaced by the (unmodified) argument
* token list. All other occurrences of formal parameters in the replacement
* sequence are replaced by the expanded argument list (i.e., the argument
* is first macro expanded before being substituted). Moreover, if the
* parameter immediately precedes or follows a "##" and the corresponding
* argument list is empty (i.e., is all whitespace), then the formal is
* replaced by the special temporary "placemarker" token.
* </p>
*
* @return sequence of tokens with pre- and post-whitespace obtained by
* instantiating macro replacement list, substituting arguments for
* formals, and executing stringification operators
* @throws PreprocessorException
* if the stringification operator '#' is not followed
* immediately (except for white space) by an occurrence of a
* formal parameter
*/
private ArrayList<ExpandedToken> instantiateFunctionMacro()
throws PreprocessorException {
int numTokens = macro.getNumReplacements();
ArrayList<ExpandedToken> result = new ArrayList<>(numTokens);
findInvocationArguments();
@SuppressWarnings("unchecked")
Pair<CivlcToken, CivlcToken>[] temp = (Pair<CivlcToken, CivlcToken>[]) new Pair<?, ?>[numParameters];
this.expandedArguments = temp;
for (int i = 0; i < numTokens; i++) {
FunctionReplacementUnit ru = ((FunctionMacro) macro)
.getReplacementUnit(i);
Token token = ru.token;
int type = token.getType();
if (type == PreprocessorParser.HASH) {
if (i == numTokens - 1)
throw new PreprocessorException(
"# cannot be last token in a function-like macro definition",
token);
i++;
ru = ((FunctionMacro) macro).getReplacementUnit(i);
result.add(new ExpandedToken(stringifyTokenSequence(i),
cloneWhitespace(ru)));
} else if (type == PreprocessorParser.HASHHASH) {
addClone(result, ru).setConcat(true);
} else {
int formalIndex = ru.formalIndex;
if (ru.formalIndex < 0) {
addClone(result, ru);
} else { // splice in argument clone...
CivlcToken argument = isSpecial(i)
? getSpecialArgument(formalIndex)
: getExpandedArgument(formalIndex);
while (argument != null) {
CivlcToken newToken = tokenFactory.newCivlcToken(
argument, tokenFactory
.newMacroExpansion(argument, macro, i),
TokenVocabulary.PREPROC);
add(result, newToken);
argument = argument.getNext();
}
}
}
}
return result;
}
/**
* Creates sequence of new {@link CivlcToken}s (with associated whitespace)
* by cloning the replacement sequence.
*
* @return sequence of {@link ExpandedToken}s which are clones of the macro
* replacement sequence; "##" tokens have their "concat" bits set
*/
private ArrayList<ExpandedToken> instantiateObjectMacro() {
int numTokens = macro.getNumReplacements();
ArrayList<ExpandedToken> result = new ArrayList<>(numTokens);
for (int i = 0; i < numTokens; i++) {
ReplacementUnit ru = macro.getReplacementUnit(i);
Token token = ru.token;
int type = token.getType();
if (type == PreprocessorParser.HASHHASH) {
addClone(result, ru).setConcat(true);
} else {
addClone(result, ru);
}
}
return result;
}
/**
* Operates on a list of {@link ExpandedToken} by executing the
* concatenation operator "##". As specified in C11, the placemarker token
* acts as the identity for concatenation.
*
* @param input
* a list of expanded tokens
* @returns the result of executing all concatenation operators in the given
* list
* @throws PreprocessorException
* if the result of a concatenation does not spell a valid token
* name
*/
private ArrayList<ExpandedToken> processConcatenations(
ArrayList<ExpandedToken> input) throws PreprocessorException {
ArrayList<ExpandedToken> output = new ArrayList<>(input.size());
int numIn = input.size();
for (int i = 0; i < numIn; i++) {
ExpandedToken et = input.get(i);
if (et.isConcat()) {
assert i > 0;
assert !output.isEmpty();
LinkedList<CivlcToken> concatBuf = new LinkedList<>();
ExpandedToken concatOutput = output.get(output.size() - 1);
assert (concatOutput.token != null);
concatBuf.add(concatOutput.token);
do { // invariant: i, et point to a ## token
i++;
assert i < numIn;
et = input.get(i);
assert et.token != null;
concatBuf.add(et.token);
i++;
if (i == numIn)
break;
et = input.get(i);
} while (et.isConcat());
// i points to token after last token in concat sequence
i--;
// i points to last token in concat sequence
concatOutput.token = concatenate(concatBuf);
concatOutput.postws = input.get(i).postws;
} else {
output.add(et);
}
}
return output;
}
/**
* Removes any placemarkers from a list.
*
* @param input
* list of expanded tokens
* @return list obtained by removing any placemarker entries from given list
*/
private ArrayList<ExpandedToken> removePlacemarkers(
ArrayList<ExpandedToken> input) {
int n = input.size();
ArrayList<ExpandedToken> output = new ArrayList<>(n);
for (int i = 0; i < n; i++) {
ExpandedToken et = input.get(i);
if (et.token == null || et.token.getType() != placemarkerType)
output.add(et);
}
return output;
}
/**
* Given an expansion list, extracts all the tokens (including white space)
* in order, to create a flat, null-terminated linked list of
* {@link CivlcToken}s, which is the form of the final output of this class.
*
* @param input
* list of expanded tokens
* @return flat list of {@link CivlcToken}s
*/
private Pair<CivlcToken, CivlcToken> expansionToTokenList(
ArrayList<ExpandedToken> input) {
CivlcToken firstToken = null, previousToken = null;
for (ExpandedToken et : input) {
for (CivlcToken token : et.prews) {
if (previousToken == null)
firstToken = token;
else
previousToken.setNext(token);
previousToken = token;
}
if (et.token != null) {
if (previousToken == null)
firstToken = et.token;
else
previousToken.setNext(et.token);
previousToken = et.token;
}
for (CivlcToken token : et.postws) {
if (previousToken == null)
firstToken = token;
else
previousToken.setNext(token);
previousToken = token;
}
}
return new Pair<CivlcToken, CivlcToken>(firstToken, previousToken);
}
// Public methods...
/**
* Perform the macro expansion and return the result.
*
* @return the first and last elements in a null-terminated linked list of
* {@link CivlcToken} which results from performing the macro
* expansion
* @throws PreprocessorException
* if anything goes wrong in the expansion process
*/
public Pair<CivlcToken, CivlcToken> expand() throws PreprocessorException {
boolean isFunction = macro instanceof FunctionMacro;
ArrayList<ExpandedToken> expansion = isFunction
? instantiateFunctionMacro()
: instantiateObjectMacro();
expansion = processConcatenations(expansion);
if (isFunction)
expansion = removePlacemarkers(expansion);
return expansionToTokenList(expansion);
}
}