TreeUtils.java
package edu.udel.cis.vsl.abc.front.common.parse;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree;
import edu.udel.cis.vsl.abc.token.IF.CivlcToken;
/**
* General utility class for operations on ANTLR Trees.
*
* @author siegel
*
*/
public class TreeUtils {
/**
* <p>
* Sets some fields of the tokens that occur in the tree.
* </p>
*
* <p>
* I know ANTLR is supposed to do this but I don't think it does it right.
* First, the tokenIndexes are not always what I expect. For some reason,
* ANTLR's CommonTokenStream sets the index of the last token (EOF) to be
* one higher than it should be, so there is a gap in the indexes between
* the penultimate token and the last token. I introduced my own "index"
* field to CToken (which extends CommonToken) and set it myself correctly.
* </p>
*
* <p>
* Second, ANTLR is supposed to find the range of tokens spanned by each
* node in the tree (by examining all descendants of the node). However:
* first, the code that does this uses ANTLR's tokenIndex, and I want to do
* it using my index. Second, the ANTLR code is only correct under the
* assumption that the token indices are non-decreasing as child index
* increases, i.e., the token index of child i is less than or equal to that
* of child i+1, for all i, for all nodes. (Hence it only has to examine the
* first and last child.) There is no reason that assumption has to hold. So
* I compute this correctly (and using CToken indexes) and re-set the
* "tokenStartIndex" and "tokenStopIndex" fields of each tree node.
* </p>
*
* @param tree
* a tree resulting from executing an ANTLR parser
*/
public static void postProcessTree(CommonTree tree) {
initPostProcess(tree);
completePostProcess(tree);
}
/**
* Marks all nodes as "not yet visited"---indicating by the magic number
* -999 for tokenStartIndex and tokenStopIndex.
*
* @param tree
* root of tree to be explored
*/
private static void initPostProcess(CommonTree tree) {
int numChildren = tree.getChildCount();
tree.setTokenStartIndex(-999);
tree.setTokenStopIndex(-999);
for (int i = 0; i < numChildren; i++)
initPostProcess((CommonTree) tree.getChild(i));
}
/**
* <p>
* Computes the actual start and stop index of each node in the tree.
* </p>
*
* <p>
* Precondition: nodes reachable from <code>tree</code> have not yet been
* visited by this method iff their start and stop indexes are both -999.
* </p>
*
* <p>
* If there is no CToken occurring in a node or any of its descendants, the
* start and stop index of that node will both be set to -1.
* </p>
*
* @param tree
* root of the ANTLR tree that is to be processed
*/
private static void completePostProcess(CommonTree tree) {
if (tree.getTokenStartIndex() != -999)
return;
else {
int numChildren = tree.getChildCount();
Token token = tree.getToken();
int min, max;
if (token instanceof CivlcToken) {
min = max = ((CivlcToken) token).getIndex();
} else {
min = max = -1;
}
for (int i = 0; i < numChildren; i++) {
CommonTree child = (CommonTree) tree.getChild(i);
int childMin, childMax;
completePostProcess(child);
childMin = getSubTreeStartIndex(child);
childMax = getSubTreeStopIndex(child);
if (childMin >= 0 && (min < 0 || childMin < min))
min = childMin;
if (childMax >= 0 && (max < 0 || childMax > max))
max = childMax;
}
tree.setTokenStartIndex(min);
tree.setTokenStopIndex(max);
}
}
private static int getSubTreeStartIndex(CommonTree tree) {
int index = tree.getTokenStartIndex();
Token token = tree.getToken();
if (token != null && index == token.getTokenIndex()
&& !(token instanceof CivlcToken)) {
index = -1;
}
return index;
}
private static int getSubTreeStopIndex(CommonTree tree) {
int index = tree.getTokenStopIndex();
Token token = tree.getToken();
if (token != null && index == token.getTokenIndex()
&& !(token instanceof CivlcToken)) {
index = -1;
}
return index;
}
}