FortranTokenStream.java
/*******************************************************************************
* Copyright (c) 2005, 2006 Los Alamos National Security, LLC.
* This material was produced under U.S. Government contract DE-AC52-06NA25396
* for Los Alamos National Laboratory (LANL), which is operated by the Los Alamos
* National Security, LLC (LANS) for the U.S. Department of Energy. The U.S. Government has
* rights to use, reproduce, and distribute this software. NEITHER THE
* GOVERNMENT NOR LANS MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
* ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
* to produce derivative works, such modified software should be clearly marked,
* so as not to confuse it with the version available from LANL.
*
* Additionally, this program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*******************************************************************************/
package fortran.ofp.parser.java;
import java.util.*;
import org.antlr.runtime.*;
import fortran.ofp.parser.java.FortranToken;
public class FortranTokenStream extends LegacyCommonTokenStream {
public FortranLexer lexer;
public int needIdent;
public int parserBacktracking;
public boolean matchFailed;
private List currLine;
private int lineLength;
private Token eofToken = null;
private ArrayList<Token> packedList;
private ArrayList<Token> newTokenList;
/**
 * Creates a token stream over the given lexer, fills the token buffer and
 * captures the token that follows the buffered ones so it can be handed
 * back by {@link #LT(int)} as the end-of-file marker.
 *
 * @param lexer the Fortran lexer that supplies the tokens
 */
public FortranTokenStream(FortranLexer lexer) {
    super(lexer);

    // Per-line working state; stays unset until setCurrLine() is called.
    this.currLine = null;
    this.lineLength = 0;
    this.packedList = null;
    this.newTokenList = new ArrayList<Token>();

    // State shared with the parser / prepass.
    this.lexer = lexer;
    this.needIdent = 0;
    this.parserBacktracking = 0;
    this.matchFailed = false;

    this.fillBuffer();

    // With ANTLR v3.3, LA/LT(1) no longer return the <EOF> token, so the
    // next token from the source (EOF) is saved here and returned by LT().
    eofToken = tokenSource.nextToken();
    eofToken.setTokenIndex(size());

    // The EOF token's text carries "<file name>:<absolute path>".
    FortranStream input = lexer.getInput();
    eofToken.setText(input.getFileName() + ":" + input.getAbsolutePath());
} // end constructor
/**
 * Looks ahead {@code k} tokens. With ANTLR v3.3, LA/LT() no longer return
 * the <EOF> token, so when the requested position lies at or beyond the
 * end of the buffer the EOF token saved by the constructor is returned.
 *
 * @param k lookahead distance
 * @return the saved EOF token when {@code index()+k-1 >= size()},
 *         otherwise whatever the superclass returns
 */
public Token LT(int k) {
    boolean pastEnd = (index() + k - 1) >= this.size();
    return pastEnd ? eofToken : super.LT(k);
}
/*******OBSOLETE
public void fixupFixedFormat() {
ArrayList<Token> tmpArrayList = null;
boolean hasContinuation = false;
List tmpList = null;
int i = 0;
Token tk;
tmpList = super.getTokens();
tmpArrayList = new ArrayList<Token>(tmpList.size());
// TODO:
// this won't be necessary once ANTLR updates their getTokens method
// to return an ArrayList, that uses the syntax ArrayList<Token>.
// otherwise, the compiler gives a warning about unchecked or unsafe
// operations. this loop is overkill for simply avoiding the warning...
// however, having an ArrayList that contains typed objects (Token) is
// useful below because we may have to rewrite the stream when handling
// comments and continuations.
for (i = 0; i < tmpList.size(); i++) {
try {
tmpArrayList.add((Token)tmpList.get(i));
} catch(Exception e) {
e.printStackTrace();
System.exit(1);
}
}
// Loop across the tokens and convert anything in column 0 to a
// line comment, and anything in col 6 to continuation. note: this may
// require the splitting of tokens!
//
// We also have to check for tabs in the first character position. Following
// DEC's convention, <TAB>digit (other than zero) is a continuation line,
// otherwise the line starts a new statement. Codes seem to use <TAB><BLANK> to
// start a new line so perhaps <TAB> is essentially treated as 5 spaces?
//
int continue_pos = 5;
for (i = 0; i < tmpArrayList.size(); i++) {
tk = tmpArrayList.get(i);
int tk_pos = tk.getCharPositionInLine();
char tk_char_0 = tk.getText().charAt(0);
// check for tab formatting
if (tk_pos == 0) {
if (tk_char_0 == '\t') {
continue_pos = 1; // follows tab
} else {
continue_pos = 5; // column 6
}
}
if (tk_pos == continue_pos) {
int tk_type = tk.getType();
if (tk_type != FortranLexer.WS && tk_char_0 != '0' &&
(tk_type != FortranLexer.T_EOS ||
(tk_type == FortranLexer.T_EOS && tk_char_0 == ';'))) {
// Any non blank char other than '0' can be a continuation char if it's in
// the 6th column (col. 5 because zero based), even '!' or ';'.
// If an initial tab then continuation char is a digit 1-9.
// TODO:
// if the length is greater than 1, then the user is most likely
// using a letter or number to signal the continuation. in this
// case, we need to split off the character that's in column 6 and
// make two tokens -- the continuation token and what's left. we
// should maybe warn the user about this in case they accidentally
// started in the wrong column?
if (tk.getText().length() == 1) {
hasContinuation = true;
}
else if (continue_pos != 1) {
System.err.println("TODO: handle this continuation type!");
}
}
}
if (hasContinuation) {
int j, k, prevType;
Token prevToken = null;
hasContinuation = false; // reset
tk.setType(FortranLexer.CONTINUE_CHAR);
// hide the continuation token
tk.setChannel(lexer.getIgnoreChannelNumber());
tmpArrayList.set(i, tk);
j = i-1;
do {
prevToken = tmpArrayList.get(j);
prevType = prevToken.getType();
j--;
} while (j >= 0 && (prevType == FortranLexer.WS ||
prevType == FortranLexer.LINE_COMMENT ||
prevType == FortranLexer.T_EOS));
// channel 99 (hide) all tokens from after prevToken (j+1)+1
// through the continue token (i)
for (k = j+2; k < i; k++) {
tk = tmpArrayList.get(k);
// only hide the T_EOS tokens. all WS and LINE_COMMENT tokens
// should already be hidden.
if (tk.getType() == FortranLexer.T_EOS && tk.getText().charAt(0) != ';') {
tk.setChannel(lexer.getIgnoreChannelNumber());
tmpArrayList.set(k, tk);
}
}
// TODO:
// how can we handle fixed-format split tokens? for example:
// inte
// ger j
// this is the variable declaration 'integer j'. how are we
// suppose to know this? it compiles with gfortran.
//
// // need to find the next non-WS token
// i++;
// while(tmpArrayList.get(i).getType() == FortranLexer.WS ||
// tmpArrayList.get(i).getType() ==
// FortranLexer.LINE_COMMENT) {
// i++;
// }
// StringBuffer buffer = new StringBuffer();
// Token token;
// int tokenCount = 0;
// buffer = buffer.append(prevToken.getText());
// buffer = buffer.append(tmpArrayList.get(i).getText());
// ANTLRStringStream charStream =
// new ANTLRStringStream(buffer.toString().toUpperCase());
// FortranLexer myLexer = new FortranLexer(charStream);
// System.out.println("trying to match the string: " +
// buffer.toString().toUpperCase() +
// " for fixed-format continuation");
}
} // end for (each Token in the ArrayList)
// System.out.println("tmpArrayList as one big string: ");
// StringBuffer buffer = new StringBuffer();
// for(i = 0; i < tmpArrayList.size(); i++) {
// tmpToken = tmpArrayList.get(i);
// if (tmpToken.getType() == FortranLexer.WS ||
// (tmpToken.getType() == FortranLexer.T_EOS &&
// tmpToken.getText().charAt(0) != ';') ||
// tmpToken.getChannel() != lexer.getIgnoreChannelNumber()) {
// buffer = buffer.append(tmpToken.getText());
// }
// }
// System.out.println(buffer.toString().toUpperCase());
// {
// System.out.println("parsing above buffer with FixedLexer");
// ANTLRStringStream charStream =
// new ANTLRStringStream(buffer.toString().toUpperCase());
// FixedLexer myFixed = new FixedLexer(charStream);
// Token fixedToken;
// do {
// fixedToken = myFixed.nextToken();
// } while(fixedToken.getType() >= 0);
// System.out.println("done parsing above buffer with FixedLexer");
// System.exit(1);
// }
// System.out.println("tmpArrayList.toString(): " +
// tmpArrayList.toString());
// System.out.println("tmpArrayList.size(): " + tmpArrayList.size());
// System.out.println("super.tokens.size(): " + super.tokens.size());
// System.out.println("super.p is: " + super.p);
// save the new ArrayList (possibly modified) to the super classes
// token list.
super.tokens = tmpArrayList;
return;
} // end fixupFixedFormat()
END OBSOLETE*******/
/**
 * Create a subset list of the tokens in the current line that are not on
 * the lexer's ignore channel, guaranteeing that the result ends with a
 * T_EOS token.
 *
 * @return a new list holding the visible tokens of {@code currLine},
 *         terminated by T_EOS
 */
private ArrayList<Token> createPackedList() {
    ArrayList<Token> pList = new ArrayList<Token>(this.lineLength + 1);

    for (int i = 0; i < currLine.size(); i++) {
        Token tk = getTokenFromCurrLine(i);
        try {
            if (tk.getChannel() != lexer.getIgnoreChannelNumber()) {
                pList.add(tk);
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    } // end for(each item in buffered line)

    // Need to make sure the line was terminated with a T_EOS. This may
    // not happen if we're working on a file that ended w/o a newline.
    // An empty list (every token hidden) is treated the same way instead
    // of indexing element -1.
    boolean endsWithEos = !pList.isEmpty()
        && pList.get(pList.size() - 1).getType() == FortranLexer.T_EOS;
    if (!endsWithEos) {
        FortranToken eos = new FortranToken(lexer.getInput(), FortranLexer.T_EOS,
                                            Token.DEFAULT_CHANNEL,
                                            lexer.getInput().index(),
                                            lexer.getInput().index() + 1);
        eos.setText("\n");
        // Append to the list being built. The field packedList still
        // refers to the previous line (or is null on the first call).
        pList.add(eos);
    }

    return pList;
} // end createPackedList()
/******OBSOLETE
private boolean possiblySplitToken(ArrayList<Token> packedList,
int firstContCharOffset,
int currOffset) {
int i = 0;
for(i = firstContCharOffset+1; i < currOffset; i++) {
if (packedList.get(i).getType() != FortranLexer.WS &&
packedList.get(i).getType() != FortranLexer.T_EOS) {
return false;
}
}
return true;
} // end possiblySplitToken()
private void fixupContinuedLine(ArrayList<Token> packedList) {
int firstContCharOffset = -1;
int i;
int j;
// search for a continue char ('&' in free form)
for(i = 0; i < packedList.size(); i++) {
if (packedList.get(i).getType() == FortranLexer.CONTINUE_CHAR) {
if (firstContCharOffset == -1)
firstContCharOffset = i;
else {
// if all tokens between the first '&' and this one are WS,
// we have to consider the '&' chars together. otherwise,
// we don't.
if (possiblySplitToken(packedList, firstContCharOffset, i)
== true) {
// we have to consider the token preceding the first '&' and
// the one following the second '&' together.
// two continue chars. need to re-tokenize what's
// immediately before the first continue and immediately
// after the second.
StringBuffer buffer = new StringBuffer();
Token token;
int tokenCount = 0;
// channel 99 all of the tokens from the from the
// token preceding the first '&' and the token following
// the second '&', inclusive
for(j = firstContCharOffset-1; j <= i; j++) {
packedList.get(j).setChannel(
lexer.getIgnoreChannelNumber());
}
buffer =
buffer.append(
packedList.get(firstContCharOffset-1).getText());
buffer =
buffer.append(
packedList.get(i+1).getText());
ANTLRStringStream charStream =
new ANTLRStringStream(buffer.toString().toUpperCase());
FortranLexer myLexer = new FortranLexer(charStream);
// drop the token following the second '&'. the token
// the first '&' has already been dropped by the 'else'
// clause below.
packedList.get(i+1).setChannel(
lexer.getIgnoreChannelNumber());
do {
tokenCount++;
token = myLexer.nextToken();
if (tokenCount == 1) {
// this is the first of two possible tokens that
// we're adding to the packed list, so look up the
// line/col position from
// the original token (at firstContCharOffset-1).
token.setLine(
packedList.get(firstContCharOffset-1).getLine());
token.setCharPositionInLine(
packedList.get(firstContCharOffset-1).
getCharPositionInLine());
} else {
// the second of two tokens we're adding
token.setLine(
packedList.get(i+1).getLine());
token.setCharPositionInLine(
packedList.get(i+1).
getCharPositionInLine());
}
if (token.getType() >= 0) {
token.setText(token.getText().toLowerCase());
// insert the token
try {
packedList.add(i, token);
} catch(Exception e) {
e.printStackTrace();
System.exit(1);
}
// increment the loop variable to advance past the
// token we just inserted.
i++;
}
} while(token.getType() >= 0);
firstContCharOffset = -1;
} else {
// separate tokens, so drop the '&' and update to the current
// '&' as being the first cont char.
packedList.get(firstContCharOffset).setChannel(
lexer.getIgnoreChannelNumber());
firstContCharOffset = i;
}
}
} // end if (FortranLexer.T_CONTINUE_CHAR)
} // end for()
return;
} // end fixupContinuedLine()
END OBSOLETE*******/
/**
 * Concatenates the text of the packed-list tokens from {@code lineStart}
 * up to, but not including, the last token in the packed list.
 *
 * NOTE(review): {@code lineEnd} is not consulted; the end of the range is
 * always the packed list's last element. Confirm whether callers expect
 * it to bound the range.
 *
 * @param lineStart index of the first packed-list token to include
 * @param lineEnd   currently unused
 * @return the concatenated token text
 */
public String lineToString(int lineStart, int lineEnd) {
    StringBuilder text = new StringBuilder();
    int idx = lineStart;
    while (idx < packedList.size() - 1) {
        text.append(packedList.get(idx).getText());
        idx++;
    }
    return text.toString();
} // end lineToString()
/**
 * Delegates to the superclass to fetch the tokens between two indices.
 *
 * @param start first token index passed to the superclass
 * @param stop  last token index passed to the superclass
 * @return the list returned by the superclass
 */
public List getTokens(int start, int stop) {
    List range = super.getTokens(start, stop);
    return range;
} // end getTokens()
/**
 * @return the number of tokens in the packed list for the current line
 */
public int getCurrLineLength() {
    final int packedCount = packedList.size();
    return packedCount;
}
/**
 * @return the number of tokens in the unpacked current line
 */
public int getRawLineLength() {
    final int rawCount = currLine.size();
    return rawCount;
}
/**
 * Counts the tokens that make up the line beginning at {@code start} in
 * the superclass token buffer. Counting stops after the first token that
 * is not on the lexer's ignore channel and is a T_EOS or EOF, or when the
 * end of the buffer is reached.
 *
 * This will not give you a lexer.EOF, so a T_EOS token may need to be
 * added when creating the packed list if the file ended w/o a T_EOS
 * (no new line at end of the file).
 *
 * @param start index of the first token of the line
 * @return number of tokens in the line; 0 if {@code start} is at or past
 *         the end of the buffer
 */
public int getLineLength(int start) {
    if (start >= super.tokens.size()) {
        return 0;
    }

    int count = 0;
    while (true) {
        Token current = super.get(start + count);
        count++;

        // Ran off the end of the buffer.
        if ((start + count) >= super.tokens.size()) {
            break;
        }

        // A visible T_EOS/EOF terminates the line; hidden tokens never do.
        boolean hidden = current.getChannel() == lexer.getIgnoreChannelNumber();
        if (!hidden
            && (current.getType() == FortranLexer.T_EOS
                || current.getType() == FortranLexer.EOF)) {
            break;
        }
    }
    return count;
} // end getLineLength()
/**
 * Searches the packed list, beginning at {@code start}, for the first
 * token of the requested type.
 *
 * @param start        packed-list index at which to begin the search
 * @param desiredToken token type to look for
 * @return index of the first match, or -1 if there is none or
 *         {@code start} is at or past the end of the packed list
 */
public int findTokenInPackedList(int start, int desiredToken) {
    for (int i = start; i < this.packedList.size(); i++) {
        if (packedList.get(i).getType() == desiredToken) {
            return i;
        }
    }
    return -1;
} // end findTokenInPackedList()
/**
 * Fetches the packed-list token at {@code pos}.
 *
 * @param pos packed-list index
 * @return the token, or {@code null} (after printing a diagnostic to
 *         standard output) when {@code pos} is out of range
 */
public Token getToken(int pos) {
    boolean inRange = pos < this.packedList.size() && pos >= 0;
    if (inRange) {
        return packedList.get(pos);
    }

    System.out.println("pos is out of range!");
    System.out.println("pos: " + pos +
                       " packedListSize: " + this.packedList.size());
    return null;
} // end getToken()
/**
 * Finds the first packed-list token of the requested type at or after
 * {@code start}, using {@link #findToken(int, int)}.
 *
 * @param start        packed-list index at which to begin the search
 * @param desiredToken token type to look for
 * @return the matching token, or {@code null} if none was found
 */
public Token getToken(int start, int desiredToken) {
    int found = findToken(start, desiredToken);
    return (found == -1) ? null : packedList.get(found);
} //end getToken()
/**
 * Searches the packed list, beginning at {@code start}, for the first
 * token of the requested type. Prints a diagnostic to standard output
 * when {@code start} is at or past the end of the packed list.
 *
 * @param start        packed-list index at which to begin the search
 * @param desiredToken token type to look for
 * @return index of the first match, or -1 if there is none
 */
public int findToken(int start, int desiredToken) {
    if (start >= this.packedList.size()) {
        System.out.println("start is out of range!");
        System.out.println("start: " + start +
                           " packedListSize: " + this.packedList.size());
        return -1;
    }

    for (int i = start; i < this.packedList.size(); i++) {
        if (packedList.get(i).getType() == desiredToken) {
            return i;
        }
    }
    return -1;
} // end findToken()
/**
 * Search the currLine list, beginning at {@code start}, for the desired
 * token type.
 *
 * NOTE(review): on success this returns the index one past the matching
 * token, unlike findToken(), which returns the match's own index. Confirm
 * against callers before changing.
 *
 * @param start        currLine index at which to begin the search
 * @param desiredToken token type to look for
 * @return index of the match plus one, or -1 if there is none
 */
public int findTokenInCurrLine(int start, int desiredToken) {
    final int count = currLine.size();
    for (int i = start; i < count; i++) {
        Token candidate = (Token) currLine.get(i);
        if (candidate.getType() == desiredToken) {
            return i + 1;
        }
    }
    return -1;
} // end findTokenInCurrLine()
/**
 * Returns the packed-list token immediately following {@code pos}. No
 * range check is performed.
 *
 * @param pos current location in the packed list; the token returned is
 *            the one at {@code pos+1}
 * @return the token at {@code pos+1} in the packed list
 */
public Token getNextNonWSToken(int pos) {
    final int nextPos = pos + 1;
    return packedList.get(nextPos);
} // end getNextNonWSToken()
/**
 * Takes the packed-list token at {@code pos+1} and then searches currLine,
 * starting at {@code pos}, for a token of that same type.
 *
 * @param pos current location; used both as the packed-list index whose
 *            successor is fetched and as the currLine search start
 * @return the value returned by findTokenInCurrLine() for that token type
 */
public int getNextNonWSTokenPos(int pos) {
    // find the next non WS token, then locate its type in the raw line
    Token next = getNextNonWSToken(pos);
    return findTokenInCurrLine(pos, next.getType());
} // end getNextNonWSTokenPos()
/**
 * Fetches the token at {@code pos} in the unpacked current line.
 *
 * @param pos currLine index
 * @return the token, or {@code null} when {@code pos} is out of range
 */
public Token getTokenFromCurrLine(int pos) {
    boolean inRange = pos < currLine.size() && pos >= 0;
    return inRange ? (Token) currLine.get(pos) : null;
} // end getTokenFromCurrLine()
/**
 * Buffers the line that begins at {@code lineStart}: records its length,
 * stores its tokens in currLine and rebuilds the packed list from them.
 * Terminates the JVM if the token range comes back {@code null}.
 *
 * @param lineStart index of the line's first token in the token buffer
 */
public void setCurrLine(int lineStart) {
    final int length = this.getLineLength(lineStart);
    this.lineLength = length;

    // tokens [lineStart -> (lineStart+lineLength)-1]
    final int lineStop = (lineStart + length) - 1;
    this.currLine = this.getTokens(lineStart, lineStop);
    if (this.currLine == null) {
        System.err.println("currLine is null!!!!");
        System.exit(1);
    }

    // pack all tokens that are not on the ignore channel
    this.packedList = createPackedList();
} // end setCurrLine()
/**
 * Scans forward with LT() from the stream's current position, not through
 * the line buffered by this class, looking for a token of the requested
 * type. Tokens on the lexer's ignore channel are skipped; the scan ends at
 * the first visible token that is EOF, T_EOS or the desired type.
 *
 * An earlier version returned -1 up front when the line was a comment;
 * that was dropped because returning -1 is painful when looking for T_EOS.
 *
 * @param lineStart not used by this method
 * @param desiredToken token type to look for
 * @return the lookahead distance (1-based) of the match, or -1 if the scan
 *         ended on a different token
 */
public int findTokenInSuper(int lineStart, int desiredToken) {
    int offset = 0;
    while (true) {
        offset++;
        Token candidate = LT(offset);
        int type = candidate.getType();
        int channel = candidate.getChannel();

        boolean stopType = type == FortranLexer.EOF
            || type == FortranLexer.T_EOS
            || type == desiredToken;

        // Only a visible token can end the scan.
        if (stopType && channel != lexer.getIgnoreChannelNumber()) {
            return (type == desiredToken) ? offset : -1;
        }
    }
} // end findTokenInSuper()
/**
 * Debug helper: prints the size and contents of the unpacked current line
 * to standard output, framed by separator lines.
 */
public void printCurrLine() {
    final String rule = "=================================";
    System.out.println(rule);
    System.out.println("currLine.size() is: " + currLine.size());
    System.out.println(currLine);
    System.out.println(rule);
} // end printCurrLine()
/**
 * Debug helper: prints the size and contents of the packed list to
 * standard output, framed by separator lines.
 */
public void printPackedList() {
    final String rule = "*********************************";
    System.out.println(rule);
    System.out.println("packedListSize is: " + packedList.size());
    System.out.println(packedList);
    System.out.println(rule);
} // end printPackedList()
/**
 * Passes every token in the superclass token list, in order, to
 * {@code actions.next_token()}. The tokens are first copied into a typed
 * list; the JVM is terminated if an element cannot be added.
 *
 * @param actions receiver of the tokens
 */
public void outputTokenList(IFortranParserAction actions) {
    List rawTokens = super.getTokens();
    ArrayList<Token> snapshot = new ArrayList<Token>(rawTokens.size());

    // Copy first so the whole list is typed before any token is handed out.
    for (Object element : rawTokens) {
        try {
            snapshot.add((Token) element);
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    for (Token tk : snapshot) {
        actions.next_token(tk);
    }
} // end outputTokenList()
/**
 * Returns the type of the packed-list token {@code lookAhead} positions
 * ahead, where 1 refers to the first token of the packed list.
 *
 * @param lookAhead 1-based lookahead distance into the packed list
 * @return the token's type, or -1 when there is no packed list or the
 *         position falls outside it
 */
public int currLineLA(int lookAhead) {
    final int slot = lookAhead - 1;

    // Explicit range check instead of catching the exception from get().
    if (packedList == null || slot < 0 || slot >= packedList.size()) {
        return -1;
    }
    return packedList.get(slot).getType();
} // end currLineLA()
/**
 * Looks ahead with LA() for a token of the requested type, stopping at
 * the first T_EOS or EOF.
 *
 * @param desiredToken token type to look for
 * @return {@code true} if the desired type was seen no later than the
 *         first T_EOS/EOF, {@code false} otherwise
 */
public boolean lookForToken(int desiredToken) {
    for (int offset = 1; ; offset++) {
        int type = this.LA(offset);
        if (type == desiredToken) {
            return true;
        }
        if (type == FortranLexer.T_EOS || type == FortranLexer.EOF) {
            return false;
        }
    }
} // end lookForToken()
/**
 * Creates a token of the given type and text and appends it to the end of
 * the packed list.
 *
 * @param tokenType type of the new token
 * @param tokenText text of the new token
 * @return the result of {@code ArrayList.add}
 */
public boolean appendToken(int tokenType, String tokenText) {
    FortranToken appended = new FortranToken(tokenType);
    appended.setText(tokenText);
    return packedList.add(appended);
} // end appendToken()
/**
 * Appends an existing token to the end of the packed list.
 *
 * @param token token to append
 */
public void addToken(Token token) {
    packedList.add(token);
}
/**
 * Creates a token of the given type and text and inserts it into the
 * packed list at {@code index}. For example, with a packed list of
 * {@code label T_CONTINUE T_EOS} and {@code index == 1}, the new token is
 * inserted between the label and T_CONTINUE. Terminates the JVM if the
 * insertion throws.
 *
 * @param index     packed-list position at which to insert
 * @param tokenType type of the new token
 * @param tokenText text of the new token
 */
public void addToken(int index, int tokenType, String tokenText) {
    try {
        FortranToken inserted = new FortranToken(tokenType, tokenText);
        packedList.add(index, inserted);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }
} // end addToken()
/**
 * Replaces the packed-list token at {@code index}.
 *
 * @param index packed-list position to overwrite
 * @param token replacement token
 */
public void set(int index, Token token) {
    this.packedList.set(index, token);
} // end set()
/**
 * Inserts a token into the packed list at {@code index}.
 *
 * @param index packed-list position at which to insert
 * @param token token to insert
 */
public void add(int index, Token token) {
    this.packedList.add(index, token);
}
/**
 * Removes the packed-list token at {@code index}.
 *
 * @param index packed-list position to remove
 */
public void removeToken(int index) {
    this.packedList.remove(index);
} // end removeToken()
/**
 * Removes every token from the packed list.
 */
public void clearTokensList() {
    packedList.clear();
} // end clearTokensList()
/**
 * @return the packed list itself (not a copy)
 */
public ArrayList<Token> getTokensList() {
    return packedList;
} // end getTokensList()
/**
 * Replaces the packed list with the supplied list (stored by reference).
 *
 * @param newList list to use as the packed list from now on
 */
public void setTokensList(ArrayList<Token> newList) {
    packedList = newList;
} // end setTokensList()
/**
 * @return the number of tokens in the packed list
 */
public int getTokensListSize() {
    final int packedCount = packedList.size();
    return packedCount;
} // end getTokensListSize()
/**
 * Builds a new token with the given type, text and source position. The
 * token is not added to any list.
 *
 * @param type token type
 * @param text token text
 * @param line line number to record on the token
 * @param col  character position in the line to record on the token
 * @return the newly created token
 */
public FortranToken createToken(int type, String text, int line, int col) {
    FortranToken created = new FortranToken(type, text);
    created.setLine(line);
    created.setCharPositionInLine(col);
    return created;
} // end createToken()
/**
 * Appends a token to newTokenList, reporting on standard error if the
 * list refuses it.
 *
 * @param token token to append
 */
public void addTokenToNewList(Token token) {
    boolean added = newTokenList.add(token);
    if (!added) {
        System.err.println("Couldn't add to newTokenList!");
    }
}
/**
 * Appends every token of the packed list to newTokenList, reporting on
 * standard error if {@code addAll} returns {@code false}.
 */
public void finalizeLine() {
    boolean changed = newTokenList.addAll(packedList);
    if (!changed) {
        System.err.println("Couldn't add to newTokenList!");
    }
} // end finalizeLine()
/**
 * Makes newTokenList the superclass's token list.
 */
public void finalizeTokenStream() {
    super.tokens = newTokenList;
} // end finalizeTokenStream()
} // end class FortranTokenStream