CommonCharacterStream.java

package edu.udel.cis.vsl.abc.front.c.preproc;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;

import org.antlr.runtime.ANTLRStringStream;

/**
 * <p>
 * A general-purpose ANTLR character stream which can be formed from an
 * arbitrary Java {@link InputStream}. Since it has no idea how long the input
 * stream will be, it dynamically allocates memory in chunks as it reads the
 * stream, and only at the end stitches everything together into a big char
 * array.
 * </p>
 * 
 * @author siegel
 */
public class CommonCharacterStream extends ANTLRStringStream {

	// Static constants...

	/**
	 * The default number of characters to read at one time.
	 */
	public final static int DEFAULT_CHUNK_SIZE = 8192;

	// Types...

	/**
	 * Helper class for recording chunk of data read from stream.
	 * 
	 * @author siegel
	 *
	 */
	private class Chunk {
		/**
		 * Buffer to store the characters read in from file. Length is specified
		 * in constructor but is always {@link #chunkSize} for now.
		 */
		char buf[];

		/**
		 * Number of elements actually used: bounded above by buf.length.
		 */
		int size;

		/**
		 * Creates a new chunk with buffer the given size.
		 * 
		 * @param bufferSize
		 *            the number of characters to allocate for {@link #buf}
		 *            array.
		 */
		Chunk(int bufferSize) {
			buf = new char[bufferSize];
		}
	}

	/**
	 * Name of this stream, like file name, for example.
	 */
	private String name;

	/**
	 * The number of characters to read at one time.
	 */
	private int chunkSize = DEFAULT_CHUNK_SIZE;

	// Constructors...

	/**
	 * Creates new filtered ANTLR input stream from given stream.
	 * 
	 * @param name
	 *            the name to use for this stream; used mostly for reporting
	 *            errors
	 * @param stream
	 *            the underlying stream to read from
	 * @param encoding
	 *            the character encoding or null if default encoding is to be
	 *            used
	 * @param chunkSize
	 *            the number of characters to read at one time from the stream
	 * @throws IOException
	 *             if something goes wrong reading from the underlying stream
	 */
	public CommonCharacterStream(String name, InputStream stream,
			String encoding, int chunkSize) throws IOException {
		this.name = name;
		this.chunkSize = chunkSize;
		load(stream, encoding);
	}

	/**
	 * Creates new filtered ANTLR input stream from given stream using default
	 * chunk size.
	 * 
	 * @param name
	 *            the name to use for this stream; used mostly for reporting
	 *            errors
	 * @param stream
	 *            the underlying stream to read from
	 * @param encoding
	 *            the character encoding or null if default encoding is to be
	 *            used
	 * @throws IOException
	 *             if something goes wrong reading from the underlying stream
	 */
	public CommonCharacterStream(String name, InputStream stream,
			String encoding) throws IOException {
		this.name = name;
		load(stream, encoding);
	}

	/**
	 * Creates new filtered ANTLR input stream from given stream using default
	 * chunk size and default character encoding.
	 * 
	 * @param name
	 *            the name to use for this stream; used mostly for reporting
	 *            errors
	 * @param stream
	 *            the underlying stream to read from
	 * @throws IOException
	 *             if something goes wrong reading from the underlying stream
	 */
	public CommonCharacterStream(String name, InputStream stream)
			throws IOException {
		this(name, stream, null);
	}

	/**
	 * Creates new filtered ANTLR input stream from a string.
	 * 
	 * @param name
	 *            the name to use for this stream; used mostly for reporting
	 *            errors
	 * @param string
	 *            a string while will form the source for the stream
	 * @param chunkSize
	 *            the number of characters to read from the string at one time
	 * @throws IOException
	 *             should not happen
	 */
	public CommonCharacterStream(String name, String string, int chunkSize)
			throws IOException {
		this(name, new ByteArrayInputStream(string.getBytes()), null,
				chunkSize);
	}

	/**
	 * Reads data from an input stream reader into chunks of memory. Goes
	 * through each chunk and removes backslash-newlines. Special handling if
	 * one chunk ends in backslash and the next begins with newline. Then
	 * allocates data array any copies the filtered data into it.
	 * 
	 * @param isr
	 *            the input stream reader
	 * @throws IOException
	 *             if something goes wrong reading the stream
	 */
	private void load(InputStreamReader isr) throws IOException {
		ArrayList<Chunk> chunks = new ArrayList<>();
		int totalSize = 0;
		// boolean previousBackslash = false; // does previous chunk end in \ ?
		// Chunk previousChunk = null; // the previous chunk
		int pos = 0;

		// first read in the whole file in chunks...
		while (true) {
			Chunk chunk = new Chunk(chunkSize);
			chunk.size = isr.read(chunk.buf);

			if (chunk.size <= 0)
				break;
			chunks.add(chunk);
			totalSize += chunk.size;
			if (chunk.size < chunkSize)
				break;
		}
		// // remove the backslash-newlines in-place...
		// for (Chunk chunk : chunks) {
		// char[] buf = chunk.buf;
		// int size = chunk.unfilteredLength;
		// int i = 0; // higher index
		// int j = 0; // lower index
		// char c = 0;
		//
		// assert size >= 1;
		// if (previousBackslash && buf[0] == '\n') {
		// // remove last char from previous chunk and skip the \n...
		// previousChunk.filteredLength--;
		// i = 1;
		// }
		// while (i < size) {
		// c = buf[i];
		// if (c == '\\' && i < size - 1 && buf[i + 1] == '\n') {
		// i += 2;
		// } else {
		// if (i != j)
		// buf[j] = c;
		// j++;
		// i++;
		// }
		// }
		// chunk.filteredLength = j;
		// previousBackslash = (c == '\\');
		// previousChunk = chunk;
		// }
		// count the filtered characters and allocate data...
		// for (Chunk chunk : chunks)
		// numFilteredChars += chunk.filteredLength;

		// int numChunks = chunks.size();
		// boolean addNewline = true;

		// for (int index = numChunks - 1; index >= 0; index--) {
		// Chunk chunk = chunks.get(index);
		// int len = chunk.filteredLength;
		//
		// if (len != 0) {
		// addNewline = chunk.buf[len - 1] != '\n';
		// break;
		// }
		// }
		// if (addNewline)
		// numFilteredChars++;
		super.data = new char[totalSize];
		super.n = totalSize;
		// copy chunk bufs into data...
		for (Chunk chunk : chunks) {
			System.arraycopy(chunk.buf, 0, super.data, pos, chunk.size);
			pos += chunk.size;
		}
		// if (addNewline)
		// super.data[pos] = '\n';
	}

	/**
	 * <p>
	 * Reads data from an input stream into chunks of memory. The character
	 * encoding is specified, which determines how to translate the bytes from
	 * the stream into characters. If null is given for encoding, the default
	 * encoding is used.
	 * </p>
	 * 
	 * <p>
	 * Goes through each chunk and removes backslash-newlines. Special handling
	 * if one chunk ends in backslash and the next begins with newline. Then
	 * allocates data array any copies the filtered data into it.
	 * </p>
	 * 
	 * @param stream
	 *            the input stream
	 * @param encoding
	 *            the character encoding
	 * @throws IOException
	 *             if something goes wrong reading the stream
	 */
	private void load(InputStream stream, String encoding) throws IOException {
		InputStreamReader isr;

		if (encoding != null) {
			isr = new InputStreamReader(stream, encoding);
		} else {
			isr = new InputStreamReader(stream);
		}
		try {
			load(isr);
		} finally {
			isr.close();
		}
	}

	@Override
	public String getSourceName() {
		return name;
	}
}