package AsciiDatabase;
import java.io.*;
import java.util.zip.*;

/* AsciiDatabase/AsciiReader
 *
 * Copyright (c) 2000 Chris Studholme <chris.studholme@utoronto.ca>
 *
 * May be copied or modified under the terms of the GNU General Public
 * License.  See COPYING for more information.
 */

/**
 * <p>A fully buffered, random access reader capable of reading both
 * fixed sized blocks and carriage return / line feed terminated lines.
 * It can read from either normal uncompressed files or GZIP compressed
 * files.
 *
 * <p>Bytes are treated as 8-bit characters; no character set decoding is
 * performed.  This class performs no synchronization of its own.
 *
 * @author chris.studholme@utoronto.ca
 */
public final class AsciiReader {

  /** Default buffer size for uncompressed files.*/
  public final static int buffersize = 4096;

  /** Default buffer size for compressed files.*/
  public final static int zstreambuffer = 4096;

  /** Open file.*/
  private final File file;
  /** Stream used for uncompressed file (null for compressed files or after close()).*/
  private RandomAccessFile stream=null;
  /** Stream used for compressed file (null for uncompressed files or after close()).*/
  private GZIPInputStream zstream=null;

  /** Buffer used to improve performance of small reads.*/
  private final byte[] buffer;
  /** Offset of next valid data in buffer.*/
  private int validoffset;
  /** Length of valid data in buffer; never negative.*/
  private int validlength;

  /**
   * Current logical stream position, i.e. the offset of the next byte that
   * a read call will return.  The underlying stream is always positioned
   * at <code>streampos + (validlength - validoffset)</code>.
   */
  private long streampos=0;


  /**
   * Open an uncompressed file by name.
   *
   * @param filename name of file to open
   * @exception IOException if an error occurs opening the file
   */
  public AsciiReader(String filename) throws IOException {
    this(filename,false);
  }

  /**
   * Open an uncompressed or compressed file by name.
   *
   * @param filename name of file to open
   * @param gzip flag to indicate if the file is GZIP compressed
   * @exception IOException if an error occurs opening the file
   */
  public AsciiReader(String filename, boolean gzip) throws IOException {
    file = new File(filename);
    if (gzip)
      zstream = openGzip();
    else
      stream = new RandomAccessFile(file,"r");
    buffer = new byte[buffersize];
    validoffset=validlength=0;
  }

  /**
   * Open a fresh GZIP stream positioned at the start of the file.  The
   * underlying file stream is closed again if the GZIP header cannot be
   * read, so a failed open does not leak a file descriptor.
   *
   * @return newly opened GZIP stream
   * @exception IOException if the file cannot be opened or is not GZIP data
   */
  private GZIPInputStream openGzip() throws IOException {
    FileInputStream raw = new FileInputStream(file);
    boolean opened = false;
    try {
      GZIPInputStream result = new GZIPInputStream(raw,zstreambuffer);
      opened = true;
      return result;
    }
    finally {
      if (!opened)
        closeQuietly(raw);
    }
  }

  /**
   * Close a resource, deliberately ignoring any I/O error.
   *
   * @param resource resource to close, may be null
   */
  private static void closeQuietly(Closeable resource) {
    if (resource==null)
      return;
    try {
      resource.close();
    }
    catch (IOException ignored) {
      // best effort: the reader is being discarded, nothing useful can be done
    }
  }

  /**
   * Close the data file.  Errors while closing are ignored.  Calling
   * this method more than once is harmless.
   */
  public void close() {
    closeQuietly(zstream);
    closeQuietly(stream);
    zstream=null;
    stream=null;
  }

  /**
   * Close the data file.  Just calls close().
   */
  protected void finalize() {
    close();
  }

  /**
   * Read directly from whichever underlying stream is open.
   *
   * @param dst destination array
   * @param off offset within dst
   * @param len maximum number of bytes to read
   * @return number of bytes read, or a non-positive value at end of file
   * @exception IOException if the read fails or the reader has been closed
   */
  private int rawRead(byte dst[], int off, int len) throws IOException {
    if (stream!=null)
      return stream.read(dst,off,len);
    if (zstream!=null)
      return zstream.read(dst,off,len);
    throw new IOException("file is closed");
  }

  /**
   * Refill the internal buffer from the underlying stream.  Must only be
   * called when the buffer has been fully consumed.  On end of file the
   * buffer is left empty (validlength is never made negative, which keeps
   * the position arithmetic in seek() correct).
   *
   * @return number of bytes now in the buffer, or the non-positive value
   *         returned by the underlying stream at end of file
   * @exception IOException if the read fails or the reader has been closed
   */
  private int fill() throws IOException {
    validoffset=0;
    validlength=0;
    int n = rawRead(buffer,0,buffer.length);
    if (n>0)
      validlength=n;
    return n;
  }

  /**
   * Push back the last byte read so it can be read again.  Pushing
   * back more than one byte is possible but is unreliable (may
   * result in an exception).
   *
   * <p>If the internal buffer is empty (the last read hit end of file, or
   * the last read was a large read that bypassed the buffer) this method
   * does nothing.
   *
   * @exception IOException if the byte cannot be pushed back
   */
  public void pushBack() throws IOException {

    // last read may have been EOF
    if (validlength<=0)
      return;

    // pushing back more than one byte is unreliable
    if (validoffset<=0)
      throw new IOException("cannot push back byte");

    --validoffset;
    --streampos;
  }


  /**
   * Read a byte from the file.
   *
   * @return byte read as a value in the range 0 to 255, or -1 at end of file
   * @exception IOException if an error occurs reading the file
   */
  public final int read() throws IOException {
    // read from file if necessary
    if ((validoffset>=validlength)&&(fill()<=0))
      return -1;
    ++streampos;
    // mask so that bytes >= 0x80 are not negative (0xFF must not look like EOF)
    return buffer[validoffset++] & 0xff;
  }


  /**
   * Read until the end of line is reached.  End of line is denoted by one of:
   * '\r', '\n', or '\r\n'.  If the end of line is reached, the buffer will
   * contain one of '\r' or '\n' but not '\r\n'.
   *
   * @param b buffer where data should be placed
   * @param off offset within buffer where data should go
   * @param len maximum number of bytes to be placed in buffer
   * @return number of bytes actually read, or a non-positive value if the
   *         end of file was reached before any byte could be read
   * @exception IOException if an error occurs reading the file
   */
  public int readToEOL(byte b[], int off, int len) throws IOException {
    int bytesread=0;
    while (len>0) {

      // refill from file when the buffer is exhausted
      if (validoffset>=validlength) {
        int n = fill();
        if (n<=0)
          return bytesread>0 ? bytesread : n;
        continue;
      }

      // get byte from buffer
      byte c = b[off++] = buffer[validoffset++];
      --len;
      ++bytesread;
      ++streampos;

      if (c=='\r') {
        // swallow the '\n' of a "\r\n" pair; anything else belongs to the next line
        int next = read();
        if ((next>=0)&&(next!='\n'))
          pushBack();
        return bytesread;
      }
      if (c=='\n')
        return bytesread;
    }
    return bytesread;
  }

  /**
   * Read until the end of line is reached.  End of line is denoted by one of:
   * '\r', '\n', or '\r\n'.  If the end of line is reached, the buffer will
   * contain one of '\r' or '\n' but not '\r\n'.  The maximum number of characters
   * read is b.length.
   *
   * @param b buffer where data should be placed
   * @return number of bytes actually read, or a non-positive value if the
   *         end of file was reached before any byte could be read
   * @exception IOException if an error occurs reading the file
   */
  public final int readToEOL(byte b[]) throws IOException {
    return readToEOL(b,0,b.length);
  }

  /**
   * Read data from the file.  Data already in the internal buffer is
   * delivered first.  If the remaining size of the read is equal to
   * or greater than buffersize, the buffer is not used and the
   * data is read directly into b.
   *
   * @param b buffer where data should be placed
   * @param off offset within buffer where data should go
   * @param len maximum number of bytes to be placed in buffer
   * @return number of bytes read, or a non-positive value if the end of
   *         file was reached before any byte could be read
   * @exception IOException if an error occurs reading the file
   */
  public int read(byte b[], int off, int len) throws IOException {
    int bytesread=0;
    while (len>0) {
      int available = validlength-validoffset;

      // copy whatever the buffer still holds
      if (available>0) {
        int n = Math.min(available,len);
        System.arraycopy(buffer,validoffset,b,off,n);
        validoffset += n;
        streampos += n;
        bytesread += n;
        off += n;
        len -= n;
      }

      // or, read from file (to fill buffer)
      else if (len<buffersize) {
        int n = fill();
        if (n<=0)
          return bytesread>0 ? bytesread : n;
      }

      // or, for large reads, read directly to destination buffer
      else {
        validlength=validoffset=0;
        int dread = rawRead(b,off,len);
        if (dread<=0)
          return bytesread>0 ? bytesread : dread;
        streampos += dread;
        bytesread += dread;
        off += dread;
        len -= dread;
      }
    }
    return bytesread;
  }

  /**
   * Read data from the file.  If the size of the read is equal to
   * or greater than buffersize, the buffer is not used and the
   * data is read directly into b.
   *
   * @param b buffer where data should be placed
   * @return number of bytes read, or a non-positive value if the end of
   *         file was reached before any byte could be read
   * @exception IOException if an error occurs reading the file
   */
  public final int read(byte b[]) throws IOException {
    return read(b,0,b.length);
  }

  /**
   * Return the current file pointer position.  For compressed files this
   * is the offset within the uncompressed data.
   *
   * @return current file position
   * @exception IOException if an error occurs
   */
  public long getFilePointer() throws IOException {
    return streampos;
  }

  /**
   * Return the length of the file.  For compressed files, this method
   * returns the compressed length.  There is no way to discover the
   * uncompressed length of a compressed file without reading (and uncompressing)
   * the whole file.
   *
   * @return length of the file
   */
  public long length() {
    return file.length();
  }

  /**
   * Skip forward in the compressed stream, repeating the skip until the
   * requested count is reached or the stream stops making progress.
   *
   * @param count number of uncompressed bytes to skip
   * @return number of bytes actually skipped
   * @exception IOException if an error occurs reading the file
   */
  private long skipCompressed(long count) throws IOException {
    long skipped=0;
    while (skipped<count) {
      long n = zstream.skip(count-skipped);
      if (n<=0)
        break;
      skipped += n;
    }
    return skipped;
  }

  /**
   * Seek to an arbitrary offset within the file.  This method
   * works correctly even for compressed files; however, it may
   * be quite slow when seeking within a compressed file.  For
   * example, when seeking backwards within a compressed file,
   * the file must be closed, reopened, and data read until the
   * desired location is reached.
   *
   * @param off offset to seek to
   * @exception IOException if off is negative, the reader has been closed,
   *            the offset lies beyond the end of a compressed file, or
   *            another I/O error occurs
   */
  public void seek(long off) throws IOException {
    if (off<0)
      throw new IOException("cannot seek to negative offset");

    // first check existing buffer
    long bufferstart = streampos-validoffset;
    if ((off>=bufferstart)&&(off<=bufferstart+validlength)) {
      validoffset = (int)(off-bufferstart);
      streampos=off;
      return;
    }

    // discard the buffer; the underlying stream sits at the end of it
    streampos = bufferstart+validlength;
    validoffset=validlength=0;

    // for RandomAccessFile seeking is easy
    if (stream!=null) {
      stream.seek(off);
      streampos=off;
      return;
    }

    if (zstream==null)
      throw new IOException("file is closed");

    // GZIP file, if seeking backwards, re-open file and start from the beginning
    if (off<streampos) {
      GZIPInputStream reopened = openGzip();
      closeQuietly(zstream);
      zstream=reopened;
      streampos=0;
    }

    // GZIP file, skip forward to the desired position
    streampos += skipCompressed(off-streampos);
    if (streampos!=off)
      throw new IOException("cannot seek to desired position in gzip file");
  }


  /**
   * Read a single line of an ascii file.  A line is terminated with one of:
   * '\r', '\n', or '\r\n'.  The terminating character(s) are not appended
   * to the returned string.  Each byte becomes one character with the same
   * numeric value.  This method is not as efficient as readToEOL().
   *
   * @return single ascii line as a String without the end of line characters,
   *         or null if the end of file was reached before any byte was read
   * @exception IOException if an error occurs reading the file
   */
  public String readLine() throws IOException {
    StringBuffer result = new StringBuffer();

    while (true) {

      // read character
      int b = read();

      // check for EOF
      if (b==-1)
        return result.length()>0 ? result.toString() : null;

      // check for EOL
      if (b=='\n')
        return result.toString();
      if (b=='\r') {
        // swallow the '\n' of a "\r\n" pair; anything else belongs to the next line
        int next = read();
        if ((next>=0)&&(next!='\n'))
          pushBack();
        return result.toString();
      }

      result.append((char)b);
    }
  }

}

