Optimisation of RecordInputStream - removed the intermediate 8K byte buffer. The expected performance gain was not realised immediately, so the LittleEndianInput functionality has been pushed down into DocumentInputStream to help.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@707778 13f79535-47bb-0310-9956-ffa450edef68
2008-10-24 23:13:44 +00:00 · 2008-10-24 23:13:44 +00:00 · ffd6eab0da
commit ffd6eab0da
parent 56a142c735
9 changed files with 696 additions and 804 deletions
--- a/src/contrib/src/org/apache/poi/contrib/poibrowser/TreeReaderListener.java
+++ b/src/contrib/src/org/apache/poi/contrib/poibrowser/TreeReaderListener.java
@ -1,4 +1,3 @@
-
 /* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
@ -15,11 +14,9 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 ==================================================================== */
-        

 package org.apache.poi.contrib.poibrowser;

-import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;

@ -160,17 +157,7 @@ public class TreeReaderListener implements POIFSReaderListener
            throw new RuntimeException(t.getMessage());
        }

-        try
-        {
-            is.close();
-        }
-        catch (IOException ex)
-        {
-            System.err.println
-                ("Unexpected exception while closing " +
-                event.getName() + " in " + event.getPath().toString());
-            ex.printStackTrace(System.err);
-        }
+        is.close();

        final MutableTreeNode parentNode = getNode(d.path, filename, rootNode);
        final MutableTreeNode nameNode = new DefaultMutableTreeNode(d.name);
--- a/src/java/org/apache/poi/hssf/record/RecordInputStream.java
+++ b/src/java/org/apache/poi/hssf/record/RecordInputStream.java
@ -17,12 +17,13 @@

 package org.apache.poi.hssf.record;

-import org.apache.poi.util.LittleEndian;
-import org.apache.poi.util.LittleEndianInput;
-
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.ByteArrayOutputStream;
+
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.LittleEndianInput;
+import org.apache.poi.util.LittleEndianInputStream;

 /**
 * Title:  Record Input Stream<P>
@ -34,106 +35,131 @@ public final class RecordInputStream extends InputStream implements LittleEndian
 	/** Maximum size of a single record (minus the 4 byte header) without a continue*/
 	public final static short MAX_RECORD_DATA_SIZE = 8224;
 	private static final int INVALID_SID_VALUE = -1;
+	private static final int DATA_LEN_NEEDS_TO_BE_READ = -1;
+	private static final byte[] EMPTY_BYTE_ARRAY = { };

-	private InputStream in;
-	private short currentSid;
-	private short currentLength = -1;
-	private short nextSid;
+	private final InputStream _in;
+	/** {@link LittleEndianInput} facet of field {@link #_in} */
+	private final LittleEndianInput _le;
+	private int currentSid;
+	private int _currentDataLength;
+	private int nextSid;
+	private int recordOffset;
+	private boolean autoContinue; // TODO - remove this

-	private final byte[] data = new byte[MAX_RECORD_DATA_SIZE];
-	private short recordOffset;
-	private long pos;
+	public RecordInputStream(InputStream in) throws RecordFormatException {
+		_in = in;
+		if (in instanceof LittleEndianInput) {
+			// accessing directly is an optimisation
+			_le = (LittleEndianInput) in;
+		} else {
+			// less optimal, but should work OK just the same. Often occurs in junit tests.
+			_le = new LittleEndianInputStream(in);
+		}
+		try {
+		      if (_in.available() < LittleEndian.SHORT_SIZE) {
+		          nextSid = INVALID_SID_VALUE;
+		      } else {
+		    	  nextSid = LittleEndian.readShort(in);
+		      }
+		} catch (IOException ex) {
+			throw new RecordFormatException("Error reading bytes", ex);
+		}
+		_currentDataLength = DATA_LEN_NEEDS_TO_BE_READ;
+		autoContinue = true;
+	}

-  private boolean autoContinue = true;
-
-  public RecordInputStream(InputStream in) throws RecordFormatException {
-    this.in = in;
-    try {
-      nextSid = LittleEndian.readShort(in);
-      //Don't increment the pos just yet (technically we are at the start of
-      //the record stream until nextRecord is called).
-    } catch (IOException ex) {
-      throw new RecordFormatException("Error reading bytes", ex);
-    }
-  }
-
-	/** This method will read a byte from the current record*/
 	public int read() {
 		checkRecordPosition(LittleEndian.BYTE_SIZE);
-
-		byte result = data[recordOffset];
 		recordOffset += LittleEndian.BYTE_SIZE;
-		pos += LittleEndian.BYTE_SIZE;
-		return result;
+		return _le.readUByte();
+	}
+	public int read(byte[] b, int off, int len) {
+		int limit = Math.min(len, remaining());
+		if (limit == 0) {
+			return 0;
+		}
+		readFully(b, off,limit);
+		return limit;
 	}

  public short getSid() {
-    return currentSid;
+    return (short) currentSid;
  }

-  public short getLength() {
-    return currentLength;
+  public short getLength() { // TODO - remove
+    return (short) _currentDataLength;
  }

-  public short getRecordOffset() {
-    return recordOffset;
-  }

-  public long getPos() {
-    return pos;
-  }
+	/**
+	 * Note - this method is expected to be called only after the current BIFF record has been
+	 * read completely. Calling it before the end of the current record is reached causes all
+	 * remaining data of that record to be discarded.
+	 */
+	public boolean hasNextRecord() {
+		if (_currentDataLength != -1 && _currentDataLength != recordOffset) {
+			System.out.println("WARN. Unread "+remaining()+" bytes of record 0x"+Integer.toHexString(currentSid));
+			// discard unread data
+			while (recordOffset < _currentDataLength) {
+				readByte();
+			}
+		}
+		if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ) {
+			nextSid = readNextSid();
+			_currentDataLength = DATA_LEN_NEEDS_TO_BE_READ;
+		}
+		return nextSid != INVALID_SID_VALUE;
+	}

-  public boolean hasNextRecord() {
-    return nextSid != INVALID_SID_VALUE;
-  }
+	/**
+	 * 
+	 * @return the sid of the next record or {@link #INVALID_SID_VALUE} if at end of stream
+	 */
+	private int readNextSid() {
+		int nAvailable;
+		try {
+			nAvailable = _in.available();
+		} catch (IOException e) {
+			throw new RecordFormatException("Error checking stream available bytes", e);
+		}
+		if (nAvailable < EOFRecord.ENCODED_SIZE) {
+			if (nAvailable > 0) {
+				// some scrap left over?
+				// ex45582-22397.xls has one extra byte after the last record
+				// Excel reads that file OK
+			}
+			return INVALID_SID_VALUE;
+		}
+		int result = _le.readUShort();
+		if (result == INVALID_SID_VALUE) {
+			throw new RecordFormatException("Found invalid sid (" + result + ")");
+		}
+		return result;
+	}

-  /** Moves to the next record in the stream.
-   *
-   * <i>Note: The auto continue flag is reset to true</i>
-   */
-  public void nextRecord() throws RecordFormatException {
-    if ((currentLength != -1) && (currentLength != recordOffset)) {
-      System.out.println("WARN. Unread "+remaining()+" bytes of record 0x"+Integer.toHexString(currentSid));
-    }
-    currentSid = nextSid;
-    pos += LittleEndian.SHORT_SIZE;
-    autoContinue = true;
-    try {
-      recordOffset = 0;
-      currentLength = LittleEndian.readShort(in);
-      if (currentLength > MAX_RECORD_DATA_SIZE)
-        throw new RecordFormatException("The content of an excel record cannot exceed "+MAX_RECORD_DATA_SIZE+" bytes");
-      pos += LittleEndian.SHORT_SIZE;
-      in.read(data, 0, currentLength);
-
-      //Read the Sid of the next record
-      if (in.available() < EOFRecord.ENCODED_SIZE) {
-          if (in.available() > 0) {
-              // some scrap left over?
-              // ex45582-22397.xls has one extra byte after the last record
-              // Excel reads that file OK
-          }
-          nextSid = INVALID_SID_VALUE;
-      } else {
-          nextSid = LittleEndian.readShort(in);
-          if (nextSid == INVALID_SID_VALUE) {
-              throw new RecordFormatException("Found sid " + nextSid + " after record with sid 0x"
-                      + Integer.toHexString(currentSid).toUpperCase());
-          }
-      }
-    } catch (IOException ex) {
-      throw new RecordFormatException("Error reading bytes", ex);
-    }
-  }
+	/** Moves to the next record in the stream.
+	 *
+	 * <i>Note: The auto continue flag is reset to true</i>
+	 */
+	public void nextRecord() throws RecordFormatException {
+		if (nextSid == INVALID_SID_VALUE) {
+			throw new IllegalStateException("EOF - next record not available");
+		}
+		currentSid = nextSid;
+		autoContinue = true;
+		recordOffset = 0;
+		_currentDataLength = _le.readUShort();
+		if (_currentDataLength > MAX_RECORD_DATA_SIZE) {
+			throw new RecordFormatException("The content of an excel record cannot exceed "
+					+ MAX_RECORD_DATA_SIZE + " bytes");
+		}
+	}

  public void setAutoContinue(boolean enable) {
    this.autoContinue = enable;
  }

-  public boolean getAutoContinue() {
-    return autoContinue;
-  }
-
 	private void checkRecordPosition(int requiredByteCount) {

 		if (remaining() < requiredByteCount) {
@ -150,11 +176,8 @@ public final class RecordInputStream extends InputStream implements LittleEndian
 	 */
 	public byte readByte() {
 		checkRecordPosition(LittleEndian.BYTE_SIZE);
-
-		byte result = data[recordOffset];
 		recordOffset += LittleEndian.BYTE_SIZE;
-		pos += LittleEndian.BYTE_SIZE;
-		return result;
+		return _le.readByte();
 	}

 	/**
@ -162,29 +185,20 @@ public final class RecordInputStream extends InputStream implements LittleEndian
 	 */
 	public short readShort() {
 		checkRecordPosition(LittleEndian.SHORT_SIZE);
-
-		short result = LittleEndian.getShort(data, recordOffset);
 		recordOffset += LittleEndian.SHORT_SIZE;
-		pos += LittleEndian.SHORT_SIZE;
-		return result;
+		return _le.readShort();
 	}

 	public int readInt() {
 		checkRecordPosition(LittleEndian.INT_SIZE);
-
-		int result = LittleEndian.getInt(data, recordOffset);
 		recordOffset += LittleEndian.INT_SIZE;
-		pos += LittleEndian.INT_SIZE;
-		return result;
+		return _le.readInt();
 	}

 	public long readLong() {
 		checkRecordPosition(LittleEndian.LONG_SIZE);
-
-		long result = LittleEndian.getLong(data, recordOffset);
 		recordOffset += LittleEndian.LONG_SIZE;
-		pos += LittleEndian.LONG_SIZE;
-		return result;
+		return _le.readLong();
 	}

 	/**
@ -200,22 +214,18 @@ public final class RecordInputStream extends InputStream implements LittleEndian
 	 */
 	public int readUShort() {
 		checkRecordPosition(LittleEndian.SHORT_SIZE);
-
-		int result = LittleEndian.getUShort(data, recordOffset);
 		recordOffset += LittleEndian.SHORT_SIZE;
-		pos += LittleEndian.SHORT_SIZE;
-		return result;
+		return _le.readUShort();
 	}

 	public double readDouble() {
 		checkRecordPosition(LittleEndian.DOUBLE_SIZE);
-		long valueLongBits = LittleEndian.getLong(data, recordOffset);
+		recordOffset += LittleEndian.DOUBLE_SIZE;
+		long valueLongBits = _le.readLong();
 		double result = Double.longBitsToDouble(valueLongBits);
 		if (Double.isNaN(result)) {
 			throw new RuntimeException("Did not expect to read NaN"); // (Because Excel typically doesn't write NaN
 		}
-		recordOffset += LittleEndian.DOUBLE_SIZE;
-		pos += LittleEndian.DOUBLE_SIZE;
 		return result;
 	}
 	public void readFully(byte[] buf) {
@ -224,9 +234,8 @@ public final class RecordInputStream extends InputStream implements LittleEndian

 	public void readFully(byte[] buf, int off, int len) {
 		checkRecordPosition(len);
-		System.arraycopy(data, recordOffset, buf, off, len);
+		_le.readFully(buf, off, len);
 		recordOffset+=len;
-		pos+=len;
 	}

 	public String readString() {
@ -315,18 +324,19 @@ public final class RecordInputStream extends InputStream implements LittleEndian
    return new UnicodeString(this);
  }

-  /** Returns the remaining bytes for the current record.
-   *
-   * @return The remaining bytes of the current record.
-   */
-  public byte[] readRemainder() {
-    int size = remaining();
-    byte[] result = new byte[size];
-    System.arraycopy(data, recordOffset, result, 0, size);
-    recordOffset += size;
-    pos += size;
-    return result;
-  }
+	/** Returns the remaining bytes for the current record.
+	 *
+	  * @return The remaining bytes of the current record.
+	  */
+	public byte[] readRemainder() {
+		int size = remaining();
+		if (size ==0) {
+			return EMPTY_BYTE_ARRAY;
+		}
+		byte[] result = new byte[size];
+		readFully(result);
+		return result;
+	}

  /** Reads all byte data for the current record, including any
   *  that overlaps into any following continue records.
@ -350,19 +360,29 @@ public final class RecordInputStream extends InputStream implements LittleEndian
    return out.toByteArray();
  }

-  /** The remaining number of bytes in the <i>current</i> record.
-   *
-   * @return The number of bytes remaining in the current record
-   */
-  public int remaining() {
-    return (currentLength - recordOffset);
-  }
+	/** The remaining number of bytes in the <i>current</i> record.
+	 *
+	 * @return The number of bytes remaining in the current record
+	 */
+	public int remaining() {
+		if (_currentDataLength == DATA_LEN_NEEDS_TO_BE_READ) {
+			// the sid of the next record has already been read, so the current record is finished
+			return 0;
+		}
+		return (_currentDataLength - recordOffset);
+	}

-  /** Returns true iif a Continue record is next in the excel stream
-   *
-   * @return True when a ContinueRecord is next.
-   */
-  public boolean isContinueNext() {
-    return (nextSid == ContinueRecord.sid);
-  }
+	/**
+	 *
+	 * @return <code>true</code> when a {@link ContinueRecord} is next.
+	 */
+	public boolean isContinueNext() {
+		if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ && recordOffset != _currentDataLength) {
+			throw new IllegalStateException("Should never be called before end of current record");
+		}
+		if (!hasNextRecord()) {
+			return false;
+		}
+		return nextSid == ContinueRecord.sid;
+	}
 }
--- a/src/java/org/apache/poi/poifs/filesystem/DocumentInputStream.java
+++ b/src/java/org/apache/poi/poifs/filesystem/DocumentInputStream.java
@ -1,4 +1,3 @@
-
 /* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
@ -15,437 +14,312 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 ==================================================================== */
-        

 package org.apache.poi.poifs.filesystem;

-import java.io.*;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.poifs.storage.DataInputBlock;
+import org.apache.poi.util.LittleEndianInput;

 /**
 * This class provides methods to read a DocumentEntry managed by a
- * Filesystem instance.
+ * {@link POIFSFileSystem} instance.
 *
 * @author Marc Johnson (mjohnson at apache dot org)
 */
+public final class DocumentInputStream extends InputStream implements LittleEndianInput {
+	/** returned by read operations if we're at end of document */
+	private static final int EOF = -1;

-public class DocumentInputStream
-    extends InputStream
-{
+	private static final int SIZE_SHORT = 2;
+	private static final int SIZE_INT = 4;
+	private static final int SIZE_LONG = 8;

-    // current offset into the Document
-    private int              _current_offset;
+	/** current offset into the Document */
+	private int _current_offset;

-    // current marked offset into the Document (used by mark and
-    // reset)
-    private int              _marked_offset;
+	/** current marked offset into the Document (used by mark and reset) */
+	private int _marked_offset;

-    // the Document's size
-    private int              _document_size;
+	/** the Document's size */
+	private int _document_size;

-    // have we been closed?
-    private boolean          _closed;
+	/** have we been closed? */
+	private boolean _closed;

-    // the actual Document
-    private POIFSDocument    _document;
+	/** the actual Document */
+	private POIFSDocument _document;

-    // buffer used to read one byte at a time
-    private byte[]           _tiny_buffer;
+	/** the data block containing the current stream pointer */
+	private DataInputBlock _currentBlock;

-    // returned by read operations if we're at end of document
-    static private final int EOD = -1;
+	/**
+	 * Create an InputStream from the specified DocumentEntry
+	 * 
+	 * @param document the DocumentEntry to be read
+	 * 
+	 * @exception IOException if the DocumentEntry cannot be opened (like, maybe it has
+	 *                been deleted?)
+	 */
+	public DocumentInputStream(DocumentEntry document) throws IOException {
+		if (!(document instanceof DocumentNode)) {
+			throw new IOException("Cannot open internal document storage");
+		}
+		_current_offset = 0;
+		_marked_offset = 0;
+		_document_size = document.getSize();
+		_closed = false;
+		_document = ((DocumentNode) document).getDocument();
+		_currentBlock = getDataInputBlock(0);
+	}

-    /**
-     * Create an InputStream from the specified DocumentEntry
-     *
-     * @param document the DocumentEntry to be read
-     *
-     * @exception IOException if the DocumentEntry cannot be opened
-     *            (like, maybe it has been deleted?)
-     */
+	/**
+	 * Create an InputStream from the specified Document
+	 * 
+	 * @param document the Document to be read
+	 */
+	public DocumentInputStream(POIFSDocument document) {
+		_current_offset = 0;
+		_marked_offset = 0;
+		_document_size = document.getSize();
+		_closed = false;
+		_document = document;
+		_currentBlock = getDataInputBlock(0);
+	}

-    public DocumentInputStream(final DocumentEntry document)
-        throws IOException
-    {
-        _current_offset = 0;
-        _marked_offset  = 0;
-        _document_size  = document.getSize();
-        _closed         = false;
-        _tiny_buffer    = null;
-        if (document instanceof DocumentNode)
-        {
-            _document = (( DocumentNode ) document).getDocument();
-        }
-        else
-        {
-            throw new IOException("Cannot open internal document storage");
-        }
-    }
+	public int available() throws IOException {
+		dieIfClosed();
+		return _document_size - _current_offset;
+	}

-    /**
-     * Create an InputStream from the specified Document
-     *
-     * @param document the Document to be read
-     *
-     * @exception IOException if the DocumentEntry cannot be opened
-     *            (like, maybe it has been deleted?)
-     */
+	public void close() {
+		_closed = true;
+	}

-    public DocumentInputStream(final POIFSDocument document)
-        throws IOException
-    {
-        _current_offset = 0;
-        _marked_offset  = 0;
-        _document_size  = document.getSize();
-        _closed         = false;
-        _tiny_buffer    = null;
-        _document       = document;
-    }
+	public void mark(int ignoredReadlimit) {
+		_marked_offset = _current_offset;
+	}

-    /**
-     * Returns the number of bytes that can be read (or skipped over)
-     * from this input stream without blocking by the next caller of a
-     * method for this input stream. The next caller might be the same
-     * thread or or another thread.
-     *
-     * @return the number of bytes that can be read from this input
-     *         stream without blocking.
-     *
-     * @exception IOException on error (such as the stream has been
-     *            closed)
-     */
+	/**
+	 * Tests if this input stream supports the mark and reset methods.
+	 * 
+	 * @return <code>true</code> always
+	 */
+	public boolean markSupported() {
+		return true;
+	}

-    public int available()
-        throws IOException
-    {
-        dieIfClosed();
-        return _document_size - _current_offset;
-    }
+	private DataInputBlock getDataInputBlock(int offset) {
+		return _document.getDataInputBlock(offset);
+	}

-    /**
-     * Closes this input stream and releases any system resources
-     * associated with the stream.
-     *
-     * @exception IOException
-     */
+	public int read() throws IOException {
+		dieIfClosed();
+		if (atEOD()) {
+			return EOF;
+		}
+		int result = _currentBlock.readUByte();
+		_current_offset++;
+		if (_currentBlock.available() < 1) {
+			_currentBlock = getDataInputBlock(_current_offset);
+		}
+		return result;
+	}

-    public void close()
-        throws IOException
-    {
-        _closed = true;
-    }
+	public int read(byte[] b) throws IOException {
+		return read(b, 0, b.length);
+	}

-    /**
-     * Marks the current position in this input stream. A subsequent
-     * call to the reset method repositions this stream at the last
-     * marked position so that subsequent reads re-read the same
-     * bytes.
-     * <p>
-     * The readlimit arguments tells this input stream to allow that
-     * many bytes to be read before the mark position gets
-     * invalidated. This implementation, however, does not care.
-     * <p>
-     * The general contract of mark is that, if the method
-     * markSupported returns true, the stream somehow remembers all
-     * the bytes read after the call to mark and stands ready to
-     * supply those same bytes again if and whenever the method reset
-     * is called. However, the stream is not required to remember any
-     * data at all if more than readlimit bytes are read from the
-     * stream before reset is called. But this stream will.
-     *
-     * @param ignoredReadlimit the maximum limit of bytes that can be
-     *                         read before the mark position becomes
-     *                         invalid. Ignored by this
-     *                         implementation.
-     */
+	public int read(byte[] b, int off, int len) throws IOException {
+		dieIfClosed();
+		if (b == null) {
+			throw new IllegalArgumentException("buffer must not be null");
+		}
+		if (off < 0 || len < 0 || b.length < off + len) {
+			throw new IndexOutOfBoundsException("can't read past buffer boundaries");
+		}
+		if (len == 0) {
+			return 0;
+		}
+		if (atEOD()) {
+			return EOF;
+		}
+		int limit = Math.min(available(), len);
+		readFully(b, off, limit);
+		return limit;
+	}

-    public void mark(int ignoredReadlimit)
-    {
-        _marked_offset = _current_offset;
-    }
+	/**
+	 * Repositions this stream to the position at the time the mark() method was
+	 * last called on this input stream. If mark() has not been called this
+	 * method repositions the stream to its beginning.
+	 */
+	public void reset() {
+		_current_offset = _marked_offset;
+		_currentBlock = getDataInputBlock(_current_offset);
+	}

-    /**
-     * Tests if this input stream supports the mark and reset methods.
-     *
-     * @return true
-     */
+	public long skip(long n) throws IOException {
+		dieIfClosed();
+		if (n < 0) {
+			return 0;
+		}
+		int new_offset = _current_offset + (int) n;

-    public boolean markSupported()
-    {
-        return true;
-    }
+		if (new_offset < _current_offset) {

-    /**
-     * Reads the next byte of data from the input stream. The value
-     * byte is returned as an int in the range 0 to 255. If no byte is
-     * available because the end of the stream has been reached, the
-     * value -1 is returned. The definition of this method in
-     * java.io.InputStream allows this method to block, but it won't.
-     *
-     * @return the next byte of data, or -1 if the end of the stream
-     *         is reached.
-     *
-     * @exception IOException
-     */
+			// wrap around in converting a VERY large long to an int
+			new_offset = _document_size;
+		} else if (new_offset > _document_size) {
+			new_offset = _document_size;
+		}
+		long rval = new_offset - _current_offset;

-    public int read()
-        throws IOException
-    {
-        dieIfClosed();
-        if (atEOD())
-        {
-            return EOD;
-        }
-        if (_tiny_buffer == null)
-        {
-            _tiny_buffer = new byte[ 1 ];
-        }
-        _document.read(_tiny_buffer, _current_offset++);
-        return ((int)_tiny_buffer[ 0 ]) & 0x000000FF;
-    }
+		_current_offset = new_offset;
+		_currentBlock = getDataInputBlock(_current_offset);
+		return rval;
+	}

-    /**
-     * Reads some number of bytes from the input stream and stores
-     * them into the buffer array b. The number of bytes actually read
-     * is returned as an integer. The definition of this method in
-     * java.io.InputStream allows this method to block, but it won't.
-     * <p>
-     * If b is null, a NullPointerException is thrown. If the length
-     * of b is zero, then no bytes are read and 0 is returned;
-     * otherwise, there is an attempt to read at least one byte. If no
-     * byte is available because the stream is at end of file, the
-     * value -1 is returned; otherwise, at least one byte is read and
-     * stored into b.
-     * <p>
-     * The first byte read is stored into element b[0], the next one
-     * into b[1], and so on. The number of bytes read is, at most,
-     * equal to the length of b. Let k be the number of bytes actually
-     * read; these bytes will be stored in elements b[0] through
-     * b[k-1], leaving elements b[k] through b[b.length-1] unaffected.
-     * <p>
-     * If the first byte cannot be read for any reason other than end
-     * of file, then an IOException is thrown. In particular, an
-     * IOException is thrown if the input stream has been closed.
-     * <p>
-     * The read(b) method for class InputStream has the same effect as:
-     * <p>
-     * <code>read(b, 0, b.length)</code>
-     *
-     * @param b the buffer into which the data is read.
-     *
-     * @return the total number of bytes read into the buffer, or -1
-     *         if there is no more data because the end of the stream
-     *         has been reached.
-     *
-     * @exception IOException
-     * @exception NullPointerException
-     */
+	private void dieIfClosed() throws IOException {
+		if (_closed) {
+			throw new IOException("cannot perform requested operation on a closed stream");
+		}
+	}

-    public int read(final byte [] b)
-        throws IOException, NullPointerException
-    {
-        return read(b, 0, b.length);
-    }
+	private boolean atEOD() {
+		return _current_offset == _document_size;
+	}

-    /**
-     * Reads up to len bytes of data from the input stream into an
-     * array of bytes. An attempt is made to read as many as len
-     * bytes, but a smaller number may be read, possibly zero. The
-     * number of bytes actually read is returned as an integer.
-     * <p>
-     * The definition of this method in java.io.InputStream allows it
-     * to block, but it won't.
-     * <p>
-     * If b is null, a NullPointerException is thrown.
-     * <p>
-     * If off is negative, or len is negative, or off+len is greater
-     * than the length of the array b, then an
-     * IndexOutOfBoundsException is thrown.
-     * <p>
-     * If len is zero, then no bytes are read and 0 is returned;
-     * otherwise, there is an attempt to read at least one byte. If no
-     * byte is available because the stream is at end of file, the
-     * value -1 is returned; otherwise, at least one byte is read and
-     * stored into b.
-     * <p>
-     * The first byte read is stored into element b[off], the next one
-     * into b[off+1], and so on. The number of bytes read is, at most,
-     * equal to len. Let k be the number of bytes actually read; these
-     * bytes will be stored in elements b[off] through b[off+k-1],
-     * leaving elements b[off+k] through b[off+len-1] unaffected.
-     * <p>
-     * In every case, elements b[0] through b[off] and elements
-     * b[off+len] through b[b.length-1] are unaffected.
-     * <p>
-     * If the first byte cannot be read for any reason other than end
-     * of file, then an IOException is thrown. In particular, an
-     * IOException is thrown if the input stream has been closed.
-     *
-     * @param b the buffer into which the data is read.
-     * @param off the start offset in array b at which the data is
-     *            written.
-     * @param len the maximum number of bytes to read.
-     *
-     * @return the total number of bytes read into the buffer, or -1
-     *         if there is no more data because the end of the stream
-     *         has been reached.
-     *
-     * @exception IOException
-     * @exception NullPointerException
-     * @exception IndexOutOfBoundsException
-     */
+	private void checkAvaliable(int requestedSize) {
+		if (_closed) {
+			throw new RuntimeException("cannot perform requested operation on a closed stream");
+		}
+		if (requestedSize > _document_size - _current_offset) {
+			throw new RuntimeException("Buffer underrun - requested " + requestedSize
+					+ " bytes but " + (_document_size - _current_offset) + " was available");
+		}
+	}

-    public int read(final byte [] b, final int off, final int len)
-        throws IOException, NullPointerException, IndexOutOfBoundsException
-    {
-        dieIfClosed();
-        if (b == null)
-        {
-            throw new NullPointerException("buffer is null");
-        }
-        if ((off < 0) || (len < 0) || (b.length < (off + len)))
-        {
-            throw new IndexOutOfBoundsException(
-                "can't read past buffer boundaries");
-        }
-        if (len == 0)
-        {
-            return 0;
-        }
-        if (atEOD())
-        {
-            return EOD;
-        }
-        int limit = Math.min(available(), len);
+	public byte readByte() {
+		return (byte) readUByte();
+	}

-        if ((off == 0) && (limit == b.length))
-        {
-            _document.read(b, _current_offset);
-        }
-        else
-        {
-            byte[] buffer = new byte[ limit ];
+	public double readDouble() {
+		return Double.longBitsToDouble(readLong());
+	}

-            _document.read(buffer, _current_offset);
-            System.arraycopy(buffer, 0, b, off, limit);
-        }
-        _current_offset += limit;
-        return limit;
-    }
+	public void readFully(byte[] buf) {
+		readFully(buf, 0, buf.length);
+	}

-    /**
-     * Repositions this stream to the position at the time the mark
-     * method was last called on this input stream.
-     * <p>
-     * The general contract of reset is:
-     * <p>
-     * <ul>
-     *    <li>
-     *        If the method markSupported returns true, then:
-     *        <ul>
-     *            <li>
-     *                If the method mark has not been called since the
-     *                stream was created, or the number of bytes read
-     *                from the stream since mark was last called is
-     *                larger than the argument to mark at that last
-     *                call, then an IOException might be thrown.
-     *            </li>
-     *            <li>
-     *                If such an IOException is not thrown, then the
-     *                stream is reset to a state such that all the
-     *                bytes read since the most recent call to mark
-     *                (or since the start of the file, if mark has not
-     *                been called) will be resupplied to subsequent
-     *                callers of the read method, followed by any
-     *                bytes that otherwise would have been the next
-     *                input data as of the time of the call to reset.
-     *             </li>
-     *         </ul>
-     *     </li>
-     *     <li>
-     *         If the method markSupported returns false, then:
-     *         <ul>
-     *             <li>
-     *                 The call to reset may throw an IOException.
-     *             </li>
-     *             <li>
-     *                 If an IOException is not thrown, then the
-     *                 stream is reset to a fixed state that depends
-     *                 on the particular type of the input and how it
-     *                 was created. The bytes that will be supplied to
-     *                 subsequent callers of the read method depend on
-     *                 the particular type of the input stream.
-     *             </li>
-     *         </ul>
-     *     </li>
-     * </ul>
-     * <p>
-     * All well and good ... this class's markSupported method returns
-     * true and this method does not care whether you've called mark
-     * at all, or whether you've exceeded the number of bytes
-     * specified in the last call to mark. We're basically walking a
-     * byte array ... mark and reset to your heart's content.
-     */
+	/**
+	 * Reads two bytes with {@link #readUShort()} and narrows the result to a signed
+	 * <tt>short</tt>.
+	 */
+	public short readShort() {
+		int unsignedValue = readUShort();
+		return (short) unsignedValue;
+	}

-    public void reset()
-    {
-        _current_offset = _marked_offset;
-    }
+	/**
+	 * Reads exactly <tt>len</tt> bytes into <tt>buf</tt> starting at index <tt>off</tt>,
+	 * moving on to the following data block each time the current one is used up.
+	 *
+	 * @param buf destination array
+	 * @param off index in <tt>buf</tt> at which the first byte is stored
+	 * @param len number of bytes to read; a zero length request transfers nothing
+	 * @throws IllegalStateException if the end of the document is reached while bytes
+	 *         are still outstanding
+	 */
+	public void readFully(byte[] buf, int off, int len) {
+		checkAvaliable(len);
+		if (len == 0) {
+			// Nothing to transfer.  Returning here also avoids dereferencing _currentBlock,
+			// which is set to null once the end of the document has been reached.
+			return;
+		}
+		int blockAvailable = _currentBlock.available();
+		if (blockAvailable > len) {
+			// fast path: the request is satisfied without exhausting the current block
+			_currentBlock.readFully(buf, off, len);
+			_current_offset += len;
+			return;
+		}
+		// else read big amount in chunks
+		int remaining = len;
+		int writePos = off;
+		while (remaining > 0) {
+			// the current block is used up by this chunk when it holds no more than is needed
+			boolean blockIsExpiring = remaining >= blockAvailable;
+			int reqSize = blockIsExpiring ? blockAvailable : remaining;
+			_currentBlock.readFully(buf, writePos, reqSize);
+			remaining -= reqSize;
+			writePos += reqSize;
+			_current_offset += reqSize;
+			if (blockIsExpiring) {
+				if (_current_offset == _document_size) {
+					if (remaining > 0) {
+						throw new IllegalStateException(
+								"reached end of document stream unexpectedly");
+					}
+					// end of document: there is no further block to move on to
+					_currentBlock = null;
+					break;
+				}
+				_currentBlock = getDataInputBlock(_current_offset);
+				blockAvailable = _currentBlock.available();
+			}
+		}
+	}

-    /**
-     * Skips over and discards n bytes of data from this input
-     * stream. The skip method may, for a variety of reasons, end up
-     * skipping over some smaller number of bytes, possibly 0. This
-     * may result from any of a number of conditions; reaching end of
-     * file before n bytes have been skipped is only one
-     * possibility. The actual number of bytes skipped is returned. If
-     * n is negative, no bytes are skipped.
-     *
-     * @param n the number of bytes to be skipped.
-     *
-     * @return the actual number of bytes skipped.
-     *
-     * @exception IOException
-     */
+	/**
+	 * Reads a <tt>long</tt> (<tt>SIZE_LONG</tt> bytes) from the current position.  The value
+	 * may lie wholly inside the current block, end exactly at its last byte, or span
+	 * into the following block.
+	 */
+	public long readLong() {
+		checkAvaliable(SIZE_LONG);
+		final int bytesLeftInBlock = _currentBlock.available();
+		if (bytesLeftInBlock > SIZE_LONG) {
+			// the current block is not exhausted by this read
+			long value = _currentBlock.readLongLE();
+			_current_offset += SIZE_LONG;
+			return value;
+		}
+		// the current block is used up by this read, so the following block is fetched first
+		DataInputBlock followingBlock = getDataInputBlock(_current_offset + bytesLeftInBlock);
+		long value;
+		if (bytesLeftInBlock != SIZE_LONG) {
+			// value straddles the block boundary
+			value = followingBlock.readLongLE(_currentBlock, bytesLeftInBlock);
+		} else {
+			value = _currentBlock.readLongLE();
+		}
+		_currentBlock = followingBlock;
+		_current_offset += SIZE_LONG;
+		return value;
+	}

-    public long skip(final long n)
-        throws IOException
-    {
-        dieIfClosed();
-        if (n < 0)
-        {
-            return 0;
-        }
-        int new_offset = _current_offset + ( int ) n;
+	/**
+	 * Reads an <tt>int</tt> (<tt>SIZE_INT</tt> bytes) from the current position.  The value
+	 * may lie wholly inside the current block, end exactly at its last byte, or span
+	 * into the following block.
+	 */
+	public int readInt() {
+		checkAvaliable(SIZE_INT);
+		final int bytesLeftInBlock = _currentBlock.available();
+		if (bytesLeftInBlock > SIZE_INT) {
+			// the current block is not exhausted by this read
+			int value = _currentBlock.readIntLE();
+			_current_offset += SIZE_INT;
+			return value;
+		}
+		// the current block is used up by this read, so the following block is fetched first
+		DataInputBlock followingBlock = getDataInputBlock(_current_offset + bytesLeftInBlock);
+		int value;
+		if (bytesLeftInBlock != SIZE_INT) {
+			// value straddles the block boundary
+			value = followingBlock.readIntLE(_currentBlock, bytesLeftInBlock);
+		} else {
+			value = _currentBlock.readIntLE();
+		}
+		_currentBlock = followingBlock;
+		_current_offset += SIZE_INT;
+		return value;
+	}

-        if (new_offset < _current_offset)
-        {
-
-            // wrap around in converting a VERY large long to an int
-            new_offset = _document_size;
-        }
-        else if (new_offset > _document_size)
-        {
-            new_offset = _document_size;
-        }
-        long rval = new_offset - _current_offset;
-
-        _current_offset = new_offset;
-        return rval;
-    }
-
-    private void dieIfClosed()
-        throws IOException
-    {
-        if (_closed)
-        {
-            throw new IOException(
-                "cannot perform requested operation on a closed stream");
-        }
-    }
-
-    private boolean atEOD()
-    {
-        return _current_offset == _document_size;
-    }
-}   // end public class DocumentInputStream
+	/**
+	 * Reads an unsigned 16 bit value (<tt>SIZE_SHORT</tt> bytes) from the current position.
+	 * The value may lie wholly inside the current block, end exactly at its last byte,
+	 * or span into the following block.
+	 */
+	public int readUShort() {
+		checkAvaliable(SIZE_SHORT);
+		final int bytesLeftInBlock = _currentBlock.available();
+		if (bytesLeftInBlock > SIZE_SHORT) {
+			// the current block is not exhausted by this read
+			int value = _currentBlock.readUShortLE();
+			_current_offset += SIZE_SHORT;
+			return value;
+		}
+		// the current block is used up by this read, so the following block is fetched first
+		DataInputBlock followingBlock = getDataInputBlock(_current_offset + bytesLeftInBlock);
+		int value;
+		if (bytesLeftInBlock != SIZE_SHORT) {
+			// value straddles the block boundary
+			value = followingBlock.readUShortLE(_currentBlock);
+		} else {
+			value = _currentBlock.readUShortLE();
+		}
+		_currentBlock = followingBlock;
+		_current_offset += SIZE_SHORT;
+		return value;
+	}

+	/**
+	 * Reads one byte as an unsigned value and, if that used up the current block,
+	 * switches to the block at the new offset.
+	 */
+	public int readUByte() {
+		checkAvaliable(1);
+		final int value = _currentBlock.readUByte();
+		_current_offset += 1;
+		boolean blockExhausted = _currentBlock.available() <= 0;
+		if (blockExhausted) {
+			_currentBlock = getDataInputBlock(_current_offset);
+		}
+		return value;
+	}
+}
--- a/src/java/org/apache/poi/poifs/filesystem/POIFSDocument.java
+++ b/src/java/org/apache/poi/poifs/filesystem/POIFSDocument.java
@ -31,6 +31,7 @@ import org.apache.poi.poifs.dev.POIFSViewable;
 import org.apache.poi.poifs.property.DocumentProperty;
 import org.apache.poi.poifs.property.Property;
 import org.apache.poi.poifs.storage.BlockWritable;
+import org.apache.poi.poifs.storage.DataInputBlock;
 import org.apache.poi.poifs.storage.DocumentBlock;
 import org.apache.poi.poifs.storage.ListManagedBlock;
 import org.apache.poi.poifs.storage.RawDataBlock;
@ -194,12 +195,62 @@ public final class POIFSDocument implements BATManaged, BlockWritable, POIFSView
 	 *
 	 * @param buffer the buffer to write to
 	 * @param offset the offset into our storage to read from
+	 * This method is currently (Oct 2008) only used by test code. Perhaps it can be deleted
 	 */
 	void read(byte[] buffer, int offset) {
+		int len = buffer.length;
+
+		DataInputBlock currentBlock = getDataInputBlock(offset);
+		if (currentBlock == null) {
+			// getDataInputBlock returns null when offset is exactly the document size,
+			// so only an empty read can succeed from here
+			if (len > 0) {
+				throw new IllegalStateException("reached end of document stream unexpectedly");
+			}
+			return;
+		}
+
+		int blockAvailable = currentBlock.available();
+		if (blockAvailable > len) {
+			// fast path: the whole buffer is filled from the first block
+			currentBlock.readFully(buffer, 0, len);
+			return;
+		}
+		// else read big amount in chunks
+		int remaining = len;
+		int writePos = 0;
+		int currentOffset = offset;
+		while (remaining > 0) {
+			// the current block is used up by this chunk when it holds no more than is needed
+			boolean blockIsExpiring = remaining >= blockAvailable;
+			int reqSize = blockIsExpiring ? blockAvailable : remaining;
+			currentBlock.readFully(buffer, writePos, reqSize);
+			remaining -= reqSize;
+			writePos += reqSize;
+			currentOffset += reqSize;
+			if (blockIsExpiring) {
+				if (currentOffset == _size) {
+					if (remaining > 0) {
+						throw new IllegalStateException("reached end of document stream unexpectedly");
+					}
+					break;
+				}
+				currentBlock = getDataInputBlock(currentOffset);
+				blockAvailable = currentBlock.available();
+			}
+		}
+	}
+
+	/**
+	 * Obtains a {@link DataInputBlock} for the data at <tt>offset</tt>, taken from the small
+	 * block store when <tt>_property.shouldUseSmallBlocks()</tt> is true and from the big
+	 * block store otherwise.
+	 *
+	 * @param offset the offset into this document's storage
+	 * @return <code>null</code> if <tt>offset</tt> points to the end of the document stream
+	 * @throws RuntimeException if <tt>offset</tt> is greater than the document size
+	 */
+	DataInputBlock getDataInputBlock(int offset) {
+		if (offset >= _size) {
+			if (offset > _size) {
+				throw new RuntimeException("Request for Offset " + offset + " doc size is " + _size);
+			}
+			// offset == _size: exactly at the end of the document
+			return null;
+		}
 		if (_property.shouldUseSmallBlocks()) {
-			SmallDocumentBlock.read(_small_store.getBlocks(), buffer, offset);
+			return SmallDocumentBlock.getDataInputBlock(_small_store.getBlocks(), offset);
 		} else {
-			DocumentBlock.read(_big_store.getBlocks(), buffer, offset);
+			return DocumentBlock.getDataInputBlock(_big_store.getBlocks(), offset);
 		}
 	}

--- a/src/java/org/apache/poi/poifs/storage/DataInputBlock.java
+++ b/src/java/org/apache/poi/poifs/storage/DataInputBlock.java
@ -0,0 +1,186 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.poifs.storage;
+
+/**
+ * Wraps a <tt>byte</tt> array and provides simple data input access.
+ * Internally, this class maintains a buffer read index, so that for the most part, primitive
+ * data can be read in a data-input-stream-like manner.<p/>
+ *
+ * Note - the calling class should call the {@link #available()} method to detect end-of-buffer
+ * and move to the next data block when the current is exhausted.
+ * For optimisation reasons, no error handling is performed in this class.  Thus, mistakes in
+ * calling code may raise ugly exceptions here, like {@link ArrayIndexOutOfBoundsException},
+ * etc.<p/>
+ *
+ * The multi-byte primitive input methods ({@link #readUShortLE()}, {@link #readIntLE()} and
+ * {@link #readLongLE()}) have corresponding 'spanning read' methods which (when required) perform
+ * a read across the block boundary.  These spanning read methods take the previous
+ * {@link DataInputBlock} as a parameter.
+ * Reads of larger amounts of data (into <tt>byte</tt> array buffers) must be managed by the caller
+ * since these could conceivably involve more than two blocks.
+ *
+ * @author Josh Micich
+ */
+public final class DataInputBlock {
+
+	/**
+	 * Possibly any size (usually 512 or 64 bytes).  Assumed to be at least 8 bytes for all
+	 * blocks before the end of the stream.  The last block in the stream can be any size
+	 * except zero.
+	 */
+	private final byte[] _buf;
+	/** index into {@link #_buf} of the next byte to be read */
+	private int _readIndex;
+	/** end limit used by {@link #available()}; initialised to the length of {@link #_buf} */
+	private int _maxIndex;
+
+	/**
+	 * @param data the backing array; it is referenced directly, not copied
+	 * @param startOffset index within <tt>data</tt> at which reading starts
+	 */
+	DataInputBlock(byte[] data, int startOffset) {
+		_buf = data;
+		_readIndex = startOffset;
+		_maxIndex = _buf.length;
+	}
+	/**
+	 * @return the number of bytes between the current read index and the end of the buffer
+	 */
+	public int available() {
+		return _maxIndex-_readIndex;
+	}
+
+	/**
+	 * Reads the next byte as an unsigned value (0..255) and advances the read index by one.
+	 */
+	public int readUByte() {
+		return _buf[_readIndex++] & 0xFF;
+	}
+
+	/**
+	 * Reads a <tt>short</tt> which was encoded in <em>little endian</em> format.
+	 * The result is returned unsigned and the read index advances by two.
+	 */
+	public int readUShortLE() {
+		int i = _readIndex;
+		
+		int b0 = _buf[i++] & 0xFF;
+		int b1 = _buf[i++] & 0xFF;
+		_readIndex = i;
+		return (b1 << 8) + (b0 << 0);
+	}
+
+	/**
+	 * Reads a <tt>short</tt> which spans the end of <tt>prevBlock</tt> and the start of this block.
+	 * The low order byte is the last byte of <tt>prevBlock</tt>'s buffer, the high order byte is
+	 * the next byte of this block.  The read index of <tt>prevBlock</tt> is not changed.
+	 */
+	public int readUShortLE(DataInputBlock prevBlock) {
+		// simple case - will always be one byte in each block
+		int i = prevBlock._buf.length-1;
+		
+		// note - the post-increment of i has no further effect, i is not used again
+		int b0 = prevBlock._buf[i++] & 0xFF;
+		int b1 = _buf[_readIndex++] & 0xFF;
+		return (b1 << 8) + (b0 << 0);
+	}
+
+	/**
+	 * Reads an <tt>int</tt> which was encoded in <em>little endian</em> format.
+	 * The read index advances by four.
+	 */
+	public int readIntLE() {
+		int i = _readIndex;
+		
+		int b0 = _buf[i++] & 0xFF;
+		int b1 = _buf[i++] & 0xFF;
+		int b2 = _buf[i++] & 0xFF;
+		int b3 = _buf[i++] & 0xFF;
+		_readIndex = i;
+		return (b3 << 24) + (b2 << 16) + (b1 << 8) + (b0 << 0);
+	}
+
+	/**
+	 * Reads an <tt>int</tt> which spans the end of <tt>prevBlock</tt> and the start of this block.
+	 *
+	 * @param prevBlock the block holding the first bytes of the value
+	 * @param prevBlockAvailable the number of bytes to take from <tt>prevBlock</tt>
+	 */
+	public int readIntLE(DataInputBlock prevBlock, int prevBlockAvailable) {
+		// gather the four bytes from both blocks into a temporary array, then decode
+		byte[] buf = new byte[4];
+		
+		readSpanning(prevBlock, prevBlockAvailable, buf);
+		int b0 = buf[0] & 0xFF;
+		int b1 = buf[1] & 0xFF;
+		int b2 = buf[2] & 0xFF;
+		int b3 = buf[3] & 0xFF;
+		return (b3 << 24) + (b2 << 16) + (b1 << 8) + (b0 << 0);
+	}
+
+	/**
+	 * Reads a <tt>long</tt> which was encoded in <em>little endian</em> format.
+	 * The read index advances by eight.
+	 */
+	public long readLongLE() {
+		int i = _readIndex;
+		
+		int b0 = _buf[i++] & 0xFF;
+		int b1 = _buf[i++] & 0xFF;
+		int b2 = _buf[i++] & 0xFF;
+		int b3 = _buf[i++] & 0xFF;
+		int b4 = _buf[i++] & 0xFF;
+		int b5 = _buf[i++] & 0xFF;
+		int b6 = _buf[i++] & 0xFF;
+		int b7 = _buf[i++] & 0xFF;
+		_readIndex = i;
+		// b3 and above are widened to long before shifting so that the shift is done in
+		// 64 bit arithmetic; b0..b2 are in the range 0..255 and fit in an int after shifting
+		return (((long)b7 << 56) +
+				((long)b6 << 48) +
+				((long)b5 << 40) +
+				((long)b4 << 32) +
+				((long)b3 << 24) +
+				(b2 << 16) +
+				(b1 <<  8) +
+				(b0 <<  0));
+	}
+
+	/**
+	 * Reads a <tt>long</tt> which spans the end of <tt>prevBlock</tt> and the start of this block.
+	 *
+	 * @param prevBlock the block holding the first bytes of the value
+	 * @param prevBlockAvailable the number of bytes to take from <tt>prevBlock</tt>
+	 */
+	public long readLongLE(DataInputBlock prevBlock, int prevBlockAvailable) {
+		// gather the eight bytes from both blocks into a temporary array, then decode
+		byte[] buf = new byte[8];
+		
+		readSpanning(prevBlock, prevBlockAvailable, buf);
+		
+		int b0 = buf[0] & 0xFF;
+		int b1 = buf[1] & 0xFF;
+		int b2 = buf[2] & 0xFF;
+		int b3 = buf[3] & 0xFF;
+		int b4 = buf[4] & 0xFF;
+		int b5 = buf[5] & 0xFF;
+		int b6 = buf[6] & 0xFF;
+		int b7 = buf[7] & 0xFF;
+		// b3 and above are widened to long before shifting so that the shift is done in
+		// 64 bit arithmetic; b0..b2 are in the range 0..255 and fit in an int after shifting
+		return (((long)b7 << 56) +
+				((long)b6 << 48) +
+				((long)b5 << 40) +
+				((long)b4 << 32) +
+				((long)b3 << 24) +
+				(b2 << 16) +
+				(b1 <<  8) +
+				(b0 <<  0));
+	}
+
+	/**
+	 * Reads a small amount of data from across the boundary between two blocks.
+	 * The first <tt>prevBlockAvailable</tt> bytes come from <tt>prevBlock</tt> (starting at its
+	 * read index); the rest are copied from index zero of this block's buffer.
+	 * The {@link #_readIndex} of this (the second) block is updated accordingly.
+	 * Note- this method (and other code) assumes that the second {@link DataInputBlock}
+	 * always is big enough to complete the read without being exhausted.
+	 *
+	 * @param buf receives the bytes; its length determines how many bytes are read in total
+	 */
+	private void readSpanning(DataInputBlock prevBlock, int prevBlockAvailable, byte[] buf) {
+		System.arraycopy(prevBlock._buf, prevBlock._readIndex, buf, 0, prevBlockAvailable);
+		int secondReadLen = buf.length-prevBlockAvailable;
+		// this block is read from its first byte, whatever its read index was before
+		System.arraycopy(_buf, 0, buf, prevBlockAvailable, secondReadLen);
+		_readIndex = secondReadLen;
+	}
+
+	/**
+	 * Reads <tt>len</tt> bytes from this block into the supplied buffer.
+	 *
+	 * @param buf destination array
+	 * @param off index in <tt>buf</tt> at which the first byte is stored
+	 * @param len number of bytes to copy; the caller must ensure this block holds that many
+	 */
+	public void readFully(byte[] buf, int off, int len) {
+		System.arraycopy(_buf, _readIndex, buf, off, len);
+		_readIndex += len;
+	}
+}
--- a/src/java/org/apache/poi/poifs/storage/DocumentBlock.java
+++ b/src/java/org/apache/poi/poifs/storage/DocumentBlock.java
@ -1,4 +1,3 @@
-
 /* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
@ -15,31 +14,27 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 ==================================================================== */
-        

 package org.apache.poi.poifs.storage;

 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-
 import java.util.Arrays;

 import org.apache.poi.poifs.common.POIFSConstants;
 import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.IntegerField;
-import org.apache.poi.util.LittleEndian;
-import org.apache.poi.util.LittleEndianConsts;

 /**
 * A block of document data.
 *
 * @author Marc Johnson (mjohnson at apache dot org)
 */
+public final class DocumentBlock extends BigBlock {
+    private static final int BLOCK_SHIFT = 9;
+    private static final int BLOCK_SIZE = 1 << BLOCK_SHIFT;
+    private static final int BLOCK_MASK = BLOCK_SIZE-1;

-public class DocumentBlock
-    extends BigBlock
-{
    private static final byte _default_value = ( byte ) 0xFF;
    private byte[]            _data;
    private int               _bytes_read;
@ -161,45 +156,10 @@ public class DocumentBlock
        return rval;
    }

-    /**
-     * read data from an array of DocumentBlocks
-     *
-     * @param blocks the blocks to read from
-     * @param buffer the buffer to write the data into
-     * @param offset the offset into the array of blocks to read from
-     */
-
-    public static void read(final DocumentBlock [] blocks,
-                            final byte [] buffer, final int offset)
-    {
-        int firstBlockIndex  = offset / POIFSConstants.BIG_BLOCK_SIZE;
-        int firstBlockOffset = offset % POIFSConstants.BIG_BLOCK_SIZE;
-        int lastBlockIndex   = (offset + buffer.length - 1)
-                               / POIFSConstants.BIG_BLOCK_SIZE;
-
-        if (firstBlockIndex == lastBlockIndex)
-        {
-            System.arraycopy(blocks[ firstBlockIndex ]._data,
-                             firstBlockOffset, buffer, 0, buffer.length);
-        }
-        else
-        {
-            int buffer_offset = 0;
-
-            System.arraycopy(blocks[ firstBlockIndex ]._data,
-                             firstBlockOffset, buffer, buffer_offset,
-                             POIFSConstants.BIG_BLOCK_SIZE
-                             - firstBlockOffset);
-            buffer_offset += POIFSConstants.BIG_BLOCK_SIZE - firstBlockOffset;
-            for (int j = firstBlockIndex + 1; j < lastBlockIndex; j++)
-            {
-                System.arraycopy(blocks[ j ]._data, 0, buffer, buffer_offset,
-                                 POIFSConstants.BIG_BLOCK_SIZE);
-                buffer_offset += POIFSConstants.BIG_BLOCK_SIZE;
-            }
-            System.arraycopy(blocks[ lastBlockIndex ]._data, 0, buffer,
-                             buffer_offset, buffer.length - buffer_offset);
-        }
+    /**
+     * Creates a {@link DataInputBlock} positioned at <tt>offset</tt>, where <tt>offset</tt> is
+     * measured across the whole array of blocks.
+     *
+     * @param blocks the blocks making up the document
+     * @param offset the offset into the array of blocks to read from
+     */
+    public static DataInputBlock getDataInputBlock(DocumentBlock[] blocks, int offset) {
+        final int blockIndex = offset >> BLOCK_SHIFT;
+        final int positionInBlock = offset & BLOCK_MASK;
+        DocumentBlock containingBlock = blocks[blockIndex];
+        return new DataInputBlock(containingBlock._data, positionInBlock);
     }

    /* ********** START extension of BigBlock ********** */
--- a/src/java/org/apache/poi/poifs/storage/SmallDocumentBlock.java
+++ b/src/java/org/apache/poi/poifs/storage/SmallDocumentBlock.java
@ -1,4 +1,3 @@
-
 /* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
@ -15,13 +14,15 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 ==================================================================== */
-        

 package org.apache.poi.poifs.storage;

-import java.io.*;
-
-import java.util.*;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;

 import org.apache.poi.poifs.common.POIFSConstants;

@ -31,13 +32,14 @@ import org.apache.poi.poifs.common.POIFSConstants;
 *
 * @author  Marc Johnson (mjohnson at apache dot org)
 */
+public final class SmallDocumentBlock implements BlockWritable, ListManagedBlock {
+    private static final int BLOCK_SHIFT = 6;

-public class SmallDocumentBlock
-    implements BlockWritable, ListManagedBlock
-{
    private byte[]            _data;
    private static final byte _default_fill         = ( byte ) 0xff;
-    private static final int  _block_size           = 64;
+    private static final int  _block_size           = 1 << BLOCK_SHIFT;
+    private static final int BLOCK_MASK = _block_size-1;
+
    private static final int  _blocks_per_big_block =
        POIFSConstants.BIG_BLOCK_SIZE / _block_size;

@ -178,46 +180,10 @@ public class SmallDocumentBlock
        return sdbs;
    }

-    /**
-     * read data from an array of SmallDocumentBlocks
-     *
-     * @param blocks the blocks to read from
-     * @param buffer the buffer to write the data into
-     * @param offset the offset into the array of blocks to read from
-     */
-
-    public static void read(final BlockWritable [] blocks,
-                            final byte [] buffer, final int offset)
-    {
-        int firstBlockIndex  = offset / _block_size;
-        int firstBlockOffset = offset % _block_size;
-        int lastBlockIndex   = (offset + buffer.length - 1) / _block_size;
-
-        if (firstBlockIndex == lastBlockIndex)
-        {
-            System.arraycopy(
-                (( SmallDocumentBlock ) blocks[ firstBlockIndex ])._data,
-                firstBlockOffset, buffer, 0, buffer.length);
-        }
-        else
-        {
-            int buffer_offset = 0;
-
-            System.arraycopy(
-                (( SmallDocumentBlock ) blocks[ firstBlockIndex ])._data,
-                firstBlockOffset, buffer, buffer_offset,
-                _block_size - firstBlockOffset);
-            buffer_offset += _block_size - firstBlockOffset;
-            for (int j = firstBlockIndex + 1; j < lastBlockIndex; j++)
-            {
-                System.arraycopy((( SmallDocumentBlock ) blocks[ j ])._data,
-                                 0, buffer, buffer_offset, _block_size);
-                buffer_offset += _block_size;
-            }
-            System.arraycopy(
-                (( SmallDocumentBlock ) blocks[ lastBlockIndex ])._data, 0,
-                buffer, buffer_offset, buffer.length - buffer_offset);
-        }
+    /**
+     * Creates a {@link DataInputBlock} positioned at <tt>offset</tt>, where <tt>offset</tt> is
+     * measured across the whole array of small blocks.
+     *
+     * @param blocks the small blocks making up the document
+     * @param offset the offset into the array of blocks to read from
+     */
+    public static DataInputBlock getDataInputBlock(SmallDocumentBlock[] blocks, int offset) {
+        final int blockIndex = offset >> BLOCK_SHIFT;
+        final int positionInBlock = offset & BLOCK_MASK;
+        SmallDocumentBlock containingBlock = blocks[blockIndex];
+        return new DataInputBlock(containingBlock._data, positionInBlock);
     }

    /**
--- a/src/testcases/org/apache/poi/poifs/storage/TestDocumentBlock.java
+++ b/src/testcases/org/apache/poi/poifs/storage/TestDocumentBlock.java
@ -1,4 +1,3 @@
-
 /* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
@ -15,25 +14,21 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 ==================================================================== */
-        

 package org.apache.poi.poifs.storage;

-import java.io.*;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;

-import java.util.*;
-
-import junit.framework.*;
+import junit.framework.TestCase;

 /**
 * Class to test DocumentBlock functionality
 *
 * @author Marc Johnson
 */
-
-public class TestDocumentBlock
-    extends TestCase
-{
+public final class TestDocumentBlock extends TestCase {
    static final private byte[] _testdata;

    static
@ -44,25 +39,10 @@ public class TestDocumentBlock
            _testdata[ j ] = ( byte ) j;
        }
    }
-    ;
-
-    /**
-     * Constructor TestDocumentBlock
-     *
-     * @param name
-     */
-
-    public TestDocumentBlock(String name)
-    {
-        super(name);
-    }

    /**
     * Test the writing DocumentBlock constructor.
-     *
-     * @exception IOException
     */
-
    public void testConstructor()
        throws IOException
    {
@ -88,46 +68,10 @@ public class TestDocumentBlock
        assertEquals(_testdata.length, size);
    }

-    /**
-     * test static read method
-     *
-     * @exception IOException
-     */
-
-    public void testRead()
-        throws IOException
-    {
-        DocumentBlock[]      blocks = new DocumentBlock[ 4 ];
-        ByteArrayInputStream input  = new ByteArrayInputStream(_testdata);
-
-        for (int j = 0; j < 4; j++)
-        {
-            blocks[ j ] = new DocumentBlock(input);
-        }
-        for (int j = 1; j <= 2000; j += 17)
-        {
-            byte[] buffer = new byte[ j ];
-            int    offset = 0;
-
-            for (int k = 0; k < (2000 / j); k++)
-            {
-                DocumentBlock.read(blocks, buffer, offset);
-                for (int n = 0; n < buffer.length; n++)
-                {
-                    assertEquals("checking byte " + (k * j) + n,
-                                 _testdata[ (k * j) + n ], buffer[ n ]);
-                }
-                offset += j;
-            }
-        }
-    }

    /**
     * Test 'reading' constructor
-     *
-     * @exception IOException
     */
-
    public void testReadingConstructor()
        throws IOException
    {
@ -164,17 +108,4 @@ public class TestDocumentBlock
            assertEquals(( byte ) 0xFF, copy[ j ]);
        }
    }
-
-    /**
-     * main method to run the unit tests
-     *
-     * @param ignored_args
-     */
-
-    public static void main(String [] ignored_args)
-    {
-        System.out
-            .println("Testing org.apache.poi.poifs.storage.DocumentBlock");
-        junit.textui.TestRunner.run(TestDocumentBlock.class);
-    }
 }
--- a/src/testcases/org/apache/poi/poifs/storage/TestSmallDocumentBlock.java
+++ b/src/testcases/org/apache/poi/poifs/storage/TestSmallDocumentBlock.java
@ -1,4 +1,3 @@
-
 /* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
@ -15,25 +14,24 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 ==================================================================== */
-        

 package org.apache.poi.poifs.storage;

-import java.io.*;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;

-import java.util.*;
-
-import junit.framework.*;
+import junit.framework.TestCase;

 /**
 * Class to test SmallDocumentBlock functionality
 *
 * @author Marc Johnson
 */
-
-public class TestSmallDocumentBlock
-    extends TestCase
-{
+public final class TestSmallDocumentBlock extends TestCase {
    static final private byte[] _testdata;
    static final private int    _testdata_size = 2999;

@ -45,25 +43,10 @@ public class TestSmallDocumentBlock
            _testdata[ j ] = ( byte ) j;
        }
    }
-    ;
-
-    /**
-     * constructor
-     *
-     * @param name
-     */
-
-    public TestSmallDocumentBlock(String name)
-    {
-        super(name);
-    }

    /**
     * Test conversion from DocumentBlocks
-     *
-     * @exception IOException
     */
-
    public void testConvert1()
        throws IOException
    {
@ -113,12 +96,7 @@ public class TestSmallDocumentBlock

    /**
     * Test conversion from byte array
-     *
-     * @exception IOException;
-     *
-     * @exception IOException
     */
-
    public void testConvert2()
        throws IOException
    {
@ -154,57 +132,9 @@ public class TestSmallDocumentBlock
        }
    }

-    /**
-     * Test read method
-     *
-     * @exception IOException
-     */
-
-    public void testRead()
-        throws IOException
-    {
-        ByteArrayInputStream stream    = new ByteArrayInputStream(_testdata);
-        List                 documents = new ArrayList();
-
-        while (true)
-        {
-            DocumentBlock block = new DocumentBlock(stream);
-
-            documents.add(block);
-            if (block.partiallyRead())
-            {
-                break;
-            }
-        }
-        SmallDocumentBlock[] blocks =
-            SmallDocumentBlock
-                .convert(( BlockWritable [] ) documents
-                    .toArray(new DocumentBlock[ 0 ]), _testdata_size);
-
-        for (int j = 1; j <= _testdata_size; j += 38)
-        {
-            byte[] buffer = new byte[ j ];
-            int    offset = 0;
-
-            for (int k = 0; k < (_testdata_size / j); k++)
-            {
-                SmallDocumentBlock.read(blocks, buffer, offset);
-                for (int n = 0; n < buffer.length; n++)
-                {
-                    assertEquals("checking byte " + (k * j) + n,
-                                 _testdata[ (k * j) + n ], buffer[ n ]);
-                }
-                offset += j;
-            }
-        }
-    }
-
    /**
     * test fill
-     *
-     * @exception IOException
     */
-
    public void testFill()
        throws IOException
    {
@ -294,17 +224,4 @@ public class TestSmallDocumentBlock
            }
        }
    }
-
-    /**
-     * main method to run the unit tests
-     *
-     * @param ignored_args
-     */
-
-    public static void main(String [] ignored_args)
-    {
-        System.out.println(
-            "Testing org.apache.poi.poifs.storage.SmallDocumentBlock");
-        junit.textui.TestRunner.run(TestSmallDocumentBlock.class);
-    }
 }