Start on lower memory POIFS implementation - data source to provide common access to array of bytes and files

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1050607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-12-18 10:18:43 +00:00
parent cbf86ed0bc
commit 5e9cbd4645
6 changed files with 268 additions and 25 deletions

View File

@ -0,0 +1,83 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.nio;
import java.nio.ByteBuffer;
/**
 * A POIFS {@link DataSource} backed by a byte array.
 * <p>
 * The array passed to the constructor is used directly (it is not copied)
 *  until a write past its end forces a larger array to be allocated.
 */
public class ByteArrayBackedDataSource extends DataSource {
    /** The backing store; may be longer than {@link #size} after growing */
    private byte[] buffer;
    /** The number of bytes of {@link #buffer} that hold real data */
    private long size;

    /**
     * @param data the initial contents, all of which count as valid data
     */
    public ByteArrayBackedDataSource(byte[] data) {
        this.buffer = data;
        this.size = data.length;
    }

    /**
     * Fills the remaining space of <tt>dst</tt> with the bytes found
     *  at <tt>position</tt>.
     *
     * @param dst the buffer to fill, from its current position up to its limit
     * @param position the offset in this source to start reading from
     * @throws IndexOutOfBoundsException if the requested range is not
     *  entirely within the data held
     */
    public void read(ByteBuffer dst, long position) {
        // Work from what the buffer can still accept, not its capacity,
        //  so partially filled or limited buffers behave correctly
        int length = dst.remaining();
        if(position < 0 || position + length > size) {
            throw new IndexOutOfBoundsException(
                "Unable to read " + length + " bytes from " +
                position + " in stream of length " + size
            );
        }

        // Safe to narrow, as position + length <= size <= buffer.length
        dst.put(buffer, (int)position, length);
    }

    /**
     * Copies the remaining bytes of <tt>src</tt> into this source
     *  at <tt>position</tt>, growing the backing array if required.
     *
     * @param src the buffer to drain, from its current position up to its limit
     * @param position the offset in this source to start writing at
     * @throws IndexOutOfBoundsException if the position is negative, or the
     *  write would end beyond what a byte array can index
     */
    public void write(ByteBuffer src, long position) {
        int length = src.remaining();
        long endPosition = position + length;
        if(position < 0 || endPosition > Integer.MAX_VALUE) {
            throw new IndexOutOfBoundsException(
                "Unable to write " + length + " bytes at " +
                position + " in a byte array backed stream"
            );
        }

        // Extend if needed
        if(endPosition > buffer.length) {
            extend(endPosition);
        }

        // Now copy
        src.get(buffer, (int)position, length);

        // Update size if needed
        if(endPosition > size) {
            size = endPosition;
        }
    }

    /**
     * Replaces the backing array with a larger one able to hold at
     *  least <tt>length</tt> bytes, keeping the existing data.
     */
    private void extend(long length) {
        // Grow by the larger of: what was asked for, 25% of the
        //  current array, or 4096 bytes, to avoid repeated small copies
        long difference = length - buffer.length;
        if(difference < buffer.length*0.25) {
            difference = (long)(buffer.length*0.25);
        }
        if(difference < 4096) {
            difference = 4096;
        }

        // Don't let the over-allocation push us past the int range
        long newLength = Math.min(buffer.length + difference, Integer.MAX_VALUE);

        byte[] nb = new byte[(int)newLength];
        System.arraycopy(buffer, 0, nb, 0, (int)size);
        buffer = nb;
    }

    /**
     * @return the number of bytes of data held, or -1 once closed
     */
    public long size() {
        return size;
    }

    /**
     * Drops the reference to the backing array and marks the size as -1.
     */
    public void close() {
        buffer = null;
        size = -1;
    }
}

View File

@ -0,0 +1,31 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.nio;
import java.io.IOException;
import java.nio.ByteBuffer;
/**
 * Common definition of how we read and write bytes.
 * <p>
 * The operations are public so that code outside this package can work
 *  against the abstraction, rather than only against a concrete subclass.
 */
public abstract class DataSource {
    /**
     * Reads bytes from this source, starting at the given position, into
     *  the supplied buffer. How much is transferred, and what happens when
     *  fewer bytes are available, is defined by the implementation.
     *
     * @param dst the buffer to read into
     * @param position the offset within this source to read from
     * @throws IOException if the underlying store cannot be read
     */
    public abstract void read(ByteBuffer dst, long position) throws IOException;

    /**
     * Writes bytes from the supplied buffer into this source, starting
     *  at the given position.
     *
     * @param src the buffer holding the bytes to write
     * @param position the offset within this source to write at
     * @throws IOException if the underlying store cannot be written
     */
    public abstract void write(ByteBuffer src, long position) throws IOException;

    /**
     * @return the current size of this source
     * @throws IOException if the size cannot be determined
     */
    public abstract long size() throws IOException;

    /**
     * Releases whatever the implementation holds open.
     *
     * @throws IOException if the underlying store fails to close
     */
    public abstract void close() throws IOException;
}

View File

@ -0,0 +1,48 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.nio;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
/**
 * A POIFS {@link DataSource} backed by a File
 * <p>
 * All access goes through the {@link FileChannel} supplied at construction,
 *  using absolute positions so the channel's own position is left alone.
 */
public class FileBackedDataSource extends DataSource {
    private final FileChannel file;

    /**
     * @param file the open channel to read from and write to
     */
    public FileBackedDataSource(FileChannel file) {
        this.file = file;
    }

    /**
     * Fills the remaining space of <tt>dst</tt> from the file, starting
     *  at <tt>position</tt>. A single channel read may return fewer bytes
     *  than asked for, so this keeps reading until the buffer is full.
     * If the end of the file is hit first, the buffer is left partly filled.
     *
     * @param dst the buffer to read into
     * @param position the file offset to start reading from
     * @throws IOException if the channel cannot be read
     */
    public void read(ByteBuffer dst, long position) throws IOException {
        long offset = position;
        while(dst.hasRemaining()) {
            int got = file.read(dst, offset);
            if(got < 0) {
                // End of file, nothing more to be had
                break;
            }
            offset += got;
        }
    }

    /**
     * Writes all the remaining bytes of <tt>src</tt> to the file, starting
     *  at <tt>position</tt>. A single channel write may accept fewer bytes
     *  than offered, so this keeps writing until the buffer is drained.
     *
     * @param src the buffer holding the bytes to write
     * @param position the file offset to start writing at
     * @throws IOException if the channel cannot be written
     */
    public void write(ByteBuffer src, long position) throws IOException {
        long offset = position;
        while(src.hasRemaining()) {
            offset += file.write(src, offset);
        }
    }

    /**
     * @return the current size of the underlying file
     * @throws IOException if the channel cannot report its size
     */
    public long size() throws IOException {
        return file.size();
    }

    /**
     * Closes the underlying channel.
     *
     * @throws IOException if the channel fails to close
     */
    public void close() throws IOException {
        file.close();
    }
}

View File

@ -30,6 +30,7 @@ import static org.apache.poi.poifs.storage.HeaderBlockConstants._xbat_start_offs
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import org.apache.poi.poifs.common.POIFSBigBlockSize;
import org.apache.poi.poifs.common.POIFSConstants;
@ -83,6 +84,10 @@ public final class HeaderBlockReader {
* (Number of DIFAT Sectors in Microsoft parlance)
*/
private final int _xbat_count;
/**
* The raw header block bytes, from which the header fields are parsed
*/
private final byte[] _data;
/**
@ -93,26 +98,36 @@ public final class HeaderBlockReader {
* @exception IOException on errors or bad data
*/
public HeaderBlockReader(InputStream stream) throws IOException {
// At this point, we don't know how big our
// block sizes are
// So, read the first 32 bytes to check, then
// read the rest of the block
byte[] blockStart = new byte[32];
int bsCount = IOUtils.readFully(stream, blockStart);
if(bsCount != 32) {
throw alertShortRead(bsCount, 32);
// Grab the first 512 bytes
// (For 4096 sized blocks, the remaining 3584 bytes are zero)
// Then, process the contents
this(readFirst512(stream));
// Fetch the rest of the block if needed
if(bigBlockSize.getBigBlockSize() != 512) {
int rest = bigBlockSize.getBigBlockSize() - 512;
byte[] tmp = new byte[rest];
IOUtils.readFully(stream, tmp);
}
}
/**
 * Creates a reader over a header held in a buffer, by handing the
 *  buffer's backing array to the byte array based constructor.
 * NOTE(review): {@link ByteBuffer#array()} returns the whole backing
 *  array regardless of the buffer's position, and throws for buffers
 *  without an accessible array (direct or read-only ones). This presumably
 *  expects a heap buffer whose header starts at array index 0 - confirm
 *  against the callers.
 *
 * @param buffer the buffer whose backing array holds the header
 * @exception IOException on errors or bad data
 */
public HeaderBlockReader(ByteBuffer buffer) throws IOException {
this(buffer.array());
}
private HeaderBlockReader(byte[] data) throws IOException {
this._data = data;
// verify signature
long signature = LittleEndian.getLong(blockStart, _signature_offset);
long signature = LittleEndian.getLong(_data, _signature_offset);
if (signature != _signature) {
// Is it one of the usual suspects?
byte[] OOXML_FILE_HEADER = POIFSConstants.OOXML_FILE_HEADER;
if(blockStart[0] == OOXML_FILE_HEADER[0] &&
blockStart[1] == OOXML_FILE_HEADER[1] &&
blockStart[2] == OOXML_FILE_HEADER[2] &&
blockStart[3] == OOXML_FILE_HEADER[3]) {
if(_data[0] == OOXML_FILE_HEADER[0] &&
_data[1] == OOXML_FILE_HEADER[1] &&
_data[2] == OOXML_FILE_HEADER[2] &&
_data[3] == OOXML_FILE_HEADER[3]) {
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. You are calling the part of POI that deals with OLE2 Office Documents. You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
}
if ((signature & 0xFF8FFFFFFFFFFFFFL) == 0x0010000200040009L) {
@ -129,22 +144,14 @@ public final class HeaderBlockReader {
// Figure out our block size
switch (blockStart[30]) {
switch (_data[30]) {
case 12:
bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS; break;
case 9:
bigBlockSize = POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS; break;
default:
throw new IOException("Unsupported blocksize (2^"
+ blockStart[30] + "). Expected 2^9 or 2^12.");
}
_data = new byte[ bigBlockSize.getBigBlockSize() ];
System.arraycopy(blockStart, 0, _data, 0, blockStart.length);
// Now we can read the rest of our header
int byte_count = IOUtils.readFully(stream, _data, blockStart.length, _data.length - blockStart.length);
if (byte_count+bsCount != bigBlockSize.getBigBlockSize()) {
throw alertShortRead(byte_count, bigBlockSize.getBigBlockSize());
+ _data[30] + "). Expected 2^9 or 2^12.");
}
_bat_count = getInt(_bat_count_offset, _data);
@ -154,6 +161,17 @@ public final class HeaderBlockReader {
_xbat_start = getInt(_xbat_start_offset, _data);
_xbat_count = getInt(_xbat_count_offset, _data);
}
/**
 * Pulls the leading 512 bytes off the stream, which is all of the
 *  header that gets parsed.
 * (For 4096 sized blocks, the remaining 3584 bytes are zero)
 *
 * @param stream the stream positioned at the start of the header
 * @return a 512 byte array holding the start of the header block
 * @exception IOException if fewer than 512 bytes could be read
 */
private static byte[] readFirst512(InputStream stream) throws IOException {
    final int headerLength = 512;
    byte[] header = new byte[headerLength];

    int read = IOUtils.readFully(stream, header);
    if(read == headerLength) {
        return header;
    }

    // Stream ran out before a full header was available
    throw alertShortRead(read, headerLength);
}
private static int getInt(int offset, byte[] data) {
return LittleEndian.getInt(data, offset);
@ -216,7 +234,7 @@ public final class HeaderBlockReader {
for (int j = 0; j < _max_bats_in_header; j++) {
result[ j ] = LittleEndian.getInt(_data, offset);
offset += LittleEndianConsts.INT_SIZE;
offset += LittleEndianConsts.INT_SIZE;
}
return result;
}

View File

@ -21,6 +21,8 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
public final class IOUtils {
private IOUtils() {
@ -74,6 +76,29 @@ public final class IOUtils {
}
}
}
/**
 * Same as the normal <tt>channel.read(b)</tt>, but tries to ensure
 * that the buffer is completely filled, ie that all of the bytes it
 * has room for (its remaining space) are read.
 * <p>
 * If the end of file is reached before any bytes are read, returns -1. If
 * the end of the file is reached after some bytes are read, returns the
 * number of bytes read. If the end of the file isn't reached before the
 * buffer is full, returns the number of bytes needed to fill it. A buffer
 * with no room left on entry is returned immediately with 0.
 *
 * @param channel the channel to read from
 * @param b the buffer to fill, from its current position up to its limit
 * @return the number of bytes read, or -1 if the channel was already at
 *  the end when there was space to fill
 * @throws IOException if the channel cannot be read
 */
public static int readFully(ReadableByteChannel channel, ByteBuffer b) throws IOException {
    int total = 0;
    // Stop on the space left in the buffer rather than on its capacity:
    // a buffer that starts part filled, or has a reduced limit, never
    // reaches "total == capacity" and reads on a full buffer return 0
    while (b.hasRemaining()) {
        int got = channel.read(b);
        if (got < 0) {
            return (total == 0) ? -1 : total;
        }
        total += got;
    }
    return total;
}
/**
* Copies all the data from the given InputStream to the OutputStream. It

View File

@ -0,0 +1,38 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.nio;
import java.io.IOException;
import junit.framework.TestCase;
/**
* Tests for the datasource implementations.
* Both test methods are currently empty placeholders, so they pass
*  without exercising any code.
*/
public class TestDataSource extends TestCase
{
// Placeholder - presumably meant to cover the file backed data source
public void testFile() throws IOException {
// TODO Not yet implemented
}
// Placeholder - presumably meant to cover the byte array backed data source
public void testByteArray() throws IOException {
// TODO Not yet implemented
}
}