diff --git a/build.xml b/build.xml index cbbfde883..3eed344cb 100644 --- a/build.xml +++ b/build.xml @@ -649,6 +649,7 @@ under the License. + @@ -707,6 +708,7 @@ under the License. + @@ -742,6 +744,7 @@ under the License. + @@ -1284,10 +1287,13 @@ FORREST_HOME environment variable! + + + @@ -1313,10 +1319,13 @@ FORREST_HOME environment variable! + + + diff --git a/src/documentation/content/xdocs/book.xml b/src/documentation/content/xdocs/book.xml index a55b67bad..d9bc0e91d 100644 --- a/src/documentation/content/xdocs/book.xml +++ b/src/documentation/content/xdocs/book.xml @@ -41,6 +41,7 @@ + diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 269a13fd4..62fee5b1c 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -64,6 +64,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + 45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE 45623 - Support for additional HSSF header and footer fields, including bold and full file path 45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required 45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text) diff --git a/src/documentation/content/xdocs/hpbf/file-format.xml b/src/documentation/content/xdocs/hpbf/file-format.xml new file mode 100644 index 000000000..97d5a33d7 --- /dev/null +++ b/src/documentation/content/xdocs/hpbf/file-format.xml @@ -0,0 +1,170 @@ + + + + + +
+ POI-HPBF - A Guide to the Publisher File Format + Overview + + + +
+ + +
Document Streams +

+ The file is made up of a number of POIFS streams. A typical + file will be made up as follows: +

+ +Root Entry - + Objects - + (no children) + SummaryInformation <(0x05)SummaryInformation> + DocumentSummaryInformation <(0x05)DocumentSummaryInformation> + Escher - + EscherStm + EscherDelayStm + Quill - + QuillSub - + CONTENTS + CompObj <(0x01)CompObj> + Envelope + Contents + Internal <(0x03)Internal> + CompObj <(0x01)CompObj> + VBA - + (no children) + +
+
Changing Text +

If you make a change to the text of a file, but do not change + how much text there is, then the CONTENTS stream + will undergo a small change, and the Contents stream + will undergo a large change.

+

If you make a change to the text of a file, and change the + amount of text there is, then both the Contents and + the CONTENTS streams change.

+
+
Changing Shapes +

If you alter the size of a textbox, but make no text changes, + then both Contents and CONTENTS streams + change. There are no changes to the Escher streams.

+

If you set the background colour of a textbox, but make + no changes to the text, (to finish off)

+
+
Structure of CONTENTS +

First we have "CHNKINK ", followed by 24 bytes.

+

Next we have 20 sequences of 24 bytes each. If the first two bytes + are 0x1800, then that sequence entry exists, but if it's 0x0000 then + the entry doesn't exist. If it does exist, we then have 4 bytes of + upper case ASCII text, followed by three little endian shorts. + The first of these seems to be the count of that type, the second is + usually 1, the third is usually zero. Then we have another 4 bytes of + upper case ASCII text, normally but not always the same as the first + text. Finally, we have an unsigned little endian 32 bit offset to + the start of the data for this, then an unsigned little endian + 32 bit value giving the length of this section.

+

Normally, the first sequence entry is for TEXT, and the text data + will start at 0x200. After that is normally two or three STSH entries + (so the first short has values 0, then 1, then 2). After that it + seems to vary.

+

At 0x200 we have the text, stored as little endian 16 bit unicode.

+

After the text comes all sorts of other stuff, presumably as + described by the sequences.

+

For a contents stream of length 7168 / 0x1c00 bytes, the start + looks something like:

+ +CHNKINK // "CHNKINK " +04 00 07 00 // Normally 04 00 07 00 +13 00 00 03 // Normally ## 00 00 03 +00 02 00 00 // Normally 00 ## 00 00 +00 1c 00 00 // Normally length of the stream +f8 01 13 00 // Normally f8 01 11/13 00 +ff ff ff ff // Normally seems to be ffffffff + +18 00 +TEXT 00 00 01 00 00 00 // TEXT 0 1 0 +TEXT 00 02 00 00 d0 03 00 00 // TEXT from: 200 (512), len: 3d0 (976) +18 00 +STSH 00 00 01 00 00 00 // STSH 0 1 0 +STSH d0 05 00 00 1e 00 00 00 // STSH from: 5d0 (1488), len: 1e (30) +18 00 +STSH 01 00 01 00 00 00 // STSH 1 1 0 +STSH ee 05 00 00 b8 01 00 00 // STSH from: 5ee (1518), len: 1b8 (440) +18 00 +STSH 02 00 01 00 00 00 // STSH 2 1 0 +STSH a6 07 00 00 3c 00 00 00 // STSH from: 7a6 (1958), len: 3c (60) +18 00 +FDPP 00 00 01 00 00 00 // FDPP 0 1 0 +FDPP 00 08 00 00 00 02 00 00 // FDPP from: 800 (2048), len: 200 (512) +18 00 +FDPC 00 00 01 00 00 00 // FDPC 0 1 0 +FDPC 00 0a 00 00 00 02 00 00 // FDPC from: a00 (2560), len: 200 (512) +18 00 +FDPC 01 00 01 00 00 00 // FDPC 1 1 0 +FDPC 00 0c 00 00 00 02 00 00 // FDPC from: c00 (3072), len: 200 (512) +18 00 +SYID 00 00 01 00 00 00 // SYID 0 1 0 +SYID 00 0e 00 00 20 00 00 00 // SYID from: e00 (3584), len: 20 (32) +18 00 +SGP 00 00 01 00 00 00 // SGP 0 1 0 +SGP 20 0e 00 00 0a 00 00 00 // SGP from: e20 (3616), len: a (10) +18 00 +INK 00 00 01 00 00 00 // INK 0 1 0 +INK 2a 0e 00 00 04 00 00 00 // INK from: e2a (3626), len: 4 (4) +18 00 +BTEP 00 00 01 00 00 00 // BTEP 0 1 0 +PLC 2e 0e 00 00 18 00 00 00 // PLC from: e2e (3630), len: 18 (24) +18 00 +BTEC 00 00 01 00 00 00 // BTEC 0 1 0 +PLC 46 0e 00 00 20 00 00 00 // PLC from: e46 (3654), len: 20 (32) +18 00 +FONT 00 00 01 00 00 00 // FONT 0 1 0 +FONT 66 0e 00 00 48 03 00 00 // FONT from: e66 (3686), len: 348 (840) +18 00 +TCD 03 00 01 00 00 00 // TCD 3 1 0 +PLC ae 11 00 00 24 00 00 00 // PLC from: 11ae (4526), len: 24 (36) +18 00 +TOKN 04 00 01 00 00 00 // TOKN 4 1 0 +PLC d2 11 00 00 0a 01 00 00 // PLC from: 11d2 (4562), len: 10a (266) +18 00 +TOKN 05 00 01 00 
00 00 // TOKN 5 1 0 +PLC dc 12 00 00 2a 01 00 00 // PLC from: 12dc (4828), len: 12a (298) +18 00 +STRS 00 00 01 00 00 00 // STRS 0 1 0 +PLC 06 14 00 00 46 00 00 00 // PLC from: 1406 (5126), len: 46 (70) +18 00 +MCLD 00 00 01 00 00 00 // MCLD 0 1 0 +MCLD 4c 14 00 00 16 06 00 00 // MCLD from: 144c (5196), len: 616 (1558) +18 00 +PL 00 00 01 00 00 00 // PL 0 1 0 +PL 62 1a 00 00 48 00 00 00 // PL from: 1a62 (6754), len: 48 (72) +00 00 // Blank entry follows +00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 + +(the text will then start) + +
+ +
diff --git a/src/documentation/content/xdocs/hpbf/index.xml b/src/documentation/content/xdocs/hpbf/index.xml new file mode 100755 index 000000000..c74dc2362 --- /dev/null +++ b/src/documentation/content/xdocs/hpbf/index.xml @@ -0,0 +1,53 @@ + + + + + +
+ POI-HPBF - Java API To Access Microsoft Publisher Format Files + Overview + + + +
+ + +
+ Overview + +

HPBF is the POI Project's pure Java implementation of the Publisher file format.

+

Currently, HPBF is in the experimental stage, while we try + to figure out the file format. Our initial aim is to provide + a text extractor for the format, with low level code following + after that if demand and developer interest warrant it.

+

At this time, there is no usermodel api or similar.

+

Our current understanding of the file format is documented + here.

+ + This code currently lives in the + scratchpad area + of the POI SVN repository. + Ensure that you have the scratchpad jar or the scratchpad + build area in your + classpath before experimenting with this code. + +
+ +
diff --git a/src/documentation/content/xdocs/index.xml b/src/documentation/content/xdocs/index.xml index d112140c7..f8336be0b 100644 --- a/src/documentation/content/xdocs/index.xml +++ b/src/documentation/content/xdocs/index.xml @@ -141,6 +141,16 @@ href="./slideshow/index.html">the HSLF project page for more information.

+
HPSF for Document Properties +

HPSF is our port of the OLE 2 property set format to pure + Java. Property sets are mostly used to store a document's properties + (title, author, date of last modification etc.), but they can be used + for application-specific purposes as well.

+ +

HPSF supports both reading and writing of properties.

+

Please see the HPSF project + page for more information.

+
HDGF for Visio Documents

HDGF is our port of the Microsoft Viso 97(-2003) file format to pure Java. It currently only supports reading at a very low level, and @@ -148,19 +158,13 @@ href="./hdgf/index.html">the HDGF project page for more information.

-
HPSF for Document Properties -

HPSF is our port of the OLE 2 property set format to pure - Java. Property sets are mostly use to store a document's properties - (title, author, date of last modification etc.), but they can be used - for application-specific purposes as well.

- -

HPSF supports reading and writing of properties. However, you will - need to be using version 3.0 of POI to utilise the write support.

- -

Please see the HPSF project - page for more information.

+
HPBF for Publisher Documents +

HPBF is our port of the Microsoft Publisher 98(-2007) file format to pure + Java. At the moment, we are still figuring out the file format, but we hope + to have simple text extraction shortly. Please see the HPBF project page for more + information.

-
Contributing diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index b59455ebf..96272b165 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -61,6 +61,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + 45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE 45623 - Support for additional HSSF header and footer fields, including bold and full file path 45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required 45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text) diff --git a/src/java/org/apache/poi/hssf/model/Sheet.java b/src/java/org/apache/poi/hssf/model/Sheet.java index 710d57118..336b1dbd4 100644 --- a/src/java/org/apache/poi/hssf/model/Sheet.java +++ b/src/java/org/apache/poi/hssf/model/Sheet.java @@ -106,7 +106,7 @@ public final class Sheet implements Model { protected ArrayList records = null; int preoffset = 0; // offset of the sheet in a new file - protected int dimsloc = -1; // TODO - is it legal for dims record to be missing? 
+ protected int dimsloc = -1; // TODO - remove dimsloc protected PrintGridlinesRecord printGridlines = null; protected GridsetRecord gridset = null; private GutsRecord _gutsRecord; @@ -125,7 +125,8 @@ public final class Sheet implements Model { private MergedCellsTable _mergedCellsTable; /** always present in this POI object, not always written to Excel file */ /*package*/ColumnInfoRecordsAggregate _columnInfos; - protected DimensionsRecord dims; + /** the DimensionsRecord is always present */ + private DimensionsRecord _dimensions; protected RowRecordsAggregate _rowsAggregate = null; private DataValidityTable _dataValidityTable= null; private ConditionalFormattingTable condFormatting; @@ -287,7 +288,7 @@ public final class Sheet implements Model { records.add(retval._columnInfos); } - retval.dims = ( DimensionsRecord ) rec; + retval._dimensions = ( DimensionsRecord ) rec; retval.dimsloc = records.size(); } else if (rec.getSid() == DefaultColWidthRecord.sid) @@ -333,7 +334,7 @@ public final class Sheet implements Model { records.add(rec); } - if (retval.dimsloc < 0) { + if (retval._dimensions == null) { throw new RuntimeException("DimensionsRecord was not found"); } retval.records = records; @@ -404,6 +405,8 @@ public final class Sheet implements Model { public static Sheet createSheet() { + // TODO - convert this method to a constructor + if (log.check( POILogger.DEBUG )) log.log(POILogger.DEBUG, "Sheet createsheet from scratch called"); Sheet retval = new Sheet(); @@ -423,7 +426,8 @@ public final class Sheet implements Model { records.add( retval.printGridlines ); retval.gridset = createGridset(); records.add( retval.gridset ); - records.add( retval.createGuts() ); + retval._gutsRecord = createGuts(); + records.add( retval._gutsRecord ); retval.defaultrowheight = createDefaultRowHeight(); records.add( retval.defaultrowheight ); records.add( retval.createWSBool() ); @@ -440,8 +444,8 @@ public final class Sheet implements Model { ColumnInfoRecordsAggregate columns 
= new ColumnInfoRecordsAggregate(); records.add( columns ); retval._columnInfos = columns; - retval.dims = createDimensions(); - records.add(retval.dims); + retval._dimensions = createDimensions(); + records.add(retval._dimensions); retval.dimsloc = records.size()-1; records.add(retval.windowTwo = retval.createWindowTwo()); retval.selection = createSelection(); @@ -460,7 +464,7 @@ public final class Sheet implements Model { if (_rowsAggregate == null) { _rowsAggregate = new RowRecordsAggregate(); - records.add(getDimsLoc() + 1, _rowsAggregate); + records.add(dimsloc + 1, _rowsAggregate); } } private MergedCellsTable getMergedRecords() { @@ -556,10 +560,10 @@ public final class Sheet implements Model { .append(lastrow).append("lastcol").append(lastcol) .toString()); } - dims.setFirstCol(firstcol); - dims.setFirstRow(firstrow); - dims.setLastCol(lastcol); - dims.setLastRow(lastrow); + _dimensions.setFirstCol(firstcol); + _dimensions.setFirstRow(firstrow); + _dimensions.setLastCol(lastcol); + _dimensions.setLastRow(lastrow); if (log.check( POILogger.DEBUG )) log.log(POILogger.DEBUG, "Sheet.setDimensions exiting"); } @@ -696,7 +700,7 @@ public final class Sheet implements Model { if(log.check(POILogger.DEBUG)) { log.log(POILogger.DEBUG, "add value record row" + row); } - DimensionsRecord d = ( DimensionsRecord ) records.get(getDimsLoc()); + DimensionsRecord d = _dimensions; if (col.getColumn() > d.getLastCol()) { @@ -720,8 +724,8 @@ public final class Sheet implements Model { */ public void removeValueRecord(int row, CellValueRecordInterface col) { - log.logFormatted(POILogger.DEBUG, "remove value record row,dimsloc %,%", - new int[]{row, dimsloc} ); + log.logFormatted(POILogger.DEBUG, "remove value record row %", + new int[]{row } ); _rowsAggregate.removeCell(col); } @@ -766,7 +770,7 @@ public final class Sheet implements Model { checkRows(); if (log.check( POILogger.DEBUG )) log.log(POILogger.DEBUG, "addRow "); - DimensionsRecord d = ( DimensionsRecord ) 
records.get(getDimsLoc()); + DimensionsRecord d = _dimensions; if (row.getRowNumber() >= d.getLastRow()) { @@ -1330,27 +1334,6 @@ public final class Sheet implements Model { } } - /** - * get the location of the DimensionsRecord (which is the last record before the value section) - * @return location in the array of records of the DimensionsRecord - */ - - public int getDimsLoc() - { - if (log.check( POILogger.DEBUG )) - log.log(POILogger.DEBUG, "getDimsLoc dimsloc= " + dimsloc); - return dimsloc; - } - - /** - * in the event the record is a dimensions record, resets both the loc index and dimsloc index - */ - public void checkDimsLoc(Record rec, int recloc) { - if (rec.getSid() == DimensionsRecord.sid) { - dimsloc = recloc; - } - } - /** * @return the serialized size of this sheet */ diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java b/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java index e61d7a1cc..3e51fe831 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java @@ -1806,10 +1806,11 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet } if (width != -1) { + width *= 256; if (width > Short.MAX_VALUE) { //width can be bigger that Short.MAX_VALUE! 
width = Short.MAX_VALUE; } - sheet.setColumnWidth(column, (short) (width * 256)); + sheet.setColumnWidth(column, (short) (width)); } } diff --git a/src/java/org/apache/poi/poifs/dev/POIFSLister.java b/src/java/org/apache/poi/poifs/dev/POIFSLister.java index c9fa349d6..cdd9902c4 100644 --- a/src/java/org/apache/poi/poifs/dev/POIFSLister.java +++ b/src/java/org/apache/poi/poifs/dev/POIFSLister.java @@ -45,37 +45,54 @@ public class POIFSLister { System.exit(1); } - for (int j = 0; j < args.length; j++) - { - viewFile(args[ j ]); + boolean withSizes = false; + for (int j = 0; j < args.length; j++) { + if(args[j].equalsIgnoreCase("-size") || + args[j].equalsIgnoreCase("-sizes")) { + withSizes = true; + } else { + viewFile(args[j], withSizes); + } } } - public static void viewFile(final String filename) throws IOException + public static void viewFile(final String filename, boolean withSizes) throws IOException { POIFSFileSystem fs = new POIFSFileSystem( new FileInputStream(filename) ); - displayDirectory(fs.getRoot(), ""); + displayDirectory(fs.getRoot(), "", withSizes); } - public static void displayDirectory(DirectoryNode dir, String indent) { + public static void displayDirectory(DirectoryNode dir, String indent, boolean withSizes) { System.out.println(indent + dir.getName() + " -"); String newIndent = indent + " "; + boolean hadChildren = false; for(Iterator it = dir.getEntries(); it.hasNext(); ) { + hadChildren = true; Object entry = it.next(); if(entry instanceof DirectoryNode) { - displayDirectory((DirectoryNode)entry, newIndent); + displayDirectory((DirectoryNode)entry, newIndent, withSizes); } else { DocumentNode doc = (DocumentNode)entry; String name = doc.getName(); + String size = ""; if(name.charAt(0) < 10) { String altname = "(0x0" + (int)name.charAt(0) + ")" + name.substring(1); name = name.substring(1) + " <" + altname + ">"; } - System.out.println(newIndent + name); + if(withSizes) { + size = " [" + + doc.getSize() + " / 0x" + + 
Integer.toHexString(doc.getSize()) + + "]"; + } + System.out.println(newIndent + name + size); } } + if(!hadChildren) { + System.out.println(newIndent + "(no children)"); + } } } \ No newline at end of file diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java new file mode 100644 index 000000000..6c52bbb04 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java @@ -0,0 +1,353 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.hpbf.dev; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.ddf.DefaultEscherRecordFactory; +import org.apache.poi.ddf.EscherRecord; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.DocumentEntry; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.StringUtil; + +/** + * For dumping out the contents of HPBF (Publisher) + * files, while we try to figure out how they're + * constructed. + */ +public class HPBFDumper { + private POIFSFileSystem fs; + public HPBFDumper(POIFSFileSystem fs) { + this.fs = fs; + } + public HPBFDumper(InputStream inp) throws IOException { + this(new POIFSFileSystem(inp)); + } + + private static byte[] getData(DirectoryNode dir, String name) throws IOException { + DocumentEntry docProps = + (DocumentEntry)dir.getEntry(name); + + // Grab the document stream + byte[] d = new byte[docProps.getSize()]; + dir.createDocumentInputStream(name).read(d); + + // All done + return d; + } + + /** + * Dumps out the given number of bytes as hex, + * two chars + */ + private String dumpBytes(byte[] data, int offset, int len) { + StringBuffer ret = new StringBuffer(); + for(int i=0; i"); + System.exit(1); + } + HPBFDumper dump = new HPBFDumper( + new FileInputStream(args[0]) + ); + + System.out.println("Dumping " + args[0]); + dump.dumpContents(); + dump.dumpEnvelope(); + dump.dumpEscher(); + dump.dump001CompObj(dump.fs.getRoot()); + dump.dumpQuill(); + + // Still to go: + // (0x03)Internal + // Objects + } + + /** + * Dump out the escher parts of the file. 
+ * Escher -> EscherStm and EscherDelayStm + */ + public void dumpEscher() throws IOException { + DirectoryNode escherDir = (DirectoryNode) + fs.getRoot().getEntry("Escher"); + + dumpEscherStm(escherDir); + dumpEscherDelayStm(escherDir); + } + private void dumpEscherStream(byte[] data) { + DefaultEscherRecordFactory erf = + new DefaultEscherRecordFactory(); + + // Dump + int left = data.length; + while(left > 0) { + EscherRecord er = erf.createRecord(data, 0); + er.fillFields(data, 0, erf); + left -= er.getRecordSize(); + + System.out.println(er.toString()); + } + } + protected void dumpEscherStm(DirectoryNode escherDir) throws IOException { + byte[] data = getData(escherDir, "EscherStm"); + System.out.println(""); + System.out.println("EscherStm - " + data.length + " bytes long:"); + if(data.length > 0) + dumpEscherStream(data); + } + protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException { + byte[] data = getData(escherDir, "EscherDelayStm"); + System.out.println(""); + System.out.println("EscherDelayStm - " + data.length + " bytes long:"); + if(data.length > 0) + dumpEscherStream(data); + } + + public void dumpEnvelope() throws IOException { + byte[] data = getData(fs.getRoot(), "Envelope"); + + System.out.println(""); + System.out.println("Envelope - " + data.length + " bytes long:"); + } + + public void dumpContents() throws IOException { + byte[] data = getData(fs.getRoot(), "Contents"); + + System.out.println(""); + System.out.println("Contents - " + data.length + " bytes long:"); + + // 8 bytes, always seems to be + // E8 AC 2C 00 E8 03 05 01 + // E8 AC 2C 00 E8 03 05 01 + + // 4 bytes - size of contents + // 13/15 00 00 01 + + // .... + + // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... .......... + + // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... .. 
+ + // 01 18 30 00 03 20 00 00 + // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 + // 00 00 00 88 1E 00 00 00 + + // 01 18 31 00 03 20 00 00 + // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 + // 00 00 00 88 1E 00 00 00 + + // 01 18 32 00 03 20 00 00 + // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 + // 00 00 00 88 1E 00 00 00 + } + + public void dumpCONTENTSraw(DirectoryNode dir) throws IOException { + byte[] data = getData(dir, "CONTENTS"); + + System.out.println(""); + System.out.println("CONTENTS - " + data.length + " bytes long:"); + + // Between the start and 0x200 we have + // CHNKINK(space) + 24 bytes + // 0x1800 + // TEXT + 6 bytes + // TEXT + 8 bytes + // 0x1800 + // STSH + 6 bytes + // STSH + 8 bytes + // 0x1800 + // STSH + 6 bytes + // STSH + 8 bytes + // but towards 0x200 the pattern may + // break down a little bit + + // After the second of a given type, + // it seems to be 4 bytes giving the start, + // then 4 bytes giving the length, then + // 18 00 + System.out.println( + new String(data, 0, 8) + + dumpBytes(data, 8, 0x20-8) + ); + + int pos = 0x20; + boolean sixNotEight = true; + while(pos < 0x200) { + if(sixNotEight) { + System.out.println( + dumpBytes(data, pos, 2) + ); + pos += 2; + } + String text = new String(data, pos, 4); + int blen = 8; + if(sixNotEight) + blen = 6; + System.out.println( + text + " " + dumpBytes(data, pos+4, blen) + ); + + pos += 4 + blen; + sixNotEight = ! 
sixNotEight; + } + + // Text from 0x200 onwards until we get + // to \r(00)\n(00)(00)(00) + int textStop = -1; + for(int i=0x200; i 0) { + int len = (textStop - 0x200) / 2; + System.out.println(""); + System.out.println( + StringUtil.getFromUnicodeLE(data, 0x200, len) + ); + } + + // The font list comes slightly later + + // The hyperlinks may come before the fonts, + // or slightly in front + } + public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException { + byte[] data = getData(dir, "CONTENTS"); + + System.out.println(""); + System.out.println("CONTENTS - " + data.length + " bytes long:"); + + String[] startType = new String[20]; + String[] endType = new String[20]; + int[] optA = new int[20]; + int[] optB = new int[20]; + int[] optC = new int[20]; + int[] from = new int[20]; + int[] len = new int[20]; + + for(int i=0; i<20; i++) { + int offset = 0x20 + i*24; + if(data[offset] == 0x18 && data[offset+1] == 0x00) { + // Has data + startType[i] = new String(data, offset+2, 4); + optA[i] = LittleEndian.getUShort(data, offset+6); + optB[i] = LittleEndian.getUShort(data, offset+8); + optC[i] = LittleEndian.getUShort(data, offset+10); + endType[i] = new String(data, offset+12, 4); + from[i] = (int)LittleEndian.getUInt(data, offset+16); + len[i] = (int)LittleEndian.getUInt(data, offset+20); + } else { + // Doesn't have data + } + } + + String text = StringUtil.getFromUnicodeLE( + data, from[0], len[0]/2 + ); + + // Dump + for(int i=0; i<20; i++) { + String num = Integer.toString(i); + if(i < 10) { + num = "0" + i; + } + System.out.print(num + " "); + + if(startType[i] == null) { + System.out.println("(not present)"); + } else { + System.out.println( + "\t" + + startType[i] + " " + + optA[i] + " " + + optB[i] + " " + + optC[i] + ); + System.out.println( + "\t" + + endType[i] + " " + + "from: " + + Integer.toHexString(from[i]) + + " (" + from[i] + ")" + + ", len: " + + Integer.toHexString(len[i]) + + " (" + len[i] + ")" + ); + } + } + + // Text + 
System.out.println(""); + System.out.println("TEXT:"); + System.out.println(text); + System.out.println(""); + + // All the others + for(int i=0; i<20; i++) { + if(startType[i] == null) { + continue; + } + int start = from[i]; + + System.out.println( + startType[i] + " -> " + endType[i] + + " @ " + Integer.toHexString(start) + + " (" + start + ")" + ); + System.out.println("\t" + dumpBytes(data, start, 4)); + System.out.println("\t" + dumpBytes(data, start+4, 4)); + System.out.println("\t" + dumpBytes(data, start+8, 4)); + System.out.println("\t(etc)"); + } + } + + protected void dump001CompObj(DirectoryNode dir) { + // TODO + } + + public void dumpQuill() throws IOException { + DirectoryNode quillDir = (DirectoryNode) + fs.getRoot().getEntry("Quill"); + DirectoryNode quillSubDir = (DirectoryNode) + quillDir.getEntry("QuillSub"); + + dump001CompObj(quillSubDir); + dumpCONTENTSraw(quillSubDir); + dumpCONTENTSguessed(quillSubDir); + } +} diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub new file mode 100755 index 000000000..b87016831 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt new file mode 100644 index 000000000..279395e5d --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt @@ -0,0 +1,29 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + + +This is the second page + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email 
link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub new file mode 100755 index 000000000..610362c47 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt new file mode 100644 index 000000000..f8a68bb64 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt @@ -0,0 +1,34 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + +We’ve added some more text in here, to push all the offsets about a bit. + + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + +Ditto with more text in here. + + +This is the second page + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub new file mode 100755 index 000000000..2c6174e5e Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub new file mode 100755 index 000000000..4f19bec93 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt new file mode 100644 index 
000000000..c2d791b9a --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt @@ -0,0 +1,29 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + + +This is the second page12345678 + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub new file mode 100755 index 000000000..445df85f0 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt new file mode 100644 index 000000000..279395e5d --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt @@ -0,0 +1,29 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + + +This is the second page + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub new file mode 100755 index 000000000..8adffda77 Binary files /dev/null and 
b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub new file mode 100644 index 000000000..00deec14d Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub new file mode 100644 index 000000000..94900925a Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub differ diff --git a/src/testcases/org/apache/poi/hssf/model/TestSheet.java b/src/testcases/org/apache/poi/hssf/model/TestSheet.java index 0b245c0a7..6c6dd9fb2 100644 --- a/src/testcases/org/apache/poi/hssf/model/TestSheet.java +++ b/src/testcases/org/apache/poi/hssf/model/TestSheet.java @@ -32,6 +32,7 @@ import org.apache.poi.hssf.record.CellValueRecordInterface; import org.apache.poi.hssf.record.ColumnInfoRecord; import org.apache.poi.hssf.record.DimensionsRecord; import org.apache.poi.hssf.record.EOFRecord; +import org.apache.poi.hssf.record.GutsRecord; import org.apache.poi.hssf.record.IndexRecord; import org.apache.poi.hssf.record.MergeCellsRecord; import org.apache.poi.hssf.record.Record; @@ -41,6 +42,8 @@ import org.apache.poi.hssf.record.UncalcedRecord; import org.apache.poi.hssf.record.aggregates.ColumnInfoRecordsAggregate; import org.apache.poi.hssf.record.aggregates.PageSettingsBlock; import org.apache.poi.hssf.record.aggregates.RowRecordsAggregate; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.util.CellRangeAddress; /** @@ -438,8 +441,8 @@ public final class TestSheet extends TestCase { if (false) { // make sure that RRA and VRA are in the right place // (Aug 2008) since the VRA is now part of the RRA, 
there is much less chance that - // they could get out of order. Still, one could write serialize the sheet here, - // and read back with EventRecordFactory to make sure... + // they could get out of order. Still, one could write serialize the sheet here, + // and read back with EventRecordFactory to make sure... } assertEquals(242, dbCellRecordPos); } @@ -475,5 +478,29 @@ public final class TestSheet extends TestCase { return _indexRecord; } } + + /** + * Checks for bug introduced around r682282-r683880 that caused a second GUTS records + * which in turn got the dimensions record out of alignment + */ + public void testGutsRecord_bug45640() { + + Sheet sheet = Sheet.createSheet(); + sheet.addRow(new RowRecord(0)); + sheet.addRow(new RowRecord(1)); + sheet.groupRowRange( 0, 1, true ); + sheet.toString(); + List recs = sheet.getRecords(); + int count=0; + for(int i=0; i< recs.size(); i++) { + if (recs.get(i) instanceof GutsRecord) { + count++; + } + } + if (count == 2) { + throw new AssertionFailedError("Identified bug 45640"); + } + assertEquals(1, count); + } }