From eab2286d49d70c6234ccd05880c80871b30559a1 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sun, 17 Aug 2008 17:39:10 +0000 Subject: [PATCH] More work understanding hpbf git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@686624 13f79535-47bb-0310-9956-ffa450edef68 --- .../content/xdocs/hpbf/file-format.xml | 75 ++++++++++++++++++ .../content/xdocs/hpbf/index.xml | 53 +++++++++++++ .../org/apache/poi/hpbf/dev/HPBFDumper.java | 78 ++++++++++++++++++- 3 files changed, 204 insertions(+), 2 deletions(-) create mode 100644 src/documentation/content/xdocs/hpbf/file-format.xml create mode 100755 src/documentation/content/xdocs/hpbf/index.xml diff --git a/src/documentation/content/xdocs/hpbf/file-format.xml b/src/documentation/content/xdocs/hpbf/file-format.xml new file mode 100644 index 000000000..e130f7ba1 --- /dev/null +++ b/src/documentation/content/xdocs/hpbf/file-format.xml @@ -0,0 +1,75 @@ + + + + + +
+ POI-HPBF - A Guide to the Publisher File Format + Overview + + + +
+ + +
Document Streams +

+ The file is made up of a number of POIFS streams. A typical + file will be made up as follows: +

+ +Root Entry - + Objects - + (no children) + SummaryInformation <(0x05)SummaryInformation> + DocumentSummaryInformation <(0x05)DocumentSummaryInformation> + Escher - + EscherStm + EscherDelayStm + Quill - + QuillSub - + CONTENTS + CompObj <(0x01)CompObj> + Envelope + Contents + Internal <(0x03)Internal> + CompObj <(0x01)CompObj> + VBA - + (no children) + +
+
Changing Text +

If you make a change to the text of a file, but do not change + how much text there is, then the CONTENTS stream + will undergo a small change, and the Contents stream + will undergo a large change.

+

If you make a change to the text of a file, and change the + amount of text there is, then both the Contents and + the CONTENTS streams change.

+
+
Changing Shapes +

If you alter the size of a textbox, but make no text changes, + then both Contents and CONTENTS streams + change. There are no changes to the Escher streams.

+

If you set the background colour of a textbox, but make + no changes to the text, +

+ +
diff --git a/src/documentation/content/xdocs/hpbf/index.xml b/src/documentation/content/xdocs/hpbf/index.xml new file mode 100755 index 000000000..2601a4174 --- /dev/null +++ b/src/documentation/content/xdocs/hpbf/index.xml @@ -0,0 +1,53 @@ + + + + + +
+ POI-HPBF - Java API To Access Microsoft Publisher Format Files + Overview + + + +
+ + +
+ Overview + +

HPBF is the POI Project's pure Java implementation of the Publisher file format.

+

Currently, HPBF is in the experimental stage, while we try + to figure out the file format. Our initial aim is to provide + a text extractor for the format, with low level code following + after that if demand and developer interest warrant it.

+

At this time, there is no usermodel api or similar.

+

Our current understanding of the file format is documented + here.

+ + This code currently lives in the + scratchpad area + of the POI SVN repository. + Ensure that you have the scratchpad jar or the scratchpad + build area in your + classpath before experimenting with this code. + +
+ +
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java index 7f1dade14..47ee17ece 100644 --- a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java +++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java @@ -25,6 +25,8 @@ import org.apache.poi.ddf.EscherRecord; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.StringUtil; /** * For dumping out the contents of HPBF (Publisher) @@ -52,6 +54,26 @@ public class HPBFDumper { return d; } + /** + * Dumps out the given number of bytes as hex, + * two chars + */ + private String dumpBytes(byte[] data, int offset, int len) { + StringBuffer ret = new StringBuffer(); + for(int i=0; i 0) { + int len = (textStop - 0x200) / 2; + System.out.println(""); + System.out.println( + StringUtil.getFromUnicodeLE(data, 0x200, len) + ); + } + + // The font list comes slightly later + + // The hyperlinks may come before the fonts, + // or slightly in front } protected void dump001CompObj(DirectoryNode dir) {