Further HPBF documentation, and some more sample files used
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@686640 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d35d590c07
commit
b185a26d00
@ -41,6 +41,7 @@
|
||||
<menu-item label="HSLF" href="hslf/index.html"/>
|
||||
<menu-item label="HSMF" href="hsmf/index.html"/>
|
||||
<menu-item label="HDGF" href="hdgf/index.html"/>
|
||||
<menu-item label="HPBF" href="hpbf/index.html"/>
|
||||
<menu-item label="POI-Ruby" href="poi-ruby.html"/>
|
||||
<menu-item label="POI-Utils" href="utils/index.html"/>
|
||||
<menu-item label="Text Extraction" href="text-extraction.html"/>
|
||||
|
@ -38,19 +38,19 @@
|
||||
Root Entry -
|
||||
Objects -
|
||||
(no children)
|
||||
SummaryInformation <(0x05)SummaryInformation>
|
||||
DocumentSummaryInformation <(0x05)DocumentSummaryInformation>
|
||||
SummaryInformation <(0x05)SummaryInformation>
|
||||
DocumentSummaryInformation <(0x05)DocumentSummaryInformation>
|
||||
Escher -
|
||||
EscherStm
|
||||
EscherDelayStm
|
||||
Quill -
|
||||
QuillSub -
|
||||
CONTENTS
|
||||
CompObj <(0x01)CompObj>
|
||||
CompObj <(0x01)CompObj>
|
||||
Envelope
|
||||
Contents
|
||||
Internal <(0x03)Internal>
|
||||
CompObj <(0x01)CompObj>
|
||||
Internal <(0x03)Internal>
|
||||
CompObj <(0x01)CompObj>
|
||||
VBA -
|
||||
(no children)
|
||||
</source>
|
||||
@ -69,7 +69,7 @@ Root Entry -
|
||||
then both <em>Contents</em> and <em>CONTENTS</em> streams
|
||||
change. There are no changes to the Escher streams.</p>
|
||||
<p>If you set the background colour of a textbox, but make
|
||||
no changes to the text,
|
||||
no changes to the text, (to finish off)</p>
|
||||
</section>
|
||||
<section><title>Structure of CONTENTS</title>
|
||||
<p>First we have "CHNKINK ", followed by 24 bytes.</p>
|
||||
@ -162,6 +162,8 @@ PL 62 1a 00 00 48 00 00 00 // PL from: 1a62 (6754), len: 48 (72)
|
||||
00 00 00 00 00 00
|
||||
00 00 00 00 00 00 00 00
|
||||
00 00 00 00 00 00 00 00
|
||||
|
||||
(the text will then start)
|
||||
</source>
|
||||
</section>
|
||||
</body>
|
||||
|
@ -39,7 +39,7 @@
|
||||
after that if demand and developer interest warrant it.</p>
|
||||
<p>At this time, there is no <em>usermodel</em> api or similar.</p>
|
||||
<p>Our current understanding of the file format is documented
|
||||
<link href="file-format.html">here</a>.</p>
|
||||
<link href="file-format.html">here</link>.</p>
|
||||
<note>
|
||||
This code currently lives in the
|
||||
<link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link>
|
||||
|
@ -146,6 +146,16 @@
|
||||
href="./hslf/index.html">the HSLF project page for more
|
||||
information</link>.</p>
|
||||
</section>
|
||||
<section><title>HPSF for Document Properties</title>
|
||||
<p>HPSF is our port of the OLE 2 property set format to pure
|
||||
Java. Property sets are mostly used to store a document's properties
|
||||
(title, author, date of last modification etc.), but they can be used
|
||||
for application-specific purposes as well.</p>
|
||||
|
||||
<p>HPSF supports both reading and writing of properties.</p>
|
||||
<p>Please see <link href="./hpsf/index.html">the HPSF project
|
||||
page</link> for more information.</p>
|
||||
</section>
|
||||
<section><title>HDGF for Visio Documents</title>
|
||||
<p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure
|
||||
Java. It currently only supports reading at a very low level, and
|
||||
@ -153,19 +163,13 @@
|
||||
href="./hdgf/index.html">the HDGF project page for more
|
||||
information</link>.</p>
|
||||
</section>
|
||||
<section><title>HPSF for Document Properties</title>
|
||||
<p>HPSF is our port of the OLE 2 property set format to pure
|
||||
Java. Property sets are mostly used to store a document's properties
|
||||
(title, author, date of last modification etc.), but they can be used
|
||||
for application-specific purposes as well.</p>
|
||||
|
||||
<p>HPSF supports reading and writing of properties. However, you will
|
||||
need to be using version 3.0 of POI to utilise the write support.</p>
|
||||
|
||||
<p>Please see <link href="./hpsf/index.html">the HPSF project
|
||||
page</link> for more information.</p>
|
||||
<section><title>HPBF for Publisher Documents</title>
|
||||
<p>HPBF is our port of the Microsoft Publisher 98(-2007) file format to pure
|
||||
Java. At the moment, we are still figuring out the file format, but we hope
|
||||
to have simple text extraction shortly. Please see <link
|
||||
href="./hpbf/index.html">the HPBF project page for more
|
||||
information</link>.</p>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
||||
<section><title>Contributing </title>
|
||||
|
@ -310,8 +310,30 @@ public class HPBFDumper {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Text
|
||||
System.out.println("");
|
||||
System.out.println("TEXT:");
|
||||
System.out.println(text);
|
||||
System.out.println("");
|
||||
|
||||
// All the others
|
||||
for(int i=0; i<20; i++) {
|
||||
if(startType[i] == null) {
|
||||
continue;
|
||||
}
|
||||
int start = from[i];
|
||||
|
||||
System.out.println(
|
||||
startType[i] + " -> " + endType[i] +
|
||||
" @ " + Integer.toHexString(start) +
|
||||
" (" + start + ")"
|
||||
);
|
||||
System.out.println("\t" + dumpBytes(data, start, 4));
|
||||
System.out.println("\t" + dumpBytes(data, start+4, 4));
|
||||
System.out.println("\t" + dumpBytes(data, start+8, 4));
|
||||
System.out.println("\t(etc)");
|
||||
}
|
||||
}
|
||||
|
||||
protected void dump001CompObj(DirectoryNode dir) {
|
||||
|
BIN
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub
Executable file
Binary file not shown.
@ -0,0 +1,34 @@
|
||||
This is some text on the first page
|
||||
It’s in times new roman, font size 10, all normal
|
||||
|
||||
We’ve added some more text in here, to push all the offsets about a bit.
|
||||
|
||||
|
||||
|
||||
This is in bold and italic
|
||||
It’s Arial, 20 point font
|
||||
It’s in the second textbox on the first page
|
||||
|
||||
Ditto with more text in here.
|
||||
|
||||
|
||||
This is the second page
|
||||
|
||||
It is also times new roman, 10 point
|
||||
|
||||
|
||||
Table on page 2 Top right
|
||||
P2 table left P2 table right
|
||||
Bottom Left Bottom Right
|
||||
|
||||
|
||||
This text is on page two
|
||||
This is a link to Apache POI
|
||||
More normal text
|
||||
Link to a file
|
||||
|
||||
|
||||
More text, more hyperlinks
|
||||
email link
|
||||
Final hyperlink
|
||||
Within doc to page 1
|
BIN
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub
Executable file
Binary file not shown.
@ -0,0 +1,29 @@
|
||||
This is some text on the first page
|
||||
It’s in times new roman, font size 10, all normal
|
||||
|
||||
|
||||
This is in bold and italic
|
||||
It’s Arial, 20 point font
|
||||
It’s in the second textbox on the first page
|
||||
|
||||
|
||||
This is the second page12345678
|
||||
|
||||
It is also times new roman, 10 point
|
||||
|
||||
|
||||
Table on page 2 Top right
|
||||
P2 table left P2 table right
|
||||
Bottom Left Bottom Right
|
||||
|
||||
|
||||
This text is on page two
|
||||
This is a link to Apache POI
|
||||
More normal text
|
||||
Link to a file
|
||||
|
||||
|
||||
More text, more hyperlinks
|
||||
email link
|
||||
Final hyperlink
|
||||
Within doc to page 1
|
BIN
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub
Executable file
Binary file not shown.
@ -0,0 +1,29 @@
|
||||
This is some text on the first page
|
||||
It’s in times new roman, font size 10, all normal
|
||||
|
||||
|
||||
This is in bold and italic
|
||||
It’s Arial, 20 point font
|
||||
It’s in the second textbox on the first page
|
||||
|
||||
|
||||
This is the second page
|
||||
|
||||
It is also times new roman, 10 point
|
||||
|
||||
|
||||
Table on page 2 Top right
|
||||
P2 table left P2 table right
|
||||
Bottom Left Bottom Right
|
||||
|
||||
|
||||
This text is on page two
|
||||
This is a link to Apache POI
|
||||
More normal text
|
||||
Link to a file
|
||||
|
||||
|
||||
More text, more hyperlinks
|
||||
email link
|
||||
Final hyperlink
|
||||
Within doc to page 1
|
Loading…
Reference in New Issue
Block a user