add autonumbered endnotes and footnotes support in Word-to-HTML and Word-to-FO converters

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148974 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-20 23:24:16 +00:00
parent 583802a735
commit d3b9fa47e6
9 changed files with 361 additions and 115 deletions

View File

@ -35,6 +35,7 @@ import org.apache.poi.hwpf.model.ListFormatOverride;
import org.apache.poi.hwpf.model.ListTables;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Notes;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
@ -58,6 +59,8 @@ public abstract class AbstractWordConverter
private static final POILogger logger = POILogFactory
.getLogger( AbstractWordConverter.class );
private static final byte SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE = 2;
private final Set<Bookmark> bookmarkStack = new LinkedHashSet<Bookmark>();
private FontReplacer fontReplacer = new DefaultFontReplacer();
@ -139,6 +142,17 @@ public abstract class AbstractWordConverter
if ( text.getBytes().length == 0 )
continue;
if ( characterRun.isSpecialCharacter() )
{
if ( text.charAt( 0 ) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
&& ( document instanceof HWPFDocument ) )
{
HWPFDocument doc = (HWPFDocument) document;
processNoteAnchor( doc, characterRun, block );
continue;
}
}
if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
{
if ( document instanceof HWPFDocument )
@ -271,15 +285,23 @@ public abstract class AbstractWordConverter
processDocumentInformation( summaryInformation );
}
final Range range = wordDocument.getRange();
processDocumentPart( wordDocument, wordDocument.getRange() );
}
/**
 * Hook that receives the summary information of the document being
 * converted. It is called before the document's sections are walked, so an
 * implementation can copy document metadata into its output.
 *
 * @param summaryInformation
 *            summary information of the document being processed
 */
protected abstract void processDocumentInformation(
SummaryInformation summaryInformation );
/**
 * Walks all sections of the supplied range in order and hands each one,
 * together with its zero-based position, to {@code processSection}.
 *
 * @param wordDocument
 *            document the range belongs to
 * @param range
 *            part of the document whose sections shall be processed
 */
protected void processDocumentPart( HWPFDocumentCore wordDocument,
        final Range range )
{
    int sectionIndex = 0;
    while ( sectionIndex < range.numSections() )
    {
        final Section section = range.getSection( sectionIndex );
        processSection( wordDocument, section, sectionIndex );
        sectionIndex++;
    }
}
/**
 * Hook that receives the summary information of the document being
 * converted, so an implementation can copy document metadata into its
 * output.
 *
 * @param summaryInformation
 *            summary information of the document being processed
 */
protected abstract void processDocumentInformation(
SummaryInformation summaryInformation );
/**
 * Hook invoked when an autonumbered endnote anchor has been found in the
 * text. Implementations emit the reference mark into {@code block} and
 * render the endnote text.
 *
 * @param doc
 *            document that contains the endnote
 * @param noteIndex
 *            zero-based index of the endnote among the document's endnotes
 * @param block
 *            output element that is currently being filled and into which
 *            the reference mark belongs
 * @param endnoteTextRange
 *            range covering the text of the endnote
 */
protected abstract void processEndnoteAutonumbered( HWPFDocument doc,
int noteIndex, Element block, Range endnoteTextRange );
protected void processField( HWPFDocument hwpfDocument, Range parentRange,
int currentTableLevel, Field field, Element currentBlock )
@ -353,6 +375,9 @@ public abstract class AbstractWordConverter
return field;
}
/**
 * Hook invoked when an autonumbered footnote anchor has been found in the
 * text. Implementations emit the reference mark into {@code block} and
 * render the footnote text.
 *
 * @param doc
 *            document that contains the footnote
 * @param noteIndex
 *            zero-based index of the footnote among the document's
 *            footnotes
 * @param block
 *            output element that is currently being filled and into which
 *            the reference mark belongs
 * @param footnoteTextRange
 *            range covering the text of the footnote
 */
protected abstract void processFootnoteAutonumbered( HWPFDocument doc,
int noteIndex, Element block, Range footnoteTextRange );
protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
Element currentBlock, Range textRange, int currentTableLevel,
String hyperlink );
@ -363,6 +388,56 @@ public abstract class AbstractWordConverter
protected abstract void processLineBreak( Element block,
CharacterRun characterRun );
/**
 * Resolves the note that an autonumbered note anchor refers to and forwards
 * it to the matching handler.
 * <p>
 * The anchor is looked up by the start offset of {@code characterRun},
 * first among the footnotes and, only if no footnote matches, among the
 * endnotes. When neither lookup succeeds nothing is emitted.
 *
 * @param doc
 *            document that owns the notes
 * @param characterRun
 *            character run holding the note reference character
 * @param block
 *            output element that receives the reference mark
 */
protected void processNoteAnchor( HWPFDocument doc,
        CharacterRun characterRun, final Element block )
{
    final Notes footnotes = doc.getFootnotes();
    final int footnoteIndex = footnotes
            .getNoteIndexByAnchorPosition( characterRun.getStartOffset() );
    if ( footnoteIndex != -1 )
    {
        final Range footnoteText = buildNoteTextRange( doc,
                doc.getFootnoteRange(), footnotes, footnoteIndex );
        processFootnoteAutonumbered( doc, footnoteIndex, block,
                footnoteText );
        return;
    }

    final Notes endnotes = doc.getEndnotes();
    final int endnoteIndex = endnotes
            .getNoteIndexByAnchorPosition( characterRun.getStartOffset() );
    if ( endnoteIndex == -1 )
        return;

    final Range endnoteText = buildNoteTextRange( doc,
            doc.getEndnoteRange(), endnotes, endnoteIndex );
    processEndnoteAutonumbered( doc, endnoteIndex, block, endnoteText );
}

/**
 * Builds the range of a single note's text: the note's text offsets are
 * shifted by the start offset of the range holding all notes of that kind.
 */
private static Range buildNoteTextRange( HWPFDocument doc,
        Range allNotesRange, Notes notes, int noteIndex )
{
    final int base = allNotesRange.getStartOffset();
    final int textStart = notes.getNoteTextStartOffset( noteIndex );
    final int textEnd = notes.getNoteTextEndOffset( noteIndex );
    return new Range( base + textStart, base + textEnd, doc );
}
protected abstract void processPageref( HWPFDocumentCore wordDocument,
Element currentBlock, Range textRange, int currentTableLevel,
String pageref );
@ -371,6 +446,76 @@ public abstract class AbstractWordConverter
Element parentFopElement, int currentTableLevel,
Paragraph paragraph, String bulletText );
/**
 * Converts all paragraphs of {@code range} into children of {@code flow}.
 * <p>
 * A paragraph that sits in a table whose level differs from
 * {@code currentTableLevel} starts a nested table: the whole table is
 * passed to {@code processTable} and all of its paragraphs are skipped.
 * Every other paragraph is passed to {@code processParagraph}, with a
 * bullet label when it references a list and list tables are available,
 * and with an empty label otherwise.
 *
 * @param wordDocument
 *            document being converted
 * @param flow
 *            output element that receives the converted paragraphs
 * @param range
 *            range whose paragraphs are converted
 * @param currentTableLevel
 *            table level of the content being converted
 * @throws IllegalStateException
 *             if a table paragraph reports a table level below
 *             {@code currentTableLevel}
 */
protected void processParagraphes( HWPFDocumentCore wordDocument,
        Element flow, Range range, int currentTableLevel )
{
    final ListTables listTables = wordDocument.getListTables();
    final int paragraphCount = range.numParagraphs();

    int index = 0;
    while ( index < paragraphCount )
    {
        final Paragraph paragraph = range.getParagraph( index );

        if ( paragraph.isInTable()
                && paragraph.getTableLevel() != currentTableLevel )
        {
            if ( paragraph.getTableLevel() < currentTableLevel )
                throw new IllegalStateException(
                        "Trying to process table cell with higher level ("
                                + paragraph.getTableLevel()
                                + ") than current table level ("
                                + currentTableLevel
                                + ") as inner table part" );

            final Table table = range.getTable( paragraph );
            processTable( wordDocument, flow, table );
            // jump over every paragraph that belongs to the nested table
            index += table.numParagraphs();
            continue;
        }

        final int listInfo = paragraph.getIlfo();
        final String label;
        if ( listInfo == 0 )
        {
            label = AbstractWordUtils.EMPTY;
        }
        else if ( listTables != null )
        {
            final ListFormatOverride override = listTables
                    .getOverride( listInfo );
            label = AbstractWordUtils.getBulletText( listTables, paragraph,
                    override.getLsid() );
        }
        else
        {
            logger.log( POILogger.WARN,
                    "Paragraph #" + paragraph.getStartOffset() + "-"
                            + paragraph.getEndOffset()
                            + " has reference to list structure #"
                            + listInfo
                            + ", but listTables not defined in file" );
            label = AbstractWordUtils.EMPTY;
        }

        processParagraph( wordDocument, flow, currentTableLevel, paragraph,
                label );
        index++;
    }
}
private boolean processRangeBookmarks( HWPFDocumentCore document,
int currentTableLevel, Range range, final Element block,
Map<Integer, List<Bookmark>> rangeBookmakrs )
@ -497,76 +642,6 @@ public abstract class AbstractWordConverter
protected abstract void processSection( HWPFDocumentCore wordDocument,
Section section, int s );
/**
 * Converts all paragraphs of {@code range} into children of {@code flow}.
 * <p>
 * A paragraph that sits in a table whose level differs from
 * {@code currentTableLevel} starts a nested table: the whole table is
 * passed to {@code processTable} and all of its paragraphs are skipped.
 * Every other paragraph is passed to {@code processParagraph}, with a
 * bullet label when it references a list and list tables are available,
 * and with an empty label otherwise.
 *
 * @param wordDocument
 *            document being converted
 * @param flow
 *            output element that receives the converted paragraphs
 * @param range
 *            range whose paragraphs are converted
 * @param currentTableLevel
 *            table level of the content being converted
 * @throws IllegalStateException
 *             if a table paragraph reports a table level below
 *             {@code currentTableLevel}
 */
protected void processSectionParagraphes( HWPFDocumentCore wordDocument,
        Element flow, Range range, int currentTableLevel )
{
    final ListTables listTables = wordDocument.getListTables();
    final int paragraphCount = range.numParagraphs();

    int index = 0;
    while ( index < paragraphCount )
    {
        final Paragraph paragraph = range.getParagraph( index );

        if ( paragraph.isInTable()
                && paragraph.getTableLevel() != currentTableLevel )
        {
            if ( paragraph.getTableLevel() < currentTableLevel )
                throw new IllegalStateException(
                        "Trying to process table cell with higher level ("
                                + paragraph.getTableLevel()
                                + ") than current table level ("
                                + currentTableLevel
                                + ") as inner table part" );

            final Table table = range.getTable( paragraph );
            processTable( wordDocument, flow, table );
            // jump over every paragraph that belongs to the nested table
            index += table.numParagraphs();
            continue;
        }

        final int listInfo = paragraph.getIlfo();
        final String label;
        if ( listInfo == 0 )
        {
            label = AbstractWordUtils.EMPTY;
        }
        else if ( listTables != null )
        {
            final ListFormatOverride override = listTables
                    .getOverride( listInfo );
            label = AbstractWordUtils.getBulletText( listTables, paragraph,
                    override.getLsid() );
        }
        else
        {
            logger.log( POILogger.WARN,
                    "Paragraph #" + paragraph.getStartOffset() + "-"
                            + paragraph.getEndOffset()
                            + " has reference to list structure #"
                            + listInfo
                            + ", but listTables not defined in file" );
            label = AbstractWordUtils.EMPTY;
        }

        processParagraph( wordDocument, flow, currentTableLevel, paragraph,
                label );
        index++;
    }
}
protected void processSingleSection( HWPFDocumentCore wordDocument,
Section section )
{

View File

@ -137,6 +137,11 @@ public class FoDocumentFacade
return result;
}
/**
 * Creates a new {@code fo:footnote-body} element in the {@code NS_XSLFO}
 * namespace. The element is only created here; attaching it to the tree is
 * left to the caller.
 *
 * @return the freshly created element
 */
public Element createFootnoteBody()
{
    final Element footnoteBody = document.createElementNS( NS_XSLFO,
            "fo:footnote-body" );
    return footnoteBody;
}
public Element createInline()
{
return document.createElementNS( NS_XSLFO, "fo:inline" );

View File

@ -97,6 +97,45 @@ public class WordToFoConverter extends AbstractWordConverter
}
}
/**
 * Renders an autonumbered endnote. There is no dedicated endnote output
 * yet, so the endnote is rendered exactly like a footnote by delegating to
 * {@code processFootnoteAutonumbered} with the same arguments.
 *
 * @param doc
 *            document that contains the endnote
 * @param noteIndex
 *            zero-based index of the endnote
 * @param block
 *            output element that receives the reference mark
 * @param endnoteTextRange
 *            range covering the endnote text
 */
@Override
protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
Element block, Range endnoteTextRange )
{
// TODO: add endnote implementation?
processFootnoteAutonumbered( doc, noteIndex, block, endnoteTextRange );
}
/**
 * Renders an autonumbered footnote as an XSL-FO {@code fo:footnote}.
 * <p>
 * The produced structure is
 *
 * <pre>
 * fo:footnote
 *   fo:inline          (superscript note number shown in the text)
 *   fo:footnote-body
 *     fo:block         (superscript note number followed by the note text)
 * </pre>
 *
 * XSL-FO only permits {@code fo:footnote-body} as a child of
 * {@code fo:footnote}, so the body must not be appended to the surrounding
 * block directly.
 *
 * @param doc
 *            document that contains the footnote
 * @param noteIndex
 *            zero-based index of the footnote; the displayed number is
 *            {@code noteIndex + 1}
 * @param block
 *            output block that receives the footnote
 * @param footnoteTextRange
 *            range covering the footnote text
 */
@Override
protected void processFootnoteAutonumbered( HWPFDocument doc,
        int noteIndex, Element block, Range footnoteTextRange )
{
    final String textIndex = String.valueOf( noteIndex + 1 );

    final Element footnoteBody = foDocumentFacade.createFootnoteBody();

    // Create the wrapper in the same document and namespace as the body so
    // that both can be linked together without cross-document errors.
    final Element footnote = footnoteBody.getOwnerDocument()
            .createElementNS( footnoteBody.getNamespaceURI(),
                    "fo:footnote" );
    block.appendChild( footnote );

    // first child of fo:footnote: the reference mark shown in the text
    footnote.appendChild( createNoteNumberInline( textIndex ) );

    // second child of fo:footnote: the body holding the note itself
    final Element footnoteBlock = foDocumentFacade.createBlock();
    footnoteBody.appendChild( footnoteBlock );
    footnote.appendChild( footnoteBody );

    footnoteBlock.appendChild( createNoteNumberInline( textIndex ) );
    processCharacters( doc, Integer.MIN_VALUE, footnoteTextRange,
            footnoteBlock );
}

/**
 * Creates the small superscript {@code fo:inline} that displays a note
 * number.
 */
private Element createNoteNumberInline( String textIndex )
{
    final Element inline = foDocumentFacade.createInline();
    inline.setTextContent( textIndex );
    inline.setAttribute( "baseline-shift", "super" );
    inline.setAttribute( "font-size", "smaller" );
    return inline;
}
static Document process( File docFile ) throws Exception
{
final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
@ -381,8 +420,7 @@ public class WordToFoConverter extends AbstractWordConverter
Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
"xsl-region-body" );
processSectionParagraphes( wordDocument, flow, section,
Integer.MIN_VALUE );
processParagraphes( wordDocument, flow, section, Integer.MIN_VALUE );
}
protected void processTable( HWPFDocumentCore wordDocument, Element flow,
@ -465,8 +503,8 @@ public class WordToFoConverter extends AbstractWordConverter
+ count );
}
processSectionParagraphes( wordDocument, tableCellElement,
tableCell, table.getTableLevel() );
processParagraphes( wordDocument, tableCellElement, tableCell,
table.getTableLevel() );
if ( !tableCellElement.hasChildNodes() )
{

View File

@ -148,6 +148,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
private final HtmlDocumentFacade htmlDocumentFacade;
private Element notes = null;
/**
* Creates new instance of {@link WordToHtmlConverter}. Can be used for
* output several {@link HWPFDocument}s into single HTML document.
@ -205,6 +207,33 @@ public class WordToHtmlConverter extends AbstractWordConverter
span.appendChild( textNode );
}
/**
 * Emits one bookmark element per entry of {@code rangeBookmarks}, each
 * nested inside the previous one, and then renders the characters of
 * {@code range} (if there is one) inside the innermost element.
 *
 * @param wordDocument
 *            document being converted
 * @param currentBlock
 *            output element that receives the outermost bookmark
 * @param range
 *            text covered by the bookmarks, may be {@code null}
 * @param currentTableLevel
 *            table level of the content being converted
 * @param rangeBookmarks
 *            bookmarks to emit, outermost first
 */
@Override
protected void processBookmarks( HWPFDocumentCore wordDocument,
        Element currentBlock, Range range, int currentTableLevel,
        List<Bookmark> rangeBookmarks )
{
    Element innermost = currentBlock;
    for ( final Bookmark current : rangeBookmarks )
    {
        final String bookmarkName = current.getName();
        final Element nested = htmlDocumentFacade
                .createBookmark( bookmarkName );
        innermost.appendChild( nested );
        innermost = nested;
    }

    if ( range == null )
        return;

    processCharacters( wordDocument, currentTableLevel, range, innermost );
}
/**
 * Converts the document via the superclass and afterwards appends the
 * collected notes block, if any notes were found, to the HTML body.
 *
 * @param wordDocument
 *            document to convert
 */
@Override
public void processDocument( HWPFDocumentCore wordDocument )
{
    super.processDocument( wordDocument );

    if ( notes == null )
        return;

    final Element body = htmlDocumentFacade.getBody();
    body.appendChild( notes );
}
@Override
protected void processDocumentInformation(
SummaryInformation summaryInformation )
@ -223,6 +252,21 @@ public class WordToHtmlConverter extends AbstractWordConverter
.addDescription( summaryInformation.getComments() );
}
/**
 * Renders an autonumbered endnote by delegating to
 * {@code processNoteAutonumbered} with the type prefix {@code "end"}; the
 * prefix is used there to build the link names and CSS class names of the
 * note.
 *
 * @param doc
 *            document that contains the endnote
 * @param noteIndex
 *            zero-based index of the endnote
 * @param block
 *            output element that receives the reference link
 * @param endnoteTextRange
 *            range covering the endnote text
 */
@Override
protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
Element block, Range endnoteTextRange )
{
processNoteAutonumbered( doc, "end", noteIndex, block, endnoteTextRange );
}
/**
 * Renders an autonumbered footnote by delegating to
 * {@code processNoteAutonumbered} with the type prefix {@code "foot"}; the
 * prefix is used there to build the link names and CSS class names of the
 * note.
 *
 * @param doc
 *            document that contains the footnote
 * @param noteIndex
 *            zero-based index of the footnote
 * @param block
 *            output element that receives the reference link
 * @param footnoteTextRange
 *            range covering the footnote text
 */
@Override
protected void processFootnoteAutonumbered( HWPFDocument doc,
int noteIndex, Element block, Range footnoteTextRange )
{
processNoteAutonumbered( doc, "foot", noteIndex, block,
footnoteTextRange );
}
@Override
protected void processHyperlink( HWPFDocumentCore wordDocument,
Element currentBlock, Range textRange, int currentTableLevel,
@ -236,24 +280,6 @@ public class WordToHtmlConverter extends AbstractWordConverter
basicLink );
}
/**
 * Emits one bookmark element per entry of {@code rangeBookmarks}, each
 * nested inside the previous one, and then renders the characters of
 * {@code range} (if there is one) inside the innermost element.
 *
 * @param wordDocument
 *            document being converted
 * @param currentBlock
 *            output element that receives the outermost bookmark
 * @param range
 *            text covered by the bookmarks, may be {@code null}
 * @param currentTableLevel
 *            table level of the content being converted
 * @param rangeBookmarks
 *            bookmarks to emit, outermost first
 */
@Override
protected void processBookmarks( HWPFDocumentCore wordDocument,
        Element currentBlock, Range range, int currentTableLevel,
        List<Bookmark> rangeBookmarks )
{
    Element innermost = currentBlock;
    for ( final Bookmark current : rangeBookmarks )
    {
        final String bookmarkName = current.getName();
        final Element nested = htmlDocumentFacade
                .createBookmark( bookmarkName );
        innermost.appendChild( nested );
        innermost = nested;
    }

    if ( range == null )
        return;

    processCharacters( wordDocument, currentTableLevel, range, innermost );
}
/**
* This method shall store image bytes in external file and convert it if
* necessary. Images shall be stored using PNG format. Other formats may be
@ -377,6 +403,43 @@ public class WordToHtmlConverter extends AbstractWordConverter
block.appendChild( htmlDocumentFacade.createLineBreak() );
}
/**
 * Renders one autonumbered note as a pair of mutually linked HTML anchors.
 * <p>
 * A link showing the note number is appended to {@code block}; it points to
 * {@code <type>note_<n>} and is itself named {@code <type>note_back_<n>}.
 * The note is added to the shared notes block (created on first use, class
 * {@code notes}) as a block of class {@code <type>note} containing the
 * back-link anchor and a span of class {@code <type>notetext} with the
 * converted note text.
 *
 * @param doc
 *            document that contains the note
 * @param type
 *            prefix used for link names and CSS classes
 * @param noteIndex
 *            zero-based index of the note; the displayed number is
 *            {@code noteIndex + 1}
 * @param block
 *            output element that receives the reference link
 * @param noteTextRange
 *            range covering the note text
 */
protected void processNoteAutonumbered( HWPFDocument doc, String type,
        int noteIndex, Element block, Range noteTextRange )
{
    final String number = String.valueOf( noteIndex + 1 );
    final String noteTarget = type + "note_" + number;
    final String referenceTarget = type + "note_back_" + number;

    // reference mark inside the running text
    final Element reference = htmlDocumentFacade.createHyperlink( "#"
            + noteTarget );
    reference.setAttribute( "name", referenceTarget );
    reference.setAttribute( "class", type + "noteanchor" );
    reference.setTextContent( number );
    block.appendChild( reference );

    // container collecting all notes, created lazily
    if ( notes == null )
    {
        final Element container = htmlDocumentFacade.createBlock();
        container.setAttribute( "class", "notes" );
        notes = container;
    }

    final Element noteBlock = htmlDocumentFacade.createBlock();
    noteBlock.setAttribute( "class", type + "note" );
    notes.appendChild( noteBlock );

    // named anchor of the note that links back to the reference mark
    final Element backLink = htmlDocumentFacade
            .createBookmark( noteTarget );
    backLink.setAttribute( "href", "#" + referenceTarget );
    backLink.setTextContent( number );
    noteBlock.appendChild( backLink );

    final Element textSpan = htmlDocumentFacade.getDocument()
            .createElement( "span" );
    textSpan.setAttribute( "class", type + "notetext" );
    noteBlock.appendChild( textSpan );

    processCharacters( doc, Integer.MIN_VALUE, noteTextRange, textSpan );
}
protected void processPageref( HWPFDocumentCore hwpfDocument,
Element currentBlock, Range textRange, int currentTableLevel,
String pageref )
@ -458,8 +521,7 @@ public class WordToHtmlConverter extends AbstractWordConverter
div.getTagName(), "d", getSectionStyle( section ) ) );
htmlDocumentFacade.body.appendChild( div );
processSectionParagraphes( wordDocument, div, section,
Integer.MIN_VALUE );
processParagraphes( wordDocument, div, section, Integer.MIN_VALUE );
}
@Override
@ -470,8 +532,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
.setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass(
"body", "b", getSectionStyle( section ) ) );
processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
section, Integer.MIN_VALUE );
processParagraphes( wordDocument, htmlDocumentFacade.body, section,
Integer.MIN_VALUE );
}
protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
@ -562,8 +624,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
tableCellElement.setAttribute( "rowspan", "" + count );
}
processSectionParagraphes( hwpfDocument, tableCellElement,
tableCell, table.getTableLevel() );
processParagraphes( hwpfDocument, tableCellElement, tableCell,
table.getTableLevel() );
if ( !tableCellElement.hasChildNodes() )
{

View File

@ -299,14 +299,23 @@ public final class HWPFLister
if ( true )
{
System.out.println( new Range( chpx.getStart(), chpx.getEnd(),
String text = new Range( chpx.getStart(), chpx.getEnd(),
_doc.getOverallRange() )
{
public String toString()
{
return "CHPX range (" + super.toString() + ")";
}
}.text() );
}.text();
StringBuilder stringBuilder = new StringBuilder();
for ( char c : text.toCharArray() )
{
if ( c < 30 )
stringBuilder.append( "\\0x" + Integer.toHexString( c ) );
else
stringBuilder.append( c );
}
System.out.println( stringBuilder );
}
}
}

View File

@ -33,6 +33,12 @@ public interface Notes
*/
int getNotesCount();
/**
 * Returns the index of the note whose anchor is located at the specified
 * position, or -1 if there is no such note.
 *
 * @param anchorPosition
 *            position of the note anchor to look up
 * @return zero-based index of the matching note, or -1 if no note is
 *         anchored at {@code anchorPosition}
 */
int getNoteIndexByAnchorPosition( int anchorPosition );
/**
* Returns the end offset of the text corresponding to the reference within
* the footnote text address space

View File

@ -16,6 +16,9 @@
==================================================================== */
package org.apache.poi.hwpf.usermodel;
import java.util.HashMap;
import java.util.Map;
import org.apache.poi.hwpf.model.NotesTables;
/**
@ -25,6 +28,8 @@ import org.apache.poi.hwpf.model.NotesTables;
*/
public class NotesImpl implements Notes
{
private Map<Integer, Integer> anchorToIndexMap = null;
private final NotesTables notesTables;
public NotesImpl( NotesTables notesTables )
@ -37,6 +42,18 @@ public class NotesImpl implements Notes
return notesTables.getDescriptor( index ).getStart();
}
/**
 * Looks up the note anchored at {@code anchorPosition} in the lazily built
 * anchor-to-index map.
 *
 * @param anchorPosition
 *            position of the note anchor to look up
 * @return index of the matching note, or -1 if no note is anchored there
 */
public int getNoteIndexByAnchorPosition( int anchorPosition )
{
    updateAnchorToIndexMap();

    final Integer found = anchorToIndexMap.get( Integer
            .valueOf( anchorPosition ) );
    return found != null ? found.intValue() : -1;
}
public int getNotesCount()
{
return notesTables.getDescriptorsCount();
@ -51,4 +68,18 @@ public class NotesImpl implements Notes
{
return notesTables.getTextPosition( index ).getStart();
}
/**
 * Builds the anchor-position-to-note-index map on first use; later calls
 * return immediately. The field is only assigned once the map has been
 * filled completely.
 */
private void updateAnchorToIndexMap()
{
    if ( anchorToIndexMap == null )
    {
        final Map<Integer, Integer> mapping = new HashMap<Integer, Integer>();

        int noteIndex = 0;
        while ( noteIndex < notesTables.getDescriptorsCount() )
        {
            final int anchorStart = notesTables.getDescriptor( noteIndex )
                    .getStart();
            mapping.put( Integer.valueOf( anchorStart ),
                    Integer.valueOf( noteIndex ) );
            noteIndex++;
        }

        this.anchorToIndexMap = mapping;
    }
}
}

View File

@ -28,6 +28,7 @@ import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
@ -38,6 +39,14 @@ import org.apache.poi.hwpf.HWPFDocument;
*/
public class TestWordToFoConverter extends TestCase
{
/**
 * Fails the current test unless {@code result} contains {@code substring};
 * the failure message quotes both strings.
 */
private static void assertContains( String result, final String substring )
{
    if ( result.contains( substring ) )
        return;

    final String message = "Substring \"" + substring
            + "\" not found in the following string: \"" + result + "\"";
    fail( message );
}
private static String getFoText( final String sampleFileName )
throws Exception
{
@ -72,6 +81,15 @@ public class TestWordToFoConverter extends TestCase
.contains( "<pdf:Keywords xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\">This is document keywords</pdf:Keywords>" ) );
}
/**
 * Checks that converting {@code endingnote.doc} yields the superscript note
 * number and the text of the note.
 */
public void testEndnote() throws Exception
{
    final String foText = getFoText( "endingnote.doc" );
    final String expectedNoteNumber = "<fo:inline baseline-shift=\"super\" font-size=\"smaller\">1</fo:inline>";

    assertContains( foText, expectedNoteNumber );
    assertContains( foText, "Ending note text" );
}
public void testEquation() throws Exception
{
final String sampleFileName = "equation.doc";
@ -81,15 +99,6 @@ public class TestWordToFoConverter extends TestCase
.contains( "<!--Image link to '0.emf' can be here-->" ) );
}
/**
 * Checks that converting {@code innertable.doc} yields a cell with the
 * expected padding and width attributes.
 */
public void testInnerTable() throws Exception
{
    final String foText = getFoText( "innertable.doc" );
    final String expectedCellAttributes = "padding-end=\"0.0in\" padding-start=\"0.0in\" width=\"1.0770833in\"";

    assertTrue( foText.contains( expectedCellAttributes ) );
}
public void testHyperlink() throws Exception
{
final String sampleFileName = "hyperlink.doc";
@ -100,6 +109,15 @@ public class TestWordToFoConverter extends TestCase
assertTrue( result.contains( "Hyperlink text" ) );
}
/**
 * Checks that converting {@code innertable.doc} yields a cell with the
 * expected padding and width attributes.
 */
public void testInnerTable() throws Exception
{
    final String foText = getFoText( "innertable.doc" );
    final String expectedCellAttributes = "padding-end=\"0.0in\" padding-start=\"0.0in\" width=\"1.0770833in\"";

    assertTrue( foText.contains( expectedCellAttributes ) );
}
public void testPageref() throws Exception
{
final String sampleFileName = "pageref.doc";

View File

@ -145,9 +145,11 @@ public class TestWordToHtmlConverter extends TestCase
{
String result = getHtmlText( "endingnote.doc" );
assertContains( result, "<a href=\"#userref\">" );
assertContains( result, "<a name=\"userref\">" );
assertContains( result, "1" );
assertContains( result,
"<a class=\"endnoteanchor\" href=\"#endnote_1\" name=\"endnote_back_1\">1</a>" );
assertContains( result,
"<a href=\"#endnote_back_1\" name=\"endnote_1\">1</a>" );
assertContains( result, "Ending note text" );
}
public void testEquation() throws Exception