add bookmarks support for Word-to-HTML and Word-to-FO converters
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148824 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2113215046
commit
6d8949ef0c
@ -16,6 +16,12 @@
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.converter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -23,10 +29,11 @@ import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
||||
import org.apache.poi.hwpf.model.FieldsDocumentPart;
|
||||
import org.apache.poi.hwpf.model.Field;
|
||||
import org.apache.poi.hwpf.model.FieldsDocumentPart;
|
||||
import org.apache.poi.hwpf.model.ListFormatOverride;
|
||||
import org.apache.poi.hwpf.model.ListTables;
|
||||
import org.apache.poi.hwpf.usermodel.Bookmark;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
@ -51,6 +58,8 @@ public abstract class AbstractWordConverter
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( AbstractWordConverter.class );
|
||||
|
||||
private final Set<Bookmark> bookmarkStack = new LinkedHashSet<Bookmark>();
|
||||
|
||||
private FontReplacer fontReplacer = new DefaultFontReplacer();
|
||||
|
||||
protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
|
||||
@ -73,7 +82,16 @@ public abstract class AbstractWordConverter
|
||||
protected abstract void outputCharacters( Element block,
|
||||
CharacterRun characterRun, String text );
|
||||
|
||||
protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
|
||||
/**
|
||||
* Wrap range into bookmark(s) and process it. All bookmarks have starts
|
||||
* equal to range start and ends equal to range end. Usually it's only one
|
||||
* bookmark.
|
||||
*/
|
||||
protected abstract void processBookmarks( HWPFDocumentCore wordDocument,
|
||||
Element currentBlock, Range range, int currentTableLevel,
|
||||
List<Bookmark> rangeBookmarks );
|
||||
|
||||
protected boolean processCharacters( HWPFDocumentCore document,
|
||||
int currentTableLevel, Range range, final Element block )
|
||||
{
|
||||
if ( range == null )
|
||||
@ -81,6 +99,22 @@ public abstract class AbstractWordConverter
|
||||
|
||||
boolean haveAnyText = false;
|
||||
|
||||
if ( document instanceof HWPFDocument )
|
||||
{
|
||||
final HWPFDocument doc = (HWPFDocument) document;
|
||||
Map<Integer, List<Bookmark>> rangeBookmarks = doc.getBookmarks()
|
||||
.getBookmarksStartedBetween( range.getStartOffset(),
|
||||
range.getEndOffset() );
|
||||
|
||||
if ( rangeBookmarks != null && !rangeBookmarks.isEmpty() )
|
||||
{
|
||||
boolean processedAny = processRangeBookmarks( doc,
|
||||
currentTableLevel, range, block, rangeBookmarks );
|
||||
if ( processedAny )
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
for ( int c = 0; c < range.numCharacterRuns(); c++ )
|
||||
{
|
||||
CharacterRun characterRun = range.getCharacterRun( c );
|
||||
@ -88,11 +122,11 @@ public abstract class AbstractWordConverter
|
||||
if ( characterRun == null )
|
||||
throw new AssertionError();
|
||||
|
||||
if ( hwpfDocument instanceof HWPFDocument
|
||||
&& ( (HWPFDocument) hwpfDocument ).getPicturesTable()
|
||||
if ( document instanceof HWPFDocument
|
||||
&& ( (HWPFDocument) document ).getPicturesTable()
|
||||
.hasPicture( characterRun ) )
|
||||
{
|
||||
HWPFDocument newFormat = (HWPFDocument) hwpfDocument;
|
||||
HWPFDocument newFormat = (HWPFDocument) document;
|
||||
Picture picture = newFormat.getPicturesTable().extractPicture(
|
||||
characterRun, true );
|
||||
|
||||
@ -107,15 +141,15 @@ public abstract class AbstractWordConverter
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
|
||||
{
|
||||
if ( hwpfDocument instanceof HWPFDocument )
|
||||
if ( document instanceof HWPFDocument )
|
||||
{
|
||||
Field aliveField = ( (HWPFDocument) hwpfDocument )
|
||||
Field aliveField = ( (HWPFDocument) document )
|
||||
.getFieldsTables().lookupFieldByStartOffset(
|
||||
FieldsDocumentPart.MAIN,
|
||||
characterRun.getStartOffset() );
|
||||
if ( aliveField != null )
|
||||
{
|
||||
processField( ( (HWPFDocument) hwpfDocument ), range,
|
||||
processField( ( (HWPFDocument) document ), range,
|
||||
currentTableLevel, aliveField, block );
|
||||
|
||||
int continueAfter = aliveField.getFieldEndOffset();
|
||||
@ -130,8 +164,8 @@ public abstract class AbstractWordConverter
|
||||
}
|
||||
}
|
||||
|
||||
int skipTo = tryDeadField( hwpfDocument, range,
|
||||
currentTableLevel, c, block );
|
||||
int skipTo = tryDeadField( document, range, currentTableLevel,
|
||||
c, block );
|
||||
|
||||
if ( skipTo != c )
|
||||
{
|
||||
@ -337,6 +371,129 @@ public abstract class AbstractWordConverter
|
||||
Element parentFopElement, int currentTableLevel,
|
||||
Paragraph paragraph, String bulletText );
|
||||
|
||||
private boolean processRangeBookmarks( HWPFDocumentCore document,
|
||||
int currentTableLevel, Range range, final Element block,
|
||||
Map<Integer, List<Bookmark>> rangeBookmakrs )
|
||||
{
|
||||
final int startOffset = range.getStartOffset();
|
||||
final int endOffset = range.getEndOffset();
|
||||
|
||||
int beforeBookmarkStart = startOffset;
|
||||
for ( Map.Entry<Integer, List<Bookmark>> entry : rangeBookmakrs
|
||||
.entrySet() )
|
||||
{
|
||||
final List<Bookmark> startedAt = entry.getValue();
|
||||
|
||||
final List<Bookmark> bookmarks;
|
||||
if ( entry.getKey().intValue() == startOffset
|
||||
&& !bookmarkStack.isEmpty() )
|
||||
{
|
||||
/*
|
||||
* we need to filter out some bookmarks because already
|
||||
* processing them in caller methods
|
||||
*/
|
||||
List<Bookmark> filtered = new ArrayList<Bookmark>(
|
||||
startedAt.size() );
|
||||
for ( Bookmark bookmark : startedAt )
|
||||
{
|
||||
if ( this.bookmarkStack.contains( bookmark ) )
|
||||
continue;
|
||||
|
||||
filtered.add( bookmark );
|
||||
}
|
||||
|
||||
if ( filtered.isEmpty() )
|
||||
// no bookmarks - skip to next start point
|
||||
continue;
|
||||
|
||||
bookmarks = filtered;
|
||||
}
|
||||
else
|
||||
{
|
||||
bookmarks = startedAt;
|
||||
}
|
||||
|
||||
// TODO: test me
|
||||
/*
|
||||
* we processing only bookmarks with max size, they shall be first
|
||||
* in sorted list. Other bookmarks will be processed by called
|
||||
* method
|
||||
*/
|
||||
final Bookmark firstBookmark = bookmarks.iterator().next();
|
||||
final int startBookmarkOffset = firstBookmark.getStart();
|
||||
final int endBookmarkOffset = Math.min( firstBookmark.getEnd(),
|
||||
range.getEndOffset() );
|
||||
List<Bookmark> toProcess = new ArrayList<Bookmark>(
|
||||
bookmarks.size() );
|
||||
for ( Bookmark bookmark : bookmarks )
|
||||
{
|
||||
if ( Math.min( bookmark.getEnd(), range.getEndOffset() ) != endBookmarkOffset )
|
||||
break;
|
||||
toProcess.add( bookmark );
|
||||
}
|
||||
|
||||
if ( beforeBookmarkStart != startBookmarkOffset )
|
||||
{
|
||||
// we have range before bookmark
|
||||
Range beforeBookmarkRange = new Range( beforeBookmarkStart,
|
||||
startBookmarkOffset, range )
|
||||
{
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "BeforeBookmarkRange (" + super.toString() + ")";
|
||||
}
|
||||
};
|
||||
processCharacters( document, currentTableLevel,
|
||||
beforeBookmarkRange, block );
|
||||
}
|
||||
Range bookmarkRange = new Range( startBookmarkOffset,
|
||||
endBookmarkOffset, range )
|
||||
{
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "BookmarkRange (" + super.toString() + ")";
|
||||
}
|
||||
};
|
||||
|
||||
bookmarkStack.addAll( toProcess );
|
||||
try
|
||||
{
|
||||
processBookmarks( document, block, bookmarkRange,
|
||||
currentTableLevel,
|
||||
Collections.unmodifiableList( toProcess ) );
|
||||
}
|
||||
finally
|
||||
{
|
||||
bookmarkStack.removeAll( toProcess );
|
||||
}
|
||||
beforeBookmarkStart = endBookmarkOffset;
|
||||
}
|
||||
|
||||
if ( beforeBookmarkStart == startOffset )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( beforeBookmarkStart != endOffset )
|
||||
{
|
||||
// we have range after last bookmark
|
||||
Range afterLastBookmarkRange = new Range( beforeBookmarkStart,
|
||||
endOffset, range )
|
||||
{
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "AfterBookmarkRange (" + super.toString() + ")";
|
||||
}
|
||||
};
|
||||
processCharacters( document, currentTableLevel,
|
||||
afterLastBookmarkRange, block );
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected abstract void processSection( HWPFDocumentCore wordDocument,
|
||||
Section section, int s );
|
||||
|
||||
|
@ -89,6 +89,13 @@ public class HtmlDocumentFacade
|
||||
return document.createElement( "div" );
|
||||
}
|
||||
|
||||
public Element createBookmark( String name )
|
||||
{
|
||||
final Element basicLink = document.createElement( "a" );
|
||||
basicLink.setAttribute( "name", name );
|
||||
return basicLink;
|
||||
}
|
||||
|
||||
public Element createHeader1()
|
||||
{
|
||||
return document.createElement( "h1" );
|
||||
|
@ -18,6 +18,7 @@ package org.apache.poi.hwpf.converter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
@ -31,6 +32,7 @@ import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
||||
import org.apache.poi.hwpf.usermodel.Bookmark;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
@ -51,28 +53,6 @@ import org.w3c.dom.Text;
|
||||
public class WordToFoConverter extends AbstractWordConverter
|
||||
{
|
||||
|
||||
/**
|
||||
* Holds properties values, applied to current <tt>fo:block</tt> element.
|
||||
* Those properties shall not be doubled in children <tt>fo:inline</tt>
|
||||
* elements.
|
||||
*/
|
||||
private static class BlockProperies
|
||||
{
|
||||
final boolean pBold;
|
||||
final String pFontName;
|
||||
final int pFontSize;
|
||||
final boolean pItalic;
|
||||
|
||||
public BlockProperies( String pFontName, int pFontSize, boolean pBold,
|
||||
boolean pItalic )
|
||||
{
|
||||
this.pFontName = pFontName;
|
||||
this.pFontSize = pFontSize;
|
||||
this.pBold = pBold;
|
||||
this.pItalic = pItalic;
|
||||
}
|
||||
}
|
||||
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( WordToFoConverter.class );
|
||||
|
||||
@ -237,6 +217,24 @@ public class WordToFoConverter extends AbstractWordConverter
|
||||
inline.appendChild( textNode );
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processBookmarks( HWPFDocumentCore wordDocument,
|
||||
Element currentBlock, Range range, int currentTableLevel,
|
||||
List<Bookmark> rangeBookmarks )
|
||||
{
|
||||
Element parent = currentBlock;
|
||||
for ( Bookmark bookmark : rangeBookmarks )
|
||||
{
|
||||
Element bookmarkElement = foDocumentFacade.createInline();
|
||||
bookmarkElement.setAttribute( "id", bookmark.getName() );
|
||||
parent.appendChild( bookmarkElement );
|
||||
parent = bookmarkElement;
|
||||
}
|
||||
|
||||
if ( range != null )
|
||||
processCharacters( wordDocument, currentTableLevel, range, parent );
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processDocumentInformation(
|
||||
SummaryInformation summaryInformation )
|
||||
@ -509,4 +507,26 @@ public class WordToFoConverter extends AbstractWordConverter
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds properties values, applied to current <tt>fo:block</tt> element.
|
||||
* Those properties shall not be doubled in children <tt>fo:inline</tt>
|
||||
* elements.
|
||||
*/
|
||||
private static class BlockProperies
|
||||
{
|
||||
final boolean pBold;
|
||||
final String pFontName;
|
||||
final int pFontSize;
|
||||
final boolean pItalic;
|
||||
|
||||
public BlockProperies( String pFontName, int pFontSize, boolean pBold,
|
||||
boolean pItalic )
|
||||
{
|
||||
this.pFontName = pFontName;
|
||||
this.pFontSize = pFontSize;
|
||||
this.pBold = pBold;
|
||||
this.pItalic = pItalic;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ package org.apache.poi.hwpf.converter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
@ -31,6 +32,7 @@ import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
||||
import org.apache.poi.hwpf.usermodel.Bookmark;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
@ -234,6 +236,24 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
||||
basicLink );
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processBookmarks( HWPFDocumentCore wordDocument,
|
||||
Element currentBlock, Range range, int currentTableLevel,
|
||||
List<Bookmark> rangeBookmarks )
|
||||
{
|
||||
Element parent = currentBlock;
|
||||
for ( Bookmark bookmark : rangeBookmarks )
|
||||
{
|
||||
Element bookmarkElement = htmlDocumentFacade
|
||||
.createBookmark( bookmark.getName() );
|
||||
parent.appendChild( bookmarkElement );
|
||||
parent = bookmarkElement;
|
||||
}
|
||||
|
||||
if ( range != null )
|
||||
processCharacters( wordDocument, currentTableLevel, range, parent );
|
||||
}
|
||||
|
||||
/**
|
||||
* This method shall store image bytes in external file and convert it if
|
||||
* necessary. Images shall be stored using PNG format. Other formats may be
|
||||
|
@ -1,12 +1,33 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.usermodel;
|
||||
|
||||
/**
|
||||
* User friendly interface to access information about document bookmarks
|
||||
*
|
||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||
*/
|
||||
public interface Bookmark
{
    /**
     * @return offset at which the bookmarked region ends; the converters
     *         compare it directly with range offsets
     */
    int getEnd();

    /**
     * @return name of the bookmark
     */
    String getName();

    /**
     * @return offset at which the bookmarked region starts; the converters
     *         compare it directly with range offsets
     */
    int getStart();

    /**
     * Changes the name of the bookmark.
     *
     * @param name
     *            new bookmark name
     */
    void setName( String name );
}
|
||||
|
@ -50,50 +50,7 @@ public class BookmarksImpl implements Bookmarks
|
||||
|
||||
private Bookmark getBookmark( final GenericPropertyNode first )
|
||||
{
|
||||
return new Bookmark()
|
||||
{
|
||||
public int getEnd()
|
||||
{
|
||||
int currentIndex = bookmarksTables
|
||||
.getDescriptorFirstIndex( first );
|
||||
try
|
||||
{
|
||||
GenericPropertyNode descriptorLim = bookmarksTables
|
||||
.getDescriptorLim( currentIndex );
|
||||
return descriptorLim.getStart();
|
||||
}
|
||||
catch ( IndexOutOfBoundsException exc )
|
||||
{
|
||||
return first.getEnd();
|
||||
}
|
||||
}
|
||||
|
||||
public String getName()
|
||||
{
|
||||
int currentIndex = bookmarksTables
|
||||
.getDescriptorFirstIndex( first );
|
||||
try
|
||||
{
|
||||
return bookmarksTables.getName( currentIndex );
|
||||
}
|
||||
catch ( ArrayIndexOutOfBoundsException exc )
|
||||
{
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public int getStart()
|
||||
{
|
||||
return first.getStart();
|
||||
}
|
||||
|
||||
public void setName( String name )
|
||||
{
|
||||
int currentIndex = bookmarksTables
|
||||
.getDescriptorFirstIndex( first );
|
||||
bookmarksTables.setName( currentIndex, name );
|
||||
}
|
||||
};
|
||||
return new BookmarkImpl( first );
|
||||
}
|
||||
|
||||
public Bookmark getBookmark( int index )
|
||||
@ -143,6 +100,11 @@ public class BookmarksImpl implements Bookmarks
|
||||
for ( int lookupIndex = startLookupIndex; lookupIndex < endLookupIndex; lookupIndex++ )
|
||||
{
|
||||
int s = sortedStartPositions[lookupIndex];
|
||||
if ( s < startInclusive )
|
||||
continue;
|
||||
if ( s >= endExclusive )
|
||||
break;
|
||||
|
||||
List<Bookmark> startedAt = getBookmarksAt( s );
|
||||
if ( startedAt != null )
|
||||
result.put( Integer.valueOf( s ), startedAt );
|
||||
@ -186,4 +148,87 @@ public class BookmarksImpl implements Bookmarks
|
||||
this.sortedDescriptors = result;
|
||||
this.sortedStartPositions = indices;
|
||||
}
|
||||
|
||||
private final class BookmarkImpl implements Bookmark
|
||||
{
|
||||
private final GenericPropertyNode first;
|
||||
|
||||
private BookmarkImpl( GenericPropertyNode first )
|
||||
{
|
||||
this.first = first;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object obj )
|
||||
{
|
||||
if ( this == obj )
|
||||
return true;
|
||||
if ( obj == null )
|
||||
return false;
|
||||
if ( getClass() != obj.getClass() )
|
||||
return false;
|
||||
BookmarkImpl other = (BookmarkImpl) obj;
|
||||
if ( first == null )
|
||||
{
|
||||
if ( other.first != null )
|
||||
return false;
|
||||
}
|
||||
else if ( !first.equals( other.first ) )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
public int getEnd()
|
||||
{
|
||||
int currentIndex = bookmarksTables.getDescriptorFirstIndex( first );
|
||||
try
|
||||
{
|
||||
GenericPropertyNode descriptorLim = bookmarksTables
|
||||
.getDescriptorLim( currentIndex );
|
||||
return descriptorLim.getStart();
|
||||
}
|
||||
catch ( IndexOutOfBoundsException exc )
|
||||
{
|
||||
return first.getEnd();
|
||||
}
|
||||
}
|
||||
|
||||
public String getName()
|
||||
{
|
||||
int currentIndex = bookmarksTables.getDescriptorFirstIndex( first );
|
||||
try
|
||||
{
|
||||
return bookmarksTables.getName( currentIndex );
|
||||
}
|
||||
catch ( ArrayIndexOutOfBoundsException exc )
|
||||
{
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public int getStart()
|
||||
{
|
||||
return first.getStart();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
return 31 + ( first == null ? 0 : first.hashCode() );
|
||||
}
|
||||
|
||||
public void setName( String name )
|
||||
{
|
||||
int currentIndex = bookmarksTables.getDescriptorFirstIndex( first );
|
||||
bookmarksTables.setName( currentIndex, name );
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "Bookmark [" + getStart() + "; " + getEnd() + "): name: "
|
||||
+ getName();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -187,6 +187,7 @@ public class TestWordToHtmlConverter extends TestCase
|
||||
String result = getHtmlText( "pageref.doc" );
|
||||
|
||||
assertContains( result, "<a href=\"#userref\">" );
|
||||
assertContains( result, "<a name=\"userref\">" );
|
||||
assertContains( result, "1" );
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user