different workarounds for old Word format
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1195133 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ec46537ca0
commit
5a930ae36f
@ -80,7 +80,7 @@ import org.apache.poi.util.Internal;
|
||||
*/
|
||||
public final class HWPFDocument extends HWPFDocumentCore
|
||||
{
|
||||
private static final String PROPERTY_PRESERVE_BIN_TABLES = "org.apache.poi.hwpf.preserveBinTables";
|
||||
static final String PROPERTY_PRESERVE_BIN_TABLES = "org.apache.poi.hwpf.preserveBinTables";
|
||||
private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";
|
||||
|
||||
private static final String STREAM_DATA = "Data";
|
||||
|
@ -66,9 +66,10 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
||||
|
||||
// We need to get hold of the text that makes up the
|
||||
// document, which might be regular or fast-saved
|
||||
ComplexFileTable cft = null;
|
||||
StringBuffer text = new StringBuffer();
|
||||
if(_fib.getFibBase().isFComplex()) {
|
||||
ComplexFileTable cft = new ComplexFileTable(
|
||||
cft = new ComplexFileTable(
|
||||
_mainStream, _mainStream,
|
||||
complexTableOffset, _fib.getFibBase().getFcMin()
|
||||
);
|
||||
@ -113,6 +114,27 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
||||
_mainStream, sedTableOffset, sedTableSize,
|
||||
_fib.getFibBase().getFcMin(), tpt
|
||||
);
|
||||
|
||||
/*
|
||||
* in this mode we are preserving the PAPX/CHPX structure from the file, so text may
|
||||
* be missing from the output, and the text order may be corrupted
|
||||
*/
|
||||
boolean preserveBinTables = false;
|
||||
try
|
||||
{
|
||||
preserveBinTables = Boolean.parseBoolean( System
|
||||
.getProperty( HWPFDocument.PROPERTY_PRESERVE_BIN_TABLES ) );
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
// ignore;
|
||||
}
|
||||
|
||||
if ( !preserveBinTables )
|
||||
{
|
||||
_cbt.rebuild( cft );
|
||||
_pbt.rebuild( _text, cft );
|
||||
}
|
||||
}
|
||||
|
||||
public Range getOverallRange()
|
||||
|
@ -17,8 +17,6 @@
|
||||
|
||||
package org.apache.poi.hwpf.model;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
@ -57,7 +55,5 @@ public final class OldPAPBinTable extends PAPBinTable
|
||||
_paragraphs.add( papx );
|
||||
}
|
||||
}
|
||||
Collections.sort( _paragraphs, PropertyNode.StartComparator.instance );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -113,6 +113,12 @@ public class PAPBinTable
|
||||
|
||||
/**
 * Rebuilds this table's paragraph list by delegating to the static
 * three-argument rebuild overload, passing this instance's
 * _paragraphs list as the list to operate on.
 *
 * @param docText          the document text, forwarded unchanged
 * @param complexFileTable the complex file table, forwarded unchanged
 */
public void rebuild( final StringBuilder docText,
|
||||
ComplexFileTable complexFileTable )
|
||||
{
|
||||
rebuild( docText, complexFileTable, _paragraphs );
|
||||
}
|
||||
|
||||
static void rebuild( final StringBuilder docText,
|
||||
ComplexFileTable complexFileTable, List<PAPX> paragraphs )
|
||||
{
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
@ -156,19 +162,19 @@ public class PAPBinTable
|
||||
|
||||
PAPX papx = new PAPX( textPiece.getStart(),
|
||||
textPiece.getEnd(), newSprmBuffer );
|
||||
_paragraphs.add( papx );
|
||||
paragraphs.add( papx );
|
||||
}
|
||||
}
|
||||
|
||||
logger.log( POILogger.DEBUG,
|
||||
"Merged (?) with PAPX from complex file table in ",
|
||||
Long.valueOf( System.currentTimeMillis() - start ),
|
||||
" ms (", Integer.valueOf( _paragraphs.size() ),
|
||||
" ms (", Integer.valueOf( paragraphs.size() ),
|
||||
" elements in total)" );
|
||||
start = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( _paragraphs );
|
||||
List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( paragraphs );
|
||||
Collections.sort( oldPapxSortedByEndPos,
|
||||
PropertyNode.EndComparator.instance );
|
||||
|
||||
@ -179,7 +185,7 @@ public class PAPBinTable
|
||||
final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
|
||||
{
|
||||
int counter = 0;
|
||||
for ( PAPX papx : _paragraphs )
|
||||
for ( PAPX papx : paragraphs )
|
||||
{
|
||||
papxToFileOrder.put( papx, Integer.valueOf( counter++ ) );
|
||||
}
|
||||
@ -270,6 +276,9 @@ public class PAPBinTable
|
||||
SprmBuffer sprmBuffer = null;
|
||||
for ( PAPX papx : papxs )
|
||||
{
|
||||
if ( papx.getGrpprl() == null || papx.getGrpprl().length == 0 )
|
||||
continue;
|
||||
|
||||
if ( sprmBuffer == null )
|
||||
try
|
||||
{
|
||||
@ -281,7 +290,9 @@ public class PAPBinTable
|
||||
throw new Error( e );
|
||||
}
|
||||
else
|
||||
{
|
||||
sprmBuffer.append( papx.getGrpprl(), 2 );
|
||||
}
|
||||
}
|
||||
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer );
|
||||
newPapxs.add( newPapx );
|
||||
@ -289,11 +300,12 @@ public class PAPBinTable
|
||||
lastParStart = endExclusive;
|
||||
continue;
|
||||
}
|
||||
this._paragraphs = new ArrayList<PAPX>( newPapxs );
|
||||
paragraphs.clear();
|
||||
paragraphs.addAll( newPapxs );
|
||||
|
||||
logger.log( POILogger.DEBUG, "PAPX rebuilded from document text in ",
|
||||
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
|
||||
Integer.valueOf( _paragraphs.size() ), " elements)" );
|
||||
Integer.valueOf( paragraphs.size() ), " elements)" );
|
||||
start = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
|
@ -112,11 +112,17 @@ public final class PAPX extends BytePropertyNode<PAPX> {
|
||||
|
||||
/**
 * Returns the bytes of the underlying buffer.
 *
 * @return the result of toByteArray() on _buf (cast to SprmBuffer), or
 *         a zero-length array when _buf is null
 */
public byte[] getGrpprl()
|
||||
{
|
||||
// No buffer present: return an empty array instead of dereferencing null.
if (_buf == null)
|
||||
return new byte[0];
|
||||
|
||||
return ((SprmBuffer)_buf).toByteArray();
|
||||
}
|
||||
|
||||
public short getIstd()
|
||||
{
|
||||
public short getIstd()
|
||||
{
|
||||
if ( _buf == null )
|
||||
return 0;
|
||||
|
||||
byte[] buf = getGrpprl();
|
||||
if (buf.length == 0)
|
||||
{
|
||||
|
@ -1101,7 +1101,7 @@ public class Range { // TODO -instantiable superclass
|
||||
int endIndex = binarySearchEnd( rpl, startIndex, end );
|
||||
while ( endIndex < rpl.size() - 1
|
||||
&& rpl.get( endIndex + 1 ).getEnd() <= end )
|
||||
endIndex--;
|
||||
endIndex++;
|
||||
|
||||
if ( startIndex < 0 || startIndex >= rpl.size()
|
||||
|| startIndex > endIndex || endIndex < 0
|
||||
|
@ -28,6 +28,8 @@ import java.util.List;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.hwpf.converter.WordToTextConverter;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
@ -736,7 +738,8 @@ public class TestBugs extends TestCase
|
||||
*/
|
||||
public void testBug51944() throws Exception
|
||||
{
|
||||
HWPFTestDataSamples.openOldSampleFile( "Bug51944.doc" );
|
||||
HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile( "Bug51944.doc" );
|
||||
WordToTextConverter.getText( doc );
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user