Push boundary checks down, remove deprecation warnings, and remove the (unused) cpMin (Word XP) hack
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145075 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
55c6850928
commit
26c1fa750d
@ -214,15 +214,10 @@ public final class HWPFDocument extends HWPFDocumentCore
|
||||
_cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
|
||||
_tpt = _cft.getTextPieceTable();
|
||||
|
||||
// Word XP and later all put in a zero filled buffer in
|
||||
// front of the text. This screws up the system for offsets,
|
||||
// which assume we always start at zero. This is an adjustment.
|
||||
int cpMin = _tpt.getCpMin();
|
||||
|
||||
// Now load the rest of the properties, which need to be adjusted
|
||||
// for where text really begins
|
||||
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
|
||||
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt, true);
|
||||
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true);
|
||||
|
||||
// Read FSPA and Escher information
|
||||
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
|
||||
|
@ -43,6 +43,8 @@ public class HtmlDocumentFacade
|
||||
|
||||
html.appendChild( head );
|
||||
html.appendChild( body );
|
||||
|
||||
body.setAttribute( "style", "white-space-collapsing: preserve; " );
|
||||
}
|
||||
|
||||
public void addAuthor( String value )
|
||||
|
@ -172,7 +172,8 @@ public class WordToHtmlUtils extends AbstractWordUtils
|
||||
style.append( "break-before: page; " );
|
||||
}
|
||||
|
||||
style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " );
|
||||
style.append( "hyphenate: "
|
||||
+ ( paragraph.isAutoHyphenated() ? "auto" : "none" ) + "; " );
|
||||
|
||||
if ( paragraph.keepOnPage() )
|
||||
{
|
||||
@ -183,9 +184,6 @@ public class WordToHtmlUtils extends AbstractWordUtils
|
||||
{
|
||||
style.append( "keep-with-next.within-page: always; " );
|
||||
}
|
||||
|
||||
style.append( "linefeed-treatment: preserve; " );
|
||||
style.append( "white-space-collapse: false; " );
|
||||
}
|
||||
|
||||
public static void addTableCellProperties( TableRow tableRow,
|
||||
|
@ -21,6 +21,8 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
/**
|
||||
* Represents a CHP fkp. The style properties for paragraph and character runs
|
||||
@ -40,6 +42,9 @@ import org.apache.poi.util.LittleEndian;
|
||||
*/
|
||||
public final class CHPFormattedDiskPage extends FormattedDiskPage
|
||||
{
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( CHPFormattedDiskPage.class );
|
||||
|
||||
private static final int FC_SIZE = 4;
|
||||
|
||||
private ArrayList<CHPX> _chpxList = new ArrayList<CHPX>();
|
||||
@ -79,11 +84,20 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
|
||||
int startAt = getStart(x);
|
||||
int endAt = getEnd(x);
|
||||
|
||||
if (ignoreChpxWithoutTextPieces && !tpt.isIndexInTable( startAt, endAt ) ) {
|
||||
_chpxList.add(null);
|
||||
} else {
|
||||
if (!ignoreChpxWithoutTextPieces || tpt.isIndexInTable( startAt, endAt ) )
|
||||
{
|
||||
_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.log( POILogger.WARN, "CHPX [",
|
||||
Integer.valueOf( startAt ), "; ",
|
||||
Integer.valueOf( endAt ),
|
||||
") (bytes) doesn't have corresponding text pieces "
|
||||
+ "and will be skipped" );
|
||||
|
||||
_chpxList.add(null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -21,8 +21,6 @@ import java.util.Collections;
|
||||
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
/**
|
||||
* This class holds all of the character formatting
|
||||
@ -34,9 +32,6 @@ import org.apache.poi.util.POILogger;
|
||||
*/
|
||||
public final class OldCHPBinTable extends CHPBinTable
|
||||
{
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( OldCHPBinTable.class );
|
||||
|
||||
/**
|
||||
* Constructor used to read an old-style binTable
|
||||
* in from a Word document.
|
||||
@ -67,15 +62,8 @@ public final class OldCHPBinTable extends CHPBinTable
|
||||
for (int y = 0; y < fkpSize; y++)
|
||||
{
|
||||
CHPX chpx = cfkp.getCHPX(y);
|
||||
if (chpx != null && tpt.isIndexInTable( chpx.getStartBytes(), chpx.getEndBytes() )) {
|
||||
if (chpx != null)
|
||||
_textRuns.add(chpx);
|
||||
} else {
|
||||
if ( chpx != null )
|
||||
logger.log( POILogger.WARN, "CHPX [",
|
||||
chpx.getStartBytes(), "; ", chpx.getEndBytes(),
|
||||
") (bytes) doesn't have corresponding text pieces "
|
||||
+ "and will be skipped" );
|
||||
}
|
||||
}
|
||||
}
|
||||
Collections.sort( _textRuns, PropertyNode.StartComparator.instance );
|
||||
|
@ -21,8 +21,6 @@ import java.util.Collections;
|
||||
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
/**
|
||||
* This class holds all of the paragraph formatting
|
||||
@ -34,8 +32,6 @@ import org.apache.poi.util.POILogger;
|
||||
*/
|
||||
public final class OldPAPBinTable extends PAPBinTable
|
||||
{
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( OldPAPBinTable.class );
|
||||
|
||||
public OldPAPBinTable(byte[] documentStream, int offset,
|
||||
int size, int fcMin, TextPieceTable tpt)
|
||||
@ -51,21 +47,15 @@ public final class OldPAPBinTable extends PAPBinTable
|
||||
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
|
||||
|
||||
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
|
||||
documentStream, pageOffset, fcMin, tpt);
|
||||
documentStream, pageOffset, tpt, true);
|
||||
|
||||
int fkpSize = pfkp.size();
|
||||
|
||||
for (int y = 0; y < fkpSize; y++)
|
||||
{
|
||||
PAPX papx = pfkp.getPAPX(y);
|
||||
if (papx != null && tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() )) {
|
||||
if (papx != null) {
|
||||
_paragraphs.add(papx);
|
||||
} else {
|
||||
if ( papx != null )
|
||||
logger.log( POILogger.WARN, "PAPX [",
|
||||
papx.getStartBytes(), "; ", papx.getEndBytes(),
|
||||
") (bytes) doesn't have corresponding text pieces "
|
||||
+ "and will be skipped" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -56,13 +56,12 @@ public class PAPBinTable
|
||||
byte[] dataStream, int offset, int size, int fcMin,
|
||||
TextPieceTable tpt )
|
||||
{
|
||||
this( documentStream, tableStream, dataStream, offset, size, fcMin,
|
||||
tpt, true );
|
||||
this( documentStream, tableStream, dataStream, offset, size, tpt, true );
|
||||
}
|
||||
|
||||
public PAPBinTable( byte[] documentStream, byte[] tableStream,
|
||||
byte[] dataStream, int offset, int size, int fcMin,
|
||||
TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
|
||||
byte[] dataStream, int offset, int size, TextPieceTable tpt,
|
||||
boolean ignorePapxWithoutTextPieces )
|
||||
{
|
||||
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
|
||||
this.tpt = tpt;
|
||||
@ -76,7 +75,7 @@ public class PAPBinTable
|
||||
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
|
||||
|
||||
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
|
||||
dataStream, pageOffset, fcMin, tpt);
|
||||
dataStream, pageOffset, tpt, ignorePapxWithoutTextPieces);
|
||||
|
||||
int fkpSize = pfkp.size();
|
||||
|
||||
@ -84,8 +83,7 @@ public class PAPBinTable
|
||||
{
|
||||
PAPX papx = pfkp.getPAPX(y);
|
||||
|
||||
//we don't need PAPX if they are referenced nowhere
|
||||
if (!ignorePapxWithoutTextPieces || tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() ))
|
||||
if (papx != null)
|
||||
_paragraphs.add(papx);
|
||||
}
|
||||
}
|
||||
|
@ -22,6 +22,8 @@ import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
/**
|
||||
* Represents a PAP FKP. The style properties for paragraph and character runs
|
||||
@ -40,6 +42,8 @@ import org.apache.poi.util.LittleEndian;
|
||||
* @author Ryan Ackley
|
||||
*/
|
||||
public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( PAPFormattedDiskPage.class );
|
||||
|
||||
private static final int BX_SIZE = 13;
|
||||
private static final int FC_SIZE = 4;
|
||||
@ -56,17 +60,45 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
||||
|
||||
/**
|
||||
* Creates a PAPFormattedDiskPage from a 512 byte array
|
||||
*
|
||||
* @deprecated Use
|
||||
* {@link #PAPFormattedDiskPage(byte[],byte[],int,TextPieceTable,boolean)}
|
||||
* instead
|
||||
*/
|
||||
public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
|
||||
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
|
||||
int offset, int fcMin, TextPieceTable tpt )
|
||||
{
|
||||
super(documentStream, offset);
|
||||
for (int x = 0; x < _crun; x++) {
|
||||
int startAt = getStart(x);
|
||||
int endAt = getEnd(x);
|
||||
_papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream));
|
||||
}
|
||||
_fkp = null;
|
||||
_dataStream = dataStream;
|
||||
this( documentStream, dataStream, offset, tpt, true );
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a PAPFormattedDiskPage from a 512 byte array
|
||||
*/
|
||||
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
|
||||
int offset, TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
|
||||
{
|
||||
super( documentStream, offset );
|
||||
for ( int x = 0; x < _crun; x++ )
|
||||
{
|
||||
int startAt = getStart( x );
|
||||
int endAt = getEnd( x );
|
||||
if ( !ignorePapxWithoutTextPieces
|
||||
|| tpt.isIndexInTable( startAt, endAt ) )
|
||||
_papxList.add( new PAPX( startAt, endAt, tpt, getGrpprl( x ),
|
||||
getParagraphHeight( x ), dataStream ) );
|
||||
else
|
||||
{
|
||||
logger.log( POILogger.WARN, "PAPX [",
|
||||
Integer.valueOf( startAt ), "; ",
|
||||
Integer.valueOf( endAt ),
|
||||
") (bytes) doesn't have corresponding text pieces "
|
||||
+ "and will be skipped" );
|
||||
|
||||
_papxList.add( null );
|
||||
}
|
||||
}
|
||||
_fkp = null;
|
||||
_dataStream = dataStream;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -26,6 +26,7 @@ import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
|
||||
@ -68,6 +69,13 @@ public class TestWordToHtmlConverter extends TestCase
|
||||
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
|
||||
}
|
||||
|
||||
public void testBug33519() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "Bug33519.doc" );
|
||||
assertTrue( result.contains( "Планински турове" ) );
|
||||
assertTrue( result.contains( "Явор Асенов" ) );
|
||||
}
|
||||
|
||||
public void testBug46610_2() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "Bug46610_2.doc" );
|
||||
|
@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocFixture;
|
||||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||
|
||||
@ -38,9 +39,8 @@ public final class TestPAPBinTable
|
||||
FileInformationBlock fib = _hWPFDocFixture._fib;
|
||||
byte[] mainStream = _hWPFDocFixture._mainStream;
|
||||
byte[] tableStream = _hWPFDocFixture._tableStream;
|
||||
int fcMin = fib.getFcMin();
|
||||
|
||||
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT, false);
|
||||
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false);
|
||||
|
||||
HWPFFileSystem fileSys = new HWPFFileSystem();
|
||||
|
||||
@ -51,7 +51,7 @@ public final class TestPAPBinTable
|
||||
byte[] newTableStream = tableOut.toByteArray();
|
||||
byte[] newMainStream = mainOut.toByteArray();
|
||||
|
||||
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT, false);
|
||||
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false);
|
||||
|
||||
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
|
||||
ArrayList newTextRuns = newBinTable.getParagraphs();
|
||||
|
Loading…
Reference in New Issue
Block a user