Push boundary checks down, remove deprecation warnings, and remove the (unused) cpMin (Word XP) hack

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145075 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-11 08:57:42 +00:00
parent 55c6850928
commit 26c1fa750d
10 changed files with 82 additions and 57 deletions

View File

@ -214,15 +214,10 @@ public final class HWPFDocument extends HWPFDocumentCore
_cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
_tpt = _cft.getTextPieceTable();
// Word XP and later all put in a zero filled buffer in
// front of the text. This screws up the system for offsets,
// which assume we always start at zero. This is an adjustment.
int cpMin = _tpt.getCpMin();
// Now load the rest of the properties, which need to be adjusted
// for where the text really begins
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt, true);
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true);
// Read FSPA and Escher information
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());

View File

@ -43,6 +43,8 @@ public class HtmlDocumentFacade
html.appendChild( head );
html.appendChild( body );
body.setAttribute( "style", "white-space-collapsing: preserve; " );
}
public void addAuthor( String value )

View File

@ -172,7 +172,8 @@ public class WordToHtmlUtils extends AbstractWordUtils
style.append( "break-before: page; " );
}
style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " );
style.append( "hyphenate: "
+ ( paragraph.isAutoHyphenated() ? "auto" : "none" ) + "; " );
if ( paragraph.keepOnPage() )
{
@ -183,9 +184,6 @@ public class WordToHtmlUtils extends AbstractWordUtils
{
style.append( "keep-with-next.within-page: always; " );
}
style.append( "linefeed-treatment: preserve; " );
style.append( "white-space-collapse: false; " );
}
public static void addTableCellProperties( TableRow tableRow,

View File

@ -21,6 +21,8 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* Represents a CHP fkp. The style properties for paragraph and character runs
@ -40,6 +42,9 @@ import org.apache.poi.util.LittleEndian;
*/
public final class CHPFormattedDiskPage extends FormattedDiskPage
{
private static final POILogger logger = POILogFactory
.getLogger( CHPFormattedDiskPage.class );
private static final int FC_SIZE = 4;
private ArrayList<CHPX> _chpxList = new ArrayList<CHPX>();
@ -79,11 +84,20 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
int startAt = getStart(x);
int endAt = getEnd(x);
if (ignoreChpxWithoutTextPieces && !tpt.isIndexInTable( startAt, endAt ) ) {
_chpxList.add(null);
} else {
if (!ignoreChpxWithoutTextPieces || tpt.isIndexInTable( startAt, endAt ) )
{
_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
}
else
{
logger.log( POILogger.WARN, "CHPX [",
Integer.valueOf( startAt ), "; ",
Integer.valueOf( endAt ),
") (bytes) doesn't have corresponding text pieces "
+ "and will be skipped" );
_chpxList.add(null);
}
}
}

View File

@ -21,8 +21,6 @@ import java.util.Collections;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* This class holds all of the character formatting
@ -34,9 +32,6 @@ import org.apache.poi.util.POILogger;
*/
public final class OldCHPBinTable extends CHPBinTable
{
private static final POILogger logger = POILogFactory
.getLogger( OldCHPBinTable.class );
/**
* Constructor used to read an old-style binTable
* in from a Word document.
@ -67,15 +62,8 @@ public final class OldCHPBinTable extends CHPBinTable
for (int y = 0; y < fkpSize; y++)
{
CHPX chpx = cfkp.getCHPX(y);
if (chpx != null && tpt.isIndexInTable( chpx.getStartBytes(), chpx.getEndBytes() )) {
if (chpx != null)
_textRuns.add(chpx);
} else {
if ( chpx != null )
logger.log( POILogger.WARN, "CHPX [",
chpx.getStartBytes(), "; ", chpx.getEndBytes(),
") (bytes) doesn't have corresponding text pieces "
+ "and will be skipped" );
}
}
}
Collections.sort( _textRuns, PropertyNode.StartComparator.instance );

View File

@ -21,8 +21,6 @@ import java.util.Collections;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* This class holds all of the paragraph formatting
@ -34,8 +32,6 @@ import org.apache.poi.util.POILogger;
*/
public final class OldPAPBinTable extends PAPBinTable
{
private static final POILogger logger = POILogFactory
.getLogger( OldPAPBinTable.class );
public OldPAPBinTable(byte[] documentStream, int offset,
int size, int fcMin, TextPieceTable tpt)
@ -51,21 +47,15 @@ public final class OldPAPBinTable extends PAPBinTable
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
documentStream, pageOffset, fcMin, tpt);
documentStream, pageOffset, tpt, true);
int fkpSize = pfkp.size();
for (int y = 0; y < fkpSize; y++)
{
PAPX papx = pfkp.getPAPX(y);
if (papx != null && tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() )) {
if (papx != null) {
_paragraphs.add(papx);
} else {
if ( papx != null )
logger.log( POILogger.WARN, "PAPX [",
papx.getStartBytes(), "; ", papx.getEndBytes(),
") (bytes) doesn't have corresponding text pieces "
+ "and will be skipped" );
}
}
}

View File

@ -56,13 +56,12 @@ public class PAPBinTable
byte[] dataStream, int offset, int size, int fcMin,
TextPieceTable tpt )
{
this( documentStream, tableStream, dataStream, offset, size, fcMin,
tpt, true );
this( documentStream, tableStream, dataStream, offset, size, tpt, true );
}
public PAPBinTable( byte[] documentStream, byte[] tableStream,
byte[] dataStream, int offset, int size, int fcMin,
TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
byte[] dataStream, int offset, int size, TextPieceTable tpt,
boolean ignorePapxWithoutTextPieces )
{
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
this.tpt = tpt;
@ -76,7 +75,7 @@ public class PAPBinTable
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
dataStream, pageOffset, fcMin, tpt);
dataStream, pageOffset, tpt, ignorePapxWithoutTextPieces);
int fkpSize = pfkp.size();
@ -84,8 +83,7 @@ public class PAPBinTable
{
PAPX papx = pfkp.getPAPX(y);
//we don't need PAPX if they are referenced nowhere
if (!ignorePapxWithoutTextPieces || tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() ))
if (papx != null)
_paragraphs.add(papx);
}
}

View File

@ -22,6 +22,8 @@ import java.util.Arrays;
import java.util.List;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* Represents a PAP FKP. The style properties for paragraph and character runs
@ -40,6 +42,8 @@ import org.apache.poi.util.LittleEndian;
* @author Ryan Ackley
*/
public final class PAPFormattedDiskPage extends FormattedDiskPage {
private static final POILogger logger = POILogFactory
.getLogger( PAPFormattedDiskPage.class );
private static final int BX_SIZE = 13;
private static final int FC_SIZE = 4;
@ -56,14 +60,42 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
/**
* Creates a PAPFormattedDiskPage from a 512 byte array
*
* @deprecated Use
* {@link #PAPFormattedDiskPage(byte[],byte[],int,TextPieceTable,boolean)}
* instead
*/
public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
int offset, int fcMin, TextPieceTable tpt )
{
super(documentStream, offset);
for (int x = 0; x < _crun; x++) {
int startAt = getStart(x);
int endAt = getEnd(x);
_papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream));
this( documentStream, dataStream, offset, tpt, true );
}
/**
* Creates a PAPFormattedDiskPage from a 512 byte array
*/
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
int offset, TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
{
super( documentStream, offset );
for ( int x = 0; x < _crun; x++ )
{
int startAt = getStart( x );
int endAt = getEnd( x );
if ( !ignorePapxWithoutTextPieces
|| tpt.isIndexInTable( startAt, endAt ) )
_papxList.add( new PAPX( startAt, endAt, tpt, getGrpprl( x ),
getParagraphHeight( x ), dataStream ) );
else
{
logger.log( POILogger.WARN, "PAPX [",
Integer.valueOf( startAt ), "; ",
Integer.valueOf( endAt ),
") (bytes) doesn't have corresponding text pieces "
+ "and will be skipped" );
_papxList.add( null );
}
}
_fkp = null;
_dataStream = dataStream;

View File

@ -26,6 +26,7 @@ import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
@ -68,6 +69,13 @@ public class TestWordToHtmlConverter extends TestCase
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
}
/**
 * Verifies that the text returned by {@code getHtmlText} for the
 * "Bug33519.doc" sample contains both expected Cyrillic phrases.
 */
public void testBug33519() throws Exception
{
    final String html = getHtmlText( "Bug33519.doc" );
    final String[] expectedPhrases = { "Планински турове", "Явор Асенов" };
    // Phrases are checked in the same order as before, so the first
    // missing one is still the one that fails the test.
    for ( String phrase : expectedPhrases )
    {
        assertTrue( html.contains( phrase ) );
    }
}
public void testBug46610_2() throws Exception
{
String result = getHtmlText( "Bug46610_2.doc" );

View File

@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocFixture;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
@ -38,9 +39,8 @@ public final class TestPAPBinTable
FileInformationBlock fib = _hWPFDocFixture._fib;
byte[] mainStream = _hWPFDocFixture._mainStream;
byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin();
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT, false);
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false);
HWPFFileSystem fileSys = new HWPFFileSystem();
@ -51,7 +51,7 @@ public final class TestPAPBinTable
byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray();
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT, false);
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false);
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
ArrayList newTextRuns = newBinTable.getParagraphs();