Push boundary checks down, remove deprecation warnings, and remove the (unused) cpMin (Word XP) hack
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145075 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
55c6850928
commit
26c1fa750d
@ -214,15 +214,10 @@ public final class HWPFDocument extends HWPFDocumentCore
|
|||||||
_cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
|
_cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
|
||||||
_tpt = _cft.getTextPieceTable();
|
_tpt = _cft.getTextPieceTable();
|
||||||
|
|
||||||
// Word XP and later all put in a zero filled buffer in
|
|
||||||
// front of the text. This screws up the system for offsets,
|
|
||||||
// which assume we always start at zero. This is an adjustment.
|
|
||||||
int cpMin = _tpt.getCpMin();
|
|
||||||
|
|
||||||
// Now load the rest of the properties, which need to be adjusted
|
// Now load the rest of the properties, which need to be adjusted
|
||||||
// for where text really begins
|
// for where text really begins
|
||||||
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
|
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
|
||||||
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt, true);
|
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true);
|
||||||
|
|
||||||
// Read FSPA and Escher information
|
// Read FSPA and Escher information
|
||||||
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
|
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
|
||||||
|
@ -43,6 +43,8 @@ public class HtmlDocumentFacade
|
|||||||
|
|
||||||
html.appendChild( head );
|
html.appendChild( head );
|
||||||
html.appendChild( body );
|
html.appendChild( body );
|
||||||
|
|
||||||
|
body.setAttribute( "style", "white-space-collapsing: preserve; " );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addAuthor( String value )
|
public void addAuthor( String value )
|
||||||
|
@ -172,7 +172,8 @@ public class WordToHtmlUtils extends AbstractWordUtils
|
|||||||
style.append( "break-before: page; " );
|
style.append( "break-before: page; " );
|
||||||
}
|
}
|
||||||
|
|
||||||
style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " );
|
style.append( "hyphenate: "
|
||||||
|
+ ( paragraph.isAutoHyphenated() ? "auto" : "none" ) + "; " );
|
||||||
|
|
||||||
if ( paragraph.keepOnPage() )
|
if ( paragraph.keepOnPage() )
|
||||||
{
|
{
|
||||||
@ -183,9 +184,6 @@ public class WordToHtmlUtils extends AbstractWordUtils
|
|||||||
{
|
{
|
||||||
style.append( "keep-with-next.within-page: always; " );
|
style.append( "keep-with-next.within-page: always; " );
|
||||||
}
|
}
|
||||||
|
|
||||||
style.append( "linefeed-treatment: preserve; " );
|
|
||||||
style.append( "white-space-collapse: false; " );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void addTableCellProperties( TableRow tableRow,
|
public static void addTableCellProperties( TableRow tableRow,
|
||||||
|
@ -21,6 +21,8 @@ import java.util.ArrayList;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
import org.apache.poi.util.POILogger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a CHP fkp. The style properties for paragraph and character runs
|
* Represents a CHP fkp. The style properties for paragraph and character runs
|
||||||
@ -40,6 +42,9 @@ import org.apache.poi.util.LittleEndian;
|
|||||||
*/
|
*/
|
||||||
public final class CHPFormattedDiskPage extends FormattedDiskPage
|
public final class CHPFormattedDiskPage extends FormattedDiskPage
|
||||||
{
|
{
|
||||||
|
private static final POILogger logger = POILogFactory
|
||||||
|
.getLogger( CHPFormattedDiskPage.class );
|
||||||
|
|
||||||
private static final int FC_SIZE = 4;
|
private static final int FC_SIZE = 4;
|
||||||
|
|
||||||
private ArrayList<CHPX> _chpxList = new ArrayList<CHPX>();
|
private ArrayList<CHPX> _chpxList = new ArrayList<CHPX>();
|
||||||
@ -79,11 +84,20 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
|
|||||||
int startAt = getStart(x);
|
int startAt = getStart(x);
|
||||||
int endAt = getEnd(x);
|
int endAt = getEnd(x);
|
||||||
|
|
||||||
if (ignoreChpxWithoutTextPieces && !tpt.isIndexInTable( startAt, endAt ) ) {
|
if (!ignoreChpxWithoutTextPieces || tpt.isIndexInTable( startAt, endAt ) )
|
||||||
_chpxList.add(null);
|
{
|
||||||
} else {
|
|
||||||
_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
|
_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
logger.log( POILogger.WARN, "CHPX [",
|
||||||
|
Integer.valueOf( startAt ), "; ",
|
||||||
|
Integer.valueOf( endAt ),
|
||||||
|
") (bytes) doesn't have corresponding text pieces "
|
||||||
|
+ "and will be skipped" );
|
||||||
|
|
||||||
|
_chpxList.add(null);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,8 +21,6 @@ import java.util.Collections;
|
|||||||
|
|
||||||
import org.apache.poi.poifs.common.POIFSConstants;
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.POILogFactory;
|
|
||||||
import org.apache.poi.util.POILogger;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class holds all of the character formatting
|
* This class holds all of the character formatting
|
||||||
@ -34,9 +32,6 @@ import org.apache.poi.util.POILogger;
|
|||||||
*/
|
*/
|
||||||
public final class OldCHPBinTable extends CHPBinTable
|
public final class OldCHPBinTable extends CHPBinTable
|
||||||
{
|
{
|
||||||
private static final POILogger logger = POILogFactory
|
|
||||||
.getLogger( OldCHPBinTable.class );
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor used to read an old-style binTable
|
* Constructor used to read an old-style binTable
|
||||||
* in from a Word document.
|
* in from a Word document.
|
||||||
@ -67,15 +62,8 @@ public final class OldCHPBinTable extends CHPBinTable
|
|||||||
for (int y = 0; y < fkpSize; y++)
|
for (int y = 0; y < fkpSize; y++)
|
||||||
{
|
{
|
||||||
CHPX chpx = cfkp.getCHPX(y);
|
CHPX chpx = cfkp.getCHPX(y);
|
||||||
if (chpx != null && tpt.isIndexInTable( chpx.getStartBytes(), chpx.getEndBytes() )) {
|
|
||||||
_textRuns.add(chpx);
|
|
||||||
} else {
|
|
||||||
if (chpx != null)
|
if (chpx != null)
|
||||||
logger.log( POILogger.WARN, "CHPX [",
|
_textRuns.add(chpx);
|
||||||
chpx.getStartBytes(), "; ", chpx.getEndBytes(),
|
|
||||||
") (bytes) doesn't have corresponding text pieces "
|
|
||||||
+ "and will be skipped" );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Collections.sort( _textRuns, PropertyNode.StartComparator.instance );
|
Collections.sort( _textRuns, PropertyNode.StartComparator.instance );
|
||||||
|
@ -21,8 +21,6 @@ import java.util.Collections;
|
|||||||
|
|
||||||
import org.apache.poi.poifs.common.POIFSConstants;
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.POILogFactory;
|
|
||||||
import org.apache.poi.util.POILogger;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class holds all of the paragraph formatting
|
* This class holds all of the paragraph formatting
|
||||||
@ -34,8 +32,6 @@ import org.apache.poi.util.POILogger;
|
|||||||
*/
|
*/
|
||||||
public final class OldPAPBinTable extends PAPBinTable
|
public final class OldPAPBinTable extends PAPBinTable
|
||||||
{
|
{
|
||||||
private static final POILogger logger = POILogFactory
|
|
||||||
.getLogger( OldPAPBinTable.class );
|
|
||||||
|
|
||||||
public OldPAPBinTable(byte[] documentStream, int offset,
|
public OldPAPBinTable(byte[] documentStream, int offset,
|
||||||
int size, int fcMin, TextPieceTable tpt)
|
int size, int fcMin, TextPieceTable tpt)
|
||||||
@ -51,21 +47,15 @@ public final class OldPAPBinTable extends PAPBinTable
|
|||||||
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
|
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
|
||||||
|
|
||||||
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
|
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
|
||||||
documentStream, pageOffset, fcMin, tpt);
|
documentStream, pageOffset, tpt, true);
|
||||||
|
|
||||||
int fkpSize = pfkp.size();
|
int fkpSize = pfkp.size();
|
||||||
|
|
||||||
for (int y = 0; y < fkpSize; y++)
|
for (int y = 0; y < fkpSize; y++)
|
||||||
{
|
{
|
||||||
PAPX papx = pfkp.getPAPX(y);
|
PAPX papx = pfkp.getPAPX(y);
|
||||||
if (papx != null && tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() )) {
|
if (papx != null) {
|
||||||
_paragraphs.add(papx);
|
_paragraphs.add(papx);
|
||||||
} else {
|
|
||||||
if ( papx != null )
|
|
||||||
logger.log( POILogger.WARN, "PAPX [",
|
|
||||||
papx.getStartBytes(), "; ", papx.getEndBytes(),
|
|
||||||
") (bytes) doesn't have corresponding text pieces "
|
|
||||||
+ "and will be skipped" );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -56,13 +56,12 @@ public class PAPBinTable
|
|||||||
byte[] dataStream, int offset, int size, int fcMin,
|
byte[] dataStream, int offset, int size, int fcMin,
|
||||||
TextPieceTable tpt )
|
TextPieceTable tpt )
|
||||||
{
|
{
|
||||||
this( documentStream, tableStream, dataStream, offset, size, fcMin,
|
this( documentStream, tableStream, dataStream, offset, size, tpt, true );
|
||||||
tpt, true );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public PAPBinTable( byte[] documentStream, byte[] tableStream,
|
public PAPBinTable( byte[] documentStream, byte[] tableStream,
|
||||||
byte[] dataStream, int offset, int size, int fcMin,
|
byte[] dataStream, int offset, int size, TextPieceTable tpt,
|
||||||
TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
|
boolean ignorePapxWithoutTextPieces )
|
||||||
{
|
{
|
||||||
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
|
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
|
||||||
this.tpt = tpt;
|
this.tpt = tpt;
|
||||||
@ -76,7 +75,7 @@ public class PAPBinTable
|
|||||||
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
|
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
|
||||||
|
|
||||||
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
|
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
|
||||||
dataStream, pageOffset, fcMin, tpt);
|
dataStream, pageOffset, tpt, ignorePapxWithoutTextPieces);
|
||||||
|
|
||||||
int fkpSize = pfkp.size();
|
int fkpSize = pfkp.size();
|
||||||
|
|
||||||
@ -84,8 +83,7 @@ public class PAPBinTable
|
|||||||
{
|
{
|
||||||
PAPX papx = pfkp.getPAPX(y);
|
PAPX papx = pfkp.getPAPX(y);
|
||||||
|
|
||||||
//we don't need PAPX if they are references nowhere
|
if (papx != null)
|
||||||
if (!ignorePapxWithoutTextPieces || tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() ))
|
|
||||||
_paragraphs.add(papx);
|
_paragraphs.add(papx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,8 @@ import java.util.Arrays;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
import org.apache.poi.util.POILogger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a PAP FKP. The style properties for paragraph and character runs
|
* Represents a PAP FKP. The style properties for paragraph and character runs
|
||||||
@ -40,6 +42,8 @@ import org.apache.poi.util.LittleEndian;
|
|||||||
* @author Ryan Ackley
|
* @author Ryan Ackley
|
||||||
*/
|
*/
|
||||||
public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
||||||
|
private static final POILogger logger = POILogFactory
|
||||||
|
.getLogger( PAPFormattedDiskPage.class );
|
||||||
|
|
||||||
private static final int BX_SIZE = 13;
|
private static final int BX_SIZE = 13;
|
||||||
private static final int FC_SIZE = 4;
|
private static final int FC_SIZE = 4;
|
||||||
@ -56,14 +60,42 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a PAPFormattedDiskPage from a 512 byte array
|
* Creates a PAPFormattedDiskPage from a 512 byte array
|
||||||
|
*
|
||||||
|
* @deprecated Use
|
||||||
|
* {@link #PAPFormattedDiskPage(byte[],byte[],int,TextPieceTable,boolean)}
|
||||||
|
* instead
|
||||||
*/
|
*/
|
||||||
public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
|
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
|
||||||
|
int offset, int fcMin, TextPieceTable tpt )
|
||||||
|
{
|
||||||
|
this( documentStream, dataStream, offset, tpt, true );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a PAPFormattedDiskPage from a 512 byte array
|
||||||
|
*/
|
||||||
|
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
|
||||||
|
int offset, TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
|
||||||
{
|
{
|
||||||
super( documentStream, offset );
|
super( documentStream, offset );
|
||||||
for (int x = 0; x < _crun; x++) {
|
for ( int x = 0; x < _crun; x++ )
|
||||||
|
{
|
||||||
int startAt = getStart( x );
|
int startAt = getStart( x );
|
||||||
int endAt = getEnd( x );
|
int endAt = getEnd( x );
|
||||||
_papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream));
|
if ( !ignorePapxWithoutTextPieces
|
||||||
|
|| tpt.isIndexInTable( startAt, endAt ) )
|
||||||
|
_papxList.add( new PAPX( startAt, endAt, tpt, getGrpprl( x ),
|
||||||
|
getParagraphHeight( x ), dataStream ) );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
logger.log( POILogger.WARN, "PAPX [",
|
||||||
|
Integer.valueOf( startAt ), "; ",
|
||||||
|
Integer.valueOf( endAt ),
|
||||||
|
") (bytes) doesn't have corresponding text pieces "
|
||||||
|
+ "and will be skipped" );
|
||||||
|
|
||||||
|
_papxList.add( null );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_fkp = null;
|
_fkp = null;
|
||||||
_dataStream = dataStream;
|
_dataStream = dataStream;
|
||||||
|
@ -26,6 +26,7 @@ import javax.xml.transform.dom.DOMSource;
|
|||||||
import javax.xml.transform.stream.StreamResult;
|
import javax.xml.transform.stream.StreamResult;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
import org.apache.poi.POIDataSamples;
|
import org.apache.poi.POIDataSamples;
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
|
|
||||||
@ -68,6 +69,13 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
|
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testBug33519() throws Exception
|
||||||
|
{
|
||||||
|
String result = getHtmlText( "Bug33519.doc" );
|
||||||
|
assertTrue( result.contains( "Планински турове" ) );
|
||||||
|
assertTrue( result.contains( "Явор Асенов" ) );
|
||||||
|
}
|
||||||
|
|
||||||
public void testBug46610_2() throws Exception
|
public void testBug46610_2() throws Exception
|
||||||
{
|
{
|
||||||
String result = getHtmlText( "Bug46610_2.doc" );
|
String result = getHtmlText( "Bug46610_2.doc" );
|
||||||
|
@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
import org.apache.poi.hwpf.HWPFDocFixture;
|
import org.apache.poi.hwpf.HWPFDocFixture;
|
||||||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||||
|
|
||||||
@ -38,9 +39,8 @@ public final class TestPAPBinTable
|
|||||||
FileInformationBlock fib = _hWPFDocFixture._fib;
|
FileInformationBlock fib = _hWPFDocFixture._fib;
|
||||||
byte[] mainStream = _hWPFDocFixture._mainStream;
|
byte[] mainStream = _hWPFDocFixture._mainStream;
|
||||||
byte[] tableStream = _hWPFDocFixture._tableStream;
|
byte[] tableStream = _hWPFDocFixture._tableStream;
|
||||||
int fcMin = fib.getFcMin();
|
|
||||||
|
|
||||||
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT, false);
|
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false);
|
||||||
|
|
||||||
HWPFFileSystem fileSys = new HWPFFileSystem();
|
HWPFFileSystem fileSys = new HWPFFileSystem();
|
||||||
|
|
||||||
@ -51,7 +51,7 @@ public final class TestPAPBinTable
|
|||||||
byte[] newTableStream = tableOut.toByteArray();
|
byte[] newTableStream = tableOut.toByteArray();
|
||||||
byte[] newMainStream = mainOut.toByteArray();
|
byte[] newMainStream = mainOut.toByteArray();
|
||||||
|
|
||||||
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT, false);
|
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false);
|
||||||
|
|
||||||
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
|
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
|
||||||
ArrayList newTextRuns = newBinTable.getParagraphs();
|
ArrayList newTextRuns = newBinTable.getParagraphs();
|
||||||
|
Loading…
Reference in New Issue
Block a user