add TOC support
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1189145 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5badcee102
commit
5cba03af8e
|
@ -78,6 +78,13 @@ public abstract class AbstractWordConverter
|
||||||
this.structure = field;
|
this.structure = field;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Structure( int start, int end )
|
||||||
|
{
|
||||||
|
this.start = start;
|
||||||
|
this.end = end;
|
||||||
|
this.structure = null;
|
||||||
|
}
|
||||||
|
|
||||||
public int compareTo( Structure o )
|
public int compareTo( Structure o )
|
||||||
{
|
{
|
||||||
return start < o.start ? -1 : start == o.start ? 0 : 1;
|
return start < o.start ? -1 : start == o.start ? 0 : 1;
|
||||||
|
@ -102,6 +109,15 @@ public abstract class AbstractWordConverter
|
||||||
private static final POILogger logger = POILogFactory
|
private static final POILogger logger = POILogFactory
|
||||||
.getLogger( AbstractWordConverter.class );
|
.getLogger( AbstractWordConverter.class );
|
||||||
|
|
||||||
|
private static final Pattern PATTERN_HYPERLINK_EXTERNAL = Pattern
|
||||||
|
.compile( "^[ \\t\\r\\n]*HYPERLINK \"(.*)\".*$" );
|
||||||
|
|
||||||
|
private static final Pattern PATTERN_HYPERLINK_LOCAL = Pattern
|
||||||
|
.compile( "^[ \\t\\r\\n]*HYPERLINK \\\\l \"(.*)\"[ ](.*)$" );
|
||||||
|
|
||||||
|
private static final Pattern PATTERN_PAGEREF = Pattern
|
||||||
|
.compile( "^[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h.*$" );
|
||||||
|
|
||||||
private static final byte SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE = 2;
|
private static final byte SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE = 2;
|
||||||
|
|
||||||
private static final byte SPECCHAR_DRAWN_OBJECT = 8;
|
private static final byte SPECCHAR_DRAWN_OBJECT = 8;
|
||||||
|
@ -291,17 +307,38 @@ public abstract class AbstractWordConverter
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: dead fields?
|
// TODO: dead fields?
|
||||||
|
int skipUntil = -1;
|
||||||
for ( int c = 0; c < range.numCharacterRuns(); c++ )
|
for ( int c = 0; c < range.numCharacterRuns(); c++ )
|
||||||
{
|
{
|
||||||
CharacterRun characterRun = range.getCharacterRun( c );
|
CharacterRun characterRun = range.getCharacterRun( c );
|
||||||
if ( characterRun == null )
|
if ( characterRun == null )
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
|
if ( characterRun.getStartOffset() < skipUntil )
|
||||||
|
continue;
|
||||||
|
String text = characterRun.text();
|
||||||
|
if ( text == null || text.length() == 0
|
||||||
|
|| text.charAt( 0 ) != FIELD_BEGIN_MARK )
|
||||||
|
continue;
|
||||||
|
|
||||||
Field aliveField = ( (HWPFDocument) wordDocument ).getFields()
|
Field aliveField = ( (HWPFDocument) wordDocument ).getFields()
|
||||||
.getFieldByStartOffset( FieldsDocumentPart.MAIN,
|
.getFieldByStartOffset( FieldsDocumentPart.MAIN,
|
||||||
characterRun.getStartOffset() );
|
characterRun.getStartOffset() );
|
||||||
if ( aliveField != null )
|
if ( aliveField != null )
|
||||||
{
|
{
|
||||||
addToStructures( structures, new Structure( aliveField ) );
|
addToStructures( structures, new Structure( aliveField ) );
|
||||||
|
skipUntil = aliveField.getFieldEndOffset() + 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int[] separatorEnd = tryDeadField_lookupFieldSeparatorEnd(
|
||||||
|
wordDocument, range, c );
|
||||||
|
if ( separatorEnd != null )
|
||||||
|
{
|
||||||
|
addToStructures( structures, new Structure(
|
||||||
|
characterRun.getStartOffset(),
|
||||||
|
separatorEnd[1] + 1 ) );
|
||||||
|
c = separatorEnd[1];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -562,6 +599,39 @@ public abstract class AbstractWordConverter
|
||||||
Element currentBlock, Range range, int currentTableLevel,
|
Element currentBlock, Range range, int currentTableLevel,
|
||||||
int beginMark, int separatorMark, int endMark )
|
int beginMark, int separatorMark, int endMark )
|
||||||
{
|
{
|
||||||
|
if ( beginMark + 1 < separatorMark && separatorMark + 1 < endMark )
|
||||||
|
{
|
||||||
|
Range formulaRange = new Range( range.getCharacterRun(
|
||||||
|
beginMark + 1 ).getStartOffset(), range.getCharacterRun(
|
||||||
|
separatorMark - 1 ).getEndOffset(), range )
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return "Dead field formula subrange: " + super.toString();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Range valueRange = new Range( range.getCharacterRun(
|
||||||
|
separatorMark + 1 ).getStartOffset(), range
|
||||||
|
.getCharacterRun( endMark - 1 ).getEndOffset(), range )
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return "Dead field value subrange: " + super.toString();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
String formula = formulaRange.text();
|
||||||
|
final Matcher matcher = PATTERN_HYPERLINK_LOCAL.matcher( formula );
|
||||||
|
if ( matcher.matches() )
|
||||||
|
{
|
||||||
|
String localref = matcher.group( 1 );
|
||||||
|
processPageref( wordDocument, currentBlock, valueRange,
|
||||||
|
currentTableLevel, localref );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
|
StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
|
||||||
for ( int i = beginMark; i <= endMark; i++ )
|
for ( int i = beginMark; i <= endMark; i++ )
|
||||||
{
|
{
|
||||||
|
@ -706,9 +776,7 @@ public abstract class AbstractWordConverter
|
||||||
if ( firstSubrange != null )
|
if ( firstSubrange != null )
|
||||||
{
|
{
|
||||||
String formula = firstSubrange.text();
|
String formula = firstSubrange.text();
|
||||||
Pattern pagerefPattern = Pattern
|
Matcher matcher = PATTERN_PAGEREF.matcher( formula );
|
||||||
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
|
|
||||||
Matcher matcher = pagerefPattern.matcher( formula );
|
|
||||||
if ( matcher.find() )
|
if ( matcher.find() )
|
||||||
{
|
{
|
||||||
String pageref = matcher.group( 1 );
|
String pageref = matcher.group( 1 );
|
||||||
|
@ -756,10 +824,8 @@ public abstract class AbstractWordConverter
|
||||||
if ( firstSubrange != null )
|
if ( firstSubrange != null )
|
||||||
{
|
{
|
||||||
String formula = firstSubrange.text();
|
String formula = firstSubrange.text();
|
||||||
Pattern hyperlinkPattern = Pattern
|
Matcher matcher = PATTERN_HYPERLINK_EXTERNAL.matcher( formula );
|
||||||
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
|
if ( matcher.matches() )
|
||||||
Matcher matcher = hyperlinkPattern.matcher( formula );
|
|
||||||
if ( matcher.find() )
|
|
||||||
{
|
{
|
||||||
String hyperlink = matcher.group( 1 );
|
String hyperlink = matcher.group( 1 );
|
||||||
processHyperlink( wordDocument, currentBlock,
|
processHyperlink( wordDocument, currentBlock,
|
||||||
|
@ -767,6 +833,30 @@ public abstract class AbstractWordConverter
|
||||||
currentTableLevel, hyperlink );
|
currentTableLevel, hyperlink );
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
matcher.usePattern( PATTERN_HYPERLINK_LOCAL );
|
||||||
|
if ( matcher.matches() )
|
||||||
|
{
|
||||||
|
String hyperlink = matcher.group( 1 );
|
||||||
|
Range textRange = null;
|
||||||
|
String text = matcher.group( 2 );
|
||||||
|
if ( AbstractWordUtils.isNotEmpty( text ) )
|
||||||
|
{
|
||||||
|
textRange = new Range( firstSubrange.getStartOffset()
|
||||||
|
+ matcher.start( 2 ),
|
||||||
|
firstSubrange.getStartOffset()
|
||||||
|
+ matcher.end( 2 ), firstSubrange )
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return "Local hyperlink text";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
processPageref( wordDocument, currentBlock, textRange,
|
||||||
|
currentTableLevel, hyperlink );
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -817,13 +907,13 @@ public abstract class AbstractWordConverter
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected abstract void processImage( Element currentBlock,
|
||||||
|
boolean inlined, Picture picture, String url );
|
||||||
|
|
||||||
@Internal
|
@Internal
|
||||||
protected abstract void processImageWithoutPicturesManager(
|
protected abstract void processImageWithoutPicturesManager(
|
||||||
Element currentBlock, boolean inlined, Picture picture );
|
Element currentBlock, boolean inlined, Picture picture );
|
||||||
|
|
||||||
protected abstract void processImage( Element currentBlock,
|
|
||||||
boolean inlined, Picture picture, String url );
|
|
||||||
|
|
||||||
protected abstract void processLineBreak( Element block,
|
protected abstract void processLineBreak( Element block,
|
||||||
CharacterRun characterRun );
|
CharacterRun characterRun );
|
||||||
|
|
||||||
|
@ -1021,6 +1111,20 @@ public abstract class AbstractWordConverter
|
||||||
|
|
||||||
protected int tryDeadField( HWPFDocumentCore wordDocument, Range range,
|
protected int tryDeadField( HWPFDocumentCore wordDocument, Range range,
|
||||||
int currentTableLevel, int beginMark, Element currentBlock )
|
int currentTableLevel, int beginMark, Element currentBlock )
|
||||||
|
{
|
||||||
|
int[] separatorEnd = tryDeadField_lookupFieldSeparatorEnd(
|
||||||
|
wordDocument, range, beginMark );
|
||||||
|
if ( separatorEnd == null )
|
||||||
|
return beginMark;
|
||||||
|
|
||||||
|
processDeadField( wordDocument, currentBlock, range, currentTableLevel,
|
||||||
|
beginMark, separatorEnd[0], separatorEnd[1] );
|
||||||
|
|
||||||
|
return separatorEnd[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
private int[] tryDeadField_lookupFieldSeparatorEnd(
|
||||||
|
HWPFDocumentCore wordDocument, Range range, int beginMark )
|
||||||
{
|
{
|
||||||
int separatorMark = -1;
|
int separatorMark = -1;
|
||||||
int endMark = -1;
|
int endMark = -1;
|
||||||
|
@ -1032,28 +1136,24 @@ public abstract class AbstractWordConverter
|
||||||
if ( text.getBytes().length == 0 )
|
if ( text.getBytes().length == 0 )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
|
final byte firstByte = text.getBytes()[0];
|
||||||
|
if ( firstByte == FIELD_BEGIN_MARK )
|
||||||
{
|
{
|
||||||
// nested?
|
int[] nested = tryDeadField_lookupFieldSeparatorEnd(
|
||||||
Field possibleField = processDeadField( wordDocument, range,
|
wordDocument, range, c );
|
||||||
currentTableLevel, characterRun.getStartOffset(),
|
if ( nested != null )
|
||||||
currentBlock );
|
|
||||||
if ( possibleField != null )
|
|
||||||
{
|
{
|
||||||
c = possibleField.getFieldEndOffset();
|
c = nested[1];
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
|
if ( firstByte == FIELD_SEPARATOR_MARK )
|
||||||
{
|
{
|
||||||
if ( separatorMark != -1 )
|
if ( separatorMark != -1 )
|
||||||
{
|
{
|
||||||
// double;
|
// double; incorrect format
|
||||||
return beginMark;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
separatorMark = c;
|
separatorMark = c;
|
||||||
|
@ -1065,22 +1165,18 @@ public abstract class AbstractWordConverter
|
||||||
if ( endMark != -1 )
|
if ( endMark != -1 )
|
||||||
{
|
{
|
||||||
// double;
|
// double;
|
||||||
return beginMark;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
endMark = c;
|
endMark = c;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( separatorMark == -1 || endMark == -1 )
|
if ( separatorMark == -1 || endMark == -1 )
|
||||||
return beginMark;
|
return null;
|
||||||
|
|
||||||
processDeadField( wordDocument, currentBlock, range, currentTableLevel,
|
return new int[] { separatorMark, endMark };
|
||||||
beginMark, separatorMark, endMark );
|
|
||||||
|
|
||||||
return endMark;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue