more progress with WordToFoExtractor, see Bugzilla 51351

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1136001 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2011-06-15 11:41:22 +00:00
parent ffc5d68b12
commit cbb3176c72
2 changed files with 236 additions and 156 deletions

View File

@ -279,63 +279,95 @@ public class WordToFoExtractor {
} }
} }
/**
 * This method shall store image bytes in an external file and convert them
 * if necessary. Images shall be stored using PNG format (for bitmap) or SVG
 * (for vector). Other formats may not be supported by your XSL FO
 * processor.
 * <p>
 * Please note the
 * {@link WordToFoUtils#setPictureProperties(Picture, Element)} method.
 * <p>
 * The default implementation does nothing; subclasses are expected to
 * override it.
 *
 * @param currentBlock
 *            currently processed FO element, like <tt>fo:block</tt>. Shall
 *            be used as parent of newly created
 *            <tt>fo:external-graphic</tt> or
 *            <tt>fo:instream-foreign-object</tt>
 * @param inlined
 *            if image is inlined
 * @param picture
 *            HWPF object, containing picture data and properties
 */
protected void processImage(Element currentBlock, boolean inlined,
        Picture picture) {
    // no default implementation -- skip
}
protected void processParagraph(HWPFDocument hwpfDocument, protected void processParagraph(HWPFDocument hwpfDocument,
Element parentFopElement, int currentTableLevel, Element parentFopElement, int currentTableLevel,
Paragraph paragraph, String bulletText) { Paragraph paragraph, String bulletText) {
final Element block = createBlock(); final Element block = createBlock();
parentFopElement.appendChild(block); parentFopElement.appendChild(block);
WordToFoUtils.setParagraphProperties(paragraph, block); WordToFoUtils.setParagraphProperties(paragraph, block);
final int charRuns = paragraph.numCharacterRuns(); final int charRuns = paragraph.numCharacterRuns();
if (charRuns == 0) { if (charRuns == 0) {
return; return;
} }
final String pFontName; final String pFontName;
final int pFontSize; final int pFontSize;
final boolean pBold; final boolean pBold;
final boolean pItalic; final boolean pItalic;
{ {
CharacterRun characterRun = paragraph.getCharacterRun(0); CharacterRun characterRun = paragraph.getCharacterRun(0);
pFontSize = characterRun.getFontSize() / 2; pFontSize = characterRun.getFontSize() / 2;
pFontName = characterRun.getFontName(); pFontName = characterRun.getFontName();
pBold = characterRun.isBold(); pBold = characterRun.isBold();
pItalic = characterRun.isItalic(); pItalic = characterRun.isItalic();
} }
WordToFoUtils.setFontFamily(block, pFontName); WordToFoUtils.setFontFamily(block, pFontName);
WordToFoUtils.setFontSize(block, pFontSize); WordToFoUtils.setFontSize(block, pFontSize);
WordToFoUtils.setBold(block, pBold); WordToFoUtils.setBold(block, pBold);
WordToFoUtils.setItalic(block, pItalic); WordToFoUtils.setItalic(block, pItalic);
StringBuilder lineText = new StringBuilder(); StringBuilder lineText = new StringBuilder();
if (WordToFoUtils.isNotEmpty(bulletText)) { if (WordToFoUtils.isNotEmpty(bulletText)) {
Element inline = createInline(); Element inline = createInline();
block.appendChild(inline); block.appendChild(inline);
Text textNode = createText(bulletText); Text textNode = createText(bulletText);
inline.appendChild(textNode); inline.appendChild(textNode);
lineText.append(bulletText); lineText.append(bulletText);
} }
for (int c = 0; c < charRuns; c++) { for (int c = 0; c < charRuns; c++) {
CharacterRun characterRun = paragraph.getCharacterRun(c); CharacterRun characterRun = paragraph.getCharacterRun(c);
if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
Picture picture = hwpfDocument.getPicturesTable()
.extractPicture(characterRun, true);
processImage(block, characterRun.text().charAt(0) == 0x01,
picture);
continue;
}
String text = characterRun.text(); String text = characterRun.text();
if (text.getBytes().length == 0) if (text.getBytes().length == 0)
continue; continue;
if (text.getBytes()[0] == FIELD_BEGIN_MARK) { if (text.getBytes()[0] == FIELD_BEGIN_MARK) {
int skipTo = tryImageWithinField(hwpfDocument, paragraph, c, /*
block); * check if we have a field with calculated image as a result.
* MathType equation, for example.
*/
int skipTo = tryImageWithinField(hwpfDocument, paragraph, c,
block);
if (skipTo != c) { if (skipTo != c) {
c = skipTo; c = skipTo;
@ -550,60 +582,62 @@ public class WordToFoExtractor {
} }
protected int tryImageWithinField(HWPFDocument hwpfDocument, protected int tryImageWithinField(HWPFDocument hwpfDocument,
Paragraph paragraph, int beginMark, Element currentBlock) { Paragraph paragraph, int beginMark, Element currentBlock) {
int separatorMark = -1; int separatorMark = -1;
int pictureMark = -1; int pictureMark = -1;
int endMark = -1; int pictureChar = Integer.MIN_VALUE;
for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) { int endMark = -1;
CharacterRun characterRun = paragraph.getCharacterRun(c); for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) {
CharacterRun characterRun = paragraph.getCharacterRun(c);
String text = characterRun.text(); String text = characterRun.text();
if (text.getBytes().length == 0) if (text.getBytes().length == 0)
continue; continue;
if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) { if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
if (separatorMark != -1) { if (separatorMark != -1) {
// double; // double;
return beginMark; return beginMark;
} }
separatorMark = c; separatorMark = c;
continue; continue;
} }
if (text.getBytes()[0] == FIELD_END_MARK) { if (text.getBytes()[0] == FIELD_END_MARK) {
if (endMark != -1) { if (endMark != -1) {
// double; // double;
return beginMark; return beginMark;
} }
endMark = c; endMark = c;
break; break;
} }
if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) { if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
if (pictureMark != -1) { if (c != -1) {
// double; // double;
return beginMark; return beginMark;
} }
pictureMark = c; pictureMark = c;
continue; pictureChar = characterRun.text().charAt(0);
} continue;
} }
}
if (separatorMark == -1 || pictureMark == -1 || endMark == -1) if (separatorMark == -1 || pictureMark == -1 || endMark == -1)
return beginMark; return beginMark;
final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark); final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark);
final Picture picture = hwpfDocument.getPicturesTable().extractPicture( final Picture picture = hwpfDocument.getPicturesTable().extractPicture(
pictureRun, true); pictureRun, true);
processImage(currentBlock, picture);
return endMark; processImage(currentBlock, pictureChar == 0x01, picture);
return endMark;
} }
/** /**
* Java main() interface to interact with WordToFoExtractor * Java main() interface to interact with WordToFoExtractor
* *

View File

@ -9,6 +9,7 @@ import org.apache.poi.hwpf.usermodel.BorderCode;
import org.apache.poi.hwpf.usermodel.CharacterProperties; import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section; import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.SectionProperties; import org.apache.poi.hwpf.usermodel.SectionProperties;
@ -179,6 +180,31 @@ public class WordToFoUtils {
} }
} }
/**
 * Translates a numeric justification code into the alignment keyword used
 * for the <tt>text-align</tt> attribute.
 *
 * @param js
 *            justification code
 * @return the alignment keyword for codes 0 to 9, or an empty string for
 *         any other value
 */
public static String getJustification(int js) {
    switch (js) {
    case 0:
    case 7:
        return "start";
    case 1:
    case 5:
        return "center";
    case 2:
    case 8:
        return "end";
    case 3:
    case 4:
    case 9:
        return "justify";
    case 6:
        return "left";
    default:
        return "";
    }
}
public static String getListItemNumberLabel(int number, int format) { public static String getListItemNumberLabel(int number, int format) {
if (format != 0) if (format != 0)
@ -244,48 +270,51 @@ public class WordToFoUtils {
} }
public static void setCharactersProperties(final CharacterRun characterRun, public static void setCharactersProperties(final CharacterRun characterRun,
final Element inline) { final Element inline) {
final CharacterProperties clonedProperties = characterRun final CharacterProperties clonedProperties = characterRun
.cloneProperties(); .cloneProperties();
StringBuilder textDecorations = new StringBuilder(); StringBuilder textDecorations = new StringBuilder();
setBorder(inline, clonedProperties.getBrc(), EMPTY); setBorder(inline, clonedProperties.getBrc(), EMPTY);
if (characterRun.isCapitalized()) { if (characterRun.isCapitalized()) {
inline.setAttribute("text-transform", "uppercase"); inline.setAttribute("text-transform", "uppercase");
} }
if (characterRun.isHighlighted()) { if (characterRun.isHighlighted()) {
inline.setAttribute("background-color", inline.setAttribute("background-color",
getColor(clonedProperties.getIcoHighlight())); getColor(clonedProperties.getIcoHighlight()));
} }
if (characterRun.isStrikeThrough()) { if (characterRun.isStrikeThrough()) {
if (textDecorations.length() > 0) if (textDecorations.length() > 0)
textDecorations.append(" "); textDecorations.append(" ");
textDecorations.append("line-through"); textDecorations.append("line-through");
} }
if (characterRun.isShadowed()) { if (characterRun.isShadowed()) {
inline.setAttribute("text-shadow", characterRun.getFontSize() / 24 inline.setAttribute("text-shadow", characterRun.getFontSize() / 24
+ "pt"); + "pt");
} }
if (characterRun.isSmallCaps()) { if (characterRun.isSmallCaps()) {
inline.setAttribute("font-variant", "small-caps"); inline.setAttribute("font-variant", "small-caps");
} }
if (characterRun.getSubSuperScriptIndex() == 1) { if (characterRun.getSubSuperScriptIndex() == 1) {
inline.setAttribute("baseline-shift", "super"); inline.setAttribute("baseline-shift", "super");
inline.setAttribute("font-size", "smaller"); inline.setAttribute("font-size", "smaller");
} }
if (characterRun.getSubSuperScriptIndex() == 2) { if (characterRun.getSubSuperScriptIndex() == 2) {
inline.setAttribute("baseline-shift", "sub"); inline.setAttribute("baseline-shift", "sub");
inline.setAttribute("font-size", "smaller"); inline.setAttribute("font-size", "smaller");
} }
if (characterRun.getUnderlineCode() > 0) { if (characterRun.getUnderlineCode() > 0) {
if (textDecorations.length() > 0) if (textDecorations.length() > 0)
textDecorations.append(" "); textDecorations.append(" ");
textDecorations.append("underline"); textDecorations.append("underline");
} }
if (textDecorations.length() > 0) { if (characterRun.isVanished()) {
inline.setAttribute("text-decoration", textDecorations.toString()); inline.setAttribute("visibility", "hidden");
} }
if (textDecorations.length() > 0) {
inline.setAttribute("text-decoration", textDecorations.toString());
}
} }
public static void setFontFamily(final Element element, public static void setFontFamily(final Element element,
@ -335,40 +364,10 @@ public class WordToFoUtils {
} }
public static void setJustification(Paragraph paragraph, public static void setJustification(Paragraph paragraph,
final Element element) { final Element element) {
final int justification = paragraph.getJustification(); String justification = getJustification(paragraph.getJustification());
switch (justification) { if (isNotEmpty(justification))
case 0: element.setAttribute("text-align", justification);
element.setAttribute("text-align", "start");
break;
case 1:
element.setAttribute("text-align", "center");
break;
case 2:
element.setAttribute("text-align", "end");
break;
case 3:
element.setAttribute("text-align", "justify");
break;
case 4:
element.setAttribute("text-align", "justify");
break;
case 5:
element.setAttribute("text-align", "center");
break;
case 6:
element.setAttribute("text-align", "left");
break;
case 7:
element.setAttribute("text-align", "start");
break;
case 8:
element.setAttribute("text-align", "end");
break;
case 9:
element.setAttribute("text-align", "justify");
break;
}
} }
public static void setParagraphProperties(Paragraph paragraph, Element block) { public static void setParagraphProperties(Paragraph paragraph, Element block) {
@ -399,6 +398,53 @@ public class WordToFoUtils {
block.setAttribute("white-space-collapse", "false"); block.setAttribute("white-space-collapse", "false");
} }
/**
 * Sets size, scaling, alignment and cropping attributes on an FO graphic
 * element from the properties of a picture.
 * <ul>
 * <li><tt>content-width</tt> / <tt>content-height</tt> are derived from
 * the picture's goal size, multiplied by the aspect ratio percentage when
 * that percentage is positive, and converted from twips to points;</li>
 * <li><tt>scaling</tt> is <tt>non-uniform</tt> only when both aspect
 * ratios are positive, <tt>uniform</tt> otherwise;</li>
 * <li><tt>vertical-align</tt> is always <tt>text-bottom</tt>;</li>
 * <li>when any crop value is non-zero, <tt>clip</tt> is set to
 * <tt>rect(top, right, bottom, left)</tt> in points together with
 * <tt>overflow="hidden"</tt>.</li>
 * </ul>
 *
 * @param picture
 *            picture whose properties are read
 * @param graphicElement
 *            element that receives the attributes
 */
public static void setPictureProperties(Picture picture,
        Element graphicElement) {
    final int aspectRatioX = picture.getAspectRatioX();
    final int aspectRatioY = picture.getAspectRatioY();

    if (aspectRatioX > 0) {
        graphicElement.setAttribute("content-width", ((picture.getDxaGoal()
                * aspectRatioX / 100) / WordToFoUtils.TWIPS_PER_PT)
                + "pt");
    } else {
        graphicElement.setAttribute("content-width",
                (picture.getDxaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
    }

    if (aspectRatioY > 0) {
        graphicElement.setAttribute("content-height", ((picture.getDyaGoal()
                * aspectRatioY / 100) / WordToFoUtils.TWIPS_PER_PT)
                + "pt");
    } else {
        graphicElement.setAttribute("content-height",
                (picture.getDyaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
    }

    if (aspectRatioX <= 0 || aspectRatioY <= 0) {
        graphicElement.setAttribute("scaling", "uniform");
    } else {
        graphicElement.setAttribute("scaling", "non-uniform");
    }

    graphicElement.setAttribute("vertical-align", "text-bottom");

    if (picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
            || picture.getDyaCropBottom() != 0
            || picture.getDxaCropLeft() != 0) {
        int rectTop = picture.getDyaCropTop() / WordToFoUtils.TWIPS_PER_PT;
        int rectRight = picture.getDxaCropRight()
                / WordToFoUtils.TWIPS_PER_PT;
        int rectBottom = picture.getDyaCropBottom()
                / WordToFoUtils.TWIPS_PER_PT;
        int rectLeft = picture.getDxaCropLeft()
                / WordToFoUtils.TWIPS_PER_PT;
        graphicElement.setAttribute("clip", "rect(" + rectTop + "pt, "
                + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
                + "pt)");
        // the property is spelled "overflow"; the misspelled "oveerflow"
        // attribute was not a recognised FO property
        graphicElement.setAttribute("overflow", "hidden");
    }
}
public static void setTableCellProperties(TableRow tableRow, public static void setTableCellProperties(TableRow tableRow,
TableCell tableCell, Element element, boolean toppest, TableCell tableCell, Element element, boolean toppest,
boolean bottomest, boolean leftest, boolean rightest) { boolean bottomest, boolean leftest, boolean rightest) {