diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 1f0db4a16..8d05120a9 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + Correct XWPFRun detection of bold/italic in a paragraph with multiple runs of different styles Link XWPFPicture to XWPFRun, so that embedded pictures can be access from where they live in the text stream Improve handling of Hyperlinks inside XWPFParagraph objects through XWPFHyperlinkRun Make XWPFParagraph make more use of XWPFRun, and less on internal StringBuffers diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java index bc773132e..ab3b933e5 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java @@ -1161,7 +1161,7 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody { * @see org.apache.poi.xwpf.usermodel.IBody#getParagraphArray(int) */ public XWPFParagraph getParagraphArray(int pos) { - if(pos > 0 && pos < paragraphs.size()){ + if(pos >= 0 && pos < paragraphs.size()){ return paragraphs.get(pos); } return null; diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java index d7fbd2d6a..a72a0c760 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java @@ -118,6 +118,19 @@ public class XWPFRun { public XWPFParagraph getParagraph() { return paragraph; } + + /** + * For isBold, isItalic etc: returns true if val is not set, or is ON or TRUE, and false for any other val + */ + private boolean isCTOnOff(CTOnOff onoff) { + if(! 
onoff.isSetVal()) + return true; + if(onoff.getVal() == STOnOff.ON) + return true; + if(onoff.getVal() == STOnOff.TRUE) + return true; + return false; + } /** * Whether the bold property shall be applied to all non-complex script @@ -127,7 +140,9 @@ public class XWPFRun { */ public boolean isBold() { CTRPr pr = run.getRPr(); - return pr != null && pr.isSetB(); + if(pr == null || !pr.isSetB()) + return false; + return isCTOnOff(pr.getB()); } /** @@ -208,7 +223,9 @@ public class XWPFRun { */ public boolean isItalic() { CTRPr pr = run.getRPr(); - return pr != null && pr.isSetI(); + if(pr == null || !pr.isSetI()) + return false; + return isCTOnOff(pr.getI()); } /** @@ -284,7 +301,9 @@ public class XWPFRun { */ public boolean isStrike() { CTRPr pr = run.getRPr(); - return pr != null && pr.isSetStrike(); + if(pr == null || !pr.isSetStrike()) + return false; + return isCTOnOff(pr.getStrike()); } /** diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index 77315f795..116b69906 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -98,16 +98,20 @@ public class TestXWPFWordExtractor extends TestCase { // Now check contents extractor.setFetchHyperlinks(false); assertEquals( - "This is a test document\nThis bit is in bold and italic\n" + - "Back to normal\nWe have a hyperlink here, and another.\n", + "This is a test document.\nThis bit is in bold and italic\n" + + "Back to normal\n" + + "This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.\n" + + "We have a hyperlink here, and another.\n", extractor.getText() ); // One hyperlink is a real one, one is just to the top of page extractor.setFetchHyperlinks(true); assertEquals( - "This is a test document\nThis bit is in bold and italic\n" + - "Back to normal\nWe have a hyperlink 
here, and another.\n", + "This is a test document.\nThis bit is in bold and italic\n" + + "Back to normal\n" + + "This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.\n" + + "We have a hyperlink here, and another.\n", extractor.getText() ); } diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFRun.java b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFRun.java index 1e55cc5e9..695f936a5 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFRun.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFRun.java @@ -20,6 +20,7 @@ import java.math.BigInteger; import junit.framework.TestCase; +import org.apache.poi.xwpf.XWPFTestDataSamples; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr; @@ -190,7 +191,139 @@ public class TestXWPFRun extends TestCase { run.addBreak(BreakType.TEXT_WRAPPING); assertEquals(2, run.getCTR().sizeOfBrArray()); } - + /** + * Test that for the runs of an existing document, we report the + * expected text and bold / italic / strike state + */ + public void testExisting() { + XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("TestDocument.docx"); + XWPFParagraph p; + XWPFRun run; + + + // First paragraph is simple + p = doc.getParagraphArray(0); + assertEquals("This is a test document.", p.getText()); + assertEquals(2, p.getRuns().size()); + + run = p.getRuns().get(0); + assertEquals("This is a test document", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(1); + assertEquals(".", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + + // Next 
paragraph is all in one style, but a 
different one + p = doc.getParagraphArray(1); + assertEquals("This bit is in bold and italic", p.getText()); + assertEquals(1, p.getRuns().size()); + + run = p.getRuns().get(0); + assertEquals("This bit is in bold and italic", run.toString()); + assertEquals(true, run.isBold()); + assertEquals(true, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(true, run.getCTR().getRPr().isSetB()); + assertEquals(false, run.getCTR().getRPr().getB().isSetVal()); + + + // Back to normal + p = doc.getParagraphArray(2); + assertEquals("Back to normal", p.getText()); + assertEquals(1, p.getRuns().size()); + + run = p.getRuns().get(0); + assertEquals("Back to normal", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + + // Different styles in one paragraph + p = doc.getParagraphArray(3); + assertEquals("This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.", p.getText()); + assertEquals(11, p.getRuns().size()); + + run = p.getRuns().get(0); + assertEquals("This contains ", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(1); + assertEquals("BOLD", run.toString()); + assertEquals(true, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(2); + assertEquals(", ", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(3); + assertEquals("ITALIC", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(true, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(4); + assertEquals(" and ", run.toString()); + 
assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(5); + assertEquals("BOTH", run.toString()); + assertEquals(true, run.isBold()); + assertEquals(true, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(6); + assertEquals(", as well as ", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(7); + assertEquals("RED", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(8); + assertEquals(" and ", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(9); + assertEquals("YELLOW", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(10); + assertEquals(" text.", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + } } - diff --git a/test-data/document/TestDocument.docx b/test-data/document/TestDocument.docx index 058dec5e4..d87a542ec 100644 Binary files a/test-data/document/TestDocument.docx and b/test-data/document/TestDocument.docx differ