Update the word code to the new style ooxml stuff
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635253 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
be6166389f
commit
b084eb866d
@ -124,6 +124,25 @@ public abstract class POIXMLDocument {
|
||||
return getTargetPart(rel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves all the PackageParts which are defined as
|
||||
* relationships of the base document with the
|
||||
* specified content type.
|
||||
*
* @param contentType the type used to select relationships of the core part;
*        it is passed unchanged to getRelationshipsByType.
*        NOTE(review): that call suggests a relationship type may be expected
*        here rather than a MIME content type - confirm against callers.
* @return an array with one entry per matching relationship, in the
*         iteration order of the relationship collection; each entry is
*         whatever getTargetPart returns for that relationship
* @throws InvalidFormatException declared by the signature; presumably
*         propagated from the relationship lookup or from getTargetPart - confirm
*/
|
||||
protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
|
||||
PackageRelationshipCollection partsC =
|
||||
getCorePart().getRelationshipsByType(contentType);
|
||||
|
||||
PackagePart[] parts = new PackagePart[partsC.size()]; // one slot per relationship found
|
||||
int count = 0; // next free index in parts
|
||||
for (PackageRelationship rel : partsC) {
|
||||
parts[count] = getTargetPart(rel); // store the part this relationship points at
|
||||
count++;
|
||||
}
|
||||
return parts;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Checks that the supplied InputStream (which MUST
|
||||
|
@ -14,11 +14,11 @@
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf;
|
||||
package org.apache.poi.xwpf;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.hxf.HXFDocument;
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.openxml4j.exceptions.OpenXML4JException;
|
||||
@ -41,7 +41,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
|
||||
*
|
||||
* WARNING - APIs expected to change rapidly
|
||||
*/
|
||||
public class HWPFXML extends HXFDocument {
|
||||
public class XWPFDocument extends POIXMLDocument {
|
||||
public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
|
||||
public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
|
||||
public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
|
||||
@ -50,11 +50,11 @@ public class HWPFXML extends HXFDocument {
|
||||
|
||||
private DocumentDocument wordDoc;
|
||||
|
||||
public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException {
|
||||
super(container, MAIN_CONTENT_TYPE);
|
||||
public XWPFDocument(Package container) throws OpenXML4JException, IOException, XmlException {
|
||||
super(container);
|
||||
|
||||
wordDoc =
|
||||
DocumentDocument.Factory.parse(basePart.getInputStream());
|
||||
DocumentDocument.Factory.parse(getCorePart().getInputStream());
|
||||
}
|
||||
|
||||
/**
|
@ -14,15 +14,14 @@
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
package org.apache.poi.xwpf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.POIXMLTextExtractor;
|
||||
import org.apache.poi.hwpf.HWPFXML;
|
||||
import org.apache.poi.hwpf.usermodel.HWPFXMLDocument;
|
||||
import org.apache.poi.hxf.HXFDocument;
|
||||
import org.apache.poi.xwpf.XWPFDocument;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.openxml4j.opc.Package;
|
||||
@ -34,15 +33,13 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||
/**
|
||||
* Helper class to extract text from an OOXML Word file
|
||||
*/
|
||||
public class HXFWordExtractor extends POIXMLTextExtractor {
|
||||
private HWPFXMLDocument document;
|
||||
public class XWPFWordExtractor extends POIXMLTextExtractor {
|
||||
private XWPFDocument document;
|
||||
|
||||
public HXFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
||||
this(new HWPFXMLDocument(
|
||||
new HWPFXML(container)
|
||||
));
|
||||
public XWPFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
||||
this(new XWPFDocument(container));
|
||||
}
|
||||
public HXFWordExtractor(HWPFXMLDocument document) {
|
||||
public XWPFWordExtractor(XWPFDocument document) {
|
||||
super(document);
|
||||
this.document = document;
|
||||
}
|
||||
@ -54,14 +51,14 @@ public class HXFWordExtractor extends POIXMLTextExtractor {
|
||||
System.exit(1);
|
||||
}
|
||||
POIXMLTextExtractor extractor =
|
||||
new HXFWordExtractor(HXFDocument.openPackage(
|
||||
new File(args[0])
|
||||
new XWPFWordExtractor(POIXMLDocument.openPackage(
|
||||
args[0]
|
||||
));
|
||||
System.out.println(extractor.getText());
|
||||
}
|
||||
|
||||
public String getText() {
|
||||
CTBody body = document._getHWPFXML().getDocumentBody();
|
||||
CTBody body = document.getDocumentBody();
|
||||
StringBuffer text = new StringBuffer();
|
||||
|
||||
// Loop over paragraphs
|
@ -14,23 +14,21 @@
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.usermodel;
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.hwpf.HWPFXML;
|
||||
import org.apache.poi.xwpf.XWPFDocument;
|
||||
|
||||
/**
|
||||
* High-level representation of an OOXML text document.
|
||||
*/
|
||||
public class HWPFXMLDocument extends POIXMLDocument {
|
||||
private HWPFXML hwpfXML;
|
||||
public class XMLWordDocument {
|
||||
private XWPFDocument xwpfXML;
|
||||
|
||||
public HWPFXMLDocument(HWPFXML xml) {
|
||||
super(xml);
|
||||
this.hwpfXML = xml;
|
||||
public XMLWordDocument(XWPFDocument xml) {
|
||||
this.xwpfXML = xml;
|
||||
}
|
||||
|
||||
public HWPFXML _getHWPFXML() {
|
||||
return hwpfXML;
|
||||
public XWPFDocument _getXWPFXML() {
|
||||
return xwpfXML;
|
||||
}
|
||||
}
|
@ -14,45 +14,47 @@
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hslf.extractor;
|
||||
package org.apache.poi.xslf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.poi.hslf.HSLFXML;
|
||||
import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow;
|
||||
import org.apache.poi.hxf.HXFDocument;
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.xslf.XSLFSlideShow;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Tests for XSLFPowerPointExtractor
|
||||
*/
|
||||
public class TestHXFPowerPointExtractor extends TestCase {
|
||||
public class TestXSLFPowerPointExtractor extends TestCase {
|
||||
/**
|
||||
* A simple file
|
||||
*/
|
||||
private HSLFXML xmlA;
|
||||
private XSLFSlideShow xmlA;
|
||||
private File fileA;
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
File fileA = new File(
|
||||
fileA = new File(
|
||||
System.getProperty("HSLF.testdata.path") +
|
||||
File.separator + "sample.pptx"
|
||||
);
|
||||
assertTrue(fileA.exists());
|
||||
|
||||
xmlA = new HSLFXML(HXFDocument.openPackage(fileA));
|
||||
xmlA = new XSLFSlideShow(fileA.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get text out of the simple file
|
||||
*/
|
||||
public void testGetSimpleText() throws Exception {
|
||||
new HXFPowerPointExtractor(xmlA.getPackage());
|
||||
new HXFPowerPointExtractor(new XMLSlideShow(xmlA));
|
||||
new XSLFPowerPointExtractor(xmlA);
|
||||
new XSLFPowerPointExtractor(
|
||||
POIXMLDocument.openPackage(fileA.toString()));
|
||||
|
||||
HXFPowerPointExtractor extractor =
|
||||
new HXFPowerPointExtractor(xmlA.getPackage());
|
||||
XSLFPowerPointExtractor extractor =
|
||||
new XSLFPowerPointExtractor(xmlA);
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
@ -14,17 +14,17 @@
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf;
|
||||
package org.apache.poi.xwpf;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.poi.hxf.HXFDocument;
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.openxml4j.opc.Package;
|
||||
import org.openxml4j.opc.PackagePart;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
public class TestHWPFXML extends TestCase {
|
||||
public class TestXWPFDocument extends TestCase {
|
||||
private File sampleFile;
|
||||
private File complexFile;
|
||||
|
||||
@ -39,14 +39,17 @@ public class TestHWPFXML extends TestCase {
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "IllustrativeCases.docx"
|
||||
);
|
||||
|
||||
assertTrue(sampleFile.exists());
|
||||
assertTrue(complexFile.exists());
|
||||
}
|
||||
|
||||
public void testContainsMainContentType() throws Exception {
|
||||
Package pack = HXFDocument.openPackage(sampleFile);
|
||||
Package pack = POIXMLDocument.openPackage(sampleFile.toString());
|
||||
|
||||
boolean found = false;
|
||||
for(PackagePart part : pack.getParts()) {
|
||||
if(part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) {
|
||||
if(part.getContentType().equals(XWPFDocument.MAIN_CONTENT_TYPE)) {
|
||||
found = true;
|
||||
}
|
||||
System.out.println(part);
|
||||
@ -55,14 +58,21 @@ public class TestHWPFXML extends TestCase {
|
||||
}
|
||||
|
||||
public void testOpen() throws Exception {
|
||||
HXFDocument.openPackage(sampleFile);
|
||||
HXFDocument.openPackage(complexFile);
|
||||
POIXMLDocument.openPackage(sampleFile.toString());
|
||||
POIXMLDocument.openPackage(complexFile.toString());
|
||||
|
||||
HWPFXML xml;
|
||||
new XWPFDocument(
|
||||
POIXMLDocument.openPackage(sampleFile.toString())
|
||||
);
|
||||
new XWPFDocument(
|
||||
POIXMLDocument.openPackage(complexFile.toString())
|
||||
);
|
||||
|
||||
XWPFDocument xml;
|
||||
|
||||
// Simple file
|
||||
xml = new HWPFXML(
|
||||
HXFDocument.openPackage(sampleFile)
|
||||
xml = new XWPFDocument(
|
||||
POIXMLDocument.openPackage(sampleFile.toString())
|
||||
);
|
||||
// Check it has key parts
|
||||
assertNotNull(xml.getDocument());
|
||||
@ -70,8 +80,8 @@ public class TestHWPFXML extends TestCase {
|
||||
assertNotNull(xml.getStyle());
|
||||
|
||||
// Complex file
|
||||
xml = new HWPFXML(
|
||||
HXFDocument.openPackage(complexFile)
|
||||
xml = new XWPFDocument(
|
||||
POIXMLDocument.openPackage(complexFile.toString())
|
||||
);
|
||||
assertNotNull(xml.getDocument());
|
||||
assertNotNull(xml.getDocumentBody());
|
||||
@ -79,8 +89,8 @@ public class TestHWPFXML extends TestCase {
|
||||
}
|
||||
|
||||
public void testMetadataBasics() throws Exception {
|
||||
HWPFXML xml = new HWPFXML(
|
||||
HXFDocument.openPackage(sampleFile)
|
||||
XWPFDocument xml = new XWPFDocument(
|
||||
POIXMLDocument.openPackage(sampleFile.toString())
|
||||
);
|
||||
assertNotNull(xml.getCoreProperties());
|
||||
assertNotNull(xml.getExtendedProperties());
|
||||
@ -94,8 +104,8 @@ public class TestHWPFXML extends TestCase {
|
||||
}
|
||||
|
||||
public void testMetadataComplex() throws Exception {
|
||||
HWPFXML xml = new HWPFXML(
|
||||
HXFDocument.openPackage(complexFile)
|
||||
XWPFDocument xml = new XWPFDocument(
|
||||
POIXMLDocument.openPackage(complexFile.toString())
|
||||
);
|
||||
assertNotNull(xml.getCoreProperties());
|
||||
assertNotNull(xml.getExtendedProperties());
|
@ -14,54 +14,57 @@
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
package org.apache.poi.xwpf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFXML;
|
||||
import org.apache.poi.hwpf.usermodel.HWPFXMLDocument;
|
||||
import org.apache.poi.hxf.HXFDocument;
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.xwpf.XWPFDocument;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Tests for XWPFWordExtractor
|
||||
*/
|
||||
public class TestHXFWordExtractor extends TestCase {
|
||||
public class TestXWPFWordExtractor extends TestCase {
|
||||
/**
|
||||
* A very simple file
|
||||
*/
|
||||
private HWPFXML xmlA;
|
||||
private XWPFDocument xmlA;
|
||||
private File fileA;
|
||||
/**
|
||||
* A fairly complex file
|
||||
*/
|
||||
private HWPFXML xmlB;
|
||||
private XWPFDocument xmlB;
|
||||
private File fileB;
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
File fileA = new File(
|
||||
fileA = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "sample.docx"
|
||||
);
|
||||
File fileB = new File(
|
||||
fileB = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "IllustrativeCases.docx"
|
||||
);
|
||||
assertTrue(fileA.exists());
|
||||
assertTrue(fileB.exists());
|
||||
|
||||
xmlA = new HWPFXML(HXFDocument.openPackage(fileA));
|
||||
xmlB = new HWPFXML(HXFDocument.openPackage(fileB));
|
||||
xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
|
||||
xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get text out of the simple file
|
||||
*/
|
||||
public void testGetSimpleText() throws Exception {
|
||||
new HXFWordExtractor(xmlA.getPackage());
|
||||
new HXFWordExtractor(new HWPFXMLDocument(xmlA));
|
||||
new XWPFWordExtractor(xmlA);
|
||||
new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString()));
|
||||
|
||||
HXFWordExtractor extractor =
|
||||
new HXFWordExtractor(xmlA.getPackage());
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlA);
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
||||
@ -88,8 +91,8 @@ public class TestHXFWordExtractor extends TestCase {
|
||||
* Tests getting the text out of a complex file
|
||||
*/
|
||||
public void testGetComplexText() throws Exception {
|
||||
HXFWordExtractor extractor =
|
||||
new HXFWordExtractor(xmlB.getPackage());
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlB);
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
Loading…
Reference in New Issue
Block a user