diff --git a/build.xml b/build.xml index b97df7693..cdcd7924e 100644 --- a/build.xml +++ b/build.xml @@ -74,7 +74,6 @@ under the License. - @@ -124,33 +123,6 @@ under the License. - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -195,15 +167,6 @@ under the License. - - - - - - - - - @@ -268,15 +231,12 @@ under the License. - - - @@ -311,11 +271,6 @@ under the License. - - - - - @@ -330,67 +285,8 @@ under the License. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -473,24 +369,6 @@ under the License. - - - - - - - - - - - - - - - @@ -725,43 +603,6 @@ under the License. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1148,21 +989,6 @@ FORREST_HOME environment variable! - - - - - - - - - - - - - - - @@ -1171,6 +997,7 @@ FORREST_HOME environment variable! + @@ -1193,6 +1020,7 @@ FORREST_HOME environment variable! + diff --git a/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java deleted file mode 100644 index 86b0d557b..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java +++ /dev/null @@ -1,45 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -import org.apache.poi.hxf.HXFDocument; - -/** - * Parent class of all UserModel POI XML (ooxml) - * implementations. - * Provides a similar function to {@link POIDocument}, - * for the XML based classes. - */ -public abstract class POIXMLDocument { - private HXFDocument document; - - /** - * Creates a new POI XML Document, wrapping up - * the underlying raw HXFDocument - */ - protected POIXMLDocument(HXFDocument document) { - this.document = document; - } - - /** - * Returns the underlying HXFDocument, typically - * used for unit testing - */ - public HXFDocument _getHXFDocument() { - return document; - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/POIXMLTextExtractor.java b/src/scratchpad/ooxml-src/org/apache/poi/POIXMLTextExtractor.java deleted file mode 100644 index c28eba49d..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/POIXMLTextExtractor.java +++ /dev/null @@ -1,31 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -public abstract class POIXMLTextExtractor extends POITextExtractor { - /** The POIXMLDocument that's open */ - protected POIXMLDocument document; - - /** - * Creates a new text extractor for the given document - */ - public POIXMLTextExtractor(POIXMLDocument document) { - super(null); - - this.document = document; - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hslf/HSLFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hslf/HSLFXML.java deleted file mode 100644 index 568cb80aa..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hslf/HSLFXML.java +++ /dev/null @@ -1,148 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hslf; - -import java.io.IOException; - -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.InvalidFormatException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxml4j.opc.PackageRelationshipCollection; -import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMaster; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdList; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry; -import org.openxmlformats.schemas.presentationml.x2006.main.NotesDocument; -import org.openxmlformats.schemas.presentationml.x2006.main.PresentationDocument; -import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument; -import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument; - -/** - * Experimental class to do low level processing - * of pptx files. - * - * If you are using these low level classes, then you - * will almost certainly need to refer to the OOXML - * specifications from - * http://www.ecma-international.org/publications/standards/Ecma-376.htm - * - * WARNING - APIs expected to change rapidly - */ -public class HSLFXML extends HXFDocument { - public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"; - public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml"; - public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"; - public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"; - public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide"; - - private PresentationDocument presentationDoc; - - public HSLFXML(Package container) throws OpenXML4JException, IOException, XmlException { - super(container, MAIN_CONTENT_TYPE); - - presentationDoc = - PresentationDocument.Factory.parse(basePart.getInputStream()); - } - - /** - * Returns the low level presentation base object - */ - public CTPresentation getPresentation() { - return presentationDoc.getPresentation(); - } - - /** - * Returns the references from the presentation to its - * slides. - * You'll need these to figure out the slide ordering, - * and to get at the actual slides themselves - */ - public CTSlideIdList getSlideReferences() { - return getPresentation().getSldIdLst(); - } - /** - * Returns the references from the presentation to its - * slide masters. - * You'll need these to get at the actual slide - * masters themselves - */ - public CTSlideMasterIdList getSlideMasterReferences() { - return getPresentation().getSldMasterIdLst(); - } - - /** - * Returns the low level slide master object from - * the supplied slide master reference - */ - public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException { - PackagePart masterPart = - getRelatedPackagePart(master.getId2()); - SldMasterDocument masterDoc = - SldMasterDocument.Factory.parse(masterPart.getInputStream()); - return masterDoc.getSldMaster(); - } - - /** - * Returns the low level slide object from - * the supplied slide reference - */ - public CTSlide getSlide(CTSlideIdListEntry slide) throws IOException, XmlException { - PackagePart slidePart = - getRelatedPackagePart(slide.getId2()); - SldDocument slideDoc = - SldDocument.Factory.parse(slidePart.getInputStream()); - return slideDoc.getSld(); - } - - /** - * Returns the low level notes object for the given - * slide, as found from the supplied slide reference - */ - public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException { - PackagePart slidePart = - getRelatedPackagePart(slide.getId2()); - - PackageRelationshipCollection notes; - try { - notes = slidePart.getRelationshipsByType(NOTES_RELATION_TYPE); - } catch(InvalidFormatException e) { - throw new IllegalStateException(e); - } - - if(notes.size() == 0) { - // No notes for this slide - return null; - } - if(notes.size() > 1) { - throw new IllegalStateException("Expecting 0 or 1 notes for a slide, but found " + notes.size()); - } - - PackagePart notesPart = - getPackagePart(notes.getRelationship(0)); - NotesDocument notesDoc = - NotesDocument.Factory.parse(notesPart.getInputStream()); - - return notesDoc.getNotes(); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hslf/extractor/HXFPowerPointExtractor.java b/src/scratchpad/ooxml-src/org/apache/poi/hslf/extractor/HXFPowerPointExtractor.java deleted file mode 100644 index 1d4b1a2bd..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hslf/extractor/HXFPowerPointExtractor.java +++ /dev/null @@ -1,139 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hslf.extractor; - -import java.io.File; -import java.io.IOException; - -import org.apache.poi.POIXMLTextExtractor; -import org.apache.poi.hslf.HSLFXML; -import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow; -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; -import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; -import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; -import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; -import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; - -public class HXFPowerPointExtractor extends POIXMLTextExtractor { - private HSLFXMLSlideShow slideshow; - private boolean slidesByDefault = true; - private boolean notesByDefault = false; - - public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException { - this(new HSLFXMLSlideShow( - new HSLFXML(container) - )); - } - public HXFPowerPointExtractor(HSLFXMLSlideShow slideshow) { - super(slideshow); - this.slideshow = slideshow; - } - - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" HXFPowerPointExtractor "); - System.exit(1); - } - POIXMLTextExtractor extractor = - new HXFPowerPointExtractor(HXFDocument.openPackage( - new File(args[0]) - )); - System.out.println(extractor.getText()); - } - - /** - * Should a call to getText() return slide text? - * Default is yes - */ - public void setSlidesByDefault(boolean slidesByDefault) { - this.slidesByDefault = slidesByDefault; - } - /** - * Should a call to getText() return notes text? - * Default is no - */ - public void setNotesByDefault(boolean notesByDefault) { - this.notesByDefault = notesByDefault; - } - - /** - * Gets the slide text, but not the notes text - */ - public String getText() { - return getText(slidesByDefault, notesByDefault); - } - - /** - * Gets the requested text from the file - * @param slideText Should we retrieve text from slides? - * @param notesText Should we retrieve text from notes? - */ - public String getText(boolean slideText, boolean notesText) { - StringBuffer text = new StringBuffer(); - - CTSlideIdListEntry[] slideRefs = - slideshow._getHSLFXML().getSlideReferences().getSldIdArray(); - for (int i = 0; i < slideRefs.length; i++) { - try { - CTSlide slide = - slideshow._getHSLFXML().getSlide(slideRefs[i]); - CTNotesSlide notes = - slideshow._getHSLFXML().getNotes(slideRefs[i]); - - if(slideText) { - extractText(slide.getCSld().getSpTree(), text); - } - if(notesText && notes != null) { - extractText(notes.getCSld().getSpTree(), text); - } - } catch(Exception e) { - throw new RuntimeException(e); - } - } - - return text.toString(); - } - - private void extractText(CTGroupShape gs, StringBuffer text) { - CTShape[] shapes = gs.getSpArray(); - for (int i = 0; i < shapes.length; i++) { - CTTextBody textBody = - shapes[i].getTxBody(); - if(textBody != null) { - CTTextParagraph[] paras = - textBody.getPArray(); - for (int j = 0; j < paras.length; j++) { - CTRegularTextRun[] textRuns = - paras[j].getRArray(); - for (int k = 0; k < textRuns.length; k++) { - text.append( textRuns[k].getT() ); - } - // End each paragraph with a new line - text.append("\n"); - } - } - } - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hslf/usermodel/HSLFXMLSlideShow.java b/src/scratchpad/ooxml-src/org/apache/poi/hslf/usermodel/HSLFXMLSlideShow.java deleted file mode 100644 index b8a5fcde3..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hslf/usermodel/HSLFXMLSlideShow.java +++ /dev/null @@ -1,39 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hslf.usermodel; - -import org.apache.poi.POIXMLDocument; -import org.apache.poi.hslf.HSLFXML; - -/** - * High level representation of a ooxml slideshow. - * This is the first object most users will construct whether - * they are reading or writing a slideshow. It is also the - * top level object for creating new slides/etc. - */ -public class HSLFXMLSlideShow extends POIXMLDocument { - private org.apache.poi.hslf.HSLFXML hslfXML; - - public HSLFXMLSlideShow(HSLFXML xml) { - super(xml); - this.hslfXML = xml; - } - - public HSLFXML _getHSLFXML() { - return hslfXML; - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java deleted file mode 100644 index 3766a046a..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java +++ /dev/null @@ -1,104 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf; - -import java.io.IOException; - -import org.apache.poi.hssf.model.SharedStringsTable; -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheets; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorksheetDocument; - -/** - * Experimental class to do low level processing - * of xlsx files. - * - * If you are using these low level classes, then you - * will almost certainly need to refer to the OOXML - * specifications from - * http://www.ecma-international.org/publications/standards/Ecma-376.htm - * - * WARNING - APIs expected to change rapidly - */ -public class HSSFXML extends HXFDocument { - public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"; - public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"; - public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"; - public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"; - - private WorkbookDocument workbookDoc; - private SharedStringsTable sharedStrings; - - public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException { - super(container, MAIN_CONTENT_TYPE); - - workbookDoc = - WorkbookDocument.Factory.parse(basePart.getInputStream()); - - PackagePart ssPart = getSinglePartByRelationType(SHARED_STRINGS_RELATION_TYPE, basePart); - if (ssPart != null) { - sharedStrings = new SharedStringsTable(ssPart); - } else { - - } - } - - /** - * Returns the low level workbook base object - */ - public CTWorkbook getWorkbook() { - return workbookDoc.getWorkbook(); - } - /** - * Returns the references from the workbook to its - * sheets. - * You'll need these to figure out the sheet ordering, - * and to get at the actual sheets themselves - */ - public CTSheets getSheetReferences() { - return getWorkbook().getSheets(); - } - /** - * Returns the low level (work)sheet object from - * the supplied sheet reference - */ - public CTWorksheet getSheet(CTSheet sheet) throws IOException, XmlException { - PackagePart sheetPart = - getRelatedPackagePart(sheet.getId()); - WorksheetDocument sheetDoc = - WorksheetDocument.Factory.parse(sheetPart.getInputStream()); - return sheetDoc.getWorksheet(); - } - - /** - * Returns the shared string at the given index - */ - public String getSharedString(int index) { - return this.sharedStrings.get(index); - } - protected SharedStringsTable _getSharedStringsTable() { - return sharedStrings; - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java deleted file mode 100644 index 34ae06800..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java +++ /dev/null @@ -1,133 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf.extractor; - -import java.io.File; -import java.io.IOException; - -import org.apache.poi.POIXMLTextExtractor; -import org.apache.poi.hssf.HSSFXML; -import org.apache.poi.hssf.usermodel.HSSFXMLCell; -import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook; -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; - -/** - * Helper class to extract text from an OOXML Excel file - */ -public class HXFExcelExtractor extends POIXMLTextExtractor { - private HSSFXMLWorkbook workbook; - private boolean includeSheetNames = true; - private boolean formulasNotResults = false; - - public HXFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException { - this(new HSSFXMLWorkbook( - new HSSFXML(container) - )); - } - public HXFExcelExtractor(HSSFXMLWorkbook workbook) { - super(workbook); - this.workbook = workbook; - } - - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" HXFExcelExtractor "); - System.exit(1); - } - POIXMLTextExtractor extractor = - new HXFExcelExtractor(HXFDocument.openPackage( - new File(args[0]) - )); - System.out.println(extractor.getText()); - } - - /** - * Should sheet names be included? Default is true - */ - public void setIncludeSheetNames(boolean includeSheetNames) { - this.includeSheetNames = includeSheetNames; - } - /** - * Should we return the formula itself, and not - * the result it produces? Default is false - */ - public void setFormulasNotResults(boolean formulasNotResults) { - this.formulasNotResults = formulasNotResults; - } - - /** - * Retreives the text contents of the file - */ - public String getText() { - StringBuffer text = new StringBuffer(); - - CTSheet[] sheetRefs = - workbook._getHSSFXML().getSheetReferences().getSheetArray(); - for(int i=0; i 0) { - text.append("\n"); - } - if(includeSheetNames) { - text.append(sheetRefs[i].getName() + "\n"); - } - - for(int j=0; j 0) { - text.append("\t"); - } - - boolean done = false; - - // Is it a formula one? - if(cell.getF() != null) { - if(formulasNotResults) { - text.append(cell.getF().getStringValue()); - done = true; - } - } - if(!done) { - HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook); - text.append(uCell.getStringValue()); - } - } - text.append("\n"); - } - } catch(Exception e) { - throw new RuntimeException(e); - } - } - - return text.toString(); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java deleted file mode 100644 index b3e219256..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java +++ /dev/null @@ -1,78 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.hssf.model; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.LinkedList; - -import org.apache.xmlbeans.XmlException; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument; - - -public class SharedStringsTable extends LinkedList { - public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; - - private SstDocument doc; - private PackagePart part; - - public SharedStringsTable(PackagePart part) throws IOException, XmlException { - this.part = part; - doc = SstDocument.Factory.parse( - part.getInputStream() - ); - read(); - } - - private void read() { - CTRst[] sts = doc.getSst().getSiArray(); - for (int i = 0; i < sts.length; i++) { - add(sts[i].getT()); - } - } - - /** - * Writes the current shared strings table into - * the associated OOXML PackagePart - */ - public void write() throws IOException { - CTSst sst = doc.getSst(); - - // Remove the old list - for(int i=sst.sizeOfSiArray() - 1; i>=0; i--) { - sst.removeSi(i); - } - - // Add the new one - for(String s : this) { - sst.addNewSi().setT(s); - } - - // Update the counts - sst.setCount(this.size()); - sst.setUniqueCount(this.size()); - - // Write out - OutputStream out = part.getOutputStream(); - doc.save(out); - out.close(); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java deleted file mode 100644 index b24556cd8..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java +++ /dev/null @@ -1,58 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf.usermodel; - -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType; - -/** - * User facing wrapper around an underlying cell object - */ -public class HSSFXMLCell { - private CTCell cell; - - /** The workbook to which this cell belongs */ - private final HSSFXMLWorkbook workbook; - - public HSSFXMLCell(CTCell rawCell, HSSFXMLWorkbook workbook) { - this.cell = rawCell; - this.workbook = workbook; - } - - /** - * Formats the cell's contents, based on its type, - * and returns it as a string. - */ - public String getStringValue() { - - switch (cell.getT().intValue()) { - case STCellType.INT_S: - return this.workbook.getSharedString(Integer.valueOf(cell.getV())); - case STCellType.INT_INLINE_STR: - return cell.getV(); - case STCellType.INT_N: - return cell.getV(); - // TODO: support other types - default: - return "UNSUPPORTED CELL TYPE: '" + cell.getT() + "'"; - } - } - - public String toString() { - return cell.getR() + " - " + getStringValue(); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java deleted file mode 100644 index 023b80f4d..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java +++ /dev/null @@ -1,43 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf.usermodel; - -import org.apache.poi.POIXMLDocument; -import org.apache.poi.hssf.HSSFXML; - -/** - * High level representation of a ooxml workbook. - * This is the first object most users will construct whether - * they are reading or writing a workbook. It is also the - * top level object for creating new sheets/etc. - */ -public class HSSFXMLWorkbook extends POIXMLDocument { - private HSSFXML hssfXML; - - public HSSFXMLWorkbook(HSSFXML xml) { - super(xml); - this.hssfXML = xml; - } - - public HSSFXML _getHSSFXML() { - return hssfXML; - } - - public String getSharedString(int index) { - return hssfXML.getSharedString(index); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java deleted file mode 100644 index 66bba7ee1..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java +++ /dev/null @@ -1,92 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hwpf; - -import java.io.IOException; - -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.InvalidFormatException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument; - -/** - * Experimental class to do low level processing - * of docx files. - * - * If you are using these low level classes, then you - * will almost certainly need to refer to the OOXML - * specifications from - * http://www.ecma-international.org/publications/standards/Ecma-376.htm - * - * WARNING - APIs expected to change rapidly - */ -public class HWPFXML extends HXFDocument { - public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"; - public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"; - public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"; - public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"; - public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"; - - private DocumentDocument wordDoc; - - public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException { - super(container, MAIN_CONTENT_TYPE); - - wordDoc = - DocumentDocument.Factory.parse(basePart.getInputStream()); - } - - /** - * Returns the low level document base object - */ - public CTDocument1 getDocument() { - return wordDoc.getDocument(); - } - - /** - * Returns the low level body of the document - */ - public CTBody getDocumentBody() { - return getDocument().getBody(); - } - - /** - * Returns the styles object used - */ - public CTStyles getStyle() throws XmlException, IOException { - PackagePart[] parts; - try { - parts = getRelatedByType(STYLES_RELATION_TYPE); - } catch(InvalidFormatException e) { - throw new IllegalStateException(e); - } - if(parts.length != 1) { - throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length); - } - - StylesDocument sd = - StylesDocument.Factory.parse(parts[0].getInputStream()); - return sd.getStyles(); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hwpf/extractor/HXFWordExtractor.java b/src/scratchpad/ooxml-src/org/apache/poi/hwpf/extractor/HXFWordExtractor.java deleted file mode 100644 index a4427e49e..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hwpf/extractor/HXFWordExtractor.java +++ /dev/null @@ -1,87 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hwpf.extractor; - -import java.io.File; -import java.io.IOException; - -import org.apache.poi.POIXMLTextExtractor; -import org.apache.poi.hwpf.HWPFXML; -import org.apache.poi.hwpf.usermodel.HWPFXMLDocument; -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; - -/** - * Helper class to extract text from an OOXML Word file - */ -public class HXFWordExtractor extends POIXMLTextExtractor { - private HWPFXMLDocument document; - - public HXFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException { - this(new HWPFXMLDocument( - new HWPFXML(container) - )); - } - public HXFWordExtractor(HWPFXMLDocument document) { - super(document); - this.document = document; - } - - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" HXFWordExtractor "); - System.exit(1); - } - POIXMLTextExtractor extractor = - new HXFWordExtractor(HXFDocument.openPackage( - new File(args[0]) - )); - System.out.println(extractor.getText()); - } - - public String getText() { - CTBody body = document._getHWPFXML().getDocumentBody(); - StringBuffer text = new StringBuffer(); - - // Loop over paragraphs - CTP[] ps = body.getPArray(); - for (int i = 0; i < ps.length; i++) { - // Loop over ranges - CTR[] rs = ps[i].getRArray(); - for (int j = 0; j < rs.length; j++) { - // Loop over text runs - CTText[] texts = rs[j].getTArray(); - for (int k = 0; k < texts.length; k++) { - text.append( - texts[k].getStringValue() - ); - } - } - // New line after each paragraph. - text.append("\n"); - } - - return text.toString(); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hwpf/usermodel/HWPFXMLDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/hwpf/usermodel/HWPFXMLDocument.java deleted file mode 100644 index 64597e83d..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hwpf/usermodel/HWPFXMLDocument.java +++ /dev/null @@ -1,36 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hwpf.usermodel; - -import org.apache.poi.POIXMLDocument; -import org.apache.poi.hwpf.HWPFXML; - -/** - * High level representation of a ooxml text document. - */ -public class HWPFXMLDocument extends POIXMLDocument { - private HWPFXML hwpfXML; - - public HWPFXMLDocument(HWPFXML xml) { - super(xml); - this.hwpfXML = xml; - } - - public HWPFXML _getHWPFXML() { - return hwpfXML; - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java deleted file mode 100644 index 9849a7d21..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java +++ /dev/null @@ -1,272 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hxf; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.PushbackInputStream; -import java.util.ArrayList; - -import org.apache.poi.POIXMLDocument; -import org.apache.poi.poifs.common.POIFSConstants; -import org.apache.poi.poifs.storage.HeaderBlockConstants; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.LongField; -import org.apache.xmlbeans.XmlException; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.io.SAXReader; -import org.openxml4j.exceptions.InvalidFormatException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackageAccess; -import org.openxml4j.opc.PackagePart; -import org.openxml4j.opc.PackagePartName; -import org.openxml4j.opc.PackageRelationship; -import org.openxml4j.opc.PackageRelationshipCollection; -import org.openxml4j.opc.PackagingURIHelper; -import org.openxml4j.opc.internal.PackagePropertiesPart; -import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties; -import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument; - -/** - * Parent class of the low level interface to - * all POI XML (OOXML) implementations. - * Normal users should probably deal with things that - * extends {@link POIXMLDocument}, unless they really - * do need to get low level access to the files. - * - * If you are using these low level classes, then you - * will almost certainly need to refer to the OOXML - * specifications from - * http://www.ecma-international.org/publications/standards/Ecma-376.htm - * - * WARNING - APIs expected to change rapidly - */ -public abstract class HXFDocument { - public static final String CORE_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"; - public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"; - - /** - * File package/container. - */ - protected Package container; - /** - * The Package Part for our base document - */ - protected PackagePart basePart; - /** - * The base document of this instance, eg Workbook for - * xslsx - */ - protected Document baseDocument; - - protected HXFDocument(Package container, String baseContentType) throws OpenXML4JException { - this.container = container; - - // Find the base document - basePart = getSinglePartByType(baseContentType); - - // And load it up - try { - SAXReader reader = new SAXReader(); - baseDocument = reader.read(basePart.getInputStream()); - } catch (DocumentException e) { - throw new OpenXML4JException(e.getMessage()); - } catch (IOException ioe) { - throw new OpenXML4JException(ioe.getMessage()); - } - } - - /** - * Checks that the supplied InputStream (which MUST - * support mark and reset, or be a PushbackInputStream) - * has a OOXML (zip) header at the start of it. - * If your InputStream does not support mark / reset, - * then wrap it in a PushBackInputStream, then be - * sure to always use that, and not the original! - * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream - */ - public static boolean hasOOXMLHeader(InputStream inp) throws IOException { - // We want to peek at the first 4 bytes - inp.mark(4); - - byte[] header = new byte[4]; - IOUtils.readFully(inp, header); - - // Wind back those 4 bytes - if(inp instanceof PushbackInputStream) { - PushbackInputStream pin = (PushbackInputStream)inp; - pin.unread(header); - } else { - inp.reset(); - } - - // Did it match the ooxml zip signature? - return ( - header[0] == POIFSConstants.OOXML_FILE_HEADER[0] && - header[1] == POIFSConstants.OOXML_FILE_HEADER[1] && - header[2] == POIFSConstants.OOXML_FILE_HEADER[2] && - header[3] == POIFSConstants.OOXML_FILE_HEADER[3] - ); - } - - /** - * Fetches the (single) PackagePart with the supplied - * content type. - * @param contentType The content type to search for - * @throws IllegalArgumentException If we don't find a single part of that type - */ - private PackagePart getSinglePartByType(String contentType) throws IllegalArgumentException { - ArrayList parts = - container.getPartsByContentType(contentType); - if(parts.size() != 1) { - throw new IllegalArgumentException("Expecting one entry with content type of " + contentType + ", but found " + parts.size()); - } - return parts.get(0); - } - - /** - * Fetches the (single) PackagePart which is defined as - * the supplied relation content type of the specified part, - * or null if none found. - * @param relationType The relation content type to search for - * @throws IllegalArgumentException If we find more than one part of that type - * TODO: this sucks! Make Package and PackagePart implement common intf that defines getRelationshipsByType & friends - */ - protected PackagePart getSinglePartByRelationType(String relationType, PackagePart part) throws IllegalArgumentException, OpenXML4JException { - PackageRelationshipCollection rels = - part.getRelationshipsByType(relationType); - if(rels.size() == 0) { - return null; - } - if(rels.size() > 1) { - throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!"); - } - PackageRelationship rel = rels.getRelationship(0); - return getPackagePart(rel); - } - - /** - * Fetches the (single) PackagePart which is defined as - * the supplied relation content type of the base - * container, or null if none found. - * @param relationType The relation content type to search for - * @throws IllegalArgumentException If we find more than one part of that type - */ - protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException { - PackageRelationshipCollection rels = - container.getRelationshipsByType(relationType); - if(rels.size() == 0) { - return null; - } - if(rels.size() > 1) { - throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!"); - } - PackageRelationship rel = rels.getRelationship(0); - return getPackagePart(rel); - } - - /** - * Retrieves the PackagePart for the given relation - * id. This will normally come from a r:id attribute - * on part of the base document. - * @param partId The r:id pointing to the other PackagePart - */ - protected PackagePart getRelatedPackagePart(String partId) { - PackageRelationship rel = - basePart.getRelationship(partId); - return getPackagePart(rel); - } - - /** - * Retrieves the PackagePart for the given Relationship - * object. Normally you'll want to go via a content type - * or r:id to get one of those. - */ - protected PackagePart getPackagePart(PackageRelationship rel) { - PackagePartName relName; - try { - relName = PackagingURIHelper.createPartName(rel.getTargetURI()); - } catch(InvalidFormatException e) { - throw new InternalError(e.getMessage()); - } - - PackagePart part = container.getPart(relName); - if(part == null) { - throw new IllegalArgumentException("No part found for rel " + rel); - } - return part; - } - - /** - * Retrieves all the PackageParts which are defined as - * relationships of the base document with the - * specified content type. - */ - protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException { - PackageRelationshipCollection partsC = - basePart.getRelationshipsByType(contentType); - - PackagePart[] parts = new PackagePart[partsC.size()]; - int count = 0; - for (PackageRelationship rel : partsC) { - parts[count] = getPackagePart(rel); - count++; - } - return parts; - } - - /** - * Get the package container. - * @return The package associated to this document. - */ - public Package getPackage() { - return container; - } - - /** - * Get the core document properties (core ooxml properties). - */ - public PackagePropertiesPart getCoreProperties() throws OpenXML4JException, XmlException, IOException { - PackagePart propsPart = getSinglePartByRelationType(CORE_PROPERTIES_REL_TYPE); - if(propsPart == null) { - return null; - } - return (PackagePropertiesPart)propsPart; - } - - /** - * Get the extended document properties (extended ooxml properties) - */ - public CTProperties getExtendedProperties() throws OpenXML4JException, XmlException, IOException { - PackagePart propsPart = getSinglePartByRelationType(EXTENDED_PROPERTIES_REL_TYPE); - - PropertiesDocument props = PropertiesDocument.Factory.parse( - propsPart.getInputStream()); - return props.getProperties(); - } - - /** - * Returns an opened OOXML Package for the supplied File - * @param f File to open - */ - public static Package openPackage(File f) throws InvalidFormatException { - return Package.open(f.toString(), PackageAccess.READ_WRITE); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hxf/dev/HXFLister.java b/src/scratchpad/ooxml-src/org/apache/poi/hxf/dev/HXFLister.java deleted file mode 100644 index 032b74b6f..000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hxf/dev/HXFLister.java +++ /dev/null @@ -1,133 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hxf.dev; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.PrintStream; -import java.util.ArrayList; - -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackageAccess; -import org.openxml4j.opc.PackagePart; -import org.openxml4j.opc.PackageRelationship; -import org.openxml4j.opc.PackageRelationshipCollection; - -/** - * Prints out the contents of a HXF (ooxml) container. - * Useful for seeing what parts are defined, and how - * they're all related to each other. - */ -public class HXFLister { - private Package container; - private PrintStream disp; - - public HXFLister(Package container) { - this(container, System.out); - } - public HXFLister(Package container, PrintStream disp) { - this.container = container; - this.disp = disp; - } - - /** - * Figures out how big a given PackagePart is. - */ - public static long getSize(PackagePart part) throws IOException { - InputStream in = part.getInputStream(); - byte[] b = new byte[8192]; - long size = 0; - int read = 0; - - while(read > -1) { - read = in.read(b); - if(read > 0) { - size += read; - } - } - - return size; - } - - /** - * Displays information on all the different - * parts of the OOXML file container. - */ - public void displayParts() throws Exception { - ArrayList parts = container.getParts(); - for (PackagePart part : parts) { - disp.println(part.getPartName()); - disp.println("\t" + part.getContentType()); - - if(! part.getPartName().toString().equals("/docProps/core.xml")) { - disp.println("\t" + getSize(part) + " bytes"); - } - - if(! part.isRelationshipPart()) { - disp.println("\t" + part.getRelationships().size() + " relations"); - for(PackageRelationship rel : part.getRelationships()) { - displayRelation(rel, "\t "); - } - } - } - } - /** - * Displays information on all the different - * relationships between different parts - * of the OOXML file container. - */ - public void displayRelations() throws Exception { - PackageRelationshipCollection rels = - container.getRelationships(); - for (PackageRelationship rel : rels) { - displayRelation(rel, ""); - } - } - private void displayRelation(PackageRelationship rel, String indent) { - disp.println(indent+"Relationship:"); - disp.println(indent+"\tFrom: "+ rel.getSourceURI()); - disp.println(indent+"\tTo: " + rel.getTargetURI()); - disp.println(indent+"\tID: " + rel.getId()); - disp.println(indent+"\tMode: " + rel.getTargetMode()); - disp.println(indent+"\tType: " + rel.getRelationshipType()); - } - - public static void main(String[] args) throws Exception { - if(args.length == 0) { - System.err.println("Use:"); - System.err.println("\tjava HXFLister "); - System.exit(1); - } - - File f = new File(args[0]); - if(! f.exists()) { - System.err.println("Error, file not found!"); - System.err.println("\t" + f.toString()); - System.exit(2); - } - - HXFLister lister = new HXFLister( - Package.open(f.toString(), PackageAccess.READ) - ); - - lister.disp.println(f.toString() + "\n"); - lister.displayParts(); - lister.disp.println(); - lister.displayRelations(); - } -} diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/TestHSLFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/TestHSLFXML.java deleted file mode 100644 index fd4653a85..000000000 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/TestHSLFXML.java +++ /dev/null @@ -1,127 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hslf; - -import java.io.File; - -import org.apache.poi.hxf.HXFDocument; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry; - -import junit.framework.TestCase; - -public class TestHSLFXML extends TestCase { - private File sampleFile; - - protected void setUp() throws Exception { - super.setUp(); - - sampleFile = new File( - System.getProperty("HSLF.testdata.path") + - File.separator + "sample.pptx" - ); - } - - public void testContainsMainContentType() throws Exception { - Package pack = HXFDocument.openPackage(sampleFile); - - boolean found = false; - for(PackagePart part : pack.getParts()) { - if(part.getContentType().equals(HSLFXML.MAIN_CONTENT_TYPE)) { - found = true; - } - System.out.println(part); - } - assertTrue(found); - } - - public void testOpen() throws Exception { - HXFDocument.openPackage(sampleFile); - - HSLFXML xml; - - // With the finalised uri, should be fine - xml = new HSLFXML( - HXFDocument.openPackage(sampleFile) - ); - - // Check the core - assertNotNull(xml.getPresentation()); - - // Check it has some slides - assertTrue( - xml.getSlideReferences().sizeOfSldIdArray() > 0 - ); - assertTrue( - xml.getSlideMasterReferences().sizeOfSldMasterIdArray() > 0 - ); - } - - public void testSlideBasics() throws Exception { - HSLFXML xml = new HSLFXML( - HXFDocument.openPackage(sampleFile) - ); - - // Should have 1 master - assertEquals(1, xml.getSlideMasterReferences().sizeOfSldMasterIdArray()); - assertEquals(1, xml.getSlideMasterReferences().getSldMasterIdArray().length); - - // Should have three sheets - assertEquals(2, xml.getSlideReferences().sizeOfSldIdArray()); - assertEquals(2, xml.getSlideReferences().getSldIdArray().length); - - // Check they're as expected - CTSlideIdListEntry[] slides = xml.getSlideReferences().getSldIdArray(); - assertEquals(256, slides[0].getId()); - assertEquals(257, slides[1].getId()); - assertEquals("rId2", slides[0].getId2()); - assertEquals("rId3", slides[1].getId2()); - - // Now get those objects - assertNotNull(xml.getSlide(slides[0])); - assertNotNull(xml.getSlide(slides[1])); - - // And check they have notes as expected - assertNotNull(xml.getNotes(slides[0])); - assertNotNull(xml.getNotes(slides[1])); - - // And again for the master - CTSlideMasterIdListEntry[] masters = - xml.getSlideMasterReferences().getSldMasterIdArray(); - assertEquals(2147483648l, masters[0].getId()); - assertEquals("rId1", masters[0].getId2()); - assertNotNull(xml.getSlideMaster(masters[0])); - } - - public void testMetadataBasics() throws Exception { - HSLFXML xml = new HSLFXML( - HXFDocument.openPackage(sampleFile) - ); - - assertNotNull(xml.getCoreProperties()); - assertNotNull(xml.getExtendedProperties()); - - assertEquals("Microsoft Office PowerPoint", xml.getExtendedProperties().getApplication()); - assertEquals(0, xml.getExtendedProperties().getCharacters()); - assertEquals(0, xml.getExtendedProperties().getLines()); - - assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue()); - assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue()); - } -} diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/extractor/TestHXFPowerPointExtractor.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/extractor/TestHXFPowerPointExtractor.java deleted file mode 100644 index 6a006ab5c..000000000 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/extractor/TestHXFPowerPointExtractor.java +++ /dev/null @@ -1,109 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hslf.extractor; - -import java.io.File; - -import org.apache.poi.hslf.HSLFXML; -import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow; -import org.apache.poi.hxf.HXFDocument; - -import junit.framework.TestCase; - -/** - * Tests for HXFPowerPointExtractor - */ -public class TestHXFPowerPointExtractor extends TestCase { - /** - * A simple file - */ - private HSLFXML xmlA; - - protected void setUp() throws Exception { - super.setUp(); - - File fileA = new File( - System.getProperty("HSLF.testdata.path") + - File.separator + "sample.pptx" - ); - - xmlA = new HSLFXML(HXFDocument.openPackage(fileA)); - } - - /** - * Get text out of the simple file - */ - public void testGetSimpleText() throws Exception { - new HXFPowerPointExtractor(xmlA.getPackage()); - new HXFPowerPointExtractor(new HSLFXMLSlideShow(xmlA)); - - HXFPowerPointExtractor extractor = - new HXFPowerPointExtractor(xmlA.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Check Basics - assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n")); - assertTrue(text.endsWith("amet\n\n")); - - // Just slides, no notes - text = extractor.getText(true, false); - assertEquals( - "Lorem ipsum dolor sit amet\n" + - "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + - "\n" + - "Lorem ipsum dolor sit amet\n" + - "Lorem\n" + - "ipsum\n" + - "dolor\n" + - "sit\n" + - "amet\n" + - "\n", text - ); - - // Just notes, no slides - text = extractor.getText(false, true); - assertEquals( - "\n\n\n\n", text - ); - - // Both - text = extractor.getText(true, true); - assertEquals( - "Lorem ipsum dolor sit amet\n" + - "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + - "\n\n\n" + - "Lorem ipsum dolor sit amet\n" + - "Lorem\n" + - "ipsum\n" + - "dolor\n" + - "sit\n" + - "amet\n" + - "\n\n\n", text - ); - - // Via set defaults - extractor.setSlidesByDefault(false); - extractor.setNotesByDefault(true); - text = extractor.getText(); - assertEquals( - "\n\n\n\n", text - ); - } -} diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java deleted file mode 100644 index 97453265c..000000000 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java +++ /dev/null @@ -1,160 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf; - -import java.io.File; - -import org.apache.poi.hssf.model.SharedStringsTable; -import org.apache.poi.hxf.HXFDocument; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; - -import junit.framework.TestCase; - -public class TestHSSFXML extends TestCase { - /** - * Uses the old style schemas.microsoft.com schema uri - */ - private File sampleFileBeta; - /** - * Uses the new style schemas.openxmlformats.org schema uri - */ - private File sampleFile; - - protected void setUp() throws Exception { - super.setUp(); - - sampleFile = new File( - System.getProperty("HSSF.testdata.path") + - File.separator + "sample.xlsx" - ); - sampleFileBeta = new File( - System.getProperty("HSSF.testdata.path") + - File.separator + "sample-beta.xlsx" - ); - } - - public void testContainsMainContentType() throws Exception { - Package pack = HXFDocument.openPackage(sampleFile); - - boolean found = false; - for(PackagePart part : pack.getParts()) { - if(part.getContentType().equals(HSSFXML.MAIN_CONTENT_TYPE)) { - found = true; - } - System.out.println(part); - } - assertTrue(found); - } - - public void testOpen() throws Exception { - HXFDocument.openPackage(sampleFile); - HXFDocument.openPackage(sampleFileBeta); - - HSSFXML xml; - - // With an old-style uri, as found in a file produced - // with the office 2007 beta, will fail, as we don't - // translate things - try { - xml = new HSSFXML( - HXFDocument.openPackage(sampleFileBeta) - ); - fail(); - } catch(Exception e) {} - - // With the finalised uri, should be fine - xml = new HSSFXML( - HXFDocument.openPackage(sampleFile) - ); - - // Check it has a workbook - assertNotNull(xml.getWorkbook()); - } - - public void testSheetBasics() throws Exception { - HSSFXML xml = new HSSFXML( - HXFDocument.openPackage(sampleFile) - ); - - // Should have three sheets - assertEquals(3, xml.getSheetReferences().sizeOfSheetArray()); - assertEquals(3, xml.getSheetReferences().getSheetArray().length); - - // Check they're as expected - CTSheet[] sheets = xml.getSheetReferences().getSheetArray(); - assertEquals("Sheet1", sheets[0].getName()); - assertEquals("Sheet2", sheets[1].getName()); - assertEquals("Sheet3", sheets[2].getName()); - assertEquals("rId1", sheets[0].getId()); - assertEquals("rId2", sheets[1].getId()); - assertEquals("rId3", sheets[2].getId()); - - // Now get those objects - assertNotNull(xml.getSheet(sheets[0])); - assertNotNull(xml.getSheet(sheets[1])); - assertNotNull(xml.getSheet(sheets[2])); - } - - public void testMetadataBasics() throws Exception { - HSSFXML xml = new HSSFXML( - HXFDocument.openPackage(sampleFile) - ); - assertNotNull(xml.getCoreProperties()); - assertNotNull(xml.getExtendedProperties()); - - assertEquals("Microsoft Excel", xml.getExtendedProperties().getApplication()); - assertEquals(0, xml.getExtendedProperties().getCharacters()); - assertEquals(0, xml.getExtendedProperties().getLines()); - - assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue()); - assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue()); - } - - public void testSharedStringBasics() throws Exception { - HSSFXML xml = new HSSFXML( - HXFDocument.openPackage(sampleFile) - ); - assertNotNull(xml._getSharedStringsTable()); - - SharedStringsTable sst = xml._getSharedStringsTable(); - assertEquals(10, sst.size()); - - assertEquals("Lorem", sst.get(0)); - for(int i=0; i 0); - - // Check sheet names - assertTrue(text.startsWith("Sheet1")); - assertTrue(text.endsWith("Sheet3\n")); - - // Now without, will have text - extractor.setIncludeSheetNames(false); - text = extractor.getText(); - assertEquals( - "Lorem\t111\n" + - "ipsum\t222\n" + - "dolor\t333\n" + - "sit\t444\n" + - "amet\t555\n" + - "consectetuer\t666\n" + - "adipiscing\t777\n" + - "elit\t888\n" + - "Nunc\t999\n" + - "at\t4995\n" + - "\n\n", text); - - // Now get formulas not their values - extractor.setFormulasNotResults(true); - text = extractor.getText(); - assertEquals( - "Lorem\t111\n" + - "ipsum\t222\n" + - "dolor\t333\n" + - "sit\t444\n" + - "amet\t555\n" + - "consectetuer\t666\n" + - "adipiscing\t777\n" + - "elit\t888\n" + - "Nunc\t999\n" + - "at\tSUM(B1:B9)\n" + - "\n\n", text); - - // With sheet names too - extractor.setIncludeSheetNames(true); - text = extractor.getText(); - assertEquals( - "Sheet1\n" + - "Lorem\t111\n" + - "ipsum\t222\n" + - "dolor\t333\n" + - "sit\t444\n" + - "amet\t555\n" + - "consectetuer\t666\n" + - "adipiscing\t777\n" + - "elit\t888\n" + - "Nunc\t999\n" + - "at\tSUM(B1:B9)\n\n" + - "Sheet2\n\n" + - "Sheet3\n" - , text); - } - - public void testGetComplexText() throws Exception { - new HXFExcelExtractor(xmlB.getPackage()); - new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB)); - - HXFExcelExtractor extractor = - new HXFExcelExtractor(xmlB.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Might not have all formatting it should do! - // TODO decide if we should really have the "null" in there - assertTrue(text.startsWith( - "Avgtxfull\n" + - "null\t(iii) AVERAGE TAX RATES ON ANNUAL" - )); - } - - /** - * Test that we return pretty much the same as - * ExcelExtractor does, when we're both passed - * the same file, just saved as xls and xlsx - */ - public void testComparedToOLE2() throws Exception { - HXFExcelExtractor ooxmlExtractor = - new HXFExcelExtractor(simpleXLSX.getPackage()); - ExcelExtractor ole2Extractor = - new ExcelExtractor(simpleXLS); - - POITextExtractor[] extractors = - new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; - for (int i = 0; i < extractors.length; i++) { - POITextExtractor extractor = extractors[i]; - - String text = extractor.getText().replaceAll("[\r\t]", ""); - //System.out.println(text.length()); - //System.out.println(text); - assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n")); - Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL); - Matcher m = pattern.matcher(text); - assertTrue(m.matches()); - } - } -} diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/TestHWPFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/TestHWPFXML.java deleted file mode 100644 index 0d8e196f4..000000000 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/TestHWPFXML.java +++ /dev/null @@ -1,110 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hwpf; - -import java.io.File; - -import org.apache.poi.hxf.HXFDocument; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; - -import junit.framework.TestCase; - -public class TestHWPFXML extends TestCase { - private File sampleFile; - private File complexFile; - - protected void setUp() throws Exception { - super.setUp(); - - sampleFile = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "sample.docx" - ); - complexFile = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "IllustrativeCases.docx" - ); - } - - public void testContainsMainContentType() throws Exception { - Package pack = HXFDocument.openPackage(sampleFile); - - boolean found = false; - for(PackagePart part : pack.getParts()) { - if(part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) { - found = true; - } - System.out.println(part); - } - assertTrue(found); - } - - public void testOpen() throws Exception { - HXFDocument.openPackage(sampleFile); - HXFDocument.openPackage(complexFile); - - HWPFXML xml; - - // Simple file - xml = new HWPFXML( - HXFDocument.openPackage(sampleFile) - ); - // Check it has key parts - assertNotNull(xml.getDocument()); - assertNotNull(xml.getDocumentBody()); - assertNotNull(xml.getStyle()); - - // Complex file - xml = new HWPFXML( - HXFDocument.openPackage(complexFile) - ); - assertNotNull(xml.getDocument()); - assertNotNull(xml.getDocumentBody()); - assertNotNull(xml.getStyle()); - } - - public void testMetadataBasics() throws Exception { - HWPFXML xml = new HWPFXML( - HXFDocument.openPackage(sampleFile) - ); - assertNotNull(xml.getCoreProperties()); - assertNotNull(xml.getExtendedProperties()); - - assertEquals("Microsoft Office Word", xml.getExtendedProperties().getApplication()); - assertEquals(1315, xml.getExtendedProperties().getCharacters()); - assertEquals(10, xml.getExtendedProperties().getLines()); - - assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue()); - assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue()); - } - - public void testMetadataComplex() throws Exception { - HWPFXML xml = new HWPFXML( - HXFDocument.openPackage(complexFile) - ); - assertNotNull(xml.getCoreProperties()); - assertNotNull(xml.getExtendedProperties()); - - assertEquals("Microsoft Office Outlook", xml.getExtendedProperties().getApplication()); - assertEquals(5184, xml.getExtendedProperties().getCharacters()); - assertEquals(0, xml.getExtendedProperties().getLines()); - - assertEquals(" ", xml.getCoreProperties().getTitleProperty().getValue()); - assertEquals(" ", xml.getCoreProperties().getSubjectProperty().getValue()); - } -} diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/extractor/TestHXFWordExtractor.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/extractor/TestHXFWordExtractor.java deleted file mode 100644 index 62695b3a8..000000000 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/extractor/TestHXFWordExtractor.java +++ /dev/null @@ -1,117 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hwpf.extractor; - -import java.io.File; - -import org.apache.poi.hwpf.HWPFXML; -import org.apache.poi.hwpf.usermodel.HWPFXMLDocument; -import org.apache.poi.hxf.HXFDocument; - -import junit.framework.TestCase; - -/** - * Tests for HXFWordExtractor - */ -public class TestHXFWordExtractor extends TestCase { - /** - * A very simple file - */ - private HWPFXML xmlA; - /** - * A fairly complex file - */ - private HWPFXML xmlB; - - protected void setUp() throws Exception { - super.setUp(); - - File fileA = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "sample.docx" - ); - File fileB = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "IllustrativeCases.docx" - ); - - xmlA = new HWPFXML(HXFDocument.openPackage(fileA)); - xmlB = new HWPFXML(HXFDocument.openPackage(fileB)); - } - - /** - * Get text out of the simple file - */ - public void testGetSimpleText() throws Exception { - new HXFWordExtractor(xmlA.getPackage()); - new HXFWordExtractor(new HWPFXMLDocument(xmlA)); - - HXFWordExtractor extractor = - new HXFWordExtractor(xmlA.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Check contents - assertTrue(text.startsWith( - "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio." - )); - assertTrue(text.endsWith( - "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n" - )); - - // Check number of paragraphs - int ps = 0; - char[] t = text.toCharArray(); - for (int i = 0; i < t.length; i++) { - if(t[i] == '\n') { ps++; } - } - assertEquals(3, ps); - } - - /** - * Tests getting the text out of a complex file - */ - public void testGetComplexText() throws Exception { - HXFWordExtractor extractor = - new HXFWordExtractor(xmlB.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - char euro = '\u20ac'; - System.err.println("'"+text.substring(text.length() - 20) + "'"); - - // Check contents - assertTrue(text.startsWith( - " \n(V) ILLUSTRATIVE CASES\n\n" - )); - assertTrue(text.endsWith( - "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n" - )); - - // Check number of paragraphs - int ps = 0; - char[] t = text.toCharArray(); - for (int i = 0; i < t.length; i++) { - if(t[i] == '\n') { ps++; } - } - assertEquals(79, ps); - } -} diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java deleted file mode 100644 index 36adb497c..000000000 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java +++ /dev/null @@ -1,65 +0,0 @@ - -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - - -package org.apache.poi.hxf; - -import junit.framework.TestCase; -import java.io.*; - -/** - * Class to test that HXF correctly detects OOXML - * documents - */ -public class TestDetectAsOOXML extends TestCase -{ - public String dirname; - - public void setUp() { - dirname = System.getProperty("HSSF.testdata.path"); - } - - public void testOpensProperly() throws Exception - { - File f = new File(dirname + "/sample.xlsx"); - - HXFDocument.openPackage(f); - } - - public void testDetectAsPOIFS() throws Exception { - InputStream in; - - // ooxml file is - in = new PushbackInputStream( - new FileInputStream(dirname + "/SampleSS.xlsx"), 10 - ); - assertTrue(HXFDocument.hasOOXMLHeader(in)); - - // xls file isn't - in = new PushbackInputStream( - new FileInputStream(dirname + "/SampleSS.xls"), 10 - ); - assertFalse(HXFDocument.hasOOXMLHeader(in)); - - // text file isn't - in = new PushbackInputStream( - new FileInputStream(dirname + "/SampleSS.txt"), 10 - ); - assertFalse(HXFDocument.hasOOXMLHeader(in)); - } -}