From b173dc11d063b08fe3ec0af895cbf284537fb1fd Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 3 Apr 2015 15:54:59 +0000 Subject: [PATCH] XMLPrettyPrint: Don't try to pretty-print non-XML files and print out which file from the ooxml-file fails to parse git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1671095 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/stress/AbstractFileHandler.java | 10 +++++++++ .../org/apache/poi/dev/OOXMLPrettyPrint.java | 22 ++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java index 881908377..55e2c368f 100644 --- a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -27,9 +27,11 @@ import java.io.IOException; import java.io.InputStream; import java.util.HashSet; import java.util.Set; +import java.util.zip.ZipException; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POITextExtractor; +import org.apache.poi.dev.OOXMLPrettyPrint; import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; @@ -68,6 +70,14 @@ public abstract class AbstractFileHandler implements FileHandler { } finally { ExtractorFactory.setThreadPrefersEventExtractors(before); } + + /* Did fail for some documents with special XML contents... + try { + OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(), + "/tmp/pretty-" + file.getName() }); + } catch (ZipException e) { + // ignore, not a Zip/OOXML file + }*/ } private void handleExtractingInternal(File file) throws Exception { diff --git a/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java b/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java index aab811287..48341d0c9 100644 --- a/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java +++ b/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java @@ -18,7 +18,6 @@ package org.apache.poi.dev; import java.io.BufferedOutputStream; import java.io.File; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -40,9 +39,9 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.poi.util.IOUtils; import org.w3c.dom.Document; import org.xml.sax.InputSource; -import org.xml.sax.SAXException; /** * Reads a zipped OOXML file and produces a copy with the included @@ -80,8 +79,7 @@ public class OOXMLPrettyPrint { } private static void handleFile(File file, File outFile) throws ZipException, - IOException, FileNotFoundException, SAXException, - TransformerException, ParserConfigurationException { + IOException, TransformerException, ParserConfigurationException { System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile); ZipFile zipFile = new ZipFile(file); @@ -99,15 +97,23 @@ public class OOXMLPrettyPrint { } } - private void handle(ZipFile file, ZipOutputStream out) throws SAXException, IOException, TransformerException { + private void handle(ZipFile file, ZipOutputStream out) throws IOException, TransformerException { Enumeration entries = file.entries(); while(entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); - out.putNextEntry(new ZipEntry(entry.getName())); + String name = entry.getName(); + out.putNextEntry(new ZipEntry(name)); try { - Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry))); - pretty(document, out, 2); + if(name.endsWith(".xml") || name.endsWith(".rels")) { + Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry))); + pretty(document, out, 2); + } else { + System.out.println("Not pretty-printing non-XML file " + name); + IOUtils.copy(file.getInputStream(entry), out); + } + } catch (Exception e) { + throw new IOException("While handling entry " + name, e); } finally { out.closeEntry(); }