From b4f72c034f0c2c616975a8fa51d9e4fdd98b72a7 Mon Sep 17 00:00:00 2001 From: mguessan Date: Thu, 21 Dec 2006 23:38:37 +0000 Subject: [PATCH] Fix office XML : empty and default namespace git-svn-id: http://svn.code.sf.net/p/davmail/code/trunk@16 3d1905a2-6b24-0410-a738-b14d5a86fcbd --- .../davmail/exchange/ExchangeSession.java | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/java/davmail/exchange/ExchangeSession.java b/src/java/davmail/exchange/ExchangeSession.java index 23fc53ba..0844548b 100644 --- a/src/java/davmail/exchange/ExchangeSession.java +++ b/src/java/davmail/exchange/ExchangeSession.java @@ -14,6 +14,9 @@ import org.jdom.Attribute; import org.jdom.JDOMException; import org.jdom.input.DOMBuilder; import org.w3c.tidy.Tidy; +import org.w3c.dom.Document; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; import javax.mail.MessagingException; import javax.mail.internet.MimeUtility; @@ -966,7 +969,19 @@ public class ExchangeSession { DOMBuilder builder = new DOMBuilder(); XmlDocument xmlDocument = new XmlDocument(); try { - xmlDocument.load(builder.build(tidy.parseDOM(inputStream, null))); + Document w3cDocument = tidy.parseDOM(inputStream, null); + // Fix broken Office xml document with empty namespace + NamedNodeMap namedNodeMap = w3cDocument.getDocumentElement().getAttributes(); + for (int i = 0; i < namedNodeMap.getLength(); i++) { + Node node = namedNodeMap.item(i); + String nodeName = node.getNodeName(); + String nodeValue = node.getNodeValue(); + if (nodeName != null && nodeName.startsWith("xmlns") + && (nodeValue == null || nodeValue.length() == 0)) { + w3cDocument.getDocumentElement().removeAttribute(nodeName); + } + } + xmlDocument.load(builder.build(w3cDocument)); } catch (IOException ex1) { logger.error("Exception parsing document", ex1); } catch (JDOMException ex1) { @@ -1040,7 +1055,10 @@ public class ExchangeSession { } // get inline images from htmlBody (without OWA transformation) - ByteArrayInputStream bais = new ByteArrayInputStream(htmlBody.getBytes("UTF-8")); + ByteArrayInputStream bais = new ByteArrayInputStream(htmlBody + // quick fix remove default office namespace + .replaceFirst("xmlns=\".*\"", "") + .getBytes("UTF-8")); XmlDocument xmlBody = tidyDocument(bais); List htmlBodyImgList = xmlBody.getNodes("//img/@src"); @@ -1083,7 +1101,7 @@ public class ExchangeSession { } else if (!contentid.startsWith("http://") && !contentid.startsWith("https://")) { attachment.contentid = contentid; // must patch htmlBody for inline image without cid - htmlBody = htmlBody.replaceFirst(attachment.contentid, "cid:"+attachment.contentid); + htmlBody = htmlBody.replaceFirst(attachment.contentid, "cid:" + attachment.contentid); } } else { logger.warn("More images in OWA body !");