From b4f72c034f0c2c616975a8fa51d9e4fdd98b72a7 Mon Sep 17 00:00:00 2001
From: mguessan <mguessan@3d1905a2-6b24-0410-a738-b14d5a86fcbd>
Date: Thu, 21 Dec 2006 23:38:37 +0000
Subject: [PATCH] Fix Office XML: remove empty and default namespace declarations

git-svn-id: http://svn.code.sf.net/p/davmail/code/trunk@16 3d1905a2-6b24-0410-a738-b14d5a86fcbd
---
 .../davmail/exchange/ExchangeSession.java     | 24 ++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/src/java/davmail/exchange/ExchangeSession.java b/src/java/davmail/exchange/ExchangeSession.java
index 23fc53ba..0844548b 100644
--- a/src/java/davmail/exchange/ExchangeSession.java
+++ b/src/java/davmail/exchange/ExchangeSession.java
@@ -14,6 +14,9 @@ import org.jdom.Attribute;
 import org.jdom.JDOMException;
 import org.jdom.input.DOMBuilder;
 import org.w3c.tidy.Tidy;
+import org.w3c.dom.Document;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
 
 import javax.mail.MessagingException;
 import javax.mail.internet.MimeUtility;
@@ -966,7 +969,19 @@ public class ExchangeSession {
             DOMBuilder builder = new DOMBuilder();
             XmlDocument xmlDocument = new XmlDocument();
             try {
-                xmlDocument.load(builder.build(tidy.parseDOM(inputStream, null)));
+                Document w3cDocument = tidy.parseDOM(inputStream, null);
+                // Fix broken Office xml document: drop empty xmlns declarations from the root element
+                NamedNodeMap namedNodeMap = w3cDocument.getDocumentElement().getAttributes();
+                for (int i = namedNodeMap.getLength() - 1; i >= 0; i--) { // backwards: the map is live, removal shifts later indexes
+                    Node node = namedNodeMap.item(i);
+                    String nodeName = node.getNodeName();
+                    String nodeValue = node.getNodeValue();
+                    if (nodeName != null && nodeName.startsWith("xmlns")
+                            && (nodeValue == null || nodeValue.length() == 0)) {
+                        w3cDocument.getDocumentElement().removeAttribute(nodeName);
+                    }
+                }
+                xmlDocument.load(builder.build(w3cDocument));
             } catch (IOException ex1) {
                 logger.error("Exception parsing document", ex1);
             } catch (JDOMException ex1) {
@@ -1040,7 +1055,10 @@ public class ExchangeSession {
                 }
 
                 // get inline images from htmlBody (without OWA transformation)
-                ByteArrayInputStream bais = new ByteArrayInputStream(htmlBody.getBytes("UTF-8"));
+                ByteArrayInputStream bais = new ByteArrayInputStream(htmlBody
+                        // quick fix: remove default office namespace; stop at the closing quote so later attributes survive
+                        .replaceFirst("xmlns=\"[^\"]*\"", "")
+                        .getBytes("UTF-8"));
                 XmlDocument xmlBody = tidyDocument(bais);
                 List<Attribute> htmlBodyImgList = xmlBody.getNodes("//img/@src");
 
@@ -1083,7 +1101,7 @@ public class ExchangeSession {
                                 } else if (!contentid.startsWith("http://") && !contentid.startsWith("https://")) {
                                     attachment.contentid = contentid;
                                     // must patch htmlBody for inline image without cid
-                                    htmlBody = htmlBody.replaceFirst(attachment.contentid, "cid:"+attachment.contentid);
+                                    htmlBody = htmlBody.replaceFirst(java.util.regex.Pattern.quote(attachment.contentid), java.util.regex.Matcher.quoteReplacement("cid:" + attachment.contentid)); // literal match: content ids may contain regex metacharacters
                                 }
                             } else {
                                 logger.warn("More images in OWA body !");