Fix Office XML: strip empty and default namespace declarations

git-svn-id: http://svn.code.sf.net/p/davmail/code/trunk@16 3d1905a2-6b24-0410-a738-b14d5a86fcbd
This commit is contained in:
mguessan 2006-12-21 23:38:37 +00:00
parent 45ce559cd6
commit b4f72c034f
1 changed file with 21 additions and 3 deletions

View File

@ -14,6 +14,9 @@ import org.jdom.Attribute;
import org.jdom.JDOMException; import org.jdom.JDOMException;
import org.jdom.input.DOMBuilder; import org.jdom.input.DOMBuilder;
import org.w3c.tidy.Tidy; import org.w3c.tidy.Tidy;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import javax.mail.MessagingException; import javax.mail.MessagingException;
import javax.mail.internet.MimeUtility; import javax.mail.internet.MimeUtility;
@ -966,7 +969,19 @@ public class ExchangeSession {
DOMBuilder builder = new DOMBuilder(); DOMBuilder builder = new DOMBuilder();
XmlDocument xmlDocument = new XmlDocument(); XmlDocument xmlDocument = new XmlDocument();
try { try {
xmlDocument.load(builder.build(tidy.parseDOM(inputStream, null))); Document w3cDocument = tidy.parseDOM(inputStream, null);
// Fix broken Office xml document with empty namespace
NamedNodeMap namedNodeMap = w3cDocument.getDocumentElement().getAttributes();
for (int i = 0; i < namedNodeMap.getLength(); i++) {
Node node = namedNodeMap.item(i);
String nodeName = node.getNodeName();
String nodeValue = node.getNodeValue();
if (nodeName != null && nodeName.startsWith("xmlns")
&& (nodeValue == null || nodeValue.length() == 0)) {
w3cDocument.getDocumentElement().removeAttribute(nodeName);
}
}
xmlDocument.load(builder.build(w3cDocument));
} catch (IOException ex1) { } catch (IOException ex1) {
logger.error("Exception parsing document", ex1); logger.error("Exception parsing document", ex1);
} catch (JDOMException ex1) { } catch (JDOMException ex1) {
@ -1040,7 +1055,10 @@ public class ExchangeSession {
} }
// get inline images from htmlBody (without OWA transformation) // get inline images from htmlBody (without OWA transformation)
ByteArrayInputStream bais = new ByteArrayInputStream(htmlBody.getBytes("UTF-8")); ByteArrayInputStream bais = new ByteArrayInputStream(htmlBody
// quick fix remove default office namespace
.replaceFirst("xmlns=\".*\"", "")
.getBytes("UTF-8"));
XmlDocument xmlBody = tidyDocument(bais); XmlDocument xmlBody = tidyDocument(bais);
List<Attribute> htmlBodyImgList = xmlBody.getNodes("//img/@src"); List<Attribute> htmlBodyImgList = xmlBody.getNodes("//img/@src");
@ -1083,7 +1101,7 @@ public class ExchangeSession {
} else if (!contentid.startsWith("http://") && !contentid.startsWith("https://")) { } else if (!contentid.startsWith("http://") && !contentid.startsWith("https://")) {
attachment.contentid = contentid; attachment.contentid = contentid;
// must patch htmlBody for inline image without cid // must patch htmlBody for inline image without cid
htmlBody = htmlBody.replaceFirst(attachment.contentid, "cid:"+attachment.contentid); htmlBody = htmlBody.replaceFirst(attachment.contentid, "cid:" + attachment.contentid);
} }
} else { } else {
logger.warn("More images in OWA body !"); logger.warn("More images in OWA body !");