Fix Office XML: remove empty and default namespace declarations

git-svn-id: http://svn.code.sf.net/p/davmail/code/trunk@16 3d1905a2-6b24-0410-a738-b14d5a86fcbd
This commit is contained in:
mguessan 2006-12-21 23:38:37 +00:00
parent 45ce559cd6
commit b4f72c034f
1 changed files with 21 additions and 3 deletions

View File

@ -14,6 +14,9 @@ import org.jdom.Attribute;
import org.jdom.JDOMException;
import org.jdom.input.DOMBuilder;
import org.w3c.tidy.Tidy;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import javax.mail.MessagingException;
import javax.mail.internet.MimeUtility;
@ -966,7 +969,19 @@ public class ExchangeSession {
DOMBuilder builder = new DOMBuilder();
XmlDocument xmlDocument = new XmlDocument();
try {
xmlDocument.load(builder.build(tidy.parseDOM(inputStream, null)));
Document w3cDocument = tidy.parseDOM(inputStream, null);
// Fix broken Office XML documents: drop xmlns attributes with an empty value from the root element
NamedNodeMap namedNodeMap = w3cDocument.getDocumentElement().getAttributes();
for (int i = 0; i < namedNodeMap.getLength(); i++) {
Node node = namedNodeMap.item(i);
String nodeName = node.getNodeName();
String nodeValue = node.getNodeValue();
if (nodeName != null && nodeName.startsWith("xmlns")
&& (nodeValue == null || nodeValue.length() == 0)) {
w3cDocument.getDocumentElement().removeAttribute(nodeName);
}
}
xmlDocument.load(builder.build(w3cDocument));
} catch (IOException ex1) {
logger.error("Exception parsing document", ex1);
} catch (JDOMException ex1) {
@ -1040,7 +1055,10 @@ public class ExchangeSession {
}
// get inline images from htmlBody (without OWA transformation)
ByteArrayInputStream bais = new ByteArrayInputStream(htmlBody.getBytes("UTF-8"));
ByteArrayInputStream bais = new ByteArrayInputStream(htmlBody
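// Quick fix: remove the default Office namespace declaration (xmlns="...").
// NOTE(review): ".*" is greedy and may match up to the last double quote on the line; "[^\"]*" would be narrower - confirm.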
.replaceFirst("xmlns=\".*\"", "")
.getBytes("UTF-8"));
XmlDocument xmlBody = tidyDocument(bais);
List<Attribute> htmlBodyImgList = xmlBody.getNodes("//img/@src");