From 79377c220f7c07b2c27abac969bd6c4e9d295bca Mon Sep 17 00:00:00 2001
From: Matthew Brace
Date: Sun, 8 Mar 2009 01:58:13 +0000
Subject: [PATCH] Added WBXML encoder and decoder and the default WBXML code page.
 Updated Utility.java to provide base64 encoding/decoding in a format needed by WBXML.

---
 src/com/android/email/Utility.java            |  10 +-
 .../android/email/mail/internet/CodePage.java | 175 ++++++
 .../android/email/mail/internet/WBXML.java    | 593 ++++++++++++++++++
 3 files changed, 777 insertions(+), 1 deletion(-)
 create mode 100644 src/com/android/email/mail/internet/CodePage.java
 create mode 100644 src/com/android/email/mail/internet/WBXML.java

diff --git a/src/com/android/email/Utility.java b/src/com/android/email/Utility.java
index f726a59b8..371519c15 100644
--- a/src/com/android/email/Utility.java
+++ b/src/com/android/email/Utility.java
@@ -65,6 +65,15 @@ public class Utility {
         return new String(decoded);
     }
 
+    /** Base64-decodes to the raw byte array instead of a String; returns null for null input. */
+    public static byte[] base64Decode(byte[] encoded) {
+        if (encoded == null) {
+            return null;
+        }
+        byte[] decoded = new Base64().decode(encoded);
+        return decoded;
+    }
+
     public static String base64Encode(String s) {
         if (s == null) {
             return s;
@@ -191,5 +200,4 @@ public class Utility {
 //        }
 //    }
     }
-
 }
diff --git a/src/com/android/email/mail/internet/CodePage.java b/src/com/android/email/mail/internet/CodePage.java
new file mode 100644
index 000000000..b37bcd27a
--- /dev/null
+++ b/src/com/android/email/mail/internet/CodePage.java
@@ -0,0 +1,175 @@
+package com.android.email;
+
+import java.util.HashMap;
+
+/**
+ * Base class for WBXML code pages.  It holds the minimal information every code page needs:
+ * the standard (global) WBXML tokens, plus four initially empty lookup tables for the
+ * page's own tag and attribute tokens, and the page index and name.
+ *
+ * NOTE(review): subclasses are presumably expected to fill the protected tables and to set
+ * {@code codePageIndex} / {@code codePageName}; no subclass is visible here to confirm.
+ *
+ * @version 1.0
+ * @author Matthew Brace
+ */
+public class CodePage {
+    /** Tag name to tag token for this page. */
+    protected HashMap<String, Integer> codepageTokens = new HashMap<String, Integer>();
+    /** Tag token to tag name for this page. */
+    protected HashMap<Integer, String> codepageStrings = new HashMap<Integer, String>();
+    /** Attribute text to attribute token for this page. */
+    protected HashMap<String, Integer> attributeTokens = new HashMap<String, Integer>();
+    /** Attribute token to attribute text for this page. */
+    protected HashMap<Integer, String> attributeStrings = new HashMap<Integer, String>();
+    /** Index of this page; -1 for the base page. */
+    protected int codePageIndex = -1;
+    /** Name of this page. */
+    protected String codePageName = "Base";
+
+    /** Global WBXML token name to token value. */
+    private static final HashMap<String, Integer> wbxmlTokens = new HashMap<String, Integer>();
+    /** Global WBXML token value to token name. */
+    private static final HashMap<Integer, String> wbxmlStrings = new HashMap<Integer, String>();
+
+    /* Both directions are filled from one table so they cannot drift apart. */
+    static {
+        register("switch_page", 0x00);
+        register("end", 0x01);
+        register("entity", 0x02);
+        register("str_i", 0x03);
+        register("literal", 0x04);
+        register("ext_i_0", 0x40);
+        register("ext_i_1", 0x41);
+        register("ext_i_2", 0x42);
+        register("pi", 0x43);
+        register("literal_c", 0x44);
+        register("ext_t_0", 0x80);
+        register("ext_t_1", 0x81);
+        register("ext_t_2", 0x82);
+        register("str_t", 0x83);
+        register("literal_a", 0x84);
+        register("ext_0", 0xc0);
+        register("ext_1", 0xc1);
+        register("ext_2", 0xc2);
+        register("opaque", 0xc3);
+        register("literal_ac", 0xc4);
+    }
+
+    /** Records one global WBXML token in both lookup directions. */
+    private static void register(String identity, int token) {
+        wbxmlTokens.put(identity, token);
+        wbxmlStrings.put(token, identity);
+    }
+
+    /**
+     * Return the integer value for the standard WBXML token that corresponds to the supplied string.
+     *
+     * @param identity The string identity of the WBXML token to retrieve
+     * @return The integer value of the corresponding token or -1 if it cannot be found
+     */
+    public Integer getWbxmlToken(String identity) {
+        return tokenOrMissing(wbxmlTokens.get(identity));
+    }
+
+    /**
+     * Return the name of the standard WBXML token that corresponds to the supplied value.
+     *
+     * @param token The integer value of the WBXML token to look up
+     * @return The name of the token or an empty string if it cannot be found
+     */
+    public String getWbxmlString(Integer token) {
+        return stringOrEmpty(wbxmlStrings.get(token));
+    }
+
+    /**
+     * Return the code page index for this code page.
+     *
+     * @return The integer value of the code page, -1 for the base code page
+     */
+    public Integer getCodePageIndex() {
+        return codePageIndex;
+    }
+
+    /**
+     * Return the string representation for the name of this code page.
+     *
+     * @return The namespace of this code page
+     */
+    public String getCodePageName() {
+        return codePageName;
+    }
+
+    /**
+     * Return the integer value for the code page token that corresponds to the supplied string.
+     *
+     * @param identity The string identity of the code page token to retrieve
+     * @return The integer value of the corresponding token or -1 if it cannot be found
+     */
+    public Integer getCodePageToken(String identity) {
+        return tokenOrMissing(codepageTokens.get(identity));
+    }
+
+    /**
+     * Return the string value for the code page that corresponds to the supplied token.
+     *
+     * @param token The integer value of the token of the string to retrieve
+     * @return The string value for the supplied token or an empty string if it cannot be found
+     */
+    public String getCodePageString(Integer token) {
+        return stringOrEmpty(codepageStrings.get(token));
+    }
+
+    /**
+     * Return the integer value for the attribute token that corresponds to the supplied string.
+     *
+     * @param identity The string identity of the attribute token to retrieve
+     * @return The integer value of the corresponding token or -1 if it cannot be found
+     */
+    public Integer getAttributeToken(String identity) {
+        return tokenOrMissing(attributeTokens.get(identity));
+    }
+
+    /**
+     * Return the string value for the attribute that corresponds to the supplied token.
+     *
+     * @param token The integer value of the token of the string to retrieve
+     * @return The string value for the supplied token or an empty string if it cannot be found
+     */
+    public String getAttributeString(Integer token) {
+        return stringOrEmpty(attributeStrings.get(token));
+    }
+
+    /** Maps a failed token lookup (null) to the documented -1 sentinel. */
+    private static Integer tokenOrMissing(Integer token) {
+        if (token == null) {
+            return Integer.valueOf(-1);
+        }
+        return token;
+    }
+
+    /** Maps a failed string lookup (null) to the documented empty string. */
+    private static String stringOrEmpty(String identity) {
+        if (identity == null) {
+            return "";
+        }
+        return identity;
+    }
+}
diff --git a/src/com/android/email/mail/internet/WBXML.java b/src/com/android/email/mail/internet/WBXML.java
new file mode 100644
index 000000000..382c719a0
--- /dev/null
+++ b/src/com/android/email/mail/internet/WBXML.java
@@ -0,0 +1,593 @@
+package com.android.email;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Stack;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
+
+import com.android.email.Utility;
+import com.android.email.CodePage;
+
+/**
+ * This class represents an entity for converting between WBXML and XML.  The process uses
+ * subclasses of the CodePage class that contain data for each code page potentially
+ * referenced in the document.
+ *
+ * Only the subset of WBXML handled below is supported: UTF-8 documents, tag and attribute
+ * tokens from the supplied code pages, inline strings, opaque data and page switches.  All
+ * other global tokens are read and ignored.  Opaque data is represented in the XML as text
+ * starting with the marker "BASE64" followed by the Base64 form of the data.
+ *
+ * An instance keeps per-conversion state in fields, so it must not be used for two
+ * conversions at the same time.
+ *
+ * @version .1
+ * @author Matthew Brace
+ */
+class WBXML {
+    /* WBXML document header values */
+    public static final int WBXML_VERSION11 = 0x01;     /* WBXML 1.1 */
+    public static final int WBXML_VERSION13 = 0x03;     /* WBXML 1.3 */
+    public static final int WBXML_UNKNOWN_PI = 0x01;    /* Unknown public identifier */
+    public static final int WBXML_UTF8_ENCODING = 0x6A; /* UTF-8 encoding */
+
+    /* Global tokens that are acted upon */
+    private static final int TOKEN_SWITCH_PAGE = 0x00;
+    private static final int TOKEN_END = 0x01;
+    private static final int TOKEN_STR_I = 0x03;
+    private static final int TOKEN_OPAQUE = 0xc3;
+
+    /* Layout of a tag token: bit 7 = has attributes, bit 6 = has content, low 6 bits = tag */
+    private static final int FLAG_ATTRIBUTES = 0x80;
+    private static final int FLAG_CONTENT = 0x40;
+    private static final int TAG_MASK = 0x3f;
+
+    /* Text prefix that marks Base64 encoded opaque data in the XML form */
+    private static final String OPAQUE_MARKER = "BASE64";
+    private static final String XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
+    private static final String UTF8 = "UTF-8";
+
+    private static final java.util.logging.Logger LOG =
+        java.util.logging.Logger.getLogger(WBXML.class.getName());
+
+    private CodePage[] pageList;
+    /* Qualified names of the elements that are currently open while decoding */
+    private Stack<String> xmlStack;
+
+    /**
+     * Initializes the object to a state ready for converting.
+     *
+     * @param codePages Array of CodePage objects, their index in the array corresponds to page id
+     */
+    public WBXML(CodePage[] codePages) {
+        pageList = codePages;
+    }
+
+    /**
+     * Sets the associated array of CodePage objects to be used for converting formats.
+     *
+     * @param codePages Array of CodePage objects, their index in the array corresponds to page id
+     */
+    public void setCodePage(CodePage[] codePages) {
+        pageList = codePages;
+    }
+
+    /**
+     * Converts a WBXML input stream to an XML output stream.  Conversion starts on code page 0.
+     * If the input is malformed or an I/O error occurs, the problem is logged, whatever was
+     * converted so far is flushed to the output, and the method returns normally.
+     *
+     * @param in The WBXML stream to read from
+     * @param out The XML stream to write to
+     */
+    public void convertWbxmlToXml(InputStream in, OutputStream out) {
+        BufferedInputStream istream = new BufferedInputStream(in);
+        BufferedOutputStream ostream = new BufferedOutputStream(out);
+        CodePage codepage = pageList[0];
+        xmlStack = new Stack<String>();
+
+        try {
+            try {
+                readHeader(istream);
+                writeUtf8(ostream, XML_DECLARATION);
+                processTagState(istream, ostream, codepage);
+            } finally {
+                /* Output is only flushed here, so flush on failure as well */
+                ostream.flush();
+            }
+        } catch (Exception e) {
+            LOG.log(java.util.logging.Level.WARNING, "WBXML to XML conversion aborted", e);
+        }
+    }
+
+    /**
+     * Consumes the WBXML document header: version byte, public identifier, charset and
+     * string table.  The string table is skipped because ActiveSync does not use it.
+     *
+     * @throws IOException if the stream ends early or the charset is not UTF-8
+     */
+    private static void readHeader(InputStream in) throws IOException {
+        /* The version byte carries no information the converter needs */
+        if (in.read() == -1) {
+            throw new IOException("Empty WBXML stream");
+        }
+        if (readMultiByteInt(in) == 0) {
+            /* Public identifier 0 means a string table index follows instead */
+            readMultiByteInt(in);
+        }
+        int charset = readMultiByteInt(in);
+        int stringTableLength = readMultiByteInt(in);
+
+        /* ActiveSync only uses UTF-8, so we only support UTF-8 for now */
+        if (charset != WBXML_UTF8_ENCODING) {
+            throw new IOException("Unknown charset encoding");
+        }
+        for (int i = 0; i < stringTableLength; i++) {
+            if (in.read() == -1) {
+                throw new IOException("String table truncated");
+            }
+        }
+    }
+
+    /**
+     * Reads tokens in tag state until the stream ends, writing the XML for each one.
+     * This is a loop rather than per-token recursion so that stack depth does not grow
+     * with document size.
+     *
+     * NOTE(review): text is written without XML escaping, so content containing '&' or
+     * '<' produces XML that is not well formed.
+     */
+    private void processTagState(BufferedInputStream istream,
+                                 BufferedOutputStream ostream,
+                                 CodePage codepage) throws IOException {
+        int streamByte;
+        while ((streamByte = istream.read()) != -1) {
+            boolean hasContent = false;
+            boolean hasAttributes = false;
+            String output = "";
+
+            if (isGlobalToken(streamByte)) {
+                switch (streamByte) {
+                    case TOKEN_SWITCH_PAGE:
+                        codepage = switchPage(istream, codepage);
+                        break;
+                    case TOKEN_END:
+                        /* Close the most recently opened element */
+                        if (!xmlStack.empty()) {
+                            output = "</" + xmlStack.pop() + ">";
+                        }
+                        break;
+                    case TOKEN_STR_I:
+                        output = readInlineString(istream);
+                        break;
+                    case TOKEN_OPAQUE:
+                        output = readOpaque(istream);
+                        break;
+                    default:
+                        /* entity, literal*, ext*, pi and str_t are not supported and are skipped */
+                        break;
+                }
+            } else {
+                /* A tag token from the current code page */
+                hasContent = (streamByte & FLAG_CONTENT) != 0;
+                hasAttributes = (streamByte & FLAG_ATTRIBUTES) != 0;
+                String qualifiedName = codepage.getCodePageName() + ":"
+                    + codepage.getCodePageString(streamByte & TAG_MASK);
+
+                output = "<" + qualifiedName;
+                if (hasContent) {
+                    xmlStack.push(qualifiedName);
+                }
+                if (!hasAttributes) {
+                    output = output + (hasContent ? ">" : "/>");
+                }
+            }
+
+            writeUtf8(ostream, output);
+
+            if (hasAttributes) {
+                /* The attribute state writes the text that closes the start tag */
+                processAttributeState(istream, ostream, codepage, hasContent ? ">" : "/>");
+            }
+        }
+    }
+
+    /**
+     * Reads tokens in attribute state until the END token that terminates the attribute
+     * list (or until the stream ends) and writes them into the open start tag.  A page
+     * switch made here applies to the rest of this attribute list only.
+     *
+     * @param tagCloser text that closes the start tag: ">" if the element has content, "/>" if not
+     */
+    private void processAttributeState(BufferedInputStream istream,
+                                       BufferedOutputStream ostream,
+                                       CodePage codepage,
+                                       String tagCloser) throws IOException {
+        int streamByte;
+        while ((streamByte = istream.read()) != -1) {
+            boolean attributeDone = false;
+            String output = "";
+
+            if (isGlobalToken(streamByte)) {
+                switch (streamByte) {
+                    case TOKEN_SWITCH_PAGE:
+                        codepage = switchPage(istream, codepage);
+                        break;
+                    case TOKEN_END:
+                        /* End to attributes means the start tag is done */
+                        output = tagCloser;
+                        attributeDone = true;
+                        break;
+                    case TOKEN_STR_I:
+                        output = readInlineString(istream);
+                        break;
+                    case TOKEN_OPAQUE:
+                        output = readOpaque(istream);
+                        break;
+                    default:
+                        /* entity, literal*, ext*, pi and str_t are not supported and are skipped */
+                        break;
+                }
+            } else {
+                /* We only support single attribute statements.
+                 * This means we can't do fieldnametoken fieldvaluetoken,
+                 * only token = name="token"
+                 */
+                output = " " + codepage.getAttributeString(streamByte);
+            }
+
+            writeUtf8(ostream, output);
+
+            if (attributeDone) {
+                return;
+            }
+        }
+    }
+
+    /** Tells whether the byte is one of the global tokens 0x00-0x04, 0x40-0x44, 0x80-0x84, 0xc0-0xc4. */
+    private static boolean isGlobalToken(int streamByte) {
+        return (streamByte & 15) <= 0x4 && ((streamByte >>> 4) % 4) == 0;
+    }
+
+    /**
+     * Reads the page index that follows a switch_page token.
+     *
+     * @return the page to use from now on; the current page if the list has no entry for the index
+     * @throws IOException if the stream ends or the index is outside the page list
+     */
+    private CodePage switchPage(InputStream in, CodePage current) throws IOException {
+        int index = in.read();
+        if (index < 0 || index >= pageList.length) {
+            throw new IOException("Unknown code page: " + index);
+        }
+        return pageList[index] != null ? pageList[index] : current;
+    }
+
+    /**
+     * Reads an inline string up to its null terminator (or the end of the stream) and
+     * decodes the collected bytes as UTF-8, so multi-byte characters survive.
+     */
+    private static String readInlineString(InputStream in) throws IOException {
+        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+        int stringByte;
+        while ((stringByte = in.read()) > 0) {
+            bytes.write(stringByte);
+        }
+        return bytes.toString(UTF8);
+    }
+
+    /**
+     * Reads opaque data (a multi-byte length followed by that many bytes) and returns it
+     * as marker text, because raw binary data could invalidate the XML document.
+     *
+     * NOTE(review): the bytes still pass through new String(data) because the only Base64
+     * encoder visible in Utility takes a String; data that is not valid text in the
+     * platform charset is presumably damaged by that step - confirm and add a byte[] encoder.
+     *
+     * @throws IOException if the stream ends before the announced length has been read
+     */
+    private static String readOpaque(InputStream in) throws IOException {
+        int dataLength = readMultiByteInt(in);
+        ByteArrayOutputStream data = new ByteArrayOutputStream();
+        for (int i = 0; i < dataLength; i++) {
+            int dataByte = in.read();
+            if (dataByte == -1) {
+                throw new IOException("Opaque data truncated");
+            }
+            data.write(dataByte);
+        }
+        return OPAQUE_MARKER + Utility.base64Encode(new String(data.toByteArray()));
+    }
+
+    /**
+     * Reads a WBXML multi-byte integer: 7 value bits per byte, most significant group
+     * first, with the high bit set on every byte except the last.
+     *
+     * @throws IOException if the stream ends inside the integer or it is longer than 5 bytes
+     */
+    private static int readMultiByteInt(InputStream in) throws IOException {
+        int value = 0;
+        for (int count = 0; count < 5; count++) {
+            int part = in.read();
+            if (part == -1) {
+                throw new IOException("Unexpected end of stream in multi-byte integer");
+            }
+            value = (value << 7) | (part & 0x7f);
+            if ((part & 0x80) == 0) {
+                if (value < 0) {
+                    break;
+                }
+                return value;
+            }
+        }
+        throw new IOException("Multi-byte integer out of range");
+    }
+
+    /** Returns the UTF-8 bytes of the string. */
+    private static byte[] toUtf8(String text) {
+        try {
+            return text.getBytes(UTF8);
+        } catch (IOException e) {
+            /* Every Java platform is required to support UTF-8 */
+            throw new IllegalStateException("UTF-8 is not supported", e);
+        }
+    }
+
+    /** Writes the whole string as UTF-8; the byte count can differ from the char count. */
+    private static void writeUtf8(OutputStream out, String text) throws IOException {
+        out.write(toUtf8(text));
+    }
+
+    /** Removes one trailing ':' (as in "AirSync:") so the name can be compared to page names. */
+    private static String stripTrailingColon(String name) {
+        if (name.endsWith(":")) {
+            return name.substring(0, name.length() - 1);
+        }
+        return name;
+    }
+
+    /**
+     * Finds the code page with the given name.  Every page is examined, including the last.
+     *
+     * @return the index of the page in the page list, or -1 if no page has that name
+     */
+    private int findPage(String name) {
+        for (int i = 0; i < pageList.length; i++) {
+            if (pageList[i] != null && pageList[i].getCodePageName().equals(name)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Converts an XML input stream to a WBXML output stream.  If the XML cannot be parsed
+     * or converted, or an I/O error occurs, the problem is logged and the method returns
+     * normally; the output may then be incomplete.
+     *
+     * @param in The XML stream to read from
+     * @param out The WBXML stream to write to
+     */
+    public void convertXmlToWbxml(InputStream in, OutputStream out) {
+        SAXParserFactory spf = SAXParserFactory.newInstance();
+        try {
+            SAXParser sp = spf.newSAXParser();
+            XMLReader xr = sp.getXMLReader();
+            xr.setContentHandler(new XMLHandler(out));
+            xr.parse(new InputSource(in));
+        } catch (ParserConfigurationException pce) {
+            LOG.log(java.util.logging.Level.WARNING,
+                    "ParserConfigurationException in convertXmlToWbxml", pce);
+        } catch (SAXException se) {
+            LOG.log(java.util.logging.Level.WARNING, "SAXException in convertXmlToWbxml", se);
+        } catch (IOException ioe) {
+            LOG.log(java.util.logging.Level.WARNING, "IOException in convertXmlToWbxml", ioe);
+        }
+    }
+
+    /**
+     * Handle parsing the XML data stream to convert to WBXML.
+     *
+     * Bytes are collected in a pending buffer whose first entry is the most recent start
+     * tag token.  That token is only final once it is known whether the element has
+     * content, so the buffer is written out when the next element starts or the document ends.
+     */
+    public class XMLHandler extends DefaultHandler {
+        /* Code page the tag tokens are currently taken from */
+        private CodePage codepage = pageList[0];
+        private BufferedOutputStream ostream;
+        private ArrayList<Integer> pendingBuffer;
+        /* True while pendingBuffer.get(0) is the start tag of an element that has not ended */
+        private boolean tagOpen;
+
+        public XMLHandler(OutputStream out) {
+            ostream = new BufferedOutputStream(out);
+            pendingBuffer = new ArrayList<Integer>();
+        }
+
+        /** Writes the standard document header information. */
+        @Override
+        public void startDocument() throws SAXException {
+            try {
+                ostream.write(WBXML_VERSION13);
+                ostream.write(WBXML_UNKNOWN_PI);
+                /* Only charset we currently use is UTF-8 */
+                ostream.write(WBXML_UTF8_ENCODING);
+                /* We don't use string tables, so the table length is 0 */
+                ostream.write(0x00);
+            } catch (IOException ioe) {
+                throw new SAXException("IOException writing header: " + ioe, ioe);
+            }
+        }
+
+        /** Writes whatever is still pending and flushes the output stream. */
+        @Override
+        public void endDocument() throws SAXException {
+            flushPending("IOException in writing buffer: ");
+            tagOpen = false;
+
+            try {
+                ostream.flush();
+            } catch (IOException ioe) {
+                throw new SAXException("IOException flushing output stream: " + ioe, ioe);
+            }
+        }
+
+        /**
+         * Buffers the start tag token of the element, preceded on the stream by a page
+         * switch if the element belongs to another code page, and followed by its
+         * attribute tokens.
+         *
+         * @throws SAXException if the element is not defined in its code page or writing fails
+         */
+        @Override
+        public void startElement(String namespaceURI, String localName,
+                                 String qName, Attributes atts) throws SAXException {
+            namespaceURI = stripTrailingColon(namespaceURI);
+            /* Parsers that are not namespace aware only supply the qualified name */
+            int colon = qName.lastIndexOf(':');
+            if (localName.equals("") && !qName.equals("")) {
+                localName = qName.substring(colon + 1);
+            }
+            if (namespaceURI.equals("") && colon > 0) {
+                namespaceURI = qName.substring(0, colon);
+            }
+
+            /* A child element means the parent, if its tag is still pending, has content */
+            if (tagOpen) {
+                markContent();
+            }
+            flushPending("IOException writing buffer: ");
+            tagOpen = false;
+
+            /* The codepage needs to match the namespace so the correct bytes are written.
+             * If no page has that name the current page stays in use.
+             */
+            if (!codepage.getCodePageName().equals(namespaceURI)) {
+                int pageIndex = findPage(namespaceURI);
+                if (pageIndex != -1) {
+                    codepage = pageList[pageIndex];
+                    try {
+                        ostream.write(TOKEN_SWITCH_PAGE);
+                        ostream.write(pageIndex);
+                    } catch (IOException ioe) {
+                        throw new SAXException("IOException writing page change: " + ioe, ioe);
+                    }
+                }
+            }
+
+            int startToken = codepage.getCodePageToken(localName);
+            if (startToken == -1) {
+                throw new SAXException("Element " + qName + " is not defined in code page "
+                                       + codepage.getCodePageName());
+            }
+
+            /* This is the only location where the attribute information is available */
+            ArrayList<Integer> attributeBytes = encodeAttributes(namespaceURI, atts);
+            if (!attributeBytes.isEmpty()) {
+                startToken |= FLAG_ATTRIBUTES;
+            }
+            pendingBuffer.add(startToken);
+            pendingBuffer.addAll(attributeBytes);
+            tagOpen = true;
+        }
+
+        /**
+         * Encodes the attributes that have a token in their code page.  Attributes without
+         * a token (namespace declarations, for instance) are left out.
+         *
+         * @return the attribute bytes including the single END that terminates the list,
+         *         or an empty list if no attribute could be encoded
+         */
+        private ArrayList<Integer> encodeAttributes(String elementNamespace, Attributes atts) {
+            ArrayList<Integer> encoded = new ArrayList<Integer>();
+            /* Kept apart from the tag page: the decoder does not carry a page switch made
+             * inside an attribute list back into tag state.
+             */
+            CodePage attributePage = codepage;
+
+            for (int i = 0, count = atts.getLength(); i < count; i++) {
+                String attNamespace = stripTrailingColon(atts.getURI(i));
+                if (attNamespace.equals("")) {
+                    attNamespace = elementNamespace;
+                }
+
+                /* It's possible to change namespaces mid attribute */
+                int switchTo = -1;
+                CodePage lookupPage = attributePage;
+                if (!attNamespace.equals("")
+                        && !attNamespace.equals(attributePage.getCodePageName())) {
+                    switchTo = findPage(attNamespace);
+                    if (switchTo != -1) {
+                        lookupPage = pageList[switchTo];
+                    }
+                }
+
+                /* We don't support name/value pairs yet, so lookup by full thing */
+                String fullValue = atts.getLocalName(i) + "=\"" + atts.getValue(i) + "\"";
+                int attToken = lookupPage.getAttributeToken(fullValue);
+                if (attToken == -1) {
+                    continue;
+                }
+
+                if (switchTo != -1) {
+                    encoded.add(TOKEN_SWITCH_PAGE);
+                    encoded.add(switchTo);
+                    attributePage = lookupPage;
+                }
+                encoded.add(attToken);
+            }
+
+            /* One END terminates the whole attribute list */
+            if (!encoded.isEmpty()) {
+                encoded.add(TOKEN_END);
+            }
+            return encoded;
+        }
+
+        /**
+         * Buffers the END token of the element.  An element that never received content
+         * keeps a start tag without the content flag and therefore gets no END token.
+         */
+        @Override
+        public void endElement(String namespaceURI, String localName, String qName) {
+            boolean emptyElement = tagOpen && (pendingBuffer.get(0) & FLAG_CONTENT) == 0;
+            tagOpen = false;
+            if (!emptyElement) {
+                pendingBuffer.add(TOKEN_END);
+            }
+        }
+
+        /**
+         * Buffers character data as an inline string, or as opaque data if it carries the
+         * BASE64 marker.
+         *
+         * NOTE(review): a parser may deliver one text node in several calls; the marker is
+         * only recognised at the start of a call.
+         */
+        @Override
+        public void characters(char ch[], int start, int length) {
+            if (length <= 0) {
+                return;
+            }
+            /* Text makes the element whose start tag is still pending a content element */
+            if (tagOpen) {
+                markContent();
+            }
+
+            String text = new String(ch, start, length);
+            /* If it needs to be opaque data, we use a cheap hack.  A string starting
+             * with BASE64 is a base 64 encoded string and should be opaque data.
+             * Really need to find a better way to deal with this.
+             */
+            if (text.startsWith(OPAQUE_MARKER)) {
+                String encodedData = text.substring(OPAQUE_MARKER.length());
+                byte[] decodedData = Utility.base64Decode(toUtf8(encodedData));
+
+                pendingBuffer.add(TOKEN_OPAQUE);
+                addMultiByteInt(decodedData.length);
+                addBytes(decodedData);
+            } else {
+                pendingBuffer.add(TOKEN_STR_I);
+                addBytes(toUtf8(text));
+                /* End the string with a null terminator since we only support UTF-8 */
+                pendingBuffer.add(0x00);
+            }
+        }
+
+        /** Sets the content flag on the pending start tag token. */
+        private void markContent() {
+            pendingBuffer.set(0, pendingBuffer.get(0) | FLAG_CONTENT);
+        }
+
+        /** Appends the bytes to the pending buffer. */
+        private void addBytes(byte[] bytes) {
+            for (int i = 0, count = bytes.length; i < count; i++) {
+                pendingBuffer.add((int) bytes[i]);
+            }
+        }
+
+        /**
+         * Appends a non-negative value as a WBXML multi-byte integer: 7 value bits per
+         * byte, most significant group first, high bit set on every byte except the last.
+         */
+        private void addMultiByteInt(int value) {
+            int shift = 28;
+            while (shift > 0 && (value >>> shift) == 0) {
+                shift -= 7;
+            }
+            for (; shift > 0; shift -= 7) {
+                pendingBuffer.add(0x80 | ((value >>> shift) & 0x7f));
+            }
+            pendingBuffer.add(value & 0x7f);
+        }
+
+        /**
+         * Writes the pending bytes to the output stream and empties the buffer.
+         *
+         * @param errorPrefix start of the message of the SAXException raised if writing fails
+         */
+        private void flushPending(String errorPrefix) throws SAXException {
+            try {
+                for (Integer pending : pendingBuffer) {
+                    ostream.write(pending.byteValue());
+                }
+            } catch (IOException ioe) {
+                throw new SAXException(errorPrefix + ioe, ioe);
+            }
+            pendingBuffer.clear();
+        }
+    }
+}