From e612d3b09c6dd02396e8b9f9a992a35ae82ad076 Mon Sep 17 00:00:00 2001
From: liato This uses TagSoup to handle real HTML, including all of the brokenness found in the wild.
+ */
+ public static Spanned fromHtml(String source) {
+ // Convenience overload: delegates to the three-argument form with
+ // neither an ImageGetter nor a TagHandler.
+ return fromHtml(source, null, null);
+ }
+
+ /**
+ * Lazy initialization holder for HTML parser. This class will
+ * a) be preloaded by the zygote, or b) not loaded until absolutely
+ * necessary.
+ */
+ private static class HtmlParser {
+ private static final HTMLSchema schema = new HTMLSchema();
+ }
+
+ /**
+  * Returns displayable styled text from the provided HTML string.
+  * Image tags in the HTML are resolved through the given ImageGetter
+  * and tags the converter does not handle itself are forwarded to the
+  * given TagHandler.
+  *
+  * This uses TagSoup to handle real HTML, including all of the
+  * brokenness found in the wild.
+  *
+  * @param source the HTML markup to convert
+  * @param imageGetter supplies a Drawable for each image tag, or null
+  *        if you don't want this
+  * @param tagHandler receives unknown tags, or null if you don't
+  *        want this
+  * @return the styled text built by HtmlToSpannedConverter
+  */
+ public static Spanned fromHtml(String source, ImageGetter imageGetter,
+         TagHandler tagHandler) {
+     final Parser tagSoup = new Parser();
+     try {
+         tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
+     } catch (org.xml.sax.SAXException e) {
+         // Covers both SAXNotRecognizedException and
+         // SAXNotSupportedException. Should not happen.
+         throw new RuntimeException(e);
+     }
+
+     return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup)
+             .convert();
+ }
+
+ /**
+  * Serializes the provided Spanned text into an HTML string.
+  *
+  * @param text the styled text to convert
+  * @return the HTML markup accumulated by withinHtml()
+  */
+ public static String toHtml(Spanned text) {
+     final StringBuilder html = new StringBuilder();
+     withinHtml(html, text);
+     return html.toString();
+ }
+
+ private static void withinHtml(StringBuilder out, Spanned text) {
+ int len = text.length();
+
+ int next;
+ for (int i = 0; i < text.length(); i = next) {
+ next = text.nextSpanTransition(i, len, ParagraphStyle.class);
+ ParagraphStyle[] style = text.getSpans(i, next, ParagraphStyle.class);
+ String elements = " ";
+ boolean needDiv = false;
+
+ for(int j = 0; j < style.length; j++) {
+ if (style[j] instanceof AlignmentSpan) {
+ Layout.Alignment align =
+ ((AlignmentSpan) style[j]).getAlignment();
+ needDiv = true;
+ if (align == Layout.Alignment.ALIGN_CENTER) {
+ elements = "align=\"center\" " + elements;
+ } else if (align == Layout.Alignment.ALIGN_OPPOSITE) {
+ elements = "align=\"right\" " + elements;
+ } else {
+ elements = "align=\"left\" " + elements;
+ }
+ }
+ }
+ if (needDiv) {
+ out.append(" ");
+
+ int next;
+ for (int i = start; i < end; i = next) {
+ next = TextUtils.indexOf(text, '\n', i, end);
+ if (next < 0) {
+ next = end;
+ }
+
+ int nl = 0;
+
+ while (next < end && text.charAt(next) == '\n') {
+ nl++;
+ next++;
+ }
+
+ withinParagraph(out, text, i, next - nl, nl, next == end);
+ }
+
+ out.append("source
argument is the
+ * string from the "src" attribute; the return value should be
+ * a Drawable representation of the image or null
+ * for a generic replacement image. Make sure you call
+ * setBounds() on your Drawable if it doesn't already have
+ * its bounds set.
+ */
+ public Drawable getDrawable(String source);
+ }
+
+ /**
+  * Callback that is notified when the converter meets an HTML tag it
+  * does not know how to interpret.
+  */
+ public interface TagHandler {
+     /**
+      * Called when the HTML parser encounters a tag that it does not
+      * know how to interpret.
+      *
+      * @param opening true for the opening tag, false for the closing tag
+      * @param tag the tag name as reported by the parser
+      * @param output the text built so far, which the handler may edit
+      * @param xmlReader the reader that is driving the parse
+      */
+     void handleTag(boolean opening, String tag,
+             Editable output, XMLReader xmlReader);
+ }
+
+ // Private constructor: the members visible in this file are all static,
+ // so nothing needs to instantiate Html2.
+ private Html2() { }
+
+ /**
+ * Returns displayable styled text from the provided HTML string.
+ * Any <img> tags in the HTML will display as a generic
+ * replacement image which your program can then go through and
+ * replace with real images.
+ *
+ * ");
+ }
+
+ withinBlockquote(out, text, i, next);
+
+ for (QuoteSpan quote: quotes) {
+ out.append("
\n");
+ }
+ }
+ }
+
+ private static void withinBlockquote(StringBuilder out, Spanned text,
+ int start, int end) {
+ out.append("");
+ }
+ if (style[j] instanceof URLSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof ImageSpan) {
+ out.append("");
+
+ // Don't output the dummy character underlying the image.
+ i = next;
+ }
+ if (style[j] instanceof AbsoluteSizeSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof ForegroundColorSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof BackgroundColorSpan) {
+ out.append("");
+ }
+ }
+
+ withinStyle(out, text, i, next);
+
+ for (int j = style.length - 1; j >= 0; j--) {
+ if (style[j] instanceof ForegroundColorSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof BackgroundColorSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof AbsoluteSizeSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof URLSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof StrikethroughSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof UnderlineSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof SubscriptSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof SuperscriptSpan) {
+ out.append("");
+ }
+ if (style[j] instanceof TypefaceSpan) {
+ String s = ((TypefaceSpan) style[j]).getFamily();
+
+ if (s.equals("monospace")) {
+ out.append("");
+ }
+ }
+ if (style[j] instanceof StyleSpan) {
+ int s = ((StyleSpan) style[j]).getStyle();
+
+ if ((s & Typeface.BOLD) != 0) {
+ out.append("");
+ }
+ if ((s & Typeface.ITALIC) != 0) {
+ out.append("");
+ }
+ }
+ }
+ }
+
+ String p = last ? "" : "
";
+
+ if (nl == 1) {
+ out.append("
\n");
+ } else if (nl == 2) {
+ out.append(p);
+ } else {
+ for (int i = 2; i < nl; i++) {
+ out.append("
");
+ }
+
+ out.append(p);
+ }
+ }
+
+ /**
+  * Appends the characters of {@code text} in the range [start, end) to
+  * {@code out}, escaped so that they are safe inside HTML markup.
+  *
+  * The markup-significant characters are written as named character
+  * references; control characters and everything above 0x7E are written
+  * as numeric character references (a valid surrogate pair becomes one
+  * reference for the combined code point); in a run of spaces all but
+  * the last become non-breaking spaces so the run survives HTML
+  * whitespace collapsing.
+  *
+  * @param out destination buffer
+  * @param text source text
+  * @param start index of the first character to emit
+  * @param end index one past the last character to emit
+  */
+ private static void withinStyle(StringBuilder out, Spanned text,
+         int start, int end) {
+     for (int i = start; i < end; i++) {
+         char c = text.charAt(i);
+
+         if (c == '<') {
+             out.append("&lt;");
+         } else if (c == '>') {
+             out.append("&gt;");
+         } else if (c == '&') {
+             out.append("&amp;");
+         } else if (Character.isHighSurrogate(c) && i + 1 < end
+                 && Character.isLowSurrogate(text.charAt(i + 1))) {
+             // Emit one reference for the whole supplementary code point
+             // rather than two references to lone surrogates.
+             int codePoint = Character.toCodePoint(c, text.charAt(i + 1));
+             out.append("&#").append(codePoint).append(';');
+             i++;
+         } else if (c > 0x7E || c < ' ') {
+             out.append("&#").append((int) c).append(';');
+         } else if (c == ' ') {
+             while (i + 1 < end && text.charAt(i + 1) == ' ') {
+                 out.append("&nbsp;");
+                 i++;
+             }
+
+             out.append(' ');
+         } else {
+             out.append(c);
+         }
+     }
+ }
+}
+
+class HtmlToSpannedConverter implements ContentHandler {
+
+ // Relative text-size multipliers for headings, indexed by heading
+ // level minus one (index 0 is h1, index 5 is h6).
+ private static final float[] HEADER_SIZES = {
+ 1.5f, 1.4f, 1.3f, 1.2f, 1.1f, 1f,
+ };
+
+ // The HTML text handed to the constructor; parsed by convert().
+ private String mSource;
+ // The parser handed to the constructor; this object registers itself
+ // on it as the content handler.
+ private XMLReader mReader;
+ // Accumulates the output text and its spans while parsing.
+ private SpannableStringBuilder mSpannableStringBuilder;
+ // Optional image resolver passed to startImg(); may be null.
+ private Html2.ImageGetter mImageGetter;
+ // Optional callback for tags not handled here; may be null.
+ private Html2.TagHandler mTagHandler;
+
+ /**
+  * Creates a converter for one HTML string.
+  *
+  * @param source the HTML text to parse
+  * @param imageGetter resolver for image tags, may be null
+  * @param tagHandler callback for tags not handled here, may be null
+  * @param parser the parser that will drive this content handler
+  */
+ public HtmlToSpannedConverter(
+         String source, Html2.ImageGetter imageGetter, Html2.TagHandler tagHandler,
+         Parser parser) {
+     this.mSource = source;
+     this.mReader = parser;
+     this.mImageGetter = imageGetter;
+     this.mTagHandler = tagHandler;
+     this.mSpannableStringBuilder = new SpannableStringBuilder();
+ }
+
+ /**
+  * Parses the source with this object as the content handler and then
+  * normalizes every paragraph-style span on the result.
+  *
+  * @return the builder holding the converted text and spans
+  * @throws RuntimeException wrapping any IOException or SAXException
+  *         raised by the parser
+  */
+ public Spanned convert() {
+     mReader.setContentHandler(this);
+     try {
+         final InputSource input = new InputSource(new StringReader(mSource));
+         mReader.parse(input);
+     } catch (IOException e) {
+         // We are reading from a string. There should not be IO problems.
+         throw new RuntimeException(e);
+     } catch (SAXException e) {
+         // TagSoup doesn't throw parse exceptions.
+         throw new RuntimeException(e);
+     }
+
+     // Fix flags and range for paragraph-type markup.
+     final SpannableStringBuilder result = mSpannableStringBuilder;
+     final Object[] paragraphSpans =
+             result.getSpans(0, result.length(), ParagraphStyle.class);
+     for (Object span : paragraphSpans) {
+         final int spanStart = result.getSpanStart(span);
+         int spanEnd = result.getSpanEnd(span);
+
+         // If the last line of the range is blank, back off by one.
+         if (spanEnd >= 2
+                 && result.charAt(spanEnd - 1) == '\n'
+                 && result.charAt(spanEnd - 2) == '\n') {
+             spanEnd--;
+         }
+
+         if (spanEnd != spanStart) {
+             result.setSpan(span, spanStart, spanEnd, Spannable.SPAN_PARAGRAPH);
+         } else {
+             result.removeSpan(span);
+         }
+     }
+
+     return result;
+ }
+
+ /**
+  * Reacts to an opening tag. Block-level tags insert paragraph breaks;
+  * inline styling tags drop a zero-length mark at the current end of the
+  * output, which handleEndTag() later replaces with the real span. Tags
+  * not listed here are forwarded to the TagHandler, if one was supplied.
+  *
+  * strong and b are rendered bold; em, cite, dfn and i are rendered
+  * italic. The mark class used here must match the one looked up in
+  * handleEndTag() for the same tag.
+  *
+  * @param tag the element's local name
+  * @param attributes the element's attributes
+  */
+ private void handleStartTag(String tag, Attributes attributes) {
+     if (tag.equalsIgnoreCase("br")) {
+         // We don't need to handle this. TagSoup will ensure that there's a
+         // closing br tag for each opening one, so we can safely emit the
+         // line break when we handle the close tag.
+     } else if (tag.equalsIgnoreCase("p")) {
+         handleP(mSpannableStringBuilder);
+     } else if (tag.equalsIgnoreCase("div")) {
+         handleP(mSpannableStringBuilder);
+     } else if (tag.equalsIgnoreCase("strong")) {
+         start(mSpannableStringBuilder, new Bold());
+     } else if (tag.equalsIgnoreCase("b")) {
+         start(mSpannableStringBuilder, new Bold());
+     } else if (tag.equalsIgnoreCase("em")) {
+         start(mSpannableStringBuilder, new Italic());
+     } else if (tag.equalsIgnoreCase("cite")) {
+         start(mSpannableStringBuilder, new Italic());
+     } else if (tag.equalsIgnoreCase("dfn")) {
+         start(mSpannableStringBuilder, new Italic());
+     } else if (tag.equalsIgnoreCase("i")) {
+         start(mSpannableStringBuilder, new Italic());
+     } else if (tag.equalsIgnoreCase("big")) {
+         start(mSpannableStringBuilder, new Big());
+     } else if (tag.equalsIgnoreCase("small")) {
+         start(mSpannableStringBuilder, new Small());
+     } else if (tag.equalsIgnoreCase("font")) {
+         startFont(mSpannableStringBuilder, attributes);
+     } else if (tag.equalsIgnoreCase("blockquote")) {
+         handleP(mSpannableStringBuilder);
+         start(mSpannableStringBuilder, new Blockquote());
+     } else if (tag.equalsIgnoreCase("tt")) {
+         start(mSpannableStringBuilder, new Monospace());
+     } else if (tag.equalsIgnoreCase("a")) {
+         startA(mSpannableStringBuilder, attributes);
+     } else if (tag.equalsIgnoreCase("u")) {
+         start(mSpannableStringBuilder, new Underline());
+     } else if (tag.equalsIgnoreCase("sup")) {
+         start(mSpannableStringBuilder, new Super());
+     } else if (tag.equalsIgnoreCase("sub")) {
+         start(mSpannableStringBuilder, new Sub());
+     } else if (tag.length() == 2 &&
+             Character.toLowerCase(tag.charAt(0)) == 'h' &&
+             tag.charAt(1) >= '1' && tag.charAt(1) <= '6') {
+         // h1..h6: the header level is stored zero-based.
+         handleP(mSpannableStringBuilder);
+         start(mSpannableStringBuilder, new Header(tag.charAt(1) - '1'));
+     } else if (tag.equalsIgnoreCase("img")) {
+         startImg(mSpannableStringBuilder, attributes, mImageGetter);
+     } else if (mTagHandler != null) {
+         mTagHandler.handleTag(true, tag, mSpannableStringBuilder, mReader);
+     }
+ }
+
+ /**
+  * Reacts to a closing tag: emits line/paragraph breaks for block-level
+  * tags and turns the mark left by handleStartTag() into the final span
+  * for inline styling tags. Tags not listed here are forwarded to the
+  * TagHandler, if one was supplied.
+  *
+  * @param tag the element's local name
+  */
+ private void handleEndTag(String tag) {
+     if (tag.equalsIgnoreCase("br")) {
+         handleBr(mSpannableStringBuilder);
+     } else if (tag.equalsIgnoreCase("p")) {
+         handleP(mSpannableStringBuilder);
+     } else if (tag.equalsIgnoreCase("div")) {
+         handleP(mSpannableStringBuilder);
+     } else if (tag.equalsIgnoreCase("strong")) {
+         end(mSpannableStringBuilder, Bold.class, new StyleSpan(Typeface.BOLD));
+     } else if (tag.equalsIgnoreCase("b")) {
+         end(mSpannableStringBuilder, Bold.class, new StyleSpan(Typeface.BOLD));
+     } else if (tag.equalsIgnoreCase("em")) {
+         end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
+     } else if (tag.equalsIgnoreCase("cite")) {
+         end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
+     } else if (tag.equalsIgnoreCase("dfn")) {
+         end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
+     } else if (tag.equalsIgnoreCase("i")) {
+         end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
+     } else if (tag.equalsIgnoreCase("big")) {
+         end(mSpannableStringBuilder, Big.class, new RelativeSizeSpan(1.25f));
+     } else if (tag.equalsIgnoreCase("small")) {
+         end(mSpannableStringBuilder, Small.class, new RelativeSizeSpan(0.8f));
+     } else if (tag.equalsIgnoreCase("font")) {
+         endFont(mSpannableStringBuilder);
+     } else if (tag.equalsIgnoreCase("blockquote")) {
+         handleP(mSpannableStringBuilder);
+         end(mSpannableStringBuilder, Blockquote.class, new QuoteSpan());
+     } else if (tag.equalsIgnoreCase("tt")) {
+         end(mSpannableStringBuilder, Monospace.class,
+                 new TypefaceSpan("monospace"));
+     } else if (tag.equalsIgnoreCase("a")) {
+         endA(mSpannableStringBuilder);
+     } else if (tag.equalsIgnoreCase("u")) {
+         end(mSpannableStringBuilder, Underline.class, new UnderlineSpan());
+     } else if (tag.equalsIgnoreCase("sup")) {
+         end(mSpannableStringBuilder, Super.class, new SuperscriptSpan());
+     } else if (tag.equalsIgnoreCase("sub")) {
+         end(mSpannableStringBuilder, Sub.class, new SubscriptSpan());
+     } else if (tag.length() == 2 &&
+             Character.toLowerCase(tag.charAt(0)) == 'h' &&
+             tag.charAt(1) >= '1' && tag.charAt(1) <= '6') {
+         handleP(mSpannableStringBuilder);
+         endHeader(mSpannableStringBuilder);
+     } else if (mTagHandler != null) {
+         mTagHandler.handleTag(false, tag, mSpannableStringBuilder, mReader);
+     }
+ }
+
+ private static void handleP(SpannableStringBuilder text) {
+ int len = text.length();
+
+ if (len >= 1 && text.charAt(len - 1) == '\n') {
+ if (len >= 2 && text.charAt(len - 2) == '\n') {
+ return;
+ }
+
+ text.append("\n");
+ return;
+ }
+
+ if (len != 0) {
+ text.append("\n\n");
+ }
+ }
+
+ // Appends a single line break; invoked from handleEndTag() for "br".
+ private static void handleBr(SpannableStringBuilder text) {
+ text.append("\n");
+ }
+
+ private static Object getLast(Spanned text, Class kind) {
+ /*
+ * This knows that the last returned object from getSpans()
+ * will be the most recently added.
+ */
+ Object[] objs = text.getSpans(0, text.length(), kind);
+
+ if (objs.length == 0) {
+ return null;
+ } else {
+ return objs[objs.length - 1];
+ }
+ }
+
+ private static void start(SpannableStringBuilder text, Object mark) {
+ int len = text.length();
+ text.setSpan(mark, len, len, Spannable.SPAN_MARK_MARK);
+ }
+
+ /**
+  * Closes the most recent mark of class {@code kind}: removes the mark
+  * and, if any text was added since it was placed, applies {@code repl}
+  * from the mark's position to the current end of the text.
+  *
+  * If no mark of that class is attached (for example because it was
+  * removed by a TagHandler), the call does nothing instead of asking
+  * for a span that starts at the "not attached" position -1.
+  *
+  * @param text the text being built
+  * @param kind class of the mark placed by start()
+  * @param repl the span to apply over the marked range
+  */
+ private static void end(SpannableStringBuilder text, Class kind,
+         Object repl) {
+     Object obj = getLast(text, kind);
+     if (obj == null) {
+         return;
+     }
+
+     int len = text.length();
+     int where = text.getSpanStart(obj);
+
+     text.removeSpan(obj);
+
+     if (where != len) {
+         text.setSpan(repl, where, len, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+     }
+ }
+
+ private static void startImg(SpannableStringBuilder text,
+ Attributes attributes, Html2.ImageGetter img) {
+ String src = attributes.getValue("", "src");
+ Drawable d = null;
+
+ if (img != null) {
+ d = img.getDrawable(src);
+ }
+
+ if (d == null) {
+ d = Resources.getSystem().
+ getDrawable(R.drawable.unknown_image);
+ d.setBounds(0, 0, d.getIntrinsicWidth(), d.getIntrinsicHeight());
+ }
+
+ int len = text.length();
+ text.append("\uFFFC");
+
+ text.setSpan(new ImageSpan(d, src), len, text.length(),
+ Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+ }
+
+ private static void startFont(SpannableStringBuilder text,
+ Attributes attributes) {
+ String color = attributes.getValue("", "color");
+ String face = attributes.getValue("", "face");
+ String bgColor = attributes.getValue("", "bgcolor");
+
+ int len = text.length();
+ text.setSpan(new Font(color, face, bgColor), len, len, Spannable.SPAN_MARK_MARK);
+ }
+
+ /**
+  * Handles a closing font tag: removes the most recent Font mark and,
+  * if text was added since it was placed, applies spans for the
+  * foreground color, background color and typeface it recorded.
+  *
+  * A color starting with "@" is looked up as an "android" color
+  * resource; any other value goes through getHtmlColor(), whose result
+  * is ignored when it is -1.
+  *
+  * If no Font mark is attached the call does nothing, rather than
+  * dereferencing a null mark.
+  *
+  * @param text the text being built
+  */
+ private static void endFont(SpannableStringBuilder text) {
+     Object obj = getLast(text, Font.class);
+     if (obj == null) {
+         return;
+     }
+
+     int len = text.length();
+     int where = text.getSpanStart(obj);
+
+     text.removeSpan(obj);
+
+     if (where == len) {
+         // Nothing was written inside the element; no span to apply.
+         return;
+     }
+
+     Font f = (Font) obj;
+
+     if (!TextUtils.isEmpty(f.mColor)) {
+         if (f.mColor.startsWith("@")) {
+             Resources res = Resources.getSystem();
+             String name = f.mColor.substring(1);
+             int colorRes = res.getIdentifier(name, "color", "android");
+             if (colorRes != 0) {
+                 ColorStateList colors = res.getColorStateList(colorRes);
+                 text.setSpan(new TextAppearanceSpan(null, 0, 0, colors, null),
+                         where, len,
+                         Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+             }
+         } else {
+             int c = getHtmlColor(f.mColor);
+             if (c != -1) {
+                 // Force full opacity.
+                 text.setSpan(new ForegroundColorSpan(c | 0xFF000000),
+                         where, len,
+                         Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+             }
+         }
+     }
+
+     if (!TextUtils.isEmpty(f.mBgColor)) {
+         if (f.mBgColor.startsWith("@")) {
+             Resources res = Resources.getSystem();
+             String name = f.mBgColor.substring(1);
+             int colorRes = res.getIdentifier(name, "color", "android");
+             if (colorRes != 0) {
+                 ColorStateList colors = res.getColorStateList(colorRes);
+                 text.setSpan(new BackgroundColorSpan(colors.getDefaultColor()),
+                         where, len,
+                         Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+             }
+         } else {
+             int c = getHtmlColor(f.mBgColor);
+             if (c != -1) {
+                 // Force full opacity.
+                 text.setSpan(new BackgroundColorSpan(c | 0xFF000000),
+                         where, len,
+                         Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+             }
+         }
+     }
+
+     if (f.mFace != null) {
+         text.setSpan(new TypefaceSpan(f.mFace), where, len,
+                 Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+     }
+ }
+
+ private static void startA(SpannableStringBuilder text, Attributes attributes) {
+ String href = attributes.getValue("", "href");
+
+ int len = text.length();
+ text.setSpan(new Href(href), len, len, Spannable.SPAN_MARK_MARK);
+ }
+
+ /**
+  * Handles a closing anchor tag: removes the most recent Href mark and,
+  * if the anchor enclosed any text and carried an href, covers that
+  * text with a URLSpan.
+  *
+  * If no Href mark is attached the call does nothing, rather than
+  * working with the "not attached" start position -1.
+  *
+  * @param text the text being built
+  */
+ private static void endA(SpannableStringBuilder text) {
+     Object obj = getLast(text, Href.class);
+     if (obj == null) {
+         return;
+     }
+
+     int len = text.length();
+     int where = text.getSpanStart(obj);
+
+     text.removeSpan(obj);
+
+     if (where != len) {
+         Href h = (Href) obj;
+
+         if (h.mHref != null) {
+             text.setSpan(new URLSpan(h.mHref), where, len,
+                     Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+         }
+     }
+ }
+
+ /**
+  * Handles a closing heading tag: removes the most recent Header mark
+  * and applies a RelativeSizeSpan (from HEADER_SIZES) and a bold
+  * StyleSpan to the heading text, excluding trailing newlines.
+  *
+  * If no Header mark is attached the call does nothing; without this
+  * check the back-off loop below could step past the start of the text.
+  *
+  * @param text the text being built
+  */
+ private static void endHeader(SpannableStringBuilder text) {
+     Object obj = getLast(text, Header.class);
+     if (obj == null) {
+         return;
+     }
+
+     int len = text.length();
+     int where = text.getSpanStart(obj);
+
+     text.removeSpan(obj);
+
+     // Back off so that only the text is styled, not the blank line
+     // that follows it.
+     while (len > where && text.charAt(len - 1) == '\n') {
+         len--;
+     }
+
+     if (where != len) {
+         Header h = (Header) obj;
+
+         text.setSpan(new RelativeSizeSpan(HEADER_SIZES[h.mLevel]),
+                 where, len, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+         text.setSpan(new StyleSpan(Typeface.BOLD),
+                 where, len, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+     }
+ }
+
+ // The following ContentHandler callbacks are intentionally empty: the
+ // converter only reacts to elements and character data.
+ @Override
+ public void setDocumentLocator(Locator locator) {
+ }
+
+ @Override
+ public void startDocument() throws SAXException {
+ }
+
+ @Override
+ public void endDocument() throws SAXException {
+ }
+
+ @Override
+ public void startPrefixMapping(String prefix, String uri) throws SAXException {
+ }
+
+ @Override
+ public void endPrefixMapping(String prefix) throws SAXException {
+ }
+
+ // Element start: only the local name and the attributes are used; the
+ // namespace URI and qualified name are ignored.
+ @Override
+ public void startElement(String uri, String localName, String qName, Attributes attributes)
+ throws SAXException {
+ handleStartTag(localName, attributes);
+ }
+
+ // Element end: dispatches on the local name only.
+ @Override
+ public void endElement(String uri, String localName, String qName) throws SAXException {
+ handleEndTag(localName);
+ }
+
+ /**
+  * Appends a run of character data to the output, collapsing
+  * whitespace: a space or newline is written as a single space, and
+  * only when the character before it (in this run, or at the end of the
+  * output built so far) is neither a space nor a newline. At the very
+  * start of the output leading whitespace is dropped.
+  */
+ @Override
+ public void characters(char ch[], int start, int length) throws SAXException {
+     final StringBuilder collapsed = new StringBuilder();
+
+     // Character that precedes the next one to be written. An empty
+     // output behaves as if it ended with a newline.
+     final int builtSoFar = mSpannableStringBuilder.length();
+     char previous = (builtSoFar == 0)
+             ? '\n'
+             : mSpannableStringBuilder.charAt(builtSoFar - 1);
+
+     final int stop = start + length;
+     for (int index = start; index < stop; index++) {
+         final char current = ch[index];
+         final boolean isWhitespace = current == ' ' || current == '\n';
+
+         if (!isWhitespace) {
+             collapsed.append(current);
+             previous = current;
+             continue;
+         }
+
+         // Ignore whitespace that immediately follows other whitespace;
+         // newlines count as spaces.
+         if (previous != ' ' && previous != '\n') {
+             collapsed.append(' ');
+             previous = ' ';
+         }
+     }
+
+     mSpannableStringBuilder.append(collapsed);
+ }
+
+ // The remaining ContentHandler callbacks are intentionally empty.
+ @Override
+ public void ignorableWhitespace(char ch[], int start, int length) throws SAXException {
+ }
+
+ @Override
+ public void processingInstruction(String target, String data) throws SAXException {
+ }
+
+ @Override
+ public void skippedEntity(String name) throws SAXException {
+ }
+
+ // Marker types. handleStartTag() attaches an instance as a zero-length
+ // mark via start(); handleEndTag() finds it again by class via end()
+ // and replaces it with the real styling span.
+ private static class Bold { }
+ private static class Italic { }
+ private static class Underline { }
+ private static class Big { }
+ private static class Small { }
+ private static class Monospace { }
+ private static class Blockquote { }
+ private static class Super { }
+ private static class Sub { }
+
+ /**
+  * Mark for an open font tag; carries the attribute values read in
+  * startFont() until endFont() turns them into spans.
+  */
+ private static class Font {
+     /** Value of the "color" attribute, possibly null. */
+     public String mColor;
+     /** Value of the "bgcolor" attribute, possibly null. */
+     public String mBgColor;
+     /** Value of the "face" attribute, possibly null. */
+     public String mFace;
+
+     public Font(String color, String face, String bgColor) {
+         this.mColor = color;
+         this.mBgColor = bgColor;
+         this.mFace = face;
+     }
+ }
+
+ /**
+  * Mark for an open anchor tag; carries the href read in startA() until
+  * endA() turns it into a URLSpan.
+  */
+ private static class Href {
+     /** Value of the "href" attribute, possibly null. */
+     public String mHref;
+
+     public Href(String href) {
+         this.mHref = href;
+     }
+ }
+
+ /**
+  * Mark for an open heading tag; carries the zero-based heading level
+  * (0 for h1 through 5 for h6) used to index HEADER_SIZES.
+  */
+ private static class Header {
+     private int mLevel;
+
+     public Header(int level) {
+         this.mLevel = level;
+     }
+ }
+
+ private static HashMap