diff --git a/src/main/java/eu/siacs/conversations/utils/Emoticons.java b/src/main/java/eu/siacs/conversations/utils/Emoticons.java index b86564ef..56dcfbfb 100644 --- a/src/main/java/eu/siacs/conversations/utils/Emoticons.java +++ b/src/main/java/eu/siacs/conversations/utils/Emoticons.java @@ -1,8 +1,13 @@ package eu.siacs.conversations.utils; +import android.util.Log; + import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import eu.siacs.conversations.Config; + public class Emoticons { private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF); @@ -12,11 +17,22 @@ public class Emoticons { private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF); private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF); private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF); + private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF); private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF); private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF); private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF); private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF); - private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(GEOMETRIC_SHAPES, LATIN_SUPPLEMENT); + private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D); + private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139); + + private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39)); + + private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks( + GEOMETRIC_SHAPES, + LATIN_SUPPLEMENT, + CYK_SYMBOLS_AND_PUNCTUATION, + LETTERLIKE_SYMBOLS, + KEYCAP_COMBINEABLE); private static final UnicodeBlocks EMOJIS = new UnicodeBlocks( MISC_SYMBOLS_AND_PICTOGRAPHS, SUPPLEMENTAL_SYMBOLS, @@ -25,9 +41,11 @@ public class Emoticons { MISC_SYMBOLS, DINGBATS, ENCLOSED_ALPHANUMERIC_SUPPLEMENT, + ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, MISC_TECHNICAL); private static final int ZWJ = 0x200D; private static final int VARIATION_16 = 0xFE0F; + private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3; private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF); private static List parse(String input) { @@ -88,22 +106,25 @@ public class Emoticons { } } else { int previous = codepoints.get(codepoints.size() -1); - if (SYMBOLIZE.contains(previous)) { + if (COMBINING_ENCLOSING_KEYCAP == codepoint) { + add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16; + } else if (SYMBOLIZE.contains(previous)) { add = codepoint == VARIATION_16; } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) { add = codepoints.size() == 1; } else if (previous == VARIATION_16) { add = isMerger(codepoint); } else if (FITZPATRICK.contains(previous)) { - add = codepoint == ZWJ || EMOJIS.contains(codepoint); + add = codepoint == ZWJ; } else if (ZWJ == previous) { - add = EMOJIS.contains(codepoint) || FITZPATRICK.contains(codepoint); + add = EMOJIS.contains(codepoint); } else if (isMerger(codepoint)) { add = true; } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) { add = true; } } + Log.d(Config.LOGTAG,"code point "+String.format("%H",codepoint)+" added="+add); if (add) { codepoints.add(codepoint); return true; @@ -119,21 +140,24 @@ public class Emoticons { public Symbol build() { if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) { return Symbol.NON_EMOJI; + } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) { + return Symbol.NON_EMOJI; } return codepoints.size() == 0 ? Symbol.NON_EMOJI : Symbol.EMOJI; } } - public static class UnicodeBlocks { - final UnicodeRange[] ranges; + public static class UnicodeBlocks implements UnicodeSet { + final UnicodeSet[] unicodeSets; - public UnicodeBlocks(UnicodeRange... ranges) { - this.ranges = ranges; + public UnicodeBlocks(UnicodeSet... sets) { + this.unicodeSets = sets; } + @Override public boolean contains(int codepoint) { - for(UnicodeRange range : ranges) { - if (range.contains(codepoint)) { + for(UnicodeSet unicodeSet : unicodeSets) { + if (unicodeSet.contains(codepoint)) { return true; } } @@ -141,8 +165,26 @@ public class Emoticons { } } + public interface UnicodeSet { + boolean contains(int codepoint); + } - public static class UnicodeRange { + public static class UnicodeList implements UnicodeSet { + + private final List list; + + public UnicodeList(Integer... codes) { + this.list = Arrays.asList(codes); + } + + @Override + public boolean contains(int codepoint) { + return this.list.contains(codepoint); + } + } + + + public static class UnicodeRange implements UnicodeSet { private final int lower; private final int upper;