Browse Source
git-svn-id: svn://svn.code.sf.net/p/junidecode/code/trunk@2 b90d0908-0dae-468f-8fb5-926cfc7ab88dmaster

186 changed files with 53838 additions and 0 deletions
@ -0,0 +1,7 @@
@@ -0,0 +1,7 @@
|
||||
License: BSD License |
||||
Description: JUnicode |
||||
Implementation-URL: http://junidecode.sf.net |
||||
Implementation-Version: #version# |
||||
Built-By: #user.name# |
||||
Build-Date: #today# |
||||
Main-Class: #mainclass# |
@ -0,0 +1,11 @@
@@ -0,0 +1,11 @@
|
||||
Character Code Point JUnidecode Text::Unidecode |
||||
Ŋ U+014A ng NG |
||||
ŋ U+014B NG ng |
||||
€ U+20AC EUR EU |
||||
₰ U+20B0 Pf |
||||
₱ U+20B1 P |
||||
₲ U+20B2 G |
||||
₳ U+20B3 A |
||||
₴ U+20B4 gh |
||||
₵ U+20B5 C/ |
||||
All the characters with code point U+20xx |
@ -0,0 +1,85 @@
@@ -0,0 +1,85 @@
|
||||
|
||||
<!-- Maven build descriptor for the junidecode jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>net.sf</groupId>
  <artifactId>junidecode</artifactId>
  <packaging>jar</packaging>
  <version>0.1</version>
  <name>junidecode</name>
  <url>http://junidecode.sf.net</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.5</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <!-- Compile for Java 1.5 and read the sources as UTF-8. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.5</source>
          <target>1.5</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>

      <!-- Package the jar with the manifest prepared by the antrun execution below. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <configuration>
          <archive>
            <manifestFile>${project.build.directory}/assembly.manifest</manifestFile>
          </archive>
        </configuration>
      </plugin>

      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>project</descriptorRef>
          </descriptorRefs>
        </configuration>
      </plugin>

      <!--
        Copies the manifest template into the build directory and replaces
        its #token# placeholders with the values of the current build.
      -->
      <plugin>
        <artifactId>maven-antrun-plugin</artifactId>
        <executions>
          <execution>
            <id>process-classes-fix-manifest</id>
            <phase>process-classes</phase>
            <goals>
              <goal>run</goal>
            </goals>
            <configuration>
              <tasks>
                <copy file="${basedir}/assembly.manifest" todir="${project.build.directory}" overwrite="true"/>
                <tstamp>
                  <!-- HH = 24-hour clock; "hh" is 12-hour and ambiguous without an AM/PM marker. -->
                  <format property="TODAY" pattern="yyyy-MM-dd HH:mm:ss" />
                </tstamp>
                <echo>pom.xml: timestamp ${TODAY}</echo>
                <replace dir="${project.build.directory}">
                  <include name="assembly.manifest"/>
                  <replacefilter token="#build#" value="${buildLabel}"/>
                  <!-- project.version is the non-deprecated spelling of the bare "version" expression. -->
                  <replacefilter token="#version#" value="${project.version}"/>
                  <replacefilter token="#user.name#" value="${user.name}"/>
                  <replacefilter token="#today#" value="${TODAY}"/>
                  <replacefilter token="#mainclass#" value="net.sf.junidecode.App"/>
                </replace>
              </tasks>
            </configuration>
          </execution>
        </executions>
      </plugin>

    </plugins>
  </build>
</project>
@ -0,0 +1,77 @@
@@ -0,0 +1,77 @@
|
||||
/* |
||||
* Copyright (c) 2009, Giuseppe Cardone |
||||
* All rights reserved. |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* * Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* * Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* * Neither the name of the author nor the names of the contributors may be |
||||
* used to endorse or promote products derived from this software without |
||||
* specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY GIUSEPPE CARDONE ''AS IS'' AND ANY |
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||||
* DISCLAIMED. IN NO EVENT SHALL GIUSEPPE CARDONE BE LIABLE FOR ANY |
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
package net.sf.junidecode; |
||||
|
||||
import java.io.BufferedReader; |
||||
import java.io.IOException; |
||||
import java.io.InputStreamReader; |
||||
|
||||
import static net.sf.junidecode.Junidecode.*; |
||||
|
||||
/** |
||||
* Simple example application for JUnidecode. If launched with arguments |
||||
* will strip diacritics and transliterate the arguments. If launched without |
||||
* arguments will read lines from stdin, convert input to ASCII 7-bit and |
||||
* write to stdout. For example: |
||||
* <em>Ελληνικά</em> becomes |
||||
* <em>Ellenika</em>. |
||||
* @author Giuseppe Cardone |
||||
* @version 0.1 |
||||
*/ |
||||
public class App { |
||||
|
||||
/** |
||||
* Private constructor to avoid instatiation. |
||||
*/ |
||||
private App() { |
||||
} |
||||
|
||||
/** |
||||
* Main. |
||||
* @param args Strings to transliterate. If <code>args.length == 0</code> |
||||
* then the input will be read from stdin. |
||||
*/ |
||||
public static void main(String[] args) { |
||||
if (args.length > 0) { |
||||
StringBuilder sb = new StringBuilder(); |
||||
for (String s : args) { |
||||
sb.append(unidecode(s)).append(" "); |
||||
} |
||||
System.out.println(sb.toString().trim()); |
||||
} else { |
||||
try { |
||||
BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); |
||||
String line; |
||||
while ((line = br.readLine()) != null) { |
||||
System.out.println(unidecode(line)); |
||||
} |
||||
} catch (IOException ex) { |
||||
System.err.println(ex.getLocalizedMessage()); |
||||
} |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,631 @@
@@ -0,0 +1,631 @@
|
||||
/* |
||||
* Copyright (c) 2009, Giuseppe Cardone |
||||
* All rights reserved. |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* * Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* * Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* * Neither the name of the author nor the names of the contributors may be |
||||
* used to endorse or promote products derived from this software without |
||||
* specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY GIUSEPPE CARDONE ''AS IS'' AND ANY |
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||||
* DISCLAIMED. IN NO EVENT SHALL GIUSEPPE CARDONE BE LIABLE FOR ANY |
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
package net.sf.junidecode; |
||||
|
||||
/** |
||||
* Root class of JUnidecode. |
||||
* @author Giuseppe Cardone |
||||
* @version 0.1 |
||||
*/ |
||||
public class Junidecode { |
||||
|
||||
/** |
||||
* Array to cache already loaded maps. |
||||
*/ |
||||
private static final String[][] cache = new String[256][]; |
||||
|
||||
/** |
||||
* Private constructor to avoid instantiation. |
||||
*/ |
||||
private Junidecode() { |
||||
} |
||||
|
||||
/** |
||||
* Strip diacritic marks and transliterates a unicode string to a valid |
||||
* 7-bit ASCII String. |
||||
* @since 0.1 |
||||
* @param s Unicode String to transliterate. |
||||
* @return 7-bit ASCII valid string. |
||||
*/ |
||||
public static String unidecode(final String s) { |
||||
StringBuilder sb = new StringBuilder(); |
||||
String[] map; |
||||
for (int i = 0; i < s.length(); i++) { |
||||
int codepoint = s.codePointAt(i); |
||||
int hi = (codepoint >> 8) & 0xff; |
||||
int low = codepoint & 0xff; |
||||
/* |
||||
* Try to load the code mapping from cache. We could of course keep |
||||
* a big String[256][256] in memory - which would be a little bit |
||||
* faster, but using this array we can keep the memory footprint |
||||
* smaller since the class loader loads the needed classes lazily. |
||||
* When transliterating from cyrillic we'll never load hiragana |
||||
* or greek mappings. |
||||
*/ |
||||
map = cache[hi]; |
||||
if (null == map) { |
||||
switch (hi) { |
||||
case 0x00: |
||||
map = X00.map; |
||||
break; |
||||
case 0x01: |
||||
map = X01.map; |
||||
break; |
||||
case 0x02: |
||||
map = X02.map; |
||||
break; |
||||
case 0x03: |
||||
map = X03.map; |
||||
break; |
||||
case 0x04: |
||||
map = X04.map; |
||||
break; |
||||
case 0x05: |
||||
map = X05.map; |
||||
break; |
||||
case 0x06: |
||||
map = X06.map; |
||||
break; |
||||
case 0x07: |
||||
map = X07.map; |
||||
break; |
||||
case 0x09: |
||||
map = X09.map; |
||||
break; |
||||
case 0x0a: |
||||
map = X0a.map; |
||||
break; |
||||
case 0x0b: |
||||
map = X0b.map; |
||||
break; |
||||
case 0x0c: |
||||
map = X0c.map; |
||||
break; |
||||
case 0x0d: |
||||
map = X0d.map; |
||||
break; |
||||
case 0x0e: |
||||
map = X0e.map; |
||||
break; |
||||
case 0x0f: |
||||
map = X0f.map; |
||||
break; |
||||
case 0x10: |
||||
map = X10.map; |
||||
break; |
||||
case 0x11: |
||||
map = X11.map; |
||||
break; |
||||
case 0x12: |
||||
map = X12.map; |
||||
break; |
||||
case 0x13: |
||||
map = X13.map; |
||||
break; |
||||
case 0x14: |
||||
map = X14.map; |
||||
break; |
||||
case 0x15: |
||||
map = X15.map; |
||||
break; |
||||
case 0x16: |
||||
map = X16.map; |
||||
break; |
||||
case 0x17: |
||||
map = X17.map; |
||||
break; |
||||
case 0x18: |
||||
map = X18.map; |
||||
break; |
||||
case 0x1e: |
||||
map = X1e.map; |
||||
break; |
||||
case 0x1f: |
||||
map = X1f.map; |
||||
break; |
||||
case 0x20: |
||||
map = X20.map; |
||||
break; |
||||
case 0x21: |
||||
map = X21.map; |
||||
break; |
||||
case 0x22: |
||||
map = X22.map; |
||||
break; |
||||
case 0x23: |
||||
map = X23.map; |
||||
break; |
||||
case 0x24: |
||||
map = X24.map; |
||||
break; |
||||
case 0x25: |
||||
map = X25.map; |
||||
break; |
||||
case 0x26: |
||||
map = X26.map; |
||||
break; |
||||
case 0x27: |
||||
map = X27.map; |
||||
break; |
||||
case 0x28: |
||||
map = X28.map; |
||||
break; |
||||
case 0x2e: |
||||
map = X2e.map; |
||||
break; |
||||
case 0x2f: |
||||
map = X2f.map; |
||||
break; |
||||
case 0x30: |
||||
map = X30.map; |
||||
break; |
||||
case 0x31: |
||||
map = X31.map; |
||||
break; |
||||
case 0x32: |
||||
map = X32.map; |
||||
break; |
||||
case 0x33: |
||||
map = X33.map; |
||||
break; |
||||
case 0x4d: |
||||
map = X4d.map; |
||||
break; |
||||
case 0x4e: |
||||
map = X4e.map; |
||||
break; |
||||
case 0x4f: |
||||
map = X4f.map; |
||||
break; |
||||
case 0x50: |
||||
map = X50.map; |
||||
break; |
||||
case 0x51: |
||||
map = X51.map; |
||||
break; |
||||
case 0x52: |
||||
map = X52.map; |
||||
break; |
||||
case 0x53: |
||||
map = X53.map; |
||||
break; |
||||
case 0x54: |
||||
map = X54.map; |
||||
break; |
||||
case 0x55: |
||||
map = X55.map; |
||||
break; |
||||
case 0x56: |
||||
map = X56.map; |
||||
break; |
||||
case 0x57: |
||||
map = X57.map; |
||||
break; |
||||
case 0x58: |
||||
map = X58.map; |
||||
break; |
||||
case 0x59: |
||||
map = X59.map; |
||||
break; |
||||
case 0x5a: |
||||
map = X5a.map; |
||||
break; |
||||
case 0x5b: |
||||
map = X5b.map; |
||||
break; |
||||
case 0x5c: |
||||
map = X5c.map; |
||||
break; |
||||
case 0x5d: |
||||
map = X5d.map; |
||||
break; |
||||
case 0x5e: |
||||
map = X5e.map; |
||||
break; |
||||
case 0x5f: |
||||
map = X5f.map; |
||||
break; |
||||
case 0x60: |
||||
map = X60.map; |
||||
break; |
||||
case 0x61: |
||||
map = X61.map; |
||||
break; |
||||
case 0x62: |
||||
map = X62.map; |
||||
break; |
||||
case 0x63: |
||||
map = X63.map; |
||||
break; |
||||
case 0x64: |
||||
map = X64.map; |
||||
break; |
||||
case 0x65: |
||||
map = X65.map; |
||||
break; |
||||
case 0x66: |
||||
map = X66.map; |
||||
break; |
||||
case 0x67: |
||||
map = X67.map; |
||||
break; |
||||
case 0x68: |
||||
map = X68.map; |
||||
break; |
||||
case 0x69: |
||||
map = X69.map; |
||||
break; |
||||
case 0x6a: |
||||
map = X6a.map; |
||||
break; |
||||
case 0x6b: |
||||
map = X6b.map; |
||||
break; |
||||
case 0x6c: |
||||
map = X6c.map; |
||||
break; |
||||
case 0x6d: |
||||
map = X6d.map; |
||||
break; |
||||
case 0x6e: |
||||
map = X6e.map; |
||||
break; |
||||
case 0x6f: |
||||
map = X6f.map; |
||||
break; |
||||
case 0x70: |
||||
map = X70.map; |
||||
break; |
||||
case 0x71: |
||||
map = X71.map; |
||||
break; |
||||
case 0x72: |
||||
map = X72.map; |
||||
break; |
||||
case 0x73: |
||||
map = X73.map; |
||||
break; |
||||
case 0x74: |
||||
map = X74.map; |
||||
break; |
||||
case 0x75: |
||||
map = X75.map; |
||||
break; |
||||
case 0x76: |
||||
map = X76.map; |
||||
break; |
||||
case 0x77: |
||||
map = X77.map; |
||||
break; |
||||
case 0x78: |
||||
map = X78.map; |
||||
break; |
||||
case 0x79: |
||||
map = X79.map; |
||||
break; |
||||
case 0x7a: |
||||
map = X7a.map; |
||||
break; |
||||
case 0x7b: |
||||
map = X7b.map; |
||||
break; |
||||
case 0x7c: |
||||
map = X7c.map; |
||||
break; |
||||
case 0x7d: |
||||
map = X7d.map; |
||||
break; |
||||
case 0x7e: |
||||
map = X7e.map; |
||||
break; |
||||
case 0x7f: |
||||
map = X7f.map; |
||||
break; |
||||
case 0x80: |
||||
map = X80.map; |
||||
break; |
||||
case 0x81: |
||||
map = X81.map; |
||||
break; |
||||
case 0x82: |
||||
map = X82.map; |
||||
break; |
||||
case 0x83: |
||||
map = X83.map; |
||||
break; |
||||
case 0x84: |
||||
map = X84.map; |
||||
break; |
||||
case 0x85: |
||||
map = X85.map; |
||||
break; |
||||
case 0x86: |
||||
map = X86.map; |
||||
break; |
||||
case 0x87: |
||||
map = X87.map; |
||||
break; |
||||
case 0x88: |
||||
map = X88.map; |
||||
break; |
||||
case 0x89: |
||||
map = X89.map; |
||||
break; |
||||
case 0x8a: |
||||
map = X8a.map; |
||||
break; |
||||
case 0x8b: |
||||
map = X8b.map; |
||||
break; |
||||
case 0x8c: |
||||
map = X8c.map; |
||||
break; |
||||
case 0x8d: |
||||
map = X8d.map; |
||||
break; |
||||
case 0x8e: |
||||
map = X8e.map; |
||||
break; |
||||
case 0x8f: |
||||
map = X8f.map; |
||||
break; |
||||
case 0x90: |
||||
map = X90.map; |
||||
break; |
||||
case 0x91: |
||||
map = X91.map; |
||||
break; |
||||
case 0x92: |
||||
map = X92.map; |
||||
break; |
||||
case 0x93: |
||||
map = X93.map; |
||||
break; |
||||
case 0x94: |
||||
map = X94.map; |
||||
break; |
||||
case 0x95: |
||||
map = X95.map; |
||||
break; |
||||
case 0x96: |
||||
map = X96.map; |
||||
break; |
||||
case 0x97: |
||||
map = X97.map; |
||||
break; |
||||
case 0x98: |
||||
map = X98.map; |
||||
break; |
||||
case 0x99: |
||||
map = X99.map; |
||||
break; |
||||
case 0x9a: |
||||
map = X9a.map; |
||||
break; |
||||
case 0x9b: |
||||
map = X9b.map; |
||||
break; |
||||
case 0x9c: |
||||
map = X9c.map; |
||||
break; |
||||
case 0x9d: |
||||
map = X9d.map; |
||||
break; |
||||
case 0x9e: |
||||
map = X9e.map; |
||||
break; |
||||
case 0x9f: |
||||
map = X9f.map; |
||||
break; |
||||
case 0xa0: |
||||
map = Xa0.map; |
||||
break; |
||||
case 0xa1: |
||||
map = Xa1.map; |
||||
break; |
||||
case 0xa2: |
||||
map = Xa2.map; |
||||
break; |
||||
case 0xa3: |
||||
map = Xa3.map; |
||||
break; |
||||
case 0xa4: |
||||
map = Xa4.map; |
||||
break; |
||||
case 0xac: |
||||
map = Xac.map; |
||||
break; |
||||
case 0xad: |
||||
map = Xad.map; |
||||
break; |
||||
case 0xae: |
||||
map = Xae.map; |
||||
break; |
||||
case 0xaf: |
||||
map = Xaf.map; |
||||
break; |
||||
case 0xb0: |
||||
map = Xb0.map; |
||||
break; |
||||
case 0xb1: |
||||
map = Xb1.map; |
||||
break; |
||||
case 0xb2: |
||||
map = Xb2.map; |
||||
break; |
||||
case 0xb3: |
||||
map = Xb3.map; |
||||
break; |
||||
case 0xb4: |
||||
map = Xb4.map; |
||||
break; |
||||
case 0xb5: |
||||
map = Xb5.map; |
||||
break; |
||||
case 0xb6: |
||||
map = Xb6.map; |
||||
break; |
||||
case 0xb7: |
||||
map = Xb7.map; |
||||
break; |
||||
case 0xb8: |
||||
map = Xb8.map; |
||||
break; |
||||
case 0xb9: |
||||
map = Xb9.map; |
||||
break; |
||||
case 0xba: |
||||
map = Xba.map; |
||||
break; |
||||
case 0xbb: |
||||
map = Xbb.map; |
||||
break; |
||||
case 0xbc: |
||||
map = Xbc.map; |
||||
break; |
||||
case 0xbd: |
||||
map = Xbd.map; |
||||
break; |
||||
case 0xbe: |
||||
map = Xbe.map; |
||||
break; |
||||
case 0xbf: |
||||
map = Xbf.map; |
||||
break; |
||||
case 0xc0: |
||||
map = Xc0.map; |
||||
break; |
||||
case 0xc1: |
||||
map = Xc1.map; |
||||
break; |
||||
case 0xc2: |
||||
map = Xc2.map; |
||||
break; |
||||
case 0xc3: |
||||
map = Xc3.map; |
||||
break; |
||||
case 0xc4: |
||||
map = Xc4.map; |
||||
break; |
||||
case 0xc5: |
||||
map = Xc5.map; |
||||
break; |
||||
case 0xc6: |
||||
map = Xc6.map; |
||||
break; |
||||
case 0xc7: |
||||
map = Xc7.map; |
||||
break; |
||||
case 0xc8: |
||||
map = Xc8.map; |
||||
break; |
||||
case 0xc9: |
||||
map = Xc9.map; |
||||
break; |
||||
case 0xca: |
||||
map = Xca.map; |
||||
break; |
||||
case 0xcb: |
||||
map = Xcb.map; |
||||
break; |
||||
case 0xcc: |
||||
map = Xcc.map; |
||||
break; |
||||
case 0xcd: |
||||
map = Xcd.map; |
||||
break; |
||||
case 0xce: |
||||
map = Xce.map; |
||||
break; |
||||
case 0xcf: |
||||
map = Xcf.map; |
||||
break; |
||||
case 0xd0: |
||||
map = Xd0.map; |
||||
break; |
||||
case 0xd1: |
||||
map = Xd1.map; |
||||
break; |
||||
case 0xd2: |
||||
map = Xd2.map; |
||||
break; |
||||
case 0xd3: |
||||
map = Xd3.map; |
||||
break; |
||||
case 0xd4: |
||||
map = Xd4.map; |
||||
break; |
||||
case 0xd5: |
||||
map = Xd5.map; |
||||
break; |
||||
case 0xd6: |
||||
map = Xd6.map; |
||||
break; |
||||
case 0xd7: |
||||
map = Xd7.map; |
||||
break; |
||||
case 0xf9: |
||||
map = Xf9.map; |
||||
break; |
||||
case 0xfa: |
||||
map = Xfa.map; |
||||
break; |
||||
case 0xfb: |
||||
map = Xfb.map; |
||||
break; |
||||
case 0xfc: |
||||
map = Xfc.map; |
||||
break; |
||||
case 0xfd: |
||||
map = Xfd.map; |
||||
break; |
||||
case 0xfe: |
||||
map = Xfe.map; |
||||
break; |
||||
case 0xff: |
||||
map = Xff.map; |
||||
break; |
||||
default: |
||||
continue; |
||||
} |
||||
/* |
||||
* Cache the new map using the high byte of the code point |
||||
* as index. |
||||
*/ |
||||
cache[hi] = map; |
||||
} |
||||
/* |
||||
* Some code maps contain only 254 elements because the last |
||||
* one is reserved. |
||||
*/ |
||||
if (low < map.length) { |
||||
sb.append(map[low]); |
||||
} |
||||
} |
||||
return sb.toString(); |
||||
} |
||||
} |
@ -0,0 +1,294 @@
@@ -0,0 +1,294 @@
|
||||
/* |
||||
* Copyright (c) 2009, Giuseppe Cardone |
||||
* All rights reserved. |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* * Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* * Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* * Neither the name of the author nor the names of the contributors may be |
||||
* used to endorse or promote products derived from this software without |
||||
* specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY GIUSEPPE CARDONE ''AS IS'' AND ANY |
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||||
* DISCLAIMED. IN NO EVENT SHALL GIUSEPPE CARDONE BE LIABLE FOR ANY |
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
package net.sf.junidecode; |
||||
|
||||
/** |
||||
* Character map for Unicode characters with codepoint U+01xx. |
||||
* @author Giuseppe Cardone |
||||
* @version 0.1 |
||||
*/ |
||||
class X01 { |
||||
|
||||
public static final String[] map = new String[]{ |
||||
"A", // 0x00
|
||||
"a", // 0x01
|
||||
"A", // 0x02
|
||||
"a", // 0x03
|
||||
"A", // 0x04
|
||||
"a", // 0x05
|
||||
"C", // 0x06
|
||||
"c", // 0x07
|
||||
"C", // 0x08
|
||||
"c", // 0x09
|
||||
"C", // 0x0a
|
||||
"c", // 0x0b
|
||||
"C", // 0x0c
|
||||
"c", // 0x0d
|
||||
"D", // 0x0e
|
||||
"d", // 0x0f
|
||||
"D", // 0x10
|
||||
"d", // 0x11
|
||||
"E", // 0x12
|
||||
"e", // 0x13
|
||||
"E", // 0x14
|
||||
"e", // 0x15
|
||||
"E", // 0x16
|
||||
"e", // 0x17
|
||||
"E", // 0x18
|
||||
"e", // 0x19
|
||||
"E", // 0x1a
|
||||
"e", // 0x1b
|
||||
"G", // 0x1c
|
||||
"g", // 0x1d
|
||||
"G", // 0x1e
|
||||
"g", // 0x1f
|
||||
"G", // 0x20
|
||||
"g", // 0x21
|
||||
"G", // 0x22
|
||||
"g", // 0x23
|
||||
"H", // 0x24
|
||||
"h", // 0x25
|
||||
"H", // 0x26
|
||||
"h", // 0x27
|
||||
"I", // 0x28
|
||||
"i", // 0x29
|
||||
"I", // 0x2a
|
||||
"i", // 0x2b
|
||||
"I", // 0x2c
|
||||
"i", // 0x2d
|
||||
"I", // 0x2e
|
||||
"i", // 0x2f
|
||||
"I", // 0x30
|
||||
"i", // 0x31
|
||||
"IJ", // 0x32
|
||||
"", // 0x33
|
||||
"J", // 0x34
|
||||
"j", // 0x35
|
||||
"K", // 0x36
|
||||
"k", // 0x37
|
||||
"k", // 0x38
|
||||
"L", // 0x39
|
||||
"l", // 0x3a
|
||||
"L", // 0x3b
|
||||
"l", // 0x3c
|
||||
"L", // 0x3d
|
||||
"l", // 0x3e
|
||||
"L", // 0x3f
|
||||
"l", // 0x40
|
||||
"L", // 0x41
|
||||
"l", // 0x42
|
||||
"N", // 0x43
|
||||
"n", // 0x44
|
||||
"N", // 0x45
|
||||
"n", // 0x46
|
||||
"N", // 0x47
|
||||
"n", // 0x48
|
||||
"\'n", // 0x49
|
||||
"NG", // 0x4a
|
||||
"ng", // 0x4b
|
||||
"O", // 0x4c
|
||||
"o", // 0x4d
|
||||
"O", // 0x4e
|
||||
"o", // 0x4f
|
||||
"O", // 0x50
|
||||
"o", // 0x51
|
||||
"OE", // 0x52
|
||||
"oe", // 0x53
|
||||
"R", // 0x54
|
||||
"r", // 0x55
|
||||
"R", // 0x56
|
||||
"r", // 0x57
|
||||
"R", // 0x58
|
||||
"r", // 0x59
|
||||
"S", // 0x5a
|
||||
"s", // 0x5b
|
||||
"S", // 0x5c
|
||||
"s", // 0x5d
|
||||
"S", // 0x5e
|
||||
"s", // 0x5f
|
||||
"S", // 0x60
|
||||
"s", // 0x61
|
||||
"T", // 0x62
|
||||
"t", // 0x63
|
||||
"T", // 0x64
|
||||
"t", // 0x65
|
||||
"T", // 0x66
|
||||
"t", // 0x67
|
||||
"U", // 0x68
|
||||
"u", // 0x69
|
||||
"U", // 0x6a
|
||||
"u", // 0x6b
|
||||
"U", // 0x6c
|
||||
"u", // 0x6d
|
||||
"U", // 0x6e
|
||||
"u", // 0x6f
|
||||
"U", // 0x70
|
||||
"u", // 0x71
|
||||
"U", // 0x72
|
||||
"u", // 0x73
|
||||
"W", // 0x74
|
||||
"w", // 0x75
|
||||
"Y", // 0x76
|
||||
"y", // 0x77
|
||||
"Y", // 0x78
|
||||
"Z", // 0x79
|
||||
"z", // 0x7a
|
||||
"Z", // 0x7b
|
||||
"z", // 0x7c
|
||||
"Z", // 0x7d
|
||||
"z", // 0x7e
|
||||
"s", // 0x7f
|
||||
"b", // 0x80
|
||||
"B", // 0x81
|
||||
"B", // 0x82
|
||||
"b", // 0x83
|
||||
"6", // 0x84
|
||||
"6", // 0x85
|
||||
"O", // 0x86
|
||||
"C", // 0x87
|
||||
"c", // 0x88
|
||||
"D", // 0x89
|
||||
"D", // 0x8a
|
||||
"D", // 0x8b
|
||||
"d", // 0x8c
|
||||
"d", // 0x8d
|
||||
"3", // 0x8e
|
||||
"@", // 0x8f
|
||||
"E", // 0x90
|
||||
"F", // 0x91
|
||||
"f", // 0x92
|
||||
"G", // 0x93
|
||||
"G", // 0x94
|
||||
"hv", // 0x95
|
||||
"I", // 0x96
|
||||
"I", // 0x97
|
||||
"K", // 0x98
|
||||
"k", // 0x99
|
||||
"l", // 0x9a
|
||||
"l", // 0x9b
|
||||
"W", // 0x9c
|
||||
"N", // 0x9d
|
||||
"n", // 0x9e
|
||||
"O", // 0x9f
|
||||
"O", // 0xa0
|
||||
"o", // 0xa1
|
||||
"OI", // 0xa2
|
||||
"oi", // 0xa3
|
||||
"P", // 0xa4
|
||||
"p", // 0xa5
|
||||
"YR", // 0xa6
|
||||
"2", // 0xa7
|
||||
"2", // 0xa8
|
||||
"SH", // 0xa9
|
||||
"sh", // 0xaa
|
||||
"t", // 0xab
|
||||
"T", // 0xac
|
||||
"t", // 0xad
|
||||
"T", // 0xae
|
||||
"U", // 0xaf
|
||||
"u", // 0xb0
|
||||
"Y", // 0xb1
|
||||
"V", // 0xb2
|
||||
"Y", // 0xb3
|
||||
"y", // 0xb4
|
||||
"Z", // 0xb5
|
||||
"z", // 0xb6
|
||||
"ZH", // 0xb7
|
||||
"ZH", // 0xb8
|
||||
"zh", // 0xb9
|
||||
"zh", // 0xba
|
||||
"2", // 0xbb
|
||||
"5", // 0xbc
|
||||
"5", // 0xbd
|
||||
"ts", // 0xbe
|
||||
"w", // 0xbf
|
||||
"|", // 0xc0
|
||||
"||", // 0xc1
|
||||
"|=", // 0xc2
|
||||
"!", // 0xc3
|
||||
"DZ", // 0xc4
|
||||
"Dz", // 0xc5
|
||||
"dz", // 0xc6
|
||||
"LJ", // 0xc7
|
||||
"Lj", // 0xc8
|
||||
"lj", // 0xc9
|
||||
"NJ", // 0xca
|
||||
"Nj", // 0xcb
|
||||
"nj", // 0xcc
|
||||
"A", // 0xcd
|
||||
"a", // 0xce
|
||||
"I", // 0xcf
|
||||
"i", // 0xd0
|
||||
"O", // 0xd1
|
||||
"o", // 0xd2
|
||||
"U", // 0xd3
|
||||
"u", // 0xd4
|
||||
"U", // 0xd5
|
||||
"u", // 0xd6
|
||||
"U", // 0xd7
|
||||
"u", // 0xd8
|
||||
"U", // 0xd9
|
||||
"u", // 0xda
|
||||
"U", // 0xdb
|
||||
"u", // 0xdc
|
||||
"@", // 0xdd
|
||||
"A", // 0xde
|
||||
"a", // 0xdf
|
||||
"A", // 0xe0
|
||||
"a", // 0xe1
|
||||
"AE", // 0xe2
|
||||
"ae", // 0xe3
|
||||
"G", // 0xe4
|
||||
"g", // 0xe5
|
||||
"G", // 0xe6
|
||||
"g", // 0xe7
|
||||
"K", // 0xe8
|
||||
|