001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.common.parsers.core; 015 016import org.apache.commons.lang3.StringUtils; 017 018/** 019 * This class converts alphabetic, numeric, and symbolic Unicode characters 020 * which are not in the first 127 ASCII characters (the "Basic Latin" Unicode 021 * block) into their ASCII equivalents, if one exists. 022 * 023 * Characters from the following Unicode blocks are converted; however, only 024 * those characters with reasonable ASCII alternatives are converted: 025 * 026 * <ul> 027 * <li>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a> 028 * <li>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a> 029 * <li>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a> 030 * <li>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a> 031 * <li>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a> 032 * <li>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a> 033 * <li>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a> 034 * <li>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a> 035 * <li>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a> 036 * <li>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a> 037 * <li>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a> 038 * <li>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a> 039 * <li>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a> 040 * <li>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a> 041 * <li>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a> 042 * <li>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a> 043 * </ul> 044 * 045 * See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a> 046 * 047 * For example, 'à' will be replaced by 'a'. 048 */ 049public final class ASCIIParser implements Parsable<String> { 050 051 private static ASCIIParser singletonObject = null; 052 053 private ASCIIParser() { 054 } 055 056 public static ASCIIParser getInstance() { 057 synchronized (ASCIIParser.class) { 058 if (singletonObject == null) { 059 singletonObject = new ASCIIParser(); 060 } 061 } 062 return singletonObject; 063 } 064 065 @Override 066 public ParseResult<String> parse(String input) { 067 if (StringUtils.isEmpty(input)) { 068 return ParseResult.fail(); 069 } else { 070 StringBuilder sb = new StringBuilder(); 071 for (char c : input.toCharArray()) { 072 foldToASCII(c, sb); 073 } 074 return ParseResult.success(ParseResult.CONFIDENCE.DEFINITE, sb.toString()); 075 } 076 } 077 078 /** 079 * Converts a single character above ASCII to their ASCII equivalents. 080 * For example, accents are removed from accented characters. 081 * @param c The character to fold 082 * @param sb String builder to append to 083 */ 084 private static final void foldToASCII(final char c, StringBuilder sb) { 085 // Quick test: if it's not in range then just keep current character 086 if (c < '\u0080') { 087 sb.append(c); 088 } else { 089 switch (c) { 090 case '\u00C0': // À [LATIN CAPITAL LETTER A WITH GRAVE] 091 case '\u00C1': // Á [LATIN CAPITAL LETTER A WITH ACUTE] 092 case '\u00C2': //  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] 093 case '\u00C3': // à [LATIN CAPITAL LETTER A WITH TILDE] 094 case '\u00C4': // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] 095 case '\u00C5': // Å [LATIN CAPITAL LETTER A WITH RING ABOVE] 096 case '\u0100': // Ā [LATIN CAPITAL LETTER A WITH MACRON] 097 case '\u0102': // Ă [LATIN CAPITAL LETTER A WITH BREVE] 098 case '\u0104': // Ą [LATIN CAPITAL LETTER A WITH OGONEK] 099 case '\u018F': // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] 100 case '\u01CD': // Ǎ [LATIN CAPITAL LETTER A WITH CARON] 101 case '\u01DE': // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] 102 case '\u01E0': // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] 103 case '\u01FA': // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] 104 case '\u0200': // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] 105 case '\u0202': // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] 106 case '\u0226': // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] 107 case '\u023A': // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] 108 case '\u1D00': // ᴀ [LATIN LETTER SMALL CAPITAL A] 109 case '\u1E00': // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] 110 case '\u1EA0': // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] 111 case '\u1EA2': // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] 112 case '\u1EA4': // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] 113 case '\u1EA6': // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] 114 case '\u1EA8': // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] 115 case '\u1EAA': // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] 116 case '\u1EAC': // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] 117 case '\u1EAE': // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] 118 case '\u1EB0': // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] 119 case '\u1EB2': // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] 120 case '\u1EB4': // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] 121 case '\u1EB6': // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] 122 case '\u24B6': // Ⓐ [CIRCLED LATIN CAPITAL LETTER A] 123 case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A] 124 sb.append('A'); 125 break; 126 case '\u00E0': // à [LATIN SMALL LETTER A WITH GRAVE] 127 case '\u00E1': // á [LATIN SMALL LETTER A WITH ACUTE] 128 case '\u00E2': // â [LATIN SMALL LETTER A WITH CIRCUMFLEX] 129 case '\u00E3': // ã [LATIN SMALL LETTER A WITH TILDE] 130 case '\u00E4': // ä [LATIN SMALL LETTER A WITH DIAERESIS] 131 case '\u00E5': // å [LATIN SMALL LETTER A WITH RING ABOVE] 132 case '\u0101': // ā [LATIN SMALL LETTER A WITH MACRON] 133 case '\u0103': // ă [LATIN SMALL LETTER A WITH BREVE] 134 case '\u0105': // ą [LATIN SMALL LETTER A WITH OGONEK] 135 case '\u01CE': // ǎ [LATIN SMALL LETTER A WITH CARON] 136 case '\u01DF': // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] 137 case '\u01E1': // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] 138 case '\u01FB': // ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] 139 case '\u0201': // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] 140 case '\u0203': // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] 141 case '\u0227': // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] 142 case '\u0250': // ɐ [LATIN SMALL LETTER TURNED A] 143 case '\u0259': // ə [LATIN SMALL LETTER SCHWA] 144 case '\u025A': // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] 145 case '\u1D8F': // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] 146 case '\u1D95': // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] 147 case '\u1E01': // ạ [LATIN SMALL LETTER A WITH RING BELOW] 148 case '\u1E9A': // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] 149 case '\u1EA1': // ạ [LATIN SMALL LETTER A WITH DOT BELOW] 150 case '\u1EA3': // ả [LATIN SMALL LETTER A WITH HOOK ABOVE] 151 case '\u1EA5': // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] 152 case '\u1EA7': // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] 153 case '\u1EA9': // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] 154 case '\u1EAB': // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] 155 case '\u1EAD': // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] 156 case '\u1EAF': // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] 157 case '\u1EB1': // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] 158 case '\u1EB3': // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] 159 case '\u1EB5': // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] 160 case '\u1EB7': // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] 161 case '\u2090': // ₐ [LATIN SUBSCRIPT SMALL LETTER A] 162 case '\u2094': // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] 163 case '\u24D0': // ⓐ [CIRCLED LATIN SMALL LETTER A] 164 case '\u2C65': // ⱥ [LATIN SMALL LETTER A WITH STROKE] 165 case '\u2C6F': // Ɐ [LATIN CAPITAL LETTER TURNED A] 166 case '\uFF41': // a [FULLWIDTH LATIN SMALL LETTER A] 167 sb.append('a'); 168 break; 169 case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA] 170 sb.append('A'); 171 sb.append('A'); 172 break; 173 case '\u00C6': // Æ [LATIN CAPITAL LETTER AE] 174 case '\u01E2': // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] 175 case '\u01FC': // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] 176 case '\u1D01': // ᴁ [LATIN LETTER SMALL CAPITAL AE] 177 sb.append('A'); 178 sb.append('E'); 179 break; 180 case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO] 181 sb.append('A'); 182 sb.append('O'); 183 break; 184 case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU] 185 sb.append('A'); 186 sb.append('U'); 187 break; 188 case '\uA738': // Ꜹ [LATIN CAPITAL LETTER AV] 189 case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] 190 sb.append('A'); 191 sb.append('V'); 192 break; 193 case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY] 194 sb.append('A'); 195 sb.append('Y'); 196 break; 197 case '\u249C': // ⒜ [PARENTHESIZED LATIN SMALL LETTER A] 198 sb.append('('); 199 sb.append('a'); 200 sb.append(')'); 201 break; 202 case '\uA733': // ꜳ [LATIN SMALL LETTER AA] 203 sb.append('a'); 204 sb.append('a'); 205 break; 206 case '\u00E6': // æ [LATIN SMALL LETTER AE] 207 case '\u01E3': // ǣ [LATIN SMALL LETTER AE WITH MACRON] 208 case '\u01FD': // ǽ [LATIN SMALL LETTER AE WITH ACUTE] 209 case '\u1D02': // ᴂ [LATIN SMALL LETTER TURNED AE] 210 sb.append('a'); 211 sb.append('e'); 212 break; 213 case '\uA735': // ꜵ [LATIN SMALL LETTER AO] 214 sb.append('a'); 215 sb.append('o'); 216 break; 217 case '\uA737': // ꜷ [LATIN SMALL LETTER AU] 218 sb.append('a'); 219 sb.append('u'); 220 break; 221 case '\uA739': // ꜹ [LATIN SMALL LETTER AV] 222 case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] 223 sb.append('a'); 224 sb.append('v'); 225 break; 226 case '\uA73D': // ꜽ [LATIN SMALL LETTER AY] 227 sb.append('a'); 228 sb.append('y'); 229 break; 230 case '\u0181': // Ɓ [LATIN CAPITAL LETTER B WITH HOOK] 231 case '\u0182': // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] 232 case '\u0243': // Ƀ [LATIN CAPITAL LETTER B WITH STROKE] 233 case '\u0299': // ʙ [LATIN LETTER SMALL CAPITAL B] 234 case '\u1D03': // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] 235 case '\u1E02': // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] 236 case '\u1E04': // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] 237 case '\u1E06': // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] 238 case '\u24B7': // Ⓑ [CIRCLED LATIN CAPITAL LETTER B] 239 case '\uFF22': // B [FULLWIDTH LATIN CAPITAL LETTER B] 240 sb.append('B'); 241 break; 242 case '\u0180': // ƀ [LATIN SMALL LETTER B WITH STROKE] 243 case '\u0183': // ƃ [LATIN SMALL LETTER B WITH TOPBAR] 244 case '\u0253': // ɓ [LATIN SMALL LETTER B WITH HOOK] 245 case '\u1D6C': // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] 246 case '\u1D80': // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] 247 case '\u1E03': // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] 248 case '\u1E05': // ḅ [LATIN SMALL LETTER B WITH DOT BELOW] 249 case '\u1E07': // ḇ [LATIN SMALL LETTER B WITH LINE BELOW] 250 case '\u24D1': // ⓑ [CIRCLED LATIN SMALL LETTER B] 251 case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B] 252 sb.append('b'); 253 break; 254 case '\u249D': // ⒝ [PARENTHESIZED LATIN SMALL LETTER B] 255 sb.append('('); 256 sb.append('b'); 257 sb.append(')'); 258 break; 259 case '\u00C7': // Ç [LATIN CAPITAL LETTER C WITH CEDILLA] 260 case '\u0106': // Ć [LATIN CAPITAL LETTER C WITH ACUTE] 261 case '\u0108': // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] 262 case '\u010A': // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] 263 case '\u010C': // Č [LATIN CAPITAL LETTER C WITH CARON] 264 case '\u0187': // Ƈ [LATIN CAPITAL LETTER C WITH HOOK] 265 case '\u023B': // Ȼ [LATIN CAPITAL LETTER C WITH STROKE] 266 case '\u0297': // ʗ [LATIN LETTER STRETCHED C] 267 case '\u1D04': // ᴄ [LATIN LETTER SMALL CAPITAL C] 268 case '\u1E08': // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] 269 case '\u24B8': // Ⓒ [CIRCLED LATIN CAPITAL LETTER C] 270 case '\uFF23': // C [FULLWIDTH LATIN CAPITAL LETTER C] 271 sb.append('C'); 272 break; 273 case '\u00E7': // ç [LATIN SMALL LETTER C WITH CEDILLA] 274 case '\u0107': // ć [LATIN SMALL LETTER C WITH ACUTE] 275 case '\u0109': // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] 276 case '\u010B': // ċ [LATIN SMALL LETTER C WITH DOT ABOVE] 277 case '\u010D': // č [LATIN SMALL LETTER C WITH CARON] 278 case '\u0188': // ƈ [LATIN SMALL LETTER C WITH HOOK] 279 case '\u023C': // ȼ [LATIN SMALL LETTER C WITH STROKE] 280 case '\u0255': // ɕ [LATIN SMALL LETTER C WITH CURL] 281 case '\u1E09': // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] 282 case '\u2184': // ↄ [LATIN SMALL LETTER REVERSED C] 283 case '\u24D2': // ⓒ [CIRCLED LATIN SMALL LETTER C] 284 case '\uA73E': // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] 285 case '\uA73F': // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] 286 case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C] 287 sb.append('c'); 288 break; 289 case '\u249E': // ⒞ [PARENTHESIZED LATIN SMALL LETTER C] 290 sb.append('('); 291 sb.append('c'); 292 sb.append(')'); 293 break; 294 case '\u00D0': // Ð [LATIN CAPITAL LETTER ETH] 295 case '\u010E': // Ď [LATIN CAPITAL LETTER D WITH CARON] 296 case '\u0110': // Đ [LATIN CAPITAL LETTER D WITH STROKE] 297 case '\u0189': // Ɖ [LATIN CAPITAL LETTER AFRICAN D] 298 case '\u018A': // Ɗ [LATIN CAPITAL LETTER D WITH HOOK] 299 case '\u018B': // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] 300 case '\u1D05': // ᴅ [LATIN LETTER SMALL CAPITAL D] 301 case '\u1D06': // ᴆ [LATIN LETTER SMALL CAPITAL ETH] 302 case '\u1E0A': // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] 303 case '\u1E0C': // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] 304 case '\u1E0E': // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] 305 case '\u1E10': // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] 306 case '\u1E12': // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] 307 case '\u24B9': // Ⓓ [CIRCLED LATIN CAPITAL LETTER D] 308 case '\uA779': // Ꝺ [LATIN CAPITAL LETTER INSULAR D] 309 case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D] 310 sb.append('D'); 311 break; 312 case '\u00F0': // ð [LATIN SMALL LETTER ETH] 313 case '\u010F': // ď [LATIN SMALL LETTER D WITH CARON] 314 case '\u0111': // đ [LATIN SMALL LETTER D WITH STROKE] 315 case '\u018C': // ƌ [LATIN SMALL LETTER D WITH TOPBAR] 316 case '\u0221': // ȡ [LATIN SMALL LETTER D WITH CURL] 317 case '\u0256': // ɖ [LATIN SMALL LETTER D WITH TAIL] 318 case '\u0257': // ɗ [LATIN SMALL LETTER D WITH HOOK] 319 case '\u1D6D': // ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE] 320 case '\u1D81': // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] 321 case '\u1D91': // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] 322 case '\u1E0B': // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] 323 case '\u1E0D': // ḍ [LATIN SMALL LETTER D WITH DOT BELOW] 324 case '\u1E0F': // ḏ [LATIN SMALL LETTER D WITH LINE BELOW] 325 case '\u1E11': // ḑ [LATIN SMALL LETTER D WITH CEDILLA] 326 case '\u1E13': // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] 327 case '\u24D3': // ⓓ [CIRCLED LATIN SMALL LETTER D] 328 case '\uA77A': // ꝺ [LATIN SMALL LETTER INSULAR D] 329 case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D] 330 sb.append('d'); 331 break; 332 case '\u01C4': // DŽ [LATIN CAPITAL LETTER DZ WITH CARON] 333 case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ] 334 sb.append('D'); 335 sb.append('Z'); 336 break; 337 case '\u01C5': // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] 338 case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] 339 sb.append('D'); 340 sb.append('z'); 341 break; 342 case '\u249F': // ⒟ [PARENTHESIZED LATIN SMALL LETTER D] 343 sb.append('('); 344 sb.append('d'); 345 sb.append(')'); 346 break; 347 case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH] 348 sb.append('d'); 349 sb.append('b'); 350 break; 351 case '\u01C6': // dž [LATIN SMALL LETTER DZ WITH CARON] 352 case '\u01F3': // dz [LATIN SMALL LETTER DZ] 353 case '\u02A3': // ʣ [LATIN SMALL LETTER DZ DIGRAPH] 354 case '\u02A5': // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] 355 sb.append('d'); 356 sb.append('z'); 357 break; 358 case '\u00C8': // È [LATIN CAPITAL LETTER E WITH GRAVE] 359 case '\u00C9': // É [LATIN CAPITAL LETTER E WITH ACUTE] 360 case '\u00CA': // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] 361 case '\u00CB': // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] 362 case '\u0112': // Ē [LATIN CAPITAL LETTER E WITH MACRON] 363 case '\u0114': // Ĕ [LATIN CAPITAL LETTER E WITH BREVE] 364 case '\u0116': // Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] 365 case '\u0118': // Ę [LATIN CAPITAL LETTER E WITH OGONEK] 366 case '\u011A': // Ě [LATIN CAPITAL LETTER E WITH CARON] 367 case '\u018E': // Ǝ [LATIN CAPITAL LETTER REVERSED E] 368 case '\u0190': // Ɛ [LATIN CAPITAL LETTER OPEN E] 369 case '\u0204': // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] 370 case '\u0206': // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] 371 case '\u0228': // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] 372 case '\u0246': // Ɇ [LATIN CAPITAL LETTER E WITH STROKE] 373 case '\u1D07': // ᴇ [LATIN LETTER SMALL CAPITAL E] 374 case '\u1E14': // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] 375 case '\u1E16': // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] 376 case '\u1E18': // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] 377 case '\u1E1A': // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] 378 case '\u1E1C': // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] 379 case '\u1EB8': // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] 380 case '\u1EBA': // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] 381 case '\u1EBC': // Ẽ [LATIN CAPITAL LETTER E WITH TILDE] 382 case '\u1EBE': // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] 383 case '\u1EC0': // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] 384 case '\u1EC2': // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] 385 case '\u1EC4': // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] 386 case '\u1EC6': // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] 387 case '\u24BA': // Ⓔ [CIRCLED LATIN CAPITAL LETTER E] 388 case '\u2C7B': // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] 389 case '\uFF25': // E [FULLWIDTH LATIN CAPITAL LETTER E] 390 sb.append('E'); 391 break; 392 case '\u00E8': // è [LATIN SMALL LETTER E WITH GRAVE] 393 case '\u00E9': // é [LATIN SMALL LETTER E WITH ACUTE] 394 case '\u00EA': // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] 395 case '\u00EB': // ë [LATIN SMALL LETTER E WITH DIAERESIS] 396 case '\u0113': // ē [LATIN SMALL LETTER E WITH MACRON] 397 case '\u0115': // ĕ [LATIN SMALL LETTER E WITH BREVE] 398 case '\u0117': // ė [LATIN SMALL LETTER E WITH DOT ABOVE] 399 case '\u0119': // ę [LATIN SMALL LETTER E WITH OGONEK] 400 case '\u011B': // ě [LATIN SMALL LETTER E WITH CARON] 401 case '\u01DD': // ǝ [LATIN SMALL LETTER TURNED E] 402 case '\u0205': // ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] 403 case '\u0207': // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] 404 case '\u0229': // ȩ [LATIN SMALL LETTER E WITH CEDILLA] 405 case '\u0247': // ɇ [LATIN SMALL LETTER E WITH STROKE] 406 case '\u0258': // ɘ [LATIN SMALL LETTER REVERSED E] 407 case '\u025B': // ɛ [LATIN SMALL LETTER OPEN E] 408 case '\u025C': // ɜ [LATIN SMALL LETTER REVERSED OPEN E] 409 case '\u025D': // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] 410 case '\u025E': // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] 411 case '\u029A': // ʚ [LATIN SMALL LETTER CLOSED OPEN E] 412 case '\u1D08': // ᴈ [LATIN SMALL LETTER TURNED OPEN E] 413 case '\u1D92': // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] 414 case '\u1D93': // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] 415 case '\u1D94': // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] 416 case '\u1E15': // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] 417 case '\u1E17': // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] 418 case '\u1E19': // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] 419 case '\u1E1B': // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] 420 case '\u1E1D': // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] 421 case '\u1EB9': // ẹ [LATIN SMALL LETTER E WITH DOT BELOW] 422 case '\u1EBB': // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] 423 case '\u1EBD': // ẽ [LATIN SMALL LETTER E WITH TILDE] 424 case '\u1EBF': // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] 425 case '\u1EC1': // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] 426 case '\u1EC3': // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] 427 case '\u1EC5': // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] 428 case '\u1EC7': // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] 429 case '\u2091': // ₑ [LATIN SUBSCRIPT SMALL LETTER E] 430 case '\u24D4': // ⓔ [CIRCLED LATIN SMALL LETTER E] 431 case '\u2C78': // ⱸ [LATIN SMALL LETTER E WITH NOTCH] 432 case '\uFF45': // e [FULLWIDTH LATIN SMALL LETTER E] 433 sb.append('e'); 434 break; 435 case '\u24A0': // ⒠ [PARENTHESIZED LATIN SMALL LETTER E] 436 sb.append('('); 437 sb.append('e'); 438 sb.append(')'); 439 break; 440 case '\u0191': // Ƒ [LATIN CAPITAL LETTER F WITH HOOK] 441 case '\u1E1E': // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] 442 case '\u24BB': // Ⓕ [CIRCLED LATIN CAPITAL LETTER F] 443 case '\uA730': // ꜰ [LATIN LETTER SMALL CAPITAL F] 444 case '\uA77B': // Ꝼ [LATIN CAPITAL LETTER INSULAR F] 445 case '\uA7FB': // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] 446 case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F] 447 sb.append('F'); 448 break; 449 case '\u0192': // ƒ [LATIN SMALL LETTER F WITH HOOK] 450 case '\u1D6E': // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] 451 case '\u1D82': // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] 452 case '\u1E1F': // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] 453 case '\u1E9B': // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] 454 case '\u24D5': // ⓕ [CIRCLED LATIN SMALL LETTER F] 455 case '\uA77C': // ꝼ [LATIN SMALL LETTER INSULAR F] 456 case '\uFF46': // f [FULLWIDTH LATIN SMALL LETTER F] 457 sb.append('f'); 458 break; 459 case '\u24A1': // ⒡ [PARENTHESIZED LATIN SMALL LETTER F] 460 sb.append('('); 461 sb.append('f'); 462 sb.append(')'); 463 break; 464 case '\uFB00': // ff [LATIN SMALL LIGATURE FF] 465 sb.append('f'); 466 sb.append('f'); 467 break; 468 case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI] 469 sb.append('f'); 470 sb.append('f'); 471 sb.append('i'); 472 break; 473 case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL] 474 sb.append('f'); 475 sb.append('f'); 476 sb.append('l'); 477 break; 478 case '\uFB01': // fi [LATIN SMALL LIGATURE FI] 479 sb.append('f'); 480 sb.append('i'); 481 break; 482 case '\uFB02': // fl [LATIN SMALL LIGATURE FL] 483 sb.append('f'); 484 sb.append('l'); 485 break; 486 case '\u011C': // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] 487 case '\u011E': // Ğ [LATIN CAPITAL LETTER G WITH BREVE] 488 case '\u0120': // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] 489 case '\u0122': // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] 490 case '\u0193': // Ɠ [LATIN CAPITAL LETTER G WITH HOOK] 491 case '\u01E4': // Ǥ [LATIN CAPITAL LETTER G WITH STROKE] 492 case '\u01E5': // ǥ [LATIN SMALL LETTER G WITH STROKE] 493 case '\u01E6': // Ǧ [LATIN CAPITAL LETTER G WITH CARON] 494 case '\u01E7': // ǧ [LATIN SMALL LETTER G WITH CARON] 495 case '\u01F4': // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] 496 case '\u0262': // ɢ [LATIN LETTER SMALL CAPITAL G] 497 case '\u029B': // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] 498 case '\u1E20': // Ḡ [LATIN CAPITAL LETTER G WITH MACRON] 499 case '\u24BC': // Ⓖ [CIRCLED LATIN CAPITAL LETTER G] 500 case '\uA77D': // Ᵹ [LATIN CAPITAL LETTER INSULAR G] 501 case '\uA77E': // Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G] 502 case '\uFF27': // G [FULLWIDTH LATIN CAPITAL LETTER G] 503 sb.append('G'); 504 break; 505 case '\u011D': // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] 506 case '\u011F': // ğ [LATIN SMALL LETTER G WITH BREVE] 507 case '\u0121': // ġ [LATIN SMALL LETTER G WITH DOT ABOVE] 508 case '\u0123': // ģ [LATIN SMALL LETTER G WITH CEDILLA] 509 case '\u01F5': // ǵ [LATIN SMALL LETTER G WITH ACUTE] 510 case '\u0260': // ɠ [LATIN SMALL LETTER G WITH HOOK] 511 case '\u0261': // ɡ [LATIN SMALL LETTER SCRIPT G] 512 case '\u1D77': // ᵷ [LATIN SMALL LETTER TURNED G] 513 case '\u1D79': // ᵹ [LATIN SMALL LETTER INSULAR G] 514 case '\u1D83': // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] 515 case '\u1E21': // ḡ [LATIN SMALL LETTER G WITH MACRON] 516 case '\u24D6': // ⓖ [CIRCLED LATIN SMALL LETTER G] 517 case '\uA77F': // ꝿ [LATIN SMALL LETTER TURNED INSULAR G] 518 case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G] 519 sb.append('g'); 520 break; 521 case '\u24A2': // ⒢ [PARENTHESIZED LATIN SMALL LETTER G] 522 sb.append('('); 523 sb.append('g'); 524 sb.append(')'); 525 break; 526 case '\u0124': // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] 527 case '\u0126': // Ħ [LATIN CAPITAL LETTER H WITH STROKE] 528 case '\u021E': // Ȟ [LATIN CAPITAL LETTER H WITH CARON] 529 case '\u029C': // ʜ [LATIN LETTER SMALL CAPITAL H] 530 case '\u1E22': // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] 531 case '\u1E24': // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] 532 case '\u1E26': // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] 533 case '\u1E28': // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] 534 case '\u1E2A': // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] 535 case '\u24BD': // Ⓗ [CIRCLED LATIN CAPITAL LETTER H] 536 case '\u2C67': // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] 537 case '\u2C75': // Ⱶ [LATIN CAPITAL LETTER HALF H] 538 case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H] 539 sb.append('H'); 540 break; 541 case '\u0125': // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] 542 case '\u0127': // ħ [LATIN SMALL LETTER H WITH STROKE] 543 case '\u021F': // ȟ [LATIN SMALL LETTER H WITH CARON] 544 case '\u0265': // ɥ [LATIN SMALL LETTER TURNED H] 545 case '\u0266': // ɦ [LATIN SMALL LETTER H WITH HOOK] 546 case '\u02AE': // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] 547 case '\u02AF': // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] 548 case '\u1E23': // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] 549 case '\u1E25': // ḥ [LATIN SMALL LETTER H WITH DOT BELOW] 550 case '\u1E27': // ḧ [LATIN SMALL LETTER H WITH DIAERESIS] 551 case '\u1E29': // ḩ [LATIN SMALL LETTER H WITH CEDILLA] 552 case '\u1E2B': // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] 553 case '\u1E96': // ẖ [LATIN SMALL LETTER H WITH LINE BELOW] 554 case '\u24D7': // ⓗ [CIRCLED LATIN SMALL LETTER H] 555 case '\u2C68': // ⱨ [LATIN SMALL LETTER H WITH DESCENDER] 556 case '\u2C76': // ⱶ [LATIN SMALL LETTER HALF H] 557 case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H] 558 sb.append('h'); 559 break; 560 case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] 561 sb.append('H'); 562 sb.append('V'); 563 break; 564 case '\u24A3': // ⒣ [PARENTHESIZED LATIN SMALL LETTER H] 565 sb.append('('); 566 sb.append('h'); 567 sb.append(')'); 568 break; 569 case '\u0195': // ƕ [LATIN SMALL LETTER HV] 570 sb.append('h'); 571 sb.append('v'); 572 break; 573 case '\u00CC': // Ì [LATIN CAPITAL LETTER I WITH GRAVE] 574 case '\u00CD': // Í [LATIN CAPITAL LETTER I WITH ACUTE] 575 case '\u00CE': // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] 576 case '\u00CF': // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] 577 case '\u0128': // Ĩ [LATIN CAPITAL LETTER I WITH TILDE] 578 case '\u012A': // Ī [LATIN CAPITAL LETTER I WITH MACRON] 579 case '\u012C': // Ĭ [LATIN CAPITAL LETTER I WITH BREVE] 580 case '\u012E': // Į [LATIN CAPITAL LETTER I WITH OGONEK] 581 case '\u0130': // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] 582 case '\u0196': // Ɩ [LATIN CAPITAL LETTER IOTA] 583 case '\u0197': // Ɨ [LATIN CAPITAL LETTER I WITH STROKE] 584 case '\u01CF': // Ǐ [LATIN CAPITAL LETTER I WITH CARON] 585 case '\u0208': // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] 586 case '\u020A': // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] 587 case '\u026A': // ɪ [LATIN LETTER SMALL CAPITAL I] 588 case '\u1D7B': // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] 589 case '\u1E2C': // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] 590 case '\u1E2E': // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] 591 case '\u1EC8': // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] 592 case '\u1ECA': // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] 593 case '\u24BE': // Ⓘ [CIRCLED LATIN CAPITAL LETTER I] 594 case '\uA7FE': // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] 595 case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I] 596 sb.append('I'); 597 break; 598 case '\u00EC': // ì [LATIN SMALL LETTER I WITH GRAVE] 599 case '\u00ED': // í [LATIN SMALL LETTER I WITH ACUTE] 600 case '\u00EE': // î [LATIN SMALL LETTER I WITH CIRCUMFLEX] 601 case '\u00EF': // ï [LATIN SMALL LETTER I WITH DIAERESIS] 602 case '\u0129': // ĩ [LATIN SMALL LETTER I WITH TILDE] 603 case '\u012B': // ī [LATIN SMALL LETTER I WITH MACRON] 604 case '\u012D': // ĭ [LATIN SMALL LETTER I WITH BREVE] 605 case '\u012F': // į [LATIN SMALL LETTER I WITH OGONEK] 606 case '\u0131': // ı [LATIN SMALL LETTER DOTLESS I] 607 case '\u01D0': // ǐ [LATIN SMALL LETTER I WITH CARON] 608 case '\u0209': // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] 609 case '\u020B': // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] 610 case '\u0268': // ɨ [LATIN SMALL LETTER I WITH STROKE] 611 case '\u1D09': // ᴉ [LATIN SMALL LETTER TURNED I] 612 case '\u1D62': // ᵢ [LATIN SUBSCRIPT SMALL LETTER I] 613 case '\u1D7C': // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] 614 case '\u1D96': // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] 615 case '\u1E2D': // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] 616 case '\u1E2F': // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] 617 case '\u1EC9': // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] 618 case '\u1ECB': // ị [LATIN SMALL LETTER I WITH DOT BELOW] 619 case '\u2071': // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] 620 case '\u24D8': // ⓘ [CIRCLED LATIN SMALL LETTER I] 621 case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I] 622 sb.append('i'); 623 break; 624 case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ] 625 sb.append('I'); 626 sb.append('J'); 627 break; 628 case '\u24A4': // ⒤ [PARENTHESIZED LATIN SMALL LETTER I] 629 sb.append('('); 630 sb.append('i'); 631 sb.append(')'); 632 break; 633 case '\u0133': // ij [LATIN SMALL LIGATURE IJ] 634 sb.append('i'); 635 sb.append('j'); 636 break; 637 case '\u0134': // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] 638 case '\u0248': // Ɉ [LATIN CAPITAL LETTER J WITH STROKE] 639 case '\u1D0A': // ᴊ [LATIN LETTER SMALL CAPITAL J] 640 case '\u24BF': // Ⓙ [CIRCLED LATIN CAPITAL LETTER J] 641 case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J] 642 sb.append('J'); 643 break; 644 case '\u0135': // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] 645 case '\u01F0': // ǰ [LATIN SMALL LETTER J WITH CARON] 646 case '\u0237': // ȷ [LATIN SMALL LETTER DOTLESS J] 647 case '\u0249': // ɉ [LATIN SMALL LETTER J WITH STROKE] 648 case '\u025F': // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] 649 case '\u0284': // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] 650 case '\u029D': // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] 651 case '\u24D9': // ⓙ [CIRCLED LATIN SMALL LETTER J] 652 case '\u2C7C': // ⱼ [LATIN SUBSCRIPT SMALL LETTER J] 653 case '\uFF4A': // j [FULLWIDTH LATIN SMALL LETTER J] 654 sb.append('j'); 655 break; 656 case '\u24A5': // ⒥ [PARENTHESIZED LATIN SMALL LETTER J] 657 sb.append('('); 658 sb.append('j'); 659 sb.append(')'); 660 break; 661 case '\u0136': // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] 662 case '\u0198': // Ƙ [LATIN CAPITAL LETTER K WITH HOOK] 663 case '\u01E8': // Ǩ [LATIN CAPITAL LETTER K WITH CARON] 664 case '\u1D0B': // ᴋ [LATIN LETTER SMALL CAPITAL K] 665 case '\u1E30': // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] 666 case '\u1E32': // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] 667 case '\u1E34': // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] 668 case '\u24C0': // Ⓚ [CIRCLED LATIN CAPITAL LETTER K] 669 case '\u2C69': // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] 670 case '\uA740': // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] 671 case '\uA742': // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] 672 case '\uA744': // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] 673 case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K] 674 sb.append('K'); 675 break; 676 case '\u0137': // ķ [LATIN SMALL LETTER K WITH CEDILLA] 677 case '\u0199': // ƙ [LATIN SMALL LETTER K WITH HOOK] 678 case '\u01E9': // ǩ [LATIN SMALL LETTER K WITH CARON] 679 case '\u029E': // ʞ [LATIN SMALL LETTER TURNED K] 680 case '\u1D84': // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] 681 case '\u1E31': // ḱ [LATIN SMALL LETTER K WITH ACUTE] 682 case '\u1E33': // ḳ [LATIN SMALL LETTER K WITH DOT BELOW] 683 case '\u1E35': // ḵ [LATIN SMALL LETTER K WITH LINE BELOW] 684 case '\u24DA': // ⓚ [CIRCLED LATIN SMALL LETTER K] 685 case '\u2C6A': // ⱪ [LATIN SMALL LETTER K WITH DESCENDER] 686 case '\uA741': // ꝁ [LATIN SMALL LETTER K WITH STROKE] 687 case '\uA743': // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] 688 case '\uA745': // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] 689 case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K] 690 sb.append('k'); 691 break; 692 case '\u24A6': // ⒦ [PARENTHESIZED LATIN SMALL LETTER K] 693 sb.append('('); 694 sb.append('k'); 695 sb.append(')'); 696 break; 697 case '\u0139': // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] 698 case '\u013B': // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] 699 case '\u013D': // Ľ [LATIN CAPITAL LETTER L WITH CARON] 700 case '\u013F': // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] 701 case '\u0141': // Ł [LATIN CAPITAL LETTER L WITH STROKE] 702 case '\u023D': // Ƚ [LATIN CAPITAL LETTER L WITH BAR] 703 case '\u029F': // ʟ [LATIN LETTER SMALL CAPITAL L] 704 case '\u1D0C': // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] 705 case '\u1E36': // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] 706 case '\u1E38': // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] 707 case '\u1E3A': // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] 708 case '\u1E3C': // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] 709 case '\u24C1': // Ⓛ [CIRCLED LATIN CAPITAL LETTER L] 710 case '\u2C60': // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] 711 case '\u2C62': // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] 712 case '\uA746': // Ꝇ [LATIN CAPITAL LETTER BROKEN L] 713 case '\uA748': // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] 714 case '\uA780': // Ꞁ [LATIN CAPITAL LETTER TURNED L] 715 case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L] 716 sb.append('L'); 717 break; 718 case '\u013A': // ĺ [LATIN SMALL LETTER L WITH ACUTE] 719 case '\u013C': // ļ [LATIN SMALL LETTER L WITH CEDILLA] 720 case '\u013E': // ľ [LATIN SMALL LETTER L WITH CARON] 721 case '\u0140': // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] 722 case '\u0142': // ł [LATIN SMALL LETTER L WITH STROKE] 723 case '\u019A': // ƚ [LATIN SMALL LETTER L WITH BAR] 724 case '\u0234': // ȴ [LATIN SMALL LETTER L WITH CURL] 725 case '\u026B': // ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE] 726 case '\u026C': // ɬ [LATIN SMALL LETTER L WITH BELT] 727 case '\u026D': // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] 728 case '\u1D85': // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] 729 case '\u1E37': // ḷ [LATIN SMALL LETTER L WITH DOT BELOW] 730 case '\u1E39': // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] 731 case '\u1E3B': // ḻ [LATIN SMALL LETTER L WITH LINE BELOW] 732 case '\u1E3D': // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] 733 case '\u24DB': // ⓛ [CIRCLED LATIN SMALL LETTER L] 734 case '\u2C61': // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] 735 case '\uA747': // ꝇ [LATIN SMALL LETTER BROKEN L] 736 case '\uA749': // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] 737 case '\uA781': // ꞁ [LATIN SMALL LETTER TURNED L] 738 case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L] 739 sb.append('l'); 740 break; 741 case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ] 742 sb.append('L'); 743 sb.append('J'); 744 break; 745 case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] 746 sb.append('L'); 747 sb.append('L'); 748 break; 749 case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] 750 sb.append('L'); 751 sb.append('j'); 752 break; 753 case '\u24A7': // ⒧ [PARENTHESIZED LATIN SMALL LETTER L] 754 sb.append('('); 755 sb.append('l'); 756 sb.append(')'); 757 break; 758 case '\u01C9': // lj [LATIN SMALL LETTER LJ] 759 sb.append('l'); 760 sb.append('j'); 761 break; 762 case '\u1EFB': // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] 763 sb.append('l'); 764 sb.append('l'); 765 break; 766 case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH] 767 sb.append('l'); 768 sb.append('s'); 769 break; 770 case '\u02AB': // ʫ [LATIN SMALL LETTER LZ DIGRAPH] 771 sb.append('l'); 772 sb.append('z'); 773 break; 774 case '\u019C': // Ɯ [LATIN CAPITAL LETTER TURNED M] 775 case '\u1D0D': // ᴍ [LATIN LETTER SMALL CAPITAL M] 776 case '\u1E3E': // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] 777 case '\u1E40': // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] 778 case '\u1E42': // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] 779 case '\u24C2': // Ⓜ [CIRCLED LATIN CAPITAL LETTER M] 780 case '\u2C6E': // Ɱ [LATIN CAPITAL LETTER M WITH HOOK] 781 case '\uA7FD': // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] 782 case '\uA7FF': // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] 783 case '\uFF2D': // M [FULLWIDTH LATIN CAPITAL LETTER M] 784 sb.append('M'); 785 break; 786 case '\u026F': // ɯ [LATIN SMALL LETTER TURNED M] 787 case '\u0270': // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] 788 case '\u0271': // ɱ [LATIN SMALL LETTER M WITH HOOK] 789 case '\u1D6F': // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] 790 case '\u1D86': // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] 791 case '\u1E3F': // ḿ [LATIN SMALL LETTER M WITH ACUTE] 792 case '\u1E41': // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] 793 case '\u1E43': // ṃ [LATIN SMALL LETTER M WITH DOT BELOW] 794 case '\u24DC': // ⓜ [CIRCLED LATIN SMALL LETTER M] 795 case '\uFF4D': // m [FULLWIDTH LATIN SMALL LETTER M] 796 sb.append('m'); 797 break; 798 case '\u24A8': // ⒨ [PARENTHESIZED LATIN SMALL LETTER M] 799 sb.append('('); 800 sb.append('m'); 801 sb.append(')'); 802 break; 803 case '\u00D1': // Ñ [LATIN CAPITAL LETTER N WITH TILDE] 804 case '\u0143': // Ń [LATIN CAPITAL LETTER N WITH ACUTE] 805 case '\u0145': // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] 806 case '\u0147': // Ň [LATIN CAPITAL LETTER N WITH CARON] 807 case '\u014A': // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] 808 case '\u019D': // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] 809 case '\u01F8': // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE] 810 case '\u0220': // Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] 811 case '\u0274': // ɴ [LATIN LETTER SMALL CAPITAL N] 812 case '\u1D0E': // ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] 813 case '\u1E44': // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] 814 case '\u1E46': // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] 815 case '\u1E48': // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] 816 case '\u1E4A': // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] 817 case '\u24C3': // Ⓝ [CIRCLED LATIN CAPITAL LETTER N] 818 case '\uFF2E': // N [FULLWIDTH LATIN CAPITAL LETTER N] 819 sb.append('N'); 820 break; 821 case '\u00F1': // ñ [LATIN SMALL LETTER N WITH TILDE] 822 case '\u0144': // ń [LATIN SMALL LETTER N WITH ACUTE] 823 case '\u0146': // ņ [LATIN SMALL LETTER N WITH CEDILLA] 824 case '\u0148': // ň [LATIN SMALL LETTER N WITH CARON] 825 case '\u0149': // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] 826 case '\u014B': // ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] 827 case '\u019E': // ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] 828 case '\u01F9': // ǹ [LATIN SMALL LETTER N WITH GRAVE] 829 case '\u0235': // ȵ [LATIN SMALL LETTER N WITH CURL] 830 case '\u0272': // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] 831 case '\u0273': // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] 832 case '\u1D70': // ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE] 833 case '\u1D87': // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] 834 case '\u1E45': // ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] 835 case '\u1E47': // ṇ [LATIN SMALL LETTER N WITH DOT BELOW] 836 case '\u1E49': // ṉ [LATIN SMALL LETTER N WITH LINE BELOW] 837 case '\u1E4B': // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] 838 case '\u207F': // ⁿ [SUPERSCRIPT LATIN SMALL LETTER N] 839 case '\u24DD': // ⓝ [CIRCLED LATIN SMALL LETTER N] 840 case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N] 841 sb.append('n'); 842 break; 843 case '\u01CA': // NJ [LATIN CAPITAL LETTER NJ] 844 sb.append('N'); 845 sb.append('J'); 846 break; 847 case '\u01CB': // Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J] 848 sb.append('N'); 849 sb.append('j'); 850 break; 851 case '\u24A9': // ⒩ [PARENTHESIZED LATIN SMALL LETTER N] 852 sb.append('('); 853 sb.append('n'); 854 sb.append(')'); 855 break; 856 case '\u01CC': // nj [LATIN SMALL LETTER NJ] 857 sb.append('n'); 858 sb.append('j'); 859 break; 860 case '\u00D2': // Ò [LATIN CAPITAL LETTER O WITH GRAVE] 861 case '\u00D3': // Ó [LATIN CAPITAL LETTER O WITH ACUTE] 862 case '\u00D4': // Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] 863 case '\u00D5': // Õ [LATIN CAPITAL LETTER O WITH TILDE] 864 case '\u00D6': // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] 865 case '\u00D8': // Ø [LATIN CAPITAL LETTER O WITH STROKE] 866 case '\u014C': // Ō [LATIN CAPITAL LETTER O WITH MACRON] 867 case '\u014E': // Ŏ [LATIN CAPITAL LETTER O WITH BREVE] 868 case '\u0150': // Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] 869 case '\u0186': // Ɔ [LATIN CAPITAL LETTER OPEN O] 870 case '\u019F': // Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] 871 case '\u01A0': // Ơ [LATIN CAPITAL LETTER O WITH HORN] 872 case '\u01D1': // Ǒ [LATIN CAPITAL LETTER O WITH CARON] 873 case '\u01EA': // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] 874 case '\u01EC': // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] 875 case '\u01FE': // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] 876 case '\u020C': // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] 877 case '\u020E': // Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] 878 case '\u022A': // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] 879 case '\u022C': // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] 880 case '\u022E': // Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE] 881 case '\u0230': // Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] 882 case '\u1D0F': // ᴏ [LATIN LETTER SMALL CAPITAL O] 883 case '\u1D10': // ᴐ [LATIN LETTER SMALL CAPITAL OPEN O] 884 case '\u1E4C': // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] 885 case '\u1E4E': // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] 886 case '\u1E50': // Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] 887 case '\u1E52': // Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] 888 case '\u1ECC': // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] 889 case '\u1ECE': // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] 890 case '\u1ED0': // Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] 891 case '\u1ED2': // Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] 892 case '\u1ED4': // Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] 893 case '\u1ED6': // Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] 894 case '\u1ED8': // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] 895 case '\u1EDA': // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] 896 case '\u1EDC': // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] 897 case '\u1EDE': // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] 898 case '\u1EE0': // Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE] 899 case '\u1EE2': // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] 900 case '\u24C4': // Ⓞ [CIRCLED LATIN CAPITAL LETTER O] 901 case '\uA74A': // Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] 902 case '\uA74C': // Ꝍ [LATIN CAPITAL LETTER O WITH LOOP] 903 case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O] 904 sb.append('O'); 905 break; 906 case '\u00F2': // ò [LATIN SMALL LETTER O WITH GRAVE] 907 case '\u00F3': // ó [LATIN SMALL LETTER O WITH ACUTE] 908 case '\u00F4': // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] 909 case '\u00F5': // õ [LATIN SMALL LETTER O WITH TILDE] 910 case '\u00F6': // ö [LATIN SMALL LETTER O WITH DIAERESIS] 911 case '\u00F8': // ø [LATIN SMALL LETTER O WITH STROKE] 912 case '\u014D': // ō [LATIN SMALL LETTER O WITH MACRON] 913 case '\u014F': // ŏ [LATIN SMALL LETTER O WITH BREVE] 914 case '\u0151': // ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE] 915 case '\u01A1': // ơ [LATIN SMALL LETTER O WITH HORN] 916 case '\u01D2': // ǒ [LATIN SMALL LETTER O WITH CARON] 917 case '\u01EB': // ǫ [LATIN SMALL LETTER O WITH OGONEK] 918 case '\u01ED': // ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] 919 case '\u01FF': // ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] 920 case '\u020D': // ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE] 921 case '\u020F': // ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE] 922 case '\u022B': // ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] 923 case '\u022D': // ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON] 924 case '\u022F': // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] 925 case '\u0231': // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] 926 case '\u0254': // ɔ [LATIN SMALL LETTER OPEN O] 927 case '\u0275': // ɵ [LATIN SMALL LETTER BARRED O] 928 case '\u1D16': // ᴖ [LATIN SMALL LETTER TOP HALF O] 929 case '\u1D17': // ᴗ [LATIN SMALL LETTER BOTTOM HALF O] 930 case '\u1D97': // ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] 931 case '\u1E4D': // ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE] 932 case '\u1E4F': // ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] 933 case '\u1E51': // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] 934 case '\u1E53': // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] 935 case '\u1ECD': // ọ [LATIN SMALL LETTER O WITH DOT BELOW] 936 case '\u1ECF': // ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE] 937 case '\u1ED1': // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] 938 case '\u1ED3': // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] 939 case '\u1ED5': // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] 940 case '\u1ED7': // ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] 941 case '\u1ED9': // ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] 942 case '\u1EDB': // ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE] 943 case '\u1EDD': // ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE] 944 case '\u1EDF': // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] 945 case '\u1EE1': // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] 946 case '\u1EE3': // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] 947 case '\u2092': // ₒ [LATIN SUBSCRIPT SMALL LETTER O] 948 case '\u24DE': // ⓞ [CIRCLED LATIN SMALL LETTER O] 949 case '\u2C7A': // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE] 950 case '\uA74B': // ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] 951 case '\uA74D': // ꝍ [LATIN SMALL LETTER O WITH LOOP] 952 case '\uFF4F': // o [FULLWIDTH LATIN SMALL LETTER O] 953 sb.append('o'); 954 break; 955 case '\u0152': // Œ [LATIN CAPITAL LIGATURE OE] 956 case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE] 957 sb.append('O'); 958 sb.append('E'); 959 break; 960 case '\uA74E': // Ꝏ [LATIN CAPITAL LETTER OO] 961 sb.append('O'); 962 sb.append('O'); 963 break; 964 case '\u0222': // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] 965 case '\u1D15': // ᴕ [LATIN LETTER SMALL CAPITAL OU] 966 sb.append('O'); 967 sb.append('U'); 968 break; 969 case '\u24AA': // ⒪ [PARENTHESIZED LATIN SMALL LETTER O] 970 sb.append('('); 971 sb.append('o'); 972 sb.append(')'); 973 break; 974 case '\u0153': // œ [LATIN SMALL LIGATURE OE] 975 case '\u1D14': // ᴔ [LATIN SMALL LETTER TURNED OE] 976 sb.append('o'); 977 sb.append('e'); 978 break; 979 case '\uA74F': // ꝏ [LATIN SMALL LETTER OO] 980 sb.append('o'); 981 sb.append('o'); 982 break; 983 case '\u0223': // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] 984 sb.append('o'); 985 sb.append('u'); 986 break; 987 case '\u01A4': // Ƥ [LATIN CAPITAL LETTER P WITH HOOK] 988 case '\u1D18': // ᴘ [LATIN LETTER SMALL CAPITAL P] 989 case '\u1E54': // Ṕ [LATIN CAPITAL LETTER P WITH ACUTE] 990 case '\u1E56': // Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE] 991 case '\u24C5': // Ⓟ [CIRCLED LATIN CAPITAL LETTER P] 992 case '\u2C63': // Ᵽ [LATIN CAPITAL LETTER P WITH STROKE] 993 case '\uA750': // Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] 994 case '\uA752': // Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH] 995 case '\uA754': // Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] 996 case '\uFF30': // P [FULLWIDTH LATIN CAPITAL LETTER P] 997 sb.append('P'); 998 break; 999 case '\u01A5': // ƥ [LATIN SMALL LETTER P WITH HOOK] 1000 case '\u1D71': // ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE] 1001 case '\u1D7D': // ᵽ [LATIN SMALL LETTER P WITH STROKE] 1002 case '\u1D88': // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] 1003 case '\u1E55': // ṕ [LATIN SMALL LETTER P WITH ACUTE] 1004 case '\u1E57': // ṗ [LATIN SMALL LETTER P WITH DOT ABOVE] 1005 case '\u24DF': // ⓟ [CIRCLED LATIN SMALL LETTER P] 1006 case '\uA751': // ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] 1007 case '\uA753': // ꝓ [LATIN SMALL LETTER P WITH FLOURISH] 1008 case '\uA755': // ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL] 1009 case '\uA7FC': // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P] 1010 case '\uFF50': // p [FULLWIDTH LATIN SMALL LETTER P] 1011 sb.append('p'); 1012 break; 1013 case '\u24AB': // ⒫ [PARENTHESIZED LATIN SMALL LETTER P] 1014 sb.append('('); 1015 sb.append('p'); 1016 sb.append(')'); 1017 break; 1018 case '\u024A': // Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] 1019 case '\u24C6': // Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] 1020 case '\uA756': // Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] 1021 case '\uA758': // Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] 1022 case '\uFF31': // Q [FULLWIDTH LATIN CAPITAL LETTER Q] 1023 sb.append('Q'); 1024 break; 1025 case '\u0138': // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] 1026 case '\u024B': // ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL] 1027 case '\u02A0': // ʠ [LATIN SMALL LETTER Q WITH HOOK] 1028 case '\u24E0': // ⓠ [CIRCLED LATIN SMALL LETTER Q] 1029 case '\uA757': // ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] 1030 case '\uA759': // ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] 1031 case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q] 1032 sb.append('q'); 1033 break; 1034 case '\u24AC': // ⒬ [PARENTHESIZED LATIN SMALL LETTER Q] 1035 sb.append('('); 1036 sb.append('q'); 1037 sb.append(')'); 1038 break; 1039 case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH] 1040 sb.append('q'); 1041 sb.append('p'); 1042 break; 1043 case '\u0154': // Ŕ [LATIN CAPITAL LETTER R WITH ACUTE] 1044 case '\u0156': // Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA] 1045 case '\u0158': // Ř [LATIN CAPITAL LETTER R WITH CARON] 1046 case '\u0210': // Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] 1047 case '\u0212': // Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] 1048 case '\u024C': // Ɍ [LATIN CAPITAL LETTER R WITH STROKE] 1049 case '\u0280': // ʀ [LATIN LETTER SMALL CAPITAL R] 1050 case '\u0281': // ʁ [LATIN LETTER SMALL CAPITAL INVERTED R] 1051 case '\u1D19': // ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R] 1052 case '\u1D1A': // ᴚ [LATIN LETTER SMALL CAPITAL TURNED R] 1053 case '\u1E58': // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] 1054 case '\u1E5A': // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] 1055 case '\u1E5C': // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] 1056 case '\u1E5E': // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] 1057 case '\u24C7': // Ⓡ [CIRCLED LATIN CAPITAL LETTER R] 1058 case '\u2C64': // Ɽ [LATIN CAPITAL LETTER R WITH TAIL] 1059 case '\uA75A': // Ꝛ [LATIN CAPITAL LETTER R ROTUNDA] 1060 case '\uA782': // Ꞃ [LATIN CAPITAL LETTER INSULAR R] 1061 case '\uFF32': // R [FULLWIDTH LATIN CAPITAL LETTER R] 1062 sb.append('R'); 1063 break; 1064 case '\u0155': // ŕ [LATIN SMALL LETTER R WITH ACUTE] 1065 case '\u0157': // ŗ [LATIN SMALL LETTER R WITH CEDILLA] 1066 case '\u0159': // ř [LATIN SMALL LETTER R WITH CARON] 1067 case '\u0211': // ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] 1068 case '\u0213': // ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE] 1069 case '\u024D': // ɍ [LATIN SMALL LETTER R WITH STROKE] 1070 case '\u027C': // ɼ [LATIN SMALL LETTER R WITH LONG LEG] 1071 case '\u027D': // ɽ [LATIN SMALL LETTER R WITH TAIL] 1072 case '\u027E': // ɾ [LATIN SMALL LETTER R WITH FISHHOOK] 1073 case '\u027F': // ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] 1074 case '\u1D63': // ᵣ [LATIN SUBSCRIPT SMALL LETTER R] 1075 case '\u1D72': // ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE] 1076 case '\u1D73': // ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] 1077 case '\u1D89': // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] 1078 case '\u1E59': // ṙ [LATIN SMALL LETTER R WITH DOT ABOVE] 1079 case '\u1E5B': // ṛ [LATIN SMALL LETTER R WITH DOT BELOW] 1080 case '\u1E5D': // ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] 1081 case '\u1E5F': // ṟ [LATIN SMALL LETTER R WITH LINE BELOW] 1082 case '\u24E1': // ⓡ [CIRCLED LATIN SMALL LETTER R] 1083 case '\uA75B': // ꝛ [LATIN SMALL LETTER R ROTUNDA] 1084 case '\uA783': // ꞃ [LATIN SMALL LETTER INSULAR R] 1085 case '\uFF52': // r [FULLWIDTH LATIN SMALL LETTER R] 1086 sb.append('r'); 1087 break; 1088 case '\u24AD': // ⒭ [PARENTHESIZED LATIN SMALL LETTER R] 1089 sb.append('('); 1090 sb.append('r'); 1091 sb.append(')'); 1092 break; 1093 case '\u015A': // Ś [LATIN CAPITAL LETTER S WITH ACUTE] 1094 case '\u015C': // Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] 1095 case '\u015E': // Ş [LATIN CAPITAL LETTER S WITH CEDILLA] 1096 case '\u0160': // Š [LATIN CAPITAL LETTER S WITH CARON] 1097 case '\u0218': // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] 1098 case '\u1E60': // Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE] 1099 case '\u1E62': // Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW] 1100 case '\u1E64': // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] 1101 case '\u1E66': // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] 1102 case '\u1E68': // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] 1103 case '\u24C8': // Ⓢ [CIRCLED LATIN CAPITAL LETTER S] 1104 case '\uA731': // ꜱ [LATIN LETTER SMALL CAPITAL S] 1105 case '\uA785': // ꞅ [LATIN SMALL LETTER INSULAR S] 1106 case '\uFF33': // S [FULLWIDTH LATIN CAPITAL LETTER S] 1107 sb.append('S'); 1108 break; 1109 case '\u015B': // ś [LATIN SMALL LETTER S WITH ACUTE] 1110 case '\u015D': // ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX] 1111 case '\u015F': // ş [LATIN SMALL LETTER S WITH CEDILLA] 1112 case '\u0161': // š [LATIN SMALL LETTER S WITH CARON] 1113 case '\u017F': // ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] 1114 case '\u0219': // ș [LATIN SMALL LETTER S WITH COMMA BELOW] 1115 case '\u023F': // ȿ [LATIN SMALL LETTER S WITH SWASH TAIL] 1116 case '\u0282': // ʂ [LATIN SMALL LETTER S WITH HOOK] 1117 case '\u1D74': // ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE] 1118 case '\u1D8A': // ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK] 1119 case '\u1E61': // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] 1120 case '\u1E63': // ṣ [LATIN SMALL LETTER S WITH DOT BELOW] 1121 case '\u1E65': // ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] 1122 case '\u1E67': // ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] 1123 case '\u1E69': // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] 1124 case '\u1E9C': // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] 1125 case '\u1E9D': // ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE] 1126 case '\u24E2': // ⓢ [CIRCLED LATIN SMALL LETTER S] 1127 case '\uA784': // Ꞅ [LATIN CAPITAL LETTER INSULAR S] 1128 case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S] 1129 sb.append('s'); 1130 break; 1131 case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S] 1132 sb.append('S'); 1133 sb.append('S'); 1134 break; 1135 case '\u24AE': // ⒮ [PARENTHESIZED LATIN SMALL LETTER S] 1136 sb.append('('); 1137 sb.append('s'); 1138 sb.append(')'); 1139 break; 1140 case '\u00DF': // ß [LATIN SMALL LETTER SHARP S] 1141 sb.append('s'); 1142 sb.append('s'); 1143 break; 1144 case '\uFB06': // st [LATIN SMALL LIGATURE ST] 1145 sb.append('s'); 1146 sb.append('t'); 1147 break; 1148 case '\u0162': // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA] 1149 case '\u0164': // Ť [LATIN CAPITAL LETTER T WITH CARON] 1150 case '\u0166': // Ŧ [LATIN CAPITAL LETTER T WITH STROKE] 1151 case '\u01AC': // Ƭ [LATIN CAPITAL LETTER T WITH HOOK] 1152 case '\u01AE': // Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] 1153 case '\u021A': // Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW] 1154 case '\u023E': // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] 1155 case '\u1D1B': // ᴛ [LATIN LETTER SMALL CAPITAL T] 1156 case '\u1E6A': // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] 1157 case '\u1E6C': // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] 1158 case '\u1E6E': // Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW] 1159 case '\u1E70': // Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] 1160 case '\u24C9': // Ⓣ [CIRCLED LATIN CAPITAL LETTER T] 1161 case '\uA786': // Ꞇ [LATIN CAPITAL LETTER INSULAR T] 1162 case '\uFF34': // T [FULLWIDTH LATIN CAPITAL LETTER T] 1163 sb.append('T'); 1164 break; 1165 case '\u0163': // ţ [LATIN SMALL LETTER T WITH CEDILLA] 1166 case '\u0165': // ť [LATIN SMALL LETTER T WITH CARON] 1167 case '\u0167': // ŧ [LATIN SMALL LETTER T WITH STROKE] 1168 case '\u01AB': // ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK] 1169 case '\u01AD': // ƭ [LATIN SMALL LETTER T WITH HOOK] 1170 case '\u021B': // ț [LATIN SMALL LETTER T WITH COMMA BELOW] 1171 case '\u0236': // ȶ [LATIN SMALL LETTER T WITH CURL] 1172 case '\u0287': // ʇ [LATIN SMALL LETTER TURNED T] 1173 case '\u0288': // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] 1174 case '\u1D75': // ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE] 1175 case '\u1E6B': // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] 1176 case '\u1E6D': // ṭ [LATIN SMALL LETTER T WITH DOT BELOW] 1177 case '\u1E6F': // ṯ [LATIN SMALL LETTER T WITH LINE BELOW] 1178 case '\u1E71': // ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] 1179 case '\u1E97': // ẗ [LATIN SMALL LETTER T WITH DIAERESIS] 1180 case '\u24E3': // ⓣ [CIRCLED LATIN SMALL LETTER T] 1181 case '\u2C66': // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] 1182 case '\uFF54': // t [FULLWIDTH LATIN SMALL LETTER T] 1183 sb.append('t'); 1184 break; 1185 case '\u00DE': // Þ [LATIN CAPITAL LETTER THORN] 1186 case '\uA766': // Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] 1187 sb.append('T'); 1188 sb.append('H'); 1189 break; 1190 case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ] 1191 sb.append('T'); 1192 sb.append('Z'); 1193 break; 1194 case '\u24AF': // ⒯ [PARENTHESIZED LATIN SMALL LETTER T] 1195 sb.append('('); 1196 sb.append('t'); 1197 sb.append(')'); 1198 break; 1199 case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] 1200 sb.append('t'); 1201 sb.append('c'); 1202 break; 1203 case '\u00FE': // þ [LATIN SMALL LETTER THORN] 1204 case '\u1D7A': // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] 1205 case '\uA767': // ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] 1206 sb.append('t'); 1207 sb.append('h'); 1208 break; 1209 case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH] 1210 sb.append('t'); 1211 sb.append('s'); 1212 break; 1213 case '\uA729': // ꜩ [LATIN SMALL LETTER TZ] 1214 sb.append('t'); 1215 sb.append('z'); 1216 break; 1217 case '\u00D9': // Ù [LATIN CAPITAL LETTER U WITH GRAVE] 1218 case '\u00DA': // Ú [LATIN CAPITAL LETTER U WITH ACUTE] 1219 case '\u00DB': // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] 1220 case '\u00DC': // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] 1221 case '\u0168': // Ũ [LATIN CAPITAL LETTER U WITH TILDE] 1222 case '\u016A': // Ū [LATIN CAPITAL LETTER U WITH MACRON] 1223 case '\u016C': // Ŭ [LATIN CAPITAL LETTER U WITH BREVE] 1224 case '\u016E': // Ů [LATIN CAPITAL LETTER U WITH RING ABOVE] 1225 case '\u0170': // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] 1226 case '\u0172': // Ų [LATIN CAPITAL LETTER U WITH OGONEK] 1227 case '\u01AF': // Ư [LATIN CAPITAL LETTER U WITH HORN] 1228 case '\u01D3': // Ǔ [LATIN CAPITAL LETTER U WITH CARON] 1229 case '\u01D5': // Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] 1230 case '\u01D7': // Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] 1231 case '\u01D9': // Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] 1232 case '\u01DB': // Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] 1233 case '\u0214': // Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] 1234 case '\u0216': // Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE] 1235 case '\u0244': // Ʉ [LATIN CAPITAL LETTER U BAR] 1236 case '\u1D1C': // ᴜ [LATIN LETTER SMALL CAPITAL U] 1237 case '\u1D7E': // ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] 1238 case '\u1E72': // Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] 1239 case '\u1E74': // Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW] 1240 case '\u1E76': // Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] 1241 case '\u1E78': // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] 1242 case '\u1E7A': // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] 1243 case '\u1EE4': // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] 1244 case '\u1EE6': // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] 1245 case '\u1EE8': // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] 1246 case '\u1EEA': // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] 1247 case '\u1EEC': // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] 1248 case '\u1EEE': // Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE] 1249 case '\u1EF0': // Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] 1250 case '\u24CA': // Ⓤ [CIRCLED LATIN CAPITAL LETTER U] 1251 case '\uFF35': // U [FULLWIDTH LATIN CAPITAL LETTER U] 1252 sb.append('U'); 1253 break; 1254 case '\u00F9': // ù [LATIN SMALL LETTER U WITH GRAVE] 1255 case '\u00FA': // ú [LATIN SMALL LETTER U WITH ACUTE] 1256 case '\u00FB': // û [LATIN SMALL LETTER U WITH CIRCUMFLEX] 1257 case '\u00FC': // ü [LATIN SMALL LETTER U WITH DIAERESIS] 1258 case '\u0169': // ũ [LATIN SMALL LETTER U WITH TILDE] 1259 case '\u016B': // ū [LATIN SMALL LETTER U WITH MACRON] 1260 case '\u016D': // ŭ [LATIN SMALL LETTER U WITH BREVE] 1261 case '\u016F': // ů [LATIN SMALL LETTER U WITH RING ABOVE] 1262 case '\u0171': // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] 1263 case '\u0173': // ų [LATIN SMALL LETTER U WITH OGONEK] 1264 case '\u01B0': // ư [LATIN SMALL LETTER U WITH HORN] 1265 case '\u01D4': // ǔ [LATIN SMALL LETTER U WITH CARON] 1266 case '\u01D6': // ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] 1267 case '\u01D8': // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] 1268 case '\u01DA': // ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] 1269 case '\u01DC': // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] 1270 case '\u0215': // ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE] 1271 case '\u0217': // ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE] 1272 case '\u0289': // ʉ [LATIN SMALL LETTER U BAR] 1273 case '\u1D64': // ᵤ [LATIN SUBSCRIPT SMALL LETTER U] 1274 case '\u1D99': // ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] 1275 case '\u1E73': // ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] 1276 case '\u1E75': // ṵ [LATIN SMALL LETTER U WITH TILDE BELOW] 1277 case '\u1E77': // ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] 1278 case '\u1E79': // ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] 1279 case '\u1E7B': // ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] 1280 case '\u1EE5': // ụ [LATIN SMALL LETTER U WITH DOT BELOW] 1281 case '\u1EE7': // ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] 1282 case '\u1EE9': // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] 1283 case '\u1EEB': // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] 1284 case '\u1EED': // ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] 1285 case '\u1EEF': // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] 1286 case '\u1EF1': // ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] 1287 case '\u24E4': // ⓤ [CIRCLED LATIN SMALL LETTER U] 1288 case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U] 1289 sb.append('u'); 1290 break; 1291 case '\u24B0': // ⒰ [PARENTHESIZED LATIN SMALL LETTER U] 1292 sb.append('('); 1293 sb.append('u'); 1294 sb.append(')'); 1295 break; 1296 case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE] 1297 sb.append('u'); 1298 sb.append('e'); 1299 break; 1300 case '\u01B2': // Ʋ [LATIN CAPITAL LETTER V WITH HOOK] 1301 case '\u0245': // Ʌ [LATIN CAPITAL LETTER TURNED V] 1302 case '\u1D20': // ᴠ [LATIN LETTER SMALL CAPITAL V] 1303 case '\u1E7C': // Ṽ [LATIN CAPITAL LETTER V WITH TILDE] 1304 case '\u1E7E': // Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW] 1305 case '\u1EFC': // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] 1306 case '\u24CB': // Ⓥ [CIRCLED LATIN CAPITAL LETTER V] 1307 case '\uA75E': // Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] 1308 case '\uA768': // Ꝩ [LATIN CAPITAL LETTER VEND] 1309 case '\uFF36': // V [FULLWIDTH LATIN CAPITAL LETTER V] 1310 sb.append('V'); 1311 break; 1312 case '\u028B': // ʋ [LATIN SMALL LETTER V WITH HOOK] 1313 case '\u028C': // ʌ [LATIN SMALL LETTER TURNED V] 1314 case '\u1D65': // ᵥ [LATIN SUBSCRIPT SMALL LETTER V] 1315 case '\u1D8C': // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] 1316 case '\u1E7D': // ṽ [LATIN SMALL LETTER V WITH TILDE] 1317 case '\u1E7F': // ṿ [LATIN SMALL LETTER V WITH DOT BELOW] 1318 case '\u24E5': // ⓥ [CIRCLED LATIN SMALL LETTER V] 1319 case '\u2C71': // ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK] 1320 case '\u2C74': // ⱴ [LATIN SMALL LETTER V WITH CURL] 1321 case '\uA75F': // ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE] 1322 case '\uFF56': // v [FULLWIDTH LATIN SMALL LETTER V] 1323 sb.append('v'); 1324 break; 1325 case '\uA760': // Ꝡ [LATIN CAPITAL LETTER VY] 1326 sb.append('V'); 1327 sb.append('Y'); 1328 break; 1329 case '\u24B1': // ⒱ [PARENTHESIZED LATIN SMALL LETTER V] 1330 sb.append('('); 1331 sb.append('v'); 1332 sb.append(')'); 1333 break; 1334 case '\uA761': // ꝡ [LATIN SMALL LETTER VY] 1335 sb.append('v'); 1336 sb.append('y'); 1337 break; 1338 case '\u0174': // Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] 1339 case '\u01F7': // Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] 1340 case '\u1D21': // ᴡ [LATIN LETTER SMALL CAPITAL W] 1341 case '\u1E80': // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] 1342 case '\u1E82': // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] 1343 case '\u1E84': // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] 1344 case '\u1E86': // Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] 1345 case '\u1E88': // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] 1346 case '\u24CC': // Ⓦ [CIRCLED LATIN CAPITAL LETTER W] 1347 case '\u2C72': // Ⱳ [LATIN CAPITAL LETTER W WITH HOOK] 1348 case '\uFF37': // W [FULLWIDTH LATIN CAPITAL LETTER W] 1349 sb.append('W'); 1350 break; 1351 case '\u0175': // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] 1352 case '\u01BF': // ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] 1353 case '\u028D': // ʍ [LATIN SMALL LETTER TURNED W] 1354 case '\u1E81': // ẁ [LATIN SMALL LETTER W WITH GRAVE] 1355 case '\u1E83': // ẃ [LATIN SMALL LETTER W WITH ACUTE] 1356 case '\u1E85': // ẅ [LATIN SMALL LETTER W WITH DIAERESIS] 1357 case '\u1E87': // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] 1358 case '\u1E89': // ẉ [LATIN SMALL LETTER W WITH DOT BELOW] 1359 case '\u1E98': // ẘ [LATIN SMALL LETTER W WITH RING ABOVE] 1360 case '\u24E6': // ⓦ [CIRCLED LATIN SMALL LETTER W] 1361 case '\u2C73': // ⱳ [LATIN SMALL LETTER W WITH HOOK] 1362 case '\uFF57': // w [FULLWIDTH LATIN SMALL LETTER W] 1363 sb.append('w'); 1364 break; 1365 case '\u24B2': // ⒲ [PARENTHESIZED LATIN SMALL LETTER W] 1366 sb.append('('); 1367 sb.append('w'); 1368 sb.append(')'); 1369 break; 1370 case '\u1E8A': // Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] 1371 case '\u1E8C': // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] 1372 case '\u24CD': // Ⓧ [CIRCLED LATIN CAPITAL LETTER X] 1373 case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X] 1374 sb.append('X'); 1375 break; 1376 case '\u1D8D': // ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK] 1377 case '\u1E8B': // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] 1378 case '\u1E8D': // ẍ [LATIN SMALL LETTER X WITH DIAERESIS] 1379 case '\u2093': // ₓ [LATIN SUBSCRIPT SMALL LETTER X] 1380 case '\u24E7': // ⓧ [CIRCLED LATIN SMALL LETTER X] 1381 case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X] 1382 sb.append('x'); 1383 break; 1384 case '\u24B3': // ⒳ [PARENTHESIZED LATIN SMALL LETTER X] 1385 sb.append('('); 1386 sb.append('x'); 1387 sb.append(')'); 1388 break; 1389 case '\u00DD': // Ý [LATIN CAPITAL LETTER Y WITH ACUTE] 1390 case '\u0176': // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] 1391 case '\u0178': // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] 1392 case '\u01B3': // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] 1393 case '\u0232': // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] 1394 case '\u024E': // Ɏ [LATIN CAPITAL LETTER Y WITH STROKE] 1395 case '\u028F': // ʏ [LATIN LETTER SMALL CAPITAL Y] 1396 case '\u1E8E': // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] 1397 case '\u1EF2': // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] 1398 case '\u1EF4': // Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW] 1399 case '\u1EF6': // Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] 1400 case '\u1EF8': // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] 1401 case '\u1EFE': // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] 1402 case '\u24CE': // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] 1403 case '\uFF39': // Y [FULLWIDTH LATIN CAPITAL LETTER Y] 1404 sb.append('Y'); 1405 break; 1406 case '\u00FD': // ý [LATIN SMALL LETTER Y WITH ACUTE] 1407 case '\u00FF': // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] 1408 case '\u0177': // ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX] 1409 case '\u01B4': // ƴ [LATIN SMALL LETTER Y WITH HOOK] 1410 case '\u0233': // ȳ [LATIN SMALL LETTER Y WITH MACRON] 1411 case '\u024F': // ɏ [LATIN SMALL LETTER Y WITH STROKE] 1412 case '\u028E': // ʎ [LATIN SMALL LETTER TURNED Y] 1413 case '\u1E8F': // ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE] 1414 case '\u1E99': // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] 1415 case '\u1EF3': // ỳ [LATIN SMALL LETTER Y WITH GRAVE] 1416 case '\u1EF5': // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] 1417 case '\u1EF7': // ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE] 1418 case '\u1EF9': // ỹ [LATIN SMALL LETTER Y WITH TILDE] 1419 case '\u1EFF': // ỿ [LATIN SMALL LETTER Y WITH LOOP] 1420 case '\u24E8': // ⓨ [CIRCLED LATIN SMALL LETTER Y] 1421 case '\uFF59': // y [FULLWIDTH LATIN SMALL LETTER Y] 1422 sb.append('y'); 1423 break; 1424 case '\u24B4': // ⒴ [PARENTHESIZED LATIN SMALL LETTER Y] 1425 sb.append('('); 1426 sb.append('y'); 1427 sb.append(')'); 1428 break; 1429 case '\u0179': // Ź [LATIN CAPITAL LETTER Z WITH ACUTE] 1430 case '\u017B': // Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE] 1431 case '\u017D': // Ž [LATIN CAPITAL LETTER Z WITH CARON] 1432 case '\u01B5': // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] 1433 case '\u021C': // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] 1434 case '\u0224': // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] 1435 case '\u1D22': // ᴢ [LATIN LETTER SMALL CAPITAL Z] 1436 case '\u1E90': // Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] 1437 case '\u1E92': // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] 1438 case '\u1E94': // Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW] 1439 case '\u24CF': // Ⓩ [CIRCLED LATIN CAPITAL LETTER Z] 1440 case '\u2C6B': // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] 1441 case '\uA762': // Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z] 1442 case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z] 1443 sb.append('Z'); 1444 break; 1445 case '\u017A': // ź [LATIN SMALL LETTER Z WITH ACUTE] 1446 case '\u017C': // ż [LATIN SMALL LETTER Z WITH DOT ABOVE] 1447 case '\u017E': // ž [LATIN SMALL LETTER Z WITH CARON] 1448 case '\u01B6': // ƶ [LATIN SMALL LETTER Z WITH STROKE] 1449 case '\u021D': // ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] 1450 case '\u0225': // ȥ [LATIN SMALL LETTER Z WITH HOOK] 1451 case '\u0240': // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL] 1452 case '\u0290': // ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] 1453 case '\u0291': // ʑ [LATIN SMALL LETTER Z WITH CURL] 1454 case '\u1D76': // ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] 1455 case '\u1D8E': // ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] 1456 case '\u1E91': // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] 1457 case '\u1E93': // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] 1458 case '\u1E95': // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] 1459 case '\u24E9': // ⓩ [CIRCLED LATIN SMALL LETTER Z] 1460 case '\u2C6C': // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] 1461 case '\uA763': // ꝣ [LATIN SMALL LETTER VISIGOTHIC Z] 1462 case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z] 1463 sb.append('z'); 1464 break; 1465 case '\u24B5': // ⒵ [PARENTHESIZED LATIN SMALL LETTER Z] 1466 sb.append('('); 1467 sb.append('z'); 1468 sb.append(')'); 1469 break; 1470 case '\u2070': // ⁰ [SUPERSCRIPT ZERO] 1471 case '\u2080': // ₀ [SUBSCRIPT ZERO] 1472 case '\u24EA': // ⓪ [CIRCLED DIGIT ZERO] 1473 case '\u24FF': // ⓿ [NEGATIVE CIRCLED DIGIT ZERO] 1474 case '\uFF10': // 0 [FULLWIDTH DIGIT ZERO] 1475 sb.append('0'); 1476 break; 1477 case '\u00B9': // ¹ [SUPERSCRIPT ONE] 1478 case '\u2081': // ₁ [SUBSCRIPT ONE] 1479 case '\u2460': // ① [CIRCLED DIGIT ONE] 1480 case '\u24F5': // ⓵ [DOUBLE CIRCLED DIGIT ONE] 1481 case '\u2776': // ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE] 1482 case '\u2780': // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] 1483 case '\u278A': // ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] 1484 case '\uFF11': // 1 [FULLWIDTH DIGIT ONE] 1485 sb.append('1'); 1486 break; 1487 case '\u2488': // ⒈ [DIGIT ONE FULL STOP] 1488 sb.append('1'); 1489 sb.append('.'); 1490 break; 1491 case '\u2474': // ⑴ [PARENTHESIZED DIGIT ONE] 1492 sb.append('('); 1493 sb.append('1'); 1494 sb.append(')'); 1495 break; 1496 case '\u00B2': // ² [SUPERSCRIPT TWO] 1497 case '\u2082': // ₂ [SUBSCRIPT TWO] 1498 case '\u2461': // ② [CIRCLED DIGIT TWO] 1499 case '\u24F6': // ⓶ [DOUBLE CIRCLED DIGIT TWO] 1500 case '\u2777': // ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO] 1501 case '\u2781': // ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] 1502 case '\u278B': // ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] 1503 case '\uFF12': // 2 [FULLWIDTH DIGIT TWO] 1504 sb.append('2'); 1505 break; 1506 case '\u2489': // ⒉ [DIGIT TWO FULL STOP] 1507 sb.append('2'); 1508 sb.append('.'); 1509 break; 1510 case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO] 1511 sb.append('('); 1512 sb.append('2'); 1513 sb.append(')'); 1514 break; 1515 case '\u00B3': // ³ [SUPERSCRIPT THREE] 1516 case '\u2083': // ₃ [SUBSCRIPT THREE] 1517 case '\u2462': // ③ [CIRCLED DIGIT THREE] 1518 case '\u24F7': // ⓷ [DOUBLE CIRCLED DIGIT THREE] 1519 case '\u2778': // ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE] 1520 case '\u2782': // ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] 1521 case '\u278C': // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] 1522 case '\uFF13': // 3 [FULLWIDTH DIGIT THREE] 1523 sb.append('3'); 1524 break; 1525 case '\u248A': // ⒊ [DIGIT THREE FULL STOP] 1526 sb.append('3'); 1527 sb.append('.'); 1528 break; 1529 case '\u2476': // ⑶ [PARENTHESIZED DIGIT THREE] 1530 sb.append('('); 1531 sb.append('3'); 1532 sb.append(')'); 1533 break; 1534 case '\u2074': // ⁴ [SUPERSCRIPT FOUR] 1535 case '\u2084': // ₄ [SUBSCRIPT FOUR] 1536 case '\u2463': // ④ [CIRCLED DIGIT FOUR] 1537 case '\u24F8': // ⓸ [DOUBLE CIRCLED DIGIT FOUR] 1538 case '\u2779': // ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] 1539 case '\u2783': // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] 1540 case '\u278D': // ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] 1541 case '\uFF14': // 4 [FULLWIDTH DIGIT FOUR] 1542 sb.append('4'); 1543 break; 1544 case '\u248B': // ⒋ [DIGIT FOUR FULL STOP] 1545 sb.append('4'); 1546 sb.append('.'); 1547 break; 1548 case '\u2477': // ⑷ [PARENTHESIZED DIGIT FOUR] 1549 sb.append('('); 1550 sb.append('4'); 1551 sb.append(')'); 1552 break; 1553 case '\u2075': // ⁵ [SUPERSCRIPT FIVE] 1554 case '\u2085': // ₅ [SUBSCRIPT FIVE] 1555 case '\u2464': // ⑤ [CIRCLED DIGIT FIVE] 1556 case '\u24F9': // ⓹ [DOUBLE CIRCLED DIGIT FIVE] 1557 case '\u277A': // ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] 1558 case '\u2784': // ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] 1559 case '\u278E': // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] 1560 case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE] 1561 sb.append('5'); 1562 break; 1563 case '\u248C': // ⒌ [DIGIT FIVE FULL STOP] 1564 sb.append('5'); 1565 sb.append('.'); 1566 break; 1567 case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE] 1568 sb.append('('); 1569 sb.append('5'); 1570 sb.append(')'); 1571 break; 1572 case '\u2076': // ⁶ [SUPERSCRIPT SIX] 1573 case '\u2086': // ₆ [SUBSCRIPT SIX] 1574 case '\u2465': // ⑥ [CIRCLED DIGIT SIX] 1575 case '\u24FA': // ⓺ [DOUBLE CIRCLED DIGIT SIX] 1576 case '\u277B': // ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX] 1577 case '\u2785': // ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] 1578 case '\u278F': // ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] 1579 case '\uFF16': // 6 [FULLWIDTH DIGIT SIX] 1580 sb.append('6'); 1581 break; 1582 case '\u248D': // ⒍ [DIGIT SIX FULL STOP] 1583 sb.append('6'); 1584 sb.append('.'); 1585 break; 1586 case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX] 1587 sb.append('('); 1588 sb.append('6'); 1589 sb.append(')'); 1590 break; 1591 case '\u2077': // ⁷ [SUPERSCRIPT SEVEN] 1592 case '\u2087': // ₇ [SUBSCRIPT SEVEN] 1593 case '\u2466': // ⑦ [CIRCLED DIGIT SEVEN] 1594 case '\u24FB': // ⓻ [DOUBLE CIRCLED DIGIT SEVEN] 1595 case '\u277C': // ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] 1596 case '\u2786': // ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] 1597 case '\u2790': // ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] 1598 case '\uFF17': // 7 [FULLWIDTH DIGIT SEVEN] 1599 sb.append('7'); 1600 break; 1601 case '\u248E': // ⒎ [DIGIT SEVEN FULL STOP] 1602 sb.append('7'); 1603 sb.append('.'); 1604 break; 1605 case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN] 1606 sb.append('('); 1607 sb.append('7'); 1608 sb.append(')'); 1609 break; 1610 case '\u2078': // ⁸ [SUPERSCRIPT EIGHT] 1611 case '\u2088': // ₈ [SUBSCRIPT EIGHT] 1612 case '\u2467': // ⑧ [CIRCLED DIGIT EIGHT] 1613 case '\u24FC': // ⓼ [DOUBLE CIRCLED DIGIT EIGHT] 1614 case '\u277D': // ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] 1615 case '\u2787': // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] 1616 case '\u2791': // ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] 1617 case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT] 1618 sb.append('8'); 1619 break; 1620 case '\u248F': // ⒏ [DIGIT EIGHT FULL STOP] 1621 sb.append('8'); 1622 sb.append('.'); 1623 break; 1624 case '\u247B': // ⑻ [PARENTHESIZED DIGIT EIGHT] 1625 sb.append('('); 1626 sb.append('8'); 1627 sb.append(')'); 1628 break; 1629 case '\u2079': // ⁹ [SUPERSCRIPT NINE] 1630 case '\u2089': // ₉ [SUBSCRIPT NINE] 1631 case '\u2468': // ⑨ [CIRCLED DIGIT NINE] 1632 case '\u24FD': // ⓽ [DOUBLE CIRCLED DIGIT NINE] 1633 case '\u277E': // ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE] 1634 case '\u2788': // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] 1635 case '\u2792': // ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] 1636 case '\uFF19': // 9 [FULLWIDTH DIGIT NINE] 1637 sb.append('9'); 1638 break; 1639 case '\u2490': // ⒐ [DIGIT NINE FULL STOP] 1640 sb.append('9'); 1641 sb.append('.'); 1642 break; 1643 case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE] 1644 sb.append('('); 1645 sb.append('9'); 1646 sb.append(')'); 1647 break; 1648 case '\u2469': // ⑩ [CIRCLED NUMBER TEN] 1649 case '\u24FE': // ⓾ [DOUBLE CIRCLED NUMBER TEN] 1650 case '\u277F': // ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN] 1651 case '\u2789': // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] 1652 case '\u2793': // ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] 1653 sb.append('1'); 1654 sb.append('0'); 1655 break; 1656 case '\u2491': // ⒑ [NUMBER TEN FULL STOP] 1657 sb.append('1'); 1658 sb.append('0'); 1659 sb.append('.'); 1660 break; 1661 case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN] 1662 sb.append('('); 1663 sb.append('1'); 1664 sb.append('0'); 1665 sb.append(')'); 1666 break; 1667 case '\u246A': // ⑪ [CIRCLED NUMBER ELEVEN] 1668 case '\u24EB': // ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN] 1669 sb.append('1'); 1670 sb.append('1'); 1671 break; 1672 case '\u2492': // ⒒ [NUMBER ELEVEN FULL STOP] 1673 sb.append('1'); 1674 sb.append('1'); 1675 sb.append('.'); 1676 break; 1677 case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN] 1678 sb.append('('); 1679 sb.append('1'); 1680 sb.append('1'); 1681 sb.append(')'); 1682 break; 1683 case '\u246B': // ⑫ [CIRCLED NUMBER TWELVE] 1684 case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] 1685 sb.append('1'); 1686 sb.append('2'); 1687 break; 1688 case '\u2493': // ⒓ [NUMBER TWELVE FULL STOP] 1689 sb.append('1'); 1690 sb.append('2'); 1691 sb.append('.'); 1692 break; 1693 case '\u247F': // ⑿ [PARENTHESIZED NUMBER TWELVE] 1694 sb.append('('); 1695 sb.append('1'); 1696 sb.append('2'); 1697 sb.append(')'); 1698 break; 1699 case '\u246C': // ⑬ [CIRCLED NUMBER THIRTEEN] 1700 case '\u24ED': // ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN] 1701 sb.append('1'); 1702 sb.append('3'); 1703 break; 1704 case '\u2494': // ⒔ [NUMBER THIRTEEN FULL STOP] 1705 sb.append('1'); 1706 sb.append('3'); 1707 sb.append('.'); 1708 break; 1709 case '\u2480': // ⒀ [PARENTHESIZED NUMBER THIRTEEN] 1710 sb.append('('); 1711 sb.append('1'); 1712 sb.append('3'); 1713 sb.append(')'); 1714 break; 1715 case '\u246D': // ⑭ [CIRCLED NUMBER FOURTEEN] 1716 case '\u24EE': // ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN] 1717 sb.append('1'); 1718 sb.append('4'); 1719 break; 1720 case '\u2495': // ⒕ [NUMBER FOURTEEN FULL STOP] 1721 sb.append('1'); 1722 sb.append('4'); 1723 sb.append('.'); 1724 break; 1725 case '\u2481': // ⒁ [PARENTHESIZED NUMBER FOURTEEN] 1726 sb.append('('); 1727 sb.append('1'); 1728 sb.append('4'); 1729 sb.append(')'); 1730 break; 1731 case '\u246E': // ⑮ [CIRCLED NUMBER FIFTEEN] 1732 case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] 1733 sb.append('1'); 1734 sb.append('5'); 1735 break; 1736 case '\u2496': // ⒖ [NUMBER FIFTEEN FULL STOP] 1737 sb.append('1'); 1738 sb.append('5'); 1739 sb.append('.'); 1740 break; 1741 case '\u2482': // ⒂ [PARENTHESIZED NUMBER FIFTEEN] 1742 sb.append('('); 1743 sb.append('1'); 1744 sb.append('5'); 1745 sb.append(')'); 1746 break; 1747 case '\u246F': // ⑯ [CIRCLED NUMBER SIXTEEN] 1748 case '\u24F0': // ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN] 1749 sb.append('1'); 1750 sb.append('6'); 1751 break; 1752 case '\u2497': // ⒗ [NUMBER SIXTEEN FULL STOP] 1753 sb.append('1'); 1754 sb.append('6'); 1755 sb.append('.'); 1756 break; 1757 case '\u2483': // ⒃ [PARENTHESIZED NUMBER SIXTEEN] 1758 sb.append('('); 1759 sb.append('1'); 1760 sb.append('6'); 1761 sb.append(')'); 1762 break; 1763 case '\u2470': // ⑰ [CIRCLED NUMBER SEVENTEEN] 1764 case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] 1765 sb.append('1'); 1766 sb.append('7'); 1767 break; 1768 case '\u2498': // ⒘ [NUMBER SEVENTEEN FULL STOP] 1769 sb.append('1'); 1770 sb.append('7'); 1771 sb.append('.'); 1772 break; 1773 case '\u2484': // ⒄ [PARENTHESIZED NUMBER SEVENTEEN] 1774 sb.append('('); 1775 sb.append('1'); 1776 sb.append('7'); 1777 sb.append(')'); 1778 break; 1779 case '\u2471': // ⑱ [CIRCLED NUMBER EIGHTEEN] 1780 case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] 1781 sb.append('1'); 1782 sb.append('8'); 1783 break; 1784 case '\u2499': // ⒙ [NUMBER EIGHTEEN FULL STOP] 1785 sb.append('1'); 1786 sb.append('8'); 1787 sb.append('.'); 1788 break; 1789 case '\u2485': // ⒅ [PARENTHESIZED NUMBER EIGHTEEN] 1790 sb.append('('); 1791 sb.append('1'); 1792 sb.append('8'); 1793 sb.append(')'); 1794 break; 1795 case '\u2472': // ⑲ [CIRCLED NUMBER NINETEEN] 1796 case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] 1797 sb.append('1'); 1798 sb.append('9'); 1799 break; 1800 case '\u249A': // ⒚ [NUMBER NINETEEN FULL STOP] 1801 sb.append('1'); 1802 sb.append('9'); 1803 sb.append('.'); 1804 break; 1805 case '\u2486': // ⒆ [PARENTHESIZED NUMBER NINETEEN] 1806 sb.append('('); 1807 sb.append('1'); 1808 sb.append('9'); 1809 sb.append(')'); 1810 break; 1811 case '\u2473': // ⑳ [CIRCLED NUMBER TWENTY] 1812 case '\u24F4': // ⓴ [NEGATIVE CIRCLED NUMBER TWENTY] 1813 sb.append('2'); 1814 sb.append('0'); 1815 break; 1816 case '\u249B': // ⒛ [NUMBER TWENTY FULL STOP] 1817 sb.append('2'); 1818 sb.append('0'); 1819 sb.append('.'); 1820 break; 1821 case '\u2487': // ⒇ [PARENTHESIZED NUMBER TWENTY] 1822 sb.append('('); 1823 sb.append('2'); 1824 sb.append('0'); 1825 sb.append(')'); 1826 break; 1827 case '\u00AB': // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] 1828 case '\u00BB': // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] 1829 case '\u201C': // “ [LEFT DOUBLE QUOTATION MARK] 1830 case '\u201D': // ” [RIGHT DOUBLE QUOTATION MARK] 1831 case '\u201E': // „ [DOUBLE LOW-9 QUOTATION MARK] 1832 case '\u2033': // ″ [DOUBLE PRIME] 1833 case '\u2036': // ‶ [REVERSED DOUBLE PRIME] 1834 case '\u275D': // ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] 1835 case '\u275E': // ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] 1836 case '\u276E': // ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] 1837 case '\u276F': // ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] 1838 case '\uFF02': // " [FULLWIDTH QUOTATION MARK] 1839 sb.append('"'); 1840 break; 1841 case '\u2018': // ‘ [LEFT SINGLE QUOTATION MARK] 1842 case '\u2019': // ’ [RIGHT SINGLE QUOTATION MARK] 1843 case '\u201A': // ‚ [SINGLE LOW-9 QUOTATION MARK] 1844 case '\u201B': // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] 1845 case '\u2032': // ′ [PRIME] 1846 case '\u2035': // ‵ [REVERSED PRIME] 1847 case '\u2039': // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] 1848 case '\u203A': // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] 1849 case '\u275B': // ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] 1850 case '\u275C': // ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] 1851 case '\uFF07': // ' [FULLWIDTH APOSTROPHE] 1852 sb.append('\''); 1853 break; 1854 case '\u2010': // ‐ [HYPHEN] 1855 case '\u2011': // ‑ [NON-BREAKING HYPHEN] 1856 case '\u2012': // ‒ [FIGURE DASH] 1857 case '\u2013': // – [EN DASH] 1858 case '\u2014': // — [EM DASH] 1859 case '\u207B': // ⁻ [SUPERSCRIPT MINUS] 1860 case '\u208B': // ₋ [SUBSCRIPT MINUS] 1861 case '\uFF0D': // - [FULLWIDTH HYPHEN-MINUS] 1862 sb.append('-'); 1863 break; 1864 case '\u2045': // ⁅ [LEFT SQUARE BRACKET WITH QUILL] 1865 case '\u2772': // ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] 1866 case '\uFF3B': // [ [FULLWIDTH LEFT SQUARE BRACKET] 1867 sb.append('['); 1868 break; 1869 case '\u2046': // ⁆ [RIGHT SQUARE BRACKET WITH QUILL] 1870 case '\u2773': // ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] 1871 case '\uFF3D': // ] [FULLWIDTH RIGHT SQUARE BRACKET] 1872 sb.append(']'); 1873 break; 1874 case '\u207D': // ⁽ [SUPERSCRIPT LEFT PARENTHESIS] 1875 case '\u208D': // ₍ [SUBSCRIPT LEFT PARENTHESIS] 1876 case '\u2768': // ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT] 1877 case '\u276A': // ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] 1878 case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS] 1879 sb.append('('); 1880 break; 1881 case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS] 1882 sb.append('('); 1883 sb.append('('); 1884 break; 1885 case '\u207E': // ⁾ [SUPERSCRIPT RIGHT PARENTHESIS] 1886 case '\u208E': // ₎ [SUBSCRIPT RIGHT PARENTHESIS] 1887 case '\u2769': // ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT] 1888 case '\u276B': // ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] 1889 case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS] 1890 sb.append(')'); 1891 break; 1892 case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS] 1893 sb.append(')'); 1894 sb.append(')'); 1895 break; 1896 case '\u276C': // ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] 1897 case '\u2770': // ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] 1898 case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN] 1899 sb.append('<'); 1900 break; 1901 case '\u276D': // ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] 1902 case '\u2771': // ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] 1903 case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN] 1904 sb.append('>'); 1905 break; 1906 case '\u2774': // ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT] 1907 case '\uFF5B': // { [FULLWIDTH LEFT CURLY BRACKET] 1908 sb.append('{'); 1909 break; 1910 case '\u2775': // ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT] 1911 case '\uFF5D': // } [FULLWIDTH RIGHT CURLY BRACKET] 1912 sb.append('}'); 1913 break; 1914 case '\u207A': // ⁺ [SUPERSCRIPT PLUS SIGN] 1915 case '\u208A': // ₊ [SUBSCRIPT PLUS SIGN] 1916 case '\uFF0B': // + [FULLWIDTH PLUS SIGN] 1917 sb.append('+'); 1918 break; 1919 case '\u207C': // ⁼ [SUPERSCRIPT EQUALS SIGN] 1920 case '\u208C': // ₌ [SUBSCRIPT EQUALS SIGN] 1921 case '\uFF1D': // = [FULLWIDTH EQUALS SIGN] 1922 sb.append('='); 1923 break; 1924 case '\uFF01': // ! [FULLWIDTH EXCLAMATION MARK] 1925 sb.append('!'); 1926 break; 1927 case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK] 1928 sb.append('!'); 1929 sb.append('!'); 1930 break; 1931 case '\u2049': // ⁉ [EXCLAMATION QUESTION MARK] 1932 sb.append('!'); 1933 sb.append('?'); 1934 break; 1935 case '\uFF03': // # [FULLWIDTH NUMBER SIGN] 1936 sb.append('#'); 1937 break; 1938 case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN] 1939 sb.append('$'); 1940 break; 1941 case '\u2052': // ⁒ [COMMERCIAL MINUS SIGN] 1942 case '\uFF05': // % [FULLWIDTH PERCENT SIGN] 1943 sb.append('%'); 1944 break; 1945 case '\uFF06': // & [FULLWIDTH AMPERSAND] 1946 sb.append('&'); 1947 break; 1948 case '\u204E': // ⁎ [LOW ASTERISK] 1949 case '\uFF0A': // * [FULLWIDTH ASTERISK] 1950 sb.append('*'); 1951 break; 1952 case '\uFF0C': // , [FULLWIDTH COMMA] 1953 sb.append(','); 1954 break; 1955 case '\uFF0E': // . [FULLWIDTH FULL STOP] 1956 sb.append('.'); 1957 break; 1958 case '\u2044': // ⁄ [FRACTION SLASH] 1959 case '\uFF0F': // / [FULLWIDTH SOLIDUS] 1960 sb.append('/'); 1961 break; 1962 case '\uFF1A': // : [FULLWIDTH COLON] 1963 sb.append(':'); 1964 break; 1965 case '\u204F': // ⁏ [REVERSED SEMICOLON] 1966 case '\uFF1B': // ; [FULLWIDTH SEMICOLON] 1967 sb.append(';'); 1968 break; 1969 case '\uFF1F': // ? [FULLWIDTH QUESTION MARK] 1970 sb.append('?'); 1971 break; 1972 case '\u2047': // ⁇ [DOUBLE QUESTION MARK] 1973 sb.append('?'); 1974 sb.append('?'); 1975 break; 1976 case '\u2048': // ⁈ [QUESTION EXCLAMATION MARK] 1977 sb.append('?'); 1978 sb.append('!'); 1979 break; 1980 case '\uFF20': // @ [FULLWIDTH COMMERCIAL AT] 1981 sb.append('@'); 1982 break; 1983 case '\uFF3C': // \ [FULLWIDTH REVERSE SOLIDUS] 1984 sb.append('\\'); 1985 break; 1986 case '\u2038': // ‸ [CARET] 1987 case '\uFF3E': // ^ [FULLWIDTH CIRCUMFLEX ACCENT] 1988 sb.append('^'); 1989 break; 1990 case '\uFF3F': // _ [FULLWIDTH LOW LINE] 1991 sb.append('_'); 1992 break; 1993 case '\u2053': // ⁓ [SWUNG DASH] 1994 case '\uFF5E': // ~ [FULLWIDTH TILDE] 1995 sb.append('~'); 1996 break; 1997 default: 1998 sb.append(c); 1999 break; 2000 } 2001 } 2002 } 2003}