/*
 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.api.vocabulary;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import org.apache.commons.lang3.StringUtils;

import com.fasterxml.jackson.annotation.JsonValue;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.KeyDeserializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;

/**
 * Enumeration for all ISO 639-1 language codes using 2 lower case letters. The enumeration maps to
 * 3 letter codes and Locales.
 *
 * <p>JSON values and map keys are written as the 3 letter code (see {@link IsoSerializer} and
 * {@link IsoKeySerializer}) and read leniently from a 2 letter code, a 3 letter code or the
 * enumeration constant name (see {@link LenientDeserializer} and {@link LenientKeyDeserializer}).
 *
 * @see <a href="http://en.wikipedia.org/wiki/ISO_639">Wikipedia on ISO-639</a>
 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/util/Locale.html">Locale javadoc</a>
 */
@JsonSerialize(using = Language.IsoSerializer.class, keyUsing = Language.IsoKeySerializer.class)
@JsonDeserialize(
    using = Language.LenientDeserializer.class,
    keyUsing = Language.LenientKeyDeserializer.class)
public enum Language {

  /** Abkhazian. */
  ABKHAZIAN("ab"),

  /** Afar. */
  AFAR("aa"),

  /** Afrikaans. */
  AFRIKAANS("af"),

  /** Akan. */
  AKAN("ak"),

  /** Albanian. */
  ALBANIAN("sq"),

  /** Amharic. */
  AMHARIC("am"),

  /** Arabic. */
  ARABIC("ar"),

  /** Aragonese. */
  ARAGONESE("an"),

  /** Armenian. */
  ARMENIAN("hy"),

  /** Assamese. */
  ASSAMESE("as"),

  /** Avaric. */
  AVARIC("av"),

  /** Avestan. */
  AVESTAN("ae"),

  /** Aymara. */
  AYMARA("ay"),

  /** Azerbaijani. */
  AZERBAIJANI("az"),

  /** Bambara. */
  BAMBARA("bm"),

  /** Bashkir. */
  BASHKIR("ba"),

  /** Basque. */
  BASQUE("eu"),

  /** Belarusian. */
  BELARUSIAN("be"),

  /** Bengali. */
  BENGALI("bn"),

  /** Bihari. */
  BIHARI("bh"),

  /** Bislama. */
  BISLAMA("bi"),

  /** Bosnian. */
  BOSNIAN("bs"),

  /** Breton. */
  BRETON("br"),

  /** Bulgarian. */
  BULGARIAN("bg"),

  /** Burmese. */
  BURMESE("my"),

  /** Catalan. */
  CATALAN("ca"),

  /** Chamorro. */
  CHAMORRO("ch"),

  /** Chechen. */
  CHECHEN("ce"),

  /** Chinese. */
  CHINESE("zh"),

  /** Church Slavic. */
  CHURCH_SLAVIC("cu"),

  /** Chuvash. */
  CHUVASH("cv"),

  /** Cornish. */
  CORNISH("kw"),

  /** Corsican. */
  CORSICAN("co"),

  /** Cree. */
  CREE("cr"),

  /** Croatian. */
  CROATIAN("hr"),

  /** Czech. */
  CZECH("cs"),

  /** Danish. */
  DANISH("da"),

  /** Divehi. */
  DIVEHI("dv"),

  /** Dutch. */
  DUTCH("nl"),

  /** Dzongkha. */
  DZONGKHA("dz"),

  /** English. */
  ENGLISH("en"),

  /** Esperanto. */
  ESPERANTO("eo"),

  /** Estonian. */
  ESTONIAN("et"),

  /** Ewe. */
  EWE("ee"),

  /** Faroese. */
  FAROESE("fo"),

  /** Fijian. */
  FIJIAN("fj"),

  /** Finnish. */
  FINNISH("fi"),

  /** French. */
  FRENCH("fr"),

  /** Frisian. */
  FRISIAN("fy"),

  /** Fulah. */
  FULAH("ff"),

  /** Gallegan. */
  GALLEGAN("gl"),

  /** Ganda. */
  GANDA("lg"),

  /** Georgian. */
  GEORGIAN("ka"),

  /** German. */
  GERMAN("de"),

  /** Greek. */
  GREEK("el"),

  /** Greenlandic. */
  GREENLANDIC("kl"),

  /** Guarani. */
  GUARANI("gn"),

  /** Gujarati. */
  GUJARATI("gu"),

  /** Haitian. */
  HAITIAN("ht"),

  /** Hausa. */
  HAUSA("ha"),

  /** Hebrew. */
  HEBREW("he"),

  /** Herero. */
  HERERO("hz"),

  /** Hindi. */
  HINDI("hi"),

  /** Hiri Motu. */
  HIRI_MOTU("ho"),

  /** Hungarian. */
  HUNGARIAN("hu"),

  /** Icelandic. */
  ICELANDIC("is"),

  /** Ido. */
  IDO("io"),

  /** Igbo. */
  IGBO("ig"),

  /** Indonesian. */
  INDONESIAN("id"),

  /** Interlingua. */
  INTERLINGUA("ia"),

  /** Interlingue. */
  INTERLINGUE("ie"),

  /** Inuktitut. */
  INUKTITUT("iu"),

  /** Inupiaq. */
  INUPIAQ("ik"),

  /** Irish. */
  IRISH("ga"),

  /** Italian. */
  ITALIAN("it"),

  /** Japanese. */
  JAPANESE("ja"),

  /** Javanese. */
  JAVANESE("jv"),

  /** Kannada. */
  KANNADA("kn"),

  /** Kanuri. */
  KANURI("kr"),

  /** Kashmiri. */
  KASHMIRI("ks"),

  /** Kazakh. */
  KAZAKH("kk"),

  /** Khmer. */
  KHMER("km"),

  /** Kikuyu. */
  KIKUYU("ki"),

  /** Kinyarwanda. */
  KINYARWANDA("rw"),

  /** Kirghiz. */
  KIRGHIZ("ky"),

  /** Komi. */
  KOMI("kv"),

  /** Kongo. */
  KONGO("kg"),

  /** Korean. */
  KOREAN("ko"),

  /** Kurdish. */
  KURDISH("ku"),

  /** Kwanyama. */
  KWANYAMA("kj"),

  /** Lao. */
  LAO("lo"),

  /** Latin. */
  LATIN("la"),

  /** Latvian. */
  LATVIAN("lv"),

  /** Limburgish. */
  LIMBURGISH("li"),

  /** Lingala. */
  LINGALA("ln"),

  /** Lithuanian. */
  LITHUANIAN("lt"),

  /** Luba-Katanga. */
  LUBA_KATANGA("lu"),

  /** Luxembourgish. */
  LUXEMBOURGISH("lb"),

  /** Macedonian. */
  MACEDONIAN("mk"),

  /** Malagasy. */
  MALAGASY("mg"),

  /** Malay. */
  MALAY("ms"),

  /** Malayalam. */
  MALAYALAM("ml"),

  /** Maltese. */
  MALTESE("mt"),

  /** Manx. */
  MANX("gv"),

  /** Maori. */
  MAORI("mi"),

  /** Marathi. */
  MARATHI("mr"),

  /** Marshallese. */
  MARSHALLESE("mh"),

  /** Moldavian. */
  MOLDAVIAN("mo"),

  /** Mongolian. */
  MONGOLIAN("mn"),

  /** Nauru. */
  NAURU("na"),

  /** Navajo. */
  NAVAJO("nv"),

  /** Ndonga. */
  NDONGA("ng"),

  /** Nepali. */
  NEPALI("ne"),

  /** North Ndebele. */
  NORTH_NDEBELE("nd"),

  /** Northern Sami. */
  NORTHERN_SAMI("se"),

  /** Norwegian Bokmål. */
  NORWEGIAN_BOKMAL("nb"),

  /** Norwegian Nynorsk. */
  NORWEGIAN_NYNORSK("nn"),

  /** Norwegian. */
  NORWEGIAN("no"),

  /** Nyanja. */
  NYANJA("ny"),

  /** Occitan. */
  OCCITAN("oc"),

  /** Ojibwa. */
  OJIBWA("oj"),

  /** Oriya. */
  ORIYA("or"),

  /** Oromo. */
  OROMO("om"),

  /** Ossetian. */
  OSSETIAN("os"),

  /** Pali. */
  PALI("pi"),

  /** Panjabi. */
  PANJABI("pa"),

  /** Persian. */
  PERSIAN("fa"),

  /** Polish. */
  POLISH("pl"),

  /** Portuguese. */
  PORTUGUESE("pt"),

  /** Pushto. */
  PUSHTO("ps"),

  /** Quechua. */
  QUECHUA("qu"),

  /** Raeto-Romance. */
  RAETO_ROMANCE("rm"),

  /** Romanian. */
  ROMANIAN("ro"),

  /** Rundi. */
  RUNDI("rn"),

  /** Russian. */
  RUSSIAN("ru"),

  /** Samoan. */
  SAMOAN("sm"),

  /** Sango. */
  SANGO("sg"),

  /** Sanskrit. */
  SANSKRIT("sa"),

  /** Sardinian. */
  SARDINIAN("sc"),

  /** Scottish Gaelic. */
  SCOTTISH_GAELIC("gd"),

  /** Serbian. */
  SERBIAN("sr"),

  /** Shona. */
  SHONA("sn"),

  /** Sichuan Yi. */
  SICHUAN_YI("ii"),

  /** Sindhi. */
  SINDHI("sd"),

  /** Sinhalese. */
  SINHALESE("si"),

  /** Slovak. */
  SLOVAK("sk"),

  /** Slovenian. */
  SLOVENIAN("sl"),

  /** Somali. */
  SOMALI("so"),

  /** South Ndebele. */
  SOUTH_NDEBELE("nr"),

  /** Southern Sotho. */
  SOUTHERN_SOTHO("st"),

  /** Spanish. */
  SPANISH("es"),

  /** Sundanese. */
  SUNDANESE("su"),

  /** Swahili. */
  SWAHILI("sw"),

  /** Swati. */
  SWATI("ss"),

  /** Swedish. */
  SWEDISH("sv"),

  /** Tagalog. */
  TAGALOG("tl"),

  /** Tahitian. */
  TAHITIAN("ty"),

  /** Tajik. */
  TAJIK("tg"),

  /** Tamil. */
  TAMIL("ta"),

  /** Tatar. */
  TATAR("tt"),

  /** Telugu. */
  TELUGU("te"),

  /** Thai. */
  THAI("th"),

  /** Tibetan. */
  TIBETAN("bo"),

  /** Tigrinya. */
  TIGRINYA("ti"),

  /** Tonga. */
  TONGA("to"),

  /** Tsonga. */
  TSONGA("ts"),

  /** Tswana. */
  TSWANA("tn"),

  /** Turkish. */
  TURKISH("tr"),

  /** Turkmen. */
  TURKMEN("tk"),

  /** Twi. */
  TWI("tw"),

  /** Uighur. */
  UIGHUR("ug"),

  /** Ukrainian. */
  UKRAINIAN("uk"),

  /** Urdu. */
  URDU("ur"),

  /** Uzbek. */
  UZBEK("uz"),

  /** Venda. */
  VENDA("ve"),

  /** Vietnamese. */
  VIETNAMESE("vi"),

  /** Volapük. */
  VOLAPÜK("vo"),

  /** Walloon. */
  WALLOON("wa"),

  /** Welsh. */
  WELSH("cy"),

  /** Wolof. */
  WOLOF("wo"),

  /** Xhosa. */
  XHOSA("xh"),

  /** Yiddish. */
  YIDDISH("yi"),

  /** Yoruba. */
  YORUBA("yo"),

  /** Zhuang. */
  ZHUANG("za"),

  /** Zulu. */
  ZULU("zu"),

  /** Unknown or Invalid language. */
  UNKNOWN("");

  /** Unmodifiable list of every constant of this enumeration, in declaration order. */
  public static final List<Language> LANGUAGES;

  /** The 2 letter code passed to the constructor; the empty string for {@link #UNKNOWN}. */
  private final String code;

  /** Locale built once from {@link #code}; {@code Locale} is immutable so it is safe to share. */
  private final Locale locale;

  static {
    LANGUAGES = Collections.unmodifiableList(Arrays.asList(Language.values()));
  }

  /**
   * Looks up a language by its ISO code. Surrounding whitespace is ignored and the comparison is
   * case insensitive, independently of the JVM default locale.
   *
   * @param code the case insensitive 2 or 3 letter codes
   * @return the matching language or UNKNOWN
   */
  public static Language fromIsoCode(String code) {
    if (code == null) {
      return UNKNOWN;
    }
    // Locale.ROOT: with a Turkish/Azeri default locale "I" would lower-case to a dotless "ı"
    // and codes such as "ID", "IT" or "FI" would never match.
    String codeLower = code.trim().toLowerCase(Locale.ROOT);
    if (codeLower.length() == 2) {
      for (Language language : LANGUAGES) {
        if (codeLower.equals(language.getIso2LetterCode())) {
          return language;
        }
      }
    } else if (codeLower.length() == 3) {
      for (Language language : LANGUAGES) {
        if (codeLower.equals(language.getIso3LetterCode())) {
          return language;
        }
      }
    }
    return UNKNOWN;
  }

  /**
   * @param code the 2 letter code in lower case, or the empty string for {@link #UNKNOWN}
   */
  Language(String code) {
    this.code = code;
    this.locale = new Locale(code);
  }

  /** @return the 2 letter iso 639-1 code in lower case. */
  public String getIso2LetterCode() {
    return code;
  }

  /** @return the 3 letter iso 639-2 code in lower case. */
  @JsonValue
  public String getIso3LetterCode() {
    return locale.getISO3Language();
  }

  /** @return the locale built from the 2 letter code of this language. */
  public Locale getLocale() {
    return locale;
  }

  /** @return the language title in the English language. */
  public String getTitleEnglish() {
    return locale.getDisplayLanguage(Locale.ENGLISH);
  }

  /** @return the language title in the native language. */
  public String getTitleNative() {
    return locale.getDisplayLanguage(locale);
  }

  /** Serializes the value in a 3 letter ISO format. */
  public static class IsoSerializer extends JsonSerializer<Language> {

    @Override
    public void serialize(Language value, JsonGenerator jgen, SerializerProvider provider)
        throws IOException {
      jgen.writeString(value.getIso3LetterCode());
    }
  }

  /**
   * Deserializes the value from a 2 or 3 letter ISO format or the enumeration name itself to
   * maintain as much backwards compatibility as possible with e.g. the registry api.
   */
  public static class LenientDeserializer extends JsonDeserializer<Language> {

    /**
     * Reads the current token text and parses it leniently.
     *
     * @return the parsed language, or {@link Language#UNKNOWN} when the parser is null, the token
     *     has no text, or the text matches no language
     * @throws IOException if reading the token text fails; the original failure is kept as cause
     */
    @Override
    public Language deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException {
      // Captured once so the error message never has to touch the parser again.
      String text = null;
      try {
        if (jp != null && jp.getTextLength() > 0) {
          text = jp.getText();
          return lenientParse(text);
        }
        return Language.UNKNOWN; // none provided
      } catch (Exception e) {
        throw new IOException(
            "Unable to deserialize language from provided value (hint: not an ISO 2 or 3 character?): "
                + text,
            e);
      }
    }

    /**
     * Parses a language from an ISO code, falling back to the exact (case sensitive) enumeration
     * constant name.
     *
     * @param value a 2 or 3 letter ISO code or an enumeration constant name, may be null
     * @return the matching language, or {@link Language#UNKNOWN} if value is null or unmatched
     */
    static Language lenientParse(String value) {
      if (value == null) {
        return Language.UNKNOWN;
      }
      Language parsed = Language.fromIsoCode(value);
      if (Language.UNKNOWN != parsed) {
        return parsed;
      }
      // backwards compatible: accept the enumeration name, trimmed like the ISO code path
      try {
        return Language.valueOf(value.trim());
      } catch (IllegalArgumentException ignored) {
        // not a constant name either; an unknown language is the documented result
        return Language.UNKNOWN;
      }
    }
  }

  /**
   * Deserializes map keys with the same lenient rules as {@link LenientDeserializer}: a 2 or 3
   * letter ISO code or the enumeration name.
   */
  public static class LenientKeyDeserializer extends KeyDeserializer {

    /**
     * @return the parsed language, or {@link Language#UNKNOWN} for a null, empty or unmatched key
     * @throws IOException if parsing fails unexpectedly; the original failure is kept as cause
     */
    @Override
    public Object deserializeKey(String key, DeserializationContext ctxt) throws IOException {
      try {
        if (StringUtils.isNotEmpty(key)) {
          return LenientDeserializer.lenientParse(key);
        }
        return Language.UNKNOWN; // none provided
      } catch (Exception e) {
        throw new IOException(
            "Unable to deserialize language from provided value (hint: not an ISO 2 or 3 character?): "
                + key,
            e);
      }
    }
  }

  /** Serializes a language used as a map key as its 3 letter ISO code. */
  public static class IsoKeySerializer extends JsonSerializer<Language> {

    @Override
    public void serialize(
        Language language, JsonGenerator jsonGenerator, SerializerProvider serializerProvider)
        throws IOException {
      jsonGenerator.writeFieldName(language.getIso3LetterCode());
    }
  }
}