/*
 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.api.vocabulary;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.MissingResourceException;

import org.apache.commons.lang3.StringUtils;

import com.fasterxml.jackson.annotation.JsonValue;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.KeyDeserializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;

/**
 * Enumeration for all ISO 639-1 language codes using 2 lower case letters. The enumeration maps to
 * 3 letter codes and Locales.
 *
 * <p>In JSON a language is written as its 3-letter code, both as a value and as a map key. When
 * reading, the 2-letter code, the 3-letter code and the enumeration name are all accepted; anything
 * that cannot be matched becomes {@link #UNKNOWN}.
 *
 * @see <a href="https://en.wikipedia.org/wiki/ISO_639">Wikipedia on ISO-639</a>
 * @see <a href="https://docs.oracle.com/javase/8/docs/api/java/util/Locale.html">Locale javadoc</a>
 */
@JsonSerialize(using = Language.IsoSerializer.class, keyUsing = Language.IsoKeySerializer.class)
@JsonDeserialize(
    using = Language.LenientDeserializer.class,
    keyUsing = Language.LenientKeyDeserializer.class)
public enum Language {

  /** Abkhazian. */
  ABKHAZIAN("ab"),

  /** Afar. */
  AFAR("aa"),

  /** Afrikaans. */
  AFRIKAANS("af"),

  /** Akan. */
  AKAN("ak"),

  /** Albanian. */
  ALBANIAN("sq"),

  /** Amharic. */
  AMHARIC("am"),

  /** Arabic. */
  ARABIC("ar"),

  /** Aragonese. */
  ARAGONESE("an"),

  /** Armenian. */
  ARMENIAN("hy"),

  /** Assamese. */
  ASSAMESE("as"),

  /** Avaric. */
  AVARIC("av"),

  /** Avestan. */
  AVESTAN("ae"),

  /** Aymara. */
  AYMARA("ay"),

  /** Azerbaijani. */
  AZERBAIJANI("az"),

  /** Bambara. */
  BAMBARA("bm"),

  /** Bashkir. */
  BASHKIR("ba"),

  /** Basque. */
  BASQUE("eu"),

  /** Belarusian. */
  BELARUSIAN("be"),

  /** Bengali. */
  BENGALI("bn"),

  /** Bihari. */
  BIHARI("bh"),

  /** Bislama. */
  BISLAMA("bi"),

  /** Bosnian. */
  BOSNIAN("bs"),

  /** Breton. */
  BRETON("br"),

  /** Bulgarian. */
  BULGARIAN("bg"),

  /** Burmese. */
  BURMESE("my"),

  /** Catalan. */
  CATALAN("ca"),

  /** Chamorro. */
  CHAMORRO("ch"),

  /** Chechen. */
  CHECHEN("ce"),

  /** Chinese. */
  CHINESE("zh"),

  /** Church Slavic. */
  CHURCH_SLAVIC("cu"),

  /** Chuvash. */
  CHUVASH("cv"),

  /** Cornish. */
  CORNISH("kw"),

  /** Corsican. */
  CORSICAN("co"),

  /** Cree. */
  CREE("cr"),

  /** Croatian. */
  CROATIAN("hr"),

  /** Czech. */
  CZECH("cs"),

  /** Danish. */
  DANISH("da"),

  /** Divehi. */
  DIVEHI("dv"),

  /** Dutch. */
  DUTCH("nl"),

  /** Dzongkha. */
  DZONGKHA("dz"),

  /** English. */
  ENGLISH("en"),

  /** Esperanto. */
  ESPERANTO("eo"),

  /** Estonian. */
  ESTONIAN("et"),

  /** Ewe. */
  EWE("ee"),

  /** Faroese. */
  FAROESE("fo"),

  /** Fijian. */
  FIJIAN("fj"),

  /** Finnish. */
  FINNISH("fi"),

  /** French. */
  FRENCH("fr"),

  /** Frisian. */
  FRISIAN("fy"),

  /** Fulah. */
  FULAH("ff"),

  /** Gallegan. */
  GALLEGAN("gl"),

  /** Ganda. */
  GANDA("lg"),

  /** Georgian. */
  GEORGIAN("ka"),

  /** German. */
  GERMAN("de"),

  /** Greek. */
  GREEK("el"),

  /** Greenlandic. */
  GREENLANDIC("kl"),

  /** Guarani. */
  GUARANI("gn"),

  /** Gujarati. */
  GUJARATI("gu"),

  /** Haitian. */
  HAITIAN("ht"),

  /** Hausa. */
  HAUSA("ha"),

  /** Hebrew. */
  HEBREW("he"),

  /** Herero. */
  HERERO("hz"),

  /** Hindi. */
  HINDI("hi"),

  /** Hiri Motu. */
  HIRI_MOTU("ho"),

  /** Hungarian. */
  HUNGARIAN("hu"),

  /** Icelandic. */
  ICELANDIC("is"),

  /** Ido. */
  IDO("io"),

  /** Igbo. */
  IGBO("ig"),

  /** Indonesian. */
  INDONESIAN("id"),

  /** Interlingua. */
  INTERLINGUA("ia"),

  /** Interlingue. */
  INTERLINGUE("ie"),

  /** Inuktitut. */
  INUKTITUT("iu"),

  /** Inupiaq. */
  INUPIAQ("ik"),

  /** Irish. */
  IRISH("ga"),

  /** Italian. */
  ITALIAN("it"),

  /** Japanese. */
  JAPANESE("ja"),

  /** Javanese. */
  JAVANESE("jv"),

  /** Kannada. */
  KANNADA("kn"),

  /** Kanuri. */
  KANURI("kr"),

  /** Kashmiri. */
  KASHMIRI("ks"),

  /** Kazakh. */
  KAZAKH("kk"),

  /** Khmer. */
  KHMER("km"),

  /** Kikuyu. */
  KIKUYU("ki"),

  /** Kinyarwanda. */
  KINYARWANDA("rw"),

  /** Kirghiz. */
  KIRGHIZ("ky"),

  /** Komi. */
  KOMI("kv"),

  /** Kongo. */
  KONGO("kg"),

  /** Korean. */
  KOREAN("ko"),

  /** Kurdish. */
  KURDISH("ku"),

  /** Kwanyama. */
  KWANYAMA("kj"),

  /** Lao. */
  LAO("lo"),

  /** Latin. */
  LATIN("la"),

  /** Latvian. */
  LATVIAN("lv"),

  /** Limburgish. */
  LIMBURGISH("li"),

  /** Lingala. */
  LINGALA("ln"),

  /** Lithuanian. */
  LITHUANIAN("lt"),

  /** Luba-Katanga. */
  LUBA_KATANGA("lu"),

  /** Luxembourgish. */
  LUXEMBOURGISH("lb"),

  /** Macedonian. */
  MACEDONIAN("mk"),

  /** Malagasy. */
  MALAGASY("mg"),

  /** Malay. */
  MALAY("ms"),

  /** Malayalam. */
  MALAYALAM("ml"),

  /** Maltese. */
  MALTESE("mt"),

  /** Manx. */
  MANX("gv"),

  /** Maori. */
  MAORI("mi"),

  /** Marathi. */
  MARATHI("mr"),

  /** Marshallese. */
  MARSHALLESE("mh"),

  /** Moldavian. */
  MOLDAVIAN("mo"),

  /** Mongolian. */
  MONGOLIAN("mn"),

  /** Nauru. */
  NAURU("na"),

  /** Navajo. */
  NAVAJO("nv"),

  /** Ndonga. */
  NDONGA("ng"),

  /** Nepali. */
  NEPALI("ne"),

  /** North Ndebele. */
  NORTH_NDEBELE("nd"),

  /** Northern Sami. */
  NORTHERN_SAMI("se"),

  /** Norwegian Bokmål. */
  NORWEGIAN_BOKMAL("nb"),

  /** Norwegian Nynorsk. */
  NORWEGIAN_NYNORSK("nn"),

  /** Norwegian. */
  NORWEGIAN("no"),

  /** Nyanja. */
  NYANJA("ny"),

  /** Occitan. */
  OCCITAN("oc"),

  /** Ojibwa. */
  OJIBWA("oj"),

  /** Oriya. */
  ORIYA("or"),

  /** Oromo. */
  OROMO("om"),

  /** Ossetian. */
  OSSETIAN("os"),

  /** Pali. */
  PALI("pi"),

  /** Panjabi. */
  PANJABI("pa"),

  /** Persian. */
  PERSIAN("fa"),

  /** Polish. */
  POLISH("pl"),

  /** Portuguese. */
  PORTUGUESE("pt"),

  /** Pushto. */
  PUSHTO("ps"),

  /** Quechua. */
  QUECHUA("qu"),

  /** Raeto-Romance. */
  RAETO_ROMANCE("rm"),

  /** Romanian. */
  ROMANIAN("ro"),

  /** Rundi. */
  RUNDI("rn"),

  /** Russian. */
  RUSSIAN("ru"),

  /** Samoan. */
  SAMOAN("sm"),

  /** Sango. */
  SANGO("sg"),

  /** Sanskrit. */
  SANSKRIT("sa"),

  /** Sardinian. */
  SARDINIAN("sc"),

  /** Scottish Gaelic. */
  SCOTTISH_GAELIC("gd"),

  /** Serbian. */
  SERBIAN("sr"),

  /** Shona. */
  SHONA("sn"),

  /** Sichuan Yi. */
  SICHUAN_YI("ii"),

  /** Sindhi. */
  SINDHI("sd"),

  /** Sinhalese. */
  SINHALESE("si"),

  /** Slovak. */
  SLOVAK("sk"),

  /** Slovenian. */
  SLOVENIAN("sl"),

  /** Somali. */
  SOMALI("so"),

  /** South Ndebele. */
  SOUTH_NDEBELE("nr"),

  /** Southern Sotho. */
  SOUTHERN_SOTHO("st"),

  /** Spanish. */
  SPANISH("es"),

  /** Sundanese. */
  SUNDANESE("su"),

  /** Swahili. */
  SWAHILI("sw"),

  /** Swati. */
  SWATI("ss"),

  /** Swedish. */
  SWEDISH("sv"),

  /** Tagalog. */
  TAGALOG("tl"),

  /** Tahitian. */
  TAHITIAN("ty"),

  /** Tajik. */
  TAJIK("tg"),

  /** Tamil. */
  TAMIL("ta"),

  /** Tatar. */
  TATAR("tt"),

  /** Telugu. */
  TELUGU("te"),

  /** Thai. */
  THAI("th"),

  /** Tibetan. */
  TIBETAN("bo"),

  /** Tigrinya. */
  TIGRINYA("ti"),

  /** Tonga. */
  TONGA("to"),

  /** Tsonga. */
  TSONGA("ts"),

  /** Tswana. */
  TSWANA("tn"),

  /** Turkish. */
  TURKISH("tr"),

  /** Turkmen. */
  TURKMEN("tk"),

  /** Twi. */
  TWI("tw"),

  /** Uighur. */
  UIGHUR("ug"),

  /** Ukrainian. */
  UKRAINIAN("uk"),

  /** Urdu. */
  URDU("ur"),

  /** Uzbek. */
  UZBEK("uz"),

  /** Venda. */
  VENDA("ve"),

  /** Vietnamese. */
  VIETNAMESE("vi"),

  /** Volapük. */
  VOLAPÜK("vo"),

  /** Walloon. */
  WALLOON("wa"),

  /** Welsh. */
  WELSH("cy"),

  /** Wolof. */
  WOLOF("wo"),

  /** Xhosa. */
  XHOSA("xh"),

  /** Yiddish. */
  YIDDISH("yi"),

  /** Yoruba. */
  YORUBA("yo"),

  /** Zhuang. */
  ZHUANG("za"),

  /** Zulu. */
  ZULU("zu"),

  /** Unknown or Invalid language. */
  UNKNOWN("");

  /** All enumeration values, in declaration order, as an unmodifiable list. */
  public static final List<Language> LANGUAGES;

  /**
   * Lookup from lower-case 2-letter and 3-letter code to language, built once so that
   * {@link #fromIsoCode(String)} does not have to scan every constant on each call.
   */
  private static final Map<String, Language> BY_ISO_CODE;

  static {
    Language[] all = Language.values();
    LANGUAGES = Collections.unmodifiableList(Arrays.asList(all));

    Map<String, Language> index = new HashMap<String, Language>(all.length * 3);
    for (Language language : all) {
      indexCode(index, language.getIso2LetterCode(), language);
      try {
        indexCode(index, language.getIso3LetterCode(), language);
      } catch (MissingResourceException ignored) {
        // The running JVM has no 3-letter code for this language; it stays reachable through its
        // 2-letter code. Failing here would make the whole enum unloadable.
      }
    }
    BY_ISO_CODE = Collections.unmodifiableMap(index);
  }

  private final String code;

  /** Cached because {@link Locale} is immutable and every accessor below needs it. */
  private final Locale locale;

  Language(String code) {
    this.code = code;
    this.locale = new Locale(code);
  }

  /** Registers {@code key} for {@code language} unless an earlier-declared language owns it. */
  private static void indexCode(Map<String, Language> index, String key, Language language) {
    if (!index.containsKey(key)) {
      index.put(key, language);
    }
  }

  /**
   * Looks up a language by its ISO code.
   *
   * @param code the case-insensitive 2 or 3-letter code; surrounding whitespace is ignored
   * @return the matching language, or UNKNOWN if the code is null, empty or not recognised
   */
  public static Language fromIsoCode(String code) {
    if (StringUtils.isEmpty(code)) {
      return UNKNOWN;
    }
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale; with a Turkish
    // default, "IT".toLowerCase() would produce a dotless i and never match "it".
    String codeLower = code.trim().toLowerCase(Locale.ROOT);
    Language language = BY_ISO_CODE.get(codeLower);
    return language == null ? UNKNOWN : language;
  }

  /**
   * @return the 2-letter iso 639-1 code in lower case.
   */
  public String getIso2LetterCode() {
    return code;
  }

  /**
   * @return the 3-letter iso 639-2 code in lower case.
   */
  @JsonValue
  public String getIso3LetterCode() {
    return locale.getISO3Language();
  }

  /**
   * @return the locale built from the 2-letter code, without country or variant.
   */
  public Locale getLocale() {
    return locale;
  }

  /**
   * @return the language title in the English language.
   */
  public String getTitleEnglish() {
    return locale.getDisplayLanguage(Locale.ENGLISH);
  }

  /**
   * @return the language title in the native language.
   */
  public String getTitleNative() {
    return locale.getDisplayLanguage(locale);
  }

  /**
   * Serializes the value in a 3 letter ISO format.
   */
  public static class IsoSerializer extends JsonSerializer<Language> {

    @Override
    public void serialize(Language value, JsonGenerator jgen, SerializerProvider provider)
        throws IOException {
      jgen.writeString(value.getIso3LetterCode());
    }
  }

  /**
   * Deserializes the value from a 2 or 3 letter ISO format or the enumeration name itself to
   * maintain as much backwards compatibility as possible with e.g. the registry api.
   */
  public static class LenientDeserializer extends JsonDeserializer<Language> {

    /**
     * Resolves a textual value to a language without ever throwing.
     *
     * @param value an ISO code or an enumeration name, in any case; may be null
     * @return the matching language, or UNKNOWN when nothing matches
     */
    static Language lenientParse(String value) {
      Language language = Language.fromIsoCode(value);
      if (Language.UNKNOWN != language || value == null) {
        return language;
      }
      // backwards compatible: fall back to the enumeration name, normalised the same way the
      // ISO code lookup normalises its input
      try {
        return Language.valueOf(value.trim().toUpperCase(Locale.ROOT));
      } catch (IllegalArgumentException ignored) {
        // not an enumeration name either
        return Language.UNKNOWN;
      }
    }

    /**
     * @return the parsed language, or UNKNOWN when the parser is null or the current token has
     *     no text
     * @throws IOException if the token text cannot be read from the parser
     */
    @Override
    public Language deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException {
      if (jp == null) {
        return Language.UNKNOWN; // none provided
      }
      // Captured once so the error message never has to touch the parser again.
      String text = null;
      try {
        if (jp.getTextLength() > 0) {
          text = jp.getText();
          return lenientParse(text);
        }
        return Language.UNKNOWN; // none provided
      } catch (Exception e) {
        throw new IOException(
            "Unable to deserialize language from provided value (hint: not an ISO 2 or 3 character?): "
                + text,
            e);
      }
    }
  }

  /**
   * Deserializes map keys with the same lenient rules as {@link LenientDeserializer}.
   */
  public static class LenientKeyDeserializer extends KeyDeserializer {

    /**
     * @return the parsed language, or UNKNOWN for a null, empty or unrecognised key
     */
    @Override
    public Object deserializeKey(String key, DeserializationContext ctxt) throws IOException {
      // lenientParse handles null and empty keys and never throws.
      return LenientDeserializer.lenientParse(key);
    }
  }

  /**
   * Serializes map keys in a 3 letter ISO format.
   */
  public static class IsoKeySerializer extends JsonSerializer<Language> {

    @Override
    public void serialize(
        Language language, JsonGenerator jsonGenerator, SerializerProvider serializerProvider)
        throws IOException {
      jsonGenerator.writeFieldName(language.getIso3LetterCode());
    }
  }
}