001/*
002 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.api.vocabulary;
017
018import java.io.IOException;
019import java.util.Arrays;
020import java.util.Collections;
021import java.util.List;
022import java.util.Locale;
023
024import org.apache.commons.lang3.StringUtils;
025
026import com.fasterxml.jackson.annotation.JsonValue;
027import com.fasterxml.jackson.core.JsonGenerator;
028import com.fasterxml.jackson.core.JsonParser;
029import com.fasterxml.jackson.databind.DeserializationContext;
030import com.fasterxml.jackson.databind.JsonDeserializer;
031import com.fasterxml.jackson.databind.JsonSerializer;
032import com.fasterxml.jackson.databind.KeyDeserializer;
033import com.fasterxml.jackson.databind.SerializerProvider;
034import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
035import com.fasterxml.jackson.databind.annotation.JsonSerialize;
036
037/**
038 * Enumeration for all ISO 639-1 language codes using 2 lower case letters. The enumeration maps to
039 * 3 letter codes and Locales.
040 *
041 * @see <a href="http://en.wikipedia.org/wiki/ISO_639">Wikipedia on ISO-639</a>
042 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/util/Locale.html">Locale javadoc</a>
043 */
044@JsonSerialize(using = Language.IsoSerializer.class, keyUsing = Language.IsoKeySerializer.class)
045@JsonDeserialize(
046  using = Language.LenientDeserializer.class,
047  keyUsing = Language.LenientKeyDeserializer.class)
048public enum Language {
049
050  /**
051   * Abkhazian.
052   */
053  ABKHAZIAN("ab"),
054
055  /**
056   * Afar.
057   */
058  AFAR("aa"),
059
060  /**
061   * Afrikaans.
062   */
063  AFRIKAANS("af"),
064
065  /**
066   * Akan.
067   */
068  AKAN("ak"),
069
070  /**
071   * Albanian.
072   */
073  ALBANIAN("sq"),
074
075  /**
076   * Amharic.
077   */
078  AMHARIC("am"),
079
080  /**
081   * Arabic.
082   */
083  ARABIC("ar"),
084
085  /**
086   * Aragonese.
087   */
088  ARAGONESE("an"),
089
090  /**
091   * Armenian.
092   */
093  ARMENIAN("hy"),
094
095  /**
096   * Assamese.
097   */
098  ASSAMESE("as"),
099
100  /**
101   * Avaric.
102   */
103  AVARIC("av"),
104
105  /**
106   * Avestan.
107   */
108  AVESTAN("ae"),
109
110  /**
111   * Aymara.
112   */
113  AYMARA("ay"),
114
115  /**
116   * Azerbaijani.
117   */
118  AZERBAIJANI("az"),
119
120  /**
121   * Bambara.
122   */
123  BAMBARA("bm"),
124
125  /**
126   * Bashkir.
127   */
128  BASHKIR("ba"),
129
130  /**
131   * Basque.
132   */
133  BASQUE("eu"),
134
135  /**
136   * Belarusian.
137   */
138  BELARUSIAN("be"),
139
140  /**
141   * Bengali.
142   */
143  BENGALI("bn"),
144
145  /**
146   * Bihari.
147   */
148  BIHARI("bh"),
149
150  /**
151   * Bislama.
152   */
153  BISLAMA("bi"),
154
155  /**
156   * Bosnian.
157   */
158  BOSNIAN("bs"),
159
160  /**
161   * Breton.
162   */
163  BRETON("br"),
164
165  /**
166   * Bulgarian.
167   */
168  BULGARIAN("bg"),
169
170  /**
171   * Burmese.
172   */
173  BURMESE("my"),
174
175  /**
176   * Catalan.
177   */
178  CATALAN("ca"),
179
180  /**
181   * Chamorro.
182   */
183  CHAMORRO("ch"),
184
185  /**
186   * Chechen.
187   */
188  CHECHEN("ce"),
189
190  /**
191   * Chinese.
192   */
193  CHINESE("zh"),
194
195  /**
196   * Church Slavic.
197   */
198  CHURCH_SLAVIC("cu"),
199
200  /**
201   * Chuvash.
202   */
203  CHUVASH("cv"),
204
205  /**
206   * Cornish.
207   */
208  CORNISH("kw"),
209
210  /**
211   * Corsican.
212   */
213  CORSICAN("co"),
214
215  /**
216   * Cree.
217   */
218  CREE("cr"),
219
220  /**
221   * Croatian.
222   */
223  CROATIAN("hr"),
224
225  /**
226   * Czech.
227   */
228  CZECH("cs"),
229
230  /**
231   * Danish.
232   */
233  DANISH("da"),
234
235  /**
236   * Divehi.
237   */
238  DIVEHI("dv"),
239
240  /**
241   * Dutch.
242   */
243  DUTCH("nl"),
244
245  /**
246   * Dzongkha.
247   */
248  DZONGKHA("dz"),
249
250  /**
251   * English.
252   */
253  ENGLISH("en"),
254
255  /**
256   * Esperanto.
257   */
258  ESPERANTO("eo"),
259
260  /**
261   * Estonian.
262   */
263  ESTONIAN("et"),
264
265  /**
266   * Ewe.
267   */
268  EWE("ee"),
269
270  /**
271   * Faroese.
272   */
273  FAROESE("fo"),
274
275  /**
276   * Fijian.
277   */
278  FIJIAN("fj"),
279
280  /**
281   * Finnish.
282   */
283  FINNISH("fi"),
284
285  /**
286   * French.
287   */
288  FRENCH("fr"),
289
290  /**
291   * Frisian.
292   */
293  FRISIAN("fy"),
294
295  /**
296   * Fulah.
297   */
298  FULAH("ff"),
299
300  /**
301   * Gallegan.
302   */
303  GALLEGAN("gl"),
304
305  /**
306   * Ganda.
307   */
308  GANDA("lg"),
309
310  /**
311   * Georgian.
312   */
313  GEORGIAN("ka"),
314
315  /**
316   * German.
317   */
318  GERMAN("de"),
319
320  /**
321   * Greek.
322   */
323  GREEK("el"),
324
325  /**
326   * Greenlandic.
327   */
328  GREENLANDIC("kl"),
329
330  /**
331   * Guarani.
332   */
333  GUARANI("gn"),
334
335  /**
336   * Gujarati.
337   */
338  GUJARATI("gu"),
339
340  /**
341   * Haitian.
342   */
343  HAITIAN("ht"),
344
345  /**
346   * Hausa.
347   */
348  HAUSA("ha"),
349
350  /**
351   * Hebrew.
352   */
353  HEBREW("he"),
354
355  /**
356   * Herero.
357   */
358  HERERO("hz"),
359
360  /**
361   * Hindi.
362   */
363  HINDI("hi"),
364
365  /**
366   * Hiri Motu.
367   */
368  HIRI_MOTU("ho"),
369
370  /**
371   * Hungarian.
372   */
373  HUNGARIAN("hu"),
374
375  /**
376   * Icelandic.
377   */
378  ICELANDIC("is"),
379
380  /**
381   * Ido.
382   */
383  IDO("io"),
384
385  /**
386   * Igbo.
387   */
388  IGBO("ig"),
389
390  /**
391   * Indonesian.
392   */
393  INDONESIAN("id"),
394
395  /**
396   * Interlingua.
397   */
398  INTERLINGUA("ia"),
399
400  /**
401   * Interlingue.
402   */
403  INTERLINGUE("ie"),
404
405  /**
406   * Inuktitut.
407   */
408  INUKTITUT("iu"),
409
410  /**
411   * Inupiaq.
412   */
413  INUPIAQ("ik"),
414
415  /**
416   * Irish.
417   */
418  IRISH("ga"),
419
420  /**
421   * Italian.
422   */
423  ITALIAN("it"),
424
425  /**
426   * Japanese.
427   */
428  JAPANESE("ja"),
429
430  /**
431   * Javanese.
432   */
433  JAVANESE("jv"),
434
435  /**
436   * Kannada.
437   */
438  KANNADA("kn"),
439
440  /**
441   * Kanuri.
442   */
443  KANURI("kr"),
444
445  /**
446   * Kashmiri.
447   */
448  KASHMIRI("ks"),
449
450  /**
451   * Kazakh.
452   */
453  KAZAKH("kk"),
454
455  /**
456   * Khmer.
457   */
458  KHMER("km"),
459
460  /**
461   * Kikuyu.
462   */
463  KIKUYU("ki"),
464
465  /**
466   * Kinyarwanda.
467   */
468  KINYARWANDA("rw"),
469
470  /**
471   * Kirghiz.
472   */
473  KIRGHIZ("ky"),
474
475  /**
476   * Komi.
477   */
478  KOMI("kv"),
479
480  /**
481   * Kongo.
482   */
483  KONGO("kg"),
484
485  /**
486   * Korean.
487   */
488  KOREAN("ko"),
489
490  /**
491   * Kurdish.
492   */
493  KURDISH("ku"),
494
495  /**
496   * Kwanyama.
497   */
498  KWANYAMA("kj"),
499
500  /**
501   * Lao.
502   */
503  LAO("lo"),
504
505  /**
506   * Latin.
507   */
508  LATIN("la"),
509
510  /**
511   * Latvian.
512   */
513  LATVIAN("lv"),
514
515  /**
516   * Limburgish.
517   */
518  LIMBURGISH("li"),
519
520  /**
521   * Lingala.
522   */
523  LINGALA("ln"),
524
525  /**
526   * Lithuanian.
527   */
528  LITHUANIAN("lt"),
529
530  /**
531   * Luba-Katanga.
532   */
533  LUBA_KATANGA("lu"),
534
535  /**
536   * Luxembourgish.
537   */
538  LUXEMBOURGISH("lb"),
539
540  /**
541   * Macedonian.
542   */
543  MACEDONIAN("mk"),
544
545  /**
546   * Malagasy.
547   */
548  MALAGASY("mg"),
549
550  /**
551   * Malay.
552   */
553  MALAY("ms"),
554
555  /**
556   * Malayalam.
557   */
558  MALAYALAM("ml"),
559
560  /**
561   * Maltese.
562   */
563  MALTESE("mt"),
564
565  /**
566   * Manx.
567   */
568  MANX("gv"),
569
570  /**
571   * Maori.
572   */
573  MAORI("mi"),
574
575  /**
576   * Marathi.
577   */
578  MARATHI("mr"),
579
580  /**
581   * Marshallese.
582   */
583  MARSHALLESE("mh"),
584
585  /**
586   * Moldavian.
587   */
588  MOLDAVIAN("mo"),
589
590  /**
591   * Mongolian.
592   */
593  MONGOLIAN("mn"),
594
595  /**
596   * Nauru.
597   */
598  NAURU("na"),
599
600  /**
601   * Navajo.
602   */
603  NAVAJO("nv"),
604
605  /**
606   * Ndonga.
607   */
608  NDONGA("ng"),
609
610  /**
611   * Nepali.
612   */
613  NEPALI("ne"),
614
615  /**
616   * North Ndebele.
617   */
618  NORTH_NDEBELE("nd"),
619
620  /**
621   * Northern Sami.
622   */
623  NORTHERN_SAMI("se"),
624
625  /**
626   * Norwegian Bokmål.
627   */
628  NORWEGIAN_BOKMAL("nb"),
629
630  /**
631   * Norwegian Nynorsk.
632   */
633  NORWEGIAN_NYNORSK("nn"),
634
635  /**
636   * Norwegian.
637   */
638  NORWEGIAN("no"),
639
640  /**
641   * Nyanja.
642   */
643  NYANJA("ny"),
644
645  /**
646   * Occitan.
647   */
648  OCCITAN("oc"),
649
650  /**
651   * Ojibwa.
652   */
653  OJIBWA("oj"),
654
655  /**
656   * Oriya.
657   */
658  ORIYA("or"),
659
660  /**
661   * Oromo.
662   */
663  OROMO("om"),
664
665  /**
666   * Ossetian.
667   */
668  OSSETIAN("os"),
669
670  /**
671   * Pali.
672   */
673  PALI("pi"),
674
675  /**
676   * Panjabi.
677   */
678  PANJABI("pa"),
679
680  /**
681   * Persian.
682   */
683  PERSIAN("fa"),
684
685  /**
686   * Polish.
687   */
688  POLISH("pl"),
689
690  /**
691   * Portuguese.
692   */
693  PORTUGUESE("pt"),
694
695  /**
696   * Pushto.
697   */
698  PUSHTO("ps"),
699
700  /**
701   * Quechua.
702   */
703  QUECHUA("qu"),
704
705  /**
706   * Raeto-Romance.
707   */
708  RAETO_ROMANCE("rm"),
709
710  /**
711   * Romanian.
712   */
713  ROMANIAN("ro"),
714
715  /**
716   * Rundi.
717   */
718  RUNDI("rn"),
719
720  /**
721   * Russian.
722   */
723  RUSSIAN("ru"),
724
725  /**
726   * Samoan.
727   */
728  SAMOAN("sm"),
729
730  /**
731   * Sango.
732   */
733  SANGO("sg"),
734
735  /**
736   * Sanskrit.
737   */
738  SANSKRIT("sa"),
739
740  /**
741   * Sardinian.
742   */
743  SARDINIAN("sc"),
744
745  /**
746   * Scottish Gaelic.
747   */
748  SCOTTISH_GAELIC("gd"),
749
750  /**
751   * Serbian.
752   */
753  SERBIAN("sr"),
754
755  /**
756   * Shona.
757   */
758  SHONA("sn"),
759
760  /**
761   * Sichuan Yi.
762   */
763  SICHUAN_YI("ii"),
764
765  /**
766   * Sindhi.
767   */
768  SINDHI("sd"),
769
770  /**
771   * Sinhalese.
772   */
773  SINHALESE("si"),
774
775  /**
776   * Slovak.
777   */
778  SLOVAK("sk"),
779
780  /**
781   * Slovenian.
782   */
783  SLOVENIAN("sl"),
784
785  /**
786   * Somali.
787   */
788  SOMALI("so"),
789
790  /**
791   * South Ndebele.
792   */
793  SOUTH_NDEBELE("nr"),
794
795  /**
796   * Southern Sotho.
797   */
798  SOUTHERN_SOTHO("st"),
799
800  /**
801   * Spanish.
802   */
803  SPANISH("es"),
804
805  /**
806   * Sundanese.
807   */
808  SUNDANESE("su"),
809
810  /**
811   * Swahili.
812   */
813  SWAHILI("sw"),
814
815  /**
816   * Swati.
817   */
818  SWATI("ss"),
819
820  /**
821   * Swedish.
822   */
823  SWEDISH("sv"),
824
825  /**
826   * Tagalog.
827   */
828  TAGALOG("tl"),
829
830  /**
831   * Tahitian.
832   */
833  TAHITIAN("ty"),
834
835  /**
836   * Tajik.
837   */
838  TAJIK("tg"),
839
840  /**
841   * Tamil.
842   */
843  TAMIL("ta"),
844
845  /**
846   * Tatar.
847   */
848  TATAR("tt"),
849
850  /**
851   * Telugu.
852   */
853  TELUGU("te"),
854
855  /**
856   * Thai.
857   */
858  THAI("th"),
859
860  /**
861   * Tibetan.
862   */
863  TIBETAN("bo"),
864
865  /**
866   * Tigrinya.
867   */
868  TIGRINYA("ti"),
869
870  /**
871   * Tonga.
872   */
873  TONGA("to"),
874
875  /**
876   * Tsonga.
877   */
878  TSONGA("ts"),
879
880  /**
881   * Tswana.
882   */
883  TSWANA("tn"),
884
885  /**
886   * Turkish.
887   */
888  TURKISH("tr"),
889
890  /**
891   * Turkmen.
892   */
893  TURKMEN("tk"),
894
895  /**
896   * Twi.
897   */
898  TWI("tw"),
899
900  /**
901   * Uighur.
902   */
903  UIGHUR("ug"),
904
905  /**
906   * Ukrainian.
907   */
908  UKRAINIAN("uk"),
909
910  /**
911   * Urdu.
912   */
913  URDU("ur"),
914
915  /**
916   * Uzbek.
917   */
918  UZBEK("uz"),
919
920  /**
921   * Venda.
922   */
923  VENDA("ve"),
924
925  /**
926   * Vietnamese.
927   */
928  VIETNAMESE("vi"),
929
930  /**
931   * Volapük.
932   */
933  VOLAPÜK("vo"),
934
935  /**
936   * Walloon.
937   */
938  WALLOON("wa"),
939
940  /**
941   * Welsh.
942   */
943  WELSH("cy"),
944
945  /**
946   * Wolof.
947   */
948  WOLOF("wo"),
949
950  /**
951   * Xhosa.
952   */
953  XHOSA("xh"),
954
955  /**
956   * Yiddish.
957   */
958  YIDDISH("yi"),
959
960  /**
961   * Yoruba.
962   */
963  YORUBA("yo"),
964
965  /**
966   * Zhuang.
967   */
968  ZHUANG("za"),
969
970  /**
971   * Zulu.
972   */
973  ZULU("zu"),
974
975  /**
976   * Unknown or Invalid language.
977   */
978  UNKNOWN("");
979
980  public static final List<Language> LANGUAGES;
981
982  static {
983    LANGUAGES = Collections.unmodifiableList(Arrays.asList(Language.values()));
984  }
985
986  private final String code;
987
988  Language(String code) {
989    this.code = code;
990  }
991
992  /**
993   * @param code the case-insensitive 2 or 3-letter codes
994   * @return the matching language or UNKNOWN
995   */
996  public static Language fromIsoCode(String code) {
997    if (StringUtils.isEmpty(code)) {
998      return UNKNOWN;
999    }
1000    String codeLower = code.toLowerCase().trim();
1001    for (Language language : Language.values()) {
1002      if (codeLower.equals(language.getIso2LetterCode()) || codeLower.equals(
1003        language.getIso3LetterCode())) {
1004        return language;
1005      }
1006    }
1007    return UNKNOWN;
1008  }
1009
1010  /**
1011   * @return the 2-letter iso 639-1 code in lower case.
1012   */
1013  public String getIso2LetterCode() {
1014    return code;
1015  }
1016
1017  /**
1018   * @return the 3-letter iso 639-2 code in lower case.
1019   */
1020  @JsonValue
1021  public String getIso3LetterCode() {
1022    return getLocale().getISO3Language();
1023  }
1024
1025  public Locale getLocale() {
1026    return new Locale(code);
1027  }
1028
1029  /**
1030   * @return the language title in the English language.
1031   */
1032  public String getTitleEnglish() {
1033    return getLocale().getDisplayLanguage(Locale.ENGLISH);
1034  }
1035
1036  /**
1037   * @return the language title in the native language.
1038   */
1039  public String getTitleNative() {
1040    Locale loc = getLocale();
1041    return loc.getDisplayLanguage(loc);
1042  }
1043
1044  /**
1045   * Serializes the value in a 3 letter ISO format.
1046   */
1047  public static class IsoSerializer extends JsonSerializer<Language> {
1048
1049    @Override
1050    public void serialize(Language value, JsonGenerator jgen, SerializerProvider provider)
1051      throws IOException {
1052      jgen.writeString(value.getIso3LetterCode());
1053    }
1054  }
1055
1056  /**
1057   * Deserializes the value from a 3 letter ISO format or the enumeration name itself to maintain as
1058   * much backwards compatibility as possible with e.g. the registry api.
1059   */
1060  public static class LenientDeserializer extends JsonDeserializer<Language> {
1061
1062    static Language lenientParse(String value) {
1063      Language l = Language.fromIsoCode(value);
1064      // backwards compatible
1065      if (Language.UNKNOWN == l) {
1066        try {
1067          l = Language.valueOf(value);
1068        } catch (IllegalArgumentException e) {
1069          l = Language.UNKNOWN;
1070        }
1071      }
1072
1073      return l;
1074    }
1075
1076    @Override
1077    public Language deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException {
1078      try {
1079        if (jp != null && jp.getTextLength() > 0) {
1080          return lenientParse(jp.getText());
1081        } else {
1082          return Language.UNKNOWN; // none provided
1083        }
1084      } catch (Exception e) {
1085        throw new IOException(
1086          "Unable to deserialize language from provided value (hint: not an ISO 2 or 3 character?): "
1087            + jp.getText());
1088      }
1089    }
1090  }
1091
1092  public static class LenientKeyDeserializer extends KeyDeserializer {
1093
1094    @Override
1095    public Object deserializeKey(String key, DeserializationContext ctxt) throws IOException {
1096      try {
1097        if (StringUtils.isNotEmpty(key)) {
1098          return LenientDeserializer.lenientParse(key);
1099        } else {
1100          return Language.UNKNOWN; // none provided
1101        }
1102      } catch (Exception e) {
1103        throw new IOException(
1104          "Unable to deserialize language from provided value (hint: not an ISO 2 or 3 character?): "
1105            + key);
1106      }
1107    }
1108  }
1109
1110  public static class IsoKeySerializer extends JsonSerializer<Language> {
1111
1112    @Override
1113    public void serialize(
1114      Language language, JsonGenerator jsonGenerator, SerializerProvider serializerProvider)
1115      throws IOException {
1116      jsonGenerator.writeFieldName(language.getIso3LetterCode());
1117    }
1118  }
1119}