001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.api.model.checklistbank;
015
016import org.gbif.api.vocabulary.Extension;
017import org.gbif.api.vocabulary.Kingdom;
018import org.gbif.api.vocabulary.Language;
019import org.gbif.api.vocabulary.NameUsageIssue;
020import org.gbif.api.vocabulary.Origin;
021import org.gbif.api.vocabulary.Rank;
022
023import java.util.Date;
024import java.util.HashMap;
025import java.util.Map;
026import java.util.Objects;
027import java.util.StringJoiner;
028import java.util.UUID;
029
030import javax.validation.constraints.Max;
031import javax.validation.constraints.Min;
032import javax.validation.constraints.NotNull;
033import javax.validation.constraints.Past;
034
035import io.swagger.v3.oas.annotations.media.Schema;
036
037/**
038 * Simple metrics about a single, processed checklist dataset in time.
039 * The created timestamp with the dataset key should be unique and can be used to create graphs over time.
040 */
041@SuppressWarnings("unused")
042public class DatasetMetrics {
043  private int key;
044  private UUID datasetKey;
045  private int usagesCount;
046  private int synonymsCount;
047  private int distinctNamesCount;
048  private int nubMatchingCount;
049  private int colMatchingCount;
050  private int nubCoveragePct;
051  private int colCoveragePct;
052  // breakdown by constituent
053  private Map<UUID, Integer> countByConstituent = new HashMap<>();
054  // breakdown by kingdom
055  private Map<Kingdom, Integer> countByKingdom = new HashMap<>();
056  // breakdown by rank
057  private Map<Rank, Integer> countByRank = new HashMap<>();
058  // breakdown common names by language
059  private Map<Language, Integer> countNamesByLanguage = new HashMap<>();
060  // number of extension records by extension
061  private Map<Extension, Integer> countExtRecordsByExtension = new HashMap<>();
062  // breakdown by kingdom
063  private Map<Origin, Integer> countByOrigin = new HashMap<>();
064  // breakdown by issue
065  private Map<NameUsageIssue, Integer> countByIssue = new HashMap<>();
066  // any other dynamic counts
067  private Map<String, Integer> otherCount = new HashMap<>();
068  private Date created;
069  private Date downloaded;
070
071  public int getKey() {
072    return key;
073  }
074
075  public void setKey(int key) {
076    this.key = key;
077  }
078
079  /**
080   * @return percentage of distinct names that match a name in the Catalogue of Life
081   */
082  @Schema(description = "Percentage of distinct names that match a name in the Catalogue of Life.")
083  @Max(100)
084  @Min(0)
085  public int getColCoveragePct() {
086    return colCoveragePct;
087  }
088
089  public void setColCoveragePct(int colCoveragePct) {
090    this.colCoveragePct = colCoveragePct;
091  }
092
093  @Schema(description = "Count of names in each constituent dataset.")
094  @NotNull
095  public Map<UUID, Integer> getCountByConstituent() {
096    return countByConstituent;
097  }
098
099  public void setCountByConstituent(Map<UUID, Integer> countByConstituent) {
100    this.countByConstituent = countByConstituent;
101  }
102
103  @Schema(description = "Number of names in each kingdom.")
104  @NotNull
105  public Map<Kingdom, Integer> getCountByKingdom() {
106    return countByKingdom;
107  }
108
109  public void setCountByKingdom(Map<Kingdom, Integer> countByKingdom) {
110    this.countByKingdom = countByKingdom;
111  }
112
113  @Schema(description = "Number of names at each taxonomic rank.")
114  @NotNull
115  public Map<Rank, Integer> getCountByRank() {
116    return countByRank;
117  }
118
119  public void setCountByRank(Map<Rank, Integer> countByRank) {
120    this.countByRank = countByRank;
121  }
122
123  /**
124   * @return the number of distinct canonical name strings
125   */
126  @Schema(description = "Number of distinct, canonical name strings.")
127  @Min(0)
128  public int getDistinctNamesCount() {
129    return distinctNamesCount;
130  }
131
132  public void setDistinctNamesCount(int distinctNamesCount) {
133    this.distinctNamesCount = distinctNamesCount;
134  }
135
136  @Schema(description = "Number of names having extension data.")
137  @NotNull
138  public Map<Extension, Integer> getCountExtRecordsByExtension() {
139    return countExtRecordsByExtension;
140  }
141
142  public void setCountExtRecordsByExtension(Map<Extension, Integer> countExtRecordsByExtension) {
143    this.countExtRecordsByExtension = countExtRecordsByExtension;
144  }
145
146  /**
147   * @return number of records matching a name in the Catalogue of Life
148   */
149  @Schema(description = "Number of records matching a name in the Catalogue of Life.")
150  @Min(0)
151  public int getColMatchingCount() {
152    return colMatchingCount;
153  }
154
155  public void setColMatchingCount(int colMatchingCount) {
156    this.colMatchingCount = colMatchingCount;
157  }
158
159  /**
160   * @return number of records matching a name in the GBIF backbone taxonomy
161   */
162  @Schema(description = "Number of records matching a name in the GBIF Backbone Taxonomy.")
163  @Min(0)
164  public int getNubMatchingCount() {
165    return nubMatchingCount;
166  }
167
168  public void setNubMatchingCount(int nubMatchingCount) {
169    this.nubMatchingCount = nubMatchingCount;
170  }
171
172  @Schema(description = "Number of vernacular names by language.")
173  @NotNull
174  public Map<Language, Integer> getCountNamesByLanguage() {
175    return countNamesByLanguage;
176  }
177
178  public void setCountNamesByLanguage(Map<Language, Integer> countNamesByLanguage) {
179    this.countNamesByLanguage = countNamesByLanguage;
180  }
181
182  /**
183   * @return number of records with a taxonomic status of a synonym.
184   *         For occurrence records the nub taxonomy status is used
185   */
186  @Schema(description = "Number of records with a taxonomic status of synonym.")
187  @Min(0)
188  public int getSynonymsCount() {
189    return synonymsCount;
190  }
191
192  public void setSynonymsCount(int synonymsCount) {
193    this.synonymsCount = synonymsCount;
194  }
195
196  /**
197   * @return total number of name usage records in checklistbank
198   */
199  @Schema(description = "Total number of name usage records in Checklistbank.")
200  @Min(0)
201  public int getUsagesCount() {
202    return usagesCount;
203  }
204
205  public void setUsagesCount(int usagesCount) {
206    this.usagesCount = usagesCount;
207  }
208
209  /**
210   * @return map of total name usage counts by their origin
211   */
212  @Schema(description = "Total name usages by origin.")
213  @NotNull
214  public Map<Origin, Integer> getCountByOrigin() {
215    return countByOrigin;
216  }
217
218  public void setCountByOrigin(Map<Origin, Integer> countByOrigin) {
219    this.countByOrigin = countByOrigin;
220  }
221
222  /**
223   * @return map of total name usage counts by their interpretation issue
224   */
225  @Schema(description = "Total name usage counts by their interpretation issue.")
226  @NotNull
227  public Map<NameUsageIssue, Integer> getCountByIssue() {
228    return countByIssue;
229  }
230
231  public void setCountByIssue(Map<NameUsageIssue, Integer> countByIssue) {
232    this.countByIssue = countByIssue;
233  }
234
235  /**
236   * @return date this metric was generated. Roughly equivalent with date of indexing
237   */
238  @Schema(description = "Date this metric was generated. Roughly equivalent with date of indexing.")
239  @NotNull
240  @Past
241  public Date getCreated() {
242    return created;
243  }
244
245  public void setCreated(Date created) {
246    this.created = created;
247  }
248
249  /**
250   * @return date new dataset data was downloaded/harvested last time
251   */
252  @Schema(description = "Date new dataset data was downloaded/harvested last time.")
253  @NotNull
254  @Past
255  public Date getDownloaded() {
256    return downloaded;
257  }
258
259  public void setDownloaded(Date downloaded) {
260    this.downloaded = downloaded;
261  }
262
263  @Schema(description = "Dataset key.")
264  @NotNull
265  public UUID getDatasetKey() {
266    return datasetKey;
267  }
268
269  public void setDatasetKey(UUID datasetKey) {
270    this.datasetKey = datasetKey;
271  }
272
273  /**
274   * @return percentage of distinct names that match a name in the GBIF backbone taxonomy
275   */
276  @Schema(description = "Percentage of distinct names that match a name in the GBIF backbone taxonomy.")
277  @Max(100)
278  @Min(0)
279  public int getNubCoveragePct() {
280    return nubCoveragePct;
281  }
282
283  public void setNubCoveragePct(int nubCoveragePct) {
284    this.nubCoveragePct = nubCoveragePct;
285  }
286
287  /**
288   * For an occurrence dataset get the number of records that are interpreted to belong to a certain nub kingdom.
289   * For checklists the number of usages belonging to a certain nub kingdom. Note this is not the kingdom as explicitly
290   * given by the checklist, but the nub kingdom after matching the usages to the nub.
291   *
292   * @param kingdom to get metrics for
293   *
294   * @return the number of records found in the respective kingdom
295   */
296  @Min(0)
297  public int getCountByKingdom(Kingdom kingdom) {
298    return getCountFromMap(countByKingdom, kingdom);
299  }
300
301  /**
302   * Get the metrics by taxonomic rank, i.e. the lowest rank to which an occurrence record was identified
303   * or the rank of the name usage in a checklist.
304   *
305   * @param rank to get metrics for. Mayor Linnéan ranks only down to species plus INFRASPECIFIC_NAME.
306   *
307   * @return the number of records for the given taxonomic rank.
308   */
309  @Min(0)
310  public int getCountByRank(Rank rank) {
311    return getCountFromMap(countByRank, rank);
312  }
313
314  /**
315   * Get the metrics by name usage origin.
316   */
317  @Min(0)
318  public int getCountByOrigin(Origin origin) {
319    return getCountFromMap(countByOrigin, origin);
320  }
321
322  /**
323   * Get the metrics by name usage issue.
324   */
325  @Min(0)
326  public int getCountByIssue(NameUsageIssue issue) {
327    return getCountFromMap(countByIssue, issue);
328  }
329
330  /**
331   * Get the metrics for other dynamic counts.
332   */
333  @Min(0)
334  public int getOtherCount(String key) {
335    return getCountFromMap(otherCount, key);
336  }
337
338  /**
339   * Number of vernacular names in this checklist dataset by language.
340   * For occurrence datasets always 0.
341   *
342   * @param language of common names
343   *
344   * @return the number of records found for the extension
345   */
346  @Min(0)
347  public int getCountNamesByLanguage(Language language) {
348    return getCountFromMap(countNamesByLanguage, language);
349  }
350
351  /**
352   * Number of extension records found in the dataset.
353   *
354   * @return the number of records found for the extension
355   */
356  @Min(0)
357  public int getExtensionRecordCount(Extension extension) {
358    return getCountFromMap(countExtRecordsByExtension, extension);
359  }
360
361  public Map<String, Integer> getOtherCount() {
362    return otherCount;
363  }
364
365  public void setOtherCount(Map<String, Integer> otherCount) {
366    this.otherCount = otherCount;
367  }
368
369  /**
370   * @return value from map if key exists, 0 otherwise
371   */
372  private int getCountFromMap(Map<?, Integer> map, Object key) {
373    if (key == null) {
374      throw new IllegalArgumentException("Null not allowed");
375    }
376    if (map.containsKey(key)) {
377      return map.get(key);
378    }
379    return 0;
380  }
381
382  @Override
383  public boolean equals(Object o) {
384    if (this == o) {
385      return true;
386    }
387    if (o == null || getClass() != o.getClass()) {
388      return false;
389    }
390    DatasetMetrics that = (DatasetMetrics) o;
391    return key == that.key &&
392      usagesCount == that.usagesCount &&
393      synonymsCount == that.synonymsCount &&
394      distinctNamesCount == that.distinctNamesCount &&
395      nubMatchingCount == that.nubMatchingCount &&
396      colMatchingCount == that.colMatchingCount &&
397      nubCoveragePct == that.nubCoveragePct &&
398      colCoveragePct == that.colCoveragePct &&
399      Objects.equals(datasetKey, that.datasetKey) &&
400      Objects.equals(countByConstituent, that.countByConstituent) &&
401      Objects.equals(countByKingdom, that.countByKingdom) &&
402      Objects.equals(countByRank, that.countByRank) &&
403      Objects.equals(countNamesByLanguage, that.countNamesByLanguage) &&
404      Objects.equals(countExtRecordsByExtension, that.countExtRecordsByExtension) &&
405      Objects.equals(countByOrigin, that.countByOrigin) &&
406      Objects.equals(countByIssue, that.countByIssue) &&
407      Objects.equals(otherCount, that.otherCount) &&
408      Objects.equals(created, that.created) &&
409      Objects.equals(downloaded, that.downloaded);
410  }
411
412  @Override
413  public int hashCode() {
414    return Objects
415      .hash(key, datasetKey, usagesCount, synonymsCount, distinctNamesCount, nubMatchingCount,
416        colMatchingCount, nubCoveragePct, colCoveragePct, countByConstituent, countByKingdom,
417        countByRank, countNamesByLanguage, countExtRecordsByExtension, countByOrigin,
418        countByIssue, otherCount, created, downloaded);
419  }
420
421  @Override
422  public String toString() {
423    return new StringJoiner(", ", DatasetMetrics.class.getSimpleName() + "[", "]")
424      .add("key=" + key)
425      .add("datasetKey=" + datasetKey)
426      .add("usagesCount=" + usagesCount)
427      .add("synonymsCount=" + synonymsCount)
428      .add("distinctNamesCount=" + distinctNamesCount)
429      .add("nubMatchingCount=" + nubMatchingCount)
430      .add("colMatchingCount=" + colMatchingCount)
431      .add("nubCoveragePct=" + nubCoveragePct)
432      .add("colCoveragePct=" + colCoveragePct)
433      .add("countByConstituent=" + countByConstituent)
434      .add("countByKingdom=" + countByKingdom)
435      .add("countByRank=" + countByRank)
436      .add("countNamesByLanguage=" + countNamesByLanguage)
437      .add("countExtRecordsByExtension=" + countExtRecordsByExtension)
438      .add("countByOrigin=" + countByOrigin)
439      .add("countByIssue=" + countByIssue)
440      .add("otherCount=" + otherCount)
441      .add("created=" + created)
442      .add("downloaded=" + downloaded)
443      .toString();
444  }
445}