001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.api.model.checklistbank; 015 016import org.gbif.api.vocabulary.Extension; 017import org.gbif.api.vocabulary.Kingdom; 018import org.gbif.api.vocabulary.Language; 019import org.gbif.api.vocabulary.NameUsageIssue; 020import org.gbif.api.vocabulary.Origin; 021import org.gbif.api.vocabulary.Rank; 022 023import java.util.Date; 024import java.util.HashMap; 025import java.util.Map; 026import java.util.Objects; 027import java.util.StringJoiner; 028import java.util.UUID; 029 030import javax.validation.constraints.Max; 031import javax.validation.constraints.Min; 032import javax.validation.constraints.NotNull; 033import javax.validation.constraints.Past; 034 035import io.swagger.v3.oas.annotations.media.Schema; 036 037/** 038 * Simple metrics about a single, processed checklist dataset in time. 039 * The created timestamp with the dataset key should be unique and can be used to create graphs over time. 040 */ 041@SuppressWarnings("unused") 042public class DatasetMetrics { 043 private int key; 044 private UUID datasetKey; 045 private int usagesCount; 046 private int synonymsCount; 047 private int distinctNamesCount; 048 private int nubMatchingCount; 049 private int colMatchingCount; 050 private int nubCoveragePct; 051 private int colCoveragePct; 052 // breakdown by constituent 053 private Map<UUID, Integer> countByConstituent = new HashMap<>(); 054 // breakdown by kingdom 055 private Map<Kingdom, Integer> countByKingdom = new HashMap<>(); 056 // breakdown by rank 057 private Map<Rank, Integer> countByRank = new HashMap<>(); 058 // breakdown common names by language 059 private Map<Language, Integer> countNamesByLanguage = new HashMap<>(); 060 // number of extension records by extension 061 private Map<Extension, Integer> countExtRecordsByExtension = new HashMap<>(); 062 // breakdown by kingdom 063 private Map<Origin, Integer> countByOrigin = new HashMap<>(); 064 // breakdown by issue 065 private Map<NameUsageIssue, Integer> countByIssue = new HashMap<>(); 066 // any other dynamic counts 067 private Map<String, Integer> otherCount = new HashMap<>(); 068 private Date created; 069 private Date downloaded; 070 071 public int getKey() { 072 return key; 073 } 074 075 public void setKey(int key) { 076 this.key = key; 077 } 078 079 /** 080 * @return percentage of distinct names that match a name in the Catalogue of Life 081 */ 082 @Schema(description = "Percentage of distinct names that match a name in the Catalogue of Life.") 083 @Max(100) 084 @Min(0) 085 public int getColCoveragePct() { 086 return colCoveragePct; 087 } 088 089 public void setColCoveragePct(int colCoveragePct) { 090 this.colCoveragePct = colCoveragePct; 091 } 092 093 @Schema(description = "Count of names in each constituent dataset.") 094 @NotNull 095 public Map<UUID, Integer> getCountByConstituent() { 096 return countByConstituent; 097 } 098 099 public void setCountByConstituent(Map<UUID, Integer> countByConstituent) { 100 this.countByConstituent = countByConstituent; 101 } 102 103 @Schema(description = "Number of names in each kingdom.") 104 @NotNull 105 public Map<Kingdom, Integer> getCountByKingdom() { 106 return countByKingdom; 107 } 108 109 public void setCountByKingdom(Map<Kingdom, Integer> countByKingdom) { 110 this.countByKingdom = countByKingdom; 111 } 112 113 @Schema(description = "Number of names at each taxonomic rank.") 114 @NotNull 115 public Map<Rank, Integer> getCountByRank() { 116 return countByRank; 117 } 118 119 public void setCountByRank(Map<Rank, Integer> countByRank) { 120 this.countByRank = countByRank; 121 } 122 123 /** 124 * @return the number of distinct canonical name strings 125 */ 126 @Schema(description = "Number of distinct, canonical name strings.") 127 @Min(0) 128 public int getDistinctNamesCount() { 129 return distinctNamesCount; 130 } 131 132 public void setDistinctNamesCount(int distinctNamesCount) { 133 this.distinctNamesCount = distinctNamesCount; 134 } 135 136 @Schema(description = "Number of names having extension data.") 137 @NotNull 138 public Map<Extension, Integer> getCountExtRecordsByExtension() { 139 return countExtRecordsByExtension; 140 } 141 142 public void setCountExtRecordsByExtension(Map<Extension, Integer> countExtRecordsByExtension) { 143 this.countExtRecordsByExtension = countExtRecordsByExtension; 144 } 145 146 /** 147 * @return number of records matching a name in the Catalogue of Life 148 */ 149 @Schema(description = "Number of records matching a name in the Catalogue of Life.") 150 @Min(0) 151 public int getColMatchingCount() { 152 return colMatchingCount; 153 } 154 155 public void setColMatchingCount(int colMatchingCount) { 156 this.colMatchingCount = colMatchingCount; 157 } 158 159 /** 160 * @return number of records matching a name in the GBIF backbone taxonomy 161 */ 162 @Schema(description = "Number of records matching a name in the GBIF Backbone Taxonomy.") 163 @Min(0) 164 public int getNubMatchingCount() { 165 return nubMatchingCount; 166 } 167 168 public void setNubMatchingCount(int nubMatchingCount) { 169 this.nubMatchingCount = nubMatchingCount; 170 } 171 172 @Schema(description = "Number of vernacular names by language.") 173 @NotNull 174 public Map<Language, Integer> getCountNamesByLanguage() { 175 return countNamesByLanguage; 176 } 177 178 public void setCountNamesByLanguage(Map<Language, Integer> countNamesByLanguage) { 179 this.countNamesByLanguage = countNamesByLanguage; 180 } 181 182 /** 183 * @return number of records with a taxonomic status of a synonym. 184 * For occurrence records the nub taxonomy status is used 185 */ 186 @Schema(description = "Number of records with a taxonomic status of synonym.") 187 @Min(0) 188 public int getSynonymsCount() { 189 return synonymsCount; 190 } 191 192 public void setSynonymsCount(int synonymsCount) { 193 this.synonymsCount = synonymsCount; 194 } 195 196 /** 197 * @return total number of name usage records in checklistbank 198 */ 199 @Schema(description = "Total number of name usage records in Checklistbank.") 200 @Min(0) 201 public int getUsagesCount() { 202 return usagesCount; 203 } 204 205 public void setUsagesCount(int usagesCount) { 206 this.usagesCount = usagesCount; 207 } 208 209 /** 210 * @return map of total name usage counts by their origin 211 */ 212 @Schema(description = "Total name usages by origin.") 213 @NotNull 214 public Map<Origin, Integer> getCountByOrigin() { 215 return countByOrigin; 216 } 217 218 public void setCountByOrigin(Map<Origin, Integer> countByOrigin) { 219 this.countByOrigin = countByOrigin; 220 } 221 222 /** 223 * @return map of total name usage counts by their interpretation issue 224 */ 225 @Schema(description = "Total name usage counts by their interpretation issue.") 226 @NotNull 227 public Map<NameUsageIssue, Integer> getCountByIssue() { 228 return countByIssue; 229 } 230 231 public void setCountByIssue(Map<NameUsageIssue, Integer> countByIssue) { 232 this.countByIssue = countByIssue; 233 } 234 235 /** 236 * @return date this metric was generated. Roughly equivalent with date of indexing 237 */ 238 @Schema(description = "Date this metric was generated. Roughly equivalent with date of indexing.") 239 @NotNull 240 @Past 241 public Date getCreated() { 242 return created; 243 } 244 245 public void setCreated(Date created) { 246 this.created = created; 247 } 248 249 /** 250 * @return date new dataset data was downloaded/harvested last time 251 */ 252 @Schema(description = "Date new dataset data was downloaded/harvested last time.") 253 @NotNull 254 @Past 255 public Date getDownloaded() { 256 return downloaded; 257 } 258 259 public void setDownloaded(Date downloaded) { 260 this.downloaded = downloaded; 261 } 262 263 @Schema(description = "Dataset key.") 264 @NotNull 265 public UUID getDatasetKey() { 266 return datasetKey; 267 } 268 269 public void setDatasetKey(UUID datasetKey) { 270 this.datasetKey = datasetKey; 271 } 272 273 /** 274 * @return percentage of distinct names that match a name in the GBIF backbone taxonomy 275 */ 276 @Schema(description = "Percentage of distinct names that match a name in the GBIF backbone taxonomy.") 277 @Max(100) 278 @Min(0) 279 public int getNubCoveragePct() { 280 return nubCoveragePct; 281 } 282 283 public void setNubCoveragePct(int nubCoveragePct) { 284 this.nubCoveragePct = nubCoveragePct; 285 } 286 287 /** 288 * For an occurrence dataset get the number of records that are interpreted to belong to a certain nub kingdom. 289 * For checklists the number of usages belonging to a certain nub kingdom. Note this is not the kingdom as explicitly 290 * given by the checklist, but the nub kingdom after matching the usages to the nub. 291 * 292 * @param kingdom to get metrics for 293 * 294 * @return the number of records found in the respective kingdom 295 */ 296 @Min(0) 297 public int getCountByKingdom(Kingdom kingdom) { 298 return getCountFromMap(countByKingdom, kingdom); 299 } 300 301 /** 302 * Get the metrics by taxonomic rank, i.e. the lowest rank to which an occurrence record was identified 303 * or the rank of the name usage in a checklist. 304 * 305 * @param rank to get metrics for. Mayor Linnéan ranks only down to species plus INFRASPECIFIC_NAME. 306 * 307 * @return the number of records for the given taxonomic rank. 308 */ 309 @Min(0) 310 public int getCountByRank(Rank rank) { 311 return getCountFromMap(countByRank, rank); 312 } 313 314 /** 315 * Get the metrics by name usage origin. 316 */ 317 @Min(0) 318 public int getCountByOrigin(Origin origin) { 319 return getCountFromMap(countByOrigin, origin); 320 } 321 322 /** 323 * Get the metrics by name usage issue. 324 */ 325 @Min(0) 326 public int getCountByIssue(NameUsageIssue issue) { 327 return getCountFromMap(countByIssue, issue); 328 } 329 330 /** 331 * Get the metrics for other dynamic counts. 332 */ 333 @Min(0) 334 public int getOtherCount(String key) { 335 return getCountFromMap(otherCount, key); 336 } 337 338 /** 339 * Number of vernacular names in this checklist dataset by language. 340 * For occurrence datasets always 0. 341 * 342 * @param language of common names 343 * 344 * @return the number of records found for the extension 345 */ 346 @Min(0) 347 public int getCountNamesByLanguage(Language language) { 348 return getCountFromMap(countNamesByLanguage, language); 349 } 350 351 /** 352 * Number of extension records found in the dataset. 353 * 354 * @return the number of records found for the extension 355 */ 356 @Min(0) 357 public int getExtensionRecordCount(Extension extension) { 358 return getCountFromMap(countExtRecordsByExtension, extension); 359 } 360 361 public Map<String, Integer> getOtherCount() { 362 return otherCount; 363 } 364 365 public void setOtherCount(Map<String, Integer> otherCount) { 366 this.otherCount = otherCount; 367 } 368 369 /** 370 * @return value from map if key exists, 0 otherwise 371 */ 372 private int getCountFromMap(Map<?, Integer> map, Object key) { 373 if (key == null) { 374 throw new IllegalArgumentException("Null not allowed"); 375 } 376 if (map.containsKey(key)) { 377 return map.get(key); 378 } 379 return 0; 380 } 381 382 @Override 383 public boolean equals(Object o) { 384 if (this == o) { 385 return true; 386 } 387 if (o == null || getClass() != o.getClass()) { 388 return false; 389 } 390 DatasetMetrics that = (DatasetMetrics) o; 391 return key == that.key && 392 usagesCount == that.usagesCount && 393 synonymsCount == that.synonymsCount && 394 distinctNamesCount == that.distinctNamesCount && 395 nubMatchingCount == that.nubMatchingCount && 396 colMatchingCount == that.colMatchingCount && 397 nubCoveragePct == that.nubCoveragePct && 398 colCoveragePct == that.colCoveragePct && 399 Objects.equals(datasetKey, that.datasetKey) && 400 Objects.equals(countByConstituent, that.countByConstituent) && 401 Objects.equals(countByKingdom, that.countByKingdom) && 402 Objects.equals(countByRank, that.countByRank) && 403 Objects.equals(countNamesByLanguage, that.countNamesByLanguage) && 404 Objects.equals(countExtRecordsByExtension, that.countExtRecordsByExtension) && 405 Objects.equals(countByOrigin, that.countByOrigin) && 406 Objects.equals(countByIssue, that.countByIssue) && 407 Objects.equals(otherCount, that.otherCount) && 408 Objects.equals(created, that.created) && 409 Objects.equals(downloaded, that.downloaded); 410 } 411 412 @Override 413 public int hashCode() { 414 return Objects 415 .hash(key, datasetKey, usagesCount, synonymsCount, distinctNamesCount, nubMatchingCount, 416 colMatchingCount, nubCoveragePct, colCoveragePct, countByConstituent, countByKingdom, 417 countByRank, countNamesByLanguage, countExtRecordsByExtension, countByOrigin, 418 countByIssue, otherCount, created, downloaded); 419 } 420 421 @Override 422 public String toString() { 423 return new StringJoiner(", ", DatasetMetrics.class.getSimpleName() + "[", "]") 424 .add("key=" + key) 425 .add("datasetKey=" + datasetKey) 426 .add("usagesCount=" + usagesCount) 427 .add("synonymsCount=" + synonymsCount) 428 .add("distinctNamesCount=" + distinctNamesCount) 429 .add("nubMatchingCount=" + nubMatchingCount) 430 .add("colMatchingCount=" + colMatchingCount) 431 .add("nubCoveragePct=" + nubCoveragePct) 432 .add("colCoveragePct=" + colCoveragePct) 433 .add("countByConstituent=" + countByConstituent) 434 .add("countByKingdom=" + countByKingdom) 435 .add("countByRank=" + countByRank) 436 .add("countNamesByLanguage=" + countNamesByLanguage) 437 .add("countExtRecordsByExtension=" + countExtRecordsByExtension) 438 .add("countByOrigin=" + countByOrigin) 439 .add("countByIssue=" + countByIssue) 440 .add("otherCount=" + otherCount) 441 .add("created=" + created) 442 .add("downloaded=" + downloaded) 443 .toString(); 444 } 445}