/*
 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.api.model.crawler;

import java.util.Date;
import java.util.Objects;
import java.util.StringJoiner;
import java.util.UUID;

import javax.annotation.Nullable;
import javax.validation.constraints.Min;

import static org.gbif.api.util.PreconditionUtils.checkArgument;

/**
 * Information about a dataset that is currently being processed. That usually means that we are crawling it at the
 * moment or are in the process of persisting and interpreting its occurrences.
 * <p>
 * Instances can be created either through the {@link Builder} (which validates the values, see
 * {@link #DatasetProcessStatus(Builder)}) or through the no-arg constructor followed by the setters (which do not
 * validate anything).
 */
@SuppressWarnings("unused")
public class DatasetProcessStatus {

  private UUID datasetKey;
  private CrawlJob crawlJob;
  private Date startedCrawling;
  private Date finishedCrawling;
  private String crawlContext;
  private FinishReason finishReason;
  private ProcessState processStateOccurrence;
  private ProcessState processStateChecklist;
  private ProcessState processStateSample;

  // Long instead of Optional<Long> because of JSON serialization issues, Jackson doesn't honor the NON_NULL setting
  // for the Guava extension
  private Long declaredCount;

  private long pagesCrawled;

  private long pagesFragmentedSuccessful;
  private long pagesFragmentedError;
  private long fragmentsEmitted;

  private long fragmentsReceived;
  private long rawOccurrencesPersistedNew;
  private long rawOccurrencesPersistedUpdated;
  private long rawOccurrencesPersistedUnchanged;
  private long rawOccurrencesPersistedError;
  private long fragmentsProcessed;

  private long verbatimOccurrencesPersistedSuccessful;
  private long verbatimOccurrencesPersistedError;

  private long interpretedOccurrencesPersistedSuccessful;
  private long interpretedOccurrencesPersistedError;

  /**
   * Creates a fresh, empty {@link Builder}.
   *
   * @return a new builder instance
   */
  public static Builder builder() {
    return new Builder();
  }

  public DatasetProcessStatus() {
    // This constructor is needed for Jackson deserialization
  }

  /**
   * Builds a new object from the builder validating it in the process.
   * <p>
   * We only validate very little: the builder, {@code datasetKey} and {@code crawlJob} must not be {@code null},
   * {@code declaredCount} must be {@code null} or non-negative, and all other counts have to be greater than or
   * equal to zero. No cross-field consistency checks are performed.
   * <p>
   * Count violations are reported through {@code PreconditionUtils.checkArgument}; the exception type is decided
   * there (presumably {@code IllegalArgumentException} - confirm against that utility).
   *
   * @param builder the builder to copy all values from
   *
   * @throws NullPointerException if {@code builder}, its {@code datasetKey} or its {@code crawlJob} is {@code null}
   */
  public DatasetProcessStatus(Builder builder) {
    Objects.requireNonNull(builder, "builder can't be null");

    datasetKey = Objects.requireNonNull(builder.datasetKey, "datasetKey can't be null");
    crawlJob = Objects.requireNonNull(builder.crawlJob, "crawlJob can't be null");
    startedCrawling = builder.startedCrawling;
    finishedCrawling = builder.finishedCrawling;
    finishReason = builder.finishReason;
    processStateOccurrence = builder.processStateOccurrence;
    processStateChecklist = builder.processStateChecklist;
    processStateSample = builder.processStateSample;
    crawlContext = builder.crawlContext;

    declaredCount = builder.declaredCount;

    pagesCrawled = builder.pagesCrawled;

    pagesFragmentedSuccessful = builder.pagesFragmentedSuccessful;
    pagesFragmentedError = builder.pagesFragmentedError;
    fragmentsEmitted = builder.fragmentsEmitted;

    fragmentsReceived = builder.fragmentsReceived;
    rawOccurrencesPersistedNew = builder.rawOccurrencesPersistedNew;
    rawOccurrencesPersistedUpdated = builder.rawOccurrencesPersistedUpdated;
    rawOccurrencesPersistedUnchanged = builder.rawOccurrencesPersistedUnchanged;
    rawOccurrencesPersistedError = builder.rawOccurrencesPersistedError;
    fragmentsProcessed = builder.fragmentsProcessed;

    verbatimOccurrencesPersistedSuccessful = builder.verbatimOccurrencesPersistedSuccessful;
    verbatimOccurrencesPersistedError = builder.verbatimOccurrencesPersistedError;

    interpretedOccurrencesPersistedSuccessful = builder.interpretedOccurrencesPersistedSuccessful;
    interpretedOccurrencesPersistedError = builder.interpretedOccurrencesPersistedError;

    // declaredCount is the only nullable count, hence its own message.
    checkArgument(declaredCount == null || declaredCount >= 0,
                  "declaredCount must be either null or greater than or equal to zero");

    // The remaining counts are validated in field declaration order so the first offending field is reported.
    checkNotNegative(pagesCrawled, "pagesCrawled");

    checkNotNegative(pagesFragmentedSuccessful, "pagesFragmentedSuccessful");
    checkNotNegative(pagesFragmentedError, "pagesFragmentedError");
    checkNotNegative(fragmentsEmitted, "fragmentsEmitted");

    checkNotNegative(fragmentsReceived, "fragmentsReceived");
    checkNotNegative(rawOccurrencesPersistedNew, "rawOccurrencesPersistedNew");
    checkNotNegative(rawOccurrencesPersistedUpdated, "rawOccurrencesPersistedUpdated");
    checkNotNegative(rawOccurrencesPersistedUnchanged, "rawOccurrencesPersistedUnchanged");
    checkNotNegative(rawOccurrencesPersistedError, "rawOccurrencesPersistedError");
    checkNotNegative(fragmentsProcessed, "fragmentsProcessed");

    checkNotNegative(verbatimOccurrencesPersistedSuccessful, "verbatimOccurrencesPersistedSuccessful");
    checkNotNegative(verbatimOccurrencesPersistedError, "verbatimOccurrencesPersistedError");

    checkNotNegative(interpretedOccurrencesPersistedSuccessful, "interpretedOccurrencesPersistedSuccessful");
    checkNotNegative(interpretedOccurrencesPersistedError, "interpretedOccurrencesPersistedError");
  }

  /**
   * Verifies that a count is greater than or equal to zero, producing a uniform message that names the field.
   *
   * @param value     the count to check
   * @param fieldName name of the field, used as the prefix of the failure message
   */
  private static void checkNotNegative(long value, String fieldName) {
    checkArgument(value >= 0, fieldName + " has to be greater than or equal to zero");
  }

  /**
   * Last successful crawl context, this is a JSON string.
   *
   * @return the last successful crawl context, this is a JSON string
   */
  @Nullable
  public String getCrawlContext() {
    return crawlContext;
  }

  /**
   * The crawl job this status belongs to.
   *
   * @return the crawl job; never {@code null} for instances created through the {@link Builder}
   */
  public CrawlJob getCrawlJob() {
    return crawlJob;
  }

  /**
   * Key that identifies the Dataset.
   *
   * @return the UUID key that identifies the dataset
   */
  public UUID getDatasetKey() {
    return datasetKey;
  }

  /**
   * The declared count, if any.
   *
   * @return the declared count or {@code null} if none was set
   */
  @Nullable
  public Long getDeclaredCount() {
    return declaredCount;
  }

  /**
   * Timestamp of when the crawl was finished.
   *
   * @return the timestamp when the crawl finished
   */
  @Nullable
  public Date getFinishedCrawling() {
    return finishedCrawling;
  }

  /**
   * The reason a crawl is finished. Will be {@code null} if {@link #getFinishedCrawling()} returns {@code null}.
   *
   * @return the reason the crawl finished
   */
  @Nullable
  public FinishReason getFinishReason() {
    return finishReason;
  }

  /**
   * @return the occurrence process state or {@code null} if none was set
   */
  @Nullable
  public ProcessState getProcessStateOccurrence() {
    return processStateOccurrence;
  }

  /**
   * @return the checklist process state or {@code null} if none was set
   */
  @Nullable
  public ProcessState getProcessStateChecklist() {
    return processStateChecklist;
  }

  /**
   * @return the sample process state or {@code null} if none was set
   */
  @Nullable
  public ProcessState getProcessStateSample() {
    return processStateSample;
  }

  /**
   * @return the value of the {@code fragmentsEmitted} counter
   */
  @Min(0)
  public long getFragmentsEmitted() {
    return fragmentsEmitted;
  }

  /**
   * Number of fragments that have been processed.
   *
   * @return the number of fragments that have been processed
   */
  @Min(0)
  public long getFragmentsProcessed() {
    return fragmentsProcessed;
  }

  /**
   * @return the value of the {@code fragmentsReceived} counter
   */
  @Min(0)
  public long getFragmentsReceived() {
    return fragmentsReceived;
  }

  /**
   * @return the value of the {@code interpretedOccurrencesPersistedError} counter
   */
  @Min(0)
  public long getInterpretedOccurrencesPersistedError() {
    return interpretedOccurrencesPersistedError;
  }

  /**
   * @return the value of the {@code interpretedOccurrencesPersistedSuccessful} counter
   */
  @Min(0)
  public long getInterpretedOccurrencesPersistedSuccessful() {
    return interpretedOccurrencesPersistedSuccessful;
  }

  /**
   * Number of pages crawled in total.
   *
   * @return number of pages crawled
   */
  @Min(0)
  public long getPagesCrawled() {
    return pagesCrawled;
  }

  /**
   * @return the value of the {@code pagesFragmentedError} counter
   */
  @Min(0)
  public long getPagesFragmentedError() {
    return pagesFragmentedError;
  }

  /**
   * Number of pages that have been fragmented.
   *
   * @return the number of pages that have been fragmented
   */
  @Min(0)
  public long getPagesFragmentedSuccessful() {
    return pagesFragmentedSuccessful;
  }

  /**
   * @return the value of the {@code rawOccurrencesPersistedError} counter
   */
  @Min(0)
  public long getRawOccurrencesPersistedError() {
    return rawOccurrencesPersistedError;
  }

  /**
   * @return the value of the {@code rawOccurrencesPersistedNew} counter
   */
  @Min(0)
  public long getRawOccurrencesPersistedNew() {
    return rawOccurrencesPersistedNew;
  }

  /**
   * @return the value of the {@code rawOccurrencesPersistedUnchanged} counter
   */
  @Min(0)
  public long getRawOccurrencesPersistedUnchanged() {
    return rawOccurrencesPersistedUnchanged;
  }

  /**
   * @return the value of the {@code rawOccurrencesPersistedUpdated} counter
   */
  @Min(0)
  public long getRawOccurrencesPersistedUpdated() {
    return rawOccurrencesPersistedUpdated;
  }

  /**
   * Timestamp of when the crawl was actually started by a crawler.
   *
   * @return the timestamp when the crawl started
   */
  @Nullable
  public Date getStartedCrawling() {
    return startedCrawling;
  }

  /**
   * @return the value of the {@code verbatimOccurrencesPersistedError} counter
   */
  @Min(0)
  public long getVerbatimOccurrencesPersistedError() {
    return verbatimOccurrencesPersistedError;
  }

  /**
   * @return the value of the {@code verbatimOccurrencesPersistedSuccessful} counter
   */
  @Min(0)
  public long getVerbatimOccurrencesPersistedSuccessful() {
    return verbatimOccurrencesPersistedSuccessful;
  }

  // ---------------------------------------------------------------------------------------------------------------
  // Plain setters. Unlike the builder-based constructor they do not validate their argument; presumably they exist
  // for Jackson, together with the no-arg constructor.
  // ---------------------------------------------------------------------------------------------------------------

  public void setDatasetKey(UUID datasetKey) {
    this.datasetKey = datasetKey;
  }

  public void setCrawlJob(CrawlJob crawlJob) {
    this.crawlJob = crawlJob;
  }

  public void setStartedCrawling(Date startedCrawling) {
    this.startedCrawling = startedCrawling;
  }

  public void setFinishedCrawling(Date finishedCrawling) {
    this.finishedCrawling = finishedCrawling;
  }

  public void setCrawlContext(String crawlContext) {
    this.crawlContext = crawlContext;
  }

  public void setFinishReason(FinishReason finishReason) {
    this.finishReason = finishReason;
  }

  public void setProcessStateOccurrence(ProcessState processStateOccurrence) {
    this.processStateOccurrence = processStateOccurrence;
  }

  public void setProcessStateChecklist(ProcessState processStateChecklist) {
    this.processStateChecklist = processStateChecklist;
  }

  public void setProcessStateSample(ProcessState processStateSample) {
    this.processStateSample = processStateSample;
  }

  public void setDeclaredCount(Long declaredCount) {
    this.declaredCount = declaredCount;
  }

  public void setPagesCrawled(long pagesCrawled) {
    this.pagesCrawled = pagesCrawled;
  }

  public void setPagesFragmentedSuccessful(long pagesFragmentedSuccessful) {
    this.pagesFragmentedSuccessful = pagesFragmentedSuccessful;
  }

  public void setPagesFragmentedError(long pagesFragmentedError) {
    this.pagesFragmentedError = pagesFragmentedError;
  }

  public void setFragmentsEmitted(long fragmentsEmitted) {
    this.fragmentsEmitted = fragmentsEmitted;
  }

  public void setFragmentsReceived(long fragmentsReceived) {
    this.fragmentsReceived = fragmentsReceived;
  }

  public void setRawOccurrencesPersistedNew(long rawOccurrencesPersistedNew) {
    this.rawOccurrencesPersistedNew = rawOccurrencesPersistedNew;
  }

  public void setRawOccurrencesPersistedUpdated(long rawOccurrencesPersistedUpdated) {
    this.rawOccurrencesPersistedUpdated = rawOccurrencesPersistedUpdated;
  }

  public void setRawOccurrencesPersistedUnchanged(long rawOccurrencesPersistedUnchanged) {
    this.rawOccurrencesPersistedUnchanged = rawOccurrencesPersistedUnchanged;
  }

  public void setRawOccurrencesPersistedError(long rawOccurrencesPersistedError) {
    this.rawOccurrencesPersistedError = rawOccurrencesPersistedError;
  }

  public void setFragmentsProcessed(long fragmentsProcessed) {
    this.fragmentsProcessed = fragmentsProcessed;
  }

  public void setVerbatimOccurrencesPersistedSuccessful(long verbatimOccurrencesPersistedSuccessful) {
    this.verbatimOccurrencesPersistedSuccessful = verbatimOccurrencesPersistedSuccessful;
  }

  public void setVerbatimOccurrencesPersistedError(long verbatimOccurrencesPersistedError) {
    this.verbatimOccurrencesPersistedError = verbatimOccurrencesPersistedError;
  }

  public void setInterpretedOccurrencesPersistedSuccessful(long interpretedOccurrencesPersistedSuccessful) {
    this.interpretedOccurrencesPersistedSuccessful = interpretedOccurrencesPersistedSuccessful;
  }

  public void setInterpretedOccurrencesPersistedError(long interpretedOccurrencesPersistedError) {
    this.interpretedOccurrencesPersistedError = interpretedOccurrencesPersistedError;
  }

  /**
   * Two statuses are equal when they are of exactly the same class and all of their fields are equal.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    DatasetProcessStatus that = (DatasetProcessStatus) o;
    // Cheap primitive comparisons first, object comparisons afterwards.
    // NOTE(review): finishReason and the process states are compared by identity, which assumes they are enums.
    return pagesCrawled == that.pagesCrawled &&
           pagesFragmentedSuccessful == that.pagesFragmentedSuccessful &&
           pagesFragmentedError == that.pagesFragmentedError &&
           fragmentsEmitted == that.fragmentsEmitted &&
           fragmentsReceived == that.fragmentsReceived &&
           rawOccurrencesPersistedNew == that.rawOccurrencesPersistedNew &&
           rawOccurrencesPersistedUpdated == that.rawOccurrencesPersistedUpdated &&
           rawOccurrencesPersistedUnchanged == that.rawOccurrencesPersistedUnchanged &&
           rawOccurrencesPersistedError == that.rawOccurrencesPersistedError &&
           fragmentsProcessed == that.fragmentsProcessed &&
           verbatimOccurrencesPersistedSuccessful == that.verbatimOccurrencesPersistedSuccessful &&
           verbatimOccurrencesPersistedError == that.verbatimOccurrencesPersistedError &&
           interpretedOccurrencesPersistedSuccessful == that.interpretedOccurrencesPersistedSuccessful &&
           interpretedOccurrencesPersistedError == that.interpretedOccurrencesPersistedError &&
           Objects.equals(datasetKey, that.datasetKey) &&
           Objects.equals(crawlJob, that.crawlJob) &&
           Objects.equals(startedCrawling, that.startedCrawling) &&
           Objects.equals(finishedCrawling, that.finishedCrawling) &&
           Objects.equals(crawlContext, that.crawlContext) &&
           finishReason == that.finishReason &&
           processStateOccurrence == that.processStateOccurrence &&
           processStateChecklist == that.processStateChecklist &&
           processStateSample == that.processStateSample &&
           Objects.equals(declaredCount, that.declaredCount);
  }

  /**
   * Hash code computed over the same set of fields that {@link #equals(Object)} compares.
   */
  @Override
  public int hashCode() {
    return Objects
      .hash(datasetKey, crawlJob, startedCrawling, finishedCrawling, crawlContext, finishReason,
            processStateOccurrence, processStateChecklist, processStateSample, declaredCount,
            pagesCrawled, pagesFragmentedSuccessful, pagesFragmentedError, fragmentsEmitted,
            fragmentsReceived, rawOccurrencesPersistedNew, rawOccurrencesPersistedUpdated,
            rawOccurrencesPersistedUnchanged, rawOccurrencesPersistedError, fragmentsProcessed,
            verbatimOccurrencesPersistedSuccessful, verbatimOccurrencesPersistedError,
            interpretedOccurrencesPersistedSuccessful, interpretedOccurrencesPersistedError);
  }

  /**
   * Renders all fields as {@code DatasetProcessStatus[name=value, ...]}.
   */
  @Override
  public String toString() {
    return new StringJoiner(", ", DatasetProcessStatus.class.getSimpleName() + "[", "]")
      .add("datasetKey=" + datasetKey)
      .add("crawlJob=" + crawlJob)
      .add("startedCrawling=" + startedCrawling)
      .add("finishedCrawling=" + finishedCrawling)
      .add("crawlContext='" + crawlContext + "'")
      .add("finishReason=" + finishReason)
      .add("processStateOccurrence=" + processStateOccurrence)
      .add("processStateChecklist=" + processStateChecklist)
      .add("processStateSample=" + processStateSample)
      .add("declaredCount=" + declaredCount)
      .add("pagesCrawled=" + pagesCrawled)
      .add("pagesFragmentedSuccessful=" + pagesFragmentedSuccessful)
      .add("pagesFragmentedError=" + pagesFragmentedError)
      .add("fragmentsEmitted=" + fragmentsEmitted)
      .add("fragmentsReceived=" + fragmentsReceived)
      .add("rawOccurrencesPersistedNew=" + rawOccurrencesPersistedNew)
      .add("rawOccurrencesPersistedUpdated=" + rawOccurrencesPersistedUpdated)
      .add("rawOccurrencesPersistedUnchanged=" + rawOccurrencesPersistedUnchanged)
      .add("rawOccurrencesPersistedError=" + rawOccurrencesPersistedError)
      .add("fragmentsProcessed=" + fragmentsProcessed)
      .add("verbatimOccurrencesPersistedSuccessful=" + verbatimOccurrencesPersistedSuccessful)
      .add("verbatimOccurrencesPersistedError=" + verbatimOccurrencesPersistedError)
      .add("interpretedOccurrencesPersistedSuccessful=" + interpretedOccurrencesPersistedSuccessful)
      .add("interpretedOccurrencesPersistedError=" + interpretedOccurrencesPersistedError)
      .toString();
  }

  /**
   * Fluent builder for {@link DatasetProcessStatus}.
   * <p>
   * The builder itself stores values as given; all validation happens in
   * {@link DatasetProcessStatus#DatasetProcessStatus(Builder)} when {@link #build()} is called. Counts that are
   * never set keep the {@code long} default of zero.
   */
  public static class Builder {

    private UUID datasetKey;
    private CrawlJob crawlJob;
    private Date startedCrawling;
    private Date finishedCrawling;
    private String crawlContext;
    private FinishReason finishReason;
    private ProcessState processStateOccurrence;
    private ProcessState processStateChecklist;
    private ProcessState processStateSample;
    private Long declaredCount;
    private long pagesCrawled;
    private long pagesFragmentedSuccessful;
    private long pagesFragmentedError;
    private long fragmentsEmitted;
    private long fragmentsReceived;
    private long rawOccurrencesPersistedNew;
    private long rawOccurrencesPersistedUpdated;
    private long rawOccurrencesPersistedUnchanged;
    private long rawOccurrencesPersistedError;
    private long fragmentsProcessed;
    private long verbatimOccurrencesPersistedSuccessful;
    private long verbatimOccurrencesPersistedError;
    private long interpretedOccurrencesPersistedSuccessful;
    private long interpretedOccurrencesPersistedError;

    /**
     * Creates the status object from the current builder state, validating it in the process.
     *
     * @return a new, validated {@link DatasetProcessStatus}
     */
    public DatasetProcessStatus build() {
      return new DatasetProcessStatus(this);
    }

    public Builder crawlContext(String crawlContext) {
      this.crawlContext = crawlContext;
      return this;
    }

    public Builder crawlJob(CrawlJob crawlJob) {
      this.crawlJob = crawlJob;
      return this;
    }

    public Builder datasetKey(UUID datasetKey) {
      this.datasetKey = datasetKey;
      return this;
    }

    public Builder declaredCount(Long declaredCount) {
      this.declaredCount = declaredCount;
      return this;
    }

    public Builder finishedCrawling(Date finishedCrawling) {
      this.finishedCrawling = finishedCrawling;
      return this;
    }

    public Builder finishReason(FinishReason finishReason) {
      this.finishReason = finishReason;
      return this;
    }

    public Builder processStateOccurrence(ProcessState processStateOccurrence) {
      this.processStateOccurrence = processStateOccurrence;
      return this;
    }

    public Builder processStateChecklist(ProcessState processStateChecklist) {
      this.processStateChecklist = processStateChecklist;
      return this;
    }

    public Builder processStateSample(ProcessState processStateSample) {
      this.processStateSample = processStateSample;
      return this;
    }

    public Builder fragmentsEmitted(long fragmentsEmitted) {
      this.fragmentsEmitted = fragmentsEmitted;
      return this;
    }

    public Builder fragmentsProcessed(long fragmentsProcessed) {
      this.fragmentsProcessed = fragmentsProcessed;
      return this;
    }

    public Builder fragmentsReceived(long fragmentsReceived) {
      this.fragmentsReceived = fragmentsReceived;
      return this;
    }

    public Builder interpretedOccurrencesPersistedError(long interpretedOccurrencesPersistedError) {
      this.interpretedOccurrencesPersistedError = interpretedOccurrencesPersistedError;
      return this;
    }

    public Builder interpretedOccurrencesPersistedSuccessful(long interpretedOccurrencesPersistedSuccessful) {
      this.interpretedOccurrencesPersistedSuccessful = interpretedOccurrencesPersistedSuccessful;
      return this;
    }

    public Builder pagesCrawled(long pagesCrawled) {
      this.pagesCrawled = pagesCrawled;
      return this;
    }

    public Builder pagesFragmentedError(long pagesFragmentedError) {
      this.pagesFragmentedError = pagesFragmentedError;
      return this;
    }

    public Builder pagesFragmentedSuccessful(long pagesFragmentedSuccessful) {
      this.pagesFragmentedSuccessful = pagesFragmentedSuccessful;
      return this;
    }

    public Builder rawOccurrencesPersistedError(long rawOccurrencesPersistedError) {
      this.rawOccurrencesPersistedError = rawOccurrencesPersistedError;
      return this;
    }

    public Builder rawOccurrencesPersistedNew(long rawOccurrencesPersistedNew) {
      this.rawOccurrencesPersistedNew = rawOccurrencesPersistedNew;
      return this;
    }

    public Builder rawOccurrencesPersistedUnchanged(long rawOccurrencesPersistedUnchanged) {
      this.rawOccurrencesPersistedUnchanged = rawOccurrencesPersistedUnchanged;
      return this;
    }

    public Builder rawOccurrencesPersistedUpdated(long rawOccurrencesPersistedUpdated) {
      this.rawOccurrencesPersistedUpdated = rawOccurrencesPersistedUpdated;
      return this;
    }

    public Builder startedCrawling(Date startedCrawling) {
      this.startedCrawling = startedCrawling;
      return this;
    }

    public Builder verbatimOccurrencesPersistedError(long verbatimOccurrencesPersistedError) {
      this.verbatimOccurrencesPersistedError = verbatimOccurrencesPersistedError;
      return this;
    }

    public Builder verbatimOccurrencesPersistedSuccessful(long verbatimOccurrencesPersistedSuccessful) {
      this.verbatimOccurrencesPersistedSuccessful = verbatimOccurrencesPersistedSuccessful;
      return this;
    }
  }
}