001/*
002 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.api.model.crawler;
017
018import java.util.Date;
019import java.util.Objects;
020import java.util.StringJoiner;
021import java.util.UUID;
022
023import javax.annotation.Nullable;
024import javax.validation.constraints.Min;
025
026import static org.gbif.api.util.PreconditionUtils.checkArgument;
027
028/**
029 * Information about a dataset that is currently being processed. That usually means that we are crawling it at the
030 * moment or are in the process of persisting and interpreting its occurrences.
031 */
032@SuppressWarnings("unused")
033public class DatasetProcessStatus {
034
035  private UUID datasetKey;
036  private CrawlJob crawlJob;
037  private Date startedCrawling;
038  private Date finishedCrawling;
039  private String crawlContext;
040  private FinishReason finishReason;
041  private ProcessState processStateOccurrence;
042  private ProcessState processStateChecklist;
043  private ProcessState processStateSample;
044
045  // Long instead of Optional<Long> because of JSON serialization issues, Jackson doesn't honor the NON_NULL setting
046  // for the Guava extension
047  private Long declaredCount;
048
049  private long pagesCrawled;
050
051  private long pagesFragmentedSuccessful;
052  private long pagesFragmentedError;
053  private long fragmentsEmitted;
054
055  private long fragmentsReceived;
056  private long rawOccurrencesPersistedNew;
057  private long rawOccurrencesPersistedUpdated;
058  private long rawOccurrencesPersistedUnchanged;
059  private long rawOccurrencesPersistedError;
060  private long fragmentsProcessed;
061
062  private long verbatimOccurrencesPersistedSuccessful;
063  private long verbatimOccurrencesPersistedError;
064
065  private long interpretedOccurrencesPersistedSuccessful;
066  private long interpretedOccurrencesPersistedError;
067
068  public static Builder builder() {
069    return new Builder();
070  }
071
072  public DatasetProcessStatus() {
073    // This constructor is needed for Jackson deserialization
074  }
075
076  /**
077   * Builds a new object from the builder validating it in the process.
078   * <p/>
079   * We only validate very little (all counts have to be greater than or equal to zero, a few null checks etc.) but not
080   */
081  public DatasetProcessStatus(Builder builder) {
082    Objects.requireNonNull(builder, "builder can't be null");
083
084    datasetKey = Objects.requireNonNull(builder.datasetKey, "datasetKey can't be null");
085    crawlJob = Objects.requireNonNull(builder.crawlJob, "crawlJob can't be null");
086    startedCrawling = builder.startedCrawling;
087    finishedCrawling = builder.finishedCrawling;
088    finishReason = builder.finishReason;
089    processStateOccurrence = builder.processStateOccurrence;
090    processStateChecklist = builder.processStateChecklist;
091    processStateSample = builder.processStateSample;
092    crawlContext = builder.crawlContext;
093
094    declaredCount = builder.declaredCount;
095
096    pagesCrawled = builder.pagesCrawled;
097
098    pagesFragmentedSuccessful = builder.pagesFragmentedSuccessful;
099    pagesFragmentedError = builder.pagesFragmentedError;
100    fragmentsEmitted = builder.fragmentsEmitted;
101
102    fragmentsReceived = builder.fragmentsReceived;
103    rawOccurrencesPersistedNew = builder.rawOccurrencesPersistedNew;
104    rawOccurrencesPersistedUpdated = builder.rawOccurrencesPersistedUpdated;
105    rawOccurrencesPersistedUnchanged = builder.rawOccurrencesPersistedUnchanged;
106    rawOccurrencesPersistedError = builder.rawOccurrencesPersistedError;
107    fragmentsProcessed = builder.fragmentsProcessed;
108
109    verbatimOccurrencesPersistedSuccessful = builder.verbatimOccurrencesPersistedSuccessful;
110    verbatimOccurrencesPersistedError = builder.verbatimOccurrencesPersistedError;
111
112    interpretedOccurrencesPersistedSuccessful = builder.interpretedOccurrencesPersistedSuccessful;
113    interpretedOccurrencesPersistedError = builder.interpretedOccurrencesPersistedError;
114
115    checkArgument(declaredCount == null || declaredCount >= 0,
116                  "declaredCount must be either null or greater than or equal to zero");
117
118    checkArgument(pagesCrawled >= 0, "pagesCrawled has to be greater than or equal to zero");
119
120    checkArgument(pagesFragmentedSuccessful >= 0, "pagesFragmentedSuccessful");
121    checkArgument(pagesFragmentedError >= 0, "pagesFragmentedError has to be greater than or equal to zero");
122    checkArgument(fragmentsEmitted >= 0, "fragmentsEmitted has to be greater than or equal to zero");
123
124    checkArgument(fragmentsReceived >= 0, "fragmentsReceived has to be greater than or equal to zero");
125    checkArgument(rawOccurrencesPersistedNew >= 0,
126                  "rawOccurrencesPersistedNew has to be greater than or equal to zero");
127    checkArgument(rawOccurrencesPersistedUpdated >= 0,
128                  "rawOccurrencesPersistedUpdated has to be greater than or equal to zero");
129    checkArgument(rawOccurrencesPersistedUnchanged >= 0,
130                  "rawOccurrencesPersistedUnchanged has to be greater than or equal to zero");
131    checkArgument(rawOccurrencesPersistedError >= 0,
132                  "rawOccurrencesPersistedError has to be greater than or equal to zero");
133    checkArgument(fragmentsProcessed >= 0, "fragmentsProcessed has to be greater than or equal to zero");
134
135    checkArgument(verbatimOccurrencesPersistedSuccessful >= 0,
136                  "verbatimOccurrencesPersistedSuccessful has to be greater than or equal to zero");
137    checkArgument(verbatimOccurrencesPersistedError >= 0,
138                  "verbatimOccurrencesPersistedError has to be greater than or equal to zero");
139
140    checkArgument(interpretedOccurrencesPersistedSuccessful >= 0,
141                  "interpretedOccurrencesPersistedSuccessful has to be greater than or equal to zero");
142    checkArgument(interpretedOccurrencesPersistedError >= 0,
143                  "interpretedOccurrencesPersistedError has to be greater than or equal to zero");
144  }
145
146  /**
147   * Last successful crawl context, this is a JSON string.
148   *
149   * @return the last successful crawl context, this is a JSON string
150   */
151  @Nullable
152  public String getCrawlContext() {
153    return crawlContext;
154  }
155
156  public CrawlJob getCrawlJob() {
157    return crawlJob;
158  }
159
160  /**
161   * Key that identifies the Dataset.
162   *
163   * @return the UUID key that identifies the dataset
164   */
165  public UUID getDatasetKey() {
166    return datasetKey;
167  }
168
169  @Nullable
170  public Long getDeclaredCount() {
171    return declaredCount;
172  }
173
174  /**
175   * Timestamp of when the crawl was finished.
176   *
177   * @return the timestamp when the crawl finished
178   */
179  @Nullable
180  public Date getFinishedCrawling() {
181    return finishedCrawling;
182  }
183
184  /**
185   * The reason a crawl is finished. Will be {@code null} if {@link #getFinishedCrawling()} returns {@code null}.
186   *
187   * @return the reason the crawl finished
188   */
189  @Nullable
190  public FinishReason getFinishReason() {
191    return finishReason;
192  }
193
194  @Nullable
195  public ProcessState getProcessStateOccurrence() {
196    return processStateOccurrence;
197  }
198
199  @Nullable
200  public ProcessState getProcessStateChecklist() {
201    return processStateChecklist;
202  }
203
204  @Nullable
205  public ProcessState getProcessStateSample() {
206    return processStateSample;
207  }
208
209  @Min(0)
210  public long getFragmentsEmitted() {
211    return fragmentsEmitted;
212  }
213
214  /**
215   * Number of fragments that have been processed.
216   *
217   * @return the number of fragments that have been processed
218   */
219  @Min(0)
220  public long getFragmentsProcessed() {
221    return fragmentsProcessed;
222  }
223
224  @Min(0)
225  public long getFragmentsReceived() {
226    return fragmentsReceived;
227  }
228
229  @Min(0)
230  public long getInterpretedOccurrencesPersistedError() {
231    return interpretedOccurrencesPersistedError;
232  }
233
234  @Min(0)
235  public long getInterpretedOccurrencesPersistedSuccessful() {
236    return interpretedOccurrencesPersistedSuccessful;
237  }
238
239  /**
240   * Number of pages crawled in total.
241   *
242   * @return number of pages crawled
243   */
244  @Min(0)
245  public long getPagesCrawled() {
246    return pagesCrawled;
247  }
248
249  @Min(0)
250  public long getPagesFragmentedError() {
251    return pagesFragmentedError;
252  }
253
254  /**
255   * Number of pages that have been fragmented.
256   *
257   * @return the number of pages that have been fragmented
258   */
259  @Min(0)
260  public long getPagesFragmentedSuccessful() {
261    return pagesFragmentedSuccessful;
262  }
263
264  @Min(0)
265  public long getRawOccurrencesPersistedError() {
266    return rawOccurrencesPersistedError;
267  }
268
269  @Min(0)
270  public long getRawOccurrencesPersistedNew() {
271    return rawOccurrencesPersistedNew;
272  }
273
274  @Min(0)
275  public long getRawOccurrencesPersistedUnchanged() {
276    return rawOccurrencesPersistedUnchanged;
277  }
278
279  @Min(0)
280  public long getRawOccurrencesPersistedUpdated() {
281    return rawOccurrencesPersistedUpdated;
282  }
283
284  /**
285   * Timestamp of when the crawl was actually started by a crawler.
286   *
287   * @return the timestamp when the crawl started
288   */
289  @Nullable
290  public Date getStartedCrawling() {
291    return startedCrawling;
292  }
293
294  @Min(0)
295  public long getVerbatimOccurrencesPersistedError() {
296    return verbatimOccurrencesPersistedError;
297  }
298
299  @Min(0)
300  public long getVerbatimOccurrencesPersistedSuccessful() {
301    return verbatimOccurrencesPersistedSuccessful;
302  }
303
304
305  public void setDatasetKey(UUID datasetKey) {
306    this.datasetKey = datasetKey;
307  }
308
309
310  public void setCrawlJob(CrawlJob crawlJob) {
311    this.crawlJob = crawlJob;
312  }
313
314
315  public void setStartedCrawling(Date startedCrawling) {
316    this.startedCrawling = startedCrawling;
317  }
318
319
320  public void setFinishedCrawling(Date finishedCrawling) {
321    this.finishedCrawling = finishedCrawling;
322  }
323
324
325  public void setCrawlContext(String crawlContext) {
326    this.crawlContext = crawlContext;
327  }
328
329
330  public void setFinishReason(FinishReason finishReason) {
331    this.finishReason = finishReason;
332  }
333
334  public void setProcessStateOccurrence(ProcessState processStateOccurrence) {
335    this.processStateOccurrence = processStateOccurrence;
336  }
337
338  public void setProcessStateChecklist(ProcessState processStateChecklist) {
339    this.processStateChecklist = processStateChecklist;
340  }
341
342  public void setProcessStateSample(ProcessState processStateSample) {
343    this.processStateSample = processStateSample;
344  }
345
346  public void setDeclaredCount(Long declaredCount) {
347    this.declaredCount = declaredCount;
348  }
349
350
351  public void setPagesCrawled(long pagesCrawled) {
352    this.pagesCrawled = pagesCrawled;
353  }
354
355
356  public void setPagesFragmentedSuccessful(long pagesFragmentedSuccessful) {
357    this.pagesFragmentedSuccessful = pagesFragmentedSuccessful;
358  }
359
360
361  public void setPagesFragmentedError(long pagesFragmentedError) {
362    this.pagesFragmentedError = pagesFragmentedError;
363  }
364
365
366  public void setFragmentsEmitted(long fragmentsEmitted) {
367    this.fragmentsEmitted = fragmentsEmitted;
368  }
369
370
371  public void setFragmentsReceived(long fragmentsReceived) {
372    this.fragmentsReceived = fragmentsReceived;
373  }
374
375
376  public void setRawOccurrencesPersistedNew(long rawOccurrencesPersistedNew) {
377    this.rawOccurrencesPersistedNew = rawOccurrencesPersistedNew;
378  }
379
380
381  public void setRawOccurrencesPersistedUpdated(long rawOccurrencesPersistedUpdated) {
382    this.rawOccurrencesPersistedUpdated = rawOccurrencesPersistedUpdated;
383  }
384
385
386  public void setRawOccurrencesPersistedUnchanged(long rawOccurrencesPersistedUnchanged) {
387    this.rawOccurrencesPersistedUnchanged = rawOccurrencesPersistedUnchanged;
388  }
389
390
391  public void setRawOccurrencesPersistedError(long rawOccurrencesPersistedError) {
392    this.rawOccurrencesPersistedError = rawOccurrencesPersistedError;
393  }
394
395
396  public void setFragmentsProcessed(long fragmentsProcessed) {
397    this.fragmentsProcessed = fragmentsProcessed;
398  }
399
400
401  public void setVerbatimOccurrencesPersistedSuccessful(long verbatimOccurrencesPersistedSuccessful) {
402    this.verbatimOccurrencesPersistedSuccessful = verbatimOccurrencesPersistedSuccessful;
403  }
404
405
406  public void setVerbatimOccurrencesPersistedError(long verbatimOccurrencesPersistedError) {
407    this.verbatimOccurrencesPersistedError = verbatimOccurrencesPersistedError;
408  }
409
410
411  public void setInterpretedOccurrencesPersistedSuccessful(long interpretedOccurrencesPersistedSuccessful) {
412    this.interpretedOccurrencesPersistedSuccessful = interpretedOccurrencesPersistedSuccessful;
413  }
414
415
416  public void setInterpretedOccurrencesPersistedError(long interpretedOccurrencesPersistedError) {
417    this.interpretedOccurrencesPersistedError = interpretedOccurrencesPersistedError;
418  }
419
420  @Override
421  public boolean equals(Object o) {
422    if (this == o) {
423      return true;
424    }
425    if (o == null || getClass() != o.getClass()) {
426      return false;
427    }
428    DatasetProcessStatus that = (DatasetProcessStatus) o;
429    return pagesCrawled == that.pagesCrawled &&
430      pagesFragmentedSuccessful == that.pagesFragmentedSuccessful &&
431      pagesFragmentedError == that.pagesFragmentedError &&
432      fragmentsEmitted == that.fragmentsEmitted &&
433      fragmentsReceived == that.fragmentsReceived &&
434      rawOccurrencesPersistedNew == that.rawOccurrencesPersistedNew &&
435      rawOccurrencesPersistedUpdated == that.rawOccurrencesPersistedUpdated &&
436      rawOccurrencesPersistedUnchanged == that.rawOccurrencesPersistedUnchanged &&
437      rawOccurrencesPersistedError == that.rawOccurrencesPersistedError &&
438      fragmentsProcessed == that.fragmentsProcessed &&
439      verbatimOccurrencesPersistedSuccessful == that.verbatimOccurrencesPersistedSuccessful &&
440      verbatimOccurrencesPersistedError == that.verbatimOccurrencesPersistedError &&
441      interpretedOccurrencesPersistedSuccessful == that.interpretedOccurrencesPersistedSuccessful &&
442      interpretedOccurrencesPersistedError == that.interpretedOccurrencesPersistedError &&
443      Objects.equals(datasetKey, that.datasetKey) &&
444      Objects.equals(crawlJob, that.crawlJob) &&
445      Objects.equals(startedCrawling, that.startedCrawling) &&
446      Objects.equals(finishedCrawling, that.finishedCrawling) &&
447      Objects.equals(crawlContext, that.crawlContext) &&
448      finishReason == that.finishReason &&
449      processStateOccurrence == that.processStateOccurrence &&
450      processStateChecklist == that.processStateChecklist &&
451      processStateSample == that.processStateSample &&
452      Objects.equals(declaredCount, that.declaredCount);
453  }
454
455  @Override
456  public int hashCode() {
457    return Objects
458      .hash(datasetKey, crawlJob, startedCrawling, finishedCrawling, crawlContext, finishReason,
459        processStateOccurrence, processStateChecklist, processStateSample, declaredCount,
460        pagesCrawled, pagesFragmentedSuccessful, pagesFragmentedError, fragmentsEmitted,
461        fragmentsReceived, rawOccurrencesPersistedNew, rawOccurrencesPersistedUpdated,
462        rawOccurrencesPersistedUnchanged, rawOccurrencesPersistedError, fragmentsProcessed,
463        verbatimOccurrencesPersistedSuccessful, verbatimOccurrencesPersistedError,
464        interpretedOccurrencesPersistedSuccessful, interpretedOccurrencesPersistedError);
465  }
466
467  @Override
468  public String toString() {
469    return new StringJoiner(", ", DatasetProcessStatus.class.getSimpleName() + "[", "]")
470      .add("datasetKey=" + datasetKey)
471      .add("crawlJob=" + crawlJob)
472      .add("startedCrawling=" + startedCrawling)
473      .add("finishedCrawling=" + finishedCrawling)
474      .add("crawlContext='" + crawlContext + "'")
475      .add("finishReason=" + finishReason)
476      .add("processStateOccurrence=" + processStateOccurrence)
477      .add("processStateChecklist=" + processStateChecklist)
478      .add("processStateSample=" + processStateSample)
479      .add("declaredCount=" + declaredCount)
480      .add("pagesCrawled=" + pagesCrawled)
481      .add("pagesFragmentedSuccessful=" + pagesFragmentedSuccessful)
482      .add("pagesFragmentedError=" + pagesFragmentedError)
483      .add("fragmentsEmitted=" + fragmentsEmitted)
484      .add("fragmentsReceived=" + fragmentsReceived)
485      .add("rawOccurrencesPersistedNew=" + rawOccurrencesPersistedNew)
486      .add("rawOccurrencesPersistedUpdated=" + rawOccurrencesPersistedUpdated)
487      .add("rawOccurrencesPersistedUnchanged=" + rawOccurrencesPersistedUnchanged)
488      .add("rawOccurrencesPersistedError=" + rawOccurrencesPersistedError)
489      .add("fragmentsProcessed=" + fragmentsProcessed)
490      .add("verbatimOccurrencesPersistedSuccessful=" + verbatimOccurrencesPersistedSuccessful)
491      .add("verbatimOccurrencesPersistedError=" + verbatimOccurrencesPersistedError)
492      .add("interpretedOccurrencesPersistedSuccessful=" + interpretedOccurrencesPersistedSuccessful)
493      .add("interpretedOccurrencesPersistedError=" + interpretedOccurrencesPersistedError)
494      .toString();
495  }
496
497  public static class Builder {
498
499    private UUID datasetKey;
500    private CrawlJob crawlJob;
501    private Date startedCrawling;
502    private Date finishedCrawling;
503    private String crawlContext;
504    private FinishReason finishReason;
505    private ProcessState processStateOccurrence;
506    private ProcessState processStateChecklist;
507    private ProcessState processStateSample;
508    private Long declaredCount;
509    private long pagesCrawled;
510    private long pagesFragmentedSuccessful;
511    private long pagesFragmentedError;
512    private long fragmentsEmitted;
513    private long fragmentsReceived;
514    private long rawOccurrencesPersistedNew;
515    private long rawOccurrencesPersistedUpdated;
516    private long rawOccurrencesPersistedUnchanged;
517    private long rawOccurrencesPersistedError;
518    private long fragmentsProcessed;
519    private long verbatimOccurrencesPersistedSuccessful;
520    private long verbatimOccurrencesPersistedError;
521    private long interpretedOccurrencesPersistedSuccessful;
522    private long interpretedOccurrencesPersistedError;
523
524    public DatasetProcessStatus build() {
525      return new DatasetProcessStatus(this);
526    }
527
528    public Builder crawlContext(String crawlContext) {
529      this.crawlContext = crawlContext;
530      return this;
531    }
532
533    public Builder crawlJob(CrawlJob crawlJob) {
534      this.crawlJob = crawlJob;
535      return this;
536    }
537
538    public Builder datasetKey(UUID datasetKey) {
539      this.datasetKey = datasetKey;
540      return this;
541    }
542
543    public Builder declaredCount(Long declaredCount) {
544      this.declaredCount = declaredCount;
545      return this;
546    }
547
548    public Builder finishedCrawling(Date finishedCrawling) {
549      this.finishedCrawling = finishedCrawling;
550      return this;
551    }
552
553    public Builder finishReason(FinishReason finishReason) {
554      this.finishReason = finishReason;
555      return this;
556    }
557
558    public Builder processStateOccurrence(ProcessState processStateOccurrence) {
559      this.processStateOccurrence = processStateOccurrence;
560      return this;
561    }
562
563    public Builder processStateChecklist(ProcessState processStateChecklist) {
564      this.processStateChecklist = processStateChecklist;
565      return this;
566    }
567
568    public Builder processStateSample(ProcessState processStateSample) {
569      this.processStateSample = processStateSample;
570      return this;
571    }
572
573    public Builder fragmentsEmitted(long fragmentsEmitted) {
574      this.fragmentsEmitted = fragmentsEmitted;
575      return this;
576    }
577
578    public Builder fragmentsProcessed(long fragmentsProcessed) {
579      this.fragmentsProcessed = fragmentsProcessed;
580      return this;
581    }
582
583    public Builder fragmentsReceived(long fragmentsReceived) {
584      this.fragmentsReceived = fragmentsReceived;
585      return this;
586    }
587
588    public Builder interpretedOccurrencesPersistedError(long interpretedOccurrencesPersistedError) {
589      this.interpretedOccurrencesPersistedError = interpretedOccurrencesPersistedError;
590      return this;
591    }
592
593    public Builder interpretedOccurrencesPersistedSuccessful(long interpretedOccurrencesPersistedSuccessful) {
594      this.interpretedOccurrencesPersistedSuccessful = interpretedOccurrencesPersistedSuccessful;
595      return this;
596    }
597
598    public Builder pagesCrawled(long pagesCrawled) {
599      this.pagesCrawled = pagesCrawled;
600      return this;
601    }
602
603    public Builder pagesFragmentedError(long pagesFragmentedError) {
604      this.pagesFragmentedError = pagesFragmentedError;
605      return this;
606    }
607
608    public Builder pagesFragmentedSuccessful(long pagesFragmentedSuccessful) {
609      this.pagesFragmentedSuccessful = pagesFragmentedSuccessful;
610      return this;
611    }
612
613    public Builder rawOccurrencesPersistedError(long rawOccurrencesPersistedError) {
614      this.rawOccurrencesPersistedError = rawOccurrencesPersistedError;
615      return this;
616    }
617
618    public Builder rawOccurrencesPersistedNew(long rawOccurrencesPersistedNew) {
619      this.rawOccurrencesPersistedNew = rawOccurrencesPersistedNew;
620      return this;
621    }
622
623    public Builder rawOccurrencesPersistedUnchanged(long rawOccurrencesPersistedUnchanged) {
624      this.rawOccurrencesPersistedUnchanged = rawOccurrencesPersistedUnchanged;
625      return this;
626    }
627
628    public Builder rawOccurrencesPersistedUpdated(long rawOccurrencesPersistedUpdated) {
629      this.rawOccurrencesPersistedUpdated = rawOccurrencesPersistedUpdated;
630      return this;
631    }
632
633    public Builder startedCrawling(Date startedCrawling) {
634      this.startedCrawling = startedCrawling;
635      return this;
636    }
637
638    public Builder verbatimOccurrencesPersistedError(long verbatimOccurrencesPersistedError) {
639      this.verbatimOccurrencesPersistedError = verbatimOccurrencesPersistedError;
640      return this;
641    }
642
643    public Builder verbatimOccurrencesPersistedSuccessful(long verbatimOccurrencesPersistedSuccessful) {
644      this.verbatimOccurrencesPersistedSuccessful = verbatimOccurrencesPersistedSuccessful;
645      return this;
646    }
647  }
648}