package org.gbif.api.model.crawler;

import org.gbif.api.vocabulary.EndpointType;

import java.net.URI;
import java.util.Map;
import java.util.UUID;
import javax.annotation.Nullable;
import javax.annotation.concurrent.Immutable;
import javax.annotation.concurrent.ThreadSafe;

import com.google.common.base.Objects;
import com.google.common.collect.ImmutableMap;
import org.codehaus.jackson.annotate.JsonCreator;
import org.codehaus.jackson.annotate.JsonIgnore;
import org.codehaus.jackson.annotate.JsonProperty;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

/**
 * This class represents a job to be worked on by a crawler. That can be either one of the XML-based protocols
 * (BioCASe, DiGIR, TAPIR) or a DwC-Archive.
 * <p/>
 * For now this object will be used in JSON-serialized form in ZooKeeper.
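 * <p/>
 * A minimal construction and round-trip sketch (illustrative only; the surrounding crawler and
 * ZooKeeper wiring is outside this class, and the endpoint type, URL and property values below are
 * assumptions). It uses the Jackson 1.x {@code ObjectMapper}, matching the annotations on this
 * class; checked exceptions are omitted for brevity:
 * <pre>{@code
 * CrawlJob job = new CrawlJob(
 *   UUID.randomUUID(),                          // datasetKey of the dataset to crawl
 *   EndpointType.DWC_ARCHIVE,                   // endpoint type of the dataset
 *   URI.create("http://example.org/dwca.zip"),  // hypothetical target URL
 *   1,                                          // first crawl attempt
 *   null);                                      // no protocol-specific properties
 *
 * ObjectMapper mapper = new ObjectMapper();     // org.codehaus.jackson.map.ObjectMapper
 * String json = mapper.writeValueAsString(job);
 * CrawlJob restored = mapper.readValue(json, CrawlJob.class);
 * }</pre>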
 */
@Immutable
@ThreadSafe
public class CrawlJob {

  private final UUID datasetKey;
  private final EndpointType endpointType;
  private final URI targetUrl;
  private final int attempt;
  private final ImmutableMap<String, String> properties;

  /**
   * Creates a new crawl job.
   *
   * @param datasetKey   of the dataset to crawl
   * @param endpointType of the dataset
   * @param targetUrl    of the dataset
   * @param attempt      a monotonically increasing counter, incremented every time we try to crawl a dataset,
   *                     whether that attempt is successful or not
   * @param properties   a way to provide protocol- or crawl-specific options
   */
  @JsonCreator
  public CrawlJob(
    @JsonProperty("datasetKey") UUID datasetKey,
    @JsonProperty("endpointType") EndpointType endpointType,
    @JsonProperty("targetUrl") URI targetUrl,
    @JsonProperty("attempt") int attempt,
    @Nullable @JsonProperty("properties") Map<String, String> properties
  ) {
    this.datasetKey = checkNotNull(datasetKey);
    this.endpointType = checkNotNull(endpointType);
    this.targetUrl = checkNotNull(targetUrl);
    checkArgument(attempt > 0, "attempt has to be greater than 0");
    this.attempt = attempt;

    if (properties == null) {
      this.properties = ImmutableMap.of();
    } else {
      this.properties = ImmutableMap.copyOf(properties);
    }
  }

  /**
   * Constructor with the mandatory fields only.
   * The properties field is set to an empty map.
   *
   * @param datasetKey   of the dataset to crawl
   * @param attempt      a monotonically increasing counter, incremented every time we try to crawl a dataset,
   *                     whether that attempt is successful or not
   * @param endpointType of the dataset
   * @param targetUrl    of the dataset
   */
  public CrawlJob(UUID datasetKey, Integer attempt, EndpointType endpointType, URI targetUrl) {
    // This constructor is used by the MyBatis persistence layer.
    this.datasetKey = datasetKey;
    this.attempt = attempt;
    this.endpointType = endpointType;
    this.targetUrl = targetUrl;
    this.properties = ImmutableMap.of();
  }

  public UUID getDatasetKey() {
    return datasetKey;
  }

  public EndpointType getEndpointType() {
    return endpointType;
  }

  /**
   * Used to save protocol-specific information (e.g. contentNamespace for TAPIR and BioCASe).
   *
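   * For illustration only (the key and namespace value here are assumed examples, not a fixed
   * contract), given a {@code datasetKey} and {@code targetUrl}, a TAPIR job might carry and
   * expose a property like this:
   * <pre>{@code
   * Map<String, String> props =
   *   ImmutableMap.of("contentNamespace", "http://rs.tdwg.org/dwc/dwcore/");
   * CrawlJob job = new CrawlJob(datasetKey, EndpointType.TAPIR, targetUrl, 1, props);
   * String namespace = job.getProperty("contentNamespace");
   * }</pre>
   *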
   * @return an immutable map of all the properties
   */
  // NOTE: This should be an ImmutableMap but Jackson 1.x can't easily deserialize that
  public Map<String, String> getProperties() {
    return properties;
  }

  public URI getTargetUrl() {
    return targetUrl;
  }

  public int getAttempt() {
    return attempt;
  }

  /** @return the value of the named property or {@code null} if it is not set */
  @JsonIgnore
  public String getProperty(String name) {
    return properties.get(name);
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof CrawlJob)) {
      return false;
    }

    final CrawlJob other = (CrawlJob) obj;
    return Objects.equal(this.datasetKey, other.datasetKey)
           && Objects.equal(this.endpointType, other.endpointType)
           && Objects.equal(this.targetUrl, other.targetUrl)
           && Objects.equal(this.attempt, other.attempt)
           && Objects.equal(this.properties, other.properties);
  }

  @Override
  public int hashCode() {
    return Objects.hashCode(datasetKey, endpointType, targetUrl, attempt, properties);
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this)
      .add("datasetKey", datasetKey)
      .add("endpointType", endpointType)
      .add("targetUrl", targetUrl)
      .add("attempt", attempt)
      .add("properties", properties)
      .toString();
  }

}