001/* 002 * Copyright 2020 Global Biodiversity Information Facility (GBIF) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.gbif.api.model.crawler; 017 018import org.gbif.api.vocabulary.EndpointType; 019 020import java.net.URI; 021import java.util.Collections; 022import java.util.Map; 023import java.util.Objects; 024import java.util.StringJoiner; 025import java.util.UUID; 026 027import javax.annotation.Nullable; 028import javax.annotation.concurrent.Immutable; 029import javax.annotation.concurrent.ThreadSafe; 030 031import com.fasterxml.jackson.annotation.JsonCreator; 032import com.fasterxml.jackson.annotation.JsonIgnore; 033import com.fasterxml.jackson.annotation.JsonProperty; 034 035import static org.gbif.api.util.PreconditionUtils.checkArgument; 036 037/** 038 * This class represents a job to be worked on by a crawler. That can be either one of the XML based protocols 039 * (BioCASe, DiGIR, TAPIR) or a DwC-Archive. 040 * <p/> 041 * For now this object will be used in JSON serialized form in ZooKeeper. 042 */ 043@Immutable 044@ThreadSafe 045@SuppressWarnings("unused") 046public class CrawlJob { 047 048 private final UUID datasetKey; 049 private final EndpointType endpointType; 050 private final URI targetUrl; 051 private final int attempt; 052 private final Map<String, String> properties; 053 054 /** 055 * Creates a new crawl job. 056 * 057 * @param datasetKey of the dataset to crawl 058 * @param endpointType of the dataset 059 * @param targetUrl of the dataset 060 * @param attempt a monotonously increasing counter, increased every time we try to crawl a dataset whether that 061 * attempt is successful or not 062 * @param properties a way to provide protocol or crawl specific options 063 */ 064 @JsonCreator 065 public CrawlJob( 066 @JsonProperty("datasetKey") UUID datasetKey, 067 @JsonProperty("endpointType") EndpointType endpointType, 068 @JsonProperty("targetUrl") URI targetUrl, 069 @JsonProperty("attempt") int attempt, 070 @Nullable @JsonProperty("properties") Map<String, String> properties 071 ) { 072 this.datasetKey = Objects.requireNonNull(datasetKey); 073 this.endpointType = Objects.requireNonNull(endpointType); 074 this.targetUrl = Objects.requireNonNull(targetUrl); 075 checkArgument(attempt > 0, "attempt has to be greater than 0"); 076 this.attempt = attempt; 077 078 if (properties == null) { 079 this.properties = Collections.emptyMap(); 080 } else { 081 this.properties = Collections.unmodifiableMap(properties); 082 } 083 } 084 085 /** 086 * Constructor with mandatory fields. 087 * Properties field is set to null. 088 * 089 * @param datasetKey of the dataset to crawl 090 * @param endpointType of the dataset 091 * @param targetUrl of the dataset 092 * @param attempt a monotonously increasing counter, increased every time we try to crawl a dataset whether that 093 * attempt is successful or not 094 */ 095 public CrawlJob(UUID datasetKey, Integer attempt, EndpointType endpointType, URI targetUrl) { 096 //This constructor is used for the MyBatis persistence layer. 097 this.datasetKey = datasetKey; 098 this.attempt = attempt; 099 this.endpointType = endpointType; 100 this.targetUrl = targetUrl; 101 this.properties = Collections.emptyMap(); 102 } 103 104 public UUID getDatasetKey() { 105 return datasetKey; 106 } 107 108 public EndpointType getEndpointType() { 109 return endpointType; 110 } 111 112 /** 113 * Used to save protocol specific information (e.g. contentNamespace for TAPIR and BioCASe). 114 * 115 * @return an immutable map of all the properties 116 */ 117 // NOTE: This should be an ImmutableMap but Jackson 1.x can't easily deserialize that 118 public Map<String, String> getProperties() { 119 return properties; 120 } 121 122 public URI getTargetUrl() { 123 return targetUrl; 124 } 125 126 public int getAttempt() { 127 return attempt; 128 } 129 130 @JsonIgnore 131 public String getProperty(String name) { 132 return properties.get(name); 133 } 134 135 @Override 136 public boolean equals(Object o) { 137 if (this == o) { 138 return true; 139 } 140 if (o == null || getClass() != o.getClass()) { 141 return false; 142 } 143 CrawlJob crawlJob = (CrawlJob) o; 144 return attempt == crawlJob.attempt && 145 Objects.equals(datasetKey, crawlJob.datasetKey) && 146 endpointType == crawlJob.endpointType && 147 Objects.equals(targetUrl, crawlJob.targetUrl) && 148 Objects.equals(properties, crawlJob.properties); 149 } 150 151 @Override 152 public int hashCode() { 153 return Objects.hash(datasetKey, endpointType, targetUrl, attempt, properties); 154 } 155 156 @Override 157 public String toString() { 158 return new StringJoiner(", ", CrawlJob.class.getSimpleName() + "[", "]") 159 .add("datasetKey=" + datasetKey) 160 .add("endpointType=" + endpointType) 161 .add("targetUrl=" + targetUrl) 162 .add("attempt=" + attempt) 163 .add("properties=" + properties) 164 .toString(); 165 } 166}