/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.api.service.pipelines;

import org.gbif.api.model.common.paging.Pageable;
import org.gbif.api.model.common.paging.PagingResponse;
import org.gbif.api.model.pipelines.PipelineExecution;
import org.gbif.api.model.pipelines.PipelineProcess;
import org.gbif.api.model.pipelines.PipelineStep;
import org.gbif.api.model.pipelines.RunPipelineResponse;
import org.gbif.api.model.pipelines.StepRunner;
import org.gbif.api.model.pipelines.StepType;
import org.gbif.api.model.pipelines.ws.PipelineProcessParameters;
import org.gbif.api.model.pipelines.ws.RunAllParams;

import java.util.List;
import java.util.Set;
import java.util.UUID;

import jakarta.annotation.Nullable;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Null;

/**
 * Service interface for the history of pipelines: processes, their executions and the steps of
 * each execution. It also exposes operations to (re-)run pipelines and to handle datasets whose
 * identifier stage failed.
 */
@SuppressWarnings("unused")
public interface PipelinesHistoryService {

  /** Validation message used when the {@code steps} parameter is blank. */
  String STEPS_REQUIRED_MESSAGE = "Steps parameter is required";

  /** Validation message used when the {@code reason} parameter is blank. */
  String REASON_REQUIRED_MESSAGE = "Reason parameter is required";

  /**
   * Lists the history of all {@link PipelineProcess}, sorted descending from the most recent one.
   *
   * @param pageable paging request
   * @return a paged response that contains a list of {@link PipelineProcess}.
   */
  PagingResponse<PipelineProcess> history(Pageable pageable);

  /**
   * Lists the history of all {@link PipelineProcess} of a dataset, sorted descending from the most
   * recent one.
   *
   * @param datasetKey dataset identifier
   * @param pageable paging request
   * @return a paged response that contains a list of {@link PipelineProcess}.
   */
  PagingResponse<PipelineProcess> history(@NotNull UUID datasetKey, Pageable pageable);

  /**
   * Gets the PipelineProcess identified by the dataset and attempt identifiers.
   *
   * @param datasetKey dataset identifier
   * @param attempt crawl attempt identifier
   * @return an instance of pipelines process if exists.
   */
  PipelineProcess getPipelineProcess(@NotNull UUID datasetKey, int attempt);

  /**
   * Returns information about all running pipelines executions.
   *
   * @param stepType step type, may be {@code null}
   * @param stepRunner step runner, may be {@code null}
   * @param pageable paging request
   * @return a paged response that contains a list of {@link PipelineProcess}.
   */
  PagingResponse<PipelineProcess> getRunningPipelineProcess(
      @Nullable StepType stepType, @Nullable StepRunner stepRunner, Pageable pageable);

  /**
   * Creates/persists a pipelines process of dataset for an attempt identifier. If the process
   * already exists it returns the existing one.
   *
   * @param params pipeline process parameters, contain dataset key and attempt
   * @return the key of the {@link PipelineProcess} created.
   */
  long createPipelineProcess(@NotNull PipelineProcessParameters params);

  /**
   * Adds/persists the information of a pipeline execution.
   *
   * @param processKey sequential identifier of a pipeline process
   * @param pipelineExecution pipeline execution data
   * @return the key of the PipelineExecution created.
   */
  long addPipelineExecution(long processKey, @NotNull PipelineExecution pipelineExecution);

  /**
   * Gets the execution key for a running dataset.
   *
   * @param datasetKey dataset identifier
   * @return running execution key
   */
  Long getRunningExecutionKey(@NotNull UUID datasetKey);

  /**
   * Updates the information of a pipeline step.
   *
   * @param pipelineStep pipeline step data to persist
   * @return the key of the {@link PipelineStep}.
   */
  long updatePipelineStep(@NotNull PipelineStep pipelineStep);

  /**
   * Gets the PipelineStep of the specified key.
   *
   * @param stepKey key of the pipeline step
   * @return {@link PipelineStep}.
   */
  PipelineStep getPipelineStep(long stepKey);

  /**
   * Gets the PipelineSteps list of the execution key.
   *
   * @param executionKey key of the pipeline execution
   * @return list of {@link PipelineStep}.
   */
  List<PipelineStep> getPipelineStepsByExecutionKey(long executionKey);

  /** Marks all pipeline executions as finished to clean the running UI. */
  void markAllPipelineExecutionAsFinished();

  /**
   * Marks the pipeline execution as finished when all pipeline steps are finished.
   *
   * @param executionKey key of the pipeline execution
   */
  void markPipelineExecutionIfFinished(long executionKey);

  /**
   * Changes the status to ABORTED and sets the finished date if the state is RUNNING, QUEUED or
   * SUBMITTED, and sets the pipeline execution as finished.
   *
   * @param executionKey key of the pipeline execution
   */
  void markPipelineStatusAsAborted(long executionKey);

  /**
   * Runs the last attempt for all datasets.
   *
   * @param steps steps to run
   * @param reason reason to run
   * @param useLastSuccessful true if we want to run the latest successful attempt
   * @param markPreviousAttemptAsFailed previous status can't be wrong, when CLI restarted during
   *     processing a dataset
   * @param runAllParams parameters, contain datasets to exclude
   * @param interpretTypes is used for partial interpretation such as only TAXONOMY, METADATA and
   *     etc
   * @param excludeEventSteps true if we don't want to run the event steps
   * @return {@link RunPipelineResponse}.
   */
  RunPipelineResponse runAll(
      @NotBlank(message = STEPS_REQUIRED_MESSAGE) String steps,
      @NotBlank(message = REASON_REQUIRED_MESSAGE) String reason,
      boolean useLastSuccessful,
      boolean markPreviousAttemptAsFailed,
      @Nullable RunAllParams runAllParams,
      @Nullable Set<String> interpretTypes,
      boolean excludeEventSteps);

  /**
   * Restarts the last failed pipelines step for a dataset.
   *
   * @param datasetKey dataset key
   * @param steps steps to run
   * @param reason reason to run
   * @param useLastSuccessful true if we want to run the latest successful attempt
   * @param markPreviousAttemptAsFailed previous status can't be wrong, when CLI restarted during
   *     processing a dataset
   * @param interpretTypes is used for partial interpretation such as only TAXONOMY, METADATA and
   *     etc
   * @param excludeEventSteps true if we don't want to run the event steps
   * @return {@link RunPipelineResponse}.
   */
  RunPipelineResponse runPipelineAttempt(
      @NotNull UUID datasetKey,
      @NotBlank(message = STEPS_REQUIRED_MESSAGE) String steps,
      @NotBlank(message = REASON_REQUIRED_MESSAGE) String reason,
      boolean useLastSuccessful,
      boolean markPreviousAttemptAsFailed,
      @Nullable Set<String> interpretTypes,
      boolean excludeEventSteps);

  /**
   * Re-runs a pipeline step.
   *
   * @param datasetKey dataset key
   * @param attempt attempt to run
   * @param steps steps to run
   * @param reason reason to run
   * @param markPreviousAttemptAsFailed previous status can't be wrong, when CLI restarted during
   *     processing a dataset
   * @param interpretTypes is used for partial interpretation such as only TAXONOMY, METADATA and
   *     etc
   * @param excludeEventSteps true if we don't want to run the event steps
   * @return {@link RunPipelineResponse}.
   */
  RunPipelineResponse runPipelineAttempt(
      @NotNull UUID datasetKey,
      int attempt,
      @NotBlank(message = STEPS_REQUIRED_MESSAGE) String steps,
      @NotBlank(message = REASON_REQUIRED_MESSAGE) String reason,
      boolean markPreviousAttemptAsFailed,
      @Nullable Set<String> interpretTypes,
      boolean excludeEventSteps);

  /**
   * Sends an email to the data administrator about an absent identifiers issue with a dataset.
   *
   * @param datasetKey dataset key
   * @param attempt attempt to run
   * @param message message with failed metrics and other info
   * @deprecated use {@link #notifyAbsentIdentifiers(UUID, int, long, String)} instead.
   */
  @Deprecated
  void sendAbsentIndentifiersEmail(@NotNull UUID datasetKey, int attempt, @NotNull String message);

  /**
   * Marks the failed identifier stage as finished and continues the interpretation process for
   * datasets where the identifier stage failed because of a threshold limit.
   *
   * @param datasetKey dataset key
   * @param attempt attempt to run
   */
  void allowAbsentIndentifiers(@NotNull UUID datasetKey, int attempt);

  /**
   * Marks the latest failed identifier stage as finished and continues the interpretation process
   * for datasets where the identifier stage failed because of a threshold limit.
   *
   * @param datasetKey dataset key
   */
  void allowAbsentIndentifiers(@NotNull UUID datasetKey);

  /**
   * Sends a notification to the data administrators about absent identifiers issues with the
   * dataset.
   *
   * @param datasetKey key of the dataset
   * @param attempt crawling attempt
   * @param executionKey key of the pipelines execution
   * @param message cause of the issue
   */
  void notifyAbsentIdentifiers(UUID datasetKey, int attempt, long executionKey, String message);

  /**
   * Sets the pipeline step state QUEUED only if it's in SUBMITTED state.
   *
   * @param key pipeline step key
   */
  void setSubmittedPipelineStepToQueued(long key);
}