001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.api.service.pipelines; 015 016import org.gbif.api.model.common.paging.Pageable; 017import org.gbif.api.model.common.paging.PagingResponse; 018import org.gbif.api.model.pipelines.PipelineExecution; 019import org.gbif.api.model.pipelines.PipelineProcess; 020import org.gbif.api.model.pipelines.PipelineStep; 021import org.gbif.api.model.pipelines.RunPipelineResponse; 022import org.gbif.api.model.pipelines.StepRunner; 023import org.gbif.api.model.pipelines.StepType; 024import org.gbif.api.model.pipelines.ws.PipelineProcessParameters; 025import org.gbif.api.model.pipelines.ws.RunAllParams; 026 027import java.util.List; 028import java.util.Set; 029import java.util.UUID; 030 031import jakarta.annotation.Nullable; 032import jakarta.validation.constraints.NotBlank; 033import jakarta.validation.constraints.NotNull; 034import jakarta.validation.constraints.Null; 035 036@SuppressWarnings("unused") 037public interface PipelinesHistoryService { 038 039 String STEPS_REQUIRED_MESSAGE = "Steps parameter is required"; 040 String REASON_REQUIRED_MESSAGE = "Reason parameter is required"; 041 042 /** 043 * Lists the history of all {@link PipelineProcess}, sorted descending from the most recent one. 044 * 045 * @param pageable paging request 046 * @return a paged response that contains a list of {@link PipelineProcess}. 047 */ 048 PagingResponse<PipelineProcess> history(Pageable pageable); 049 050 /** 051 * Lists the history of all {@link PipelineProcess} of a dataset, sorted descending from the most 052 * recent one. 053 * 054 * @param datasetKey dataset identifier 055 * @param pageable paging request 056 * @return a paged response that contains a list of {@link PipelineProcess}. 057 */ 058 PagingResponse<PipelineProcess> history(@NotNull UUID datasetKey, Pageable pageable); 059 060 /** 061 * Gets the PipelineProcess identified by the dataset and attempt identifiers. 062 * 063 * @param datasetKey dataset identifier 064 * @param attempt crawl attempt identifier 065 * @return an instance of pipelines process if exists. 066 */ 067 PipelineProcess getPipelineProcess(@NotNull UUID datasetKey, int attempt); 068 069 /** Returns information about all running pipelines executions */ 070 PagingResponse<PipelineProcess> getRunningPipelineProcess( 071 @Nullable StepType stepType, @Nullable StepRunner stepRunner, Pageable pageable); 072 073 /** 074 * Creates/persists a pipelines process of dataset for an attempt identifier. If the process 075 * already exists it returns the existing one. 076 * 077 * @param params pipeline process parameters, contain dataset key and attempt 078 * @return the key of the {@link PipelineProcess} created. 079 */ 080 long createPipelineProcess(@NotNull PipelineProcessParameters params); 081 082 /** 083 * Adds/persists the information of a pipeline execution. 084 * 085 * @param processKey sequential identifier of a pipeline process 086 * @param pipelineExecution pipeline execution data 087 * @return the key of the PipelineExecution created. 088 */ 089 long addPipelineExecution(long processKey, @NotNull PipelineExecution pipelineExecution); 090 091 /** 092 * Gets execution key for running dataset 093 * 094 * @param datasetKey dataset identifier 095 * @return running execution key 096 */ 097 Long getRunningExecutionKey(@NotNull UUID datasetKey); 098 099 /** 100 * Update the information of a pipeline step. 101 * 102 * @param pipelineStep step to be added 103 * @return the key of the PipelineStep created. 104 */ 105 long updatePipelineStep(@NotNull PipelineStep pipelineStep); 106 107 /** 108 * Gets the PipelineStep of the specified keys. 109 * 110 * @param stepKey key of the pipeline step 111 * @return {@link PipelineStep}. 112 */ 113 PipelineStep getPipelineStep(long stepKey); 114 115 /** 116 * Gets the PipelineSteps list of the execution key. 117 * 118 * @param executionKey key of the pipeline execution 119 * @return {@link List<PipelineStep>}. 120 */ 121 List<PipelineStep> getPipelineStepsByExecutionKey(long executionKey); 122 123 /** Mark all pipeline executions as finished to clean running UI */ 124 void markAllPipelineExecutionAsFinished(); 125 126 /** 127 * Mark pipeline execution as finished when all pipelin steps are finished 128 * 129 * @param executionKey key of the pipeline execution 130 */ 131 void markPipelineExecutionIfFinished(long executionKey); 132 133 /** 134 * Change status to ABORTED and set finished date if state is RUNNING, QUEUED or SUBMITTED, and 135 * set pipeline execution as finished 136 * 137 * @param executionKey key of the pipeline execution 138 */ 139 void markPipelineStatusAsAborted(long executionKey); 140 141 /** 142 * Runs the last attempt for all datasets. 143 * 144 * @param steps steps to run 145 * @param reason reason to run 146 * @param useLastSuccessful true if we want to run the latest successful attempt 147 * @param markPreviousAttemptAsFailed previous status can't be wrong, when CLI restarted during 148 * processing a dataset 149 * @param runAllParams parameters, contain datasets to exclude 150 * @param interpretTypes is used for partial interpretation such as only TAXONOMY, METADATA and 151 * etc 152 * @param excludeEventSteps true if we don't want to run the event steps 153 * @return {@link RunPipelineResponse}. 154 */ 155 RunPipelineResponse runAll( 156 @NotBlank(message = STEPS_REQUIRED_MESSAGE) String steps, 157 @NotBlank(message = REASON_REQUIRED_MESSAGE) String reason, 158 boolean useLastSuccessful, 159 boolean markPreviousAttemptAsFailed, 160 @Nullable RunAllParams runAllParams, 161 @Nullable Set<String> interpretTypes, 162 boolean excludeEventSteps, 163 boolean onlyIncludeRequestedStep); 164 165 /** 166 * Restart last failed pipelines step for a dataset. 167 * 168 * @param datasetKey dataset key 169 * @param steps steps to run 170 * @param reason reason to run 171 * @param useLastSuccessful true if we want to run the latest successful attempt 172 * @param markPreviousAttemptAsFailed previous status can't be wrong, when CLI restarted during 173 * processing a dataset 174 * @param interpretTypes is used for partial interpretation such as only TAXONOMY, METADATA and 175 * etc 176 * @param excludeEventSteps true if we don't want to run the event steps 177 * @return {@link RunPipelineResponse}. 178 */ 179 RunPipelineResponse runPipelineAttempt( 180 @NotNull UUID datasetKey, 181 @NotBlank(message = STEPS_REQUIRED_MESSAGE) String steps, 182 @NotBlank(message = REASON_REQUIRED_MESSAGE) String reason, 183 boolean useLastSuccessful, 184 boolean markPreviousAttemptAsFailed, 185 @Nullable Set<String> interpretTypes, 186 boolean excludeEventSteps, 187 boolean onlyIncludeRequestedStep); 188 189 /** 190 * Re-run a pipeline step. 191 * 192 * @param datasetKey dataset key 193 * @param attempt attempt to run 194 * @param steps steps to run 195 * @param reason reason to run 196 * @param markPreviousAttemptAsFailed previous status can't be wrong, when CLI restarted during 197 * processing a dataset 198 * @param interpretTypes is used for partial interpretation such as only TAXONOMY, METADATA and 199 * etc 200 * @param excludeEventSteps true if we don't want to run the event steps 201 * @return {@link RunPipelineResponse}. 202 */ 203 RunPipelineResponse runPipelineAttempt( 204 @NotNull UUID datasetKey, 205 int attempt, 206 @NotBlank(message = STEPS_REQUIRED_MESSAGE) String steps, 207 @NotBlank(message = REASON_REQUIRED_MESSAGE) String reason, 208 boolean markPreviousAttemptAsFailed, 209 @Nullable Set<String> interpretTypes, 210 boolean excludeEventSteps, 211 boolean onlyIncludeRequestedStep); 212 213 /** 214 * Sends email to data administrator about absent identifiers issue with a dataset 215 * 216 * <p>Deprecated: use {@link #notifyAbsentIdentifiers(UUID, int, long, String)} instead. 217 * 218 * @param datasetKey dataset key 219 * @param attempt attempt to run 220 * @param message with failed metrics and other info* 221 */ 222 @Deprecated 223 void sendAbsentIndentifiersEmail(@NotNull UUID datasetKey, int attempt, @NotNull String message); 224 225 /** 226 * Mark failed identifier stage as finished and continue interpretation process for datasets were 227 * identifier stage failed because of a threshold limit 228 * 229 * @param datasetKey dataset key 230 * @param attempt attempt to run 231 */ 232 void allowAbsentIndentifiers(@NotNull UUID datasetKey, int attempt); 233 234 /** 235 * Mark latest failed identifier stage as finished and continue interpretation process for 236 * datasets were identifier stage failed because of a threshold limit 237 * 238 * @param datasetKey dataset key 239 */ 240 void allowAbsentIndentifiers(@NotNull UUID datasetKey); 241 242 /** 243 * Sends a notification to the data administrators about absent identifiers issues with the 244 * dataset. 245 * 246 * @param datasetKey key of the dataset 247 * @param attempt crawling attempt 248 * @param executionKey key of the pipelines execution 249 * @param message cause of the issue 250 */ 251 void notifyAbsentIdentifiers(UUID datasetKey, int attempt, long executionKey, String message); 252 253 /** 254 * Sets the pipeline step state QUEUED only if it's in SUBMITTED state. 255 * 256 * @param key pipeline step key 257 */ 258 void setSubmittedPipelineStepToQueued(long key); 259}