/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.api.service.pipelines;

import org.gbif.api.model.common.paging.Pageable;
import org.gbif.api.model.common.paging.PagingResponse;
import org.gbif.api.model.pipelines.PipelineExecution;
import org.gbif.api.model.pipelines.PipelineProcess;
import org.gbif.api.model.pipelines.PipelineStep;
import org.gbif.api.model.pipelines.RunPipelineResponse;
import org.gbif.api.model.pipelines.StepRunner;
import org.gbif.api.model.pipelines.StepType;
import org.gbif.api.model.pipelines.ws.PipelineProcessParameters;
import org.gbif.api.model.pipelines.ws.RunAllParams;

import java.util.List;
import java.util.Set;
import java.util.UUID;

import jakarta.annotation.Nullable;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Null;

/**
 * Service contract for the pipelines history: it exposes the {@link PipelineProcess} history,
 * the persistence of {@link PipelineExecution} and {@link PipelineStep} data, and the operations
 * to (re-)run pipelines attempts.
 */
@SuppressWarnings("unused")
public interface PipelinesHistoryService {

  /** Validation message used when the {@code steps} parameter is blank. */
  String STEPS_REQUIRED_MESSAGE = "Steps parameter is required";

  /** Validation message used when the {@code reason} parameter is blank. */
  String REASON_REQUIRED_MESSAGE = "Reason parameter is required";

  /**
   * Lists the history of all {@link PipelineProcess}, sorted descending from the most recent one.
   *
   * @param pageable paging request
   * @return a paged response that contains a list of {@link PipelineProcess}.
   */
  PagingResponse<PipelineProcess> history(Pageable pageable);

  /**
   * Lists the history of all {@link PipelineProcess} of a dataset, sorted descending from the most
   * recent one.
   *
   * @param datasetKey dataset identifier
   * @param pageable paging request
   * @return a paged response that contains a list of {@link PipelineProcess}.
   */
  PagingResponse<PipelineProcess> history(@NotNull UUID datasetKey, Pageable pageable);

  /**
   * Gets the {@link PipelineProcess} identified by the dataset and attempt identifiers.
   *
   * @param datasetKey dataset identifier
   * @param attempt crawl attempt identifier
   * @return an instance of pipelines process if exists.
   */
  PipelineProcess getPipelineProcess(@NotNull UUID datasetKey, int attempt);

  /**
   * Returns information about all running pipelines executions.
   *
   * @param stepType step type, may be {@code null}
   * @param stepRunner step runner, may be {@code null}
   * @param pageable paging request
   * @return a paged response that contains a list of {@link PipelineProcess}.
   */
  PagingResponse<PipelineProcess> getRunningPipelineProcess(
      @Nullable StepType stepType, @Nullable StepRunner stepRunner, Pageable pageable);

  /**
   * Creates/persists a pipelines process of dataset for an attempt identifier. If the process
   * already exists it returns the existing one.
   *
   * @param params pipeline process parameters, contain dataset key and attempt
   * @return the key of the {@link PipelineProcess} created.
   */
  long createPipelineProcess(@NotNull PipelineProcessParameters params);

  /**
   * Adds/persists the information of a pipeline execution.
   *
   * @param processKey sequential identifier of a pipeline process
   * @param pipelineExecution pipeline execution data
   * @return the key of the {@link PipelineExecution} created.
   */
  long addPipelineExecution(long processKey, @NotNull PipelineExecution pipelineExecution);

  /**
   * Gets the execution key for a running dataset.
   *
   * @param datasetKey dataset identifier
   * @return running execution key
   */
  Long getRunningExecutionKey(@NotNull UUID datasetKey);

  /**
   * Updates the information of a pipeline step.
   *
   * @param pipelineStep pipeline step data to persist
   * @return the key of the {@link PipelineStep}.
   */
  long updatePipelineStep(@NotNull PipelineStep pipelineStep);

  /**
   * Gets the {@link PipelineStep} of the specified key.
   *
   * @param stepKey key of the pipeline step
   * @return {@link PipelineStep}.
   */
  PipelineStep getPipelineStep(long stepKey);

  /**
   * Gets the list of pipeline steps of the execution key.
   *
   * @param executionKey key of the pipeline execution
   * @return the list of {@link PipelineStep}.
   */
  List<PipelineStep> getPipelineStepsByExecutionKey(long executionKey);

  /** Marks all pipeline executions as finished to clean the running UI. */
  void markAllPipelineExecutionAsFinished();

  /**
   * Marks the pipeline execution as finished when all pipeline steps are finished.
   *
   * @param executionKey key of the pipeline execution
   */
  void markPipelineExecutionIfFinished(long executionKey);

  /**
   * Changes the status to ABORTED and sets the finished date if the state is RUNNING, QUEUED or
   * SUBMITTED, and sets the pipeline execution as finished.
   *
   * @param executionKey key of the pipeline execution
   */
  void markPipelineStatusAsAborted(long executionKey);

  /**
   * Runs the last attempt for all datasets.
   *
   * @param steps steps to run
   * @param reason reason to run
   * @param useLastSuccessful true if we want to run the latest successful attempt
   * @param markPreviousAttemptAsFailed previous status can't be wrong, when CLI restarted during
   *     processing a dataset
   * @param runAllParams parameters, contain datasets to exclude
   * @param interpretTypes is used for partial interpretation such as only TAXONOMY, METADATA,
   *     etc.
   * @param excludeEventSteps true if we don't want to run the event steps
   * @return {@link RunPipelineResponse}.
   */
  RunPipelineResponse runAll(
      @NotBlank(message = STEPS_REQUIRED_MESSAGE) String steps,
      @NotBlank(message = REASON_REQUIRED_MESSAGE) String reason,
      boolean useLastSuccessful,
      boolean markPreviousAttemptAsFailed,
      @Nullable RunAllParams runAllParams,
      @Nullable Set<String> interpretTypes,
      boolean excludeEventSteps);

  /**
   * Restarts the last failed pipelines step for a dataset.
   *
   * @param datasetKey dataset key
   * @param steps steps to run
   * @param reason reason to run
   * @param useLastSuccessful true if we want to run the latest successful attempt
   * @param markPreviousAttemptAsFailed previous status can't be wrong, when CLI restarted during
   *     processing a dataset
   * @param interpretTypes is used for partial interpretation such as only TAXONOMY, METADATA,
   *     etc.
   * @param excludeEventSteps true if we don't want to run the event steps
   * @return {@link RunPipelineResponse}.
   */
  RunPipelineResponse runPipelineAttempt(
      @NotNull UUID datasetKey,
      @NotBlank(message = STEPS_REQUIRED_MESSAGE) String steps,
      @NotBlank(message = REASON_REQUIRED_MESSAGE) String reason,
      boolean useLastSuccessful,
      boolean markPreviousAttemptAsFailed,
      @Nullable Set<String> interpretTypes,
      boolean excludeEventSteps);

  /**
   * Re-runs a pipeline step.
   *
   * @param datasetKey dataset key
   * @param attempt attempt to run
   * @param steps steps to run
   * @param reason reason to run
   * @param markPreviousAttemptAsFailed previous status can't be wrong, when CLI restarted during
   *     processing a dataset
   * @param interpretTypes is used for partial interpretation such as only TAXONOMY, METADATA,
   *     etc.
   * @param excludeEventSteps true if we don't want to run the event steps
   * @return {@link RunPipelineResponse}.
   */
  RunPipelineResponse runPipelineAttempt(
      @NotNull UUID datasetKey,
      int attempt,
      @NotBlank(message = STEPS_REQUIRED_MESSAGE) String steps,
      @NotBlank(message = REASON_REQUIRED_MESSAGE) String reason,
      boolean markPreviousAttemptAsFailed,
      @Nullable Set<String> interpretTypes,
      boolean excludeEventSteps);

  /**
   * Sends an email to the data administrator about an absent identifiers issue with a dataset.
   *
   * @param datasetKey dataset key
   * @param attempt attempt to run
   * @param message message with failed metrics and other info
   * @deprecated use {@link #notifyAbsentIdentifiers(UUID, int, long, String)} instead.
   */
  @Deprecated
  void sendAbsentIndentifiersEmail(@NotNull UUID datasetKey, int attempt, @NotNull String message);

  /**
   * Marks the failed identifier stage as finished and continues the interpretation process for
   * datasets where the identifier stage failed because of a threshold limit.
   *
   * @param datasetKey dataset key
   * @param attempt attempt to run
   */
  void allowAbsentIndentifiers(@NotNull UUID datasetKey, int attempt);

  /**
   * Marks the latest failed identifier stage as finished and continues the interpretation process
   * for datasets where the identifier stage failed because of a threshold limit.
   *
   * @param datasetKey dataset key
   */
  void allowAbsentIndentifiers(@NotNull UUID datasetKey);

  /**
   * Sends a notification to the data administrators about absent identifiers issues with the
   * dataset.
   *
   * @param datasetKey key of the dataset
   * @param attempt crawling attempt
   * @param executionKey key of the pipelines execution
   * @param message cause of the issue
   */
  void notifyAbsentIdentifiers(UUID datasetKey, int attempt, long executionKey, String message);

  /**
   * Sets the pipeline step state to QUEUED only if it's in SUBMITTED state.
   *
   * @param key pipeline step key
   */
  void setSubmittedPipelineStepToQueued(long key);
}