diff --git a/docs/assets/images/favicon.png b/docs/assets/images/favicon.png new file mode 100644 index 000000000..8a0f42d26 Binary files /dev/null and b/docs/assets/images/favicon.png differ diff --git a/docs/css/custom.css b/docs/css/custom.css new file mode 100644 index 000000000..e9c4fed86 --- /dev/null +++ b/docs/css/custom.css @@ -0,0 +1,205 @@ +:root { + /*--main-text-color: #212121;*/ + --md-primary-fg-color: #1976d2; + --brand-blue: #1976d2; + --brand-dark-blue: #242A36; + --caption-color: #4f4f4f; + --brand-lt-blue: #f0f5fb; + --brand-gray: rgb(118, 118, 118); + --brand-lt-gray: rgb(203,204,207); + --brand-red: #e50914; +} + +/* Grid */ +.row { + display: flex; + flex-direction: row; +} +.col-4 { + flex: 0 0 33.3333333333%; + max-width: 33.3333333333%; +} +.col-6 { + flex: 0 0 50%; + max-width: 50%; +} + + + +/* Navbar */ +.md-header { + background-color: white !important; + color: var(--brand-dark-blue); +} +.md-header__title { + visibility: hidden; +} +.md-logo img{ + height: 38px !important; +} +.home { + margin-bottom: -1.2rem !important; +} +.md-search__form { + transition: none !important; +} +.md-search__input:hover { + background-color: #00000042 !important; +} +.md-search__input.focus-visible:hover { + background-color: #fff !important; +} + +/* Fonts */ +body { + color: var(--brand-dark-blue); + font-family: "Roboto", sans-serif !important; + font-weight: 400 !important; +} + +.md-content h1 { + font-family: "Inter", sans-serif !important; + color: var(--brand-dark-blue) !important; + font-size: 32px !important; + font-weight: 700 !important; +} + +.md-content h2 { + font-family: "Inter", sans-serif !important; + color: var(--brand-dark-blue) !important; + font-size: 24px !important; + font-weight: 700 !important; +} + +.md-content h3 { + font-family: "Roboto", sans-serif !important; + color: var(--brand-dark-blue) !important; + font-size: 20px !important; + font-weight: 500 !important; +} + +.md-content h4 { + font-family: "Roboto", sans-serif !important; + color: var(--brand-dark-blue) !important; + font-size: 18px !important; + font-weight: 400 !important; +} + +.btn { + font-family: "Roboto", sans-serif; + font-size: 14px; + border-radius: 0.25rem; +} +.btn-primary { + background: #1976D2; + border: none; + color: white !important; +} + +.hero { + padding-top: 100px; + padding-bottom: 100px; +} + +.hero .heading { + font-size: 56px; + font-weight: 900; + line-height: 68px; +} + +.hero .btn { + font-size: 16px; + padding: 10px 20px; +} + +.hero .illustration { + margin-left: 35px; +} + + +.bullets .heading, .module .heading { + font-family: "Inter", sans-serif; + font-size: 26px; + font-weight: 700; +} +.bullets .row { + margin-bottom: 60px; +} +.bullets .caption { + padding-top: 10px; + padding-right: 30px; +} +.icon { + height: 25px !important; + margin-right: 5px; + vertical-align: -3px; +} + +.caption { + font-weight: 400; + font-size: 17px; + line-height: 24px; + color: var(--caption-color); +} + +.module { + margin-top: 80px; + margin-bottom: 80px; + padding-top: 50px; + padding-bottom: 50px; +} + +.module .caption { + padding-top: 10px; + padding-right: 80px; +} +.module .screenshot { + width: 600px; + height: 337px; + box-shadow:inset 0 1px 0 rgba(255,255,255,.6), 0 22px 70px 4px rgba(0,0,0,0.56), 0 0 0 1px rgba(0, 0, 0, 0.0); + border-radius: 5px; + background-size: cover; +} + +/* Footer */ +.md-copyright__highlight { + background-image: url('/img/netflix-oss.png'); + background-size: contain; + background-repeat: no-repeat; + color: rgba(0,0,0,0); + 
height: 60px; +} + +/* Comparison block */ +.compare { + background-color: var(--brand-lt-blue); + padding-top: 80px; + padding-bottom: 80px; + margin: 0px -1000px; + text-align: center; +} +.compare .container { + max-width: 61rem; + margin-left: auto; + margin-right: auto; +} + +.compare .heading { + margin-bottom: 30px; + margin-top: 0px; +} +.compare .bubble { + background: #fff; + border-radius: 10px; + padding: 30px; + height: 100%; +} + +.compare .caption { + font-size: 15px; + line-height: 22px; +} + +.compare .row { + margin: 0 0.8rem; +} \ No newline at end of file diff --git a/docs/devguide/architecture/PollTimeoutSeconds.png b/docs/devguide/architecture/PollTimeoutSeconds.png new file mode 100644 index 000000000..35bd32273 Binary files /dev/null and b/docs/devguide/architecture/PollTimeoutSeconds.png differ diff --git a/docs/docs/img/ResponseTimeoutSeconds.png b/docs/devguide/architecture/ResponseTimeoutSeconds.png similarity index 100% rename from docs/docs/img/ResponseTimeoutSeconds.png rename to docs/devguide/architecture/ResponseTimeoutSeconds.png diff --git a/docs/docs/img/TaskFailure.png b/docs/devguide/architecture/TaskFailure.png similarity index 100% rename from docs/docs/img/TaskFailure.png rename to docs/devguide/architecture/TaskFailure.png diff --git a/docs/docs/img/TimeoutSeconds.png b/docs/devguide/architecture/TimeoutSeconds.png similarity index 100% rename from docs/docs/img/TimeoutSeconds.png rename to docs/devguide/architecture/TimeoutSeconds.png diff --git a/docs/docs/img/conductor-architecture.png b/docs/devguide/architecture/conductor-architecture.png similarity index 100% rename from docs/docs/img/conductor-architecture.png rename to docs/devguide/architecture/conductor-architecture.png diff --git a/docs/docs/img/dag_workflow.png b/docs/devguide/architecture/dag_workflow.png similarity index 100% rename from docs/docs/img/dag_workflow.png rename to docs/devguide/architecture/dag_workflow.png diff --git a/docs/docs/img/dag_workflow2.png b/docs/devguide/architecture/dag_workflow2.png similarity index 100% rename from docs/docs/img/dag_workflow2.png rename to docs/devguide/architecture/dag_workflow2.png diff --git a/docs/docs/reference-docs/directed-acyclic-graph.md b/docs/devguide/architecture/directed-acyclic-graph.md similarity index 73% rename from docs/docs/reference-docs/directed-acyclic-graph.md rename to docs/devguide/architecture/directed-acyclic-graph.md index 5c707eccf..f8d08c0b6 100644 --- a/docs/docs/reference-docs/directed-acyclic-graph.md +++ b/docs/devguide/architecture/directed-acyclic-graph.md @@ -10,11 +10,11 @@ A graph is "a collection of vertices (or point) and edges (or lines) that indica By this definition, this is a graph - just not exactly correct in the context of DAGs: -
+![pirate vs global warming graph](pirate_graph.gif) But in the context of workflows, we're thinking of a graph more like this: - +![a regular graph (source: wikipedia)](regular_graph.png) Imagine each vertex as a microservice, and the lines are how the microservices are connected together. However, this graph is not a directed graph - as there is no direction given to each connection. @@ -22,7 +22,7 @@ Imagine each vertex as a microservice, and the lines are how the microservices a A directed graph means that there is a direction to each connection. For example, this graph is directed: - +![directed graph](directed_graph.png) Each arrow has a direction, Point "N" can proceed directly to "B", but "B" cannot proceed to "N" in the opposite direction. @@ -34,13 +34,13 @@ So a Directed Acyclic Graph is a set of vertices where the connections are direc Since a Conductor workflow is a series of vertices that can connect in only a specific direction and cannot loop, a Conductor workflow is thus a directed acyclic graph: - +![Conductor Dag](dag_workflow.png) ### Can a workflow have loops and still be a DAG? Yes. For example, Conductor workflows have Do-While loops: - +![Conductor Dag](dag_workflow2.png) This is still a DAG, because the loop is just shorthand for running the tasks inside the loop over and over again. For example, if the 2nd loop in the above image is run 3 times, the workflow path will be: diff --git a/docs/docs/img/directed_graph.png b/docs/devguide/architecture/directed_graph.png similarity index 100% rename from docs/docs/img/directed_graph.png rename to docs/devguide/architecture/directed_graph.png diff --git a/docs/docs/architecture/overview.md b/docs/devguide/architecture/index.md similarity index 88% rename from docs/docs/architecture/overview.md rename to docs/devguide/architecture/index.md index 81bd00e38..986003dfb 100644 --- a/docs/docs/architecture/overview.md +++ b/docs/devguide/architecture/index.md @@ -1,15 +1,15 @@ -# Overview +# Architecture Overview -![Architecture diagram](/img/conductor-architecture.png) +![Architecture diagram](conductor-architecture.png) The API and storage layers are pluggable and provide ability to work with different backends and queue service providers. ## Runtime Model Conductor follows RPC based communication model where workers are running on a separate machine from the server. Workers communicate with server over HTTP based endpoints and employs polling model for managing work queues. -![Runtime Model of Conductor](/img/overview.png) +![Runtime Model of Conductor](overview.png) -**Notes** +## Notes * Workers are remote systems that communicate over HTTP with the conductor servers. * Task Queues are used to schedule tasks for workers. We use [dyno-queues][1] internally but it can easily be swapped with SQS or similar pub-sub mechanism. 
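To make the polling model described in the notes above concrete, here is a minimal sketch of a remote worker built with the Conductor Java client. The task name `encode_video`, the server address, and the thread count are illustrative assumptions rather than values from this change, and the builder API shown may vary slightly between client versions.

```java
import com.netflix.conductor.client.automator.TaskRunnerConfigurer;
import com.netflix.conductor.client.http.TaskClient;
import com.netflix.conductor.client.worker.Worker;
import com.netflix.conductor.common.metadata.tasks.Task;
import com.netflix.conductor.common.metadata.tasks.TaskResult;

import java.util.List;

// Sketch of a remote worker for a hypothetical "encode_video" task.
public class EncodeVideoWorker implements Worker {

    @Override
    public String getTaskDefName() {
        return "encode_video"; // must match a registered task definition (assumed name)
    }

    @Override
    public TaskResult execute(Task task) {
        TaskResult result = new TaskResult(task);
        // Business logic goes here; inputs arrive via task.getInputData().
        result.getOutputData().put("encoded", true);
        result.setStatus(TaskResult.Status.COMPLETED);
        return result;
    }

    public static void main(String[] args) {
        TaskClient taskClient = new TaskClient();
        taskClient.setRootURI("http://localhost:8080/api/"); // assumed server address

        // Spawns polling threads that poll the task queue over HTTP,
        // run execute(), and report the result back to the server.
        TaskRunnerConfigurer configurer = new TaskRunnerConfigurer
                .Builder(taskClient, List.of(new EncodeVideoWorker()))
                .withThreadCount(2)
                .build();
        configurer.init(); // call shutdown() for a graceful stop
    }
}
```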
diff --git a/docs/docs/img/overview.png b/docs/devguide/architecture/overview.png
similarity index 100%
rename from docs/docs/img/overview.png
rename to docs/devguide/architecture/overview.png
diff --git a/docs/docs/img/pirate_graph.gif b/docs/devguide/architecture/pirate_graph.gif
similarity index 100%
rename from docs/docs/img/pirate_graph.gif
rename to docs/devguide/architecture/pirate_graph.gif
diff --git a/docs/docs/img/regular_graph.png b/docs/devguide/architecture/regular_graph.png
similarity index 100%
rename from docs/docs/img/regular_graph.png
rename to docs/devguide/architecture/regular_graph.png
diff --git a/docs/docs/img/task_states.png b/docs/devguide/architecture/task_states.png
similarity index 100%
rename from docs/docs/img/task_states.png
rename to docs/devguide/architecture/task_states.png
diff --git a/docs/docs/architecture/tasklifecycle.md b/docs/devguide/architecture/tasklifecycle.md
similarity index 54%
rename from docs/docs/architecture/tasklifecycle.md
rename to docs/devguide/architecture/tasklifecycle.md
index 036071df8..09d7f5371 100644
--- a/docs/docs/architecture/tasklifecycle.md
+++ b/docs/devguide/architecture/tasklifecycle.md
@@ -1,47 +1,59 @@
+# Task Lifecycle
+
 ## Task state transitions
+
 The figure below depicts the state transitions that a task can go through within a workflow execution.
-![Task_States](/img/task_states.png)
+![Task States](task_states.png)
 ## Retries and Failure Scenarios
 ### Task failure and retries
-Retries for failed task executions of each task can be configured independently. retryCount, retryDelaySeconds and retryLogic can be used to configure the retry mechanism.
-![Task Failure](/img/TaskFailure.png)
+Retries for failed task executions of each task can be configured independently. `retryCount`, `retryDelaySeconds` and `retryLogic` can be used to configure the retry mechanism.
+
+![Task Failure](TaskFailure.png)
 1. Worker (W1) polls for task T1 from the Conductor server and receives the task.
 2. Upon processing this task, the worker determines that the task execution is a failure and reports this to the server with FAILED status after 10 seconds.
 3. The server will persist this FAILED execution of T1. A new execution of task T1 will be created and scheduled to be polled. This task will be available to be polled after 5 (retryDelaySeconds) seconds.
+### Poll Timeout Seconds
+
+Poll timeout is the maximum amount of time within which a worker must poll for a task; otherwise, the task will be marked as `TIMED_OUT`.
+
+![Task Poll Timeout](PollTimeoutSeconds.png)
+
+In the figure above, task T1 does not get polled by the worker within 60 seconds, so Conductor marks it as `TIMED_OUT`.
 ### Timeout seconds
-Timeout is the maximum amount of time that the task must reach a terminal state in, else the task will be marked as TIMED_OUT.
-![Task Timeout](/img/TimeoutSeconds.png)
+Timeout is the maximum amount of time within which the task must reach a terminal state; otherwise, it will be marked as `TIMED_OUT`.
+
+![Task Timeout](TimeoutSeconds.png)
-**0 seconds** -> Worker polls for task T1 from the Conductor server and receives the task. T1 is put into IN_PROGRESS status by the server.
-Worker starts processing the task but is unable to process the task at this time. Worker updates the server with T1 set to IN_PROGRESS status and a callback of 9 seconds.
+**0 seconds** -> Worker polls for task T1 from the Conductor server and receives the task. T1 is put into `IN_PROGRESS` status by the server.
+Worker starts processing the task but is unable to process the task at this time. Worker updates the server with T1 set to `IN_PROGRESS` status and a callback of 9 seconds. Server puts T1 back in the queue but makes it invisible and the worker continues to poll for the task but does not receive T1 for 9 seconds. **9,18 seconds** -> Worker receives T1 from the server and is still unable to process the task and updates the server with a callback of 9 seconds. **27 seconds** -> Worker polls and receives task T1 from the server and is now able to process this task. -**30 seconds** (T1 timeout) -> Server marks T1 as TIMED_OUT because it is not in a terminal state after first being moved to IN_PROGRESS status. Server schedules a new task based on the retry count. - -**32 seconds** -> Worker completes processing of T1 and updates the server with COMPLETED status. Server will ignore this update since T1 has already been moved to a terminal status (TIMED_OUT). +**30 seconds** (T1 timeout) -> Server marks T1 as `TIMED_OUT` because it is not in a terminal state after first being moved to `IN_PROGRESS` status. Server schedules a new task based on the retry count. +**32 seconds** -> Worker completes processing of T1 and updates the server with `COMPLETED` status. Server will ignore this update since T1 has already been moved to a terminal status (`TIMED_OUT`). ### Response timeout seconds + Response timeout is the time within which the worker must respond to the server with an update for the task, else the task will be marked as TIMED_OUT. -![Response Timeout](/img/ResponseTimeoutSeconds.png) +![Response Timeout](ResponseTimeoutSeconds.png) -**0 seconds** -> Worker polls for the task T1 from the Conductor server and receives the task. T1 is put into IN_PROGRESS status by the server. +**0 seconds** -> Worker polls for the task T1 from the Conductor server and receives the task. T1 is put into `IN_PROGRESS` status by the server. Worker starts processing the task but the worker instance dies during this execution. -**20 seconds** (T1 responseTimeout) -> Server marks T1 as TIMED_OUT since the task has not been updated by the worker within the configured responseTimeoutSeconds (20). A new instance of task T1 is scheduled as per the retry configuration. +**20 seconds** (T1 responseTimeout) -> Server marks T1 as `TIMED_OUT` since the task has not been updated by the worker within the configured responseTimeoutSeconds (20). A new instance of task T1 is scheduled as per the retry configuration. **25 seconds** -> The retried instance of T1 is available to be polled by the worker, after the retryDelaySeconds (5) has elapsed. diff --git a/docs/docs/technicaldetails.md b/docs/devguide/architecture/technicaldetails.md similarity index 94% rename from docs/docs/technicaldetails.md rename to docs/devguide/architecture/technicaldetails.md index afcc8f5a0..934489207 100644 --- a/docs/docs/technicaldetails.md +++ b/docs/devguide/architecture/technicaldetails.md @@ -7,13 +7,13 @@ The proto models are auto-generated at compile time using this ProtoGen library. ### Cassandra Persistence -The Cassandra persistence layer currently provides a partial implementation of the ExecutionDAO that supports all the CRUD operations for tasks and workflow execution. The data modelling is done in a denormalized manner and stored in two tables. The “workflows” table houses all the information for a workflow execution including all its tasks and is the source of truth for all the information regarding a workflow and its tasks. 
The “task_lookup” table, as the name suggests stores a lookup of taskIds to workflowId. This table facilitates the fast retrieval of task data given a taskId.
+The Cassandra persistence layer currently provides a partial implementation of the ExecutionDAO that supports all the CRUD operations for tasks and workflow execution. The data modelling is done in a denormalized manner and stored in two tables. The "workflows" table houses all the information for a workflow execution including all its tasks and is the source of truth for all the information regarding a workflow and its tasks. The "task_lookup" table, as the name suggests, stores a lookup of taskIds to workflowId. This table facilitates the fast retrieval of task data given a taskId.

All the datastore operations that are used during the critical execution path of a workflow have been implemented currently. Few of the operational abilities of the ExecutionDAO are yet to be implemented. This module also does not provide implementations for QueueDAO, PollDataDAO and RateLimitingDAO. We envision using the Cassandra DAO with an external queue implementation, since implementing a queuing recipe on top of Cassandra is an anti-pattern that we want to stay away from.

### External Payload Storage

The implementation of this feature is such that the externalization of payloads is fully transparent and automated to the user. Conductor operators can configure the usage of this feature and is completely abstracted and hidden from the user, thereby allowing the operators full control over the barrier limits. Currently, only AWS S3 is supported as a storage system, however, as with all other Conductor components, this is pluggable and can be extended to enable any other object store to be used as an external payload storage system.
-The externalization of payloads is enforced using two kinds of [barriers](/externalpayloadstorage.html). Soft barriers are used when the payload size is warranted enough to be stored as part of workflow execution. These payloads will be stored in external storage and used during execution. Hard barriers are enforced to safeguard against voluminous data, and such payloads are rejected and the workflow execution is failed.
+The externalization of payloads is enforced using two kinds of [barriers](../../documentation/advanced/externalpayloadstorage.md). Soft barriers apply when the payload is too large to be carried inline as part of the workflow execution; these payloads will be stored in external storage and used during execution. Hard barriers are enforced to safeguard against voluminous data, and such payloads are rejected and the workflow execution is failed.

The payload size is evaluated in the client before being sent over the wire to the server. If the payload size exceeds the configured soft limit, the client makes a request to the server for the location at which the payload is to be stored. In this case where S3 is being used, the server returns a signed url for the location and the client uploads the payload using this signed url. The relative path to the payload object is then stored in the workflow/task metadata. The server can then download this payload from this path and use as needed during execution. This allows the server to control access to the S3 bucket, thereby making the user applications where the worker processes are run completely agnostic of the permissions needed to access this location.
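The soft/hard barrier check described above can be sketched as follows. This is not the actual Conductor client code: the byte limits are arbitrary assumptions, and the real client obtains the signed upload URL from the server rather than receiving it as a parameter.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

// Illustrative sketch of the client-side payload barrier logic.
public class PayloadBarrierSketch {

    static final int SOFT_LIMIT_BYTES = 1024 * 1024;      // assumed soft barrier
    static final int HARD_LIMIT_BYTES = 10 * 1024 * 1024; // assumed hard barrier

    /** Returns the payload to send inline, or null if it was externalized. */
    static byte[] externalizeIfNeeded(byte[] payload, String signedUrl) throws Exception {
        if (payload.length > HARD_LIMIT_BYTES) {
            // Hard barrier: the payload is rejected and the workflow execution fails.
            throw new IllegalArgumentException("payload exceeds hard barrier");
        }
        if (payload.length > SOFT_LIMIT_BYTES) {
            // Soft barrier: upload to the server-provided signed URL (S3 in this case),
            // then keep only the relative storage path in the workflow/task metadata.
            HttpRequest put = HttpRequest.newBuilder(URI.create(signedUrl))
                    .PUT(HttpRequest.BodyPublishers.ofByteArray(payload))
                    .build();
            HttpClient.newHttpClient().send(put, HttpResponse.BodyHandlers.discarding());
            return null; // caller stores the storage path instead of the payload
        }
        return payload; // small enough to travel inline with the task result
    }
}
```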
diff --git a/docs/docs/bestpractices.md b/docs/devguide/bestpractices.md
similarity index 97%
rename from docs/docs/bestpractices.md
rename to docs/devguide/bestpractices.md
index 3889bddaf..d753cbbc8 100644
--- a/docs/docs/bestpractices.md
+++ b/docs/devguide/bestpractices.md
@@ -1,3 +1,5 @@
+# Best Practices
+
 ## Response Timeout
 - Configure the responseTimeoutSeconds of each task to be > 0.
 - Should be less than or equal to timeoutSeconds.
diff --git a/docs/devguide/concepts/index.md b/docs/devguide/concepts/index.md
new file mode 100644
index 000000000..6eace3d85
--- /dev/null
+++ b/docs/devguide/concepts/index.md
@@ -0,0 +1,17 @@
+# Basic Concepts
+Conductor allows you to build a complex application using simple and granular tasks that do not
+need to be aware of or keep track of the state of your application's execution flow. Conductor keeps track of the state,
+calls tasks in the right order (sequentially or in parallel, as defined by you), retries calls if needed, handles failure
+scenarios gracefully, and outputs the final result.
+
+
+![Workflow screenshot](../../home/devex.png)
+
+Leveraging workflows in Conductor enables developers to truly focus on their core mission - building their application
+code in the languages of their choice. Conductor does the heavy lifting associated with ensuring high
+reliability, transactional consistency, and durability of their workflows. Simply put, wherever your application's
+components live and whichever languages they were written in, you can build a workflow in Conductor to orchestrate their
+execution in a reliable & scalable manner.
+
+[Workflows](workflows.md) and [Tasks](tasks.md) are the two key concepts that underlie the Conductor system.
+
diff --git a/docs/devguide/concepts/tasks.md b/docs/devguide/concepts/tasks.md
new file mode 100644
index 000000000..062ad20cd
--- /dev/null
+++ b/docs/devguide/concepts/tasks.md
@@ -0,0 +1,32 @@
+# Tasks
+Tasks are the building blocks of Conductor Workflows. There must be at least one task configured in each Workflow Definition. A typical Conductor workflow defines a list of tasks that are executed until the completion or termination of the workflow.
+
+Tasks can be categorized into three types:
+
+## Types of Tasks
+### System Tasks
+[**System Tasks**](../../documentation/configuration/workflowdef/systemtasks/index.md) are built-in tasks that are general purpose and re-usable. They are executed within the JVM of the Conductor server and managed by Conductor for execution and scalability. Such tasks allow you to get started without having to write custom workers.
+
+### Simple Tasks
+[**Simple Tasks**](workers.md) or Worker Tasks are implemented by your application and run in a separate environment from Conductor. These tasks talk to the Conductor server via REST/gRPC to poll for tasks and update their status after execution.
+
+### Operators
+[**Operators**](../../documentation/configuration/workflowdef/operators/index.md) are built-in primitives in Conductor that allow you to control the flow of tasks in your workflow. Operators are similar to programming constructs such as `for` loops, `switch` blocks, etc.
+
+## Task Configuration
+Task Configurations appear within the `tasks` array property of the Workflow Definition. This array is the blueprint that describes how a workflow will process an input payload by passing it through successive tasks.
+
+* For all tasks, the configuration will specify what **input parameters** the task takes.
+* For SIMPLE (worker based) tasks, the configuration will contain a reference to a registered worker `taskName`.
+* For System Tasks and Operators, the task configuration will contain important parameters that control the behavior of the task. For example, the task configuration of an HTTP task will specify an endpoint URL and the templatized payload that it will be called with when the task executes.
+
+## Task Definition
+Not to be confused with Task Configurations, [Task Definitions](../../documentation/configuration/taskdef.md) help define default task-level parameters like inputs and outputs, timeouts, retries etc. for SIMPLE (i.e. worker implemented) tasks.
+
+* All simple tasks need to be registered before they can be used by active workflows.
+* Task definitions can be registered via the UI, or through the API.
+* A registered task definition can be referenced from within different workflows.
+
+## Task Execution
+Each time a workload is passed into a configured task, a Task Execution object is created. This object has a unique ID and represents the result of the operation. This includes the status (i.e. whether the task was completed successfully), and any input, output and variables associated with the task.
+
diff --git a/docs/docs/gettingstarted/intro.md b/docs/devguide/concepts/why.md
similarity index 86%
rename from docs/docs/gettingstarted/intro.md
rename to docs/devguide/concepts/why.md
index 789a7db4b..8acb0d15b 100644
--- a/docs/docs/gettingstarted/intro.md
+++ b/docs/devguide/concepts/why.md
@@ -1,5 +1,7 @@
 # Why Conductor?
-## Conductor was built to help Netflix orchestrate microservices based process flows with the following features:
+Conductor was built to help Netflix orchestrate microservices based process flows.
+
+## Features
 * A distributed server ecosystem, which stores workflow state information efficiently.
 * Allow creation of process / business flows in which each individual task can be implemented by the same / different microservices.
@@ -23,6 +25,6 @@
 With peer to peer task choreography, we found it was harder to scale with growing business needs and complexities. Pub/sub model worked for simplest of the flows, but quickly highlighted some of the issues associated with the approach:
-* Process flows are “embedded” within the code of multiple application.
+* Process flows are "embedded" within the code of multiple applications.
 * Often, there is tight coupling and assumptions around input/output, SLAs etc, making it harder to adapt to changing needs.
-* Almost no way to systematically answer “How much are we done with process X”?
+* Almost no way to systematically answer "How much are we done with process X"?
diff --git a/docs/devguide/concepts/workers.md b/docs/devguide/concepts/workers.md
new file mode 100644
index 000000000..faf6c6c85
--- /dev/null
+++ b/docs/devguide/concepts/workers.md
@@ -0,0 +1,11 @@
+# Workers
+A worker is responsible for executing a task. Workers can be implemented in any language, and Conductor provides a polyglot set of worker frameworks that provide features such as polling threads, metrics and server communication that make creating workers easy.
+
+Each worker embodies the Microservice design pattern and follows certain basic principles:
+
+1. Workers are stateless and do not implement workflow-specific logic.
+2. Each worker executes a very specific task and produces well-defined output given specific inputs.
+3. 
Workers are meant to be idempotent (or should handle cases where a partially executed task gets rescheduled due to timeouts, etc.)
+4. Workers do not implement the logic to handle retries, etc.; that is taken care of by the Conductor server.
+
+Conductor maintains a registry of worker tasks. A task MUST be registered before being used in a workflow. This can be done by creating and saving a **Task Definition**.
\ No newline at end of file
diff --git a/docs/devguide/concepts/workflows.md b/docs/devguide/concepts/workflows.md
new file mode 100644
index 000000000..d4c17618c
--- /dev/null
+++ b/docs/devguide/concepts/workflows.md
@@ -0,0 +1,13 @@
+# Workflows
+We will talk about two distinct topics: *defining* a workflow and *executing* a workflow.
+
+### Workflow Definition
+The Workflow Definition is the Conductor primitive that encompasses the flow of your business logic. It contains all the information necessary to describe the behavior of a workflow.
+
+A Workflow Definition contains a collection of **Task Configurations**. This is the blueprint which specifies the order of execution of
+tasks within a workflow. This blueprint also specifies how data/state is passed from one task to another (using task input/output parameters).
+
+Additionally, the Workflow Definition contains metadata regulating the runtime behavior of the workflow, such as what input and output parameters are expected for the entire workflow, and the workflow's timeout and retry settings.
+
+### Workflow Execution
+If Workflow Definitions are like OOP classes, then Workflow Executions are like object instances. Each time a Workflow Definition is invoked with a given input, a new *Workflow Execution* with a unique ID is created. Definitions to Executions have a 1:N relationship.
diff --git a/docs/docs/faq.md b/docs/devguide/faq.md
similarity index 68%
rename from docs/docs/faq.md
rename to docs/devguide/faq.md
index 858d0ecfe..5d14a1835 100644
--- a/docs/docs/faq.md
+++ b/docs/devguide/faq.md
@@ -1,41 +1,41 @@
 # Frequently asked Questions
-### How do you schedule a task to be put in the queue after some time (e.g. 1 hour, 1 day etc.)
+## How do you schedule a task to be put in the queue after some time (e.g. 1 hour, 1 day etc.)
 After polling for the task update the status of the task to `IN_PROGRESS` and set the `callbackAfterSeconds` value to the desired time. The task will remain in the queue until the specified second before worker polling for it will receive it again.
 If there is a timeout set for the task, and the `callbackAfterSeconds` exceeds the timeout value, it will result in task being TIMED_OUT.
-### How long can a workflow be in running state? Can I have a workflow that keeps running for days or months?
+## How long can a workflow be in running state? Can I have a workflow that keeps running for days or months?
 Yes. As long as the timeouts on the tasks are set to handle long running workflows, it will stay in running state.
-### My workflow fails to start with missing task error
+## My workflow fails to start with missing task error
 Ensure all the tasks are registered via `/metadata/taskdefs` APIs. Add any missing task definition (as reported in the error) and try again.
-### Where does my worker run? How does conductor run my tasks?
+## Where does my worker run? How does conductor run my tasks?
-Conductor does not run the workers. When a task is scheduled, it is put into the queue maintained by Conductor. 
Workers are required to poll for tasks using `/tasks/poll` API at periodic interval, execute the business logic for the task and report back the results using `POST /tasks` API call.
-Conductor, however will run [system tasks](/configuration/systask.html) on the Conductor server.
+Conductor does not run the workers. When a task is scheduled, it is put into the queue maintained by Conductor. Workers are required to poll for tasks using the `/tasks/poll` API at a periodic interval, execute the business logic for the task and report back the results using the `POST {{ api_prefix }}/tasks` API call.
+Conductor, however, will run [system tasks](../documentation/configuration/workflowdef/systemtasks/index.md) on the Conductor server.
-### How can I schedule workflows to run at a specific time?
+## How can I schedule workflows to run at a specific time?
 Netflix Conductor itself does not provide any scheduling mechanism. But there is a community project [_Schedule Conductor Workflows_](https://github.com/jas34/scheduledwf) which provides workflow scheduling capability as a pluggable module as well as workflow server.
 Other way is you can use any of the available scheduling systems to make REST calls to Conductor to start a workflow.
 Alternatively, publish a message to a supported eventing system like SQS to trigger a workflow.
-More details about [eventing](/configuration/eventhandlers.html).
+More details about [eventing](../documentation/configuration/eventhandlers.md).
-### How do I setup Dynomite cluster?
+## How do I setup Dynomite cluster?
 Visit Dynomite's [Github page](https://github.com/Netflix/dynomite) to find details on setup and support mechanism.
-### Can I use conductor with Ruby / Go / Python?
+## Can I use conductor with Ruby / Go / Python?
 Yes. Workers can be written any language as long as they can poll and update the task results via HTTP endpoints.
@@ -44,18 +44,18 @@ Conductor provides frameworks for Java and Python to simplify the task of pollin
 **Note:** Python and Go clients have been contributed by the community.
-### How can I get help with Dynomite?
+## How can I get help with Dynomite?
 Visit Dynomite's [Github page](https://github.com/Netflix/dynomite) to find details on setup and support mechanism.
-### My workflow is running and the task is SCHEDULED but it is not being processed.
+## My workflow is running and the task is SCHEDULED but it is not being processed.
 Make sure that the worker is actively polling for this task.
 Navigate to the `Task Queues` tab on the Conductor UI and select your task name in the search box. Ensure that `Last Poll Time` for this task is current.
 In Conductor 3.x, ```conductor.redis.availabilityZone``` defaults to ```us-east-1c```. Ensure that this matches where your workers are, and that it also matches```conductor.redis.hosts```.
-### How do I configure a notification when my workflow completes or fails?
+## How do I configure a notification when my workflow completes or fails?
 When a workflow fails, you can configure a "failure workflow" to run using the```failureWorkflow``` parameter. By default, three parameters are passed:
@@ -65,20 +65,20 @@ When a workflow fails, you can configure a "failure workflow" to run using the``
 You can also use the Workflow Status Listener:
-* Set the workflowStatusListenerEnabled field in your workflow definition to true which enables [notifications](/configuration/workflowdef.html#workflow-notifications).
-* Add a custom implementation of the Workflow Status Listener. 
Refer [this](/extend.html#workflow-status-listener).
-* This notification can be implemented in such a way as to either send a notification to an external system or to send an event on the conductor queue to complete/fail another task in another workflow as described [here](/configuration/eventhandlers.html).
+* Set the workflowStatusListenerEnabled field in your workflow definition to true, which enables [notifications](../documentation/configuration/workflowdef/index.md#workflow-notifications).
+* Add a custom implementation of the Workflow Status Listener. Refer to [this](../documentation/advanced/extend.md#workflow-status-listener).
+* This notification can be implemented in such a way as to either send a notification to an external system or to send an event on the conductor queue to complete/fail another task in another workflow as described [here](../documentation/configuration/eventhandlers.md).
-Refer to this [documentation](/configuration/workflowdef.html#workflow-notifications) to extend conductor to send out events/notifications upon workflow completion/failure.
+Refer to this [documentation](../documentation/configuration/workflowdef/index.md#workflow-notifications) to extend conductor to send out events/notifications upon workflow completion/failure.
-### I want my worker to stop polling and executing tasks when the process is being terminated. (Java client)
+## I want my worker to stop polling and executing tasks when the process is being terminated. (Java client)
 In a `PreDestroy` block within your application, call the `shutdown()` method on the `TaskRunnerConfigurer` instance that you have created to facilitate a graceful shutdown of your worker in case the process is being terminated.
-### Can I exit early from a task without executing the configured automatic retries in the task definition?
+## Can I exit early from a task without executing the configured automatic retries in the task definition?
 Set the status to `FAILED_WITH_TERMINAL_ERROR` in the TaskResult object within your worker. This would mark the task as FAILED and fail the workflow without retrying the task as a fail-fast mechanism.
diff --git a/docs/docs/how-tos/Monitoring/Conductor-LogLevel.md b/docs/devguide/how-tos/Monitoring/Conductor-LogLevel.md
similarity index 100%
rename from docs/docs/how-tos/Monitoring/Conductor-LogLevel.md
rename to docs/devguide/how-tos/Monitoring/Conductor-LogLevel.md
diff --git a/docs/docs/how-tos/Tasks/creating-tasks.md b/docs/devguide/how-tos/Tasks/creating-tasks.md
similarity index 84%
rename from docs/docs/how-tos/Tasks/creating-tasks.md
rename to docs/devguide/how-tos/Tasks/creating-tasks.md
index 54ae74e91..d79da9c96 100644
--- a/docs/docs/how-tos/Tasks/creating-tasks.md
+++ b/docs/devguide/how-tos/Tasks/creating-tasks.md
@@ -1,24 +1,22 @@
 # Creating Task Definitions
-
 Tasks can be created using the tasks metadata API
-`POST /api/metadata/taskdefs`
+`POST {{ api_prefix }}/metadata/taskdefs`
 This API takes an array of new task definitions.
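For Java users, the same registration can be sketched with the Conductor Java client before looking at the HTTP examples below. It uses `MetadataClient` and `TaskDef` from the standard client libraries and mirrors the retry and timeout values of the curl payload; the server address and owner email are illustrative assumptions.

```java
import com.netflix.conductor.client.http.MetadataClient;
import com.netflix.conductor.common.metadata.tasks.TaskDef;

import java.util.List;

public class RegisterTaskDef {
    public static void main(String[] args) {
        TaskDef taskDef = new TaskDef();
        taskDef.setName("sample_task_name_1");
        taskDef.setDescription("This is a sample task for demo");
        taskDef.setRetryCount(3);                        // retry up to 3 times on failure
        taskDef.setRetryLogic(TaskDef.RetryLogic.FIXED); // fixed delay between retries
        taskDef.setRetryDelaySeconds(5);
        taskDef.setTimeoutSeconds(30);                   // must reach a terminal state in 30s
        taskDef.setResponseTimeoutSeconds(10);           // worker must update within 10s
        taskDef.setTimeoutPolicy(TaskDef.TimeoutPolicy.TIME_OUT_WF);
        taskDef.setOwnerEmail("owner@example.com");      // assumed; some servers require it

        MetadataClient metadataClient = new MetadataClient();
        metadataClient.setRootURI("http://localhost:8080/api/"); // assumed server address
        metadataClient.registerTaskDefs(List.of(taskDef));       // same endpoint as the curl call
    }
}
```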
+## Examples
 ### Example using curl
-
 ```shell
-curl 'http://localhost:8080/api/metadata/taskdefs' \
+curl '{{ server_host }}{{ api_prefix }}/metadata/taskdefs' \
 -H 'accept: */*' \
 -H 'content-type: application/json' \
 --data-raw '[{"createdBy":"user","name":"sample_task_name_1","description":"This is a sample task for demo","responseTimeoutSeconds":10,"timeoutSeconds":30,"inputKeys":[],"outputKeys":[],"timeoutPolicy":"TIME_OUT_WF","retryCount":3,"retryLogic":"FIXED","retryDelaySeconds":5,"inputTemplate":{},"rateLimitPerFrequency":0,"rateLimitFrequencyInSeconds":1}]'
 ```
 ### Example using node fetch
-
 ```javascript
-fetch("http://localhost:8080/api/metadata/taskdefs", {
+fetch("{{ server_host }}{{ api_prefix }}/metadata/taskdefs", {
   "headers": {
     "accept": "*/*",
     "content-type": "application/json",
@@ -28,8 +26,7 @@ fetch("http://localhost:8080/api/metadata/taskdefs", {
 });
 ```
 ## Best Practices
-
 1. You can update a set of tasks together in this API
-2. Task configurations are important attributes that control the behavior of this task in a Workflow. Refer to [Task Configurations](../../../documentation/configuration/taskdef.md) for all the options and details.
 3. You can also use the Conductor Swagger UI to update the tasks
diff --git a/docs/devguide/how-tos/Tasks/dynamic-vs-switch-tasks.md b/docs/devguide/how-tos/Tasks/dynamic-vs-switch-tasks.md
new file mode 100644
index 000000000..0339f4cc0
--- /dev/null
+++ b/docs/devguide/how-tos/Tasks/dynamic-vs-switch-tasks.md
@@ -0,0 +1,18 @@
+# Dynamic vs Switch Tasks
+
+Dynamic Tasks are useful in situations when you need to run a task whose type is determined at runtime instead
+of during configuration. It is similar to the `SWITCH` use case, but with `DYNAMIC`
+we won't need to preconfigure all case options in the workflow definition itself. Instead, we can mark the task
+as `DYNAMIC` and determine which underlying task it runs during the workflow execution itself.
+
+* Use DYNAMIC task as a replacement for SWITCH if you have too many case options
+* DYNAMIC task is an option when you want to programmatically determine the next task to run instead of using expressions
+* DYNAMIC task simplifies the workflow execution UI view which will now only show the selected task
+* SWITCH task visualization is helpful as documentation - showing you all options that the workflow could have
+  taken
+* SWITCH task comes with a default task option which can be useful in some use cases
+
+Learn more about
+
+* [Dynamic Tasks](../../../documentation/configuration/workflowdef/operators/dynamic-task.md)
+* [Switch Tasks](../../../documentation/configuration/workflowdef/operators/switch-task.md)
\ No newline at end of file
diff --git a/docs/docs/how-tos/Tasks/extending-system-tasks.md b/docs/devguide/how-tos/Tasks/extending-system-tasks.md
similarity index 85%
rename from docs/docs/how-tos/Tasks/extending-system-tasks.md
rename to docs/devguide/how-tos/Tasks/extending-system-tasks.md
index 5661d040c..e219fae61 100644
--- a/docs/docs/how-tos/Tasks/extending-system-tasks.md
+++ b/docs/devguide/how-tos/Tasks/extending-system-tasks.md
@@ -1,6 +1,6 @@
 # Extending System Tasks
-[System tasks](/configuration/systask.html) allow Conductor to run simple tasks on the server - removing the need to build (and deploy) workers for basic tasks. 
This allows for automating more mundane tasks without building specific microservices for them. +[System tasks](../../../documentation/configuration/workflowdef/systemtasks/index.md) allow Conductor to run simple tasks on the server - removing the need to build (and deploy) workers for basic tasks. This allows for automating more mundane tasks without building specific microservices for them. However, sometimes it might be necessary to add additional parameters to a System Task to gain the behavior that is desired. @@ -58,7 +58,7 @@ When this workflow is run - it fails, as expected. Now, sometimes an API call might fail due to an issue on the remote server, and retrying the call will result in a response. With many Conductor tasks, ```retryCount```, ```retryDelaySeconds``` and ```retryLogic``` fields can be applied to retry the worker (with the desired parameters). -By default, the [HTTP Task](/reference-docs/http-task.html) does not have ```retryCount```, ```retryDelaySeconds``` or ```retryLogic``` built in. Attempting to add these parameters to a HTTP Task results in an error. +By default, the [HTTP Task](../../../documentation/configuration/workflowdef/systemtasks/http-task.md) does not have ```retryCount```, ```retryDelaySeconds``` or ```retryLogic``` built in. Attempting to add these parameters to a HTTP Task results in an error. ## The Solution diff --git a/docs/docs/how-tos/Tasks/monitoring-task-queues.md b/docs/devguide/how-tos/Tasks/monitoring-task-queues.md similarity index 72% rename from docs/docs/how-tos/Tasks/monitoring-task-queues.md rename to docs/devguide/how-tos/Tasks/monitoring-task-queues.md index 584b89102..4de10727a 100644 --- a/docs/docs/how-tos/Tasks/monitoring-task-queues.md +++ b/docs/devguide/how-tos/Tasks/monitoring-task-queues.md @@ -1,15 +1,11 @@ ---- -sidebar_position: 1 ---- - # Monitoring Task Queues Conductor offers an API and UI interface to monitor the task queues. This is useful to see details of the number of workers polling and monitoring the queue backlog. -### Using the UI +## Using the UI -```http request +```Redirecting ...
+ + \ No newline at end of file diff --git a/docs/docs/img/timeline.png b/docs/home/timeline.png similarity index 100% rename from docs/docs/img/timeline.png rename to docs/home/timeline.png diff --git a/docs/docs/img/workflow.svg b/docs/home/workflow.svg similarity index 100% rename from docs/docs/img/workflow.svg rename to docs/home/workflow.svg diff --git a/docs/docs/img/logo.svg b/docs/img/logo.svg similarity index 100% rename from docs/docs/img/logo.svg rename to docs/img/logo.svg diff --git a/docs/docs/img/netflix-oss.png b/docs/img/netflix-oss.png similarity index 100% rename from docs/docs/img/netflix-oss.png rename to docs/img/netflix-oss.png diff --git a/docs/docs/index.md b/docs/index.md similarity index 73% rename from docs/docs/index.md rename to docs/index.md index c1a0bd096..f9a4f8a6f 100644 --- a/docs/docs/index.md +++ b/docs/index.md @@ -1,28 +1,34 @@ -