diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8c65b4dbb..83a7b3d47 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -60,16 +60,6 @@ build-default: - flowman-dist/target/flowman-dist-*-bin.tar.gz expire_in: 5 days -# List additional build variants (some of them will be built on pushes) -build-hadoop2.6-spark2.3: - stage: build - script: 'mvn ${MAVEN_CLI_OPTS} clean package -Phadoop-2.6 -Pspark-2.3 -Ddockerfile.skip' - artifacts: - name: "flowman-dist-hadoop2.6-spark2.3" - paths: - - flowman-dist/target/flowman-dist-*-bin.tar.gz - expire_in: 5 days - build-hadoop2.6-spark2.4: stage: build script: 'mvn ${MAVEN_CLI_OPTS} clean package -Phadoop-2.6 -Pspark-2.4 -Ddockerfile.skip' @@ -133,17 +123,6 @@ build-hadoop3.2-spark3.1: - flowman-dist/target/flowman-dist-*-bin.tar.gz expire_in: 5 days -build-cdh5.15: - stage: build - except: - - pushes - script: 'mvn ${MAVEN_CLI_OPTS} clean package -PCDH-5.15 -Ddockerfile.skip' - artifacts: - name: "flowman-dist-cdh5.15" - paths: - - flowman-dist/target/flowman-dist-*-bin.tar.gz - expire_in: 5 days - build-cdh6.3: stage: build script: 'mvn ${MAVEN_CLI_OPTS} clean package -PCDH-6.3 -Ddockerfile.skip' diff --git a/.travis.yml b/.travis.yml index 798cd3b47..82065c69b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,14 +19,6 @@ jobs: jdk: openjdk8 script: mvn clean install - - name: Hadoop 2.6 with Spark 2.3 - jdk: openjdk8 - script: mvn clean install -Phadoop-2.6 -Pspark-2.3 -Ddockerfile.skip - - - name: Hadoop 2.7 with Spark 2.3 - jdk: openjdk8 - script: mvn clean install -Phadoop-2.7 -Pspark-2.3 -Ddockerfile.skip - - name: Hadoop 2.6 with Spark 2.4 jdk: openjdk8 script: mvn clean install -Phadoop-2.6 -Pspark-2.4 @@ -51,10 +43,6 @@ jobs: jdk: openjdk8 script: mvn clean install -Phadoop-3.2 -Pspark-3.1 - - name: CDH 5.15 - jdk: openjdk8 - script: mvn clean install -PCDH-5.15 -Ddockerfile.skip - - name: CDH 6.3 jdk: openjdk8 script: mvn clean install -PCDH-6.3 -Ddockerfile.skip diff --git a/BUILDING.md b/BUILDING.md index 
69fc5814f..be1fa5956 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -3,7 +3,18 @@ The whole project is built using Maven. The build also includes a Docker image, which requires that Docker is installed on the build machine. -## Build with Maven +## Prerequisites + +You need the following tools installed on your machine: +* JDK 1.8 or later. If you build a variant with Scala 2.11, you have to use JDK 1.8 (and not anything newer like + Java 11). This mainly affects builds with Spark 2.x +* Apache Maven (install via package manager or download from https://maven.apache.org/download.cgi) +* npm (install via package manager or download from https://www.npmjs.com/get-npm) +* Windows users also need Hadoop winutils installed. Those can be retrieved from https://github.com/cdarlint/winutils +and later. See some additional details for building on Windows below. + + +## Build with Maven Building Flowman with the default settings (i.e. Hadoop and Spark version) is as easy as @@ -22,9 +33,11 @@ in a complex environment with Kerberos. You can find the `tar.gz` file in the di ## Build on Windows -Although you can normally build Flowman on Windows, you will need the Hadoop WinUtils installed. You can download -the binaries from https://github.com/steveloughran/winutils and install an appropriate version somewhere onto your -machine. Do not forget to set the HADOOP_HOME environment variable to the installation directory of these utils! +Although you can normally build Flowman on Windows, it is recommended to use Linux instead. But nevertheless Windows +is still supported to some extent, but requires some extra care. You will need the Hadoop WinUtils installed. You can +download the binaries from https://github.com/cdarlint/winutils and install an appropriate version somewhere onto +your machine. Do not forget to set the HADOOP_HOME or PATH environment variable to the installation directory of these +utils! 
You should also configure git such that all files are checked out using "LF" endings instead of "CRLF", otherwise some unittests may fail and Docker images might not be useable. This can be done by setting the git configuration @@ -46,24 +59,23 @@ the `master` branch really builds clean with all unittests passing on Linux. ## Build for Custom Spark / Hadoop Version -Per default, Flowman will be built for fairly recent versions of Spark (2.4.5 as of this writing) and Hadoop (2.8.5). +Per default, Flowman will be built for fairly recent versions of Spark (3.0.2 as of this writing) and Hadoop (3.2.0). But of course you can also build for a different version by either using a profile ```shell -mvn install -Pspark2.3 -Phadoop2.7 -DskipTests +mvn install -Pspark-2.4 -Phadoop-2.7 -DskipTests ``` This will always select the latest bugfix version within the minor version. You can also specify versions explicitly as follows: ```shell -mvn install -Dspark.version=2.2.1 -Dhadoop.version=2.7.3 +mvn install -Dspark.version=2.4.3 -Dhadoop.version=2.7.3 ``` Note that using profiles is the preferred way, as this guarantees that also dependencies are selected using the correct version. The following profiles are available: -* spark-2.3 * spark-2.4 * spark-3.0 * spark-3.1 @@ -73,37 +85,12 @@ using the correct version. The following profiles are available: * hadoop-2.9 * hadoop-3.1 * hadoop-3.2 -* CDH-5.15 * CDH-6.3 With these profiles it is easy to build Flowman to match your environment. 
## Building for Open Source Hadoop and Spark -### Spark 2.3 and Hadoop 2.6: - -```shell -mvn clean install -Pspark-2.3 -Phadoop-2.6 -``` - -### Spark 2.3 and Hadoop 2.7: - -```shell -mvn clean install -Pspark-2.3 -Phadoop-2.7 -``` - -### Spark 2.3 and Hadoop 2.8: - -```shell -mvn clean install -Pspark-2.3 -Phadoop-2.8 -``` - -### Spark 2.3 and Hadoop 2.9: - -```shell -mvn clean install -Pspark-2.3 -Phadoop-2.9 -``` - ### Spark 2.4 and Hadoop 2.6: ```shell @@ -148,13 +135,7 @@ mvn clean install -Pspark-3.1 -Phadoop-3.2 ## Building for Cloudera -The Maven project also contains preconfigured profiles for Cloudera. - -```shell -mvn clean install -Pspark-2.3 -PCDH-5.15 -DskipTests -``` - -Or for Cloudera 6.3 +The Maven project also contains preconfigured profiles for Cloudera CDH 6.3. ```shell mvn clean install -Pspark-2.4 -PCDH-6.3 -DskipTests diff --git a/CHANGELOG.md b/CHANGELOG.md index d1a7e52f6..32344c1ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +# Version 0.17.0 - 2021-06-02 + +* New Flowman Kernel and Flowman Studio application prototypes +* New ParallelExecutor +* Fix before/after dependencies in `count` target +* Default build is now Spark 3.1 + Hadoop 3.2 +* Remove build profiles for Spark 2.3 and CDH 5.15 +* Add MS SQL Server plugin containing JDBC driver +* Speed up file listing for `file` relations +* Use Spark JobGroups +* Better support running Flowman on Windows with appropriate batch scripts + + # Version 0.16.0 - 2021-04-23 * Add logo to Flowman Shell diff --git a/NOTICE b/NOTICE index 788ef2858..f20325963 100644 --- a/NOTICE +++ b/NOTICE @@ -66,6 +66,12 @@ MariaDB Java Client * HOMEPAGE: * https://mariadb.com +MSSQL JDBC Client + * LICENSE + * license/LICENSE-mssql-jdbc.txt + * HOMEPAGE: + * https://github.com/Microsoft/mssql-jdbc + Apache Derby * LICENSE * license/LICENSE-derby.txt (Apache 2.0 License) diff --git a/build-release.sh b/build-release.sh index a4ffa73da..1ff2e12aa 100755 --- a/build-release.sh +++ b/build-release.sh 
@@ -15,15 +15,10 @@ build_profile() { build_profile hadoop-2.6 spark-2.3 build_profile hadoop-2.6 spark-2.4 -build_profile hadoop-2.7 spark-2.3 build_profile hadoop-2.7 spark-2.4 -build_profile hadoop-2.8 spark-2.3 -build_profile hadoop-2.8 spark-2.4 -build_profile hadoop-2.9 spark-2.3 -build_profile hadoop-2.9 spark-2.4 -build_profile hadoop-2.9 spark-3.0 -build_profile hadoop-3.1 spark-3.0 +build_profile hadoop-2.7 spark-3.0 build_profile hadoop-3.2 spark-3.0 +build_profile hadoop-2.7 spark-3.1 build_profile hadoop-3.2 spark-3.1 build_profile CDH-5.15 build_profile CDH-6.3 diff --git a/docker/pom.xml b/docker/pom.xml index 6eab790aa..89d33e54e 100644 --- a/docker/pom.xml +++ b/docker/pom.xml @@ -10,8 +10,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - .. + 0.17.0 + ../pom.xml diff --git a/docs/building.md b/docs/building.md index e479e6d58..9a336d400 100644 --- a/docs/building.md +++ b/docs/building.md @@ -60,20 +60,19 @@ You might also want to skip unittests (the HBase plugin is currently failing und ### Build for Custom Spark / Hadoop Version -Per default, Flowman will be built for fairly recent versions of Spark (2.4.5 as of this writing) and Hadoop (2.8.5). +Per default, Flowman will be built for fairly recent versions of Spark (3.0.2 as of this writing) and Hadoop (3.2.0). But of course you can also build for a different version by either using a profile - mvn install -Pspark2.2 -Phadoop2.7 -DskipTests + mvn install -Pspark2.4 -Phadoop2.7 -DskipTests This will always select the latest bugfix version within the minor version. You can also specify versions explicitly as follows: - mvn install -Dspark.version=2.2.1 -Dhadoop.version=2.7.3 + mvn install -Dspark.version=2.4.1 -Dhadoop.version=2.7.3 Note that using profiles is the preferred way, as this guarantees that also dependencies are selected using the correct version. The following profiles are available: -* spark-2.3 * spark-2.4 * spark-3.0 * spark-3.1 @@ -83,29 +82,12 @@ using the correct version. 
The following profiles are available: * hadoop-2.9 * hadoop-3.1 * hadoop-3.2 -* CDH-5.15 * CDH-6.3 With these profiles it is easy to build Flowman to match your environment. ### Building for Open Source Hadoop and Spark -Spark 2.3 and Hadoop 2.6: - - mvn clean install -Pspark-2.3 -Phadoop-2.6 - -Spark 2.3 and Hadoop 2.7: - - mvn clean install -Pspark-2.3 -Phadoop-2.7 - -Spark 2.3 and Hadoop 2.8: - - mvn clean install -Pspark-2.3 -Phadoop-2.8 - -Spark 2.3 and Hadoop 2.9: - - mvn clean install -Pspark-2.3 -Phadoop-2.9 - Spark 2.4 and Hadoop 2.6: mvn clean install -Pspark-2.4 -Phadoop-2.6 @@ -137,11 +119,7 @@ Spark 3.1 and Hadoop 3.2 ### Building for Cloudera -The Maven project also contains preconfigured profiles for Cloudera. - - mvn clean install -Pspark-2.3 -PCDH-5.15 -DskipTests - -Or for Cloudera 6.3 +The Maven project also contains preconfigured profiles for Cloudera CDH 6.3. mvn clean install -Pspark-2.4 -PCDH-6.3 -DskipTests diff --git a/docs/config.md b/docs/config.md index ae5221c20..51e17a0a2 100644 --- a/docs/config.md +++ b/docs/config.md @@ -31,7 +31,11 @@ the existence of targets to decide if a rebuild is required. - `flowman.execution.executor.class` *(type: class)* *(default: `com.dimajix.flowman.execution.SimpleExecutor`)* Configure the executor to use. The default `SimpleExecutor` will process all targets in the correct order -sequentially. + sequentially. The alternative implementation `com.dimajix.flowman.execution.ParallelExecutor` will run multiple + targets in parallel (if they are not depending on each other) + +- `flowman.execution.executor.parallelism` *(type: int)* *(default: 4)* +The number of targets to be executed in parallel, when the `ParallelExecutor` is used. - `flowman.execution.scheduler.class` *(type: class)* *(default: `com.dimajix.flowman.execution.SimpleScheduler`)* Configure the scheduler to use. The default `SimpleScheduler` will sort all targets according to their dependency. 
diff --git a/docs/spec/mapping/mock.md b/docs/spec/mapping/mock.md index 883bcdec8..be66c290a 100644 --- a/docs/spec/mapping/mock.md +++ b/docs/spec/mapping/mock.md @@ -15,7 +15,7 @@ mappings: ```yaml mappings: - empty_mapping: + some_other_mapping: kind: mock mapping: some_mapping records: @@ -23,6 +23,24 @@ mappings: - [2,null,"cat","black"] ``` +```yaml +mappings: + some_mapping: + kind: mock + mapping: some_mapping + records: + - Campaign ID: DIR_36919 + LineItemID ID: DIR_260390 + SiteID ID: 23374 + CreativeID ID: 292668 + PlacementID ID: 108460 + - Campaign ID: DIR_36919 + LineItemID ID: DIR_260390 + SiteID ID: 23374 + CreativeID ID: 292668 + PlacementID ID: 108460 +``` + ## Fields * `kind` **(mandatory)** *(type: string)*: `mock` @@ -39,7 +57,7 @@ mappings: * `MEMORY_AND_DISK_SER` * `mapping` **(optional)** *(type: string)*: - Specifies the name of the mapping to be mocked. If no name is given, the a mapping with the same name will be + Specifies the name of the mapping to be mocked. If no name is given, then a mapping with the same name will be mocked. Note that this will only work when used as an override mapping in test cases, otherwise an infinite loop would be created by referencing to itself. 
diff --git a/docs/spec/mapping/values.md b/docs/spec/mapping/values.md index 4602d14e9..36f749fb6 100644 --- a/docs/spec/mapping/values.md +++ b/docs/spec/mapping/values.md @@ -18,8 +18,8 @@ mappings: - name: str_col type: string records: - - [1,"some_string"] - - [2,"cat"] + - [1,"some_string"] + - [2,"cat"] ``` ```yaml @@ -30,8 +30,21 @@ mappings: int_col: integer str_col: string records: - - [1,"some_string"] - - [2,"cat"] + - [1,"some_string"] + - [2,"cat"] +``` + +```yaml +mappings: + fake_input: + kind: values + columns: + int_col: integer + str_col: string + records: + - int_col: 1 + str_col: "some_string" + - str_col: "cat" ``` diff --git a/docs/spec/relation/file.md b/docs/spec/relation/file.md index 40a187e17..4aaf652f2 100644 --- a/docs/spec/relation/file.md +++ b/docs/spec/relation/file.md @@ -71,7 +71,7 @@ relations: Please see the section [Partitioning](#Partitioning) below. -## Description +## Remarks When using `file` relations as data sinks in a [`relation` target](../target/relation.md), then Flowman will manage the whole lifecycle of the directory for you. This means that @@ -81,8 +81,16 @@ whole lifecycle of the directory for you. This means that * The directory specified in `location` will be truncated or individual partitions will be dropped during `clean` phase * The directory specified in `location` tables will be removed during `destroy` phase +### Schema Inference -## Supported File Format +Note that Flowman will rely on schema inference in some important situations, like [mocking](mock.md) and generally +for describing the schema of a relation. This might create unwanted connections to the physical data source, +particular in case of self-contained tests. To prevent Flowman from creating a connection to the physical data +source, you simply need to explicitly specify a schema, which will then be used instead of the physical schema +in all situations where only schema information is required. 
+ + +### Supported File Format File relations support all file formats also supported by Spark. This includes simple text files, CSV files, Parquet files, ORC files and Avro files. Each file format provides its own additional settings which can be specified diff --git a/docs/spec/relation/hiveTable.md b/docs/spec/relation/hiveTable.md index 70e016908..67b213801 100644 --- a/docs/spec/relation/hiveTable.md +++ b/docs/spec/relation/hiveTable.md @@ -123,7 +123,7 @@ relations: files and can be used to workaround some bugs in the Hive backend. -## Description +## Remarks When using Hive tables as data sinks in a [`relation` target](../target/relation.md), then Flowman will manage the whole lifecycle for you. This means that @@ -132,3 +132,10 @@ whole lifecycle for you. This means that * Hive tables will be truncated or individual partitions will be dropped during `clean` phase * Hive tables will be removed during `destroy` phase +### Schema Inference + +Note that Flowman will rely on schema inference in some important situations, like [mocking](mock.md) and generally +for describing the schema of a relation. This might create unwanted connections to the physical data source, +particular in case of self-contained tests. To prevent Flowman from creating a connection to the physical data +source, you simply need to explicitly specify a schema, which will then be used instead of the physical schema +in all situations where only schema information is required. diff --git a/docs/spec/relation/hiveView.md b/docs/spec/relation/hiveView.md index 6cf57c021..00032adc3 100644 --- a/docs/spec/relation/hiveView.md +++ b/docs/spec/relation/hiveView.md @@ -38,6 +38,4 @@ relations: * `mapping` **(optional)** *(string)* *(default: empty)*: Specifies the name of a mapping, which should be translated into SQL and stored in the Hive view. Cannot be used together with `sql`. 
- - -## Description + diff --git a/docs/spec/relation/jdbc.md b/docs/spec/relation/jdbc.md index 860c9f09f..3ef78d451 100644 --- a/docs/spec/relation/jdbc.md +++ b/docs/spec/relation/jdbc.md @@ -55,4 +55,10 @@ relations: a relation property can overwrite a connection property if it has the same name. -## Description +## Remarks + +Note that Flowman will rely on schema inference in some important situations, like [mocking](mock.md) and generally +for describing the schema of a relation. This might create unwanted connections to the physical data source, +particular in case of self-contained tests. To prevent Flowman from creating a connection to the physical data +source, you simply need to explicitly specify a schema, which will then be used instead of the physical schema +in all situations where only schema information is required. diff --git a/docs/spec/relation/mock.md b/docs/spec/relation/mock.md index b7bbb1529..7c1966c75 100644 --- a/docs/spec/relation/mock.md +++ b/docs/spec/relation/mock.md @@ -13,6 +13,33 @@ relations: relation: real_relation ``` +```yaml +relations: + some_relation: + kind: mock + relation: some_relation + records: + - [1,2,"some_string",""] + - [2,null,"cat","black"] +``` + +```yaml +relations: + data_raw: + kind: mock + records: + - Campaign ID: DIR_36919 + LineItemID ID: DIR_260390 + SiteID ID: 23374 + CreativeID ID: 292668 + PlacementID ID: 108460 + - Campaign ID: DIR_36919 + LineItemID ID: DIR_260390 + SiteID ID: 23374 + CreativeID ID: 292668 + PlacementID ID: 108460 +``` + ## Fields * `kind` **(mandatory)** *(string)*: `null` or `empty` diff --git a/flowman-common/.gitignore b/flowman-common/.gitignore new file mode 100644 index 000000000..b83d22266 --- /dev/null +++ b/flowman-common/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/flowman-common/pom.xml b/flowman-common/pom.xml new file mode 100644 index 000000000..2426db779 --- /dev/null +++ b/flowman-common/pom.xml @@ -0,0 +1,68 @@ + + + 4.0.0 + flowman-common + Flowman common 
library + + + com.dimajix.flowman + flowman-root + 0.17.0 + ../pom.xml + + + + + + src/main/resources + true + + + + + + net.alchim31.maven + scala-maven-plugin + + + org.scalatest + scalatest-maven-plugin + + + + + + + com.dimajix.flowman + flowman-scalatest-compat + + + + com.google.guava + guava + + + + log4j + log4j + + + + org.slf4j + slf4j-api + + + + org.scalatest + scalatest_${scala.api_version} + + + + org.scalamock + scalamock_${scala.api_version} + + + + diff --git a/flowman-tools/src/main/resources/com/dimajix/flowman/log4j-defaults.properties b/flowman-common/src/main/resources/com/dimajix/flowman/log4j-defaults.properties similarity index 100% rename from flowman-tools/src/main/resources/com/dimajix/flowman/log4j-defaults.properties rename to flowman-common/src/main/resources/com/dimajix/flowman/log4j-defaults.properties diff --git a/flowman-core/src/main/scala/com/dimajix/common/IdentityHashMap.scala b/flowman-common/src/main/scala/com/dimajix/common/IdentityHashMap.scala similarity index 100% rename from flowman-core/src/main/scala/com/dimajix/common/IdentityHashMap.scala rename to flowman-common/src/main/scala/com/dimajix/common/IdentityHashMap.scala diff --git a/flowman-core/src/main/scala/com/dimajix/common/IdentityHashSet.scala b/flowman-common/src/main/scala/com/dimajix/common/IdentityHashSet.scala similarity index 100% rename from flowman-core/src/main/scala/com/dimajix/common/IdentityHashSet.scala rename to flowman-common/src/main/scala/com/dimajix/common/IdentityHashSet.scala diff --git a/flowman-core/src/main/scala/com/dimajix/common/MapIgnoreCase.scala b/flowman-common/src/main/scala/com/dimajix/common/MapIgnoreCase.scala similarity index 100% rename from flowman-core/src/main/scala/com/dimajix/common/MapIgnoreCase.scala rename to flowman-common/src/main/scala/com/dimajix/common/MapIgnoreCase.scala diff --git a/flowman-core/src/main/scala/com/dimajix/common/Resources.scala b/flowman-common/src/main/scala/com/dimajix/common/Resources.scala 
similarity index 100% rename from flowman-core/src/main/scala/com/dimajix/common/Resources.scala rename to flowman-common/src/main/scala/com/dimajix/common/Resources.scala diff --git a/flowman-core/src/main/scala/com/dimajix/common/SetIgnoreCase.scala b/flowman-common/src/main/scala/com/dimajix/common/SetIgnoreCase.scala similarity index 100% rename from flowman-core/src/main/scala/com/dimajix/common/SetIgnoreCase.scala rename to flowman-common/src/main/scala/com/dimajix/common/SetIgnoreCase.scala diff --git a/flowman-common/src/main/scala/com/dimajix/common/SynchronizedMap.scala b/flowman-common/src/main/scala/com/dimajix/common/SynchronizedMap.scala new file mode 100644 index 000000000..b63510137 --- /dev/null +++ b/flowman-common/src/main/scala/com/dimajix/common/SynchronizedMap.scala @@ -0,0 +1,186 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.common + +import scala.collection.Iterable +import scala.collection.mutable + + +object SynchronizedMap { + def apply[K,V]() : SynchronizedMap[K,V] = SynchronizedMap(mutable.Map[K,V]()) +} + +/** + * This is a thin wrapper around a [[Map]] which provides synchronized Access. + * @param impl + * @tparam A + */ +case class SynchronizedMap[K,V](impl:mutable.Map[K,V]) { + + /** Tests whether this map contains a binding for a key. + * + * @param key the key + * @return `true` if there is a binding for `key` in this map, `false` otherwise. 
+ */ + def contains(key: K): Boolean = { + synchronized { + impl.contains(key) + } + } + + /** Optionally returns the value associated with a key. + * + * @param key the key value + * @return an option value containing the value associated with `key` in this map, + * or `None` if none exists. + */ + def get(key: K): Option[V] = { + synchronized { + impl.get(key) + } + } + + /** Returns the value associated with a key, or a default value if the key is not contained in the map. + * @param key the key. + * @param default a computation that yields a default value in case no binding for `key` is + * found in the map. + * @tparam V1 the result type of the default computation. + * @return the value associated with `key` if it exists, + * otherwise the result of the `default` computation. + * + * @usecase def getOrElse(key: K, default: => V): V + * @inheritdoc + */ + def getOrElse[V1 >: V](key: K, default: => V1): V1 = { + synchronized { + impl.get(key) + } + match { + case Some(result) => result + case None => default + } + } + + /** Adds a new key/value pair to this map and optionally returns previously bound value. + * If the map already contains a + * mapping for the key, it will be overridden by the new value. + * + * @param key the key to update + * @param value the new value + * @return an option value containing the value associated with the key + * before the `put` operation was executed, or `None` if `key` + * was not defined in the map before. + */ + def put(key: K, value: V) : Unit = { + synchronized { + impl.put(key, value) + } + } + + /** Retrieves the value which is associated with the given key. This + * method invokes the `default` method of the map if there is no mapping + * from the given key to a value. Unless overridden, the `default` method throws a + * `NoSuchElementException`. + * + * @param key the key + * @return the value associated with the given key, or the result of the + * map's `default` method, if none exists. 
+ */ + def apply(key: K) : V = { + synchronized { + impl(key) + } + } + + /** If given key is already in this map, returns associated value. + * + * Otherwise, computes value from given expression `op`, stores with key + * in map and returns that value. + * + * @param key the key to test + * @param op the computation yielding the value to associate with `key`, if + * `key` is previously unbound. + * @return the value associated with key (either previously or as a result + * of executing the method). + */ + def getOrElseUpdate(key: K, op: => V): V = { + synchronized( + impl.get(key) + ) + match { + case Some(result) => result + case None => + val result = op + synchronized(impl.getOrElseUpdate(key, result)) + } + } + + /** Converts this $coll to a sequence. + * + * ```Note```: assumes a fast `size` method. Subclasses should override if this is not true. + */ + def toSeq : collection.Seq[(K,V)] = { + synchronized { + impl.toSeq + } + } + + /** Creates a new iterator over all elements contained in this iterable object. + * + * @return the new iterator + */ + def iterator : Iterator[(K,V)] = { + toSeq.iterator + } + + /** Collects all values of this map in an iterable collection. + * + * @return the values of this map as an iterable. + */ + def values: Iterable[V] = { + synchronized { + Seq(impl.values.toSeq:_*) + } + } + + /** Applies a function `f` to all values produced by this iterator. + * + * @param f the function that is applied for its side-effect to every element. + * The result of function `f` is discarded. + * + * @tparam U the type parameter describing the result of function `f`. + * This result will always be ignored. Typically `U` is `Unit`, + * but this is not necessary. + * + * @note Reuse: $consumesIterator + * + * @usecase def foreach(f: A => Unit): Unit + * @inheritdoc + */ + def foreach[U](f: ((K,V)) => U) : Unit = { + iterator.foreach(f) + } + + /** Removes all bindings from the map. After this operation has completed, + * the map will be empty. 
+ */ + def clear() : Unit = { + synchronized { + impl.clear() + } + } +} diff --git a/flowman-common/src/main/scala/com/dimajix/common/SynchronizedSet.scala b/flowman-common/src/main/scala/com/dimajix/common/SynchronizedSet.scala new file mode 100644 index 000000000..09d82d2c2 --- /dev/null +++ b/flowman-common/src/main/scala/com/dimajix/common/SynchronizedSet.scala @@ -0,0 +1,62 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.common + +import scala.collection.Iterator +import scala.collection.mutable + + +object SynchronizedSet { + def apply[A]() : SynchronizedSet[A] = SynchronizedSet(mutable.Set[A]()) +} + +/** + * This is a thin wrapper around a Set which provides synchronized Access + * @param impl + * @tparam A + */ +case class SynchronizedSet[A](impl:mutable.Set[A]) { + def add(elem: A): Boolean = { + impl.synchronized(impl.add(elem)) + } + + def remove(elem: A): Boolean = { + impl.synchronized(impl.remove(elem)) + } + + def contains(elem: A): Boolean = { + impl.synchronized(impl.contains(elem)) + } + + def find(p: A => Boolean): Option[A] = { + impl.synchronized(impl.find(p)) + } + + def toSeq : collection.Seq[A] = { + impl.synchronized(impl.toSeq) + } + + def iterator : Iterator[A] = toSeq.iterator + + def foreach[U](f: A => U) : Unit = { + toSeq.foreach(f) + } + + def clear() : Unit = { + impl.synchronized(impl.clear()) + } +} diff --git 
a/flowman-core/src/main/scala/com/dimajix/common/Trilean.scala b/flowman-common/src/main/scala/com/dimajix/common/Trilean.scala similarity index 100% rename from flowman-core/src/main/scala/com/dimajix/common/Trilean.scala rename to flowman-common/src/main/scala/com/dimajix/common/Trilean.scala diff --git a/flowman-core/src/main/scala/com/dimajix/common/TypeRegistry.scala b/flowman-common/src/main/scala/com/dimajix/common/TypeRegistry.scala similarity index 100% rename from flowman-core/src/main/scala/com/dimajix/common/TypeRegistry.scala rename to flowman-common/src/main/scala/com/dimajix/common/TypeRegistry.scala diff --git a/flowman-core/src/main/scala/com/dimajix/common/package.scala b/flowman-common/src/main/scala/com/dimajix/common/common.scala similarity index 96% rename from flowman-core/src/main/scala/com/dimajix/common/package.scala rename to flowman-common/src/main/scala/com/dimajix/common/common.scala index f876fc2ca..be8250129 100644 --- a/flowman-core/src/main/scala/com/dimajix/common/package.scala +++ b/flowman-common/src/main/scala/com/dimajix/common/common.scala @@ -1,5 +1,5 @@ /* - * Copyright 2018-2019 Kaya Kupferschmidt + * Copyright 2018-2021 Kaya Kupferschmidt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/flowman-common/src/main/scala/com/dimajix/common/net/SocketUtils.scala b/flowman-common/src/main/scala/com/dimajix/common/net/SocketUtils.scala new file mode 100644 index 000000000..1e372d8e3 --- /dev/null +++ b/flowman-common/src/main/scala/com/dimajix/common/net/SocketUtils.scala @@ -0,0 +1,27 @@ +package com.dimajix.common.net + +import java.net.InetAddress +import java.net.InetSocketAddress +import java.net.URL + + +object SocketUtils { + def toURL(protocol:String, address:InetSocketAddress, allowAny:Boolean=false) : URL = { + val localIpAddress = { + if(allowAny) + address.getAddress.getHostAddress + else + getLocalAddress(address) + } + val localPort = address.getPort + new URL(protocol, localIpAddress, localPort, "") + } + def getLocalAddress(address:InetSocketAddress) : String = { + if (address.getAddress.isAnyLocalAddress) { + InetAddress.getLocalHost.getHostAddress + } + else { + address.getAddress.getHostAddress + } + } +} diff --git a/flowman-core/src/main/scala/com/dimajix/common/text/CaseUtils.scala b/flowman-common/src/main/scala/com/dimajix/common/text/CaseUtils.scala similarity index 100% rename from flowman-core/src/main/scala/com/dimajix/common/text/CaseUtils.scala rename to flowman-common/src/main/scala/com/dimajix/common/text/CaseUtils.scala diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/Logging.scala b/flowman-common/src/main/scala/com/dimajix/flowman/common/Logging.scala similarity index 63% rename from flowman-tools/src/main/scala/com/dimajix/flowman/tools/Logging.scala rename to flowman-common/src/main/scala/com/dimajix/flowman/common/Logging.scala index 4e53a055b..5270cf200 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/Logging.scala +++ b/flowman-common/src/main/scala/com/dimajix/flowman/common/Logging.scala @@ -1,4 +1,20 @@ -package com.dimajix.flowman.tools +/* + * Copyright 2020 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may 
not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.common import java.util.Locale diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/ParserUtils.scala b/flowman-common/src/main/scala/com/dimajix/flowman/common/ParserUtils.scala similarity index 97% rename from flowman-tools/src/main/scala/com/dimajix/flowman/tools/ParserUtils.scala rename to flowman-common/src/main/scala/com/dimajix/flowman/common/ParserUtils.scala index 8947a9393..95e1ba1bb 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/ParserUtils.scala +++ b/flowman-common/src/main/scala/com/dimajix/flowman/common/ParserUtils.scala @@ -14,8 +14,7 @@ * limitations under the License. */ -package com.dimajix.flowman.tools - +package com.dimajix.flowman.common object ParserUtils { def parseDelimitedList(list:String) : Seq[String] = { diff --git a/flowman-common/src/main/scala/com/dimajix/flowman/common/ThreadUtils.scala b/flowman-common/src/main/scala/com/dimajix/flowman/common/ThreadUtils.scala new file mode 100644 index 000000000..2aeb30ec6 --- /dev/null +++ b/flowman-common/src/main/scala/com/dimajix/flowman/common/ThreadUtils.scala @@ -0,0 +1,89 @@ +/* + * Copyright 2018-2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.common + +import java.lang.Thread.UncaughtExceptionHandler +import java.util.concurrent.ForkJoinPool +import java.util.concurrent.ForkJoinWorkerThread + +import scala.concurrent.Await +import scala.concurrent.ExecutionContext +import scala.concurrent.Future +import scala.concurrent.duration.Duration + +import org.slf4j.LoggerFactory + + +class ThreadUtils +object ThreadUtils { + private val logger = LoggerFactory.getLogger(classOf[ThreadUtils]) + + private class MyForkJoinWorkerThread(prefix:String, pool:ForkJoinPool) extends ForkJoinWorkerThread(pool) { // set the correct classloader here + setContextClassLoader(Thread.currentThread.getContextClassLoader) + setName(prefix + "-" + super.getName) + } + private val exceptionHandler = new UncaughtExceptionHandler { + override def uncaughtException(thread: Thread, throwable: Throwable): Unit = { + logger.error("Uncaught exception: ", throwable) + } + } + + /** + * Creates a new ForkJoinPool + * @param maxThreadNumber + * @return + */ + def newThreadPool(prefix:String, maxThreadNumber:Int): ForkJoinPool = { + val factory = new ForkJoinPool.ForkJoinWorkerThreadFactory { + override final def newThread(pool: ForkJoinPool) = { + new MyForkJoinWorkerThread(prefix, pool) + } + } + new ForkJoinPool( + maxThreadNumber, + factory, + exceptionHandler, + true + ) + } + + /** + * Transforms input collection by applying the given function to each element in parallel fashion. + * + * @param in - the input collection which should be transformed in parallel. 
+ * @param prefix - the prefix assigned to the underlying thread pool. + * @param maxThreads - maximum number of thread can be created during execution. + * @param f - the lambda function will be applied to each element of `in`. + * @tparam I - the type of elements in the input collection. + * @tparam O - the type of elements in resulted collection. + * @return new collection in which each element was given from the input collection `in` by + * applying the lambda function `f`. + */ + def parmap[I, O](in: Seq[I], prefix: String, maxThreads: Int)(f: I => O): Seq[O] = { + val pool = newThreadPool(prefix, maxThreads) + try { + implicit val ec = ExecutionContext.fromExecutor(pool) + + val futures = in.map(x => Future(f(x))) + val futureSeq = Future.sequence(futures) + + Await.result(futureSeq, Duration.Inf) + } finally { + pool.shutdownNow() + } + } +} diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/ToolConfig.scala b/flowman-common/src/main/scala/com/dimajix/flowman/common/ToolConfig.scala similarity index 97% rename from flowman-tools/src/main/scala/com/dimajix/flowman/tools/ToolConfig.scala rename to flowman-common/src/main/scala/com/dimajix/flowman/common/ToolConfig.scala index 112b7a104..111275b59 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/ToolConfig.scala +++ b/flowman-common/src/main/scala/com/dimajix/flowman/common/ToolConfig.scala @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.dimajix.flowman.tools +package com.dimajix.flowman.common import java.io.File diff --git a/flowman-core/src/test/scala/com/dimajix/common/IdentityHashMapTest.scala b/flowman-common/src/test/scala/com/dimajix/common/IdentityHashMapTest.scala similarity index 100% rename from flowman-core/src/test/scala/com/dimajix/common/IdentityHashMapTest.scala rename to flowman-common/src/test/scala/com/dimajix/common/IdentityHashMapTest.scala diff --git a/flowman-core/src/test/scala/com/dimajix/common/IdentityHashSetTest.scala b/flowman-common/src/test/scala/com/dimajix/common/IdentityHashSetTest.scala similarity index 100% rename from flowman-core/src/test/scala/com/dimajix/common/IdentityHashSetTest.scala rename to flowman-common/src/test/scala/com/dimajix/common/IdentityHashSetTest.scala diff --git a/flowman-core/src/test/scala/com/dimajix/common/MapIgnoreCaseTest.scala b/flowman-common/src/test/scala/com/dimajix/common/MapIgnoreCaseTest.scala similarity index 99% rename from flowman-core/src/test/scala/com/dimajix/common/MapIgnoreCaseTest.scala rename to flowman-common/src/test/scala/com/dimajix/common/MapIgnoreCaseTest.scala index 2980378cc..982f2a94b 100644 --- a/flowman-core/src/test/scala/com/dimajix/common/MapIgnoreCaseTest.scala +++ b/flowman-common/src/test/scala/com/dimajix/common/MapIgnoreCaseTest.scala @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package com.dimajix.common import org.scalatest.flatspec.AnyFlatSpec diff --git a/flowman-core/src/test/scala/com/dimajix/common/TrileanTest.scala b/flowman-common/src/test/scala/com/dimajix/common/TrileanTest.scala similarity index 100% rename from flowman-core/src/test/scala/com/dimajix/common/TrileanTest.scala rename to flowman-common/src/test/scala/com/dimajix/common/TrileanTest.scala diff --git a/flowman-core/src/test/scala/com/dimajix/common/text/CaseUtilsTest.scala b/flowman-common/src/test/scala/com/dimajix/common/text/CaseUtilsTest.scala similarity index 100% rename from flowman-core/src/test/scala/com/dimajix/common/text/CaseUtilsTest.scala rename to flowman-common/src/test/scala/com/dimajix/common/text/CaseUtilsTest.scala diff --git a/flowman-core/pom.xml b/flowman-core/pom.xml index 9787e8bda..665d4acc8 100644 --- a/flowman-core/pom.xml +++ b/flowman-core/pom.xml @@ -9,52 +9,10 @@ com.dimajix.flowman flowman-root - 0.16.0 - .. + 0.17.0 + ../pom.xml - - - - default - - - true - - - - org.apache.spark - spark-avro_${scala.api_version} - ${spark.version} - - - - - - - CDH-5.15 - - - com.databricks - spark-avro_${scala.api_version} - ${spark-avro.version} - - - - - - - spark-2.3 - - - com.databricks - spark-avro_${scala.api_version} - ${spark-avro.version} - - - - - @@ -122,6 +80,11 @@ flowman-spark-testing + + com.dimajix.flowman + flowman-common + + org.apache.hadoop hadoop-client @@ -142,6 +105,11 @@ spark-hive_${scala.api_version} + + org.apache.spark + spark-avro_${scala.api_version} + + com.fasterxml.jackson.core jackson-core diff --git a/flowman-core/src/main/resources/com/dimajix/flowman/flowman.properties b/flowman-core/src/main/resources/com/dimajix/flowman/flowman.properties index defbd4820..0f73285cc 100644 --- a/flowman-core/src/main/resources/com/dimajix/flowman/flowman.properties +++ b/flowman-core/src/main/resources/com/dimajix/flowman/flowman.properties @@ -1 +1,3 @@ version=${project.version} +spark_version=${spark.version} 
+hadoop_version=${hadoop.version} diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/catalog/Catalog.scala b/flowman-core/src/main/scala/com/dimajix/flowman/catalog/Catalog.scala index f249aa5e9..05aab3cd9 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/catalog/Catalog.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/catalog/Catalog.scala @@ -18,7 +18,7 @@ package com.dimajix.flowman.catalog import java.io.FileNotFoundException -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.SparkShim import org.apache.spark.sql.catalyst.TableIdentifier @@ -43,7 +43,6 @@ import org.apache.spark.sql.execution.command.DropDatabaseCommand import org.apache.spark.sql.execution.command.DropTableCommand import org.apache.spark.sql.types.StructField import org.slf4j.LoggerFactory - import com.dimajix.flowman.config.Configuration import com.dimajix.flowman.model.PartitionField import com.dimajix.flowman.model.PartitionSchema @@ -538,11 +537,14 @@ class Catalog(val spark:SparkSession, val config:Configuration, val externalCata val status = fs.getFileStatus(location) if (status.isDirectory()) { logger.info(s"Deleting all files in directory '$location'") - fs.listStatus(location).foreach(f => fs.delete(f.getPath, true)) + java.lang.System.gc() // Release open file handles on Windows + fs.listStatus(location).foreach { f => + doDelete(fs, f.getPath, true) + } } else if (status.isFile()) { logger.info(s"Deleting single file '$location'") - fs.delete(location, false) + doDelete(fs, location, false) } } catch { case _:FileNotFoundException => @@ -559,7 +561,14 @@ class Catalog(val spark:SparkSession, val config:Configuration, val externalCata val fs = location.getFileSystem(hadoopConf) if (fs.exists(location)) { logger.info(s"Deleting file or directory '$location'") - fs.delete(location, true) + doDelete(fs, location, true) + } + } + + private def 
doDelete(fs:FileSystem, location:Path, recursive:Boolean) : Unit = { + java.lang.System.gc() // Release open file handles on Windows + if (!fs.delete(location, recursive)) { + logger.warn(s"Cannot delete file or directory '$location'") } } diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/config/FlowmanConf.scala b/flowman-core/src/main/scala/com/dimajix/flowman/config/FlowmanConf.scala index c4b4fb873..bfa3b50b0 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/config/FlowmanConf.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/config/FlowmanConf.scala @@ -70,9 +70,13 @@ object FlowmanConf { .booleanConf .createWithDefault(false) val EXECUTION_EXECUTOR_CLASS = buildConf("flowman.execution.executor.class") - .doc("Class name for executing targets") + .doc("Class name for executor used to run targets") .classConf(classOf[Executor]) .createWithDefault(classOf[SimpleExecutor]) + val EXECUTION_EXECUTOR_PARALLELISM = buildConf("flowman.execution.executor.parallelism") + .doc("Number of parallel targets to execute") + .intConf + .createWithDefault(4) val EXECUTION_SCHEDULER_CLASS = buildConf("flowman.execution.scheduler.class") .doc("Class name for scheduling targets") .classConf(classOf[Scheduler]) diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/execution/CachingExecution.scala b/flowman-core/src/main/scala/com/dimajix/flowman/execution/CachingExecution.scala index b4c1b61a5..8a964a32d 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/execution/CachingExecution.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/execution/CachingExecution.scala @@ -16,7 +16,13 @@ package com.dimajix.flowman.execution +import scala.collection.concurrent.TrieMap import scala.collection.mutable +import scala.concurrent.Await +import scala.concurrent.Future +import scala.concurrent.Promise +import scala.concurrent.duration.Duration +import scala.util.Try import scala.util.control.NonFatal import org.apache.spark.sql.DataFrame 
@@ -25,7 +31,7 @@ import org.apache.spark.storage.StorageLevel import org.slf4j.Logger import com.dimajix.common.IdentityHashMap -import com.dimajix.flowman.config.FlowmanConf +import com.dimajix.common.SynchronizedMap import com.dimajix.flowman.model.Mapping import com.dimajix.flowman.model.MappingOutputIdentifier import com.dimajix.flowman.types.StructType @@ -34,21 +40,30 @@ import com.dimajix.flowman.types.StructType abstract class CachingExecution(parent:Option[Execution], isolated:Boolean) extends Execution { protected val logger:Logger - private val frameCache:IdentityHashMap[Mapping,Map[String,DataFrame]] = { + private val frameCache:SynchronizedMap[Mapping,Map[String,DataFrame]] = { parent match { case Some(ce:CachingExecution) if !isolated => ce.frameCache case _ => - IdentityHashMap[Mapping,Map[String,DataFrame]]() + SynchronizedMap(IdentityHashMap[Mapping,Map[String,DataFrame]]()) } } - private val schemaCache:IdentityHashMap[Mapping, mutable.Map[String,StructType]] = { + private val frameCacheFutures:SynchronizedMap[Mapping,Future[Map[String,DataFrame]]] = { + parent match { + case Some(ce:CachingExecution) if !isolated => + ce.frameCacheFutures + case _ => + SynchronizedMap(IdentityHashMap[Mapping,Future[Map[String,DataFrame]]]()) + } + } + + private val schemaCache:SynchronizedMap[Mapping,TrieMap[String,StructType]] = { parent match { case Some(ce:CachingExecution) if !isolated => ce.schemaCache case _ => - IdentityHashMap[Mapping, mutable.Map[String,StructType]]() + SynchronizedMap(IdentityHashMap[Mapping,TrieMap[String,StructType]]()) } } @@ -60,7 +75,25 @@ abstract class CachingExecution(parent:Option[Execution], isolated:Boolean) exte override def instantiate(mapping:Mapping) : Map[String,DataFrame] = { require(mapping != null) - frameCache.getOrElseUpdate(mapping, createTables(mapping)) + // We do not simply call getOrElseUpdate, since the creation of the DataFrame might be slow and + // concurrent trials + def createOrWait() : 
Map[String,DataFrame] = { + val p = Promise[Map[String,DataFrame]]() + val f = frameCacheFutures.getOrElseUpdate(mapping, p.future) + // Check if the returned future is the one we passed in. If that is the case, the current thread + // is responsible for fulfilling the promise + if (f eq p.future) { + val tables = Try(createTables(mapping)) + p.complete(tables) + tables.get + } + else { + // Other threads simply wait for the promise to be fulfilled. + Await.result(f, Duration.Inf) + } + } + + frameCache.getOrElseUpdate(mapping, createOrWait()) } /** @@ -71,7 +104,7 @@ abstract class CachingExecution(parent:Option[Execution], isolated:Boolean) exte * @return */ override def describe(mapping:Mapping, output:String) : StructType = { - schemaCache.getOrElseUpdate(mapping, mutable.Map()) + schemaCache.getOrElseUpdate(mapping, TrieMap()) .getOrElseUpdate(output, { if (!mapping.outputs.contains(output)) throw new NoSuchMappingOutputException(mapping.identifier, output) diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/execution/Execution.scala b/flowman-core/src/main/scala/com/dimajix/flowman/execution/Execution.scala index cf3196a60..277e0003c 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/execution/Execution.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/execution/Execution.scala @@ -107,6 +107,11 @@ abstract class Execution { instances(output) } + /** + * Executes an assertion from a TestSuite. This method ensures that all inputs are instantiated correctly + * @param assertion + * @return + */ def assert(assertion:Assertion) : Seq[AssertionResult] = { val context = assertion.context val inputs = assertion.inputs @@ -124,9 +129,9 @@ abstract class Execution { * @return */ def describe(mapping:Mapping, output:String) : StructType + /** - * Returns the schema for a specific output created by a specific mapping. Note that not all mappings support - * schema analysis beforehand. In such cases, None will be returned. 
+ * Returns the schema for a specific output created by a specific mapping. * @param mapping * @return */ diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/execution/JobListener.scala b/flowman-core/src/main/scala/com/dimajix/flowman/execution/JobListener.scala deleted file mode 100644 index cafb208f0..000000000 --- a/flowman-core/src/main/scala/com/dimajix/flowman/execution/JobListener.scala +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2018-2020 Kaya Kupferschmidt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.dimajix.flowman.execution - -import com.dimajix.flowman.model.JobInstance -import com.dimajix.flowman.model.TargetInstance - - -abstract class JobToken -abstract class TargetToken - - -trait JobListener { - /** - * Starts the run and returns a token, which can be anything - * @param job - * @return - */ - def startJob(job:JobInstance, phase:Phase) : JobToken - - /** - * Sets the status of a job after it has been started - * @param token The token returned by startJob - * @param status - */ - def finishJob(token:JobToken, status:Status) : Unit - - /** - * Starts the run and returns a token, which can be anything - * @param target - * @return - */ - def startTarget(target:TargetInstance, phase:Phase, parent:Option[JobToken]) : TargetToken - - /** - * Sets the status of a job after it has been started - * @param token The token returned by startJob - * @param status - */ - def finishTarget(token:TargetToken, status:Status) : Unit - -} diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/execution/ParallelExecutor.scala b/flowman-core/src/main/scala/com/dimajix/flowman/execution/ParallelExecutor.scala new file mode 100644 index 000000000..cf1a207c3 --- /dev/null +++ b/flowman-core/src/main/scala/com/dimajix/flowman/execution/ParallelExecutor.scala @@ -0,0 +1,154 @@ +/* + * Copyright 2018-2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.execution + +import java.util.concurrent.TimeUnit + +import scala.annotation.tailrec +import scala.collection.mutable +import scala.concurrent.Await +import scala.concurrent.ExecutionContext +import scala.concurrent.Future +import scala.concurrent.duration.Duration +import scala.util.Failure +import scala.util.Success + +import com.dimajix.flowman.common.ThreadUtils +import com.dimajix.flowman.config.FlowmanConf +import com.dimajix.flowman.model.Target + + +class ParallelExecutor extends Executor { + /** + * Executes a list of targets in an appropriate order. + * + * @param execution + * @param context + * @param phase - Phase to execute + * @param targets - List of all targets, even those which should not be executed + * @param filter - Filter predicate to find all targets to be executed + * @param keepGoing - True if errors in one target should not stop other targets from being executed + * @param fn - Function to call. Note that the function is expected not to throw a non-fatal exception. 
+ * @return + */ + def execute(execution: Execution, context:Context, phase: Phase, targets: Seq[Target], filter:Target => Boolean, keepGoing: Boolean)(fn:(Execution,Target,Phase) => Status) : Status = { + val clazz = execution.flowmanConf.getConf(FlowmanConf.EXECUTION_SCHEDULER_CLASS) + val ctor = clazz.getDeclaredConstructor() + val scheduler = ctor.newInstance() + + scheduler.initialize(targets, phase, filter) + + val parallelism = execution.flowmanConf.getConf(FlowmanConf.EXECUTION_EXECUTOR_PARALLELISM) + val threadPool = ThreadUtils.newThreadPool("ParallelExecutor", parallelism) + implicit val ec:ExecutionContext = ExecutionContext.fromExecutorService(threadPool) + + // Allocate state variables for tracking overall Status + val statusLock = new Object + var error = false + var skipped = true + var empty = true + + def executeTarget(target:Target) : Future[Status] = { + Future { + fn(execution, target, phase) + }.andThen { case status => + // Inform scheduler that Target is built + scheduler.synchronized { + scheduler.complete(target) + } + + // Evaluate status + statusLock.synchronized { + status match { + case Success(status) => + empty = false + error |= (status != Status.SUCCESS && status != Status.SKIPPED) + skipped &= (status == Status.SKIPPED) + status + case Failure(_) => + empty = false + error = true + Status.FAILED + } + } + } + } + + def scheduleTargets(): Seq[Future[Status]] = { + val tasks = mutable.ListBuffer[Future[Status]]() + var noMoreWork = false + while (!noMoreWork) { + scheduler.synchronized(scheduler.next()) match { + case Some(target) => + val task = executeTarget(target) + tasks.append(task) + case None => + noMoreWork = true + } + } + tasks + } + + + @tailrec + def wait(tasks:Seq[Future[Status]]) : Unit = { + val runningTasks = tasks.filter(!_.isCompleted) + if (runningTasks.nonEmpty) { + val next = Future.firstCompletedOf(runningTasks) + Await.ready(next, Duration.Inf) + wait(tasks.filter(!_.isCompleted)) + } + } + + @tailrec + def 
run(tasks:Seq[Future[Status]] = Seq()) : Unit = { + // First wait for tasks + val (finishedTasks,runningTasks) = tasks.partition(_.isCompleted) + if (finishedTasks.isEmpty && runningTasks.nonEmpty) { + val next = Future.firstCompletedOf(runningTasks) + Await.ready(next, Duration.Inf) + } + + // Schedule new Tasks + val newTasks = scheduleTargets() + val allTasks = runningTasks ++ newTasks + if(scheduler.synchronized(scheduler.hasNext()) && (!error || keepGoing)) { + run(allTasks) + } + else { + wait(allTasks) + } + } + + // Now schedule and execute all targets + run() + + // Tidy up! + threadPool.shutdown() + threadPool.awaitTermination(3600, TimeUnit.SECONDS) + + // Evaluate overall status + if (empty) + Status.SUCCESS + else if (error) + Status.FAILED + else if (skipped) + Status.SKIPPED + else + Status.SUCCESS + } +} diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/execution/ProjectContext.scala b/flowman-core/src/main/scala/com/dimajix/flowman/execution/ProjectContext.scala index dd8a1e09d..6aab1cbd5 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/execution/ProjectContext.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/execution/ProjectContext.scala @@ -16,11 +16,10 @@ package com.dimajix.flowman.execution -import scala.collection.mutable +import scala.collection.concurrent.TrieMap import org.slf4j.LoggerFactory -import com.dimajix.flowman.hadoop.File import com.dimajix.flowman.model.Connection import com.dimajix.flowman.model.ConnectionIdentifier import com.dimajix.flowman.model.Identifier @@ -39,7 +38,6 @@ import com.dimajix.flowman.model.TargetIdentifier import com.dimajix.flowman.model.Template import com.dimajix.flowman.model.Test import com.dimajix.flowman.model.TestIdentifier -import com.dimajix.flowman.templating.FileWrapper object ProjectContext { @@ -103,14 +101,14 @@ final class ProjectContext private[execution]( _env + ("project" -> ((ProjectWrapper(_project), SettingLevel.SCOPE_OVERRIDE.level))), _config) { - 
private val mappings = mutable.Map[String,Mapping]() - private val overrideMappings = mutable.Map[String,Mapping]() - private val relations = mutable.Map[String,Relation]() - private val overrideRelations = mutable.Map[String,Relation]() - private val targets = mutable.Map[String,Target]() - private val connections = mutable.Map[String,Connection]() - private val jobs = mutable.Map[String,Job]() - private val tests = mutable.Map[String,Test]() + private val mappings = TrieMap[String,Mapping]() + private val overrideMappings = TrieMap[String,Mapping]() + private val relations = TrieMap[String,Relation]() + private val overrideRelations = TrieMap[String,Relation]() + private val targets = TrieMap[String,Target]() + private val connections = TrieMap[String,Connection]() + private val jobs = TrieMap[String,Job]() + private val tests = TrieMap[String,Test]() /** * Returns the namespace associated with this context. Can be null @@ -246,11 +244,7 @@ final class ProjectContext private[execution]( connections.getOrElse(identifier.name, extraConnections.get(identifier.name) .orElse(_project.connections.get(identifier.name)) - .map { t => - val instance = t.instantiate(this) - connections.update(identifier.name, instance) - instance - } + .map(t => connections.getOrElseUpdate(identifier.name, t.instantiate(this))) .getOrElse(parent.getConnection(identifier)) ) } @@ -308,15 +302,13 @@ final class ProjectContext private[execution]( } } - private def findOrInstantiate[T](identifier:Identifier[T], templates:Map[String,Template[T]], cache:mutable.Map[String,T]) = { + private def findOrInstantiate[T](identifier:Identifier[T], templates:Map[String,Template[T]], cache:TrieMap[String,T]) = { val name = identifier.name cache.get(name) .orElse { - val m = templates + templates .get(name) - .map(_.instantiate(this)) - m.foreach(m => cache.update(name, m)) - m + .map(m => cache.getOrElseUpdate(name, m.instantiate(this))) } } diff --git 
a/flowman-core/src/main/scala/com/dimajix/flowman/execution/Runner.scala b/flowman-core/src/main/scala/com/dimajix/flowman/execution/Runner.scala index 9e74f3100..0a1c69dd3 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/execution/Runner.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/execution/Runner.scala @@ -32,7 +32,9 @@ import org.slf4j.LoggerFactory import com.dimajix.common.No import com.dimajix.flowman.config.FlowmanConf import com.dimajix.flowman.execution.JobRunnerImpl.RunnerJobToken +import com.dimajix.flowman.history import com.dimajix.flowman.history.StateStore +import com.dimajix.flowman.history.StateStoreAdaptorListener import com.dimajix.flowman.history.TargetState import com.dimajix.flowman.metric.MetricBoard import com.dimajix.flowman.metric.MetricSystem @@ -49,6 +51,7 @@ import com.dimajix.flowman.model.Template import com.dimajix.flowman.model.Test import com.dimajix.flowman.util.ConsoleColors._ import com.dimajix.flowman.util.withShutdownHook +import com.dimajix.spark.SparkUtils.withJobGroup import com.dimajix.spark.sql.DataFrameUtils @@ -123,11 +126,12 @@ private[execution] sealed class RunnerImpl { * Private implementation of Job specific methods */ private[execution] object JobRunnerImpl { - private final case class RunnerJobToken(tokens:Seq[(JobListener, JobToken)]) extends JobToken - private final case class RunnerTargetToken(tokens:Seq[(JobListener, TargetToken)]) extends TargetToken + private final case class RunnerJobToken(tokens:Seq[(RunnerListener, JobToken)]) extends JobToken + private final case class RunnerTargetToken(tokens:Seq[(RunnerListener, TargetToken)]) extends TargetToken } private[execution] final class JobRunnerImpl(runner:Runner) extends RunnerImpl { private val stateStore = runner.stateStore + private val stateStoreListener = new StateStoreAdaptorListener(stateStore) private val parentExecution = runner.parentExecution /** @@ -185,28 +189,29 @@ private[execution] final class 
JobRunnerImpl(runner:Runner) extends RunnerImpl { logEnvironment(context) val instance = job.instance(arguments.map { case (k, v) => k -> v.toString }) - val allHooks = (runner.hooks ++ job.hooks).map(_.instantiate(context)) + val allHooks = if (!dryRun) stateStoreListener +: (runner.hooks ++ job.hooks).map(_.instantiate(context)) else Seq() val allMetrics = job.metrics.map(_.instantiate(context)) - withMetrics(execution.metrics, allMetrics) { - val startTime = Instant.now() - val status = recordJob(instance, phase, allHooks, dryRun) { token => - try { - withWallTime(execution.metrics, job.metadata, phase) { + val startTime = Instant.now() + val status = withListeners(job, instance, phase, allHooks) { token => + withMetrics(execution.metrics, allMetrics) { + withWallTime(execution.metrics, job.metadata, phase) { + try { executeJobTargets(execution, context, job, phase, targets, token, force, keepGoing, dryRun) } - } - catch { - case NonFatal(ex) => - logger.error(s"Caught exception during $title:", ex) - Status.FAILED + catch { + case NonFatal(ex) => + logger.error(s"Caught exception during $phase $title:", ex) + Status.FAILED + } } } - val endTime = Instant.now() - val duration = Duration.between(startTime, endTime) - logStatus(title, status, duration, endTime) - status } + + val endTime = Instant.now() + val duration = Duration.between(startTime, endTime) + logStatus(title, status, duration, endTime) + status } } @@ -235,12 +240,12 @@ private[execution] final class JobRunnerImpl(runner:Runner) extends RunnerImpl { val forceDirty = force || execution.flowmanConf.getConf(FlowmanConf.EXECUTION_TARGET_FORCE_DIRTY) val canSkip = !force && checkTarget(instance, phase) - recordTarget(instance, phase, jobToken, dryRun) { + withListeners(target, instance, phase, jobToken) { logSubtitle(s"$phase target '${target.identifier}'") // First checkJob if execution is really required if (canSkip) { - logger.info(cyan("Target '${target.identifier}' up to date for phase '$phase' 
according to state store, skipping execution")) + logger.info(cyan(s"Target '${target.identifier}' up to date for phase '$phase' according to state store, skipping execution")) logger.info("") Status.SKIPPED } @@ -283,91 +288,83 @@ private[execution] final class JobRunnerImpl(runner:Runner) extends RunnerImpl { target.phases.contains(phase) && targets.exists(_.unapplySeq(target.name).nonEmpty) executor.execute(execution, context, phase, jobTargets, targetFilter, keepGoing) { (execution, target, phase) => - executeTargetPhase(execution, target, phase, token, force, dryRun) + val sc = execution.spark.sparkContext + withJobGroup(sc, target.name, "Flowman target " + target.identifier.toString) { + executeTargetPhase(execution, target, phase, token, force, dryRun) + } } } /** - * Monitors the job execution by invoking all hooks and the state store + * Monitors the job execution by invoking all listeners * @param job * @param phase - * @param hooks + * @param listeners * @param fn * @return */ - private def recordJob(job:JobInstance, phase:Phase, hooks:Seq[Hook], dryRun:Boolean)(fn: RunnerJobToken => Status) : Status = { - def startJob() : Seq[(JobListener, JobToken)] = { - Seq((stateStore, stateStore.startJob(job, phase))) ++ - hooks.flatMap { hook => - try { - Some((hook, hook.startJob(job, phase))) - } catch { - case NonFatal(ex) => - logger.warn("Execution listener threw exception on startJob.", ex) - None - } + private def withListeners(job:Job, instance:JobInstance, phase:Phase, listeners:Seq[RunnerListener])(fn: RunnerJobToken => Status) : Status = { + def startJob() : Seq[(RunnerListener, JobToken)] = { + listeners.flatMap { hook => + try { + Some((hook, hook.startJob(job, instance, phase))) + } catch { + case NonFatal(ex) => + logger.warn(s"Execution listener threw exception on startJob: ${ex.toString}.") + None } + } } - def finishJob(tokens:Seq[(JobListener, JobToken)], status:Status) : Unit = { + def finishJob(tokens:Seq[(RunnerListener, JobToken)], 
status:Status) : Unit = { tokens.foreach { case (listener, token) => try { listener.finishJob(token, status) } catch { case NonFatal(ex) => - logger.warn("Execution listener threw exception on finishJob.", ex) + logger.warn(s"Execution listener threw exception on finishJob: ${ex.toString}.") } } } - if (dryRun) { - fn(RunnerJobToken(Seq())) - } - else { - val tokens = startJob() - withShutdownHook(finishJob(tokens, Status.FAILED)) { - val status = fn(RunnerJobToken(tokens)) - finishJob(tokens, status) - status - } + val tokens = startJob() + withShutdownHook(finishJob(tokens, Status.FAILED)) { + val status = fn(RunnerJobToken(tokens)) + finishJob(tokens, status) + status } } - private def recordTarget(target:TargetInstance, phase:Phase, job:RunnerJobToken, dryRun:Boolean)(fn: => Status) : Status = { - def startTarget() : Seq[(JobListener, TargetToken)] = { + private def withListeners(target:Target, instance:TargetInstance, phase:Phase, job:RunnerJobToken)(fn: => Status) : Status = { + def startTarget() : Seq[(RunnerListener, TargetToken)] = { job.tokens.flatMap { case(listener,jobToken) => try { - Some((listener, listener.startTarget(target, phase, Some(jobToken)))) + Some((listener, listener.startTarget(target, instance, phase, Some(jobToken)))) } catch { case NonFatal(ex) => - logger.warn("Execution listener threw exception on startTarget.", ex) + logger.warn(s"Execution listener threw exception on startTarget: ${ex.toString}.") None } } } - def finishTarget(tokens:Seq[(JobListener, TargetToken)], status:Status) : Unit = { + def finishTarget(tokens:Seq[(RunnerListener, TargetToken)], status:Status) : Unit = { tokens.foreach { case(listener, token) => try { listener.finishTarget(token, status) } catch { case NonFatal(ex) => - logger.warn("Execution listener threw exception on finishTarget.", ex) + logger.warn(s"Execution listener threw exception on finishTarget: ${ex.toString}.") } } } - if (dryRun) { - fn - } - else { - val tokens = startTarget() - 
withShutdownHook(finishTarget(tokens, Status.FAILED)) { - val status = fn - finishTarget(tokens, status) - status - } + val tokens = startTarget() + withShutdownHook(finishTarget(tokens, Status.FAILED)) { + val status = fn + finishTarget(tokens, status) + status } } @@ -467,7 +464,10 @@ private[execution] final class TestRunnerImpl(runner:Runner) extends RunnerImpl // Now run tests if fixtures where successful val testStatus = if (buildStatus == Status.SUCCESS || keepGoing) { - executeTestAssertions(execution, context, test, keepGoing, dryRun) + val sc = execution.spark.sparkContext + withJobGroup(sc, test.name, "Flowman test " + test.identifier.toString) { + executeTestAssertions(execution, context, test, keepGoing, dryRun) + } } else { Status.SKIPPED @@ -582,7 +582,10 @@ private[execution] final class TestRunnerImpl(runner:Runner) extends RunnerImpl target.phases.contains(phase) executor.execute(execution, context, phase, targets, targetFilter, keepGoing) { (execution, target, phase) => - executeTestTargetPhase(execution, target, phase, dryRun) + val sc = execution.spark.sparkContext + withJobGroup(sc, target.name, "Flowman target " + target.identifier.toString) { + executeTestTargetPhase(execution, target, phase, dryRun) + } } } diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/execution/RunnerListener.scala b/flowman-core/src/main/scala/com/dimajix/flowman/execution/RunnerListener.scala new file mode 100644 index 000000000..675ea5c45 --- /dev/null +++ b/flowman-core/src/main/scala/com/dimajix/flowman/execution/RunnerListener.scala @@ -0,0 +1,103 @@ +/* + * Copyright 2018-2020 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.execution + +import com.dimajix.flowman.model.Assertion +import com.dimajix.flowman.model.Job +import com.dimajix.flowman.model.JobInstance +import com.dimajix.flowman.model.Target +import com.dimajix.flowman.model.TargetInstance +import com.dimajix.flowman.model.Test +import com.dimajix.flowman.model.TestInstance + + +abstract class Token +abstract class JobToken extends Token +abstract class TargetToken extends Token +abstract class TestToken extends Token +abstract class AssertionToken extends Token + + +trait RunnerListener { + /** + * Starts the run and returns a token, which can be anything + * @param job + * @return + */ + def startJob(job:Job, instance:JobInstance, phase:Phase) : JobToken + + /** + * Sets the status of a job after it has been started + * @param token The token returned by startJob + * @param status + */ + def finishJob(token:JobToken, status:Status) : Unit + + /** + * Starts the run and returns a token, which can be anything + * @param target + * @return + */ + def startTarget(target:Target, instance:TargetInstance, phase:Phase, parent:Option[Token]) : TargetToken + + /** + * Sets the status of a job after it has been started + * @param token The token returned by startJob + * @param status + */ + def finishTarget(token:TargetToken, status:Status) : Unit + + /** + * Starts the test and returns a token, which can be anything + * @param test + * @return + */ + def startTest(test:Test, instance:TestInstance) : TestToken + + /** + * Sets the status of a test after it has been started + * 
@param token The token returned by startJob + * @param status + */ + def finishTest(token:TestToken, status:Status) : Unit + + /** + * Starts the assertion and returns a token, which can be anything + * @param assertion + * @return + */ + def startAssertion(assertion:Assertion, parent:Option[Token]) : AssertionToken + + /** + * Sets the status of a assertion after it has been started + * @param token The token returned by startJob + * @param status + */ + def finishAssertion(token:AssertionToken, status:Status) : Unit +} + + +abstract class AbstractRunnerListener extends RunnerListener { + override def startJob(job: Job, instance: JobInstance, phase: Phase): JobToken = new JobToken {} + override def finishJob(token: JobToken, status: Status): Unit = {} + override def startTarget(target: Target, instance:TargetInstance, phase: Phase, parent: Option[Token]): TargetToken = new TargetToken {} + override def finishTarget(token: TargetToken, status: Status): Unit = {} + override def startTest(test: Test, instance: TestInstance): TestToken = new TestToken {} + override def finishTest(token: TestToken, status: Status): Unit = {} + override def startAssertion(assertion: Assertion, parent: Option[Token]): AssertionToken = new AssertionToken {} + override def finishAssertion(token: AssertionToken, status: Status): Unit = {} +} diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/execution/ScopeContext.scala b/flowman-core/src/main/scala/com/dimajix/flowman/execution/ScopeContext.scala index 23fe86ee2..e56997ca1 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/execution/ScopeContext.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/execution/ScopeContext.scala @@ -16,6 +16,7 @@ package com.dimajix.flowman.execution +import scala.collection.concurrent.TrieMap import scala.collection.mutable import org.slf4j.LoggerFactory @@ -105,12 +106,12 @@ final class ScopeContext( scopeJobs:Map[String,Template[Job]] = Map(), 
scopeTests:Map[String,Template[Test]] = Map() ) extends AbstractContext(fullEnv, fullConfig) { - private val mappings = mutable.Map[String,Mapping]() - private val relations = mutable.Map[String,Relation]() - private val targets = mutable.Map[String,Target]() - private val connections = mutable.Map[String,Connection]() - private val jobs = mutable.Map[String,Job]() - private val tests = mutable.Map[String,Test]() + private val mappings = TrieMap[String,Mapping]() + private val relations = TrieMap[String,Relation]() + private val targets = TrieMap[String,Target]() + private val connections = TrieMap[String,Connection]() + private val jobs = TrieMap[String,Job]() + private val tests = TrieMap[String,Test]() /** * Returns the namespace associated with this context. Can be null @@ -143,9 +144,7 @@ final class ScopeContext( case Some(result) => result case None => scopeConnections.get(identifier.name) match { case Some(spec) => - val result = spec.instantiate(this) - connections.put(identifier.name, result) - result + connections.getOrElseUpdate(identifier.name, spec.instantiate(this)) case None => parent.getConnection(identifier) } } @@ -160,9 +159,7 @@ final class ScopeContext( case Some(result) => result case None => scopeMappings.get(identifier.name) match { case Some(spec) => - val result = spec.instantiate(this) - mappings.put(identifier.name, result) - result + mappings.getOrElseUpdate(identifier.name, spec.instantiate(this)) case None => parent.getMapping(identifier, allowOverrides) } } @@ -177,9 +174,7 @@ final class ScopeContext( case Some(result) => result case None => scopeRelations.get(identifier.name) match { case Some(spec) => - val result = spec.instantiate(this) - relations.put(identifier.name, result) - result + relations.getOrElseUpdate(identifier.name, spec.instantiate(this)) case None => parent.getRelation(identifier, allowOverrides) } } @@ -194,9 +189,7 @@ final class ScopeContext( case Some(result) => result case None => 
scopeTargets.get(identifier.name) match { case Some(spec) => - val result = spec.instantiate(this) - targets.put(identifier.name, result) - result + targets.getOrElseUpdate(identifier.name, spec.instantiate(this)) case None => parent.getTarget(identifier) } } @@ -211,9 +204,7 @@ final class ScopeContext( case Some(result) => result case None => scopeJobs.get(identifier.name) match { case Some(spec) => - val result = spec.instantiate(this) - jobs.put(identifier.name, result) - result + jobs.getOrElseUpdate(identifier.name, spec.instantiate(this)) case None => parent.getJob(identifier) } } @@ -228,9 +219,7 @@ final class ScopeContext( case Some(result) => result case None => scopeTests.get(identifier.name) match { case Some(spec) => - val result = spec.instantiate(this) - tests.put(identifier.name, result) - result + tests.getOrElseUpdate(identifier.name, spec.instantiate(this)) case None => parent.getTest(identifier) } } diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/execution/SimpleExecutor.scala b/flowman-core/src/main/scala/com/dimajix/flowman/execution/SimpleExecutor.scala index a799b6c3b..84eef70f6 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/execution/SimpleExecutor.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/execution/SimpleExecutor.scala @@ -16,8 +16,6 @@ package com.dimajix.flowman.execution -import scala.collection.mutable - import org.slf4j.LoggerFactory import com.dimajix.flowman.config.FlowmanConf diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/hadoop/FileCollector.scala b/flowman-core/src/main/scala/com/dimajix/flowman/hadoop/FileCollector.scala index 067e6a03a..0ead834a8 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/hadoop/FileCollector.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/hadoop/FileCollector.scala @@ -16,21 +16,18 @@ package com.dimajix.flowman.hadoop -import java.io.FileNotFoundException -import java.io.StringWriter - -import scala.math.Ordering +import 
java.io.{FileNotFoundException, StringWriter} +import com.dimajix.flowman.catalog.PartitionSpec +import com.dimajix.flowman.templating.Velocity import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.FileStatus -import org.apache.hadoop.fs.Path -import org.apache.hadoop.fs.{FileSystem => HadoopFileSystem} +import org.apache.hadoop.fs.{FileStatus, Path, FileSystem => HadoopFileSystem} import org.apache.spark.sql.SparkSession import org.apache.velocity.VelocityContext import org.slf4j.LoggerFactory -import com.dimajix.flowman.catalog.PartitionSpec -import com.dimajix.flowman.templating.Velocity +import scala.collection.parallel.ParIterable +import scala.math.Ordering object FileCollector { @@ -125,60 +122,181 @@ case class FileCollector( def compare(x: FileStatus, y: FileStatus): Int = x.getPath compareTo y.getPath } - private lazy val templateEngine = Velocity.newEngine() - private lazy val templateContext = Velocity.newContext() + private val templateEngine = Velocity.newEngine() + private val templateContext = Velocity.newContext() + private val qualifiedPath = { + val fs = path.getFileSystem(hadoopConf) + path.makeQualified(fs.getUri, fs.getWorkingDirectory) + } + /** + * Resolves the root location and performs any variable substitution. + * @return + */ def resolve() : Path = { resolve(Seq()) } + + /** + * Resolves a single partition and performs any variable substitution. + * @return + */ def resolve(partition:PartitionSpec) : Path = { resolve(partition.toSeq) } + + /** + * Resolves a single partition and performs any variable substitution. + * @return + */ def resolve(partition:Map[String,Any]) : Path = { resolve(partition.toSeq) } + + /** + * Resolves a single partition and performs any variable substitution. 
+ * @return + */ def resolve(partition:Seq[(String,Any)]) : Path = { + val path = resolvePattern(partition) + if (path.nonEmpty) { + new Path(qualifiedPath, path) + } + else { + qualifiedPath + } + } + + /** + * Resolves a single partition and performs any variable substitution. + * @return + */ + def resolvePattern(partition:PartitionSpec) : String = { + resolvePattern(partition.toSeq) + } + + /** + * Resolves a single partition and performs any variable substitution. + * @return + */ + def resolvePattern(partition:Map[String,Any]) : String = { + resolvePattern(partition.toSeq) + } + + /** + * Evaluates the pattern with the given partition + * @param partition + * @return + */ + def resolvePattern(partition:Seq[(String,Any)]) : String = { if (pattern.exists(_.nonEmpty)) { val context = new VelocityContext(templateContext) val partitionValues = defaults ++ partition.toMap partitionValues.foreach(kv => context.put(kv._1, kv._2)) val output = new StringWriter() templateEngine.evaluate(context, output, "FileCollector", pattern.get) - new Path(path, output.getBuffer.toString) + output.getBuffer.toString } else { - path + "" } } /** - * Collects files from the given partitions - * - * @param partitions - * @return - */ + * Collects files from the given partitions. The [[collect]] series + * of methods do not perform any globbing, which means that if the [[FileCollector]] contains any globbing + * patterns, those will be returned. Globbing-patterns which do not match (i.e. no files are found) will not + * be returned. 
+ * + * @param partitions + * @return + */ def collect(partitions:Iterable[PartitionSpec]) : Iterable[Path] = { requirePathAndPattern() - logger.debug(s"Collecting files in location ${path} with pattern '${pattern.get}'") - flatMap(partitions)(collectPath) + logger.debug(s"Collecting files in location ${qualifiedPath} for multiple partitions with pattern '${pattern.get}'") + parFlatMap(partitions)((fs,p) => collectPath(fs,p,false)).toList } + /** + * Collects files from the given partitions. The [[collect]] series + * of methods do not perform any globbing, which means that if the [[FileCollector]] contains any globbing + * patterns, those will be returned. Globbing-patterns which do not match (i.e. no files are found) will not + * be returned. + * + * @param partitions + * @return + */ def collect(partition:PartitionSpec) : Seq[Path] = { requirePathAndPattern() - logger.debug(s"Collecting files in location ${path} for partition ${partition.spec} using pattern '${pattern.get}'") - map(partition)(collectPath) + logger.debug(s"Collecting files in location ${qualifiedPath} for partition ${partition.spec} using pattern '${pattern.get}'") + map(partition)((fs,p) => collectPath(fs,p,false)) } /** - * Collects files from the configured directory. Does not perform partition resolution - * - * @return - */ + * Collects files from the configured directory. Does not perform partition resolution. The [[collect]] series + * of methods do not perform any globbing, which means that if the [[FileCollector]] contains any globbing + * patterns, those will be returned. Globbing-patterns which do not match (i.e. no files are found) will not + * be returned. 
+ * + * @return + */ def collect() : Seq[Path] = { - logger.debug(s"Collecting files in location ${path}, for all partitions ignoring any pattern") - map(collectPath) + logger.debug(s"Collecting files in location ${qualifiedPath}, for all partitions ignoring any pattern") + map((fs,p) => collectPath(fs,p,false)) + } + + /** + * Collects and globs files from the given partitions. Any globbing patterns will be resolved into individual + * files and/or directories. + * + * @param partitions + * @return + */ + def glob(partitions:Iterable[PartitionSpec]) : Iterable[Path] = { + requirePathAndPattern() + + logger.debug(s"Globbing files in location ${qualifiedPath} for multiple partitions with pattern '${pattern.get}'") + parFlatMap(partitions)((fs,p) => collectPath(fs,p,true)).toList + } + + /** + * Collects files from the given partitions. Any globbing patterns will be resolved into individual + * files and/or directories. + * + * @param partitions + * @return + */ + def glob(partition:PartitionSpec) : Seq[Path] = { + requirePathAndPattern() + + logger.debug(s"Globbing files in location ${qualifiedPath} for partition ${partition.spec} using pattern '${pattern.get}'") + map(partition)((fs,p) => collectPath(fs,p,true)) + } + + /** + * Collects files from the configured directory. Does not perform partition resolution. Any globbing patterns will + * be resolved into individual files and/or directories. 
+ * + * @return + */ + def glob() : Seq[Path] = { + logger.debug(s"Globbing files in location ${qualifiedPath}, for all partitions ignoring any pattern") + map((fs,p) => collectPath(fs,p,true)) + } + + /** + * Creates a single globbing expression for all partitions + * @param partitions + * @return + */ + def mkGlob(partitions:Iterable[PartitionSpec]) : Path = { + requirePathAndPattern() + + logger.debug(s"Globbing files in location ${qualifiedPath} for multiple partitions with pattern '${pattern.get}'") + val parts = partitions.map(p => resolvePattern(p)).mkString("{",",","}") + new Path(qualifiedPath, parts) } /** @@ -190,7 +308,7 @@ case class FileCollector( def delete(partitions:Iterable[PartitionSpec]) : Unit = { requirePathAndPattern() - logger.info(s"Deleting files in location ${path} with pattern '${pattern.get}'") + logger.info(s"Deleting files in location ${qualifiedPath} with pattern '${pattern.get}'") foreach(partitions)(deletePath) } @@ -200,7 +318,7 @@ case class FileCollector( * @return */ def delete() : Unit = { - logger.info(s"Deleting files in location ${path}, for all partitions ignoring any pattern") + logger.info(s"Deleting files in location ${qualifiedPath}, for all partitions ignoring any pattern") foreach(deletePath _) } @@ -210,7 +328,7 @@ case class FileCollector( * @return */ def truncate() : Unit = { - logger.info(s"Deleting files in location ${path}, for all partitions ignoring any pattern") + logger.info(s"Deleting files in location ${qualifiedPath}, for all partitions ignoring any pattern") foreach(truncatePath _) } @@ -224,12 +342,12 @@ case class FileCollector( def flatMap[T](partitions:Iterable[PartitionSpec])(fn:(HadoopFileSystem,Path) => Iterable[T]) : Iterable[T] = { requirePathAndPattern() - val fs = path.getFileSystem(hadoopConf) + val fs = qualifiedPath.getFileSystem(hadoopConf) partitions.flatMap(p => fn(fs, resolve(p))) } /** - * Maps all partitions using the given function + * Maps all partitions using the given function. 
Note that no globbing will be performed by this function. * @param partitions * @param fn * @tparam T @@ -238,28 +356,59 @@ case class FileCollector( def map[T](partitions:Iterable[PartitionSpec])(fn:(HadoopFileSystem,Path) => T) : Iterable[T] = { requirePathAndPattern() - val fs = path.getFileSystem(hadoopConf) + val fs = qualifiedPath.getFileSystem(hadoopConf) partitions.map(p => fn(fs, resolve(p))) } + /** + * Maps a single partition using the given function. Note that no globbing will be performed by this function. + * @param partitions + * @param fn + * @tparam T + * @return + */ def map[T](partition:PartitionSpec)(fn:(HadoopFileSystem,Path) => T) : T = { requirePathAndPattern() - val fs = path.getFileSystem(hadoopConf) + val fs = qualifiedPath.getFileSystem(hadoopConf) fn(fs, resolve(partition)) } def map[T](fn:(HadoopFileSystem,Path) => T) : T = { requirePath() - val fs = path.getFileSystem(hadoopConf) - fn(fs,path) + val fs = qualifiedPath.getFileSystem(hadoopConf) + fn(fs,qualifiedPath) } + def parFlatMap[T](partitions:Iterable[PartitionSpec])(fn:(HadoopFileSystem,Path) => Iterable[T]) : ParIterable[T] = { + requirePathAndPattern() + + val fs = qualifiedPath.getFileSystem(hadoopConf) + partitions.par.flatMap(p => fn(fs, resolve(p))) + } + + def parMap[T](partitions:Iterable[PartitionSpec])(fn:(HadoopFileSystem,Path) => T) : ParIterable[T] = { + requirePathAndPattern() + + val fs = qualifiedPath.getFileSystem(hadoopConf) + partitions.par.map(p => fn(fs, resolve(p))) + } + + /** + * Executes a specific function for a list of partitions. Note that no globbing will be performed by this function. + * @param partitions + * @param fn + */ def foreach(partitions:Iterable[PartitionSpec])(fn:(HadoopFileSystem,Path) => Unit) : Unit = { map(partitions)(fn) } + /** + * Executes a specific function for a list of partitions. Note that no globbing will be performed by this function. 
+ * @param partitions + * @param fn + */ def foreach(fn:(HadoopFileSystem,Path) => Unit) : Unit = { map(fn) } @@ -279,7 +428,7 @@ case class FileCollector( } private def deletePath(fs:HadoopFileSystem, path:Path) : Unit = { - if (!isGlobPath(path)) { + if (!FileUtils.isGlobbingPattern(path)) { logger.info(s"Deleting directory '$path'") fs.delete(path, true) } @@ -287,14 +436,23 @@ case class FileCollector( logger.info(s"Deleting file(s) '$path'") val files = try fs.globStatus(path) catch { case _:FileNotFoundException => null } if (files != null) - files.foreach(f => fs.delete(f.getPath, true)) + files.foreach { f => + if (!fs.delete(f.getPath, true)) { + logger.warn(s"Cannot delete file '${f.getPath}'") + } + } } } - private def collectPath(fs:HadoopFileSystem, path:Path) : Seq[Path] = { - if (isGlobPath(path)) { - globPath(fs, path) + private def collectPath(fs:HadoopFileSystem, path:Path, performGlobbing:Boolean) : Seq[Path] = { + if (FileUtils.isGlobbingPattern(path)) { + if (performGlobbing) { + globPath(fs, path) + } + else { + globPathNonEmpty(fs, path) + } } else { if (fs.exists(path)) @@ -304,14 +462,18 @@ case class FileCollector( } } - private def isGlobPath(pattern: Path): Boolean = { - pattern.toString.exists("{}[]*?\\".toSet.contains) - } private def globPath(fs:HadoopFileSystem, pattern: Path): Seq[Path] = { Option(fs.globStatus(pattern)).map { statuses => statuses.map(_.getPath.makeQualified(fs.getUri, fs.getWorkingDirectory)).toSeq }.getOrElse(Seq.empty[Path]) } + private def globPathNonEmpty(fs:HadoopFileSystem, pattern: Path): Seq[Path] = { + val nonEmpty = Option(fs.globStatus(pattern)).exists { statuses => statuses.nonEmpty } + if (nonEmpty) + Seq(pattern) + else + Seq() + } private def requirePathAndPattern() : Unit = { if (path.toString.isEmpty) diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/hadoop/FileUtils.scala b/flowman-core/src/main/scala/com/dimajix/flowman/hadoop/FileUtils.scala index 051df36ca..2046ebd66 100644 --- 
a/flowman-core/src/main/scala/com/dimajix/flowman/hadoop/FileUtils.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/hadoop/FileUtils.scala @@ -22,6 +22,10 @@ import org.apache.hadoop.fs.Path object FileUtils { + def isGlobbingPattern(pattern: Path) : Boolean = { + pattern.toString.exists("{}[]*?\\".toSet.contains) + } + /** * Returns true if the path refers to a successfully written Hadoop/Spark job. This is the case if either the * location refers to an existing file or if the location refers to a directory which contains a "_SUCCESS" file. diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/history/JdbcStateRepository.scala b/flowman-core/src/main/scala/com/dimajix/flowman/history/JdbcStateRepository.scala index 67583b7e6..0718b4778 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/history/JdbcStateRepository.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/history/JdbcStateRepository.scala @@ -28,10 +28,8 @@ import scala.language.higherKinds import org.slf4j.LoggerFactory import slick.jdbc.JdbcProfile -import com.dimajix.flowman.execution.JobToken import com.dimajix.flowman.execution.Phase import com.dimajix.flowman.execution.Status -import com.dimajix.flowman.execution.TargetToken private[history] object JdbcStateRepository { diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/history/JdbcStateStore.scala b/flowman-core/src/main/scala/com/dimajix/flowman/history/JdbcStateStore.scala index 700337003..cabefa3a2 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/history/JdbcStateStore.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/history/JdbcStateStore.scala @@ -29,10 +29,8 @@ import slick.jdbc.H2Profile import slick.jdbc.MySQLProfile import slick.jdbc.PostgresProfile -import com.dimajix.flowman.execution.JobToken import com.dimajix.flowman.execution.Phase import com.dimajix.flowman.execution.Status -import com.dimajix.flowman.execution.TargetToken import 
com.dimajix.flowman.model.JobInstance import com.dimajix.flowman.model.TargetInstance diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/history/NullStateStore.scala b/flowman-core/src/main/scala/com/dimajix/flowman/history/NullStateStore.scala index a6ef82ad0..1a0ce47d8 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/history/NullStateStore.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/history/NullStateStore.scala @@ -16,10 +16,8 @@ package com.dimajix.flowman.history -import com.dimajix.flowman.execution.JobToken import com.dimajix.flowman.execution.Phase import com.dimajix.flowman.execution.Status -import com.dimajix.flowman.execution.TargetToken import com.dimajix.flowman.model.JobInstance import com.dimajix.flowman.model.TargetInstance diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/history/StateStore.scala b/flowman-core/src/main/scala/com/dimajix/flowman/history/StateStore.scala index 81465022b..b1cb64fc8 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/history/StateStore.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/history/StateStore.scala @@ -16,17 +16,16 @@ package com.dimajix.flowman.history -import com.dimajix.flowman.execution.JobListener -import com.dimajix.flowman.execution.JobToken import com.dimajix.flowman.execution.Phase import com.dimajix.flowman.execution.Status -import com.dimajix.flowman.execution.TargetToken import com.dimajix.flowman.model.JobInstance import com.dimajix.flowman.model.TargetInstance +abstract class JobToken +abstract class TargetToken -abstract class StateStore extends JobListener { +abstract class StateStore { /** * Returns the state of a job, or None if no information is available * @param job @@ -39,14 +38,14 @@ abstract class StateStore extends JobListener { * @param job * @return */ - override def startJob(job:JobInstance, phase:Phase) : JobToken + def startJob(job:JobInstance, phase:Phase) : JobToken /** * Sets the status of a job after it has 
been started * @param token The token returned by startJob * @param status */ - override def finishJob(token:JobToken, status:Status) : Unit + def finishJob(token:JobToken, status:Status) : Unit /** * Returns the state of a specific target on its last run, or None if no information is available @@ -60,14 +59,14 @@ abstract class StateStore extends JobListener { * @param target * @return */ - override def startTarget(target:TargetInstance, phase:Phase, parent:Option[JobToken]) : TargetToken + def startTarget(target:TargetInstance, phase:Phase, parent:Option[JobToken]) : TargetToken /** * Sets the status of a job after it has been started * @param token The token returned by startJob * @param status */ - override def finishTarget(token:TargetToken, status:Status) : Unit + def finishTarget(token:TargetToken, status:Status) : Unit /** * Returns a list of job matching the query criteria diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/history/StateStoreAdaptorListener.scala b/flowman-core/src/main/scala/com/dimajix/flowman/history/StateStoreAdaptorListener.scala new file mode 100644 index 000000000..514a1c2f3 --- /dev/null +++ b/flowman-core/src/main/scala/com/dimajix/flowman/history/StateStoreAdaptorListener.scala @@ -0,0 +1,52 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.history + +import com.dimajix.flowman.execution +import com.dimajix.flowman.execution.AbstractRunnerListener +import com.dimajix.flowman.execution.Phase +import com.dimajix.flowman.execution.Status +import com.dimajix.flowman.history +import com.dimajix.flowman.model.Job +import com.dimajix.flowman.model.JobInstance +import com.dimajix.flowman.model.Target +import com.dimajix.flowman.model.TargetInstance + + +object StateStoreAdaptorListener { + final case class StateStoreJobToken(token:history.JobToken) extends execution.JobToken + final case class StateStoreTargetToken(token:history.TargetToken) extends execution.TargetToken +} +final class StateStoreAdaptorListener(store:StateStore) extends AbstractRunnerListener { + import StateStoreAdaptorListener._ + + override def startJob(job:Job, instance: JobInstance, phase: Phase): execution.JobToken = { + StateStoreJobToken(store.startJob(instance, phase)) + } + override def finishJob(token: execution.JobToken, status: Status): Unit = { + val t = token.asInstanceOf[StateStoreJobToken].token + store.finishJob(t, status) + } + override def startTarget(target:Target, instance: TargetInstance, phase: Phase, parent: Option[execution.Token]): execution.TargetToken = { + val t = parent.map(_.asInstanceOf[StateStoreJobToken].token) + StateStoreTargetToken(store.startTarget(instance, phase, t)) + } + override def finishTarget(token: execution.TargetToken, status: Status): Unit = { + val t = token.asInstanceOf[StateStoreTargetToken].token + store.finishTarget(t, status) + } +} diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/jdbc/MsSqlServerDialect.scala b/flowman-core/src/main/scala/com/dimajix/flowman/jdbc/MsSqlServerDialect.scala new file mode 100644 index 000000000..3b9d34c99 --- /dev/null +++ b/flowman-core/src/main/scala/com/dimajix/flowman/jdbc/MsSqlServerDialect.scala @@ -0,0 +1,62 @@ +/* + * Copyright 2018-2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.jdbc + +import java.util.Locale + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.jdbc.JdbcType + +import com.dimajix.flowman.jdbc.MySQLDialect.Statements +import com.dimajix.flowman.types.BinaryType +import com.dimajix.flowman.types.BooleanType +import com.dimajix.flowman.types.FieldType +import com.dimajix.flowman.types.ShortType +import com.dimajix.flowman.types.StringType +import com.dimajix.flowman.types.TimestampType + + +object MsSqlServerDialect extends BaseDialect { + private object Statements extends MsSqlServerStatements(this) + + override def canHandle(url : String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:sqlserver") + + override def quoteIdentifier(colName: String): String = { + s""""$colName"""" + } + + override def getJdbcType(dt: FieldType): Option[JdbcType] = dt match { + case TimestampType => Some(JdbcType("DATETIME", java.sql.Types.TIMESTAMP)) + case StringType => Some(JdbcType("NVARCHAR(MAX)", java.sql.Types.NVARCHAR)) + case BooleanType => Some(JdbcType("BIT", java.sql.Types.BIT)) + case BinaryType => Some(JdbcType("VARBINARY(MAX)", java.sql.Types.VARBINARY)) + case ShortType => Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) + case _ => super.getJdbcType(dt) + } + + override def statement : SqlStatements = Statements +} + + +class MsSqlServerStatements(dialect: BaseDialect) extends BaseStatements(dialect) { + override def firstRow(table: 
TableIdentifier, condition:String) : String = { + if (condition.isEmpty) + s"SELECT TOP 1 * FROM ${dialect.quote(table)}" + else + s"SELECT TOP 1 * FROM ${dialect.quote(table)} WHERE $condition" + } +} diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/jdbc/SqlDialects.scala b/flowman-core/src/main/scala/com/dimajix/flowman/jdbc/SqlDialects.scala index 44d9203cb..d501f9a38 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/jdbc/SqlDialects.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/jdbc/SqlDialects.scala @@ -42,6 +42,7 @@ object SqlDialects { registerDialect(HiveDialect) registerDialect(DerbyDialect) registerDialect(MySQLDialect) + registerDialect(MsSqlServerDialect) /** * Fetch the JdbcDialect class corresponding to a given database url. diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/metric/MetricSystem.scala b/flowman-core/src/main/scala/com/dimajix/flowman/metric/MetricSystem.scala index 56e6b0d44..0a150c7f1 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/metric/MetricSystem.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/metric/MetricSystem.scala @@ -16,11 +16,11 @@ package com.dimajix.flowman.metric -import scala.collection.mutable - import com.dimajix.common.IdentityHashSet +import com.dimajix.common.SynchronizedSet import com.dimajix.flowman.execution.Status + trait MetricCatalog { /** * returns all metrics @@ -53,9 +53,9 @@ trait MetricCatalog { class MetricSystem extends MetricCatalog { - private val metricBundles : mutable.Set[MetricBundle] = IdentityHashSet() - private val metricBoards : mutable.Set[MetricBoard] = IdentityHashSet() - private val metricSinks : mutable.Set[MetricSink] = IdentityHashSet() + private val metricBundles : SynchronizedSet[MetricBundle] = SynchronizedSet(IdentityHashSet()) + private val metricBoards : SynchronizedSet[MetricBoard] = SynchronizedSet(IdentityHashSet()) + private val metricSinks : SynchronizedSet[MetricSink] = 
SynchronizedSet(IdentityHashSet()) /** * Registers an individual metric. It will be wrapped into a bundle. diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/metric/MultiMetricBundle.scala b/flowman-core/src/main/scala/com/dimajix/flowman/metric/MultiMetricBundle.scala index b3e93b52e..47069ad55 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/metric/MultiMetricBundle.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/metric/MultiMetricBundle.scala @@ -16,13 +16,12 @@ package com.dimajix.flowman.metric -import scala.collection.mutable - import com.dimajix.common.IdentityHashSet +import com.dimajix.common.SynchronizedSet final case class MultiMetricBundle(override val name:String, override val labels:Map[String,String]) extends MetricBundle { - private val bundleMetrics : mutable.Set[Metric] = IdentityHashSet() + private val bundleMetrics : SynchronizedSet[Metric] = SynchronizedSet(IdentityHashSet()) def addMetric(metric:Metric) : Unit = { bundleMetrics.add(metric) diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/metric/PrometheusMetricSink.scala b/flowman-core/src/main/scala/com/dimajix/flowman/metric/PrometheusMetricSink.scala index 7275a86c9..1709c333e 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/metric/PrometheusMetricSink.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/metric/PrometheusMetricSink.scala @@ -58,7 +58,7 @@ extends AbstractMetricSink { */ val metrics = board.metrics(catalog(board), status).flatMap { metric => val name = metric.name - val labels = metric.labels.map(kv => s"""${kv._1}="${kv._2}"""").mkString("{", ",", "}") + val labels = metric.labels.map(kv => s"""${kv._1}="${sanitize(kv._2)}"""").mkString("{", ",", "}") metric match { case gauge: GaugeMetric => Some(name -> s"$name$labels ${gauge.value}") case _ => None @@ -88,6 +88,8 @@ extends AbstractMetricSink { httpClient.execute(httpPost, handler) } catch { + case ex:HttpResponseException => + logger.warn(s"Got error 
response ${ex.getStatusCode} from Prometheus at $url: ${ex.toString}. Payload was:\n$payload") case NonFatal(ex) => logger.warn(s"Cannot publishing metrics to Prometheus at $url: ${ex.toString}") } @@ -95,4 +97,8 @@ extends AbstractMetricSink { httpClient.close() } } + + private def sanitize(str:String) : String = { + str.replace("\"","\\\"").replace("\n","").trim + } } diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/model/Hook.scala b/flowman-core/src/main/scala/com/dimajix/flowman/model/Hook.scala index d8b5464bb..ff97e6a7a 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/model/Hook.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/model/Hook.scala @@ -16,12 +16,15 @@ package com.dimajix.flowman.model +import com.dimajix.flowman.execution.AssertionToken import com.dimajix.flowman.execution.Context -import com.dimajix.flowman.execution.JobListener import com.dimajix.flowman.execution.JobToken import com.dimajix.flowman.execution.Phase +import com.dimajix.flowman.execution.RunnerListener import com.dimajix.flowman.execution.Status import com.dimajix.flowman.execution.TargetToken +import com.dimajix.flowman.execution.TestToken +import com.dimajix.flowman.execution.Token object Hook { @@ -51,7 +54,7 @@ object Hook { } -trait Hook extends Instance with JobListener { +trait Hook extends Instance with RunnerListener { /** * Returns the category of this resource * @return @@ -63,7 +66,7 @@ trait Hook extends Instance with JobListener { * @param job * @return */ - override def startJob(job:JobInstance, phase:Phase) : JobToken + override def startJob(job:Job, instance:JobInstance, phase:Phase) : JobToken /** * Sets the status of a job after it has been started @@ -77,7 +80,7 @@ trait Hook extends Instance with JobListener { * @param target * @return */ - override def startTarget(target:TargetInstance, phase:Phase, parent:Option[JobToken]) : TargetToken + override def startTarget(target:Target, instance:TargetInstance, phase:Phase, 
parent:Option[Token]) : TargetToken /** * Sets the status of a job after it has been started @@ -93,4 +96,13 @@ trait Hook extends Instance with JobListener { */ abstract class BaseHook extends AbstractInstance with Hook { protected override def instanceProperties: Hook.Properties + + override def startJob(job: Job, instance: JobInstance, phase: Phase): JobToken = new JobToken {} + override def finishJob(token: JobToken, status: Status): Unit = {} + override def startTarget(target: Target, instance:TargetInstance, phase: Phase, parent: Option[Token]): TargetToken = new TargetToken {} + override def finishTarget(token: TargetToken, status: Status): Unit = {} + override def startTest(test: Test, instance: TestInstance): TestToken = new TestToken {} + override def finishTest(token: TestToken, status: Status): Unit = {} + override def startAssertion(assertion: Assertion, parent: Option[Token]): AssertionToken = new AssertionToken {} + override def finishAssertion(token: AssertionToken, status: Status): Unit = {} } diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/model/Job.scala b/flowman-core/src/main/scala/com/dimajix/flowman/model/Job.scala index 7ece4af99..1c3027055 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/model/Job.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/model/Job.scala @@ -53,7 +53,7 @@ final case class JobInstance( require(job != null) require(args != null) - def asMap = + def asMap: Map[String, String] = Map( "namespace" -> namespace, "project" -> project, @@ -336,7 +336,8 @@ final case class Job( } /** - * Parse command line parameters + * Parse command line parameters into [[FieldValue]] entities, which then can be interpolated via the + * [[interpolate]] method. * @param rawArgs * @return */ @@ -379,8 +380,8 @@ final case class Job( } /** - * This method will execute all targets secified in this job in the correct order. 
It is a convenient wrapper - * around JobExecutor, which actually takes care about all the details + * This method will execute all targets specified in this job in the correct order. It is a convenient wrapper + * using the [[Runner]] class, which actually takes care about all the details. * @param executor * @param phase * @param args diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/model/Module.scala b/flowman-core/src/main/scala/com/dimajix/flowman/model/Module.scala index 3f4d39a50..2a57cfba8 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/model/Module.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/model/Module.scala @@ -75,7 +75,7 @@ object Module { file.list() .filter(_.isFile) .map(f => loadFile(f)) - .reduce((l,r) => l.merge(r)) + .foldLeft(Module())((l,r) => l.merge(r)) } else { loadFile(file) diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/model/SystemSettings.scala b/flowman-core/src/main/scala/com/dimajix/flowman/model/SystemSettings.scala index 5fd4a403c..5f9e6cce0 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/model/SystemSettings.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/model/SystemSettings.scala @@ -37,7 +37,7 @@ object SystemSettings { private val logger = LoggerFactory.getLogger(classOf[SystemSettings]) def file(file: File): SystemSettings = { - logger.info(s"Reading system settings file ${file.toString}") + logger.info(s"Reading Flowman system settings file ${file.toString}") ObjectMapper.read[SystemSettings](file) } def file(filename:String) : SystemSettings = { @@ -47,7 +47,7 @@ object SystemSettings { ObjectMapper.read[SystemSettings](stream) } def url(url:URL) : SystemSettings = { - logger.info(s"Reading system settings from url ${url.toString}") + logger.info(s"Reading Flowman system settings from url ${url.toString}") val con = url.openConnection() con.setUseCaches(false) stream(con.getInputStream) @@ -56,7 +56,7 @@ object SystemSettings { 
ObjectMapper.parse[SystemSettings](text) } def default() : SystemSettings = { - logger.info(s"Using default system settings") + logger.info(s"Using Flowman default system settings") new SystemSettings } } diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/model/Target.scala b/flowman-core/src/main/scala/com/dimajix/flowman/model/Target.scala index 637636ac4..3860624f6 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/model/Target.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/model/Target.scala @@ -41,7 +41,7 @@ final case class TargetInstance( require(target != null) require(partitions != null) - def asMap = + def asMap: Map[String, String] = Map( "namespace" -> namespace, "project" -> project, diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/model/Test.scala b/flowman-core/src/main/scala/com/dimajix/flowman/model/Test.scala index 92a2701e9..adcbf299a 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/model/Test.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/model/Test.scala @@ -18,6 +18,24 @@ package com.dimajix.flowman.model import com.dimajix.flowman.execution.Context +final case class TestInstance( + namespace:String, + project:String, + test:String +) { + require(namespace != null) + require(project != null) + require(test != null) + + def asMap: Map[String, String] = + Map( + "namespace" -> namespace, + "project" -> project, + "name" -> test, + "test" -> test + ) +} + object Test { object Properties { @@ -94,7 +112,27 @@ final case class Test( override def category: String = "test" override def kind : String = "test" + /** + * Returns an identifier for this test + * @return + */ def identifier : TestIdentifier = TestIdentifier(name, project.map(_.name)) + /** + * Returns a description of the test + * @return + */ def description : Option[String] = instanceProperties.description + + /** + * Returns a TestInstance used for state management + * @return + */ + def instance : TestInstance = { + 
TestInstance( + namespace.map(_.name).getOrElse(""), + project.map(_.name).getOrElse(""), + name + ) + } } diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/package.scala b/flowman-core/src/main/scala/com/dimajix/flowman/package.scala index 242336662..6f377fbb8 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/package.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/package.scala @@ -20,10 +20,11 @@ import com.dimajix.common.Resources package object flowman { + final private val props = Resources.loadProperties("com/dimajix/flowman/flowman.properties") final val SPARK_VERSION = org.apache.spark.SPARK_VERSION + final val HADOOP_VERSION = org.apache.hadoop.util.VersionInfo.getVersion final val JAVA_VERSION = System.getProperty("java.version") - final val FLOWMAN_VERSION = { - Resources.loadProperties("com/dimajix/flowman/flowman.properties") - .getProperty("version") - } + final val FLOWMAN_VERSION = props.getProperty("version") + final val SPARK_BUILD_VERSION = props.getProperty("spark_version") + final val HADOOP_BUILD_VERSION = props.getProperty("hadoop_version") } diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/storage/FileStore.scala b/flowman-core/src/main/scala/com/dimajix/flowman/storage/FileStore.scala index 9e7d1f298..9b4b60e5e 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/storage/FileStore.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/storage/FileStore.scala @@ -15,6 +15,7 @@ */ package com.dimajix.flowman.storage + import org.apache.hadoop.fs.Path import org.slf4j.LoggerFactory @@ -27,6 +28,11 @@ class FileStore(root:File) extends Store { private val logger = LoggerFactory.getLogger(classOf[FileStore]) private val globPattern = new Path("*/project.{yml,yaml}") + /** + * Loads a project via its name (not its filename or directory) + * @param name + * @return + */ override def loadProject(name: String): Project = { root.glob(globPattern) .flatMap(file => 
loadProjectManifest(file).map((file, _))) @@ -35,10 +41,24 @@ class FileStore(root:File) extends Store { .getOrElse(throw new NoSuchProjectException(name)) } + /** + * Stores a project inside this persistent storage + * @param project + */ override def storeProject(project: Project): Unit = ??? + /** + * Removes a project from this persistent storage + * @param name + */ override def removeProject(name: String): Unit = ??? + /** + * Retrieves a list of all projects. The returned projects only contain some fundamental information + * like the projects's name, its basedir and so on. The project itself (mappings, relations, targets etc) + * will not be loaded + * @return + */ override def listProjects(): Seq[Project] = { root.glob(globPattern) .flatMap(loadProjectManifest) diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/storage/Store.scala b/flowman-core/src/main/scala/com/dimajix/flowman/storage/Store.scala index 09cdd2b32..3428220a1 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/storage/Store.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/storage/Store.scala @@ -19,9 +19,37 @@ package com.dimajix.flowman.storage import com.dimajix.flowman.model.Project +/** + * The [[Store]] is the abstract class for implementing project stores. These stores offer an abstraction of + * persistent storage, which allows projects to be stored not only in filesystems, but also in databases. To + * enable this flexibility, projects are references solely by their name and not by their physical storage location + * like a path, filename or directory. 
+ */ abstract class Store { + /** + * Loads a project via its name (not its filename or directory) + * @param name + * @return + */ def loadProject(name:String) : Project + + /** + * Stores a project inside this persistent storage + * @param project + */ def storeProject(project: Project) : Unit + + /** + * Removes a project from this persistent storage + * @param name + */ def removeProject(name:String) : Unit + + /** + * Retrieves a list of all projects. The returned projects only contain some fundamental information + * like the projects's name, its basedir and so on. The project itself (mappings, relations, targets etc) + * will not be loaded + * @return + */ def listProjects() : Seq[Project] } diff --git a/flowman-core/src/main/scala/com/dimajix/flowman/templating/Velocity.scala b/flowman-core/src/main/scala/com/dimajix/flowman/templating/Velocity.scala index af4dbd4e5..24a6fd2dc 100644 --- a/flowman-core/src/main/scala/com/dimajix/flowman/templating/Velocity.scala +++ b/flowman-core/src/main/scala/com/dimajix/flowman/templating/Velocity.scala @@ -95,7 +95,9 @@ object Velocity { * Creates a new VelocityEngine * @return */ - def newEngine() : VelocityEngine = { + def newEngine() : VelocityEngine = singletonEngine + + private lazy val singletonEngine = { val ve = new VelocityEngine() ve.setProperty(RuntimeConstants.VM_ARGUMENTS_STRICT, "true") ve.setProperty(RuntimeConstants.RUNTIME_REFERENCES_STRICT, "true") diff --git a/flowman-core/src/test/scala/com/dimajix/flowman/execution/MappingUtilsTest.scala b/flowman-core/src/test/scala/com/dimajix/flowman/execution/MappingUtilsTest.scala index 67f1bdb85..d48ac7ae0 100644 --- a/flowman-core/src/test/scala/com/dimajix/flowman/execution/MappingUtilsTest.scala +++ b/flowman-core/src/test/scala/com/dimajix/flowman/execution/MappingUtilsTest.scala @@ -52,7 +52,7 @@ object MappingUtilsTest { } class MappingUtilsTest extends AnyFlatSpec with Matchers { - "The MappingUtils" should "collect all requirements of a mapping" in { 
+ "The MappingUtils" should "glob all requirements of a mapping" in { val project = Project( "test", mappings = Map( diff --git a/flowman-core/src/test/scala/com/dimajix/flowman/execution/ParallelExecutorTest.scala b/flowman-core/src/test/scala/com/dimajix/flowman/execution/ParallelExecutorTest.scala new file mode 100644 index 000000000..63bc973d3 --- /dev/null +++ b/flowman-core/src/test/scala/com/dimajix/flowman/execution/ParallelExecutorTest.scala @@ -0,0 +1,80 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.execution + +import org.scalamock.scalatest.MockFactory +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import com.dimajix.flowman.model.Target +import com.dimajix.flowman.model.TargetIdentifier +import com.dimajix.spark.testing.LocalSparkSession + + +class ParallelExecutorTest extends AnyFlatSpec with Matchers with MockFactory with LocalSparkSession { + "The ParallelExecutor" should "return SUCCESS on empty lists" in { + val session = Session.builder().build() + val context = session.context + val execution = session.execution + + val targets = Seq() + + val executor = new ParallelExecutor + val result = executor.execute(execution, context, Phase.BUILD, targets, _ => true, keepGoing = false) { + (execution, target, phase) => Status.SUCCESS + } + + result should be (Status.SUCCESS) + } + + it should "work" in { + val session = Session.builder().build() + val context = session.context + val execution = session.execution + + val t1 = mock[Target] + (t1.identifier _).expects().atLeastOnce().returns(TargetIdentifier("t1", "default")) + (t1.name _).expects().atLeastOnce().returns("t1") + (t1.requires _).expects(*).atLeastOnce().returns(Set()) + (t1.provides _).expects(*).atLeastOnce().returns(Set()) + (t1.before _).expects().atLeastOnce().returns(Seq()) + (t1.after _).expects().atLeastOnce().returns(Seq()) + (t1.phases _).expects().atLeastOnce().returns(Set(Phase.CREATE, Phase.BUILD, Phase.VERIFY, Phase.TRUNCATE, Phase.DESTROY)) + (t1.execute _).expects(*, Phase.BUILD).returns(Unit) + + val t2 = mock[Target] + (t2.identifier _).expects().atLeastOnce().returns(TargetIdentifier("t2", "default")) + (t2.name _).expects().atLeastOnce().returns("t2") + (t2.requires _).expects(*).atLeastOnce().returns(Set()) + (t2.provides _).expects(*).atLeastOnce().returns(Set()) + (t2.before _).expects().atLeastOnce().returns(Seq()) + (t2.after _).expects().atLeastOnce().returns(Seq()) + (t2.phases 
_).expects().atLeastOnce().returns(Set(Phase.CREATE, Phase.BUILD, Phase.VERIFY, Phase.TRUNCATE, Phase.DESTROY)) + (t2.execute _).expects(*, Phase.BUILD).returns(Unit) + + val targets = Seq(t1, t2) + + val executor = new ParallelExecutor + val result = executor.execute(execution, context, Phase.BUILD, targets, _ => true, keepGoing = false) { + (execution, target, phase) => + target.execute(execution, phase) + Status.SUCCESS + } + + result should be (Status.SUCCESS) + } +} diff --git a/flowman-core/src/test/scala/com/dimajix/flowman/execution/RunnerJobTest.scala b/flowman-core/src/test/scala/com/dimajix/flowman/execution/RunnerJobTest.scala index f920d4bd1..758b5b06a 100644 --- a/flowman-core/src/test/scala/com/dimajix/flowman/execution/RunnerJobTest.scala +++ b/flowman-core/src/test/scala/com/dimajix/flowman/execution/RunnerJobTest.scala @@ -36,7 +36,6 @@ import com.dimajix.flowman.model.Target import com.dimajix.flowman.model.TargetIdentifier import com.dimajix.flowman.model.TargetInstance import com.dimajix.flowman.model.Template -import com.dimajix.flowman.model.Test import com.dimajix.flowman.types.StringType @@ -174,13 +173,13 @@ class RunnerJobTest extends AnyFlatSpec with MockFactory with Matchers { .build() (targetTemplate.instantiate _).expects(*).returns(target) - (target.name _).expects().returns("some_target") + (target.identifier _).expects().atLeastOnce().returns(TargetIdentifier("project/some_target")) + (target.name _).expects().atLeastOnce().returns("some_target") (target.before _).expects().returns(Seq()) (target.after _).expects().returns(Seq()) (target.phases _).expects().atLeastOnce().returns(Set(Phase.BUILD)) (target.requires _).expects(Phase.BUILD).atLeastOnce().returns(Set()) (target.provides _).expects(Phase.BUILD).atLeastOnce().returns(Set()) - (target.identifier _).expects().atLeastOnce().returns(TargetIdentifier("project/some_target")) (target.instance _).expects().atLeastOnce().returns(TargetInstance("default", "project", "some_target")) 
(target.dirty _).expects(*, Phase.BUILD).returns(Yes) (target.metadata _).expects().atLeastOnce().returns(Metadata(name="some_target", kind="target", category="target")) @@ -361,16 +360,16 @@ class RunnerJobTest extends AnyFlatSpec with MockFactory with Matchers { val jobHook = mock[Hook] val jobJobToken = new JobToken {} val jobTargetToken = new TargetToken {} - (jobHook.startJob _).expects( where( (_:JobInstance, phase:Phase) => phase == Phase.BUILD) ).returning(jobJobToken) + (jobHook.startJob _).expects( where( (_:Job, _:JobInstance, phase:Phase) => phase == Phase.BUILD) ).returning(jobJobToken) (jobHook.finishJob _).expects(jobJobToken, Status.SUCCESS) - (jobHook.startTarget _).expects( where( (_:TargetInstance, phase:Phase, token:Option[JobToken]) => phase == Phase.BUILD && token == Some(jobJobToken))).returning(jobTargetToken) + (jobHook.startTarget _).expects( where( (_:Target, _:TargetInstance, phase:Phase, token:Option[Token]) => phase == Phase.BUILD && token == Some(jobJobToken))).returning(jobTargetToken) (jobHook.finishTarget _).expects(jobTargetToken, Status.SUCCESS) val namespaceHook = mock[Hook] val namespaceJobToken = new JobToken {} val namespaceTargetToken = new TargetToken {} - (namespaceHook.startJob _).expects( where( (_:JobInstance, phase:Phase) => phase == Phase.BUILD) ).returning(namespaceJobToken) + (namespaceHook.startJob _).expects( where( (_:Job, _:JobInstance, phase:Phase) => phase == Phase.BUILD) ).returning(namespaceJobToken) (namespaceHook.finishJob _).expects(namespaceJobToken, Status.SUCCESS) - (namespaceHook.startTarget _).expects( where( (_:TargetInstance, phase:Phase, token:Option[JobToken]) => phase == Phase.BUILD && token == Some(namespaceJobToken))).returning(namespaceTargetToken) + (namespaceHook.startTarget _).expects( where( (_:Target, _:TargetInstance, phase:Phase, token:Option[Token]) => phase == Phase.BUILD && token == Some(namespaceJobToken))).returning(namespaceTargetToken) (namespaceHook.finishTarget 
_).expects(namespaceTargetToken, Status.SUCCESS) val ns = Namespace( diff --git a/flowman-core/src/test/scala/com/dimajix/flowman/execution/RunnerTestTest.scala b/flowman-core/src/test/scala/com/dimajix/flowman/execution/RunnerTestTest.scala index b816e3005..b9bba6634 100644 --- a/flowman-core/src/test/scala/com/dimajix/flowman/execution/RunnerTestTest.scala +++ b/flowman-core/src/test/scala/com/dimajix/flowman/execution/RunnerTestTest.scala @@ -139,6 +139,7 @@ class RunnerTestTest extends AnyFlatSpec with MockFactory with Matchers with Loc (targetTemplate.instantiate _).expects(*).returns(target) (target.identifier _).expects().atLeastOnce().returns(TargetIdentifier("target", "default")) + (target.name _).expects().atLeastOnce().returns("target") (target.requires _).expects(*).atLeastOnce().returns(Set()) (target.provides _).expects(*).atLeastOnce().returns(Set()) (target.before _).expects().atLeastOnce().returns(Seq()) @@ -151,6 +152,7 @@ class RunnerTestTest extends AnyFlatSpec with MockFactory with Matchers with Loc (fixtureTemplate.instantiate _).expects(*).returns(fixture) (fixture.identifier _).expects().atLeastOnce().returns(TargetIdentifier("fixture", "default")) + (fixture.name _).expects().atLeastOnce().returns("fixture") (fixture.requires _).expects(*).atLeastOnce().returns(Set()) (fixture.provides _).expects(*).atLeastOnce().returns(Set()) (fixture.before _).expects().atLeastOnce().returns(Seq(TargetIdentifier("target", "default"))) @@ -250,6 +252,7 @@ class RunnerTestTest extends AnyFlatSpec with MockFactory with Matchers with Loc (targetTemplate.instantiate _).expects(*).returns(target) (target.identifier _).expects().atLeastOnce().returns(TargetIdentifier("target", "default")) + (target.name _).expects().atLeastOnce().returns("target") (target.requires _).expects(*).atLeastOnce().returns(Set()) (target.provides _).expects(*).atLeastOnce().returns(Set()) (target.before _).expects().atLeastOnce().returns(Seq()) @@ -262,6 +265,7 @@ class RunnerTestTest 
extends AnyFlatSpec with MockFactory with Matchers with Loc (fixtureTemplate.instantiate _).expects(*).returns(fixture) (fixture.identifier _).expects().atLeastOnce().returns(TargetIdentifier("fixture", "default")) + (fixture.name _).expects().atLeastOnce().returns("fixture") (fixture.requires _).expects(*).atLeastOnce().returns(Set()) (fixture.provides _).expects(*).atLeastOnce().returns(Set()) (fixture.before _).expects().atLeastOnce().returns(Seq(TargetIdentifier("target", "default"))) @@ -330,6 +334,7 @@ class RunnerTestTest extends AnyFlatSpec with MockFactory with Matchers with Loc (targetTemplate.instantiate _).expects(*).returns(target) (target.identifier _).expects().atLeastOnce().returns(TargetIdentifier("target", "default")) + (target.name _).expects().atLeastOnce().returns("target") (target.requires _).expects(*).atLeastOnce().returns(Set()) (target.provides _).expects(*).atLeastOnce().returns(Set()) (target.before _).expects().atLeastOnce().returns(Seq()) diff --git a/flowman-core/src/test/scala/com/dimajix/flowman/execution/SimpleSchedulerTest.scala b/flowman-core/src/test/scala/com/dimajix/flowman/execution/SimpleSchedulerTest.scala index aae973703..5bfdaaf35 100644 --- a/flowman-core/src/test/scala/com/dimajix/flowman/execution/SimpleSchedulerTest.scala +++ b/flowman-core/src/test/scala/com/dimajix/flowman/execution/SimpleSchedulerTest.scala @@ -16,8 +16,6 @@ package com.dimajix.flowman.execution -import scala.collection.mutable - import org.apache.hadoop.fs.Path import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers diff --git a/flowman-core/src/test/scala/com/dimajix/flowman/hadoop/FileCollectorTest.scala b/flowman-core/src/test/scala/com/dimajix/flowman/hadoop/FileCollectorTest.scala index 082c78d13..f36dbd97d 100644 --- a/flowman-core/src/test/scala/com/dimajix/flowman/hadoop/FileCollectorTest.scala +++ b/flowman-core/src/test/scala/com/dimajix/flowman/hadoop/FileCollectorTest.scala @@ -58,6 +58,7 @@ class 
FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll fileSystem.mkdirs(new Path(workingDirectory, "data/2017/06/19/")) fileSystem.create(new Path(workingDirectory, "data/2017/06/19/1497830400.i-02255f88.rtb-imp.log")).close() fileSystem.create(new Path(workingDirectory, "data/2017/06/19/1497831300.i-02255f88.rtb-imp.log")).close() + fileSystem.create(new Path(workingDirectory, "data/2017/06/19/1497831300.i-02255f89.rtb-imp.log")).close() fileSystem.create(new Path(workingDirectory, "data/2017/06/19/1497832200.i-02255f88.rtb-imp.log")).close() fileSystem.create(new Path(workingDirectory, "data/2017/06/19/1497833100.i-02255f88.rtb-imp.log")).close() fileSystem.create(new Path(workingDirectory, "data/2017/06/19/1497834000.i-02255f88.rtb-imp.log")).close() @@ -71,7 +72,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll val collector = FileCollector.builder(hadoopConf) .path(new Path(workingDirectory, "data/2016/02/01")) .build() - val files = collector.collect() + val files = collector.glob() files.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/02/01") @@ -83,7 +84,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .path(new Path(workingDirectory, "data/2016/0*/0*")) .build() - val files = collector.collect() + val files = collector.glob() files.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/03"), @@ -94,6 +95,38 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll )) } + it should "not return empty directories when using glob" in { + val collector = FileCollector.builder(hadoopConf) + .path(workingDirectory) + .path(new Path(workingDirectory, "data/2018/0*/0*")) + .build() + val files = collector.glob() + + files.sortBy(_.toString) should be (Seq()) + } + + it should "not glob when using collect" in { + val collector = FileCollector.builder(hadoopConf) + 
.path(workingDirectory) + .path(new Path(workingDirectory, "data/2016/0*/0*")) + .build() + val files = collector.collect() + + files.sortBy(_.toString) should be (Seq( + new Path(workingDirectory, "data/2016/0*/0*") + )) + } + + it should "not return empty directories when using collect" in { + val collector = FileCollector.builder(hadoopConf) + .path(workingDirectory) + .path(new Path(workingDirectory, "data/2018/0*/0*")) + .build() + val files = collector.collect() + + files.sortBy(_.toString) should be (Seq()) + } + it should "support default values" in { val collector = FileCollector.builder(hadoopConf) .path(workingDirectory) @@ -102,24 +135,24 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .defaults(Map("year" -> "*", "month" -> "*", "day" -> "*")) .build() - val files1 = collector.collect(Seq(PartitionSpec(Map("year" -> "2016", "month" -> "01", "day" -> "03")))) + val files1 = collector.glob(Seq(PartitionSpec(Map("year" -> "2016", "month" -> "01", "day" -> "03")))) files1.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/03") )) - val files2 = collector.collect(Seq(PartitionSpec(Map("year" -> "2016", "month" -> "01")))) + val files2 = collector.glob(Seq(PartitionSpec(Map("year" -> "2016", "month" -> "01")))) files2.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/03"), new Path(workingDirectory, "data/2016/01/04"), new Path(workingDirectory, "data/2016/01/05") )) - val files3 = collector.collect(Seq(PartitionSpec(Map("year" -> "2016", "day" -> "01")))) + val files3 = collector.glob(Seq(PartitionSpec(Map("year" -> "2016", "day" -> "01")))) files3.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/02/01") )) - val files4 = collector.collect(Seq(PartitionSpec(Map("year" -> "2016")))) + val files4 = collector.glob(Seq(PartitionSpec(Map("year" -> "2016")))) files4.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, 
"data/2016/01/03"), new Path(workingDirectory, "data/2016/01/04"), @@ -129,7 +162,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll )) } - it should "collect all directories in given daily range (1)" in { + it should "glob all directories in given daily range (1)" in { val firstDate = UtcTimestamp.of(2016, Month.JANUARY, 3, 0, 0) val lastDate = UtcTimestamp.of(2016, Month.FEBRUARY, 2, 0, 0) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -140,7 +173,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("data/$ts.format('yyyy/MM/dd')") .build() - val files = collector.collect(partitions) + val files = collector.glob(partitions) files.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/03"), @@ -150,7 +183,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll )) } - it should "collect all files in given daily range (2)" in { + it should "glob all files in given daily range (2)" in { val firstDate = UtcTimestamp.of(2016, Month.JANUARY, 4, 0, 0) val lastDate = UtcTimestamp.of(2016, Month.JANUARY, 5, 0, 0) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -161,14 +194,14 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("data/$ts.format('yyyy/MM/dd')") .build() - val files = collector.collect(partitions) + val files = collector.glob(partitions) files.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/04") )) } - it should "collect all files in given daily range (3)" in { + it should "glob all files in given daily range (3)" in { val firstDate = UtcTimestamp.of(2016, Month.JANUARY, 4, 0, 0) val lastDate = UtcTimestamp.of(2016, Month.JANUARY, 5, 1, 0) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -179,14 +212,14 @@ class FileCollectorTest extends AnyFlatSpec with 
Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("data/$ts.format('yyyy/MM/dd')") .build() - val files = collector.collect(partitions) + val files = collector.glob(partitions) files.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/04") )) } - it should "collect all files in given daily range (4)" in { + it should "glob all files in given daily range (4)" in { val firstDate = UtcTimestamp.of(2016, Month.JANUARY, 4, 0, 0) val lastDate = UtcTimestamp.of(2016, Month.JANUARY, 6, 0, 0) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -197,7 +230,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("data/$ts.format('yyyy/MM/dd')") .build() - val files = collector.collect(partitions) + val files = collector.glob(partitions) files.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/04"), @@ -205,7 +238,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll )) } - it should "collect all files in given daily range (5)" in { + it should "glob all files in given daily range (5)" in { val firstDate = UtcTimestamp.of(2016, Month.JANUARY, 4, 0, 0) val lastDate = UtcTimestamp.of(2016, Month.JANUARY, 6, 0, 0) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -216,7 +249,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("""$ts.format("'data/'yyyy/MM/dd")""") .build() - val files = collector.collect(partitions) + val files = collector.glob(partitions) files.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/04"), @@ -224,7 +257,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll )) } - it should "collect all files in given hourly range (1)" in { + it should "glob all files in given hourly range (1)" in { val firstDate = UtcTimestamp.of(2016, 
Month.JANUARY, 5, 1, 0) val lastDate = UtcTimestamp.of(2016, Month.JANUARY, 5, 2, 0) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -235,14 +268,14 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("""data/$ts.format("yyyy/MM/dd/HH'.seq'")""") .build() - val files = collector.collect(partitions) + val files = collector.glob(partitions) files.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/05/01.seq") )) } - it should "collect all files in given hourly range (2)" in { + it should "glob all files in given hourly range (2)" in { val firstDate = UtcTimestamp.of(2016, Month.JANUARY, 3, 0, 0) val lastDate = UtcTimestamp.of(2016, Month.JANUARY, 5, 2, 0) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -253,7 +286,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("""data/$ts.format("yyyy/MM/dd/HH'.seq'")""") .build() - val files = collector.collect(partitions) + val files = collector.glob(partitions) files.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/03/01.seq"), @@ -262,7 +295,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll )) } - it should "collect all files in given hourly range (3)" in { + it should "glob all files in given hourly range (3)" in { val firstDate = UtcTimestamp.of(2016, Month.JANUARY, 3, 0, 0) val lastDate = UtcTimestamp.of(2016, Month.JANUARY, 5, 3, 0) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -273,7 +306,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("""data/$ts.format("yyyy/MM/dd/HH'.seq'")""") .build() - val files = collector.collect(partitions) + val files = collector.glob(partitions) files.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2016/01/03/01.seq"), 
@@ -283,7 +316,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll )) } - it should "collect unixtimestamps as well (1)" in { + it should "glob unixtimestamps as well (1)" in { val firstDate = UtcTimestamp.of(2017, Month.JUNE, 19, 0, 0) val lastDate = UtcTimestamp.of(2017, Month.JUNE, 19, 23, 59) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -294,11 +327,12 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("""data/$ts.format("yyyy/MM/dd")/${ts.toEpochSeconds()}.i-*.log""") .build() - val files = collector.collect(partitions) + val files = collector.glob(partitions) files.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2017/06/19/1497830400.i-02255f88.rtb-imp.log"), new Path(workingDirectory, "data/2017/06/19/1497831300.i-02255f88.rtb-imp.log"), + new Path(workingDirectory, "data/2017/06/19/1497831300.i-02255f89.rtb-imp.log"), new Path(workingDirectory, "data/2017/06/19/1497832200.i-02255f88.rtb-imp.log"), new Path(workingDirectory, "data/2017/06/19/1497833100.i-02255f88.rtb-imp.log"), new Path(workingDirectory, "data/2017/06/19/1497834000.i-02255f88.rtb-imp.log"), @@ -306,7 +340,7 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll )) } - it should "collect unixtimestamps as well (2)" in { + it should "glob/collect unixtimestamps as well (2)" in { val firstDate = UtcTimestamp.of(2017, Month.JUNE, 19, 0, 15) val lastDate = UtcTimestamp.of(2017, Month.JUNE, 19, 0, 45) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -317,15 +351,22 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("""data/$ts.format("yyyy/MM/dd")/${ts.toEpochSeconds()}.i-*.log""") .build() - val files = collector.collect(partitions) - files.toSeq.sortBy(_.toString) should be (Seq( + val globbedFiles = collector.glob(partitions) + 
globbedFiles.toSeq.sortBy(_.toString) should be (Seq( new Path(workingDirectory, "data/2017/06/19/1497831300.i-02255f88.rtb-imp.log"), + new Path(workingDirectory, "data/2017/06/19/1497831300.i-02255f89.rtb-imp.log"), new Path(workingDirectory, "data/2017/06/19/1497832200.i-02255f88.rtb-imp.log") )) + + val collectedFiles = collector.collect(partitions) + collectedFiles.toSeq.sortBy(_.toString) should be (Seq( + new Path(workingDirectory, "data/2017/06/19/1497831300.i-*.log"), + new Path(workingDirectory, "data/2017/06/19/1497832200.i-*.log") + )) } - it should "collect unixtimestamps as well (3)" in { + it should "glob/collect unixtimestamps as well (3)" in { val firstDate = UtcTimestamp.of(2017, Month.JUNE, 19, 0, 15) val lastDate = UtcTimestamp.of(2017, Month.JUNE, 19, 0, 44) val range = RangeValue(firstDate.toString, lastDate.toString) @@ -336,9 +377,16 @@ class FileCollectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll .path(workingDirectory) .pattern("""data/$ts.format("yyyy/MM/dd")/${ts.toEpochSeconds()}.i-*.log""") .build() - val files = collector.collect(partitions) - files.size should be (1) - files.head.toString should be(workingDirectory.toString + "/data/2017/06/19/1497831300.i-02255f88.rtb-imp.log") + val globbedFiles = collector.glob(partitions) + globbedFiles.toSeq.sortBy(_.toString) should be(Seq( + new Path(workingDirectory, "data/2017/06/19/1497831300.i-02255f88.rtb-imp.log"), + new Path(workingDirectory, "data/2017/06/19/1497831300.i-02255f89.rtb-imp.log") + )) + + val collectedFiles = collector.collect(partitions) + collectedFiles.toSeq.sortBy(_.toString) should be(Seq( + new Path(workingDirectory, "data/2017/06/19/1497831300.i-*.log") + )) } } diff --git a/flowman-core/src/test/scala/com/dimajix/flowman/model/ResourceIdentifierTest.scala b/flowman-core/src/test/scala/com/dimajix/flowman/model/ResourceIdentifierTest.scala index 494bb4ca4..d53da98f8 100644 --- 
a/flowman-core/src/test/scala/com/dimajix/flowman/model/ResourceIdentifierTest.scala +++ b/flowman-core/src/test/scala/com/dimajix/flowman/model/ResourceIdentifierTest.scala @@ -135,9 +135,16 @@ class ResourceIdentifierTest extends AnyFlatSpec with Matchers { } it should "support local files" in { - ResourceIdentifier.ofLocal(new Path("/path/?/with/wildcard")) should be (GlobbingResourceIdentifier("local", "/path/?/with/wildcard")) - ResourceIdentifier.ofLocal(new Path("file:/path/?/with/wildcard")) should be (GlobbingResourceIdentifier("local", "/path/?/with/wildcard")) - ResourceIdentifier.ofLocal(new File("/path/?/with/wildcard")) should be (GlobbingResourceIdentifier("local", "/path/?/with/wildcard")) + if (System.getProperty("os.name").startsWith("Windows")) { + ResourceIdentifier.ofLocal(new Path("C:/path/?/with/wildcard")) should be(GlobbingResourceIdentifier("local", "/C:/path/?/with/wildcard")) + ResourceIdentifier.ofLocal(new Path("file:/C:/path/?/with/wildcard")) should be(GlobbingResourceIdentifier("local", "/C:/path/?/with/wildcard")) + ResourceIdentifier.ofLocal(new File("/C:/path/?/with/wildcard")) should be(GlobbingResourceIdentifier("local", "/C:/path/?/with/wildcard")) + } + else { + ResourceIdentifier.ofLocal(new Path("/path/?/with/wildcard")) should be(GlobbingResourceIdentifier("local", "/path/?/with/wildcard")) + ResourceIdentifier.ofLocal(new Path("file:/path/?/with/wildcard")) should be(GlobbingResourceIdentifier("local", "/path/?/with/wildcard")) + ResourceIdentifier.ofLocal(new File("/path/?/with/wildcard")) should be(GlobbingResourceIdentifier("local", "/path/?/with/wildcard")) + } } it should "support Hive databases" in { diff --git a/flowman-dist/bin/flowexec b/flowman-dist/bin/flowexec index 7be84c1ed..4a2f5e427 100755 --- a/flowman-dist/bin/flowexec +++ b/flowman-dist/bin/flowexec @@ -1,13 +1,13 @@ #!/usr/bin/env bash -basedir=$(readlink -f $(dirname $0)/..) 
-source $basedir/libexec/flowman-common.sh +basedir=$(readlink -f "$(dirname "$0")"/..) +source "$basedir"/libexec/flowman-common.sh APP_NAME="flowman-tools" APP_VERSION="${project.version}" APP_MAIN="com.dimajix.flowman.tools.exec.Driver" -APP_JAR=$FLOWMAN_HOME/lib/"$APP_NAME-$APP_VERSION.jar" +APP_JAR="$APP_NAME-$APP_VERSION.jar" LIB_JARS="${flowman-tools.classpath}" spark_submit $APP_JAR $LIB_JARS $APP_MAIN "$@" diff --git a/flowman-dist/bin/flowkernel b/flowman-dist/bin/flowkernel new file mode 100644 index 000000000..a58560755 --- /dev/null +++ b/flowman-dist/bin/flowkernel @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +basedir=$(readlink -f "$(dirname "$0")"/..) +source "$basedir"/libexec/flowman-common.sh + +APP_NAME="flowman-kernel" +APP_VERSION="${project.version}" +APP_MAIN="com.dimajix.flowman.kernel.Application" + +APP_JAR="$APP_NAME-$APP_VERSION.jar" +LIB_JARS="${flowman-kernel.classpath}" + +spark_submit $APP_JAR $LIB_JARS $APP_MAIN "$@" diff --git a/flowman-dist/bin/flowman b/flowman-dist/bin/flowman index 7a3c831b4..16eb8f5e5 100755 --- a/flowman-dist/bin/flowman +++ b/flowman-dist/bin/flowman @@ -1,13 +1,13 @@ #!/usr/bin/env bash -basedir=$(readlink -f $(dirname $0)/..) -source $basedir/libexec/flowman-common.sh +basedir=$(readlink -f "$(dirname "$0")"/..) +source "$basedir"/libexec/flowman-common.sh APP_NAME="flowman-tools" APP_VERSION="${project.version}" APP_MAIN="com.dimajix.flowman.tools.main.Driver" -APP_JAR=$FLOWMAN_HOME/lib/"$APP_NAME-$APP_VERSION.jar" +APP_JAR="$APP_NAME-$APP_VERSION.jar" LIB_JARS="${flowman-tools.classpath}" spark_submit $APP_JAR $LIB_JARS $APP_MAIN "$@" diff --git a/flowman-dist/bin/flowserver b/flowman-dist/bin/flowserver index af7fd6f85..5b2be1609 100755 --- a/flowman-dist/bin/flowserver +++ b/flowman-dist/bin/flowserver @@ -1,13 +1,13 @@ #!/usr/bin/env bash -basedir=$(readlink -f $(dirname $0)/..) -source $basedir/libexec/flowman-common.sh +basedir=$(readlink -f "$(dirname "$0")"/..) 
+source "$basedir"/libexec/flowman-common.sh APP_NAME="flowman-server" APP_VERSION="${project.version}" APP_MAIN="com.dimajix.flowman.server.Application" -APP_JAR=$FLOWMAN_HOME/lib/"$APP_NAME-$APP_VERSION.jar" +APP_JAR="$APP_NAME-$APP_VERSION.jar" LIB_JARS="${flowman-server.classpath}" spark_submit $APP_JAR $LIB_JARS $APP_MAIN "$@" diff --git a/flowman-dist/bin/flowshell b/flowman-dist/bin/flowshell index b7499bba2..2d677d6dd 100755 --- a/flowman-dist/bin/flowshell +++ b/flowman-dist/bin/flowshell @@ -1,13 +1,13 @@ #!/usr/bin/env bash -basedir=$(readlink -f $(dirname $0)/..) -source $basedir/libexec/flowman-common.sh +basedir=$(readlink -f "$(dirname "$0")"/..) +source "$basedir"/libexec/flowman-common.sh APP_NAME="flowman-tools" APP_VERSION="${project.version}" APP_MAIN="com.dimajix.flowman.tools.shell.Shell" -APP_JAR=$FLOWMAN_HOME/lib/"$APP_NAME-$APP_VERSION.jar" +APP_JAR="$APP_NAME-$APP_VERSION.jar" LIB_JARS="${flowman-tools.classpath}" spark_submit $APP_JAR $LIB_JARS $APP_MAIN "$@" diff --git a/flowman-dist/bin/flowshell.cmd b/flowman-dist/bin/flowshell.cmd new file mode 100644 index 000000000..3d389b66f --- /dev/null +++ b/flowman-dist/bin/flowshell.cmd @@ -0,0 +1,7 @@ +@echo off + +rem This is the entry point for running SparkR. To avoid polluting the +rem environment, it just launches a new cmd to do the real work. 
+ +rem The outermost quotes are used to prevent Windows command line parse error rem when there are some quotes in parameters +cmd /V /E /C ""%~dp0flowshell2.cmd" %*" diff --git a/flowman-dist/bin/flowshell2.cmd b/flowman-dist/bin/flowshell2.cmd new file mode 100644 index 000000000..3ed30df2e --- /dev/null +++ b/flowman-dist/bin/flowshell2.cmd @@ -0,0 +1,12 @@ +@echo off + +call "%~dp0../libexec/flowman-common.cmd" + +SET APP_NAME=flowman-tools +SET APP_VERSION=${project.version} +SET APP_MAIN=com.dimajix.flowman.tools.shell.Shell + +SET APP_JAR=%APP_NAME%-%APP_VERSION%.jar +SET LIB_JARS="${flowman-tools.classpath}" + +call "%~dp0../libexec/flowman-launch.cmd" %* diff --git a/flowman-dist/bin/flowstudio b/flowman-dist/bin/flowstudio new file mode 100644 index 000000000..121b7f541 --- /dev/null +++ b/flowman-dist/bin/flowstudio @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +basedir=$(readlink -f "$(dirname "$0")"/..) +source "$basedir"/libexec/flowman-common.sh + +APP_NAME="flowman-studio" +APP_VERSION="${project.version}" +APP_MAIN="com.dimajix.flowman.studio.Application" + +APP_JAR="$(flowman_lib $APP_NAME-$APP_VERSION.jar)" +LIB_JARS="$(flowman_lib ${flowman-studio.classpath})" + +spark_submit $APP_JAR $LIB_JARS $APP_MAIN "$@" diff --git a/flowman-dist/conf/default-namespace.yml.template b/flowman-dist/conf/default-namespace.yml.template index 9e08ec20d..8ed41970c 100644 --- a/flowman-dist/conf/default-namespace.yml.template +++ b/flowman-dist/conf/default-namespace.yml.template @@ -39,6 +39,7 @@ plugins: - flowman-kafka - flowman-mariadb - flowman-mysql + - flowman-mssqlserver - flowman-swagger - flowman-json diff --git a/flowman-dist/conf/flowman-env.cmd.template b/flowman-dist/conf/flowman-env.cmd.template new file mode 100644 index 000000000..7cf3aee0d --- /dev/null +++ b/flowman-dist/conf/flowman-env.cmd.template @@ -0,0 +1,41 @@ +@echo off + +rem Specify Java home (just in case) +rem set JAVA_HOME= + +rem Explicitly override Flowmans home. 
These settings are detected automatically, but can be overridden +rem set FLOWMAN_HOME= +rem set FLOWMAN_CONF_DIR= + +rem Specify any environment settings and paths +rem set SPARK_HOME= +rem set HADOOP_HOME= +rem set HADOOP_CONF_DIR=%HADOOP_HOME%/conf +rem set YARN_HOME= +rem set HDFS_HOME= +rem set MAPRED_HOME= +rem set HIVE_HOME= +rem set HIVE_CONF_DIR=%HIVE_HOME%/conf + +rem Set the Kerberos principal in YARN cluster +rem set KRB_PRINCIPAL= +rem set KRB_KEYTAB= + +rem Specify the YARN queue to use +rem set YARN_QUEUE= + +rem Specify Spark master +rem set SPARK_MASTER= + +rem Resource configuration. This probably should be overridden either in the application or via the corresponding +rem Spark config variables in an "environment" section. +rem set SPARK_EXECUTOR_CORES=4 +rem set SPARK_EXECUTOR_MEMORY=8G +rem set SPARK_DRIVER_CORES=2 +rem set SPARK_DRIVER_MEMORY=3G + +rem Specify additional Spark options to be passed to spark-submit +rem set SPARK_OPTS= + +rem Use a different spark-submit (for example spark2-submit in Cloudera) +rem set SPARK_SUBMIT= diff --git a/flowman-dist/conf/flowman-env.sh.template b/flowman-dist/conf/flowman-env.sh.template index 4ffb3124b..08ce85dfe 100644 --- a/flowman-dist/conf/flowman-env.sh.template +++ b/flowman-dist/conf/flowman-env.sh.template @@ -64,8 +64,8 @@ fi https_proxy_host=$(echo $https_proxy | sed 's#.*//\([^:]*\).*#\1#') && \ https_proxy_port=$(echo $https_proxy | sed 's#.*//[^:]*:\([0-9]*\)#\1#') && \ if [[ "$https_proxy_host" != "" ]]; then - SPARK_DRIVER_JAVA_OPTS="-Dhttps.proxyHost=${http_proxy_host} -Dhttps.proxyPort=${http_proxy_port} $SPARK_DRIVER_JAVA_OPTS" - SPARK_EXECUTOR_JAVA_OPTS="-Dhttps.proxyHost=${http_proxy_host} -Dhttps.proxyPort=${http_proxy_port} $SPARK_EXECUTOR_JAVA_OPTS" + SPARK_DRIVER_JAVA_OPTS="-Dhttps.proxyHost=${https_proxy_host} -Dhttps.proxyPort=${https_proxy_port} $SPARK_DRIVER_JAVA_OPTS" + SPARK_EXECUTOR_JAVA_OPTS="-Dhttps.proxyHost=${https_proxy_host} -Dhttps.proxyPort=${https_proxy_port} 
$SPARK_EXECUTOR_JAVA_OPTS" fi # Set AWS credentials if required. You can also specify these in project config diff --git a/flowman-dist/libexec/flowman-common.cmd b/flowman-dist/libexec/flowman-common.cmd new file mode 100644 index 000000000..e950ed0dd --- /dev/null +++ b/flowman-dist/libexec/flowman-common.cmd @@ -0,0 +1,65 @@ +@echo off + +rem Set Flowman directories +if "x%FLOWMAN_HOME%"=="x" ( + call :NORMALIZEPATH FLOWMAN_HOME %~dp0\.. +) +if "x%FLOWMAN_CONF_DIR%"=="x" ( + set FLOWMAN_CONF_DIR=%FLOWMAN_HOME%\conf +) + +rem Load environment file if present +if exist "%FLOWMAN_CONF_DIR%\flowman-env.cmd" ( + call "%FLOWMAN_CONF_DIR%\flowman-env.cmd" +) + +if exist "%HADOOP_HOME%\etc\hadoop\hadoop-env.cmd" ( + call "%HADOOP_HOME%\etc\hadoop\hadoop-env.cmd" +) + +rem Set basic Spark options +if "x%SPARK_SUBMIT%"=="x" ( + set SPARK_SUBMIT="%SPARK_HOME%\bin\spark-submit.cmd" +) +if "x%SPARK_OPTS%"=="x" ( + set SPARK_OPTS= +) +if "x%SPARK_DRIVER_JAVA_OPTS%"=="x" ( + set SPARK_DRIVER_JAVA_OPTS=-server +) +if "x%SPARK_EXECUTOR_JAVA_OPTS%"=="x" ( + set SPARK_EXECUTOR_JAVA_OPTS=-server +) + + +rem Add Optional settings to SPARK_OPTS +if not "x%KRB_PRINCIPAL%" == "x" ( + set SPARK_OPTS=--principal %KRB_PRINCIPAL% --keytab %KRB_KEYTAB% %SPARK_OPTS% +) +if not "x%YARN_QUEUE%" == "x" ( + set SPARK_OPTS=--queue %YARN_QUEUE% %SPARK_OPTS% +) +if not "x%SPARK_MASTER%" == "x" ( + set SPARK_OPTS=--master %SPARK_MASTER% %SPARK_OPTS% +) +if not "x%SPARK_EXECUTOR_CORES%" == "x" ( + set SPARK_OPTS=--executor-cores %SPARK_EXECUTOR_CORES% %SPARK_OPTS% +) +if not "x%SPARK_EXECUTOR_MEMORY%" == "x" ( + set SPARK_OPTS=--executor-memory %SPARK_EXECUTOR_MEMORY% %SPARK_OPTS% +) +if not "x%SPARK_DRIVER_CORES%" == "x" ( + set SPARK_OPTS=--driver-cores %SPARK_DRIVER_CORES% %SPARK_OPTS% +) +if not "x%SPARK_DRIVER_MEMORY%" == "x" ( + set SPARK_OPTS=--driver-memory %SPARK_DRIVER_MEMORY% %SPARK_OPTS% +) + + +:: ========== FUNCTIONS ========== +exit /b + +:NORMALIZEPATH + set %~1=%~f2 + goto :eof + 
diff --git a/flowman-dist/libexec/flowman-common.sh b/flowman-dist/libexec/flowman-common.sh index 1a2bf6050..940cf0b07 100644 --- a/flowman-dist/libexec/flowman-common.sh +++ b/flowman-dist/libexec/flowman-common.sh @@ -5,16 +5,16 @@ export FLOWMAN_HOME=${FLOWMAN_HOME=$(readlink -f $(dirname $0)/..)} export FLOWMAN_CONF_DIR=${FLOWMAN_CONF_DIR=$FLOWMAN_HOME/conf} # Load environment file if present -if [ -f $FLOWMAN_CONF_DIR/flowman-env.sh ]; then - source $FLOWMAN_CONF_DIR/flowman-env.sh +if [ -f "$FLOWMAN_CONF_DIR/flowman-env.sh" ]; then + source "$FLOWMAN_CONF_DIR/flowman-env.sh" fi -if [ -f $HADOOP_HOME/etc/hadoop/hadoop-env.sh ]; then - source $HADOOP_HOME/etc/hadoop/hadoop-env.sh +if [ -f "$HADOOP_HOME/etc/hadoop/hadoop-env.sh" ]; then + source "$HADOOP_HOME/etc/hadoop/hadoop-env.sh" fi # Set basic Spark options -: ${SPARK_SUBMIT:=$SPARK_HOME/bin/spark-submit} +: ${SPARK_SUBMIT:="$SPARK_HOME"/bin/spark-submit} : ${SPARK_OPTS:=""} : ${SPARK_DRIVER_JAVA_OPTS:="-server"} : ${SPARK_EXECUTOR_JAVA_OPTS:="-server"} @@ -23,13 +23,13 @@ fi # Build Spark dist classpath if [ "$SPARK_DIST_CLASSPATH" = "" ]; then if [ -d "$HADOOP_HOME" ]; then - export SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$HADOOP_HOME/*.jar:$HADOOP_HOME/lib/*.jar" + export SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:"$HADOOP_HOME"/*.jar:$HADOOP_HOME/lib/*.jar" fi if [ -d "$HADOOP_CONF_DIR" ]; then - export SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$HADOOP_CONF_DIR/*" + export SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:"$HADOOP_CONF_DIR"/*" fi if [ -d "$HADOOP_HOME/share/hadoop/common" ]; then - export SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$HADOOP_HOME/share/hadoop/common/*.jar:$HADOOP_HOME/share/hadoop/common/lib/*.jar" + export SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:"$HADOOP_HOME"/share/hadoop/common/*.jar:$HADOOP_HOME/share/hadoop/common/lib/*.jar" fi if [ -d "$YARN_HOME" ]; then @@ -74,12 +74,18 @@ if [ "$SPARK_DRIVER_MEMORY" != "" ]; then fi + +flowman_lib() { + echo $1 | awk -F, 
'{for(i=1;i<=NF;i++) printf("%s%s",ENVIRON["FLOWMAN_HOME"]"/"$i,(i com.dimajix.flowman flowman-root - 0.16.0 - .. + 0.17.0 + ../pom.xml @@ -28,7 +28,7 @@ ${project.build.directory} - target/flowman-server-${project.version}-properties.properties,target/flowman-tools-${project.version}-properties.properties + target/flowman-server-${project.version}-properties.properties,target/flowman-tools-${project.version}-properties.properties,target/flowman-studio-${project.version}-properties.properties,target/flowman-kernel-${project.version}-properties.properties . @@ -143,6 +143,14 @@ bin ${project.build.directory} + + com.dimajix.flowman + flowman-plugin-mssqlserver + ${project.version} + tar.gz + bin + ${project.build.directory} + @@ -170,6 +178,22 @@ properties ${project.build.directory} + + com.dimajix.flowman + flowman-studio + ${project.version} + properties + properties + ${project.build.directory} + + + com.dimajix.flowman + flowman-kernel + ${project.version} + properties + properties + ${project.build.directory} + @@ -206,6 +230,14 @@ com.dimajix.flowman flowman-server + + com.dimajix.flowman + flowman-kernel + + + com.dimajix.flowman + flowman-studio + diff --git a/flowman-dist/src/main/assembly/assembly.xml b/flowman-dist/src/main/assembly/assembly.xml index e1dfb7ef4..377ca771b 100644 --- a/flowman-dist/src/main/assembly/assembly.xml +++ b/flowman-dist/src/main/assembly/assembly.xml @@ -38,6 +38,8 @@ com.dimajix.flowman:flowman-tools com.dimajix.flowman:flowman-server + com.dimajix.flowman:flowman-kernel + com.dimajix.flowman:flowman-studio diff --git a/flowman-dsl/pom.xml b/flowman-dsl/pom.xml index a2b54c637..8ed9957ff 100644 --- a/flowman-dsl/pom.xml +++ b/flowman-dsl/pom.xml @@ -9,8 +9,8 @@ flowman-root com.dimajix.flowman - 0.16.0 - .. 
+ 0.17.0 + ../pom.xml diff --git a/flowman-kernel/.gitignore b/flowman-kernel/.gitignore new file mode 100644 index 000000000..b83d22266 --- /dev/null +++ b/flowman-kernel/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/flowman-kernel/pom.xml b/flowman-kernel/pom.xml new file mode 100644 index 000000000..4a11cd415 --- /dev/null +++ b/flowman-kernel/pom.xml @@ -0,0 +1,152 @@ + + + 4.0.0 + flowman-kernel + Flowman Kernel + + + flowman-root + com.dimajix.flowman + 0.17.0 + ../pom.xml + + + + + + net.alchim31.maven + scala-maven-plugin + + + org.scalatest + scalatest-maven-plugin + + + + org.apache.maven.plugins + maven-dependency-plugin + + + initialize + + build-classpath + + + runtime + flowman-kernel.classpath + false + / + , + lib + + json,org.everit.json.schema,velocity-engine-core + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + copy-resources + process-resources + + copy-resources + + + ${project.build.directory}/properties + + + src/main/properties + + **/* + + true + + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + attach-artifacts + package + + attach-artifact + + + + + ${project.build.directory}/properties/flowman-kernel.properties + properties + properties + + + + + + + + + + + + com.dimajix.flowman + flowman-tools + + + + com.dimajix.flowman + flowman-scalatest-compat + + + + org.apache.hadoop + hadoop-client + + + + org.apache.spark + spark-sql_${scala.api_version} + + + + log4j + log4j + + + + args4j + args4j + + + + com.typesafe.akka + akka-http_${scala.api_version} + + + com.typesafe.akka + akka-http-spray-json_${scala.api_version} + + + com.github.swagger-akka-http + swagger-akka-http_${scala.api_version} + + + org.webjars + swagger-ui + + + + org.scalatest + scalatest_${scala.api_version} + + + diff --git a/flowman-kernel/src/main/properties/flowman-kernel.properties b/flowman-kernel/src/main/properties/flowman-kernel.properties new file mode 100644 index 000000000..8fb8b6415 --- /dev/null +++ 
b/flowman-kernel/src/main/properties/flowman-kernel.properties @@ -0,0 +1 @@ +flowman-kernel.classpath=${flowman-kernel.classpath} diff --git a/flowman-kernel/src/main/resources/com/dimajix/flowman/kernel/flowman-kernel.properties b/flowman-kernel/src/main/resources/com/dimajix/flowman/kernel/flowman-kernel.properties new file mode 100644 index 000000000..7ff604ff0 --- /dev/null +++ b/flowman-kernel/src/main/resources/com/dimajix/flowman/kernel/flowman-kernel.properties @@ -0,0 +1 @@ +kernel.server.request.timeout=120 diff --git a/flowman-kernel/src/main/resources/swagger/index.html b/flowman-kernel/src/main/resources/swagger/index.html new file mode 100644 index 000000000..2527a53c6 --- /dev/null +++ b/flowman-kernel/src/main/resources/swagger/index.html @@ -0,0 +1,95 @@ + + + + + + Swagger UI + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/Application.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/Application.scala new file mode 100644 index 000000000..256e4957c --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/Application.scala @@ -0,0 +1,95 @@ +/* + * Copyright 2019 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel + +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +import org.kohsuke.args4j.CmdLineException + +import com.dimajix.flowman.common.Logging +import com.dimajix.flowman.kernel.rest.Server +import com.dimajix.flowman.spec.splitSettings +import com.dimajix.flowman.tools.Tool + + +object Application { + def main(args: Array[String]) : Unit = { + Logging.init() + Try { + run(args:_*) + } + match { + case Success (true) => + System.exit(0) + case Success (false) => + System.exit(1) + case Failure(ex:CmdLineException) => + System.err.println(ex.getMessage) + ex.getParser.printUsage(System.err) + System.err.println + System.exit(1) + case Failure(exception) => + exception.printStackTrace(System.err) + System.exit(1) + } + } + + def run(args: String*) : Boolean = { + val options = new Arguments(args.toArray) + // Check if only help is requested + if (options.help) { + options.printHelp(System.out) + true + } + else { + Logging.setSparkLogging(options.sparkLogging) + + val 
server = new Application(options) + server.run() + } + } +} + + +class Application(options:Arguments) extends Tool { + def run() : Boolean = { + val config = splitSettings(options.config) + val environment = splitSettings(options.environment) + val session = createSession( + options.sparkMaster, + options.sparkName, + additionalConfigs = config.toMap, + additionalEnvironment = environment.toMap, + profiles = options.profiles + ) + + val conf = Configuration.loadDefaults() + .setBindHost(options.bindHost) + .setBindPort(options.bindPort) + .setKernelId(options.kernelId) + .setStudioUrl(options.studioUrl) + .setStudioSecret(options.kernelSecret) + val server = new Server(conf, session) + server.run() + + session.shutdown() + + true + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/Arguments.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/Arguments.scala new file mode 100644 index 000000000..721ce454d --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/Arguments.scala @@ -0,0 +1,87 @@ +/* + * Copyright 2018 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel + +import java.io.PrintStream +import java.net.URL + +import scala.collection.JavaConverters._ + +import org.kohsuke.args4j.Argument +import org.kohsuke.args4j.CmdLineParser +import org.kohsuke.args4j.Option +import org.kohsuke.args4j.spi.SubCommand +import org.kohsuke.args4j.spi.SubCommandHandler +import org.kohsuke.args4j.spi.SubCommands + +import com.dimajix.flowman.tools.exec.info.InfoCommand +import com.dimajix.flowman.tools.exec.job.JobCommand +import com.dimajix.flowman.tools.exec.mapping.MappingCommand +import com.dimajix.flowman.tools.exec.model.ModelCommand +import com.dimajix.flowman.tools.exec.namespace.NamespaceCommand +import com.dimajix.flowman.tools.exec.project.ProjectCommand +import com.dimajix.flowman.tools.exec.target.TargetCommand +import com.dimajix.flowman.tools.exec.test.TestCommand + + +class Arguments(args:Array[String]) { + @Option(name = "-h", aliases=Array("--help"), usage = "show help", help=true) + var _help: Boolean = false + @Option(name = "-P", aliases=Array("--profile"), usage = "activate profile with specified name", metaVar = "") + var profiles: Array[String] = Array() + @Option(name = "-D", aliases=Array("--env"), usage = "set environment variables which can be accessed inside config", metaVar = "") + var environment: Array[String] = Array() + @Option(name = "--conf", usage = "set a Flowman or Spark config", metaVar = "=") + var config: Array[String] = Array() + @Option(name = "--bind-host", usage = "set the host to bind the REST API to", metaVar = "") + var bindHost: String = "0.0.0.0" + @Option(name = "--bind-port", usage = "set the port to bind the REST API to. 
Use 0 for random port.", metaVar = "") + var bindPort: Int = 8080 + @Option(name = "--kernel-id", usage = "set the kernel ID used for registration", metaVar = "") + var kernelId: String = "" + @Option(name = "--kernel-secret", usage = "set the secret to use for communication with the Flowman Studio server", metaVar = "") + var kernelSecret: String = "" + @Option(name = "--studio-url", usage = "set the URL to register to", metaVar = "") + var studioUrl: String = "" + @Option(name = "--spark-master", usage = "set the master for Spark", metaVar = "") + var sparkMaster: String = "" + @Option(name = "--spark-logging", usage = "set the log level for Spark", metaVar = "") + var sparkLogging: String = "WARN" + @Option(name = "--spark-name", usage = "set the Spark application name", metaVar = "") + var sparkName: String = "Flowman Kernel" + + /** + * Returns true if a help message is requested + * @return + */ + def help : Boolean = _help + + /** + * Prints a context-aware help message + */ + def printHelp(out:PrintStream = System.err) : Unit = { + new CmdLineParser(this).printUsage(out) + out.println + } + + parseArgs(args) + + private def parseArgs(args: Array[String]) { + val parser: CmdLineParser = new CmdLineParser(this) + parser.parseArgument(args.toList.asJava) + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/Configuration.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/Configuration.scala new file mode 100644 index 000000000..f47f3bce7 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/Configuration.scala @@ -0,0 +1,107 @@ +/* + * Copyright 2019 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel + +import java.io.FileInputStream +import java.net.URL +import java.util.Properties + + +object Configuration { + val SERVER_BIND_HOST = "kernel.server.bind.host" + val SERVER_BIND_PORT = "kernel.server.bind.port" + val SERVER_REQUEST_TIMEOUT = "kernel.server.request.timeout" + val SERVER_IDLE_TIMEOUT = "kernel.server.idle.timeout" + val SERVER_BIND_TIMEOUT = "kernel.server.bind.timeout" + val SERVER_LINGER_TIMEOUT = "kernel.server.linger.timeout" + val KERNEL_ID = "kernel.server.id" + val STUDIO_URL = "kernel.studio.url" + val STUDIO_SECRET = "kernel.studio.secret" + + private def defaultProperties() : Properties = { + val loader = Thread.currentThread.getContextClassLoader + val url = loader.getResource("com/dimajix/flowman/kernel/flowman-kernel.properties") + val properties = new Properties() + properties.load(url.openStream()) + properties + } + + /** + * Load a Configuration from a Properties file + * @param filename + * @return + */ + def load(filename:String) : Configuration= { + val properties = defaultProperties() + properties.load(new FileInputStream(filename)) + new Configuration(properties) + } + + /** + * Loads built-in default configuration + * @return + */ + def loadDefaults() : Configuration = { + val properties = defaultProperties() + new Configuration(properties) + } +} + +class Configuration(properties: Properties) { + import Configuration._ + + def setBindHost(host: String) : Configuration = { + properties.setProperty(SERVER_BIND_HOST, host) + this + } + def setBindPort(port: Int) : 
Configuration = { + properties.setProperty(SERVER_BIND_PORT, port.toString) + this + } + def setKernelId(id: String) : Configuration = { + properties.setProperty(KERNEL_ID, id) + this + } + def setStudioUrl(url: String) : Configuration = { + properties.setProperty(STUDIO_URL, url) + this + } + def setStudioSecret(secret: String) : Configuration = { + properties.setProperty(STUDIO_SECRET, secret) + this + } + + def getBindHost() : String = properties.getProperty(SERVER_BIND_HOST, "0.0.0.0") + def getBindPort() : Int = properties.getProperty(SERVER_BIND_PORT, "8080").toInt + + def getRequestTimeout() : Int = properties.getProperty(SERVER_REQUEST_TIMEOUT, "20").toInt + def getIdleTimeout() : Int = properties.getProperty(SERVER_IDLE_TIMEOUT, "60").toInt + def getBindTimeout() : Int = properties.getProperty(SERVER_BIND_TIMEOUT, "1").toInt + def getLingerTimeout() : Int = properties.getProperty(SERVER_LINGER_TIMEOUT, "60").toInt + + def getKernelId() : String = properties.getProperty(KERNEL_ID, "") + + def getStudioUrl() : Option[URL] = { + Some(properties.getProperty(STUDIO_URL, "http://localhost:8080").trim) + .filter(_.nonEmpty) + .map (new URL(_)) + } + def getStudioSecret() : Option[String] = { + Some(properties.getProperty(STUDIO_SECRET, "").trim) + .filter(_.nonEmpty) + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Converter.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Converter.scala new file mode 100644 index 000000000..ce42c5f0c --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Converter.scala @@ -0,0 +1,114 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.model + +import com.dimajix.flowman.kernel.service +import com.dimajix.flowman.model + + +object Converter { + def of(ns:model.Namespace) : Namespace = { + Namespace( + ns.name, + ns.environment, + ns.config, + ns.profiles.keys.toSeq, + ns.connections.keys.toSeq, + ns.plugins + ) + } + + def of(project:model.Project) : Project = { + Project( + project.name, + project.version, + project.description, + project.filename.map(_.toString), + project.basedir.map(_.toString), + project.environment, + project.config, + project.profiles.keys.toSeq, + project.connections.keys.toSeq, + project.jobs.keys.toSeq, + project.targets.keys.toSeq + ) + } + + def of(job:model.Job) : Job = { + Job( + job.name, + job.description, + job.targets.map(_.toString), + job.parameters.map(_.name), + job.environment + ) + } + + def of(test:model.Test) : Test = { + Test( + test.name, + test.description, + test.labels + ) + } + + def of(target:model.Target) : Target = { + Target( + target.name, + target.kind, + target.labels, + target.before.map(_.toString), + target.after.map(_.toString) + ) + } + + def of(mapping:model.Mapping) : Mapping = { + Mapping( + mapping.name, + mapping.kind, + mapping.broadcast, + mapping.cache.description, + mapping.checkpoint, + mapping.inputs.map(_.toString), + mapping.outputs, + mapping.labels + ) + } + + def of(relation:model.Relation) : Relation = { + Relation( + relation.name, + relation.kind, + relation.labels + ) + } + + def of(job:service.JobTask) : JobTask = { + JobTask( + job.id, + 
job.job.identifier.toString, + job.phase.toString, + job.lifecycle.map(_.toString), + job.rawArgs, + job.force, + job.keepGoing, + job.dryRun, + job.status.toString + ) + } + +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Job.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Job.scala new file mode 100644 index 000000000..2dc39aaa6 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Job.scala @@ -0,0 +1,41 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.model + + +case class Job( + name:String, + description:Option[String], + targets:Seq[String], + parameters:Seq[String], + environment:Map[String,String] +) + + +case class JobList( + jobs:Seq[String] +) + +case class RunJobRequest( + job:String, + phase:String, + args:Map[String,String], + force:Boolean, + keepGoing:Boolean, + dryRun:Boolean +) + diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/JsonSupport.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/JsonSupport.scala new file mode 100644 index 000000000..28218e9af --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/JsonSupport.scala @@ -0,0 +1,69 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.model + +import java.time.ZonedDateTime +import java.time.format.DateTimeFormatter + +import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport +import spray.json.DefaultJsonProtocol +import spray.json.DeserializationException +import spray.json.JsString +import spray.json.JsValue +import spray.json.JsonFormat +import spray.json.RootJsonFormat + + +trait JsonSupport extends DefaultJsonProtocol with SprayJsonSupport { + implicit object ZonedDateTimeFormat extends JsonFormat[ZonedDateTime] { + final val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") + def write(value:ZonedDateTime) : JsString = { + JsString(value.format(formatter)) + } + def read(value:JsValue) : ZonedDateTime = { + value match { + case JsString(dt) => ZonedDateTime.parse(dt, formatter) + case _ => throw DeserializationException("Not a boolean") + } + } + } + + implicit val kernelRegistrationRequestFormat: RootJsonFormat[KernelRegistrationRequest] = jsonFormat2(KernelRegistrationRequest) + implicit val statusFormat: RootJsonFormat[Status] = jsonFormat1(Status) + implicit val namespaceFormat: RootJsonFormat[Namespace] = jsonFormat6(Namespace) + implicit val projectFormat: RootJsonFormat[Project] = jsonFormat11(Project) + implicit val projectListFormat: RootJsonFormat[ProjectList] = jsonFormat1(ProjectList) + implicit val jobFormat: RootJsonFormat[Job] = jsonFormat5(Job) + implicit val jobListFormat: RootJsonFormat[JobList] = jsonFormat1(JobList) + implicit val jobTaskFormat: RootJsonFormat[JobTask] = jsonFormat9(JobTask) + implicit val runJobRequestFormat: RootJsonFormat[RunJobRequest] = jsonFormat6(RunJobRequest) + implicit val testFormat: RootJsonFormat[Test] = jsonFormat3(Test) + implicit val testListFormat: RootJsonFormat[TestList] = jsonFormat1(TestList) + implicit val targetFormat: RootJsonFormat[Target] = jsonFormat5(Target) + implicit val targetListFormat: RootJsonFormat[TargetList] = jsonFormat1(TargetList) + implicit val 
mappingFormat: RootJsonFormat[Mapping] = jsonFormat8(Mapping) + implicit val mappingListFormat: RootJsonFormat[MappingList] = jsonFormat1(MappingList) + implicit val relationFormat: RootJsonFormat[Relation] = jsonFormat3(Relation) + implicit val relationListFormat: RootJsonFormat[RelationList] = jsonFormat1(RelationList) + implicit val sessionFormat: RootJsonFormat[Session] = jsonFormat5(Session) + implicit val createSessionRequestFormat: RootJsonFormat[CreateSessionRequest] = jsonFormat2(CreateSessionRequest) + implicit val sessionListFormat: RootJsonFormat[SessionList] = jsonFormat1(SessionList) +} + + +object JsonSupport extends JsonSupport { +} diff --git a/flowman-server/src/main/scala/com/dimajix/flowman/server/executor/InProcessExecutor.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Kernel.scala similarity index 79% rename from flowman-server/src/main/scala/com/dimajix/flowman/server/executor/InProcessExecutor.scala rename to flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Kernel.scala index b02e902ae..6d9590008 100644 --- a/flowman-server/src/main/scala/com/dimajix/flowman/server/executor/InProcessExecutor.scala +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Kernel.scala @@ -1,5 +1,5 @@ /* - * Copyright 2019 Kaya Kupferschmidt + * Copyright 2021 Kaya Kupferschmidt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,9 @@ * limitations under the License. 
*/ -package com.dimajix.flowman.server.executor +package com.dimajix.flowman.kernel.model -class InProcessExecutor { - -} +case class KernelRegistrationRequest( + id:String, + url:String +) diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Mapping.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Mapping.scala new file mode 100644 index 000000000..9329cb1e4 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Mapping.scala @@ -0,0 +1,32 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.model + +case class Mapping( + name:String, + kind:String, + broadcast:Boolean, + cache:String, + checkpoint:Boolean, + inputs:Seq[String], + outputs:Seq[String], + labels:Map[String,String] +) + +case class MappingList( + mappings:Seq[String] +) diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Namespace.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Namespace.scala new file mode 100644 index 000000000..27bb8f300 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Namespace.scala @@ -0,0 +1,28 @@ +/* + * Copyright 2019 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.model + + +case class Namespace( + name:String, + environment: Map[String,String], + config: Map[String,String], + profiles: Seq[String], + connections: Seq[String], + plugins: Seq[String] +) { +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Project.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Project.scala new file mode 100644 index 000000000..411c49f94 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Project.scala @@ -0,0 +1,36 @@ +/* + * Copyright 2019 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.model + + +case class Project( + name:String, + version:Option[String], + description: Option[String], + filename : Option[String], + basedir : Option[String], + environment: Map[String,String], + config: Map[String,String], + profiles: Seq[String], + connections: Seq[String], + jobs: Seq[String], + targets: Seq[String] +) + +case class ProjectList( + projects:Seq[Project] +) diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Relation.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Relation.scala new file mode 100644 index 000000000..b26200d63 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Relation.scala @@ -0,0 +1,27 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.model + +case class Relation( + name:String, + kind:String, + labels:Map[String,String] +) + +case class RelationList( + relations:Seq[String] +) diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Session.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Session.scala new file mode 100644 index 000000000..e20a53bce --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Session.scala @@ -0,0 +1,36 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.model + + +case class Session( + id: String, + namespace: String, + project: String, + config: Map[String,String] = Map(), + environment: Map[String,String] = Map() +) + +case class SessionList( + sessions:Seq[Session] +) + + +case class CreateSessionRequest( + projectName:Option[String], + projectPath:Option[String] +) diff --git a/flowman-server/src/main/scala/com/dimajix/flowman/server/executor/SshExecutor.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Status.scala similarity index 82% rename from flowman-server/src/main/scala/com/dimajix/flowman/server/executor/SshExecutor.scala rename to flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Status.scala index d1c10b1f1..3be52fd72 100644 --- a/flowman-server/src/main/scala/com/dimajix/flowman/server/executor/SshExecutor.scala +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Status.scala @@ -1,5 +1,5 @@ /* - * Copyright 2019 Kaya Kupferschmidt + * Copyright 2021 Kaya Kupferschmidt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ * limitations under the License. */ -package com.dimajix.flowman.server.executor - -class SshExecutor { +package com.dimajix.flowman.kernel.model +sealed case class Status(status:String) { } diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Target.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Target.scala new file mode 100644 index 000000000..7897c1822 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Target.scala @@ -0,0 +1,30 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.model + +case class Target( + name:String, + kind:String, + labels:Map[String,String], + before:Seq[String], + after:Seq[String] +) + + +case class TargetList( + targets:Seq[String] +) diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Task.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Task.scala new file mode 100644 index 000000000..1aadf1fbd --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Task.scala @@ -0,0 +1,33 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.model + + +abstract class Task + +case class JobTask( + id:String, + job:String, + phase:String, + lifecycle:Seq[String], + args:Map[String,String], + force:Boolean, + keepGoing:Boolean, + dryRun:Boolean, + status:String +) extends Task { +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Test.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Test.scala new file mode 100644 index 000000000..0c2162189 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/model/Test.scala @@ -0,0 +1,28 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.model + +case class Test( + name:String, + description:Option[String], + labels:Map[String,String] +) + + +case class TestList( + tests:Seq[String] +) diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/JobEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/JobEndpoint.scala new file mode 100644 index 000000000..ff9280691 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/JobEndpoint.scala @@ -0,0 +1,143 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.rest + +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.GET +import javax.ws.rs.POST +import javax.ws.rs.Path + +import com.dimajix.flowman.execution.Phase +import com.dimajix.flowman.kernel.model.Converter +import com.dimajix.flowman.kernel.model.Job +import com.dimajix.flowman.kernel.model.JobList +import com.dimajix.flowman.kernel.model.RunJobRequest +import com.dimajix.flowman.kernel.model.JobTask +import com.dimajix.flowman.kernel.service.SessionService +import com.dimajix.flowman.model + + +@Api(value = "job", produces = "application/json", consumes = "application/json") +@Path("/session/{session}/job") +@ApiImplicitParams(Array( + new ApiImplicitParam(name = "session", value = "Session ID", required = true, dataType = "string", paramType = "path") +)) +@ApiResponses(Array( + new ApiResponse(code = 404, message = "Session or job not found"), + new ApiResponse(code = 500, message = "Internal server error") +)) +class JobEndpoint { + import akka.http.scaladsl.server.Directives._ + + import 
com.dimajix.flowman.kernel.model.JsonSupport._ + + def routes(session:SessionService) : server.Route = pathPrefix("job") {( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + listJobs(session) + } + } + ~ + pathPrefix(Segment) { jobName => + ( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + getJob(session, jobName) + } + } + ~ + path("run") { + runJob(session, jobName) + } + ) + } + )} + + @GET + @ApiOperation(value = "Return list of all jobs", nickname = "listJobs", httpMethod = "GET") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "List of all jobs", response = classOf[JobList]) + )) + def listJobs(@ApiParam(hidden = true) session: SessionService) : server.Route = { + get { + val result = JobList( + session.listJobs() + ) + complete(result) + } + } + + @GET + @Path("/{job}") + @ApiOperation(value = "Get job", nickname = "getJob", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "job", value = "Job Name", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Information about the job", response = classOf[Job]) + )) + def getJob(@ApiParam(hidden = true) session: SessionService, @ApiParam(hidden = true) job:String) : server.Route = { + get { + withJob(session, job) { job => + complete(Converter.of(job)) + } + } + } + + @POST + @Path("/{job}/run") + @ApiOperation(value = "Run job", nickname = "runJob", httpMethod = "POST") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "job", value = "Job Name", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Job run", response = classOf[JobTask]) + )) + def runJob(@ApiParam(hidden = true) session:SessionService, @ApiParam(hidden = true) job:String) : server.Route = { + post { + entity(as[RunJobRequest]) { jobRun => + withJob(session, job) { job => + val phase 
= Phase.ofString(jobRun.phase) + val run = session.tasks.runJob(job, phase, jobRun.args, jobRun.force, jobRun.keepGoing, jobRun.dryRun) + complete(Converter.of(run)) + } + } + } + } + + private def withJob(session:SessionService, jobName:String)(fn:(model.Job) => server.Route) : server.Route = { + Try { + session.getJob(jobName) + } match { + case Success(job) => fn(job) + case Failure(_) => complete(HttpResponse(status = StatusCodes.NotFound)) + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/MappingEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/MappingEndpoint.scala new file mode 100644 index 000000000..4d4d2d102 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/MappingEndpoint.scala @@ -0,0 +1,112 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.rest + +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.GET +import javax.ws.rs.Path + +import com.dimajix.flowman.kernel.model.Converter +import com.dimajix.flowman.kernel.model.Mapping +import com.dimajix.flowman.kernel.model.MappingList +import com.dimajix.flowman.kernel.service.SessionService +import com.dimajix.flowman.model + + +@Api(value = "/session/{session}/mapping", produces = "application/json", consumes = "application/json") +@Path("/session/{session}/mapping") +@ApiImplicitParams(Array( + new ApiImplicitParam(name = "session", value = "Session ID", required = true, dataType = "string", paramType = "path") +)) +@ApiResponses(Array( + new ApiResponse(code = 404, message = "Session or mapping not found"), + new ApiResponse(code = 500, message = "Internal server error") +)) +class MappingEndpoint { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + def routes(session:SessionService) : server.Route = pathPrefix("mapping") {( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + listMappings(session) + } + } + ~ + pathPrefix(Segment) { mappingName => ( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + getMapping(session, mappingName) + } + }) + } + )} + + @GET + @ApiOperation(value = "Return list of all jobs", nickname = "listMappings", httpMethod = "GET") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "List 
of all mappings", response = classOf[MappingList]) + )) + def listMappings(@ApiParam(hidden = true) session: SessionService) : server.Route = { + get { + val result = MappingList( + session.listMappings() + ) + complete(result) + } + } + + @GET + @Path("/{mapping}") + @ApiOperation(value = "Get mapping", nickname = "getMapping", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "mapping", value = "Mapping Name", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Information about the mapping", response = classOf[Mapping]) + )) + def getMapping(@ApiParam(hidden = true) session: SessionService, @ApiParam(hidden = true) mapping:String) : server.Route = { + get { + withMapping(session, mapping) { mapping => + complete(Converter.of(mapping)) + } + } + } + + private def withMapping(session:SessionService, mappingName:String)(fn:(model.Mapping) => server.Route) : server.Route = { + Try { + session.getMapping(mappingName) + } match { + case Success(mapping) => fn(mapping) + case Failure(_) => complete(HttpResponse(status = StatusCodes.NotFound)) + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/NamespaceEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/NamespaceEndpoint.scala new file mode 100644 index 000000000..a613ac499 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/NamespaceEndpoint.scala @@ -0,0 +1,54 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.rest + +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.Path + +import com.dimajix.flowman.kernel.model.Converter +import com.dimajix.flowman.kernel.model.Namespace +import com.dimajix.flowman.model + + +@Api(value = "/namespace", produces = "application/json", consumes = "application/json") +@Path("/namespace") +class NamespaceEndpoint(ns:model.Namespace) { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + def routes : server.Route = pathPrefix("namespace") { + pathEndOrSingleSlash { + info() + } + } + + @ApiOperation(value = "Return information on the current namespace", nickname = "getNamespace", httpMethod = "GET") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Information about namespace", response = classOf[Namespace]) + )) + def info() : server.Route = { + get { + complete(Converter.of(ns)) + } + } +} + diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/PingEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/PingEndpoint.scala new file mode 100644 index 000000000..70ed6b01d --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/PingEndpoint.scala @@ -0,0 +1,62 @@ +/* + * Copyright 2019 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * 
you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.rest + +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.POST +import javax.ws.rs.Path + +import com.dimajix.flowman.kernel.model.Status + + +@Api(value = "/ping", produces = "application/json", consumes = "text/plain") +@Path("/ping") +class PingEndpoint { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + def routes : server.Route = pathPrefix("ping") { + pathEnd { + ping() + } + } + + @POST + @Path("/") + @ApiOperation(value = "Ping", nickname = "ping", httpMethod = "POST") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "ping message", value = "some message to ping", required = true, + dataTypeClass = classOf[String], paramType = "body") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Ping", response = classOf[String]) + )) + def ping() : server.Route = { + post { + entity(as[String]) { body => + complete(Status("success")) + } + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/ProjectEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/ProjectEndpoint.scala new file mode 100644 index 000000000..c661397e4 --- /dev/null +++ 
b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/ProjectEndpoint.scala @@ -0,0 +1,101 @@ +/* + * Copyright 2018 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.rest + +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server +import akka.http.scaladsl.server.Route +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.Path + +import com.dimajix.flowman.execution.NoSuchProjectException +import com.dimajix.flowman.kernel.model.Converter +import com.dimajix.flowman.kernel.model.Project +import com.dimajix.flowman.kernel.model.ProjectList +import com.dimajix.flowman.storage.Store + + +@Api(value = "/project", produces = "application/json", consumes = "application/json") +@Path("/project") +@ApiResponses(Array( + new ApiResponse(code = 500, message = "Internal server error") +)) +class ProjectEndpoint(store:Store) { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + def routes : Route = pathPrefix("project") {( + pathEndOrSingleSlash { + 
redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + listProjects() + } + } + ~ + pathPrefix(Segment) { project => + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + infoProject(project) + } + } + } + )} + + @Path("/") + @ApiOperation(value = "Retrieve a list of all projects", nickname = "getProjects", httpMethod = "GET") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Project information", response = classOf[ProjectList]) + )) + def listProjects(): server.Route = { + val result = store.listProjects() + complete(ProjectList(result.map(Converter.of))) + } + + @Path("/{project}") + @ApiOperation(value = "Retrieve general information about a project", nickname = "getProject", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "project", value = "name of project", required = true, + dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Project information", response = classOf[Project]), + new ApiResponse(code = 404, message = "Project not found", response = classOf[Project]) + )) + def infoProject(@ApiParam(hidden = true) project:String): server.Route = { + Try { + Converter.of(store.loadProject(project)) + } + match { + case Success(result) => + complete(result) + case Failure(_:NoSuchProjectException) => + complete(StatusCodes.NotFound) + case Failure(ex) => + complete(StatusCodes.InternalServerError) + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/RelationEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/RelationEndpoint.scala new file mode 100644 index 000000000..9bc584c3d --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/RelationEndpoint.scala @@ -0,0 +1,113 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.rest + +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.GET +import javax.ws.rs.Path + +import com.dimajix.flowman.kernel.model.Converter +import com.dimajix.flowman.kernel.model.Relation +import com.dimajix.flowman.kernel.model.RelationList +import com.dimajix.flowman.kernel.service.SessionService +import com.dimajix.flowman.model + + +@Api(value = "/session/{session}/relation", produces = "application/json", consumes = "application/json") +@Path("/session/{session}/relation") +@ApiImplicitParams(Array( + new ApiImplicitParam(name = "session", value = "Session ID", required = true, dataType = "string", paramType = "path") +)) +@ApiResponses(Array( + new ApiResponse(code = 404, message = "Session or relation not found"), + new ApiResponse(code = 500, message = "Internal server error") +)) +class RelationEndpoint { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + def routes(session:SessionService) : server.Route = pathPrefix("relation") {( + pathEndOrSingleSlash { + 
redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + listRelations(session) + } + } + ~ + pathPrefix(Segment) { relationName => ( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + getRelation(session, relationName) + } + }) + } + )} + + @GET + @ApiOperation(value = "Return list of all jobs", nickname = "listRelations", httpMethod = "GET") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "List of all relations", response = classOf[RelationList]) + )) + def listRelations(@ApiParam(hidden = true) session: SessionService) : server.Route = { + get { + val result = RelationList( + session.listRelations() + ) + complete(result) + } + } + + @GET + @Path("/{relation}") + @ApiOperation(value = "Get relation", nickname = "getRelation", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "relation", value = "Relation Name", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Information about the relation", response = classOf[Relation]) + )) + def getRelation(@ApiParam(hidden = true) session: SessionService, @ApiParam(hidden = true) relation:String) : server.Route = { + get { + withRelation(session, relation) { relation => + complete(Converter.of(relation)) + } + } + } + + + private def withRelation(session:SessionService, relationName:String)(fn:(model.Relation) => server.Route) : server.Route = { + Try { + session.getRelation(relationName) + } match { + case Success(relation) => fn(relation) + case Failure(_) => complete(HttpResponse(status = StatusCodes.NotFound)) + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/Server.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/Server.scala new file mode 100644 index 000000000..c47bc9943 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/Server.scala @@ -0,0 +1,135 @@ +/* + * Copyright 2019 Kaya 
Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.rest + +import java.net.InetSocketAddress + +import scala.concurrent.Await +import scala.concurrent.ExecutionContextExecutor +import scala.concurrent.Promise +import scala.concurrent.duration.Duration +import scala.util.Failure +import scala.util.Success + +import akka.Done +import akka.actor.ActorSystem +import akka.http.scaladsl.Http +import akka.http.scaladsl.model.StatusCodes.Found +import akka.http.scaladsl.model.Uri +import akka.http.scaladsl.settings.ServerSettings +import akka.stream.ActorMaterializer +import akka.stream.scaladsl.Sink +import org.slf4j.LoggerFactory + +import com.dimajix.common.net.SocketUtils +import com.dimajix.flowman.execution.Session +import com.dimajix.flowman.kernel.Configuration +import com.dimajix.flowman.kernel.model.KernelRegistrationRequest + + +class Server( + conf:Configuration, + rootSession:Session +) { + import akka.http.scaladsl.client.RequestBuilding._ + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + private val logger = LoggerFactory.getLogger(classOf[Server]) + + implicit private val system: ActorSystem = ActorSystem("flowman") + implicit private val materializer: ActorMaterializer = ActorMaterializer() + implicit private val executionContext: ExecutionContextExecutor = system.dispatcher + + private val shutdownPromise = Promise[Done]() + private 
val shutdownEndpoint = new ShutdownEndpoint(shutdownPromise.trySuccess(Done)) + private val pingEndpoint = new PingEndpoint + private val projectEndpoint = new ProjectEndpoint(rootSession.store) + private val namespaceEndpoint = new NamespaceEndpoint(rootSession.namespace.get) + private val sessionEndpoint = new SessionEndpoint(rootSession) + + def run(): Unit = { + val route = ( + pathPrefix("api") {( + shutdownEndpoint.routes + ~ + pingEndpoint.routes + ~ + projectEndpoint.routes + ~ + namespaceEndpoint.routes + ~ + sessionEndpoint.routes + ~ + SwaggerDocEndpoint.routes + )} + ~ + pathPrefix("swagger") {( + pathEndOrSingleSlash { + redirectToTrailingSlashIfMissing(Found) { + getFromResource("swagger/index.html") + } + } + ~ + getFromResourceDirectory("META-INF/resources/webjars/swagger-ui/3.22.2") + )} + ) + + logger.info("Starting Flowman kernel") + + val settings = ServerSettings(system) + .withVerboseErrorMessages(true) + + val server = Http().bind(conf.getBindHost(), conf.getBindPort(), akka.http.scaladsl.ConnectionContext.noEncryption(), settings) + .to(Sink.foreach { connection => + logger.info("Accepted new connection from " + connection.remoteAddress) + connection.handleWith(route) + }) + .run() + + server.foreach { binding => + val listenUrl = SocketUtils.toURL("http", binding.localAddress, allowAny = true) + logger.info(s"Flowman kernel online at $listenUrl") + + register(binding.localAddress) + } + + Await.ready(shutdownPromise.future, Duration.Inf) + } + + /** + * Register kernel at Flowman Studio + * @param localAddress + */ + private def register(localAddress:InetSocketAddress) : Unit = { + conf.getStudioUrl().foreach { url => + val localUrl = SocketUtils.toURL("http", localAddress) + + logger.info(s"Registering Flowman kernel running at $localUrl with Flowman Studio running at $url") + val request = KernelRegistrationRequest(id=conf.getKernelId(), url=localUrl.toString) + val studioUri = Uri(url.toString) + val uri = 
studioUri.withPath(studioUri.path / "api" / "registry") + + Http().singleRequest(Post(uri, request)) + .onComplete { + case Success(res) => println(res) + case Failure(_) => sys.error("something wrong") + } + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/SessionEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/SessionEndpoint.scala new file mode 100644 index 000000000..10224929d --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/SessionEndpoint.scala @@ -0,0 +1,239 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.rest + +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.DELETE +import javax.ws.rs.GET +import javax.ws.rs.POST +import javax.ws.rs.Path +import org.slf4j.LoggerFactory + +import com.dimajix.flowman.execution +import com.dimajix.flowman.kernel.model.Converter +import com.dimajix.flowman.kernel.model.CreateSessionRequest +import com.dimajix.flowman.kernel.model.Project +import com.dimajix.flowman.kernel.model.Session +import com.dimajix.flowman.kernel.model.SessionList +import com.dimajix.flowman.kernel.model.Status +import com.dimajix.flowman.kernel.service.SessionManager +import com.dimajix.flowman.kernel.service.SessionService + + +@Api(value = "/session", produces = "application/json", consumes = "application/json") +@Path("/session") +@ApiResponses(Array( + new ApiResponse(code = 500, message = "Internal server error") +)) +class SessionEndpoint(rootSession:execution.Session) { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + private val logger = LoggerFactory.getLogger(classOf[SessionEndpoint]) + private val sessionManager:SessionManager = new SessionManager(rootSession) + private val jobEndpoint:JobEndpoint = new JobEndpoint + private val mappingEndpoint:MappingEndpoint = new MappingEndpoint + private val relationEndpoint:RelationEndpoint = new RelationEndpoint + private val targetEndpoint:TargetEndpoint = new TargetEndpoint + private val testEndpoint:TestEndpoint = new TestEndpoint + + 
def routes : server.Route = pathPrefix("session") {( + pathEndOrSingleSlash {( + listSessions() + ~ + createSession() + )} + ~ + pathPrefix(Segment) { session => + withSession(session) { session => + ( + pathEndOrSingleSlash {( + getSession(session) + ~ + closeSession(session) + )} + ~ + path("project") { + getProject(session) + } + ~ + path("reset") { + resetSession(session) + } + ~ + jobEndpoint.routes(session) + ~ + mappingEndpoint.routes(session) + ~ + relationEndpoint.routes(session) + ~ + targetEndpoint.routes(session) + ~ + testEndpoint.routes(session) + )} + } + )} + + @GET + @ApiOperation(value = "Return list of all active sessions", nickname = "listSessions", httpMethod = "GET") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Information about namespace", response = classOf[SessionList]) + )) + def listSessions() : server.Route = { + get { + val result = SessionList(sessionManager.list().map(s => Session(s.id, s.namespace.name, s.project.name))) + complete(result) + } + } + + @POST + @ApiOperation(value = "Create new session", nickname = "createSession", httpMethod = "POST") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "session request", value = "session parameters and project name", required = true, + dataTypeClass = classOf[CreateSessionRequest], paramType = "body") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Create a new session and opens a project", response = classOf[Session]), + new ApiResponse(code = 400, message = "Bad request", response = classOf[Session]) + )) + def createSession() : server.Route = { + post { + entity(as[CreateSessionRequest]) { request => + request.projectPath.map { p => + val path = new org.apache.hadoop.fs.Path(p) + Try { sessionManager.createSession(path) } + } + .orElse { + request.projectName.map { p => + Try { sessionManager.createSession(p) } + } + } + .map { + case Success(session) => + val result = Session( + id = session.id, + namespace = session.namespace.name, + 
project = session.project.name, + config = session.context.config.toMap, + environment = session.context.environment.toMap.map(kv => kv._1 -> kv._2.toString) + ) + complete(result) + case Failure(e) => + logger.warn(s"Cannot load project. Request was $request, error is ${e.getMessage}") + complete(HttpResponse(status = StatusCodes.InternalServerError)) + } + .getOrElse { + complete(HttpResponse(status = StatusCodes.BadRequest)) + } + } + } + } + + @GET + @Path("/{session}") + @ApiOperation(value = "Get session", nickname = "getSession", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "session", value = "Session ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Retrieve session information", response = classOf[Session]), + new ApiResponse(code = 404, message = "Session not found") + )) + def getSession(@ApiParam(hidden = true) session:SessionService) : server.Route = { + get { + val result = Session( + id = session.id, + namespace = session.namespace.name, + project = session.project.name, + config = session.context.config.toMap, + environment = session.context.environment.toMap.map(kv => kv._1 -> kv._2.toString) + ) + complete(result) + } + } + + @POST + @Path("/{session}/reset") + @ApiOperation(value = "Reset session", nickname = "resetSession", httpMethod = "POST") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "session", value = "Session ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Retrieve session information"), + new ApiResponse(code = 404, message = "Session not found") + )) + def resetSession(@ApiParam(hidden = true) session: SessionService) : server.Route = { + post { + session.reset() + complete(Status("success")) + } + } + + @DELETE + @Path("/{session}") + @ApiOperation(value = "Close session", nickname = "closeSession", httpMethod = "DELETE") + 
@ApiImplicitParams(Array( + new ApiImplicitParam(name = "session", value = "Session ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Close current session (and project)"), + new ApiResponse(code = 404, message = "Session not found") + )) + def closeSession(@ApiParam(hidden = true) session:SessionService) : server.Route = { + delete { + session.close() + complete(Status("success")) + } + } + + @GET + @Path("/{session}/project") + @ApiOperation(value = "Get project", nickname = "getProject", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "session", value = "Session ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Information about the project", response = classOf[Project]), + new ApiResponse(code = 404, message = "Session not found") + )) + def getProject(@ApiParam(hidden = true) session:SessionService) : server.Route = { + get { + complete(Converter.of(session.project)) + } + } + + private def withSession(sessionId:String)(fn:(SessionService) => server.Route) : server.Route = { + sessionManager.getSession(sessionId) match { + case Some(session) => fn(session) + case None => complete(HttpResponse(status = StatusCodes.NotFound)) + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/ShutdownEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/ShutdownEndpoint.scala new file mode 100644 index 000000000..2ab2b2e62 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/ShutdownEndpoint.scala @@ -0,0 +1,57 @@ +/* + * Copyright 2019 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.rest + +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.POST +import javax.ws.rs.Path + +import com.dimajix.flowman.kernel.model.Status + + +@Api(value = "/shutdown", produces = "application/json", consumes = "text/plain") +@Path("/shutdown") +class ShutdownEndpoint(fn: => Unit) { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + def routes : server.Route = pathPrefix("shutdown") { + pathEnd { + shutdown() + } + } + + @POST + @Path("/") + @ApiOperation(value = "Shutdown kernel", nickname = "shutdown", httpMethod = "POST") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Ping", response = classOf[String]) + )) + def shutdown() : server.Route = { + post { + entity(as[String]) { body => + fn + complete(Status("success")) + } + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/SwaggerDocEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/SwaggerDocEndpoint.scala new file mode 100644 index 000000000..b23fe81d2 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/SwaggerDocEndpoint.scala @@ -0,0 +1,24 @@ +package com.dimajix.flowman.kernel.rest + +import com.github.swagger.akka.SwaggerHttpService +import com.github.swagger.akka.model.Info +import 
io.swagger.models.auth.BasicAuthDefinition + + +object SwaggerDocEndpoint extends SwaggerHttpService { + override def apiClasses = Set( + classOf[ShutdownEndpoint], + classOf[PingEndpoint], + classOf[ProjectEndpoint], + classOf[NamespaceEndpoint], + classOf[SessionEndpoint], + classOf[JobEndpoint] + ) + override def host = "" + override def basePath: String = "/api/" + override def apiDocsPath: String = "swagger" + override def info = Info(version = "1.0") + // override val externalDocs = Some(new ExternalDocs("Core Docs", "http://acme.com/docs")) + // override def securitySchemeDefinitions = Map("basicAuth" -> new BasicAuthDefinition()) + override val unwantedDefinitions = Seq() +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/TargetEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/TargetEndpoint.scala new file mode 100644 index 000000000..4aefd9dd0 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/TargetEndpoint.scala @@ -0,0 +1,114 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.rest + +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.GET +import javax.ws.rs.Path + +import com.dimajix.flowman.kernel.model.Converter +import com.dimajix.flowman.kernel.model.Mapping +import com.dimajix.flowman.kernel.model.Target +import com.dimajix.flowman.kernel.model.TargetList +import com.dimajix.flowman.kernel.service.SessionService +import com.dimajix.flowman.model + + +@Api(value = "/session/{session}/target", produces = "application/json", consumes = "application/json") +@Path("/session/{session}/target") +@ApiImplicitParams(Array( + new ApiImplicitParam(name = "session", value = "Session ID", required = true, dataType = "string", paramType = "path") +)) +@ApiResponses(Array( + new ApiResponse(code = 404, message = "Session or target not found"), + new ApiResponse(code = 500, message = "Internal server error") +)) +class TargetEndpoint { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + def routes(session:SessionService) : server.Route = pathPrefix("target") {( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + listTargets(session) + } + } + ~ + pathPrefix(Segment) { targetName => ( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + getTarget(session, targetName) + } + }) + } + )} + + @GET + @ApiOperation(value = "Return list of all jobs", nickname = "listTargets", httpMethod = "GET") + @ApiResponses(Array( + new 
ApiResponse(code = 200, message = "List of all targets", response = classOf[TargetList]) + )) + def listTargets(@ApiParam(hidden = true) session: SessionService) : server.Route = { + get { + val result = TargetList( + session.listTargets() + ) + complete(result) + } + } + + @GET + @Path("/{target}") + @ApiOperation(value = "Get target", nickname = "getTarget", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "target", value = "Target Name", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Information about the target", response = classOf[Target]) + )) + def getTarget(@ApiParam(hidden = true) session: SessionService, @ApiParam(hidden = true) target:String) : server.Route = { + get { + withTarget(session, target) { target => + complete(Converter.of(target)) + } + } + } + + + private def withTarget(session:SessionService, targetName:String)(fn:(model.Target) => server.Route) : server.Route = { + Try { + session.getTarget(targetName) + } match { + case Success(target) => fn(target) + case Failure(_) => complete(HttpResponse(status = StatusCodes.NotFound)) + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/TaskEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/TaskEndpoint.scala new file mode 100644 index 000000000..5e6ec3f10 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/TaskEndpoint.scala @@ -0,0 +1,36 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.rest + +import akka.http.scaladsl.server +import akka.http.scaladsl.server.Directives.pathEndOrSingleSlash +import akka.http.scaladsl.server.Directives.pathPrefix +import io.swagger.annotations.Api +import javax.ws.rs.Path + +import com.dimajix.flowman.kernel.service.SessionService + + +@Api(value = "/session/{session}/task", produces = "application/json", consumes = "application/json") +@Path("/session/{session}/task") +class TaskEndpoint { + def routes(session:SessionService) : server.Route = pathPrefix("task") {( + pathEndOrSingleSlash {( + ??? + )} + )} +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/TestEndpoint.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/TestEndpoint.scala new file mode 100644 index 000000000..db7f3c4fb --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/rest/TestEndpoint.scala @@ -0,0 +1,113 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.rest + +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.GET +import javax.ws.rs.Path + +import com.dimajix.flowman.kernel.model.Converter +import com.dimajix.flowman.kernel.model.Test +import com.dimajix.flowman.kernel.model.TestList +import com.dimajix.flowman.kernel.service.SessionService +import com.dimajix.flowman.model + + +@Api(value = "/session/{session}/test", produces = "application/json", consumes = "application/json") +@Path("/session/{session}/test") +@ApiImplicitParams(Array( + new ApiImplicitParam(name = "session", value = "Session ID", required = true, dataType = "string", paramType = "path") +)) +@ApiResponses(Array( + new ApiResponse(code = 404, message = "Session or test not found"), + new ApiResponse(code = 500, message = "Internal server error") +)) +class TestEndpoint { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.kernel.model.JsonSupport._ + + def routes(session:SessionService) : server.Route = pathPrefix("test") {( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + listTests(session) + } + } + ~ + pathPrefix(Segment) { testName => ( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(StatusCodes.Found) { + getTest(session, testName) + } + }) + } + )} + + @GET + @ApiOperation(value = "Return list of all jobs", nickname = "listTests", httpMethod = "GET") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "List of all tests", response = 
classOf[TestList]) + )) + def listTests(@ApiParam(hidden = true) session: SessionService) : server.Route = { + get { + val result = TestList( + session.listTests() + ) + complete(result) + } + } + + @GET + @Path("/{test}") + @ApiOperation(value = "Get test", nickname = "getTest", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "test", value = "Test Name", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Information about the test", response = classOf[Test]) + )) + def getTest(@ApiParam(hidden = true) session: SessionService, @ApiParam(hidden = true) test:String) : server.Route = { + get { + withTest(session, test) { test => + complete(Converter.of(test)) + } + } + } + + + private def withTest(session:SessionService, testName:String)(fn:(model.Test) => server.Route) : server.Route = { + Try { + session.getTest(testName) + } match { + case Success(test) => fn(test) + case Failure(_) => complete(HttpResponse(status = StatusCodes.NotFound)) + } + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/service/SessionManager.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/service/SessionManager.scala new file mode 100644 index 000000000..5c51d6f42 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/service/SessionManager.scala @@ -0,0 +1,138 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.service + +import java.lang.Thread.UncaughtExceptionHandler +import java.util.concurrent.ForkJoinPool +import java.util.concurrent.ForkJoinWorkerThread + +import scala.collection.mutable +import scala.concurrent.ExecutionContext + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.slf4j.LoggerFactory + +import com.dimajix.flowman.execution +import com.dimajix.flowman.hadoop.FileSystem +import com.dimajix.flowman.model.Project + + +object SessionManager { + private val logger = LoggerFactory.getLogger(classOf[SessionManager]) + private class MyForkJoinWorkerThread(pool: ForkJoinPool) extends ForkJoinWorkerThread(pool) { // set the correct classloader here + setContextClassLoader(Thread.currentThread.getContextClassLoader) + } + private object MyForkJoinWorkerThreadFactory extends ForkJoinPool.ForkJoinWorkerThreadFactory { + override final def newThread(pool: ForkJoinPool) = new MyForkJoinWorkerThread(pool) + } + private val exceptionHandler = new UncaughtExceptionHandler { + override def uncaughtException(thread: Thread, throwable: Throwable): Unit = { + logger.error("Uncaught exception: ", throwable) + } + } +} + +class SessionManager(rootSession:execution.Session) { + import SessionManager._ + private val sessions = mutable.ListBuffer[SessionService]() + private val threadPool = new ForkJoinPool(4, MyForkJoinWorkerThreadFactory, exceptionHandler, true) + private implicit val executionContext = ExecutionContext.fromExecutorService(threadPool) + + /** + * Returns a list of all active [[SessionService]]s + * @return + */ + def list() : Seq[SessionService] = { + val result = mutable.ListBuffer[SessionService]() + sessions.synchronized { + result.append(sessions:_*) + } + result + } + + /** + * Returns the [[SessionService]] for a specific id. 
If no such session is known, [[None]] will be returned + * instead + * @param id + * @return + */ + def getSession(id:String) : Option[SessionService] = { + var result:Option[SessionService] = None + sessions.synchronized { + result = sessions.find(_.id == id) + } + result + } + + + /** + * Creates a new [[SessionService]] by loading a new project. The project is specified via its name, as returned + * by [[rootSession]] + * @param projectPath + * @return + */ + def createSession(projectName:String) : SessionService = { + val project = rootSession.store.loadProject(projectName) + createSession(project) + } + + /** + * Creates a new [[SessionService]] by loading a new project. The project is specified via a path, which needs + * to point to a location resolvable by the Hadoop filesystem layer. + * @param projectPath + * @return + */ + def createSession(projectPath:Path) : SessionService = { + val project = loadProject(projectPath) + createSession(project) + } + + private def createSession(project:Project) : SessionService = { + val session = rootSession.newSession(project) + val svc = new SessionService(this, session) + + sessions.synchronized { + sessions.append(svc) + } + + svc + } + + private[service] def removeSession(svc:SessionService) : Unit = { + val id = svc.id + sessions.synchronized { + val index = sessions.indexWhere(_.id == id) + if (index >= 0) { + sessions.remove(index) + } + } + } + + private def loadProject(projectPath:Path) : Project = { + // Create Hadoop FileSystem instance + val hadoopConfig = new Configuration() + val fs = FileSystem(hadoopConfig) + + // Load Project. 
If no schema is specified, load from local file system + val projectUri = projectPath.toUri + if (projectUri.getAuthority == null && projectUri.getScheme == null) + Project.read.file(fs.local(projectPath)) + else + Project.read.file(fs.file(projectPath)) + } +} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/service/SessionService.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/service/SessionService.scala new file mode 100644 index 000000000..d4584ed08 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/service/SessionService.scala @@ -0,0 +1,132 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.service + +import java.io.Closeable +import java.lang.Thread.UncaughtExceptionHandler +import java.util.UUID +import java.util.concurrent.ForkJoinPool +import java.util.concurrent.ForkJoinWorkerThread +import java.util.concurrent.TimeUnit + +import scala.concurrent.ExecutionContext + +import org.slf4j.LoggerFactory + +import com.dimajix.flowman.execution.Context +import com.dimajix.flowman.execution.Execution +import com.dimajix.flowman.execution.Runner +import com.dimajix.flowman.execution.Session +import com.dimajix.flowman.model.Job +import com.dimajix.flowman.model.JobIdentifier +import com.dimajix.flowman.model.Mapping +import com.dimajix.flowman.model.MappingIdentifier +import com.dimajix.flowman.model.Namespace +import com.dimajix.flowman.model.Project +import com.dimajix.flowman.model.Relation +import com.dimajix.flowman.model.RelationIdentifier +import com.dimajix.flowman.model.Target +import com.dimajix.flowman.model.TargetIdentifier +import com.dimajix.flowman.model.Test +import com.dimajix.flowman.model.TestIdentifier + + +class SessionService(_manager:SessionManager, _session:Session)(implicit ec:ExecutionContext) extends Closeable { + private var _job: Option[Job] = None + private var _test: Option[Test] = None + private var _context : Context = _session.getContext(_session.project.get) + + val tasks = new TaskService(this) + + def executionContext:ExecutionContext = ec + + val id : String = UUID.randomUUID().toString + val namespace : Namespace = _session.namespace.get + val project : Project = _session.project.get + + def session : Session = _session + def context : Context = _context + def execution : Execution = _session.execution + def runner: Runner = _session.runner + + override def close(): Unit = { + _manager.removeSession(this) + } + + def job: Option[Job] = _job + def test: Option[Test] = _test + + def reset() : Unit = { + _context = _session.getContext(project) + _session.execution.cleanup() + 
_job = None + _test = None + } + + def listJobs() : Seq[String] = project.jobs.keys.toSeq + def getJob(name:String) : Job = { + context.getJob(JobIdentifier(name)) + } + def enterJob(job: Job, args:Map[String,String]): Unit = { + val jargs = job.arguments(args) + _context = runner.withJobContext(job,jargs) { (context,args) => context } + _session.execution.cleanup() + _test = None + _job = Some(job) + } + def leaveJob(): Unit = { + _context = _session.getContext(project) + _session.execution.cleanup() + _job = None + _test = None + } + + def listTargets() : Seq[String] = project.targets.keys.toSeq + def getTarget(name:String) : Target = { + _context.getTarget(TargetIdentifier(name)) + } + + def listTests() : Seq[String] = project.tests.keys.toSeq + def getTest(name:String) : Test = { + _context.getTest(TestIdentifier(name)) + } + def enterTest(test: Test): Unit = { + _context = _session.runner.withTestContext(test) { context => context } + _session.execution.cleanup() + _job = None + _test = Some(test) + } + def leaveTest(): Unit = { + _context = _session.getContext(project) + _session.execution.cleanup() + _job = None + _test = None + } + + def listMappings() : Seq[String] = project.mappings.keys.toSeq + def getMapping(name:String) : Mapping = { + _context.getMapping(MappingIdentifier(name)) + } + def collectMapping(mapping:Mapping, output:String) = ??? + def describeMapping(mapping:Mapping, output:String) = ??? + + def listRelations() : Seq[String] = project.relations.keys.toSeq + def getRelation(name:String) : Relation = { + _context.getRelation(RelationIdentifier(name)) + } + def collectRelation(relation:Relation) = ??? 
+} diff --git a/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/service/TaskService.scala b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/service/TaskService.scala new file mode 100644 index 000000000..7b1437c33 --- /dev/null +++ b/flowman-kernel/src/main/scala/com/dimajix/flowman/kernel/service/TaskService.scala @@ -0,0 +1,140 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.service + +import java.util.UUID + +import scala.collection.mutable +import scala.concurrent.ExecutionContext +import scala.concurrent.Future +import scala.reflect.ClassTag +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +import com.dimajix.flowman.execution.Lifecycle +import com.dimajix.flowman.execution.Phase +import com.dimajix.flowman.execution.Status +import com.dimajix.flowman.model.Job +import com.dimajix.flowman.model.Target + + + +sealed abstract class Task[T] { + val id:String = UUID.randomUUID().toString + + def result:Future[T] + + final def isComplete:Boolean = result.isCompleted + final def isRunning:Boolean = !result.isCompleted + final def value:Option[Try[T]] = result.value +} +case class JobTask( + job:Job, + phase:Phase, + lifecycle: Seq[Phase], + rawArgs:Map[String,String], + args:Map[String,Any], + force:Boolean, + keepGoing:Boolean, + dryRun:Boolean, + override val result:Future[Status] +) extends Task[Status] { + final def 
status:Status = { + if (result.isCompleted) { + result.value.get match { + case Success(value) => value + case Failure(_) => Status.FAILED + } + } + else { + Status.RUNNING + } + } +} + + +class TaskService(sessionService: SessionService) { + implicit private val executionContext:ExecutionContext = sessionService.executionContext + private val tasks = mutable.ListBuffer[Task[_]]() + + private def listType[T : ClassTag]() : Seq[T] = { + val entities = mutable.ListBuffer[T]() + tasks.synchronized { + tasks.foreach { + case j:T => entities.append(j) + case _ => + } + } + entities + } + + /** + * Returns a list of all tasks for jobs + * @return + */ + def listJobs() : Seq[JobTask] = listType[JobTask]() + + /** + * Executes a single job + * @param job + * @param phase + * @param args + * @param force + * @param keepGoing + * @param dryRun + * @return + */ + def runJob(job:Job, phase:Phase, args:Map[String,String], force:Boolean=false, keepGoing:Boolean=false, dryRun:Boolean=false) : JobTask = { + val jobArgs = job.arguments(args) + val lifecycle = Lifecycle.ofPhase(phase) + val runner = sessionService.runner + + val future = Future { + runner.executeJob(job, lifecycle, jobArgs, force = force, keepGoing = keepGoing, dryRun = dryRun) + } + + val task = JobTask( + job, + phase, + lifecycle, + args, + jobArgs, + force = force, + keepGoing = keepGoing, + dryRun = dryRun, + result = future + ) + val id = task.id + + tasks.synchronized { + tasks.append(task) + } + future.onComplete { status => + tasks.synchronized { + val index = tasks.indexWhere(_.id == id) + if (index >= 0) { + tasks.remove(index) + } + } + } + + task + } + + def runTarget(target:Target, phase:Phase) : Unit = ??? 
+} diff --git a/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/rest/SessionEndpointTest.scala b/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/rest/SessionEndpointTest.scala new file mode 100644 index 000000000..b3996631c --- /dev/null +++ b/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/rest/SessionEndpointTest.scala @@ -0,0 +1,25 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.rest + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + + +class SessionEndpointTest extends AnyFlatSpec with Matchers { + +} diff --git a/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/service/SessionManagerTest.scala b/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/service/SessionManagerTest.scala new file mode 100644 index 000000000..7940b8f5e --- /dev/null +++ b/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/service/SessionManagerTest.scala @@ -0,0 +1,25 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.service + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + + +class SessionManagerTest extends AnyFlatSpec with Matchers { + +} diff --git a/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/service/SessionServiceTest.scala b/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/service/SessionServiceTest.scala new file mode 100644 index 000000000..32794b88e --- /dev/null +++ b/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/service/SessionServiceTest.scala @@ -0,0 +1,25 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.kernel.service + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + + +class SessionServiceTest extends AnyFlatSpec with Matchers { + +} diff --git a/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/service/TaskServiceTest.scala b/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/service/TaskServiceTest.scala new file mode 100644 index 000000000..99b16e550 --- /dev/null +++ b/flowman-kernel/src/test/scala/com/dimajix/flowman/kernel/service/TaskServiceTest.scala @@ -0,0 +1,25 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.kernel.service + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + + +class TaskServiceTest extends AnyFlatSpec with Matchers { + +} diff --git a/flowman-parent/pom.xml b/flowman-parent/pom.xml index ae5a0da09..a3843f82c 100644 --- a/flowman-parent/pom.xml +++ b/flowman-parent/pom.xml @@ -10,8 +10,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - .. 
+ 0.17.0 + ../pom.xml @@ -97,7 +97,7 @@ true net.alchim31.maven scala-maven-plugin - 4.4.0 + 4.5.1 ${scala.version} ${scala.api_version} @@ -119,17 +119,9 @@ - scala-compile-first - process-resources add-source compile - - - - scala-test-compile - process-test-resources - testCompile @@ -254,6 +246,12 @@ ${flowman.version} provided + + com.dimajix.flowman + flowman-common + ${flowman.version} + provided + com.dimajix.flowman flowman-core @@ -320,6 +318,12 @@ ${flowman.version} provided + + com.dimajix.flowman + flowman-plugin-mssqlserver + ${flowman.version} + provided + com.dimajix.flowman flowman-plugin-swagger @@ -426,12 +430,6 @@ ${spark.version} provided - - com.databricks - spark-avro_${scala.api_version} - ${spark-avro.version} - provided - org.xerial.snappy diff --git a/flowman-plugins/aws/pom.xml b/flowman-plugins/aws/pom.xml index a951debc2..c4e0a314a 100644 --- a/flowman-plugins/aws/pom.xml +++ b/flowman-plugins/aws/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - ../.. + 0.17.0 + ../../pom.xml diff --git a/flowman-plugins/azure/pom.xml b/flowman-plugins/azure/pom.xml index 221c03d50..6a1dc7455 100644 --- a/flowman-plugins/azure/pom.xml +++ b/flowman-plugins/azure/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - ../.. + 0.17.0 + ../../pom.xml diff --git a/flowman-plugins/impala/pom.xml b/flowman-plugins/impala/pom.xml index 559ad2f4d..7826e1f8b 100644 --- a/flowman-plugins/impala/pom.xml +++ b/flowman-plugins/impala/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - ../.. + 0.17.0 + ../../pom.xml diff --git a/flowman-plugins/json/pom.xml b/flowman-plugins/json/pom.xml index aefb90c2e..52ea355b9 100644 --- a/flowman-plugins/json/pom.xml +++ b/flowman-plugins/json/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - ../.. 
+ 0.17.0 + ../../pom.xml diff --git a/flowman-plugins/kafka/pom.xml b/flowman-plugins/kafka/pom.xml index d68d67ae6..40de12ae1 100644 --- a/flowman-plugins/kafka/pom.xml +++ b/flowman-plugins/kafka/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - ../.. + 0.17.0 + ../../pom.xml diff --git a/flowman-plugins/mariadb/pom.xml b/flowman-plugins/mariadb/pom.xml index 06f7d166b..b1ecbcc35 100644 --- a/flowman-plugins/mariadb/pom.xml +++ b/flowman-plugins/mariadb/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - ../.. + 0.17.0 + ../../pom.xml diff --git a/flowman-plugins/mssqlserver/.gitignore b/flowman-plugins/mssqlserver/.gitignore new file mode 100644 index 000000000..b83d22266 --- /dev/null +++ b/flowman-plugins/mssqlserver/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/flowman-plugins/mssqlserver/pom.xml b/flowman-plugins/mssqlserver/pom.xml new file mode 100644 index 000000000..1742889bf --- /dev/null +++ b/flowman-plugins/mssqlserver/pom.xml @@ -0,0 +1,53 @@ + + + 4.0.0 + flowman-plugin-mssqlserver + Flowman MS SQL Server plugin + + + com.dimajix.flowman + flowman-root + 0.17.0 + ../../pom.xml + + + + flowman-mssqlserver + ${project.version} + ${project.build.finalName}.jar + 9.2.1.jre8 + + + + + + src/main/resources + true + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + + + + + com.dimajix.flowman + flowman-core + provided + + + + com.microsoft.sqlserver + mssql-jdbc + ${mssqlserver-java-client.version} + + + + + diff --git a/flowman-plugins/mssqlserver/src/main/assembly/assembly.xml b/flowman-plugins/mssqlserver/src/main/assembly/assembly.xml new file mode 100644 index 000000000..9dc35b6db --- /dev/null +++ b/flowman-plugins/mssqlserver/src/main/assembly/assembly.xml @@ -0,0 +1,32 @@ + + bin + + tar.gz + + false + + + ${project.build.outputDirectory} + plugins/${plugin.name} + 0644 + 0755 + + plugin.yml + + + + + + + plugins/${plugin.name} + false + false + false + runtime + true + + + diff --git 
a/flowman-plugins/mssqlserver/src/main/resources/plugin.yml b/flowman-plugins/mssqlserver/src/main/resources/plugin.yml new file mode 100644 index 000000000..e2f82485b --- /dev/null +++ b/flowman-plugins/mssqlserver/src/main/resources/plugin.yml @@ -0,0 +1,6 @@ +name: ${plugin.name} +description: ${project.name} +version: ${plugin.version} +isolation: false +jars: + - mssql-jdbc-${mssqlserver-java-client.version}.jar diff --git a/flowman-plugins/mysql/pom.xml b/flowman-plugins/mysql/pom.xml index 16e97a25f..6cad9acab 100644 --- a/flowman-plugins/mysql/pom.xml +++ b/flowman-plugins/mysql/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - ../.. + 0.17.0 + ../../pom.xml diff --git a/flowman-plugins/swagger/pom.xml b/flowman-plugins/swagger/pom.xml index 182c2550b..e42c174e1 100644 --- a/flowman-plugins/swagger/pom.xml +++ b/flowman-plugins/swagger/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - ../.. + 0.17.0 + ../../pom.xml diff --git a/flowman-scalatest-compat/pom.xml b/flowman-scalatest-compat/pom.xml index 00d91e572..d69654922 100644 --- a/flowman-scalatest-compat/pom.xml +++ b/flowman-scalatest-compat/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - .. 
+ 0.17.0 + ../pom.xml diff --git a/flowman-ui/.browserslistrc b/flowman-server-ui/.browserslistrc similarity index 100% rename from flowman-ui/.browserslistrc rename to flowman-server-ui/.browserslistrc diff --git a/flowman-ui/.eslintrc.js b/flowman-server-ui/.eslintrc.js similarity index 100% rename from flowman-ui/.eslintrc.js rename to flowman-server-ui/.eslintrc.js diff --git a/flowman-ui/.gitignore b/flowman-server-ui/.gitignore similarity index 100% rename from flowman-ui/.gitignore rename to flowman-server-ui/.gitignore diff --git a/flowman-ui/README.md b/flowman-server-ui/README.md similarity index 100% rename from flowman-ui/README.md rename to flowman-server-ui/README.md diff --git a/flowman-ui/babel.config.js b/flowman-server-ui/babel.config.js similarity index 100% rename from flowman-ui/babel.config.js rename to flowman-server-ui/babel.config.js diff --git a/flowman-ui/package-lock.json b/flowman-server-ui/package-lock.json similarity index 99% rename from flowman-ui/package-lock.json rename to flowman-server-ui/package-lock.json index b7a5a1631..9c45e0814 100644 --- a/flowman-ui/package-lock.json +++ b/flowman-server-ui/package-lock.json @@ -1,5 +1,5 @@ { - "name": "flowman-ui", + "name": "flowman-server-ui", "version": "0.1.0", "lockfileVersion": 1, "requires": true, diff --git a/flowman-ui/package.json b/flowman-server-ui/package.json similarity index 96% rename from flowman-ui/package.json rename to flowman-server-ui/package.json index 7c6988764..13398c647 100644 --- a/flowman-ui/package.json +++ b/flowman-server-ui/package.json @@ -1,5 +1,5 @@ { - "name": "flowman-ui", + "name": "flowman-server-ui", "version": "0.1.0", "private": true, "scripts": { diff --git a/flowman-ui/pom.xml b/flowman-server-ui/pom.xml similarity index 94% rename from flowman-ui/pom.xml rename to flowman-server-ui/pom.xml index 5b9f0f1be..2f5761f41 100644 --- a/flowman-ui/pom.xml +++ b/flowman-server-ui/pom.xml @@ -3,14 +3,14 @@ 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - flowman-ui - Flowman UI + flowman-server-ui + Flowman Server UI com.dimajix.flowman flowman-root - 0.16.0 - .. + 0.17.0 + ../pom.xml diff --git a/flowman-ui/postcss.config.js b/flowman-server-ui/postcss.config.js similarity index 100% rename from flowman-ui/postcss.config.js rename to flowman-server-ui/postcss.config.js diff --git a/flowman-ui/public/favicon.ico b/flowman-server-ui/public/favicon.ico similarity index 100% rename from flowman-ui/public/favicon.ico rename to flowman-server-ui/public/favicon.ico diff --git a/flowman-ui/public/index.html b/flowman-server-ui/public/index.html similarity index 81% rename from flowman-ui/public/index.html rename to flowman-server-ui/public/index.html index 237a67524..89f0d38a8 100644 --- a/flowman-ui/public/index.html +++ b/flowman-server-ui/public/index.html @@ -11,7 +11,7 @@
diff --git a/flowman-ui/src/App.vue b/flowman-server-ui/src/App.vue similarity index 100% rename from flowman-ui/src/App.vue rename to flowman-server-ui/src/App.vue diff --git a/flowman-ui/src/assets/logo.png b/flowman-server-ui/src/assets/logo.png similarity index 100% rename from flowman-ui/src/assets/logo.png rename to flowman-server-ui/src/assets/logo.png diff --git a/flowman-ui/src/components/MainNavigationDrawer.vue b/flowman-server-ui/src/components/MainNavigationDrawer.vue similarity index 98% rename from flowman-ui/src/components/MainNavigationDrawer.vue rename to flowman-server-ui/src/components/MainNavigationDrawer.vue index 532bcdf7e..326189b05 100644 --- a/flowman-ui/src/components/MainNavigationDrawer.vue +++ b/flowman-server-ui/src/components/MainNavigationDrawer.vue @@ -84,7 +84,7 @@ diff --git a/flowman-studio-ui/src/components/Flow.vue b/flowman-studio-ui/src/components/Flow.vue new file mode 100644 index 000000000..b5db233b2 --- /dev/null +++ b/flowman-studio-ui/src/components/Flow.vue @@ -0,0 +1,5 @@ + diff --git a/flowman-studio-ui/src/components/JobProperties.vue b/flowman-studio-ui/src/components/JobProperties.vue new file mode 100644 index 000000000..8008c860d --- /dev/null +++ b/flowman-studio-ui/src/components/JobProperties.vue @@ -0,0 +1,69 @@ + + + diff --git a/flowman-studio-ui/src/components/LogOutput.vue b/flowman-studio-ui/src/components/LogOutput.vue new file mode 100644 index 000000000..f52f63cec --- /dev/null +++ b/flowman-studio-ui/src/components/LogOutput.vue @@ -0,0 +1,54 @@ + + + diff --git a/flowman-studio-ui/src/components/MappingOutput.vue b/flowman-studio-ui/src/components/MappingOutput.vue new file mode 100644 index 000000000..166fe3dfc --- /dev/null +++ b/flowman-studio-ui/src/components/MappingOutput.vue @@ -0,0 +1,122 @@ + + + + diff --git a/flowman-studio-ui/src/components/MappingProperties.vue b/flowman-studio-ui/src/components/MappingProperties.vue new file mode 100644 index 000000000..3a33dc86e --- /dev/null +++ 
b/flowman-studio-ui/src/components/MappingProperties.vue @@ -0,0 +1,79 @@ + + + diff --git a/flowman-studio-ui/src/components/NavigationMenu.vue b/flowman-studio-ui/src/components/NavigationMenu.vue new file mode 100644 index 000000000..fff794cc7 --- /dev/null +++ b/flowman-studio-ui/src/components/NavigationMenu.vue @@ -0,0 +1,254 @@ + + + + diff --git a/flowman-studio-ui/src/components/ProjectSelector.vue b/flowman-studio-ui/src/components/ProjectSelector.vue new file mode 100644 index 000000000..28cc323d3 --- /dev/null +++ b/flowman-studio-ui/src/components/ProjectSelector.vue @@ -0,0 +1,80 @@ + + + diff --git a/flowman-studio-ui/src/components/RelationProperties.vue b/flowman-studio-ui/src/components/RelationProperties.vue new file mode 100644 index 000000000..d14290bcf --- /dev/null +++ b/flowman-studio-ui/src/components/RelationProperties.vue @@ -0,0 +1,59 @@ + + + diff --git a/flowman-studio-ui/src/components/Sessions.vue b/flowman-studio-ui/src/components/Sessions.vue new file mode 100644 index 000000000..0b45d3c9b --- /dev/null +++ b/flowman-studio-ui/src/components/Sessions.vue @@ -0,0 +1,173 @@ + + + + diff --git a/flowman-studio-ui/src/components/TargetProperties.vue b/flowman-studio-ui/src/components/TargetProperties.vue new file mode 100644 index 000000000..3540174b7 --- /dev/null +++ b/flowman-studio-ui/src/components/TargetProperties.vue @@ -0,0 +1,62 @@ + + + diff --git a/flowman-studio-ui/src/components/TestProperties.vue b/flowman-studio-ui/src/components/TestProperties.vue new file mode 100644 index 000000000..bdba23fee --- /dev/null +++ b/flowman-studio-ui/src/components/TestProperties.vue @@ -0,0 +1,60 @@ + + + diff --git a/flowman-studio-ui/src/main.js b/flowman-studio-ui/src/main.js new file mode 100644 index 000000000..38f19eb17 --- /dev/null +++ b/flowman-studio-ui/src/main.js @@ -0,0 +1,19 @@ +import Vue from 'vue' +import VueSSE from 'vue-sse' +import AsyncComputed from 'vue-async-computed' +import '@/plugins/axios' +import vuetify from 
'@/plugins/vuetify' +import Api from '@/services/api' +import Workbench from '@/App.vue' + +Vue.config.productionTip = false + +Vue.use(VueSSE) +Vue.use(Api) +Vue.use(AsyncComputed) + +new Vue({ + vuetify, + render: h => h(Workbench), + data: {} +}).$mount('#app') diff --git a/flowman-studio-ui/src/plugins/axios.js b/flowman-studio-ui/src/plugins/axios.js new file mode 100644 index 000000000..541021a2b --- /dev/null +++ b/flowman-studio-ui/src/plugins/axios.js @@ -0,0 +1,62 @@ +"use strict"; + +import Vue from 'vue'; +import axios from "axios"; + +// Full config: https://github.com/axios/axios#request-config +// axios.defaults.baseURL = process.env.baseURL || process.env.apiUrl || ''; +// axios.defaults.headers.common['Authorization'] = AUTH_TOKEN; +// axios.defaults.headers.post['Content-Type'] = 'application/x-www-form-urlencoded'; + +let config = { + //baseURL: "http://localhost:8082/" + // baseURL: process.env.baseURL || process.env.apiUrl || "" + // timeout: 60 * 1000, // Timeout + // withCredentials: true, // Check cross-site Access-Control +}; + +const _axios = axios.create(config); + +_axios.interceptors.request.use( + function(config) { + // Do something before request is sent + return config; + }, + function(error) { + // Do something with request error + return Promise.reject(error); + } +); + +// Add a response interceptor +_axios.interceptors.response.use( + function(response) { + // Do something with response data + return response; + }, + function(error) { + // Do something with response error + return Promise.reject(error); + } +); + +Plugin.install = function(Vue) { + Vue.axios = _axios; + window.axios = _axios; + Object.defineProperties(Vue.prototype, { + axios: { + get() { + return _axios; + } + }, + $axios: { + get() { + return _axios; + } + }, + }); +}; + +Vue.use(Plugin) + +export default Plugin; diff --git a/flowman-studio-ui/src/plugins/vuetify.js b/flowman-studio-ui/src/plugins/vuetify.js new file mode 100644 index 000000000..5bdec19d1 --- 
/dev/null +++ b/flowman-studio-ui/src/plugins/vuetify.js @@ -0,0 +1,7 @@ +import Vue from 'vue'; +import Vuetify from 'vuetify/lib/framework'; + +Vue.use(Vuetify); + +export default new Vuetify({ +}); diff --git a/flowman-studio-ui/src/services/api.js b/flowman-studio-ui/src/services/api.js new file mode 100644 index 000000000..a17bdc5c4 --- /dev/null +++ b/flowman-studio-ui/src/services/api.js @@ -0,0 +1,110 @@ +import axios from 'axios'; + + +export default { + install(Vue) { + const api = { + state: Vue.observable({ + kernel: null, + session: null + }), + + getNamespace() { + return axios.get('/api/namespace') + .then(response => response.data) + }, + + launchKernel() { + return axios.post('/api/kernel') + .then(response => response.data) + }, + shutdownKernel(kernel) { + return axios.delete('/api/kernel/' + kernel) + .then(response => response.data) + }, + listKernels() { + return axios.get('/api/kernel') + .then(response => response.data) + }, + + listSessions(kernel) { + return axios.get('/api/kernel/' + kernel + "/session") + .then(response => response.data) + }, + resetSession(kernel, session) { + return axios.post('/api/kernel/' + kernel + "/session/" + session + "/reset") + }, + closeSession(kernel, session) { + return axios.delete('/api/kernel/' + kernel + "/session/" + session) + }, + + setCurrentSession(kernel, session) { + this.state.kernel = kernel + this.state.session = session + }, + + getKernelLog() { + return Vue.$sse.create({ + url:'/api/kernel/' + this.state.kernel + '/log', + format: 'json' + }) + }, + + listProjects(kernel) { + return axios.get('/api/kernel/' + kernel + '/project') + .then(response => response.data) + }, + openProject(kernel, project) { + return axios.post('/api/kernel/' + kernel + '/session', {projectName: project}) + .then(response => response.data) + }, + + getCurrentSession() { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session) + .then(response => response.data) + }, + listJobs() { + 
return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/job") + .then(response => response.data) + }, + getJob(job) { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/job/" + job) + .then(response => response.data) + }, + listTargets() { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/target") + .then(response => response.data) + }, + getTarget(target) { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/target/" + target) + .then(response => response.data) + }, + listTests() { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/test") + .then(response => response.data) + }, + getTest(test) { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/test/" + test) + .then(response => response.data) + }, + listMappings() { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/mapping") + .then(response => response.data) + }, + getMapping(mapping) { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/mapping/" + mapping) + .then(response => response.data) + }, + listRelations() { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/relation") + .then(response => response.data) + }, + getRelation(relation) { + return axios.get('/api/kernel/' + this.state.kernel + "/session/" + this.state.session + "/relation/" + relation) + .then(response => response.data) + }, + }; + + Vue.prototype.$api = api + } +}; diff --git a/flowman-studio-ui/vue.config.js b/flowman-studio-ui/vue.config.js new file mode 100644 index 000000000..d106e4a61 --- /dev/null +++ b/flowman-studio-ui/vue.config.js @@ -0,0 +1,24 @@ +module.exports = { + outputDir: 'target/classes/META-INF/resources/webjars/flowman-studio-ui', + transpileDependencies: 
[ + 'vuetify' + ], + pages: { + 'index': { + 'entry': './src/main.js', + 'template': 'public/index.html', + 'title': 'Flowman Studio', + 'chucnks': ['chunk-vendors', 'chunk-common', 'index'] + } + }, + devServer: { + port: 8088, + proxy: { + '^/api': { + target: 'http://localhost:8080', + ws: true, + changeOrigin: true + } + } + } +} diff --git a/flowman-studio/.gitignore b/flowman-studio/.gitignore new file mode 100644 index 000000000..b83d22266 --- /dev/null +++ b/flowman-studio/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/flowman-studio/pom.xml b/flowman-studio/pom.xml new file mode 100644 index 000000000..16d764129 --- /dev/null +++ b/flowman-studio/pom.xml @@ -0,0 +1,164 @@ + + + 4.0.0 + flowman-studio + Flowman Studio + + + flowman-root + com.dimajix.flowman + 0.17.0 + ../pom.xml + + + + + + net.alchim31.maven + scala-maven-plugin + + + org.scalatest + scalatest-maven-plugin + + + + org.apache.maven.plugins + maven-dependency-plugin + + + initialize + + build-classpath + + + runtime + flowman-studio.classpath + false + / + , + lib + + json,org.everit.json.schema,velocity-engine-core + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + copy-resources + process-resources + + copy-resources + + + ${project.build.directory}/properties + + + src/main/properties + + **/* + + true + + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + attach-artifacts + package + + attach-artifact + + + + + ${project.build.directory}/properties/flowman-studio.properties + properties + properties + + + + + + + + + + + + com.dimajix.flowman + flowman-common + + + + com.dimajix.flowman + flowman-studio-ui + + + + com.dimajix.flowman + flowman-scalatest-compat + + + + org.apache.hadoop + hadoop-client + + + + org.apache.spark + spark-sql_${scala.api_version} + + + + log4j + log4j + + + + args4j + args4j + + + + com.typesafe.akka + akka-http_${scala.api_version} + 10.1.8 + + + + com.typesafe.akka + akka-http-spray-json_${scala.api_version} + 10.1.8 + + + + 
com.github.swagger-akka-http + swagger-akka-http_${scala.api_version} + 1.1.0 + + + + org.webjars + swagger-ui + 3.22.2 + + + + org.scalatest + scalatest_${scala.api_version} + + + diff --git a/flowman-studio/src/main/properties/flowman-studio.properties b/flowman-studio/src/main/properties/flowman-studio.properties new file mode 100644 index 000000000..52cdc6e64 --- /dev/null +++ b/flowman-studio/src/main/properties/flowman-studio.properties @@ -0,0 +1 @@ +flowman-studio.classpath=${flowman-studio.classpath} diff --git a/flowman-studio/src/main/resources/com/dimajix/flowman/studio/flowman-studio.properties b/flowman-studio/src/main/resources/com/dimajix/flowman/studio/flowman-studio.properties new file mode 100644 index 000000000..9c0619393 --- /dev/null +++ b/flowman-studio/src/main/resources/com/dimajix/flowman/studio/flowman-studio.properties @@ -0,0 +1 @@ +studio.server.request.timeout=120 diff --git a/flowman-studio/src/main/resources/swagger/index.html b/flowman-studio/src/main/resources/swagger/index.html new file mode 100644 index 000000000..2527a53c6 --- /dev/null +++ b/flowman-studio/src/main/resources/swagger/index.html @@ -0,0 +1,95 @@ + + + + + + Swagger UI + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/Application.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/Application.scala new file mode 100644 index 000000000..70ba7e0a7 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/Application.scala @@ -0,0 +1,44 @@ +/* + * Copyright 2019 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio + +import com.dimajix.flowman.common.Logging +import com.dimajix.flowman.studio.rest.Server + + +object Application { + def main(args: Array[String]) : Unit = { + Logging.init() + + val server = new Application() + val result = server.run() + System.exit(if (result) 0 else 1) + } + +} + + +class Application { + def run() : Boolean = { + val conf = Configuration.loadDefaults() + + val server = new Server(conf) + server.run() + + true + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/Configuration.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/Configuration.scala new file mode 100644 index 000000000..dcee62c8b --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/Configuration.scala @@ -0,0 +1,70 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio + +import java.io.FileInputStream +import java.util.Properties + + +object Configuration { + val SERVER_BIND_HOST = "studio.server.bind.host" + val SERVER_BIND_PORT = "studio.server.bind.port" + val SERVER_REQUEST_TIMEOUT = "studio.server.request.timeout" + val SERVER_IDLE_TIMEOUT = "studio.server.idle.timeout" + val SERVER_BIND_TIMEOUT = "studio.server.bind.timeout" + val SERVER_LINGER_TIMEOUT = "studio.server.linger.timeout" + + private def defaultProperties() : Properties = { + val loader = Thread.currentThread.getContextClassLoader + val url = loader.getResource("com/dimajix/flowman/studio/flowman-studio.properties") + val properties = new Properties() + properties.load(url.openStream()) + properties + } + + /** + * Load a Configuration from a Properties file + * @param filename + * @return + */ + def load(filename:String) : Configuration= { + val properties = defaultProperties() + properties.load(new FileInputStream(filename)) + new Configuration(properties) + } + + /** + * Loads built-in default configuration + * @return + */ + def loadDefaults() : Configuration = { + val properties = defaultProperties() + new Configuration(properties) + } +} + +class Configuration(properties: Properties) { + import Configuration._ + + def getBindHost() : String = properties.getProperty(SERVER_BIND_HOST, "0.0.0.0") + def getBindPort() : Int = properties.getProperty(SERVER_BIND_PORT, "8080").toInt + + def getRequestTimeout() : Int = properties.getProperty(SERVER_REQUEST_TIMEOUT, "20").toInt + def getIdleTimeout() : 
Int = properties.getProperty(SERVER_IDLE_TIMEOUT, "60").toInt + def getBindTimeout() : Int = properties.getProperty(SERVER_BIND_TIMEOUT, "1").toInt + def getLingerTimeout() : Int = properties.getProperty(SERVER_LINGER_TIMEOUT, "60").toInt +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Converter.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Converter.scala new file mode 100644 index 000000000..00d427d0c --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Converter.scala @@ -0,0 +1,14 @@ +package com.dimajix.flowman.studio.model + +import com.dimajix.flowman.studio.service + + +object Converter { + def of(kernel:service.KernelService) : Kernel = { + Kernel(kernel.id, kernel.url.map(_.toString), kernel.state.toString) + } + + def of(launcher: service.Launcher) : Launcher = { + Launcher(launcher.name, launcher.description) + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/JsonSupport.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/JsonSupport.scala new file mode 100644 index 000000000..e4580efaf --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/JsonSupport.scala @@ -0,0 +1,55 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.studio.model + +import java.time.ZonedDateTime +import java.time.format.DateTimeFormatter + +import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport +import spray.json.DefaultJsonProtocol +import spray.json.DeserializationException +import spray.json.JsString +import spray.json.JsValue +import spray.json.JsonFormat +import spray.json.RootJsonFormat + + +trait JsonSupport extends DefaultJsonProtocol with SprayJsonSupport { + implicit object ZonedDateTimeFormat extends JsonFormat[ZonedDateTime] { + final val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") + def write(value:ZonedDateTime) : JsString = { + JsString(value.toLocalDateTime.format(formatter)) + } + def read(value:JsValue) : ZonedDateTime = { + value match { + case JsString(dt) => ZonedDateTime.parse(dt, formatter) + case _ => throw DeserializationException("Not a string") + } + } + } + + implicit val kernelLogMessageFormat: RootJsonFormat[KernelLogMessage] = jsonFormat3(KernelLogMessage) + implicit val statusFormat: RootJsonFormat[Status] = jsonFormat1(Status) + implicit val kernelRegistrationRequestFormat: RootJsonFormat[KernelRegistrationRequest] = jsonFormat2(KernelRegistrationRequest) + implicit val kernelFormat: RootJsonFormat[Kernel] = jsonFormat3(Kernel) + implicit val kernelListFormat: RootJsonFormat[KernelList] = jsonFormat1(KernelList) + implicit val launcherFormat: RootJsonFormat[Launcher] = jsonFormat2(Launcher) + implicit val launcherListFormat: RootJsonFormat[LauncherList] = jsonFormat1(LauncherList) +} + +object JsonSupport extends JsonSupport { +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Kernel.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Kernel.scala new file mode 100644 index 000000000..c83602917 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Kernel.scala @@ -0,0 +1,41 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.model + +import java.time.ZonedDateTime + + +case class KernelRegistrationRequest( + id:String, + url:String +) + +case class Kernel( + id:String, + url:Option[String], + state:String +) + +case class KernelList( + kernels:Seq[Kernel] +) + +case class KernelLogMessage( + kernelId:String, + timestamp:ZonedDateTime, + message:String +) diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Launcher.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Launcher.scala new file mode 100644 index 000000000..5507422b0 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Launcher.scala @@ -0,0 +1,26 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.studio.model + +case class Launcher( + name:String, + description:String +) + +case class LauncherList( + launchers:Seq[Launcher] +) diff --git a/flowman-server/src/main/scala/com/dimajix/flowman/server/executor/Executor.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Session.scala similarity index 85% rename from flowman-server/src/main/scala/com/dimajix/flowman/server/executor/Executor.scala rename to flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Session.scala index fcc9d28f7..fb7641b21 100644 --- a/flowman-server/src/main/scala/com/dimajix/flowman/server/executor/Executor.scala +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Session.scala @@ -1,5 +1,5 @@ /* - * Copyright 2019 Kaya Kupferschmidt + * Copyright 2021 Kaya Kupferschmidt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -package com.dimajix.flowman.server.executor +package com.dimajix.flowman.studio.model -class Executor { +class Session { } diff --git a/flowman-server/src/main/scala/com/dimajix/flowman/server/executor/LocalExecutor.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Status.scala similarity index 82% rename from flowman-server/src/main/scala/com/dimajix/flowman/server/executor/LocalExecutor.scala rename to flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Status.scala index 056ca90e5..2e9b6361f 100644 --- a/flowman-server/src/main/scala/com/dimajix/flowman/server/executor/LocalExecutor.scala +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/model/Status.scala @@ -1,5 +1,5 @@ /* - * Copyright 2019 Kaya Kupferschmidt + * Copyright 2021 Kaya Kupferschmidt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,8 +14,7 @@ * limitations under the License. */ -package com.dimajix.flowman.server.executor - -class LocalExecutor { +package com.dimajix.flowman.studio.model +sealed case class Status(status:String) { } diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/KernelEndpoint.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/KernelEndpoint.scala new file mode 100644 index 000000000..2ee0cf5b4 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/KernelEndpoint.scala @@ -0,0 +1,214 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.studio.rest + +import java.time.Clock +import java.time.ZonedDateTime + +import akka.http.scaladsl.marshalling.sse.EventStreamMarshalling._ +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.model.StatusCodes.Found +import akka.http.scaladsl.model.Uri +import akka.http.scaladsl.model.sse.ServerSentEvent +import akka.http.scaladsl.server +import akka.stream.OverflowStrategy +import akka.stream.scaladsl.Source +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.DELETE +import javax.ws.rs.GET +import javax.ws.rs.POST +import javax.ws.rs.PUT +import javax.ws.rs.Path + +import com.dimajix.flowman.studio.model.Converter +import com.dimajix.flowman.studio.model.Kernel +import com.dimajix.flowman.studio.model.KernelList +import com.dimajix.flowman.studio.model.KernelLogMessage +import com.dimajix.flowman.studio.service.KernelManager +import com.dimajix.flowman.studio.service.KernelService +import com.dimajix.flowman.studio.service.KernelState +import com.dimajix.flowman.studio.service.LaunchEnvironment +import com.dimajix.flowman.studio.service.LauncherManager + + +@Api(value = "kernel", produces = "application/json", consumes = "application/json") +@Path("/kernel") +@ApiResponses(Array( + new ApiResponse(code = 500, message = "Internal Server Error") +)) +class KernelEndpoint(kernelManager:KernelManager, launcherManager:LauncherManager) { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.studio.model.JsonSupport._ + + private val clock = Clock.systemDefaultZone() + + def routes : server.Route = pathPrefix("kernel") {( + pathEndOrSingleSlash { + 
redirectToNoTrailingSlashIfPresent(Found) {( + createKernel() + ~ + listKernel() + )} + } + ~ + pathPrefix(Segment) { kernel => ( + pathEndOrSingleSlash { + redirectToNoTrailingSlashIfPresent(Found) {( + getKernel(kernel) + ~ + stopKernel(kernel) + )} + } + ~ + path("log") { + getKernelLog(kernel) + } + ~ + invokeKernel(kernel) + )} + )} + + @POST + @ApiOperation(value = "Launch a new kernel", nickname = "createKernel", httpMethod = "POST") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Create new kernel", response = classOf[Kernel]) + )) + def createKernel() : server.Route = { + post { + val launcher = launcherManager.list().head + val env = LaunchEnvironment() + val kernel = kernelManager.launchKernel(launcher, env) + complete(Converter.of(kernel)) + } + } + + @GET + @ApiOperation(value = "List all known kernels", nickname = "listKernels", httpMethod = "GET") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "List of kernels", response = classOf[KernelList]) + )) + def listKernel() : server.Route = { + get { + val kernels = kernelManager.list().filter(_.state != KernelState.TERMINATED) + val result = KernelList(kernels.map(Converter.of)) + complete(result) + } + } + + @GET + @Path("/{kernel}") + @ApiOperation(value = "Retrieve a specific kernel by its ID", nickname = "getKernel", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "kernel", value = "Kernel ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Retrieve information about a specific kernel", response = classOf[Kernel]), + new ApiResponse(code = 404, message = "Kernel not found") + )) + def getKernel(@ApiParam(hidden = true) kernel:String) : server.Route = { + get { + withKernel(kernel) { kernel => + val result = Converter.of(kernel) + complete(result) + } + } + } + + @DELETE + @Path("/{kernel}") + @ApiOperation(value = "Shutdown a running kernel", nickname = 
"stopKernel", httpMethod = "DELETE") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "kernel", value = "Kernel ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Successfully stopped kernel", response = classOf[String]), + new ApiResponse(code = 404, message = "Kernel not found") + )) + def stopKernel(@ApiParam(hidden = true) kernel:String) : server.Route = { + delete { + withKernel(kernel) { kernel => + kernel.shutdown() + complete("success") + } + } + } + + @GET + @Path("/{kernel}/log") + @ApiOperation(value = "Retrieve the kernel log as a SSE stream", nickname = "getKernelLog", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "kernel", value = "Kernel ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Successfully retrieved kernel log"), + new ApiResponse(code = 404, message = "Kernel not found") + )) + def getKernelLog(@ApiParam(hidden = true) kernel:String) : server.Route = { + get { + withKernel(kernel) { kernel => + complete { + Source.fromPublisher(kernel.messages) + .map { message => + val event = KernelLogMessage(kernel.id, ZonedDateTime.now(), message) + ServerSentEvent(kernelLogMessageFormat.write(event).toString(), Some("message")) + } + } + } + } + } + + @GET + @PUT + @POST + @DELETE + @Path("/{kernel}/") + @ApiOperation(value = "Invoke a kernel", nickname = "invokeKernel", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "kernel", value = "Kernel ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Successfully invoked kernel"), + new ApiResponse(code = 404, message = "Kernel not found") + )) + def invokeKernel(@ApiParam(hidden = true) kernel:String) : server.Route = { + withKernel(kernel) { kernel => + extractUnmatchedPath { path => + 
extractRequest { request => + val uri = request.uri.withPath(Uri.Path("/api") ++ path) + val newRequest = request.withUri(uri) + // TODO: Think about redirects etc... + complete(kernel.invoke(newRequest)) + } + } + } + } + + private def withKernel(kernel:String)(fn:KernelService => server.Route) : server.Route = { + kernelManager.getKernel(kernel) match { + case Some(svc) => fn(svc) + case None => complete(HttpResponse(status = StatusCodes.NotFound)) + } + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/LauncherEndpoint.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/LauncherEndpoint.scala new file mode 100644 index 000000000..354797a5e --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/LauncherEndpoint.scala @@ -0,0 +1,88 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.studio.rest + +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.GET +import javax.ws.rs.Path + +import com.dimajix.flowman.studio.model.Converter +import com.dimajix.flowman.studio.model.Kernel +import com.dimajix.flowman.studio.model.LauncherList +import com.dimajix.flowman.studio.service.LauncherManager + + +@Api(value = "launcher", produces = "application/json", consumes = "application/json") +@Path("/launcher") +class LauncherEndpoint(launcherManager: LauncherManager) { + import akka.http.scaladsl.server.Directives._ + + import com.dimajix.flowman.studio.model.JsonSupport._ + + def routes : server.Route = pathPrefix("launcher") {( + pathEndOrSingleSlash { + listLaunchers() + } + ~ + pathPrefix(Segment) { kernel => + pathEndOrSingleSlash { + getLauncher(kernel) + } + } + ) + } + + @GET + @ApiOperation(value = "listLaunchers", nickname = "List all Launchers", httpMethod = "GET") + @ApiResponses(Array( + new ApiResponse(code = 200, message = "List of launchers", response = classOf[LauncherList]) + )) + def listLaunchers() : server.Route = { + get { + val result = LauncherList(launcherManager.list().map(Converter.of)) + complete(result) + } + } + + @GET + @Path("/{launcher}") + @ApiOperation(value = "getLauncher", nickname = "Get a specific Laucnher by ID", httpMethod = "GET") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "kernel", value = "Kernel ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Retrieve information about a specific kernel", 
response = classOf[Kernel]), + new ApiResponse(code = 404, message = "Kernel not found") + )) + private def getLauncher(@ApiParam(hidden = true) launcher:String) : server.Route = { + get { + launcherManager.getLauncher(launcher) match { + case Some(svc) => complete(Converter.of(svc)) + case None => complete(HttpResponse(status = StatusCodes.NotFound)) + } + } + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/PingEndpoint.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/PingEndpoint.scala new file mode 100644 index 000000000..2ef63dfc2 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/PingEndpoint.scala @@ -0,0 +1,61 @@ +/* + * Copyright 2019 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.studio.rest + +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.POST +import javax.ws.rs.Path + +import com.dimajix.flowman.studio.model.Status + + +@Api(value = "ping", produces = "application/json", consumes = "application/json") +@Path("/ping") +class PingEndpoint { + import akka.http.scaladsl.server.Directives._ + import com.dimajix.flowman.studio.model.JsonSupport._ + + def routes : server.Route = pathPrefix("ping") { + pathEnd { + ping() + } + } + + @POST + @Path("") + @ApiOperation(value = "Ping", nickname = "ping", httpMethod = "POST") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "ping message", value = "some message to ping", required = true, + dataTypeClass = classOf[String], paramType = "body") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Ping", response = classOf[String]) + )) + private def ping() : server.Route = { + post { + entity(as[String]) { body => + complete(Status("success")) + } + } + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/RegistryEndpoint.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/RegistryEndpoint.scala new file mode 100644 index 000000000..c7bbbfd1d --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/RegistryEndpoint.scala @@ -0,0 +1,98 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.rest + +import java.net.URL + +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server +import io.swagger.annotations.Api +import io.swagger.annotations.ApiImplicitParam +import io.swagger.annotations.ApiImplicitParams +import io.swagger.annotations.ApiOperation +import io.swagger.annotations.ApiParam +import io.swagger.annotations.ApiResponse +import io.swagger.annotations.ApiResponses +import javax.ws.rs.DELETE +import javax.ws.rs.POST +import javax.ws.rs.Path + +import com.dimajix.flowman.studio.model.KernelRegistrationRequest +import com.dimajix.flowman.studio.service.KernelManager +import com.dimajix.flowman.studio.service.KernelService + + +@Api(value = "registry", produces = "application/json", consumes = "application/json") +@Path("/registry") +class RegistryEndpoint(kernelManager:KernelManager) { + import akka.http.scaladsl.server.Directives._ + import com.dimajix.flowman.studio.model.JsonSupport._ + + def routes : server.Route = pathPrefix("registry") {( + pathEndOrSingleSlash {( + registerKernel() + )} + ~ + pathPrefix(Segment) { kernel => ( + pathEndOrSingleSlash {( + removeKernel(kernel) + )} + )} + )} + + @POST + @ApiOperation(value = "Register kernel", nickname = "registerKernel", httpMethod = "POST") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "kernel registration request", value = "Kernel parameters", required = true, + dataTypeClass = classOf[KernelRegistrationRequest], paramType = "body") + )) + 
@ApiResponses(Array( + new ApiResponse(code = 200, message = "Successfully registered kernel"), + new ApiResponse(code = 404, message = "Kernel not found") + )) + private def registerKernel() : server.Route = { + post { + entity(as[KernelRegistrationRequest]) { request => + kernelManager.registerKernel(request.id, new URL(request.url)) + complete("success") + } + } + } + + @DELETE + @Path("/{kernel}") + @ApiOperation(value = "Unregister kernel", nickname = "unregisterKernel", httpMethod = "DELETE") + @ApiImplicitParams(Array( + new ApiImplicitParam(name = "kernel", value = "Kernel ID", required = true, dataType = "string", paramType = "path") + )) + @ApiResponses(Array( + new ApiResponse(code = 200, message = "Successfully removed kernel"), + new ApiResponse(code = 404, message = "Kernel not found") + )) + private def removeKernel(@ApiParam(hidden = true) kernel:String) : server.Route = { + delete { + kernelManager.getKernel(kernel) match { + case None => + complete(HttpResponse(status = StatusCodes.NotFound)) + case Some(kernel) => + kernelManager.unregisterKernel(kernel) + complete("success") + } + } + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/Server.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/Server.scala new file mode 100644 index 000000000..2f48dcf33 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/Server.scala @@ -0,0 +1,106 @@ +/* + * Copyright 2019 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.rest + +import scala.concurrent.Await +import scala.concurrent.ExecutionContextExecutor +import scala.concurrent.Promise +import scala.concurrent.duration.Duration + +import akka.Done +import akka.actor.ActorSystem +import akka.http.scaladsl.Http +import akka.http.scaladsl.model.StatusCodes.Found +import akka.http.scaladsl.settings.ServerSettings +import akka.stream.ActorMaterializer +import akka.stream.scaladsl.Sink +import org.slf4j.LoggerFactory + +import com.dimajix.common.net.SocketUtils +import com.dimajix.flowman.studio.Configuration +import com.dimajix.flowman.studio.service.KernelManager +import com.dimajix.flowman.studio.service.LauncherManager +import com.dimajix.flowman.studio.service.LocalLauncher + + +class Server( + conf:Configuration +) { + import akka.http.scaladsl.server.Directives._ + + private val logger = LoggerFactory.getLogger(classOf[Server]) + + implicit private val system: ActorSystem = ActorSystem("flowman") + implicit private val materializer: ActorMaterializer = ActorMaterializer() + implicit private val executionContext: ExecutionContextExecutor = system.dispatcher + + private val launcherManager = new LauncherManager + private val kernelManager = new KernelManager + private val pingEndpoint = new PingEndpoint + private val launcherEndpoint = new LauncherEndpoint(launcherManager) + private val registryEndpoint = new RegistryEndpoint(kernelManager) + private val kernelEndpoint = new KernelEndpoint(kernelManager, launcherManager) + + def run(): Unit = { + val route = ( + pathPrefix("api") {( + pingEndpoint.routes + ~ + registryEndpoint.routes + ~ + launcherEndpoint.routes + ~ + kernelEndpoint.routes + ~ + SwaggerDocEndpoint.routes + )} + ~ + pathPrefix("swagger") {( + pathEndOrSingleSlash { + redirectToTrailingSlashIfMissing(Found) { + getFromResource("swagger/index.html") + } + } + ~ + 
getFromResourceDirectory("META-INF/resources/webjars/swagger-ui/3.22.2") + )} + ) + + logger.info("Starting Flowman Studio") + + val settings = ServerSettings(system) + .withVerboseErrorMessages(true) + + val server = Http().bind(conf.getBindHost(), conf.getBindPort(), akka.http.scaladsl.ConnectionContext.noEncryption(), settings) + .to(Sink.foreach { connection => + logger.info("Accepted new connection from " + connection.remoteAddress) + connection.handleWith(route) + }) + .run() + + server.foreach { binding => + val listenUrl = SocketUtils.toURL("http", binding.localAddress, allowAny = true) + logger.info(s"Flowman Studio online at $listenUrl") + + val localUrl = SocketUtils.toURL("http", binding.localAddress, allowAny = false) + + launcherManager.addLauncher(new LocalLauncher(localUrl, system)) + } + + Await.ready(Promise[Done].future, Duration.Inf) + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/SwaggerDocEndpoint.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/SwaggerDocEndpoint.scala new file mode 100644 index 000000000..fd3429030 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/rest/SwaggerDocEndpoint.scala @@ -0,0 +1,21 @@ +package com.dimajix.flowman.studio.rest + +import com.github.swagger.akka.SwaggerHttpService +import com.github.swagger.akka.model.Info + + +object SwaggerDocEndpoint extends SwaggerHttpService { + override def apiClasses = Set( + classOf[KernelEndpoint], + classOf[PingEndpoint], + classOf[RegistryEndpoint], + classOf[LauncherEndpoint] + ) + override def host = "" + override def basePath: String = "/api/" + override def apiDocsPath: String = "swagger" + override def info = Info(version = "1.0") + // override val externalDocs = Some(new ExternalDocs("Core Docs", "http://acme.com/docs")) + // override def securitySchemeDefinitions = Map("basicAuth" -> new BasicAuthDefinition()) + override val unwantedDefinitions = Seq() +} diff --git 
a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/KernelManager.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/KernelManager.scala new file mode 100644 index 000000000..832446f3e --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/KernelManager.scala @@ -0,0 +1,112 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.service + +import java.net.URL +import java.util.UUID +import java.util.concurrent.TimeUnit + +import scala.collection.mutable +import scala.concurrent.duration.FiniteDuration + +import akka.actor.ActorSystem +import org.slf4j.LoggerFactory + + +final class KernelManager(implicit system:ActorSystem) { + implicit private val ec = system.dispatcher + private val logger = LoggerFactory.getLogger(classOf[KernelManager]) + private val kernels = mutable.ListBuffer[KernelService]() + + system.scheduler.schedule( + FiniteDuration(10, TimeUnit.SECONDS), + FiniteDuration(10, TimeUnit.SECONDS)) { + // Find all IDs of all terminated kernels + val terminatedKernels = list().filter(_.state == KernelState.TERMINATED).map(_.id).toSet + + // Clean up terminated kernels + kernels.synchronized { + val index = kernels.indexWhere(k => terminatedKernels.contains(k.id)) + if (index >= 0) { + kernels.remove(index) + } + } + } + + /** + * Lists all known Kernels + * @return + */ + def list() : Seq[KernelService] 
= { + val result = mutable.ListBuffer[KernelService]() + kernels.synchronized { + result.append(kernels:_*) + } + result + } + + def getKernel(id:String) : Option[KernelService] = { + var result:Option[KernelService] = None + kernels.synchronized { + result = kernels.find(_.id == id) + } + result + } + + def launchKernel(launcher:Launcher, env:LaunchEnvironment) : KernelService = { + val id = UUID.randomUUID().toString + val secret = UUID.randomUUID().toString + val finalEnv = env.copy(id=id, secret=secret) + + // Launch kernel process + val process = launcher.launch(finalEnv) + val svc = new KernelService(id, secret, process)(system) + + kernels.synchronized { + kernels.append(svc) + } + + svc + } + + /** + * This method is indirectly called by a kernel (via the Studio REST interface) in order to register its URL + * for further communications + * @param id + * @param url + */ + def registerKernel(id:String, url:URL) : Unit = { + val svc = getKernel(id) + svc match { + case Some(svc) => + logger.info(s"Register known kernel $id at $url") + svc.setUrl(url) + case None => + logger.info(s"Register unknown kernel $id at $url") + } + } + + def unregisterKernel(kernel:KernelService) : Unit = { + val id = kernel.id + kernels.synchronized { + val index = kernels.indexWhere(_.id == id) + if (index >= 0) { + kernels.remove(index) + } + } + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/KernelService.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/KernelService.scala new file mode 100644 index 000000000..0f5f98d61 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/KernelService.scala @@ -0,0 +1,118 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.service + +import java.net.URL + +import scala.concurrent.ExecutionContextExecutor +import scala.concurrent.Future +import scala.util.Failure +import scala.util.Success + +import akka.actor.ActorSystem +import akka.http.scaladsl.Http +import akka.http.scaladsl.client.RequestBuilding.Post +import akka.http.scaladsl.model.HttpRequest +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.model.Uri +import org.reactivestreams.Publisher + + +sealed abstract class KernelState +object KernelState { + case object STARTING extends KernelState + case object RUNNING extends KernelState + case object STOPPING extends KernelState + case object TERMINATED extends KernelState +} + +final class KernelService(val id:String, val secret: String, process: Process)(implicit system:ActorSystem) { + private implicit val ec: ExecutionContextExecutor = system.dispatcher + private var _url:Option[URL] = None + + def url : Option[URL] = _url + private[service] def setUrl(url:URL) : Unit = {_url = Some(url)} + + /** + * Returns a publisher for all console messages produced by the process + * @return + */ + def messages : Publisher[String] = process.messages + + /** + * Returns the state of the kernel as known by the service. 
This may polling the process state and its + * registration state + * @return + */ + def state : KernelState = { + process.state match { + case ProcessState.STARTING => KernelState.STARTING + case ProcessState.RUNNING => + if (url.isEmpty) + KernelState.STARTING + else + KernelState.RUNNING + case ProcessState.STOPPING => KernelState.STOPPING + case ProcessState.TERMINATED => KernelState.TERMINATED + } + } + + /** + * Returns true if the Kernel is still alive + * @return + */ + def isAlive() : Boolean = { + process.state == ProcessState.RUNNING + } + + /** + * Stops the Kernel by issuing a shutdown request to the kernel via its REST interface. + */ + def shutdown() : Unit = { + _url match { + case Some(url) => + val uri = Uri(url.toString).withPath(Uri.Path("/api/shutdown")) + Http().singleRequest(Post(uri)) + .onComplete { + case Success(res) => + println(res) + case Failure(_) => + process.shutdown() + } + case None => + process.shutdown() + } + } + + /** + * Forwards a [[HttpRequest]] to the kernel. This function will replace the host and port by the appropriate + * values, but the path has already to be correct. Note that the resulting [[HttpResponse]] will not be modified, + * which means that a HTTP redirect will contain the kernel host for example. 
+ * @param request + * @return + */ + def invoke(request:HttpRequest) : Future[HttpResponse] = { + _url match { + case Some(url) => + val uri = request.uri.withHost(url.getHost).withPort(url.getPort) + val finalRequest = request.copy(uri=uri) + Http().singleRequest(finalRequest) + case None => Future.successful(HttpResponse(StatusCodes.BadGateway)) + } + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LaunchEnvironment.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LaunchEnvironment.scala new file mode 100644 index 000000000..54694779f --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LaunchEnvironment.scala @@ -0,0 +1,25 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.service + + +case class LaunchEnvironment( + id:String="", + secret:String="" +) { + +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/Launcher.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/Launcher.scala new file mode 100644 index 000000000..a6804b01c --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/Launcher.scala @@ -0,0 +1,24 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.service + + +abstract class Launcher { + def name : String + def description : String + def launch(env:LaunchEnvironment) : Process +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LauncherManager.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LauncherManager.scala new file mode 100644 index 000000000..7de261ad1 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LauncherManager.scala @@ -0,0 +1,64 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.studio.service + +import scala.collection.mutable + + +final class LauncherManager { + private val launchers = mutable.ListBuffer[Launcher]() + + /** + * Returns a list of all available Launchers + * @return + */ + def list() : Seq[Launcher] = { + val result = mutable.ListBuffer[Launcher]() + launchers.synchronized { + result.append(launchers:_*) + } + result + } + + /** + * Retrieves a specific launcher by its name. If no such launcher is known, [[None]] will be returned instead + * @param name + * @return + */ + def getLauncher(name:String) : Option[Launcher] = { + var result:Option[Launcher] = None + launchers.synchronized { + result = launchers.find(_.name == name) + } + result + } + + /** + * Registers a new Launcher. If another launcher with the same name is already registered, it will be replaced + * with the new launcher + * @param launcher + */ + def addLauncher(launcher:Launcher) : Unit = { + launchers.synchronized { + val name = launcher.name + val idx = launchers.indexWhere(_.name == name) + if (idx >= 0) + launchers.remove(idx) + launchers.append(launcher) + } + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LocalLauncher.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LocalLauncher.scala new file mode 100644 index 000000000..1499cb706 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LocalLauncher.scala @@ -0,0 +1,57 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.service + +import java.io.File +import java.net.URL + +import akka.actor.ActorSystem +import org.slf4j.LoggerFactory + +import com.dimajix.flowman.common.ToolConfig + + +class LocalLauncher(studioUrl:URL, system:ActorSystem) extends Launcher { + private val logger = LoggerFactory.getLogger(classOf[LocalLauncher]) + + override def name: String = "local" + + override def description: String = "Default local launcher" + + override def launch(env:LaunchEnvironment) : Process = { + val cmd = new File(ToolConfig.homeDirectory.getOrElse(throw new RuntimeException("FLOWMAN_HOME not set")), "bin/flowkernel").toString + val args = Seq( + "--kernel-id", env.id, + "--kernel-secret", env.secret, + "--bind-host", "localhost", + "--bind-port", "0", + "--studio-url", studioUrl.toString + ) + val extraEnv = Seq[(String,String)]() + + logger.info( + s"""Launching local process + | kernel-id: ${env.id} + | kernel-secret: ${env.secret} + | cmd: $cmd + | args: ${args.mkString(" ")} + | extraEnv: ${extraEnv.map(kv => kv._1 + "=" + kv._2).mkString("\n ")}""".stripMargin) + + val builder = sys.process.Process.apply(cmd +: args, None, extraEnv:_*) + new LocalProcess(builder, system) + } +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LocalProcess.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LocalProcess.scala new file mode 100644 index 000000000..bdf0ef43a --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/LocalProcess.scala @@ -0,0 +1,99 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.service + +import scala.concurrent.ExecutionContext +import scala.concurrent.Future +import scala.sys.process.ProcessBuilder +import scala.sys.process.ProcessLogger +import scala.util.Try + +import akka.actor.ActorSystem +import akka.stream.ActorMaterializer +import akka.stream.OverflowStrategy +import akka.stream.scaladsl.Keep +import akka.stream.scaladsl.Sink +import akka.stream.scaladsl.Source +import org.reactivestreams.Publisher +import org.slf4j.LoggerFactory + + +class LocalProcess(builder:ProcessBuilder, system:ActorSystem) extends Process { + private val logger = LoggerFactory.getLogger(classOf[LocalProcess]) + private implicit val as: ActorSystem = system + private implicit val ec: ExecutionContext = system.dispatcher + private implicit val materializer: ActorMaterializer = ActorMaterializer() + + private var terminated = false + private val (loggerQueue, loggerPublisher) = Source + .queue[String](1000, OverflowStrategy.dropHead) + .toMat(Sink.asPublisher(fanout = true))(Keep.both) + .run() + private val processLogger = new ProcessLogger { + override def out(s: => String): Unit = { + logger.info("stdout: " + s) + loggerQueue.offer(s) + } + override def err(s: => String): Unit = { + logger.info("stderr: " + s) + loggerQueue.offer(s) + } + override def buffer[T](f: => T): T = f + } + private val process = builder.run(processLogger, connectInput = false) + private val exitValue = Future { + val result = process.exitValue() + loggerQueue.complete() + terminated = true + result + } + + /** + * Tries 
to shutdown the process + */ + override def shutdown(): Unit = { + if (!terminated) { + terminated = true + process.destroy() + } + } + + /** + * Returns the current state of the process + * @return + */ + override def state : ProcessState = { + // We cannot use process.isAlive, since this method is not available in Scala 2.11 + if (exitValue.isCompleted) { + if (!terminated) + ProcessState.STARTING + else + ProcessState.TERMINATED + } else { + if (terminated) + ProcessState.STOPPING + else + ProcessState.RUNNING + } + } + + /** + * Returns a publisher for all console messages produced by the process + * @return + */ + override def messages : Publisher[String] = loggerPublisher +} diff --git a/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/Process.scala b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/Process.scala new file mode 100644 index 000000000..c3c0f1bf1 --- /dev/null +++ b/flowman-studio/src/main/scala/com/dimajix/flowman/studio/service/Process.scala @@ -0,0 +1,48 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.studio.service + +import org.reactivestreams.Publisher + + +sealed abstract class ProcessState +object ProcessState { + case object STARTING extends ProcessState + case object RUNNING extends ProcessState + case object STOPPING extends ProcessState + case object TERMINATED extends ProcessState +} + + +abstract class Process { + /** + * Tries to shutdown the process + */ + def shutdown() : Unit + + /** + * Returns the current state of the process + * @return + */ + def state : ProcessState + + /** + * Returns a publisher for all console messages produced by the process + * @return + */ + def messages : Publisher[String] +} diff --git a/flowman-studio/src/test/scala/com/dimajix/flowman/studio/service/LocalLauncherTest.scala b/flowman-studio/src/test/scala/com/dimajix/flowman/studio/service/LocalLauncherTest.scala new file mode 100644 index 000000000..5f78ececd --- /dev/null +++ b/flowman-studio/src/test/scala/com/dimajix/flowman/studio/service/LocalLauncherTest.scala @@ -0,0 +1,25 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dimajix.flowman.studio.service + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + + +class LocalLauncherTest extends AnyFlatSpec with Matchers { + +} diff --git a/flowman-studio/src/test/scala/com/dimajix/flowman/studio/service/LocalProcessTest.scala b/flowman-studio/src/test/scala/com/dimajix/flowman/studio/service/LocalProcessTest.scala new file mode 100644 index 000000000..e49cfd8c4 --- /dev/null +++ b/flowman-studio/src/test/scala/com/dimajix/flowman/studio/service/LocalProcessTest.scala @@ -0,0 +1,25 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dimajix.flowman.studio.service + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + + +class LocalProcessTest extends AnyFlatSpec with Matchers { + +} diff --git a/flowman-testing/pom.xml b/flowman-testing/pom.xml index 41067f9c4..e5aa85475 100644 --- a/flowman-testing/pom.xml +++ b/flowman-testing/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - .. 
+ 0.17.0 + ../pom.xml diff --git a/flowman-testing/src/main/scala/com/dimajix/flowman/testing/Runner.scala b/flowman-testing/src/main/scala/com/dimajix/flowman/testing/Runner.scala index 6b88e799b..7bfe08dba 100644 --- a/flowman-testing/src/main/scala/com/dimajix/flowman/testing/Runner.scala +++ b/flowman-testing/src/main/scala/com/dimajix/flowman/testing/Runner.scala @@ -31,6 +31,7 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.apache.spark.sql.internal.SQLConf +import com.dimajix.flowman.common.Logging import com.dimajix.flowman.execution.Phase import com.dimajix.flowman.execution.Session import com.dimajix.flowman.execution.Status @@ -53,11 +54,7 @@ object Runner { PropertyConfigurator.configure(url) } - // Adjust Spark logging level - val l = org.apache.log4j.Level.toLevel("WARN") - org.apache.log4j.Logger.getLogger("org").setLevel(l) - org.apache.log4j.Logger.getLogger("akka").setLevel(l) - org.apache.log4j.Logger.getLogger("hive").setLevel(l) + Logging.setSparkLogging("WARN") } setupLogging() @@ -159,12 +156,19 @@ class Runner private( sparkMaster:String, sparkName:String ) { - + /** Temp directory which can be used for storing test data */ val tempDir : File = createTempDir() + /** Hive MetaStore directory inside the [[tempDir]] */ val metastorePath : String = new File(tempDir, "metastore").getCanonicalPath + + /** Hive Warehouse directory inside the [[tempDir]] */ val warehousePath : String = new File(tempDir, "wharehouse").getCanonicalPath + + /** Spark checkpoint directory inside the [[tempDir]] */ val checkpointPath : String = new File(tempDir, "checkpoints").getCanonicalPath + + /** Spark streaming checkpoint directory inside the [[tempDir]] */ val streamingCheckpointPath : String = new File(tempDir, "streamingCheckpoints").getCanonicalPath // Spark override properties @@ -218,7 +222,7 @@ class Runner private( } /** - * Run a single job within the project + * Run the specified phases of a single job within the 
project with the given arguments * @param jobName * @param args * @return @@ -229,6 +233,13 @@ class Runner private( runJob(job, phases, args) } + /** + * RUn the specified phases of a job with the specified arguments + * @param job + * @param phases + * @param args + * @return + */ def runJob(job:Job, phases:Seq[Phase], args:Map[String,String]) : Boolean = { val runner = session.runner val result = runner.executeJob(job, phases, args, force=true) @@ -240,10 +251,24 @@ class Runner private( } } + /** + * Run the specified phases of a job with the specified arguments + * @param job + * @param phases + * @param args + * @return + */ def runJob(jobName:String, phases:Seq[Phase], args:java.util.Map[String,String]) : Boolean = { runJob(jobName, phases, args.asScala.toMap) } + /** + * Run the specified phases of a job with the specified arguments (Java API) + * @param job + * @param phases + * @param args + * @return + */ def runJob(jobName:String, phases:java.util.List[Phase], args:java.util.Map[String,String]) : Boolean = { runJob(jobName, phases.asScala, args.asScala.toMap) } @@ -261,6 +286,11 @@ class Runner private( runTest(test) } + /** + * Runs an individual test + * @param test + * @return + */ def runTest(test:Test) : Boolean = { val runner = session.runner val result = runner.executeTest(test) @@ -276,10 +306,10 @@ class Runner private( * Runs all non-empty tests in a project. Tests without any assertions will be skipped. 
* @return */ - def runTests() : Boolean = { + def runTests(parallel:Boolean=false) : Boolean = { val context = session.getContext(project) - project.tests.keys.toSeq.forall { testName => + def run(testName:String) : Boolean = { val test = context.getTest(TestIdentifier(testName)) if (test.assertions.nonEmpty) { runTest(test) @@ -288,6 +318,12 @@ class Runner private( true } } + + val testNames = project.tests.keys.toSeq + if (parallel) + testNames.par.forall(run) + else + testNames.forall(run) } /** diff --git a/flowman-tools/pom.xml b/flowman-tools/pom.xml index cd69ad78a..a27fd37e3 100644 --- a/flowman-tools/pom.xml +++ b/flowman-tools/pom.xml @@ -9,8 +9,8 @@ com.dimajix.flowman flowman-root - 0.16.0 - .. + 0.17.0 + ../pom.xml @@ -37,8 +37,9 @@ runtime flowman-tools.classpath false + / , - $FLOWMAN_HOME/lib + lib json,org.everit.json.schema,velocity-engine-core diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/StatefulTool.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/StatefulTool.scala index 466c1eb90..34e6d02f9 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/StatefulTool.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/StatefulTool.scala @@ -75,9 +75,11 @@ class StatefulTool( } def leaveJob(): Unit = { - _context = _session.getContext(project) - _session.execution.cleanup() - _job = None + if (_job.nonEmpty) { + _context = _session.getContext(project) + _session.execution.cleanup() + _job = None + } } def enterTest(test: Test): Unit = { @@ -88,8 +90,10 @@ class StatefulTool( } def leaveTest(): Unit = { - _context = _session.getContext(project) - _session.execution.cleanup() - _test = None + if (_test.nonEmpty) { + _context = _session.getContext(project) + _session.execution.cleanup() + _test = None + } } } diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/Tool.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/Tool.scala index ae7f1b932..a56fb85be 100644 --- 
a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/Tool.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/Tool.scala @@ -17,13 +17,11 @@ package com.dimajix.flowman.tools import java.io.File -import java.util.Locale import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.log4j.PropertyConfigurator -import org.slf4j.LoggerFactory +import com.dimajix.flowman.common.ToolConfig import com.dimajix.flowman.config.FlowmanConf import com.dimajix.flowman.execution.Session import com.dimajix.flowman.hadoop.FileSystem @@ -31,13 +29,9 @@ import com.dimajix.flowman.model.Namespace import com.dimajix.flowman.model.Project import com.dimajix.flowman.model.SystemSettings import com.dimajix.flowman.plugin.PluginManager -import com.dimajix.flowman.spec.NamespaceSpec -import com.dimajix.flowman.tools.exec.Driver class Tool { - private val logger = LoggerFactory.getLogger(classOf[Tool]) - // First create PluginManager val plugins:PluginManager = createPluginManager() // Second load global system settings (including plugins for namespaces) diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/Driver.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/Driver.scala index b87e76a83..bb321504d 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/Driver.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/Driver.scala @@ -25,13 +25,17 @@ import org.kohsuke.args4j.CmdLineException import org.slf4j.LoggerFactory import com.dimajix.flowman.FLOWMAN_VERSION +import com.dimajix.flowman.HADOOP_BUILD_VERSION import com.dimajix.flowman.JAVA_VERSION import com.dimajix.flowman.SPARK_VERSION +import com.dimajix.flowman.HADOOP_VERSION +import com.dimajix.flowman.SPARK_BUILD_VERSION +import com.dimajix.flowman.common.Logging +import com.dimajix.flowman.common.ToolConfig import com.dimajix.flowman.spec.splitSettings -import com.dimajix.flowman.tools.Logging import 
com.dimajix.flowman.tools.Tool -import com.dimajix.flowman.tools.ToolConfig import com.dimajix.flowman.util.ConsoleColors +import com.dimajix.flowman.util.ConsoleColors.yellow object Driver { @@ -112,7 +116,13 @@ class Driver(options:Arguments) extends Tool { ) val context = session.getContext(project) - logger.info(s"Flowman $FLOWMAN_VERSION using Spark version $SPARK_VERSION and Java version $JAVA_VERSION") + logger.info(s"Flowman $FLOWMAN_VERSION using Spark version $SPARK_VERSION and Hadoop Version $HADOOP_VERSION and Java version $JAVA_VERSION") + if (SPARK_VERSION != SPARK_BUILD_VERSION || HADOOP_VERSION != HADOOP_BUILD_VERSION) { + logger.warn(yellow("Detected Version mismatch between build and execution:")) + logger.warn(yellow(s" Hadoop build version: ${HADOOP_BUILD_VERSION}, Hadoop execution version: ${HADOOP_VERSION}")) + logger.warn(yellow(s" Spark build version: ${SPARK_BUILD_VERSION}, Spark execution version: ${SPARK_VERSION}")) + logger.warn(yellow("It is highly recommended to use matching versions, specifically for Spark.")) + } val result = options.command.execute(session, project, context) session.shutdown() diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/VersionCommand.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/VersionCommand.scala index 3a9e68947..74298768a 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/VersionCommand.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/VersionCommand.scala @@ -19,10 +19,10 @@ package com.dimajix.flowman.tools.exec import com.dimajix.flowman.FLOWMAN_VERSION import com.dimajix.flowman.JAVA_VERSION import com.dimajix.flowman.SPARK_VERSION +import com.dimajix.flowman.common.ToolConfig import com.dimajix.flowman.execution.Context import com.dimajix.flowman.execution.Session import com.dimajix.flowman.model.Project -import com.dimajix.flowman.tools.ToolConfig class VersionCommand extends Command { diff --git 
a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/info/InfoCommand.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/info/InfoCommand.scala index 7c597eb52..d14fa8ed4 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/info/InfoCommand.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/info/InfoCommand.scala @@ -18,10 +18,10 @@ package com.dimajix.flowman.tools.exec.info import scala.collection.JavaConverters._ +import com.dimajix.flowman.common.ToolConfig import com.dimajix.flowman.execution.Context import com.dimajix.flowman.execution.Session import com.dimajix.flowman.model.Project -import com.dimajix.flowman.tools.ToolConfig import com.dimajix.flowman.tools.exec.Command diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/mapping/ShowCommand.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/mapping/ShowCommand.scala index 656ce9991..a83a027ee 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/mapping/ShowCommand.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/mapping/ShowCommand.scala @@ -25,6 +25,7 @@ import org.kohsuke.args4j.Argument import org.kohsuke.args4j.Option import org.slf4j.LoggerFactory +import com.dimajix.flowman.common.ParserUtils import com.dimajix.flowman.execution.Context import com.dimajix.flowman.execution.NoSuchMappingException import com.dimajix.flowman.execution.Phase @@ -32,7 +33,6 @@ import com.dimajix.flowman.execution.Session import com.dimajix.flowman.model.MappingOutputIdentifier import com.dimajix.flowman.model.Project import com.dimajix.flowman.spec.target.ConsoleTarget -import com.dimajix.flowman.tools.ParserUtils import com.dimajix.flowman.tools.exec.Command diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/model/PhaseCommand.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/model/PhaseCommand.scala index 42ce0ed6d..801622e28 100644 
--- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/model/PhaseCommand.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/model/PhaseCommand.scala @@ -20,6 +20,7 @@ import org.kohsuke.args4j.Argument import org.kohsuke.args4j.Option import org.slf4j.LoggerFactory +import com.dimajix.flowman.common.ParserUtils import com.dimajix.flowman.execution.Context import com.dimajix.flowman.execution.Phase import com.dimajix.flowman.execution.ScopeContext @@ -29,7 +30,6 @@ import com.dimajix.flowman.model.Job import com.dimajix.flowman.model.Project import com.dimajix.flowman.model.TargetIdentifier import com.dimajix.flowman.spec.target.RelationTargetSpec -import com.dimajix.flowman.tools.ParserUtils import com.dimajix.flowman.tools.exec.Command diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/model/ShowCommand.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/model/ShowCommand.scala index e283b682a..db9e4752a 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/model/ShowCommand.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/exec/model/ShowCommand.scala @@ -25,13 +25,13 @@ import org.kohsuke.args4j.Argument import org.kohsuke.args4j.Option import org.slf4j.LoggerFactory +import com.dimajix.flowman.common.ParserUtils import com.dimajix.flowman.execution.Context import com.dimajix.flowman.execution.Phase import com.dimajix.flowman.execution.Session import com.dimajix.flowman.model.Project import com.dimajix.flowman.model.RelationIdentifier import com.dimajix.flowman.spec.target.ConsoleTarget -import com.dimajix.flowman.tools.ParserUtils import com.dimajix.flowman.tools.exec.Command import com.dimajix.flowman.types.SingleValue diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/main/Driver.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/main/Driver.scala index f0bbd2a5a..d8fac49f6 100644 --- 
a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/main/Driver.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/main/Driver.scala @@ -22,8 +22,8 @@ import scala.util.Try import org.apache.hadoop.fs.Path +import com.dimajix.flowman.common.Logging import com.dimajix.flowman.spec.splitSettings -import com.dimajix.flowman.tools.Logging import com.dimajix.flowman.tools.Tool diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/shell/ExitCommand.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/shell/ExitCommand.scala index d13275fec..6e0fafdd6 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/shell/ExitCommand.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/shell/ExitCommand.scala @@ -1,3 +1,19 @@ +/* + * Copyright 2021 Kaya Kupferschmidt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package com.dimajix.flowman.tools.shell import com.dimajix.flowman.execution.Context @@ -8,6 +24,7 @@ import com.dimajix.flowman.tools.exec.Command class ExitCommand extends Command { override def execute(session: Session, project: Project, context: Context): Boolean = { + session.shutdown() System.exit(0) true } diff --git a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/shell/Shell.scala b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/shell/Shell.scala index 72622a0f9..84a923898 100644 --- a/flowman-tools/src/main/scala/com/dimajix/flowman/tools/shell/Shell.scala +++ b/flowman-tools/src/main/scala/com/dimajix/flowman/tools/shell/Shell.scala @@ -28,20 +28,23 @@ import dev.dirs.ProjectDirectories import org.apache.hadoop.fs.Path import org.jline.reader.LineReader import org.jline.reader.LineReaderBuilder -import org.jline.reader.impl.DefaultParser import org.jline.reader.impl.history.DefaultHistory +import org.jline.terminal.Terminal import org.jline.terminal.TerminalBuilder import org.kohsuke.args4j.CmdLineException import org.kohsuke.args4j.CmdLineParser import com.dimajix.flowman.FLOWMAN_VERSION +import com.dimajix.flowman.HADOOP_BUILD_VERSION +import com.dimajix.flowman.HADOOP_VERSION import com.dimajix.flowman.JAVA_VERSION +import com.dimajix.flowman.SPARK_BUILD_VERSION import com.dimajix.flowman.SPARK_VERSION +import com.dimajix.flowman.common.Logging +import com.dimajix.flowman.common.ToolConfig import com.dimajix.flowman.spec.splitSettings -import com.dimajix.flowman.tools.Logging import com.dimajix.flowman.tools.StatefulTool -import com.dimajix.flowman.tools.ToolConfig -import com.dimajix.flowman.util.withShutdownHook +import com.dimajix.flowman.util.ConsoleColors.yellow object Shell { @@ -103,15 +106,16 @@ class Shell(args:Arguments) extends StatefulTool( args.sparkMaster, args.sparkName ) { - val historyFile = new File( + private val historyFile = new File( ProjectDirectories.from("com", "dimajix", "Flowman").dataDir, 
"shell-history") + /** * Main method for running this command * @return */ def run() : Boolean = { - val terminal = TerminalBuilder.builder() + val terminal:Terminal = TerminalBuilder.builder() .build() val console = LineReaderBuilder.builder() .appName("Flowman") @@ -126,6 +130,14 @@ class Shell(args:Arguments) extends StatefulTool( .build() val writer = terminal.writer() + val signalHandler = new Terminal.SignalHandler { + override def handle(signal: Terminal.Signal): Unit = { + writer.println("Aborting all Spark jobs on user request") + session.spark.sparkContext.cancelAllJobs() + } + } + //terminal.handle(Signal.INT, signalHandler) + console.getHistory.load() Runtime.getRuntime.addShutdownHook(new Thread() { override def run() : Unit = console.getHistory.save() }) @@ -139,8 +151,14 @@ class Shell(args:Arguments) extends StatefulTool( writer.println("\nWelcome to") writer.println(s"$logo $FLOWMAN_VERSION\n") - writer.println(s"Using Spark version $SPARK_VERSION and Java version $JAVA_VERSION\n") - writer.println("Type in 'help' for getting help") + writer.println(s"Using Spark version $SPARK_VERSION and Hadoop version $HADOOP_VERSION and Java version $JAVA_VERSION") + if (SPARK_VERSION != SPARK_BUILD_VERSION || HADOOP_VERSION != HADOOP_BUILD_VERSION) { + writer.println(yellow("Detected Version mismatch between build and execution:")) + writer.println(yellow(s" Hadoop build version: ${HADOOP_BUILD_VERSION}, Hadoop execution version: ${HADOOP_VERSION}")) + writer.println(yellow(s" Spark build version: ${SPARK_BUILD_VERSION}, Spark execution version: ${SPARK_VERSION}")) + writer.println(yellow("It is highly recommended to use matching versions, specifically for Spark.")) + } + writer.println("\nType in 'help' for getting help") // REPL-loop while (true) { diff --git a/licenses/LICENSE-mssql-jdbc.txt b/licenses/LICENSE-mssql-jdbc.txt new file mode 100644 index 000000000..1482bf63b --- /dev/null +++ b/licenses/LICENSE-mssql-jdbc.txt @@ -0,0 +1,14 @@ +Copyright(c) 
2020 Microsoft Corporation +All rights reserved. + +MIT License +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"), +to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +and / or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. 
diff --git a/pom.xml b/pom.xml index 2a22d8d48..d57e3457a 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 com.dimajix.flowman flowman-root - 0.16.0 + 0.17.0 pom Flowman root pom A Spark based ETL tool @@ -56,7 +56,6 @@ 2.33 2.4.0 1.9.13 - 2.3 4.0.0 10.12.1.1 1.2.17 @@ -78,29 +77,29 @@ 1.6 3.9.9.Final - + 3.2.0 3.2 1.9.3 1.11 - + 2.12.10 2.12 - 3.0.8 - 3.0 + 3.2.5 + 3.2 1.1.2 - 3.0.2 - 3.0 + 3.1.1 + 3.1 compile - 4.1.47.Final + 4.1.51.Final 2.10.0 2.10.0 2.8 - 2.4.1 - 3.4.14 + 2.6.0 + 3.5.7 1.8.2 - 3.6.6 + 3.7.0-M5 2.3 14.0.1 1.7.30 @@ -138,58 +137,6 @@ - - CDH-5.15 - - - cloudera - https://repository.cloudera.com/artifactory/cloudera-repos/ - - - - cdh5 - cdh5.15.1 - 2.11.12 - 2.11 - 3.0.8 - 3.0 - 1.0.4 - 2.3.0.cloudera3 - provided - 2.3 - 2.6.0-${cdh.version} - 2.6 - 1.1.0-${cdh.version} - 1.2.0-${cdh.version} - 0.10.0-kafka-2.1.0 - 3.4.5-${cdh.version} - 1.7.6-${cdh.version} - 12.0.1 - 3.2.11 - 1.1.1 - 1.1.7.3 - 4.1.42.Final - 2.6.7 - 2.6.7.1 - 4.5.4 - 4.4.7 - 3.1.5 - 3.2.1 - 1.4.1 - 3.5 - 1.7.5 - - - - - org.mortbay.jetty - jetty-sslengine - 6.1.26.cloudera.4 - - - - - CDH-6.3 @@ -243,36 +190,6 @@ - - spark-2.3 - - 2.11.8 - 2.11 - 3.0.8 - 3.0 - 1.0.4 - 2.3.3 - 2.3 - 1.1.7.3 - 4.1.17.Final - 2.6.7 - 2.6.7.1 - 2.6 - 1.7.7 - 0.10.0.1 - 3.4.6 - 3.2.11 - 1.1.1 - 12.0 - 1.7.16 - 4.5.4 - 4.4.7 - 3.1.5 - 3.2.2 - 1.4.1 - 3.5 - - spark-2.4 @@ -281,7 +198,7 @@ 3.0.8 3.0 1.1.0 - 2.4.7 + 2.4.8 2.4 1.1.7.3 4.1.47.Final @@ -424,6 +341,7 @@ flowman-scalatest-compat flowman-spark-testing flowman-spark-extensions + flowman-common flowman-core flowman-spec flowman-dsl @@ -434,14 +352,18 @@ flowman-plugins/impala flowman-plugins/mariadb flowman-plugins/mysql + flowman-plugins/mssqlserver flowman-plugins/swagger flowman-plugins/json flowman-testing flowman-tools - flowman-ui + flowman-server-ui flowman-server - flowman-dist + flowman-kernel + flowman-studio-ui + flowman-studio flowman-parent + flowman-dist docker @@ -607,7 +529,7 @@ true net.alchim31.maven 
scala-maven-plugin - 4.4.0 + 4.5.1 ${scala.version} ${scala.api_version} @@ -629,17 +551,9 @@ - scala-compile-first - process-resources add-source compile - - - - scala-test-compile - process-test-resources - testCompile @@ -886,6 +800,11 @@ flowman-spark-extensions ${project.version}
+ + com.dimajix.flowman + flowman-common + ${project.version} + com.dimajix.flowman flowman-core @@ -914,7 +833,7 @@ com.dimajix.flowman - flowman-ui + flowman-server-ui ${project.version} @@ -922,6 +841,21 @@ flowman-server ${project.version} + + com.dimajix.flowman + flowman-kernel + ${project.version} + + + com.dimajix.flowman + flowman-studio + ${project.version} + + + com.dimajix.flowman + flowman-studio-ui + ${project.version} + com.dimajix.flowman flowman-plugin-aws @@ -1099,13 +1033,6 @@ test - - com.databricks - spark-avro_${scala.api_version} - ${spark-avro.version} - compile - - org.xerial.snappy snappy-java @@ -1297,13 +1224,6 @@ provided - - net.sf.opencsv - opencsv - ${opencsv.version} - provided - - org.codehaus.jackson jackson-mapper-asl @@ -1316,7 +1236,6 @@ ${jackson_asl.version} provided - org.codehaus.jackson jackson-jaxrs @@ -1555,6 +1474,22 @@ provided + + com.typesafe.akka + akka-http_${scala.api_version} + 10.1.14 + + + com.typesafe.akka + akka-http-spray-json_${scala.api_version} + 10.1.14 + + + com.github.swagger-akka-http + swagger-akka-http_${scala.api_version} + 1.1.2 + + io.swagger swagger-parser @@ -1579,6 +1514,11 @@ 1.6.0 compile + + org.webjars + swagger-ui + 3.22.2 + com.google.re2j