Skip to content

Commit

Permalink
Merge pull request #52 from dacort/feature/036_support
Browse files Browse the repository at this point in the history
0.36 support
  • Loading branch information
dacort authored Aug 2, 2020
2 parents 9e4c872 + 3240b95 commit e40c453
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 33 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM metabase/metabase:v0.35.2
FROM metabase/metabase:v0.36.2

# A metabase user/group is manually added in https://github.com/metabase/metabase/blob/master/bin/docker/run_metabase.sh
# Make the UID and GID match
ADD --chown=2000:2000 \
https://github.com/dacort/metabase-athena-driver/releases/download/v1.0.0/athena.metabase-driver.jar \
./target/uberjar/athena.metabase-driver.jar \
/plugins/athena.metabase-driver.jar
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.

## Contributing

### Prerequisites

- [Leiningen](https://leiningen.org/)
- [Install metabase-core](https://github.com/metabase/metabase/wiki/Writing-a-Driver:-Packaging-a-Driver-&-Metabase-Plugin-Basics#installing-metabase-core-locally)

### Build from source

I'm not familiar enough with `lein` to know if there is a better way to include a jar from a static URL, so for the time being we download it manually.
Expand Down
35 changes: 16 additions & 19 deletions src/metabase/driver/athena.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
(:refer-clojure :exclude [second])
(:require [metabase.driver.schema-parser :as schema-parser]
[clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[clojure.string :as string]
[clojure.tools.logging :as log]
[clojure.set :as set]
[medley.core :as m]
[metabase.query-processor]
[metabase.models
[field :as field :refer [Field]]]
Expand All @@ -13,21 +14,17 @@
[core :as hsql]]
[java-time :as t]
[metabase.driver :as driver]
[metabase.driver.common :as driver.common]
[metabase.driver.sql-jdbc
[common :as sql-jdbc.common]
[connection :as sql-jdbc.conn]
[execute :as sql-jdbc.execute]
[sync :as sql-jdbc.sync]]
[metabase.driver.sql-jdbc.execute.legacy-impl :as legacy]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.driver.sql.util.unprepare :as unprepare]
[metabase.util
[date-2 :as u.date]
[honeysql-extensions :as hx]
[i18n :refer [trs]]]
[metabase.util :as u]
[clojure.string :as string])
[metabase.util :as u])
(:import [java.sql DatabaseMetaData Timestamp]
(java.time OffsetDateTime ZonedDateTime)))

Expand All @@ -52,7 +49,7 @@
"Returns the endpoint URL for a specific region"
[region]
(cond
(str/starts-with? region "cn-") ".amazonaws.com.cn"
(string/starts-with? region "cn-") ".amazonaws.com.cn"
:else ".amazonaws.com"))

(defmethod sql-jdbc.conn/connection-details->spec :athena [_ {:keys [region access_key secret_key s3_staging_dir workgroup db], :as details}]
Expand All @@ -66,8 +63,8 @@
:workgroup workgroup
:AwsRegion region
; :LogLevel 6
}
(when (str/blank? access_key)
}
(when (string/blank? access_key)
{:AwsCredentialsProviderClass "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain"})
(dissoc details :db))
(sql-jdbc.common/handle-additional-options details, :seperator-style :semicolon)))
Expand Down Expand Up @@ -193,20 +190,22 @@
(defn sync-table-with-nested-field
  "Runs a DESCRIBE statement for `schema`.`table-name` against `database`,
  filters the rows through `remove-invalid-columns`, tags each remaining row
  with its zero-based index as :database-position, parses every row with
  `schema-parser/parse-schema`, and returns the parsed fields as a set."
  [database schema table-name]
  (let [describe-sql (str "DESCRIBE `" schema "`.`" table-name "`;")
        valid-rows   (remove-invalid-columns (run-query database describe-sql))
        ;; The position is taken after invalid rows are removed, so it counts
        ;; only the rows that are actually parsed.
        positioned   (map-indexed (fn [idx row] (merge row {:database-position idx}))
                                  valid-rows)]
    ;; doall realizes the lazy sequence before it is turned into a set.
    (set (doall (map schema-parser/parse-schema positioned)))))

(defn sync-table-without-nested-field [driver columns]
(set
(for [{database-type :type_name
column-name :column_name
remarks :remarks} columns]
(for [[idx {database-type :type_name
column-name :column_name
remarks :remarks}] (m/indexed columns)]
(merge
{:name column-name
:database-type database-type
:base-type (database-type->base-type-or-warn driver database-type)}
(when (not (str/blank? remarks))
{:name column-name
:database-type database-type
:base-type (database-type->base-type-or-warn driver database-type)
:database-position idx}
(when (not (string/blank? remarks))
{:field-comment remarks})))))
;; Not all tables in the Data Catalog are guaranteed to be compatible with Athena
;; If an exception is thrown, log and throw an error
Expand Down Expand Up @@ -238,8 +237,6 @@
(catch Throwable e (set nil)))))))

;; EXTERNAL_TABLE is required for Athena


(defn- get-tables [^DatabaseMetaData metadata, ^String schema-or-nil, ^String db-name-or-nil]
;; tablePattern "%" = match all tables
(with-open [rs (.getTables metadata db-name-or-nil schema-or-nil "%"
Expand All @@ -256,7 +253,7 @@
(let [remarks (:remarks table)]
{:name (:table_name table)
:schema schema
:description (when-not (str/blank? remarks)
:description (when-not (string/blank? remarks)
remarks)}))))))

;; You may want to exclude a specific database - this can be done here
Expand Down
22 changes: 12 additions & 10 deletions src/metabase/driver/schema_parser.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,32 @@
(defn- column->base-type
  "Takes the first run of word characters in `column-type`, turns it into a
  keyword, and looks it up with `sql-jdbc.sync/database-type->base-type` for
  the :athena driver."
  [column-type]
  (let [type-keyword (->> column-type
                          (re-find #"\w+")
                          keyword)]
    (sql-jdbc.sync/database-type->base-type :athena type-keyword)))

(defn- create-nested-fields [schema]
(defn- create-nested-fields [schema database-position]
(set (map (fn [[k v]]
(let [root {:name (name k)
:base-type (cond (map? v) :type/Dictionary
(sequential? v) :type/Array
:else (column->base-type v))
:database-type (cond (map? v) "map"
(sequential? v) "array"
:else v)}]
:else v)
:database-position database-position}]
(cond
(map? v) (assoc root :nested-fields (create-nested-fields v))
(map? v) (assoc root :nested-fields (create-nested-fields v database-position))
:else root)))
schema)))

(defn- parse-struct-type-field [field-info]
(defn- parse-struct-type-field [field-info database-position]
(let [root-field-name (:name field-info)
schema (hsp/hive-schema->map (:type field-info))]
{:name root-field-name
:base-type :type/Dictionary
:database-type "struct"
:nested-fields (create-nested-fields schema)}))
:database-position database-position
:nested-fields (create-nested-fields schema database-position)}))

(defn- parse-array-type-field [field-info]
{:name (:name field-info) :base-type :type/Array :database-type "array"})
(defn- parse-array-type-field [field-info database-position]
{:name (:name field-info) :base-type :type/Array :database-type "array" :database-position database-position})

(defn- is-struct-type-field?
  "Returns true when the :type string of `field-info` begins with the struct prefix."
  [field-info]
  (let [type-name (:type field-info)]
    (clojure.string/starts-with? type-name "struct")))
Expand All @@ -41,6 +43,6 @@
[field-info]
(cond
; :TODO Should we also validate maps?
(is-struct-type-field? field-info) (parse-struct-type-field field-info)
(is-array-type-field? field-info) (parse-array-type-field field-info)
:else {:name (:name field-info) :base-type (column->base-type (:type field-info)) :database-type (:type field-info)}))
(is-struct-type-field? field-info) (parse-struct-type-field field-info (:database-position field-info))
(is-array-type-field? field-info) (parse-array-type-field field-info (:database-position field-info))
:else {:name (:name field-info) :base-type (column->base-type (:type field-info)) :database-type (:type field-info) :database-position (:database-position field-info)}))
31 changes: 29 additions & 2 deletions test_unit/metabase/driver/athena_test.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,33 @@
(ns metabase.driver.athena-test
(:require [clojure.test :refer :all]
[metabase.driver.athena :refer [endpoint-for-region]]))
[metabase.driver.athena :refer [endpoint-for-region sync-table-with-nested-field sync-table-without-nested-field]]))

;; Raw two-line textual description of a table with an int column and a struct column.
;; NOTE(review): not referenced by the tests visible in this file section - confirm it is still needed.
(def nested_schema_str
"key int from deserializer
data struct<name:string> from deserializer")

;; Rows returned by the stubbed `run-query` in the "sync with nested fields" test:
;; one scalar column and one struct column.
(def nested_schema
[{:col_name "key", :data_type "int"}
{:col_name "data", :data_type "struct<name:string>"}])

;; Column maps passed directly to `sync-table-without-nested-field` in the
;; "sync without nested fields" test; neither entry carries :remarks.
(def flat_schema_columns
[{:column_name "id", :type_name "string"}
{:column_name "ts", :type_name "string"}])

;; Checks that both sync paths attach a zero-based :database-position to every field.
(deftest syncer
(testing "sync with nested fields"
;; Stub out run-query so no database is contacted; it returns the fixture rows instead.
(with-redefs [metabase.driver.athena/run-query (fn [_ _] nested_schema)]
(is (=
;; The nested "name" field is expected to carry the same position (1) as its parent struct column.
#{{:name "key", :base-type :type/Integer, :database-type "int", :database-position 0}
{:name "data", :base-type :type/Dictionary, :database-type "struct"
:nested-fields #{{:name "name", :base-type :type/Text, :database-type "string", :database-position 1}}, :database-position 1}}
(sync-table-with-nested-field "test" "test" "test")))))

(testing "sync without nested fields"
(is (=
;; Positions follow the order of the entries in flat_schema_columns.
#{{:name "id", :base-type :type/Text, :database-type "string", :database-position 0}
{:name "ts", :base-type :type/Text, :database-type "string", :database-position 1}}
(sync-table-without-nested-field :athena flat_schema_columns)))))

(deftest endpoint
(testing "AWS Endpoint URL"
Expand All @@ -18,4 +45,4 @@

(is (=
".amazonaws.com.cn"
(endpoint-for-region "cn-northwest-1")))))
(endpoint-for-region "cn-northwest-1")))))

0 comments on commit e40c453

Please sign in to comment.