From 30954d075ee6eeb4533d4f0d5adde4d00ba841bd Mon Sep 17 00:00:00 2001 From: fivetran-joemarkiewicz Date: Fri, 29 Jan 2021 15:57:41 -0600 Subject: [PATCH 1/5] dbt v0.19.0 update --- dbt_project.yml | 4 ++-- integration_tests/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt_project.yml b/dbt_project.yml index dd191b2..c0a5353 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,9 +1,9 @@ config-version: 2 name: 'github_source' -version: '0.2.0' +version: '0.2.1' -require-dbt-version: [">=0.18.0", "<0.19.0"] +require-dbt-version: [">=0.18.0", "<0.20.0"] models: github_source: diff --git a/integration_tests/requirements.txt b/integration_tests/requirements.txt index bb7a973..38e8ae6 100644 --- a/integration_tests/requirements.txt +++ b/integration_tests/requirements.txt @@ -1 +1 @@ -dbt==0.18.0 \ No newline at end of file +dbt==0.19.0 \ No newline at end of file From 2434b4afbb67932149e0bde24461ba8749035685 Mon Sep 17 00:00:00 2001 From: fivetran-joemarkiewicz Date: Mon, 1 Feb 2021 16:40:35 -0600 Subject: [PATCH 2/5] passthrough removal and team/repo_team add --- README.md | 2 +- dbt_project.yml | 11 +---- macros/get_repo_team_columns.sql | 12 ++++++ macros/get_team_columns.sql | 16 ++++++++ macros/get_user_columns.sql | 11 +---- models/src_github.yml | 24 +++++++++++ models/stg_github.yml | 33 ++++++++++++++- models/stg_github__issue.sql | 13 ------ models/stg_github__issue_closed_history.sql | 15 +------ models/stg_github__issue_comment.sql | 15 +------ models/stg_github__issue_merged.sql | 15 +------ models/stg_github__pull_request.sql | 15 +------ models/stg_github__pull_request_review.sql | 15 +------ models/stg_github__repo_team.sql | 36 ++++++++++++++++ models/stg_github__repository.sql | 13 ------ ...stg_github__requested_reviewer_history.sql | 15 +------ models/stg_github__team.sql | 41 +++++++++++++++++++ models/stg_github__user.sql | 17 +------- models/tmp/stg_github__repo_team_tmp.sql | 2 + 
models/tmp/stg_github__team_tmp.sql | 2 + 20 files changed, 177 insertions(+), 146 deletions(-) create mode 100644 macros/get_repo_team_columns.sql create mode 100644 macros/get_team_columns.sql create mode 100644 models/stg_github__repo_team.sql create mode 100644 models/stg_github__team.sql create mode 100644 models/tmp/stg_github__repo_team_tmp.sql create mode 100644 models/tmp/stg_github__team_tmp.sql diff --git a/README.md b/README.md index 4eab8c4..ad5207d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# GitHub ([docs](https://dbt-github-source.netlify.app/)) +# GitHub Source This package models GitHub data from [Fivetran's connector](https://fivetran.com/docs/applications/GitHub). It uses data in the format described by [this ERD](https://docs.google.com/presentation/d/1lx6ez7-x-s-n2JCnCi3SjG4XMmx9ysNUvaNCaWc3I_I/edit). diff --git a/dbt_project.yml b/dbt_project.yml index c0a5353..51e75c4 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -21,15 +21,8 @@ vars: issue: "{{ source('github', 'issue') }}" pull_request_review: "{{ source('github', 'pull_request_review') }}" pull_request: "{{ source('github', 'pull_request') }}" + repo_team: "{{ source('github', 'repo_team') }}" repository: "{{ source('github', 'repository') }}" requested_reviewer_history: "{{ source('github', 'requested_reviewer_history') }}" + team: "{{ source('github', 'team') }}" user: "{{ source('github', 'user') }}" - issue_closed_history_pass_through_columns: [] - issue_comment_pass_through_columns: [] - issue_merged_pass_through_columns: [] - issue_pass_through_columns: [] - pull_request_review_pass_through_columns: [] - pull_request_pass_through_columns: [] - repository_pass_through_columns: [] - requested_reviewer_history_pass_through_columns: [] - user_pass_through_columns: [] diff --git a/macros/get_repo_team_columns.sql b/macros/get_repo_team_columns.sql new file mode 100644 index 0000000..438f08b --- /dev/null +++ b/macros/get_repo_team_columns.sql @@ -0,0 +1,12 @@ +{% 
macro get_repo_team_columns() %} + +{% set columns = [ + {"name": "_fivetran_synced", "datatype": dbt_utils.type_timestamp()}, + {"name": "permission", "datatype": dbt_utils.type_string()}, + {"name": "repository_id", "datatype": dbt_utils.type_int()}, + {"name": "team_id", "datatype": dbt_utils.type_int()} +] %} + +{{ return(columns) }} + +{% endmacro %} diff --git a/macros/get_team_columns.sql b/macros/get_team_columns.sql new file mode 100644 index 0000000..2f16426 --- /dev/null +++ b/macros/get_team_columns.sql @@ -0,0 +1,16 @@ +{% macro get_team_columns() %} + +{% set columns = [ + {"name": "_fivetran_synced", "datatype": dbt_utils.type_timestamp()}, + {"name": "description", "datatype": dbt_utils.type_string()}, + {"name": "id", "datatype": dbt_utils.type_int()}, + {"name": "name", "datatype": dbt_utils.type_string()}, + {"name": "org_id", "datatype": dbt_utils.type_int()}, + {"name": "parent_id", "datatype": dbt_utils.type_int()}, + {"name": "privacy", "datatype": dbt_utils.type_string()}, + {"name": "slug", "datatype": dbt_utils.type_string()} +] %} + +{{ return(columns) }} + +{% endmacro %} diff --git a/macros/get_user_columns.sql b/macros/get_user_columns.sql index 9dd2e56..f7d2291 100644 --- a/macros/get_user_columns.sql +++ b/macros/get_user_columns.sql @@ -1,19 +1,10 @@ {% macro get_user_columns() %} {% set columns = [ - {"name": "_fivetran_synced", "datatype": dbt_utils.type_timestamp()}, - {"name": "bio", "datatype": dbt_utils.type_string()}, - {"name": "blog", "datatype": dbt_utils.type_string()}, {"name": "company", "datatype": dbt_utils.type_string()}, - {"name": "created_at", "datatype": dbt_utils.type_timestamp()}, - {"name": "hireable", "datatype": "boolean"}, {"name": "id", "datatype": dbt_utils.type_int()}, - {"name": "location", "datatype": dbt_utils.type_string()}, {"name": "login", "datatype": dbt_utils.type_string()}, - {"name": "name", "datatype": dbt_utils.type_string()}, - {"name": "site_admin", "datatype": "boolean"}, - {"name": 
"type", "datatype": dbt_utils.type_string()}, - {"name": "updated_at", "datatype": dbt_utils.type_timestamp()} + {"name": "name", "datatype": dbt_utils.type_string()} ] %} {{ return(columns) }} diff --git a/models/src_github.yml b/models/src_github.yml index 63232ef..73a6adf 100644 --- a/models/src_github.yml +++ b/models/src_github.yml @@ -117,6 +117,14 @@ sources: description: Foreign key that references the repository table, referencing the current branch. - name: head_user_id description: Foreign key that references the user table, referencing who created the current branch. + + - name: repo_team + description: Table representing the mapping relationships between repositories and teams + columns: + - name: repository_id + description: Reference to the respective repository for the record. + - name: team_id + description: Reference to the respective team for the record. - name: repository description: Table representing the file structure of a directory under git control @@ -143,6 +151,22 @@ sources: description: Foreign key that references the user table, representing the user that was requested to review a PR. - name: removed description: Boolean variable indicating if the requester was removed from the PR (true) or added to the PR (false). + + - name: team + description: Table containing teams and team details + columns: + - name: id + description: System generated unique id for the team. + - name: description + description: User created description of the team. + - name: name + description: User created name of the team. + - name: parent_id + description: Reference to the parent team. + - name: privacy + description: Type of privacy permissions associated with the team. + - name: slug + description: Url friendly version of the team name. 
- name: user description: Table representing contributors to a git project diff --git a/models/stg_github.yml b/models/stg_github.yml index 040da9c..6ffb7e9 100644 --- a/models/stg_github.yml +++ b/models/stg_github.yml @@ -118,6 +118,18 @@ models: description: Foreign key that references the repository table, referencing the current branch. - name: head_user_id description: Foreign key that references the user table, referencing who created the current branch. + + - name: stg_github__repo_team + description: Table representing the mapping relationships between repositories and teams + columns: + - name: repository_id + description: Reference to the respective repository for the record. + tests: + - not_null + - name: team_id + description: Reference to the respective team for the record. + tests: + - not_null - name: stg_github__repository description: Table representing the file structure of a directory under git control @@ -144,7 +156,26 @@ models: description: Foreign key that references the user table, representing the user that was requested to review a PR. - name: removed description: Boolean variable indicating if the requester was removed from the PR (true) or added to the PR (false). - + + - name: stg_github__team + description: Table containing teams and team details + columns: + - name: team_id + description: System generated unique id for the team. + tests: + - unique + - not_null + - name: description + description: User created description of the team. + - name: name + description: User created name of the team. + - name: parent_id + description: Reference to the parent team. + - name: privacy + description: Type of privacy permissions associated with the team. + - name: slug + description: Url friendly version of the team name. 
+ - name: stg_github__user description: Table representing contributors to a git project freshness: null diff --git a/models/stg_github__issue.sql b/models/stg_github__issue.sql index fee7256..b17b064 100644 --- a/models/stg_github__issue.sql +++ b/models/stg_github__issue.sql @@ -19,12 +19,6 @@ with issue as ( ) }} - --The below script allows for pass through columns. - {% if var('issue_pass_through_columns') %} - , - {{ var('issue_pass_through_columns') | join (", ")}} - - {% endif %} from issue ), fields as ( @@ -43,13 +37,6 @@ with issue as ( title, updated_at, user_id - - --The below script allows for pass through columns. - {% if var('issue_pass_through_columns') %} - , - {{ var('issue_pass_through_columns') | join (", ")}} - - {% endif %} from macro ) diff --git a/models/stg_github__issue_closed_history.sql b/models/stg_github__issue_closed_history.sql index 434ad36..1b20c2e 100644 --- a/models/stg_github__issue_closed_history.sql +++ b/models/stg_github__issue_closed_history.sql @@ -12,19 +12,13 @@ with issue_closed_history as ( For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git). */ - {{ + {{ fivetran_utils.fill_staging_columns( source_columns=adapter.get_columns_in_relation(ref('stg_github__issue_closed_history_tmp')), staging_columns=get_issue_closed_history_columns() ) }} - --The below script allows for pass through columns. - {% if var('issue_closed_history_pass_through_columns') %} - , - {{ var('issue_closed_history_pass_through_columns') | join (", ")}} - - {% endif %} from issue_closed_history ), fields as ( @@ -34,13 +28,6 @@ with issue_closed_history as ( updated_at, closed as is_closed - --The below script allows for pass through columns. 
- {% if var('issue_closed_history_pass_through_columns') %} - , - {{ var('issue_closed_history_pass_through_columns') | join (", ")}} - - {% endif %} - from macro ) diff --git a/models/stg_github__issue_comment.sql b/models/stg_github__issue_comment.sql index 9b97fd3..6c644ac 100644 --- a/models/stg_github__issue_comment.sql +++ b/models/stg_github__issue_comment.sql @@ -12,19 +12,13 @@ with issue_comment as ( For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git). */ - {{ + {{ fivetran_utils.fill_staging_columns( source_columns=adapter.get_columns_in_relation(ref('stg_github__issue_comment_tmp')), staging_columns=get_issue_comment_columns() ) }} - --The below script allows for pass through columns. - {% if var('issue_comment_pass_through_columns') %} - , - {{ var('issue_comment_pass_through_columns') | join (", ")}} - - {% endif %} from issue_comment ), fields as ( @@ -34,13 +28,6 @@ with issue_comment as ( issue_id, user_id - --The below script allows for pass through columns. - {% if var('issue_comment_pass_through_columns') %} - , - {{ var('issue_comment_pass_through_columns') | join (", ")}} - - {% endif %} - from macro ) diff --git a/models/stg_github__issue_merged.sql b/models/stg_github__issue_merged.sql index a4bae8c..d8ba5df 100644 --- a/models/stg_github__issue_merged.sql +++ b/models/stg_github__issue_merged.sql @@ -12,19 +12,13 @@ with issue_merged as ( For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git). */ - {{ + {{ fivetran_utils.fill_staging_columns( source_columns=adapter.get_columns_in_relation(ref('stg_github__issue_merged_tmp')), staging_columns=get_issue_merged_columns() ) }} - --The below script allows for pass through columns. 
- {% if var('issue_merged_pass_through_columns') %} - , - {{ var('issue_merged_pass_through_columns') | join (", ")}} - - {% endif %} from issue_merged ), fields as ( @@ -33,13 +27,6 @@ with issue_merged as ( issue_id, merged_at - --The below script allows for pass through columns. - {% if var('issue_merged_pass_through_columns') %} - , - {{ var('issue_merged_pass_through_columns') | join (", ")}} - - {% endif %} - from macro ) diff --git a/models/stg_github__pull_request.sql b/models/stg_github__pull_request.sql index c61676b..a36819b 100644 --- a/models/stg_github__pull_request.sql +++ b/models/stg_github__pull_request.sql @@ -12,19 +12,13 @@ with pull_request as ( For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git). */ - {{ + {{ fivetran_utils.fill_staging_columns( source_columns=adapter.get_columns_in_relation(ref('stg_github__pull_request_tmp')), staging_columns=get_pull_request_columns() ) }} - --The below script allows for pass through columns. - {% if var('pull_request_pass_through_columns') %} - , - {{ var('pull_request_pass_through_columns') | join (", ")}} - - {% endif %} from pull_request ), fields as ( @@ -35,13 +29,6 @@ with pull_request as ( head_repo_id, head_user_id - --The below script allows for pass through columns. - {% if var('pull_request_pass_through_columns') %} - , - {{ var('pull_request_pass_through_columns') | join (", ")}} - - {% endif %} - from macro ) diff --git a/models/stg_github__pull_request_review.sql b/models/stg_github__pull_request_review.sql index 86ec8e4..48d6622 100644 --- a/models/stg_github__pull_request_review.sql +++ b/models/stg_github__pull_request_review.sql @@ -12,19 +12,13 @@ with pull_request_review as ( For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git). 
*/ - {{ + {{ fivetran_utils.fill_staging_columns( source_columns=adapter.get_columns_in_relation(ref('stg_github__pull_request_review_tmp')), staging_columns=get_pull_request_review_columns() ) }} - --The below script allows for pass through columns. - {% if var('pull_request_review_pass_through_columns') %} - , - {{ var('pull_request_review_pass_through_columns') | join (", ")}} - - {% endif %} from pull_request_review ), fields as ( @@ -36,13 +30,6 @@ with pull_request_review as ( state, user_id - --The below script allows for pass through columns. - {% if var('pull_request_review_pass_through_columns') %} - , - {{ var('pull_request_review_pass_through_columns') | join (", ")}} - - {% endif %} - from macro ) diff --git a/models/stg_github__repo_team.sql b/models/stg_github__repo_team.sql new file mode 100644 index 0000000..b9a9f9c --- /dev/null +++ b/models/stg_github__repo_team.sql @@ -0,0 +1,36 @@ + +with base as ( + + select * + from {{ ref('stg_github__repo_team_tmp') }} + +), + +fields as ( + + select + /* + The below macro is used to generate the correct SQL for package staging models. It takes a list of columns + that are expected/needed (staging_columns from dbt_github_source/macros/) and compares it with columns + in the source (source_columns from dbt_github_source/models/tmp/). + + For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git). 
+ */ + {{ + fivetran_utils.fill_staging_columns( + source_columns=adapter.get_columns_in_relation(ref('stg_github__repo_team_tmp')), + staging_columns=get_repo_team_columns() + ) + }} + + from base + +), final as ( + + select + repository_id, + team_id + from fields +) + +select * from final diff --git a/models/stg_github__repository.sql b/models/stg_github__repository.sql index fa6b328..d7ffa29 100644 --- a/models/stg_github__repository.sql +++ b/models/stg_github__repository.sql @@ -19,12 +19,6 @@ with repository as ( ) }} - --The below script allows for pass through columns. - {% if var('repository_pass_through_columns') %} - , - {{ var('repository_pass_through_columns') | join (", ")}} - - {% endif %} from repository ), fields as ( @@ -34,13 +28,6 @@ with repository as ( full_name, private as is_private - --The below script allows for pass through columns. - {% if var('repository_pass_through_columns') %} - , - {{ var('repository_pass_through_columns') | join (", ")}} - - {% endif %} - from macro ) diff --git a/models/stg_github__requested_reviewer_history.sql b/models/stg_github__requested_reviewer_history.sql index d08f8dd..2be3d29 100644 --- a/models/stg_github__requested_reviewer_history.sql +++ b/models/stg_github__requested_reviewer_history.sql @@ -12,19 +12,13 @@ with requested_reviewer_history as ( For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git). */ - {{ + {{ fivetran_utils.fill_staging_columns( source_columns=adapter.get_columns_in_relation(ref('stg_github__requested_reviewer_history_tmp')), staging_columns=get_requested_reviewer_history_columns() ) }} - --The below script allows for pass through columns. 
- {% if var('requested_reviewer_history_pass_through_columns') %} - , - {{ var('requested_reviewer_history_pass_through_columns') | join (", ")}} - - {% endif %} from requested_reviewer_history ), fields as ( @@ -35,13 +29,6 @@ with requested_reviewer_history as ( requested_id, removed - --The below script allows for pass through columns. - {% if var('requested_reviewer_history_pass_through_columns') %} - , - {{ var('requested_reviewer_history_pass_through_columns') | join (", ")}} - - {% endif %} - from macro ) diff --git a/models/stg_github__team.sql b/models/stg_github__team.sql new file mode 100644 index 0000000..44df1d0 --- /dev/null +++ b/models/stg_github__team.sql @@ -0,0 +1,41 @@ + +with base as ( + + select * + from {{ ref('stg_github__team_tmp') }} + +), + +fields as ( + + select + /* + The below macro is used to generate the correct SQL for package staging models. It takes a list of columns + that are expected/needed (staging_columns from dbt_github_source/macros/) and compares it with columns + in the source (source_columns from dbt_github_source/models/tmp/). + + For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git). + */ + {{ + fivetran_utils.fill_staging_columns( + source_columns=adapter.get_columns_in_relation(ref('stg_github__team_tmp')), + staging_columns=get_team_columns() + ) + }} + + from base + +), final as ( + + select + id as team_id, + description, + name, + parent_id, + privacy, + slug + from fields +) + +select * +from final diff --git a/models/stg_github__user.sql b/models/stg_github__user.sql index 51b5a8f..91c5d8e 100644 --- a/models/stg_github__user.sql +++ b/models/stg_github__user.sql @@ -18,13 +18,8 @@ with github_user as ( staging_columns=get_user_columns() ) }} - - --The below script allows for pass through columns. 
- {% if var('user_pass_through_columns') %} - , - {{ var('user_pass_through_columns') | join (", ")}} - - {% endif %} + + from github_user ), fields as ( @@ -33,16 +28,8 @@ with github_user as ( id as user_id, login as login_name, name, - bio, company - --The below script allows for pass through columns. - {% if var('user_pass_through_columns') %} - , - {{ var('user_pass_through_columns') | join (", ")}} - - {% endif %} - from macro ) diff --git a/models/tmp/stg_github__repo_team_tmp.sql b/models/tmp/stg_github__repo_team_tmp.sql new file mode 100644 index 0000000..cb732c1 --- /dev/null +++ b/models/tmp/stg_github__repo_team_tmp.sql @@ -0,0 +1,2 @@ +select * +from {{ var('repo_team') }} diff --git a/models/tmp/stg_github__team_tmp.sql b/models/tmp/stg_github__team_tmp.sql new file mode 100644 index 0000000..1fe5d3a --- /dev/null +++ b/models/tmp/stg_github__team_tmp.sql @@ -0,0 +1,2 @@ +select * +from {{ var('team') }} From 6bed8301f2e50a6e316ed37ad4c7231a4a6111e6 Mon Sep 17 00:00:00 2001 From: fivetran-joemarkiewicz Date: Mon, 1 Feb 2021 16:51:21 -0600 Subject: [PATCH 3/5] team and repo_team data add --- .../data/github_repo_team_data.csv | 9 +++++++++ integration_tests/data/github_team_data.csv | 20 +++++++++++++++++++ integration_tests/dbt_project.yml | 4 +++- 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 integration_tests/data/github_repo_team_data.csv create mode 100644 integration_tests/data/github_team_data.csv diff --git a/integration_tests/data/github_repo_team_data.csv b/integration_tests/data/github_repo_team_data.csv new file mode 100644 index 0000000..14e978a --- /dev/null +++ b/integration_tests/data/github_repo_team_data.csv @@ -0,0 +1,9 @@ +repository_id,team_id,_fivetran_synced,permission +256633524,3800337,2021-02-01 20:21:23,f43dff9a0dc54f0643d0c6d7971635f0 +259738665,3800337,2021-02-01 20:21:20,21a361d96e3e13f5f109748c2a9d2434 +261609176,3800337,2021-02-01 20:20:50,21a361d96e3e13f5f109748c2a9d2434 
+267084139,3800337,2021-02-01 20:20:59,21a361d96e3e13f5f109748c2a9d2434 +270778238,3800337,2021-02-01 20:21:02,21a361d96e3e13f5f109748c2a9d2434 +270778658,3800337,2021-02-01 20:20:48,21a361d96e3e13f5f109748c2a9d2434 +249819935,3800337,2021-02-01 20:21:17,21232f297a57a5a743894a0e4a801fc3 +255745202,3800337,2021-02-01 20:20:57,21232f297a57a5a743894a0e4a801fc3 \ No newline at end of file diff --git a/integration_tests/data/github_team_data.csv b/integration_tests/data/github_team_data.csv new file mode 100644 index 0000000..717ac68 --- /dev/null +++ b/integration_tests/data/github_team_data.csv @@ -0,0 +1,20 @@ +id,_fivetran_synced,description,name,org_id,parent_id,privacy,slug +4084414,2021-02-01 20:21:58,2a70fb7477c294559ceb246e1109879f,2a70fb7477c294559ceb246e1109879f,2722259,,closed,51762626b4f785729159fd35eea74deb +1736772,2021-02-01 20:21:34,f9ae5e5c44e59f94680af569879f704a,f9ae5e5c44e59f94680af569879f704a,2722259,,closed,ed881bac6397ede33c0a285c9f50bb83 +3800337,2021-02-01 20:20:44,,8ce941e0e955637810668b26de666709,2722259,,closed,b6ccd40d2285deac430714c82f73e033 +3691324,2021-02-01 20:21:52,90b93123a3715df57884b232c243e07f,90b93123a3715df57884b232c243e07f,2722259,,closed,68808647ca708409f62bb468d35b29b9 +3897383,2021-02-01 20:21:56,,0798b0f87691433adce62e64ae10368c,2722259,,closed,3ae44e39e0463bfbdb1f57edad45f076 +3370445,2021-02-01 20:21:46,b206a1b4ea1097761f78e8876f6da779,b206a1b4ea1097761f78e8876f6da779,2722259,,closed,6a21b6995a068148bbb65c8f949b3fb2 +4330096,2021-02-01 20:22:01,300fc3aaac0611d491f320a55a8e46c1,300fc3aaac0611d491f320a55a8e46c1,2722259,,closed,062e3d966f8c4ae795571a4bf47e736b +3580756,2021-02-01 20:21:51,c207b8bdb077b377db36bacb9f5a1257,c207b8bdb077b377db36bacb9f5a1257,2722259,,closed,b9fd94f8bf381b49802b6b686cb9d5ac +3278556,2021-02-01 20:21:42,aad810b7fb06abc684ac6db9da727850,aad810b7fb06abc684ac6db9da727850,2722259,,closed,5da9d5b39f22e3105b78710047a015b3 +4039811,2021-02-01 
20:21:58,,fd660e3ca963772f028c7513b025a569,2722259,,closed,d015268b3257760bb7cbd08abf69900d +3196507,2021-02-01 20:21:39,,ab0268fb8036a892dc341945cb7ae3be,2722259,1736772,closed,5d554bc5f3d2cd182cdd0952b1fb87ca +3259305,2021-02-01 20:21:41,db5eb84117d06047c97c9a0191b5fffe,db5eb84117d06047c97c9a0191b5fffe,2722259,1736772,closed,434990c8a25d2be94863561ae98bd682 +3315034,2021-02-01 20:21:42,,1458f7aba7fa08b3920d2836d95ce518,2722259,1736772,closed,a4af91ff4f8307c46cb0c4f7413f4b30 +3216430,2021-02-01 20:21:41,73595e157019f972a82f2fea61332a73,73595e157019f972a82f2fea61332a73,2722259,1736772,closed,0aabba303fd03b89a5112e1daa37026f +3344396,2021-02-01 20:21:43,,4b10734d7c577bcaf9eede1d1903ca91,2722259,1736772,closed,167ff30c5034ca70abe2d60a6e760448 +3350244,2021-02-01 20:21:44,,deb10517653c255364175796ace3553f,2722259,1736772,closed,f5bf48aa40cad7891eb709fcf1fde128 +3531018,2021-02-01 20:21:47,,8e1743cf3475c1466048d765e16da43b,2722259,3156820,closed,0c0134c0cbebf48be8c95920f5ea74fc +3531119,2021-02-01 20:21:48,30162ed78b6c10f731411f2fc440c24f,30162ed78b6c10f731411f2fc440c24f,2722259,3156820,closed,a189c633d9995e11bf8607170ec9a4b8 +3531038,2021-02-01 20:21:48,df40477e9a75456ed53400041634f94b,df40477e9a75456ed53400041634f94b,2722259,3156826,closed,1ed75a5d1e2c0bb05f410077765af687 \ No newline at end of file diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 2c088dc..91bdc03 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -1,5 +1,5 @@ name: 'github_source_integration_tests' -version: '0.2.0' +version: '0.2.1' config-version: 2 profile: 'integration_tests' @@ -15,7 +15,9 @@ vars: pull_request_review: "{{ ref('github_pull_request_review_data') }}" pull_request: "{{ ref('github_pull_request_data') }}" repository: "{{ ref('github_repository_data') }}" + repo_team: "{{ ref('github_repo_team_data') }}" requested_reviewer_history: "{{ ref('github_requested_reviewer_history_data') }}" + team: "{{ 
ref('github_team_data') }}" user: "{{ ref('github_user_data') }}" github_schema: github_source_integration_tests From 0153c80d24707e9fa0585cfe692c0dc211422bd1 Mon Sep 17 00:00:00 2001 From: fivetran-joemarkiewicz Date: Mon, 1 Feb 2021 17:51:00 -0600 Subject: [PATCH 4/5] circle fix --- integration_tests/dbt_project.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 91bdc03..a333329 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -37,6 +37,9 @@ seeds: closed_at: timestamp created_at: timestamp updated_at: timestamp + github_issue_closed_history_data: + +column_types: + updated_at: timestamp github_pull_request_review_data: +column_types: id: "{%- if target.type == 'bigquery' -%} INT64 {%- else -%} bigint {%- endif -%}" From cd75ee100112ceacb736a4114e12c9cfa47168b9 Mon Sep 17 00:00:00 2001 From: fivetran-joemarkiewicz Date: Tue, 2 Feb 2021 16:23:51 -0600 Subject: [PATCH 5/5] yml readme and format changes --- README.md | 25 +------------------------ dbt_project.yml | 26 +++++++++++++------------- integration_tests/dbt_project.yml | 27 ++++++++++++++------------- models/src_github.yml | 2 ++ models/stg_github.yml | 6 +++++- 5 files changed, 35 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index ad5207d..dbe3904 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# GitHub Source +# GitHub (Source) This package models GitHub data from [Fivetran's connector](https://fivetran.com/docs/applications/GitHub). It uses data in the format described by [this ERD](https://docs.google.com/presentation/d/1lx6ez7-x-s-n2JCnCi3SjG4XMmx9ysNUvaNCaWc3I_I/edit). @@ -36,32 +36,9 @@ vars: github_schema: your_schema_name ``` -This package includes all source columns defined in the `generate_columns.sql` macro. To add additional columns to this package, do so using our pass-through column variables. 
This is extremely useful if you'd like to include custom fields to the package. - - -```yml -# dbt_project.yml - -... -vars: - github_source: - issue_closed_history_pass_through_columns: [my_issue_closed_history_custom_field] - issue_comment_pass_through_columns: [issue_comment_body, issue_comment_created_at] - issue_merged_pass_through_columns: [issue_merged_commit_sha, issue_merged_actor_id] - issue_pass_through_columns: [cool_custom_issue_field, im_an_issue_too] - pull_request_review_pass_through_columns: [pull_request_review_body] - pull_request_pass_through_columns: [pull_request_base_label, pull_request_head_ref] - repository_pass_through_columns: [repository_language, repository_fork, repository_description] - requested_reviewer_history_pass_through_columns: [requested_reviewer_history_actor] - user_pass_through_columns: [user_hireable, one_more_user_field, im_the_last_user_field] - -``` - ## Database support This package has been tested on BigQuery, Snowflake and Redshift. -Coming soon -- compatibility with Spark - ## Contributions diff --git a/dbt_project.yml b/dbt_project.yml index 51e75c4..13b2dd2 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -13,16 +13,16 @@ models: vars: github_source: - issue_assignee: "{{ source('github', 'issue_assignee') }}" - issue_closed_history: "{{ source('github', 'issue_closed_history') }}" - issue_comment: "{{ source('github', 'issue_comment') }}" - issue_label: "{{ source('github', 'issue_label') }}" - issue_merged: "{{ source('github', 'issue_merged') }}" - issue: "{{ source('github', 'issue') }}" - pull_request_review: "{{ source('github', 'pull_request_review') }}" - pull_request: "{{ source('github', 'pull_request') }}" - repo_team: "{{ source('github', 'repo_team') }}" - repository: "{{ source('github', 'repository') }}" - requested_reviewer_history: "{{ source('github', 'requested_reviewer_history') }}" - team: "{{ source('github', 'team') }}" - user: "{{ source('github', 'user') }}" + issue_assignee: "{{ 
source('github', 'issue_assignee') }}" + issue_closed_history: "{{ source('github', 'issue_closed_history') }}" + issue_comment: "{{ source('github', 'issue_comment') }}" + issue_label: "{{ source('github', 'issue_label') }}" + issue_merged: "{{ source('github', 'issue_merged') }}" + issue: "{{ source('github', 'issue') }}" + pull_request_review: "{{ source('github', 'pull_request_review') }}" + pull_request: "{{ source('github', 'pull_request') }}" + repo_team: "{{ source('github', 'repo_team') }}" + repository: "{{ source('github', 'repository') }}" + requested_reviewer_history: "{{ source('github', 'requested_reviewer_history') }}" + team: "{{ source('github', 'team') }}" + user: "{{ source('github', 'user') }}" diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index a333329..61b1bae 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -6,19 +6,20 @@ profile: 'integration_tests' vars: github_source: - issue_assignee: "{{ ref('github_issue_assignee_data') }}" - issue_closed_history: "{{ ref('github_issue_closed_history_data') }}" - issue_comment: "{{ ref('github_issue_comment_data') }}" - issue_label: "{{ ref('github_issue_label_data') }}" - issue_merged: "{{ ref('github_issue_merged_data') }}" - issue: "{{ ref('github_issue_data') }}" - pull_request_review: "{{ ref('github_pull_request_review_data') }}" - pull_request: "{{ ref('github_pull_request_data') }}" - repository: "{{ ref('github_repository_data') }}" - repo_team: "{{ ref('github_repo_team_data') }}" - requested_reviewer_history: "{{ ref('github_requested_reviewer_history_data') }}" - team: "{{ ref('github_team_data') }}" - user: "{{ ref('github_user_data') }}" + issue_assignee: "{{ ref('github_issue_assignee_data') }}" + issue_closed_history: "{{ ref('github_issue_closed_history_data') }}" + issue_comment: "{{ ref('github_issue_comment_data') }}" + issue_label: "{{ ref('github_issue_label_data') }}" + issue_merged: "{{ 
ref('github_issue_merged_data') }}" + issue: "{{ ref('github_issue_data') }}" + pull_request_review: "{{ ref('github_pull_request_review_data') }}" + pull_request: "{{ ref('github_pull_request_data') }}" + repository: "{{ ref('github_repository_data') }}" + repo_team: "{{ ref('github_repo_team_data') }}" + requested_reviewer_history: "{{ ref('github_requested_reviewer_history_data') }}" + team: "{{ ref('github_team_data') }}" + user: "{{ ref('github_user_data') }}" + github_schema: github_source_integration_tests seeds: diff --git a/models/src_github.yml b/models/src_github.yml index 73a6adf..41d4154 100644 --- a/models/src_github.yml +++ b/models/src_github.yml @@ -69,6 +69,8 @@ sources: description: System generated unique id for the issue. This is not the number that appears in the URL. - name: body description: The text of the main description of the issue. + - name: locked + description: Boolean indicating whether the issue is locked. - name: closed_at description: Timestamp of when the issue was closed, NULL for issues that are open. - name: created_at diff --git a/models/stg_github.yml b/models/stg_github.yml index 6ffb7e9..c9722d7 100644 --- a/models/stg_github.yml +++ b/models/stg_github.yml @@ -36,11 +36,12 @@ models: description: Foreign key that references the issue table - name: label description: Text of the label - + - name: stg_github__issue_comment description: Table that contains comments made on issues columns: - name: issue_comment_id + description: System generated unique id for the issue comment. tests: - unique - not_null @@ -69,6 +70,8 @@ models: description: The text of the main description of the issue. - name: closed_at description: Timestamp of when the issue was closed, NULL for issues that are open. + - name: is_locked + description: Boolean indicating whether the issue is locked. - name: created_at description: Timestamp of when the issue was created. 
- name: milestone_id @@ -92,6 +95,7 @@ models: description: Table containing reviews made to pull requests columns: - name: pull_request_review_id + description: System generated unique id for the pull request review. tests: - unique - not_null