From df1ef3fba17a0c1f728d58ece383e59a2c063b3f Mon Sep 17 00:00:00 2001
From: Carl Cervone <42869436+ccerv1@users.noreply.github.com>
Date: Tue, 1 Oct 2024 20:03:22 -0400
Subject: [PATCH] feat(dbt): add gitcoin funding event data (#2289)

* feat(dbt): add sources and staging models for gitcoin funding events

* feat(dbt): map gitcoin projects to oso ids

* feat(dbt): add raw gitcoin funding event data
---
 warehouse/dbt/models/gitcoin_sources.yml      | 15 +++++++
 .../funding/int_gitcoin_funding_events.sql    | 26 +++++++++++
 .../funding/int_gitcoin_project_directory.sql | 44 +++++++++++++++++++
 warehouse/dbt/models/passport_sources.yml     |  8 ----
 .../gitcoin/stg_gitcoin__donations.sql        | 15 +++++++
 .../staging/gitcoin/stg_gitcoin__matching.sql | 11 +++++
 .../gitcoin/stg_gitcoin__project_groups.sql   | 14 ++++++
 .../gitcoin/stg_gitcoin__project_lookup.sql   |  6 +++
 .../stg_passport__scores.sql                  |  0
 9 files changed, 131 insertions(+), 8 deletions(-)
 create mode 100644 warehouse/dbt/models/gitcoin_sources.yml
 create mode 100644 warehouse/dbt/models/intermediate/funding/int_gitcoin_funding_events.sql
 create mode 100644 warehouse/dbt/models/intermediate/funding/int_gitcoin_project_directory.sql
 delete mode 100644 warehouse/dbt/models/passport_sources.yml
 create mode 100644 warehouse/dbt/models/staging/gitcoin/stg_gitcoin__donations.sql
 create mode 100644 warehouse/dbt/models/staging/gitcoin/stg_gitcoin__matching.sql
 create mode 100644 warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_groups.sql
 create mode 100644 warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_lookup.sql
 rename warehouse/dbt/models/staging/{passport => gitcoin}/stg_passport__scores.sql (100%)

diff --git a/warehouse/dbt/models/gitcoin_sources.yml b/warehouse/dbt/models/gitcoin_sources.yml
new file mode 100644
index 000000000..3155aca85
--- /dev/null
+++ b/warehouse/dbt/models/gitcoin_sources.yml
@@ -0,0 +1,15 @@
+sources:
+  - name: gitcoin
+    database: opensource-observer
+    schema: gitcoin
+    tables:
+      - name: passport_scores
+        identifier: passport_scores
+      - name: all_matching
+        identifier: all_matching
+      - name: all_donations
+        identifier: all_donations
+      - name: project_lookup
+        identifier: project_lookup
+      - name: project_groups_summary
+        identifier: project_groups_summary
\ No newline at end of file
diff --git a/warehouse/dbt/models/intermediate/funding/int_gitcoin_funding_events.sql b/warehouse/dbt/models/intermediate/funding/int_gitcoin_funding_events.sql
new file mode 100644
index 000000000..1808749b3
--- /dev/null
+++ b/warehouse/dbt/models/intermediate/funding/int_gitcoin_funding_events.sql
@@ -0,0 +1,26 @@
+-- Gitcoin funding events: donation rows ('crowdfunding') stacked on top of
+-- matching rows ('matching_grant'). The matching staging model exposes no
+-- transaction hash, timestamp, or donor address, so those columns are null.
+select
+  transaction_hash,
+  donation_timestamp as event_time,
+  round_id,
+  round_number,
+  chain_id,
+  gitcoin_project_id,
+  donor_address,
+  amount_in_usd,
+  'crowdfunding' as funding_type
+from {{ ref('stg_gitcoin__donations') }}
+union all
+select
+  null as transaction_hash,
+  null as event_time,
+  round_id,
+  round_number,
+  chain_id,
+  gitcoin_project_id,
+  null as donor_address,
+  amount_in_usd,
+  'matching_grant' as funding_type
+from {{ ref('stg_gitcoin__matching') }}
diff --git a/warehouse/dbt/models/intermediate/funding/int_gitcoin_project_directory.sql b/warehouse/dbt/models/intermediate/funding/int_gitcoin_project_directory.sql
new file mode 100644
index 000000000..5b0f9fd2c
--- /dev/null
+++ b/warehouse/dbt/models/intermediate/funding/int_gitcoin_project_directory.sql
@@ -0,0 +1,44 @@
+-- Maps Gitcoin project groups (and their project ids) to OSO project ids.
+-- A group is matched to an OSO project only when its latest GitHub handle and
+-- its latest payout address both belong to that same OSO project.
+with gitcoin_projects as (
+  select distinct
+    pg.gitcoin_group_id,
+    pg.latest_project_github,
+    pg.latest_project_recipient_address,
+    project_lookup.gitcoin_project_id
+  from {{ ref('stg_gitcoin__project_groups') }} as pg
+  left join {{ ref('stg_gitcoin__project_lookup') }} as project_lookup
+    on pg.gitcoin_group_id = project_lookup.gitcoin_group_id
+  -- keep only handles made solely of letters, digits, '_' or '-', at most 39
+  -- characters long, and not the literal 'none'; null handles are dropped too
+  where
+    regexp_contains(pg.latest_project_github, '^[a-zA-Z0-9_-]+$')
+    and pg.latest_project_github not like '%?%'
+    and pg.latest_project_github != 'none'
+    and length(pg.latest_project_github) <= 39
+),
+
+-- (wallet address, GitHub namespace) pairs that share an OSO project id
+oso_projects as (
+  select distinct
+    wallets.project_id as oso_project_id,
+    wallets.artifact_name as address,
+    repos.artifact_namespace as repo_owner
+  from {{ ref('int_artifacts_in_ossd_by_project') }} as wallets
+  inner join {{ ref('int_artifacts_in_ossd_by_project') }} as repos
+    on wallets.project_id = repos.project_id
+  where
+    wallets.artifact_type = 'WALLET'
+    and repos.artifact_source = 'GITHUB'
+)
+
+select distinct
+  gitcoin_projects.gitcoin_group_id,
+  gitcoin_projects.gitcoin_project_id,
+  oso_projects.oso_project_id
+from gitcoin_projects
+inner join oso_projects on (
+  gitcoin_projects.latest_project_github = oso_projects.repo_owner
+  and gitcoin_projects.latest_project_recipient_address = oso_projects.address
+)
diff --git a/warehouse/dbt/models/passport_sources.yml b/warehouse/dbt/models/passport_sources.yml
deleted file mode 100644
index 38e1ec684..000000000
--- a/warehouse/dbt/models/passport_sources.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-sources:
-  - name: gitcoin
-    database: opensource-observer
-    schema: gitcoin
-    tables:
-      - name: passport_scores
-        identifier: passport_scores
-      
\ No newline at end of file
diff --git a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__donations.sql b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__donations.sql
new file mode 100644
index 000000000..0b1f650ec
--- /dev/null
+++ b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__donations.sql
@@ -0,0 +1,15 @@
+-- Gitcoin donations above zero USD, with renamed columns and lowercased addresses and hashes.
+select distinct
+  `source` as gitcoin_data_source,
+  `timestamp` as donation_timestamp,
+  round_id,
+  round_num as round_number,
+  chain_id,
+  project_id as gitcoin_project_id,
+  amount_in_usd,
+  LOWER(recipient_address) as project_recipient_address,
+  LOWER(donor_address) as donor_address,
+  LOWER(transaction_hash) as transaction_hash,
+  TRIM(project_name) as project_application_title
+from {{ source("gitcoin", "all_donations") }}
+where amount_in_usd > 0
diff --git a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__matching.sql b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__matching.sql
new file mode 100644
index 000000000..45f3eabeb
--- /dev/null
+++ b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__matching.sql
@@ -0,0 +1,11 @@
+-- Gitcoin matching amounts above zero USD, with renamed columns and lowercased recipient addresses.
+select distinct
+  round_id as round_id,
+  round_num as round_number,
+  chain_id,
+  project_id as gitcoin_project_id,
+  match_amount_in_usd as amount_in_usd,
+  TRIM(title) as round_title,
+  LOWER(recipient_address) as project_recipient_address
+from {{ source("gitcoin", "all_matching") }}
+where match_amount_in_usd > 0
diff --git a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_groups.sql b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_groups.sql
new file mode 100644
index 000000000..81aa0bf0f
--- /dev/null
+++ b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_groups.sql
@@ -0,0 +1,14 @@
+-- Gitcoin project group summaries with renamed columns; text fields are trimmed and/or lowercased.
+select distinct
+  group_id as gitcoin_group_id,
+  latest_created_project_id as latest_gitcoin_project_id,
+  total_amount_donated as total_amount_donated_in_usd,
+  application_count as group_application_count,
+  latest_created_application as latest_project_application_timestamp,
+  latest_source as latest_gitcoin_data_source,
+  TRIM(title) as project_application_title,
+  LOWER(latest_payout_address) as latest_project_recipient_address,
+  TRIM(LOWER(latest_website)) as latest_project_website,
+  TRIM(LOWER(latest_project_twitter)) as latest_project_twitter,
+  TRIM(LOWER(latest_project_github)) as latest_project_github
+from {{ source("gitcoin", "project_groups_summary") }}
diff --git a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_lookup.sql b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_lookup.sql
new file mode 100644
index 000000000..914f7286d
--- /dev/null
+++ b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_lookup.sql
@@ -0,0 +1,6 @@
+-- Distinct (group id, project id, source) rows from the Gitcoin project lookup table.
+select distinct
+  group_id as gitcoin_group_id,
+  project_id as gitcoin_project_id,
+  source as latest_gitcoin_data_source
+from {{ source("gitcoin", "project_lookup") }}
diff --git a/warehouse/dbt/models/staging/passport/stg_passport__scores.sql b/warehouse/dbt/models/staging/gitcoin/stg_passport__scores.sql
similarity index 100%
rename from warehouse/dbt/models/staging/passport/stg_passport__scores.sql
rename to warehouse/dbt/models/staging/gitcoin/stg_passport__scores.sql