diff --git a/.gitignore b/.gitignore index 53676d1..ea8c4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1 @@ - -dbcredentials.txt -env -*.pyc +/target diff --git a/License.md b/License.md new file mode 100644 index 0000000..8dada3e --- /dev/null +++ b/License.md @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..9bd6c0a --- /dev/null +++ b/README.md @@ -0,0 +1,12 @@ +# analyst-collective/analytics + +This repository serves as an index for various dbt-based analytics packages. Please add additional packages by submitting PRs. + +- [Stripe](https://github.com/fishtown-analytics/stripe) +- [Snowplow](https://github.com/fishtown-analytics/snowplow) +- [Quickbooks](https://github.com/fishtown-analytics/quickbooks) +- [Zendesk](https://github.com/analyst-collective/zendesk) +- [Mailchimp](https://github.com/analyst-collective/mailchimp) + + +These packages are all installed and built using dbt. For additional information on dbt, go [here](https://github.com/analyst-collective/dbt). 
diff --git a/analysis/event_stream/funnel.sql b/analysis/event_stream/funnel.sql deleted file mode 100644 index 9591123..0000000 --- a/analysis/event_stream/funnel.sql +++ /dev/null @@ -1,42 +0,0 @@ -WITH -source as ( - select * from analyst_collective.snowplow_events -- change this view for your analysis -), -step_1 as ( - SELECT MIN("@timestamp") as "@timestamp", "@user_id" - FROM source - WHERE "@event" = 'page_view' -- filter by whichever columns you need - GROUP BY "@user_id" -), --- add more steps as you need. If you do add more steps, make sure to add a join below -step_2 as ( - SELECT MIN("@timestamp") as "@timestamp", "@user_id" - FROM source - WHERE "@event" = 'page_ping' - GROUP BY "@user_id" -), -step_3 as ( - SELECT MIN("@timestamp") as "@timestamp", "@user_id" - FROM source - WHERE "@event" = 'link_click' - GROUP BY "@user_id" -), -funnel as ( - --where the magic happens! - SELECT step_1."@user_id" as "step_1_users", - step_2."@user_id" as "step_2_users", - step_3."@user_id" as "step_3_users" - from step_1 - -- add more joins to make funnels with more steps - LEFT OUTER JOIN step_2 ON step_1."@user_id" = step_2."@user_id" and step_1."@timestamp" < step_2."@timestamp" - LEFT OUTER JOIN step_3 ON step_2."@user_id" = step_3."@user_id" and step_2."@timestamp" < step_3."@timestamp" - -- filter by time here - where step_1."@timestamp" > getdate() - interval '1 week' -) - -select -count(distinct step_1_users) as step_1, -count(distinct step_2_users) as step_2, -count(distinct step_3_users) as step_3 - -from funnel diff --git a/analysis/event_stream/interface.txt b/analysis/event_stream/interface.txt deleted file mode 100644 index a2d0144..0000000 --- a/analysis/event_stream/interface.txt +++ /dev/null @@ -1,9 +0,0 @@ - -######################################## -# Event Stream Interface # -######################################## - -timestamp timestamp -event varchar -user_id varchar - diff --git a/analysis/event_stream/timeseries.sql 
b/analysis/event_stream/timeseries.sql deleted file mode 100644 index 86e50fc..0000000 --- a/analysis/event_stream/timeseries.sql +++ /dev/null @@ -1,8 +0,0 @@ - -SELECT - date_trunc('day', "@timestamp"), -- use second, minute, hour, day, week, month, quarter, etc - count(*) -from analyst_collective.snowplow_events -where "@timestamp" > getdate() - interval '1 week' - --and "@event" = 'signup' -- filter fields here -group by 1 order by 1 desc diff --git a/config.json b/config.json deleted file mode 100644 index dc5b69e..0000000 --- a/config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "models" : ["pardot", "segment", "snowplow"], - "schema" : "analyst_collective" -} diff --git a/dbt_project.yml b/dbt_project.yml new file mode 100644 index 0000000..24521e7 --- /dev/null +++ b/dbt_project.yml @@ -0,0 +1,17 @@ +#settings specifically for this models directory +#config other dbt settings within ~/.dbt/profiles.yml +name: 'Analyst_Collective' +version: '1.0' + +source-paths: ["models"] +target-path: "target" +clean-targets: ["target"] +test-paths: ["test"] + +model-defaults: + materialized: false + enabled: true + +#models: +# zuora: +# materialized: true diff --git a/deprecated-analysis/mrr/active_mrr.sql b/deprecated-analysis/mrr/active_mrr.sql new file mode 100644 index 0000000..b07f11a --- /dev/null +++ b/deprecated-analysis/mrr/active_mrr.sql @@ -0,0 +1,27 @@ +with +charges_for_active_plans as +( + select * + from {{env.schema}}.zuora_subscriptions_w_charges_and_amendments + where + -- make sure the subscription is active + subscr_status = 'Active' + and + ( + -- make sure the rate plan charge is current + ( + rpc_start <= current_date + and + rpc_end >= current_date + ) + or subscr_term_type = 'EVERGREEN' + ) + and rpc_last_segment = TRUE +) + + +-- get the active mrr per account +select account_number, round(sum(mrr),2) as active_mrr +from charges_for_active_plans +group by account_number + diff --git a/deprecated-analysis/mrr/total_mrr_by_month.sql 
b/deprecated-analysis/mrr/total_mrr_by_month.sql new file mode 100644 index 0000000..0b618d8 --- /dev/null +++ b/deprecated-analysis/mrr/total_mrr_by_month.sql @@ -0,0 +1,90 @@ +with + +subscriptions as +( + select * + from {{env.schema}}.zuora_subscriptions_w_charges_and_amendments +), + +-- generate calendar dates, starting with the first subscription date +dates as +( + select date_day, date_trunc('month',date_day)::date as date_month + from + ( + select (first_subscr + row_number() over (order by true))::date as date_day + from subscriptions + ) + where date_day <= current_date +), + + +-- get all charges up to each date in the calendar +charges_up_to_each_date as +( + select + date_day, date_month, account_number, mrr, rpc_start, rpc_end, rpc_last_segment, + amend_start, amend_id, subscr_term_type, subscr_start, subscr_end, subscr_id, + subscr_name, subscr_version, + dateadd(month,1,date_month) as date_month_plus_one, + max(date_day) over (partition by subscr_name, dateadd(month,1,date_month)) as max_subscr_trunc_date, + max(subscr_version) over (partition by subscr_name, date_month) as max_subscr_version_within_date + from dates a + left join subscriptions b + on 1=1 + and rpc_start <= date_day + and rpc_last_segment = 'TRUE' +), + +all_charges_by_month as +( + select date_month, mrr + from charges_up_to_each_date + where + ( + -- make sure the subscriptions are EVERGREEN/falling into an appropriate bucket + ( + ( + rpc_start <= dateadd(month, 1, date_month) + and + rpc_end >= dateadd(month, 1, date_month) + ) + and + ( + amend_start > dateadd(month, 1, date_month) + or + amend_start is null + ) + ) + or + ( + subscr_term_type = 'EVERGREEN' + and subscr_start <= dateadd(month, 1, date_month) + and + ( + subscr_end is null + or + subscr_end >= dateadd(month, 1, date_month) + ) + and + ( + amend_start > dateadd(month, 1, date_month) + or + amend_start is null + ) + ) + ) + and date_day = max_subscr_trunc_date + and subscr_version = 
max_subscr_version_within_date + and dateadd(month, 1, date_month) <= current_date +) + + + +-- get mrr for each month +select date_month, sum(mrr) as total_mrr +from all_charges_by_month +group by date_month +order by date_month + + diff --git a/deprecated-models/trello/trello_card_location.sql b/deprecated-models/trello/trello_card_location.sql new file mode 100644 index 0000000..f6ebe45 --- /dev/null +++ b/deprecated-models/trello/trello_card_location.sql @@ -0,0 +1,18 @@ +select + id, + idmembercreator, + date, + "type", + data__card__id, + data__card__name, + coalesce(data__list__id, + data__listafter__id, + lag(coalesce(data__list__id, data__listafter__id)) ignore nulls over (partition by data__card__id order by date)) as data__list__id, + coalesce(data__boardtarget__id, data__board__id) as data__board__id, + coalesce(data__card__closed, + lag(data__card__closed) ignore nulls over (partition by data__card__id order by date), + false) as data__card__closed +from trello_growth.trello_actions +where + data__card__id is not null + and "type" in ('createCard', 'updateCard', 'moveCardFromBoard', 'moveCardToBoard', 'commentCard') diff --git a/deprecated-models/trello/trello_model_tests.sql b/deprecated-models/trello/trello_model_tests.sql new file mode 100644 index 0000000..b68a3c1 --- /dev/null +++ b/deprecated-models/trello/trello_model_tests.sql @@ -0,0 +1,21 @@ +with null_boards_or_lists as +( + select id + from {{ref('trello_card_location')}} + where + data__board__id is null + or data__list__id is null +) +select + 'no_null_boards_or_lists' as name, + 'All location entries have a non-null board and list id' as description, +count(*) = 0 as result +from null_boards_or_lists + +union all + +select + 'fresher_than_one_day', + 'Most recent entry is no more than one day old', +max(date::timestamp) > current_date - '1 day'::interval +from {{ref('trello_card_location')}} diff --git a/deprecated-models/zuora/zuora_account.sql 
b/deprecated-models/zuora/zuora_account.sql new file mode 100644 index 0000000..233c201 --- /dev/null +++ b/deprecated-models/zuora/zuora_account.sql @@ -0,0 +1,5 @@ +select + id as account_id, + accountnumber as account_number, + * +from zuora.zuora_account diff --git a/deprecated-models/zuora/zuora_amendment.sql b/deprecated-models/zuora/zuora_amendment.sql new file mode 100644 index 0000000..567b2e9 --- /dev/null +++ b/deprecated-models/zuora/zuora_amendment.sql @@ -0,0 +1,6 @@ +select + id as amend_id, + subscriptionid as subscr_id, + effectivedate::timestamp as amend_start, + * +from zuora.zuora_amendment diff --git a/deprecated-models/zuora/zuora_rate_plan.sql b/deprecated-models/zuora/zuora_rate_plan.sql new file mode 100644 index 0000000..e68af44 --- /dev/null +++ b/deprecated-models/zuora/zuora_rate_plan.sql @@ -0,0 +1,5 @@ +select + id as rate_plan_id, + subscriptionid as subscr_id, + * +from zuora.zuora_rate_plan diff --git a/deprecated-models/zuora/zuora_rate_plan_charge.sql b/deprecated-models/zuora/zuora_rate_plan_charge.sql new file mode 100644 index 0000000..eecf00d --- /dev/null +++ b/deprecated-models/zuora/zuora_rate_plan_charge.sql @@ -0,0 +1,8 @@ +select + rateplanid as rate_plan_id, + effectivestartdate::timestamp as rpc_start, + effectiveenddate::timestamp as rpc_end, + mrr as "@mrr", + islastsegment as rpc_last_segment, + * +from zuora.zuora_rate_plan_charge diff --git a/deprecated-models/zuora/zuora_subscription.sql b/deprecated-models/zuora/zuora_subscription.sql new file mode 100644 index 0000000..ecae2de --- /dev/null +++ b/deprecated-models/zuora/zuora_subscription.sql @@ -0,0 +1,12 @@ +select + id as subscr_id, + status as subscr_status, + termtype as subscr_term_type, + accountid as account_id, + contracteffectivedate::timestamp as subscr_start, + subscriptionenddate::timestamp as subscr_end, + name as subscr_name, + "version#392c30e6081c24fb78ddf6d622de4f33"::integer + as subscr_version, + * +from zuora.zuora_subscription diff --git 
a/deprecated-models/zuora/zuora_subscriptions_w_charges_and_amendments.sql b/deprecated-models/zuora/zuora_subscriptions_w_charges_and_amendments.sql new file mode 100644 index 0000000..d2776ef --- /dev/null +++ b/deprecated-models/zuora/zuora_subscriptions_w_charges_and_amendments.sql @@ -0,0 +1,27 @@ +-- get all subscriptions with possible amendments for all accounts +with subscr_w_amendments as +( + select + account_number, acc.account_id, sub.subscr_id, + subscr_name, subscr_status, subscr_term_type, + subscr_start, subscr_end, subscr_version, amend_id, amend_start + from {{ref('zuora_account')}} acc + inner join {{ref('zuora_subscription')}} sub + on acc.account_id = sub.account_id + -- add amendments + left outer join {{ref('zuora_amendment')}} amend + on sub.subscr_id = amend.subscr_id +) + +select + account_number, account_id, sub.subscr_id, + subscr_name, subscr_status, subscr_term_type, + subscr_start, subscr_end, subscr_version, amend_id, amend_start, + rpc_start, rpc_end, rpc_last_segment, + min(subscr_start) over() as first_subscr, + "@mrr" as mrr +from subscr_w_amendments sub +inner join {{ref('zuora_rate_plan')}} rp + on rp.subscr_id = sub.subscr_id +inner join {{ref('zuora_rate_plan_charge')}} rpc + on rpc.rate_plan_id = rp.rate_plan_id diff --git a/logical-model-flow.pdf b/logical-model-flow.pdf deleted file mode 100644 index f30c171..0000000 Binary files a/logical-model-flow.pdf and /dev/null differ diff --git a/models/pardot/model.sql b/models/pardot/model.sql deleted file mode 100644 index da9c512..0000000 --- a/models/pardot/model.sql +++ /dev/null @@ -1,104 +0,0 @@ -create or replace view {schema}.visitoractivity_types_meta as ( - - --these literal values are pulled from pardot's api docs here: - --http://developer.pardot.com/kb/object-field-references/#visitor-activity - --they change periodically over time and this query will need to be correspondingly modified. 
- select 1 as type, 'Click' as type_decoded union all - select 2, 'View' union all - select 3, 'Error' union all - select 4, 'Success' union all - select 5, 'Session' union all - select 6, 'Sent' union all - select 7, 'Search' union all - select 8, 'New Opportunity' union all - select 9, 'Opportunity Won' union all - select 10, 'Opportunity Lost' union all - select 11, 'Open' union all - select 12, 'Unsubscribe Page' union all - select 13, 'Bounced' union all - select 14, 'Spam Complaint' union all - select 15, 'Email Preference Page' union all - select 16, 'Resubscribed' union all - select 17, 'Click (Third Party)' union all - select 18, 'Opportunity Reopened' union all - select 19, 'Opportunity Linked' union all - select 20, 'Visit' union all - select 21, 'Custom URL click' union all - select 22, 'Olark Chat' union all - select 23, 'Invited to Webinar' union all - select 24, 'Attended Webinar' union all - select 25, 'Registered for Webinar' union all - select 26, 'Social Post Click' union all - select 27, 'Video View' union all - select 28, 'Event Registered' union all - select 29, 'Event Checked In' union all - select 30, 'Video Conversion' union all - select 31, 'UserVoice Suggestion' union all - select 32, 'UserVoice Comment' union all - select 33, 'UserVoice Ticket' union all - select 34, 'Video Watched (>= 75% watched)' - -); - - - -create or replace view {schema}.visitoractivity_events_meta as ( - - --even with the type decoding that Pardot specifically provides, actually what is going on in a given event - --is somewhat ambiguous. this is an attempt to map type and type_name to a more event-based "event action" field - --which is always written in more standard action-oriented terms. 
- select 22 as "type", 'Chat Transcript' as type_name, 'chatted via olark' as event_name union all - select 21, 'Custom Redirect', 'clicked a custom redirect' union all - select 6, 'Email', 'sent an email' union all - select 11, 'Email', 'opened an email' union all - select 13, 'Email', 'bounced email' union all - select 14, 'Email', 'reported spam' union all - select 1, 'Email Tracker', 'clicked on email link' union all - select 28, 'Event', 'registered for event' union all - select 29, 'Event', 'checked in at event' union all - select 2, 'File', 'viewed a file' union all - select 3, 'Form', 'submitted a form with an error' union all - select 2, 'Form', 'viewed a form' union all - select 4, 'Form', 'successfully submitted a form' union all - select 4, 'Form Handler', 'successfully submitted a form handler' union all - select 2, 'Landing Page', 'viewed a landing page' union all - select 4, 'Landing Page', 'successfully submitted the form on a landing page' union all - select 3, 'Landing Page', 'submitted the form on a landing page with an error' union all - select 2, 'Multivariate Landing Page', 'viewed multivariate landing page' union all - select 4, 'Multivariate Landing Page', 'successfully submitted multivariate landing page' union all - select 3, 'Multivariate Landing Page', 'submitted multivariate landing page with an error' union all - select 8, 'New Opportunity', 'opened opportunity' union all - select 19, 'Opportunity Associated', 'linked existing opportunity' union all - select 10, 'Opportunity Lost', 'lost opportunity' union all - select 9, 'Opportunity Won', 'won opportunity' union all - select 2, 'Page View', 'viewed highlighted page' union all - select 34, 'Video', 'watched 75% or more of video' union all - select 27, 'Video', 'watched video' union all - select 30, 'Video', 'converted from video call to action' union all - select 20, 'Visit', 'visited website' union all - select 25, 'Webinar', 'registered for webinar' union all - select 24, 'Webinar', 
'attended webinar' union all - select 18, '', 'reopened opportunity' - -); - - -create or replace view {schema}.visitoractivity as ( - --this table has a bunch of types that really should be event actions but are very poorly formulated. - --the custom logic in this view is an attempt to fix that. - --not all of the various type / type_name combinations have been accounted for yet; I still need to determine exactly what some of them mean. - select - -- event_stream interface - va.created_at as "@timestamp", - t.type_decoded as "@event", - va.prospect_id as "@user_id", - va.* - from - olga_pardot.visitoractivity va - inner join {schema}.visitoractivity_events_meta e - on va."type" = e."type" and va.type_name = e.type_name - inner join {schema}.visitoractivity_types_meta t - on va."type" = t."type" -); - -comment on view {schema}.visitoractivity is 'timeseries,funnel,cohort'; diff --git a/models/segment/model.sql b/models/segment/model.sql deleted file mode 100644 index 0971507..0000000 --- a/models/segment/model.sql +++ /dev/null @@ -1,12 +0,0 @@ -create or replace view {schema}.track as ( - select - "timestamp"::timestamp as "@timestamp", - "event" as "@event", - "userid" as "@user_id", - * - - from - segment.track -); - -comment on view {schema}.track is 'timeseries,funnel,cohort'; \ No newline at end of file diff --git a/models/snowplow/model.sql b/models/snowplow/model.sql deleted file mode 100644 index 62a2f4a..0000000 --- a/models/snowplow/model.sql +++ /dev/null @@ -1,11 +0,0 @@ -create or replace view {schema}.events as ( - select - "collector_tstamp" as "@timestamp", - "event_name" as "@event", - "domain_userid" as "@user_id", - * - from - atomic.events -); - -comment on view {schema}.events is 'timeseries,funnel,cohort'; \ No newline at end of file diff --git a/notes.txt b/notes.txt deleted file mode 100644 index 11531de..0000000 --- a/notes.txt +++ /dev/null @@ -1,17 +0,0 @@ -sql construction conventions -- first layer should simply set fields and table / 
schema -- second layer should be filter. if no records to be filtered out, simply implement as select *. -- third layer should be transformations. this could include datatype conversions, mapping, and other simple transformations to make the data more standardized and consumable. *all transformations must meet the strict definition of universal applicability.* -- see logical-model-flow.pdf for a visual representation of the structure. -- all files should be DDL (should create permanent database objects, not just execute queries) - -questions/notes -- need to create a destination schema for views created. - - should be separate schema for each source system or all together in a single schema? - - should scripts automatically drop / recreate schemas? much cleaner but high potential for fuckup by users not paying attention. - - should the schema for the intermediate views (base, filtered, transformed) be separate from the schema for the final views? i think so... -- how should unit / integration / regression testing work? the last two, especially, are a huge deal. -- what's the best way to execute a bunch of sql statements in a row even with the sql source being in independent files? -- how do these get documented in an SEO-friendly way? coming across one of these in github will scare off most medium-technical biz users... -- we should consider making a cleanup script that drops all of the views created. it's annoying to clean up after them. -- is there a way we can provide a deduplication layer? this is a big problem that yevgeniy runs into; we should think about it. 
diff --git a/scripts/analyst_collective/__init__.py b/scripts/analyst_collective/__init__.py deleted file mode 100644 index a3b5826..0000000 --- a/scripts/analyst_collective/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ - - -from credentials import Credentials -from runner import Runner diff --git a/scripts/analyst_collective/credentials.py b/scripts/analyst_collective/credentials.py deleted file mode 100644 index a854c78..0000000 --- a/scripts/analyst_collective/credentials.py +++ /dev/null @@ -1,14 +0,0 @@ - - -class Credentials(object): - def __init__(self, filename): - with open(filename) as creds_fh: - creds = creds_fh.read().strip().splitlines() - - if len(creds) != 5: - raise RuntimeError("Credentials file {} invalid!".format(filename)) - - user, pw, host, port, db = creds - - self.conn_string = "dbname='{}' port='{}' user='{}' password='{}' host='{}'".format(db, port, user, pw, host) - diff --git a/scripts/analyst_collective/runner.py b/scripts/analyst_collective/runner.py deleted file mode 100644 index b05d695..0000000 --- a/scripts/analyst_collective/runner.py +++ /dev/null @@ -1,61 +0,0 @@ - -import sqlparse, psycopg2, sys, os - -class Runner(object): - def __init__(self, config, creds, models_dir): - self.config = config - self.creds = creds - self.models_dir = models_dir - - self.connection = psycopg2.connect(creds.conn_string) - - def models(self): - return self.config['models'] - - def try_create_schema(self): - sql = self.interpolate("create schema if not exists {schema}") - self.execute(sql) - - def clean_schema(self): - self.try_create_schema() - - def execute(self, sql): - debug = sql.replace("\n", " ").strip()[0:200] - print "Running: {}".format(debug) - with self.connection as connection: - with connection.cursor() as cursor: - cursor.execute(sql) - print " {}".format(cursor.statusmessage) - - def interpolate(self, sql, model_name=""): - try: - return sql.format(model=model_name, **self.config) - except KeyError as e: - print "Error interpolating 
key: {{{error_key}}} in model: {model}".format(error_key=str(e).replace("'", ""), model=model_name) - return "" - - def add_prefix(self, uninterpolated_sql, model): - match = "{schema}." - replace = "{schema}.{model}_" - return uninterpolated_sql.replace(match, replace) - - def create_models(self): - for model_name in self.models(): - # right now, this only checks for model.sql in the model dir. It can ideally load the SQL file DAG - model_file = os.path.join(self.models_dir, model_name, 'model.sql') - - contents = "" - with open(model_file) as model_fh: - contents = model_fh.read() - - statements = sqlparse.parse(contents); - for statement in statements: - prefixed = self.add_prefix(str(statement), model_name) - sql = self.interpolate(prefixed, model_name) - - if len(sql.strip()) == 0: - # we could throw an error here! Definitely don't execute the sql though - continue - - self.execute(sql) - diff --git a/scripts/main.py b/scripts/main.py deleted file mode 100644 index e83e3b1..0000000 --- a/scripts/main.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python - -from analyst_collective import Credentials, Runner -import argparse, json, sys, os - -parser = argparse.ArgumentParser(description='Analyst Collective Runner') -parser.add_argument('--credentials', default="dbcredentials.txt", type=str, help='Path to database credentials file') -parser.add_argument('--config', default="../config.json", type=str, help='Path to analyst collective config file') - -args = parser.parse_args() - -creds = Credentials(args.credentials) - -config = None -with open(args.config) as config_fh: - contents = config_fh.read() - try: - config = json.loads(contents) - except ValueError as e: - print "Could not parse config file {}".format(args.config), e - sys.exit(1) - -models_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'models') - -runner = Runner(config, creds, models_dir) -runner.clean_schema() -runner.create_models() diff --git a/scripts/requirements.txt 
b/scripts/requirements.txt deleted file mode 100644 index ee9ca6f..0000000 --- a/scripts/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -sqlparse==0.1.18 -wheel==0.24.0 -six==1.10.0 -psycopg2==2.6.1