From 65a35cd889c1407fb64c8452c229df08c39c765b Mon Sep 17 00:00:00 2001
From: Henry Hund
Date: Mon, 14 Mar 2016 14:57:40 -0400
Subject: [PATCH 1/8] starting ppc analysis

---
 .gitignore  | 1 +
 config.json | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)
 delete mode 100644 config.json

diff --git a/.gitignore b/.gitignore
index 53676d1..e859da0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 dbcredentials.txt
 env
 *.pyc
+config.json
diff --git a/config.json b/config.json
deleted file mode 100644
index 2042e57..0000000
--- a/config.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-    "models" : ["pardot", "segment", "snowplow", "trello"],
-    "schema" : "analyst_collective"
-}

From f19c58ba3853ef808e61d866dbf813f17b11efd0 Mon Sep 17 00:00:00 2001
From: Henry Hund
Date: Mon, 14 Mar 2016 18:23:22 -0400
Subject: [PATCH 2/8] define adwords_summary schema

---
 models/adwords/model.sql | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 models/adwords/model.sql

diff --git a/models/adwords/model.sql b/models/adwords/model.sql
new file mode 100644
index 0000000..664ef64
--- /dev/null
+++ b/models/adwords/model.sql
@@ -0,0 +1,25 @@
+create or replace view {schema}.adwords_summary as (
+    with ad_data as
+    (
+        select
+            lower(addestinationurl) as cleanurl,
+            *
+        from
+            adwords.adwords12218869_v2 -- How do I make this work regardless of the extension?
+    )
+    select
+        REPLACE(REGEXP_SUBSTR(cleanurl,'utm_source=[^&]*'),'utm_source=','') as "@utm_source",
+        REPLACE(REGEXP_SUBSTR(cleanurl,'utm_medium=[^&]*'),'utm_medium=','') as "@utm_medium",
+        REPLACE(REGEXP_SUBSTR(cleanurl,'utm_campaign=[^&]*'),'utm_campaign=','') as "@utm_campaign",
+        REPLACE(REGEXP_SUBSTR(cleanurl,'utm_content=[^&]*'),'utm_content=','') as "@utm_content",
+        REPLACE(REGEXP_SUBSTR(cleanurl,'utm_term=[^&]*'),'utm_term=','') as "@utm_term",
+        impressions::integer as "@impressions",
+        adcost::float as "@cost",
+        date::date as "@date",
+        adclicks::integer as "@clicks",
+        SPLIT_PART(cleanurl,'?',1) as "@base_url",
+        SPLIT_PART(cleanurl,'?',2) as querystring,
+        *
+    from
+        ad_data
+    )

From 364eadecfbd174b1d5577191d867292a09c38076 Mon Sep 17 00:00:00 2001
From: Henry Hund
Date: Mon, 14 Mar 2016 18:57:03 -0400
Subject: [PATCH 3/8] added adwords model test and started ppc analysis

---
 analysis/ppc/analysis.sql      | 46 ++++++++++++++++++++++++++++++++++
 models/adwords/model_tests.sql | 11 ++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 analysis/ppc/analysis.sql
 create mode 100644 models/adwords/model_tests.sql

diff --git a/analysis/ppc/analysis.sql b/analysis/ppc/analysis.sql
new file mode 100644
index 0000000..7ec3475
--- /dev/null
+++ b/analysis/ppc/analysis.sql
@@ -0,0 +1,46 @@
+create or replace view {schema}.ppc_consolidated_summary as (
+    (
+        select
+            *
+        from
+        (
+            select
+                'Google' as "@adnetwork",
+                "@utm_source",
+                "@utm_medium",
+                "@utm_campaign",
+                "@utm_content",
+                "@utm_term",
+                "@impressions"::integer,
+                "@cost"::float,
+                "@date"::date,
+                "@clicks"::integer,
+                "@base_url"
+            from
+                {schema}.adwords_summary
+        )
+    )
+    -- union if other adnetworks present. can we make this happen automatically?
+    -- union
+    -- (
+    --     select
+    --         *
+    --     from
+    --     (
+    --         select
+    --             'Facebook' as "@adnetwork",
+    --             "@utm_source",
+    --             "@utm_medium",
+    --             "@utm_campaign",
+    --             "@utm_content",
+    --             "@utm_term",
+    --             "@impressions"::integer,
+    --             "@cost"::float,
+    --             "@date"::date,
+    --             "@clicks"::integer,
+    --             "@base_url"
+    --         from
+    --             {schema}.facebook_summary
+    --     )
+    -- )
+)
diff --git a/models/adwords/model_tests.sql b/models/adwords/model_tests.sql
new file mode 100644
index 0000000..145a376
--- /dev/null
+++ b/models/adwords/model_tests.sql
@@ -0,0 +1,11 @@
+create or replace view {schema}.model_tests
+    (name, description, result)
+    as (
+
+    select
+        'adwords_fresher_than_one_day',
+        'Most recent adwords entry is no more than one day old',
+        max("@date"::date) > current_date - '1 day'::interval
+    from {schema}.adwords_summary
+
+    );

From 28b25299a5511d571e87744ea1f5349fc8c525fe Mon Sep 17 00:00:00 2001
From: Henry Hund
Date: Mon, 14 Mar 2016 19:00:05 -0400
Subject: [PATCH 4/8] added interface for ppc

---
 analysis/ppc/interface.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 analysis/ppc/interface.txt

diff --git a/analysis/ppc/interface.txt b/analysis/ppc/interface.txt
new file mode 100644
index 0000000..b693f92
--- /dev/null
+++ b/analysis/ppc/interface.txt
@@ -0,0 +1,15 @@
+########################################
+#            PPC Interface             #
+########################################
+
+adnetwork varchar
+utm_source varchar
+utm_medium varchar
+utm_campaign varchar
+utm_content varchar
+utm_term varchar
+impressions integer
+cost float
+clicks integer
+date date
+base_url varchar

From c0eb8adfdee232a9274b7a72f41c6522ea2a669d Mon Sep 17 00:00:00 2001
From: Henry Hund
Date: Tue, 15 Mar 2016 12:30:19 -0400
Subject: [PATCH 5/8] added campaign_name

---
Review note: the select-list entry added to models/adwords/model.sql now ends
with a comma. Without it, `campaign as "@campaign_name"` is followed directly
by `impressions::integer as "@impressions"` and the view definition does not
parse. Only a character on an existing added line changed, so hunk counts and
the diffstat are the same; the blob ids on the `index` line were not
recomputed.

 analysis/ppc/analysis.sql | 2 ++
 models/adwords/model.sql  | 1 +
 2 files changed, 3 insertions(+)

diff --git a/analysis/ppc/analysis.sql b/analysis/ppc/analysis.sql
index 7ec3475..34d42b4 100644
--- a/analysis/ppc/analysis.sql
+++ b/analysis/ppc/analysis.sql
@@ -6,6 +6,7 @@ create or replace view {schema}.ppc_consolidated_summary as (
         (
             select
                 'Google' as "@adnetwork",
+                "@campaign_name",
                 "@utm_source",
                 "@utm_medium",
                 "@utm_campaign",
@@ -29,6 +30,7 @@ create or replace view {schema}.ppc_consolidated_summary as (
     --     (
     --         select
     --             'Facebook' as "@adnetwork",
+    --             "@campaign_name",
     --             "@utm_source",
     --             "@utm_medium",
     --             "@utm_campaign",
diff --git a/models/adwords/model.sql b/models/adwords/model.sql
index 664ef64..5e771a0 100644
--- a/models/adwords/model.sql
+++ b/models/adwords/model.sql
@@ -13,6 +13,7 @@ create or replace view {schema}.adwords_summary as (
         REPLACE(REGEXP_SUBSTR(cleanurl,'utm_campaign=[^&]*'),'utm_campaign=','') as "@utm_campaign",
         REPLACE(REGEXP_SUBSTR(cleanurl,'utm_content=[^&]*'),'utm_content=','') as "@utm_content",
         REPLACE(REGEXP_SUBSTR(cleanurl,'utm_term=[^&]*'),'utm_term=','') as "@utm_term",
+        campaign as "@campaign_name",
         impressions::integer as "@impressions",
         adcost::float as "@cost",
         date::date as "@date",

From 8f505b4002b41d4e2a9d61f84e551283b64feee4 Mon Sep 17 00:00:00 2001
From: Henry Hund
Date: Tue, 15 Mar 2016 12:35:48 -0400
Subject: [PATCH 6/8] updated interface

---
 analysis/ppc/interface.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/analysis/ppc/interface.txt b/analysis/ppc/interface.txt
index b693f92..755d65b 100644
--- a/analysis/ppc/interface.txt
+++ b/analysis/ppc/interface.txt
@@ -3,6 +3,7 @@
 ########################################
 
 adnetwork varchar
+campaign_name varchar
 utm_source varchar
 utm_medium varchar
 utm_campaign varchar

From 9d639884eb8c340a8fcebf11e45e6ac0e7dc272e Mon Sep 17 00:00:00 2001
From: Henry Hund
Date: Tue, 15 Mar 2016 13:29:00 -0400
Subject: [PATCH 7/8] added facebook model and incorporated into ppc summary

---
Review note: the Facebook branch of ppc_consolidated_summary now reads
{schema}.facebook_ads_summary, which is the view name created by
models/facebook_ads/model.sql in this same patch. The commented-out draft it
replaces said facebook_summary, a name no patch in this series creates. Hunk
counts and the diffstat are the same; blob ids on the `index` line were not
recomputed.

NOTE(review): models/facebook_ads/model.sql is created here under the
hard-coded schema ac_hhund; PATCH 8/8 switches it to {schema}.

NOTE(review): models/adwords/model_tests.sql and
models/facebook_ads/model_tests.sql both `create or replace` the view
{schema}.model_tests. If both run against the same schema the later one
replaces the earlier one; confirm how the runner collects tests.

 analysis/ppc/analysis.sql           | 46 ++++++++++++++---------------
 models/facebook_ads/model.sql       | 35 ++++++++++++++++++++++
 models/facebook_ads/model_tests.sql | 11 +++++++
 3 files changed, 69 insertions(+), 23 deletions(-)
 create mode 100644 models/facebook_ads/model.sql
 create mode 100644 models/facebook_ads/model_tests.sql

diff --git a/analysis/ppc/analysis.sql b/analysis/ppc/analysis.sql
index 34d42b4..2dbfac3 100644
--- a/analysis/ppc/analysis.sql
+++ b/analysis/ppc/analysis.sql
@@ -22,27 +22,27 @@ create or replace view {schema}.ppc_consolidated_summary as (
         )
     )
     -- union if other adnetworks present. can we make this happen automatically?
-    -- union
-    -- (
-    --     select
-    --         *
-    --     from
-    --     (
-    --         select
-    --             'Facebook' as "@adnetwork",
-    --             "@campaign_name",
-    --             "@utm_source",
-    --             "@utm_medium",
-    --             "@utm_campaign",
-    --             "@utm_content",
-    --             "@utm_term",
-    --             "@impressions"::integer,
-    --             "@cost"::float,
-    --             "@date"::date,
-    --             "@clicks"::integer,
-    --             "@base_url"
-    --         from
-    --             {schema}.facebook_summary
-    --     )
-    -- )
+    union all
+    (
+        select
+            *
+        from
+        (
+            select
+                'Facebook' as "@adnetwork",
+                "@campaign_name",
+                "@utm_source",
+                "@utm_medium",
+                "@utm_campaign",
+                "@utm_content",
+                "@utm_term",
+                "@impressions"::integer,
+                "@cost"::float,
+                "@date"::date,
+                "@clicks"::integer,
+                "@base_url"
+            from
+                {schema}.facebook_ads_summary
+        )
+    )
 )
diff --git a/models/facebook_ads/model.sql b/models/facebook_ads/model.sql
new file mode 100644
index 0000000..5b1e1dd
--- /dev/null
+++ b/models/facebook_ads/model.sql
@@ -0,0 +1,35 @@
+create or replace view ac_hhund.facebook_ads_summary as (
+    with ad_data as
+    (
+        select
+            ag.name as "adgroup_name",
+            lower(nvl(object_url,link_url,object_story_spec__link_data__link)) as addestinationurl,
+            i.*
+        from
+            facebook.facebook_insights_101441173373823 i
+        join
+            facebook.facebook_adgroup_101441173373823 ag
+        on
+            ag.id = i.adgroup_id
+        join
+            facebook.facebook_adcreative_101441173373823 ac
+        on
+            ag.creative__id = ac.id
+    )
+    select
+        REPLACE(REGEXP_SUBSTR(addestinationurl,'utm_source=[^&]*'),'utm_source=','') as "@utm_source",
+        REPLACE(REGEXP_SUBSTR(addestinationurl,'utm_medium=[^&]*'),'utm_medium=','') as "@utm_medium",
+        REPLACE(REGEXP_SUBSTR(addestinationurl,'utm_campaign=[^&]*'),'utm_campaign=','') as "@utm_campaign",
+        REPLACE(REGEXP_SUBSTR(addestinationurl,'utm_content=[^&]*'),'utm_content=','') as "@utm_content",
+        REPLACE(REGEXP_SUBSTR(addestinationurl,'utm_term=[^&]*'),'utm_term=','') as "@utm_term",
+        adgroup_name as "@campaign_name",
+        impressions::integer as "@impressions",
+        spend::float as "@cost",
+        date_start::date as "@date",
+        clicks::integer as "@clicks",
+        SPLIT_PART(addestinationurl,'?',1) as "@base_url",
+        SPLIT_PART(addestinationurl,'?',2) as querystring,
+        *
+    from
+        ad_data
+    )
diff --git a/models/facebook_ads/model_tests.sql b/models/facebook_ads/model_tests.sql
new file mode 100644
index 0000000..00dcdb4
--- /dev/null
+++ b/models/facebook_ads/model_tests.sql
@@ -0,0 +1,11 @@
+create or replace view {schema}.model_tests
+    (name, description, result)
+    as (
+
+    select
+        'facebook_ads_fresher_than_one_day',
+        'Most recent facebook ads entry is no more than one day old',
+        max("@date"::date) > current_date - '1 day'::interval
+    from {schema}.facebook_ads_summary
+
+    );

From 07a2a8392fc481d3d415384642e084a999dbf065 Mon Sep 17 00:00:00 2001
From: Henry Hund
Date: Thu, 17 Mar 2016 11:54:57 -0400
Subject: [PATCH 8/8] fixed pull request notes. refactored ppc analysis to be a model

---
 analysis/ppc/interface.txt                   | 16 ----------------
 models/facebook_ads/model.sql                |  4 ++--
 .../ppc/analysis.sql => models/ppc/model.sql |  3 +++
 3 files changed, 5 insertions(+), 18 deletions(-)
 delete mode 100644 analysis/ppc/interface.txt
 rename analysis/ppc/analysis.sql => models/ppc/model.sql (86%)

diff --git a/analysis/ppc/interface.txt b/analysis/ppc/interface.txt
deleted file mode 100644
index 755d65b..0000000
--- a/analysis/ppc/interface.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-########################################
-#            PPC Interface             #
-########################################
-
-adnetwork varchar
-campaign_name varchar
-utm_source varchar
-utm_medium varchar
-utm_campaign varchar
-utm_content varchar
-utm_term varchar
-impressions integer
-cost float
-clicks integer
-date date
-base_url varchar
diff --git a/models/facebook_ads/model.sql b/models/facebook_ads/model.sql
index 5b1e1dd..a389f11 100644
--- a/models/facebook_ads/model.sql
+++ b/models/facebook_ads/model.sql
@@ -1,8 +1,8 @@
-create or replace view ac_hhund.facebook_ads_summary as (
+create or replace view {schema}.facebook_ads_summary as (
     with ad_data as
     (
         select
-            ag.name as "adgroup_name",
+            ag.name as adgroup_name,
             lower(nvl(object_url,link_url,object_story_spec__link_data__link)) as addestinationurl,
             i.*
         from
diff --git a/analysis/ppc/analysis.sql b/models/ppc/model.sql
similarity index 86%
rename from analysis/ppc/analysis.sql
rename to models/ppc/model.sql
index 2dbfac3..1af016e 100644
--- a/analysis/ppc/analysis.sql
+++ b/models/ppc/model.sql
@@ -1,3 +1,6 @@
+-- assuming there is google adwords and facebook ads.
+-- need to make this flexible such that we can detect any relevant paid ad platforms and union them in.
+
 create or replace view {schema}.ppc_consolidated_summary as (
     (
         select