-
Notifications
You must be signed in to change notification settings - Fork 467
/
Exploring Your Ecommerce Dataset with SQL in Google BigQuery
93 lines (93 loc) · 2.92 KB
/
Exploring Your Ecommerce Dataset with SQL in Google BigQuery
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
bq query --use_legacy_sql=false \
'SELECT COUNT(*) as num_duplicate_rows, * FROM
`data-to-insights.ecommerce.all_sessions_raw`
GROUP BY
fullVisitorId, channelGrouping, time, country, city, totalTransactionRevenue, transactions, timeOnSite, pageviews, sessionQualityDim, date, visitId, type, productRefundAmount, productQuantity, productPrice, productRevenue, productSKU, v2ProductName, v2ProductCategory, productVariant, currencyCode, itemQuantity, itemRevenue, transactionRevenue, transactionId, pageTitle, searchKeyword, pagePathLevel1, eCommerceAction_type, eCommerceAction_step, eCommerceAction_option
HAVING num_duplicate_rows > 1;'
bq query --use_legacy_sql=false \
'SELECT
fullVisitorId,
visitId,
date,
time,
v2ProductName,
productSKU,
type,
eCommerceAction_type,
eCommerceAction_step,
eCommerceAction_option,
transactionRevenue,
transactionId,
COUNT(*) as row_count
FROM
`data-to-insights.ecommerce.all_sessions`
GROUP BY 1,2,3 ,4, 5, 6, 7, 8, 9, 10,11,12
HAVING row_count > 1 # find duplicates'
bq query --use_legacy_sql=false \
'#standardSQL
SELECT
COUNT(*) AS product_views,
COUNT(DISTINCT fullVisitorId) AS unique_visitors
FROM `data-to-insights.ecommerce.all_sessions`;'
bq query --use_legacy_sql=false \
'SELECT
COUNT(DISTINCT fullVisitorId) AS unique_visitors,
channelGrouping
FROM `data-to-insights.ecommerce.all_sessions`
GROUP BY channelGrouping
ORDER BY channelGrouping DESC;'
bq query --use_legacy_sql=false \
'SELECT
(v2ProductName) AS ProductName
FROM `data-to-insights.ecommerce.all_sessions`
GROUP BY ProductName
ORDER BY ProductName'
bq query --use_legacy_sql=false \
'SELECT
COUNT(*) AS product_views,
(v2ProductName) AS ProductName
FROM `data-to-insights.ecommerce.all_sessions`
WHERE type = "PAGE"
GROUP BY v2ProductName
ORDER BY product_views DESC
LIMIT 5;'
bq query --use_legacy_sql=false \
'WITH unique_product_views_by_person AS (
-- find each unique product viewed by each visitor
SELECT
fullVisitorId,
(v2ProductName) AS ProductName
FROM `data-to-insights.ecommerce.all_sessions`
WHERE type = "PAGE"
GROUP BY fullVisitorId, v2ProductName )
-- aggregate the top viewed products and sort them
SELECT
COUNT(*) AS unique_view_count,
ProductName
FROM unique_product_views_by_person
GROUP BY ProductName
ORDER BY unique_view_count DESC
LIMIT 5'
bq query --use_legacy_sql=false \
'SELECT
COUNT(*) AS product_views,
COUNT(productQuantity) AS orders,
SUM(productQuantity) AS quantity_product_ordered,
v2ProductName
FROM `data-to-insights.ecommerce.all_sessions`
WHERE type = "PAGE"
GROUP BY v2ProductName
ORDER BY product_views DESC
LIMIT 5;'
bq query --use_legacy_sql=false \
'SELECT
COUNT(*) AS product_views,
COUNT(productQuantity) AS orders,
SUM(productQuantity) AS quantity_product_ordered,
SUM(productQuantity) / COUNT(productQuantity) AS avg_per_order,
(v2ProductName) AS ProductName
FROM `data-to-insights.ecommerce.all_sessions`
WHERE type = "PAGE"
GROUP BY v2ProductName
ORDER BY product_views DESC
LIMIT 5;'