-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy path00_Lakehouse_초기화_노트북.sql
120 lines (105 loc) · 2.84 KB
/
00_Lakehouse_초기화_노트북.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
-- Databricks notebook source
-- MAGIC %python
-- MAGIC databricks_user = spark.sql("SELECT current_user()").collect()[0][0].split('@')[0].replace(".", "_")
-- MAGIC
-- MAGIC spark.sql("DROP DATABASE IF EXISTS delta_{}_db CASCADE".format(str(databricks_user)))
-- MAGIC spark.sql("CREATE DATABASE IF NOT EXISTS delta_{}_db".format(str(databricks_user)))
-- MAGIC spark.sql("USE delta_{}_db".format(str(databricks_user)))
-- MAGIC print("데이터베이스명 : delta_{}_db".format((databricks_user)))
-- COMMAND ----------
CREATE TABLE IF NOT EXISTS user_data_raw_csv(
userid STRING,
gender STRING,
age INT,
height INT,
weight INT,
smoker STRING,
familyhistory STRING,
chosestlevs STRING,
bp STRING,
risk INT)
USING csv
OPTIONS (path "/databricks-datasets/iot-stream/data-user/userData.csv", header "true");
-- COMMAND ----------
CREATE TABLE IF NOT EXISTS device_data_raw_json(
--컬럼 내용 추가
calories_burnt DOUBLE,
device_id INT,
id STRING,
miles_walked DOUBLE,
num_steps INT,
`timestamp` TIMESTAMP,
user_id STRING,
value STRING
)
USING JSON
OPTIONS (path "/databricks-datasets/iot-stream/data-device")
-- COMMAND ----------
CREATE TABLE IF NOT EXISTS device_data_bronze_delta
USING DELTA
PARTITIONED BY (device_id)
AS SELECT * FROM JSON.`/databricks-datasets/iot-stream/data-device`;
SELECT * FROM device_data_bronze_delta;
-- COMMAND ----------
CREATE TABLE IF NOT EXISTS user_data_bronze_delta
USING DELTA
PARTITIONED BY (gender)
COMMENT "User Data Raw Table - No Transformations"
AS SELECT * FROM user_data_raw_csv;
-- COMMAND ----------
CREATE TABLE IF NOT EXISTS user_data_silver
USING DELTA
SELECT
userid AS user_id,
gender,
age,
height,
weight,
smoker,
familyhistory AS family_history,
chosestlevs AS cholest_levs,
bp AS blood_pressure,
risk
FROM user_data_bronze_delta;
-- COMMAND ----------
CREATE TABLE IF NOT EXISTS device_data_silver
USING DELTA
SELECT
id,
device_id,
user_id,
calories_burnt,
miles_walked,
num_steps,
miles_walked/num_steps as stride,
timestamp,
DATE(timestamp) as date
FROM device_data_bronze_delta;
-- COMMAND ----------
CREATE TABLE IF NOT EXISTS user_daily_averages_gold
USING DELTA
SELECT
u.user_id,
u.gender,
u.age,
u.blood_pressure,
u.cholest_levs,
AVG(calories_burnt) as avg_calories_burnt,
AVG(num_steps) as avg_num_steps,
AVG(miles_walked) as avg_miles_walked
FROM user_data_silver u
LEFT JOIN
(SELECT
user_id,
date,
MAX(calories_burnt) as calories_burnt,
MAX(num_steps) as num_steps,
MAX(miles_walked) as miles_walked
FROM device_data_silver
GROUP BY user_id, date) as daily
ON daily.user_id = u.user_id
GROUP BY u.user_id,
u.gender,
u.age,
u.blood_pressure,
u.cholest_levs