-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmultivariable_data_mining.sql
130 lines (126 loc) · 6.07 KB
/
multivariable_data_mining.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*=====================================================================*/
/* Multi-variable Data Mining models/predictions */
/*=====================================================================*/
/* Write a model that would predict the success (defined as grade>=90) */
/* of students based on their program and sex */
/* - First segment the raw data based on program and sex */
/* - Next perform average on the various multi-variable combinations */
/*=====================================================================*/
SELECT avg(Grad_M_seg) as "Graduate M grade>90 prediction%",
avg(Grad_F_seg) as "Graduate F grade>90 prediction%",
avg(undGrad_M_seg)as "Undergrad M grade>90 rediction%",
avg(undGrad_F_seg)as "Undergrad F grade>90 prediction%"
FROM (
SELECT program, sex /*independent variables*/
grade, /*fact, or target variable*/
case when program='graduate' and sex='M' then
case when grade>=90 then 1 else 0 end
end as Grad_M_seg,
case when program='graduate' and sex='F' then
case when grade>=90 then 1 else 0 end
end as Grad_F_seg,
case when program='undergraduate' and sex='M' then
case when grade>=90 then 1 else 0 end
end as undGrad_M_seg,
case when program='undergraduate' and sex='F' then
case when grade>=90 then 1 else 0 end
end as undGrad_F_seg
FROM grade natural join student
) raw_data
;
/*=================================================================*/
/* Write a model that would predict if there is instructor/student */
/* grade collusion based on sex */
/*=================================================================*/
select round(avg(MM_seg),4) as "insM/stuM grade>90 prediction%",
round(avg(MF_seg),4) as "insM/stuF grade>90 prediction%",
round(avg(FM_seg),4) as "insF/stuM grade>90 prediction%",
round(avg(FF_seg),4) as "insF/stuF grade>90 prediction%"
from (
select i.sex as inst_sex, s.sex as stu_sex, /*dimensions*/
grade, /*fact*/
case when i.sex='M' and s.sex='M' then
case when grade>90 then 1 else 0 end
end as MM_seg,
case when i.sex='M' and s.sex='F' then
case when grade>90 then 1 else 0 end
end as MF_seg,
case when i.sex='F' and s.sex='M' then
case when grade>90 then 1 else 0 end
end as FM_seg,
case when i.sex='F' and s.sex='F' then
case when grade>90 then 1 else 0 end
end as FF_seg
from grade join student s using(student_id)
join instructor i using(instructor_id)
) raw_data
;
/*===============================================================*/
/* Write a model that would predict the probability of obtaining */
/* a grade >= 85 based on program and school_year */
/*===============================================================*/
select round(avg(yr1),4) as "year1 grade>=85 prediction%",
round(avg(yr2),4) as "year2 grade>=85 prediction%",
round(avg(yr3),4) as "year3 grade>=85 prediction%",
round(avg(yr4),4) as "year4 grade>=85 prediction%",
round(avg(yr5),4) as "yearG1 grade>=85 prediction%",
round(avg(yr6),4) as "yearG2 grade>=85 prediction%"
from (
select program, school_year,
grade,
case when program='undergraduate' and school_year=1 then
case when grade>=85 then 1 else 0 end
end as yr1,
case when program='undergraduate' and school_year=2 then
case when grade>85 then 1 else 0 end
end as yr2,
case when program='undergraduate' and school_year=3 then
case when grade>=85 then 1 else 0 end
end as yr3,
case when program='undergraduate' and school_year=4 then
case when grade>85 then 1 else 0 end
end as yr4,
case when program='graduate' and school_year=1 then
case when grade>=85 then 1 else 0 end
end as yr5,
case when program='graduate' and school_year=2 then
case when grade>85 then 1 else 0 end
end as yr6
from grade natural join student
) raw_data
;
/*===========================================================================*/
/* Write a model that would predict the probability of grade>=90 for student */
/* based on student sex and course department/concentration */
/*===========================================================================*/
select round(avg(IT_M), 4) as "M-IT success prediction",
round(avg(IT_F), 4) as "F-IT success prediction",
round(avg(Web_M),4) as "M-Web success prediction",
round(avg(Web_F),4) as "F-Web success prediction",
round(avg(DB_M), 4) as "M-DB success prediction",
round(avg(DB_F), 4) as "F-DB success prediction"
from (
select department, sex
grade,
case when department like 'Information%' and sex='M' then
case when grade>=90 then 1 else 0 end
end as IT_M,
case when department like 'Information%' and sex='F' then
case when grade>=90 then 1 else 0 end
end as IT_F,
case when department like 'Web%' and sex='M' then
case when grade>=90 then 1 else 0 end
end as Web_M,
case when department like 'Web%' and sex='F' then
case when grade>=90 then 1 else 0 end
end as Web_F,
case when department like 'Database%' and sex='M' then
case when grade>=90 then 1 else 0 end
end as DB_M,
case when department like 'Database%' and sex='F' then
case when grade>=90 then 1 else 0 end
end as DB_F
from grade natural join student
natural join course
) raw_data
;