From 0df137389ce51f1daee15dc0e257ea474ee739d7 Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Mon, 18 Jul 2022 21:18:10 -0700 Subject: [PATCH 1/2] Update DataFrame to download json player salary. --- .../REF-Data Processing with DataFrame.ipynb | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/machine-learning/REF-Data Processing with DataFrame.ipynb b/machine-learning/REF-Data Processing with DataFrame.ipynb index 7de0b9b..0144437 100644 --- a/machine-learning/REF-Data Processing with DataFrame.ipynb +++ b/machine-learning/REF-Data Processing with DataFrame.ipynb @@ -165,7 +165,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexInfoBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
DataTypeSystem.StringSystem.StringSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.StringSystem.StringSystem.StringSystem.StringSystem.StringSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.Single
1
Length (excluding null values)1010101077101010101010101010101010
" + "text/html": "
indexInfoBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
DataTypeSystem.StringSystem.StringSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.StringSystem.StringSystem.StringSystem.StringSystem.StringSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.Single
1
Length (excluding null values)1010101077101010101010101010101010
" }, "execution_count": 1, "metadata": {} @@ -194,7 +194,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
88-16-04USA
72
218
2006
7
ROkposoKyleRWBUF
65
19
26
45
24
1443
73983
1
90-08-10USA
76
210
2009
114
LHelgesonSethDN.J
9
1
0
1
15
177
7273
2
96-26-11USA
77
203
2015
37
RCarloBrandonDBOS
82
6
10
16
59
2080
102414
3
90-16-11USA
74
219
<null>
<null>
LSchallerTimCBOS
59
7
7
14
23
1035
43436
4
92-20-03USA
72
215
2010
37
RFaulkJustinDCAR
75
17
20
37
32
1987
104133
5
94-01-05USA
74
205
2012
120
LSlavinJaccobDCAR
82
5
29
34
12
2135
115316
6
90-20-06USA
75
221
2008
128
RPaterynGregDDAL/MTL
36
1
8
9
10
720
33312
7
90-27-05USA
74
196
2009
198
RDowdNicCL.A
70
6
16
22
25
1230
52314
8
90-16-07USA
75
221
<null>
<null>
LLashoffBrianDDET
5
0
0
0
0
93
3754
9
86-09-08USA
71
197
<null>
<null>
RCannonePatrickCMIN
3
0
0
0
0
35
1419
" + "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
88-16-04USA
72
218
2006
7
ROkposoKyleRWBUF
65
19
26
45
24
1443
73983
1
90-08-10USA
76
210
2009
114
LHelgesonSethDN.J
9
1
0
1
15
177
7273
2
96-26-11USA
77
203
2015
37
RCarloBrandonDBOS
82
6
10
16
59
2080
102414
3
90-16-11USA
74
219
<null>
<null>
LSchallerTimCBOS
59
7
7
14
23
1035
43436
4
92-20-03USA
72
215
2010
37
RFaulkJustinDCAR
75
17
20
37
32
1987
104133
5
94-01-05USA
74
205
2012
120
LSlavinJaccobDCAR
82
5
29
34
12
2135
115316
6
90-20-06USA
75
221
2008
128
RPaterynGregDDAL/MTL
36
1
8
9
10
720
33312
7
90-27-05USA
74
196
2009
198
RDowdNicCL.A
70
6
16
22
25
1230
52314
8
90-16-07USA
75
221
<null>
<null>
LLashoffBrianDDET
5
0
0
0
0
93
3754
9
86-09-08USA
71
197
<null>
<null>
RCannonePatrickCMIN
3
0
0
0
0
35
1419
" }, "execution_count": 1, "metadata": {} @@ -227,7 +227,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexInfoBirthdayNatHeight_cmWeight_kgDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
DataTypeSystem.StringSystem.StringSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.StringSystem.StringSystem.StringSystem.StringSystem.StringSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.Single
1
Length (excluding null values)1010101088101010101010101010101010
" + "text/html": "
indexInfoBirthdayNatHeight_cmWeight_kgDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
DataTypeSystem.StringSystem.StringSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.StringSystem.StringSystem.StringSystem.StringSystem.StringSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.SingleSystem.Single
1
Length (excluding null values)1010101088101010101010101010101010
" }, "execution_count": 1, "metadata": {} @@ -256,7 +256,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexBirthdayNatHeight_cmWeight_kgDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
90-11-21SWE
180
84
2009
53
LRodinAntonRWVAN
3
0
1
1
0
40
1604
1
86-04-28CZE
188
107
2004
180
RPolakRomanDTOR
75
4
7
11
65
1817
80643
2
90-11-29CZE
201
100
<null>
<null>
RSustrAndrejDT.B
80
3
11
14
43
1813
84427
3
96-05-25CZE
183
82
2014
25
RPastrnakDavidRW/LWBOS
75
34
36
70
34
1597
80921
4
94/03/23SWE
190
95
2012
81
RSundqvistOskarCPIT
10
0
0
0
2
138
5504
5
90-06-05CZE
183
93
2010
66
RGudasRadkoDPHI
67
6
17
23
93
1739
77555
6
90-06-01CHE
185
91
2008
38
LJosiRomanDNSH
72
12
37
49
18
2076
108323
7
86-08-01SWE
180
86
2005
216
RStralmanAntonDT.B
73
5
17
22
20
1963
100304
8
92-11-12SWE
190
98
2011
4
RLarssonAdamDEDM
79
4
15
19
55
2055
95509
9
89-11-21FRA
183
91
<null>
<null>
LRousselAntoineLWDAL
60
12
15
27
115
1427
55866
" + "text/html": "
indexBirthdayNatHeight_cmWeight_kgDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
90-11-21SWE
180
84
2009
53
LRodinAntonRWVAN
3
0
1
1
0
40
1604
1
86-04-28CZE
188
107
2004
180
RPolakRomanDTOR
75
4
7
11
65
1817
80643
2
90-11-29CZE
201
100
<null>
<null>
RSustrAndrejDT.B
80
3
11
14
43
1813
84427
3
96-05-25CZE
183
82
2014
25
RPastrnakDavidRW/LWBOS
75
34
36
70
34
1597
80921
4
94/03/23SWE
190
95
2012
81
RSundqvistOskarCPIT
10
0
0
0
2
138
5504
5
90-06-05CZE
183
93
2010
66
RGudasRadkoDPHI
67
6
17
23
93
1739
77555
6
90-06-01CHE
185
91
2008
38
LJosiRomanDNSH
72
12
37
49
18
2076
108323
7
86-08-01SWE
180
86
2005
216
RStralmanAntonDT.B
73
5
17
22
20
1963
100304
8
92-11-12SWE
190
98
2011
4
RLarssonAdamDEDM
79
4
15
19
55
2055
95509
9
89-11-21FRA
183
91
<null>
<null>
LRousselAntoineLWDAL
60
12
15
27
115
1427
55866
" }, "execution_count": 1, "metadata": {} @@ -287,7 +287,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexValuesCounts
0
SWE
4
1
CZE
4
2
CHE
1
3
FRA
1
" + "text/html": "
indexValuesCounts
0
SWE
4
1
CZE
4
2
CHE
1
3
FRA
1
" }, "execution_count": 1, "metadata": {} @@ -376,7 +376,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
96-25-05CZE
72
181
2014
25
RPastrnakDavidRW/LWBOS
75
34
36
70
34
1597
80921
" + "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
90-05-06CZE
72
205
2010
66
RGudasRadkoDPHI
67
6
17
23
93
1739
77555
" }, "execution_count": 1, "metadata": {} @@ -398,7 +398,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
90-27-05USA
74
196
2009
198
RDowdNicCL.A
70
6
16
22
25
1230
52314
" + "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
90-16-11USA
74
219
<null>
<null>
LSchallerTimCBOS
59
7
7
14
23
1035
43436
" }, "execution_count": 1, "metadata": {} @@ -448,7 +448,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
88-16-04USA
72
218
2006
7
ROkposoKyleRWBUF
65
19
26
45
24
1443
73983
1
90-08-10USA
76
210
2009
114
LHelgesonSethDN.J
9
1
0
1
15
177
7273
2
96-26-11USA
77
203
2015
37
RCarloBrandonDBOS
82
6
10
16
59
2080
102414
3
90-16-11USA
74
219
<null>
<null>
LSchallerTimCBOS
59
7
7
14
23
1035
43436
4
92-20-03USA
72
215
2010
37
RFaulkJustinDCAR
75
17
20
37
32
1987
104133
5
94-01-05USA
74
205
2012
120
LSlavinJaccobDCAR
82
5
29
34
12
2135
115316
6
90-20-06USA
75
221
2008
128
RPaterynGregDDAL/MTL
36
1
8
9
10
720
33312
7
90-27-05USA
74
196
2009
198
RDowdNicCL.A
70
6
16
22
25
1230
52314
8
90-16-07USA
75
221
<null>
<null>
LLashoffBrianDDET
5
0
0
0
0
93
3754
9
86-09-08USA
71
197
<null>
<null>
RCannonePatrickCMIN
3
0
0
0
0
35
1419
10
90-21-11SWE
71
185
2009
53
LRodinAntonRWVAN
3
0
1
1
0
40
1604
11
86-28-04CZE
74
236
2004
180
RPolakRomanDTOR
75
4
7
11
65
1817
80643
12
90-29-11CZE
79
220
<null>
<null>
RSustrAndrejDT.B
80
3
11
14
43
1813
84427
13
96-25-05CZE
72
181
2014
25
RPastrnakDavidRW/LWBOS
75
34
36
70
34
1597
80921
14
94-23-03SWE
75
209
2012
81
RSundqvistOskarCPIT
10
0
0
0
2
138
5504
15
90-05-06CZE
72
205
2010
66
RGudasRadkoDPHI
67
6
17
23
93
1739
77555
16
90-01-06CHE
73
201
2008
38
LJosiRomanDNSH
72
12
37
49
18
2076
108323
17
86-01-08SWE
71
190
2005
216
RStralmanAntonDT.B
73
5
17
22
20
1963
100304
18
92-12-11SWE
75
216
2011
4
RLarssonAdamDEDM
79
4
15
19
55
2055
95509
19
89-21-11FRA
72
201
<null>
<null>
LRousselAntoineLWDAL
60
12
15
27
115
1427
55866
" + "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
88-16-04USA
72
218
2006
7
ROkposoKyleRWBUF
65
19
26
45
24
1443
73983
1
90-08-10USA
76
210
2009
114
LHelgesonSethDN.J
9
1
0
1
15
177
7273
2
96-26-11USA
77
203
2015
37
RCarloBrandonDBOS
82
6
10
16
59
2080
102414
3
90-16-11USA
74
219
<null>
<null>
LSchallerTimCBOS
59
7
7
14
23
1035
43436
4
92-20-03USA
72
215
2010
37
RFaulkJustinDCAR
75
17
20
37
32
1987
104133
5
94-01-05USA
74
205
2012
120
LSlavinJaccobDCAR
82
5
29
34
12
2135
115316
6
90-20-06USA
75
221
2008
128
RPaterynGregDDAL/MTL
36
1
8
9
10
720
33312
7
90-27-05USA
74
196
2009
198
RDowdNicCL.A
70
6
16
22
25
1230
52314
8
90-16-07USA
75
221
<null>
<null>
LLashoffBrianDDET
5
0
0
0
0
93
3754
9
86-09-08USA
71
197
<null>
<null>
RCannonePatrickCMIN
3
0
0
0
0
35
1419
10
90-21-11SWE
71
185
2009
53
LRodinAntonRWVAN
3
0
1
1
0
40
1604
11
86-28-04CZE
74
236
2004
180
RPolakRomanDTOR
75
4
7
11
65
1817
80643
12
90-29-11CZE
79
220
<null>
<null>
RSustrAndrejDT.B
80
3
11
14
43
1813
84427
13
96-25-05CZE
72
181
2014
25
RPastrnakDavidRW/LWBOS
75
34
36
70
34
1597
80921
14
94-23-03SWE
75
209
2012
81
RSundqvistOskarCPIT
10
0
0
0
2
138
5504
15
90-05-06CZE
72
205
2010
66
RGudasRadkoDPHI
67
6
17
23
93
1739
77555
16
90-01-06CHE
73
201
2008
38
LJosiRomanDNSH
72
12
37
49
18
2076
108323
17
86-01-08SWE
71
190
2005
216
RStralmanAntonDT.B
73
5
17
22
20
1963
100304
18
92-12-11SWE
75
216
2011
4
RLarssonAdamDEDM
79
4
15
19
55
2055
95509
19
89-21-11FRA
72
201
<null>
<null>
LRousselAntoineLWDAL
60
12
15
27
115
1427
55866
" }, "execution_count": 1, "metadata": {} @@ -483,7 +483,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
88-16-04USA
72
218
2006
7
ROkposoKyleRWBUF
65
19
26
45
24
1443
73983
1
90-08-10USA
76
210
2009
114
LHelgesonSethDN.J
9
1
0
1
15
177
7273
2
96-26-11USA
77
203
2015
37
RCarloBrandonDBOS
82
6
10
16
59
2080
102414
3
90-16-11USA
74
219
2000
59.5
LSchallerTimCBOS
59
7
7
14
23
1035
43436
4
92-20-03USA
72
215
2010
37
RFaulkJustinDCAR
75
17
20
37
32
1987
104133
5
94-01-05USA
74
205
2012
120
LSlavinJaccobDCAR
82
5
29
34
12
2135
115316
6
90-20-06USA
75
221
2008
128
RPaterynGregDDAL/MTL
36
1
8
9
10
720
33312
7
90-27-05USA
74
196
2009
198
RDowdNicCL.A
70
6
16
22
25
1230
52314
8
90-16-07USA
75
221
2000
59.5
LLashoffBrianDDET
5
0
0
0
0
93
3754
9
86-09-08USA
71
197
2000
59.5
RCannonePatrickCMIN
3
0
0
0
0
35
1419
10
90-21-11SWE
71
185
2009
53
LRodinAntonRWVAN
3
0
1
1
0
40
1604
11
86-28-04CZE
74
236
2004
180
RPolakRomanDTOR
75
4
7
11
65
1817
80643
12
90-29-11CZE
79
220
2000
59.5
RSustrAndrejDT.B
80
3
11
14
43
1813
84427
13
96-25-05CZE
72
181
2014
25
RPastrnakDavidRW/LWBOS
75
34
36
70
34
1597
80921
14
94-23-03SWE
75
209
2012
81
RSundqvistOskarCPIT
10
0
0
0
2
138
5504
15
90-05-06CZE
72
205
2010
66
RGudasRadkoDPHI
67
6
17
23
93
1739
77555
16
90-01-06CHE
73
201
2008
38
LJosiRomanDNSH
72
12
37
49
18
2076
108323
17
86-01-08SWE
71
190
2005
216
RStralmanAntonDT.B
73
5
17
22
20
1963
100304
18
92-12-11SWE
75
216
2011
4
RLarssonAdamDEDM
79
4
15
19
55
2055
95509
19
89-21-11FRA
72
201
2000
59.5
LRousselAntoineLWDAL
60
12
15
27
115
1427
55866
" + "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandLast NameFirst NamePositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIce
0
88-16-04USA
72
218
2006
7
ROkposoKyleRWBUF
65
19
26
45
24
1443
73983
1
90-08-10USA
76
210
2009
114
LHelgesonSethDN.J
9
1
0
1
15
177
7273
2
96-26-11USA
77
203
2015
37
RCarloBrandonDBOS
82
6
10
16
59
2080
102414
3
90-16-11USA
74
219
2000
59.5
LSchallerTimCBOS
59
7
7
14
23
1035
43436
4
92-20-03USA
72
215
2010
37
RFaulkJustinDCAR
75
17
20
37
32
1987
104133
5
94-01-05USA
74
205
2012
120
LSlavinJaccobDCAR
82
5
29
34
12
2135
115316
6
90-20-06USA
75
221
2008
128
RPaterynGregDDAL/MTL
36
1
8
9
10
720
33312
7
90-27-05USA
74
196
2009
198
RDowdNicCL.A
70
6
16
22
25
1230
52314
8
90-16-07USA
75
221
2000
59.5
LLashoffBrianDDET
5
0
0
0
0
93
3754
9
86-09-08USA
71
197
2000
59.5
RCannonePatrickCMIN
3
0
0
0
0
35
1419
10
90-21-11SWE
71
185
2009
53
LRodinAntonRWVAN
3
0
1
1
0
40
1604
11
86-28-04CZE
74
236
2004
180
RPolakRomanDTOR
75
4
7
11
65
1817
80643
12
90-29-11CZE
79
220
2000
59.5
RSustrAndrejDT.B
80
3
11
14
43
1813
84427
13
96-25-05CZE
72
181
2014
25
RPastrnakDavidRW/LWBOS
75
34
36
70
34
1597
80921
14
94-23-03SWE
75
209
2012
81
RSundqvistOskarCPIT
10
0
0
0
2
138
5504
15
90-05-06CZE
72
205
2010
66
RGudasRadkoDPHI
67
6
17
23
93
1739
77555
16
90-01-06CHE
73
201
2008
38
LJosiRomanDNSH
72
12
37
49
18
2076
108323
17
86-01-08SWE
71
190
2005
216
RStralmanAntonDT.B
73
5
17
22
20
1963
100304
18
92-12-11SWE
75
216
2011
4
RLarssonAdamDEDM
79
4
15
19
55
2055
95509
19
89-21-11FRA
72
201
2000
59.5
LRousselAntoineLWDAL
60
12
15
27
115
1427
55866
" }, "execution_count": 1, "metadata": {} @@ -530,8 +530,10 @@ "using System.IO; \n", "using Microsoft.ML; \n", "\n", + "var playerSalaryPath = EnsureDataSetDownloaded(\"playerSalary.json\");\n", + "\n", "// Read in JSON file\n", - "string jsonString = File.ReadAllText(@\"data/playerSalary.json\");\n", + "string jsonString = File.ReadAllText(playerSalaryPath);\n", "var players = JsonSerializer.Deserialize>(jsonString);\n", "\n", "// Load it into an IDataView\n", @@ -547,7 +549,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexNameSalary
0
Adam Larsson
3000000
1
Andrej Sustr
1600000
2
Antoine Roussel
2200000
3
Anton Rodin
950000
4
Anton Stralman
4500000
5
Brandon Carlo
717500
6
Brian Lashoff
650000
7
David Pastrnak
925000
8
Greg Pateryn
750000
9
Jaccob Slavin
742500
10
Justin Faulk
5500000
11
Kyle Okposo
8000000
12
Oskar Sundqvist
792500
13
Patrick Cannone
600000
14
Radko Gudas
4000000
15
Roman Josi
4250000
16
Roman Polak
2250000
17
Seth Helgeson
600000
18
Tim Schaller
600000
" + "text/html": "
indexNameSalary
0
Adam Larsson
3000000
1
Andrej Sustr
1600000
2
Antoine Roussel
2200000
3
Anton Rodin
950000
4
Anton Stralman
4500000
5
Brandon Carlo
717500
6
Brian Lashoff
650000
7
David Pastrnak
925000
8
Greg Pateryn
750000
9
Jaccob Slavin
742500
10
Justin Faulk
5500000
11
Kyle Okposo
8000000
12
Oskar Sundqvist
792500
13
Patrick Cannone
600000
14
Radko Gudas
4000000
15
Roman Josi
4250000
16
Roman Polak
2250000
17
Seth Helgeson
600000
18
Tim Schaller
600000
" }, "execution_count": 1, "metadata": {} @@ -630,7 +632,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandPositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIceFullNameSalary
0
92-12-11SWE
75
216
2011
4
RDEDM
79
4
15
19
55
2055
95509
Adam Larsson
3000000
1
90-29-11CZE
79
220
2000
59.5
RDT.B
80
3
11
14
43
1813
84427
Andrej Sustr
1600000
2
89-21-11FRA
72
201
2000
59.5
LLWDAL
60
12
15
27
115
1427
55866
Antoine Roussel
2200000
3
90-21-11SWE
71
185
2009
53
LRWVAN
3
0
1
1
0
40
1604
Anton Rodin
950000
4
86-01-08SWE
71
190
2005
216
RDT.B
73
5
17
22
20
1963
100304
Anton Stralman
4500000
5
96-26-11USA
77
203
2015
37
RDBOS
82
6
10
16
59
2080
102414
Brandon Carlo
717500
6
90-16-07USA
75
221
2000
59.5
LDDET
5
0
0
0
0
93
3754
Brian Lashoff
650000
7
96-25-05CZE
72
181
2014
25
RRW/LWBOS
75
34
36
70
34
1597
80921
David Pastrnak
925000
8
90-20-06USA
75
221
2008
128
RDDAL/MTL
36
1
8
9
10
720
33312
Greg Pateryn
750000
9
94-01-05USA
74
205
2012
120
LDCAR
82
5
29
34
12
2135
115316
Jaccob Slavin
742500
10
92-20-03USA
72
215
2010
37
RDCAR
75
17
20
37
32
1987
104133
Justin Faulk
5500000
11
88-16-04USA
72
218
2006
7
RRWBUF
65
19
26
45
24
1443
73983
Kyle Okposo
8000000
12
90-27-05USA
74
196
2009
198
RCL.A
70
6
16
22
25
1230
52314
Nic Dowd
<null>
13
94-23-03SWE
75
209
2012
81
RCPIT
10
0
0
0
2
138
5504
Oskar Sundqvist
792500
14
86-09-08USA
71
197
2000
59.5
RCMIN
3
0
0
0
0
35
1419
Patrick Cannone
600000
15
90-05-06CZE
72
205
2010
66
RDPHI
67
6
17
23
93
1739
77555
Radko Gudas
4000000
16
90-01-06CHE
73
201
2008
38
LDNSH
72
12
37
49
18
2076
108323
Roman Josi
4250000
17
86-28-04CZE
74
236
2004
180
RDTOR
75
4
7
11
65
1817
80643
Roman Polak
2250000
18
90-08-10USA
76
210
2009
114
LDN.J
9
1
0
1
15
177
7273
Seth Helgeson
600000
19
90-16-11USA
74
219
2000
59.5
LCBOS
59
7
7
14
23
1035
43436
Tim Schaller
600000
" + "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandPositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIceFullNameSalary
0
92-12-11SWE
75
216
2011
4
RDEDM
79
4
15
19
55
2055
95509
Adam Larsson
3000000
1
90-29-11CZE
79
220
2000
59.5
RDT.B
80
3
11
14
43
1813
84427
Andrej Sustr
1600000
2
89-21-11FRA
72
201
2000
59.5
LLWDAL
60
12
15
27
115
1427
55866
Antoine Roussel
2200000
3
90-21-11SWE
71
185
2009
53
LRWVAN
3
0
1
1
0
40
1604
Anton Rodin
950000
4
86-01-08SWE
71
190
2005
216
RDT.B
73
5
17
22
20
1963
100304
Anton Stralman
4500000
5
96-26-11USA
77
203
2015
37
RDBOS
82
6
10
16
59
2080
102414
Brandon Carlo
717500
6
90-16-07USA
75
221
2000
59.5
LDDET
5
0
0
0
0
93
3754
Brian Lashoff
650000
7
96-25-05CZE
72
181
2014
25
RRW/LWBOS
75
34
36
70
34
1597
80921
David Pastrnak
925000
8
90-20-06USA
75
221
2008
128
RDDAL/MTL
36
1
8
9
10
720
33312
Greg Pateryn
750000
9
94-01-05USA
74
205
2012
120
LDCAR
82
5
29
34
12
2135
115316
Jaccob Slavin
742500
10
92-20-03USA
72
215
2010
37
RDCAR
75
17
20
37
32
1987
104133
Justin Faulk
5500000
11
88-16-04USA
72
218
2006
7
RRWBUF
65
19
26
45
24
1443
73983
Kyle Okposo
8000000
12
90-27-05USA
74
196
2009
198
RCL.A
70
6
16
22
25
1230
52314
Nic Dowd
<null>
13
94-23-03SWE
75
209
2012
81
RCPIT
10
0
0
0
2
138
5504
Oskar Sundqvist
792500
14
86-09-08USA
71
197
2000
59.5
RCMIN
3
0
0
0
0
35
1419
Patrick Cannone
600000
15
90-05-06CZE
72
205
2010
66
RDPHI
67
6
17
23
93
1739
77555
Radko Gudas
4000000
16
90-01-06CHE
73
201
2008
38
LDNSH
72
12
37
49
18
2076
108323
Roman Josi
4250000
17
86-28-04CZE
74
236
2004
180
RDTOR
75
4
7
11
65
1817
80643
Roman Polak
2250000
18
90-08-10USA
76
210
2009
114
LDN.J
9
1
0
1
15
177
7273
Seth Helgeson
600000
19
90-16-11USA
74
219
2000
59.5
LCBOS
59
7
7
14
23
1035
43436
Tim Schaller
600000
" }, "execution_count": 1, "metadata": {} @@ -660,7 +662,7 @@ { "output_type": "execute_result", "data": { - "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandPositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIceFullNameSalary
0
92-12-11SWE
75
216
2011
4
RDEDM
79
4
15
19
55
2055
95509
Adam Larsson
3000000
1
90-29-11CZE
79
220
2000
59.5
RDT.B
80
3
11
14
43
1813
84427
Andrej Sustr
1600000
2
89-21-11FRA
72
201
2000
59.5
LLWDAL
60
12
15
27
115
1427
55866
Antoine Roussel
2200000
3
90-21-11SWE
71
185
2009
53
LRWVAN
3
0
1
1
0
40
1604
Anton Rodin
950000
4
86-01-08SWE
71
190
2005
216
RDT.B
73
5
17
22
20
1963
100304
Anton Stralman
4500000
5
96-26-11USA
77
203
2015
37
RDBOS
82
6
10
16
59
2080
102414
Brandon Carlo
717500
6
90-16-07USA
75
221
2000
59.5
LDDET
5
0
0
0
0
93
3754
Brian Lashoff
650000
7
96-25-05CZE
72
181
2014
25
RRW/LWBOS
75
34
36
70
34
1597
80921
David Pastrnak
925000
8
90-20-06USA
75
221
2008
128
RDDAL/MTL
36
1
8
9
10
720
33312
Greg Pateryn
750000
9
94-01-05USA
74
205
2012
120
LDCAR
82
5
29
34
12
2135
115316
Jaccob Slavin
742500
10
92-20-03USA
72
215
2010
37
RDCAR
75
17
20
37
32
1987
104133
Justin Faulk
5500000
11
88-16-04USA
72
218
2006
7
RRWBUF
65
19
26
45
24
1443
73983
Kyle Okposo
8000000
12
94-23-03SWE
75
209
2012
81
RCPIT
10
0
0
0
2
138
5504
Oskar Sundqvist
792500
13
86-09-08USA
71
197
2000
59.5
RCMIN
3
0
0
0
0
35
1419
Patrick Cannone
600000
14
90-05-06CZE
72
205
2010
66
RDPHI
67
6
17
23
93
1739
77555
Radko Gudas
4000000
15
90-01-06CHE
73
201
2008
38
LDNSH
72
12
37
49
18
2076
108323
Roman Josi
4250000
16
86-28-04CZE
74
236
2004
180
RDTOR
75
4
7
11
65
1817
80643
Roman Polak
2250000
17
90-08-10USA
76
210
2009
114
LDN.J
9
1
0
1
15
177
7273
Seth Helgeson
600000
18
90-16-11USA
74
219
2000
59.5
LCBOS
59
7
7
14
23
1035
43436
Tim Schaller
600000
" + "text/html": "
indexBirthdayNatHeightWeightDraftYearOverallDraftHandPositionTeamGamesPlayedGoalsAssistsPointsPIMShiftsTimeOnIceFullNameSalary
0
92-12-11SWE
75
216
2011
4
RDEDM
79
4
15
19
55
2055
95509
Adam Larsson
3000000
1
90-29-11CZE
79
220
2000
59.5
RDT.B
80
3
11
14
43
1813
84427
Andrej Sustr
1600000
2
89-21-11FRA
72
201
2000
59.5
LLWDAL
60
12
15
27
115
1427
55866
Antoine Roussel
2200000
3
90-21-11SWE
71
185
2009
53
LRWVAN
3
0
1
1
0
40
1604
Anton Rodin
950000
4
86-01-08SWE
71
190
2005
216
RDT.B
73
5
17
22
20
1963
100304
Anton Stralman
4500000
5
96-26-11USA
77
203
2015
37
RDBOS
82
6
10
16
59
2080
102414
Brandon Carlo
717500
6
90-16-07USA
75
221
2000
59.5
LDDET
5
0
0
0
0
93
3754
Brian Lashoff
650000
7
96-25-05CZE
72
181
2014
25
RRW/LWBOS
75
34
36
70
34
1597
80921
David Pastrnak
925000
8
90-20-06USA
75
221
2008
128
RDDAL/MTL
36
1
8
9
10
720
33312
Greg Pateryn
750000
9
94-01-05USA
74
205
2012
120
LDCAR
82
5
29
34
12
2135
115316
Jaccob Slavin
742500
10
92-20-03USA
72
215
2010
37
RDCAR
75
17
20
37
32
1987
104133
Justin Faulk
5500000
11
88-16-04USA
72
218
2006
7
RRWBUF
65
19
26
45
24
1443
73983
Kyle Okposo
8000000
12
94-23-03SWE
75
209
2012
81
RCPIT
10
0
0
0
2
138
5504
Oskar Sundqvist
792500
13
86-09-08USA
71
197
2000
59.5
RCMIN
3
0
0
0
0
35
1419
Patrick Cannone
600000
14
90-05-06CZE
72
205
2010
66
RDPHI
67
6
17
23
93
1739
77555
Radko Gudas
4000000
15
90-01-06CHE
73
201
2008
38
LDNSH
72
12
37
49
18
2076
108323
Roman Josi
4250000
16
86-28-04CZE
74
236
2004
180
RDTOR
75
4
7
11
65
1817
80643
Roman Polak
2250000
17
90-08-10USA
76
210
2009
114
LDN.J
9
1
0
1
15
177
7273
Seth Helgeson
600000
18
90-16-11USA
74
219
2000
59.5
LCBOS
59
7
7
14
23
1035
43436
Tim Schaller
600000
" }, "execution_count": 1, "metadata": {} From 5b8de9d0737764e8d0e6fa51d8c7fd2cb8f61804 Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Mon, 18 Jul 2022 21:30:28 -0700 Subject: [PATCH 2/2] Fix Taxi Notebook --- .../E2E-Regression with Taxi Dataset.ipynb | 182 +++--------------- 1 file changed, 26 insertions(+), 156 deletions(-) diff --git a/machine-learning/E2E-Regression with Taxi Dataset.ipynb b/machine-learning/E2E-Regression with Taxi Dataset.ipynb index c602dc3..a1cfa23 100644 --- a/machine-learning/E2E-Regression with Taxi Dataset.ipynb +++ b/machine-learning/E2E-Regression with Taxi Dataset.ipynb @@ -49,9 +49,7 @@ { "output_type": "execute_result", "data": { - "text/html": [ - "
Restore sources
  • https://pkgs.dev.azure.com/dnceng/public/_packaging/MachineLearning/nuget/v3/index.json
Installed Packages
  • Microsoft.Data.Analysis, 0.20.0-preview.22356.1
  • Microsoft.ML.AutoML, 0.20.0-preview.22356.1
  • Plotly.NET.CSharp, 0.0.1
  • Plotly.NET.Interactive, 3.0.2
" - ] + "text/html": "
Restore sources
  • https://pkgs.dev.azure.com/dnceng/public/_packaging/MachineLearning/nuget/v3/index.json
Installed Packages
  • Microsoft.Data.Analysis, 0.20.0-preview.22356.1
  • Microsoft.ML.AutoML, 0.20.0-preview.22356.1
  • Plotly.NET.CSharp, 0.0.1
  • Plotly.NET.Interactive, 3.0.2
" }, "execution_count": 1, "metadata": {} @@ -59,9 +57,7 @@ { "output_type": "execute_result", "data": { - "text/markdown": [ - "Loading extensions from `Plotly.NET.Interactive.dll`" - ] + "text/markdown": "Loading extensions from `Microsoft.Data.Analysis.Interactive.dll`" }, "execution_count": 1, "metadata": {} @@ -69,9 +65,7 @@ { "output_type": "execute_result", "data": { - "text/markdown": [ - "Loading extensions from `Microsoft.ML.AutoML.Interactive.dll`" - ] + "text/markdown": "Loading extensions from `Plotly.NET.Interactive.dll`" }, "execution_count": 1, "metadata": {} @@ -79,9 +73,7 @@ { "output_type": "execute_result", "data": { - "text/markdown": [ - "Loading extensions from `Microsoft.Data.Analysis.Interactive.dll`" - ] + "text/markdown": "Loading extensions from `Microsoft.ML.AutoML.Interactive.dll`" }, "execution_count": 1, "metadata": {} @@ -186,7 +178,7 @@ "\n", " var experiment = mlContext.Auto().CreateExperiment()\n", " .SetPipeline(pipeline)\n", - " .SetTrainingTimeInSeconds(50)\n", + " .SetTrainingTimeInSeconds(10)\n", " .SetDataset(trainTestSplit.TrainSet, validateTestSplit.TrainSet)\n", " .SetEvaluateMetric(RegressionMetric.RSquared, \"fare_amount\", \"Score\")\n", "\t\t\t\t\t.SetMonitor(monitor);\n", @@ -199,12 +191,20 @@ ], "outputs": [ { - "output_type": "error", - "ename": "", - "evalue": "(3,10): error CS0103: The name 'DataFrame' does not exist in the current context\r\n(4,21): error CS0246: The type or namespace name 'MLContext' could not be found (are you missing a using directive or an assembly reference?)\r\n(13,19): error CS0246: The type or namespace name 'NotebookMonitor' could not be found (are you missing a using directive or an assembly reference?)\r\n(19,40): error CS0103: The name 'RegressionMetric' does not exist in the current context", - "traceback": [ - null - ] + "output_type": "execute_result", + "data": { + "text/plain": "taxi-fare.csv found here: C:\\dev\\csharp-notebooks\\machine-learning\\data\\taxi-fare.csv\r\n" + }, + "execution_count": 1, + "metadata": {} + }, + { + "output_type": "execute_result", + "data": { + "text/html": "

Best Trial

Id: 23

Trainer: ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>FastTreeRegression

Parameters: {\r\n "0": {\r\n "OutputColumnNames": [\r\n "rate_code",\r\n "passenger_count",\r\n "trip_time_in_secs",\r\n "trip_distance"\r\n ],\r\n "InputColumnNames": [\r\n "rate_code",\r\n "passenger_count",\r\n "trip_time_in_secs",\r\n "trip_distance"\r\n ]\r\n },\r\n "1": {\r\n "OutputColumnNames": [\r\n "vendor_id",\r\n "payment_type"\r\n ],\r\n "InputColumnNames": [\r\n "vendor_id",\r\n "payment_type"\r\n ]\r\n },\r\n "2": {\r\n "InputColumnNames": [\r\n "rate_code",\r\n "passenger_count",\r\n "trip_time_in_secs",\r\n "trip_distance",\r\n "vendor_id",\r\n "payment_type"\r\n ],\r\n "OutputColumnName": "Features"\r\n },\r\n "3": {\r\n "NumberOfLeaves": 6,\r\n "MinimumExampleCountPerLeaf": 26,\r\n "NumberOfTrees": 18,\r\n "MaximumBinCountPerFeature": 238,\r\n "FeatureFraction": 0.99999999,\r\n "LearningRate": 0.9999997766729865,\r\n "LabelColumnName": "fare_amount",\r\n "FeatureColumnName": "Features"\r\n }\r\n}

Active Trial

Id: 27

Trainer: ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>LbfgsPoissonRegressionRegression

Parameters: {\r\n "0": {\r\n "OutputColumnNames": [\r\n "rate_code",\r\n "passenger_count",\r\n "trip_time_in_secs",\r\n "trip_distance"\r\n ],\r\n "InputColumnNames": [\r\n "rate_code",\r\n "passenger_count",\r\n "trip_time_in_secs",\r\n "trip_distance"\r\n ]\r\n },\r\n "1": {\r\n "OutputColumnNames": [\r\n "vendor_id",\r\n "payment_type"\r\n ],\r\n "InputColumnNames": [\r\n "vendor_id",\r\n "payment_type"\r\n ]\r\n },\r\n "2": {\r\n "InputColumnNames": [\r\n "rate_code",\r\n "passenger_count",\r\n "trip_time_in_secs",\r\n "trip_distance",\r\n "vendor_id",\r\n "payment_type"\r\n ],\r\n "OutputColumnName": "Features"\r\n },\r\n "3": {\r\n "L1Regularization": 0.03125,\r\n "L2Regularization": 0.7539817,\r\n "LabelColumnName": "fare_amount",\r\n "FeatureColumnName": "Features"\r\n }\r\n}

Plot Metrics over Trials

\n
\n
\r\n\r\n\n \n
\n

All Trials Table

DataFrame - 27 rows

indexTrialMetricTrainerParameters
0
0
0.9191953
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>FastForestRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfTrees":4,"NumberOfLeaves":4,"FeatureFraction":1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
1
1
0.9191953
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>FastForestRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfTrees":4,"NumberOfLeaves":4,"FeatureFraction":1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
2
2
0.87440413
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>LightGbmRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":20,"LearningRate":1,"NumberOfTrees":4,"SubsampleFraction":1,"MaximumBinCountPerFeature":255,"FeatureFraction":1,"L1Regularization":2E-10,"L2Regularization":1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
3
3
-0.20010807
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>FastTreeRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":20,"NumberOfTrees":4,"MaximumBinCountPerFeature":255,"FeatureFraction":1,"LearningRate":0.09999999999999998,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
4
4
0.69277
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>SdcaRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":1,"L2Regularization":0.1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
5
5
-0.20007557
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>FastTreeRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":20,"NumberOfTrees":4,"MaximumBinCountPerFeature":254,"FeatureFraction":1,"LearningRate":0.09999999999999998,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
6
6
0.6884077
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>SdcaRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":1,"L2Regularization":0.1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
7
7
-2.490156
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>LbfgsPoissonRegressionRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":1,"L2Regularization":1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
8
8
0.8733287
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>LightGbmRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":20,"LearningRate":1,"NumberOfTrees":4,"SubsampleFraction":1,"MaximumBinCountPerFeature":254,"FeatureFraction":1,"L1Regularization":2E-10,"L2Regularization":1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
9
9
0.9233686
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>FastForestRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfTrees":11,"NumberOfLeaves":4,"FeatureFraction":1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
10
10
0.9191953
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>FastForestRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfTrees":4,"NumberOfLeaves":4,"FeatureFraction":1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
11
11
-2.4718552
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>LbfgsPoissonRegressionRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":1,"L2Regularization":1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
12
12
0.8746236
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>LightGbmRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":20,"LearningRate":0.9999997766729865,"NumberOfTrees":4,"SubsampleFraction":0.9999997766729865,"MaximumBinCountPerFeature":154,"FeatureFraction":0.99999999,"L1Regularization":3.379681956443246E-10,"L2Regularization":0.9999997766729865,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
13
13
0.6880244
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>FastTreeRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":39,"NumberOfTrees":6,"MaximumBinCountPerFeature":305,"FeatureFraction":0.9837840191521533,"LearningRate":0.20718877532322955,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
14
14
0.67887706
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>SdcaRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":2.1625226,"L2Regularization":1.4341433,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
15
15
0.8612328
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>FastTreeRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":19,"NumberOfTrees":4,"MaximumBinCountPerFeature":122,"FeatureFraction":0.99999999,"LearningRate":0.5090129633464775,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
16
16
0.68568695
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>SdcaRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":0.06310504,"L2Regularization":0.079379335,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
17
17
-2.4738095
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>LbfgsPoissonRegressionRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":0.07517863,"L2Regularization":2.7048025,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
18
18
0.9222903
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>FastForestRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfTrees":59,"NumberOfLeaves":31,"FeatureFraction":1,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
19
19
0.9215755
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>LightGbmRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":21,"LearningRate":0.7199164789739659,"NumberOfTrees":10,"SubsampleFraction":0.4249173292039678,"MaximumBinCountPerFeature":216,"FeatureFraction":0.99999999,"L1Regularization":2.893108729767549E-09,"L2Regularization":0.5629755267771762,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
20
20
0.90774864
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>FastForestRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfTrees":4,"NumberOfLeaves":13,"FeatureFraction":0.8620007,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
21
21
-2.463893
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>LbfgsPoissonRegressionRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":0.73406476,"L2Regularization":15.725734,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
22
22
0.91161895
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>LightGbmRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":36,"LearningRate":0.9999997766729865,"NumberOfTrees":10,"SubsampleFraction":0.09153406435524249,"MaximumBinCountPerFeature":104,"FeatureFraction":0.9836223114153516,"L1Regularization":6.761285733176898E-09,"L2Regularization":0.06779600816862573,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
23
23
0.94230247
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>FastTreeRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":6,"MinimumExampleCountPerLeaf":26,"NumberOfTrees":18,"MaximumBinCountPerFeature":238,"FeatureFraction":0.99999999,"LearningRate":0.9999997766729865,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
24
24
0.7748336
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>SdcaRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":0.46242294,"L2Regularization":0.03125,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
25
25
0.8846048
ReplaceMissingValues=>OneHotEncoding=>Concatenate=>FastTreeRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"NumberOfLeaves":4,"MinimumExampleCountPerLeaf":13,"NumberOfTrees":5,"MaximumBinCountPerFeature":183,"FeatureFraction":0.9854389369700658,"LearningRate":0.9999997766729865,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
26
26
-0.011939165
ReplaceMissingValues=>OneHotHashEncoding=>Concatenate=>SdcaRegression{"0":{"OutputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"],"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance"]},"1":{"OutputColumnNames":["vendor_id","payment_type"],"InputColumnNames":["vendor_id","payment_type"]},"2":{"InputColumnNames":["rate_code","passenger_count","trip_time_in_secs","trip_distance","vendor_id","payment_type"],"OutputColumnName":"Features"},"3":{"L1Regularization":15.846596,"L2Regularization":0.12597738,"LabelColumnName":"fare_amount","FeatureColumnName":"Features"}}
Page1
" + }, + "execution_count": 1, + "metadata": {} } ] }, @@ -250,9 +250,7 @@ { "output_type": "execute_result", "data": { - "text/html": [ - "
indexvalue
0
8.47422
" - ] + "text/html": "
indexvalue
0
7.7455807
" }, "execution_count": 1, "metadata": {} @@ -284,9 +282,7 @@ { "output_type": "execute_result", "data": { - "text/html": [ - "
MeanAbsoluteErrorMeanSquaredErrorRootMeanSquaredErrorLossFunctionRSquared
0.9971379424072447
7.954096814346052
2.8203008375607825
7.954096719333472
0.913846968172443
" - ] + "text/html": "
MeanAbsoluteErrorMeanSquaredErrorRootMeanSquaredErrorLossFunctionRSquared
0.5649874126987798
5.622860574802078
2.3712571717977107
5.622860676498588
0.9390972353782363
" }, "execution_count": 1, "metadata": {} @@ -359,38 +355,7 @@ { "output_type": "execute_result", "data": { - "text/html": [ - "\n", - "
\n", - "
\r\n", - "\r\n", - "\n", - " \n", - "
\n" - ] + "text/html": "\n
\n
\r\n\r\n\n \n
\n" }, "execution_count": 1, "metadata": {} @@ -439,38 +404,7 @@ { "output_type": "execute_result", "data": { - "text/html": [ - "\n", - "
\n", - "
\r\n", - "\r\n", - "\n", - " \n", - "
\n" - ] + "text/html": "\n
\n
\r\n\r\n\n \n
\n" }, "execution_count": 1, "metadata": {} @@ -527,9 +461,7 @@ { "output_type": "execute_result", "data": { - "text/html": [ - "
indexFeatureR-Squared Impact
0
trip_distance
0.6755704315755394
1
rate_code
0.4330715253461801
2
trip_time_in_secs
0.30444644630706363
3
vendor_id.Bit0
0
4
payment_type.Bit3
0
5
vendor_id.Bit1
0
6
payment_type.Bit1
0
7
payment_type.Bit0
0
8
vendor_id.Bit2
0
9
passenger_count
0
10
payment_type.Bit2
0
" - ] + "text/html": "
indexFeatureR-Squared Impact
0
trip_distance
0.44412661810471143
1
rate_code
0.3546294277483064
2
trip_time_in_secs
0.30483750790979536
3
payment_type.CRD
0.02611318647718497
4
payment_type.CSH
0.018381401206037262
5
vendor_id.VTS
0.0004934284459933158
6
payment_type.UNK
9.858866455236814E-05
7
payment_type.DIS
1.1335374079712857E-05
8
vendor_id.CMT
0
9
passenger_count
0
10
payment_type.NOC
0
" }, "execution_count": 1, "metadata": {} @@ -554,38 +486,7 @@ { "output_type": "execute_result", "data": { - "text/html": [ - "\n", - "
\n", - "
\r\n", - "\r\n", - "\n", - " \n", - "
\n" - ] + "text/html": "\n
\n
\r\n\r\n\n \n
\n" }, "execution_count": 1, "metadata": {} @@ -614,38 +515,7 @@ { "output_type": "execute_result", "data": { - "text/html": [ - "\n", - "
\n", - "
\r\n", - "\r\n", - "\n", - " \n", - "
\n" - ] + "text/html": "\n
\n
\r\n\r\n\n \n
\n" }, "execution_count": 1, "metadata": {}