-
Notifications
You must be signed in to change notification settings - Fork 0
/
Trying Automated ML - Zindi.html
110 lines (102 loc) · 34 KB
/
Trying Automated ML - Zindi.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
<!DOCTYPE html>
<!-- saved from url=(0049)https://zindi.africa/learning/trying-automated-ml -->
<html lang="en" class="wf-nunito-n3-active wf-nunito-n4-active wf-active" inmaintabuse="jlb"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<!-- nextgen -->
<meta http-equiv="x-ua-compatible" content="ie=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover">
<link rel="apple-touch-icon-precomposed" sizes="57x57" href="https://assets.zindi.africa/apple-touch-icon-57x57.png">
<link rel="apple-touch-icon-precomposed" sizes="114x114" href="https://assets.zindi.africa/apple-touch-icon-114x114.png">
<link rel="apple-touch-icon-precomposed" sizes="72x72" href="https://assets.zindi.africa/apple-touch-icon-72x72.png">
<link rel="apple-touch-icon-precomposed" sizes="144x144" href="https://assets.zindi.africa/apple-touch-icon-144x144.png">
<link rel="apple-touch-icon-precomposed" sizes="120x120" href="https://assets.zindi.africa/apple-touch-icon-120x120.png">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="https://assets.zindi.africa/apple-touch-icon-152x152.png">
<link rel="icon" type="image/png" href="https://assets.zindi.africa/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="https://assets.zindi.africa/favicon-16x16.png" sizes="16x16">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:site" content="@ZindiAfrica">
<meta name="application-name" content=" ">
<meta name="msapplication-TileColor" content="#FFFFFF">
<meta name="msapplication-TileImage" content="https://assets.zindi.africa/mstile-144x144.png">
<script async="" src="./Trying Automated ML - Zindi_files/tag.js"></script><script async="" src="./Trying Automated ML - Zindi_files/gtm.js"></script><script src="./Trying Automated ML - Zindi_files/webfont.js" async=""></script><script type="text/javascript">
// Configuration object read by the Web Font Loader script once it has loaded:
// request Nunito in weights 300 and 400 from Google Fonts, with a 2000 ms
// loader timeout.
var WebFontConfig = {
  google: {
    families: ["Nunito:300,400"],
  },
  timeout: 2000,
}
// Inject webfont.js asynchronously and reflect the loading state as classes
// on the root element. If the script has not loaded within 1000 ms, or fails
// to load at all, the root element is flagged with "wf-inactive".
;(function (d) {
  var h = d.documentElement
  var onerror = function () {
    // The leading space keeps the new token separate from the classes that
    // are already present on the root element.
    h.className += " wf-inactive"
  }
  var st = setTimeout(onerror, 1000)
  h.className += " wf-loading"
  var wf = d.createElement("script"),
    s = d.scripts[0]
  wf.src = "https://ajax.googleapis.com/ajax/libs/webfont/1.6.26/webfont.js"
  wf.async = true
  wf.onerror = onerror
  wf.onload = function () {
    // The loader arrived in time, so cancel the pending fallback.
    clearTimeout(st)
  }
  // Insert ahead of the first script element in the document.
  s.parentNode.insertBefore(wf, s)
})(document)
</script>
<title>Trying Automated ML - Zindi</title><meta data-react-helmet="true" name="description" content="Zindi is a data science competition platform with the mission of building the data science ecosystem in Africa. Zindi hosts a community of data scientists dedicated to solving the continent's most pressing problems through machine learning and artificial intelligence."><meta data-react-helmet="true" property="og:site_name" content="Zindi"><meta data-react-helmet="true" property="twitter:site_name" content="Zindi"><meta data-react-helmet="true" property="og:title" content="Trying Automated ML"><meta data-react-helmet="true" property="og:description" content="Some students had asked me for my opinion on automated tools for machine learning. The thought occurred to me that I hadn’t done much with them recently, and it was about time I gave the much-hyped time-savers a go – after all, aren’t they going to make data scientists like me redundant?"><meta data-react-helmet="true" property="og:image" content="https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/27/big_thumb_1dd2654a-f888-44f3-874e-90fc75767259.jpeg"><meta data-react-helmet="true" property="twitter:title" content="Trying Automated ML"><meta data-react-helmet="true" property="twitter:description" content="Some students had asked me for my opinion on automated tools for machine learning. The thought occurred to me that I hadn’t done much with them recently, and it was about time I gave the much-hyped time-savers a go – after all, aren’t they going to make data scientists like me redundant?"><meta data-react-helmet="true" property="twitter:image" content="https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/27/big_thumb_1dd2654a-f888-44f3-874e-90fc75767259.jpeg">
<script>
// Google Tag Manager bootstrap: make sure the data layer array exists, push
// the "gtm.js" start event onto it, then inject the async gtm.js loader
// ahead of the first script element in the document.
;(function (win, doc, tagName, layerName, containerId) {
  win[layerName] = win[layerName] || []
  win[layerName].push({ "gtm.start": new Date().getTime(), event: "gtm.js" })
  var firstScript = doc.getElementsByTagName(tagName)[0]
  var loader = doc.createElement(tagName)
  // A data layer with a non-default name is passed to GTM via the "l" query parameter.
  var layerParam = layerName != "dataLayer" ? "&l=" + layerName : ""
  loader.async = true
  loader.src = "https://www.googletagmanager.com/gtm.js?id=" + containerId + layerParam
  firstScript.parentNode.insertBefore(loader, firstScript)
})(window, document, "script", "dataLayer", "GTM-KRG85D8")
</script>
<link rel="stylesheet" href="./Trying Automated ML - Zindi_files/css" media="all"><link href="./Trying Automated ML - Zindi_files/bundle.d57dc7d8a01460a702c7.css" rel="stylesheet"></head>
<body _c_t_common="1" data-new-gr-c-s-check-loaded="14.1020.0" data-gr-ext-installed="">
<noscript> You need to enable JavaScript to run this app. </noscript>
<div id="app"><div class="App__container___fj0c9"><div class="App__section___1nGff"></div><div class="App__section___1nGff"><div class="App__contained___3emDO"><div class="Header__container___3FtbH"><a href="https://zindi.africa/"><div class="Header__logo___1eRaO">Zindi</div></a><div class="Header__menuContainer___2izgT"><div class="Menu__container___1sjgb"><a class="Menu__link___3x4C4" href="https://zindi.africa/competitions"><span class="Menu__linkInner___3LB5N">Compete</span></a><a aria-current="page" class="Menu__link___3x4C4 Menu__activeLink___1MA6r" href="https://zindi.africa/learning"><span class="Menu__linkInner___3LB5N">Learn</span></a><a class="Menu__link___3x4C4" href="https://zindi.africa/jobs"><span class="Menu__linkInner___3LB5N">Find a Job</span></a></div><div class="Menu__container___1sjgb Header__menuRight___3wiDe Menu__justifyRight___3r6Ws"><a class="Menu__link___3x4C4" href="https://zindi.africa/inbox"><span class="Menu__linkInner___3LB5N"><div class="Inbox__container___3xHCr"><svg class="Inbox__messagesIcon___2XP8V" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M4 4h16c1.1 0 2 .9 2 2v12c0 1.1-.9 2-2 2H4c-1.1 0-2-.9-2-2V6c0-1.1.9-2 2-2z"></path><polyline points="22,6 12,13 2,6"></polyline></svg><div class="Inbox__unseen___31NED"></div></div></span></a></div></div><div class="Header__userMenu___2iOts"><div class="UserMenu__container___ypkko"><button class="Button__base___NhksY Button__blank-normal___1nB5F UserMenu__user___a0zJo"><span class="Button__inner___3jkeF"><span class="User__container___18HoF User__size-normal___26ZPA"><img class="User__avatar___6aNx2" src="./Trying Automated ML - Zindi_files/thumb.default.png" alt=""><span class="User__username___64PE2">Glencode</span></span></span></button><div class=""></div></div></div></div></div></div><div class="App__section___1nGff App__content___WFkDX"><div 
class="WithSubheader__container___3qd5U"><div class="WithSubheader__header___2o1oX WithSubheader__withHeader___35ECw"><div class="BlogPost__headerImage___2fAz4" style="background-image: url(&quot;https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/27/header_1dd2654a-f888-44f3-874e-90fc75767259.jpeg&quot;);"></div></div><div><div class="App__contained___3emDO"><div class="Paper__paper___2M-1R Paper__padding-1___3sKLR BlogPost__paper___1D3Be"><div class="BlogPost__date___3BhZy">30 Aug 2019, 11:16</div><h2 class="BlogPost__title___RUU5Z">Trying Automated ML</h2><div class="Html__container___1AJFz BlogPost__intro___31fc-"><p>Some students had asked me for my opinion on automated tools for machine learning. The thought occurred to me that I hadn’t done much with them recently, and it was about time I gave the much-hyped time-savers a go – after all, aren’t they going to make data scientists like me redundant?</p><p>In today’s post, I’ll be trying out <a href="https://cloud.google.com/automl/" target="_blank" rel="noreferrer noopener">Google’s AutoML tool</a> by throwing various datasets at it and seeing how well it does. To make things interesting, the datasets I’ll be using will be from Zindi competitions, letting us see where AutoML would rank on the player leader-board. I should note that these experiments are a learning exercise, and actually using AutoML to win contests is almost certainly against the rules. But with that caveat out the way, let’s get started!</p><h3>How it works</h3><p>AutoML (and other similar tools) aims to automate one step of the ML pipeline – that of model selection and tuning. You give it a dataset to work on, specify column types, choose an output column and specify how long you’d like it to train for (you pay per hour). Then sit back and wait. 
Behind the scenes, AutoML tries many different models and slowly optimizes network architecture, parameters, weights… essentially everything that one could possibly tweak to improve performance gets tweaked. At the end of it, you get a (very complicated) model that you can then deploy with their services or use to make batch predictions.</p><p></p><div class="image">
<img src="./Trying Automated ML - Zindi_files/7ab1f8bb-1f20-4f19-a3f6-05169fad7063.png"> </div><p></p><p><span style="font-style: italic;" class="">Figure 1. The first step with AutoML tables – Importing the data.</span></p><p>The resultant models are fairly complex (mine were ~1GB each fully trained) and are not something you can simply download and use locally – you must deploy them via Google (for an extra fee). This, coupled with the cost of training models, makes it fairly expensive to experiment with if you use up your trial credits – so use them wisely.</p><p>Fortunately, there are other ways to achieve broadly the same result. For example, AutoKeras. Read more about that <a href="https://towardsdatascience.com/autokeras-the-killer-of-googles-automl-9e84c552a319" target="_blank" rel="noreferrer noopener">here</a>.</p><h3>Experiment 1: <a href="http://zindi.africa/competitions/farm-pin-crop-detection-challenge" target="_blank" rel="noreferrer noopener">Farm Pin Crop Detection</a></h3><p>This competition involves a classification problem, with the goal being to predict which crop is present in a given field. The training data is provided as field outlines and satellite images – not something that can effortlessly slot into AutoML tables. This meant that the first step was to sample the image bands for the different fields, and export the values to a CSV file for later analysis (as described in <a href="https://datasciencecastnet.home.blog/2019/06/27/tutorial-improving-crop-type-predictions/" target="_blank" rel="noreferrer noopener">this post</a>). This done, I uploaded the resultant training file to cloud storage, selected the table, chose my input and output columns and hit go.</p><p></p><div class="image">
<img src="./Trying Automated ML - Zindi_files/e1eacf50-1971-4946-bdde-90a2d2fa3c14.png"> </div><p></p><p><span style="font-style: italic;" class="">Figure 2. AutoML ‘Evaluate’ tab showing model performance.</span></p><p>The scoring metric for this competition is log loss. My previous best (using the same training data to train a random forest model) scored around 0.64 (~20th on the leaderboard). So a score of &lt;0.6 looked promising. I uploaded the test set, hit predict and then manually cleaned up the output to match the submission format for Zindi. Score? <span style="font-weight: bold;" class="">0.546, putting me in 12th place.</span> No feature engineering besides sampling some satellite images, no manual tweaking of model parameters…. not bad! This was before the rules for Farm Pin were updated. </p><p>I was quite pleased with this result. I enjoy the feature engineering side of things, but the tedium of hyper-parameter tuning is less appealing to me. If this tool can magically let me skip that step, it’s a win in my book! I may re-visit this with some added features, information from more images and perhaps a trick or two to enlarge the training set.</p><h3>Experiment 2: <a href="http://zindi.africa/competitions/traffic-jam-predicting-peoples-movement-into-nairobi" target="_blank" rel="noreferrer noopener">Traffic Jam</a></h3><p>Spurred on by the first success, I turned to the Traffic Jam competition since I still had the dataset on my laptop. This was a regression problem, with the goal being to predict the number of tickets sold for a given trip into Nairobi. The training data was fairly sparse, with only ~2000 rows to work from. Still, I figured it was worth a shot and threw a few node hours worth of Google-managed ML magic at the problem.</p><p></p><div class="image">
<img src="./Trying Automated ML - Zindi_files/086f3f68-759e-4219-b472-286db11680f9.png"> </div><p></p><p><span style="font-style: italic;" class="">An MAE of 3.4, hypothetically equivalent to ~3rd place!</span></p><p>The evaluation results had me excited – an MAE of 3.4 would have placed the model in third place had the competition remained open. I hastily uploaded the predictions to Zindi, to see the score of… 5.3 (160th place). Now, I might be missing some glaring error in the way I formatted predictions for upload, but I suspect that the issue is with AutoML. It’s not really designed for such small datasets. From the website: “Depending on how many features your dataset has, 1,000 rows might not be enough to train a high-performing model.” The impressive MAE shown in the results tab is for one particular test set, and it seems that for the Zindi test set we were simply not as lucky. Another potential factor: The random test set will have sampled from the same date range as the training data, whereas the Zindi test set was for a different time period. In cases like this, a non-shuffled test/train split can be a better indicator of true performance.</p><p>So, we’ve learnt something new! The magic tool isn’t magic, and just like any other method it needs good training data to make good predictions.</p><h3>Experiment 3: <a href="http://zindi.africa/competitions/sendy-logistics-challenge" target="_blank" rel="noreferrer noopener">Sendy</a></h3><p>I couldn’t resist trying it out once more on the newly launched Sendy Competition. I merged the Riders info into the train and test sets, uploaded the data, gave it an hour of training time and set it going. The goal is to minimize RMSE when predicting travel time between two locations (for deliveries). I also did some modelling myself while I waited for the AutoML training to finish.</p><p>Scores (RMSE for predicted time in seconds)
My first attempt (Catboost on provided data): 734 (7th place when this post was written)
First place: 721
Google AutoML: <span style="font-weight: bold;" class="">724 (4th place until I convince them to remove my latest entry</span>)</p><p>Not too shabby! To me, one of the great uses of a tool like this is to give a ballpark for what a good model looks like. Without the Zindi leaderboard, I wouldn’t have a way to gauge my model performance. Is it good? Could it get better with the same data? Now I can compare to the AutoML, using it as a ‘probably close to best’ measure.</p><h3>Where next?</h3><p>These quick tests have convinced me that these automated tools can be a useful part of my workflow, but are not a complete replacement for manual experimentation, exploration, feature engineering and modelling. I intend to play around more with AutoML and other tools in the near future, so stay tuned for a continuation of this series.</p><h3>Try your hand at a Zindi problem</h3><p>Johno has shown you what auto ml can do but what can you do? Sign up to <a href="http://zindi.africa/" target="_blank" rel="noreferrer noopener">Zindi</a> and enter the Sendy challenge. You can stand a chance to win prizes to the value of $8,000. </p><h3>About the author</h3><p>Jonathan Whitaker (<a href="http://zindi.africa/users/Johnowhitaker" target="_blank" rel="noreferrer noopener">Johnowhitaker</a>) enjoys playing with data, writing tutorials and sharing his findings as he trawls through the world of data science. Johno was the 2nd Zindian to join Zindi and we are grateful for his contributions to the platform. 
</p><p>You can read the original blog post here: <a href="https://datasciencecastnet.home.blog/2019/08/27/trying-automated-ml/" target="_blank" rel="noreferrer noopener">https://datasciencecastnet.home.blog/2019/08/27/trying-automated-ml/</a></p><p>Johno has a ‘tutorials’ section and a datasets page cataloging useful sources from around the web and any new data he has generated.</p><p></p></div></div></div></div></div></div><div class="App__section___1nGff"><div class="Footer__container___3vGXM"><div class="App__contained___3emDO"><div class="Footer__links___dDoS-"><div class="Footer__column___1yO21"><div><a href="https://zindi.africa/competitions">Competitions</a></div><div><a href="https://zindi.africa/hackathons">Hackathons</a></div><div><a href="https://zindi.africa/data_scientists">Data Scientists</a></div><div><a href="https://zindi.africa/discussions">Discussions</a></div><div><a href="https://zindi.africa/jobs">Jobs Board</a></div></div><div class="Footer__column___1yO21"><div><a href="https://zindi.africa/hosting_competition">Host competition</a></div><div><a href="https://zindi.africa/about">About Us</a></div><div><a href="https://zindi.africa/partners">Our Partners</a></div><div><a href="https://zindi.africa/contact_us">Contact Us</a></div></div><div class="Footer__column___1yO21"><div><a href="https://zindi.africa/rules">Rules and Guidelines</a></div><div><a href="https://zindi.africa/terms">Terms of Use</a></div><div><a href="https://zindi.africa/privacy">Privacy Policy</a></div><div><a href="https://zindi.africa/faq">FAQs</a></div></div><div class="Footer__column___1yO21"><div><a target="_blank" rel="noopener noreferrer" href="https://www.linkedin.com/company/zindi-africa">LinkedIn</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://www.facebook.com/ZindiAfrica-311192052980655">Facebook</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://twitter.com/ZindiAfrica">Twitter</a></div><div><a target="_blank" 
rel="noopener noreferrer" href="https://www.instagram.com/zindi.africa">Instagram</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://zindi.medium.com/">Medium</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://www.youtube.com/channel/UCQHq2JY2BqY2UTDCmVWyGBw">Youtube</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://github.com/zindiafrica">Github</a></div></div></div><div class="Footer__logo___ZtNwP">Zindi</div></div></div></div></div></div><script>window.__INITIAL_STATE__ = {"blogPosts":{"data":{},"queries":{}},"comments":{"data":{},"queries":{}},"competitionTags":{"data":{},"queries":{}},"competitions":{"data":{},"queries":{}},"conspiracyParticipations":{"data":{},"queries":{}},"discussions":{"data":{},"queries":{}},"fullBlogPosts":{"data":{"trying-automated-ml":{"id":"trying-automated-ml","image":"https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/27/header_1dd2654a-f888-44f3-874e-90fc75767259.jpeg","big_image":"https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/27/big_thumb_1dd2654a-f888-44f3-874e-90fc75767259.jpeg","header_image":"https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/27/header_1dd2654a-f888-44f3-874e-90fc75767259.jpeg","title":"Trying Automated ML","intro_html":"<p>Some students had asked me for my opinion on automated tools for machine learning. The thought occurred to me that I hadn’t done much with them recently, and it was about time I gave the much-hyped time-savers a go – after all, aren’t they going to make data scientists like me redundant?</p>","intro_plain":"Some students had asked me for my opinion on automated tools for machine learning. 
The thought occurred to me that I hadn’t done much with them recently, and it was about time I gave the much-hyped time-savers a go – after all, aren’t they going to make data scientists like me redundant?","content_html":"<p>In today’s post, I’ll be trying out <a href=\"https://cloud.google.com/automl/\" target=\"_blank\" rel=\"noreferrer noopener\">Google’s AutoML tool</a> by throwing various datasets at it and seeing how well it does. To make things interesting, the datasets I’ll be using will be from Zindi competitions, letting us see where AutoML would rank on the player leader-board. I should note that these experiments are a learning exercise, and actually using AutoML to win contests is almost certainly against the rules. But with that caveat out the way, let’s get started!</p><h3>How it works</h3><p>AutoML (and other similar tools) aims to automate one step of the ML pipeline – that of model selection and tuning. You give it a dataset to work on, specify column types, choose an output column and specify how long you’d like it to train for (you pay per hour). Then sit back and wait. Behind the scenes, AutoML tries many different models and slowly optimizes network architecture, parameters, weights… essentially everything that one could possibly tweak to improve performance gets tweaked. At the end of it, you get a (very complicated) model that you can then deploy with their services or use to make batch predictions.</p><p><div class=\"image\">\n<img src=\"https://zindpublic.blob.core.windows.net/public/uploads/image_attachment/image/163/7ab1f8bb-1f20-4f19-a3f6-05169fad7063.png\"> </div></p><p><span style=\"font-style: italic;\" class=\"\">Figure 1. The first step with AutoML tables – Importing the data.</span></p><p>The resultant models are fairly complex (mine were ~1GB each fully trained) and are not something you can simply download and use locally – you must deploy them via Google (for an extra fee). 
This, coupled with the cost of training models, makes it fairly expensive to experiment with if you use up your trial credits – so use them wisely.</p><p>Fortunately, there are other ways to achieve broadly the same result. For example, AutoKeras. Read more about that <a href=\"https://towardsdatascience.com/autokeras-the-killer-of-googles-automl-9e84c552a319\" target=\"_blank\" rel=\"noreferrer noopener\">here</a>.</p><h3>Experiment 1: <a href=\"http://zindi.africa/competitions/farm-pin-crop-detection-challenge\" target=\"_blank\" rel=\"noreferrer noopener\">Farm Pin Crop Detection</a></h3><p>This competition involves a classification problem, with the goal being to predict which crop is present in a given field. The training data is provided as field outlines and satellite images – not something that can effortlessly slot into AutoML tables. This meant that the first step was to sample the image bands for the different fields, and export the values to a CSV files for later analysis (as described in <a href=\"https://datasciencecastnet.home.blog/2019/06/27/tutorial-improving-crop-type-predictions/\" target=\"_blank\" rel=\"noreferrer noopener\">this post</a>). This done, I uploaded the resultant training file to cloud storage, selected the table, chose my input and output columns and hit go.</p><p><div class=\"image\">\n<img src=\"https://zindpublic.blob.core.windows.net/public/uploads/image_attachment/image/164/e1eacf50-1971-4946-bdde-90a2d2fa3c14.png\"> </div></p><p><span style=\"font-style: italic;\" class=\"\">Figure 2. AutoML ‘Evaluate’ tab showing model performance.</span></p><p>The scoring metric for this competition is log loss. My previous best (using the same training data to train a random forest model) scored around 0.64 (~20th on the leaderboard). So a score of <0.6 looked promising. I uploaded the test set, hit predict and then manually cleaned up the output to match the submission format for Zindi. Score? 
<span style=\"font-weight: bold;\" class=\"\">0.546, putting me in 12th place.</span> No feature engineering besides sampling some satellite images, no manual tweaking of model parameters…. not bad! This was before the rules for Farm Pin were updated. </p><p>I was quite pleased with this result. I enjoy the feature engineering side of things, but the tedium of hyper-parameter tuning is less appealing to me. If this tool can magically let me skip that step, it’s a win in my book! I may re-visit this with some added features, information from more images and perhaps a trick or two to enlarge the training set.</p><h3>Experiment 2: <a href=\"http://zindi.africa/competitions/traffic-jam-predicting-peoples-movement-into-nairobi\" target=\"_blank\" rel=\"noreferrer noopener\">Traffic Jam</a></h3><p>Spurred on by the first success, I turned to the Traffic Jam competition since I still had the dataset on my laptop. This was a regression problem, with the goal being to predict the number of tickets sold for a given trip into Nairobi. The training data was fairly sparse, with only ~2000 rows to work from. Still, I figured it was worth a shot and threw a few node hours worth of Google-managed ML magic at the problem.</p><p><div class=\"image\">\n<img src=\"https://zindpublic.blob.core.windows.net/public/uploads/image_attachment/image/166/086f3f68-759e-4219-b472-286db11680f9.png\"> </div></p><p><span style=\"font-style: italic;\" class=\"\">An MAE of 3.4, hypothetically equivalent to ~3rd place!</span></p><p>The evaluation results had me excited – and MAE of 3.4 would have placed the model in third place had the competition remained open. I hastily uploaded the predictions to Zindi, to see the score of… 5.3 (160th place). Now, I might be missing some glaring error in the way I formatted predictions for upload, but I suspect that the issue is with AutoML. It’s not really designed for such small datasets. 
From the website: “Depending on how many features your dataset has, 1,000 rows might not be enough to train a high-performing model.” The impressive MAE shown in the results tab is for one particular test set, and it seems that for the Zindi test set we were simply not as lucky. Another potential factor: The random test set will have sampled from the same date range as the training data, whereas the Zindi test set was for a different time period. In cases like this, a non-shuffled test/train split can be a better indicator of true performance.</p><p>So, we’ve learnt something new! The magic tool isn’t magic, and just like any other method it needs good training data to make good predictions.</p><h3>Experiment 3: <a href=\"http://zindi.africa/competitions/sendy-logistics-challenge\" target=\"_blank\" rel=\"noreferrer noopener\">Sendy</a></h3><p>I couldn’t resist trying it out once more on the newly launched Sendy Competition. I merged the Riders info into the train and test sets, uploaded the data, gave it an hour of training time and set it going. The goal is to minimize RMSE when predicting travel time between two locations (for deliveries). I also did some modelling myself while I waited for the AutoML training to finish.</p><p>Scores (RMSE for predicted time in seconds)\nMy first attempt (Catboost on provided data): 734 (7th place when this post was written)\nFirst place: 721\nGoogle AutoML: <span style=\"font-weight: bold;\" class=\"\">724 (4th place until I convince them to remove my latest entry</span>)</p><p>Not too shabby! To me, one of the great uses of a tool like this is to give a ballpark for what a good model looks like. Without the Zindi leaderboard, I wouldn’t have a way to gauge my model performance. Is it good? Could it get better with the same data? 
Now I can compare to the AutoML, using it as a ‘probably close to best’ measure.</p><h3>Where next?</h3><p>These quick tests have convinced me that these automated tools can be a useful part of my workflow, but are not a complete replacement for manual experimentation, exploration, feature engineering and modelling. I intend to play around more with AutoML and other tools in the near future, so stay tuned for a continuation of this series.</p><h3>Try your hand at a Zindi problem</h3><p>Johno has shown you what auto ml can do but what can you do? Sign up to <a href=\"http://zindi.africa/\" target=\"_blank\" rel=\"noreferrer noopener\">Zindi</a> and enter the Sendy challenge. You can stand a chance to win prizes to the value of $8,000. </p><h3>About the author</h3><p>Jonathan Whitaker (<a href=\"http://zindi.africa/users/Johnowhitaker\" target=\"_blank\" rel=\"noreferrer noopener\">Johnowhitaker</a>) enjoys playing with data, writing tutorials and sharing his findings as he trawls through the world of data science. Johno was the 2nd Zindian to join Zindi and we are grateful for his contributions to the platform. 
</p><p>You can read the original blog post here: <a href=\"https://datasciencecastnet.home.blog/2019/08/27/trying-automated-ml/\" target=\"_blank\" rel=\"noreferrer noopener\">https://datasciencecastnet.home.blog/2019/08/27/trying-automated-ml/</a></p><p>Johno has a ‘tutorials’ section and a datasets page cataloging useful sources from around the web and any new data he has generated.</p><p></p>","published_at":"2019-08-30T08:16:45.434Z"}},"queries":{"\"trying-automated-ml\"":{"data":"trying-automated-ml","loading":false,"error":null}}},"fullCompetitions":{},"fullDiscussions":{"data":{},"queries":{"default":{"loading":false,"error":null}}},"fullJobs":{"data":{},"queries":{}},"jobs":{"data":{},"queries":{}},"jobApplications":{"data":{},"queries":{}},"myTeams":{},"notificationSubscriptions":{"data":{},"queries":{}},"participations":{"data":{},"queries":{}},"submissions":{"data":{},"queries":{}},"submissionLimits":{"data":{},"queries":{}},"teams":{"data":{},"queries":{}},"userDiscussions":{"data":{},"queries":{}},"userParticipations":{"data":{},"queries":{}},"userProfiles":{"users":{}},"users":{"data":{},"queries":{}}}</script>
<script>
// Google Analytics (analytics.js) command-queue stub: until the real library
// replaces window.ga, every call is buffered in ga.q in call order.
window.ga =
  window.ga ||
  function () {
    ;(ga.q = ga.q || []).push(arguments)
  }
// Timestamp of when the stub was initialised.
ga.l = +new Date()
ga("create", "UA-125419148-1", "auto")
// Queued commands run in order, so tracker fields have to be set before the
// hit is sent; otherwise the initial pageview goes out without them.
ga("set", "appName", "zindi.web")
ga("set", "dimension1", "nextgen")
ga("send", "pageview")
</script>
<script async="" src="./Trying Automated ML - Zindi_files/analytics.js"></script>
<noscript><iframe
src="https://www.googletagmanager.com/ns.html?id=GTM-KRG85D8"
height="0"
width="0"
style="display: none; visibility: hidden"
></iframe
></noscript>
<!-- Minified webpack runtime bootstrap (generated code). It keeps a cache of
     instantiated modules, replaces window.webpackJsonp.push with a handler
     that registers the modules of each incoming chunk and runs any entry
     whose chunk dependencies are all loaded, and sets the public path to
     https://assets.zindi.africa/. -->
<script>!function(l){function e(e){for(var r,t,n=e[0],o=e[1],u=e[2],i=0,a=[];i<n.length;i++)t=n[i],Object.prototype.hasOwnProperty.call(p,t)&&p[t]&&a.push(p[t][0]),p[t]=0;for(r in o)Object.prototype.hasOwnProperty.call(o,r)&&(l[r]=o[r]);for(s&&s(e);a.length;)a.shift()();return c.push.apply(c,u||[]),f()}function f(){for(var e,r=0;r<c.length;r++){for(var t=c[r],n=!0,o=1;o<t.length;o++){var u=t[o];0!==p[u]&&(n=!1)}n&&(c.splice(r--,1),e=i(i.s=t[0]))}return e}var t={},p={runtime:0},c=[];function i(e){if(t[e])return t[e].exports;var r=t[e]={i:e,l:!1,exports:{}};return l[e].call(r.exports,r,r.exports,i),r.l=!0,r.exports}i.m=l,i.c=t,i.d=function(e,r,t){i.o(e,r)||Object.defineProperty(e,r,{enumerable:!0,get:t})},i.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.t=function(r,e){if(1&e&&(r=i(r)),8&e)return r;if(4&e&&"object"==typeof r&&r&&r.__esModule)return r;var t=Object.create(null);if(i.r(t),Object.defineProperty(t,"default",{enumerable:!0,value:r}),2&e&&"string"!=typeof r)for(var n in r)i.d(t,n,function(e){return r[e]}.bind(null,n));return t},i.n=function(e){var r=e&&e.__esModule?function(){return e.default}:function(){return e};return i.d(r,"a",r),r},i.o=function(e,r){return Object.prototype.hasOwnProperty.call(e,r)},i.p="https://assets.zindi.africa/";var r=(n=window.webpackJsonp=window.webpackJsonp||[]).push.bind(n);n.push=e;for(var n=n.slice(),o=0;o<n.length;o++)e(n[o]);var s=r;f()}([]);
//# sourceMappingURL=runtime.8d4eb9324d7c3d54849b.js.map</script><script type="text/javascript" src="./Trying Automated ML - Zindi_files/vendor.019171b12285f6597772.js" defer=""></script><script type="text/javascript" src="./Trying Automated ML - Zindi_files/bundle.9c8d652f1bf8225cb2ca.js" defer=""></script>
<div class="ReactModalPortal"></div>
<script type="text/javascript" id="">
// Yandex.Metrika loader: define a queueing stub under the global name "ym"
// (calls are buffered in ym.a), inject the async tag.js script ahead of the
// first script element, then queue the counter initialisation.
(function (win, doc, tagName, src, fnName, el, first) {
  win[fnName] =
    win[fnName] ||
    function () {
      (win[fnName].a = win[fnName].a || []).push(arguments);
    };
  win[fnName].l = 1 * new Date();
  el = doc.createElement(tagName);
  first = doc.getElementsByTagName(tagName)[0];
  el.async = 1;
  el.src = src;
  first.parentNode.insertBefore(el, first);
})(window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");
ym(67869277, "init", {
  clickmap: true,
  trackLinks: true,
  accurateTrackBounce: true,
  webvisor: true
});
</script>
<noscript><div><img src="https://mc.yandex.ru/watch/67869277" style="position:absolute; left:-9999px;" alt=""></div></noscript>
<div id="fatkun-drop-panel">
<a id="fatkun-drop-panel-close-btn">×</a>
<div id="fatkun-drop-panel-inner">
<div class="fatkun-content">
<svg class="fatkun-icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5892"><path d="M494.933333 782.933333c2.133333 2.133333 4.266667 4.266667 8.533334 6.4h8.533333c6.4 0 10.666667-2.133333 14.933333-6.4l2.133334-2.133333 275.2-275.2c8.533333-8.533333 8.533333-21.333333 0-29.866667-8.533333-8.533333-21.333333-8.533333-29.866667 0L533.333333 716.8V128c0-12.8-8.533333-21.333333-21.333333-21.333333s-21.333333 8.533333-21.333333 21.333333v588.8L249.6 475.733333c-8.533333-8.533333-21.333333-8.533333-29.866667 0-8.533333 8.533333-8.533333 21.333333 0 29.866667l275.2 277.333333zM853.333333 874.666667H172.8c-12.8 0-21.333333 8.533333-21.333333 21.333333s8.533333 21.333333 21.333333 21.333333H853.333333c12.8 0 21.333333-8.533333 21.333334-21.333333s-10.666667-21.333333-21.333334-21.333333z" p-id="5893"></path></svg>
<div class="fatkun-title">Drag and Drop</div>
<div class="fatkun-desc">The image will be downloaded</div>
</div>
</div>
</div></body></html>