-
Notifications
You must be signed in to change notification settings - Fork 0
/
Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi.html
117 lines (109 loc) · 35.4 KB
/
Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
<!DOCTYPE html>
<!-- saved from url=(0096)https://zindi.africa/learning/climbing-the-ladder-image-recognition-for-ml-competitions-tutorial -->
<html lang="en" class="wf-loadingwf-inactive wf-nunito-n3-active wf-nunito-n4-active wf-active"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<!-- nextgen -->
<meta http-equiv="x-ua-compatible" content="ie=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover">
<link rel="apple-touch-icon-precomposed" sizes="57x57" href="https://assets.zindi.africa/apple-touch-icon-57x57.png">
<link rel="apple-touch-icon-precomposed" sizes="114x114" href="https://assets.zindi.africa/apple-touch-icon-114x114.png">
<link rel="apple-touch-icon-precomposed" sizes="72x72" href="https://assets.zindi.africa/apple-touch-icon-72x72.png">
<link rel="apple-touch-icon-precomposed" sizes="144x144" href="https://assets.zindi.africa/apple-touch-icon-144x144.png">
<link rel="apple-touch-icon-precomposed" sizes="120x120" href="https://assets.zindi.africa/apple-touch-icon-120x120.png">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="https://assets.zindi.africa/apple-touch-icon-152x152.png">
<link rel="icon" type="image/png" href="https://assets.zindi.africa/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="https://assets.zindi.africa/favicon-16x16.png" sizes="16x16">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:site" content="@ZindiAfrica">
<meta name="application-name" content=" ">
<meta name="msapplication-TileColor" content="#FFFFFF">
<meta name="msapplication-TileImage" content="https://assets.zindi.africa/mstile-144x144.png">
<script async="" src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/tag.js"></script><script async="" src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/gtm.js"></script><script src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/webfont.js" async=""></script><script type="text/javascript">
var WebFontConfig = {
google: {
families: ["Nunito:300,400"],
},
timeout: 2000,
}
;(function (d) {
var h = d.documentElement
var onerror = function () {
h.className += "wf-inactive"
}
var st = setTimeout(onerror, 1000)
h.className += "wf-loading"
var wf = d.createElement("script"),
s = d.scripts[0]
wf.src = "https://ajax.googleapis.com/ajax/libs/webfont/1.6.26/webfont.js"
wf.async = true
wf.onerror = onerror
wf.onload = function () {
clearTimeout(st)
}
s.parentNode.insertBefore(wf, s)
})(document)
</script>
<title>Climbing the Ladder: Image Recognition for ML Competitions (Tutorial) - Zindi</title><meta data-react-helmet="true" name="description" content="Zindi is a data science competition platform with the mission of building the data science ecosystem in Africa. Zindi hosts a community of data scientists dedicated to solving the continent's most pressing problems through machine learning and artificial intelligence."><meta data-react-helmet="true" property="og:site_name" content="Zindi"><meta data-react-helmet="true" property="twitter:site_name" content="Zindi"><meta data-react-helmet="true" property="og:title" content="Climbing the Ladder: Image Recognition for ML Competitions (Tutorial)"><meta data-react-helmet="true" property="og:description" content="OK, so you've learnt how to train an image classification model, and you try it out on a Zindi competition. Your score is decent, but there's a group of folks sitting above you on the leaderboard and you'd like to know what they have that you don't. Are they cheating? Is there a secret model type that only the Guild of Grand-Masters can access? Is it a conspiracy to avoid giving out prize money? No. Eking out that final 1% accuracy requires a whole bag of tricks. Today, we'll take a look at what some of the cool kids are using these days, and show you how you can start to climb towards the top in your next image recognition challenge."><meta data-react-helmet="true" property="og:image" content="https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/61/big_thumb_ba815de5-8962-4e93-9c13-04ec14807c1e.jpg"><meta data-react-helmet="true" property="twitter:title" content="Climbing the Ladder: Image Recognition for ML Competitions (Tutorial)"><meta data-react-helmet="true" property="twitter:description" content="OK, so you've learnt how to train an image classification model, and you try it out on a Zindi competition. Your score is decent, but there's a group of folks sitting above you on the leaderboard and you'd like to know what they have that you don't. Are they cheating? Is there a secret model type that only the Guild of Grand-Masters can access? Is it a conspiracy to avoid giving out prize money? No. Eking out that final 1% accuracy requires a whole bag of tricks. Today, we'll take a look at what some of the cool kids are using these days, and show you how you can start to climb towards the top in your next image recognition challenge."><meta data-react-helmet="true" property="twitter:image" content="https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/61/big_thumb_ba815de5-8962-4e93-9c13-04ec14807c1e.jpg">
<script>
;(function (w, d, s, l, i) {
w[l] = w[l] || []
w[l].push({ "gtm.start": new Date().getTime(), event: "gtm.js" })
var f = d.getElementsByTagName(s)[0],
j = d.createElement(s),
dl = l != "dataLayer" ? "&l=" + l : ""
j.async = true
j.src = "https://www.googletagmanager.com/gtm.js?id=" + i + dl
f.parentNode.insertBefore(j, f)
})(window, document, "script", "dataLayer", "GTM-KRG85D8")
</script>
<link rel="stylesheet" href="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/css" media="all"><link href="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/bundle.d57dc7d8a01460a702c7.css" rel="stylesheet"></head>
<body _c_t_common="1" data-new-gr-c-s-check-loaded="14.1020.0" data-gr-ext-installed="">
<noscript> You need to enable JavaScript to run this app. </noscript>
<div id="app"><div class="App__container___fj0c9"><div class="App__section___1nGff"></div><div class="App__section___1nGff"><div class="App__contained___3emDO"><div class="Header__container___3FtbH"><a href="https://zindi.africa/"><div class="Header__logo___1eRaO">Zindi</div></a><div class="Header__menuContainer___2izgT"><div class="Menu__container___1sjgb"><a class="Menu__link___3x4C4" href="https://zindi.africa/competitions"><span class="Menu__linkInner___3LB5N">Compete</span></a><a aria-current="page" class="Menu__link___3x4C4 Menu__activeLink___1MA6r" href="https://zindi.africa/learning"><span class="Menu__linkInner___3LB5N">Learn</span></a><a class="Menu__link___3x4C4" href="https://zindi.africa/jobs"><span class="Menu__linkInner___3LB5N">Find a Job</span></a></div><div class="Menu__container___1sjgb Header__menuRight___3wiDe Menu__justifyRight___3r6Ws"><a class="Menu__link___3x4C4" href="https://zindi.africa/inbox"><span class="Menu__linkInner___3LB5N"><div class="Inbox__container___3xHCr"><svg class="Inbox__messagesIcon___2XP8V" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M4 4h16c1.1 0 2 .9 2 2v12c0 1.1-.9 2-2 2H4c-1.1 0-2-.9-2-2V6c0-1.1.9-2 2-2z"></path><polyline points="22,6 12,13 2,6"></polyline></svg><div class="Inbox__unseen___31NED"></div></div></span></a></div></div><div class="Header__userMenu___2iOts"><div class="UserMenu__container___ypkko"><button class="Button__base___NhksY Button__blank-normal___1nB5F UserMenu__user___a0zJo"><span class="Button__inner___3jkeF"><span class="User__container___18HoF User__size-normal___26ZPA"><img class="User__avatar___6aNx2" src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/thumb.default.png" alt=""><span class="User__username___64PE2">Glencode</span></span></span></button><div class=""></div></div></div></div></div></div><div class="App__section___1nGff App__content___WFkDX"><div class="WithSubheader__container___3qd5U"><div class="WithSubheader__header___2o1oX WithSubheader__withHeader___35ECw"><div class="BlogPost__headerImage___2fAz4" style="background-image: url("https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/61/header_ba815de5-8962-4e93-9c13-04ec14807c1e.jpg");"></div></div><div><div class="App__contained___3emDO"><div class="Paper__paper___2M-1R Paper__padding-1___3sKLR BlogPost__paper___1D3Be"><div class="BlogPost__date___3BhZy">1 May 2020, 11:16</div><h2 class="BlogPost__title___RUU5Z">Climbing the Ladder: Image Recognition for ML Competitions (Tutorial)</h2><div class="Html__container___1AJFz BlogPost__intro___31fc-"><p>OK, so you've learnt how to train an image classification model, and you try it out on a Zindi competition. Your score is decent, but there's a group of folks sitting above you on the leaderboard and you'd like to know what they have that you don't. Are they cheating? Is there a secret model type that only the Guild of Grand-Masters can access? Is it a conspiracy to avoid giving out prize money? No. Eking out that final 1% accuracy requires a whole bag of tricks. Today, we'll take a look at what some of the cool kids are using these days, and show you how you can start to climb towards the top in your next image recognition challenge.</p><h3>A good baseline</h3><p>Before we get fancy, we're going to assume you're already using some good practices in your model. Specifically:</p><p><span style="font-weight: bold;" class="">Transfer learning:</span> Starting from scratch means using lots of data, and that's not always available. Fortunately, we have many wonderful models available to us that have been trained on datasets like <a href="http://www.image-net.org/" target="_blank" rel="noreferrer noopener">Imagenet</a>. These pre-trained models already contain building blocks for recognizing all kinds of images, and we can re-purpose them for a new task. Pre-trained models are available through most deep learning libraries - in many, they're the default.</p><p><span style="font-weight: bold;" class="">Data augmentation:</span> Showing your model the exact same image multiple times is a good way to cause it to fixate on that specific image, rather than the idea behind it. With data augmentation, we create several similar images based on the initial example. Many libraries have data augmentation built in, but there are also stand-alone tools like <a href="https://albumentations.ai/" target="_blank" rel="noreferrer noopener">Albumentations</a> that can be added to your pipeline, and you can always dive in and get creative doing some of your own.</p><p></p><div class="image">
<img src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/e83360a4-6131-40c4-b8bb-423d13199a61.png"> </div><p></p><p><span style="font-style: italic;" class="">Data with augmentations applied - note the warping on some images</span></p><p>Cyclic learning rates, choosing a good learning rate, gradually unfreezing and training a pre-trained model... all techniques that have become mainstream in the past few years. You'll see fancy LR schedulers varying the LR, and everyone wants to invent the latest profile. Use whatever comes with your library - see the demo notebook where we use lr_find and fit_one_cycle methods from fastai.</p><p>We'll be looking at the <a href="https://zindi.africa/competitions/miia-pothole-image-classification-challenge" target="_blank" rel="noreferrer noopener">MIIA Potholes Image Classification Challenge</a> in the demo notebook accompanying this tutorial. We use the new (beta) fastai2 library to train a baseline using a pre-trained resnet34 model. This is similar to the starter notebook shared when the competition first launched. We train it for a few minutes and submit, to get a score of 0.81. This will be our starting point when looking at potential improvements.</p><h3>The obvious sliders</h3><p>There are some 'obvious' ways to get a better score, including:</p><ul class="public-DraftStyleDefault-ul">
<li>Training for longer</li>
<li>Using a larger image size</li>
<li>Trying a bigger network</li>
</ul><p>Any of these will up your score, but they also increase training time. Before we deploy these easy wins, let's look at the less obvious options in our bag of tricks.</p><h3>Progressive resizing</h3><p></p><div class="image">
<img src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/7bbac648-a9cc-4751-b382-bd5e0dc170df.jpg"> </div><p></p><p>Popularized by the fastai course, progressive resizing involves training the network on smaller images first, and then re-training on larger and larger images. For some datasets this makes a big difference, and it's faster than just training more on large images from the start. Begin with 64x64 and work up, doubling each time, but think about whether this approach fits the problem. For potholes, can you even see them at lower resolutions?</p><h3>
<span style="font-weight: bold;" class="">Test-time augmentation</span></h3><p>This is an easy win, but one that many people don't know about. Remember those data augmentations we talked about? What if you applied them to your test set, to get several different versions of each test image. You could make predictions for each, and average them. Simple, but effective! From our baseline score of 0.81, we can lower our score to 0.64 by replacing learn.get_preds(...) with learn.tta(...), letting fastai do the heavy lifting for us as it runs predictions on 8 augmented versions of each test image and combines the predictions.</p><p>As always, there are many ways to do this. For some competitions (<a href="https://zindi.africa/competitions/iclr-workshop-challenge-1-cgiar-computer-vision-for-crop-disease" target="_blank" rel="noreferrer noopener">CGIAR Computer Vision for Crop Disease</a> for example), cropping in close to a larger image and making predictions for several sub-images helps a lot. For others, you need to be careful to keep the subject in view when doing your augmentation. Whichever method you choose, this is a good way to get better performance with very little effort.</p><h3>Even more data augmentation</h3><p>Let's talk about custom data augmentations. In some cases, there is something specific about the data that makes it worth thinking about how you'll be transforming the images before feeding them to your network. If it's satellite or aerial imagery, who cares about orientation? Flip those beauties vertically for extra fun. If it's pictures of sea creatures surrounded by large areas of gray background, crop them intelligently to zoom in on the features of interest. And looking at our example, what do we see? Lots of sky, lots of dashboard. Look at this transform:</p><p></p><div class="image">
<img src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/01104098-3049-407a-a128-fe86e8dde80f.png"> </div><p></p><p><span style="font-style: italic;" class="">Transforming an image to exclude unnecessary parts</span></p><p>That winning entry in potholes, which I carefully screenshotted for my CV before sharing this tutorial, was based almost entirely on this single idea - there are no potholes in the sky!</p><h3>Ensembles</h3><p>If one model is good, more must be better! Training several different models and then averaging their predictions is another way to buy yourself some extra accuracy. This can get a little extreme, with vast ensembles of models, which is why you see many platforms moving towards compute/time limits. But the base idea is good. Here's an example from the winner of the <a href="https://zindi.africa/blog/meet-the-winners-of-umojahack-1-saeon-marine-invertebrates-identification-challenge" target="_blank" rel="noreferrer noopener">SAEON Invertebrates Challenge</a>, <a href="https://zindi.africa/users/rinnqd" target="_blank" rel="noreferrer noopener">Rinnqd</a>:</p><ul class="public-DraftStyleDefault-ul">
<li>resnet50 with size 224x224, 256x256, 512x512</li>
<li>resnet152 with size 224x224, 350x350</li>
<li>effcientnet b0 with size 224x224, 512x512</li>
<li>efficientnet b4 with size 224x224</li>
</ul><p>To make the most of an ensemble, you need variety. Train a few different architectures, or train some on different subsets of the data. I've had success with imbalanced datasets training one model on the common classes, another on the whole dataset and combining the predictions with adjustments made to get a better overall score.</p><p>Personally, I dislike the idea that more compute can buy you performance, and I like to see what can be done with a single model. If you've trained one model well, and done all the other work, then maybe you can try a few more and average. But have a heart for those of us without dedicated hardware, and don't just throw resources at the problem.</p><h3>Research current hot topics</h3><p>If there's lots of unlabeled data lying around for use, there are some other fun ways to boost your models. We won't go deeply into them now, but search around for the work being done on semi-supervised learning. Truly amazing stuff.</p><p>There's also lots of hype around strategies like MixUp, NoisyStudent, and so many more. Trying to implement one of these ideas is a good project, and might well improve your score. I don't have specifics to share, but keep an eye on what folks are talking about, and if something comes up enough that you start to wonder what it is then that's a sign that you should take a closer look and see if anyone has shared a tutorial or something about it yet.</p></div></div></div></div></div></div><div class="App__section___1nGff"><div class="Footer__container___3vGXM"><div class="App__contained___3emDO"><div class="Footer__links___dDoS-"><div class="Footer__column___1yO21"><div><a href="https://zindi.africa/competitions">Competitions</a></div><div><a href="https://zindi.africa/hackathons">Hackathons</a></div><div><a href="https://zindi.africa/data_scientists">Data Scientists</a></div><div><a href="https://zindi.africa/discussions">Discussions</a></div><div><a href="https://zindi.africa/jobs">Jobs Board</a></div></div><div class="Footer__column___1yO21"><div><a href="https://zindi.africa/hosting_competition">Host competition</a></div><div><a href="https://zindi.africa/about">About Us</a></div><div><a href="https://zindi.africa/partners">Our Partners</a></div><div><a href="https://zindi.africa/contact_us">Contact Us</a></div></div><div class="Footer__column___1yO21"><div><a href="https://zindi.africa/rules">Rules and Guidelines</a></div><div><a href="https://zindi.africa/terms">Terms of Use</a></div><div><a href="https://zindi.africa/privacy">Privacy Policy</a></div><div><a href="https://zindi.africa/faq">FAQs</a></div></div><div class="Footer__column___1yO21"><div><a target="_blank" rel="noopener noreferrer" href="https://www.linkedin.com/company/zindi-africa">LinkedIn</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://www.facebook.com/ZindiAfrica-311192052980655">Facebook</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://twitter.com/ZindiAfrica">Twitter</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://www.instagram.com/zindi.africa">Instagram</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://zindi.medium.com/">Medium</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://www.youtube.com/channel/UCQHq2JY2BqY2UTDCmVWyGBw">Youtube</a></div><div><a target="_blank" rel="noopener noreferrer" href="https://github.com/zindiafrica">Github</a></div></div></div><div class="Footer__logo___ZtNwP">Zindi</div></div></div></div></div></div><script>window.__INITIAL_STATE__ = {"blogPosts":{"data":{},"queries":{}},"comments":{"data":{},"queries":{}},"competitionTags":{"data":{},"queries":{}},"competitions":{"data":{},"queries":{}},"conspiracyParticipations":{"data":{},"queries":{}},"discussions":{"data":{},"queries":{}},"fullBlogPosts":{"data":{"climbing-the-ladder-image-recognition-for-ml-competitions-tutorial":{"id":"climbing-the-ladder-image-recognition-for-ml-competitions-tutorial","image":"https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/61/header_ba815de5-8962-4e93-9c13-04ec14807c1e.jpg","big_image":"https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/61/big_thumb_ba815de5-8962-4e93-9c13-04ec14807c1e.jpg","header_image":"https://zindpublic.blob.core.windows.net/public/uploads/blog_post/image/61/header_ba815de5-8962-4e93-9c13-04ec14807c1e.jpg","title":"Climbing the Ladder: Image Recognition for ML Competitions (Tutorial)","intro_html":"<p>OK, so you've learnt how to train an image classification model, and you try it out on a Zindi competition. Your score is decent, but there's a group of folks sitting above you on the leaderboard and you'd like to know what they have that you don't. Are they cheating? Is there a secret model type that only the Guild of Grand-Masters can access? Is it a conspiracy to avoid giving out prize money? No. Eking out that final 1% accuracy requires a whole bag of tricks. Today, we'll take a look at what some of the cool kids are using these days, and show you how you can start to climb towards the top in your next image recognition challenge.</p>","intro_plain":"OK, so you've learnt how to train an image classification model, and you try it out on a Zindi competition. Your score is decent, but there's a group of folks sitting above you on the leaderboard and you'd like to know what they have that you don't. Are they cheating? Is there a secret model type that only the Guild of Grand-Masters can access? Is it a conspiracy to avoid giving out prize money? No. Eking out that final 1% accuracy requires a whole bag of tricks. Today, we'll take a look at what some of the cool kids are using these days, and show you how you can start to climb towards the top in your next image recognition challenge.","content_html":"<h3>A good baseline</h3><p>Before we get fancy, we're going to assume you're already using some good practices in your model. Specifically:</p><p><span style=\"font-weight: bold;\" class=\"\">Transfer learning:</span> Starting from scratch means using lots of data, and that's not always available. Fortunately, we have many wonderful models available to us that have been trained on datasets like <a href=\"http://www.image-net.org/\" target=\"_blank\" rel=\"noreferrer noopener\">Imagenet</a>. These pre-trained models already contain building blocks for recognizing all kinds of images, and we can re-purpose them for a new task. Pre-trained models are available through most deep learning libraries - in many, they're the default.</p><p><span style=\"font-weight: bold;\" class=\"\">Data augmentation:</span> Showing your model the exact same image multiple times is a good way to cause it to fixate on that specific image, rather than the idea behind it. With data augmentation, we create several similar images based on the initial example. Many libraries have data augmentation built in, but there are also stand-alone tools like <a href=\"https://albumentations.ai/\" target=\"_blank\" rel=\"noreferrer noopener\">Albumentations</a> that can be added to your pipeline, and you can always dive in and get creative doing some of your own.</p><p><div class=\"image\">\n<img src=\"https://zindpublic.blob.core.windows.net/public/uploads/image_attachment/image/398/e83360a4-6131-40c4-b8bb-423d13199a61.png\"> </div></p><p><span style=\"font-style: italic;\" class=\"\">Data with augmentations applied - note the warping on some images</span></p><p>Cyclic learning rates, choosing a good learning rate, gradually unfreezing and training a pre-trained model... all techniques that have become mainstream in the past few years. You'll see fancy LR schedulers varying the LR, and everyone wants to invent the latest profile. Use whatever comes with your library - see the demo notebook where we use lr_find and fit_one_cycle methods from fastai.</p><p>We'll be looking at the <a href=\"https://zindi.africa/competitions/miia-pothole-image-classification-challenge\" target=\"_blank\" rel=\"noreferrer noopener\">MIIA Potholes Image Classification Challenge</a> in the demo notebook accompanying this tutorial. We use the new (beta) fastai2 library to train a baseline using a pre-trained resnet34 model. This is similar to the starter notebook shared when the competition first launched. We train it for a few minutes and submit, to get a score of 0.81. This will be our starting point when looking at potential improvements.</p><h3>The obvious sliders</h3><p>There are some 'obvious' ways to get a better score, including:</p><ul class=\"public-DraftStyleDefault-ul\">\n<li>Training for longer</li>\n<li>Using a larger image size</li>\n<li>Trying a bigger network</li>\n</ul><p>Any of these will up your score, but they also increase training time. Before we deploy these easy wins, let's look at the less obvious options in our bag of tricks.</p><h3>Progressive resizing</h3><p><div class=\"image\">\n<img src=\"https://zindpublic.blob.core.windows.net/public/uploads/image_attachment/image/399/7bbac648-a9cc-4751-b382-bd5e0dc170df.jpg\"> </div></p><p>Popularized by the fastai course, progressive resizing involves training the network on smaller images first, and then re-training on larger and larger images. For some datasets this makes a big difference, and it's faster than just training more on large images from the start. Begin with 64x64 and work up, doubling each time, but think about whether this approach fits the problem. For potholes, can you even see them at lower resolutions?</p><h3>\n<span style=\"font-weight: bold;\" class=\"\">Test-time augmentation</span></h3><p>This is an easy win, but one that many people don't know about. Remember those data augmentations we talked about? What if you applied them to your test set, to get several different versions of each test image. You could make predictions for each, and average them. Simple, but effective! From our baseline score of 0.81, we can lower our score to 0.64 by replacing learn.get_preds(...) with learn.tta(...), letting fastai do the heavy lifting for us as it runs predictions on 8 augmented versions of each test image and combines the predictions.</p><p>As always, there are many ways to do this. For some competitions (<a href=\"https://zindi.africa/competitions/iclr-workshop-challenge-1-cgiar-computer-vision-for-crop-disease\" target=\"_blank\" rel=\"noreferrer noopener\">CGIAR Computer Vision for Crop Disease</a> for example), cropping in close to a larger image and making predictions for several sub-images helps a lot. For others, you need to be careful to keep the subject in view when doing your augmentation. Whichever method you choose, this is a good way to get better performance with very little effort.</p><h3>Even more data augmentation</h3><p>Let's talk about custom data augmentations. In some cases, there is something specific about the data that makes it worth thinking about how you'll be transforming the images before feeding them to your network. If it's satellite or aerial imagery, who cares about orientation? Flip those beauties vertically for extra fun. If it's pictures of sea creatures surrounded by large areas of gray background, crop them intelligently to zoom in on the features of interest. And looking at our example, what do we see? Lots of sky, lots of dashboard. Look at this transform:</p><p><div class=\"image\">\n<img src=\"https://zindpublic.blob.core.windows.net/public/uploads/image_attachment/image/400/01104098-3049-407a-a128-fe86e8dde80f.png\"> </div></p><p><span style=\"font-style: italic;\" class=\"\">Transforming an image to exclude unnecessary parts</span></p><p>That winning entry in potholes, which I carefully screenshotted for my CV before sharing this tutorial, was based almost entirely on this single idea - there are no potholes in the sky!</p><h3>Ensembles</h3><p>If one model is good, more must be better! Training several different models and then averaging their predictions is another way to buy yourself some extra accuracy. This can get a little extreme, with vast ensembles of models, which is why you see many platforms moving towards compute/time limits. But the base idea is good. Here's an example from the winner of the <a href=\"https://zindi.africa/blog/meet-the-winners-of-umojahack-1-saeon-marine-invertebrates-identification-challenge\" target=\"_blank\" rel=\"noreferrer noopener\">SAEON Invertebrates Challenge</a>, <a href=\"https://zindi.africa/users/rinnqd\" target=\"_blank\" rel=\"noreferrer noopener\">Rinnqd</a>:</p><ul class=\"public-DraftStyleDefault-ul\">\n<li>resnet50 with size 224x224, 256x256, 512x512</li>\n<li>resnet152 with size 224x224, 350x350</li>\n<li>effcientnet b0 with size 224x224, 512x512</li>\n<li>efficientnet b4 with size 224x224</li>\n</ul><p>To make the most of an ensemble, you need variety. Train a few different architectures, or train some on different subsets of the data. I've had success with imbalanced datasets training one model on the common classes, another on the whole dataset and combining the predictions with adjustments made to get a better overall score.</p><p>Personally, I dislike the idea that more compute can buy you performance, and I like to see what can be done with a single model. If you've trained one model well, and done all the other work, then maybe you can try a few more and average. But have a heart for those of us without dedicated hardware, and don't just throw resources at the problem.</p><h3>Research current hot topics</h3><p>If there's lots of unlabeled data lying around for use, there are some other fun ways to boost your models. We won't go deeply into them now, but search around for the work being done on semi-supervised learning. Truly amazing stuff.</p><p>There's also lots of hype around strategies like MixUp, NoisyStudent, and so many more. Trying to implement one of these ideas is a good project, and might well improve your score. I don't have specifics to share, but keep an eye on what folks are talking about, and if something comes up enough that you start to wonder what it is then that's a sign that you should take a closer look and see if anyone has shared a tutorial or something about it yet.</p>","published_at":"2020-05-01T08:16:01.823Z"}},"queries":{"\"climbing-the-ladder-image-recognition-for-ml-competitions-tutorial\"":{"data":"climbing-the-ladder-image-recognition-for-ml-competitions-tutorial","loading":false,"error":null}}},"fullCompetitions":{},"fullDiscussions":{"data":{},"queries":{"default":{"loading":false,"error":null}}},"fullJobs":{"data":{},"queries":{}},"jobs":{"data":{},"queries":{}},"jobApplications":{"data":{},"queries":{}},"myTeams":{},"notificationSubscriptions":{"data":{},"queries":{}},"participations":{"data":{},"queries":{}},"submissions":{"data":{},"queries":{}},"submissionLimits":{"data":{},"queries":{}},"teams":{"data":{},"queries":{}},"userDiscussions":{"data":{},"queries":{}},"userParticipations":{"data":{},"queries":{}},"userProfiles":{"users":{}},"users":{"data":{},"queries":{}}}</script>
<script>
window.ga =
window.ga ||
function () {
;(ga.q = ga.q || []).push(arguments)
}
ga.l = +new Date()
ga("create", "UA-125419148-1", "auto")
ga("send", "pageview")
ga('set', 'appName', 'zindi.web')
ga('set', 'dimension1', 'nextgen');
</script>
<script async="" src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/analytics.js"></script>
<noscript><iframe
src="https://www.googletagmanager.com/ns.html?id=GTM-KRG85D8"
height="0"
width="0"
style="display: none; visibility: hidden"
></iframe
></noscript>
<script>!function(l){function e(e){for(var r,t,n=e[0],o=e[1],u=e[2],i=0,a=[];i<n.length;i++)t=n[i],Object.prototype.hasOwnProperty.call(p,t)&&p[t]&&a.push(p[t][0]),p[t]=0;for(r in o)Object.prototype.hasOwnProperty.call(o,r)&&(l[r]=o[r]);for(s&&s(e);a.length;)a.shift()();return c.push.apply(c,u||[]),f()}function f(){for(var e,r=0;r<c.length;r++){for(var t=c[r],n=!0,o=1;o<t.length;o++){var u=t[o];0!==p[u]&&(n=!1)}n&&(c.splice(r--,1),e=i(i.s=t[0]))}return e}var t={},p={runtime:0},c=[];function i(e){if(t[e])return t[e].exports;var r=t[e]={i:e,l:!1,exports:{}};return l[e].call(r.exports,r,r.exports,i),r.l=!0,r.exports}i.m=l,i.c=t,i.d=function(e,r,t){i.o(e,r)||Object.defineProperty(e,r,{enumerable:!0,get:t})},i.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.t=function(r,e){if(1&e&&(r=i(r)),8&e)return r;if(4&e&&"object"==typeof r&&r&&r.__esModule)return r;var t=Object.create(null);if(i.r(t),Object.defineProperty(t,"default",{enumerable:!0,value:r}),2&e&&"string"!=typeof r)for(var n in r)i.d(t,n,function(e){return r[e]}.bind(null,n));return t},i.n=function(e){var r=e&&e.__esModule?function(){return e.default}:function(){return e};return i.d(r,"a",r),r},i.o=function(e,r){return Object.prototype.hasOwnProperty.call(e,r)},i.p="https://assets.zindi.africa/";var r=(n=window.webpackJsonp=window.webpackJsonp||[]).push.bind(n);n.push=e;for(var n=n.slice(),o=0;o<n.length;o++)e(n[o]);var s=r;f()}([]);
//# sourceMappingURL=runtime.8d4eb9324d7c3d54849b.js.map</script><script type="text/javascript" src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/vendor.019171b12285f6597772.js" defer=""></script><script type="text/javascript" src="./Climbing the Ladder_ Image Recognition for ML Competitions (Tutorial) - Zindi_files/bundle.9c8d652f1bf8225cb2ca.js" defer=""></script>
<div class="ReactModalPortal"></div>
<script type="text/javascript" id="">(function(a,e,f,g,b,c,d){a[b]=a[b]||function(){(a[b].a=a[b].a||[]).push(arguments)};a[b].l=1*new Date;c=e.createElement(f);d=e.getElementsByTagName(f)[0];c.async=1;c.src=g;d.parentNode.insertBefore(c,d)})(window,document,"script","https://mc.yandex.ru/metrika/tag.js","ym");ym(67869277,"init",{clickmap:!0,trackLinks:!0,accurateTrackBounce:!0,webvisor:!0});</script>
<noscript><div><img src="https://mc.yandex.ru/watch/67869277" style="position:absolute; left:-9999px;" alt=""></div></noscript>
<div id="fatkun-drop-panel">
<a id="fatkun-drop-panel-close-btn">×</a>
<div id="fatkun-drop-panel-inner">
<div class="fatkun-content">
<svg class="fatkun-icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5892"><path d="M494.933333 782.933333c2.133333 2.133333 4.266667 4.266667 8.533334 6.4h8.533333c6.4 0 10.666667-2.133333 14.933333-6.4l2.133334-2.133333 275.2-275.2c8.533333-8.533333 8.533333-21.333333 0-29.866667-8.533333-8.533333-21.333333-8.533333-29.866667 0L533.333333 716.8V128c0-12.8-8.533333-21.333333-21.333333-21.333333s-21.333333 8.533333-21.333333 21.333333v588.8L249.6 475.733333c-8.533333-8.533333-21.333333-8.533333-29.866667 0-8.533333 8.533333-8.533333 21.333333 0 29.866667l275.2 277.333333zM853.333333 874.666667H172.8c-12.8 0-21.333333 8.533333-21.333333 21.333333s8.533333 21.333333 21.333333 21.333333H853.333333c12.8 0 21.333333-8.533333 21.333334-21.333333s-10.666667-21.333333-21.333334-21.333333z" p-id="5893"></path></svg>
<div class="fatkun-title">Drag and Drop</div>
<div class="fatkun-desc">The image will be downloaded</div>
</div>
</div>
</div></body></html>