-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathSpatial_Weights_as_Distance_Functions.html
635 lines (575 loc) · 43.2 KB
/
Spatial_Weights_as_Distance_Functions.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="author" content="Luc Anselin and Grant Morrison" />
<meta name="date" content="2019-12-19" />
<title>Spatial Weights as Distance Functions</title>
<script src="Spatial_Weights_as_Distance_Functions_files/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="Spatial_Weights_as_Distance_Functions_files/bootstrap-3.3.5/css/bootstrap.min.css" rel="stylesheet" />
<script src="Spatial_Weights_as_Distance_Functions_files/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="Spatial_Weights_as_Distance_Functions_files/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="Spatial_Weights_as_Distance_Functions_files/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="Spatial_Weights_as_Distance_Functions_files/navigation-1.1/tabsets.js"></script>
<link href="Spatial_Weights_as_Distance_Functions_files/highlightjs-9.12.0/default.css" rel="stylesheet" />
<script src="Spatial_Weights_as_Distance_Functions_files/highlightjs-9.12.0/highlight.js"></script>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
pre:not([class]) {
background-color: white;
}
</style>
<script type="text/javascript">
if (window.hljs) {
hljs.configure({languages: []});
hljs.initHighlightingOnLoad();
if (document.readyState && document.readyState === "complete") {
window.setTimeout(function() { hljs.initHighlighting(); }, 0);
}
}
</script>
<style type="text/css">
h1 {
font-size: 34px;
}
h1.title {
font-size: 38px;
}
h2 {
font-size: 30px;
}
h3 {
font-size: 24px;
}
h4 {
font-size: 18px;
}
h5 {
font-size: 16px;
}
h6 {
font-size: 12px;
}
.table th:not([align]) {
text-align: left;
}
</style>
<link rel="stylesheet" href="tutor.css" type="text/css" />
<style type = "text/css">
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
code {
color: inherit;
background-color: rgba(0, 0, 0, 0.04);
}
img {
max-width:100%;
}
.tabbed-pane {
padding-top: 12px;
}
.html-widget {
margin-bottom: 20px;
}
button.code-folding-btn:focus {
outline: none;
}
summary {
display: list-item;
}
</style>
<!-- tabsets -->
<style type="text/css">
.tabset-dropdown > .nav-tabs {
display: inline-table;
max-height: 500px;
min-height: 44px;
overflow-y: auto;
background: white;
border: 1px solid #ddd;
border-radius: 4px;
}
.tabset-dropdown > .nav-tabs > li.active:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
content: "";
border: none;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs > li.active {
display: block;
}
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
border: none;
display: inline-block;
border-radius: 4px;
background-color: transparent;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
display: block;
float: none;
}
.tabset-dropdown > .nav-tabs > li {
display: none;
}
</style>
<!-- code folding -->
</head>
<body>
<div class="container-fluid main-container">
<div class="fluid-row" id="header">
<h1 class="title toc-ignore">Spatial Weights as Distance Functions</h1>
<h3 class="subtitle">R Notes</h3>
<h4 class="author">Luc Anselin and Grant Morrison<a href="#fn1" class="footnoteRef" id="fnref1"><sup>1</sup></a></h4>
<h4 class="date">12/19/2019</h4>
</div>
<div id="TOC">
<ul>
<li><a href="#introduction">Introduction</a><ul>
<li><a href="#objectives">Objectives</a><ul>
<li><a href="#r-packages-used">R Packages used</a></li>
<li><a href="#r-commands-used">R Commands used</a></li>
</ul></li>
</ul></li>
<li><a href="#preliminaries">Preliminaries</a><ul>
<li><a href="#load-packages">Load packages</a></li>
<li><a href="#obtaining-the-data-from-the-geoda-website">Obtaining the Data from the GeoDa website</a></li>
</ul></li>
<li><a href="#inverse-distance-weights">Inverse Distance Weights</a><ul>
<li><a href="#concepts">Concepts</a></li>
<li><a href="#creating-inverse-distance-functions-for-distance-bands">Creating inverse distance functions for distance bands</a><ul>
<li><a href="#properties-of-inverse-distance-weights">Properties of inverse distance weights</a></li>
<li><a href="#using-non-geographical-coordinates">Using non-geographical coordinates</a></li>
</ul></li>
<li><a href="#creating-inverse-distance-functions-for-k-nearest-neighbors">Creating inverse distance functions for k-nearest neighbors</a></li>
</ul></li>
<li><a href="#kernal-weights">Kernal Weights</a><ul>
<li><a href="#concepts-1">Concepts</a></li>
<li><a href="#creating-kernal-weights">Creating Kernal weights</a><ul>
<li><a href="#uniform">Uniform</a></li>
<li><a href="#triangular">Triangular</a></li>
<li><a href="#epanechnikov">Epanechnikov</a></li>
<li><a href="#quartic">Quartic</a></li>
<li><a href="#gaussian">Gaussian</a></li>
<li><a href="#variable-bandwidth">Variable bandwidth</a></li>
<li><a href="#treatment-of-diagonal-elements">Treatment of diagonal elements</a></li>
<li><a href="#properties-of-kernal-weights">Properties of kernal weights</a></li>
</ul></li>
</ul></li>
</ul>
</div>
<p><br></p>
<div id="introduction" class="section level2 unnumbered">
<h2>Introduction</h2>
<p>This notebook covers the functionality of the <a href="https://geodacenter.github.io/workbook/4c_distance_functions/lab4c.html">Spatial Weights as Distance Functions</a> section of the GeoDa workbook. We refer to that document for details on the methodology, references, etc. The goal of these notes is to approximate as closely as possible the operations carried out using GeoDa by means of a range of R packages.</p>
<p>The notes are written with R beginners in mind, more seasoned R users can probably skip most of the comments on data structures and other R particulars. Also, as always in R, there are typically several ways to achieve a specific objective, so what is shown here is just one way that works, but there often are others (that may even be more elegant, work faster, or scale better).</p>
<p>For this notebook, we use Cleveland house price data. Our goal in this lab is show how to assign spatial weights based on different distance functions.</p>
<div id="objectives" class="section level3 unnumbered">
<h3>Objectives</h3>
<p>After completing the notebook, you should know how to carry out the following tasks:</p>
<ul>
<li><p>Compute inverse distance functions</p></li>
<li><p>Compute kernal weights functions</p></li>
<li><p>Assess the characteristics of weights based on distance functions</p></li>
</ul>
<div id="r-packages-used" class="section level4 unnumbered">
<h4>R Packages used</h4>
<ul>
<li><p><strong>sf</strong>: To read in the shapefile.</p></li>
<li><p><strong>spdep</strong>: To create k-nearest neighbors and distance-band neighbors, calculate distances between neighbors, convert to a weights structure, and coercion methods to sparse matrices.</p></li>
<li><p><strong>geodaData</strong>: To access the data for this notebook.</p></li>
</ul>
</div>
<div id="r-commands-used" class="section level4 unnumbered">
<h4>R Commands used</h4>
<p>Below follows a list of the commands used in this notebook. For further details and a comprehensive list of options, please consult the <a href="https://www.rdocumentation.org">R documentation</a>.</p>
<ul>
<li><p><strong>Base R</strong>: <code>install.packages</code>, <code>library</code>, <code>setwd</code>, <code>class</code>, <code>str</code>, <code>lapply</code>, <code>attributes</code>, <code>summary</code>, <code>head</code>, <code>seq</code>, <code>as</code>, <code>cbind</code>, <code>max</code>, <code>unlist</code>, <code>length</code>, <code>sqrt</code>, <code>exp</code>, <code>diag</code>, <code>sort</code>, <code>append</code></p></li>
<li><p><strong>sf</strong>: <code>st_read</code>, <code>plot</code></p></li>
<li><p><strong>spdep</strong>: <code>knn2nb</code>, <code>dnearneigh</code>, <code>knearneigh</code>, <code>nb2listw</code>, <code>mat2listw</code></p></li>
</ul>
</div>
</div>
</div>
<div id="preliminaries" class="section level2 unnumbered">
<h2>Preliminaries</h2>
<p>Before starting, make sure to have the latest version of R and of packages that are compiled for the matching version of R (this document was created using R 3.5.1 of 2018-07-02). Also, optionally, set a working directory, even though we will not actually be saving any files.<a href="#fn2" class="footnoteRef" id="fnref2"><sup>2</sup></a></p>
<div id="load-packages" class="section level3 unnumbered">
<h3>Load packages</h3>
<p>First, we load all the required packages using the <code>library</code> command. If you don’t have some of these in your system, make sure to install them first as well as their dependencies.<a href="#fn3" class="footnoteRef" id="fnref3"><sup>3</sup></a> You will get an error message if something is missing. If needed, just install the missing piece and everything will work after that.</p>
<pre class="r"><code>library(sf)
library(spdep)
library(geodaData)</code></pre>
</div>
<div id="obtaining-the-data-from-the-geoda-website" class="section level3 unnumbered">
<h3>Obtaining the Data from the GeoDa website</h3>
<p>All of the data for the R notebooks is available in the <strong>geodaData</strong> package. We loaded the library earlier, now to access the individual data sets, we use the double colon notation. This works similar to to accessing a variable with <code>$</code>, in that a drop down menu will appear with a list of the datasets included in the package. For this notebook, we use <code>clev_pts</code>.</p>
<p>Otherwise, Tt get the data for this notebook, you will and to go to <a href="https://geodacenter.github.io/data-and-lab//clev_sls_154_core/">Cleveland Home Sales</a> The download format is a zipfile, so you will need to unzip it by double clicking on the file in your file finder. From there move the resulting folder titled: nyc into your working directory to continue. Once that is done, you can use the <strong>sf</strong> function: <code>st_read()</code> to read the shapefile into your R environment.</p>
<pre class="r"><code>clev.points <- geodaData::clev_pts</code></pre>
</div>
</div>
<div id="inverse-distance-weights" class="section level2 unnumbered">
<h2>Inverse Distance Weights</h2>
<div id="concepts" class="section level3 unnumbered">
<h3>Concepts</h3>
<p>One can readily view spatial weights based on a distance cut-off as representing a step function, with a value of 1 for neighbors with <span class="math inline">\(d_{ij} < \delta\)</span>, and a value of 0 for others. As before, <span class="math inline">\(d_{ij}\)</span> stands for the distance between observations i and j, and <span class="math inline">\(\delta\)</span> is the bandwidth.</p>
<p>A straightforward extension of this principle is to consider a continuous parameterized function of distance itself: <span class="math display">\[w_{ij}=f(d_{ij},\theta)\]</span> with f as a functional form and <span class="math inline">\(\theta\)</span> a vector of parameters.</p>
<p>In order to conform to Tobler’s first law of geography, a distance decay effect must be respected. In other words, the value of the function of distance needs to decrease with a growing distance. More formally, the partial derivative of the distance function with respect to distance should be negative, <span class="math inline">\(\partial{}w_{ij}/\partial{}d_{ij}<0\)</span> .</p>
<p>Commonly used distance functions are the inverse, with <span class="math inline">\(w_{ij}=1/d_{ij}^\alpha\)</span>(and <span class="math inline">\(\alpha\)</span> as a parameter), and the negative exponential, with <span class="math inline">\(w_{ij}=e^{-\beta d_{ij}}\)</span>(and <span class="math inline">\(\beta\)</span> as a parameter). The functions are often combined with a distance cut-off criterion, such that <span class="math inline">\(w_{ij}=0\)</span> for <span class="math inline">\(d_{ij}>\delta\)</span>.</p>
<p>In practice, the parameters are seldom estimated, but typically set to a fixed value, such as <span class="math inline">\(\alpha=1\)</span> for inverse distance weights (<span class="math inline">\(1/d_{ij}\)</span>), and <span class="math inline">\(\alpha=2\)</span> for gravity weights (<span class="math inline">\(1/d_{ij}^2\)</span>). By convention, the diagonal elements of the spatial weights are set to zero and not computed. Plugging in a value of <span class="math inline">\(d_{ii}=0\)</span> would yield division by zero for inverse distance weights.</p>
<p>The distance-based weights depend not only on the parameter value and functional form, but also on the metric used for distance. Since the weights are inversely related to distance, large values for the latter will yield small values for the former, and vice versa. This may be a problem in practice when the distances are so large (i.e., measured in small units) that the corresponding inverse distance weights become close to zero, possibly resulting in a zero spatial weights matrix.</p>
<p>In addition, a potential problem may occur when the distance metric is such that distances take on values less than one. As a consequence, some inverse distance values may be larger than one, which is typically not a desired result.</p>
<p>Rescaling of the coordinates will fix both problems.</p>
</div>
<div id="creating-inverse-distance-functions-for-distance-bands" class="section level3">
<h3>Creating inverse distance functions for distance bands</h3>
<p>To create our inverse disatnce weights, we follow the steps involved with creating distance-band neighbors along with a few additional steps to calculate and assign the weight values. Here we will go over a basic outline of the steps to create the inverse distance weights. First we calculate our distance-band neighbors. Next we get the distances between each neighbors stored in the same format as the neighbors data structure. Then we apply a function to each element in this structure, giving us the inverse distances. Finally we assign these as the weight values when converting from class <strong>nb</strong> to class <strong>listw</strong>.</p>
<p>We begin by putting our coordinates in a separate matrix from <strong>clev.points</strong></p>
<pre class="r"><code>coords <- cbind(clev.points$x,clev.points$y)</code></pre>
<p>In order to calulate our distance-band neighbors, we need an upper and lower distance bound. The lower is always 0 for the most part. We can put anything for the upper, but we will pick a value, that keeps isolates out of our distance-band-neighbors. To do this we need to find the k-nearest neighbors for k = 1, then get the maximum distance between points. This is covered in the distance-band spatial weights notebook, but we will go through the steps here.</p>
<p>To get the k-nearest neighbors for k = 1, we need two function from the <strong>spdep</strong> library: <code>knn2nb</code> and <code>knearneigh</code>. <code>knearneigh</code> calculates the neighbors and stores the information in class <strong>knn</strong>, and <code>knn2nb</code> converts the class to <strong>nb</strong>, so we can work with it further.</p>
<pre class="r"><code>k1 <- knn2nb(knearneigh(coords))</code></pre>
<p>Computing the critcal threshold will require a few functions now that we have a neighbors list. First step is to get the distances between each point and it’s closest neighbor. This can be done with the <code>nbdists</code>. With these distances, we just need to find the maximum. For this we use the <code>max</code> command. However, we cannot do this with lists, so we must first get a data type that works for the <code>max</code> command, in our case, we use <code>unlist</code></p>
<pre class="r"><code>critical.threshold <- max(unlist(nbdists(k1,coords)))
critical.threshold</code></pre>
<pre><code>## [1] 3598.055</code></pre>
<p>We have all the necessary components to calculate the distance-band neighbors. To get these we use <code>dnearneigh</code>. The parameters needed are the coordinates, a lower distance bound and an upper distance bound.</p>
<pre class="r"><code>nb.dist.band <- dnearneigh(coords, 0, critical.threshold)</code></pre>
<p>To get inverse distance, we need to calculate the distances between all of the neighbors. for this we will use <code>nbdists</code>, which gives us the distances in a similar structure to our input neighbors list. To use this function we need to input the neighbors list and the coordinates.</p>
<pre class="r"><code>distances <- nbdists(nb.dist.band,coords)
distances[1]</code></pre>
<pre><code>## [[1]]
## [1] 385.161 3013.071 1160.312 1858.904 3367.150 2525.503 3253.025 3390.735
## [9] 3369.644</code></pre>
<p>Calculating the inverse distances will require a function that applies 1/x over the entire <strong>distances</strong> data structure. We will use <code>lapply</code> to accomplish this. The parameters needed are the distances, and a function which we specify in <code>lapply</code>. We use the <code>function</code> operator with <strong>(1/x)</strong> to get the appropriate function.</p>
<pre class="r"><code>invd1 <- lapply(distances, function(x) (1/x))</code></pre>
<p>Here we check the length of the inverse distances to make sure it lines up with our neighbors list.</p>
<pre class="r"><code>length(invd1)</code></pre>
<pre><code>## [1] 205</code></pre>
<p>We check the first element of the resulting data structure to make sure it is in line with the neighbors list structure. This is important because we will need the structures to correspond in order to assign the inverse distances as the weight values when converting from a neighbors list or class <strong>nb</strong> to a weight structure: class <strong>listw</strong>.</p>
<pre class="r"><code>invd1[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.0025963168 0.0003318873 0.0008618371 0.0005379514 0.0002969871
## [6] 0.0003959608 0.0003074062 0.0002949213 0.0002967673</code></pre>
<p>A key insight from the first element of the inverse distance structure is that the values are very small, or too close to zero. The unit of distance for our dataset is in feet. This means distance values between points can be quite large and result in small inverses. To correct for this scale dependence, we can rescale the distances by repeating the inverse calculations, while adjusting the scale. We can make this adjustment by dividing <strong>x</strong> in our function by 100, before calculating the inverses.</p>
<pre class="r"><code>invd1a <- lapply(distances, function(x) (1/(x/100)))
invd1a[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.25963168 0.03318873 0.08618371 0.05379514 0.02969871 0.03959608 0.03074062
## [8] 0.02949213 0.02967673</code></pre>
<p>Now that we have properly scaled inverse distances, we can assign them as weight values. This is done in the conversion function <code>nb2listw</code>. To assign the weights, we use the <code>glist =</code> argument. For this to work we also have to specify <code>style = "B"</code>, otherwise the <code>listw</code> function will use the default row standardization.</p>
<pre class="r"><code>invd.weights <- nb2listw(nb.dist.band,glist = invd1a,style = "B")</code></pre>
<p>Here we take a cursory look at our weights with <code>summary</code> to get basic imformation and statistics.</p>
<pre class="r"><code>summary(invd.weights)</code></pre>
<pre><code>## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 205
## Number of nonzero links: 2592
## Percentage nonzero weights: 6.167757
## Average number of links: 12.6439
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 6 6 9 5 5 10 8 10 13 12 11 6 17 8 9 11 13 6 10 7 2 4 6 2 1 1
## 27 28 29 30 32
## 1 2 1 1 2
## 6 least connected regions:
## 59 114 115 116 117 198 with 1 link
## 2 most connected regions:
## 82 88 with 32 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 205 42025 180.2882 145.9202 1018.442</code></pre>
<p>We can check the values of the weights by using <code>$weights</code> to access the values.</p>
<pre class="r"><code>invd.weights$weights[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.25963168 0.03318873 0.08618371 0.05379514 0.02969871 0.03959608 0.03074062
## [8] 0.02949213 0.02967673</code></pre>
<div id="properties-of-inverse-distance-weights" class="section level4 unnumbered">
<h4>Properties of inverse distance weights</h4>
<p>Since the properties only pertain to the connectivity structure implied by the weights, they are identical to the ones obtained for the standard distance-band weights. It is important to keep in mind that the actual values for the weights are ignored in this operation.</p>
<pre class="r"><code>plot(invd.weights, coords, lwd=.2, col="blue", cex = .5)</code></pre>
<p><img src="Spatial_Weights_as_Distance_Functions_files/figure-html/unnamed-chunk-16-1.png" width="672" /></p>
<p>The connectivity map and the connectivity graph associated with the weights are the same as before as well.</p>
</div>
<div id="using-non-geographical-coordinates" class="section level4 unnumbered">
<h4>Using non-geographical coordinates</h4>
<p>So far we have been using x and y coordinates for the inputs into distance calculates, but it is important to note that you can use any two variables contained in the dataset in place of x and y coordinates. For example, this allows for the computation of so-called socio-economic weights, where the difference between two locations on any two variables can be used as the distance metric. We don’t do this in this notebook, as the only meaningful variable in our dataset is housing prices.</p>
</div>
</div>
<div id="creating-inverse-distance-functions-for-k-nearest-neighbors" class="section level3 unnumbered">
<h3>Creating inverse distance functions for k-nearest neighbors</h3>
<p>We can compute inverse distance weights for k-nearest neighbors using the same approach as for distance-band neighbors. The only difference being that we don’t have to calculate a critical threshold for k-nearest neighbors.</p>
<p>We start by getting the k-nearest neighbors for k = 6. We do this with <code>knearneigh</code> and <code>knn2nb</code>.</p>
<pre class="r"><code>k6 <- knn2nb(knearneigh(coords, k = 6))</code></pre>
<p>Now that we have the neighbors list we need all of the distances between neighbors in a similar data structure, which we use <code>nbdist</code> for again.</p>
<pre class="r"><code>k.distances <- nbdists(k6, coords)</code></pre>
<p>Here we calculate the inverse distances, keeping in mind the scale from the distance-band weights from earlier.</p>
<pre class="r"><code>invd2a <- lapply(k.distances, function(x) (1/(x/100)))
invd2a[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.25963168 0.03318873 0.08618371 0.05379514 0.03959608 0.03074062</code></pre>
<p>Lastly, we assign the weight values with the <code>glist =</code> parameter and speficy the <code>style</code> as “B” to avoid default computations.</p>
<pre class="r"><code>invd.weights.knn <- nb2listw(k6,glist = invd2a,style = "B")
invd.weights.knn$weights[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.25963168 0.03318873 0.08618371 0.05379514 0.03959608 0.03074062</code></pre>
</div>
</div>
<div id="kernal-weights" class="section level2 unnumbered">
<h2>Kernal Weights</h2>
<div id="concepts-1" class="section level3 unnumbered">
<h3>Concepts</h3>
<p>Kernel weights are used in non-parametric approaches to model spatial covariance, such as in the HAC method for heteroskedastic and spatial autocorrelation consistent variance estimates.</p>
<p>The kernel weights are defined as a function K(z) of the ratio between the distance dij from i to j, and the bandwidth <span class="math inline">\(h_i\)</span>, with <span class="math inline">\(z=d_{ij}/h_i\)</span>. This ensures that z is always less than 1. For distances greater than the bandwidth, K(z)=0.</p>
<p>We will go over five different kernal weights functions that are supported by GeoDa:</p>
<ul>
<li><p>Uniform, <span class="math inline">\(K(z) = 1/2\)</span> for <span class="math inline">\(\mid z \mid < 1\)</span></p></li>
<li><p>Triangular, <span class="math inline">\(K(z) = (1 - \mid z \mid )\)</span> for <span class="math inline">\(\mid z \mid < 1\)</span></p></li>
<li><p>Quadratic or Epanechnikov, <span class="math inline">\(K(z) = (3/4)(1 - z^2)\)</span> for <span class="math inline">\(\mid z \mid < 1\)</span></p></li>
<li><p>Quartic, <span class="math inline">\(K(z) = (15/16)(1 - z^2)^2\)</span> for <span class="math inline">\(\mid z \mid < 1\)</span></p></li>
<li><p>Gaussian, <span class="math inline">\(K(z) = (2\pi)^{1/2}\exp(-z^2/2)\)</span></p></li>
</ul>
<p>Typically, the value for the diagonal elements of the weights is set to 1, although GeoDa allows for the actual kernel value to be used as well. We will go through both of these options too.</p>
<p>Many careful decisions must be made in selecting a kernel weights function. Apart from the choice of a functional form for K( ), a crucial aspect is the selection of the bandwidth. In the literature, the latter is found to be more important than the functional form.</p>
<p>A drawback of fixed bandwidth kernel weights is that the number of non-zero weights can vary considerably, especially when the density of the point locations is not uniform throughout space. This is the same problem encountered for the distance band spatial weights.</p>
<p>In GeoDa, there are two types of fixed bandwidths for kernel weights. One is the max-min distance used earlier (the largest of the nearest-neighbor distances). The other is the maximum distance for a given specification of k-nearest neighbors. For example, with knn set to a given value, this is the distance between the selected k-nearest neighbors pairs that are the farthest apart.</p>
</div>
<div id="creating-kernal-weights" class="section level3 unnumbered">
<h3>Creating Kernal weights</h3>
<p>In creating kernal weights, we will cover two important options: the fixed bandwidth and the variable bandwidth. For the fixed bandwidth, we will be using distance-band neighbors. For the variable bandwidth we will need kth-nearest neighbors.</p>
<p>To start, we will compute a new distance-band neighbors list with the critcial threshold, calculated earlier in the notebook.</p>
<pre class="r"><code>kernal.nb <- dnearneigh(coords, 0, critical.threshold)</code></pre>
<p>Before we start computing kernal weights, we need to add the diagonal elements to our neighbors list. We do this because in the kernal weights methods, the diagonal element is either assigned a value of 1 or is computed in the kernal function with a distance of 0. It is important to note that the diagonal element means a point is a neighbor of its own self when include in the neighbors list.</p>
<p><strong>spdep</strong> has a built in function for this. <code>include.self</code> can be used to add the diagonal elements to a neighbors list of class <strong>nb</strong>.</p>
<pre class="r"><code>include.self(kernal.nb)</code></pre>
<pre><code>## Neighbour list object:
## Number of regions: 205
## Number of nonzero links: 2797
## Percentage nonzero weights: 6.655562
## Average number of links: 13.6439</code></pre>
<pre class="r"><code>kernal.nb[[2]]</code></pre>
<pre><code>## [1] 1 6 7 8 9 10 31 32 34</code></pre>
<p>With the diagonal elements, we can proceed further. To compute the kernal weight values, we need the corresponding distances for each neighbor. We do this with <code>nbdists</code>, same as earlier.</p>
<pre class="r"><code>kernalw.distances <- nbdists(kernal.nb, coords)
kernalw.distances[1]</code></pre>
<pre><code>## [[1]]
## [1] 385.161 3013.071 1160.312 1858.904 3367.150 2525.503 3253.025 3390.735
## [9] 3369.644</code></pre>
<p>When checking the first row of the distances, we see a 0. This is the distance value for the diagonal element.</p>
<div id="uniform" class="section level4 unnumbered">
<h4>Uniform</h4>
<p><span class="math inline">\(K(z) = 1/2\)</span> for <span class="math inline">\(\mid z \mid < 1\)</span></p>
<p>To get uniform weights, we use a similar method to the inverse disatnce weights. We use <code>lapply</code> to apply a function to all elements of our distance structure. The function, in this case, is <code>0 * x + .5</code>. We do this to assign uniform weights of .5, the <code>0*x</code> is a necessary addition to get <code>lapply</code> to work properly.</p>
<pre class="r"><code>uniform <- lapply(kernalw.distances, function(x) x*0 + .5)
uniform[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5</code></pre>
<p>Then to assign the weights, we use the same procedure as the inverse distance weights. We use the <code>glist</code> argument to explicity assign the weight we calculated above.</p>
<pre class="r"><code>uniform.weights <- nb2listw(kernal.nb,glist = uniform,style = "B")</code></pre>
</div>
<div id="triangular" class="section level4 unnumbered">
<h4>Triangular</h4>
<p><span class="math inline">\(K(z) = (1 - \mid z \mid )\)</span> for <span class="math inline">\(\mid z \mid < 1\)</span></p>
<p>Same process, for triangular, we just apply a different function to the distances. We use <code>abs</code> to get the absolute value in our caluculations.</p>
<pre class="r"><code>triangular <- lapply(kernalw.distances, function(x) 1- abs((x/critical.threshold)))
triangular[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.89295300 0.16258344 0.67751688 0.48335866 0.06417492 0.29809225 0.09589360
## [8] 0.05762014 0.06348183</code></pre>
<pre class="r"><code>triang.weights <- nb2listw(kernal.nb,glist = triangular,style = "B")</code></pre>
<pre><code>## Warning in nb2listw(kernal.nb, glist = triangular, style = "B"): zero sum
## general weights</code></pre>
<pre class="r"><code>triang.weights$weights[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.89295300 0.16258344 0.67751688 0.48335866 0.06417492 0.29809225 0.09589360
## [8] 0.05762014 0.06348183</code></pre>
</div>
<div id="epanechnikov" class="section level4 unnumbered">
<h4>Epanechnikov</h4>
<p>Quadratic or Epanechnikov, <span class="math inline">\(K(z) = (3/4)(1 - z^2)\)</span> for <span class="math inline">\(\mid z \mid < 1\)</span></p>
<pre class="r"><code>epanechnikov <- lapply(kernalw.distances, function(x) .75*(1-(x/critical.threshold)^2))
epanechnikov[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.74140570 0.22405013 0.67200348 0.54981129 0.09317357 0.38049413 0.13694371
## [8] 0.08394016 0.09220029</code></pre>
<pre class="r"><code>epan.weights <- nb2listw(kernal.nb,glist = epanechnikov,style = "B")</code></pre>
<pre><code>## Warning in nb2listw(kernal.nb, glist = epanechnikov, style = "B"): zero sum
## general weights</code></pre>
<pre class="r"><code>epan.weights$weights[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.74140570 0.22405013 0.67200348 0.54981129 0.09317357 0.38049413 0.13694371
## [8] 0.08394016 0.09220029</code></pre>
</div>
<div id="quartic" class="section level4 unnumbered">
<h4>Quartic</h4>
<p><span class="math inline">\(K(z) = (15/16)(1 - z^2)^2\)</span> for <span class="math inline">\(\mid z \mid < 1\)</span></p>
<pre class="r"><code>quartic <- lapply(kernalw.distances, function(x) (15/16)*(1-(x/critical.threshold)^2)^2)
quartic[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.91613736 0.08366410 0.75264779 0.50382076 0.01446886 0.24129297 0.03125597
## [8] 0.01174325 0.01416816</code></pre>
<pre class="r"><code>quartic.weights <- nb2listw(kernal.nb,glist = quartic,style = "B")</code></pre>
<pre><code>## Warning in nb2listw(kernal.nb, glist = quartic, style = "B"): zero sum general
## weights</code></pre>
<pre class="r"><code>quartic.weights$weights[1]</code></pre>
<pre><code>## [[1]]
## [1] 0.91613736 0.08366410 0.75264779 0.50382076 0.01446886 0.24129297 0.03125597
## [8] 0.01174325 0.01416816</code></pre>
</div>
<div id="gaussian" class="section level4 unnumbered">
<h4>Gaussian</h4>
<p><span class="math inline">\(K(z) = (2\pi)^{1/2}\exp(-z^2/2)\)</span></p>
<p>For this formula we need the <code>sqrt</code> function and the <code>exp</code> function, but other than that, it is a similar contruction as the others.</p>
<pre class="r"><code>gaussian.w <- lapply(kernalw.distances, function(x) sqrt(2*pi)*exp((-(x/critical.threshold)^2)/2))
gaussian.w[1]</code></pre>
<pre><code>## [[1]]
## [1] 2.492308 1.765273 2.379620 2.193458 1.617779 1.959327 1.665681 1.607851
## [9] 1.616730</code></pre>
<pre class="r"><code>gaussian.weights <- nb2listw(kernal.nb,glist = gaussian.w,style = "B")
gaussian.weights$weights[1]</code></pre>
<pre><code>## [[1]]
## [1] 2.492308 1.765273 2.379620 2.193458 1.617779 1.959327 1.665681 1.607851
## [9] 1.616730</code></pre>
</div>
<div id="variable-bandwidth" class="section level4 unnumbered">
<h4>Variable bandwidth</h4>
<p>Now that we have covered the 5 types of kernal weight function, implemented by GeoDa, we will work to emulate the example from the corresponding GeoDa workbook in R. The options in this example are conveniently done with GeoDa, but in our case there will be more leg work to get this done. We will be doiing a variable bandwidth with diagonal elements set to a value of 1 for a triangular kernal.</p>
<p>For the variable bandwidth, we will be using <strong>k6</strong>: a k-nearest neighbors list, created earlier in the notebook. We already have the associated disatnces in <strong>k.distances</strong>. We will be directly altering the distance object, so we will assign a copy <strong>k.disatnces1</strong>.</p>
<pre class="r"><code>k.distances1 <- k.distances</code></pre>
<p>In order to implement our variable bandwidth, we will need to loop through each element of <strong>k.distances</strong>, find the maximum distance of each row, then apply the triangular kernal weight function of that row with the bandwidth being used to calculate the z values for the K(z) function.</p>
<p>To begin, we make a <code>for</code> loop using the <code>in</code> operator. The range we specify is 1 to the length of <strong>k.distances</strong>. We get this length with <code>length</code>. This will allow us to excute the statements in the loop on i-values 1 to 205.</p>
<p>The first thing we need in the loop is the variable bandwidth value for the ith row. This is easily done by callling the <code>max</code> function on the row. We get the associated row by <strong>k.distances[[i]]</strong>.</p>
<p>Next we compute the new row with our triagular kernal function. We use the <code>abs</code> function for absolute value. Lastly, we assign the new row values to the the ith row of the <strong>k.distances</strong> structure.</p>
<pre class="r"><code>for (i in 1:length(k.distances1)){
bandwidth <- max(k.distances1[[i]])
new_row <- 1- abs(k.distances1[[i]] / bandwidth)
k.distances1[[i]] <- new_row
}
k.distances1[[1]]</code></pre>
<pre><code>## [1] 0.88159911 0.07376327 0.64331286 0.42856135 0.22364475 0.00000000</code></pre>
<p>There is one potential issue with what we have done so far for the variable bandwidth. Our bandwidth is the same as the largest distance in each row, so one neighbor will get 0 weight in the resulting weight structure for most of our functions. To give weight to this value, we will need to adjust the associated bandwidths, by getting a value that is between the 6th nearest neighbor and the 7th nearest neighbor. We will do this by taking the average of the two values for our bandwith calculations. This will require a few extra steps and adjustments to our <code>for</code> loop.</p>
<p>The first thing we need to implement this is the k-nearest neighbors for k = 7. This is the same process as our previous calculations for k-nearest neighbors.</p>
<pre class="r"><code>k7 <- knn2nb(knearneigh(coords, k = 7))</code></pre>
<p>Next we get the associated distances using <code>nbdists</code>.</p>
<pre class="r"><code>k7.distances <- nbdists(k7, coords)</code></pre>
<p>To avoid altering the original <strong>k.distances</strong>, we will assign a new variable to hold the necessary information.</p>
<pre class="r"><code>k.distances2 <- k.distances</code></pre>
<p>Here we remake the previous <code>for</code> loop with a few changes. Now we loop through and find the max distance for both the 7th nearest neighbors and 6th nearest neighbors, then get the average between the two before computing the kernal weight function.</p>
<pre class="r"><code>for (i in 1:length(k.distances)){
maxk6 <- max(k.distances2[[i]])
maxk7 <- max(k7.distances[[i]])
bandwidth <- (maxk6 + maxk7) /2
new_row <- 1- abs(k.distances2[[i]] / bandwidth)
k.distances2[[i]] <- new_row
}
k.distances2[[1]]</code></pre>
<pre><code>## [1] 0.88364023 0.08973071 0.64946181 0.43841241 0.23702838 0.01723905</code></pre>
<pre class="r"><code>var.band.weights <- nb2listw(k6,glist = k.distances2,style = "B")
var.band.weights$weights[[1]]</code></pre>
<pre><code>## [1] 0.88364023 0.08973071 0.64946181 0.43841241 0.23702838 0.01723905</code></pre>
<p>With our new weights structure all the neighbors included have a nonzero weight.</p>
</div>
<div id="treatment-of-diagonal-elements" class="section level4 unnumbered">
<h4>Treatment of diagonal elements</h4>
<p>As of now, we have just been applying the kernal function to the diagonal elements. The default in GeoDa is to assign a value of 1 to these elements. For us to do this, we need a little extra work. We will take advantage of the coercion methods that <strong>spdep</strong> provides from class <strong>listw</strong> to <strong>RsparseMatrix</strong> of the <strong>Matrix</strong> package. Once converted to class **RsparseMatrix, we can assign values of 1 to the diagonal elements, then convert back.</p>
<p>To start we use the <code>as</code> function with <strong>var.band.weights</strong> as the first parameter. We specify the class to convert to with the string: “RsparseMatrix”. We use <strong>var.band.weights</strong> to remake the GeoDa workbook example.</p>
<pre class="r"><code>B <- as(var.band.weights, "RsparseMatrix")</code></pre>
<pre><code>## Warning: Function as_dgRMatrix_listw moved to the spatialreg package</code></pre>
<pre><code>## Warning in as_dgRMatrix_listw(from): install the spatialreg package</code></pre>
<p>Now that we have converted, we can assign values of 1 to the diagonal elements with the <code>diag</code> function.</p>
<pre class="r"><code>diag(B) <- 1</code></pre>
<p>With this, we can now convert back to class <strong>listw</strong> with the <strong>spdep</strong> function <code>mat2listw</code>. The function is pretty self explanatory, as it converts from a matrix the <strong>listw</strong>. We need one extra step to accomplish the conversion. We first need to convert <strong>B</strong> to class <strong>dgTMatrix</strong> before we can use the <code>mat2listw</code> function.</p>
<pre class="r"><code>var.band.w2 <- mat2listw(as(B, "dgTMatrix"))</code></pre>
</div>
<div id="properties-of-kernal-weights" class="section level4 unnumbered">
<h4>Properties of kernal weights</h4>
<p>The connectivity plot will be the same for kernal weights as the 6th nearest neighbors structure. This is because they have the same neighbors list and connectivity ignores the weights themselves. While our connectivity plot will be the same, the histogram and summary stats will be different from the ones in GeoDa. This is because we have to add the diagonal elements to the neighbors structure before moving forward. This is seen below when our 6th-nearest neighbors has an average of 7 links. To get a more accurate view of the connectivity properties, the structure will have to be examined before the diagonal elements are added.</p>
<pre class="r"><code>summary(var.band.w2)</code></pre>
<pre><code>## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 205
## Number of nonzero links: 1435
## Percentage nonzero weights: 3.414634
## Average number of links: 7
## Non-symmetric neighbours list
## Link number distribution:
##
## 7
## 205
## 205 least connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 with 7 links
## 205 most connected regions:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 with 7 links
##
## Weights style: M
## Weights constants summary:
## n nn S0 S1 S2
## M 205 42025 622.5036 844.1885 7916.201</code></pre>
<pre class="r"><code>plot(var.band.w2, coords, lwd=.2, col="blue", cex = .5)</code></pre>
<p><img src="Spatial_Weights_as_Distance_Functions_files/figure-html/unnamed-chunk-44-1.png" width="672" /></p>
</div>
</div>
</div>
<div class="footnotes">
<hr />
<ol>
<li id="fn1"><p>University of Chicago, Center for Spatial Data Science – <a href="mailto:[email protected]">[email protected]</a>,<a href="mailto:[email protected]">[email protected]</a><a href="#fnref1">↩</a></p></li>
<li id="fn2"><p>Use <code>setwd(directorypath)</code> to specify the working directory.<a href="#fnref2">↩</a></p></li>
<li id="fn3"><p>Use <code>install.packages(packagename)</code>.<a href="#fnref3">↩</a></p></li>
</ol>
</div>
</div>
<script>
// add bootstrap table styles to pandoc tables
function bootstrapStylePandocTables() {
$('tr.header').parent('thead').parent('table').addClass('table table-condensed');
}
$(document).ready(function () {
bootstrapStylePandocTables();
});
</script>
<!-- tabsets -->
<script>
$(document).ready(function () {
window.buildTabsets("TOC");
});
$(document).ready(function () {
$('.tabset-dropdown > .nav-tabs > li').click(function () {
$(this).parent().toggleClass('nav-tabs-open')
});
});
</script>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
</body>
</html>