-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathanalysis.Rnw
1876 lines (1508 loc) · 63.9 KB
/
analysis.Rnw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% -*- mode: noweb; noweb-default-code-mode: R-mode; -*-
\SweaveOpts{ results=hide}
\SweaveOpts{ include=FALSE}
\SweaveOpts{ echo=FALSE}
\SweaveOpts{ engine=R}
\SweaveOpts{ keep.source= TRUE}
\SweaveOpts{ eval=FALSE}
\SweaveOpts{ eval=TRUE}
\graphicspath{ {analysis/} }
\chapter{Analysis}
\label{cha:analysis}
<<init>>=
options( prompt= " ", continue= " ", width= 60)
options(error= function(){
## recover()
options( prompt= "> ", continue= "+ ", width= 80)
})
source( "~/thesis/code/peel.R")
source( "~/thesis/code/maps.R")
texWd <- path.expand( "~/thesis/analysis")
rasterWd <- path.expand( "~/thesis/data/analysis")
dataPath <- path.expand( "~/thesis/data")
setwd( rasterWd)
overwriteRasters <- TRUE
overwriteFigures <- TRUE
# studyArea used to work out RMSE
# calcs and tables
##studyArea <- "thumb"
studyArea <- "mlct"
# bands are numbered from one but
# classes from zero. Used for stacks/brick
# where bands correspond to classes
peelBands <- peelClasses +1
# mask and agland exported from GRASS
# no need to mask or crop
cusaMask <- raster( sprintf( "%s/mask_cusa.tif",
dataPath))
cusaExtent <- extent( cusaMask)
thumbExtent <- extent( -( 83 +30 /60), -( 82 +25 /60),
42 +55 /60, 44 +5 /60 )
# default raster() output
# has geographic proj, full extent
# by default
world <- raster()
res(world) <- 5/60
grid <- raster( cusaMask)
grid[] <- cellsFromExtent( world, grid)
grid <- raster::mask( grid, cusaMask)
nulls <- raster( cusaMask)
nulls[] <- NA
zeroes <- raster( cusaMask)
zeroes[] <- 0
ones <- raster( cusaMask)
ones[] <- 1
if( studyArea == "thumb") {
cusaMask <- crop( cusaMask, thumbExtent)
}
acresFile <- paste( "acres",
paste( studyArea, ".tif", sep=""),
sep="_")
if( overwriteRasters) {
acres <- area( cusaMask) *247.105381
acres <- writeRaster( acres,
filename= acresFile,
overwrite= TRUE)
} else acres <- raster( acresFile)
agland <- stack( list.files( paste( dataPath, "agland", sep="/"),
patt= "(cropland|pasture).tif$",
full.names= TRUE))
layerNames(agland) <- c("crop", "open")
agland <- setMinMax( agland)
aglandCrop <- unstack( agland)[[ 1]]
if( studyArea == "thumb") {
agland <- crop( agland, thumbExtent)
}
agg05 <-
brick( list.files( dataPath,
patt= paste( studyArea, "_Amin_0.5_agg.tif", sep=""),
full.names= TRUE))
layerNames( agg05) <- names( peelClasses)
nomos05 <-
brick( list.files( dataPath,
patt= paste( studyArea, "_Amin_0.5_nomosaic.tif", sep=""),
full.names= TRUE))
layerNames( nomos05) <- c( names( peelClasses)[ -8], "total")
agg1 <-
brick( list.files( dataPath,
patt= paste( studyArea, "1_agg.tif", sep=""),
full.names= TRUE))
layerNames( agg1) <- names( peelClasses)
nomos1 <-
brick( list.files( dataPath,
patt= paste( studyArea, "1_Amin_1_nomosaic.tif", sep=""),
full.names= TRUE))
layerNames( nomos1) <- c( names( peelClasses)[ -8], "total")
nlcd <-
brick( sapply( names( peelClasses),
function( cover) {
if( cover == "mosaic") {
zeroes
} else {
fn <-
list.files( paste( dataPath, "nlcd",
sep = "/"),
patt= paste( "nlcd", cover, "5min.tif$",
sep = "_"),
full.names= TRUE)
crop( raster( fn), cusaMask)
}}))
nlcd <- writeRaster( nlcd,
filename= paste( path.expand(rasterWd),
"nlcd.tif",
sep= "/"),
overwrite= TRUE)
layerNames( nlcd) <- names( peelClasses)
rasterNames <- c( "agland", "nlcd", "agg05", "agg1", "nomos05", "nomos1")
dataSets <- sapply( rasterNames, function( n) eval( parse( text=n)))
areas <- llply( dataSets,
function( d) {
res <- cellStats( d *acres, sum)
names( res) <- layerNames( d)
res
})
areasDf <-
ldply( areas, function( a) {
melt( t( as.data.frame( a)))
})
areasDf <-
areasDf[, c( 1, 3, 4)]
colnames( areasDf) <-
c( "map", "class", "acres")
areasDf$map <-
factor( areasDf$map,
levels= rasterNames)
legendOrder <- rev( c( 6, 4, 2, 3, 9, 7, 5, 1, 8))
areasDf$class <-
factor( areasDf$class,
levels= c( names( peelLegend)[ rev( legendOrder)], "total"))
if( overwriteFigures) areasPlot <-
qplot( map, acres /10^6,
data= subset(areasDf, class != "total"),
geom="bar", position= "stack",
fill= class,
stat="summary", fun.y="sum") +
scale_fill_manual( "",
values= peelLegend[ legendOrder],
##peelLegend[ levels( areasDf$class)[1:9]],
breaks= names( peelLegend)[ legendOrder]) +
scale_y_continuous( "M acres",
limits= c(0,2000)) +
theme_bw( base_family= "serif") +
scale_x_discrete( "",
limits= rasterNames[ c( 1, 2, 4, 3, 6, 5)],
breaks= rasterNames[ c( 1, 2, 4, 3, 6, 5)],
labels= expression("Agland2000", "NLCD",
atop( atop( textstyle( "MLCT"),
textstyle( A[ min] ==1.0)),
phantom(0)),
atop( atop( textstyle( "MLCT"),
textstyle( A[ min] ==0.5)),
phantom(0)),
atop( atop( textstyle( "MLCT"),
textstyle( A[ min] ==1.0)),
"No Mosaic"),
atop( atop( textstyle( "MLCT"),
textstyle( A[ min] ==0.5)),
"No Mosaic")))
areasCt <- cast( areasDf, class ~ map,
value= "acres",
subset= class != "total",
sum,
margins="grand_row")[, -1]
rownames( areasCt) <- levels( areasDf$class)
@ %def
In this chapter we will describe a procedure for combining information
from the data sets described in \autoref{cha:datasets} using the same
sub-pixel analysis data structure at $5'$ resolution for the
conterminous USA9 (cUSA) to produce a data set that exhibits high
accuracy in the distribution of agricultural production according to
the Agland2000 data set introduced in \autoref{sec:agland2000}, that
provides a realistic characterization of other uses and covers as
suggested by MLCT data from \autoref{sec:mlct} and particular aspects
of the NLCD from \autoref{sec:nlcd}.
In this chapter we will evaluate the progress of the analysis in terms
of areas given both in millions of acres (Ma) and millions of hectare
(Mha). It is important to note that these areas cannot be computed
directly from the geographic grid in which the data is contained and
our maps are rendered. Because these $5'$ grid cells are actually
sections of a spheroid projected onto a plane, the areas that a given
cell encompasses is not constant. A first-order approximation of
these areas can be obtained as a function of the earth's mean radius
and the cosine of a cell's latitude. In this way areas are maximum at
the equator and approach zero as position approaches the poles.
Conveniently Hijmans' \texttt{raster} package for the \texttt{R}
analysis software provides a function \texttt{area()} that accepts any
raster data set in geographical coordinates (longitude, latitude) as
an input, producing a new raster data set whose values are the areas
of the former in km$^{\textrm{2}}$. It is a simple matter to convert these to
acres by subsequently scaling that result by a constant. See the
source code in the appendix for further details.
We start by tabulating the aggregate areas by PEEL class for the data
sets that we are using as inputs, MLCT, NLCD, and Agland2000. We
evaluate the accuracy of cropland distribution in the MLCT data as a
function of two values of the $A_{min}$ parameter, which is defined as
the minimum fraction of an MLCT pixel at its native resolution
assigned to the primary class prior to aggregation to the $5'$ PEEL
model analysis grid. $A_{min}=1.0$ represents consideration of only
the primary class. $A_{min}=0.5$ indicates that in the hypothetical
situation of zero classification confidence the primary class would be
assigned half of the area of that particular MLCT pixel, therefore
this value represents maximum incorporation of the secondary class in
our analytical framework, as modulated by the MLCT classification
confidence data. These intermediate results are compared on the basis
of root mean squared error (RMSE) metrics calculated relative to the
distribution of cropland given in the Agland2000 data at the end of
\autoref{sec:comparison}.
In \autoref{sec:nlcd_offsets} we describe a method for selectively
incorporating cover fractions for particular classes from the NLCD
data set due to a perceived underestimation of those classes by MLCT
due primarily to its lower resolution. Those classes are water,
wetland, and urban. In the PEEL classification the ``urban'' class is
broadened to include rural infrastructure that MLCT effectively counts
as cropland. Accepting NLCD's quantification of these classes as
truth is intended to counteract a perceived overestimation of cropland
area in MLCT caused in part by a discrepancy in formulation of these
data sets, Agland2000 representing actual harvested areas and MLCT
catching up lots of ancillary land that may be associated with
cultivated land but is not directly involved in crop production. The
result is an adjusted version of the MLCT data as amended by these
NLCD offsets.
Section~\ref{sec:fusion} presents the results of fusing our adjusted MLCT
map with the Agland2000 data by accepting the Agland2000 value for
cropland as truth where possible and scaling other classes
proportionally to describe the remainder of the landscape for
purposes of the PEEL model. This operation is constrained by our
decision to retain the NLCD offsets as firm figures for those classes
in order to account for varying degrees of infrastructure development
represented by the so-called urban class and water or wetland features
not resolved in the MLCT data. Where Agland2000 conflicts with this
constraint the cropland fraction is reduced accordingly.
Finally \autoref{sec:peel} shows how information from the
\texttt{175Crops2000} data set is used to disaggregate the cropland
given by the result of the previous step in order to provide a rough
characterization of the distribution of production of major crop
commodities, corn (maize), soybean, wheat, rice, sugarcane, other
cereals, and other field crops. This is important for the PEEL model
because the intent is to model transitions in production in response
to forecast commodity prices in addition to other drivers.
Through the offset and fusion steps we use decreasing RMSE figures to
show that our complete characterization of the landscape is improving
in accuracy with respect to \texttt{Agland2000}, the census-based
distribution of productive cropland so that when other cover classes
are scaled the distortions are minimized.
\section{Comparison of Aggregate Areas}
\label{sec:comparison}
@
<<tab_areas, results=tex, eval=TRUE>>=
local({
colnames( areasCt) <- c( "Agland2000", "NLCD",
"\\pbox[c][][c]{3in}{Aggregated\\\\$A_{min}=0.5$}",
"\\pbox[c][][c]{3in}{Aggregated\\\\$A_{min}=1.0$}",
"\\pbox[c][][c]{3in}{No Mosaic\\\\$A_{min}=0.5$}",
"\\smallskip\\pbox[c][][c]{3in}{No Mosaic\\\\$A_{min}=1.0$}")
print( xtable( areasCt / 10^6,
caption= "Total Acreages by Map and Cover",
label= "tab:areas",
digits= 1),
add.to.row= list(
pos= list( 0, nrow( areasCt)),
command= rep("\\noalign{\\smallskip}", times= 2)),
size= "small",
sanitize.colnames.function= function(x) x)
})
@ %def
\begin{figure}[ht]
\centering
<<fig_areas>>=
if( overwriteFigures) {
setwd( texWd)
my.ggsave( texWd, "fig_areas.pdf",
device= pdf,
plot= areasPlot,
width= 6,
height=6)
}
@
\includegraphics{fig_areas}
\caption{Total Acreages by Map and Cover}
\label{fig:areas}
\end{figure}
After decomposing the mosaic class MLCT indicates
\Sexpr{printAreas(areasCt["crop","nomos05"])} of cropland for
$A_{min}=0.5$ and \Sexpr{printAreas(areasCt["crop","nomos1"])} for
$A_{min}=1.0$ in the cUSA in 2001. Aglands2000 indicates roughly
\Sexpr{printAreas(areasCt["crop","agland"])} of cropland. The
inability of the MLCT data set to resolve rural transportation
networks, minor settlements, and small water or wetland features is a
major contribution to the surplus of cropland acreage indicated by the
MLCT. Due to its greater resolution, ~30m vs. ~500m, the NLCD is
better suited at discerning developed areas in rural landscapes
ranging from rural roads to farmsteads to small communities that do
not show up in the MLCT data. There is a total area of roughly
\Sexpr{printAreas(areasCt["urban","nlcd"]-areasCt["urban","agg05"])}
of development remaining after subtracting the MLCT urban class from
all developed classes in the NLCD after they have both been aggregated
to the $5'$ grid. Applying this area as an offset to the cropland
area in Aglands2000 brings us closer to the expected acreage under
cultivation in 2001, although this assumes that all of that
development intersects with MLCT cropland area.
The purpose for processing the MLCT for two values of $A_{min}$ as
described in \autoref{cha:datasets} was to evaluate whether or not
information from the secondary cover type contributes positively to
the accuracy of the data set we seek to synthesize. The primary
objective of this synthesis is to achieve accuracy in cropland
distribution. Because the cropland layer in the Agland2000 data set
is derived from county-level production census statistics we adopt
this as the ground truth and will endeavor to adjust our product
accordingly. Although MLCT overstates cropland acreage for both
$A_{min}=0.5$ and $A_{min}=1.0$ the discrimination among the two is made
by the distribution of errors rather than the aggregate error.
<<nomosDiff>>=
if( overwriteFigures) {
nomosDiff1 <- getPeelBand( nomos1, "crop") -aglandCrop
layerNames( nomosDiff1) <- "crop"
nomosDiffPlot1 <- coverMaps( nomosDiff1, classes= "crop", samp= 0.2) +
scale_fill_gradientn( "diff", colours= rev( brewer.pal( 11, "BrBG")),
limits= c( 1, -1),
breaks= seq( 1, -1, by= -0.2))
nomosDiff05 <- getPeelBand( nomos05, "crop") -aglandCrop
layerNames( nomosDiff05) <- "crop"
nomosDiffPlot05 <- coverMaps( nomosDiff05, classes= "crop", samp= 0.2) +
scale_fill_gradientn( "diff", colours= rev( brewer.pal( 11, "BrBG")),
limits= c( 1, -1),
breaks= seq( 1, -1, by= -0.2))
}
@
\begin{figure}[ht]
\centering
<<fig_nomosDiff1>>=
if( overwriteFigures) {
my.ggsave( texWd, "fig_nomosDiff1.png", plot= nomosDiffPlot1)
}
@
\includegraphics{fig_nomosDiff1}
\caption{Difference between MLCT (no mosaic, $A_{min}=1.0$) and Agland2000 crop}
\label{fig:nomosDiff1}
\end{figure}
\begin{figure}[ht]
\centering
<<fig_nomosDiff05>>=
if( overwriteFigures) {
my.ggsave( texWd, "fig_nomosDiff05.png", plot= nomosDiffPlot05)
}
@
\includegraphics{fig_nomosDiff05}
\caption{Difference between MLCT (no mosaic, $A_{min}=0.5$) and Agland2000 crop}
\label{fig:nomosDiff05}
\end{figure}
\autoref{fig:nomosDiff1} and \autoref{fig:nomosDiff05} show the
cell-by-cell differences between the MLCT-derived data set that we
have calculated after mosaic decomposition and the Agland2000 cropland
map. To summarize and compare these errors we calculate the root of
the mean squared error (RMSE) given by:
$$
\operatorname{RMSE}=\sqrt{\frac{\sum_{i=1}^{n}(\hat\theta_i-\theta_i )^2}{n}}
$$
where $\hat\theta_i$ are the predictions derived from the respective
MLCT derivations and $\theta_i$ are the observations taken from the
Agland2000 data set.
@
<<rmse>>=
rmseDf <- ldply( list("nomos05", "nomos1"),
function( brickName) {
rmseRast( getPeelBand( get( brickName), "crop"),
aglandCrop)
})
rmseDf <- cbind( c( 0.5, 1.0), rmseDf)
colnames( rmseDf) <- c( "$A_{min}$", "RMSE")
cropScatDf <-
data.frame( as( stack( getPeelBand( nomos05, "crop"),
getPeelBand( nomos1, "crop"),
aglandCrop,
raster::mask(acres, cusaMask)),
"SpatialGridDataFrame"))
colnames(cropScatDf) <-
c( "nomos05", "nomos1", "agland", "acres", "lon", "lat")
cropScatDf$weight <- with( cropScatDf, acres/ max(acres))
if( overwriteFigures) hexPlot1 <-
ggplot( data= cropScatDf,
aes( agland, nomos1)) +
stat_binhex( binwidth= c( 0.025, 0.025)) +
scale_fill_gradientn( colours= brewer.pal( 6, "YlGn"),
trans= "log10",
limits=c( 10, 10000)) +
geom_abline( alpha=0.4) +
scale_x_continuous( "Agland2000",
expand= c( 0,0.0125)) +
scale_y_continuous( expression( paste("MLCT, ", A[min] == 1.0)),
expand= c( 0,0.0125)) +
theme_bw( base_family= "serif") +
coord_equal() +
opts( panel.grid.minor= theme_blank(),
panel.grid.major= theme_blank(),
panel.background= theme_blank())
@
\begin{figure}[ht]
\centering
<<fig_hexplot1>>=
if( overwriteFigures) {
my.ggsave( texWd, "fig_hexPlot1.pdf",
dev= pdf,
plot= hexPlot1)
## width= 4.5,
## height= 4.5)
}
@
\includegraphics{fig_hexPlot1}
\caption{Hexbin plot of MLCT crop ($A_{min}=1.0$, no mosaic) versus Agland2000 cropland}
\label{fig:hexplot1}
\end{figure}
To examine the relationships between the distributions of cropland
that we derive from the MLCT data relative to the Agland2000 data we
will use ``hexbin'' plots which are essentially two-dimensional
histograms that show the number of grid cells that occur within
discrete regions of the space defined by coordinates that are cropland
fractions for the two data sets. This operates much like a common
scatter plot but for data sets with as many observations as we wish to
include it gives a cleaner representation of that structure. For our
plots we have chosen to employ a logarithmic scale because of the wide
range of counts calculated for the bins. This gives a more complete
picture of the overall dispersion and local concentration of the
observations. Our first example of such a plot is
\autoref{fig:hexplot1} which plots the crop fractions of MLCT with
$A_{min}=1$ versus those of the Agland2000 crop map. As one would
expect there is an overall correlation among these variables,
especially given that Agland2000 provides prior probabilities to the
MLCT classification. It is clear that the MLCT primary class
exhibits a positive bias overall, although a subset that is negatively
biased is also apparent for low values of the Agland2000 crop fraction
in the interval $[0.1,0.5]$. Also of particular note is the drastic
decrease in correlation when Aglands2000 reaches 1.0 relative to the
stronger relationship over the interval $[0.8,1.0)$. It is difficult
to speculate on the nature of this structure, but suffice it to say
that there is something peculiar about the Agland2000 allocation
procedure that drives the crop fraction to its maximum in areas where
the remote sensing data clearly resists such a characterization. This
may be caused by systematic errors in the agricultural census data
that drive the Agland2000 algorithm forcing unrealistically high
concentrations in order to satisfy the algorithm's constraints.
\begin{figure}[ht]
\centering
<<fig_hexplot05>>=
if( overwriteFigures) {
my.ggsave( texWd, "fig_hexPlot05.pdf",
device= pdf,
plot= hexPlot1 +
aes(agland, nomos05) +
scale_y_continuous( expression(paste("MLCT, ", A[min] == 0.5)),
limits= c( 0, 1),
breaks= seq( 0, 1, by= 0.2),
expand= c( 0,0.0125)))
}
@
\includegraphics{fig_hexPlot05}
\caption{Hexbin plot of MLCT crop ($A_{min}=0.5$, no mosaic) versus Agland2000 cropland}
\label{fig:hexplot05}
\end{figure}
@
<<table_rmse, results=tex, eval=TRUE>>=
print( xtable( rmseDf,
caption= "RMSE, MLCT vs. Agland2000 crop",
label= "tab:rmse",
digits= c( 0, 1, 3)),
include.rownames= FALSE,
sanitize.colnames.function= function(x) x)
@ %def
\todo{Consider also calculating statistical bias (average error)?}
We expect that setting $A_{min}=1$ will produce a maximum overall bias
and attendant error by assigning entire pixels to the cropland class
and not allowing for the possibility of mixed covers. The results on
\autoref{tab:rmse} indicate that $A_{min}=0.5$ is more representative
of the distribution of cropland because although the total area
indicated is higher according to \autoref{tab:areas}, there is less
error on a cell-by-cell basis indicating that it does a better job of
representing the spatial distribution than $A_{min}=1.0$. This is
reflected in the structure revealed by \autoref{fig:hexplot05} where
fewer cells in the MLCT data are set at 100\% crop because of
including the secondary class in calculating $5'$ coverage fractions.
Where crop was included in a secondary class it also caused cells of
near-zero value for MLCT to lift away from the x-axis. The
uncorrelated observations for Agland2000 equal to 1.0 are still
present, however. This result is adequate for our purposes to
determine that our logic in considering the secondary class in the
manner we have for $A_{min}=0.5$ is correct. From this point forward
we will consider only the statistics derived from setting
$A_{min}=0.5$ for the aggregation of the MLCT data due to this
improved fit with Agland2000 cropland and its full consideration of
all information imparted by the MLCT data.
\section{NLCD Offsets}
\label{sec:nlcd_offsets}
From \autoref{tab:areas} it is apparent that the MLCT results are
negatively biased in the total areas assigned to water, wetland, and
urban features relative to the NLCD. It is clear from visual
inspection that features of these classes tend to have smaller
characteristic dimensions which causes them to be overlooked in the
MLCT data due to its resolution. The most obvious example is the
rural transportation networks in areas surveyed under the Public Land
Survey System (PLSS) where roads have been laid out on a generally
regular grid of square miles. In the PEEL classification this
infrastructure is included in the urban class as another form of
developed land, perhaps making ``urban'' somewhat of a misnomer, but
it hails to its origins in the IGBP classification scheme and provides
a short label, a great convenience in programming. It is important to
represent wetlands and water features in our input to the PEEL model
because these areas have high likelihoods of being set aside for
conservation purposes, which would be represented as a constraint on
land conversion in the model. In the event that NLCD overestimates
these areas it would be an acceptable error to carry over to the PEEL
model in order to be conservative in allowing for conservation
measures in a greater number of grid cells, absent more precise LULC
data with respect to the water and wetland classes.
To merge this information from the NLCD we begin by simply accepting
the areas for water, wetland, and urban classes in the reclassified,
$5'$-aggregated version of NLCD that we have computed as truth and
calculate offsets for those classes versus our $5'$ MLCT data by
straight subtraction. Where NLCD is greater the difference will be
positive and so a positive offset will be added to the fraction
already present for any one of the ``truth'' classes from NLCD. The
other classes are then adjusted so that they are present in proportion
to each other as indicated by MLCT but in the area remaining after
accepting the water, wetland, and urban areas from NLCD. The additive
offsets needed to achieve this balance and account for the entire area
of the cell are calculated so that the effects of this process on all
classes may be considered on a common basis.
For the calculation of the offsets we drop back to the result of
aggregating the MLCT data to $5'$ with $A_{min}=0.5$ prior to mosaic
decomposition. Presumably there are rural roads comprised of 30m NLCD
pixels cutting through the lower-resolution MLCT pixels including
those classified as mosaic. In fact, by its very nature as a hybrid
class made up of natural cover and agricultural land use we expect
roads to be an important component of the landscape.
\autoref{fig:offsets1} and \autoref{fig:offsets2} show the spatial
distributions of the offsets calculated based on our assumptions about
the water, wetland, and urban classes in the NLCD. We have verified
that these offsets sum to zero for each grid cell. Any area deducted
from one class must be added to one or more classes in the same cell
in order to conserve the total area and maintain the sum of the
fractions at 1.0.
@
<<offsets_calc>>=
nlcdKeep <- stack( llply( names( peelClasses), function( class) {
if( class %in% c( "water", "wetland", "urban"))
ones else zeroes
}))
nlcdIgnore <- stack( llply( names( peelClasses), function( class) {
if( class %in% c( "water", "wetland", "urban"))
zeroes else ones
}))
nlcdKeepOffsets <-
(nlcd -agg05) *nlcdKeep
mlctKeep <- agg05 *nlcdIgnore
nlcdIgnoreOffsets <-
overlay( mlctKeep, sum( mlctKeep), sum( nlcdKeepOffsets),
fun= function( mk, smk, snko) {
ifelse( mk == 0 & smk ==0,
0,
-1 *mk /smk *snko)
})
nlcdOffsets <- nlcdKeepOffsets +nlcdIgnoreOffsets
nlcdOffsets <-
writeRaster( nlcdOffsets,
filename= paste( rasterWd, "nlcdOffsets.tif", sep= "/"),
overwrite= TRUE)
nlcdOffsets <- stack( nlcdOffsets, sum( nlcdOffsets))
layerNames( nlcdOffsets) <- c( names( peelClasses), "total")
thumbNlcdOffsets <- crop( nlcdOffsets, thumbExtent)
offsetsMap1 <- coverDiffMaps( nlcdOffsets, samp= 0.4,
classes= layerNames( nlcdOffsets)[ 1:5]) +
coord_equal()
offsetsMap2 <- coverDiffMaps( nlcdOffsets, samp= 0.4,
classes= layerNames( nlcdOffsets)[ 6:10]) +
coord_equal()
thumbOffsetsMap <-
coverDiffMaps( thumbNlcdOffsets,
classes= layerNames( thumbNlcdOffsets)[-10]) +
facet_wrap( ~variable)
@
\begin{figure}[h]
\centering
@
<<fig_offsetsmap1>>=
if( overwriteFigures) {
my.ggsave( texWd, "fig_offsets1.png", plot= offsetsMap1, height= 7)
}
@ %def
\includegraphics{fig_offsets1}
\caption{NLCD offsets}
\label{fig:offsets1}
\end{figure}
\begin{figure}[h]
\centering
@
<<fig_offsets2>>=
if( overwriteFigures) {
my.ggsave( texWd, "fig_offsets2.png", plot= offsetsMap2, height= 7)
}
@ %def
\includegraphics{fig_offsets2}
\caption{NLCD offsets (cont.)}
\label{fig:offsets2}
\end{figure}
The maps of these offsets are shown using a logarithmic scale in
order to bring attention both to areas of significant adjustment,
greater than 10\%, as well as to show the extent to which small
adjustments on the order of 1--5\% occur. From these maps we can see
the detailed structure of drainage networks in the water class and
population centers in the urban class which could easily be confused
with the vegetative classes in the MLCT classification. This refers,
for example, to heavily wooded suburbs where transportation
infrastructure is obscured and difficult to resolve. The offsets for
the NLCD truth classes are generally positive, although not strictly
so because the algorithm does not preclude the possibility that MLCT
may locally overestimate these classes in particular regions and still
suffer an aggregate deficit relative to NLCD.
@
<<cor_offsets>>=
corOffsets <- cor( data.frame(as( nlcdOffsets, "SpatialGridDataFrame"))[, 1:9],
use= "complete.obs")
colnames( corOffsets) <- names( peelClasses)
rownames( corOffsets) <- names( peelClasses)
corOffsetsPlot <-
ggplot( melt( corOffsets),
aes( x=X1, y=X2, fill= value)) +
geom_tile() +
theme_bw( base_family= "serif") +
opts( panel.grid.minor= theme_blank(),
panel.grid.major= theme_blank(),
panel.background= theme_blank(),
axis.title.x= theme_blank(),
axis.text.x= theme_text( angle= 90, hjust=1),
axis.title.y= theme_blank()) +
scale_x_discrete( limits= colnames( corOffsets)) +
scale_y_discrete( limits= colnames( corOffsets)) +
scale_fill_gradientn( "", colours= rev( brewer.pal( 11, "BrBG")),
limits= c( 1.0, -1.0),
breaks= seq( 1.0, -1.0, by= -0.2))
if( overwriteFigures) {
oldWd <- setwd( texWd)
ggsave( "fig_corOffsets.pdf",
device= pdf,
plot= corOffsetsPlot,
height= 4.5,
width= 4.5)
setwd( oldWd)
}
@ %def
\begin{figure}[ht]
\centering
\includegraphics{fig_corOffsets}
\caption{Correlation matrix of NLCD offsets}
\label{fig:corOffsets}
\end{figure}
\autoref{fig:corOffsets} shows the result of calculating a matrix of
correlations among the offsets calculated for each class based on
NLCD. Each cell in the matrix reflects the value of the statistical
correlation between the corresponding classes within the NLCD offset
maps resulting from the algorithm described above. This gives us an
overall sense of the effect of applying these offsets by showing which
changes are strongly correlated, whether it be positively or
negatively. It is a symmetric matrix because the classes on both axes
are from the same data set and any single classes is, of course,
perfectly correlated with itself. Going in we would expect to see
negative correlations between classes accepted as truth from NLCD,
water, wetland, and urban, and the other classes because the purpose
of applying these offsets was to bring the total areas of these
classes up, which can only happen at the expense of the other classes.
For example the wetland offsets show strong negative correlations with
forest, shrub, and mosaic. This stands to reason as a likely problem
with classification due to fundamental differences in the remote
sensing data such as resolution, the interpretation thereof, or
disagreement/overlap in class definitions. Many areas of forest
and shrub land can exhibit properties of a wetland when standing water
and high soil moisture are persistent. The ``NLCD truth'' classes'
offsets are positively correlated with one another because they are
generally positive everywhere. Likewise, non-truth classes are
positively correlated with one another because they are all being
assigned negative offsets to make room for the increased values of
water, wetland, and urban fractions. The crop and mosaic classes are
most strongly negatively correlated with urban which reflects the
widespread adjustments to account for rural transportation networks
and smaller settlements.
The resulting offsets are added to the aggregated fractions calculated
from the MLCT with $A_{min}=0.5$. The mosaic decomposition step is
readily applied to the adjusted data set because the adjusted
fractions are fundamentally in same form as the intermediate form of
the MLCT data calculated in \autoref{sec:decomposition}, only the
values have changed.
<<cusa_offset>>=
setwd( rasterWd)
# reload offsets to get rid
# of total layer
nlcdOffsets <- brick( paste( rasterWd, "nlcdOffsets.tif", sep="/"))
layerNames( nlcdOffsets) <- names( peelClasses)
mlctAdj <- list( Amin=0.5)
mlctAdj$agg <-
if( overwriteRasters) {
overlay( agg05, nlcdOffsets,
fun= sum,
filename= "agg05Adj.tif",
overwrite= TRUE)
} else brick( list.files( rasterWd,
patt= "agg05Adj.tif",
full.names= TRUE))
layerNames( mlctAdj$agg) <- names( peelClasses)
mlctAdj <- decomposeMosaic( mlctAdj, overwrite= overwriteRasters, progress= "text")
@
<<areas2>>=
# reuse area table code from above; better to implement a function?
rasterNames2 <- c( "agland", "nlcd", "agg05", "nomos05",
"nlcdOffsets", "mlctAdj$agg", "mlctAdj$nomos")
dataSets2 <- sapply( rasterNames2,
function( n) eval( parse( text=n)))
areas2 <- llply( dataSets2,
function( d) {
res <- cellStats( d *acres, sum)
names( res) <- layerNames( d)
res
})
areasDf2 <- ldply( areas2, function( a) {
melt( t( as.data.frame( a)))
})
areasDf2 <-
areasDf2[, c( 1, 3, 4)]
colnames( areasDf2) <-
c( "map", "class", "acres")
areasDf2 <-
transform( areasDf2,
class= factor( class,
levels= c( names( peelLegend)[ rev( legendOrder)], "total")),
## c("crop", "open",
## names( peelClasses)[-c(4,6,8)],
## "mosaic", "total")),
map= factor( map,
levels= rasterNames2))
areasCt2 <- cast( areasDf2,
class ~ map,
subset= class != "total",
value= "acres",
sum,
margins="grand_row")
rownames( areasCt2) <- areasCt2[, "class"]
areasCt2 <- areasCt2[, -1]
areasCt2 <- areasCt2[ c( names( peelClasses), "(all)"), rasterNames2]
@
<<restack_check>>=
## check that everything balances
## output of decomposeMosaic is not brick()ed properly
## in the sense that the layer set is incomplete
## and out of order
restack <- function( peelBrick) {
u <- unstack( peelBrick)
names( u) <- layerNames( peelBrick)
r <- do.call( stack,
llply( names( peelClasses),
function( cover) {
if( is.null( u[[ cover]]))
zeroes
else
u[[ cover]]
}))
layerNames( r) <- names( peelClasses)
r
}
# restack() takes any of the bricks/stacks from
# previous functions and rearranges the layers
# to match the PEEL classes, inserting layers of
# zeroes as needed
restackOverlay <- function( rasterList, fun) {
l <- llply( rasterList, restack)
names( l) <- NULL
do.call( overlay, c( l, fun=fun))
}
# restackOverlay() runs its arguments through restack()
# and applies a function to its outputs
@
@
<<table_restack_check, results=tex, eval=FALSE>>=
check <- restackOverlay( c( mlctAdj[ c("nomos", "delta")],
nlcdOffsets,
agg05),
function( n, d, o, a) n-d-o-a)
layerNames(check) <- names( peelClasses)
checkTable <-
xtable( cbind( class=peelClasses,
min=minValue( check),
max=maxValue(check)),
caption= "Balance of adjustment fractions and original MLCT aggregation",
label= "tab:restack_check")
digits( checkTable) <- c( 0, 0,-2,-2)
print( checkTable)
@ %def
To assess whether the process of adding in the NLCD offsets has
improved overall cropland accuracy we can perform the same error
calculation from above and extend Table~\ref{tab:rmse} with the new
result, giving us Table~\ref{tab:rmse2}.