-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_regression_models.py
166 lines (141 loc) · 6.67 KB
/
run_regression_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""Run carbon model on all the IPCC masks."""
import glob
import os
import logging
import multiprocessing
import numpy
from run_model import GLOBAL_BOUNDING_BOX_TUPLE
from ecoshard import taskgraph
from ecoshard import geoprocessing
BASE_FOREST_MASK_PATH = './output_global/forest_mask_esa.tif'
CARBON_MODEL_PATH = './models/hansen_model_2022_07_14.dat'
PREDICTOR_RASTER_DIR = './processed_rasters'
PRE_WARP_DIR = os.path.join(PREDICTOR_RASTER_DIR, f'pre_warped_{GLOBAL_BOUNDING_BOX_TUPLE[0]}')
LOGGER = logging.getLogger(__name__)
LOG_FORMAT = (
'%(asctime)s (%(relativeCreated)d) %(levelname)s %(name)s'
' [%(funcName)s:%(lineno)d] %(message)s')
logging.basicConfig(
level=logging.DEBUG,
format=LOG_FORMAT)
def sum_raster(raster_path):
"""Return the sum of non-nodata value pixels in ``raster_path``."""
running_sum = 0.0
nodata = geoprocessing.get_raster_info(raster_path)['nodata'][0]
for _, block_array in geoprocessing.iterblocks((raster_path, 1)):
if nodata is not None:
valid_array = block_array != nodata
else:
valid_array = slice(-1)
running_sum += numpy.sum(block_array[valid_array])
return running_sum
def sum_by_mask(raster_path, mask_path):
"""Return tuple of sum of non-nodata values in raster_path in and out of mask.
Returns:
(in sum, out sum)
"""
in_running_sum = 0.0
out_running_sum = 0.0
nodata = geoprocessing.get_raster_info(raster_path)['nodata'][0]
for ((_, block_array), (_, mask_array)) in \
zip(geoprocessing.iterblocks((raster_path, 1)),
geoprocessing.iterblocks((mask_path, 1))):
if nodata is not None:
valid_array = block_array != nodata
else:
valid_array = numpy.ones(block_array.shape, dtype=bool)
in_running_sum += numpy.sum(block_array[valid_array & (mask_array == 1)])
out_running_sum += numpy.sum(block_array[valid_array & (mask_array != 1)])
return (in_running_sum, out_running_sum)
def add_masks(mask_a_path, mask_b_path, target_path):
"""Combine two masks as a logical OR.
Args:
mask_a_path, mask_b_path (str): path to mask rasters with 1 indicating mask
target_path (str): path to combined mask raster.
Return:
None
"""
raster_info = geoprocessing.get_raster_info(mask_a_path)
nodata = raster_info['nodata'][0]
def _add_masks(mask_a_array, mask_b_array):
"""Combine a and b."""
valid_mask = (mask_a_array == 1) | (mask_b_array == 1)
return valid_mask
geoprocessing.raster_calculator(
[(mask_a_path, 1), (mask_b_path, 1)],
_add_masks, target_path, raster_info['datatype'], nodata)
def main():
"""Entry point."""
task_graph = taskgraph.TaskGraph('./output_global/regression_optimization', multiprocessing.cpu_count(), 15)
search_path = './output_global/regression_optimization/regressioncoarsened_marginal_value_regression_mask_*full_forest_mask.tif'
raster_sum_list = []
transient_run = False
for full_forest_mask_path in glob.glob(search_path):
if '_coarse_' in full_forest_mask_path:
continue
area_substring = full_forest_mask_path.split('_')[-4]
new_forest_mask_path = f'./output_global/regression_optimization/regressioncoarsened_marginal_value_regression_mask_{area_substring}_new_forest_mask.tif'
modeled_carbon_path = f'./output_global/regression_optimization/regressioncoarsened_marginal_value_regression_mask_{area_substring}_regression.tif'
count_full_forest_pixel_task = task_graph.add_task(
func=sum_raster,
args=(full_forest_mask_path,),
transient_run=transient_run,
task_name=f'sum raster of {full_forest_mask_path}',
store_result=True)
sum_in_out_forest_carbon_density_by_mask_task = task_graph.add_task(
func=sum_by_mask,
args=(modeled_carbon_path, new_forest_mask_path),
store_result=True,
transient_run=transient_run,
task_name=f'separate out old and new carbon for {modeled_carbon_path}')
# count number of new forest pixels
count_new_forest_pixel_task = task_graph.add_task(
func=sum_raster,
args=(new_forest_mask_path,),
transient_run=transient_run,
task_name=f'sum raster of {new_forest_mask_path}',
store_result=True)
raster_sum_list.append(
(os.path.basename(modeled_carbon_path),
count_full_forest_pixel_task,
count_new_forest_pixel_task,
sum_in_out_forest_carbon_density_by_mask_task))
task_graph.join()
raster_info = geoprocessing.get_raster_info(new_forest_mask_path)
with open('regression_optimization_regression_modeled_carbon.csv', 'w') as opt_table:
opt_table.write(
'file,'
'number of forest pixels,'
'number of old forest pixels,'
'number of new forest pixels,'
'sum of carbon density for all forest pixels,'
'sum of carbon density for old forest pixels,'
'sum of carbon density for new forest pixels,'
'carbon density per pixel for all forest,'
'carbon density per pixel in old forest,'
'carbon density per pixel in new forest,'
'carbon density per pixel in esa scenario,'
'area of pixel in m^2\n')
for path, count_full_forest_pixel_task, count_new_forest_pixel_task, sum_in_out_forest_carbon_density_by_mask_task in raster_sum_list:
LOGGER.debug(f'processing {path}')
new_carbon_density_sum = sum_in_out_forest_carbon_density_by_mask_task.get()[0]
old_carbon_density_sum = sum_in_out_forest_carbon_density_by_mask_task.get()[1]
all_forest_pixel_count = count_full_forest_pixel_task.get()
new_forest_pixel_count = count_new_forest_pixel_task.get()
old_forest_pixel_count = all_forest_pixel_count-new_forest_pixel_count
opt_table.write(
f'{path},'
f'{all_forest_pixel_count},'
f'{old_forest_pixel_count},'
f'{new_forest_pixel_count},'
f'{old_carbon_density_sum+new_carbon_density_sum},'
f'{old_carbon_density_sum},'
f'{new_carbon_density_sum},'
f'{(new_carbon_density_sum+old_carbon_density_sum)/(all_forest_pixel_count)},'
f'{old_carbon_density_sum/old_forest_pixel_count},'
f'{new_carbon_density_sum/new_forest_pixel_count},'
f'{abs(numpy.prod(raster_info["pixel_size"]))}\n')
task_graph.join()
task_graph.close()
if __name__ == '__main__':
main()