forked from osm2pgsql-dev/osm2pgsql
-
Notifications
You must be signed in to change notification settings - Fork 0
/
osmdata.cpp
443 lines (363 loc) · 13.4 KB
/
osmdata.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
#include <cstdio>
#include <functional>
#include <future>
#include <mutex>
#include <stdexcept>
#include <utility>
#include <vector>
#include <osmium/thread/pool.hpp>
#include "middle.hpp"
#include "node-ram-cache.hpp"
#include "osmdata.hpp"
#include "output.hpp"
// Builds an osmdata_t that feeds exactly one output.
// The middle and projection are stored as given; the output list starts
// out holding the single output passed in.
osmdata_t::osmdata_t(std::shared_ptr<middle_t> mid_,
                     std::shared_ptr<output_t> const &out_,
                     std::shared_ptr<reprojection> proj)
    : mid(mid_), outs(1, out_), projection(proj)
{
}
// Builds an osmdata_t that feeds several outputs.
// Throws std::runtime_error when the supplied output list is empty.
osmdata_t::osmdata_t(std::shared_ptr<middle_t> mid_,
                     std::vector<std::shared_ptr<output_t> > const &outs_,
                     std::shared_ptr<reprojection> proj)
    : mid(mid_), outs(outs_), projection(proj)
{
    if (outs.size() == 0) {
        throw std::runtime_error(
            "Must have at least one output, but none have been configured.");
    }
}
// Nothing to release by hand; members clean up after themselves.
osmdata_t::~osmdata_t() = default;
// Stores a new node in the middle, then hands it to every output.
// Returns the bitwise OR of the status codes reported by the outputs.
int osmdata_t::node_add(osmium::Node const &node)
{
    // The middle is updated before any output is notified.
    mid->nodes_set(node);

    int combined = 0;
    for (auto it = outs.begin(); it != outs.end(); ++it) {
        combined |= (*it)->node_add(node);
    }
    return combined;
}
// Stores a new way in the middle, then hands it to every output.
// Returns the bitwise OR of the status codes reported by the outputs.
int osmdata_t::way_add(osmium::Way *way)
{
    // The middle is updated before any output is notified.
    mid->ways_set(*way);

    int combined = 0;
    for (size_t idx = 0; idx < outs.size(); ++idx) {
        combined |= outs[idx]->way_add(way);
    }
    return combined;
}
// Stores a new relation in the middle, then hands it to every output.
// Returns the bitwise OR of the status codes reported by the outputs.
int osmdata_t::relation_add(osmium::Relation const &rel)
{
    // The middle is updated before any output is notified.
    mid->relations_set(rel);

    int combined = 0;
    for (auto const &sink : outs) {
        int const rc = sink->relation_add(rel);
        combined = combined | rc;
    }
    return combined;
}
// Replaces a node in the slim middle and notifies every output.
//
// The old version is deleted from the middle and the new one stored before
// the outputs see the change; afterwards the middle is told that the node
// changed.
//
// Returns the bitwise OR of the status codes reported by the outputs.
// Throws std::bad_cast if the middle is not a slim_middle_t.
int osmdata_t::node_modify(osmium::Node const &node)
{
    // A reference cast throws std::bad_cast on a type mismatch instead of
    // producing a null pointer that would then be dereferenced.
    auto &slim = dynamic_cast<slim_middle_t &>(*mid);

    slim.nodes_delete(node.id());
    slim.nodes_set(node);

    int status = 0;
    for (auto &out : outs) {
        status |= out->node_modify(node);
    }

    slim.node_changed(node.id());

    return status;
}
// Replaces a way in the slim middle and notifies every output.
//
// The old version is deleted from the middle and the new one stored before
// the outputs see the change; afterwards the middle is told that the way
// changed.
//
// Returns the bitwise OR of the status codes reported by the outputs.
// Throws std::bad_cast if the middle is not a slim_middle_t.
int osmdata_t::way_modify(osmium::Way *way)
{
    // A reference cast throws std::bad_cast on a type mismatch instead of
    // producing a null pointer that would then be dereferenced.
    auto &slim = dynamic_cast<slim_middle_t &>(*mid);

    slim.ways_delete(way->id());
    slim.ways_set(*way);

    int status = 0;
    for (auto &out : outs) {
        status |= out->way_modify(way);
    }

    slim.way_changed(way->id());

    return status;
}
// Replaces a relation in the slim middle and notifies every output.
//
// The old version is deleted from the middle and the new one stored before
// the outputs see the change; afterwards the middle is told that the
// relation changed.
//
// Returns the bitwise OR of the status codes reported by the outputs.
// Throws std::bad_cast if the middle is not a slim_middle_t.
int osmdata_t::relation_modify(osmium::Relation const &rel)
{
    // A reference cast throws std::bad_cast on a type mismatch instead of
    // producing a null pointer that would then be dereferenced.
    auto &slim = dynamic_cast<slim_middle_t &>(*mid);

    slim.relations_delete(rel.id());
    slim.relations_set(rel);

    int status = 0;
    for (auto &out : outs) {
        status |= out->relation_modify(rel);
    }

    slim.relation_changed(rel.id());

    return status;
}
// Tells every output that node `id` was deleted, then removes the node
// from the slim middle.
//
// Returns the bitwise OR of the status codes reported by the outputs.
// Throws std::bad_cast if the middle is not a slim_middle_t; the check
// happens before any output is notified.
int osmdata_t::node_delete(osmid_t id) {
    // A reference cast throws std::bad_cast on a type mismatch instead of
    // producing a null pointer that would then be dereferenced.
    auto &slim = dynamic_cast<slim_middle_t &>(*mid);

    int status = 0;
    for (auto &out : outs) {
        status |= out->node_delete(id);
    }

    // The outputs are notified before the middle drops the node.
    slim.nodes_delete(id);

    return status;
}
// Tells every output that way `id` was deleted, then removes the way
// from the slim middle.
//
// Returns the bitwise OR of the status codes reported by the outputs.
// Throws std::bad_cast if the middle is not a slim_middle_t; the check
// happens before any output is notified.
int osmdata_t::way_delete(osmid_t id) {
    // A reference cast throws std::bad_cast on a type mismatch instead of
    // producing a null pointer that would then be dereferenced.
    auto &slim = dynamic_cast<slim_middle_t &>(*mid);

    int status = 0;
    for (auto &out : outs) {
        status |= out->way_delete(id);
    }

    // The outputs are notified before the middle drops the way.
    slim.ways_delete(id);

    return status;
}
// Tells every output that relation `id` was deleted, then removes the
// relation from the slim middle.
//
// Returns the bitwise OR of the status codes reported by the outputs.
// Throws std::bad_cast if the middle is not a slim_middle_t; the check
// happens before any output is notified.
int osmdata_t::relation_delete(osmid_t id) {
    // A reference cast throws std::bad_cast on a type mismatch instead of
    // producing a null pointer that would then be dereferenced.
    auto &slim = dynamic_cast<slim_middle_t &>(*mid);

    int status = 0;
    for (auto &out : outs) {
        status |= out->relation_delete(id);
    }

    // The outputs are notified before the middle drops the relation.
    slim.relations_delete(id);

    return status;
}
// Starts all outputs first, then starts the middle using the options
// reported by the first output.
void osmdata_t::start() {
    for (auto it = outs.begin(); it != outs.end(); ++it) {
        (*it)->start();
    }

    auto const &first_output = outs[0];
    mid->start(first_output->get_options());
}
namespace {
//TODO: have the main thread query the main middle for batches of ways (configurable number)
//and push those into the work queue, so that we have a single-producer, multi-consumer threaded queue,
//since fetching from the middle should be faster than the processing in each backend.
struct pending_threaded_processor : public middle_t::pending_processor {
typedef std::vector<std::shared_ptr<output_t>> output_vec_t;
typedef std::pair<std::shared_ptr<const middle_query_t>, output_vec_t> clone_t;
static void do_jobs(output_vec_t const& outputs, pending_queue_t& queue, size_t& ids_done, std::mutex& mutex, int append, bool ways) {
while (true) {
//get the job off the queue synchronously
pending_job_t job;
mutex.lock();
if(queue.empty()) {
mutex.unlock();
break;
}
else {
job = queue.top();
queue.pop();
}
mutex.unlock();
//process it
if(ways)
outputs.at(job.output_id)->pending_way(job.osm_id, append);
else
outputs.at(job.output_id)->pending_relation(job.osm_id, append);
mutex.lock();
++ids_done;
mutex.unlock();
}
}
//starts up count threads and works on the queue
pending_threaded_processor(std::shared_ptr<middle_query_t> mid,
const output_vec_t &outs, size_t thread_count,
int append)
//note that we cant hint to the stack how large it should be ahead of time
//we could use a different datastructure like a deque or vector but then
//the outputs the enqueue jobs would need the version check for the push(_back) method
: outs(outs),
ids_queued(0),
append(append),
queue(),
ids_done(0)
{
//clone all the things we need
clones.reserve(thread_count);
for (size_t i = 0; i < thread_count; ++i) {
//clone the middle
std::shared_ptr<const middle_query_t> mid_clone = mid->get_instance();
//clone the outs
output_vec_t out_clones;
for (const auto& out: outs) {
out_clones.push_back(out->clone(mid_clone.get()));
}
//keep the clones for a specific thread to use
clones.push_back(clone_t(mid_clone, out_clones));
}
}
~pending_threaded_processor() {}
void enqueue_ways(osmid_t id) {
for(size_t i = 0; i < outs.size(); ++i) {
outs[i]->enqueue_ways(queue, id, i, ids_queued);
}
}
//waits for the completion of all outstanding jobs
void process_ways() {
//reset the number we've done
ids_done = 0;
fprintf(stderr, "\nGoing over pending ways...\n");
fprintf(stderr, "\t%zu ways are pending\n", ids_queued);
fprintf(stderr, "\nUsing %zu helper-processes\n", clones.size());
time_t start = time(nullptr);
//make the threads and start them
std::vector<std::future<void>> workers;
for (size_t i = 0; i < clones.size(); ++i) {
workers.push_back(std::async(std::launch::async,
do_jobs, std::cref(clones[i].second),
std::ref(queue), std::ref(ids_done),
std::ref(mutex), append, true));
}
//TODO: print out partial progress
for (auto& w: workers) {
try {
w.get();
} catch (...) {
// drain the queue, so that the other workers finish
mutex.lock();
while (!queue.empty()) {
queue.pop();
}
mutex.unlock();
throw;
}
}
time_t finish = time(nullptr);
fprintf(stderr, "\rFinished processing %zu ways in %i s\n\n", ids_queued, (int)(finish - start));
if (finish - start > 0)
fprintf(stderr, "%zu Pending ways took %ds at a rate of %.2f/s\n", ids_queued, (int)(finish - start),
((double)ids_queued / (double)(finish - start)));
ids_queued = 0;
ids_done = 0;
//collect all the new rels that became pending from each
//output in each thread back to their respective main outputs
for (const auto& clone: clones) {
//for each clone/original output
for(output_vec_t::const_iterator original_output = outs.begin(), clone_output = clone.second.begin();
original_output != outs.end() && clone_output != clone.second.end(); ++original_output, ++clone_output) {
//done copying ways for now
clone_output->get()->commit();
//merge the pending from this threads copy of output back
original_output->get()->merge_pending_relations(clone_output->get());
}
}
}
void enqueue_relations(osmid_t id) {
for(size_t i = 0; i < outs.size(); ++i) {
outs[i]->enqueue_relations(queue, id, i, ids_queued);
}
}
void process_relations() {
//reset the number we've done
ids_done = 0;
fprintf(stderr, "\nGoing over pending relations...\n");
fprintf(stderr, "\t%zu relations are pending\n", ids_queued);
fprintf(stderr, "\nUsing %zu helper-processes\n", clones.size());
time_t start = time(nullptr);
//make the threads and start them
std::vector<std::future<void>> workers;
for (size_t i = 0; i < clones.size(); ++i) {
workers.push_back(std::async(std::launch::async,
do_jobs, std::cref(clones[i].second),
std::ref(queue), std::ref(ids_done),
std::ref(mutex), append, false));
}
for (auto& w: workers) {
try {
w.get();
} catch (...) {
// drain the queue, so the other worker finish immediately
mutex.lock();
while (!queue.empty()) {
queue.pop();
}
mutex.unlock();
throw;
}
}
time_t finish = time(nullptr);
fprintf(stderr, "\rFinished processing %zu relations in %i s\n\n", ids_queued, (int)(finish - start));
if (finish - start > 0)
fprintf(stderr, "%zu Pending relations took %ds at a rate of %.2f/s\n", ids_queued, (int)(finish - start),
((double)ids_queued / (double)(finish - start)));
ids_queued = 0;
ids_done = 0;
//collect all expiry tree informations together into one
for (const auto& clone: clones) {
//for each clone/original output
for(output_vec_t::const_iterator original_output = outs.begin(), clone_output = clone.second.begin();
original_output != outs.end() && clone_output != clone.second.end(); ++original_output, ++clone_output) {
//done copying rels for now
clone_output->get()->commit();
//merge the expire tree from this threads copy of output back
original_output->get()->merge_expire_trees(clone_output->get());
}
}
}
private:
//middle and output copies
std::vector<clone_t> clones;
output_vec_t outs; //would like to move ownership of outs to osmdata_t and middle passed to output_t instead of owned by it
//how many jobs do we have in the queue to start with
size_t ids_queued;
//appending to output that is already there (diff processing)
bool append;
//job queue
pending_queue_t queue;
//how many ids within the job have been processed
size_t ids_done;
//so the threads can manage some of the shared state
std::mutex mutex;
};
} // anonymous namespace
void osmdata_t::stop() {
/* Commit the transactions, so that multiple processes can
* access the data simultanious to process the rest in parallel
* as well as see the newly created tables.
*/
mid->commit();
for (auto& out: outs) {
//TODO: each of the outs can be in parallel
out->commit();
}
// should be the same for all outputs
const bool append = outs[0]->get_options()->append;
{
//threaded pending processing
pending_threaded_processor ptp(
mid, outs, outs[0]->get_options()->num_procs, append);
if (!outs.empty()) {
//This stage takes ways which were processed earlier, but might be
//involved in a multipolygon relation. They could also be ways that
//were modified in diff processing.
mid->iterate_ways(ptp);
//This is like pending ways, except there aren't pending relations
//on import, only on update.
//TODO: Can we skip this on import?
mid->iterate_relations(ptp);
}
}
// Clustering, index creation, and cleanup.
// All the intensive parts of this are long-running PostgreSQL commands
{
auto *opts = outs[0]->get_options();
osmium::thread::Pool pool(opts->parallel_indexing ? opts->num_procs : 1,
512);
if (opts->droptemp) {
// When dropping middle tables, make sure they are gone before
// indexing starts.
mid->stop(pool);
}
for (auto &out : outs) {
out->stop(&pool);
}
if (!opts->droptemp) {
// When keeping middle tables, there is quite a large index created
// which is better done after the output tables have been copied.
// Note that --disable-parallel-indexing needs to be used to really
// force the order.
mid->stop(pool);
}
// Waiting here for pool to execute all tasks.
// XXX If one of them has an error, all other will finish first,
// which may take a long time.
}
}