From 257055d812558f96bdd8ecfd42ab02750778e89d Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Fri, 9 Apr 2021 10:29:19 +0300 Subject: [PATCH] Metalevels in admin levels; country autodivision by metalevels and mwm size estimation --- .gitignore | 1 + web/app/auto_split.py | 95 +++---- web/app/borders_api.py | 58 +++- web/app/borders_api_utils.py | 24 +- web/app/countries_division.py | 502 ++++++++++++++++++--------------- web/app/countries_structure.py | 286 ++++++++++++++++--- web/app/simple_splitting.py | 186 ++++++++++++ web/app/static/borders.js | 46 ++- web/app/subregions.py | 131 +++++---- web/app/templates/index.html | 2 + 10 files changed, 947 insertions(+), 384 deletions(-) create mode 100644 web/app/simple_splitting.py diff --git a/.gitignore b/.gitignore index c266415..67a6a07 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ __pycache__ *.pyc .idea +.DS_Store nohup.out diff --git a/web/app/auto_split.py b/web/app/auto_split.py index 56d2adb..447cc4a 100644 --- a/web/app/auto_split.py +++ b/web/app/auto_split.py @@ -6,18 +6,19 @@ OSM_TABLE as osm_table, MWM_SIZE_THRESHOLD, ) -from subregions import get_subregions_info +from subregions import ( + get_regions_info, + get_subregions_info, +) class DisjointClusterUnion: """Disjoint set union implementation for administrative subregions.""" - def __init__(self, region_id, subregions, next_level, mwm_size_thr=None): + def __init__(self, subregions, mwm_size_thr=None): assert all(s_data['mwm_size_est'] is not None for s_data in subregions.values()) - self.region_id = region_id self.subregions = subregions - self.next_level = next_level self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD self.representatives = {sub_id: sub_id for sub_id in subregions} # A cluster is one or more subregions with common borders @@ -33,6 +34,22 @@ def __init__(self, region_id, subregions, next_level, mwm_size_thr=None): 'finished': False, # True if the cluster cannot be merged with another } + def try_collapse_into_one(self): + sum_mwm_size_est = sum(s_data['mwm_size_est'] + for s_data in self.subregions.values()) + if sum_mwm_size_est <= self.mwm_size_thr: + a_subregion_id = next(iter(self.subregions)) + self.clusters = {} + self.clusters[a_subregion_id] = { + 'representative': a_subregion_id, + 'subregion_ids': list(self.subregions.keys()), + 'mwm_size_est': sum_mwm_size_est, + 'finished': True + } + return True + else: + return False + def get_smallest_cluster(self): """Find minimal cluster.""" smallest_cluster_id = min( @@ -143,15 +160,14 @@ def calculate_common_border_matrix(conn, subregion_ids): return common_border_matrix -def find_golden_splitting(conn, border_id, next_level, mwm_size_thr): - subregions = get_subregions_info(conn, border_id, osm_table, - next_level, need_cities=True) - if not subregions: - return - if any(s_data['mwm_size_est'] is None for s_data in subregions.values()): +def combine_into_clusters(conn, regions, mwm_size_thr): + """Merge regions into clusters up to mwm_size_thr""" + + if any(s_data['mwm_size_est'] is None for s_data in regions.values()): return - dcu = DisjointClusterUnion(border_id, subregions, next_level, mwm_size_thr) + dcu = DisjointClusterUnion(regions, mwm_size_thr) + all_subregion_ids = dcu.get_all_subregion_ids() common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids) @@ -172,57 +188,40 @@ def find_golden_splitting(conn, border_id, next_level, mwm_size_thr): return dcu -def get_union_sql(subregion_ids): - assert(len(subregion_ids) > 0) - if len(subregion_ids) == 1: - return f""" - SELECT way FROM {osm_table} WHERE osm_id={subregion_ids[0]} - """ - else: - return f""" - SELECT ST_Union( - ({get_union_sql(subregion_ids[0:1])}), - ({get_union_sql(subregion_ids[1:])}) - ) - """ +def split_region_at_admin_level(conn, region_id, next_level, mwm_size_thr): + subregions = get_subregions_info(conn, region_id, osm_table, next_level) + if not subregions: + return + dcu = combine_into_clusters(conn, subregions, mwm_size_thr) + save_splitting_to_db(conn, region_id, next_level, dcu) -def save_splitting_to_db(conn, dcu: DisjointClusterUnion): +def save_splitting_to_db(conn, region_id, next_level, dcu: DisjointClusterUnion): with conn.cursor() as cursor: # Remove previous splitting of the region cursor.execute(f""" DELETE FROM {autosplit_table} - WHERE osm_border_id = {dcu.region_id} + WHERE osm_border_id = {region_id} AND mwm_size_thr = {dcu.mwm_size_thr} - AND next_level = {dcu.next_level} + AND next_level = {next_level} """) - for cluster_id, data in dcu.clusters.items(): - subregion_ids = data['subregion_ids'] - subregion_ids_array_str = ( - '{' + ','.join(str(x) for x in subregion_ids) + '}' - ) - cluster_geometry_sql = get_union_sql(subregion_ids) + for cluster_id, cluster_data in dcu.clusters.items(): + subregion_ids = cluster_data['subregion_ids'] + subregion_ids_str = ','.join(str(x) for x in subregion_ids) + subregion_ids_array_str = '{' + subregion_ids_str + '}' cursor.execute(f""" INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom, next_level, mwm_size_thr, mwm_size_est) VALUES ( - {dcu.region_id}, + {region_id}, '{subregion_ids_array_str}', - ({cluster_geometry_sql}), - {dcu.next_level}, + ( + SELECT ST_Union(way) FROM {osm_table} + WHERE osm_id IN ({subregion_ids_str}) + ), + {next_level}, {dcu.mwm_size_thr}, - {data['mwm_size_est']} + {cluster_data['mwm_size_est']} ) """) conn.commit() - - -def split_region(conn, region_id, next_level, mwm_size_thr): - dcu = find_golden_splitting(conn, region_id, next_level, mwm_size_thr) - if dcu is None: - return - save_splitting_to_db(conn, dcu) - - ## May need to debug - #from auto_split_debug import save_splitting_to_file - #save_splitting_to_file(conn, dcu) diff --git a/web/app/borders_api.py b/web/app/borders_api.py index cd04e34..047ae64 100755 --- a/web/app/borders_api.py +++ b/web/app/borders_api.py @@ -20,6 +20,7 @@ import config from borders_api_utils import * from countries_structure import ( + auto_divide_country, CountryStructureException, create_countries_initial_structure, ) @@ -28,6 +29,7 @@ borders_to_xml, lines_to_xml, ) +from simple_splitting import simple_split from subregions import ( get_child_region_ids, get_parent_region_id, @@ -233,15 +235,22 @@ def prepare_sql_search_string(string): @app.route('/search') def search(): query = request.args.get('q') - sql_search_string = prepare_sql_search_string(query) + # query may contain region id or a part of its name + try: + region_id = int(query) + search_value = region_id + is_id = True + except ValueError: + search_value = prepare_sql_search_string(query) + is_id = False with g.conn.cursor() as cursor: cursor.execute(f""" SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom) FROM {config.BORDERS_TABLE} - WHERE name ILIKE %s + WHERE {'id =' if is_id else 'name ILIKE'} %s ORDER BY (ST_Area(geography(geom))) - LIMIT 1""", (sql_search_string,) + LIMIT 1""", (search_value,) ) if cursor.rowcount > 0: rec = cursor.fetchone() @@ -249,6 +258,38 @@ def search(): return jsonify(status='not found') +@app.route('/simple_split') +@check_write_access +@validate_args_types(id=int) +def simple_split_endpoint(): + """Split into 2/4 parts with straight lines""" + region_id = int(request.args.get('id')) + with g.conn.cursor() as cursor: + cursor.execute(f""" + SELECT name, mwm_size_est + FROM {config.BORDERS_TABLE} + WHERE id = %s""", (region_id,)) + if cursor.rowcount == 0: + return jsonify(status=f"Region {region_id} not found") + name, mwm_size_est = cursor.fetchone() + if mwm_size_est is None: + mwm_size_est = update_border_mwm_size_estimation(g.conn, region_id) + if mwm_size_est is not None: + return jsonify(status='MWM size estimation was updated') + else: + return jsonify(status="Cannot esitmate region mwm size") + region = { + 'id': region_id, + 'name': name, + 'mwm_size_est': mwm_size_est, + } + + if simple_split(g.conn, region): + g.conn.commit() + return jsonify(status='ok') + return jsonify(status="Can't split region into parts") + + @app.route('/split') @check_write_access @validate_args_types(id=int) @@ -863,7 +904,7 @@ def export_poly(): borders_table = request.args.get('table') borders_table = config.OTHER_TABLES.get(borders_table, config.BORDERS_TABLE) - fetch_borders_args = {'table': borders_table, 'only_leaves': True} + fetch_borders_args = {'table': borders_table, 'only_leaves': False} if 'xmin' in request.args: # If one coordinate is given then others are also expected. @@ -994,6 +1035,15 @@ def border(): return jsonify(status='ok', geojson=borders[0]) +@app.route('/auto_divide_country') +@validate_args_types(id=int) +def auto_divide_country_endpoint(): + country_id = int(request.args.get('id')) + errors, warnings = auto_divide_country(g.conn, country_id) + if errors: + return jsonify(status='
'.join(errors[:3])) + return jsonify(status='ok', warnings=warnings[:10]) + @app.route('/start_over') def start_over(): try: diff --git a/web/app/borders_api_utils.py b/web/app/borders_api_utils.py index e152919..ea5f800 100644 --- a/web/app/borders_api_utils.py +++ b/web/app/borders_api_utils.py @@ -8,7 +8,7 @@ BORDERS_TABLE as borders_table, OSM_TABLE as osm_table, ) -from auto_split import split_region +from auto_split import split_region_at_admin_level from subregions import ( get_parent_region_id, get_region_country, @@ -70,6 +70,9 @@ def fetch_borders(**kwargs): for rec in cursor: region_id = rec[8] country_id, country_name = get_region_country(g.conn, region_id) + if country_id is None: + # This means region_id was deleted from the DB meanwhile. + continue props = { 'name': rec[0] or '', 'nodes': rec[2], 'modified': rec[3], 'disabled': rec[4], 'count_k': rec[5], 'comment': rec[6], @@ -152,7 +155,7 @@ def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr): """, splitting_sql_params ) if cursor.rowcount == 0: - split_region(g.conn, region_id, next_level, mwm_size_thr) + split_region_at_admin_level(g.conn, region_id, next_level, mwm_size_thr) cursor.execute(f""" SELECT subregion_ids[1], @@ -258,7 +261,7 @@ def divide_into_clusters(region_ids, next_level, mwm_size_thr): """, splitting_sql_params ) if cursor.rowcount == 0: - split_region(g.conn, region_id, next_level, mwm_size_thr) + split_region_at_admin_level(g.conn, region_id, next_level, mwm_size_thr) free_id = get_free_id() counter = 0 @@ -395,7 +398,7 @@ def find_potential_parents(region_id): return parents -def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'): +def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed', mwm_size_est=None): errors, warnings = [], [] with conn.cursor() as cursor: # Check if this id already in use @@ -407,21 +410,18 @@ def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'): return errors, warnings name_expr = f"'{name}'" if name else "name" - parent_id_expr = f"{parent_id}" if isinstance(parent_id, int) else "NULL" + parent_id_sql = None if parent_id == 'not_passed' else parent_id cursor.execute(f""" INSERT INTO {borders_table} - (id, geom, name, parent_id, modified, count_k) - SELECT osm_id, way, {name_expr}, {parent_id_expr}, now(), -1 + (id, geom, name, parent_id, modified, count_k, mwm_size_est) + SELECT osm_id, way, {name_expr}, %s, now(), -1, %s FROM {osm_table} WHERE osm_id = %s - """, (region_id,) + """, (parent_id_sql, mwm_size_est, region_id,) ) if parent_id == 'not_passed': assign_region_to_lowest_parent(conn, region_id) - try: - update_border_mwm_size_estimation(conn, region_id) - except Exception as e: - warnings.append(str(e)) + return errors, warnings diff --git a/web/app/countries_division.py b/web/app/countries_division.py index da64d89..de208ff 100644 --- a/web/app/countries_division.py +++ b/web/app/countries_division.py @@ -1,243 +1,293 @@ import itertools - # admin_level => list of countries which should be initially divided at one admin level unilevel_countries = { - 2: [ - 'Afghanistan', - 'Albania', - 'Algeria', - 'Andorra', - 'Angola', - 'Antigua and Barbuda', - 'Armenia', - 'Australia', # need to be divided at level 4 but has many small islands of level 4 - 'Azerbaijan', # has 2 non-covering 3-level regions - 'Bahrain', - 'Barbados', - 'Belize', - 'Benin', - 'Bermuda', - 'Bhutan', - 'Botswana', - 'British Sovereign Base Areas', # ! include into Cyprus - 'British Virgin Islands', - 'Bulgaria', - 'Burkina Faso', - 'Burundi', - 'Cambodia', - 'Cameroon', - 'Cape Verde', - 'Central African Republic', - 'Chad', - 'Chile', - 'Colombia', - 'Comoros', - 'Congo-Brazzaville', # BUG whith autodivision at level 4 - 'Cook Islands', - 'Costa Rica', - 'Croatia', # next level = 6 - 'Cuba', - 'Cyprus', - "Côte d'Ivoire", - 'Democratic Republic of the Congo', - 'Djibouti', - 'Dominica', - 'Dominican Republic', - 'East Timor', - 'Ecuador', - 'Egypt', - 'El Salvador', - 'Equatorial Guinea', - 'Eritrea', - 'Estonia', - 'Eswatini', - 'Ethiopia', - 'Falkland Islands', - 'Faroe Islands', - 'Federated States of Micronesia', - 'Fiji', - 'Gabon', - 'Georgia', - 'Ghana', - 'Gibraltar', - 'Greenland', - 'Grenada', - 'Guatemala', - 'Guernsey', - 'Guinea', - 'Guinea-Bissau', - 'Guyana', - 'Haiti', - 'Honduras', - 'Iceland', - 'Indonesia', - 'Iran', - 'Iraq', - 'Isle of Man', - 'Israel', # ! don't forget to separate Jerusalem - 'Jamaica', - 'Jersey', - 'Jordan', - 'Kazakhstan', - 'Kenya', # ! level 3 doesn't cover the whole country - 'Kiribati', - 'Kosovo', - 'Kuwait', - 'Kyrgyzstan', - 'Laos', - 'Latvia', - 'Lebanon', - 'Liberia', - 'Libya', - 'Liechtenstein', - 'Lithuania', - 'Luxembourg', - 'Madagascar', - 'Malaysia', - 'Maldives', - 'Mali', - 'Malta', - 'Marshall Islands', - 'Martinique', - 'Mauritania', - 'Mauritius', - 'Mexico', - 'Moldova', - 'Monaco', - 'Mongolia', - 'Montenegro', - 'Montserrat', - 'Mozambique', - 'Myanmar', - 'Namibia', - 'Nauru', - 'Nicaragua', - 'Niger', - 'Nigeria', - 'Niue', - 'North Korea', - 'North Macedonia', - 'Oman', - 'Palau', - # ! 'Palestina' is not a country in OSM - need make an mwm - 'Panama', - 'Papua New Guinea', - 'Peru', # need split-merge - 'Philippines', # split at level 3 and merge or not merge - 'Qatar', - 'Romania', # need split-merge - 'Rwanda', - 'Saint Helena, Ascension and Tristan da Cunha', - 'Saint Kitts and Nevis', - 'Saint Lucia', - 'Saint Vincent and the Grenadines', - 'San Marino', - 'Samoa', - 'Saudi Arabia', - 'Senegal', - 'Seychelles', - 'Sierra Leone', - 'Singapore', - 'Slovakia', # ! split at level 3 then 4, and add Bratislava region (4) - 'Slovenia', - 'Solomon Islands', - 'Somalia', - 'South Georgia and the South Sandwich Islands', - 'South Korea', - 'South Sudan', - 'South Ossetia', # ! don't forget to divide from Georgia - 'Sri Lanka', - 'Sudan', - 'São Tomé and Príncipe', - 'Suriname', - 'Switzerland', - 'Syria', - 'Taiwan', - 'Tajikistan', - 'Thailand', - 'The Bahamas', - 'The Gambia', - 'Togo', - 'Tokelau', - 'Tonga', - 'Trinidad and Tobago', - 'Tunisia', - 'Turkmenistan', - 'Turks and Caicos Islands', - 'Tuvalu', - 'United Arab Emirate', - 'Uruguay', - 'Uzbekistan', - 'Vanuatu', - 'Venezuela', # level 3 not comprehensive - 'Vietnam', - # ! don't forget 'Wallis and Futuna', belongs to France - 'Yemen', - 'Zambia', - 'Zimbabwe', - ], - 3: [ - 'Malawi', - 'Nepal', # ! one region is lost after division - 'Pakistan', - 'Paraguay', - 'Tanzania', - 'Turkey', - 'Uganda', - ], - 4: [ - 'Austria', - 'Bangladesh', - 'Belarus', # maybe need merge capital region with the province - 'Belgium', # maybe need merge capital region into encompassing province - 'Bolivia', - 'Bosnia and Herzegovina', # other levels - 5, 6, 7 - are incomplete. - 'Canada', - 'China', # ! don't forget about Macau and Hong Kong of level 3 not covered by level 4 - 'Denmark', - 'Greece', # ! has one small 3-level subregion! - 'Hungary', # maybe multilevel division at levels [4, 5] ? - 'India', - 'Italy', - 'Japan', # ? About 50 4-level subregions, some of which requires further division - 'Morocco', # ! not all regions appear after substitution with level 4 - 'New Zealand', # ! don't forget islands to the north and south - 'Norway', - 'Poland', # 380(!) subregions of AL=6 - 'Portugal', - 'Russia', - 'Serbia', - 'South Africa', - 'Spain', - 'Ukraine', - 'United States', - ], - 5: [ - 'Ireland', # ! 5-level don't cover the whole country - ], - 6: [ - 'Czechia', - ] + 2: [ + 'Afghanistan', + 'Albania', + 'Algeria', + 'Andorra', + 'Angola', + 'Antigua and Barbuda', + 'Armenia', + 'Australia', # need to be divided at level 4 but has many small islands of level 4 + 'Azerbaijan', # has 2 non-covering 3-level regions + 'Bahrain', + 'Barbados', + 'Belize', + 'Benin', + 'Bermuda', + 'Bhutan', + 'Botswana', + 'British Sovereign Base Areas', # ! include into Cyprus + 'British Virgin Islands', + 'Bulgaria', + 'Burkina Faso', + 'Burundi', + 'Cambodia', + 'Cameroon', + 'Cape Verde', + 'Central African Republic', + 'Chad', + 'Chile', + 'Colombia', + 'Comoros', + 'Congo-Brazzaville', # BUG whith autodivision at level 4 + 'Cook Islands', + 'Costa Rica', + 'Croatia', # next level = 6 + 'Cuba', + 'Cyprus', + "Côte d'Ivoire", + 'Democratic Republic of the Congo', + 'Djibouti', + 'Dominica', + 'Dominican Republic', + 'East Timor', + 'Ecuador', + 'Egypt', + 'El Salvador', + 'Equatorial Guinea', + 'Eritrea', + 'Estonia', + 'Eswatini', + 'Ethiopia', + 'Falkland Islands', + 'Faroe Islands', + 'Federated States of Micronesia', + 'Fiji', + 'Gabon', + 'Georgia', + 'Ghana', + 'Gibraltar', + 'Greenland', + 'Grenada', + 'Guatemala', + 'Guernsey', + 'Guinea', + 'Guinea-Bissau', + 'Guyana', + 'Haiti', + 'Honduras', + 'Iceland', + 'Indonesia', + 'Iran', + 'Iraq', + 'Isle of Man', + 'Israel', # ! don't forget to separate Jerusalem + 'Jamaica', + 'Jersey', + 'Jordan', + 'Kazakhstan', + 'Kenya', # ! level 3 doesn't cover the whole country + 'Kiribati', + 'Kosovo', + 'Kuwait', + 'Kyrgyzstan', + 'Laos', + 'Latvia', + 'Lebanon', + 'Liberia', + 'Libya', + 'Liechtenstein', + 'Lithuania', + 'Luxembourg', + 'Madagascar', + 'Malaysia', + 'Maldives', + 'Mali', + 'Malta', + 'Marshall Islands', + 'Martinique', + 'Mauritania', + 'Mauritius', + 'Mexico', + 'Moldova', + 'Monaco', + 'Mongolia', + 'Montenegro', + 'Montserrat', + 'Mozambique', + 'Myanmar', + 'Namibia', + 'Nauru', + 'Nicaragua', + 'Niger', + 'Nigeria', + 'Niue', + 'North Korea', + 'North Macedonia', + 'Oman', + 'Palau', + # ! 'Palestina' is not a country in OSM - need make an mwm + 'Panama', + 'Papua New Guinea', + 'Peru', # need split-merge + 'Philippines', # split at level 3 and merge or not merge + 'Qatar', + 'Romania', # need split-merge + 'Rwanda', + 'Saint Helena, Ascension and Tristan da Cunha', + 'Saint Kitts and Nevis', + 'Saint Lucia', + 'Saint Vincent and the Grenadines', + 'San Marino', + 'Samoa', + 'Saudi Arabia', + 'Senegal', + 'Seychelles', + 'Sierra Leone', + 'Singapore', + 'Slovakia', # ! split at level 3 then 4, and add Bratislava region (4) + 'Slovenia', + 'Solomon Islands', + 'Somalia', + 'South Georgia and the South Sandwich Islands', + 'South Korea', + 'South Sudan', + 'South Ossetia', # ! don't forget to divide from Georgia + 'Sri Lanka', + 'Sudan', + 'São Tomé and Príncipe', + 'Suriname', + 'Switzerland', + 'Syria', + 'Taiwan', + 'Tajikistan', + 'Thailand', + 'The Bahamas', + 'The Gambia', + 'Togo', + 'Tokelau', + 'Tonga', + 'Trinidad and Tobago', + 'Tunisia', + 'Turkmenistan', + 'Turks and Caicos Islands', + 'Tuvalu', + 'United Arab Emirate', + 'Uruguay', + 'Uzbekistan', + 'Vanuatu', + 'Venezuela', # level 3 not comprehensive + 'Vietnam', + # ! don't forget 'Wallis and Futuna', belongs to France + 'Yemen', + 'Zambia', + 'Zimbabwe', + ], + 3: [ + 'Malawi', + 'Nepal', # ! one region is lost after division + 'Pakistan', + 'Paraguay', + 'Tanzania', + 'Turkey', + 'Uganda', + ], + 4: [ + 'Austria', + 'Bangladesh', + 'Belarus', # maybe need merge capital region with the province + 'Belgium', # maybe need merge capital region into encompassing province + 'Bolivia', + 'Bosnia and Herzegovina', # other levels - 5, 6, 7 - are incomplete. + 'Canada', + 'China', # ! don't forget about Macau and Hong Kong of level 3 not covered by level 4 + 'Denmark', + 'Greece', # ! has one small 3-level subregion! + 'Hungary', # maybe multilevel division at levels [4, 5] ? + 'India', + 'Italy', + 'Japan', # ? About 50 4-level subregions, some of which requires further division + 'Morocco', # ! not all regions appear after substitution with level 4 + 'New Zealand', # ! don't forget islands to the north and south + 'Norway', + 'Poland', # 380(!) subregions of AL=6 + 'Portugal', + 'Russia', + 'Serbia', + 'South Africa', + 'Spain', + 'Ukraine', + 'United States', + ], + 5: [ + 'Ireland', # ! 5-level don't cover the whole country + ], + 6: [ + 'Czechia', + ] } # Country name => list of admin levels to which it should be initially divided. # 'Germany': [4, 5] implies that the country is divided at level 4 at first, then all # 4-level subregions are divided into subregions of level 5 (if any) multilevel_countries = { - 'Brazil': [3, 4], - 'Finland': [3, 6], # [3,5,6] in more fresh data? # division by level 6 seems ideal - 'France': [3, 4], - 'Germany': [4, 5], # not the whole country is covered by units of AL=5 - 'Netherlands': [3, 4], # there are carribean lands of level both 3 and 4 - 'Sweden': [3, 4], # division by level 4 seems ideal - 'United Kingdom': [4, 5], # level 5 is necessary but not comprehensive + 'Brazil': [3, 4], + 'Finland': [3, 6], # [3,5,6] in more fresh data? # division by level 6 seems ideal + 'France': [3, 4], + 'Germany': [4, 5], # not the whole country is covered by units of AL=5 + 'Netherlands': [3, 4], # there are carribean lands of level both 3 and 4 + 'Sweden': [3, 4], # division by level 4 seems ideal + 'United Kingdom': [4, 5], # level 5 is necessary but not comprehensive } country_initial_levels = dict(itertools.chain( ((country, ([level] if level > 2 else [])) - for level, countries in unilevel_countries.items() - for country in countries), + for level, countries in unilevel_countries.items() + for country in countries), multilevel_countries.items() )) + +# The dict value is tuple of 2 items. +# First: array of admin levels of mandatory, non-coarsable regions. +# Second: array of lower admin levels at which united may be merged. +# Each "admin level" may be a number or a tuple of numbers - "meta admin_level". +# E.g. in Japan [6,7] is a county metalevel: rural counties are (7), cities are (6) +# and may be divided into (7)-subregions. Any level of a metalevel may be +# non-comprehensive in the sense it should not cover the whole upper-metalevel unit, +# but altogether they should. +country_levels = { + 'Afghanistan': ([], [4, 5]), + 'Australia': ([4], [6]), + 'Austria': None, + 'Belarus': ([4], [6]), + 'Belgium': None, + 'Brazil': None, + 'China': None, + 'France': None, + 'Germany': ([(4, 5)], [6]), + 'Greece': ([(3, 4)], [5, 6]), + 'India': None, + 'Indonesia': None, + 'Italy': None, + 'Iran': ([4], [5, 6]), + 'Ireland': None, + 'Japan': ([(4, 5)], [(6, 7)]), + 'Mali': ([4], [6]), + 'Netherlands': ([3, 4], [8]), + 'Norway': None, + 'Russia': None, + 'Slovakia': ([(3, 4)], []), + 'Spain': None, + 'Sweden': None, + 'Finland': None, + 'Tanzania': ([3, 4], [5]), + 'Turkey': ([3, 4], [6]), + 'United Kingdom': None, + 'United States': (4, 6), +} + +# Transform each metalevel to list if it is not +country_levels = { + c: ([ + [[ml] if isinstance(ml, int) else ml for ml in mls] + for mls in mls_sequence + ] + if mls_sequence is not None + else None + ) + for c, mls_sequence in country_levels.items() +} diff --git a/web/app/countries_structure.py b/web/app/countries_structure.py index 5567cb4..2a0bc7a 100644 --- a/web/app/countries_structure.py +++ b/web/app/countries_structure.py @@ -1,13 +1,29 @@ +import itertools + +import config + +from auto_split import( + combine_into_clusters, +) from borders_api_utils import ( copy_region_from_osm, divide_region_into_subregions, + get_free_id, get_osm_border_name_by_osm_id, ) from config import ( BORDERS_TABLE as borders_table, + MWM_SIZE_THRESHOLD, OSM_TABLE as osm_table ) -from countries_division import country_initial_levels +from countries_division import country_levels +from simple_splitting import simple_split +from subregions import ( + get_regions_basic_info, + get_regions_info, + get_geometrical_subregions, + update_border_mwm_size_estimation, +) class CountryStructureException(Exception): @@ -19,52 +35,248 @@ def _clear_borders(conn): cursor.execute(f"DELETE FROM {borders_table}") -def _make_country_structure(conn, country_osm_id): - country_name = get_osm_border_name_by_osm_id(conn, country_osm_id) +def checksum_area(conn, regions, region_id): + """Returns True if the sum of subregion areas (almost) equal + to the region area. + """ + region = regions[region_id] + children = [r for r in regions.values() if r['parent_id'] == region_id] + regions_without_area = [r for r in itertools.chain(children, [region]) + if 'land_area' not in r] + regions_without_area_ids = [r['id'] for r in regions_without_area] + regions_info = get_regions_basic_info(conn, regions_without_area_ids, osm_table) + for r_id, r_data in regions_info.items(): + regions[r_id]['land_area'] = r_data['land_area'] + + children_area = sum(r['land_area'] for r in children) + has_lost_subregions = (children_area < 0.99 * region['land_area']) + return not has_lost_subregions + + +def _amend_regions_with_mwm_size(conn, regions): + region_ids_without_size = [s_id for s_id, s_data in regions.items() + if 'mwm_size_est' not in s_data] + extra_regions = get_regions_info(conn, region_ids_without_size, osm_table) + for s_id, s_data in extra_regions.items(): + regions[s_id]['mwm_size_est'] = s_data['mwm_size_est'] + + +def auto_divide_country(conn, country_id): + country_name = get_osm_border_name_by_osm_id(conn, country_id) + metalevels = country_levels.get(country_name, None) + if metalevels is None: + e, w = copy_region_from_osm(conn, country_id) + conn.commit() + return e, w + + regions = { + country_id: { + 'id': country_id, + 'name': country_name, + 'al': 2, + 'parent_id': None + } + } + + all_metalevels = metalevels[0] + metalevels[1] + fill_regions_structure(conn, regions, country_id, all_metalevels) + non_mergeable_metalevels = metalevels[0] + + for metalevel, lower_metalevel in list(zip(all_metalevels[:-1], all_metalevels[1:]))[::-1]: + if lower_metalevel in non_mergeable_metalevels: + break + # Find regions at metalevel that composed of subregions at lower_metalevel + region_ids_at_metalevel = [r['id'] for r in regions.values() + if r['al'] in metalevel] + for region_id in region_ids_at_metalevel: + if checksum_area(conn, regions, region_id): + regions[region_id]['has_lost_subregions'] = False + children = [r for r in regions.values() + if r['parent_id'] == region_id] + mergeable_children = {ch['id']: ch for ch in children + if 'clusters' not in ch} + _amend_regions_with_mwm_size(conn, mergeable_children) + dcu = combine_into_clusters(conn, + mergeable_children, config.MWM_SIZE_THRESHOLD) + regions[region_id]['mwm_size_est'] = sum(ch['mwm_size_est'] + for ch in children) + if len(children) == len(mergeable_children): + # If the sum of subregions is less than mwm_size_thr + # then collapse clusters into one despite of geometrical connectivity + dcu.try_collapse_into_one() + + if len(dcu.clusters) == 1 and len(children) == len(mergeable_children): + regions[region_id]['merged_up_to_itself'] = True + for ch in children: + regions[ch['id']]['merged'] = True + else: + real_clusters = { + cl_id: cl_data + for cl_id, cl_data in dcu.clusters.items() + if len(cl_data['subregion_ids']) > 1 + } + regions[region_id]['clusters'] = real_clusters + for cluster in real_clusters.values(): + for s_id in cluster['subregion_ids']: + regions[s_id]['merged'] = True + else: + regions[region_id]['has_lost_subregions'] = True + + warnings = [] + save_country_structure_to_db(conn, regions) + conn.commit() + return [], warnings + + +def save_country_structure_to_db(conn, regions): + parent_ids = set(r['parent_id'] for r in regions.values() if r['parent_id'] is not None) + leaf_ids = set(regions.keys()) - parent_ids + for leaf_id in leaf_ids: + regions[leaf_id]['is_leaf'] = True - copy_region_from_osm(conn, country_osm_id, parent_id=None) + def save_clusters_to_db(conn, region_id): + assert('clusters' in regions[region_id]) + free_id = get_free_id() + with conn.cursor() as cursor: + parent_name = regions[region_id]['name'] + counter = 0 + for cl_id, cl_data in regions[region_id]['clusters'].items(): + if len(cl_data['subregion_ids']) == 1: + subregion_id = cl_data['subregion_ids'][0] + subregion_name = regions[subregion_id]['name'] + cursor.execute(f""" + INSERT INTO {borders_table} (id, name, parent_id, geom, + modified, count_k, mwm_size_est) + VALUES ( + {subregion_id}, + %s, + {region_id}, + ( + SELECT way FROM {osm_table} + WHERE osm_id = {subregion_id} + ), + now(), + -1, + {cl_data['mwm_size_est']} + ) + """, (subregion_name,)) + else: + counter += 1 + subregion_ids_str = ','.join(str(x) for x in cl_data['subregion_ids']) + cursor.execute(f""" + INSERT INTO {borders_table} (id, name, parent_id, geom, + modified, count_k, mwm_size_est) + VALUES ( + {free_id}, + %s, + {region_id}, + ( + SELECT ST_Union(way) FROM {osm_table} + WHERE osm_id IN ({subregion_ids_str}) + ), + now(), + -1, + {cl_data['mwm_size_est']} + ) + """, (f"{parent_name}_{counter}",)) + free_id -= 1 - if country_initial_levels.get(country_name): - admin_levels = country_initial_levels[country_name] - prev_admin_levels = [2] + admin_levels[:-1] - prev_level_region_ids = [country_osm_id] + def save_region_structure_to_db(conn, region_id): + r_data = regions[region_id] + if r_data.get('merged') == True: + return + copy_region_from_osm(conn, region_id, + parent_id=r_data['parent_id'], + mwm_size_est=r_data.get('mwm_size_est')) + if r_data.get('has_lost_subregions') or r_data.get('is_leaf'): + region_container = {k: v for k, v in regions.items() if k == region_id} + region_data = region_container[region_id] + mwm_size_est = update_border_mwm_size_estimation(conn, region_id) + region_data['mwm_size_est'] = mwm_size_est + if (mwm_size_est is not None and + mwm_size_est > MWM_SIZE_THRESHOLD): + simple_split(conn, region_data) + else: + children_ids = set(r['id'] for r in regions.values() + if r['parent_id'] == region_id) + children_in_clusters = set(itertools.chain.from_iterable( + cl['subregion_ids'] for cl in r_data.get('clusters', {}).values())) + standalone_children_ids = children_ids - children_in_clusters + if 'clusters' in r_data: + save_clusters_to_db(conn, region_id) + for ch_id in standalone_children_ids: + save_region_structure_to_db(conn, ch_id) - for admin_level, prev_level in zip(admin_levels, prev_admin_levels): - current_level_region_ids = [] - for region_id in prev_level_region_ids: - subregion_ids = divide_region_into_subregions( - conn, region_id, admin_level) - current_level_region_ids.extend(subregion_ids) - prev_level_region_ids = current_level_region_ids + + country_id = [k for k, v in regions.items() if v['parent_id'] is None] + assert len(country_id) == 1 + country_id = country_id[0] + + save_region_structure_to_db(conn, country_id) + conn.commit() + + +def fill_regions_structure(conn, regions, region_id, metalevels): + """Given regions tree-like dict, amend it by splitting region_id + region at metalevels. + """ + leaf_ids = [region_id] + for metalevel in metalevels: + for leaf_id in leaf_ids: + fill_region_structure_at_metalevel(conn, regions, leaf_id, metalevel) + leaf_ids = [ + r_id for r_id in + (set(regions.keys()) - set(r['parent_id'] for r in regions.values())) + if regions[r_id]['al'] in metalevel + ] + + +def fill_region_structure_at_metalevel(conn, regions, region_id, metalevel): + """Divides a region with "region_id" into subregions of specified admin level(s). + Updates the "regions" tree-like dict: + region_id : {'id': region_id, 'al': admin_level, 'parent_id': parent_id} + """ + + def process_subregions_of(region_id): + subregion_ids_by_level = [] + # "regions" dict is used from the closure + for sublevel in (lev for lev in metalevel if lev > regions[region_id]['al']): + subregions = get_geometrical_subregions( + conn, region_id, osm_table, sublevel + ) + subregion_ids = list(subregions.keys()) + subregion_ids_by_level.append(subregion_ids) + for s_id in subregion_ids: + # As a first approximation, assign all found subregions + # of all sublevels to the region. This may change in deeper recursion calls. + if s_id not in regions: + regions[s_id] = { + 'id': s_id, + 'name': subregions[s_id], + 'parent_id': region_id, + 'al': sublevel, + + } + else: + regions[s_id]['parent_id'] = region_id + + for layer in subregion_ids_by_level: + for s_id in layer: + process_subregions_of(s_id) + + process_subregions_of(region_id) def create_countries_initial_structure(conn): _clear_borders(conn) with conn.cursor() as cursor: - # TODO: process overlapping countries, like Ukraine and Russia with common Crimea cursor.execute(f""" - SELECT osm_id + SELECT osm_id, name FROM {osm_table} WHERE admin_level = 2 """ ) - for country_osm_id, *_ in cursor: - _make_country_structure(conn, country_osm_id) - conn.commit() - return - - -def _get_country_osm_id_by_name(conn, name): - with conn.cursor() as cursor: - cursor.execute(f""" - SELECT osm_id FROM {osm_table} - WHERE admin_level = 2 AND name = %s - """, (name,)) - row_count = cursor.rowcount - if row_count > 1: - raise CountryStructureException(f'More than one country "{name}"') - rec = cursor.fetchone() - if not rec: - raise CountryStructureException(f'Not found country "{name}"') - return rec[0] - + for country_osm_id, country_name in cursor: + # Only create small countries - to not forget to create them manually + if country_name not in country_levels: + auto_divide_country(conn, country_osm_id) diff --git a/web/app/simple_splitting.py b/web/app/simple_splitting.py new file mode 100644 index 0000000..5f41c54 --- /dev/null +++ b/web/app/simple_splitting.py @@ -0,0 +1,186 @@ +import json + +from borders_api_utils import ( + get_free_id, +) +from config import ( + BORDERS_TABLE as borders_table, + MWM_SIZE_THRESHOLD, +) +from subregions import ( + update_border_mwm_size_estimation, +) + + +def simple_split(conn, region): + """Split region {'id', 'name', 'mwm_size_est'} (already present in borders table) + into 2 or 4 parts""" + + mwm_size_est = region['mwm_size_est'] + #print(f"simple_split, size = {mwm_size_est}, MWM_SIZE_THRESHOLD={MWM_SIZE_THRESHOLD}") + + if mwm_size_est is None or mwm_size_est > 2 * MWM_SIZE_THRESHOLD: + return split_into_4_parts(conn, region) + else: + return split_into_2_parts(conn, region) + + +def split_into_2_parts(conn, region): + bbox = get_region_bbox(conn, region['id']) + width = bbox[2] - bbox[0] + height = bbox[3] - bbox[1] + split_vertically = (width > height) + + if split_vertically: + mid_lon = (bbox[2] + bbox[0]) / 2 + min_lat = bbox[1] + max_lat = bbox[3] + line_sql = f"LINESTRING({mid_lon} {min_lat}, {mid_lon} {max_lat})" + position_tag = f"(ST_XMin(geom) + ST_XMax(geom)) / 2 < {mid_lon}" + name_tags = ('west', 'east') + else: + mid_lat = (bbox[3] + bbox[1]) / 2 + min_lon = bbox[0] + max_lon = bbox[2] + line_sql = f"LINESTRING({min_lon} {mid_lat}, {max_lon} {mid_lat})" + position_tag = f"(ST_YMin(geom) + ST_YMax(geom)) / 2 < {mid_lat}" + name_tags = ('south', 'north') + + free_id = get_free_id() + ids = (free_id, free_id - 1) + + with conn.cursor() as cursor: + with conn.cursor() as insert_cursor: + cursor.execute(f""" + SELECT ST_AsText(ST_CollectionExtract(ST_MakeValid(ST_Collect(geom)), 3)) AS geom, + {position_tag} AS is_lower + FROM ( + SELECT + (ST_DUMP( + ST_Split( + ( + SELECT geom FROM {borders_table} + WHERE id = {region['id']} + ), + ST_GeomFromText('{line_sql}', 4326) + ) + ) + ).geom as geom + ) q + GROUP BY {position_tag} + ORDER BY 2 DESC + """) + if cursor.rowcount < 2: + return False + for i, ((geom, is_lower), b_id, name_tag) in enumerate(zip(cursor, ids, name_tags)): + insert_cursor.execute(f""" + INSERT INTO {borders_table} (id, name, parent_id, geom, + modified, count_k, mwm_size_est) + VALUES ( + {b_id}, + %s, + {region['id']}, + ST_GeomFromText(%s, 4326), + now(), + -1, + NULL + ) + """, (f"{region['name']}_{name_tag}", geom) + ) + for b_id in ids: + update_border_mwm_size_estimation(conn, b_id) + return True + + +def split_into_4_parts(conn, region): + bbox = get_region_bbox(conn, region['id']) + mid_lon = (bbox[2] + bbox[0]) / 2 + mid_lat = (bbox[3] + bbox[1]) / 2 + min_lat = bbox[1] + max_lat = bbox[3] + min_lon = bbox[0] + max_lon = bbox[2] + position_tag_X = f"(ST_XMin(geom) + ST_XMax(geom)) / 2 < {mid_lon}" + position_tag_Y = f"(ST_YMin(geom) + ST_YMax(geom)) / 2 < {mid_lat}" + line_sql = ( + "LINESTRING(" + f"{min_lon} {mid_lat}," + f"{max_lon} {mid_lat}," + f"{max_lon} {min_lat}," + f"{mid_lon} {min_lat}," + f"{mid_lon} {max_lat}" + ")" + ) + + # 4 quadrants are defined by a pair of (position_tag_X, position_tag_Y) + name_tags = { + (True, True) : 'southwest', + (True, False) : 'northwest', + (False, True) : 'southeast', + (False, False): 'northeast' + } + + + with conn.cursor() as cursor: + with conn.cursor() as insert_cursor: + query = f""" + SELECT ST_AsText(ST_CollectionExtract(ST_MakeValid(ST_Collect(geom)), 3)) AS geom, + {position_tag_X}, + {position_tag_Y} + FROM ( + SELECT + (ST_DUMP( + ST_Split( + ( + SELECT geom FROM {borders_table} + WHERE id = {region['id']} + ), + ST_GeomFromText('{line_sql}', 4326) + ) + ) + ).geom as geom + ) q + GROUP BY {position_tag_X}, {position_tag_Y} + """ + cursor.execute(query) + if cursor.rowcount < 2: + return False + + free_id = get_free_id() + used_ids = [] + for geom, is_lower_X, is_lower_Y in cursor: + name_tag = name_tags[(is_lower_X, is_lower_Y)] + insert_cursor.execute(f""" + INSERT INTO {borders_table} (id, name, parent_id, geom, + modified, count_k, mwm_size_est) + VALUES ( + {free_id}, + %s, + {region['id']}, + ST_GeomFromText(%s, 4326), + now(), + -1, + NULL + ) + """, (f"{region['name']}_{name_tag}", geom) + ) + used_ids.append(free_id) + free_id -= 1 + for b_id in used_ids: + update_border_mwm_size_estimation(conn, b_id) + return True + + +def get_region_bbox(conn, region_id): + """Return [xmin, ymin, xmax, ymax] array for the region from borders table""" + with conn.cursor() as cursor: + cursor.execute(f""" + SELECT ST_AsGeoJSON(BOX2D(geom)) + FROM {borders_table} + WHERE id = %s + """, (region_id,)) + geojson = json.loads(cursor.fetchone()[0]) + bb = geojson['coordinates'][0] + # bb[0] is the [xmin, ymin] corner point, bb[2] - [xmax, ymax] + return bb[0] + bb[2] + diff --git a/web/app/static/borders.js b/web/app/static/borders.js index 3ade574..4fd5a0e 100644 --- a/web/app/static/borders.js +++ b/web/app/static/borders.js @@ -598,6 +598,17 @@ function bDisable() { }); } +function bSimpleSplit() { + if (!selectedId || !(selectedId in borders)) + return; + $.ajax(getServer('simple_split'), { + data: { + 'id': selectedId + }, + success: makeAnswerHandler(updateBorders) + }); +} + function bDelete() { if (!selectedId || !(selectedId in borders)) return; @@ -977,18 +988,31 @@ function updatePointList(data) { a.onclick = (function(id, name) { return function() { pPointSelect(id, name); - return false - } + return false; + }; })(b['id'], b['name']); - list.append(a, $('
')); - $(a).text(b['admin_level'] + ': ' + b['name'] + ' (' + Math.round(b[ - 'area']) + ' км²)'); + list.append(a); + $(a).text(b['admin_level'] + ': ' + b['name'] + ' (' + + Math.round(b['area']) + ' км²)'); + if (b['admin_level'] == 2) { + var auto_divide_link = document.createElement('a'); + auto_divide_link.href = '#'; + auto_divide_link.onclick = (function(id) { + return function() { + pAutoDivideCountry(id); + return false; + }; + })(b['id']); + $(auto_divide_link).text("!!autodivide!!"); + list.append("
   ", auto_divide_link); + } + list.append("
"); } } -function pPointSelect(id, name1) { +function pPointSelect(id, osm_name) { var name = $('#p_name').val(); - name = name.replace('*', name1); + name = name.replace('*', osm_name); $.ajax(getServer('from_osm'), { data: { 'name': name, @@ -999,6 +1023,14 @@ function pPointSelect(id, name1) { bPointCancel(); } +function pAutoDivideCountry(id) { + $.ajax(getServer('auto_divide_country'), { + data: {'id': id}, + success: makeAnswerHandler(updateBorders) + }); + bPointCancel(); +} + function bPointCancel() { $('#point').hide(); $('#actions').show(); diff --git a/web/app/subregions.py b/web/app/subregions.py index db21dee..7690aec 100644 --- a/web/app/subregions.py +++ b/web/app/subregions.py @@ -12,6 +12,21 @@ from mwm_size_predictor import MwmSizePredictor +def get_regions_info(conn, region_ids, regions_table, need_cities=False): + """Get regions info including mwm_size_est in the form of + dict {region_id => region data} + """ + regions_info = get_regions_basic_info(conn, region_ids, regions_table) + _add_mwm_size_estimation(conn, regions_info, regions_table, need_cities) + keys = ('name', 'mwm_size_est') + if need_cities: + keys = keys + ('cities',) + return {region_id: {k: region_data[k] for k in keys + if k in region_data} + for region_id, region_data in regions_info.items() + } + + def get_subregions_info(conn, region_id, region_table, next_level, need_cities=False): """ @@ -21,72 +36,78 @@ def get_subregions_info(conn, region_id, region_table, :param next_level: admin level of subregions to find :return: dict {subregion_id => subregion data} including area and population info """ - subregion_ids = _get_geometrical_subregion_ids(conn, region_id, - region_table, next_level) - subregions = _get_regions_basic_info(conn, subregion_ids) - _add_mwm_size_estimation(conn, subregions, need_cities) - keys = ('name', 'mwm_size_est') - if need_cities: - keys = keys + ('cities',) - return {subregion_id: {k: subregion_data[k] for k in keys - if k in subregion_data} - for subregion_id, subregion_data in subregions.items() - } + subregions = get_geometrical_subregions(conn, region_id, + region_table, next_level) + subregion_ids = list(subregions.keys()) + return get_regions_info(conn, subregion_ids, osm_table, need_cities) -def _get_geometrical_subregion_ids(conn, region_id, region_table, next_level): +def get_geometrical_subregions(conn, region_id, region_table, next_level): region_id_column, region_geom_column = ( ('id', 'geom') if region_table == borders_table else ('osm_id', 'way') ) with conn.cursor() as cursor: cursor.execute(f""" - SELECT subreg.osm_id + SELECT subreg.osm_id, subreg.name FROM {region_table} reg, {osm_table} subreg WHERE reg.{region_id_column} = %s AND subreg.admin_level = %s AND ST_Contains(reg.{region_geom_column}, subreg.way) """, (region_id, next_level) ) - return list(rec[0] for rec in cursor) + return {s_id: name for s_id, name in cursor} -def _get_regions_basic_info(conn, region_ids): +def get_regions_basic_info(conn, region_ids, regions_table, need_land_area=True): """Gets name, land_area for regions in OSM borders table""" if not region_ids: return {} + region_id_column, region_geom_column = ( + ('id', 'geom') if regions_table == borders_table else + ('osm_id', 'way') + ) region_ids_str = ','.join(str(x) for x in region_ids) - with conn.cursor() as cursor: - cursor.execute(f""" - SELECT reg.osm_id, reg.name, + land_area_expr = ( + 'NULL' if not need_land_area + else f""" ST_Area( geography( ST_Intersection( - reg.way, + reg.{region_geom_column}, ( SELECT ST_Union(c.geom) FROM {land_polygons_table} c - WHERE c.geom && reg.way + WHERE c.geom && reg.{region_geom_column} ) ) ) - ) / 1.0E+6 land_area - FROM {osm_table} reg - WHERE osm_id in ({region_ids_str}) + ) / 1.0E+6 + """ + ) + with conn.cursor() as cursor: + cursor.execute(f""" + SELECT reg.{region_id_column}, reg.name, + ST_Area(reg.{region_geom_column}) / 1.0E+6 area, + {land_area_expr} land_area + FROM {regions_table} reg + WHERE {region_id_column} in ({region_ids_str}) """ ) regions = {} - for osm_id, name, land_area in cursor: + for r_id, name, area, land_area in cursor: region_data = { - 'osm_id': osm_id, + 'id': r_id, 'name': name, - 'land_area': land_area, + 'area': area, } - regions[osm_id] = region_data + if need_land_area: + region_data['land_area'] = land_area + regions[r_id] = region_data return regions -def _add_population_data(conn, regions, need_cities): +def _add_population_data(conn, regions, regions_table, need_cities): """Adds population data only for regions that are suitable for mwm size estimation. """ @@ -106,13 +127,18 @@ def _add_population_data(conn, regions, need_cities): if need_cities: data['cities'] = [] + region_id_column, region_geom_column = ( + ('id', 'geom') if regions_table == borders_table else + ('osm_id', 'way') + ) + region_ids_str = ','.join(str(x) for x in region_ids) with conn.cursor() as cursor: cursor.execute(f""" - SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place - FROM {osm_table} b, {osm_places_table} p - WHERE b.osm_id IN ({region_ids_str}) - AND ST_Contains(b.way, p.center) + SELECT b.{region_id_column}, p.name, coalesce(p.population, 0), p.place + FROM {regions_table} b, {osm_places_table} p + WHERE b.{region_id_column} IN ({region_ids_str}) + AND ST_Contains(b.{region_geom_column}, p.center) """ ) for region_id, place_name, place_population, place_type in cursor: @@ -129,7 +155,7 @@ def _add_population_data(conn, regions, need_cities): region_data['hamlet_cnt'] += 1 -def _add_coastline_length(conn, regions): +def _add_coastline_length(conn, regions, regions_table): if not regions: return @@ -138,14 +164,20 @@ def _add_coastline_length(conn, regions): region_ids_str = ','.join(str(x) for x in regions.keys()) + region_id_column, region_geom_column = ( + ('id', 'geom') if regions_table == borders_table else + ('osm_id', 'way') + ) + with conn.cursor() as cursor: cursor.execute(f""" WITH buffered_borders AS ( -- 0.001 degree ~ 100 m - ocean buffer stripe to overcome difference -- in coastline and borders - SELECT id, ST_Buffer(geom, 0.001) geom - FROM {borders_table} - WHERE id IN ({region_ids_str}) + SELECT {region_id_column} id, + ST_Buffer({region_geom_column}, 0.001) geom + FROM {regions_table} + WHERE {region_id_column} IN ({region_ids_str}) ) SELECT bb.id, SUM( @@ -166,15 +198,13 @@ def _add_coastline_length(conn, regions): regions[b_id]['coastline_length'] = coastline_length -def _add_mwm_size_estimation(conn, regions, need_cities): +def _add_mwm_size_estimation(conn, regions, regions_table, need_cities): for region_data in regions.values(): region_data['mwm_size_est'] = None - _add_population_data(conn, regions, need_cities) - _add_coastline_length(conn, regions) + _add_population_data(conn, regions, regions_table, need_cities) + _add_coastline_length(conn, regions, regions_table) - #from pprint import pprint as pp - #pp(regions) regions_to_predict = [ ( s_id, @@ -202,16 +232,16 @@ def _add_mwm_size_estimation(conn, regions, need_cities): def update_border_mwm_size_estimation(conn, border_id): - regions = _get_regions_basic_info(conn, [border_id]) + regions = get_regions_basic_info(conn, [border_id], borders_table) if math.isnan(regions[border_id]['land_area']): - e = Exception(f"Area is NaN for border '{name}' ({border_id})") + e = Exception(f"Area is NaN for border '{regions[border_id]['name']}' ({border_id})") raise e - _add_mwm_size_estimation(conn, regions, need_cities=False) + _add_mwm_size_estimation(conn, regions, borders_table, need_cities=False) mwm_size_est = regions[border_id].get('mwm_size_est') # mwm_size_est may be None. Python's None is converted to NULL - # during %s substitution in execute(). + # during %s substitution in cursor.execute(). with conn.cursor() as cursor: cursor.execute(f""" UPDATE {borders_table} @@ -219,6 +249,7 @@ def update_border_mwm_size_estimation(conn, border_id): WHERE id = %s """, (mwm_size_est, border_id,)) conn.commit() + return mwm_size_est def is_administrative_region(conn, region_id): @@ -248,12 +279,14 @@ def get_region_country(conn, region_id): possibly itself. """ predecessors = get_predecessors(conn, region_id) - return predecessors[-1] + return predecessors[-1] if predecessors is not None else (None, None) def get_predecessors(conn, region_id): """Returns the list of (id, name)-tuples of all predecessors, - starting from the very region_id. + starting from the very region_id, and None if there is no + requested region or one of its predecessors in the DB which + may occur due to other queries to the DB. """ predecessors = [] cursor = conn.cursor() @@ -265,9 +298,7 @@ def get_predecessors(conn, region_id): ) rec = cursor.fetchone() if not rec: - raise Exception( - f"No record in '{borders_table}' table with id = {region_id}" - ) + return None predecessors.append(rec[0:2]) parent_id = rec[2] if not parent_id: diff --git a/web/app/templates/index.html b/web/app/templates/index.html index 669416f..0713c43 100644 --- a/web/app/templates/index.html +++ b/web/app/templates/index.html @@ -227,6 +227,8 @@
+ +