diff --git a/db/create_tables.sql b/db/create_tables.sql index b214ae7..3877c81 100644 --- a/db/create_tables.sql +++ b/db/create_tables.sql @@ -38,6 +38,7 @@ CREATE TABLE splitting ( subregion_ids BIGINT[] NOT NULL, mwm_size_est REAL NOT NULL, mwm_size_thr INTEGER NOT NULL, -- mwm size threshold in Kb, 4-bytes INTEGER is enough + next_level INTEGER NOT NULL, geom geometry NOT NULL ); -CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr); +CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr, next_level); diff --git a/web/app/auto_split.py b/web/app/auto_split.py index 2649663..56d2adb 100644 --- a/web/app/auto_split.py +++ b/web/app/auto_split.py @@ -12,9 +12,12 @@ class DisjointClusterUnion: """Disjoint set union implementation for administrative subregions.""" - def __init__(self, region_id, subregions, mwm_size_thr=None): + def __init__(self, region_id, subregions, next_level, mwm_size_thr=None): + assert all(s_data['mwm_size_est'] is not None + for s_data in subregions.values()) self.region_id = region_id self.subregions = subregions + self.next_level = next_level self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD self.representatives = {sub_id: sub_id for sub_id in subregions} # A cluster is one or more subregions with common borders @@ -84,7 +87,8 @@ def get_best_cluster_to_join_with(small_cluster_id, for subregion_id in subregion_ids: for other_subregion_id, length in common_border_matrix[subregion_id].items(): other_cluster_id = dcu.find_cluster(other_subregion_id) - if other_cluster_id != small_cluster_id: + if (other_cluster_id != small_cluster_id and + not dcu.clusters[other_cluster_id]['finished']): common_borders[other_cluster_id] += length if not common_borders: return None @@ -144,8 +148,10 @@ def find_golden_splitting(conn, border_id, next_level, mwm_size_thr): next_level, need_cities=True) if not subregions: return + if any(s_data['mwm_size_est'] is None for s_data in subregions.values()): + return - dcu = DisjointClusterUnion(border_id, subregions, mwm_size_thr) + dcu = DisjointClusterUnion(border_id, subregions, next_level, mwm_size_thr) all_subregion_ids = dcu.get_all_subregion_ids() common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids) @@ -188,6 +194,7 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion): DELETE FROM {autosplit_table} WHERE osm_border_id = {dcu.region_id} AND mwm_size_thr = {dcu.mwm_size_thr} + AND next_level = {dcu.next_level} """) for cluster_id, data in dcu.clusters.items(): subregion_ids = data['subregion_ids'] @@ -196,12 +203,13 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion): ) cluster_geometry_sql = get_union_sql(subregion_ids) cursor.execute(f""" - INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, - geom, mwm_size_thr, mwm_size_est) + INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom, + next_level, mwm_size_thr, mwm_size_est) VALUES ( {dcu.region_id}, '{subregion_ids_array_str}', ({cluster_geometry_sql}), + {dcu.next_level}, {dcu.mwm_size_thr}, {data['mwm_size_est']} ) diff --git a/web/app/borders_api.py b/web/app/borders_api.py index c04c46d..cd04e34 100755 --- a/web/app/borders_api.py +++ b/web/app/borders_api.py @@ -218,16 +218,30 @@ def get_server_configuration(): mwm_size_thr=config.MWM_SIZE_THRESHOLD) +def prepare_sql_search_string(string): + if string.startswith('^'): + string = string[1:] + else: + string = f"%{string}" + if string.endswith('$'): + string = string[:-1] + else: + string = f"{string}%" + return string + + @app.route('/search') def search(): query = request.args.get('q') + sql_search_string = prepare_sql_search_string(query) + with g.conn.cursor() as cursor: cursor.execute(f""" SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom) FROM {config.BORDERS_TABLE} WHERE name ILIKE %s ORDER BY (ST_Area(geography(geom))) - LIMIT 1""", (f'%{query}%',) + LIMIT 1""", (sql_search_string,) ) if cursor.rowcount > 0: rec = cursor.fetchone() @@ -312,10 +326,10 @@ def join_borders(): with g.conn.cursor() as cursor: try: borders_table = config.BORDERS_TABLE - free_id = get_free_id() + joint_id = get_free_id() cursor.execute(f""" UPDATE {borders_table} - SET id = {free_id}, + SET id = {joint_id}, geom = ST_Union({borders_table}.geom, b2.geom), mwm_size_est = {borders_table}.mwm_size_est + b2.mwm_size_est, count_k = -1 @@ -326,6 +340,26 @@ def join_borders(): except psycopg2.Error as e: g.conn.rollback() return jsonify(status=str(e)) + + # If joint_id is the only child of its parent, then leave only parent + parent_id = get_parent_region_id(g.conn, joint_id) + if parent_id is not None: + cursor.execute(f""" + SELECT count(*) FROM {borders_table} WHERE parent_id = %s + """, (parent_id,) + ) + children_cnt = cursor.fetchone()[0] + if children_cnt == 1: + cursor.execute(f""" + UPDATE {borders_table} + SET mwm_size_est = (SELECT mwm_size_est + FROM {borders_table} + WHERE id = %s) + WHERE id = %s + """, (joint_id, parent_id) + ) + cursor.execute(f"DELETE FROM {borders_table} WHERE id = %s", + (joint_id,)) g.conn.commit() return jsonify(status='ok') @@ -413,29 +447,9 @@ def find_osm_borders(): def copy_from_osm(): osm_id = int(request.args.get('id')) name = request.args.get('name') - name_sql = f"'{name}'" if name else "'name'" - borders_table = config.BORDERS_TABLE - osm_table = config.OSM_TABLE - with g.conn.cursor() as cursor: - # Check if this id already in use - cursor.execute(f"SELECT id FROM {borders_table} WHERE id = %s", - (osm_id,)) - rec = cursor.fetchone() - if rec and rec[0]: - return jsonify(status=f"Region with id={osm_id} already exists") - cursor.execute(f""" - INSERT INTO {borders_table} (id, geom, name, modified, count_k) - SELECT osm_id, way, {name_sql}, now(), -1 - FROM {osm_table} - WHERE osm_id = %s - """, (osm_id,) - ) - assign_region_to_lowest_parent(osm_id) - warnings = [] - try: - update_border_mwm_size_estimation(g.conn, osm_id) - except Exception as e: - warnings.append(str(e)) + errors, warnings = copy_region_from_osm(g.conn, osm_id, name) + if errors: + return jsonify(status='\n'.join(errors)) g.conn.commit() return jsonify(status='ok', warnings=warnings) @@ -983,7 +997,7 @@ def border(): @app.route('/start_over') def start_over(): try: - warnings = create_countries_initial_structure(g.conn) + create_countries_initial_structure(g.conn) except CountryStructureException as e: return jsonify(status=str(e)) @@ -991,7 +1005,7 @@ def start_over(): with g.conn.cursor() as cursor: cursor.execute(f"DELETE FROM {autosplit_table}") g.conn.commit() - return jsonify(status='ok', warnings=warnings[:10]) + return jsonify(status='ok') if __name__ == '__main__': diff --git a/web/app/borders_api_utils.py b/web/app/borders_api_utils.py index 6b6f967..e152919 100644 --- a/web/app/borders_api_utils.py +++ b/web/app/borders_api_utils.py @@ -3,13 +3,18 @@ from flask import g, jsonify -import config +from config import ( + AUTOSPLIT_TABLE as autosplit_table, + BORDERS_TABLE as borders_table, + OSM_TABLE as osm_table, +) from auto_split import split_region -from countries_structure import get_osm_border_name_by_osm_id from subregions import ( + get_parent_region_id, get_region_country, get_subregions_info, is_administrative_region, + update_border_mwm_size_estimation, ) @@ -19,14 +24,13 @@ def geom_intersects_bbox_sql(xmin, ymin, xmax, ymax): def fetch_borders(**kwargs): - borders_table = kwargs.get('table', config.BORDERS_TABLE) + a_borders_table = kwargs.get('table', borders_table) simplify = kwargs.get('simplify', 0) where_clause = kwargs.get('where_clause', '1=1') only_leaves = kwargs.get('only_leaves', True) - osm_table = config.OSM_TABLE geom = (f'ST_SimplifyPreserveTopology(geom, {simplify})' if simplify > 0 else 'geom') - leaves_filter = (f""" AND id NOT IN (SELECT parent_id FROM {borders_table} + leaves_filter = (f""" AND id NOT IN (SELECT parent_id FROM {a_borders_table} WHERE parent_id IS NOT NULL)""" if only_leaves else '') query = f""" @@ -48,14 +52,14 @@ def fetch_borders(**kwargs): WHERE osm_id = t.id ) AS admin_level, parent_id, - ( SELECT name FROM {borders_table} + ( SELECT name FROM {a_borders_table} WHERE id = t.parent_id ) AS parent_name, ( SELECT admin_level FROM {osm_table} - WHERE osm_id = (SELECT parent_id FROM {borders_table} WHERE id = t.id) + WHERE osm_id = (SELECT parent_id FROM {a_borders_table} WHERE id = t.id) ) AS parent_admin_level, mwm_size_est - FROM {borders_table} t + FROM {a_borders_table} t WHERE ({where_clause}) {leaves_filter} ) q ORDER BY area DESC @@ -104,8 +108,6 @@ def get_subregions_for_preview(region_ids, next_level): def get_subregions_one_for_preview(region_id, next_level): - osm_table = config.OSM_TABLE - borders_table = config.BORDERS_TABLE with g.conn.cursor() as cursor: # We use ST_SimplifyPreserveTopology, since ST_Simplify would give NULL # for very little regions. @@ -137,12 +139,12 @@ def get_clusters_for_preview(region_ids, next_level, thresholds): def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr): - autosplit_table = config.AUTOSPLIT_TABLE where_clause = f""" osm_border_id = %s AND mwm_size_thr = %s + AND next_level = %s """ - splitting_sql_params = (region_id, mwm_size_thr) + splitting_sql_params = (region_id, mwm_size_thr, next_level) with g.conn.cursor() as cursor: cursor.execute(f""" SELECT 1 FROM {autosplit_table} @@ -190,47 +192,54 @@ def divide_into_clusters_preview(region_ids, next_level, mwm_size_thr): def divide_into_subregions(region_ids, next_level): for region_id in region_ids: - divide_into_subregions_one(region_id, next_level) + divide_region_into_subregions(g.conn, region_id, next_level) g.conn.commit() return jsonify(status='ok') -def divide_into_subregions_one(region_id, next_level): - borders_table = config.BORDERS_TABLE - osm_table = config.OSM_TABLE - subregions = get_subregions_info(g.conn, region_id, borders_table, +def divide_region_into_subregions(conn, region_id, next_level): + """Divides a region into subregions of specified admin level. + Returns the list of added subregion ids. + """ + subregions = get_subregions_info(conn, region_id, borders_table, next_level, need_cities=False) - with g.conn.cursor() as cursor: - is_admin_region = is_administrative_region(g.conn, region_id) + if not subregions: + return [] + with conn.cursor() as cursor: + subregion_ids_str = ','.join(str(x) for x in subregions.keys()) + cursor.execute(f""" + SELECT id + FROM {borders_table} + WHERE id IN ({subregion_ids_str}) + """ + ) + occupied_ids = [rec[0] for rec in cursor] + ids_to_insert = set(subregions.keys()) - set(occupied_ids) + if not ids_to_insert: + return [] + + is_admin_region = is_administrative_region(conn, region_id) + if is_admin_region: - for subregion_id, data in subregions.items(): - cursor.execute(f""" - INSERT INTO {borders_table} - (id, geom, name, parent_id, modified, count_k, mwm_size_est) - SELECT osm_id, way, name, %s, now(), -1, {data['mwm_size_est']} - FROM {osm_table} - WHERE osm_id = %s - """, (region_id, subregion_id) - ) + parent_id = region_id else: - for subregion_id, data in subregions.items(): - cursor.execute(f""" - INSERT INTO {borders_table} - (id, geom, name, parent_id, modified, count_k, mwm_size_est) - SELECT osm_id, way, name, - (SELECT parent_id FROM {borders_table} WHERE id = %s), - now(), -1, {data['mwm_size_est']} - FROM {osm_table} - WHERE osm_id = %s - """, (region_id, subregion_id) - ) + parent_id = get_parent_region_id(conn, region_id) + + for subregion_id in ids_to_insert: + mwm_size_est = subregions[subregion_id]['mwm_size_est'] + cursor.execute(f""" + INSERT INTO {borders_table} + (id, geom, name, parent_id, modified, count_k, mwm_size_est) + SELECT osm_id, way, name, {parent_id}, now(), -1, %s + FROM {osm_table} + WHERE osm_id = %s""", (mwm_size_est, subregion_id,) + ) + if not is_admin_region: cursor.execute(f"DELETE FROM {borders_table} WHERE id = %s", (region_id,)) - g.conn.commit() + return ids_to_insert def divide_into_clusters(region_ids, next_level, mwm_size_thr): - borders_table = config.BORDERS_TABLE - autosplit_table = config.AUTOSPLIT_TABLE cursor = g.conn.cursor() insert_cursor = g.conn.cursor() for region_id in region_ids: @@ -240,8 +249,9 @@ def divide_into_clusters(region_ids, next_level, mwm_size_thr): where_clause = f""" osm_border_id = %s AND mwm_size_thr = %s + AND next_level = %s """ - splitting_sql_params = (region_id, mwm_size_thr) + splitting_sql_params = (region_id, mwm_size_thr, next_level) cursor.execute(f""" SELECT 1 FROM {autosplit_table} WHERE {where_clause} @@ -258,46 +268,52 @@ def divide_into_clusters(region_ids, next_level, mwm_size_thr): """, splitting_sql_params ) if cursor.rowcount == 1: - continue - for rec in cursor: - subregion_ids = rec[0] - cluster_id = subregion_ids[0] - if len(subregion_ids) == 1: - subregion_id = cluster_id - name = get_osm_border_name_by_osm_id(g.conn, subregion_id) - else: - counter += 1 - free_id -= 1 - subregion_id = free_id - name = f"{base_name}_{counter}" insert_cursor.execute(f""" - INSERT INTO {borders_table} (id, name, parent_id, geom, modified, count_k, mwm_size_est) - SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est - FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause} - """, (name, cluster_id,) + splitting_sql_params - ) + UPDATE {borders_table} + SET modified = now(), + mwm_size_est = (SELECT mwm_size_est + FROM {autosplit_table} + WHERE {where_clause}) + WHERE id = {region_id} + """, splitting_sql_params) + else: + for rec in cursor: + subregion_ids = rec[0] + cluster_id = subregion_ids[0] + if len(subregion_ids) == 1: + subregion_id = cluster_id + name = get_osm_border_name_by_osm_id(g.conn, subregion_id) + else: + counter += 1 + free_id -= 1 + subregion_id = free_id + name = f"{base_name}_{counter}" + insert_cursor.execute(f""" + INSERT INTO {borders_table} (id, name, parent_id, geom, modified, count_k, mwm_size_est) + SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est + FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause} + """, (name, cluster_id,) + splitting_sql_params + ) g.conn.commit() return jsonify(status='ok') def get_free_id(): with g.conn.cursor() as cursor: - borders_table = config.BORDERS_TABLE cursor.execute(f"SELECT min(id) FROM {borders_table} WHERE id < -1000000000") min_id = cursor.fetchone()[0] free_id = min_id - 1 if min_id else -1_000_000_001 return free_id -def assign_region_to_lowest_parent(region_id): +def assign_region_to_lowest_parent(conn, region_id): """Lowest parent is the region with lowest (maximum by absolute value) admin_level containing given region.""" pot_parents = find_potential_parents(region_id) if pot_parents: # potential_parents are sorted by area ascending parent_id = pot_parents[0]['properties']['id'] - borders_table = config.BORDERS_TABLE - with g.conn.cursor() as cursor: + with conn.cursor() as cursor: cursor.execute(f""" UPDATE {borders_table} SET parent_id = %s @@ -309,7 +325,6 @@ def assign_region_to_lowest_parent(region_id): def create_or_update_region(region, free_id): - borders_table = config.BORDERS_TABLE with g.conn.cursor() as cursor: if region['id'] < 0: if not free_id: @@ -323,7 +338,7 @@ def create_or_update_region(region, free_id): """, (region_id, region['name'], region['disabled'], region['wkt']) ) - assign_region_to_lowest_parent(region_id) + assign_region_to_lowest_parent(g.conn, region_id) return region_id else: cursor.execute(f"SELECT count(1) FROM {borders_table} WHERE id = %s", @@ -346,8 +361,6 @@ def create_or_update_region(region, free_id): def find_potential_parents(region_id): - borders_table = config.BORDERS_TABLE - osm_table = config.OSM_TABLE p_geogr = "geography(p.geom)" c_geogr = "geography(c.geom)" query = f""" @@ -380,3 +393,44 @@ def find_potential_parents(region_id): } parents.append(feature) return parents + + +def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'): + errors, warnings = [], [] + with conn.cursor() as cursor: + # Check if this id already in use + cursor.execute(f"SELECT name FROM {borders_table} WHERE id = %s", + (region_id,)) + if cursor.rowcount > 0: + name = cursor.fetchone()[0] + errors.append(f"Region with id={region_id} already exists under name '{name}'") + return errors, warnings + + name_expr = f"'{name}'" if name else "name" + parent_id_expr = f"{parent_id}" if isinstance(parent_id, int) else "NULL" + cursor.execute(f""" + INSERT INTO {borders_table} + (id, geom, name, parent_id, modified, count_k) + SELECT osm_id, way, {name_expr}, {parent_id_expr}, now(), -1 + FROM {osm_table} + WHERE osm_id = %s + """, (region_id,) + ) + if parent_id == 'not_passed': + assign_region_to_lowest_parent(conn, region_id) + try: + update_border_mwm_size_estimation(conn, region_id) + except Exception as e: + warnings.append(str(e)) + return errors, warnings + + +def get_osm_border_name_by_osm_id(conn, osm_id): + with conn.cursor() as cursor: + cursor.execute(f""" + SELECT name FROM {osm_table} + WHERE osm_id = %s + """, (osm_id,)) + rec = cursor.fetchone() + return rec[0] if rec else None + diff --git a/web/app/config.py b/web/app/config.py index 432f05b..7377eed 100644 --- a/web/app/config.py +++ b/web/app/config.py @@ -33,3 +33,9 @@ # Estimated mwm size is predicted by the 'model.pkl' with 'scaler.pkl' for X MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model.pkl' MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler.pkl' +MWM_SIZE_PREDICTION_MODEL_LIMITATIONS = { + 'area': 5500 * 1.5, + 'urban_pop': 3500000 * 1.5, + 'city_cnt': 32 * 1.5, + 'hamlet_cnt': 2120 * 1.5 +} diff --git a/web/app/countries_structure.py b/web/app/countries_structure.py index 0a0e5fe..5567cb4 100644 --- a/web/app/countries_structure.py +++ b/web/app/countries_structure.py @@ -1,12 +1,13 @@ +from borders_api_utils import ( + copy_region_from_osm, + divide_region_into_subregions, + get_osm_border_name_by_osm_id, +) from config import ( BORDERS_TABLE as borders_table, OSM_TABLE as osm_table ) from countries_division import country_initial_levels -from subregions import ( - get_subregions_info, - update_border_mwm_size_estimation, -) class CountryStructureException(Exception): @@ -16,80 +17,25 @@ class CountryStructureException(Exception): def _clear_borders(conn): with conn.cursor() as cursor: cursor.execute(f"DELETE FROM {borders_table}") - conn.commit() - - -def _find_subregions(conn, osm_ids, next_level, regions): - """Return subregions of level 'next_level' for regions with osm_ids.""" - subregion_ids = [] - for osm_id in osm_ids: - more_subregions = get_subregions_info(conn, osm_id, borders_table, - next_level, need_cities=False) - for subregion_id, subregion_data in more_subregions.items(): - region_data = regions.setdefault(subregion_id, {}) - region_data['name'] = subregion_data['name'] - region_data['mwm_size_est'] = subregion_data['mwm_size_est'] - region_data['parent_id'] = osm_id - subregion_ids.append(subregion_id) - return subregion_ids - - -def _create_regions(conn, osm_ids, regions): - if not osm_ids: - return - osm_ids = list(osm_ids) # to ensure order - sql_values = ','.join( - f'({osm_id},' - '%s,' - f"{regions[osm_id].get('parent_id', 'NULL')}," - f"{regions[osm_id].get('mwm_size_est', 'NULL')}," - f'(SELECT way FROM {osm_table} WHERE osm_id={osm_id}),' - 'now())' - for osm_id in osm_ids - ) - with conn.cursor() as cursor: - cursor.execute(f""" - INSERT INTO {borders_table} (id, name, parent_id, mwm_size_est, - geom, modified) - VALUES {sql_values} - """, tuple(regions[osm_id]['name'] for osm_id in osm_ids) - ) def _make_country_structure(conn, country_osm_id): - regions = {} # osm_id: { 'name': name, - # 'mwm_size_est': size, - # 'parent_id': parent_id } - country_name = get_osm_border_name_by_osm_id(conn, country_osm_id) - country_data = regions.setdefault(country_osm_id, {}) - country_data['name'] = country_name - # TODO: country_data['mwm_size_est'] = ... - _create_regions(conn, [country_osm_id], regions) + copy_region_from_osm(conn, country_osm_id, parent_id=None) if country_initial_levels.get(country_name): admin_levels = country_initial_levels[country_name] prev_admin_levels = [2] + admin_levels[:-1] - prev_region_ids = [country_osm_id] + prev_level_region_ids = [country_osm_id] for admin_level, prev_level in zip(admin_levels, prev_admin_levels): - if not prev_region_ids: - raise CountryStructureException( - f"Empty prev_region_ids at {country_name}, " - f"AL={admin_level}, prev-AL={prev_level}" - ) - subregion_ids = _find_subregions(conn, prev_region_ids, - admin_level, regions) - _create_regions(conn, subregion_ids, regions) - prev_region_ids = subregion_ids - warning = None - if len(regions) == 1: - try: - update_border_mwm_size_estimation(conn, country_osm_id) - except Exception as e: - warning = str(e) - return warning + current_level_region_ids = [] + for region_id in prev_level_region_ids: + subregion_ids = divide_region_into_subregions( + conn, region_id, admin_level) + current_level_region_ids.extend(subregion_ids) + prev_level_region_ids = current_level_region_ids def create_countries_initial_structure(conn): @@ -97,32 +43,15 @@ def create_countries_initial_structure(conn): with conn.cursor() as cursor: # TODO: process overlapping countries, like Ukraine and Russia with common Crimea cursor.execute(f""" - SELECT osm_id, name + SELECT osm_id FROM {osm_table} - WHERE admin_level = 2 and name != 'Ukraine' + WHERE admin_level = 2 """ ) - warnings = [] - for rec in cursor: - warning = _make_country_structure(conn, rec[0]) - if warning: - warnings.append(warning) + for country_osm_id, *_ in cursor: + _make_country_structure(conn, country_osm_id) conn.commit() - return warnings - - -def get_osm_border_name_by_osm_id(conn, osm_id): - with conn.cursor() as cursor: - cursor.execute(f""" - SELECT name FROM {osm_table} - WHERE osm_id = %s - """, (osm_id,)) - rec = cursor.fetchone() - if not rec: - raise CountryStructureException( - f'Not found region with osm_id="{osm_id}"' - ) - return rec[0] + return def _get_country_osm_id_by_name(conn, name): @@ -137,4 +66,5 @@ def _get_country_osm_id_by_name(conn, name): rec = cursor.fetchone() if not rec: raise CountryStructureException(f'Not found country "{name}"') - return int(rec[0]) + return rec[0] + diff --git a/web/app/mwm_size_predictor.py b/web/app/mwm_size_predictor.py index 4045635..c69a14b 100644 --- a/web/app/mwm_size_predictor.py +++ b/web/app/mwm_size_predictor.py @@ -6,6 +6,8 @@ class MwmSizePredictor: + factors = ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt',) + def __init__(self): with open(config.MWM_SIZE_PREDICTION_MODEL_PATH, 'rb') as f: self.model = pickle.load(f) @@ -20,9 +22,9 @@ def _get_instance(cls): @classmethod def predict(cls, features_array): - """1D or 2D array of feature values for predictions. Features are - 'urban_pop', 'area', 'city_cnt', 'hamlet_cnt' as defined for the - prediction model. + """1D or 2D array of feature values for predictions. + Each feature is a list of values for factors + defined by 'cls.factors' sequence. """ X = np.array(features_array) one_prediction = (X.ndim == 1) diff --git a/web/app/static/borders.js b/web/app/static/borders.js index f407620..3ade574 100644 --- a/web/app/static/borders.js +++ b/web/app/static/borders.js @@ -316,8 +316,9 @@ function selectLayer(e) { $('#b_size').text( Math.round(props['count_k'] * BYTES_FOR_NODE / 1024 / 1024) + ' MB' ); - $('#pa_size').text(Math.round(props['mwm_size_est'] / 1024) + ' MB'); - //$('#b_nodes').text(borders[selectedId].layer.getLatLngs()[0].length); + var mwm_size_est = props['mwm_size_est']; + var mwm_size_est_text = mwm_size_est === null ? '-' : Math.round(props['mwm_size_est']/1024) + ' MB'; + $('#pa_size').text(mwm_size_est_text); $('#b_nodes').text(props['nodes']); $('#b_date').text(props['modified']); $('#b_area').text(L.Util.formatNum(props['area'] / 1000000, 2)); @@ -1114,7 +1115,7 @@ function bDivideDrawPreview(response) { var show_divide_button = (subregions.features.length > 1); if (clusters) { subregions_count_text += ', ' + clusters.features.length + ' кластеров'; - show_divide_button = (clusters.features.length > 1); + show_divide_button = (clusters.features.length > 0); } $('#d_count').text(subregions_count_text).show(); if (show_divide_button) diff --git a/web/app/subregions.py b/web/app/subregions.py index b9db6fb..5305530 100644 --- a/web/app/subregions.py +++ b/web/app/subregions.py @@ -3,6 +3,7 @@ from config import ( BORDERS_TABLE as borders_table, + MWM_SIZE_PREDICTION_MODEL_LIMITATIONS, OSM_TABLE as osm_table, OSM_PLACES_TABLE as osm_places_table, ) @@ -19,19 +20,19 @@ def get_subregions_info(conn, region_id, region_table, :return: dict {subregion_id => subregion data} including area and population info """ subregions = _get_subregions_basic_info(conn, region_id, region_table, - next_level, need_cities) - _add_population_data(conn, subregions, need_cities) - _add_mwm_size_estimation(subregions) + next_level) + _add_mwm_size_estimation(conn, subregions, need_cities) keys = ('name', 'mwm_size_est') if need_cities: keys = keys + ('cities',) - return {subregion_id: {k: subregion_data[k] for k in keys} + return {subregion_id: {k: subregion_data[k] for k in keys + if k in subregion_data} for subregion_id, subregion_data in subregions.items() } def _get_subregions_basic_info(conn, region_id, region_table, - next_level, need_cities): + next_level): cursor = conn.cursor() region_id_column, region_geom_column = ( ('id', 'geom') if region_table == borders_table else @@ -51,98 +52,109 @@ def _get_subregions_basic_info(conn, region_id, region_table, 'osm_id': rec[0], 'name': rec[1], 'area': rec[2], - 'urban_pop': 0, - 'city_cnt': 0, - 'hamlet_cnt': 0 } - if need_cities: - subregion_data['cities'] = [] subregions[rec[0]] = subregion_data return subregions def _add_population_data(conn, subregions, need_cities): - if not subregions: + """Adds population data only for subregions that are suitable + for mwm size estimation. + """ + subregion_ids = [ + s_id for s_id, s_data in subregions.items() + if s_data['area'] <= MWM_SIZE_PREDICTION_MODEL_LIMITATIONS['area'] + ] + if not subregion_ids: return - cursor = conn.cursor() - subregion_ids = ','.join(str(x) for x in subregions.keys()) - cursor.execute(f""" - SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place - FROM {osm_table} b, {osm_places_table} p - WHERE b.osm_id IN ({subregion_ids}) - AND ST_Contains(b.way, p.center) - """ - ) - for subregion_id, place_name, place_population, place_type in cursor: - subregion_data = subregions[subregion_id] - if place_type in ('city', 'town'): - subregion_data['city_cnt'] += 1 - subregion_data['urban_pop'] += place_population - if need_cities: - subregion_data['cities'].append({ - 'name': place_name, - 'population': place_population - }) - else: - subregion_data['hamlet_cnt'] += 1 - - -def _add_mwm_size_estimation(subregions): - subregions_sorted = [ + + for subregion_id, data in subregions.items(): + data.update({ + 'urban_pop': 0, + 'city_cnt': 0, + 'hamlet_cnt': 0 + }) + if need_cities: + data['cities'] = [] + + subregion_ids_str = ','.join(str(x) for x in subregion_ids) + with conn.cursor() as cursor: + cursor.execute(f""" + SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place + FROM {osm_table} b, {osm_places_table} p + WHERE b.osm_id IN ({subregion_ids_str}) + AND ST_Contains(b.way, p.center) + """ + ) + for subregion_id, place_name, place_population, place_type in cursor: + subregion_data = subregions[subregion_id] + if place_type in ('city', 'town'): + subregion_data['city_cnt'] += 1 + subregion_data['urban_pop'] += place_population + if need_cities: + subregion_data['cities'].append({ + 'name': place_name, + 'population': place_population + }) + else: + subregion_data['hamlet_cnt'] += 1 + + +def _add_mwm_size_estimation(conn, subregions, need_cities): + for subregion_data in subregions.values(): + subregion_data['mwm_size_est'] = None + + _add_population_data(conn, subregions, need_cities) + + subregions_to_predict = [ ( s_id, - [subregions[s_id][f] for f in - ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')] + [subregions[s_id][f] for f in MwmSizePredictor.factors] ) for s_id in sorted(subregions.keys()) + if all(subregions[s_id].get(f) is not None and + subregions[s_id][f] <= + MWM_SIZE_PREDICTION_MODEL_LIMITATIONS[f] + for f in MwmSizePredictor.factors) ] - feature_array = [x[1] for x in subregions_sorted] + if not subregions_to_predict: + return + + feature_array = [x[1] for x in subregions_to_predict] predictions = MwmSizePredictor.predict(feature_array) for subregion_id, mwm_size_prediction in zip( - (x[0] for x in subregions_sorted), + (x[0] for x in subregions_to_predict), predictions ): subregions[subregion_id]['mwm_size_est'] = mwm_size_prediction def update_border_mwm_size_estimation(conn, border_id): - cursor = conn.cursor() - cursor.execute(f""" - SELECT name, ST_Area(geography(geom))/1.0E+6 area - FROM {borders_table} - WHERE id = %s""", (border_id, )) - name, area = cursor.fetchone() - if math.isnan(area): - raise Exception(f"Area is NaN for border '{name}' ({border_id})") - border_data = { - 'area': area, - 'urban_pop': 0, - 'city_cnt': 0, - 'hamlet_cnt': 0 - } - cursor.execute(f""" - SELECT coalesce(p.population, 0), p.place - FROM {borders_table} b, {osm_places_table} p - WHERE b.id = %s - AND ST_Contains(b.geom, p.center) - """, (border_id, )) - for place_population, place_type in cursor: - if place_type in ('city', 'town'): - border_data['city_cnt'] += 1 - border_data['urban_pop'] += place_population - else: - border_data['hamlet_cnt'] += 1 - - feature_array = [ - border_data[f] for f in - ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt') - ] - mwm_size_est = MwmSizePredictor.predict(feature_array) - cursor.execute(f"UPDATE {borders_table} SET mwm_size_est = %s WHERE id = %s", - (mwm_size_est, border_id)) - conn.commit() + with conn.cursor() as cursor: + cursor.execute(f""" + SELECT name, ST_Area(geography(geom))/1.0E+6 area + FROM {borders_table} + WHERE id = %s""", (border_id,)) + name, area = cursor.fetchone() + if math.isnan(area): + e = Exception(f"Area is NaN for border '{name}' ({border_id})") + raise e + border_data = { + 'area': area, + } + regions = {border_id: border_data} + _add_mwm_size_estimation(conn, regions, need_cities=False) + mwm_size_est = border_data.get('mwm_size_est') + # mwm_size_est may be None. Python's None is converted to NULL + # duging %s substitution in execute(). + cursor.execute(f""" + UPDATE {borders_table} + SET mwm_size_est = %s + WHERE id = %s + """, (mwm_size_est, border_id,)) + conn.commit() def is_administrative_region(conn, region_id): @@ -245,7 +257,7 @@ def get_similar_regions(conn, region_id, only_leaves=False): if item['admin_level'] == admin_level: similar_region_ids.append(item['id']) elif item['admin_level'] < admin_level: - children = find_osm_child_regions(item['id']) + children = find_osm_child_regions(conn, item['id']) for ch in children: q.put(ch) if only_leaves: diff --git a/web/app/templates/index.html b/web/app/templates/index.html index e595049..669416f 100644 --- a/web/app/templates/index.html +++ b/web/app/templates/index.html @@ -210,7 +210,7 @@ ожидайте...