-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: auto add dem file if not provided by the user
- Loading branch information
1 parent
40125c4
commit cae4a94
Showing
8 changed files
with
682 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import os | ||
import zipfile | ||
import io | ||
import scrapy | ||
|
||
from osgeo import gdal | ||
from pathlib import Path | ||
from app.config import settings | ||
|
||
base_dir = Path(__file__).resolve().parent | ||
|
||
|
||
class TifSpider(scrapy.Spider):
    """Download JAXA AW3D30 DSM tiles and merge them into a single GeoTIFF.

    ``coordinates`` is a comma-separated list of "<5x5>_<1x1>" tile names
    (e.g. "N015W075_N019W071").  For each name the spider fetches the
    matching zip archive from eorc.jaxa.jp, extracts every ``*DSM.tif``
    raster into a local ``temp`` directory and, when the crawl closes,
    merges the rasters with GDAL.  The result path is exposed through the
    ``merged_file_path`` attribute.
    """

    name = "tif_spider"
    allowed_domains = ["eorc.jaxa.jp"]
    # Absolute path of the merged GeoTIFF; set by closed() once merging ran.
    merged_file_path = None

    def __init__(self, coordinates, *args, **kwargs):
        """
        Args:
            coordinates (str): comma-separated tile names to download.
        """
        super().__init__(*args, **kwargs)
        self.tif_files = []  # paths of the extracted DSM .tif files
        self.coordinates = coordinates.split(",")

    # Browser-like headers plus the HTTP basic-auth token the JAXA portal
    # requires.  NOTE(review): "authority"/"path"/"method" are HTTP/2
    # pseudo-header names and are unlikely to be honoured as plain headers —
    # confirm they are actually needed.
    headers = {
        "authority": "www.eorc.jaxa.jp",
        "path": "/ALOS/en/aw3d30/data/html_v2404/xml/{caption}_5_5.xml",
        "method": "GET",
        "accept": "application/xml, text/xml, */*; q=0.01",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "en-US,en;q=0.9",
        "authorization": f"Basic {settings.JAXA_AUTH_TOKEN}",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Linux"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }

    def start_requests(self):
        """Yield one download request per requested tile archive."""
        urls = []
        for coordinate in self.coordinates:
            coords = coordinate.split("_")
            five_by_five, one_by_one = coords[0], coords[1]
            urls.append(
                f"https://www.eorc.jaxa.jp/ALOS/aw3d30/data/release_v2404/{five_by_five}/{one_by_one}.zip",
            )

        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract every ``*DSM.tif`` member of the downloaded zip into ./temp."""
        temp_dir = os.path.join(os.getcwd(), "temp")
        os.makedirs(temp_dir, exist_ok=True)
        try:
            with zipfile.ZipFile(io.BytesIO(response.body)) as zip_file:
                self.log(f"Extracted files: {zip_file.namelist()}")
                for file_name in zip_file.namelist():
                    if file_name.endswith("DSM.tif"):
                        # Save .tif file into the temp directory
                        temp_path = os.path.join(temp_dir, os.path.basename(file_name))
                        with zip_file.open(file_name) as tif_file:
                            with open(temp_path, "wb") as out_file:
                                out_file.write(tif_file.read())
                        self.tif_files.append(temp_path)
        except Exception:
            # Best-effort per tile: one corrupt/unreadable archive must not
            # abort the whole crawl, but it must not fail silently either.
            self.logger.exception("Failed to extract tiles from %s", response.url)

    def closed(self, reason):
        """Crawl-finished hook: merge whatever tiles were collected."""
        if self.tif_files:
            self.merged_file_path = self.merge_tiles()

    def merge_tiles(self):
        """Merge the downloaded tiles into ``merged.tif`` next to this module.

        Returns:
            str: path of the merged GeoTIFF.
        """
        vrt_file = "merged.vrt"
        output_file = str(base_dir / "merged.tif")
        try:
            gdal.BuildVRT(vrt_file, self.tif_files)
            gdal.Translate(output_file, vrt_file)
        finally:
            # Always remove the intermediate tiles and VRT — even if GDAL
            # raised — so repeated runs do not accumulate temp files.
            for file in self.tif_files:
                if os.path.exists(file):
                    os.remove(file)
            if os.path.exists(vrt_file):
                os.remove(vrt_file)
        return output_file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
import math | ||
from shapely.geometry import Polygon, box | ||
|
||
|
||
def parse_polygon(coordinates):
    """Build a Shapely ``Polygon`` from a sequence of (lon, lat) pairs."""
    polygon = Polygon(coordinates)
    return polygon
|
||
|
||
def get_bounding_box(polygon):
    """Return the polygon's bounding box as (min_lon, min_lat, max_lon, max_lat)."""
    west, south, east, north = polygon.bounds
    return west, south, east, north
|
||
|
||
def get_5x5_tiles(min_lon, min_lat, max_lon, max_lat):
    """
    Calculates all 5°×5° tiles that intersect with the bounding box.
    Returns a list of tuples representing the lower-left corner of each tile.
    """
    # The 5x5 tile grid is anchored at the south-west corner of the globe.
    tile_size = 5
    origin_lon, origin_lat = -180, -90

    # Grid indices of the tiles containing each bounding-box corner.
    lon_lo = math.floor((min_lon - origin_lon) / tile_size)
    lon_hi = math.floor((max_lon - origin_lon) / tile_size)
    lat_lo = math.floor((min_lat - origin_lat) / tile_size)
    lat_hi = math.floor((max_lat - origin_lat) / tile_size)

    return [
        (origin_lon + i * tile_size, origin_lat + j * tile_size)
        for i in range(lon_lo, lon_hi + 1)
        for j in range(lat_lo, lat_hi + 1)
    ]
|
||
|
||
def get_1x1_tiles_within_5x5(tile_lon, tile_lat, polygon):
    """
    For a given 5°×5° tile, calculates all 1°×1° tiles that intersect with the polygon.
    Returns a list of tuples representing the lower-left corner of each 1x1 tile.
    """
    lon0, lat0 = int(tile_lon), int(tile_lat)
    # Walk the 5x5 grid of unit cells and keep those touching the polygon.
    return [
        (lon, lat)
        for lon in range(lon0, lon0 + 5)
        for lat in range(lat0, lat0 + 5)
        if polygon.intersects(box(lon, lat, lon + 1, lat + 1))
    ]
|
||
|
||
def format_tile_name(tile5_lon, tile5_lat, tile1_lon, tile1_lat):
    """
    Formats the tile name based on the naming convention:
    N015W075_N019W071
    """

    def _corner(lon, lat):
        # Hemisphere letter followed by the zero-padded absolute degrees,
        # latitude first, e.g. (-75, 15) -> "N015W075".
        ns = "N" if lat >= 0 else "S"
        ew = "E" if lon >= 0 else "W"
        return f"{ns}{abs(lat):03d}{ew}{abs(lon):03d}"

    return f"{_corner(tile5_lon, tile5_lat)}_{_corner(tile1_lon, tile1_lat)}"
|
||
|
||
def get_covering_tiles(polygon_geojson):
    """
    Main function to get the list of tile names covering the polygon.
    """
    # The GeoJSON polygon's first ring is its exterior boundary.
    polygon = parse_polygon(polygon_geojson["coordinates"][0])
    bounds = get_bounding_box(polygon)

    # Set dedupes names when the same 1x1 tile is reached twice.
    names = set()
    for lon5, lat5 in get_5x5_tiles(*bounds):
        for lon1, lat1 in get_1x1_tiles_within_5x5(lon5, lat5, polygon):
            names.add(format_tile_name(lon5, lat5, lon1, lat1))

    return sorted(names)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import os | ||
import io | ||
|
||
from fastapi import UploadFile, BackgroundTasks | ||
from pathlib import Path | ||
from psycopg import Connection | ||
|
||
|
||
from scrapy.crawler import CrawlerProcess | ||
from scrapy.utils.project import get_project_settings | ||
|
||
from app.projects import project_logic | ||
from app.jaxa_coordinates import get_covering_tiles | ||
from app.jaxa.tif_spider import TifSpider | ||
|
||
|
||
base_dir = Path(__file__).resolve().parent | ||
|
||
|
||
def upload_dem_file(
    db: Connection, geometry, project_id, background_tasks: BackgroundTasks
):
    """
    Fetch the DEM via the scrapy crawler and schedule its upload to S3.

    Runs TifSpider synchronously over every AW3D30 tile covering
    ``geometry``; on success the merged GeoTIFF is handed to a background
    task that stores it in S3 and records its URL for the project.

    Args:
        db (Connection): open database connection, forwarded to the
            background upload task.
        geometry: GeoJSON-like polygon (mapping with a "coordinates" key)
            describing the project area of interest.
        project_id: identifier of the project the DEM belongs to.
        background_tasks (BackgroundTasks): FastAPI task queue used to run
            the S3 upload outside the request/response cycle.
    """
    import logging

    logger = logging.getLogger(__name__)

    tiles = get_covering_tiles(geometry)
    # TifSpider.merge_tiles() writes the merged raster next to the spider
    # module, i.e. app/jaxa/merged.tif.
    tif_file_path = str(base_dir / "jaxa/merged.tif")

    coordinates_str = ",".join(tiles)
    process = CrawlerProcess(get_project_settings())
    crawler = process.create_crawler(TifSpider)
    process.crawl(crawler, coordinates=coordinates_str)

    try:
        # Blocks until the crawl finishes (CrawlerProcess runs its own reactor).
        process.start()
        logger.info("Scrapy executed successfully.")

        ## store the file in db
        background_tasks.add_task(upload_dem_file_s3, db, tif_file_path, project_id)
    except Exception:
        # Best-effort: a missing DEM should not fail the caller, but the
        # failure must be visible in the logs with its traceback rather
        # than a bare print.
        logger.exception("Scrapy execution failed")
|
||
|
||
async def upload_dem_file_s3(db: Connection, tif_file_path, project_id):
    """
    Upload the merged DEM GeoTIFF to S3, record its URL, and delete the
    local copy.

    Args:
        db (Connection): open database connection used to persist the URL.
        tif_file_path: path of the local merged GeoTIFF to upload.
        project_id: identifier of the project the DEM belongs to.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Read the raster into memory and wrap it in an UploadFile so the
    # existing S3 helper can be reused unchanged; the source file handle is
    # closed before the (potentially slow) upload starts.
    with open(tif_file_path, "rb") as dem_file:
        file_obj = io.BytesIO(dem_file.read())
    dem = UploadFile(file=file_obj, filename="dem.tif")

    dem_url = await project_logic.upload_file_to_s3(project_id, dem, "dem.tif")

    await project_logic.update_url(db, project_id, dem_url)
    # The file is persisted remotely; drop the local temp copy.
    os.remove(tif_file_path)
    logger.info("Removed local DEM file %s", tif_file_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.