Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Download dem file from JAXA if not provided by the user #380

Merged
merged 15 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ POSTGRES_HOST=${POSTGRES_HOST:-db}
S3_BUCKET_NAME=${S3_BUCKET_NAME:-dtm-bucket}
S3_ACCESS_KEY=${S3_ACCESS_KEY:-SAMPLEACCESSKEYFORMINIOROOT}
S3_SECRET_KEY=${S3_SECRET_KEY:-SAMPLESECRETACCESSKEYFORMINIOROOT}


### JAXA ###
JAXA_AUTH_TOKEN=${JAXA_AUTH_TOKEN:-SAMPLEJAXAACCESSTOKEN}

# Use MINIO_ENDPOINT if minioconsole exposed to internet & behind rev proxy.
# REF:https://min.io/docs/minio/linux/integrations/setup-nginx-proxy-with-minio.html
# ex: https://minio.example.net/minio/ui/
Expand Down
2 changes: 2 additions & 0 deletions src/backend/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ def assemble_db_connection(cls, v: Optional[str], info: ValidationInfo) -> Any:
S3_BUCKET_NAME: str = "dtm-data"
S3_DOWNLOAD_ROOT: Optional[str] = None

JAXA_AUTH_TOKEN: Optional[str] = ""

ALGORITHM: str = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 60 * 24 * 1 # 1 day
REFRESH_TOKEN_EXPIRE_MINUTES: int = 60 * 60 * 24 * 8 # 8 day
Expand Down
107 changes: 107 additions & 0 deletions src/backend/app/jaxa/jaxa_coordinates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import math
from shapely.geometry import Polygon, box


def parse_polygon(coordinates):
    """Build a Shapely ``Polygon`` from the given coordinate sequence.

    ``coordinates`` is passed to ``Polygon`` unchanged; no validation is
    performed here.
    """
    polygon = Polygon(coordinates)
    return polygon


def get_bounding_box(polygon):
    """Return the polygon's extent as ``(min_lon, min_lat, max_lon, max_lat)``.

    The values are read from ``polygon.bounds``, which must unpack into
    exactly four items.
    """
    west, south, east, north = polygon.bounds
    return (west, south, east, north)


def get_5x5_tiles(min_lon, min_lat, max_lon, max_lat):
    """
    Calculates all 5°×5° tiles that intersect with the bounding box.

    The grid starts at (-180, -90) and each tile is identified by its
    lower-left corner. Tile indices are clamped to the valid grid so a
    bounding box that touches the +180° / +90° edge (or spills outside the
    globe) never produces a tile origin that does not exist, such as
    longitude 180 or latitude 90.

    Args:
        min_lon: Western edge of the bounding box, in degrees.
        min_lat: Southern edge of the bounding box, in degrees.
        max_lon: Eastern edge of the bounding box, in degrees.
        max_lat: Northern edge of the bounding box, in degrees.

    Returns:
        A list of ``(tile_lon, tile_lat)`` integer tuples, ordered by
        longitude first and latitude second.
    """
    tile_size = 5

    # Define the origin for 5x5 tiles
    origin_lon = -180
    origin_lat = -90

    # Index of the last tile along each axis (71 for longitude, 35 for latitude).
    last_lon_idx = 360 // tile_size - 1
    last_lat_idx = 180 // tile_size - 1

    def _tile_index(value, origin, last_idx):
        # floor() maps a coordinate onto its tile; clamping keeps the index
        # inside the grid when the coordinate sits on or beyond the far edge.
        return min(max(math.floor((value - origin) / tile_size), 0), last_idx)

    start_lon_idx = _tile_index(min_lon, origin_lon, last_lon_idx)
    end_lon_idx = _tile_index(max_lon, origin_lon, last_lon_idx)
    start_lat_idx = _tile_index(min_lat, origin_lat, last_lat_idx)
    end_lat_idx = _tile_index(max_lat, origin_lat, last_lat_idx)

    return [
        (origin_lon + lon_idx * tile_size, origin_lat + lat_idx * tile_size)
        for lon_idx in range(start_lon_idx, end_lon_idx + 1)
        for lat_idx in range(start_lat_idx, end_lat_idx + 1)
    ]


def get_1x1_tiles_within_5x5(tile_lon, tile_lat, polygon):
    """
    Lists the 1°×1° cells of one 5°×5° tile that intersect the polygon.

    ``tile_lon`` / ``tile_lat`` are the lower-left corner of the 5°×5° tile.
    Each returned tuple is the lower-left corner ``(lon, lat)`` of a 1°×1°
    cell for which ``polygon.intersects`` is true, ordered by longitude
    first and latitude second.
    """
    west, east = int(tile_lon), int(tile_lon + 5)
    south, north = int(tile_lat), int(tile_lat + 5)

    return [
        (lon, lat)
        for lon in range(west, east)
        for lat in range(south, north)
        if polygon.intersects(box(lon, lat, lon + 1, lat + 1))
    ]


def _format_corner(lon, lat):
    """Return the ``<N|S><lat><E|W><lon>`` label for a tile's lower-left corner."""
    lat_dir = "N" if lat >= 0 else "S"
    lon_dir = "E" if lon >= 0 else "W"
    # Both magnitudes are zero-padded to three digits, e.g. 15 -> "015".
    return f"{lat_dir}{abs(lat):03d}{lon_dir}{abs(lon):03d}"


def format_tile_name(tile5_lon, tile5_lat, tile1_lon, tile1_lat):
    """
    Formats the tile name based on the naming convention:
    N015W075_N019W071

    The part before the underscore labels the 5x5 tile and the part after
    it labels the 1x1 tile; both are built from the tile's lower-left
    corner with latitude first.

    Args:
        tile5_lon: Longitude of the 5x5 tile's lower-left corner (int).
        tile5_lat: Latitude of the 5x5 tile's lower-left corner (int).
        tile1_lon: Longitude of the 1x1 tile's lower-left corner (int).
        tile1_lat: Latitude of the 1x1 tile's lower-left corner (int).

    Returns:
        The combined name ``"<5x5 label>_<1x1 label>"``.
    """
    tile5 = _format_corner(tile5_lon, tile5_lat)
    tile1 = _format_corner(tile1_lon, tile1_lat)
    return f"{tile5}_{tile1}"


def get_covering_tiles(polygon_geojson):
    """
    Returns the sorted, de-duplicated tile names that cover a GeoJSON polygon.

    Only the first ring in ``polygon_geojson["coordinates"]`` is used to
    build the polygon. Every 5x5 tile overlapping the polygon's bounding box
    is scanned for intersecting 1x1 tiles, and each hit is turned into a
    name with ``format_tile_name``.
    """
    first_ring = polygon_geojson["coordinates"][0]
    polygon = parse_polygon(first_ring)

    west, south, east, north = get_bounding_box(polygon)

    # A set comprehension drops any duplicate names before sorting.
    names = {
        format_tile_name(lon5, lat5, lon1, lat1)
        for lon5, lat5 in get_5x5_tiles(west, south, east, north)
        for lon1, lat1 in get_1x1_tiles_within_5x5(lon5, lat5, polygon)
    }
    return sorted(names)
84 changes: 84 additions & 0 deletions src/backend/app/jaxa/tif_spider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os
import zipfile
import io
import scrapy

from osgeo import gdal
from pathlib import Path
from app.config import settings

# Directory containing this module; TifSpider.merge_tiles() writes "merged.tif" here.
base_dir = Path(__file__).resolve().parent


class TifSpider(scrapy.Spider):
    """Spider that downloads JAXA tile archives and merges their DSM rasters.

    For every tile name in ``coordinates`` a ``.zip`` archive is requested
    from ``www.eorc.jaxa.jp``. Each ``*DSM.tif`` member is extracted into a
    ``temp`` directory under the current working directory. When the spider
    closes, the extracted rasters are merged into ``merged.tif`` next to
    this module and the path is stored in ``merged_file_path``.

    Args:
        coordinates: Comma-separated tile names of the form
            ``"<5x5 label>_<1x1 label>"`` (e.g. ``"N025E080_N027E084"``).
    """

    name = "tif_spider"
    allowed_domains = ["eorc.jaxa.jp"]
    # Set by closed() once the tiles have been merged; stays None otherwise.
    merged_file_path = None

    # Browser-like request headers; "authorization" carries the Basic auth
    # token read from settings.JAXA_AUTH_TOKEN.
    headers = {
        "authority": "www.eorc.jaxa.jp",
        "path": "/ALOS/en/aw3d30/data/html_v2404/xml/{caption}_5_5.xml",
        "method": "GET",
        "accept": "application/xml, text/xml, */*; q=0.01",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "en-US,en;q=0.9",
        "authorization": f"Basic {settings.JAXA_AUTH_TOKEN}",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Linux"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }

    def __init__(self, coordinates, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Paths of the DSM rasters extracted so far.
        self.tif_files = []
        self.coordinates = coordinates.split(",")

    def start_requests(self):
        """Yield one download request per well-formed tile name."""
        for coordinate in self.coordinates:
            parts = coordinate.split("_")
            if len(parts) < 2:
                # Previously this raised IndexError and aborted every request.
                self.logger.warning("Skipping malformed tile name: %r", coordinate)
                continue
            five_by_five, one_by_one = parts[0], parts[1]
            url = f"https://www.eorc.jaxa.jp/ALOS/aw3d30/data/release_v2404/{five_by_five}/{one_by_one}.zip"
            # The headers (including the authorization token) were defined
            # on the class but never attached to the request; send them.
            yield scrapy.Request(url=url, headers=self.headers, callback=self.parse)

    def parse(self, response):
        """Extract every ``*DSM.tif`` member of the downloaded archive."""
        temp_dir = os.path.join(os.getcwd(), "temp")
        os.makedirs(temp_dir, exist_ok=True)
        try:
            with zipfile.ZipFile(io.BytesIO(response.body)) as zip_file:
                for file_name in zip_file.namelist():
                    if not file_name.endswith("DSM.tif"):
                        continue
                    # Save .tif file into the temp directory
                    temp_path = os.path.join(temp_dir, os.path.basename(file_name))
                    with zip_file.open(file_name) as tif_file:
                        with open(temp_path, "wb") as out_file:
                            out_file.write(tif_file.read())
                    self.tif_files.append(temp_path)
        except (zipfile.BadZipFile, OSError):
            # Keep crawling the remaining tiles, but record why this one
            # was dropped instead of hiding the failure.
            self.logger.exception("Could not extract DSM tile from %s", response.url)

    def closed(self, reason):
        """Merge the extracted tiles once the crawl has ended."""
        if self.tif_files:
            self.merged_file_path = self.merge_tiles()

    def merge_tiles(self):
        """Merge the extracted rasters into ``merged.tif`` and return its path.

        The intermediate VRT and the extracted tiles are removed even when
        GDAL fails, so temporary files do not accumulate.
        """
        vrt_file = "merged.vrt"
        output_file = str(base_dir / "merged.tif")
        try:
            vrt_dataset = gdal.BuildVRT(vrt_file, self.tif_files)
            # Dropping the reference closes the dataset so the VRT is
            # written to disk before Translate reads it.
            vrt_dataset = None

            merged_dataset = gdal.Translate(output_file, vrt_file)
            merged_dataset = None
        finally:
            for file in self.tif_files:
                if os.path.exists(file):
                    os.remove(file)
            if os.path.exists(vrt_file):
                os.remove(vrt_file)
        return output_file
74 changes: 74 additions & 0 deletions src/backend/app/jaxa/upload_dem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import os
import io
import asyncio
import multiprocessing
from pathlib import Path

from fastapi import UploadFile
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


from app.projects import project_logic
from app.jaxa.jaxa_coordinates import get_covering_tiles
from app.jaxa.tif_spider import TifSpider
from app.db import database

# Directory containing this module; upload_dem_file() expects the merged DEM at base_dir / "merged.tif".
base_dir = Path(__file__).resolve().parent


def run_crawler_and_upload(coordinates_str: str, tif_file_path: str, project_id):
    """
    Runs the Scrapy crawler and uploads the DEM file to S3.

    Intended as the target of a separate process: it runs ``TifSpider`` to
    completion and then drives the async upload with ``asyncio.run``.
    Failures are logged rather than raised, because nothing is waiting on
    this function's result.

    Args:
        coordinates_str: Comma-separated tile names handed to ``TifSpider``.
        tif_file_path: Path where the merged DEM is expected after the crawl.
        project_id: Project the uploaded DEM belongs to.
    """
    import logging

    logger = logging.getLogger(__name__)

    try:
        # Initialize and start the Scrapy crawler
        process = CrawlerProcess(get_project_settings())
        process.crawl(TifSpider, coordinates=coordinates_str)
        process.start()

        asyncio.run(upload_dem_file_s3_sync(tif_file_path, project_id))

    except Exception:
        # Top-level boundary of the worker process: keep it best-effort,
        # but leave a traceback so failures can be diagnosed.
        logger.exception(
            "DEM crawl/upload failed for project %s (tiles: %s)",
            project_id,
            coordinates_str,
        )


async def upload_dem_file_s3_sync(tif_file_path: str, project_id):
    """
    Uploads the merged DEM file to S3 and stores its URL on the project.

    Despite the ``_sync`` suffix this is a coroutine and must be awaited
    (or run with ``asyncio.run``). Failures are logged, not raised.

    The local file is removed whether or not the upload succeeds. It lives
    at a caller-supplied path that may be reused by later runs, so a file
    left behind after a failure could otherwise be picked up by a later
    upload.

    Args:
        tif_file_path: Path of the merged DEM GeoTIFF on local disk.
        project_id: Project whose DEM URL is updated.
    """
    import logging

    logger = logging.getLogger(__name__)

    try:
        with open(tif_file_path, "rb") as dem_file:
            file_obj = io.BytesIO(dem_file.read())
        dem = UploadFile(file=file_obj, filename="dem.tif")

        dem_url = await project_logic.upload_file_to_s3(project_id, dem, "dem.tif")

        pool = await database.get_db_connection_pool()
        async with pool as pool_instance:
            async with pool_instance.connection() as conn:
                await project_logic.update_url(conn, project_id, dem_url)
    except Exception:
        logger.exception(
            "Failed to upload DEM %s for project %s", tif_file_path, project_id
        )
    finally:
        try:
            os.remove(tif_file_path)
        except FileNotFoundError:
            # Nothing to clean up: the crawl produced no merged file.
            pass
        except OSError:
            logger.warning("Could not remove temporary DEM file %s", tif_file_path)


async def upload_dem_file(geometry, project_id):
    """
    Initiates the DEM file fetching and uploading process.

    Works out which tiles cover ``geometry`` and starts a separate process
    that runs ``run_crawler_and_upload``. The process is started but not
    waited on.

    Args:
        geometry: GeoJSON geometry dict passed to ``get_covering_tiles``.
        project_id: Project the DEM will be attached to.
    """
    import logging

    logger = logging.getLogger(__name__)

    tiles = get_covering_tiles(geometry)
    if not tiles:
        # With no tiles there is nothing to download or upload, so do not
        # spawn a worker process at all.
        logger.warning("No DEM tiles cover the outline of project %s", project_id)
        return

    tif_file_path = str(base_dir / "merged.tif")
    coordinates_str = ",".join(tiles)

    try:
        p = multiprocessing.Process(
            target=run_crawler_and_upload,
            args=(coordinates_str, tif_file_path, project_id),
        )
        p.start()

    except Exception:
        # Best-effort background task: keep the failure visible in the logs
        # without propagating it.
        logger.exception("Could not start DEM download for project %s", project_id)
5 changes: 5 additions & 0 deletions src/backend/app/projects/project_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
send_project_approval_email_to_regulator,
)
from app.users import user_schemas
from app.jaxa.upload_dem import upload_dem_file
from minio.deleteobjects import DeleteObject
from drone_flightplan import waypoints, add_elevation_from_dem

Expand Down Expand Up @@ -229,6 +230,10 @@ async def create_project(
project_info.name,
)

if project_info.is_terrain_follow and not dem:
geometry = project_info.outline["features"][0]["geometry"]
background_tasks.add_task(upload_dem_file, geometry, project_id)

return {"message": "Project successfully created", "project_id": project_id}


Expand Down
3 changes: 2 additions & 1 deletion src/backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ dependencies = [
"pyodm>=1.5.11",
"asgiref>=3.8.1",
"bcrypt>=4.2.1",
"drone-flightplan>=0.3.3",
"drone-flightplan>=0.3.2",
"Scrapy==2.12.0",
]
requires-python = ">=3.11"
license = {text = "GPL-3.0-only"}
Expand Down
Loading
Loading