Skip to content

Commit

Permalink
feat: auto add dem file if not provided by the user
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
aryalsaurav committed Dec 5, 2024
1 parent 40125c4 commit 1f0baf5
Show file tree
Hide file tree
Showing 8 changed files with 678 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ POSTGRES_HOST=${POSTGRES_HOST:-db}
S3_BUCKET_NAME=${S3_BUCKET_NAME:-dtm-bucket}
S3_ACCESS_KEY=${S3_ACCESS_KEY:-SAMPLEACCESSKEYFORMINIOROOT}
S3_SECRET_KEY=${S3_SECRET_KEY:-SAMPLESECRETACCESSKEYFORMINIOROOT}


### JAXA ###
JAXA_AUTH_TOKEN=${JAXA_AUTH_TOKEN:-SAMPLEJAXAACCESSTOKEN}

# Use MINIO_ENDPOINT if minioconsole exposed to internet & behind rev proxy.
# REF:https://min.io/docs/minio/linux/integrations/setup-nginx-proxy-with-minio.html
# ex: https://minio.example.net/minio/ui/
Expand Down
2 changes: 2 additions & 0 deletions src/backend/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ def assemble_db_connection(cls, v: Optional[str], info: ValidationInfo) -> Any:
S3_BUCKET_NAME: str = "dtm-data"
S3_DOWNLOAD_ROOT: Optional[str] = None

JAXA_AUTH_TOKEN: Optional[str] = ""

ALGORITHM: str = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 60 * 24 * 1 # 1 day
REFRESH_TOKEN_EXPIRE_MINUTES: int = 60 * 60 * 24 * 8 # 8 day
Expand Down
107 changes: 107 additions & 0 deletions src/backend/app/jaxa/jaxa_coordinates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import math
from shapely.geometry import Polygon, box


def parse_polygon(coordinates):
    """
    Builds a Shapely Polygon from a sequence of coordinates.

    Args:
        coordinates: The ring of points handed straight to ``Polygon``.

    Returns:
        The constructed ``shapely.geometry.Polygon``.
    """
    shape = Polygon(coordinates)
    return shape


def get_bounding_box(polygon):
    """
    Reads the extent of ``polygon`` from its ``bounds`` attribute.

    Returns:
        A 4-tuple ``(min_lon, min_lat, max_lon, max_lat)``.
    """
    west, south, east, north = polygon.bounds
    return (west, south, east, north)


def get_5x5_tiles(min_lon, min_lat, max_lon, max_lat):
    """
    Calculates all 5°×5° tiles that intersect with the bounding box.

    The tile grid is anchored at (-180, -90). Tile indices are clamped to the
    72 x 36 grid, so a bounding box that touches or exceeds lon=180 / lat=90
    (or falls below the origin) never yields a tile whose lower-left corner
    lies outside the globe.

    Args:
        min_lon: Western edge of the bounding box, in degrees.
        min_lat: Southern edge of the bounding box, in degrees.
        max_lon: Eastern edge of the bounding box, in degrees.
        max_lat: Northern edge of the bounding box, in degrees.

    Returns:
        A list of ``(lon, lat)`` integer tuples, each the lower-left corner of
        one tile, ordered by longitude and then latitude.
    """
    tile_size = 5

    # Define the origin for 5x5 tiles
    origin_lon = -180
    origin_lat = -90

    # Highest valid tile index along each axis (71 for longitude, 35 for latitude).
    last_lon_idx = 360 // tile_size - 1
    last_lat_idx = 180 // tile_size - 1

    # Calculate the starting and ending indices, clamped to the grid.
    start_lon_idx = max(0, math.floor((min_lon - origin_lon) / tile_size))
    end_lon_idx = min(last_lon_idx, math.floor((max_lon - origin_lon) / tile_size))
    start_lat_idx = max(0, math.floor((min_lat - origin_lat) / tile_size))
    end_lat_idx = min(last_lat_idx, math.floor((max_lat - origin_lat) / tile_size))

    return [
        (origin_lon + lon_idx * tile_size, origin_lat + lat_idx * tile_size)
        for lon_idx in range(start_lon_idx, end_lon_idx + 1)
        for lat_idx in range(start_lat_idx, end_lat_idx + 1)
    ]


def get_1x1_tiles_within_5x5(tile_lon, tile_lat, polygon):
    """
    Lists the 1°×1° cells of one 5°×5° tile that intersect ``polygon``.

    Args:
        tile_lon: Longitude of the 5x5 tile's lower-left corner.
        tile_lat: Latitude of the 5x5 tile's lower-left corner.
        polygon: Geometry tested against each cell via ``intersects``.

    Returns:
        A list of ``(lon, lat)`` tuples, each the lower-left corner of an
        intersecting 1x1 cell, ordered by longitude and then latitude.
    """
    lon_start, lon_stop = int(tile_lon), int(tile_lon + 5)
    lat_start, lat_stop = int(tile_lat), int(tile_lat + 5)

    return [
        (lon, lat)
        for lon in range(lon_start, lon_stop)
        for lat in range(lat_start, lat_stop)
        if polygon.intersects(box(lon, lat, lon + 1, lat + 1))
    ]


def format_tile_name(tile5_lon, tile5_lat, tile1_lon, tile1_lat):
    """
    Builds a tile name of the form ``<5x5 corner>_<1x1 corner>``, e.g.
    N015W075_N019W071.

    Each corner is written as hemisphere letter + zero-padded 3-digit absolute
    latitude, followed by hemisphere letter + zero-padded 3-digit absolute
    longitude. Zero counts as north / east. The values must be integers
    because they are rendered with the ``d`` format code.
    """

    def corner_label(lon, lat):
        # Latitude comes first in the name, then longitude.
        ns = "S" if lat < 0 else "N"
        ew = "W" if lon < 0 else "E"
        return "{}{:03d}{}{:03d}".format(ns, abs(lat), ew, abs(lon))

    five_degree = corner_label(tile5_lon, tile5_lat)
    one_degree = corner_label(tile1_lon, tile1_lat)
    return "_".join((five_degree, one_degree))


def get_covering_tiles(polygon_geojson):
    """
    Returns the sorted, de-duplicated tile names covering a polygon.

    Only the first ring in ``polygon_geojson["coordinates"]`` is used to build
    the polygon. Its bounding box selects candidate 5x5 tiles, and every 1x1
    cell inside them that intersects the polygon contributes one name.
    """
    first_ring = polygon_geojson["coordinates"][0]
    polygon = parse_polygon(first_ring)
    bounds = get_bounding_box(polygon)

    # A set comprehension keeps each tile name only once.
    names = {
        format_tile_name(lon5, lat5, lon1, lat1)
        for lon5, lat5 in get_5x5_tiles(*bounds)
        for lon1, lat1 in get_1x1_tiles_within_5x5(lon5, lat5, polygon)
    }
    return sorted(names)
84 changes: 84 additions & 0 deletions src/backend/app/jaxa/tif_spider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os
import zipfile
import io
import scrapy

from osgeo import gdal
from pathlib import Path
from app.config import settings

base_dir = Path(__file__).resolve().parent


class TifSpider(scrapy.Spider):
    """
    Spider that downloads tile archives from eorc.jaxa.jp and merges their DSM rasters.

    For every ``<5x5>_<1x1>`` tile name it requests the matching ``.zip``,
    extracts each member ending in ``DSM.tif`` into ``<cwd>/temp`` and, when
    the spider closes, merges the extracted rasters into ``merged.tif`` next
    to this module. The merged path is exposed as ``merged_file_path`` and
    stays ``None`` when nothing was merged.
    """

    name = "tif_spider"
    allowed_domains = ["eorc.jaxa.jp"]
    merged_file_path = None

    def __init__(self, coordinates, *args, **kwargs):
        """
        Args:
            coordinates: Comma-separated tile names, each ``<5x5>_<1x1>``.
        """
        super().__init__(*args, **kwargs)
        self.tif_files = []
        # Drop blank entries so an empty input string does not become a bogus tile.
        self.coordinates = [
            entry.strip() for entry in coordinates.split(",") if entry.strip()
        ]

    headers = {
        "authority": "www.eorc.jaxa.jp",
        "path": "/ALOS/en/aw3d30/data/html_v2404/xml/{caption}_5_5.xml",
        "method": "GET",
        "accept": "application/xml, text/xml, */*; q=0.01",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "en-US,en;q=0.9",
        "authorization": f"Basic {settings.JAXA_AUTH_TOKEN}",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Linux"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }

    def start_requests(self):
        """Yield one download request per well-formed tile name."""
        # `authority`, `path` and `method` mirror HTTP/2 pseudo-headers (and
        # `path` points at an XML listing, not a tile archive), so they are
        # not sent as header fields. Everything else, including the
        # Authorization token, is attached to each request.
        request_headers = {
            key: value
            for key, value in self.headers.items()
            if key not in ("authority", "path", "method")
        }

        for coordinate in self.coordinates:
            coords = coordinate.split("_")
            if len(coords) < 2 or not coords[0] or not coords[1]:
                self.logger.warning("Skipping malformed tile name: %r", coordinate)
                continue
            five_by_five, one_by_one = coords[0], coords[1]
            url = f"https://www.eorc.jaxa.jp/ALOS/aw3d30/data/release_v2404/{five_by_five}/{one_by_one}.zip"
            yield scrapy.Request(
                url=url, headers=request_headers, callback=self.parse
            )

    def parse(self, response):
        """Extract every ``*DSM.tif`` member of the downloaded archive into ``<cwd>/temp``."""
        temp_dir = os.path.join(os.getcwd(), "temp")
        os.makedirs(temp_dir, exist_ok=True)
        try:
            with zipfile.ZipFile(io.BytesIO(response.body)) as zip_file:
                for file_name in zip_file.namelist():
                    if not file_name.endswith("DSM.tif"):
                        continue
                    # basename() keeps the extracted file inside temp_dir
                    # whatever directory prefix the archive member carries.
                    temp_path = os.path.join(temp_dir, os.path.basename(file_name))
                    with zip_file.open(file_name) as tif_file, open(
                        temp_path, "wb"
                    ) as out_file:
                        out_file.write(tif_file.read())
                    self.tif_files.append(temp_path)
        except Exception:
            # Best effort: one bad archive must not abort the other tiles,
            # but the failure is logged instead of being silently dropped.
            self.logger.exception(
                "Failed to extract DSM raster from %s", response.url
            )

    def closed(self, reason):
        """Merge the downloaded rasters once the crawl has finished."""
        if self.tif_files:
            self.merged_file_path = self.merge_tiles()
        else:
            self.logger.warning("No DSM tiles were downloaded; nothing to merge")

    def merge_tiles(self):
        """
        Merge all extracted rasters into ``merged.tif`` next to this module.

        Returns:
            The path of the merged GeoTIFF.

        Raises:
            RuntimeError: If GDAL fails to build the VRT or to translate it.
        """
        vrt_file = "merged.vrt"
        output_file = str(base_dir / "merged.tif")

        try:
            # GDAL returns None on failure when its exceptions are not enabled.
            vrt_dataset = gdal.BuildVRT(vrt_file, self.tif_files)
            if vrt_dataset is None:
                raise RuntimeError("gdal.BuildVRT failed for the downloaded tiles")
            # Dropping the reference closes the dataset so the VRT is on disk
            # before Translate reads it.
            vrt_dataset = None

            merged_dataset = gdal.Translate(output_file, vrt_file)
            if merged_dataset is None:
                raise RuntimeError(f"gdal.Translate failed to write {output_file}")
            merged_dataset = None
        finally:
            # Always clean up intermediates, even when the merge fails.
            for file in self.tif_files:
                if os.path.exists(file):
                    os.remove(file)
            if os.path.exists(vrt_file):
                os.remove(vrt_file)
        return output_file
68 changes: 68 additions & 0 deletions src/backend/app/jaxa/upload_dem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import os
import io

from fastapi import UploadFile, BackgroundTasks
from pathlib import Path
from psycopg import Connection


from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from app.projects import project_logic
from app.jaxa.jaxa_coordinates import get_covering_tiles
from app.jaxa.tif_spider import TifSpider


base_dir = Path(__file__).resolve().parent


def upload_dem_file(
    db: Connection, geometry, project_id, background_tasks: BackgroundTasks
):
    """
    Download the DEM covering ``geometry`` and schedule its upload.

    The covering tile names are computed from the geometry, ``TifSpider`` is
    run to download and merge them, and, only if a merged raster was actually
    produced, ``upload_dem_file_s3`` is queued on ``background_tasks``.

    Args:
        db (Connection): Database connection forwarded to the upload task.
        geometry: GeoJSON-like polygon geometry passed to ``get_covering_tiles``.
        project_id: Identifier of the project the DEM belongs to.
        background_tasks (BackgroundTasks): Queue that receives the upload task.
    """
    tiles = get_covering_tiles(geometry)
    if not tiles:
        print("No tiles cover the project geometry; skipping DEM download")
        return

    process = CrawlerProcess(get_project_settings())
    crawler = process.create_crawler(TifSpider)
    process.crawl(crawler, coordinates=",".join(tiles))

    try:
        # This runs inside a web-server task, so Scrapy/Twisted must not
        # install their own OS signal handlers.
        # NOTE(review): Twisted's reactor cannot be restarted, so a second
        # crawl in the same process is expected to fail here; confirm and
        # consider running the crawl in a subprocess.
        process.start(install_signal_handlers=False)
    except Exception as e:
        print(f"Scrapy execution failed: {str(e)}")
        return

    # Trust the path the spider reports instead of assuming the merge worked:
    # it stays None when no tile was downloaded or merged.
    spider = getattr(crawler, "spider", None)
    tif_file_path = getattr(spider, "merged_file_path", None)
    if not tif_file_path or not os.path.exists(tif_file_path):
        print("Scrapy execution produced no merged DEM file; skipping upload")
        return

    ## store the file in db
    background_tasks.add_task(upload_dem_file_s3, db, tif_file_path, project_id)


async def upload_dem_file_s3(db: Connection, tif_file_path, project_id):
    """
    Upload the merged DEM to S3 and record its URL on the project.

    The local file is removed afterwards whether or not the upload succeeded,
    so a failed run cannot leave a stale raster behind for a later project.

    Args:
        db (Connection): Database connection used to store the DEM URL.
        tif_file_path: Path of the merged GeoTIFF on local disk.
        project_id: Identifier of the project the DEM belongs to.
    """
    try:
        with open(tif_file_path, "rb") as dem_file:
            # Create an in-memory file-like object so the upload does not
            # depend on the on-disk file staying open.
            file_obj = io.BytesIO(dem_file.read())
        dem = UploadFile(file=file_obj, filename="dem.tif")

        dem_url = await project_logic.upload_file_to_s3(project_id, dem, "dem.tif")

        await project_logic.update_url(db, project_id, dem_url)
    finally:
        if os.path.exists(tif_file_path):
            os.remove(tif_file_path)
7 changes: 7 additions & 0 deletions src/backend/app/projects/project_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
send_project_approval_email_to_regulator,
)
from app.users import user_schemas
from app.jaxa.upload_dem import upload_dem_file
from minio.deleteobjects import DeleteObject
from drone_flightplan import waypoints

Expand Down Expand Up @@ -228,6 +229,12 @@ async def create_project(
project_info.name,
)

if project_info.is_terrain_follow and not dem:
geometry = project_info.outline["features"][0]["geometry"]
background_tasks.add_task(
upload_dem_file, db, geometry, project_id, background_tasks
)

return {"message": "Project successfully created", "project_id": project_id}


Expand Down
1 change: 1 addition & 0 deletions src/backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"asgiref>=3.8.1",
"bcrypt>=4.2.1",
"drone-flightplan>=0.3.2",
"Scrapy==2.12.0",
]
requires-python = ">=3.11"
license = {text = "GPL-3.0-only"}
Expand Down
Loading

0 comments on commit 1f0baf5

Please sign in to comment.