Skip to content

Commit

Permalink
Merge pull request #541 from databrickslabs/feature/python_analyzer
Browse files Browse the repository at this point in the history
Add python bindings for MosaicAnalyzer.
  • Loading branch information
Milos Colic authored Mar 15, 2024
2 parents fc7bbfb + 7918a29 commit e1d396f
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 0 deletions.
1 change: 1 addition & 0 deletions python/mosaic/models/analyzer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .analyzer import MosaicAnalyzer
38 changes: 38 additions & 0 deletions python/mosaic/models/analyzer/analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pyspark.sql import SparkSession, DataFrame, SQLContext
from typing import *


class MosaicAnalyzer:
    """
    Python binding for the JVM-side ``MosaicAnalyzer``.

    MosaicAnalyzer provides the ability to analyze spatial data and
    provide insights into the optimal resolution for the given dataset.
    This only works for geometries that have area > 0.
    """

    def __init__(self, dataframe: "DataFrame"):
        """
        Initialize the analyzer for the given dataframe.

        Looks up ``com.databricks.labs.mosaic.sql.MosaicAnalyzer`` on the
        JVM of the active (or newly created) Spark session and instantiates
        it with the Java dataframe that backs ``dataframe``.

        :param dataframe: PySpark dataframe holding the geometries to analyze.
        """
        self.spark = SparkSession.builder.getOrCreate()
        jvm_package = self.spark._jvm.com.databricks.labs.mosaic.sql
        # The JVM class works on the underlying Java dataframe, not on the
        # Python wrapper, hence ``_jdf``.
        self.model = jvm_package.MosaicAnalyzer(dataframe._jdf)

    def get_optimal_resolution(
        self,
        geometry_column: str,
        sample: Optional[float] = None,
        nrows: Optional[int] = None,
    ):
        """
        Get the optimal resolution for the given dataset.

        Python has no signature-based overloading: defining this method
        several times in the class body keeps only the last definition.
        The variants are therefore exposed through optional arguments of a
        single method.

        :param geometry_column: Name of the column containing the geometries.
        :param sample: Optional value forwarded as the second argument of the
            JVM ``getOptimalResolution`` call. Presumably a fraction of the
            dataset to sample; the JVM overload is chosen from the runtime
            type of the value (TODO confirm against the Scala API).
        :param nrows: Optional value forwarded as the second argument of the
            JVM ``getOptimalResolution`` call. Presumably the number of rows
            to sample (TODO confirm against the Scala API).
        :return: Whatever the JVM ``getOptimalResolution`` call returns.
        :raises ValueError: If both ``sample`` and ``nrows`` are provided.
        """
        if sample is not None and nrows is not None:
            raise ValueError(
                "Provide at most one of 'sample' and 'nrows', not both."
            )
        if nrows is not None:
            return self.model.getOptimalResolution(geometry_column, nrows)
        if sample is not None:
            return self.model.getOptimalResolution(geometry_column, sample)
        # No sampling hint: use the single-argument JVM variant.
        return self.model.getOptimalResolution(geometry_column)

0 comments on commit e1d396f

Please sign in to comment.