diff --git a/python/mosaic/models/analyzer/__init__.py b/python/mosaic/models/analyzer/__init__.py
new file mode 100644
index 000000000..6dfb1347f
--- /dev/null
+++ b/python/mosaic/models/analyzer/__init__.py
@@ -0,0 +1 @@
+from .analyzer import MosaicAnalyzer
diff --git a/python/mosaic/models/analyzer/analyzer.py b/python/mosaic/models/analyzer/analyzer.py
new file mode 100644
index 000000000..f23f96f36
--- /dev/null
+++ b/python/mosaic/models/analyzer/analyzer.py
@@ -0,0 +1,56 @@
+from pyspark.sql import SparkSession, DataFrame, SQLContext
+from typing import *
+
+
+class MosaicAnalyzer:
+    """
+    MosaicAnalyzer is a class that provides the ability to analyze spatial data
+    and provide insights into the optimal resolution for the given dataset.
+    This only works for geometries that have area > 0.
+    """
+
+    def __init__(self, dataframe: DataFrame):
+        """
+        Initialize the MosaicAnalyzer.
+
+        :param dataframe: DataFrame whose ``_jdf`` handle is passed to the
+            JVM-side ``MosaicAnalyzer`` constructor.
+        """
+
+        self.spark = SparkSession.builder.getOrCreate()
+        self.model = getattr(
+            self.spark._jvm.com.databricks.labs.mosaic.sql, "MosaicAnalyzer"
+        )(dataframe._jdf)
+
+    def get_optimal_resolution(
+        self,
+        geometry_column: str,
+        sample: Optional[float] = None,
+        nrows: Optional[int] = None,
+    ):
+        """
+        Get the optimal resolution for the given dataset.
+
+        Python does not overload methods by signature: a later ``def`` with
+        the same name replaces the earlier one, so the three variants are
+        merged into a single method with optional arguments.
+
+        :param geometry_column: Name of the geometry column to analyze.
+        :param sample: Optional value forwarded as the second argument of the
+            JVM ``getOptimalResolution`` call (presumably a sampling fraction;
+            confirm against the JVM API).
+        :param nrows: Optional value forwarded as the second argument of the
+            JVM ``getOptimalResolution`` call (presumably a row count;
+            confirm against the JVM API).
+        :return: The result of the JVM ``getOptimalResolution`` call.
+        :raises ValueError: If both ``sample`` and ``nrows`` are given.
+        """
+        if sample is not None and nrows is not None:
+            raise ValueError("Pass either 'sample' or 'nrows', not both.")
+
+        # Forward the value unchanged, exactly as the previous positional call
+        # did, so existing two-argument callers keep their behavior.
+        extra = sample if sample is not None else nrows
+        if extra is None:
+            return self.model.getOptimalResolution(geometry_column)
+        return self.model.getOptimalResolution(geometry_column, extra)