diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/exploratory/DataBalanceTestBase.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/exploratory/DataBalanceTestBase.scala index 2e72cfa300..051aeedbd9 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/exploratory/DataBalanceTestBase.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/exploratory/DataBalanceTestBase.scala @@ -15,7 +15,7 @@ trait DataBalanceTestBase extends TestBase { import spark.implicits._ - lazy val errorTolerance: Double = 1e-12 + lazy val errorTolerance: Double = 1e-8 lazy val featureProbCol = "featureProb" lazy val positiveFeatureCountCol = "positiveFeatureCount" diff --git a/notebooks/features/opencv/OpenCV - Pipeline Image Transformations.ipynb b/notebooks/features/opencv/OpenCV - Pipeline Image Transformations.ipynb index 34adfcbdc9..c22be723ee 100644 --- a/notebooks/features/opencv/OpenCV - Pipeline Image Transformations.ipynb +++ b/notebooks/features/opencv/OpenCV - Pipeline Image Transformations.ipynb @@ -57,8 +57,10 @@ "metadata": {}, "outputs": [], "source": [ + "import time\n", "imageStream = spark.readStream.image().load(imageDir)\n", "query = imageStream.select(\"image.height\").writeStream.format(\"memory\").queryName(\"heights\").start()\n", + "time.sleep(3)\n", "print(\"Streaming query activity: {}\".format(query.isActive))" ] }, @@ -120,7 +122,7 @@ "outputs": [], "source": [ "from PIL import Image\n", - "\n", + "import matplotlib.pyplot as plt\n", "data = images.take(3) # take first three rows of the dataframe\n", "im = data[2][0] # the image is in the first column of a given row\n", "\n", @@ -129,8 +131,8 @@ "print(\"height: {}, width: {}, OpenCV type: {}\".format(im.height, im.width, im.mode))\n", "\n", "arr = toNDArray(im) # convert to numpy array\n", - "Image.fromarray(arr, \"RGB\") # display the image inside notebook\n", - "print(images.count())" + "print(images.count())\n", + "plt.imshow(Image.fromarray(arr, \"RGB\")) # display the image inside notebook\n" ] }, { @@ -157,7 +159,7 @@ "small = tr.transform(images).select(\"transformed\")\n", "\n", "im = small.take(3)[2][0] # take third image\n", - "Image.fromarray(toNDArray(im), \"RGB\") # display the image inside notebook" + "plt.imshow(Image.fromarray(toNDArray(im), \"RGB\")) # display the image inside notebook" ] }, { @@ -188,7 +190,7 @@ "noblue = small.withColumn(\"noblue\", noBlueUDF(small[\"transformed\"])).select(\"noblue\")\n", "\n", "im = noblue.take(3)[2][0] # take second image\n", - "Image.fromarray(toNDArray(im), \"RGB\") # display the image inside notebook" + "plt.imshow(Image.fromarray(toNDArray(im), \"RGB\")) # display the image inside notebook" ] }, { diff --git a/notebooks/features/other/HyperParameterTuning - Fighting Breast Cancer.ipynb b/notebooks/features/other/HyperParameterTuning - Fighting Breast Cancer.ipynb index 075b777b51..cf4ec397e3 100644 --- a/notebooks/features/other/HyperParameterTuning - Fighting Breast Cancer.ipynb +++ b/notebooks/features/other/HyperParameterTuning - Fighting Breast Cancer.ipynb @@ -33,7 +33,7 @@ "metadata": {}, "outputs": [], "source": [ - "data = spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/BreastCancer.parquet\")\n", + "data = spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/BreastCancer.parquet\").cache()\n", "tune, test = data.randomSplit([0.80, 0.20])\n", "tune.limit(10).toPandas()" ] diff --git a/notebooks/features/responsible_ai/Interpretability - Explanation Dashboard.ipynb b/notebooks/features/responsible_ai/Interpretability - Explanation Dashboard.ipynb index a86960cf0d..16066f369c 100644 --- a/notebooks/features/responsible_ai/Interpretability - Explanation Dashboard.ipynb +++ b/notebooks/features/responsible_ai/Interpretability - Explanation Dashboard.ipynb @@ -73,7 +73,7 @@ }, "outputs": [], "source": [ - "df = spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/AdultCensusIncome.parquet\")\n", + "df = spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/AdultCensusIncome.parquet\").cache()\n", "\n", "labelIndexer = StringIndexer(inputCol=\"income\", outputCol=\"label\", stringOrderType=\"alphabetAsc\").fit(df)\n", "print(\"Label index assigment: \" + str(set(zip(labelIndexer.labels, [0, 1]))))\n", @@ -427,4 +427,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/notebooks/features/responsible_ai/Interpretability - Tabular SHAP explainer.ipynb b/notebooks/features/responsible_ai/Interpretability - Tabular SHAP explainer.ipynb index 0f72094d9c..8f37624e17 100644 --- a/notebooks/features/responsible_ai/Interpretability - Tabular SHAP explainer.ipynb +++ b/notebooks/features/responsible_ai/Interpretability - Tabular SHAP explainer.ipynb @@ -76,7 +76,7 @@ "labelIndexer = StringIndexer(inputCol=\"income\", outputCol=\"label\", stringOrderType=\"alphabetAsc\").fit(df)\n", "print(\"Label index assigment: \" + str(set(zip(labelIndexer.labels, [0, 1]))))\n", "\n", - "training = labelIndexer.transform(df)\n", + "training = labelIndexer.transform(df).cache()\n", "display(training)\n", "categorical_features = [\n", " \"workclass\",\n", @@ -318,4 +318,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/notebooks/features/responsible_ai/Interpretability - Text Explainers.ipynb b/notebooks/features/responsible_ai/Interpretability - Text Explainers.ipynb index 281ace06f3..83bed19f21 100644 --- a/notebooks/features/responsible_ai/Interpretability - Text Explainers.ipynb +++ b/notebooks/features/responsible_ai/Interpretability - Text Explainers.ipynb @@ -74,9 +74,10 @@ " spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/BookReviewsFromAmazon10K.parquet\")\n", " .withColumn(\"label\", (col(\"rating\") > 3).cast(LongType()))\n", " .select(\"label\", \"text\")\n", + " .cache()\n", ")\n", "\n", - "data.limit(10).toPandas()" + "display(data)" ] }, { @@ -279,4 +280,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file