From a1ddb5692558b942e7e6d59854d2a4b3fd94a361 Mon Sep 17 00:00:00 2001
From: Mark Hamilton <mhamilton723@gmail.com>
Date: Wed, 12 Jan 2022 15:42:50 -0500
Subject: [PATCH] fix: fix flaky tests (#1342)

---
 .../synapse/ml/exploratory/DataBalanceTestBase.scala |  2 +-
 .../OpenCV - Pipeline Image Transformations.ipynb    | 12 +++++++-----
 ...perParameterTuning - Fighting Breast Cancer.ipynb |  2 +-
 .../Interpretability - Explanation Dashboard.ipynb   |  4 ++--
 .../Interpretability - Tabular SHAP explainer.ipynb  |  4 ++--
 .../Interpretability - Text Explainers.ipynb         |  5 +++--
 6 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/exploratory/DataBalanceTestBase.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/exploratory/DataBalanceTestBase.scala
index 2e72cfa300..051aeedbd9 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/exploratory/DataBalanceTestBase.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/exploratory/DataBalanceTestBase.scala
@@ -15,7 +15,7 @@ trait DataBalanceTestBase extends TestBase {
 
   import spark.implicits._
 
-  lazy val errorTolerance: Double = 1e-12
+  lazy val errorTolerance: Double = 1e-8
 
   lazy val featureProbCol = "featureProb"
   lazy val positiveFeatureCountCol = "positiveFeatureCount"
diff --git a/notebooks/features/opencv/OpenCV - Pipeline Image Transformations.ipynb b/notebooks/features/opencv/OpenCV - Pipeline Image Transformations.ipynb
index 34adfcbdc9..c22be723ee 100644
--- a/notebooks/features/opencv/OpenCV - Pipeline Image Transformations.ipynb	
+++ b/notebooks/features/opencv/OpenCV - Pipeline Image Transformations.ipynb	
@@ -57,8 +57,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import time\n",
     "imageStream = spark.readStream.image().load(imageDir)\n",
     "query = imageStream.select(\"image.height\").writeStream.format(\"memory\").queryName(\"heights\").start()\n",
+    "time.sleep(3)\n",
     "print(\"Streaming query activity: {}\".format(query.isActive))"
    ]
   },
@@ -120,7 +122,7 @@
    "outputs": [],
    "source": [
     "from PIL import Image\n",
-    "\n",
+    "import matplotlib.pyplot as plt\n",
     "data = images.take(3)    # take first three rows of the dataframe\n",
     "im = data[2][0]          # the image is in the first column of a given row\n",
     "\n",
@@ -129,8 +131,8 @@
     "print(\"height: {}, width: {}, OpenCV type: {}\".format(im.height, im.width, im.mode))\n",
     "\n",
     "arr = toNDArray(im)     # convert to numpy array\n",
-    "Image.fromarray(arr, \"RGB\")   # display the image inside notebook\n",
-    "print(images.count())"
+    "print(images.count())\n",
+    "plt.imshow(Image.fromarray(arr, \"RGB\"))   # display the image inside notebook\n"
    ]
   },
   {
@@ -157,7 +159,7 @@
     "small = tr.transform(images).select(\"transformed\")\n",
     "\n",
     "im = small.take(3)[2][0]                  # take third image\n",
-    "Image.fromarray(toNDArray(im), \"RGB\")   # display the image inside notebook"
+    "plt.imshow(Image.fromarray(toNDArray(im), \"RGB\"))   # display the image inside notebook"
    ]
   },
   {
@@ -188,7 +190,7 @@
     "noblue = small.withColumn(\"noblue\", noBlueUDF(small[\"transformed\"])).select(\"noblue\")\n",
     "\n",
     "im = noblue.take(3)[2][0]                # take second image\n",
-    "Image.fromarray(toNDArray(im), \"RGB\")   # display the image inside notebook"
+    "plt.imshow(Image.fromarray(toNDArray(im), \"RGB\"))   # display the image inside notebook"
    ]
   },
   {
diff --git a/notebooks/features/other/HyperParameterTuning - Fighting Breast Cancer.ipynb b/notebooks/features/other/HyperParameterTuning - Fighting Breast Cancer.ipynb
index 075b777b51..cf4ec397e3 100644
--- a/notebooks/features/other/HyperParameterTuning - Fighting Breast Cancer.ipynb	
+++ b/notebooks/features/other/HyperParameterTuning - Fighting Breast Cancer.ipynb	
@@ -33,7 +33,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "data = spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/BreastCancer.parquet\")\n",
+    "data = spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/BreastCancer.parquet\").cache()\n",
     "tune, test = data.randomSplit([0.80, 0.20])\n",
     "tune.limit(10).toPandas()"
    ]
diff --git a/notebooks/features/responsible_ai/Interpretability - Explanation Dashboard.ipynb b/notebooks/features/responsible_ai/Interpretability - Explanation Dashboard.ipynb
index a86960cf0d..16066f369c 100644
--- a/notebooks/features/responsible_ai/Interpretability - Explanation Dashboard.ipynb	
+++ b/notebooks/features/responsible_ai/Interpretability - Explanation Dashboard.ipynb	
@@ -73,7 +73,7 @@
    },
    "outputs": [],
    "source": [
-    "df = spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/AdultCensusIncome.parquet\")\n",
+    "df = spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/AdultCensusIncome.parquet\").cache()\n",
     "\n",
     "labelIndexer = StringIndexer(inputCol=\"income\", outputCol=\"label\", stringOrderType=\"alphabetAsc\").fit(df)\n",
     "print(\"Label index assigment: \" + str(set(zip(labelIndexer.labels, [0, 1]))))\n",
@@ -427,4 +427,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/notebooks/features/responsible_ai/Interpretability - Tabular SHAP explainer.ipynb b/notebooks/features/responsible_ai/Interpretability - Tabular SHAP explainer.ipynb
index 0f72094d9c..8f37624e17 100644
--- a/notebooks/features/responsible_ai/Interpretability - Tabular SHAP explainer.ipynb	
+++ b/notebooks/features/responsible_ai/Interpretability - Tabular SHAP explainer.ipynb	
@@ -76,7 +76,7 @@
     "labelIndexer = StringIndexer(inputCol=\"income\", outputCol=\"label\", stringOrderType=\"alphabetAsc\").fit(df)\n",
     "print(\"Label index assigment: \" + str(set(zip(labelIndexer.labels, [0, 1]))))\n",
     "\n",
-    "training = labelIndexer.transform(df)\n",
+    "training = labelIndexer.transform(df).cache()\n",
     "display(training)\n",
     "categorical_features = [\n",
     "    \"workclass\",\n",
@@ -318,4 +318,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/notebooks/features/responsible_ai/Interpretability - Text Explainers.ipynb b/notebooks/features/responsible_ai/Interpretability - Text Explainers.ipynb
index 281ace06f3..83bed19f21 100644
--- a/notebooks/features/responsible_ai/Interpretability - Text Explainers.ipynb	
+++ b/notebooks/features/responsible_ai/Interpretability - Text Explainers.ipynb	
@@ -74,9 +74,10 @@
     "    spark.read.parquet(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/BookReviewsFromAmazon10K.parquet\")\n",
     "    .withColumn(\"label\", (col(\"rating\") > 3).cast(LongType()))\n",
     "    .select(\"label\", \"text\")\n",
+    "    .cache()\n",
     ")\n",
     "\n",
-    "data.limit(10).toPandas()"
+    "display(data)"
    ]
   },
   {
@@ -279,4 +280,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
\ No newline at end of file