Skip to content

Commit

Permalink
fix: fix flaky tests (#1342)
Browse files Browse the repository at this point in the history
  • Loading branch information
mhamilton723 authored Jan 12, 2022
1 parent 2ae312e commit a1ddb56
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ trait DataBalanceTestBase extends TestBase {

import spark.implicits._

lazy val errorTolerance: Double = 1e-12
lazy val errorTolerance: Double = 1e-8

lazy val featureProbCol = "featureProb"
lazy val positiveFeatureCountCol = "positiveFeatureCount"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"imageStream = spark.readStream.image().load(imageDir)\n",
"query = imageStream.select(\"image.height\").writeStream.format(\"memory\").queryName(\"heights\").start()\n",
"time.sleep(3)\n",
"print(\"Streaming query activity: {}\".format(query.isActive))"
]
},
Expand Down Expand Up @@ -120,7 +122,7 @@
"outputs": [],
"source": [
"from PIL import Image\n",
"\n",
"import matplotlib.pyplot as plt\n",
"data = images.take(3) # take first three rows of the dataframe\n",
"im = data[2][0] # the image is in the first column of a given row\n",
"\n",
Expand All @@ -129,8 +131,8 @@
"print(\"height: {}, width: {}, OpenCV type: {}\".format(im.height, im.width, im.mode))\n",
"\n",
"arr = toNDArray(im) # convert to numpy array\n",
"Image.fromarray(arr, \"RGB\") # display the image inside notebook\n",
"print(images.count())"
"print(images.count())\n",
"plt.imshow(Image.fromarray(arr, \"RGB\")) # display the image inside notebook\n"
]
},
{
Expand All @@ -157,7 +159,7 @@
"small = tr.transform(images).select(\"transformed\")\n",
"\n",
"im = small.take(3)[2][0] # take third image\n",
"Image.fromarray(toNDArray(im), \"RGB\") # display the image inside notebook"
"plt.imshow(Image.fromarray(toNDArray(im), \"RGB\")) # display the image inside notebook"
]
},
{
Expand Down Expand Up @@ -188,7 +190,7 @@
"noblue = small.withColumn(\"noblue\", noBlueUDF(small[\"transformed\"])).select(\"noblue\")\n",
"\n",
"im = noblue.take(3)[2][0] # take second image\n",
"Image.fromarray(toNDArray(im), \"RGB\") # display the image inside notebook"
"plt.imshow(Image.fromarray(toNDArray(im), \"RGB\")) # display the image inside notebook"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"metadata": {},
"outputs": [],
"source": [
"data = spark.read.parquet(\"wasbs://[email protected]/BreastCancer.parquet\")\n",
"data = spark.read.parquet(\"wasbs://[email protected]/BreastCancer.parquet\").cache()\n",
"tune, test = data.randomSplit([0.80, 0.20])\n",
"tune.limit(10).toPandas()"
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
},
"outputs": [],
"source": [
"df = spark.read.parquet(\"wasbs://[email protected]/AdultCensusIncome.parquet\")\n",
"df = spark.read.parquet(\"wasbs://[email protected]/AdultCensusIncome.parquet\").cache()\n",
"\n",
"labelIndexer = StringIndexer(inputCol=\"income\", outputCol=\"label\", stringOrderType=\"alphabetAsc\").fit(df)\n",
"print(\"Label index assigment: \" + str(set(zip(labelIndexer.labels, [0, 1]))))\n",
Expand Down Expand Up @@ -427,4 +427,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
"labelIndexer = StringIndexer(inputCol=\"income\", outputCol=\"label\", stringOrderType=\"alphabetAsc\").fit(df)\n",
"print(\"Label index assigment: \" + str(set(zip(labelIndexer.labels, [0, 1]))))\n",
"\n",
"training = labelIndexer.transform(df)\n",
"training = labelIndexer.transform(df).cache()\n",
"display(training)\n",
"categorical_features = [\n",
" \"workclass\",\n",
Expand Down Expand Up @@ -318,4 +318,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,10 @@
" spark.read.parquet(\"wasbs://[email protected]/BookReviewsFromAmazon10K.parquet\")\n",
" .withColumn(\"label\", (col(\"rating\") > 3).cast(LongType()))\n",
" .select(\"label\", \"text\")\n",
" .cache()\n",
")\n",
"\n",
"data.limit(10).toPandas()"
"display(data)"
]
},
{
Expand Down Expand Up @@ -279,4 +280,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

0 comments on commit a1ddb56

Please sign in to comment.