docs: clean up some spacing issues in website (#1256)

microsoft · Nov 15, 2021 · c6660d0 · c6660d0
1 parent b2751eb
commit c6660d0
Show file tree

Hide file tree

Showing 33 changed files with 282 additions and 2,420 deletions.
diff --git a/notebooks/features/responsible_ai/Interpretability - Image Explainers.ipynb b/notebooks/features/responsible_ai/Interpretability - Image Explainers.ipynb
@@ -29,33 +29,33 @@
                 "import urllib.request\n",
                 "import matplotlib.pyplot as plt\n",
                 "import PIL, io\n",
-                "from PIL import Image\r\n",
-                "\r\n",
-                "vec_slice = udf(lambda vec, indices: (vec.toArray())[indices].tolist(), ArrayType(FloatType()))\r\n",
-                "arg_top_k = udf(lambda vec, k: (-vec.toArray()).argsort()[:k].tolist(), ArrayType(IntegerType()))\r\n",
-                "\r\n",
-                "def downloadBytes(url: str):\r\n",
-                "  with urllib.request.urlopen(url) as url:\r\n",
-                "    barr = url.read()\r\n",
-                "    return barr\r\n",
-                "\r\n",
-                "def rotate_color_channel(bgr_image_array, height, width, nChannels):\r\n",
-                "  B, G, R, *_ = np.asarray(bgr_image_array).reshape(height, width, nChannels).T\r\n",
-                "  rgb_image_array = np.array((R, G, B)).T\r\n",
-                "  return rgb_image_array\r\n",
-                "    \r\n",
-                "def plot_superpixels(image_rgb_array, sp_clusters, weights, green_threshold=99):\r\n",
-                "    superpixels = sp_clusters\r\n",
-                "    green_value = np.percentile(weights, green_threshold)\r\n",
-                "    img = Image.fromarray(image_rgb_array, mode='RGB').convert(\"RGBA\")\r\n",
-                "    image_array = np.asarray(img).copy()\r\n",
-                "    for (sp, v) in zip(superpixels, weights):\r\n",
-                "        if v > green_value:\r\n",
-                "            for (x, y) in sp:\r\n",
-                "                image_array[y, x, 1] = 255\r\n",
-                "                image_array[y, x, 3] = 200\r\n",
-                "    plt.clf()\r\n",
-                "    plt.imshow(image_array)\r\n",
+                "from PIL import Image\n",
+                "\n",
+                "vec_slice = udf(lambda vec, indices: (vec.toArray())[indices].tolist(), ArrayType(FloatType()))\n",
+                "arg_top_k = udf(lambda vec, k: (-vec.toArray()).argsort()[:k].tolist(), ArrayType(IntegerType()))\n",
+                "\n",
+                "def downloadBytes(url: str):\n",
+                "  with urllib.request.urlopen(url) as url:\n",
+                "    barr = url.read()\n",
+                "    return barr\n",
+                "\n",
+                "def rotate_color_channel(bgr_image_array, height, width, nChannels):\n",
+                "  B, G, R, *_ = np.asarray(bgr_image_array).reshape(height, width, nChannels).T\n",
+                "  rgb_image_array = np.array((R, G, B)).T\n",
+                "  return rgb_image_array\n",
+                "    \n",
+                "def plot_superpixels(image_rgb_array, sp_clusters, weights, green_threshold=99):\n",
+                "    superpixels = sp_clusters\n",
+                "    green_value = np.percentile(weights, green_threshold)\n",
+                "    img = Image.fromarray(image_rgb_array, mode='RGB').convert(\"RGBA\")\n",
+                "    image_array = np.asarray(img).copy()\n",
+                "    for (sp, v) in zip(superpixels, weights):\n",
+                "        if v > green_value:\n",
+                "            for (x, y) in sp:\n",
+                "                image_array[y, x, 1] = 255\n",
+                "                image_array[y, x, 3] = 200\n",
+                "    plt.clf()\n",
+                "    plt.imshow(image_array)\n",
                 "    display()"
             ],
             "outputs": [],
@@ -74,36 +74,36 @@
             "cell_type": "code",
             "execution_count": null,
             "source": [
-                "from synapse.ml.io import *\r\n",
-                "\r\n",
-                "image_df = spark.read.image().load(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/explainers/images/david-lusvardi-dWcUncxocQY-unsplash.jpg\")\r\n",
-                "display(image_df)\r\n",
-                "\r\n",
-                "# Rotate the image array from BGR into RGB channels for visualization later.\r\n",
-                "row = image_df.select(\"image.height\", \"image.width\", \"image.nChannels\", \"image.data\").head()\r\n",
-                "locals().update(row.asDict())\r\n",
-                "rgb_image_array = rotate_color_channel(data, height, width, nChannels)\r\n",
-                "\r\n",
-                "# Download the ONNX model\r\n",
-                "modelPayload = downloadBytes(\"https://mmlspark.blob.core.windows.net/publicwasb/ONNXModels/resnet50-v2-7.onnx\")\r\n",
-                "\r\n",
-                "featurizer = (\r\n",
-                "  ImageTransformer(inputCol=\"image\", outputCol=\"features\")\r\n",
-                "      .resize(224, True)\r\n",
-                "      .centerCrop(224, 224)\r\n",
-                "      .normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], color_scale_factor = 1/255)\r\n",
-                "      .setTensorElementType(FloatType())\r\n",
-                ")\r\n",
-                "\r\n",
-                "onnx = (\r\n",
-                "  ONNXModel()\r\n",
-                "      .setModelPayload(modelPayload)\r\n",
-                "      .setFeedDict({\"data\": \"features\"})\r\n",
-                "      .setFetchDict({\"rawPrediction\": \"resnetv24_dense0_fwd\"})\r\n",
-                "      .setSoftMaxDict({\"rawPrediction\": \"probability\"})\r\n",
-                "      .setMiniBatchSize(1)\r\n",
-                ")\r\n",
-                "\r\n",
+                "from synapse.ml.io import *\n",
+                "\n",
+                "image_df = spark.read.image().load(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/explainers/images/david-lusvardi-dWcUncxocQY-unsplash.jpg\")\n",
+                "display(image_df)\n",
+                "\n",
+                "# Rotate the image array from BGR into RGB channels for visualization later.\n",
+                "row = image_df.select(\"image.height\", \"image.width\", \"image.nChannels\", \"image.data\").head()\n",
+                "locals().update(row.asDict())\n",
+                "rgb_image_array = rotate_color_channel(data, height, width, nChannels)\n",
+                "\n",
+                "# Download the ONNX model\n",
+                "modelPayload = downloadBytes(\"https://mmlspark.blob.core.windows.net/publicwasb/ONNXModels/resnet50-v2-7.onnx\")\n",
+                "\n",
+                "featurizer = (\n",
+                "  ImageTransformer(inputCol=\"image\", outputCol=\"features\")\n",
+                "      .resize(224, True)\n",
+                "      .centerCrop(224, 224)\n",
+                "      .normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], color_scale_factor = 1/255)\n",
+                "      .setTensorElementType(FloatType())\n",
+                ")\n",
+                "\n",
+                "onnx = (\n",
+                "  ONNXModel()\n",
+                "      .setModelPayload(modelPayload)\n",
+                "      .setFeedDict({\"data\": \"features\"})\n",
+                "      .setFetchDict({\"rawPrediction\": \"resnetv24_dense0_fwd\"})\n",
+                "      .setSoftMaxDict({\"rawPrediction\": \"probability\"})\n",
+                "      .setMiniBatchSize(1)\n",
+                ")\n",
+                "\n",
                 "model = Pipeline(stages=[featurizer, onnx]).fit(image_df)"
             ],
             "outputs": [],

diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js
@@ -3,7 +3,7 @@ const {all_examples} = require('./src/plugins/examples');
 let version = "0.9.1";
 
 module.exports = {
-  title: 'Synapse ML',
+  title: 'SynapseML',
   tagline: 'Simple and Distributed Machine Learning',
   url: 'https://microsoft.github.io',
   baseUrl: '/SynapseML/',

diff --git a/website/src/pages/index.js b/website/src/pages/index.js
@@ -61,13 +61,26 @@ interpretation_df = (TabularSHAP()
     config: `from synapse.ml.lightgbm import *
     
 quantile_df = (LightGBMRegressor()
-  .setApplication('quantile')
-  .setAlpha(0.3)
-  .setLearningRate(0.3)
-  .setNumIterations(100)
-  .setNumLeaves(31)
-  .fit(train_df)
-  .transform(test_df))`,
+    .setApplication('quantile')
+    .setAlpha(0.3)
+    .setLearningRate(0.3)
+    .setNumIterations(100)
+    .setNumLeaves(31)
+    .fit(train_df)
+    .transform(test_df))`,
+  },
+  {
+    label: "OpenCV",
+    further: "docs/features/opencv/OpenCV%20-%20Pipeline%20Image%20Transformations",
+    config: `from synapse.ml.opencv import *
+
+image_df = (ImageTransformer()
+    .setInputCol("images")
+    .setOutputCol("transformed_images")
+    .resize(224, True)
+    .centerCrop(224, 224)
+    .normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], color_scale_factor = 1/255)
+    .transform(input_df))`,
   },
 ];
 

diff --git a/website/versioned_docs/version-0.9.1/documentation/estimators/_LightGBM.md b/website/versioned_docs/version-0.9.1/documentation/estimators/_LightGBM.md
@@ -2,26 +2,8 @@ import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import DocTable from "@theme/DocumentationTable";
 
-<!-- 
-```python
-import pyspark
-import os
-import json
-from IPython.display import display
-
-spark = (pyspark.sql.SparkSession.builder.appName("MyApp")
-        .config("spark.jars.packages", "com.microsoft.azure:synapseml:0.9.1")
-        .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
-        .getOrCreate())
-
-def getSecret(secretName):
-        get_secret_cmd = 'az keyvault secret show --vault-name mmlspark-build-keys --name {}'.format(secretName)
-        value = json.loads(os.popen(get_secret_cmd).read())["value"]
-        return value
-
-import synapse.ml
-```
--->
+
+
 
 ## LightGBMClassifier
 
@@ -87,27 +69,8 @@ values={[
 ]}>
 <TabItem value="py">
 
-<!-- 
-```python
-import pyspark
-import os
-import json
-from IPython.display import display
-from pyspark.sql.functions import *
-
-spark = (pyspark.sql.SparkSession.builder.appName("MyApp")
-        .config("spark.jars.packages", "com.microsoft.azure:synapseml:0.9.1")
-        .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
-        .getOrCreate())
-
-def getSecret(secretName):
-        get_secret_cmd = 'az keyvault secret show --vault-name mmlspark-build-keys --name {}'.format(secretName)
-        value = json.loads(os.popen(get_secret_cmd).read())["value"]
-        return value
-
-import synapse.ml
-```
--->
+
+
 
 <!--pytest-codeblocks:cont-->
 
@@ -159,27 +122,8 @@ values={[
 ]}>
 <TabItem value="py">
 
-<!-- 
-```python
-import pyspark
-import os
-import json
-from IPython.display import display
-from pyspark.sql.functions import *
-
-spark = (pyspark.sql.SparkSession.builder.appName("MyApp")
-        .config("spark.jars.packages", "com.microsoft.azure:synapseml:0.9.1")
-        .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
-        .getOrCreate())
-
-def getSecret(secretName):
-        get_secret_cmd = 'az keyvault secret show --vault-name mmlspark-build-keys --name {}'.format(secretName)
-        value = json.loads(os.popen(get_secret_cmd).read())["value"]
-        return value
-
-import synapse.ml
-```
--->
+
+
 
 <!--pytest-codeblocks:cont-->
 

diff --git a/website/versioned_docs/version-0.9.1/documentation/estimators/_VW.md b/website/versioned_docs/version-0.9.1/documentation/estimators/_VW.md
@@ -2,26 +2,8 @@ import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import DocTable from "@theme/DocumentationTable";
 
-<!-- 
-```python
-import pyspark
-import os
-import json
-from IPython.display import display
-
-spark = (pyspark.sql.SparkSession.builder.appName("MyApp")
-        .config("spark.jars.packages", "com.microsoft.azure:synapseml:0.9.1")
-        .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
-        .getOrCreate())
-
-def getSecret(secretName):
-        get_secret_cmd = 'az keyvault secret show --vault-name mmlspark-build-keys --name {}'.format(secretName)
-        value = json.loads(os.popen(get_secret_cmd).read())["value"]
-        return value
-
-import synapse.ml
-```
--->
+
+
 
 ## VowpalWabbitRegressor
 
@@ -84,27 +66,8 @@ values={[
 ]}>
 <TabItem value="py">
 
-<!-- 
-```python
-import pyspark
-import os
-import json
-from IPython.display import display
-from pyspark.sql.functions import *
-
-spark = (pyspark.sql.SparkSession.builder.appName("MyApp")
-        .config("spark.jars.packages", "com.microsoft.azure:synapseml:0.9.1")
-        .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
-        .getOrCreate())
-
-def getSecret(secretName):
-        get_secret_cmd = 'az keyvault secret show --vault-name mmlspark-build-keys --name {}'.format(secretName)
-        value = json.loads(os.popen(get_secret_cmd).read())["value"]
-        return value
-
-import synapse.ml
-```
--->
+
+
 
 <!--pytest-codeblocks:cont-->
 

diff --git a/website/versioned_docs/version-0.9.1/documentation/estimators/core/_AutoML.md b/website/versioned_docs/version-0.9.1/documentation/estimators/core/_AutoML.md
@@ -2,26 +2,6 @@ import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import DocTable from "@theme/DocumentationTable";
 
-<!-- 
-```python
-import pyspark
-import os
-import json
-from IPython.display import display
-
-spark = (pyspark.sql.SparkSession.builder.appName("MyApp")
-        .config("spark.jars.packages", "com.microsoft.azure:synapseml:0.9.1")
-        .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
-        .getOrCreate())
-
-def getSecret(secretName):
-        get_secret_cmd = 'az keyvault secret show --vault-name mmlspark-build-keys --name {}'.format(secretName)
-        value = json.loads(os.popen(get_secret_cmd).read())["value"]
-        return value
-
-import synapse.ml
-``` 
--->
 
 ## AutoML
 
@@ -143,27 +123,6 @@ values={[
 ]}>
 <TabItem value="py">
 
-<!-- 
-```python
-import pyspark
-import os
-import json
-from IPython.display import display
-from pyspark.sql.functions import *
-
-spark = (pyspark.sql.SparkSession.builder.appName("MyApp")
-        .config("spark.jars.packages", "com.microsoft.azure:synapseml:0.9.1")
-        .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
-        .getOrCreate())
-
-def getSecret(secretName):
-        get_secret_cmd = 'az keyvault secret show --vault-name mmlspark-build-keys --name {}'.format(secretName)
-        value = json.loads(os.popen(get_secret_cmd).read())["value"]
-        return value
-
-import synapse.ml
-```
--->
 
 <!--pytest-codeblocks:cont-->