Skip to content

Commit 2b0a724

Browse files
authored
Update Databricks Notebook for Tools v24.08.0 (#427)
* Update Databricks Notebook for Tools for v24.08 * Sign-off commit Signed-off-by: Partho Sarthi <[email protected]> --------- Signed-off-by: Partho Sarthi <[email protected]>
1 parent 806b355 commit 2b0a724

3 files changed

+140
-16
lines changed

tools/databricks/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ top of the notebook. After that, select *Run all* to execute the tools for the
2020
1. Multiple event logs must be comma-separated.
2121
- For example: `/dbfs/path/to/eventlog1,/dbfs/path/to/eventlog2`
2222

23-
**Latest Tools Version Supported** 24.06.1
23+
**Latest Tools Version Supported** 24.08.0

tools/databricks/[RAPIDS Accelerator for Apache Spark] Profiling Tool Notebook Template.ipynb

+18-9
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "df33c614-2ecc-47a0-8600-bc891681997f",
1013
"showTitle": false,
@@ -50,7 +53,7 @@
5053
},
5154
"outputs": [],
5255
"source": [
53-
"TOOLS_VER = \"24.06.1\"\n",
56+
"TOOLS_VER = \"24.08.0\"\n",
5457
"print(f\"Using Tools Version: {TOOLS_VER}\")"
5558
]
5659
},
@@ -156,7 +159,10 @@
156159
"cell_type": "markdown",
157160
"metadata": {
158161
"application/vnd.databricks.v1+cell": {
159-
"cellMetadata": {},
162+
"cellMetadata": {
163+
"byteLimit": 2048000,
164+
"rowLimit": 10000
165+
},
160166
"inputWidgets": {},
161167
"nuid": "f83af6c8-5a79-4a46-965b-38a4cb621877",
162168
"showTitle": false,
@@ -380,7 +386,10 @@
380386
"cell_type": "markdown",
381387
"metadata": {
382388
"application/vnd.databricks.v1+cell": {
383-
"cellMetadata": {},
389+
"cellMetadata": {
390+
"byteLimit": 2048000,
391+
"rowLimit": 10000
392+
},
384393
"inputWidgets": {},
385394
"nuid": "bbe50fde-0bd6-4281-95fd-6a1ec6f17ab2",
386395
"showTitle": false,
@@ -455,7 +464,7 @@
455464
"stack": true
456465
},
457466
"nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc",
458-
"origId": 1075819839476955,
467+
"origId": 2173122769183713,
459468
"title": "Executive View",
460469
"version": "DashboardViewV1",
461470
"width": 1600
@@ -469,7 +478,7 @@
469478
"stack": true
470479
},
471480
"nuid": "62243296-4562-4f06-90ac-d7a609f19c16",
472-
"origId": 1075819839476956,
481+
"origId": 2173122769183714,
473482
"title": "App View",
474483
"version": "DashboardViewV1",
475484
"width": 1920
@@ -479,7 +488,7 @@
479488
"language": "python",
480489
"notebookMetadata": {
481490
"mostRecentlyExecutedCommandWithImplicitDF": {
482-
"commandId": 203373918309288,
491+
"commandId": 2173122769183692,
483492
"dataframes": [
484493
"_sqldf"
485494
]
@@ -507,11 +516,11 @@
507516
"widgetInfo": {
508517
"widgetType": "text",
509518
"defaultValue": "/dbfs/user1/profiling_logs",
510-
"label": null,
519+
"label": "",
511520
"name": "Eventlog Path",
512521
"options": {
513522
"widgetType": "text",
514-
"autoCreated": null,
523+
"autoCreated": false,
515524
"validationRegex": null
516525
}
517526
}

tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template.ipynb

+121-6
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
},
5050
"outputs": [],
5151
"source": [
52-
"TOOLS_VER = \"24.06.1\"\n",
52+
"TOOLS_VER = \"24.08.0\"\n",
5353
"print(f\"Using Tools Version: {TOOLS_VER}\")"
5454
]
5555
},
@@ -282,6 +282,7 @@
282282
"\n",
283283
"try:\n",
284284
" output_folder, log_file_location = extract_file_info(CONSOLE_OUTPUT_PATH, OUTPUT_PATH)\n",
285+
" jar_output_folder = os.path.join(output_folder, \"rapids_4_spark_qualification_output\")\n",
285286
" print(f\"Output folder detected {output_folder}\")\n",
286287
" copy_logs(output_folder, log_file_location, CONSOLE_OUTPUT_PATH, CONSOLE_ERROR_PATH)\n",
287288
" print(f\"Logs successfully copied to {output_folder}\")\n",
@@ -424,9 +425,110 @@
424425
"outputs": [],
425426
"source": [
426427
"summary_output=pd.read_csv(os.path.join(output_folder, \"qualification_summary.csv\"))\n",
428+
"summary_output=summary_output.drop(columns=[\"Unnamed: 0\"]).rename_axis('Index').reset_index()\n",
427429
"display(summary_output)"
428430
]
429431
},
432+
{
433+
"cell_type": "markdown",
434+
"metadata": {
435+
"application/vnd.databricks.v1+cell": {
436+
"cellMetadata": {},
437+
"inputWidgets": {},
438+
"nuid": "73b5e0b0-3a96-4cc6-8e6c-840e4b0d9d43",
439+
"showTitle": false,
440+
"title": ""
441+
}
442+
},
443+
"source": [
444+
"\n",
445+
"## Application Status\n",
446+
"\n",
447+
"The report shows the status of each eventlog file that was provided\n"
448+
]
449+
},
450+
{
451+
"cell_type": "code",
452+
"execution_count": 0,
453+
"metadata": {
454+
"application/vnd.databricks.v1+cell": {
455+
"cellMetadata": {
456+
"byteLimit": 2048000,
457+
"rowLimit": 10000
458+
},
459+
"inputWidgets": {},
460+
"nuid": "c9ffbfdb-dbb6-4736-b9cb-2ac457cc6714",
461+
"showTitle": true,
462+
"title": "rapids_4_spark_qualification_output_status.csv"
463+
},
464+
"jupyter": {
465+
"source_hidden": true
466+
}
467+
},
468+
"outputs": [],
469+
"source": [
470+
"status_output=pd.read_csv(os.path.join(jar_output_folder, \"rapids_4_spark_qualification_output_status.csv\"))\n",
471+
"display(status_output)"
472+
]
473+
},
474+
{
475+
"cell_type": "markdown",
476+
"metadata": {
477+
"application/vnd.databricks.v1+cell": {
478+
"cellMetadata": {},
479+
"inputWidgets": {},
480+
"nuid": "09945d39-f9c2-4f4a-8afd-4f309f24f8e0",
481+
"showTitle": false,
482+
"title": ""
483+
}
484+
},
485+
"source": [
486+
"\n",
487+
"## Metadata for Migration\n",
488+
"\n",
489+
"The report shows the following metadata for each app:\n",
490+
"- Recommended GPU cluster\n",
491+
"- File location of full cluster config recommendations\n",
492+
"- File location of GPU-specific config recommendations only\n"
493+
]
494+
},
495+
{
496+
"cell_type": "code",
497+
"execution_count": 0,
498+
"metadata": {
499+
"application/vnd.databricks.v1+cell": {
500+
"cellMetadata": {
501+
"byteLimit": 2048000,
502+
"rowLimit": 10000
503+
},
504+
"inputWidgets": {},
505+
"nuid": "133cf1bd-33b6-4a62-9ae2-5505717092d1",
506+
"showTitle": true,
507+
"title": "app_metadata.json"
508+
},
509+
"jupyter": {
510+
"source_hidden": true
511+
}
512+
},
513+
"outputs": [],
514+
"source": [
515+
"import json\n",
516+
"metadata_file = os.path.join(output_folder, \"app_metadata.json\")\n",
517+
"def camel_to_title(name):\n",
518+
" return re.sub('([a-z])([A-Z])', r'\\1 \\2', name).title()\n",
519+
" \n",
520+
"with open(metadata_file, 'r') as file:\n",
521+
" json_data = json.load(file)\n",
522+
"\n",
523+
"df = pd.DataFrame(json_data)\n",
524+
"df['recommendedGpuCluster'] = df['clusterInfo'].apply(lambda x: x['recommendedCluster'])\n",
525+
"df['sourceCluster'] = df['clusterInfo'].apply(lambda x: x['sourceCluster'])\n",
526+
"df.drop(columns=['clusterInfo'], inplace=True)\n",
527+
"df = df[['appId', 'appName', 'estimatedGpuSpeedupCategory', 'recommendedGpuCluster', 'fullClusterConfigRecommendations', 'gpuConfigRecommendationBreakdown']]\n",
528+
"df.columns = [camel_to_title(col) for col in df.columns]\n",
529+
"display(df)"
530+
]
531+
},
430532
{
431533
"cell_type": "markdown",
432534
"metadata": {
@@ -474,7 +576,6 @@
474576
},
475577
"outputs": [],
476578
"source": [
477-
"jar_output_folder = os.path.join(output_folder, \"rapids_4_spark_qualification_output\")\n",
478579
"stages_output=pd.read_csv(os.path.join(jar_output_folder, \"rapids_4_spark_qualification_output_stages.csv\"))\n",
479580
"display(stages_output)"
480581
]
@@ -524,7 +625,7 @@
524625
"inputWidgets": {},
525626
"nuid": "998b0c51-0cb6-408e-a01a-d1f5b1a61e1f",
526627
"showTitle": true,
527-
"title": "rapids_4_spark_qualification_output_execs"
628+
"title": "rapids_4_spark_qualification_output_execs.csv"
528629
},
529630
"jupyter": {
530631
"source_hidden": true
@@ -549,7 +650,7 @@
549650
"stack": true
550651
},
551652
"nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc",
552-
"origId": 1075819839476974,
653+
"origId": 2173122769183715,
553654
"title": "Executive View",
554655
"version": "DashboardViewV1",
555656
"width": 1600
@@ -563,17 +664,31 @@
563664
"stack": true
564665
},
565666
"nuid": "62243296-4562-4f06-90ac-d7a609f19c16",
566-
"origId": 1075819839476975,
667+
"origId": 2173122769183716,
567668
"title": "App View",
568669
"version": "DashboardViewV1",
569670
"width": 1920
671+
},
672+
{
673+
"elements": [],
674+
"globalVars": {},
675+
"guid": "",
676+
"layoutOption": {
677+
"grid": true,
678+
"stack": true
679+
},
680+
"nuid": "854f9c75-5977-42aa-b3dd-c680b8331f19",
681+
"origId": 2173122769183722,
682+
"title": "Untitled",
683+
"version": "DashboardViewV1",
684+
"width": 1024
570685
}
571686
],
572687
"environmentMetadata": null,
573688
"language": "python",
574689
"notebookMetadata": {
575690
"mostRecentlyExecutedCommandWithImplicitDF": {
576-
"commandId": 1075819839476965,
691+
"commandId": 2173122769183704,
577692
"dataframes": [
578693
"_sqldf"
579694
]

0 commit comments

Comments
 (0)