|
49 | 49 | },
|
50 | 50 | "outputs": [],
|
51 | 51 | "source": [
|
52 |
| - "TOOLS_VER = \"24.06.1\"\n", |
| 52 | + "TOOLS_VER = \"24.08.0\"\n", |
53 | 53 | "print(f\"Using Tools Version: {TOOLS_VER}\")"
|
54 | 54 | ]
|
55 | 55 | },
|
|
282 | 282 | "\n",
|
283 | 283 | "try:\n",
|
284 | 284 | " output_folder, log_file_location = extract_file_info(CONSOLE_OUTPUT_PATH, OUTPUT_PATH)\n",
|
| 285 | + " jar_output_folder = os.path.join(output_folder, \"rapids_4_spark_qualification_output\")\n", |
285 | 286 | " print(f\"Output folder detected {output_folder}\")\n",
|
286 | 287 | " copy_logs(output_folder, log_file_location, CONSOLE_OUTPUT_PATH, CONSOLE_ERROR_PATH)\n",
|
287 | 288 | " print(f\"Logs successfully copied to {output_folder}\")\n",
|
|
424 | 425 | "outputs": [],
|
425 | 426 | "source": [
|
426 | 427 | "summary_output=pd.read_csv(os.path.join(output_folder, \"qualification_summary.csv\"))\n",
|
| 428 | + "summary_output=summary_output.drop(columns=[\"Unnamed: 0\"]).rename_axis('Index').reset_index()\n", |
427 | 429 | "display(summary_output)"
|
428 | 430 | ]
|
429 | 431 | },
|
| 432 | + { |
| 433 | + "cell_type": "markdown", |
| 434 | + "metadata": { |
| 435 | + "application/vnd.databricks.v1+cell": { |
| 436 | + "cellMetadata": {}, |
| 437 | + "inputWidgets": {}, |
| 438 | + "nuid": "73b5e0b0-3a96-4cc6-8e6c-840e4b0d9d43", |
| 439 | + "showTitle": false, |
| 440 | + "title": "" |
| 441 | + } |
| 442 | + }, |
| 443 | + "source": [ |
| 444 | + "\n", |
| 445 | + "## Application Status\n", |
| 446 | + "\n", |
| 447 | + "The report shows the status of each event log file that was provided\n" |
| 448 | + ] |
| 449 | + }, |
| 450 | + { |
| 451 | + "cell_type": "code", |
| 452 | + "execution_count": 0, |
| 453 | + "metadata": { |
| 454 | + "application/vnd.databricks.v1+cell": { |
| 455 | + "cellMetadata": { |
| 456 | + "byteLimit": 2048000, |
| 457 | + "rowLimit": 10000 |
| 458 | + }, |
| 459 | + "inputWidgets": {}, |
| 460 | + "nuid": "c9ffbfdb-dbb6-4736-b9cb-2ac457cc6714", |
| 461 | + "showTitle": true, |
| 462 | + "title": "rapids_4_spark_qualification_output_status.csv" |
| 463 | + }, |
| 464 | + "jupyter": { |
| 465 | + "source_hidden": true |
| 466 | + } |
| 467 | + }, |
| 468 | + "outputs": [], |
| 469 | + "source": [ |
| 470 | + "status_output=pd.read_csv(os.path.join(jar_output_folder, \"rapids_4_spark_qualification_output_status.csv\"))\n", |
| 471 | + "display(status_output)" |
| 472 | + ] |
| 473 | + }, |
| 474 | + { |
| 475 | + "cell_type": "markdown", |
| 476 | + "metadata": { |
| 477 | + "application/vnd.databricks.v1+cell": { |
| 478 | + "cellMetadata": {}, |
| 479 | + "inputWidgets": {}, |
| 480 | + "nuid": "09945d39-f9c2-4f4a-8afd-4f309f24f8e0", |
| 481 | + "showTitle": false, |
| 482 | + "title": "" |
| 483 | + } |
| 484 | + }, |
| 485 | + "source": [ |
| 486 | + "\n", |
| 487 | + "## Metadata for Migration\n", |
| 488 | + "\n", |
| 489 | + "The report shows the following metadata for each app:\n", |
| 490 | + "- Recommended GPU cluster\n", |
| 491 | + "- File location of full cluster config recommendations\n", |
| 492 | + "- File location of only the GPU-specific config recommendations\n" |
| 493 | + ] |
| 494 | + }, |
| 495 | + { |
| 496 | + "cell_type": "code", |
| 497 | + "execution_count": 0, |
| 498 | + "metadata": { |
| 499 | + "application/vnd.databricks.v1+cell": { |
| 500 | + "cellMetadata": { |
| 501 | + "byteLimit": 2048000, |
| 502 | + "rowLimit": 10000 |
| 503 | + }, |
| 504 | + "inputWidgets": {}, |
| 505 | + "nuid": "133cf1bd-33b6-4a62-9ae2-5505717092d1", |
| 506 | + "showTitle": true, |
| 507 | + "title": "app_metadata.json" |
| 508 | + }, |
| 509 | + "jupyter": { |
| 510 | + "source_hidden": true |
| 511 | + } |
| 512 | + }, |
| 513 | + "outputs": [], |
| 514 | + "source": [ |
| 515 | + "import json\n", |
| 516 | + "metadata_file = os.path.join(output_folder, \"app_metadata.json\")\n", |
| 517 | + "def camel_to_title(name):\n", |
| 518 | + " return re.sub('([a-z])([A-Z])', r'\\1 \\2', name).title()\n", |
| 519 | + " \n", |
| 520 | + "with open(metadata_file, 'r') as file:\n", |
| 521 | + " json_data = json.load(file)\n", |
| 522 | + "\n", |
| 523 | + "df = pd.DataFrame(json_data)\n", |
| 524 | + "df['recommendedGpuCluster'] = df['clusterInfo'].apply(lambda x: x['recommendedCluster'])\n", |
| 525 | + "df['sourceCluster'] = df['clusterInfo'].apply(lambda x: x['sourceCluster'])\n", |
| 526 | + "df.drop(columns=['clusterInfo'], inplace=True)\n", |
| 527 | + "df = df[['appId', 'appName', 'estimatedGpuSpeedupCategory', 'recommendedGpuCluster', 'fullClusterConfigRecommendations', 'gpuConfigRecommendationBreakdown']]\n", |
| 528 | + "df.columns = [camel_to_title(col) for col in df.columns]\n", |
| 529 | + "display(df)" |
| 530 | + ] |
| 531 | + }, |
430 | 532 | {
|
431 | 533 | "cell_type": "markdown",
|
432 | 534 | "metadata": {
|
|
474 | 576 | },
|
475 | 577 | "outputs": [],
|
476 | 578 | "source": [
|
477 |
| - "jar_output_folder = os.path.join(output_folder, \"rapids_4_spark_qualification_output\")\n", |
478 | 579 | "stages_output=pd.read_csv(os.path.join(jar_output_folder, \"rapids_4_spark_qualification_output_stages.csv\"))\n",
|
479 | 580 | "display(stages_output)"
|
480 | 581 | ]
|
|
524 | 625 | "inputWidgets": {},
|
525 | 626 | "nuid": "998b0c51-0cb6-408e-a01a-d1f5b1a61e1f",
|
526 | 627 | "showTitle": true,
|
527 |
| - "title": "rapids_4_spark_qualification_output_execs" |
| 628 | + "title": "rapids_4_spark_qualification_output_execs.csv" |
528 | 629 | },
|
529 | 630 | "jupyter": {
|
530 | 631 | "source_hidden": true
|
|
549 | 650 | "stack": true
|
550 | 651 | },
|
551 | 652 | "nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc",
|
552 |
| - "origId": 1075819839476974, |
| 653 | + "origId": 2173122769183715, |
553 | 654 | "title": "Executive View",
|
554 | 655 | "version": "DashboardViewV1",
|
555 | 656 | "width": 1600
|
|
563 | 664 | "stack": true
|
564 | 665 | },
|
565 | 666 | "nuid": "62243296-4562-4f06-90ac-d7a609f19c16",
|
566 |
| - "origId": 1075819839476975, |
| 667 | + "origId": 2173122769183716, |
567 | 668 | "title": "App View",
|
568 | 669 | "version": "DashboardViewV1",
|
569 | 670 | "width": 1920
|
| 671 | + }, |
| 672 | + { |
| 673 | + "elements": [], |
| 674 | + "globalVars": {}, |
| 675 | + "guid": "", |
| 676 | + "layoutOption": { |
| 677 | + "grid": true, |
| 678 | + "stack": true |
| 679 | + }, |
| 680 | + "nuid": "854f9c75-5977-42aa-b3dd-c680b8331f19", |
| 681 | + "origId": 2173122769183722, |
| 682 | + "title": "Untitled", |
| 683 | + "version": "DashboardViewV1", |
| 684 | + "width": 1024 |
570 | 685 | }
|
571 | 686 | ],
|
572 | 687 | "environmentMetadata": null,
|
573 | 688 | "language": "python",
|
574 | 689 | "notebookMetadata": {
|
575 | 690 | "mostRecentlyExecutedCommandWithImplicitDF": {
|
576 |
| - "commandId": 1075819839476965, |
| 691 | + "commandId": 2173122769183704, |
577 | 692 | "dataframes": [
|
578 | 693 | "_sqldf"
|
579 | 694 | ]
|
|
0 commit comments