From 6dc18f814e748f13e534f5874c4d3db545962282 Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Wed, 30 Nov 2022 18:18:11 +0300 Subject: [PATCH 01/19] Deploying to Google Cloud Platform: initial version --- gcp/build.gradle | 94 +++++++++++++ gcp/src/main/bash/cloud-init.sh | 25 ++++ gcp/src/main/python/deploy_beam/main.py | 123 ++++++++++++++++++ .../main/python/deploy_beam/requirements.txt | 3 + settings.gradle | 1 + 5 files changed, 246 insertions(+) create mode 100644 gcp/build.gradle create mode 100644 gcp/src/main/bash/cloud-init.sh create mode 100644 gcp/src/main/python/deploy_beam/main.py create mode 100644 gcp/src/main/python/deploy_beam/requirements.txt diff --git a/gcp/build.gradle b/gcp/build.gradle new file mode 100644 index 00000000000..61b690be5d9 --- /dev/null +++ b/gcp/build.gradle @@ -0,0 +1,94 @@ +import com.google.api.client.http.GenericUrl; +import com.google.api.client.http.HttpRequest; +import com.google.api.client.http.HttpResponse; +import com.google.api.client.http.HttpTransport; +import com.google.api.client.http.HttpContent; +import com.google.api.client.http.ByteArrayContent; +import com.google.api.client.http.javanet.NetHttpTransport; +import com.google.auth.http.HttpCredentialsAdapter; +import com.google.auth.oauth2.GoogleCredentials; +import com.google.auth.oauth2.IdTokenCredentials; +import com.google.auth.oauth2.IdTokenProvider; + +group = 'beam' +version = '0.8.0' + +buildscript { + repositories { + mavenLocal() + mavenCentral() + maven { url "https://plugins.gradle.org/m2/" } + gradlePluginPortal() + } + dependencies { + classpath "com.google.auth:google-auth-library-oauth2-http:1.3.0" + } +} + +task deployToGCE { + def propsFileName = "../gradle.deploy.properties" + if (project.hasProperty('propsFile')) { + propsFileName = project.findProperty('propsFile') + } + + def propsFile = new Properties() + propsFile.load(project.file(propsFileName).newDataInputStream()) + + ext.getParameterValue = { paramName -> + if (project.hasProperty(paramName)) { + return project.findProperty(paramName) + } else { + return propsFile.getProperty(paramName) + } + } + + if (!ext.getParameterValue('runName')) { + throw new GradleException('Please name the run by specifying `runName` argument. e.g; ./gradlew deploy -PrunName=sfbay-performance-run') + } + def tempInstanceType = "${ext.getParameterValue('instanceType') ?: (project.hasProperty('defaultInstanceType') ? defaultInstanceType : '')}" + def finalInstanceType = tempInstanceType.isEmpty() ? 
null : tempInstanceType + GString pload = """{ + "runName": "${ext.getParameterValue('runName') + '_' + getCurrentGitUserEmail()}", + "instance_type": "${finalInstanceType}", + "forced_max_ram": "${ext.getParameterValue('forcedMaxRAM')}", + "config": "${ext.getParameterValue('beamConfigs')}" + }""" + def result = makeJsonPostRequest("https://us-central1-beam-core.cloudfunctions.net/deploy-beam", pload) + logger.warn("response status: ${result.statusCode}, response message: ${result.statusMessage}, payload: ${result.content}") +} + +HttpResponse makeJsonPostRequest(String functionUrl, String requestBody) { + GoogleCredentials credentials = GoogleCredentials.getApplicationDefault(); + IdTokenCredentials tokenCredential = + IdTokenCredentials.newBuilder() + .setIdTokenProvider((IdTokenProvider) credentials) + .setTargetAudience(functionUrl) + .build(); + + GenericUrl genericUrl = new GenericUrl(functionUrl); + HttpCredentialsAdapter adapter = new HttpCredentialsAdapter(tokenCredential); + HttpTransport transport = new NetHttpTransport(); + + HttpContent requestContent = ByteArrayContent.fromString("application/json", requestBody) + HttpRequest request = transport.createRequestFactory(adapter).buildPostRequest(genericUrl, requestContent); + return request.execute(); +} + +def getCurrentGitUserEmail() { + def rawGitUserEmail = getGitResultFromWorkingDirUsing('git config user.email', "GitUserEmailNotFound") + return rawGitUserEmail +} + +def getGitResultFromWorkingDirUsing(command, defaultResult) { + def gitResult = defaultResult + try { + def workingDir = new File("${project.projectDir}") + def result = command.execute(null, workingDir) + result.waitFor() + if (result.exitValue() == 0) { + gitResult = result.text.trim() + } + } catch (ignored) { + } + return gitResult +} diff --git a/gcp/src/main/bash/cloud-init.sh b/gcp/src/main/bash/cloud-init.sh new file mode 100644 index 00000000000..044be6c097b --- /dev/null +++ b/gcp/src/main/bash/cloud-init.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +INSTANCE_NAME=$(curl http://metadata/computeMetadata/v1/instance/name -H "Metadata-Flavor: Google") +INSTANCE_ZONE=$(curl http://metadata/computeMetadata/v1/instance/zone -H "Metadata-Flavor: Google") +BEAM_CONFIG=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_config -H "Metadata-Flavor: Google") +MAX_RAM=$(curl http://metadata/computeMetadata/v1/instance/attributes/max_ram -H "Metadata-Flavor: Google") + +cd ~/sources/beam +git checkout develop +git pull +git lfs pull +./gradlew assemble +./gradlew --stacktrace :run -PappArgs="['--config', '$BEAM_CONFIG']" -PmaxRAM="$MAX_RAM"g + +# copy to bucket +finalPath="" +for file in "output"/*; do + for path2 in $file/*; do + finalPath="$path2"; + done; +done; +gsutil -m cp -r "$finalPath" gs://beam-core-outputs/"$finalPath" + +sudo shutdown -h +15 + diff --git a/gcp/src/main/python/deploy_beam/main.py b/gcp/src/main/python/deploy_beam/main.py new file mode 100644 index 00000000000..437ce4cbe4e --- /dev/null +++ b/gcp/src/main/python/deploy_beam/main.py @@ -0,0 +1,123 @@ +from flask import escape +import functions_framework +from googleapiclient import discovery +import re +import time +import random +import string +from datetime import datetime +from datetime import timezone + + +def to_instance_name(run_name): + no_spaces = re.sub(r'\s|_', '-', run_name.lower()) + clean = re.sub(r'[^a-z0-9\\-]', '', no_spaces) + if not re.search(r'^[a-z]', clean): clean = 'name-' + clean + date_time = datetime.fromtimestamp(time.time(), tz=timezone.utc) + str_date_time 
= date_time.strftime("%Y-%m-%d-%H-%M-%S") + rnd_str = ''.join(random.choices(string.ascii_lowercase, k=3)) + # name cannot exceed 63 chars + clean = clean[:39] + return clean + '-' + str_date_time + '-' + rnd_str + + +def parameter_is_not_specified(parameter_value): + # in gradle if parameter wasn't specified then project.findProperty return 'null' + return parameter_value is None or parameter_value == 'null' + + +@functions_framework.http +def create_beam_instance(request): + json = request.get_json(silent=True) + if not json: return escape("No valid json payload provided"), 400 + beam_config = json['config'] + if parameter_is_not_specified(beam_config): return escape("No beam config provided"), 400 + instance_type = json['instance_type'] + if parameter_is_not_specified(instance_type): return escape("No instance type provided"), 400 + max_ram = json['forced_max_ram'] + if parameter_is_not_specified(max_ram): max_ram = 32 # todo calculate max ram + run_name = json.get('runName', "not-set") + + # project = requests.get("http://metadata/computeMetadata/v1/instance/id", headers={'Metadata-Flavor': 'Google'}).text + project = 'beam-core' + zone = 'us-central1-a' + name = to_instance_name(run_name) + machine_type = f"zones/{zone}/machineTypes/{instance_type.strip()}" + source_snapshot = f"projects/{project}/global/snapshots/beam-run-sn--01" + startup_script = """ +#!/bin/sh +sudo -u clu bash -c 'cd; wget https://gist.github.com/dimaopen/3e736f1ec1d49c7e162867b280736312/raw/cloud-init.sh' +sudo -u clu bash -c 'cd; chmod 755 cloud-init.sh' +sudo -u clu bash -c 'cd; ./cloud-init.sh &> cloud-init-output.log' + """ + + config = { + 'name': name, + 'machineType': machine_type, + + # Specify the boot disk and the image to use as a source. + 'disks': [ + { + 'boot': True, + 'autoDelete': True, + 'initializeParams': { + 'sourceSnapshot': source_snapshot, + } + } + ], + + # Specify a network interface with NAT to access the public + # internet. 
+ 'networkInterfaces': [{ + 'network': 'global/networks/default', + "accessConfigs": [ + { + "name": "external-nat", + "type": "ONE_TO_ONE_NAT", + "kind": "compute#accessConfig", + "networkTier": "PREMIUM" + } + ] + }], + + # Set beam-bot as the service account + # permissions could be set via IAM roles assigned to this service account + 'serviceAccounts': [ + { + 'email': 'beam-bot@beam-core.iam.gserviceaccount.com', + 'scopes': [ + 'https://www.googleapis.com/auth/cloud-platform' + ] + } + ], + + 'metadata': { + 'items': [{ + 'key': 'startup-script', + 'value': startup_script + },{ + 'key': 'beam_config', + 'value': beam_config + },{ + 'key': 'max_ram', + 'value': max_ram + },] + } + } + + service = discovery.build('compute', 'v1') + result = service.instances()\ + .insert(project=project, zone=zone, body=config)\ + .execute() + + operation_id = result["id"] + operation_status = result["status"] + error = None + if result.get("error", None): + error_head = result["error"]["errors"][0] + error = f"{error_head['code']}, {error_head['location']}, {error_head['message']}" + + if error: + return escape(f"operation id: {operation_id}, status: {operation_status}, error: {error}") + else: + return escape(f"operation id: {operation_id}, status: {operation_status}") \ No newline at end of file diff --git a/gcp/src/main/python/deploy_beam/requirements.txt b/gcp/src/main/python/deploy_beam/requirements.txt new file mode 100644 index 00000000000..d491d51f59c --- /dev/null +++ b/gcp/src/main/python/deploy_beam/requirements.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c868ce84345515e4900fe7e800b6c1e2e5560da5101641c9f1afcd2f81a092c +size 32 diff --git a/settings.gradle b/settings.gradle index 53b9bac2ede..718c770c3a8 100755 --- a/settings.gradle +++ b/settings.gradle @@ -1,5 +1,6 @@ include 'beam-gui' include 'aws' +include 'gcp' include 'nersc' include 'metrics2.0' include 'jupyter' From 10eb5f3a00f56d73150d4c5a53c4587ec525a1aa Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Thu, 1 Dec 2022 17:50:56 +0300 Subject: [PATCH 02/19] Added git params, shutdown_wait to deploying to GCE --- gcp/build.gradle | 7 ++++- gcp/src/main/bash/cloud-init.sh | 39 +++++++++++++++++++++++-- gcp/src/main/python/deploy_beam/main.py | 22 +++++++++++++- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/gcp/build.gradle b/gcp/build.gradle index 61b690be5d9..f6d713283f0 100644 --- a/gcp/build.gradle +++ b/gcp/build.gradle @@ -48,9 +48,14 @@ task deployToGCE { def tempInstanceType = "${ext.getParameterValue('instanceType') ?: (project.hasProperty('defaultInstanceType') ? defaultInstanceType : '')}" def finalInstanceType = tempInstanceType.isEmpty() ? 
null : tempInstanceType GString pload = """{ - "runName": "${ext.getParameterValue('runName') + '_' + getCurrentGitUserEmail()}", + "run_name": "${ext.getParameterValue('runName') + '_' + getCurrentGitUserEmail()}", "instance_type": "${finalInstanceType}", "forced_max_ram": "${ext.getParameterValue('forcedMaxRAM')}", + "beam_branch": "${ext.getParameterValue('beamBranch') ?: getCurrentGitBranch()}", + "beam_commit": "${ext.getParameterValue('beamCommit') ?: 'HEAD'}", + "data_branch": "${ext.getParameterValue('dataBranch') ?: 'develop'}", + "data_commit": "${ext.getParameterValue('dataCommit') ?: 'HEAD'}", + "shutdown_wait": "${ext.getParameterValue('shutdownWait')}", "config": "${ext.getParameterValue('beamConfigs')}" }""" def result = makeJsonPostRequest("https://us-central1-beam-core.cloudfunctions.net/deploy-beam", pload) diff --git a/gcp/src/main/bash/cloud-init.sh b/gcp/src/main/bash/cloud-init.sh index 044be6c097b..ab02c91a1a0 100644 --- a/gcp/src/main/bash/cloud-init.sh +++ b/gcp/src/main/bash/cloud-init.sh @@ -3,23 +3,56 @@ INSTANCE_NAME=$(curl http://metadata/computeMetadata/v1/instance/name -H "Metadata-Flavor: Google") INSTANCE_ZONE=$(curl http://metadata/computeMetadata/v1/instance/zone -H "Metadata-Flavor: Google") BEAM_CONFIG=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_config -H "Metadata-Flavor: Google") +BEAM_BRANCH=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_branch -H "Metadata-Flavor: Google") +BEAM_COMMIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_commit -H "Metadata-Flavor: Google") +DATA_COMMIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/data_commit -H "Metadata-Flavor: Google") +DATA_BRANCH=$(curl http://metadata/computeMetadata/v1/instance/attributes/data_branch -H "Metadata-Flavor: Google") MAX_RAM=$(curl http://metadata/computeMetadata/v1/instance/attributes/max_ram -H "Metadata-Flavor: Google") +SHUTDOWN_WAIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/shutdown_wait -H "Metadata-Flavor: Google") cd ~/sources/beam -git checkout develop +echo "git fetch" +git fetch +echo "GIT_LFS_SKIP_SMUDGE=1 git checkout $BEAM_BRANCH $(date)" +GIT_LFS_SKIP_SMUDGE=1 git checkout $BEAM_BRANCH +echo "git pull" git pull +echo "git lfs pull" git lfs pull + +echo "git checkout -qf $BEAM_COMMIT" +GIT_LFS_SKIP_SMUDGE=1 git checkout -qf "$BEAM_COMMIT" +RESOLVED_COMMIT=$(git log -1 --pretty=format:%H) +echo "Resolved commit is $RESOLVED_COMMIT" + +production_data_submodules=$(git submodule | awk '{ print $2 }') +for i in $production_data_submodules +do + echo $i + case $BEAM_CONFIG in + *$i*) + echo "Loading remote production data for $i" + git config submodule.$i.branch "$DATA_BRANCH" + git submodule update --init --remote "$i" + cd "$i" + git checkout "$DATA_COMMIT" + RESOLVED_DATA_COMMIT=$(git log -1 --pretty=format:%H) + echo "Resolved data commit is $RESOLVED_DATA_COMMIT" + cd - + esac +done + ./gradlew assemble ./gradlew --stacktrace :run -PappArgs="['--config', '$BEAM_CONFIG']" -PmaxRAM="$MAX_RAM"g # copy to bucket finalPath="" for file in "output"/*; do - for path2 in $file/*; do + for path2 in "$file"/*; do finalPath="$path2"; done; done; gsutil -m cp -r "$finalPath" gs://beam-core-outputs/"$finalPath" -sudo shutdown -h +15 +sudo shutdown -h +"$SHUTDOWN_WAIT" diff --git a/gcp/src/main/python/deploy_beam/main.py b/gcp/src/main/python/deploy_beam/main.py index 437ce4cbe4e..63cb0256aa9 100644 --- a/gcp/src/main/python/deploy_beam/main.py +++ 
b/gcp/src/main/python/deploy_beam/main.py @@ -36,7 +36,12 @@ def create_beam_instance(request): if parameter_is_not_specified(instance_type): return escape("No instance type provided"), 400 max_ram = json['forced_max_ram'] if parameter_is_not_specified(max_ram): max_ram = 32 # todo calculate max ram - run_name = json.get('runName', "not-set") + run_name = json.get('run_name', "not-set") + beam_branch = json.get('beam_branch', "develop") + beam_commit = json.get('beam_commit', "HEAD") + data_branch = json.get('data_branch', "develop") + data_commit = json.get('data_commit', "HEAD") + shutdown_wait = json.get('shutdown_wait', "15") # project = requests.get("http://metadata/computeMetadata/v1/instance/id", headers={'Metadata-Flavor': 'Google'}).text project = 'beam-core' @@ -101,6 +106,21 @@ def create_beam_instance(request): },{ 'key': 'max_ram', 'value': max_ram + },{ + 'key': 'beam_branch', + 'value': beam_branch + },{ + 'key': 'beam_commit', + 'value': beam_commit + },{ + 'key': 'data_branch', + 'value': data_branch + },{ + 'key': 'data_commit', + 'value': data_commit + },{ + 'key': 'shutdown_wait', + 'value': shutdown_wait },] } } From 0c897cedd7c148df0c937cc935f023ac665422fd Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Fri, 2 Dec 2022 16:56:08 +0300 Subject: [PATCH 03/19] Provide storage_size, using new disk image --- gcp/build.gradle | 1 + gcp/src/main/python/deploy_beam/main.py | 31 ++++++++++++++----------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/gcp/build.gradle b/gcp/build.gradle index f6d713283f0..c7d98bf1739 100644 --- a/gcp/build.gradle +++ b/gcp/build.gradle @@ -56,6 +56,7 @@ task deployToGCE { "data_branch": "${ext.getParameterValue('dataBranch') ?: 'develop'}", "data_commit": "${ext.getParameterValue('dataCommit') ?: 'HEAD'}", "shutdown_wait": "${ext.getParameterValue('shutdownWait')}", + "storage_size": "${ext.getParameterValue('storageSize')}", "config": "${ext.getParameterValue('beamConfigs')}" }""" def result = makeJsonPostRequest("https://us-central1-beam-core.cloudfunctions.net/deploy-beam", pload) diff --git a/gcp/src/main/python/deploy_beam/main.py b/gcp/src/main/python/deploy_beam/main.py index 63cb0256aa9..5710c602d11 100644 --- a/gcp/src/main/python/deploy_beam/main.py +++ b/gcp/src/main/python/deploy_beam/main.py @@ -42,13 +42,14 @@ def create_beam_instance(request): data_branch = json.get('data_branch', "develop") data_commit = json.get('data_commit', "HEAD") shutdown_wait = json.get('shutdown_wait', "15") + storage_size = json.get('storage_size', "100") # project = requests.get("http://metadata/computeMetadata/v1/instance/id", headers={'Metadata-Flavor': 'Google'}).text project = 'beam-core' zone = 'us-central1-a' name = to_instance_name(run_name) machine_type = f"zones/{zone}/machineTypes/{instance_type.strip()}" - source_snapshot = f"projects/{project}/global/snapshots/beam-run-sn--01" + disk_image_name = f"projects/{project}/global/images/beam-box" startup_script = """ #!/bin/sh sudo -u clu bash -c 'cd; wget https://gist.github.com/dimaopen/3e736f1ec1d49c7e162867b280736312/raw/cloud-init.sh' @@ -66,8 +67,10 @@ def create_beam_instance(request): 'boot': True, 'autoDelete': True, 'initializeParams': { - 'sourceSnapshot': source_snapshot, - } + 'sourceImage': disk_image_name, + }, + # beam disk minimum size is 100 (Gb) + "diskSizeGb": storage_size, } ], @@ -100,34 +103,34 @@ def create_beam_instance(request): 'items': [{ 'key': 'startup-script', 'value': startup_script - },{ + }, { 'key': 'beam_config', 'value': beam_config - },{ + 
}, {
             'key': 'max_ram',
             'value': max_ram
-        },{
+        }, {
             'key': 'beam_branch',
             'value': beam_branch
-        },{
+        }, {
             'key': 'beam_commit',
             'value': beam_commit
-        },{
+        }, {
             'key': 'data_branch',
             'value': data_branch
-        },{
+        }, {
             'key': 'data_commit',
             'value': data_commit
-        },{
+        }, {
             'key': 'shutdown_wait',
             'value': shutdown_wait
-        },]
+        }, ]
         }
     }
 
     service = discovery.build('compute', 'v1')
-    result = service.instances()\
-        .insert(project=project, zone=zone, body=config)\
+    result = service.instances() \
+        .insert(project=project, zone=zone, body=config) \
         .execute()
 
     operation_id = result["id"]
@@ -140,4 +143,4 @@
     if error:
         return escape(f"operation id: {operation_id}, status: {operation_status}, error: {error}")
     else:
-        return escape(f"operation id: {operation_id}, status: {operation_status}")
\ No newline at end of file
+        return escape(f"operation id: {operation_id}, status: {operation_status}")

From ca866b5593d47b64adbb36f75b756d6a3e99f9f2 Mon Sep 17 00:00:00 2001
From: Dmitry Openkov
Date: Thu, 8 Dec 2022 11:55:43 +0300
Subject: [PATCH 04/19] Removed println to avoid polluting the cloud-init-output.log

---
 .../agentsim/infrastructure/ChargingNetworkManager.scala | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/main/scala/beam/agentsim/infrastructure/ChargingNetworkManager.scala b/src/main/scala/beam/agentsim/infrastructure/ChargingNetworkManager.scala
index b5146719d03..2732ee627aa 100644
--- a/src/main/scala/beam/agentsim/infrastructure/ChargingNetworkManager.scala
+++ b/src/main/scala/beam/agentsim/infrastructure/ChargingNetworkManager.scala
@@ -144,10 +144,7 @@ class ChargingNetworkManager(
       sender ! CompletionNotice(triggerId)
 
     case request @ ChargingPlugRequest(tick, vehicle, stall, personId, triggerId, theSender, _, _) =>
-      log.debug(s"ChargingPlugRequest received from vehicle $vehicle at $tick and stall ${vehicle.stall}")
-      println(
-        s"ChargingPlugRequest received for vehicle $vehicle at $tick and stall ${vehicle.stall} (taz: ${stall.tazId})"
-      )
+      log.debug("ChargingPlugRequest received from vehicle {} at {} and stall {} (taz: {})", vehicle, tick, vehicle.stall, stall.tazId)
       val responseHasTriggerId = if (vehicle.isEV) {
         // connecting the current vehicle
         val chargingNetwork = chargingNetworkHelper.get(stall.reservedFor.managerId)

From d052285da1c7719f41e6f9bbc1d8c29387079786 Mon Sep 17 00:00:00 2001
From: Dmitry Openkov
Date: Thu, 8 Dec 2022 12:44:05 +0300
Subject: [PATCH 05/19] Added shutdown behaviour for GCE instances

---
 gcp/build.gradle                        | 49 +++++++++++++-------------
 gcp/src/main/bash/cloud-init.sh         |  1 +
 gcp/src/main/python/deploy_beam/main.py | 46 +++++++++++------------
 3 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/gcp/build.gradle b/gcp/build.gradle
index c7d98bf1739..6235f1ca4c4 100644
--- a/gcp/build.gradle
+++ b/gcp/build.gradle
@@ -25,29 +25,30 @@ buildscript {
     }
 }
 
-task deployToGCE {
-    def propsFileName = "../gradle.deploy.properties"
-    if (project.hasProperty('propsFile')) {
-        propsFileName = project.findProperty('propsFile')
-    }
+tasks.register('deployToGCE') {
+    doLast {
+        def propsFileName = "../gradle.deploy.properties"
+        if (project.hasProperty('propsFile')) {
+            propsFileName = project.findProperty('propsFile')
+        }
 
-    def propsFile = new Properties()
-    propsFile.load(project.file(propsFileName).newDataInputStream())
+        def propsFile = new Properties()
+        propsFile.load(project.file(propsFileName).newDataInputStream())
 
-    ext.getParameterValue = { paramName ->
-        if (project.hasProperty(paramName)) 
{ - return project.findProperty(paramName) - } else { - return propsFile.getProperty(paramName) + ext.getParameterValue = { paramName -> + if (project.hasProperty(paramName)) { + return project.findProperty(paramName) + } else { + return propsFile.getProperty(paramName) + } } - } - if (!ext.getParameterValue('runName')) { - throw new GradleException('Please name the run by specifying `runName` argument. e.g; ./gradlew deploy -PrunName=sfbay-performance-run') - } - def tempInstanceType = "${ext.getParameterValue('instanceType') ?: (project.hasProperty('defaultInstanceType') ? defaultInstanceType : '')}" - def finalInstanceType = tempInstanceType.isEmpty() ? null : tempInstanceType - GString pload = """{ + if (!ext.getParameterValue('runName')) { + throw new GradleException('Please name the run by specifying `runName` argument. e.g; ./gradlew deploy -PrunName=sfbay-performance-run') + } + def tempInstanceType = "${ext.getParameterValue('instanceType') ?: (project.hasProperty('defaultInstanceType') ? defaultInstanceType : '')}" + def finalInstanceType = tempInstanceType.isEmpty() ? null : tempInstanceType + GString pload = """{ "run_name": "${ext.getParameterValue('runName') + '_' + getCurrentGitUserEmail()}", "instance_type": "${finalInstanceType}", "forced_max_ram": "${ext.getParameterValue('forcedMaxRAM')}", @@ -57,13 +58,15 @@ task deployToGCE { "data_commit": "${ext.getParameterValue('dataCommit') ?: 'HEAD'}", "shutdown_wait": "${ext.getParameterValue('shutdownWait')}", "storage_size": "${ext.getParameterValue('storageSize')}", + "shutdown_behaviour": "${ext.getParameterValue('shutdownBehaviour')}", "config": "${ext.getParameterValue('beamConfigs')}" - }""" - def result = makeJsonPostRequest("https://us-central1-beam-core.cloudfunctions.net/deploy-beam", pload) - logger.warn("response status: ${result.statusCode}, response message: ${result.statusMessage}, payload: ${result.content}") + }""" + HttpResponse result = makeJsonPostRequest("https://us-central1-beam-core.cloudfunctions.net/deploy-beam", pload) + logger.warn("response status: ${result.statusCode}, response message: ${result.statusMessage}, payload: ${result.content}") + } } -HttpResponse makeJsonPostRequest(String functionUrl, String requestBody) { +static HttpResponse makeJsonPostRequest(String functionUrl, String requestBody) { GoogleCredentials credentials = GoogleCredentials.getApplicationDefault(); IdTokenCredentials tokenCredential = IdTokenCredentials.newBuilder() diff --git a/gcp/src/main/bash/cloud-init.sh b/gcp/src/main/bash/cloud-init.sh index ab02c91a1a0..961ca838100 100644 --- a/gcp/src/main/bash/cloud-init.sh +++ b/gcp/src/main/bash/cloud-init.sh @@ -52,6 +52,7 @@ for file in "output"/*; do finalPath="$path2"; done; done; +ln -sf ~/cloud-init-output.log "$finalPath"/cloud-init-output.log gsutil -m cp -r "$finalPath" gs://beam-core-outputs/"$finalPath" sudo shutdown -h +"$SHUTDOWN_WAIT" diff --git a/gcp/src/main/python/deploy_beam/main.py b/gcp/src/main/python/deploy_beam/main.py index 5710c602d11..85dd94ec9b0 100644 --- a/gcp/src/main/python/deploy_beam/main.py +++ b/gcp/src/main/python/deploy_beam/main.py @@ -43,6 +43,7 @@ def create_beam_instance(request): data_commit = json.get('data_commit', "HEAD") shutdown_wait = json.get('shutdown_wait', "15") storage_size = json.get('storage_size', "100") + shutdown_behaviour = json.get('shutdown_behaviour', "terminate") # project = requests.get("http://metadata/computeMetadata/v1/instance/id", headers={'Metadata-Flavor': 'Google'}).text project = 'beam-core' @@ -56,6 
+57,25 @@ def create_beam_instance(request): sudo -u clu bash -c 'cd; chmod 755 cloud-init.sh' sudo -u clu bash -c 'cd; ./cloud-init.sh &> cloud-init-output.log' """ + shutdown_script = """ +#!/bin/bash +INSTANCE_NAME=$(curl http://metadata/computeMetadata/v1/instance/name -H "Metadata-Flavor: Google") +INSTANCE_ZONE=$(curl http://metadata/computeMetadata/v1/instance/zone -H "Metadata-Flavor: Google") +gcloud --quiet compute instances delete --zone="$INSTANCE_ZONE" "$INSTANCE_NAME" + """ + + metadata = [ + ('startup-script', startup_script), + ('beam_config', beam_config), + ('max_ram', max_ram), + ('beam_branch', beam_branch), + ('beam_commit', beam_commit), + ('data_branch', data_branch), + ('data_commit', data_commit), + ('shutdown_wait', shutdown_wait), + ] + if shutdown_behaviour.lower() == "terminate": + metadata.append(('shutdown-script', shutdown_script)) config = { 'name': name, @@ -100,31 +120,7 @@ def create_beam_instance(request): ], 'metadata': { - 'items': [{ - 'key': 'startup-script', - 'value': startup_script - }, { - 'key': 'beam_config', - 'value': beam_config - }, { - 'key': 'max_ram', - 'value': max_ram - }, { - 'key': 'beam_branch', - 'value': beam_branch - }, { - 'key': 'beam_commit', - 'value': beam_commit - }, { - 'key': 'data_branch', - 'value': data_branch - }, { - 'key': 'data_commit', - 'value': data_commit - }, { - 'key': 'shutdown_wait', - 'value': shutdown_wait - }, ] + 'items': [{'key': k, 'value': v} for k, v in metadata] } } From 77bbec17831456b5bf07c721b6a493129560219d Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Thu, 8 Dec 2022 18:13:29 +0300 Subject: [PATCH 06/19] Added some documentation how to run BEAM on GCE --- docs/developers.rst | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/docs/developers.rst b/docs/developers.rst index c8a49b90956..270f81c3ffc 100755 --- a/docs/developers.rst +++ b/docs/developers.rst @@ -197,10 +197,10 @@ The command will start an ec2 instance based on the provided configurations and * **beamExperiments**: A comma `,` separated list of `experiment.yml` files. It should be relative path under the project home.You can create branch level defaults same as configs by specifying the branch name with `.experiments` suffix like `master.experiments`. Branch level default will be used if `beamExperiments` is not present. `beamConfigs` has priority over this, in other words, if both are provided then `beamConfigs` will be used. * **executeClass** and **executeArgs**: to specify class and args to execute if `execute` was chosen as deploy mode * **maxRAM**: to specify MAXRAM environment variable for simulation. -* **storageSize**: to specfy storage size of instance. May be from `64` to `256`. +* **storageSize**: to specify storage size of instance. May be from `64` to `256`. * **beamBatch**: Set to `false` in case you want to run as many instances as number of config/experiment files. Default is `true`. * **s3Backup**: to specify if copying results to s3 bucket is needed, default is `true`. -* **instanceType**: to specify s2 instance type. +* **instanceType**: to specify EC2 instance type. * **region**: Use this parameter to select the AWS region for the run, all instances would be created in specified region. Default `region` is `us-east-2`. * **shutdownWait**: As simulation ends, ec2 instance would automatically terminate. In case you want to use the instance, please specify the wait in minutes, default wait is 30 min. 
* **shutdownBehaviour**: to specify shutdown behaviour after end of simulation. May be `stop` or `terminate`, default is `terminate`.
@@ -245,6 +245,43 @@ You need to define the deploy properties that are similar to the ones for AWS de
 Your task is going to be added to the queue and when it starts/finishes you receive a notification on your git user email. It may take 1-24 hours (or even more) for the task to get started. It depends on the NERSC workload. In your user home directory on NERSC you can find the output file of your task that looks like `slurm-.out`. The BEAM output directory resides at `$SCRATCH/beam_runs/`. Also the output is uploaded to s3 if `s3Backup` is set to true.
 
+BEAM run on Google Compute Engine
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In order to run BEAM on GCE one needs to have `cloudfunctions.functions.invoke` permission on function `projects/beam-core/locations/us-central1/functions/deploy-beam`.
+
+One needs to install the `gcloud `_ utility in order to be able to authenticate against the Google Cloud Platform.
+
+The project id is `beam-core`. One can set it using::
+
+    gcloud config set project beam-core
+
+There are `some ways to provide credentials `_. One option is just to run the following command::
+
+    gcloud auth application-default login
+
+Now all the instances are created in the `us-central1-a` zone. One must set **forcedMaxRAM** according to the **instanceType** memory size.
+Currently the deployment script doesn't calculate it automatically.
+One needs to define the deploy properties that are similar to the ones for AWS deploy. These are the properties that are used on GCE:
+
+* **runName**: to specify the instance name.
+* **beamBranch**: To specify the branch for simulation, current source branch will be used as default branch.
+* **beamCommit**: The commit SHA to run simulation. Comment it out if you want to run with the latest commit.
+* **dataBranch**: To specify the branch for production data, 'develop' branch will be used as default branch.
+* **dataCommit**: The commit SHA for the data branch, default is `HEAD`.
+* **beamConfigs**: The `beam.conf` file. It should be a relative path under the project home. A single file is supported right now.
+* **shutdownWait**: As simulation ends, the instance would automatically terminate. In case you want to use the instance, please specify the wait in minutes, default wait is 15 min.
+* **shutdownBehaviour**: to specify shutdown behaviour after end of simulation. May be `stop` or `terminate`, default is `terminate`.
+* **instanceType**: To specify the GCE instance type.
+* **forcedMaxRAM**: This parameter must be set according to the **instanceType** memory size.
+* **storageSize**: to specify the storage size (GB) of the instance. May be from `100` to `256`. Default value is `100`.
+
+The simulation output is uploaded to the `Google Cloud Storage `_.
+ +In order to ssh to the running instance one could start the following command:: + + gcloud compute ssh --zone=us-central1-a clu@ + PILATES run on EC2 ~~~~~~~~~~~~~~~~~~ From 64d378496f3db16ee661e7733c9bf4f918297729 Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Thu, 8 Dec 2022 18:34:10 +0300 Subject: [PATCH 07/19] GCE documentation update: the deploy command --- docs/developers.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/developers.rst b/docs/developers.rst index 270f81c3ffc..18a55fe8a0f 100755 --- a/docs/developers.rst +++ b/docs/developers.rst @@ -276,6 +276,12 @@ One needs to define the deploy properties that are similar to the ones for AWS d * **forcedMaxRAM**: This parameter must be set according to the **instanceType** memory size. * **storageSize**: to specify storage size (Gb) of instance. May be from `100` to `256`. Default value is `100`. +The deployment command is + +.. code-block:: bash + + ./gradlew deployToGCE + The simulation output is uploaded to the `Google Cloud Storage `_. In order to ssh to the running instance one could start the following command:: From 20f6bfd8983e8d48561471323dea8ac9c7410137 Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Mon, 12 Dec 2022 18:43:47 +0300 Subject: [PATCH 08/19] Messages to slack channel --- gcp/src/main/bash/cloud-init.sh | 73 ++++++++++++++++++++++++- gcp/src/main/python/deploy_beam/main.py | 10 ++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/gcp/src/main/bash/cloud-init.sh b/gcp/src/main/bash/cloud-init.sh index 961ca838100..54d7400ad7e 100644 --- a/gcp/src/main/bash/cloud-init.sh +++ b/gcp/src/main/bash/cloud-init.sh @@ -1,15 +1,38 @@ #!/bin/bash +INSTANCE_ID=$(curl http://metadata/computeMetadata/v1/instance/id -H "Metadata-Flavor: Google") INSTANCE_NAME=$(curl http://metadata/computeMetadata/v1/instance/name -H "Metadata-Flavor: Google") INSTANCE_ZONE=$(curl http://metadata/computeMetadata/v1/instance/zone -H "Metadata-Flavor: Google") +MACHINE_TYPE=$(curl http://metadata/computeMetadata/v1/instance/machine-type -H "Metadata-Flavor: Google") +HOST_NAME=$(curl http://metadata/computeMetadata/v1/instance/hostname -H "Metadata-Flavor: Google") +RUN_NAME=$(curl http://metadata/computeMetadata/v1/instance/attributes/run_name -H "Metadata-Flavor: Google") BEAM_CONFIG=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_config -H "Metadata-Flavor: Google") BEAM_BRANCH=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_branch -H "Metadata-Flavor: Google") BEAM_COMMIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_commit -H "Metadata-Flavor: Google") DATA_COMMIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/data_commit -H "Metadata-Flavor: Google") DATA_BRANCH=$(curl http://metadata/computeMetadata/v1/instance/attributes/data_branch -H "Metadata-Flavor: Google") +BATCH_UID=$(curl http://metadata/computeMetadata/v1/instance/attributes/uid -H "Metadata-Flavor: Google") MAX_RAM=$(curl http://metadata/computeMetadata/v1/instance/attributes/max_ram -H "Metadata-Flavor: Google") SHUTDOWN_WAIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/shutdown_wait -H "Metadata-Flavor: Google") +SLACK_HOOK_WITH_TOKEN=$(curl http://metadata/computeMetadata/v1/instance/attributes/slack_hook_with_token -H "Metadata-Flavor: Google") +SLACK_TOKEN=$(curl http://metadata/computeMetadata/v1/instance/attributes/slack_token -H "Metadata-Flavor: Google") +SLACK_CHANNEL=$(curl 
http://metadata/computeMetadata/v1/instance/attributes/slack_channel -H "Metadata-Flavor: Google") +GOOGLE_API_KEY=$(curl http://metadata/computeMetadata/v1/instance/attributes/google_api_key -H "Metadata-Flavor: Google") +function check_simulation_result() { + log_file="$(find output -maxdepth 2 -mindepth 2 -type d -print -quit)/beamLog.out" + if [[ ! -f $log_file ]]; then + echo "Unable to start" + fi + last_line=$(tail $log_file -n 1) + if [[ $last_line == *"Exiting BEAM"* ]]; then + echo "Run Completed" + else + echo "Run Failed" + fi +} + +#get beam sources cd ~/sources/beam echo "git fetch" git fetch @@ -25,6 +48,7 @@ GIT_LFS_SKIP_SMUDGE=1 git checkout -qf "$BEAM_COMMIT" RESOLVED_COMMIT=$(git log -1 --pretty=format:%H) echo "Resolved commit is $RESOLVED_COMMIT" +#get data sources production_data_submodules=$(git submodule | awk '{ print $2 }') for i in $production_data_submodules do @@ -42,7 +66,28 @@ do esac done +#building beam ./gradlew assemble + +#sending message to the slack channel +hello_msg=$(cat < Date: Tue, 13 Dec 2022 16:45:24 +0300 Subject: [PATCH 09/19] Renamed uid to batch_uid for clearer name --- gcp/src/main/bash/cloud-init.sh | 2 +- gcp/src/main/python/deploy_beam/main.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcp/src/main/bash/cloud-init.sh b/gcp/src/main/bash/cloud-init.sh index 54d7400ad7e..c8da80d9beb 100644 --- a/gcp/src/main/bash/cloud-init.sh +++ b/gcp/src/main/bash/cloud-init.sh @@ -11,7 +11,7 @@ BEAM_BRANCH=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_b BEAM_COMMIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_commit -H "Metadata-Flavor: Google") DATA_COMMIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/data_commit -H "Metadata-Flavor: Google") DATA_BRANCH=$(curl http://metadata/computeMetadata/v1/instance/attributes/data_branch -H "Metadata-Flavor: Google") -BATCH_UID=$(curl http://metadata/computeMetadata/v1/instance/attributes/uid -H "Metadata-Flavor: Google") +BATCH_UID=$(curl http://metadata/computeMetadata/v1/instance/attributes/batch_uid -H "Metadata-Flavor: Google") MAX_RAM=$(curl http://metadata/computeMetadata/v1/instance/attributes/max_ram -H "Metadata-Flavor: Google") SHUTDOWN_WAIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/shutdown_wait -H "Metadata-Flavor: Google") SLACK_HOOK_WITH_TOKEN=$(curl http://metadata/computeMetadata/v1/instance/attributes/slack_hook_with_token -H "Metadata-Flavor: Google") diff --git a/gcp/src/main/python/deploy_beam/main.py b/gcp/src/main/python/deploy_beam/main.py index ff403021104..3c6511346be 100644 --- a/gcp/src/main/python/deploy_beam/main.py +++ b/gcp/src/main/python/deploy_beam/main.py @@ -51,7 +51,7 @@ def create_beam_instance(request): # project = requests.get("http://metadata/computeMetadata/v1/instance/id", headers={'Metadata-Flavor': 'Google'}).text project = 'beam-core' zone = 'us-central1-a' - uid = str(uuid.uuid4())[:8] + batch_uid = str(uuid.uuid4())[:8] name = to_instance_name(run_name) machine_type = f"zones/{zone}/machineTypes/{instance_type.strip()}" disk_image_name = f"projects/{project}/global/images/beam-box" @@ -70,7 +70,7 @@ def create_beam_instance(request): metadata = [ ('startup-script', startup_script), - ('uid', uid), + ('batch_uid', batch_uid), ('run_name', run_name), ('beam_config', beam_config), ('max_ram', max_ram), From ba3855826ce4faf6dee2963c0b81ecc06c18d9e0 Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Wed, 14 Dec 2022 18:14:44 +0300 Subject: [PATCH 10/19] Using 
s3Publish, uploading cloud-init-output.log in any case --- gcp/build.gradle | 2 + gcp/src/main/bash/cloud-init.sh | 35 +++++++++----- gcp/src/main/python/deploy_beam/main.py | 62 ++++++++++++++++--------- 3 files changed, 66 insertions(+), 33 deletions(-) diff --git a/gcp/build.gradle b/gcp/build.gradle index 6235f1ca4c4..960423244f3 100644 --- a/gcp/build.gradle +++ b/gcp/build.gradle @@ -59,8 +59,10 @@ tasks.register('deployToGCE') { "shutdown_wait": "${ext.getParameterValue('shutdownWait')}", "storage_size": "${ext.getParameterValue('storageSize')}", "shutdown_behaviour": "${ext.getParameterValue('shutdownBehaviour')}", + "storage_publish": ${"false".equalsIgnoreCase(ext.getParameterValue('s3Backup')) ? "false" : "true"}, "config": "${ext.getParameterValue('beamConfigs')}" }""" + logger.warn(pload) HttpResponse result = makeJsonPostRequest("https://us-central1-beam-core.cloudfunctions.net/deploy-beam", pload) logger.warn("response status: ${result.statusCode}, response message: ${result.statusMessage}, payload: ${result.content}") } diff --git a/gcp/src/main/bash/cloud-init.sh b/gcp/src/main/bash/cloud-init.sh index c8da80d9beb..68b409cdc56 100644 --- a/gcp/src/main/bash/cloud-init.sh +++ b/gcp/src/main/bash/cloud-init.sh @@ -11,6 +11,7 @@ BEAM_BRANCH=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_b BEAM_COMMIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_commit -H "Metadata-Flavor: Google") DATA_COMMIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/data_commit -H "Metadata-Flavor: Google") DATA_BRANCH=$(curl http://metadata/computeMetadata/v1/instance/attributes/data_branch -H "Metadata-Flavor: Google") +STORAGE_PUBLISH=$(curl http://metadata/computeMetadata/v1/instance/attributes/storage_publish -H "Metadata-Flavor: Google") BATCH_UID=$(curl http://metadata/computeMetadata/v1/instance/attributes/batch_uid -H "Metadata-Flavor: Google") MAX_RAM=$(curl http://metadata/computeMetadata/v1/instance/attributes/max_ram -H "Metadata-Flavor: Google") SHUTDOWN_WAIT=$(curl http://metadata/computeMetadata/v1/instance/attributes/shutdown_wait -H "Metadata-Flavor: Google") @@ -66,9 +67,6 @@ do esac done -#building beam -./gradlew assemble - #sending message to the slack channel hello_msg=$(cat < Date: Fri, 16 Dec 2022 18:09:00 +0300 Subject: [PATCH 11/19] Fixed uploading cloud-init-output.log in case of issues --- gcp/src/main/bash/cloud-init.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gcp/src/main/bash/cloud-init.sh b/gcp/src/main/bash/cloud-init.sh index 68b409cdc56..460585c8029 100644 --- a/gcp/src/main/bash/cloud-init.sh +++ b/gcp/src/main/bash/cloud-init.sh @@ -105,11 +105,12 @@ if [ "${STORAGE_PUBLISH,,}" != "false" ]; then ln -sf ~/cloud-init-output.log "$finalPath"/cloud-init-output.log storage_url="https://console.cloud.google.com/storage/browser/beam-core-outputs/$finalPath" else - finalPath="output/cloud-init-logs" - mkdir -p "$finalPath" - cloudInitName=$(echo "$(date '+%Y-%m-%d_%H-%M-%S')__${BEAM_CONFIG}__cloud-init-output.log" | tr '/' '_' ) - ln -sf ~/cloud-init-output.log "$finalPath/$cloudInitName" - storage_url="https://console.cloud.google.com/storage/browser/beam-core-outputs/$finalPath/$cloudInitName" + log_dir="output/cloud-init-logs" + mkdir -p "$log_dir" + cloud_init_name=$(echo "$(date '+%Y-%m-%d_%H-%M-%S')__${BEAM_CONFIG}__cloud-init-output.log" | tr '/' '_' ) + finalPath="$log_dir/$cloud_init_name" + ln -sf ~/cloud-init-output.log "$finalPath" + 
storage_url="https://console.cloud.google.com/storage/browser/_details/beam-core-outputs/$finalPath" fi gsutil -m cp -r "$finalPath" "gs://beam-core-outputs/$finalPath" fi From d1ff68701b9323a4a8c667831a0516e22e860282 Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Fri, 16 Dec 2022 18:09:38 +0300 Subject: [PATCH 12/19] Calculating MAR_RAM basing on the instance type --- gcp/src/main/python/deploy_beam/main.py | 101 +++++++++++++++++------- 1 file changed, 71 insertions(+), 30 deletions(-) diff --git a/gcp/src/main/python/deploy_beam/main.py b/gcp/src/main/python/deploy_beam/main.py index ce634141021..94c79ac079a 100644 --- a/gcp/src/main/python/deploy_beam/main.py +++ b/gcp/src/main/python/deploy_beam/main.py @@ -42,9 +42,12 @@ def create_beam_instance(request): instance_type = request_payload['instance_type'] if parameter_is_not_specified(instance_type): return escape("No instance type provided"), 400 + instance_cores, instance_memory = find_instance_cores_and_memory(instance_type) + if not instance_memory: + return escape(f"Instance type '{instance_type}' is not supported") max_ram = request_payload['forced_max_ram'] if parameter_is_not_specified(max_ram): - max_ram = 32 # todo calculate max ram + max_ram = calculate_heap_size(instance_cores, instance_memory) run_name = request_payload.get('run_name', "not-set") beam_branch = request_payload.get('beam_branch', "develop") beam_commit = request_payload.get('beam_commit', "HEAD") @@ -94,7 +97,73 @@ def create_beam_instance(request): if shutdown_behaviour.lower() == "terminate": metadata.append(('shutdown-script', shutdown_script)) - config = { + create_instance_request_body = create_instance_request(instance_name, machine_type, disk_image_name, storage_size, + metadata) + + service = discovery.build('compute', 'v1') + result = service.instances() \ + .insert(project=project, zone=zone, body=create_instance_request_body) \ + .execute() + + entry = dict( + severity="NOTICE", + message=result, + component="deploy_beam_function" + ) + + print(json.dumps(entry)) + + operation_id = result["id"] + operation_status = result["status"] + error = None + if result.get("error", None): + error_head = result["error"]["errors"][0] + error = f"{error_head['code']}, {error_head['location']}, {error_head['message']}" + + if error: + return escape(f"operation id: {operation_id}, status: {operation_status}, error: {error}") + else: + return escape(f'Started batch: {batch_uid}' + f' with run name: {run_name}' + f' for branch/commit {beam_branch}/{beam_commit}' + f' at instance {instance_name}.') + + +def calculate_heap_size(instance_cores: int, instance_memory: float) -> int: + max_remaining_memory = instance_cores # 1Gib per core + percent_towards_remaining_memory = .25 + return round(instance_memory - min(instance_memory * percent_towards_remaining_memory, max_remaining_memory)) + +def find_instance_cores_and_memory(instance_type): + instance_type_to_params = { + 'm1-megamem-96': (96, 1433.6), + 'm2-ultramem-208': (208, 5888), + 'm2-ultramem-416': (416, 11776), + 'm2-megamem-416': (416, 5888), + 'm2-hypermem-416': (416, 8832), + } + if instance_type in instance_type_to_params: + return instance_type_to_params[instance_type] + standard_multipliers = {"highcpu": 2, "standard": 4, "highmem": 8} + family_to_multipliers = { + "n2": standard_multipliers, + "n2d": standard_multipliers, + "c2": standard_multipliers, + "c2d": standard_multipliers, + "m3": {"megamem": 15.25, "ultramem": 30.5}, + "m1": {"ultramem": 24.025}, + } + split = instance_type.split('-') + if 
len(split) != 3: + return None, None + family, sub_type, num_cores_str = split + num_cores = int(num_cores_str) + multiplier = family_to_multipliers.get(family, {}).get(sub_type) + return (num_cores, multiplier * num_cores) if multiplier else (num_cores, None) + + +def create_instance_request(instance_name, machine_type, disk_image_name, storage_size, metadata): + return { 'name': instance_name, 'machineType': machine_type, @@ -140,31 +209,3 @@ def create_beam_instance(request): 'items': [{'key': k, 'value': v} for k, v in metadata] } } - - service = discovery.build('compute', 'v1') - result = service.instances() \ - .insert(project=project, zone=zone, body=config) \ - .execute() - - entry = dict( - severity="NOTICE", - message=result, - component="deploy_beam_function" - ) - - print(json.dumps(entry)) - - operation_id = result["id"] - operation_status = result["status"] - error = None - if result.get("error", None): - error_head = result["error"]["errors"][0] - error = f"{error_head['code']}, {error_head['location']}, {error_head['message']}" - - if error: - return escape(f"operation id: {operation_id}, status: {operation_status}, error: {error}") - else: - return escape(f'Started batch: {batch_uid}' - f' with run name: {run_name}' - f' for branch/commit {beam_branch}/{beam_commit}' - f' at instance {instance_name}.') From 58477f8918f807766f75b430862be4e685b1f600 Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Wed, 28 Dec 2022 13:04:43 +0300 Subject: [PATCH 13/19] Added health metric; putting run data into the spreadsheet; shared common functions. --- aws/build.gradle | 4 ++ gcp/build.gradle | 21 +----- gcp/src/main/bash/cloud-init.sh | 118 ++++++++++++++++++++++++++++---- 3 files changed, 109 insertions(+), 34 deletions(-) diff --git a/aws/build.gradle b/aws/build.gradle index f456f004e37..c0f702c6fe7 100755 --- a/aws/build.gradle +++ b/aws/build.gradle @@ -336,4 +336,8 @@ def getGitResultFromWorkingDirUsing(command, defaultResult) { } catch (ignored) { } return gitResult +} + +ext { + getCurrentGitUserEmail = this.&getCurrentGitUserEmail } \ No newline at end of file diff --git a/gcp/build.gradle b/gcp/build.gradle index 960423244f3..88c94e00447 100644 --- a/gcp/build.gradle +++ b/gcp/build.gradle @@ -10,6 +10,8 @@ import com.google.auth.oauth2.GoogleCredentials; import com.google.auth.oauth2.IdTokenCredentials; import com.google.auth.oauth2.IdTokenProvider; +apply from: "$rootDir/aws/build.gradle" + group = 'beam' version = '0.8.0' @@ -84,22 +86,3 @@ static HttpResponse makeJsonPostRequest(String functionUrl, String requestBody) HttpRequest request = transport.createRequestFactory(adapter).buildPostRequest(genericUrl, requestContent); return request.execute(); } - -def getCurrentGitUserEmail() { - def rawGitUserEmail = getGitResultFromWorkingDirUsing('git config user.email', "GitUserEmailNotFound") - return rawGitUserEmail -} - -def getGitResultFromWorkingDirUsing(command, defaultResult) { - def gitResult = defaultResult - try { - def workingDir = new File("${project.projectDir}") - def result = command.execute(null, workingDir) - result.waitFor() - if (result.exitValue() == 0) { - gitResult = result.text.trim() - } - } catch (ignored) { - } - return gitResult -} diff --git a/gcp/src/main/bash/cloud-init.sh b/gcp/src/main/bash/cloud-init.sh index 460585c8029..9fb2a5bf3e2 100644 --- a/gcp/src/main/bash/cloud-init.sh +++ b/gcp/src/main/bash/cloud-init.sh @@ -2,8 +2,8 @@ INSTANCE_ID=$(curl http://metadata/computeMetadata/v1/instance/id -H "Metadata-Flavor: Google") 
INSTANCE_NAME=$(curl http://metadata/computeMetadata/v1/instance/name -H "Metadata-Flavor: Google") -INSTANCE_ZONE=$(curl http://metadata/computeMetadata/v1/instance/zone -H "Metadata-Flavor: Google") -MACHINE_TYPE=$(curl http://metadata/computeMetadata/v1/instance/machine-type -H "Metadata-Flavor: Google") +INSTANCE_ZONE=$(basename "$(curl http://metadata/computeMetadata/v1/instance/zone -H 'Metadata-Flavor: Google')") +MACHINE_TYPE=$(basename "$(curl http://metadata/computeMetadata/v1/instance/machine-type -H 'Metadata-Flavor: Google')") HOST_NAME=$(curl http://metadata/computeMetadata/v1/instance/hostname -H "Metadata-Flavor: Google") RUN_NAME=$(curl http://metadata/computeMetadata/v1/instance/attributes/run_name -H "Metadata-Flavor: Google") BEAM_CONFIG=$(curl http://metadata/computeMetadata/v1/instance/attributes/beam_config -H "Metadata-Flavor: Google") @@ -73,9 +73,9 @@ Run Started Run Name **$RUN_NAME** Instance name $INSTANCE_NAME Instance id $INSTANCE_ID -Instance type **$(basename "$MACHINE_TYPE")** +Instance type **$MACHINE_TYPE** Host name **$HOST_NAME** -Zone $(basename "$INSTANCE_ZONE") +Zone $INSTANCE_ZONE Batch $BATCH_UID Branch **$BEAM_BRANCH** Commit $BEAM_COMMIT @@ -84,6 +84,37 @@ EOF echo "$hello_msg" curl -X POST -H 'Content-type: application/json' --data '{"text":"'"$hello_msg"'"}' "$SLACK_HOOK_WITH_TOKEN" +# spreadsheet data +start_json=$(cat < Date: Wed, 28 Dec 2022 14:45:11 +0300 Subject: [PATCH 14/19] Uploading cloud-init-output.log at the end of the script to see all the errors --- gcp/src/main/bash/cloud-init.sh | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/gcp/src/main/bash/cloud-init.sh b/gcp/src/main/bash/cloud-init.sh index 9fb2a5bf3e2..8fa6f146c23 100644 --- a/gcp/src/main/bash/cloud-init.sh +++ b/gcp/src/main/bash/cloud-init.sh @@ -24,6 +24,7 @@ function check_simulation_result() { log_file="$(find output -maxdepth 2 -mindepth 2 -type d -print -quit)/beamLog.out" if [[ ! 
-f $log_file ]]; then echo "Unable to start" + return fi last_line=$(tail $log_file -n 1) if [[ $last_line == *"Exiting BEAM"* ]]; then @@ -125,6 +126,7 @@ export GOOGLE_API_KEY="$GOOGLE_API_KEY" # copy to bucket storage_url="" finalPath="" +cloud_init_output_path="" for file in "output"/*; do for path2 in "$file"/*; do finalPath="$path2"; @@ -133,18 +135,17 @@ done; if [ "${STORAGE_PUBLISH,,}" != "false" ]; then - if [ -d "$finalPath" ]; then - ln -sf ~/cloud-init-output.log "$finalPath"/cloud-init-output.log + if [ -d "$finalPath" ]; then #beam started; + # upload everything to the storage + cloud_init_output_path="$finalPath/cloud-init-output.log" storage_url="https://console.cloud.google.com/storage/browser/beam-core-outputs/$finalPath" - else - log_dir="output/cloud-init-logs" - mkdir -p "$log_dir" + gsutil -m cp -r "$finalPath" "gs://beam-core-outputs/$finalPath" + else # beam not started + # upload the cloud-init-output.log to the storage (at the end of script) cloud_init_name=$(echo "$(date '+%Y-%m-%d_%H-%M-%S')__${BEAM_CONFIG}__cloud-init-output.log" | tr '/' '_' ) - finalPath="$log_dir/$cloud_init_name" - ln -sf ~/cloud-init-output.log "$finalPath" - storage_url="https://console.cloud.google.com/storage/browser/_details/beam-core-outputs/$finalPath" + cloud_init_output_path="output/cloud-init-logs/$cloud_init_name" + storage_url="https://console.cloud.google.com/storage/browser/_details/beam-core-outputs/$cloud_init_output_path" fi - gsutil -m cp -r "$finalPath" "gs://beam-core-outputs/$finalPath" fi #Run and publish analysis @@ -224,8 +225,8 @@ echo "$stop_json" curl -X POST "https://ca4ircx74d.execute-api.us-east-2.amazonaws.com/production/spreadsheet" -H "Content-Type:application/json" --data "$stop_json" # uploading cloud-init-output.log again to have the latest output -if [ -d "$finalPath" ] && [ "${STORAGE_PUBLISH,,}" != "false" ]; then - gsutil cp "$finalPath/cloud-init-output.log" "gs://beam-core-outputs/$finalPath/cloud-init-output.log" +if [ "${STORAGE_PUBLISH,,}" != "false" ]; then + gsutil cp ~/cloud-init-output.log "gs://beam-core-outputs/$cloud_init_output_path" fi #shutdown instance From 03d8358a183be84f355600d571abb6d12686bcb6 Mon Sep 17 00:00:00 2001 From: Dmitry Openkov Date: Fri, 20 Jan 2023 16:59:30 +0300 Subject: [PATCH 15/19] Added user email to the instance metadata; CLOUD_INIT_SCRIPT_URL env var --- docs/developers.rst | 2 +- gcp/build.gradle | 2 +- gcp/src/main/python/deploy_beam/main.py | 53 ++++++++++++++++++++----- 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/docs/developers.rst b/docs/developers.rst index 818b79d5a75..e0a79dcf5d7 100755 --- a/docs/developers.rst +++ b/docs/developers.rst @@ -262,7 +262,7 @@ Your task is going to be added to the queue and when it starts/finishes you rece BEAM run on Google Compute Engine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In order to run BEAM on GCE one needs to have `cloudfunctions.functions.invoke` permission on function `projects/beam-core/locations/us-central1/functions/deploy-beam`. +In order to run BEAM on GCE one needs to have `cloudfunctions.functions.invoke` permission on function `projects/beam-core/locations/us-central1/functions/deploy_beam`. One needs to install `glcoud `_ utility in order to be able to authenticate themself against the Google Cloud Platform. 
diff --git a/gcp/build.gradle b/gcp/build.gradle
index 88c94e00447..f4568f4a4be 100644
--- a/gcp/build.gradle
+++ b/gcp/build.gradle
@@ -65,7 +65,7 @@ tasks.register('deployToGCE') {
         "config": "${ext.getParameterValue('beamConfigs')}"
     }"""
         logger.warn(pload)
-        HttpResponse result = makeJsonPostRequest("https://us-central1-beam-core.cloudfunctions.net/deploy-beam", pload)
+        HttpResponse result = makeJsonPostRequest("https://us-central1-beam-core.cloudfunctions.net/deploy_beam", pload)
         logger.warn("response status: ${result.statusCode}, response message: ${result.statusMessage}, payload: ${result.content}")
     }
 }
diff --git a/gcp/src/main/python/deploy_beam/main.py b/gcp/src/main/python/deploy_beam/main.py
index 94c79ac079a..9d462cc6a5d 100644
--- a/gcp/src/main/python/deploy_beam/main.py
+++ b/gcp/src/main/python/deploy_beam/main.py
@@ -3,6 +3,7 @@
 from flask import escape
 import functions_framework
 from googleapiclient import discovery
+from google.auth import jwt
 import re
 import time
 import random
@@ -33,6 +34,9 @@ def parameter_is_not_specified(parameter_value):
 @functions_framework.http
 def create_beam_instance(request):
+    user_email = get_user_email(request)
+    if not user_email:
+        return escape("Cannot extract user email from the auth token"), 403
     request_payload = request.get_json(silent=True)
     if not request_payload:
         return escape("No valid json payload provided"), 400
@@ -64,11 +68,16 @@ def create_beam_instance(request):
     instance_name = to_instance_name(run_name)
     machine_type = f"zones/{zone}/machineTypes/{instance_type.strip()}"
     disk_image_name = f"projects/{project}/global/images/beam-box"
+    cloud_init_script_url = os.environ.get('CLOUD_INIT_SCRIPT_URL')
+    if not cloud_init_script_url:
+        cloud_init_script_url = "https://raw.githubusercontent.com/LBNL-UCB-STI/beam/do/%233652-execute-beam-on-google-cloud-compute/gcp/src/main/bash/cloud-init.sh"
+    log(f"cloud_init_script_url: {cloud_init_script_url}")
     startup_script = """
 #!/bin/sh
-sudo -u clu bash -c 'cd; wget https://gist.github.com/dimaopen/3e736f1ec1d49c7e162867b280736312/raw/cloud-init.sh'
-sudo -u clu bash -c 'cd; chmod 755 cloud-init.sh'
-sudo -u clu bash -c 'cd; ./cloud-init.sh &> cloud-init-output.log'
+CLOUD_INIT_SCRIPT_URL=$(curl http://metadata/computeMetadata/v1/instance/attributes/cloud_init_script_url -H "Metadata-Flavor: Google")
+sudo -u clu bash -c "cd; wget $CLOUD_INIT_SCRIPT_URL"
+sudo -u clu bash -c "cd; chmod 755 cloud-init.sh"
+sudo -u clu bash -c "cd; ./cloud-init.sh &> cloud-init-output.log"
 """
     shutdown_script = """
 #!/bin/bash
@@ -79,6 +88,7 @@ def create_beam_instance(request):
     metadata = [
         ('startup-script', startup_script),
+        ('cloud_init_script_url', cloud_init_script_url),
         ('batch_uid', batch_uid),
         ('run_name', run_name),
         ('beam_config', beam_config),
@@ -93,6 +103,7 @@
         ('slack_hook_with_token', os.environ['SLACK_HOOK_WITH_TOKEN']),
         ('slack_token', os.environ['SLACK_TOKEN']),
         ('slack_channel', os.environ['SLACK_CHANNEL']),
+        ('user_email', user_email),
     ]
     if shutdown_behaviour.lower() == "terminate":
         metadata.append(('shutdown-script', shutdown_script))
@@ -105,13 +116,7 @@ def create_beam_instance(request):
         .insert(project=project, zone=zone, body=create_instance_request_body) \
         .execute()
-    entry = dict(
-        severity="NOTICE",
-        message=result,
-        component="deploy_beam_function"
-    )
-
-    print(json.dumps(entry))
+    log(result)
     operation_id = result["id"]
     operation_status = result["status"]
@@ -129,6 +134,15 @@ def create_beam_instance(request):
                   f' at instance {instance_name}.')
+def log(msg, severity="NOTICE"):
+    entry = dict(
+        severity=severity,
+        message=str(msg),
+        component="deploy_beam_function"
+    )
+    print(json.dumps(entry))
+
+
 def calculate_heap_size(instance_cores: int, instance_memory: float) -> int:
     max_remaining_memory = instance_cores # 1Gib per core
     percent_towards_remaining_memory = .25
@@ -209,3 +223,22 @@ def create_instance_request(instance_name, machine_type, disk_image_name, storag
             'items': [{'key': k, 'value': v} for k, v in metadata]
         }
     }
+
+
+def get_user_email(request):
+    auth_header = request.headers.get("Authorization")
+    if not auth_header:
+        log("no Authorization header")
+        return None
+    token_start = auth_header.lower().find("bearer ")
+    if token_start < 0:
+        log(f"No bearer token")
+        return None
+    token = auth_header[token_start + 7:].strip()
+    try:
+        # decoding the token without verification; token should be verified before it gets to our function
+        idinfo = jwt.decode(token, verify=False)
+        return idinfo['email']
+    except Exception as e:
+        log(e)
+        return None

From 740f9c7124b296cbdea09c638c894a7cb6d9a7aa Mon Sep 17 00:00:00 2001
From: Dmitry Openkov
Date: Fri, 20 Jan 2023 17:03:28 +0300
Subject: [PATCH 16/19] forcedMaxRAM is not required now

---
 docs/developers.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/developers.rst b/docs/developers.rst
index e0a79dcf5d7..52f4cf919de 100755
--- a/docs/developers.rst
+++ b/docs/developers.rst
@@ -274,7 +274,7 @@ There are `some ways to provide credentials 
Date: Mon, 20 Feb 2023 19:57:38 +0300
Subject: [PATCH 17/19] Idle notification for Google Cloud Platform

---
 gcp/src/main/python/notify_idle/main.py | 114 ++++++++++++++++++
 .../main/python/notify_idle/requirements.txt | 3 +
 2 files changed, 117 insertions(+)
 create mode 100644 gcp/src/main/python/notify_idle/main.py
 create mode 100644 gcp/src/main/python/notify_idle/requirements.txt

diff --git a/gcp/src/main/python/notify_idle/main.py b/gcp/src/main/python/notify_idle/main.py
new file mode 100644
index 00000000000..884d62acd2f
--- /dev/null
+++ b/gcp/src/main/python/notify_idle/main.py
@@ -0,0 +1,114 @@
+import base64
+import json
+import os
+import http.client
+from googleapiclient import discovery
+
+project = 'beam-core'
+zone = 'us-central1-a'
+
+
+def log(msg, severity="NOTICE"):
+    entry = dict(
+        severity=severity,
+        message=str(msg),
+        component="deploy_beam_function"
+    )
+    print(json.dumps(entry))
+
+
+def message_handler(event, context):
+    """Triggered from a message on a Cloud Pub/Sub topic.
+    Args:
+         event (dict): Event payload.
+         context (google.cloud.functions.Context): Metadata for the event.
+    """
+    json_str = base64.b64decode(event['data']).decode('utf-8')
+    pubsub_message = json.loads(json_str)
+
+    incident_summary = get_json_reference('incident.summary', pubsub_message, 'NA')
+    log(incident_summary)
+    incident_link = get_json_reference('incident.url', pubsub_message, 'NA')
+    instance_name = get_json_reference('incident.metric.labels.instance_name', pubsub_message, 'unknown')
+    instance_link = f"https://console.cloud.google.com/compute/instancesDetail/zones/{zone}/instances/{instance_name}" \
+                    f"?project={project}"
+    metadata = get_instance_metadata(instance_name)
+    email = get_custom_metadata_value(metadata, "user_email")
+    log(f"Instance user email {email}")
+    email_escaped = email.replace("@", "AT").replace(".", "_") if email else None
+    run_name = get_custom_metadata_value(metadata, "run_name")
+    if run_name is None: run_name = "No run"
+    user_slacks_ids = os.environ['USER_SLACK_IDS']
+    slack_ids_array = json.loads(user_slacks_ids)
+    channel_id = safe_value_with_default(slack_ids_array, email_escaped, 'here')
+    log('channel_id = ' + channel_id)
+    if channel_id == "here":
+        channel_id = "!here"
+    else:
+        channel_id = "@" + channel_id
+    payload = {
+        "blocks": [
+            {
+                "type": "section",
+                "text": {
+                    "type": "mrkdwn",
+                    "text": f"*<{channel_id}> GCE Idle Incident *\n"
+                            f"> {incident_summary}\n"
+                            f"> *Run Name*\n"
+                            f"> {run_name}\n"
+                            f"> *Incident <{incident_link}|(Link)>*\n"
+                            f"> *Instance <{instance_link}|(Link)>*\n"
+                            f"> {instance_name}\n"
+                }
+            }
+        ]
+    }
+
+    slack_hook = os.environ['SLACK_HOOK']
+    log('Sending slack notification about idle instance with payload: ' + str(payload))
+    conn = http.client.HTTPSConnection('hooks.slack.com')
+    conn.request('POST', url=slack_hook, body=json.dumps(payload), headers={'Content-type': 'application/json'})
+    response = conn.getresponse()
+    log('Received response from slack notification about idle instance with response: ' + response.read().decode())
+
+    return json.dumps({})
+
+
+def get_custom_metadata_value(metadata, key):
+    try:
+        items = next(x[1] for x in metadata.items() if x[0] == 'items')
+        value = next(x['value'] for x in items if x['key'] == key)
+    except StopIteration:
+        value = None
+    return value
+
+
+def get_json_reference(ref: str, json_document, default=None):
+    for i in ref.split("."):
+        if i in json_document:
+            json_document = json_document[i]
+        else:
+            return default
+    return default if json_document is None else json_document
+
+
+def get_instance_metadata(instance_name):
+    service = discovery.build('compute', 'v1')
+    instance_data = service.instances() \
+        .get(project=project, zone=zone, instance=instance_name) \
+        .execute()
+    return instance_data["metadata"]
+
+
+def safe_get(dict_obj, key):
+    if dict_obj is not None:
+        return dict_obj.get(key)
+    return None
+
+
+def safe_value_with_default(list_obj, key, default):
+    for item in list_obj:
+        value = safe_get(item, key)
+        if value is not None:
+            return value
+    return default
diff --git a/gcp/src/main/python/notify_idle/requirements.txt b/gcp/src/main/python/notify_idle/requirements.txt
new file mode 100644
index 00000000000..eae558c6268
--- /dev/null
+++ b/gcp/src/main/python/notify_idle/requirements.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83ebe9988d6a14378d99baa36d3e5d8dff875e4b543dd4227bd83f426a6631c7
+size 33

From a66d198cf39c00122a8995408759aaaea25d6648 Mon Sep 17 00:00:00 2001
From: Dmitry Openkov
Date: Tue, 21 Feb 2023 16:11:43 +0300
Subject: [PATCH 18/19] A single deploy task for both Google Cloud and Amazon

---
 aws/build.gradle | 2 +-
 build.gradle | 31 +++++++++++++++++++++++++++++++
 gradle.deploy.properties | 6 ++++--
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/aws/build.gradle b/aws/build.gradle
index e5214ef6ba5..e8b6ed6424d 100755
--- a/aws/build.gradle
+++ b/aws/build.gradle
@@ -32,7 +32,7 @@ lambda {
     socketTimeout = 900000
 }
-task deploy(type: AWSLambdaInvokeTask) {
+tasks.register('deployToEC2', AWSLambdaInvokeTask) {
     doFirst {
         def propsFileName = "../gradle.deploy.properties"
         if (project.hasProperty('propsFile')) {
diff --git a/build.gradle b/build.gradle
index 68a061c8597..fb83e09d31f 100755
--- a/build.gradle
+++ b/build.gradle
@@ -738,3 +738,34 @@ jmh {
     duplicateClassesStrategy = 'exclude'
     zip64 = true
 }
+
+tasks.register('deploy') {
+    def cloudProvider = null
+    def paramName = "cloudProvider"
+    if (project.hasProperty(paramName)) {
+        cloudProvider = project.findProperty(paramName)
+    } else {
+        def propsFileName = "./gradle.deploy.properties"
+        if (project.hasProperty('propsFile')) {
+            propsFileName = project.findProperty('propsFile')
+        }
+        def propsFile = new Properties()
+        propsFile.load(project.file(propsFileName).newDataInputStream())
+        cloudProvider = propsFile.getProperty(paramName)
+    }
+    if (cloudProvider == null) {
+        cloudProvider = ""
+    }
+
+    switch (cloudProvider.trim().toLowerCase()) {
+        case "amazon":
+            dependsOn ':aws:deployToEC2'
+            break
+        case "google":
+            dependsOn ':gcp:deployToGCE'
+            break
+        default:
+            throw new InvalidUserDataException("Cannot deploy! Please specify cloudProvider property to one of [Google, Amazon]")
+            break
+    }
+}
\ No newline at end of file
diff --git a/gradle.deploy.properties b/gradle.deploy.properties
index d29f2cd8404..4cbd9d44f4f 100644
--- a/gradle.deploy.properties
+++ b/gradle.deploy.properties
@@ -1,4 +1,4 @@
-runName=beamville
+runName=beamville_deploy
 beamBranch=develop
 # beamCommit=dc4b0b05a3885357c7b5b65fb91181c8163aa9b6
 dataBranch=develop
@@ -7,7 +7,9 @@ dataBranch=develop
 # comma-separated list of configs (or single config) which will be run on an AWS instances (one instance per config)
 beamConfigs=test/input/beamville/beam.conf
-instanceType=r5.2xlarge
+# Google or Amazon
+cloudProvider=Google
+instanceType=n2d-standard-4
 # shutdownBehaviour = stop | terminate
 shutdownBehaviour=terminate

From efa82cfaf98a16c39531bf2a621ae04c61ddaf41 Mon Sep 17 00:00:00 2001
From: Dmitry Openkov
Date: Tue, 21 Feb 2023 16:19:30 +0300
Subject: [PATCH 19/19] Rename cloudProvider -> cloudPlatform, add it to documentation

---
 build.gradle | 16 ++++++++--------
 docs/developers.rst | 9 ++++++---
 gradle.deploy.properties | 2 +-
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/build.gradle b/build.gradle
index fb83e09d31f..01627483fdc 100755
--- a/build.gradle
+++ b/build.gradle
@@ -740,10 +740,10 @@ jmh {
 }
 tasks.register('deploy') {
-    def cloudProvider = null
-    def paramName = "cloudProvider"
+    def cloudPlatform = null
+    def paramName = "cloudPlatform"
     if (project.hasProperty(paramName)) {
-        cloudProvider = project.findProperty(paramName)
+        cloudPlatform = project.findProperty(paramName)
     } else {
         def propsFileName = "./gradle.deploy.properties"
         if (project.hasProperty('propsFile')) {
@@ -751,13 +751,13 @@ tasks.register('deploy') {
         }
         def propsFile = new Properties()
         propsFile.load(project.file(propsFileName).newDataInputStream())
-        cloudProvider = propsFile.getProperty(paramName)
+        cloudPlatform = propsFile.getProperty(paramName)
     }
-    if (cloudProvider == null) {
-        cloudProvider = ""
+    if (cloudPlatform == null) {
+        cloudPlatform = ""
     }
-    switch (cloudProvider.trim().toLowerCase()) {
+    switch (cloudPlatform.trim().toLowerCase()) {
         case "amazon":
             dependsOn ':aws:deployToEC2'
             break
@@ -765,7 +765,7 @@ tasks.register('deploy') {
             dependsOn ':gcp:deployToGCE'
             break
         default:
-            throw new InvalidUserDataException("Cannot deploy! Please specify cloudProvider property to one of [Google, Amazon]")
+            throw new InvalidUserDataException("Cannot deploy! Please specify cloudPlatform property to one of [Google, Amazon]")
             break
     }
 }
\ No newline at end of file
diff --git a/docs/developers.rst b/docs/developers.rst
index f87e6630697..0be170a0345 100755
--- a/docs/developers.rst
+++ b/docs/developers.rst
@@ -201,6 +201,7 @@ To run a BEAM simulation or experiment on amazon ec2, use following command with
 The command will start an ec2 instance based on the provided configurations and run all simulations in serial. At the end of each simulation/experiment, outputs are uploaded to a public Amazon S3 bucket_. The default behavior is to run each simulation/experiment parallel on separate instances. For customized runs, you can also use following parameters that can be specified from command line:
 * **propsFile**: to specify file with default values
+* **cloudPlatform**: Amazon
 * **runName**: to specify instance name.
 * **beamBranch**: To specify the branch for simulation, current source branch will be used as default branch.
 * **beamCommit**: The commit SHA to run simulation. use `HEAD` if you want to run with latest commit, default is `HEAD`.
@@ -231,11 +232,11 @@ The order which will be used to look for parameter values is follow:
 To run a batch simulation, you can specify multiple configuration files separated by commas::
-  ./gradlew deploy -PbeamConfigs=test/input/beamville/beam.conf,test/input/sf-light/sf-light.conf
+  ./gradlew deploy -PcloudPlatform=Amazon -PbeamConfigs=test/input/beamville/beam.conf,test/input/sf-light/sf-light.conf
 Similarly for experiment batch, you can specify comma-separated experiment files::
-  ./gradlew deploy -PbeamExperiments=test/input/beamville/calibration/transport-cost/experiments.yml,test/input/sf-light/calibration/transport-cost/experiments.yml
+  ./gradlew deploy -PcloudPlatform=Amazon -PbeamExperiments=test/input/beamville/calibration/transport-cost/experiments.yml,test/input/sf-light/calibration/transport-cost/experiments.yml
 For demo and presentation material, please follow the link_ on google drive.
@@ -277,6 +278,8 @@ Now all the instances are created in `us-central1-a` zone. Now the deployment script doesn't calculate it automatically.
 One needs to define the deploy properties that are similar to the ones for AWS deploy. These are the properties that are used on GCE:
+* **propsFile**: to specify file with default values
+* **cloudPlatform**: Google
 * **runName**: to specify instance name.
 * **beamBranch**: To specify the branch for simulation, current source branch will be used as default branch.
 * **beamCommit**: The commit SHA to run simulation. Comment it out if you want to run with latest commit.
@@ -293,7 +296,7 @@ The deployment command is
 .. code-block:: bash
-  ./gradlew deployToGCE
+  ./gradlew -PcloudPlatform=Google deploy
 The simulation output is uploaded to the `Google Cloud Storage `_.
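To make the parameter lookup concrete, here is a sketch of two equivalent invocations of the unified `deploy` task; the run name and overridden values are placeholders, and command-line `-P` properties take precedence over `gradle.deploy.properties`:

.. code-block:: bash

    # Everything supplied on the command line.
    ./gradlew deploy -PcloudPlatform=Google -PrunName=sfbay-smoke-test \
        -PbeamConfigs=test/input/beamville/beam.conf -PinstanceType=n2d-standard-4

    # The same run driven by a properties file, overriding only the run name.
    ./gradlew deploy -PpropsFile=./gradle.deploy.properties -PrunName=sfbay-smoke-test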
diff --git a/gradle.deploy.properties b/gradle.deploy.properties
index 4cbd9d44f4f..c94eb6ea2cb 100644
--- a/gradle.deploy.properties
+++ b/gradle.deploy.properties
@@ -8,7 +8,7 @@ dataBranch=develop
 beamConfigs=test/input/beamville/beam.conf
 # Google or Amazon
-cloudProvider=Google
+cloudPlatform=Google
 instanceType=n2d-standard-4
 # shutdownBehaviour = stop | terminate
 shutdownBehaviour=terminate
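Once a run completes, cloud-init.sh copies the output directory to the `beam-core-outputs` bucket, so results can be inspected with `gsutil`; the run folder below is a hypothetical example of the `output/<scenario>/<run>` layout produced by BEAM:

.. code-block:: bash

    # List uploaded run outputs (requires read access to the bucket).
    gsutil ls gs://beam-core-outputs/output/

    # Download one run's output folder for local inspection.
    gsutil -m cp -r "gs://beam-core-outputs/output/beamville/beamville_deploy__2023-02-21_16-11-43" ./local-output/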