Skip to content

Commit

Permalink
Spark provision & deployment fixes for the OCI reports issue (#1637)
Browse files: browse the repository at this point in the history
  • Loading branch information
sowmya-dixit authored Mar 4, 2024
1 parent 0a52f98 commit 685611b
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 19 deletions.
7 changes: 4 additions & 3 deletions ansible/inventory/env/group_vars/all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,10 @@ etb_dialcode_list_druid_length: 70
# s3 storage config
s3_storage_key: "{{ sunbird_private_s3_storage_key }}"
s3_storage_secret: "{{ sunbird_private_s3_storage_secret }}"
s3_storage_endpoint: ""
s3_storage_endpoint: "{{ cloud_private_storage_endpoint }}"
s3_request_signature_version: AWS4-HMAC-SHA256
s3_path_style_access: true
s3_https_only: false
s3_default_bucket_location: ""
s3_https_only: true
s3_default_bucket_location: "{{ cloud_private_storage_region }}"
s3_storage_container: ""

4 changes: 2 additions & 2 deletions ansible/roles/analytics-spark-provision/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ spark:

spark_url: https://archive.apache.org/dist/spark/spark-{{ spark_version }}/spark-{{ spark_version }}-bin-hadoop2.7.tgz
guava_url: https://repo1.maven.org/maven2/com/google/guava/guava/19.0/guava-19.0.jar
jets3t_url: https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/0.9.4/jets3t-0.9.4.jar
jets3t_url: https://repo1.maven.org/maven2/org/jets3t/jets3t/0.9.7/jets3t-0.9.7.jar
hadoop_aws_url: https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar
java_xmlbuilder_url: https://repo1.maven.org/maven2/com/jamesmurty/utils/java-xmlbuilder/1.1/java-xmlbuilder-1.1.jar
hadoop_azure_url: https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure/2.7.3/hadoop-azure-2.7.3.jar
Expand All @@ -38,4 +38,4 @@ jets3t_s3_request_signature_version: "{{ s3_request_signature_version }}"
jets3t_s3_endpoint_host: "{% if s3_storage_endpoint %}{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}{% endif %}"
jets3t_s3_disable_dns_buckets: "{{ s3_path_style_access }}"
jets3t_s3_https_only: "{{ s3_https_only }}"
jets3t_s3_default_bucket_location: "{{ s3_default_bucket_location }}"
jets3t_s3_default_bucket_location: "{{ s3_default_bucket_location }}"
4 changes: 2 additions & 2 deletions ansible/roles/analytics-spark-provision/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@
become_user: "{{ analytics_user }}"
file: path={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7/jars/jets3t-0.9.3.jar state=absent

- name: Download jets3t 0.9.4 and copy to Spark jars folder
- name: Download jets3t 0.9.7 and copy to Spark jars folder
become: yes
become_user: "{{ analytics_user }}"
get_url: url={{ jets3t_url }} dest={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7/jars/jets3t-0.9.4.jar timeout=1000 force=no owner={{ analytics_user }} group={{ analytics_group }}
get_url: url={{ jets3t_url }} dest={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7/jars/jets3t-0.9.7.jar timeout=1000 force=no owner={{ analytics_user }} group={{ analytics_group }}

- name: Download hadoop-aws 2.7.3 and copy to Spark jars folder
become: yes
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
storage-service.request-signature-version={{ jets3t_s3_request_signature_version }}
s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %}
s3service.disable-dns-buckets={{ jets3t_s3_disable_dns_buckets }}
s3service.https-only={{ jets3t_s3_https_only }}
{% if jets3t_s3_default_bucket_location %}
s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }}
{% endif %}
uploads.stream-retry-buffer-size=2147483646
s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %}
10 changes: 6 additions & 4 deletions ansible/roles/analytics-spark-provision/templates/spark-env.j2
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,12 @@ export SPARK_EXECUTOR_MEMORY={{ spark.executor.memory }}
export SPARK_PUBLIC_DNS="{{ spark.public_dns }}"
export reports_storage_key={{cloud_private_storage_accountname}}
export reports_storage_secret={{cloud_private_storage_secret}}
export azure_storage_key={{cloud_private_storage_accountname}}
export azure_storage_secret={{cloud_private_storage_secret}}
# export azure_storage_key={{cloud_private_storage_accountname}}
# export azure_storage_secret={{cloud_private_storage_secret}}
export druid_storage_account_key={{cloud_private_storage_accountname}}
export druid_storage_account_secret={{cloud_private_storage_secret}}
export aws_storage_key={{ cloud_private_storage_accountname }}
export aws_storage_secret={{ cloud_private_storage_secret }}
# export aws_storage_key={{ cloud_private_storage_accountname }}
# export aws_storage_secret={{ cloud_private_storage_secret }}
export cloud_storage_key={{ cloud_private_storage_accountname }}
export cloud_storage_secret={{ cloud_private_storage_secret }}

6 changes: 3 additions & 3 deletions ansible/roles/data-products-deploy/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ secor_bucket: "telemetry-data-store"
# dp_object_store_type: "azure"
cloud_service_provider: "azure"
dp_raw_telemetry_backup_location: "unique/raw/"
dp_storage_key_config: "azure_storage_key"
dp_storage_secret_config: "azure_storage_secret"
dp_storage_key_config: "cloud_storage_key"
dp_storage_secret_config: "cloud_storage_secret"
dp_reports_storage_key_config: "reports_azure_storage_key"
dp_reports_storage_secret_config: "reports_azure_storage_secret"

Expand Down Expand Up @@ -46,7 +46,7 @@ analytics_jobs_count: 3
cassandra_keyspace_prefix: '{{ env }}_'
report_cassandra_cluster_host: "{{ report_cassandra_host | default(core_cassandra_host) }}"
cassandra_hierarchy_store_keyspace: "{{ env_name}}_hierarchy_store"
spark_version: 3.1.3
spark_version: 3.2.1

heap_memory: "-Xmx5120m"

Expand Down
10 changes: 6 additions & 4 deletions ansible/roles/data-products-deploy/templates/start-jobmanager.j2
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ export DP_LOGS={{ analytics.home }}/logs/data-products
export SERVICE_LOGS={{ analytics.home }}/logs/services
export JM_HOME={{ analytics.home }}/job-manager

export azure_storage_key={{cloud_private_storage_accountname}}
export azure_storage_secret={{cloud_private_storage_secret}}
export reports_azure_storage_key={{cloud_private_storage_accountname}}
export reports_azure_storage_secret={{cloud_private_storage_secret}}
# export azure_storage_key={{cloud_private_storage_accountname}}
# export azure_storage_secret={{cloud_private_storage_secret}}
export cloud_storage_key={{cloud_private_storage_accountname}}
export cloud_storage_secret={{cloud_private_storage_secret}}
export reports_storage_key={{cloud_private_storage_accountname}}
export reports_storage_secret={{cloud_private_storage_secret}}
export druid_storage_account_key={{cloud_public_storage_accountname}}
export druid_storage_account_secret={{cloud_public_storage_secret}}

Expand Down

0 comments on commit 685611b

Please sign in to comment.