diff --git a/ansible/artifacts-download.yml b/ansible/artifacts-download.yml
index fcf58787f5..b693f75d1f 100644
--- a/ansible/artifacts-download.yml
+++ b/ansible/artifacts-download.yml
@@ -40,3 +40,14 @@
         aws_access_key_id: "{{ cloud_artifact_storage_accountname }}"
         aws_secret_access_key: "{{ cloud_artifact_storage_secret }}"
       when: cloud_service_provider == "aws"
+
+
+    - name: download artifact from oci oss
+      include_role:
+        name: oci-cloud-storage
+        tasks_from: download.yml
+      vars:
+        local_file_or_folder_path: "{{ artifact_path }}"
+        oss_bucket_name: "{{ cloud_storage_artifacts_bucketname }}"
+        oss_object_name: "{{ artifact }}"
+      when: cloud_service_provider == "oci"
diff --git a/ansible/artifacts-upload.yml b/ansible/artifacts-upload.yml
index 305492afc2..13af4627e6 100644
--- a/ansible/artifacts-upload.yml
+++ b/ansible/artifacts-upload.yml
@@ -41,3 +41,13 @@
         aws_access_key_id: "{{ cloud_artifact_storage_accountname }}"
         aws_secret_access_key: "{{ cloud_artifact_storage_secret }}"
       when: cloud_service_provider == "aws"
+
+    - name: upload artifact to oci oss
+      include_role:
+        name: oci-cloud-storage
+        tasks_from: upload.yml
+      vars:
+        local_file_or_folder_path: "{{ artifact_path }}"
+        oss_bucket_name: "{{ cloud_storage_artifacts_bucketname }}"
+        oss_path: "{{ artifact }}"
+      when: cloud_service_provider == "oci"
diff --git a/ansible/lpa_data-products_deploy.yml b/ansible/lpa_data-products_deploy.yml
index 7e84ba53f7..1ff0cbdabc 100644
--- a/ansible/lpa_data-products_deploy.yml
+++ b/ansible/lpa_data-products_deploy.yml
@@ -7,5 +7,6 @@
   environment:
     AZURE_STORAGE_ACCOUNT: "{{sunbird_private_storage_account_name}}"
     AZURE_STORAGE_KEY: "{{sunbird_private_storage_account_key}}"
+    OCI_CLI_AUTH: "instance_principal"
   roles:
     - data-products-deploy
diff --git a/ansible/oci-bds-spark.provision.yml b/ansible/oci-bds-spark.provision.yml
new file mode 100644
index 0000000000..4eb80aa296
--- /dev/null
+++ b/ansible/oci-bds-spark.provision.yml
@@ -0,0 +1,18 @@
+- hosts: local
+  become: yes
+  vars_files:
+    - "{{inventory_dir}}/secrets.yml"
+  roles:
+    - oci-bds-spark-cluster
+  tags:
+    - copy-script
+
+- hosts: bds-livy-node
+  become: yes
+  gather_facts: no
+  vars_files:
+    - "{{inventory_dir}}/secrets.yml"
+  roles:
+    - provision-oci-spark-cluster
+  tags:
+    - spark-provision
diff --git a/ansible/roles/analytics-bootstrap-always/meta/main.yml b/ansible/roles/analytics-bootstrap-always/meta/main.yml
index af15826aef..3566881eff 100644
--- a/ansible/roles/analytics-bootstrap-always/meta/main.yml
+++ b/ansible/roles/analytics-bootstrap-always/meta/main.yml
@@ -1,6 +1,7 @@
 ---
 dependencies:
   - { role: jdk11 , become: yes }
-  - { role: azure-cli , become: yes }
+  - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
+  - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
 
 
diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index 13ba75f78a..663f76d68a 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -10,6 +10,46 @@
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}
     - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'}
+
+# OCI CLI bootstrap for the analytics user; every task below is skipped unless cloud_service_provider is "oci".
+- name: Adding PATH for oci cli Vars to bashrc file of spark.
+  become: yes
+  become_user: "{{ analytics_user }}"
+  lineinfile:
+    path: '{{ analytics_user_home }}/.bashrc'
+    line: 'export PATH={{ analytics_user_home }}/bin:$PATH'
+    regexp: "export PATH={{ analytics_user_home }}/bin.*"
+  when: cloud_service_provider == "oci"
+
+- name: Create OCI cli config directory
+  become: yes
+  become_user: "{{ analytics_user }}"
+  file:
+    path: "{{ analytics_user_home }}/.oci"
+    state: directory
+  when: cloud_service_provider == "oci"
+
+- name: Install OCI cli API signing key
+  become: yes
+  become_user: "{{ analytics_user }}"
+  template:
+    src: oci-key.j2
+    dest: "{{ analytics_user_home }}/.oci/oci-key.pem"
+    mode: "0600"
+    owner: "{{ analytics_user }}"
+    group: "{{ analytics_group }}"
+  when: cloud_service_provider == "oci"
+
+- name: Create OCI cli config file
+  become: yes
+  become_user: "{{ analytics_user }}"
+  template:
+    src: oci-cli-config.j2
+    dest: "{{ analytics_user_home }}/.oci/config"
+    mode: "0600"
+    owner: "{{ analytics_user }}"
+    group: "{{ analytics_group }}"
+  when: cloud_service_provider == "oci"
 
 - name: Adding ENV Vars to spark servers environment.
   become: yes
diff --git a/ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2 b/ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2
new file mode 100644
index 0000000000..56cf3ba3ef
--- /dev/null
+++ b/ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2
@@ -0,0 +1,6 @@
+[DEFAULT]
+user={{oci_cli_user_ocid }}
+fingerprint={{oci_cli_fingerprint}}
+key_file={{ analytics_user_home }}/.oci/oci-key.pem
+tenancy={{oci_cli_tenancy}}
+region={{oci_cli_region}}
\ No newline at end of file
diff --git a/ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2 b/ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2
new file mode 100644
index 0000000000..b969594016
--- /dev/null
+++ b/ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2
@@ -0,0 +1 @@
+{{ oci_cli_key_content }}
\ No newline at end of file
diff --git a/ansible/roles/analytics-spark-provision/tasks/main.yml b/ansible/roles/analytics-spark-provision/tasks/main.yml
index 25ebd9da23..65731cecb9 100644
--- a/ansible/roles/analytics-spark-provision/tasks/main.yml
+++ b/ansible/roles/analytics-spark-provision/tasks/main.yml
@@ -117,28 +117,31 @@
     recurse: yes
   become: yes
 
+# kenneth changed to install Ruby 2.6 as per R.4.6.0
 - name: Install latest ruby
   become: yes
   become_user: "{{ analytics_user }}"
-  shell: "export PATH=$PATH:/home/analytics/.rvm/bin && rvm install ruby-2.5"
+  shell: "export PATH=$PATH:/home/analytics/.rvm/bin && rvm install ruby-2.6"
 
 - name: Add ruby repository
   become: yes
   apt_repository:
     repo: ppa:brightbox/ruby-ng
 
+# kenneth changed to install ruby-dev 2.6 as per R.4.6.0 - there is no ruby2.2-dev in bionic
 - name: Install latest ruby-dev
   become: yes
   apt:
-    name: "ruby2.5-dev"
+    name: "ruby2.6-dev"
     state: installed
     update_cache: true
     cache_valid_time: 3600
 
+# changed to ruby 2.6 as per R.4.6.0
 - name: Install ruby-kafka
   become: yes
   become_user: "{{ analytics_user }}"
-  shell: "bash -ilc 'export PATH=$PATH:/home/analytics/.rvm/bin && rvm --default
use ruby-2.5 && gem install ruby-kafka'"
+  shell: "bash -ilc 'export PATH=$PATH:/home/analytics/.rvm/bin && rvm --default use ruby-2.6 && gem install --user-install --no-document ruby-kafka'"
 
 - name: Download Kafka-2.11
   become: yes
diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml
index 690c51d87d..06cdf1b0f2 100755
--- a/ansible/roles/data-products-deploy/defaults/main.yml
+++ b/ansible/roles/data-products-deploy/defaults/main.yml
@@ -1,6 +1,7 @@
 analytics_user: analytics
 analytics_group: analytics
 spark_output_temp_dir: /mount/data/analytics/tmp/
+oci_install_loc: /home/{{analytics_user}}/bin/
 
 bucket: "telemetry-data-store"
 secor_bucket: "telemetry-data-store"
@@ -279,4 +280,15 @@ assessment_metric_primary_category: "{{ exhaust_job_assessment_primary_category
 
 # Default s3 variables
 sunbird_private_s3_storage_key: ""
-sunbird_private_s3_storage_secret: ""
\ No newline at end of file
+sunbird_private_s3_storage_secret: ""
+
+
+# jets3t s3 config, allows us to configure for s3-like object stores
+jets3t_s3_request_signature_version: "{{ s3_request_signature_version }}"
+jets3t_s3_endpoint_host: "{% if s3_storage_endpoint %}{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}{% endif %}"
+jets3t_s3_disable_dns_buckets: "{{ s3_path_style_access }}"
+jets3t_s3_https_only: "{{ s3_https_only }}"
+jets3t_s3_default_bucket_location: "{{ s3_default_bucket_location }}"
+
+
+spark_pg_db_name: "analytics"
\ No newline at end of file
diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index df495a5d4a..1c15e14023 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -5,6 +5,23 @@
   tags:
     - always
 
+# `oci os bucket get` exits non-zero when the bucket is missing; record the
+# result instead of failing so that the create task below can act on it.
+- name: Ensure oci oss bucket exists
+  command: "{{oci_install_loc}}/oci os bucket get --name {{ bucket }}"
+  register: check_bucket
+  failed_when: false
+  changed_when: false
+  when: dp_object_store_type == "oci"
+  tags:
+    - always
+
+- name: Create oci oss bucket
+  command: "{{oci_install_loc}}/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"
+  when: dp_object_store_type == "oci" and check_bucket.rc != 0
+  tags:
+    - always
+
 - name: Copy Core Data Products
   copy: src={{ analytics_batch_module_artifact }} dest={{ analytics.home }}/models-{{ model_version }}
   tags:
@@ -14,6 +31,15 @@
   command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
+  tags:
+    - dataproducts-spark-cluster
+
+- name: Copy Core Data Products to oci oss
+  command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force"
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "oci"
   tags:
     - dataproducts-spark-cluster
 
@@ -27,8 +53,17 @@
   command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar -f {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
   tags:
-   - ed-dataproducts-spark-cluster
+    - ed-dataproducts-spark-cluster
+
+- name: Copy Ed Data Products to oci oss
+  command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force"
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "oci"
+  tags:
+    - ed-dataproducts-spark-cluster
 
 - name: Copy Framework Library
   copy: src={{ analytics_core_artifact }} dest={{ analytics.home }}/models-{{ model_version }}
@@ -39,6 +74,15 @@
   command: az storage blob upload --overwrite --debug -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }}
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
+  tags:
+    - framework-spark-cluster
+
+- name: Copy Framework Library to oci oss
+  command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force"
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "oci"
   tags:
     - framework-spark-cluster
 
@@ -51,6 +95,15 @@
   command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }}
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
+  tags:
+    - framework-spark-cluster
+
+- name: Copy Scruid Library to oci oss
+  command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force"
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "oci"
   tags:
     - framework-spark-cluster
 
@@ -87,6 +140,17 @@
     line: 'spark_output_temp_dir="/var/log/sparkapp/tmp/"'
   tags:
     - framework-spark-cluster
+  when: dp_object_store_type != "oci"
+
+- name: Update spark temp dir value for cluster
+  lineinfile:
+    path: '{{ analytics.home }}/models-{{ model_version }}/application.conf'
+    regexp: '^spark_output_temp_dir="/mount/data/analytics/tmp/"'
+    line: 'spark_output_temp_dir="/var/log/spark/"'
+  tags:
+    - framework-spark-cluster
+  when: dp_object_store_type == "oci"
+
 
 - name: Update logger kafka config for cluster
   lineinfile:
@@ -100,9 +164,18 @@
   command: az storage blob upload --overwrite -c {{ bucket }} -f {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
   tags:
     - framework-spark-cluster
 
+- name: Copy configuration file to oci oss
+  command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force"
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "oci"
+  tags:
+    - framework-spark-cluster
+
 - name: Copy log4j2 xml file
   template: src=log4j2.xml.j2 dest={{ analytics.home }}/models-{{ model_version }}/log4j2.xml mode=755 owner={{ analytics_user }} group={{ analytics_group }}
   tags: [ dataproducts, framework, ed-dataproducts ]
@@ -271,7 +344,7 @@
     - spark-jobs
 
 - name: Copy collection-summary ingestion spec
-  copy: src="collection-summary-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }}
+  template: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/collection-summary-ingestion-spec.json mode=755 owner={{ analytics_user }} group={{ analytics_group }}
   tags:
     - ed-dataproducts
 
@@ -300,7 +373,7 @@
     - spark-jobs
 
 - name: Copy sourcing-summary ingestion spec
-  copy: src="sourcing-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }}
+  template: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/sourcing-ingestion-spec.json mode=755 owner={{ analytics_user }} group={{ analytics_group }}
   tags:
     - ed-dataproducts
 
@@ -335,6 +408,24 @@
     - run-job
     - config-update
 
+- name: Copy jets3t.properties file
+  template: src=jets3t.j2 dest={{ analytics_cluster.home }}/jets3t.properties
+  delegate_to: localhost
+  tags:
+    - replay-job
+    - run-job
+    - config-update
+
+- name: Copy JetS3t.properties to oci oss
+  command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/jets3t.properties --file {{ analytics_cluster.home }}/jets3t.properties --content-type auto --force"
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "oci"
+  tags:
+    - replay-job
+    - run-job
+    - config-update
+
 - name: Replay Job
   shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --startDate {{ start_date }} --endDate {{ end_date }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} &"
   async: "{{ (pause_min * 60) }}"
@@ -376,10 +467,10 @@
   shell: |
     if echo "{{jobs}}" | grep 'druid'
     then
-      python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{env}}_report_config
+      python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{spark_env}}_report_config
     elif echo "{{jobs}}" | grep 'exhaust'
     then
-      python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} exhaust {{env}}_job_request
+      python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} exhaust {{spark_env}}_job_request
     fi
   tags:
     - parallel-jobs-submit
diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 1a26514684..d6b6e18bb9 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -1,3 +1,5 @@
+
+{% if dp_object_store_type == "azure" %}
 {
     "jars": [
         "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}",
@@ -25,3 +27,70 @@
         "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
     }
 }
+{% elif (dp_object_store_type == "s3") %}
+{
+    "jars": [
+        "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}",
+        "s3n://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}",
+        "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}"
+    ],
+    "file": "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}",
+    "files": [
+        "s3n://{{ bucket }}/models-{{ model_version }}/application.conf"
+    ],
+    "className": "org.ekstep.analytics.job.JobExecutor",
+    "executorCores": {{ spark_cluster.executor_core }},
+    "executorMemory": "{{ spark_cluster.executor_memory }}",
+    "numExecutors": {{ spark_cluster.num_executors }},
+    "conf": {
+        "spark.sql.autoBroadcastJoinThreshold" : "-1",
+        "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}",
+        "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}",
+        "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
+        "spark.scheduler.mode" : "FAIR",
+        "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
+        "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
+        "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
+        "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}",
+        "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
+    }
+}
+{% elif (dp_object_store_type == "oci") %}
+{
+    "jars": [
+        "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}",
+        "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}",
+        "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}",
+        "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}"
+    ],
+    "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}",
+    "files": [
+        "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf",
+        "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/jets3t.properties"
+    ],
+    "className": "org.ekstep.analytics.job.JobExecutor",
+    "executorCores": {{ spark_cluster.executor_core }},
+    "executorMemory": "{{ spark_cluster.executor_memory }}",
+    "numExecutors": {{ spark_cluster.num_executors }},
+    "conf": {
+        "spark.sql.autoBroadcastJoinThreshold" : "-1",
+        "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
+        "spark.scheduler.mode" : "FAIR",
+        "spark.driver.memory" : "60g",
+        "spark.executor.memory" : "40g",
+        "spark.driver.memoryOverhead" : "20g",
+        "spark.executor.memoryOverhead" : "4g",
+        "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",
+        "spark.shuffle.service.enabled" : "true",
+        "spark.dynamicAllocation.enabled" : "true",
+        "spark.yarn.scheduler.heartbeat.interval-ms" : "7200000",
+        "spark.executor.heartbeatInterval" : "1800s",
+        "spark.network.timeout" : "5400s",
+        "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
+        "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
+        "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
+        "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=2048m",
+        "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=2048m"
+    }
+}
+{% endif %}
diff --git a/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 b/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2
new file mode 100644
index 0000000000..d734ee3db2
--- /dev/null
+++ b/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2
@@ -0,0 +1,256 @@
+{
+  "type": "index",
+  "spec": {
+    "dataSchema": {
+      "dataSource": "collection-summary-snapshot",
+      "parser": {
+        "type": "string",
+        "parseSpec": {
+          "format": "json",
+          "flattenSpec": {
+            "useFieldDiscovery": false,
+            "fields": [
+              {
+                "type": "root",
+                "name": "content_org",
+                "expr": "contentorg"
+              },
+              {
+                "type": "root",
+                "name": "user_org",
+                "expr": "orgname"
+              },
+              {
+                "type": "root",
+                "name": "batch_start_date",
+                "expr": "startdate"
+              },
+              {
+                "type": "root",
+                "name": "batch_end_date",
+                "expr": "enddate"
+              },
+              {
+                "type": "root",
+                "name": "has_certificate",
+                "expr": "hascertified"
+              },
+              {
+                "type": "root",
+                "name": "collection_id",
+                "expr": "courseid"
+              },
+              {
+                "type": "root",
+                "name": "batch_id",
+                "expr": "batchid"
+              },
+              {
+                "type": "root",
+                "name": "collection_name",
+                "expr": "collectionname"
+              },
+              {
+                "type": "root",
+                "name": "batch_name",
+                "expr": "batchname"
+              },
+              {
+                "type": "root",
+                "name": "total_enrolment",
+                "expr": "enrolleduserscount"
+              },
+              {
+                "type": "root",
+                "name": "total_completion",
+                "expr": "completionuserscount"
+              },
+              {
+                "type": "root",
+                "name": "total_certificates_issued",
+                "expr": "certificateissuedcount"
+              },
+              {
+                "type": "root",
+                "name": "content_status",
+                "expr": "contentstatus"
+              },
+              {
+                "type": "root",
+                "name": "user_state",
+                "expr": "state"
+              },
+              {
+                "type": "root",
+                "name": "user_district",
+                "expr": "district"
+              },
+              {
+                "type": "root",
+                "name": "content_channel",
+                "expr": "channel"
+              },
+              {
+                "type": "root",
+                "name": "keywords",
+                "expr": "keywords"
+              },
+              {
+                "type": "root",
+                "name": "timestamp",
+                "expr": "timestamp"
+              },
+              {
+                "type": "root",
+                "name": "medium",
+                "expr": "medium"
+              },
+              {
+                "type": "root",
+                "name": "subject",
+                "expr": "subject"
+              },
+              {
+                "type": "root",
+                "name": "created_for",
+                "expr": "createdfor"
+              },
+              {
+                "type": "root",
+                "name": "user_type",
+                "expr": "usertype"
+              },
+              {
+                "type": "root",
+                "name": "user_subtype",
+                "expr": "usersubtype"
+              }
+            ]
+          },
+          "dimensionsSpec": {
+            "dimensions": [
+              {
+                "name": "content_org"
+              },
+              {
+                "name": "user_org"
+              },
+              {
+                "type": "string",
+                "name": "batch_id"
+              },
+              {
+                "type": "string",
+                "name": "batch_start_date"
+              },
+              {
+                "type": "string",
+                "name": "batch_end_date"
+              },
+              {
+                "type": "string",
+                "name": "collection_id"
+              },
+              {
+                "type": "string",
+                "name": "collection_name"
+              },
+              {
+                "type": "string",
+                "name": "batch_name"
+              },
+              {
+                "type": "long",
+                "name": "total_enrolment"
+              },
+              {
+                "type": "long",
+                "name": "total_completion"
+              },
+              {
+                "type": "long",
+                "name": "total_certificates_issued"
+              },
+              {
+                "type": "string",
+                "name": "content_status"
+              },
+              {
+                "type": "string",
+                "name": "user_state"
+              },
+              {
+                "type": "string",
+                "name": "user_district"
+              },
+              {
+                "name": "keywords"
+              },
+              {
+                "name": "has_certificate"
+              },
+              {
+                "type": "string",
+                "name": "content_channel"
+              },
+              {
+                "name": "medium"
+              },
+              {
+                "name": "subject"
+              },
+              {
+                "name": "created_for"
+              },
+              {
+                "type": "string",
+                "name": "user_type"
+              },
+              {
+                "type": "string",
+                "name": "user_subtype"
+              }
+            ],
+            "dimensionsExclusions": []
+          },
+          "timestampSpec": {
+            "column": "timestamp",
+            "format": "auto"
+          }
+        }
+      },
+      "metricsSpec": [],
+      "granularitySpec": {
+        "type": "uniform",
+        "segmentGranularity": "day",
+        "queryGranularity": "none",
+        "rollup": true
+      }
+    },
+    "ioConfig": {
+      "type": "index",
+      "firehose": {
+{% if dp_object_store_type == "azure" %}
+        "type": "static-azure-blobstore",
+        "blobs": [
+          {
+            "container": "{{reports_container}}",
+            "path": "/collection-summary-reports-v2/collection-summary-report-latest.json"
+          }
+        ],
+{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %}
+        "type": "static-s3",
+        "uris": [ "s3://{{reports_container}}/collection-summary-reports-v2/collection-summary-report-latest.json"],
+{% endif %}
+        "fetchTimeout": 300000
+      }
+    },
+    "tuningConfig": {
+      "type": "index",
+      "targetPartitionSize": 5000000,
+      "maxRowsInMemory": 25000,
+      "forceExtendableShardSpecs": false,
+      "logParseExceptions": true
+    }
+  }
+}
diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2
index bde88ec9d4..c9a3ba285e 100644
--- a/ansible/roles/data-products-deploy/templates/common.conf.j2
+++ b/ansible/roles/data-products-deploy/templates/common.conf.j2
@@ -16,10 +16,15 @@ reports.storage.key.config="{{ dp_reports_storage_key_config }}"
 reports.storage.secret.config="{{ dp_reports_storage_secret_config }}"
 {% if dp_object_store_type == "azure" %}
 cloud_storage_type="azure"
-{% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3") %}
+{% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3" or dp_object_store_type == "oci") %}
+{% if cloud_service_provider == "oci" %}
+cloud_storage_type="oci"
+{% else %}
 cloud_storage_type="s3"
+{% endif %}
 cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}"
-cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint }}"
+cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}"
+storage.endpoint.config="{{ s3_storage_endpoint_with_protocol }}"
 aws_storage_key="{{ s3_storage_key }}"
aws_storage_secret="{{ s3_storage_secret }}" {% endif %} @@ -112,7 +117,7 @@ azure { } ## Reports - Global config -cloud.container.reports="reports" +cloud.container.reports="{{cloud_storage_privatereports_bucketname}}" # course metrics container in azure course.metrics.cassandra.sunbirdKeyspace="sunbird" @@ -218,7 +223,7 @@ metric.kafka.broker="{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" metric.kafka.topic="{{ env }}.prom.monitoring.metrics" //Postgres Config -postgres.db="{{postgres.db_name}}" +postgres.db="{{postgres.spark_db_name}}" postgres.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" postgres.user="{{postgres.db_username}}" postgres.pass="{{postgres.db_password}}" @@ -251,10 +256,14 @@ dcetextbook.filename="DCE_textbook_data.csv" etbtextbook.filename="ETB_textbook_data.csv" etb.dialcode.druid.length={{ etb_dialcode_list_druid_length }} - +{% if dp_object_store_type == "azure" %} druid.report.default.storage="azure" +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +druid.report.default.storage="s3" +{% endif %} + druid.report.date.format="yyyy-MM-dd" -druid.report.default.container="report-verification" +druid.report.default.container="{{cloud_storage_report_verfication_bucketname}}" ## Collection Exhaust Jobs Configuration -- Start ## diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2 new file mode 100644 index 0000000000..1ca346578c --- /dev/null +++ b/ansible/roles/data-products-deploy/templates/jets3t.j2 @@ -0,0 +1,8 @@ +storage-service.request-signature-version={{ jets3t_s3_request_signature_version }} +s3service.disable-dns-buckets={{ jets3t_s3_disable_dns_buckets }} +s3service.https-only=true +{% if jets3t_s3_default_bucket_location %} +s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }} +{% endif %} +uploads.stream-retry-buffer-size=2147483646 +s3service.s3-endpoint={% if jets3t_s3_endpoint_host 
%}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %} diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 2bb0a042ea..f9991e1714 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -30,19 +30,24 @@ config() { if [ ! -z "$2" ]; then keyword=$2; fi case "$1" in "assessment-correction") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' + echo 
'{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' ;; "assessment-archival") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"azure","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' ;; "assessment-archived-removal") +{% if dp_object_store_type == "azure" %} echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"{{reports_container}}"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + ;; +{% endif %} "collection-reconciliation-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CollectionReconciliationJob","modelParams":{"mode":"prodrun","brokerList":"{{ingestion_kafka_broker_host}}","topic":"{{env}}.issue.certificate.request","sparkCassandraConnectionHost":"{{ core_cassandra_host }}"},"parallelization":30,"appName":"CollectionReconciliationJob"}' ;; "collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"azure","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ 
core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "score-metric-migration-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.ScoreMetricMigrationJob","modelParams":{"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Score Metric Migration Job"}' @@ -51,34 +56,34 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.AssessmentScoreCorrectionJob","modelParams":{"assessment.score.correction.batches":"","cassandraReadConsistency":"QUORUM","cassandraWriteConsistency":"QUORUM","csvPath":"/mount/data/analytics/score_correction","isDryRunMode":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":30,"appName":"Assessment Score Correction Job"}' ;; 
"course-batch-status-updater") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"azure","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' ;; "collection-summary-report-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"azure","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host 
}}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' ;; "uci-private-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' + echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' ;; "uci-response-exhaust") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"azure","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":""},"parallelization":8,"appName":"UCI Response Exhaust"}' ;; "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", 
"sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' ;; "program-collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"azure","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"],"keywords":"'$keyword'","store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "response-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' + echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' ;; "response-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port 
}}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config", "storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host 
}}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' ;; "druid_reports") echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.DruidQueryProcessingModel","modelParams":{"mode":"batch"},"parallelization":8,"appName":"Druid Reports"}' @@ -93,23 +98,23 @@ config() { echo '{"jobsCount":'$jobManagerJobsCount',"topic":"'$job_topic'","bootStrapServer":"'$brokerList'","zookeeperConnect":"'$zookeeper'","consumerGroup":"jobmanager","slackChannel":"#test_channel","slackUserName":"JobManager","tempBucket":"'$bucket'","tempFolder":"'$temp_folder'"}' ;; "wfs") - echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", 
"parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"} }],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' - #echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","store":"{{ dp_object_store_type }}","apiVersion":"v2","parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' + #echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' ;; "video-streaming") - echo '{"search":{"type":"azure"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' + echo 
'{"search":{"type":"{{ dp_object_store_type }}"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' ;; "admin-user-reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' ;; "admin-geo-reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' + echo 
'{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' ;; "telemetry-replay") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' ;; "summary-replay") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' + echo 
'{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' ;; "content-rating-updater") echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.updater.UpdateContentRating","modelParams": {"startDate": "'$endDate'","endDate": "'$endDate'"},"output": [{"to":"console","params":{"printEvent":false}}],"parallelization": 8,"appName": "Content Rating Updater","deviceMapping": false}' @@ -118,25 +123,25 @@ config() { echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.ExperimentDefinitionModel","modelParams":{"sparkElasticsearchConnectionHost":"{{ lp_composite_search_host }}"},"output":[{"to":"elasticsearch","params":{"index":"experiment"}}],"parallelization":8,"appName":"Experiment-Definition","deviceMapping":false}' ;; "etb-metrics") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf 
nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"azure","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of 
Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' ;; "course-enrollment-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"azure","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' + echo 
'{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' ;; "course-consumption-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": "sum__edata_time_spent","type": 
"doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "azure","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams":{"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]}}},"reportConfig":{"id":"tpd_metrics","labels":{"date":"Date","status":"Batch Status","timespent":"Timespent in mins","courseName":"Course Name","batchName":"Batch Name"},"dateRange":{"staticInterval":"LastDay","granularity":"all"},"metrics":[{"metric":"totalCoursePlays","label":"Total Course Plays (in 
mins)","druidQuery":{"queryType":"groupBy","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"sum__edata_time_spent","type":"doubleSum","fieldName":"edata_time_spent"}],"dimensions":[{"fieldName":"object_rollup_l1","aliasName":"courseId"},{"fieldName":"uid","aliasName":"userId"},{"fieldName":"context_cdata_id","aliasName":"batchId"}],"filters":[{"type":"equals","dimension":"eid","value":"ME_WORKFLOW_SUMMARY"},{"type":"in","dimension":"dimensions_pdata_id","values":["'$producerEnv'.app","'$producerEnv'.portal"]},{"type":"equals","dimension":"dimensions_type","value":"content"},{"type":"equals","dimension":"dimensions_mode","value":"play"},{"type":"equals","dimension":"context_cdata_type","value":"batch"}],"postAggregation":[{"type":"arithmetic","name":"timespent","fields":{"leftField":"sum__edata_time_spent","rightField":60,"rightFieldType":"constant"},"fn":"/"}],"descending":"false"}}],"output":[{"type":"csv","metrics":["timespent"],"dims":[]}],"queryType":"groupBy"},"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Consumption Metrics Model","deviceMapping":false}' ;; "textbook-progress-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total 
Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"azure","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent":"Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending 
in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' ;; "audit-metrics-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":
"channel-raw","search":{"type":"azure","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"azure","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"{{ dp_object_store_type 
}}","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' ;; "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": 
"Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "azure","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": 
"'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.SourcingMetrics","modelParams":{"reportConfig":{"id":"textbook_report","metrics":[],"labels":{"date":"Date","primaryCategory":"Collection Category","identifier":"Collection ID","name":"Collection Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","reportDate":"Report generation date","board":"Board","grade":"Grade","chapters":"Folder Name","totalChapters":"Total number of first level folders","status":"Textbook Status"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"createdFor","aliasName":"createdFor"},{"fieldName":"createdOn","aliasName":"createdOn"},{"fieldName":"lastUpdatedOn","aliasName":"lastUpdatedOn"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper"]},{"type":"in","dimension":"status","values":["Live"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{ dp_object_store_type }}","storageContainer":"'$reportPostContainer'","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Report Job","deviceMapping":false}' ;; "druid-dataset") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"azure","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type 
}}","container":"'$reportPostContainer'","key":"ml_reports/","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ML Druid Data Model"}' ;; "*") echo "Unknown model code" diff --git a/ansible/roles/data-products-deploy/templates/model-config.json.j2 b/ansible/roles/data-products-deploy/templates/model-config.json.j2 index 4594a1978a..bb170b2193 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.json.j2 @@ -1,7 +1,7 @@ { "wfs": { "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -20,11 +20,16 @@ "model": "org.ekstep.analytics.model.WorkflowSummary", "modelParams": { "apiVersion": "v2", - "parallelization": 32 + "parallelization": 32, + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", + "store":"{{ dp_object_store_type }}" }, "output": [ { - "to": "azure", + "to": "{{dp_object_store_type}}", "params": { "bucket": "{{ bucket }}", "key": "{{ job_manager_tmp_dir }}/wfs/$(date --date yesterday '+%Y-%m-%d')" @@ -44,7 +49,7 @@ }, "video-streaming": { "search": { - "type": "azure" + "type": "{{dp_object_store_type}}" }, "model": "org.ekstep.analytics.job.VideoStreamingJob", "modelParams": { @@ -68,6 +73,11 @@ }, "model": "org.ekstep.analytics.job.report.StateAdminReportJob", "modelParams": { + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", + "store":"{{ dp_object_store_type }}", "sparkCassandraConnectionHost": 
"{{core_cassandra_host}}", "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" }, @@ -89,6 +99,11 @@ }, "model": "org.ekstep.analytics.job.report.StateAdminGeoReportJob", "modelParams": { + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", + "store":"{{ dp_object_store_type }}", "sparkCassandraConnectionHost": "{{core_cassandra_host}}", "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" }, @@ -110,6 +125,11 @@ }, "model": "org.ekstep.analytics.updater.UpdateContentRating", "modelParams": { + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "startDate": "$(date --date yesterday '+%Y-%m-%d')", "endDate": "$(date '+%Y-%m-%d')" }, @@ -139,6 +159,11 @@ "pushMetrics": true, "brokerList": "{{ brokerlist }}", "topic": "{{ analytics_metrics_topic }}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", + "store":"{{ dp_object_store_type }}", "model": [ { "model": "WorkFlowSummaryModel", @@ -201,6 +226,11 @@ }, "model": "org.ekstep.analytics.model.ExperimentDefinitionModel", "modelParams": { + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "sparkElasticsearchConnectionHost": "{{ lp_composite_search_host }}" }, "output": [ @@ -297,7 +327,11 @@ "tenantId": "", "slugName": "" }, - "store": "azure", + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + 
"storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "format": "csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -346,7 +380,11 @@ "limit": 10000 } }, - "store": "azure", + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "format":"csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -457,7 +495,11 @@ }], "queryType": "groupBy" }, - "store": "azure", + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "format":"csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -482,11 +524,16 @@ }, "model": "org.ekstep.analytics.model.MetricsAuditJob", "modelParams": { + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "auditConfig": [ { "name": "denorm", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -532,7 +579,7 @@ { "name": "failed", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -546,7 +593,7 @@ { "name": "unique", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -560,7 +607,7 @@ { "name": "raw", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -574,7 +621,7 @@ { "name": "channel-raw", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "folder": true, @@ -589,7 +636,7 @@ { 
"name": "channel-summary", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "folder": true, @@ -604,7 +651,7 @@ { "name": "derived", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", diff --git a/ansible/roles/data-products-deploy/templates/model-dock-config.j2 b/ansible/roles/data-products-deploy/templates/model-dock-config.j2 index 20d82dbfb5..f720f4687e 100644 --- a/ansible/roles/data-products-deploy/templates/model-dock-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-dock-config.j2 @@ -15,16 +15,16 @@ config() { if [ ! -z "$3" ]; then inputBucket=$3; fi case "$1" in "content-details") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content 
Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","alias
Name":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"azure","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook 
Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName
":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","aliasName":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{dp_object_store_type}}","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' ;; "sourcing-summary-report") - echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": 
"'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": "/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "azure", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' + echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": "'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": 
"/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "{{dp_object_store_type}}", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' ;; "funnel-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": 
{"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. of contributions pending review","approvedContributions": "No. of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "azure","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": 
"equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. 
of contributions pending review","approvedContributions": "No. of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "{{dp_object_store_type}}","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": "equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' ;; "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter 
Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "azure","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": 
"org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{dp_object_store_type}}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' ;; "*") echo "Unknown model code" diff --git a/ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2 b/ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2 new file mode 100644 index 0000000000..41bb51afba --- /dev/null +++ b/ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2 @@ -0,0 +1,151 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "sourcing-summary-snapshot", + "parser": { + "type": "string", + "parseSpec": { + "format": "json", + "flattenSpec": { + "useFieldDiscovery": false, + "fields": [ + { + "type": "root", + "name": "program_id", + "expr": "program_id" + }, + { + "type": "root", + "name": "status", + "expr": "status" + }, + { + "type": "root", + "name": "rootorg_id", + "expr": 
"rootorg_id" + }, + { + "type": "root", + "name": "user_id", + "expr": "user_id" + }, + { + "type": "root", + "name": "osid", + "expr": "osid" + }, + { + "type": "root", + "name": "user_type", + "expr": "user_type" + }, + { + "type": "root", + "name": "contributor_id", + "expr": "contributor_id" + }, + { + "type": "root", + "name": "total_contributed_content", + "expr": "total_contributed_content" + }, + { + "type": "root", + "name": "primary_category", + "expr": "primary_category" + }, + { + "type": "root", + "name": "created_by", + "expr": "created_by" + } + ] + }, + "dimensionsSpec": { + "dimensions": [ + { + "type": "string", + "name": "program_id" + }, + { + "type": "string", + "name": "status" + }, + { + "type": "string", + "name": "rootorg_id" + }, + { + "type": "string", + "name": "user_id" + }, + { + "type": "string", + "name": "osid" + }, + { + "type": "string", + "name": "user_type" + }, + { + "type": "string", + "name": "contributor_id" + }, + { + "type": "string", + "name": "primary_category" + }, + { + "type": "string", + "name": "created_by" + } + ], + "dimensionsExclusions": [] + }, + "timestampSpec": { + "column": "timestamp", + "format": "auto" + } + } + }, + "metricsSpec": [ + { + "name": "total_count", + "type": "count" + } + ], + "granularitySpec": { + "type": "uniform", + "segmentGranularity": "day", + "queryGranularity": "none", + "rollup": true + } + }, + "ioConfig": { + "type": "index", + "firehose": { +{% if dp_object_store_type == "azure" %} + "type": "static-azure-blobstore", + "blobs": [ + { + "container": "{{reports_container}}", + "path": "/sourcing/SourcingSummaryReport.json" + } + ], +{% elif (dp_object_store_type == "oci") %} + "type": "static-s3", + "uris": [ "s3://{{reports_container}}/sourcing/SourcingSummaryReport.json"], +{% endif %} + "fetchTimeout": 300000 + } + }, + "tuningConfig": { + "type": "index", + "targetPartitionSize": 5000000, + "maxRowsInMemory": 25000, + "forceExtendableShardSpecs": false, + "logParseExceptions": 
true + } + } +} diff --git a/ansible/roles/data-products-deploy/templates/submit-script.j2 b/ansible/roles/data-products-deploy/templates/submit-script.j2 index e8341dc1e8..0e629a4ce8 100644 --- a/ansible/roles/data-products-deploy/templates/submit-script.j2 +++ b/ansible/roles/data-products-deploy/templates/submit-script.j2 @@ -1,6 +1,7 @@ #!/usr/bin/env bash ## Job to run daily + cd "{{ analytics_cluster.home }}" source model-config.sh today=$(date "+%Y-%m-%d") @@ -79,7 +80,15 @@ submit_cluster_job() { requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody - response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: admin_name }}") +{% if dp_object_store_type == "azure" %} + response=$( + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" + ) # azure branch: keep the Livy reply in response so the echo after the conditional prints it +{% elif (dp_object_store_type == "oci") %} + response=$( + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}" + ) # oci branch: keep the Livy reply in response so the echo after the conditional prints it +{% endif %} echo "Submitted job for batchNumer $i below is the response" echo $response } @@ -118,7 +127,15 @@ if [ "$mode" = "via-partition" ]; then requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d
"$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} done elif [ "$mode" = "parallel-jobs" ]; then @@ -157,8 +174,15 @@ elif [ "$mode" = "selected-partition" ]; then requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" - +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} else if [ -z "$start_date" ]; then echo "Running $job without partition via run-job." 
@@ -179,5 +203,14 @@ else requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} + fi diff --git a/ansible/roles/oci-bds-spark-cluster/defaults/main.yml b/ansible/roles/oci-bds-spark-cluster/defaults/main.yml new file mode 100644 index 0000000000..95b0b73e0d --- /dev/null +++ b/ansible/roles/oci-bds-spark-cluster/defaults/main.yml @@ -0,0 +1,7 @@ + +spark-folder: /usr/hdp/current/spark2-client +guava_version: 19.0 +log4j_version: 2.5 +guava_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_version}}/guava-{{guava_version}}.jar +log4j_core_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-core/{{log4j_version}}/log4j-core-{{log4j_version}}.jar +log4j_api_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-api/{{log4j_version}}/log4j-api-{{log4j_version}}.jar diff --git a/ansible/roles/oci-bds-spark-cluster/tasks/main.yml b/ansible/roles/oci-bds-spark-cluster/tasks/main.yml new file mode 100644 index 0000000000..d8f4d3cc50 --- /dev/null +++ b/ansible/roles/oci-bds-spark-cluster/tasks/main.yml @@ -0,0 +1,13 
@@ +- name: copy cluster creation script + template: + src: create-cluster.sh.j2 + dest: /tmp/create-cluster.sh + mode: "0755" + when: cluster_state == "create_cluster" + +- name: copy cluster deletion script + template: + src: delete-cluster.sh.j2 + dest: /tmp/delete-cluster.sh + mode: "0755" + when: cluster_state == "delete_cluster" diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 new file mode 100644 index 0000000000..161b5f3ecf --- /dev/null +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -0,0 +1,320 @@ +#! /bin/bash +# Subnet id will generate from env variable +# Version 1 Author Nikesh Gogia and Ali Shemshadi + +ambari_user="${1}" +cluster_password="${2}" +key_alias="{{key_alias}}" +user_id="{{user_id}}" +subnet="{{subnet_id}}" +compartment_id="{{compartment_id}}" +display_name="{{display_name}}" +workernode="{{workernode}}" +cluster_public_key="{{public_key}}" + +cstate='SUCCEEDED' +cwait=2500 + +echo "RECEIVED ALL ENV VARIABLES" + +AMBARI_USER=$ambari_user +AMBARI_PWD=$cluster_password + +function get_bdsid() { + list_param=`oci bds instance list --compartment-id $compartment_id` + bdsid="NULL" + # echo $list_param | jq '.data' + state="ACTIVE" + disname="NULL" + for k in $(jq '.data | keys | .[]' <<< "$list_param"); do + # echo $k + lstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'` + if [ "$lstate" = "$state" ]; then + disname=`echo $list_param | jq -r '.data['$k']["display-name"]'` + if [ "$disname" = "$display_name" ]; then + bdsid=`echo $list_param | jq -r '.data['$k']["id"]'` + fi + + fi + echo "BDS ID" + echo $bdsid + done +} + +function getLivyip() { + + export bds_instance_id=$bdsid + bdsjson=$(oci bds instance get --bds-instance-id $bds_instance_id) + # echo "AMBARI URL" + ambari_url=`echo $bdsjson | jq -r '.data["cluster-details"]["ambari-url"]'` + # echo $ambari_url + livyip="NULL" + cnode="UTILITY" + for k in $(jq 
'.data["nodes"] | keys | .[]' <<< "$bdsjson"); do + node=`echo $bdsjson | jq -r '.data["nodes"]['$k']["node-type"]'` + if [ $node = "$cnode" ]; then + livyip=`echo $bdsjson | jq -r '.data["nodes"]['$k']["ip-address"]'` + fi + done + echo "LIVY IP" + echo $livyip + +} + +getlivyclustername() { + cdet=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$livyip:7183/api/v1/clusters/) + echo $cdet + for k in $(jq '.items | keys | .[]' <<< "$cdet"); do + # echo $k + cluster_name=`echo $cdet | jq -r '.items['$k']["Clusters"]["cluster_name"]'` + echo $cluster_name + done + echo "CLUSTER NAME" + +} + +function get_apidetails() { + + export bds_instance_id=$bdsid + + listapijson=$(oci bds bds-api-key list --bds-instance-id $bds_instance_id) + + #echo $listapijson | jq '.data[1]["key-alias"]' + id="NULL" + ctype="ACTIVE" + for k in $(jq '.data | keys | .[]' <<< "$listapijson"); do + type=`echo $listapijson | jq -r '.data['$k']["lifecycle-state"]'` + if [ $type = "$ctype" ]; then + id=`echo $listapijson | jq -r '.data['$k']["id"]'` + fi + done + + echo $id + + export api_key_id=$id + + list_api=`oci bds bds-api-key get --api-key-id $api_key_id --bds-instance-id $bds_instance_id ` + + #echo $list_api | jq '.data' + + data=`echo $list_api | jq '.data'` + echo "API DETAILS" + echo $data + region=`echo $list_api | jq -r '.data["default-region"]'` + fingerprint=`echo $list_api | jq -r '.data["fingerprint"]'` + keyalias=`echo $list_api | jq -r '.data["key-alias"]'` + lifecyc=`echo $list_api | jq -r '.data["lifecycle-state"]'` + tm=`echo $list_api | jq -r '.data["time-created"]'` + usid=`echo $list_api | jq -r '.data["user-id"]'` + tenid=`echo $list_api | jq -r '.data["tenant-id"]'` + pemfilepath=`echo $list_api | jq -r '.data["pemfilepath"]'` + +} + +function update_bds_config(){ + #change below variables for your cluster + CONFIG_FILE_TO_UPDATE="" + + #Used when for restarting components after config update + #Wait time before we poll for restart status. Default 30 seconds. 
Meaning, We poll for restart status every 30 seconds + WAIT_TIME_IN_SEC=30 + + #No of tries before we give up on the restart status. Default 20. With default WAIT_TIME_IN_SEC as 30, At max we wait for 10(20*30=600 seconds) minutes before we give up. + RETRY_COUNT=20 + + #INTERNAL USE ONLY + propObj="" + + get_apidetails + getUtilityNodesIps=$livyip + getlivyclustername + echo $getUtilityNodesIps + getClusterName=$cluster_name + for utilityNodeIp in $getUtilityNodesIps + do + echo "Current utility node ip: $utilityNodeIp" + str1=$(nslookup $utilityNodeIp | awk -v var=$utilityNodeIp '/name =/{print var "\t", $4}') + CONFIG_FILE_TO_UPDATE="core-site" #this is the file we're updating in this example + propObj=$(get_property_json) + echo $propObj + echo "calling add properties" + + #update key value pairs. Multiple key value pairs can be updated before doing update_ambari_config + add_properties "fs.oci.client.auth.fingerprint" $fingerprint + add_properties "fs.oci.client.auth.passphrase" $cluster_password + add_properties "fs.oci.client.auth.pemfilepath" $pemfilepath + add_properties "fs.oci.client.auth.tenantId" $tenid + add_properties "fs.oci.client.auth.userId" $usid + add_properties "fs.oci.client.regionCodeOrId" $region + add_properties "fs.s3.buffer.dir" /tmp + #Update it to ambari + echo "updating ambari config" + update_ambari_config + + echo "restarting all required components" + restart_required_components + + done + +} + + +#Method to collect the current config +function get_property_json(){ + allConfs=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName?fields=Clusters/desired_configs) #to get all the configs + currVersionLoc=".Clusters.desired_configs.\"$CONFIG_FILE_TO_UPDATE\".tag" #fetching current version for property + propVersion=$(echo $allConfs | jq $currVersionLoc | tr -d '"') + propJson=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X GET 
"https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/configurations?type=$CONFIG_FILE_TO_UPDATE&tag=$propVersion") #fetch property json + propLoc=".items[].properties" + propKeyVal=$(echo $propJson | jq $propLoc) + propObj="{\"properties\":$propKeyVal}" + echo $propObj +} + +#Method to add/update key value pair to existing config +function add_properties(){ + echo $1 $2 + echo $propObj + propObj=$(echo $propObj | jq '.properties += { "'$1'": "'$2'" }') + echo $propObj +} + +#Method to update config in ambari +function update_ambari_config(){ + parseableAddedProp=$(echo $propObj | jq '.properties') + echo $parseableAddedProp + timestamp=$(date +%s) + newVersion="version$timestamp" + finalJson='[{"Clusters":{"desired_config":[{"type":"'$CONFIG_FILE_TO_UPDATE'","tag":"'$newVersion'","properties":'$parseableAddedProp'}]}}]' + echo "CALING AMABRI API" + response_body_amb=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X PUT -d "$finalJson" "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName") + echo $response_body_amb + echo "DONE AMABRI API" +} + +#Method to restart required components +function restart_required_components(){ + echo "restarting all required components" + response_body=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X POST -d '{"RequestInfo":{"command":"RESTART","context":"Restart all required services from bootstrap script","operation_level":"host_component"},"Requests/resource_filters":[{"hosts_predicate":"HostRoles/stale_configs=true&HostRoles/cluster_name='$getClusterName'"}]}' "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests") + + echo "printing response_body: $response_body" + + idLoc=".Requests.id" + requestId=$(echo $response_body | jq $idLoc) + echo "request id is : $requestId" + + current_count=0 + while [[ $current_count -lt $RETRY_COUNT ]]; + do + current_count=$((current_count+1)) + response=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET 
https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests/$requestId) + request_status=$(echo $response | jq -r ".Requests.request_status") + echo "printing request_status: $request_status" + if [[ $request_status == "IN_PROGRESS" ]] || [[ $request_status == "PENDING" ]]; then + echo "current_count is : $current_count" + sleep $WAIT_TIME_IN_SEC + elif [[ $request_status == "COMPLETED" ]]; then + echo "Restart successful" + break + fi + done +} + +function create_api(){ + export bds_instance_id=$bdsid + export key_alias=$key_alias # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-key-alias + export passphrase=$b64p # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-passphrase + export user_id=$user_id + capi='SUCCEEDED' + oci bds bds-api-key create --bds-instance-id $bds_instance_id --key-alias $key_alias --passphrase $passphrase --user-id $user_id --wait-for-state $capi --max-wait-seconds $cwait +} + + +# Below is tenancy + +function create_cluster() { + + export compartment_id=$compartment_id + + master=1 + utility=1 + + worker=$workernode # This has to be replaced with Jenkins Paramter + + # Begin script in case all parameters are correct + echo "Generating json woth $master master ndoes $utility utility nodes and $worker worker nodes" + json="[" + + for i in `seq 1 $master` + do + json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": {{head_node_memory}}, \"ocpus\": {{head_node_cpu}}},\"subnetId\": \"$subnet\" }" + done + + for i in `seq 1 $utility` + do + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": {{head_node_memory}}, \"ocpus\": {{head_node_cpu}}},\"subnetId\": \"$subnet\" }" + done + + for i in `seq 1 $worker` + do + 
json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": {{worker_node_memory}}, \"ocpus\": {{worker_node_cpu}}},\"subnetId\": \"$subnet\" }" + done + + json="$json]" + printf '%s' "$json" > "nodes.json" + echo "File successfully generated and saved as nodes.json" + + echo "TRIGGERED CREATING THE BDS CLUSTER" + + export cluster_public_key + export cluster_version="ODH2_0" + export display_name=$display_name + export is_high_availability='false' + export is_secure='false' + cmd="oci bds instance create --cluster-admin-password '$b64p' --cluster-public-key '$cluster_public_key' --cluster-version '$cluster_version' --compartment-id '$compartment_id' --display-name '$display_name' --is-high-availability $is_high_availability --is-secure $is_secure --wait-for-state $cstate --max-wait-seconds $cwait --nodes file://nodes.json " + #echo $cmd + create_response=$(eval "$cmd") + echo "CLUSTER CREATED SUCCESSFULLY" +} + +function replace_host() { + echo "REPLACE THE HOSTS" + echo "" >> {{inventory_dir}}/hosts + echo "[bds-livy-node]" >> {{inventory_dir}}/hosts + echo "$livyip ansible_ssh_user=opc" >> {{inventory_dir}}/hosts + echo "" >> {{inventory_dir}}/hosts + +} + +# MAIN TO START + +b64p=$(printf '%s' "$cluster_password" | base64) +echo "CLUSTER PASSWORD ENCODED" +echo $compartment_id + +echo "CREATING BDS CLUSTER" + +create_cluster + +echo "FETCHING BDS ID" + +get_bdsid # This sets BDS ID + +echo "GET LIVY-AMBARI IP" + +getLivyip # This will be ambari ip also + +replace_host + +echo "CREATE OBJECT STORAGE API KEY" + +create_api + +echo "UPDATE BDS AMBARI CONFIG" + +get_apidetails + +update_bds_config + +echo "BDS Config Completed and Ambari Restarted" + diff --git a/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 new file mode 100755 index 0000000000..3e3df78b33 --- /dev/null +++ 
b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 @@ -0,0 +1,35 @@ +#!/bin/bash +# Version 1 - Author Nikesh Gogia nikesh.g.gogia@oracle.com + +compartment_id="{{compartment_id}}" +display_name="{{display_name}}" + + +echo "DELETE STARTED" +echo $display_name +echo $compartment_id + + +function get_bdsid() { + list_param=`oci bds instance list --compartment-id $compartment_id` + bdsid="NULL" + # echo $list_param | jq '.data' + state="ACTIVE" + disname="NULL" + for k in $(jq '.data | keys | .[]' <<< "$list_param"); do + # echo $k + cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'` + if [ "$cstate" = "$state" ]; then + disname=`echo $list_param | jq -r '.data['$k']["display-name"]'` + if [ "$disname" = "$display_name" ]; then + bdsid=`echo $list_param | jq -r '.data['$k']["id"]'` + fi + + fi + echo $bdsid + done +} + +get_bdsid + +[ "$bdsid" != "NULL" ] || { echo "No ACTIVE BDS cluster named $display_name found"; exit 1; }; yes Y | oci bds instance delete --bds-instance-id "$bdsid" diff --git a/ansible/roles/oci-cli/defaults/main.yml b/ansible/roles/oci-cli/defaults/main.yml new file mode 100644 index 0000000000..147a2e03f1 --- /dev/null +++ b/ansible/roles/oci-cli/defaults/main.yml @@ -0,0 +1 @@ +oci_cli_url: https://github.com/oracle/oci-cli/releases/download/v3.22.0/oci-cli-3.22.0-Ubuntu-18.04-Offline.zip \ No newline at end of file diff --git a/ansible/roles/oci-cli/tasks/main.yml b/ansible/roles/oci-cli/tasks/main.yml new file mode 100644 index 0000000000..389a9e8235 --- /dev/null +++ b/ansible/roles/oci-cli/tasks/main.yml @@ -0,0 +1,24 @@ +--- +- name: Download the installation file + get_url: + url: "{{ oci_cli_url }}" + dest: /tmp/ocicli.zip + +- name: Installing unzip + apt: + name: "{{item}}" + state: latest + with_items: + - zip + - unzip + +- name: Unzip the installer + unarchive: + src: /tmp/ocicli.zip + dest: /tmp/ + remote_src: yes + +- name: install oci cli + shell: ./oci-cli-installation/install.sh --install-dir {{ analytics_user_home }} --exec-dir {{ analytics_user_home }} --script-dir {{ analytics_user_home }} 
--accept-all-defaults + args: + chdir: /tmp/ diff --git a/ansible/roles/oci-cloud-storage/defaults/main.yml b/ansible/roles/oci-cloud-storage/defaults/main.yml new file mode 100644 index 0000000000..72727de167 --- /dev/null +++ b/ansible/roles/oci-cloud-storage/defaults/main.yml @@ -0,0 +1,3 @@ +oss_bucket_name: "" +oss_path: "" +local_file_or_folder_path: "" diff --git a/ansible/roles/oci-cloud-storage/tasks/delete-folder.yml b/ansible/roles/oci-cloud-storage/tasks/delete-folder.yml new file mode 100644 index 0000000000..6ed4e6b8b4 --- /dev/null +++ b/ansible/roles/oci-cloud-storage/tasks/delete-folder.yml @@ -0,0 +1,5 @@ +--- +- name: delete files and folders recursively + shell: "oci os object bulk-delete -ns {{oss_namespace}} -bn {{oss_bucket_name}} --prefix {{oss_path}} --force" + async: 3600 + poll: 10 diff --git a/ansible/roles/oci-cloud-storage/tasks/delete.yml b/ansible/roles/oci-cloud-storage/tasks/delete.yml new file mode 100644 index 0000000000..65d18843ca --- /dev/null +++ b/ansible/roles/oci-cloud-storage/tasks/delete.yml @@ -0,0 +1,7 @@ +- name: Ensure oci oss bucket exists + command: oci os bucket get --name {{ oss_bucket_name }} + +- name: Delete object from oci oss bucket + command: oci os object delete -bn {{ oss_bucket_name }} --name {{ oss_path }} --force + async: 3600 + poll: 10 \ No newline at end of file diff --git a/ansible/roles/oci-cloud-storage/tasks/download.yml b/ansible/roles/oci-cloud-storage/tasks/download.yml new file mode 100644 index 0000000000..bb32e9ed93 --- /dev/null +++ b/ansible/roles/oci-cloud-storage/tasks/download.yml @@ -0,0 +1,7 @@ +- name: Ensure oci oss bucket exists + command: oci os bucket get --name {{ oss_bucket_name }} + +- name: download files from oci oss bucket + command: oci os object get -bn {{ oss_bucket_name }} --name {{ oss_object_name }} --file {{ local_file_or_folder_path }} + async: 3600 + poll: 10 diff --git a/ansible/roles/oci-cloud-storage/tasks/main.yml b/ansible/roles/oci-cloud-storage/tasks/main.yml new 
file mode 100644 index 0000000000..6f9dca6b63 --- /dev/null +++ b/ansible/roles/oci-cloud-storage/tasks/main.yml @@ -0,0 +1,18 @@ +--- +- name: delete files from oci oss bucket + include: delete.yml + +- name: delete folders from oci oss bucket recursively + include: delete-folder.yml + + +- name: download file from oss + include: download.yml + +- name: upload files from a local to oci oss + include: upload.yml + +- name: upload files and folder from local directory to oci oss + include: upload-folder.yml + + diff --git a/ansible/roles/oci-cloud-storage/tasks/upload-folder.yml b/ansible/roles/oci-cloud-storage/tasks/upload-folder.yml new file mode 100644 index 0000000000..6e4d06562c --- /dev/null +++ b/ansible/roles/oci-cloud-storage/tasks/upload-folder.yml @@ -0,0 +1,8 @@ +--- +- name: Ensure oci oss bucket exists + command: oci os bucket get --name {{ oss_bucket_name }} + +- name: Upload folder to oci oss bucket + command: oci os object bulk-upload -bn {{ oss_bucket_name }} --prefix {{ oss_path }} --src-dir {{ local_file_or_folder_path }} --content-type auto + async: 3600 + poll: 10 diff --git a/ansible/roles/oci-cloud-storage/tasks/upload.yml b/ansible/roles/oci-cloud-storage/tasks/upload.yml new file mode 100644 index 0000000000..2771da5771 --- /dev/null +++ b/ansible/roles/oci-cloud-storage/tasks/upload.yml @@ -0,0 +1,8 @@ +--- +- name: Ensure oci oss bucket exists + command: oci os bucket get --name {{ oss_bucket_name }} + +- name: Upload to oci oss bucket + command: oci os object put -bn {{ oss_bucket_name }} --name {{ oss_path }} --file {{ local_file_or_folder_path }} --content-type auto --force + async: 3600 + poll: 10 diff --git a/ansible/roles/provision-oci-spark-cluster/defaults/main.yml b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml new file mode 100644 index 0000000000..777554f8ba --- /dev/null +++ b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml @@ -0,0 +1,44 @@ + +bucket: "telemetry-data-store" +model_version: "2.0" + 
+spark_folder: /usr/odh/2.0.1/spark + +# delete +guava_default_version: 14.0.1 +guava_default_jre_version_1: 26.0-jre +guava_default_jre_version_2: 27.0-jre +guice_default_version: 4.2.2 + +# add +guava_version: 19.0 +log4j_version: 2.16.0 +spark_redis_version: 2.5.0 +guava_jre_version: 24.1.1-jre +jedis_version: 3.2.0 +zip4j_version: 2.6.2 +guice_version: 3.0 + +jets3t_version: 0.9.7 +hadoop_aws_version: 2.7.3 +java_xmlbuilder_version: 1.1 +cassandra_connector_version: 3.2.0 +commons_pool_version: 2.0 + +guava_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_version}}/guava-{{guava_version}}.jar +guava_jre_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_jre_version}}/guava-{{guava_jre_version}}.jar +log4j_core_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-core/{{log4j_version}}/log4j-core-{{log4j_version}}.jar +log4j_api_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-api/{{log4j_version}}/log4j-api-{{log4j_version}}.jar +spark_redis_url: https://repo1.maven.org/maven2/com/redislabs/spark-redis_2.12/{{spark_redis_version}}/spark-redis_2.12-{{spark_redis_version}}.jar +jedis_url: https://repo1.maven.org/maven2/redis/clients/jedis/{{jedis_version}}/jedis-{{jedis_version}}.jar +zip4j_url: https://repo1.maven.org/maven2/net/lingala/zip4j/zip4j/{{zip4j_version}}/zip4j-{{zip4j_version}}.jar +guice_url: https://repo1.maven.org/maven2/com/google/inject/guice/{{guice_version}}/guice-{{guice_version}}.jar +guice_servlet_url: https://repo1.maven.org/maven2/com/google/inject/extensions/guice-servlet/{{guice_version}}/guice-servlet-{{guice_version}}.jar + +# jets3t_url: https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar +jets3t_url: https://repo1.maven.org/maven2/org/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar +hadoop_aws_url: 
https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/{{hadoop_aws_version}}/hadoop-aws-{{hadoop_aws_version}}.jar +java_xmlbuilder_url: https://repo1.maven.org/maven2/com/jamesmurty/utils/java-xmlbuilder/{{java_xmlbuilder_version}}/java-xmlbuilder-{{java_xmlbuilder_version}}.jar +common_pool_url: "https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/{{commons_pool_version}}/commons-pool2-{{commons_pool_version}}.jar" +spark_cassandra_connector_assembly_url: "https://repo1.maven.org/maven2/com/datastax/spark/spark-cassandra-connector-assembly_2.12/{{cassandra_connector_version}}/spark-cassandra-connector-assembly_2.12-{{cassandra_connector_version}}.jar" + diff --git a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml new file mode 100644 index 0000000000..4c2302d168 --- /dev/null +++ b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml @@ -0,0 +1,99 @@ +# - name: Adding azure blob variable to spark env file + # lineinfile: + # path: "{{spark_folder}}/conf/spark-env.sh" + # line: '{{item.var}}={{item.value}}' + # regexp: "{{ item.var }}.*" + # with_items: + # - {var: 'azure_storage_key', value: '{{ azure_private_storage_account_name }}'} + # - {var: 'azure_storage_secret', value: '{{ azure_private_storage_account_key }}'} + # no_log: true + # when: cloud_service_provider == "azure" + +- name: Remove guava-jre, guice default jars + become: yes + file: + path: "{{ spark_folder }}/jars/{{item.var}}-{{item.value}}.jar" + state: absent + with_items: + - {var: 'guava', value: '{{ guava_default_version }}'} + - {var: 'guava', value: '{{ guava_default_jre_version_1 }}'} + - {var: 'guava', value: '{{ guava_default_jre_version_2 }}'} + - {var: 'guice', value: '{{ guice_default_version }}'} + - {var: 'guice-servlet', value: '{{ guice_default_version }}'} + +- name: Download guava and copy to Spark jars folder + become: yes + get_url: url={{ guava_url }} dest={{ spark_folder 
}}/jars/guava-{{guava_version}}.jar timeout=1000 force=no + +- name: Download guava_jre_url and copy to Spark jars folder + become: yes + get_url: url={{ guava_jre_url }} dest={{ spark_folder }}/jars/guava-{{guava_jre_version}}.jar timeout=1000 force=no + +- name: Download log4j api and copy to Spark jars folder + become: yes + get_url: url={{ log4j_api_url }} dest={{ spark_folder }}/jars/log4j-api-{{log4j_version}}.jar timeout=1000 force=no + +- name: Download log4j core and copy to Spark jars folder + become: yes + get_url: url={{ log4j_core_url }} dest={{ spark_folder }}/jars/log4j-core-{{log4j_version}}.jar timeout=1000 force=no + +- name: Download spark-redis and copy to Spark jars folder + become: yes + get_url: url={{ spark_redis_url }} dest={{ spark_folder }}/jars/spark-redis_2.12-{{spark_redis_version}}.jar timeout=1000 force=no + +- name: Download jedis and copy to Spark jars folder + become: yes + get_url: url={{ jedis_url }} dest={{ spark_folder }}/jars/jedis-{{jedis_version}}.jar timeout=1000 force=no + +- name: Download zip4j and copy to Spark jars folder + become: yes + get_url: url={{ zip4j_url }} dest={{ spark_folder }}/jars/zip4j-{{zip4j_version}}.jar timeout=1000 force=no + +- name: Download guice and copy to Spark jars folder + become: yes + get_url: url={{ guice_url }} dest={{ spark_folder }}/jars/guice-{{guice_version}}.jar timeout=1000 force=no + +- name: Download guice-servlet and copy to Spark jars folder + become: yes + get_url: url={{ guice_servlet_url }} dest={{ spark_folder }}/jars/guice-servlet-{{guice_version}}.jar timeout=1000 force=no + +- name: Download jets3t and copy to Spark jars folder + become: yes + get_url: url={{ jets3t_url }} dest={{ spark_folder }}/jars/jets3t-{{jets3t_version}}.jar timeout=1000 force=no + +- name: Download hadoop_aws and copy to Spark jars folder + become: yes + get_url: url={{ hadoop_aws_url }} dest={{ spark_folder }}/jars/hadoop-aws-{{hadoop_aws_version}}.jar timeout=1000 force=no + +- name: Download 
java_xmlbuilder and copy to Spark jars folder + become: yes + get_url: url={{ java_xmlbuilder_url }} dest={{ spark_folder }}/jars/java-xmlbuilder-{{java_xmlbuilder_version}}.jar timeout=1000 force=no + +- name: Download spark_cassandra_connector and copy to Spark jars folder + become: yes + get_url: url={{ spark_cassandra_connector_assembly_url }} dest={{ spark_folder }}/jars/spark-cassandra-connector-assembly_2.12-{{cassandra_connector_version}}.jar timeout=1000 force=no + +- name: Download common_pool_url and copy to Spark jars folder + become: yes + get_url: url={{ common_pool_url }} dest={{ spark_folder }}/jars/commons-pool2-{{commons_pool_version}}.jar timeout=1000 force=no + + +- name: Download config to livy + command: hdfs dfs -get -f oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf {{ spark_folder }}/conf/application.conf + +- name: Download jets3t config to livy + command: hdfs dfs -get -f oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/jets3t.properties {{ spark_folder }}/conf/jets3t.properties + + +- name: Update log4j.properties + become: yes + blockinfile: + path: "{{ spark_folder }}/conf/log4j.properties" + block: | + log4j.logger.org.ekstep.analytics=INFO + log4j.appender.org.ekstep.analytics=org.apache.log4j.RollingFileAppender + log4j.appender.org.ekstep.analytics.File=./joblog.log + log4j.appender.org.ekstep.analytics.MaxFileSize=${log4jspark.log.maxfilesize} + log4j.appender.org.ekstep.analytics.MaxBackupIndex=${log4jspark.log.maxbackupindex} + log4j.appender.org.ekstep.analytics.layout=org.apache.log4j.PatternLayout + log4j.appender.org.ekstep.analytics.layout.ConversionPattern=%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n diff --git a/ansible/spark-cluster-job-submit.yml b/ansible/spark-cluster-job-submit.yml index ba4e017a23..8924fce8f2 100644 --- a/ansible/spark-cluster-job-submit.yml +++ b/ansible/spark-cluster-job-submit.yml @@ -6,6 +6,7 @@ environment: AZURE_STORAGE_ACCOUNT: 
"{{sunbird_private_storage_account_name}}" AZURE_STORAGE_KEY: "{{sunbird_private_storage_account_key}}" + OCI_CLI_AUTH: instance_principal roles: - data-products-deploy diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml index 8d51ef406b..b2817c5f9a 100755 --- a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml +++ b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml @@ -1,5 +1,9 @@ {{- if and .Values.reloader.watchGlobally (.Values.reloader.rbac.enabled) }} +{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }} +apiVersion: rbac.authorization.k8s.io/v1 +{{ else }} apiVersion: rbac.authorization.k8s.io/v1beta1 +{{- end }} kind: ClusterRole metadata: labels: diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml index 28c9d4b916..748e52528d 100755 --- a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml +++ b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml @@ -1,5 +1,9 @@ {{- if and .Values.reloader.watchGlobally (.Values.reloader.rbac.enabled) }} +{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }} +apiVersion: rbac.authorization.k8s.io/v1 +{{ else }} apiVersion: rbac.authorization.k8s.io/v1beta1 +{{- end }} kind: ClusterRoleBinding metadata: labels: diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml index 5827f5cdcb..b654024031 100755 --- a/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml +++ b/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml @@ -1,5 +1,9 @@ {{- if and (not (.Values.reloader.watchGlobally)) (.Values.reloader.rbac.enabled) }} +{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }} 
+apiVersion: rbac.authorization.k8s.io/v1 +{{ else }} apiVersion: rbac.authorization.k8s.io/v1beta1 +{{- end }} kind: Role metadata: labels: diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml index 94fb1f838b..d915db304d 100755 --- a/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml +++ b/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml @@ -1,5 +1,9 @@ {{- if and (not (.Values.reloader.watchGlobally)) (.Values.reloader.rbac.enabled) }} +{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }} +apiVersion: rbac.authorization.k8s.io/v1 +{{ else }} apiVersion: rbac.authorization.k8s.io/v1beta1 +{{- end }} kind: RoleBinding metadata: labels: diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 22630c3015..cbafb57c22 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -8,7 +8,12 @@ azure_account: {{ azure_account }} azure_secret: {{ azure_secret }} s3_access_key: {{ s3_storage_key }} s3_secret_key: {{ s3_storage_secret }} +{% if cloud_service_provider == "oci" %} +s3_endpoint: {{ oci_flink_s3_storage_endpoint }} +{% else %} s3_endpoint: {{ s3_storage_endpoint }} +{% endif %} + s3_path_style_access: {{ s3_path_style_access }} serviceMonitor: @@ -158,7 +163,8 @@ base_config: | {% if checkpoint_store_type == "azure" %} base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} - base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} + # base.url = 
"s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} + base.url = "s3://"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} {% endif %} } } @@ -236,7 +242,6 @@ ingest-router: taskmanager.memory.process.size: {{ flink_job_names['ingest-router'].taskmanager_process_memory }} jobmanager.memory.process.size: {{ flink_job_names['ingest-router'].jobmanager_process_memory }} - telemetry-extractor: telemetry-extractor: |+ include file("/data/flink/conf/base-config.conf") @@ -287,6 +292,7 @@ telemetry-extractor: taskmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].taskmanager_process_memory }} jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} + pipeline-preprocessor: pipeline-preprocessor: |+ include file("/data/flink/conf/base-config.conf") diff --git a/kubernetes/helm_charts/secor/config/secor.common.properties b/kubernetes/helm_charts/secor/config/secor.common.properties index 7050ebcf1b..ebe2bb7d26 100644 --- a/kubernetes/helm_charts/secor/config/secor.common.properties +++ b/kubernetes/helm_charts/secor/config/secor.common.properties @@ -23,12 +23,12 @@ secor.kafka.topic_blacklist= # Choose what to fill according to the service you are using # in the choice option you can fill S3, GS, Swift or Azure -cloud.service=Azure +cloud.service={{ $.Values.storage_type }} # AWS authentication credentials. # Leave empty if using IAM role-based authentication with s3a filesystem. -aws.access.key= -aws.secret.key= +aws.access.key={{ $.Values.s3_access_key }} +aws.secret.key={{ $.Values.s3_secret_id }} aws.role= # Optional Proxy Setting. Set to true to enable proxy @@ -51,12 +51,12 @@ aws.proxy.http.port= # secor.upload.manager.class. 
# # http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region -aws.region= -aws.endpoint= +aws.region={{ $.Values.s3_region }} +aws.endpoint={{ $.Values.s3_endpoint }} # Toggle the AWS S3 client between virtual host style access and path style # access. See http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html -aws.client.pathstyleaccess=false +aws.client.pathstyleaccess={{ $.Values.s3_path_style_access }} ########################### # START AWS S3 ENCRYPTION # @@ -357,7 +357,8 @@ secor.max.message.size.bytes=100000 # Class that will manage uploads. Default is to use the hadoop # interface to S3. -secor.upload.manager.class=com.pinterest.secor.uploader.AzureUploadManager +# secor.upload.manager.class=com.pinterest.secor.uploader.AzureUploadManager +secor.upload.manager.class=com.pinterest.secor.uploader.S3UploadManager #Set below property to your timezone, and the events will be parsed and converted to the timezone specified secor.message.timezone=UTC diff --git a/kubernetes/helm_charts/secor/config/secor.partition.properties b/kubernetes/helm_charts/secor/config/secor.partition.properties index 743e1bab86..cbcc742081 100644 --- a/kubernetes/helm_charts/secor/config/secor.partition.properties +++ b/kubernetes/helm_charts/secor/config/secor.partition.properties @@ -14,7 +14,9 @@ # limitations under the License. include=secor.properties +{{- if eq .Values.storage_type "Azure" }} include=secor.azure.properties +{{- end }} # Name of the Kafka consumer group. secor.kafka.group={{ get (get $.Values.secor_jobs $.Release.Name) "consumer_group" }} @@ -23,7 +25,7 @@ secor.kafka.group={{ get (get $.Values.secor_jobs $.Release.Name) "consumer_grou secor.message.parser.class={{ get (get $.Values.secor_jobs $.Release.Name) "message_parser" }} # S3 path where sequence files are stored. -secor.s3.path= +secor.s3.path={{- get (get $.Values.secor_jobs $.Release.Name) "base_path" }} # Swift path where sequence files are stored. 
secor.swift.path=secor_dev/partition diff --git a/kubernetes/helm_charts/secor/config/secor.properties b/kubernetes/helm_charts/secor/config/secor.properties index 6f2876d1de..4a724a051a 100644 --- a/kubernetes/helm_charts/secor/config/secor.properties +++ b/kubernetes/helm_charts/secor/config/secor.properties @@ -10,7 +10,7 @@ include=secor.common.properties ############### # Name of the s3 bucket where log files are stored. -secor.s3.bucket= +secor.s3.bucket={{ $.Values.s3_bucket_name }} ############### # Using Swift # diff --git a/kubernetes/helm_charts/secor/values.j2 b/kubernetes/helm_charts/secor/values.j2 index 4aa2e0ee83..183bce6c8e 100644 --- a/kubernetes/helm_charts/secor/values.j2 +++ b/kubernetes/helm_charts/secor/values.j2 @@ -2,8 +2,24 @@ azure_account: "{{ sunbird_private_storage_account_name }}" azure_secret: "{{ sunbird_private_storage_account_key }}" azure_container_name: "telemetry-data-store" -namespace: {{ secor_namespace }} +s3_access_key: "{{s3_storage_key}}" +s3_secret_id: "{{s3_storage_secret}}" +s3_region: "{{oci_region}}" +s3_endpoint: "{{s3_storage_endpoint}}" +s3_path_style_access: "{{s3_path_style_access}}" +s3_bucket_name: "telemetry-data-store" + +{% if cloud_service_provider == 'oci' -%} +storage_type: "S3" +storageClass: "oci-bv" +{%- else -%} +storage_type: "Azure" storageClass: {{ secor_storage_class | default('default') }} +{%- endif %} + + + +namespace: {{ secor_namespace }} imagepullsecrets: {{ imagepullsecrets }} secor_jobs: diff --git a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile index 9749d35b36..926c773ff0 100644 --- a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile +++ b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile @@ -26,7 +26,7 @@ node() { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/spark-cluster-job-submit.yml" - 
ansibleExtraArgs = "--tags ${params.job_type} --extra-vars \"job_id=${params.job_id} mode=${params.mode} partitions=${params.partitions} parallelisation=${params.parallelisation} start_date=${params.start_date} end_date=${params.end_date} batch_id=${params.batch_identifier} sparkMaster=${params.sparkMaster} pause_min=${params.pause_min} selected_partitions=${params.selected_partitions}\" --vault-password-file /var/lib/jenkins/secrets/vault-pass -vvvv " + ansibleExtraArgs = "--tags ${params.job_type} --extra-vars \"job_id=${params.job_id} mode=${params.mode} partitions=${params.partitions} parallelisation=${params.parallelisation} start_date=${params.start_date} end_date=${params.end_date} batch_id=${params.batch_identifier} sparkMaster=${params.sparkMaster} pause_min=${params.pause_min} vcn_name=${params.vcn_name} bds_cluster_name=${params.bds_cluster_name} selected_partitions=${params.selected_partitions}\" --vault-password-file /var/lib/jenkins/secrets/vault-pass -vvvv " values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) diff --git a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel index 4b9891d62a..480e880609 100644 --- a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel +++ b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel @@ -26,7 +26,7 @@ node() { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/spark-cluster-job-submit.yml" - ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type} -vvvv " + ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} oci_install_loc=${params.oci_install_loc} 
batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type}" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds new file mode 100644 index 0000000000..418d5ded0a --- /dev/null +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -0,0 +1,57 @@ + +@Library('deploy-conf') _ +node('build-slave') { + try { + String ANSI_GREEN = "\u001B[32m" + String ANSI_NORMAL = "\u001B[0m" + String ANSI_BOLD = "\u001B[1m" + String ANSI_RED = "\u001B[31m" + String ANSI_YELLOW = "\u001B[33m" + + ansiColor('xterm') { + stage('Checkout') { + checkout scm + } + + stage('copy cluster creation script') { + values = [:] + envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim() + module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() + jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() + currentWs = sh(returnStdout: true, script: 'pwd').trim() + ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} head_node_cpu=${params.head_node_cpu} worker_node_cpu=${params.worker_node_cpu} head_node_memory=${params.head_node_memory} worker_node_memory=${params.worker_node_memory} key_alias=${params.key_alias} user_id=${params.user_id} bucket=${params.bucket} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + values.put('currentWs', currentWs) + values.put('env', envDir) + values.put('module', module) + values.put('jobName', jobName) + values.put('ansiblePlaybook', ansiblePlaybook) + values.put('ansibleExtraArgs', ansibleExtraArgs) + println values + 
ansible_playbook_run(values) + + } + stage('create and provision spark OCI BDS') { + oci_namespace=params.oci_namespace + bucket=params.bucket + withCredentials([usernamePassword(credentialsId: 'oci-bds-credential', passwordVariable: 'cluster_password', usernameVariable: 'ambari_user')]) { + sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + cd /tmp + ./create-cluster.sh $ambari_user $cluster_password + export ANSIBLE_HOST_KEY_CHECKING=False + ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace bucket=$bucket" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + ''' + } + + } + + } + } + catch (err) { + currentBuild.result = "FAILURE" + throw err + } + +} diff --git a/pipelines/provision/spark/Jenkinsfile.bds.test b/pipelines/provision/spark/Jenkinsfile.bds.test new file mode 100644 index 0000000000..bd6de3ad34 --- /dev/null +++ b/pipelines/provision/spark/Jenkinsfile.bds.test @@ -0,0 +1,60 @@ + +@Library('deploy-conf') _ +node('build-slave') { + try { + String ANSI_GREEN = "\u001B[32m" + String ANSI_NORMAL = "\u001B[0m" + String ANSI_BOLD = "\u001B[1m" + String ANSI_RED = "\u001B[31m" + String ANSI_YELLOW = "\u001B[33m" + + ansiColor('xterm') { + stage('Checkout') { + checkout scm + } + + stage('copy cluster creation script') { + values = [:] + envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim() + module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() + jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() + currentWs = sh(returnStdout: true, script: 'pwd').trim() + ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} 
subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.type}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + values.put('currentWs', currentWs) + values.put('env', envDir) + values.put('module', module) + values.put('jobName', jobName) + values.put('ansiblePlaybook', ansiblePlaybook) + values.put('ansibleExtraArgs', ansibleExtraArgs) + println values + ansible_playbook_run(values) + } + stage('create and provision spark OCI BDS') { + oci_namespace=params.oci_namespace + bds-livy-node-ip=params.bds-livy-node-ip + sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + cd /tmp + #./create_cluster_bds.sh + + export inventory_dir=/var/lib/jenkins/workspace/Provision/dev/DataPipeline/__SparkBDSCluster3/ansible/inventory/env + echo "" >> $inventory_dir/hosts + echo "[bds-livy-node]" >> $inventory_dir/hosts + echo "$bds-livy-node-ip ansible_ssh_user=opc" >> $inventory_dir/hosts + echo "" >> $inventory_dir/hosts + + ANSIBLE_HOST_KEY_CHECKING=False + ansible-playbook -i $currentws/ansible/inventory/env/hosts $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + ''' + } + + } + } + catch (err) { + currentBuild.result = "FAILURE" + throw err + } + +} diff --git a/pipelines/provision/spark/Jenkinsfile.delete b/pipelines/provision/spark/Jenkinsfile.delete index 93aed171cb..ce5e0b19dd 100644 --- a/pipelines/provision/spark/Jenkinsfile.delete +++ b/pipelines/provision/spark/Jenkinsfile.delete @@ -1,3 +1,4 @@ + @Library('deploy-conf') _ node('build-slave') { try { @@ -18,8 +19,8 @@ node('build-slave') { module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 
'pwd').trim() - ansiblePlaybook = "${currentWs}/ansible/azure-hdinsight-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"azure_resource_group=${params.resource_group} subscription_id=${env.subscription_id} tenant_id=${env.tenant_id} cluster_state=${params.type}\" --tags copy-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) @@ -29,15 +30,16 @@ node('build-slave') { println values ansible_playbook_run(values) } - stage('delete spark HDinsight cluster') { + stage('create and provision spark OCI BDS') { storage_container=params.storage_container - withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { + //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" cd /tmp - ./delete-cluster.sh $spuser $sppass + ./delete-cluster.sh ''' - } - + //} }