From 12de5e6a13902526273dd54e8b2ec7b880a92a97 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 22 Dec 2022 13:16:36 +1100
Subject: [PATCH 001/161] added artifact upload role for oci oss

Signed-off-by: Deepak Devadathan
---
 ansible/artifacts-upload.yml                   | 10 ++++++++++
 .../roles/oci-cloud-storage/defaults/main.yml  |  3 +++
 .../oci-cloud-storage/tasks/delete-folder.yml  |  5 +++++
 .../roles/oci-cloud-storage/tasks/delete.yml   |  7 +++++++
 .../roles/oci-cloud-storage/tasks/download.yml |  7 +++++++
 ansible/roles/oci-cloud-storage/tasks/main.yml | 18 ++++++++++++++++++
 .../oci-cloud-storage/tasks/upload-folder.yml  |  8 ++++++++
 .../roles/oci-cloud-storage/tasks/upload.yml   |  8 ++++++++
 8 files changed, 66 insertions(+)
 create mode 100644 ansible/roles/oci-cloud-storage/defaults/main.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/delete-folder.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/delete.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/download.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/main.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/upload-folder.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/upload.yml

diff --git a/ansible/artifacts-upload.yml b/ansible/artifacts-upload.yml
index 3bdbe73017..3bc192e194 100644
--- a/ansible/artifacts-upload.yml
+++ b/ansible/artifacts-upload.yml
@@ -39,3 +39,13 @@
         aws_access_key_id: "{{ cloud_artifact_storage_accountname }}"
         aws_secret_access_key: "{{ cloud_artifact_storage_secret }}"
       when: cloud_service_provider == "aws"
+
+    - name: upload artifact to oci oss
+      include_role:
+        name: oci-cloud-storage
+        tasks_from: upload.yml
+      vars:
+        local_file_or_folder_path: "{{ artifact_path }}"
+        oss_bucket_name: "{{ cloud_storage_artifacts_bucketname }}"
+        oss_path: "{{ artifact }}"
+      when: cloud_service_provider == "oci"
\ No newline at end of file
diff --git a/ansible/roles/oci-cloud-storage/defaults/main.yml b/ansible/roles/oci-cloud-storage/defaults/main.yml
new file mode 100644
index 0000000000..72727de167
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/defaults/main.yml
@@ -0,0 +1,3 @@
+oss_bucket_name: ""
+oss_path: ""
+local_file_or_folder_path: ""
diff --git a/ansible/roles/oci-cloud-storage/tasks/delete-folder.yml b/ansible/roles/oci-cloud-storage/tasks/delete-folder.yml
new file mode 100644
index 0000000000..6ed4e6b8b4
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/delete-folder.yml
@@ -0,0 +1,5 @@
+---
+- name: delete files and folders recursively
+  shell: "oci os object bulk-delete -ns {{oss_namespace}} -bn {{oss_bucket_name}} --prefix {{oss_path}} --force"
+  async: 3600
+  poll: 10
diff --git a/ansible/roles/oci-cloud-storage/tasks/delete.yml b/ansible/roles/oci-cloud-storage/tasks/delete.yml
new file mode 100644
index 0000000000..65d18843ca
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/delete.yml
@@ -0,0 +1,7 @@
+- name: Ensure oci oss bucket exists
+  command: oci os bucket get --name {{ oss_bucket_name }}
+
+- name: Delete object from oci oss bucket
+  command: oci os object delete -bn {{ oss_bucket_name }} --name {{ oss_path }} --force
+  async: 3600
+  poll: 10
\ No newline at end of file
diff --git a/ansible/roles/oci-cloud-storage/tasks/download.yml b/ansible/roles/oci-cloud-storage/tasks/download.yml
new file mode 100644
index 0000000000..63e776c348
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/download.yml
@@ -0,0 +1,7 @@
+- name: Ensure oci oss bucket exists
+  command: oci os bucket get --name {{ oss_bucket_name }}
+
+- name: download files from oci oss bucket
+  command: oci os object bulk-download -bn {{ oss_bucket_name }} --prefix {{ oss_path }} --dest-dir {{ local_file_or_folder_path }}
+  async: 3600
+  poll: 10
\ No newline at end of file
diff --git a/ansible/roles/oci-cloud-storage/tasks/main.yml b/ansible/roles/oci-cloud-storage/tasks/main.yml
new file mode 100644
index 0000000000..6f9dca6b63
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/main.yml
@@ -0,0 +1,18 @@
+---
+- name: delete files from oci oss bucket
+  include: delete.yml
+
+- name: delete folders from oci oss bucket recursively
+  include: delete-folder.yml
+
+
+- name: download file from oss
+  include: download.yml
+
+- name: upload files from a local path to oci oss
+  include: upload.yml
+
+- name: upload files and folder from local directory to oci oss
+  include: upload-folder.yml
+
+
diff --git a/ansible/roles/oci-cloud-storage/tasks/upload-folder.yml b/ansible/roles/oci-cloud-storage/tasks/upload-folder.yml
new file mode 100644
index 0000000000..6e4d06562c
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/upload-folder.yml
@@ -0,0 +1,8 @@
+---
+- name: Ensure oci oss bucket exists
+  command: oci os bucket get --name {{ oss_bucket_name }}
+
+- name: Upload folder to oci oss bucket
+  command: oci os object bulk-upload -bn {{ oss_bucket_name }} --prefix {{ oss_path }} --src-dir {{ local_file_or_folder_path }} --content-type auto
+  async: 3600
+  poll: 10
diff --git a/ansible/roles/oci-cloud-storage/tasks/upload.yml b/ansible/roles/oci-cloud-storage/tasks/upload.yml
new file mode 100644
index 0000000000..0edcbc793f
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/upload.yml
@@ -0,0 +1,8 @@
+---
+- name: Ensure oci oss bucket exists
+  command: oci os bucket get --name {{ oss_bucket_name }}
+
+- name: Upload to oci oss bucket
+  command: oci os object put -bn {{ oss_bucket_name }} --name {{ oss_path }} --file {{ local_file_or_folder_path }} --content-type auto
+  async: 3600
+  poll: 10

From ea2c77890301fa52608fa8b08b67d4085383b68b Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 22 Dec 2022 13:27:38 +1100
Subject: [PATCH 002/161] overwrite file in oss if the file exists

Signed-off-by: Deepak Devadathan
---
 ansible/roles/oci-cloud-storage/tasks/upload.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/oci-cloud-storage/tasks/upload.yml b/ansible/roles/oci-cloud-storage/tasks/upload.yml
index 0edcbc793f..2771da5771 100644
--- a/ansible/roles/oci-cloud-storage/tasks/upload.yml
+++ b/ansible/roles/oci-cloud-storage/tasks/upload.yml
@@ -3,6 +3,6 @@
   command: oci os bucket get --name {{ oss_bucket_name }}
 
 - name: Upload to oci oss bucket
-  command: oci os object put -bn {{ oss_bucket_name }} --name {{ oss_path }} --file {{ local_file_or_folder_path }} --content-type auto
+  command: oci os object put -bn {{ oss_bucket_name }} --name {{ oss_path }} --file {{ local_file_or_folder_path }} --content-type auto --force
   async: 3600
   poll: 10

From 852a610e155d000a0151ffd7f2e53798be4d5a43 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 10:31:33 +1100
Subject: [PATCH 003/161] added artifact download for oci role

Signed-off-by: Deepak Devadathan
---
 ansible/artifacts-download.yml                     | 11 +++++++++++
 ansible/roles/oci-cloud-storage/tasks/download.yml |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/ansible/artifacts-download.yml b/ansible/artifacts-download.yml
index 9db0efb42f..e216c10999 100644
--- a/ansible/artifacts-download.yml
+++ b/ansible/artifacts-download.yml
@@ -38,3 +38,14 @@
         aws_access_key_id: "{{ cloud_artifact_storage_accountname }}"
         aws_secret_access_key: "{{ cloud_artifact_storage_secret }}"
       when: cloud_service_provider == "aws"
+
+
+    - name: download artifact from oci oss
+      include_role:
+        name: oci-cloud-storage
+        tasks_from: download.yml
+      vars:
+        local_file_or_folder_path: "{{ artifact_path }}"
+        oss_bucket_name: "{{ cloud_storage_artifacts_bucketname }}"
+        oss_object_name: "{{ artifact }}"
+      when: cloud_service_provider == "oci"
diff --git a/ansible/roles/oci-cloud-storage/tasks/download.yml b/ansible/roles/oci-cloud-storage/tasks/download.yml
index 63e776c348..bb32e9ed93 100644
--- a/ansible/roles/oci-cloud-storage/tasks/download.yml
+++ b/ansible/roles/oci-cloud-storage/tasks/download.yml
@@ -2,6 +2,6 @@
   command: oci os bucket get --name {{ oss_bucket_name }}
 
 - name: download files from oci oss bucket
-  command: oci os object bulk-download -bn {{ oss_bucket_name }} --prefix {{ oss_path }} --dest-dir {{ local_file_or_folder_path }}
+  command: oci os object get -bn {{ oss_bucket_name }} --name {{ oss_object_name }} --file {{ local_file_or_folder_path }}
   async: 3600
-  poll: 10
\ No newline at end of file
+  poll: 10

From 3ee3b85664a41de53e72722ec6f2e7362c43e8c9 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 13:28:37 +1100
Subject: [PATCH 004/161] added role to install oci-cli

Signed-off-by: Deepak Devadathan
---
 ansible/roles/oci-cli/defaults/main.yml |  1 +
 ansible/roles/oci-cli/tasks/main.yml    | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 ansible/roles/oci-cli/defaults/main.yml
 create mode 100644 ansible/roles/oci-cli/tasks/main.yml

diff --git a/ansible/roles/oci-cli/defaults/main.yml b/ansible/roles/oci-cli/defaults/main.yml
new file mode 100644
index 0000000000..147a2e03f1
--- /dev/null
+++ b/ansible/roles/oci-cli/defaults/main.yml
@@ -0,0 +1 @@
+oci_cli_url: https://github.com/oracle/oci-cli/releases/download/v3.22.0/oci-cli-3.22.0-Ubuntu-18.04-Offline.zip
\ No newline at end of file
diff --git a/ansible/roles/oci-cli/tasks/main.yml b/ansible/roles/oci-cli/tasks/main.yml
new file mode 100644
index 0000000000..f3d5ad29b8
--- /dev/null
+++ b/ansible/roles/oci-cli/tasks/main.yml
@@ -0,0 +1,24 @@
+---
+- name: Download the installation file
+  get_url:
+    url: "{{ oci_cli_url }}"
+    dest: /tmp/ocicli.zip
+
+- name: Installing unzip
+  apt:
+    name: "{{item}}"
+    state: latest
+  with_items:
+    - zip
+    - unzip
+
+- name: Unzip the installer
+  unarchive:
+    src: /tmp/ocicli.zip
+    dest: /tmp/
+    remote_src: yes
+
+- name: install oci cli
+  shell: ./oci-cli-installation/install.sh --accept-all-defaults
+  args:
+    chdir: /tmp/

From 3ee652e6f582bed23d13d0c1fef5e43db1100238 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 13:33:40 +1100
Subject: [PATCH 005/161] added csp choice based cloud cli installation

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-always/meta/main.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/analytics-bootstrap-always/meta/main.yml b/ansible/roles/analytics-bootstrap-always/meta/main.yml
index af15826aef..3566881eff 100644
--- a/ansible/roles/analytics-bootstrap-always/meta/main.yml
+++ b/ansible/roles/analytics-bootstrap-always/meta/main.yml
@@ -1,6 +1,7 @@
 ---
 dependencies:
     - { role: jdk11 , become: yes }
-    - { role: azure-cli , become: yes }
+    - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
+    - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
 
 

From 4ffea4d3aeea58ccf2bef6a8561b19aa7531e125 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 13:48:27 +1100
Subject: [PATCH 006/161] added env variables for oci cli for analytics user

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-spark/tasks/main.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index 13ba75f78a..e6f138edd2 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -10,6 +10,11 @@
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}
    - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'}
+    - {var: 'OCI_CLI_USER', value: '{{ oci_cli_user_ocid }}'}
+    - {var: 'OCI_CLI_REGION', value: '{{ oci_cli_region }}'}
+    - {var: 'OCI_CLI_FINGERPRINT', value: '{{ oci_cli_fingerprint }}'}
+    - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
+    - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
 
 - name: Adding ENV Vars to spark servers environment.
   become: yes

From 8a70a47ce21d491e14a799253ec855291c305286 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 14:05:39 +1100
Subject: [PATCH 007/161] install oci cli as analytics user

Signed-off-by: Deepak Devadathan
---
 ansible/roles/oci-cli/tasks/main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ansible/roles/oci-cli/tasks/main.yml b/ansible/roles/oci-cli/tasks/main.yml
index f3d5ad29b8..68adfa6077 100644
--- a/ansible/roles/oci-cli/tasks/main.yml
+++ b/ansible/roles/oci-cli/tasks/main.yml
@@ -19,6 +19,8 @@
     remote_src: yes
 
 - name: install oci cli
+  become: yes
+  become_user: "{{ analytics_user }}"
   shell: ./oci-cli-installation/install.sh --accept-all-defaults
   args:
     chdir: /tmp/

From 69f2e83a79fb80d5f798ed8728827d4fcdb3871f Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 14:11:44 +1100
Subject: [PATCH 008/161] adding oci cli in PATH variable for analytics user

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-spark/tasks/main.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index e6f138edd2..bdd164fc23 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -16,6 +16,15 @@
     - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
     - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
 
+- name: Adding PATH for oci cli Vars to bashrc file of spark.
+  become: yes
+  become_user: "{{ analytics_user }}"
+  lineinfile:
+    path: '{{ analytics_user_home }}/.bashrc'
+    line: 'export PATH={{ analytics_user_home }}/bin:$PATH'
+    regexp: "export PATH={{ analytics_user_home }}/bin.*"
+  when: cloud_service_provider == "oci"
+
 - name: Adding ENV Vars to spark servers environment.
  become: yes
  lineinfile:

From 4ce637daf46712d23065780a923ebf535856e014 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 14:39:57 +1100
Subject: [PATCH 009/161] install oci-cli in analytics home location

Signed-off-by: Deepak Devadathan
---
 ansible/roles/oci-cli/tasks/main.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ansible/roles/oci-cli/tasks/main.yml b/ansible/roles/oci-cli/tasks/main.yml
index 68adfa6077..389a9e8235 100644
--- a/ansible/roles/oci-cli/tasks/main.yml
+++ b/ansible/roles/oci-cli/tasks/main.yml
@@ -19,8 +19,6 @@
     remote_src: yes
 
 - name: install oci cli
-  become: yes
-  become_user: "{{ analytics_user }}"
-  shell: ./oci-cli-installation/install.sh --accept-all-defaults
+  shell: ./oci-cli-installation/install.sh --install-dir {{ analytics_user_home }} --exec-dir {{ analytics_user_home }} --script-dir {{ analytics_user_home }} --accept-all-defaults
   args:
     chdir: /tmp/

From 273337473fc1c332b3f10e615490cafdbbf059e1 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 15:41:29 +1100
Subject: [PATCH 010/161] disabled cloud cli temporarily

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-always/meta/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/analytics-bootstrap-always/meta/main.yml b/ansible/roles/analytics-bootstrap-always/meta/main.yml
index 3566881eff..0fd0637734 100644
--- a/ansible/roles/analytics-bootstrap-always/meta/main.yml
+++ b/ansible/roles/analytics-bootstrap-always/meta/main.yml
@@ -1,7 +1,7 @@
 ---
 dependencies:
     - { role: jdk11 , become: yes }
-    - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
-    - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
+    # - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
+    # - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
 
 

From 2e4cf614a94eb53dd25530876ea01b0bc9273397 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 15:42:44 +1100
Subject: [PATCH 011/161] setting env variable with double quotes

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-spark/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index bdd164fc23..cd0cc563d8 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -5,7 +5,7 @@
   become_user: "{{ analytics_user }}"
   lineinfile:
     path: '{{ analytics_user_home }}/.bashrc'
-    line: 'export {{item.var}}={{item.value}}'
+    line: 'export {{item.var}}="{{item.value}}"'
     regexp: "export {{ item.var }}.*"
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}

From 2bbab172de10422a638140db1b3cdc6dd7cbd6bc Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 16:34:14 +1100
Subject: [PATCH 012/161] placed oci cli env variables in /etc/environment file

Signed-off-by: Deepak Devadathan
---
 .../analytics-bootstrap-spark/tasks/main.yml | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index cd0cc563d8..a16122fecf 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -5,16 +5,12 @@
   become_user: "{{ analytics_user }}"
   lineinfile:
     path: '{{ analytics_user_home }}/.bashrc'
-    line: 'export {{item.var}}="{{item.value}}"'
+    line: 'export {{item.var}}={{item.value}}'
    regexp: "export {{ item.var }}.*"
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}
     - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'}
-    - {var: 'OCI_CLI_USER', value: '{{ oci_cli_user_ocid }}'}
-    - {var: 'OCI_CLI_REGION', value: '{{ oci_cli_region }}'}
-    - {var: 'OCI_CLI_FINGERPRINT', value: '{{ oci_cli_fingerprint }}'}
-    - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
-    - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
+
 
 - name: Adding PATH for oci cli Vars to bashrc file of spark.
   become: yes
@@ -29,7 +25,7 @@
   become: yes
   lineinfile:
     path: '/etc/environment'
-    line: '{{item.var}}={{item.value}}'
+    line: '{{item.var}}="{{item.value}}"'
     regexp: "{{ item.var }}.*"
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}
@@ -45,6 +41,11 @@
     - {var: 'STORAGE_PROVIDER', value: 'AZURE'}
     - {var: 'ENV', value: '{{env}}'}
     - {var: 'KAFKA_BROKER_HOST', value: "{{groups['processing-cluster-kafka'][0]}}:9092"}
+    - {var: 'OCI_CLI_USER', value: '{{ oci_cli_user_ocid }}'}
+    - {var: 'OCI_CLI_REGION', value: '{{ oci_cli_region }}'}
+    - {var: 'OCI_CLI_FINGERPRINT', value: '{{ oci_cli_fingerprint }}'}
+    - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
+    - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
 
 - name: Install required python packages
   become: yes

From d0d1b1d238d198f09e792802753de67e2f559d4d Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 16:50:55 +1100
Subject: [PATCH 013/161] added jinja2 template for oci cli config

Signed-off-by: Deepak Devadathan
---
 .../analytics-bootstrap-spark/tasks/main.yml | 27 ++++++++++++++-----
 .../template/oci-cli-config.j2               |  6 +++++
 .../template/oci-key.j2                      |  1 +
 3 files changed, 28 insertions(+), 6 deletions(-)
 create mode 100644 ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2
 create mode 100644 ansible/roles/analytics-bootstrap-spark/template/oci-key.j2

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index a16122fecf..663f76d68a 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -21,11 +21,31 @@
     regexp: "export PATH={{ analytics_user_home }}/bin.*"
   when: cloud_service_provider == "oci"
 
+- name: Configure OCI cli
+  become: yes
+  become_user: "{{ analytics_user }}"
+  file:
+    path: "{{ analytics_user_home }}/.oci"
+    state: directory
+  when: cloud_service_provider == "oci"
+
+- name: Create OCI cli config location
+  become: yes
+  become_user: "{{ analytics_user }}"
+  template: src=oci-key.j2 dest={{ analytics_user_home }}/.oci/oci-key.pem mode=600 owner={{ analytics_user }} group={{ analytics_group }}
+  when: cloud_service_provider == "oci"
+
+- name: Create OCI cli config file
+  become: yes
+  become_user: "{{ analytics_user }}"
+  template: src=oci-cli-config.j2 dest={{ analytics_user_home }}/.oci/config mode=600 owner={{ analytics_user }} group={{ analytics_group }}
+  when: cloud_service_provider == "oci"
+
 - name: Adding ENV Vars to spark servers environment.
  become: yes
  lineinfile:
    path: '/etc/environment'
-    line: '{{item.var}}="{{item.value}}"'
+    line: '{{item.var}}={{item.value}}'
     regexp: "{{ item.var }}.*"
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}
@@ -41,11 +61,6 @@
     - {var: 'STORAGE_PROVIDER', value: 'AZURE'}
     - {var: 'ENV', value: '{{env}}'}
     - {var: 'KAFKA_BROKER_HOST', value: "{{groups['processing-cluster-kafka'][0]}}:9092"}
-    - {var: 'OCI_CLI_USER', value: '{{ oci_cli_user_ocid }}'}
-    - {var: 'OCI_CLI_REGION', value: '{{ oci_cli_region }}'}
-    - {var: 'OCI_CLI_FINGERPRINT', value: '{{ oci_cli_fingerprint }}'}
-    - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
-    - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
 
 - name: Install required python packages
   become: yes
diff --git a/ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2 b/ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2
new file mode 100644
index 0000000000..56cf3ba3ef
--- /dev/null
+++ b/ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2
@@ -0,0 +1,6 @@
+[DEFAULT]
+user={{oci_cli_user_ocid }}
+fingerprint={{oci_cli_fingerprint}}
+key_file=/home/analytics/.oci/oci-key.pem
+tenancy={{oci_cli_tenancy}}
+region={{oci_cli_region}}
\ No newline at end of file
diff --git a/ansible/roles/analytics-bootstrap-spark/template/oci-key.j2 b/ansible/roles/analytics-bootstrap-spark/template/oci-key.j2
new file mode 100644
index 0000000000..b969594016
--- /dev/null
+++ b/ansible/roles/analytics-bootstrap-spark/template/oci-key.j2
@@ -0,0 +1 @@
+{{ oci_cli_key_content }}
\ No newline at end of file

From cc11113213dd522c43f0a6365ff844666d4027c6 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 16:58:28 +1100
Subject: [PATCH 014/161] renamed dir template to templates

Signed-off-by: Deepak Devadathan
---
 .../{template => templates}/oci-cli-config.j2 | 0
 .../{template => templates}/oci-key.j2        | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename ansible/roles/analytics-bootstrap-spark/{template => templates}/oci-cli-config.j2 (100%)
 rename ansible/roles/analytics-bootstrap-spark/{template => templates}/oci-key.j2 (100%)

diff --git a/ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2 b/ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2
similarity index 100%
rename from ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2
rename to ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2
diff --git a/ansible/roles/analytics-bootstrap-spark/template/oci-key.j2 b/ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2
similarity index 100%
rename from ansible/roles/analytics-bootstrap-spark/template/oci-key.j2
rename to ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2

From 2e11eaf0677df8793be6d4737ffcbabcbe7092ca Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 17:03:58 +1100
Subject: [PATCH 015/161] disabled only oci cli temporarily

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-always/meta/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/analytics-bootstrap-always/meta/main.yml b/ansible/roles/analytics-bootstrap-always/meta/main.yml
index 0fd0637734..e04443fb98 100644
--- a/ansible/roles/analytics-bootstrap-always/meta/main.yml
+++ b/ansible/roles/analytics-bootstrap-always/meta/main.yml
@@ -1,7 +1,7 @@
 ---
 dependencies:
     - { role: jdk11 , become: yes }
-    # - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
+    - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
     # - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
 
 

From 054d9dbd233efd46a688af4a640c065188d5dd1e Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 17:06:01 +1100
Subject: [PATCH 016/161] testing idempotency for oci cli

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-always/meta/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/analytics-bootstrap-always/meta/main.yml b/ansible/roles/analytics-bootstrap-always/meta/main.yml
index e04443fb98..3566881eff 100644
--- a/ansible/roles/analytics-bootstrap-always/meta/main.yml
+++ b/ansible/roles/analytics-bootstrap-always/meta/main.yml
@@ -2,6 +2,6 @@
 dependencies:
     - { role: jdk11 , become: yes }
     - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
-    # - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
+    - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
 
 

From b1de73736c5147f67c41ff9e860b2f939b2d56da Mon Sep 17 00:00:00 2001
From: Kenneth Heung
Date: Sun, 25 Dec 2022 13:43:11 +0800
Subject: [PATCH 017/161] change Spark provision task with Ruby 2.6

In release 4.6.0, Ruby 2.6 is being used. Don't know why in 4.10 / 5.0,
Ruby suddenly became 2.2. In fact, there is NO ruby2.2-dev for bionic.
Changing to Ruby 2.6 to proceed.
---
 ansible/roles/analytics-spark-provision/tasks/main.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/analytics-spark-provision/tasks/main.yml b/ansible/roles/analytics-spark-provision/tasks/main.yml
index 01eef6c9e1..65731cecb9 100644
--- a/ansible/roles/analytics-spark-provision/tasks/main.yml
+++ b/ansible/roles/analytics-spark-provision/tasks/main.yml
@@ -117,28 +117,31 @@
     recurse: yes
   become: yes
 
+# kenneth changed to install Ruby 2.6 as per R.4.6.0
 - name: Install latest ruby
   become: yes
   become_user: "{{ analytics_user }}"
-  shell: "export PATH=$PATH:/home/analytics/.rvm/bin && rvm install ruby-2.2"
+  shell: "export PATH=$PATH:/home/analytics/.rvm/bin && rvm install ruby-2.6"
 
 - name: Add ruby repository
   become: yes
   apt_repository:
     repo: ppa:brightbox/ruby-ng
 
+# kenneth changed to install ruby-dev 2.6 as per R.4.6.0 - there is no ruby2.2-dev in bionic
 - name: Install latest ruby-dev
   become: yes
   apt:
-    name: "ruby2.2-dev"
+    name: "ruby2.6-dev"
     state: installed
     update_cache: true
     cache_valid_time: 3600
 
+# changed to ruby 2.6 as per R.4.6.0
 - name: Install ruby-kafka
   become: yes
   become_user: "{{ analytics_user }}"
-  shell: "bash -ilc 'export PATH=$PATH:/home/analytics/.rvm/bin && rvm --default use ruby-2.2 && gem install ruby-kafka'"
+  shell: "bash -ilc 'export PATH=$PATH:/home/analytics/.rvm/bin && rvm --default use ruby-2.6 && gem install --user-install --no-document ruby-kafka'"
 
 - name: Download Kafka-2.11
   become: yes

From 07d4e5e06f4348d4a2aac72a7bd2533b94929ff5 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 28 Dec 2022 00:56:20 +1100
Subject: [PATCH 018/161] added apiversion selector

Signed-off-by: Deepak Devadathan
---
 .../helm_charts/bootstrap/reloader/templates/clusterrole.yaml | 4 ++++
 .../bootstrap/reloader/templates/clusterrolebinding.yaml      | 4 ++++
 kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml | 4 ++++
 .../helm_charts/bootstrap/reloader/templates/rolebinding.yaml | 4 ++++
 4 files changed, 16 insertions(+)

diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml
index 8d51ef406b..b2817c5f9a 100755
--- a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml
+++ b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml
@@ -1,5 +1,9 @@
 {{- if and .Values.reloader.watchGlobally (.Values.reloader.rbac.enabled) }}
+{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }}
+apiVersion: rbac.authorization.k8s.io/v1
+{{ else }}
 apiVersion: rbac.authorization.k8s.io/v1beta1
+{{- end }}
 kind: ClusterRole
 metadata:
   labels:
diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml
index 28c9d4b916..748e52528d 100755
--- a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml
+++ b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml
@@ -1,5 +1,9 @@
 {{- if and .Values.reloader.watchGlobally (.Values.reloader.rbac.enabled) }}
+{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }}
+apiVersion: rbac.authorization.k8s.io/v1
+{{ else }}
 apiVersion: rbac.authorization.k8s.io/v1beta1
+{{- end }}
 kind: ClusterRoleBinding
 metadata:
   labels:
diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml
index 5827f5cdcb..b654024031 100755
--- a/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml
+++ b/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml
@@ -1,5 +1,9 @@
 {{- if and (not (.Values.reloader.watchGlobally)) (.Values.reloader.rbac.enabled) }}
+{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }}
+apiVersion: rbac.authorization.k8s.io/v1
+{{ else }}
 apiVersion: rbac.authorization.k8s.io/v1beta1
+{{- end }}
 kind: Role
 metadata:
   labels:
diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml
index 94fb1f838b..d915db304d 100755
--- a/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml
+++ b/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml
@@ -1,5 +1,9 @@
 {{- if and (not (.Values.reloader.watchGlobally)) (.Values.reloader.rbac.enabled) }}
+{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }}
+apiVersion: rbac.authorization.k8s.io/v1
+{{ else }}
 apiVersion: rbac.authorization.k8s.io/v1beta1
+{{- end }}
 kind: RoleBinding
 metadata:
   labels:

From 9ba64978a30186794f206d4c487369911b3e584a Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 16:40:54 +1100
Subject: [PATCH 019/161] added the oci oss bucket upload

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index df495a5d4a..9ac97b4d43 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -2,6 +2,11 @@
 - name: Ensure azure blob storage container exists
   command: az storage container create --name {{ bucket }}
   when: dp_object_store_type == "azure"
+
+- name: Ensure oci oss bucket exists
+  command: oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+
   tags:
     - always
 

From 5fa0e0040d6414ce302c373ed0a94133d368c65b Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 16:46:46 +1100
Subject: [PATCH 020/161] added full path of oci from analytics home

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 9ac97b4d43..0921ff5531 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -4,7 +4,7 @@
   when: dp_object_store_type == "azure"
 
 - name: Ensure oci oss bucket exists
-  command: oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}
+  command: "{{ analytics.home }}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
   tags:
     - always

From 329e8200a63f9ac2d152c5e22b9329bb343f33ad Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 16:48:37 +1100
Subject: [PATCH 021/161] correction in oci cli location

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 0921ff5531..ab31c09e87 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -4,7 +4,7 @@
   when: dp_object_store_type == "azure"
 
 - name: Ensure oci oss bucket exists
-  command: "{{ analytics.home }}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"
+  command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
   tags:
     - always

From 86b024d326716aa5a48f2bb1d280dae8009ad2f6 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 16:54:28 +1100
Subject: [PATCH 022/161] check bucket existence before creating

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index ab31c09e87..a0977df794 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -4,8 +4,13 @@
   when: dp_object_store_type == "azure"
 
 - name: Ensure oci oss bucket exists
-  command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"
+  command: oci os bucket get --name {{ bucket }}
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+  register: check_bucket
+
+- name: Ensure oci oss bucket exists
+  command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci" and check_bucket.rc !=0
   tags:
     - always

From 51894dbbd89174cf3e4a77fc206d4283570d90ae Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:02:49 +1100
Subject: [PATCH 023/161] testing the logic for register variable

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index a0977df794..7a836a8bf2 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -5,8 +5,9 @@
 
 - name: Ensure oci oss bucket exists
   command: oci os bucket get --name {{ bucket }}
-  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
   register: check_bucket
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+
 
 - name: Ensure oci oss bucket exists
   command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"

From cbb93c158b26b822c017a7773f8da01e207e4519 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:04:37 +1100
Subject: [PATCH 024/161] using full path for oci cli

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 7a836a8bf2..2053e72ded 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -4,11 +4,10 @@
   when: dp_object_store_type == "azure"
 
 - name: Ensure oci oss bucket exists
-  command: oci os bucket get --name {{ bucket }}
+  command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}"
   register: check_bucket
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
-
 
 - name: Ensure oci oss bucket exists
   command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"

From 47ccff0153da65ad3881350fb3b396083eac3d0b Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:06:45 +1100
Subject: [PATCH 025/161] testing the bucket check

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 2053e72ded..b677d2f13d 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -8,7 +8,7 @@
   register: check_bucket
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
 
-- name: Ensure oci oss bucket exists
+- name: Create oci oss bucket
   command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci" and check_bucket.rc !=0
 

From d3b24a979c3c62473ba30b659398062b742a5163 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:08:27 +1100
Subject: [PATCH 026/161] added the always tag individually

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index b677d2f13d..28efbd3516 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -2,16 +2,19 @@
 - name: Ensure azure blob storage container exists
   command: az storage container create --name {{ bucket }}
   when: dp_object_store_type == "azure"
-
+  tags:
+    - always
+
 - name: Ensure oci oss bucket exists
   command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}"
   register: check_bucket
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
-
+  tags:
+    - always
+
 - name: Create oci oss bucket
   command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci" and check_bucket.rc !=0
-
   tags:
     - always

From 3c7931040aa6ee5f577c328f77a32663ba793325 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:23:05 +1100
Subject: [PATCH 027/161] place an oci os upload command for every upload

Signed-off-by: Deepak Devadathan
---
 .../roles/data-products-deploy/tasks/main.yml | 49 ++++++++++++++++++-
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 28efbd3516..4d454f9d6f 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -4,7 +4,7 @@
   when: dp_object_store_type == "azure"
   tags:
     - always
-
+
 - name: Ensure oci oss bucket exists
   command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}"
   register: check_bucket
@@ -27,6 +27,15 @@
   command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
+  tags:
+    - dataproducts-spark-cluster
+
+- name: Copy Core Data Products to oci oss
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
   tags:
     - dataproducts-spark-cluster
 
@@ -40,8 +49,17 @@
   command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar -f {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
   tags:
-    - ed-dataproducts-spark-cluster
+    - ed-dataproducts-spark-cluster
+
+- name: Copy Ed Data Products to oci oss
+  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+  tags:
+    - ed-dataproducts-spark-cluster
 
 - name: Copy Framework Library
   copy: src={{ analytics_core_artifact }} dest={{ analytics.home }}/models-{{ model_version }}
@@ -52,6 +70,15 @@
   command: az storage blob upload --overwrite --debug -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }}
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
+  tags:
+    - framework-spark-cluster
+
+- name: Copy Framework Library to oci oss
+  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
   tags:
     - framework-spark-cluster
 
@@ -64,6 +91,15 @@
   command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }}
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
+  tags:
+    - framework-spark-cluster
+
+- name: Copy Scruid Library to oci oss
+  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
   tags:
     - framework-spark-cluster
 
@@ -113,9 +149,18 @@
   command: az storage blob upload --overwrite -c {{ bucket }} -f {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
   tags:
     - framework-spark-cluster
 
+- name: Copy configuration file to oci oss
+  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+  tags:
+    - framework-spark-cluster
+
 - name: Copy log4j2 xml file
   template: src=log4j2.xml.j2 dest={{ analytics.home }}/models-{{ model_version }}/log4j2.xml mode=755 owner={{ analytics_user }} group={{ analytics_group }}
   tags: [ dataproducts, framework, ed-dataproducts ]

From 5e4749ecb56cb6d0daea3f89f01d20cf95e3e026 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:24:34 +1100
Subject: [PATCH 028/161] corrected typo for oci cli command

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 4d454f9d6f..2f2f3a8063 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -75,7 +75,7 @@
     - framework-spark-cluster
 
 - name: Copy Framework Library to oci oss
-  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force
   async: 3600
   poll: 10
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"

From b7daec66ba39c5ce62ca2f20d4150c0365d125a7 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:27:55 +1100
Subject: [PATCH 029/161] corrected typo for oci cli command

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 2f2f3a8063..f4cbf7c216 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -54,7 +54,7 @@
     - ed-dataproducts-spark-cluster
 
 - name: Copy Ed Data Products to oci oss
-  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force
   async: 3600
   poll: 10
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
@@ -96,7 +96,7 @@
     - framework-spark-cluster
 
 - name: Copy Scruid Library to oci oss
-  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force
   async: 3600
   poll: 10
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
@@ -154,7 +154,7 @@
     - framework-spark-cluster
 
 - name: Copy configuration file to oci oss
-  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force
   async: 3600
   poll: 10
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"

From 4dbefc932fd61d708f9ecb4800ea08498c04632a Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Sat, 31 Dec 2022 08:14:04 +1100
Subject: [PATCH 030/161] testing secor changes for oci oss

Signed-off-by: Deepak Devadathan
---
 .../secor/config/secor.common.properties     | 12 ++++++------
 .../secor/config/secor.partition.properties  |  2 +-
 .../helm_charts/secor/config/secor.properties |  2 +-
 kubernetes/helm_charts/secor/values.j2        | 15 +++++++++++++++
 4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/kubernetes/helm_charts/secor/config/secor.common.properties b/kubernetes/helm_charts/secor/config/secor.common.properties
index 7050ebcf1b..fbe441c187 100644
--- a/kubernetes/helm_charts/secor/config/secor.common.properties
+++ b/kubernetes/helm_charts/secor/config/secor.common.properties
@@ -23,12 +23,12 @@
 secor.kafka.topic_blacklist=
 
 # Choose what to fill according to the service you are using
 # in the choice option you can fill S3, GS, Swift or Azure
-cloud.service=Azure
+cloud.service={{ $.Values.storage_type }}
 
 # AWS authentication credentials.
 # Leave empty if using IAM role-based authentication with s3a filesystem.
-aws.access.key=
-aws.secret.key=
+aws.access.key={{ $.Values.s3_access_key }}
+aws.secret.key={{ $.Values.s3_secret_id }}
 aws.role=
 
 # Optional Proxy Setting. Set to true to enable proxy
 aws.proxy.http.port=
 
 # secor.upload.manager.class.
 #
 # http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
-aws.region=
-aws.endpoint=
+aws.region={{ $.Values.s3_region }}
+aws.endpoint={{ $.Values.s3_endpoint }}
 
 # Toggle the AWS S3 client between virtual host style access and path style
 # access. See http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
-aws.client.pathstyleaccess=false
+aws.client.pathstyleaccess={{ $.Values.s3_path_style_access }}
 
 ###########################
 # START AWS S3 ENCRYPTION #
diff --git a/kubernetes/helm_charts/secor/config/secor.partition.properties b/kubernetes/helm_charts/secor/config/secor.partition.properties
index 743e1bab86..0bee7818ea 100644
--- a/kubernetes/helm_charts/secor/config/secor.partition.properties
+++ b/kubernetes/helm_charts/secor/config/secor.partition.properties
@@ -23,7 +23,7 @@
 secor.kafka.group={{ get (get $.Values.secor_jobs $.Release.Name) "consumer_group" }}
 secor.message.parser.class={{ get (get $.Values.secor_jobs $.Release.Name) "message_parser" }}
 
 # S3 path where sequence files are stored.
-secor.s3.path=
+secor.s3.path={{- get (get $.Values.secor_jobs $.Release.Name) "base_path" }}
 
 # Swift path where sequence files are stored.
 secor.swift.path=secor_dev/partition
diff --git a/kubernetes/helm_charts/secor/config/secor.properties b/kubernetes/helm_charts/secor/config/secor.properties
index 6f2876d1de..4a724a051a 100644
--- a/kubernetes/helm_charts/secor/config/secor.properties
+++ b/kubernetes/helm_charts/secor/config/secor.properties
@@ -10,7 +10,7 @@
 include=secor.common.properties
 
 ###############
 # Name of the s3 bucket where log files are stored.
-secor.s3.bucket=
+secor.s3.bucket={{ $.Values.s3_bucket_name }}
 
 ###############
 # Using Swift #
diff --git a/kubernetes/helm_charts/secor/values.j2 b/kubernetes/helm_charts/secor/values.j2
index 4aa2e0ee83..d09a05b9d6 100644
--- a/kubernetes/helm_charts/secor/values.j2
+++ b/kubernetes/helm_charts/secor/values.j2
@@ -2,6 +2,21 @@
 azure_account: "{{ sunbird_private_storage_account_name }}"
 azure_secret: "{{ sunbird_private_storage_account_key }}"
 azure_container_name: "telemetry-data-store"
+s3_access_key: "{{s3_storage_key}}"
+s3_secret_id: "{{s3_storage_secret}}"
+s3_region: "{{oci_region}}"
+s3_endpoint: "{{s3_storage_endpoint}}"
+s3_path_style_access: "{{s3_path_style_access}}"
+s3_bucket_name: "telemetry-data-store"
+
+{% if cloud_service_provider == 'oci' -%}
+storage_type: "S3"
+{%- else -%}
+storage_type: "Azure"
+{%- endif %}
+
+
+
 namespace: {{ secor_namespace }}
 storageClass: {{ secor_storage_class | default('default') }}
 imagepullsecrets: {{ imagepullsecrets }}

From 7b7854d82a0deea484d5f13b5a650deb166c6e02 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Sat, 31 Dec 2022 08:22:55 +1100
Subject: [PATCH 031/161] added oci-bv as the storage class

Signed-off-by: Deepak Devadathan
---
 kubernetes/helm_charts/secor/values.j2 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kubernetes/helm_charts/secor/values.j2 b/kubernetes/helm_charts/secor/values.j2
index d09a05b9d6..62610ee5fe 100644
--- a/kubernetes/helm_charts/secor/values.j2
+++ b/kubernetes/helm_charts/secor/values.j2
@@ -11,6 +11,7 @@
 
 {% if cloud_service_provider == 'oci' -%}
 storage_type: "S3"
+secor_storage_class: "oci-bv"
 {%- else -%}
 storage_type: "Azure"
 {%- endif %}

From 8fd5084dd067d877f2333e8747b397ad9c9460a4 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Sat, 31 Dec 2022 08:32:10 +1100
Subject: [PATCH 032/161] added storageclass selection

Signed-off-by: Deepak Devadathan
---
 kubernetes/helm_charts/secor/values.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kubernetes/helm_charts/secor/values.j2 b/kubernetes/helm_charts/secor/values.j2
index 62610ee5fe..183bce6c8e 100644
--- a/kubernetes/helm_charts/secor/values.j2
+++ b/kubernetes/helm_charts/secor/values.j2
@@ -11,15 +11,15 @@
 s3_bucket_name: "telemetry-data-store"
 
 {% if cloud_service_provider == 'oci' -%}
 storage_type: "S3"
-secor_storage_class: "oci-bv"
+storageClass: "oci-bv"
 {%- else -%}
 storage_type: "Azure"
+storageClass: {{ secor_storage_class | default('default') }}
 {%- endif %}
 
 
 
 namespace: {{ secor_namespace }}
-storageClass: {{ secor_storage_class | default('default') }}
 imagepullsecrets: {{ imagepullsecrets }}
 
 secor_jobs:

From 6a4f5513d4e6279d4bc795eaad120ee7b1647aba Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Sat, 31 Dec 2022 08:41:05 +1100
Subject: [PATCH 033/161] added a condition to include secor.azure.properties

Signed-off-by: Deepak Devadathan
---
 kubernetes/helm_charts/secor/config/secor.partition.properties | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kubernetes/helm_charts/secor/config/secor.partition.properties b/kubernetes/helm_charts/secor/config/secor.partition.properties
index 0bee7818ea..cbcc742081 100644
--- a/kubernetes/helm_charts/secor/config/secor.partition.properties
+++ b/kubernetes/helm_charts/secor/config/secor.partition.properties
@@ -14,7 +14,9 @@
 # limitations under the License.
 
 include=secor.properties
+{{- if eq .Values.storage_type "Azure" }}
 include=secor.azure.properties
+{{- end }}
 
 # Name of the Kafka consumer group.
 secor.kafka.group={{ get (get $.Values.secor_jobs $.Release.Name) "consumer_group" }}

From ea1e6659ae5f6c026d4afac58c90016df0aad638 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Sat, 31 Dec 2022 08:53:46 +1100
Subject: [PATCH 034/161] using S3UploadManager in common.properties

Signed-off-by: Deepak Devadathan
---
 kubernetes/helm_charts/secor/config/secor.common.properties | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kubernetes/helm_charts/secor/config/secor.common.properties b/kubernetes/helm_charts/secor/config/secor.common.properties
index fbe441c187..ebe2bb7d26 100644
--- a/kubernetes/helm_charts/secor/config/secor.common.properties
+++ b/kubernetes/helm_charts/secor/config/secor.common.properties
@@ -357,7 +357,8 @@
 # Class that will manage uploads. Default is to use the hadoop
 # interface to S3.
-secor.upload.manager.class=com.pinterest.secor.uploader.AzureUploadManager
+# secor.upload.manager.class=com.pinterest.secor.uploader.AzureUploadManager
+secor.upload.manager.class=com.pinterest.secor.uploader.S3UploadManager
 
 #Set below property to your timezone, and the events will be parsed and converted to the timezone specified
 secor.message.timezone=UTC

From b80f4d0e73ab27b0953ccd115bcd99e7b351b196 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 2 Jan 2023 08:31:41 +1100
Subject: [PATCH 035/161] update flink-conf for telemetry-extractor

Signed-off-by: Deepak Devadathan
---
 kubernetes/helm_charts/datapipeline_jobs/values.j2 | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2
index 22630c3015..49c712f120 100644
--- a/kubernetes/helm_charts/datapipeline_jobs/values.j2
+++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2
@@ -286,6 +286,11 @@
       heartbeat.interval: 5000
       taskmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].taskmanager_process_memory }}
       jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }}
+      fs.s3.access.key: {{s3_storage_key}}
+      fs.s3.secret.key: {{s3_storage_secret}}
+      fs.s3.endpoint: {{s3_storage_endpoint}}
+      fs.s3.path.style.access: {{s3_path_style_access}}
+
 
 pipeline-preprocessor:
   pipeline-preprocessor: |+

From e46a8188276c6ce22b262398ca7ab65acc13c52e Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 2 Jan 2023 08:36:16 +1100
Subject: [PATCH 036/161] hardcoding base.url

Signed-off-by: Deepak Devadathan
---
 kubernetes/helm_charts/datapipeline_jobs/values.j2 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2
index 49c712f120..f2ff94c244 100644
--- a/kubernetes/helm_charts/datapipeline_jobs/values.j2
+++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2
@@ -158,7 +158,8 @@
     {% if checkpoint_store_type == "azure" %}
         base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir}
     {% elif checkpoint_store_type == "s3" %}
-        base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir}
+        #base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir}
+        base.url = s3://dev-data-store/checkpoint
     {% endif %}
       }
     }

From 0ec7478e83a98668a20415efc982a4c60d2b210c Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 2 Jan 2023 08:38:29 +1100
Subject: [PATCH 037/161] hardcode base.url with double quotes

Signed-off-by: Deepak Devadathan
---
 kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2
index f2ff94c244..ed51046a59 100644
--- a/kubernetes/helm_charts/datapipeline_jobs/values.j2
+++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2
@@ -159,7 +159,7 @@
         base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir}
     {% elif checkpoint_store_type == "s3" %}
         #base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir}
-        base.url = s3://dev-data-store/checkpoint
+        base.url = "s3://dev-data-store/checkpoint"
     {% endif %}
       }
     }

From 61c4c1707ac63d8cc92655a095f3613d4f8d1c02 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 2 Jan 2023 08:46:45 +1100
Subject: [PATCH 038/161] updated flink-conf for telemetry extractor

Signed-off-by: Deepak Devadathan
---
 kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2
index ed51046a59..cba8d4e92f 100644
--- a/kubernetes/helm_charts/datapipeline_jobs/values.j2
+++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2
@@ -291,6 +291,10 @@
       fs.s3.secret.key: {{s3_storage_secret}}
       fs.s3.endpoint: {{s3_storage_endpoint}}
       fs.s3.path.style.access: {{s3_path_style_access}}
+      s3.access-key: {{s3_storage_key}}
+      s3.secret-key: {{s3_storage_secret}}
+      s3.endpoint: {{s3_storage_endpoint}}
+      s3.path.style.access: {{s3_path_style_access}}
 
 
 pipeline-preprocessor:

From b9ddecd28a76a3d0c06e4dd98ae1af3a2f90dd51 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 2 Jan 2023 08:53:46 +1100
Subject: [PATCH 039/161] removed changes from flink-conf

Signed-off-by: Deepak Devadathan
---
 kubernetes/helm_charts/datapipeline_jobs/values.j2 | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2
index cba8d4e92f..8a0476c32b 100644
--- a/kubernetes/helm_charts/datapipeline_jobs/values.j2
+++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2
@@ -291,10 +291,8 @@
       fs.s3.secret.key: {{s3_storage_secret}}
      fs.s3.endpoint: {{s3_storage_endpoint}}
       fs.s3.path.style.access: {{s3_path_style_access}}
-      s3.access-key: {{s3_storage_key}}
-      s3.secret-key: {{s3_storage_secret}}
-      s3.endpoint: {{s3_storage_endpoint}}
-      s3.path.style.access: {{s3_path_style_access}}
+
+
 
 
 pipeline-preprocessor:

From e83136b6342940051fbbbc971e7c8712a296a2ac Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 2 Jan 2023 08:59:49 +1100
Subject: [PATCH 040/161] temporary change

Signed-off-by: Deepak Devadathan
---
 .../templates/flink_job_deployment.yaml       |   6 -
 .../flink_job_deployment.yaml.disabled        | 245 ++++++++++++++++++
 .../helm_charts/datapipeline_jobs/values.j2   |   5 +-
 3 files changed, 249 insertions(+), 7 deletions(-)
 create mode 100644 kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled

diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml
index 10e6b62181..c794c7e702 100644
--- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml
+++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml
@@ -112,12 +112,6 @@
         "--job-classname={{ .Values.job_classname }}",
 {{- if eq .Values.checkpoint_store_type "azure" }}
         "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}",
-{{- end }}
-{{- if eq .Values.checkpoint_store_type "s3" }}
-        "-Ds3.access-key={{ .Values.s3_access_key }}",
-        "-Ds3.secret-key={{ .Values.s3_secret_key }}",
-        "-Ds3.endpoint={{ .Values.s3_endpoint }}",
-        "-Ds3.path.style.access={{ .Values.s3_path_style_access }}",
 {{- end }}
         "-Dweb.submit.enable=false",
         "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter",
diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled
new file mode 100644
index 0000000000..10e6b62181
--- /dev/null
+++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled
@@ -0,0 +1,245 @@
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Release.Name }}-jobmanager
+  namespace: {{ .Values.namespace }}
+  labels:
+    app: flink
+    component: {{ .Release.Name }}-jobmanager
+spec:
+  type: ClusterIP
+  ports:
+  - name: rpc
+    port: {{ .Values.jobmanager.rpc_port }}
+  - name: blob
+    port: {{ .Values.jobmanager.blob_port }}
+  - name: query
+    port: {{ .Values.jobmanager.query_port }}
+  - name: ui
+    port: {{ .Values.jobmanager.ui_port }}
+  - name: prom
+    port: {{ .Values.jobmanager.prom_port }}
+  selector:
+    app: flink
+    component: {{ .Release.Name }}-jobmanager
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Release.Name }}-jobmanager-webui
+  namespace: {{ .Values.namespace }}
+{{- if .Values.service.annotations }}
+{{- with .Values.service.annotations }}
+  annotations:
+{{ toYaml . | indent 4 }}
+{{- end }}
+{{- end }}
+spec:
+  {{- if eq .Values.service.type "ClusterIP" }}
+  type: ClusterIP
+  {{- end }}
+  {{- if eq .Values.service.type "LoadBalancer" }}
+  type: LoadBalancer
+  {{- end }}
+  ports:
+  - name: rest
+    port: {{ .Values.rest_port }}
+    protocol: TCP
+    targetPort: {{ .Values.resttcp_port }}
+  selector:
+    app: flink
+    component: {{ .Release.Name }}-jobmanager
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Release.Name }}-taskmanager-prometheus
+  namespace: {{ .Values.namespace }}
+  labels:
+    app: flink
+    component: {{ .Release.Name }}-taskmanager
+spec:
+  type: ClusterIP
+  ports:
+  - name: prom
+    port: {{ .Values.taskmanager.prom_port }}
+  selector:
+    app: flink
+    component: {{ .Release.Name }}-taskmanager
+
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ .Release.Name }}-jobmanager
+  namespace: {{ .Values.namespace }}
+spec:
+  template:
+    metadata:
+      labels:
+        app: flink
+        component: {{ .Release.Name }}-jobmanager
+      annotations:
+        prometheus.io/scrape: 'true'
+        prometheus.io/port: "{{ .Values.jobmanager.prom_port }}"
+    spec:
+      volumes:
+      - name: flink-config-volume
+        configMap:
+          name: {{ .Release.Name }}-config
+          items:
+          - key: flink-conf
+            path: flink-conf.yaml
+          - key: base-config
+            path: base-config.conf
+          - key: {{ .Release.Name }}
+            path: {{ .Release.Name }}.conf
+          - key: log4j_console_properties
+            path: log4j-console.properties
+      restartPolicy: OnFailure
+      imagePullSecrets:
+      - name: {{ .Values.imagepullsecrets }}
+      containers:
+      - name: {{ .Release.Name }}-jobmanager
+        image: "{{ .Values.dockerhub }}/{{ .Values.repository }}:{{ .Values.image_tag }}"
+        imagePullPolicy: Always
+        workingDir: /opt/flink
+        command: ["/opt/flink/bin/standalone-job.sh"]
+        args: ["start-foreground",
+               "--job-classname={{ .Values.job_classname }}",
+{{- if eq .Values.checkpoint_store_type "azure" }}
+               "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}",
+{{- end }}
+{{- if eq .Values.checkpoint_store_type "s3" }}
+               "-Ds3.access-key={{ .Values.s3_access_key }}",
+               "-Ds3.secret-key={{ .Values.s3_secret_key }}",
+               "-Ds3.endpoint={{ .Values.s3_endpoint }}",
+               "-Ds3.path.style.access={{
.Values.s3_path_style_access }}", +{{- end }} + "-Dweb.submit.enable=false", + "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", + "-Dmetrics.reporter.prom.port={{ .Values.jobmanager.prom_port }}", + "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", + "-Djobmanager.rpc.port={{ .Values.jobmanager.rpc_port }}", + "-Dparallelism.default=1", + "-Dblob.server.port={{ .Values.jobmanager.blob_port }}", + "-Dqueryable-state.server.ports={{ .Values.jobmanager.query_port }}", + "--config.file.path", + "/data/flink/conf/{{ .Release.Name }}.conf"] + ports: + - containerPort: {{ .Values.jobmanager.rpc_port }} + name: rpc + - containerPort: {{ .Values.jobmanager.blob_port }} + name: blob + - containerPort: {{ .Values.jobmanager.query_port }} + name: query + - containerPort: {{ .Values.jobmanager.ui_port }} + name: ui + volumeMounts: + - name: flink-config-volume + mountPath: /opt/flink/conf/flink-conf.yaml + subPath: flink-conf.yaml + - name: flink-config-volume + mountPath: /data/flink/conf/base-config.conf + subPath: base-config.conf + - name: flink-config-volume + mountPath: /data/flink/conf/{{ .Release.Name }}.conf + subPath: {{ .Release.Name }}.conf + - name: flink-config-volume + mountPath: /opt/flink/conf/log4j-console.properties + subPath: log4j-console.properties + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-taskmanager + namespace: {{ .Values.namespace }} +spec: + replicas: {{ .Values.taskmanager.replicas }} + selector: + matchLabels: + app: flink + component: {{ .Release.Name }}-taskmanager + template: + metadata: + labels: + app: flink + component: {{ .Release.Name }}-taskmanager + spec: + volumes: + - name: flink-config-volume + configMap: + name: {{ .Release.Name }}-config + items: + - key: flink-conf + path: flink-conf.yaml + - key: log4j_console_properties + path: log4j-console.properties + imagePullSecrets: + - name: {{ .Values.imagepullsecrets }} + containers: + - name: {{ .Release.Name }}-taskmanager + image: "{{ .Values.dockerhub }}/{{ .Values.repository }}:{{ .Values.image_tag }}" + imagePullPolicy: Always + resources: + requests: + cpu: "{{ .Values.taskmanager.cpu_requests }}" + workingDir: {{ .Values.taskmanager.flink_work_dir }} + command: ["/opt/flink/bin/taskmanager.sh"] + args: ["start-foreground", +{{- if eq .Values.checkpoint_store_type "azure" }} + "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "s3" }} + "-Ds3.access-key={{ .Values.s3_access_key }}", + "-Ds3.secret-key={{ .Values.s3_secret_key }}", + "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end }} + "-Dweb.submit.enable=false", + "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", + "-Dmetrics.reporter.prom.host={{ .Release.Name }}-taskmanager", + "-Dmetrics.reporter.prom.port=9251-9260", + "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", + "-Dtaskmanager.rpc.port={{ .Values.taskmanager.rpc_port }}"] + ports: + - containerPort: {{ .Values.taskmanager.rpc_port }} + name: rpc + {{- if .Values.healthcheck }} + livenessProbe: +{{ toYaml .Values.livenessProbe | indent 10 }} + {{- end }} + volumeMounts: + - name: flink-config-volume + mountPath: /opt/flink/conf/flink-conf.yaml + subPath: flink-conf.yaml + - name: flink-config-volume + mountPath: /opt/flink/conf/log4j-console.properties + subPath: log4j-console.properties + 
+{{- $name := .Release.Name }} +{{- $prop := (index .Values.scale_properties $name)}} +{{- if $prop.enabled}} +--- +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ .Release.Name }}-taskmanager-hpa + namespace: {{ .Values.namespace }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ .Release.Name }}-taskmanager + minReplicas: {{ $prop.min_replica }} + maxReplicas: {{ $prop.max_replica }} + metrics: + - type: External + external: + metricName: {{ .Release.Name }}_kafka_consumergroup_lag_sum + targetValue: "{{ $prop.scale_target_value }}" +{{- end }} \ No newline at end of file diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 8a0476c32b..7b1dd3b833 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -291,7 +291,10 @@ telemetry-extractor: fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} - + s3.access-key: {{s3_storage_key}} + s3.secret-key: {{s3_storage_secret}} + s3.endpoint: {{s3_storage_endpoint}} + s3.path.style.access: {{s3_path_style_access}} From 5ced03952f8c8bc8a25a66f2e3f2412bbb32db13 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 09:01:55 +1100 Subject: [PATCH 041/161] temporary change Signed-off-by: Deepak Devadathan --- .../flink_job_deployment.yaml.disabled | 245 ------------------ 1 file changed, 245 deletions(-) delete mode 100644 kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled deleted file mode 100644 index 10e6b62181..0000000000 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled +++ /dev/null @@ -1,245 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Release.Name }}-jobmanager - namespace: {{ .Values.namespace }} - labels: - app: flink - component: {{ .Release.Name }}-jobmanager -spec: - type: ClusterIP - ports: - - name: rpc - port: {{ .Values.jobmanager.rpc_port }} - - name: blob - port: {{ .Values.jobmanager.blob_port }} - - name: query - port: {{ .Values.jobmanager.query_port }} - - name: ui - port: {{ .Values.jobmanager.ui_port }} - - name: prom - port: {{ .Values.jobmanager.prom_port }} - selector: - app: flink - component: {{ .Release.Name }}-jobmanager - ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Release.Name }}-jobmanager-webui - namespace: {{ .Values.namespace }} -{{- if .Values.service.annotations }} -{{- with .Values.service.annotations }} - annotations: -{{ toYaml . 
| indent 4 }} -{{- end }} -{{- end }} -spec: - {{- if eq .Values.service.type "ClusterIP" }} - type: ClusterIP - {{- end }} - {{- if eq .Values.service.type "LoadBalancer" }} - type: LoadBalancer - {{- end }} - ports: - - name: rest - port: {{ .Values.rest_port }} - protocol: TCP - targetPort: {{ .Values.resttcp_port }} - selector: - app: flink - component: {{ .Release.Name }}-jobmanager - ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Release.Name }}-taskmanager-prometheus - namespace: {{ .Values.namespace }} - labels: - app: flink - component: {{ .Release.Name }}-taskmanager -spec: - type: ClusterIP - ports: - - name: prom - port: {{ .Values.taskmanager.prom_port }} - selector: - app: flink - component: {{ .Release.Name }}-taskmanager - ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ .Release.Name }}-jobmanager - namespace: {{ .Values.namespace }} -spec: - template: - metadata: - labels: - app: flink - component: {{ .Release.Name }}-jobmanager - annotations: - prometheus.io/scrape: 'true' - prometheus.io/port: "{{ .Values.jobmanager.prom_port }}" - spec: - volumes: - - name: flink-config-volume - configMap: - name: {{ .Release.Name }}-config - items: - - key: flink-conf - path: flink-conf.yaml - - key: base-config - path: base-config.conf - - key: {{ .Release.Name }} - path: {{ .Release.Name }}.conf - - key: log4j_console_properties - path: log4j-console.properties - restartPolicy: OnFailure - imagePullSecrets: - - name: {{ .Values.imagepullsecrets }} - containers: - - name: {{ .Release.Name }}-jobmanager - image: "{{ .Values.dockerhub }}/{{ .Values.repository }}:{{ .Values.image_tag }}" - imagePullPolicy: Always - workingDir: /opt/flink - command: ["/opt/flink/bin/standalone-job.sh"] - args: ["start-foreground", - "--job-classname={{ .Values.job_classname }}", -{{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", -{{- end }} -{{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", - "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", -{{- end }} - "-Dweb.submit.enable=false", - "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", - "-Dmetrics.reporter.prom.port={{ .Values.jobmanager.prom_port }}", - "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", - "-Djobmanager.rpc.port={{ .Values.jobmanager.rpc_port }}", - "-Dparallelism.default=1", - "-Dblob.server.port={{ .Values.jobmanager.blob_port }}", - "-Dqueryable-state.server.ports={{ .Values.jobmanager.query_port }}", - "--config.file.path", - "/data/flink/conf/{{ .Release.Name }}.conf"] - ports: - - containerPort: {{ .Values.jobmanager.rpc_port }} - name: rpc - - containerPort: {{ .Values.jobmanager.blob_port }} - name: blob - - containerPort: {{ .Values.jobmanager.query_port }} - name: query - - containerPort: {{ .Values.jobmanager.ui_port }} - name: ui - volumeMounts: - - name: flink-config-volume - mountPath: /opt/flink/conf/flink-conf.yaml - subPath: flink-conf.yaml - - name: flink-config-volume - mountPath: /data/flink/conf/base-config.conf - subPath: base-config.conf - - name: flink-config-volume - mountPath: /data/flink/conf/{{ .Release.Name }}.conf - subPath: {{ .Release.Name }}.conf - - name: flink-config-volume - mountPath: /opt/flink/conf/log4j-console.properties - subPath: log4j-console.properties - ---- -apiVersion: 
apps/v1 -kind: Deployment -metadata: - name: {{ .Release.Name }}-taskmanager - namespace: {{ .Values.namespace }} -spec: - replicas: {{ .Values.taskmanager.replicas }} - selector: - matchLabels: - app: flink - component: {{ .Release.Name }}-taskmanager - template: - metadata: - labels: - app: flink - component: {{ .Release.Name }}-taskmanager - spec: - volumes: - - name: flink-config-volume - configMap: - name: {{ .Release.Name }}-config - items: - - key: flink-conf - path: flink-conf.yaml - - key: log4j_console_properties - path: log4j-console.properties - imagePullSecrets: - - name: {{ .Values.imagepullsecrets }} - containers: - - name: {{ .Release.Name }}-taskmanager - image: "{{ .Values.dockerhub }}/{{ .Values.repository }}:{{ .Values.image_tag }}" - imagePullPolicy: Always - resources: - requests: - cpu: "{{ .Values.taskmanager.cpu_requests }}" - workingDir: {{ .Values.taskmanager.flink_work_dir }} - command: ["/opt/flink/bin/taskmanager.sh"] - args: ["start-foreground", -{{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", -{{- end }} -{{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", - "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", -{{- end }} - "-Dweb.submit.enable=false", - "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", - "-Dmetrics.reporter.prom.host={{ .Release.Name }}-taskmanager", - "-Dmetrics.reporter.prom.port=9251-9260", - "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", - "-Dtaskmanager.rpc.port={{ .Values.taskmanager.rpc_port }}"] - ports: - - containerPort: {{ .Values.taskmanager.rpc_port }} - name: rpc - {{- if .Values.healthcheck }} - livenessProbe: -{{ toYaml .Values.livenessProbe | indent 10 }} - {{- end }} - volumeMounts: - - name: flink-config-volume - mountPath: /opt/flink/conf/flink-conf.yaml - subPath: flink-conf.yaml - - name: flink-config-volume - mountPath: /opt/flink/conf/log4j-console.properties - subPath: log4j-console.properties - -{{- $name := .Release.Name }} -{{- $prop := (index .Values.scale_properties $name)}} -{{- if $prop.enabled}} ---- -apiVersion: autoscaling/v2beta1 -kind: HorizontalPodAutoscaler -metadata: - name: {{ .Release.Name }}-taskmanager-hpa - namespace: {{ .Values.namespace }} -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: {{ .Release.Name }}-taskmanager - minReplicas: {{ $prop.min_replica }} - maxReplicas: {{ $prop.max_replica }} - metrics: - - type: External - external: - metricName: {{ .Release.Name }}_kafka_consumergroup_lag_sum - targetValue: "{{ $prop.scale_target_value }}" -{{- end }} \ No newline at end of file From 426036fbd3d3218d8862767cd4749af7e77aaa75 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 09:03:22 +1100 Subject: [PATCH 042/161] temporary change Signed-off-by: Deepak Devadathan --- .../datapipeline_jobs/templates/flink_job_deployment.yaml | 6 ++++++ kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index c794c7e702..10e6b62181 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ 
b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -112,6 +112,12 @@ spec: "--job-classname={{ .Values.job_classname }}", {{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "s3" }} + "-Ds3.access-key={{ .Values.s3_access_key }}", + "-Ds3.secret-key={{ .Values.s3_secret_key }}", + "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 7b1dd3b833..d2a82456f3 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -291,10 +291,6 @@ telemetry-extractor: fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} - s3.access-key: {{s3_storage_key}} - s3.secret-key: {{s3_storage_secret}} - s3.endpoint: {{s3_storage_endpoint}} - s3.path.style.access: {{s3_path_style_access}} From 3ffd0e4ce11de5d2b3bbb023cd4e20eec53c63aa Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 09:59:59 +1100 Subject: [PATCH 043/161] added hard code value for region Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index d2a82456f3..1cb6466932 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,9 @@ telemetry-extractor: fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} + fs.s3.endpoint.region: ap-hyderabad-1 fs.s3.path.style.access: {{s3_path_style_access}} + From 9a8e39f71202e3ed50079e2eb3fbe72be07df4e4 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:02:22 +1100 Subject: [PATCH 044/161] removed the hardcode region for flink Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 1 - 1 file changed, 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 1cb6466932..b46c2b2d81 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,6 @@ telemetry-extractor: fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} - fs.s3.endpoint.region: ap-hyderabad-1 fs.s3.path.style.access: {{s3_path_style_access}} From 6d0fb196de38b08fdca2e46b79b56b1e1e1f0c34 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:03:37 +1100 Subject: [PATCH 045/161] added a trailing / Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index b46c2b2d81..ab46f36a65 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ 
b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -159,7 +159,7 @@ base_config: | base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} #base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint" + base.url = "s3://dev-data-store/checkpoint/" {% endif %} } } From 4f6a9b07f1faed62691762dd8ba4219d7c6d7b98 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:05:58 +1100 Subject: [PATCH 046/161] removed trailing slash Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index ab46f36a65..b46c2b2d81 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -159,7 +159,7 @@ base_config: | base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} #base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint/" + base.url = "s3://dev-data-store/checkpoint" {% endif %} } } From 7c2482838f58843d4ff145598c809c978b9cb60d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:06:41 +1100 Subject: [PATCH 047/161] changed base.url for s3 Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index b46c2b2d81..e4086dbde0 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -158,8 +158,8 @@ base_config: | {% if checkpoint_store_type == "azure" %} base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} - #base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint" + base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} + #base.url = "s3://dev-data-store/checkpoint" {% endif %} } } From d149abc49e89944531d8ccc16bec89b599d188e6 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:09:23 +1100 Subject: [PATCH 048/161] hardcode base.url Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index e4086dbde0..ed97213052 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -158,8 +158,8 @@ 
base_config: | {% if checkpoint_store_type == "azure" %} base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} - base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - #base.url = "s3://dev-data-store/checkpoint" + # base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} + base.url = "s3://dev-data-store/checkpoint" {% endif %} } } From 3b55ac1871cb84787d93c6d552cfd63f475e7718 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:52:49 +1100 Subject: [PATCH 049/161] debug level only for hadoop Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index ed97213052..7cfbf88ef3 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -117,7 +117,8 @@ log4j_console_properties: | logger.kafka.name= org.apache.kafka logger.kafka.level = {{ flink_libraries_log_level | default(INFO) }} logger.hadoop.name = org.apache.hadoop - logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} + # logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} + logger.hadoop.level = {{ flink_hadoop_log_level | default(INFO) }} logger.zookeeper.name = org.apache.zookeeper logger.zookeeper.level = {{ flink_libraries_log_level | default(INFO) }} From ac6d377a9a37e043f127c014e41615c2a75aef5c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 11:47:33 +1100 Subject: [PATCH 050/161] using s3a Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 7cfbf88ef3..f301b0651a 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -160,7 +160,7 @@ base_config: | base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} # base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint" + base.url = "s3a://dev-data-store/checkpoint" {% endif %} } } From 60a50c3ebd13813d21dccb869dbd4c3b610d0dfc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 11:49:53 +1100 Subject: [PATCH 051/161] using s3 url Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index f301b0651a..7cfbf88ef3 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -160,7 +160,7 @@ base_config: | base.url = 
"wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} # base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3a://dev-data-store/checkpoint" + base.url = "s3://dev-data-store/checkpoint" {% endif %} } } From 033b16074b2ed541cdd8a177d20e08ef1143f407 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 13:07:06 +1100 Subject: [PATCH 052/161] removed hadoop logging Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 7cfbf88ef3..e9df6b5fcb 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -117,8 +117,8 @@ log4j_console_properties: | logger.kafka.name= org.apache.kafka logger.kafka.level = {{ flink_libraries_log_level | default(INFO) }} logger.hadoop.name = org.apache.hadoop - # logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} - logger.hadoop.level = {{ flink_hadoop_log_level | default(INFO) }} + logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} + # logger.hadoop.level = {{ flink_hadoop_log_level | default(INFO) }} logger.zookeeper.name = org.apache.zookeeper logger.zookeeper.level = {{ flink_libraries_log_level | default(INFO) }} From 2a5050f1a8e456525e2a4f6a5bda453da1c7b9f3 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:05:43 +1100 Subject: [PATCH 053/161] testing with sse-c values Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index e9df6b5fcb..69e4c8a170 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -292,8 +292,8 @@ telemetry-extractor: fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} - - + fs.s3.server-side-encryption-algorithm: SSE-C + fs.s3a.server-side-encryption.key: SGVscCwgSSdtIHRyYXBwZWQgaW5zaWRlIGEgYmFzZS02NC1jb2RlYyE= pipeline-preprocessor: From 58dec826c1e3829137d2fc2e9b643401b8383aa4 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:08:06 +1100 Subject: [PATCH 054/161] corrected the typo Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 69e4c8a170..12e430c7ca 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -293,7 +293,7 @@ telemetry-extractor: fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} fs.s3.server-side-encryption-algorithm: SSE-C - fs.s3a.server-side-encryption.key: SGVscCwgSSdtIHRyYXBwZWQgaW5zaWRlIGEgYmFzZS02NC1jb2RlYyE= + fs.s3.server-side-encryption.key: SGVscCwgSSdtIHRyYXBwZWQgaW5zaWRlIGEgYmFzZS02NC1jb2RlYyE= pipeline-preprocessor: 
From f53d043c2c1906edc7866f5c7e183d3237e1be30 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:37:09 +1100 Subject: [PATCH 055/161] hardcoded endpoint Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 12e430c7ca..d87633105a 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,10 +290,8 @@ telemetry-extractor: jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} - fs.s3.endpoint: {{s3_storage_endpoint}} + fs.s3.endpoint: https://apaccpt03.compat.objectstorage.ap-hyderabad-1.oraclecloud.com fs.s3.path.style.access: {{s3_path_style_access}} - fs.s3.server-side-encryption-algorithm: SSE-C - fs.s3.server-side-encryption.key: SGVscCwgSSdtIHRyYXBwZWQgaW5zaWRlIGEgYmFzZS02NC1jb2RlYyE= pipeline-preprocessor: From 898177c7f47218b86fc496fdfc28c1a6b510760a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:37:35 +1100 Subject: [PATCH 056/161] hard coded endpoint url Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index d87633105a..799a10d454 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,7 @@ telemetry-extractor: jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} - fs.s3.endpoint: https://apaccpt03.compat.objectstorage.ap-hyderabad-1.oraclecloud.com + fs.s3.endpoint: 'https://apaccpt03.compat.objectstorage.ap-hyderabad-1.oraclecloud.com' fs.s3.path.style.access: {{s3_path_style_access}} From 8a64d3e8037282ec10cd84a9932951cb99597171 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:39:37 +1100 Subject: [PATCH 057/161] endpoint as variable Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 799a10d454..cb20557cd9 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,7 @@ telemetry-extractor: jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} - fs.s3.endpoint: 'https://apaccpt03.compat.objectstorage.ap-hyderabad-1.oraclecloud.com' + fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} From f451d4554cf5af00cee1518a9b2c603b75b51bb9 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 15:32:51 +1100 Subject: [PATCH 058/161] removed hadoop specific logging Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 1 - 1 file changed, 1 deletion(-) diff --git 
a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index cb20557cd9..efcc3cdda9 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -118,7 +118,6 @@ log4j_console_properties: | logger.kafka.level = {{ flink_libraries_log_level | default(INFO) }} logger.hadoop.name = org.apache.hadoop logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} - # logger.hadoop.level = {{ flink_hadoop_log_level | default(INFO) }} logger.zookeeper.name = org.apache.zookeeper logger.zookeeper.level = {{ flink_libraries_log_level | default(INFO) }} From 0a4380a36cdff95cd3bf3b859ddd7fed7faf2be5 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 15:53:22 +1100 Subject: [PATCH 059/161] added explicit region Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index efcc3cdda9..951b9772ba 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,6 +290,7 @@ telemetry-extractor: fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} + fs.s3.endpoint.region: {{s3_region}} fs.s3.path.style.access: {{s3_path_style_access}} From 06a5bad926cc1fadb551468aeaf852869ed48ffc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 16:04:06 +1100 Subject: [PATCH 060/161] removed region flag Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 1 - 1 file changed, 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 951b9772ba..efcc3cdda9 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,6 @@ telemetry-extractor: fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} - fs.s3.endpoint.region: {{s3_region}} fs.s3.path.style.access: {{s3_path_style_access}} From bb5fa355d4cb2db41b4a1ee359950c6363337b4a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 16:21:17 +1100 Subject: [PATCH 061/161] changed oci specific end point for s3 Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index efcc3cdda9..2f413910df 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -8,7 +8,12 @@ azure_account: {{ azure_account }} azure_secret: {{ azure_secret }} s3_access_key: {{ s3_storage_key }} s3_secret_key: {{ s3_storage_secret }} +{% if cloud_service_provider == "oci" %} +s3_endpoint: {{ oci_flink_s3_storage_endpoint }} +{% else %} s3_endpoint: {{ s3_storage_endpoint }} +{% endif %} + s3_path_style_access: {{ s3_path_style_access }} serviceMonitor: @@ -159,7 +164,7 @@ base_config: | base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} # base.url = 
"s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint" + base.url = "s3://"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} {% endif %} } } From 0394d1be01798ade853e105ecbf24b4d90fb09d2 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 16:27:35 +1100 Subject: [PATCH 062/161] removed customization from flink-conf.yaml Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 2f413910df..cbafb57c22 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -242,7 +242,6 @@ ingest-router: taskmanager.memory.process.size: {{ flink_job_names['ingest-router'].taskmanager_process_memory }} jobmanager.memory.process.size: {{ flink_job_names['ingest-router'].jobmanager_process_memory }} - telemetry-extractor: telemetry-extractor: |+ include file("/data/flink/conf/base-config.conf") @@ -292,10 +291,6 @@ telemetry-extractor: heartbeat.interval: 5000 taskmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].taskmanager_process_memory }} jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} - fs.s3.access.key: {{s3_storage_key}} - fs.s3.secret.key: {{s3_storage_secret}} - fs.s3.endpoint: {{s3_storage_endpoint}} - fs.s3.path.style.access: {{s3_path_style_access}} pipeline-preprocessor: From f808208a3f7ab9e15433c9c0b90667c7554ab243 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 1 Feb 2023 11:28:50 +1100 Subject: [PATCH 063/161] templated jinja templates Signed-off-by: Deepak Devadathan --- .../templates/cluster-config.json.j2 | 31 +++++++++++ .../templates/common.conf.j2 | 6 ++- .../templates/model-config.j2 | 51 ++++++++++--------- .../templates/model-config.json.j2 | 26 +++++----- .../templates/model-dock-config.j2 | 8 +-- 5 files changed, 81 insertions(+), 41 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 1a26514684..e899827fdb 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -1,3 +1,5 @@ + +{% if dp_object_store_type == "azure" %} { "jars": [ "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", @@ -25,3 +27,32 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} 
-Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +{ + "jars": [ + "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "s3n://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", + "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "s3n://{{ bucket }}/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} +{% endif %} \ No newline at end of file diff --git 
a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index bde88ec9d4..bec3d21d81 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -251,8 +251,12 @@ dcetextbook.filename="DCE_textbook_data.csv" etbtextbook.filename="ETB_textbook_data.csv" etb.dialcode.druid.length={{ etb_dialcode_list_druid_length }} - +{% if dp_object_store_type == "azure" %} druid.report.default.storage="azure" +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +druid.report.default.storage="s3" +{% endif %} + druid.report.date.format="yyyy-MM-dd" druid.report.default.container="report-verification" diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 2bb0a042ea..8b238e8ef2 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -30,19 +30,24 @@ config() { if [ ! -z "$2" ]; then keyword=$2; fi case "$1" in "assessment-correction") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' ;; "assessment-archival") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"azure","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' ;; "assessment-archived-removal") +{% if 
dp_object_store_type == "azure" %} echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"s3","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + ;; +{% endif %} "collection-reconciliation-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CollectionReconciliationJob","modelParams":{"mode":"prodrun","brokerList":"{{ingestion_kafka_broker_host}}","topic":"{{env}}.issue.certificate.request","sparkCassandraConnectionHost":"{{ core_cassandra_host }}"},"parallelization":30,"appName":"CollectionReconciliationJob"}' ;; "collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"azure","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "score-metric-migration-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.ScoreMetricMigrationJob","modelParams":{"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Score Metric Migration Job"}' @@ -51,34 +56,34 @@ config() { echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.AssessmentScoreCorrectionJob","modelParams":{"assessment.score.correction.batches":"","cassandraReadConsistency":"QUORUM","cassandraWriteConsistency":"QUORUM","csvPath":"/mount/data/analytics/score_correction","isDryRunMode":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":30,"appName":"Assessment Score Correction Job"}' ;; "course-batch-status-updater") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"azure","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' ;; "collection-summary-report-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"azure","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection 
Summary Report V2"}' ;; "uci-private-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' ;; "uci-response-exhaust") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"azure","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' ;; "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' ;; "program-collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"azure","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host 
}}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "response-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' ;; "response-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date 
yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' ;; "druid_reports") echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.DruidQueryProcessingModel","modelParams":{"mode":"batch"},"parallelization":8,"appName":"Druid Reports"}' @@ -94,10 +99,10 @@ config() { ;; "wfs") echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"} }],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' - #echo 
'{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' + #echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' ;; "video-streaming") - echo '{"search":{"type":"azure"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' + echo '{"search":{"type":"{{ dp_object_store_type }}"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' ;; "admin-user-reports") echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' @@ -106,10 +111,10 @@ config() { echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' ;; "telemetry-replay") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' ;; "summary-replay") - echo 
'{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' ;; "content-rating-updater") echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.updater.UpdateContentRating","modelParams": {"startDate": "'$endDate'","endDate": "'$endDate'"},"output": [{"to":"console","params":{"printEvent":false}}],"parallelization": 8,"appName": "Content Rating Updater","deviceMapping": false}' @@ -118,25 +123,25 @@ config() { echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.ExperimentDefinitionModel","modelParams":{"sparkElasticsearchConnectionHost":"{{ lp_composite_search_host }}"},"output":[{"to":"elasticsearch","params":{"index":"experiment"}}],"parallelization":8,"appName":"Experiment-Definition","deviceMapping":false}' ;; "etb-metrics") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of 
Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"azure","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of 
Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' ;; "course-enrollment-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"azure","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch 
Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' ;; "course-consumption-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": "sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "azure","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": 
"sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "{{ dp_object_store_type }}","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' ;; "textbook-progress-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"azure","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created 
By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' ;; "audit-metrics-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"azure","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"azure","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"{{ dp_object_store_type 
}}","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' ;; "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "azure","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' ;; "druid-dataset") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"azure","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' ;; "*") echo "Unknown model code" diff --git a/ansible/roles/data-products-deploy/templates/model-config.json.j2 b/ansible/roles/data-products-deploy/templates/model-config.json.j2 index 4594a1978a..a3569c7f46 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.json.j2 @@ -1,7 +1,7 @@ { "wfs": { "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -24,7 +24,7 @@ }, "output": [ { - "to": "azure", + "to": "{{dp_object_store_type}}", "params": { "bucket": "{{ bucket }}", "key": "{{ job_manager_tmp_dir }}/wfs/$(date --date yesterday '+%Y-%m-%d')" @@ -44,7 +44,7 @@ }, "video-streaming": { "search": { - "type": "azure" + "type": "{{dp_object_store_type}}" }, "model": "org.ekstep.analytics.job.VideoStreamingJob", "modelParams": { @@ -297,7 +297,7 @@ "tenantId": "", "slugName": "" }, - "store": "azure", + "store": "{{dp_object_store_type}}", "format": "csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -346,7 +346,7 @@ "limit": 10000 } }, - "store": "azure", + "store": "{{dp_object_store_type}}", "format":"csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -457,7 +457,7 @@ }], "queryType": "groupBy" }, - "store": "azure", + "store": "{{dp_object_store_type}}", "format":"csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -486,7 +486,7 @@ { "name": "denorm", "search": { - "type": "azure", + "type": 
"{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -532,7 +532,7 @@ { "name": "failed", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -546,7 +546,7 @@ { "name": "unique", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -560,7 +560,7 @@ { "name": "raw", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -574,7 +574,7 @@ { "name": "channel-raw", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "folder": true, @@ -589,7 +589,7 @@ { "name": "channel-summary", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "folder": true, @@ -604,7 +604,7 @@ { "name": "derived", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", diff --git a/ansible/roles/data-products-deploy/templates/model-dock-config.j2 b/ansible/roles/data-products-deploy/templates/model-dock-config.j2 index 20d82dbfb5..f720f4687e 100644 --- a/ansible/roles/data-products-deploy/templates/model-dock-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-dock-config.j2 @@ -15,16 +15,16 @@ config() { if [ ! -z "$3" ]; then inputBucket=$3; fi case "$1" in "content-details") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content 
Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","aliasName":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"azure","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project 
ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","aliasName":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question 
Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{dp_object_store_type}}","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' ;; "sourcing-summary-report") - echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": "'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": "/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "azure", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' + echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": "'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": "/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": 
"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "{{dp_object_store_type}}", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' ;; "funnel-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. of contributions pending review","approvedContributions": "No. 
of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "azure","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": "equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. of contributions pending review","approvedContributions": "No. 
of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "{{dp_object_store_type}}","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": "equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' ;; "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "azure","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{dp_object_store_type}}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' ;; "*") echo "Unknown model code" From 7eff73f93b1d90949b45033ec6ac8929720967b3 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 1 Feb 2023 14:44:34 +1100 Subject: [PATCH 064/161] updated storage type info Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index bec3d21d81..c86cf0ef10 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -17,9 +17,13 @@ reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" {% if dp_object_store_type == "azure" %} cloud_storage_type="azure" {% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3") %} +{% if cloud_service_provider == "oci" %} +cloud_storage_type="oci" +{% else %} cloud_storage_type="s3" +{% endif %} cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" -cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint }}" +cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}" aws_storage_key="{{ s3_storage_key }}" aws_storage_secret="{{ s3_storage_secret }}" {% endif %} From 6910f88c86b921295d625a2b17b8203332aa648c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 1 Feb 2023 17:04:12 +1100 Subject: [PATCH 065/161] updated the store type as template value Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index c86cf0ef10..eb56426862 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -16,7 +16,7 @@ reports.storage.key.config="{{ dp_reports_storage_key_config }}" reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" {% if dp_object_store_type == "azure" %} cloud_storage_type="azure" -{% elif (dp_object_store_type 
== "cephs3" or dp_object_store_type == "s3") %} +{% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3" or dp_object_store_type == "oci") %} {% if cloud_service_provider == "oci" %} cloud_storage_type="oci" {% else %} diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 8b238e8ef2..86f376b65d 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -40,7 +40,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; {% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"s3","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; {% endif %} "collection-reconciliation-job") From 24ab958ac86f0289631b6c20d001eb5e615048ce Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 1 Feb 2023 17:26:39 +1100 Subject: [PATCH 066/161] added the endpoint variable for jobmanager Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index eb56426862..e0ec7005df 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -24,6 +24,7 @@ cloud_storage_type="s3" {% endif %} cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}" +storage.endpoint.config="{{ s3_storage_endpoint_with_protocol }}" aws_storage_key="{{ s3_storage_key }}" aws_storage_secret="{{ s3_storage_secret }}" {% endif %} From 458acc444521b41965b6a4701bb4289611e0dfa7 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 2 Feb 2023 10:40:36 +1100 Subject: [PATCH 067/161] updated the condition of oss upload Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index f4cbf7c216..c659f75113 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -8,13 +8,13 @@ - name: Ensure oci oss bucket exists command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" register: check_bucket - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - always - name: Create oci oss bucket command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" and check_bucket.rc !=0 + when: dp_object_store_type == "oci" and check_bucket.rc !=0 tags: - always @@ -35,7 +35,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - dataproducts-spark-cluster @@ -57,7 +57,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - ed-dataproducts-spark-cluster @@ -78,7 +78,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - framework-spark-cluster @@ -99,7 +99,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - framework-spark-cluster @@ -157,7 +157,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - framework-spark-cluster From f546aea9eef239540a18ce4a9d4176f7b90259ff Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 22 Feb 2023 23:06:10 +1100 Subject: [PATCH 068/161] added oci instance principal variable Signed-off-by: Deepak Devadathan --- ansible/lpa_data-products_deploy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/lpa_data-products_deploy.yml b/ansible/lpa_data-products_deploy.yml index 7e84ba53f7..1ff0cbdabc 100644 --- a/ansible/lpa_data-products_deploy.yml +++ b/ansible/lpa_data-products_deploy.yml @@ -7,5 +7,6 @@ environment: AZURE_STORAGE_ACCOUNT: "{{sunbird_private_storage_account_name}}" AZURE_STORAGE_KEY: 
"{{sunbird_private_storage_account_key}}" + OCI_CLI_AUTH: "instance_principal" roles: - data-products-deploy From ddadf4ca887bb63d0855308dfedeeef4453db58e Mon Sep 17 00:00:00 2001 From: ali_shemshadi Date: Fri, 7 Apr 2023 12:18:46 +0800 Subject: [PATCH 069/161] bring bds changes from local repository --- ansible/oci-bds-spark.provision.yml | 18 + .../data-products-deploy/defaults/main.yml | 8 +- .../roles/data-products-deploy/tasks/main.yml | 14 +- .../templates/cluster-config.json.j2 | 30 +- .../templates/submit-script.j2 | 39 ++- .../oci-bds-spark-cluster/defaults/main.yml | 7 + .../oci-bds-spark-cluster/tasks/main.yml | 13 + .../templates/create-cluster-with-sleep.sh.j2 | 331 ++++++++++++++++++ .../templates/create-cluster.sh.j2 | 329 +++++++++++++++++ .../templates/delete-cluster.sh.j2 | 34 ++ .../defaults/main.yml | 39 +++ .../tasks/main.yml | 89 +++++ .../deploy/spark-cluster-deploy/Jenkinsfile | 2 +- pipelines/provision/spark/Jenkinsfile.bds | 57 +++ .../provision/spark/Jenkinsfile.bds.test | 60 ++++ pipelines/provision/spark/Jenkinsfile.delete | 16 +- 16 files changed, 1063 insertions(+), 23 deletions(-) create mode 100644 ansible/oci-bds-spark.provision.yml create mode 100644 ansible/roles/oci-bds-spark-cluster/defaults/main.yml create mode 100644 ansible/roles/oci-bds-spark-cluster/tasks/main.yml create mode 100755 ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 create mode 100644 ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 create mode 100755 ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 create mode 100644 ansible/roles/provision-oci-spark-cluster/defaults/main.yml create mode 100644 ansible/roles/provision-oci-spark-cluster/tasks/main.yml create mode 100644 pipelines/provision/spark/Jenkinsfile.bds create mode 100644 pipelines/provision/spark/Jenkinsfile.bds.test diff --git a/ansible/oci-bds-spark.provision.yml b/ansible/oci-bds-spark.provision.yml new file mode 100644 index 0000000000..4eb80aa296 --- /dev/null +++ b/ansible/oci-bds-spark.provision.yml @@ -0,0 +1,18 @@ +- hosts: local + become: yes + vars_files: + - "{{inventory_dir}}/secrets.yml" + roles: + - oci-bds-spark-cluster + tags: + - copy-script + +- hosts: bds-livy-node + become: yes + gather_facts: no + vars_files: + - "{{inventory_dir}}/secrets.yml" + roles: + - provision-oci-spark-cluster + tags: + - spark-provision diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml index 690c51d87d..7eb22c7a18 100755 --- a/ansible/roles/data-products-deploy/defaults/main.yml +++ b/ansible/roles/data-products-deploy/defaults/main.yml @@ -4,7 +4,7 @@ spark_output_temp_dir: /mount/data/analytics/tmp/ bucket: "telemetry-data-store" secor_bucket: "telemetry-data-store" -dp_object_store_type: "azure" +dp_object_store_type: "oci" dp_raw_telemetry_backup_location: "unique/raw/" dp_storage_key_config: "azure_storage_key" dp_storage_secret_config: "azure_storage_secret" @@ -210,9 +210,9 @@ admin_password: "{{ spark_cluster_user_password }}" spark_cluster_name: "{{env}}-spark-cluster" spark_cluster: - executor_core: 5 - executor_memory: 19G - num_executors: 5 + executor_core: 1 + executor_memory: 2G + num_executors: 1 analytics_cluster: home: "/tmp" diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index c659f75113..733c416138 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ 
b/ansible/roles/data-products-deploy/tasks/main.yml @@ -6,14 +6,14 @@ - always - name: Ensure oci oss bucket exists - command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" + command: "oci os bucket get --name {{ bucket }}" register: check_bucket when: dp_object_store_type == "oci" tags: - always - name: Create oci oss bucket - command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + command: "oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" when: dp_object_store_type == "oci" and check_bucket.rc !=0 tags: - always @@ -32,7 +32,7 @@ - dataproducts-spark-cluster - name: Copy Core Data Products to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -54,7 +54,7 @@ - ed-dataproducts-spark-cluster - name: Copy Ed Data Products to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -75,7 +75,7 @@ - framework-spark-cluster - name: Copy Framework Library to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -96,7 +96,7 @@ - framework-spark-cluster - name: Copy Scruid Library to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -154,7 +154,7 @@ - framework-spark-cluster - name: Copy configuration file to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force + command: oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force async: 3600 poll: 10 when: dp_object_store_type == "oci" diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 
b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index e899827fdb..12ebf0bde0 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -27,7 +27,7 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } -{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +{% elif (dp_object_store_type == "s3") %} { "jars": [ "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", @@ -55,4 +55,32 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } +{% elif (dp_object_store_type == "oci") %} +{ + "jars": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + 
"spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} {% endif %} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy/templates/submit-script.j2 b/ansible/roles/data-products-deploy/templates/submit-script.j2 index e8341dc1e8..edd03ff36b 100644 --- a/ansible/roles/data-products-deploy/templates/submit-script.j2 +++ b/ansible/roles/data-products-deploy/templates/submit-script.j2 @@ -1,6 +1,7 @@ #!/usr/bin/env bash ## Job to run daily + cd "{{ analytics_cluster.home }}" source model-config.sh today=$(date "+%Y-%m-%d") @@ -79,7 +80,15 @@ submit_cluster_job() { requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody - response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: admin_name }}") +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d 
"$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} echo "Submitted job for batchNumer $i below is the response" echo $response } @@ -118,7 +127,15 @@ if [ "$mode" = "via-partition" ]; then requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} done elif [ "$mode" = "parallel-jobs" ]; then @@ -157,8 +174,15 @@ elif [ "$mode" = "selected-partition" ]; then requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" - +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} else if [ -z "$start_date" ]; then echo "Running $job without partition via run-job." 
@@ -179,5 +203,14 @@ else
     requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
     finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
     echo $finalRequestBody
-    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
+{% if dp_object_store_type == "azure" %}
+{
+    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
+}
+{% elif (dp_object_store_type == "oci") %}
+{
+    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}"
+}
+{% endif %}
+
 fi
diff --git a/ansible/roles/oci-bds-spark-cluster/defaults/main.yml b/ansible/roles/oci-bds-spark-cluster/defaults/main.yml
new file mode 100644
index 0000000000..95b0b73e0d
--- /dev/null
+++ b/ansible/roles/oci-bds-spark-cluster/defaults/main.yml
@@ -0,0 +1,7 @@
+
+spark_folder: /usr/hdp/current/spark2-client
+guava_version: 19.0
+log4j_version: 2.5
+guava_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_version}}/guava-{{guava_version}}.jar
+log4j_core_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-core/{{log4j_version}}/log4j-core-{{log4j_version}}.jar
+log4j_api_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-api/{{log4j_version}}/log4j-api-{{log4j_version}}.jar
diff --git a/ansible/roles/oci-bds-spark-cluster/tasks/main.yml b/ansible/roles/oci-bds-spark-cluster/tasks/main.yml
new file mode 100644
index 0000000000..d8f4d3cc50
--- /dev/null
+++ b/ansible/roles/oci-bds-spark-cluster/tasks/main.yml
@@ -0,0 +1,13 @@
+- name: copy cluster creation script
+  template:
+    src: create-cluster.sh.j2
+    dest: /tmp/create-cluster.sh
+    mode: 0755
+  when: cluster_state == "create_cluster"
+
+- name: copy cluster deletion script
+  template:
+    src: delete-cluster.sh.j2
+    dest: /tmp/delete-cluster.sh
+    mode: 0755
+  when: cluster_state == "delete_cluster"
diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2
new file mode 100755
index 0000000000..54784435d7
--- /dev/null
+++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2
@@ -0,0 +1,331 @@
+#!/bin/bash
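+# Provisioning flow: create the BDS cluster from a generated nodes.json,
+# wait for it to come up, look up the cluster OCID, resolve the utility
+# (Ambari/Livy) node IP, register that node in the Ansible inventory,
+# create an object storage API key for the cluster user, push the key
+# details into core-site via the Ambari REST API, then restart the cluster.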
+# Subnet id is supplied via an env variable
+
+ambari_user="{{ambari_user}}"
+cluster_password="{{cluster_password}}"
+key_alias="{{key_alias}}"
+user_id="{{user_id}}"
+subnet="{{subnet_id}}"
+compartment_id="{{compartment_id}}"
+display_name="{{display_name}}"
+workernode="{{workernode}}"
+cluster_public_key="{{public_key}}"
+
+AMBARI_USER=$ambari_user
+AMBARI_PWD=$cluster_password
+
+function get_bdsid() {
+    list_param=`oci bds instance list --compartment-id $compartment_id`
+    bdsid="NULL"
+    # echo $list_param | jq '.data'
+    state="ACTIVE"
+    disname="NULL"
+    for k in $(jq '.data | keys | .[]' <<< "$list_param"); do
+        # echo $k
+        cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'`
+        if [ $cstate = $state ]; then
+            disname=`echo $list_param | jq -r '.data['$k']["display-name"]'`
+            if [ $disname = $display_name ]; then
+                bdsid=`echo $list_param | jq -r '.data['$k']["id"]'`
+            fi
+
+        fi
+        echo "BDS ID"
+        echo $bdsid
+    done
+}
+
+function getLivyip() {
+
+    export bds_instance_id=$bdsid
+    bdsjson=$(oci bds instance get --bds-instance-id $bds_instance_id)
+    # echo "AMBARI URL"
+    ambari_url=`echo $bdsjson | jq -r '.data["cluster-details"]["ambari-url"]'`
+    # echo $ambari_url
+    livyip="NULL"
+    cnode="UTILITY"
+    for k in $(jq '.data["nodes"] | keys | .[]' <<< "$bdsjson"); do
+        node=`echo $bdsjson | jq -r '.data["nodes"]['$k']["node-type"]'`
+        if [ $node = "$cnode" ]; then
+            livyip=`echo $bdsjson | jq -r '.data["nodes"]['$k']["ip-address"]'`
+        fi
+    done
+    echo "LIVY IP"
+    echo $livyip
+
+}
+
+getlivyclustername() {
+    cdet=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$livyip:7183/api/v1/clusters/)
+    echo $cdet
+    for k in $(jq '.items | keys | .[]' <<< "$cdet"); do
+        # echo $k
+        cluster_name=`echo $cdet | jq -r '.items['$k']["Clusters"]["cluster_name"]'`
+        echo $cluster_name
+    done
+    echo "CLUSTER NAME"
+
+}
+
+function get_apidetails() {
+
+    export bds_instance_id=$bdsid
+
+    listapijson=$(oci bds bds-api-key list --bds-instance-id $bds_instance_id)
+
+    #echo $listapijson | jq '.data[1]["key-alias"]'
+    id="NULL"
+    ctype="ACTIVE"
+    for k in $(jq '.data | keys | .[]' <<< "$listapijson"); do
+        type=`echo $listapijson | jq -r '.data['$k']["lifecycle-state"]'`
+        if [ $type = "$ctype" ]; then
+            id=`echo $listapijson | jq -r '.data['$k']["id"]'`
+        fi
+    done
+
+    echo $id
+
+    export api_key_id=$id
+
+    list_api=`oci bds bds-api-key get --api-key-id $api_key_id --bds-instance-id $bds_instance_id`
+
+    #echo $list_api | jq '.data'
+
+    data=`echo $list_api | jq '.data'`
+    echo "API DETAILS"
+    echo $data
+    region=`echo $list_api | jq -r '.data["default-region"]'`
+    fingerprint=`echo $list_api | jq -r '.data["fingerprint"]'`
+    keyalias=`echo $list_api | jq -r '.data["key-alias"]'`
+    lifecyc=`echo $list_api | jq -r '.data["lifecycle-state"]'`
+    tm=`echo $list_api | jq -r '.data["time-created"]'`
+    usid=`echo $list_api | jq -r '.data["user-id"]'`
+    tenid=`echo $list_api | jq -r '.data["tenant-id"]'`
+    pemfilepath=`echo $list_api | jq -r '.data["pemfilepath"]'`
+
+}
+
+function update_bds_config(){
+    # change the below variables for your cluster
+    CONFIG_FILE_TO_UPDATE=""
+
+    # Used when restarting components after a config update
+    # Wait time before we poll for restart status. Default 30 seconds, i.e. we poll for restart status every 30 seconds
+    WAIT_TIME_IN_SEC=30
+
+    # No of tries before we give up on the restart status. Default 20. With the default WAIT_TIME_IN_SEC of 30, we wait at most 10 minutes (20*30 = 600 seconds) before giving up.
+    RETRY_COUNT=20
+
+    # INTERNAL USE ONLY
+    propObj=""
+
+    get_apidetails
+    getUtilityNodesIps=$livyip
+    getlivyclustername
+    echo $getUtilityNodesIps
+    getClusterName=$cluster_name
+    for utilityNodeIp in $getUtilityNodesIps
+    do
+        echo "Current utility node ip: $utilityNodeIp"
+        str1=$(nslookup $utilityNodeIp | awk -v var=$utilityNodeIp '/name =/{print var "\t", $4}')
+        CONFIG_FILE_TO_UPDATE="core-site" #this is the file we're updating in this example
+        propObj=$(get_property_json)
+        echo $propObj
+        echo "calling add properties"
+
+        # update key value pairs. Multiple key value pairs can be updated before doing update_ambari_config
+        add_properties "fs.oci.client.auth.fingerprint" $fingerprint
+        add_properties "fs.oci.client.auth.passphrase" $passphrase
+        add_properties "fs.oci.client.auth.pemfilepath" $pemfilepath
+        add_properties "fs.oci.client.auth.tenantId" $tenid
+        add_properties "fs.oci.client.auth.userId" $usid
+        add_properties "fs.oci.client.regionCodeOrId" $region
+        # Update it to ambari
+        echo "updating ambari config"
+        update_ambari_config
+
+        # echo "restarting all required components"
+        # restart_required_components
+
+    done
+
+}
+
+
+# Method to collect the current config
+function get_property_json(){
+    allConfs=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName?fields=Clusters/desired_configs) #to get all the configs
+    currVersionLoc=".Clusters.desired_configs.\"$CONFIG_FILE_TO_UPDATE\".tag" #fetching current version for property
+    propVersion=$(echo $allConfs | jq $currVersionLoc | tr -d '"')
+    propJson=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X GET "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/configurations?type=$CONFIG_FILE_TO_UPDATE&tag=$propVersion") #fetch property json
+    propLoc=".items[].properties"
+    propKeyVal=$(echo $propJson | jq $propLoc)
+    propObj="{\"properties\":$propKeyVal}"
+    echo $propObj
+}
+
+# Method to add/update key value pair to existing config
+function add_properties(){
+    echo $1 $2
+    echo $propObj
+    propObj=$(echo $propObj | jq '.properties += { "'$1'": "'$2'" }')
+    echo $propObj
+}
+
+# Method to update config in ambari
+function update_ambari_config(){
+    parseableAddedProp=$(echo $propObj | jq '.properties')
+    echo $parseableAddedProp
+    timestamp=$(date +%s)
+    newVersion="version$timestamp"
+    finalJson='[{"Clusters":{"desired_config":[{"type":"'$CONFIG_FILE_TO_UPDATE'","tag":"'$newVersion'","properties":'$parseableAddedProp'}]}}]'
+    echo "CALLING AMBARI API"
+    response_body_amb=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X PUT -d "$finalJson" "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName")
+    echo $response_body_amb
+    echo "DONE AMBARI API"
+}
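+
+# Ambari keeps each config type versioned under a tag: get_property_json reads
+# the currently desired tag from Clusters/desired_configs and fetches its
+# properties, add_properties merges key/value pairs into that JSON with jq,
+# and update_ambari_config PUTs the merged set back under a fresh
+# "version<epoch>" tag so it becomes the new desired config.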
+
+# Method to restart required components
+function restart_required_components(){
+    echo "restarting all required components"
+    response_body=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X POST -d '{"RequestInfo":{"command":"RESTART","context":"Restart all required services from bootstrap script","operation_level":"host_component"},"Requests/resource_filters":[{"hosts_predicate":"HostRoles/stale_configs=true&HostRoles/cluster_name='$getClusterName'"}]}' "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests")
+
+    echo "printing response_body: $response_body"
+
+    idLoc=".Requests.id"
+    requestId=$(echo $response_body | jq $idLoc)
+    echo "request id is : $requestId"
+
+    current_count=0
+    while [[ $current_count -lt $RETRY_COUNT ]];
+    do
+        current_count=$((current_count+1))
+        response=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests/$requestId)
+        request_status=$(echo $response | jq -r ".Requests.request_status")
+        echo "printing request_status: $request_status"
+        if [[ $request_status == "IN_PROGRESS" ]] || [[ $request_status == "PENDING" ]]; then
+            echo "current_count is : $current_count"
+            sleep $WAIT_TIME_IN_SEC
+        elif [[ $request_status == "COMPLETED" ]]; then
+            echo "Restart successful"
+            break
+        fi
+    done
+}
+
+function create_api(){
+    export bds_instance_id=$bdsid
+    export key_alias=$key_alias # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-key-alias
+    export passphrase=$b64p # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-passphrase
+    export user_id=$user_id
+    oci bds bds-api-key create --bds-instance-id $bds_instance_id --key-alias $key_alias --passphrase $passphrase --user-id $user_id
+}
+
+function restart_bds_cluster() {
+    # oci cli command to stop
+    echo "STOPPING CLUSTER"
+    oci bds instance stop --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --is-force-stop-jobs true
+    sleep 10m
+    # oci cli command to start
+    echo "STARTING CLUSTER"
+    oci bds instance start --bds-instance-id $bds_instance_id --cluster-admin-password $b64p
+    sleep 15m
+}
+
+
+# Below is tenancy
+
+function create_cluster() {
+
+    export compartment_id=$compartment_id
+
+    master=1
+    utility=1
+
+    worker=$workernode # This has to be replaced with a Jenkins parameter
+
+    # Begin script in case all parameters are correct
+    echo "Generating json with $master master nodes, $utility utility nodes and $worker worker nodes"
+    json="["
+
+    for i in `seq 1 $master`
+    do
+        json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }"
+    done
+
+    for i in `seq 1 $utility`
+    do
+        json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }"
+    done
+
+    for i in `seq 1 $worker`
+    do
+        json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 16, \"ocpus\": 3},\"subnetId\": \"$subnet\" }"
+    done
+
+    json="$json]"
+    printf "$json" > "nodes.json"
+    echo "File successfully generated and saved as nodes.json"
+
+    echo "CREATING THE BDS CLUSTER"
+
+    export cluster_public_key=$public_key
+    export cluster_version="ODH2_0"
+    export display_name=$display_name
+    export is_high_availability='false'
+    export is_secure='false'
+
+    cmd="oci bds instance create --cluster-admin-password '$b64p' --cluster-public-key '$cluster_public_key' --cluster-version '$cluster_version' --compartment-id '$compartment_id' --display-name '$display_name' --is-high-availability $is_high_availability --is-secure $is_secure --nodes file://nodes.json "
+    echo $cmd
+    eval "$cmd"
+
+}
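+
+# For example, with workernode=1 the generated nodes.json holds one entry per
+# node type, along the lines of:
+# [{"blockVolumeSizeInGBs": 1000,"nodeType": "MASTER","shape": "VM.Standard.E4.Flex","shapeConfig": { "memoryInGBs": 32, "ocpus": 3},"subnetId": "<subnet_id>" },
+#  {"blockVolumeSizeInGBs": 1000,"nodeType": "UTILITY","shape": "VM.Standard.E4.Flex","shapeConfig": { "memoryInGBs": 32, "ocpus": 3},"subnetId": "<subnet_id>" },
+#  {"blockVolumeSizeInGBs": 1000,"nodeType": "WORKER","shape": "VM.Standard.E4.Flex","shapeConfig": { "memoryInGBs": 16, "ocpus": 3},"subnetId": "<subnet_id>" }]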
CLUSTER" + +create_cluster + +echo "WAITING CLUSTER TO CREATE" + +sleep 42m + +echo "FETCHING BDS ID" + +get_bdsid # This sets BDS ID + +echo "GET LIVY-AMBARI IP" + +getLivyip # This will be ambari ip also + +replace_host + +echo "CREATE OBJECT STORAGE API KEY" + +creat_api + +echo "WAITING FOR API TO CREATE" + +sleep 5m + +echo "UPDATE BDS AMBARI CONFIG" + +get_apidetails + +update_bds_config + +restart_bds_cluster \ No newline at end of file diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 new file mode 100644 index 0000000000..e4295dded8 --- /dev/null +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -0,0 +1,329 @@ +#! /bin/bash +# Subnet id will generate from env variable +# Version 1 Running Fine +ambari_user="{{ambari_user}}" +cluster_password="{{cluster_password}}" +key_alias="{{key_alias}}" +user_id="{{user_id}}" +subnet="{{subnet_id}}" +compartment_id="{{compartment_id}}" +display_name="{{display_name}}" +workernode="{{workernode}}" +cluster_public_key="{{public_key}}" + +cstate='SUCCEEDED' +cwait=2500 + +echo "RECEIVED ALL ENV VARIABLES" + +AMBARI_USER=$ambari_user +AMBARI_PWD=$cluster_password + +function get_bdsid() { + list_param=`oci bds instance list --compartment-id $compartment_id` + bdsid="NULL" + # echo $list_param | jq '.data' + state="ACTIVE" + disname="NULL" + for k in $(jq '.data | keys | .[]' <<< "$list_param"); do + # echo $k + cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'` + if [ $cstate = $state ]; then + disname=`echo $list_param | jq -r '.data['$k']["display-name"]'` + if [ $disname = $display_name ]; then + bdsid=`echo $list_param | jq -r '.data['$k']["id"]'` + fi + + fi + echo "BDS ID" + echo $bdsid + done +} + +function getLivyip() { + + export bds_instance_id=$bdsid + bdsjson=$(oci bds instance get --bds-instance-id $bds_instance_id) + # echo "AMBARI URL" + ambari_url=`echo $bdsjson | jq -r '.data["cluster-details"]["ambari-url"]'` + # echo $ambari_url + livyip="NULL" + cnode="UTILITY" + for k in $(jq '.data["nodes"] | keys | .[]' <<< "$bdsjson"); do + node=`echo $bdsjson | jq -r '.data["nodes"]['$k']["node-type"]'` + if [ $node = "$cnode" ]; then + livyip=`echo $bdsjson | jq -r '.data["nodes"]['$k']["ip-address"]'` + fi + done + echo "LIVY IP" + echo $livyip + +} + +getlivyclustername() { + cdet=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$livyip:7183/api/v1/clusters/) + echo $cdet + for k in $(jq '.items | keys | .[]' <<< "$cdet"); do + # echo $k + cluster_name=`echo $cdet | jq -r '.items['$k']["Clusters"]["cluster_name"]'` + echo $cluster_name + done + echo "CLUSTER NAME" + +} + +function get_apidetails() { + + export bds_instance_id=$bdsid + + listapijson=$(oci bds bds-api-key list --bds-instance-id $bds_instance_id) + + #echo $listapijson | jq '.data[1]["key-alias"]' + id="NULL" + ctype="ACTIVE" + for k in $(jq '.data | keys | .[]' <<< "$listapijson"); do + type=`echo $listapijson | jq -r '.data['$k']["lifecycle-state"]'` + if [ $type = "$ctype" ]; then + id=`echo $listapijson | jq -r '.data['$k']["id"]'` + fi + done + + echo $id + + export api_key_id=$id + + list_api=`oci bds bds-api-key get --api-key-id $api_key_id --bds-instance-id $bds_instance_id ` + + #echo $list_api | jq '.data' + + data=`echo $list_api | jq '.data'` + echo "API DETAILS" + echo $data + region=`echo $list_api | jq -r '.data["default-region"]'` + fingerprint=`echo $list_api | jq -r '.data["fingerprint"]'` + keyalias=`echo 
+
+echo "RECEIVED ALL ENV VARIABLES"
+
+AMBARI_USER=$ambari_user
+AMBARI_PWD=$cluster_password
+
+function get_bdsid() {
+    list_param=`oci bds instance list --compartment-id $compartment_id`
+    bdsid="NULL"
+    # echo $list_param | jq '.data'
+    state="ACTIVE"
+    disname="NULL"
+    for k in $(jq '.data | keys | .[]' <<< "$list_param"); do
+        # echo $k
+        cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'`
+        if [ $cstate = $state ]; then
+            disname=`echo $list_param | jq -r '.data['$k']["display-name"]'`
+            if [ $disname = $display_name ]; then
+                bdsid=`echo $list_param | jq -r '.data['$k']["id"]'`
+            fi
+
+        fi
+        echo "BDS ID"
+        echo $bdsid
+    done
+}
+
+function getLivyip() {
+
+    export bds_instance_id=$bdsid
+    bdsjson=$(oci bds instance get --bds-instance-id $bds_instance_id)
+    # echo "AMBARI URL"
+    ambari_url=`echo $bdsjson | jq -r '.data["cluster-details"]["ambari-url"]'`
+    # echo $ambari_url
+    livyip="NULL"
+    cnode="UTILITY"
+    for k in $(jq '.data["nodes"] | keys | .[]' <<< "$bdsjson"); do
+        node=`echo $bdsjson | jq -r '.data["nodes"]['$k']["node-type"]'`
+        if [ $node = "$cnode" ]; then
+            livyip=`echo $bdsjson | jq -r '.data["nodes"]['$k']["ip-address"]'`
+        fi
+    done
+    echo "LIVY IP"
+    echo $livyip
+
+}
+
+getlivyclustername() {
+    cdet=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$livyip:7183/api/v1/clusters/)
+    echo $cdet
+    for k in $(jq '.items | keys | .[]' <<< "$cdet"); do
+        # echo $k
+        cluster_name=`echo $cdet | jq -r '.items['$k']["Clusters"]["cluster_name"]'`
+        echo $cluster_name
+    done
+    echo "CLUSTER NAME"
+
+}
+
+function get_apidetails() {
+
+    export bds_instance_id=$bdsid
+
+    listapijson=$(oci bds bds-api-key list --bds-instance-id $bds_instance_id)
+
+    #echo $listapijson | jq '.data[1]["key-alias"]'
+    id="NULL"
+    ctype="ACTIVE"
+    for k in $(jq '.data | keys | .[]' <<< "$listapijson"); do
+        type=`echo $listapijson | jq -r '.data['$k']["lifecycle-state"]'`
+        if [ $type = "$ctype" ]; then
+            id=`echo $listapijson | jq -r '.data['$k']["id"]'`
+        fi
+    done
+
+    echo $id
+
+    export api_key_id=$id
+
+    list_api=`oci bds bds-api-key get --api-key-id $api_key_id --bds-instance-id $bds_instance_id`
+
+    #echo $list_api | jq '.data'
+
+    data=`echo $list_api | jq '.data'`
+    echo "API DETAILS"
+    echo $data
+    region=`echo $list_api | jq -r '.data["default-region"]'`
+    fingerprint=`echo $list_api | jq -r '.data["fingerprint"]'`
+    keyalias=`echo $list_api | jq -r '.data["key-alias"]'`
+    lifecyc=`echo $list_api | jq -r '.data["lifecycle-state"]'`
+    tm=`echo $list_api | jq -r '.data["time-created"]'`
+    usid=`echo $list_api | jq -r '.data["user-id"]'`
+    tenid=`echo $list_api | jq -r '.data["tenant-id"]'`
+    pemfilepath=`echo $list_api | jq -r '.data["pemfilepath"]'`
+
+}
+
+function update_bds_config(){
+    # change the below variables for your cluster
+    CONFIG_FILE_TO_UPDATE=""
+
+    # Used when restarting components after a config update
+    # Wait time before we poll for restart status. Default 30 seconds, i.e. we poll for restart status every 30 seconds
+    WAIT_TIME_IN_SEC=30
+
+    # No of tries before we give up on the restart status. Default 20. With the default WAIT_TIME_IN_SEC of 30, we wait at most 10 minutes (20*30 = 600 seconds) before giving up.
+    RETRY_COUNT=20
+
+    # INTERNAL USE ONLY
+    propObj=""
+
+    get_apidetails
+    getUtilityNodesIps=$livyip
+    getlivyclustername
+    echo $getUtilityNodesIps
+    getClusterName=$cluster_name
+    for utilityNodeIp in $getUtilityNodesIps
+    do
+        echo "Current utility node ip: $utilityNodeIp"
+        str1=$(nslookup $utilityNodeIp | awk -v var=$utilityNodeIp '/name =/{print var "\t", $4}')
+        CONFIG_FILE_TO_UPDATE="core-site" #this is the file we're updating in this example
+        propObj=$(get_property_json)
+        echo $propObj
+        echo "calling add properties"
+
+        # update key value pairs. Multiple key value pairs can be updated before doing update_ambari_config
+        add_properties "fs.oci.client.auth.fingerprint" $fingerprint
+        add_properties "fs.oci.client.auth.passphrase" $cluster_password
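+        # Note: this variant passes the plain $cluster_password as the
+        # passphrase, whereas create-cluster-with-sleep.sh.j2 passes the
+        # base64-encoded $passphrase.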
"DONE AMABRI API" +} + +#Method to restart required components +function restart_required_components(){ + echo "restarting all required components" + response_body=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X POST -d '{"RequestInfo":{"command":"RESTART","context":"Restart all required services from bootstrap script","operation_level":"host_component"},"Requests/resource_filters":[{"hosts_predicate":"HostRoles/stale_configs=true&HostRoles/cluster_name='$getClusterName'"}]}' "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests") + + echo "printing response_body: $response_body" + + idLoc=".Requests.id" + requestId=$(echo $response_body | jq $idLoc) + echo "request id is : $requestId" + + current_count=0 + while [[ $current_count -lt $RETRY_COUNT ]]; + do + current_count=$((current_count+1)) + response=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests/$requestId) + request_status=$(echo $response | jq -r ".Requests.request_status") + echo "printing request_status: $request_status" + if [[ $request_status == "IN_PROGRESS" ]] || [[ $request_status == "PENDING" ]]; then + echo "current_count is : $current_count" + sleep $WAIT_TIME_IN_SEC + elif [[ $request_status == "COMPLETED" ]]; then + echo "Restart successful" + break + fi + done +} + +function create_api(){ + export bds_instance_id=$bdsid + export key_alias=$key_alias # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-key-alias + export passphrase=$b64p # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-passphrase + export user_id=$user_id + capi='SUCCEEDED' + oci bds bds-api-key create --bds-instance-id $bds_instance_id --key-alias $key_alias --passphrase $passphrase --user-id $user_id --wait-for-state $capi --max-wait-seconds $cwait +} + +function restart_bds_cluster() { + # oci cli command to stop + echo "STOPPING CLUSTER" + cstate='SUCCEEDED' + cwait=2000 + oci bds instance stop --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --is-force-stop-jobs true --wait-for-state=$cstate --max-wait-seconds $cwait + # oci cli command to start + echo "STARTING CLUSTER" + cstate='SUCCEEDED' + oci bds instance start --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --wait-for-state=$cstate --max-wait-seconds $cwait +} + + +# Below is tenancy + +function create_cluster() { + + export compartment_id=$compartment_id + + master=1 + utility=1 + + worker=$workernode # This has to be replaced with Jenkins Paramter + + # Begin script in case all parameters are correct + echo "Generating json woth $master master ndoes $utility utility nodes and $worker worker nodes" + json="[" + + for i in `seq 1 $master` + do + json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + done + + for i in `seq 1 $utility` + do + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + done + + for i in `seq 1 $worker` + do + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 16, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + done + + json="$json]" + printf 
"$json" > "nodes.json" + echo "File successfully generated and saved as nodes.json" + + echo "TRIGGERED CREATING THE BDS CLUSTER" + + export cluster_public_key=$public_key + export cluster_version="ODH2_0" + export display_name=$display_name + export is_high_availability='false' + export is_secure='false' + cmd="oci bds instance create --cluster-admin-password '$b64p' --cluster-public-key '$cluster_public_key' --cluster-version '$cluster_version' --compartment-id '$compartment_id' --display-name '$display_name' --is-high-availability $is_high_availability --is-secure $is_secure --wait-for-state $cstate --max-wait-seconds $cwait --nodes file://nodes.json " + #echo $cmd + create_response=$(eval "$cmd") + echo "CLUSTER CREATED SUCCESSFULLY" +} + +function replace_host() { + echo "REPLACE THE HOSTS" + echo "" >> {{inventory_dir}}/hosts + echo "[bds-livy-node]" >> {{inventory_dir}}/hosts + echo "$livyip ansible_ssh_user=opc" >> {{inventory_dir}}/hosts + echo "" >> {{inventory_dir}}/hosts + +} + +# MAIN TO START + +b64p=`echo -n $cluster_password | base64` +echo $b64p +echo $compartment_id + +echo "CREATING BDS CLUSTER" + +create_cluster + +echo "FETCHING BDS ID" + +get_bdsid # This sets BDS ID + +echo "GET LIVY-AMBARI IP" + +getLivyip # This will be ambari ip also + +replace_host + +echo "CREATE OBJECT STORAGE API KEY" + +create_api + +echo "UPDATE BDS AMBARI CONFIG" + +get_apidetails + +update_bds_config + +# restart_bds_cluster diff --git a/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 new file mode 100755 index 0000000000..b5e1d28d36 --- /dev/null +++ b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 @@ -0,0 +1,34 @@ +#!/bin/bash + +compartment_id="{{compartment_id}}" +display_name="{{display_name}}" + + +echo "DELETE STARTED" +echo $display_name +echo $compartment_id + + +function get_bdsid() { + list_param=`oci bds instance list --compartment-id $compartment_id` + bdsid="NULL" + # echo $list_param | jq '.data' + state="ACTIVE" + disname="NULL" + for k in $(jq '.data | keys | .[]' <<< "$list_param"); do + # echo $k + cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'` + if [ $cstate = $state ]; then + disname=`echo $list_param | jq -r '.data['$k']["display-name"]'` + if [ $disname = $display_name ]; then + bdsid=`echo $list_param | jq -r '.data['$k']["id"]'` + fi + + fi + echo $bdsid + done +} + +get_bdsid + +yes Y | oci bds instance delete --bds-instance-id $bdsid diff --git a/ansible/roles/provision-oci-spark-cluster/defaults/main.yml b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml new file mode 100644 index 0000000000..11e3e6357b --- /dev/null +++ b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml @@ -0,0 +1,39 @@ + +bucket: "telemetry-data-store" +model_version: "2.0" + +spark_folder: /usr/odh/2.0.1/spark + +# delete +guava_default_version: 14.0.1 +guava_default_jre_version_1: 26.0-jre +guava_default_jre_version_2: 27.0-jre +guice_default_version: 4.2.2 + +# add +guava_version: 19.0 +log4j_version: 2.16.0 +spark_redis_version: 2.5.0 +guava_jre_version: 24.1.1-jre +jedis_version: 3.2.0 +zip4j_version: 2.6.2 +guice_version: 3.0 + +jets3t_version: 0.9.4 +hadoop_aws_version: 2.7.3 +java_xmlbuilder_version: 1.1 + +guava_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_version}}/guava-{{guava_version}}.jar +guava_jre_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_jre_version}}/guava-{{guava_jre_version}}.jar 
+log4j_core_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-core/{{log4j_version}}/log4j-core-{{log4j_version}}.jar +log4j_api_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-api/{{log4j_version}}/log4j-api-{{log4j_version}}.jar +spark_redis_url: https://repo1.maven.org/maven2/com/redislabs/spark-redis_2.12/{{spark_redis_version}}/spark-redis_2.12-{{spark_redis_version}}.jar +jedis_url: https://repo1.maven.org/maven2/redis/clients/jedis/{{jedis_version}}/jedis-{{jedis_version}}.jar +zip4j_url: https://repo1.maven.org/maven2/net/lingala/zip4j/zip4j/{{zip4j_version}}/zip4j-{{zip4j_version}}.jar +guice_url: https://repo1.maven.org/maven2/com/google/inject/guice/{{guice_version}}/guice-{{guice_version}}.jar +guice_servlet_url: https://repo1.maven.org/maven2/com/google/inject/extensions/guice-servlet/{{guice_version}}/guice-servlet-{{guice_version}}.jar + +jets3t_url: https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar +hadoop_aws_url: https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/{{hadoop_aws_version}}/hadoop-aws-{{hadoop_aws_version}}.jar +java_xmlbuilder_url: https://repo1.maven.org/maven2/com/jamesmurty/utils/java-xmlbuilder/{{java_xmlbuilder_version}}/java-xmlbuilder-{{java_xmlbuilder_version}}.jar + diff --git a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml new file mode 100644 index 0000000000..9df37915bd --- /dev/null +++ b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml @@ -0,0 +1,89 @@ +# - name: Adding azure blob variable to spark env file + # lineinfile: + # path: "{{spark_folder}}/conf/spark-env.sh" + # line: '{{item.var}}={{item.value}}' + # regexp: "{{ item.var }}.*" + # with_items: + # - {var: 'azure_storage_key', value: '{{ azure_private_storage_account_name }}'} + # - {var: 'azure_storage_secret', value: '{{ azure_private_storage_account_key }}'} + # no_log: true + # when: cloud_service_provider == "azure" + +- name: Remove guava-jre, guice default jars + become: yes + file: + path: "{{ spark_folder }}/jars/{{item.var}}-{{item.value}}.jar" + state: absent + with_items: + - {var: 'guava', value: '{{ guava_default_version }}'} + - {var: 'guava', value: '{{ guava_default_jre_version_1 }}'} + - {var: 'guava', value: '{{ guava_default_jre_version_2 }}'} + - {var: 'guice', value: '{{ guice_default_version }}'} + - {var: 'guice-servlet', value: '{{ guice_default_version }}'} + +- name: Download guava and copy to Spark jars folder + become: yes + get_url: url={{ guava_url }} dest={{ spark_folder }}/jars/guava-{{guava_version}}.jar timeout=1000 force=no + +- name: Download guava_jre_url and copy to Spark jars folder + become: yes + get_url: url={{ guava_jre_url }} dest={{ spark_folder }}/jars/guava-{{guava_jre_version}}.jar timeout=1000 force=no + +- name: Download log4j api and copy to Spark jars folder + become: yes + get_url: url={{ log4j_api_url }} dest={{ spark_folder }}/jars/log4j-api-{{log4j_version}}.jar timeout=1000 force=no + +- name: Download log4j core and copy to Spark jars folder + become: yes + get_url: url={{ log4j_core_url }} dest={{ spark_folder }}/jars/log4j-core-{{log4j_version}}.jar timeout=1000 force=no + +- name: Download spark-redis and copy to Spark jars folder + become: yes + get_url: url={{ spark_redis_url }} dest={{ spark_folder }}/jars/spark-redis_2.12-{{spark_redis_version}}.jar timeout=1000 force=no + +- name: Download jedis and copy to Spark jars folder + become: yes + 
get_url: url={{ jedis_url }} dest={{ spark_folder }}/jars/jedis-{{jedis_version}}.jar timeout=1000 force=no + +- name: Download zip4j and copy to Spark jars folder + become: yes + get_url: url={{ zip4j_url }} dest={{ spark_folder }}/jars/zip4j-{{zip4j_version}}.jar timeout=1000 force=no + +- name: Download guice and copy to Spark jars folder + become: yes + get_url: url={{ guice_url }} dest={{ spark_folder }}/jars/guice-{{guice_version}}.jar timeout=1000 force=no + +- name: Download guice-servlet and copy to Spark jars folder + become: yes + get_url: url={{ guice_servlet_url }} dest={{ spark_folder }}/jars/guice-servlet-{{guice_version}}.jar timeout=1000 force=no + +- name: Download jets3t and copy to Spark jars folder + become: yes + get_url: url={{ jets3t_url }} dest={{ spark_folder }}/jars/jets3t-{{jets3t_version}}.jar timeout=1000 force=no + +- name: Download hadoop_aws and copy to Spark jars folder + become: yes + get_url: url={{ hadoop_aws_url }} dest={{ spark_folder }}/jars/hadoop-aws-{{hadoop_aws_version}}.jar timeout=1000 force=no + +- name: Download java_xmlbuilder and copy to Spark jars folder + become: yes + get_url: url={{ java_xmlbuilder_url }} dest={{ spark_folder }}/jars/java-xmlbuilder-{{java_xmlbuilder_version}}.jar timeout=1000 force=no + + +- name: Download config to livy + command: hdfs dfs -get -f oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf {{ spark_folder }}/conf/application.conf + + + +- name: Update log4j.properties + become: yes + blockinfile: + path: "{{ spark_folder }}/conf/log4j.properties" + block: | + log4j.logger.org.ekstep.analytics=INFO + log4j.appender.org.ekstep.analytics=org.apache.log4j.RollingFileAppender + log4j.appender.org.ekstep.analytics.File=./joblog.log + log4j.appender.org.ekstep.analytics.MaxFileSize=${log4jspark.log.maxfilesize} + log4j.appender.org.ekstep.analytics.MaxBackupIndex=${log4jspark.log.maxbackupindex} + log4j.appender.org.ekstep.analytics.layout=org.apache.log4j.PatternLayout + log4j.appender.org.ekstep.analytics.layout.ConversionPattern=%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n diff --git a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile index 9749d35b36..926c773ff0 100644 --- a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile +++ b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile @@ -26,7 +26,7 @@ node() { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/spark-cluster-job-submit.yml" - ansibleExtraArgs = "--tags ${params.job_type} --extra-vars \"job_id=${params.job_id} mode=${params.mode} partitions=${params.partitions} parallelisation=${params.parallelisation} start_date=${params.start_date} end_date=${params.end_date} batch_id=${params.batch_identifier} sparkMaster=${params.sparkMaster} pause_min=${params.pause_min} selected_partitions=${params.selected_partitions}\" --vault-password-file /var/lib/jenkins/secrets/vault-pass -vvvv " + ansibleExtraArgs = "--tags ${params.job_type} --extra-vars \"job_id=${params.job_id} mode=${params.mode} partitions=${params.partitions} parallelisation=${params.parallelisation} start_date=${params.start_date} end_date=${params.end_date} batch_id=${params.batch_identifier} sparkMaster=${params.sparkMaster} pause_min=${params.pause_min} vcn_name=${params.vcn_name} bds_cluster_name=${params.bds_cluster_name} selected_partitions=${params.selected_partitions}\" 
--vault-password-file /var/lib/jenkins/secrets/vault-pass -vvvv " values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds new file mode 100644 index 0000000000..ae7e33e271 --- /dev/null +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -0,0 +1,57 @@ + +@Library('deploy-conf') _ +node('build-slave') { + try { + String ANSI_GREEN = "\u001B[32m" + String ANSI_NORMAL = "\u001B[0m" + String ANSI_BOLD = "\u001B[1m" + String ANSI_RED = "\u001B[31m" + String ANSI_YELLOW = "\u001B[33m" + + ansiColor('xterm') { + stage('Checkout') { + checkout scm + } + + stage('copy cluster creation script') { + values = [:] + envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim() + module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() + jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() + currentWs = sh(returnStdout: true, script: 'pwd').trim() + ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + values.put('currentWs', currentWs) + values.put('env', envDir) + values.put('module', module) + values.put('jobName', jobName) + values.put('ansiblePlaybook', ansiblePlaybook) + values.put('ansibleExtraArgs', ansibleExtraArgs) + println values + ansible_playbook_run(values) + } + stage('create and provision spark OCI BDS') { + oci_namespace=params.oci_namespace + //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { + sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + cd /tmp + ./create-cluster.sh + export inventory_dir=/var/lib/jenkins/workspace/Provision/dev/DataPipeline/__SparkBDSCluster3/ansible/inventory/env + + export ANSIBLE_HOST_KEY_CHECKING=False + ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + ''' + //} + + } + + } + } + catch (err) { + currentBuild.result = "FAILURE" + throw err + } + +} diff --git a/pipelines/provision/spark/Jenkinsfile.bds.test b/pipelines/provision/spark/Jenkinsfile.bds.test new file mode 100644 index 0000000000..bd6de3ad34 --- /dev/null +++ b/pipelines/provision/spark/Jenkinsfile.bds.test @@ -0,0 +1,60 @@ + +@Library('deploy-conf') _ +node('build-slave') { + try { + String ANSI_GREEN = "\u001B[32m" + String ANSI_NORMAL = "\u001B[0m" + String ANSI_BOLD = "\u001B[1m" + String ANSI_RED = "\u001B[31m" + String ANSI_YELLOW = "\u001B[33m" + + ansiColor('xterm') { + stage('Checkout') { + checkout scm + } + + stage('copy cluster creation script') { + values = [:] + envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim() + module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() + jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() + currentWs = 
sh(returnStdout: true, script: 'pwd').trim()
+            ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml"
+            ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.type}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass"
+            values.put('currentWs', currentWs)
+            values.put('env', envDir)
+            values.put('module', module)
+            values.put('jobName', jobName)
+            values.put('ansiblePlaybook', ansiblePlaybook)
+            values.put('ansibleExtraArgs', ansibleExtraArgs)
+            println values
+            ansible_playbook_run(values)
+        }
+        stage('create and provision spark OCI BDS') {
+            oci_namespace=params.oci_namespace
+            // NOTE: hyphens are not valid in Groovy or shell identifiers, so the original
+            // `bds-livy-node-ip=params.bds-livy-node-ip` could never resolve. Underscored
+            // names are assumed here (the Jenkins job parameter must be renamed to match);
+            // setting it via env.* makes it visible inside the sh step below.
+            env.bds_livy_node_ip = params.bds_livy_node_ip
+            sh '''
+            currentws=$(pwd)
+            ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml"
+            cd /tmp
+            #./create_cluster_bds.sh
+
+            export inventory_dir=/var/lib/jenkins/workspace/Provision/dev/DataPipeline/__SparkBDSCluster3/ansible/inventory/env
+            echo "" >> $inventory_dir/hosts
+            echo "[bds-livy-node]" >> $inventory_dir/hosts
+            echo "$bds_livy_node_ip ansible_ssh_user=opc" >> $inventory_dir/hosts
+            echo "" >> $inventory_dir/hosts
+
+            # export, so the ansible-playbook invocation below actually sees it (matches Jenkinsfile.bds)
+            export ANSIBLE_HOST_KEY_CHECKING=False
+            ansible-playbook -i $currentws/ansible/inventory/env/hosts $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass
+            '''
+        }
+
+    }
+    }
+    catch (err) {
+        currentBuild.result = "FAILURE"
+        throw err
+    }
+
+}
diff --git a/pipelines/provision/spark/Jenkinsfile.delete b/pipelines/provision/spark/Jenkinsfile.delete
index 93aed171cb..5675a7e1df 100644
--- a/pipelines/provision/spark/Jenkinsfile.delete
+++ b/pipelines/provision/spark/Jenkinsfile.delete
@@ -1,3 +1,4 @@
+
 @Library('deploy-conf') _
 node('build-slave') {
     try {
@@ -18,8 +19,8 @@ node('build-slave') {
         module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim()
         jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim()
         currentWs = sh(returnStdout: true, script: 'pwd').trim()
-        ansiblePlaybook = "${currentWs}/ansible/azure-hdinsight-spark.provision.yml"
-        ansibleExtraArgs = "--extra-vars \"azure_resource_group=${params.resource_group} subscription_id=${env.subscription_id} tenant_id=${env.tenant_id} cluster_state=${params.type}\" --tags copy-script --vault-password-file /var/lib/jenkins/secrets/vault-pass"
+        ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml"
+        ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass"
         values.put('currentWs', currentWs)
         values.put('env', envDir)
         values.put('module', module)
@@ -29,15 +30,16 @@ node('build-slave') {
         println values
         ansible_playbook_run(values)
     }
-    stage('delete spark HDinsight cluster') {
+    stage('delete spark OCI BDS cluster') {
         storage_container=params.storage_container
-
withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { + //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" cd /tmp - ./delete-cluster.sh $spuser $sppass + ./delete-cluster.sh ''' - } - + //} } From 84b1b8572c863b1c3915e8527a6f2fa4e3fd13c6 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 7 Apr 2023 15:41:56 +1000 Subject: [PATCH 070/161] v2 base changes Signed-off-by: Deepak Devadathan --- .../defaults/main.yml | 282 ++++++++ .../collection-summary-ingestion-spec.json | 251 +++++++ .../files/sourcing-ingestion-spec.json | 146 ++++ .../tasks/main.yml | 499 +++++++++++++ .../templates/cluster-config.json.j2 | 86 +++ .../templates/common.conf.j2 | 317 +++++++++ .../templates/exhaust_sanity_check.py.j2 | 58 ++ .../templates/log4j2.xml.j2 | 54 ++ .../templates/model-config.j2 | 151 ++++ .../templates/model-config.json.j2 | 670 ++++++++++++++++++ .../templates/model-dock-config.j2 | 34 + .../templates/replay-job.j2 | 63 ++ .../templates/replay-updater.j2 | 24 + .../templates/replay-utils.j2 | 43 ++ .../templates/run-dock-job.j2 | 41 ++ .../templates/run-job.j2 | 83 +++ .../templates/start-jobmanager.j2 | 46 ++ .../templates/submit-all-jobs.rb.j2 | 58 ++ .../templates/submit-job.j2 | 22 + .../templates/submit-script.j2 | 216 ++++++ .../templates/update-job-requests.py.j2 | 119 ++++ .../data-products-deploy/defaults/main.yml | 8 +- .../roles/data-products-deploy/tasks/main.yml | 14 +- 23 files changed, 3274 insertions(+), 11 deletions(-) create mode 100755 ansible/roles/data-products-deploy-oci-bds/defaults/main.yml create mode 100644 ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json create mode 100644 ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json create mode 100644 ansible/roles/data-products-deploy-oci-bds/tasks/main.yml create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 create mode 100644 
ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 diff --git a/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml b/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml new file mode 100755 index 0000000000..7eb22c7a18 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml @@ -0,0 +1,282 @@ +analytics_user: analytics +analytics_group: analytics +spark_output_temp_dir: /mount/data/analytics/tmp/ + +bucket: "telemetry-data-store" +secor_bucket: "telemetry-data-store" +dp_object_store_type: "oci" +dp_raw_telemetry_backup_location: "unique/raw/" +dp_storage_key_config: "azure_storage_key" +dp_storage_secret_config: "azure_storage_secret" +dp_reports_storage_key_config: "reports_azure_storage_key" +dp_reports_storage_secret_config: "reports_azure_storage_secret" + +kafka_broker_host: "{{groups['processing-cluster-kafka'][0]}}:9092" +ingestion_kafka_broker_host: "{{groups['ingestion-cluster-kafka'][0]}}:9092" +brokerlist: "{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" +zookeeper: "{{groups['processing-cluster-zookeepers']|join(':2181,')}}:2181" +dp_username: dp-monitor +analytics_job_queue_topic: "{{ env }}.analytics.job_queue" +topic: "{{ env }}.telemetry.derived" +analytics_metrics_topic: "{{ env }}.analytics_metrics" +sink_topic: "{{ env }}.telemetry.sink" +assess_topic: "{{ env }}.telemetry.assess" +metrics_topic: "{{ env }}.telemetry.metrics" +job_manager_tmp_dir: "transient-data" +channel: dev-test +druid_broker_host: "{{groups['raw-broker'][0]}}" +druid_router_host: "{{groups['raw-router'][0]}}" +druid_rollup_broker_host: "{{groups['raw-broker'][0]}}" +hierarchySearchServiceUrl: "{{ proto }}://{{ domain_name }}/action/content" +hierarchySearchServicEndpoint: /v3/hierarchy/ + +user_table_keyspace: "sunbird" +course_keyspace: "sunbird_courses" +hierarchy_store_keyspace: "{{ env }}_hierarchy_store" +job_request_table: "{{ env }}_job_request" +dataset_metadata_table: "{{ env }}_dataset_metadata" +report_user_table_keyspace: "sunbird_courses" +report_user_enrolment_table: "report_user_enrolments" + +analytics_job_list: '"wfs", "content-rating-updater", "monitor-job-summ"' +analytics_jobs_count: 3 + +cassandra_keyspace_prefix: '{{ env }}_' +report_cassandra_cluster_host: "{{ report_cassandra_host | default(core_cassandra_host) }}" +cassandra_hierarchy_store_keyspace: "{{ env_name}}_hierarchy_store" +spark_version: 3.1.3 + +heap_memory: "-Xmx5120m" + +spark: + home: "{{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7" + public_dns: 54.255.154.146 + master: + url: spark://172.31.11.117:7077 + host: 172.31.11.117 + worker: + instances: 1 + cores: 2 + memory: 4g + driver: + memory: 3g + executor: + memory: 4g + driver_memory: 7g + memory_fraction: 0.3 + storage_fraction: 0.5 + executor_memory: 2g + heap_conf_str: '"-XX:+UseG1GC -XX:MaxGCPauseMillis=100 -Xms250m {{ heap_memory }} -XX:+UseStringDeduplication"' + +submit_jobs: + submit-all-jobs: + hour: 02 + minute: 35 + +start_jobmanager: + job-manager: + hour: 02 + minute: 30 +have_weekly_jobs: false + +course_batch_status_updater_job_schedule: 60 + +run_wfs_job: + wfs: + hour: 00 + minute: 30 +run_monitor_job: + monitor-job-summ: + hour: 03 + minute: 00 + +run_admin_user_reports_job: + admin-user-reports-3AMIST: + hour: 21 + minute: 30 + admin-user-reports-2PMIST: + hour: 8 + minute: 30 + +run_admin_geo_reports_job: + admin-geo-reports-4AMIST: + hour: 22 + minute: 30 + admin-geo-reports-3PMIST: + hour: 9 + minute: 30 + 
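+# Note: the *IST suffixes in the job names encode the intended IST wall-clock time, while
+# hour/minute are the cron fields in the scheduler host's local time -- e.g. 21:30 UTC =
+# 03:00 IST for admin-user-reports-3AMIST. This assumes the host runs in UTC.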
+run_assessment_aggregator_report_job: + assessment-aggregator-report: + hour: 18 + minute: 35 + +update_user_redis_cache: + populate-user-cache: + hour: 3 + minute: 00 + +index_content_model_druid: + index_content: + hour: 1 + minute: 00 + +run_etb_metrics_weekly_job: + etb-metrics-weekly: + hour: 23 + minute: 30 + weekday: 1 + +# These are the dummy times till sept30 for exhaust reports +#To-Do: Update time after 3.2.7 deployment + +run_progress_exhaust: + progress-exhaust: + hour: 08 + minute: 00 + +run_response_exhaust: + response-exhaust: + hour: 09 + minute: 00 + +run_userinfo_exhaust: + userinfo-exhaust: + hour: 10 + minute: 00 + +run_collection_summary: + collection-summary: + hour: 09 + minute: 30 + +run_sourcing_summary: + sourcing-summary: + hour: 10 + minute: 30 + +run_cassandra_migration: + cassandra-migration: + hour: 19 + minute: 15 + +run_uci_private_exhaust_job: + uci-private-exhaust: + hour: 03 + minute: 00 + +run_uci_response_exhaust_job: + uci-response-exhaust: + hour: 02 + minute: 00 + + +service: + search: + url: http://{{private_ingressgateway_ip}}/search + path: /v3/search + +es_search_index: "compositesearch" +analytics: + home: /mount/data/analytics + soft_path: /mount/data/analytics + paths: ['/mount/data/analytics', '/mount/data/analytics/logs', '/mount/data/analytics/logs/services', '/mount/data/analytics/logs/data-products', '/mount/data/analytics/tmp', '/mount/data/analytics/scripts', '/mount/data/analytics/models' ] + scripts: ['model-config', 'replay-job', 'replay-updater', 'replay-utils', 'run-job', 'submit-job', 'start-jobmanager', 'submit-script'] + dockScripts: ['model-dock-config','run-dock-job'] + +# artifact versions +analytics_core_artifact_ver: "2.0" +analytics_ed_dataporducts_artifact_ver: "1.0" +scruid_artifact_ver: "2.5.0" + +producer_env: "dev.sunbird" +analytics_job_manager_artifact: "job-manager-{{ analytics_core_artifact_ver }}.jar" +analytics_core_artifact: "analytics-framework-{{ analytics_core_artifact_ver }}.jar" +scruid_artifact: "scruid_2.12-{{ scruid_artifact_ver }}.jar" +analytics_batch_module_artifact: "batch-models-{{ analytics_core_artifact_ver }}.jar" +analytics_ed_dataporducts_artifact: "data-products-{{ analytics_ed_dataporducts_artifact_ver }}-distribution.tar.gz" +model_version: "2.0" + +submit_jobs_auth_token: "{{ sunbird_api_auth_token }}" +report_list_jobs_url: "{{ druid_report_url }}" + +reports_container: "reports" + +# Cluster vars +spark_cluster_user_password: "" +spark_cluster_user_name: "" +admin_name: "{{ spark_cluster_user_name }}" +admin_password: "{{ spark_cluster_user_password }}" +spark_cluster_name: "{{env}}-spark-cluster" + +spark_cluster: + executor_core: 1 + executor_memory: 2G + num_executors: 1 + +analytics_cluster: + home: "/tmp" + +analytics_ed_dataporducts_jar_artifact: "data-products-{{ analytics_ed_dataporducts_artifact_ver }}.jar" + +spark_enable_dynamic_allocation: false +# Spark Cassandra config-vars +spark_cassandra_connection_timeout_millis: 30000 +spark_cassandra_query_timeout_millis: 180000 +spark_cassandra_query_max_rows_fetch_count: 1000 +spark_sql_shuffle_partitions: 200 + +druid_report_postgres_db_name: druid +druid_report_postgres_db_username: druid + + +#Override this variable in production and point to druid rollup ingestion cluster +# Example: "http://$rollup_cluster_ip:8090" +druid_rollup_cluster_ingestion_task_url: "http://{{groups['raw-overlord'][0]}}:8081" + +# On demand Exhaust throttling vars +exhaust_batches_limit_per_channel: 30 +exhaust_file_size_limit_bytes_per_channel: 
1073741824 + +exhaust_parallel_batch_load_limit: 10 +exhaust_user_parallelism: 200 + +data_exhaust_batch_limit_per_request: 20 + +# Start Of UCI Related Variables +uci_postgres_host: "dev-pg11.postgres.database.azure.com" +uci_encryption_key_base64: "" +uci_bot_postgres_database: uci-botdb +uci_fusionauth_postgres_database: uci-fusionauth +uci_postgres_user: "{{postgres.db_username}}" +uci_postgres_password: "{{postgres.db_password}}" + +uci_postgres: + conversation_db_name: "{{ uci_bot_postgres_database }}" + conversation_db_host: "{{ uci_postgres_host }}" + conversation_db_port: "5432" + conversation_db_user: "{{ uci_postgres_user }}" + conversation_db_psss: "{{ uci_postgres_password }}" + conversation_table_name: "bot" + fushionauth_db_name: "{{ uci_fusionauth_postgres_database }}" + fushionauth_db_host: "{{ uci_postgres_host }}" + fushionauth_db_port: "5432" + fushionauth_db_user: "{{ uci_postgres_user }}" + fushionauth_db_psss: "{{ uci_postgres_password }}" + user_table_name: "users" + user_registration_table_name: "user_registrations" + user_identities_table_name: "identities" + +uci_encryption_secret_key: "{{uci_encryption_key_base64}}" +uci_pdata_id: "{{uci_env}}.uci.{{sunbird_instance}}" + +# End Of UCI Related Variables + +# Exhaust sanity check vars +cassandra_migrator_job_name: "Cassandra Migrator" + +assessment_metric_primary_category: "{{ exhaust_job_assessment_primary_category }}" + +# Default s3 variables +sunbird_private_s3_storage_key: "" +sunbird_private_s3_storage_secret: "" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json b/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json new file mode 100644 index 0000000000..69e13196e2 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json @@ -0,0 +1,251 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "collection-summary-snapshot", + "parser": { + "type": "string", + "parseSpec": { + "format": "json", + "flattenSpec": { + "useFieldDiscovery": false, + "fields": [ + { + "type": "root", + "name": "content_org", + "expr": "contentorg" + }, + { + "type": "root", + "name": "user_org", + "expr": "orgname" + }, + { + "type": "root", + "name": "batch_start_date", + "expr": "startdate" + }, + { + "type": "root", + "name": "batch_end_date", + "expr": "enddate" + }, + { + "type": "root", + "name": "has_certificate", + "expr": "hascertified" + }, + { + "type": "root", + "name": "collection_id", + "expr": "courseid" + }, + { + "type": "root", + "name": "batch_id", + "expr": "batchid" + }, + { + "type": "root", + "name": "collection_name", + "expr": "collectionname" + }, + { + "type": "root", + "name": "batch_name", + "expr": "batchname" + }, + { + "type": "root", + "name": "total_enrolment", + "expr": "enrolleduserscount" + }, + { + "type": "root", + "name": "total_completion", + "expr": "completionuserscount" + }, + { + "type": "root", + "name": "total_certificates_issued", + "expr": "certificateissuedcount" + }, + { + "type": "root", + "name": "content_status", + "expr": "contentstatus" + }, + { + "type": "root", + "name": "user_state", + "expr": "state" + }, + { + "type": "root", + "name": "user_district", + "expr": "district" + }, + { + "type": "root", + "name": "content_channel", + "expr": "channel" + }, + { + "type": "root", + "name": "keywords", + "expr": "keywords" + }, + { + "type": "root", + "name": "timestamp", + "expr": "timestamp" + }, + { + 
"type": "root", + "name": "medium", + "expr": "medium" + }, + { + "type": "root", + "name": "subject", + "expr": "subject" + }, + { + "type": "root", + "name": "created_for", + "expr": "createdfor" + }, + { + "type": "root", + "name": "user_type", + "expr": "usertype" + }, + { + "type": "root", + "name": "user_subtype", + "expr": "usersubtype" + } + ] + }, + "dimensionsSpec": { + "dimensions": [ + { + "name": "content_org" + }, + { + "name": "user_org" + }, + { + "type": "string", + "name": "batch_id" + }, + { + "type": "string", + "name": "batch_start_date" + }, + { + "type": "string", + "name": "batch_end_date" + }, + { + "type": "string", + "name": "collection_id" + }, + { + "type": "string", + "name": "collection_name" + }, + { + "type": "string", + "name": "batch_name" + }, + { + "type": "long", + "name": "total_enrolment" + }, + { + "type": "long", + "name": "total_completion" + }, + { + "type": "long", + "name": "total_certificates_issued" + }, + { + "type": "string", + "name": "content_status" + }, + { + "type": "string", + "name": "user_state" + }, + { + "type": "string", + "name": "user_district" + }, + { + "name": "keywords" + }, + { + "name": "has_certificate" + }, + { + "type": "string", + "name": "content_channel" + }, + { + "name": "medium" + }, + { + "name": "subject" + }, + { + "name": "created_for" + }, + { + "type": "string", + "name": "user_type" + }, + { + "type": "string", + "name": "user_subtype" + } + ], + "dimensionsExclusions": [] + }, + "timestampSpec": { + "column": "timestamp", + "format": "auto" + } + } + }, + "metricsSpec": [], + "granularitySpec": { + "type": "uniform", + "segmentGranularity": "day", + "queryGranularity": "none", + "rollup": true + } + }, + "ioConfig": { + "type": "index", + "firehose": { + "type": "static-azure-blobstore", + "blobs": [ + { + "container": "reports", + "path": "/collection-summary-reports-v2/collection-summary-report-latest.json" + } + ], + "fetchTimeout": 300000 + } + }, + "tuningConfig": { + "type": "index", + "targetPartitionSize": 5000000, + "maxRowsInMemory": 25000, + "forceExtendableShardSpecs": false, + "logParseExceptions": true + } + } +} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json b/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json new file mode 100644 index 0000000000..69e773d457 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json @@ -0,0 +1,146 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "sourcing-summary-snapshot", + "parser": { + "type": "string", + "parseSpec": { + "format": "json", + "flattenSpec": { + "useFieldDiscovery": false, + "fields": [ + { + "type": "root", + "name": "program_id", + "expr": "program_id" + }, + { + "type": "root", + "name": "status", + "expr": "status" + }, + { + "type": "root", + "name": "rootorg_id", + "expr": "rootorg_id" + }, + { + "type": "root", + "name": "user_id", + "expr": "user_id" + }, + { + "type": "root", + "name": "osid", + "expr": "osid" + }, + { + "type": "root", + "name": "user_type", + "expr": "user_type" + }, + { + "type": "root", + "name": "contributor_id", + "expr": "contributor_id" + }, + { + "type": "root", + "name": "total_contributed_content", + "expr": "total_contributed_content" + }, + { + "type": "root", + "name": "primary_category", + "expr": "primary_category" + }, + { + "type": "root", + "name": "created_by", + "expr": "created_by" + } + ] + }, + "dimensionsSpec": { + "dimensions": [ 
+ { + "type": "string", + "name": "program_id" + }, + { + "type": "string", + "name": "status" + }, + { + "type": "string", + "name": "rootorg_id" + }, + { + "type": "string", + "name": "user_id" + }, + { + "type": "string", + "name": "osid" + }, + { + "type": "string", + "name": "user_type" + }, + { + "type": "string", + "name": "contributor_id" + }, + { + "type": "string", + "name": "primary_category" + }, + { + "type": "string", + "name": "created_by" + } + ], + "dimensionsExclusions": [] + }, + "timestampSpec": { + "column": "timestamp", + "format": "auto" + } + } + }, + "metricsSpec": [ + { + "name": "total_count", + "type": "count" + } + ], + "granularitySpec": { + "type": "uniform", + "segmentGranularity": "day", + "queryGranularity": "none", + "rollup": true + } + }, + "ioConfig": { + "type": "index", + "firehose": { + "type": "static-azure-blobstore", + "blobs": [ + { + "container": "reports", + "path": "/sourcing/SourcingSummaryReport.json" + } + ], + "fetchTimeout": 300000 + } + }, + "tuningConfig": { + "type": "index", + "targetPartitionSize": 5000000, + "maxRowsInMemory": 25000, + "forceExtendableShardSpecs": false, + "logParseExceptions": true + } + } +} diff --git a/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml b/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml new file mode 100644 index 0000000000..733c416138 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml @@ -0,0 +1,499 @@ +## Data products deployment ## +- name: Ensure azure blob storage container exists + command: az storage container create --name {{ bucket }} + when: dp_object_store_type == "azure" + tags: + - always + +- name: Ensure oci oss bucket exists + command: "oci os bucket get --name {{ bucket }}" + register: check_bucket + when: dp_object_store_type == "oci" + tags: + - always + +- name: Create oci oss bucket + command: "oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + when: dp_object_store_type == "oci" and check_bucket.rc !=0 + tags: + - always + +- name: Copy Core Data Products + copy: src={{ analytics_batch_module_artifact }} dest={{ analytics.home }}/models-{{ model_version }} + tags: + - dataproducts + +- name: Copy Core Data Products to azure blob + command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} + async: 3600 + poll: 10 + when: dp_object_store_type == "azure" + tags: + - dataproducts-spark-cluster + +- name: Copy Core Data Products to oci oss + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force + async: 3600 + poll: 10 + when: dp_object_store_type == "oci" + tags: + - dataproducts-spark-cluster + +- name: Unarchive Ed Data Products + become: yes + unarchive: src={{ playbook_dir}}/{{ analytics_ed_dataporducts_artifact }} dest={{ analytics.home }}/models-{{ model_version }} copy=yes group={{ analytics_group }} owner={{ analytics_user }} + tags: + - ed-dataproducts + +- name: Copy Ed Data Products to azure blob + command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar -f {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar + async: 3600 + poll: 10 + when: dp_object_store_type == "azure" + tags: + - 
ed-dataproducts-spark-cluster + +- name: Copy Ed Data Products to oci oss + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force + async: 3600 + poll: 10 + when: dp_object_store_type == "oci" + tags: + - ed-dataproducts-spark-cluster + +- name: Copy Framework Library + copy: src={{ analytics_core_artifact }} dest={{ analytics.home }}/models-{{ model_version }} + tags: + - framework + +- name: Copy Framework Library to azure blob + command: az storage blob upload --overwrite --debug -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} + async: 3600 + poll: 10 + when: dp_object_store_type == "azure" + tags: + - framework-spark-cluster + +- name: Copy Framework Library to oci oss + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force + async: 3600 + poll: 10 + when: dp_object_store_type == "oci" + tags: + - framework-spark-cluster + +- name: Copy Scruid Library + copy: src={{ scruid_artifact }} dest={{ analytics.home }}/models-{{ model_version }} + tags: + - framework + +- name: Copy Scruid Library to azure blob + command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} + async: 3600 + poll: 10 + when: dp_object_store_type == "azure" + tags: + - framework-spark-cluster + +- name: Copy Scruid Library to oci oss + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force + async: 3600 + poll: 10 + when: dp_object_store_type == "oci" + tags: + - framework-spark-cluster + +- name: Copy Job Manager + copy: src={{ analytics_job_manager_artifact }} dest={{ analytics.home }}/models-{{ model_version }} + tags: + - dataproducts + +- name: Copy configuration file + template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/{{ env }}.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - ed-dataproducts + - framework + when: dockdataproducts is undefined + +- name: Copy configuration file + template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/dock-{{ env }}.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - ed-dataproducts + - framework + when: dockdataproducts is defined + +- name: Copy configuration file as application.conf for cluster + template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/application.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - framework-spark-cluster + +- name: Update spark temp dir value for cluster + lineinfile: + path: '{{ analytics.home }}/models-{{ model_version }}/application.conf' + regexp: '^spark_output_temp_dir="/mount/data/analytics/tmp/"' + line: 'spark_output_temp_dir="/var/log/sparkapp/tmp/"' + tags: + - framework-spark-cluster + +- name: Update logger kafka config for cluster + lineinfile: + path: '{{ analytics.home }}/models-{{ model_version }}/application.conf' + regexp: '^log.appender.kafka.enable="false"' + 
line: 'log.appender.kafka.enable="true"' + tags: + - framework-spark-cluster + +- name: Copy configuration file to azure blob + command: az storage blob upload --overwrite -c {{ bucket }} -f {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf + async: 3600 + poll: 10 + when: dp_object_store_type == "azure" + tags: + - framework-spark-cluster + +- name: Copy configuration file to oci oss + command: oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force + async: 3600 + poll: 10 + when: dp_object_store_type == "oci" + tags: + - framework-spark-cluster + +- name: Copy log4j2 xml file + template: src=log4j2.xml.j2 dest={{ analytics.home }}/models-{{ model_version }}/log4j2.xml mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: [ dataproducts, framework, ed-dataproducts ] + +- name: Copy Scripts + template: src={{ item }}.j2 dest={{ analytics.home }}/scripts/{{ item }}.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} + with_items: "{{ analytics.scripts }}" + tags: [ dataproducts, framework, ed-dataproducts ] + when: dockdataproducts is undefined + +- name: Copy python sanity check script file + template: src=exhaust_sanity_check.py.j2 dest={{ analytics.home }}/scripts/exhaust_sanity_check.py + tags: [ dataproducts, framework, ed-dataproducts ] + when: dockdataproducts is undefined + +- name: Copy Dock Scripts + template: src={{ item }}.j2 dest={{ analytics.home }}/scripts/{{ item }}.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} + with_items: "{{ analytics.dockScripts }}" + tags: [ dataproducts, framework, ed-dataproducts ] + when: dockdataproducts is defined + +- name: Update model config + template: src=model-config.j2 dest={{ analytics.home }}/scripts/model-config.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - update-config + - ed-dataproducts + when: dockdataproducts is undefined + +- name: Update model dock config + template: src=model-dock-config.j2 dest={{ analytics.home }}/scripts/model-dock-config.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - update-config + - ed-dataproducts + when: dockdataproducts is defined + +- name: Copy submit-all-jobs ruby file + template: src=submit-all-jobs.rb.j2 dest={{ analytics.home }}/scripts/submit-all-jobs.rb mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - update-config + - ed-dataproducts + +- name: Copy model-config.json file + template: src=model-config.json.j2 dest={{ analytics.home }}/scripts/model-config.json mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - update-config + - ed-dataproducts + +- name: Clean cron jobs + command: crontab -r + ignore_errors: yes + tags: + - default-jobs + - spark-jobs + - spark1-jobs + - clean-cronjobs + +- name: Create daily cron jobs for wfs + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh wfs" + with_dict: "{{ run_wfs_job }}" + tags: + - spark1-jobs + +- name: Create daily cron jobs for monitor job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh monitor-job-summ" + with_dict: "{{ run_monitor_job }}" + tags: + - 
spark1-jobs + +- name: Create daily cron jobs using submit-all-jobs + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job='/bin/bash -lc "ruby {{ analytics.home }}/scripts/submit-all-jobs.rb"' + with_dict: "{{ submit_jobs }}" + tags: + - default-jobs + - spark-jobs + - cronjobs + +- name: Create start-jobmanager cron jobs + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/start-jobmanager.sh" + with_dict: "{{ start_jobmanager }}" + tags: + - default-jobs + - spark-jobs + - cronjobs + +- name: Create course-batch-status-updater cron job + cron: name="{{env}}-course-batch-status-updater" minute=*/{{ course_batch_status_updater_job_schedule }} job="{{ analytics.home }}/scripts/run-job.sh course-batch-status-updater" + tags: + - cronjobs + - default-jobs + - spark1-jobs + +- name: Create admin-user-reports cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh admin-user-reports" + with_dict: "{{ run_admin_user_reports_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs +- name: Create admin-geo-reports cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh admin-geo-reports" + with_dict: "{{ run_admin_geo_reports_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create assessment-aggregator reports cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="/bin/bash {{ analytics.home }}/adhoc-scripts/run_exporter.sh > /home/analytics/output.log" + with_dict: "{{ run_assessment_aggregator_report_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create etb metrics cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} weekday={{ item.value.weekday }} job="{{ analytics.home }}/scripts/run-job.sh etb-metrics" + with_dict: "{{ run_etb_metrics_weekly_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create progress-exhaust cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh progress-exhaust" + with_dict: "{{ run_progress_exhaust }}" + tags: + - cronjobs + - default-jobs + - spark1-jobs + +- name: Create response-exhaust cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh response-exhaust" + with_dict: "{{ run_response_exhaust }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create cassandra-migration cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh cassandra-migration" + with_dict: "{{ run_cassandra_migration }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + + +- name: Create userinfo-exhaust cron job + cron: name="{{ env }}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh userinfo-exhaust" + with_dict: "{{ run_userinfo_exhaust }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create collection-summary cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} 
job="{{ analytics.home }}/scripts/run-job.sh collection-summary-report" + with_dict: "{{ run_collection_summary }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Copy collection-summary ingestion spec + copy: src="collection-summary-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - ed-dataproducts + +- name: Create sourcing-summary cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-dock-job.sh sourcing-summary-report" + with_dict: "{{ run_sourcing_summary }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create uci-private-exhaust cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh uci-private-exhaust" + with_dict: "{{ run_uci_private_exhaust_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create uci-response-exhaust cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh uci-response-exhaust" + with_dict: "{{ run_uci_response_exhaust_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Copy sourcing-summary ingestion spec + copy: src="sourcing-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - ed-dataproducts + +- name: Update start jobmanager + template: src=start-jobmanager.j2 dest={{ analytics.home }}/scripts/start-jobmanager.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - update-jobmanager-config + - dataproducts + +# Cluster job sumbit tasks +- name: Copy cluster-config.json file + template: src=cluster-config.json.j2 dest={{ analytics_cluster.home }}/cluster-config.json + delegate_to: localhost + tags: + - replay-job + - run-job + - config-update + +- name: Copy submit-script.sh file + template: src=submit-script.j2 dest={{ analytics_cluster.home }}/submit-script.sh mode=755 + delegate_to: localhost + tags: + - replay-job + - run-job + - config-update + +- name: Copy model-config.sh file + template: src=model-config.j2 dest={{ analytics_cluster.home }}/model-config.sh + delegate_to: localhost + tags: + - replay-job + - run-job + - config-update + +- name: Replay Job + shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --startDate {{ start_date }} --endDate {{ end_date }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} &" + async: "{{ (pause_min * 60) }}" + poll: 0 + tags: + - replay-job + +- name: Run Job + shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} --batch_id {{ batch_id }} &" + async: "{{ (pause_min * 60) }}" + poll: 0 + tags: + - run-job + +- name: Submit jobs + shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ item }} --mode default --sparkMaster yarn &" + with_items: "{{ jobs.split(',')|list }}" + tags: + - job-submit + +# Cluster exhaust parallel jobs sumbit tasks + +- name: Install required python packages + pip: + name: + - psycopg2-binary + - pandas + - IPython + 
tags: + - parallel-jobs-submit + +- name: Copy python script file + template: src=update-job-requests.py.j2 dest={{ analytics_cluster.home }}/update-job-requests.py + delegate_to: localhost + tags: + - parallel-jobs-submit + +- name: Execute python script to populate batch numbers + shell: | + if echo "{{jobs}}" | grep 'druid' + then + python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{env}}_report_config + elif echo "{{jobs}}" | grep 'exhaust' + then + python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} exhaust {{env}}_job_request + fi + tags: + - parallel-jobs-submit + register: jobsCountStr + + +- debug: + var: jobsCountStr + tags: + - parallel-jobs-submit + +- name: Get stdout with parallelisation value from python script to tmp file + shell: echo "{{ jobsCountStr.stdout }}" > /tmp/test.txt + tags: + - parallel-jobs-submit + +- name: Extract parallelisation value from tmp file + shell: "cat /tmp/test.txt | tr '\n' ' ' | awk -F': ' '{print $NF}'" + register: jobsCountOut + tags: + - parallel-jobs-submit + +- debug: + var: jobsCountOut + tags: + - parallel-jobs-submit + +# set jobs count variable from python script output +- set_fact: + jobs_count: "{{ jobsCountOut.stdout }}" + tags: + - parallel-jobs-submit + +- name: Submit parallel exhaust jobs + shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ jobs }} --mode parallel-jobs --parallelisation {{ jobs_count }} &" + poll: 30 + tags: + - parallel-jobs-submit + register: submitOutput + +- debug: + var: submitOutput + tags: + - parallel-jobs-submit + +# Execute Exhaust job sanity check script tasks + +- name: Install required python packages + pip: + name: + - requests + tags: + - run-sanity + +- name: Run sanity check python script + shell: python {{ analytics.home }}/scripts/exhaust_sanity_check.py + tags: + - run-sanity + register: SanityCheckStatus \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 new file mode 100644 index 0000000000..12ebf0bde0 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 @@ -0,0 +1,86 @@ + +{% if dp_object_store_type == "azure" %} +{ + "jars": [ + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + 
"spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} +{% elif (dp_object_store_type == "s3") %} +{ + "jars": [ + "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "s3n://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", + "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "s3n://{{ bucket }}/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ 
spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} +{% elif (dp_object_store_type == "oci") %} +{ + "jars": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml 
-Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} +{% endif %} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 new file mode 100644 index 0000000000..e0ec7005df --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 @@ -0,0 +1,317 @@ +application.env="{{ env }}" +telemetry.version="2.1" +default.parallelization="10" +spark_output_temp_dir="/mount/data/analytics/tmp/" +lp.url="{{lp_url}}" +service.search.url="{{ service.search.url }}" +service.search.path="{{ service.search.path }}" +spark.cassandra.connection.host="{{groups['dp-cassandra'][0]}}" +cassandra.keyspace_prefix="{{ cassandra_keyspace_prefix }}" +cassandra.hierarchy_store_prefix="{{ cassandra_hierarchy_store_prefix }}" + + +storage.key.config="{{ dp_storage_key_config }}" +storage.secret.config="{{ dp_storage_secret_config }}" +reports.storage.key.config="{{ dp_reports_storage_key_config }}" +reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" +{% if dp_object_store_type == "azure" %} +cloud_storage_type="azure" +{% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3" or dp_object_store_type == "oci") %} +{% if cloud_service_provider == "oci" %} +cloud_storage_type="oci" +{% else %} +cloud_storage_type="s3" +{% endif %} +cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" +cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}" +storage.endpoint.config="{{ s3_storage_endpoint_with_protocol }}" +aws_storage_key="{{ s3_storage_key }}" +aws_storage_secret="{{ s3_storage_secret }}" +{% endif %} + +lp.contentmodel.versionkey="jd5ECm/o0BXwQCe8PfZY1NoUkB9HN41QjA80p22MKyRIcP5RW4qHw8sZztCzv87M" + +# Joblog Kafka appender config for cluster execution 
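The appender configured below is off by default; when enabled, the analytics jobs push their job-log events to a Kafka topic instead of only writing joblog.log. A minimal sketch of tailing that topic while debugging, assuming kafka-python (which exhaust_sanity_check.py in this patch already imports) and placeholder broker/topic values:

```python
import json

from kafka import KafkaConsumer  # same client the sanity-check script imports

# Broker and topic are placeholders mirroring the rendered values of
# log.appender.kafka.broker_host and log.appender.kafka.topic below.
consumer = KafkaConsumer(
    "dev.druid.events.log",
    bootstrap_servers="processing-cluster-kafka-host:9092",
    auto_offset_reset="latest",
    value_deserializer=lambda m: json.loads(m.decode("utf-8")),
)
for record in consumer:
    print(record.value)  # one job-log event per message
```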
+log.appender.kafka.enable="false"
+log.appender.kafka.broker_host="{{groups['processing-cluster-kafka'][0]}}:9092"
+log.appender.kafka.topic="{{ env }}.druid.events.log"
+
+# Kafka connection configuration
+kafka.consumer.brokerlist="{{groups['processing-cluster-kafka'][0]}}:9092"
+kafka.consumer.topic="{{ env }}.analytics.job_queue"
+no_of_jobs=42
+
+# Spark Driver
+spark.driver.memory=6g
+
+spark.memory_fraction={{ spark.memory_fraction }}
+spark.storage_fraction={{ spark.storage_fraction }}
+spark.driver_memory="{{ spark.driver_memory }}"
+
+# Monitor Jobs
+
+monitor {
+  notification {
+    webhook_url = "{{ data_exhaust_webhook_url }}"
+    channel = "{{ data_exhaust_Channel }}"
+    token = "{{ data_exhaust_token }}"
+    slack = true
+    name = "{{ data_exhaust_name }}"
+  }
+}
+
+# App ID & Channel ID
+default.consumption.app.id="no_value"
+default.channel.id="in.ekstep"
+default.creation.app.id="no_value"
+
+
+# Media Service Type
+media_service_type = "azure"
+
+azure_tenant="{{ media_service_azure_tenant }}"
+azure_subscription_id="{{ media_service_azure_subscription_id }}"
+azure_account_name="{{ media_service_azure_account_name }}"
+azure_resource_group_name="{{ media_service_azure_resource_group_name }}"
+azure_token_client_key="{{ media_service_azure_token_client_key }}"
+azure_token_client_secret="{{ media_service_azure_token_client_secret }}"
+elasticsearch.service.endpoint="http://{{groups['composite-search-cluster'][0]}}:9200"
+elasticsearch.index.compositesearch.name="{{ es_search_index }}"
+
+org.search.api.url="{{ channelSearchServiceEndpoint }}"
+org.search.api.key="{{ searchServiceAuthorizationToken }}"
+
+hierarchy.search.api.url="{{ hierarchySearchServiceUrl }}"
+hierarchy.search.api.path="{{ hierarchySearchServicEndpoint }}"
+
+# Azure Media Service Config
+azure {
+  location = "centralindia"
+  tenant = "tenant name"
+  subscription_id = "subscription id"
+
+  api {
+    endpoint="Media Service API End Point"
+    version = "2018-07-01"
+  }
+
+  account_name = "account name"
+  resource_group_name = "Resource Group Name"
+
+  transform {
+    default = "media_transform_default"
+    hls = "media_transform_hls"
+  }
+
+  stream {
+    base_url = "{{ stream_base_url }}"
+    endpoint_name = "default"
+    protocol = "Hls"
+    policy_name = "Predefined_ClearStreamingOnly"
+  }
+
+  token {
+    client_key = "client key"
+    client_secret = "client secret"
+  }
+}
+
+## Reports - Global config
+cloud.container.reports="reports"
+
+# course metrics container in azure
+course.metrics.cassandra.sunbirdKeyspace="sunbird"
+course.metrics.cassandra.sunbirdCoursesKeyspace="sunbird_courses"
+course.metrics.cassandra.sunbirdHierarchyStore="{{ cassandra_hierarchy_store_keyspace }}"
+course.metrics.cloud.objectKey=""
+course.metrics.cassandra.input.consistency="QUORUM"
+es.host="http://{{groups['core-es'][0]}}"
+es.port="9200"
+es.composite.host="{{groups['composite-search-cluster'][0]}}"
+
+# State admin user reports
+# Uses azure only - course.metrics.cloud.provider
+admin.metrics.cloud.objectKey=""
+admin.metrics.temp.dir="/mount/data/analytics/admin-user-reports"
+
+# Assessment report config
+es.scroll.size = 1000
+
+# Best score or latest updated score
+assessment.metrics.bestscore.report=true
+assessment.metrics.supported.contenttype="SelfAssess"
+assessment.metrics.supported.primaryCategories="{{ assessment_metric_primary_category }}"
+spark.sql.caseSensitive=true
+
+# content rating configurations
+
+druid.sql.host="http://{{druid_broker_host}}:8082/druid/v2/sql/"
+druid.unique.content.query="{\"query\":\"SELECT DISTINCT 
\\\"object_id\\\" AS \\\"Id\\\"\\nFROM \\\"druid\\\".\\\"summary-events\\\" WHERE \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\"}" +druid.content.rating.query="{\"query\":\"SELECT \\\"object_id\\\" AS contentId, COUNT(*) AS \\\"totalRatingsCount\\\", SUM(edata_rating) AS \\\"Total Ratings\\\", SUM(edata_rating)/COUNT(*) AS \\\"averageRating\\\" FROM \\\"druid\\\".\\\"telemetry-feedback-events\\\" WHERE \\\"eid\\\" = 'FEEDBACK' AND \\\"edata_rating\\\">0 GROUP BY \\\"object_id\\\"\"}" +druid.content.consumption.query="{\"query\":\"SELECT COUNT(*) as \\\"play_sessions_count\\\", object_id as \\\"contentId\\\", SUM(total_time_spent) as \\\"total_time_spent\\\", dimensions_pdata_id, object_id\\nFROM \\\"summary-events\\\"\\nWHERE \\\"dimensions_mode\\\" = 'play' AND \\\"dimensions_type\\\" ='content' AND \\\"dimensions_pdata_pid\\\" != 'creation-portal' \\nGROUP BY object_id, dimensions_pdata_id\"}" +lp.system.update.base.url="{{lp_url}}/system/v3/content/update" + + +#Experiment Configuration + +user.search.api.url="{{sunbird_learner_service_url}}/private/user/v1/search" +user.search.limit="10000" + +# pipeline auditing +druid.pipeline_metrics.audit.query="{\"query\":\"SELECT \\\"job-name\\\", SUM(\\\"success-message-count\\\") AS \\\"success-message-count\\\", SUM(\\\"failed-message-count\\\") AS \\\"failed-message-count\\\", SUM(\\\"duplicate-event-count\\\") AS \\\"duplicate-event-count\\\", SUM(\\\"batch-success-count\\\") AS \\\"batch-success-count\\\", SUM(\\\"batch-error-count\\\") AS \\\"batch-error-count\\\", SUM(\\\"primary-route-success-count\\\") AS \\\"primary-route-success-count\\\", SUM(\\\"secondary-route-success-count\\\") AS \\\"secondary-route-success-count\\\" FROM \\\"druid\\\".\\\"pipeline-metrics\\\" WHERE \\\"job-name\\\" IN (%s) AND \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s' GROUP BY \\\"job-name\\\" \"}" +druid.telemetryDatasource.count.query="{ \"query\": \"SELECT COUNT(*) AS \\\"total\\\" FROM \\\"druid\\\".\\\"telemetry-events\\\" WHERE TIME_FORMAT(MILLIS_TO_TIMESTAMP(\\\"syncts\\\"), 'yyyy-MM-dd HH:mm:ss.SSS', 'Asia/Kolkata') BETWEEN TIMESTAMP '%s' AND '%s' AND \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\" }" +druid.summaryDatasource.count.query="{\"query\": \"SELECT COUNT(*) AS \\\"total\\\" FROM \\\"druid\\\".\\\"summary-events\\\" WHERE \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\" }" + +#Pipeline Audit Jobs + +pipeline_audit { + notification { + webhook_url = "{{ data_exhaust_webhook_url }}" + channel = "{{ data_exhaust_Channel }}" + token = "{{ data_exhaust_token }}" + slack = true + name = "Pipeline Audit" + } +} + +#Druid Query Processor + +druid = { + hosts = "{{druid_broker_host}}:8082" + secure = false + url = "/druid/v2/" + datasource = "telemetry-events" + response-parsing-timeout = 300000 + client-backend = "com.ing.wbaa.druid.client.DruidAdvancedHttpClient" + client-config = { + druid-advanced-http-client ={ + queue-size = 32768 + queue-overflow-strategy = "Backpressure" + query-retries = 5 + query-retry-delay = 10 ms + host-connection-pool = { + max-connections = 32 + min-connections = 0 + max-open-requests = 128 + max-connection-lifetime = 20 min + idle-timeout = 15 min + client = { + # The time after which an idle connection will be automatically closed. + # Set to `infinite` to completely disable idle timeouts. 
+ idle-timeout = 10 min + parsing.max-chunk-size = 10m + } + } + } + + } +} +druid.rollup.host="{{druid_rollup_broker_host}}" +druid.rollup.port=8082 +druid.query.wait.time.mins=10 +druid.report.upload.wait.time.mins=10 +druid.scan.batch.size=100 +druid.scan.batch.bytes=2000000 +druid.query.batch.buffer=500000 + + +// Metric event config +metric.producer.id="pipeline.monitoring" +metric.producer.pid="dataproduct.metrics" +push.metrics.kafka=true +metric.kafka.broker="{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" +metric.kafka.topic="{{ env }}.prom.monitoring.metrics" + +//Postgres Config +postgres.db="{{postgres.db_name}}" +postgres.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" +postgres.user="{{postgres.db_username}}" +postgres.pass="{{postgres.db_password}}" +postgres.program.table="program" +postgres.nomination.table="nomination" +postgres.usertable="\"V_User\"" +postgres.org.table="\"V_User_Org\"" + +druid.ingestion.path="/druid/indexer/v1/task" +druid.segment.path="/druid/coordinator/v1/metadata/datasources/" +druid.deletesegment.path="/druid/coordinator/v1/datasources/" + +postgres.druid.db="{{ druid_report_postgres_db_name }}" +postgres.druid.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" +postgres.druid.user="{{ druid_report_postgres_db_username }}" +postgres.druid.pass="{{ dp_vault_druid_postgress_pass }}" + + +location.search.url="https://{{location_search_url}}/v1/location/search" +location.search.token="{{ location_search_token }}" +location.search.request="{\"request\": {\"filters\": {\"type\" :[\"state\",\"district\"]},\"limit\" : 10000}}" + +druid.state.lookup.url = "http://{{groups['raw-coordinator'][0]}}:8081/druid/coordinator/v1/lookups/config/__default/stateSlugLookup" + +sunbird_encryption_key="{{ core_vault_sunbird_encryption_key }}" + +dcedialcode.filename="DCE_dialcode_data.csv" +etbdialcode.filename="ETB_dialcode_data.csv" +dcetextbook.filename="DCE_textbook_data.csv" +etbtextbook.filename="ETB_textbook_data.csv" +etb.dialcode.druid.length={{ etb_dialcode_list_druid_length }} + +{% if dp_object_store_type == "azure" %} +druid.report.default.storage="azure" +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +druid.report.default.storage="s3" +{% endif %} + +druid.report.date.format="yyyy-MM-dd" +druid.report.default.container="report-verification" + +## Collection Exhaust Jobs Configuration -- Start ## + +sunbird.user.keyspace="{{ user_table_keyspace }}" +sunbird.courses.keyspace="{{ course_keyspace }}" +sunbird.content.hierarchy.keyspace="{{ cassandra_hierarchy_store_keyspace }}" +sunbird.user.cluster.host="{{ core_cassandra_host }}" +sunbird.courses.cluster.host="{{ core_cassandra_host }}" +sunbird.content.cluster.host="{{ core_cassandra_host }}" +sunbird.report.cluster.host="{{ report_cassandra_cluster_host }}" +sunbird.user.report.keyspace="{{ report_user_table_keyspace }}" +collection.exhaust.store.prefix="" +postgres.table.job_request="{{ job_request_table }}" +postgres.table.dataset_metadata="{{ dataset_metadata_table }}" + +## Collection Exhaust Jobs Configuration -- End ## + +## Exhaust throttling variables +exhaust.batches.limit.per.channel={{ exhaust_batches_limit_per_channel }} +exhaust.file.size.limit.per.channel={{ exhaust_file_size_limit_bytes_per_channel }} + +exhaust.parallel.batch.load.limit={{ exhaust_parallel_batch_load_limit }} +exhaust.user.parallelism={{ exhaust_user_parallelism }} + +data_exhaust.batch.limit.per.request={{ data_exhaust_batch_limit_per_request }} + + 
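The throttling keys above cap how many exhaust batches, and how many output bytes, a single channel may consume. Since this file is Typesafe-config (HOCON) consumed by the Scala data products, one quick sanity check is to parse a rendered copy back; a minimal sketch assuming the third-party pyhocon parser and an illustrative file path:

```python
from pyhocon import ConfigFactory  # assumption: pyhocon is installed

# Path is illustrative; the template renders into the analytics scripts folder.
conf = ConfigFactory.parse_file("/mount/data/analytics/scripts/common.conf")

# Read back the exhaust throttling limits defined above.
print(conf.get_int("exhaust.batches.limit.per.channel"))
print(conf.get_int("exhaust.file.size.limit.per.channel"))
print(conf.get_string("postgres.table.job_request"))
```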
+ +//START of UCI Postgres Config + +uci.conversation.postgres.db="{{ uci_postgres.conversation_db_name }}" +uci.conversation.postgres.url="jdbc:postgresql://{{uci_postgres.conversation_db_host}}:{{uci_postgres.conversation_db_port}}/" + +uci.fushionauth.postgres.db="{{ uci_postgres.fushionauth_db_name }}" +uci.fushionauth.postgres.url="jdbc:postgresql://{{uci_postgres.fushionauth_db_host}}:{{uci_postgres.fushionauth_db_port}}/" + +uci.postgres.table.conversation="{{ uci_postgres.conversation_table_name }}" +uci.postgres.table.user="{{ uci_postgres.user_table_name }}" +uci.postgres.table.user_registration="{{ uci_postgres.user_registration_table_name }}" +uci.postgres.table.identities="{{ uci_postgres.user_identities_table_name }}" + +uci.conversation.postgres.user="{{ uci_postgres.conversation_db_user }}" +uci.conversation.postgres.pass="{{ uci_postgres.conversation_db_psss }}" + +uci.fushionauth.postgres.user="{{ uci_postgres.fushionauth_db_user }}" +uci.fushionauth.postgres.pass="{{ uci_postgres.fushionauth_db_psss }}" + +uci.exhaust.store.prefix="" +uci.encryption.secret="{{ uci_encryption_secret_key }}" + +// END OF UCI Related Job Configs \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 new file mode 100644 index 0000000000..3f6ba98d9d --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 @@ -0,0 +1,58 @@ +import requests +from requests.auth import HTTPBasicAuth +import json +from kafka import KafkaConsumer +from json import loads +import sys + +def checkClusterStatus(): + try: + res = requests.get('https://{{ spark_cluster_name }}.azurehdinsight.net/api/v1/clusters/{{ spark_cluster_name }}/alerts?format=summary', auth = HTTPBasicAuth("{{ admin_name }}" ,"{{ admin_password }}")) + if(res.status_code == 200): + resJson = json.loads(res.text) + warningCount = resJson["alerts_summary"]["WARNING"]["count"] + criticalCount = resJson["alerts_summary"]["CRITICAL"]["count"] + unknownCount = resJson["alerts_summary"]["UNKNOWN"]["count"] + if((warningCount + criticalCount + unknownCount) == 0): + print("Cluster is up & running fine. With these - WARNING:{0}, CRITICAL:{1}, UNKNOWN:{2}".format(warningCount, criticalCount, unknownCount)) + return "SUCCESS" + else: + return "FAILED. Cluster is not running properly. Found these - WARNING:{0}, CRITICAL:{1}, UNKNOWN:{2}".format(warningCount, criticalCount, unknownCount) + else: + return "FAILED. Cluster failed to provide response. 
Resulted in {0} response".format(res.status_code)
+    except Exception as e:
+        return "FAILED with {0}".format(str(e))
+
+def checkCassandraMigratorStatus():
+    try:
+        ## find the migrator's JOB_END event in the joblog file
+        migratorENDEvent = ""
+        with open('{{ analytics.home }}/scripts/logs/joblog.log', 'rt') as logs:
+            for log in logs:
+                if (log.count("JOB_END") == 1 and log.count("{{ cassandra_migrator_job_name }}") == 1):
+                    migratorENDEvent = log
+        logJson = json.loads(migratorENDEvent)
+        jobStatus = logJson["edata"]["status"]
+        if (jobStatus == "SUCCESS"):
+            print("Cassandra Migrator completed successfully!")
+            return "SUCCESS"
+        else:
+            return "Cassandra Migrator failed"
+    except Exception as e:
+        return "FAILED with {0}".format(str(e))
+
+
+def main():
+    finalSuccessMessage = "All checks are successful"
+    ## check Cassandra Migrator status
+    cassandraMigratorState = checkCassandraMigratorStatus()
+    ## check spark cluster status
+    clusterState = checkClusterStatus()
+
+    if (cassandraMigratorState == "SUCCESS" and clusterState == "SUCCESS"):
+        return finalSuccessMessage
+    else:
+        raise Exception("Required checks failed. Job status: {0} and cluster status: {1}".format(cassandraMigratorState, clusterState))
+
+result = main()
+print(result)
\ No newline at end of file
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2
new file mode 100644
index 0000000000..c82cdd702c
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2
@@ -0,0 +1,54 @@
[The 54 added lines of this log4j2.xml.j2 hunk were garbled in extraction: the XML markup was stripped, leaving only text nodes. The surviving values show a log4j2 configuration whose file appenders write under {{ analytics.home }}/scripts/logs with a "%m%n" pattern layout.]
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2
new file mode 100644
index 0000000000..86f376b65d
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+
+config() {
+    bucket={{ secor_bucket }}
+    brokerList={{ brokerlist }}
+    zookeeper={{ zookeeper }}
+    brokerIngestionList={{ ingestion_kafka_brokers }}
+    job_topic={{ analytics_job_queue_topic }}
+    topic={{ topic }}
+    analyticsMetricsTopic={{ analytics_metrics_topic }}
+    sinkTopic={{ sink_topic }}
+    metricsTopic={{ metrics_topic }}
+    analytics_home={{ analytics.home }}
+    temp_folder={{ job_manager_tmp_dir }}
+    sparkCassandraConnectionHost="{{ lp_cassandra_host }}"
+    sparkRedisConnectionHost={{ metadata2_redis_host }}
+    sunbirdPlatformCassandraHost="{{ core_cassandra_host }}"
+    sunbirdPlatformElasticsearchHost="{{ sunbird_es_host }}"
+    jobManagerJobsCount="{{ analytics_jobs_count }}"
+    producerEnv="{{ producer_env }}"
+    baseScriptPath="{{ spark_output_temp_dir }}"
+    reportPostContainer="{{ reports_container }}"
+    druidIngestionURL="{{ druid_rollup_cluster_ingestion_task_url }}/druid/indexer/v1/task"
+    assessTopic={{ assess_topic }}
+
+
+    if [ -z "$2" ]; then endDate=$(date --date yesterday "+%Y-%m-%d"); else endDate=$2; fi
+    if [ ! -z "$3" ]; then inputBucket=$3; fi
+    if [ ! -z "$4" ]; then sinkTopic=$4; fi
+    if [ ! 
-z "$2" ]; then keyword=$2; fi + case "$1" in + "assessment-correction") + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' + ;; + "assessment-archival") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"azure","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' + ;; + "assessment-archived-removal") +{% if dp_object_store_type == "azure" %} + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + ;; +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + ;; +{% endif %} + "collection-reconciliation-job") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CollectionReconciliationJob","modelParams":{"mode":"prodrun","brokerList":"{{ingestion_kafka_broker_host}}","topic":"{{env}}.issue.certificate.request","sparkCassandraConnectionHost":"{{ core_cassandra_host }}"},"parallelization":30,"appName":"CollectionReconciliationJob"}' + ;; + "collection-summary-report") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host 
}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + ;; + "score-metric-migration-job") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.ScoreMetricMigrationJob","modelParams":{"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Score Metric Migration Job"}' + ;; + "assessment-score-metric-correction") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.AssessmentScoreCorrectionJob","modelParams":{"assessment.score.correction.batches":"","cassandraReadConsistency":"QUORUM","cassandraWriteConsistency":"QUORUM","csvPath":"/mount/data/analytics/score_correction","isDryRunMode":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":30,"appName":"Assessment Score Correction Job"}' + ;; + "course-batch-status-updater") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' + ;; + "collection-summary-report-v2") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' + ;; + "uci-private-exhaust") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' + ;; + "uci-response-exhaust") + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI 
Response Exhaust"}' + ;; + "userinfo-exhaust") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + ;; + "program-collection-summary-report") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + ;; + "response-exhaust") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' + ;; + "response-exhaust-v2") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' + ;; + "progress-exhaust") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + ;; + 
"progress-exhaust-v2") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' + ;; + "druid_reports") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.DruidQueryProcessingModel","modelParams":{"mode":"batch"},"parallelization":8,"appName":"Druid Reports"}' + ;; + "cassandra-migration") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.updater.CassandraMigratorJob","modelParams":{"cassandraDataHost":"{{ core_cassandra_host }}","cassandraMigrateHost":"{{ report_cassandra_host }}","keyspace":"sunbird_courses","cassandraDataTable":"user_enrolments","cassandraMigrateTable":"{{ report_user_enrolment_table }}","repartitionColumns":"batchid"},"parallelization":10,"appName":"Cassandra Migrator","deviceMapping":false}' + ;; + "monitor-job-summ") + echo '{"search":{"type":"local","queries":[{"file":"'$analytics_home'/scripts/logs/joblog.log"}]},"model":"org.ekstep.analytics.model.MonitorSummaryModel","modelParams":{"pushMetrics":true,"brokerList":"'$brokerList'","topic":"'$analyticsMetricsTopic'","model":[{"model":"WorkFlowSummaryModel","category":"consumption","input_dependency":"None"},{"model":"UpdateContentRating","category":"consumption","input_dependency":"None"},{"model":"DruidQueryProcessingModel","category":"consumption","input_dependency":"None"},{"model":"MetricsAuditJob","category":"consumption","input_dependency":"None"},{"model":"StateAdminReportJob","category":"consumption","input_dependency":"None"},{"model":"StateAdminGeoReportJob","category":"consumption","input_dependency":"None"},{"model":"CourseEnrollmentJob","category":"consumption","input_dependency":"None"}]},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"appName":"TestMonitorSummarizer","deviceMapping":true}' + ;; + "job-manager") + echo '{"jobsCount":'$jobManagerJobsCount',"topic":"'$job_topic'","bootStrapServer":"'$brokerList'","zookeeperConnect":"'$zookeeper'","consumerGroup":"jobmanager","slackChannel":"#test_channel","slackUserName":"JobManager","tempBucket":"'$bucket'","tempFolder":"'$temp_folder'"}' + ;; + "wfs") + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"} }],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' + #echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": 
false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' + ;; + "video-streaming") + echo '{"search":{"type":"{{ dp_object_store_type }}"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' + ;; + "admin-user-reports") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' + ;; + "admin-geo-reports") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' + ;; + "telemetry-replay") + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' + ;; + "summary-replay") + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' + ;; + "content-rating-updater") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.updater.UpdateContentRating","modelParams": {"startDate": "'$endDate'","endDate": "'$endDate'"},"output": [{"to":"console","params":{"printEvent":false}}],"parallelization": 8,"appName": "Content Rating Updater","deviceMapping": false}' + ;; + "experiment") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.ExperimentDefinitionModel","modelParams":{"sparkElasticsearchConnectionHost":"{{ lp_composite_search_host }}"},"output":[{"to":"elasticsearch","params":{"index":"experiment"}}],"parallelization":8,"appName":"Experiment-Definition","deviceMapping":false}' + ;; + "etb-metrics") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked 
content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' + ;; + "course-enrollment-report") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' + ;; + "course-consumption-report") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": "sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "{{ dp_object_store_type 
}}","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' + ;; + "textbook-progress-report") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' + ;; + "audit-metrics-report") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"{{ dp_object_store_type 
}}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' + ;; + "sourcing-metrics") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + ;; + "druid-dataset") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' + ;; + "*") + echo "Unknown model code" + exit 1 # Command to come out of the program with status 1 + ;; + esac +} diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 new file mode 100644 index 0000000000..a3569c7f46 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 @@ -0,0 +1,670 @@ +{ + "wfs": { + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "unique/raw/", + "endDate": "$(date --date yesterday '+%Y-%m-%d')", + "delta": 0 + } + ] + }, + "filters": [ + { + "name": "actor", + "operator": "ISNOTNULL" + } + ], + "model": "org.ekstep.analytics.model.WorkflowSummary", + "modelParams": { + "apiVersion": "v2", + "parallelization": 32 + }, + "output": [ + { + "to": "{{dp_object_store_type}}", + "params": { + "bucket": "{{ bucket }}", + "key": "{{ job_manager_tmp_dir }}/wfs/$(date --date yesterday '+%Y-%m-%d')" + } + }, + { + "to": "kafka", + "params": { + "brokerList": "{{ brokerlist }}", + "topic": "{{ topic }}" + } + } + ], + "parallelization": 32, + "appName": "Workflow Summarizer", + "deviceMapping": true + }, + "video-streaming": { + "search": { + "type": "{{dp_object_store_type}}" + }, + "model": "org.ekstep.analytics.job.VideoStreamingJob", + "modelParams": { + "maxIterations": 10 + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + } + ], + "parallelization": 8, + "appName": "Video Streaming Job", + "deviceMapping": false + }, + "admin-user-reports": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.job.report.StateAdminReportJob", + "modelParams": { + "sparkCassandraConnectionHost": "{{core_cassandra_host}}", + 
"sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + } + ], + "parallelization": 8, + "appName": "Admin User Reports", + "deviceMapping": false + }, + "admin-geo-reports": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.job.report.StateAdminGeoReportJob", + "modelParams": { + "sparkCassandraConnectionHost": "{{core_cassandra_host}}", + "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + } + ], + "parallelization": 8, + "appName": "Admin Geo Reports", + "deviceMapping": false + }, + "content-rating-updater": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.updater.UpdateContentRating", + "modelParams": { + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date '+%Y-%m-%d')" + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + } + ], + "parallelization": 8, + "appName": "Content Rating Updater", + "deviceMapping": false + }, + "monitor-job-summ": { + "search": { + "type": "local", + "queries": [ + { + "file": "{{ analytics.home }}/scripts/logs/joblog.log" + } + ] + }, + "model": "org.ekstep.analytics.model.MonitorSummaryModel", + "modelParams": { + "pushMetrics": true, + "brokerList": "{{ brokerlist }}", + "topic": "{{ analytics_metrics_topic }}", + "model": [ + { + "model": "WorkFlowSummaryModel", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "UpdateContentRating", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "DruidQueryProcessingModel", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "MetricsAuditJob", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "StateAdminReportJob", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "StateAdminGeoReportJob", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "CourseEnrollmentJob", + "category": "consumption", + "input_dependency": "None" + } + ] + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + }, + { + "to": "kafka", + "params": { + "brokerList": "{{ brokerlist }}", + "topic": "{{ topic }}" + } + } + ], + "appName": "TestMonitorSummarizer", + "deviceMapping": true + }, + "experiment": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.ExperimentDefinitionModel", + "modelParams": { + "sparkElasticsearchConnectionHost": "{{ lp_composite_search_host }}" + }, + "output": [ + { + "to": "elasticsearch", + "params": { + "index": "experiment" + } + } + ], + "parallelization": 8, + "appName": "Experiment-Definition", + "deviceMapping": false + }, + "etb-metrics": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.report.ETBMetricsJob", + "modelParams": { + "reportConfig": { + "id": "etb_metrics", + "metrics": [], + "labels": { + "date": "Date", + "identifier": "TextBook ID", + "name": "TextBook Name", + "medium": "Medium", + "gradeLevel": "Grade", + "subject": "Subject", + "createdOn": "Created On", + "lastUpdatedOn": "Last Updated On", + "totalQRCodes": "Total number of QR codes", + "contentLinkedQR": "Number of QR codes with atleast 1 linked content", + "withoutContentQR": "Number of QR codes with no linked content", + "withoutContentT1": "Term 1 QR Codes with no linked content", + "withoutContentT2": "Term 2 QR Codes with no 
linked content", + "status": "Status", + "totalContentLinked": "Total content linked", + "totalQRLinked": "Total QR codes linked to content", + "totalQRNotLinked": "Total number of QR codes with no linked content", + "leafNodesCount": "Total number of leaf nodes", + "leafNodeUnlinked": "Number of leaf nodes with no content", + "l1Name": "Level 1 Name", + "l2Name": "Level 2 Name", + "l3Name": "Level 3 Name", + "l4Name": "Level 4 Name", + "l5Name": "Level 5 Name", + "dialcode": "QR Code", + "sum(scans)": "Total Scans", + "noOfContent": "Number of contents", + "nodeType": "Type of Node", + "term": "Term" + }, + "output": [{ + "type": "csv", + "dims": ["identifier", "channel", "name"], + "fileParameters": ["id", "dims"] + }], + "mergeConfig": { + "frequency": "WEEK", + "basePath": "{{ spark_output_temp_dir }}", + "rollup": 0, + "reportPath": "dialcode_counts.csv", + "postContainer":"{{ reports_container }}" + } + }, + "dialcodeReportConfig": { + "id": "etb_metrics", + "metrics": [], + "labels": {}, + "output": [{ + "type": "csv", + "dims": ["identifier", "channel", "name"], + "fileParameters": ["id", "dims"] + }], + "mergeConfig": { + "frequency": "WEEK", + "basePath": "{{ spark_output_temp_dir }}", + "rollup": 1, + "reportPath": "dialcode_counts.csv", + "rollupAge": "ACADEMIC_YEAR", + "rollupCol": "Date", + "rollupRange": 10, + "postContainer":"{{ reports_container }}" + } + }, + "etbFileConfig": { + "bucket": "{{ reports_container }}", + "file": "dialcode_scans/dialcode_counts.csv" + }, + "druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"},{"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName": "name","aliasName": "name"},{"fieldName": "createdFor","aliasName": "createdFor"},{"fieldName": "createdOn","aliasName": "createdOn"},{"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"},{"fieldName": "board","aliasName": "board"},{"fieldName": "medium","aliasName": "medium"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"},{"fieldName": "subject","aliasName": "subject"},{"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"},{"type": "in","dimension": "status","values": ["Live","Draft","Review"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}}, + "tenantConfig": { + "tenantId": "", + "slugName": "" + }, + "store": "{{dp_object_store_type}}", + "format": "csv", + "key": "druid-reports/", + "filePath": "druid-reports/", + "container": "{{ bucket }}", + "folderPrefix": ["slug", "reportName"] + }, + "output": [{ + "to": "console", + "params": { + "printEvent": false + } + }], + "parallelization": 8, + "appName": "ETB Metrics Model", + "deviceMapping": false + }, + "course-enrollment-report":{ + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.report.CourseEnrollmentJob", + "modelParams": { + "reportConfig": { + "id": "tpd_metrics", + "metrics" : [], + "labels": { + "completionCount": "Completion Count", + "status": "Status", + "enrollmentCount": "Enrollment Count", + "courseName": "Course Name", + "batchName": "Batch Name" + }, + "output": [{ + "type": "csv", + "dims": [] + }] + }, + "esConfig": { + "request": { + "filters":{ + "objectType": ["Content"], + "contentType": ["Course"], + "identifier": [], + "status": ["Live"] + }, + "limit": 10000 + } + }, + "store": "{{dp_object_store_type}}", + "format":"csv", + "key": "druid-reports/", + "filePath": "druid-reports/", + "container": "{{ bucket }}", + "folderPrefix": ["slug", "reportName"], + "sparkCassandraConnectionHost":"{{core_cassandra_host}}", + "sparkElasticsearchConnectionHost":"{{sunbird_es_host}}" + }, + "output": [{ + "to": "console", + "params": { + "printEvent": false + } + }], + "parallelization": 8, + "appName": "TPD Course Enrollment Metrics Model", + "deviceMapping": false + }, + "course-consumption-report":{ + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.report.CourseConsumptionJob", + "modelParams": { + "esConfig": { + "request": { + "filters": { + "objectType": ["Content"], + "contentType": ["Course"], + "identifier": [], + "status": ["Live"] + } + } + }, + "reportConfig": { + "id": "tpd_metrics", + "labels": { + "date": "Date", + "status": "Batch Status", + "timespent": "Timespent in mins", + "courseName": "Course Name", + "batchName": "Batch Name" + }, + "dateRange": { + "staticInterval": "LastDay", + "granularity": "all" + }, + "metrics": [{ + "metric": "totalCoursePlays", + "label": "Total Course Plays (in mins) ", + "druidQuery": { + "queryType": "groupBy", + "dataSource": "summary-events", + "intervals": "LastDay", + "aggregations": [{ + "name": "sum__edata_time_spent", + "type": "doubleSum", + "fieldName": "edata_time_spent" + }], + "dimensions": [{ + "fieldName": "object_rollup_l1", + "aliasName": "courseId" + }, { + "fieldName": "uid", + "aliasName": "userId" + }, { + "fieldName": "context_cdata_id", + "aliasName": "batchId" + }], + "filters": [{ + "type": "equals", + "dimension": "eid", + "value": "ME_WORKFLOW_SUMMARY" + }, { + 
"type": "in", + "dimension": "dimensions_pdata_id", + "values": ["{{ producer_env }}.app", "{{ producer_env }}.portal"] + }, { + "type": "equals", + "dimension": "dimensions_type", + "value": "content" + }, { + "type": "equals", + "dimension": "dimensions_mode", + "value": "play" + }, { + "type": "equals", + "dimension": "context_cdata_type", + "value": "batch" + }], + "postAggregation": [{ + "type": "arithmetic", + "name": "timespent", + "fields": { + "leftField": "sum__edata_time_spent", + "rightField": 60, + "rightFieldType": "constant" + }, + "fn": "/" + }], + "descending": "false" + } + }], + "output": [{ + "type": "csv", + "metrics": ["timespent"], + "dims": [] + }], + "queryType": "groupBy" + }, + "store": "{{dp_object_store_type}}", + "format":"csv", + "key": "druid-reports/", + "filePath": "druid-reports/", + "container": "{{ bucket }}", + "folderPrefix": ["slug", "reportName"], + "sparkCassandraConnectionHost":"{{core_cassandra_host}}", + "sparkElasticsearchConnectionHost":"{{sunbird_es_host}}" + }, + "output": [{ + "to": "console", + "params": { + "printEvent": false + } + }], + "parallelization": 8, + "appName": "TPD Course Consumption Metrics Model", + "deviceMapping": false + }, + "audit-metrics-report": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.MetricsAuditJob", + "modelParams": { + "auditConfig": [ + { + "name": "denorm", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "telemetry-denormalized/raw/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + }, + "filters": [ + { + "name": "flags.user_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.content_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.device_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.dialcode_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.collection_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.derived_location_retrieved", + "operator": "EQ", + "value": true + } + ] + }, + { + "name": "failed", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "failed/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + } + }, + { + "name": "unique", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "unique/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + } + }, + { + "name": "raw", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "raw/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + } + }, + { + "name": "channel-raw", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "folder": true, + "bucket": "{{ bucket }}", + "prefix": "channel/*/raw/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')*.json.gz" + } + ] + } + }, + { + "name": "channel-summary", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "folder": true, + "bucket": "{{ bucket }}", + "prefix": "channel/*/summary/", + "startDate": "$(date --date yesterday 
'+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')*.json.gz" + } + ] + } + }, + { + "name": "derived", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "derived/wfs/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + } + }, + { + "name": "telemetry-count", + "search": { + "type": "druid", + "druidQuery": { + "queryType": "timeSeries", + "dataSource": "telemetry-events", + "intervals": "LastDay", + "aggregations": [ + { + "name": "total_count", + "type": "count", + "fieldName": "count" + } + ], + "descending": "false" + } + } + }, + { + "name": "summary-count", + "search": { + "type": "druid", + "druidQuery": { + "queryType": "timeSeries", + "dataSource": "summary-events", + "intervals": "LastDay", + "aggregations": [ + { + "name": "total_count", + "type": "count", + "fieldName": "count" + } + ], + "descending": "false" + } + } + } + ] + }, + "output": [ + { + "to": "kafka", + "params": { + "brokerList": "{{ brokerlist }}", + "topic": "{{ metrics_topic }}" + } + } + ], + "parallelization": 8, + "appName": "Metrics Audit" + } +} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 new file mode 100644 index 0000000000..f720f4687e --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +config() { + bucket={{ bucket }} + brokerList={{ brokerlist }} + zookeeper={{ zookeeper }} + job_topic={{ analytics_job_queue_topic }} + topic={{ topic }} + sparkCassandraConnectionHost="{{ lp_cassandra_host }}" + sunbirdPlatformCassandraHost="{{ core_cassandra_host }}" + reportPostContainer="{{ reports_container }}" + druidRollupHost="{{ druid_rollup_cluster_ingestion_task_url }}" + + if [ -z "$2" ]; then endDate=$(date --date yesterday "+%Y-%m-%d"); else endDate=$2; fi + if [ ! 
-z "$3" ]; then inputBucket=$3; fi + case "$1" in + "content-details") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","aliasName":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question 
Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{dp_object_store_type}}","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' + ;; + "sourcing-summary-report") + echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": "'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": "/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "{{dp_object_store_type}}", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' + ;; + "funnel-report") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": 
"programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. of contributions pending review","approvedContributions": "No. of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "{{dp_object_store_type}}","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": "equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' + ;; + "sourcing-metrics") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}'
+ ;;
+ "*")
+ echo "Unknown model code"
+ exit 1 # Command to come out of the program with status 1
+ ;;
+ esac
+}
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2
new file mode 100644
index 0000000000..3a6c969b7b
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7
+export MODELS_HOME={{ analytics.home }}/models-{{ model_version }}
+export DP_LOGS={{ analytics.home }}/logs/data-products
+
+cd {{ analytics.home }}/scripts
+source model-config.sh
+source replay-utils.sh
+
+libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0"
+
+if [ "$1" == "telemetry-replay" ]
+  then
+  if [ ! $# -eq 5 ]
+    then
+    echo "Insufficient arguments. Killing process"
+    exit 1
+  fi
+fi
+
+get_report_job_model_name(){
+  case "$1" in
+    "assessment-correction") echo 'org.sunbird.analytics.job.report.AssessmentCorrectionJob'
+    ;;
+    *) echo $1
+    ;;
+   esac
+}
+
+if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi
+if [ -z "$job_config" ]; then job_config=$(config $1 '__endDate__' $4 $5); fi
+start_date=$2
+end_date=$3
+backup_key=$1
+
+if [ "$1" == "gls-v1" ]
+  then
+  backup_key="gls"
+elif [ "$1" == "app-ss-v1" ]
+  then
+  backup_key="app-ss"
+fi
+
+backup $start_date $end_date {{ bucket }} "derived/$backup_key" "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log"
+if [ $? == 0 ]
+ then
+  echo "Backup completed successfully..." >> "$DP_LOGS/$end_date-$1-replay.log"
+  echo "Running the $1 job replay..." >> "$DP_LOGS/$end_date-$1-replay.log"
+  echo "Job modelName - $job_id" >> "$DP_LOGS/$end_date-$1-replay.log"
+  $SPARK_HOME/bin/spark-submit --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar --class org.ekstep.analytics.job.ReplaySupervisor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --fromDate "$start_date" --toDate "$end_date" --config "$job_config" >> "$DP_LOGS/$end_date-$1-replay.log"
+else
+  echo "Unable to take backup" >> "$DP_LOGS/$end_date-$1-replay.log"
+  exit 1 # Without a backup the replay must not fall through to the status check below
+fi
+
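+# Replay protocol (backup/rollback/delete come from replay-utils.sh, sourced above):
+# 1. backup moves the derived files for the date range to a backup prefix,
+# 2. ReplaySupervisor re-runs the model for each day in the range,
+# 3. on success the backup is deleted; on failure it is restored first.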
+if [ $? == 0 ]
+ then
+  echo "$1 replay executed successfully" >> "$DP_LOGS/$end_date-$1-replay.log"
+  delete {{ bucket }} "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log"
+else
+  echo "$1 replay failed" >> "$DP_LOGS/$end_date-$1-replay.log"
+  rollback {{ bucket }} "derived/$backup_key" "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log" # restore from the same prefix the backup was written to
+  delete {{ bucket }} "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log"
+fi
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2
new file mode 100644
index 0000000000..580c3bf29c
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7
+export MODELS_HOME={{ analytics.home }}/models-{{ model_version }}
+export DP_LOGS={{ analytics.home }}/logs/data-products
+
+cd {{ analytics.home }}/scripts
+source model-config.sh
+source replay-utils.sh
+
+job_config=$(config $1 '__endDate__')
+start_date=$2
+end_date=$3
+
+echo "Running the $1 updater replay..." >> "$DP_LOGS/$end_date-$1-replay.log"
+$SPARK_HOME/bin/spark-submit --master local[*] --jars $MODELS_HOME/analytics-framework-2.0.jar --class org.ekstep.analytics.job.ReplaySupervisor $MODELS_HOME/batch-models-2.0.jar --model "$1" --fromDate "$start_date" --toDate "$end_date" --config "$job_config" >> "$DP_LOGS/$end_date-$1-replay.log"
+
+if [ $? == 0 ]
+ then
+  echo "$1 updater replay executed successfully..." >> "$DP_LOGS/$end_date-$1-replay.log"
+else
+  echo "$1 updater replay failed" >> "$DP_LOGS/$end_date-$1-replay.log"
+  exit 1
+fi
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2
new file mode 100644
index 0000000000..31ead572f3
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Restore previously backed-up files: copy everything from the backup prefix back to the source prefix.
+rollback() {
+    bucket_name=$1
+    prefix=$2
+    backup_dir=$3
+
+    src="s3://$bucket_name/$prefix/"
+    dst="s3://$bucket_name/$backup_dir/"
+    echo "Copy back the $prefix files to source directory $src from backup directory $dst"
+    aws s3 cp $dst $src --recursive --include "*" --region ap-south-1
+}
+
+# Remove the backup prefix once a replay has succeeded (or after a rollback).
+delete() {
+    bucket_name=$1
+    backup_dir=$2
+
+    path="s3://$bucket_name/$backup_dir/"
+    echo "Deleting the back-up files from $path"
+    aws s3 rm $path --recursive --region ap-south-1
+}
+
+# Move the date-stamped files ($date-*) for each day in the range to the backup prefix, one day at a time.
+backup() {
+    dt_start=$1
+    dt_end=$2
+    prefix=$4
+    bucket_name=$3
+    backup_dir=$5
+
+    ts_start=$(date -d $dt_start +%s)
+    ts_end=$(date -d $dt_end +%s)
+    src="s3://$bucket_name/$prefix/"
+    dst="s3://$bucket_name/$backup_dir/"
+
+    echo "Backing up the files from $src to $dst for the date range - ($dt_start, $dt_end)"
+    while [ $ts_start -le $ts_end ]
+    do
+        date=`date -d @$ts_start +%F`
+        aws s3 mv $src $dst --recursive --exclude "*" --include "$date-*" --region ap-south-1
+        let ts_start+=86400
+    done
+}
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2
new file mode 100644
index 0000000000..e6f1cdf9ad
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7
+export MODELS_HOME={{ analytics.home }}/models-{{ model_version }}
+export DP_LOGS={{ 
analytics.home }}/logs/data-products +## Job to run daily +cd {{ analytics.home }}/scripts +source model-dock-config.sh +today=$(date "+%Y-%m-%d") + +libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" +file_path="dock-{{ env }}.conf" + +get_report_job_model_name(){ + case "$1" in + "funnel-report") echo 'org.sunbird.analytics.sourcing.FunnelReport' + ;; + "sourcing-summary-report") echo 'org.sunbird.analytics.sourcing.SourcingSummaryReport' + ;; + "sourcing-metrics") echo 'org.sunbird.analytics.sourcing.SourcingMetrics' + ;; + "content-details") echo 'org.sunbird.analytics.sourcing.ContentDetailsReport' + ;; + *) echo $1 + ;; + esac +} + +if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi + +if [ ! -z "$1" ]; then job_config=$(config $1); else job_config="$2"; fi + +if [ ! -z "$2" ]; then batchIds=";$2"; else batchIds=""; fi + +echo "Starting the job - $1" >> "$DP_LOGS/$today-job-execution.log" + +echo "Job modelName - $job_id" >> "$DP_LOGS/$today-job-execution.log" + +nohup $SPARK_HOME/bin/spark-submit --conf spark.driver.extraJavaOptions="-Dconfig.file=$MODELS_HOME/$file_path" --conf spark.executor.extraJavaOptions="-Dconfig.file=$MODELS_HOME/$file_path" --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar,$MODELS_HOME/batch-models-2.0.jar --class org.ekstep.analytics.job.JobExecutor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --config "$job_config$batchIds" \ >> "$DP_LOGS/$today-job-execution.log" 2>&1 + +echo "Job execution completed - $1" >> "$DP_LOGS/$today-job-execution.log" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 new file mode 100644 index 0000000000..26ec84da87 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 +export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} +export DP_LOGS={{ analytics.home }}/logs/data-products +## Job to run daily +cd {{ analytics.home }}/scripts +source model-config.sh +today=$(date "+%Y-%m-%d") + +libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" + +get_report_job_model_name(){ + case "$1" in + "course-enrollment-report") echo 'org.sunbird.analytics.job.report.CourseEnrollmentJob' + ;; + "course-consumption-report") echo 'org.sunbird.analytics.job.report.CourseConsumptionJob' + ;; + "funnel-report") echo 'org.sunbird.analytics.sourcing.FunnelReport' + ;; + "sourcing-metrics") echo 'org.sunbird.analytics.sourcing.SourcingMetrics' + ;; + "admin-geo-reports") echo 'org.sunbird.analytics.job.report.StateAdminGeoReportJob' + ;; + "etb-metrics") echo 'org.sunbird.analytics.job.report.ETBMetricsJob' + ;; + "admin-user-reports") echo 'org.sunbird.analytics.job.report.StateAdminReportJob' + ;; + "userinfo-exhaust") echo 'org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob' + ;; + "response-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJob' + ;; + "response-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2' + ;; + "progress-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJob' + ;; + "progress-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2' + ;; + "cassandra-migration") echo 
'org.sunbird.analytics.updater.CassandraMigratorJob' + ;; + "collection-summary-report") echo 'org.sunbird.analytics.job.report.CollectionSummaryJob' + ;; + "program-collection-summary-report") echo 'org.sunbird.analytics.job.report.CollectionSummaryJob' + ;; + "collection-summary-report-v2") echo 'org.sunbird.analytics.job.report.CollectionSummaryJobV2' + ;; + "assessment-score-metric-correction") echo 'org.sunbird.analytics.audit.AssessmentScoreCorrectionJob' + ;; + "course-batch-status-updater") echo 'org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob' + ;; + "collection-reconciliation-job") echo 'org.sunbird.analytics.audit.CollectionReconciliationJob' + ;; + "assessment-correction") echo 'org.sunbird.analytics.job.report.AssessmentCorrectionJob' + ;; + "score-metric-migration-job") echo 'org.sunbird.analytics.audit.ScoreMetricMigrationJob' + ;; + "assessment-archival") echo 'org.sunbird.analytics.job.report.AssessmentArchivalJob' + ;; + "assessment-archived-removal") echo 'org.sunbird.analytics.job.report.AssessmentArchivalJob' + ;; + "uci-private-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob' + ;; + "uci-response-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIResponseExhaustJob' + ;; + *) echo $1 + ;; + esac +} + +if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi + +if [ ! -z "$1" ]; then job_config=$(config $1 $2); else job_config="$2"; fi + +if [ ! -z "$2" ]; then batchIds=";$2"; else batchIds=""; fi + + +echo "Starting the job - $1" >> "$DP_LOGS/$today-job-execution.log" + +echo "Job modelName - $job_id" >> "$DP_LOGS/$today-job-execution.log" + +nohup $SPARK_HOME/bin/spark-submit --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar,$MODELS_HOME/batch-models-2.0.jar --class org.ekstep.analytics.job.JobExecutor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --config "$job_config$batchIds" >> "$DP_LOGS/$today-job-execution.log" 2>&1 + +echo "Job execution completed - $1" >> "$DP_LOGS/$today-job-execution.log" diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 new file mode 100644 index 0000000000..2e613b9866 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version}}-bin-hadoop2.7 +export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} +export DP_LOGS={{ analytics.home }}/logs/data-products +export SERVICE_LOGS={{ analytics.home }}/logs/services +export JM_HOME={{ analytics.home }}/job-manager + +export azure_storage_key={{sunbird_private_storage_account_name}} +export azure_storage_secret={{sunbird_private_storage_account_key}} +export reports_azure_storage_key={{sunbird_private_storage_account_name}} +export reports_azure_storage_secret={{sunbird_private_storage_account_key}} +export druid_storage_account_key={{sunbird_public_storage_account_name}} +export druid_storage_account_secret={{sunbird_public_storage_account_key}} + +export heap_conf_str={{ spark.heap_conf_str }} +today=$(date "+%Y-%m-%d") + +kill_job_manager() +{ + echo "Killing currently running job-manager process" >> "$SERVICE_LOGS/$today-job-manager.log" + kill $(ps aux | grep 'JobManager' | awk '{print $2}') >> "$SERVICE_LOGS/$today-job-manager.log" +} + +start_job_manager() +{ + kill_job_manager # Before starting the 
job, we first kill any running job-manager
+    cd {{ analytics.home }}/scripts
+    source model-config.sh
+    job_config=$(config 'job-manager')
+    echo "Starting the job manager" >> "$SERVICE_LOGS/$today-job-manager.log"
+    echo "config: $job_config" >> "$SERVICE_LOGS/$today-job-manager.log"
+    nohup java $heap_conf_str -cp "$SPARK_HOME/jars/*:$MODELS_HOME/*:$MODELS_HOME/data-products-1.0/lib/*" -Dconfig.file=$MODELS_HOME/{{ env }}.conf org.ekstep.analytics.job.JobManager --config "$job_config" >> $SERVICE_LOGS/$today-job-manager.log 2>&1 &
+
+    job_manager_pid=$(ps aux | grep 'JobManager' | awk '{print $2}') # After starting, check whether the job-manager process is actually running.
+    if [[ ! -z "$job_manager_pid" ]]; then
+        echo "Job manager is started." >> "$SERVICE_LOGS/$today-job-manager.log"
+    else
+        echo "Job manager is not started." >> "$SERVICE_LOGS/$today-job-manager.log"
+    fi
+}
+# Tasks:
+# 1. Kill any running job-manager
+# 2. Start the job-manager
+# 3. Make sure it is running
+start_job_manager
+
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2
new file mode 100644
index 0000000000..53c032cd29
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2
@@ -0,0 +1,58 @@
+require "ruby-kafka"
+require 'json'
+
+@log = File.open("{{ analytics.home }}/logs/logfile.log", 'a')
+@kafka = Kafka.new(["{{ kafka_broker_host }}"])
+@topic = "{{ analytics_job_queue_topic }}"
+@report_list_jobs_url = "{{ report_list_jobs_url }}"
+@submit_jobs_auth_token = "{{ submit_jobs_auth_token }}"
+@submit_jobs_command = "source /mount/venv/bin/activate && dataproducts submit_druid_jobs --report_list_jobs_url #{@report_list_jobs_url} --auth_token #{@submit_jobs_auth_token}"
+
+def log(message)
+  @log.write("#{Time.now.to_s}: #{message}\n")
+end
+
+def submit_all_jobs
+  report_jobs = {
+    "assessment-dashboard-metrics" => "org.sunbird.analytics.job.report.AssessmentMetricsJobV2",
+    "course-dashboard-metrics" => "org.sunbird.analytics.job.report.CourseMetricsJobV2",
+    "course-enrollment-report" => "org.sunbird.analytics.job.report.CourseEnrollmentJob",
+    "course-consumption-report" => "org.sunbird.analytics.job.report.CourseConsumptionJob",
+    "etb-metrics" => "org.sunbird.analytics.job.report.ETBMetricsJob",
+    "admin-geo-reports" => "org.sunbird.analytics.job.report.StateAdminGeoReportJob",
+    "admin-user-reports" => "org.sunbird.analytics.job.report.StateAdminReportJob"
+  }
+  jobs = [{{ analytics_job_list }}]
+
+  log("Starting to submit #{jobs.count} jobs for processing")
+  file = File.read("{{ analytics.home }}/scripts/model-config.json")
+  file = file.gsub("$(date --date yesterday '+%Y-%m-%d')", `date --date yesterday '+%Y-%m-%d'`.strip)
+  file = file.gsub("$(date '+%Y-%m-%d')", `date "+%Y-%m-%d"`.strip)
+  config_hash = JSON.parse(file)
+  log("Config file loaded")
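+  # Each job below is submitted as a one-line JSON message on the analytics
+  # job queue topic, which the job-manager consumes. A sketch of the message
+  # shape (the model name and config here are illustrative, not taken from
+  # this deployment's configuration):
+  #   {"model":"org.sunbird.analytics.job.report.CourseEnrollmentJob",
+  #    "config":{"search":{"type":"none"},"modelParams":{}}}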
+  jobs.each do |job|
+    if job == "monitor-job-summ"
+      log("Submitting druid report jobs via the python data products CLI")
+      system('/bin/bash -l -c "'+ @submit_jobs_command +'"')
+      submit_job(job, config_hash[job])
+    elsif report_jobs[job].nil?
+      submit_job(job, config_hash[job])
+    else
+      submit_job(report_jobs[job], config_hash[job])
+    end
+  end
+
+  # Log the summary once, after the loop, rather than once per job.
+  log("Submitted #{jobs.count} jobs for processing")
+end
+
+def submit_job(job, config)
+  job_config = {model: job, config: config}.to_json
+  log("message: #{job_config}")
+  @kafka.deliver_message(job_config, topic: @topic)
+  log("Submitted #{job} for processing")
+end
+
+submit_all_jobs
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2
new file mode 100644
index 0000000000..859cf602c3
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7
+export MODELS_HOME={{ analytics.home }}/models-{{ model_version }}
+export DP_LOGS={{ analytics.home }}/logs/data-products
+export KAFKA_HOME={{ analytics.soft_path }}/kafka_2.11-0.10.1.0
+
+## job broker-list and kafka-topic
+job_brokerList={{ brokerlist }}
+job_topic={{ analytics_job_queue_topic }}
+
+## Job to run daily
+cd {{ analytics.home }}/scripts
+source model-config.sh
+today=$(date "+%Y-%m-%d")
+
+if [ -z "$job_config" ]; then job_config=$(config $1); fi
+
+echo "Submitted $1 with config $job_config" >> "$DP_LOGS/$today-job-execution.log"
+echo '{ "model" :' \"$1\" ',' ' "config": ' "$job_config" '}' >> "$DP_LOGS/$today-job-execution-debug.log"
+echo '{ "model" :' \"$1\" ',' ' "config": ' "$job_config" '}' > /tmp/job-request.json
+cat /tmp/job-request.json | $KAFKA_HOME/bin/kafka-console-producer.sh --broker-list $job_brokerList --topic $job_topic >> "$DP_LOGS/$today-job-execution.log" 2>&1
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2
new file mode 100644
index 0000000000..edd03ff36b
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2
@@ -0,0 +1,216 @@
+#!/usr/bin/env bash
+
+## Job to run daily
+
+cd "{{ analytics_cluster.home }}"
+source model-config.sh
+today=$(date "+%Y-%m-%d")
+
+while :; do
+    case $1 in
+        -j|--job) shift
+        job="$1"
+        ;;
+        -m|--mode) shift
+        mode="$1"
+        ;;
+        -p|--parallelisation) shift
+        parallelisation=$1
+        ;;
+        -pa|--partitions) shift
+        partitions=$1
+        ;;
+        -sd|--startDate) shift
+        start_date=$1
+        ;;
+        -ed|--endDate) shift
+        end_date=$1
+        ;;
+        -h|--sparkMaster) shift
+        sparkMaster=$1
+        ;;
+        -sp|--selectedPartitions) shift
+        selected_partitions=$1
+        ;;
+        *) break
+    esac
+    shift
+done
+
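+# Sketches of typical invocations, assuming the rendered script is saved as
+# submit-script.sh (the script name and job ids here are illustrative):
+#   ./submit-script.sh -j progress-exhaust -m parallel-jobs -p 4
+#   ./submit-script.sh -j wfs -m via-partition -pa 8 -p 2 -sd 2023-01-01 -ed 2023-01-07
+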
+get_report_job_model_name(){
+  case "$1" in
+    "assessment-dashboard-metrics") echo 'org.sunbird.analytics.job.report.AssessmentMetricsJobV2'
+    ;;
+    "course-dashboard-metrics") echo 'org.sunbird.analytics.job.report.CourseMetricsJobV2'
+    ;;
+    "userinfo-exhaust") echo 'org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob'
+    ;;
+    "response-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJob'
+    ;;
+    "response-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2'
+    ;;
+    "progress-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJob'
+    ;;
+    "progress-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2'
+    ;;
+    "cassandra-migration") echo 'org.sunbird.analytics.updater.CassandraMigratorJob'
+    ;;
+    "uci-private-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob'
+    ;;
+    "uci-response-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIResponseExhaustJob'
+    ;;
+    *) echo $1
+    ;;
+   esac
+}
+
+submit_cluster_job() {
+  # add batch number to config
+  echo "Running for batch number $i"
+  batchNumberString="\\\"modelParams\\\":{\\\"batchNumber\\\":$i,"
+  job_config=$(config $job)
+  cluster_job_config=${job_config//'"'/'\"'}
+  finalConfig=${cluster_job_config/'\"modelParams\":{'/$batchNumberString}
+  echo $finalConfig
+  echo "Running $job as parallel jobs"
+  classVariable="org.ekstep.analytics.job.JobExecutor"
+  argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]"
+  argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList"
+  clusterConfig=`cat cluster-config.json`
+  requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
+  finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
+  echo $finalRequestBody
+{% if dp_object_store_type == "azure" %}
+{
+  response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}") # capture the Livy response so it can be logged below
+}
+{% elif (dp_object_store_type == "oci") %}
+{
+  response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}") # capture the Livy response so it can be logged below
+}
+{% endif %}
+  echo "Submitted job for batch number $i; the response is below"
+  echo $response
+}
+
+job_id=$(get_report_job_model_name $job)
+
+if [ -z "$sparkMaster" ]; then sparkMaster="local[*]"; else sparkMaster="$sparkMaster"; fi
+
+if [ "$mode" = "via-partition" ]; then
+  endPartitions=`expr $partitions - 1`
+  if [ -z "$parallelisation" ]; then parallelisation=1; else parallelisation=$parallelisation; fi
+  # add partitions to config and start jobs
+  for i in $(seq 0 $parallelisation $endPartitions)
+  do
+    # add partitions to config
+    partitionString="\\\"delta\\\":0,\\\"partitions\\\":[$(seq -s , $i `expr $i + $parallelisation - 1`)]"
+    if [ -z "$start_date" ]; then
+      job_config=$(config $job)
+      cluster_job_config=${job_config//'"'/'\"'}
+      finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString}
+      echo $finalConfig
+      echo "Running $job by partitions."
+      classVariable="org.ekstep.analytics.job.JobExecutor"
+      argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]"
+    else
+      job_config=$(config $job '__endDate__')
+      cluster_job_config=${job_config//'"'/'\"'}
+      finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString}
+      echo $finalConfig
+      echo "Running $job by partitions via Replay-Supervisor."
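+      # The substitution above splices the partition list in after the first
+      # occurrence of \"delta\":0 in the escaped config; ReplaySupervisor then
+      # re-runs the model for each day between --fromDate and --toDate.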
+ classVariable="org.ekstep.analytics.job.ReplaySupervisor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" + fi + argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" + clusterConfig=`cat cluster-config.json` + requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} + finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} + echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} + done + +elif [ "$mode" = "parallel-jobs" ]; then + # add batch number to config and submit jobs + echo "inside parallel-jobs block" + echo $parallelisation + if [ $parallelisation -ge 1 ]; then + for i in $(seq 1 $parallelisation) + do + submit_cluster_job $i & + done + else echo "No requests found in table"; fi + +elif [ "$mode" = "selected-partition" ]; then + # add partitions to config + partitionString="\\\"delta\\\":0,\\\"partitions\\\":[$selected_partitions]" + if [ -z "$start_date" ]; then + job_config=$(config $job) + cluster_job_config=${job_config//'"'/'\"'} + finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} + echo $finalConfig + echo "Running $job by partitions." + classVariable="org.ekstep.analytics.job.JobExecutor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" + else + job_config=$(config $job '__endDate__') + cluster_job_config=${job_config//'"'/'\"'} + finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} + echo $finalConfig + echo "Running $job by partitions via Replay-Supervisor." + classVariable="org.ekstep.analytics.job.ReplaySupervisor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" + fi + argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" + clusterConfig=`cat cluster-config.json` + requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} + finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} + echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} +else + if [ -z "$start_date" ]; then + echo "Running $job without partition via run-job." 
+ job_config=$(config $job) + cluster_job_config=${job_config//'"'/'\"'} + classVariable="org.ekstep.analytics.job.JobExecutor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$cluster_job_config\"]" + else + job_config=$(config $job '__endDate__') + cluster_job_config=${job_config//'"'/'\"'} + echo "Running $job without partition via Replay-Supervisor." + classVariable="org.ekstep.analytics.job.ReplaySupervisor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$cluster_job_config\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" + fi + argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" + echo $argsStr + clusterConfig=`cat cluster-config.json` + requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} + finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} + echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} + +fi diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 new file mode 100644 index 0000000000..cfd986b008 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 @@ -0,0 +1,119 @@ +from __future__ import division +import math +import psycopg2 +import sys +import pandas as pd +from IPython.display import display +from psycopg2 import sql, connect +import json + + +def updateExhaustRequests(db, table, update_list): + for r in update_list: + cursor = db.cursor() + batchNum = r['batch_number'] + requestId = r['request_id'] + insertQry = "UPDATE {0} SET batch_number = {1} WHERE request_id = '{2}'".format(table, batchNum, requestId) + n = cursor.execute(insertQry) + +def updateDruidRequests(db, table, update_list): + for r in update_list: + cursor = db.cursor() + batchNum = r['batch_number'] + reportId = r['report_id'] + insertQry = "UPDATE {0} SET batch_number = {1} WHERE report_id = '{2}'".format(table, batchNum, reportId) + n = cursor.execute(insertQry) + +def processRequests(totalRequestsDf, jobId, batchSize, db, table,jobType): + # Compute parallelism from batchSize & totalRequests + # update batch_number to table + + totalRequests = len(totalRequestsDf.index) + print("totalRequests {0}".format(totalRequests)) + + parallelism = int(math.ceil(totalRequests/batchSize)) + print("parallelism computed {0}".format(parallelism)) + + if totalRequests > 0: + if jobType == 'exhaust': + totalRequestsDf["row_num"] = totalRequestsDf.groupby(by=['job_id'])['request_id'].transform(lambda x: x.rank()) + else: + totalRequestsDf["row_num"] = totalRequestsDf['report_id'].transform(lambda x: x.rank()) + #display(totalRequestsDf) + + start_index = 1 + end_index = batchSize + for i in range(1, parallelism+1): + subSetDf = totalRequestsDf[(totalRequestsDf['row_num'] >= start_index) & (totalRequestsDf['row_num'] <= end_index)] + subSetDf["batch_number"] 
= i + print(start_index,end_index) + if jobType == 'exhaust': + updateExhaustRequests(db, table, json.loads(subSetDf.to_json(orient='records'))) + else: + updateDruidRequests(db, table, json.loads(subSetDf.to_json(orient='records'))) + start_index = 1 + end_index + end_index = end_index + batchSize + db.commit() + db.close() + return parallelism + else: + return 0 + +def postgresql_to_dataframe(db, select_query, column_names): + cursor = db.cursor() + try: + cursor.execute(select_query) + except (Exception, psycopg2.DatabaseError) as error: + print("Error: %s" % error) + return 1 + + tupples = cursor.fetchall() + + df = pd.DataFrame(tupples, columns=column_names) + #display(df) + return df + +def get_columns_names(db,table): + columns = [] + col_cursor = db.cursor() + col_names_str = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS WHERE " + col_names_str += "table_name = '{}';".format( table ) + try: + sql_object = sql.SQL(col_names_str).format(sql.Identifier( table)) + col_cursor.execute( sql_object ) + col_names = (col_cursor.fetchall()) + for tup in col_names: + columns += [ tup[0] ] + col_cursor.close() + except Exception as err: + print ("get_columns_names ERROR:", err) + return columns + +def main(batchSize, jobId,jobType,table): + host="{{postgres.db_url}}" + port={{postgres.db_port}} + user="{{postgres.db_username}}" + password="{{postgres.db_password}}" + database="{{postgres.db_name}}" + url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database) + + db = psycopg2.connect(host=host, user=user, password=password, database=database, port=port) + + column_names = get_columns_names(db, table) + + if jobType == 'exhaust': + jobId = jobId.split("-v2")[0] if "-v2" in jobId else jobId + selectQuery = "select * from {0} where job_id = '{1}' and status IN ('SUBMITTED', 'FAILED') and iteration < 3;".format(table, jobId) + else: + selectQuery = "select * from {0} where status IN ('ACTIVE')".format(table) + df = postgresql_to_dataframe(db, selectQuery, column_names) + + parallelism = processRequests(df, jobId, batchSize, db, table,jobType) + return parallelism + +batchSize =int(sys.argv[2]) +jobId=sys.argv[1] +jobType = sys.argv[3] +table = sys.argv[4] +parallelism = main(batchSize, jobId,jobType,table) +print("returning parallelism value: {0}".format(parallelism)) diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml index 7eb22c7a18..690c51d87d 100755 --- a/ansible/roles/data-products-deploy/defaults/main.yml +++ b/ansible/roles/data-products-deploy/defaults/main.yml @@ -4,7 +4,7 @@ spark_output_temp_dir: /mount/data/analytics/tmp/ bucket: "telemetry-data-store" secor_bucket: "telemetry-data-store" -dp_object_store_type: "oci" +dp_object_store_type: "azure" dp_raw_telemetry_backup_location: "unique/raw/" dp_storage_key_config: "azure_storage_key" dp_storage_secret_config: "azure_storage_secret" @@ -210,9 +210,9 @@ admin_password: "{{ spark_cluster_user_password }}" spark_cluster_name: "{{env}}-spark-cluster" spark_cluster: - executor_core: 1 - executor_memory: 2G - num_executors: 1 + executor_core: 5 + executor_memory: 19G + num_executors: 5 analytics_cluster: home: "/tmp" diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 733c416138..c659f75113 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -6,14 +6,14 @@ - always - name: Ensure oci oss bucket exists - command: 
"oci os bucket get --name {{ bucket }}" + command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" register: check_bucket when: dp_object_store_type == "oci" tags: - always - name: Create oci oss bucket - command: "oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" when: dp_object_store_type == "oci" and check_bucket.rc !=0 tags: - always @@ -32,7 +32,7 @@ - dataproducts-spark-cluster - name: Copy Core Data Products to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -54,7 +54,7 @@ - ed-dataproducts-spark-cluster - name: Copy Ed Data Products to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -75,7 +75,7 @@ - framework-spark-cluster - name: Copy Framework Library to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -96,7 +96,7 @@ - framework-spark-cluster - name: Copy Scruid Library to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -154,7 +154,7 @@ - framework-spark-cluster - name: Copy configuration file to oci oss - command: oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force async: 3600 poll: 10 when: dp_object_store_type == "oci" From f77981144363726c9a7c278db6df00814fcbaefd Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 7 Apr 2023 15:50:32 +1000 Subject: [PATCH 071/161] reverted data-products-deploy role Signed-off-by: 
Deepak Devadathan --- .../templates/cluster-config.json.j2 | 30 +------------- .../templates/submit-script.j2 | 39 ++----------------- 2 files changed, 4 insertions(+), 65 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 12ebf0bde0..e899827fdb 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -27,7 +27,7 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } -{% elif (dp_object_store_type == "s3") %} +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} { "jars": [ "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", @@ -55,32 +55,4 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } -{% elif (dp_object_store_type == "oci") %} -{ - "jars": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - ], - "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - "files": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" - ], - "className": "org.ekstep.analytics.job.JobExecutor", - "executorCores": {{ spark_cluster.executor_core }}, - "executorMemory": "{{ spark_cluster.executor_memory }}", - "numExecutors": {{ spark_cluster.num_executors }}, - "conf": { - "spark.sql.autoBroadcastJoinThreshold" : "-1", - 
"spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", - "spark.scheduler.mode" : "FAIR", - "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", - "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", - "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" - } -} {% endif %} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy/templates/submit-script.j2 b/ansible/roles/data-products-deploy/templates/submit-script.j2 index edd03ff36b..e8341dc1e8 100644 --- a/ansible/roles/data-products-deploy/templates/submit-script.j2 +++ b/ansible/roles/data-products-deploy/templates/submit-script.j2 @@ -1,7 +1,6 @@ #!/usr/bin/env bash ## Job to run daily - cd "{{ analytics_cluster.home }}" source model-config.sh today=$(date "+%Y-%m-%d") @@ -80,15 +79,7 @@ submit_cluster_job() { requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: 
application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} + response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: admin_name }}") echo "Submitted job for batchNumer $i below is the response" echo $response } @@ -127,15 +118,7 @@ if [ "$mode" = "via-partition" ]; then requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} done elif [ "$mode" = "parallel-jobs" ]; then @@ -174,15 +157,8 @@ elif [ "$mode" = "selected-partition" ]; then requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} + else if [ -z "$start_date" ]; then echo "Running $job without partition via run-job." 
@@ -203,14 +179,5 @@ else requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} - + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" fi From 230b286ee87f4ff72528b3c1a332cd1eb5168845 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 7 Apr 2023 17:55:00 +1000 Subject: [PATCH 072/161] removed unwanted env Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 2 -- 1 file changed, 2 deletions(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index ae7e33e271..a4b47800fe 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -38,8 +38,6 @@ node('build-slave') { ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" cd /tmp ./create-cluster.sh - export inventory_dir=/var/lib/jenkins/workspace/Provision/dev/DataPipeline/__SparkBDSCluster3/ansible/inventory/env - export ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass ''' From 04bf9d4689c6bc124775d7dfafc1d670819fb817 Mon Sep 17 00:00:00 2001 From: nikesh_g_gogia Date: Fri, 7 Apr 2023 14:41:04 +0530 Subject: [PATCH 073/161] Removed Restart Cluster Logic - Done from Ambari --- .../templates/create-cluster-with-sleep.sh.j2 | 331 ------------------ .../templates/create-cluster.sh.j2 | 16 +- .../templates/delete-cluster.sh.j2 | 1 + 3 files changed, 3 insertions(+), 345 deletions(-) delete mode 100755 ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 deleted file mode 100755 index 54784435d7..0000000000 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 +++ /dev/null @@ -1,331 +0,0 @@ -#! 
/bin/bash -# Subnet id will generate from env variable - -ambari_user="{{ambari_user}}" -cluster_password="{{cluster_password}}" -key_alias="{{key_alias}}" -user_id="{{user_id}}" -subnet="{{subnet_id}}" -compartment_id="{{compartment_id}}" -display_name="{{display_name}}" -workernode="{{workernode}}" -cluster_public_key="{{public_key}}" - -AMBARI_USER=$ambari_user -AMBARI_PWD=$cluster_password - -function get_bdsid() { - list_param=`oci bds instance list --compartment-id $compartment_id` - bdsid="NULL" - # echo $list_param | jq '.data' - state="ACTIVE" - disname="NULL" - for k in $(jq '.data | keys | .[]' <<< "$list_param"); do - # echo $k - cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'` - if [ $cstate = $state ]; then - disname=`echo $list_param | jq -r '.data['$k']["display-name"]'` - if [ $disname = $display_name ]; then - bdsid=`echo $list_param | jq -r '.data['$k']["id"]'` - fi - - fi - echo "BDS ID" - echo $bdsid - done -} - -function getLivyip() { - - export bds_instance_id=$bdsid - bdsjson=$(oci bds instance get --bds-instance-id $bds_instance_id) - # echo "AMBARI URL" - ambari_url=`echo $bdsjson | jq -r '.data["cluster-details"]["ambari-url"]'` - # echo $ambari_url - livyip="NULL" - cnode="UTILITY" - for k in $(jq '.data["nodes"] | keys | .[]' <<< "$bdsjson"); do - node=`echo $bdsjson | jq -r '.data["nodes"]['$k']["node-type"]'` - if [ $node = "$cnode" ]; then - livyip=`echo $bdsjson | jq -r '.data["nodes"]['$k']["ip-address"]'` - fi - done - echo "LIVY IP" - echo $livyip - -} - -getlivyclustername() { - cdet=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$livyip:7183/api/v1/clusters/) - echo $cdet - for k in $(jq '.items | keys | .[]' <<< "$cdet"); do - # echo $k - cluster_name=`echo $cdet | jq -r '.items['$k']["Clusters"]["cluster_name"]'` - echo $cluster_name - done - echo "CLUSTER NAME" - -} - -function get_apidetails() { - - export bds_instance_id=$bdsid - - listapijson=$(oci bds bds-api-key list --bds-instance-id $bds_instance_id) - - #echo $listapijson | jq '.data[1]["key-alias"]' - id="NULL" - ctype="ACTIVE" - for k in $(jq '.data | keys | .[]' <<< "$listapijson"); do - type=`echo $listapijson | jq -r '.data['$k']["lifecycle-state"]'` - if [ $type = "$ctype" ]; then - id=`echo $listapijson | jq -r '.data['$k']["id"]'` - fi - done - - echo $id - - export api_key_id=$id - - list_api=`oci bds bds-api-key get --api-key-id $api_key_id --bds-instance-id $bds_instance_id` - - #echo $list_api | jq '.data' - - data=`echo $list_api | jq '.data'` - echo "API DETAILS" - echo $data - region=`echo $list_api | jq -r '.data["default-region"]'` - fingerprint=`echo $list_api | jq -r '.data["fingerprint"]'` - keyalias=`echo $list_api | jq -r '.data["key-alias"]'` - lifecyc=`echo $list_api | jq -r '.data["lifecycle-state"]'` - tm=`echo $list_api | jq -r '.data["time-created"]'` - usid=`echo $list_api | jq -r '.data["user-id"]'` - tenid=`echo $list_api | jq -r '.data["tenant-id"]'` - pemfilepath=`echo $list_api | jq -r '.data["pemfilepath"]'` - -} - -function update_bds_config(){ - #change below variables for your cluster - CONFIG_FILE_TO_UPDATE="" - - #Used when for restarting components after config update - #Wait time before we poll for restart status. Default 30 seconds. Meaning, We poll for restart status every 30 seconds - WAIT_TIME_IN_SEC=30 - - #No of tries before we give up on the restart status. Default 20. With default WAIT_TIME_IN_SEC as 30, At max we wait for 10(20*30=600 seconds) minutes before we give up. 
- RETRY_COUNT=20 - - #INTERNAL USE ONLY - propObj="" - - get_apidetails - getUtilityNodesIps=$livyip - getlivyclustername - echo $getUtilityNodesIps - getClusterName=$cluster_name - for utilityNodeIp in $getUtilityNodesIps - do - echo "Current utility node ip: $utilityNodeIp" - str1=$(nslookup $utilityNodeIp | awk -v var=$utilityNodeIp '/name =/{print var "\t", $4}') - CONFIG_FILE_TO_UPDATE="core-site" #this is the file we're updating in this example - propObj=$(get_property_json) - echo $propObj - echo "calling add properties" - - #update key value pairs. Multiple key value pairs can be updated before doing update_ambari_config - add_properties "fs.oci.client.auth.fingerprint" $fingerprint - add_properties "fs.oci.client.auth.passphrase" $passphrase - add_properties "fs.oci.client.auth.pemfilepath" $pemfilepath - add_properties "fs.oci.client.auth.tenantId" $tenid - add_properties "fs.oci.client.auth.userId" $usid - add_properties "fs.oci.client.regionCodeOrId" $region - #Update it to ambari - echo "updating ambari config" - update_ambari_config - - # echo "restarting all required components" - # restart_required_components - - done - -} - - -#Method to collect the current config -function get_property_json(){ - allConfs=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName?fields=Clusters/desired_configs) #to get all the configs - currVersionLoc=".Clusters.desired_configs.\"$CONFIG_FILE_TO_UPDATE\".tag" #fetching current version for property - propVersion=$(echo $allConfs | jq $currVersionLoc | tr -d '"') - propJson=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X GET "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/configurations?type=$CONFIG_FILE_TO_UPDATE&tag=$propVersion") #fetch property json - propLoc=".items[].properties" - propKeyVal=$(echo $propJson | jq $propLoc) - propObj="{\"properties\":$propKeyVal}" - echo $propObj -} - -#Method to add/update key value pair to existing config -function add_properties(){ - echo $1 $2 - echo $propObj - propObj=$(echo $propObj | jq '.properties += { "'$1'": "'$2'" }') - echo $propObj -} - -#Method to update config in ambari -function update_ambari_config(){ - parseableAddedProp=$(echo $propObj | jq '.properties') - echo $parseableAddedProp - timestamp=$(date +%s) - newVersion="version$timestamp" - finalJson='[{"Clusters":{"desired_config":[{"type":"'$CONFIG_FILE_TO_UPDATE'","tag":"'$newVersion'","properties":'$parseableAddedProp'}]}}]' - echo "CALING AMABRI API" - response_body_amb=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X PUT -d "$finalJson" "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName") - echo $response_body_amb - echo "DONE AMABRI API" -} - -#Method to restart required components -function restart_required_components(){ - echo "restarting all required components" - response_body=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X POST -d '{"RequestInfo":{"command":"RESTART","context":"Restart all required services from bootstrap script","operation_level":"host_component"},"Requests/resource_filters":[{"hosts_predicate":"HostRoles/stale_configs=true&HostRoles/cluster_name='$getClusterName'"}]}' "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests") - - echo "printing response_body: $response_body" - - idLoc=".Requests.id" - requestId=$(echo $response_body | jq $idLoc) - echo "request id is : $requestId" - - current_count=0 - while [[ $current_count -lt $RETRY_COUNT ]]; - do - 
current_count=$((current_count+1)) - response=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests/$requestId) - request_status=$(echo $response | jq -r ".Requests.request_status") - echo "printing request_status: $request_status" - if [[ $request_status == "IN_PROGRESS" ]] || [[ $request_status == "PENDING" ]]; then - echo "current_count is : $current_count" - sleep $WAIT_TIME_IN_SEC - elif [[ $request_status == "COMPLETED" ]]; then - echo "Restart successful" - break - fi - done -} - -function creat_api(){ - export bds_instance_id=$bdsid - export key_alias=$key_alias # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-key-alias - export passphrase=$b64p # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-passphrase - export user_id=$user_id - oci bds bds-api-key create --bds-instance-id $bds_instance_id --key-alias $key_alias --passphrase $passphrase --user-id $user_id -} - -function restart_bds_cluster() { - # oci cli command to stop - echo "STOPPING CLUSTER" - oci bds instance stop --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --is-force-stop-jobs true - sleep 10m - # oci cli command to start - echo "STARTING CLUSTER" - oci bds instance start --bds-instance-id $bds_instance_id --cluster-admin-password $b64p - sleep 15m -} - - -# Below is tenancy - -function create_cluster() { - - export compartment_id=$compartment_id - - master=1 - utility=1 - - worker=$workernode # This has to be replaced with Jenkins Paramter - - # Begin script in case all parameters are correct - echo "Generating json woth $master master ndoes $utility utility nodes and $worker worker nodes" - json="[" - - for i in `seq 1 $master` - do - json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" - done - - for i in `seq 1 $utility` - do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" - done - - for i in `seq 1 $worker` - do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 16, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" - done - - json="$json]" - printf "$json" > "nodes.json" - echo "File successfully generated and saved as nodes.json" - - echo "CREATING THE BDS CLUSTER" - - export cluster_public_key=$public_key - export cluster_version="ODH2_0" - export display_name=$display_name - export is_high_availability='false' - export is_secure='false' - - cmd="oci bds instance create --cluster-admin-password '$b64p' --cluster-public-key '$cluster_public_key' --cluster-version '$cluster_version' --compartment-id '$compartment_id' --display-name '$display_name' --is-high-availability $is_high_availability --is-secure $is_secure --nodes file://nodes.json " - echo $cmd - eval "$cmd" - -} - -function replace_host() { - echo "REPLACE THE HOSTS" - echo "" >> {{inventory_dir}}/hosts - echo "[bds-livy-node]" >> {{inventory_dir}}/hosts - echo "$livyip ansible_ssh_user=opc" >> {{inventory_dir}}/hosts - echo "" >> {{inventory_dir}}/hosts - -} - -# MAIN TO START - -b64p=`echo -n $cluster_password | base64` -echo $b64p -echo $compartment_id - -echo "CREATING 
CLUSTER" - -create_cluster - -echo "WAITING CLUSTER TO CREATE" - -sleep 42m - -echo "FETCHING BDS ID" - -get_bdsid # This sets BDS ID - -echo "GET LIVY-AMBARI IP" - -getLivyip # This will be ambari ip also - -replace_host - -echo "CREATE OBJECT STORAGE API KEY" - -creat_api - -echo "WAITING FOR API TO CREATE" - -sleep 5m - -echo "UPDATE BDS AMBARI CONFIG" - -get_apidetails - -update_bds_config - -restart_bds_cluster \ No newline at end of file diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index e4295dded8..541a1ec561 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -1,6 +1,7 @@ #! /bin/bash # Subnet id will generate from env variable -# Version 1 Running Fine +# Version 1 Author Nikesh Gogia and Ali Shemshadi + ambari_user="{{ambari_user}}" cluster_password="{{cluster_password}}" key_alias="{{key_alias}}" @@ -227,18 +228,6 @@ function create_api(){ oci bds bds-api-key create --bds-instance-id $bds_instance_id --key-alias $key_alias --passphrase $passphrase --user-id $user_id --wait-for-state $capi --max-wait-seconds $cwait } -function restart_bds_cluster() { - # oci cli command to stop - echo "STOPPING CLUSTER" - cstate='SUCCEEDED' - cwait=2000 - oci bds instance stop --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --is-force-stop-jobs true --wait-for-state=$cstate --max-wait-seconds $cwait - # oci cli command to start - echo "STARTING CLUSTER" - cstate='SUCCEEDED' - oci bds instance start --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --wait-for-state=$cstate --max-wait-seconds $cwait -} - # Below is tenancy @@ -326,4 +315,3 @@ get_apidetails update_bds_config -# restart_bds_cluster diff --git a/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 index b5e1d28d36..3e3df78b33 100755 --- a/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 @@ -1,4 +1,5 @@ #!/bin/bash +# Version 1 - Author Nikesh Gogia nikesh.g.gogia@oracle.com compartment_id="{{compartment_id}}" display_name="{{display_name}}" From 6604a0bd3869026aa9c408d2aced792e21201ffe Mon Sep 17 00:00:00 2001 From: nikesh_g_gogia Date: Sat, 8 Apr 2023 12:01:34 +0530 Subject: [PATCH 074/161] Handled Ambari Restart logic Signed-off-by: nikesh_g_gogia --- .../roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index 541a1ec561..94479d0764 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -315,3 +315,5 @@ get_apidetails update_bds_config +echo "BDS Config Completed and Ambari Restarted" + From 8e1f04aad8377218c980fde50692a9f2cadd6d1d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 06:58:13 +1000 Subject: [PATCH 075/161] updated jenkins.bds to consume credential from credstore Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 30 ++++++++++++----------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds 
b/pipelines/provision/spark/Jenkinsfile.bds index a4b47800fe..73de054790 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -14,13 +14,14 @@ node('build-slave') { } stage('copy cluster creation script') { + withCredentials([usernamePassword(credentialsId: 'oci-bds-credential', passwordVariable: 'cluster_password', usernameVariable: 'ambari_user')]) { values = [:] envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim() module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=$ambari_user cluster_password=$cluster_password key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) @@ -29,21 +30,22 @@ node('build-slave') { values.put('ansibleExtraArgs', ansibleExtraArgs) println values ansible_playbook_run(values) + } } - stage('create and provision spark OCI BDS') { - oci_namespace=params.oci_namespace - //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { - sh ''' - currentws=$(pwd) - ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" - cd /tmp - ./create-cluster.sh - export ANSIBLE_HOST_KEY_CHECKING=False - ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass - ''' - //} + // stage('create and provision spark OCI BDS') { + // oci_namespace=params.oci_namespace + // //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { + // sh ''' + // currentws=$(pwd) + // ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + // cd /tmp + // ./create-cluster.sh + // export ANSIBLE_HOST_KEY_CHECKING=False + // ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + // ''' + // //} - } + // } } } From 00eef6909a19bf1c3c2064f0a238f35c7b10aa9d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 07:05:44 +1000 Subject: [PATCH 076/161] parametrized create cluster Signed-off-by: Deepak Devadathan --- .../oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 4 ++-- pipelines/provision/spark/Jenkinsfile.bds | 3 +-- 
2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index 94479d0764..2e880a92d2 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -2,8 +2,8 @@ # Subnet id will generate from env variable # Version 1 Author Nikesh Gogia and Ali Shemshadi -ambari_user="{{ambari_user}}" -cluster_password="{{cluster_password}}" +ambari_user="${1}" +cluster_password="${2}" key_alias="{{key_alias}}" user_id="{{user_id}}" subnet="{{subnet_id}}" diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index 73de054790..d7ada30bab 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -14,7 +14,6 @@ node('build-slave') { } stage('copy cluster creation script') { - withCredentials([usernamePassword(credentialsId: 'oci-bds-credential', passwordVariable: 'cluster_password', usernameVariable: 'ambari_user')]) { values = [:] envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim() module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() @@ -30,7 +29,7 @@ node('build-slave') { values.put('ansibleExtraArgs', ansibleExtraArgs) println values ansible_playbook_run(values) - } + } // stage('create and provision spark OCI BDS') { // oci_namespace=params.oci_namespace From 6c76fbf4aaa16ebc6f5fa41eaeae8fc9a6ec961f Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 07:06:37 +1000 Subject: [PATCH 077/161] corrected syntax Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index d7ada30bab..5d0f175308 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -20,7 +20,7 @@ node('build-slave') { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=$ambari_user cluster_password=$cluster_password key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From cc12811533a81a733a1bbfc0fc11a3f4d7ecc47d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 07:19:00 +1000 Subject: [PATCH 078/161] disabled delete playbook for testing Signed-off-by: Deepak Devadathan --- 
pipelines/provision/spark/Jenkinsfile.delete | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pipelines/provision/spark/Jenkinsfile.delete b/pipelines/provision/spark/Jenkinsfile.delete index 5675a7e1df..ff5a9c5384 100644 --- a/pipelines/provision/spark/Jenkinsfile.delete +++ b/pipelines/provision/spark/Jenkinsfile.delete @@ -20,7 +20,7 @@ node('build-slave') { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) @@ -30,18 +30,18 @@ node('build-slave') { println values ansible_playbook_run(values) } - stage('create and provision spark OCI BDS') { - storage_container=params.storage_container - //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { - sh ''' - currentws=$(pwd) - ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" - cd /tmp - ./delete-cluster.sh - ''' - //} + // stage('create and provision spark OCI BDS') { + // storage_container=params.storage_container + // //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { + // sh ''' + // currentws=$(pwd) + // ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + // cd /tmp + // ./delete-cluster.sh + // ''' + // //} - } + } } } From ce11e924e7dadae21f46b4e362026c1b26210760 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 07:21:33 +1000 Subject: [PATCH 079/161] removed redundant workernode parameter Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.delete | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/provision/spark/Jenkinsfile.delete b/pipelines/provision/spark/Jenkinsfile.delete index ff5a9c5384..a469192dc8 100644 --- a/pipelines/provision/spark/Jenkinsfile.delete +++ b/pipelines/provision/spark/Jenkinsfile.delete @@ -20,7 +20,7 @@ node('build-slave') { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} 
cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From 21b8166b3ac404cdb957ec05fd4034dc12d3374a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 07:30:33 +1000 Subject: [PATCH 080/161] testing create spark cluster Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 26 +++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index 5d0f175308..3deb57cc6a 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -31,20 +31,20 @@ node('build-slave') { ansible_playbook_run(values) } - // stage('create and provision spark OCI BDS') { - // oci_namespace=params.oci_namespace - // //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { - // sh ''' - // currentws=$(pwd) - // ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" - // cd /tmp - // ./create-cluster.sh - // export ANSIBLE_HOST_KEY_CHECKING=False - // ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass - // ''' - // //} + stage('create and provision spark OCI BDS') { + oci_namespace=params.oci_namespace + withCredentials([usernamePassword(credentialsId: 'oci-bds-credential', passwordVariable: 'cluster_password', usernameVariable: 'ambari_user')]) { + sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + cd /tmp + ./create-cluster.sh $ambari_user $cluster_password + export ANSIBLE_HOST_KEY_CHECKING=False + ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + ''' + } - // } + } } } From 15832a59bf345017d8af4250c670d8b3010e62fc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 08:32:48 +1000 Subject: [PATCH 081/161] updated delete cluster jenkins Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.delete | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pipelines/provision/spark/Jenkinsfile.delete b/pipelines/provision/spark/Jenkinsfile.delete index a469192dc8..ce5e0b19dd 100644 --- a/pipelines/provision/spark/Jenkinsfile.delete +++ b/pipelines/provision/spark/Jenkinsfile.delete @@ -30,18 +30,18 @@ node('build-slave') { println values ansible_playbook_run(values) } - // stage('create and provision spark OCI BDS') { - // storage_container=params.storage_container - // //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { - // sh ''' - // currentws=$(pwd) - // 
ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" - // cd /tmp - // ./delete-cluster.sh - // ''' - // //} + stage('create and provision spark OCI BDS') { + storage_container=params.storage_container + //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { + sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + cd /tmp + ./delete-cluster.sh + ''' + //} - // } + } } } From 20ca43d3132d0f655a26db1481961afbe1ee3c6d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 10:49:52 +1000 Subject: [PATCH 082/161] updated the spark deploy job for bds Signed-off-by: Deepak Devadathan --- .../data-products-deploy/defaults/main.yml | 1 + .../roles/data-products-deploy/tasks/main.yml | 14 +++---- .../templates/cluster-config.json.j2 | 30 +++++++++++++- .../templates/submit-script.j2 | 39 +++++++++++++++++-- ansible/spark-cluster-job-submit.yml | 1 + .../spark-cluster-deploy/Jenkinsfile.parallel | 2 +- 6 files changed, 75 insertions(+), 12 deletions(-) diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml index 690c51d87d..38ddd70420 100755 --- a/ansible/roles/data-products-deploy/defaults/main.yml +++ b/ansible/roles/data-products-deploy/defaults/main.yml @@ -1,6 +1,7 @@ analytics_user: analytics analytics_group: analytics spark_output_temp_dir: /mount/data/analytics/tmp/ +oci_install_loc: /home/{{analytics_user}}/bin/ bucket: "telemetry-data-store" secor_bucket: "telemetry-data-store" diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index c659f75113..ee6b40c1cb 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -6,14 +6,14 @@ - always - name: Ensure oci oss bucket exists - command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" + command: "{{oci_install_loc}}/oci os bucket get --name {{ bucket }}" register: check_bucket when: dp_object_store_type == "oci" tags: - always - name: Create oci oss bucket - command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + command: "{{oci_install_loc}}/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" when: dp_object_store_type == "oci" and check_bucket.rc !=0 tags: - always @@ -32,7 +32,7 @@ - dataproducts-spark-cluster - name: Copy Core Data Products to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force + command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force" async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -54,7 +54,7 @@ - ed-dataproducts-spark-cluster - name: Copy Ed Data Products to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force + command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version 
}}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force" async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -75,7 +75,7 @@ - framework-spark-cluster - name: Copy Framework Library to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force + command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force" async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -96,7 +96,7 @@ - framework-spark-cluster - name: Copy Scruid Library to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force + command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force" async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -154,7 +154,7 @@ - framework-spark-cluster - name: Copy configuration file to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force + command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force" async: 3600 poll: 10 when: dp_object_store_type == "oci" diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index e899827fdb..12ebf0bde0 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -27,7 +27,7 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } -{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +{% elif (dp_object_store_type == "s3") %} { "jars": [ "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", @@ -55,4 +55,32 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml 
-DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } +{% elif (dp_object_store_type == "oci") %} +{ + "jars": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log 
-Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} {% endif %} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy/templates/submit-script.j2 b/ansible/roles/data-products-deploy/templates/submit-script.j2 index e8341dc1e8..0e629a4ce8 100644 --- a/ansible/roles/data-products-deploy/templates/submit-script.j2 +++ b/ansible/roles/data-products-deploy/templates/submit-script.j2 @@ -1,6 +1,7 @@ #!/usr/bin/env bash ## Job to run daily + cd "{{ analytics_cluster.home }}" source model-config.sh today=$(date "+%Y-%m-%d") @@ -79,7 +80,15 @@ submit_cluster_job() { requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody - response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}") +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} echo "Submitted job for batchNumber $i below is the response" echo $response } @@ -118,7 +127,15 @@ if [ "$mode" = "via-partition" ]; then requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name 
}}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" - +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} else if [ -z "$start_date" ]; then echo "Running $job without partition via run-job." @@ -179,5 +203,14 @@ else requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} + fi diff --git a/ansible/spark-cluster-job-submit.yml b/ansible/spark-cluster-job-submit.yml index ba4e017a23..8924fce8f2 100644 --- a/ansible/spark-cluster-job-submit.yml +++ b/ansible/spark-cluster-job-submit.yml @@ -6,6 +6,7 @@ environment: AZURE_STORAGE_ACCOUNT: "{{sunbird_private_storage_account_name}}" AZURE_STORAGE_KEY: "{{sunbird_private_storage_account_key}}" + OCI_CLI_AUTH: instance_principal roles: - data-products-deploy diff --git a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel index 4b9891d62a..dad65d4e73 100644 --- a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel +++ b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel @@ -26,7 +26,7 @@ node() { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/spark-cluster-job-submit.yml" - ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type} -vvvv " + ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} oci_install_loc=${params.oci_install_loc} batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type} -vvvv " values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From e278a689c4b87ca70952ca8be2813f5ae7ec0428 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 14:28:47 +1000 Subject: [PATCH 083/161] removed redundant role Signed-off-by: Deepak Devadathan --- .../defaults/main.yml | 282 -------- .../collection-summary-ingestion-spec.json | 251 ------- .../files/sourcing-ingestion-spec.json | 146 ---- .../tasks/main.yml | 499 ------------- .../templates/cluster-config.json.j2 | 86 --- .../templates/common.conf.j2 | 317 --------- .../templates/exhaust_sanity_check.py.j2 | 58 -- .../templates/log4j2.xml.j2 | 54 
-- .../templates/model-config.j2 | 151 ---- .../templates/model-config.json.j2 | 670 ------------------ .../templates/model-dock-config.j2 | 34 - .../templates/replay-job.j2 | 63 -- .../templates/replay-updater.j2 | 24 - .../templates/replay-utils.j2 | 43 -- .../templates/run-dock-job.j2 | 41 -- .../templates/run-job.j2 | 83 --- .../templates/start-jobmanager.j2 | 46 -- .../templates/submit-all-jobs.rb.j2 | 58 -- .../templates/submit-job.j2 | 22 - .../templates/submit-script.j2 | 216 ------ .../templates/update-job-requests.py.j2 | 119 ---- 21 files changed, 3263 deletions(-) delete mode 100755 ansible/roles/data-products-deploy-oci-bds/defaults/main.yml delete mode 100644 ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json delete mode 100644 ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json delete mode 100644 ansible/roles/data-products-deploy-oci-bds/tasks/main.yml delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 diff --git a/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml b/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml deleted file mode 100755 index 7eb22c7a18..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml +++ /dev/null @@ -1,282 +0,0 @@ -analytics_user: analytics -analytics_group: analytics -spark_output_temp_dir: /mount/data/analytics/tmp/ - -bucket: "telemetry-data-store" -secor_bucket: "telemetry-data-store" -dp_object_store_type: "oci" -dp_raw_telemetry_backup_location: "unique/raw/" -dp_storage_key_config: "azure_storage_key" -dp_storage_secret_config: "azure_storage_secret" -dp_reports_storage_key_config: "reports_azure_storage_key" -dp_reports_storage_secret_config: "reports_azure_storage_secret" - -kafka_broker_host: "{{groups['processing-cluster-kafka'][0]}}:9092" -ingestion_kafka_broker_host: "{{groups['ingestion-cluster-kafka'][0]}}:9092" -brokerlist: "{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" -zookeeper: "{{groups['processing-cluster-zookeepers']|join(':2181,')}}:2181" 
-dp_username: dp-monitor -analytics_job_queue_topic: "{{ env }}.analytics.job_queue" -topic: "{{ env }}.telemetry.derived" -analytics_metrics_topic: "{{ env }}.analytics_metrics" -sink_topic: "{{ env }}.telemetry.sink" -assess_topic: "{{ env }}.telemetry.assess" -metrics_topic: "{{ env }}.telemetry.metrics" -job_manager_tmp_dir: "transient-data" -channel: dev-test -druid_broker_host: "{{groups['raw-broker'][0]}}" -druid_router_host: "{{groups['raw-router'][0]}}" -druid_rollup_broker_host: "{{groups['raw-broker'][0]}}" -hierarchySearchServiceUrl: "{{ proto }}://{{ domain_name }}/action/content" -hierarchySearchServicEndpoint: /v3/hierarchy/ - -user_table_keyspace: "sunbird" -course_keyspace: "sunbird_courses" -hierarchy_store_keyspace: "{{ env }}_hierarchy_store" -job_request_table: "{{ env }}_job_request" -dataset_metadata_table: "{{ env }}_dataset_metadata" -report_user_table_keyspace: "sunbird_courses" -report_user_enrolment_table: "report_user_enrolments" - -analytics_job_list: '"wfs", "content-rating-updater", "monitor-job-summ"' -analytics_jobs_count: 3 - -cassandra_keyspace_prefix: '{{ env }}_' -report_cassandra_cluster_host: "{{ report_cassandra_host | default(core_cassandra_host) }}" -cassandra_hierarchy_store_keyspace: "{{ env_name}}_hierarchy_store" -spark_version: 3.1.3 - -heap_memory: "-Xmx5120m" - -spark: - home: "{{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7" - public_dns: 54.255.154.146 - master: - url: spark://172.31.11.117:7077 - host: 172.31.11.117 - worker: - instances: 1 - cores: 2 - memory: 4g - driver: - memory: 3g - executor: - memory: 4g - driver_memory: 7g - memory_fraction: 0.3 - storage_fraction: 0.5 - executor_memory: 2g - heap_conf_str: '"-XX:+UseG1GC -XX:MaxGCPauseMillis=100 -Xms250m {{ heap_memory }} -XX:+UseStringDeduplication"' - -submit_jobs: - submit-all-jobs: - hour: 02 - minute: 35 - -start_jobmanager: - job-manager: - hour: 02 - minute: 30 -have_weekly_jobs: false - -course_batch_status_updater_job_schedule: 60 - -run_wfs_job: - wfs: - hour: 00 - minute: 30 -run_monitor_job: - monitor-job-summ: - hour: 03 - minute: 00 - -run_admin_user_reports_job: - admin-user-reports-3AMIST: - hour: 21 - minute: 30 - admin-user-reports-2PMIST: - hour: 8 - minute: 30 - -run_admin_geo_reports_job: - admin-geo-reports-4AMIST: - hour: 22 - minute: 30 - admin-geo-reports-3PMIST: - hour: 9 - minute: 30 - -run_assessment_aggregator_report_job: - assessment-aggregator-report: - hour: 18 - minute: 35 - -update_user_redis_cache: - populate-user-cache: - hour: 3 - minute: 00 - -index_content_model_druid: - index_content: - hour: 1 - minute: 00 - -run_etb_metrics_weekly_job: - etb-metrics-weekly: - hour: 23 - minute: 30 - weekday: 1 - -# These are the dummy times till sept30 for exhaust reports -#To-Do: Update time after 3.2.7 deployment - -run_progress_exhaust: - progress-exhaust: - hour: 08 - minute: 00 - -run_response_exhaust: - response-exhaust: - hour: 09 - minute: 00 - -run_userinfo_exhaust: - userinfo-exhaust: - hour: 10 - minute: 00 - -run_collection_summary: - collection-summary: - hour: 09 - minute: 30 - -run_sourcing_summary: - sourcing-summary: - hour: 10 - minute: 30 - -run_cassandra_migration: - cassandra-migration: - hour: 19 - minute: 15 - -run_uci_private_exhaust_job: - uci-private-exhaust: - hour: 03 - minute: 00 - -run_uci_response_exhaust_job: - uci-response-exhaust: - hour: 02 - minute: 00 - - -service: - search: - url: http://{{private_ingressgateway_ip}}/search - path: /v3/search - -es_search_index: "compositesearch" -analytics: - home: 
/mount/data/analytics - soft_path: /mount/data/analytics - paths: ['/mount/data/analytics', '/mount/data/analytics/logs', '/mount/data/analytics/logs/services', '/mount/data/analytics/logs/data-products', '/mount/data/analytics/tmp', '/mount/data/analytics/scripts', '/mount/data/analytics/models' ] - scripts: ['model-config', 'replay-job', 'replay-updater', 'replay-utils', 'run-job', 'submit-job', 'start-jobmanager', 'submit-script'] - dockScripts: ['model-dock-config','run-dock-job'] - -# artifact versions -analytics_core_artifact_ver: "2.0" -analytics_ed_dataporducts_artifact_ver: "1.0" -scruid_artifact_ver: "2.5.0" - -producer_env: "dev.sunbird" -analytics_job_manager_artifact: "job-manager-{{ analytics_core_artifact_ver }}.jar" -analytics_core_artifact: "analytics-framework-{{ analytics_core_artifact_ver }}.jar" -scruid_artifact: "scruid_2.12-{{ scruid_artifact_ver }}.jar" -analytics_batch_module_artifact: "batch-models-{{ analytics_core_artifact_ver }}.jar" -analytics_ed_dataporducts_artifact: "data-products-{{ analytics_ed_dataporducts_artifact_ver }}-distribution.tar.gz" -model_version: "2.0" - -submit_jobs_auth_token: "{{ sunbird_api_auth_token }}" -report_list_jobs_url: "{{ druid_report_url }}" - -reports_container: "reports" - -# Cluster vars -spark_cluster_user_password: "" -spark_cluster_user_name: "" -admin_name: "{{ spark_cluster_user_name }}" -admin_password: "{{ spark_cluster_user_password }}" -spark_cluster_name: "{{env}}-spark-cluster" - -spark_cluster: - executor_core: 1 - executor_memory: 2G - num_executors: 1 - -analytics_cluster: - home: "/tmp" - -analytics_ed_dataporducts_jar_artifact: "data-products-{{ analytics_ed_dataporducts_artifact_ver }}.jar" - -spark_enable_dynamic_allocation: false -# Spark Cassandra config-vars -spark_cassandra_connection_timeout_millis: 30000 -spark_cassandra_query_timeout_millis: 180000 -spark_cassandra_query_max_rows_fetch_count: 1000 -spark_sql_shuffle_partitions: 200 - -druid_report_postgres_db_name: druid -druid_report_postgres_db_username: druid - - -#Override this variable in production and point to druid rollup ingestion cluster -# Example: "http://$rollup_cluster_ip:8090" -druid_rollup_cluster_ingestion_task_url: "http://{{groups['raw-overlord'][0]}}:8081" - -# On demand Exhaust throttling vars -exhaust_batches_limit_per_channel: 30 -exhaust_file_size_limit_bytes_per_channel: 1073741824 - -exhaust_parallel_batch_load_limit: 10 -exhaust_user_parallelism: 200 - -data_exhaust_batch_limit_per_request: 20 - -# Start Of UCI Related Variables -uci_postgres_host: "dev-pg11.postgres.database.azure.com" -uci_encryption_key_base64: "" -uci_bot_postgres_database: uci-botdb -uci_fusionauth_postgres_database: uci-fusionauth -uci_postgres_user: "{{postgres.db_username}}" -uci_postgres_password: "{{postgres.db_password}}" - -uci_postgres: - conversation_db_name: "{{ uci_bot_postgres_database }}" - conversation_db_host: "{{ uci_postgres_host }}" - conversation_db_port: "5432" - conversation_db_user: "{{ uci_postgres_user }}" - conversation_db_psss: "{{ uci_postgres_password }}" - conversation_table_name: "bot" - fushionauth_db_name: "{{ uci_fusionauth_postgres_database }}" - fushionauth_db_host: "{{ uci_postgres_host }}" - fushionauth_db_port: "5432" - fushionauth_db_user: "{{ uci_postgres_user }}" - fushionauth_db_psss: "{{ uci_postgres_password }}" - user_table_name: "users" - user_registration_table_name: "user_registrations" - user_identities_table_name: "identities" - -uci_encryption_secret_key: "{{uci_encryption_key_base64}}" -uci_pdata_id: 
"{{uci_env}}.uci.{{sunbird_instance}}" - -# End Of UCI Related Variables - -# Exhaust sanity check vars -cassandra_migrator_job_name: "Cassandra Migrator" - -assessment_metric_primary_category: "{{ exhaust_job_assessment_primary_category }}" - -# Default s3 variables -sunbird_private_s3_storage_key: "" -sunbird_private_s3_storage_secret: "" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json b/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json deleted file mode 100644 index 69e13196e2..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json +++ /dev/null @@ -1,251 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "collection-summary-snapshot", - "parser": { - "type": "string", - "parseSpec": { - "format": "json", - "flattenSpec": { - "useFieldDiscovery": false, - "fields": [ - { - "type": "root", - "name": "content_org", - "expr": "contentorg" - }, - { - "type": "root", - "name": "user_org", - "expr": "orgname" - }, - { - "type": "root", - "name": "batch_start_date", - "expr": "startdate" - }, - { - "type": "root", - "name": "batch_end_date", - "expr": "enddate" - }, - { - "type": "root", - "name": "has_certificate", - "expr": "hascertified" - }, - { - "type": "root", - "name": "collection_id", - "expr": "courseid" - }, - { - "type": "root", - "name": "batch_id", - "expr": "batchid" - }, - { - "type": "root", - "name": "collection_name", - "expr": "collectionname" - }, - { - "type": "root", - "name": "batch_name", - "expr": "batchname" - }, - { - "type": "root", - "name": "total_enrolment", - "expr": "enrolleduserscount" - }, - { - "type": "root", - "name": "total_completion", - "expr": "completionuserscount" - }, - { - "type": "root", - "name": "total_certificates_issued", - "expr": "certificateissuedcount" - }, - { - "type": "root", - "name": "content_status", - "expr": "contentstatus" - }, - { - "type": "root", - "name": "user_state", - "expr": "state" - }, - { - "type": "root", - "name": "user_district", - "expr": "district" - }, - { - "type": "root", - "name": "content_channel", - "expr": "channel" - }, - { - "type": "root", - "name": "keywords", - "expr": "keywords" - }, - { - "type": "root", - "name": "timestamp", - "expr": "timestamp" - }, - { - "type": "root", - "name": "medium", - "expr": "medium" - }, - { - "type": "root", - "name": "subject", - "expr": "subject" - }, - { - "type": "root", - "name": "created_for", - "expr": "createdfor" - }, - { - "type": "root", - "name": "user_type", - "expr": "usertype" - }, - { - "type": "root", - "name": "user_subtype", - "expr": "usersubtype" - } - ] - }, - "dimensionsSpec": { - "dimensions": [ - { - "name": "content_org" - }, - { - "name": "user_org" - }, - { - "type": "string", - "name": "batch_id" - }, - { - "type": "string", - "name": "batch_start_date" - }, - { - "type": "string", - "name": "batch_end_date" - }, - { - "type": "string", - "name": "collection_id" - }, - { - "type": "string", - "name": "collection_name" - }, - { - "type": "string", - "name": "batch_name" - }, - { - "type": "long", - "name": "total_enrolment" - }, - { - "type": "long", - "name": "total_completion" - }, - { - "type": "long", - "name": "total_certificates_issued" - }, - { - "type": "string", - "name": "content_status" - }, - { - "type": "string", - "name": "user_state" - }, - { - "type": "string", - "name": "user_district" - }, - { - "name": "keywords" - }, - { - "name": 
"has_certificate" - }, - { - "type": "string", - "name": "content_channel" - }, - { - "name": "medium" - }, - { - "name": "subject" - }, - { - "name": "created_for" - }, - { - "type": "string", - "name": "user_type" - }, - { - "type": "string", - "name": "user_subtype" - } - ], - "dimensionsExclusions": [] - }, - "timestampSpec": { - "column": "timestamp", - "format": "auto" - } - } - }, - "metricsSpec": [], - "granularitySpec": { - "type": "uniform", - "segmentGranularity": "day", - "queryGranularity": "none", - "rollup": true - } - }, - "ioConfig": { - "type": "index", - "firehose": { - "type": "static-azure-blobstore", - "blobs": [ - { - "container": "reports", - "path": "/collection-summary-reports-v2/collection-summary-report-latest.json" - } - ], - "fetchTimeout": 300000 - } - }, - "tuningConfig": { - "type": "index", - "targetPartitionSize": 5000000, - "maxRowsInMemory": 25000, - "forceExtendableShardSpecs": false, - "logParseExceptions": true - } - } -} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json b/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json deleted file mode 100644 index 69e773d457..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json +++ /dev/null @@ -1,146 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "sourcing-summary-snapshot", - "parser": { - "type": "string", - "parseSpec": { - "format": "json", - "flattenSpec": { - "useFieldDiscovery": false, - "fields": [ - { - "type": "root", - "name": "program_id", - "expr": "program_id" - }, - { - "type": "root", - "name": "status", - "expr": "status" - }, - { - "type": "root", - "name": "rootorg_id", - "expr": "rootorg_id" - }, - { - "type": "root", - "name": "user_id", - "expr": "user_id" - }, - { - "type": "root", - "name": "osid", - "expr": "osid" - }, - { - "type": "root", - "name": "user_type", - "expr": "user_type" - }, - { - "type": "root", - "name": "contributor_id", - "expr": "contributor_id" - }, - { - "type": "root", - "name": "total_contributed_content", - "expr": "total_contributed_content" - }, - { - "type": "root", - "name": "primary_category", - "expr": "primary_category" - }, - { - "type": "root", - "name": "created_by", - "expr": "created_by" - } - ] - }, - "dimensionsSpec": { - "dimensions": [ - { - "type": "string", - "name": "program_id" - }, - { - "type": "string", - "name": "status" - }, - { - "type": "string", - "name": "rootorg_id" - }, - { - "type": "string", - "name": "user_id" - }, - { - "type": "string", - "name": "osid" - }, - { - "type": "string", - "name": "user_type" - }, - { - "type": "string", - "name": "contributor_id" - }, - { - "type": "string", - "name": "primary_category" - }, - { - "type": "string", - "name": "created_by" - } - ], - "dimensionsExclusions": [] - }, - "timestampSpec": { - "column": "timestamp", - "format": "auto" - } - } - }, - "metricsSpec": [ - { - "name": "total_count", - "type": "count" - } - ], - "granularitySpec": { - "type": "uniform", - "segmentGranularity": "day", - "queryGranularity": "none", - "rollup": true - } - }, - "ioConfig": { - "type": "index", - "firehose": { - "type": "static-azure-blobstore", - "blobs": [ - { - "container": "reports", - "path": "/sourcing/SourcingSummaryReport.json" - } - ], - "fetchTimeout": 300000 - } - }, - "tuningConfig": { - "type": "index", - "targetPartitionSize": 5000000, - "maxRowsInMemory": 25000, - "forceExtendableShardSpecs": false, - 
"logParseExceptions": true - } - } -} diff --git a/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml b/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml deleted file mode 100644 index 733c416138..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml +++ /dev/null @@ -1,499 +0,0 @@ -## Data products deployment ## -- name: Ensure azure blob storage container exists - command: az storage container create --name {{ bucket }} - when: dp_object_store_type == "azure" - tags: - - always - -- name: Ensure oci oss bucket exists - command: "oci os bucket get --name {{ bucket }}" - register: check_bucket - when: dp_object_store_type == "oci" - tags: - - always - -- name: Create oci oss bucket - command: "oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" - when: dp_object_store_type == "oci" and check_bucket.rc !=0 - tags: - - always - -- name: Copy Core Data Products - copy: src={{ analytics_batch_module_artifact }} dest={{ analytics.home }}/models-{{ model_version }} - tags: - - dataproducts - -- name: Copy Core Data Products to azure blob - command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - dataproducts-spark-cluster - -- name: Copy Core Data Products to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force - async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - dataproducts-spark-cluster - -- name: Unarchive Ed Data Products - become: yes - unarchive: src={{ playbook_dir}}/{{ analytics_ed_dataporducts_artifact }} dest={{ analytics.home }}/models-{{ model_version }} copy=yes group={{ analytics_group }} owner={{ analytics_user }} - tags: - - ed-dataproducts - -- name: Copy Ed Data Products to azure blob - command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar -f {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - ed-dataproducts-spark-cluster - -- name: Copy Ed Data Products to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force - async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - ed-dataproducts-spark-cluster - -- name: Copy Framework Library - copy: src={{ analytics_core_artifact }} dest={{ analytics.home }}/models-{{ model_version }} - tags: - - framework - -- name: Copy Framework Library to azure blob - command: az storage blob upload --overwrite --debug -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - framework-spark-cluster - -- name: Copy Framework Library to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force - 
async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - framework-spark-cluster - -- name: Copy Scruid Library - copy: src={{ scruid_artifact }} dest={{ analytics.home }}/models-{{ model_version }} - tags: - - framework - -- name: Copy Scruid Library to azure blob - command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - framework-spark-cluster - -- name: Copy Scruid Library to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force - async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - framework-spark-cluster - -- name: Copy Job Manager - copy: src={{ analytics_job_manager_artifact }} dest={{ analytics.home }}/models-{{ model_version }} - tags: - - dataproducts - -- name: Copy configuration file - template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/{{ env }}.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - ed-dataproducts - - framework - when: dockdataproducts is undefined - -- name: Copy configuration file - template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/dock-{{ env }}.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - ed-dataproducts - - framework - when: dockdataproducts is defined - -- name: Copy configuration file as application.conf for cluster - template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/application.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - framework-spark-cluster - -- name: Update spark temp dir value for cluster - lineinfile: - path: '{{ analytics.home }}/models-{{ model_version }}/application.conf' - regexp: '^spark_output_temp_dir="/mount/data/analytics/tmp/"' - line: 'spark_output_temp_dir="/var/log/sparkapp/tmp/"' - tags: - - framework-spark-cluster - -- name: Update logger kafka config for cluster - lineinfile: - path: '{{ analytics.home }}/models-{{ model_version }}/application.conf' - regexp: '^log.appender.kafka.enable="false"' - line: 'log.appender.kafka.enable="true"' - tags: - - framework-spark-cluster - -- name: Copy configuration file to azure blob - command: az storage blob upload --overwrite -c {{ bucket }} -f {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - framework-spark-cluster - -- name: Copy configuration file to oci oss - command: oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force - async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - framework-spark-cluster - -- name: Copy log4j2 xml file - template: src=log4j2.xml.j2 dest={{ analytics.home }}/models-{{ model_version }}/log4j2.xml mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: [ dataproducts, framework, ed-dataproducts ] - -- name: Copy Scripts - template: src={{ item }}.j2 dest={{ analytics.home }}/scripts/{{ item }}.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - 
with_items: "{{ analytics.scripts }}" - tags: [ dataproducts, framework, ed-dataproducts ] - when: dockdataproducts is undefined - -- name: Copy python sanity check script file - template: src=exhaust_sanity_check.py.j2 dest={{ analytics.home }}/scripts/exhaust_sanity_check.py - tags: [ dataproducts, framework, ed-dataproducts ] - when: dockdataproducts is undefined - -- name: Copy Dock Scripts - template: src={{ item }}.j2 dest={{ analytics.home }}/scripts/{{ item }}.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - with_items: "{{ analytics.dockScripts }}" - tags: [ dataproducts, framework, ed-dataproducts ] - when: dockdataproducts is defined - -- name: Update model config - template: src=model-config.j2 dest={{ analytics.home }}/scripts/model-config.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - update-config - - ed-dataproducts - when: dockdataproducts is undefined - -- name: Update model dock config - template: src=model-dock-config.j2 dest={{ analytics.home }}/scripts/model-dock-config.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - update-config - - ed-dataproducts - when: dockdataproducts is defined - -- name: Copy submit-all-jobs ruby file - template: src=submit-all-jobs.rb.j2 dest={{ analytics.home }}/scripts/submit-all-jobs.rb mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - update-config - - ed-dataproducts - -- name: Copy model-config.json file - template: src=model-config.json.j2 dest={{ analytics.home }}/scripts/model-config.json mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - update-config - - ed-dataproducts - -- name: Clean cron jobs - command: crontab -r - ignore_errors: yes - tags: - - default-jobs - - spark-jobs - - spark1-jobs - - clean-cronjobs - -- name: Create daily cron jobs for wfs - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh wfs" - with_dict: "{{ run_wfs_job }}" - tags: - - spark1-jobs - -- name: Create daily cron jobs for monitor job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh monitor-job-summ" - with_dict: "{{ run_monitor_job }}" - tags: - - spark1-jobs - -- name: Create daily cron jobs using submit-all-jobs - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job='/bin/bash -lc "ruby {{ analytics.home }}/scripts/submit-all-jobs.rb"' - with_dict: "{{ submit_jobs }}" - tags: - - default-jobs - - spark-jobs - - cronjobs - -- name: Create start-jobmanager cron jobs - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/start-jobmanager.sh" - with_dict: "{{ start_jobmanager }}" - tags: - - default-jobs - - spark-jobs - - cronjobs - -- name: Create course-batch-status-updater cron job - cron: name="{{env}}-course-batch-status-updater" minute=*/{{ course_batch_status_updater_job_schedule }} job="{{ analytics.home }}/scripts/run-job.sh course-batch-status-updater" - tags: - - cronjobs - - default-jobs - - spark1-jobs - -- name: Create admin-user-reports cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh admin-user-reports" - with_dict: "{{ 
run_admin_user_reports_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs -- name: Create admin-geo-reports cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh admin-geo-reports" - with_dict: "{{ run_admin_geo_reports_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create assessment-aggregator reports cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="/bin/bash {{ analytics.home }}/adhoc-scripts/run_exporter.sh > /home/analytics/output.log" - with_dict: "{{ run_assessment_aggregator_report_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create etb metrics cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} weekday={{ item.value.weekday }} job="{{ analytics.home }}/scripts/run-job.sh etb-metrics" - with_dict: "{{ run_etb_metrics_weekly_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create progress-exhaust cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh progress-exhaust" - with_dict: "{{ run_progress_exhaust }}" - tags: - - cronjobs - - default-jobs - - spark1-jobs - -- name: Create response-exhaust cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh response-exhaust" - with_dict: "{{ run_response_exhaust }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create cassandra-migration cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh cassandra-migration" - with_dict: "{{ run_cassandra_migration }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - - -- name: Create userinfo-exhaust cron job - cron: name="{{ env }}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh userinfo-exhaust" - with_dict: "{{ run_userinfo_exhaust }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create collection-summary cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh collection-summary-report" - with_dict: "{{ run_collection_summary }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Copy collection-summary ingestion spec - copy: src="collection-summary-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - ed-dataproducts - -- name: Create sourcing-summary cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-dock-job.sh sourcing-summary-report" - with_dict: "{{ run_sourcing_summary }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create uci-private-exhaust cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh uci-private-exhaust" - with_dict: "{{ run_uci_private_exhaust_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create uci-response-exhaust cron job - cron: name="{{env}}-{{ item.key }}" minute={{ 
item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh uci-response-exhaust" - with_dict: "{{ run_uci_response_exhaust_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Copy sourcing-summary ingestion spec - copy: src="sourcing-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - ed-dataproducts - -- name: Update start jobmanager - template: src=start-jobmanager.j2 dest={{ analytics.home }}/scripts/start-jobmanager.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - update-jobmanager-config - - dataproducts - -# Cluster job sumbit tasks -- name: Copy cluster-config.json file - template: src=cluster-config.json.j2 dest={{ analytics_cluster.home }}/cluster-config.json - delegate_to: localhost - tags: - - replay-job - - run-job - - config-update - -- name: Copy submit-script.sh file - template: src=submit-script.j2 dest={{ analytics_cluster.home }}/submit-script.sh mode=755 - delegate_to: localhost - tags: - - replay-job - - run-job - - config-update - -- name: Copy model-config.sh file - template: src=model-config.j2 dest={{ analytics_cluster.home }}/model-config.sh - delegate_to: localhost - tags: - - replay-job - - run-job - - config-update - -- name: Replay Job - shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --startDate {{ start_date }} --endDate {{ end_date }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} &" - async: "{{ (pause_min * 60) }}" - poll: 0 - tags: - - replay-job - -- name: Run Job - shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} --batch_id {{ batch_id }} &" - async: "{{ (pause_min * 60) }}" - poll: 0 - tags: - - run-job - -- name: Submit jobs - shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ item }} --mode default --sparkMaster yarn &" - with_items: "{{ jobs.split(',')|list }}" - tags: - - job-submit - -# Cluster exhaust parallel jobs sumbit tasks - -- name: Install required python packages - pip: - name: - - psycopg2-binary - - pandas - - IPython - tags: - - parallel-jobs-submit - -- name: Copy python script file - template: src=update-job-requests.py.j2 dest={{ analytics_cluster.home }}/update-job-requests.py - delegate_to: localhost - tags: - - parallel-jobs-submit - -- name: Execute python script to populate batch numbers - shell: | - if echo "{{jobs}}" | grep 'druid' - then - python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{env}}_report_config - elif echo "{{jobs}}" | grep 'exhaust' - then - python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} exhaust {{env}}_job_request - fi - tags: - - parallel-jobs-submit - register: jobsCountStr - - -- debug: - var: jobsCountStr - tags: - - parallel-jobs-submit - -- name: Get stdout with parallelisation value from python script to tmp file - shell: echo "{{ jobsCountStr.stdout }}" > /tmp/test.txt - tags: - - parallel-jobs-submit - -- name: Extract parallelisation value from tmp file - shell: "cat /tmp/test.txt | tr '\n' ' ' | awk -F': ' '{print $NF}'" - register: jobsCountOut - tags: - - parallel-jobs-submit - -- debug: - 
var: jobsCountOut - tags: - - parallel-jobs-submit - -# set jobs count variable from python script output -- set_fact: - jobs_count: "{{ jobsCountOut.stdout }}" - tags: - - parallel-jobs-submit - -- name: Submit parallel exhaust jobs - shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ jobs }} --mode parallel-jobs --parallelisation {{ jobs_count }} &" - poll: 30 - tags: - - parallel-jobs-submit - register: submitOutput - -- debug: - var: submitOutput - tags: - - parallel-jobs-submit - -# Execute Exhaust job sanity check script tasks - -- name: Install required python packages - pip: - name: - - requests - tags: - - run-sanity - -- name: Run sanity check python script - shell: python {{ analytics.home }}/scripts/exhaust_sanity_check.py - tags: - - run-sanity - register: SanityCheckStatus \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 deleted file mode 100644 index 12ebf0bde0..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 +++ /dev/null @@ -1,86 +0,0 @@ - -{% if dp_object_store_type == "azure" %} -{ - "jars": [ - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - ], - "file": "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - "files": [ - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/application.conf" - ], - "className": "org.ekstep.analytics.job.JobExecutor", - "executorCores": {{ spark_cluster.executor_core }}, - "executorMemory": "{{ spark_cluster.executor_memory }}", - "numExecutors": {{ spark_cluster.num_executors }}, - "conf": { - "spark.sql.autoBroadcastJoinThreshold" : "-1", - "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", - "spark.scheduler.mode" : "FAIR", - "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", - "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", - "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} 
-Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" - } -} -{% elif (dp_object_store_type == "s3") %} -{ - "jars": [ - "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", - "s3n://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", - "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - ], - "file": "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - "files": [ - "s3n://{{ bucket }}/models-{{ model_version }}/application.conf" - ], - "className": "org.ekstep.analytics.job.JobExecutor", - "executorCores": {{ spark_cluster.executor_core }}, - "executorMemory": "{{ spark_cluster.executor_memory }}", - "numExecutors": {{ spark_cluster.num_executors }}, - "conf": { - "spark.sql.autoBroadcastJoinThreshold" : "-1", - "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", - "spark.scheduler.mode" : "FAIR", - "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", - "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", - "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml 
-DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" - } -} -{% elif (dp_object_store_type == "oci") %} -{ - "jars": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - ], - "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - "files": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" - ], - "className": "org.ekstep.analytics.job.JobExecutor", - "executorCores": {{ spark_cluster.executor_core }}, - "executorMemory": "{{ spark_cluster.executor_memory }}", - "numExecutors": {{ spark_cluster.num_executors }}, - "conf": { - "spark.sql.autoBroadcastJoinThreshold" : "-1", - "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", - "spark.scheduler.mode" : "FAIR", - "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", - "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", - "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log 
-Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" - } -} -{% endif %} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 deleted file mode 100644 index e0ec7005df..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 +++ /dev/null @@ -1,317 +0,0 @@ -application.env="{{ env }}" -telemetry.version="2.1" -default.parallelization="10" -spark_output_temp_dir="/mount/data/analytics/tmp/" -lp.url="{{lp_url}}" -service.search.url="{{ service.search.url }}" -service.search.path="{{ service.search.path }}" -spark.cassandra.connection.host="{{groups['dp-cassandra'][0]}}" -cassandra.keyspace_prefix="{{ cassandra_keyspace_prefix }}" -cassandra.hierarchy_store_prefix="{{ cassandra_hierarchy_store_prefix }}" - - -storage.key.config="{{ dp_storage_key_config }}" -storage.secret.config="{{ dp_storage_secret_config }}" -reports.storage.key.config="{{ dp_reports_storage_key_config }}" -reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" -{% if dp_object_store_type == "azure" %} -cloud_storage_type="azure" -{% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3" or dp_object_store_type == "oci") %} -{% if cloud_service_provider == "oci" %} -cloud_storage_type="oci" -{% else %} -cloud_storage_type="s3" -{% endif %} -cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" -cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}" -storage.endpoint.config="{{ s3_storage_endpoint_with_protocol }}" -aws_storage_key="{{ s3_storage_key }}" -aws_storage_secret="{{ s3_storage_secret }}" -{% endif %} - -lp.contentmodel.versionkey="jd5ECm/o0BXwQCe8PfZY1NoUkB9HN41QjA80p22MKyRIcP5RW4qHw8sZztCzv87M" - -# Joblog Kafka appender config for cluster execution -log.appender.kafka.enable="false" -log.appender.kafka.broker_host="{{groups['processing-cluster-kafka'][0]}}:9092" -log.appender.kafka.topic="{{ env }}.druid.events.log" - -# Kafka connection configuration -kafka.consumer.brokerlist="{{groups['processing-cluster-kafka'][0]}}:9092" -kafka.consumer.topic="{{ env }}.analytics.job_queue" -no_of_jobs=42 - -# Spark Driver -spark.driver.memory=6g - -spark.memory_fraction={{ spark.memory_fraction }} -spark.storage_fraction={{ spark.storage_fraction }} -spark.driver_memory="{{ spark.driver_memory }}" - -#Monitor Jobs - -monitor { - notification { - webhook_url = "{{ data_exhaust_webhook_url }}" - channel = "{{ data_exhaust_Channel }}" - token = "{{ data_exhaust_token }}" - slack = true - name = "{{ data_exhaust_name }}" - } -} - -#App ID & Channel ID -default.consumption.app.id="no_value" -default.channel.id="in.ekstep" -default.creation.app.id="no_value" - - -# Media Service Type -media_service_type = "azure" - -azure_tenant="{{ media_service_azure_tenant }}" -azure_subscription_id="{{ media_service_azure_subscription_id }}" 
-azure_account_name="{{ media_service_azure_account_name }}" -azure_resource_group_name="{{ media_service_azure_resource_group_name }}" -azure_token_client_key="{{ media_service_azure_token_client_key }}" -azure_token_client_secret="{{ media_service_azure_token_client_secret }}" -elasticsearch.service.endpoint="http://{{groups['composite-search-cluster'][0]}}:9200" -elasticsearch.index.compositesearch.name="{{ es_search_index }}" - -org.search.api.url="{{ channelSearchServiceEndpoint }}" -org.search.api.key="{{ searchServiceAuthorizationToken }}" - -hierarchy.search.api.url="{{ hierarchySearchServiceUrl }}" -hierarchy.search.api.path="{{ hierarchySearchServicEndpoint }}" - -# Azure Media Service Config -azure { - location = "centralindia" - tenant = "tenant name" - subscription_id = "subscription id" - - api { - endpoint="Media Service API End Point" - version = "2018-07-01" - } - - account_name = "account name" - resource_group_name = "Resource Group Name" - - transform { - default = "media_transform_default" - hls = "media_transform_hls" - } - - stream { - base_url = "{{ stream_base_url }}" - endpoint_name = "default" - protocol = "Hls" - policy_name = "Predefined_ClearStreamingOnly" - } - - token { - client_key = "client key" - client_secret = "client secret" - } -} - -## Reports - Global config -cloud.container.reports="reports" - -# course metrics container in azure -course.metrics.cassandra.sunbirdKeyspace="sunbird" -course.metrics.cassandra.sunbirdCoursesKeyspace="sunbird_courses" -course.metrics.cassandra.sunbirdHierarchyStore="{{ cassandra_hierarchy_store_keyspace }}" -course.metrics.cloud.objectKey="" -course.metrics.cassandra.input.consistency="QUORUM" -es.host="http://{{groups['core-es'][0]}}" -es.port="9200" -es.composite.host="{{groups['composite-search-cluster'][0]}}" - -# State admin user reports -# Uses azure only - course.metrics.cloud.provider -admin.metrics.cloud.objectKey="" -admin.metrics.temp.dir="/mount/data/analytics/admin-user-reports" - -#Assessment report config -es.scroll.size = 1000 - -#BestScore or Latst Updated Score -assessment.metrics.bestscore.report=true -assessment.metrics.supported.contenttype="SelfAssess" -assessment.metrics.supported.primaryCategories="{{ assessment_metric_primary_category }}" -spark.sql.caseSensitive=true - -# content rating configurations - -druid.sql.host="http://{{druid_broker_host}}:8082/druid/v2/sql/" -druid.unique.content.query="{\"query\":\"SELECT DISTINCT \\\"object_id\\\" AS \\\"Id\\\"\\nFROM \\\"druid\\\".\\\"summary-events\\\" WHERE \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\"}" -druid.content.rating.query="{\"query\":\"SELECT \\\"object_id\\\" AS contentId, COUNT(*) AS \\\"totalRatingsCount\\\", SUM(edata_rating) AS \\\"Total Ratings\\\", SUM(edata_rating)/COUNT(*) AS \\\"averageRating\\\" FROM \\\"druid\\\".\\\"telemetry-feedback-events\\\" WHERE \\\"eid\\\" = 'FEEDBACK' AND \\\"edata_rating\\\">0 GROUP BY \\\"object_id\\\"\"}" -druid.content.consumption.query="{\"query\":\"SELECT COUNT(*) as \\\"play_sessions_count\\\", object_id as \\\"contentId\\\", SUM(total_time_spent) as \\\"total_time_spent\\\", dimensions_pdata_id, object_id\\nFROM \\\"summary-events\\\"\\nWHERE \\\"dimensions_mode\\\" = 'play' AND \\\"dimensions_type\\\" ='content' AND \\\"dimensions_pdata_pid\\\" != 'creation-portal' \\nGROUP BY object_id, dimensions_pdata_id\"}" -lp.system.update.base.url="{{lp_url}}/system/v3/content/update" - - -#Experiment Configuration - 
-user.search.api.url="{{sunbird_learner_service_url}}/private/user/v1/search" -user.search.limit="10000" - -# pipeline auditing -druid.pipeline_metrics.audit.query="{\"query\":\"SELECT \\\"job-name\\\", SUM(\\\"success-message-count\\\") AS \\\"success-message-count\\\", SUM(\\\"failed-message-count\\\") AS \\\"failed-message-count\\\", SUM(\\\"duplicate-event-count\\\") AS \\\"duplicate-event-count\\\", SUM(\\\"batch-success-count\\\") AS \\\"batch-success-count\\\", SUM(\\\"batch-error-count\\\") AS \\\"batch-error-count\\\", SUM(\\\"primary-route-success-count\\\") AS \\\"primary-route-success-count\\\", SUM(\\\"secondary-route-success-count\\\") AS \\\"secondary-route-success-count\\\" FROM \\\"druid\\\".\\\"pipeline-metrics\\\" WHERE \\\"job-name\\\" IN (%s) AND \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s' GROUP BY \\\"job-name\\\" \"}" -druid.telemetryDatasource.count.query="{ \"query\": \"SELECT COUNT(*) AS \\\"total\\\" FROM \\\"druid\\\".\\\"telemetry-events\\\" WHERE TIME_FORMAT(MILLIS_TO_TIMESTAMP(\\\"syncts\\\"), 'yyyy-MM-dd HH:mm:ss.SSS', 'Asia/Kolkata') BETWEEN TIMESTAMP '%s' AND '%s' AND \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\" }" -druid.summaryDatasource.count.query="{\"query\": \"SELECT COUNT(*) AS \\\"total\\\" FROM \\\"druid\\\".\\\"summary-events\\\" WHERE \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\" }" - -#Pipeline Audit Jobs - -pipeline_audit { - notification { - webhook_url = "{{ data_exhaust_webhook_url }}" - channel = "{{ data_exhaust_Channel }}" - token = "{{ data_exhaust_token }}" - slack = true - name = "Pipeline Audit" - } -} - -#Druid Query Processor - -druid = { - hosts = "{{druid_broker_host}}:8082" - secure = false - url = "/druid/v2/" - datasource = "telemetry-events" - response-parsing-timeout = 300000 - client-backend = "com.ing.wbaa.druid.client.DruidAdvancedHttpClient" - client-config = { - druid-advanced-http-client ={ - queue-size = 32768 - queue-overflow-strategy = "Backpressure" - query-retries = 5 - query-retry-delay = 10 ms - host-connection-pool = { - max-connections = 32 - min-connections = 0 - max-open-requests = 128 - max-connection-lifetime = 20 min - idle-timeout = 15 min - client = { - # The time after which an idle connection will be automatically closed. - # Set to `infinite` to completely disable idle timeouts. 
- idle-timeout = 10 min - parsing.max-chunk-size = 10m - } - } - } - - } -} -druid.rollup.host="{{druid_rollup_broker_host}}" -druid.rollup.port=8082 -druid.query.wait.time.mins=10 -druid.report.upload.wait.time.mins=10 -druid.scan.batch.size=100 -druid.scan.batch.bytes=2000000 -druid.query.batch.buffer=500000 - - -// Metric event config -metric.producer.id="pipeline.monitoring" -metric.producer.pid="dataproduct.metrics" -push.metrics.kafka=true -metric.kafka.broker="{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" -metric.kafka.topic="{{ env }}.prom.monitoring.metrics" - -//Postgres Config -postgres.db="{{postgres.db_name}}" -postgres.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" -postgres.user="{{postgres.db_username}}" -postgres.pass="{{postgres.db_password}}" -postgres.program.table="program" -postgres.nomination.table="nomination" -postgres.usertable="\"V_User\"" -postgres.org.table="\"V_User_Org\"" - -druid.ingestion.path="/druid/indexer/v1/task" -druid.segment.path="/druid/coordinator/v1/metadata/datasources/" -druid.deletesegment.path="/druid/coordinator/v1/datasources/" - -postgres.druid.db="{{ druid_report_postgres_db_name }}" -postgres.druid.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" -postgres.druid.user="{{ druid_report_postgres_db_username }}" -postgres.druid.pass="{{ dp_vault_druid_postgress_pass }}" - - -location.search.url="https://{{location_search_url}}/v1/location/search" -location.search.token="{{ location_search_token }}" -location.search.request="{\"request\": {\"filters\": {\"type\" :[\"state\",\"district\"]},\"limit\" : 10000}}" - -druid.state.lookup.url = "http://{{groups['raw-coordinator'][0]}}:8081/druid/coordinator/v1/lookups/config/__default/stateSlugLookup" - -sunbird_encryption_key="{{ core_vault_sunbird_encryption_key }}" - -dcedialcode.filename="DCE_dialcode_data.csv" -etbdialcode.filename="ETB_dialcode_data.csv" -dcetextbook.filename="DCE_textbook_data.csv" -etbtextbook.filename="ETB_textbook_data.csv" -etb.dialcode.druid.length={{ etb_dialcode_list_druid_length }} - -{% if dp_object_store_type == "azure" %} -druid.report.default.storage="azure" -{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} -druid.report.default.storage="s3" -{% endif %} - -druid.report.date.format="yyyy-MM-dd" -druid.report.default.container="report-verification" - -## Collection Exhaust Jobs Configuration -- Start ## - -sunbird.user.keyspace="{{ user_table_keyspace }}" -sunbird.courses.keyspace="{{ course_keyspace }}" -sunbird.content.hierarchy.keyspace="{{ cassandra_hierarchy_store_keyspace }}" -sunbird.user.cluster.host="{{ core_cassandra_host }}" -sunbird.courses.cluster.host="{{ core_cassandra_host }}" -sunbird.content.cluster.host="{{ core_cassandra_host }}" -sunbird.report.cluster.host="{{ report_cassandra_cluster_host }}" -sunbird.user.report.keyspace="{{ report_user_table_keyspace }}" -collection.exhaust.store.prefix="" -postgres.table.job_request="{{ job_request_table }}" -postgres.table.dataset_metadata="{{ dataset_metadata_table }}" - -## Collection Exhaust Jobs Configuration -- End ## - -## Exhaust throttling variables -exhaust.batches.limit.per.channel={{ exhaust_batches_limit_per_channel }} -exhaust.file.size.limit.per.channel={{ exhaust_file_size_limit_bytes_per_channel }} - -exhaust.parallel.batch.load.limit={{ exhaust_parallel_batch_load_limit }} -exhaust.user.parallelism={{ exhaust_user_parallelism }} - -data_exhaust.batch.limit.per.request={{ data_exhaust_batch_limit_per_request }} - - 
- -//START of UCI Postgres Config - -uci.conversation.postgres.db="{{ uci_postgres.conversation_db_name }}" -uci.conversation.postgres.url="jdbc:postgresql://{{uci_postgres.conversation_db_host}}:{{uci_postgres.conversation_db_port}}/" - -uci.fushionauth.postgres.db="{{ uci_postgres.fushionauth_db_name }}" -uci.fushionauth.postgres.url="jdbc:postgresql://{{uci_postgres.fushionauth_db_host}}:{{uci_postgres.fushionauth_db_port}}/" - -uci.postgres.table.conversation="{{ uci_postgres.conversation_table_name }}" -uci.postgres.table.user="{{ uci_postgres.user_table_name }}" -uci.postgres.table.user_registration="{{ uci_postgres.user_registration_table_name }}" -uci.postgres.table.identities="{{ uci_postgres.user_identities_table_name }}" - -uci.conversation.postgres.user="{{ uci_postgres.conversation_db_user }}" -uci.conversation.postgres.pass="{{ uci_postgres.conversation_db_psss }}" - -uci.fushionauth.postgres.user="{{ uci_postgres.fushionauth_db_user }}" -uci.fushionauth.postgres.pass="{{ uci_postgres.fushionauth_db_psss }}" - -uci.exhaust.store.prefix="" -uci.encryption.secret="{{ uci_encryption_secret_key }}" - -// END OF UCI Related Job Configs \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 deleted file mode 100644 index 3f6ba98d9d..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 +++ /dev/null @@ -1,58 +0,0 @@ -import requests -from requests.auth import HTTPBasicAuth -import json -from kafka import KafkaConsumer -from json import loads -import sys - -def checkClusterStatus(): - try: - res = requests.get('https://{{ spark_cluster_name }}.azurehdinsight.net/api/v1/clusters/{{ spark_cluster_name }}/alerts?format=summary', auth = HTTPBasicAuth("{{ admin_name }}" ,"{{ admin_password }}")) - if(res.status_code == 200): - resJson = json.loads(res.text) - warningCount = resJson["alerts_summary"]["WARNING"]["count"] - criticalCount = resJson["alerts_summary"]["CRITICAL"]["count"] - unknownCount = resJson["alerts_summary"]["UNKNOWN"]["count"] - if((warningCount + criticalCount + unknownCount) == 0): - print("Cluster is up & running fine. With these - WARNING:{0}, CRITICAL:{1}, UNKNOWN:{2}".format(warningCount, criticalCount, unknownCount)) - return "SUCCESS" - else: - return "FAILED. Cluster is not running properly. Found these - WARNING:{0}, CRITICAL:{1}, UNKNOWN:{2}".format(warningCount, criticalCount, unknownCount) - else: - return "FAILED. Cluster failed to provide response. 
Resulted in {0} response".format(res.status_code) - except Exception as e: - return "FAILED with {0}".format(str(e)) - -def checkCassandraMigratorStatus(): - try: - ## from joblog file - migratorENDEvent = "" - with open('{{ analytics.home }}/scripts/logs/joblog.log', 'rt') as logs: - for log in logs: - if (log.count("JOB_END") == 1 and log.count("{{ cassandra_migrator_job_name }}") == 1): - migratorENDEvent = log - logJson = json.loads(migratorENDEvent) - jobStatus = logJson["edata"]["status"] - if (jobStatus == "SUCCESS"): - print("Cassandra Migrator Completed successfully!") - return "SUCCESS" - else: - return "Cassandra Migrator failed" - except Exception as e: - return "FAILED with {0}".format(str(e)) - - -def main(): - finalSuccessMessage="All checks are successful" - ## check Cassandra Migrator status - cassandraMigratorState=checkCassandraMigratorStatus() - ## check spark cluster status - clusterState=checkClusterStatus() - - if(cassandraMigratorState == "SUCCESS" and clusterState == "SUCCESS"): - return finalSuccessMessage - else: - raise Exception("Required checks failed. Job Status: {0} and Cluster status: {1}".format(cassandraMigratorState, clusterState)) - -result=main() -print(result) \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 deleted file mode 100644 index c82cdd702c..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 +++ /dev/null @@ -1,54 +0,0 @@ - [log4j2.xml.j2 body: the XML markup was lost in extraction; the surviving text shows a Log4j2 configuration writing to {{ analytics.home }}/scripts/logs with "%m%n" pattern layouts] diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 deleted file mode 100644 index 86f376b65d..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env bash - -config() { - bucket={{ secor_bucket }} - brokerList={{ brokerlist }} - zookeeper={{ zookeeper }} - brokerIngestionList={{ ingestion_kafka_brokers }} - job_topic={{ analytics_job_queue_topic }} - topic={{ topic }} - analyticsMetricsTopic={{ analytics_metrics_topic }} - sinkTopic={{ sink_topic }} - metricsTopic={{ metrics_topic }} - analytics_home={{ analytics.home }} - temp_folder={{ job_manager_tmp_dir }} - sparkCassandraConnectionHost="{{ lp_cassandra_host }}" - sparkRedisConnectionHost={{ metadata2_redis_host }} - sunbirdPlatformCassandraHost="{{ core_cassandra_host }}" - sunbirdPlatformElasticsearchHost="{{ sunbird_es_host }}" - jobManagerJobsCount="{{ analytics_jobs_count }}" - producerEnv="{{ producer_env }}" - baseScriptPath="{{ spark_output_temp_dir }}" - reportPostContainer="{{ reports_container }}" - druidIngestionURL="{{ druid_rollup_cluster_ingestion_task_url }}/druid/indexer/v1/task" - assessTopic={{ assess_topic }} - - - if [ -z "$2" ]; then endDate=$(date --date yesterday "+%Y-%m-%d"); else endDate=$2; fi - if [ ! -z "$3" ]; then inputBucket=$3; fi - if [ ! -z "$4" ]; then sinkTopic=$4; fi - if [ !
-z "$2" ]; then keyword=$2; fi - case "$1" in - "assessment-correction") - echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' - ;; - "assessment-archival") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"azure","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' - ;; - "assessment-archived-removal") -{% if dp_object_store_type == "azure" %} - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' - ;; -{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' - ;; -{% endif %} - "collection-reconciliation-job") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CollectionReconciliationJob","modelParams":{"mode":"prodrun","brokerList":"{{ingestion_kafka_broker_host}}","topic":"{{env}}.issue.certificate.request","sparkCassandraConnectionHost":"{{ core_cassandra_host }}"},"parallelization":30,"appName":"CollectionReconciliationJob"}' - ;; - "collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host 
}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' - ;; - "score-metric-migration-job") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.ScoreMetricMigrationJob","modelParams":{"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Score Metric Migration Job"}' - ;; - "assessment-score-metric-correction") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.AssessmentScoreCorrectionJob","modelParams":{"assessment.score.correction.batches":"","cassandraReadConsistency":"QUORUM","cassandraWriteConsistency":"QUORUM","csvPath":"/mount/data/analytics/score_correction","isDryRunMode":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":30,"appName":"Assessment Score Correction Job"}' - ;; - "course-batch-status-updater") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' - ;; - "collection-summary-report-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' - ;; - "uci-private-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' - ;; - "uci-response-exhaust") - echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI 
Response Exhaust"}' - ;; - "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' - ;; - "program-collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' - ;; - "response-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' - ;; - "response-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' - ;; - "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' - ;; - 
"progress-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' - ;; - "druid_reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.DruidQueryProcessingModel","modelParams":{"mode":"batch"},"parallelization":8,"appName":"Druid Reports"}' - ;; - "cassandra-migration") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.updater.CassandraMigratorJob","modelParams":{"cassandraDataHost":"{{ core_cassandra_host }}","cassandraMigrateHost":"{{ report_cassandra_host }}","keyspace":"sunbird_courses","cassandraDataTable":"user_enrolments","cassandraMigrateTable":"{{ report_user_enrolment_table }}","repartitionColumns":"batchid"},"parallelization":10,"appName":"Cassandra Migrator","deviceMapping":false}' - ;; - "monitor-job-summ") - echo '{"search":{"type":"local","queries":[{"file":"'$analytics_home'/scripts/logs/joblog.log"}]},"model":"org.ekstep.analytics.model.MonitorSummaryModel","modelParams":{"pushMetrics":true,"brokerList":"'$brokerList'","topic":"'$analyticsMetricsTopic'","model":[{"model":"WorkFlowSummaryModel","category":"consumption","input_dependency":"None"},{"model":"UpdateContentRating","category":"consumption","input_dependency":"None"},{"model":"DruidQueryProcessingModel","category":"consumption","input_dependency":"None"},{"model":"MetricsAuditJob","category":"consumption","input_dependency":"None"},{"model":"StateAdminReportJob","category":"consumption","input_dependency":"None"},{"model":"StateAdminGeoReportJob","category":"consumption","input_dependency":"None"},{"model":"CourseEnrollmentJob","category":"consumption","input_dependency":"None"}]},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"appName":"TestMonitorSummarizer","deviceMapping":true}' - ;; - "job-manager") - echo '{"jobsCount":'$jobManagerJobsCount',"topic":"'$job_topic'","bootStrapServer":"'$brokerList'","zookeeperConnect":"'$zookeeper'","consumerGroup":"jobmanager","slackChannel":"#test_channel","slackUserName":"JobManager","tempBucket":"'$bucket'","tempFolder":"'$temp_folder'"}' - ;; - "wfs") - echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"} }],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' - #echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": 
false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' - ;; - "video-streaming") - echo '{"search":{"type":"{{ dp_object_store_type }}"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' - ;; - "admin-user-reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' - ;; - "admin-geo-reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' - ;; - "telemetry-replay") - echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' - ;; - "summary-replay") - echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' - ;; - "content-rating-updater") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.updater.UpdateContentRating","modelParams": {"startDate": "'$endDate'","endDate": "'$endDate'"},"output": [{"to":"console","params":{"printEvent":false}}],"parallelization": 8,"appName": "Content Rating Updater","deviceMapping": false}' - ;; - "experiment") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.ExperimentDefinitionModel","modelParams":{"sparkElasticsearchConnectionHost":"{{ lp_composite_search_host }}"},"output":[{"to":"elasticsearch","params":{"index":"experiment"}}],"parallelization":8,"appName":"Experiment-Definition","deviceMapping":false}' - ;; - "etb-metrics") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked 
content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' - ;; - "course-enrollment-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' - ;; - "course-consumption-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": "sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "{{ dp_object_store_type 
}}","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' - ;; - "textbook-progress-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' - ;; - "audit-metrics-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"{{ dp_object_store_type 
}}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' - ;; - "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' - ;; - "druid-dataset") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' - ;; - "*") - echo "Unknown model code" - exit 1 # Command to come out of the program with status 1 - ;; - esac -} diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 deleted file mode 100644 index a3569c7f46..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 +++ /dev/null @@ -1,670 +0,0 @@ -{ - "wfs": { - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "unique/raw/", - "endDate": "$(date --date yesterday '+%Y-%m-%d')", - "delta": 0 - } - ] - }, - "filters": [ - { - "name": "actor", - "operator": "ISNOTNULL" - } - ], - "model": "org.ekstep.analytics.model.WorkflowSummary", - "modelParams": { - "apiVersion": "v2", - "parallelization": 32 - }, - "output": [ - { - "to": "{{dp_object_store_type}}", - "params": { - "bucket": "{{ bucket }}", - "key": "{{ job_manager_tmp_dir }}/wfs/$(date --date yesterday '+%Y-%m-%d')" - } - }, - { - "to": "kafka", - "params": { - "brokerList": "{{ brokerlist }}", - "topic": "{{ topic }}" - } - } - ], - "parallelization": 32, - "appName": "Workflow Summarizer", - "deviceMapping": true - }, - "video-streaming": { - "search": { - "type": "{{dp_object_store_type}}" - }, - "model": "org.ekstep.analytics.job.VideoStreamingJob", - "modelParams": { - "maxIterations": 10 - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - } - ], - "parallelization": 8, - "appName": "Video Streaming Job", - "deviceMapping": false - }, - "admin-user-reports": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.job.report.StateAdminReportJob", - "modelParams": { - "sparkCassandraConnectionHost": "{{core_cassandra_host}}", - 
"sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - } - ], - "parallelization": 8, - "appName": "Admin User Reports", - "deviceMapping": false - }, - "admin-geo-reports": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.job.report.StateAdminGeoReportJob", - "modelParams": { - "sparkCassandraConnectionHost": "{{core_cassandra_host}}", - "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - } - ], - "parallelization": 8, - "appName": "Admin Geo Reports", - "deviceMapping": false - }, - "content-rating-updater": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.updater.UpdateContentRating", - "modelParams": { - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date '+%Y-%m-%d')" - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - } - ], - "parallelization": 8, - "appName": "Content Rating Updater", - "deviceMapping": false - }, - "monitor-job-summ": { - "search": { - "type": "local", - "queries": [ - { - "file": "{{ analytics.home }}/scripts/logs/joblog.log" - } - ] - }, - "model": "org.ekstep.analytics.model.MonitorSummaryModel", - "modelParams": { - "pushMetrics": true, - "brokerList": "{{ brokerlist }}", - "topic": "{{ analytics_metrics_topic }}", - "model": [ - { - "model": "WorkFlowSummaryModel", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "UpdateContentRating", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "DruidQueryProcessingModel", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "MetricsAuditJob", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "StateAdminReportJob", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "StateAdminGeoReportJob", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "CourseEnrollmentJob", - "category": "consumption", - "input_dependency": "None" - } - ] - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - }, - { - "to": "kafka", - "params": { - "brokerList": "{{ brokerlist }}", - "topic": "{{ topic }}" - } - } - ], - "appName": "TestMonitorSummarizer", - "deviceMapping": true - }, - "experiment": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.ExperimentDefinitionModel", - "modelParams": { - "sparkElasticsearchConnectionHost": "{{ lp_composite_search_host }}" - }, - "output": [ - { - "to": "elasticsearch", - "params": { - "index": "experiment" - } - } - ], - "parallelization": 8, - "appName": "Experiment-Definition", - "deviceMapping": false - }, - "etb-metrics": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.report.ETBMetricsJob", - "modelParams": { - "reportConfig": { - "id": "etb_metrics", - "metrics": [], - "labels": { - "date": "Date", - "identifier": "TextBook ID", - "name": "TextBook Name", - "medium": "Medium", - "gradeLevel": "Grade", - "subject": "Subject", - "createdOn": "Created On", - "lastUpdatedOn": "Last Updated On", - "totalQRCodes": "Total number of QR codes", - "contentLinkedQR": "Number of QR codes with atleast 1 linked content", - "withoutContentQR": "Number of QR codes with no linked content", - "withoutContentT1": "Term 1 QR Codes with no linked content", - "withoutContentT2": "Term 2 QR Codes with no 
linked content", - "status": "Status", - "totalContentLinked": "Total content linked", - "totalQRLinked": "Total QR codes linked to content", - "totalQRNotLinked": "Total number of QR codes with no linked content", - "leafNodesCount": "Total number of leaf nodes", - "leafNodeUnlinked": "Number of leaf nodes with no content", - "l1Name": "Level 1 Name", - "l2Name": "Level 2 Name", - "l3Name": "Level 3 Name", - "l4Name": "Level 4 Name", - "l5Name": "Level 5 Name", - "dialcode": "QR Code", - "sum(scans)": "Total Scans", - "noOfContent": "Number of contents", - "nodeType": "Type of Node", - "term": "Term" - }, - "output": [{ - "type": "csv", - "dims": ["identifier", "channel", "name"], - "fileParameters": ["id", "dims"] - }], - "mergeConfig": { - "frequency": "WEEK", - "basePath": "{{ spark_output_temp_dir }}", - "rollup": 0, - "reportPath": "dialcode_counts.csv", - "postContainer":"{{ reports_container }}" - } - }, - "dialcodeReportConfig": { - "id": "etb_metrics", - "metrics": [], - "labels": {}, - "output": [{ - "type": "csv", - "dims": ["identifier", "channel", "name"], - "fileParameters": ["id", "dims"] - }], - "mergeConfig": { - "frequency": "WEEK", - "basePath": "{{ spark_output_temp_dir }}", - "rollup": 1, - "reportPath": "dialcode_counts.csv", - "rollupAge": "ACADEMIC_YEAR", - "rollupCol": "Date", - "rollupRange": 10, - "postContainer":"{{ reports_container }}" - } - }, - "etbFileConfig": { - "bucket": "{{ reports_container }}", - "file": "dialcode_scans/dialcode_counts.csv" - }, - "druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"},{"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName": "name","aliasName": "name"},{"fieldName": "createdFor","aliasName": "createdFor"},{"fieldName": "createdOn","aliasName": "createdOn"},{"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"},{"fieldName": "board","aliasName": "board"},{"fieldName": "medium","aliasName": "medium"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"},{"fieldName": "subject","aliasName": "subject"},{"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"},{"type": "in","dimension": "status","values": ["Live","Draft","Review"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}}, - "tenantConfig": { - "tenantId": "", - "slugName": "" - }, - "store": "{{dp_object_store_type}}", - "format": "csv", - "key": "druid-reports/", - "filePath": "druid-reports/", - "container": "{{ bucket }}", - "folderPrefix": ["slug", "reportName"] - }, - "output": [{ - "to": "console", - "params": { - "printEvent": false - } - }], - "parallelization": 8, - "appName": "ETB Metrics Model", - "deviceMapping": false - }, - "course-enrollment-report":{ - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.report.CourseEnrollmentJob", - "modelParams": { - "reportConfig": { - "id": "tpd_metrics", - "metrics" : [], - "labels": { - "completionCount": "Completion Count", - "status": "Status", - "enrollmentCount": "Enrollment Count", - "courseName": "Course Name", - "batchName": "Batch Name" - }, - "output": [{ - "type": "csv", - "dims": [] - }] - }, - "esConfig": { - "request": { - "filters":{ - "objectType": ["Content"], - "contentType": ["Course"], - "identifier": [], - "status": ["Live"] - }, - "limit": 10000 - } - }, - "store": "{{dp_object_store_type}}", - "format":"csv", - "key": "druid-reports/", - "filePath": "druid-reports/", - "container": "{{ bucket }}", - "folderPrefix": ["slug", "reportName"], - "sparkCassandraConnectionHost":"{{core_cassandra_host}}", - "sparkElasticsearchConnectionHost":"{{sunbird_es_host}}" - }, - "output": [{ - "to": "console", - "params": { - "printEvent": false - } - }], - "parallelization": 8, - "appName": "TPD Course Enrollment Metrics Model", - "deviceMapping": false - }, - "course-consumption-report":{ - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.report.CourseConsumptionJob", - "modelParams": { - "esConfig": { - "request": { - "filters": { - "objectType": ["Content"], - "contentType": ["Course"], - "identifier": [], - "status": ["Live"] - } - } - }, - "reportConfig": { - "id": "tpd_metrics", - "labels": { - "date": "Date", - "status": "Batch Status", - "timespent": "Timespent in mins", - "courseName": "Course Name", - "batchName": "Batch Name" - }, - "dateRange": { - "staticInterval": "LastDay", - "granularity": "all" - }, - "metrics": [{ - "metric": "totalCoursePlays", - "label": "Total Course Plays (in mins) ", - "druidQuery": { - "queryType": "groupBy", - "dataSource": "summary-events", - "intervals": "LastDay", - "aggregations": [{ - "name": "sum__edata_time_spent", - "type": "doubleSum", - "fieldName": "edata_time_spent" - }], - "dimensions": [{ - "fieldName": "object_rollup_l1", - "aliasName": "courseId" - }, { - "fieldName": "uid", - "aliasName": "userId" - }, { - "fieldName": "context_cdata_id", - "aliasName": "batchId" - }], - "filters": [{ - "type": "equals", - "dimension": "eid", - "value": "ME_WORKFLOW_SUMMARY" - }, { - 
"type": "in", - "dimension": "dimensions_pdata_id", - "values": ["{{ producer_env }}.app", "{{ producer_env }}.portal"] - }, { - "type": "equals", - "dimension": "dimensions_type", - "value": "content" - }, { - "type": "equals", - "dimension": "dimensions_mode", - "value": "play" - }, { - "type": "equals", - "dimension": "context_cdata_type", - "value": "batch" - }], - "postAggregation": [{ - "type": "arithmetic", - "name": "timespent", - "fields": { - "leftField": "sum__edata_time_spent", - "rightField": 60, - "rightFieldType": "constant" - }, - "fn": "/" - }], - "descending": "false" - } - }], - "output": [{ - "type": "csv", - "metrics": ["timespent"], - "dims": [] - }], - "queryType": "groupBy" - }, - "store": "{{dp_object_store_type}}", - "format":"csv", - "key": "druid-reports/", - "filePath": "druid-reports/", - "container": "{{ bucket }}", - "folderPrefix": ["slug", "reportName"], - "sparkCassandraConnectionHost":"{{core_cassandra_host}}", - "sparkElasticsearchConnectionHost":"{{sunbird_es_host}}" - }, - "output": [{ - "to": "console", - "params": { - "printEvent": false - } - }], - "parallelization": 8, - "appName": "TPD Course Consumption Metrics Model", - "deviceMapping": false - }, - "audit-metrics-report": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.MetricsAuditJob", - "modelParams": { - "auditConfig": [ - { - "name": "denorm", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "telemetry-denormalized/raw/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - }, - "filters": [ - { - "name": "flags.user_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.content_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.device_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.dialcode_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.collection_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.derived_location_retrieved", - "operator": "EQ", - "value": true - } - ] - }, - { - "name": "failed", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "failed/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - } - }, - { - "name": "unique", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "unique/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - } - }, - { - "name": "raw", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "raw/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - } - }, - { - "name": "channel-raw", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "folder": true, - "bucket": "{{ bucket }}", - "prefix": "channel/*/raw/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')*.json.gz" - } - ] - } - }, - { - "name": "channel-summary", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "folder": true, - "bucket": "{{ bucket }}", - "prefix": "channel/*/summary/", - "startDate": "$(date --date yesterday 
'+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')*.json.gz" - } - ] - } - }, - { - "name": "derived", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "derived/wfs/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - } - }, - { - "name": "telemetry-count", - "search": { - "type": "druid", - "druidQuery": { - "queryType": "timeSeries", - "dataSource": "telemetry-events", - "intervals": "LastDay", - "aggregations": [ - { - "name": "total_count", - "type": "count", - "fieldName": "count" - } - ], - "descending": "false" - } - } - }, - { - "name": "summary-count", - "search": { - "type": "druid", - "druidQuery": { - "queryType": "timeSeries", - "dataSource": "summary-events", - "intervals": "LastDay", - "aggregations": [ - { - "name": "total_count", - "type": "count", - "fieldName": "count" - } - ], - "descending": "false" - } - } - } - ] - }, - "output": [ - { - "to": "kafka", - "params": { - "brokerList": "{{ brokerlist }}", - "topic": "{{ metrics_topic }}" - } - } - ], - "parallelization": 8, - "appName": "Metrics Audit" - } -} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 deleted file mode 100644 index f720f4687e..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash - -config() { - bucket={{ bucket }} - brokerList={{ brokerlist }} - zookeeper={{ zookeeper }} - job_topic={{ analytics_job_queue_topic }} - topic={{ topic }} - sparkCassandraConnectionHost="{{ lp_cassandra_host }}" - sunbirdPlatformCassandraHost="{{ core_cassandra_host }}" - reportPostContainer="{{ reports_container }}" - druidRollupHost="{{ druid_rollup_cluster_ingestion_task_url }}" - - if [ -z "$2" ]; then endDate=$(date --date yesterday "+%Y-%m-%d"); else endDate=$2; fi - if [ ! 
-z "$3" ]; then inputBucket=$3; fi - case "$1" in - "content-details") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","aliasName":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question 
Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{dp_object_store_type}}","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' - ;; - "sourcing-summary-report") - echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": "'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": "/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "{{dp_object_store_type}}", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' - ;; - "funnel-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": 
"programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. of contributions pending review","approvedContributions": "No. of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "{{dp_object_store_type}}","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": "equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' - ;; - "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{dp_object_store_type}}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' - ;; - "*") - echo "Unknown model code" - exit 1 # Command to come out of the program with status 1 - ;; - esac -} diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 deleted file mode 100644 index 3a6c969b7b..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env bash -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products - -cd {{ analytics.home }}/scripts -source model-config.sh -source replay-utils.sh - -libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" - -if [ "$1" == "telemetry-replay" ] - then - if [ ! $# -eq 5 ] - then - echo "Not suffecient arguments. killing process" - exit - fi -fi - -get_report_job_model_name(){ - case "$1" in - "assessment-correction") echo 'org.sunbird.analytics.job.report.AssessmentCorrectionJob' - ;; - *) echo $1 - ;; - esac -} - -if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi -if [ -z "$job_config" ]; then job_config=$(config $1 '__endDate__' $4 $5); fi -start_date=$2 -end_date=$3 -backup_key=$1 - -if [ "$1" == "gls-v1" ] - then - backup_key="gls" -elif [ "$1" == "app-ss-v1" ] - then - backup_key="app-ss" -fi - -backup $start_date $end_date {{ bucket }} "derived/$backup_key" "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log" -if [ $? == 0 ] - then - echo "Backup completed Successfully..." >> "$DP_LOGS/$end_date-$1-replay.log" - echo "Running the $1 job replay..." >> "$DP_LOGS/$end_date-$1-replay.log" - echo "Job modelName - $job_id" >> "$DP_LOGS/$end_date-$1-replay.log" - $SPARK_HOME/bin/spark-submit --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar --class org.ekstep.analytics.job.ReplaySupervisor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --fromDate "$start_date" --toDate "$end_date" --config "$job_config" >> "$DP_LOGS/$end_date-$1-replay.log" -else - echo "Unable to take backup" >> "$DP_LOGS/$end_date-$1-replay.log" -fi - -if [ $? 
== 0 ] - then - echo "$1 replay executed successfully" >> "$DP_LOGS/$end_date-$1-replay.log" - delete {{ bucket }} "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log" -else - echo "$1 replay failed" >> "$DP_LOGS/$end_date-$1-replay.log" - rollback {{ bucket }} "derived/$backup_key" "backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log" - delete {{ bucket }} "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log" -fi diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 deleted file mode 100644 index 580c3bf29c..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products - -cd {{ analytics.home }}/scripts -source model-config.sh -source replay-utils.sh - -job_config=$(config $1 '__endDate__') -start_date=$2 -end_date=$3 - -echo "Running the $1 updater replay..." >> "$DP_LOGS/$end_date-$1-replay.log" -$SPARK_HOME/bin/spark-submit --master local[*] --jars $MODELS_HOME/analytics-framework-2.0.jar --class org.ekstep.analytics.job.ReplaySupervisor $MODELS_HOME/batch-models-2.0.jar --model "$1" --fromDate "$start_date" --toDate "$end_date" --config "$job_config" >> "$DP_LOGS/$end_date-$1-replay.log" - -if [ $? == 0 ] - then - echo "$1 updater replay executed successfully..." >> "$DP_LOGS/$end_date-$1-replay.log" -else - echo "$1 updater replay failed" >> "$DP_LOGS/$end_date-$1-replay.log" - exit 1 -fi diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 deleted file mode 100644 index 31ead572f3..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -rollback() { - bucket_name=$1 - prefix=$2 - backup_dir=$3 - - src="s3://$bucket_name/$prefix/" - dst="s3://$bucket_name/$backup_dir/" - echo "Copy back the $prefix files to source directory $src from backup directory $dst" - aws s3 cp $dst $src --recursive --include "*" --region ap-south-1 -} - -delete() { - bucket_name=$1 - backup_dir=$2 - - path="s3://$bucket_name/$backup_dir/" - echo "Deleting the back-up files from $path" - aws s3 rm $path --recursive --region ap-south-1 -} - -backup() { - dt_start=$1 - dt_end=$2 - prefix=$4 - bucket_name=$3 - backup_dir=$5 - - ts_start=$(date -d $dt_start +%s) - ts_end=$(date -d $dt_end +%s) - src="s3://$bucket_name/$prefix/" - dst="s3://$bucket_name/$backup_dir/" - - - echo "Backing up the files from $src to $dst for the date range - ($dt_start, $dt_end)" - while [ $ts_start -le $ts_end ] - do - date=`date -d @$ts_start +%F` - aws s3 mv $src $dst --recursive --exclude "*" --include "$date-*" --region ap-south-1 - let ts_start+=86400 - done -} diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 deleted file mode 100644 index e6f1cdf9ad..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash - -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export 
DP_LOGS={{ analytics.home }}/logs/data-products -## Job to run daily -cd {{ analytics.home }}/scripts -source model-dock-config.sh -today=$(date "+%Y-%m-%d") - -libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" -file_path="dock-{{ env }}.conf" - -get_report_job_model_name(){ - case "$1" in - "funnel-report") echo 'org.sunbird.analytics.sourcing.FunnelReport' - ;; - "sourcing-summary-report") echo 'org.sunbird.analytics.sourcing.SourcingSummaryReport' - ;; - "sourcing-metrics") echo 'org.sunbird.analytics.sourcing.SourcingMetrics' - ;; - "content-details") echo 'org.sunbird.analytics.sourcing.ContentDetailsReport' - ;; - *) echo $1 - ;; - esac -} - -if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi - -if [ ! -z "$1" ]; then job_config=$(config $1); else job_config="$2"; fi - -if [ ! -z "$2" ]; then batchIds=";$2"; else batchIds=""; fi - -echo "Starting the job - $1" >> "$DP_LOGS/$today-job-execution.log" - -echo "Job modelName - $job_id" >> "$DP_LOGS/$today-job-execution.log" - -nohup $SPARK_HOME/bin/spark-submit --conf spark.driver.extraJavaOptions="-Dconfig.file=$MODELS_HOME/$file_path" --conf spark.executor.extraJavaOptions="-Dconfig.file=$MODELS_HOME/$file_path" --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar,$MODELS_HOME/batch-models-2.0.jar --class org.ekstep.analytics.job.JobExecutor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --config "$job_config$batchIds" \ >> "$DP_LOGS/$today-job-execution.log" 2>&1 - -echo "Job execution completed - $1" >> "$DP_LOGS/$today-job-execution.log" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 deleted file mode 100644 index 26ec84da87..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env bash - -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products -## Job to run daily -cd {{ analytics.home }}/scripts -source model-config.sh -today=$(date "+%Y-%m-%d") - -libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" - -get_report_job_model_name(){ - case "$1" in - "course-enrollment-report") echo 'org.sunbird.analytics.job.report.CourseEnrollmentJob' - ;; - "course-consumption-report") echo 'org.sunbird.analytics.job.report.CourseConsumptionJob' - ;; - "funnel-report") echo 'org.sunbird.analytics.sourcing.FunnelReport' - ;; - "sourcing-metrics") echo 'org.sunbird.analytics.sourcing.SourcingMetrics' - ;; - "admin-geo-reports") echo 'org.sunbird.analytics.job.report.StateAdminGeoReportJob' - ;; - "etb-metrics") echo 'org.sunbird.analytics.job.report.ETBMetricsJob' - ;; - "admin-user-reports") echo 'org.sunbird.analytics.job.report.StateAdminReportJob' - ;; - "userinfo-exhaust") echo 'org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob' - ;; - "response-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJob' - ;; - "response-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2' - ;; - "progress-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJob' - ;; - "progress-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2' - ;; - "cassandra-migration") 
echo 'org.sunbird.analytics.updater.CassandraMigratorJob' - ;; - "collection-summary-report") echo 'org.sunbird.analytics.job.report.CollectionSummaryJob' - ;; - "program-collection-summary-report") echo 'org.sunbird.analytics.job.report.CollectionSummaryJob' - ;; - "collection-summary-report-v2") echo 'org.sunbird.analytics.job.report.CollectionSummaryJobV2' - ;; - "assessment-score-metric-correction") echo 'org.sunbird.analytics.audit.AssessmentScoreCorrectionJob' - ;; - "course-batch-status-updater") echo 'org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob' - ;; - "collection-reconciliation-job") echo 'org.sunbird.analytics.audit.CollectionReconciliationJob' - ;; - "assessment-correction") echo 'org.sunbird.analytics.job.report.AssessmentCorrectionJob' - ;; - "score-metric-migration-job") echo 'org.sunbird.analytics.audit.ScoreMetricMigrationJob' - ;; - "assessment-archival") echo 'org.sunbird.analytics.job.report.AssessmentArchivalJob' - ;; - "assessment-archived-removal") echo 'org.sunbird.analytics.job.report.AssessmentArchivalJob' - ;; - "uci-private-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob' - ;; - "uci-response-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIResponseExhaustJob' - ;; - *) echo $1 - ;; - esac -} - -if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi - -if [ ! -z "$1" ]; then job_config=$(config $1 $2); else job_config="$2"; fi - -if [ ! -z "$2" ]; then batchIds=";$2"; else batchIds=""; fi - - -echo "Starting the job - $1" >> "$DP_LOGS/$today-job-execution.log" - -echo "Job modelName - $job_id" >> "$DP_LOGS/$today-job-execution.log" - -nohup $SPARK_HOME/bin/spark-submit --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar,$MODELS_HOME/batch-models-2.0.jar --class org.ekstep.analytics.job.JobExecutor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --config "$job_config$batchIds" >> "$DP_LOGS/$today-job-execution.log" 2>&1 - -echo "Job execution completed - $1" >> "$DP_LOGS/$today-job-execution.log" diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 deleted file mode 100644 index 2e613b9866..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version}}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products -export SERVICE_LOGS={{ analytics.home }}/logs/services -export JM_HOME={{ analytics.home }}/job-manager - -export azure_storage_key={{sunbird_private_storage_account_name}} -export azure_storage_secret={{sunbird_private_storage_account_key}} -export reports_azure_storage_key={{sunbird_private_storage_account_name}} -export reports_azure_storage_secret={{sunbird_private_storage_account_key}} -export druid_storage_account_key={{sunbird_public_storage_account_name}} -export druid_storage_account_secret={{sunbird_public_storage_account_key}} - -export heap_conf_str={{ spark.heap_conf_str }} -today=$(date "+%Y-%m-%d") - -kill_job_manager() -{ - echo "Killing currently running job-manager process" >> "$SERVICE_LOGS/$today-job-manager.log" - kill $(ps aux | grep 'JobManager' | awk '{print $2}') >> "$SERVICE_LOGS/$today-job-manager.log" -} - -start_job_manager() -{ - kill_job_manager # Before 
starting the job, We are killing the job-manager - cd {{ analytics.home }}/scripts - source model-config.sh - job_config=$(config 'job-manager') - echo "Starting the job manager" >> "$SERVICE_LOGS/$today-job-manager.log" - echo "config: $job_config" >> "$SERVICE_LOGS/$today-job-manager.log" - nohup java $heap_conf_str -cp "$SPARK_HOME/jars/*:$MODELS_HOME/*:$MODELS_HOME/data-products-1.0/lib/*" -Dconfig.file=$MODELS_HOME/{{ env }}.conf org.ekstep.analytics.job.JobManager --config "$job_config" >> $SERVICE_LOGS/$today-job-manager.log 2>&1 & - - job_manager_pid=$(ps aux | grep 'JobManager' | awk '{print $2}') # Once Job is started just we are making whether job is running or not. - if [[ ! -z "$job_manager_pid" ]]; then - echo "Job manager is started." >> "$SERVICE_LOGS/$today-job-manager.log" - else - echo "Job manager is not started." >> "$SERVICE_LOGS/$today-job-manager.log" - fi -} -# Tasks -# Kill the job-manager -# Start the job-manager -# Make sure whether is running or not. -start_job_manager - diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 deleted file mode 100644 index 53c032cd29..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 +++ /dev/null @@ -1,58 +0,0 @@ -require "ruby-kafka" -require 'json' - -@log = File.open("{{ analytics.home }}/logs/logfile.log", 'a') -@kafka = Kafka.new(["{{ kafka_broker_host }}"]) -@topic = "{{ analytics_job_queue_topic }}" -@report_list_jobs_url = "{{ report_list_jobs_url }}" -@submit_jobs_auth_token = "{{ submit_jobs_auth_token }}" -@submit_jobs_command = "source /mount/venv/bin/activate && dataproducts submit_druid_jobs --report_list_jobs_url #{@report_list_jobs_url} --auth_token #{@submit_jobs_auth_token}" - -def log(message) - @log.write("#{Time.now.to_s}: #{message}\n") -end - -def submit_all_jobs - report_jobs = { - "assessment-dashboard-metrics" => "org.sunbird.analytics.job.report.AssessmentMetricsJobV2", - "course-dashboard-metrics" => "org.sunbird.analytics.job.report.CourseMetricsJobV2", - "course-enrollment-report" => "org.sunbird.analytics.job.report.CourseEnrollmentJob", - "course-consumption-report" => "org.sunbird.analytics.job.report.CourseConsumptionJob", - "etb-metrics" => "org.sunbird.analytics.job.report.ETBMetricsJob", - "admin-geo-reports" => "org.sunbird.analytics.job.report.StateAdminGeoReportJob", - "admin-user-reports" => "org.sunbird.analytics.job.report.StateAdminReportJob" - } - jobs = [{{ analytics_job_list }}] - - log("Starting to submit #{jobs.count} jobs for processing") - file = File.read("{{ analytics.home }}/scripts/model-config.json") - file = file.gsub("$(date --date yesterday '+%Y-%m-%d')", `date --date yesterday '+%Y-%m-%d'`.strip) - file = file.gsub("$(date '+%Y-%m-%d')", `date "+%Y-%m-%d"`.strip) - config_hash = JSON.parse(file) - log("Config file loaded") - jobs.each do |job| - if job == "monitor-job-summ" - log("python") - system('/bin/bash -l -c "'+ @submit_jobs_command +'"') - submit_job(job, config_hash[job]) - elsif report_jobs[job].nil? 
- submit_job(job, config_hash[job]) - else - submit_job(report_jobs[job], config_hash[job]) - end - - log("Submitted #{jobs.count} jobs for processing") - end -end - -def submit_job(job, config) - job_config = {model: job, config: config}.to_json - log("message: #{job_config}") - @kafka.deliver_message(job_config, topic: @topic) - log("Submitted #{job} for processing") -end - - - - -submit_all_jobs diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 deleted file mode 100644 index 859cf602c3..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products -export KAFKA_HOME={{ analytics.soft_path }}/kafka_2.11-0.10.1.0 - -## job broker-list and kafka-topic -job_brokerList={{ brokerlist }} -job_topic={{ analytics_job_queue_topic }} - -## Job to run daily -cd {{ analytics.home }}/scripts -source model-config.sh -today=$(date "+%Y-%m-%d") - -if [ -z "$job_config" ]; then job_config=$(config $1); fi - -echo "Submitted $1 with config $job_config" >> "$DP_LOGS/$today-job-execution.log" -echo '{ "model" :' \"$1\" ',' ' "config": ' "$job_config" '}' >> "$DP_LOGS/$today-job-execution-debug.log" -echo '{ "model" :' \"$1\" ',' ' "config": ' "$job_config" '}' > /tmp/job-request.json -cat /tmp/job-request.json | $KAFKA_HOME/bin/kafka-console-producer.sh --broker-list $job_brokerList --topic $job_topic >> "$DP_LOGS/$today-job-execution.log" 2>&1 diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 deleted file mode 100644 index edd03ff36b..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 +++ /dev/null @@ -1,216 +0,0 @@ -#!/usr/bin/env bash - -## Job to run daily - -cd "{{ analytics_cluster.home }}" -source model-config.sh -today=$(date "+%Y-%m-%d") - -while :; do - case $1 in - -j|--job) shift - job="$1" - ;; - -m|--mode) shift - mode="$1" - ;; - -p|--parallelisation) shift - parallelisation=$1 - ;; - -pa|--partitions) shift - partitions=$1 - ;; - -sd|--startDate) shift - start_date=$1 - ;; - -ed|--endDate) shift - end_date=$1 - ;; - -h|--sparkMaster) shift - sparkMaster=$1 - ;; - -sp|--selectedPartitions) shift - selected_partitions=$1 - ;; - *) break - esac - shift -done - -get_report_job_model_name(){ - case "$1" in - "assessment-dashboard-metrics") echo 'org.sunbird.analytics.job.report.AssessmentMetricsJobV2' - ;; - "course-dashboard-metrics") echo 'org.sunbird.analytics.job.report.CourseMetricsJobV2' - ;; - "userinfo-exhaust") echo 'org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob' - ;; - "response-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJob' - ;; - "response-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2' - ;; - "progress-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJob' - ;; - "progress-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2' - ;; - "cassandra-migration") echo 'org.sunbird.analytics.updater.CassandraMigratorJob' - ;; - "uci-private-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob' - ;; - "uci-response-exhaust") echo 
'org.sunbird.analytics.exhaust.uci.UCIResponseExhaustJob' - ;; - *) echo $1 - ;; - esac -} - -submit_cluster_job() { - # add batch number to config - echo "Running for below batch number $i" - batchNumberString="\\\"modelParams\\\":{\\\"batchNumber\\\":$i," - job_config=$(config $job) - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"modelParams\":{'/$batchNumberString} - echo $finalConfig - echo "Running $job as parallel jobs" - classVariable="org.ekstep.analytics.job.JobExecutor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" - argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" - clusterConfig=`cat cluster-config.json` - requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} - finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} - echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} - echo "Submitted job for batchNumer $i below is the response" - echo $response -} - -job_id=$(get_report_job_model_name $job) - -if [ -z "$sparkMaster" ]; then sparkMaster="local[*]"; else sparkMaster="$sparkMaster"; fi - -if [ "$mode" = "via-partition" ]; then - endPartitions=`expr $partitions - 1` - if [ -z "$parallelisation" ]; then parallelisation=1; else parallelisation=$parallelisation; fi - # add partitions to config and start jobs - for i in $(seq 0 $parallelisation $endPartitions) - do - # add partitions to config - partitionString="\\\"delta\\\":0,\\\"partitions\\\":[$(seq -s , $i `expr $i + $parallelisation - 1`)]" - if [ -z "$start_date" ]; then - job_config=$(config $job) - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} - echo $finalConfig - echo "Running $job by partitions." - classVariable="org.ekstep.analytics.job.JobExecutor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" - else - job_config=$(config $job '__endDate__') - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} - echo $finalConfig - echo "Running $job by partitions via Replay-Supervisor." 
- classVariable="org.ekstep.analytics.job.ReplaySupervisor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" - fi - argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" - clusterConfig=`cat cluster-config.json` - requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} - finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} - echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} - done - -elif [ "$mode" = "parallel-jobs" ]; then - # add batch number to config and submit jobs - echo "inside parallel-jobs block" - echo $parallelisation - if [ $parallelisation -ge 1 ]; then - for i in $(seq 1 $parallelisation) - do - submit_cluster_job $i & - done - else echo "No requests found in table"; fi - -elif [ "$mode" = "selected-partition" ]; then - # add partitions to config - partitionString="\\\"delta\\\":0,\\\"partitions\\\":[$selected_partitions]" - if [ -z "$start_date" ]; then - job_config=$(config $job) - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} - echo $finalConfig - echo "Running $job by partitions." - classVariable="org.ekstep.analytics.job.JobExecutor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" - else - job_config=$(config $job '__endDate__') - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} - echo $finalConfig - echo "Running $job by partitions via Replay-Supervisor." - classVariable="org.ekstep.analytics.job.ReplaySupervisor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" - fi - argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" - clusterConfig=`cat cluster-config.json` - requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} - finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} - echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} -else - if [ -z "$start_date" ]; then - echo "Running $job without partition via run-job." 
- job_config=$(config $job) - cluster_job_config=${job_config//'"'/'\"'} - classVariable="org.ekstep.analytics.job.JobExecutor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$cluster_job_config\"]" - else - job_config=$(config $job '__endDate__') - cluster_job_config=${job_config//'"'/'\"'} - echo "Running $job without partition via Replay-Supervisor." - classVariable="org.ekstep.analytics.job.ReplaySupervisor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$cluster_job_config\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" - fi - argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" - echo $argsStr - clusterConfig=`cat cluster-config.json` - requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} - finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} - echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} - -fi diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 deleted file mode 100644 index cfd986b008..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 +++ /dev/null @@ -1,119 +0,0 @@ -from __future__ import division -import math -import psycopg2 -import sys -import pandas as pd -from IPython.display import display -from psycopg2 import sql, connect -import json - - -def updateExhaustRequests(db, table, update_list): - for r in update_list: - cursor = db.cursor() - batchNum = r['batch_number'] - requestId = r['request_id'] - insertQry = "UPDATE {0} SET batch_number = {1} WHERE request_id = '{2}'".format(table, batchNum, requestId) - n = cursor.execute(insertQry) - -def updateDruidRequests(db, table, update_list): - for r in update_list: - cursor = db.cursor() - batchNum = r['batch_number'] - reportId = r['report_id'] - insertQry = "UPDATE {0} SET batch_number = {1} WHERE report_id = '{2}'".format(table, batchNum, reportId) - n = cursor.execute(insertQry) - -def processRequests(totalRequestsDf, jobId, batchSize, db, table,jobType): - # Compute parallelism from batchSize & totalRequests - # update batch_number to table - - totalRequests = len(totalRequestsDf.index) - print("totalRequests {0}".format(totalRequests)) - - parallelism = int(math.ceil(totalRequests/batchSize)) - print("parallelism computed {0}".format(parallelism)) - - if totalRequests > 0: - if jobType == 'exhaust': - totalRequestsDf["row_num"] = totalRequestsDf.groupby(by=['job_id'])['request_id'].transform(lambda x: x.rank()) - else: - totalRequestsDf["row_num"] = totalRequestsDf['report_id'].transform(lambda x: x.rank()) - #display(totalRequestsDf) - - start_index = 1 - end_index = batchSize - for i in range(1, parallelism+1): - subSetDf = totalRequestsDf[(totalRequestsDf['row_num'] >= start_index) & (totalRequestsDf['row_num'] <= end_index)] - 
subSetDf["batch_number"] = i - print(start_index,end_index) - if jobType == 'exhaust': - updateExhaustRequests(db, table, json.loads(subSetDf.to_json(orient='records'))) - else: - updateDruidRequests(db, table, json.loads(subSetDf.to_json(orient='records'))) - start_index = 1 + end_index - end_index = end_index + batchSize - db.commit() - db.close() - return parallelism - else: - return 0 - -def postgresql_to_dataframe(db, select_query, column_names): - cursor = db.cursor() - try: - cursor.execute(select_query) - except (Exception, psycopg2.DatabaseError) as error: - print("Error: %s" % error) - return 1 - - tupples = cursor.fetchall() - - df = pd.DataFrame(tupples, columns=column_names) - #display(df) - return df - -def get_columns_names(db,table): - columns = [] - col_cursor = db.cursor() - col_names_str = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS WHERE " - col_names_str += "table_name = '{}';".format( table ) - try: - sql_object = sql.SQL(col_names_str).format(sql.Identifier( table)) - col_cursor.execute( sql_object ) - col_names = (col_cursor.fetchall()) - for tup in col_names: - columns += [ tup[0] ] - col_cursor.close() - except Exception as err: - print ("get_columns_names ERROR:", err) - return columns - -def main(batchSize, jobId,jobType,table): - host="{{postgres.db_url}}" - port={{postgres.db_port}} - user="{{postgres.db_username}}" - password="{{postgres.db_password}}" - database="{{postgres.db_name}}" - url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database) - - db = psycopg2.connect(host=host, user=user, password=password, database=database, port=port) - - column_names = get_columns_names(db, table) - - if jobType == 'exhaust': - jobId = jobId.split("-v2")[0] if "-v2" in jobId else jobId - selectQuery = "select * from {0} where job_id = '{1}' and status IN ('SUBMITTED', 'FAILED') and iteration < 3;".format(table, jobId) - else: - selectQuery = "select * from {0} where status IN ('ACTIVE')".format(table) - df = postgresql_to_dataframe(db, selectQuery, column_names) - - parallelism = processRequests(df, jobId, batchSize, db, table,jobType) - return parallelism - -batchSize =int(sys.argv[2]) -jobId=sys.argv[1] -jobType = sys.argv[3] -table = sys.argv[4] -parallelism = main(batchSize, jobId,jobType,table) -print("returning parallelism value: {0}".format(parallelism)) From 5ebf3298b3f83bee83e1b1ec05ae2d8394a19687 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 15:32:47 +1000 Subject: [PATCH 084/161] updated spark env Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 4 ++-- .../data-products-deploy/templates/update-job-requests.py.j2 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index ee6b40c1cb..d26896b348 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -434,10 +434,10 @@ shell: | if echo "{{jobs}}" | grep 'druid' then - python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{env}}_report_config + python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{spark_env}}_report_config elif echo "{{jobs}}" | grep 'exhaust' then - python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} exhaust {{env}}_job_request + python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size 
}} exhaust {{spark_env}}_job_request fi tags: - parallel-jobs-submit diff --git a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 index cfd986b008..4085041965 100644 --- a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 +++ b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 @@ -94,7 +94,7 @@ def main(batchSize, jobId,jobType,table): port={{postgres.db_port}} user="{{postgres.db_username}}" password="{{postgres.db_password}}" - database="{{postgres.db_name}}" + database="{{postgres.spark_db_name}}" url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database) db = psycopg2.connect(host=host, user=user, password=password, database=database, port=port) From d584d8f0ff7d72e3b1884bf3ecae127707b1e510 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 15:37:24 +1000 Subject: [PATCH 085/161] python debug Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index d26896b348..57acbbe83d 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -430,6 +430,18 @@ tags: - parallel-jobs-submit +- name: Check python version + shell: python --version + register: python_version + tags: + - parallel-jobs-submit + +- name: Check python version + debug: | + msg: {{python_version.stdout}} + tags: + - parallel-jobs-submit + - name: Execute python script to populate batch numbers shell: | if echo "{{jobs}}" | grep 'druid' From 4c152bdebd6325f56d6724a08a9bc01e61248a21 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 16:22:36 +1000 Subject: [PATCH 086/161] updated model to include storage keys Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 12 ------------ .../data-products-deploy/templates/model-config.j2 | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 57acbbe83d..d26896b348 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -430,18 +430,6 @@ tags: - parallel-jobs-submit -- name: Check python version - shell: python --version - register: python_version - tags: - - parallel-jobs-submit - -- name: Check python version - debug: | - msg: {{python_version.stdout}} - tags: - - parallel-jobs-submit - - name: Execute python script to populate batch numbers shell: | if echo "{{jobs}}" | grep 'druid' diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 86f376b65d..89ca4128c4 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", 
"sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 6ae6963cf1dc3eccc867a6a69a85a6703bd3a0f2 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 17:32:25 +1000 Subject: [PATCH 087/161] updated spark db name Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index e0ec7005df..9ee857590a 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -223,7 +223,7 @@ metric.kafka.broker="{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" metric.kafka.topic="{{ env }}.prom.monitoring.metrics" //Postgres Config -postgres.db="{{postgres.db_name}}" +postgres.db="{{postgres.spark_db_name}}" postgres.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" postgres.user="{{postgres.db_username}}" postgres.pass="{{postgres.db_password}}" From 47946eb56daddaf5a82fc0eb8381e0d2d7b780c7 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 11 Apr 2023 09:46:56 +1000 Subject: [PATCH 088/161] disabled verbose logging for ansible Signed-off-by: Deepak Devadathan --- pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel index dad65d4e73..480e880609 100644 --- a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel +++ b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel @@ -26,7 +26,7 @@ node() { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/spark-cluster-job-submit.yml" - ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} oci_install_loc=${params.oci_install_loc} batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type} -vvvv " + ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} oci_install_loc=${params.oci_install_loc} batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type}" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From 341ef4fd11aab68e27fa31b18b02950d339b99bc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 11 Apr 2023 18:09:09 +1000 Subject: [PATCH 089/161] updated model parameters for userinfo-exhaust Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 89ca4128c4..9719dae2a7 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -68,7 +68,7 @@ config() { echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' ;; "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date 
--date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' ;; "program-collection-summary-report") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' From 11d111c825e501180bac2e44b66c1ad30ca2aa0c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 11 Apr 2023 18:16:52 +1000 Subject: [PATCH 090/161] added storage key and secret for response-exhaust-v2 Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 9719dae2a7..4efbd29bc9 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -77,7 +77,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' ;; "response-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", 
"sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' From dfde1795e4d5bedffd1fbddc0a1f39e881c475e1 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 13 Apr 2023 16:39:38 +1000 Subject: [PATCH 091/161] added bucket as a parameter Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index 3deb57cc6a..e2903ca267 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -20,7 +20,7 @@ node('build-slave') { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} bucket=${params.bucket} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From 4a4de7e9569a57d42895a07a253e281ab2a527dd Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 13 Apr 2023 17:33:22 +1000 Subject: [PATCH 092/161] added the bucket name to the sprk provisioning script Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index e2903ca267..1b754d1678 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -33,6 +33,7 @@ node('build-slave') { } stage('create and provision spark OCI BDS') { oci_namespace=params.oci_namespace + bucket=params.bucket withCredentials([usernamePassword(credentialsId: 'oci-bds-credential', passwordVariable: 'cluster_password', usernameVariable: 'ambari_user')]) { sh ''' 
currentws=$(pwd) @@ -40,7 +41,7 @@ node('build-slave') { cd /tmp ./create-cluster.sh $ambari_user $cluster_password export ANSIBLE_HOST_KEY_CHECKING=False - ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace bucket=$bucket" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass ''' } From 640ff44deab11987652d2a81d0691154101d6771 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 13 Apr 2023 23:22:05 +1000 Subject: [PATCH 093/161] for testing added batchmodel in jars list Signed-off-by: Deepak Devadathan --- .../data-products-deploy/templates/cluster-config.json.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 12ebf0bde0..2f5ee03ca8 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -60,7 +60,8 @@ "jars": [ "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}" ], "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", "files": [ From 36d96d5be138aa27978db43867b2ccf95e24c705 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 14 Apr 2023 20:49:42 +1000 Subject: [PATCH 094/161] testing change Signed-off-by: Deepak Devadathan --- .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 2f5ee03ca8..d8dc0eb033 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -80,6 +80,8 @@ "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.executor.userClassPathFirst: true", + "spark.driver.userClassPathFirst: true", "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name 

From 640ff44deab11987652d2a81d0691154101d6771 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 13 Apr 2023 23:22:05 +1000
Subject: [PATCH 093/161] for testing added batchmodel in jars list

Signed-off-by: Deepak Devadathan
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 12ebf0bde0..2f5ee03ca8 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -60,7 +60,8 @@
     "jars": [
       "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}",
      "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}",
-      "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}"
+      "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}",
+      "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}"
     ],
     "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}",
     "files": [

From 36d96d5be138aa27978db43867b2ccf95e24c705 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 14 Apr 2023 20:49:42 +1000
Subject: [PATCH 094/161] testing change

Signed-off-by: Deepak Devadathan
---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 2f5ee03ca8..d8dc0eb033 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -80,6 +80,8 @@
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
+    "spark.executor.userClassPathFirst: true",
+    "spark.driver.userClassPathFirst: true",
     "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}",
     "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
 }

From 2fa3df190c4c03b2ce0561991f6c64ac4c91f946 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 14 Apr 2023 20:57:25 +1000
Subject: [PATCH 095/161] corrected json

Signed-off-by: Deepak Devadathan
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index d8dc0eb033..ea641acdc5 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -80,8 +80,8 @@
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
-    "spark.executor.userClassPathFirst: true",
-    "spark.driver.userClassPathFirst: true",
+    "spark.executor.userClassPathFirst": "true",
+    "spark.driver.userClassPathFirst": "true",
     "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}",
     "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
 }

From 444108df96b83e63ecef75a08ab3b17c75b6e0ee Mon Sep 17 00:00:00 2001
From: subhash_chandra_budde
Date: Fri, 14 Apr 2023 22:36:31 +0530
Subject: [PATCH 096/161] removed the classpath params

---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index ea641acdc5..2f5ee03ca8 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -80,8 +80,6 @@
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
-    "spark.executor.userClassPathFirst": "true",
-    "spark.driver.userClassPathFirst": "true",
     "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}",
     "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
 }

From d53cdbd8a4b39a6af7b95c141d7a12c1a17aee6f Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Sun, 16 Apr 2023 01:52:15 +1000
Subject: [PATCH 097/161] added model params storageContainer for progress exhaust

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2
index 4efbd29bc9..9690ac0330 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.j2
@@ -80,7 +80,7 @@ config() {
       echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}'
       ;;
    "progress-exhaust")
-      echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}'
+      echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}'
       ;;
    "progress-exhaust-v2")
       echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}'

From de04dd91ff8c8b954aca8052d8c64e4e10e9f9ce Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Sun, 16 Apr 2023 03:08:04 +1000
Subject: [PATCH 098/161] added store as s3 for testing

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2
index 9690ac0330..f7edaf85c1 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.j2
@@ -80,7 +80,7 @@ config() {
       echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}'
       ;;
    "progress-exhaust")
-      echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}'
+      echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}'
       ;;
    "progress-exhaust-v2")
       echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}'

From 3be25300f439187f92f0c7d41fdfe9f3870db473 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Sun, 16 Apr 2023 03:23:10 +1000
Subject: [PATCH 099/161] corrected json syntax for progress exhaust

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2
index f7edaf85c1..fc42846675 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.j2
@@ -80,7 +80,7 @@ config() {
       echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}'
       ;;
    "progress-exhaust")
-      echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}'
+      echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}'
       ;;
    "progress-exhaust-v2")
       echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}'
}}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 3df3cbc399ff994dd65ac442b4ad349c9b3c3394 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sun, 16 Apr 2023 23:27:05 +1000 Subject: [PATCH 101/161] added store as s3 for progress exhaust Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 77b50d6762..fc42846675 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, 
"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 70fc149966616eb5ffdc493fb38d43219fbb0f69 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 17 Apr 2023 00:07:07 +1000 Subject: [PATCH 102/161] added store key and secret placeholders Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index fc42846675..41e6dc7313 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From afec78c11f13967921c4cbdb82f33dcd6a052bfc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 17 Apr 2023 06:45:35 +1000 Subject: [PATCH 103/161] updated model-config for endpoint addition Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 41e6dc7313..8c30e4cb75 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}","storageEndpoint":"cloud_storage_endpoint_with_protocol", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date 
--date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 9367598d23f8ac17b0ff50b9a6ffb8789191f9d9 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 17 Apr 2023 06:51:12 +1000 Subject: [PATCH 104/161] variablized store type Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 8c30e4cb75..e6bf1bca56 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}","storageEndpoint":"cloud_storage_endpoint_with_protocol", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}","storageEndpoint":"cloud_storage_endpoint_with_protocol", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host 
}}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 7f717dbf39bc6c6f6de68118b0c7b8208679773d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 17 Apr 2023 07:42:10 +1000 Subject: [PATCH 105/161] updated model config for progress exhaust Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index e6bf1bca56..bec9c69a42 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}","storageEndpoint":"cloud_storage_endpoint_with_protocol", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}","storageEndpoint":"{{dp_storage_endpoint_config}}", 
"apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 71531b4f57cb2ec44ad61a1dfcfb5a3ac8c59c31 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 17 Apr 2023 12:31:26 +1000 Subject: [PATCH 106/161] changed the report container Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index bec9c69a42..34e5b01aa3 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From f63f5b9e267210067c688f00a624de0bd15ebf5b Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 14:41:47 +1000 Subject: [PATCH 107/161] updated progress-exhaust config Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 34e5b01aa3..4257d79cb9 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", 
"sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config", "storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From eb653b271324eaabc6f036858aa3760f67a8f033 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 18:43:33 +1000 Subject: [PATCH 108/161] updated model config for userinfo Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 4257d79cb9..baccd205bb 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -68,7 +68,7 @@ config() { echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' ;; "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday 
'+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config", "storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' ;; "program-collection-summary-report") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' From 8ffb87f2e66f4328c37b855e68d388e3ac4aca1a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 22:16:43 +1000 Subject: [PATCH 109/161] added jets3t properties Signed-off-by: Deepak Devadathan --- .../data-products-deploy/templates/cluster-config.json.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 2f5ee03ca8..bdefbd27a9 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -65,7 +65,8 @@ ], "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", "files": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf", + "/usr/odh/2.0.1/spark/conf/jets3t.properties" ], "className": "org.ekstep.analytics.job.JobExecutor", "executorCores": {{ spark_cluster.executor_core }}, From 46f5d8ab424c56cce86f2884db7abc9c1d4ebe64 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 22:24:43 +1000 Subject: [PATCH 110/161] updated the list of files Signed-off-by: Deepak Devadathan --- .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index bdefbd27a9..1af3854ec2 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ 
b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -66,7 +66,7 @@ "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", "files": [ "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf", - "/usr/odh/2.0.1/spark/conf/jets3t.properties" + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/jets3t.properties" ], "className": "org.ekstep.analytics.job.JobExecutor", "executorCores": {{ spark_cluster.executor_core }}, From 729f6fa366f3d029a302526439962a0cda38ec29 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 22:28:38 +1000 Subject: [PATCH 111/161] added local jets3t files Signed-off-by: Deepak Devadathan --- .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 1af3854ec2..3b9910f6cf 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -66,7 +66,7 @@ "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", "files": [ "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/jets3t.properties" + "local:///usr/odh/2.0.1/spark/conf/jets3t.properties" ], "className": "org.ekstep.analytics.job.JobExecutor", "executorCores": {{ spark_cluster.executor_core }}, From 7cb0be4df161c566bc84d5dbaab263042d6d3768 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 22:33:43 +1000 Subject: [PATCH 112/161] added files for jets3t Signed-off-by: Deepak Devadathan --- .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 3b9910f6cf..1af3854ec2 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -66,7 +66,7 @@ "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", "files": [ "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf", - "local:///usr/odh/2.0.1/spark/conf/jets3t.properties" + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/jets3t.properties" ], "className": "org.ekstep.analytics.job.JobExecutor", "executorCores": {{ spark_cluster.executor_core }}, From 988b7bb5dd4d0f53b222ec43789d2155ccead330 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 09:59:24 +1000 Subject: [PATCH 113/161] updated model config for userinfo-exhaust Signed-off-by: Deepak Devadathan --- .../data-products-deploy/templates/cluster-config.json.j2 | 4 ++-- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 1af3854ec2..49acc70781 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ 
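
Patches 109 through 112 iterate on how jets3t.properties reaches the Spark job, finally shipping it from the models bucket next to application.conf so the JetS3t S3 client can be pointed at an S3-compatible object store. The properties file itself never appears in this series; a minimal sketch of what such a file usually carries (these are standard JetS3t setting names, and the endpoint value is a placeholder for an OCI S3-compatibility endpoint, not something taken from these patches):

    s3service.s3-endpoint=examplenamespace.compat.objectstorage.ap-mumbai-1.oraclecloud.com
    s3service.https-only=true
    s3service.disable-dns-buckets=true
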

From 988b7bb5dd4d0f53b222ec43789d2155ccead330 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 19 Apr 2023 09:59:24 +1000
Subject: [PATCH 113/161] updated model config for userinfo-exhaust

Signed-off-by: Deepak Devadathan
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 4 ++--
 ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 1af3854ec2..49acc70781 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -81,8 +81,8 @@
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
-    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}",
-    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
+    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }}",
+    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }}"
 }
 }
 {% endif %}
\ No newline at end of file

diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2
index baccd205bb..bb08633a47 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.j2
@@ -68,7 +68,7 @@ config() {
       echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}'
       ;;
    "userinfo-exhaust")
-      echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config", "storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}'
+      echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageContainer":"{{reports_container}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}'
       ;;
    "program-collection-summary-report")
       echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}'
State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' From b84e7eaa1eed9c8715b03af01040602c11bd1edd Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 12:13:47 +1000 Subject: [PATCH 114/161] updated for flexible sizing Signed-off-by: Deepak Devadathan --- .../oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 6 +++--- pipelines/provision/spark/Jenkinsfile.bds | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index 2e880a92d2..fea3bd313e 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -246,17 +246,17 @@ function create_cluster() { for i in `seq 1 $master` do - json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{head_node_memory}}", \"ocpus\": "{{head_node_cpu}}"},\"subnetId\": \"$subnet\" }" done for i in `seq 1 $utility` do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{head_node_memory}}", \"ocpus\": "{{head_node_cpu}}"},\"subnetId\": \"$subnet\" }" done for i in `seq 1 $worker` do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 16, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{worker_node_memory}}", \"ocpus\": "{{worker_node_cpu}}"},\"subnetId\": \"$subnet\" }" done json="$json]" diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index 1b754d1678..418d5ded0a 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -20,7 +20,7 @@ node('build-slave') { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} bucket=${params.bucket} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + 
ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} head_node_cpu=${params.head_node_cpu} worker_node_cpu=${params.worker_node_cpu} head_node_memory=${params.head_node_memory} worker_node_memory=${params.worker_node_memory} key_alias=${params.key_alias} user_id=${params.user_id} bucket=${params.bucket} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From 054e5c426e4e8eec64334e7c9ab4b81946dbc73c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 12:36:42 +1000 Subject: [PATCH 115/161] added jets3t properties for spark cluster run Signed-off-by: Deepak Devadathan --- .../data-products-deploy/defaults/main.yml | 10 +++++++++- .../roles/data-products-deploy/tasks/main.yml | 18 ++++++++++++++++++ .../data-products-deploy/templates/jets3t.j2 | 8 ++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 ansible/roles/data-products-deploy/templates/jets3t.j2 diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml index 38ddd70420..fec9fc0816 100755 --- a/ansible/roles/data-products-deploy/defaults/main.yml +++ b/ansible/roles/data-products-deploy/defaults/main.yml @@ -280,4 +280,12 @@ assessment_metric_primary_category: "{{ exhaust_job_assessment_primary_category # Default s3 variables sunbird_private_s3_storage_key: "" -sunbird_private_s3_storage_secret: "" \ No newline at end of file +sunbird_private_s3_storage_secret: "" + + +# jets3t s3 config, allows us to configure for s3-like object stores +jets3t_s3_request_signature_version: "{{ s3_request_signature_version }}" +jets3t_s3_endpoint_host: "{% if s3_storage_endpoint %}{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}{% endif %}" +jets3t_s3_disable_dns_buckets: "{{ s3_path_style_access }}" +jets3t_s3_https_only: "{{ s3_https_only }}" +jets3t_s3_default_bucket_location: "{{ s3_default_bucket_location }}" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index d26896b348..1db6a6cc3b 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -393,6 +393,24 @@ - run-job - config-update +- name: Copy jets3t.properties file + template: src=jets3t.j2 dest={{ analytics_cluster.home }}/jets3t.properties + delegate_to: localhost + tags: + - replay-job + - run-job + - config-update + +- name: Copy JetS3t.properties to oci oss + command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/jets3t.properties --file {{ analytics_cluster.home }}/jets3t.properties --content-type auto --force" + async: 3600 + poll: 10 + when: dp_object_store_type == "oci" + tags: + - replay-job + - run-job + - config-update + - name: Replay Job shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --startDate {{ start_date }} --endDate {{ end_date }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} &" async: "{{ (pause_min * 60) }}" diff --git 
a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2 new file mode 100644 index 0000000000..4de8480bc2 --- /dev/null +++ b/ansible/roles/data-products-deploy/templates/jets3t.j2 @@ -0,0 +1,8 @@ +storage-service.request-signature-version={{ jets3t_s3_request_signature_version }} +s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %} +s3service.disable-dns-buckets={{ jets3t_s3_disable_dns_buckets }} +s3service.https-only={{ jets3t_s3_https_only }} +{% if jets3t_s3_default_bucket_location %} +s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }} +{% endif %} +uploads.stream-retry-buffer-size=2147483646 From 37ed4118c97784001a09407ed49fd0da4b65d1c8 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 12:46:25 +1000 Subject: [PATCH 116/161] corrected jinja syntax Signed-off-by: Deepak Devadathan --- .../oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index fea3bd313e..ce84928ce1 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -246,17 +246,17 @@ function create_cluster() { for i in `seq 1 $master` do - json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{head_node_memory}}", \"ocpus\": "{{head_node_cpu}}"},\"subnetId\": \"$subnet\" }" + json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": {{head_node_memory}}, \"ocpus\": {{head_node_cpu}}},\"subnetId\": \"$subnet\" }" done for i in `seq 1 $utility` do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{head_node_memory}}", \"ocpus\": "{{head_node_cpu}}"},\"subnetId\": \"$subnet\" }" + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": {{head_node_memory}}, \"ocpus\": {{head_node_cpu}}},\"subnetId\": \"$subnet\" }" done for i in `seq 1 $worker` do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{worker_node_memory}}", \"ocpus\": "{{worker_node_cpu}}"},\"subnetId\": \"$subnet\" }" + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": {{worker_node_memory}}, \"ocpus\": {{worker_node_cpu}}},\"subnetId\": \"$subnet\" }" done json="$json]" From 021c239d3df45651444bf322cc5fb8c67222454e Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 13:26:06 +1000 Subject: [PATCH 117/161] added download jets3t properties Signed-off-by: Deepak Devadathan --- ansible/roles/provision-oci-spark-cluster/tasks/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml index 9df37915bd..9f766b2d65 100644 --- a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml +++ 
b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml @@ -73,6 +73,8 @@ - name: Download config to livy command: hdfs dfs -get -f oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf {{ spark_folder }}/conf/application.conf +- name: Download jets3t config to livy + command: hdfs dfs -get -f oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/jets3t.properties {{ spark_folder }}/conf/jets3t.properties - name: Update log4j.properties From e8230839264cdd6522f5ee12c6d1e56076a6c70f Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 13:46:45 +1000 Subject: [PATCH 118/161] added additional jar download Signed-off-by: Deepak Devadathan --- .../roles/provision-oci-spark-cluster/defaults/main.yml | 9 +++++++-- ansible/roles/provision-oci-spark-cluster/tasks/main.yml | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ansible/roles/provision-oci-spark-cluster/defaults/main.yml b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml index 11e3e6357b..777554f8ba 100644 --- a/ansible/roles/provision-oci-spark-cluster/defaults/main.yml +++ b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml @@ -19,9 +19,11 @@ jedis_version: 3.2.0 zip4j_version: 2.6.2 guice_version: 3.0 -jets3t_version: 0.9.4 +jets3t_version: 0.9.7 hadoop_aws_version: 2.7.3 java_xmlbuilder_version: 1.1 +cassandra_connector_version: 3.2.0 +commons_pool_version: 2.0 guava_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_version}}/guava-{{guava_version}}.jar guava_jre_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_jre_version}}/guava-{{guava_jre_version}}.jar @@ -33,7 +35,10 @@ zip4j_url: https://repo1.maven.org/maven2/net/lingala/zip4j/zip4j/{{zip4j_versio guice_url: https://repo1.maven.org/maven2/com/google/inject/guice/{{guice_version}}/guice-{{guice_version}}.jar guice_servlet_url: https://repo1.maven.org/maven2/com/google/inject/extensions/guice-servlet/{{guice_version}}/guice-servlet-{{guice_version}}.jar -jets3t_url: https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar +# jets3t_url: https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar +jets3t_url: https://repo1.maven.org/maven2/org/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar hadoop_aws_url: https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/{{hadoop_aws_version}}/hadoop-aws-{{hadoop_aws_version}}.jar java_xmlbuilder_url: https://repo1.maven.org/maven2/com/jamesmurty/utils/java-xmlbuilder/{{java_xmlbuilder_version}}/java-xmlbuilder-{{java_xmlbuilder_version}}.jar +common_pool_url: "https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/{{commons_pool_version}}/commons-pool2-{{commons_pool_version}}.jar" +spark_cassandra_connector_assembly_url: "https://repo1.maven.org/maven2/com/datastax/spark/spark-cassandra-connector-assembly_2.12/{{cassandra_connector_version}}/spark-cassandra-connector-assembly_2.12-{{cassandra_connector_version}}.jar" diff --git a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml index 9f766b2d65..4c2302d168 100644 --- a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml +++ b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml @@ -69,6 +69,14 @@ become: yes get_url: url={{ java_xmlbuilder_url }} dest={{ spark_folder }}/jars/java-xmlbuilder-{{java_xmlbuilder_version}}.jar timeout=1000 force=no +- 
name: Download spark_cassandra_connector and copy to Spark jars folder + become: yes + get_url: url={{ spark_cassandra_connector_assembly_url }} dest={{ spark_folder }}/jars/spark-cassandra-connector-assembly_2.12-{{cassandra_connector_version}}.jar timeout=1000 force=no + +- name: Download common_pool_url and copy to Spark jars folder + become: yes + get_url: url={{ common_pool_url }} dest={{ spark_folder }}/jars/commons-pool2-{{commons_pool_version}}.jar timeout=1000 force=no + - name: Download config to livy command: hdfs dfs -get -f oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf {{ spark_folder }}/conf/application.conf From 21c13278749984bffffa8991b8dbd34322e697a5 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 16:21:33 +1000 Subject: [PATCH 119/161] corrected jinja Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2 index 4de8480bc2..5f97950498 100644 --- a/ansible/roles/data-products-deploy/templates/jets3t.j2 +++ b/ansible/roles/data-products-deploy/templates/jets3t.j2 @@ -1,8 +1,8 @@ storage-service.request-signature-version={{ jets3t_s3_request_signature_version }} -s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %} s3service.disable-dns-buckets={{ jets3t_s3_disable_dns_buckets }} s3service.https-only={{ jets3t_s3_https_only }} {% if jets3t_s3_default_bucket_location %} s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }} {% endif %} uploads.stream-retry-buffer-size=2147483646 +s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %} From 223af2da8d43214c167a294aef38ef947270d403 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 23:47:13 +1000 Subject: [PATCH 120/161] s3service.https-only to true for jets3t Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2 index 5f97950498..1ca346578c 100644 --- a/ansible/roles/data-products-deploy/templates/jets3t.j2 +++ b/ansible/roles/data-products-deploy/templates/jets3t.j2 @@ -1,6 +1,6 @@ storage-service.request-signature-version={{ jets3t_s3_request_signature_version }} s3service.disable-dns-buckets={{ jets3t_s3_disable_dns_buckets }} -s3service.https-only={{ jets3t_s3_https_only }} +s3service.https-only=true {% if jets3t_s3_default_bucket_location %} s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }} {% endif %} From 1d19f60a6d1049796acbdc1f6c1b668971ca7ef5 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 25 Apr 2023 10:01:02 +1000 Subject: [PATCH 121/161] updated report bucket name Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index 9ee857590a..8b2f19dad9 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ 
b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -117,7 +117,7 @@ azure { } ## Reports - Global config -cloud.container.reports="reports" +cloud.container.reports="{{cloud_storage_privatereports_bucketname}}" # course metrics container in azure course.metrics.cassandra.sunbirdKeyspace="sunbird" From 8f99438850dac173b8de5284383fa0934d5c7052 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 25 Apr 2023 10:08:22 +1000 Subject: [PATCH 122/161] updated report verification bucket Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index 8b2f19dad9..c9a3ba285e 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -263,7 +263,7 @@ druid.report.default.storage="s3" {% endif %} druid.report.date.format="yyyy-MM-dd" -druid.report.default.container="report-verification" +druid.report.default.container="{{cloud_storage_report_verfication_bucketname}}" ## Collection Exhaust Jobs Configuration -- Start ## From 4399d57fbba239dc68c30459c8c1fec4226e4715 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 25 Apr 2023 13:20:58 +1000 Subject: [PATCH 123/161] updated jets3t prop Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2 index 1ca346578c..09538d18a1 100644 --- a/ansible/roles/data-products-deploy/templates/jets3t.j2 +++ b/ansible/roles/data-products-deploy/templates/jets3t.j2 @@ -4,5 +4,5 @@ s3service.https-only=true {% if jets3t_s3_default_bucket_location %} s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }} {% endif %} -uploads.stream-retry-buffer-size=2147483646 +uploads.stream-retry-buffer-size=268435456 s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %} From fe2d983d3ea1e54ed673535462d70cc8bebf4e56 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 25 Apr 2023 13:46:37 +1000 Subject: [PATCH 124/161] updated upload.stream buffer value Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2 index 09538d18a1..661c9c576c 100644 --- a/ansible/roles/data-products-deploy/templates/jets3t.j2 +++ b/ansible/roles/data-products-deploy/templates/jets3t.j2 @@ -4,5 +4,5 @@ s3service.https-only=true {% if jets3t_s3_default_bucket_location %} s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }} {% endif %} -uploads.stream-retry-buffer-size=268435456 +uploads.stream-retry-buffer-size=131072 s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %} From a0c91349ff84546f2d1aaeb154d34b54654d9454 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 25 Apr 2023 15:12:50 +1000 Subject: [PATCH 125/161] testing with added parameter to fix mark/reset error Signed-off-by: Deepak Devadathan ---
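Note: the mark/reset error this patch targets typically surfaces when the AWS SDK tries to rewind a partially read upload stream on a retry and its internal buffer is too small to seek back. The -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m system property added below (the flag name and value are taken from the diff; the explanation is the presumed rationale, not stated in the commit) enlarges that buffer. A trimmed sketch of the resulting cluster-config.json.j2 fragment, with every other -D option elided as "...":

  "spark.driver.extraJavaOptions": "... -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m",
  "spark.executor.extraJavaOptions": "... -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m"

The flag is set on both the driver and the executors so that retried uploads behave the same wherever the write happens.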
.../data-products-deploy/templates/cluster-config.json.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 49acc70781..0c3ba9b886 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -81,8 +81,8 @@ "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }}" + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} 
-Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m" } } {% endif %} \ No newline at end of file From 5c708811158a0fb8e5cb7947b8eb2f3bd5794d92 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 25 Apr 2023 15:41:51 +1000 Subject: [PATCH 126/161] changed buffer size Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2 index 661c9c576c..1ca346578c 100644 --- a/ansible/roles/data-products-deploy/templates/jets3t.j2 +++ b/ansible/roles/data-products-deploy/templates/jets3t.j2 @@ -4,5 +4,5 @@ s3service.https-only=true {% if jets3t_s3_default_bucket_location %} s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }} {% endif %} -uploads.stream-retry-buffer-size=131072 +uploads.stream-retry-buffer-size=2147483646 s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %} From 057b7d7f536223a422874a04efd5bf12212f7281 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 26 Apr 2023 14:12:20 +1000 Subject: [PATCH 127/161] updated model-config for druid-dataset Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index bb08633a47..e13f465b93 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -141,7 +141,7 @@ config() { echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": 
"Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' ;; "druid-dataset") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ML Druid Data Model"}' ;; "*") echo "Unknown model code" From fe245eada2ccf5d9218e3bac96d9fdff6bf8e24d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 26 Apr 2023 16:01:26 +1000 Subject: [PATCH 128/161] updated the model-config params Signed-off-by: Deepak Devadathan --- .../templates/model-config.j2 | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) 
diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index e13f465b93..95a2bf5515 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -33,21 +33,21 @@ config() { echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' ;; "assessment-archival") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"azure","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' ;; "assessment-archived-removal") {% if dp_object_store_type == "azure" %} echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; {% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"{{reports_container}}"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; {% endif %} "collection-reconciliation-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CollectionReconciliationJob","modelParams":{"mode":"prodrun","brokerList":"{{ingestion_kafka_broker_host}}","topic":"{{env}}.issue.certificate.request","sparkCassandraConnectionHost":"{{ core_cassandra_host }}"},"parallelization":30,"appName":"CollectionReconciliationJob"}' ;; "collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "score-metric-migration-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.ScoreMetricMigrationJob","modelParams":{"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Score Metric Migration Job"}' @@ -56,34 +56,34 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.AssessmentScoreCorrectionJob","modelParams":{"assessment.score.correction.batches":"","cassandraReadConsistency":"QUORUM","cassandraWriteConsistency":"QUORUM","csvPath":"/mount/data/analytics/score_correction","isDryRunMode":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":30,"appName":"Assessment Score Correction Job"}' ;; "course-batch-status-updater") - echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' ;; "collection-summary-report-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' ;; "uci-private-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date 
--date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' ;; "uci-response-exhaust") - echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":""},"parallelization":8,"appName":"UCI Response Exhaust"}' ;; "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageContainer":"{{reports_container}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' ;; "program-collection-summary-report") - echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"],"keywords":"'$keyword'","store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "response-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' ;; "response-exhaust-v2") - echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config", "storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host 
}}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' ;; "druid_reports") echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.DruidQueryProcessingModel","modelParams":{"mode":"batch"},"parallelization":8,"appName":"Druid Reports"}' @@ -98,7 +98,7 @@ config() { echo '{"jobsCount":'$jobManagerJobsCount',"topic":"'$job_topic'","bootStrapServer":"'$brokerList'","zookeeperConnect":"'$zookeeper'","consumerGroup":"jobmanager","slackChannel":"#test_channel","slackUserName":"JobManager","tempBucket":"'$bucket'","tempFolder":"'$temp_folder'"}' ;; "wfs") - echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"} }],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","store":"{{ dp_object_store_type }}","apiVersion":"v2","parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' #echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' ;; "video-streaming") @@ -126,19 +126,19 @@ config() { echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 
Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' ;; "course-enrollment-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch 
Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' ;; "course-consumption-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": "sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "{{ dp_object_store_type }}","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams":{"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]}}},"reportConfig":{"id":"tpd_metrics","labels":{"date":"Date","status":"Batch Status","timespent":"Timespent in mins","courseName":"Course Name","batchName":"Batch 
Name"},"dateRange":{"staticInterval":"LastDay","granularity":"all"},"metrics":[{"metric":"totalCoursePlays","label":"Total Course Plays (in mins)","druidQuery":{"queryType":"groupBy","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"sum__edata_time_spent","type":"doubleSum","fieldName":"edata_time_spent"}],"dimensions":[{"fieldName":"object_rollup_l1","aliasName":"courseId"},{"fieldName":"uid","aliasName":"userId"},{"fieldName":"context_cdata_id","aliasName":"batchId"}],"filters":[{"type":"equals","dimension":"eid","value":"ME_WORKFLOW_SUMMARY"},{"type":"in","dimension":"dimensions_pdata_id","values":["'$producerEnv'.app","'$producerEnv'.portal"]},{"type":"equals","dimension":"dimensions_type","value":"content"},{"type":"equals","dimension":"dimensions_mode","value":"play"},{"type":"equals","dimension":"context_cdata_type","value":"batch"}],"postAggregation":[{"type":"arithmetic","name":"timespent","fields":{"leftField":"sum__edata_time_spent","rightField":60,"rightFieldType":"constant"},"fn":"/"}],"descending":"false"}}],"output":[{"type":"csv","metrics":["timespent"],"dims":[]}],"queryType":"groupBy"},"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Consumption Metrics Model","deviceMapping":false}' ;; "textbook-progress-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' + echo 
'{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent":"Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' ;; "audit-metrics-report") echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"{{ dp_object_store_type 
}}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' ;; "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.SourcingMetrics","modelParams":{"reportConfig":{"id":"textbook_report","metrics":[],"labels":{"date":"Date","primaryCategory":"Collection Category","identifier":"Collection ID","name":"Collection Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","reportDate":"Report generation date","board":"Board","grade":"Grade","chapters":"Folder Name","totalChapters":"Total number of first level folders","status":"Textbook Status"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"createdFor","aliasName":"createdFor"},{"fieldName":"createdOn","aliasName":"createdOn"},{"fieldName":"lastUpdatedOn","aliasName":"lastUpdatedOn"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper"]},{"type":"in","dimension":"status","values":["Live"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{ dp_object_store_type }}","storageContainer":"'$reportPostContainer'","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Report Job","deviceMapping":false}'
         ;;
      "druid-dataset")
         echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ML Druid Data Model"}'

From 0166b61609bfb360e74e349fe742af21f387e7e5 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 26 Apr 2023 16:03:24 +1000
Subject: [PATCH 129/161] updated to replace the temp dir when run on spark bds cluster

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 1db6a6cc3b..383b42110b 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -136,6 +136,17 @@
     line: 'spark_output_temp_dir="/var/log/sparkapp/tmp/"'
   tags:
     - framework-spark-cluster
+  when: dp_object_store_type != "oci"
+
+- name: Update spark temp dir value for cluster
+  lineinfile:
+    path: '{{ analytics.home }}/models-{{ model_version }}/application.conf'
+    regexp: '^spark_output_temp_dir="/mount/data/analytics/tmp/"'
+    line: 'spark_output_temp_dir="/var/log/spark/"'
+  tags:
+    - framework-spark-cluster
+  when: dp_object_store_type == "oci"
+
 
 - name: Update logger kafka config for cluster
   lineinfile:
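Note on PATCH 129: the two lineinfile tasks are mutually exclusive on
dp_object_store_type, so they could also be collapsed into a single task. A
minimal sketch under the same role variables (untested, shown only to
illustrate the ternary filter; the loosened regexp assumes no other
spark_output_temp_dir line exists in application.conf):

- name: Update spark temp dir value for cluster
  lineinfile:
    path: '{{ analytics.home }}/models-{{ model_version }}/application.conf'
    regexp: '^spark_output_temp_dir='
    line: 'spark_output_temp_dir="{{ (dp_object_store_type == "oci") | ternary("/var/log/spark/", "/var/log/sparkapp/tmp/") }}"'
  tags:
    - framework-spark-cluster
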
From c4ca689ca7d18effcf7dda5dd3ac085dd3445ac6 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 26 Apr 2023 22:02:44 +1000
Subject: [PATCH 130/161] updated variable for analytics db name

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2
index c9a3ba285e..488bb9486d 100644
--- a/ansible/roles/data-products-deploy/templates/common.conf.j2
+++ b/ansible/roles/data-products-deploy/templates/common.conf.j2
@@ -236,7 +236,7 @@
 druid.ingestion.path="/druid/indexer/v1/task"
 druid.segment.path="/druid/coordinator/v1/metadata/datasources/"
 druid.deletesegment.path="/druid/coordinator/v1/datasources/"
-postgres.druid.db="{{ druid_report_postgres_db_name }}"
+postgres.druid.db="{{ spark_postgres_db_name }}"
 postgres.druid.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/"
 postgres.druid.user="{{ druid_report_postgres_db_username }}"
 postgres.druid.pass="{{ dp_vault_druid_postgress_pass }}"

From 678b01d4e1ae326288084e6af50733e069d82258 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 27 Apr 2023 13:45:23 +1000
Subject: [PATCH 131/161] added the property fs.s3.buffer.dir

Signed-off-by: Deepak Devadathan
---
 .../roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2
index ce84928ce1..161b5f3ecf 100644
--- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2
+++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2
@@ -146,6 +146,7 @@ function update_bds_config(){
   add_properties "fs.oci.client.auth.tenantId" $tenid
   add_properties "fs.oci.client.auth.userId" $usid
   add_properties "fs.oci.client.regionCodeOrId" $region
+  add_properties "fs.s3.buffer.dir" /tmp
   #Update it to ambari
   echo "updating ambari config"
   update_ambari_config
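Note on PATCH 131: fs.s3.buffer.dir tells the Hadoop S3-compatible filesystem
where to stage data on local disk before uploading it, so /tmp must have room
for the largest in-flight writes on every BDS node. A sketch of how the
hard-coded value could be parameterized in the same shell template
(SPARK_S3_BUFFER_DIR is a hypothetical variable, not part of the original
script):

  buffer_dir="${SPARK_S3_BUFFER_DIR:-/tmp}"
  add_properties "fs.s3.buffer.dir" "$buffer_dir"
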
From d5956565b3237d50f5b60f2a7704d18b7c3c2491 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 27 Apr 2023 14:09:12 +1000
Subject: [PATCH 132/161] updated the pg db name variable

Signed-off-by: Deepak Devadathan
---
 .../data-products-deploy/templates/update-job-requests.py.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2
index 4085041965..c8358b668d 100644
--- a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2
+++ b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2
@@ -92,7 +92,7 @@ def get_columns_names(db,table):
 def main(batchSize, jobId,jobType,table):
     host="{{postgres.db_url}}"
     port={{postgres.db_port}}
-    user="{{postgres.db_username}}"
+    user="{{spark_pg_db_name}}"
     password="{{postgres.db_password}}"
     database="{{postgres.spark_db_name}}"
     url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database)
     db = psycopg2.connect(host=host, user=user, password=password, database=database, port=port)

From 26838b52ffc13299494a7b5376742456e6af4e61 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 27 Apr 2023 14:13:25 +1000
Subject: [PATCH 133/161] added default value

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/defaults/main.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml
index fec9fc0816..06cdf1b0f2 100755
--- a/ansible/roles/data-products-deploy/defaults/main.yml
+++ b/ansible/roles/data-products-deploy/defaults/main.yml
@@ -288,4 +288,7 @@ jets3t_s3_request_signature_version: "{{ s3_request_signature_version }}"
 jets3t_s3_endpoint_host: "{% if s3_storage_endpoint %}{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}{% endif %}"
 jets3t_s3_disable_dns_buckets: "{{ s3_path_style_access }}"
 jets3t_s3_https_only: "{{ s3_https_only }}"
-jets3t_s3_default_bucket_location: "{{ s3_default_bucket_location }}"
\ No newline at end of file
+jets3t_s3_default_bucket_location: "{{ s3_default_bucket_location }}"
+
+
+spark_pg_db_name: "analytics"
\ No newline at end of file

From 1f144b7fa4774f1c61ae484e1a58ae50f73895a6 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 27 Apr 2023 14:22:58 +1000
Subject: [PATCH 134/161] updated the python variable

Signed-off-by: Deepak Devadathan
---
 .../data-products-deploy/templates/update-job-requests.py.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2
index c8358b668d..cfd986b008 100644
--- a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2
+++ b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2
@@ -92,9 +92,9 @@ def get_columns_names(db,table):
 def main(batchSize, jobId,jobType,table):
     host="{{postgres.db_url}}"
     port={{postgres.db_port}}
-    user="{{spark_pg_db_name}}"
+    user="{{postgres.db_username}}"
     password="{{postgres.db_password}}"
-    database="{{postgres.spark_db_name}}"
+    database="{{postgres.db_name}}"
     url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database)
     db = psycopg2.connect(host=host, user=user, password=password, database=database, port=port)

From 5a379ba7d3aabec36dc71e609434cb88a95cc42c Mon Sep 17 00:00:00 2001
From: subhash_chandra_budde
Date: Tue, 2 May 2023 20:02:40 +0530
Subject: [PATCH 135/161] Updated the storage config for admin reports

---
 ansible/roles/data-products-deploy/templates/model-config.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2
index 95a2bf5515..f9991e1714 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.j2
@@ -105,10 +105,10 @@ config() {
         echo '{"search":{"type":"{{ dp_object_store_type }}"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}'
         ;;
     "admin-user-reports")
-        echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}'
+        echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday 
'+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' ;; "admin-geo-reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' ;; "telemetry-replay") echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' From 4f9883f1c297c5c13525c491854e79f8772491cf Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 4 May 2023 10:47:03 +1000 Subject: [PATCH 136/161] added templates for ingestion spec for collection and sourcing Signed-off-by: Deepak Devadathan --- .../roles/data-products-deploy/tasks/main.yml | 4 +- .../collection-summary-ingestion-spec.j2 | 256 ++++++++++++++++++ .../templates/sourcing-ingestion-spec.j2 | 151 +++++++++++ 3 files changed, 409 insertions(+), 2 deletions(-) create mode 100644 ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 create mode 100644 ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2 diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 383b42110b..fafb9daa3a 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -340,7 +340,7 @@ - spark-jobs - name: Copy collection-summary ingestion spec - copy: src="collection-summary-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + copy: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} tags: - ed-dataproducts @@ -369,7 +369,7 @@ - spark-jobs - name: Copy sourcing-summary ingestion spec - copy: src="sourcing-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + copy: src="sourcing-ingestion-spec.j2" 
dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} tags: - ed-dataproducts diff --git a/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 b/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 new file mode 100644 index 0000000000..f26c2e6447 --- /dev/null +++ b/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 @@ -0,0 +1,256 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "collection-summary-snapshot", + "parser": { + "type": "string", + "parseSpec": { + "format": "json", + "flattenSpec": { + "useFieldDiscovery": false, + "fields": [ + { + "type": "root", + "name": "content_org", + "expr": "contentorg" + }, + { + "type": "root", + "name": "user_org", + "expr": "orgname" + }, + { + "type": "root", + "name": "batch_start_date", + "expr": "startdate" + }, + { + "type": "root", + "name": "batch_end_date", + "expr": "enddate" + }, + { + "type": "root", + "name": "has_certificate", + "expr": "hascertified" + }, + { + "type": "root", + "name": "collection_id", + "expr": "courseid" + }, + { + "type": "root", + "name": "batch_id", + "expr": "batchid" + }, + { + "type": "root", + "name": "collection_name", + "expr": "collectionname" + }, + { + "type": "root", + "name": "batch_name", + "expr": "batchname" + }, + { + "type": "root", + "name": "total_enrolment", + "expr": "enrolleduserscount" + }, + { + "type": "root", + "name": "total_completion", + "expr": "completionuserscount" + }, + { + "type": "root", + "name": "total_certificates_issued", + "expr": "certificateissuedcount" + }, + { + "type": "root", + "name": "content_status", + "expr": "contentstatus" + }, + { + "type": "root", + "name": "user_state", + "expr": "state" + }, + { + "type": "root", + "name": "user_district", + "expr": "district" + }, + { + "type": "root", + "name": "content_channel", + "expr": "channel" + }, + { + "type": "root", + "name": "keywords", + "expr": "keywords" + }, + { + "type": "root", + "name": "timestamp", + "expr": "timestamp" + }, + { + "type": "root", + "name": "medium", + "expr": "medium" + }, + { + "type": "root", + "name": "subject", + "expr": "subject" + }, + { + "type": "root", + "name": "created_for", + "expr": "createdfor" + }, + { + "type": "root", + "name": "user_type", + "expr": "usertype" + }, + { + "type": "root", + "name": "user_subtype", + "expr": "usersubtype" + } + ] + }, + "dimensionsSpec": { + "dimensions": [ + { + "name": "content_org" + }, + { + "name": "user_org" + }, + { + "type": "string", + "name": "batch_id" + }, + { + "type": "string", + "name": "batch_start_date" + }, + { + "type": "string", + "name": "batch_end_date" + }, + { + "type": "string", + "name": "collection_id" + }, + { + "type": "string", + "name": "collection_name" + }, + { + "type": "string", + "name": "batch_name" + }, + { + "type": "long", + "name": "total_enrolment" + }, + { + "type": "long", + "name": "total_completion" + }, + { + "type": "long", + "name": "total_certificates_issued" + }, + { + "type": "string", + "name": "content_status" + }, + { + "type": "string", + "name": "user_state" + }, + { + "type": "string", + "name": "user_district" + }, + { + "name": "keywords" + }, + { + "name": "has_certificate" + }, + { + "type": "string", + "name": "content_channel" + }, + { + "name": "medium" + }, + { + "name": "subject" + }, + { + "name": "created_for" + }, + { + "type": "string", + "name": "user_type" + }, + { + "type": "string", + "name": "user_subtype" 
+              }
+            ],
+            "dimensionsExclusions": []
+          },
+          "timestampSpec": {
+            "column": "timestamp",
+            "format": "auto"
+          }
+        }
+      },
+      "metricsSpec": [],
+      "granularitySpec": {
+        "type": "uniform",
+        "segmentGranularity": "day",
+        "queryGranularity": "none",
+        "rollup": true
+      }
+    },
+    "ioConfig": {
+      "type": "index",
+      "firehose": {
+{% if dp_object_store_type == "azure" %}
+        "type": "static-azure-blobstore",
+        "blobs": [
+          {
+            "container": "{{reports_container}}",
+            "path": "/collection-summary-reports-v2/collection-summary-report-latest.json"
+          }
+        ],
+{% elif (dp_object_store_type == "oci") %}
+        "type": "static-s3",
+        "uris": [ "s3://{{reports_container}}/collection-summary-reports-v2/collection-summary-report-latest.json"],
+{% endif %}
+        "fetchTimeout": 300000
+      }
+    },
+    "tuningConfig": {
+      "type": "index",
+      "targetPartitionSize": 5000000,
+      "maxRowsInMemory": 25000,
+      "forceExtendableShardSpecs": false,
+      "logParseExceptions": true
+    }
+  }
+}
\ No newline at end of file
diff --git a/ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2 b/ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2
new file mode 100644
index 0000000000..41bb51afba
--- /dev/null
+++ b/ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2
@@ -0,0 +1,151 @@
+{
+  "type": "index",
+  "spec": {
+    "dataSchema": {
+      "dataSource": "sourcing-summary-snapshot",
+      "parser": {
+        "type": "string",
+        "parseSpec": {
+          "format": "json",
+          "flattenSpec": {
+            "useFieldDiscovery": false,
+            "fields": [
+              {
+                "type": "root",
+                "name": "program_id",
+                "expr": "program_id"
+              },
+              {
+                "type": "root",
+                "name": "status",
+                "expr": "status"
+              },
+              {
+                "type": "root",
+                "name": "rootorg_id",
+                "expr": "rootorg_id"
+              },
+              {
+                "type": "root",
+                "name": "user_id",
+                "expr": "user_id"
+              },
+              {
+                "type": "root",
+                "name": "osid",
+                "expr": "osid"
+              },
+              {
+                "type": "root",
+                "name": "user_type",
+                "expr": "user_type"
+              },
+              {
+                "type": "root",
+                "name": "contributor_id",
+                "expr": "contributor_id"
+              },
+              {
+                "type": "root",
+                "name": "total_contributed_content",
+                "expr": "total_contributed_content"
+              },
+              {
+                "type": "root",
+                "name": "primary_category",
+                "expr": "primary_category"
+              },
+              {
+                "type": "root",
+                "name": "created_by",
+                "expr": "created_by"
+              }
+            ]
+          },
+          "dimensionsSpec": {
+            "dimensions": [
+              {
+                "type": "string",
+                "name": "program_id"
+              },
+              {
+                "type": "string",
+                "name": "status"
+              },
+              {
+                "type": "string",
+                "name": "rootorg_id"
+              },
+              {
+                "type": "string",
+                "name": "user_id"
+              },
+              {
+                "type": "string",
+                "name": "osid"
+              },
+              {
+                "type": "string",
+                "name": "user_type"
+              },
+              {
+                "type": "string",
+                "name": "contributor_id"
+              },
+              {
+                "type": "string",
+                "name": "primary_category"
+              },
+              {
+                "type": "string",
+                "name": "created_by"
+              }
+            ],
+            "dimensionsExclusions": []
+          },
+          "timestampSpec": {
+            "column": "timestamp",
+            "format": "auto"
+          }
+        }
+      },
+      "metricsSpec": [
+        {
+          "name": "total_count",
+          "type": "count"
+        }
+      ],
+      "granularitySpec": {
+        "type": "uniform",
+        "segmentGranularity": "day",
+        "queryGranularity": "none",
+        "rollup": true
+      }
+    },
+    "ioConfig": {
+      "type": "index",
+      "firehose": {
+{% if dp_object_store_type == "azure" %}
+        "type": "static-azure-blobstore",
+        "blobs": [
+          {
+            "container": "{{reports_container}}",
+            "path": "/sourcing/SourcingSummaryReport.json"
+          }
+        ],
+{% elif (dp_object_store_type == "oci") %}
+        "type": "static-s3",
+        "uris": [ "s3://{{reports_container}}/sourcing/SourcingSummaryReport.json"],
+{% endif %}
+        "fetchTimeout": 300000
+      }
+    },
+    "tuningConfig": {
+      "type": "index",
+      "targetPartitionSize": 5000000,
+      "maxRowsInMemory": 25000,
+      "forceExtendableShardSpecs": false,
+      "logParseExceptions": true
+    }
+  }
+}

From 64a720e6ffce61d254ccb43f42de218e0dccd026 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 4 May 2023 10:52:36 +1000
Subject: [PATCH 137/161] used template command instead of copy

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index fafb9daa3a..c9a4caa3e2 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -340,7 +340,7 @@
     - spark-jobs
 
 - name: Copy collection-summary ingestion spec
-  copy: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }}
+  template: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }}
   tags:
     - ed-dataproducts
@@ -369,7 +369,7 @@
     - spark-jobs
 
 - name: Copy sourcing-summary ingestion spec
-  copy: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }}
+  template: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }}
   tags:
     - ed-dataproducts

From 039499c321ee75a98b8f68bf929498e5df679220 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 4 May 2023 10:54:26 +1000
Subject: [PATCH 138/161] copying as json file

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index c9a4caa3e2..1c15e14023 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -340,7 +340,7 @@
     - spark-jobs
 
 - name: Copy collection-summary ingestion spec
-  template: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }}
+  template: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/collection-summary-ingestion-spec.json mode=755 owner={{ analytics_user }} group={{ analytics_group }}
   tags:
     - ed-dataproducts
@@ -369,7 +369,7 @@
     - spark-jobs
 
 - name: Copy sourcing-summary ingestion spec
-  template: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }}
+  template: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/sourcing-ingestion-spec.json mode=755 owner={{ analytics_user }} group={{ analytics_group }}
   tags:
     - ed-dataproducts
From 9418fb9d3c46204440806b227467bf6b67b8a928 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 4 May 2023 22:05:52 +1000
Subject: [PATCH 139/161] updated model-config.json.j2

Signed-off-by: Deepak Devadathan
---
 .../templates/model-config.json.j2 | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/model-config.json.j2 b/ansible/roles/data-products-deploy/templates/model-config.json.j2
index a3569c7f46..55b89f7845 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.json.j2
@@ -20,7 +20,12 @@
         "model": "org.ekstep.analytics.model.WorkflowSummary",
         "modelParams": {
             "apiVersion": "v2",
-            "parallelization": 32
+            "parallelization": 32,
+            "storageKeyConfig":"storage.key.config",
+            "storageSecretConfig":"storage.secret.config",
+            "storageContainer":"{{reports_container}}",
+            "storageEndpoint":"{{dp_storage_endpoint_config}}",
+            "store":"{{ dp_object_store_type }}"
         },
         "output": [
             {
@@ -347,6 +352,10 @@
             }
         },
         "store": "{{dp_object_store_type}}",
+        "storageKeyConfig":"storage.key.config",
+        "storageSecretConfig":"storage.secret.config",
+        "storageContainer":"{{reports_container}}",
+        "storageEndpoint":"{{dp_storage_endpoint_config}}",
         "format":"csv",
         "key": "druid-reports/",
         "filePath": "druid-reports/",
@@ -458,6 +467,10 @@
             "queryType": "groupBy"
         },
         "store": "{{dp_object_store_type}}",
+        "storageKeyConfig":"storage.key.config",
+        "storageSecretConfig":"storage.secret.config",
+        "storageContainer":"{{reports_container}}",
+        "storageEndpoint":"{{dp_storage_endpoint_config}}",
         "format":"csv",
         "key": "druid-reports/",
         "filePath": "druid-reports/",

From 9418fb9d3c46204440806b227467bf6b67b8a928 Mon Sep 17 00:00:00 2001
From: subhash_chandra_budde
Date: Fri, 5 May 2023 00:23:13 +0530
Subject: [PATCH 140/161] Updated model params

---
 .../templates/model-config.json.j2 | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/ansible/roles/data-products-deploy/templates/model-config.json.j2 b/ansible/roles/data-products-deploy/templates/model-config.json.j2
index 55b89f7845..67cbd6a3cd 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.json.j2
@@ -73,6 +73,11 @@
         },
         "model": "org.ekstep.analytics.job.report.StateAdminReportJob",
         "modelParams": {
+            "storageKeyConfig":"storage.key.config",
+            "storageSecretConfig":"storage.secret.config",
+            "storageContainer":"{{reports_container}}",
+            "storageEndpoint":"{{dp_storage_endpoint_config}}",
+            "store":"{{ dp_object_store_type }}",
             "sparkCassandraConnectionHost": "{{core_cassandra_host}}",
             "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}"
         },
@@ -94,6 +99,11 @@
         },
         "model": "org.ekstep.analytics.job.report.StateAdminGeoReportJob",
         "modelParams": {
+            "storageKeyConfig":"storage.key.config",
+            "storageSecretConfig":"storage.secret.config",
+            "storageContainer":"{{reports_container}}",
+            "storageEndpoint":"{{dp_storage_endpoint_config}}",
+            "store":"{{ dp_object_store_type }}",
             "sparkCassandraConnectionHost": "{{core_cassandra_host}}",
             "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}"
         },
@@ -144,6 +154,11 @@
         "pushMetrics": true,
         "brokerList": "{{ brokerlist }}",
         "topic": "{{ analytics_metrics_topic }}",
+        "storageKeyConfig":"storage.key.config",
+        "storageSecretConfig":"storage.secret.config",
+        "storageContainer":"{{reports_container}}",
+        "storageEndpoint":"{{dp_storage_endpoint_config}}",
+        "store":"{{ dp_object_store_type }}",
         "model": [
             {
                 "model": "WorkFlowSummaryModel",
@@ -303,6 +318,10 @@
             "slugName": ""
         },
         "store": "{{dp_object_store_type}}",
+        "storageKeyConfig":"storage.key.config",
+        "storageSecretConfig":"storage.secret.config",
+        "storageContainer":"{{reports_container}}",
+        "storageEndpoint":"{{dp_storage_endpoint_config}}",
         "format": "csv",
         "key": "druid-reports/",
         "filePath": "druid-reports/",

From 8458fa9d0ca9666a1e35dd4db7d4f09a6581ae52 Mon Sep 17 00:00:00 2001
From: 
subhash_chandra_budde Date: Fri, 5 May 2023 00:41:23 +0530 Subject: [PATCH 141/161] Updated storage config --- .../templates/model-config.json.j2 | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ansible/roles/data-products-deploy/templates/model-config.json.j2 b/ansible/roles/data-products-deploy/templates/model-config.json.j2 index 67cbd6a3cd..bb170b2193 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.json.j2 @@ -125,6 +125,11 @@ }, "model": "org.ekstep.analytics.updater.UpdateContentRating", "modelParams": { + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "startDate": "$(date --date yesterday '+%Y-%m-%d')", "endDate": "$(date '+%Y-%m-%d')" }, @@ -221,6 +226,11 @@ }, "model": "org.ekstep.analytics.model.ExperimentDefinitionModel", "modelParams": { + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "sparkElasticsearchConnectionHost": "{{ lp_composite_search_host }}" }, "output": [ @@ -514,6 +524,11 @@ }, "model": "org.ekstep.analytics.model.MetricsAuditJob", "modelParams": { + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "auditConfig": [ { "name": "denorm", From e16a8264b2591a9b05e71ed7697f8a6726077213 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 5 May 2023 21:13:05 +1000 Subject: [PATCH 142/161] updated the right db name for druid rollup Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index 488bb9486d..c9a3ba285e 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -236,7 +236,7 @@ druid.ingestion.path="/druid/indexer/v1/task" druid.segment.path="/druid/coordinator/v1/metadata/datasources/" druid.deletesegment.path="/druid/coordinator/v1/datasources/" -postgres.druid.db="{{ spark_postgres_db_name }}" +postgres.druid.db="{{ druid_report_postgres_db_name }}" postgres.druid.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" postgres.druid.user="{{ druid_report_postgres_db_username }}" postgres.druid.pass="{{ dp_vault_druid_postgress_pass }}" From 59603a67ee490f72aac292e9dd1684d7217c0c06 Mon Sep 17 00:00:00 2001 From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com> Date: Thu, 3 Aug 2023 16:34:04 +0530 Subject: [PATCH 143/161] Update cluster-config.json.j2 Changing spark-cluster configuration --- .../data-products-deploy/templates/cluster-config.json.j2 | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 0c3ba9b886..6019c5c922 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ 
b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -78,6 +78,12 @@ "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", "spark.scheduler.mode" : "FAIR", + "spark.driver.memory" : "40g", + "spark.driver.memoryOverhead" : "4g", + "spark.executor.memoryOverhead" : "4g", + "spark.memory.offHeap.enabled" : "true", + "spark.serializer" : "org.apache.spark.serializer.KryoSerializer", + "spark.yarn.scheduler.heartbeat.interval-ms" : "7200000", "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", @@ -85,4 +91,4 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m" } } -{% endif %} \ No newline at end of file +{% endif %} From d3ff4e9c93cd249b7b60e3545a7202f70b35b0be Mon Sep 17 00:00:00 2001 From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:20:02 +0530 Subject: [PATCH 144/161] Update cluster-config.json.j2 Added parameters --- .../roles/data-products-deploy/templates/cluster-config.json.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 6019c5c922..20cfd002b1 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -82,6 +82,7 @@ "spark.driver.memoryOverhead" : "4g", "spark.executor.memoryOverhead" : "4g", "spark.memory.offHeap.enabled" : "true", + "spark.memory.offHeap.size" : "4g", "spark.serializer" : "org.apache.spark.serializer.KryoSerializer", "spark.yarn.scheduler.heartbeat.interval-ms" : "7200000", "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", From dec6df5f6d75d190fba77db212c2f561696d6049 Mon Sep 17 00:00:00 2001 From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:46:06 +0530 Subject: [PATCH 145/161] Update cluster-config.json.j2 Changes revert --- .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 -- 1 file changed, 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 
b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 20cfd002b1..be27c8e889 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -81,8 +81,6 @@
   "spark.driver.memory" : "40g",
   "spark.driver.memoryOverhead" : "4g",
   "spark.executor.memoryOverhead" : "4g",
-  "spark.memory.offHeap.enabled" : "true",
-  "spark.memory.offHeap.size" : "4g",
   "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",
   "spark.yarn.scheduler.heartbeat.interval-ms" : "7200000",
   "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",

From 54c57ba00f87cfe0930a102e8623582271bd1d46 Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Fri, 4 Aug 2023 20:15:27 +0530
Subject: [PATCH 146/161] Update cluster-config.json.j2

driver memory 150gb
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index be27c8e889..a46ea1b280 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -78,7 +78,7 @@
   "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}",
   "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
   "spark.scheduler.mode" : "FAIR",
-  "spark.driver.memory" : "40g",
+  "spark.driver.memory" : "150g",
   "spark.driver.memoryOverhead" : "4g",
   "spark.executor.memoryOverhead" : "4g",
   "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",

From 3ddfa884681a0433658db0bec00c8abc4fc64bfd Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Mon, 7 Aug 2023 19:26:28 +0530
Subject: [PATCH 147/161] Update cluster-config.json.j2

Revert back changes
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index a46ea1b280..be27c8e889 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -78,7 +78,7 @@
   "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}",
   "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
   "spark.scheduler.mode" : "FAIR",
-  "spark.driver.memory" : "150g",
+  "spark.driver.memory" : "40g",
   "spark.driver.memoryOverhead" : "4g",
   "spark.executor.memoryOverhead" : "4g",
   "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",

From f505a60391ffd3f6f56bf20cf5e9e433485c1883 Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Fri, 25 Aug 2023 16:03:00 +0530
Subject: [PATCH 148/161] Update cluster-config.json.j2

Updated executor heartbeat and network timeout
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index be27c8e889..e866df80bb 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
From f505a60391ffd3f6f56bf20cf5e9e433485c1883 Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Fri, 25 Aug 2023 16:03:00 +0530
Subject: [PATCH 148/161] Update cluster-config.json.j2

Updated executor heartbeat and network timeout
---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index be27c8e889..e866df80bb 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -83,6 +83,8 @@
     "spark.executor.memoryOverhead" : "4g",
     "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",
     "spark.yarn.scheduler.heartbeat.interval-ms" : "7200000",
+    "spark.executor.heartbeatInterval" : "1800s",
+    "spark.network.timeout" : "5400s",
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",

From 4bd6cc79fd7bc9cfbf730cd0d96f37111cf4090d Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Wed, 30 Aug 2023 18:25:19 +0530
Subject: [PATCH 149/161] Update cluster-config.json.j2

Driver memory updated
---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index e866df80bb..9008b9448e 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -78,7 +78,7 @@
     "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}",
     "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
     "spark.scheduler.mode" : "FAIR",
-    "spark.driver.memory" : "40g",
+    "spark.driver.memory" : "280g",
     "spark.driver.memoryOverhead" : "4g",
     "spark.executor.memoryOverhead" : "4g",
     "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",

From e245128972e695ea45471d30ba29edadaae65301 Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Thu, 31 Aug 2023 16:35:36 +0530
Subject: [PATCH 150/161] Update cluster-config.json.j2
---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 9008b9448e..e866df80bb 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -78,7 +78,7 @@
     "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}",
     "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
     "spark.scheduler.mode" : "FAIR",
-    "spark.driver.memory" : "280g",
+    "spark.driver.memory" : "40g",
     "spark.driver.memoryOverhead" : "4g",
     "spark.executor.memoryOverhead" : "4g",
     "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",
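Note: the pair added in PATCH 148 respects Spark's documented guidance that spark.network.timeout should comfortably exceed spark.executor.heartbeatInterval, otherwise executors can be declared lost before a heartbeat is even considered missed; 1800s against 5400s keeps a 3x margin. Sketched as template variables (the variable names are hypothetical, with the constraint recorded in a Jinja comment):

    {# keep spark.network.timeout well above spark.executor.heartbeatInterval #}
    "spark.executor.heartbeatInterval" : "{{ spark_executor_heartbeat_interval | default('1800s') }}",
    "spark.network.timeout" : "{{ spark_network_timeout | default('5400s') }}",
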
From 43fc84f9ef7ec0e1fc0b880687b0c2c4d01f054d Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Tue, 10 Oct 2023 21:47:39 +0530
Subject: [PATCH 151/161] Update cluster-config.json.j2

Oracle Suggested Changes
---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index e866df80bb..431b791207 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -82,6 +82,8 @@
     "spark.driver.memoryOverhead" : "4g",
     "spark.executor.memoryOverhead" : "4g",
     "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",
+    "spark.shuffle.service.enabled" : "true",
+    "spark.dynamicAllocation.enabled" : "true",
     "spark.yarn.scheduler.heartbeat.interval-ms" : "7200000",
     "spark.executor.heartbeatInterval" : "1800s",
     "spark.network.timeout" : "5400s",

From bd42a0c0586b37cf1123ae3adf21f41f18bebec3 Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Sat, 14 Oct 2023 16:06:07 +0530
Subject: [PATCH 152/161] Update cluster-config.json.j2

updated the config
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 431b791207..46697920d6 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -78,8 +78,8 @@
     "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}",
     "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
     "spark.scheduler.mode" : "FAIR",
-    "spark.driver.memory" : "40g",
-    "spark.driver.memoryOverhead" : "4g",
+    "spark.driver.memory" : "240g",
+    "spark.driver.memoryOverhead" : "20g",
     "spark.executor.memoryOverhead" : "4g",
     "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",
     "spark.shuffle.service.enabled" : "true",

From 1c2e97b7731544e893cdfc271ca1b1401bc14043 Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Wed, 18 Oct 2023 14:30:35 +0530
Subject: [PATCH 153/161] Update cluster-config.json.j2

Memory configuration changes
---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 46697920d6..b208725ed0 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -79,6 +79,7 @@
     "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
     "spark.scheduler.mode" : "FAIR",
     "spark.driver.memory" : "240g",
+    "spark.executor.memory" : "60g",
     "spark.driver.memoryOverhead" : "20g",
     "spark.executor.memoryOverhead" : "4g",
     "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",
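Note: after PATCH 151 the rendered JSON carries "spark.shuffle.service.enabled" twice — once bound to {{ spark_enable_dynamic_allocation }} at the top of the block (visible as context in the PATCH 152 hunk above) and once hardcoded to "true". Most JSON parsers keep the last occurrence of a duplicate key, so the hardcoded value typically wins silently. A sketch that keeps a single pair driven by the variable the template already defines (no new names assumed):

    "spark.shuffle.service.enabled" : "{{ spark_enable_dynamic_allocation }}",
    "spark.dynamicAllocation.enabled" : "{{ spark_enable_dynamic_allocation }}",
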
From 939269a3c02d996c5615224ff1ab28bd62ac6c45 Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Wed, 18 Oct 2023 16:19:03 +0530
Subject: [PATCH 154/161] Update cluster-config.json.j2

Memory config changes
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index b208725ed0..806945a33d 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -78,8 +78,8 @@
     "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}",
     "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
     "spark.scheduler.mode" : "FAIR",
-    "spark.driver.memory" : "240g",
-    "spark.executor.memory" : "60g",
+    "spark.driver.memory" : "60g",
+    "spark.executor.memory" : "40g",
     "spark.driver.memoryOverhead" : "20g",
     "spark.executor.memoryOverhead" : "4g",
     "spark.serializer" : "org.apache.spark.serializer.KryoSerializer",

From fa3d0616d9574cf0dbd570d4329ed437280606a7 Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Wed, 1 Nov 2023 17:24:35 +0530
Subject: [PATCH 155/161] Update collection-summary-ingestion-spec.j2

collection-summary-reports-v2 ingestion spec updated
---
 .../templates/collection-summary-ingestion-spec.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 b/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2
index f26c2e6447..d734ee3db2 100644
--- a/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2
+++ b/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2
@@ -240,7 +240,7 @@
         ],
       {% elif (dp_object_store_type == "oci") %}
         "type": "static-s3",
-        "uris": [ "s3://{{reports_container}}/sourcing/SourcingSummaryReport.json"],
+        "uris": [ "s3://{{reports_container}}/collection-summary-reports-v2/collection-summary-report-latest.json"],
      {% endif %}
        "fetchTimeout": 300000
      }
@@ -253,4 +253,4 @@
     "logParseExceptions": true
   }
 }
-}
\ No newline at end of file
+}

From d041265cc291c00091ffe688c09bfd33a31f0419 Mon Sep 17 00:00:00 2001
From: manojkumarsah13 <127202629+manojkumarsah13@users.noreply.github.com>
Date: Tue, 14 Nov 2023 13:35:47 +0530
Subject: [PATCH 156/161] Update cluster-config.json.j2 - Heap memory settings changes
---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 806945a33d..65ca2ec4eb 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -88,6 +88,8 @@
     "spark.yarn.scheduler.heartbeat.interval-ms" : "7200000",
     "spark.executor.heartbeatInterval" : "1800s",
     "spark.network.timeout" : "5400s",
+    "spark.executor.extraJavaOptions" : "-Xmx2G",
+    "spark.driver.extraJavaOptions" : "-Xmx2G",
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
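Note: Spark's documentation states that maximum heap size (-Xmx) must not be set through spark.driver.extraJavaOptions or spark.executor.extraJavaOptions; heap sizing belongs in spark.driver.memory and spark.executor.memory, which this template already sets, and Spark rejects executor options that carry -Xmx. That likely explains the churn across the next two patches. A sketch of the supported form (values mirror PATCH 154; the variable names are hypothetical):

    "spark.driver.memory" : "{{ spark_driver_memory | default('60g') }}",
    "spark.executor.memory" : "{{ spark_executor_memory | default('40g') }}",
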
From e329e2b3bee9ae9af1e4992ebd184445ddb9a163 Mon Sep 17 00:00:00 2001
From: shubham72a <141618807+shubham72a@users.noreply.github.com>
Date: Tue, 14 Nov 2023 15:05:00 +0530
Subject: [PATCH 157/161] Update cluster-config.json.j2

increase buffer size
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 65ca2ec4eb..40551084ab 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -87,14 +87,12 @@
     "spark.dynamicAllocation.enabled" : "true",
     "spark.yarn.scheduler.heartbeat.interval-ms" : "7200000",
     "spark.executor.heartbeatInterval" : "1800s",
-    "spark.network.timeout" : "5400s",
-    "spark.executor.extraJavaOptions" : "-Xmx2G",
-    "spark.driver.extraJavaOptions" : "-Xmx2G",
+    "spark.network.timeout" : "5400s",
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
-    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m",
-    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m"
+    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=2048m -Xmx2g",
+    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=2048m -Xmx2g"
   }
 }
 {% endif %}

From a8935376cc947b721f64ee223d97f62ccc33450c Mon Sep 17 00:00:00 2001
From: shubham72a <141618807+shubham72a@users.noreply.github.com>
Date: Tue, 14 Nov 2023 15:37:50 +0530
Subject: [PATCH 158/161] Update cluster-config.json.j2 - increase heap memory to 4G
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 40551084ab..d6b6e18bb9 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -87,12 +87,14 @@
     "spark.dynamicAllocation.enabled" : "true",
     "spark.yarn.scheduler.heartbeat.interval-ms" : "7200000",
     "spark.executor.heartbeatInterval" : "1800s",
-    "spark.network.timeout" : "5400s",
+    "spark.network.timeout" : "5400s",
+    "spark.executor.extraJavaOptions" : "-Xmx4g",
+    "spark.driver.extraJavaOptions" : "-Xmx4g",
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
-    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=2048m -Xmx2g",
-    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=2048m -Xmx2g"
+    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=2048m",
+    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=2048m"
   }
 }
 {% endif %}
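Note: after PATCH 158 the template declares spark.driver.extraJavaOptions and spark.executor.extraJavaOptions twice each — the short "-Xmx4g" entries near the timeouts plus the long logging/storage entries at the bottom. With last-key-wins JSON parsing the long entries typically shadow the short ones, so the 4g heap hint would never reach the JVM (and, per the note above, -Xmx is disallowed in these options anyway). A sketch of a single merged executor entry, where "..." stands in for the unchanged option string rather than literal content:

    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver ... -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=2048m"
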
From 88e384098efd4733baa74d2adc2e838c19697142 Mon Sep 17 00:00:00 2001
From: shubham72a <141618807+shubham72a@users.noreply.github.com>
Date: Wed, 15 Nov 2023 14:53:00 +0530
Subject: [PATCH 159/161] Update jets3t.j2 - increase buffer size to 10
---
 ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2
index 1ca346578c..0ff6e2cea5 100644
--- a/ansible/roles/data-products-deploy/templates/jets3t.j2
+++ b/ansible/roles/data-products-deploy/templates/jets3t.j2
@@ -4,5 +4,5 @@ s3service.https-only=true
 {% if jets3t_s3_default_bucket_location %}
 s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }}
 {% endif %}
-uploads.stream-retry-buffer-size=2147483646
+uploads.stream-retry-buffer-size=10147483646
 s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %}

From 942ffbb673b8d7a81618b07e728322de680d664c Mon Sep 17 00:00:00 2001
From: shubham72a <141618807+shubham72a@users.noreply.github.com>
Date: Wed, 15 Nov 2023 16:06:19 +0530
Subject: [PATCH 160/161] Update jets3t.j2 - set buffer-size to 8 GB
---
 ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2
index 0ff6e2cea5..29e6a09d85 100644
--- a/ansible/roles/data-products-deploy/templates/jets3t.j2
+++ b/ansible/roles/data-products-deploy/templates/jets3t.j2
@@ -4,5 +4,5 @@ s3service.https-only=true
 {% if jets3t_s3_default_bucket_location %}
 s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }}
 {% endif %}
-uploads.stream-retry-buffer-size=10147483646
+uploads.stream-retry-buffer-size=8589934592
 s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %}

From faca6464c5dda5b2a4c1a114f193b892436fc5de Mon Sep 17 00:00:00 2001
From: shubham72a <141618807+shubham72a@users.noreply.github.com>
Date: Wed, 15 Nov 2023 16:34:51 +0530
Subject: [PATCH 161/161] Update jets3t.j2 - revert buffer size
---
 ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2
index 29e6a09d85..1ca346578c 100644
--- a/ansible/roles/data-products-deploy/templates/jets3t.j2
+++ b/ansible/roles/data-products-deploy/templates/jets3t.j2
@@ -4,5 +4,5 @@ s3service.https-only=true
 {% if jets3t_s3_default_bucket_location %}
 s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }}
 {% endif %}
-uploads.stream-retry-buffer-size=8589934592
+uploads.stream-retry-buffer-size=2147483646
 s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %}
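Note: the closing jets3t churn (2147483646 -> 10147483646 -> 8589934592 -> back to 2147483646) is consistent with uploads.stream-retry-buffer-size being parsed as a 32-bit integer: 2147483646 is Integer.MAX_VALUE - 1, and both larger values overflow an int, which would explain the final revert. If the value needs to vary per environment, it could follow the template's existing variable style — a sketch with a hypothetical variable (jets3t_stream_retry_buffer_size is not defined by these patches):

    uploads.stream-retry-buffer-size={{ jets3t_stream_retry_buffer_size | default('2147483646') }}
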