diff --git a/.travis.yml b/.travis.yml index 3a4fe6e..c617b98 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,11 +5,14 @@ node_js: # Build the app and a docker image script: - npm run build -- ./scripts/docker-build.sh +- docker build -t ${TRAVIS_COMMIT} --build-arg KILLRVIDEO_YOUTUBE_API_KEY=$KILLRVIDEO_YOUTUBE_API_KEY . # If successful, see if we need to publish also after_success: -- ./scripts/travis-publish.sh +- test -z $TRAVIS_TAG && travis_terminate 0 +- docker tag ${TRAVIS_COMMIT} killrvideo/killrvideo-generator:${TRAVIS_TAG} +- docker login -u $DOCKER_USER -p $DOCKER_PASS +- docker push killrvideo/killrvideo-generator:${TRAVIS_TAG} # Sudo required for doing docker build sudo: required diff --git a/.vscode/launch.json b/.vscode/launch.json index 5e81686..6c4f438 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,6 +1,17 @@ { "version": "0.2.0", "configurations": [ + { + "name": "Docker: Attach to node", + "type": "node", + "request": "attach", + "port": 5858, + "address": "localhost", + "restart": true, + "sourceMaps": true, + "localRoot": "${workspaceRoot}", + "remoteRoot": "/opt/killrvideo-generator" + }, { "name": "Launch", "type": "node", @@ -23,18 +34,6 @@ "console": "integratedTerminal", "sourceMaps": true, "outFiles": ["${workspaceRoot}/dist/*"] - }, - { - "name": "Attach", - "type": "node", - "request": "attach", - "port": 5858, - "address": "localhost", - "restart": true, - "sourceMaps": true, - "outFiles": ["${workspaceRoot}/dist/*"], - "localRoot": "${workspaceRoot}", - "remoteRoot": null } ] } \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 92a06cc..9bf8986 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM node:6.11-slim # Add killrvideo group and user RUN groupadd -r killrvideo --gid=999 \ - && useradd -r -g killrvideo --uid=999 killrvideo + && useradd -m -r -g killrvideo --uid=999 killrvideo # Default to production environment ENV NODE_ENV production @@ -19,6 +19,7 @@ COPY npm-shrinkwrap.json /opt/killrvideo-generator/ # Add dependencies for node-gyp, then run npm install and remove dependencies RUN set -x \ + && printf "deb http://archive.debian.org/debian/ jessie main\ndeb-src http://archive.debian.org/debian/ jessie main\ndeb http://security.debian.org jessie/updates main\ndeb-src http://security.debian.org jessie/updates main" > /etc/apt/sources.list \ && apt-get update \ && apt-get install -y python \ make \ diff --git a/README.md b/README.md index b7c333f..d176397 100644 --- a/README.md +++ b/README.md @@ -11,23 +11,20 @@ This app is packaged and distributed as a Docker container. The typical usage is ## Setting up a Development Environment -After cloning the repo, first install all dependencies: +After cloning the repo, first install all dependencies and build the project: ``` -> npm install +> docker-compose run --no-deps -e NODE_ENV=development generator npm install +> docker-compose run --no-deps generator npm run build ``` -All environment dependencies can be spun up using `docker-compose` (i.e. Etcd and DataStax -Enterprise). First you need to generate a `.env` file that contains information about your -Docker environment. 
-In Windows, from a Powershell command prompt run:
-```
-PS> .\lib\killrvideo-docker-common\create-environment.ps1
+If you have npm available locally, you may use it directly instead:
 ```
-Or on Mac/Linux, run:
-```
-> ./lib/killrvideo-docker-common/create-environment.sh
+> npm install
+> npm run build
 ```
-You can then start those dependencies with:
+
+All environment dependencies can be spun up using `docker-compose` (i.e. DataStax Enterprise
+and the KillrVideo backend). You can start those dependencies with:
 ```
 > docker-compose up -d
 ```
@@ -55,6 +52,10 @@ in `/dist`.
 If using VS Code for development, the tasks checked into the repo under `/.vscode` should allow
 you to start the program with debugging using `F5`.
 
+By default, docker-compose runs the generator with the debugger enabled and exposes port 5858. You can use this port to attach a debugger to the running application.
+
+If you would like to use DataStax Studio to work directly with the database, uncomment the studio definition in ./docker-compose.yaml.
+
 ## Releasing
 
 The app is released as a Docker image for use with the service project implementations.
@@ -69,4 +70,27 @@ Docker image.
 
 We use Travis CI for doing continuous integration builds and it will use those scripts to
 automatically publish any tagged Git commits to Docker Hub. You can, of course, manually
-build and publish Docker images with those scripts as well.
\ No newline at end of file
+build and publish Docker images with those scripts as well.
+
+## Known Issues
+
+### Error: Bad Request
+
+The generator repeatedly logs this error:
+
+```
+generator_1 | 2019-01-18T13:37:35.543Z - error: Error: Bad Request
+generator_1 |     at Request._callback (/opt/killrvideo-generator/node_modules/google-auth-library/lib/transporters.js:85:15)
+```
+
+This is usually caused by an unset youTubeApiKey. Check that youTubeApiKey is set in the config/local.yaml file and that the key is still valid.
+
+### Could not initialize Cassandra
+
+```
+generator_1 | 2019-01-18T13:46:21.015Z - debug: NoHostAvailableError: All host(s) tried for query failed. First host tried, 172.26.0.2:9042: Error: connect ECONNREFUSED 172.26.0.2:9042. See innerErrors.
+...
+generator_1 | 2019-01-18T13:46:21.018Z - verbose: Could not initialize Cassandra. Retry 1 in 10000ms.
+```
+
+Startup of DSE/Cassandra takes a noticeable amount of time, so this error appears whenever the generator and DSE are started at the same time. It can usually be ignored, but if the connection takes too long, check that the dse container is running and inspect its logs.
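For reference, the YouTube API key mentioned in the Known Issues section above is supplied through a local configuration override rather than by editing `config/default.yaml`. A minimal sketch of such a `config/local.yaml` is shown below; the key value is a placeholder, not a real key, and the sketch assumes the usual node-config convention of `local.yaml` overriding `default.yaml`:

```yaml
# config/local.yaml -- local overrides for settings in config/default.yaml
# Replace the placeholder with a valid YouTube Data API key.
youTubeApiKey: YOUR_YOUTUBE_API_KEY
```

With a valid key in place, the repeated `Error: Bad Request` messages from the generator should stop.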
diff --git a/config/default.yaml b/config/default.yaml index 6272f50..10a907e 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -1,5 +1,5 @@ # Key required for calling YouTube APIs -youTubeApiKey: REPLACE_WITH_YOUR_KEY +youTubeApiKey: REPLACE_WITH_YOUR_KEY_IN config/local.yaml # Cassandra settings cassandra: @@ -25,10 +25,23 @@ schedules: - every 20 seconds addSampleUser: - - every 30 minutes + - every 10 minutes addSampleVideo: - - every 8 hours + - every 10 minutes addSampleVideoView: - - every 1 seconds \ No newline at end of file + - every 1 seconds + +services: + web: ['web:3000'] + cassandra: ['dse:9042'] + dse-search: ['dse:8983'] + UploadsService: ['backend:50101'] + RatingsService: ['backend:50101'] + CommentsService: ['backend:50101'] + SearchService: ['backend:50101'] + StatisticsService: ['backend:50101'] + VideoCatalogService: ['backend:50101'] + UserManagementService: ['backend:50101'] + SuggestedVideoService: ['backend:50101'] diff --git a/docker-compose.yaml b/docker-compose.yaml index c1b2b84..1c7a4aa 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,3 +1,62 @@ -# No services specific to this project, instead just uses base services defined -# in .\lib\killrvideo-docker-common\docker-compose.yaml -version: '2' \ No newline at end of file +version: '3' +services: + # The KillrVideo Sample Data Generator + generator: + build: . + #image: killrvideo/killrvideo-generator + volumes: + - .:/opt/killrvideo-generator + ports: + - "5858:5858" + depends_on: + - dse + - backend + environment: + KILLRVIDEO_LOGGING_LEVEL: debug + command: "node --debug /opt/killrvideo-generator/dist/index.js" + + # DataStax Enterprise + # start with search and graph modes enabled ("-s -g") + dse: + image: datastax/dse-server:6.0.0 + command: [ -s -g ] + ports: + - "9042:9042" + - "8983:8983" + - "8182:8182" + environment: + DS_LICENSE: accept + # Allow DSE to lock memory with mlock + cap_add: + - IPC_LOCK + ulimits: + memlock: -1 + + # Container to load KillrVideo schema and search config into DSE + # Provides options to configure secure users as well + # Runs only once and exits + dse-config: + image: killrvideo/killrvideo-dse-config:2.2.1 + depends_on: + - dse + environment: + KILLRVIDEO_SERVICE_DISCOVERY_DISABLED: 'true' + + # The KillrVideo backend application used by the generator to submit sample data + backend: + image: hadesarchitect/killrvideo-nodejs:no-etcd + ports: + - "50101:50101" + depends_on: + - dse + environment: + KILLRVIDEO_LOGGING_LEVEL: debug + +# studio: +# image: killrvideo/killrvideo-studio:2.0.0 +# ports: +# - "9091:9091" +# depends_on: +# - dse +# environment: +# DS_LICENSE: accept diff --git a/lib/killrvideo-docker-common/.gitignore b/lib/killrvideo-docker-common/.gitignore deleted file mode 100644 index 445ea2d..0000000 --- a/lib/killrvideo-docker-common/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.env -.idea -*/.idea/* - diff --git a/lib/killrvideo-docker-common/LICENSE b/lib/killrvideo-docker-common/LICENSE deleted file mode 100644 index 021eb6b..0000000 --- a/lib/killrvideo-docker-common/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2016 Luke Tillman - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/lib/killrvideo-docker-common/README.md b/lib/killrvideo-docker-common/README.md deleted file mode 100644 index 7b4d165..0000000 --- a/lib/killrvideo-docker-common/README.md +++ /dev/null @@ -1 +0,0 @@ -# KillrVideo Common Docker Environment Setup diff --git a/lib/killrvideo-docker-common/config-secure/dse.yaml b/lib/killrvideo-docker-common/config-secure/dse.yaml deleted file mode 100644 index 362216d..0000000 --- a/lib/killrvideo-docker-common/config-secure/dse.yaml +++ /dev/null @@ -1,1183 +0,0 @@ -# Memory limit for DSE In-Memory tables as a fraction of system memory (the default is 0.2, or 20%) -max_memory_to_lock_fraction: 0.20 - -# # Can also be specified as a maximum in MB; the fraction value is ignored if this is set to a non-zero value. -# max_memory_to_lock_mb: 10240 - -########################## -# Authentication options -# -# These options are used if the authenticator option in cassandra.yaml is set to -# com.datastax.bdp.cassandra.auth.DseAuthenticator -# -# The enabled option controls whether the DseAuthenticator will authenticate users. If -# set to true users will be authenticated, if set to false they will not. -# -# DseAuthenticator allows multiple authentication schemes to be used at the same time. -# The schemes to be used are controlled by the default_scheme and allowed_schemes options. -# A driver can select the scheme to use during authentication. -# -# The default_scheme option selects which authentication scheme will be used if the driver -# does not request a specific scheme. This can be one of the following values: -# internal - plain text authentication using the Cassandra password authenticator -# ldap - plain text authentication using the passthrough ldap authenticator -# kerberos - gssapi authentication using the kerberos authenticator -# The other_schemes option is a list of schemes that can also be selected for use by a -# driver and can be a list of the above schemes. -# -# The scheme_permissions option controls whether roles need to have permission granted to -# them in order to use specific authentication schemes. These permissions can be granted -# only when the DseAuthorizer is used. -# -# The allow_digest_with_kerberos option controls whether digest-md5 authentication is also -# allowed when kerberos is one of the authentication schemes. If set to false, it will not -# be allowed. You must set allow_digest_with_kerberos to true in analytics clusters to use Hadoop -# inter-node authentication with Hadoop and Spark jobs. -# -# The plain_text_without_ssl controls how the DseAuthenticator reacts to plain text -# authentication requests over unencrypted client connections. It can be one of: -# block - block the request with an authentication error -# warn - log a warning about the request but allow it to continue -# allow - allow the request without any warning -# -# The transitional_mode option allows the DseAuthenticator to operate in a transitional -# mode during setup of authentication in a cluster. This can be one of the following values: -# disabled - transitional mode is disabled -# permissive - Only super users are authenticated and logged in, all other -# authentication attempts will be logged in as the anonymous user -# normal - If credentials are passed they are authenticated. If the -# authentication is successful then the user is logged in, otherwise -# the user is logged in as anonymous. If no credentials are passed, -# then the user is logged in as anonymous -# strict - If credentials are passed they are authenticated. 
If the -# authentication is successful, the user is logged in. If the -# authentication fails, an authentication error is returned. If no -# credentials are passed, the user is logged in as anonymous -authentication_options: - enabled: true -# default_scheme: internal -# allow_digest_with_kerberos: true -# plain_text_without_ssl: warn -# transitional_mode: default -# other_schemes: -# scheme_permissions: false - -########################## -# Role Management Options -# -# These options are used when the role_manager option in cassandra.yaml is set to -# com.datastax.bdp.cassandra.auth.DseRoleManager -# -# mode can be one of: -# internal - the granting and revoking of roles is managed internally -# using the GRANT ROLE and REVOKE ROLE statements -# ldap - the granting and revoking of roles is managed by an external -# LDAP server configured using the ldap_options. -role_management_options: - mode: internal - -########################## -# Authorization options -# -# The enabled option controls whether the DseAuthorizer will perform authorization. If -# set to true authorization is performed, if set to false it is not. -# -# The transitional_mode option allows the DseAuthorizer to operate in a transitional -# mode during setup of authorization in a cluster. This can be one of the following values: -# disabled - transitional mode is disabled -# normal - permissions can be granted to resources but are not enforced -# strict - permissions can be granted to resources and are enforced on -# authenticated users. They are not enforced against anonymous -# users -# allow_row_level_security - In order for row level security to be used, this must be set to allow it -# for the entire system. true or false -authorization_options: - enabled: true -# transitional_mode: disabled -# allow_row_level_security: false - -########################## -# Kerberos options -# -# The qop is the Quality of Protection (QOP) values that clients and servers -# can use for each connection. Below is a list of valid values and their meanings. -# auth - (default) authentication only -# auth-int - authentication plus integity protection of all transmitted data -# auth-conf - authentication plus integrity protection and encryption of all -# transmitted data -# -# Warning - Encryption using auth-conf is separate and completely independent -# of whether encryption is done using SSL. If auth-conf is selected here -# and SSL is enabled, the transmitted data is encrypted twice. -kerberos_options: - keytab: resources/dse/conf/dse.keytab - service_principal: dse/_HOST@REALM - http_principal: HTTP/_HOST@REALM - qop: auth - -########################## -# LDAP options -# -# These are options are used when the com.datastax.bdp.cassandra.auth.LdapAuthenticator -# is configured as the authenticator in cassandra.yaml - -# ldap_options: -# server_host: -# -# # Port to use to connect to the LDAP server. This is normally 389 for unencrypted -# # connections and 636 for ssl encrypted connections. If use_tls is set to true, use the -# # unencrypted port -# server_port: 389 -# -# # The distinguished name (DN) of the user that is used to search for other users on the -# # LDAP server. This user should have only the necessary permissions to do the search -# # If not present then an anonymous bind is used for the search -# search_dn: -# -# # Password of the search user -# search_password: -# -# # Set to true to use an SSL encrypted connection. 
In this case the server_port needs -# # to be set to the LDAP port for the server -# use_ssl: false -# -# # Set to true to initiate a TLS encrypted connection on the default ldap port -# use_tls: false -# -# truststore_path: -# truststore_password: -# truststore_type: jks -# user_search_base: -# user_search_filter: (uid={0}) -# -# # Set to the attribute on the user entry containing group membership information. -# user_memberof_attribute: memberof -# -# # The group_search_type defines how group membership will be determined for a user. It -# # can be one of: -# # directory_search - will do a subtree search of group_search_base using -# # group_search_filter to filter the results -# # memberof_search - will get groups from the memberof attribute of the user. This -# # requires the directory server to have memberof support -# group_search_type: directory_search -# group_search_base: -# group_search_filter: (uniquemember={0}) -# -# # The attribute in the group entry that holds the group name. -# group_name_attribute: cn -# -# # Validity period for the credentials cache in milli-seconds (remote bind is an expensive -# # operation). Defaults to 0, set to 0 to disable. -# credentials_validity_in_ms: 0 -# -# # Validity period for the search cache in seconds. Defaults to 0, set to 0 to disable. -# search_validity_in_seconds: 0 -# -# connection_pool: -# max_active: 8 -# max_idle: 8 - -# To ensure that records with TTLs are purged from DSE Search indexes when they expire, DSE -# periodically checks all indexes for expired documents and deletes them. These settings -# control the scheduling and execution of those checks. -ttl_index_rebuild_options: - - # By default, schedule a check every 300 seconds: - fixed_rate_period: 300 - - # The first check is delayed to speed up startup time: - initial_delay: 20 - - # All documents determined to be expired are deleted from the index during each check, but - # to avoid memory pressure, their unique keys are retrieved and deletes issued in batches. - # This determines the maximum number of documents per batch: - max_docs_per_batch: 4096 - - # Maximum number of search indexes that can execute TTL cleanup concurrently: - thread_pool_size: 1 - -# DSE Search resource upload size limit in MB. A value of '0' disables resource uploading. -solr_resource_upload_limit_mb: 10 - -# Transport options for inter-node communication between DSE Search nodes. -shard_transport_options: - # The cumulative shard request timeout, in milliseconds (default is 60000). - netty_client_request_timeout: 60000 - -# ---- DSE Search index encryption options - -solr_encryption_options: -# # Whether to allocate shared index decryption cache off JVM heap. -# # Default is off heap allocation (true). -# decryption_cache_offheap_allocation: true - -# # The maximum size of shared DSE Search decryption cache, in MB. -# # Default is 256 MB. -# decryption_cache_size_in_mb: 256 - -# ---- DSE Search indexing settings - -# # Max number of concurrent asynchronous indexing threads per Solr core. If set -# # to 1, the system reverts to the synchronous behavior, where data is -# # synchronously written into Cassandra and indexed by Solr. -# # -# # Default: On most Linux distributions, the number of physical CPU cores (even if those cores -# # have multiple threads). On other platforms, this defaults to the number of logical -# # CPU cores visible to the JVM. If the system property "cassandra.available_processors" -# # is set, the default here will be that value divided by the number of threads per CPU core. 
-# max_solr_concurrency_per_core: 2 -# -# # Allows back pressure system to adapt max auto soft commit time (defined per core in solrconfig.xml) to the actual load. -# # Setting is respected only for NRT (near real time) cores. When core has RT (real time) enabled, adaptive commits -# # are disabled regardless of this property value. -# # -# # Default: enabled (true) -# enable_back_pressure_adaptive_nrt_commit: true -# -# # The back pressure threshold is the target total number of queued asynchronous indexing requests per core; -# # the back pressure mechanism will throttle incoming requests to keep the queue size as close to the threshold as possible. -# # -# # Default: 1000 * max_solr_concurrency_per_core -# back_pressure_threshold_per_core: 2000 -# -# # The max time to wait for flushing of async index updates, happening either -# # at Solr commit time or Cassandra flush time. -# # Flushing should always complete successfully, in order to fully sync Solr indexes -# # with Cassandra data, so should always be set at a reasonable high value. -# # -# # Default: 5 minutes -# flush_max_time_per_core: 5 -# -# # The max time to wait for each Solr core to load upon startup or create/reload operations. -# # This is an advanced option, which should be changed only if any exceptions happen during core loading. -# # -# # Default: 5 minutes -# load_max_time_per_core: 5 -# -# # Applies the configured Cassandra disk failure policy to index write failures. -# # Default is disabled (false). -# enable_index_disk_failure_policy: false - -# # The directory to store index data; each DSE Search index will be stored under -# # a solrconfig_data_dir/keyspace.table directory. -# # Default is a solr.data directory inside Cassandra data directory, or as specified -# # by the dse.solr.data.dir system property -# solr_data_dir: /MyDir - -# # The Lucene field cache has been deprecated, instead set docValues="true" on the field -# # in the schema.xml file. After doing so RELOAD the core and reindex. -# # Default: false -# solr_field_cache_enabled: false - -# ---- Solr CQL query options - -# # Max number of threads to use for retrieving rows during CQL Solr queries. -# # This value is cross-request and cross-core. -# # Default is "number of available processors" * 10. -# cql_solr_query_executor_threads: 2 -# -# # Max time in milliseconds to wait for either each row (pre-5.0) or all rows (5.0 onwards) -# # to be read from Cassandra during CQL Solr queries. -# # Default is 10000 (10 seconds). -# cql_solr_query_row_timeout: 10000 - -########################## -# Global performance service options - -# # Maximum number of background threads used by the performance service. -# # Defaults to concurrent_writes specified in cassandra.yaml. -# performance_max_threads: 32 -# -# # The number of queued tasks in the backlog when the number of performance_max_threads are busy (minimum 0). -# performance_queue_capacity: 32000 -# -# # If the performance service requests more tasks than (performance_max_threads + performance_queue_capacity), -# # a dropped task warning will be issued. This indicates that collected statistics may not be up to date because the -# # server couldn't keep up under the current load. -# -# # You may either disable or reconfigure some services, or increase the queue size. - -########################## -# Core performance service options - -graph_events: - ttl_seconds: 600 - -# cql_slow_log_options: -# enabled: true -# -# # When t > 1, log queries taking longer than t milliseconds. 
-# # 0 <= t <= 1, log queries above t percentile -# threshold: 200.0 -# -# # Initial number of queries before percentile filter becomes active -# minimum_samples: 100 -# -# ttl_seconds: 259200 -# -# # keeps slow queries in-memory only and doesn't write data to C* -# # WARNING - if this is set to 'false' then set threshold >= 2000, otherwise there will be a high load on C* -# skip_writing_to_db: true -# -# # the number of slow queries to keep in-memory -# num_slowest_queries: 5 - -cql_system_info_options: - enabled: false - refresh_rate_ms: 10000 - -resource_level_latency_tracking_options: - enabled: false - refresh_rate_ms: 10000 - -db_summary_stats_options: - enabled: false - refresh_rate_ms: 10000 - -cluster_summary_stats_options: - enabled: false - refresh_rate_ms: 10000 - -spark_cluster_info_options: - enabled: false - refresh_rate_ms: 10000 - -# ---- Spark application stats options -spark_application_info_options: - enabled: false - refresh_rate_ms: 10000 - - driver: - # enables or disables writing of the metrics collected at Spark Driver to Cassandra - sink: false - - # enables or disables Spark Cassandra Connector metrics at Spark Driver - connectorSource: false - - # enables or disables JVM heap and GC metrics at Spark Driver - jvmSource: false - - # enables or disables application state metrics - stateSource: false - - executor: - # enables or disables writing of the metrics collected at executors to Cassandra - sink: false - - # enables or disables Spark Cassandra Connector metrics at executors - connectorSource: false - - # enables or disables JVM heap and GC metrics at executors - jvmSource: false - -# Column Family Histogram data tables options -histogram_data_options: - enabled: false - refresh_rate_ms: 10000 - retention_count: 3 - -# User/Resource latency tracking settings -user_level_latency_tracking_options: - enabled: false - refresh_rate_ms: 10000 - top_stats_limit: 100 - quantiles: false - -# ---- DSE Search Performance Objects - -solr_indexing_error_log_options: - enabled: false - ttl_seconds: 604800 - async_writers: 1 - -solr_slow_sub_query_log_options: - enabled: false - ttl_seconds: 604800 - async_writers: 1 - threshold_ms: 3000 - -solr_update_handler_metrics_options: - enabled: false - ttl_seconds: 604800 - refresh_rate_ms: 60000 - -solr_request_handler_metrics_options: - enabled: false - ttl_seconds: 604800 - refresh_rate_ms: 60000 - -solr_index_stats_options: - enabled: false - ttl_seconds: 604800 - refresh_rate_ms: 60000 - -solr_cache_stats_options: - enabled: false - ttl_seconds: 604800 - refresh_rate_ms: 60000 - -solr_latency_snapshot_options: - enabled: false - ttl_seconds: 604800 - refresh_rate_ms: 60000 - -# Node health is a score-based representation of how fit a node is to handle queries. The score is a -# function of how long a node has been up and the rate of dropped mutations in the recent past. -node_health_options: - refresh_rate_ms: 60000 - # The amount of continuous uptime required for the node to reach the maximum uptime score. If you - # are concerned with consistency during repair after a period of downtime, you may want to - # temporarily increase this to the expected time it will take to complete repair. - # - # (Default: 86400 seconds, or 1 day) - uptime_ramp_up_period_seconds: 86400 - # The window in the past over which the rate of dropped mutations affects the node health score. 
- # (Default: 30 minutes) - dropped_mutation_window_minutes: 30 - -# If enabled (true), replica selection for distributed DSE Search queries takes node health into account -# when multiple candidates exist for a particular token range. Set this to false to ignore -# node health when choosing replicas. -# -# Health-based routing allows us to make a trade-off between index consistency and query throughput. If -# the primary concern is query performance, it may make sense to set this to "false". -# -# Default is enabled (true). -enable_health_based_routing: true - -# If enabled (true), DSE Search reindexing of bootstrapped data will happen asynchronously, and the node will join the ring straight -# after bootstrap. -# -# Default is disabled (false): the node will wait for reindexing of bootstrapped data to finish before joining the ring. -async_bootstrap_reindex: false - -# Lease metrics. Enable these to help monitor the performance of the lease subsystem. -# ttl_seconds controls how long the log of lease holder changes persists. -lease_metrics_options: - enabled: false - ttl_seconds: 604800 - -# The directory where system keys are kept -# -# Keys used for sstable encryption must be distributed to all nodes -# DSE must be able to read and write to the directory. -# -# This directory should have 700 permissions and belong to the dse user -system_key_directory: /etc/dse/conf - -# If this is set to true, DSE will expect the following config values to be encrypted: -# resources/cassandra/conf/cassandra.yaml: -# server_encryption_options.keystore_password -# server_encryption_options.truststore_password -# client_encryption_options.keystore_password -# client_encryption_options.truststore_password -# resources/dse/conf/dse.yaml: -# ldap_options.search_password -# ldap_options.truststore_password -# -# It's an error if the passwords aren't encrypted. -# Config values can be encrypted with "dsetool encryptconfigvalue" -config_encryption_active: false - -# The name of the system key used to encrypt / decrypt passwords stored -# in configuration files. -# -# If config_encryption_active is true, it's an error if a valid key with -# this name isn't in the system key directory keyfiles, and KMIP managed -# keys can be created with "dsetool createsystemkey" -config_encryption_key_name: system_key - -########################## -# Spark-related settings - -# The fraction of available system resources to be used by Spark Worker. -# This the only initial value, once it is reconfigured, the new value is stored -# and retrieved on next run. -initial_spark_worker_resources: 0.7 - -# The length of a shared secret used to authenticate Spark components and encrypt the connections between them. -# Note that this is not the strength of the cipher used for encrypting connections. -spark_shared_secret_bit_length: 256 - -# Enables Spark security based on shared secret infrastructure. This means enabling mutual authentication of -# the Spark components as well as optionally encryption of communication channels except Web UI. -spark_security_enabled: false - -# Enables encryption on Spark connections except Web UI. It uses Digest-MD5 SASL based encryption mechanism. -# This options does make sense only if spark_security_enabled is true. -spark_security_encryption_enabled: false - -# # How often Spark plugin should check for Spark Master / Worker readiness to start. The value is -# # a time (in ms) between subsequent retries. 
-# spark_daemon_readiness_assertion_interval: 1000 - -# Beginning in DSE 5.1: Communication between Spark applications and the resource manager are now routed through -# the CQL native protocol. Enabling client encryption in the cassandra.yaml will also enable encryption for -# the communication with the DSE Spark Master. The communication between Spark Driver and Spark Executors can be -# secured by enabling Spark authentication and encryption for that application. -# On the other hand, mutual authentication and encryption of communication between DSE Spark Master and Workers are -# managed by spark_security_enabled and spark_security_encryption_enabled defined above. - -# Spark UI options apply to Spark Master and Spark Worker UIs - thus Spark daemon UIs in general. They do NOT apply to -# user applications even if they run in cluster mode. -spark_ui_options: - # Allowed values are: - # inherit - SSL settings are inherited from Cassandra client encryption options - # custom - SSL settings from encryption_options below - encryption: inherit - - encryption_options: - enabled: false - keystore: resources/dse/conf/.ui-keystore - keystore_password: cassandra - # require_client_auth: false - # Set trustore and truststore_password if require_client_auth is true - # truststore: resources/dse/conf/.ui-truststore - # truststore_password: cassandra - # More advanced defaults below: - # protocol: TLS - # algorithm: SunX509 - # store_type: JKS - # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] - -# Configure the way how the driver and executor processes are created and managed. -spark_process_runner: - # Allowed options are: default, run_as - runner_type: default - - # RunAs runner uses sudo to start Spark drivers and executors. A set of predefined fake users, called slots, is used - # for this purpose. All drivers and executors owned by some DSE user are run as some slot user x. At the same time - # drivers and executors of any other DSE user use different slots. - # Setting up slots: - # 1. create n users (n - number of slots), call them, say slot1, slot2, ..., slotn, with no login; each such user - # should have primary group the same as its name, so for example slot1:slot1, slot2:slot2, ... - # 2. add DSE service user (the one as who DSE server is run) to the slot user groups - DSE service user has to be - # in all slot user groups - # 3. modify sudoers files so that: - # a) DSE service user can execute any command as any slot user without providing a password - # b) umask is overridden to 007 for those commands so that files created by sub-processes will not be accessible - # by anyone by default, - # For example, if we have two slot users slot1, slot2, and DSE service user dse, this should be added to sudoers: - # Runas_Alias SLOTS = slot1, slot2 - # Defaults>SLOTS umask=007 - # Defaults>SLOTS umask_override - # dse ALL=(SLOTS) NOPASSWD: ALL - run_as_runner_options: - user_slots: - - slot1 - - slot2 - -########################## -# DSE File System options -# dsefs_options: -# -# # Determines whether DSEFS should be enabled on this node. -# # If not present, DSEFS is enabled only on the nodes that run Spark workload. -# enabled: -# -# # Keyspace for storing the DSE FS metadata -# keyspace_name: dsefs -# -# # The local directory for storing node-local metadata e.g. 
the node identifier -# # The amount of data stored there is tiny, so no special throughput, latency nor capacity are required. -# # The work directory must not be shared by DSE FS nodes. -# work_dir: /var/lib/dsefs -# -# # Port for DSE FS clients, the service on this port will be bound to RPC address. -# public_port: 5598 -# -# # Port for internode communication, must be not visible from outside of the cluster. -# # It will be bound to listen address. -# private_port: 5599 -# -# # Set of directories for storing the file data. 'dir' attribute is mandatory. -# # It is recommended to put them on different physical devices than devices used for Cassandra. -# # Using multiple directories on JBOD improves performance and capacity. -# data_directories: -# - dir: /var/lib/dsefs/data -# -# # How much data should be placed in this directory relative to other directories in the cluster -# storage_weight: 1.0 -# -# # Reserved space (in bytes) that is not going to be used for storing blocks -# min_free_space: 5368709120 -# -# # More advanced settings below: -# -# # How long DseFs Server is going to wait for services to bootstrap -# service_startup_timeout_ms: 30000 -# -# # How long DseFs Server is going to wait for services to close -# service_close_timeout_ms: 600000 -# -# # How long DseFs Server is going to wait close all pending connections during shutdown -# server_close_timeout_ms: 2147483647 # Integer.MAX_VALUE -# -# # The maximum accepted size of a compression frame (compression frame size is specified by a user during file -# # upload) -# compression_frame_max_size: 1048576 -# -# # Maximum number of elements in single DseFs Server query cache. DseFs reuses this value for every cache that -# # stores database query results. -# query_cache_size: 2048 -# -# # How long DseFs Server query cache element exists in cache. DseFs reuses this value for every cache that -# # stores database query results. 
-# query_cache_expire_after_ms: 2000 -# -# gossip_options: -# # The delay between gossip rounds -# round_delay_ms: 2000 -# -# # How long to wait after registering the Location and reading back all other Locations from Cassandra -# startup_delay_ms: 5000 -# -# # How long to wait after announcing shutdown before shutting down the node -# shutdown_delay_ms: 10000 -# -# rest_options: -# # How long RestClient is going to wait for a response corresponding to a given request -# request_timeout_ms: 330000 -# -# # How long RestClient is going to wait for establishing a new connection -# connection_open_timeout_ms: 55000 -# -# # How long RestClient is going to wait until all pending transfers are complete before closing -# client_close_timeout_ms: 60000 -# -# # How long to wait for the server rest call to complete -# server_request_timeout_ms: 300000 -# -# # How long to wait until idle connection is closed, 0 if disabled -# idle_connection_timeout_ms: 0 - -# transaction_options: -# # How long to allow a transaction to run before considering it for timing out and rollback -# transaction_timeout_ms: 60000 -# -# # How long to wait before retrying a transaction aborted due to a conflict -# conflict_retry_delay_ms: 10 -# -# # How many times the transaction is retried in case of a conflict before giving up -# conflict_retry_count: 40 -# -# # How long to wait before retrying a failed transaction payload execution -# execution_retry_delay_ms: 1000 -# -# # How many times to retry executing the payload before signaling the error to the application -# execution_retry_count: 3 -# -# block_allocator_options: -# # The overflow_margin_mb and overflow_factor options control how much additional data can be placed -# # on the local (coordinator) before the local node overflows to the other nodes. -# # A local node is preferred for a new block allocation, if -# # used_size_on_the_local_node < average_used_size_per_node * overflow_factor + overflow_margin. -# # The trade-off is between data locality of writes and balancing the cluster. -# # To disable the preference for allocating blocks on the coordinator node, set these values to 0 MB and 1.0. -# overflow_margin_mb: 1024 -# overflow_factor: 1.05 - -########################## -# Audit logging options -audit_logging_options: - enabled: false - - # The logger used for logging audit information - # Available loggers are: - # CassandraAuditWriter - logs audit info to a cassandra table. This logger can be run either synchronously, or - # asynchronously. Audit logs are stored in the dse_audit.audit_log table. - # When run synchronously, a query will not execute until it has been written - # to the audit log table successfully. If there is a failure between when an audit event is - # written, and it's query is executed, the audit logs may contain queries that were never - # executed. - # SLF4JAuditWriter - logs audit info to an slf4j logger. The logger name is `SLF4JAuditWriter`, - # and can be configured in the logback.xml file. - logger: SLF4JAuditWriter - -# # Comma separated list of audit event categories to be included or excluded from the audit log. -# # Defaults to including all categories and keyspaces. -# # Categories are: QUERY, DML, DDL, DCL, AUTH, ADMIN, ERROR -# # Specify either included or excluded categories. Specifying both is an error -# included_categories: -# excluded_categories: - -# # Comma separated list of keyspaces to be included or excluded from the audit log. -# # Specify either included or excluded keyspaces. 
Specifying both is an error -# included_keyspaces: -# excluded_keyspaces: - - # The amount of time, in hours, audit events are retained by supporting loggers - # Currently, only the CassandraAuditWriter supports retention time - # values of 0 or less retain events forever - retention_time: 0 - - cassandra_audit_writer_options: - # Sets the mode the writer runs in. - # - # When run synchronously, a query is not executed until the audit event is successfully written. - # - # When run asynchronously, audit events are queued for writing to the audit table, but are - # not necessarily logged before the query executes. A pool of writer threads consumes the - # audit events from the queue, and writes them to the audit table in batch queries. While - # this substantially improves performance under load, if there is a failure between when - # a query is executed, and it's audit event is written to the table, the audit table may - # be missing entries for queries that were executed. - # valid options are 'sync' and 'async' - mode: sync - - # The maximum number of events the writer will dequeue before writing them out to the table. - # If you're seeing warnings in your logs about batches being too large, decrease this value. - # Increasing batch_size_warn_threshold_in_kb in cassandra.yaml is also an option, but make sure you understand - # the implications before doing so. - # - # Only used in async mode. Must be >0 - batch_size: 50 - - # The maximum amount of time in milliseconds an event will be dequeued by a writer before being written out. This - # prevents events from waiting too long before being written to the table when there's not a lot of queries happening. - # - # Only used in async mode. Must be >0 - flush_time: 500 - - # The number of worker threads asynchronously logging events to the CassandraAuditWriter. - # - # Only used in async mode. Must be >0 - num_writers: 10 - - # The size of the queue feeding the asynchronous audit log writer threads. When there are more events being - # produced than the writers can write out, the queue will fill up, and newer queries will block until there - # is space on the queue. - # If a value of 0 is used, the queue size will be unbounded, which can lead to resource exhaustion under - # heavy query load. - queue_size: 10000 - - # the consistency level used to write audit events - write_consistency: QUORUM - -# # Where dropped events are logged -# dropped_event_log: /var/log/cassandra/dropped_audit_events.log - -# # Partition days into hours by default -# day_partition_millis: 3600000 - -########################## -# System information encryption settings -# -# If enabled, system tables that may contain sensitive information (system.batchlog, -# system.paxos), hints files and Cassandra commit logs are encrypted with the -# encryption settings below. -# -# If DSE Search index encryption is enabled, DSE Search index files are also encrypted with the settings below. -# If backing C* table encryption is enabled, DSE Search commit log is encrypted with settings below. -# -# When enabling system table encryption on a node with existing data, run -# `nodetool upgradesstables -a` on the listed tables to encrypt existing data -# -# When tracing is enabled, sensitive info will be written into the tables in the -# system_traces keyspace. Those tables should be configured to encrypt their data -# on disk by using an encrypting compressor. -# -# DataStax recommends using remote encryption keys from a KMIP server when using Transparent Data Encryption (TDE) features. 
-# Local key support is provided when a KMIP server is not available. -system_info_encryption: - enabled: false - cipher_algorithm: AES - secret_key_strength: 128 - chunk_length_kb: 64 - -# # Selects an alternate key provider for local encryption. Useful for using a kmip host as a key provider. -# key_provider: KmipKeyProviderFactory - -# # If KmipKeyProviderFactory is used for system_info_encryption, this specifies the kmip host to be used. -# kmip_host: kmip_host_name - -########################## -# Kmip hosts options -# -# Connection settings for key servers supporting the kmip protocol -# this allows DSE's encryption features to use keys that are not stored -# on the same machine running DSE. -# -# Hosts are configured as : {connection_settings}, which maps a user definable -# name to a set of hosts, truststores, etc used with a particular key server. This name is then -# used when referring to kmip hosts. DSE supports multiple kmip hosts. - -# kmip_hosts: -# # The unique name of this kmip host/cluster which is specified in the table schema. -# host.yourdomain.com: -# -# # Comma-separated list of kmip hosts host[:port] -# # The current implementation of KMIP connection management only supports failover, so all requests will -# # go through a single KMIP server. There is no load balancing. This is because there aren't any KMIP servers -# # available (that we've found) that support read replication, or other strategies for availability. -# # -# # Hosts are tried in the order they appear here. So add them in the same sequence they'll fail over in -# hosts: kmip1.yourdomain.com, kmip2.yourdomain.com -# -# # keystore/truststore info -# keystore_path: /path/to/keystore.jks -# keystore_type: jks -# keystore_password: password -# -# truststore_path: /path/to/truststore.jks, -# truststore_type: jks -# truststore_password: password -# -# # Keys read from the KMIP hosts are cached locally for the period of time specified below. -# # The longer keys are cached, the fewer requests are made to the key server, but the longer -# # it takes for changes (ie: revocation) to propagate to the DSE node -# key_cache_millis: 300000 -# -# # Socket timeout in milliseconds. -# timeout: 1000 - -# # When 'driver' DSE Search will use Solr cursor paging when pagination is enabled by the CQL driver. -# # -# # When 'off' DSE Search will ignore the driver's pagination settings and use normal Solr paging unless: -# # - The current workload is an analytics workload (ex. SearchAnalytics). -# # - The query parameter 'paging' is set to 'driver'. -# # -# # Default is 'off' -# # -# cql_solr_query_paging: off - -# Local settings for tiered storage -# -# Tiered supports multiple disk configurations, which are configured as : , and specified in DDL -# The tiers themselves are unnamed, and are just collections of paths, which need to be defined in the order they're to be used. -# Typically, you'd put your fastest storage in the top tier, and go down from there. -# -# Storage configurations don't need to be homogenous across the cluster, and internally, each node will only make use of the -# the number of tiers it actually has configured, or the number of tiers configured to be used in the DDL, whichever is less. -# -# Although the behavior of the tiered strategy for a given table is configured in the DDL, these settings can -# be overridden locally, per node, by specifying 'local_options' : {:, ...} in a config. 
This can be useful for testing -# options before deploying cluster wide, or for storage configurations which don't map cleanly to the DDL configuration. -# -# tiered_storage_options: -# strategy1: -# tiers: -# - paths: -# - /mnt1 -# - /mnt2 -# - paths: [ /mnt3, /mnt4 ] -# - paths: [ /mnt5, /mnt6 ] -# -# local_options: -# k1: v1 -# k2: v2 -# -# 'another strategy': -# tiers: [ paths: [ /mnt1 ] ] - -########################## -# DSE Advanced Replication configuration settings -# -# DSE Advanced replication supports one-way distributed data replication from remote -# clusters to central data hubs. -# When conf_driver_password_encryption_enabled: true, the configured passwords (including C* password, SSL keystore/truststore -# password, etc.) stored in the advrep config are expected to be encrypted -# with the dse configuration encryption using the systemkey. The same systemkey -# that was used to create the passwords, must be copied to every node in -# the cluster. -# advanced_replication_options: -# enabled: false -# conf_driver_password_encryption_enabled: false - -# # The directory under which Advanced Replication files (e.g. replication log files) will be stored. -# advanced_replication_directory: /var/lib/cassandra/advrep - -# # The base path that will be prepended to paths in the Advanced Replication -# # configuration locations, including locations to SSL keystore, SSL truststore etc. -# security_base_path: /base/path/to/advrep/security/files/ - -########################## -# These internode_messaging_options configure network services for internal communication -# for all nodes. These settings must be identical on all nodes in the cluster. -internode_messaging_options: - # TCP listen port (mandatory) - port: 8609 - -# # Max message frame length (default 256MB) -# frame_length_in_mb: 256 - -# # Number of server acceptor threads (default is number of available processors) -# server_acceptor_threads: 8 - -# # Number of server worker threads (default is number of available processors * 8) -# server_worker_threads: 16 - -# # Max number of client connections -# client_max_connections: 100 - -# # Number of client worker threads (default is number of available processors * 8) -# client_worker_threads: 16 - -# # Timeout for comm handshake process (default is 10 seconds) -# handshake_timeout_seconds: 10 - -# # Client request timeout, in seconds (default is 60). -# client_request_timeout_seconds: 60 - -########################## -# Graph configuration -# Contains all system-level configuration options and those shared between graph -# instances. -graph: - # The number of stale rows per second to clean from each graph's adjacency cache. - # Value: integer. - adjacency_cache_clean_rate: 1024 - - # The maximum entry size in each graph's adjacency cache. When set to zero, the - # default is calculated based on the cache size and the number of CPUs. Entries - # that would exceed this size are quietly dropped by the cache without producing - # an explicit error or log message. Value: integer. - adjacency_cache_max_entry_size_in_mb: 0 - - # The amount of ram to allocate to each graph's adjacency (edge and property) - # cache. Value: integer. - adjacency_cache_size_in_mb: 128 - - # Maximum time to wait for an analytic (Spark) traversal to evaluate. Value: a - # duration in minutes. - analytic_evaluation_timeout_in_minutes: 10080 - - # Enables or disables Gremlin Server. Value: boolean. - gremlin_server_enabled: true - - # The number of stale entries per second to clean from the adjacency cache. 
Value: - # integer. - index_cache_clean_rate: 1024 - - # The maximum entry size in the index adjacency cache. When set to zero, the - # default is calculated based on the cache size and the number of CPUs. Entries - # that would exceed this size are quietly dropped by the cache without producing - # an explicit error or log message. Value: integer. - index_cache_max_entry_size_in_mb: 0 - - # The amount of ram to allocate to the index cache. Value: integer. - index_cache_size_in_mb: 128 - - # The maximum number of CQL queries that can be queued as a result of Gremlin - # requests. Incoming queries are rejected if the queue size exceeds this setting. - # Value: integer. - max_query_queue: 10000 - - # The maximum number of threads to use for graph queries on Cassandra. When this - # option is not set, its effective default is 10 times either the gremlinPool - # setting (if gremlinPool is present in this file and nonzero), or the number of - # available CPU cores (if gremlinPool is not present in this file or set to zero). - # The gremlinPool setting lives under the gremlin_server subsection of the graph - # section in dse.yaml. Value: integer. - # max_query_threads (no explicit default) - - # Maximum time to wait for a real-time traversal to evaluate. Value: a duration in - # seconds. - realtime_evaluation_timeout_in_seconds: 30 - - # Maximum time to wait for cassandra to agree on schema versions before timing - # out. Value: a duration in milliseconds. - schema_agreement_timeout_in_ms: 10000 - - # Controls automatic schema creation. Setting this to "Development" permits - # loading graph data without explicitly specifying a graph schema through the - # graph schema API beforehand. Setting this to "Production" requires explicitly - # specifying a graph schema through the graph schema API before loading any - # dependent graph data (vertices, edges, properties). Value: one of Development, - # Production, Default. - schema_mode: Production - - # Maximum time to wait for a graph-system request to evaluate. Creating a new - # graph is an example of a graph-system request. Value: a duration in seconds. - system_evaluation_timeout_in_seconds: 180 - - # The number of samples to keep when aggregating log events. Only a small subset - # of graph'slog statements use this system. Modifying this setting is rarely - # necessary or helpful. Value: integer. - window_size: 100000 - - # The maximum number of parameters that can be passed on a graph query request for both TinkerPop drivers - # and those using the Cassandra native protocol. Generally speaking, it is considered an anti-pattern to - # pass "massive" numbers of parameters on requests, as it increases the script evaluation time. Consider - # other methods for parameterizing scripts (like passing a single Map or List if many arguments are needed) - # prior to increasing this value. Future releases will have this value set at 16. - max_query_params: 256 - - # Configuration options for standard vertex ID assignment and partitioning - # strategies. - ids: - # Graph's standard vertex ID allocator operates on blocks of contiguous IDs. Each - # block is allocated using a Cassandra lightweight transaction, which requires - # coordination latency. To hide the cost of allocating a standard ID block, the - # allocator begins asynchronously buffering a replacement block whenever a current - # block is nearly empty. This parameter defines "nearly empty". 
It expresses, as - # a floating point number between 0 and 1, how much of a standard ID block can be - # used before graph starts asynchronously allocating its replacement. This - # setting has no effect on custom IDs. Value: double. - block_renew: 0.8 - - # For graphs using standard vertex IDs, if a transaction creates multiple - # vertices, the allocator attempts to assign vertex IDs that colocate vertices on - # the same Cassandra replicas. If an especially large vertex cohort is created, - # the allocator chunks the vertex creation and assigns a random target location to - # avoid load hotspotting. This setting controls the vertex chunk size. The - # setting has no effect on custom IDs. Value: long. - community_reuse: 28 - - # Must be set to either DC_LOCAL or GLOBAL. If set to DC_LOCAL, then - # datacenter_id must be correctly configured on every node in the cluster. If set - # to GLOBAL, then datacenter_id is irrelevant and its value is ignored. This - # option must have the same value on every node in any given cluster. Its value - # can only be changed when the entire cluster is stopped. This setting has no - # effect on custom IDs. Value: one of GLOBAL, DC_LOCAL. - consistency_mode: GLOBAL - - # This option is ignored when consistency_mode is not set to DC_LOCAL. When - # consistency_mode is set to DC_LOCAL, this must be set to an arbitrary value - # between 1 and 127, inclusive. Any given value for this option must appear in at - # most one datacenter whenever consistency_mode is DC_LOCAL. Violating this - # constraint will corrupt the database. This setting has no effect on custom IDs. - # Value: integer. - # datacenter_id (no explicit default) - - # An integer between 1 and 2^24 (both inclusive) that affects maximum ID capacity - # and the maximum storage space used by ID allocations. Lower values reduce both - # storage space consumed and lightweight transaction overhead imposed at startup. - # Lower values also reduce the total number of IDs that can be allocated over the - # life of a graph, because this parameter is proportional to the allocatable ID - # space. However, the proportion coefficient is Long.MAX_VALUE (2^63-1), so ID - # headroom should be sufficient, practically speaking, even if this is set to 1. - # This setting has no effect on custom IDs. Value: integer. - id_hash_modulus: 20 - - # Graph's standard vertex ID allocator claims uniformly-sized blocks of contiguous - # IDs using lightweight transactionson Cassandra. This setting controls the size - # of each block. This setting has no effect on custom IDs. Value: integer. - member_block_size: 512 - - # Contains all registered state listeners identified by their name. - listener: -# # On the following line, "listener_name" is a placeholder string. This can be -# # changed to an arbitrary string composed of lowercase letters, numbers, and -# # underscores, where the string begins with a lowercase letter. -# listener_name: -# # The names of state types that are ignored. All state types but those given are -# # listened to. Value: YAML-formatted list of strings. -# black_types: # This list is empty by default -# -# # The interval in which the state values are logged. Value: a duration in seconds. -# interval_in_seconds: 3600 -# -# # The type of the state listener. Must be one of the following values: slf4j. -# # Value: string. -# type: slf4j -# -# # The names of state types that should be listened. Only those state types are -# # listened to and all others ignored. Value: YAML-formatted list of strings. 
-# white_types: # This list is empty by default -# - # Configuration options graph's internal query forwarding and lightweight - # messaging system. - msg: - # Graph messages must be acknowledged within this interval, or else the message - # will be assumed dropped/failed. Graph will retry the message or fail the - # responsible request if the retry limit has been exceeded. Value: a duration in - # milliseconds. - graph_msg_timeout_in_ms: 5000 - - # Contains all registered event observers identified by their name. - observer: -# # On the following line, "observer_name" is a placeholder string. This can be -# # changed to an arbitrary string composed of lowercase letters, numbers, and -# # underscores, where the string begins with a lowercase letter. -# observer_name: -# # The names of event types that are ignored. All event types but those given are -# # observed. Value: YAML-formatted list of strings. -# black_types: # This list is empty by default -# -# # The names of the graphs for which events are observed. Value: YAML-formatted -# # list of strings. -# observed_graphs: # This list is empty by default -# -# # Threshold at which slow events get reported. Value: a duration in milliseconds. -# slow_threshold_in_ms: 300000 -# -# # The type of the event observer. Must be one of the following values: slf4j, -# # slow_request. Value: string. -# type: slf4j -# -# # The names of event types that should be observed. Only those event types are -# # observed and all others ignored. Value: YAML-formatted list of strings. -# white_types: # This list is empty by default -# - # Shared data. - shared_data: - # The interval between refreshes. Value: a duration in milliseconds. - refresh_interval_in_ms: 60000 - - gremlin_server: - port: 8182 - - threadPoolWorker: 2 - - # The number of "Gremlin" threads available to execute actual scripts in a ScriptEngine. This pool represents - # the workers available to handle blocking operations in Gremlin Server. When set to zero, this value will - # be defaulted to the value of the JVM property "cassandra.available_processors" (if set) - # or to Runtime.getRuntime().availableProcessors(). - gremlinPool: 0 - maxContentLength: 65536000 - - # The maximum length of the content or each chunk. If the content length exceeds this value, the transfer - # encoding of the decoded request will be converted to chunked and the content will be split into multiple - # HttpContent objects. If the transfer encoding of the HTTP request is chunked already, each chunk will be split - # into smaller chunks if the length of the chunk exceeds this value. - maxChunkSize: 4096000 - - # The maximum length of the initial line (e.g. "GET / HTTP/1.0") processed in a request, which essentially - # controls the maximum length of the submitted URI. - maxInitialLineLength: 4096 - - # The maximum length of all headers. - maxHeaderSize: 8192 - - # Maximum number of request components that can be aggregated for a message. - maxAccumulationBufferComponents: 1024 - - # Defines the size in which the result of a request is "batched" back to the client. In other words, if set to 1, - # then a result that had ten items in it would get each result sent back individually. If set to 2 the same ten - # results would come back in five batches of two each. Note that this value can be overridden per request. - resultIterationBatchSize: 64 - - # Try to use epoll event loops (works only on Linux os) instead of netty NIO. 
- useEpollEventLoop: false - - # A List of Map settings, where each Map represents a MessageSerializer implementation to use along with its - # configuration. - serializers: - - { className: org.apache.tinkerpop.gremlin.driver.ser.GryoMessageSerializerV1d0, config: { ioRegistries: [org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerIoRegistry], classResolverSupplier: com.datastax.bdp.graph.impl.tinkerpop.io.DseClassResolverProvider }} - - { className: org.apache.tinkerpop.gremlin.driver.ser.GryoLiteMessageSerializerV1d0, config: { ioRegistries: [org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerIoRegistry], classResolverSupplier: com.datastax.bdp.graph.impl.tinkerpop.io.DseClassResolverProvider }} - - { className: org.apache.tinkerpop.gremlin.driver.ser.GryoMessageSerializerV1d0, config: { serializeResultToString: true }} - - { className: org.apache.tinkerpop.gremlin.driver.ser.GraphSONMessageSerializerGremlinV1d0, config: { ioRegistries: [org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerIoRegistry] }} - - { className: org.apache.tinkerpop.gremlin.driver.ser.GraphSONMessageSerializerGremlinV2d0, config: { ioRegistries: [org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerIoRegistryV2d0, com.datastax.bdp.graph.impl.tinkerpop.io.DseGraphIoRegistryV2d0] }} - - { className: org.apache.tinkerpop.gremlin.driver.ser.GraphSONMessageSerializerV1d0, config: { ioRegistries: [org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerIoRegistry] }} - -# # The gremlin-groovy script engine will always be added even if the configuration option is not present. -# # Additional imports may be added in the configuration for that script engine. -# scriptEngines: -# gremlin-groovy: -# config: -# # To disable the gremlin groovy sandbox entirely -# sandbox_enabled: false -# sandbox_rules: -# -# # To completely whitelist a package add the package name here -# whitelist_packages: -# - package.name -# -# # To whitelist an individual type add the name of the type here -# whitelist_types: -# - fully.qualified.class.name -# -# # To whitelist a super class add the name of the type here -# whitelist_supers: -# - fully.qualified.class.name diff --git a/lib/killrvideo-docker-common/config-secure/remote.yaml b/lib/killrvideo-docker-common/config-secure/remote.yaml deleted file mode 100644 index 052ea60..0000000 --- a/lib/killrvideo-docker-common/config-secure/remote.yaml +++ /dev/null @@ -1,37 +0,0 @@ -hosts: [localhost] -port: 8182 -serializer: { className: org.apache.tinkerpop.gremlin.driver.ser.GryoMessageSerializerV1d0, - config: { serializeResultToString: true, ioRegistries: [org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerIoRegistry] }} -connectionPool: { - enableSsl: false, - # The maximum length in bytes that a message can be sent to the server. This number can be no greater than the - # setting of the same name in the server configuration. - maxContentLength: 65536000, - # The maximum number of in-flight requests that can occur on a connection. - maxInProcessPerConnection: 4, - # The maximum number of times that a connection can be borrowed from the pool simultaneously. - maxSimultaneousUsagePerConnection: 16, - # The maximum size of a connection pool for a host. - maxSize: 8, - # The amount of time in milliseconds to wait for a new connection before timing out. - maxWaitForConnection: 3000, - # The amount of time in milliseconds to wait for a session to close before timing out (does not apply to - # sessionless connections). 
- maxWaitForSessionClose: 3000, - # The minimum number of in-flight requests that can occur on a connection. - minInProcessPerConnection: 1, - # The maximum number of times that a connection can be borrowed from the pool simultaneously. - minSimultaneousUsagePerConnection: 8, - # The minimum size of a connection pool for a host. - minSize: 2, - # The amount of time in milliseconds to wait before trying to reconnect to a dead host. - reconnectInterval: 1000, - # The override value for the size of the result batches to be returned from the server. - resultIterationBatchSize: 64 -} -# Sets the AuthProperties.Property.JAAS_ENTRY properties for authentication to Gremlin Server. -# jaasEntry: -# Sets the AuthProperties.Property.PROTOCOL properties for authentication to Gremlin Server. -# protocol: -username: cassandra -password: cassandra diff --git a/lib/killrvideo-docker-common/create-environment.ps1 b/lib/killrvideo-docker-common/create-environment.ps1 deleted file mode 100644 index 35ca6a7..0000000 --- a/lib/killrvideo-docker-common/create-environment.ps1 +++ /dev/null @@ -1,76 +0,0 @@ -<# - .DESCRIPTION - Gets the environment variables needed to run the Killrvideo docker-compose commands, then writes - them to a .env file in the working directory or the directory specified by the -Path switch. - - .PARAMETER Path - The path to create the environment file. Defaults to the current working directory. - - .PARAMETER FileName - The name of the environment file. Defaults to ".env". - - .PARAMETER ProjectName - The COMPOSE_PROJECT_NAME value to use. Defaults to "killrvideo". - - .PARAMETER Force - Switch to force creation of the file (i.e. overwrite it) if it already exists. -#> -[CmdletBinding()] -Param ( - [Parameter(Mandatory=$false)] - [string] - $Path = $((Resolve-Path .\).Path), - - [Parameter(Mandatory=$false)] - [string] - $FileName = '.env', - - [Parameter(Mandatory=$false)] - [string] - $ProjectName = 'killrvideo', - - [Parameter(Mandatory=$false)] - [switch] - $Force -) - -$ErrorActionPreference = 'stop' - -# Make sure we have an absolute path -if ([System.IO.Path]::IsPathRooted($Path) -eq $false) { - $cwd = (Resolve-Path .\).Path - $Path = Join-Path $cwd $Path -} - -# Path to the file and does it exist -$envFilePath = Join-Path $Path $FileName -if ((Test-Path $envFilePath) -and ($Force -eq $false)) { - Write-Host "Environment file $(Resolve-Path $envFilePath) already exists, will not overwrite" - Exit -} - -# Make sure the path exists for the .env we're generating -if ((Test-Path $Path) -eq $false) { - New-Item -Path $Path -Type Directory | Out-Null -} - -$scriptPath = Split-Path -parent $PSCommandPath - -# Use the base compose file from this project, plus one that should be in the same location -# as the .env file we're generating -Push-Location $Path -$composeFile = Resolve-Path -Relative "$scriptPath\docker-compose.yaml" -Pop-Location -$composeFile += ";.\docker-compose.yaml" - -# Base environment variables -$dockerEnv = @("COMPOSE_PROJECT_NAME=$ProjectName", "COMPOSE_FILE=$composeFile") - -# Get path to the get-environment script and run it, adding each value to the env array -$getEnvCommand = Resolve-Path "$scriptPath\get-environment.ps1" -& "$getEnvCommand" |% { $dockerEnv += $_ } - -# Write the file (have to use the .NET API here because we need UTF-8 WITHOUT the BOM) -$Utf8NoBom = New-Object System.Text.UTF8Encoding($false) -[System.IO.File]::WriteAllLines($envFilePath, $dockerEnv, $Utf8NoBom) -Write-Host "Environment file written to $(Resolve-Path $envFilePath)" \ No newline 
at end of file diff --git a/lib/killrvideo-docker-common/create-environment.sh b/lib/killrvideo-docker-common/create-environment.sh deleted file mode 100755 index f5e9a30..0000000 --- a/lib/killrvideo-docker-common/create-environment.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -set -e # Bail if something fails - -# This script tries to create a .env file in the current working directory for -# use with docker-compose. The file contains variables that include info on the -# user's docker setup like the IP address of the Host and VM. - -ENV_FILE_PATH="$PWD/.env" - -# TODO: Don't overwrite file if it already exists? - -# Relative path that contains this script -SCRIPT_PATH=${BASH_SOURCE%/*} - -# Create an alias for the loopback adapter so that the Mac and Docker VM can communicate using that IP -export LOOPBACK_IP='10.0.75.1' -echo 'We need to create an alias for the loopback adapter (lo0) using sudo' -echo 'so your Mac and the Docker VM can communicate. It will be created using' -echo "IP $LOOPBACK_IP. You will be prompted for your password." -if [ `uname` = "Darwin" ] ; then - sudo ifconfig lo0 alias $LOOPBACK_IP -else - sudo ifconfig lo:0 $LOOPBACK_IP/24 -fi - -# Should use compose file relative to this script, followed by a compose file relative to the -# working directory (i.e. where the .env file is going to be created) -COMPOSE_FILE="$SCRIPT_PATH/docker-compose.yaml:./docker-compose.yaml" -COMPOSE_PROJECT_NAME='killrvideo' - -# Get other variables from the get-environment.sh script -GET_ENV_OUTPUT=$(exec $SCRIPT_PATH/get-environment.sh) - -# Write to .env file in current working directory -echo "COMPOSE_PROJECT_NAME=$COMPOSE_PROJECT_NAME -COMPOSE_FILE=$COMPOSE_FILE -$GET_ENV_OUTPUT" > $ENV_FILE_PATH diff --git a/lib/killrvideo-docker-common/docker-compose-ops-center.yaml b/lib/killrvideo-docker-common/docker-compose-ops-center.yaml deleted file mode 100644 index fadf84e..0000000 --- a/lib/killrvideo-docker-common/docker-compose-ops-center.yaml +++ /dev/null @@ -1,69 +0,0 @@ -version: '3' - -# -# docker-compose-ops-center.yaml -# Use this compose file to add DataStax OpsCenter to the standard KillrVideo infrastructure configuration -# - -services: - # Etcd for our service registry - etcd: - image: quay.io/coreos/etcd:v2.3.6 - command: [ -advertise-client-urls, "http://${KILLRVIDEO_DOCKER_IP}:2379", -listen-client-urls, "http://0.0.0.0:2379" ] - ports: - # The client port - - "2379:2379" - environment: - SERVICE_2379_NAME: etcd - - # Registrator to register containers with Etcd - registrator: - image: gliderlabs/registrator:latest - # Tell registrator where the etcd HTTP API is and to use the docker VM's IP - command: [ -ip, "$KILLRVIDEO_DOCKER_IP", "etcd://etcd:2379/killrvideo/services" ] - volumes: - # So registrator can use the docker API to inspect containers - - "/var/run/docker.sock:/tmp/docker.sock" - depends_on: - - etcd - - # DataStax Enterprise - # configure this as the seed node - # start with search and graph modes enabled ("-s -g") - dse: - image: datastax/dse-server:6.0.0 - command: [ -s -g ] - ports: - - "9042:9042" - - "8983:8983" - - "8182:8182" - links: - - opscenter - environment: - DS_LICENSE: accept - # Allow DSE to lock memory with mlock - cap_add: - - IPC_LOCK - ulimits: - memlock: -1 - - # Container to load KillrVideo schema and search config into DSE - # Provides options to configure secure users as well - dse-config: - image: killrvideo/killrvideo-dse-config:1.2.1 - environment: - KILLRVIDEO_DOCKER_IP: $KILLRVIDEO_DOCKER_IP - KILLRVIDEO_DSE_EXTERNAL_IP: 
$KILLRVIDEO_DSE_EXTERNAL_IP - KILLRVIDEO_CASSANDRA_REPLICATION: $KILLRVIDEO_CASSANDRA_REPLICATION - KILLRVIDEO_GRAPH_REPLICATION: $KILLRVIDEO_GRAPH_REPLICATION - depends_on: - - dse - - # OpsCenter - opscenter: - image: datastax/dse-opscenter:6.5.0 - ports: - - "8888:8888" - environment: - SERVICE_8888_NAME: opscenter - DS_LICENSE: accept diff --git a/lib/killrvideo-docker-common/docker-compose-secure.yaml b/lib/killrvideo-docker-common/docker-compose-secure.yaml deleted file mode 100644 index 4d3060b..0000000 --- a/lib/killrvideo-docker-common/docker-compose-secure.yaml +++ /dev/null @@ -1,61 +0,0 @@ -version: '3' - -# -# docker-compose.yaml -# Default compose file for providing basic infrastructure required by KillrVideo -# - -services: - # Etcd for our service registry - etcd: - image: quay.io/coreos/etcd:v2.3.6 - command: [ -advertise-client-urls, "http://${KILLRVIDEO_DOCKER_IP}:2379", -listen-client-urls, "http://0.0.0.0:2379" ] - ports: - # The client port - - "2379:2379" - environment: - SERVICE_2379_NAME: etcd - - # Registrator to register containers with Etcd - registrator: - image: gliderlabs/registrator:latest - # Tell registrator where the etcd HTTP API is and to use the docker VM's IP - command: [ -ip, "$KILLRVIDEO_DOCKER_IP", "etcd://etcd:2379/killrvideo/services" ] - volumes: - # So registrator can use the docker API to inspect containers - - "/var/run/docker.sock:/tmp/docker.sock" - depends_on: - - etcd - - # DataStax Enterprise - # configure this as the seed node - # start with search and graph modes enabled ("-s -g") - dse: - image: datastax/dse-server:6.0.0 - command: [ -s -g ] - ports: - - "9042:9042" - - "8983:8983" - - "8182:8182" - environment: - DS_LICENSE: accept - # Allow DSE to lock memory with mlock - cap_add: - - IPC_LOCK - ulimits: - memlock: -1 - volumes: - # use custom config file with authentication/authorization enabled - - "./extras/config-secure:/config" - - # Container to load KillrVideo schema and search config into DSE - # Provides options to configure secure users as well - dse-config: - image: killrvideo/killrvideo-dse-config:1.2.1 - environment: - KILLRVIDEO_DOCKER_IP: $KILLRVIDEO_DOCKER_IP - KILLRVIDEO_DSE_EXTERNAL_IP: $KILLRVIDEO_DSE_EXTERNAL_IP - KILLRVIDEO_CASSANDRA_REPLICATION: $KILLRVIDEO_CASSANDRA_REPLICATION - KILLRVIDEO_GRAPH_REPLICATION: $KILLRVIDEO_GRAPH_REPLICATION - depends_on: - - dse diff --git a/lib/killrvideo-docker-common/docker-compose-studio.yaml b/lib/killrvideo-docker-common/docker-compose-studio.yaml deleted file mode 100644 index 1e1c876..0000000 --- a/lib/killrvideo-docker-common/docker-compose-studio.yaml +++ /dev/null @@ -1,70 +0,0 @@ -version: '3' - -# -# docker-compose-studio.yaml -# Use this compose file to add DSE Studio to the standard KillrVideo infrastructure configuration -# - -services: - # Etcd for our service registry - etcd: - image: quay.io/coreos/etcd:v2.3.6 - command: [ -advertise-client-urls, "http://${KILLRVIDEO_DOCKER_IP}:2379", -listen-client-urls, "http://0.0.0.0:2379" ] - ports: - # The client port - - "2379:2379" - environment: - SERVICE_2379_NAME: etcd - - # Registrator to register containers with Etcd - registrator: - image: gliderlabs/registrator:latest - # Tell registrator where the etcd HTTP API is and to use the docker VM's IP - command: [ -ip, "$KILLRVIDEO_DOCKER_IP", "etcd://etcd:2379/killrvideo/services" ] - volumes: - # So registrator can use the docker API to inspect containers - - "/var/run/docker.sock:/tmp/docker.sock" - depends_on: - - etcd - - # DataStax Enterprise - # configure this 
as the seed node - # start with search and graph modes enabled ("-s -g") - dse: - image: datastax/dse-server:6.0.0 - command: [ -s -g ] - ports: - - "9042:9042" - - "8983:8983" - - "8182:8182" - environment: - DS_LICENSE: accept - # Allow DSE to lock memory with mlock - cap_add: - - IPC_LOCK - ulimits: - memlock: -1 - - # Container to load KillrVideo schema and search config into DSE - # Provides options to configure secure users as well - dse-config: - image: killrvideo/killrvideo-dse-config:1.2.1 - environment: - KILLRVIDEO_DOCKER_IP: $KILLRVIDEO_DOCKER_IP - KILLRVIDEO_DSE_EXTERNAL_IP: $KILLRVIDEO_DSE_EXTERNAL_IP - KILLRVIDEO_CASSANDRA_REPLICATION: $KILLRVIDEO_CASSANDRA_REPLICATION - KILLRVIDEO_GRAPH_REPLICATION: $KILLRVIDEO_GRAPH_REPLICATION - depends_on: - - dse - - # One instance of DataStax Studio - studio: - image: killrvideo/killrvideo-studio:2.1.0 - ports: - # The Web UI exposed to our host - - "9091:9091" - depends_on: - - dse - environment: - SERVICE_9091_NAME: studio - DS_LICENSE: accept diff --git a/lib/killrvideo-docker-common/docker-compose-volumes.yaml b/lib/killrvideo-docker-common/docker-compose-volumes.yaml deleted file mode 100644 index da74c2e..0000000 --- a/lib/killrvideo-docker-common/docker-compose-volumes.yaml +++ /dev/null @@ -1,64 +0,0 @@ -version: '3' - -# -# docker-compose-volumes.yaml -# use this compose file to preserve DSE Cassandra data on a separate volume -# - -services: - # Etcd for our service registry - etcd: - image: quay.io/coreos/etcd:v2.3.6 - command: [ -advertise-client-urls, "http://${KILLRVIDEO_DOCKER_IP}:2379", -listen-client-urls, "http://0.0.0.0:2379" ] - ports: - # The client port - - "2379:2379" - environment: - SERVICE_2379_NAME: etcd - - # Registrator to register containers with Etcd - registrator: - image: gliderlabs/registrator:latest - # Tell registrator where the etcd HTTP API is and to use the docker VM's IP - command: [ -ip, "$KILLRVIDEO_DOCKER_IP", "etcd://etcd:2379/killrvideo/services" ] - volumes: - # So registrator can use the docker API to inspect containers - - "/var/run/docker.sock:/tmp/docker.sock" - depends_on: - - etcd - - # DataStax Enterprise with KillrVideo schema and search config - # configure this as the seed node - # start with search and graph modes enabled ("-s -g") - dse: - image: datastax/dse-server:6.0.0 - command: [ -s -g ] - ports: - - "9042:9042" - - "8983:8983" - - "8182:8182" - environment: - DS_LICENSE: accept - volumes: - # associate dse-data directory under the directory where we run docker-compose - # with the Cassandra data directory on our node - # (using relative paths to store at the root directory of the repository which includes - # these files, i.e. 
killrvideo-java or killrvideo-nodejs) - - "../../dse-data:/var/lib/cassandra" - # Allow DSE to lock memory with mlock - cap_add: - - IPC_LOCK - ulimits: - memlock: -1 - - # Container to load KillrVideo schema and search config into DSE - # Provides options to configure secure users as well - dse-config: - image: killrvideo/killrvideo-dse-config:1.2.1 - environment: - KILLRVIDEO_DOCKER_IP: $KILLRVIDEO_DOCKER_IP - KILLRVIDEO_DSE_EXTERNAL_IP: $KILLRVIDEO_DSE_EXTERNAL_IP - KILLRVIDEO_CASSANDRA_REPLICATION: $KILLRVIDEO_CASSANDRA_REPLICATION - KILLRVIDEO_GRAPH_REPLICATION: $KILLRVIDEO_GRAPH_REPLICATION - depends_on: - - dse diff --git a/lib/killrvideo-docker-common/docker-compose.yaml b/lib/killrvideo-docker-common/docker-compose.yaml deleted file mode 100644 index 8268688..0000000 --- a/lib/killrvideo-docker-common/docker-compose.yaml +++ /dev/null @@ -1,58 +0,0 @@ -version: '3' - -# -# docker-compose.yaml -# Default compose file for providing basic infrastructure required by KillrVideo -# - -services: - # Etcd for our service registry - etcd: - image: quay.io/coreos/etcd:v2.3.6 - command: [ -advertise-client-urls, "http://${KILLRVIDEO_DOCKER_IP}:2379", -listen-client-urls, "http://0.0.0.0:2379" ] - ports: - # The client port - - "2379:2379" - environment: - SERVICE_2379_NAME: etcd - - # Registrator to register containers with Etcd - registrator: - image: gliderlabs/registrator:latest - # Tell registrator where the etcd HTTP API is and to use the docker VM's IP - command: [ -ip, "$KILLRVIDEO_DOCKER_IP", "etcd://etcd:2379/killrvideo/services" ] - volumes: - # So registrator can use the docker API to inspect containers - - "/var/run/docker.sock:/tmp/docker.sock" - depends_on: - - etcd - - # DataStax Enterprise - # configure this as the seed node - # start with search and graph modes enabled ("-s -g") - dse: - image: datastax/dse-server:6.0.0 - command: [ -s -g ] - ports: - - "9042:9042" - - "8983:8983" - - "8182:8182" - environment: - DS_LICENSE: accept - # Allow DSE to lock memory with mlock - cap_add: - - IPC_LOCK - ulimits: - memlock: -1 - - # Container to load KillrVideo schema and search config into DSE - # Provides options to configure secure users as well - dse-config: - image: killrvideo/killrvideo-dse-config:1.2.1 - environment: - KILLRVIDEO_DOCKER_IP: $KILLRVIDEO_DOCKER_IP - KILLRVIDEO_DSE_EXTERNAL_IP: $KILLRVIDEO_DSE_EXTERNAL_IP - KILLRVIDEO_CASSANDRA_REPLICATION: $KILLRVIDEO_CASSANDRA_REPLICATION - KILLRVIDEO_GRAPH_REPLICATION: $KILLRVIDEO_GRAPH_REPLICATION - depends_on: - - dse diff --git a/lib/killrvideo-docker-common/extras/_with_server_and_external_down.sh b/lib/killrvideo-docker-common/extras/_with_server_and_external_down.sh deleted file mode 100755 index 5502f29..0000000 --- a/lib/killrvideo-docker-common/extras/_with_server_and_external_down.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -COMMON_DIR="lib/killrvideo-docker-common" -EXTRAS_DIR="$COMMON_DIR/extras" - -docker-compose -f $COMMON_DIR/docker-compose.yaml -f $EXTRAS_DIR/docker-compose-dse-external.yaml -f docker-compose.yaml -f $EXTRAS_DIR/docker-compose-server.yaml down diff --git a/lib/killrvideo-docker-common/extras/_with_server_and_external_up.sh b/lib/killrvideo-docker-common/extras/_with_server_and_external_up.sh deleted file mode 100755 index 7af2bad..0000000 --- a/lib/killrvideo-docker-common/extras/_with_server_and_external_up.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -COMMON_DIR="lib/killrvideo-docker-common" -EXTRAS_DIR="$COMMON_DIR/extras" - -docker-compose -f $COMMON_DIR/docker-compose.yaml -f 
$EXTRAS_DIR/docker-compose-dse-external.yaml -f docker-compose.yaml -f $EXTRAS_DIR/docker-compose-server.yaml up -d diff --git a/lib/killrvideo-docker-common/extras/_with_server_down.sh b/lib/killrvideo-docker-common/extras/_with_server_down.sh deleted file mode 100755 index 0e8f3bf..0000000 --- a/lib/killrvideo-docker-common/extras/_with_server_down.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -COMMON_DIR="lib/killrvideo-docker-common" -EXTRAS_DIR="$COMMON_DIR/extras" - -docker-compose -f $COMMON_DIR/docker-compose.yaml -f docker-compose.yaml -f $EXTRAS_DIR/docker-compose-server.yaml down diff --git a/lib/killrvideo-docker-common/extras/_with_server_up.sh b/lib/killrvideo-docker-common/extras/_with_server_up.sh deleted file mode 100755 index a4c6188..0000000 --- a/lib/killrvideo-docker-common/extras/_with_server_up.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -COMMON_DIR="lib/killrvideo-docker-common" -EXTRAS_DIR="$COMMON_DIR/extras" - -docker-compose -f $COMMON_DIR/docker-compose.yaml -f docker-compose.yaml -f $EXTRAS_DIR/docker-compose-server.yaml up -d diff --git a/lib/killrvideo-docker-common/extras/docker-compose-dse-external.yaml b/lib/killrvideo-docker-common/extras/docker-compose-dse-external.yaml deleted file mode 100644 index c039df2..0000000 --- a/lib/killrvideo-docker-common/extras/docker-compose-dse-external.yaml +++ /dev/null @@ -1,18 +0,0 @@ -version: '3' - -services: - # DataStax Enterprise with KillrVideo schema and search config - dse: - image: killrvideo/killrvideo-dse-external:1.0.6 - cap_add: - - IPC_LOCK - ulimits: - memlock: -1 - environment: - SERVICE_9042_NAME: cassandra - SERVICE_8983_NAME: dse-search - SERVICE_8182_NAME: gremlin - EXTERNAL_CLUSTER_IP: $EXTERNAL_CLUSTER_IP - KILLRVIDEO_DOCKER_IP: $KILLRVIDEO_DOCKER_IP - USERNAME: $KILLRVIDEO_DSE_USERNAME - PASSWORD: $KILLRVIDEO_DSE_PASSWORD diff --git a/lib/killrvideo-docker-common/extras/docker-compose-server.yaml b/lib/killrvideo-docker-common/extras/docker-compose-server.yaml deleted file mode 100644 index cd04472..0000000 --- a/lib/killrvideo-docker-common/extras/docker-compose-server.yaml +++ /dev/null @@ -1,19 +0,0 @@ -version: '3' - -# Other services are specified in .\lib\killrvideo-docker-common\docker-compose.yaml -# This is a helper docker service used to run the various language versions in a headless state. -# The default as you see it is set to using killrvideo-java-server, but could as easily be killrvideo-nodejs-server or some other version as long as an image is built. -services: - server: - image: killrvideo/killrvideo-java-server - ports: - - "8899:8899" - depends_on: - - dse - - etcd - - web - environment: - KILLRVIDEO_HOST_IP: $KILLRVIDEO_HOST_IP - KILLRVIDEO_DOCKER_IP: $KILLRVIDEO_DOCKER_IP - KILLRVIDEO_DSE_USERNAME: $KILLRVIDEO_DSE_USERNAME - KILLRVIDEO_DSE_PASSWORD: $KILLRVIDEO_DSE_PASSWORD diff --git a/lib/killrvideo-docker-common/get-environment.ps1 b/lib/killrvideo-docker-common/get-environment.ps1 deleted file mode 100644 index ffd4222..0000000 --- a/lib/killrvideo-docker-common/get-environment.ps1 +++ /dev/null @@ -1,63 +0,0 @@ -<# - .DESCRIPTION - Gets the environment variables needed to run the Killrvideo docker-compose commands and outputs - them to stdout. 
-#> -[CmdletBinding()] -Param () - -# Figure out if we're Docker for Windows or Docker Toolbox setup -Write-Host 'Determining docker installation type' - -# Docker toolbox sets an install path environment variable so check for it -$isToolbox = $false -if ($Env:DOCKER_TOOLBOX_INSTALL_PATH) { - $isToolbox = $true -} - -Write-Verbose " => Is Docker Toolbox: $isToolbox" - -# Do things differently for Toolbox vs Docker for Windows -if ($isToolbox) { - # See if the docker VM is running - & docker-machine status default | Tee-Object -Variable dockerMachineStatus | Out-Null - if ($dockerMachineStatus -ne 'Running') { - & docker-machine start default | Out-Null - } - - # Add environment to this shell - & docker-machine env | Invoke-Expression -} - -# Determine the Docker VM's IP address -Write-Host 'Getting Docker VM IP' -if ($isToolbox) { - # Just use the command that comes with docker-machine - & docker-machine ip | Tee-Object -Variable dockerIp | Out-Null -} else { - # The VM's IP should be the IP address for eth0 when running a container in host networking mode - $dockerIpCmd = "ip -4 addr show scope global dev eth0 | grep inet | awk `'{print `$2}`' | cut -d / -f 1" - & docker run --rm --net=host busybox bin/sh -c $dockerIpCmd | Tee-Object -Variable dockerIp | Out-Null -} -Write-Verbose " => Got Docker IP: $dockerIp" - -# Determine the VM host's IP address -Write-Host 'Getting corresponding local machine IP' -if ($isToolbox) { - # The host only CIDR address will contain the host's IP (along with a suffix like /24) - & docker-machine inspect --format '{{ .Driver.HostOnlyCIDR }}' default | - Tee-Object -Variable hostCidr | - Out-Null - $hostIp = $hostCidr -replace "\/\d{2}", "" -} else { - # The host's IP should be the default route for eth0 when running a container in host networking mode - $hostIpCmd = "ip -4 route list dev eth0 0/0 | cut -d `' `' -f 3" - & docker run --rm --net=host busybox bin/sh -c $hostIpCmd | Tee-Object -Variable hostIp | Out-Null -} -Write-Verbose " => Got Host IP: $hostIp" - -# Write environment variable pairs to stdout (so this can be piped to a file) -Write-Output "KILLRVIDEO_DOCKER_TOOLBOX=$($isToolbox.ToString().ToLower())" -Write-Output "KILLRVIDEO_HOST_IP=$hostIp" -Write-Output "KILLRVIDEO_DOCKER_IP=$dockerIp" - diff --git a/lib/killrvideo-docker-common/get-environment.sh b/lib/killrvideo-docker-common/get-environment.sh deleted file mode 100755 index 75d37cb..0000000 --- a/lib/killrvideo-docker-common/get-environment.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/sh - -set -e # Bail if something fails - -# This script will try and detect a user's docker setup and write some environment -# variable pairs to stdout. 
The stdout output from this script can then be used to, -# for example, create a .env file for use with docker-compose - -# TODO: Determine if a Docker Toolbox setup -IS_TOOLBOX=false - -if [ "$IS_TOOLBOX" = true ]; then - # Make sure default docker machine is started - STATUS=$(docker-machine status default) - if [ "$STATUS" != "Running" ]; then - docker-machine start default > /dev/null - fi - - # Load docker machine env into this shell - eval $(docker-machine env default) -fi - -# Get the docker VM's IP address -if [ "$IS_TOOLBOX" = true ]; then - # Just use the command that comes with docker-machine - DOCKER_IP=$(docker-machine ip) -else - # The create-environment.sh script should have setup a loopback alias, so use that IP - DOCKER_IP=$LOOPBACK_IP -fi - -# Get the docker VM Host's IP address -if [ "$IS_TOOLBOX" = true ]; then - # The host only CIDR address will contain the host's IP (along with a suffix like /24) - HOST_IP=$(docker-machine inspect --format '{{ .Driver.HostOnlyCIDR }}' default) - # Remove suffix - HOST_IP=${HOST_IP//\/[[:digit:]][[:digit:]]/} -else - # The create-environment.sh script should have setup a loopback alias, so use that IP - HOST_IP=$LOOPBACK_IP -fi - -# Write values to stdout -echo "KILLRVIDEO_DOCKER_TOOLBOX=$IS_TOOLBOX" -echo "KILLRVIDEO_HOST_IP=$HOST_IP" -echo "KILLRVIDEO_DOCKER_IP=$DOCKER_IP" \ No newline at end of file diff --git a/package.json b/package.json index eb53705..d8cdb37 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,12 @@ { "name": "killrvideo-generator", - "version": "2.1.0", + "version": "3.0.0", "description": "Sample Data generator for KillrVideo", "main": "dist/index.js", "scripts": { "start": "node dist/index.js", "start:dev": "cross-env NODE_ENV=development node --debug -r dotenv/config dist/index.js", + "debug": "node --debug dist/index.js", "clean": "rimraf dist", "build": "npm-run-all --parallel \"build:*\"", "build:js": "babel src --out-dir dist --source-maps", diff --git a/scripts/travis-publish.sh b/scripts/travis-publish.sh deleted file mode 100755 index f8a5b39..0000000 --- a/scripts/travis-publish.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -set -e # Exit with nonzero exit code if anything fails - -# If a pull request -if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then - echo "Skipping publish for pull request" - exit 0 -fi - -# Invoke the publish script -MY_PATH="`dirname \"$0\"`" -( exec "$MY_PATH/docker-publish.sh" ) \ No newline at end of file diff --git a/src/index.js b/src/index.js index a900855..70fdb9b 100644 --- a/src/index.js +++ b/src/index.js @@ -30,7 +30,7 @@ async function startAsync() { try { // Make sure C* is ready to go - await withRetries(initCassandraAsync, 20, 10, 'Could not initialize Cassandra keyspace', false); + await withRetries(initCassandraAsync, 20, 10, 'Could not initialize Cassandra client', false); // Start scheduled tasks scheduler = new Scheduler(); diff --git a/src/utils/etcd.js b/src/utils/etcd.js deleted file mode 100644 index 1adff75..0000000 --- a/src/utils/etcd.js +++ /dev/null @@ -1,69 +0,0 @@ -import Promise from 'bluebird'; -import { concat as httpGet } from 'simple-get'; -import { logger } from './logging'; -import { ExtendableError } from './extendable-error'; - -let ETCD_URL = null; - -/** - * Get the URL for etcd. Value is computed once then cached and reused. 
- */ -function getEtcdUrl() { - if (ETCD_URL !== null) return ETCD_URL; - - // In development environments, use the KILLRVIDEO_DOCKER_IP, otherwise look for KILLRVIDEO_ETCD - let etcdHostAndPort = process.env.NODE_ENV === 'development' && !!process.env.KILLRVIDEO_DOCKER_IP - ? `${process.env.KILLRVIDEO_DOCKER_IP}:2379` - : process.env.KILLRVIDEO_ETCD; - - if (!etcdHostAndPort) { - throw new Error('Could not find etcd IP and port in KILLRVIDEO_ETCD environment variable'); - } - - ETCD_URL = `http://${etcdHostAndPort}/v2/keys/killrvideo`; - logger.log('verbose', `Using etcd endpoint ${ETCD_URL}`); - return ETCD_URL; -} - -// HTTP get as Promise returning function -const getAsync = Promise.promisify(httpGet, { multiArgs: true }); - -/** - * Error thrown when getting keys from etcd returns a status other than 200 OK. - */ -export class GetEtcdKeysError extends ExtendableError { - constructor(response, body) { - super('Error getting keys from etcd'); - - this.statusCode = response.statusCode; - this.statusMessage = response.statusMessage; - this.body = body; - } -}; - -/** - * Does the HTTP get to etcd to get the keys at the path specified. - */ -function getKeysAsync(path) { - let url = `${getEtcdUrl()}${path}`; - return getAsync(url) - .spread((res, data) => { - if (res.statusCode !== 200) { - throw new GetEtcdKeysError(res, data.toString()); - } - return JSON.parse(data.toString()); - }); -} - -/** - * Get the values for the keys at the given path in etcd. Returns a Promise of an array of values. - */ -export function getEtcdValuesAsync(path) { - return getKeysAsync(path) - .then(res => { - if (res.node.dir !== true) { - throw new Error(`${path} is not a directory in etcd`); - } - return res.node.nodes.map(n => n.value); - }); -}; \ No newline at end of file diff --git a/src/utils/lookup-service.js b/src/utils/lookup-service.js index 6ca9d18..ea7be98 100644 --- a/src/utils/lookup-service.js +++ b/src/utils/lookup-service.js @@ -1,31 +1,31 @@ -import { getEtcdValuesAsync, GetEtcdKeysError } from './etcd'; import { logger } from './logging'; import { ExtendableError } from './extendable-error'; +import config from 'config'; +import Promise from 'bluebird'; + +let registry = config.get('services'); /** - * Error thrown when a service can't be found in etcd. + * Error thrown when a service can't be found */ export class ServiceNotFoundError extends ExtendableError { constructor(serviceName) { - super(`Could not find service ${serviceName} in etcd`); + super(`Could not find service ${serviceName}`); } }; /** - * Looks up a service with a given name. Returns an array of strings in the format of 'ip:port'. + * Looks up a service with a given name. Returns a Promise with an array of strings in the format of 'ip:port' or throws ServiceNotFoundError. 
 */
 export function lookupServiceAsync(serviceName) {
   logger.log('verbose', `Looking up service ${serviceName}`);
-
-  return getEtcdValuesAsync(`/services/${serviceName}`)
-    .tap(hosts => {
-      logger.log('verbose', `Found service ${serviceName} at ${JSON.stringify(hosts)} in etcd`);
-    })
-    .catch(GetEtcdKeysError, err => {
-      if (err.statusCode === 404) {
-        throw new ServiceNotFoundError(serviceName);
-      }
-      throw err;
-    });
-}; \ No newline at end of file
+  if (!(serviceName in registry)) {
+    logger.log('error', `Found no service ${serviceName}`);
+    throw new ServiceNotFoundError(serviceName);
+  }
+
+  logger.log('verbose', `Found service ${serviceName} at ${registry[serviceName]}`);
+
+  return Promise.resolve(registry[serviceName]);
+};
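
For context on the final hunk: service lookup no longer queries etcd; `lookupServiceAsync` now resolves hosts from a static `services` map read via the `config` package. Below is a minimal usage sketch, assuming a `config/default.yaml` with a `services` section containing a `cassandra` entry; the service name, host value, and import path are illustrative assumptions, not taken from this diff.

```
// Usage sketch only; assumes config/default.yaml contains something like:
//   services:
//     cassandra: [ "dse:9042" ]
// The "cassandra" key and host value are illustrative, not confirmed by the diff.
import { lookupServiceAsync, ServiceNotFoundError } from './utils/lookup-service';

async function printServiceHosts(serviceName) {
  try {
    // Resolves to whatever is configured under services.<serviceName>
    const hosts = await lookupServiceAsync(serviceName);
    console.log(`${serviceName} is available at: ${hosts}`);
  } catch (err) {
    // Thrown when the service key is missing from the config files
    if (err instanceof ServiceNotFoundError) {
      console.error(`No '${serviceName}' entry under 'services' in the config files`);
      return;
    }
    throw err;
  }
}

printServiceHosts('cassandra');
```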