From dbb7fba1916bd623d9ab6acdebf8510427e554e9 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Thu, 7 Nov 2024 13:01:11 +0100 Subject: [PATCH 1/2] Switch to the Elasticsearch distribution of the search backend and keep the OpenSearch available in the profile for tests --- .github/workflows/deploy.yml | 12 ++-- README.adoc | 26 ++++---- pom.xml | 33 ++++++++-- .../docker/elasticsearch-custom.Dockerfile | 5 ++ src/main/helm/values.staging.yaml | 8 +-- src/main/kubernetes/openshift.yml | 60 ++++++++++++------- src/main/resources/application.properties | 34 +++++------ 7 files changed, 110 insertions(+), 68 deletions(-) create mode 100644 src/main/docker/elasticsearch-custom.Dockerfile diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 733af4bd..9bffffbb 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -52,10 +52,10 @@ jobs: - name: Create ImageStreams run: | oc create imagestream search-quarkus-io || true - oc create imagestream opensearch-custom || true + oc create imagestream elasticsearch-custom || true # https://docs.openshift.com/container-platform/4.14/openshift_images/using-imagestreams-with-kube-resources.html oc set image-lookup search-quarkus-io - oc set image-lookup opensearch-custom + oc set image-lookup elasticsearch-custom - name: Retrieve OpenShift Container Registry URL id: oc-registry @@ -88,14 +88,14 @@ jobs: -Dquarkus.container-image.registry="$(oc registry info)" \ -Dquarkus.container-image.group="$(oc project --short)" - - name: Push OpenSearch container image + - name: Push Elasticsearch container image run: | - REMOTE_IMAGE_REF="$(oc registry info)/$(oc project --short)/opensearch-custom:${{ steps.app-version.outputs.value }}" + REMOTE_IMAGE_REF="$(oc registry info)/$(oc project --short)/elasticsearch-custom:${{ steps.app-version.outputs.value }}" # docker doesn't allow the `push source target` syntax, so we have to do this in two steps. 
- docker image tag "opensearch-custom:latest" "$REMOTE_IMAGE_REF" + docker image tag "elasticsearch-custom:latest" "$REMOTE_IMAGE_REF" docker push "$REMOTE_IMAGE_REF" - name: Deploy Helm charts run: | helm upgrade --install search-quarkus-io ./target/helm/openshift/search-quarkus-io \ - -f ./src/main/helm/values.$QUARKUS_PROFILE.yaml \ No newline at end of file + -f ./src/main/helm/values.$QUARKUS_PROFILE.yaml diff --git a/README.adoc b/README.adoc index 007f3812..bf6cb662 100644 --- a/README.adoc +++ b/README.adoc @@ -5,7 +5,7 @@ [[architecture]] == Architecture -The application is deployed on an OpenShift cluster, next to an OpenSearch instance. +The application is deployed on an OpenShift cluster, next to an Elasticsearch instance. It fetches the sources from quarkus.io and localized variants (pt.quarkus.io, ...) to index them, and exposes the search feature through a REST API. @@ -142,34 +142,34 @@ Then start it this way: [source,shell] ---- podman pod create -p 8080:8080 -p 9000:9000 -p 9200:9200 --name search.quarkus.io -# Start multiple OpenSearch containers +# Start multiple Elasticsearch containers podman container run -d --name search-backend-0 --pod search.quarkus.io \ --cpus=2 --memory=2g \ -e "node.name=search-backend-0" \ -e "discovery.seed_hosts=localhost" \ - -e "cluster.initial_cluster_manager_nodes=search-backend-0,search-backend-1,search-backend-2" \ + -e "cluster.initial_master_nodes=search-backend-0,search-backend-1,search-backend-2" \ - -e "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" \ - -e "DISABLE_SECURITY_PLUGIN=true" \ + -e "ES_JAVA_OPTS=-Xms1g -Xmx1g" \ + -e "xpack.security.enabled=false" \ -e "cluster.routing.allocation.disk.threshold_enabled=false" \ - opensearch-custom:latest + elasticsearch-custom:latest podman container run -d --name search-backend-1 --pod search.quarkus.io \ --cpus=2 --memory=2g \ -e "node.name=search-backend-1" \ -e "discovery.seed_hosts=localhost" \ - -e "cluster.initial_cluster_manager_nodes=search-backend-0,search-backend-1,search-backend-2" \ + -e "cluster.initial_master_nodes=search-backend-0,search-backend-1,search-backend-2" \ - -e "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" \ - -e 
"DISABLE_SECURITY_PLUGIN=true" \ + -e "ES_JAVA_OPTS=-Xms1g -Xmx1g" \ + -e "xpack.security.enabled=false" \ -e "cluster.routing.allocation.disk.threshold_enabled=false" \ - opensearch-custom:latest + elasticsearch-custom:latest podman container run -d --name search-backend-2 --pod search.quarkus.io \ --cpus=2 --memory=2g \ -e "node.name=search-backend-2" \ -e "discovery.seed_hosts=localhost" \ - -e "cluster.initial_cluster_manager_nodes=search-backend-0,search-backend-1,search-backend-2" \ + -e "cluster.initial_master_nodes=search-backend-0,search-backend-1,search-backend-2" \ - -e "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g" \ - -e "DISABLE_SECURITY_PLUGIN=true" \ + -e "ES_JAVA_OPTS=-Xms1g -Xmx1g" \ + -e "xpack.security.enabled=false" \ -e "cluster.routing.allocation.disk.threshold_enabled=false" \ - opensearch-custom:latest + elasticsearch-custom:latest # Then the app; this will fetch the actual data on startup (might take a while): podman container run -it --rm --name search.quarkus.io --pod search.quarkus.io search-quarkus-io:999-SNAPSHOT # OR, if you already have locals clones of *.quarkus.io: @@ -251,11 +251,11 @@ you will need to set up a few things manually: on quarkusio/search.quarkus.io. See `indexing.reporting.github` configuration properties for more details. `search-backend-config`:: - Environment variables for the OpenSearch instances. + Environment variables for the Elasticsearch instances. + Put in there whatever configuration you need for your specific cluster. `search-backend-secret`:: - Secret environment variables for the OpenSearch instances. + Secret environment variables for the Elasticsearch instances. + Put in there whatever secret configuration you need for your specific cluster. 
diff --git a/pom.xml b/pom.xml index d168ab0b..feb22ffd 100644 --- a/pom.xml +++ b/pom.xml @@ -37,7 +37,14 @@ 1.12.0 2.18 + + 8.15 1.7.3 + + ${project.basedir}/src/main/docker/elasticsearch-custom.Dockerfile + ${version.elasticsearch} + elasticsearch-custom + elastic @@ -269,7 +276,8 @@ ${revision} ${project.basedir}/src/test/resources - ${version.opensearch} + ${search.backend.dockerversion} + ${search.backend.dockername} @@ -295,7 +303,9 @@ ${maven.home} ${revision} ${project.basedir}/src/test/resources - ${version.opensearch} + ${search.backend.dockerversion} + ${search.backend.dockername} + ${search.backend.distribution} true @@ -375,14 +385,14 @@ - opensearch-custom + ${search.backend.dockername} - ${project.basedir}/src/main/docker/opensearch-custom.Dockerfile + ${search.backend.dockerfile} latest - ${version.opensearch} + ${search.backend.dockerversion} @@ -392,6 +402,19 @@ + + + opensearch + + ${project.basedir}/src/main/docker/opensearch-custom.Dockerfile + ${version.opensearch} + opensearch-custom + opensearch + + locker diff --git a/src/main/docker/elasticsearch-custom.Dockerfile b/src/main/docker/elasticsearch-custom.Dockerfile new file mode 100644 index 00000000..83169035 --- /dev/null +++ b/src/main/docker/elasticsearch-custom.Dockerfile @@ -0,0 +1,5 @@ +FROM elastic/elasticsearch:8.15.3 + +RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch analysis-kuromoji +RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch analysis-smartcn +RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch analysis-icu diff --git a/src/main/helm/values.staging.yaml b/src/main/helm/values.staging.yaml index 058bcfb8..7b06bc06 100644 --- a/src/main/helm/values.staging.yaml +++ b/src/main/helm/values.staging.yaml @@ -1,7 +1,7 @@ app: envs: QUARKUS_PROFILE: 'staging' - # Avoid overloading the rather resource-constrained OpenSearch instance + # Avoid overloading the rather resource-constrained Search backend instance 
INDEXING_QUEUE_COUNT: '4' INDEXING_BULK_SIZE: '10' resources: @@ -11,13 +11,13 @@ app: requests: cpu: 250m memory: 500Mi -opensearch: +elasticsearch: envs: - OPENSEARCH_JAVA_OPTS: ' -Xms500m -Xmx500m ' + ES_JAVA_OPTS: ' -Xms500m -Xmx500m ' resources: limits: cpu: 500m memory: 1.0Gi requests: cpu: 250m - memory: 750Mi \ No newline at end of file + memory: 750Mi diff --git a/src/main/kubernetes/openshift.yml b/src/main/kubernetes/openshift.yml index d41c9238..41baa138 100644 --- a/src/main/kubernetes/openshift.yml +++ b/src/main/kubernetes/openshift.yml @@ -53,18 +53,18 @@ spec: alpha.image.policy.openshift.io/resolve-names: '*' spec: containers: - - name: opensearch + - name: elasticsearch # The image gets pushed manually as part of the "deploy" workflow. # This gets replaced with the correct image ref (exact tag). - image: opensearch-custom:latest + image: elasticsearch-custom:latest imagePullPolicy: Always resources: limits: - cpu: '{{ .Values.opensearch.resources.limits.cpu }}' - memory: '{{ .Values.opensearch.resources.limits.memory }}' + cpu: '{{ .Values.elasticsearch.resources.limits.cpu }}' + memory: '{{ .Values.elasticsearch.resources.limits.memory }}' requests: - cpu: '{{ .Values.opensearch.resources.requests.cpu }}' - memory: '{{ .Values.opensearch.resources.requests.memory }}' + cpu: '{{ .Values.elasticsearch.resources.requests.cpu }}' + memory: '{{ .Values.elasticsearch.resources.requests.memory }}' readinessProbe: httpGet: scheme: HTTP @@ -80,7 +80,7 @@ spec: protocol: TCP volumeMounts: - name: data - mountPath: /usr/share/opensearch/data + mountPath: /usr/share/elasticsearch/data env: - name: cluster.name value: search-quarkus-io @@ -96,26 +96,40 @@ spec: # but this shouldn't be too bad as we don't expect swapping to be enabled. 
- name: bootstrap.memory_lock value: "false" - # OpenSearch doesn't seem to automatically adapt -Xmx to available memory, for some reason - - name: OPENSEARCH_JAVA_OPTS - value: '{{ .Values.opensearch.envs.OPENSEARCH_JAVA_OPTS }}' - # This is necessary to avoid OpenSearch trying to install various things on startup, - # which leads to filesystem operations (chmod/chown) that won't work - # because only user 1000 has the relevant permissions, - # and we can't run with user 1000 on OpenShift. - # See also: - # - https://github.com/opensearch-project/opensearch-devops/issues/97 - # - src/main/docker/opensearch-custom.Dockerfile - - name: DISABLE_PERFORMANCE_ANALYZER_AGENT_CLI - value: 'true' - - name: DISABLE_INSTALL_DEMO_CONFIG - value: 'true' + # Set the -Xmx explicitly and don't rely on the search backend to figure out memory limits on its own. + - name: ES_JAVA_OPTS + value: '{{ .Values.elasticsearch.envs.ES_JAVA_OPTS }}' # Not exposed to the internet, no sensitive data # => We don't bother with HTTPS and pesky self-signed certificates # Setting this env variable is better than setting plugins.security.disabled # because this skips installing the plugin altogether (see above) - - name: DISABLE_SECURITY_PLUGIN + - name: xpack.security.enabled - value: 'true' + value: 'false' + # Disable disk-based shard allocation thresholds: on large, relatively full disks (>90% used), + # it will cause index creation to get stuck waiting for other nodes to join the cluster, + # which will never happen since we only have one node. 
+ # See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/modules-cluster.html#disk-based-shard-allocation + - name: cluster.routing.allocation.disk.threshold_enabled + value: 'false' + # Disable more plugins/features that we do not use: + - name: 'cluster.deprecation_indexing.enabled' + value: 'false' + - name: 'xpack.profiling.enabled' + value: 'false' + - name: 'xpack.ent_search.enabled' + value: 'false' + - name: 'indices.lifecycle.history_index_enabled' + value: 'false' + - name: 'slm.history_index_enabled' + value: 'false' + - name: 'stack.templates.enabled' + value: 'false' + - name: 'xpack.ml.enabled' + value: 'false' + - name: 'xpack.monitoring.templates.enabled' + value: 'false' + - name: 'xpack.watcher.enabled' + value: 'false' envFrom: - configMapRef: name: search-backend-config @@ -134,4 +148,4 @@ spec: storageClassName: "gp2" resources: requests: - storage: 5Gi \ No newline at end of file + storage: 5Gi diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 3d1b9cf7..05162ebf 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -66,22 +66,22 @@ quarkus.rest.path=/api ######################## # Hibernate Search ######################## -# This version needs to match the version in src/main/docker/opensearch-custom.Dockerfile -quarkus.hibernate-search-standalone.elasticsearch.version=opensearch:2.18 +# This version needs to match the version in src/main/docker/elasticsearch-custom.Dockerfile +quarkus.hibernate-search-standalone.elasticsearch.version=${maven.distribution.search.backend:elastic}:${maven.version.search.backend:8.15} # Not using :latest here as a workaround until we get https://github.com/quarkusio/quarkus/pull/38896 -quarkus.elasticsearch.devservices.image-name=opensearch-custom:${maven.version.opensearch} -quarkus.elasticsearch.devservices.java-opts=${PROD_OPENSEARCH_JAVA_OPTS} -# Limit parallelism of indexing, because OpenSearch can 
only handle so many documents in its buffers. +quarkus.elasticsearch.devservices.image-name=${maven.name.search.backend}:${maven.version.search.backend} +quarkus.elasticsearch.devservices.java-opts=${PROD_ES_JAVA_OPTS} +# Limit parallelism of indexing, because the search backend can only handle so many documents in its buffers. # This leads to at most 8*12=96 documents being indexed in parallel, which should be plenty # given how large our documents can be. INDEXING_QUEUE_COUNT=8 INDEXING_BULK_SIZE=12 quarkus.hibernate-search-standalone.elasticsearch.indexing.queue-count=${INDEXING_QUEUE_COUNT} quarkus.hibernate-search-standalone.elasticsearch.indexing.max-bulk-size=${INDEXING_BULK_SIZE} -# We need to apply a custom OpenSearch mapping to exclude very large fields from the _source +# We need to apply a custom search backend mapping to exclude very large fields from the _source quarkus.hibernate-search-standalone.elasticsearch.schema-management.mapping-file=indexes/mapping-template.json quarkus.hibernate-search-standalone.elasticsearch.schema-management.settings-file=indexes/settings-template.json -# In production, we don't expect OpenSearch to be reachable when the application starts +# In production, we don't expect the search backend to be reachable when the application starts %prod.quarkus.hibernate-search-standalone.elasticsearch.version-check.enabled=false # ... and the application automatically creates indexes upon first indexing anyway. %prod.quarkus.hibernate-search-standalone.schema-management.strategy=none @@ -166,7 +166,7 @@ quarkus.swagger-ui.title=Quarkus Search API # We don't need it but more importantly it doesn't work (leads to marshalling errors) # for strings that look like numbers (e.g. 
2.11) quarkus.helm.map-system-properties=false -# Set common k8s labels everywhere, even on OpenSearch resources +# Set common k8s labels everywhere, even on the search backend resources quarkus.helm.values."version".paths=metadata.labels.'app.kubernetes.io/version',spec.template.metadata.labels.'app.kubernetes.io/version' quarkus.helm.values."version".property=@.app.version quarkus.helm.values."version".value=${maven.revision} @@ -233,16 +233,16 @@ quarkus.openshift.add-version-to-label-selectors=false # so that changes to the image can be rolled back in sync with the app. # It happens that the revision passed to maven is a convenient unique version, # but in theory we could use another unique string. -quarkus.helm.values."opensearch-image".paths=(kind == StatefulSet).spec.template.spec.containers.image -quarkus.helm.values."opensearch-image".value=opensearch-custom:${maven.revision} -quarkus.helm.values."opensearch-image".property=@.opensearch.image +quarkus.helm.values."elasticsearch-image".paths=(kind == StatefulSet).spec.template.spec.containers.image +quarkus.helm.values."elasticsearch-image".value=${maven.name.search.backend}:${maven.revision} +quarkus.helm.values."elasticsearch-image".property=@.elasticsearch.image # Resource requirements (overridden for staging, see src/main/helm) -PROD_OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g -quarkus.helm.values."@.opensearch.envs.OPENSEARCH_JAVA_OPTS".value=\ ${PROD_OPENSEARCH_JAVA_OPTS} -quarkus.helm.values."@.opensearch.resources.limits.cpu".value=2000m -quarkus.helm.values."@.opensearch.resources.requests.cpu".value=500m -quarkus.helm.values."@.opensearch.resources.limits.memory".value=2Gi -quarkus.helm.values."@.opensearch.resources.requests.memory".value=1.9Gi +PROD_ES_JAVA_OPTS=-Xms1g -Xmx1g +quarkus.helm.values."@.elasticsearch.envs.ES_JAVA_OPTS".value=\ ${PROD_ES_JAVA_OPTS} +quarkus.helm.values."@.elasticsearch.resources.limits.cpu".value=2000m +quarkus.helm.values."@.elasticsearch.resources.requests.cpu".value=500m 
+quarkus.helm.values."@.elasticsearch.resources.limits.memory".value=2Gi +quarkus.helm.values."@.elasticsearch.resources.requests.memory".value=1.9Gi ######################## # Web Bundler config From 1f6979282cb190dddb512f6b8777e360d16489f3 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Fri, 8 Nov 2024 12:26:35 +0100 Subject: [PATCH 2/2] Use a different command as oc registry info is marked "deprecated" --- .github/workflows/deploy.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9bffffbb..03bbdf38 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -61,7 +61,7 @@ jobs: id: oc-registry run: | echo -n "OC_REGISTRY_URL=" >> "$GITHUB_OUTPUT" - oc registry info >> "$GITHUB_OUTPUT" + oc get imagestream -o json | jq -r '.items[0].status.publicDockerImageRepository' | awk -F"[/]" '{print $1}' >> "$GITHUB_OUTPUT" - name: Log in to OpenShift Container Registry uses: docker/login-action@v3 with: @@ -85,12 +85,12 @@ jobs: -Drevision="${{ steps.app-version.outputs.value }}" \ -Dquarkus.container-image.build=true \ -Dquarkus.container-image.push=true \ - -Dquarkus.container-image.registry="$(oc registry info)" \ + -Dquarkus.container-image.registry="${{ steps.oc-registry.outputs.OC_REGISTRY_URL }}" \ -Dquarkus.container-image.group="$(oc project --short)" - name: Push Elasticsearch container image run: | - REMOTE_IMAGE_REF="$(oc registry info)/$(oc project --short)/elasticsearch-custom:${{ steps.app-version.outputs.value }}" + REMOTE_IMAGE_REF="${{ steps.oc-registry.outputs.OC_REGISTRY_URL }}/$(oc project --short)/elasticsearch-custom:${{ steps.app-version.outputs.value }}" # docker doesn't allow the `push source target` syntax, so we have to do this in two steps. docker image tag "elasticsearch-custom:latest" "$REMOTE_IMAGE_REF" docker push "$REMOTE_IMAGE_REF"