Skip to content

Commit

Permalink
Merge branch 'main' into concat_json_field
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz authored Jun 8, 2024
2 parents 515cc3f + 0816302 commit 77b5531
Show file tree
Hide file tree
Showing 248 changed files with 6,064 additions and 5,117 deletions.
8 changes: 6 additions & 2 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
[build]
rustflags = ["--cfg", "tokio_unstable"]
rustdocflags = ["--cfg", "tokio_unstable"]

[target.x86_64-unknown-linux-gnu]
# Targetting x86-64-v2 gives a ~2% performance boost while only
# Targeting x86-64-v2 gives a ~2% performance boost while only
# disallowing Intel CPUs older than 2008 and AMD CPUs older than 2011.
# None of those very old CPUs are used in GCP
# (https://cloud.google.com/compute/docs/cpu-platforms). Unfortunately,
# AWS does not seem to disclose the exact CPUs they use.
rustflags = ["-C", "target-cpu=x86-64-v2"]
rustflags = ["-C", "target-cpu=x86-64-v2", "--cfg", "tokio_unstable"]

66 changes: 60 additions & 6 deletions .github/workflows/cbench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,50 +9,69 @@ on:
- "quickwit/**"
- "!quickwit/quickwit-ui/**"
# For security reasons (to make sure the list of allowed users is
# trusted), make sure we run the workflow definition the base of the
# pull request.
# trusted), make sure we run the workflow definition from the base
# commit of the pull request.
pull_request_target:

# This is required for github.rest.issues.createComment.
permissions:
issues: write
pull-requests: write

env:
RUSTFLAGS: --cfg tokio_unstable

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true


jobs:
tests:
name: Benchmark
# The self-hosted runner must have the system deps installed for QW and
# the benchmark, because we don't have root access.
runs-on: self-hosted
timeout-minutes: 40
timeout-minutes: 60
steps:
- name: Set authorized users
id: authorized-users
# List of users allowed to trigger this workflow.
# Because it executes code on a self-hosted runner, it must be restricted to trusted users.
run: |
echo 'users=["ddelemeny", "fmassot", "fulmicoton", "guilload", "PSeitz", "rdettai", "trinity-1686a"]' >> $GITHUB_OUTPUT
- uses: actions/checkout@v4
if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor) && github.event_name == 'pull_request_target'
name: Checkout quickwit (pull request commit)
with:
repository: quickwit-oss/quickwit
ref: ${{ github.event.pull_request.head.sha }}
path: ./quickwit

- uses: actions/checkout@v4
if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor) && github.event_name != 'pull_request_target'
name: Checkout quickwit
with:
repository: quickwit-oss/quickwit
ref: ${{ github.sha }}
path: ./quickwit

- name: Checkout benchmarking code
uses: actions/checkout@v4
if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor)
with:
repository: quickwit-oss/benchmarks
ref: main
path: ./benchmarks

- name: Install Rust
run: rustup update stable

- name: Install protoc
uses: taiki-e/install-action@v2
with:
tool: protoc

# We don't use rust-cache as it requires root access on the self-hosted runner, which we don't have.
- name: cargo build
if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor)
Expand All @@ -65,19 +84,54 @@ jobs:
- name: Run Benchmark on SSD
if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor)
id: bench-run-ssd
run: python3 ./run.py --search-only --storage pd-ssd --engine quickwit --track generated-logs --tags "${{ github.event_name }}_${{ github.ref_name }}" --manage-engine --source github_workflow --binary-path ../quickwit/quickwit/target/release/quickwit --instance "{autodetect_gcp}" --export-to-endpoint=https://qw-benchmarks.104.155.161.122.nip.io --engine-data-dir "{qwdata_local}" --write-exported-run-url-to-file $GITHUB_OUTPUT
run: python3 ./run.py --search-only --storage pd-ssd --engine quickwit --track generated-logs --tags "${{ github.event_name }}_${{ github.ref_name }}" --manage-engine --source github_workflow --binary-path ../quickwit/quickwit/target/release/quickwit --instance "{autodetect_gcp}" --export-to-endpoint=https://qw-benchmarks.104.155.161.122.nip.io --engine-data-dir "{qwdata_local}" --github-workflow-user "${{ github.actor }}" --github-workflow-run-id "${{ github.run_id }}" --comparison-reference-tag="push_main" --github-pr "${{ github.event_name == 'pull_request_target' && github.event.number || 0 }}" --comparison-reference-commit "${{ github.event_name == 'pull_request_target' && github.sha || github.event.before }}" --write-exported-run-url-to-file $GITHUB_OUTPUT
working-directory: ./benchmarks
- name: Run Benchmark on cloud storage
if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor)
id: bench-run-cloud-storage
run: python3 ./run.py --search-only --storage gcs --engine quickwit --track generated-logs --tags "${{ github.event_name }}_${{ github.ref_name }}" --manage-engine --source github_workflow --binary-path ../quickwit/quickwit/target/release/quickwit --instance "{autodetect_gcp}" --export-to-endpoint=https://qw-benchmarks.104.155.161.122.nip.io --engine-data-dir "{qwdata_gcs}" --write-exported-run-url-to-file $GITHUB_OUTPUT
run: python3 ./run.py --search-only --storage gcs --engine quickwit --track generated-logs --tags "${{ github.event_name }}_${{ github.ref_name }}" --manage-engine --source github_workflow --binary-path ../quickwit/quickwit/target/release/quickwit --instance "{autodetect_gcp}" --export-to-endpoint=https://qw-benchmarks.104.155.161.122.nip.io --engine-data-dir "{qwdata_gcs}" --engine-config-file engines/quickwit/configs/cbench_quickwit_gcs.yaml --github-workflow-user "${{ github.actor }}" --github-workflow-run-id "${{ github.run_id }}" --comparison-reference-tag="push_main" --github-pr "${{ github.event_name == 'pull_request_target' && github.event.number || 0 }}" --comparison-reference-commit "${{ github.event_name == 'pull_request_target' && github.sha || github.event.before }}" --write-exported-run-url-to-file $GITHUB_OUTPUT
working-directory: ./benchmarks
- name: Show results links
if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor)
run: |
echo "::notice title=Benchmark Results on SSD::${{ steps.bench-run-ssd.outputs.url }}"
echo "::notice title=Comparison of results on SSD::${{ steps.bench-run-ssd.outputs.comparison_text }}"
echo "::notice title=Benchmark Results on Cloud Storage::${{ steps.bench-run-cloud-storage.outputs.url }}"
echo "::notice title=Comparison of results on Cloud Storage::${{ steps.bench-run-cloud-storage.outputs.comparison_text }}"
- name: In case of auth error
if: ${{ ! contains(fromJSON(steps.authorized-users.outputs.users), github.actor) }}
run: |
echo "::error title=User not allowed to run the benchmark::User must be in list ${{ steps.authorized-users.outputs.users }}"
- name: Add a PR comment with comparison results
uses: actions/github-script@v7
if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor) && github.event_name == 'pull_request_target'
# Inspired by: https://github.com/actions/github-script/blob/60a0d83039c74a4aee543508d2ffcb1c3799cdea/.github/workflows/pull-request-test.yml
with:
script: |
// Get the existing comments.
const {data: comments} = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.payload.number,
})
// Find any comment already made by the bot to update it.
const botComment = comments.find(comment => comment.user.id === 41898282)
const commentBody = "### On SSD:\n${{ steps.bench-run-ssd.outputs.comparison_text }}\n### On GCS:\n${{ steps.bench-run-cloud-storage.outputs.comparison_text }}\n"
if (botComment) {
// Update existing comment.
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: botComment.id,
body: commentBody
})
} else {
// New comment.
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.payload.number,
body: commentBody
})
}
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env:
QW_TEST_DATABASE_URL: postgres://quickwit-dev:quickwit-dev@localhost:5432/quickwit-metastore-dev
RUST_BACKTRACE: 1
RUSTDOCFLAGS: -Dwarnings -Arustdoc::private_intra_doc_links
RUSTFLAGS: -Dwarnings
RUSTFLAGS: -Dwarnings --cfg tokio_unstable

# Ensures that we cancel running jobs for the same PR / same workflow.
concurrency:
Expand Down Expand Up @@ -92,6 +92,7 @@ jobs:
if: always() && steps.modified.outputs.rust_src == 'true'
run: python3 ./run_tests.py --binary ../target/debug/quickwit
working-directory: ./quickwit/rest-api-tests

lints:
name: Lints
runs-on: "ubuntu-latest"
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ env:
QW_S3_ENDPOINT: "http://localhost:4566" # Services are exposed as localhost because we are not running coverage in a container.
QW_S3_FORCE_PATH_STYLE_ACCESS: 1
QW_TEST_DATABASE_URL: postgres://quickwit-dev:quickwit-dev@localhost:5432/quickwit-metastore-dev
RUSTFLAGS: -Dwarnings --cfg tokio_unstable

jobs:
test:
Expand Down Expand Up @@ -143,7 +144,8 @@ jobs:
- name: Install Rust
run: rustup update stable

- uses: taiki-e/install-action@v2
- name: Install cargo-llvm-cov, cargo-nextest, and protoc
uses: taiki-e/install-action@v2
with:
tool: cargo-llvm-cov,nextest,protoc

Expand Down
4 changes: 1 addition & 3 deletions .github/workflows/publish_docker_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ jobs:
include:
- os: ubuntu-latest
platform: linux/amd64
# Using 16 vcpu on arm as 8vcpu seems to stay stuck on runner acquisition.
# (5/13). We can switch back to 8vcpu one buildjet starts working as intended again.
- os: buildjet-16vcpu-ubuntu-2204-arm
- os: buildjet-8vcpu-ubuntu-2204-arm
platform: linux/arm64
runs-on: ${{ matrix.os }}
steps:
Expand Down
12 changes: 9 additions & 3 deletions .github/workflows/publish_lambda_packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,15 @@ jobs:
- name: Install rustup
run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain none -y
- name: Install python dependencies
run: pip install ./distribution/lambda
- name: Mypy lint
run: mypy distribution/lambda/
run: |
pip install --user pipenv
pipenv install --system
working-directory: ./distribution/lambda
- name: Lint and format
run: |
mypy .
black . --check
working-directory: ./distribution/lambda
- name: Retrieve and export commit date, hash, and tags
run: |
echo "QW_COMMIT_DATE=$(TZ=UTC0 git log -1 --format=%cd --date=format-local:%Y-%m-%dT%H:%M:%SZ)" >> $GITHUB_ENV
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ui-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ jobs:
sudo apt-get -y install protobuf-compiler
rustup show
CI=false yarn --cwd quickwit-ui build
cargo build --features=postgres
RUSTFLAGS="--cfg tokio_unstable" cargo build --features=postgres
mkdir qwdata
cargo run --features=postgres -- run --service searcher --service metastore --config ../config/quickwit.yaml &
RUSTFLAGS="--cfg tokio_unstable" cargo run --features=postgres -- run --service searcher --service metastore --config ../config/quickwit.yaml &
yarn --cwd quickwit-ui cypress run
- name: Lint
command: yarn --cwd quickwit-ui lint
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ COPY --from=ui-builder /quickwit/quickwit-ui/build /quickwit/quickwit-ui/build
WORKDIR /quickwit

RUN echo "Building workspace with feature(s) '$CARGO_FEATURES' and profile '$CARGO_PROFILE'" \
&& cargo build \
&& RUSTFLAGS="--cfg tokio_unstable" \
cargo build \
-p quickwit-cli \
--features $CARGO_FEATURES \
--bin quickwit \
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ docker-compose-up:
COMPOSE_PROFILES=$(DOCKER_SERVICES) docker compose -f docker-compose.yml up -d --remove-orphans --wait

docker-compose-down:
docker compose -f docker-compose.yml down --remove-orphans
docker compose -p quickwit down --remove-orphans

docker-compose-logs:
docker compose logs -f -t
Expand Down
32 changes: 21 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ Cloud-native search engine for observability (logs, traces, and soon metrics!).

### 🚀 Quickstart

- [Search and analytics on StackOverflow dataset](https://quickwit.io/docs/get-started/quickstart)
- [Tracing analytics with Grafana](https://quickwit.io/docs/get-started/tutorials/trace-analytics-with-grafana)
- [Tracing with Jaeger](https://quickwit.io/docs/get-started/tutorials/tutorial-jaeger)
- [Search and analytics on Stack Overflow dataset](https://quickwit.io/docs/get-started/quickstart)
- [Trace analytics with Grafana](https://quickwit.io/docs/get-started/tutorials/trace-analytics-with-grafana)
- [Distributed tracing with Jaeger](https://quickwit.io/docs/get-started/tutorials/tutorial-jaeger)

<br/>

Expand Down Expand Up @@ -94,34 +94,44 @@ Cloud-native search engine for observability (logs, traces, and soon metrics!).

# 🔮 Roadmap

- Quickwit 0.9 (June 2024)
- Indexing and search performance improvements
- Index configuration updates (retention policy, indexing and search settings)
- Concatenated field

- Quickwit 0.10 (September 2024)
- Schema (doc mapping) updates
- Native distributed ingestion
- Index templates

- [Long-term roadmap](ROADMAP.md)
- Live tail
- SQL
- Security (TLS, authentication, RBAC)
- Alerting
- [and more...](ROADMAP.md)

details are also available on the [github projects](https://github.com/orgs/quickwit-oss/projects?query=is%3Aopen)
Details are also available on the [GitHub projects](https://github.com/orgs/quickwit-oss/projects?query=is%3Aopen)

# 🙋 FAQ

### How can I switch from Elasticsearch or Opensearch to Quickwit?
### How can I switch from Elasticsearch or OpenSearch to Quickwit?

Quickwit supports a large subset of Elasticsearch/Opensearch API.
Quickwit supports a large subset of the Elasticsearch/OpenSearch API.

For instance, it has a ES-compatible ingest API to make it easier to migrate your log shippers (Vector, Fluent Bit, Syslog, ...) to Quickwit.
For instance, it has an ES-compatible ingest API to make it easier to migrate your log shippers (Vector, Fluent Bit, Syslog, ...) to Quickwit.

On the search-side, the most popular Elasticsearch endpoints, query DSL, and even aggregations are supported.
On the search side, the most popular Elasticsearch endpoints, query DSL, and even aggregations are supported.

The list of available endpoints and queries is available [here](https://quickwit.io/docs/reference/es_compatible_api), while the list of supported aggregations is available [here](https://quickwit.io/docs/reference/aggregation).

Let us know if part of the API you are using is missing!

If the client you are using is refusing to connect to Quickwit due to missing headers, you can use the `extra_headers` option in the [node configuration](https://quickwit.io/docs/configuration/node-config#rest-configuration) to impersonate any compatible version of Elasticsearch or Opensearch.
If the client you are using is refusing to connect to Quickwit due to missing headers, you can use the `extra_headers` option in the [node configuration](https://quickwit.io/docs/configuration/node-config#rest-configuration) to impersonate any compatible version of Elasticsearch or OpenSearch.

### How is Quickwit different from traditional search engines like Elasticsearch or Solr?

The core difference and advantage of Quickwit are its architecture built from the ground to search on cloud storage. We optimized IO paths, revamped the index data structures and made search stateless and sub-second on cloud storage.
The core difference and advantage of Quickwit is its architecture built from the ground up to search on cloud storage. We optimized IO paths, revamped the index data structures and made search stateless and sub-second on cloud storage.

### How does Quickwit compare to Elastic in terms of cost?

Expand All @@ -132,7 +142,7 @@ We estimate that Quickwit can be up to 10x cheaper on average than Elastic. To u
Quickwit is open-source under the GNU Affero General Public License Version 3 - AGPLv3. Fundamentally, this means you are free to use Quickwit for your project if you don't modify Quickwit. However, if you do and you are distributing your modified version to the public, you have to make the modifications public.
We also provide a commercial license for enterprises to provide support and a voice on our roadmap.

### Is it possible to setup Quickwit for a High Availability (HA)?
### Is it possible to set up Quickwit for High Availability (HA)?

HA is available for search; for indexing, it's available only with a Kafka source.

Expand Down
1 change: 1 addition & 0 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Metrics
- Geospatial search
- Native Go, Java, and Python clients
- SQL
- Pipe-based query language
- Security (TLS, authentication, RBAC)
- and more...
Expand Down
6 changes: 3 additions & 3 deletions distribution/ecs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ Metastore database backups are disabled as restoring one would lead to
inconsistencies with the index store on S3. To ensure high availability, you
should enable `rds_config.multi_az` instead. To use your own Postgres database
instead of creating a new RDS instance, configure the
`external_postgres_uri_ssm_parameter_arn` variable (e.g
`postgres://user:password@domain:port/db`).
`external_postgres_uri_secret_arn` variable (e.g., the ARN of an SSM parameter with
the value `postgres://user:password@domain:port/db`).

Using NAT Gateways for the image registry is quite costly (approx. $0.05/hour/AZ). If
you are not already using NAT Gateways in the AZs where Quickwit will be
Expand Down Expand Up @@ -64,7 +64,7 @@ IAM policies to indexers.
We provide an example of self-contained deployment with an ad-hoc VPC.

> [!IMPORTANT]
> This stack costs ~$150/month to run (Fargate tasks, NAT Gateways
> This stack costs ~$200/month to run (Fargate tasks, NAT Gateways
> and RDS)
### Deploy the Quickwit module and connect through a bastion
Expand Down
2 changes: 1 addition & 1 deletion distribution/ecs/example/terraform.tf
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ module "quickwit" {
# multi_az = false
# }

# external_postgres_uri_ssm_parameter_arn = aws_ssm_parameter.postgres_uri.arn
# external_postgres_uri_secret_arn = aws_ssm_parameter.postgres_uri.arn

## Example logging configuration
# sidecar_container_definitions = {
Expand Down
4 changes: 2 additions & 2 deletions distribution/ecs/quickwit/configs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ locals {

quickwit_index_s3_prefix = var.quickwit_index_s3_prefix == "" ? aws_s3_bucket.index[0].id : var.quickwit_index_s3_prefix

use_external_rds = var.external_postgres_uri_ssm_parameter_arn != ""
postgres_uri_parameter_arn = var.external_postgres_uri_ssm_parameter_arn != "" ? var.external_postgres_uri_ssm_parameter_arn : aws_ssm_parameter.postgres_credential[0].arn
use_external_rds = var.external_postgres_uri_secret_arn != ""
postgres_uri_secret_arn = var.external_postgres_uri_secret_arn != "" ? var.external_postgres_uri_secret_arn : aws_ssm_parameter.postgres_credential[0].arn
}

resource "random_id" "module" {
Expand Down
3 changes: 1 addition & 2 deletions distribution/ecs/quickwit/iam.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ data "aws_iam_policy_document" "quickwit_task_permission" {
statement {
actions = [
"s3:ListBucket",
"s3:ListObjects",
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
Expand Down Expand Up @@ -47,7 +46,7 @@ data "aws_iam_policy_document" "quickwit_task_execution_permission" {
statement {
actions = ["ssm:GetParameters"]

resources = [local.postgres_uri_parameter_arn]
resources = [local.postgres_uri_secret_arn]
}

statement {
Expand Down
Loading

0 comments on commit 77b5531

Please sign in to comment.