diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c579c79648f..a5e6b2bdeeb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -57,8 +57,8 @@ Run `make test-all` to run all tests. * `make -k test-all docker-compose-down` - the same as above, but tears down the Docker services after running all the tests. * `make fmt` - runs formatter, this command requires the nightly toolchain to be installed by running `rustup toolchain install nightly`. * `make fix` - runs formatter and clippy checks. -* `make typos` - runs the spellcheck tool over the codebase. (Install by running `cargo install typos`) -* `make build-docs` - builds docs. +* `make typos` - runs the spellcheck tool over the codebase. (Install by running `cargo install typos-cli`) +* `make docs` - builds docs. * `make docker-compose-up` - starts Docker services. * `make docker-compose-down` - stops Docker services. * `make docker-compose-logs` - shows Docker logs. diff --git a/_typos.toml b/_typos.toml index 6bf83d5e6e0..fa9a3e64ed3 100644 --- a/_typos.toml +++ b/_typos.toml @@ -1,2 +1,6 @@ [files] extend-exclude = ["**/*.json"] + +[default.extend-words] +# Don't correct the surname "Teh" +teh = "teh" diff --git a/distribution/ecs/README.md b/distribution/ecs/README.md index a1e20c1acad..22ba1c48b3e 100644 --- a/distribution/ecs/README.md +++ b/distribution/ecs/README.md @@ -42,7 +42,7 @@ deployed, you should probably push the Quickwit image to ECR and use ECR interface VPC endpoints instead (approx. ~$0.01/hour/AZ). When using the default image, you will quickly run into the Docker Hub rate -limiting. We recommand pushing the Quickwit image to ECR and configure that as +limiting. We recommend pushing the Quickwit image to ECR and configuring it as `quickwit_image`. Note that the architecture of the image that you push to ECR must match the `quickwit_cpu_architecture` variable (`ARM64` by default). diff --git a/distribution/ecs/quickwit/variables.tf b/distribution/ecs/quickwit/variables.tf index ef3be5e1467..2dfd2e14967 100644 --- a/distribution/ecs/quickwit/variables.tf +++ b/distribution/ecs/quickwit/variables.tf @@ -65,7 +65,7 @@ variable "enable_cloudwatch_logging" { } variable "log_configuration" { - description = "Custom log configuraiton for Quickwit tasks" + description = "Custom log configuration for Quickwit tasks" default = {} } diff --git a/distribution/lambda/README.md b/distribution/lambda/README.md index c4c64d5c344..48db36d878c 100644 --- a/distribution/lambda/README.md +++ b/distribution/lambda/README.md @@ -52,7 +52,7 @@ Provided demonstration setups: ### Deploy and run -The Makefile is a usefull entrypoint to show how the Lambda deployment can used. +The Makefile is a useful entrypoint to show how the Lambda deployment can be used.
Configure your shell and AWS account: ```bash diff --git a/distribution/lambda/cdk/cli.py b/distribution/lambda/cdk/cli.py index f7374296d58..c18fd14f289 100644 --- a/distribution/lambda/cdk/cli.py +++ b/distribution/lambda/cdk/cli.py @@ -382,7 +382,7 @@ def benchmark_hdfs_indexing(): indexer_result = invoke_hdfs_indexer() bench_result["lambda_report"] = indexer_result.extract_report() except Exception as e: - bench_result["invokation_error"] = repr(e) + bench_result["invocation_error"] = repr(e) print(f"Failed to invoke indexer") with open(f"lambda-bench.log", "a+") as f: @@ -409,7 +409,7 @@ def benchmark_hdfs_search(payload: str): indexer_result = invoke_hdfs_searcher(payload, download_logs=False) bench_result["lambda_report"] = indexer_result.extract_report() except Exception as e: - bench_result["invokation_error"] = repr(e) + bench_result["invocation_error"] = repr(e) print(f"Failed to invoke searcher") with open(f"lambda-bench.log", "a+") as f: diff --git a/docker-compose.yml b/docker-compose.yml index f3ada05ad80..58b9d8b99f7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -53,7 +53,7 @@ services: image: postgres:${POSTGRES_VERSION:-12.17-alpine} container_name: postgres ports: - - "${MAP_HOST_POSTGRESS:-127.0.0.1}:5432:5432" + - "${MAP_HOST_POSTGRES:-127.0.0.1}:5432:5432" profiles: - all - postgres diff --git a/docs/configuration/index-config.md b/docs/configuration/index-config.md index 60b044d42ce..ef6383c06cb 100644 --- a/docs/configuration/index-config.md +++ b/docs/configuration/index-config.md @@ -135,7 +135,7 @@ fast: | `tokenizer` | Name of the `Tokenizer`. ([See tokenizers](#description-of-available-tokenizers)) for a list of available tokenizers. | `default` | | `record` | Describes the amount of information indexed, choices between `basic`, `freq` and `position` | `basic` | | `fieldnorms` | Whether to store fieldnorms for the field. Fieldnorms are required to calculate the BM25 Score of the document. | `false` | -| `fast` | Whether value is stored in a fast field. The fast field will contain the term ids and the dictionary. The default behaviour for `true` is to store the original text unchanged. The normalizers on the fast field is seperately configured. It can be configured via `normalizer: lowercase`. ([See normalizers](#description-of-available-normalizers)) for a list of available normalizers. | `false` | +| `fast` | Whether value is stored in a fast field. The fast field will contain the term ids and the dictionary. The default behaviour for `true` is to store the original text unchanged. The normalizer on the fast field is separately configured. It can be configured via `normalizer: lowercase`. ([See normalizers](#description-of-available-normalizers)) for a list of available normalizers. | `false` | ##### Description of available tokenizers @@ -327,7 +327,7 @@ stored: true indexed: true fast: true input_format: hex -output_foramt: hex +output_format: hex ``` **Parameters for bytes field** @@ -432,7 +432,7 @@ tokenizer: default record: basic ``` -Concatenate fields don't support fast fields, and are never stored. They uses their own tokenizer, independantly of the +Concatenate fields don't support fast fields, and are never stored. They use their own tokenizer, independently of the tokenizer configured on the individual fields. At query time, concatenate fields don't support range queries. Only the following types are supported inside a concatenate field: text, bool, i64, u64, json.
Other types are rejected @@ -458,7 +458,7 @@ when the features are supported, add these: --- Only the following types are supported inside a concatenate field: text, datetime, bool, i64, u64, ip, json. Other types are rejected --- -Datetime can only be queried in their RFC-3339 form, possibly omiting later components. # todo! will have to confirm this is achievable +Datetime can only be queried in their RFC-3339 form, possibly omitting later components. # todo! will have to confirm this is achievable --- plan: - implement text/bool/i64/u64 (nothing to do on search side for it to work). all gets converted to strings diff --git a/docs/configuration/storage-config.md b/docs/configuration/storage-config.md index a1bc613fe3f..e531870a281 100644 --- a/docs/configuration/storage-config.md +++ b/docs/configuration/storage-config.md @@ -66,7 +66,7 @@ Hardcoding credentials into configuration files is not secure and strongly disco | Env variable | Description | | --- | --- | | `QW_S3_ENDPOINT` | Custom S3 endpoint. | -| `QW_S3_MAX_CONCURRENCY` | Limit the number of concurent requests to S3 | +| `QW_S3_MAX_CONCURRENCY` | Limit the number of concurrent requests to S3 | #### Storage flavors diff --git a/docs/deployment/cluster-sizing.md b/docs/deployment/cluster-sizing.md index a82093a6abd..37e985e81ce 100644 --- a/docs/deployment/cluster-sizing.md +++ b/docs/deployment/cluster-sizing.md @@ -62,7 +62,7 @@ Searcher nodes: - Searcher nodes don't use disk unless the [split cache](../configuration/node-config.md#Searcher-split-cache-configuration) is - explicitely enabled + explicitly enabled One strength of Quickwit is that its Searchers are stateless, which makes it easy to scale them up and down based on the workload. Scale the number of diff --git a/docs/deployment/kubernetes/glasskube.md b/docs/deployment/kubernetes/glasskube.md index c365c977fa5..4712d2a7e51 100644 --- a/docs/deployment/kubernetes/glasskube.md +++ b/docs/deployment/kubernetes/glasskube.md @@ -15,7 +15,7 @@ To deploy Quickwit on Kubernetes, you will need: 1. Install `kubectl` and `glasskube` cli. -To install `kubectl` locally, you can refere to [this documentation](https://kubernetes.io/docs/tasks/tools/#install-kubectl). +To install `kubectl` locally, you can refer to [this documentation](https://kubernetes.io/docs/tasks/tools/#install-kubectl). To install `glasskube` cli locally, you can refer to [this documentation](https://glasskube.dev/docs/getting-started/install) and choose the right installation options according to your operating system. @@ -52,7 +52,7 @@ Or use the CLI instead: glasskube install quickwit ``` -In both, you'll have to set the value of thoses parameters: +In both, you'll have to set the value of those parameters: * `defaultIndexRootUri`: the default index URI is a S3 compliant bucket which usually looks like this: `s3:///` * `metastoreUri`: if you're not using PostgreSQL and object storage, you can pick the same bucket and value you used for the `defaultIndexRootUri` parameter diff --git a/docs/get-started/query-language-intro.md b/docs/get-started/query-language-intro.md index d89e9740067..dcad7f65292 100644 --- a/docs/get-started/query-language-intro.md +++ b/docs/get-started/query-language-intro.md @@ -33,7 +33,7 @@ Quickwit support various types of clauses to express different kinds of conditio | term prefix | `field:prefix*` | `app_name:tant*`
`quick*` | A term clause tests the existence of a token starting with the provided value | yes | | term set | `field:IN [token token ..]` |`severity:IN [error warn]` | A term set clause tests the existence of any of the provided value in the field's tokens| yes | | phrase | `field:"sequence of tokens"` | `full_name:"john doe"` | A phrase clause tests the existence of the provided sequence of tokens | yes | -| phrase prefix | `field:"sequence of tokens"*` | `title:"how to m"*` | A phrase prefix clause tests the exsitence of a sequence of tokens, the last one used like in a prefix clause | yes | +| phrase prefix | `field:"sequence of tokens"*` | `title:"how to m"*` | A phrase prefix clause tests the existence of a sequence of tokens, the last one used like in a prefix clause | yes | | all | `*` | `*` | A match-all clause will match every document | no | | exist | `field:*` | `error:*` | An exist clause tests the existence of any value for the field, it will match only if the field exists | no | | range | `field:bounds` |`duration:[0 TO 1000}`
`last_name:[banner TO miller]` | A term clause tests the existence of a token between the provided bounds | no | diff --git a/docs/internals/scroll.md b/docs/internals/scroll.md index a16c533bd7a..f13930813e3 100644 --- a/docs/internals/scroll.md +++ b/docs/internals/scroll.md @@ -60,7 +60,7 @@ We only mutate the state server side to update the cache whenever needed. The idea here is that if that if the put request failed, we can still return the right results even if we have an obsolete version of the `ScrollContext`. -# Quickwit implementation (improvment, quirks and shortcuts) +# Quickwit implementation (improvement, quirks and shortcuts) We do not do explicitly protect the split from our store Point-In-Time information from deletion. Instead we simply rely on the existing grace period mechanism (a split diff --git a/docs/internals/sorting.md b/docs/internals/sorting.md index 12df74ff9e8..1dc2eca1805 100644 --- a/docs/internals/sorting.md +++ b/docs/internals/sorting.md @@ -2,7 +2,7 @@ Quickwit can sort results based on fastfield values or score. This document discuss where and how it happens. -It also tries to describe optimizations that may be enabled (but are not necessarily implemente) +It also tries to describe optimizations that may be enabled (but are not necessarily implemented) by this behavior. ## Behavior @@ -35,7 +35,7 @@ results. It reduces the risks of inconsistencies between in-split and between-sp `SortOrder` gets new `compare` and `compare_opt` method which can be used to compare two values with respect to the particular sort order required, and with proper handling of the `None` special case. -# Optimization permited +# Optimization permitted Both orders allow an optimization when sorting by date (either direction), by leveraging splits meta-data to know in advance if a split can, or not, contain better results. Changing the sorting diff --git a/docs/reference/cli.md b/docs/reference/cli.md index e97e9effb59..614982e0fed 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -353,9 +353,9 @@ quickwit index ingest | `--index` | ID of the target index | | `--input-path` | Location of the input file. | | `--batch-size-limit` | Size limit of each submitted document batch. | -| `--wait` | Wait for all documents to be commited and available for search before exiting | +| `--wait` | Wait for all documents to be committed and available for search before exiting | | `--force` | Force a commit after the last document is sent, and wait for all documents to be committed and available for search before exiting | -| `--commit-timeout` | Timeout for ingest operations that require waiting for the final commit (`--wait` or `--force`). This is different from the `commit_timeout_secs` indexing setting, which sets the maximum time before commiting splits after their creation. | +| `--commit-timeout` | Timeout for ingest operations that require waiting for the final commit (`--wait` or `--force`). This is different from the `commit_timeout_secs` indexing setting, which sets the maximum time before committing splits after their creation. | *Examples* diff --git a/docs/reference/es_compatible_api.md b/docs/reference/es_compatible_api.md index f859ea4d6f5..99b4a08c2aa 100644 --- a/docs/reference/es_compatible_api.md +++ b/docs/reference/es_compatible_api.md @@ -21,7 +21,7 @@ POST api/v1/_elastic/_bulk POST api/v1/_elastic//_bulk ``` -The _bulk ingestion API makes it possible to index a batch of documents, possibly targetting several indices in the same request. 
+The _bulk ingestion API makes it possible to index a batch of documents, possibly targeting several indices in the same request. #### Request Body example @@ -228,7 +228,7 @@ You can pass the `sort` value of the last hit in a subsequent request where othe ```json { // keep all fields from the original request - "seach_after": [ + "search_after": [ 1701962929199 ] } @@ -256,7 +256,7 @@ POST api/v1/_elastic/_msearch Runs several search requests at once. The payload is expected to alternate: -- a `header` json object, containing the targetted index id. +- a `header` json object, containing the targeted index id. - a `search request body` as defined in the [`_search` endpoint section]. @@ -725,7 +725,7 @@ Search APIs that accept requests path parameter also support multi-ta ### Multi-target syntax -In multi-target syntax, you can use a comma or its URL encoded version '%2C' seperated list to run a request on multiple indices: test1,test2,test3. You can also sue [glob-like](https://en.wikipedia.org/wiki/Glob_(programming)) wildcard ( \* ) expressions to target indices that match a pattern: test\* or \*test or te\*t or \*test\*. +In multi-target syntax, you can use a comma or its URL encoded version '%2C' separated list to run a request on multiple indices: test1,test2,test3. You can also use [glob-like](https://en.wikipedia.org/wiki/Glob_(programming)) wildcard ( \* ) expressions to target indices that match a pattern: test\* or \*test or te\*t or \*test\*. The multi-target expression has the following constraints: diff --git a/docs/reference/query-language.md b/docs/reference/query-language.md index 4449ba4f32b..7e1d1205f46 100644 --- a/docs/reference/query-language.md +++ b/docs/reference/query-language.md @@ -95,7 +95,7 @@ Matches if the document contains any of the tokens provided. ###### Examples `field:IN [ab cd]` will match 'ab' or 'cd', but nothing else. -###### Perfomance Note +###### Performance Note This is a lot like writing `field:ab OR field:cd`. When there are only a handful of terms to search for, using ORs is usually faster. When there are many values to match, a term set query can become more efficient. @@ -150,7 +150,7 @@ bounds = term TO term | '*' TO term comparison_range = comparison_operator term -comparision_operator = '<' | '>' | '<=' | '>=' +comparison_operator = '<' | '>' | '<=' | '>=' ``` Matches if the document contains a token between the provided bounds for that field. diff --git a/docs/reference/rest-api.md b/docs/reference/rest-api.md index a7d02954266..b70286a01de 100644 --- a/docs/reference/rest-api.md +++ b/docs/reference/rest-api.md @@ -61,8 +61,8 @@ POST api/v1//search | Variable | Type | Description | Default value | |---------------------|------------|-----------------|-----------------| | `query` | `String` | Query text. See the [query language doc](query-language.md) | _required_ | -| `start_timestamp` | `i64` | If set, restrict search to documents with a `timestamp >= start_timestamp`, taking advantage of potential time pruning oportunities. The value must be in seconds. | | -| `end_timestamp` | `i64` | If set, restrict search to documents with a `timestamp < end_timestamp`, taking advantage of potential time pruning oportunities. The value must be in seconds. | | +| `start_timestamp` | `i64` | If set, restrict search to documents with a `timestamp >= start_timestamp`, taking advantage of potential time pruning opportunities. The value must be in seconds.
| | +| `end_timestamp` | `i64` | If set, restrict search to documents with a `timestamp < end_timestamp`, taking advantage of potential time pruning opportunities. The value must be in seconds. | | | `start_offset` | `Integer` | Number of documents to skip | `0` | | `max_hits` | `Integer` | Maximum number of hits to return (by default 20) | `20` | | `search_field` | `[String]` | Fields to search on if no field name is specified in the query. Comma-separated list, e.g. "field1,field2" | index_config.search_settings.default_search_fields | diff --git a/quickwit/quickwit-actors/src/lib.rs b/quickwit/quickwit-actors/src/lib.rs index bc214481ea2..57667a5ced2 100644 --- a/quickwit/quickwit-actors/src/lib.rs +++ b/quickwit/quickwit-actors/src/lib.rs @@ -90,13 +90,13 @@ fn heartbeat_from_env_or_default() -> Duration { return Duration::from_millis(500); } match std::env::var("QW_ACTOR_HEARTBEAT_SECS") { - Ok(actor_hearbeat_secs_str) => { - if let Ok(actor_hearbeat_secs) = actor_hearbeat_secs_str.parse::() { - info!("set the actor heartbeat to {actor_hearbeat_secs} seconds"); - return Duration::from_secs(actor_hearbeat_secs.get()); + Ok(actor_heartbeat_secs_str) => { + if let Ok(actor_heartbeat_secs) = actor_heartbeat_secs_str.parse::() { + info!("set the actor heartbeat to {actor_heartbeat_secs} seconds"); + return Duration::from_secs(actor_heartbeat_secs.get()); } else { warn!( - "failed to parse `QW_ACTOR_HEARTBEAT_SECS={actor_hearbeat_secs_str}` in \ + "failed to parse `QW_ACTOR_HEARTBEAT_SECS={actor_heartbeat_secs_str}` in \ seconds > 0, using default heartbeat (30 seconds)" ); }; diff --git a/quickwit/quickwit-cli/src/index.rs b/quickwit/quickwit-cli/src/index.rs index 9028fa26c56..981bff53de2 100644 --- a/quickwit/quickwit-cli/src/index.rs +++ b/quickwit/quickwit-cli/src/index.rs @@ -149,7 +149,7 @@ pub fn build_index_command() -> Command { Arg::new("wait") .long("wait") .short('w') - .help("Wait for all documents to be commited and available for search before exiting") + .help("Wait for all documents to be committed and available for search before exiting") .action(ArgAction::SetTrue), // TODO remove me after Quickwit 0.7. Arg::new("v2") @@ -165,7 +165,7 @@ pub fn build_index_command() -> Command { .conflicts_with("wait"), Arg::new("commit-timeout") .long("commit-timeout") - .help("Timeout for ingest operations that require waiting for the final commit (`--wait` or `--force`). This is different from the `commit_timeout_secs` indexing setting, which sets the maximum time before commiting splits after their creation.") + .help("Timeout for ingest operations that require waiting for the final commit (`--wait` or `--force`). 
This is different from the `commit_timeout_secs` indexing setting, which sets the maximum time before committing splits after their creation.") .required(false) .global(true), ]) diff --git a/quickwit/quickwit-cli/src/lib.rs b/quickwit/quickwit-cli/src/lib.rs index 21ec50cfdaf..98029541f05 100644 --- a/quickwit/quickwit-cli/src/lib.rs +++ b/quickwit/quickwit-cli/src/lib.rs @@ -397,7 +397,7 @@ pub mod busy_detector { }) .is_err() { - // a debug was emited recently, don't emit log for this one + // a debug was emitted recently, don't emit log for this one SUPPRESSED_DEBUG_COUNT.fetch_add(1, Ordering::Relaxed); return; } diff --git a/quickwit/quickwit-cli/src/split.rs b/quickwit/quickwit-cli/src/split.rs index 039aeb280ce..ceeb8e5e45f 100644 --- a/quickwit/quickwit-cli/src/split.rs +++ b/quickwit/quickwit-cli/src/split.rs @@ -124,7 +124,7 @@ impl FromStr for OutputFormat { "pretty-json" | "pretty_json" => Ok(OutputFormat::PrettyJson), "table" => Ok(OutputFormat::Table), _ => bail!( - "unkown output format `{output_format_str}`. supported formats are: `table`, \ + "unknown output format `{output_format_str}`. supported formats are: `table`, \ `json`, and `pretty-json`" ), } diff --git a/quickwit/quickwit-cluster/src/member.rs b/quickwit/quickwit-cluster/src/member.rs index 942df8f0c1b..9970089f1da 100644 --- a/quickwit/quickwit-cluster/src/member.rs +++ b/quickwit/quickwit-cluster/src/member.rs @@ -131,7 +131,7 @@ fn parse_indexing_cpu_capacity(node_state: &NodeState) -> CpuCapacity { if let Ok(indexing_capacity) = CpuCapacity::from_str(indexing_capacity_str) { indexing_capacity } else { - error!(indexing_capacity=?indexing_capacity_str, "received an unparseable indexing capacity from node"); + error!(indexing_capacity=?indexing_capacity_str, "received an unparsable indexing capacity from node"); CpuCapacity::zero() } } diff --git a/quickwit/quickwit-codegen/src/codegen.rs b/quickwit/quickwit-codegen/src/codegen.rs index 72d5507af42..2775d712b1c 100644 --- a/quickwit/quickwit-codegen/src/codegen.rs +++ b/quickwit/quickwit-codegen/src/codegen.rs @@ -110,7 +110,7 @@ impl CodegenBuilder { self } - /// Generates `RpcName` trait implentations for request types. + /// Generates `RpcName` trait implementations for request types. 
pub fn generate_rpc_name_impls(mut self) -> Self { self.generate_prom_labels_for_requests = true; self @@ -517,7 +517,7 @@ fn generate_client(context: &CodegenContext) -> TokenStream { let tower_layer_stack_name = &context.tower_layer_stack_name; let mock_name = &context.mock_name; let mock_wrapper_name = quote::format_ident!("{}Wrapper", mock_name); - let error_mesage = format!( + let error_message = format!( "`{}` must be wrapped in a `{}`: use `{}::from_mock(mock)` to instantiate the client", mock_name, mock_wrapper_name, client_name ); @@ -555,7 +555,7 @@ fn generate_client(context: &CodegenContext) -> TokenStream { T: #service_name, { #[cfg(any(test, feature = "testsuite"))] - assert!(std::any::TypeId::of::() != std::any::TypeId::of::<#mock_name>(), #error_mesage); + assert!(std::any::TypeId::of::() != std::any::TypeId::of::<#mock_name>(), #error_message); Self { inner: #inner_client_name(std::sync::Arc::new(instance)), } diff --git a/quickwit/quickwit-common/src/lib.rs b/quickwit/quickwit-common/src/lib.rs index f2a18afe0bf..dff26829584 100644 --- a/quickwit/quickwit-common/src/lib.rs +++ b/quickwit/quickwit-common/src/lib.rs @@ -207,7 +207,7 @@ pub fn num_cpus() -> usize { match std::thread::available_parallelism() { Ok(num_cpus) => num_cpus.get(), Err(io_error) => { - error!(errror=?io_error, "failed to detect the number of threads available: arbitrarily returning 2"); + error!(error=?io_error, "failed to detect the number of threads available: arbitrarily returning 2"); 2 } } diff --git a/quickwit/quickwit-common/src/rate_limited_tracing.rs b/quickwit/quickwit-common/src/rate_limited_tracing.rs index c03d39ece34..e067c2cecbe 100644 --- a/quickwit/quickwit-common/src/rate_limited_tracing.rs +++ b/quickwit/quickwit-common/src/rate_limited_tracing.rs @@ -28,8 +28,8 @@ use coarsetime::{Duration, Instant}; /// /// `call_count` is the number of calls since the last upgrade of generation, it's stored /// in the lower 32b of the atomic, so it can just be incremented on the fast path. -/// `generation` is the number of time we reseted the `call_count`. It isn't used as is, and -/// is just compared to itself to detect and handle properly concurent resets frop multiple +/// `generation` is the number of time we reset the `call_count`. It isn't used as is, and +/// is just compared to itself to detect and handle properly concurrent resets from multiple /// threads. #[derive(Clone, Copy)] struct LogSiteMetadata { @@ -62,7 +62,7 @@ pub fn should_log Instant>( ) -> bool { // count_atomic is treated as 2 u32: upper bits count "generation", lower bits count number of // calls since LAST_RESET. We assume there won't be 2**32 calls to this log in ~60s. - // Generation is free to wrap arround. + // Generation is free to wrap around. // Because the lower 32 bits are storing the log count, we can // increment the entire u64 to record this log call. diff --git a/quickwit/quickwit-common/src/temp_dir.rs b/quickwit/quickwit-common/src/temp_dir.rs index 8a96658aebc..2c2c08a5848 100644 --- a/quickwit/quickwit-common/src/temp_dir.rs +++ b/quickwit/quickwit-common/src/temp_dir.rs @@ -130,10 +130,10 @@ impl<'a> Builder<'a> { size } - /// Constracts the prefix from the parts specified by the join function. - /// If parts are small enough they will be simply contcatenated with the + /// Constructs the prefix from the parts specified by the join function. + /// If parts are small enough they will be simply concatenated with the /// separator character in between. 
If parts are too large they will - /// trancated by replacing the middle of each part with "..". The resulting + /// truncated by replacing the middle of each part with "..". The resulting /// string will be at most max_length characters long. fn prefix(&self) -> io::Result { if self.parts.is_empty() { @@ -144,7 +144,7 @@ impl<'a> Builder<'a> { } else { self.parts.len() - 1 }; - // We want to preserve at least one letter from each part with separatos. + // We want to preserve at least one letter from each part with separators. if self.max_length < self.parts.len() + separator_count + self.num_rand_chars { return Err(io::Error::new( io::ErrorKind::InvalidInput, @@ -163,7 +163,7 @@ impl<'a> Builder<'a> { // parts can use in addition to the average. for part in &self.parts { if part.len() <= average_len { - // Adjust the avaible length from the parts that are shorter + // Adjust the available length from the parts that are shorter leftovers += average_len - part.len(); } } diff --git a/quickwit/quickwit-common/src/tower/one_task_per_call_layer.rs b/quickwit/quickwit-common/src/tower/one_task_per_call_layer.rs index d4d88dc8e6b..caf7ca3cdec 100644 --- a/quickwit/quickwit-common/src/tower/one_task_per_call_layer.rs +++ b/quickwit/quickwit-common/src/tower/one_task_per_call_layer.rs @@ -199,7 +199,7 @@ mod tests { let mut one_task_per_call_service = OneTaskPerCallService { service }; tokio::select!( _ = async { one_task_per_call_service.ready().await.unwrap().call(Request).await } => { - panic!("this sould have timed out"); + panic!("this should have timed out"); }, _ = tokio::time::sleep(Duration::from_millis(10)) => (), ); diff --git a/quickwit/quickwit-common/src/tower/rate_estimator.rs b/quickwit/quickwit-common/src/tower/rate_estimator.rs index 22ad5a671cf..513343f3231 100644 --- a/quickwit/quickwit-common/src/tower/rate_estimator.rs +++ b/quickwit/quickwit-common/src/tower/rate_estimator.rs @@ -185,8 +185,8 @@ impl Bucket { let current_bits = self.bits.fetch_add(work, Ordering::Relaxed) + work; let bucket_val = BucketVal::from(current_bits); - // This is not the bucket we targetted, we need to retry and update the bucket with the new - // bucket_ord and a resetted value. + // This is not the bucket we targeted, we need to retry and update the bucket with the new + // bucket_ord and a reset value. if bucket_val.bucket_ord_hash != expected_bucket_ord_hash { let mut expected_bits = current_bits; let new_bits: u64 = BucketVal { diff --git a/quickwit/quickwit-config/src/index_config/serialize.rs b/quickwit/quickwit-config/src/index_config/serialize.rs index f4e9554ea54..f789158197e 100644 --- a/quickwit/quickwit-config/src/index_config/serialize.rs +++ b/quickwit/quickwit-config/src/index_config/serialize.rs @@ -381,7 +381,7 @@ mod test { assert_eq!(updated_config.index_uri.as_str(), "s3://mybucket/hdfs-logs"); } { - // use the current index_uri explicitely + // use the current index_uri explicitly let updated_config_yaml = r#" version: 0.8 index_id: hdfs-logs diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index 725b69d8556..f8a5611d75e 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -290,7 +290,7 @@ impl Default for IngestApiConfig { impl IngestApiConfig { /// Returns the replication factor, as defined in environment variable or in the configuration - /// in that order (the environment variable can overrrides the configuration). 
+ /// in that order (the environment variable can override the configuration). pub fn replication_factor(&self) -> anyhow::Result { if let Ok(replication_factor_str) = env::var("QW_INGEST_REPLICATION_FACTOR") { let replication_factor = match replication_factor_str.trim() { diff --git a/quickwit/quickwit-config/src/source_config/mod.rs b/quickwit/quickwit-config/src/source_config/mod.rs index 2354b608d8e..bc1c0cf3168 100644 --- a/quickwit/quickwit-config/src/source_config/mod.rs +++ b/quickwit/quickwit-config/src/source_config/mod.rs @@ -476,7 +476,7 @@ impl TransformConfig { // If we are missing the VRL feature we do not return an error here, // to avoid breaking unit tests. // - // We do return an explicit error on instanciation of the program however. + // We do return an explicit error on instantiation of the program however. Ok(()) } diff --git a/quickwit/quickwit-config/src/templating.rs b/quickwit/quickwit-config/src/templating.rs index 83443df9270..86ad5503365 100644 --- a/quickwit/quickwit-config/src/templating.rs +++ b/quickwit/quickwit-config/src/templating.rs @@ -57,7 +57,7 @@ pub fn render_config(config_content: &[u8]) -> Result { // This line is commented out, return the line as is. captures .get(0) - .expect("0th capture should aways be set") + .expect("0th capture should always be set") .as_str() .to_string() } else if let Ok(env_var_value) = std::env::var(env_var_key) { diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs index ac8ddc7663d..ff1bd5658ad 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs @@ -32,7 +32,7 @@ use once_cell::sync::OnceCell; use quickwit_common::pretty::PrettySample; use quickwit_proto::indexing::{ ApplyIndexingPlanRequest, CpuCapacity, IndexingService, IndexingTask, PIPELINE_FULL_CAPACITY, - PIPELINE_THROUGHTPUT, + PIPELINE_THROUGHPUT, }; use quickwit_proto::metastore::SourceType; use quickwit_proto::types::NodeId; @@ -63,7 +63,7 @@ pub struct IndexingSchedulerState { pub last_applied_plan_timestamp: Option, } -/// The [`IndexingScheduler`] is responsible for listing indexing tasks and assiging them to +/// The [`IndexingScheduler`] is responsible for listing indexing tasks and assigning them to /// indexers. /// We call this duty `scheduling`. Contrary to what the name suggests, most indexing tasks are /// ever running. We just borrowed the terminology to Kubernetes. @@ -150,10 +150,10 @@ fn compute_load_per_shard(shard_entries: &[&ShardEntry]) -> NonZeroU32 { .div_ceil(num_shards) // A shard throughput cannot exceed PIPELINE_THROUGHPUT in the long term (this is // enforced by the configuration).
- .min(PIPELINE_THROUGHTPUT.as_u64()); + .min(PIPELINE_THROUGHPUT.as_u64()); let num_cpu_millis = (PIPELINE_FULL_CAPACITY.cpu_millis() as u64 * average_throughput_per_shard_bytes) - / PIPELINE_THROUGHTPUT.as_u64(); + / PIPELINE_THROUGHPUT.as_u64(); const MIN_CPU_LOAD_PER_SHARD: u32 = 50u32; NonZeroU32::new((num_cpu_millis as u32).max(MIN_CPU_LOAD_PER_SHARD)).unwrap() } else { diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs index bc9d6ec6d91..fe7c33d1b46 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/mod.rs @@ -292,7 +292,7 @@ fn convert_scheduling_solution_to_physical_plan_single_node_single_source( } fn convert_scheduling_solution_to_physical_plan_single_node( - indexer_assigment: &IndexerAssignment, + indexer_assignment: &IndexerAssignment, previous_tasks: &[IndexingTask], sources: &[SourceToSchedule], id_to_ord_map: &IdToOrdMap, @@ -301,7 +301,7 @@ fn convert_scheduling_solution_to_physical_plan_single_node( for source in sources { let source_num_shards = if let Some(source_ord) = id_to_ord_map.source_ord(&source.source_uid) { - indexer_assigment.num_shards(source_ord) + indexer_assignment.num_shards(source_ord) } else { // This can happen for IngestV1 1u32 @@ -1135,7 +1135,7 @@ mod tests { let node1 = NodeId::new("node1".to_string()); let node2 = NodeId::new("node2".to_string()); // This node is missing from the capacity map. - // It should not be assigned any task despite being pressent in shard locations. + // It should not be assigned any task despite being present in shard locations. let node_missing = NodeId::new("node_missing".to_string()); let mut remaining_num_shards_per_node = HashMap::default(); remaining_num_shards_per_node diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic.rs index d339d8e5459..6714e261085 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic.rs @@ -160,7 +160,7 @@ fn assert_remove_extraneous_shards_post_condition( // ------------------------------------------------------------------------- // Phase 2 -// Releave sources from the node that are exceeding their maximum load. +// Relieve sources from the node that are exceeding their maximum load. fn enforce_indexers_cpu_capacity(problem: &SchedulingProblem, solution: &mut SchedulingSolution) { for indexer_assignment in &mut solution.indexer_assignments { @@ -277,7 +277,7 @@ fn place_unassigned_shards_with_affinity( // We go through the sources in decreasing order of their load, // in two passes. // -// In the first pase, we have a look at +// In the first pass, we have a look at // the nodes with which there is an affinity. 
// // If one of them has room for all of the shards, then we assign all @@ -429,7 +429,7 @@ mod tests { use super::*; #[test] - fn test_remove_extranous_shards() { + fn test_remove_extraneous_shards() { let mut problem = SchedulingProblem::with_indexer_cpu_capacities(vec![mcpu(4_000), mcpu(5_000)]); problem.add_source(1, NonZeroU32::new(1_000u32).unwrap()); @@ -442,7 +442,7 @@ mod tests { } #[test] - fn test_remove_extranous_shards_2() { + fn test_remove_extraneous_shards_2() { let mut problem = SchedulingProblem::with_indexer_cpu_capacities(vec![mcpu(5_000), mcpu(4_000)]); problem.add_source(2, NonZeroU32::new(1_000).unwrap()); diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic_model.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic_model.rs index eee1f416638..95096d9c22e 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic_model.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/scheduling/scheduling_logic_model.rs @@ -31,7 +31,7 @@ pub struct Source { pub source_ord: SourceOrd, pub load_per_shard: NonZeroU32, /// Affinities of the source for each indexer. - /// In the begginning, affinities are initialized to be the count of shards of the source + /// In the beginning, affinities are initialized to be the count of shards of the source /// that are located on the indexer. /// /// As we compute unassigned sources, we decrease the affinity by the given number of shards, diff --git a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs index 2ccaead6801..29111f28b23 100644 --- a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs +++ b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs @@ -2377,8 +2377,8 @@ mod tests { IndexMetadata::for_test(&index_uid.index_id, "ram://indexes/test-index:0"); model.add_index(index_metadata); - let souce_config = SourceConfig::ingest_v2(); - model.add_source(&index_uid, souce_config).unwrap(); + let source_config = SourceConfig::ingest_v2(); + model.add_source(&index_uid, source_config).unwrap(); let progress = Progress::default(); diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index 0dfc283dea4..6f05700fd92 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -1658,7 +1658,7 @@ mod tests { { "name": "concat", "type": "concatenate", - "concatenate_fields": ["inexistant_field"] + "concatenate_fields": ["inexistent_field"] } ] }"# diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs index d235630d262..920e1cc0c55 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs @@ -282,7 +282,7 @@ impl LeafType { let val = u64::from_json_to_self(&json_val, numeric_options.coerce)?; Ok(OneOrIter::one((val).into())) } - LeafType::F64(_) => Err("unsuported concat type: f64".to_string()), + LeafType::F64(_) => Err("unsupported concat type: f64".to_string()), LeafType::Bool(_) => { if let JsonValue::Bool(val) = json_val { Ok(OneOrIter::one((val).into())) @@ -290,11 +290,13 @@ impl LeafType { Err(format!("expected boolean, got 
`{json_val}`")) } } - LeafType::IpAddr(_) => Err("unsuported concat type: IpAddr".to_string()), + LeafType::IpAddr(_) => Err("unsupported concat type: IpAddr".to_string()), LeafType::DateTime(_date_time_options) => { - Err("unsuported concat type: DateTime".to_string()) + Err("unsupported concat type: DateTime".to_string()) + } + LeafType::Bytes(_binary_options) => { + Err("unsupported concat type: DateTime".to_string()) } - LeafType::Bytes(_binary_options) => Err("unsuported concat type: DateTime".to_string()), LeafType::Json(_) => { if let JsonValue::Object(json_obj) = json_val { Ok(OneOrIter::Iter( @@ -2035,8 +2037,8 @@ mod tests { vec!["one.two", "three"] ); assert_eq!(super::build_field_path_from_str(r#"one."#), vec!["one"]); - // Those are invalid field paths, but we chekc that it does not panick. - // Issue #3538 is about validating field paths before trying ot build the path. + // Those are invalid field paths, but we check that it does not panic. + // Issue #3538 is about validating field paths before trying to build the path. assert_eq!(super::build_field_path_from_str("\\."), vec!["."]); assert_eq!(super::build_field_path_from_str("a."), vec!["a"]); assert_eq!(super::build_field_path_from_str(".a"), vec!["", "a"]); diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs index b42b3e40a5a..307a8e7d449 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs @@ -547,7 +547,7 @@ mod tests { for (field, term, pos) in elements { let field = Field::from_field_id(*field); let term = Term::from_field_text(field, term); - // this is a 1 element bound, but it's enought for testing. + // this is a 1 element bound, but it's enough for testing. let range = TermRange { start: Bound::Included(term.clone()), end: Bound::Included(term), diff --git a/quickwit/quickwit-doc-mapper/src/routing_expression/mod.rs b/quickwit/quickwit-doc-mapper/src/routing_expression/mod.rs index 374281e85f9..b72636d9beb 100644 --- a/quickwit/quickwit-doc-mapper/src/routing_expression/mod.rs +++ b/quickwit/quickwit-doc-mapper/src/routing_expression/mod.rs @@ -476,7 +476,7 @@ mod expression_dsl { })(input) } - /// Parse a field name into a path, de-escaping where appropirate. + /// Parse a field name into a path, de-escaping where appropriate. pub(crate) fn parse_field_name(input: &str) -> anyhow::Result>> { let (i, res) = separated_list0(tag("."), escaped_key)(input) .finish() diff --git a/quickwit/quickwit-indexing/Cargo.toml b/quickwit/quickwit-indexing/Cargo.toml index eccfb4f1aa5..d7008a4579c 100644 --- a/quickwit/quickwit-indexing/Cargo.toml +++ b/quickwit/quickwit-indexing/Cargo.toml @@ -125,5 +125,5 @@ name = "doc_process_vrl_bench" harness = false [package.metadata.cargo-machete] -# used to vendor/static build native dependancies +# used to vendor/static build native dependencies ignored = ["libz-sys", "openssl"] diff --git a/quickwit/quickwit-indexing/src/actors/cooperative_indexing.rs b/quickwit/quickwit-indexing/src/actors/cooperative_indexing.rs index 07ddb1c80ef..b036b66f3e7 100644 --- a/quickwit/quickwit-indexing/src/actors/cooperative_indexing.rs +++ b/quickwit/quickwit-indexing/src/actors/cooperative_indexing.rs @@ -62,7 +62,7 @@ static ORIGIN_OF_TIME: Lazy = Lazy::new(Instant::now); /// a uniform distribution over the interval [0, commit_timeout). /// /// Each period of this cycle is divided into three phases. 
-/// - waking [t_wake..t_work_start) acquisition of the period guard (this is instantenous) +/// - waking [t_wake..t_work_start) acquisition of the period guard (this is instantaneous) /// acquisition of the semaphore /// - working [t_work_start..t_work_end) /// - sleeping [t=t_work_end..t_sleep_end) @@ -222,7 +222,7 @@ mod tests { fn assert_approx_equal_sleep_time(left: Duration, right: Duration) { let delta = (left.as_millis() as i128 - right.as_millis() as i128).unsigned_abs(); if delta >= NUDGE_TOLERANCE.mul_f32(1.1).as_millis() { - panic!("{left:?} and {right:?} are not approximatively equal."); + panic!("{left:?} and {right:?} are not approximately equal."); } } diff --git a/quickwit/quickwit-indexing/src/actors/merge_executor.rs b/quickwit/quickwit-indexing/src/actors/merge_executor.rs index fd58195aff4..2ac8ae5b8c3 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_executor.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_executor.rs @@ -512,7 +512,7 @@ impl MergeExecutor { let query_ast: QueryAst = serde_json::from_str(&delete_query.query_ast) .context("invalid query_ast json")?; // We ignore the docmapper default fields when we consider delete query. - // We reparse the query here defensivley, but actually, it should already have been + // We reparse the query here defensively, but actually, it should already have been // done in the delete task rest handler. let parsed_query_ast = query_ast.parse_user_query(&[]).context("invalid query")?; debug!( diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index 7933ad8d201..3fae76eb01c 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -360,7 +360,7 @@ impl Handler for Uploader { if let Err(cause) = upload_result { warn!(cause=?cause, split_id=packaged_split.split_id(), "Failed to upload split. Killing!"); kill_switch.kill(); - bail!("failed to upload split `{}`. killing the actor contex", packaged_split.split_id()); + bail!("failed to upload split `{}`. killing the actor context", packaged_split.split_id()); } packaged_splits_and_metadata.push((packaged_split, metadata)); diff --git a/quickwit/quickwit-indexing/src/models/indexed_split.rs b/quickwit/quickwit-indexing/src/models/indexed_split.rs index b5b37fe8070..b60a39d66c8 100644 --- a/quickwit/quickwit-indexing/src/models/indexed_split.rs +++ b/quickwit/quickwit-indexing/src/models/indexed_split.rs @@ -191,7 +191,7 @@ pub struct IndexedSplitBatchBuilder { pub _split_builders_guard: GaugeGuard<'static>, } -/// Sends notifications to the Publisher that the last batch of splits was emtpy. +/// Sends notifications to the Publisher that the last batch of splits was empty. #[derive(Debug)] pub struct EmptySplit { pub index_uid: IndexUid, diff --git a/quickwit/quickwit-indexing/src/models/merge_scratch.rs b/quickwit/quickwit-indexing/src/models/merge_scratch.rs index 8a718d440b1..cf004e9730a 100644 --- a/quickwit/quickwit-indexing/src/models/merge_scratch.rs +++ b/quickwit/quickwit-indexing/src/models/merge_scratch.rs @@ -24,7 +24,7 @@ use crate::merge_policy::MergeTask; #[derive(Debug)] pub struct MergeScratch { - /// A [`MergeTask`] tracked by either the `MergePlannner` or the `DeleteTaksPlanner` + /// A [`MergeTask`] tracked by either the `MergePlanner` or the `DeleteTaskPlanner` /// See planners docs to understand the usage. pub merge_task: MergeTask, /// Scratch directory for computing the merge. 
diff --git a/quickwit/quickwit-indexing/src/source/file_source.rs b/quickwit/quickwit-indexing/src/source/file_source.rs index 9ad169f701f..e4674f7a5ce 100644 --- a/quickwit/quickwit-indexing/src/source/file_source.rs +++ b/quickwit/quickwit-indexing/src/source/file_source.rs @@ -199,7 +199,7 @@ impl FileSourceReader { } // This function is only called for GZIP file. - // Because they cannot be seeked into, we have to scan them to the right initial position. + // Because they cannot be sought into, we have to scan them to the right initial position. async fn skip(&mut self) -> io::Result<()> { // Allocate once a 64kb buffer. let mut buf = [0u8; 64000]; diff --git a/quickwit/quickwit-indexing/src/source/kinesis/kinesis_source.rs b/quickwit/quickwit-indexing/src/source/kinesis/kinesis_source.rs index 6152c53f3db..0f3da02a5d0 100644 --- a/quickwit/quickwit-indexing/src/source/kinesis/kinesis_source.rs +++ b/quickwit/quickwit-indexing/src/source/kinesis/kinesis_source.rs @@ -355,7 +355,7 @@ pub(super) async fn get_region( if let Some(endpoint) = sdk_config.endpoint_url() { return Ok(RegionOrEndpoint::Endpoint(endpoint.to_string())); } - bail!("unable to sniff region from envioronment") + bail!("unable to sniff region from environment") } #[cfg(all(test, feature = "kinesis-localstack-tests"))] diff --git a/quickwit/quickwit-ingest/src/ingest_v2/broadcast.rs b/quickwit/quickwit-ingest/src/ingest_v2/broadcast.rs index 7b3323d9823..565598ae2b9 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/broadcast.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/broadcast.rs @@ -167,7 +167,7 @@ const SHARD_THROUGHPUT_LONG_TERM_WINDOW_LEN: usize = 12; #[derive(Default)] struct ShardThroughputTimeSeriesMap { - shard_time_series: HashMap<(SourceUid, ShardId), ShardThroughputTimeSerie>, + shard_time_series: HashMap<(SourceUid, ShardId), ShardThroughputTimeSeries>, } impl ShardThroughputTimeSeriesMap { @@ -185,12 +185,12 @@ impl ShardThroughputTimeSeriesMap { .retain(|key, _| shard_throughputs.contains_key(key)); for ((source_uid, shard_id), (shard_state, throughput)) in shard_throughputs { let throughput_measurement = throughput.rescale(Duration::from_secs(1)).work_bytes(); - let shard_time_serie = self + let shard_time_series = self .shard_time_series .entry((source_uid.clone(), shard_id.clone())) .or_default(); - shard_time_serie.shard_state = shard_state; - shard_time_serie.record(throughput_measurement); + shard_time_series.shard_state = shard_state; + shard_time_series.record(throughput_measurement); } } @@ -232,13 +232,13 @@ impl ShardThroughputTimeSeriesMap { } #[derive(Default)] -struct ShardThroughputTimeSerie { +struct ShardThroughputTimeSeries { shard_state: ShardState, measurements: [ByteSize; SHARD_THROUGHPUT_LONG_TERM_WINDOW_LEN], len: usize, } -impl ShardThroughputTimeSerie { +impl ShardThroughputTimeSeries { fn last(&self) -> ByteSize { self.measurements.last().copied().unwrap_or_default() } @@ -740,24 +740,24 @@ mod tests { } #[test] - fn test_shard_throughput_time_serie() { - let mut time_serie = ShardThroughputTimeSerie::default(); - assert_eq!(time_serie.last(), ByteSize::mb(0)); - assert_eq!(time_serie.average(), ByteSize::mb(0)); - time_serie.record(ByteSize::mb(2)); - assert_eq!(time_serie.last(), ByteSize::mb(2)); - assert_eq!(time_serie.average(), ByteSize::mb(2)); - time_serie.record(ByteSize::mb(1)); - assert_eq!(time_serie.last(), ByteSize::mb(1)); - assert_eq!(time_serie.average(), ByteSize::kb(1500)); - time_serie.record(ByteSize::mb(3)); - assert_eq!(time_serie.last(), 
ByteSize::mb(3)); - assert_eq!(time_serie.average(), ByteSize::mb(2)); + fn test_shard_throughput_time_series() { + let mut time_series = ShardThroughputTimeSeries::default(); + assert_eq!(time_series.last(), ByteSize::mb(0)); + assert_eq!(time_series.average(), ByteSize::mb(0)); + time_series.record(ByteSize::mb(2)); + assert_eq!(time_series.last(), ByteSize::mb(2)); + assert_eq!(time_series.average(), ByteSize::mb(2)); + time_series.record(ByteSize::mb(1)); + assert_eq!(time_series.last(), ByteSize::mb(1)); + assert_eq!(time_series.average(), ByteSize::kb(1500)); + time_series.record(ByteSize::mb(3)); + assert_eq!(time_series.last(), ByteSize::mb(3)); + assert_eq!(time_series.average(), ByteSize::mb(2)); for _ in 0..SHARD_THROUGHPUT_LONG_TERM_WINDOW_LEN { - time_serie.record(ByteSize::mb(4)); - assert_eq!(time_serie.last(), ByteSize::mb(4)); + time_series.record(ByteSize::mb(4)); + assert_eq!(time_series.last(), ByteSize::mb(4)); } - assert_eq!(time_serie.last(), ByteSize::mb(4)); + assert_eq!(time_series.last(), ByteSize::mb(4)); } } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index d59b5bdafe6..1fae0c3b829 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -852,7 +852,7 @@ impl Ingester { } let Entry::Vacant(entry) = state_guard.replication_tasks.entry(leader_id.clone()) else { return Err(IngestV2Error::Internal(format!( - "a replication stream betwen {leader_id} and {follower_id} is already opened" + "a replication stream between {leader_id} and {follower_id} is already opened" ))); }; // Channel capacity: there is no need to bound the capacity of the channel here because it diff --git a/quickwit/quickwit-ingest/src/ingest_v2/mod.rs b/quickwit/quickwit-ingest/src/ingest_v2/mod.rs index 76d3881ec86..fd046d2e975 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/mod.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/mod.rs @@ -149,7 +149,7 @@ pub struct IngestRequestV2Builder { } impl IngestRequestV2Builder { - /// Adds a document to the request, returning the ID of the subrequest to wich it was added and + /// Adds a document to the request, returning the ID of the subrequest to which it was added and /// its newly assigned [`DocUid`]. pub fn add_doc(&mut self, index_id: IndexId, doc: &[u8]) -> (SubrequestId, DocUid) { match self.per_index_id_doc_batch_builders.entry(index_id) { diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.md b/quickwit/quickwit-ingest/src/ingest_v2/replication.md index c95fa7d7851..f0570b87767 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/replication.md +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.md @@ -23,6 +23,6 @@ Two gRPC streams back the independent streams of requests and responses between ### Replication stream errors -- When a replication request fails, the leader and follower close the shard(s) targetted by the request. +- When a replication request fails, the leader and follower close the shard(s) targeted by the request. -- When a replication stream fails (transport error, timeout), the leader and follower close the shard(s) targetted by the stream. Then, the leader reopens a new stream if necessary. +- When a replication stream fails (transport error, timeout), the leader and follower close the shard(s) targeted by the stream. Then, the leader reopens a new stream if necessary. 
diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index 7c786aa7c82..470cf57bf12 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -210,7 +210,7 @@ impl IngestRouter { info!(closed_shards=?debounced_request.closed_shards, "reporting closed shard(s) to control plane"); } if !debounced_request.is_empty() && !unavailable_leaders.is_empty() { - info!(unvailable_leaders=?unavailable_leaders, "reporting unavailable leader(s) to control plane"); + info!(unavailable_leaders=?unavailable_leaders, "reporting unavailable leader(s) to control plane"); for unavailable_leader in unavailable_leaders.iter() { debounced_request diff --git a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs index b066456cf68..bfa452b0213 100644 --- a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs +++ b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs @@ -232,7 +232,7 @@ impl ClusterSandbox { tick.tick().await; if now.elapsed() > Duration::from_secs(5) { - panic!("standlone node timed out"); + panic!("standalone node timed out"); } if sandbox .indexer_rest_client diff --git a/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs b/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs index 236b23fa924..ee6f18eafe0 100644 --- a/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs +++ b/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs @@ -205,7 +205,7 @@ async fn test_multi_nodes_cluster() { ) .await .unwrap(); - // Wait until split is commited and search. + // Wait until split is committed and search. tokio::time::sleep(Duration::from_secs(4)).await; let search_response_one_hit = sandbox .searcher_rest_client diff --git a/quickwit/quickwit-integration-tests/src/tests/index_tests.rs b/quickwit/quickwit-integration-tests/src/tests/index_tests.rs index b805f0f8157..4eaad8d95d4 100644 --- a/quickwit/quickwit-integration-tests/src/tests/index_tests.rs +++ b/quickwit/quickwit-integration-tests/src/tests/index_tests.rs @@ -658,7 +658,7 @@ async fn test_ingest_traces_with_otlp_grpc_api() { let sandbox = ClusterSandbox::start_cluster_with_otlp_service(&nodes_services) .await .unwrap(); - // Wait fo the pipelines to start (one for logs and one for traces) + // Wait for the pipelines to start (one for logs and one for traces) sandbox.wait_for_indexing_pipelines(2).await.unwrap(); let scope_spans = vec![ScopeSpans { diff --git a/quickwit/quickwit-integration-tests/src/tests/index_update_tests.rs b/quickwit/quickwit-integration-tests/src/tests/index_update_tests.rs index d9bfdbac5eb..18e7d3f80a6 100644 --- a/quickwit/quickwit-integration-tests/src/tests/index_update_tests.rs +++ b/quickwit/quickwit-integration-tests/src/tests/index_update_tests.rs @@ -99,7 +99,7 @@ async fn test_update_on_multi_nodes_cluster() { ) .await .unwrap(); - // Wait until split is commited and search. + // Wait until split is committed and search. 
tokio::time::sleep(Duration::from_secs(4)).await; // No hit because default_search_fields covers "title" only let search_response_no_hit = sandbox diff --git a/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs b/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs index 098cc1841ff..176dc8bbc9a 100644 --- a/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs +++ b/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs @@ -136,7 +136,7 @@ pub(super) async fn send_telemetry() { quickwit_telemetry::send_telemetry_event(TelemetryEvent::RunCommand).await; } -/// Convert the incomming file path to a source config +/// Convert the incoming file path to a source config pub(super) async fn configure_source( input_path: PathBuf, input_format: SourceInputFormat, @@ -356,7 +356,7 @@ pub(super) async fn wait_for_merges( break; } // We tolerate a relatively low refresh rate because the indexer - // typically runs for longuer periods of times and merges happen only + // typically runs for longer periods of times and merges happen only // occasionally. tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; } diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/shards.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/shards.rs index 3c636afbac4..236a4dcd9c7 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/shards.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/shards.rs @@ -338,7 +338,7 @@ mod tests { }; let MutationOccurred::Yes(subresponse) = shards.open_shard(subrequest.clone()).unwrap() else { - panic!("expected `MutationOccured::Yes`"); + panic!("expected `MutationOccurred::Yes`"); }; assert_eq!(subresponse.subrequest_id, 0); @@ -352,7 +352,7 @@ mod tests { assert_eq!(shard.publish_position_inclusive(), Position::Beginning); let MutationOccurred::No(subresponse) = shards.open_shard(subrequest).unwrap() else { - panic!("Expected `MutationOccured::No`"); + panic!("Expected `MutationOccurred::No`"); }; assert_eq!(subresponse.subrequest_id, 0); @@ -369,7 +369,7 @@ mod tests { doc_mapping_uid: Some(DocMappingUid::default()), }; let MutationOccurred::Yes(subresponse) = shards.open_shard(subrequest).unwrap() else { - panic!("Expected `MutationOccured::No`"); + panic!("Expected `MutationOccurred::No`"); }; assert_eq!(subresponse.subrequest_id, 0); @@ -454,7 +454,7 @@ mod tests { publish_token: "test-publish-token".to_string(), }; let MutationOccurred::No(response) = shards.acquire_shards(request).unwrap() else { - panic!("Expected `MutationOccured::No`"); + panic!("Expected `MutationOccurred::No`"); }; assert!(response.acquired_shards.is_empty()); @@ -465,7 +465,7 @@ mod tests { publish_token: "test-publish-token".to_string(), }; let MutationOccurred::No(response) = shards.acquire_shards(request.clone()).unwrap() else { - panic!("Expected `MutationOccured::No`"); + panic!("Expected `MutationOccurred::No`"); }; assert!(response.acquired_shards.is_empty()); @@ -482,7 +482,7 @@ mod tests { ); let MutationOccurred::Yes(response) = shards.acquire_shards(request.clone()).unwrap() else { - panic!("expected `MutationOccured::Yes`"); + panic!("expected `MutationOccurred::Yes`"); }; assert_eq!(response.acquired_shards.len(), 1); let acquired_shard = &response.acquired_shards[0]; @@ -511,7 +511,7 @@ mod tests { force: false, }; let MutationOccurred::No(response) = shards.delete_shards(request).unwrap() else { - panic!("expected `MutationOccured::No`"); + panic!("expected 
`MutationOccurred::No`"); }; assert_eq!(response.index_uid(), &index_uid); assert_eq!(response.source_id, source_id); @@ -525,7 +525,7 @@ mod tests { force: false, }; let MutationOccurred::No(response) = shards.delete_shards(request).unwrap() else { - panic!("expected `MutationOccured::No`"); + panic!("expected `MutationOccurred::No`"); }; assert_eq!(response.index_uid(), &index_uid); assert_eq!(response.source_id, source_id); @@ -562,7 +562,7 @@ mod tests { force: false, }; let MutationOccurred::Yes(response) = shards.delete_shards(request).unwrap() else { - panic!("expected `MutationOccured::Yes`"); + panic!("expected `MutationOccurred::Yes`"); }; assert_eq!(response.index_uid(), &index_uid); assert_eq!(response.source_id, source_id); @@ -578,7 +578,7 @@ mod tests { force: true, }; let MutationOccurred::Yes(response) = shards.delete_shards(request).unwrap() else { - panic!("expected `MutationOccured::Yes`"); + panic!("expected `MutationOccurred::Yes`"); }; assert_eq!(response.index_uid(), &index_uid); assert_eq!(response.source_id, source_id); diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs index 8a011f65e4c..1fbfd57e08d 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs @@ -120,7 +120,7 @@ impl From for MutationOccurred<()> { /// ready to be fetched and updated. /// /// Transitioning states are useful to track inconsistencies between the in-memory and on-disk data -/// structures when error(s) occurr during index creations and deletions: +/// structures when error(s) occur during index creations and deletions: /// - `Creating` indicates that the metastore updated the manifest file with this state but not yet /// the index metadata file; /// - `Deleting` indicates that the metastore updated the manifest file with this state but the diff --git a/quickwit/quickwit-metastore/src/tests/split.rs b/quickwit/quickwit-metastore/src/tests/split.rs index 67ee925ab6a..eb8865731f6 100644 --- a/quickwit/quickwit-metastore/src/tests/split.rs +++ b/quickwit/quickwit-metastore/src/tests/split.rs @@ -647,7 +647,7 @@ pub async fn test_metastore_publish_splits< .await .unwrap(); - let publish_splits_resquest = PublishSplitsRequest { + let publish_splits_request = PublishSplitsRequest { index_uid: Some(index_uid.clone()), staged_split_ids: vec![split_id_1.clone(), split_id_2.clone()], index_checkpoint_delta_json_opt: Some({ @@ -658,7 +658,7 @@ pub async fn test_metastore_publish_splits< ..Default::default() }; let error = metastore - .publish_splits(publish_splits_resquest) + .publish_splits(publish_splits_request) .await .unwrap_err(); assert!(matches!( diff --git a/quickwit/quickwit-proto/protos/quickwit/ingester.proto b/quickwit/quickwit-proto/protos/quickwit/ingester.proto index f0fbe9e804a..a65d7d9fcfd 100644 --- a/quickwit/quickwit-proto/protos/quickwit/ingester.proto +++ b/quickwit/quickwit-proto/protos/quickwit/ingester.proto @@ -32,7 +32,7 @@ service IngesterService { rpc OpenReplicationStream(stream SynReplicationMessage) returns (stream AckReplicationMessage); // Streams records from a leader or a follower. The client can optionally specify a range of positions to fetch, - // otherwise the stream will go undefinitely or until the shard is closed. + // otherwise the stream will go indefinitely or until the shard is closed. 
rpc OpenFetchStream(OpenFetchStreamRequest) returns (stream FetchMessage); // Streams status updates, called "observations", from an ingester. diff --git a/quickwit/quickwit-proto/protos/quickwit/search.proto b/quickwit/quickwit-proto/protos/quickwit/search.proto index d3d8954377b..b08d8b32016 100644 --- a/quickwit/quickwit-proto/protos/quickwit/search.proto +++ b/quickwit/quickwit-proto/protos/quickwit/search.proto @@ -63,11 +63,11 @@ service SearchService { // Performs a scroll request. rpc Scroll(ScrollRequest) returns (SearchResponse); - // gRPC request used to store a key in the local storage of the targetted node. + // gRPC request used to store a key in the local storage of the targeted node. // This RPC is used in the mini distributed immutable KV store embedded in quickwit. rpc PutKV(PutKVRequest) returns (PutKVResponse); - // Gets a key from the local storage of the targetted node. + // Gets a key from the local storage of the targeted node. // This RPC is used in the mini distributed immutable KV store embedded in quickwit. rpc GetKV(GetKVRequest) returns (GetKVResponse); diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.ingest.ingester.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.ingest.ingester.rs index d3daaec9bd1..0b169811727 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.ingest.ingester.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.ingest.ingester.rs @@ -625,7 +625,7 @@ pub trait IngesterService: std::fmt::Debug + Send + Sync + 'static { request: quickwit_common::ServiceStream, ) -> crate::ingest::IngestV2Result>; /// Streams records from a leader or a follower. The client can optionally specify a range of positions to fetch, - /// otherwise the stream will go undefinitely or until the shard is closed. + /// otherwise the stream will go indefinitely or until the shard is closed. async fn open_fetch_stream( &self, request: OpenFetchStreamRequest, @@ -2471,7 +2471,7 @@ pub mod ingester_service_grpc_client { self.inner.streaming(req, path, codec).await } /// Streams records from a leader or a follower. The client can optionally specify a range of positions to fetch, - /// otherwise the stream will go undefinitely or until the shard is closed. + /// otherwise the stream will go indefinitely or until the shard is closed. pub async fn open_fetch_stream( &mut self, request: impl tonic::IntoRequest, @@ -2724,7 +2724,7 @@ pub mod ingester_service_grpc_server { + Send + 'static; /// Streams records from a leader or a follower. The client can optionally specify a range of positions to fetch, - /// otherwise the stream will go undefinitely or until the shard is closed. + /// otherwise the stream will go indefinitely or until the shard is closed. async fn open_fetch_stream( &self, request: tonic::Request, diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs index 70b7ba42191..a2c90de2ae0 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs @@ -1081,7 +1081,7 @@ pub mod search_service_client { .insert(GrpcMethod::new("quickwit.search.SearchService", "Scroll")); self.inner.unary(req, path, codec).await } - /// gRPC request used to store a key in the local storage of the targetted node. + /// gRPC request used to store a key in the local storage of the targeted node. 
/// This RPC is used in the mini distributed immutable KV store embedded in quickwit. pub async fn put_kv( &mut self, @@ -1105,7 +1105,7 @@ pub mod search_service_client { .insert(GrpcMethod::new("quickwit.search.SearchService", "PutKV")); self.inner.unary(req, path, codec).await } - /// Gets a key from the local storage of the targetted node. + /// Gets a key from the local storage of the targeted node. /// This RPC is used in the mini distributed immutable KV store embedded in quickwit. pub async fn get_kv( &mut self, @@ -1322,13 +1322,13 @@ pub mod search_service_server { &self, request: tonic::Request, ) -> std::result::Result, tonic::Status>; - /// gRPC request used to store a key in the local storage of the targetted node. + /// gRPC request used to store a key in the local storage of the targeted node. /// This RPC is used in the mini distributed immutable KV store embedded in quickwit. async fn put_kv( &self, request: tonic::Request, ) -> std::result::Result, tonic::Status>; - /// Gets a key from the local storage of the targetted node. + /// Gets a key from the local storage of the targeted node. /// This RPC is used in the mini distributed immutable KV store embedded in quickwit. async fn get_kv( &self, diff --git a/quickwit/quickwit-proto/src/indexing/mod.rs b/quickwit/quickwit-proto/src/indexing/mod.rs index ae8666978ab..16e96495d94 100644 --- a/quickwit/quickwit-proto/src/indexing/mod.rs +++ b/quickwit/quickwit-proto/src/indexing/mod.rs @@ -190,7 +190,7 @@ pub const PIPELINE_FULL_CAPACITY: CpuCapacity = CpuCapacity::from_cpu_millis(4_0 /// One full pipeline (including merging) is supposed to have the capacity to index at least 20mb/s. /// This is a defensive value: In reality, this is typically above 30mb/s. -pub const PIPELINE_THROUGHTPUT: ByteSize = ByteSize::mb(20); +pub const PIPELINE_THROUGHPUT: ByteSize = ByteSize::mb(20); /// The CpuCapacity represents an amount of CPU resource available. /// diff --git a/quickwit/quickwit-proto/src/search/mod.rs b/quickwit/quickwit-proto/src/search/mod.rs index 888b1b610b7..4a267ee2657 100644 --- a/quickwit/quickwit-proto/src/search/mod.rs +++ b/quickwit/quickwit-proto/src/search/mod.rs @@ -188,7 +188,7 @@ impl std::hash::Hash for SortValue { impl SortValue { /// Where multiple variant could represent the same logical value, convert to a canonical form. /// - /// For number, we prefer to represent them, in order, as i64, then as u64 and finaly as f64. + /// For number, we prefer to represent them, in order, as i64, then as u64 and finally as f64. 
pub fn normalize(&self) -> Self { match self { SortValue::I64(_) => *self, diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs index 56d7b9adb2c..3fcf7be0d84 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs @@ -21,7 +21,7 @@ use serde::Deserialize; use serde_with::formats::PreferMany; use serde_with::{serde_as, DefaultOnNull, OneOrMany}; -use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner}; +use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner}; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::{self, QueryAst}; @@ -68,7 +68,7 @@ fn convert_vec(query_dsls: Vec) -> anyhow::Result anyhow::Result { let bool_query_ast = query_ast::BoolQuery { must: convert_vec(self.must)?, diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/exists_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/exists_query.rs index b775f40e429..707c9afab6c 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/exists_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/exists_query.rs @@ -19,7 +19,7 @@ use serde::Deserialize; -use crate::elastic_query_dsl::ConvertableToQueryAst; +use crate::elastic_query_dsl::ConvertibleToQueryAst; use crate::query_ast::{self, QueryAst}; #[derive(Deserialize, Clone, Eq, PartialEq, Debug)] @@ -27,7 +27,7 @@ pub struct ExistsQuery { field: String, } -impl ConvertableToQueryAst for ExistsQuery { +impl ConvertibleToQueryAst for ExistsQuery { fn convert_to_query_ast(self) -> anyhow::Result { Ok(QueryAst::FieldPresence(query_ast::FieldPresenceQuery { field: self.field, diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_bool_prefix.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_bool_prefix.rs index 03997449519..9b246ff0e08 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/match_bool_prefix.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_bool_prefix.rs @@ -21,7 +21,7 @@ use serde::Deserialize; use super::{ElasticQueryDslInner, StringOrStructForSerialization}; use crate::elastic_query_dsl::match_query::MatchQueryParams; -use crate::elastic_query_dsl::{default_max_expansions, ConvertableToQueryAst}; +use crate::elastic_query_dsl::{default_max_expansions, ConvertibleToQueryAst}; use crate::query_ast::{FullTextParams, FullTextQuery, QueryAst}; use crate::OneFieldMap; @@ -34,7 +34,7 @@ pub(crate) struct MatchBoolPrefixQuery { pub(crate) params: MatchQueryParams, } -impl ConvertableToQueryAst for MatchBoolPrefixQuery { +impl ConvertibleToQueryAst for MatchBoolPrefixQuery { fn convert_to_query_ast(self) -> anyhow::Result { let full_text_params = FullTextParams { tokenizer: None, diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs index 3260cec01ad..fdd99a97065 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs @@ -20,7 +20,7 @@ use serde::Deserialize; use crate::elastic_query_dsl::{ - ConvertableToQueryAst, ElasticQueryDslInner, StringOrStructForSerialization, + ConvertibleToQueryAst, ElasticQueryDslInner, StringOrStructForSerialization, }; use crate::query_ast::{FullTextMode, FullTextParams, FullTextQuery, QueryAst}; use crate::{MatchAllOrNone, OneFieldMap}; @@ -46,7 +46,7 @@ pub struct MatchPhraseQueryParams { 
pub(crate) slop: u32, } -impl ConvertableToQueryAst for MatchPhraseQuery { +impl ConvertibleToQueryAst for MatchPhraseQuery { fn convert_to_query_ast(self) -> anyhow::Result { let full_text_params = FullTextParams { tokenizer: self.params.analyzer, diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs index 03cc8145473..6db5ac0b618 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs @@ -20,7 +20,7 @@ use serde::Deserialize; use crate::elastic_query_dsl::{ - ConvertableToQueryAst, ElasticQueryDslInner, StringOrStructForSerialization, + ConvertibleToQueryAst, ElasticQueryDslInner, StringOrStructForSerialization, }; use crate::query_ast::{FullTextParams, FullTextQuery, QueryAst}; use crate::{BooleanOperand, MatchAllOrNone, OneFieldMap}; @@ -49,7 +49,7 @@ pub(crate) struct MatchQueryParams { pub(crate) _lenient: bool, } -impl ConvertableToQueryAst for MatchQuery { +impl ConvertibleToQueryAst for MatchQuery { fn convert_to_query_ast(self) -> anyhow::Result { let full_text_params = FullTextParams { tokenizer: None, diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs index 74aececa40c..9e49c866d95 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs @@ -93,11 +93,11 @@ impl TryFrom for QueryAst { } } -pub(crate) trait ConvertableToQueryAst { +pub(crate) trait ConvertibleToQueryAst { fn convert_to_query_ast(self) -> anyhow::Result; } -impl ConvertableToQueryAst for ElasticQueryDslInner { +impl ConvertibleToQueryAst for ElasticQueryDslInner { fn convert_to_query_ast(self) -> anyhow::Result { match self { Self::QueryString(query_string_query) => query_string_query.convert_to_query_ast(), diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs b/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs index 0c316449b2d..14aed6f1442 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs @@ -28,7 +28,7 @@ use crate::elastic_query_dsl::match_query::{MatchQuery, MatchQueryParams}; use crate::elastic_query_dsl::phrase_prefix_query::{ MatchPhrasePrefixQuery, MatchPhrasePrefixQueryParams, }; -use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner}; +use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner}; /// Multi match queries are a bit odd. They end up being expanded into another type query of query. /// In Quickwit, we operate this expansion in generic way at the time of deserialization. 
@@ -156,7 +156,7 @@ pub enum MatchType { BoolPrefix, } -impl ConvertableToQueryAst for MultiMatchQuery { +impl ConvertibleToQueryAst for MultiMatchQuery { fn convert_to_query_ast(self) -> anyhow::Result { self.0.convert_to_query_ast() } diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs index 0ce0e9c72b4..3955a175c64 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs @@ -21,7 +21,7 @@ use serde::Deserialize; use crate::elastic_query_dsl::one_field_map::OneFieldMap; use crate::elastic_query_dsl::{ - default_max_expansions, ConvertableToQueryAst, ElasticQueryDslInner, + default_max_expansions, ConvertibleToQueryAst, ElasticQueryDslInner, }; use crate::query_ast::{self, FullTextMode, FullTextParams, QueryAst}; use crate::MatchAllOrNone; @@ -48,7 +48,7 @@ impl From for ElasticQueryDslInner { } } -impl ConvertableToQueryAst for MatchPhrasePrefixQuery { +impl ConvertibleToQueryAst for MatchPhrasePrefixQuery { fn convert_to_query_ast(self) -> anyhow::Result { let MatchPhrasePrefixQueryParams { query, diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/query_string_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/query_string_query.rs index 81b7fa4dbef..1df29b78a07 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/query_string_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/query_string_query.rs @@ -19,7 +19,7 @@ use serde::Deserialize; -use crate::elastic_query_dsl::ConvertableToQueryAst; +use crate::elastic_query_dsl::ConvertibleToQueryAst; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::UserInputQuery; use crate::BooleanOperand; @@ -47,7 +47,7 @@ pub(crate) struct QueryStringQuery { _lenient: bool, } -impl ConvertableToQueryAst for QueryStringQuery { +impl ConvertibleToQueryAst for QueryStringQuery { fn convert_to_query_ast(self) -> anyhow::Result { if self.default_field.is_some() && self.fields.is_some() { anyhow::bail!("fields and default_field cannot be both set in `query_string` queries"); @@ -67,7 +67,7 @@ impl ConvertableToQueryAst for QueryStringQuery { #[cfg(test)] mod tests { - use crate::elastic_query_dsl::{ConvertableToQueryAst, QueryStringQuery}; + use crate::elastic_query_dsl::{ConvertibleToQueryAst, QueryStringQuery}; use crate::query_ast::{QueryAst, UserInputQuery}; use crate::BooleanOperand; diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs index 273496bbdeb..9e7d07e23da 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs @@ -24,7 +24,7 @@ use quickwit_datetime::StrptimeParser; use serde::Deserialize; use crate::elastic_query_dsl::one_field_map::OneFieldMap; -use crate::elastic_query_dsl::ConvertableToQueryAst; +use crate::elastic_query_dsl::ConvertibleToQueryAst; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::QueryAst; use crate::JsonLiteral; @@ -48,7 +48,7 @@ pub struct RangeQueryParams { pub type RangeQuery = OneFieldMap; -impl ConvertableToQueryAst for RangeQuery { +impl ConvertibleToQueryAst for RangeQuery { fn convert_to_query_ast(self) -> anyhow::Result { let field = self.field; let RangeQueryParams { diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs index 
91d6968ca7c..b54c1951581 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs @@ -21,7 +21,7 @@ use serde::{Deserialize, Deserializer, Serialize}; use super::StringOrStructForSerialization; use crate::elastic_query_dsl::one_field_map::OneFieldMap; -use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner}; +use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner}; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::{self, QueryAst}; @@ -93,7 +93,7 @@ impl From for ElasticQueryDslInner { } } -impl ConvertableToQueryAst for TermQuery { +impl ConvertibleToQueryAst for TermQuery { fn convert_to_query_ast(self) -> anyhow::Result { let TermQueryParams { value, boost } = self.value; let term_ast: QueryAst = query_ast::TermQuery { diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs index 5b3c6993250..255b7ddb32b 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs @@ -22,7 +22,7 @@ use serde::Deserialize; use crate::elastic_query_dsl::bool_query::BoolQuery; use crate::elastic_query_dsl::one_field_map::OneFieldMap; use crate::elastic_query_dsl::term_query::term_query_from_field_value; -use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner}; +use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner}; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::QueryAst; @@ -71,7 +71,7 @@ impl TryFrom for TermsQuery { } } -impl ConvertableToQueryAst for TermsQuery { +impl ConvertibleToQueryAst for TermsQuery { fn convert_to_query_ast(self) -> anyhow::Result { let term_queries: Vec = self .values diff --git a/quickwit/quickwit-query/src/query_ast/full_text_query.rs b/quickwit/quickwit-query/src/query_ast/full_text_query.rs index 7264854be74..cf1142e5cf6 100644 --- a/quickwit/quickwit-query/src/query_ast/full_text_query.rs +++ b/quickwit/quickwit-query/src/query_ast/full_text_query.rs @@ -177,8 +177,9 @@ pub enum FullTextMode { }, BoolPrefix { operator: BooleanOperand, - // max_expansions correspond to the fuzzy stop of query evalution. It's not the same as the - // max_expansions of a PhrasePrefixQuery, where it's used for the range expansion. + // max_expansions correspond to the fuzzy stop of query evaluation. It's not the same as + // the max_expansions of a PhrasePrefixQuery, where it's used for the range + // expansion. max_expansions: u32, }, // Act as Phrase with slop 0 if the field has positions, @@ -247,7 +248,7 @@ impl BuildTantivyAst for FullTextQuery { } impl FullTextQuery { - /// Returns the last term of the query assuming the query is targetting a string or a Json + /// Returns the last term of the query assuming the query is targeting a string or a Json /// field. /// /// This strange method is used to identify which term range should be warmed up for diff --git a/quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs b/quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs index 91013c5ad21..b89130424cf 100644 --- a/quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs +++ b/quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs @@ -119,7 +119,7 @@ impl From for Box { } } -// Remove the occurence of trivial AST in the given list of asts. +// Remove the occurrence of trivial AST in the given list of asts. 
// // If `stop_before_empty` is true, then we will make sure to stop removing asts if it is // the last element. diff --git a/quickwit/quickwit-query/src/query_ast/term_set_query.rs b/quickwit/quickwit-query/src/query_ast/term_set_query.rs index 868b57fdf29..68ea20cc4ab 100644 --- a/quickwit/quickwit-query/src/query_ast/term_set_query.rs +++ b/quickwit/quickwit-query/src/query_ast/term_set_query.rs @@ -45,7 +45,7 @@ impl TermSetQuery { let mut terms: HashSet = HashSet::default(); for (full_path, values) in &self.terms_per_field { for value in values { - // Mapping a text (field, value) is non-trival: + // Mapping a text (field, value) is non-trivial: // It depends on the schema of course, and can actually result in a disjunction of // multiple terms if the query targets a dynamic field (due to the // different types). diff --git a/quickwit/quickwit-query/src/query_ast/user_input_query.rs b/quickwit/quickwit-query/src/query_ast/user_input_query.rs index 5f02f7553cb..aa354346fa4 100644 --- a/quickwit/quickwit-query/src/query_ast/user_input_query.rs +++ b/quickwit/quickwit-query/src/query_ast/user_input_query.rs @@ -42,7 +42,7 @@ const DEFAULT_PHRASE_QUERY_MAX_EXPANSION: u32 = 50; pub struct UserInputQuery { pub user_text: String, // Set of search fields to search into for text not specifically - // targetting a field. + // targeting a field. // // If None, the default search fields, as defined in the DocMapper // will be used. diff --git a/quickwit/quickwit-query/src/tokenizers/chinese_compatible.rs b/quickwit/quickwit-query/src/tokenizers/chinese_compatible.rs index 6757850d793..12e17467a1a 100644 --- a/quickwit/quickwit-query/src/tokenizers/chinese_compatible.rs +++ b/quickwit/quickwit-query/src/tokenizers/chinese_compatible.rs @@ -145,7 +145,7 @@ mod tests { res.push(tok.clone()); } - // latin alphabet splited on white spaces, Han split on each char + // latin alphabet split on white spaces, Han split on each char let expected = [ Token { offset_from: 0, diff --git a/quickwit/quickwit-query/src/tokenizers/code_tokenizer.rs b/quickwit/quickwit-query/src/tokenizers/code_tokenizer.rs index 50adf6ce5ca..1a7dc4092d4 100644 --- a/quickwit/quickwit-query/src/tokenizers/code_tokenizer.rs +++ b/quickwit/quickwit-query/src/tokenizers/code_tokenizer.rs @@ -22,7 +22,7 @@ use std::str::CharIndices; use tantivy::tokenizer::{Token, TokenStream, Tokenizer}; -/// A Tokenizer spliting based on casing families often used in code such ase camelCase or +/// A Tokenizer splitting based on casing families often used in code such as camelCase or /// PascalCase. /// /// For instance, it splits `PigCaféFactory2` as `[Pig, Café, Factory, 2]`, or `RPCResult` into @@ -91,7 +91,7 @@ impl<'a> CodeTokenStream<'a> { AdvanceResult::Backtrack => { self.chars = checkpoint; self.state.reset(); - // this can't recurse more than once, Backtrack is only emited from hex state, + // this can't recurse more than once, Backtrack is only emitted from hex state, // and calling with false prevent that state from being generated. return self.advance_inner(false); } @@ -348,9 +348,9 @@ struct ProcessingHexState { } enum HexResult { - // no token emited + // no token emitted None, - // a token is being emited, after that the state needs to be reset. + // a token is being emitted, after that the state needs to be reset.
Emit(TokenOffsets), // we got an error, but where able to generate a code tokenizer state RecoverableError(ProcessingCharsState), @@ -386,7 +386,7 @@ impl ProcessingHexState { return HexResult::Emit(self.start_offset..next_char_offset); } } - // we got an invalid non-delimiter, or our sequence is an odd-lenght. Either way, + // we got an invalid non-delimiter, or our sequence is an odd-length. Either way, // we need to go switch to the code tokenizer return self.to_processing_chars_state(); } diff --git a/quickwit/quickwit-query/src/tokenizers/tokenizer_manager.rs b/quickwit/quickwit-query/src/tokenizers/tokenizer_manager.rs index cf977dd561e..4b87672a688 100644 --- a/quickwit/quickwit-query/src/tokenizers/tokenizer_manager.rs +++ b/quickwit/quickwit-query/src/tokenizers/tokenizer_manager.rs @@ -44,7 +44,7 @@ impl TokenizerManager { is_lowercaser: Arc::new(RwLock::new(HashMap::new())), }; - // in practice these will almost always be overriden in + // in practice these will almost always be overridden in // create_default_quickwit_tokenizer_manager() let raw_tokenizer = TextAnalyzer::builder(RawTokenizer::default()) .filter(RemoveLongFilter::limit(DEFAULT_REMOVE_TOKEN_LENGTH)) diff --git a/quickwit/quickwit-search/src/client.rs b/quickwit/quickwit-search/src/client.rs index b5baf32e857..434b4a430e7 100644 --- a/quickwit/quickwit-search/src/client.rs +++ b/quickwit/quickwit-search/src/client.rs @@ -249,9 +249,9 @@ impl SearchServiceClient { } } - /// Gets the value associated to a key stored locally in the targetted node. + /// Gets the value associated to a key stored locally in the targeted node. /// This call is not "distributed". - /// If the key is not present on the targetted search `None` is simply returned. + /// If the key is not present on the targeted search `None` is simply returned. pub async fn get_kv(&mut self, get_kv_req: GetKvRequest) -> crate::Result>> { match &mut self.client_impl { SearchServiceClientImpl::Local(service) => { @@ -269,7 +269,7 @@ impl SearchServiceClient { } } - /// Gets the value associated to a key stored locally in the targetted node. + /// Gets the value associated to a key stored locally in the targeted node. /// This call is not "distributed". It is up to the client to put the K,V pair /// on several nodes. pub async fn put_kv(&mut self, put_kv_req: PutKvRequest) -> crate::Result<()> { diff --git a/quickwit/quickwit-search/src/cluster_client.rs b/quickwit/quickwit-search/src/cluster_client.rs index 8d30b6daeab..25d53ca7554 100644 --- a/quickwit/quickwit-search/src/cluster_client.rs +++ b/quickwit/quickwit-search/src/cluster_client.rs @@ -718,7 +718,7 @@ mod tests { #[tokio::test] async fn test_put_kv_happy_path() { // 3 servers 1, 2, 3 - // Targetted key has affinity [2, 3, 1]. + // Targeted key has affinity [2, 3, 1]. // // Put on 2 and 3 is successful // Get succeeds on 2. @@ -763,7 +763,7 @@ mod tests { #[tokio::test] async fn test_put_kv_failing_get() { // 3 servers 1, 2, 3 - // Targetted key has affinity [2, 3, 1]. + // Targeted key has affinity [2, 3, 1]. // // Put on 2 and 3 is successful // Get fails on 2. diff --git a/quickwit/quickwit-search/src/collector.rs b/quickwit/quickwit-search/src/collector.rs index 215ea7e146e..9b775e1aa88 100644 --- a/quickwit/quickwit-search/src/collector.rs +++ b/quickwit/quickwit-search/src/collector.rs @@ -1388,8 +1388,8 @@ mod tests { } fn sort_dataset() -> Vec<(Option, Option)> { - // every comination of 0..=2 + None, in random order. 
- // (2, 1) is dupplicated to allow testing for DocId sorting with two sort fields + // every combination of 0..=2 + None, in random order. + // (2, 1) is duplicated to allow testing for DocId sorting with two sort fields vec![ (Some(2), Some(1)), (Some(0), Some(1)), @@ -1500,7 +1500,7 @@ mod tests { let cmp_2_asc = |a: &Doc, b: &Doc| reverse_int(&b.1 .1).cmp(&reverse_int(&a.1 .1)); { - // the logic for sorting isn't easy to wrap one's head arround. These simple tests are + // the logic for sorting isn't easy to wrap one's head around. These simple tests are // here to convince oneself they do what we want them todo let mut data = vec![(1, (None, None)), (0, (None, None))]; let data_copy = data.clone(); @@ -1604,7 +1604,7 @@ mod tests { assert_eq!( res.partial_hits.len(), slice_len, - "missmatch slice_len for \"{sort_str}\":{slice_len}" + "mismatch slice_len for \"{sort_str}\":{slice_len}" ); for (expected, got) in dataset.iter().zip(res.partial_hits.iter()) { if expected.0 as u32 != got.doc_id { @@ -1628,7 +1628,7 @@ mod tests { .collect::>(); eprintln!("expected: {:#?}", expected_docids); eprintln!("got: {:#?}", got_docids); - panic!("missmatch ordering for \"{sort_str}\":{slice_len}"); + panic!("mismatch ordering for \"{sort_str}\":{slice_len}"); } } } @@ -1669,7 +1669,7 @@ mod tests { }), }) .collect::>(); - // we eliminte based on sort value + // we eliminate based on sort value for (i, search_after) in partial_sort_value.into_iter().enumerate() { let request = SearchRequest { max_hits: 1000, @@ -1706,7 +1706,7 @@ mod tests { } } - // we eliminte based on split id + // we eliminate based on split id { let search_after = PartialHit { split_id: "fake_split_id2".to_string(), diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 5f73757ea56..b225a53c13d 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -440,7 +440,7 @@ async fn leaf_search_single_split( Ok(leaf_search_response) } -/// Rewrite a request removing parts which incure additional download or computation with no +/// Rewrite a request removing parts which incur additional download or computation with no /// effect. /// /// This include things such as sorting result by a field or _score when no document is requested, @@ -702,7 +702,7 @@ impl<'a> RemoveTimestampRange<'a> { use quickwit_query::InterpretUserInput; let Some(lower_bound) = DateTime::interpret_json(lower_bound) else { // we shouldn't be able to get here, we would have errored much earlier in root search - warn!("unparseable time bound in leaf search: {lower_bound:?}"); + warn!("unparsable time bound in leaf search: {lower_bound:?}"); return; }; let bound = if included { @@ -718,7 +718,7 @@ impl<'a> RemoveTimestampRange<'a> { use quickwit_query::InterpretUserInput; let Some(upper_bound) = DateTime::interpret_json(upper_bound) else { // we shouldn't be able to get here, we would have errored much earlier in root search - warn!("unparseable time bound in leaf search: {upper_bound:?}"); + warn!("unparsable time bound in leaf search: {upper_bound:?}"); return; }; let bound = if included { @@ -1338,7 +1338,7 @@ async fn leaf_search_single_split_wrapper( }), } if let Some(last_hit) = locked_incremental_merge_collector.peek_worst_hit() { - // TODO: we could use the RWLock instead and read the value instead of updateing it + // TODO: we could use the RWLock instead and read the value instead of updating it // unconditionally. 
split_filter .write() @@ -1398,7 +1398,7 @@ mod tests { let timestamp_field = "timestamp".to_string(); - // cases where the bounds are larger than the split: no bound is emited + // cases where the bounds are larger than the split: no bound is emitted let split = SplitIdAndFooterOffsets { timestamp_start: Some(time2), timestamp_end: Some(time3), @@ -1588,7 +1588,7 @@ mod tests { let timestamp_field = "timestamp".to_string(); - // cases where the bounds are larger than the split: no bound is emited + // cases where the bounds are larger than the split: no bound is emitted let split = SplitIdAndFooterOffsets { timestamp_start: Some(time1), timestamp_end: Some(time3), diff --git a/quickwit/quickwit-search/src/list_fields.rs b/quickwit/quickwit-search/src/list_fields.rs index a45cc19b7f6..2aac8cc43f6 100644 --- a/quickwit/quickwit-search/src/list_fields.rs +++ b/quickwit/quickwit-search/src/list_fields.rs @@ -396,7 +396,7 @@ mod tests { assert!(!matches_any_pattern("field1", &["fi*eld".to_string()])); assert!(!matches_any_pattern("field1", &["field".to_string()])); - // 2.nd pattern matches + // 2nd pattern matches assert!(matches_any_pattern( "field", &["a".to_string(), "field".to_string()] diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 73472ce2631..0c92af089f5 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -176,7 +176,7 @@ struct RequestMetadata { /// - timestamp fields (if any) are equal across indexes. /// - resolved query ASTs are the same across indexes. /// - if a sort field is of type datetime, it must be a datetime field on all indexes. This -/// contraint come from the need to support datetime formatting on sort values. +/// constraint comes from the need to support datetime formatting on sort values. /// Returns the timestamp field, the resolved query AST and the indexes metadatas /// needed for leaf search requests. /// Note: the requirements on timestamp fields and resolved query ASTs can be lifted diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index 64330fedac4..9b4965eaa89 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -180,7 +180,7 @@ impl SearchJobPlacer { let total_load: usize = jobs.iter().map(|job| job.cost()).sum(); - // allow arround 5% disparity. Round up so we never end up in a case where + // allow around 5% disparity. Round up so we never end up in a case where // target_load * num_nodes < total_load // some of our tests needs 2 splits to be put on 2 different searchers. It makes sens for // these tests to keep doing so (testing root merge).
Either we can make the allowed diff --git a/quickwit/quickwit-search/src/search_response_rest.rs b/quickwit/quickwit-search/src/search_response_rest.rs index e34895ac659..b2f94565692 100644 --- a/quickwit/quickwit-search/src/search_response_rest.rs +++ b/quickwit/quickwit-search/src/search_response_rest.rs @@ -130,7 +130,7 @@ pub struct StorageRequestCount { pub fastfield: usize, /// Number of fieldnorm downloaded pub fieldnorm: usize, - /// Number of sstable dowloaded + /// Number of sstable downloaded pub sstable: usize, /// Number of posting list downloaded pub posting: usize, diff --git a/quickwit/quickwit-search/src/search_stream/leaf.rs b/quickwit/quickwit-search/src/search_stream/leaf.rs index cca351ef4e2..a541a742976 100644 --- a/quickwit/quickwit-search/src/search_stream/leaf.rs +++ b/quickwit/quickwit-search/src/search_stream/leaf.rs @@ -662,7 +662,7 @@ mod tests { } #[tokio::test] - async fn test_leaf_search_stream_to_partitionned_clickhouse_binary_output_with_filtering( + async fn test_leaf_search_stream_to_partitioned_clickhouse_binary_output_with_filtering( ) -> anyhow::Result<()> { let index_id = "single-node-simple-2"; let doc_mapping_yaml = r#" diff --git a/quickwit/quickwit-search/src/search_stream/root.rs b/quickwit/quickwit-search/src/search_stream/root.rs index cea0ec4d6de..c3bc3d88379 100644 --- a/quickwit/quickwit-search/src/search_stream/root.rs +++ b/quickwit/quickwit-search/src/search_stream/root.rs @@ -203,7 +203,7 @@ mod tests { } #[tokio::test] - async fn test_root_search_stream_single_split_partitionned() -> anyhow::Result<()> { + async fn test_root_search_stream_single_split_partitioned() -> anyhow::Result<()> { let request = quickwit_proto::search::SearchStreamRequest { index_id: "test-index".to_string(), query_ast: qast_json_helper("test", &["body"]), diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs index e7d0f685315..bee1dd61253 100644 --- a/quickwit/quickwit-search/src/service.rs +++ b/quickwit/quickwit-search/src/service.rs @@ -152,7 +152,7 @@ pub trait SearchService: 'static + Send + Sync { ) -> crate::Result; /// Describe how a search would be processed. - async fn search_plan(&self, reqiest: SearchRequest) -> crate::Result; + async fn search_plan(&self, request: SearchRequest) -> crate::Result; } impl SearchServiceImpl { diff --git a/quickwit/quickwit-search/src/top_k_collector.rs b/quickwit/quickwit-search/src/top_k_collector.rs index 41f55025d27..d0371fdf419 100644 --- a/quickwit/quickwit-search/src/top_k_collector.rs +++ b/quickwit/quickwit-search/src/top_k_collector.rs @@ -738,7 +738,7 @@ impl QuickwitSegmentTopKCollector for GenericQuickwitSegmentTopKCollector { ); } } else { - // Probaly would make sense to check the fence against e.g. sort_values1 earlier, + // Probably would make sense to check the fence against e.g. sort_values1 earlier, // before creating the SegmentPartialHit. // // Below are different versions to avoid iterating the caches if they are unused. 
diff --git a/quickwit/quickwit-serve/src/developer_api/server.rs b/quickwit/quickwit-serve/src/developer_api/server.rs index bf4ad37b7fd..301d1c594ff 100644 --- a/quickwit/quickwit-serve/src/developer_api/server.rs +++ b/quickwit/quickwit-serve/src/developer_api/server.rs @@ -92,9 +92,9 @@ impl DeveloperService for DeveloperApiServer { "chitchat_state": cluster_snapshot.chitchat_state_snapshot.node_state_snapshots, }) }); - if let Some(control_plane_maibox) = &self.control_plane_mailbox_opt { + if let Some(control_plane_mailbox) = &self.control_plane_mailbox_opt { if roles.is_empty() || roles.contains(&QuickwitService::ControlPlane) { - debug_info["control_plane"] = match control_plane_maibox.ask(GetDebugInfo).await { + debug_info["control_plane"] = match control_plane_mailbox.ask(GetDebugInfo).await { Ok(debug_info) => debug_info, Err(error) => { json!({"error": error.to_string()}) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/mod.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/mod.rs index ec7957f942f..d8f377f66cc 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/mod.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/mod.rs @@ -60,7 +60,7 @@ pub struct SortField { #[serde(rename_all = "snake_case")] pub enum ElasticDateFormat { /// Sort values are in milliseconds by default to ease migration from ES. - /// We allow the user to sepecify nanoseconds if needed. + /// We allow the user to specify nanoseconds if needed. /// We add `Int` to the name to avoid confusion ES variant `EpochMillis` which, /// returns milliseconds as strings. EpochNanosInt, diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs index ab6aa37a6c6..caea3f66e8d 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs @@ -555,10 +555,10 @@ async fn es_compat_index_cat_indices( }) .map(|cat_index| cat_index.serialize_filtered(&query_params.h)) .collect::, serde_json::Error>>() - .map_err(|serde_errror| { + .map_err(|serde_error| { ElasticsearchError::new( StatusCode::INTERNAL_SERVER_ERROR, - format!("Failed to serialize cat indices response: {}", serde_errror), + format!("Failed to serialize cat indices response: {}", serde_error), None, ) })?; diff --git a/quickwit/quickwit-serve/src/index_api/rest_handler.rs b/quickwit/quickwit-serve/src/index_api/rest_handler.rs index 39de55d653c..778911e8d10 100644 --- a/quickwit/quickwit-serve/src/index_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/index_api/rest_handler.rs @@ -788,9 +788,9 @@ async fn reset_source_checkpoint( source_id: SourceId, metastore: MetastoreServiceClient, ) -> MetastoreResult<()> { - let index_metadata_resquest = IndexMetadataRequest::for_index_id(index_id.to_string()); + let index_metadata_request = IndexMetadataRequest::for_index_id(index_id.to_string()); let index_uid: IndexUid = metastore - .index_metadata(index_metadata_resquest) + .index_metadata(index_metadata_request) .await? .deserialize_index_metadata()? 
.index_uid; diff --git a/quickwit/quickwit-serve/src/template_api/rest_handler.rs b/quickwit/quickwit-serve/src/template_api/rest_handler.rs index a9fc9bb0de0..fca22b2b4a7 100644 --- a/quickwit/quickwit-serve/src/template_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/template_api/rest_handler.rs @@ -78,7 +78,7 @@ fn create_index_template_handler( path = "/templates", request_body = VersionedIndexTemplate, responses( - (status = 200, description = "The index template was successfuly created.") + (status = 200, description = "The index template was successfully created.") ), )] /// Creates a new index template. @@ -125,7 +125,7 @@ fn get_index_template_handler( tag = "Templates", path = "/templates/{template_id}", responses( - (status = 200, description = "The index template was successfuly retrieved."), + (status = 200, description = "The index template was successfully retrieved."), (status = 404, description = "The index template was not found.") ), )] @@ -161,7 +161,7 @@ fn update_index_template_handler( tag = "Templates", path = "/templates/{template_id}", responses( - (status = 200, description = "The index template was successfuly retrieved."), + (status = 200, description = "The index template was successfully retrieved."), (status = 404, description = "The index template was not found.") ), )] @@ -216,7 +216,7 @@ fn delete_index_template_handler( tag = "Templates", path = "/templates/{template_id}", responses( - (status = 200, description = "The index template was successfuly deleted."), + (status = 200, description = "The index template was successfully deleted."), (status = 404, description = "The index template was not found.") ), )] @@ -249,7 +249,7 @@ fn list_index_templates_handler( tag = "Templates", path = "/templates", responses( - (status = 200, description = "The index template was successfuly retrieved."), + (status = 200, description = "The index template was successfully retrieved."), ), )] /// Retrieves all the index templates stored in the metastore. @@ -384,7 +384,7 @@ mod tests { .method("PUT") .json(&json!({ "version": "0.7", - "template_id": "test-template-bar", // This `template_id` should be ignored and overriden by the path parameter. + "template_id": "test-template-bar", // This `template_id` should be ignored and overridden by the path parameter. "index_id_patterns": ["test-index-foo*"], "doc_mapping": {}, })) diff --git a/quickwit/quickwit-storage/src/cache/byte_range_cache.rs b/quickwit/quickwit-storage/src/cache/byte_range_cache.rs index b556de4e8f7..9068e23657e 100644 --- a/quickwit/quickwit-storage/src/cache/byte_range_cache.rs +++ b/quickwit/quickwit-storage/src/cache/byte_range_cache.rs @@ -143,7 +143,7 @@ impl NeedMutByteRangeCache { .unwrap_or(true); let (final_range, final_bytes) = if can_drop_first && can_drop_last { - // if we are here, either ther was no overlapping block, or there was, but this buffer + // if we are here, either there was no overlapping block, or there was, but this buffer // covers entirely every block it overlapped with. There is no merging to do. 
(byte_range, bytes) } else { @@ -207,7 +207,7 @@ impl NeedMutByteRangeCache { self.update_counter_drop_item(range.end - range.start); } - // and finaly insert the newly added buffer + // and finally insert the newly added buffer key.range_start = final_range.start; let value = CacheValue { range_end: final_range.end, diff --git a/quickwit/quickwit-storage/src/file_descriptor_cache.rs b/quickwit/quickwit-storage/src/file_descriptor_cache.rs index 9ea2ffea8cc..eee90a513b0 100644 --- a/quickwit/quickwit-storage/src/file_descriptor_cache.rs +++ b/quickwit/quickwit-storage/src/file_descriptor_cache.rs @@ -227,7 +227,7 @@ mod tests { assert_eq!(cache_metrics.misses_num_items.get(), 10); } - // This mimicks Quickwit's workload where the fd cache is much smaller than the number of + // This mimics Quickwit's workload where the fd cache is much smaller than the number of // splits. Each search will read from the same split file, and the cache will help avoid // opening the file several times. #[tokio::test] diff --git a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs index 6008e88451e..8644bbd2b5a 100644 --- a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs @@ -56,7 +56,7 @@ use crate::{ StorageResolverError, StorageResult, STORAGE_METRICS, }; -/// Semaphore to limit the number of concurent requests to the object store. Some object stores +/// Semaphore to limit the number of concurrent requests to the object store. Some object stores /// (R2, SeaweedFs...) return errors when too many concurrent requests are emitted. static REQUEST_SEMAPHORE: Lazy = Lazy::new(|| { let num_permits: usize = quickwit_common::get_from_env("QW_S3_MAX_CONCURRENCY", 10_000usize); diff --git a/quickwit/quickwit-storage/src/split.rs b/quickwit/quickwit-storage/src/split.rs index 7bf9fb720f1..b5691b8ff40 100644 --- a/quickwit/quickwit-storage/src/split.rs +++ b/quickwit/quickwit-storage/src/split.rs @@ -139,7 +139,7 @@ impl PutPayload for FilePayload { #[derive(Default)] pub struct SplitPayloadBuilder { /// File name, payload, and range of the payload in the bundle file - /// Range coud be computed on the fly, and is just kept here for convenience. + /// Range could be computed on the fly, and is just kept here for convenience. payloads: Vec<(String, Box, Range)>, current_offset: usize, } diff --git a/quickwit/quickwit-ui/src/utils/models.ts b/quickwit/quickwit-ui/src/utils/models.ts index ae67249d326..c58e47631e3 100644 --- a/quickwit/quickwit-ui/src/utils/models.ts +++ b/quickwit/quickwit-ui/src/utils/models.ts @@ -139,7 +139,7 @@ export function extractAggregationResults(aggregation: any): ParsedAggregationRe } else if ("term_agg" in aggregation) { // we have a term aggregation, but maybe there is an histogram inside const term_buckets = aggregation.term_agg.buckets; - if (term_buckets.lenght == 0) { + if (term_buckets.length == 0) { return null; } if (term_buckets.length > 0 && "histo_agg" in term_buckets[0]) { diff --git a/quickwit/rest-api-tests/README.md b/quickwit/rest-api-tests/README.md index a0742438e4b..dbd41d4d286 100644 --- a/quickwit/rest-api-tests/README.md +++ b/quickwit/rest-api-tests/README.md @@ -56,7 +56,7 @@ This engine-specific context is perfect if you know all steps will target a spec Once the context is loaded, the steps described in `_setup.yaml` and `_setup..yaml` (if present) will be executed. 
These steps are just like any other steps except you are guaranteed they will be executed respectively before and after all other steps. -In particular, when targetting one specific test using the `--test flag`, +In particular, when targeting one specific test using the `--test flag`, the necessary `setup` and `teardown` script will be automatically executed. # teardown diff --git a/quickwit/rest-api-tests/run_tests.py b/quickwit/rest-api-tests/run_tests.py index 4a690be37b0..905945768a5 100755 --- a/quickwit/rest-api-tests/run_tests.py +++ b/quickwit/rest-api-tests/run_tests.py @@ -364,7 +364,7 @@ def main(): prog="rest-api-test", description="Runs a set of calls against a REST API and checks for conditions over the results." ) - arg_parser.add_argument("--engine", help="Targetted engine (elastic/quickwit).", default="quickwit") + arg_parser.add_argument("--engine", help="Targeted engine (elastic/quickwit).", default="quickwit") arg_parser.add_argument("--test", help="Specific prefix to select the tests to run. If not specified, all tests are run.", nargs="*") arg_parser.add_argument("--binary", help="Specific the quickwit binary to run.", nargs="?") parsed_args = arg_parser.parse_args() diff --git a/quickwit/rest-api-tests/scenarii/default_search_fields/0002_invalid_default_fields.yaml b/quickwit/rest-api-tests/scenarii/default_search_fields/0002_invalid_default_fields.yaml index ac7bf1cc1fa..ba66f5db6c5 100644 --- a/quickwit/rest-api-tests/scenarii/default_search_fields/0002_invalid_default_fields.yaml +++ b/quickwit/rest-api-tests/scenarii/default_search_fields/0002_invalid_default_fields.yaml @@ -1,5 +1,5 @@ # should fail because we are not in dynamic, -# yet we are targetting a field not in the field mapping. +# yet we are targeting a field not in the field mapping. method: POST endpoint: indexes/ json: diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0010-match_phrase_prefix_query.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0010-match_phrase_prefix_query.yaml index ef08c8c4224..f54f41bc709 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0010-match_phrase_prefix_query.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0010-match_phrase_prefix_query.yaml @@ -86,12 +86,12 @@ expected: # This is a bit of a sloppy just testing that the tokenizer property is # plugged # -# We only apply it to quickwit becasue the raw tokenizer does not exist in ES. +# We only apply it to quickwit because the raw tokenizer does not exist in ES. 
method: [GET] json: query: match_phrase_prefix: payload.commits.message: query: "automated comm" - analyzer: inexistant_tokenizer + analyzer: inexistent_tokenizer status_code: 400 diff --git a/quickwit/rest-api-tests/scenarii/es_field_capabilities/0001-field-capabilities.yaml b/quickwit/rest-api-tests/scenarii/es_field_capabilities/0001-field-capabilities.yaml index 49d0879a52b..bd3cd917acd 100644 --- a/quickwit/rest-api-tests/scenarii/es_field_capabilities/0001-field-capabilities.yaml +++ b/quickwit/rest-api-tests/scenarii/es_field_capabilities/0001-field-capabilities.yaml @@ -311,7 +311,7 @@ expected: searchable: true aggregatable: true --- -# Exact match index + Non matching excact index +# Exact match index + Non matching exact index method: [GET] engines: - quickwit @@ -334,4 +334,3 @@ engines: - elasticsearch endpoint: doesno*texist/_field_caps?fields=date status_code: 200 - diff --git a/quickwit/rest-api-tests/scenarii/qw_search_api/0003_negative_search.yaml b/quickwit/rest-api-tests/scenarii/qw_search_api/0003_negative_search.yaml index 598fcf17c34..921fa6ee643 100644 --- a/quickwit/rest-api-tests/scenarii/qw_search_api/0003_negative_search.yaml +++ b/quickwit/rest-api-tests/scenarii/qw_search_api/0003_negative_search.yaml @@ -1,4 +1,4 @@ -# regression test for bizare handling of - vs NOT when no positive clause is present +# regression test for bizarre handling of - vs NOT when no positive clause is present endpoint: simple/search params: query: "-ts:1234567890 AND -ts:1234567891"