Fix typos all over the codebase (#5259)
rdettai authored Jul 27, 2024
1 parent 6e2e630 commit b6db60d
Showing 116 changed files with 246 additions and 240 deletions.
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
@@ -57,8 +57,8 @@ Run `make test-all` to run all tests.
* `make -k test-all docker-compose-down` - the same as above, but tears down the Docker services after running all the tests.
* `make fmt` - runs formatter, this command requires the nightly toolchain to be installed by running `rustup toolchain install nightly`.
* `make fix` - runs formatter and clippy checks.
-* `make typos` - runs the spellcheck tool over the codebase. (Install by running `cargo install typos`)
-* `make build-docs` - builds docs.
+* `make typos` - runs the spellcheck tool over the codebase. (Install by running `cargo install typos-cli`)
+* `make docs` - builds docs.
* `make docker-compose-up` - starts Docker services.
* `make docker-compose-down` - stops Docker services.
* `make docker-compose-logs` - shows Docker logs.
4 changes: 4 additions & 0 deletions _typos.toml
@@ -1,2 +1,6 @@
[files]
extend-exclude = ["**/*.json"]
+
+[default.extend-words]
+# Don't correct the surname "Teh"
+strat = "strat"
2 changes: 1 addition & 1 deletion distribution/ecs/README.md
@@ -42,7 +42,7 @@ deployed, you should probably push the Quickwit image to ECR and use ECR
interface VPC endpoints instead (approx. ~$0.01/hour/AZ).

When using the default image, you will quickly run into the Docker Hub rate
-limiting. We recommand pushing the Quickwit image to ECR and configure that as
+limiting. We recommend pushing the Quickwit image to ECR and configure that as
`quickwit_image`. Note that the architecture of the image that you push to ECR
must match the `quickwit_cpu_architecture` variable (`ARM64` by default).

2 changes: 1 addition & 1 deletion distribution/ecs/quickwit/variables.tf
@@ -65,7 +65,7 @@ variable "enable_cloudwatch_logging" {
}

variable "log_configuration" {
-description = "Custom log configuraiton for Quickwit tasks"
+description = "Custom log configuration for Quickwit tasks"
default = {}
}

2 changes: 1 addition & 1 deletion distribution/lambda/README.md
@@ -52,7 +52,7 @@ Provided demonstration setups:

### Deploy and run

-The Makefile is a usefull entrypoint to show how the Lambda deployment can used.
+The Makefile is a useful entrypoint to show how the Lambda deployment can used.

Configure your shell and AWS account:
```bash
4 changes: 2 additions & 2 deletions distribution/lambda/cdk/cli.py
@@ -382,7 +382,7 @@ def benchmark_hdfs_indexing():
indexer_result = invoke_hdfs_indexer()
bench_result["lambda_report"] = indexer_result.extract_report()
except Exception as e:
-bench_result["invokation_error"] = repr(e)
+bench_result["invocation_error"] = repr(e)
print(f"Failed to invoke indexer")

with open(f"lambda-bench.log", "a+") as f:
@@ -409,7 +409,7 @@ def benchmark_hdfs_search(payload: str):
indexer_result = invoke_hdfs_searcher(payload, download_logs=False)
bench_result["lambda_report"] = indexer_result.extract_report()
except Exception as e:
-bench_result["invokation_error"] = repr(e)
+bench_result["invocation_error"] = repr(e)
print(f"Failed to invoke searcher")

with open(f"lambda-bench.log", "a+") as f:
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -53,7 +53,7 @@ services:
image: postgres:${POSTGRES_VERSION:-12.17-alpine}
container_name: postgres
ports:
-- "${MAP_HOST_POSTGRESS:-127.0.0.1}:5432:5432"
+- "${MAP_HOST_POSTGRES:-127.0.0.1}:5432:5432"
profiles:
- all
- postgres
8 changes: 4 additions & 4 deletions docs/configuration/index-config.md
@@ -135,7 +135,7 @@ fast:
| `tokenizer` | Name of the `Tokenizer`. ([See tokenizers](#description-of-available-tokenizers)) for a list of available tokenizers. | `default` |
| `record` | Describes the amount of information indexed, choices between `basic`, `freq` and `position` | `basic` |
| `fieldnorms` | Whether to store fieldnorms for the field. Fieldnorms are required to calculate the BM25 Score of the document. | `false` |
-| `fast` | Whether value is stored in a fast field. The fast field will contain the term ids and the dictionary. The default behaviour for `true` is to store the original text unchanged. The normalizers on the fast field is seperately configured. It can be configured via `normalizer: lowercase`. ([See normalizers](#description-of-available-normalizers)) for a list of available normalizers. | `false` |
+| `fast` | Whether value is stored in a fast field. The fast field will contain the term ids and the dictionary. The default behaviour for `true` is to store the original text unchanged. The normalizers on the fast field is separately configured. It can be configured via `normalizer: lowercase`. ([See normalizers](#description-of-available-normalizers)) for a list of available normalizers. | `false` |

##### Description of available tokenizers

@@ -327,7 +327,7 @@ stored: true
indexed: true
fast: true
input_format: hex
-output_foramt: hex
+output_format: hex
```

**Parameters for bytes field**
@@ -432,7 +432,7 @@ tokenizer: default
record: basic
```
-Concatenate fields don't support fast fields, and are never stored. They uses their own tokenizer, independantly of the
+Concatenate fields don't support fast fields, and are never stored. They uses their own tokenizer, independently of the
tokenizer configured on the individual fields.
At query time, concatenate fields don't support range queries.
Only the following types are supported inside a concatenate field: text, bool, i64, u64, json. Other types are rejected
@@ -458,7 +458,7 @@ when the features are supported, add these:
---
Only the following types are supported inside a concatenate field: text, datetime, bool, i64, u64, ip, json. Other types are rejected
---
-Datetime can only be queried in their RFC-3339 form, possibly omiting later components. # todo! will have to confirm this is achievable
+Datetime can only be queried in their RFC-3339 form, possibly omitting later components. # todo! will have to confirm this is achievable
---
plan:
- implement text/bool/i64/u64 (nothing to do on search side for it to work). all gets converted to strings
2 changes: 1 addition & 1 deletion docs/configuration/storage-config.md
@@ -66,7 +66,7 @@ Hardcoding credentials into configuration files is not secure and strongly disco
| Env variable | Description |
| --- | --- |
| `QW_S3_ENDPOINT` | Custom S3 endpoint. |
-| `QW_S3_MAX_CONCURRENCY` | Limit the number of concurent requests to S3 |
+| `QW_S3_MAX_CONCURRENCY` | Limit the number of concurrent requests to S3 |

#### Storage flavors

2 changes: 1 addition & 1 deletion docs/deployment/cluster-sizing.md
@@ -62,7 +62,7 @@ Searcher nodes:
<!-- 1GB fast_field_cache_capacity + 0.5GB split_footer_cache_capacity + 0.5GB/req aggregation_memory_limit -->
- Searcher nodes don't use disk unless the [split
cache](../configuration/node-config.md#Searcher-split-cache-configuration) is
-explicitely enabled
+explicitly enabled

One strength of Quickwit is that its Searchers are stateless, which makes it
easy to scale them up and down based on the workload. Scale the number of
4 changes: 2 additions & 2 deletions docs/deployment/kubernetes/glasskube.md
@@ -15,7 +15,7 @@ To deploy Quickwit on Kubernetes, you will need:

1. Install `kubectl` and `glasskube` cli.

-To install `kubectl` locally, you can refere to [this documentation](https://kubernetes.io/docs/tasks/tools/#install-kubectl).
+To install `kubectl` locally, you can refer to [this documentation](https://kubernetes.io/docs/tasks/tools/#install-kubectl).

To install `glasskube` cli locally, you can refer to [this documentation](https://glasskube.dev/docs/getting-started/install) and choose the right installation options according to your operating system.

@@ -52,7 +52,7 @@ Or use the CLI instead:
glasskube install quickwit
```

-In both, you'll have to set the value of thoses parameters:
+In both, you'll have to set the value of those parameters:

* `defaultIndexRootUri`: the default index URI is a S3 compliant bucket which usually looks like this: `s3://<bucket-name>/<optional-base-path>`
* `metastoreUri`: if you're not using PostgreSQL and object storage, you can pick the same bucket and value you used for the `defaultIndexRootUri` parameter
2 changes: 1 addition & 1 deletion docs/get-started/query-language-intro.md
@@ -33,7 +33,7 @@ Quickwit support various types of clauses to express different kinds of conditio
| term prefix | `field:prefix*` | `app_name:tant*` <br/> `quick*` | A term clause tests the existence of a token starting with the provided value | yes |
| term set | `field:IN [token token ..]` |`severity:IN [error warn]` | A term set clause tests the existence of any of the provided value in the field's tokens| yes |
| phrase | `field:"sequence of tokens"` | `full_name:"john doe"` | A phrase clause tests the existence of the provided sequence of tokens | yes |
-| phrase prefix | `field:"sequence of tokens"*` | `title:"how to m"*` | A phrase prefix clause tests the exsitence of a sequence of tokens, the last one used like in a prefix clause | yes |
+| phrase prefix | `field:"sequence of tokens"*` | `title:"how to m"*` | A phrase prefix clause tests the existence of a sequence of tokens, the last one used like in a prefix clause | yes |
| all | `*` | `*` | A match-all clause will match every document | no |
| exist | `field:*` | `error:*` | An exist clause tests the existence of any value for the field, it will match only if the field exists | no |
| range | `field:bounds` |`duration:[0 TO 1000}` <br/> `last_name:[banner TO miller]` | A term clause tests the existence of a token between the provided bounds | no |
2 changes: 1 addition & 1 deletion docs/internals/scroll.md
@@ -60,7 +60,7 @@ We only mutate the state server side to update the cache whenever needed.

The idea here is that if that if the put request failed, we can still return the right results even if we have an obsolete version of the `ScrollContext`.

-# Quickwit implementation (improvment, quirks and shortcuts)
+# Quickwit implementation (improvement, quirks and shortcuts)

We do not do explicitly protect the split from our store Point-In-Time information
from deletion. Instead we simply rely on the existing grace period mechanism (a split
4 changes: 2 additions & 2 deletions docs/internals/sorting.md
@@ -2,7 +2,7 @@

Quickwit can sort results based on fastfield values or score. This document discuss where and how
it happens.
-It also tries to describe optimizations that may be enabled (but are not necessarily implemente)
+It also tries to describe optimizations that may be enabled (but are not necessarily implemented)
by this behavior.

## Behavior
@@ -35,7 +35,7 @@ results. It reduces the risks of inconsistencies between in-split and between-sp
`SortOrder` gets new `compare` and `compare_opt` method which can be used to compare two values with
respect to the particular sort order required, and with proper handling of the `None` special case.

-# Optimization permited
+# Optimization permitted

Both orders allow an optimization when sorting by date (either direction), by leveraging splits
meta-data to know in advance if a split can, or not, contain better results. Changing the sorting
4 changes: 2 additions & 2 deletions docs/reference/cli.md
@@ -353,9 +353,9 @@ quickwit index ingest
| `--index` | ID of the target index |
| `--input-path` | Location of the input file. |
| `--batch-size-limit` | Size limit of each submitted document batch. |
-| `--wait` | Wait for all documents to be commited and available for search before exiting |
+| `--wait` | Wait for all documents to be committed and available for search before exiting |
| `--force` | Force a commit after the last document is sent, and wait for all documents to be committed and available for search before exiting |
-| `--commit-timeout` | Timeout for ingest operations that require waiting for the final commit (`--wait` or `--force`). This is different from the `commit_timeout_secs` indexing setting, which sets the maximum time before commiting splits after their creation. |
+| `--commit-timeout` | Timeout for ingest operations that require waiting for the final commit (`--wait` or `--force`). This is different from the `commit_timeout_secs` indexing setting, which sets the maximum time before committing splits after their creation. |

*Examples*

8 changes: 4 additions & 4 deletions docs/reference/es_compatible_api.md
@@ -21,7 +21,7 @@ POST api/v1/_elastic/_bulk
POST api/v1/_elastic/<index>/_bulk
```

-The _bulk ingestion API makes it possible to index a batch of documents, possibly targetting several indices in the same request.
+The _bulk ingestion API makes it possible to index a batch of documents, possibly targeting several indices in the same request.

#### Request Body example

@@ -228,7 +228,7 @@ You can pass the `sort` value of the last hit in a subsequent request where othe
```json
{
// keep all fields from the original request
-"seach_after": [
+"search_after": [
1701962929199
]
}
@@ -256,7 +256,7 @@ POST api/v1/_elastic/_msearch
Runs several search requests at once.

The payload is expected to alternate:
-- a `header` json object, containing the targetted index id.
+- a `header` json object, containing the targeted index id.
- a `search request body` as defined in the [`_search` endpoint section].


@@ -725,7 +725,7 @@ Search APIs that accept <index_id> requests path parameter also support multi-ta

### Multi-target syntax

-In multi-target syntax, you can use a comma or its URL encoded version '%2C' seperated list to run a request on multiple indices: test1,test2,test3. You can also sue [glob-like](https://en.wikipedia.org/wiki/Glob_(programming)) wildcard ( \* ) expressions to target indices that match a pattern: test\* or \*test or te\*t or \*test\*.
+In multi-target syntax, you can use a comma or its URL encoded version '%2C' separated list to run a request on multiple indices: test1,test2,test3. You can also sue [glob-like](https://en.wikipedia.org/wiki/Glob_(programming)) wildcard ( \* ) expressions to target indices that match a pattern: test\* or \*test or te\*t or \*test\*.

The multi-target expression has the following constraints:

4 changes: 2 additions & 2 deletions docs/reference/query-language.md
@@ -95,7 +95,7 @@ Matches if the document contains any of the tokens provided.
###### Examples
`field:IN [ab cd]` will match 'ab' or 'cd', but nothing else.

-###### Perfomance Note
+###### Performance Note
This is a lot like writing `field:ab OR field:cd`. When there are only a handful of terms to search for, using ORs is usually faster.
When there are many values to match, a term set query can become more efficient.

@@ -150,7 +150,7 @@ bounds = term TO term
| '*' TO term
comparison_range = comparison_operator term
-comparision_operator = '<' | '>' | '<=' | '>='
+comparison_operator = '<' | '>' | '<=' | '>='
```

Matches if the document contains a token between the provided bounds for that field.
4 changes: 2 additions & 2 deletions docs/reference/rest-api.md
@@ -61,8 +61,8 @@ POST api/v1/<index id>/search
| Variable | Type | Description | Default value |
|---------------------|------------|-----------------|-----------------|
| `query` | `String` | Query text. See the [query language doc](query-language.md) | _required_ |
-| `start_timestamp` | `i64` | If set, restrict search to documents with a `timestamp >= start_timestamp`, taking advantage of potential time pruning oportunities. The value must be in seconds. | |
-| `end_timestamp` | `i64` | If set, restrict search to documents with a `timestamp < end_timestamp`, taking advantage of potential time pruning oportunities. The value must be in seconds. | |
+| `start_timestamp` | `i64` | If set, restrict search to documents with a `timestamp >= start_timestamp`, taking advantage of potential time pruning opportunities. The value must be in seconds. | |
+| `end_timestamp` | `i64` | If set, restrict search to documents with a `timestamp < end_timestamp`, taking advantage of potential time pruning opportunities. The value must be in seconds. | |
| `start_offset` | `Integer` | Number of documents to skip | `0` |
| `max_hits` | `Integer` | Maximum number of hits to return (by default 20) | `20` |
| `search_field` | `[String]` | Fields to search on if no field name is specified in the query. Comma-separated list, e.g. "field1,field2" | index_config.search_settings.default_search_fields |
10 changes: 5 additions & 5 deletions quickwit/quickwit-actors/src/lib.rs
@@ -90,13 +90,13 @@ fn heartbeat_from_env_or_default() -> Duration {
return Duration::from_millis(500);
}
match std::env::var("QW_ACTOR_HEARTBEAT_SECS") {
-Ok(actor_hearbeat_secs_str) => {
-if let Ok(actor_hearbeat_secs) = actor_hearbeat_secs_str.parse::<NonZeroU64>() {
-info!("set the actor heartbeat to {actor_hearbeat_secs} seconds");
-return Duration::from_secs(actor_hearbeat_secs.get());
+Ok(actor_heartbeat_secs_str) => {
+if let Ok(actor_heartbeat_secs) = actor_heartbeat_secs_str.parse::<NonZeroU64>() {
+info!("set the actor heartbeat to {actor_heartbeat_secs} seconds");
+return Duration::from_secs(actor_heartbeat_secs.get());
} else {
warn!(
-"failed to parse `QW_ACTOR_HEARTBEAT_SECS={actor_hearbeat_secs_str}` in \
+"failed to parse `QW_ACTOR_HEARTBEAT_SECS={actor_heartbeat_secs_str}` in \
seconds > 0, using default heartbeat (30 seconds)"
);
};
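The hunk above is a pure rename (`hearbeat` → `heartbeat`), but the fallback logic it touches is easy to check in isolation. Below is a minimal, self-contained sketch of that behavior; `parse_heartbeat_secs` is a hypothetical helper, not Quickwit's actual API, and it omits the env-var lookup and logging that surround it in `heartbeat_from_env_or_default`.

```rust
use std::num::NonZeroU64;
use std::time::Duration;

// Hypothetical helper mirroring the fallback in the hunk above: a positive
// integer becomes the heartbeat in seconds; anything else (zero, empty,
// non-numeric) falls back to the 30-second default.
fn parse_heartbeat_secs(raw: &str) -> Duration {
    match raw.parse::<NonZeroU64>() {
        Ok(secs) => Duration::from_secs(secs.get()),
        // `NonZeroU64` rejects both "0" and non-numeric strings, so a single
        // error arm covers the warn-and-use-default path of the real function.
        Err(_) => Duration::from_secs(30),
    }
}
```

Using `NonZeroU64` rather than `u64` is what lets the original code fold the "zero seconds" case into the same branch as a parse failure.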
4 changes: 2 additions & 2 deletions quickwit/quickwit-cli/src/index.rs
@@ -149,7 +149,7 @@ pub fn build_index_command() -> Command {
Arg::new("wait")
.long("wait")
.short('w')
-.help("Wait for all documents to be commited and available for search before exiting")
+.help("Wait for all documents to be committed and available for search before exiting")
.action(ArgAction::SetTrue),
// TODO remove me after Quickwit 0.7.
Arg::new("v2")
@@ -165,7 +165,7 @@
.conflicts_with("wait"),
Arg::new("commit-timeout")
.long("commit-timeout")
-.help("Timeout for ingest operations that require waiting for the final commit (`--wait` or `--force`). This is different from the `commit_timeout_secs` indexing setting, which sets the maximum time before commiting splits after their creation.")
+.help("Timeout for ingest operations that require waiting for the final commit (`--wait` or `--force`). This is different from the `commit_timeout_secs` indexing setting, which sets the maximum time before committing splits after their creation.")
.required(false)
.global(true),
])
2 changes: 1 addition & 1 deletion quickwit/quickwit-cli/src/lib.rs
@@ -397,7 +397,7 @@ pub mod busy_detector {
})
.is_err()
{
-// a debug was emited recently, don't emit log for this one
+// a debug was emitted recently, don't emit log for this one
SUPPRESSED_DEBUG_COUNT.fetch_add(1, Ordering::Relaxed);
return;
}
2 changes: 1 addition & 1 deletion quickwit/quickwit-cli/src/split.rs
@@ -124,7 +124,7 @@ impl FromStr for OutputFormat {
"pretty-json" | "pretty_json" => Ok(OutputFormat::PrettyJson),
"table" => Ok(OutputFormat::Table),
_ => bail!(
-"unkown output format `{output_format_str}`. supported formats are: `table`, \
+"unknown output format `{output_format_str}`. supported formats are: `table`, \
`json`, and `pretty-json`"
),
}
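The split.rs hunk only corrects `unkown` in the error string; as a standalone illustration of the `FromStr` pattern it edits, here is a simplified re-creation. The enum variants match the formats named in the message, but the `String` error type is an assumption — the real code reports errors via `bail!`.

```rust
use std::str::FromStr;

// Simplified stand-in for the CLI's output-format parsing; the real
// implementation lives in quickwit-cli/src/split.rs.
#[derive(Debug, PartialEq)]
enum OutputFormat {
    Json,
    PrettyJson,
    Table,
}

impl FromStr for OutputFormat {
    type Err = String;

    fn from_str(output_format_str: &str) -> Result<Self, Self::Err> {
        match output_format_str {
            "json" => Ok(OutputFormat::Json),
            "pretty-json" | "pretty_json" => Ok(OutputFormat::PrettyJson),
            "table" => Ok(OutputFormat::Table),
            _ => Err(format!(
                "unknown output format `{output_format_str}`. supported \
                 formats are: `table`, `json`, and `pretty-json`"
            )),
        }
    }
}
```

Implementing `FromStr` is what gives callers the ergonomic `"table".parse::<OutputFormat>()` form.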
2 changes: 1 addition & 1 deletion quickwit/quickwit-cluster/src/member.rs
@@ -131,7 +131,7 @@ fn parse_indexing_cpu_capacity(node_state: &NodeState) -> CpuCapacity {
if let Ok(indexing_capacity) = CpuCapacity::from_str(indexing_capacity_str) {
indexing_capacity
} else {
-error!(indexing_capacity=?indexing_capacity_str, "received an unparseable indexing capacity from node");
+error!(indexing_capacity=?indexing_capacity_str, "received an unparsable indexing capacity from node");
CpuCapacity::zero()
}
}