diff --git a/.github/scripts/check-front-matter.js b/.github/scripts/check-front-matter.js index d09ec63fa..c4086cb6e 100644 --- a/.github/scripts/check-front-matter.js +++ b/.github/scripts/check-front-matter.js @@ -7,7 +7,8 @@ const github = require('@actions/github'); // Add the special files here, for example, the template files const exceptionFiles = [ 'docs/user-guide/ingest-data/for-iot/grpc-sdks/template.md', - 'i18n/zh/docusaurus-plugin-content-docs/current/user-guide/ingest-data/for-iot/grpc-sdks/template.md' + 'i18n/zh/docusaurus-plugin-content-docs/current/user-guide/ingest-data/for-iot/grpc-sdks/template.md', + 'i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/template.md' ]; // This function checks if a markdown file contains the required front matter. diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/greptimecloud/integrations/kafka.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/greptimecloud/integrations/kafka.md index d2b7ace2e..30108f9ef 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/greptimecloud/integrations/kafka.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/greptimecloud/integrations/kafka.md @@ -60,3 +60,8 @@ username = "" password = "" tls = {} ``` + +## 参考文档 + +请参考[通过 Kafka 写入数据](https://docs.greptime.cn/nightly/user-guide/ingest-data/for-observerbility/kafka)获取数据写入过程的详细信息。 + diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/sql/alter.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/sql/alter.md index e5ae6006e..51ada9b91 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/sql/alter.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/sql/alter.md @@ -120,7 +120,7 @@ ALTER TABLE monitor MODIFY COLUMN load_15 STRING; `ALTER TABLE` 语句也可以用来更改表的选项。 当前支持修改以下表选项: - `ttl`: 表数据的保留时间。 -- `compaction.twcs.time_window`: TWCS compaction 策略的时间窗口。 +- `compaction.twcs.time_window`: TWCS compaction 策略的时间窗口,其值是一个[时间范围字符段](/reference/time-durations.md)。 - `compaction.twcs.max_output_file_size`: TWCS compaction 策略的最大允许输出文件大小。 - `compaction.twcs.max_active_window_runs`: TWCS compaction 策略的活跃窗口中最多允许的有序组数量。 - `compaction.twcs.max_inactive_window_runs`: TWCS compaction 策略的非活跃窗口中最多允许的有序组数量。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/sql/create.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/sql/create.md index 225c334d6..7e907dadc 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/sql/create.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/sql/create.md @@ -128,17 +128,7 @@ CREATE TABLE IF NOT EXISTS temperatures( ``` `ttl` 值是一个字符串,支持以下类型的值: -- 一个时间范围字符串,如 `1hour 12min 5s`,时间范围对象是时间段的连接。每个时间段由一个整数和一个后缀组成。支持的后缀有: - - `nsec`, `ns` – 纳秒(nanoseconds) - - `usec`, `us` – 微秒(microseconds) - - `msec`, `ms` – 毫秒(milliseconds) - - `seconds`, `second`, `sec`, `s` - 秒 - - `minutes`, `minute`, `min`, `m` - 分钟 - - `hours`, `hour`, `hr`, `h` - 小时 - - `days`, `day`, `d` - 天 - - `weeks`, `week`, `w` - 周 - - `months`, `month`, `M` – 月,定义为 30.44 天 - - `years`, `year`, `y` – 年,定义为 365.25 天 +- [时间范围字符串](/reference/time-durations.md),如 `1hour 12min 5s`。 - `forever`, `NULL`, `0s` (或任何长度为 0 的时间范围,如 `0d`)或空字符串 `''`,表示数据永远不会被删除。 - `instant`, 注意数据库的 TTL 不能设置为 `instant`。`instant` 表示数据在插入时立即删除,如果你想将输入发送到流任务而不保存它,可以使用 `instant`,请参阅[流管理文档](/user-guide/flow-computation/manage-flow.md#manage-flows)了解更多细节。 - 未设置,可以使用 `ALTER TABLE UNSET 'ttl'` 
来取消表的 `ttl` 设置,这样表将继承数据库的 `ttl` 策略(如果有的话)。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/time-durations.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/time-durations.md new file mode 100644 index 000000000..ec32d8ad7 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/reference/time-durations.md @@ -0,0 +1,46 @@ +--- +keywords: [时间范围, 时间跨度, 时间单位] +description: 了解 GreptimeDB 中时间范围对象的表示方法,包括支持的时间单位和示例。 +--- + +# 时间范围对象 + +GreptimeDB 使用时间范围对象来表示各种上下文中的时间跨度, +包括 SQL 查询、配置文件和 API 请求。 +时间持续时间表示为由连接的时间跨度组成的字符串, +每个时间跨度由一个十进制数字序列和一个单位后缀表示。 +这些后缀不区分大小写,并且支持单数和复数形式。例如,`1hour 12min 5s`。 + +每个时间跨度由一个整数和一个后缀组成。支持的后缀有: + +- `nsec`, `ns`: 纳秒 +- `usec`, `us`: 微秒 +- `msec`, `ms`: 毫秒 +- `seconds`, `second`, `sec`, `s`: 秒 +- `minutes`, `minute`, `min`, `m`: 分钟 +- `hours`, `hour`, `hr`, `h`: 小时 +- `days`, `day`, `d`: 天 +- `weeks`, `week`, `w`: 周 +- `months`, `month`, `M`: 定义为 30.44 天 +- `years`, `year`, `y`: 定义为 365.25 天 + +在十进制整数后附加上述单位之一,表示等值的秒数。 +例如: + +- `1s`: 等效于 1 秒 +- `2m`: 等效于 120 秒 +- `1ms`: 等效于 0.001 秒 +- `2h`: 等效于 7200 秒 + +以下写法无效: + +- `0xABm`: 不支持十六进制数字 +- `1.5h`: 不支持浮点数 +- `+Infd`: 不支持 `±Inf` 或 `NaN` 值 + +以下是一些有效的时间范围示例: + +- `1h`: 一小时 +- `1h30m`, `1h 30m`: 一小时三十分钟 +- `1h30m10s`, `1h 30m 10s`: 一小时三十分钟十秒 + diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/administration/performance-tuning-tips.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/administration/performance-tuning-tips.md index f1ac45d48..409e555fa 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/administration/performance-tuning-tips.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/administration/performance-tuning-tips.md @@ -46,7 +46,10 @@ region = "your-region" cache_capacity = "10G" ``` -写入缓存起到 write-through 缓存的作用,在将文件上传到对象存储之前,会先将它们存储在本地磁盘上。这可以减少第一次查询的延迟。以下示例展示了如何启用写入缓存。 +写入缓存起到 write-through 缓存的作用,在将文件上传到对象存储之前,会先将它们存储在本地磁盘上。这可以减少第一次查询的延迟。 + + +以下示例展示了在 `v0.12` 版本之前如何启用写入缓存。 - `enable_experimental_write_cache` 开关可用来启用写入缓存。从 `v0.11` 版本开始,当配置对象存储服务的时候,该值将默认设置为 `true`,即启用。 - `experimental_write_cache_size` 用来设置缓存的容量。从 0.11 版本开始,默认初始值为 `5GiB`。 - `experimental_write_cache_path` 用来设置存储缓存文件的路径。默认情况下它位于数据主目录下。 @@ -70,6 +73,29 @@ experimental_write_cache_ttl = "8h" 以下是一个例子: +```toml +[[region_engine]] +[region_engine.mito] +# 写入缓存的缓存大小。此缓存的 `type` 标签值为 `file`。 +write_cache_size = "10G" +# SST 元数据的缓存大小。此缓存的 `type` 标签值为 `sst_meta`。 +sst_meta_cache_size = "128MB" +# 向量和箭头数组的缓存大小。此缓存的 `type` 标签值为 `vector`。 +vector_cache_size = "512MB" +# SST 行组页面的缓存大小。此缓存的 `type` 标签值为 `page`。 +page_cache_size = "512MB" +# 时间序列查询结果(例如 `last_value()`)的缓存大小。此缓存的 `type` 标签值为 `selector_result`。 +selector_result_cache_size = "512MB" + +[region_engine.mito.index] +## 索引暂存目录的最大容量。 +staging_size = "10GB" +``` + + + +对于 `v0.12` 之前的版本 + ```toml [[region_engine]] [region_engine.mito] @@ -92,7 +118,7 @@ staging_size = "10GB" ``` 一些建议: -- 至少将 `experimental_write_cache_size` 设置为磁盘空间的 1/10 +- 至少将写入缓存设置为磁盘空间的 1/10 - 如果数据库内存使用率低于 20%,则可以至少将 `page_cache_size` 设置为总内存大小的 1/4 - 如果缓存命中率低于 50%,则可以将缓存大小翻倍 - 如果使用全文索引,至少将 `staging_size` 设置为磁盘空间的 1/10 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/concepts/key-concepts.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/concepts/key-concepts.md index 1ccdea3b5..d0b9e403b 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/concepts/key-concepts.md +++ 
b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/concepts/key-concepts.md @@ -39,7 +39,7 @@ GreptimeDB 中的数据是强类型的,当创建表时,Auto-schema 功能提 ## 索引 索引是一种性能调优方法,可以加快数据的更快地检索速度。 -GreptimeDB 使用[倒排索引](/contributor-guide/datanode/data-persistence-indexing.md#倒排索引)来加速查询。 +GreptimeDB 提供多种类型的[索引](/user-guide/manage-data/data-index.md)来加速查询。 ## View diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/concepts/why-greptimedb.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/concepts/why-greptimedb.md index 5de69645f..a7d68ace4 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/concepts/why-greptimedb.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/concepts/why-greptimedb.md @@ -111,116 +111,3 @@ GreptimeDB 提供了一种称为时序表的新数据模型(见下图),以 然而,我们对 schema 的定义不是强制性的,而是更倾向于 MongoDB 等数据库的无 schema 方法。 表将在数据写入时动态自动创建,并自动添加新出现的列(Tag 和 Field)。 ---- -keywords: [时序数据库, 云原生, 分布式, 高性能, 用户友好, 存算分离, PromQL, SQL, Python] -description: 介绍 GreptimeDB 的特点、设计原则和优势,包括统一指标、日志和事件,云原生设计,高性能和用户友好等。 ---- - -# 为什么选择 GreptimeDB - -GreptimeDB 是一个云原生、分布式和开源的时序数据库,旨在处理、存储和分析大量的指标、日志和事件数据(计划中还包括 Trace)。 -它在处理涉及时序和实时分析的混合处理工作负载方面非常高效,同时为开发者提供了极佳的体验。 - -可以阅读博客文章《[This Time, for Real](https://greptime.com/blogs/2022-11-15-this-time-for-real)》和《[Unifying Logs and Metrics](https://greptime.com/blogs/2024-06-25-logs-and-metrics)》了解我们开发 GreptimeDB 的动机。 -在这些文章中,我们深入探讨了 Greptime 高性能背后的原因以及一些突出的功能。 - -## 统一的指标、日志和事件 - -通过[时序表](./data-model.md)的模型设计、对 SQL 的原生支持以及存算分离架构带来的混合工作负载, -GreptimeDB 可以同时处理指标、日志和事件, -增强不同时间序列数据之间的关联分析, -并简化架构、部署成本和 API。 - -阅读 [SQL 示例](/user-guide/overview.md#sql-query-example) 了解详细信息。 - -## 可用性、可扩展性和弹性 - -从第一天起,GreptimeDB 就按照云原生数据库的原则设计,这意味着它能够充分利用云的优势。其一些好处包括: - -1. 高可用的按需计算资源,目标是实现 99.999% 的可用性和正常运行时间,即每年大约只有五分钟十五秒的停机时间。 -2. 弹性和可扩展性,允许开发者根据使用情况轻松扩展或缩减、添加或移动资源。 -3. 
高可靠性和容错性以防止数据丢失。系统的目标是实现 99.9999% 的可用性率。 - -这些功能共同确保 GreptimeDB 始终提供最佳的性能。 -下面是关于如何实现这些功能的额外技术解释。 - -### 可弹性扩展的资源隔离 - -![存储/计算分离,计算/计算分离](/storage-compute-disaggregation-compute-compute-separation.png) - -存储和计算资源是分离的,允许每个资源独立扩展、消耗和定价。 -这不仅大大提高了计算资源的利用率,还适配了“按需付费”的定价模式,避免了资源未充分利用的浪费。 - -除了存储和计算隔离,不同的计算资源也被隔离,避免了因数据写入、实时查询以及数据压缩或降采样等任务产生的资源竞争, -从而实现高效率的大规模实时分析。 - -数据可以在多个应用程序之间共享而无需争用同一资源池, -这不仅大大提高了效率, -还可以根据需求提供无限的可扩展性。 - -### 灵活的架构支持多种部署策略 - -![GreptimeDB 的架构](/architecture-2.png) - -通过灵活的架构设计原则,不同的模块和组件可以通过模块化和分层设计独立切换、组合或分离。 -例如,我们可以将 Frontend、Datanode 和 Metasrc 模块合并为一个独立的二进制文件,也可以为每个表独立启用或禁用 WAL。 - -灵活的架构允许 GreptimeDB 满足从边缘到云的各种场景中的部署和使用需求,同时仍然使用同一套 API 和控制面板。 -通过良好抽象的分层和封装隔离,GreptimeDB 的部署形式支持从嵌入式、独立、传统集群到云原生的各种环境。 - -## 优异的成本效益 - -GreptimeDB 利用流行的对象存储解决方案来存储大量的时序数据,例如 AWS S3 和 Azure Blob Storage,允许开发者只为使用的存储资源付费。 - -GreptimeDB 使用自适应压缩算法,根据数据的类型和基数来减少数据量,以满足时间和空间复杂性约束。 -例如,对于字符串数据类型,当块的基数超过某个阈值时,GreptimeDB 使用字典压缩; -对于浮点数,GreptimeDB 采用 Chimp 算法,该算法通过分析实际的时间序列数据集来增强 Gorilla(Facebook 的内存 TSDB)的算法, -并提供比传统算法(如 zstd、Snappy 等)更高的压缩率和空间效率。 - -## 高性能 - -在性能优化方面,GreptimeDB 利用 LSM 树、数据分片和基于 Quorum 的 WAL 设计等不同技术来处理大量的时序数据写入时的工作负载。 - -GreptimeDB 的查询引擎强大且快速,得益于矢量化执行和分布式并行处理,并结合了索引功能。 -为了提升数据修剪和过滤效率,GreptimeDB 构建了智能索引和大规模并行处理(MPP)架构。 -该架构使用独立的索引文件记录统计信息,类似于 Apache Parquet 的行组元数据,同时还使用了内置指标记录不同查询的工作负载。 -通过结合基于成本的优化(CBO)和开发者定义的提示,GreptimeDB 能够启发式地构建智能索引,从而进一步提升查询性能。 - -## 易于使用 - -### 易于部署和维护 - -为了简化部署和维护过程,GreptimeDB 提供了 [K8s operator](https://github.com/GreptimeTeam/greptimedb-operator)、[命令行工具](https://github.com/GreptimeTeam/gtctl)、嵌入式[仪表盘](https://github.com/GreptimeTeam/dashboard)和其他有用的工具, -使得开发者可以轻松配置和管理数据库。 -请访问我们官网的 [GreptimeCloud](https://greptime.com) 了解更多信息。 - -### 易于集成 - -GreptimeDB 支持多种数据库连接协议,包括 MySQL、PostgreSQL、InfluxDB、OpenTSDB、Prometheus Remote Storage 和高性能 gRPC。 -此外,还提供了多种编程语言的 SDK,如 Java、Go、Erlang 等。 -我们还在不断与生态系统中的其他开源软件进行集成和连接,以增强开发者体验。 -接下来将详细介绍三种流行的语言:PromQL、SQL 和 Python。 - -PromQL 是一种流行的查询语言, -允许开发者选择和聚合由 Prometheus 提供的实时时序数据。 -它比 SQL 更简单,适用于使用 Grafana 进行可视化和创建告警规则。 -GreptimeDB 原生支持 PromQL,查询引擎会将其转换为查询计划,对其进行优化和执行。 - -SQL 是一种高效的工具, -用于分析跨越较长时间跨度或涉及多个表(如 join)的数据。 -此外,它在数据库管理方面也非常方便。 - -Python 在数据科学家和 AI 专家中非常流行。 -GreptimeDB 允许直接在数据库中运行 Python 脚本。 -开发者可以编写 UDF 和 DataFrame API,通过嵌入 Python 解释器来加速数据处理。 - -### 简单的数据模型与自动创建表 - -结合指标(Tag/Field/Timestamp)模型和关系数据模型(Table), -GreptimeDB 提供了一种称为时序表的新数据模型(见下图),以表格形式呈现数据,由行和列组成,指标的 Tag 和 Value 映射到列,并强制时间索引约束表示时间戳。 - -![时序表](/time-series-table.png) - -然而,我们对 schema 的定义不是强制性的,而是更倾向于 MongoDB 等数据库的无 schema 方法。 -表将在数据写入时动态自动创建,并自动添加新出现的列(Tag 和 Field)。 - diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/configuration.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/configuration.md index 1b76894d2..af8350c02 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/configuration.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/configuration.md @@ -305,9 +305,7 @@ cache_capacity = "10GiB" ``` -我们建议你不用设置缓存的目录,因为数据库会自动创建该目录。默认的缓存目录位于 -- `{data_home}`(`v0.11.2` 之后) -- `{data_home}/object_cache`(`v0.11.2` 之前) +我们建议你不用设置缓存的目录,因为数据库会自动创建该目录。默认的缓存目录位于 `{data_home}` 目录下。 对于 v0.11 之前的版本,你需要通过在存储设置中配置 `cache_path` 来手动启用读取缓存: @@ -326,6 +324,17 @@ cache_capacity = "5GiB" `cache_path` 指定存储缓存文件的本地目录,而 `cache_capacity` 则决定缓存目录中允许的最大文件总大小(以字节为单位)。你可以通过将 `cache_path` 设置为空字符串来禁用读取缓存。 + +自 `v0.12` 之后,写入缓存不再是实验性的功能。你可以通过修改 mito 的配置调整缓存的大小 + +```toml +[[region_engine]] +[region_engine.mito] + +write_cache_size = "10GiB" +``` + + 对于 v0.11 
之前版本的写入缓存,你需要在 `[region_engine.mito]` 部分将 `enable_experimental_write_cache` 设置为 `true` 来启用: ```toml @@ -337,11 +346,7 @@ experimental_write_cache_path = "/var/data/s3_write_cache" experimental_write_cache_size = "5GiB" ``` -`experimental_write_cache_path` 的默认值是 -- `{data_home}`(`v0.11.2` 之后) -- `{data_home}/object_cache/write`(`v0.11.2` 之前) - - +`experimental_write_cache_path` 默认值位于 `{data_home}` 目录下。 要禁用写入缓存,请将 `enable_experimental_write_cache` 设置为 `false`。 更详细的信息请参阅[性能调优技巧](/user-guide/administration/performance-tuning-tips)。 @@ -526,7 +531,7 @@ fork_dictionary_bytes = "1GiB" | `inverted_index.intermediate_path` | 字符串 | `""` | 存放外排临时文件的路径 (默认 `{data_home}/index_intermediate`). | | `inverted_index.metadata_cache_size` | 字符串 | `64MiB` | 倒排索引元数据缓存大小 | | `inverted_index.content_cache_size` | 字符串 | `128MiB` | 倒排索引文件内容缓存大小 | -| `inverted_index.content_cache_page_size` | 字符串 | `8MiB` | 倒排索引文件内容缓存页大小。倒排索引文件内容以页为单位进行读取和缓存,该配置项用于调整读取和缓存的粒度,优化缓存命中率。 | +| `inverted_index.content_cache_page_size` | 字符串 | `64KiB` | 倒排索引文件内容缓存页大小。倒排索引文件内容以页为单位进行读取和缓存,该配置项用于调整读取和缓存的粒度,优化缓存命中率。 | | `memtable.type` | 字符串 | `time_series` | Memtable type.
- `time_series`: time-series memtable
- `partition_tree`: partition tree memtable (实验性功能) | | `memtable.index_max_keys_per_shard` | 整数 | `8192` | 一个 shard 内的主键数
只对 `partition_tree` memtable 生效 | | `memtable.data_freeze_threshold` | 整数 | `32768` | 一个 shard 内写缓存可容纳的最大行数
只对 `partition_tree` memtable 生效 | diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/common-helm-chart-configurations.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/common-helm-chart-configurations.md new file mode 100644 index 000000000..9656601b6 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/common-helm-chart-configurations.md @@ -0,0 +1,313 @@ +--- +keywords: [Kubernetes, 部署, Helm Chart, 配置] +description: 常见 Helm Chart 配置项 +--- + +# 常见 Helm Chart 配置项 + +对于每一个 Helm Chart,你都可以通过创建 `values.yaml` 来进行配置。当你需要应用配置时,你可以通过 `helm upgrade` 命令来应用配置: + +``` +helm upgrade --install ${release-name} ${chart-name} --namespace ${namespace} -f values.yaml +``` + +## GreptimeDB Cluster Chart + +完整的配置项可参考 [GreptimeDB Cluster Chart](https://github.com/GreptimeTeam/helm-charts/tree/main/charts/greptimedb-cluster/README.md)。 + +### GreptimeDB 运行镜像配置 + +顶层变量 `image` 用于配置集群全局的运行镜像,如下所示: + +```yaml +image: + # -- The image registry + registry: greptime-registry.cn-hangzhou.cr.aliyuncs.com + # -- The image repository + repository: greptime/greptimedb + # -- The image tag + tag: "v0.11.0" + # -- The image pull secrets + pullSecrets: [] +``` + +如果你想为集群中的每个 Role 配置不同的镜像,可以使用 `${role}.podTemplate.main.image` 字段(其中 `role` 可以是 `meta`、`frontend`、`datanode` 和 `flownode`),该字段会**覆盖顶层**变量 `image` 的配置,如下所示: + +```yaml +image: + # -- The image registry + registry: greptime-registry.cn-hangzhou.cr.aliyuncs.com + # -- The image repository + repository: greptime/greptimedb + # -- The image tag + tag: "v0.11.0" + # -- The image pull secrets + pullSecrets: [] + +frontend: + podTemplate: + main: + image: "greptime-registry.cn-hangzhou.cr.aliyuncs.com/greptime/greptimedb:latest" +``` + +此时 `frontend` 的镜像将会被设置为 `greptime-registry.cn-hangzhou.cr.aliyuncs.com/greptime/greptimedb:latest`,而其他组件的镜像将会使用顶层变量 `image` 的配置。 + +### 服务端口配置 + +你可以使用如下字段来配置服务端口,如下所示: + +- `httpServicePort`: 用于配置 HTTP 服务的端口,默认 4000; +- `grpcServicePort`: 用于配置 SQL 服务的端口,默认 4001; +- `mysqlServicePort`: 用于配置 MySQL 服务的端口,默认 4002; +- `postgresServicePort`: 用于配置 PostgreSQL 服务的端口,默认 4003; + +### Datanode 存储配置 + +你可以通过 `datanode.storage` 字段来配置 Datanode 的存储,如下所示: + +```yaml +datanode: + storage: + # -- Storage class for datanode persistent volume + storageClassName: null + # -- Storage size for datanode persistent volume + storageSize: 10Gi + # -- Storage retain policy for datanode persistent volume + storageRetainPolicy: Retain + # -- The dataHome directory, default is "/data/greptimedb/" + dataHome: "/data/greptimedb" +``` + +- `storageClassName`: 用于配置 StorageClass,默认使用 Kubernetes 当前默认的 StorageClass; +- `storageSize`: 用于配置 Storage 的大小,默认 10Gi。你可以使用常用的容量单位,如 `10Gi`、`10GB` 等; +- `storageRetainPolicy`: 用于配置 Storage 的保留策略,默认 `Retain`,如果设置为 `Delete`,则当集群被删除时,相应的 Storage 也会被删除; +- `dataHome`: 用于配置数据目录,默认 `/data/greptimedb/`; + +### 运行资源配置 + +顶层变量 `base.podTemplate.main.resources` 用于全局配置每个 Role 的资源,如下所示: + +```yaml +base: + podTemplate: + main: + resources: + requests: + memory: "1Gi" + cpu: "1" + limits: + memory: "2Gi" + cpu: "2" +``` + +如果你想为集群中的每个 Role 配置不同的资源,可以使用 `${role}.podTemplate.main.resources` 字段(其中 `role` 可以是 `meta`、`frontend`、`datanode` 等),改字段会**覆盖顶层**变量 `base.podTemplate.main.resources` 的配置,如下所示: + +```yaml +base: + podTemplate: + main: + resources: + requests: + memory: "1Gi" + cpu: "1" + limits: + memory: "2Gi" + cpu: "2" + +frontend: + podTemplate: + main: + resources: + requests: + 
cpu: "2" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" +``` + +### 服务运行副本数配置 + +对于不同 Role 的副本数,可以通过 `${role}.replicas` 字段进行配置对应的副本数,如下所示: + +```yaml +frontend: + replicas: 3 + +datanode: + replicas: 3 +``` + +你可以通过配置其副本数来实现水平扩缩。 + +### 环境变量配置 + +你既可以通过 `base.podTemplate.main.env` 字段配置全局的环境变量,也可以通过 `${role}.podTemplate.main.env` 字段为每个 Role 配置不同的环境变量,如下所示: + +```yaml +base: + podTemplate: + main: + env: + - name: GLOBAL_ENV + value: "global_value" + +frontend: + podTemplate: + main: + env: + - name: FRONTEND_ENV + value: "frontend_value" +``` + +### 注入配置文件 + +对于不同 Role 的服务,你可以通过 `${role}.configData` 字段注入自定义的 TOML 配置文件,如下所示: + +```yaml +frontend: + configData: | + [[region_engine]] + [region_engine.mito] + # Number of region workers + num_workers = 8 +``` + +你可以通过 [config.md](https://github.com/GreptimeTeam/greptimedb/blob/main/config/config.md) 了解 GreptimeDB 的配置项。 + +除了使用 `${role}.configData` 字段注入配置文件,你还可以通过 `${role}.configFile` 来指定相应的文件,如下所示: + +```yaml +frontend: + configFile: "configs/frontend.toml" +``` + +此时需要确保对应的配置文件路径与执行 `helm upgrade` 命令时所处的目录一致。 + +:::note +用户注入的配置文件默认优先级低于由 GreptimeDB Operator 所接管的配置项,某些配置项仅能通过 GreptimeDB Operator 进行配置,而这些配置项默认会暴露在 `values.yaml` 中。 + +如下默认配置将由 GreptimeDB Operator 管理: + +- Logging 配置; +- Datanode 的 Node ID; +::: + +### 鉴权配置 + +Helm Chart 默认不启用 User Provider 模式的鉴权,你可以通过 `auth.enabled` 字段启用 User Provider 模式的鉴权并配置相应的用户信息,如下所示: + +```yaml +auth: + enabled: true + users: + - name: admin + password: "admin" +``` + +### 日志配置 + +顶层变量 `logging` 用于配置全局日志级别,如下所示: + +```yaml +# -- Global logging configuration +logging: + # -- The log level for greptimedb, only support "debug", "info", "warn", "debug" + level: "info" + + # -- The log format for greptimedb, only support "json" and "text" + format: "text" + + # -- The logs directory for greptimedb + logsDir: "/data/greptimedb/logs" + + # -- Whether to log to stdout only + onlyLogToStdout: false + + # -- indicates whether to persist the log with the datanode data storage. It **ONLY** works for the datanode component. + persistentWithData: false + + # -- The log filters, use the syntax of `target[span\{field=value\}]=level` to filter the logs. + filters: [] + + # -- The slow query log configuration. + slowQuery: + # -- Enable slow query log. + enabled: false + + # -- The threshold of slow query log in seconds. + threshold: "10s" + + # -- Sample ratio of slow query log. 
+ sampleRatio: "1.0" +``` + +其中: + +- `logging.level`: 用于配置全局日志级别,支持 `debug`、`info`、`warn`、`error` 四个级别; +- `logging.format`: 用于配置全局日志格式,支持 `json` 和 `text` 两种格式; +- `logging.logsDir`: 用于配置全局日志目录,默认位于 `/data/greptimedb/logs`; +- `logging.onlyLogToStdout`: 用于配置是否仅输出到标准输出,默认不启用; +- `logging.persistentWithData`: 用于配置是否将日志持久化到数据存储,仅适用于 `datanode` 组件,默认不启用; +- `logging.filters`: 用于配置全局日志过滤器,支持 `target[span\{field=value\}]=level` 的语法,特步地,如果你希望对某些组件启动 `debug` 级别的日志,可以配置如下: + + ```yaml + logging: + level: "info" + format: "json" + filters: + - mito2=debug + ``` + +你还可以通过 `logging.slowQuery` 字段配置来启用慢查询日志,如下所示: + +```yaml +logging: + slowQuery: + enabled: true + threshold: "100ms" + sampleRatio: "1.0" +``` + +其中: + +- `logging.slowQuery.enabled`: 用于配置是否启用慢查询日志,默认不启用; +- `logging.slowQuery.threshold`: 用于配置慢查询日志的阈值; +- `logging.slowQuery.sampleRatio`: 用于配置慢查询日志的采样率,默认 1.0(即全部采样); + +如果配置了输出目录 `logging.logsDir`,则慢查询日志会输出到该目录下。 + +每一个 Role 的日志配置都可以通过 `${role}.logging` 字段进行配置,其字段与顶层 `logging` 一致,并会**覆盖**顶层变量 `logging` 的配置,比如: + +```yaml +frontend: + logging: + level: "debug" +``` + +### 启用 Flownode + +Helm Chart 默认不启用 Flownode,你可以通过 `flownode.enabled` 字段启用 Flownode,如下所示: + +```yaml +flownode: + enabled: true +``` + +`flownode` 的其他字段的配置与其他 Role 的配置一致,比如: + +```yaml +flownode: + enabled: false + replicas: 1 + podTemplate: + main: + resources: + requests: + memory: "1Gi" + cpu: "1" + limits: + memory: "2Gi" + cpu: "2" +``` + diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/monitoring/cluster-monitoring-deployment.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/monitoring/cluster-monitoring-deployment.md new file mode 100644 index 000000000..7922264cd --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/monitoring/cluster-monitoring-deployment.md @@ -0,0 +1,222 @@ +--- +keywords: [Kubernetes 部署, 集群, 监控] +description: 在 Kubernetes 上部署 GreptimeDB 集群的监控指南,包括自监控和 Prometheus 监控的详细步骤。 +--- + +# 集群监控部署 + +当你使用 GreptimeDB Operator 部署 GreptimeDB 集群后,默认其对应组件(如 Metasrv / Datanode / Frontend)的 HTTP 端口(默认为 `4000`)将会暴露 `/metrics` 端点用于暴露 Prometheus 指标。 + +我们将提供两种方式来监控 GreptimeDB 集群: + +1. **启用 GreptimeDB 自监控**:GreptimeDB Operator 将额外启动一个 GreptimeDB Standalone 实例和 Vector Sidecar 容器,分别用于收集和存储 GreptimeDB 集群的指标和日志数据; +2. 
**使用 Prometheus Operator 配置 Prometheus 指标监控**:用户需先部署 Prometheus Operator,并创建相应的 Prometheus 实例,然后通过 Prometheus Operator 的 `PodMonitor` 来将 GreptimeDB 集群的 Metrics 数据写入到相应的 Prometheus 中; + +用户可根据自身需求选择合适的监控方式。 + +## 启用 GreptimeDB 自监控 + +自监控模式下 GreptimeDB Operator 将会额外启动一个 GreptimeDB Standalone 实例,用于收集 GreptimeDB 集群的指标和日志数据,其中日志数据将包括集群日志和慢查询日志。为了收集日志数据,GreptimeDB Operator 会在每一个 Pod 中启动一个 [Vector](https://vector.dev/) 的 Sidecar 容器,用于收集 Pod 的日志数据。启用该模式后,集群将自动开启 JSON 格式的日志输出。 + +如果你使用 Helm Chart 部署 GreptimeDB 集群(可参考[立即开始](../getting-started.md)),可对 Helm Chart 的 `values.yaml` 文件进行如下配置: + +```yaml +monitoring: + enabled: true +``` + +此时 Helm Chart 将会部署一个名为 `${cluster}-monitoring` 的 GreptimeDB Standalone 实例,用于收集 GreptimeDB 集群的指标和日志数据,你可以用如下命令进行查看: + +``` +kubectl get greptimedbstandalones.greptime.io ${cluster}-monitoring -n ${namespace} +``` + +默认该 GreptimeDB Standalone 实例会将监控数据使用 Kubernetes 当前默认的 StorageClass 将数据保存于本地存储,你可以根据实际情况进行调整。 + +GreptimeDB Standalone 实例的配置可以通过 Helm Chart 的 `values.yaml` 中的 `monitoring.standalone` 字段进行调整,如下例子所示: + +```yaml +monitoring: + enabled: true + standalone: + base: + main: + # 用于配置 GreptimeDB Standalone 实例的镜像 + image: "greptime-registry.cn-hangzhou.cr.aliyuncs.com/greptime/greptimedb:latest" + + # 用于配置 GreptimeDB Standalone 实例的资源配置 + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "2" + memory: "4Gi" + + # 用于配置 GreptimeDB Standalone 实例的对象存储 + objectStorage: + s3: + # 用于配置 GreptimeDB Standalone 实例的对象存储的 bucket + bucket: "monitoring" + # 用于配置 GreptimeDB Standalone 实例的对象存储的 region + region: "ap-southeast-1" + # 用于配置 GreptimeDB Standalone 实例的对象存储的 secretName + secretName: "s3-credentials" + # 用于配置 GreptimeDB Standalone 实例的对象存储的 root + root: "standalone-with-s3-data" +``` + +GreptimeDB Standalone 实例将会使用 `${cluster}-monitoring-standalone` 作为 Kubernetes Service 的名称来暴露相应的服务,你可以使用如下地址来用于监控数据的读取: + +- **Prometheus 协议的指标监控**:`http://${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4000/v1/prometheus`。 +- **SQL 协议的日志监控**:`${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4002`。默认集群日志会存储于 `public._gt_logs` 表,而将慢查询日志存储于 `public._gt_slow_queries` 表。 + +GreptimeDB 自监控模式将使用 Vector Sidecar 来收集日志数据,你可以通过 `monitoring.vector` 字段来配置 Vector 的配置,如下所示: + +```yaml +monitoring: + enabled: true + vector: + # 用于配置 Vector 的镜像仓库 + registry: greptime-registry.cn-hangzhou.cr.aliyuncs.com + # 用于配置 Vector 的镜像仓库 + repository: timberio/vector + # 用于配置 Vector 的镜像标签 + tag: nightly-alpine + + # 用于配置 Vector 的资源配置 + resources: + requests: + cpu: "50m" + memory: "64Mi" + limits: + cpu: "50m" + memory: "64Mi" +``` + +:::note +如果你没有使用 Helm Chart 进行部署,你也可以通过如下 `GreptimeDBCluster` 的 YAML 来手动配置自监控模式,如下所示: + +```yaml +apiVersion: greptime.io/v1alpha1 +kind: GreptimeDBCluster +metadata: + name: basic +spec: + base: + main: + image: greptime/greptimedb:latest + frontend: + replicas: 1 + meta: + replicas: 1 + etcdEndpoints: + - "etcd.etcd-cluster.svc.cluster.local:2379" + datanode: + replicas: 1 + monitoring: + enabled: true +``` + +其中 `monitoring` 字段用于配置自监控模式,具体可参考 [`GreptimeDBCluster` API 文档](https://github.com/GreptimeTeam/greptimedb-operator/blob/main/docs/api-references/docs.md#monitoringspec)。 +::: + +## 使用 Prometheus Operator 配置 Prometheus 指标监控 + +用户需先部署 Prometheus Operator 并创建相应的 Prometheus 实例,例如可以使用 [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) 来部署相应的 Prometheus 技术栈,具体过程可参考其对应的[官方文档](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack)。 + +当部署完 Prometheus 
Operator 和 Prometheus 实例后,用户可通过 Helm Chart 的 `values.yaml` 的 `prometheusMonitor` 字段来配置 Prometheus 监控,如下所示: + +```yaml +prometheusMonitor: + # 用于配置是否启用 Prometheus 监控,此时 GreptimeDB Operator 将会自动创建 Prometheus Operator 的 `PodMonitor` 资源 + enabled: true + # 用于配置 Prometheus 监控的抓取间隔 + interval: "30s" + # 用于配置 Prometheus 监控的标签 + labels: + release: prometheus +``` + +:::note +`labels` 字段需要与相应用于创建 Prometheus 实例的 `matchLabels` 字段保持一致,否则将无法正常抓取到 GreptimeDB 集群的 Metrics 数据。 +::: + +当我们配置完 `prometheusMonitor` 字段后,GreptimeDB Operator 将会自动创建 Prometheus Operator 的 `PodMonitor` 资源,并将 GreptimeDB 集群的 Metrics 数据导入到 Prometheus 中,比如我们可以用如下命令来查看创建的 `PodMonitor` 资源: + +``` +kubectl get podmonitors.monitoring.coreos.com -n ${namespace} +``` + +:::note +如果你没有使用 Helm Chart 进行部署,你也可以通过如下 `GreptimeDBCluster` 的 YAML 来手动配置 Prometheus 监控,如下所示: + +```yaml +apiVersion: greptime.io/v1alpha1 +kind: GreptimeDBCluster +metadata: + name: basic +spec: + base: + main: + image: greptime/greptimedb:latest + frontend: + replicas: 1 + meta: + replicas: 1 + etcdEndpoints: + - "etcd.etcd-cluster.svc.cluster.local:2379" + datanode: + replicas: 1 + prometheusMonitor: + enabled: true + interval: "30s" + labels: + release: prometheus +``` + +其中 `prometheusMonitor` 字段用于配置 Prometheus 监控。 +::: + +## 导入 Grafana Dashboard + +目前 GreptimeDB 集群可使用如下 3 个 Grafana Dashboard 来配置监控面板: + +- [集群指标 Dashboard](https://github.com/GreptimeTeam/greptimedb/blob/main/grafana/greptimedb-cluster.json) +- [集群日志 Dashboard](https://github.com/GreptimeTeam/helm-charts/blob/main/charts/greptimedb-cluster/dashboards/greptimedb-cluster-logs.json) +- [慢查询日志 Dashboard](https://github.com/GreptimeTeam/helm-charts/blob/main/charts/greptimedb-cluster/dashboards/greptimedb-cluster-slow-queries.json) + + +:::note +其中 **集群日志 Dashboard** 和 **慢查询日志 Dashboard** 仅适用于自监控模式,而 **集群指标 Dashboard** 则适用于自监控模式和 Prometheus 监控模式。 +::: + +如果你使用 Helm Chart 部署 GreptimeDB 集群,你可以通过启用 `grafana.enabled` 来一键部署 Grafana 实例,并导入相应的 Dashboard(可参考[立即开始](../getting-started.md)),如下所示: + +```yaml +grafana: + enabled: true +``` + +如果你是已经部署了 Grafana 实例,你可以参考如下步骤来导入相应的 Dashboard: + +1. **添加相应的 Data Sources** + + 你可以参考 Grafana 官方文档的 [datasources](https://grafana.com/docs/grafana/latest/datasources/) 来添加如下 3 个数据源: + + - **`metrics` 数据源** + + 用于导入集群的 Prometheus 监控数据,适用于自监控模式和 Prometheus 监控模式。如上文所述,当使用自监控模式时,此时可使使用 `http://${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4000/v1/prometheus` 作为数据源的 URL。如果使用 Prometheus 监控模式,用户可根据具体 Prometheus 实例的 URL 来配置数据源。 + + - **`information-schema` 数据源** + + 这部分数据源用于使用 SQL 协议导入集群内部的元数据信息,适用于自监控模式和 Prometheus 监控模式。此时我们可以用 `${cluster}-frontend.${namespace}.svc.cluster.local:4002` 作为 SQL 协议的地址,并使用 `information_schema` 作为数据库名称进行连接。 + + - **`logs` 数据源** + + 这部分数据源用于使用 SQL 协议导入集群的日志和慢查询日志,**仅适用于自监控模式**。此时我们可以用 `${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4002` 作为 SQL 协议的地址,并使用 `public` 作为数据库名称进行连接。 + + +2. 
**导入相应的 Dashboard** + + 你可以参考 Grafana 官方文档的 [Import dashboards](https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/import-dashboards/) 来导入相应的 Dashboard。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/overview.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/overview.md index 59d2dea5f..47cd1a66c 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/overview.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/overview.md @@ -5,16 +5,24 @@ description: 在 Kubernetes 上部署 GreptimeDB 的概述,介绍了 GreptimeD # 概述 -## GreptimeDB Operator +## GreptimeDB on Kubernetes -[GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator) 采用 [Operator 模式](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/)运行于 Kubernetes 中,用于自动化管理 GreptimeDB 资源。基于 GreptimeDB Operator,你可以很轻松地部署、升级和管理 GreptimeDB 集群和单机实例。无论是私有还是公有云部署,GreptimeDB Operator 都将快速部署和扩容 GreptimeDB 变得简单易行。 +GreptimeDB 是专为云原生环境而设计的时序数据库,自诞生以来就支持在 Kubernetes 上部署。我们提供了一个 [GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator) 来管理 GreptimeDB 在 Kubernetes 上的部署、配置和扩容。基于 GreptimeDB Operator,你可以很轻松地部署、升级和管理 GreptimeDB 集群和单机实例。无论是私有还是公有云部署,GreptimeDB Operator 都将快速部署和扩容 GreptimeDB 变得简单易行。 我们**强烈建议**使用 GreptimeDB Operator 在 Kubernetes 上部署 GreptimeDB。 -## 使用 GreptimeDB Operator 来管理 GreptimeDB +## 立即开始 你可以将 [立即开始](./getting-started.md) 作为你的第一篇指南以了解整体情况。在该指南中,我们提供了用于在 Kubernetes 上部署 GreptimeDB 集群的完整过程。 -在完成**立即开始**章节后,你可以参考以下文档以获取有关生产部署的更多详细信息。 +## GreptimeDB Operator - [GreptimeDB Operator 管理](./greptimedb-operator-management.md) + +## 监控 + +- [集群监控部署](./monitoring/cluster-monitoring-deployment.md) + +## 配置 + +- [常见 Helm Chart 配置项](./common-helm-chart-configurations.md) diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/go.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/go.md index d336cdc24..803e45f43 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/go.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/go.md @@ -34,6 +34,7 @@ go get -u github.com/GreptimeTeam/greptimedb-ingester-go@VAR::goSdkVersion ```go import ( greptime "github.com/GreptimeTeam/greptimedb-ingester-go" + ingesterContext "github.com/GreptimeTeam/greptimedb-ingester-go/context" "github.com/GreptimeTeam/greptimedb-ingester-go/table" "github.com/GreptimeTeam/greptimedb-ingester-go/table/types" ) @@ -59,6 +60,31 @@ cli, _ := greptime.NewClient(cfg) ``` +
+ +你可以使用 `ingesterContext` 设置表选项。 +例如设置 `ttl` 选项: + +```go +hints := []*ingesterContext.Hint{ + { + Key: "ttl", + Value: "3d", + }, +} + +ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) +ctx = ingesterContext.New(ctx, ingesterContext.WithHints(hints)) +// 使用 ingesterContext写入数据到 GreptimeDB +// `data` 对象在之后的章节中描述 +resp, err := c.client.Write(ctx, data) +if err != nil { + return err +} +``` + +
+
```go diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/java.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/java.md index 1c5285931..7a538124e 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/java.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/java.md @@ -80,6 +80,21 @@ GreptimeDB client = GreptimeDB.create(opts);
+
+
+你可以使用 `Context` 设置表选项。
+例如,使用以下代码设置 `ttl` 选项:
+
+```java
+Context ctx = Context.newDefault();
+ctx.withHint("ttl", "3d");
+// 使用 ctx 对象写入数据
+// `cpuMetric` 和 `memMetric` 是定义的数据对象,之后的章节中有详细描述
+CompletableFuture<Result<WriteOk, Err>> future = greptimeDB.write(Arrays.asList(cpuMetric, memMetric), WriteOp.Insert, ctx);
+```
+
+
+
```java diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/template.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/template.md index 41589bb65..8c06f02cf 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/template.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/template.md @@ -26,6 +26,18 @@ GreptimeDB 提供了用于高吞吐量数据写入的 ingester 库。 表中的每条行数据包含三种类型的列:`Tag`、`Timestamp` 和 `Field`。更多信息请参考 [数据模型](/user-guide/concepts/data-model.md)。 列值的类型可以是 `String`、`Float`、`Int`、`JSON`, `Timestamp` 等。更多信息请参考 [数据类型](/reference/sql/data-types.md)。 +## 设置表选项 + +虽然在通过 SDK 向 GreptimeDB 写入数据时会自动创建时间序列表,但你仍然可以配置表选项。 +SDK 支持以下表选项: + +- `auto_create_table`:默认值为 `True`。如果设置为 `False`,则表示表已经存在且不需要自动创建,这可以提高写入性能。 +- `ttl`、`append_mode`、`merge_mode`:更多详情请参考[表选项](/reference/sql/create.md#table-options)。 + + + +关于如何向 GreptimeDB 写入数据,请参考以下各节。 + ## 低层级 API GreptimeDB 的低层级 API 通过向具有预定义模式的 `table` 对象添加 `row` 来写入数据。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/kafka.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/kafka.md new file mode 100644 index 000000000..bde191ee7 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-iot/kafka.md @@ -0,0 +1,9 @@ +--- +keywords: [Kafka, 数据写入] +description: 将数据从 Kafka 写入到 GreptimeDB. +--- + +# Kafka + +请参考 [Kafka 文档](/user-guide/ingest-data/for-observerbility/kafka.md)了解如何将数据从 Kafka 写入到 GreptimeDB。 + diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/alloy.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/alloy.md new file mode 100644 index 000000000..7b33a5999 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/alloy.md @@ -0,0 +1,120 @@ +--- +keywords: [Grafana Alloy, Prometheus Remote Write, OpenTelemetry, 数据管道] +description: 绍了如何将 GreptimeDB 配置为 Grafana Alloy 的数据接收端,包括 Prometheus Remote Write 和 OpenTelemetry 的配置示例。通过这些配置,你可以将 GreptimeDB 集成到可观测性数据管道中,实现对指标和日志的高效管理和分析。 +--- + +# Grafana Alloy + +[Grafana Alloy](https://grafana.com/docs/alloy/latest/) 是一个用于 OpenTelemetry (OTel)、Prometheus、Pyroscope、Loki 等其他指标、日志、追踪和分析工具的可观测性数据管道。 +你可以将 GreptimeDB 集成为 Alloy 的数据接收端。 + +## Prometheus Remote Write + +将 GreptimeDB 配置为远程写入目标: + +```hcl +prometheus.remote_write "greptimedb" { + endpoint { + url = "${GREPTIME_SCHEME:=http}://${GREPTIME_HOST:=greptimedb}:${GREPTIME_PORT:=4000}/v1/prometheus/write?db=${GREPTIME_DB:=public}" + + basic_auth { + username = "${GREPTIME_USERNAME}" + password = "${GREPTIME_PASSWORD}" + } + } +} +``` + +- `GREPTIME_HOST`: GreptimeDB 主机地址,例如 `localhost`。 +- `GREPTIME_DB`: GreptimeDB 数据库名称,默认是 `public`。 +- `GREPTIME_USERNAME` 和 `GREPTIME_PASSWORD`: GreptimeDB的[鉴权认证信息](/user-guide/deployments/authentication/static.md)。 + +有关从 Prometheus 到 GreptimeDB 的数据模型转换的详细信息,请参阅 Prometheus Remote Write 指南中的[数据模型](/user-guide/ingest-data/for-observerbility/prometheus.md#数据模型)部分。 + +## OpenTelemetry + +GreptimeDB 也可以配置为 OpenTelemetry Collector 的目标。 + +### 指标 + +```hcl +otelcol.exporter.otlphttp "greptimedb" { + client { + endpoint = "${GREPTIME_SCHEME:=http}://${GREPTIME_HOST:=greptimedb}:${GREPTIME_PORT:=4000}/v1/otlp/" + headers = { + "X-Greptime-DB-Name" = "${GREPTIME_DB:=public}", + } 
+ auth = otelcol.auth.basic.credentials.handler + } +} + +otelcol.auth.basic "credentials" { + username = "${GREPTIME_USERNAME}" + password = "${GREPTIME_PASSWORD}" +} +``` + +- `GREPTIME_HOST`: GreptimeDB 主机地址,例如 `localhost`。 +- `GREPTIME_DB`: GreptimeDB 数据库名称,默认是 `public`。 +- `GREPTIME_USERNAME` 和 `GREPTIME_PASSWORD`: GreptimeDB 的[鉴权认证信息](/user-guide/deployments/authentication/static.md)。 + +有关从 OpenTelemetry 到 GreptimeDB 的指标数据模型转换的详细信息,请参阅 OpenTelemetry 指南中的[数据模型](/user-guide/ingest-data/for-observerbility/opentelemetry.md#数据模型)部分。 + +### 日志 + +以下示例设置了一个使用 Loki 和 OpenTelemetry Collector (otelcol) 的日志管道,将日志转发到 GreptimeDB: + +```hcl +loki.source.file "greptime" { + targets = [ + {__path__ = "/tmp/foo.txt"}, + ] + forward_to = [otelcol.receiver.loki.greptime.receiver] +} + +otelcol.receiver.loki "greptime" { + output { + logs = [otelcol.exporter.otlphttp.greptimedb_logs.input] + } +} + +otelcol.auth.basic "credentials" { + username = "${GREPTIME_USERNAME}" + password = "${GREPTIME_PASSWORD}" +} + +otelcol.exporter.otlphttp "greptimedb_logs" { + client { + endpoint = "${GREPTIME_SCHEME:=http}://${GREPTIME_HOST:=greptimedb}:${GREPTIME_PORT:=4000}/v1/otlp/" + headers = { + "X-Greptime-DB-Name" = "${GREPTIME_DB:=public}", + "X-Greptime-Log-Table-Name" = "${LOG_TABLE_NAME}", + "X-Greptime-Gog-Extract-Keys" = "${EXTRACT_KEYS}", + } + auth = otelcol.auth.basic.credentials.handler + } +} +``` + +- Loki source 配置 + - `loki.source.file "greptime"` 块定义了 source,用于 Loki 从位于 `/tmp/foo.txt` 的文件中读取日志。 + - `forward_to` 数组指示从该文件读取的日志应转发到 `otelcol.receiver.loki.greptime.receiver`。 +- OpenTelemetry Collector Receiver 配置: + - `otelcol.receiver.loki "greptime"` 在 OpenTelemetry Collector 中设置了一个 receiver,以接收来自 Loki 的日志。 + - `output` 指定接收到的日志应转发到 `otelcol.exporter.otlphttp.greptimedb_logs.input`。 +- OpenTelemetry Collector Exporter 配置: + - `otelcol.exporter.otlphttp "greptimedb_logs"` 块配置了一个 HTTP Exporter,将日志发送到 GreptimeDB。 + - `GREPTIME_HOST`: GreptimeDB 主机地址,例如 `localhost`。 + - `GREPTIME_DB`: GreptimeDB 数据库名称,默认是 `public`。 + - `GREPTIME_USERNAME` 和 `GREPTIME_PASSWORD`: GreptimeDB 的[鉴权认证信息](/user-guide/deployments/authentication/static.md)。 + - `LOG_TABLE_NAME`: 存储日志的表名,默认表名为 `opentelemetry_logs`。 + - `EXTRACT_KEYS`: 从属性中提取对应 key 的值到表的顶级字段,用逗号分隔,例如 `filename,log.file.name,loki.attribute.labels`,详情请看 [HTTP API 文档](opentelemetry.md#otlphttp-api-1)。 + +有关从 OpenTelemetry 到 GreptimeDB 的日志数据模型转换的详细信息,请参阅 OpenTelemetry 指南中的[数据模型](/user-guide/ingest-data/for-observerbility/opentelemetry.md#数据模型-1)部分。 + +:::tip 提示 +上述示例代码可能会过时,请参考 OpenTelemetry 和 Grafana Alloy 的官方文档以获取最新信息。 +::: + +有关示例代码的更多信息,请参阅你首选编程语言的官方文档。 + diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/kafka.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/kafka.md new file mode 100644 index 000000000..1cd1a3667 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/kafka.md @@ -0,0 +1,170 @@ +--- +keywords: [Kafka, 数据提取, 可观察性, 指标, 日志, JSON 日志, 文本日志, Vector, InfluxDB 行协议] +description: 了解如何使用 Vector 将可观察性数据从 Kafka 写入到 GreptimeDB。本指南涵盖指标和日志提取,包括 JSON 和文本日志格式,并附有详细的配置示例。 +--- + +# Kafka + +如果你使用 Kafka 或兼容 Kafka 的消息队列进行可观测性数据传输,可以直接将数据写入到 GreptimeDB 中。 + +这里我们使用 Vector 作为工具将数据从 Kafka 传输到 GreptimeDB。 + +## 指标 + +从 Kafka 写入指标到 GreptimeDB 时,消息应采用 InfluxDB 行协议格式。例如: + +```txt +census,location=klamath,scientist=anderson bees=23 1566086400000000000 +``` + +然后配置 Vector 使用 `influxdb` 解码器来处理这些消息。 + +```toml 
+[sources.metrics_mq] +# 指定源类型为 Kafka +type = "kafka" +# Kafka 的消费者组 ID +group_id = "vector0" +# 要消费消息的 Kafka 主题列表 +topics = ["test_metric_topic"] +# 要连接的 Kafka 地址 +bootstrap_servers = "kafka:9092" +# `influxdb` 表示消息应采用 InfluxDB 行协议格式 +decoding.codec = "influxdb" + +[sinks.metrics_in] +inputs = ["metrics_mq"] +# 指定接收器类型为 `greptimedb_metrics` +type = "greptimedb_metrics" +# GreptimeDB 服务器的端点 +# 将 替换为实际的主机名或 IP 地址 +endpoint = ":4001" +dbname = "" +username = "" +password = "" +tls = {} +``` + +有关 InfluxDB 行协议指标如何映射到 GreptimeDB 数据的详细信息,请参阅 InfluxDB 行协议文档中的[数据模型](/user-guide/ingest-data/for-iot/influxdb-line-protocol.md#数据模型)部分。 + +## 日志 + +开发人员通常处理两种类型的日志:JSON 日志和纯文本日志。 +例如以下从 Kafka 发送的日志示例。 + +纯文本日志: + +```txt +127.0.0.1 - - [25/May/2024:20:16:37 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" +``` + +或 JSON 日志: + +```json +{ + "timestamp": "2024-12-23T10:00:00Z", + "level": "INFO", + "message": "Service started" +} +``` + +GreptimeDB 将这些日志转换为具有多个列的结构化数据,并自动创建必要的表。 +Pipeline 在写入到 GreptimeDB 之前将日志处理为结构化数据。 +不同的日志格式需要不同的 [Pipeline](/user-guide/logs/quick-start.md#write-logs-by-pipeline) 来解析,详情请继续阅读下面的内容。 + +### JSON 格式的日志 + +对于 JSON 格式的日志(例如 `{"timestamp": "2024-12-23T10:00:00Z", "level": "INFO", "message": "Service started"}`), +你可以使用内置的 [`greptime_identity`](/user-guide/logs/manage-pipelines.md#greptime_identity) pipeline 直接写入日志。 +此 pipeline 根据 JSON 日志消息中的字段自动创建列。 + +你只需要配置 Vector 的 `transforms` 设置以解析 JSON 消息,并使用 `greptime_identity` pipeline,如以下示例所示: + +```toml +[sources.logs_in] +type = "kafka" +# Kafka 的消费者组 ID +group_id = "vector0" +# 要消费消息的 Kafka 主题列表 +topics = ["test_log_topic"] +# 要连接的 Kafka 代理地址 +bootstrap_servers = "kafka:9092" + +# 将日志转换为 JSON 格式 +[transforms.logs_json] +type = "remap" +inputs = ["logs_in"] +source = ''' +. 
= parse_json!(.message) +''' + +[sinks.logs_out] +# 指定此接收器将接收来自 `logs_json` 源的数据 +inputs = ["logs_json"] +# 指定接收器类型为 `greptimedb_logs` +type = "greptimedb_logs" +# GreptimeDB 服务器的端点 +endpoint = "http://:4000" +compression = "gzip" +# 将 替换为实际值 +dbname = "" +username = "" +password = "" +# GreptimeDB 中的表名,如果不存在,将自动创建 +table = "demo_logs" +# 使用内置的 `greptime_identity` 管道 +pipeline_name = "greptime_identity" +``` + +### 文本格式的日志 + +对于文本格式的日志,例如下面的访问日志格式,你需要创建自定义 pipeline 来解析它们: + +``` +127.0.0.1 - - [25/May/2024:20:16:37 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" +``` + +#### 创建 pipeline + +要创建自定义 pipeline, +请参阅[创建 pipeline](/user-guide/logs/quick-start.md#创建-pipeline) 和 [pipeline 配置](/user-guide/logs/pipeline-config.md)文档获取详细说明。 + +#### 写入数据 + +创建 pipeline 后,将其配置到 Vector 配置文件中的 `pipeline_name` 字段。 + +```toml +# sample.toml +[sources.log_mq] +# 指定源类型为 Kafka +type = "kafka" +# Kafka 的消费者组 ID +group_id = "vector0" +# 要消费消息的 Kafka 主题列表 +topics = ["test_log_topic"] +# 要连接的 Kafka 地址 +bootstrap_servers = "kafka:9092" + +[sinks.sink_greptime_logs] +# 指定接收器类型为 `greptimedb_logs` +type = "greptimedb_logs" +# 指定此接收器将接收来自 `log_mq` 源的数据 +inputs = [ "log_mq" ] +# 使用 `gzip` 压缩以节省带宽 +compression = "gzip" +# GreptimeDB 服务器的端点 +# 将 替换为实际的主机名或 IP 地址 +endpoint = "http://:4000" +dbname = "" +username = "" +password = "" +# GreptimeDB 中的表名,如果不存在,将自动创建 +table = "demo_logs" +# 你创建的自定义管道名称 +pipeline_name = "your_custom_pipeline" +``` + +## Demo + +有关数据转换和写入的可运行演示,请参阅 [Kafka Ingestion Demo](https://github.com/GreptimeTeam/demo-scene/tree/main/kafka-ingestion)。 + diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/opentelemetry.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/opentelemetry.md index 12a2d7ff4..f3b5e5b64 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/opentelemetry.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/opentelemetry.md @@ -7,15 +7,34 @@ description: 介绍如何使用 OpenTelemetry Protocol (OTLP) 将观测数据( [OpenTelemetry](https://opentelemetry.io/) 是一个供应商中立的开源可观测性框架,用于检测、生成、收集和导出观测数据,例如 traces, metrics 和 logs。 OpenTelemetry Protocol (OTLP) 定义了观测数据在观测源和中间进程(例如收集器和观测后端)之间的编码、传输机制。 + +## OpenTelemetry Collectors + +你可以很简单地将 GreptimeDB 配置为 OpenTelemetry Collector 的目标。 +有关更多信息,请参阅 [Grafana Alloy](alloy.md) 示例。 + ## Metrics -### OTLP/HTTP +GreptimeDB 通过原生支持 [OTLP/HTTP](https://opentelemetry.io/docs/specs/otlp/#otlphttp) 协议,可以作为后端存储服务来接收 OpenTelemetry 指标数据。 + +### OTLP/HTTP API -import Includeotlpmetrycsintegration from '../../../db-cloud-shared/clients/otlp-metrics-integration.md' +使用下面的信息通过 Opentelemetry SDK 库发送 Metrics 到 GreptimeDB: + +* URL: `https:///v1/otlp/v1/metrics` +* Headers: + * `X-Greptime-DB-Name`: `` +* `Authorization`: `Basic` 认证,是 `:` 的 Base64 编码字符串。更多信息请参考 [鉴权](https://docs.greptime.cn/user-guide/deployments/authentication/static/) 和 [HTTP API](https://docs.greptime.cn/user-guide/protocols/http#authentication)。 + +请求中使用 binary protobuf 编码 payload,因此你需要使用支持 `HTTP/protobuf` 的包。例如,在 Node.js 中,可以使用 [`exporter-trace-otlp-proto`](https://www.npmjs.com/package/@opentelemetry/exporter-trace-otlp-proto);在 Go 中,可以使用 [`go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp`](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp);在 Java 
中,可以使用 [`io.opentelemetry:opentelemetry-exporter-otlp`](https://mvnrepository.com/artifact/io.opentelemetry/opentelemetry-exporter-otlp);在 Python 中,可以使用 [`opentelemetry-exporter-otlp-proto-http`](https://pypi.org/project/opentelemetry-exporter-otlp-proto-http/)。 + +:::tip 注意 +包名可能会根据 OpenTelemetry 的发展发生变化,因此建议你参考 OpenTelemetry 官方文档以获取最新信息。 +::: - +请参考 Opentelementry 的官方文档获取它所支持的编程语言的更多信息。 -#### 示例代码 +### 示例代码 下面是一些编程语言设置请求的示例代码: @@ -95,7 +114,7 @@ exporter = OTLPMetricExporter( 关于示例代码,请参考 Opentelementry 的官方文档获取它所支持的编程语言获取更多信息。 -#### 数据模型 +### 数据模型 OTLP 指标数据模型按照下方的规则被映射到 GreptimeDB 数据模型中: @@ -108,57 +127,34 @@ OTLP 指标数据模型按照下方的规则被映射到 GreptimeDB 数据模型 ## Logs -### OTLP/HTTP - -import Includeotlplogintegration from '../../../db-cloud-shared/clients/otlp-logs-integration.md' - - - -#### 示例代码 - -以下是一些关于如何使用 Grafana Alloy 将 OpenTelemetry 日志发送到 GreptimeDB 的示例代码: - -```hcl -loki.source.file "greptime" { - targets = [ - {__path__ = "/tmp/foo.txt"}, - ] - forward_to = [otelcol.receiver.loki.greptime.receiver] -} - -otelcol.receiver.loki "greptime" { - output { - logs = [otelcol.exporter.otlphttp.greptimedb_logs.input] - } -} - -otelcol.auth.basic "credentials" { - username = "${GREPTIME_USERNAME}" - password = "${GREPTIME_PASSWORD}" -} - -otelcol.exporter.otlphttp "greptimedb_logs" { - client { - endpoint = "${GREPTIME_SCHEME:=http}://${GREPTIME_HOST:=greptimedb}:${GREPTIME_PORT:=4000}/v1/otlp/" - headers = { - "X-Greptime-DB-Name" = "${GREPTIME_DB:=public}", - "x-greptime-log-table-name" = "demo_logs", - "x-greptime-log-extract-keys" = "filename,log.file.name,loki.attribute.labels", - } - auth = otelcol.auth.basic.credentials.handler - } -} -``` +GreptimeDB 是能够通过 [OTLP/HTTP](https://opentelemetry.io/docs/specs/otlp/#otlphttp) 协议原生地消费 OpenTelemetry 日志。 + +### OTLP/HTTP API + +要通过 OpenTelemetry SDK 库将 OpenTelemetry 日志发送到 GreptimeDB,请使用以下信息: -此示例监听文件的变化,并通过 OTLP 协议将最新的值发送到 GreptimeDB。 +* **URL:** `https:///v1/otlp/v1/logs` +* **Headers:** + * `X-Greptime-DB-Name`: `` + * `Authorization`: `Basic` 认证,这是一个 Base64 编码的 `:` 字符串。更多信息,请参考 [鉴权](/user-guide/deployments/authentication/static.md) 和 [HTTP API](/user-guide/protocols/http.md#鉴权)。 + * `X-Greptime-Log-Table-Name`: ``(可选)- 存储日志的表名。如果未提供,默认表名为 `opentelemetry_logs`。 + * `X-Greptime-Log-Extract-Keys`: ``(可选)- 从属性中提取对应 key 的值到表的顶级字段。key 应以逗号(`,`)分隔。例如,`key1,key2,key3` 将从属性中提取 `key1`、`key2` 和 `key3`,并将它们提升到日志的顶层,设置为标签。如果提取的字段类型是数组、浮点数或对象,将返回错误。如果提供了 pipeline name,此设置将被忽略。 + * `X-Greptime-Log-Pipeline-Name`: ``(可选)- 处理日志的 pipeline 名称。如果未提供,将使用 `X-Greptime-Log-Extract-Keys` 来处理日志。 + * `X-Greptime-Log-Pipeline-Version`: ``(可选)- 处理日志的 pipeline 的版本。如果未提供,将使用 pipeline 的最新版本。 + +请求使用二进制 protobuf 编码负载,因此您需要使用支持 `HTTP/protobuf` 的包。 :::tip 提示 -上述示例代码可能会因 OpenTelemetry 的更新而过时。我们建议您参考 OpenTelemetry 和 Grafana Alloy 的官方文档以获取最新信息。 +包名可能会根据 OpenTelemetry 的更新而变化,因此我们建议您参考官方 OpenTelemetry 文档以获取最新信息。 ::: -有关示例代码的更多信息,请参考您首选编程语言的官方文档。 +有关 OpenTelemetry SDK 的更多信息,请参考您首选编程语言的官方文档。 + +### 示例代码 -#### 数据模型 +请参考 [Alloy 文档](alloy.md#日志)中的示例代码,了解如何将 OpenTelemetry 日志发送到 GreptimeDB。 + +### 数据模型 OTLP 日志数据模型根据以下规则映射到 GreptimeDB 数据模型: @@ -188,4 +184,5 @@ OTLP 日志数据模型根据以下规则映射到 GreptimeDB 数据模型: - 您可以使用 `X-Greptime-Log-Table-Name` 指定存储日志的表名。如果未提供,默认表名为 `opentelemetry_logs`。 - 所有属性,包括资源属性、范围属性和日志属性,将作为 JSON 列存储在 GreptimeDB 表中。 -- 日志的时间戳将用作 GreptimeDB 中的时间戳索引,列名为 `timestamp`。建议使用 `time_unix_nano` 作为时间戳列。如果未提供 `time_unix_nano`,将使用 `observed_time_unix_nano`。 \ No newline at end of file +- 日志的时间戳将用作 GreptimeDB 中的时间戳索引,列名为 `timestamp`。建议使用 `time_unix_nano` 作为时间戳列。如果未提供 `time_unix_nano`,将使用 
`observed_time_unix_nano`。 + diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/prometheus.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/prometheus.md index 97bcfab11..184744eac 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/prometheus.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/prometheus.md @@ -9,6 +9,8 @@ GreptimeDB 可以作为 Prometheus 的长期存储解决方案,提供无缝集 ## 配置 Remote Write +### Prometheus 配置文件 + 要将 GreptimeDB 集成到 Prometheus 中, 请按照以下步骤更新你的 [Prometheus 配置文件](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file)(`prometheus.yml`): @@ -32,6 +34,10 @@ remote_read: - URL 中的 `db` 参数表示要写入的数据库。它是可选的。默认情况下,数据库设置为 `public`。 - `basic_auth` 是身份鉴权配置。如果 GreptimeDB 启用了鉴权,请填写用户名和密码。请参阅 [鉴权认证文档](/user-guide/deployments/authentication/overview.md)。 +### Grafana Alloy 配置文件 + +如果你使用 Grafana Alloy,请在 Alloy 配置文件(`config.alloy`)中配置 Remote Write。有关更多信息,请参阅 [Alloy 文档](alloy.md#prometheus-remote-write)。 + ## 数据模型 在 GreptimeDB 的[数据模型](/user-guide/concepts/data-model.md)中,数据被组织成具有 tag、time index 和 field 的表。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/vector.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/vector.md index bb18cfa4b..f987406ab 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/vector.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/ingest-data/for-observerbility/vector.md @@ -3,18 +3,16 @@ keywords: [Vector, 数据写入, gRPC 通信, 数据模型, 配置示例] description: 介绍如何使用 Vector 将数据写入 GreptimeDB,包括最小配置示例和数据模型的映射规则。 --- -import DocTemplate from '../../../db-cloud-shared/clients/vector-integration.md' - - # Vector - - -[Vector](https://vector.dev/) 是一种高性能的可以帮助工程师控制可观测性数据的通道工具。我们的 Vector 集成页面在[这里](https://vector.dev/docs/reference/configuration/sinks/greptimedb/)。 +Vector 是高性能的可观测数据管道。 +它原生支持 GreptimeDB 指标数据接收端。 +通过 Vector,你可以从各种来源接收指标数据,包括 Prometheus、OpenTelemetry、StatsD 等。 +GreptimeDB 可以作为 Vector 的 Sink 组件来接收指标数据。 -
+## 收集主机指标 -## 集成 +### 配置 使用 GreptimeDB 的 Vector 集成的最小配置如下: @@ -26,7 +24,7 @@ type = "host_metrics" [sinks.my_sink_id] inputs = ["in"] -type = "greptimedb" +type = "greptimedb_metrics" endpoint = ":4001" dbname = "" username = "" @@ -37,11 +35,15 @@ new_naming = true GreptimeDB 使用 gRPC 与 Vector 进行通信,因此 Vector sink 的默认端口是 `4001`。 如果你在使用 [自定义配置](/user-guide/deployments/configuration.md#configuration-file) 启动 GreptimeDB 时更改了默认的 gRPC 端口,请使用你自己的端口。 -
+启动 Vector: + +``` +vector -c sample.toml +``` -
+请前往 [Vector GreptimeDB Configuration](https://vector.dev/docs/reference/configuration/sinks/greptimedb_metrics/) 查看更多配置项。 -## 数据模型 +### 数据模型 我们使用这样的规则将 Vector 指标存入 GreptimeDB: @@ -56,6 +58,12 @@ GreptimeDB 使用 gRPC 与 Vector 进行通信,因此 Vector sink 的默认端 - AggregatedSummary 类型,各个百分位数值点分别存入 `pxx` 列,其中 xx 是 quantile 数值,此外我们还会记录 `sum/count` 列; - Sketch 类型,各个百分位数值点分别存入 `pxx` 列,其中 xx 是 quantile 数值,此外我们还会记录 `min/max/avg/sum` 列; -
+## 收集 InfluxDB 行协议格式的指标 + +Vector 可以收集 InfluxDB 行协议格式的指标并将其发送到 GreptimeDB。更多信息请参考 [Kafka 指南](/user-guide/ingest-data/for-observerbility/kafka.md#指标)。 + + +## 收集日志 + +Vector 可以收集日志并发送到 GreptimeDB。更多信息请参考 [Kafka 指南](/user-guide/ingest-data/for-observerbility/kafka.md#日志)。 -
diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/alloy.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/alloy.md new file mode 100644 index 000000000..40c3acc57 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/alloy.md @@ -0,0 +1,9 @@ +--- +keywords: [Alloy, Grafana Alloy, GreptimeDB] +description: 将 GreptimeDB 与 Grafana Alloy 集成。 +--- + +# Grafana Alloy + +你可以将 GreptimeDB 设置为 Grafana Alloy 的数据接收端。 +更多信息,请参考[通过 Grafana Alloy 写入数据](/user-guide/ingest-data/for-observerbility/alloy.md)指南。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/grafana.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/grafana.md index f0bb53c63..5278bdc9c 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/grafana.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/grafana.md @@ -27,6 +27,10 @@ GreptimeDB 数据源插件目前仅支持在本地 Grafana 中的安装, ```shell grafana cli --pluginUrl https://github.com/GreptimeTeam/greptimedb-grafana-datasource/releases/latest/download/info8fcc-greptimedb-datasource.zip plugins install info8fcc ``` +- 使用我们 [预构建的 Grafana 镜 + 像](https://hub.docker.com/r/greptime/grafana-greptimedb),已经提前包含了 + GreptimeDB 数据源插件 `docker run -p 3000:3000 + greptime/grafana-greptimedb:latest` 注意,安装插件后可能需要重新启动 Grafana 服务器。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/kafka.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/kafka.md new file mode 100644 index 000000000..c3eacdc5c --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/integrations/kafka.md @@ -0,0 +1,9 @@ +--- +keywords: [Kafka, 数据传输, 可观测性, 指标, 日志] +description: 从 Kafka 写入数据到 GreptimeDB。 +--- + +# Kafka + +你可以使用 Vector 作为从 Kafka 到 GreptimeDB 的数据传输工具。 +请前往[通过 Kafka 写入数据](/user-guide/ingest-data/for-observerbility/kafka.md)了解更多信息。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/manage-data/data-index.md b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/manage-data/data-index.md new file mode 100644 index 000000000..46a3962a7 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/version-0.11/user-guide/manage-data/data-index.md @@ -0,0 +1,105 @@ +--- +keywords: [索引, 倒排索引, 跳数索引, 全文索引, 查询性能] +description: 了解 GreptimeDB 支持的各类索引,包括倒排索引、跳数索引和全文索引,以及如何合理使用这些索引来提升查询效率。 +--- + +# 数据索引 + +GreptimeDB 提供了多种索引机制来提升查询性能。作为数据库中的核心组件,索引通过建立高效的数据检索路径,显著优化了数据的查询操作。 + +## 概述 + +在 GreptimeDB 中,索引是在表创建时定义的,其设计目的是针对不同的数据类型和查询模式来优化查询性能。目前支持的索引类型包括: + +- 倒排索引(Inverted Index) +- 跳数索引(Skipping Index) +- 全文索引(Fulltext Index) + +需要说明的是,本章节重点讨论数据值索引。虽然主键(PRIMARY KEY)和 TIME INDEX 也在某种程度上具有索引的特性,但不在本章讨论范围内。 + +## 索引类型 + +### 倒排索引 + +倒排索引主要用于优化标签列的查询效率。它通过在唯一值和对应数据行之间建立映射关系,实现对特定标签值的快速定位。 + +**适用场景:** +- 基于标签值的数据查询 +- 字符串列的过滤操作 +- 标签列的精确查询 + +示例: +```sql +CREATE TABLE monitoring_data ( + host STRING, + region STRING PRIMARY KEY, + cpu_usage DOUBLE, + timestamp TIMESTAMP TIME INDEX, + INDEX INVERTED_INDEX(host, region) +); +``` + +需要注意的是,当标签值的组合数(即倒排索引覆盖的列的笛卡尔积)非常大时,倒排索引可能会带来较高的维护成本,导致内存占用增加和索引体积膨胀。这种情况下,建议考虑使用跳数索引作为替代方案。 + +### 跳数索引 + +跳数索引是专为列式存储系统(如 GreptimeDB)优化设计的索引类型。它通过维护数据块内值域范围的元数据,使查询引擎能够在进行范围查询时快速跳过不相关的数据块。与其他索引相比,跳数索引的存储开销相对较小。 + +**适用场景:** +- 数据分布稀疏的场景,例如日志中的 MAC 地址 +- 在大规模数据集中查询出现频率较低的值 + +示例: +```sql +CREATE TABLE sensor_data ( + domain STRING PRIMARY KEY, + device_id STRING 
SKIPPING INDEX, + temperature DOUBLE, + timestamp TIMESTAMP TIME INDEX, +); +``` + +然而,跳数索引无法处理复杂的过滤条件,并且其过滤性能通常不如倒排索引或全文索引。 + +### 全文索引 + +全文索引专门用于优化字符串列的文本搜索操作。它支持基于词的匹配和文本搜索功能,能够实现对文本内容的高效检索。用户可以使用灵活的关键词、短语或模式匹配来查询文本数据。 + +**适用场景:** +- 文本内容搜索 +- 模式匹配查询 +- 大规模文本过滤 + +示例: +```sql +CREATE TABLE logs ( + message STRING FULLTEXT INDEX, + level STRING PRIMARY KEY, + timestamp TIMESTAMP TIME INDEX, +); +``` + +使用全文索引时需要注意以下限制: + +- 存储开销较大,因需要保存词条和位置信息 +- 文本分词和索引过程会增加数据刷新和压缩的延迟 +- 对于简单的前缀或后缀匹配可能不是最优选择 + +建议仅在需要高级文本搜索功能和灵活查询模式时使用全文索引。 + +## 最佳实践 + +1. 根据实际的数据特征和查询模式选择合适的索引类型 +2. 只为频繁出现在 WHERE 子句中的列创建索引 +3. 在查询性能、写入性能和资源消耗之间寻找平衡 +4. 定期监控索引使用情况并持续优化索引策略 + +## 性能考虑 + +索引虽然能够显著提升查询性能,但也会带来一定开销: + +- 需要额外的存储空间维护索引结构 +- 索引维护会影响数据刷新和压缩性能 +- 索引缓存会占用系统内存 + +建议根据具体应用场景和性能需求,合理规划索引策略。 diff --git a/variables/variables-0.11.ts b/variables/variables-0.11.ts index ecb1d6e4d..d04ff3acb 100644 --- a/variables/variables-0.11.ts +++ b/variables/variables-0.11.ts @@ -1,5 +1,5 @@ export const variables = { - greptimedbVersion: 'v0.11.1', + greptimedbVersion: 'v0.11.2', prometheusVersion: 'v2.52.0', nodeExporterVersion: 'v1.8.0', goSdkVersion: 'v0.6.0', diff --git a/variables/variables-nightly.ts b/variables/variables-nightly.ts index ecb1d6e4d..d04ff3acb 100644 --- a/variables/variables-nightly.ts +++ b/variables/variables-nightly.ts @@ -1,5 +1,5 @@ export const variables = { - greptimedbVersion: 'v0.11.1', + greptimedbVersion: 'v0.11.2', prometheusVersion: 'v2.52.0', nodeExporterVersion: 'v1.8.0', goSdkVersion: 'v0.6.0', diff --git a/versioned_docs/version-0.11/contributor-guide/flownode/overview.md b/versioned_docs/version-0.11/contributor-guide/flownode/overview.md index bdb84ea41..0a6f754e8 100644 --- a/versioned_docs/version-0.11/contributor-guide/flownode/overview.md +++ b/versioned_docs/version-0.11/contributor-guide/flownode/overview.md @@ -11,7 +11,7 @@ description: Overview of Flownode, a component providing streaming process capab `Flownode` provides a simple streaming process (known as `flow`) ability to the database. `Flownode` manages `flows` which are tasks that receive data from the `source` and send data to the `sink`. -In current version, `Flownode` only supports standalone mode. In the future, we will support distributed mode. +`Flownode` support both `standalone` and `distributed` mode. In `standalone` mode, `Flownode` runs in the same process as the database. In `distributed` mode, `Flownode` runs in a separate process and communicates with the database through the network. ## Components diff --git a/versioned_docs/version-0.11/db-cloud-shared/clients/otlp-logs-integration.md b/versioned_docs/version-0.11/db-cloud-shared/clients/otlp-logs-integration.md deleted file mode 100644 index 4a6191164..000000000 --- a/versioned_docs/version-0.11/db-cloud-shared/clients/otlp-logs-integration.md +++ /dev/null @@ -1,22 +0,0 @@ -GreptimeDB is an observability backend to consume OpenTelemetry Logs natively via [OTLP/HTTP](https://opentelemetry.io/docs/specs/otlp/#otlphttp) protocol. - -#### API - -To send OpenTelemetry Logs to GreptimeDB through OpenTelemetry SDK libraries, use the following information: - -* URL: `http{s}:///v1/otlp/v1/logs` -* Headers: - * `X-Greptime-DB-Name`: `` - * `Authorization`: `Basic` authentication, which is a Base64 encoded string of `:`. For more information, please refer to [Authentication](https://docs.greptime.com/user-guide/deployments/authentication/static/) and [HTTP API](https://docs.greptime.com/user-guide/protocols/http#authentication). 
- * `X-Greptime-Log-Table-Name`: `` (optional) - The table name to store the logs. If not provided, the default table name is `opentelemetry_logs`. - * `X-Greptime-Log-Extract-Keys`: `` (optional) - The keys to extract from the attributes. The keys should be separated by commas (`,`). For example, `key1,key2,key3` will extract the keys `key1`, `key2`, and `key3` from the attributes and promote them to the top level of the log, setting them as tags. If the field type is array, float, or object, an error will be returned. If a pipeline is provided, this setting will be ignored. - * `X-Greptime-Log-Pipeline-Name`: `` (optional) - The pipeline name to process the logs. If not provided, the extract keys will be used to process the logs. - * `X-Greptime-Log-Pipeline-Version`: `` (optional) - The pipeline version to process the logs. If not provided, the latest version of the pipeline will be used. - -The request uses binary protobuf to encode the payload, so you need to use packages that support `HTTP/protobuf`. - -:::tip NOTE -The package names may change according to OpenTelemetry, so we recommend that you refer to the official OpenTelemetry documentation for the most up-to-date information. -::: - -For more information about the OpenTelemetry SDK, please refer to the official documentation for your preferred programming language. diff --git a/versioned_docs/version-0.11/db-cloud-shared/clients/otlp-metrics-integration.md b/versioned_docs/version-0.11/db-cloud-shared/clients/otlp-metrics-integration.md deleted file mode 100644 index 7a8a31ecf..000000000 --- a/versioned_docs/version-0.11/db-cloud-shared/clients/otlp-metrics-integration.md +++ /dev/null @@ -1,18 +0,0 @@ -GreptimeDB is an observability backend to consume OpenTelemetry Metrics natively via [OTLP/HTTP](https://opentelemetry.io/docs/specs/otlp/#otlphttp) protocol. - -#### API - -To send OpenTelemetry Metrics to GreptimeDB through OpenTelemetry SDK libraries, use the following information: - -* URL: `http{s}:///v1/otlp/v1/metrics` -* Headers: - * `X-Greptime-DB-Name`: `` - * `Authorization`: `Basic` authentication, which is a Base64 encoded string of `:`. For more information, please refer to [Authentication](https://docs.greptime.com/user-guide/deployments/authentication/static/) and [HTTP API](https://docs.greptime.com/user-guide/protocols/http#authentication) - -The request uses binary protobuf to encode the payload, so you need to use packages that support `HTTP/protobuf`. For example, in Node.js, you can use [`exporter-trace-otlp-proto`](https://www.npmjs.com/package/@opentelemetry/exporter-trace-otlp-proto); in Go, you can use [`go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp`](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp); in Java, you can use [`io.opentelemetry:opentelemetry-exporter-otlp`](https://mvnrepository.com/artifact/io.opentelemetry/opentelemetry-exporter-otlp); and in Python, you can use [`opentelemetry-exporter-otlp-proto-http`](https://pypi.org/project/opentelemetry-exporter-otlp-proto-http/). - -:::tip NOTE -The package names may change according to OpenTelemetry, so we recommend that you refer to the official OpenTelemetry documentation for the most up-to-date information. -::: - -For more information about the OpenTelemetry SDK, please refer to the official documentation for your preferred programming language. 
diff --git a/versioned_docs/version-0.11/db-cloud-shared/clients/vector-integration.md b/versioned_docs/version-0.11/db-cloud-shared/clients/vector-integration.md deleted file mode 100644 index fa773c0f9..000000000 --- a/versioned_docs/version-0.11/db-cloud-shared/clients/vector-integration.md +++ /dev/null @@ -1,19 +0,0 @@ - -Vector is [a high performance observability data -pipeline](https://vector.dev). It has native support for GreptimeDB metrics data -sink. With vector, you can ingest metrics data from various sources, including -Prometheus, OpenTelemetry, StatsD and many more. -GreptimeDB can be used as a Vector Sink component to receive metrics. - - - -Execute Vector with: - -``` -vector -c sample.toml -``` - -For more configuration options, see [Vector GreptimeDB -Configuration](https://vector.dev/docs/reference/sinks/greptimedb/). - - diff --git a/versioned_docs/version-0.11/greptimecloud/integrations/kafka.md b/versioned_docs/version-0.11/greptimecloud/integrations/kafka.md index ae3f7016e..0dfe9bf61 100644 --- a/versioned_docs/version-0.11/greptimecloud/integrations/kafka.md +++ b/versioned_docs/version-0.11/greptimecloud/integrations/kafka.md @@ -63,3 +63,8 @@ username = "" password = "" tls = {} ``` + +## Reference + +For detailed information on the data ingestion process, please refer to the [Ingest Data via Kafka](https://docs.greptime.com/nightly/user-guide/ingest-data/for-observerbility/kafka) guide. + diff --git a/versioned_docs/version-0.11/reference/sql/alter.md b/versioned_docs/version-0.11/reference/sql/alter.md index 6594c48af..3e6946691 100644 --- a/versioned_docs/version-0.11/reference/sql/alter.md +++ b/versioned_docs/version-0.11/reference/sql/alter.md @@ -116,7 +116,7 @@ The modified column cannot be a tag (primary key) or time index, and it must be Currently following options are supported: - `ttl`: the retention time of data in table. -- `compaction.twcs.time_window`: the time window parameter of TWCS compaction strategy. +- `compaction.twcs.time_window`: the time window parameter of TWCS compaction strategy. The value should be a [time duration string](/reference/time-durations.md). - `compaction.twcs.max_output_file_size`: the maximum allowed output file size of TWCS compaction strategy. - `compaction.twcs.max_active_window_runs`: the maximum allowed sorted runs in the active window of TWCS compaction strategy. - `compaction.twcs.max_inactive_window_runs`: the maximum allowed sorted runs in the inactive windows of TWCS compaction strategy. diff --git a/versioned_docs/version-0.11/reference/sql/create.md b/versioned_docs/version-0.11/reference/sql/create.md index 6c57944a8..742b6afb8 100644 --- a/versioned_docs/version-0.11/reference/sql/create.md +++ b/versioned_docs/version-0.11/reference/sql/create.md @@ -127,17 +127,7 @@ CREATE TABLE IF NOT EXISTS temperatures( The `ttl` value can be one of the following: -- A duration like `1hour 12min 5s`, The duration object is a concatenation of time spans. Where each time span is an integer number and a suffix. Supported suffixes: - - `nsec`, `ns` – nanoseconds - - `usec`, `us` – microseconds - - `msec`, `ms` – milliseconds - - `seconds`, `second`, `sec`, `s` - - `minutes`, `minute`, `min`, `m` - - `hours`, `hour`, `hr`, `h` - - `days`, `day`, `d` - - `weeks`, `week`, `w` - - `months`, `month`, `M` – defined as 30.44 days - - `years`, `year`, `y` – defined as 365.25 days +- A [duration](/reference/time-durations.md) like `1hour 12min 5s`. 
- `forever`, `NULL`, an empty string `''` and `0s` (or any zero length duration, like `0d`), means the data will never be deleted. - `instant`, note that database's TTL can't be set to `instant`. `instant` means the data will be deleted instantly when inserted, useful if you want to send input to a flow task without saving it, see more details in [flow management documents](/user-guide/flow-computation/manage-flow.md#manage-flows). - Unset, `ttl` can be unset by using `ALTER TABLE UNSET 'ttl'`, which means the table will inherit the database's ttl policy (if any). diff --git a/versioned_docs/version-0.11/reference/time-durations.md b/versioned_docs/version-0.11/reference/time-durations.md new file mode 100644 index 000000000..92ae14382 --- /dev/null +++ b/versioned_docs/version-0.11/reference/time-durations.md @@ -0,0 +1,47 @@ +--- +keywords: [time durations, time spans, time units] +description: Learn how GreptimeDB utilizes time durations to represent time spans in SQL queries, configuration files, and API requests with supported suffixes and examples. +--- + +# Time Durations + +GreptimeDB utilizes time durations to represent time spans in various contexts, +including SQL queries, configuration files, and API requests. +A time duration is expressed as a string composed of concatenated time spans, +each represented by a sequence of decimal numbers followed by a unit suffix. +These suffixes are case-insensitive and support both singular and plural forms. For example, `1hour 12min 5s`. + +Each time span consists of an integer and a suffix. +The supported suffixes are: + +- `nsec`, `ns`: nanoseconds +- `usec`, `us`: microseconds +- `msec`, `ms`: milliseconds +- `seconds`, `second`, `sec`, `s` +- `minutes`, `minute`, `min`, `m` +- `hours`, `hour`, `hr`, `h` +- `days`, `day`, `d` +- `weeks`, `week`, `w` +- `months`, `month`, `M`: defined as 30.44 days +- `years`, `year`, `y`: defined as 365.25 days + +Appending a decimal integer with one of the above units represents the equivalent number of seconds as a bare float literal. +Examples: + +- `1s`: Equivalent to 1 second +- `2m`: Equivalent to 120 seconds +- `1ms`: Equivalent to 0.001 seconds +- `2h`: Equivalent to 7200 seconds + +The following examples are invalid: + +- `0xABm`: Hexadecimal numbers are not supported +- `1.5h`: Floating point numbers are not supported +- `+Infd`: `±Inf` or `NaN` values are not supported + + +The following are some valid time duration examples: + +- `1h`: one hour +- `1h30m`, `1h 30m`: one hour and thirty minutes +- `1h30m10s`, `1h 30m 10s`: one hour, thirty minutes, and ten seconds diff --git a/versioned_docs/version-0.11/user-guide/administration/performance-tuning-tips.md b/versioned_docs/version-0.11/user-guide/administration/performance-tuning-tips.md index 5fa383e4d..f3827d704 100644 --- a/versioned_docs/version-0.11/user-guide/administration/performance-tuning-tips.md +++ b/versioned_docs/version-0.11/user-guide/administration/performance-tuning-tips.md @@ -46,7 +46,10 @@ region = "your-region" cache_capacity = "10G" ``` -The write cache acts as a write-through cache that stores files on the local disk before uploading them to the object store. This reduces the first query latency. The following example shows how to enable the write cache. +The write cache acts as a write-through cache that stores files on the local disk before uploading them to the object store. This reduces the first query latency. + + +The following example shows how to enable the write cache in versions before `v0.12`. 
- The `enable_experimental_write_cache` flag enables the write cache, enabled by default when configuring remote object stores since `v0.11`. - The `experimental_write_cache_size` sets the capacity of the cache, defaults to `5GiB` since `v0.11`. @@ -68,7 +71,31 @@ You can monitor the `greptime_mito_cache_bytes` and `greptime_mito_cache_miss` m If the `greptime_mito_cache_miss` metric is consistently high and increasing, or if the `greptime_mito_cache_bytes` metric reaches the cache capacity, you may need to adjust the cache size configurations of the storage engine. -Here's an example: + +Here is an example: + + +```toml +[[region_engine]] +[region_engine.mito] +# Cache size for the write cache. The `type` label value for this cache is `file`. +write_cache_size = "10G" +# Cache size for SST metadata. The `type` label value for this cache is `sst_meta`. +sst_meta_cache_size = "128MB" +# Cache size for vectors and arrow arrays. The `type` label value for this cache is `vector`. +vector_cache_size = "512MB" +# Cache size for pages of SST row groups. The `type` label value for this cache is `page`. +page_cache_size = "512MB" +# Cache size for time series selector (e.g. `last_value()`). The `type` label value for this cache is `selector_result`. +selector_result_cache_size = "512MB" + +[region_engine.mito.index] +## The max capacity of the index staging directory. +staging_size = "10GB" +``` + + +For versions before `v0.12`: ```toml [[region_engine]] @@ -93,7 +120,7 @@ staging_size = "10GB" Some tips: -- 1/10 of disk space for the `experimental_write_cache_size` at least +- 1/10 of disk space for the write cache at least - 1/4 of total memory for the `page_cache_size` at least if the memory usage is under 20% - Double the cache size if the cache hit ratio is less than 50% - If using full-text index, leave 1/10 of disk space for the `staging_size` at least diff --git a/versioned_docs/version-0.11/user-guide/concepts/key-concepts.md b/versioned_docs/version-0.11/user-guide/concepts/key-concepts.md index d8da7a008..48aa92348 100644 --- a/versioned_docs/version-0.11/user-guide/concepts/key-concepts.md +++ b/versioned_docs/version-0.11/user-guide/concepts/key-concepts.md @@ -44,7 +44,7 @@ Find all the supported data types in [Data Types](/reference/sql/data-types.md). ## Index -The `index` is a performance-tuning method that allows faster retrieval of records. GreptimeDB uses the [inverted index](/contributor-guide/datanode/data-persistence-indexing.md#inverted-index) to accelerate queries. +The `index` is a performance-tuning method that allows faster retrieval of records. GreptimeDB provides various kinds of [indexes](/user-guide/manage-data/data-index.md) to accelerate queries. ## View diff --git a/versioned_docs/version-0.11/user-guide/deployments/configuration.md b/versioned_docs/version-0.11/user-guide/deployments/configuration.md index e7afb1773..650945b8a 100644 --- a/versioned_docs/version-0.11/user-guide/deployments/configuration.md +++ b/versioned_docs/version-0.11/user-guide/deployments/configuration.md @@ -317,9 +317,7 @@ cache_capacity = "10GiB" ``` -We recommend that you don't set the cache directory because the database can choose it automatically. The default cache root directory is: -- `{data_home}` (since `v0.11.2`) -- `{data_home}/object_cache` (before `v0.11.2`) +We recommend that you don't set the cache directory because the database can choose it automatically. The default cache directory is under the `{data_home}`. 
For versions before v0.11, you need to manually enable the read cache by configuring `cache_path` in the storage settings: @@ -338,6 +336,16 @@ cache_capacity = "5GiB" The `cache_path` specifies the local directory for storing cache files, while `cache_capacity` determines the maximum total file size allowed in the cache directory in bytes. You can disable the read cache by setting `cache_path` to an empty string. + +The write cache is no more experimental since `v0.12`. You can configure the cache size in the mito config if you don't want to use the default value. +```toml +[[region_engine]] +[region_engine.mito] + +write_cache_size = "10GiB" +``` + + For write cache in versions before v0.11, you need to enable it by setting `enable_experimental_write_cache` to `true` in the `[region_engine.mito]` section: ```toml @@ -349,11 +357,7 @@ experimental_write_cache_path = "/var/data/s3_write_cache" experimental_write_cache_size = "5GiB" ``` -The default value of `experimental_write_cache_path`: -- `{data_home}` (since `v0.11.2`) -- `{data_home}/object_cache/write` (before `v0.11.2`) - -To disable the write cache, set `enable_experimental_write_cache` to `false`. +The `experimental_write_cache_path` is under `{data_home}` by default. To disable the write cache, set `enable_experimental_write_cache` to `false`. Read [Performance Tuning Tips](/user-guide/administration/performance-tuning-tips) for more detailed info. @@ -538,7 +542,7 @@ Available options: | `inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). | | `inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. | | `inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. | -| `inverted_index.content_cache_page_size`| String | `8MiB` | Page size for inverted index content cache. Inverted index content will be read and cached in page size. Adjust this value to change the granularity of cache and optimize the cache hit rate. | +| `inverted_index.content_cache_page_size`| String | `64KiB` | Page size for inverted index content cache. Inverted index content will be read and cached in page size. Adjust this value to change the granularity of cache and optimize the cache hit rate. | | `memtable.type` | String | `time_series` | Memtable type.
- `time_series`: time-series memtable
- `partition_tree`: partition tree memtable (experimental) | | `memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard.
Only available for `partition_tree` memtable. | | `memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.
Only available for `partition_tree` memtable. | diff --git a/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/common-helm-chart-configurations.md b/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/common-helm-chart-configurations.md new file mode 100644 index 000000000..d1396d864 --- /dev/null +++ b/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/common-helm-chart-configurations.md @@ -0,0 +1,312 @@ +--- +keywords: [Kubernetes, Deployment, Helm Chart, Configuration] +description: Common Helm Chart Configurations +--- + +# Common Helm Chart Configurations + +For each Helm Chart, you can create a `values.yaml` file for configuration. When you need to apply configurations, you can use the `helm upgrade` command: + +``` +helm upgrade --install ${release-name} ${chart-name} --namespace ${namespace} -f values.yaml +``` + +## GreptimeDB Cluster Chart + +For complete configuration options, please refer to [GreptimeDB Cluster Chart](https://github.com/GreptimeTeam/helm-charts/tree/main/charts/greptimedb-cluster/README.md). + +### GreptimeDB Image Configuration + +The top-level variable `image` is used to configure the global GreptimeDB image for the cluster, as shown below: + +```yaml +image: + # -- The image registry + registry: docker.io + # -- The image repository + repository: greptime/greptimedb + # -- The image tag + tag: "v0.11.0" + # -- The image pull secrets + pullSecrets: [] +``` + +If you want to configure different images for each role in the cluster, you can use the `${role}.podTemplate.main.image` field (where `role` can be `meta`, `frontend`, `datanode` and `flownode`). This field will **override** the top-level `image` configuration, as shown below: + +```yaml +image: + # -- The image registry + registry: docker.io + # -- The image repository + repository: greptime/greptimedb + # -- The image tag + tag: "v0.11.0" + # -- The image pull secrets + pullSecrets: [] + +frontend: + podTemplate: + main: + image: "greptime/greptimedb:latest" +``` + +In this case, the `frontend` image will be set to `greptime/greptimedb:latest`, while other components will use the top-level `image` configuration. + +### Service Ports Configuration + +You can configure service ports using the following fields: + +- `httpServicePort`: Configures the HTTP service port, default `4000` +- `grpcServicePort`: Configures the SQL service port, default `4001` +- `mysqlServicePort`: Configures the MySQL service port, default `4002` +- `postgresServicePort`: Configures the PostgreSQL service port, default `4003` + +### Datanode Storage Configuration + +You can configure Datanode storage through the `datanode.storage` field, as shown below: + +```yaml +datanode: + storage: + # -- Storage class for datanode persistent volume + storageClassName: null + # -- Storage size for datanode persistent volume + storageSize: 10Gi + # -- Storage retain policy for datanode persistent volume + storageRetainPolicy: Retain + # -- The dataHome directory, default is "/data/greptimedb/" + dataHome: "/data/greptimedb" +``` + +- `storageClassName`: Configures the StorageClass, defaults to Kubernetes' current default StorageClass +- `storageSize`: Configures the storage size, default `10Gi`. You can use common capacity units, such as `10Gi`, `10GB`, etc. +- `storageRetainPolicy`: Configures the storage retention policy, default `Retain`. 
If set to `Delete`, the storage will be deleted when the cluster is deleted +- `dataHome`: Configures the data directory, default `/data/greptimedb/` + +### Resource Configuration + +The top-level variable `base.podTemplate.main.resources` is used to globally configure resources for each role, as shown below: + +```yaml +base: + podTemplate: + main: + resources: + requests: + memory: "1Gi" + cpu: "1" + limits: + memory: "2Gi" + cpu: "2" +``` + +If you want to configure different resources for each role in the cluster, you can use the `${role}.podTemplate.main.resources` field (where `role` can be `meta`, `frontend`, `datanode`, etc.). This field will **override** the top-level `base.podTemplate.main.resources` configuration, as shown below: + +```yaml +base: + podTemplate: + main: + resources: + requests: + memory: "1Gi" + cpu: "1" + limits: + memory: "2Gi" + cpu: "2" + +frontend: + podTemplate: + main: + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" +``` + +### Role Replicas Configuration + +For different roles, the number of replicas can be configured through the `${role}.replicas` field, as shown below: + +```yaml +frontend: + replicas: 3 + +datanode: + replicas: 3 +``` + +You can achieve horizontal scaling by configuring the number of replicas. + +### Environment Variable Configuration + +You can configure global environment variables through the `base.podTemplate.main.env` field, and configure different environment variables for each Role through the `${role}.podTemplate.main.env` field, as shown below: + +```yaml +base: + podTemplate: + main: + env: + - name: GLOBAL_ENV + value: "global_value" + +frontend: + podTemplate: + main: + env: + - name: FRONTEND_ENV + value: "frontend_value" +``` + +### Injecting Configuration Files + +For different Role services, youcan inject custom TOML configuration files through the `${role}.configData` field, as shown below: + +```yaml +frontend: + configData: | + [[region_engine]] + [region_engine.mito] + # Number of region workers + num_workers = 8 +``` + +You can learn about GreptimeDB configuration options through [config.md](https://github.com/GreptimeTeam/greptimedb/blob/main/config/config.md). + +In addition to using the `${role}.configData` field to inject configuration files, you can also specify corresponding files through `${role}.configFile`, as shown below: + +```yaml +frontend: + configFile: "configs/frontend.toml" +``` + +In this case, ensure that the configuration file path matches the directory where the `helm upgrade` command is executed. + +:::note +User-injected configuration files have lower priority by default than configuration items managed by GreptimeDB Operator. Some configuration items can only be configured through GreptimeDB Operator, and these items are exposed by default in `values.yaml`. + +The following default configurations are managed by GreptimeDB Operator: + +- Logging configuration; +- Datanode's Node ID; +::: + +### Authentication Configuration + +The Helm Chart does not enable User Provider mode authentication by default. 
You can enable User Provider mode authentication and configure user information through the `auth.enabled` field, as shown below: + +```yaml +auth: + enabled: true + users: + - name: admin + password: "admin" +``` + +### Logging Configuration + +The top-level variable `logging` is used to configure global logging levels, as shown below: + +```yaml +# -- Global logging configuration +logging: + # -- The log level for greptimedb, only support "debug", "info", "warn", "debug" + level: "info" + + # -- The log format for greptimedb, only support "json" and "text" + format: "text" + + # -- The logs directory for greptimedb + logsDir: "/data/greptimedb/logs" + + # -- Whether to log to stdout only + onlyLogToStdout: false + + # -- indicates whether to persist the log with the datanode data storage. It **ONLY** works for the datanode component. + persistentWithData: false + + # -- The log filters, use the syntax of `target[span\{field=value\}]=level` to filter the logs. + filters: [] + + # -- The slow query log configuration. + slowQuery: + # -- Enable slow query log. + enabled: false + + # -- The threshold of slow query log in seconds. + threshold: "10s" + + # -- Sample ratio of slow query log. + sampleRatio: "1.0" +``` + +Where: + +- `logging.level`: Configures the global log level, supports `debug`, `info`, `warn`, `error`. +- `logging.format`: Configures the global log format, supports `json` and `text`. +- `logging.logsDir`: Configures the global log directory, default `/data/greptimedb/logs`. +- `logging.onlyLogToStdout`: Configures whether to output only to stdout, disabled by default. +- `logging.persistentWithData`: Configures whether to persist logs with data storage, only applies to the `datanode` component, disabled by default. +- `logging.filters`: Configures global log filters, supports the syntax `target[span\{field=value\}]=level`. For example, if you want to enable `debug` level logging for certain components: + + ```yaml + logging: + level: "info" + format: "json" + filters: + - mito2=debug + ``` + +You can also enable slow query logging through the `logging.slowQuery` field configuration, as shown below: + +```yaml +logging: + slowQuery: + enabled: true + threshold: "100ms" + sampleRatio: "1.0" +``` + +Where: + +- `logging.slowQuery.enabled`: Configures whether to enable slow query logging, disabled by default. +- `logging.slowQuery.threshold`: Configures the threshold for slow query logging. +- `logging.slowQuery.sampleRatio`: Configures the sampling ratio for slow query logging, default `1.0` (full sampling). + +If the output directory `logging.logsDir` is configured, slow query logs will be output to that directory. + +Each role's logging configuration can be configured through the `${role}.logging` field, with fields consistent with the top-level `logging` and will **override** the top-level `logging` configuration, for example: + +```yaml +frontend: + logging: + level: "debug" +``` + +### Enabling Flownode + +The Helm Chart does not enable Flownode by default. 
You can enable Flownode through the `flownode.enabled` field, as shown below: + +```yaml +flownode: + enabled: true +``` + +Other fields of `flownode` are configured similarly to other Roles, for example: + +```yaml +flownode: + enabled: false + replicas: 1 + podTemplate: + main: + resources: + requests: + memory: "1Gi" + cpu: "1" + limits: + memory: "2Gi" + cpu: "2" +``` diff --git a/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/monitoring/cluster-monitoring-deployment.md b/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/monitoring/cluster-monitoring-deployment.md new file mode 100644 index 000000000..f11f69d1b --- /dev/null +++ b/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/monitoring/cluster-monitoring-deployment.md @@ -0,0 +1,220 @@ +--- +keywords: [Kubernetes deployment, cluster, monitoring] +description: Guide to deploying monitoring for GreptimeDB clusters on Kubernetes, including self-monitoring and Prometheus monitoring steps. +--- + +# Cluster Monitoring Deployment + +After deploying a GreptimeDB cluster using GreptimeDB Operator, by default, its components (Metasrv / Datanode / Frontend) expose a `/metrics` endpoint on their HTTP port (default `4000`) for Prometheus metrics. + +We provide two approaches to monitor the GreptimeDB cluster: + +1. **Enable GreptimeDB Self-Monitoring**: The GreptimeDB Operator will launch an additional GreptimeDB Standalone instance and Vector Sidecar container to collect and store metrics and logs from the GreptimeDB cluster. +2. **Use Prometheus Operator to Configure Prometheus Metrics Monitoring**: Users need first to deploy Prometheus Operator and create Prometheus instance, then use Prometheus Operator's `PodMonitor` to write GreptimeDB cluster metrics into Prometheus. + +Users can choose the appropriate monitoring approach based on their needs. + +## Enable GreptimeDB Self-Monitoring + +In self-monitoring mode, GreptimeDB Operator will launch an additional GreptimeDB Standalone instance to collect metrics and logs from the GreptimeDB cluster, including cluster logs and slow query logs. To collect log data, GreptimeDB Operator will start a [Vector](https://vector.dev/) sidecar container in each Pod. When this mode is enabled, JSON format logging will be automatically enabled for the cluster. + +If you deploy the GreptimeDB cluster using Helm Chart (refer to [Getting Started](../getting-started.md)), you can configure the `values.yaml` file as follows: + +```yaml +monitoring: + enabled: true +``` + +This will deploy a GreptimeDB Standalone instance named `${cluster}-monitoring` to collect metrics and logs. You can check it with: + +``` +kubectl get greptimedbstandalones.greptime.io ${cluster}-monitoring -n ${namespace} +``` + +By default, this GreptimeDB Standalone instance will store monitoring data using the Kubernetes default StorageClass in local storage. You can adjust this based on your needs. 
+ +The GreptimeDB Standalone instance can be configured via the `monitoring.standalone` field in `values.yaml`, for example: + +```yaml +monitoring: + enabled: true + standalone: + base: + main: + # Configure GreptimeDB Standalone instance image + image: "greptime/greptimedb:latest" + + # Configure GreptimeDB Standalone instance resources + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "2" + memory: "4Gi" + + # Configure object storage for GreptimeDB Standalone instance + objectStorage: + s3: + # Configure bucket + bucket: "monitoring" + # Configure region + region: "ap-southeast-1" + # Configure secret name + secretName: "s3-credentials" + # Configure root path + root: "standalone-with-s3-data" +``` + +The GreptimeDB Standalone instance will expose services using `${cluster}-monitoring-standalone` as the Kubernetes Service name. You can use the following addresses to read monitoring data: + +- **Prometheus metrics**: `http://${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4000/v1/prometheus` +- **SQL logs**: `${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4002`. By default, cluster logs are stored in `public._gt_logs` table and slow query logs are stored in `public._gt_slow_queries` table. + +The Vector sidecar configuration for log collection can be customized via the `monitoring.vector` field: + +```yaml +monitoring: + enabled: true + vector: + # Configure Vector image registry + registry: docker.io + # Configure Vector image repository + repository: timberio/vector + # Configure Vector image tag + tag: nightly-alpine + + # Configure Vector resources + resources: + requests: + cpu: "50m" + memory: "64Mi" + limits: + cpu: "50m" + memory: "64Mi" +``` + +:::note +If you're not using Helm Chart, you can manually configure self-monitoring mode in the `GreptimeDBCluster` YAML: + +```yaml +apiVersion: greptime.io/v1alpha1 +kind: GreptimeDBCluster +metadata: + name: basic +spec: + base: + main: + image: greptime/greptimedb:latest + frontend: + replicas: 1 + meta: + replicas: 1 + etcdEndpoints: + - "etcd.etcd-cluster.svc.cluster.local:2379" + datanode: + replicas: 1 + monitoring: + enabled: true +``` + +The `monitoring` field configures self-monitoring mode. See [`GreptimeDBCluster` API docs](https://github.com/GreptimeTeam/greptimedb-operator/blob/main/docs/api-references/docs.md#monitoringspec) for details. +::: + +## Use Prometheus Operator to Configure Prometheus Metrics Monitoring + +Users need to first deploy Prometheus Operator and create Prometheus instance. For example, you can use [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) to deploy the Prometheus stack. You can refer to its [official documentation](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) for more details. + +After deploying Prometheus Operator and instances, you can configure Prometheus monitoring via the `prometheusMonitor` field in `values.yaml`: + +```yaml +prometheusMonitor: + # Enable Prometheus monitoring - this will create PodMonitor resources + enabled: true + # Configure scrape interval + interval: "30s" + # Configure labels + labels: + release: prometheus +``` + +:::note +The `labels` field must match the `matchLabels` field used to create the Prometheus instance, otherwise metrics collection won't work properly. 
+::: + +After configuring `prometheusMonitor`, GreptimeDB Operator will automatically create `PodMonitor` resources and import metrics into Prometheus. You can check the `PodMonitor` resources with: + +``` +kubectl get podmonitors.monitoring.coreos.com -n ${namespace} +``` + +:::note +If not using Helm Chart, you can manually configure Prometheus monitoring in the `GreptimeDBCluster` YAML: + +```yaml +apiVersion: greptime.io/v1alpha1 +kind: GreptimeDBCluster +metadata: + name: basic +spec: + base: + main: + image: greptime/greptimedb:latest + frontend: + replicas: 1 + meta: + replicas: 1 + etcdEndpoints: + - "etcd.etcd-cluster.svc.cluster.local:2379" + datanode: + replicas: 1 + prometheusMonitor: + enabled: true + interval: "30s" + labels: + release: prometheus +``` + +The `prometheusMonitor` field configures Prometheus monitoring. +::: + +## Import Grafana Dashboards + +GreptimeDB cluster currently provides 3 Grafana dashboards: + +- [Cluster Metrics Dashboard](https://github.com/GreptimeTeam/greptimedb/blob/main/grafana/greptimedb-cluster.json) +- [Cluster Logs Dashboard](https://github.com/GreptimeTeam/helm-charts/blob/main/charts/greptimedb-cluster/dashboards/greptimedb-cluster-logs.json) +- [Slow Query Logs Dashboard](https://github.com/GreptimeTeam/helm-charts/blob/main/charts/greptimedb-cluster/dashboards/greptimedb-cluster-slow-queries.json) + +:::note +The Cluster Logs Dashboard and Slow Query Logs Dashboard are only for self-monitoring mode, while the Cluster Metrics Dashboard works for both self-monitoring and Prometheus monitoring modes. +::: + +If using Helm Chart, you can enable `grafana.enabled` to deploy Grafana and import dashboards automatically (see [Getting Started](../getting-started.md)): + +```yaml +grafana: + enabled: true +``` + +If you already have Grafana deployed, follow these steps to import the dashboards: + +1. **Add Data Sources** + + You can refer to Grafana's [datasources](https://grafana.com/docs/grafana/latest/datasources/) docs to add the following 3 data sources: + + - **`metrics` data source** + + For importing Prometheus metrics, works with both monitoring modes. For self-monitoring mode, use `http://${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4000/v1/prometheus` as the URL. For your own Prometheus instance, use your Prometheus instance URL. + + - **`information-schema` data source** + + For importing cluster metadata via SQL, works with both monitoring modes. Use `${cluster}-frontend.${namespace}.svc.cluster.local:4002` as the SQL address with database `information_schema`. + + - **`logs` data source** + + For importing cluster and slow query logs via SQL, **only works with self-monitoring mode**. Use `${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4002` as the SQL address with database `public`. + +2. **Import Dashboards** + + You can refer to Grafana's [Import dashboards](https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/import-dashboards/) docs. 
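+
+If your Grafana instance is managed through [provisioning](https://grafana.com/docs/grafana/latest/administration/provisioning/), the data sources described in step 1 can also be declared in a provisioning file instead of being added manually. The following is a minimal sketch for self-monitoring mode; the file name, the `${cluster}` and `${namespace}` placeholders, and the `jsonData` fields are examples that you need to adapt to your own deployment and Grafana version:
+
+```yaml
+# datasources.yaml (example only; follows the Grafana data source provisioning format)
+apiVersion: 1
+datasources:
+  # Prometheus metrics exposed by the self-monitoring standalone instance
+  - name: metrics
+    type: prometheus
+    access: proxy
+    url: http://${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4000/v1/prometheus
+  # Cluster metadata, queried over the MySQL protocol of the frontend
+  - name: information-schema
+    type: mysql
+    url: ${cluster}-frontend.${namespace}.svc.cluster.local:4002
+    jsonData:
+      database: information_schema
+  # Cluster and slow query logs collected by the self-monitoring instance
+  - name: logs
+    type: mysql
+    url: ${cluster}-monitor-standalone.${namespace}.svc.cluster.local:4002
+    jsonData:
+      database: public
+```
+
+If authentication is enabled for the cluster, you can additionally set `user` and `secureJsonData.password` on the two MySQL data sources.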
diff --git a/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/overview.md b/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/overview.md index e72476aee..bbbe93727 100644 --- a/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/overview.md +++ b/versioned_docs/version-0.11/user-guide/deployments/deploy-on-kubernetes/overview.md @@ -5,17 +5,24 @@ description: Overview of deploying GreptimeDB on Kubernetes using the GreptimeDB # Overview -## GreptimeDB Operator +## GreptimeDB on Kubernetes -The [GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator) uses the [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) to manage GreptimeDB on Kubernetes, automating the setup, provisioning, and management of GreptimeDB cluster and standalone instances. - This makes it easy to quickly deploy and scale GreptimeDB in any Kubernetes environment, whether on-premises or in the cloud. +GreptimeDB is a time-series database built for cloud-native environments and can be deployed on Kubernetes since day one. We provide a [GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator) to manage GreptimeDB on Kubernetes, automating the setup, provisioning, and management of GreptimeDB cluster and standalone instances. This makes it easy to quickly deploy and scale GreptimeDB in any Kubernetes environment, whether on-premises or in the cloud. We **highly recommend** using the GreptimeDB Operator to deploy GreptimeDB on Kubernetes. -## Manage GreptimeDB with the GreptimeDB Operator +## Getting Started You can take [Getting Started](./getting-started.md) as your first guide to understand the whole picture. This guide provides the complete process of deploying the GreptimeDB cluster on Kubernetes. -After getting started, you can refer to the following documents for more details about the production deployment. +## GreptimeDB Operator - [GreptimeDB Operator Management](./greptimedb-operator-management.md) + +## Monitoring + +- [Cluster Monitoring Deployment](./monitoring/cluster-monitoring-deployment.md) + +## Configurations + +- [Common Helm Chart Configurations](./common-helm-chart-configurations.md) diff --git a/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/go.md b/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/go.md index 5e3f3d142..8e7589dd0 100644 --- a/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/go.md +++ b/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/go.md @@ -35,6 +35,7 @@ Import the library in your code: ```go import ( greptime "github.com/GreptimeTeam/greptimedb-ingester-go" + ingesterContext "github.com/GreptimeTeam/greptimedb-ingester-go/context" "github.com/GreptimeTeam/greptimedb-ingester-go/table" "github.com/GreptimeTeam/greptimedb-ingester-go/table/types" ) @@ -60,6 +61,31 @@ cli, _ := greptime.NewClient(cfg) ```
+
+
+You can set table options using the `ingesterContext` context.
+For example, to set the `ttl` option, use the following code:
+
+```go
+hints := []*ingesterContext.Hint{
+    {
+        Key:   "ttl",
+        Value: "3d",
+    },
+}
+
+ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
+defer cancel()
+ctx = ingesterContext.New(ctx, ingesterContext.WithHints(hints))
+// Use the ingesterContext when writing data to GreptimeDB.
+// The `data` object is described in the following sections.
+resp, err := cli.Write(ctx, data)
+if err != nil {
+    return err
+}
+```
+
+
+
```go @@ -123,7 +149,7 @@ if err != nil {
```go -resp, err := cli.Write(context.Background(), cpuMetric, memMetric) +resp, err := cli.Write(ctx, cpuMetric, memMetric) if err != nil { // Handle error appropriately } @@ -135,7 +161,7 @@ log.Printf("affected rows: %d\n", resp.GetAffectedRows().GetValue())
```go -err := cli.StreamWrite(context.Background(), cpuMetric, memMetric) +err := cli.StreamWrite(ctx, cpuMetric, memMetric) if err != nil { // Handle error appropriately } @@ -201,7 +227,7 @@ memMetrics := []MemMetric{
```go -resp, err := cli.WriteObject(context.Background(), cpuMetrics) +resp, err := cli.WriteObject(ctx, cpuMetrics) log.Printf("affected rows: %d\n", resp.GetAffectedRows().GetValue()) ``` @@ -210,7 +236,7 @@ log.Printf("affected rows: %d\n", resp.GetAffectedRows().GetValue())
```go -err := cli.StreamWriteObject(context.Background(), cpuMetrics) +err := cli.StreamWriteObject(ctx, cpuMetrics) ``` Close the stream writing after all data has been written. diff --git a/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/java.md b/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/java.md index fc515dc3f..bb58a6119 100644 --- a/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/java.md +++ b/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/java.md @@ -83,6 +83,20 @@ For customizing the connection options, please refer to [API Documentation](#ing
+
+
+You can set table options using the `Context`.
+For example, to set the `ttl` option, use the following code:
+
+```java
+Context ctx = Context.newDefault();
+ctx.withHint("ttl", "3d");
+// Use the ctx when writing data to GreptimeDB.
+// The data objects `cpuMetric` and `memMetric` are described in the following sections.
+CompletableFuture<Result<WriteOk, Err>> future = greptimeDB.write(Arrays.asList(cpuMetric, memMetric), WriteOp.Insert, ctx);
+```
+
+
diff --git a/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/template.md b/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/template.md index d4fcb8c53..e10635a5c 100644 --- a/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/template.md +++ b/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/grpc-sdks/template.md @@ -27,6 +27,19 @@ The following example shows how to set the username and password when using the Each row item in a table consists of three types of columns: `Tag`, `Timestamp`, and `Field`. For more information, see [Data Model](/user-guide/concepts/data-model.md). The types of column values could be `String`, `Float`, `Int`, `Timestamp`, `JSON` etc. For more information, see [Data Types](/reference/sql/data-types.md). +## Set table options + +Although the time series table is created automatically when writing data to GreptimeDB via the SDK, +you can still configure table options. +The SDK supports the following table options: + +- `auto_create_table`: Default is `True`. If set to `False`, it indicates that the table already exists and does not need automatic creation, which can improve write performance. +- `ttl`, `append_mode`, `merge_mode`: For more details, refer to the [table options](/reference/sql/create.md#table-options). + + + +For how to write data to GreptimeDB, see the following sections. + ## Low-level API The GreptimeDB low-level API provides a straightforward method to write data to GreptimeDB diff --git a/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/kafka.md b/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/kafka.md new file mode 100644 index 000000000..c7200e76f --- /dev/null +++ b/versioned_docs/version-0.11/user-guide/ingest-data/for-iot/kafka.md @@ -0,0 +1,8 @@ +--- +keywords: [Kafka, Data Ingestion] +description: Write data from Kafka to GreptimeDB. +--- + +# Kafka + +Please refer to the [Kafka documentation](/user-guide/ingest-data/for-observerbility/kafka.md) for instructions on how to ingest data from Kafka into GreptimeDB. diff --git a/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/alloy.md b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/alloy.md new file mode 100644 index 000000000..78ff91868 --- /dev/null +++ b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/alloy.md @@ -0,0 +1,120 @@ +--- +keywords: [Grafana Alloy, Prometheus Remote Write, OpenTelemetry, data pipeline] +description: Instructions on integrating GreptimeDB with Grafana Alloy for Prometheus Remote Write and OpenTelemetry. +--- + +# Grafana Alloy + +[Grafana Alloy](https://grafana.com/docs/alloy/latest/) is an observability data pipeline for OpenTelemetry (OTel), Prometheus, Pyroscope, Loki, and many other metrics, logs, traces, and profiling tools. +You can integrate GreptimeDB as a data sink for Alloy. + +## Prometheus Remote Write + +Configure GreptimeDB as remote write target: + +```hcl +prometheus.remote_write "greptimedb" { + endpoint { + url = "${GREPTIME_SCHEME:=http}://${GREPTIME_HOST:=greptimedb}:${GREPTIME_PORT:=4000}/v1/prometheus/write?db=${GREPTIME_DB:=public}" + + basic_auth { + username = "${GREPTIME_USERNAME}" + password = "${GREPTIME_PASSWORD}" + } + } +} +``` + +- `GREPTIME_HOST`: GreptimeDB host address, e.g., `localhost`. +- `GREPTIME_DB`: GreptimeDB database name, default is `public`. 
+- `GREPTIME_USERNAME` and `GREPTIME_PASSWORD`: The [authentication credentials](/user-guide/deployments/authentication/static.md) for GreptimeDB. + +For details on the data model transformation from Prometheus to GreptimeDB, refer to the [Data Model](/user-guide/ingest-data/for-observerbility/prometheus.md#data-model) section in the Prometheus Remote Write guide. + +## OpenTelemetry + +GreptimeDB can also be configured as OpenTelemetry collector. + +### Metrics + +```hcl +otelcol.exporter.otlphttp "greptimedb" { + client { + endpoint = "${GREPTIME_SCHEME:=http}://${GREPTIME_HOST:=greptimedb}:${GREPTIME_PORT:=4000}/v1/otlp/" + headers = { + "X-Greptime-DB-Name" = "${GREPTIME_DB:=public}", + } + auth = otelcol.auth.basic.credentials.handler + } +} + +otelcol.auth.basic "credentials" { + username = "${GREPTIME_USERNAME}" + password = "${GREPTIME_PASSWORD}" +} +``` + +- `GREPTIME_HOST`: GreptimeDB host address, e.g., `localhost`. +- `GREPTIME_DB`: GreptimeDB database name, default is `public`. +- `GREPTIME_USERNAME` and `GREPTIME_PASSWORD`: The [authentication credentials](/user-guide/deployments/authentication/static.md) for GreptimeDB. + +For details on the metrics data model transformation from OpenTelemetry to GreptimeDB, refer to the [Data Model](/user-guide/ingest-data/for-observerbility/opentelemetry.md#data-model) section in the OpenTelemetry guide. + +### Logs + +The following example setting up a logging pipeline using Loki and OpenTelemetry Collector (otelcol) to forward logs to a GreptimeDB: + +```hcl +loki.source.file "greptime" { + targets = [ + {__path__ = "/tmp/foo.txt"}, + ] + forward_to = [otelcol.receiver.loki.greptime.receiver] +} + +otelcol.receiver.loki "greptime" { + output { + logs = [otelcol.exporter.otlphttp.greptimedb_logs.input] + } +} + +otelcol.auth.basic "credentials" { + username = "${GREPTIME_USERNAME}" + password = "${GREPTIME_PASSWORD}" +} + +otelcol.exporter.otlphttp "greptimedb_logs" { + client { + endpoint = "${GREPTIME_SCHEME:=http}://${GREPTIME_HOST:=greptimedb}:${GREPTIME_PORT:=4000}/v1/otlp/" + headers = { + "X-Greptime-DB-Name" = "${GREPTIME_DB:=public}", + "X-Greptime-Log-Table-Name" = "demo_logs", + "X-Greptime-Gog-Extract-Keys" = "filename,log.file.name,loki.attribute.labels", + } + auth = otelcol.auth.basic.credentials.handler + } +} +``` + +- Loki Source Configuration + - The `loki.source.file "greptime"` block defines a source for Loki to read logs from a file located at `/tmp/foo.txt` + - The `forward_to` array indicates that the logs read from this file should be forwarded to the `otelcol.receiver.loki.greptime.receiver` +- OpenTelemetry Collector Receiver Configuration: + - The `otelcol.receiver.loki "greptime"` block sets up a receiver within the OpenTelemetry Collector to receive logs from Loki. + - The `output` section specifies that the received logs should be forwarded to the `otelcol.exporter.otlphttp.greptimedb_logs.input`. +- OpenTelemetry Collector Exporter Configuration: + - The `otelcol.exporter.otlphttp "greptimedb_logs"` block configures an HTTP exporter to send logs to GreptimeDB. + - `GREPTIME_HOST`: GreptimeDB host address, e.g., `localhost`. + - `GREPTIME_DB`: GreptimeDB database name, default is `public`. + - `GREPTIME_USERNAME` and `GREPTIME_PASSWORD`: The [authentication credentials](/user-guide/deployments/authentication/static.md) for GreptimeDB. + - `LOG_TABLE_NAME`: The name of the table to store logs, default table name is `opentelemetry_logs`. 
+ - `EXTRACT_KEYS`: The keys to extract from the attributes, separated by commas (`,`), e.g., `filename,log.file.name,loki.attribute.labels`, see [HTTP API documentation](opentelemetry.md#otlphttp-api-1) for details. + +For details on the log data model transformation from OpenTelemetry to GreptimeDB, refer to the [Data Model](/user-guide/ingest-data/for-observerbility/opentelemetry.md#data-model-1) section in the OpenTelemetry guide. + +:::tip NOTE +The example codes above may be outdated according to OpenTelemetry. We recommend that you refer to the official OpenTelemetry documentation And Grafana Alloy for the most up-to-date information. +::: + +For more information on the example code, please refer to the official documentation for your preferred programming language. + diff --git a/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/kafka.md b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/kafka.md new file mode 100644 index 000000000..5d5b9c61b --- /dev/null +++ b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/kafka.md @@ -0,0 +1,172 @@ +--- +keywords: [Kafka, data ingestion, observability, metrics, logs, JSON logs, text logs, Vector, InfluxDB line protocol] +description: Learn how to ingest observability data from Kafka into GreptimeDB using Vector. This guide covers metrics and logs ingestion, including JSON and text log formats, with detailed configuration examples. +--- + +# Kafka + +If you are using Kafka or Kafka-compatible message queue for observability data +transporting, it's possible to ingest data into GreptimeDB directly. + +Here we are using Vector as the tool to transport data from Kafka to GreptimeDB. + +## Metrics + +When ingesting metrics from Kafka into GreptimeDB, messages should be formatted in InfluxDB line protocol. For example: + +```txt +census,location=klamath,scientist=anderson bees=23 1566086400000000000 +``` + +Then configure Vector to use the `influxdb` decoding codec to process these messages. + +```toml +[sources.metrics_mq] +# Specifies that the source type is Kafka +type = "kafka" +# The consumer group ID for Kafka +group_id = "vector0" +# The list of Kafka topics to consume messages from +topics = ["test_metric_topic"] +# The address of the Kafka broker to connect to +bootstrap_servers = "kafka:9092" +# The `influxdb` means the messages are expected to be in InfluxDB line protocol format. +decoding.codec = "influxdb" + +[sinks.metrics_in] +inputs = ["metrics_mq"] +# Specifies that the sink type is `greptimedb_metrics` +type = "greptimedb_metrics" +# The endpoint of the GreptimeDB server. +# Replace with the actual hostname or IP address. +endpoint = ":4001" +dbname = "" +username = "" +password = "" +tls = {} +``` + +For details on how InfluxDB line protocol metrics are mapped to GreptimeDB data, please refer to the [Data Model](/user-guide/ingest-data/for-iot/influxdb-line-protocol.md#data-model) section in the InfluxDB line protocol documentation. + + +## Logs + +Developers commonly work with two types of logs: JSON logs and plain text logs. +Consider the following examples sent from Kafka. 
+ +A plain text log: + +```txt +127.0.0.1 - - [25/May/2024:20:16:37 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" +``` + +Or a JSON log: + +```json +{ + "timestamp": "2024-12-23T10:00:00Z", + "level": "INFO", + "message": "Service started" +} +``` + +GreptimeDB transforms these logs into structured data with multiple columns and automatically creates the necessary tables. +A pipeline processes the logs into structured data before ingestion into GreptimeDB. Different log formats require different [Pipelines](/user-guide/logs/quick-start.md#write-logs-by-pipeline) for parsing. See the following sections for details. + +### Logs with JSON format + +For logs in JSON format (e.g., `{"timestamp": "2024-12-23T10:00:00Z", "level": "INFO", "message": "Service started"}`), +you can use the built-in [`greptime_identity`](/user-guide/logs/manage-pipelines.md#greptime_identity) pipeline for direct ingestion. +This pipeline creates columns automatically based on the fields in your JSON log message. + +Simply configure Vector's `transforms` settings to parse the JSON message and use the `greptime_identity` pipeline as shown in the following example: + +```toml +[sources.logs_in] +type = "kafka" +# The consumer group ID for Kafka +group_id = "vector0" +# The list of Kafka topics to consume messages from +topics = ["test_log_topic"] +# The address of the Kafka broker to connect to +bootstrap_servers = "kafka:9092" + +# transform the log to JSON format +[transforms.logs_json] +type = "remap" +inputs = ["logs_in"] +source = ''' +. = parse_json!(.message) +''' + +[sinks.logs_out] +# Specifies that this sink will receive data from the `logs_json` source +inputs = ["logs_json"] +# Specifies that the sink type is `greptimedb_logs` +type = "greptimedb_logs" +# The endpoint of the GreptimeDB server +endpoint = "http://:4000" +compression = "gzip" +# Replace , , and with the actual values +dbname = "" +username = "" +password = "" +# The table name in GreptimeDB, if it doesn't exist, it will be created automatically +table = "demo_logs" +# Use the built-in `greptime_identity` pipeline +pipeline_name = "greptime_identity" +``` + +### Logs with text format + +For logs in text format, such as the access log format below, you'll need to create a custom pipeline to parse them: + +``` +127.0.0.1 - - [25/May/2024:20:16:37 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" +``` + +#### Create a pipeline + +To create a custom pipeline, +please refer to the [Create Pipeline](/user-guide/logs/quick-start.md#create-a-pipeline) +and [Pipeline Configuration](/user-guide/logs/pipeline-config.md) documentation for detailed instructions. + +#### Ingest data + +After creating the pipeline, configure it to the `pipeline_name` field in the Vector configuration file. 
+ +```toml +# sample.toml +[sources.log_mq] +# Specifies that the source type is Kafka +type = "kafka" +# The consumer group ID for Kafka +group_id = "vector0" +# The list of Kafka topics to consume messages from +topics = ["test_log_topic"] +# The address of the Kafka broker to connect to +bootstrap_servers = "kafka:9092" + +[sinks.sink_greptime_logs] +# Specifies that the sink type is `greptimedb_logs` +type = "greptimedb_logs" +# Specifies that this sink will receive data from the `log_mq` source +inputs = [ "log_mq" ] +# Use `gzip` compression to save bandwidth +compression = "gzip" +# The endpoint of the GreptimeDB server +# Replace with the actual hostname or IP address +endpoint = "http://:4000" +dbname = "" +username = "" +password = "" +# The table name in GreptimeDB, if it doesn't exist, it will be created automatically +table = "demo_logs" +# The custom pipeline name that you created +pipeline_name = "your_custom_pipeline" +``` + +## Demo + +For a runnable demo of data transformation and ingestion, please refer to the [Kafka Ingestion Demo](https://github.com/GreptimeTeam/demo-scene/tree/main/kafka-ingestion). + diff --git a/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/opentelemetry.md b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/opentelemetry.md index eeb48a18b..9ff771e85 100644 --- a/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/opentelemetry.md +++ b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/opentelemetry.md @@ -7,15 +7,33 @@ description: Instructions for integrating OpenTelemetry with GreptimeDB, includi [OpenTelemetry](https://opentelemetry.io/) is a vendor-neutral open-source observability framework for instrumenting, generating, collecting, and exporting telemetry data such as traces, metrics, logs. The OpenTelemetry Protocol (OTLP) defines the encoding, transport, and delivery mechanism of telemetry data between telemetry sources, intermediate processes such as collectors and telemetry backends. +## OpenTelemetry Collectors + +You can easily configure GreptimeDB as the target for your OpenTelemetry collector. +For more information, please refer to the [Grafana Alloy](alloy.md) example. + ## Metrics -### OTLP/HTTP +GreptimeDB is an observability backend to consume OpenTelemetry Metrics natively via [OTLP/HTTP](https://opentelemetry.io/docs/specs/otlp/#otlphttp) protocol. + +### OTLP/HTTP API + +To send OpenTelemetry Metrics to GreptimeDB through OpenTelemetry SDK libraries, use the following information: -import Includeotlpmetrycsintegration from '../../../db-cloud-shared/clients/otlp-metrics-integration.md' +* URL: `http{s}:///v1/otlp/v1/metrics` +* Headers: + * `X-Greptime-DB-Name`: `` + * `Authorization`: `Basic` authentication, which is a Base64 encoded string of `:`. For more information, please refer to [Authentication](https://docs.greptime.com/user-guide/deployments/authentication/static/) and [HTTP API](https://docs.greptime.com/user-guide/protocols/http#authentication) + +The request uses binary protobuf to encode the payload, so you need to use packages that support `HTTP/protobuf`. 
For example, in Node.js, you can use [`exporter-trace-otlp-proto`](https://www.npmjs.com/package/@opentelemetry/exporter-trace-otlp-proto); in Go, you can use [`go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp`](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp); in Java, you can use [`io.opentelemetry:opentelemetry-exporter-otlp`](https://mvnrepository.com/artifact/io.opentelemetry/opentelemetry-exporter-otlp); and in Python, you can use [`opentelemetry-exporter-otlp-proto-http`](https://pypi.org/project/opentelemetry-exporter-otlp-proto-http/).
+
+:::tip NOTE
+The package names may change according to OpenTelemetry, so we recommend that you refer to the official OpenTelemetry documentation for the most up-to-date information.
+:::
-

+For more information about the OpenTelemetry SDK, please refer to the official documentation for your preferred programming language.

-#### Example Code
+### Example Code

Here are some example codes about how to setup the request in different languages:

@@ -95,7 +113,7 @@ The example codes above may be outdated according to OpenTelemetry. We recommend

For more information on the example code, please refer to the official documentation for your preferred programming language.

-#### Data Model
+### Data Model

The OTLP metrics data model is mapped to the GreptimeDB data model according to the following rules:

@@ -108,57 +126,34 @@ The OTLP metrics data model is mapped to the GreptimeDB data model according to

## Logs

-### OTLP/HTTP
-
-import Includeotlplogintegration from '../../../db-cloud-shared/clients/otlp-logs-integration.md'
-
-
-
-#### Example Code
-
-Here are some example codes about how to use Grafana Alloy to send OpenTelemetry logs to GreptimeDB:
-
-```hcl
-loki.source.file "greptime" {
-  targets = [
-    {__path__ = "/tmp/foo.txt"},
-  ]
-  forward_to = [otelcol.receiver.loki.greptime.receiver]
-}
-
-otelcol.receiver.loki "greptime" {
-  output {
-    logs = [otelcol.exporter.otlphttp.greptimedb_logs.input]
-  }
-}
-
-otelcol.auth.basic "credentials" {
-  username = "${GREPTIME_USERNAME}"
-  password = "${GREPTIME_PASSWORD}"
-}
-
-otelcol.exporter.otlphttp "greptimedb_logs" {
-  client {
-    endpoint = "${GREPTIME_SCHEME:=http}://${GREPTIME_HOST:=greptimedb}:${GREPTIME_PORT:=4000}/v1/otlp/"
-    headers = {
-      "X-Greptime-DB-Name" = "${GREPTIME_DB:=public}",
-      "X-Greptime-Log-Table-Name" = "demo_logs",
-      "X-Greptime-Gog-Extract-Keys" = "filename,log.file.name,loki.attribute.labels",
-    }
-    auth = otelcol.auth.basic.credentials.handler
-  }
-}
-```
+GreptimeDB consumes OpenTelemetry Logs natively via the [OTLP/HTTP](https://opentelemetry.io/docs/specs/otlp/#otlphttp) protocol.
+
+### OTLP/HTTP API
+
+To send OpenTelemetry Logs to GreptimeDB through OpenTelemetry SDK libraries, use the following information:
+
+* URL: `http{s}://<host>/v1/otlp/v1/logs`
+* Headers:
+  * `X-Greptime-DB-Name`: `<dbname>`
+  * `Authorization`: `Basic` authentication, which is a Base64 encoded string of `<username>:<password>`. For more information, please refer to [Authentication](/user-guide/deployments/authentication/static.md) and [HTTP API](/user-guide/protocols/http.md#authentication).
+  * `X-Greptime-Log-Table-Name`: `<table_name>` (optional) - The table name to store the logs. If not provided, the default table name is `opentelemetry_logs`.
+  * `X-Greptime-Log-Extract-Keys`: `<extract_keys>` (optional) - The keys to extract from the attributes. The keys should be separated by commas (`,`).
For example, `key1,key2,key3` will extract the keys `key1`, `key2`, and `key3` from the attributes and promote them to the top level of the log, setting them as tags. If the field type is array, float, or object, an error will be returned. If a pipeline is provided, this setting will be ignored.
+  * `X-Greptime-Log-Pipeline-Name`: `<pipeline_name>` (optional) - The pipeline name to process the logs. If not provided, the extract keys will be used to process the logs.
+  * `X-Greptime-Log-Pipeline-Version`: `<pipeline_version>` (optional) - The pipeline version to process the logs. If not provided, the latest version of the pipeline will be used.

-This example listens for changes to the file and sends the latest values to GreptimeDB via the otlp protocol.
+The request uses binary protobuf to encode the payload, so you need to use packages that support `HTTP/protobuf`.

:::tip NOTE
-The example codes above may be outdated according to OpenTelemetry. We recommend that you refer to the official OpenTelemetry documentation And Grafana Alloy for the most up-to-date information.
+The package names may change according to OpenTelemetry, so we recommend that you refer to the official OpenTelemetry documentation for the most up-to-date information.
:::

-For more information on the example code, please refer to the official documentation for your preferred programming language.
+For more information about the OpenTelemetry SDK, please refer to the official documentation for your preferred programming language.
+
+### Example Code

-#### Data Model
+Please refer to the [Alloy documentation](alloy.md#logs) for example code on how to send OpenTelemetry logs to GreptimeDB.
+
+### Data Model

The OTLP logs data model is mapped to the GreptimeDB data model according to the following rules:

@@ -188,4 +183,5 @@ Default table schema:

- You can use `X-Greptime-Log-Table-Name` to specify the table name for storing the logs. If not provided, the default table name is `opentelemetry_logs`.
- All attributes, including resource attributes, scope attributes, and log attributes, will be stored as a JSON column in the GreptimeDB table.
-- The timestamp of the log will be used as the timestamp index in GreptimeDB, with the column name `timestamp`. It is preferred to use `time_unix_nano` as the timestamp column. If `time_unix_nano` is not provided, `observed_time_unix_nano` will be used instead.
\ No newline at end of file
+- The timestamp of the log will be used as the timestamp index in GreptimeDB, with the column name `timestamp`. It is preferred to use `time_unix_nano` as the timestamp column. If `time_unix_nano` is not provided, `observed_time_unix_nano` will be used instead.
+
diff --git a/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/prometheus.md b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/prometheus.md
index e8c780ef4..81b67d2c0 100644
--- a/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/prometheus.md
+++ b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/prometheus.md
@@ -10,6 +10,8 @@ providing a seamless integration experience.

## Remote write configuration

+### Prometheus configuration file
+
To configure Prometheus with GreptimeDB, update your [Prometheus configuration file](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file) (`prometheus.yml`) as follows:

@@ -33,6 +35,10 @@ remote_read:

- The `db` parameter in the URL represents the database to which we want to write data. It is optional.
By default, the database is set to `public`.
- `basic_auth` is the authentication configuration. Fill in the username and password if GreptimeDB authentication is enabled. Please refer to the [authentication document](/user-guide/deployments/authentication/overview.md).

+### Grafana Alloy configuration file
+
+If you are using Grafana Alloy, configure the remote write endpoint in the Alloy configuration file (`config.alloy`). For more information, refer to the [Alloy documentation](alloy.md#prometheus-remote-write).
+
## Data Model

In the [data model](/user-guide/concepts/data-model.md) of GreptimeDB, data is organized into tables with columns for tags, time index, and fields.
diff --git a/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/vector.md b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/vector.md
index d965fdd18..566753c6d 100644
--- a/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/vector.md
+++ b/versioned_docs/version-0.11/user-guide/ingest-data/for-observerbility/vector.md
@@ -3,16 +3,17 @@ keywords: [Vector, integration, configuration, data model, metrics]
description: Instructions for integrating Vector with GreptimeDB, including configuration, data model mapping, and example configurations.
---

-import DocTemplate from '../../../db-cloud-shared/clients/vector-integration.md'
-
-
# Vector

-
+Vector is [a high-performance observability data
+pipeline](https://vector.dev). It has native support for the GreptimeDB metrics
+data sink. With Vector, you can ingest metrics data from various sources, including
+Prometheus, OpenTelemetry, StatsD and many more.
+GreptimeDB can be used as a Vector Sink component to receive metrics.
-
+
## Collect host metrics

-## Integration

### Configuration

A minimal configuration of when using your GreptimeDB instance can be:

```toml
# sample.toml
[sources.in]
type = "host_metrics"

[sinks.my_sink_id]
inputs = ["in"]
-type = "greptimedb"
+type = "greptimedb_metrics"
endpoint = "<host>:4001"
dbname = "<dbname>"
username = "<username>"
password = "<password>"
@@ -35,11 +36,16 @@ new_naming = true

GreptimeDB uses gRPC to communicate with Vector, so the default port for the Vector sink is `4001`. If you have changed the default gRPC port when starting GreptimeDB with [custom configurations](/user-guide/deployments/configuration.md#configuration-file), use your own port instead.

-
+Execute Vector with: + +``` +vector -c sample.toml +``` -
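If your Vector installation ships the `validate` subcommand, you can optionally check the configuration before starting the pipeline (the file name `sample.toml` matches the example above):

```
# Check that sources, transforms, and sinks are wired up correctly
vector validate sample.toml
```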
+For more configuration options, see [Vector GreptimeDB +Configuration](https://vector.dev/docs/reference/configuration/sinks/greptimedb_metrics/). -## Data Model +### Data Model The following rules are used when storing Vector metrics into GreptimeDB: @@ -54,6 +60,11 @@ The following rules are used when storing Vector metrics into GreptimeDB: - For AggregatedSummary metrics, the values of each percentile are stored in the `pxx` column, where xx is the percentile, and the `sum/count` columns are also stored; - For Sketch metrics, the values of each percentile are stored in the `pxx` column, where xx is the percentile, and the `min/max/avg/sum` columns are also stored; -
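Once Vector is running, you can confirm that metrics are arriving by querying GreptimeDB directly. The table name below is only an illustration; actual table and column names follow the mapping rules above, so check what your deployment actually created:

```sql
-- List the tables Vector has created
SHOW TABLES;

-- Inspect a few rows of one metric table (hypothetical name)
SELECT * FROM host_cpu_seconds_total LIMIT 10;

-- Check the full schema, including the time index and value columns
DESC TABLE host_cpu_seconds_total;
```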
+## Collect metrics with InfluxDB line protocol format + +Vector can collect metrics in the InfluxDB line protocol format and send them to GreptimeDB. For more information, refer to the [Kafka guide](/user-guide/ingest-data/for-observerbility/kafka.md#metrics). + +## Collect logs + +Vector can also collect logs and send them to GreptimeDB. For more details, refer to the [Kafka guide](/user-guide/ingest-data/for-observerbility/kafka.md#logs). -
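If your logs live in local files rather than Kafka, the same `greptimedb_logs` sink applies. The following minimal sketch tails a file and writes it with the built-in `greptime_identity` pipeline, assuming each line is a JSON object (otherwise create a custom pipeline as described in the Kafka guide); the file path, table name, and connection values are placeholders:

```toml
[sources.app_logs]
# Tail local log files
type = "file"
include = ["/var/log/my-app/*.log"]

[sinks.app_logs_out]
type = "greptimedb_logs"
inputs = ["app_logs"]
compression = "gzip"
endpoint = "http://<host>:4000"
dbname = "<dbname>"
username = "<username>"
password = "<password>"
table = "app_logs"
pipeline_name = "greptime_identity"
```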
diff --git a/versioned_docs/version-0.11/user-guide/integrations/alloy.md b/versioned_docs/version-0.11/user-guide/integrations/alloy.md new file mode 100644 index 000000000..52ea6dc75 --- /dev/null +++ b/versioned_docs/version-0.11/user-guide/integrations/alloy.md @@ -0,0 +1,10 @@ +--- +keywords: [Alloy, Grafana Alloy, GreptimeDB, Ingest Data] +description: Integrate GreptimeDB with Grafana Alloy. +--- + +# Grafana Alloy + +GreptimeDB can be set up as a data sink for Grafana Alloy. +For more information, please refer to the [Ingest Data through Grafana Alloy](/user-guide/ingest-data/for-observerbility/alloy.md) guide. + diff --git a/versioned_docs/version-0.11/user-guide/integrations/grafana.md b/versioned_docs/version-0.11/user-guide/integrations/grafana.md index 78bbdeb6e..ea2f4d63b 100644 --- a/versioned_docs/version-0.11/user-guide/integrations/grafana.md +++ b/versioned_docs/version-0.11/user-guide/integrations/grafana.md @@ -29,6 +29,10 @@ directory](https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafa ```shell grafana cli --pluginUrl https://github.com/GreptimeTeam/greptimedb-grafana-datasource/releases/latest/download/info8fcc-greptimedb-datasource.zip plugins install info8fcc ``` +- Use our [prebuilt Grafana docker + image](https://hub.docker.com/r/greptime/grafana-greptimedb), which ships the + plugin by default: `docker run -p 3000:3000 + greptime/grafana-greptimedb:latest` Note that you may need to restart your grafana server after installing the plugin. diff --git a/versioned_docs/version-0.11/user-guide/integrations/kafka.md b/versioned_docs/version-0.11/user-guide/integrations/kafka.md new file mode 100644 index 000000000..67c04764e --- /dev/null +++ b/versioned_docs/version-0.11/user-guide/integrations/kafka.md @@ -0,0 +1,10 @@ +--- +keywords: [Kafka, data ingestion, observability, metrics, logs] +description: Learn how to ingest observability data from Kafka into GreptimeDB using Vector. +--- + +# Kafka + +Vector can be used as a tool to transport data from Kafka to GreptimeDB. +For more information, please refer to the [Ingest Data via Kafka](/user-guide/ingest-data/for-observerbility/kafka.md) guide. + diff --git a/versioned_docs/version-0.11/user-guide/manage-data/data-index.md b/versioned_docs/version-0.11/user-guide/manage-data/data-index.md new file mode 100644 index 000000000..712251b87 --- /dev/null +++ b/versioned_docs/version-0.11/user-guide/manage-data/data-index.md @@ -0,0 +1,105 @@ +--- +keywords: [index, inverted index, skipping index, fulltext index, query performance] +description: Learn about different types of indexes in GreptimeDB, including inverted index, skipping index, and fulltext index, and how to use them effectively to optimize query performance. +--- + +# Data Index + +GreptimeDB provides various indexing mechanisms to accelerate query performance. Indexes are essential database structures that help optimize data retrieval operations by creating efficient lookup paths to specific data. + +## Overview + +Indexes in GreptimeDB are specified during table creation and are designed to improve query performance for different types of data and query patterns. The database currently supports these types of indexes: + +- Inverted Index +- Skipping Index +- Fulltext Index + +Notice that in this chapter we are narrowing the word "index" to those related to data value indexing. PRIMARY KEY and TIME INDEX can also be treated as index in some scenarios, but they are not covered here. 
+
+## Index Types
+
+### Inverted Index
+
+An inverted index is particularly useful for tag columns. It creates a mapping between unique tag values and their corresponding rows, enabling fast lookups for specific tag values.
+
+**Typical Use Cases:**
+- Querying data by tag values
+- Filtering operations on string columns
+- Point queries on tag columns
+
+Example:
+```sql
+CREATE TABLE monitoring_data (
+    host STRING,
+    region STRING PRIMARY KEY,
+    cpu_usage DOUBLE,
+    timestamp TIMESTAMP TIME INDEX,
+    INDEX INVERTED_INDEX(host, region)
+);
+```
+
+However, when you have a large number of unique tag values (the Cartesian product across all columns covered by the inverted index), the inverted index may not be the best choice due to the overhead of maintaining it. It can lead to high memory consumption and a large index size. In this case, consider using the skipping index instead.
+
+### Skipping Index
+
+The skipping index is well suited to columnar data systems like GreptimeDB. It maintains metadata about value ranges within data blocks, allowing the query engine to skip irrelevant data blocks during range queries efficiently. This index is also smaller than the other index types.
+
+**Use Cases:**
+- When certain values are sparse, such as MAC addresses in logs
+- Querying specific values that occur infrequently within large datasets
+
+Example:
+```sql
+CREATE TABLE sensor_data (
+    domain STRING PRIMARY KEY,
+    device_id STRING SKIPPING INDEX,
+    temperature DOUBLE,
+    timestamp TIMESTAMP TIME INDEX
+);
+```
+
+The skipping index can't handle complex filter conditions and usually has lower filtering performance than the inverted index or fulltext index.
+
+### Fulltext Index
+
+The fulltext index is designed for text search operations on string columns. It enables efficient searching of text content using word-based matching and text search capabilities. You can query text data with flexible keywords, phrases, or pattern matching queries.
+
+**Use Cases:**
+- Text search operations
+- Pattern matching queries
+- Large text filtering
+
+Example:
+```sql
+CREATE TABLE logs (
+    message STRING FULLTEXT INDEX,
+    level STRING PRIMARY KEY,
+    timestamp TIMESTAMP TIME INDEX
+);
+```
+
+The fulltext index usually comes with the following drawbacks:
+
+- Higher storage overhead compared to regular indexes due to storing word tokens and positions
+- Increased flush and compaction latency, as each text document needs to be tokenized and indexed
+- May not be optimal for simple prefix or suffix matching operations
+
+Consider using the fulltext index only when you need advanced text search capabilities and flexible query patterns.
+
+## Best Practices
+
+1. Choose the appropriate index type based on your data type and query patterns
+2. Index only the columns that are frequently used in WHERE clauses
+3. Consider the trade-off between query performance, ingest performance, and resource consumption
+4. Monitor index usage and performance to optimize your indexing strategy continuously
+
+## Performance Considerations
+
+While indexes can significantly improve query performance, they come with some overhead:
+
+- Additional storage space required for index structures
+- Impact on flush and compaction performance due to index maintenance
+- Memory usage for index caching
+
+Choose indexes carefully based on your specific use case and performance requirements.
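+One practical way to monitor index effectiveness is to compare query plans and execution metrics with `EXPLAIN` or `EXPLAIN ANALYZE`, which GreptimeDB supports for SELECT statements. The query below reuses the `sensor_data` example above; the filter values are illustrative only:
+
+```sql
+-- Inspect how much data is scanned when filtering on the skipping-indexed column
+EXPLAIN ANALYZE
+SELECT *
+FROM sensor_data
+WHERE device_id = 'a0:b1:c2:d3:e4:f5'
+  AND timestamp > '2025-01-01 00:00:00';
+```
+
+Running the same statement before and after adding an index is a simple way to verify that the index actually reduces the amount of data scanned and the query time.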
diff --git a/versioned_sidebars/version-0.11-sidebars.json b/versioned_sidebars/version-0.11-sidebars.json index 062b85871..ea088983a 100644 --- a/versioned_sidebars/version-0.11-sidebars.json +++ b/versioned_sidebars/version-0.11-sidebars.json @@ -55,7 +55,9 @@ "user-guide/ingest-data/for-observerbility/vector", "user-guide/ingest-data/for-observerbility/opentelemetry", "user-guide/ingest-data/for-observerbility/influxdb-line-protocol", - "user-guide/ingest-data/for-observerbility/loki" + "user-guide/ingest-data/for-observerbility/kafka", + "user-guide/ingest-data/for-observerbility/loki", + "user-guide/ingest-data/for-observerbility/alloy" ] }, { @@ -74,6 +76,7 @@ ] }, "user-guide/ingest-data/for-iot/influxdb-line-protocol", + "user-guide/ingest-data/for-iot/kafka", "user-guide/ingest-data/for-iot/emqx", "user-guide/ingest-data/for-iot/opentsdb" ] @@ -96,7 +99,8 @@ "type": "category", "label": "Manage Data", "items": [ - "user-guide/manage-data/overview" + "user-guide/manage-data/overview", + "user-guide/manage-data/data-index" ] }, { @@ -106,11 +110,13 @@ "user-guide/integrations/overview", "user-guide/integrations/prometheus", "user-guide/integrations/vector", + "user-guide/integrations/kafka", "user-guide/integrations/grafana", "user-guide/integrations/superset", "user-guide/integrations/metabase", "user-guide/integrations/emqx", - "user-guide/integrations/dbeaver" + "user-guide/integrations/dbeaver", + "user-guide/integrations/alloy" ] }, { @@ -178,7 +184,15 @@ "items": [ "user-guide/deployments/deploy-on-kubernetes/overview", "user-guide/deployments/deploy-on-kubernetes/getting-started", - "user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management" + "user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management", + "user-guide/deployments/deploy-on-kubernetes/common-helm-chart-configurations", + { + "type": "category", + "label": "Monitoring", + "items": [ + "user-guide/deployments/deploy-on-kubernetes/monitoring/cluster-monitoring-deployment" + ] + } ] }, "user-guide/deployments/configuration", @@ -383,6 +397,7 @@ "items": [ "reference/command-lines", "reference/sql-tools", + "reference/time-durations", { "type": "category", "label": "SQL",