From 45cdb540870357d5e7ef46e3f79a7231e8de8a76 Mon Sep 17 00:00:00 2001 From: David Bernard Date: Sat, 6 Jan 2024 20:05:52 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=97=83=EF=B8=8F=20=20define=20a=20cdevent?= =?UTF-8?q?s=20lake=20table=20to=20store=20incoming=20cdevents=20as=20json?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 174 +++++++++++++++++- ...91b44dfcc1eccb9978b4a51e2c80764252c0e.json | 15 -- ...9390d13fe4488e4acc200a143122505ddefd9.json | 15 ++ cdviz-collector/src/db.rs | 2 +- migrations/20240101182725_init.up.sql | 24 ++- 5 files changed, 204 insertions(+), 26 deletions(-) delete mode 100644 .sqlx/query-098856a9e50709b6723e01b83ac91b44dfcc1eccb9978b4a51e2c80764252c0e.json create mode 100644 .sqlx/query-17752ec6d8d06c32be0e722452a9390d13fe4488e4acc200a143122505ddefd9.json diff --git a/.gitignore b/.gitignore index 4ee2167..394bcf2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -# Created by https://www.toptal.com/developers/gitignore/api/git,bazel,vim,emacs,visualstudiocode,jetbrains+all,helm,rust -# Edit at https://www.toptal.com/developers/gitignore?templates=git,bazel,vim,emacs,visualstudiocode,jetbrains+all,helm,rust +# Created by https://www.toptal.com/developers/gitignore/api/git,vim,helm,rust,bazel,emacs,jetbrains+all,visualstudiocode,node,go +# Edit at https://www.toptal.com/developers/gitignore?templates=git,vim,helm,rust,bazel,emacs,jetbrains+all,visualstudiocode,node,go ### Bazel ### # gitignore template for Bazel build system @@ -66,6 +66,7 @@ flycheck_*.el # network security /network-security.data + ### Git ### # Created by git for backups. 
To disable backups in Git: # $ git config --global mergetool.keepBackup false @@ -81,6 +82,29 @@ flycheck_*.el *_LOCAL_*.txt *_REMOTE_*.txt +### Go ### +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work + ### Helm ### # Chart dependencies **/charts/*.tgz @@ -173,6 +197,146 @@ fabric.properties !.idea/codeStyles !.idea/runConfigurations +### Node ### +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' 
+*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp + +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +### Node Patch ### +# Serverless Webpack directories +.webpack/ + +# Optional stylelint cache + +# SvelteKit build / generate output +.svelte-kit + ### Rust ### # Generated by Cargo # will have compiled files and executables @@ -192,7 +356,7 @@ Cargo.lock ### Vim ### # Swap [._]*.s[a-v][a-z] -!*.svg # comment out if you don't need vector files +!*.svg # comment out if you don't need vector files [._]*.sw[a-p] [._]s[a-rt-v][a-z] [._]ss[a-gi-z] @@ -228,9 +392,7 @@ tags .history .ionide -# Support for Project snippet scope - -# End of https://www.toptal.com/developers/gitignore/api/git,bazel,vim,emacs,visualstudiocode,jetbrains+all,helm,rust +# End of https://www.toptal.com/developers/gitignore/api/git,vim,helm,rust,bazel,emacs,jetbrains+all,visualstudiocode,node,go # ignore downloaded charts *.tgz diff --git a/.sqlx/query-098856a9e50709b6723e01b83ac91b44dfcc1eccb9978b4a51e2c80764252c0e.json b/.sqlx/query-098856a9e50709b6723e01b83ac91b44dfcc1eccb9978b4a51e2c80764252c0e.json deleted file mode 100644 index 
aa8dcd3..0000000 --- a/.sqlx/query-098856a9e50709b6723e01b83ac91b44dfcc1eccb9978b4a51e2c80764252c0e.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n INSERT INTO events (timestamp, raw)\n VALUES ($1, $2)\n ", - "describe": { - "columns": [], - "parameters": { - "Left": [ - "Timestamptz", - "Jsonb" - ] - }, - "nullable": [] - }, - "hash": "098856a9e50709b6723e01b83ac91b44dfcc1eccb9978b4a51e2c80764252c0e" -} diff --git a/.sqlx/query-17752ec6d8d06c32be0e722452a9390d13fe4488e4acc200a143122505ddefd9.json b/.sqlx/query-17752ec6d8d06c32be0e722452a9390d13fe4488e4acc200a143122505ddefd9.json new file mode 100644 index 0000000..17808f9 --- /dev/null +++ b/.sqlx/query-17752ec6d8d06c32be0e722452a9390d13fe4488e4acc200a143122505ddefd9.json @@ -0,0 +1,15 @@ +{ + "db_name": "PostgreSQL", + "query": "\n INSERT INTO cdevents_lake (timestamp, payload)\n VALUES ($1, $2)\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Timestamptz", + "Jsonb" + ] + }, + "nullable": [] + }, + "hash": "17752ec6d8d06c32be0e722452a9390d13fe4488e4acc200a143122505ddefd9" +} diff --git a/cdviz-collector/src/db.rs b/cdviz-collector/src/db.rs index 947b51e..388cefb 100644 --- a/cdviz-collector/src/db.rs +++ b/cdviz-collector/src/db.rs @@ -61,7 +61,7 @@ pub(crate) async fn store_event(pg_pool: &PgPool, event: Event) -> Result<()> { sqlx::query!( r#" - INSERT INTO events (timestamp, raw) + INSERT INTO cdevents_lake (timestamp, payload) VALUES ($1, $2) "#, event.timestamp, diff --git a/migrations/20240101182725_init.up.sql b/migrations/20240101182725_init.up.sql index 3e63dae..ae259f1 100644 --- a/migrations/20240101182725_init.up.sql +++ b/migrations/20240101182725_init.up.sql @@ -1,6 +1,22 @@ -- Add up migration script here -CREATE TABLE IF NOT EXISTS events ( - id serial PRIMARY KEY, - timestamp timestamptz NOT NULL, - raw jsonb NOT NULL +CREATE TABLE IF NOT EXISTS cdevents_lake ( + timestamp TIMESTAMP WITH TIME ZONE NOT NULL, + payload JSONB NOT NULL ); + +-- 
TODO: switch to a BRIN index once the table holds more data (see [Avoiding the Pitfalls of BRIN Indexes in Postgres](https://www.crunchydata.com/blog/avoiding-the-pitfalls-of-brin-indexes-in-postgres)) +CREATE INDEX IF NOT EXISTS cdevents_lake_timestamp_idx ON cdevents_lake (timestamp); + +-- sketch (kept commented out): create a view based on fields in the JSON payload +-- source: [Postgresql json column to view - Database Administrators Stack Exchange](https://dba.stackexchange.com/questions/151838/postgresql-json-column-to-view?newreg=ed0a9389843a45699bfb02559dd32038) +-- DO $$ +-- DECLARE l_keys text; +-- BEGIN +-- drop view if exists cdevents_flatten cascade; + +-- select string_agg(distinct format('jerrayel ->> %L as %I',jkey, jkey), ', ') +-- into l_keys +-- from cdevents_lake, jsonb_array_elements(payload) as t(jerrayel), jsonb_object_keys(t.jerrayel) as a(jkey); + +-- execute 'create view cdevents_flatten as select '||l_keys||' from cdevents_lake, jsonb_array_elements(payload) as t(jerrayel)'; +-- END$$;