diff --git a/test/e2e b/test/e2e index 8847ca00..14260227 160000 --- a/test/e2e +++ b/test/e2e @@ -1 +1 @@ -Subproject commit 8847ca00a0deda194008bb2d8dccc02d879267d2 +Subproject commit 142602272531e9cb4548d6d4bbd5ffe0329a9ca4 diff --git a/traceql/clickhouse_transpiler/attr_condition.js b/traceql/clickhouse_transpiler/attr_condition.js index ba1afd2e..406d617d 100644 --- a/traceql/clickhouse_transpiler/attr_condition.js +++ b/traceql/clickhouse_transpiler/attr_condition.js @@ -3,6 +3,7 @@ const Sql = require('@cloki/clickhouse-sql') module.exports = class Builder { constructor () { this.main = null + this.precondition = null this.terms = [] this.conds = null this.aggregatedAttr = '' @@ -51,6 +52,11 @@ module.exports = class Builder { return this } + withPrecondition (precondition) { + this.precondition = precondition + return this + } + /** * @returns {ProcessFn} */ @@ -58,7 +64,10 @@ module.exports = class Builder { const self = this /** @type {BuiltProcessFn} */ const res = (ctx) => { - const sel = this.main(ctx) + const sel = self.main(ctx) + const withPreconditionSel = self.precondition + ? new Sql.With('precond', self.buildPrecondition(ctx)) + : null self.alias = 'bsCond' for (const term of self.terms) { const sqlTerm = self.getTerm(term) @@ -83,37 +92,52 @@ module.exports = class Builder { sel.conditions, Sql.Eq(new Sql.Raw(`cityHash64(trace_id) % ${ctx.randomFilter[0]}`), Sql.val(ctx.randomFilter[1]))) } + if (withPreconditionSel) { + sel.with(withPreconditionSel) + sel.conditions = Sql.And( + sel.conditions, + new Sql.In(new Sql.Raw('(trace_id, span_id)'), 'in', new Sql.WithReference(withPreconditionSel))) + } sel.having(having) return sel } return res } + buildPrecondition (ctx) { + if (!this.precondition) { + return null + } + const sel = this.precondition(ctx) + sel.select_list = sel.select_list.filter(x => Array.isArray(x) && (x[1] === 'trace_id' || x[1] === 'span_id')) + sel.order_expressions = [] + return sel + } + /** * @typedef {{simpleIdx: number, op: string, complex: [Condition]}} Condition */ /** - * @param c {Condition} + * @param c {Token || [any]} */ getCond (c) { - if (c.simpleIdx === -1) { - const subs = [] - for (const s of c.complex) { - subs.push(this.getCond(s)) - } - switch (c.op) { - case '&&': - return Sql.And(...subs) - case '||': - return Sql.Or(...subs) + if (c.name) { + let left = new Sql.Raw(this.alias) + if (!this.isAliased) { + left = groupBitOr(bitSet(this.sqlConditions), this.alias) } - throw new Error(`unsupported condition operator ${c.op}`) + const termIdx = this.terms.findIndex(x => x.value === c.value) + return Sql.Ne(bitAnd(left, new Sql.Raw((BigInt(1) << BigInt(termIdx)).toString())), Sql.val(0)) } - let left = new Sql.Raw(this.alias) - if (!this.isAliased) { - left = groupBitOr(bitSet(this.sqlConditions), this.alias) + const op = c[0] + const subs = c.slice(1).map(x => this.getCond(x)) + switch (op) { + case '&&': + return Sql.And(...subs) + case '||': + return Sql.Or(...subs) } - return Sql.Ne(bitAnd(left, new Sql.Raw((BigInt(1) << BigInt(c.simpleIdx)).toString())), Sql.val(0)) + throw new Error(`unsupported condition operator ${c.op}`) } /** diff --git a/traceql/clickhouse_transpiler/attr_condition_eval.js b/traceql/clickhouse_transpiler/attr_condition_eval.js index 43cf2a02..2512ebc0 100644 --- a/traceql/clickhouse_transpiler/attr_condition_eval.js +++ b/traceql/clickhouse_transpiler/attr_condition_eval.js @@ -1,5 +1,5 @@ const attrCondition = require('./attr_condition') -const {bitSet} = require('./shared') +const { bitSet } = require('./shared') const Sql = require('@cloki/clickhouse-sql') module.exports = class Builder extends attrCondition { build () { @@ -10,7 +10,7 @@ module.exports = class Builder extends attrCondition { const sel = superBuild(ctx) sel.having_conditions = [] sel.aggregations = [bitSet(self.sqlConditions)] - sel.select_list = [[new Sql.Raw('count()'), 'count']] + sel.select_list = [[bitSet(self.sqlConditions), 'cond'], [new Sql.Raw('count()'), 'count']] sel.order_expressions = [] return sel } diff --git a/traceql/clickhouse_transpiler/index.js b/traceql/clickhouse_transpiler/index.js index f34c009f..44b07ea8 100644 --- a/traceql/clickhouse_transpiler/index.js +++ b/traceql/clickhouse_transpiler/index.js @@ -5,33 +5,42 @@ const IndexGroupByPlanner = require('./group_by') const AggregatorPlanner = require('./aggregator') const IndexLimitPlanner = require('./limit') const TracesDataPlanner = require('./traces_data') +const { th } = require('date-fns/locale') /** * @param script {Token} */ module.exports.transpile = (script) => { - return new Planner(script).plan() + return new module.exports.Planner(script).plan() } /** * @param script {Token} */ module.exports.evaluateCmpl = (script) => { - return new Planner(script).planEval() + return new module.exports.Planner(script).planEval() } -class Planner { +module.exports.Planner = class Planner { /** * * @param script {Token} */ constructor (script) { this.script = script - this.termIdx = [] this.cond = null + this.terms = {} + this.termIdx = [] + + this.eval = null + + this.preCond = null + this.preCondTerms = {} + this.precondTermsIdx = [] + this.aggregatedAttr = '' this.cmpVal = '' - this.terms = {} + this.aggFn = '' } @@ -43,7 +52,14 @@ class Planner { .withConditions(this.cond) .withAggregatedAttr(this.aggregatedAttr) .withMain((new InitIndexPlanner()).build()) - .build() + if (this.preCond) { + const preCond = (new AttrConditionPlanner()) + .withTerms(this.precondTermsIdx) + .withConditions(this.preCond) + .withMain((new InitIndexPlanner()).build()) + res = res.withPrecondition(preCond.build()) + } + res = res.build() res = (new IndexGroupByPlanner()).withMain(res).build() if (this.aggFn) { res = (new AggregatorPlanner()) @@ -74,6 +90,35 @@ class Planner { return res } + setEvaluationResult (result) { + this.eval = {} + for (const row of result) { + this.eval[row.cond] = row.count + } + } + + minify () { + const subcost = {} + for (let i = 0; i < this.termIdx.length; i++) { + subcost[this.termIdx[i].value] = Object.entries(this.eval) + .find(x => parseInt(x[0]) === i + 1) + subcost[this.termIdx[i].value] = subcost[this.termIdx[i].value] + ? parseInt(subcost[this.termIdx[i].value][1]) + : 0 + } + if (!this.isDNF(this.cond)) { + return this.estimateCost(this.cond, subcost) + } + this.preCond = this.getSimplePrecondition(this.cond, subcost) + if (this.preCond) { + this.extractTermsIdx(this.preCond, this.precondTermsIdx, this.preCondTerms) + } + + return this.preCond + ? this.estimateCost(this.preCond, subcost) + : this.estimateCost(this.cond, subcost) + } + check () { if (this.script.Children('SYNTAX').length > 1) { throw new Error('more than one selector is not supported') @@ -86,35 +131,160 @@ class Planner { this.analyzeAgg() } + /** + * + * @param token {Token} + * @param tree {{root: any}} + * @param place {{ref: any}} + */ + buildExpressionTree (token, tree, place) { + if (token.name !== 'attr_selector_exp') { + throw new Error('unsupported selector') + } + let leftHand = token.tokens[0] + if (token.tokens[0].name === 'complex_head') { + const newTree = { root: { ref: null } } + this.buildExpressionTree(token.tokens[0].tokens.find(x => x.name === 'attr_selector_exp'), + newTree, + newTree.root + ) + leftHand = newTree.root + } + const tail = token.tokens.find(x => x.name === 'tail') + if (!tail) { + // if we have `a` + place.ref = leftHand + return + } + const andOr = token.tokens.find(x => x.name === 'and_or').value + const newPlace = { ref: null } + switch (andOr) { + case '&&': + place.ref = ['&&', { ref: leftHand }, newPlace] + this.buildExpressionTree(tail.tokens[0], tree, newPlace) + return + case '||': + place.ref = leftHand + tree.root = ['||', { ref: tree.root }, newPlace] + this.buildExpressionTree(tail.tokens[0], tree, newPlace) + } + } + + /** + * + * @param t {{ref: any} | Token | Array} + * @returns {Token | Array} + */ + minimizeTree (t) { + while (t.ref) { + t = t.ref + } + if (!Array.isArray(t)) { + return t + } + for (let i = t.length - 1; i > 0; i--) { + t[i] = this.minimizeTree(t[i]) + if (Array.isArray(t[i]) && t[i][0] === t[0]) { + t.splice(i, 1, ...t[i].slice(1)) + } + } + return t + } + + /** + * @param t {Token | Array} + * @returns {boolean} + */ + isDNF (t) { + if (t.name) { + return true + } + const fn = t[0] + for (let i = 1; i < t.length; i++) { + if (!this.isDNF(t[i])) { + return false + } + if (Array.isArray(t[i]) && fn === '&&' && t[i][0] === '||') { + return false + } + } + return true + } + + /** + * + * @param t {Token | Array} + * @param subcosts {{[key: string]: number}} + * @returns number + */ + estimateCost (t, subcosts) { + if (t.name) { + return subcosts[t.value] + } + const fn = t[0] + const costs = t.slice(1).map(x => this.estimateCost(x, subcosts)) + switch (fn) { + case '&&': + return Math.min(...costs) + case '||': + return costs.reduce((a, b) => a + b) + } + throw new Error('unsupported function') + } + + /** + * + * @param t {Token | Array} + * @param subcosts {{[key: string]: number}} + */ + getSimplePrecondition (t, subcosts) { + if (!this.isDNF(t)) { + return null + } + if (t.name) { + return subcosts[t.value] < 10000000 ? t : null + } + const fn = t[0] + const self = this + const simplify = x => x.length === 2 ? x[1] : x + if (fn === '&&') { + const res = t.slice(1).filter(x => self.estimateCost(x, subcosts) < 10000000) + return res.length > 0 ? simplify(['&&', ...res]) : null + } + if (fn === '||') { + const res = t.slice(1).map(x => self.getSimplePrecondition(x, subcosts)).filter(x => x) + return res.length === t.length - 1 ? simplify(['||', ...res]) : null + } + throw new Error('unsupported function') + } + /** * * @param token {Token} */ analyzeCond (token) { - let res = {} - const complexHead = token.tokens.find(x => x.name === 'complex_head') - const simpleHead = token.tokens.find(x => x.name === 'attr_selector') - if (complexHead) { - res = this.analyzeCond(complexHead.tokens.find(x => x.name === 'attr_selector_exp')) - } else if (simpleHead) { - const term = simpleHead.value - if (this.terms[term]) { - res = { simpleIdx: this.terms[term] - 1, op: '', complex: [] } + const tree = { root: { ref: null } } + this.buildExpressionTree(token, tree, tree.root) + tree.root = this.minimizeTree(tree.root) + this.extractTermsIdx(tree.root, this.termIdx, this.terms) + return tree.root + } + + extractTermsIdx (t, termIdx, terms) { + const self = this + if (t.name) { + if (!terms[t.value]) { + termIdx.push(t) + terms[t.value] = termIdx.length + t.termIdx = termIdx.length - 1 } else { - this.termIdx.push(simpleHead) - this.terms[term] = this.termIdx.length - res = { simpleIdx: this.termIdx.length - 1, op: '', complex: [] } + t.termIdx = terms[t.value] - 1 } + return } - const tail = token.tokens.find(x => x.name === 'tail') - if (tail) { - res = { - simpleIdx: -1, - op: token.tokens.find(x => x.name === 'and_or').value, - complex: [res, this.analyzeCond(tail.tokens.find(x => x.name === 'attr_selector_exp'))] - } + if (Array.isArray(t)) { + t.forEach(x => self.extractTermsIdx(x, termIdx, terms)) } - return res } analyzeAgg () { diff --git a/traceql/clickhouse_transpiler/init.js b/traceql/clickhouse_transpiler/init.js index 5d3cf131..1deec485 100644 --- a/traceql/clickhouse_transpiler/init.js +++ b/traceql/clickhouse_transpiler/init.js @@ -12,6 +12,7 @@ const { standardBuilder } = require('./shared') * tracesDistTable: string, * randomFilter: number[]|undefined, * cachedTraceIds: string[]|undefined, + * planner: Planner * }} Context */ /** diff --git a/traceql/index.js b/traceql/index.js index 37beedf9..bec8b969 100644 --- a/traceql/index.js +++ b/traceql/index.js @@ -1,5 +1,5 @@ const parser = require('./parser') -const { transpile, evaluateCmpl } = require('./clickhouse_transpiler') +const { Planner } = require('./clickhouse_transpiler') const logger = require('../lib/logger') const { DATABASE_NAME } = require('../lib/utils') const { clusterName } = require('../common') @@ -15,6 +15,7 @@ const { rawRequest } = require('../lib/db/clickhouse') */ const search = async (query, limit, from, to) => { const _dbname = DATABASE_NAME() + const scrpit = parser.ParseScript(query) /** @type {Context} */ const ctx = { tracesDistTable: `${_dbname}.tempo_traces_dist`, @@ -24,11 +25,15 @@ const search = async (query, limit, from, to) => { from: from, to: to, limit: limit, - randomFilter: null + randomFilter: null, + planner: new Planner(scrpit.rootToken) } - const scrpit = parser.ParseScript(query) - const complexity = await evaluateComplexity(ctx, scrpit.rootToken) + + let complexity = await evaluateComplexity(ctx, scrpit.rootToken) let res = [] + if (complexity > 10000000) { + complexity = ctx.planner.minify() + } if (complexity > 10000000) { res = await processComplexResult(ctx, scrpit.rootToken, complexity) } else { @@ -49,9 +54,10 @@ const search = async (query, limit, from, to) => { * @param script {Token} */ const evaluateComplexity = async (ctx, script) => { - const evaluator = evaluateCmpl(script) + const evaluator = ctx.planner.planEval() const sql = evaluator(ctx) const response = await rawRequest(sql + ' FORMAT JSON', null, DATABASE_NAME()) + ctx.planner.setEvaluationResult(response.data.data) return response.data.data.reduce((acc, row) => Math.max(acc, row.count), 0) } @@ -62,7 +68,7 @@ const evaluateComplexity = async (ctx, script) => { * @param complexity {number} */ async function processComplexResult (ctx, script, complexity) { - const planner = transpile(script) + const planner = ctx.planner.plan() const maxFilter = Math.floor(complexity / 10000000) let traces = [] for (let i = 0; i < maxFilter; i++) { @@ -110,7 +116,7 @@ async function processComplexResult (ctx, script, complexity) { * @param script {Token} */ async function processSmallResult (ctx, script) { - const planner = transpile(script) + const planner = ctx.planner.plan() const sql = planner(ctx) const response = await rawRequest(sql + ' FORMAT JSON', null, DATABASE_NAME()) const traces = response.data.data.map(row => ({ @@ -119,6 +125,16 @@ async function processSmallResult (ctx, script) { rootTraceName: row.root_trace_name, startTimeUnixNano: row.start_time_unix_nano, durationMs: row.duration_ms, + spanSet: { + spans: row.span_id.map((spanId, i) => ({ + spanID: spanId, + startTimeUnixNano: row.timestamp_ns[i], + spanStartTime: row.timestamp_ns[i], + durationNanos: row.duration[i], + attributes: [] + })), + matched: row.span_id.length + }, spanSets: [ { spans: row.span_id.map((spanId, i) => ({