diff --git a/x-pack/platform/plugins/shared/integration_assistant/server/graphs/ecs/pipeline.test.ts b/x-pack/platform/plugins/shared/integration_assistant/server/graphs/ecs/pipeline.test.ts index 3dda9208c3094..0ad142390b865 100644 --- a/x-pack/platform/plugins/shared/integration_assistant/server/graphs/ecs/pipeline.test.ts +++ b/x-pack/platform/plugins/shared/integration_assistant/server/graphs/ecs/pipeline.test.ts @@ -79,7 +79,7 @@ describe('Testing pipeline templates', () => { target_field: '@timestamp', formats: ["yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS'Z'", 'ISO8601'], tag: 'date_processor_xdfsfs.ds.@timestamp', - if: 'ctx.xdfsfs?.ds?.@timestamp != null', + if: 'ctx.xdfsfs?.ds?.get("@timestamp") != null', }, }, { diff --git a/x-pack/platform/plugins/shared/integration_assistant/server/graphs/ecs/pipeline.ts b/x-pack/platform/plugins/shared/integration_assistant/server/graphs/ecs/pipeline.ts index dda48c97bdf98..d729df1edde3d 100644 --- a/x-pack/platform/plugins/shared/integration_assistant/server/graphs/ecs/pipeline.ts +++ b/x-pack/platform/plugins/shared/integration_assistant/server/graphs/ecs/pipeline.ts @@ -12,10 +12,8 @@ import { Pipeline, ESProcessorItem } from '../../../common'; import type { EcsMappingState } from '../../types'; import { ECS_TYPES } from './constants'; import { deepCopy } from '../../util/util'; - -interface IngestPipeline { - [key: string]: unknown; -} +import { type FieldPath, fieldPathToProcessorString } from '../../util/fields'; +import { fieldPathToPainlessExpression, SafePainlessExpression } from '../../util/painless'; interface ECSField { target: string; @@ -24,16 +22,36 @@ interface ECSField { type: string; } +const KNOWN_ES_TYPES = ['long', 'float', 'scaled_float', 'ip', 'boolean', 'keyword']; +type KnownESType = (typeof KNOWN_ES_TYPES)[number]; + +/** + * Clarifies the types of specific fields in pipeline processors. + * + * This includes safety requirements for Painless script fields. 
+ * Restricted to the processors that we generate in this file. + */ +interface SafeESProcessorItem extends ESProcessorItem { + [k: string]: { + field?: string; + if?: SafePainlessExpression; + ignore_missing?: boolean; + target_field?: string; + type?: KnownESType; + formats?: string[]; + }; +} + function generateProcessor( - currentPath: string, + currentPath: FieldPath, ecsField: ECSField, expectedEcsType: string, sampleValue: unknown -): object { +): SafeESProcessorItem { if (needsTypeConversion(sampleValue, expectedEcsType)) { return { convert: { - field: currentPath, + field: fieldPathToProcessorString(currentPath), target_field: ecsField.target, type: getConvertProcessorType(expectedEcsType), ignore_missing: true, @@ -44,17 +62,17 @@ function generateProcessor( if (ecsField.type === 'date') { return { date: { - field: currentPath, + field: fieldPathToProcessorString(currentPath), target_field: ecsField.target, formats: convertIfIsoDate(ecsField.date_formats), - if: currentPath.replace(/\./g, '?.'), + if: fieldPathToPainlessExpression(currentPath), }, }; } return { rename: { - field: currentPath, + field: fieldPathToProcessorString(currentPath), target_field: ecsField.target, ignore_missing: true, }, @@ -74,10 +92,9 @@ function convertIfIsoDate(date: string[]): string[] { return date; } -function getSampleValue(key: string, samples: Record): unknown { - const keyList = key.split('.'); +function getSampleValue(fieldPath: FieldPath, samples: Record): unknown { let value: any = samples; - for (const k of keyList) { + for (const k of fieldPath) { if (value === undefined || value === null) { return null; } @@ -91,7 +108,7 @@ function getEcsType(ecsField: ECSField, ecsTypes: Record): strin return ecsTypes[ecsTarget]; } -function getConvertProcessorType(expectedEcsType: string): string { +function getConvertProcessorType(expectedEcsType: KnownESType): KnownESType { if (expectedEcsType === 'long') { return 'long'; } @@ -107,7 +124,7 @@ function 
getConvertProcessorType(expectedEcsType: string): string { return 'string'; } -function needsTypeConversion(sample: unknown, expected: string): boolean { +function needsTypeConversion(sample: unknown, expected: KnownESType): boolean { if (sample === null || sample === undefined) { return false; } @@ -136,16 +153,20 @@ function needsTypeConversion(sample: unknown, expected: string): boolean { return false; } -function generateProcessors(ecsMapping: object, samples: object, basePath: string = ''): object[] { +function generateProcessors( + ecsMapping: object, + samples: object, + basePath: FieldPath = [] +): SafeESProcessorItem[] { if (Object.keys(ecsMapping).length === 0) { return []; } const ecsTypes = ECS_TYPES; const valueFieldKeys = new Set(['target', 'confidence', 'date_formats', 'type']); - const results: object[] = []; + const results: SafeESProcessorItem[] = []; for (const [key, value] of Object.entries(ecsMapping)) { - const currentPath = basePath ? `${basePath}.${key}` : key; + const currentPath = [...basePath, key]; if (value !== null && typeof value === 'object' && value?.target !== null) { const valueKeys = new Set(Object.keys(value)); @@ -162,10 +183,11 @@ function generateProcessors(ecsMapping: object, samples: object, basePath: strin } } } + return results; } -export function createPipeline(state: EcsMappingState): IngestPipeline { +export function createPipeline(state: EcsMappingState): Pipeline { const samples = JSON.parse(state.combinedSamples); const processors = generateProcessors(state.finalMapping, samples); diff --git a/x-pack/platform/plugins/shared/integration_assistant/server/templates/pipeline.yml.njk b/x-pack/platform/plugins/shared/integration_assistant/server/templates/pipeline.yml.njk index 116d5cc66719f..1af205fb5e549 100644 --- a/x-pack/platform/plugins/shared/integration_assistant/server/templates/pipeline.yml.njk +++ b/x-pack/platform/plugins/shared/integration_assistant/server/templates/pipeline.yml.njk @@ -53,7 +53,8 @@ 
processors: - {{ format }} {% endfor %} tag: date_processor_{{ value.field}} - if: "ctx.{{ value.if }} != null"{% endif %} + if: |- + {{ value.if }} != null{% endif %} {% if key == 'convert' %} - {{ key }}: field: {{ value.field }} diff --git a/x-pack/platform/plugins/shared/integration_assistant/server/util/fields.test.ts b/x-pack/platform/plugins/shared/integration_assistant/server/util/fields.test.ts new file mode 100644 index 0000000000000..efda66df2116a --- /dev/null +++ b/x-pack/platform/plugins/shared/integration_assistant/server/util/fields.test.ts @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { fieldPathToProcessorString } from './fields'; + +describe('fieldPathToProcessorString', () => { + it('should join an array of strings with dots', () => { + const result = fieldPathToProcessorString(['foo', 'bar', 'baz']); + expect(result).toBe('foo.bar.baz'); + }); + + it('should return an empty string if array is empty', () => { + const result = fieldPathToProcessorString([]); + expect(result).toBe(''); + }); +}); diff --git a/x-pack/platform/plugins/shared/integration_assistant/server/util/fields.ts b/x-pack/platform/plugins/shared/integration_assistant/server/util/fields.ts new file mode 100644 index 0000000000000..810754b23e150 --- /dev/null +++ b/x-pack/platform/plugins/shared/integration_assistant/server/util/fields.ts @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +/** + * Represents a path to a field as an array of string segments. 
+ * Each element in the array represents a level in the field hierarchy. + * + * A segment might contain a character that is invalid in some contexts. + * @example ['person', 'address', 'street-level'] + */ +export type FieldPath = string[]; + +/** + * Converts a FieldPath array into a string useable as the field in the ingest pipeline. + * + * @param fieldPath - The array of field names representing the path. + * @returns The processor string created by joining the field names with a dot. + */ +export function fieldPathToProcessorString(fieldPath: FieldPath): string { + return fieldPath.join('.'); +} diff --git a/x-pack/platform/plugins/shared/integration_assistant/server/util/painless.test.ts b/x-pack/platform/plugins/shared/integration_assistant/server/util/painless.test.ts new file mode 100644 index 0000000000000..e2ec2963f2ee3 --- /dev/null +++ b/x-pack/platform/plugins/shared/integration_assistant/server/util/painless.test.ts @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { + isPainlessIdentifier, + painlessStringRepresentation, + addPainlessFieldAccess, + fieldPathToPainlessExpression, + type SafePainlessExpression, +} from './painless'; + +describe('isPainlessIdentifier', () => { + it('should return true for valid identifiers', () => { + expect(isPainlessIdentifier('_validIdentifier123')).toBe(true); + expect(isPainlessIdentifier('valid')).toBe(true); + }); + + it('should return false for invalid identifiers', () => { + expect(isPainlessIdentifier('123start')).toBe(false); // Identifiers cannot start with a number + expect(isPainlessIdentifier('new')).toBe(true); // Reserved words are valid identifiers + expect(isPainlessIdentifier('_source')).toBe(true); // Underscore-prefixed identifiers are valid + expect(isPainlessIdentifier('invalid-char!')).toBe(false); // Identifiers cannot contain special characters + }); +}); + +describe('painlessFieldEscape', () => { + it('should return a quoted and escaped string', () => { + expect(painlessStringRepresentation('simple')).toBe('"simple"'); + expect(painlessStringRepresentation('"quote"')).toBe('"\\"quote\\""'); + expect(painlessStringRepresentation('back\\slash')).toBe('"back\\\\slash"'); + }); +}); + +describe('addPainlessFieldAccess', () => { + it('should add a dot-access for valid identifiers', () => { + const expr = 'root' as SafePainlessExpression; + const result = addPainlessFieldAccess('foo', expr, false); + expect(result).toBe('root.foo'); + }); + + it('should add a nullable dot-access for valid identifiers', () => { + const expr = 'root' as SafePainlessExpression; + const result = addPainlessFieldAccess('foo', expr); + expect(result).toBe('root?.foo'); + }); + + it('should add a get-access for invalid identifiers', () => { + const expr = 'root' as SafePainlessExpression; + const result = addPainlessFieldAccess('foo-bar', expr, false); + expect(result).toContain('"foo-bar"'); + expect(result).toBe('root.get("foo-bar")'); + }); + + it('should add a nullable 
get-access for invalid identifiers in the chain', () => { + const expr = 'root' as SafePainlessExpression; + const result = addPainlessFieldAccess('foo-bar', expr, true); + expect(result).toContain('"foo-bar"'); + expect(result).toBe('root?.get("foo-bar")'); + }); +}); + +describe('fieldPathToPainlessExpression', () => { + it('should build a nested expression from a simple field path', () => { + const result = fieldPathToPainlessExpression(['source', 'ip']); + expect(result).toBe('ctx.source?.ip'); + }); + + it('should quote invalid identifiers', () => { + const result = fieldPathToPainlessExpression(['ip-address']); + expect(result).toContain('"ip-address"'); + expect(result).toBe('ctx.get("ip-address")'); + }); + + it('should use nullable get access for nested invalid identifiers', () => { + const result = fieldPathToPainlessExpression(['field', 'ip-address']); + expect(result).toContain('"ip-address"'); + expect(result).toBe('ctx.field?.get("ip-address")'); + }); + + it('should return just "ctx" if the path is empty', () => { + const result = fieldPathToPainlessExpression([]); + expect(result).toBe('ctx'); + }); +}); diff --git a/x-pack/platform/plugins/shared/integration_assistant/server/util/painless.ts b/x-pack/platform/plugins/shared/integration_assistant/server/util/painless.ts new file mode 100644 index 0000000000000..797a14422f7b2 --- /dev/null +++ b/x-pack/platform/plugins/shared/integration_assistant/server/util/painless.ts @@ -0,0 +1,124 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { FieldPath } from './fields'; + +/** + * Branded type representing a string that is a safe Painless expression. 
+ * + * Painless is a scripting language used in Elasticsearch, here we are using it to + * generate the ingest pipeline with Automatic Import. + * + * This type is used to ensure that a string has been validated + * and is considered safe to be used as a Painless expression. + * The `__isSafePainlessExpression` property is a type brand + * to distinguish this type from a regular string. + */ +export type SafePainlessExpression = string & { __isSafePainlessExpression: true }; + +export type SafeNonNullablePainlessExpression = SafePainlessExpression & { + __isNonNullablePainlessExpression: true; +}; + +/** + * A constant representing the context variable used in Elasticsearch ingest pipeline painless scripts. + * This is typed as a safe painless expression to ensure type safety when used in pipeline definitions. + * + * @link https://www.elastic.co/guide/en/elasticsearch/painless/8.17/painless-contexts.html + * @constant {SafePainlessExpression} + */ +const INGEST_PIPELINE_PAINLESS_CONTEXT = 'ctx' as const as SafePainlessExpression; + +/** + * A regular expression that matches valid Painless script identifiers. + * + * Identifiers in Painless + * must start with an underscore or a letter (a-z, A-Z), followed by any combination + * of underscores, letters, or digits. + * + * This regular expression ensures that the identifier conforms to these rules: + * - The first character must be an underscore or a letter. + * - Subsequent characters can be underscores, letters, or digits. + * + * This is the ID and DOTID regexp in the Painless grammar under the following link: + * @link packages/kbn-monaco/src/painless/antlr/painless_parser.g4 + */ +const PAINLESS_IDENTIFIER_REGEXP = /^[_a-zA-Z][_a-zA-Z0-9]*$/; + +/** + * Checks if a given string is a valid Painless identifier (though possibly a reserved word). + * + * @link https://www.elastic.co/guide/en/elasticsearch/painless/8.17/painless-identifiers.html + * @param s - The string to check. 
+ * @returns `true` if the string is a valid Painless identifier, `false` otherwise. + */ +export function isPainlessIdentifier(s: string): boolean { + return PAINLESS_IDENTIFIER_REGEXP.test(s); +} + +/** + * Creates a string literal for use in Painless scripts. + * + * Quoting rules: + * - Use a \" token to include a double-quote as part of a double-quoted string literal. + * - Use a \\ token to include a backslash as part of any string literal. + * + * @link https://www.elastic.co/guide/en/elasticsearch/painless/8.17/painless-literals.html#string-literals + * @param s - The string to escape. + * @returns The escaped string, wrapped in double quotes. + */ +export function painlessStringRepresentation(s: string): SafePainlessExpression { + return `"${s.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"` as SafePainlessExpression; +} + +/** + * Adds a field access to a Painless expression. + * + * This function is used to add a field access to a Painless expression. + * It ensures that the field access is properly escaped and that the expression remains safe. + * It is still possible to access fields that are not valid Painless identifiers by using a map access. + * + * @param fieldName - The subfield to access. + * @param expr - The Painless expression to add the field access to; accessed null-safely (`?.`) unless `exprNullable` is `false`. + * @returns The new Painless expression with the added field access; safe but possibly null. + */ +export function addPainlessFieldAccess( + fieldName: string, + expr: SafePainlessExpression, + exprNullable: boolean = true +): SafePainlessExpression { + const nonNullableExpr = exprNullable ? (`${expr}?` as SafePainlessExpression) : expr; + const isValidIdentifier = isPainlessIdentifier(fieldName); + + if (isValidIdentifier) { + return `${nonNullableExpr}.${fieldName}` as SafePainlessExpression; + } + + const representedName = painlessStringRepresentation(fieldName); + return `${nonNullableExpr}.get(${representedName})` as SafePainlessExpression; +} + +/** + * Converts a field path to a Painless script expression. 
+ * + * This function takes a `FieldPath` (an array of strings representing the path to a field) + * and converts it into a `SafePainlessExpression` by reducing the array and adding Painless + * field access for each subfield. + * + * We assume that every field path access except the one on the context itself can yield null, + * so every subfield after the first is accessed with the null-safe operator (`?.`). + * + * @param fieldPath - The path to the field as an array of strings. + * @returns A `SafePainlessExpression` representing the field path in Painless script syntax. + */ +export function fieldPathToPainlessExpression(fieldPath: FieldPath): SafePainlessExpression { + return fieldPath.reduce( + (expr: SafePainlessExpression, subfield: string) => + addPainlessFieldAccess(subfield, expr, expr !== INGEST_PIPELINE_PAINLESS_CONTEXT), + INGEST_PIPELINE_PAINLESS_CONTEXT + ); +}