diff --git a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap index 0001f45600ec2..7944e0ecc188a 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap +++ b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/__snapshots__/py_lang_client.test.tsx.snap @@ -40,11 +40,12 @@ def get_elasticsearch_results(): def create_openai_prompt(results): context = \\"\\" for hit in results: - inner_hit_path = f\\"{hit['_index']}.{index_source_fields.get(hit['_index'])[0]}\\" - - ## For semantic_text matches, we need to extract the text from the inner_hits - if 'inner_hits' in hit and inner_hit_path in hit['inner_hits']: - context += '\\\\n --- \\\\n'.join(inner_hit['_source']['text'] for inner_hit in hit['inner_hits'][inner_hit_path]['hits']['hits']) + ## For semantic_text matches, we need to extract the text from the highlighted field + if \\"highlight\\" in hit: + highlighted_texts = [] + for values in hit[\\"highlight\\"].values(): + highlighted_texts.extend(values) + context += \\"\\\\n --- \\\\n\\".join(highlighted_texts) else: source_field = index_source_fields.get(hit[\\"_index\\"])[0] hit_context = hit[\\"_source\\"][source_field] diff --git a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/py_lang_client.tsx b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/py_lang_client.tsx index a2d92583c6b63..746ecd293ad5e 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/py_lang_client.tsx +++ b/x-pack/solutions/search/plugins/search_playground/public/components/view_code/examples/py_lang_client.tsx @@ -40,11 +40,12 @@ def get_elasticsearch_results(): def create_openai_prompt(results): context = "" for hit in results: - inner_hit_path = f"{hit['_index']}.{index_source_fields.get(hit['_index'])[0]}" - - ## For semantic_text matches, we need to extract the text from the inner_hits - if 'inner_hits' in hit and inner_hit_path in hit['inner_hits']: - context += '\\n --- \\n'.join(inner_hit['_source']['text'] for inner_hit in hit['inner_hits'][inner_hit_path]['hits']['hits']) + ## For semantic_text matches, we need to extract the text from the highlighted field + if "highlight" in hit: + highlighted_texts = [] + for values in hit["highlight"].values(): + highlighted_texts.extend(values) + context += "\\n --- \\n".join(highlighted_texts) else: source_field = index_source_fields.get(hit["_index"])[0] hit_context = hit["_source"][source_field] diff --git a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.test.ts b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.test.ts index c4c986e7b06e6..d6001dd1f2224 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.test.ts +++ b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.test.ts @@ -516,20 +516,9 @@ describe('create_query', () => { { standard: { query: { - nested: { - inner_hits: { - _source: ['field2.inference.chunks.text'], - name: 'index1.field2', - size: 2, - }, - path: 'field2.inference.chunks', - query: { - sparse_vector: { - field: 'field2.inference.chunks.embeddings', - inference_id: 'model2', - query: '{query}', - }, - }, + semantic: { + field: 'field2', + query: '{query}', }, }, }, @@ -542,6 +531,15 @@ describe('create_query', () => { ], }, }, + highlight: { + fields: { + field2: { + number_of_fragments: 2, + order: 'score', + type: 'semantic', + }, + }, + }, }); }); @@ -638,24 +636,9 @@ describe('create_query', () => { { standard: { query: { - nested: { - inner_hits: { - _source: ['field2.inference.chunks.text'], - name: 'index1.field2', - size: 2, - }, - path: 'field2.inference.chunks', - query: { - knn: { - field: 'field2.inference.chunks.embeddings', - query_vector_builder: { - text_embedding: { - model_id: 'model2', - model_text: '{query}', - }, - }, - }, - }, + semantic: { + field: 'field2', + query: '{query}', }, }, }, @@ -668,6 +651,15 @@ describe('create_query', () => { ], }, }, + highlight: { + fields: { + field2: { + number_of_fragments: 2, + order: 'score', + type: 'semantic', + }, + }, + }, }); }); diff --git a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts index 63cdcdf76bb65..cf0a1846bfb65 100644 --- a/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts +++ b/x-pack/solutions/search/plugins/search_playground/public/utils/create_query.ts @@ -5,7 +5,7 @@ * 2.0. */ -import { RetrieverContainer } from '@elastic/elasticsearch/lib/api/types'; +import { RetrieverContainer, SearchHighlight } from '@elastic/elasticsearch/lib/api/types'; import { IndicesQuerySourceFields, QuerySourceFields } from '../types'; export type IndexFields = Record; @@ -36,6 +36,8 @@ const SUGGESTED_SOURCE_FIELDS = [ 'text_field', ]; +const SEMANTIC_FIELD_TYPE = 'semantic'; + interface Matches { queryMatches: any[]; knnMatches: any[]; @@ -52,7 +54,7 @@ export function createQuery( rerankOptions: ReRankOptions = { rrf: true, } -): { retriever: RetrieverContainer } { +): { retriever: RetrieverContainer; highlight?: SearchHighlight } { const indices = Object.keys(fieldDescriptors); const boolMatches = Object.keys(fields).reduce( (acc, index) => { @@ -64,60 +66,8 @@ export function createQuery( const semanticMatches = indexFields.map((field) => { const semanticField = indexFieldDescriptors.semantic_fields.find((x) => x.field === field); - const isSourceField = sourceFields[index].includes(field); - - // this is needed to get the inner_hits for the source field - // we cant rely on only the semantic field - // in future inner_hits option will be added to semantic - if (semanticField && isSourceField) { - if (semanticField.embeddingType === 'dense_vector') { - const filter = - semanticField.indices.length < indices.length - ? { filter: { terms: { _index: semanticField.indices } } } - : {}; - return { - nested: { - path: `${semanticField.field}.inference.chunks`, - query: { - knn: { - field: `${semanticField.field}.inference.chunks.embeddings`, - ...filter, - query_vector_builder: { - text_embedding: { - model_id: semanticField.inferenceId, - model_text: '{query}', - }, - }, - }, - }, - inner_hits: { - size: 2, - name: `${index}.${semanticField.field}`, - _source: [`${semanticField.field}.inference.chunks.text`], - }, - }, - }; - } else if (semanticField.embeddingType === 'sparse_vector') { - return { - nested: { - path: `${semanticField.field}.inference.chunks`, - query: { - sparse_vector: { - inference_id: semanticField.inferenceId, - field: `${semanticField.field}.inference.chunks.embeddings`, - query: '{query}', - }, - }, - inner_hits: { - size: 2, - name: `${index}.${semanticField.field}`, - _source: [`${semanticField.field}.inference.chunks.text`], - }, - }, - }; - } - } else if (semanticField) { + if (semanticField) { return { semantic: { field: semanticField.field, @@ -241,12 +191,34 @@ export function createQuery( // for single Elser support to make it easy to read - skips bool query if (boolMatches.queryMatches.length === 1 && boolMatches.knnMatches.length === 0) { + const semanticField = boolMatches.queryMatches[0].semantic?.field ?? null; + + let isSourceField = false; + indices.forEach((index) => { + if (sourceFields[index].includes(semanticField)) { + isSourceField = true; + } + }); + return { retriever: { standard: { query: boolMatches.queryMatches[0], }, }, + ...(isSourceField + ? { + highlight: { + fields: { + [semanticField]: { + type: SEMANTIC_FIELD_TYPE, + number_of_fragments: 2, + order: 'score', + }, + }, + }, + } + : {}), }; } @@ -285,12 +257,39 @@ export function createQuery( }; }); + const semanticFields = matches + .filter((match) => match.semantic) + .map((match) => match.semantic.field) + .filter((field) => { + let isSourceField = false; + indices.forEach((index) => { + if (sourceFields[index].includes(field)) { + isSourceField = true; + } + }); + return isSourceField; + }); + return { retriever: { rrf: { retrievers, }, }, + ...(semanticFields.length > 0 + ? { + highlight: { + fields: semanticFields.reduce((acc, field) => { + acc[field] = { + type: SEMANTIC_FIELD_TYPE, + number_of_fragments: 2, + order: 'score', + }; + return acc; + }, {}), + }, + } + : {}), }; } diff --git a/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts b/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts index d8958da6ff112..5a59ddead7d9c 100644 --- a/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts +++ b/x-pack/solutions/search/plugins/search_playground/server/lib/conversational_chain.test.ts @@ -237,19 +237,7 @@ describe('conversational chain', () => { { _index: 'index', _id: '1', - inner_hits: { - 'index.field': { - hits: { - hits: [ - { - _source: { - text: 'value', - }, - }, - ], - }, - }, - }, + highlight: { field: ['value'] }, }, ], expectedDocs: [ diff --git a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts index 7eae929cc70c0..11351c56adb97 100644 --- a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts +++ b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts @@ -78,49 +78,30 @@ describe('getValueForSelectedField', () => { expect(getValueForSelectedField(hit, 'bla.sources')).toBe(''); }); - test('should return when its a chunked passage', () => { + test('should return when it has highlighted messages', () => { const hit = { - _index: 'sample-index', + _index: 'books', _id: '8jSNY48B6iHEi98DL1C-', _score: 0.7789394, _source: { - test: 'The Shawshank Redemption', + test: 'The Big Bang and Black Holes', metadata: { source: - 'Over the course of several years, two convicts form a friendship, seeking consolation and, eventually, redemption through basic compassion', + 'This book explores the origins of the universe, beginning with the Big Bang—an immense explosion that created space, time, and matter. It delves into how black holes, regions of space where gravity is so strong that not even light can escape, play a crucial role in the evolution of galaxies and the universe as a whole. Stephen Hawking’s groundbreaking discoveries about black hole radiation, often referred to as Hawking Radiation, are also discussed in detail.', }, }, - inner_hits: { - 'sample-index.test': { - hits: { - hits: [ - { - _source: { - text: 'Over the course of several years', - }, - }, - { - _source: { - text: 'two convicts form a friendship', - }, - }, - { - _source: { - text: 'seeking consolation and, eventually, redemption through basic compassion', - }, - }, - ], - }, - }, + highlight: { + test: [ + 'This book explores the origins of the universe.', + 'The beginning with the Big Bang—an immense explosion that created space, time, and matter. It delves into how black holes, regions of space where gravity is so strong that not even light can escape, play a crucial role in the evolution of galaxies and the universe as a whole. Stephen Hawking’s groundbreaking discoveries about black hole radiation, often referred to as Hawking Radiation, are also discussed in detail.', + ], }, }; expect(getValueForSelectedField(hit as any, 'test')).toMatchInlineSnapshot(` - "Over the course of several years - --- - two convicts form a friendship + "This book explores the origins of the universe. --- - seeking consolation and, eventually, redemption through basic compassion" + The beginning with the Big Bang—an immense explosion that created space, time, and matter. It delves into how black holes, regions of space where gravity is so strong that not even light can escape, play a crucial role in the evolution of galaxies and the universe as a whole. Stephen Hawking’s groundbreaking discoveries about black hole radiation, often referred to as Hawking Radiation, are also discussed in detail." `); }); diff --git a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts index 5556e407de979..fe0772a314327 100644 --- a/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts +++ b/x-pack/solutions/search/plugins/search_playground/server/utils/get_value_for_selected_field.ts @@ -14,11 +14,8 @@ export const getValueForSelectedField = (hit: SearchHit, path: string): string = } // for semantic_text matches - const innerHitPath = `${hit._index}.${path}`; - if (!!hit.inner_hits?.[innerHitPath]) { - return hit.inner_hits[innerHitPath].hits.hits - .map((innerHit) => innerHit._source.text) - .join('\n --- \n'); + if (hit.highlight && hit.highlight[path]) { + return hit.highlight[path].flat().join('\n --- \n'); } return has(hit._source, `${path}.text`)