From aa8c9ab621a29250f7dc497e513be23240133b59 Mon Sep 17 00:00:00 2001 From: Ashwin Mathur <97467100+awinml@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:12:44 +0530 Subject: [PATCH] Update examples with refactored document class (#63) --- integrations/instructor-embedder.md | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/integrations/instructor-embedder.md b/integrations/instructor-embedder.md index c30927aa..2f79f3bb 100644 --- a/integrations/instructor-embedder.md +++ b/integrations/instructor-embedder.md @@ -88,6 +88,10 @@ text_embedder = InstructorTextEmbedder( model_name_or_path="hkunlp/instructor-base", instruction=instruction, device="cpu" ) +text_embedder.warm_up() +result = text_embedder.run(text) +print(f"Embedding: {result['embedding']}") +print(f"Embedding Dimension: {len(result['embedding'])}") ``` ### Using the Document Embedder @@ -111,22 +115,22 @@ doc_embedder.warm_up() # Text taken from PubMed QA Dataset (https://huggingface.co/datasets/pubmed_qa) document_list = [ Document( - text="Oxidative stress generated within inflammatory joints can produce autoimmune phenomena and joint destruction. Radical species with oxidative activity, including reactive nitrogen species, represent mediators of inflammation and cartilage damage.", - metadata={ + content="Oxidative stress generated within inflammatory joints can produce autoimmune phenomena and joint destruction. Radical species with oxidative activity, including reactive nitrogen species, represent mediators of inflammation and cartilage damage.", + meta={ "pubid": "25,445,628", "long_answer": "yes", }, ), Document( - text="Plasma levels of pancreatic polypeptide (PP) rise upon food intake. Although other pancreatic islet hormones, such as insulin and glucagon, have been extensively investigated, PP secretion and actions are still poorly understood.", - metadata={ + content="Plasma levels of pancreatic polypeptide (PP) rise upon food intake. Although other pancreatic islet hormones, such as insulin and glucagon, have been extensively investigated, PP secretion and actions are still poorly understood.", + meta={ "pubid": "25,445,712", "long_answer": "yes", }, ), Document( - text="Disturbed sleep is associated with mood disorders. Both depression and insomnia may increase the risk of disability retirement. The longitudinal links among insomnia, depression and work incapacity are poorly known.", - metadata={ + content="Disturbed sleep is associated with mood disorders. Both depression and insomnia may increase the risk of disability retirement. The longitudinal links among insomnia, depression and work incapacity are poorly known.", + meta={ "pubid": "25,451,441", "long_answer": "yes", }, @@ -134,7 +138,7 @@ document_list = [ ] result = doc_embedder.run(document_list) -print(f"Document Text: {result['documents'][0].text}") +print(f"Document Text: {result['documents'][0].content}") print(f"Document Embedding: {result['documents'][0].embedding}") print(f"Embedding Dimension: {len(result['documents'][0].embedding)}") ``` @@ -187,8 +191,8 @@ dataset = load_dataset("xsum", split="train") # Create Document objects from the dataset and add them to the document store using the indexing pipeline docs = [ Document( - text=doc["document"], - metadata={ + content=doc["document"], + meta={ "summary": doc["summary"], "doc_id": doc["id"], }, @@ -236,8 +240,8 @@ results = query_pipeline.run( # Print information about retrieved documents for doc in results["Retriever"]["documents"]: - print(f"Text:\n{doc.text[:150]}...\n") - print(f"Metadata: {doc.metadata}") + print(f"Text:\n{doc.content[:150]}...\n") + print(f"Metadata: {doc.meta}") print(f"Score: {doc.score}") print("-" * 10 + "\n") ```