# pipeline.yml
# Components of the pipeline, keyed by the names that `connections` refers to.
# Each component carries a `type` (import path of the class to build) and
# `init_parameters` (the arguments it is built with).
components:
  # Receives the generator's replies and the joined documents.
  answer_builder:
    init_parameters:
      pattern: null
      reference_pattern: null
    type: haystack.components.builders.answer_builder.AnswerBuilder

  # OpenAI model that writes the documentation from the rendered prompt.
  generator:
    init_parameters:
      api_base_url: null
      # The key is taken from the environment; no secret is stored in this file.
      # NOTE(review): `strict: true` presumably makes a missing variable an
      # error at load time — confirm against the Haystack Secret docs.
      api_key:
        env_vars:
          - OPENAI_API_KEY
        strict: true
        type: env_var
      generation_kwargs: {}
      model: gpt-4o-mini
      organization: null
      streaming_callback: null
      # Folded scalar: the line breaks below become single spaces, so the value
      # is one paragraph with no trailing newline.
      system_prompt: >-
        You are an expert assistant skilled in generating structured,
        comprehensive documentation. Your role is to create thorough, accurate documentation
        based strictly on the provided context, without using external knowledge or
        general information. The content must be divided into two main sections: one
        for non-technical users and one for technical users, with each section tailored
        to their needs. The output should be clear, detailed, and organized, including
        code snippets, examples, and conflict resolution where applicable. Your final
        output should be ready in Markdown format.
    type: haystack.components.generators.openai.OpenAIGenerator

  # Merges the document lists of both retrievers into one list, ordered by
  # score. `top_k: null` means no cap is configured here.
  joiner:
    init_parameters:
      join_mode: concatenate
      sort_by_score: true
      top_k: null
      weights: null
    type: haystack.components.joiners.document_joiner.DocumentJoiner

  # Renders the prompt sent to the generator.
  prompt_builder:
    init_parameters:
      required_variables: null
      # Jinja template. It reads `question` and iterates over `documents`,
      # using each document's `content` and the meta keys `date`, `title` and
      # `headline`.
      # Kept as a double-quoted scalar on purpose: the text contains spaces
      # directly before line breaks (" \n"), which a block scalar would turn
      # into fragile trailing whitespace.
      template: "**Task:** \nGenerate comprehensive documentation based solely on\
        \ the provided documents. **Do not use any external knowledge** or information\
        \ outside of the provided context. Divide the documentation into two main\
        \ sections:\n\n- **Non-Technical User:** Focus on explanations that are accessible\
        \ to non-technical stakeholders (e.g., **business decision-makers or managers**).\
        \ Highlight the purpose, value, and practical outcomes, avoiding technical\
        \ jargon. Use examples and high-level explanations to ensure understanding.\n\
        \n- **Technical User:** Provide in-depth technical content for an audience\
        \ of **developers, engineers, or system architects**. Include detailed explanations\
        \ of key technical concepts, code snippets with thorough descriptions, and\
        \ examples where applicable. Explain the functionality and purpose of each\
        \ code fragment, along with any potential issues or caveats.\n\n**Requirements:**\n\
        \n1. **Length:** The documentation should be detailed and cover all aspects\
        \ of the provided content.\n\n2. **Structure:** \n - **Title Page:** Include\
        \ a title reflecting the main theme of the documents. \n - **Introduction:**\
        \ Provide a clear overview of the content, purpose, and scope of the documentation.\
        \ \n - **Non-Technical User Section:** \n - Clear, high-level explanations\
        \ that avoid technical depth. \n - Emphasize the **purpose, benefits,\
        \ and outcomes** for non-technical users. \n - Include examples and **highlight\
        \ key points** with bold text where necessary. \n - **Technical User Section:**\
        \ \n - Detailed explanations of key technical concepts. \n - Include\
        \ well-explained code snippets, with a focus on their **practical application**\
        \ and any **potential pitfalls**. \n - **Conflict Resolution:** If conflicting\
        \ information is present, resolve it using the document date or highlight\
        \ unresolved contradictions **inline** within the relevant sections.\n\n3.\
        \ **Content Guidelines:** \n - Represent all information accurately from\
        \ the documents. \n - Expand bullet points into full sentences and paragraphs.\
        \ \n - Use bullet points, tables, or code fragments where necessary, with\
        \ in-depth explanations. \n - Ensure smooth transitions between sections.\n\
        \n4. **Style:** \n - The non-technical section should be accessible and\
        \ easy to understand. \n - The technical section should be professional\
        \ and formal, with precise technical language.\n\n5. **Specifics:** \n \
        \ - Highlight any critical findings, data, or statistics from the documents.\
        \ \n - Emphasize unique aspects with full reasoning and analysis. \n \
        \ - If any sections lack sufficient detail, indicate **gaps or missing information**\
        \ rather than assuming or fabricating content.\n\n\nUser Question: {{question}}\n\
        Documents to Analyze:\n{% for doc in documents %}\nDate: {{doc.meta['date']}}\n\
        Title: {{doc.meta['title']}} - {{doc.meta['headline']}}\nContent: \n{{doc.content}}\n\
        {% endfor %}"
      variables: null
    type: haystack.components.builders.prompt_builder.PromptBuilder

  # Retriever over the `docs-wire` namespace. It shares the `wire-rag` index
  # with `retriever_gh`; the two differ only in `namespace`.
  retriever_docs_wire:
    init_parameters:
      document_store:
        init_parameters:
          api_key:
            env_vars:
              - PINECONE_API_KEY
            strict: true
            type: env_var
          batch_size: 100
          # NOTE(review): has to equal the vector size produced by
          # text_embedder's model (all-MiniLM-L6-v2 is expected to emit 384) —
          # confirm if the model is ever changed.
          dimension: 384
          index: wire-rag
          metric: cosine
          namespace: docs-wire
          spec:
            serverless:
              cloud: aws
              region: us-east-1
        type: haystack_integrations.document_stores.pinecone.document_store.PineconeDocumentStore
      filter_policy: replace
      filters: {}
      top_k: 25
    type: haystack_integrations.components.retrievers.pinecone.embedding_retriever.PineconeEmbeddingRetriever

  # Retriever over the `github-wireapp` namespace of the same index.
  retriever_gh:
    init_parameters:
      document_store:
        init_parameters:
          api_key:
            env_vars:
              - PINECONE_API_KEY
            strict: true
            type: env_var
          batch_size: 100
          # NOTE(review): keep in sync with the embedder's output size (see the
          # model configured under text_embedder) — confirm on model change.
          dimension: 384
          index: wire-rag
          metric: cosine
          namespace: github-wireapp
          spec:
            serverless:
              cloud: aws
              region: us-east-1
        type: haystack_integrations.document_stores.pinecone.document_store.PineconeDocumentStore
      filter_policy: replace
      filters: {}
      top_k: 25
    type: haystack_integrations.components.retrievers.pinecone.embedding_retriever.PineconeEmbeddingRetriever

  # Embeds the query text on CPU with a sentence-transformers model.
  text_embedder:
    init_parameters:
      batch_size: 32
      device:
        device: cpu
        type: single
      model: sentence-transformers/all-MiniLM-L6-v2
      model_kwargs: null
      normalize_embeddings: false
      precision: float32
      prefix: ''
      progress_bar: true
      suffix: ''
      # Hugging Face token looked up in either variable.
      # NOTE(review): `strict: false` presumably allows both to be unset —
      # confirm against the Haystack Secret docs.
      token:
        env_vars:
          - HF_API_TOKEN
          - HF_TOKEN
        strict: false
        type: env_var
      tokenizer_kwargs: null
      truncate_dim: null
      trust_remote_code: false
    type: haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder
# Data-flow edges between components. Every entry pairs one output
# (`sender`) with one input (`receiver`), each written as <component>.<socket>.
connections:
  # The query embedding fans out to both retrievers.
  - receiver: retriever_gh.query_embedding
    sender: text_embedder.embedding
  - receiver: retriever_docs_wire.query_embedding
    sender: text_embedder.embedding
  # Both retrievers feed the same joiner input.
  - receiver: joiner.documents
    sender: retriever_gh.documents
  - receiver: joiner.documents
    sender: retriever_docs_wire.documents
  # The joined documents go to the prompt and, unchanged, to the answer builder.
  - receiver: prompt_builder.documents
    sender: joiner.documents
  - receiver: answer_builder.documents
    sender: joiner.documents
  # Prompt -> generator -> answer builder.
  - receiver: generator.prompt
    sender: prompt_builder.prompt
  - receiver: answer_builder.replies
    sender: generator.replies
# Pipeline-level settings.
# NOTE(review): presumably an upper bound on how often a loop in the graph may
# run — confirm the exact semantics for the installed Haystack version. The
# edges listed under `connections` in this file do not form a cycle.
max_loops_allowed: 100
# No pipeline-level metadata is set.
metadata: {}