Skip to content

Commit ab25220

Browse files
authored
Merge branch 'main' into openai-longprobs
2 parents cc3e1e0 + edd5a99 commit ab25220

File tree

8 files changed

+392
-723
lines changed

8 files changed

+392
-723
lines changed

docs-website/docs/concepts/pipelines/asyncpipeline.mdx

Lines changed: 64 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -52,37 +52,85 @@ You can find more details in our [API Reference](/reference/pipeline-api#asyncpi
5252

5353
```python
5454
import asyncio
55-
from haystack import AsyncPipeline
56-
from haystack.components.embedders import SentenceTransformersTextEmbedder
57-
from haystack.components.retrievers import InMemoryEmbeddingRetriever, InMemoryBM25Retriever
58-
from haystack.components.joiners import DocumentJoiner
55+
56+
from haystack import AsyncPipeline, Document
5957
from haystack.components.builders import ChatPromptBuilder
58+
from haystack.components.embedders import (
59+
SentenceTransformersDocumentEmbedder,
60+
SentenceTransformersTextEmbedder,
61+
)
6062
from haystack.components.generators.chat import OpenAIChatGenerator
63+
from haystack.components.joiners import DocumentJoiner
64+
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
65+
from haystack.dataclasses import ChatMessage
66+
from haystack.document_stores.in_memory import InMemoryDocumentStore
67+
68+
documents = [
69+
Document(content="Khufu is the largest pyramid."),
70+
Document(content="Khafre is the middle pyramid."),
71+
Document(content="Menkaure is the smallest pyramid."),
72+
]
73+
74+
docs_embedder = SentenceTransformersDocumentEmbedder()
75+
docs_embedder.warm_up()
76+
77+
document_store = InMemoryDocumentStore()
78+
document_store.write_documents(docs_embedder.run(documents=documents)["documents"])
79+
80+
prompt_template = [
81+
ChatMessage.from_system(
82+
"""
83+
You are a precise, factual QA assistant.
84+
According to the following documents:
85+
{% for document in documents %}
86+
{{document.content}}
87+
{% endfor %}
88+
89+
If an answer cannot be deduced from the documents, say "I don't know based on these documents".
90+
91+
When answering:
92+
- be concise
93+
- list the documents that support your answer
94+
95+
Answer the given question.
96+
"""
97+
),
98+
ChatMessage.from_user("{{query}}"),
99+
ChatMessage.from_system("Answer:"),
100+
]
61101

62102
hybrid_rag_retrieval = AsyncPipeline()
63103
hybrid_rag_retrieval.add_component("text_embedder", SentenceTransformersTextEmbedder())
64-
hybrid_rag_retrieval.add_component("embedding_retriever", InMemoryEmbeddingRetriever(document_store=document_store))
65-
hybrid_rag_retrieval.add_component("bm25_retriever", InMemoryBM25Retriever(document_store=document_store))
104+
hybrid_rag_retrieval.add_component(
105+
"embedding_retriever", InMemoryEmbeddingRetriever(document_store=document_store, top_k=3)
106+
)
107+
hybrid_rag_retrieval.add_component("bm25_retriever", InMemoryBM25Retriever(document_store=document_store, top_k=3))
66108
hybrid_rag_retrieval.add_component("document_joiner", DocumentJoiner())
67109
hybrid_rag_retrieval.add_component("prompt_builder", ChatPromptBuilder(template=prompt_template))
68110
hybrid_rag_retrieval.add_component("llm", OpenAIChatGenerator())
69111

70-
hybrid_rag_retrieval.connect("text_embedder", "embedding_retriever")
71-
hybrid_rag_retrieval.connect("bm25_retriever", "document_joiner")
72-
hybrid_rag_retrieval.connect("embedding_retriever", "document_joiner")
73-
hybrid_rag_retrieval.connect("document_joiner", "prompt_builder.documents")
74-
hybrid_rag_retrieval.connect("prompt_builder", "llm")
112+
hybrid_rag_retrieval.connect("text_embedder.embedding", "embedding_retriever.query_embedding")
113+
hybrid_rag_retrieval.connect("bm25_retriever.documents", "document_joiner.documents")
114+
hybrid_rag_retrieval.connect("embedding_retriever.documents", "document_joiner.documents")
115+
hybrid_rag_retrieval.connect("document_joiner.documents", "prompt_builder.documents")
116+
hybrid_rag_retrieval.connect("prompt_builder.prompt", "llm.messages")
117+
118+
question = "Which pyramid is neither the smallest nor the biggest?"
119+
120+
data = {
121+
"prompt_builder": {"query": question},
122+
"text_embedder": {"text": question},
123+
"bm25_retriever": {"query": question},
124+
}
75125

76126
async def process_results():
77127
async for partial_output in hybrid_rag_retrieval.run_async_generator(
78-
data=data,
79-
include_outputs_from={"document_joiner", "llm"}
128+
data=data, include_outputs_from={"document_joiner", "llm"}
80129
):
81-
# Each partial_output contains the results from a completed component
82-
if "retriever" in partial_output:
130+
if "document_joiner" in partial_output:
83131
print("Retrieved documents:", len(partial_output["document_joiner"]["documents"]))
84132
if "llm" in partial_output:
85133
print("Generated answer:", partial_output["llm"]["replies"][0])
86134

87135
asyncio.run(process_results())
88-
```
136+
```

docs-website/docs/concepts/pipelines/visualizing-pipelines.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,6 @@ This is an example of what a pipeline graph may look like:
8484

8585
## Importing a Pipeline to deepset Studio
8686

87-
YYou can import your Haystack pipeline into deepset Studio and continue visually building your pipeline
87+
You can import your Haystack pipeline into deepset Studio and continue visually building your pipeline.
8888

8989
To do that, follow the steps described in our deepset AI Platform [documentation](https://docs.cloud.deepset.ai/docs/import-a-pipeline#import-your-pipeline).

docs-website/reference/integrations-api/mcp.md

Lines changed: 46 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,8 @@ async def connect() -> list[types.Tool]
353353

354354
Connect to an MCP server using SSE transport.
355355

356+
Note: If both custom headers and a token are provided, the custom headers take precedence.
357+
356358
**Raises**:
357359

358360
- `MCPConnectionError`: If connection to the server fails
@@ -396,6 +398,8 @@ async def connect() -> list[types.Tool]
396398

397399
Connect to an MCP server using streamable HTTP transport.
398400

401+
Note: If both custom headers and a token are provided, the custom headers take precedence.
402+
399403
**Raises**:
400404

401405
- `MCPConnectionError`: If connection to the server fails
@@ -476,11 +480,31 @@ server_info = SSEServerInfo(
476480
)
477481
```
478482

483+
For custom headers (e.g., non-standard authentication):
484+
485+
```python
486+
# Single custom header with Secret
487+
server_info = SSEServerInfo(
488+
url="https://my-mcp-server.com",
489+
headers={"X-API-Key": Secret.from_env_var("API_KEY")},
490+
)
491+
492+
# Multiple headers (mix of Secret and plain strings)
493+
server_info = SSEServerInfo(
494+
url="https://my-mcp-server.com",
495+
headers={
496+
"X-API-Key": Secret.from_env_var("API_KEY"),
497+
"X-Client-ID": "my-client-id",
498+
},
499+
)
500+
```
501+
479502
**Arguments**:
480503

481504
- `url`: Full URL of the MCP server (including /sse endpoint)
482505
- `base_url`: Base URL of the MCP server (deprecated, use url instead)
483-
- `token`: Authentication token for the server (optional)
506+
- `token`: Authentication token for the server (optional, generates "Authorization: Bearer `<token>`" header)
507+
- `headers`: Custom HTTP headers (optional, takes precedence over the `token` parameter if provided)
484508
- `timeout`: Connection timeout in seconds
485509

486510
<a id="haystack_integrations.tools.mcp.mcp_tool.SSEServerInfo.base_url"></a>
@@ -529,10 +553,30 @@ server_info = StreamableHttpServerInfo(
529553
)
530554
```
531555

556+
For custom headers (e.g., non-standard authentication):
557+
558+
```python
559+
# Single custom header with Secret
560+
server_info = StreamableHttpServerInfo(
561+
url="https://my-mcp-server.com",
562+
headers={"X-API-Key": Secret.from_env_var("API_KEY")},
563+
)
564+
565+
# Multiple headers (mix of Secret and plain strings)
566+
server_info = StreamableHttpServerInfo(
567+
url="https://my-mcp-server.com",
568+
headers={
569+
"X-API-Key": Secret.from_env_var("API_KEY"),
570+
"X-Client-ID": "my-client-id",
571+
},
572+
)
573+
```
574+
532575
**Arguments**:
533576

534577
- `url`: Full URL of the MCP server (streamable HTTP endpoint)
535-
- `token`: Authentication token for the server (optional)
578+
- `token`: Authentication token for the server (optional, generates "Authorization: Bearer `<token>`" header)
579+
- `headers`: Custom HTTP headers (optional, takes precedence over the `token` parameter if provided)
536580
- `timeout`: Connection timeout in seconds
537581

538582
<a id="haystack_integrations.tools.mcp.mcp_tool.StreamableHttpServerInfo.__post_init__"></a>
@@ -795,27 +839,6 @@ def __del__()
795839

796840
Cleanup resources when the tool is garbage collected.
797841

798-
<a id="haystack_integrations.tools.mcp.mcp_tool.MCPTool.tool_spec"></a>
799-
800-
#### MCPTool.tool\_spec
801-
802-
```python
803-
@property
804-
def tool_spec() -> dict[str, Any]
805-
```
806-
807-
Return the Tool specification to be used by the Language Model.
808-
809-
<a id="haystack_integrations.tools.mcp.mcp_tool.MCPTool.invoke"></a>
810-
811-
#### MCPTool.invoke
812-
813-
```python
814-
def invoke(**kwargs: Any) -> Any
815-
```
816-
817-
Invoke the Tool with the provided keyword arguments.
818-
819842
<a id="haystack_integrations.tools.mcp.mcp_tool._MCPClientSessionManager"></a>
820843

821844
### \_MCPClientSessionManager
@@ -1030,126 +1053,3 @@ def close()
10301053

10311054
Close the underlying MCP client safely.
10321055

1033-
<a id="haystack_integrations.tools.mcp.mcp_toolset.MCPToolset.__post_init__"></a>
1034-
1035-
#### MCPToolset.\_\_post\_init\_\_
1036-
1037-
```python
1038-
def __post_init__()
1039-
```
1040-
1041-
Validate and set up the toolset after initialization.
1042-
1043-
This handles the case when tools are provided during initialization.
1044-
1045-
<a id="haystack_integrations.tools.mcp.mcp_toolset.MCPToolset.__iter__"></a>
1046-
1047-
#### MCPToolset.\_\_iter\_\_
1048-
1049-
```python
1050-
def __iter__() -> Iterator[Tool]
1051-
```
1052-
1053-
Return an iterator over the Tools in this Toolset.
1054-
1055-
This allows the Toolset to be used wherever a list of Tools is expected.
1056-
1057-
**Returns**:
1058-
1059-
An iterator yielding Tool instances
1060-
1061-
<a id="haystack_integrations.tools.mcp.mcp_toolset.MCPToolset.__contains__"></a>
1062-
1063-
#### MCPToolset.\_\_contains\_\_
1064-
1065-
```python
1066-
def __contains__(item: Any) -> bool
1067-
```
1068-
1069-
Check if a tool is in this Toolset.
1070-
1071-
Supports checking by:
1072-
- Tool instance: tool in toolset
1073-
- Tool name: "tool_name" in toolset
1074-
1075-
**Arguments**:
1076-
1077-
- `item`: Tool instance or tool name string
1078-
1079-
**Returns**:
1080-
1081-
True if contained, False otherwise
1082-
1083-
<a id="haystack_integrations.tools.mcp.mcp_toolset.MCPToolset.add"></a>
1084-
1085-
#### MCPToolset.add
1086-
1087-
```python
1088-
def add(tool: Union[Tool, "Toolset"]) -> None
1089-
```
1090-
1091-
Add a new Tool or merge another Toolset.
1092-
1093-
**Arguments**:
1094-
1095-
- `tool`: A Tool instance or another Toolset to add
1096-
1097-
**Raises**:
1098-
1099-
- `ValueError`: If adding the tool would result in duplicate tool names
1100-
- `TypeError`: If the provided object is not a Tool or Toolset
1101-
1102-
<a id="haystack_integrations.tools.mcp.mcp_toolset.MCPToolset.__add__"></a>
1103-
1104-
#### MCPToolset.\_\_add\_\_
1105-
1106-
```python
1107-
def __add__(other: Union[Tool, "Toolset", list[Tool]]) -> "Toolset"
1108-
```
1109-
1110-
Concatenate this Toolset with another Tool, Toolset, or list of Tools.
1111-
1112-
**Arguments**:
1113-
1114-
- `other`: Another Tool, Toolset, or list of Tools to concatenate
1115-
1116-
**Raises**:
1117-
1118-
- `TypeError`: If the other parameter is not a Tool, Toolset, or list of Tools
1119-
- `ValueError`: If the combination would result in duplicate tool names
1120-
1121-
**Returns**:
1122-
1123-
A new Toolset containing all tools
1124-
1125-
<a id="haystack_integrations.tools.mcp.mcp_toolset.MCPToolset.__len__"></a>
1126-
1127-
#### MCPToolset.\_\_len\_\_
1128-
1129-
```python
1130-
def __len__() -> int
1131-
```
1132-
1133-
Return the number of Tools in this Toolset.
1134-
1135-
**Returns**:
1136-
1137-
Number of Tools
1138-
1139-
<a id="haystack_integrations.tools.mcp.mcp_toolset.MCPToolset.__getitem__"></a>
1140-
1141-
#### MCPToolset.\_\_getitem\_\_
1142-
1143-
```python
1144-
def __getitem__(index)
1145-
```
1146-
1147-
Get a Tool by index.
1148-
1149-
**Arguments**:
1150-
1151-
- `index`: Index of the Tool to get
1152-
1153-
**Returns**:
1154-
1155-
The Tool at the specified index

0 commit comments

Comments
 (0)