Skip to content

Commit 59af538

Browse files
you-n-g and SunsetWolf authored
feat: init pydantic ai agent & context 7 mcp (#1240)
* feat: init pydantic ai agent & context 7 mcp * feat: integrate MCP documentation search into data science pipeline evaluation * fix: disable MCP documentation search and update related docstrings and defaults * lint * fix: correct prompt formatting and conditional blocks in pipeline_eval section * lint * feat: add query method to PAIAgent for synchronous agent execution * fix: apply nest_asyncio for agent and update context7 query method * lint * lint * lint * lint * docs: update MCP folder docstring and rename test class in test_pydantic.py * refactor: centralize completion kwargs logic and update pydantic_ai integration * fixbug * typo * fix: bug triggered by padantic-ai version backtracking. --------- Co-authored-by: Linlang <[email protected]>
1 parent 7d749b8 commit 59af538

File tree

19 files changed

+515
-53
lines changed

19 files changed

+515
-53
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,4 +177,4 @@ rdagent/app/benchmark/factor/example.json
177177

178178
# UI Server resources
179179
videos/
180-
static/
180+
static/

constraints/3.10.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,4 @@ azure-identity==1.17.1
22
dill==0.3.9
33
pillow==10.4.0
44
psutil==6.1.0
5-
rich==13.9.2
65
scipy==1.14.1
7-
tqdm==4.66.5

constraints/3.11.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,4 @@ azure-identity==1.17.1
22
dill==0.3.9
33
pillow==10.4.0
44
psutil==6.1.0
5-
rich==13.9.2
65
scipy==1.14.1
7-
tqdm==4.66.5

rdagent/app/data_science/conf.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
4747
enable_doc_dev: bool = False
4848
model_dump_check_level: Literal["medium", "high"] = "medium"
4949

50+
#### MCP documentation search integration
51+
enable_mcp_documentation_search: bool = False
52+
"""Enable MCP documentation search for error resolution. Requires MCP_ENABLED=true and MCP_CONTEXT7_ENABLED=true in environment."""
53+
5054
### specific feature
5155

5256
### notebook integration
@@ -181,7 +185,7 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
181185

182186
DS_RD_SETTING = DataScienceBasePropSetting()
183187

184-
# enable_cross_trace_diversity llm_select_hypothesis should not be true at the same time
188+
# enable_cross_trace_diversity and llm_select_hypothesis should not be true at the same time
185189
assert not (
186190
DS_RD_SETTING.enable_cross_trace_diversity and DS_RD_SETTING.llm_select_hypothesis
187191
), "enable_cross_trace_diversity and llm_select_hypothesis cannot be true at the same time"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
"""
2+
Agents that can be shared across different scenarios.
3+
"""

rdagent/components/agent/base.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from abc import abstractmethod
2+
3+
import nest_asyncio
4+
from pydantic_ai import Agent
5+
from pydantic_ai.mcp import MCPServerStreamableHTTP
6+
7+
from rdagent.oai.backend.pydantic_ai import get_agent_model
8+
9+
10+
class BaseAgent:
11+
12+
@abstractmethod
13+
def __init__(self, system_prompt: str, toolsets: list[str]): ...
14+
15+
@abstractmethod
16+
def query(self, query: str) -> str: ...
17+
18+
19+
class PAIAgent(BaseAgent):
20+
"""
21+
Pydantic-AI agent
22+
"""
23+
24+
agent: Agent
25+
26+
def __init__(self, system_prompt: str, toolsets: list[str | MCPServerStreamableHTTP]):
27+
toolsets = [(ts if isinstance(ts, MCPServerStreamableHTTP) else MCPServerStreamableHTTP(ts)) for ts in toolsets]
28+
self.agent = Agent(get_agent_model(), system_prompt=system_prompt, toolsets=toolsets)
29+
30+
def query(self, query: str) -> str:
31+
"""
32+
33+
Parameters
34+
----------
35+
query : str
36+
37+
Returns
38+
-------
39+
str
40+
"""
41+
42+
nest_asyncio.apply() # NOTE: very important. Because pydantic-ai uses asyncio!
43+
result = self.agent.run_sync(query)
44+
return result.output
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from typing import Optional
2+
3+
from pydantic_ai.mcp import MCPServerStreamableHTTP
4+
5+
from rdagent.components.agent.base import PAIAgent
6+
from rdagent.components.agent.mcp.context7 import SETTINGS
7+
from rdagent.log import rdagent_logger as logger
8+
from rdagent.utils.agent.tpl import T
9+
10+
11+
class Agent(PAIAgent):
12+
"""
13+
A specific agent for context7
14+
"""
15+
16+
def __init__(self):
17+
toolsets = [MCPServerStreamableHTTP(SETTINGS.url, timeout=SETTINGS.timeout)]
18+
super().__init__(system_prompt=T(".prompts:system_prompt").r(), toolsets=toolsets)
19+
20+
def _build_enhanced_query(self, error_message: str, full_code: Optional[str] = None) -> str:
21+
"""Build enhanced query using experimental prompt templates."""
22+
# Build context information using template
23+
context_info = ""
24+
if full_code:
25+
context_info = T(".prompts:code_context_template").r(full_code=full_code)
26+
27+
# Check for timm library special case (experimental optimization)
28+
timm_trigger = error_message.lower().count("timm") >= 3
29+
timm_trigger_text = ""
30+
if timm_trigger:
31+
timm_trigger_text = T(".prompts:timm_special_case").r()
32+
logger.info("🎯 Timm special handling triggered", tag="context7")
33+
34+
# Construct enhanced query using experimental template
35+
enhanced_query = T(".prompts:context7_enhanced_query_template").r(
36+
error_message=error_message, context_info=context_info, timm_trigger_text=timm_trigger_text
37+
)
38+
39+
return enhanced_query
40+
41+
def query(self, query: str) -> str:
42+
"""
43+
44+
Parameters
45+
----------
46+
query : str
47+
It should be something like an error message.
48+
49+
Returns
50+
-------
51+
str
52+
"""
53+
query = self._build_enhanced_query(error_message=query)
54+
return super().query(query)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Context7 MCP Enhanced Query Prompts
2+
3+
system_prompt: |-
4+
You are a helpful assistant.
5+
You help the user search documentation based on an error message and provide API reference information.
6+
7+
context7_enhanced_query_template: |-
8+
ERROR MESSAGE:
9+
{{error_message}}
10+
{{context_info}}
11+
IMPORTANT INSTRUCTIONS:
12+
1. ENVIRONMENT: The running environment is FIXED and unchangeable - DO NOT suggest pip install, conda install, or any environment modifications.
13+
2. DOCUMENTATION SEARCH REQUIREMENTS:
14+
- Search for official API documentation related to the error
15+
- Focus on parameter specifications, method signatures, and usage patterns
16+
- Find compatible alternatives if the original API doesn't exist
17+
- Consider the current code context and maintain consistency with existing architecture
18+
- Provide API reference information, NOT complete code solutions
19+
3. TOOL USAGE REQUIREMENTS:
20+
- ⚠️ CRITICAL: For EVERY call to 'resolve-library-id', you MUST follow it with A CORRESPONDING call to 'get-library-docs'
21+
- If you call 'resolve-library-id' N times, you MUST call 'get-library-docs' N times (one for each library you found)
22+
- Complete the full workflow: resolve → get-docs → analyze → respond
23+
- Do NOT provide final answers without first getting detailed documentation via 'get-library-docs'
24+
- If 'get-library-docs' returns "Documentation not found" or 404 error, you should never provide guidance based on the library information from 'resolve-library-id'
25+
4. RESPONSE FORMAT:
26+
- Start with a brief explanation of the root cause
27+
- Provide relevant API documentation excerpts
28+
- List available parameters and their descriptions
29+
- Show method signatures and basic usage patterns
30+
- If multiple API options exist, document all viable alternatives
31+
5. STRICT CONSTRAINTS:
32+
- DO NOT provide complete working code replacements
33+
- DO NOT suggest hardware configuration changes (CPU/GPU)
34+
- DO NOT recommend architecture or framework changes
35+
- DO NOT provide performance optimization suggestions
36+
- ONLY provide API documentation and parameter information
37+
6. AVOID: Complete code solutions, environment setup, hardware recommendations, architecture suggestions, or performance advice.
38+
{{timm_trigger_text}}
39+
Example response format:
40+
```
41+
The error occurs because [brief explanation].
42+
API Documentation:
43+
- Method: library.function_name(param1, param2, ...)
44+
- Parameters:
45+
* param1 (type): description
46+
* param2 (type): description
47+
- Usage pattern: Basic syntax without complete implementation
48+
- Alternative APIs (if applicable): list of alternative methods with signatures
49+
```
50+
Please search the documentation and provide API reference information only.
51+
# Special case templates
52+
timm_special_case: |-
53+
7.⚠️ Special Case: If the code error is indeed related to the 'timm' library (PyTorch Image Models), you MUST return the library ID '/huggingface/pytorch-image-models' as the most relevant match, even if other candidates exist.
54+
# Code context template
55+
code_context_template: |-
56+
CURRENT CODE CONTEXT:
57+
```python
58+
{{full_code}}
59+
```
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""
2+
Here is a list of MCP servers.
3+
4+
Each MCP server is an individual RESTful API, so only the following things are included in this folder:
5+
- Settings.
6+
- e.g., mcp/<mcp_name>.py:class Settings(BaseSettings); then it is initialized as a global variable SETTINGS.
7+
- It only defines the format of the settings in Python Class (i.e., Pydantic BaseSettings).
8+
- health_check:
9+
- e.g., mcp/<mcp_name>.py:def health_check() -> bool;
10+
"""
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""
2+
The context7 MCP server used here is a modified version of context7.
3+
4+
You can install it by following the instructions below:
5+
6+
mkdir -p ~/tmp/
7+
cd ~/tmp/ && git clone https://github.com/Hoder-zyf/context7.git
8+
cd ~/tmp/context7
9+
npm install -g bun
10+
bun i && bun run build
11+
bun run dist/index.js --transport http --port 8123 # > bun.out 2>&1 &
12+
"""
13+
14+
from pydantic_settings import BaseSettings, SettingsConfigDict
15+
16+
17+
class Settings(BaseSettings):
18+
"""Project specific settings."""
19+
20+
url: str = "http://localhost:8123/mcp"
21+
timeout: int = 120
22+
23+
model_config = SettingsConfigDict(
24+
env_prefix="CONTEXT7_",
25+
# extra="allow",  # Does it allow extra settings?
26+
)
27+
28+
29+
SETTINGS = Settings()

0 commit comments

Comments
 (0)