Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 58 additions & 6 deletions gmail/gmail_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import Optional, List, Dict, Literal

from email.mime.text import MIMEText
from bs4 import BeautifulSoup

from fastapi import Body
from pydantic import Field
Expand All @@ -32,6 +33,38 @@
HTML_BODY_TRUNCATE_LIMIT = 20000


def _html_to_text(html: str) -> str:
    """
    Convert HTML content to readable plain text.

    ``<script>`` and ``<style>`` elements are removed before the text is
    extracted. Runs of whitespace inside each extracted line are collapsed to
    a single space and empty lines are dropped.

    Args:
        html: HTML content to convert.

    Returns:
        The extracted plain text. If parsing or extraction raises, a warning
        is logged and the original ``html`` value is returned unchanged.
    """
    try:
        soup = BeautifulSoup(html, 'html.parser')

        # Script and style bodies are code, not message content.
        for tag in soup(["script", "style"]):
            tag.decompose()

        text = soup.get_text()

        # Normalise whitespace line by line. str.split() with no argument
        # collapses any run of whitespace, so the words of a sentence stay
        # together on one line instead of being emitted one word per line
        # (which is what splitting on a single space produced).
        cleaned_lines = (" ".join(line.split()) for line in text.splitlines())
        return "\n".join(line for line in cleaned_lines if line)
    except Exception as e:
        # Deliberate best-effort: a conversion failure must not prevent the
        # caller from returning the message, so fall back to the raw HTML.
        logger.warning(f"Failed to convert HTML to text: {e}")
        return html


def _extract_message_body(payload):
"""
Helper function to extract plain text body from a Gmail message payload.
Expand Down Expand Up @@ -103,20 +136,39 @@ def _format_body_content(text_body: str, html_body: str) -> str:
"""
Helper function to format message body content with HTML fallback and truncation.

Detects when text/plain is a useless fallback by checking for HTML comments.
Plain text should never contain HTML comments (<!--), so their presence
indicates the text is a fallback message, not actual content.

Args:
text_body: Plain text body content
html_body: HTML body content

Returns:
Formatted body content string
"""
if text_body.strip():
# Detect HTML comments in plain text (indicates fallback)
has_html_comment = "<!--" in text_body

# Use HTML if:
# 1. Text is empty
# 2. Text contains HTML comments (fallback indicator)
# 3. HTML is significantly longer (50x+) than text
use_html = (
not text_body.strip() or
has_html_comment or
(html_body.strip() and len(html_body) > len(text_body) * 50)
)

if use_html and html_body.strip():
# Convert HTML to readable text
text_from_html = _html_to_text(html_body)
# Truncate very large content to keep responses manageable
if len(text_from_html) > HTML_BODY_TRUNCATE_LIMIT:
text_from_html = text_from_html[:HTML_BODY_TRUNCATE_LIMIT] + "\n\n[Content truncated...]"
return text_from_html
elif text_body.strip():
return text_body
elif html_body.strip():
# Truncate very large HTML to keep responses manageable
if len(html_body) > HTML_BODY_TRUNCATE_LIMIT:
html_body = html_body[:HTML_BODY_TRUNCATE_LIMIT] + "\n\n[HTML content truncated...]"
return f"[HTML Content Converted]\n{html_body}"
else:
return "[No readable content found]"

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ readme = "README.md"
keywords = [ "mcp", "google", "workspace", "llm", "ai", "claude", "model", "context", "protocol", "server"]
requires-python = ">=3.10"
dependencies = [
"beautifulsoup4>=4.12.0",
"fastapi>=0.115.12",
"fastmcp==2.12.5",
"google-api-python-client>=2.168.0",
Expand Down