feat(02-05): Slack file_share extraction and channel-aware outbound routing

- Add gateway/channels/slack_media.py with is_file_share_event, media_type_from_mime,
  build_slack_storage_key, build_attachment_from_slack_file, download_and_store_slack_file
- Add _send_response() helper to orchestrator/tasks.py for channel-aware dispatch
  (Slack -> chat.update, WhatsApp -> send_whatsapp_message)
- Add send_whatsapp_message import to orchestrator/tasks.py for WhatsApp outbound
- Add boto3>=1.35.0 to gateway dependencies for MinIO S3 client
- Add 23 unit tests in test_slack_media.py (TDD)
This commit is contained in:
2026-03-23 15:06:45 -06:00
parent eba6c85188
commit 9dd7c481a3
6 changed files with 809 additions and 1 deletions

View File

@@ -0,0 +1,258 @@
"""
Slack file_share media extraction helpers.
Handles Slack file_share events by:
1. Detecting file_share subtype in Slack events
2. Downloading files from Slack's private download URL using the bot token
3. Uploading files to MinIO with tenant-isolated keys
4. Building MediaAttachment objects with correct media_type, filename, mime_type
Storage key format: {tenant_id}/{agent_id}/{message_id}/{filename}
This module is intentionally import-side-effect free — no boto3 or httpx
imports at the module level to keep import times fast. Both are imported
inside the async function that requires them.
"""
from __future__ import annotations
import logging
from shared.models.message import MediaAttachment, MediaType
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# MIME type → MediaType mapping
# ---------------------------------------------------------------------------
# Exact-match lookup table from MIME type string to MediaType.
# Keys are written in lowercase; media_type_from_mime() lowercases its input
# before consulting this table. Types that are not listed here are not
# rejected: media_type_from_mime() falls back to the ``image/`` / ``audio/`` /
# ``video/`` prefix and finally to MediaType.DOCUMENT.
_MIME_TO_MEDIA_TYPE: dict[str, MediaType] = {
    # Images
    "image/jpeg": MediaType.IMAGE,
    "image/jpg": MediaType.IMAGE,
    "image/png": MediaType.IMAGE,
    "image/gif": MediaType.IMAGE,
    "image/webp": MediaType.IMAGE,
    "image/bmp": MediaType.IMAGE,
    "image/tiff": MediaType.IMAGE,
    "image/svg+xml": MediaType.IMAGE,
    # Audio
    "audio/mpeg": MediaType.AUDIO,
    "audio/mp3": MediaType.AUDIO,
    "audio/ogg": MediaType.AUDIO,
    "audio/wav": MediaType.AUDIO,
    "audio/webm": MediaType.AUDIO,
    "audio/aac": MediaType.AUDIO,
    # Video
    "video/mp4": MediaType.VIDEO,
    "video/webm": MediaType.VIDEO,
    "video/ogg": MediaType.VIDEO,
    "video/quicktime": MediaType.VIDEO,
    "video/x-msvideo": MediaType.VIDEO,
}
def media_type_from_mime(mime_type: str) -> MediaType:
    """
    Classify a MIME type string as a MediaType.

    Resolution order:
      1. Empty / falsy input -> DOCUMENT
      2. Case-insensitive exact hit in ``_MIME_TO_MEDIA_TYPE``
      3. Top-level type prefix: ``image/`` -> IMAGE, ``audio/`` -> AUDIO,
         ``video/`` -> VIDEO
      4. Anything else -> DOCUMENT

    Args:
        mime_type: MIME type string (e.g. "image/png", "application/pdf").

    Returns:
        The MediaType the MIME type maps to.
    """
    if not mime_type:
        return MediaType.DOCUMENT

    normalized = mime_type.lower()

    # Explicit table takes precedence over the generic prefix rules.
    known = _MIME_TO_MEDIA_TYPE.get(normalized)
    if known is not None:
        return known

    prefix_fallbacks = (
        ("image/", MediaType.IMAGE),
        ("audio/", MediaType.AUDIO),
        ("video/", MediaType.VIDEO),
    )
    for prefix, media_type in prefix_fallbacks:
        if normalized.startswith(prefix):
            return media_type

    # Nothing matched: treat as a generic document.
    return MediaType.DOCUMENT
# ---------------------------------------------------------------------------
# Event detection
# ---------------------------------------------------------------------------
def is_file_share_event(event: dict) -> bool:
    """
    Tell whether a Slack event carries the ``file_share`` subtype.

    Args:
        event: Slack event dict. A missing ``subtype`` key is allowed.

    Returns:
        True when the event's ``subtype`` equals ``"file_share"`` exactly
        (case-sensitive); False otherwise, including when the key is absent.
    """
    subtype = event.get("subtype")
    return subtype == "file_share"
# ---------------------------------------------------------------------------
# Storage key builder
# ---------------------------------------------------------------------------
def build_slack_storage_key(
    tenant_id: str,
    agent_id: str,
    message_id: str,
    filename: str,
) -> str:
    """
    Build the MinIO storage key for a Slack file attachment.

    Format: ``{tenant_id}/{agent_id}/{message_id}/{filename}``

    The key is tenant-isolated and message-scoped. The agent_id is included
    to allow per-agent media lifecycle management in the future.

    The filename comes from the Slack uploader and is therefore untrusted.
    Only its final path component is used, so a name containing ``/``, ``\\``
    or ``..`` cannot add extra key segments or dot-segments; the key always
    has exactly four ``/``-separated parts. If nothing usable remains (empty
    name, ``.``, ``..`` or a name ending in a separator) the placeholder
    ``"file"`` is used, matching the fallback applied by
    ``download_and_store_slack_file``.

    Args:
        tenant_id: Konstruct tenant ID.
        agent_id: Agent ID that received the message.
        message_id: Slack message timestamp (event ts) or generated UUID.
        filename: Original filename from Slack.

    Returns:
        MinIO object key string.
    """
    # Normalise Windows separators, then keep only the last component.
    leaf = (filename or "").replace("\\", "/").rsplit("/", 1)[-1]
    if leaf in ("", ".", ".."):
        leaf = "file"
    return f"{tenant_id}/{agent_id}/{message_id}/{leaf}"
# ---------------------------------------------------------------------------
# Attachment builder
# ---------------------------------------------------------------------------
def build_attachment_from_slack_file(
    file_info: dict,
    storage_key: str,
) -> MediaAttachment:
    """
    Turn Slack file metadata plus a storage key into a MediaAttachment.

    Only ``mimetype``, ``name`` and ``size`` are read from ``file_info``.
    A missing or empty ``mimetype`` / ``name`` is stored as ``None`` on the
    attachment, and an empty MIME type is classified by
    ``media_type_from_mime`` (which maps it to DOCUMENT).

    Args:
        file_info: Slack file metadata dict (from files.info or inline event).
            Expected keys: id, name, mimetype, size.
        storage_key: MinIO storage key already written by
            download_and_store_slack_file.

    Returns:
        A populated MediaAttachment instance.
    """
    mime = file_info.get("mimetype") or ""
    name = file_info.get("name") or ""
    size = file_info.get("size")  # passed through unchanged; may be None

    return MediaAttachment(
        media_type=media_type_from_mime(mime),
        storage_key=storage_key,
        mime_type=mime if mime else None,
        filename=name if name else None,
        size_bytes=size,
    )
# ---------------------------------------------------------------------------
# Download + upload
# ---------------------------------------------------------------------------
async def download_and_store_slack_file(
    tenant_id: str,
    agent_id: str,
    message_id: str,
    file_info: dict,
    bot_token: str,
    expiry: int = 3600,
) -> tuple[str, str]:
    """
    Download a Slack private file and store it in MinIO.

    Slack files are only accessible with a valid bot token via
    ``url_private_download``. This function:

    1. Downloads the file using the bot token in the Authorization header
    2. Uploads to MinIO under the key returned by ``build_slack_storage_key``
       (``{tenant_id}/{agent_id}/{message_id}/{filename}``)
    3. Generates a presigned GET URL valid for ``expiry`` seconds

    The whole file is held in memory between download and upload.

    boto3 is a synchronous client, so the upload is run in a worker thread
    via ``asyncio.to_thread``; otherwise a large upload would stall every
    other coroutine on the event loop for its full duration.

    Args:
        tenant_id: Konstruct tenant ID.
        agent_id: Agent ID (for scoped storage key).
        message_id: Message ID (for scoped storage key).
        file_info: Slack file metadata dict. Must include:
            - ``name`` (filename; ``"file"`` is used when missing or empty)
            - ``mimetype`` (MIME type; ``application/octet-stream`` is used
              as the stored Content-Type when missing or empty)
            - ``url_private_download`` (Slack private URL)
        bot_token: Slack bot token (xoxb-...) for Authorization header.
        expiry: Presigned URL expiry in seconds (default: 3600 = 1 hour).

    Returns:
        Tuple of (storage_key, presigned_url).

    Raises:
        httpx.HTTPStatusError: If Slack answers with a non-success status
            (raised by ``response.raise_for_status()``).
        httpx.HTTPError: For other transport-level failures, including a
            missing/empty ``url_private_download``.
        Exception: Errors raised by boto3 during upload or URL signing
            propagate unchanged.
    """
    # Imported lazily so importing this module stays cheap.
    import asyncio

    import boto3  # type: ignore[import-untyped]
    import httpx

    from shared.config import settings

    filename: str = file_info.get("name", "file") or "file"
    mime_type: str = file_info.get("mimetype", "") or ""
    download_url: str = file_info.get("url_private_download", "") or ""

    storage_key = build_slack_storage_key(tenant_id, agent_id, message_id, filename)

    # Download the file from Slack using the bot token
    async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
        response = await client.get(
            download_url,
            headers={"Authorization": f"Bearer {bot_token}"},
        )
        response.raise_for_status()
        file_bytes = response.content

    # The client is created on the calling thread (as before); only the
    # blocking network call below is handed to a worker thread.
    s3_client = boto3.client(
        "s3",
        endpoint_url=settings.minio_endpoint,
        aws_access_key_id=settings.minio_access_key,
        aws_secret_access_key=settings.minio_secret_key,
        region_name="us-east-1",  # MinIO ignores region but boto3 requires it
    )

    # Upload to MinIO without blocking the event loop.
    await asyncio.to_thread(
        s3_client.put_object,
        Bucket=settings.minio_media_bucket,
        Key=storage_key,
        Body=file_bytes,
        ContentType=mime_type or "application/octet-stream",
    )

    # Generate presigned URL
    presigned_url: str = s3_client.generate_presigned_url(
        "get_object",
        Params={"Bucket": settings.minio_media_bucket, "Key": storage_key},
        ExpiresIn=expiry,
    )

    logger.info(
        "download_and_store_slack_file: stored tenant=%s key=%s",
        tenant_id,
        storage_key,
    )
    return storage_key, presigned_url

View File

@@ -16,6 +16,7 @@ dependencies = [
"python-telegram-bot>=21.0",
"httpx>=0.28.0",
"redis>=5.0.0",
"boto3>=1.35.0",
]
[tool.uv.sources]