feat(02-05): Slack file_share extraction and channel-aware outbound routing
- Add gateway/channels/slack_media.py with is_file_share_event, media_type_from_mime, build_slack_storage_key, build_attachment_from_slack_file, download_and_store_slack_file - Add _send_response() helper to orchestrator/tasks.py for channel-aware dispatch (Slack -> chat.update, WhatsApp -> send_whatsapp_message) - Add send_whatsapp_message import to orchestrator/tasks.py for WhatsApp outbound - Add boto3>=1.35.0 to gateway dependencies for MinIO S3 client - Add 23 unit tests in test_slack_media.py (TDD)
This commit is contained in:
258
packages/gateway/gateway/channels/slack_media.py
Normal file
258
packages/gateway/gateway/channels/slack_media.py
Normal file
@@ -0,0 +1,258 @@
|
||||
"""
|
||||
Slack file_share media extraction helpers.
|
||||
|
||||
Handles Slack file_share events by:
|
||||
1. Detecting file_share subtype in Slack events
|
||||
2. Downloading files from Slack's private download URL using the bot token
|
||||
3. Uploading files to MinIO with tenant-isolated keys
|
||||
4. Building MediaAttachment objects with correct media_type, filename, mime_type
|
||||
|
||||
Storage key format: {tenant_id}/{agent_id}/{message_id}/{filename}
|
||||
|
||||
This module is intentionally import-side-effect free — no boto3 or httpx
|
||||
imports at the module level to keep import times fast. Both are imported
|
||||
inside the async function that requires them.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from shared.models.message import MediaAttachment, MediaType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MIME type → MediaType mapping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Exact-match lookup table, grouped by media family. Insertion order is
# images, then audio, then video. MIME types not listed here are handled by
# the prefix fallback in media_type_from_mime().
_MIME_TO_MEDIA_TYPE: dict[str, MediaType] = {
    **dict.fromkeys(
        (
            "image/jpeg",
            "image/jpg",
            "image/png",
            "image/gif",
            "image/webp",
            "image/bmp",
            "image/tiff",
            "image/svg+xml",
        ),
        MediaType.IMAGE,
    ),
    **dict.fromkeys(
        (
            "audio/mpeg",
            "audio/mp3",
            "audio/ogg",
            "audio/wav",
            "audio/webm",
            "audio/aac",
        ),
        MediaType.AUDIO,
    ),
    **dict.fromkeys(
        (
            "video/mp4",
            "video/webm",
            "video/ogg",
            "video/quicktime",
            "video/x-msvideo",
        ),
        MediaType.VIDEO,
    ),
}
|
||||
|
||||
|
||||
def media_type_from_mime(mime_type: str) -> MediaType:
    """
    Classify a MIME type string as a MediaType.

    Resolution order:
      1. Empty / falsy input → DOCUMENT
      2. Case-insensitive exact match in the explicit MIME table
      3. Family prefix: image/* → IMAGE, audio/* → AUDIO, video/* → VIDEO
      4. Anything else → DOCUMENT

    Args:
        mime_type: MIME type string (e.g. "image/png", "application/pdf").

    Returns:
        The MediaType enum value selected by the rules above.
    """
    if not mime_type:
        return MediaType.DOCUMENT

    normalized = mime_type.lower()

    # Explicit table wins over the generic family prefixes.
    exact = _MIME_TO_MEDIA_TYPE.get(normalized)
    if exact is not None:
        return exact

    family_prefixes = (
        ("image/", MediaType.IMAGE),
        ("audio/", MediaType.AUDIO),
        ("video/", MediaType.VIDEO),
    )
    for prefix, family in family_prefixes:
        if normalized.startswith(prefix):
            return family

    # No known family: treat the file as a generic document.
    return MediaType.DOCUMENT
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Event detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def is_file_share_event(event: dict) -> bool:
    """
    Tell whether a Slack event carries the ``file_share`` subtype.

    Args:
        event: Slack event dict.

    Returns:
        True when the event's "subtype" value equals "file_share"; False when
        the key is missing or holds any other value.
    """
    subtype = event.get("subtype")
    return subtype == "file_share"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Storage key builder
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_slack_storage_key(
    tenant_id: str,
    agent_id: str,
    message_id: str,
    filename: str,
) -> str:
    """
    Compose the MinIO object key under which a Slack attachment is stored.

    Layout: ``{tenant_id}/{agent_id}/{message_id}/{filename}``

    The four components are joined with "/" exactly as given; no escaping or
    normalisation is applied.

    Args:
        tenant_id: Konstruct tenant ID (first path segment).
        agent_id: Agent ID that received the message.
        message_id: Slack message timestamp (event ts) or generated UUID.
        filename: Original filename from Slack.

    Returns:
        MinIO object key string.
    """
    key_template = "{}/{}/{}/{}"
    return key_template.format(tenant_id, agent_id, message_id, filename)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Attachment builder
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_attachment_from_slack_file(
    file_info: dict,
    storage_key: str,
) -> MediaAttachment:
    """
    Turn Slack file metadata plus a storage key into a MediaAttachment.

    Missing or empty ``mimetype`` / ``name`` values are passed on as None.
    The media type is derived from the MIME type via media_type_from_mime().

    Args:
        file_info: Slack file metadata dict (from files.info or inline event).
                   Keys read here: name, mimetype, size.
        storage_key: MinIO storage key already written by download_and_store_slack_file.

    Returns:
        A populated MediaAttachment instance.
    """
    declared_mime: str = file_info.get("mimetype") or ""
    original_name: str = file_info.get("name") or ""
    byte_count: int | None = file_info.get("size")

    attachment = MediaAttachment(
        media_type=media_type_from_mime(declared_mime),
        storage_key=storage_key,
        # Empty strings carry no information; store None instead.
        mime_type=declared_mime if declared_mime else None,
        filename=original_name if original_name else None,
        size_bytes=byte_count,
    )
    return attachment
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Download + upload
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def download_and_store_slack_file(
    tenant_id: str,
    agent_id: str,
    message_id: str,
    file_info: dict,
    bot_token: str,
    expiry: int = 3600,
) -> tuple[str, str]:
    """
    Download a Slack private file and store it in MinIO.

    Slack files are only accessible with a valid bot token via
    ``url_private_download``. This function:
      1. Downloads the file using the bot token in the Authorization header
      2. Uploads to MinIO with the key ``{tenant_id}/{agent_id}/{message_id}/{filename}``
      3. Generates a presigned URL valid for ``expiry`` seconds

    The boto3 client is synchronous, so step 2 and 3 run in the event loop's
    default executor; the coroutine awaits the result instead of blocking the
    loop for the duration of the upload.

    Args:
        tenant_id: Konstruct tenant ID.
        agent_id: Agent ID (for scoped storage key).
        message_id: Message ID (for scoped storage key).
        file_info: Slack file metadata dict. Keys read here:
                   - ``name`` (filename; falls back to "file" when missing/empty)
                   - ``mimetype`` (MIME type; falls back to
                     "application/octet-stream" for the upload)
                   - ``url_private_download`` (Slack private URL)
        bot_token: Slack bot token (xoxb-...) for Authorization header.
        expiry: Presigned URL expiry in seconds (default: 3600 = 1 hour).

    Returns:
        Tuple of (storage_key, presigned_url).

    Raises:
        httpx.HTTPStatusError: If Slack answers the download with an error
            status (raised by ``response.raise_for_status()``).
        Other exceptions raised by httpx or boto3 propagate unchanged.
    """
    # Function-scope imports keep module import free of boto3/httpx cost.
    import asyncio

    import boto3  # type: ignore[import-untyped]
    import httpx

    from shared.config import settings

    filename: str = file_info.get("name", "file") or "file"
    mime_type: str = file_info.get("mimetype", "") or ""
    download_url: str = file_info.get("url_private_download", "") or ""
    storage_key = build_slack_storage_key(tenant_id, agent_id, message_id, filename)

    # Download the file from Slack using the bot token
    async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
        response = await client.get(
            download_url,
            headers={"Authorization": f"Bearer {bot_token}"},
        )
        response.raise_for_status()
        file_bytes = response.content

    def _upload_and_presign() -> str:
        """Blocking boto3 work: create client, upload object, presign a GET URL."""
        s3_client = boto3.client(
            "s3",
            endpoint_url=settings.minio_endpoint,
            aws_access_key_id=settings.minio_access_key,
            aws_secret_access_key=settings.minio_secret_key,
            region_name="us-east-1",  # MinIO ignores region but boto3 requires it
        )
        s3_client.put_object(
            Bucket=settings.minio_media_bucket,
            Key=storage_key,
            Body=file_bytes,
            ContentType=mime_type or "application/octet-stream",
        )
        return s3_client.generate_presigned_url(
            "get_object",
            Params={"Bucket": settings.minio_media_bucket, "Key": storage_key},
            ExpiresIn=expiry,
        )

    # boto3 is synchronous; run it off the event loop thread so other
    # coroutines keep making progress while the upload is in flight.
    loop = asyncio.get_running_loop()
    presigned_url: str = await loop.run_in_executor(None, _upload_and_presign)

    logger.info(
        "download_and_store_slack_file: stored tenant=%s key=%s",
        tenant_id,
        storage_key,
    )
    return storage_key, presigned_url
|
||||
@@ -16,6 +16,7 @@ dependencies = [
|
||||
"python-telegram-bot>=21.0",
|
||||
"httpx>=0.28.0",
|
||||
"redis>=5.0.0",
|
||||
"boto3>=1.35.0",
|
||||
]
|
||||
|
||||
[tool.uv.sources]
|
||||
|
||||
Reference in New Issue
Block a user