feat(02-03): add MediaAttachment model, WhatsApp normalizer, and signature verification

- Add MediaType(StrEnum) and MediaAttachment(BaseModel) to shared/models/message.py
- Add media: list[MediaAttachment] field to MessageContent
- Add whatsapp_app_secret, whatsapp_verify_token, and MinIO settings to shared/config.py
- Add normalize_whatsapp_event() to gateway/normalize.py (text, image, document support)
- Create whatsapp.py adapter with verify_whatsapp_signature() and verify_hub_challenge()
- 30 new passing tests (signature verification + normalizer)
This commit is contained in:
2026-03-23 14:41:48 -06:00
parent b2e86f1046
commit 370a860622
7 changed files with 1987 additions and 3 deletions

View File

@@ -0,0 +1,721 @@
"""
WhatsApp Business Cloud API channel adapter.
Handles Meta Cloud API v20.0 webhooks for WhatsApp Business accounts.
EVENT FLOW (inbound):
1. Meta sends webhook to POST /whatsapp/webhook
2. Read raw body BEFORE any JSON parsing (required for HMAC verification)
3. Verify HMAC-SHA256 signature on raw body bytes
4. Parse JSON and skip non-message events (status updates, read receipts)
5. Normalize via normalize_whatsapp_event() -> KonstructMessage
6. Resolve tenant from phone_number_id as workspace_id
7. Check rate limit
8. Check idempotency (Meta retries on non-200; we dedup on message_id)
9. Business-function scoping gate (tier 1: keyword, tier 2: LLM via system prompt)
10. Download media from Meta API and store in MinIO (if media message)
11. Dispatch handle_message.delay()
12. Always return 200 OK to Meta (Meta retries on non-200)
HUB CHALLENGE (webhook registration):
Meta sends GET /whatsapp/webhook with hub.mode, hub.verify_token, hub.challenge.
We verify the token and return hub.challenge as plain text.
OUTBOUND:
send_whatsapp_message() sends text responses directly via Meta Graph API.
LLM-generated response routing is wired in Plan 02-05 (orchestrator tasks).
SECURITY NOTE:
Always read raw body via request.body() BEFORE json parsing.
HMAC-SHA256 is computed on raw bytes — parsing first discards the original bytes.
"""
from __future__ import annotations
import hashlib
import hmac
import logging
from typing import Any
import httpx
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import JSONResponse, PlainTextResponse
from gateway.normalize import normalize_whatsapp_event
from router.idempotency import is_duplicate
from router.ratelimit import RateLimitExceeded, check_rate_limit
from router.tenant import resolve_tenant
from shared.config import settings
from shared.db import async_session_factory
logger = logging.getLogger(__name__)
whatsapp_router = APIRouter()
# Meta Graph API base URL
_META_API_BASE = "https://graph.facebook.com/v20.0"
# ---------------------------------------------------------------------------
# Signature verification helpers
# ---------------------------------------------------------------------------
def verify_whatsapp_signature(
body: bytes,
signature_header: str,
app_secret: str,
) -> bytes:
"""
Verify the HMAC-SHA256 signature on the raw webhook body.
Meta sends the signature in the X-Hub-Signature-256 header as:
``sha256={hex_digest}``
The signature is computed over the raw body bytes using the app secret
as the HMAC key. We use hmac.compare_digest for timing-safe comparison
to prevent timing oracle attacks.
Args:
body: Raw request body bytes (read BEFORE JSON parsing).
signature_header: Value of the X-Hub-Signature-256 header.
app_secret: WhatsApp app secret (from settings.whatsapp_app_secret).
Returns:
The raw body bytes (unchanged) if the signature is valid.
Raises:
HTTPException(403): If the signature is missing, malformed, or invalid.
"""
if not signature_header.startswith("sha256="):
logger.warning("WhatsApp webhook: missing or malformed X-Hub-Signature-256 header")
raise HTTPException(status_code=403, detail="Invalid signature")
received_sig = signature_header[len("sha256="):]
# Compute expected HMAC-SHA256 over raw body
expected_sig = hmac.new(
app_secret.encode("utf-8"),
body,
hashlib.sha256,
).hexdigest()
# Timing-safe comparison
if not hmac.compare_digest(expected_sig, received_sig):
logger.warning("WhatsApp webhook: signature mismatch — possible unauthorized request")
raise HTTPException(status_code=403, detail="Invalid signature")
return body
def verify_hub_challenge(
hub_mode: str,
hub_verify_token: str,
hub_challenge: str,
expected_token: str,
) -> str:
"""
Verify the hub challenge for WhatsApp webhook registration.
Meta sends a GET request with hub.mode, hub.verify_token, and hub.challenge
when registering a webhook URL. We verify the token and return hub.challenge
as plain text to confirm ownership of the endpoint.
Args:
hub_mode: Must be "subscribe" for webhook registration.
hub_verify_token: Token sent by Meta (must match expected_token).
hub_challenge: Random string from Meta that we echo back.
expected_token: Our configured verify token (settings.whatsapp_verify_token).
Returns:
The hub_challenge string if verification passes.
Raises:
HTTPException(403): If mode is not "subscribe" or token doesn't match.
"""
if hub_mode != "subscribe":
logger.warning("WhatsApp hub: unexpected mode=%r", hub_mode)
raise HTTPException(status_code=403, detail="Invalid hub.mode")
if not hmac.compare_digest(hub_verify_token, expected_token):
logger.warning("WhatsApp hub: verify_token mismatch")
raise HTTPException(status_code=403, detail="Invalid verify_token")
return hub_challenge
# ---------------------------------------------------------------------------
# Business-function scoping helpers
# ---------------------------------------------------------------------------
def is_clearly_off_topic(text: str, allowed_functions: list[str]) -> bool:
"""
Tier 1 business-function scoping gate.
Performs a simple keyword overlap check between the message text and the
agent's allowed business functions. Returns True only when there is ZERO
keyword overlap — i.e., the message shares no words (case-insensitive) with
any of the allowed function strings.
This implements the Meta 2026 business messaging policy requirement:
AI agents must be scoped to their declared business function and must
reject clearly off-topic requests without an LLM call.
Args:
text: The inbound message text (lowercased internally).
allowed_functions: List of function/topic strings for this agent
(e.g. ["customer support", "order tracking", "returns"]).
Returns:
True if the message is clearly off-topic (zero keyword overlap).
False if any keyword overlap exists (message may be on-topic).
"""
if not allowed_functions:
# No scoping configured — let all messages through
return False
if not text.strip():
# Empty message — not clearly off-topic, let adapter decide
return False
text_lower = text.lower()
text_words = set(text_lower.split())
for func in allowed_functions:
func_words = set(func.lower().split())
if text_words & func_words:
# Found overlap — not clearly off-topic
return False
return True
def build_off_topic_reply(agent_name: str, allowed_functions: list[str]) -> str:
"""
Build a canned redirect response for clearly off-topic messages.
Args:
agent_name: The agent's display name.
allowed_functions: The agent's declared allowed business functions.
Returns:
A human-readable redirect message mentioning the agent and topics.
"""
topics = ", ".join(allowed_functions) if allowed_functions else "my designated topics"
return (
f"{agent_name} is here to help with {topics}. "
f"How can I assist you with one of those?"
)
# ---------------------------------------------------------------------------
# Outbound message delivery
# ---------------------------------------------------------------------------
async def send_whatsapp_message(
phone_number_id: str,
access_token: str,
recipient_wa_id: str,
text: str,
) -> None:
"""
Send a text message via Meta Cloud API v20.0.
POST to /v20.0/{phone_number_id}/messages with:
- messaging_product: "whatsapp"
- to: recipient_wa_id
- type: "text"
- text: {"body": text}
Args:
phone_number_id: The sending phone number ID from channel_connections.
access_token: The WhatsApp Business API access token.
recipient_wa_id: The recipient's WhatsApp ID (phone number).
text: The message text to send.
Raises:
httpx.HTTPStatusError: If the Meta API returns an error status.
"""
url = f"{_META_API_BASE}/{phone_number_id}/messages"
headers = {
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/json",
}
payload: dict[str, Any] = {
"messaging_product": "whatsapp",
"to": recipient_wa_id,
"type": "text",
"text": {"body": text},
}
async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.post(url, json=payload, headers=headers)
resp.raise_for_status()
logger.info(
"send_whatsapp_message: sent to=%s phone_number_id=%s",
recipient_wa_id,
phone_number_id,
)
async def send_whatsapp_media(
phone_number_id: str,
access_token: str,
recipient_wa_id: str,
media_url: str,
media_type: str,
) -> None:
"""
Send a media message (image or document) via Meta Cloud API v20.0.
Args:
phone_number_id: The sending phone number ID.
access_token: The WhatsApp Business API access token.
recipient_wa_id: The recipient's WhatsApp ID.
media_url: Publicly accessible URL for the media file.
media_type: Media type string: "image" or "document".
Raises:
httpx.HTTPStatusError: If the Meta API returns an error status.
"""
url = f"{_META_API_BASE}/{phone_number_id}/messages"
headers = {
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/json",
}
payload: dict[str, Any] = {
"messaging_product": "whatsapp",
"to": recipient_wa_id,
"type": media_type,
media_type: {"link": media_url},
}
async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.post(url, json=payload, headers=headers)
resp.raise_for_status()
logger.info(
"send_whatsapp_media: sent %s to=%s phone_number_id=%s",
media_type,
recipient_wa_id,
phone_number_id,
)
# ---------------------------------------------------------------------------
# Media download and MinIO storage
# ---------------------------------------------------------------------------
async def _download_and_store_media(
tenant_id: str,
message_id: str,
media_url: str,
access_token: str,
filename: str | None,
mime_type: str | None,
) -> tuple[str, str]:
"""
Download a media file from Meta API and store it in MinIO.
The ``media_url`` here is the ``meta-media://{media_id}`` placeholder set
by the normalizer. We first resolve the actual download URL from Meta's
media endpoint, then download and upload to MinIO.
Args:
tenant_id: Konstruct tenant ID (for MinIO key prefix).
message_id: WhatsApp message ID (for MinIO key).
media_url: Placeholder URL ``meta-media://{media_id}``.
access_token: WhatsApp Business API token.
filename: Original filename (or None for images).
mime_type: MIME type of the media.
Returns:
Tuple of (storage_key, presigned_url).
"""
import boto3 # type: ignore[import-untyped]
# Extract media_id from placeholder URL
media_id = media_url.replace("meta-media://", "")
# Step 1: Get actual download URL from Meta API
meta_url = f"{_META_API_BASE}/{media_id}"
headers = {"Authorization": f"Bearer {access_token}"}
async with httpx.AsyncClient(timeout=30.0) as client:
url_resp = await client.get(meta_url, headers=headers)
url_resp.raise_for_status()
download_url: str = url_resp.json().get("url", "")
# Step 2: Download the media file
media_resp = await client.get(download_url, headers=headers)
media_resp.raise_for_status()
media_bytes = media_resp.content
# Step 3: Determine filename
ext = ""
if mime_type:
ext_map = {
"image/jpeg": ".jpg",
"image/png": ".png",
"image/webp": ".webp",
"application/pdf": ".pdf",
"audio/ogg": ".ogg",
"audio/mpeg": ".mp3",
"video/mp4": ".mp4",
}
ext = ext_map.get(mime_type, "")
safe_filename = filename or f"{media_id}{ext}"
storage_key = f"{tenant_id}/media/{message_id}/{safe_filename}"
# Step 4: Upload to MinIO
s3_client = boto3.client(
"s3",
endpoint_url=settings.minio_endpoint,
aws_access_key_id=settings.minio_access_key,
aws_secret_access_key=settings.minio_secret_key,
region_name="us-east-1", # MinIO ignores region but boto3 requires it
)
s3_client.put_object(
Bucket=settings.minio_media_bucket,
Key=storage_key,
Body=media_bytes,
ContentType=mime_type or "application/octet-stream",
)
# Step 5: Generate presigned URL (1 hour TTL)
presigned_url: str = s3_client.generate_presigned_url(
"get_object",
Params={"Bucket": settings.minio_media_bucket, "Key": storage_key},
ExpiresIn=3600,
)
logger.info(
"_download_and_store_media: stored tenant=%s key=%s",
tenant_id,
storage_key,
)
return storage_key, presigned_url
# ---------------------------------------------------------------------------
# Webhook routes
# ---------------------------------------------------------------------------
@whatsapp_router.get("/whatsapp/webhook")
async def whatsapp_webhook_verify(request: Request) -> PlainTextResponse:
"""
WhatsApp webhook URL verification (hub challenge handshake).
Meta sends a GET request when you register or update a webhook URL.
We must return hub.challenge as plain text to confirm ownership.
Query params:
hub.mode: Must be "subscribe"
hub.verify_token: Must match our configured WHATSAPP_VERIFY_TOKEN
hub.challenge: Random string we echo back
"""
hub_mode = request.query_params.get("hub.mode", "")
hub_verify_token = request.query_params.get("hub.verify_token", "")
hub_challenge = request.query_params.get("hub.challenge", "")
challenge = verify_hub_challenge(
hub_mode=hub_mode,
hub_verify_token=hub_verify_token,
hub_challenge=hub_challenge,
expected_token=settings.whatsapp_verify_token,
)
return PlainTextResponse(challenge)
@whatsapp_router.post("/whatsapp/webhook")
async def whatsapp_webhook_inbound(request: Request) -> JSONResponse:
"""
WhatsApp inbound message webhook.
CRITICAL: Read raw body FIRST (before JSON parsing) for HMAC verification.
Always return 200 OK to Meta — Meta retries delivery on non-200 responses.
Full adapter sequence:
1. Read raw body (BEFORE JSON parsing)
2. Verify HMAC-SHA256 signature
3. Parse JSON
4. Skip non-message events (status updates, read receipts)
5. Normalize to KonstructMessage
6. Resolve tenant from phone_number_id
7. Check rate limit
8. Check idempotency
9. Business-function scoping (tier 1 keyword gate)
10. Download media and store in MinIO (if media message)
11. Dispatch handle_message.delay()
"""
# Always return 200 OK — Meta retries on non-200
try:
await _process_whatsapp_webhook(request)
except HTTPException:
# Re-raise HTTPExceptions from signature verification (403)
raise
except Exception:
logger.exception("WhatsApp webhook: unexpected error during processing")
# Return 200 anyway to prevent Meta retry storms
return JSONResponse({"status": "ok"})
async def _process_whatsapp_webhook(request: Request) -> None:
"""
Internal handler for WhatsApp inbound webhook processing.
Separated from the route handler so we can raise HTTPException for 403s
while still returning 200 for all other errors.
"""
# Step 1: Read raw body BEFORE any JSON parsing
raw_body = await request.body()
# Step 2: Verify HMAC-SHA256 signature
sig_header = request.headers.get("X-Hub-Signature-256", "")
verify_whatsapp_signature(raw_body, sig_header, settings.whatsapp_app_secret)
# Step 3: Parse JSON from the already-read raw body
import json
payload = json.loads(raw_body)
# Step 4: Skip non-message events
value = payload.get("entry", [{}])[0].get("changes", [{}])[0].get("value", {})
if "messages" not in value:
logger.debug("WhatsApp webhook: no messages in payload — skipping (status update?)")
return
# Step 5: Normalize to KonstructMessage
msg = normalize_whatsapp_event(payload)
phone_number_id: str = msg.channel_metadata.get("phone_number_id", "")
wa_id: str = msg.sender.user_id
message_id: str = msg.channel_metadata.get("message_id", msg.id)
# Step 6: Resolve tenant from phone_number_id
tenant_id: str | None = None
async with async_session_factory() as session:
tenant_id = await resolve_tenant(
workspace_id=phone_number_id,
channel_type="whatsapp",
session=session,
)
if tenant_id is None:
logger.warning(
"WhatsApp webhook: unknown phone_number_id=%r — ignoring",
phone_number_id,
)
return
msg.tenant_id = tenant_id
# Get Redis client from the gateway module
from gateway.main import _get_redis # noqa: PLC0415
redis = _get_redis()
# Step 7: Check rate limit
try:
await check_rate_limit(tenant_id=tenant_id, channel="whatsapp", redis=redis)
except RateLimitExceeded as exc:
logger.info(
"WhatsApp rate limit exceeded: tenant=%s — sending ephemeral rejection",
tenant_id,
)
# Load access_token from DB for reply
access_token = await _get_channel_access_token(tenant_id, phone_number_id)
if access_token:
try:
await send_whatsapp_message(
phone_number_id=phone_number_id,
access_token=access_token,
recipient_wa_id=wa_id,
text=(
f"I'm receiving too many requests right now. "
f"Please try again in about {exc.remaining_seconds} seconds."
),
)
except Exception:
logger.exception("WhatsApp: failed to send rate limit rejection")
return
# Step 8: Check idempotency
if await is_duplicate(tenant_id, message_id, redis):
logger.debug(
"WhatsApp duplicate message: tenant=%s message_id=%s — skipping",
tenant_id,
message_id,
)
return
# Step 9: Business-function scoping (tier 1 keyword gate)
agent_name, allowed_functions, access_token = await _get_agent_scoping_info(
tenant_id, phone_number_id
)
if is_clearly_off_topic(msg.content.text, allowed_functions):
logger.info(
"WhatsApp: off-topic message from wa_id=%s tenant=%s — sending canned redirect",
wa_id,
tenant_id,
)
if access_token:
try:
await send_whatsapp_message(
phone_number_id=phone_number_id,
access_token=access_token,
recipient_wa_id=wa_id,
text=build_off_topic_reply(agent_name, allowed_functions),
)
except Exception:
logger.exception("WhatsApp: failed to send off-topic redirect")
return
# Step 10: Download media and store in MinIO (if media message)
if msg.content.media and access_token:
for attachment in msg.content.media:
if attachment.url and attachment.url.startswith("meta-media://"):
try:
storage_key, presigned_url = await _download_and_store_media(
tenant_id=tenant_id,
message_id=message_id,
media_url=attachment.url,
access_token=access_token,
filename=attachment.filename,
mime_type=attachment.mime_type,
)
attachment.storage_key = storage_key
attachment.url = presigned_url
except Exception:
logger.exception(
"WhatsApp: failed to download/store media for message_id=%s",
message_id,
)
# Step 11: Dispatch to Celery
from orchestrator.tasks import handle_message as handle_message_task # noqa: PLC0415
task_payload = msg.model_dump() | {
"phone_number_id": phone_number_id,
"bot_token": access_token or "",
}
try:
handle_message_task.delay(task_payload)
except Exception:
logger.exception(
"WhatsApp: failed to dispatch handle_message task: tenant=%s msg_id=%s",
tenant_id,
msg.id,
)
logger.info(
"WhatsApp: dispatched msg_id=%s tenant=%s wa_id=%s",
msg.id,
tenant_id,
wa_id,
)
# ---------------------------------------------------------------------------
# DB helpers
# ---------------------------------------------------------------------------
async def _get_channel_access_token(
tenant_id: str,
phone_number_id: str,
) -> str | None:
"""Load the WhatsApp access token from channel_connections for a given phone number."""
from sqlalchemy import select, text # noqa: PLC0415
from shared.models.tenant import ChannelConnection, ChannelTypeEnum # noqa: PLC0415
try:
async with async_session_factory() as session:
await session.execute(text(f"SET LOCAL app.current_tenant = '{tenant_id}'"))
stmt = (
select(ChannelConnection.config)
.where(ChannelConnection.channel_type == ChannelTypeEnum.WHATSAPP)
.where(ChannelConnection.workspace_id == phone_number_id)
.limit(1)
)
result = await session.execute(stmt)
row = result.scalar_one_or_none()
if row:
return row.get("access_token")
except Exception:
logger.exception(
"_get_channel_access_token: DB error tenant=%s phone_number_id=%s",
tenant_id,
phone_number_id,
)
return None
async def _get_agent_scoping_info(
tenant_id: str,
phone_number_id: str,
) -> tuple[str, list[str], str | None]:
"""
Load agent name, allowed_functions, and access_token for scoping gate.
Returns:
Tuple of (agent_name, allowed_functions, access_token).
Falls back to ("AI Assistant", [], None) if not found.
"""
from sqlalchemy import select, text # noqa: PLC0415
from shared.models.tenant import Agent, ChannelConnection, ChannelTypeEnum # noqa: PLC0415
agent_name = "AI Assistant"
allowed_functions: list[str] = []
access_token: str | None = None
try:
async with async_session_factory() as session:
await session.execute(text(f"SET LOCAL app.current_tenant = '{tenant_id}'"))
# Load channel connection config (access_token + agent_id)
conn_stmt = (
select(ChannelConnection)
.where(ChannelConnection.channel_type == ChannelTypeEnum.WHATSAPP)
.where(ChannelConnection.workspace_id == phone_number_id)
.limit(1)
)
conn_result = await session.execute(conn_stmt)
conn = conn_result.scalar_one_or_none()
if conn:
config: dict = conn.config or {}
access_token = config.get("access_token")
agent_id = str(conn.agent_id) if conn.agent_id else None
if agent_id:
import uuid as _uuid # noqa: PLC0415
agent_stmt = (
select(Agent)
.where(Agent.id == _uuid.UUID(agent_id))
.limit(1)
)
agent_result = await session.execute(agent_stmt)
agent = agent_result.scalar_one_or_none()
if agent:
agent_name = agent.name
# Use tools as proxy for allowed_functions
tools: list[str] = agent.tools or []
allowed_functions = tools
except Exception:
logger.exception(
"_get_agent_scoping_info: DB error tenant=%s phone_number_id=%s",
tenant_id,
phone_number_id,
)
return agent_name, allowed_functions, access_token

View File

@@ -1,10 +1,11 @@
"""
Slack event normalization.
Channel event normalization.
Converts Slack Events API payloads into KonstructMessage format.
Converts channel-specific event payloads (Slack, WhatsApp, etc.) into the
unified KonstructMessage format.
All channel adapters produce KonstructMessage — the router and orchestrator
never inspect Slack-specific fields directly.
never inspect channel-specific fields directly.
"""
from __future__ import annotations
@@ -16,6 +17,8 @@ from datetime import datetime, timezone
from shared.models.message import (
ChannelType,
KonstructMessage,
MediaAttachment,
MediaType,
MessageContent,
SenderInfo,
)
@@ -98,3 +101,109 @@ def normalize_slack_event(
timestamp=timestamp,
thread_id=thread_ts,
)
# ---------------------------------------------------------------------------
# WhatsApp normalization
# ---------------------------------------------------------------------------
# Map from WhatsApp media message type strings to MediaType enum
_WHATSAPP_MEDIA_TYPES: dict[str, MediaType] = {
"image": MediaType.IMAGE,
"document": MediaType.DOCUMENT,
"audio": MediaType.AUDIO,
"video": MediaType.VIDEO,
}
def normalize_whatsapp_event(payload: dict) -> KonstructMessage:
"""
Normalize a Meta Cloud API v20.0 webhook payload into a KonstructMessage.
Handles text, image, and document message types.
For media messages (image/document), creates a MediaAttachment with a
placeholder URL in the format ``meta-media://{media_id}``. The actual
download from Meta's API and upload to MinIO happens in the WhatsApp
channel adapter after normalization.
WhatsApp has no threading concept — thread_id is set to the sender's
wa_id so that conversation history is scoped per phone number.
Args:
payload: Parsed Meta Cloud API v20.0 webhook JSON (already-deserialized dict).
Returns:
A KonstructMessage. ``tenant_id`` is ``None`` at this stage — the
Message Router populates it via channel_connections lookup.
"""
# Meta Cloud API structure: entry[0].changes[0].value
value: dict = payload["entry"][0]["changes"][0]["value"]
metadata: dict = value.get("metadata", {})
messages: list[dict] = value.get("messages", [])
if not messages:
# Status updates, read receipts, etc. — no messages to normalize
raise ValueError("WhatsApp webhook payload contains no messages")
message: dict = messages[0]
msg_type: str = message.get("type", "text")
wa_id: str = message.get("from", "")
message_id: str = message.get("id", "")
phone_number_id: str = metadata.get("phone_number_id", "")
# Timestamp — Meta sends Unix integer timestamps as strings
ts_raw: str = message.get("timestamp", "0")
try:
timestamp = datetime.fromtimestamp(int(ts_raw), tz=timezone.utc)
except (ValueError, TypeError):
timestamp = datetime.now(tz=timezone.utc)
# Extract display name from contacts array
contacts: list[dict] = value.get("contacts", [])
display_name: str = wa_id
if contacts:
display_name = contacts[0].get("profile", {}).get("name", wa_id)
# Extract content based on message type
text_body: str = ""
media_attachments: list[MediaAttachment] = []
if msg_type == "text":
text_body = message.get("text", {}).get("body", "")
elif msg_type in _WHATSAPP_MEDIA_TYPES:
media_data: dict = message.get(msg_type, {})
media_id: str = media_data.get("id", "")
attachment = MediaAttachment(
media_type=_WHATSAPP_MEDIA_TYPES[msg_type],
# Placeholder URL — actual download happens in adapter
url=f"meta-media://{media_id}" if media_id else None,
mime_type=media_data.get("mime_type"),
filename=media_data.get("filename"),
)
media_attachments.append(attachment)
# Caption is the text for media messages (may not be present)
text_body = media_data.get("caption", "")
return KonstructMessage(
id=str(uuid.uuid4()),
tenant_id=None, # Populated by Message Router
channel=ChannelType.WHATSAPP,
channel_metadata={
"phone_number_id": phone_number_id,
"message_id": message_id,
"wa_id": wa_id,
},
sender=SenderInfo(
user_id=wa_id,
display_name=display_name,
),
content=MessageContent(
text=text_body,
media=media_attachments,
),
timestamp=timestamp,
# WhatsApp has no threading — use wa_id as conversation scope identifier
thread_id=wa_id,
)