test(02-04): add failing tests for escalation handler
- Unit tests: rule matching, natural language escalation, transcript formatting
- Integration tests: Slack API calls, Redis key, audit log, return value
This commit is contained in:
236
tests/unit/test_escalation.py
Normal file
236
tests/unit/test_escalation.py
Normal file
@@ -0,0 +1,236 @@
|
||||
"""
|
||||
Unit tests for the escalation handler.
|
||||
|
||||
Tests cover:
|
||||
- check_escalation_rules: rule matching logic with conversation metadata
|
||||
- check_escalation_rules: natural language escalation trigger
|
||||
- build_transcript: message formatting and truncation
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from orchestrator.escalation.handler import build_transcript, check_escalation_rules
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_agent(rules: list[dict], name: str = "Mara") -> MagicMock:
    """Create a mock Agent with ``name`` and ``escalation_rules`` set.

    Args:
        rules: Escalation rule dicts; stored unchanged (same list object)
            on the mock's ``escalation_rules`` attribute.
        name: Value stored on the mock's ``name`` attribute.

    Returns:
        A ``MagicMock`` carrying the two attributes above.
    """
    agent = MagicMock()
    # ``name`` is assigned after construction on purpose: passing ``name=`` to
    # the MagicMock constructor labels the mock's repr instead of creating a
    # ``name`` attribute.
    agent.name = name
    agent.escalation_rules = rules
    return agent
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_escalation_rules — rule-based matching
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCheckEscalationRules:
    """Rule-based matching of ``check_escalation_rules`` against metadata dicts."""

    @staticmethod
    def _billing_agent() -> MagicMock:
        """Return an agent whose single rule is the billing-dispute handoff."""
        return make_agent([{"condition": "billing_dispute AND attempts > 2", "action": "handoff_human"}])

    def test_no_rules_returns_none(self) -> None:
        """Agent with no escalation rules always returns None."""
        outcome = check_escalation_rules(make_agent([]), "hello", {})
        assert outcome is None

    def test_billing_dispute_and_attempts_matches(self) -> None:
        """Condition 'billing_dispute AND attempts > 2' matches when metadata has both."""
        metadata = {"billing_dispute": True, "attempts": 3}
        outcome = check_escalation_rules(self._billing_agent(), "I want to cancel", metadata)
        assert outcome is not None
        assert outcome["action"] == "handoff_human"

    def test_billing_dispute_insufficient_attempts_no_match(self) -> None:
        """Condition 'billing_dispute AND attempts > 2' does NOT match when attempts <= 2."""
        # attempts == 2 is the boundary: the rule requires strictly more than 2.
        metadata = {"billing_dispute": True, "attempts": 2}
        outcome = check_escalation_rules(self._billing_agent(), "help me", metadata)
        assert outcome is None

    def test_billing_dispute_false_no_match(self) -> None:
        """Condition 'billing_dispute AND attempts > 2' does NOT match when billing_dispute=False."""
        metadata = {"billing_dispute": False, "attempts": 5}
        outcome = check_escalation_rules(self._billing_agent(), "help me", metadata)
        assert outcome is None

    def test_first_matching_rule_returned(self) -> None:
        """When multiple rules exist, the first matching rule is returned."""
        # Both rules are satisfied by the metadata below; only list order
        # distinguishes which action comes back.
        agent = make_agent([
            {"condition": "billing_dispute AND attempts > 2", "action": "handoff_human"},
            {"condition": "billing_dispute AND attempts > 1", "action": "other_action"},
        ])
        metadata = {"billing_dispute": True, "attempts": 3}
        outcome = check_escalation_rules(agent, "help", metadata)
        assert outcome is not None
        assert outcome["action"] == "handoff_human"

    def test_missing_metadata_field_no_match(self) -> None:
        """Condition with field missing from metadata returns None (not an error)."""
        no_metadata: dict = {}  # neither billing_dispute nor attempts is present
        outcome = check_escalation_rules(self._billing_agent(), "help", no_metadata)
        assert outcome is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_escalation_rules — natural language escalation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestNaturalLanguageEscalation:
    """Phrase-triggered escalation via the ``natural_language_escalation`` condition."""

    @staticmethod
    def _nl_agent() -> MagicMock:
        """Return an agent whose single rule is the natural-language handoff."""
        return make_agent([{"condition": "natural_language_escalation", "action": "handoff_human"}])

    def test_talk_to_human_phrase_matches_when_enabled(self) -> None:
        """'can I talk to a human?' triggers escalation when natural_lang_enabled=True."""
        outcome = check_escalation_rules(
            self._nl_agent(),
            "can I talk to a human?",
            {},
            natural_lang_enabled=True,
        )
        assert outcome is not None
        assert outcome["action"] == "handoff_human"

    def test_talk_to_human_phrase_no_match_when_disabled(self) -> None:
        """Natural language escalation returns None when natural_lang_enabled=False."""
        outcome = check_escalation_rules(
            self._nl_agent(),
            "can I talk to a human?",
            {},
            natural_lang_enabled=False,
        )
        assert outcome is None

    def test_speak_to_someone_phrase(self) -> None:
        """'speak to someone' triggers natural language escalation when enabled."""
        outcome = check_escalation_rules(
            self._nl_agent(), "I need to speak to someone", {}, natural_lang_enabled=True
        )
        assert outcome is not None

    def test_get_a_person_phrase(self) -> None:
        """'get a person' triggers natural language escalation when enabled."""
        outcome = check_escalation_rules(
            self._nl_agent(), "can I get a person please", {}, natural_lang_enabled=True
        )
        assert outcome is not None

    def test_human_agent_phrase(self) -> None:
        """'human agent' triggers natural language escalation when enabled."""
        outcome = check_escalation_rules(
            self._nl_agent(), "I want a human agent", {}, natural_lang_enabled=True
        )
        assert outcome is not None

    def test_real_person_phrase(self) -> None:
        """'real person' triggers natural language escalation when enabled."""
        outcome = check_escalation_rules(
            self._nl_agent(), "let me talk to a real person", {}, natural_lang_enabled=True
        )
        assert outcome is not None

    def test_manager_phrase(self) -> None:
        """'manager' triggers natural language escalation when enabled."""
        outcome = check_escalation_rules(
            self._nl_agent(), "get me your manager", {}, natural_lang_enabled=True
        )
        assert outcome is not None

    def test_normal_message_does_not_trigger(self) -> None:
        """Normal message does not trigger natural language escalation even when enabled."""
        outcome = check_escalation_rules(
            self._nl_agent(), "what is my account balance?", {}, natural_lang_enabled=True
        )
        assert outcome is None

    def test_natural_lang_only_when_no_other_rules_match(self) -> None:
        """Escalation phrases are ignored when the agent has no natural_language_escalation rule.

        The agent here carries only a metadata-based billing rule, and the
        metadata is empty, so an escalation phrase with
        natural_lang_enabled=True must still yield None.
        """
        agent = make_agent([{"condition": "billing_dispute AND attempts > 2", "action": "handoff_human"}])
        outcome = check_escalation_rules(
            agent,
            "can I talk to a human?",
            {},
            natural_lang_enabled=True,
        )
        assert outcome is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_transcript — formatting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildTranscript:
    """Formatting and length-capping behaviour of ``build_transcript``."""

    def test_empty_messages(self) -> None:
        """build_transcript with empty list returns empty string."""
        assert build_transcript([]) == ""

    def test_single_user_message(self) -> None:
        """Single user message is formatted correctly."""
        transcript = build_transcript([{"role": "user", "content": "Hello!"}])
        assert transcript == "*User:* Hello!"

    def test_single_assistant_message(self) -> None:
        """Single assistant message is formatted correctly."""
        transcript = build_transcript([{"role": "assistant", "content": "How can I help?"}])
        assert transcript == "*Assistant:* How can I help?"

    def test_conversation_turn(self) -> None:
        """User and assistant messages are joined with newlines."""
        conversation = [
            {"role": "user", "content": "I have a billing issue"},
            {"role": "assistant", "content": "I can help with that"},
        ]
        transcript = build_transcript(conversation)
        assert transcript == "*User:* I have a billing issue\n*Assistant:* I can help with that"

    def test_multiple_turns(self) -> None:
        """Multiple conversation turns maintain order."""
        conversation = [
            {"role": "user", "content": "First"},
            {"role": "assistant", "content": "Second"},
            {"role": "user", "content": "Third"},
        ]
        rendered = build_transcript(conversation).split("\n")
        assert len(rendered) == 3
        assert rendered[0] == "*User:* First"
        assert rendered[1] == "*Assistant:* Second"
        assert rendered[2] == "*User:* Third"

    def test_transcript_truncated_at_3000_chars(self) -> None:
        """build_transcript truncates output to 3000 characters max."""
        # 20 messages x 200 chars = 4000 chars of content alone, so the
        # untruncated transcript necessarily exceeds the 3000-char cap.
        long_content = "x" * 200
        conversation = [{"role": "user", "content": long_content} for _ in range(20)]
        transcript = build_transcript(conversation)
        assert len(transcript) <= 3000

    def test_transcript_under_limit_not_truncated(self) -> None:
        """build_transcript does not truncate output under 3000 chars."""
        conversation = [
            {"role": "user", "content": "Short message"},
            {"role": "assistant", "content": "Short reply"},
        ]
        transcript = build_transcript(conversation)
        # Exact equality is the only check that can catch truncation or a
        # stray suffix here; a length bound is met by any short output.
        assert transcript == "*User:* Short message\n*Assistant:* Short reply"
|
||||
Reference in New Issue
Block a user