- Add ingest_document Celery task (sync def + asyncio.run per arch constraint)
- Add ingest_document_pipeline: MinIO download, extract, chunk, embed, store
- Add chunk_text sliding window chunker (500 chars default, 50 overlap)
- Update execute_tool to inject tenant_id/agent_id into all tool handler kwargs
- Update web_search to use settings.brave_api_key (shared config) not os.getenv
- Unit tests: test_ingestion.py (9 tests) and test_executor_injection.py (5 tests) all pass
187 lines
5.8 KiB
Python
187 lines
5.8 KiB
Python
"""
|
|
Unit tests for executor tenant_id/agent_id injection.
|
|
|
|
Tests that execute_tool injects tenant_id and agent_id into handler kwargs
|
|
before calling the handler, so context-aware tools (kb_search, calendar_lookup)
|
|
receive tenant context without the LLM needing to provide it.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import uuid
|
|
from typing import Any
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
|
|
def _make_tool(handler: Any, requires_confirmation: bool = False) -> Any:
|
|
"""Create a minimal ToolDefinition-like object for tests."""
|
|
tool = MagicMock()
|
|
tool.handler = handler
|
|
tool.requires_confirmation = requires_confirmation
|
|
tool.parameters = {
|
|
"type": "object",
|
|
"properties": {
|
|
"query": {"type": "string"},
|
|
},
|
|
"required": ["query"],
|
|
}
|
|
return tool
|
|
|
|
|
|
class TestExecutorTenantInjection:
    """Tests for the tenant_id/agent_id injection performed by execute_tool.

    Each test drives ``execute_tool`` with a tool call whose JSON arguments
    contain only ``query`` and then inspects the kwargs the handler received.
    """

    @staticmethod
    async def _run_tool_call(
        arguments: str,
        handler_result: str = "ok",
    ) -> tuple[Any, dict[str, Any], uuid.UUID, uuid.UUID]:
        """Run execute_tool once against a kwargs-recording handler.

        Args:
            arguments: Raw JSON string placed in the tool call's
                ``function.arguments`` field.
            handler_result: Value the recording handler returns.

        Returns:
            A ``(result, received_kwargs, tenant_id, agent_id)`` tuple:
            the value returned by ``execute_tool``, the kwargs the handler was
            called with, and the freshly generated UUIDs passed to
            ``execute_tool``.
        """
        # Imported at call time, as the individual tests originally did.
        from orchestrator.tools.executor import execute_tool

        received_kwargs: dict[str, Any] = {}

        async def recording_handler(**kwargs: Any) -> str:
            received_kwargs.update(kwargs)
            return handler_result

        registry = {"test_tool": _make_tool(recording_handler)}

        tenant_id = uuid.uuid4()
        agent_id = uuid.uuid4()

        # execute_tool receives an audit logger; its log_tool_call attribute
        # is replaced with an AsyncMock so it can be awaited.
        audit_logger = MagicMock()
        audit_logger.log_tool_call = AsyncMock()

        tool_call = {
            "function": {
                "name": "test_tool",
                "arguments": arguments,
            }
        }

        result = await execute_tool(tool_call, registry, tenant_id, agent_id, audit_logger)
        return result, received_kwargs, tenant_id, agent_id

    @pytest.mark.asyncio
    async def test_tenant_id_injected_into_handler_kwargs(self) -> None:
        """Handler should receive tenant_id even though LLM didn't provide it."""
        result, received_kwargs, tenant_id, _ = await self._run_tool_call(
            '{"query": "hello world"}',
            handler_result="handler result",
        )

        assert result == "handler result"
        assert "tenant_id" in received_kwargs
        assert received_kwargs["tenant_id"] == str(tenant_id)

    @pytest.mark.asyncio
    async def test_agent_id_injected_into_handler_kwargs(self) -> None:
        """Handler should receive agent_id even though LLM didn't provide it."""
        _, received_kwargs, _, agent_id = await self._run_tool_call('{"query": "test"}')

        assert "agent_id" in received_kwargs
        assert received_kwargs["agent_id"] == str(agent_id)

    @pytest.mark.asyncio
    async def test_injected_ids_are_strings(self) -> None:
        """Injected tenant_id and agent_id should be strings, not UUIDs."""
        _, received_kwargs, _, _ = await self._run_tool_call('{"query": "test"}')

        assert isinstance(received_kwargs["tenant_id"], str)
        assert isinstance(received_kwargs["agent_id"], str)

    @pytest.mark.asyncio
    async def test_llm_provided_args_preserved(self) -> None:
        """Original LLM-provided args should still be present after injection."""
        _, received_kwargs, tenant_id, agent_id = await self._run_tool_call(
            '{"query": "search term from LLM"}'
        )

        assert received_kwargs["query"] == "search term from LLM"
        assert received_kwargs["tenant_id"] == str(tenant_id)
        assert received_kwargs["agent_id"] == str(agent_id)

    @pytest.mark.asyncio
    async def test_injection_after_schema_validation(self) -> None:
        """Injection happens after validation — injected keys don't cause schema failures."""
        # Tool requires exactly 'query', nothing else in schema required.
        # Schema should pass even though we inject tenant_id/agent_id.
        result, received_kwargs, _, _ = await self._run_tool_call(
            '{"query": "test"}',
            handler_result="passed",
        )

        assert result == "passed"
        # Confirm the injection actually happened on this call; otherwise the
        # test would pass even if execute_tool never injected anything.
        assert {"tenant_id", "agent_id"} <= received_kwargs.keys()