from unittest.mock import MagicMock, patch

import pytest
from langchain_core.messages import AIMessage, HumanMessage

from ea_chatbot.graph.state import AgentState
from ea_chatbot.graph.workflow import create_workflow


def test_orchestrator_loop_retry_budget():
    """Verify that the orchestrator loop is bounded and terminates after max retries.

    The analyzer, planner, worker and synthesizer are replaced with mocks.
    The delegate and reflector nodes are NOT overridden, so the workflow uses
    whatever ``create_workflow`` wires in by default (presumably the real
    nodes -- confirm against ``create_workflow``). Only the LLM factory used
    by the reflector is patched, so that every reflection reports
    ``satisfied=False``.

    Expected trace (``it`` = ``iterations``), assuming the reflector
    increments ``iterations`` on failure and the delegate routes to the
    summary path once ``iterations >= 3``:

        Delegate (it=0) -> Worker -> Reflector (fail, it=1)
        Delegate (it=1) -> Worker -> Reflector (fail, it=2)
        Delegate (it=2) -> Worker -> Reflector (fail, it=3)
        Delegate (it=3) -> Summarize (it reset to 0)
    """
    # Number of worker attempts allowed before the loop must give up.
    max_attempts = 3

    mock_analyzer = MagicMock()
    mock_planner = MagicMock()
    mock_worker = MagicMock()
    mock_synthesizer = MagicMock()

    # 1. Analyzer: proceed straight to planning.
    mock_analyzer.return_value = {"next_action": "plan"}

    # 2. Planner: a single task that the reflector will never accept.
    mock_planner.return_value = {
        "checklist": [{"task": "Unsolvable Task", "worker": "data_analyst"}],
        "current_step": 0,
        "iterations": 0,
    }

    # 3. Worker: always produces output (which the reflector rejects).
    mock_worker.return_value = {
        "result": "Bad Output",
        "messages": [AIMessage(content="Bad")],
    }

    # 4. Synthesizer: terminal node once the retry budget is exhausted.
    mock_synthesizer.return_value = {
        "messages": [AIMessage(content="Failure Summary")],
        "next_action": "end",
    }

    initial_state = AgentState(
        messages=[HumanMessage(content="test")],
        question="test",
        analysis={},
        next_action="",
        iterations=0,
        checklist=[],
        current_step=0,
        vfs={},
        plots=[],
        dfs={},
    )

    # The patch must stay active while the graph runs: the reflector looks up
    # get_llm_model at call time, so invoking outside this block would hit
    # the unpatched factory.
    with patch("ea_chatbot.graph.nodes.reflector.get_llm_model") as mock_get_llm:
        mock_llm = MagicMock()
        # Structured-output verdict: never satisfied.
        mock_llm.with_structured_output.return_value.invoke.return_value = MagicMock(
            satisfied=False, reasoning="Still bad."
        )
        mock_get_llm.return_value = mock_llm

        # delegate / reflector are intentionally not passed here.
        app = create_workflow(
            query_analyzer=mock_analyzer,
            planner=mock_planner,
            data_analyst_worker=mock_worker,
            synthesizer=mock_synthesizer,
        )

        # recursion_limit is higher than the retry budget but low enough to
        # fail fast if the loop turns out to be unbounded.
        final_state = app.invoke(initial_state, config={"recursion_limit": 20})

    # The counter is back at zero once the loop gives up (reset in the
    # delegate or handled by the synthesizer -- TODO confirm which).
    assert final_state["iterations"] == 0

    # The failure summary was produced instead of a normal result.
    assert "Failed to complete task" in final_state["summary"]

    # The worker ran exactly once per allowed attempt, and no more.
    assert mock_worker.call_count == max_attempts