"""Unit tests for the query-analyzer node's refinement behavior.

Each test mocks the LLM factory so that ``query_analyzer_node`` receives a
predetermined ``QueryAnalysis``, then checks how the node surfaces that
analysis in the returned state (``next_action``, ``analysis``).
"""

import pytest
from unittest.mock import MagicMock, patch

from langchain_core.messages import AIMessage, HumanMessage

from ea_chatbot.graph.nodes.query_analyzer import QueryAnalysis, query_analyzer_node
from ea_chatbot.graph.state import AgentState


@pytest.fixture
def base_state():
    """Minimal agent-state dict with empty defaults for every key the node reads."""
    return {
        "messages": [],
        "question": "",
        "analysis": None,
        "next_action": "",
        "summary": "",
    }


def _wire_structured_llm(mock_get_llm, analysis):
    """Wire the patched LLM factory so the structured-output call returns *analysis*.

    Mirrors the production call chain:
    ``get_llm_model() -> .with_structured_output() -> .invoke()``.
    """
    llm = MagicMock()
    mock_get_llm.return_value = llm
    structured = MagicMock()
    llm.with_structured_output.return_value = structured
    structured.invoke.return_value = analysis


@patch("ea_chatbot.graph.nodes.query_analyzer.get_llm_model")
def test_refinement_coreference_from_history(mock_get_llm, base_state):
    """
    Test that the analyzer can resolve Year/State from history.
    User asks "What about in NJ?" after a Florida 2024 query.
    Expected: next_action = 'plan', NOT 'clarify' due to missing year.
    """
    state = base_state.copy()
    state["messages"] = [
        HumanMessage(content="Show me 2024 results for Florida"),
        AIMessage(content="Here are the 2024 results for Florida..."),
    ]
    state["question"] = "What about in New Jersey?"
    state["summary"] = "The user is looking for 2024 election results."

    # The LLM should eventually answer 'plan' because the context supplies
    # year/state; if the prompt is too strict it would return 'clarify' and
    # this test would catch the regression once the prompt is relaxed.
    _wire_structured_llm(
        mock_get_llm,
        QueryAnalysis(
            data_required=["2024 results", "New Jersey"],
            unknowns=[],
            ambiguities=[],
            conditions=["state=NJ", "year=2024"],
            next_action="plan",
        ),
    )

    result = query_analyzer_node(state)

    assert result["next_action"] == "plan"
    assert "NJ" in str(result["analysis"]["conditions"])


@patch("ea_chatbot.graph.nodes.query_analyzer.get_llm_model")
def test_refinement_tolerance_for_missing_format(mock_get_llm, base_state):
    """
    Test that the analyzer doesn't flag missing output format or database name.
    User asks "Give me a graph of turnout".
    Expected: next_action = 'plan', even if 'format' or 'db' is not in query.
    """
    state = base_state.copy()
    state["question"] = "Give me a graph of voter turnout in 2024 for Florida"

    _wire_structured_llm(
        mock_get_llm,
        QueryAnalysis(
            data_required=["voter turnout", "Florida"],
            unknowns=[],
            ambiguities=[],
            conditions=["year=2024"],
            next_action="plan",
        ),
    )

    result = query_analyzer_node(state)

    assert result["next_action"] == "plan"
    # The node must not inject ambiguities of its own (hallucinated requirement).
    assert len(result["analysis"]["ambiguities"]) == 0


@patch("ea_chatbot.graph.nodes.query_analyzer.get_llm_model")
def test_refinement_enforces_voter_identity_clarification(mock_get_llm, base_state):
    """
    Test that 'track the same voter' still triggers clarification.
    """
    state = base_state.copy()
    state["question"] = "Track the same voter participation in 2020 and 2024."

    # Clarification is desired here: voter identity is genuinely undefined.
    _wire_structured_llm(
        mock_get_llm,
        QueryAnalysis(
            data_required=["voter participation"],
            unknowns=[],
            ambiguities=["Please define what fields constitute 'the same voter' (e.g. ID, or Name and DOB)."],
            conditions=[],
            next_action="clarify",
        ),
    )

    result = query_analyzer_node(state)

    assert result["next_action"] == "clarify"
    ambiguity_text = str(result["analysis"]["ambiguities"]).lower()
    assert "identity" in ambiguity_text or "same voter" in ambiguity_text