feat(workers): Implement Researcher worker subgraph for web research tasks
This commit is contained in:
31
backend/src/ea_chatbot/graph/workers/researcher/mapping.py
Normal file
31
backend/src/ea_chatbot/graph/workers/researcher/mapping.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
from typing import Dict, Any, List
|
||||||
|
from langchain_core.messages import HumanMessage, AIMessage
|
||||||
|
from ea_chatbot.graph.state import AgentState
|
||||||
|
from ea_chatbot.graph.workers.researcher.state import WorkerState
|
||||||
|
|
||||||
|
def prepare_researcher_input(state: AgentState) -> Dict[str, Any]:
    """Build the initial WorkerState payload for the Researcher worker.

    Pulls the task description for the current checklist step out of the
    global agent state; falls back to a generic description when the step
    index is out of range.
    """
    steps: List[Dict[str, Any]] = state.get("checklist", [])
    idx = state.get("current_step", 0)

    default_task = "Perform research"
    if 0 <= idx < len(steps):
        task = steps[idx].get("task", default_task)
    else:
        task = default_task

    return {
        "task": task,
        "messages": [HumanMessage(content=task)],
        "queries": [],
        "raw_results": [],
        "iterations": 0,
        "result": None,
    }
|
def merge_researcher_output(worker_state: WorkerState) -> Dict[str, Any]:
    """Map researcher results back to the global AgentState.

    Returns the partial state update consumed by the orchestrator graph:
    a single AIMessage carrying the worker's summarized result.
    """
    # dict.get(key, default) only falls back when the key is MISSING; the
    # worker state initializes "result" to None, so an explicit `or` guard
    # is needed to avoid emitting AIMessage(content=None).
    result = worker_state.get("result") or "Research complete."

    return {
        "messages": [AIMessage(content=result)],
        # Researcher doesn't usually update VFS or Plots, but we keep the structure
    }
@@ -0,0 +1,60 @@
|
|||||||
|
from langchain_openai import ChatOpenAI
|
||||||
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||||
|
from ea_chatbot.graph.workers.researcher.state import WorkerState
|
||||||
|
from ea_chatbot.config import Settings
|
||||||
|
from ea_chatbot.utils.llm_factory import get_llm_model
|
||||||
|
from ea_chatbot.utils import helpers
|
||||||
|
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
|
||||||
|
from ea_chatbot.graph.prompts.researcher import RESEARCHER_PROMPT
|
||||||
|
|
||||||
|
def searcher_node(state: WorkerState) -> dict:
    """Execute web research for the specific task.

    Formats the shared researcher prompt for this sub-task, binds a
    provider-appropriate native web-search tool when the model supports
    one, and invokes the model once. The response content is stored in
    ``raw_results`` for the downstream summarizer node.

    Args:
        state: Current worker state; must contain a "task" string.

    Returns:
        Partial WorkerState update: the model response message, the raw
        result text, and an incremented iteration counter.

    Raises:
        Exception: re-raised (with original traceback) if invocation fails.
    """
    task = state["task"]
    logger = get_logger("researcher_worker:searcher")

    logger.info(f"Researching task: {task[:50]}...")

    settings = Settings()
    llm = get_llm_model(
        settings.researcher_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )

    date_str = helpers.get_readable_date()

    # Adapt the global researcher prompt for the sub-task
    messages = RESEARCHER_PROMPT.format_messages(
        date=date_str,
        question=task,
        history=[],  # Worker has fresh context or task-specific history
        summary=""
    )

    # Tool binding: each provider exposes native web search under a
    # different tool schema. Fall back to the bare model on any failure so
    # research degrades gracefully instead of crashing the worker.
    try:
        if isinstance(llm, ChatGoogleGenerativeAI):
            llm_with_tools = llm.bind_tools([{"google_search": {}}])
        elif isinstance(llm, ChatOpenAI):
            llm_with_tools = llm.bind_tools([{"type": "web_search"}])
        else:
            llm_with_tools = llm
    except Exception as e:
        logger.warning(f"Failed to bind search tools: {str(e)}")
        llm_with_tools = llm

    try:
        response = llm_with_tools.invoke(messages)
        logger.info("[bold green]Search complete.[/bold green]")

        # In a real tool-use scenario, we'd extract the tool outputs here.
        # For now, we'll store the response content as a 'raw_result'.
        content = response.content if hasattr(response, "content") else str(response)

        return {
            "messages": [response],
            "raw_results": [content],
            "iterations": state.get("iterations", 0) + 1
        }
    except Exception as e:
        logger.error(f"Search failed: {str(e)}")
        # Bare `raise` preserves the original traceback; `raise e` would
        # re-anchor it at this line.
        raise
@@ -0,0 +1,41 @@
|
|||||||
|
from ea_chatbot.graph.workers.researcher.state import WorkerState
|
||||||
|
from ea_chatbot.config import Settings
|
||||||
|
from ea_chatbot.utils.llm_factory import get_llm_model
|
||||||
|
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
|
||||||
|
|
||||||
|
def summarizer_node(state: WorkerState) -> dict:
    """Summarize research results for the Orchestrator.

    Joins the raw research findings collected by the searcher node and asks
    an LLM to condense them into a factual summary suitable for the
    top-level Orchestrator.

    Args:
        state: Worker state with "task" and accumulated "raw_results".

    Returns:
        Partial WorkerState update containing the final "result" string.

    Raises:
        Exception: re-raised (with original traceback) if invocation fails.
    """
    task = state["task"]
    raw_results = state.get("raw_results", [])

    logger = get_logger("researcher_worker:summarizer")
    logger.info("Summarizing research results...")

    settings = Settings()
    # NOTE(review): this reuses the planner's model config rather than a
    # researcher-/summarizer-specific one — confirm this is intentional.
    llm = get_llm_model(
        settings.planner_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )

    results_str = "\n---\n".join(raw_results)

    prompt = f"""You are a Research Specialist sub-agent. You have completed a research sub-task.
Task: {task}

Raw Research Findings:
{results_str}

Provide a concise, factual summary of the findings for the top-level Orchestrator.
Ensure all key facts, dates, and sources (if provided) are preserved.
Do NOT include internal reasoning, just the factual summary."""

    try:
        response = llm.invoke(prompt)
        result = response.content if hasattr(response, "content") else str(response)
        logger.info("[bold green]Research summary complete.[/bold green]")
        return {
            "result": result
        }
    except Exception as e:
        logger.error(f"Failed to summarize research: {str(e)}")
        # Bare `raise` preserves the original traceback; `raise e` would
        # re-anchor it at this line.
        raise
24
backend/src/ea_chatbot/graph/workers/researcher/workflow.py
Normal file
24
backend/src/ea_chatbot/graph/workers/researcher/workflow.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
from langgraph.graph import StateGraph, END
|
||||||
|
from ea_chatbot.graph.workers.researcher.state import WorkerState
|
||||||
|
from ea_chatbot.graph.workers.researcher.nodes.searcher import searcher_node
|
||||||
|
from ea_chatbot.graph.workers.researcher.nodes.summarizer import summarizer_node
|
||||||
|
|
||||||
|
def create_researcher_worker(
    searcher=searcher_node,
    summarizer=summarizer_node
) -> StateGraph:
    """Build and compile the linear Researcher worker subgraph.

    Flow: searcher -> summarizer -> END. The node callables are injectable
    as parameters so tests can substitute mocks.
    """
    graph = StateGraph(WorkerState)

    # Register nodes
    graph.add_node("searcher", searcher)
    graph.add_node("summarizer", summarizer)

    # Wire the linear pipeline
    graph.set_entry_point("searcher")
    graph.add_edge("searcher", "summarizer")
    graph.add_edge("summarizer", END)

    return graph.compile()
67
backend/tests/test_researcher_worker.py
Normal file
67
backend/tests/test_researcher_worker.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
from ea_chatbot.graph.workers.researcher.workflow import create_researcher_worker, WorkerState
|
||||||
|
from ea_chatbot.graph.workers.researcher.mapping import prepare_researcher_input, merge_researcher_output
|
||||||
|
from ea_chatbot.graph.state import AgentState
|
||||||
|
from langchain_core.messages import AIMessage
|
||||||
|
|
||||||
|
def test_researcher_worker_flow():
    """Verify that the Researcher worker flow works as expected."""
    fake_searcher = MagicMock(
        return_value={
            "raw_results": ["Result A"],
            "messages": [AIMessage(content="Search result")],
        }
    )
    fake_summarizer = MagicMock(return_value={"result": "Consolidated Summary"})

    graph = create_researcher_worker(
        searcher=fake_searcher,
        summarizer=fake_summarizer
    )

    start_state = WorkerState(
        messages=[],
        task="Find governor",
        queries=[],
        raw_results=[],
        iterations=0,
        result=None
    )

    end_state = graph.invoke(start_state)

    # Summarizer output must flow through to the final state, and both
    # nodes must have been executed.
    assert end_state["result"] == "Consolidated Summary"
    assert fake_searcher.called
    assert fake_summarizer.called
|
def test_researcher_mapping():
    """Verify that we correctly map states for the researcher."""
    global_state = AgentState(
        checklist=[{"task": "Search X", "worker": "researcher"}],
        current_step=0,
        messages=[],
        question="test",
        analysis={},
        next_action="",
        iterations=0,
        vfs={},
        plots=[],
        dfs={}
    )

    # Global -> worker: the current checklist step's task is extracted.
    assert prepare_researcher_input(global_state)["task"] == "Search X"

    # Worker -> global: the result is wrapped as an AIMessage.
    finished_worker = WorkerState(
        messages=[],
        task="Search X",
        queries=[],
        raw_results=[],
        iterations=1,
        result="Found X"
    )
    merged = merge_researcher_output(finished_worker)
    assert merged["messages"][0].content == "Found X"
Reference in New Issue
Block a user