feat(orchestrator): Implement high-level task decomposition in Planner node

2026-02-23 05:21:15 -08:00
parent 013208b929
commit 575e1a2e53
4 changed files with 77 additions and 42 deletions
--- a/backend/src/ea_chatbot/graph/nodes/planner.py
+++ b/backend/src/ea_chatbot/graph/nodes/planner.py
@@ -5,30 +5,29 @@ from ea_chatbot.utils.llm_factory import get_llm_model
 from ea_chatbot.utils import helpers, database_inspection
 from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
 from ea_chatbot.graph.prompts.planner import PLANNER_PROMPT
-from ea_chatbot.schemas import TaskPlanResponse
+from ea_chatbot.schemas import ChecklistResponse

 def planner_node(state: AgentState) -> dict:
-    """Generate a structured plan based on the query analysis."""
+    """Generate a high-level task checklist for the Orchestrator."""
    question = state["question"]
    history = state.get("messages", [])[-6:]
    summary = state.get("summary", "")
    
    settings = Settings()
-    logger = get_logger("planner")
+    logger = get_logger("orchestrator:planner")
    
-    logger.info("Generating task plan...")
+    logger.info("Generating high-level task checklist...")
    
    llm = get_llm_model(
        settings.planner_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )
-    structured_llm = llm.with_structured_output(TaskPlanResponse)
+    structured_llm = llm.with_structured_output(ChecklistResponse)
    
    date_str = helpers.get_readable_date()
    
-    # Always provide data summary; LLM decides relevance.
+    # Always pass a data summary as context; fall back to a placeholder when none is available.
    database_description = database_inspection.get_data_summary(data_dir=settings.data_dir) or "No data available."
-    example_plan = ""
    
    messages = PLANNER_PROMPT.format_messages(
        date=date_str,
@@ -36,16 +35,19 @@ def planner_node(state: AgentState) -> dict:
        history=history,
        summary=summary,
        database_description=database_description,
-        example_plan=example_plan
+        example_plan="Decompose into data_analyst and researcher tasks."
    )
    
-    # Generate the structured plan
    try:
        response = structured_llm.invoke(messages)
-        # Convert the structured response back to YAML string for the state
-        plan_yaml = yaml.dump(response.model_dump(), sort_keys=False)
-        logger.info("[bold green]Plan generated successfully.[/bold green]")
-        return {"plan": plan_yaml}
+        # Dump each checklist task model to a plain dict so it can be stored in state
+        checklist = [task.model_dump() for task in response.checklist]
+        logger.info(f"[bold green]Checklist generated with {len(checklist)} tasks.[/bold green]")
+        return {
+            "checklist": checklist,
+            "current_step": 0,
+            "summary": response.reflection # Use reflection as initial summary
+        }
    except Exception as e:
-        logger.error(f"Failed to generate plan: {str(e)}")
+        logger.error(f"Failed to generate checklist: {str(e)}")
        raise e
--- a/backend/src/ea_chatbot/graph/prompts/planner.py
+++ b/backend/src/ea_chatbot/graph/prompts/planner.py
@@ -1,41 +1,29 @@
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

-PLANNER_SYSTEM = """You are a Research Specialist and your job is to find answers and educate the user. 
-Provide factual information responding directly to the user's question. Include key details and context to ensure your response comprehensively answers their query.
+PLANNER_SYSTEM = """You are a Lead Orchestrator for an Election Analytics Chatbot.
+Your job is to decompose complex user queries into a high-level checklist of tasks.
+
+**Specialized Workers:**
+1. `data_analyst`: Handles SQL queries, Python data analysis, and plotting. Use this when the user needs numbers, trends, or charts from the internal database.
+2. `researcher`: Performs web searches for current news, facts, or external data not in the primary database.
+
+**Orchestration Strategy:**
+- Analyze the user's question and the available data summary.
+- Create a logical sequence of tasks (checklist) for these workers.
+- Be specific in the task description for the worker (e.g., "Find the total votes in Florida 2020").
+- If the query is ambiguous, the Orchestrator loop will later handle clarification, but for now, make the best plan possible.

 Today's Date is: {date}"""

 PLANNER_USER = """Conversation Summary: {summary}

-TASK:
+USER QUESTION:
 {question}

-AVAILABLE DATA SUMMARY (Use only if relevant to the task):
+AVAILABLE DATABASE SUMMARY:
 {database_description}

-First: Evaluate whether you have all necessary and requested information to provide a solution. 
-Use the dataset description above to determine what data and in what format you have available to you.
-You are able to search internet if the user asks for it, or you require any information that you can not derive from the given dataset or the instruction.
-
-Second: Incorporate any additional relevant context, reasoning, or details from previous interactions or internal chain-of-thought that may impact the solution.
-Ensure that all such information is fully included in your response rather than referring to previous answers indirectly.
-
-Third: Reflect on the problem and briefly describe it, while addressing the problem goal, inputs, outputs,
-rules, constraints, and other relevant details that appear in the problem description.
-
-Fourth: Based on the preceding steps, formulate your response as an algorithm, breaking the solution in up to eight simple concise yet descriptive, clear English steps. 
-You MUST Include all values or instructions as described in the above task, or retrieved using internet search!
-If fewer steps suffice, that's acceptable. If more are needed, please include them.
-Remember to explain steps rather than write code.
-
-This algorithm will be later converted to Python code.
-If a dataframe is required, assume it is named 'df' and is already defined/populated based on the data summary.
-
-There is a list variable called `plots` that you need to use to store any plots you generate. Do not use `plt.show()` as it will render the plot and cause an error.
-
-Output the algorithm as a YAML string. Always enclose the YAML string within ```yaml tags.
-
-**Note: Ensure that any necessary context from prior interactions is fully embedded in the plan. Do not use phrases like "refer to previous answer"; instead, provide complete details inline.**
+Decompose the question into a strategic checklist. For each task, specify which worker should handle it.

 {example_plan}"""