feat(orchestrator): Implement high-level task decomposition in Planner node

This commit is contained in:
Yunxiao Xu
2026-02-23 05:21:15 -08:00
parent 013208b929
commit 575e1a2e53
4 changed files with 77 additions and 42 deletions

View File

@@ -5,30 +5,29 @@ from ea_chatbot.utils.llm_factory import get_llm_model
from ea_chatbot.utils import helpers, database_inspection
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
from ea_chatbot.graph.prompts.planner import PLANNER_PROMPT
from ea_chatbot.schemas import TaskPlanResponse
from ea_chatbot.schemas import ChecklistResponse
def planner_node(state: AgentState) -> dict:
"""Generate a structured plan based on the query analysis."""
"""Generate a high-level task checklist for the Orchestrator."""
question = state["question"]
history = state.get("messages", [])[-6:]
summary = state.get("summary", "")
settings = Settings()
logger = get_logger("planner")
logger = get_logger("orchestrator:planner")
logger.info("Generating task plan...")
logger.info("Generating high-level task checklist...")
llm = get_llm_model(
settings.planner_llm,
callbacks=[LangChainLoggingHandler(logger=logger)]
)
structured_llm = llm.with_structured_output(TaskPlanResponse)
structured_llm = llm.with_structured_output(ChecklistResponse)
date_str = helpers.get_readable_date()
# Always provide data summary; LLM decides relevance.
# Data summary for context
database_description = database_inspection.get_data_summary(data_dir=settings.data_dir) or "No data available."
example_plan = ""
messages = PLANNER_PROMPT.format_messages(
date=date_str,
@@ -36,16 +35,19 @@ def planner_node(state: AgentState) -> dict:
history=history,
summary=summary,
database_description=database_description,
example_plan=example_plan
example_plan="Decompose into data_analyst and researcher tasks."
)
# Generate the structured plan
try:
response = structured_llm.invoke(messages)
# Convert the structured response back to YAML string for the state
plan_yaml = yaml.dump(response.model_dump(), sort_keys=False)
logger.info("[bold green]Plan generated successfully.[/bold green]")
return {"plan": plan_yaml}
# Convert ChecklistTask objects to dicts for state
checklist = [task.model_dump() for task in response.checklist]
logger.info(f"[bold green]Checklist generated with {len(checklist)} tasks.[/bold green]")
return {
"checklist": checklist,
"current_step": 0,
"summary": response.reflection # Use reflection as initial summary
}
except Exception as e:
logger.error(f"Failed to generate plan: {str(e)}")
logger.error(f"Failed to generate checklist: {str(e)}")
raise e

View File

@@ -1,41 +1,29 @@
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
PLANNER_SYSTEM = """You are a Research Specialist and your job is to find answers and educate the user.
Provide factual information responding directly to the user's question. Include key details and context to ensure your response comprehensively answers their query.
PLANNER_SYSTEM = """You are a Lead Orchestrator for an Election Analytics Chatbot.
Your job is to decompose complex user queries into a high-level checklist of tasks.
**Specialized Workers:**
1. `data_analyst`: Handles SQL queries, Python data analysis, and plotting. Use this when the user needs numbers, trends, or charts from the internal database.
2. `researcher`: Performs web searches for current news, facts, or external data not in the primary database.
**Orchestration Strategy:**
- Analyze the user's question and the available data summary.
- Create a logical sequence of tasks (checklist) for these workers.
- Be specific in the task description for the worker (e.g., "Find the total votes in Florida 2020").
- If the query is ambiguous, the Orchestrator loop will later handle clarification, but for now, make the best plan possible.
Today's Date is: {date}"""
PLANNER_USER = """Conversation Summary: {summary}
TASK:
USER QUESTION:
{question}
AVAILABLE DATA SUMMARY (Use only if relevant to the task):
AVAILABLE DATABASE SUMMARY:
{database_description}
First: Evaluate whether you have all necessary and requested information to provide a solution.
Use the dataset description above to determine what data and in what format you have available to you.
You are able to search internet if the user asks for it, or you require any information that you can not derive from the given dataset or the instruction.
Second: Incorporate any additional relevant context, reasoning, or details from previous interactions or internal chain-of-thought that may impact the solution.
Ensure that all such information is fully included in your response rather than referring to previous answers indirectly.
Third: Reflect on the problem and briefly describe it, while addressing the problem goal, inputs, outputs,
rules, constraints, and other relevant details that appear in the problem description.
Fourth: Based on the preceding steps, formulate your response as an algorithm, breaking the solution in up to eight simple concise yet descriptive, clear English steps.
You MUST Include all values or instructions as described in the above task, or retrieved using internet search!
If fewer steps suffice, that's acceptable. If more are needed, please include them.
Remember to explain steps rather than write code.
This algorithm will be later converted to Python code.
If a dataframe is required, assume it is named 'df' and is already defined/populated based on the data summary.
There is a list variable called `plots` that you need to use to store any plots you generate. Do not use `plt.show()` as it will render the plot and cause an error.
Output the algorithm as a YAML string. Always enclose the YAML string within ```yaml tags.
**Note: Ensure that any necessary context from prior interactions is fully embedded in the plan. Do not use phrases like "refer to previous answer"; instead, provide complete details inline.**
Decompose the question into a strategic checklist. For each task, specify which worker should handle it.
{example_plan}"""