fix(orchestrator): Enforce retry budget to prevent unbounded loops

This commit is contained in:
Yunxiao Xu
2026-02-23 15:52:23 -08:00
parent 2cfbc5d1d0
commit 557b553c59
4 changed files with 102 additions and 6 deletions

View File

def delegate_node(state: AgentState) -> dict:
    """Determine which worker subgraph to call next based on the checklist.

    Reads ``checklist``, ``current_step`` and ``iterations`` from ``state``
    and returns a partial state update, one of:

    * ``{"next_action": "summarize"}`` when the checklist is empty or
      ``current_step`` is past its last entry.
    * A failure update when the current step has already used up its retry
      budget: ``next_action`` is ``"summarize"``, ``iterations`` is reset to
      0, and ``summary`` carries a failure message.
    * ``{"next_action": <worker>}`` otherwise, where the worker name comes
      from the current checklist entry and defaults to ``"data_analyst"``.
    """
    # Retry budget for a single checklist step, named rather than left as a
    # bare literal in the comparison below.
    max_attempts = 3

    checklist = state.get("checklist", [])
    current_step = state.get("current_step", 0)
    iterations = state.get("iterations", 0)
    logger = get_logger("orchestrator:delegate")

    if not checklist or current_step >= len(checklist):
        logger.info("Checklist complete or empty. Routing to summarizer.")
        return {"next_action": "summarize"}

    # Enforce retry budget: stop re-delegating a step that keeps failing so
    # the delegate -> worker -> reflect cycle cannot loop without bound.
    if iterations >= max_attempts:
        logger.error(f"Max retries reached for task {current_step}. Routing to summary with failure.")
        return {
            "next_action": "summarize",
            "iterations": 0,  # Reset for next turn
            "summary": f"Failed to complete task {current_step} after {iterations} attempts.",
        }

    task_info = checklist[current_step]
    worker = task_info.get("worker", "data_analyst")

    # Only the attempt-aware message is kept; the older message without the
    # attempt number was superseded by it and would log every delegation twice.
    logger.info(f"Delegating next task to worker: {worker} (Attempt {iterations + 1})")
    return {"next_action": worker}

View File

@@ -45,6 +45,7 @@ def planner_node(state: AgentState) -> dict:
return {
"checklist": checklist,
"current_step": 0,
"iterations": 0, # Reset iteration counter for the new plan
"summary": response.reflection # Use reflection as initial summary
}
except Exception as e:

View File

@@ -44,20 +44,20 @@ If there were major errors or the output is missing critical data requested in t
logger.info("[bold green]Sub-task satisfied.[/bold green] Advancing plan.")
return {
"current_step": current_step + 1,
"iterations": 0, # Reset for next task
"next_action": "delegate"
}
else:
logger.warning(f"[bold yellow]Sub-task NOT satisfied.[/bold yellow] Reason: {response.reasoning}")
# Do NOT advance the step. This triggers a retry of the same task.
# In a more advanced version, we might route to a 'planner' for revision.
# Do NOT advance the step. Increment iterations to track retries.
return {
"current_step": current_step,
"iterations": state.get("iterations", 0) + 1,
"next_action": "delegate"
}
except Exception as e:
logger.error(f"Failed to reflect: {str(e)}")
# On error, do not advance to be safe
# On error, increment iterations to avoid infinite loop if LLM is stuck
return {
"current_step": current_step,
"iterations": state.get("iterations", 0) + 1,
"next_action": "delegate"
}