Refactor: Move backend files to backend/ directory and split .gitignore

This commit is contained in:
Yunxiao Xu
2026-02-11 17:40:44 -08:00
parent 48924affa0
commit 7a69133e26
96 changed files with 144 additions and 176 deletions

View File

@@ -0,0 +1,45 @@
from langchain_core.messages import AIMessage
from ea_chatbot.graph.state import AgentState
from ea_chatbot.config import Settings
from ea_chatbot.utils.llm_factory import get_llm_model
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
def clarification_node(state: AgentState) -> dict:
    """Ask the user for missing information or clarifications.

    Reads the prior query analysis from state and asks the LLM to turn the
    detected ambiguities into a polite follow-up question for the user.

    Args:
        state: Graph state; reads "question" and "analysis" (its
            "ambiguities" list, if any).

    Returns:
        dict updating "messages" with the clarification response and setting
        "next_action" to "end".

    Raises:
        Exception: re-raises any LLM invocation failure after logging it.
    """
    question = state["question"]
    analysis = state.get("analysis", {})
    ambiguities = analysis.get("ambiguities", [])

    settings = Settings()
    logger = get_logger("clarification")
    logger.info(f"Generating clarification for {len(ambiguities)} ambiguities.")

    # Reuses the query-analyzer LLM config; logging callback tracks usage.
    llm = get_llm_model(
        settings.query_analyzer_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )

    system_prompt = """You are a Clarification Specialist. Your role is to identify what information is missing from a user's request to perform a data analysis or research task.
Based on the analysis of the user's question, formulate a polite and concise request for the missing information."""
    prompt = f"""Original Question: {question}
Missing/Ambiguous Information: {', '.join(ambiguities) if ambiguities else 'Unknown ambiguities'}
Please ask the user for the necessary details."""
    messages = [
        ("system", system_prompt),
        ("user", prompt)
    ]

    try:
        response = llm.invoke(messages)
        logger.info("[bold green]Clarification generated.[/bold green]")
        return {
            "messages": [response],
            "next_action": "end"  # To indicate we are done for now
        }
    except Exception as e:
        logger.error(f"Failed to generate clarification: {str(e)}")
        # Bare `raise` re-raises with the original traceback intact,
        # without the redundant frame `raise e` would add.
        raise

View File

@@ -0,0 +1,47 @@
from ea_chatbot.graph.state import AgentState
from ea_chatbot.config import Settings
from ea_chatbot.utils.llm_factory import get_llm_model
from ea_chatbot.utils import helpers, database_inspection
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
from ea_chatbot.graph.prompts.coder import CODE_GENERATOR_PROMPT
from ea_chatbot.schemas import CodeGenerationResponse
def coder_node(state: AgentState) -> dict:
    """Generate Python code based on the plan and data summary.

    Args:
        state: Graph state; reads "question", "plan", and "code_output"
            (prior execution results, fed back to the prompt).

    Returns:
        dict with the newly generated "code" and "error" cleared to None so
        the executor gets a fresh attempt.

    Raises:
        Exception: re-raises any LLM invocation failure after logging it.
    """
    question = state["question"]
    plan = state.get("plan", "")
    code_output = state.get("code_output", "None")

    settings = Settings()
    logger = get_logger("coder")
    logger.info("Generating Python code...")

    llm = get_llm_model(
        settings.coder_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )
    # Constrain output to the CodeGenerationResponse schema.
    structured_llm = llm.with_structured_output(CodeGenerationResponse)

    # Always provide data summary; the LLM decides what is relevant.
    database_description = database_inspection.get_data_summary(data_dir=settings.data_dir) or "No data available."
    example_code = ""  # Placeholder

    messages = CODE_GENERATOR_PROMPT.format_messages(
        question=question,
        plan=plan,
        database_description=database_description,
        code_exec_results=code_output,
        example_code=example_code
    )

    try:
        response = structured_llm.invoke(messages)
        logger.info("[bold green]Code generated.[/bold green]")
        return {
            "code": response.parsed_code,
            "error": None  # Clear previous errors on new code generation
        }
    except Exception as e:
        logger.error(f"Failed to generate code: {str(e)}")
        # Bare `raise` preserves the original traceback (no extra frame).
        raise

View File

@@ -0,0 +1,44 @@
from ea_chatbot.graph.state import AgentState
from ea_chatbot.config import Settings
from ea_chatbot.utils.llm_factory import get_llm_model
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
from ea_chatbot.graph.prompts.coder import ERROR_CORRECTOR_PROMPT
from ea_chatbot.schemas import CodeGenerationResponse
def error_corrector_node(state: AgentState) -> dict:
    """Fix the code based on the execution error.

    Args:
        state: Graph state; reads "code", "error", and "iterations".

    Returns:
        dict with the corrected "code", "error" cleared to None, and
        "iterations" incremented by one.

    Raises:
        Exception: re-raises any LLM invocation failure after logging it.
    """
    code = state.get("code", "")
    # `state.get("error", default)` returns None when the key exists with
    # value None (executor sets error=None on success), and slicing None
    # below would raise TypeError — use `or` so both cases get the fallback.
    error = state.get("error") or "Unknown error"

    settings = Settings()
    logger = get_logger("error_corrector")
    logger.warning(f"[bold red]Execution error detected:[/bold red] {error[:100]}...")
    logger.info("Attempting to correct the code...")

    # Reuse coder LLM config or add a new one. Using coder_llm for now.
    llm = get_llm_model(
        settings.coder_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )
    structured_llm = llm.with_structured_output(CodeGenerationResponse)

    messages = ERROR_CORRECTOR_PROMPT.format_messages(
        code=code,
        error=error
    )

    try:
        response = structured_llm.invoke(messages)
        logger.info("[bold green]Correction generated.[/bold green]")
        current_iterations = state.get("iterations", 0)
        return {
            "code": response.parsed_code,
            "error": None,  # Clear error after fix attempt
            "iterations": current_iterations + 1
        }
    except Exception as e:
        logger.error(f"Failed to correct code: {str(e)}")
        # Bare `raise` preserves the original traceback (no extra frame).
        raise

View File

@@ -0,0 +1,102 @@
import io
import sys
import traceback
from contextlib import redirect_stdout
from typing import Any, Dict, List, TYPE_CHECKING
import pandas as pd
from matplotlib.figure import Figure
from ea_chatbot.graph.state import AgentState
from ea_chatbot.utils.db_client import DBClient
from ea_chatbot.utils.logging import get_logger
from ea_chatbot.config import Settings
if TYPE_CHECKING:
from ea_chatbot.types import DBSettings
def executor_node(state: AgentState) -> dict:
    """Execute the Python code and capture output, plots, and dataframes.

    Args:
        state: Graph state; reads "code" (the generated Python source).

    Returns:
        dict with "code_output" (captured, truncated stdout), "error"
        (filtered traceback string or None), "plots" (matplotlib Figures
        collected from the `plots` list), and "dfs" (result DataFrames).
    """
    code = state.get("code")
    logger = get_logger("executor")

    if not code:
        logger.error("No code provided to executor.")
        return {"error": "No code provided to executor."}

    logger.info("Executing Python code...")

    settings = Settings()
    db_settings: "DBSettings" = {
        "host": settings.db_host,
        "port": settings.db_port,
        "user": settings.db_user,
        "pswd": settings.db_pswd,
        "db": settings.db_name,
        "table": settings.db_table
    }
    db_client = DBClient(settings=db_settings)

    # Single namespace used as BOTH globals and locals for exec().
    # With separate dicts — exec(code, {}, local_vars) — any function
    # *defined inside* the generated code cannot see 'db', 'pd', or 'plots',
    # because function bodies resolve free names via globals, not the
    # enclosing exec locals (a classic exec() pitfall).
    exec_env: Dict[str, Any] = {
        'db': db_client,
        'plots': [],
        'pd': pd
    }

    stdout_buffer = io.StringIO()
    error = None
    code_output = ""
    plots: List[Figure] = []
    dfs: Dict[str, pd.DataFrame] = {}

    try:
        # SECURITY NOTE: exec() runs LLM-generated code with full interpreter
        # privileges; only acceptable in a trusted or sandboxed deployment.
        with redirect_stdout(stdout_buffer):
            exec(code, exec_env)
        code_output = stdout_buffer.getvalue()
        # Limit the output length if it's too long
        if code_output.count('\n') > 32:
            code_output = '\n'.join(code_output.split('\n')[:32]) + '\n...'
        # Extract plots
        raw_plots = exec_env.get('plots', [])
        if isinstance(raw_plots, list):
            plots = [p for p in raw_plots if isinstance(p, Figure)]
        # Extract DataFrames that were likely intended for display
        # We look for DataFrames in the exec namespace that were mentioned in the code
        for key, value in exec_env.items():
            if isinstance(value, pd.DataFrame):
                # Heuristic: if the variable name is in the code, it might be a result DF
                if key in code:
                    dfs[key] = value
        logger.info(f"[bold green]Execution complete.[/bold green] Captured {len(plots)} plots and {len(dfs)} dataframes.")
    except Exception as e:
        # Capture the traceback
        exc_type, exc_value, tb = sys.exc_info()
        full_traceback = traceback.format_exc()
        # Filter traceback to show only the relevant part (the executed string)
        filtered_tb_lines = [line for line in full_traceback.split('\n') if '<string>' in line]
        error = '\n'.join(filtered_tb_lines)
        if error:
            error += '\n'
        error += f"{exc_type.__name__ if exc_type else 'Exception'}: {exc_value}"
        logger.error(f"Execution failed: {str(e)}")
        # If we have an error, we still might want to see partial stdout
        code_output = stdout_buffer.getvalue()

    return {
        "code_output": code_output,
        "error": error,
        "plots": plots,
        "dfs": dfs
    }

View File

@@ -0,0 +1,51 @@
import yaml
from ea_chatbot.graph.state import AgentState
from ea_chatbot.config import Settings
from ea_chatbot.utils.llm_factory import get_llm_model
from ea_chatbot.utils import helpers, database_inspection
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
from ea_chatbot.graph.prompts.planner import PLANNER_PROMPT
from ea_chatbot.schemas import TaskPlanResponse
def planner_node(state: AgentState) -> dict:
    """Generate a structured plan based on the query analysis.

    Args:
        state: Graph state; reads "question", the last 6 "messages", and
            "summary" (conversation summary).

    Returns:
        dict with "plan" set to the YAML-serialized task plan.

    Raises:
        Exception: re-raises any LLM invocation failure after logging it.
    """
    question = state["question"]
    history = state.get("messages", [])[-6:]
    summary = state.get("summary", "")

    settings = Settings()
    logger = get_logger("planner")
    logger.info("Generating task plan...")

    llm = get_llm_model(
        settings.planner_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )
    # Constrain output to the TaskPlanResponse schema.
    structured_llm = llm.with_structured_output(TaskPlanResponse)

    date_str = helpers.get_readable_date()
    # Always provide data summary; LLM decides relevance.
    database_description = database_inspection.get_data_summary(data_dir=settings.data_dir) or "No data available."
    example_plan = ""

    messages = PLANNER_PROMPT.format_messages(
        date=date_str,
        question=question,
        history=history,
        summary=summary,
        database_description=database_description,
        example_plan=example_plan
    )

    # Generate the structured plan
    try:
        response = structured_llm.invoke(messages)
        # Convert the structured response back to YAML string for the state
        plan_yaml = yaml.dump(response.model_dump(), sort_keys=False)
        logger.info("[bold green]Plan generated successfully.[/bold green]")
        return {"plan": plan_yaml}
    except Exception as e:
        logger.error(f"Failed to generate plan: {str(e)}")
        # Bare `raise` preserves the original traceback (no extra frame).
        raise

View File

@@ -0,0 +1,73 @@
from typing import List, Literal
from pydantic import BaseModel, Field
from ea_chatbot.graph.state import AgentState
from ea_chatbot.config import Settings
from ea_chatbot.utils.llm_factory import get_llm_model
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
from ea_chatbot.graph.prompts.query_analyzer import QUERY_ANALYZER_PROMPT
class QueryAnalysis(BaseModel):
    """Structured analysis of the user's query.

    Produced by the query-analyzer LLM via structured output. The Field
    descriptions below are part of the prompt the LLM sees, so their exact
    wording steers the model's behavior — edit with care.
    """
    # Entities/data points the question mentions.
    data_required: List[str] = Field(description="List of data points or entities mentioned (e.g., ['2024 results', 'Florida']).")
    # What the user ultimately wants to learn.
    unknowns: List[str] = Field(description="List of target information the user wants to know or needed for final answer (e.g., 'who won', 'total votes').")
    # Only blocking gaps; non-critical gaps should use defaults instead.
    ambiguities: List[str] = Field(description="List of CRITICAL missing details that prevent ANY analysis. Do NOT include database names or plot types if defaults can be used.")
    # Filters/constraints, including those resolved from conversation history.
    conditions: List[str] = Field(description="List of any filters or constraints (e.g., ['year=2024', 'state=Florida']). Include context resolved from history.")
    # Routing decision consumed by the graph (popped out of the analysis dict).
    next_action: Literal["plan", "clarify", "research"] = Field(description="The next action to take. 'plan' for data analysis (even with defaults), 'research' for general knowledge, or 'clarify' ONLY for critical ambiguities.")
def query_analyzer_node(state: AgentState) -> dict:
    """Analyze the user's question and determine the next course of action."""
    question = state["question"]
    # Keep last 3 turns (6 messages) of history for context.
    recent_history = state.get("messages", [])[-6:]
    conversation_summary = state.get("summary", "")

    settings = Settings()
    logger = get_logger("query_analyzer")
    logger.info(f"Analyzing question: [italic]\"{question}\"[/italic]")

    # Build the LLM via the factory (with a logging callback to track usage)
    # and constrain it to emit a QueryAnalysis object.
    analyzer = get_llm_model(
        settings.query_analyzer_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    ).with_structured_output(QueryAnalysis)

    # Render the prompt template into a message list.
    prompt = QUERY_ANALYZER_PROMPT.format_messages(
        question=question,
        history=recent_history,
        summary=conversation_summary
    )

    try:
        # Invoke the structured LLM and defensively re-validate the result.
        raw_result = analyzer.invoke(prompt)
        validated = QueryAnalysis.model_validate(raw_result)
        next_action = validated.next_action
        # The routing decision lives in its own state key, not the analysis dict.
        analysis_dict = validated.model_dump()
        del analysis_dict["next_action"]
        logger.info(f"Analysis complete. Next action: [bold magenta]{next_action}[/bold magenta]")
    except Exception as e:
        logger.error(f"Error during query analysis: {str(e)}")
        # On failure, fall back to asking the user for clarification.
        analysis_dict = {
            "data_required": [],
            "unknowns": [],
            "ambiguities": [f"Error during analysis: {str(e)}"],
            "conditions": []
        }
        next_action = "clarify"

    return {
        "analysis": analysis_dict,
        "next_action": next_action,
        "iterations": 0
    }

View File

@@ -0,0 +1,60 @@
from langchain_core.messages import AIMessage
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from ea_chatbot.graph.state import AgentState
from ea_chatbot.config import Settings
from ea_chatbot.utils.llm_factory import get_llm_model
from ea_chatbot.utils import helpers
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
from ea_chatbot.graph.prompts.researcher import RESEARCHER_PROMPT
def researcher_node(state: AgentState) -> dict:
    """Handle general research queries or web searches.

    Binds a provider-native search tool when the LLM is a Gemini or OpenAI
    chat model; otherwise falls back to the bare model.

    Args:
        state: Graph state; reads "question", the last 6 "messages", and
            "summary" (conversation summary).

    Returns:
        dict with the research response appended to "messages".

    Raises:
        Exception: re-raises any LLM invocation failure after logging it.
    """
    question = state["question"]
    history = state.get("messages", [])[-6:]
    summary = state.get("summary", "")

    settings = Settings()
    logger = get_logger("researcher")
    logger.info(f"Researching question: [italic]\"{question}\"[/italic]")

    # Use researcher_llm from settings
    llm = get_llm_model(
        settings.researcher_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )

    date_str = helpers.get_readable_date()
    messages = RESEARCHER_PROMPT.format_messages(
        date=date_str,
        question=question,
        history=history,
        summary=summary
    )

    # Provider-aware tool binding
    try:
        if isinstance(llm, ChatGoogleGenerativeAI):
            # Native Google Search for Gemini
            llm_with_tools = llm.bind_tools([{"google_search": {}}])
        elif isinstance(llm, ChatOpenAI):
            # Native Web Search for OpenAI (built-in tool)
            llm_with_tools = llm.bind_tools([{"type": "web_search"}])
        else:
            # Fallback for other providers that might not support these specific search tools
            llm_with_tools = llm
    except Exception as e:
        logger.warning(f"Failed to bind search tools: {str(e)}. Falling back to base LLM.")
        llm_with_tools = llm

    try:
        response = llm_with_tools.invoke(messages)
        logger.info("[bold green]Research complete.[/bold green]")
        return {
            "messages": [response]
        }
    except Exception as e:
        logger.error(f"Research failed: {str(e)}")
        # Bare `raise` preserves the original traceback (no extra frame).
        raise

View File

@@ -0,0 +1,52 @@
from langchain_core.messages import SystemMessage, HumanMessage
from ea_chatbot.graph.state import AgentState
from ea_chatbot.config import Settings
from ea_chatbot.utils.llm_factory import get_llm_model
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
def summarize_conversation_node(state: AgentState) -> dict:
    """Update the conversation summary based on the latest interaction."""
    current_summary = state.get("summary", "")
    all_messages = state.get("messages", [])

    # Nothing to summarize without messages.
    if not all_messages:
        return {}

    # Grab the last turn (User + Assistant pair).
    recent_turn = all_messages[-2:]

    settings = Settings()
    logger = get_logger("summarize_conversation")
    logger.info("Updating conversation summary...")

    # Use summarizer_llm for this task as well
    llm = get_llm_model(
        settings.summarizer_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )

    # Choose extend-vs-create instructions based on whether a summary exists.
    if current_summary:
        instruction = (
            f"This is a summary of the conversation so far: {current_summary}\n\n"
            "Extend the summary by taking into account the new messages above."
        )
        system_text = f"Current summary: {current_summary}"
    else:
        instruction = "Create a summary of the conversation above."
        system_text = "You are a helpful assistant that summarizes conversations."

    summarization_messages = [
        SystemMessage(content=system_text),
        HumanMessage(content=f"Recent messages:\n{recent_turn}\n\n{instruction}\n\nKeep the summary concise and focused on the key topics and data points discussed.")
    ]

    try:
        reply = llm.invoke(summarization_messages)
        logger.info("[bold green]Conversation summary updated.[/bold green]")
        return {"summary": reply.content}
    except Exception as e:
        logger.error(f"Failed to update summary: {str(e)}")
        # If summarization fails, keep the old summary.
        return {"summary": current_summary}

View File

@@ -0,0 +1,44 @@
from langchain_core.messages import AIMessage
from ea_chatbot.graph.state import AgentState
from ea_chatbot.config import Settings
from ea_chatbot.utils.llm_factory import get_llm_model
from ea_chatbot.utils.logging import get_logger, LangChainLoggingHandler
from ea_chatbot.graph.prompts.summarizer import SUMMARIZER_PROMPT
def summarizer_node(state: AgentState) -> dict:
    """Summarize the code execution results into a final answer.

    Args:
        state: Graph state; reads "question", "plan", "code_output",
            the last 6 "messages", and "summary".

    Returns:
        dict with the final answer appended to "messages".

    Raises:
        Exception: re-raises any LLM invocation failure after logging it.
    """
    question = state["question"]
    plan = state.get("plan", "")
    code_output = state.get("code_output", "")
    history = state.get("messages", [])[-6:]
    summary = state.get("summary", "")

    settings = Settings()
    logger = get_logger("summarizer")
    logger.info("Generating final summary...")

    llm = get_llm_model(
        settings.summarizer_llm,
        callbacks=[LangChainLoggingHandler(logger=logger)]
    )

    messages = SUMMARIZER_PROMPT.format_messages(
        question=question,
        plan=plan,
        code_output=code_output,
        history=history,
        summary=summary
    )

    try:
        response = llm.invoke(messages)
        logger.info("[bold green]Summary generated.[/bold green]")
        # Return the final message to be added to the state
        return {
            "messages": [response]
        }
    except Exception as e:
        logger.error(f"Failed to generate summary: {str(e)}")
        # Bare `raise` preserves the original traceback (no extra frame).
        raise