Refactor: Move backend files to backend/ directory and split .gitignore

2026-02-11 17:40:44 -08:00
parent 48924affa0
commit 7a69133e26
96 changed files with 144 additions and 176 deletions
--- a/backend/src/ea_chatbot/graph/prompts/__init__.py
+++ b/backend/src/ea_chatbot/graph/prompts/__init__.py
@@ -0,0 +1,18 @@
+"""Prompt templates exposed by the ``prompts`` package."""
+
+from .query_analyzer import QUERY_ANALYZER_PROMPT
+from .planner import PLANNER_PROMPT
+from .coder import CODE_GENERATOR_PROMPT, ERROR_CORRECTOR_PROMPT
+from .researcher import RESEARCHER_PROMPT
+from .summarizer import SUMMARIZER_PROMPT
+
+# Re-export every template defined in the sibling modules so callers can
+# import any of them from the package itself.
+__all__ = [
+    "QUERY_ANALYZER_PROMPT",
+    "PLANNER_PROMPT",
+    "CODE_GENERATOR_PROMPT",
+    "ERROR_CORRECTOR_PROMPT",
+    "RESEARCHER_PROMPT",
+    "SUMMARIZER_PROMPT",
+]
--- a/backend/src/ea_chatbot/graph/prompts/coder.py
+++ b/backend/src/ea_chatbot/graph/prompts/coder.py
@@ -0,0 +1,64 @@
+from langchain_core.prompts import ChatPromptTemplate
+
+CODE_GENERATOR_SYSTEM = """You are an AI data analyst and your job is to assist users with data analysis and coding tasks.
+The user will provide a task and a plan.
+
+**Data Access:**
+- A database client is available as a variable named `db`.
+- You MUST use `db.query_df(sql_query)` to execute SQL queries and retrieve data as a Pandas DataFrame.
+- Do NOT assume a dataframe `df` is already loaded unless explicitly stated. You usually need to query it first.
+- The database schema is described in the prompt. Use it to construct valid SQL queries.
+
+**Plotting:**
+- If you need to plot any data, use the `plots` list to store the figures.
+- Example: `plots.append(fig)` or `plots.append(plt.gcf())`.
+- Do not use `plt.show()` as it will render the plot and cause an error.
+
+**Code Requirements:**
+- Produce FULL, COMPLETE CODE that includes all steps and solves the task!
+- Always include the import statements at the top of the code (e.g., `import pandas as pd`, `import matplotlib.pyplot as plt`).
+- Always include print statements to output the results of your code.
+- Use `db.query_df("SELECT ...")` to get data."""  # System message for the code generator; fixed text with no template placeholders.
+
+CODE_GENERATOR_USER = """TASK:
+{question}
+
+PLAN:
+```yaml
+{plan}
+```
+
+AVAILABLE DATA SUMMARY (Database Schema):
+{database_description}
+
+CODE EXECUTION OF THE PREVIOUS TASK RESULTED IN:
+{code_exec_results}
+
+{example_code}"""  # Human message for code generation; placeholders: question, plan, database_description, code_exec_results, example_code.
+
+ERROR_CORRECTOR_SYSTEM = """The execution of the code resulted in an error.
+Return a complete, corrected python code that incorporates the fixes for the error.
+
+**Reminders:**
+- You have access to a database client via the variable `db`.
+- Use `db.query_df(sql)` to run queries.
+- Use `plots.append(fig)` for plots.
+- Always include imports and print statements."""  # System message for error correction; fixed text with no template placeholders.
+
+ERROR_CORRECTOR_USER = """FAILED CODE:
+```python
+{code}
+```
+
+ERROR:
+{error}"""  # Human message for error correction; placeholders: code (the failed source) and error.
+
+CODE_GENERATOR_PROMPT = ChatPromptTemplate.from_messages([  # Chat template pairing the code-generation system and human messages.
+    ("system", CODE_GENERATOR_SYSTEM),  # fixed instructions: data access, plotting, code requirements
+    ("human", CODE_GENERATOR_USER),  # task, plan, schema, previous execution results, example code
+])
+
+ERROR_CORRECTOR_PROMPT = ChatPromptTemplate.from_messages([  # Chat template pairing the error-correction system and human messages.
+    ("system", ERROR_CORRECTOR_SYSTEM),  # fixed reminders about `db`, `plots`, imports and prints
+    ("human", ERROR_CORRECTOR_USER),  # the failed code and the error it produced
+])
--- a/backend/src/ea_chatbot/graph/prompts/planner.py
+++ b/backend/src/ea_chatbot/graph/prompts/planner.py
@@ -0,0 +1,46 @@
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+
+PLANNER_SYSTEM = """You are a Research Specialist and your job is to find answers and educate the user. 
+Provide factual information responding directly to the user's question. Include key details and context to ensure your response comprehensively answers their query.
+
+Today's Date is: {date}"""  # System message for the planner; placeholder: date. NOTE(review): text is identical to the researcher system prompt - confirm that is intended.
+
+PLANNER_USER = """Conversation Summary: {summary}
+
+TASK:
+{question}
+
+AVAILABLE DATA SUMMARY (Use only if relevant to the task):
+{database_description}
+
+First: Evaluate whether you have all necessary and requested information to provide a solution. 
+Use the dataset description above to determine what data and in what format you have available to you.
+You are able to search internet if the user asks for it, or you require any information that you can not derive from the given dataset or the instruction.
+
+Second: Incorporate any additional relevant context, reasoning, or details from previous interactions or internal chain-of-thought that may impact the solution.
+Ensure that all such information is fully included in your response rather than referring to previous answers indirectly.
+
+Third: Reflect on the problem and briefly describe it, while addressing the problem goal, inputs, outputs,
+rules, constraints, and other relevant details that appear in the problem description.
+
+Fourth: Based on the preceding steps, formulate your response as an algorithm, breaking the solution in up to eight simple concise yet descriptive, clear English steps. 
+You MUST Include all values or instructions as described in the above task, or retrieved using internet search!
+If fewer steps suffice, that's acceptable. If more are needed, please include them.
+Remember to explain steps rather than write code.
+
+This algorithm will be later converted to Python code.
+If a dataframe is required, include a step that retrieves it from the database with a SQL query based on the data summary; do not assume a dataframe named 'df' is already defined/populated.
+
+There is a list variable called `plots` that you need to use to store any plots you generate. Do not use `plt.show()` as it will render the plot and cause an error.
+
+Output the algorithm as a YAML string. Always enclose the YAML string within ```yaml tags.
+
+**Note: Ensure that any necessary context from prior interactions is fully embedded in the plan. Do not use phrases like "refer to previous answer"; instead, provide complete details inline.**
+
+{example_plan}"""  # Human message for the planner; placeholders: summary, question, database_description, example_plan. Data-loading wording agrees with the code generator, which must query via `db` rather than assume `df`.
+
+PLANNER_PROMPT = ChatPromptTemplate.from_messages([  # Chat template for the planning step.
+    ("system", PLANNER_SYSTEM),  # placeholder: date
+    MessagesPlaceholder(variable_name="history"),  # messages supplied as `history` are inserted between the system and human turns
+    ("human", PLANNER_USER),  # summary, task, data description and planning instructions
+])
--- a/backend/src/ea_chatbot/graph/prompts/query_analyzer.py
+++ b/backend/src/ea_chatbot/graph/prompts/query_analyzer.py
@@ -0,0 +1,33 @@
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+
+SYSTEM_PROMPT = """You are an expert election data analyst. Decompose the user's question into key elements to determine the next action.
+
+### Context & Defaults
+- **History:** Use the conversation history and summary to resolve coreferences (e.g., "those results", "that state"). Assume the current question inherits missing context (Year, State, County) from history.
+- **Data Access:** You have access to voter and election databases. Proceed to planning without asking for database or table names.
+- **Downstream Capabilities:** Visualizations are generated as Matplotlib figures. Proceed to planning for "graphs" or "plots" without asking for file formats or plot types.
+- **Trends:** For trend requests without a specified interval, allow the Planner to use a sensible default (e.g., by election cycle).
+
+### Instructions:
+1. **Analyze:** Identify if the request is for data analysis, general facts (web research), or is critically ambiguous.
+2. **Extract Entities & Conditions:**
+   - **Data Required:** e.g., "vote count", "demographics".
+   - **Conditions:** e.g., "Year=2024". Include context from history.
+3. **Identify Target & Critical Ambiguities:**
+   - **Unknowns:** The core target question.
+   - **Critical Ambiguities:** ONLY list issues that PREVENT any analysis.
+     - Examples: No timeframe/geography in query OR history; "track the same voter" without an identity definition.
+4. **Determine Action:**
+   - `plan`: For data analysis where defaults or history provide sufficient context.
+   - `research`: For general knowledge.
+   - `clarify`: ONLY for CRITICAL ambiguities."""  # System message for the query analyzer; fixed text with no template placeholders. Names three actions: plan, research, clarify.
+
+USER_PROMPT_TEMPLATE = """Conversation Summary: {summary}
+
+Analyze the following question: {question}"""  # Human message for the query analyzer; placeholders: summary, question.
+
+QUERY_ANALYZER_PROMPT = ChatPromptTemplate.from_messages([  # Chat template for the query-analysis step.
+    ("system", SYSTEM_PROMPT),  # fixed analysis instructions
+    MessagesPlaceholder(variable_name="history"),  # messages supplied as `history` are inserted between the system and human turns
+    ("human", USER_PROMPT_TEMPLATE),  # conversation summary and the question to analyze
+])
--- a/backend/src/ea_chatbot/graph/prompts/researcher.py
+++ b/backend/src/ea_chatbot/graph/prompts/researcher.py
@@ -0,0 +1,12 @@
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+
+RESEARCHER_PROMPT = ChatPromptTemplate.from_messages([  # Chat template for the research step; message texts are defined inline.
+    ("system", """You are a Research Specialist and your job is to find answers and educate the user. 
+Provide factual information responding directly to the user's question. Include key details and context to ensure your response comprehensively answers their query.
+
+Today's Date is: {date}"""),  # system turn; placeholder: date
+    MessagesPlaceholder(variable_name="history"),  # messages supplied as `history` are inserted between the system and user turns
+    ("user", """Conversation Summary: {summary}
+
+{question}""")  # user turn; placeholders: summary, question
+])
--- a/backend/src/ea_chatbot/graph/prompts/summarizer.py
+++ b/backend/src/ea_chatbot/graph/prompts/summarizer.py
@@ -0,0 +1,27 @@
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+
+SUMMARIZER_PROMPT = ChatPromptTemplate.from_messages([  # Chat template for composing the final answer; message texts are defined inline.
+    ("system", """You are an expert election data analyst providing a final answer to the user.
+Use the provided conversation history and summary to ensure your response is contextually relevant and flows naturally from previous turns.
+
+Conversation Summary: {summary}"""),  # system turn; placeholder: summary
+    MessagesPlaceholder(variable_name="history"),  # messages supplied as `history` are inserted between the system and user turns
+    ("user", """The user presented you with the following question.
+Question: {question}
+
+To address this, you have designed an algorithm.
+Algorithm: {plan}.
+
+You have crafted a Python code based on this algorithm, and the output generated by the code's execution is as follows.
+Output: {code_output}.
+
+Please produce a comprehensive, easy-to-understand answer that:
+1. Summarizes the main insights or conclusions achieved through your method's implementation. Include execution results if necessary.
+2. Includes relevant findings from the code execution in a clear format (e.g., text explanation, tables, lists, bullet points).
+   - Avoid referencing the code or output as 'the above results' or saying 'it's in the code output.' 
+   - Instead, present the actual key data or statistics within your explanation.
+3. If the user requested specific information that does not appear in the code's output but you can provide it, include that information directly in your summary.
+4. Present any data or tables that might have been generated by the code in full, since the user cannot directly see the execution output.
+
+Your goal is to give a final answer that stands on its own without requiring the user to see the code or raw output directly.""")  # user turn; placeholders: question, plan, code_output
+])