Refactor: Move backend files to backend/ directory and split .gitignore

This commit is contained in:
Yunxiao Xu
2026-02-11 17:40:44 -08:00
parent 48924affa0
commit 7a69133e26
96 changed files with 144 additions and 176 deletions

View File

@@ -0,0 +1,10 @@
from .query_analyzer import QUERY_ANALYZER_PROMPT
from .planner import PLANNER_PROMPT
from .coder import CODE_GENERATOR_PROMPT, ERROR_CORRECTOR_PROMPT
__all__ = [
"QUERY_ANALYZER_PROMPT",
"PLANNER_PROMPT",
"CODE_GENERATOR_PROMPT",
"ERROR_CORRECTOR_PROMPT",
]

View File

@@ -0,0 +1,64 @@
from langchain_core.prompts import ChatPromptTemplate
CODE_GENERATOR_SYSTEM = """You are an AI data analyst and your job is to assist users with data analysis and coding tasks.
The user will provide a task and a plan.
**Data Access:**
- A database client is available as a variable named `db`.
- You MUST use `db.query_df(sql_query)` to execute SQL queries and retrieve data as a Pandas DataFrame.
- Do NOT assume a dataframe `df` is already loaded unless explicitly stated. You usually need to query it first.
- The database schema is described in the prompt. Use it to construct valid SQL queries.
**Plotting:**
- If you need to plot any data, use the `plots` list to store the figures.
- Example: `plots.append(fig)` or `plots.append(plt.gcf())`.
- Do not use `plt.show()` as it will render the plot and cause an error.
**Code Requirements:**
- Produce FULL, COMPLETE CODE that includes all steps and solves the task!
- Always include the import statements at the top of the code (e.g., `import pandas as pd`, `import matplotlib.pyplot as plt`).
- Always include print statements to output the results of your code.
- Use `db.query_df("SELECT ...")` to get data."""
CODE_GENERATOR_USER = """TASK:
{question}
PLAN:
```yaml
{plan}
```
AVAILABLE DATA SUMMARY (Database Schema):
{database_description}
CODE EXECUTION OF THE PREVIOUS TASK RESULTED IN:
{code_exec_results}
{example_code}"""
ERROR_CORRECTOR_SYSTEM = """The execution of the code resulted in an error.
Return a complete, corrected python code that incorporates the fixes for the error.
**Reminders:**
- You have access to a database client via the variable `db`.
- Use `db.query_df(sql)` to run queries.
- Use `plots.append(fig)` for plots.
- Always include imports and print statements."""
ERROR_CORRECTOR_USER = """FAILED CODE:
```python
{code}
```
ERROR:
{error}"""
CODE_GENERATOR_PROMPT = ChatPromptTemplate.from_messages([
("system", CODE_GENERATOR_SYSTEM),
("human", CODE_GENERATOR_USER),
])
ERROR_CORRECTOR_PROMPT = ChatPromptTemplate.from_messages([
("system", ERROR_CORRECTOR_SYSTEM),
("human", ERROR_CORRECTOR_USER),
])

View File

@@ -0,0 +1,46 @@
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
PLANNER_SYSTEM = """You are a Research Specialist and your job is to find answers and educate the user.
Provide factual information responding directly to the user's question. Include key details and context to ensure your response comprehensively answers their query.
Today's Date is: {date}"""
PLANNER_USER = """Conversation Summary: {summary}
TASK:
{question}
AVAILABLE DATA SUMMARY (Use only if relevant to the task):
{database_description}
First: Evaluate whether you have all necessary and requested information to provide a solution.
Use the dataset description above to determine what data and in what format you have available to you.
You are able to search internet if the user asks for it, or you require any information that you can not derive from the given dataset or the instruction.
Second: Incorporate any additional relevant context, reasoning, or details from previous interactions or internal chain-of-thought that may impact the solution.
Ensure that all such information is fully included in your response rather than referring to previous answers indirectly.
Third: Reflect on the problem and briefly describe it, while addressing the problem goal, inputs, outputs,
rules, constraints, and other relevant details that appear in the problem description.
Fourth: Based on the preceding steps, formulate your response as an algorithm, breaking the solution in up to eight simple concise yet descriptive, clear English steps.
You MUST Include all values or instructions as described in the above task, or retrieved using internet search!
If fewer steps suffice, that's acceptable. If more are needed, please include them.
Remember to explain steps rather than write code.
This algorithm will be later converted to Python code.
If a dataframe is required, assume it is named 'df' and is already defined/populated based on the data summary.
There is a list variable called `plots` that you need to use to store any plots you generate. Do not use `plt.show()` as it will render the plot and cause an error.
Output the algorithm as a YAML string. Always enclose the YAML string within ```yaml tags.
**Note: Ensure that any necessary context from prior interactions is fully embedded in the plan. Do not use phrases like "refer to previous answer"; instead, provide complete details inline.**
{example_plan}"""
PLANNER_PROMPT = ChatPromptTemplate.from_messages([
("system", PLANNER_SYSTEM),
MessagesPlaceholder(variable_name="history"),
("human", PLANNER_USER),
])

View File

@@ -0,0 +1,33 @@
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
SYSTEM_PROMPT = """You are an expert election data analyst. Decompose the user's question into key elements to determine the next action.
### Context & Defaults
- **History:** Use the conversation history and summary to resolve coreferences (e.g., "those results", "that state"). Assume the current question inherits missing context (Year, State, County) from history.
- **Data Access:** You have access to voter and election databases. Proceed to planning without asking for database or table names.
- **Downstream Capabilities:** Visualizations are generated as Matplotlib figures. Proceed to planning for "graphs" or "plots" without asking for file formats or plot types.
- **Trends:** For trend requests without a specified interval, allow the Planner to use a sensible default (e.g., by election cycle).
### Instructions:
1. **Analyze:** Identify if the request is for data analysis, general facts (web research), or is critically ambiguous.
2. **Extract Entities & Conditions:**
- **Data Required:** e.g., "vote count", "demographics".
- **Conditions:** e.g., "Year=2024". Include context from history.
3. **Identify Target & Critical Ambiguities:**
- **Unknowns:** The core target question.
- **Critical Ambiguities:** ONLY list issues that PREVENT any analysis.
- Examples: No timeframe/geography in query OR history; "track the same voter" without an identity definition.
4. **Determine Action:**
- `plan`: For data analysis where defaults or history provide sufficient context.
- `research`: For general knowledge.
- `clarify`: ONLY for CRITICAL ambiguities."""
USER_PROMPT_TEMPLATE = """Conversation Summary: {summary}
Analyze the following question: {question}"""
QUERY_ANALYZER_PROMPT = ChatPromptTemplate.from_messages([
("system", SYSTEM_PROMPT),
MessagesPlaceholder(variable_name="history"),
("human", USER_PROMPT_TEMPLATE),
])

View File

@@ -0,0 +1,12 @@
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
RESEARCHER_PROMPT = ChatPromptTemplate.from_messages([
("system", """You are a Research Specialist and your job is to find answers and educate the user.
Provide factual information responding directly to the user's question. Include key details and context to ensure your response comprehensively answers their query.
Today's Date is: {date}"""),
MessagesPlaceholder(variable_name="history"),
("user", """Conversation Summary: {summary}
{question}""")
])

View File

@@ -0,0 +1,27 @@
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
SUMMARIZER_PROMPT = ChatPromptTemplate.from_messages([
("system", """You are an expert election data analyst providing a final answer to the user.
Use the provided conversation history and summary to ensure your response is contextually relevant and flows naturally from previous turns.
Conversation Summary: {summary}"""),
MessagesPlaceholder(variable_name="history"),
("user", """The user presented you with the following question.
Question: {question}
To address this, you have designed an algorithm.
Algorithm: {plan}.
You have crafted a Python code based on this algorithm, and the output generated by the code's execution is as follows.
Output: {code_output}.
Please produce a comprehensive, easy-to-understand answer that:
1. Summarizes the main insights or conclusions achieved through your method's implementation. Include execution results if necessary.
2. Includes relevant findings from the code execution in a clear format (e.g., text explanation, tables, lists, bullet points).
- Avoid referencing the code or output as 'the above results' or saying 'it's in the code output.'
- Instead, present the actual key data or statistics within your explanation.
3. If the user requested specific information that does not appear in the code's output but you can provide it, include that information directly in your summary.
4. Present any data or tables that might have been generated by the code in full, since the user cannot directly see the execution output.
Your goal is to give a final answer that stands on its own without requiring the user to see the code or raw output directly.""")
])