diff --git a/GEMINI.md b/GEMINI.md index 67273a9..a748d6c 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -1,167 +1,22 @@ -# Election Analytics Chatbot - Project Guide +# Election Analytics Chatbot ## Overview -This document serves as a guide for rewriting the current "BambooAI" based chatbot system into a modern, stateful, and graph-based architecture using **LangGraph**. The goal is to improve maintainability, observability, and flexibility of the agentic workflows. +This project is an Election Analytics Chatbot built with a modern, stateful, and graph-based architecture. It is divided into a backend (Python, LangGraph) and a frontend (React, TypeScript). -## 1. Migration Goals -- **Framework Switch**: Move from the custom linear `ChatBot` class (in `src/ea_chatbot/bambooai/core/chatbot.py`) to `LangGraph`. -- **State Management**: explicit state management using LangGraph's `StateGraph`. -- **Modularity**: Break down monolithic methods (`pd_agent_converse`, `execute_code`) into distinct Nodes. -- **Observability**: Easier debugging of the decision process (Routing -> Planning -> Coding -> Executing). +## Project Structure +- **[Backend](./backend/GEMINI.md)**: Python-based LangGraph agent for data analysis and query processing. +- **[Frontend](./frontend/GEMINI.md)**: React application for user interaction. -## 2. Architecture Proposal +## Key Technologies +- **Backend**: LangGraph, LangChain, OpenAI/Google Gemini, PostgreSQL. +- **Frontend**: React, TypeScript, Vite. -### 2.1. The Graph State -The state will track the conversation and execution context. +## Documentation +- **[Backend Guide](./backend/GEMINI.md)**: Detailed information about the backend architecture, migration goals, and implementation steps. +- **[Frontend Guide](./frontend/GEMINI.md)**: Frontend development guide and technology stack. +- **LangChain Docs**: See the `langchain-docs/` folder for local LangChain and LangGraph documentation. 
-```python -from typing import TypedDict, Annotated, List, Dict, Any, Optional -from langchain_core.messages import BaseMessage -import operator - -class AgentState(TypedDict): - # Conversation history - messages: Annotated[List[BaseMessage], operator.add] - - # Task context - question: str - - # Query Analysis (Decomposition results) - analysis: Optional[Dict[str, Any]] - # Expected keys: "requires_dataset", "expert", "data", "unknown", "condition" - - # Step-by-step reasoning - plan: Optional[str] - - # Code execution context - code: Optional[str] - code_output: Optional[str] - error: Optional[str] - - # Artifacts (for UI display) - plots: List[Figure] # Matplotlib figures - dfs: Dict[str, DataFrame] # Pandas DataFrames - - # Control flow - iterations: int - next_action: str # Routing hint: "clarify", "plan", "research", "end" -``` - -### 2.2. Nodes (The Actors) -We will map existing logic to these nodes: - -1. **`query_analyzer_node`** (Router & Refiner): - * **Logic**: Replaces `Expert Selector` and `Analyst Selector`. - * **Function**: - 1. Decomposes the user's query into key elements (Data, Unknowns, Conditions). - 2. Determines if the query is ambiguous or missing critical information. - * **Output**: Updates `messages`. Returns routing decision: - * `clarification_node` (if ambiguous). - * `planner_node` (if clear data task). - * `researcher_node` (if general/web task). - -2. **`clarification_node`** (Human-in-the-loop): - * **Logic**: Replaces `Theorist-Clarification`. - * **Function**: Formulates a specific question to ask the user for missing details. - * **Output**: Returns a message to the user and **interrupts** the graph execution to await user input. - -3. **`researcher_node`** (Theorist): - * **Logic**: Handles general queries or web searches. - * **Function**: Uses `GoogleSearch` tool if necessary. - * **Output**: Final answer. - -4. **`planner_node`**: - * **Logic**: Replaces `Planner`. 
- * **Function**: Generates a step-by-step plan based on the decomposed query elements and dataframe ontology. - * **Output**: Updates `plan`. - -5. **`coder_node`**: - * **Logic**: Replaces `Code Generator` & `Error Corrector`. - * **Function**: Generates Python code. If `error` exists in state, it attempts to fix it. - * **Output**: Updates `code`. - -6. **`executor_node`**: - * **Logic**: Replaces `Code Executor`. - * **Function**: Executes the Python code in a safe(r) environment. It needs access to the `DBClient`. - * **Output**: Updates `code_output`, `plots`, `dfs`. If exception, updates `error`. - -7. **`summarizer_node`**: - * **Logic**: Replaces `Solution Summarizer`. - * **Function**: Interprets the code output and generates a natural language response. - * **Output**: Final response message. - -### 2.3. The Workflow (Graph) - -```mermaid -graph TD - Start --> QueryAnalyzer - QueryAnalyzer -->|Ambiguous| Clarification - Clarification -->|User Input| QueryAnalyzer - QueryAnalyzer -->|General/Web| Researcher - QueryAnalyzer -->|Data Analysis| Planner - Planner --> Coder - Coder --> Executor - Executor -->|Success| Summarizer - Executor -->|Error| Coder - Researcher --> End - Summarizer --> End -``` - -## 3. Implementation Steps - -### Step 1: Dependencies -Add the following packages to `pyproject.toml`: -* `langgraph` -* `langchain` -* `langchain-openai` -* `langchain-google-genai` -* `langchain-community` - -### Step 2: Directory Structure -Create a new package for the graph logic to keep it separate from the old one during migration. - -``` -src/ea_chatbot/ -├── graph/ -│ ├── __init__.py -│ ├── state.py # State definition -│ ├── nodes/ # Individual node implementations -│ │ ├── __init__.py -│ │ ├── router.py -│ │ ├── planner.py -│ │ ├── coder.py -│ │ ├── executor.py -│ │ └── ... -│ ├── workflow.py # Graph construction -│ └── tools/ # DB and Search tools wrapped for LangChain -└── ... 
-``` - -### Step 3: Tool Wrapping -Wrap the existing `DBClient` (from `src/ea_chatbot/bambooai/utils/db_client.py`) into a structure accessible by the `executor_node`. The `executor_node` will likely keep the existing `exec()` based approach initially for compatibility with the generated code, but structured as a graph node. - -### Step 4: Prompt Migration -Port the prompts from `data/PROMPT_TEMPLATES.json` or `src/ea_chatbot/bambooai/prompts/strings.py` into the respective nodes. Use LangChain's `ChatPromptTemplate` for better management. - -### Step 5: Streamlit Integration -Update `src/ea_chatbot/app.py` to use the new `workflow.compile()` runnable. -* Instead of `chatbot.pd_agent_converse(...)`, use `app.stream(...)` (LangGraph app). -* Handle the streaming output to update the UI progressively. - -## 4. Key Considerations for Refactoring - -* **Database Connection**: Ensure `DBClient` is initialized once and passed to the `Executor` node efficiently (e.g., via `configurable` parameters or closure). -* **Prompt Templating**: The current system uses simple `format` strings. Switching to LangChain templates allows for easier model switching and partial formatting. -* **Token Management**: LangGraph provides built-in tracing (if LangSmith is enabled), but we should ensure the `OutputManager` logic (printing costs/tokens) is preserved or adapted if still needed for the CLI/Logs. -* **Vector DB**: The current system has `PineconeWrapper` for RAG. This should be integrated into the `Planner` or `Coder` node to fetch few-shot examples or context. - -## 5. Next Actions -1. **Initialize**: Create the folder structure. -2. **Define State**: Create `src/ea_chatbot/graph/state.py`. -3. **Implement Router**: Create the first node to replicate `Expert Selector` logic. -4. **Implement Executor**: Port the `exec()` logic to a node. - -## 6. Git Operations +## Git Operations - Branches should be used for specific features or bug fixes. 
- New branches should be created from the `main` branch and `conductor` branch. - The conductor should always use the `conductor` branch and derived branches. diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 68be3aa..03e0f55 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -5,6 +5,7 @@ import { LoginForm } from "./components/auth/LoginForm" import { RegisterForm } from "./components/auth/RegisterForm" import { AuthCallback } from "./components/auth/AuthCallback" import { AuthService, type UserResponse } from "./services/auth" +import { registerUnauthorizedCallback } from "./services/api" function App() { const [isAuthenticated, setIsAuthenticated] = useState(false) @@ -13,6 +14,12 @@ function App() { const [isLoading, setIsLoading] = useState(true) useEffect(() => { + // Register callback to handle session expiration from anywhere in the app + registerUnauthorizedCallback(() => { + setIsAuthenticated(false) + setUser(null) + }) + const initAuth = async () => { try { const userData = await AuthService.getMe() diff --git a/frontend/src/components/auth/AuthCallback.tsx b/frontend/src/components/auth/AuthCallback.tsx index 00f4846..064b968 100644 --- a/frontend/src/components/auth/AuthCallback.tsx +++ b/frontend/src/components/auth/AuthCallback.tsx @@ -7,9 +7,18 @@ export function AuthCallback() { useEffect(() => { const verifyAuth = async () => { + const urlParams = new URLSearchParams(window.location.search) + const code = urlParams.get("code") + try { - // The cookie should have been set by the backend redirect - await AuthService.getMe() + if (code) { + // If we have a code, exchange it for a cookie + await AuthService.exchangeOIDCCode(code) + } else { + // If no code, just verify existing cookie (backend-driven redirect) + await AuthService.getMe() + } + // Success - go to home. 
We use window.location.href to ensure a clean reload of App state window.location.href = "/" } catch (err) { diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index 6d34a7a..f42744d 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -7,15 +7,26 @@ const api = axios.create({ withCredentials: true, // Crucial for HttpOnly cookies }) +// Optional callback for unauthorized errors +let onUnauthorized: (() => void) | null = null + +export const registerUnauthorizedCallback = (callback: () => void) => { + onUnauthorized = callback +} + // Add a response interceptor to handle 401s api.interceptors.response.use( (response) => response, (error) => { - if (error.response?.status === 401) { - // Unauthorized - session likely expired - // We can't use useNavigate here as it's not a React component - // But we can redirect to home which will trigger the login view in App.tsx - window.location.href = "/" + // Only handle if it's not an auth endpoint + // This prevents loops during bootstrap and allows login form to show errors + const isAuthEndpoint = /^\/auth\//.test(error.config?.url) + + if (error.response?.status === 401 && !isAuthEndpoint) { + // Unauthorized - session likely expired on a protected data route + if (onUnauthorized) { + onUnauthorized() + } } return Promise.reject(error) } diff --git a/frontend/src/services/auth.ts b/frontend/src/services/auth.ts index 1b322a8..e2a033f 100644 --- a/frontend/src/services/auth.ts +++ b/frontend/src/services/auth.ts @@ -28,6 +28,11 @@ export const AuthService = { } }, + async exchangeOIDCCode(code: string): Promise { + const response = await api.get(`/auth/oidc/callback?code=${encodeURIComponent(code)}`) // percent-encode: the code may contain reserved URL characters (+, &, =, #) + return response.data + }, + async register(email: string, password: string): Promise { const response = await api.post("/auth/register", { email,