From b56da2facd22b696633dee8d3088f1e7704565a3 Mon Sep 17 00:00:00 2001 From: Michael Chihlas Date: Sat, 25 Apr 2026 22:22:33 -0400 Subject: [PATCH 1/4] fix(chat): sync currentChatRef when prefill creates a new chat session MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dashboard prefill flow in AssistantChatPage set activeChatId after creating a new session but never updated currentChatRef.current. Every later handleSend / handleTaskSubmit then tripped the `currentChatRef.current !== sentForChatId` guard that was supposed to discard responses for stale chats — and silently dropped the AI's follow-up. The user saw their submitted message but no assistant reply, no toast, no task-lane update. Mirrors what handleNewChat and handleResumeNew already do. Adds an e2e regression test that drives the dashboard prefill, submits a partial task-lane response, and asserts the second AI turn renders. Co-Authored-By: Claude Opus 4.7 --- frontend/e2e/assistant-chat-prefill.spec.ts | 111 ++++++++++++++++++++ frontend/src/pages/AssistantChatPage.tsx | 6 ++ 2 files changed, 117 insertions(+) create mode 100644 frontend/e2e/assistant-chat-prefill.spec.ts diff --git a/frontend/e2e/assistant-chat-prefill.spec.ts b/frontend/e2e/assistant-chat-prefill.spec.ts new file mode 100644 index 00000000..4d71acd8 --- /dev/null +++ b/frontend/e2e/assistant-chat-prefill.spec.ts @@ -0,0 +1,111 @@ +import { expect, test } from '@playwright/test' + +/** + * Regression test for the prefill-handoff `currentChatRef` bug. + * + * Symptom: a chat session created via the dashboard prefill flow + * looked fine on the first AI turn, but submitting partial answers + * from the task lane silently dropped the AI's follow-up response. + * The user saw their answers in the chat, no assistant reply, no + * toast. + * + * Root cause: the prefill effect in `AssistantChatPage` set + * `activeChatId` without also updating `currentChatRef.current`, so + * the `currentChatRef.current !== sentForChatId` guard in + * `handleTaskSubmit` (and `handleSend`) tripped on every subsequent + * request and discarded the AI response. + * + * Strategy: drive the real prefill flow against the real backend, but + * intercept the `/chat` endpoint with `page.route` so we get + * deterministic question payloads on turn 1 and a deterministic + * follow-up on turn 2. The fix is what makes turn 2 visible. + */ +test.describe('AssistantChatPage — prefill handoff regression', () => { + test('AI follow-up renders after submitting partial task lane answers', async ({ page }) => { + let chatCallCount = 0 + + // Clear any persisted active-chat-id so the page does not auto-resume a + // stale session left behind by a sibling spec. + await page.addInitScript(() => { + try { + sessionStorage.removeItem('rf-active-chat-id') + sessionStorage.removeItem('rf-tasklane-meta') + } catch { /* ignore */ } + }) + + // Intercept only the chat endpoint. Session creation, listSessions, + // facts, suggested-fixes, etc. all hit the real backend so the page + // renders normally — only the LLM call is deterministic. The pattern + // matches `/ai-sessions//chat` and nothing nested beneath it. + await page.route(/\/api\/v1\/ai-sessions\/[^/]+\/chat$/, async (route) => { + if (route.request().method() !== 'POST') { + await route.fallback() + return + } + chatCallCount += 1 + if (chatCallCount === 1) { + await route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + content: 'Initial diagnostic plan. Please answer the questions in the task lane.', + suggested_flows: [], + fork: null, + actions: [], + questions: [ + { text: 'Has the user recently changed their password?' }, + { text: 'Is the lockout happening at a consistent time of day?' }, + ], + }), + }) + return + } + await route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + content: 'Got it — based on your answer, here is what to check next.', + suggested_flows: [], + fork: null, + actions: [], + questions: [], + }), + }) + }) + + // Drive the prefill flow exactly the way the dashboard does. The textarea + // is keyed by its placeholder copy on QuickStartPage. + await page.goto('/') + const prefillBox = page.getByPlaceholder(/Describe the issue/i) + await expect(prefillBox).toBeVisible({ timeout: 10_000 }) + await prefillBox.fill('User locked out of AD weekly') + await prefillBox.press('Enter') + + // After the prefill submits we land on /pilot and the first stubbed AI + // turn surfaces the task-lane question text. + await expect(page).toHaveURL(/\/pilot/) + await expect( + page.getByText('Has the user recently changed their password?'), + ).toBeVisible({ timeout: 15_000 }) + + // Answer the first question. UI flow: click "Answer" to open the + // textarea, type, click the inline "Answer" button to mark done. + await page.getByRole('button', { name: /^Answer$/ }).first().click() + await page.getByPlaceholder('Type your answer...').fill('No, password is months old') + await page.getByRole('button', { name: /^Answer$/ }).first().click() + + // Submit the partial response. Pre-fix: the response was silently dropped + // here because `currentChatRef.current` still held the mount-time value. + await page.getByRole('button', { name: /Send 1 of 2 Responses/ }).click() + + // Bug repro: the assistant message must render. Pre-fix this assertion + // fails because `handleTaskSubmit` early-returns at the + // `currentChatRef.current !== sentForChatId` guard. + await expect( + page.getByText('Got it — based on your answer, here is what to check next.'), + ).toBeVisible({ timeout: 15_000 }) + + // Both chat calls must have actually happened. + expect(chatCallCount).toBe(2) + }) +}) diff --git a/frontend/src/pages/AssistantChatPage.tsx b/frontend/src/pages/AssistantChatPage.tsx index 4b99ec4f..2cc8aa69 100644 --- a/frontend/src/pages/AssistantChatPage.tsx +++ b/frontend/src/pages/AssistantChatPage.tsx @@ -255,6 +255,12 @@ export default function AssistantChatPage() { } setChats(prev => [chatItem, ...prev]) setActiveChatId(session.session_id) + // Keep the in-flight guard ref in sync. Without this, currentChatRef + // stays at its mount-time value (often a stale id from sessionStorage + // or null), so subsequent handleSend / handleTaskSubmit calls bail at + // their `currentChatRef.current !== sentForChatId` check and the AI + // response is silently dropped. + currentChatRef.current = session.session_id setMessages([{ role: 'user', content: prefill }]) setLoading(true) -- 2.49.1 From 1559feb7599feea54e06b8fdeeda7bdf07ae39a0 Mon Sep 17 00:00:00 2001 From: Michael Chihlas Date: Sat, 25 Apr 2026 23:44:11 -0400 Subject: [PATCH 2/4] docs(ai): track currentChatRef silent-swallow follow-up in TODO The guard pattern that masked the prefill-ref bug fixed in PR #153 is applied across handleSend, handleTaskSubmit, selectChat, refreshFacts, refreshActiveFix, and refreshPreview. Worth either logging the mismatch path or distinguishing expected-stale from unexpected-stale so the next instance of this class of bug surfaces instead of hiding. Co-Authored-By: Claude Opus 4.7 --- .ai/TODO.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.ai/TODO.md b/.ai/TODO.md index eb13b867..b7d5270f 100644 --- a/.ai/TODO.md +++ b/.ai/TODO.md @@ -14,3 +14,4 @@ - [ ] **Add `data-testid` attributes to e2e-critical interactive elements.** PR #152 fixed five Playwright tests by chasing UI-text changes (`Sessions` → `Session History`, `Account Settings` → `Account Management`, `/assistant` → `/pilot`, "Flow Sessions" tab, Resume button on session cards). Each was a one-line selector update, but every UI churn re-breaks them. Adding stable `data-testid` attributes on the targeted elements (page heading wrappers, tab nav, primary action buttons) and switching tests to `getByTestId` would make these immune to copy/route renames. Scope it small — start with `SessionHistoryPage` heading, the AI/Flow Sessions tab buttons, the per-session `Resume` button, and the command-palette FlowPilot option. - [ ] **Per-test transactional rollback in `test_db` fixture.** Bigger engineering than xdist (which we already shipped). Instead of `DROP SCHEMA public CASCADE` per test, wrap each test in a savepoint and rollback at teardown. ~30-40% additional speedup on top of xdist for test-DB-heavy tests. Real refactor; only worth it if the suite gets significantly larger or runs more frequently. - [ ] **Consider `pytest-testmon` for PR-time test selection.** Tracks which tests touched which source files and only re-runs affected ones. Best for small PRs touching ~few files. Adds cache-invalidation complexity; only worth it if the suite stays painfully long even after xdist. +- [ ] **AssistantChatPage `currentChatRef` guard is a silent return** — `handleSend`, `handleTaskSubmit`, `selectChat`, `refreshFacts`, `refreshActiveFix`, and `refreshPreview` all bail with `if (currentChatRef.current !== sentForChatId) return` when stale. This is by design for chat switching, but it also silently masked the prefill-ref bug fixed in PR #153 — the user just saw "no AI response" with no log, no toast, no Sentry event. Either (a) log a `console.warn`/Sentry breadcrumb on the mismatch path so future drift is visible, or (b) split "expected stale" (chat switch) from "unexpected stale" (ref never updated) so only the latter alerts. Pair with an audit of every `currentChatRef.current = ...` assignment vs every `setActiveChatId(...)` call to make sure they're paired everywhere. -- 2.49.1 From 43eed720d93c95d98feb24967b9052da5372965b Mon Sep 17 00:00:00 2001 From: Michael Chihlas Date: Sun, 26 Apr 2026 00:30:50 -0400 Subject: [PATCH 3/4] docs(ai): close out PR #150, set PR #153 as active task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CURRENT_TASK.md rolled forward — the CI-recovery task is complete (PR #150 merged as 87bb20b; backend gate is in required checks). Active task is now landing PR #153. - HANDOFF.md rewritten — new resume point is watching CI on the rebased SHA 1559feb and merging when all three checks are green. - SESSION_LOG.md gains a 2026-04-26 entry covering the prefill bug diagnosis, fix, regression test, and the rebase off post-#150 main. Co-Authored-By: Claude Opus 4.7 --- .ai/CURRENT_TASK.md | 39 +++++++++++++++++------------ .ai/HANDOFF.md | 61 +++++++++++++++++++-------------------------- .ai/SESSION_LOG.md | 12 +++++++++ 3 files changed, 61 insertions(+), 51 deletions(-) diff --git a/.ai/CURRENT_TASK.md b/.ai/CURRENT_TASK.md index 41635201..6e9db75e 100644 --- a/.ai/CURRENT_TASK.md +++ b/.ai/CURRENT_TASK.md @@ -1,21 +1,28 @@ # CURRENT_TASK.md +**Task:** Land PR #153 — fix the AssistantChatPage prefill `currentChatRef` bug that silently dropped AI follow-up responses in the task lane. + +**Status:** in-progress (CI running on rebased branch) + +**Definition of Done:** +- [ ] PR #153 (`fix/tasklane-prefill-ref`) CI green on the rebased SHA `1559feb`. Backend, frontend, and e2e all pass. +- [ ] PR #153 merged into `main`. +- [ ] User-visible verification: from the dashboard, type a prefill, answer a subset of task-lane questions, click *Send N of M Responses* — AI follow-up renders. + +**Assumptions:** +- Rebasing onto post-#150 main pulls in the workflow fixes (no host-port mapping for postgres, no upload-artifact step), so the prior CI failures on this PR are resolved structurally. +- The new e2e regression spec (`frontend/e2e/assistant-chat-prefill.spec.ts`) doesn't depend on Anthropic — it uses `page.route` to stub `/ai-sessions/*/chat` deterministically, so it should be CI-stable. + +**Out of scope (deferred to TODO):** +- Hardening the `currentChatRef.current !== sentForChatId` silent-return pattern across `handleSend`, `handleTaskSubmit`, `selectChat`, `refreshFacts`, `refreshActiveFix`, `refreshPreview`. PR #153 fixes the specific symptom; the broader audit is its own task in `TODO.md`. +- Promoting `CI / e2e (pull_request)` to required on `main`. Holding off until two consecutive green PR runs (PR #150 was one; PR #153's run will be the second if it passes). + +## Previous task — closed out + **Task:** Land consolidated CI-recovery PR #150 and lock reliable CI gates on `main`. -**Status:** in-progress +**Status:** complete (2026-04-26). -**Definition of Done:** -- [ ] PR #150 (`fix/ci-workflow-config`) merged. `CI / backend (pull_request)`, `CI / frontend (pull_request)`, and `CI / e2e (pull_request)` show success before merge. -- [ ] `CI / backend (pull_request)` added to required status checks on `main` in Gitea branch protection (frontend is already required). -- [ ] Optional: `CI / e2e (pull_request)` confirmed clean across at least one PR run and added to required checks. - -**Assumptions:** -- The 8-core homelab Gitea Actions runner can support `-n auto` (8 xdist workers). If memory pressure shows up in CI, drop to `-n 4`. -- pytest-cov's xdist support continues to handle the coverage merge and `--cov-fail-under=50` check correctly. -- The per-worker DB creation in `conftest.py` is idempotent and racing workers on first import won't all try to CREATE DATABASE simultaneously — postgres serializes that, but if it surfaces issues, wrap with an advisory lock. - -**Out of scope:** -- Frontend lint warnings (23 remain after #149). -- The 23 react-hooks/exhaustive-deps warnings. -- RLS test suite (gated behind `RUN_RLS_TESTS=1`; not in default CI). -- Per-test transactional rollback (would shave another 30-40% off backend time but is a much bigger refactor — capture in TODO if interested). +- PR #150 merged as commit `87bb20b` on `main`. Backend, frontend, and e2e all green on the merge SHA. +- `CI / backend (pull_request)` added to required status checks on `main` (alongside the pre-existing `CI / frontend (pull_request)` requirement). +- `CI / e2e (pull_request)` left as not-required pending one more green PR run. diff --git a/.ai/HANDOFF.md b/.ai/HANDOFF.md index 54367b03..b46e91e7 100644 --- a/.ai/HANDOFF.md +++ b/.ai/HANDOFF.md @@ -2,63 +2,54 @@ # HANDOFF.md -**Last updated:** 2026-04-25 16:41 EDT +**Last updated:** 2026-04-26 03:50 EDT -**Active task:** Land PR #150 (the consolidated CI-recovery PR), then enable backend and eventually e2e gates on `main`. See [CURRENT_TASK.md](CURRENT_TASK.md). +**Active task:** Ship PR #153 — the AssistantChatPage prefill `currentChatRef` bug fix. See [CURRENT_TASK.md](CURRENT_TASK.md). -**Branch:** `fix/ci-workflow-config` -> PR #150. PRs #151 and #152 were closed and consolidated into this branch. +**Branch:** `fix/tasklane-prefill-ref` → PR #153. ## Current resume point -Latest PR #150 CI had backend and frontend green, but `CI / e2e (pull_request)` failed on the resume smoke test. +PR #150 is merged. Branch `fix/tasklane-prefill-ref` has been rebased onto the new `main` (HEAD `1559feb`) and force-pushed; that pulled in the workflow fixes from #150, so the prior backend port-conflict and frontend upload-artifact failures are gone structurally. -The failure was not product behavior. Playwright was using: +CI was kicked off automatically on the rebased SHA. Watch: -```ts -page.locator('.bg-card').filter({ hasText: tree.name }).first() -``` +- `CI / backend (pull_request)` — should pass now that postgres uses the docker-network DNS name `postgres:5432` (no host port race). +- `CI / frontend (pull_request)` — should pass now that the `actions/upload-artifact@v4` step is removed. +- `CI / e2e (pull_request)` — runs on its own postgres + builds frontend inline. Includes the new `e2e/assistant-chat-prefill.spec.ts` regression test, which stubs `/ai-sessions/*/chat` with `page.route` and is independent of Anthropic. -On the session history page this matched the tree filter `` before the intended session card. -- 2.49.1 From 11fe32f4c6d60d479fdea665bb60fc17e16b0491 Mon Sep 17 00:00:00 2001 From: Michael Chihlas Date: Sun, 26 Apr 2026 00:51:39 -0400 Subject: [PATCH 4/4] fix(ci): set stub ANTHROPIC_API_KEY for e2e job so AI-gated endpoints respond MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POST /api/v1/ai-sessions and friends call _require_ai_enabled(), which returns 503 when no provider key is set. The new prefill-handoff regression test (e2e/assistant-chat-prefill.spec.ts) drives the dashboard prefill flow, which has to create a chat session before its page.route stub on /chat can fire — so without a key, session creation 503s and the test never sees the task lane. The Playwright stub intercepts /chat in the browser, so the backend never actually contacts Anthropic — but the AI-enabled gate still needs to pass. A stub value is enough. Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index d3f2f8c9..d87bdacc 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -161,6 +161,12 @@ jobs: PLAYWRIGHT_SECRET_KEY: ci-playwright-secret-key PLAYWRIGHT_TEST_EMAIL: teamadmin@resolutionflow.example.com PLAYWRIGHT_TEST_PASSWORD: TestPass123! + # AI-touching endpoints (POST /ai-sessions, /chat, /respond, etc.) are + # gated by `_require_ai_enabled()`, which returns 503 when no provider + # key is set. Tests that exercise those flows stub the AI calls in the + # browser via `page.route`, so the backend never actually contacts + # Anthropic — but the gate still has to pass. A stub value is enough. + ANTHROPIC_API_KEY: ci-stub-key-not-used-by-tests steps: - uses: actions/checkout@v4 -- 2.49.1