From 7d6e1a876c088941a8fdd9e1ffef62f1cca086fc Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 11:30:46 -0700
Subject: [PATCH 01/12] feat(web): show context-window usage in Ask Sourcebot

Resolve each model's context window from the models.dev catalog (the same
public catalog the setup wizard already consumes; the web server fetches
it itself and caches it per process) and bake it into chat message
metadata. The Details card now renders a usage gauge from the latest
step's input tokens. Models whose window cannot be resolved (no catalog
entry, e.g. openai-compatible/self-hosted, or models.dev unreachable)
fall back to the existing raw token count.
---
 .../web/src/app/api/(server)/ee/chat/route.ts |   7 ++
 packages/web/src/ee/features/chat/agent.ts    |   4 +
 .../components/chatThread/detailsCard.tsx     |  43 ++++++-
 .../chat/modelContextWindow.server.ts         | 109 ++++++++++++++++++
 .../features/chat/modelContextWindow.test.ts  |  98 ++++++++++++++++
 packages/web/src/features/chat/types.ts       |   5 +
 6 files changed, 265 insertions(+), 1 deletion(-)
 create mode 100644 packages/web/src/features/chat/modelContextWindow.server.ts
 create mode 100644 packages/web/src/features/chat/modelContextWindow.test.ts
diff --git a/packages/web/src/app/api/(server)/ee/chat/route.ts b/packages/web/src/app/api/(server)/ee/chat/route.ts
index 0f20ee8e3..7d16acc96 100644
--- a/packages/web/src/app/api/(server)/ee/chat/route.ts
+++ b/packages/web/src/app/api/(server)/ee/chat/route.ts
@@ -6,6 +6,7 @@ import { additionalChatRequestParamsSchema } from "@/features/chat/types";
 import { getLanguageModelKey } from "@/features/chat/utils";
 import { checkAskEntitlement, getConfiguredLanguageModels, isOwnerOfChat, updateChatMessages } from "@/features/chat/utils.server";
 import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server";
+import { resolveContextWindow } from "@/features/chat/modelContextWindow.server";
 import { apiHandler } from "@/lib/apiHandler";
 import { ErrorCode } from "@/lib/errorCodes";
 import { captureEvent } from "@/lib/posthog";
@@ -89,6 +90,11 @@ export const POST = apiHandler(async (req: NextRequest) => {
 
             const { model, providerOptions, temperature } = await getAISDKLanguageModelAndOptions(languageModelConfig);
 
+            // Total context window for the selected model, used as the
+            // denominator for the UI's context-usage gauge. Undefined when
+            // unknown (e.g. openai-compatible / self-hosted models).
+            const contextWindow = await resolveContextWindow(languageModelConfig);
+
             // No-op for non-Anthropic providers / when caching is disabled, so
             // it never perturbs other providers' requests.
             const promptCacheStrategy = getPromptCacheStrategy(
@@ -139,6 +145,7 @@ export const POST = apiHandler(async (req: NextRequest) => {
                 disabledMcpServerIds,
                 model,
                 modelName: languageModelConfig.displayName ?? languageModelConfig.model,
+                contextWindow,
                 promptCacheStrategy,
                 modelProviderOptions: providerOptions,
                 modelTemperature: temperature,
diff --git a/packages/web/src/ee/features/chat/agent.ts b/packages/web/src/ee/features/chat/agent.ts
index d2f3a4761..f376c7c6d 100644
--- a/packages/web/src/ee/features/chat/agent.ts
+++ b/packages/web/src/ee/features/chat/agent.ts
@@ -54,6 +54,8 @@ interface CreateMessageStreamResponseProps {
     disabledMcpServerIds?: string[];
     model: AISDKLanguageModelV3;
     modelName: string;
+    // Total context window of the model in tokens, or undefined when unknown.
+    contextWindow?: number;
     promptCacheStrategy: PromptCacheStrategy;
     onFinish: UIMessageStreamOnFinishCallback<SBChatMessage>;
     onError: (error: unknown) => string;
@@ -73,6 +75,7 @@ export const createMessageStream = async ({
     disabledMcpServerIds,
     model,
     modelName,
+    contextWindow,
     promptCacheStrategy,
     modelProviderOptions,
     modelTemperature,
@@ -279,6 +282,7 @@ export const createMessageStream = async ({
                     // phases so earlier phases' steps are preserved in order.
                     stepTokenUsage: [...(priorMetadata?.stepTokenUsage ?? []), ...stepTokenUsage],
                     modelName,
+                    contextWindow,
                     traceId,
                 }
             });
diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
index e95af69d4..a2161de70 100644
--- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
+++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
@@ -9,7 +9,7 @@ import useCaptureEvent from '@/hooks/useCaptureEvent';
 import { cn, getShortenedNumberDisplayString } from '@/lib/utils';
 import isEqual from "fast-deep-equal/react";
 import { useStickToBottom } from 'use-stick-to-bottom';
-import { Brain, ChevronDown, ChevronRight, Clock, InfoIcon, Loader2, ScanSearchIcon, ShieldQuestion, Wrench, Zap } from 'lucide-react';
+import { Brain, ChevronDown, ChevronRight, Clock, Gauge, InfoIcon, Loader2, ScanSearchIcon, ShieldQuestion, Wrench, Zap } from 'lucide-react';
 import { memo, ReactNode, useCallback, useEffect, useMemo, useState } from 'react';
 import { usePrevious } from '@uidotdev/usehooks';
 import { SBChatMessageMetadata, SBChatMessagePart, StepTokenUsageEntry } from '@/features/chat/types';
@@ -86,6 +86,20 @@ const DetailsCardComponent = ({
         ? Math.round((cacheReadTokens / inputTokens) * 100)
         : 0;
 
+    // Context-window usage gauge. "In use" is the input the model saw on its
+    // most recent step — i.e. the full accumulated prompt occupying the window
+    // right now — not the cumulative totalInputTokens (a billing sum). The
+    // gauge is shown only when the model's window is known (resolved from the
+    // models.dev catalog); unknown windows degrade to the raw token count.
+    const stepTokenUsage = metadata?.stepTokenUsage;
+    const currentContextTokens = stepTokenUsage && stepTokenUsage.length > 0
+        ? stepTokenUsage[stepTokenUsage.length - 1].inputTokens
+        : undefined;
+    const contextWindow = metadata?.contextWindow;
+    const contextUsagePercent = currentContextTokens !== undefined && contextWindow !== undefined && contextWindow > 0
+        ? Math.min(100, Math.round((currentContextTokens / contextWindow) * 100))
+        : undefined;
+
     const handleExpandedChanged = useCallback((next: boolean) => {
         captureEvent('wa_chat_details_card_toggled', { chatId, isExpanded: next });
         onExpandedChanged(next);
@@ -193,6 +207,33 @@ const DetailsCardComponent = ({
                                                 )}
                                             </div>
                                         )}
+                                        {contextUsagePercent !== undefined && currentContextTokens !== undefined && contextWindow !== undefined && (
+                                            <Tooltip>
+                                                <TooltipTrigger asChild>
+                                                    <div className="flex items-center gap-1.5 text-xs cursor-help">
+                                                        <Gauge className="w-3 h-3 flex-shrink-0" />
+                                                        <div className="h-1.5 w-12 rounded-full bg-muted overflow-hidden">
+                                                            <div
+                                                                className={cn("h-full rounded-full", {
+                                                                    "bg-destructive": contextUsagePercent >= 90,
+                                                                    "bg-yellow-500": contextUsagePercent >= 75 && contextUsagePercent < 90,
+                                                                    "bg-foreground": contextUsagePercent < 75,
+                                                                })}
+                                                                style={{ width: `${contextUsagePercent}%` }}
+                                                            />
+                                                        </div>
+                                                        <span>
+                                                            {getShortenedNumberDisplayString(currentContextTokens, 0)} / {getShortenedNumberDisplayString(contextWindow, 0)} ({contextUsagePercent}%)
+                                                        </span>
+                                                    </div>
+                                                </TooltipTrigger>
+                                                <TooltipContent side="bottom">
+                                                    <div className="max-w-xs text-xs">
+                                                        The most recent step&apos;s prompt used {currentContextTokens.toLocaleString()} of the model&apos;s {contextWindow.toLocaleString()}-token context window ({contextUsagePercent}%).
+                                                    </div>
+                                                </TooltipContent>
+                                            </Tooltip>
+                                        )}
                                         {metadata?.totalResponseTimeMs && (
                                             <div className="flex items-center text-xs">
                                                 <Clock className="w-3 h-3 mr-1 flex-shrink-0" />
diff --git a/packages/web/src/features/chat/modelContextWindow.server.ts b/packages/web/src/features/chat/modelContextWindow.server.ts
new file mode 100644
index 000000000..d15336e4c
--- /dev/null
+++ b/packages/web/src/features/chat/modelContextWindow.server.ts
@@ -0,0 +1,109 @@
+import 'server-only';
+
+import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type';
+import { createLogger } from '@sourcebot/shared';
+
+const logger = createLogger('model-context-window');
+
+// The same public, unauthenticated catalog the setup wizard already consumes
+// (see packages/setupWizard/src/models.ts). Each model entry exposes a
+// `limit.context` field holding the total context window in tokens.
+const MODELS_DEV_API_URL = 'https://models.dev/api.json';
+const FETCH_TIMEOUT_MS = 8000;
+// Re-fetch the (~2.4 MB) catalog at most once per this interval per server
+// process. New models trickle in daily; a stale window for a few hours is fine.
+const CATALOG_TTL_MS = 6 * 60 * 60 * 1000;
+
+// Sourcebot provider id -> models.dev top-level catalog key. Only providers
+// whose Sourcebot id differs from the models.dev id need an entry; everything
+// else (anthropic, openai, azure, amazon-bedrock, mistral, deepseek, xai,
+// openrouter, google-vertex, google-vertex-anthropic) matches 1:1.
+const PROVIDER_ID_OVERRIDES: Record<string, string> = {
+    'google-generative-ai': 'google',
+};
+
+type ModelsDevModel = {
+    id: string;
+    limit?: {
+        context?: number;
+        output?: number;
+    };
+};
+
+type ModelsDevProvider = {
+    id: string;
+    models?: Record<string, ModelsDevModel>;
+};
+
+export type ModelsDevCatalog = Record<string, ModelsDevProvider>;
+
+let catalogPromise: Promise<ModelsDevCatalog | null> | null = null;
+let catalogFetchedAt = 0;
+
+const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
+    try {
+        const response = await fetch(MODELS_DEV_API_URL, {
+            signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
+        });
+        if (!response.ok) {
+            logger.warn(`Failed to fetch models.dev catalog: ${response.status} ${response.statusText}`);
+            return null;
+        }
+        return await response.json() as ModelsDevCatalog;
+    } catch (error) {
+        logger.warn(`Failed to fetch models.dev catalog: ${error}`);
+        return null;
+    }
+};
+
+const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
+    const now = Date.now();
+    if (!catalogPromise || now - catalogFetchedAt > CATALOG_TTL_MS) {
+        catalogFetchedAt = now;
+        catalogPromise = fetchCatalog().then((catalog) => {
+            // Don't memoize failures — let the next caller retry instead of
+            // being stuck with a null catalog until the TTL expires.
+            if (!catalog) {
+                catalogPromise = null;
+            }
+            return catalog;
+        });
+    }
+    return catalogPromise;
+};
+
+/**
+ * Pure lookup of a model's context window in a models.dev catalog. Separated
+ * from the network fetch so it can be unit-tested directly.
+ *
+ * Returns the total context window (input + output share it) in tokens, or
+ * `undefined` when the model isn't catalogued or has no usable window.
+ */
+export const lookupContextWindow = (
+    catalog: ModelsDevCatalog | null,
+    config: Pick<LanguageModel, 'provider' | 'model'>,
+): number | undefined => {
+    if (!catalog) {
+        return undefined;
+    }
+    const providerId = PROVIDER_ID_OVERRIDES[config.provider] ?? config.provider;
+    const context = catalog[providerId]?.models?.[config.model]?.limit?.context;
+    // `limit` is schema-optional, and models.dev reports a 0 context window for
+    // non-text models (image/audio/etc.). Treat both as "unknown" so the UI
+    // gracefully omits the gauge rather than rendering a bogus denominator.
+    return typeof context === 'number' && context > 0 ? context : undefined;
+};
+
+/**
+ * Resolves the context window (in tokens) for a configured language model from
+ * the models.dev catalog. Returns `undefined` when unknown — e.g. arbitrary
+ * openai-compatible / self-hosted ids, provider/model ids that don't match the
+ * catalog's keys (bedrock ARNs, vertex `@`-suffixed ids, azure deployments), or
+ * when models.dev is unreachable. Never throws into the request path.
+ */
+export const resolveContextWindow = async (
+    config: Pick<LanguageModel, 'provider' | 'model'>,
+): Promise<number | undefined> => {
+    const catalog = await loadCatalog();
+    return lookupContextWindow(catalog, config);
+};
diff --git a/packages/web/src/features/chat/modelContextWindow.test.ts b/packages/web/src/features/chat/modelContextWindow.test.ts
new file mode 100644
index 000000000..e8adc3517
--- /dev/null
+++ b/packages/web/src/features/chat/modelContextWindow.test.ts
@@ -0,0 +1,98 @@
+import { afterEach, describe, expect, test, vi } from 'vitest';
+import type { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type';
+
+vi.mock('server-only', () => ({ default: vi.fn() }));
+
+vi.mock('@sourcebot/shared', () => ({
+    createLogger: () => ({
+        info: vi.fn(),
+        warn: vi.fn(),
+        error: vi.fn(),
+        debug: vi.fn(),
+    }),
+}));
+
+import { lookupContextWindow, resolveContextWindow, type ModelsDevCatalog } from './modelContextWindow.server';
+
+const catalog: ModelsDevCatalog = {
+    anthropic: {
+        id: 'anthropic',
+        models: {
+            'claude-sonnet-4-5': { id: 'claude-sonnet-4-5', limit: { context: 200000, output: 64000 } },
+        },
+    },
+    // models.dev keys Gemini under 'google', whereas Sourcebot's provider id is
+    // 'google-generative-ai' — exercises PROVIDER_ID_OVERRIDES.
+    google: {
+        id: 'google',
+        models: {
+            'gemini-2.5-pro': { id: 'gemini-2.5-pro', limit: { context: 1048576, output: 65536 } },
+        },
+    },
+    openai: {
+        id: 'openai',
+        models: {
+            'gpt-4.1': { id: 'gpt-4.1', limit: { context: 1047576 } },
+            // Non-text model: models.dev reports a 0 context window.
+            'gpt-image-1': { id: 'gpt-image-1', limit: { context: 0, output: 0 } },
+            // Catalogued model with no `limit` object at all.
+            'no-limit-model': { id: 'no-limit-model' },
+        },
+    },
+};
+
+const model = (provider: string, modelId: string) =>
+    ({ provider, model: modelId }) as Pick<LanguageModel, 'provider' | 'model'>;
+
+describe('lookupContextWindow', () => {
+    test('returns the context window for a direct provider/model hit', () => {
+        expect(lookupContextWindow(catalog, model('anthropic', 'claude-sonnet-4-5'))).toBe(200000);
+        expect(lookupContextWindow(catalog, model('openai', 'gpt-4.1'))).toBe(1047576);
+    });
+
+    test('maps google-generative-ai to the catalog\'s google key', () => {
+        expect(lookupContextWindow(catalog, model('google-generative-ai', 'gemini-2.5-pro'))).toBe(1048576);
+    });
+
+    test('returns undefined for an uncatalogued provider', () => {
+        expect(lookupContextWindow(catalog, model('mistral', 'mistral-large-latest'))).toBeUndefined();
+    });
+
+    test('returns undefined for an uncatalogued model id (e.g. openai-compatible / self-hosted)', () => {
+        expect(lookupContextWindow(catalog, model('openai-compatible', 'my-local-model'))).toBeUndefined();
+        expect(lookupContextWindow(catalog, model('anthropic', 'claude-unknown'))).toBeUndefined();
+    });
+
+    test('treats a 0 context window (non-text models) as unknown', () => {
+        expect(lookupContextWindow(catalog, model('openai', 'gpt-image-1'))).toBeUndefined();
+    });
+
+    test('treats a missing limit object as unknown', () => {
+        expect(lookupContextWindow(catalog, model('openai', 'no-limit-model'))).toBeUndefined();
+    });
+
+    test('returns undefined when the catalog is null (fetch failed / unreachable)', () => {
+        expect(lookupContextWindow(null, model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined();
+    });
+});
+
+describe('resolveContextWindow', () => {
+    afterEach(() => {
+        vi.unstubAllGlobals();
+    });
+
+    test('fetches the catalog once and resolves windows (incl. provider mapping)', async () => {
+        const fetchMock = vi.fn(async () => ({
+            ok: true,
+            json: async () => catalog,
+        }) as unknown as Response);
+        vi.stubGlobal('fetch', fetchMock);
+
+        expect(await resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000);
+        // Subsequent lookups reuse the cached catalog rather than refetching.
+        expect(await resolveContextWindow(model('google-generative-ai', 'gemini-2.5-pro'))).toBe(1048576);
+        expect(await resolveContextWindow(model('openai-compatible', 'my-local-model'))).toBeUndefined();
+
+        expect(fetchMock).toHaveBeenCalledTimes(1);
+    });
+});
diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts
index 38a737a09..ca01d9d22 100644
--- a/packages/web/src/features/chat/types.ts
+++ b/packages/web/src/features/chat/types.ts
@@ -59,6 +59,11 @@ export const sbChatMessageMetadataSchema = z.object({
     totalCacheReadTokens: z.number().optional(),
     totalCacheWriteTokens: z.number().optional(),
     totalResponseTimeMs: z.number().optional(),
+    // Total context window of the model used for this turn, in tokens, resolved
+    // from the models.dev catalog. Undefined when the window is unknown (e.g.
+    // openai-compatible / self-hosted ids, uncatalogued models). Baked into the
+    // message so the gauge denominator stays stable across catalog changes.
+    contextWindow: z.number().optional(),
     feedback: z.array(z.object({
         type: z.enum(['like', 'dislike']),
         timestamp: z.string(), // ISO date string

From af5de37c067d0a707888ee79e83cb6b28f1657d4 Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 12:26:55 -0700
Subject: [PATCH 02/12] fix(web): resolve contextWindow in the programmatic
 askCodebase path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

createMessageStream's MCP/programmatic caller omitted contextWindow, so
chats created via ask_codebase rendered the Details card without a usage
gauge even for catalogued models — unlike the same chat created from the
web API. Resolve it from the already-available languageModelConfig so the
gauge is deterministic per model, not per entry point.
---
 packages/web/src/ee/features/mcp/askCodebase.ts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/packages/web/src/ee/features/mcp/askCodebase.ts b/packages/web/src/ee/features/mcp/askCodebase.ts
index 4b7cfb7b0..e2b234037 100644
--- a/packages/web/src/ee/features/mcp/askCodebase.ts
+++ b/packages/web/src/ee/features/mcp/askCodebase.ts
@@ -2,6 +2,7 @@ import { sew } from "@/middleware/sew";
 import { getConfiguredLanguageModels, updateChatMessages, checkAskEntitlement } from "@/features/chat/utils.server";
 import { generateChatNameFromMessage } from "@/ee/features/chat/llm.server";
 import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server";
+import { resolveContextWindow } from "@/features/chat/modelContextWindow.server";
 import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types";
 import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils";
 import { ErrorCode } from "@/lib/errorCodes";
@@ -84,6 +85,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise<AskCodebaseResul
 
             const { model, providerOptions, temperature } = await getAISDKLanguageModelAndOptions(languageModelConfig);
             const modelName = languageModelConfig.displayName ?? languageModelConfig.model;
+            const contextWindow = await resolveContextWindow(languageModelConfig);
 
             // No-op for non-Anthropic providers / when caching is disabled.
             const promptCacheStrategy = getPromptCacheStrategy(
@@ -182,6 +184,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise<AskCodebaseResul
                 prisma,
                 model,
                 modelName,
+                contextWindow,
                 promptCacheStrategy,
                 modelProviderOptions: providerOptions,
                 modelTemperature: temperature,

From ae923f2f82687af910a9a5b038473192db0e869c Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 14:56:19 -0700
Subject: [PATCH 03/12] feat(web): render context-window usage as a colored
 ring gauge
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

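Replace the horizontal bar with a circular ring showing the percentage
inside and the used / total token counts beside it. The arc and percentage
are colored by usage — green below 70%, yellow from 70% up to 90%, red at
90% and above. This lowers the yellow threshold from 75% to 70% and
replaces the neutral foreground color for low usage with green.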
---
 .../components/chatThread/detailsCard.tsx     | 91 +++++++++++++++----
 1 file changed, 75 insertions(+), 16 deletions(-)

diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
index a2161de70..57f8352fd 100644
--- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
+++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
@@ -9,7 +9,7 @@ import useCaptureEvent from '@/hooks/useCaptureEvent';
 import { cn, getShortenedNumberDisplayString } from '@/lib/utils';
 import isEqual from "fast-deep-equal/react";
 import { useStickToBottom } from 'use-stick-to-bottom';
-import { Brain, ChevronDown, ChevronRight, Clock, Gauge, InfoIcon, Loader2, ScanSearchIcon, ShieldQuestion, Wrench, Zap } from 'lucide-react';
+import { Brain, ChevronDown, ChevronRight, Clock, InfoIcon, Loader2, ScanSearchIcon, ShieldQuestion, Wrench, Zap } from 'lucide-react';
 import { memo, ReactNode, useCallback, useEffect, useMemo, useState } from 'react';
 import { usePrevious } from '@uidotdev/usehooks';
 import { SBChatMessageMetadata, SBChatMessagePart, StepTokenUsageEntry } from '@/features/chat/types';
@@ -210,21 +210,12 @@ const DetailsCardComponent = ({
                                         {contextUsagePercent !== undefined && currentContextTokens !== undefined && contextWindow !== undefined && (
                                             <Tooltip>
                                                 <TooltipTrigger asChild>
-                                                    <div className="flex items-center gap-1.5 text-xs cursor-help">
-                                                        <Gauge className="w-3 h-3 flex-shrink-0" />
-                                                        <div className="h-1.5 w-12 rounded-full bg-muted overflow-hidden">
-                                                            <div
-                                                                className={cn("h-full rounded-full", {
-                                                                    "bg-destructive": contextUsagePercent >= 90,
-                                                                    "bg-yellow-500": contextUsagePercent >= 75 && contextUsagePercent < 90,
-                                                                    "bg-foreground": contextUsagePercent < 75,
-                                                                })}
-                                                                style={{ width: `${contextUsagePercent}%` }}
-                                                            />
-                                                        </div>
-                                                        <span>
-                                                            {getShortenedNumberDisplayString(currentContextTokens, 0)} / {getShortenedNumberDisplayString(contextWindow, 0)} ({contextUsagePercent}%)
-                                                        </span>
+                                                    <div className="cursor-help">
+                                                        <ContextWindowGauge
+                                                            used={currentContextTokens}
+                                                            total={contextWindow}
+                                                            percent={contextUsagePercent}
+                                                        />
                                                     </div>
                                                 </TooltipTrigger>
                                                 <TooltipContent side="bottom">
@@ -408,6 +399,74 @@ const StepTokenUsage = ({ usage, label = 'step' }: { usage: StepTokenUsageEntry,
     );
 }
 
+
+// Usage thresholds for the context-window gauge. Below `YELLOW` the window has
+// plenty of headroom (green); past `RED` it's nearly full (red).
+const CONTEXT_USAGE_YELLOW_PERCENT = 70;
+const CONTEXT_USAGE_RED_PERCENT = 90;
+
+const getContextUsageColorClass = (percent: number): string => {
+    if (percent >= CONTEXT_USAGE_RED_PERCENT) {
+        return "text-red-500";
+    }
+    if (percent >= CONTEXT_USAGE_YELLOW_PERCENT) {
+        return "text-yellow-500";
+    }
+    return "text-green-500";
+};
+
+// A circular ring showing how much of the model's context window the most
+// recent step occupies, with the percentage inside the ring and the
+// "<used> / <total>" token counts beside it. The progress arc and percentage
+// share a single usage-based color (green/yellow/red) over a neutral track.
+const ContextWindowGauge = ({ used, total, percent }: { used: number, total: number, percent: number }) => {
+    const size = 34;
+    const strokeWidth = 4;
+    const radius = (size - strokeWidth) / 2;
+    const circumference = 2 * Math.PI * radius;
+    const dashOffset = circumference * (1 - Math.min(100, percent) / 100);
+    const colorClass = getContextUsageColorClass(percent);
+
+    return (
+        <div className="flex items-center gap-2">
+            <div className="relative flex-shrink-0" style={{ width: size, height: size }}>
+                <svg width={size} height={size} className="-rotate-90">
+                    {/* Neutral track. */}
+                    <circle
+                        cx={size / 2}
+                        cy={size / 2}
+                        r={radius}
+                        fill="none"
+                        stroke="currentColor"
+                        strokeWidth={strokeWidth}
+                        className="text-muted-foreground/25"
+                    />
+                    {/* Progress arc. */}
+                    <circle
+                        cx={size / 2}
+                        cy={size / 2}
+                        r={radius}
+                        fill="none"
+                        stroke="currentColor"
+                        strokeWidth={strokeWidth}
+                        strokeLinecap="round"
+                        strokeDasharray={circumference}
+                        strokeDashoffset={dashOffset}
+                        className={cn("transition-all duration-300", colorClass)}
+                    />
+                </svg>
+                <span className={cn("absolute inset-0 flex items-center justify-center text-[9px] font-semibold", colorClass)}>
+                    {percent}%
+                </span>
+            </div>
+            <span className="text-sm whitespace-nowrap">
+                <span className="font-semibold text-foreground">{getShortenedNumberDisplayString(used, 0).toUpperCase()}</span>
+                <span className="text-muted-foreground"> / {getShortenedNumberDisplayString(total, 0).toUpperCase()}</span>
+            </span>
+        </div>
+    );
+}
+
 type GuardedToolType =
     | 'tool-read_file'
     | 'tool-grep'

From c72acdab343a5dc048b6b0b21e693416f55c33b5 Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 15:02:12 -0700
Subject: [PATCH 04/12] feat(web): make the context-window gauge a compact
 inline indicator

Shrink the ring and move the percentage beside it, reading
"<percent>% of <total>" instead of a number-in-ring. The arc and
percentage stay colored by usage (green/yellow/red).
---
 .../components/chatThread/detailsCard.tsx     | 78 +++++++++----------
 1 file changed, 35 insertions(+), 43 deletions(-)

diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
index 57f8352fd..cd37ffaf6 100644
--- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
+++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
@@ -212,7 +212,6 @@ const DetailsCardComponent = ({
                                                 <TooltipTrigger asChild>
                                                     <div className="cursor-help">
                                                         <ContextWindowGauge
-                                                            used={currentContextTokens}
                                                             total={contextWindow}
                                                             percent={contextUsagePercent}
                                                         />
@@ -415,54 +414,47 @@ const getContextUsageColorClass = (percent: number): string => {
     return "text-green-500";
 };
 
-// A circular ring showing how much of the model's context window the most
-// recent step occupies, with the percentage inside the ring and the
-// "<used> / <total>" token counts beside it. The progress arc and percentage
-// share a single usage-based color (green/yellow/red) over a neutral track.
-const ContextWindowGauge = ({ used, total, percent }: { used: number, total: number, percent: number }) => {
-    const size = 34;
-    const strokeWidth = 4;
+// A compact context-window indicator: a small ring whose arc tracks usage,
+// followed by the usage percentage and the model's total window size
+// ("<percent>% of <total>"). The ring and percentage share a single
+// usage-based color (green/yellow/red) over a neutral track.
+const ContextWindowGauge = ({ total, percent }: { total: number, percent: number }) => {
+    const size = 18;
+    const strokeWidth = 2.5;
     const radius = (size - strokeWidth) / 2;
     const circumference = 2 * Math.PI * radius;
     const dashOffset = circumference * (1 - Math.min(100, percent) / 100);
     const colorClass = getContextUsageColorClass(percent);
 
     return (
-        <div className="flex items-center gap-2">
-            <div className="relative flex-shrink-0" style={{ width: size, height: size }}>
-                <svg width={size} height={size} className="-rotate-90">
-                    {/* Neutral track. */}
-                    <circle
-                        cx={size / 2}
-                        cy={size / 2}
-                        r={radius}
-                        fill="none"
-                        stroke="currentColor"
-                        strokeWidth={strokeWidth}
-                        className="text-muted-foreground/25"
-                    />
-                    {/* Progress arc. */}
-                    <circle
-                        cx={size / 2}
-                        cy={size / 2}
-                        r={radius}
-                        fill="none"
-                        stroke="currentColor"
-                        strokeWidth={strokeWidth}
-                        strokeLinecap="round"
-                        strokeDasharray={circumference}
-                        strokeDashoffset={dashOffset}
-                        className={cn("transition-all duration-300", colorClass)}
-                    />
-                </svg>
-                <span className={cn("absolute inset-0 flex items-center justify-center text-[9px] font-semibold", colorClass)}>
-                    {percent}%
-                </span>
-            </div>
-            <span className="text-sm whitespace-nowrap">
-                <span className="font-semibold text-foreground">{getShortenedNumberDisplayString(used, 0).toUpperCase()}</span>
-                <span className="text-muted-foreground"> / {getShortenedNumberDisplayString(total, 0).toUpperCase()}</span>
-            </span>
+        <div className="flex items-center gap-1.5 text-xs whitespace-nowrap">
+            <svg width={size} height={size} className="-rotate-90 flex-shrink-0">
+                {/* Neutral track. */}
+                <circle
+                    cx={size / 2}
+                    cy={size / 2}
+                    r={radius}
+                    fill="none"
+                    stroke="currentColor"
+                    strokeWidth={strokeWidth}
+                    className="text-muted-foreground/25"
+                />
+                {/* Progress arc. */}
+                <circle
+                    cx={size / 2}
+                    cy={size / 2}
+                    r={radius}
+                    fill="none"
+                    stroke="currentColor"
+                    strokeWidth={strokeWidth}
+                    strokeLinecap="round"
+                    strokeDasharray={circumference}
+                    strokeDashoffset={dashOffset}
+                    className={cn("transition-all duration-300", colorClass)}
+                />
+            </svg>
+            <span className={cn("font-semibold", colorClass)}>{percent}%</span>
+            <span className="text-muted-foreground">of {getShortenedNumberDisplayString(total, 0).toUpperCase()}</span>
         </div>
     );
 }

From 1b5c6c5cb5b55f6b8d5a2908c29f8e169958daf6 Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 15:09:35 -0700
Subject: [PATCH 05/12] feat(web): shrink context gauge ring, gray track,
 desaturated green

Reduce the ring to 14px, switch the track to a solid palette gray (the
theme tokens lack an alpha channel, so /opacity on them was ignored and
the track rendered at full brightness), and use a desaturated sage green
for the in-range percentage.
---
 .../chat/components/chatThread/detailsCard.tsx      | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
index cd37ffaf6..d879598bf 100644
--- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
+++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
@@ -411,7 +411,8 @@ const getContextUsageColorClass = (percent: number): string => {
     if (percent >= CONTEXT_USAGE_YELLOW_PERCENT) {
         return "text-yellow-500";
     }
-    return "text-green-500";
+    // Desaturated sage green rather than the neon green-500.
+    return "text-[#6cb38f]";
 };
 
 // A compact context-window indicator: a small ring whose arc tracks usage,
@@ -419,8 +420,8 @@ const getContextUsageColorClass = (percent: number): string => {
 // ("<percent>% of <total>"). The ring and percentage share a single
 // usage-based color (green/yellow/red) over a neutral track.
 const ContextWindowGauge = ({ total, percent }: { total: number, percent: number }) => {
-    const size = 18;
-    const strokeWidth = 2.5;
+    const size = 14;
+    const strokeWidth = 2;
     const radius = (size - strokeWidth) / 2;
     const circumference = 2 * Math.PI * radius;
     const dashOffset = circumference * (1 - Math.min(100, percent) / 100);
@@ -429,7 +430,9 @@ const ContextWindowGauge = ({ total, percent }: { total: number, percent: number
     return (
         <div className="flex items-center gap-1.5 text-xs whitespace-nowrap">
             <svg width={size} height={size} className="-rotate-90 flex-shrink-0">
-                {/* Neutral track. */}
+                {/* Neutral gray track. (Theme tokens here are defined without an
+                    alpha channel, so an /opacity modifier on them is silently
+                    ignored — use a solid palette gray instead.) */}
                 <circle
                     cx={size / 2}
                     cy={size / 2}
@@ -437,7 +440,7 @@ const ContextWindowGauge = ({ total, percent }: { total: number, percent: number
                     fill="none"
                     stroke="currentColor"
                     strokeWidth={strokeWidth}
-                    className="text-muted-foreground/25"
+                    className="text-zinc-500"
                 />
                 {/* Progress arc. */}
                 <circle

From 0a776870d433a7b9cae2ffe496e448b9012f8894 Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 19:59:09 -0700
Subject: [PATCH 06/12] chore(web): trim verbose comments on the context-window
 code

---
 packages/web/src/app/api/(server)/ee/chat/route.ts        | 2 +-
 packages/web/src/ee/features/chat/agent.ts                | 1 -
 .../features/chat/components/chatThread/detailsCard.tsx   | 8 +-------
 packages/web/src/features/chat/types.ts                   | 4 ----
 4 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/packages/web/src/app/api/(server)/ee/chat/route.ts b/packages/web/src/app/api/(server)/ee/chat/route.ts
index 7d16acc96..cbb11f06e 100644
--- a/packages/web/src/app/api/(server)/ee/chat/route.ts
+++ b/packages/web/src/app/api/(server)/ee/chat/route.ts
@@ -92,7 +92,7 @@ export const POST = apiHandler(async (req: NextRequest) => {
 
             // Total context window for the selected model, used as the
             // denominator for the UI's context-usage gauge. Undefined when
-            // unknown (e.g. openai-compatible / self-hosted models).
+            // unknown (e.g. self-hosted models).
             const contextWindow = await resolveContextWindow(languageModelConfig);
 
             // No-op for non-Anthropic providers / when caching is disabled, so
diff --git a/packages/web/src/ee/features/chat/agent.ts b/packages/web/src/ee/features/chat/agent.ts
index f376c7c6d..8f5daa749 100644
--- a/packages/web/src/ee/features/chat/agent.ts
+++ b/packages/web/src/ee/features/chat/agent.ts
@@ -54,7 +54,6 @@ interface CreateMessageStreamResponseProps {
     disabledMcpServerIds?: string[];
     model: AISDKLanguageModelV3;
     modelName: string;
-    // Total context window of the model in tokens, or undefined when unknown.
     contextWindow?: number;
     promptCacheStrategy: PromptCacheStrategy;
     onFinish: UIMessageStreamOnFinishCallback<SBChatMessage>;
diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
index d879598bf..3715f8632 100644
--- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
+++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
@@ -88,9 +88,7 @@ const DetailsCardComponent = ({
 
     // Context-window usage gauge. "In use" is the input the model saw on its
     // most recent step — i.e. the full accumulated prompt occupying the window
-    // right now — not the cumulative totalInputTokens (a billing sum). The
-    // gauge is shown only when the model's window is known (resolved from the
-    // models.dev catalog); unknown windows degrade to the raw token count.
+    // right now — not the cumulative totalInputTokens.
     const stepTokenUsage = metadata?.stepTokenUsage;
     const currentContextTokens = stepTokenUsage && stepTokenUsage.length > 0
         ? stepTokenUsage[stepTokenUsage.length - 1].inputTokens
@@ -415,10 +413,6 @@ const getContextUsageColorClass = (percent: number): string => {
     return "text-[#6cb38f]";
 };
 
-// A compact context-window indicator: a small ring whose arc tracks usage,
-// followed by the usage percentage and the model's total window size
-// ("<percent>% of <total>"). The ring and percentage share a single
-// usage-based color (green/yellow/red) over a neutral track.
 const ContextWindowGauge = ({ total, percent }: { total: number, percent: number }) => {
     const size = 14;
     const strokeWidth = 2;
diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts
index ca01d9d22..dc0758462 100644
--- a/packages/web/src/features/chat/types.ts
+++ b/packages/web/src/features/chat/types.ts
@@ -59,10 +59,6 @@ export const sbChatMessageMetadataSchema = z.object({
     totalCacheReadTokens: z.number().optional(),
     totalCacheWriteTokens: z.number().optional(),
     totalResponseTimeMs: z.number().optional(),
-    // Total context window of the model used for this turn, in tokens, resolved
-    // from the models.dev catalog. Undefined when the window is unknown (e.g.
-    // openai-compatible / self-hosted ids, uncatalogued models). Baked into the
-    // message so the gauge denominator stays stable across catalog changes.
     contextWindow: z.number().optional(),
     feedback: z.array(z.object({
         type: z.enum(['like', 'dislike']),

From 8f361e7d2fcc139fc2051dde94cf198b9788d7b3 Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 20:07:29 -0700
Subject: [PATCH 07/12] docs: add changelog entry for the context-window usage
 gauge

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 689718d36..671b6aaa1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353)
+- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370)
 
 ### Fixed
 - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367)

From 2a9e9e35a7536d2195bfe949a2c94df8cb4c6745 Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 20:25:20 -0700
Subject: [PATCH 08/12] chore(web): trim comments and lower context-gauge color
 thresholds

Also lower the gauge's yellow/red thresholds from 70%/90% to 50%/80% so the
ring shifts color earlier as the context fills.
---
 .../chat/components/chatThread/detailsCard.tsx        | 11 +++--------
 .../src/features/chat/modelContextWindow.server.ts    | 11 ++---------
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
index 3715f8632..7b6c7867f 100644
--- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
+++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx
@@ -397,10 +397,8 @@ const StepTokenUsage = ({ usage, label = 'step' }: { usage: StepTokenUsageEntry,
 }
 
 
-// Usage thresholds for the context-window gauge. Below `YELLOW` the window has
-// plenty of headroom (green); past `RED` it's nearly full (red).
-const CONTEXT_USAGE_YELLOW_PERCENT = 70;
-const CONTEXT_USAGE_RED_PERCENT = 90;
+const CONTEXT_USAGE_YELLOW_PERCENT = 50;
+const CONTEXT_USAGE_RED_PERCENT = 80;
 
 const getContextUsageColorClass = (percent: number): string => {
     if (percent >= CONTEXT_USAGE_RED_PERCENT) {
@@ -409,7 +407,6 @@ const getContextUsageColorClass = (percent: number): string => {
     if (percent >= CONTEXT_USAGE_YELLOW_PERCENT) {
         return "text-yellow-500";
     }
-    // Desaturated sage green rather than the neon green-500.
     return "text-[#6cb38f]";
 };
 
@@ -424,9 +421,7 @@ const ContextWindowGauge = ({ total, percent }: { total: number, percent: number
     return (
         <div className="flex items-center gap-1.5 text-xs whitespace-nowrap">
             <svg width={size} height={size} className="-rotate-90 flex-shrink-0">
-                {/* Neutral gray track. (Theme tokens here are defined without an
-                    alpha channel, so an /opacity modifier on them is silently
-                    ignored — use a solid palette gray instead.) */}
+                {/* Neutral gray track. */}
                 <circle
                     cx={size / 2}
                     cy={size / 2}
diff --git a/packages/web/src/features/chat/modelContextWindow.server.ts b/packages/web/src/features/chat/modelContextWindow.server.ts
index d15336e4c..62b2345c9 100644
--- a/packages/web/src/features/chat/modelContextWindow.server.ts
+++ b/packages/web/src/features/chat/modelContextWindow.server.ts
@@ -76,8 +76,8 @@ const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
  * Pure lookup of a model's context window in a models.dev catalog. Separated
  * from the network fetch so it can be unit-tested directly.
  *
- * Returns the total context window (input + output share it) in tokens, or
- * `undefined` when the model isn't catalogued or has no usable window.
+ * Returns the total context window in tokens, or `undefined` when the model
+ * isn't catalogued or has no usable window.
  */
 export const lookupContextWindow = (
     catalog: ModelsDevCatalog | null,
@@ -94,13 +94,6 @@ export const lookupContextWindow = (
     return typeof context === 'number' && context > 0 ? context : undefined;
 };
 
-/**
- * Resolves the context window (in tokens) for a configured language model from
- * the models.dev catalog. Returns `undefined` when unknown — e.g. arbitrary
- * openai-compatible / self-hosted ids, provider/model ids that don't match the
- * catalog's keys (bedrock ARNs, vertex `@`-suffixed ids, azure deployments), or
- * when models.dev is unreachable. Never throws into the request path.
- */
 export const resolveContextWindow = async (
     config: Pick<LanguageModel, 'provider' | 'model'>,
 ): Promise<number | undefined> => {

From 15af04c90cb2f96f4b9cb494389a45b2b252c1fd Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 20:30:28 -0700
Subject: [PATCH 09/12] docs: use placeholder PR number in context-window
 changelog entry

Real PR number to be filled in once the PR is opened.
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 671b6aaa1..f9cf81007 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353)
-- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370)
+- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#XXXX](https://github.com/sourcebot-dev/sourcebot/pull/XXXX)
 
 ### Fixed
 - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367)

From e4b54e449e4237c0cbb6b4a070da6b2f0fced43c Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 20:41:18 -0700
Subject: [PATCH 10/12] docs: fill in PR number for context-window changelog
 entry

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f9cf81007..671b6aaa1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353)
-- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#XXXX](https://github.com/sourcebot-dev/sourcebot/pull/XXXX)
+- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370)
 
 ### Fixed
 - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367)

From 32089ba363d8ff97b0aac387ac6834cadd5656fa Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Thu, 25 Jun 2026 20:59:29 -0700
Subject: [PATCH 11/12] fix(web): serve last-known-good models.dev catalog on
 fetch failure

Previously a failed fetch reset the cache to null, so every chat send
re-attempted the fetch and blocked up to the 8s timeout during a models.dev
outage; a failed TTL refresh also discarded the previously-cached catalog.

Switch loadCatalog to stale-while-revalidate: after the first successful
load the request path never blocks (it serves the last-known-good catalog
and refreshes in the background), failed refreshes keep the cached value,
and a 60s negative-cache window bounds retries during an outage.
---
 .../chat/modelContextWindow.server.ts         | 42 +++++++++---
 .../features/chat/modelContextWindow.test.ts  | 64 +++++++++++++++++++
 2 files changed, 97 insertions(+), 9 deletions(-)

diff --git a/packages/web/src/features/chat/modelContextWindow.server.ts b/packages/web/src/features/chat/modelContextWindow.server.ts
index 62b2345c9..0e70dc04f 100644
--- a/packages/web/src/features/chat/modelContextWindow.server.ts
+++ b/packages/web/src/features/chat/modelContextWindow.server.ts
@@ -13,6 +13,9 @@ const FETCH_TIMEOUT_MS = 8000;
 // Re-fetch the (~2.4 MB) catalog at most once per this interval per server
 // process. New models trickle in daily; a stale window for a few hours is fine.
 const CATALOG_TTL_MS = 6 * 60 * 60 * 1000;
+// After a failed fetch, don't reattempt for this long. Without it, an outage in
+// models.dev would make every chat send pay the fetch timeout on the request path.
+const NEGATIVE_CACHE_MS = 60 * 1000;
 
 // Sourcebot provider id -> models.dev top-level catalog key. Only providers
 // whose Sourcebot id differs from the models.dev id need an entry; everything
@@ -37,8 +40,14 @@ type ModelsDevProvider = {
 
 export type ModelsDevCatalog = Record<string, ModelsDevProvider>;
 
-let catalogPromise: Promise<ModelsDevCatalog | null> | null = null;
+// Last successfully-fetched catalog. Served while fresh, and kept as a fallback
+// when a later refresh fails. `catalogFetchedAt` is when it was fetched (TTL),
+// `lastFailedAt` the most recent fetch failure (negative-cache backoff), and
+// `inFlightFetch` dedupes concurrent fetches.
+let cachedCatalog: ModelsDevCatalog | null = null;
 let catalogFetchedAt = 0;
+let lastFailedAt = 0;
+let inFlightFetch: Promise<ModelsDevCatalog | null> | null = null;
 
 const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
     try {
@@ -58,18 +67,33 @@ const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
 
 const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
     const now = Date.now();
-    if (!catalogPromise || now - catalogFetchedAt > CATALOG_TTL_MS) {
-        catalogFetchedAt = now;
-        catalogPromise = fetchCatalog().then((catalog) => {
-            // Don't memoize failures — let the next caller retry instead of
-            // being stuck with a null catalog until the TTL expires.
-            if (!catalog) {
-                catalogPromise = null;
+    const isFresh = cachedCatalog !== null && now - catalogFetchedAt <= CATALOG_TTL_MS;
+    const isBackingOff = now - lastFailedAt < NEGATIVE_CACHE_MS;
+
+    // Kick off a (deduped) refresh when the cache is stale/empty and we're not
+    // within the post-failure backoff window. On success it replaces the cache;
+    // on failure it only records the failure time, leaving the last-known-good
+    // catalog intact.
+    if (!isFresh && !isBackingOff && !inFlightFetch) {
+        inFlightFetch = fetchCatalog().then((catalog) => {
+            if (catalog) {
+                cachedCatalog = catalog;
+                catalogFetchedAt = Date.now();
+            } else {
+                lastFailedAt = Date.now();
             }
+            inFlightFetch = null;
             return catalog;
         });
     }
-    return catalogPromise;
+
+    // After the first successful load, never block the request path on the
+    // network: serve the last-known-good value (even if stale) and let any
+    // refresh settle in the background. Before that, await any in-flight fetch.
+    if (cachedCatalog !== null) {
+        return cachedCatalog;
+    }
+    return inFlightFetch ?? null;
 };
 
 /**
diff --git a/packages/web/src/features/chat/modelContextWindow.test.ts b/packages/web/src/features/chat/modelContextWindow.test.ts
index e8adc3517..9476820ae 100644
--- a/packages/web/src/features/chat/modelContextWindow.test.ts
+++ b/packages/web/src/features/chat/modelContextWindow.test.ts
@@ -96,3 +96,67 @@ describe('resolveContextWindow', () => {
         expect(fetchMock).toHaveBeenCalledTimes(1);
     });
 });
+
+describe('resolveContextWindow resilience', () => {
+    afterEach(() => {
+        vi.unstubAllGlobals();
+        vi.restoreAllMocks();
+        vi.resetModules();
+    });
+
+    // Re-import the module so each scenario starts with fresh internal cache state.
+    const importFresh = async () => {
+        vi.resetModules();
+        return await import('./modelContextWindow.server');
+    };
+
+    test('negative-caches failures instead of refetching on every call', async () => {
+        const fetchMock = vi.fn(async () => ({
+            ok: false,
+            status: 503,
+            statusText: 'Service Unavailable',
+        }) as unknown as Response);
+        vi.stubGlobal('fetch', fetchMock);
+
+        const mod = await importFresh();
+
+        expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined();
+        expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBeUndefined();
+        expect(await mod.resolveContextWindow(model('openai', 'gpt-4.1'))).toBeUndefined();
+
+        // Only the first attempt hit the network; the rest were short-circuited
+        // by the negative-cache window, so chat sends don't repeatedly block.
+        expect(fetchMock).toHaveBeenCalledTimes(1);
+    });
+
+    test('preserves the last-known-good catalog when a refresh fails', async () => {
+        let nowMs = 1_700_000_000_000;
+        vi.spyOn(Date, 'now').mockImplementation(() => nowMs);
+
+        let shouldFail = false;
+        const fetchMock = vi.fn(async () => (shouldFail
+            ? { ok: false, status: 503, statusText: 'Service Unavailable' }
+            : { ok: true, json: async () => catalog }) as unknown as Response);
+        vi.stubGlobal('fetch', fetchMock);
+
+        const mod = await importFresh();
+
+        // First load populates the cache.
+        expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000);
+        expect(fetchMock).toHaveBeenCalledTimes(1);
+
+        // Advance past the TTL and make every refresh fail.
+        nowMs += 7 * 60 * 60 * 1000;
+        shouldFail = true;
+
+        // Stale-while-revalidate: serves the cached value and refreshes in the
+        // background (which fails).
+        expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000);
+        // Let the background refresh settle.
+        await new Promise((resolve) => setTimeout(resolve, 0));
+        expect(fetchMock).toHaveBeenCalledTimes(2);
+
+        // The failed refresh must not have discarded the good catalog.
+        expect(await mod.resolveContextWindow(model('anthropic', 'claude-sonnet-4-5'))).toBe(200000);
+    });
+});

From 28f61eb6c38345ca769a5d0b45b28283562737af Mon Sep 17 00:00:00 2001
From: Jack Minnetian <270441393+BlueBottleLatte@users.noreply.github.com>
Date: Fri, 26 Jun 2026 09:39:08 -0700
Subject: [PATCH 12/12] docs: mark context-window gauge changelog entry as [EE]

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 671b6aaa1..e6d3a2c05 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353)
-- Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370)
+- [EE] Added a context-window usage gauge to the Ask Sourcebot chat details, showing how much of the selected model's context window each turn occupies. Window sizes are resolved from the models.dev catalog. [#1370](https://github.com/sourcebot-dev/sourcebot/pull/1370)
 
 ### Fixed
 - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367)